patina-cli 3.11.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.patina.default.yaml +29 -29
- package/CHANGELOG.md +53 -0
- package/NOTICE +21 -0
- package/README.md +117 -224
- package/README_JA.md +134 -77
- package/README_KR.md +132 -74
- package/README_ZH.md +137 -80
- package/SKILL.md +11 -20
- package/artifacts/rebaseline-2025/README.md +147 -0
- package/artifacts/rebaseline-2025/human-controls.public.jsonl +250 -0
- package/artifacts/rebaseline-2025/intake.example.jsonl +2 -0
- package/artifacts/rebaseline-2025/intake.local.example.jsonl +25 -0
- package/artifacts/rebaseline-2025/prompts.template.jsonl +7 -0
- package/artifacts/rebaseline-2025/sources.ko-public.jsonl +39 -0
- package/assets/brand/patina-badge.svg +18 -0
- package/assets/brand/patina-mark.svg +8 -0
- package/assets/demo/README.md +79 -0
- package/core/scoring.md +12 -12
- package/core/standalone-prompt.md +3 -1
- package/core/stylometry.md +93 -22
- package/docs/API.md +1554 -0
- package/docs/AUTHENTICATION.md +50 -26
- package/docs/AUTHENTICATION_KR.md +54 -29
- package/docs/BRANDING.md +9 -8
- package/docs/CLI.md +55 -14
- package/docs/COOKBOOK.md +8 -21
- package/docs/DEMO.md +32 -5
- package/docs/EXIT-CODES.md +2 -3
- package/docs/FALSE-POSITIVES.md +63 -0
- package/docs/FAQ.md +9 -1
- package/docs/FAQ_KR.md +3 -1
- package/docs/FLAG-PARITY.md +33 -47
- package/docs/ISSUE-WAVES.md +57 -0
- package/docs/PATTERNS-EN.md +67 -3
- package/docs/PATTERNS-JA.md +68 -2
- package/docs/PATTERNS-KO.md +70 -7
- package/docs/PATTERNS-ZH.md +67 -3
- package/docs/PATTERNS.md +5 -5
- package/docs/RESEARCH-DOCS-PLATFORM.md +54 -0
- package/docs/ROADMAP.md +46 -66
- package/docs/TRANSLATIONESE-KO.md +51 -0
- package/docs/audits/2026-05-deep-research.md +3 -1
- package/docs/benchmarks/README.md +51 -0
- package/docs/benchmarks/detector-comparison.json +69 -9
- package/docs/benchmarks/detector-comparison.md +10 -5
- package/docs/benchmarks/katfish-ko-latest.json +657 -0
- package/docs/benchmarks/katfish-ko-latest.md +77 -0
- package/docs/benchmarks/latest.json +1183 -108
- package/docs/benchmarks/latest.md +84 -60
- package/docs/benchmarks/lexicon-freshness-en-2026-05-22.json +1121 -0
- package/docs/benchmarks/lexicon-freshness-en-2026-05-22.md +136 -0
- package/docs/benchmarks/rebaseline-latest.json +381 -0
- package/docs/benchmarks/rebaseline-latest.md +121 -0
- package/docs/benchmarks/register-stratified-latest.json +164 -0
- package/docs/benchmarks/register-stratified-latest.md +99 -0
- package/docs/benchmarks/register-stratified.md +43 -0
- package/docs/integrations/github-action.md +44 -11
- package/docs/integrations/playground.md +58 -0
- package/docs/integrations/pre-commit.md +5 -5
- package/docs/integrations/release.md +5 -3
- package/docs/integrations/static-sites.md +83 -0
- package/docs/research/2025-rebaseline-plan.md +71 -2
- package/docs/research/2026-rebaseline.md +102 -0
- package/docs/research/adversarial-mps.md +41 -0
- package/docs/research/ai-human-metrics.md +35 -23
- package/docs/research/human-eval-panel.md +42 -0
- package/docs/research/judge-agreement.md +24 -0
- package/docs/research/ko-2025-corpus-sources.md +135 -0
- package/docs/research/lexicon-freshness-audit.md +64 -0
- package/docs/research/zh-ja-lexicon-calibration.md +60 -0
- package/docs/social/patina-launch-copy.md +173 -100
- package/docs/social/patina-launch-execution.md +94 -0
- package/docs/social/patina-launch-korean-first.md +83 -0
- package/docs/social/signs-of-ai-writing.md +26 -0
- package/docs/social/signs-of-ai-writing_KR.md +26 -0
- package/lexicon/ai-en.md +21 -24
- package/lexicon/ai-ja.md +158 -0
- package/lexicon/ai-ko.md +9 -9
- package/lexicon/ai-zh.md +158 -0
- package/lexicon/provenance/ai-en.json +970 -0
- package/lexicon/provenance/ai-ja.json +542 -0
- package/lexicon/provenance/ai-ko.json +866 -0
- package/lexicon/provenance/ai-zh.json +542 -0
- package/package.json +49 -8
- package/patterns/en-communication.md +5 -0
- package/patterns/en-content.md +5 -0
- package/patterns/en-filler.md +5 -0
- package/patterns/en-language.md +29 -1
- package/patterns/en-structure.md +5 -0
- package/patterns/en-style.md +5 -0
- package/patterns/en-viral-hook.md +42 -2
- package/patterns/ja-communication.md +5 -0
- package/patterns/ja-content.md +5 -0
- package/patterns/ja-filler.md +5 -0
- package/patterns/ja-language.md +33 -1
- package/patterns/ja-structure.md +12 -0
- package/patterns/ja-style.md +5 -0
- package/patterns/ja-viral-hook.md +41 -2
- package/patterns/ko-communication.md +5 -0
- package/patterns/ko-content.md +5 -0
- package/patterns/ko-filler.md +5 -0
- package/patterns/ko-language.md +33 -1
- package/patterns/ko-structure.md +25 -6
- package/patterns/ko-style.md +5 -0
- package/patterns/ko-viral-hook.md +38 -2
- package/patterns/zh-communication.md +5 -0
- package/patterns/zh-content.md +5 -0
- package/patterns/zh-filler.md +5 -0
- package/patterns/zh-language.md +37 -1
- package/patterns/zh-structure.md +12 -0
- package/patterns/zh-style.md +5 -0
- package/patterns/zh-viral-hook.md +38 -2
- package/playground/README.md +55 -0
- package/playground/analytics.js +4 -0
- package/playground/analyzer.js +883 -0
- package/playground/app.js +157 -0
- package/playground/data/lexicons.js +343 -0
- package/playground/index.html +138 -0
- package/playground/styles.css +267 -0
- package/profiles/namuwiki.md +111 -0
- package/scripts/adversarial-mps-report.mjs +201 -0
- package/scripts/badge-json.mjs +79 -0
- package/scripts/benchmark-report.mjs +56 -9
- package/scripts/check-release-metadata.mjs +0 -2
- package/scripts/detector-comparison.mjs +7 -7
- package/scripts/generate-playground-data.mjs +77 -0
- package/scripts/katfish-calibration.mjs +464 -0
- package/scripts/lexicon-freshness.mjs +485 -0
- package/scripts/lint.mjs +1 -1
- package/scripts/precommit-score.mjs +4 -3
- package/scripts/prose-score.mjs +81 -5
- package/scripts/rebaseline-intake.mjs +242 -0
- package/scripts/rebaseline-score.mjs +268 -0
- package/scripts/rebaseline-summary.mjs +773 -0
- package/scripts/rebaseline-web-collect.mjs +410 -0
- package/scripts/update-benchmark-ranges.mjs +1 -0
- package/src/api.js +69 -105
- package/src/auth.js +50 -2
- package/src/backends/claude-cli.js +19 -4
- package/src/backends/codex-cli.js +19 -3
- package/src/backends/contract.js +230 -1
- package/src/backends/gemini-cli.js +18 -5
- package/src/backends/index.js +87 -12
- package/src/backends/kimi-cli.js +161 -0
- package/src/cli.js +577 -567
- package/src/commands/doctor.js +2 -2
- package/src/config.js +29 -0
- package/src/errors.js +53 -1
- package/src/features/discourse-tells.js +68 -0
- package/src/features/index.js +82 -8
- package/src/features/lexicon.js +40 -6
- package/src/features/markup-leakage.js +69 -0
- package/src/features/segment.js +41 -0
- package/src/features/signal-strength.js +81 -0
- package/src/features/stylometry.js +231 -1
- package/src/features/translationese.js +127 -0
- package/src/loader.js +76 -0
- package/src/logger.js +22 -23
- package/src/model-defaults.js +55 -0
- package/src/ouroboros.js +31 -0
- package/src/output.js +102 -90
- package/src/prompt-builder.js +103 -68
- package/src/providers.js +51 -4
- package/src/scoring.js +210 -2
- package/src/security.js +75 -0
- package/tests/fixtures/live-quality/en/public-docs-01.md +26 -0
- package/tests/fixtures/live-quality/ko/public-docs-01.md +26 -0
- package/tests/fixtures/suspect-zones/expected-ranges.json +207 -16
- package/tests/fixtures/suspect-zones/ja/ai/ja-ai-04-lexicon.md +11 -0
- package/tests/fixtures/suspect-zones/ja/natural/ja-nat-04-lexicon-cold.md +11 -0
- package/tests/fixtures/suspect-zones/ko/ai/ko-ai-02.md +4 -5
- package/tests/fixtures/suspect-zones/ko/ai/ko-ai-07-ko-diagnostic.md +11 -0
- package/tests/fixtures/suspect-zones/zh/ai/zh-ai-04-lexicon.md +11 -0
- package/tests/fixtures/suspect-zones/zh/natural/zh-nat-04-lexicon-cold.md +11 -0
- package/tests/quality/README.md +188 -11
- package/tests/quality/adversarial-mps/fixtures.jsonl +10 -0
- package/tests/quality/benchmark.mjs +39 -1
- package/tests/quality/dogfood.mjs +5 -3
- package/tests/quality/live-fixtures.jsonl +2 -0
- package/tests/quality/live-quality.mjs +596 -0
- package/tests/quality/ranking-metrics.mjs +136 -0
- package/tests/quality/rebaseline-manifest.example.jsonl +5 -0
- package/vercel.json +53 -0
- package/SKILL-MAX.md +0 -455
- package/docs/internal/HARNESS.md +0 -14
- package/docs/internal/README.md +0 -14
- package/docs/internal/WARP.md +0 -23
- package/patina-max/SKILL.md +0 -523
- package/patina-max/composite.py +0 -457
- package/src/cache.js +0 -106
- package/src/commands/init.js +0 -208
- package/src/manifest.js +0 -162
- package/src/max-mode.js +0 -207
package/patina-max/composite.py
DELETED
|
@@ -1,457 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""patina-max composite: deterministic 4-axis winner reselection over an
|
|
3
|
-
existing patina-max run directory.
|
|
4
|
-
|
|
5
|
-
The default patina-max winner picker only sees AI-likeness and MPS, so it
|
|
6
|
-
goes noise-bound when a baseline is already humanized. This script adds two
|
|
7
|
-
Korean-aware deterministic metrics — Register Stability Score (RSS) and
|
|
8
|
-
Edit Conservativeness (EditCons) — and reselects the winner.
|
|
9
|
-
|
|
10
|
-
Usage
|
|
11
|
-
-----
|
|
12
|
-
python3 patina-max/composite.py <run_dir> [--weights ...]
|
|
13
|
-
|
|
14
|
-
Layout consumed
|
|
15
|
-
---------------
|
|
16
|
-
<run_dir>/
|
|
17
|
-
input.md baseline source MDX (required)
|
|
18
|
-
claude.md candidate (optional; absent → "missing")
|
|
19
|
-
gemini.md candidate (optional)
|
|
20
|
-
codex.md candidate (optional; may be a failure note)
|
|
21
|
-
meta.md YAML; per-candidate ai_score / mps / status (recommended)
|
|
22
|
-
|
|
23
|
-
Layout produced
|
|
24
|
-
---------------
|
|
25
|
-
<run_dir>/
|
|
26
|
-
composite.md per-candidate metric table + weighted totals
|
|
27
|
-
winner.md winning candidate's text (or a none-found notice)
|
|
28
|
-
|
|
29
|
-
Default weights (renormalised after dropping the LLM-Judge slot):
|
|
30
|
-
|
|
31
|
-
AI=0.353 MPS=0.235 RSS=0.235 EditCons=0.176
|
|
32
|
-
|
|
33
|
-
Override via .patina.default.yaml:
|
|
34
|
-
|
|
35
|
-
composite-weights:
|
|
36
|
-
ai: 0.353
|
|
37
|
-
mps: 0.235
|
|
38
|
-
rss: 0.235
|
|
39
|
-
edit_cons: 0.176
|
|
40
|
-
|
|
41
|
-
Or inline:
|
|
42
|
-
|
|
43
|
-
python3 patina-max/composite.py <run_dir> --weights ai=0.4,rss=0.3
|
|
44
|
-
"""
|
|
45
|
-
|
|
46
|
-
from __future__ import annotations
|
|
47
|
-
|
|
48
|
-
import argparse
|
|
49
|
-
import difflib
|
|
50
|
-
import math
|
|
51
|
-
import re
|
|
52
|
-
import sys
|
|
53
|
-
from collections import Counter
|
|
54
|
-
from dataclasses import dataclass, field
|
|
55
|
-
from pathlib import Path
|
|
56
|
-
from typing import Optional
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
# ---------------------------------------------------------------------------
|
|
60
|
-
# Korean register / edit metrics
|
|
61
|
-
# ---------------------------------------------------------------------------
|
|
62
|
-
|
|
63
|
-
# Sentence-final ending vocabulary. Order matters — longer forms first so the
|
|
64
|
-
# regex engine matches `합니다` before falling back to `다`.
|
|
65
|
-
_ENDING_PATTERNS = [
|
|
66
|
-
# 합쇼체 (deferential formal): ~ㅂ니다 / ~습니다 / ~ㅂ니까 / ~습니까 / ~십시오
|
|
67
|
-
("hapsho", r"(?:[가-힣]니다|[가-힣]니까|[가-힣]시오|십시오|십시요)"),
|
|
68
|
-
# 해요체 (polite informal)
|
|
69
|
-
("haeyo", r"(?:세요|예요|이에요|에요|해요|어요|아요|네요|군요|지요|죠|[가-힣]요)"),
|
|
70
|
-
# 해라체 (plain declarative / imperative)
|
|
71
|
-
("haera", r"(?:[가-힣]는다|한다|[가-힣]다|하라|마라|보라|들라|[가-힣]아라|[가-힣]어라|[가-힣]라)"),
|
|
72
|
-
# 해체 (casual / 반말)
|
|
73
|
-
("hae", r"(?:해|야|아|어|네|군|지)"),
|
|
74
|
-
]
|
|
75
|
-
|
|
76
|
-
_SENTENCE_SPLIT = re.compile(r"[.!?。]+\s+|\n+")
|
|
77
|
-
_TRAILING_PUNCT = re.compile(r"[\s.,!?;:。、]+$")
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
def _strip_markdown_noise(text: str) -> str:
|
|
81
|
-
"""Drop fenced code blocks, JSX tags, image lines, and href payloads.
|
|
82
|
-
|
|
83
|
-
Composite metrics are about Korean prose. MDX fences and JSX scaffolding
|
|
84
|
-
would otherwise inflate the token count and skew Edit Conservativeness.
|
|
85
|
-
"""
|
|
86
|
-
text = re.sub(r"```[\s\S]*?```", "", text)
|
|
87
|
-
text = re.sub(r"<[A-Z][\w]*\b[^>]*?/?>", "", text)
|
|
88
|
-
text = re.sub(r"</[A-Z][\w]*>", "", text)
|
|
89
|
-
text = re.sub(r"!\[[^\]]*\]\([^)]*\)", "", text)
|
|
90
|
-
text = re.sub(r"\[([^\]]*)\]\([^)]*\)", r"\1", text)
|
|
91
|
-
text = re.sub(r"\A---\n[\s\S]*?\n---\n", "", text)
|
|
92
|
-
return text
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
def _split_sentences(text: str) -> list[str]:
    """Break Markdown text into cleaned sentence-like fragments.

    The text is first passed through ``_strip_markdown_noise``, then cut on
    ``_SENTENCE_SPLIT``. Each resulting piece is further split on line
    boundaries, trimmed, and stripped of leading quote/heading/bullet
    markers. Fragments that end up empty are dropped.
    """

    def _tidy(fragment: str) -> str:
        fragment = fragment.strip()
        if not fragment:
            return ""
        # Leading blockquote / heading / bullet markers.
        fragment = re.sub(r"^\s*([>#\-*]+\s*)+", "", fragment)
        # Leading `**Label**:` prefix.
        # NOTE(review): the marker regex above already consumes any leading
        # `*` run, so this substitution looks unreachable — confirm intent.
        fragment = re.sub(r"^\*\*[^*]+\*\*[\s:—-]*", "", fragment)
        return fragment.strip()

    chunks = _SENTENCE_SPLIT.split(_strip_markdown_noise(text))
    tidied = (_tidy(piece) for chunk in chunks for piece in chunk.splitlines())
    return [fragment for fragment in tidied if fragment]
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
def ending_distribution(text: str) -> Counter[str]:
    """Count sentences per register bucket.

    Each sentence from ``_split_sentences`` has its trailing punctuation
    removed and is assigned to the first entry of ``_ENDING_PATTERNS`` whose
    regex matches at the end of the sentence, or to ``"other"`` when none
    does. Sentences that are empty after trimming are not counted.
    """

    def _bucket_for(tail: str) -> str:
        matches = (
            name
            for name, pattern in _ENDING_PATTERNS
            if re.search(pattern + r"$", tail)
        )
        return next(matches, "other")

    tails = (_TRAILING_PUNCT.sub("", sentence) for sentence in _split_sentences(text))
    return Counter(_bucket_for(tail) for tail in tails if tail)
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
def cosine_similarity(a: Counter[str], b: Counter[str]) -> float:
    """Cosine similarity between two count vectors.

    Returns 0.0 when both counters are empty or when either has zero
    magnitude; otherwise the dot product divided by the product of the two
    Euclidean norms.
    """
    all_keys = set(a).union(b)
    if not all_keys:
        return 0.0

    # Counter lookups yield 0 for absent keys, so unshared keys add nothing.
    dot_product = sum(a[key] * b[key] for key in all_keys)
    magnitude_a = math.sqrt(sum(count * count for count in a.values()))
    magnitude_b = math.sqrt(sum(count * count for count in b.values()))
    if magnitude_a == 0 or magnitude_b == 0:
        return 0.0
    return dot_product / (magnitude_a * magnitude_b)
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
def register_stability(baseline: str, candidate: str) -> float:
    """Register Stability Score (RSS) on a 0-100 scale.

    Computed as the cosine similarity between the ending-bucket
    distributions of ``baseline`` and ``candidate``, multiplied by 100.
    """
    baseline_profile = ending_distribution(baseline)
    candidate_profile = ending_distribution(candidate)
    return cosine_similarity(baseline_profile, candidate_profile) * 100.0
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
def edit_conservativeness(baseline: str, candidate: str) -> float:
    """Edit Conservativeness (EditCons) on a 0-100 scale.

    Both texts are cleaned with ``_strip_markdown_noise`` and split on
    whitespace; the score is ``difflib.SequenceMatcher.ratio()`` over the two
    token lists, times 100. Two empty token lists score 100.0; exactly one
    empty list scores 0.0.
    """
    base_tokens = _strip_markdown_noise(baseline).split()
    cand_tokens = _strip_markdown_noise(candidate).split()

    if not (base_tokens or cand_tokens):
        return 100.0
    if not (base_tokens and cand_tokens):
        return 0.0

    similarity = difflib.SequenceMatcher(
        isjunk=None, a=base_tokens, b=cand_tokens, autojunk=False
    ).ratio()
    return similarity * 100.0
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
# ---------------------------------------------------------------------------
|
|
157
|
-
# Composite scoring + run-dir IO
|
|
158
|
-
# ---------------------------------------------------------------------------
|
|
159
|
-
|
|
160
|
-
# Default per-axis weights for the composite score.
DEFAULT_WEIGHTS = dict(
    ai=0.353,
    mps=0.235,
    rss=0.235,
    edit_cons=0.176,
)

# Candidate files looked up as `<model>.md` inside a run directory.
CANDIDATE_MODELS = ("claude", "gemini", "codex")

# Leading `---` ... `---` front-matter block at the very start of a file.
RUN_FRONTMATTER = re.compile(r"\A---\n([\s\S]*?)\n---\n", flags=re.MULTILINE)
# Two numbers joined by a hyphen, en dash or tilde, e.g. `0-2`.
NUMBER_RANGE = re.compile(r"(\d+(?:\.\d+)?)\s*[-–~]\s*(\d+(?:\.\d+)?)")
# A single integer or decimal number.
SINGLE_NUMBER = re.compile(r"(\d+(?:\.\d+)?)")
# Lower-cased metric values that mean "no number available".
NON_NUMERIC_PLACEHOLDERS = {
    "n/a",
    "na",
    "none",
    "—",
    "-",
    "pending",
    "tbd",
    "unknown",
}
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
@dataclass
class Candidate:
    """One candidate rewrite and the scores attached to it.

    Only ``model`` and ``text`` are required; every score starts as ``None``
    and ``status`` starts as ``"unknown"``.
    """

    # Identity and content.
    model: str
    text: str

    # Scores parsed from meta.md.
    ai_score: Optional[float] = None
    mps: Optional[float] = None

    # Scores computed against the baseline text.
    rss: Optional[float] = None
    edit_cons: Optional[float] = None

    # Weighted total; None when it cannot be computed.
    composite: Optional[float] = None

    status: str = "unknown"
    # Free-form remarks; each instance gets its own list.
    notes: list[str] = field(default_factory=list)
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
def parse_metric(raw: Optional[str]) -> Optional[float]:
    """Coerce a metric string from meta.md into a float.

    Examples:
        ``0-2 (within noise floor)``   -> 1.0 (midpoint of the range)
        ``92 (all anchors preserved)`` -> 92.0
        ``92 (3-4 anchors rewritten)`` -> 92.0 (the annotation is ignored)
        ``n/a`` / ``pending`` / ``—``   -> None

    Args:
        raw: Raw value, or None. Non-string values are converted with
            ``str()``; surrounding whitespace and quotes are stripped.

    Returns:
        The first number in the value, or the midpoint when that first
        number opens a ``low-high`` range. None when the value is missing,
        is a known placeholder, or contains no number.
    """
    if raw is None:
        return None
    text = str(raw).strip().strip('"').strip("'")
    if not text or text.lower() in NON_NUMERIC_PLACEHOLDERS:
        return None

    first_number = SINGLE_NUMBER.search(text)
    if first_number is None:
        return None

    # Only accept a range that starts at the first number. Searching the
    # whole string would let a range inside a trailing annotation override
    # the actual metric value.
    range_match = NUMBER_RANGE.match(text, first_number.start())
    if range_match:
        low = float(range_match.group(1))
        high = float(range_match.group(2))
        return (low + high) / 2.0
    return float(first_number.group(1))
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
def parse_meta_candidates(meta_text: str) -> dict[str, dict[str, str]]:
    """Pull per-candidate score lines from meta.md without a YAML library.

    Scans for a top-level ``candidates:`` key and, inside it, list items that
    start with ``- model: <name>``. For each item the keys ``ai_score``,
    ``ai_score_instructional``, ``ai_score_technical``, ``mps``, ``status``
    and ``wall_time_seconds`` are recorded as raw strings.

    The candidates block ends at the next top-level line: any non-blank line
    at column 0 that is not a list item or a comment, or a ``---`` / ``...``
    document marker. This includes top-level scalars such as
    ``status: complete``, which must not be attributed to the last candidate.
    Matching surrounding quotes are stripped from values.

    Args:
        meta_text: Full text of meta.md.

    Returns:
        Mapping of model name to its recorded fields; each entry always
        contains ``"model"``.
    """
    recorded_keys = {
        "ai_score",
        "ai_score_instructional",
        "ai_score_technical",
        "mps",
        "status",
        "wall_time_seconds",
    }

    def _unquote(value: str) -> str:
        value = value.strip()
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            return value[1:-1]
        return value

    info: dict[str, dict[str, str]] = {}
    in_candidates = False
    current: Optional[dict[str, str]] = None
    for raw_line in meta_text.splitlines():
        line = raw_line.rstrip()
        if not line:
            continue
        stripped = line.lstrip()
        at_column_zero = not line[0].isspace()

        if at_column_zero and stripped in ("---", "..."):
            # Document boundary: whatever mapping we were in is over.
            in_candidates = False
            current = None
            continue
        if at_column_zero and not stripped.startswith(("-", "#")):
            # Any top-level key, with or without an inline value, closes the
            # previous block. Column-0 list items are kept because YAML
            # allows them directly under a key.
            in_candidates = line == "candidates:"
            current = None
            continue
        if not in_candidates:
            continue

        if stripped.startswith("- model:"):
            model = _unquote(stripped.split(":", 1)[1])
            current = {"model": model}
            info[model] = current
            continue
        if current is None or ":" not in stripped:
            continue

        key, value = stripped.split(":", 1)
        key = key.strip()
        value = _unquote(value)
        if value == "|":
            # Block scalar: the content lives on following lines and is not read.
            value = "<multiline>"
        if key in recorded_keys:
            current[key] = value
    return info
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
def read_candidate_text(path: Path) -> str:
    """Read a UTF-8 file and drop its leading front-matter block.

    Returns an empty string when ``path`` does not exist. At most one
    front-matter block, matched by ``RUN_FRONTMATTER``, is removed.
    """
    if path.exists():
        contents = path.read_text(encoding="utf-8")
        return RUN_FRONTMATTER.sub("", contents, count=1)
    return ""
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
def normalise_weights(weights: dict[str, float]) -> dict[str, float]:
    """Scale weights so they sum to 1.0.

    Args:
        weights: Mapping of axis name to raw weight.

    Returns:
        A new mapping with each weight divided by the total.

    Raises:
        ValueError: If any weight is NaN, infinite or negative, or if the
            weights do not sum to a positive number (including an empty
            mapping).
    """
    # NaN and infinity pass a plain `total <= 0` check and would turn every
    # normalised weight into NaN; a negative weight would invert its axis.
    for key, value in weights.items():
        if not math.isfinite(value) or value < 0:
            raise ValueError(f"weight `{key}` must be a finite, non-negative number: {value}")

    total = sum(weights.values())
    if total <= 0:
        raise ValueError("weights must sum to a positive number")
    return {key: value / total for key, value in weights.items()}
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
def parse_weight_overrides(spec: str) -> dict[str, float]:
    """Parse a ``key=value,key=value`` weight override string.

    Empty chunks between commas are ignored and keys are lower-cased.

    Raises:
        ValueError: If a chunk has no ``=``, names a key that is not in
            ``DEFAULT_WEIGHTS``, or has a value that is not a number.
    """
    parsed: dict[str, float] = {}
    for chunk in (piece.strip() for piece in spec.split(",")):
        if not chunk:
            continue
        name, separator, value = chunk.partition("=")
        if not separator:
            raise ValueError(f"invalid weight override `{chunk}` (expected key=value)")
        key = name.strip().lower()
        if key not in DEFAULT_WEIGHTS:
            raise ValueError(f"unknown weight key `{key}`; valid: {sorted(DEFAULT_WEIGHTS)}")
        try:
            parsed[key] = float(value)
        except ValueError as exc:
            raise ValueError(f"weight `{key}` not a number: {value}") from exc
    return parsed
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
def load_yaml_weights(yaml_path: Path) -> dict[str, float]:
    """Pick ``composite-weights:`` out of the patina config without PyYAML.

    Reads the indented ``key: number`` lines that follow a top-level
    ``composite-weights:`` line. Blank lines and comment lines inside the
    block are skipped, inline ``# ...`` comments after a value are ignored,
    and lines whose value is not a number are skipped. The block ends at the
    first non-blank, non-comment line that is not indented or has no ``:``.

    Args:
        yaml_path: Path to the config file.

    Returns:
        The weights found, restricted to keys present in ``DEFAULT_WEIGHTS``.
        Empty when the file or the block is missing.
    """
    if not yaml_path.exists():
        return {}
    weights: dict[str, float] = {}
    in_block = False
    for raw_line in yaml_path.read_text(encoding="utf-8").splitlines():
        if raw_line.startswith("composite-weights:"):
            in_block = True
            continue
        if not in_block:
            continue

        stripped = raw_line.strip()
        # Skip blanks and comments before testing indentation. A truly empty
        # line has no leading space and would otherwise end the block early.
        if not stripped or stripped.startswith("#"):
            continue
        if not raw_line.startswith(" "):
            break
        if ":" not in stripped:
            break

        key, value = stripped.split(":", 1)
        key = key.strip().lower()
        try:
            weights[key] = float(value.strip().split("#", 1)[0].strip())
        except ValueError:
            # Non-numeric value: ignore this entry and keep scanning.
            continue
    return {k: v for k, v in weights.items() if k in DEFAULT_WEIGHTS}
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
def resolve_weights(yaml_path: Path, cli_override: Optional[str]) -> dict[str, float]:
    """Build the final, normalised weight table.

    Starts from ``DEFAULT_WEIGHTS``, overlays values from the config file,
    then overlays any ``--weights`` overrides, and normalises the result.
    """
    merged = {**DEFAULT_WEIGHTS, **load_yaml_weights(yaml_path)}
    if cli_override:
        merged.update(parse_weight_overrides(cli_override))
    return normalise_weights(merged)
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
def composite_score(candidate: Candidate, weights: dict[str, float]) -> Optional[float]:
    """Weighted composite of the four axes, or None when it is undefined.

    Returns None unless the candidate's status is ``"success"`` and all of
    ``ai_score``, ``mps``, ``rss`` and ``edit_cons`` are set. The AI score is
    inverted (``100 - ai_score``) before weighting; the other axes are used
    as they are.
    """
    if candidate.status != "success":
        return None

    axes = (candidate.ai_score, candidate.mps, candidate.rss, candidate.edit_cons)
    if any(axis is None for axis in axes):
        return None

    ai_score, mps, rss, edit_cons = axes
    total = (100.0 - ai_score) * weights["ai"]
    total = total + mps * weights["mps"]
    total = total + rss * weights["rss"]
    total = total + edit_cons * weights["edit_cons"]
    return total
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
def render_composite_md(
    run_dir: Path,
    weights: dict[str, float],
    candidates: list[Candidate],
    winner: Optional[Candidate],
) -> str:
    """Render the composite.md report as a Markdown string.

    The report has a title naming the run directory, a weights table, a
    per-candidate score table (missing values shown as an em dash), the
    winner line, and a notes section when any candidate carries notes.
    """

    def _cell(value: Optional[float], digits: int) -> str:
        return "—" if value is None else f"{value:.{digits}f}"

    report: list[str] = [
        f"# patina-composite scores for `{run_dir.name}`",
        "",
        "Generated by `patina-max/composite.py` — deterministic 4-axis reselection.",
        "",
        "## Weights",
        "",
        "| Axis | Weight |",
        "|------|-------:|",
    ]
    report.extend(
        f"| {axis} | {weights[axis]:.4f} |" for axis in ("ai", "mps", "rss", "edit_cons")
    )
    report += [
        "",
        "## Candidate scores",
        "",
        "| Model | Status | AI | MPS | RSS | EditCons | Composite |",
        "|-------|--------|---:|----:|----:|--------:|----------:|",
    ]

    for entry in candidates:
        cells = [
            f"{entry.model}",
            f"{entry.status}",
            _cell(entry.ai_score, 1),
            _cell(entry.mps, 1),
            _cell(entry.rss, 1),
            _cell(entry.edit_cons, 1),
            _cell(entry.composite, 2),
        ]
        report.append("| " + " | ".join(cells) + " |")
    report.append("")

    if winner:
        verdict = f"**Winner:** `{winner.model}` — composite {winner.composite:.2f}"
    else:
        verdict = "**Winner:** none (no candidate scored successfully)"
    report += [verdict, ""]

    if any(entry.notes for entry in candidates):
        report += ["", "## Notes"]
        report.extend(
            f"- **{entry.model}**: {note}" for entry in candidates for note in entry.notes
        )

    return "\n".join(report) + "\n"
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
def main(argv: Optional[list[str]] = None) -> int:
    """CLI entry point: score the candidates in a run dir and pick a winner.

    Reads ``input.md`` (required), ``meta.md`` (optional, with a warning when
    absent) and one ``<model>.md`` per entry in ``CANDIDATE_MODELS``. Writes
    ``composite.md`` and ``winner.md`` into the run directory and prints what
    was written.

    Args:
        argv: Argument list; None lets argparse read ``sys.argv``.

    Returns:
        0 on success; 2 when the run directory or ``input.md`` is missing, or
        when the configured / overridden weights are invalid.
    """
    parser = argparse.ArgumentParser(description="patina-max composite winner reselection.")
    parser.add_argument("run_dir", type=Path, help="path to a patina-max run dir")
    parser.add_argument(
        "--weights",
        type=str,
        default=None,
        help="override weights, comma-separated (e.g. ai=0.4,rss=0.3)",
    )
    parser.add_argument(
        "--config",
        type=Path,
        default=Path(__file__).resolve().parents[1] / ".patina.default.yaml",
        help="patina config to read composite-weights from",
    )
    args = parser.parse_args(argv)

    run_dir: Path = args.run_dir.resolve()
    if not run_dir.is_dir():
        print(f"error: not a directory: {run_dir}", file=sys.stderr)
        return 2

    input_path = run_dir / "input.md"
    if not input_path.exists():
        print(f"error: missing required file: {input_path}", file=sys.stderr)
        return 2
    baseline = read_candidate_text(input_path)

    meta_path = run_dir / "meta.md"
    meta_info: dict[str, dict[str, str]] = {}
    if meta_path.exists():
        meta_info = parse_meta_candidates(meta_path.read_text(encoding="utf-8"))
    else:
        print(f"warning: missing meta.md at {meta_path}; AI/MPS will be marked unknown", file=sys.stderr)

    # Weight parsing and normalisation raise ValueError on bad input. Report
    # it like the other input errors instead of crashing with a traceback.
    try:
        weights = resolve_weights(args.config, args.weights)
    except ValueError as exc:
        print(f"error: invalid composite weights: {exc}", file=sys.stderr)
        return 2

    candidates: list[Candidate] = []
    for model in CANDIDATE_MODELS:
        path = run_dir / f"{model}.md"
        if not path.exists():
            candidates.append(Candidate(model=model, text="", status="missing"))
            continue

        info = meta_info.get(model, {})
        # The first non-empty AI score variant is used.
        raw_ai = info.get("ai_score") or info.get("ai_score_instructional") or info.get("ai_score_technical")
        cand = Candidate(
            model=model,
            text=read_candidate_text(path),
            status=info.get("status", "unknown"),
            ai_score=parse_metric(raw_ai),
            mps=parse_metric(info.get("mps")),
        )
        if cand.status == "success" and cand.text.strip():
            cand.rss = register_stability(baseline, cand.text)
            cand.edit_cons = edit_conservativeness(baseline, cand.text)
        else:
            cand.notes.append("skipping deterministic metrics (status not success or empty text)")
        cand.composite = composite_score(cand, weights)
        if cand.status == "success" and cand.composite is None:
            cand.notes.append("composite undefined — at least one of AI/MPS could not be parsed from meta.md")
        candidates.append(cand)

    scored = [c for c in candidates if c.composite is not None]
    winner: Optional[Candidate] = max(scored, key=lambda c: c.composite) if scored else None

    composite_path = run_dir / "composite.md"
    composite_path.write_text(
        render_composite_md(run_dir, weights, candidates, winner),
        encoding="utf-8",
    )

    winner_path = run_dir / "winner.md"
    if winner is not None:
        winner_path.write_text(
            f"---\nwinner_model: {winner.model}\ncomposite_score: {winner.composite:.2f}\n---\n\n{winner.text.lstrip()}",
            encoding="utf-8",
        )
    else:
        winner_path.write_text("# winner.md\n\nNo candidate scored successfully.\n", encoding="utf-8")

    # Show paths relative to the working directory when they are inside it.
    cwd = Path.cwd()
    for written in (composite_path, winner_path):
        shown = written.relative_to(cwd) if written.is_relative_to(cwd) else written
        print(f"wrote {shown}")
    if winner:
        print(f"winner: {winner.model} (composite {winner.composite:.2f})")
    else:
        print("winner: none")
    return 0
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
if __name__ == "__main__":
|
|
457
|
-
sys.exit(main())
|
package/src/cache.js
DELETED
|
@@ -1,106 +0,0 @@
|
|
|
1
|
-
import { createHash } from 'node:crypto';
import { mkdirSync, readFileSync, renameSync, rmSync, writeFileSync } from 'node:fs';
import { resolve } from 'node:path';
|
|
4
|
-
|
|
5
|
-
/** Schema version stamped into every cache entry as `cacheVersion`. */
export const CACHE_SCHEMA_VERSION = 1;

/** Default entry lifetime in seconds (24 hours). */
export const DEFAULT_CACHE_TTL_SECONDS = 60 * 60 * 24;
|
|
7
|
-
|
|
8
|
-
/**
 * Create a file-backed response cache rooted at `dir`.
 *
 * Each entry is a JSON file named after the SHA-256 key of the request
 * (prompt, model, temperature, base-URL host). Reads and writes are
 * best-effort: I/O failures are counted in `stats.errors` and never thrown.
 *
 * @param {object} [options]
 * @param {string} [options.dir] - Cache directory. When falsy, no cache is created.
 * @param {number} [options.ttlSeconds] - Entry lifetime in seconds.
 * @param {() => number} [options.now] - Clock returning epoch milliseconds.
 * @returns {{dir: string, ttlSeconds: number, stats: object, get: Function, set: Function} | null}
 *   The cache object, or `null` when `dir` is not provided.
 */
export function createResponseCache({
  dir,
  ttlSeconds = DEFAULT_CACHE_TTL_SECONDS,
  now = () => Date.now(),
} = {}) {
  if (!dir) return null;

  const stats = {
    hits: 0,
    misses: 0,
    writes: 0,
    expired: 0,
    errors: 0,
  };

  return {
    dir,
    ttlSeconds,
    stats,

    /**
     * Look up a cached response.
     *
     * @param {object} args - Request fields used to derive the cache key.
     * @returns {object | null} The stored entry plus `key`, `path` and
     *   `content`, or `null` on a miss (absent, unreadable, expired,
     *   written under another schema version, or without a string response).
     */
    get(args) {
      const key = responseCacheKey(args);
      const path = responseCachePath(dir, key);
      try {
        const entry = JSON.parse(readFileSync(path, 'utf8'));

        // Entries stamped with a different schema version are misses.
        // Entries with no stamp at all are still accepted.
        if (entry.cacheVersion != null && entry.cacheVersion !== CACHE_SCHEMA_VERSION) {
          stats.misses++;
          return null;
        }

        const expiresAt = Date.parse(entry.expiresAt || '');
        if (Number.isFinite(expiresAt) && expiresAt <= now()) {
          stats.misses++;
          stats.expired++;
          return null;
        }
        if (typeof entry.response !== 'string') {
          stats.misses++;
          return null;
        }

        stats.hits++;
        return {
          ...entry,
          key,
          path,
          content: entry.response,
        };
      } catch (err) {
        // A missing file is an ordinary miss; anything else is also an error.
        if (err?.code !== 'ENOENT') stats.errors++;
        stats.misses++;
        return null;
      }
    },

    /**
     * Store a response.
     *
     * @param {object} args - Request fields used to derive the cache key.
     * @param {string} response - Response text to cache.
     * @param {{usage?: object, model?: string}} [metadata] - Optional response metadata.
     * @returns {{key: string, path: string}} Key and target path, returned
     *   even when the write failed (see `stats.errors`).
     */
    set(args, response, metadata = {}) {
      const key = responseCacheKey(args);
      const path = responseCachePath(dir, key);

      // Read the clock once so createdAt and expiresAt are exactly ttl apart.
      const writtenAt = now();
      const entry = {
        cacheVersion: CACHE_SCHEMA_VERSION,
        key,
        createdAt: new Date(writtenAt).toISOString(),
        expiresAt: new Date(writtenAt + ttlSeconds * 1000).toISOString(),
        baseURLHost: baseURLHost(args.baseURL),
        model: args.model ?? null,
        temperature: args.temperature ?? null,
        response,
        usage: metadata.usage ?? null,
        responseModel: metadata.model ?? null,
      };

      // Write to a temp file, then rename it over the target.
      const tmp = `${path}.${process.pid}.${Date.now()}.tmp`;
      try {
        mkdirSync(dir, { recursive: true });
        writeFileSync(tmp, JSON.stringify(entry, null, 2) + '\n');
        renameSync(tmp, path);
        stats.writes++;
      } catch {
        stats.errors++;
        // Do not leave a partial temp file behind after a failed write/rename.
        try {
          rmSync(tmp, { force: true });
        } catch {
          // Cleanup is best-effort; the failure is already counted above.
        }
      }
      return { key, path };
    },
  };
}
|
|
85
|
-
|
|
86
|
-
/**
 * Derive the cache key for a request.
 *
 * The prompt, model and temperature (each stringified, nullish values as an
 * empty string) and the base-URL host are joined with NUL separators and
 * hashed with SHA-256.
 *
 * @param {{prompt?: unknown, model?: unknown, temperature?: unknown, baseURL?: string}} [request]
 * @returns {string} Key of the form `sha256:<hex digest>`.
 */
export function responseCacheKey({ prompt, model, temperature, baseURL } = {}) {
  const fields = [prompt, model, temperature].map((value) => String(value ?? ''));
  fields.push(baseURLHost(baseURL));
  const digest = createHash('sha256').update(fields.join('\0')).digest('hex');
  return `sha256:${digest}`;
}
|
|
95
|
-
|
|
96
|
-
/**
 * Resolve the file path of a cache entry.
 *
 * @param {string} dir - Cache directory.
 * @param {string} key - Cache key; a leading `sha256:` prefix is dropped.
 * @returns {string} Absolute path to `<digest>.json` inside `dir`.
 */
export function responseCachePath(dir, key) {
  const digest = String(key).replace(/^sha256:/, '');
  return resolve(dir, `${digest}.json`);
}
|
|
99
|
-
|
|
100
|
-
/**
 * Extract the host (including any port) from a base URL.
 *
 * A falsy `baseURL` falls back to `https://api.openai.com/v1`. When the
 * value cannot be parsed as a URL, it is returned as a string instead.
 *
 * @param {string} [baseURL]
 * @returns {string} The URL host, or the raw value when unparseable.
 */
export function baseURLHost(baseURL) {
  const target = baseURL || 'https://api.openai.com/v1';
  try {
    const { host } = new URL(target);
    return host;
  } catch {
    return String(baseURL || '');
  }
}
|