patina-cli 3.11.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193)
  1. package/.patina.default.yaml +29 -29
  2. package/CHANGELOG.md +53 -0
  3. package/NOTICE +21 -0
  4. package/README.md +117 -224
  5. package/README_JA.md +134 -77
  6. package/README_KR.md +132 -74
  7. package/README_ZH.md +137 -80
  8. package/SKILL.md +11 -20
  9. package/artifacts/rebaseline-2025/README.md +147 -0
  10. package/artifacts/rebaseline-2025/human-controls.public.jsonl +250 -0
  11. package/artifacts/rebaseline-2025/intake.example.jsonl +2 -0
  12. package/artifacts/rebaseline-2025/intake.local.example.jsonl +25 -0
  13. package/artifacts/rebaseline-2025/prompts.template.jsonl +7 -0
  14. package/artifacts/rebaseline-2025/sources.ko-public.jsonl +39 -0
  15. package/assets/brand/patina-badge.svg +18 -0
  16. package/assets/brand/patina-mark.svg +8 -0
  17. package/assets/demo/README.md +79 -0
  18. package/core/scoring.md +12 -12
  19. package/core/standalone-prompt.md +3 -1
  20. package/core/stylometry.md +93 -22
  21. package/docs/API.md +1554 -0
  22. package/docs/AUTHENTICATION.md +50 -26
  23. package/docs/AUTHENTICATION_KR.md +54 -29
  24. package/docs/BRANDING.md +9 -8
  25. package/docs/CLI.md +55 -14
  26. package/docs/COOKBOOK.md +8 -21
  27. package/docs/DEMO.md +32 -5
  28. package/docs/EXIT-CODES.md +2 -3
  29. package/docs/FALSE-POSITIVES.md +63 -0
  30. package/docs/FAQ.md +9 -1
  31. package/docs/FAQ_KR.md +3 -1
  32. package/docs/FLAG-PARITY.md +33 -47
  33. package/docs/ISSUE-WAVES.md +57 -0
  34. package/docs/PATTERNS-EN.md +67 -3
  35. package/docs/PATTERNS-JA.md +68 -2
  36. package/docs/PATTERNS-KO.md +70 -7
  37. package/docs/PATTERNS-ZH.md +67 -3
  38. package/docs/PATTERNS.md +5 -5
  39. package/docs/RESEARCH-DOCS-PLATFORM.md +54 -0
  40. package/docs/ROADMAP.md +46 -66
  41. package/docs/TRANSLATIONESE-KO.md +51 -0
  42. package/docs/audits/2026-05-deep-research.md +3 -1
  43. package/docs/benchmarks/README.md +51 -0
  44. package/docs/benchmarks/detector-comparison.json +69 -9
  45. package/docs/benchmarks/detector-comparison.md +10 -5
  46. package/docs/benchmarks/katfish-ko-latest.json +657 -0
  47. package/docs/benchmarks/katfish-ko-latest.md +77 -0
  48. package/docs/benchmarks/latest.json +1183 -108
  49. package/docs/benchmarks/latest.md +84 -60
  50. package/docs/benchmarks/lexicon-freshness-en-2026-05-22.json +1121 -0
  51. package/docs/benchmarks/lexicon-freshness-en-2026-05-22.md +136 -0
  52. package/docs/benchmarks/rebaseline-latest.json +381 -0
  53. package/docs/benchmarks/rebaseline-latest.md +121 -0
  54. package/docs/benchmarks/register-stratified-latest.json +164 -0
  55. package/docs/benchmarks/register-stratified-latest.md +99 -0
  56. package/docs/benchmarks/register-stratified.md +43 -0
  57. package/docs/integrations/github-action.md +44 -11
  58. package/docs/integrations/playground.md +58 -0
  59. package/docs/integrations/pre-commit.md +5 -5
  60. package/docs/integrations/release.md +5 -3
  61. package/docs/integrations/static-sites.md +83 -0
  62. package/docs/research/2025-rebaseline-plan.md +71 -2
  63. package/docs/research/2026-rebaseline.md +102 -0
  64. package/docs/research/adversarial-mps.md +41 -0
  65. package/docs/research/ai-human-metrics.md +35 -23
  66. package/docs/research/human-eval-panel.md +42 -0
  67. package/docs/research/judge-agreement.md +24 -0
  68. package/docs/research/ko-2025-corpus-sources.md +135 -0
  69. package/docs/research/lexicon-freshness-audit.md +64 -0
  70. package/docs/research/zh-ja-lexicon-calibration.md +60 -0
  71. package/docs/social/patina-launch-copy.md +173 -100
  72. package/docs/social/patina-launch-execution.md +94 -0
  73. package/docs/social/patina-launch-korean-first.md +83 -0
  74. package/docs/social/signs-of-ai-writing.md +26 -0
  75. package/docs/social/signs-of-ai-writing_KR.md +26 -0
  76. package/lexicon/ai-en.md +21 -24
  77. package/lexicon/ai-ja.md +158 -0
  78. package/lexicon/ai-ko.md +9 -9
  79. package/lexicon/ai-zh.md +158 -0
  80. package/lexicon/provenance/ai-en.json +970 -0
  81. package/lexicon/provenance/ai-ja.json +542 -0
  82. package/lexicon/provenance/ai-ko.json +866 -0
  83. package/lexicon/provenance/ai-zh.json +542 -0
  84. package/package.json +49 -8
  85. package/patterns/en-communication.md +5 -0
  86. package/patterns/en-content.md +5 -0
  87. package/patterns/en-filler.md +5 -0
  88. package/patterns/en-language.md +29 -1
  89. package/patterns/en-structure.md +5 -0
  90. package/patterns/en-style.md +5 -0
  91. package/patterns/en-viral-hook.md +42 -2
  92. package/patterns/ja-communication.md +5 -0
  93. package/patterns/ja-content.md +5 -0
  94. package/patterns/ja-filler.md +5 -0
  95. package/patterns/ja-language.md +33 -1
  96. package/patterns/ja-structure.md +12 -0
  97. package/patterns/ja-style.md +5 -0
  98. package/patterns/ja-viral-hook.md +41 -2
  99. package/patterns/ko-communication.md +5 -0
  100. package/patterns/ko-content.md +5 -0
  101. package/patterns/ko-filler.md +5 -0
  102. package/patterns/ko-language.md +33 -1
  103. package/patterns/ko-structure.md +25 -6
  104. package/patterns/ko-style.md +5 -0
  105. package/patterns/ko-viral-hook.md +38 -2
  106. package/patterns/zh-communication.md +5 -0
  107. package/patterns/zh-content.md +5 -0
  108. package/patterns/zh-filler.md +5 -0
  109. package/patterns/zh-language.md +37 -1
  110. package/patterns/zh-structure.md +12 -0
  111. package/patterns/zh-style.md +5 -0
  112. package/patterns/zh-viral-hook.md +38 -2
  113. package/playground/README.md +55 -0
  114. package/playground/analytics.js +4 -0
  115. package/playground/analyzer.js +883 -0
  116. package/playground/app.js +157 -0
  117. package/playground/data/lexicons.js +343 -0
  118. package/playground/index.html +138 -0
  119. package/playground/styles.css +267 -0
  120. package/profiles/namuwiki.md +111 -0
  121. package/scripts/adversarial-mps-report.mjs +201 -0
  122. package/scripts/badge-json.mjs +79 -0
  123. package/scripts/benchmark-report.mjs +56 -9
  124. package/scripts/check-release-metadata.mjs +0 -2
  125. package/scripts/detector-comparison.mjs +7 -7
  126. package/scripts/generate-playground-data.mjs +77 -0
  127. package/scripts/katfish-calibration.mjs +464 -0
  128. package/scripts/lexicon-freshness.mjs +485 -0
  129. package/scripts/lint.mjs +1 -1
  130. package/scripts/precommit-score.mjs +4 -3
  131. package/scripts/prose-score.mjs +81 -5
  132. package/scripts/rebaseline-intake.mjs +242 -0
  133. package/scripts/rebaseline-score.mjs +268 -0
  134. package/scripts/rebaseline-summary.mjs +773 -0
  135. package/scripts/rebaseline-web-collect.mjs +410 -0
  136. package/scripts/update-benchmark-ranges.mjs +1 -0
  137. package/src/api.js +69 -105
  138. package/src/auth.js +50 -2
  139. package/src/backends/claude-cli.js +19 -4
  140. package/src/backends/codex-cli.js +19 -3
  141. package/src/backends/contract.js +230 -1
  142. package/src/backends/gemini-cli.js +18 -5
  143. package/src/backends/index.js +87 -12
  144. package/src/backends/kimi-cli.js +161 -0
  145. package/src/cli.js +577 -567
  146. package/src/commands/doctor.js +2 -2
  147. package/src/config.js +29 -0
  148. package/src/errors.js +53 -1
  149. package/src/features/discourse-tells.js +68 -0
  150. package/src/features/index.js +82 -8
  151. package/src/features/lexicon.js +40 -6
  152. package/src/features/markup-leakage.js +69 -0
  153. package/src/features/segment.js +41 -0
  154. package/src/features/signal-strength.js +81 -0
  155. package/src/features/stylometry.js +231 -1
  156. package/src/features/translationese.js +127 -0
  157. package/src/loader.js +76 -0
  158. package/src/logger.js +22 -23
  159. package/src/model-defaults.js +55 -0
  160. package/src/ouroboros.js +31 -0
  161. package/src/output.js +102 -90
  162. package/src/prompt-builder.js +103 -68
  163. package/src/providers.js +51 -4
  164. package/src/scoring.js +210 -2
  165. package/src/security.js +75 -0
  166. package/tests/fixtures/live-quality/en/public-docs-01.md +26 -0
  167. package/tests/fixtures/live-quality/ko/public-docs-01.md +26 -0
  168. package/tests/fixtures/suspect-zones/expected-ranges.json +207 -16
  169. package/tests/fixtures/suspect-zones/ja/ai/ja-ai-04-lexicon.md +11 -0
  170. package/tests/fixtures/suspect-zones/ja/natural/ja-nat-04-lexicon-cold.md +11 -0
  171. package/tests/fixtures/suspect-zones/ko/ai/ko-ai-02.md +4 -5
  172. package/tests/fixtures/suspect-zones/ko/ai/ko-ai-07-ko-diagnostic.md +11 -0
  173. package/tests/fixtures/suspect-zones/zh/ai/zh-ai-04-lexicon.md +11 -0
  174. package/tests/fixtures/suspect-zones/zh/natural/zh-nat-04-lexicon-cold.md +11 -0
  175. package/tests/quality/README.md +188 -11
  176. package/tests/quality/adversarial-mps/fixtures.jsonl +10 -0
  177. package/tests/quality/benchmark.mjs +39 -1
  178. package/tests/quality/dogfood.mjs +5 -3
  179. package/tests/quality/live-fixtures.jsonl +2 -0
  180. package/tests/quality/live-quality.mjs +596 -0
  181. package/tests/quality/ranking-metrics.mjs +136 -0
  182. package/tests/quality/rebaseline-manifest.example.jsonl +5 -0
  183. package/vercel.json +53 -0
  184. package/SKILL-MAX.md +0 -455
  185. package/docs/internal/HARNESS.md +0 -14
  186. package/docs/internal/README.md +0 -14
  187. package/docs/internal/WARP.md +0 -23
  188. package/patina-max/SKILL.md +0 -523
  189. package/patina-max/composite.py +0 -457
  190. package/src/cache.js +0 -106
  191. package/src/commands/init.js +0 -208
  192. package/src/manifest.js +0 -162
  193. package/src/max-mode.js +0 -207
@@ -1,457 +0,0 @@
1
- #!/usr/bin/env python3
2
- """patina-max composite: deterministic 4-axis winner reselection over an
3
- existing patina-max run directory.
4
-
5
- The default patina-max winner picker only sees AI-likeness and MPS, so it
6
- goes noise-bound when a baseline is already humanized. This script adds two
7
- Korean-aware deterministic metrics — Register Stability Score (RSS) and
8
- Edit Conservativeness (EditCons) — and reselects the winner.
9
-
10
- Usage
11
- -----
12
- python3 patina-max/composite.py <run_dir> [--weights ...]
13
-
14
- Layout consumed
15
- ---------------
16
- <run_dir>/
17
- input.md baseline source MDX (required)
18
- claude.md candidate (optional; absent → "missing")
19
- gemini.md candidate (optional)
20
- codex.md candidate (optional; may be a failure note)
21
- meta.md YAML; per-candidate ai_score / mps / status (recommended)
22
-
23
- Layout produced
24
- ---------------
25
- <run_dir>/
26
- composite.md per-candidate metric table + weighted totals
27
- winner.md winning candidate's text (or a none-found notice)
28
-
29
- Default weights (renormalised after dropping the LLM-Judge slot):
30
-
31
- AI=0.353 MPS=0.235 RSS=0.235 EditCons=0.176
32
-
33
- Override via .patina.default.yaml:
34
-
35
- composite-weights:
36
- ai: 0.353
37
- mps: 0.235
38
- rss: 0.235
39
- edit_cons: 0.176
40
-
41
- Or inline:
42
-
43
- python3 patina-max/composite.py <run_dir> --weights ai=0.4,rss=0.3
44
- """
45
-
46
- from __future__ import annotations
47
-
48
- import argparse
49
- import difflib
50
- import math
51
- import re
52
- import sys
53
- from collections import Counter
54
- from dataclasses import dataclass, field
55
- from pathlib import Path
56
- from typing import Optional
57
-
58
-
59
- # ---------------------------------------------------------------------------
60
- # Korean register / edit metrics
61
- # ---------------------------------------------------------------------------
62
-
63
- # Sentence-final ending vocabulary. Order matters — longer forms first so the
64
- # regex engine matches `합니다` before falling back to `다`.
65
- _ENDING_PATTERNS = [
66
- # 합쇼체 (deferential formal): ~ㅂ니다 / ~습니다 / ~ㅂ니까 / ~습니까 / ~십시오
67
- ("hapsho", r"(?:[가-힣]니다|[가-힣]니까|[가-힣]시오|십시오|십시요)"),
68
- # 해요체 (polite informal)
69
- ("haeyo", r"(?:세요|예요|이에요|에요|해요|어요|아요|네요|군요|지요|죠|[가-힣]요)"),
70
- # 해라체 (plain declarative / imperative)
71
- ("haera", r"(?:[가-힣]는다|한다|[가-힣]다|하라|마라|보라|들라|[가-힣]아라|[가-힣]어라|[가-힣]라)"),
72
- # 해체 (casual / 반말)
73
- ("hae", r"(?:해|야|아|어|네|군|지)"),
74
- ]
75
-
76
- _SENTENCE_SPLIT = re.compile(r"[.!?。]+\s+|\n+")
77
- _TRAILING_PUNCT = re.compile(r"[\s.,!?;:。、]+$")
78
-
79
-
80
- def _strip_markdown_noise(text: str) -> str:
81
- """Drop fenced code blocks, JSX tags, image lines, and href payloads.
82
-
83
- Composite metrics are about Korean prose. MDX fences and JSX scaffolding
84
- would otherwise inflate the token count and skew Edit Conservativeness.
85
- """
86
- text = re.sub(r"```[\s\S]*?```", "", text)
87
- text = re.sub(r"<[A-Z][\w]*\b[^>]*?/?>", "", text)
88
- text = re.sub(r"</[A-Z][\w]*>", "", text)
89
- text = re.sub(r"!\[[^\]]*\]\([^)]*\)", "", text)
90
- text = re.sub(r"\[([^\]]*)\]\([^)]*\)", r"\1", text)
91
- text = re.sub(r"\A---\n[\s\S]*?\n---\n", "", text)
92
- return text
93
-
94
-
95
- def _split_sentences(text: str) -> list[str]:
96
- cleaned = _strip_markdown_noise(text)
97
- parts = _SENTENCE_SPLIT.split(cleaned)
98
- sentences: list[str] = []
99
- for part in parts:
100
- for line in part.splitlines():
101
- line = line.strip()
102
- if not line:
103
- continue
104
- line = re.sub(r"^\s*([>#\-*]+\s*)+", "", line)
105
- line = re.sub(r"^\*\*[^*]+\*\*[\s:—-]*", "", line)
106
- line = line.strip()
107
- if line:
108
- sentences.append(line)
109
- return sentences
110
-
111
-
112
- def ending_distribution(text: str) -> Counter[str]:
113
- dist: Counter[str] = Counter()
114
- for sentence in _split_sentences(text):
115
- tail = _TRAILING_PUNCT.sub("", sentence)
116
- if not tail:
117
- continue
118
- bucket = "other"
119
- for name, pattern in _ENDING_PATTERNS:
120
- if re.search(pattern + r"$", tail):
121
- bucket = name
122
- break
123
- dist[bucket] += 1
124
- return dist
125
-
126
-
127
- def cosine_similarity(a: Counter[str], b: Counter[str]) -> float:
128
- keys = set(a) | set(b)
129
- if not keys:
130
- return 0.0
131
- dot = sum(a[k] * b[k] for k in keys)
132
- norm_a = math.sqrt(sum(v * v for v in a.values()))
133
- norm_b = math.sqrt(sum(v * v for v in b.values()))
134
- if norm_a == 0 or norm_b == 0:
135
- return 0.0
136
- return dot / (norm_a * norm_b)
137
-
138
-
139
- def register_stability(baseline: str, candidate: str) -> float:
140
- """RSS: cosine similarity of register distributions, scaled to 0-100."""
141
- return cosine_similarity(ending_distribution(baseline), ending_distribution(candidate)) * 100.0
142
-
143
-
144
- def edit_conservativeness(baseline: str, candidate: str) -> float:
145
- """EditCons: SequenceMatcher ratio on whitespace tokens (0-100)."""
146
- base_tokens = _strip_markdown_noise(baseline).split()
147
- cand_tokens = _strip_markdown_noise(candidate).split()
148
- if not base_tokens and not cand_tokens:
149
- return 100.0
150
- if not base_tokens or not cand_tokens:
151
- return 0.0
152
- matcher = difflib.SequenceMatcher(None, base_tokens, cand_tokens, autojunk=False)
153
- return matcher.ratio() * 100.0
154
-
155
-
156
- # ---------------------------------------------------------------------------
157
- # Composite scoring + run-dir IO
158
- # ---------------------------------------------------------------------------
159
-
160
- DEFAULT_WEIGHTS = {
161
- "ai": 0.353,
162
- "mps": 0.235,
163
- "rss": 0.235,
164
- "edit_cons": 0.176,
165
- }
166
-
167
- CANDIDATE_MODELS = ("claude", "gemini", "codex")
168
- RUN_FRONTMATTER = re.compile(r"\A---\n([\s\S]*?)\n---\n", re.MULTILINE)
169
- NUMBER_RANGE = re.compile(r"(\d+(?:\.\d+)?)\s*[-–~]\s*(\d+(?:\.\d+)?)")
170
- SINGLE_NUMBER = re.compile(r"(\d+(?:\.\d+)?)")
171
- NON_NUMERIC_PLACEHOLDERS = {"n/a", "na", "none", "—", "-", "pending", "tbd", "unknown"}
172
-
173
-
174
- @dataclass
175
- class Candidate:
176
- model: str
177
- text: str
178
- ai_score: Optional[float] = None
179
- mps: Optional[float] = None
180
- rss: Optional[float] = None
181
- edit_cons: Optional[float] = None
182
- composite: Optional[float] = None
183
- status: str = "unknown"
184
- notes: list[str] = field(default_factory=list)
185
-
186
-
187
- def parse_metric(raw: Optional[str]) -> Optional[float]:
188
- """Coerce metric strings from meta.md into floats.
189
-
190
- `0-2 (within noise floor)` -> 1.0 (midpoint)
191
- `92 (all anchors preserved)` -> 92.0
192
- `n/a` / `pending` / `—` -> None
193
- """
194
- if raw is None:
195
- return None
196
- raw = str(raw).strip().strip('"').strip("'")
197
- if not raw or raw.lower() in NON_NUMERIC_PLACEHOLDERS:
198
- return None
199
- range_match = NUMBER_RANGE.search(raw)
200
- if range_match:
201
- return (float(range_match.group(1)) + float(range_match.group(2))) / 2.0
202
- single_match = SINGLE_NUMBER.search(raw)
203
- if single_match:
204
- return float(single_match.group(1))
205
- return None
206
-
207
-
208
- def parse_meta_candidates(meta_text: str) -> dict[str, dict[str, str]]:
209
- """Pull per-candidate score lines from meta.md without a YAML library."""
210
- info: dict[str, dict[str, str]] = {}
211
- in_candidates = False
212
- current: Optional[dict[str, str]] = None
213
- for raw_line in meta_text.splitlines():
214
- line = raw_line.rstrip()
215
- if not line.startswith(" ") and line.endswith(":"):
216
- in_candidates = line.strip() == "candidates:"
217
- current = None
218
- continue
219
- if not in_candidates:
220
- continue
221
- stripped = line.lstrip()
222
- if stripped.startswith("- model:"):
223
- model = stripped.split(":", 1)[1].strip()
224
- current = {"model": model}
225
- info[model] = current
226
- continue
227
- if current is None:
228
- continue
229
- if ":" not in stripped:
230
- continue
231
- key, value = stripped.split(":", 1)
232
- key = key.strip()
233
- value = value.strip()
234
- if value == "|":
235
- value = "<multiline>"
236
- if key in {"ai_score", "ai_score_instructional", "ai_score_technical", "mps", "status", "wall_time_seconds"}:
237
- current[key] = value
238
- return info
239
-
240
-
241
- def read_candidate_text(path: Path) -> str:
242
- if not path.exists():
243
- return ""
244
- text = path.read_text(encoding="utf-8")
245
- return RUN_FRONTMATTER.sub("", text, count=1)
246
-
247
-
248
- def normalise_weights(weights: dict[str, float]) -> dict[str, float]:
249
- total = sum(weights.values())
250
- if total <= 0:
251
- raise ValueError("weights must sum to a positive number")
252
- return {k: v / total for k, v in weights.items()}
253
-
254
-
255
- def parse_weight_overrides(spec: str) -> dict[str, float]:
256
- overrides: dict[str, float] = {}
257
- for chunk in spec.split(","):
258
- chunk = chunk.strip()
259
- if not chunk:
260
- continue
261
- if "=" not in chunk:
262
- raise ValueError(f"invalid weight override `{chunk}` (expected key=value)")
263
- key, value = chunk.split("=", 1)
264
- key = key.strip().lower()
265
- if key not in DEFAULT_WEIGHTS:
266
- raise ValueError(f"unknown weight key `{key}`; valid: {sorted(DEFAULT_WEIGHTS)}")
267
- try:
268
- overrides[key] = float(value)
269
- except ValueError as exc:
270
- raise ValueError(f"weight `{key}` not a number: {value}") from exc
271
- return overrides
272
-
273
-
274
- def load_yaml_weights(yaml_path: Path) -> dict[str, float]:
275
- """Pick `composite-weights:` out of the patina config without PyYAML."""
276
- if not yaml_path.exists():
277
- return {}
278
- weights: dict[str, float] = {}
279
- in_block = False
280
- for raw_line in yaml_path.read_text(encoding="utf-8").splitlines():
281
- if raw_line.startswith("composite-weights:"):
282
- in_block = True
283
- continue
284
- if in_block:
285
- if not raw_line.startswith(" "):
286
- break
287
- stripped = raw_line.strip()
288
- if not stripped or stripped.startswith("#"):
289
- continue
290
- if ":" not in stripped:
291
- break
292
- key, value = stripped.split(":", 1)
293
- key = key.strip().lower()
294
- try:
295
- weights[key] = float(value.strip().split("#", 1)[0].strip())
296
- except ValueError:
297
- continue
298
- return {k: v for k, v in weights.items() if k in DEFAULT_WEIGHTS}
299
-
300
-
301
- def resolve_weights(yaml_path: Path, cli_override: Optional[str]) -> dict[str, float]:
302
- weights = dict(DEFAULT_WEIGHTS)
303
- weights.update(load_yaml_weights(yaml_path))
304
- if cli_override:
305
- weights.update(parse_weight_overrides(cli_override))
306
- return normalise_weights(weights)
307
-
308
-
309
- def composite_score(candidate: Candidate, weights: dict[str, float]) -> Optional[float]:
310
- if candidate.status != "success":
311
- return None
312
- if any(v is None for v in (candidate.ai_score, candidate.mps, candidate.rss, candidate.edit_cons)):
313
- return None
314
- return (
315
- (100.0 - candidate.ai_score) * weights["ai"]
316
- + candidate.mps * weights["mps"]
317
- + candidate.rss * weights["rss"]
318
- + candidate.edit_cons * weights["edit_cons"]
319
- )
320
-
321
-
322
- def render_composite_md(
323
- run_dir: Path,
324
- weights: dict[str, float],
325
- candidates: list[Candidate],
326
- winner: Optional[Candidate],
327
- ) -> str:
328
- lines: list[str] = []
329
- lines.append(f"# patina-composite scores for `{run_dir.name}`")
330
- lines.append("")
331
- lines.append("Generated by `patina-max/composite.py` — deterministic 4-axis reselection.")
332
- lines.append("")
333
- lines.append("## Weights")
334
- lines.append("")
335
- lines.append("| Axis | Weight |")
336
- lines.append("|------|-------:|")
337
- for key in ("ai", "mps", "rss", "edit_cons"):
338
- lines.append(f"| {key} | {weights[key]:.4f} |")
339
- lines.append("")
340
- lines.append("## Candidate scores")
341
- lines.append("")
342
- lines.append("| Model | Status | AI | MPS | RSS | EditCons | Composite |")
343
- lines.append("|-------|--------|---:|----:|----:|--------:|----------:|")
344
- for cand in candidates:
345
- ai = "—" if cand.ai_score is None else f"{cand.ai_score:.1f}"
346
- mps = "—" if cand.mps is None else f"{cand.mps:.1f}"
347
- rss = "—" if cand.rss is None else f"{cand.rss:.1f}"
348
- edit = "—" if cand.edit_cons is None else f"{cand.edit_cons:.1f}"
349
- comp = "—" if cand.composite is None else f"{cand.composite:.2f}"
350
- lines.append(f"| {cand.model} | {cand.status} | {ai} | {mps} | {rss} | {edit} | {comp} |")
351
- lines.append("")
352
- if winner:
353
- lines.append(f"**Winner:** `{winner.model}` — composite {winner.composite:.2f}")
354
- else:
355
- lines.append("**Winner:** none (no candidate scored successfully)")
356
- lines.append("")
357
- if any(cand.notes for cand in candidates):
358
- lines.append("")
359
- lines.append("## Notes")
360
- for cand in candidates:
361
- for note in cand.notes:
362
- lines.append(f"- **{cand.model}**: {note}")
363
- return "\n".join(lines) + "\n"
364
-
365
-
366
- def main(argv: Optional[list[str]] = None) -> int:
367
- parser = argparse.ArgumentParser(description="patina-max composite winner reselection.")
368
- parser.add_argument("run_dir", type=Path, help="path to a patina-max run dir")
369
- parser.add_argument(
370
- "--weights",
371
- type=str,
372
- default=None,
373
- help="override weights, comma-separated (e.g. ai=0.4,rss=0.3)",
374
- )
375
- parser.add_argument(
376
- "--config",
377
- type=Path,
378
- default=Path(__file__).resolve().parents[1] / ".patina.default.yaml",
379
- help="patina config to read composite-weights from",
380
- )
381
- args = parser.parse_args(argv)
382
-
383
- run_dir: Path = args.run_dir.resolve()
384
- if not run_dir.is_dir():
385
- print(f"error: not a directory: {run_dir}", file=sys.stderr)
386
- return 2
387
-
388
- input_path = run_dir / "input.md"
389
- if not input_path.exists():
390
- print(f"error: missing required file: {input_path}", file=sys.stderr)
391
- return 2
392
- baseline = read_candidate_text(input_path)
393
-
394
- meta_path = run_dir / "meta.md"
395
- meta_info: dict[str, dict[str, str]] = {}
396
- if meta_path.exists():
397
- meta_info = parse_meta_candidates(meta_path.read_text(encoding="utf-8"))
398
- else:
399
- print(f"warning: missing meta.md at {meta_path}; AI/MPS will be marked unknown", file=sys.stderr)
400
-
401
- weights = resolve_weights(args.config, args.weights)
402
-
403
- candidates: list[Candidate] = []
404
- for model in CANDIDATE_MODELS:
405
- path = run_dir / f"{model}.md"
406
- if not path.exists():
407
- candidates.append(Candidate(model=model, text="", status="missing"))
408
- continue
409
- text = read_candidate_text(path)
410
- info = meta_info.get(model, {})
411
- cand = Candidate(
412
- model=model,
413
- text=text,
414
- status=info.get("status", "unknown"),
415
- ai_score=parse_metric(info.get("ai_score") or info.get("ai_score_instructional") or info.get("ai_score_technical")),
416
- mps=parse_metric(info.get("mps")),
417
- )
418
- if cand.status == "success" and cand.text.strip():
419
- cand.rss = register_stability(baseline, cand.text)
420
- cand.edit_cons = edit_conservativeness(baseline, cand.text)
421
- else:
422
- cand.notes.append("skipping deterministic metrics (status not success or empty text)")
423
- cand.composite = composite_score(cand, weights)
424
- if cand.status == "success" and cand.composite is None:
425
- cand.notes.append("composite undefined — at least one of AI/MPS could not be parsed from meta.md")
426
- candidates.append(cand)
427
-
428
- scored = [c for c in candidates if c.composite is not None]
429
- winner: Optional[Candidate] = max(scored, key=lambda c: c.composite) if scored else None
430
-
431
- composite_path = run_dir / "composite.md"
432
- composite_path.write_text(
433
- render_composite_md(run_dir, weights, candidates, winner),
434
- encoding="utf-8",
435
- )
436
-
437
- winner_path = run_dir / "winner.md"
438
- if winner is not None:
439
- winner_path.write_text(
440
- f"---\nwinner_model: {winner.model}\ncomposite_score: {winner.composite:.2f}\n---\n\n{winner.text.lstrip()}",
441
- encoding="utf-8",
442
- )
443
- else:
444
- winner_path.write_text("# winner.md\n\nNo candidate scored successfully.\n", encoding="utf-8")
445
-
446
- cwd = Path.cwd()
447
- print(f"wrote {composite_path.relative_to(cwd) if composite_path.is_relative_to(cwd) else composite_path}")
448
- print(f"wrote {winner_path.relative_to(cwd) if winner_path.is_relative_to(cwd) else winner_path}")
449
- if winner:
450
- print(f"winner: {winner.model} (composite {winner.composite:.2f})")
451
- else:
452
- print("winner: none")
453
- return 0
454
-
455
-
456
- if __name__ == "__main__":
457
- sys.exit(main())
package/src/cache.js DELETED
@@ -1,106 +0,0 @@
1
- import { createHash } from 'node:crypto';
2
- import { mkdirSync, readFileSync, renameSync, writeFileSync } from 'node:fs';
3
- import { resolve } from 'node:path';
4
-
5
- export const CACHE_SCHEMA_VERSION = 1;
6
- export const DEFAULT_CACHE_TTL_SECONDS = 24 * 60 * 60;
7
-
8
- export function createResponseCache({
9
- dir,
10
- ttlSeconds = DEFAULT_CACHE_TTL_SECONDS,
11
- now = () => Date.now(),
12
- } = {}) {
13
- if (!dir) return null;
14
- const stats = {
15
- hits: 0,
16
- misses: 0,
17
- writes: 0,
18
- expired: 0,
19
- errors: 0,
20
- };
21
-
22
- return {
23
- dir,
24
- ttlSeconds,
25
- stats,
26
- get(args) {
27
- const key = responseCacheKey(args);
28
- const path = responseCachePath(dir, key);
29
- try {
30
- const entry = JSON.parse(readFileSync(path, 'utf8'));
31
- const expiresAt = Date.parse(entry.expiresAt || '');
32
- if (Number.isFinite(expiresAt) && expiresAt <= now()) {
33
- stats.misses++;
34
- stats.expired++;
35
- return null;
36
- }
37
- if (typeof entry.response !== 'string') {
38
- stats.misses++;
39
- return null;
40
- }
41
- stats.hits++;
42
- return {
43
- ...entry,
44
- key,
45
- path,
46
- content: entry.response,
47
- };
48
- } catch (err) {
49
- if (err?.code !== 'ENOENT') stats.errors++;
50
- stats.misses++;
51
- return null;
52
- }
53
- },
54
- set(args, response, metadata = {}) {
55
- const key = responseCacheKey(args);
56
- const path = responseCachePath(dir, key);
57
- const createdAt = new Date(now()).toISOString();
58
- const expiresAt = new Date(now() + ttlSeconds * 1000).toISOString();
59
- const entry = {
60
- cacheVersion: CACHE_SCHEMA_VERSION,
61
- key,
62
- createdAt,
63
- expiresAt,
64
- baseURLHost: baseURLHost(args.baseURL),
65
- model: args.model ?? null,
66
- temperature: args.temperature ?? null,
67
- response,
68
- usage: metadata.usage ?? null,
69
- responseModel: metadata.model ?? null,
70
- };
71
-
72
- try {
73
- mkdirSync(dir, { recursive: true });
74
- const tmp = `${path}.${process.pid}.${Date.now()}.tmp`;
75
- writeFileSync(tmp, JSON.stringify(entry, null, 2) + '\n');
76
- renameSync(tmp, path);
77
- stats.writes++;
78
- } catch {
79
- stats.errors++;
80
- }
81
- return { key, path };
82
- },
83
- };
84
- }
85
-
86
- export function responseCacheKey({ prompt, model, temperature, baseURL } = {}) {
87
- const input = [
88
- String(prompt ?? ''),
89
- String(model ?? ''),
90
- String(temperature ?? ''),
91
- baseURLHost(baseURL),
92
- ].join('\0');
93
- return `sha256:${createHash('sha256').update(input).digest('hex')}`;
94
- }
95
-
96
- export function responseCachePath(dir, key) {
97
- return resolve(dir, `${String(key).replace(/^sha256:/, '')}.json`);
98
- }
99
-
100
- export function baseURLHost(baseURL) {
101
- try {
102
- return new URL(baseURL || 'https://api.openai.com/v1').host;
103
- } catch {
104
- return String(baseURL || '');
105
- }
106
- }