novel-writer-cli 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +103 -0
- package/agents/chapter-writer.md +142 -0
- package/agents/character-weaver.md +117 -0
- package/agents/consistency-auditor.md +85 -0
- package/agents/plot-architect.md +128 -0
- package/agents/quality-judge.md +232 -0
- package/agents/style-analyzer.md +109 -0
- package/agents/style-refiner.md +97 -0
- package/agents/summarizer.md +128 -0
- package/agents/world-builder.md +161 -0
- package/dist/__tests__/character-voice.test.js +445 -0
- package/dist/__tests__/commit-prototype-pollution.test.js +45 -0
- package/dist/__tests__/engagement.test.js +382 -0
- package/dist/__tests__/foreshadow-visibility.test.js +131 -0
- package/dist/__tests__/hook-ledger.test.js +1028 -0
- package/dist/__tests__/naming-lint.test.js +132 -0
- package/dist/__tests__/narrative-health-injection.test.js +359 -0
- package/dist/__tests__/next-step-prejudge-guardrails.test.js +325 -0
- package/dist/__tests__/next-step-title-fix.test.js +153 -0
- package/dist/__tests__/platform-profile.test.js +274 -0
- package/dist/__tests__/promise-ledger.test.js +189 -0
- package/dist/__tests__/readability-lint.test.js +209 -0
- package/dist/__tests__/text-utils.test.js +39 -0
- package/dist/__tests__/title-policy.test.js +147 -0
- package/dist/advance.js +75 -0
- package/dist/character-voice.js +805 -0
- package/dist/checkpoint.js +126 -0
- package/dist/cli.js +563 -0
- package/dist/cliche-lint.js +515 -0
- package/dist/commit.js +1460 -0
- package/dist/consistency-auditor.js +684 -0
- package/dist/engagement.js +687 -0
- package/dist/errors.js +7 -0
- package/dist/fingerprint.js +16 -0
- package/dist/foreshadow-visibility.js +214 -0
- package/dist/fs-utils.js +68 -0
- package/dist/hook-ledger.js +721 -0
- package/dist/hook-policy.js +107 -0
- package/dist/instruction-gates.js +51 -0
- package/dist/instructions.js +406 -0
- package/dist/latest-summary-loader.js +29 -0
- package/dist/lock.js +121 -0
- package/dist/naming-lint.js +531 -0
- package/dist/ner.js +73 -0
- package/dist/next-step.js +408 -0
- package/dist/novel-ask.js +270 -0
- package/dist/output.js +9 -0
- package/dist/platform-constraints.js +518 -0
- package/dist/platform-profile.js +325 -0
- package/dist/prejudge-guardrails.js +370 -0
- package/dist/project.js +40 -0
- package/dist/promise-ledger.js +723 -0
- package/dist/readability-lint.js +555 -0
- package/dist/safe-parse.js +36 -0
- package/dist/safe-path.js +29 -0
- package/dist/scoring-weights.js +290 -0
- package/dist/steps.js +60 -0
- package/dist/text-utils.js +18 -0
- package/dist/title-policy.js +251 -0
- package/dist/type-guards.js +6 -0
- package/dist/validate.js +131 -0
- package/docs/user/README.md +17 -0
- package/docs/user/guardrails.md +179 -0
- package/docs/user/interactive-gates.md +124 -0
- package/docs/user/novel-cli.md +289 -0
- package/docs/user/ops.md +123 -0
- package/docs/user/quick-start.md +97 -0
- package/docs/user/spec-system.md +166 -0
- package/docs/user/storylines.md +144 -0
- package/package.json +48 -0
- package/schemas/README.md +18 -0
- package/schemas/character-voice-drift.schema.json +135 -0
- package/schemas/character-voice-profiles.schema.json +141 -0
- package/schemas/engagement-metrics.schema.json +38 -0
- package/schemas/hook-ledger.schema.json +108 -0
- package/schemas/platform-profile.schema.json +235 -0
- package/schemas/promise-ledger.schema.json +97 -0
- package/scripts/calibrate-quality-judge.sh +91 -0
- package/scripts/compare-regression-runs.sh +86 -0
- package/scripts/lib/_common.py +131 -0
- package/scripts/lib/calibrate_quality_judge.py +312 -0
- package/scripts/lib/compare_regression_runs.py +142 -0
- package/scripts/lib/run_regression.py +621 -0
- package/scripts/lint-blacklist.sh +201 -0
- package/scripts/lint-cliche.sh +370 -0
- package/scripts/lint-readability.sh +404 -0
- package/scripts/query-foreshadow.sh +252 -0
- package/scripts/run-ner.sh +669 -0
- package/scripts/run-regression.sh +122 -0
- package/skills/cli-step/SKILL.md +158 -0
- package/skills/continue/SKILL.md +348 -0
- package/skills/continue/references/context-contracts.md +169 -0
- package/skills/continue/references/continuity-checks.md +187 -0
- package/skills/continue/references/file-protocols.md +64 -0
- package/skills/continue/references/foreshadowing.md +130 -0
- package/skills/continue/references/gate-decision.md +53 -0
- package/skills/continue/references/periodic-maintenance.md +46 -0
- package/skills/novel-writing/SKILL.md +77 -0
- package/skills/novel-writing/references/quality-rubric.md +140 -0
- package/skills/novel-writing/references/style-guide.md +145 -0
- package/skills/start/SKILL.md +458 -0
- package/skills/start/references/quality-review.md +86 -0
- package/skills/start/references/setting-update.md +44 -0
- package/skills/start/references/vol-planning.md +61 -0
- package/skills/start/references/vol-review.md +58 -0
- package/skills/status/SKILL.md +116 -0
- package/skills/status/references/sample-output.md +60 -0
- package/templates/ai-blacklist.json +79 -0
- package/templates/brief-template.md +46 -0
- package/templates/genre-weight-profiles.json +90 -0
- package/templates/novel-ask/example.answer.json +12 -0
- package/templates/novel-ask/example.question.json +51 -0
- package/templates/platform-profile.json +148 -0
- package/templates/style-profile-template.json +58 -0
- package/templates/web-novel-cliche-lint.json +41 -0
|
@@ -0,0 +1,621 @@
|
|
|
1
|
+
"""Regression runner for M2 outputs (M3).
|
|
2
|
+
|
|
3
|
+
Extracted from the heredoc in scripts/run-regression.sh.
|
|
4
|
+
Reads existing project outputs (evaluations/logs/etc) and summarizes
|
|
5
|
+
regression-friendly metrics.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import os
|
|
10
|
+
import re
|
|
11
|
+
import shutil
|
|
12
|
+
import sys
|
|
13
|
+
import tempfile
|
|
14
|
+
from datetime import datetime, timezone
|
|
15
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
16
|
+
|
|
17
|
+
import _common
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# ---------------------------------------------------------------------------
|
|
21
|
+
# Local helpers (thin wrappers keeping "run-regression.sh:" prefix)
|
|
22
|
+
# ---------------------------------------------------------------------------
|
|
23
|
+
|
|
24
|
+
def _die(msg: str, exit_code: int = 1) -> None:
    """Abort via _common.die, prefixing the message with the wrapper script's name."""
    _common.die("run-regression.sh: " + msg, exit_code)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _load_json(path: str) -> Any:
    """Read JSON from *path*.

    A missing file yields None; a malformed file aborts the run with the
    "run-regression.sh:" prefix (via _die).
    """
    try:
        data = _common.load_json(path, missing_ok=True)
    except Exception as exc:  # parse/IO failure -> abort with prefixed message
        _die(f"invalid JSON at {path}: {exc}", 1)
    else:
        return data
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _timestamp_id() -> str:
|
|
37
|
+
now = datetime.now(timezone.utc)
|
|
38
|
+
return now.strftime("%Y%m%dT%H%M%S") + f"_{now.strftime('%f')[:4]}Z"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _mkdir(path: str) -> None:
|
|
42
|
+
os.makedirs(path, exist_ok=True)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _severity_rank(v: str) -> int:
|
|
46
|
+
return {"high": 0, "medium": 1, "low": 2}.get(v, 9)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _summarize_continuity(report: Any) -> Optional[Dict[str, Any]]:
|
|
50
|
+
if report is None:
|
|
51
|
+
return None
|
|
52
|
+
if not isinstance(report, dict):
|
|
53
|
+
return {"error": "continuity latest.json is not an object"}
|
|
54
|
+
stats = report.get("stats") if isinstance(report.get("stats"), dict) else {}
|
|
55
|
+
issues = report.get("issues") if isinstance(report.get("issues"), list) else []
|
|
56
|
+
top_issues: List[Dict[str, Any]] = []
|
|
57
|
+
for it in issues[:50]:
|
|
58
|
+
if not isinstance(it, dict):
|
|
59
|
+
continue
|
|
60
|
+
top_issues.append(
|
|
61
|
+
{
|
|
62
|
+
"id": it.get("id"),
|
|
63
|
+
"type": it.get("type"),
|
|
64
|
+
"severity": it.get("severity"),
|
|
65
|
+
"confidence": it.get("confidence"),
|
|
66
|
+
"description": it.get("description"),
|
|
67
|
+
}
|
|
68
|
+
)
|
|
69
|
+
top_issues.sort(key=lambda x: (_severity_rank(str(x.get("severity"))), str(x.get("type")), str(x.get("id"))))
|
|
70
|
+
top_issues = top_issues[:5]
|
|
71
|
+
return {
|
|
72
|
+
"schema_version": report.get("schema_version"),
|
|
73
|
+
"generated_at": report.get("generated_at"),
|
|
74
|
+
"scope": report.get("scope"),
|
|
75
|
+
"volume": report.get("volume"),
|
|
76
|
+
"chapter_range": report.get("chapter_range"),
|
|
77
|
+
"stats": {
|
|
78
|
+
"chapters_checked": stats.get("chapters_checked"),
|
|
79
|
+
"issues_total": stats.get("issues_total"),
|
|
80
|
+
"issues_by_severity": stats.get("issues_by_severity"),
|
|
81
|
+
},
|
|
82
|
+
"top_issues": top_issues,
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _summarize_style_drift(obj: Any) -> Optional[Dict[str, Any]]:
|
|
87
|
+
if obj is None:
|
|
88
|
+
return None
|
|
89
|
+
if not isinstance(obj, dict):
|
|
90
|
+
return {"error": "style-drift.json is not an object"}
|
|
91
|
+
drifts = obj.get("drifts") if isinstance(obj.get("drifts"), list) else []
|
|
92
|
+
return {
|
|
93
|
+
"active": obj.get("active"),
|
|
94
|
+
"detected_chapter": obj.get("detected_chapter"),
|
|
95
|
+
"window": obj.get("window"),
|
|
96
|
+
"drifts_count": len(drifts),
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _as_range(value: Any) -> Optional[Tuple[int, int]]:
|
|
101
|
+
if not isinstance(value, list) or len(value) != 2:
|
|
102
|
+
return None
|
|
103
|
+
a = _common.as_int(value[0])
|
|
104
|
+
b = _common.as_int(value[1])
|
|
105
|
+
if a is None or b is None:
|
|
106
|
+
return None
|
|
107
|
+
if a < 1 or b < 1 or a > b:
|
|
108
|
+
return None
|
|
109
|
+
return (a, b)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _foreshadow_overdue_short(item: Dict[str, Any], last_completed_chapter: int) -> bool:
|
|
113
|
+
if item.get("scope") != "short":
|
|
114
|
+
return False
|
|
115
|
+
if item.get("status") == "resolved":
|
|
116
|
+
return False
|
|
117
|
+
r = _as_range(item.get("target_resolve_range"))
|
|
118
|
+
if r is None:
|
|
119
|
+
return False
|
|
120
|
+
return last_completed_chapter > r[1]
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _summarize_foreshadowing(project_dir: str, last_completed_chapter: int) -> Optional[Dict[str, Any]]:
    """Summarize foreshadowing state from <project>/foreshadowing/global.json.

    Counts active/resolved/overdue-short items and, when .checkpoint.json
    names a current volume, compares that volume plan's hook ids against the
    global ledger. Returns None when global.json is absent, or an
    {"error": ...} marker when its schema is unrecognized.
    """
    global_path = os.path.join(project_dir, "foreshadowing", "global.json")
    global_obj = _load_json(global_path)
    if global_obj is None:
        return None
    # Accept either a bare list of items or a {"foreshadowing": [...]} wrapper.
    if isinstance(global_obj, list):
        items = global_obj
    elif isinstance(global_obj, dict) and isinstance(global_obj.get("foreshadowing"), list):
        items = global_obj["foreshadowing"]
    else:
        return {"error": "foreshadowing/global.json has unsupported schema (expected list or {foreshadowing:[]})"}

    # Keep only well-formed (dict) entries; everything below works on these.
    normalized: List[Dict[str, Any]] = [it for it in items if isinstance(it, dict)]
    active = [it for it in normalized if it.get("status") != "resolved"]
    resolved = [it for it in normalized if it.get("status") == "resolved"]
    overdue = [it for it in normalized if _foreshadow_overdue_short(it, last_completed_chapter)]

    # Collect the string ids of overdue items (entries without a usable id are skipped).
    overdue_ids = []
    for it in overdue:
        fid = _common.as_str(it.get("id"))
        if fid:
            overdue_ids.append(fid)

    # Optional plan alignment using checkpoint current_volume.
    # NOTE(review): .checkpoint.json is re-read here even though main() also
    # loads it; redundant but harmless.
    plan_stats = None
    ck = _load_json(os.path.join(project_dir, ".checkpoint.json"))
    vol = ck.get("current_volume") if isinstance(ck, dict) else None
    vol_int = _common.as_int(vol)
    if vol_int is not None and vol_int >= 1:
        plan_path = os.path.join(project_dir, "volumes", f"vol-{vol_int:02d}", "foreshadowing.json")
        plan_obj = _load_json(plan_path)
        # The per-volume plan accepts the same two shapes as the global file.
        plan_items: List[Dict[str, Any]] = []
        if isinstance(plan_obj, dict) and isinstance(plan_obj.get("foreshadowing"), list):
            plan_items = [it for it in plan_obj["foreshadowing"] if isinstance(it, dict)]
        elif isinstance(plan_obj, list):
            plan_items = [it for it in plan_obj if isinstance(it, dict)]

        if plan_items:
            # Planned ids are filtered to entries with a usable id; global ids
            # are NOT filtered, so items without an id contribute "".
            global_ids = {str(_common.as_str(it.get("id")) or "") for it in normalized}
            planned_ids = [str(_common.as_str(it.get("id")) or "") for it in plan_items if _common.as_str(it.get("id"))]
            planned_total = len(planned_ids)
            missing_in_global = [pid for pid in planned_ids if pid not in global_ids]

            # Classify each planned id by its status in the global ledger.
            resolved_in_global = 0
            pending_in_global = 0
            global_by_id = {str(_common.as_str(it.get("id")) or ""): it for it in normalized if _common.as_str(it.get("id"))}
            for pid in planned_ids:
                it = global_by_id.get(pid)
                if not it:
                    continue
                if it.get("status") == "resolved":
                    resolved_in_global += 1
                else:
                    pending_in_global += 1

            plan_stats = {
                "path": plan_path,
                "planned_total": planned_total,
                "resolved_in_global": resolved_in_global,
                "pending_in_global": pending_in_global,
                "missing_in_global": missing_in_global[:50],  # bounded for report size
            }

    return {
        "global_path": global_path,
        "items_total": len(normalized),
        "active_count": len(active),
        "resolved_count": len(resolved),
        "overdue_short_count": len(overdue),
        "overdue_short_ids": overdue_ids[:50],  # bounded for report size
        "plan_alignment": plan_stats,
    }
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def _summarize_ai_blacklist(project_dir: str) -> Optional[Dict[str, Any]]:
    """Count blacklist/whitelist entries from <project>/ai-blacklist.json.

    Returns None when the file is absent and an {"error": ...} marker when
    the top level is not a JSON object.
    """
    path = os.path.join(project_dir, "ai-blacklist.json")
    obj = _load_json(path)
    if obj is None:
        return None
    if not isinstance(obj, dict):
        return {"error": "ai-blacklist.json is not an object"}

    raw_words = obj.get("words")
    words = raw_words if isinstance(raw_words, list) else []

    # The whitelist may be a bare list or wrapped as {"words": [...]}.
    wl = obj.get("whitelist")
    if isinstance(wl, list):
        whitelist_words = [w for w in wl if isinstance(w, str)]
    elif isinstance(wl, dict) and isinstance(wl.get("words"), list):
        whitelist_words = [w for w in wl["words"] if isinstance(w, str)]
    else:
        whitelist_words = []

    return {
        "version": obj.get("version"),
        "last_updated": obj.get("last_updated"),
        "words_count": len(words),
        "whitelist_words_count": len(whitelist_words),
        "path": path,
    }
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def _summarize_logs(project_dir: str) -> Dict[str, Any]:
|
|
221
|
+
logs_dir = os.path.join(project_dir, "logs")
|
|
222
|
+
if not os.path.isdir(logs_dir):
|
|
223
|
+
return {"present": False}
|
|
224
|
+
log_files = []
|
|
225
|
+
for name in os.listdir(logs_dir):
|
|
226
|
+
if re.match(r"^chapter-\d+-log\.json$", name):
|
|
227
|
+
log_files.append(os.path.join(logs_dir, name))
|
|
228
|
+
log_files.sort()
|
|
229
|
+
|
|
230
|
+
stages_by_model: Dict[str, int] = {}
|
|
231
|
+
judge_models: Dict[str, int] = {}
|
|
232
|
+
gate_decisions: Dict[str, int] = {}
|
|
233
|
+
revisions_sum = 0
|
|
234
|
+
force_passed_count = 0
|
|
235
|
+
|
|
236
|
+
for path in log_files:
|
|
237
|
+
obj = _load_json(path)
|
|
238
|
+
if not isinstance(obj, dict):
|
|
239
|
+
continue
|
|
240
|
+
gate = _common.as_str(obj.get("gate_decision")) or "unknown"
|
|
241
|
+
gate_decisions[gate] = gate_decisions.get(gate, 0) + 1
|
|
242
|
+
|
|
243
|
+
rev = _common.as_int(obj.get("revisions"))
|
|
244
|
+
if rev is not None:
|
|
245
|
+
revisions_sum += rev
|
|
246
|
+
if obj.get("force_passed") is True:
|
|
247
|
+
force_passed_count += 1
|
|
248
|
+
|
|
249
|
+
stages = obj.get("stages")
|
|
250
|
+
if isinstance(stages, list):
|
|
251
|
+
for st in stages:
|
|
252
|
+
if not isinstance(st, dict):
|
|
253
|
+
continue
|
|
254
|
+
m = _common.as_str(st.get("model"))
|
|
255
|
+
if m:
|
|
256
|
+
stages_by_model[m] = stages_by_model.get(m, 0) + 1
|
|
257
|
+
|
|
258
|
+
judges = obj.get("judges")
|
|
259
|
+
if isinstance(judges, dict):
|
|
260
|
+
primary = judges.get("primary")
|
|
261
|
+
if isinstance(primary, dict):
|
|
262
|
+
m = _common.as_str(primary.get("model"))
|
|
263
|
+
if m:
|
|
264
|
+
judge_models[m] = judge_models.get(m, 0) + 1
|
|
265
|
+
secondary = judges.get("secondary")
|
|
266
|
+
if isinstance(secondary, dict):
|
|
267
|
+
m = _common.as_str(secondary.get("model"))
|
|
268
|
+
if m:
|
|
269
|
+
judge_models[m] = judge_models.get(m, 0) + 1
|
|
270
|
+
|
|
271
|
+
return {
|
|
272
|
+
"present": True,
|
|
273
|
+
"chapter_logs_count": len(log_files),
|
|
274
|
+
"gate_decisions": dict(sorted(gate_decisions.items())),
|
|
275
|
+
"revisions_sum": revisions_sum,
|
|
276
|
+
"force_passed_count": force_passed_count,
|
|
277
|
+
"stages_by_model": dict(sorted(stages_by_model.items())),
|
|
278
|
+
"judge_models": dict(sorted(judge_models.items())),
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def _get_rule_id(layer: str, item: Dict[str, Any]) -> str:
    """Pick the identifying rule field for a check item.

    L2 items prefer contract_id and L3 items prefer objective_id; every layer
    (including L1/LS/unknown, whose branches previously duplicated the
    fallback verbatim) falls back to rule_id, then to "UNKNOWN".
    """
    primary_field = {"L2": "contract_id", "L3": "objective_id"}.get(layer)
    if primary_field:
        primary = _common.as_str(item.get(primary_field))
        if primary:
            return primary
    return _common.as_str(item.get("rule_id")) or "UNKNOWN"
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def _norm_confidence(v: Any) -> str:
    """Normalize a confidence value to "high"/"medium"/"low"; else "unknown"."""
    text = _common.as_str(v)
    if text:
        lowered = text.lower()
        if lowered in ("high", "medium", "low"):
            return lowered
    return "unknown"
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
def _norm_status(v: Any) -> str:
    """Lowercase a status string; empty or non-string values become "unknown"."""
    text = _common.as_str(v)
    return text.lower() if text else "unknown"
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def _is_violation_status(status: str) -> bool:
|
|
312
|
+
return status in {"violation", "violation_suspected"}
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def _is_high_conf_violation(layer: str, item: Dict[str, Any]) -> bool:
    """Whether this check item should trip the high-confidence compliance gate.

    Requires a confirmed "violation" status at "high" confidence. For the LS
    layer, only hard constraints (or items with no constraint_type) count;
    all other layers count unconditionally once status/confidence match.
    """
    if _norm_status(item.get("status")) != "violation":
        return False
    if _norm_confidence(item.get("confidence")) != "high":
        return False
    if layer != "LS":
        return True
    constraint_type = _common.as_str(item.get("constraint_type"))
    return constraint_type is None or constraint_type == "hard"
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def _format_md_report(data: Dict[str, Any]) -> str:
|
|
331
|
+
summary = data.get("metrics", {})
|
|
332
|
+
lines: List[str] = []
|
|
333
|
+
lines.append(f"# Regression Summary ({summary.get('run_id')})")
|
|
334
|
+
lines.append("")
|
|
335
|
+
lines.append(f"- Project: `{summary.get('project_path')}`")
|
|
336
|
+
lines.append(f"- Generated at: `{summary.get('generated_at')}`")
|
|
337
|
+
lines.append("")
|
|
338
|
+
lines.append("## Spec+LS Compliance")
|
|
339
|
+
lines.append("")
|
|
340
|
+
comp = summary.get("compliance", {})
|
|
341
|
+
lines.append(f"- Chapters: {comp.get('chapters_total')}")
|
|
342
|
+
lines.append(f"- Compliance (high-confidence gate): {comp.get('compliance_rate_high_confidence')}")
|
|
343
|
+
lines.append(f"- Chapters w/ high-confidence violations: {comp.get('chapters_with_high_confidence_violation')}")
|
|
344
|
+
lines.append(f"- Compliance (any violation status): {comp.get('compliance_rate_any_violation')}")
|
|
345
|
+
lines.append(f"- Chapters w/ any violations: {comp.get('chapters_with_any_violation')}")
|
|
346
|
+
lines.append("")
|
|
347
|
+
|
|
348
|
+
if isinstance(data.get("top_rules"), list) and data["top_rules"]:
|
|
349
|
+
lines.append("## Top Violated Rules (any confidence)")
|
|
350
|
+
lines.append("")
|
|
351
|
+
for it in data["top_rules"][:10]:
|
|
352
|
+
lines.append(f"- {it.get('layer')} {it.get('rule_id')}: {it.get('count')}")
|
|
353
|
+
lines.append("")
|
|
354
|
+
|
|
355
|
+
if data.get("continuity") is not None:
|
|
356
|
+
lines.append("## Continuity (logs/continuity/latest.json)")
|
|
357
|
+
lines.append("")
|
|
358
|
+
stats = data["continuity"].get("stats", {}) if isinstance(data["continuity"], dict) else {}
|
|
359
|
+
lines.append(f"- issues_total: {stats.get('issues_total')}")
|
|
360
|
+
lines.append(f"- issues_by_severity: {stats.get('issues_by_severity')}")
|
|
361
|
+
lines.append("")
|
|
362
|
+
|
|
363
|
+
if data.get("foreshadowing") is not None:
|
|
364
|
+
lines.append("## Foreshadowing (foreshadowing/global.json)")
|
|
365
|
+
lines.append("")
|
|
366
|
+
fs = data["foreshadowing"]
|
|
367
|
+
lines.append(f"- active: {fs.get('active_count')} / total: {fs.get('items_total')} / resolved: {fs.get('resolved_count')}")
|
|
368
|
+
lines.append(f"- overdue_short: {fs.get('overdue_short_count')}")
|
|
369
|
+
lines.append("")
|
|
370
|
+
|
|
371
|
+
if data.get("style_drift") is not None:
|
|
372
|
+
lines.append("## Style Drift (style-drift.json)")
|
|
373
|
+
lines.append("")
|
|
374
|
+
sd = data["style_drift"]
|
|
375
|
+
lines.append(f"- active: {sd.get('active')} / drifts_count: {sd.get('drifts_count')}")
|
|
376
|
+
lines.append("")
|
|
377
|
+
|
|
378
|
+
return "\n".join(lines).rstrip() + "\n"
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
def main() -> None:
    """Entry point: summarize existing project outputs into a regression report.

    Positional argv contract (supplied by scripts/run-regression.sh):
      1: project directory
      2: labels file path ("" disables)
      3: runs archive directory
      4: archive flag ("1" enables archiving)
      5: include continuity summary flag ("1" = on)
      6: include foreshadowing summary flag ("1" = on)
      7: include style-drift summary flag ("1" = on)

    Writes the full JSON report to stdout; when archiving, atomically creates
    a per-run directory under runs_dir containing config/summary/report files.

    NOTE(review): fewer than 7 argv entries raises IndexError here — assumed
    to be guaranteed by the wrapper script; confirm.
    """
    project_dir = sys.argv[1]
    labels_path = sys.argv[2].strip()
    runs_dir = sys.argv[3]
    archive = int(sys.argv[4]) == 1
    include_continuity = int(sys.argv[5]) == 1
    include_foreshadowing = int(sys.argv[6]) == 1
    include_style = int(sys.argv[7]) == 1

    project_dir_abs = os.path.abspath(project_dir)

    # Evaluations are the anchor data set; without them there is nothing to report.
    eval_dir = os.path.join(project_dir_abs, "evaluations")
    eval_items = _common.find_eval_files(eval_dir)
    if not eval_items:
        _die(f"no evaluations found under {project_dir_abs}/evaluations", 1)

    chapters_total = len(eval_items)
    chapters = [c for c, _ in eval_items]

    # Determine last_completed_chapter for overdue logic.
    # Falls back to the highest evaluated chapter when the checkpoint is
    # missing or lacks the field.
    checkpoint = _load_json(os.path.join(project_dir_abs, ".checkpoint.json"))
    last_completed: Optional[int] = None
    if isinstance(checkpoint, dict):
        last_completed = _common.as_int(checkpoint.get("last_completed_chapter"))
    if last_completed is None:
        last_completed = max(chapters)

    # Spec+LS compliance aggregation.
    violations_total = 0
    chapters_with_any_violation = set()
    chapters_with_high_conf_violation = set()

    violations_by_conf: Dict[str, int] = {"high": 0, "medium": 0, "low": 0, "unknown": 0}
    violations_by_layer: Dict[str, int] = {"L1": 0, "L2": 0, "L3": 0, "LS": 0, "unknown": 0}

    by_rule: Dict[str, Dict[str, Dict[str, int]]] = {}  # layer -> rule_id -> confidence -> count

    overall_scores: List[float] = []
    dim_sums: Dict[str, float] = {}
    dim_counts: Dict[str, int] = {}

    for chapter, path in eval_items:
        obj = _load_json(path)
        if not isinstance(obj, dict):
            continue

        # Overall quality score plus per-dimension running sums for means.
        overall = _common.extract_overall(obj)
        if overall is not None:
            overall_scores.append(overall)

        scores = _common.extract_dimension_scores(obj)
        for k, v in scores.items():
            dim_sums[k] = dim_sums.get(k, 0.0) + float(v)
            dim_counts[k] = dim_counts.get(k, 0) + 1

        # Contract verification checks, keyed by spec layer.
        cv = _common.extract_contract_verification(obj)
        layer_map = {
            "L1": cv.get("l1_checks"),
            "L2": cv.get("l2_checks"),
            "L3": cv.get("l3_checks"),
            "LS": cv.get("ls_checks"),
        }

        chapter_any_violation = False
        chapter_high_violation = False

        for layer, checks in layer_map.items():
            if not isinstance(checks, list):
                continue
            for it in checks:
                if not isinstance(it, dict):
                    continue
                status = _norm_status(it.get("status"))
                conf = _norm_confidence(it.get("confidence"))

                # Any violation (confirmed or suspected) feeds the broad counters
                # and the per-rule breakdown.
                if _is_violation_status(status):
                    chapter_any_violation = True
                    violations_total += 1
                    violations_by_conf[conf] = violations_by_conf.get(conf, 0) + 1
                    violations_by_layer[layer] = violations_by_layer.get(layer, 0) + 1

                    rule_id = _get_rule_id(layer, it)
                    by_rule.setdefault(layer, {}).setdefault(rule_id, {}).setdefault(conf, 0)
                    by_rule[layer][rule_id][conf] += 1

                # The stricter gate only counts confirmed high-confidence violations
                # (with an extra hard-constraint filter for LS).
                if _is_high_conf_violation(layer, it):
                    chapter_high_violation = True

        if chapter_any_violation:
            chapters_with_any_violation.add(chapter)
        if chapter_high_violation:
            chapters_with_high_conf_violation.add(chapter)

    def _rate(ok: int, total: int) -> float:
        # Safe ratio: 0.0 rather than ZeroDivisionError on an empty total.
        if total <= 0:
            return 0.0
        return ok / total

    compliance_rate_any = _rate(chapters_total - len(chapters_with_any_violation), chapters_total)
    compliance_rate_high = _rate(chapters_total - len(chapters_with_high_conf_violation), chapters_total)

    # Top rules list for quick view.
    top_rules: List[Dict[str, Any]] = []
    for layer in sorted(by_rule.keys()):
        for rule_id, conf_map in by_rule[layer].items():
            top_rules.append({"layer": layer, "rule_id": rule_id, "count": sum(conf_map.values())})
    top_rules.sort(key=lambda x: (-int(x["count"]), str(x["layer"]), str(x["rule_id"])))

    # Overall score distribution (None when no chapter had a score).
    score_summary = None
    if overall_scores:
        score_summary = {
            "n": len(overall_scores),
            "mean": round(sum(overall_scores) / len(overall_scores), 4),
            "min": round(min(overall_scores), 4),
            "max": round(max(overall_scores), 4),
        }

    # Per-dimension means from the running sums above.
    dim_summary = {}
    for k in sorted(dim_sums.keys()):
        cnt = dim_counts.get(k, 0)
        if cnt <= 0:
            continue
        dim_summary[k] = {"n": cnt, "mean": round(dim_sums[k] / cnt, 4)}

    # Optional per-area summaries, each behind its CLI include flag.
    continuity_summary = None
    if include_continuity:
        continuity_summary = _summarize_continuity(_load_json(os.path.join(project_dir_abs, "logs", "continuity", "latest.json")))

    foreshadow_summary = None
    if include_foreshadowing:
        foreshadow_summary = _summarize_foreshadowing(project_dir_abs, last_completed)

    style_summary = None
    if include_style:
        style_summary = _summarize_style_drift(_load_json(os.path.join(project_dir_abs, "style-drift.json")))

    blacklist_summary = _summarize_ai_blacklist(project_dir_abs)
    logs_summary = _summarize_logs(project_dir_abs)

    run_id = _timestamp_id()
    generated_at = _common.iso_utc_now()

    # Snapshot of the run configuration, archived alongside the metrics.
    config_snapshot = {
        "schema_version": 1,
        "run_id": run_id,
        "generated_at": generated_at,
        "project_path": project_dir_abs,
        "labels_path": os.path.abspath(labels_path) if labels_path else None,
        "enabled_checks": {
            "continuity_latest_json": bool(include_continuity),
            "foreshadowing_global_json": bool(include_foreshadowing),
            "style_drift_json": bool(include_style),
        },
        # pause_for_user_force_rewrite is implicit (<2.0), no threshold to calibrate
        "gate_thresholds_defaults": {"pass": 4.0, "polish": 3.5, "revise": 3.0, "pause_for_user": 2.0},
    }

    # Compact metrics document (summary.json) — the regression-diff target.
    summary_metrics = {
        "schema_version": 1,
        "run_id": run_id,
        "generated_at": generated_at,
        "project_path": project_dir_abs,
        "chapters_total": chapters_total,
        "chapter_range": [min(chapters), max(chapters)],
        "compliance": {
            "chapters_total": chapters_total,
            "chapters_with_any_violation": len(chapters_with_any_violation),
            "chapters_with_high_confidence_violation": len(chapters_with_high_conf_violation),
            "compliance_rate_any_violation": round(compliance_rate_any, 6),
            "compliance_rate_high_confidence": round(compliance_rate_high, 6),
        },
        "violations_total": violations_total,
        "violations_by_confidence": violations_by_conf,
        "violations_by_layer": violations_by_layer,
        "score_overall": score_summary,
        "score_dimensions": dim_summary,
    }

    # Full report (report.json / stdout) — includes raw breakdowns and
    # the optional per-area summaries.
    report = {
        "schema_version": 1,
        "run_id": run_id,
        "generated_at": generated_at,
        "project_path": project_dir_abs,
        "labels_path": os.path.abspath(labels_path) if labels_path else None,
        "chapter_ids": chapters,
        "checkpoint": checkpoint if isinstance(checkpoint, dict) else None,
        "spec_ls": {
            "violations_total": violations_total,
            "chapters_with_any_violation": sorted(list(chapters_with_any_violation)),
            "chapters_with_high_confidence_violation": sorted(list(chapters_with_high_conf_violation)),
            "violations_by_layer_rule_confidence": by_rule,
            "top_rules": top_rules[:50],
        },
        "continuity": continuity_summary,
        "foreshadowing": foreshadow_summary,
        "style_drift": style_summary,
        "ai_blacklist": blacklist_summary,
        "logs": logs_summary,
    }

    out_json = json.dumps(report, ensure_ascii=False, sort_keys=True) + "\n"
    sys.stdout.write(out_json)

    if not archive:
        return

    # Archive atomically: write everything into a temp dir on the same
    # filesystem, then rename it into place; clean up on any failure.
    run_dir = os.path.join(os.path.abspath(runs_dir), run_id)
    parent_dir = os.path.abspath(runs_dir)
    _mkdir(parent_dir)
    tmp_dir = tempfile.mkdtemp(dir=parent_dir)

    try:
        with open(os.path.join(tmp_dir, "config.json"), "w", encoding="utf-8") as f:
            f.write(json.dumps(config_snapshot, ensure_ascii=False, sort_keys=True) + "\n")
        with open(os.path.join(tmp_dir, "summary.json"), "w", encoding="utf-8") as f:
            f.write(json.dumps(summary_metrics, ensure_ascii=False, sort_keys=True) + "\n")
        with open(os.path.join(tmp_dir, "report.json"), "w", encoding="utf-8") as f:
            f.write(out_json)
        with open(os.path.join(tmp_dir, "report.md"), "w", encoding="utf-8") as f:
            report_data = {
                "metrics": summary_metrics,
                "top_rules": top_rules[:10],
                "continuity": continuity_summary,
                "foreshadowing": foreshadow_summary,
                "style_drift": style_summary,
            }
            f.write(_format_md_report(report_data))
        # NOTE(review): os.rename fails if run_dir already exists; the run_id's
        # sub-second digits make collisions unlikely — confirm acceptable.
        os.rename(tmp_dir, run_dir)
    except Exception:
        shutil.rmtree(tmp_dir, ignore_errors=True)
        raise
|
|
612
|
+
|
|
613
|
+
|
|
614
|
+
if __name__ == "__main__":
    # Script entry: let deliberate exits (SystemExit from _die) pass through
    # untouched; convert any other failure into exit code 2 with a prefixed
    # message on stderr.
    try:
        main()
    except SystemExit:
        raise
    except Exception as exc:
        sys.stderr.write(f"run-regression.sh: unexpected error: {exc}\n")
        raise SystemExit(2)
|