novel-writer-cli 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +103 -0
  3. package/agents/chapter-writer.md +142 -0
  4. package/agents/character-weaver.md +117 -0
  5. package/agents/consistency-auditor.md +85 -0
  6. package/agents/plot-architect.md +128 -0
  7. package/agents/quality-judge.md +232 -0
  8. package/agents/style-analyzer.md +109 -0
  9. package/agents/style-refiner.md +97 -0
  10. package/agents/summarizer.md +128 -0
  11. package/agents/world-builder.md +161 -0
  12. package/dist/__tests__/character-voice.test.js +445 -0
  13. package/dist/__tests__/commit-prototype-pollution.test.js +45 -0
  14. package/dist/__tests__/engagement.test.js +382 -0
  15. package/dist/__tests__/foreshadow-visibility.test.js +131 -0
  16. package/dist/__tests__/hook-ledger.test.js +1028 -0
  17. package/dist/__tests__/naming-lint.test.js +132 -0
  18. package/dist/__tests__/narrative-health-injection.test.js +359 -0
  19. package/dist/__tests__/next-step-prejudge-guardrails.test.js +325 -0
  20. package/dist/__tests__/next-step-title-fix.test.js +153 -0
  21. package/dist/__tests__/platform-profile.test.js +274 -0
  22. package/dist/__tests__/promise-ledger.test.js +189 -0
  23. package/dist/__tests__/readability-lint.test.js +209 -0
  24. package/dist/__tests__/text-utils.test.js +39 -0
  25. package/dist/__tests__/title-policy.test.js +147 -0
  26. package/dist/advance.js +75 -0
  27. package/dist/character-voice.js +805 -0
  28. package/dist/checkpoint.js +126 -0
  29. package/dist/cli.js +563 -0
  30. package/dist/cliche-lint.js +515 -0
  31. package/dist/commit.js +1460 -0
  32. package/dist/consistency-auditor.js +684 -0
  33. package/dist/engagement.js +687 -0
  34. package/dist/errors.js +7 -0
  35. package/dist/fingerprint.js +16 -0
  36. package/dist/foreshadow-visibility.js +214 -0
  37. package/dist/fs-utils.js +68 -0
  38. package/dist/hook-ledger.js +721 -0
  39. package/dist/hook-policy.js +107 -0
  40. package/dist/instruction-gates.js +51 -0
  41. package/dist/instructions.js +406 -0
  42. package/dist/latest-summary-loader.js +29 -0
  43. package/dist/lock.js +121 -0
  44. package/dist/naming-lint.js +531 -0
  45. package/dist/ner.js +73 -0
  46. package/dist/next-step.js +408 -0
  47. package/dist/novel-ask.js +270 -0
  48. package/dist/output.js +9 -0
  49. package/dist/platform-constraints.js +518 -0
  50. package/dist/platform-profile.js +325 -0
  51. package/dist/prejudge-guardrails.js +370 -0
  52. package/dist/project.js +40 -0
  53. package/dist/promise-ledger.js +723 -0
  54. package/dist/readability-lint.js +555 -0
  55. package/dist/safe-parse.js +36 -0
  56. package/dist/safe-path.js +29 -0
  57. package/dist/scoring-weights.js +290 -0
  58. package/dist/steps.js +60 -0
  59. package/dist/text-utils.js +18 -0
  60. package/dist/title-policy.js +251 -0
  61. package/dist/type-guards.js +6 -0
  62. package/dist/validate.js +131 -0
  63. package/docs/user/README.md +17 -0
  64. package/docs/user/guardrails.md +179 -0
  65. package/docs/user/interactive-gates.md +124 -0
  66. package/docs/user/novel-cli.md +289 -0
  67. package/docs/user/ops.md +123 -0
  68. package/docs/user/quick-start.md +97 -0
  69. package/docs/user/spec-system.md +166 -0
  70. package/docs/user/storylines.md +144 -0
  71. package/package.json +48 -0
  72. package/schemas/README.md +18 -0
  73. package/schemas/character-voice-drift.schema.json +135 -0
  74. package/schemas/character-voice-profiles.schema.json +141 -0
  75. package/schemas/engagement-metrics.schema.json +38 -0
  76. package/schemas/hook-ledger.schema.json +108 -0
  77. package/schemas/platform-profile.schema.json +235 -0
  78. package/schemas/promise-ledger.schema.json +97 -0
  79. package/scripts/calibrate-quality-judge.sh +91 -0
  80. package/scripts/compare-regression-runs.sh +86 -0
  81. package/scripts/lib/_common.py +131 -0
  82. package/scripts/lib/calibrate_quality_judge.py +312 -0
  83. package/scripts/lib/compare_regression_runs.py +142 -0
  84. package/scripts/lib/run_regression.py +621 -0
  85. package/scripts/lint-blacklist.sh +201 -0
  86. package/scripts/lint-cliche.sh +370 -0
  87. package/scripts/lint-readability.sh +404 -0
  88. package/scripts/query-foreshadow.sh +252 -0
  89. package/scripts/run-ner.sh +669 -0
  90. package/scripts/run-regression.sh +122 -0
  91. package/skills/cli-step/SKILL.md +158 -0
  92. package/skills/continue/SKILL.md +348 -0
  93. package/skills/continue/references/context-contracts.md +169 -0
  94. package/skills/continue/references/continuity-checks.md +187 -0
  95. package/skills/continue/references/file-protocols.md +64 -0
  96. package/skills/continue/references/foreshadowing.md +130 -0
  97. package/skills/continue/references/gate-decision.md +53 -0
  98. package/skills/continue/references/periodic-maintenance.md +46 -0
  99. package/skills/novel-writing/SKILL.md +77 -0
  100. package/skills/novel-writing/references/quality-rubric.md +140 -0
  101. package/skills/novel-writing/references/style-guide.md +145 -0
  102. package/skills/start/SKILL.md +458 -0
  103. package/skills/start/references/quality-review.md +86 -0
  104. package/skills/start/references/setting-update.md +44 -0
  105. package/skills/start/references/vol-planning.md +61 -0
  106. package/skills/start/references/vol-review.md +58 -0
  107. package/skills/status/SKILL.md +116 -0
  108. package/skills/status/references/sample-output.md +60 -0
  109. package/templates/ai-blacklist.json +79 -0
  110. package/templates/brief-template.md +46 -0
  111. package/templates/genre-weight-profiles.json +90 -0
  112. package/templates/novel-ask/example.answer.json +12 -0
  113. package/templates/novel-ask/example.question.json +51 -0
  114. package/templates/platform-profile.json +148 -0
  115. package/templates/style-profile-template.json +58 -0
  116. package/templates/web-novel-cliche-lint.json +41 -0
@@ -0,0 +1,621 @@
1
+ """Regression runner for M2 outputs (M3).
2
+
3
+ Extracted from the heredoc in scripts/run-regression.sh.
4
+ Reads existing project outputs (evaluations/logs/etc) and summarizes
5
+ regression-friendly metrics.
6
+ """
7
+
8
+ import json
9
+ import os
10
+ import re
11
+ import shutil
12
+ import sys
13
+ import tempfile
14
+ from datetime import datetime, timezone
15
+ from typing import Any, Dict, List, Optional, Tuple
16
+
17
+ import _common
18
+
19
+
20
+ # ---------------------------------------------------------------------------
21
+ # Local helpers (thin wrappers keeping "run-regression.sh:" prefix)
22
+ # ---------------------------------------------------------------------------
23
+
24
def _die(msg: str, exit_code: int = 1) -> None:
    """Abort with *msg* prefixed by "run-regression.sh:" (matches the shell wrapper's error style)."""
    _common.die(f"run-regression.sh: {msg}", exit_code)
26
+
27
+
28
def _load_json(path: str) -> Any:
    """Load JSON; return None on missing, die with prefix on parse error.

    NOTE(review): the except path relies on _common.die terminating the
    process (presumably via SystemExit, which is not caught by
    ``except Exception``); otherwise this would fall through and return
    None — confirm against _common.die.
    """
    try:
        return _common.load_json(path, missing_ok=True)
    except Exception as e:
        _die(f"invalid JSON at {path}: {e}", 1)
34
+
35
+
36
+ def _timestamp_id() -> str:
37
+ now = datetime.now(timezone.utc)
38
+ return now.strftime("%Y%m%dT%H%M%S") + f"_{now.strftime('%f')[:4]}Z"
39
+
40
+
41
def _mkdir(path: str) -> None:
    """Create *path* (including parents); a pre-existing directory is not an error."""
    os.makedirs(path, exist_ok=True)
43
+
44
+
45
+ def _severity_rank(v: str) -> int:
46
+ return {"high": 0, "medium": 1, "low": 2}.get(v, 9)
47
+
48
+
49
def _summarize_continuity(report: Any) -> Optional[Dict[str, Any]]:
    """Condense a continuity ``latest.json`` report into regression metrics.

    Returns None when the report is absent, an ``{"error": ...}`` marker
    when it has the wrong shape, otherwise header fields, selected stats,
    and the five worst issues (by severity, then type, then id).
    """
    if report is None:
        return None
    if not isinstance(report, dict):
        return {"error": "continuity latest.json is not an object"}

    raw_stats = report.get("stats")
    stats = raw_stats if isinstance(raw_stats, dict) else {}
    raw_issues = report.get("issues")
    issues = raw_issues if isinstance(raw_issues, list) else []

    # Scan at most the first 50 issue dicts, then keep only the top 5.
    top_issues: List[Dict[str, Any]] = [
        {
            "id": it.get("id"),
            "type": it.get("type"),
            "severity": it.get("severity"),
            "confidence": it.get("confidence"),
            "description": it.get("description"),
        }
        for it in issues[:50]
        if isinstance(it, dict)
    ]
    top_issues.sort(
        key=lambda x: (_severity_rank(str(x.get("severity"))), str(x.get("type")), str(x.get("id")))
    )
    del top_issues[5:]

    return {
        "schema_version": report.get("schema_version"),
        "generated_at": report.get("generated_at"),
        "scope": report.get("scope"),
        "volume": report.get("volume"),
        "chapter_range": report.get("chapter_range"),
        "stats": {
            "chapters_checked": stats.get("chapters_checked"),
            "issues_total": stats.get("issues_total"),
            "issues_by_severity": stats.get("issues_by_severity"),
        },
        "top_issues": top_issues,
    }
84
+
85
+
86
+ def _summarize_style_drift(obj: Any) -> Optional[Dict[str, Any]]:
87
+ if obj is None:
88
+ return None
89
+ if not isinstance(obj, dict):
90
+ return {"error": "style-drift.json is not an object"}
91
+ drifts = obj.get("drifts") if isinstance(obj.get("drifts"), list) else []
92
+ return {
93
+ "active": obj.get("active"),
94
+ "detected_chapter": obj.get("detected_chapter"),
95
+ "window": obj.get("window"),
96
+ "drifts_count": len(drifts),
97
+ }
98
+
99
+
100
def _as_range(value: Any) -> Optional[Tuple[int, int]]:
    """Parse a two-element list into a validated 1-based inclusive (start, end) range.

    Returns None unless both elements coerce to ints and 1 <= start <= end.
    """
    if not (isinstance(value, list) and len(value) == 2):
        return None
    start = _common.as_int(value[0])
    end = _common.as_int(value[1])
    if start is None or end is None:
        return None
    if 1 <= start <= end:
        return (start, end)
    return None
110
+
111
+
112
def _foreshadow_overdue_short(item: Dict[str, Any], last_completed_chapter: int) -> bool:
    """True when a short-scope, unresolved item is past its target resolve window."""
    is_short = item.get("scope") == "short"
    is_open = item.get("status") != "resolved"
    if not (is_short and is_open):
        return False
    window = _as_range(item.get("target_resolve_range"))
    # Items without a valid window can never be overdue.
    return window is not None and last_completed_chapter > window[1]
121
+
122
+
123
def _summarize_foreshadowing(project_dir: str, last_completed_chapter: int) -> Optional[Dict[str, Any]]:
    """Summarize foreshadowing/global.json: active/resolved/overdue counts.

    Accepts either a bare list of items or ``{"foreshadowing": [...]}``.
    When ``.checkpoint.json`` names a current volume, also compares that
    volume's planned foreshadowing file against the global ledger
    (plan_alignment). Returns None when global.json is absent.
    """
    global_path = os.path.join(project_dir, "foreshadowing", "global.json")
    global_obj = _load_json(global_path)
    if global_obj is None:
        return None
    if isinstance(global_obj, list):
        items = global_obj
    elif isinstance(global_obj, dict) and isinstance(global_obj.get("foreshadowing"), list):
        items = global_obj["foreshadowing"]
    else:
        return {"error": "foreshadowing/global.json has unsupported schema (expected list or {foreshadowing:[]})"}

    # Non-dict entries are dropped; any status other than "resolved" counts as active.
    normalized: List[Dict[str, Any]] = [it for it in items if isinstance(it, dict)]
    active = [it for it in normalized if it.get("status") != "resolved"]
    resolved = [it for it in normalized if it.get("status") == "resolved"]
    overdue = [it for it in normalized if _foreshadow_overdue_short(it, last_completed_chapter)]

    overdue_ids = []
    for it in overdue:
        fid = _common.as_str(it.get("id"))
        if fid:
            overdue_ids.append(fid)

    # Optional plan alignment using checkpoint current_volume.
    plan_stats = None
    ck = _load_json(os.path.join(project_dir, ".checkpoint.json"))
    vol = ck.get("current_volume") if isinstance(ck, dict) else None
    vol_int = _common.as_int(vol)
    if vol_int is not None and vol_int >= 1:
        plan_path = os.path.join(project_dir, "volumes", f"vol-{vol_int:02d}", "foreshadowing.json")
        plan_obj = _load_json(plan_path)
        plan_items: List[Dict[str, Any]] = []
        if isinstance(plan_obj, dict) and isinstance(plan_obj.get("foreshadowing"), list):
            plan_items = [it for it in plan_obj["foreshadowing"] if isinstance(it, dict)]
        elif isinstance(plan_obj, list):
            plan_items = [it for it in plan_obj if isinstance(it, dict)]

        if plan_items:
            # Planned ids without an id string are skipped; global ids keep
            # an empty-string placeholder so membership tests stay simple.
            global_ids = {str(_common.as_str(it.get("id")) or "") for it in normalized}
            planned_ids = [str(_common.as_str(it.get("id")) or "") for it in plan_items if _common.as_str(it.get("id"))]
            planned_total = len(planned_ids)
            missing_in_global = [pid for pid in planned_ids if pid not in global_ids]

            resolved_in_global = 0
            pending_in_global = 0
            global_by_id = {str(_common.as_str(it.get("id")) or ""): it for it in normalized if _common.as_str(it.get("id"))}
            for pid in planned_ids:
                it = global_by_id.get(pid)
                if not it:
                    continue
                if it.get("status") == "resolved":
                    resolved_in_global += 1
                else:
                    pending_in_global += 1

            plan_stats = {
                "path": plan_path,
                "planned_total": planned_total,
                "resolved_in_global": resolved_in_global,
                "pending_in_global": pending_in_global,
                "missing_in_global": missing_in_global[:50],
            }

    return {
        "global_path": global_path,
        "items_total": len(normalized),
        "active_count": len(active),
        "resolved_count": len(resolved),
        "overdue_short_count": len(overdue),
        "overdue_short_ids": overdue_ids[:50],
        "plan_alignment": plan_stats,
    }
195
+
196
+
197
def _summarize_ai_blacklist(project_dir: str) -> Optional[Dict[str, Any]]:
    """Summarize ``ai-blacklist.json``: word/whitelist counts plus metadata.

    The whitelist may be either a bare list of strings or an object with a
    ``words`` list; both shapes are counted the same way. Returns None when
    the file is absent.
    """
    path = os.path.join(project_dir, "ai-blacklist.json")
    obj = _load_json(path)
    if obj is None:
        return None
    if not isinstance(obj, dict):
        return {"error": "ai-blacklist.json is not an object"}

    raw_words = obj.get("words")
    words_count = len(raw_words) if isinstance(raw_words, list) else 0

    # Normalize the two supported whitelist shapes down to one list.
    whitelist = obj.get("whitelist")
    if isinstance(whitelist, dict):
        whitelist = whitelist.get("words")
    if isinstance(whitelist, list):
        whitelist_count = sum(1 for w in whitelist if isinstance(w, str))
    else:
        whitelist_count = 0

    return {
        "version": obj.get("version"),
        "last_updated": obj.get("last_updated"),
        "words_count": words_count,
        "whitelist_words_count": whitelist_count,
        "path": path,
    }
218
+
219
+
220
def _summarize_logs(project_dir: str) -> Dict[str, Any]:
    """Aggregate per-chapter writing logs under ``<project>/logs``.

    Scans files matching ``chapter-<n>-log.json`` and tallies gate
    decisions, total revisions, force-pass count, and model usage for both
    writing stages and judges. Returns ``{"present": False}`` when the logs
    directory does not exist; malformed log files are skipped silently.
    """
    logs_dir = os.path.join(project_dir, "logs")
    if not os.path.isdir(logs_dir):
        return {"present": False}

    log_files = []
    for name in os.listdir(logs_dir):
        if re.match(r"^chapter-\d+-log\.json$", name):
            log_files.append(os.path.join(logs_dir, name))
    log_files.sort()

    stages_by_model: Dict[str, int] = {}
    judge_models: Dict[str, int] = {}
    gate_decisions: Dict[str, int] = {}
    revisions_sum = 0
    force_passed_count = 0

    for path in log_files:
        obj = _load_json(path)
        if not isinstance(obj, dict):
            continue
        gate = _common.as_str(obj.get("gate_decision")) or "unknown"
        gate_decisions[gate] = gate_decisions.get(gate, 0) + 1

        rev = _common.as_int(obj.get("revisions"))
        if rev is not None:
            revisions_sum += rev
        # Only an explicit boolean True counts as a force-pass.
        if obj.get("force_passed") is True:
            force_passed_count += 1

        stages = obj.get("stages")
        if isinstance(stages, list):
            for st in stages:
                if not isinstance(st, dict):
                    continue
                m = _common.as_str(st.get("model"))
                if m:
                    stages_by_model[m] = stages_by_model.get(m, 0) + 1

        # Primary and secondary judges are counted identically; a single
        # role loop replaces the previous copy-pasted branches.
        judges = obj.get("judges")
        if isinstance(judges, dict):
            for role in ("primary", "secondary"):
                judge = judges.get(role)
                if not isinstance(judge, dict):
                    continue
                m = _common.as_str(judge.get("model"))
                if m:
                    judge_models[m] = judge_models.get(m, 0) + 1

    return {
        "present": True,
        "chapter_logs_count": len(log_files),
        "gate_decisions": dict(sorted(gate_decisions.items())),
        "revisions_sum": revisions_sum,
        "force_passed_count": force_passed_count,
        "stages_by_model": dict(sorted(stages_by_model.items())),
        "judge_models": dict(sorted(judge_models.items())),
    }
280
+
281
+
282
def _get_rule_id(layer: str, item: Dict[str, Any]) -> str:
    """Pick the layer-specific identifier for a check item.

    L2 prefers ``contract_id`` and L3 prefers ``objective_id``; every layer
    falls back to ``rule_id``, then the literal "UNKNOWN".
    """
    preferred_key = {"L2": "contract_id", "L3": "objective_id"}.get(layer)
    if preferred_key:
        specific = _common.as_str(item.get(preferred_key))
        if specific:
            return specific
    return _common.as_str(item.get("rule_id")) or "UNKNOWN"
292
+
293
+
294
def _norm_confidence(v: Any) -> str:
    """Normalize a confidence value to one of high/medium/low/unknown."""
    s = _common.as_str(v)
    if s:
        lowered = s.lower()
        if lowered in ("high", "medium", "low"):
            return lowered
    return "unknown"
302
+
303
+
304
def _norm_status(v: Any) -> str:
    """Lowercase a status string; absent/non-string values become "unknown"."""
    s = _common.as_str(v)
    return s.lower() if s else "unknown"
309
+
310
+
311
+ def _is_violation_status(status: str) -> bool:
312
+ return status in {"violation", "violation_suspected"}
313
+
314
+
315
def _is_high_conf_violation(layer: str, item: Dict[str, Any]) -> bool:
    """True for confirmed high-confidence violations.

    Only status "violation" (not "violation_suspected") qualifies, and only
    with confidence "high". For the LS layer the item must additionally be
    a hard constraint — an absent constraint_type is treated as hard.
    """
    if _norm_status(item.get("status")) != "violation":
        return False
    if _norm_confidence(item.get("confidence")) != "high":
        return False
    if layer == "LS":
        constraint_type = _common.as_str(item.get("constraint_type"))
        return constraint_type is None or constraint_type == "hard"
    return True
328
+
329
+
330
+ def _format_md_report(data: Dict[str, Any]) -> str:
331
+ summary = data.get("metrics", {})
332
+ lines: List[str] = []
333
+ lines.append(f"# Regression Summary ({summary.get('run_id')})")
334
+ lines.append("")
335
+ lines.append(f"- Project: `{summary.get('project_path')}`")
336
+ lines.append(f"- Generated at: `{summary.get('generated_at')}`")
337
+ lines.append("")
338
+ lines.append("## Spec+LS Compliance")
339
+ lines.append("")
340
+ comp = summary.get("compliance", {})
341
+ lines.append(f"- Chapters: {comp.get('chapters_total')}")
342
+ lines.append(f"- Compliance (high-confidence gate): {comp.get('compliance_rate_high_confidence')}")
343
+ lines.append(f"- Chapters w/ high-confidence violations: {comp.get('chapters_with_high_confidence_violation')}")
344
+ lines.append(f"- Compliance (any violation status): {comp.get('compliance_rate_any_violation')}")
345
+ lines.append(f"- Chapters w/ any violations: {comp.get('chapters_with_any_violation')}")
346
+ lines.append("")
347
+
348
+ if isinstance(data.get("top_rules"), list) and data["top_rules"]:
349
+ lines.append("## Top Violated Rules (any confidence)")
350
+ lines.append("")
351
+ for it in data["top_rules"][:10]:
352
+ lines.append(f"- {it.get('layer')} {it.get('rule_id')}: {it.get('count')}")
353
+ lines.append("")
354
+
355
+ if data.get("continuity") is not None:
356
+ lines.append("## Continuity (logs/continuity/latest.json)")
357
+ lines.append("")
358
+ stats = data["continuity"].get("stats", {}) if isinstance(data["continuity"], dict) else {}
359
+ lines.append(f"- issues_total: {stats.get('issues_total')}")
360
+ lines.append(f"- issues_by_severity: {stats.get('issues_by_severity')}")
361
+ lines.append("")
362
+
363
+ if data.get("foreshadowing") is not None:
364
+ lines.append("## Foreshadowing (foreshadowing/global.json)")
365
+ lines.append("")
366
+ fs = data["foreshadowing"]
367
+ lines.append(f"- active: {fs.get('active_count')} / total: {fs.get('items_total')} / resolved: {fs.get('resolved_count')}")
368
+ lines.append(f"- overdue_short: {fs.get('overdue_short_count')}")
369
+ lines.append("")
370
+
371
+ if data.get("style_drift") is not None:
372
+ lines.append("## Style Drift (style-drift.json)")
373
+ lines.append("")
374
+ sd = data["style_drift"]
375
+ lines.append(f"- active: {sd.get('active')} / drifts_count: {sd.get('drifts_count')}")
376
+ lines.append("")
377
+
378
+ return "\n".join(lines).rstrip() + "\n"
379
+
380
+
381
def main() -> None:
    """Entry point: aggregate project outputs into one regression report.

    Positional argv (presumably supplied by the run-regression.sh wrapper —
    confirm against that script):
      1: project dir   2: labels path ("" for none)   3: runs dir
      4: archive flag (1/0)   5-7: include continuity/foreshadowing/style (1/0)

    Writes the full JSON report to stdout; when archiving, also publishes
    config.json / summary.json / report.json / report.md under
    ``<runs_dir>/<run_id>/`` via a temp dir + rename.
    """
    project_dir = sys.argv[1]
    labels_path = sys.argv[2].strip()
    runs_dir = sys.argv[3]
    archive = int(sys.argv[4]) == 1
    include_continuity = int(sys.argv[5]) == 1
    include_foreshadowing = int(sys.argv[6]) == 1
    include_style = int(sys.argv[7]) == 1

    project_dir_abs = os.path.abspath(project_dir)

    # Evaluations are mandatory; everything else degrades gracefully.
    eval_dir = os.path.join(project_dir_abs, "evaluations")
    eval_items = _common.find_eval_files(eval_dir)
    if not eval_items:
        _die(f"no evaluations found under {project_dir_abs}/evaluations", 1)

    chapters_total = len(eval_items)
    chapters = [c for c, _ in eval_items]

    # Determine last_completed_chapter for overdue logic.
    checkpoint = _load_json(os.path.join(project_dir_abs, ".checkpoint.json"))
    last_completed = None
    if isinstance(checkpoint, dict):
        last_completed = _common.as_int(checkpoint.get("last_completed_chapter"))
    if last_completed is None:
        last_completed = max(chapters)

    # Spec+LS compliance aggregation.
    violations_total = 0
    chapters_with_any_violation = set()
    chapters_with_high_conf_violation = set()

    violations_by_conf: Dict[str, int] = {"high": 0, "medium": 0, "low": 0, "unknown": 0}
    violations_by_layer: Dict[str, int] = {"L1": 0, "L2": 0, "L3": 0, "LS": 0, "unknown": 0}

    by_rule: Dict[str, Dict[str, Dict[str, int]]] = {}  # layer -> rule_id -> confidence -> count

    overall_scores: List[float] = []
    dim_sums: Dict[str, float] = {}
    dim_counts: Dict[str, int] = {}

    for chapter, path in eval_items:
        obj = _load_json(path)
        if not isinstance(obj, dict):
            continue

        overall = _common.extract_overall(obj)
        if overall is not None:
            overall_scores.append(overall)

        scores = _common.extract_dimension_scores(obj)
        for k, v in scores.items():
            dim_sums[k] = dim_sums.get(k, 0.0) + float(v)
            dim_counts[k] = dim_counts.get(k, 0) + 1

        cv = _common.extract_contract_verification(obj)
        layer_map = {
            "L1": cv.get("l1_checks"),
            "L2": cv.get("l2_checks"),
            "L3": cv.get("l3_checks"),
            "LS": cv.get("ls_checks"),
        }

        chapter_any_violation = False
        chapter_high_violation = False

        for layer, checks in layer_map.items():
            if not isinstance(checks, list):
                continue
            for it in checks:
                if not isinstance(it, dict):
                    continue
                status = _norm_status(it.get("status"))
                conf = _norm_confidence(it.get("confidence"))

                if _is_violation_status(status):
                    chapter_any_violation = True
                    violations_total += 1
                    violations_by_conf[conf] = violations_by_conf.get(conf, 0) + 1
                    violations_by_layer[layer] = violations_by_layer.get(layer, 0) + 1

                    rule_id = _get_rule_id(layer, it)
                    by_rule.setdefault(layer, {}).setdefault(rule_id, {}).setdefault(conf, 0)
                    by_rule[layer][rule_id][conf] += 1

                if _is_high_conf_violation(layer, it):
                    chapter_high_violation = True

        if chapter_any_violation:
            chapters_with_any_violation.add(chapter)
        if chapter_high_violation:
            chapters_with_high_conf_violation.add(chapter)

    def _rate(ok: int, total: int) -> float:
        # Guard against division by zero when no chapters exist.
        if total <= 0:
            return 0.0
        return ok / total

    compliance_rate_any = _rate(chapters_total - len(chapters_with_any_violation), chapters_total)
    compliance_rate_high = _rate(chapters_total - len(chapters_with_high_conf_violation), chapters_total)

    # Top rules list for quick view.
    top_rules: List[Dict[str, Any]] = []
    for layer in sorted(by_rule.keys()):
        for rule_id, conf_map in by_rule[layer].items():
            top_rules.append({"layer": layer, "rule_id": rule_id, "count": sum(conf_map.values())})
    top_rules.sort(key=lambda x: (-int(x["count"]), str(x["layer"]), str(x["rule_id"])))

    score_summary = None
    if overall_scores:
        score_summary = {
            "n": len(overall_scores),
            "mean": round(sum(overall_scores) / len(overall_scores), 4),
            "min": round(min(overall_scores), 4),
            "max": round(max(overall_scores), 4),
        }

    dim_summary = {}
    for k in sorted(dim_sums.keys()):
        cnt = dim_counts.get(k, 0)
        if cnt <= 0:
            continue
        dim_summary[k] = {"n": cnt, "mean": round(dim_sums[k] / cnt, 4)}

    # Optional sections, each gated by its CLI flag.
    continuity_summary = None
    if include_continuity:
        continuity_summary = _summarize_continuity(_load_json(os.path.join(project_dir_abs, "logs", "continuity", "latest.json")))

    foreshadow_summary = None
    if include_foreshadowing:
        foreshadow_summary = _summarize_foreshadowing(project_dir_abs, last_completed)

    style_summary = None
    if include_style:
        style_summary = _summarize_style_drift(_load_json(os.path.join(project_dir_abs, "style-drift.json")))

    blacklist_summary = _summarize_ai_blacklist(project_dir_abs)
    logs_summary = _summarize_logs(project_dir_abs)

    run_id = _timestamp_id()
    generated_at = _common.iso_utc_now()

    config_snapshot = {
        "schema_version": 1,
        "run_id": run_id,
        "generated_at": generated_at,
        "project_path": project_dir_abs,
        "labels_path": os.path.abspath(labels_path) if labels_path else None,
        "enabled_checks": {
            "continuity_latest_json": bool(include_continuity),
            "foreshadowing_global_json": bool(include_foreshadowing),
            "style_drift_json": bool(include_style),
        },
        # pause_for_user_force_rewrite is implicit (<2.0), no threshold to calibrate
        "gate_thresholds_defaults": {"pass": 4.0, "polish": 3.5, "revise": 3.0, "pause_for_user": 2.0},
    }

    summary_metrics = {
        "schema_version": 1,
        "run_id": run_id,
        "generated_at": generated_at,
        "project_path": project_dir_abs,
        "chapters_total": chapters_total,
        "chapter_range": [min(chapters), max(chapters)],
        "compliance": {
            "chapters_total": chapters_total,
            "chapters_with_any_violation": len(chapters_with_any_violation),
            "chapters_with_high_confidence_violation": len(chapters_with_high_conf_violation),
            "compliance_rate_any_violation": round(compliance_rate_any, 6),
            "compliance_rate_high_confidence": round(compliance_rate_high, 6),
        },
        "violations_total": violations_total,
        "violations_by_confidence": violations_by_conf,
        "violations_by_layer": violations_by_layer,
        "score_overall": score_summary,
        "score_dimensions": dim_summary,
    }

    report = {
        "schema_version": 1,
        "run_id": run_id,
        "generated_at": generated_at,
        "project_path": project_dir_abs,
        "labels_path": os.path.abspath(labels_path) if labels_path else None,
        "chapter_ids": chapters,
        "checkpoint": checkpoint if isinstance(checkpoint, dict) else None,
        "spec_ls": {
            "violations_total": violations_total,
            "chapters_with_any_violation": sorted(list(chapters_with_any_violation)),
            "chapters_with_high_confidence_violation": sorted(list(chapters_with_high_conf_violation)),
            "violations_by_layer_rule_confidence": by_rule,
            "top_rules": top_rules[:50],
        },
        "continuity": continuity_summary,
        "foreshadowing": foreshadow_summary,
        "style_drift": style_summary,
        "ai_blacklist": blacklist_summary,
        "logs": logs_summary,
    }

    # sort_keys keeps the stdout JSON stable across runs for diffing.
    out_json = json.dumps(report, ensure_ascii=False, sort_keys=True) + "\n"
    sys.stdout.write(out_json)

    if not archive:
        return

    # Publish the run archive: write everything into a temp dir first, then
    # rename into place so a partially-written run dir is never visible.
    run_dir = os.path.join(os.path.abspath(runs_dir), run_id)
    parent_dir = os.path.abspath(runs_dir)
    _mkdir(parent_dir)
    tmp_dir = tempfile.mkdtemp(dir=parent_dir)

    try:
        with open(os.path.join(tmp_dir, "config.json"), "w", encoding="utf-8") as f:
            f.write(json.dumps(config_snapshot, ensure_ascii=False, sort_keys=True) + "\n")
        with open(os.path.join(tmp_dir, "summary.json"), "w", encoding="utf-8") as f:
            f.write(json.dumps(summary_metrics, ensure_ascii=False, sort_keys=True) + "\n")
        with open(os.path.join(tmp_dir, "report.json"), "w", encoding="utf-8") as f:
            f.write(out_json)
        with open(os.path.join(tmp_dir, "report.md"), "w", encoding="utf-8") as f:
            report_data = {
                "metrics": summary_metrics,
                "top_rules": top_rules[:10],
                "continuity": continuity_summary,
                "foreshadowing": foreshadow_summary,
                "style_drift": style_summary,
            }
            f.write(_format_md_report(report_data))
        os.rename(tmp_dir, run_dir)
    except Exception:
        # Clean up the temp dir on any failure, then surface the error.
        shutil.rmtree(tmp_dir, ignore_errors=True)
        raise
612
+
613
+
614
if __name__ == "__main__":
    try:
        main()
    except SystemExit:
        # Deliberate exits (including _die) pass through untouched.
        raise
    except Exception as e:
        # Any unexpected failure gets the wrapper's prefix and exit code 2.
        sys.stderr.write(f"run-regression.sh: unexpected error: {e}\n")
        raise SystemExit(2)