novel-writer-cli 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +103 -0
  3. package/agents/chapter-writer.md +142 -0
  4. package/agents/character-weaver.md +117 -0
  5. package/agents/consistency-auditor.md +85 -0
  6. package/agents/plot-architect.md +128 -0
  7. package/agents/quality-judge.md +232 -0
  8. package/agents/style-analyzer.md +109 -0
  9. package/agents/style-refiner.md +97 -0
  10. package/agents/summarizer.md +128 -0
  11. package/agents/world-builder.md +161 -0
  12. package/dist/__tests__/character-voice.test.js +445 -0
  13. package/dist/__tests__/commit-prototype-pollution.test.js +45 -0
  14. package/dist/__tests__/engagement.test.js +382 -0
  15. package/dist/__tests__/foreshadow-visibility.test.js +131 -0
  16. package/dist/__tests__/hook-ledger.test.js +1028 -0
  17. package/dist/__tests__/naming-lint.test.js +132 -0
  18. package/dist/__tests__/narrative-health-injection.test.js +359 -0
  19. package/dist/__tests__/next-step-prejudge-guardrails.test.js +325 -0
  20. package/dist/__tests__/next-step-title-fix.test.js +153 -0
  21. package/dist/__tests__/platform-profile.test.js +274 -0
  22. package/dist/__tests__/promise-ledger.test.js +189 -0
  23. package/dist/__tests__/readability-lint.test.js +209 -0
  24. package/dist/__tests__/text-utils.test.js +39 -0
  25. package/dist/__tests__/title-policy.test.js +147 -0
  26. package/dist/advance.js +75 -0
  27. package/dist/character-voice.js +805 -0
  28. package/dist/checkpoint.js +126 -0
  29. package/dist/cli.js +563 -0
  30. package/dist/cliche-lint.js +515 -0
  31. package/dist/commit.js +1460 -0
  32. package/dist/consistency-auditor.js +684 -0
  33. package/dist/engagement.js +687 -0
  34. package/dist/errors.js +7 -0
  35. package/dist/fingerprint.js +16 -0
  36. package/dist/foreshadow-visibility.js +214 -0
  37. package/dist/fs-utils.js +68 -0
  38. package/dist/hook-ledger.js +721 -0
  39. package/dist/hook-policy.js +107 -0
  40. package/dist/instruction-gates.js +51 -0
  41. package/dist/instructions.js +406 -0
  42. package/dist/latest-summary-loader.js +29 -0
  43. package/dist/lock.js +121 -0
  44. package/dist/naming-lint.js +531 -0
  45. package/dist/ner.js +73 -0
  46. package/dist/next-step.js +408 -0
  47. package/dist/novel-ask.js +270 -0
  48. package/dist/output.js +9 -0
  49. package/dist/platform-constraints.js +518 -0
  50. package/dist/platform-profile.js +325 -0
  51. package/dist/prejudge-guardrails.js +370 -0
  52. package/dist/project.js +40 -0
  53. package/dist/promise-ledger.js +723 -0
  54. package/dist/readability-lint.js +555 -0
  55. package/dist/safe-parse.js +36 -0
  56. package/dist/safe-path.js +29 -0
  57. package/dist/scoring-weights.js +290 -0
  58. package/dist/steps.js +60 -0
  59. package/dist/text-utils.js +18 -0
  60. package/dist/title-policy.js +251 -0
  61. package/dist/type-guards.js +6 -0
  62. package/dist/validate.js +131 -0
  63. package/docs/user/README.md +17 -0
  64. package/docs/user/guardrails.md +179 -0
  65. package/docs/user/interactive-gates.md +124 -0
  66. package/docs/user/novel-cli.md +289 -0
  67. package/docs/user/ops.md +123 -0
  68. package/docs/user/quick-start.md +97 -0
  69. package/docs/user/spec-system.md +166 -0
  70. package/docs/user/storylines.md +144 -0
  71. package/package.json +48 -0
  72. package/schemas/README.md +18 -0
  73. package/schemas/character-voice-drift.schema.json +135 -0
  74. package/schemas/character-voice-profiles.schema.json +141 -0
  75. package/schemas/engagement-metrics.schema.json +38 -0
  76. package/schemas/hook-ledger.schema.json +108 -0
  77. package/schemas/platform-profile.schema.json +235 -0
  78. package/schemas/promise-ledger.schema.json +97 -0
  79. package/scripts/calibrate-quality-judge.sh +91 -0
  80. package/scripts/compare-regression-runs.sh +86 -0
  81. package/scripts/lib/_common.py +131 -0
  82. package/scripts/lib/calibrate_quality_judge.py +312 -0
  83. package/scripts/lib/compare_regression_runs.py +142 -0
  84. package/scripts/lib/run_regression.py +621 -0
  85. package/scripts/lint-blacklist.sh +201 -0
  86. package/scripts/lint-cliche.sh +370 -0
  87. package/scripts/lint-readability.sh +404 -0
  88. package/scripts/query-foreshadow.sh +252 -0
  89. package/scripts/run-ner.sh +669 -0
  90. package/scripts/run-regression.sh +122 -0
  91. package/skills/cli-step/SKILL.md +158 -0
  92. package/skills/continue/SKILL.md +348 -0
  93. package/skills/continue/references/context-contracts.md +169 -0
  94. package/skills/continue/references/continuity-checks.md +187 -0
  95. package/skills/continue/references/file-protocols.md +64 -0
  96. package/skills/continue/references/foreshadowing.md +130 -0
  97. package/skills/continue/references/gate-decision.md +53 -0
  98. package/skills/continue/references/periodic-maintenance.md +46 -0
  99. package/skills/novel-writing/SKILL.md +77 -0
  100. package/skills/novel-writing/references/quality-rubric.md +140 -0
  101. package/skills/novel-writing/references/style-guide.md +145 -0
  102. package/skills/start/SKILL.md +458 -0
  103. package/skills/start/references/quality-review.md +86 -0
  104. package/skills/start/references/setting-update.md +44 -0
  105. package/skills/start/references/vol-planning.md +61 -0
  106. package/skills/start/references/vol-review.md +58 -0
  107. package/skills/status/SKILL.md +116 -0
  108. package/skills/status/references/sample-output.md +60 -0
  109. package/templates/ai-blacklist.json +79 -0
  110. package/templates/brief-template.md +46 -0
  111. package/templates/genre-weight-profiles.json +90 -0
  112. package/templates/novel-ask/example.answer.json +12 -0
  113. package/templates/novel-ask/example.question.json +51 -0
  114. package/templates/platform-profile.json +148 -0
  115. package/templates/style-profile-template.json +58 -0
  116. package/templates/web-novel-cliche-lint.json +41 -0
@@ -0,0 +1,235 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "schema_version": 1,
4
+ "title": "platform-profile.json",
5
+ "type": "object",
6
+ "required": [
7
+ "schema_version",
8
+ "platform",
9
+ "created_at",
10
+ "word_count",
11
+ "hook_policy",
12
+ "info_load",
13
+ "compliance",
14
+ "scoring"
15
+ ],
16
+ "additionalProperties": false,
17
+ "patternProperties": {
18
+ "^_.*$": { "$ref": "#/$defs/comment_value" }
19
+ },
20
+ "properties": {
21
+ "$schema": {
22
+ "type": "string",
23
+ "description": "Optional schema pointer for editor tooling. Example: schemas/platform-profile.schema.json"
24
+ },
25
+ "schema_version": {
26
+ "type": "integer",
27
+ "enum": [1],
28
+ "description": "Schema version of platform-profile.json (SSOT in schemas/platform-profile.schema.json)."
29
+ },
30
+ "platform": {
31
+ "type": "string",
32
+ "enum": ["qidian", "tomato"],
33
+ "description": "Immutable platform binding chosen at init."
34
+ },
35
+ "created_at": {
36
+ "type": "string",
37
+ "format": "date-time",
38
+ "description": "ISO-8601 timestamp when the profile was created."
39
+ },
40
+
41
+ "word_count": { "$ref": "#/$defs/word_count_policy" },
42
+ "hook_policy": { "$ref": "#/$defs/hook_policy" },
43
+ "info_load": { "$ref": "#/$defs/info_load_policy" },
44
+ "compliance": { "$ref": "#/$defs/compliance_policy" },
45
+ "scoring": { "$ref": "#/$defs/scoring_policy" },
46
+
47
+ "retention": { "$ref": "#/$defs/retention_policy" },
48
+ "readability": { "$ref": "#/$defs/readability_policy" },
49
+ "naming": { "$ref": "#/$defs/naming_policy" }
50
+ },
51
+ "$defs": {
52
+ "comment_value": {
53
+ "description": "Allows _comment/_note style metadata fields for human readability.",
54
+ "type": ["string", "number", "integer", "boolean", "object", "array", "null"]
55
+ },
56
+ "severity_policy": {
57
+ "type": "string",
58
+ "enum": ["warn", "soft", "hard"],
59
+ "description": "warn: advisory; soft: requires revision unless overridden; hard: blocks progression when enabled."
60
+ },
61
+ "genre_drive_type": {
62
+ "type": "string",
63
+ "enum": ["plot", "character", "suspense", "slice_of_life"]
64
+ },
65
+ "word_count_policy": {
66
+ "type": "object",
67
+ "required": ["target_min", "target_max", "hard_min", "hard_max"],
68
+ "additionalProperties": false,
69
+ "patternProperties": { "^_.*$": { "$ref": "#/$defs/comment_value" } },
70
+ "properties": {
71
+ "target_min": { "type": "integer", "minimum": 0 },
72
+ "target_max": { "type": "integer", "minimum": 0 },
73
+ "hard_min": { "type": "integer", "minimum": 0 },
74
+ "hard_max": { "type": "integer", "minimum": 0 }
75
+ }
76
+ },
77
+ "hook_policy": {
78
+ "type": "object",
79
+ "required": ["required", "min_strength", "allowed_types", "fix_strategy"],
80
+ "additionalProperties": false,
81
+ "patternProperties": { "^_.*$": { "$ref": "#/$defs/comment_value" } },
82
+ "properties": {
83
+ "required": { "type": "boolean" },
84
+ "min_strength": { "type": "integer", "minimum": 1, "maximum": 5 },
85
+ "allowed_types": {
86
+ "type": "array",
87
+ "items": { "type": "string", "minLength": 1 },
88
+ "minItems": 1,
89
+ "uniqueItems": true
90
+ },
91
+ "fix_strategy": {
92
+ "type": "string",
93
+ "enum": ["hook-fix"],
94
+ "description": "Bounded enum; unknown values MUST fail validation."
95
+ }
96
+ }
97
+ },
98
+ "info_load_policy": {
99
+ "type": "object",
100
+ "required": ["max_new_entities_per_chapter", "max_unknown_entities_per_chapter", "max_new_terms_per_1k_words"],
101
+ "additionalProperties": false,
102
+ "patternProperties": { "^_.*$": { "$ref": "#/$defs/comment_value" } },
103
+ "properties": {
104
+ "max_new_entities_per_chapter": { "type": "integer", "minimum": 0 },
105
+ "max_unknown_entities_per_chapter": { "type": "integer", "minimum": 0 },
106
+ "max_new_terms_per_1k_words": { "type": "integer", "minimum": 0 }
107
+ }
108
+ },
109
+ "compliance_policy": {
110
+ "type": "object",
111
+ "required": ["banned_words", "duplicate_name_policy"],
112
+ "additionalProperties": false,
113
+ "patternProperties": { "^_.*$": { "$ref": "#/$defs/comment_value" } },
114
+ "properties": {
115
+ "banned_words": {
116
+ "type": "array",
117
+ "items": { "type": "string", "minLength": 1 },
118
+ "uniqueItems": true
119
+ },
120
+ "duplicate_name_policy": { "$ref": "#/$defs/severity_policy" },
121
+ "script_paths": {
122
+ "type": "object",
123
+ "description": "Optional mapping for deterministic linters (tool_name -> relative path).",
124
+ "additionalProperties": { "type": "string", "minLength": 1 }
125
+ }
126
+ }
127
+ },
128
+ "scoring_policy": {
129
+ "type": "object",
130
+ "required": ["genre_drive_type", "weight_profile_id"],
131
+ "additionalProperties": false,
132
+ "patternProperties": { "^_.*$": { "$ref": "#/$defs/comment_value" } },
133
+ "properties": {
134
+ "genre_drive_type": { "$ref": "#/$defs/genre_drive_type" },
135
+ "weight_profile_id": { "type": "string", "minLength": 1 },
136
+ "weight_overrides": {
137
+ "type": "object",
138
+ "description": "Optional per-dimension weight overrides (dimension_name -> weight).",
139
+ "additionalProperties": { "type": "number", "minimum": 0 }
140
+ }
141
+ }
142
+ },
143
+ "retention_policy": {
144
+ "type": ["object", "null"],
145
+ "required": ["title_policy", "hook_ledger"],
146
+ "additionalProperties": false,
147
+ "patternProperties": { "^_.*$": { "$ref": "#/$defs/comment_value" } },
148
+ "properties": {
149
+ "title_policy": { "$ref": "#/$defs/title_policy" },
150
+ "hook_ledger": { "$ref": "#/$defs/hook_ledger_policy" }
151
+ }
152
+ },
153
+ "title_policy": {
154
+ "type": "object",
155
+ "required": ["enabled", "min_chars", "max_chars", "forbidden_patterns", "auto_fix"],
156
+ "additionalProperties": false,
157
+ "patternProperties": { "^_.*$": { "$ref": "#/$defs/comment_value" } },
158
+ "properties": {
159
+ "enabled": { "type": "boolean" },
160
+ "min_chars": { "type": "integer", "minimum": 0 },
161
+ "max_chars": { "type": "integer", "minimum": 0 },
162
+ "forbidden_patterns": { "type": "array", "items": { "type": "string", "minLength": 1 } },
163
+ "required_patterns": { "type": "array", "items": { "type": "string", "minLength": 1 } },
164
+ "auto_fix": { "type": "boolean" }
165
+ }
166
+ },
167
+ "hook_ledger_policy": {
168
+ "type": "object",
169
+ "required": [
170
+ "enabled",
171
+ "fulfillment_window_chapters",
172
+ "diversity_window_chapters",
173
+ "max_same_type_streak",
174
+ "min_distinct_types_in_window",
175
+ "overdue_policy"
176
+ ],
177
+ "additionalProperties": false,
178
+ "patternProperties": { "^_.*$": { "$ref": "#/$defs/comment_value" } },
179
+ "properties": {
180
+ "enabled": { "type": "boolean" },
181
+ "fulfillment_window_chapters": { "type": "integer", "minimum": 1 },
182
+ "diversity_window_chapters": { "type": "integer", "minimum": 1 },
183
+ "max_same_type_streak": { "type": "integer", "minimum": 1 },
184
+ "min_distinct_types_in_window": { "type": "integer", "minimum": 1 },
185
+ "overdue_policy": { "$ref": "#/$defs/severity_policy" }
186
+ }
187
+ },
188
+ "readability_policy": {
189
+ "type": ["object", "null"],
190
+ "required": ["mobile"],
191
+ "additionalProperties": false,
192
+ "patternProperties": { "^_.*$": { "$ref": "#/$defs/comment_value" } },
193
+ "properties": {
194
+ "mobile": { "$ref": "#/$defs/mobile_readability_policy" }
195
+ }
196
+ },
197
+ "mobile_readability_policy": {
198
+ "type": "object",
199
+ "required": ["enabled", "max_paragraph_chars", "max_consecutive_exposition_paragraphs", "blocking_severity"],
200
+ "additionalProperties": false,
201
+ "patternProperties": { "^_.*$": { "$ref": "#/$defs/comment_value" } },
202
+ "properties": {
203
+ "enabled": { "type": "boolean" },
204
+ "max_paragraph_chars": { "type": "integer", "minimum": 1 },
205
+ "max_consecutive_exposition_paragraphs": { "type": "integer", "minimum": 1 },
206
+ "blocking_severity": { "type": "string", "enum": ["hard_only", "soft_and_hard"] }
207
+ }
208
+ },
209
+ "naming_conflict_type": {
210
+ "type": "string",
211
+ "enum": ["duplicate", "near_duplicate", "alias_collision"]
212
+ },
213
+ "naming_policy": {
214
+ "type": ["object", "null"],
215
+ "required": ["enabled", "near_duplicate_threshold", "blocking_conflict_types"],
216
+ "additionalProperties": false,
217
+ "patternProperties": { "^_.*$": { "$ref": "#/$defs/comment_value" } },
218
+ "properties": {
219
+ "enabled": { "type": "boolean" },
220
+ "near_duplicate_threshold": { "type": "number", "minimum": 0, "maximum": 1 },
221
+ "blocking_conflict_types": {
222
+ "type": "array",
223
+ "items": { "$ref": "#/$defs/naming_conflict_type" },
224
+ "minItems": 1,
225
+ "uniqueItems": true
226
+ },
227
+ "exemptions": {
228
+ "type": "object",
229
+ "description": "Optional whitelist/exemptions for intentional overlaps.",
230
+ "additionalProperties": true
231
+ }
232
+ }
233
+ }
234
+ }
235
+ }
@@ -0,0 +1,97 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "schema_version": 1,
4
+ "title": "promise-ledger.json",
5
+ "type": "object",
6
+ "required": ["schema_version", "entries"],
7
+ "additionalProperties": false,
8
+ "patternProperties": {
9
+ "^_.*$": { "$ref": "#/$defs/comment_value" }
10
+ },
11
+ "propertyNames": {
12
+ "not": { "enum": ["__proto__", "constructor", "prototype"] }
13
+ },
14
+ "properties": {
15
+ "$schema": {
16
+ "type": "string",
17
+ "description": "Optional schema pointer for editor tooling. Example: schemas/promise-ledger.schema.json"
18
+ },
19
+ "schema_version": {
20
+ "type": "integer",
21
+ "enum": [1],
22
+ "description": "Schema version of promise-ledger.json (SSOT in schemas/promise-ledger.schema.json)."
23
+ },
24
+ "policy": { "$ref": "#/$defs/promise_ledger_policy" },
25
+ "entries": {
26
+ "type": "array",
27
+ "items": { "$ref": "#/$defs/promise_ledger_entry" }
28
+ }
29
+ },
30
+ "$defs": {
31
+ "comment_value": {
32
+ "description": "Free-form comment fields supported by templates/specs.",
33
+ "type": ["string", "number", "boolean", "null", "object", "array"]
34
+ },
35
+ "promise_ledger_policy": {
36
+ "type": "object",
37
+ "required": ["dormancy_threshold_chapters"],
38
+ "additionalProperties": false,
39
+ "properties": {
40
+ "dormancy_threshold_chapters": {
41
+ "type": "integer",
42
+ "minimum": 1,
43
+ "description": "Warn when chapters_since_last_touch >= dormancy_threshold_chapters."
44
+ }
45
+ }
46
+ },
47
+ "promise_ledger_entry": {
48
+ "type": "object",
49
+ "required": ["id", "type", "promise_text", "status", "introduced_chapter", "last_touched_chapter"],
50
+ "additionalProperties": false,
51
+ "patternProperties": {
52
+ "^_.*$": { "$ref": "#/$defs/comment_value" }
53
+ },
54
+ "propertyNames": {
55
+ "not": { "enum": ["__proto__", "constructor", "prototype"] }
56
+ },
57
+ "properties": {
58
+ "id": { "type": "string", "minLength": 1 },
59
+ "type": {
60
+ "type": "string",
61
+ "enum": ["selling_point", "core_mystery", "mechanism", "relationship_arc"]
62
+ },
63
+ "promise_text": {
64
+ "type": "string",
65
+ "minLength": 1,
66
+ "description": "Short, non-spoiler description of the promise."
67
+ },
68
+ "status": { "type": "string", "enum": ["promised", "advanced", "delivered"] },
69
+ "introduced_chapter": { "type": "integer", "minimum": 1 },
70
+ "last_touched_chapter": { "type": "integer", "minimum": 1 },
71
+ "delivered_chapter": { "type": ["integer", "null"], "minimum": 1 },
72
+ "links": {
73
+ "type": "object",
74
+ "additionalProperties": false,
75
+ "properties": {
76
+ "hook_entry_ids": { "type": "array", "items": { "type": "string", "minLength": 1 } },
77
+ "foreshadowing_ids": { "type": "array", "items": { "type": "string", "minLength": 1 } }
78
+ }
79
+ },
80
+ "history": {
81
+ "type": "array",
82
+ "items": {
83
+ "type": "object",
84
+ "required": ["chapter", "action"],
85
+ "additionalProperties": false,
86
+ "properties": {
87
+ "chapter": { "type": "integer", "minimum": 1 },
88
+ "action": { "type": "string", "minLength": 1 },
89
+ "note": { "type": "string" },
90
+ "at": { "type": "string", "format": "date-time" }
91
+ }
92
+ }
93
+ }
94
+ }
95
+ }
96
+ }
97
+ }
@@ -0,0 +1,91 @@
1
+ #!/usr/bin/env bash
2
+ #
3
+ # QualityJudge calibration against human-labeled dataset (M3).
4
+ #
5
+ # Usage:
6
+ # calibrate-quality-judge.sh --project <novel_project_dir> --labels <labels.jsonl> [--out <report.json>]
7
+ #
8
+ # Output:
9
+ # stdout JSON (exit 0 on success)
10
+ #
11
+ # Exit codes:
12
+ # 0 = success (valid JSON emitted to stdout)
13
+ # 1 = validation failure (bad args, missing files, invalid JSON)
14
+ # 2 = script exception (unexpected runtime error)
15
+ #
16
+ # Notes:
17
+ # - Aligns by chapter number.
18
+ # - Uses judge `overall_final` when available; falls back to `overall`.
19
+
20
+ set -euo pipefail
21
+
22
# Print the command-line help text to stderr (one printf argument per line).
usage() {
  printf '%s\n' \
    'Usage:' \
    'calibrate-quality-judge.sh --project <novel_project_dir> --labels <labels.jsonl> [--out <report.json>]' \
    '' \
    'Options:' \
    '--project <dir> Novel project directory (must contain evaluations/)' \
    '--labels <file> JSONL labels file (eval/datasets/**/labels-YYYY-MM-DD.jsonl)' \
    '--out <file> Optional: write report JSON to file (directories created)' \
    '-h, --help Show help' >&2
}
34
+
35
# Parsed command-line values; out_path stays empty when --out is not given.
project_dir=""
labels_path=""
out_path=""

# Walk the arguments left to right. The three value-taking flags share one
# arity check so the "requires a value" message is produced in a single place.
while [ "$#" -gt 0 ]; do
  flag="$1"
  case "$flag" in
    --project|--labels|--out)
      if [ "$#" -lt 2 ]; then
        echo "calibrate-quality-judge.sh: error: $flag requires a value" >&2
        exit 1
      fi
      case "$flag" in
        --project) project_dir="$2" ;;
        --labels) labels_path="$2" ;;
        --out) out_path="$2" ;;
      esac
      shift 2
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "calibrate-quality-judge.sh: unknown arg: $flag" >&2
      usage
      exit 1
      ;;
  esac
done

# --project and --labels are mandatory.
if [[ -z "$project_dir" || -z "$labels_path" ]]; then
  echo "calibrate-quality-judge.sh: --project and --labels are required" >&2
  usage
  exit 1
fi

# Validate inputs up front; every validation failure exits with status 1.
[[ -d "$project_dir" ]] || {
  echo "calibrate-quality-judge.sh: project dir not found: $project_dir" >&2
  exit 1
}

[[ -f "$labels_path" ]] || {
  echo "calibrate-quality-judge.sh: labels file not found: $labels_path" >&2
  exit 1
}

command -v python3 >/dev/null 2>&1 || {
  echo "calibrate-quality-judge.sh: python3 is required but not found" >&2
  exit 1
}

# Resolve the directory holding this script, then hand off to the Python
# implementation. out_path is always passed, possibly as an empty string.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
python3 "$SCRIPT_DIR/lib/calibrate_quality_judge.py" "$project_dir" "$labels_path" "$out_path"
91
+
@@ -0,0 +1,86 @@
1
+ #!/usr/bin/env bash
2
+ #
3
+ # Compare two archived regression runs (M3).
4
+ #
5
+ # Usage:
6
+ # compare-regression-runs.sh <run_dir_a> <run_dir_b> [--out <delta.json>]
7
+ #
8
+ # Output:
9
+ # stdout JSON (exit 0 on success)
10
+ #
11
+ # Exit codes:
12
+ # 0 = success (valid JSON emitted to stdout)
13
+ # 1 = validation failure (bad args, missing files, invalid JSON)
14
+ # 2 = script exception (unexpected runtime error)
15
+
16
+ set -euo pipefail
17
+
18
# Print the command-line help text to stderr (one printf argument per line).
usage() {
  printf '%s\n' \
    'Usage:' \
    'compare-regression-runs.sh <run_dir_a> <run_dir_b> [--out <delta.json>]' \
    '' \
    'Notes:' \
    '- Expects each run dir to contain summary.json (generated by scripts/run-regression.sh).' >&2
}
27
+
28
# Optional report destination; stays empty when --out is not given.
out_path=""

# Honour -h/--help before positional parsing. Without this, a help flag in
# either of the first two slots is either rejected by the argument-count check
# (exit 1) or mistaken for a run directory. A run directory literally named
# "-h" or "--help" can still be passed as "./-h" or "./--help".
for maybe_help in "${1:-}" "${2:-}"; do
  case "$maybe_help" in
    -h|--help)
      usage
      exit 0
      ;;
  esac
done

# Two positional run directories are mandatory.
if [ "$#" -lt 2 ]; then
  usage
  exit 1
fi

run_a="$1"
run_b="$2"
shift 2

# Remaining arguments are options.
while [ "$#" -gt 0 ]; do
  case "$1" in
    --out)
      [ "$#" -ge 2 ] || { echo "compare-regression-runs.sh: error: --out requires a value" >&2; exit 1; }
      out_path="$2"
      shift 2
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "compare-regression-runs.sh: unknown arg: $1" >&2
      usage
      exit 1
      ;;
  esac
done

# Validate inputs up front; every validation failure exits with status 1.
if [ ! -d "$run_a" ]; then
  echo "compare-regression-runs.sh: run_dir_a not found: $run_a" >&2
  exit 1
fi
if [ ! -d "$run_b" ]; then
  echo "compare-regression-runs.sh: run_dir_b not found: $run_b" >&2
  exit 1
fi

summary_a="$run_a/summary.json"
summary_b="$run_b/summary.json"

if [ ! -f "$summary_a" ]; then
  echo "compare-regression-runs.sh: missing summary.json: $summary_a" >&2
  exit 1
fi
if [ ! -f "$summary_b" ]; then
  echo "compare-regression-runs.sh: missing summary.json: $summary_b" >&2
  exit 1
fi

if ! command -v python3 >/dev/null 2>&1; then
  echo "compare-regression-runs.sh: python3 is required but not found" >&2
  exit 1
fi

# Resolve the directory holding this script, then hand off to the Python
# implementation. out_path is always passed, possibly as an empty string.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
python3 "$SCRIPT_DIR/lib/compare_regression_runs.py" "$summary_a" "$summary_b" "$out_path"
86
+
@@ -0,0 +1,131 @@
1
+ """Shared utilities for M3 evaluation/regression scripts.
2
+
3
+ Imported by calibrate_quality_judge.py, run_regression.py, compare_regression_runs.py.
4
+ """
5
+
6
+ import json
7
+ import math
8
+ import os
9
+ import re
10
+ import sys
11
+ from datetime import datetime, timezone
12
+ from typing import Any, Dict, List, Optional, Sequence, Tuple
13
+
14
+
15
+ # ---------------------------------------------------------------------------
16
+ # Low-level helpers
17
+ # ---------------------------------------------------------------------------
18
+
19
def die(msg: str, exit_code: int = 1) -> None:
    """Report *msg* on stderr and terminate via ``SystemExit``.

    Trailing whitespace is stripped from *msg* and exactly one newline is
    appended. This function never returns normally: it always raises
    ``SystemExit`` carrying *exit_code*.
    """
    print(msg.rstrip(), file=sys.stderr)
    sys.exit(exit_code)
22
+
23
+
24
def load_json(path: str, *, missing_ok: bool = False) -> Any:
    """Parse the UTF-8 encoded JSON file at *path* and return the decoded value.

    When *missing_ok* is true, a nonexistent file yields ``None``; otherwise
    the ``FileNotFoundError`` propagates to the caller. JSON decoding errors
    are never suppressed.
    """
    try:
        with open(path, mode="r", encoding="utf-8") as handle:
            payload = json.load(handle)
    except FileNotFoundError:
        if not missing_ok:
            raise
        return None
    return payload
33
+
34
+
35
def as_float(value: Any) -> Optional[float]:
    """Coerce a numeric *value* to a finite ``float``, or return ``None``.

    Accepts ``int`` and ``float`` instances; ``bool`` is rejected even though
    it subclasses ``int``. Returns ``None`` for any other type, for NaN and
    infinities, and for integers too large to be represented as a float.
    """
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return None
    try:
        v = float(value)
    except OverflowError:
        # Python ints are arbitrary precision; float() raises for magnitudes
        # beyond the float range instead of producing inf.
        return None
    return v if math.isfinite(v) else None
40
+
41
+
42
def as_int(value: Any) -> Optional[int]:
    """Return *value* as an ``int`` when it is an integer, else ``None``.

    ``bool`` values are rejected even though ``bool`` subclasses ``int``;
    floats and strings are never converted.
    """
    if isinstance(value, bool) or not isinstance(value, int):
        return None
    return int(value)
46
+
47
+
48
def as_str(value: Any) -> Optional[str]:
    """Return *value* stripped of surrounding whitespace, or ``None``.

    ``None`` is returned when *value* is not a ``str`` or when it is empty
    after stripping.
    """
    if not isinstance(value, str):
        return None
    trimmed = value.strip()
    return trimmed if trimmed else None
52
+
53
+
54
def iso_utc_now() -> str:
    """Return the current UTC time in ISO-8601 form with a ``Z`` suffix.

    The ``+00:00`` offset produced by ``isoformat()`` is replaced with ``Z``.
    """
    stamp = datetime.now(tz=timezone.utc).isoformat()
    return stamp.replace("+00:00", "Z")
56
+
57
+
58
+ # ---------------------------------------------------------------------------
59
+ # Eval-object extraction (QualityJudge output format)
60
+ # ---------------------------------------------------------------------------
61
+
62
def extract_eval_used(eval_obj: Dict[str, Any]) -> Dict[str, Any]:
    """Return the nested ``eval_used`` dict when present, else *eval_obj* itself.

    A non-dict ``eval_used`` value is ignored and *eval_obj* is returned
    unchanged (the same object, not a copy).
    """
    nested = eval_obj.get("eval_used")
    if isinstance(nested, dict):
        return nested
    return eval_obj
65
+
66
+
67
def extract_overall(eval_obj: Dict[str, Any]) -> Optional[float]:
    """Return the first usable overall score found in *eval_obj*, or ``None``.

    Candidates are tried in this order, and the first one that ``as_float``
    accepts wins:

    1. ``eval_obj["overall_final"]``
    2. ``overall_final`` of the ``extract_eval_used`` result
    3. ``overall`` of the ``extract_eval_used`` result
    4. ``eval_obj["overall"]``
    5. ``eval_obj["metadata"]["judges"]["overall_final"]``
    """
    used = extract_eval_used(eval_obj)
    candidates = (
        (eval_obj, "overall_final"),
        (used, "overall_final"),
        (used, "overall"),
        (eval_obj, "overall"),
    )
    for source, key in candidates:
        score = as_float(source.get(key))
        if score is not None:
            return score

    # Last resort: a score nested under metadata.judges.
    meta = eval_obj.get("metadata")
    judges = meta.get("judges") if isinstance(meta, dict) else None
    if isinstance(judges, dict):
        return as_float(judges.get("overall_final"))
    return None
86
+
87
+
88
def extract_dimension_scores(eval_obj: Dict[str, Any]) -> Dict[str, float]:
    """Collect per-dimension scores as ``{dimension: score}``.

    The ``scores`` mapping is taken from the ``extract_eval_used`` result,
    falling back to ``eval_obj["scores"]`` when the former is not a dict.
    Entries that are not dicts, or whose ``score`` is rejected by
    ``as_float``, are skipped. Keys are stringified. Returns an empty dict
    when no ``scores`` mapping is available.
    """
    scores = extract_eval_used(eval_obj).get("scores")
    if not isinstance(scores, dict):
        scores = eval_obj.get("scores")
        if not isinstance(scores, dict):
            return {}

    result: Dict[str, float] = {}
    for name, entry in scores.items():
        value = as_float(entry.get("score")) if isinstance(entry, dict) else None
        if value is not None:
            result[str(name)] = value
    return result
104
+
105
+
106
def extract_contract_verification(eval_obj: Dict[str, Any]) -> Dict[str, Any]:
    """Return the ``contract_verification`` dict, or ``{}`` when absent.

    The ``extract_eval_used`` result is consulted first, then *eval_obj*
    itself; non-dict values are ignored at both levels.
    """
    for source in (extract_eval_used(eval_obj), eval_obj):
        candidate = source.get("contract_verification")
        if isinstance(candidate, dict):
            return candidate
    return {}
115
+
116
+
117
+ # ---------------------------------------------------------------------------
118
+ # File discovery
119
+ # ---------------------------------------------------------------------------
120
+
121
def find_eval_files(eval_dir: str) -> List[Tuple[int, str]]:
    """Find ``chapter-NNN-eval.json`` entries directly inside *eval_dir*.

    Returns a list of ``(chapter, path)`` tuples, where ``chapter`` is the
    integer parsed from the file name and ``path`` is *eval_dir* joined with
    the name. The list is sorted by chapter number, with ties (for example
    ``chapter-2-eval.json`` and ``chapter-002-eval.json``) broken by path so
    the result does not depend on directory listing order. Returns ``[]``
    when *eval_dir* is not an existing directory.
    """
    if not os.path.isdir(eval_dir):
        return []
    # fullmatch: the whole name must match, so a trailing newline is rejected.
    pattern = re.compile(r"chapter-(\d+)-eval\.json")
    items: List[Tuple[int, str]] = []
    for name in os.listdir(eval_dir):
        m = pattern.fullmatch(name)
        if m:
            items.append((int(m.group(1)), os.path.join(eval_dir, name)))
    # Sort on the whole tuple: os.listdir order is arbitrary, so sorting on
    # the chapter number alone would leave ties in an unpredictable order.
    items.sort()
    return items