novel-writer-cli 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +103 -0
- package/agents/chapter-writer.md +142 -0
- package/agents/character-weaver.md +117 -0
- package/agents/consistency-auditor.md +85 -0
- package/agents/plot-architect.md +128 -0
- package/agents/quality-judge.md +232 -0
- package/agents/style-analyzer.md +109 -0
- package/agents/style-refiner.md +97 -0
- package/agents/summarizer.md +128 -0
- package/agents/world-builder.md +161 -0
- package/dist/__tests__/character-voice.test.js +445 -0
- package/dist/__tests__/commit-prototype-pollution.test.js +45 -0
- package/dist/__tests__/engagement.test.js +382 -0
- package/dist/__tests__/foreshadow-visibility.test.js +131 -0
- package/dist/__tests__/hook-ledger.test.js +1028 -0
- package/dist/__tests__/naming-lint.test.js +132 -0
- package/dist/__tests__/narrative-health-injection.test.js +359 -0
- package/dist/__tests__/next-step-prejudge-guardrails.test.js +325 -0
- package/dist/__tests__/next-step-title-fix.test.js +153 -0
- package/dist/__tests__/platform-profile.test.js +274 -0
- package/dist/__tests__/promise-ledger.test.js +189 -0
- package/dist/__tests__/readability-lint.test.js +209 -0
- package/dist/__tests__/text-utils.test.js +39 -0
- package/dist/__tests__/title-policy.test.js +147 -0
- package/dist/advance.js +75 -0
- package/dist/character-voice.js +805 -0
- package/dist/checkpoint.js +126 -0
- package/dist/cli.js +563 -0
- package/dist/cliche-lint.js +515 -0
- package/dist/commit.js +1460 -0
- package/dist/consistency-auditor.js +684 -0
- package/dist/engagement.js +687 -0
- package/dist/errors.js +7 -0
- package/dist/fingerprint.js +16 -0
- package/dist/foreshadow-visibility.js +214 -0
- package/dist/fs-utils.js +68 -0
- package/dist/hook-ledger.js +721 -0
- package/dist/hook-policy.js +107 -0
- package/dist/instruction-gates.js +51 -0
- package/dist/instructions.js +406 -0
- package/dist/latest-summary-loader.js +29 -0
- package/dist/lock.js +121 -0
- package/dist/naming-lint.js +531 -0
- package/dist/ner.js +73 -0
- package/dist/next-step.js +408 -0
- package/dist/novel-ask.js +270 -0
- package/dist/output.js +9 -0
- package/dist/platform-constraints.js +518 -0
- package/dist/platform-profile.js +325 -0
- package/dist/prejudge-guardrails.js +370 -0
- package/dist/project.js +40 -0
- package/dist/promise-ledger.js +723 -0
- package/dist/readability-lint.js +555 -0
- package/dist/safe-parse.js +36 -0
- package/dist/safe-path.js +29 -0
- package/dist/scoring-weights.js +290 -0
- package/dist/steps.js +60 -0
- package/dist/text-utils.js +18 -0
- package/dist/title-policy.js +251 -0
- package/dist/type-guards.js +6 -0
- package/dist/validate.js +131 -0
- package/docs/user/README.md +17 -0
- package/docs/user/guardrails.md +179 -0
- package/docs/user/interactive-gates.md +124 -0
- package/docs/user/novel-cli.md +289 -0
- package/docs/user/ops.md +123 -0
- package/docs/user/quick-start.md +97 -0
- package/docs/user/spec-system.md +166 -0
- package/docs/user/storylines.md +144 -0
- package/package.json +48 -0
- package/schemas/README.md +18 -0
- package/schemas/character-voice-drift.schema.json +135 -0
- package/schemas/character-voice-profiles.schema.json +141 -0
- package/schemas/engagement-metrics.schema.json +38 -0
- package/schemas/hook-ledger.schema.json +108 -0
- package/schemas/platform-profile.schema.json +235 -0
- package/schemas/promise-ledger.schema.json +97 -0
- package/scripts/calibrate-quality-judge.sh +91 -0
- package/scripts/compare-regression-runs.sh +86 -0
- package/scripts/lib/_common.py +131 -0
- package/scripts/lib/calibrate_quality_judge.py +312 -0
- package/scripts/lib/compare_regression_runs.py +142 -0
- package/scripts/lib/run_regression.py +621 -0
- package/scripts/lint-blacklist.sh +201 -0
- package/scripts/lint-cliche.sh +370 -0
- package/scripts/lint-readability.sh +404 -0
- package/scripts/query-foreshadow.sh +252 -0
- package/scripts/run-ner.sh +669 -0
- package/scripts/run-regression.sh +122 -0
- package/skills/cli-step/SKILL.md +158 -0
- package/skills/continue/SKILL.md +348 -0
- package/skills/continue/references/context-contracts.md +169 -0
- package/skills/continue/references/continuity-checks.md +187 -0
- package/skills/continue/references/file-protocols.md +64 -0
- package/skills/continue/references/foreshadowing.md +130 -0
- package/skills/continue/references/gate-decision.md +53 -0
- package/skills/continue/references/periodic-maintenance.md +46 -0
- package/skills/novel-writing/SKILL.md +77 -0
- package/skills/novel-writing/references/quality-rubric.md +140 -0
- package/skills/novel-writing/references/style-guide.md +145 -0
- package/skills/start/SKILL.md +458 -0
- package/skills/start/references/quality-review.md +86 -0
- package/skills/start/references/setting-update.md +44 -0
- package/skills/start/references/vol-planning.md +61 -0
- package/skills/start/references/vol-review.md +58 -0
- package/skills/status/SKILL.md +116 -0
- package/skills/status/references/sample-output.md +60 -0
- package/templates/ai-blacklist.json +79 -0
- package/templates/brief-template.md +46 -0
- package/templates/genre-weight-profiles.json +90 -0
- package/templates/novel-ask/example.answer.json +12 -0
- package/templates/novel-ask/example.question.json +51 -0
- package/templates/platform-profile.json +148 -0
- package/templates/style-profile-template.json +58 -0
- package/templates/web-novel-cliche-lint.json +41 -0
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
#
|
|
3
|
+
# Deterministic AI-blacklist linter (M3+ extension point).
|
|
4
|
+
#
|
|
5
|
+
# Usage:
|
|
6
|
+
# lint-blacklist.sh <chapter.md> <ai-blacklist.json>
|
|
7
|
+
#
|
|
8
|
+
# Output:
|
|
9
|
+
# stdout JSON (exit 0 on success)
|
|
10
|
+
#
|
|
11
|
+
# Exit codes:
|
|
12
|
+
# 0 = success (valid JSON emitted to stdout)
|
|
13
|
+
# 1 = validation failure (bad args, missing files, invalid JSON/schema)
|
|
14
|
+
# 2 = script exception (unexpected runtime error)
|
|
15
|
+
#
|
|
16
|
+
# Notes:
|
|
17
|
+
# - Treats optional whitelist/exemptions as "do not count as hits":
|
|
18
|
+
# - ai-blacklist.json.whitelist (list[str])
|
|
19
|
+
# - ai-blacklist.json.whitelist.words (list[str])
|
|
20
|
+
# - ai-blacklist.json.exemptions.words (list[str])
|
|
21
|
+
#
|
|
22
|
+
# - Hit rate is computed as "hits per 1000 non-whitespace characters" (次/千字).
|
|
23
|
+
|
|
24
|
+
# Exit on command failure, on use of an unset variable, and on a failure
# anywhere inside a pipeline.
set -euo pipefail

# Exactly two positional arguments are required; anything else is a usage
# error reported on stderr with exit status 1.
if [ "$#" -ne 2 ]; then
  echo "Usage: lint-blacklist.sh <chapter.md> <ai-blacklist.json>" >&2
  exit 1
fi

chapter_path="$1"
blacklist_path="$2"

# Both inputs must exist as regular files; a missing file exits with status 1.
if [ ! -f "$chapter_path" ]; then
  echo "lint-blacklist.sh: chapter file not found: $chapter_path" >&2
  exit 1
fi

if [ ! -f "$blacklist_path" ]; then
  echo "lint-blacklist.sh: blacklist file not found: $blacklist_path" >&2
  exit 1
fi

# The lint logic runs in an embedded Python program, so python3 must be on
# PATH; its absence exits with status 2.
if ! command -v python3 >/dev/null 2>&1; then
  echo "lint-blacklist.sh: python3 is required but not found" >&2
  exit 2
fi
|
|
48
|
+
|
|
49
|
+
python3 - "$chapter_path" "$blacklist_path" <<'PY'
|
|
50
|
+
import json
|
|
51
|
+
import re
|
|
52
|
+
import sys
|
|
53
|
+
from typing import Any, Dict, List, Set
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _die(msg: str, exit_code: int = 1) -> None:
    """Print *msg* to stderr and stop the program.

    Trailing whitespace is removed from the message and exactly one newline
    is appended. The function never returns normally: it raises
    ``SystemExit`` carrying *exit_code*.
    """
    print(msg.rstrip(), file=sys.stderr)
    sys.exit(exit_code)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _load_json(path: str) -> Any:
    """Parse the UTF-8 JSON document stored at *path* and return it.

    Any failure while opening, decoding or parsing the file is reported
    through ``_die`` with exit status 1.
    """
    try:
        handle = open(path, "r", encoding="utf-8")
        with handle:
            parsed = json.load(handle)
    except Exception as err:
        _die(f"lint-blacklist.sh: invalid JSON at {path}: {err}", 1)
    else:
        return parsed
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _as_str_list(value: Any) -> List[str]:
    """Return the non-blank strings found in *value*, each stripped.

    Anything that is not a list (including ``None``) yields an empty list.
    Non-string items and strings that are empty after stripping are dropped.
    """
    if not isinstance(value, list):
        # Covers None as well as every other non-list value.
        return []
    return [entry.strip() for entry in value if isinstance(entry, str) and entry.strip()]
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _get_whitelist_words(blacklist: Dict[str, Any]) -> Set[str]:
    """Gather every word that must not be counted as a blacklist hit.

    Words are read from three optional places in the config:
    ``whitelist`` when it is a list, ``whitelist.words`` when ``whitelist``
    is an object, and ``exemptions.words`` when ``exemptions`` is an object.
    Values of any other shape are ignored.
    """
    allowed: Set[str] = set()

    wl = blacklist.get("whitelist")
    if isinstance(wl, dict):
        allowed.update(_as_str_list(wl.get("words")))
    elif isinstance(wl, list):
        allowed.update(_as_str_list(wl))

    ex = blacklist.get("exemptions")
    if isinstance(ex, dict):
        allowed.update(_as_str_list(ex.get("words")))

    return allowed
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _unique_preserve_order(items: List[str]) -> List[str]:
    """Return *items* without duplicates, keeping each first occurrence in order."""
    # dict keys are unique and remember insertion order.
    return list(dict.fromkeys(items))
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def main() -> None:
    """Lint one chapter against the AI blacklist and print a JSON report.

    The chapter path is taken from ``sys.argv[1]`` and the blacklist config
    path from ``sys.argv[2]``. Config problems (not a JSON object, ``words``
    not a list of strings) and an unreadable chapter are reported through
    ``_die`` with exit status 1.

    Words are matched longest first. Every counted occurrence is then
    overwritten with NUL characters in a working copy so that a shorter
    word contained in an already-counted phrase is not counted again.

    The report written to stdout is a single JSON line with the keys
    ``chapter_path``, ``blacklist_path``, ``chars`` (non-whitespace
    characters), ``blacklist_words_count``, ``whitelist_words_count``,
    ``effective_words_count``, ``total_hits``, ``hits_per_kchars`` and
    ``hits`` (per word: ``word``, ``count``, up to 20 ``lines`` and up to 5
    ``snippets``).
    """
    import bisect  # local import: maps a character offset to its line number

    chapter_path = sys.argv[1]
    blacklist_path = sys.argv[2]

    blacklist = _load_json(blacklist_path)
    if not isinstance(blacklist, dict):
        _die("lint-blacklist.sh: ai-blacklist.json must be a JSON object", 1)

    words = blacklist.get("words")
    if not isinstance(words, list) or not all(isinstance(w, str) for w in words):
        _die("lint-blacklist.sh: ai-blacklist.json.words must be a list of strings", 1)

    whitelist = _get_whitelist_words(blacklist)

    # Strip, drop blanks and whitelisted entries, then de-duplicate while
    # keeping first-seen order (every entry is already known to be a str).
    stripped_words = (w.strip() for w in words)
    effective_words = list(dict.fromkeys(w for w in stripped_words if w and w not in whitelist))

    # Sort by length descending to match longest phrases first
    effective_words.sort(key=lambda w: -len(w))

    try:
        with open(chapter_path, "r", encoding="utf-8") as f:
            text = f.read()
    except Exception as e:
        _die(f"lint-blacklist.sh: failed to read chapter: {e}", 1)

    lines = text.splitlines()
    non_ws_chars = len(re.sub(r"\s+", "", text))

    # Start offset of every line in the original text. Masking never changes
    # the text length, so an offset found in the masked copy identifies the
    # same line in the original.
    line_starts: List[int] = []
    offset = 0
    for raw_line in text.splitlines(keepends=True):
        line_starts.append(offset)
        offset += len(raw_line)

    # Use a working copy for masking matched phrases
    masked_text = text

    hits: List[Dict[str, Any]] = []
    total_hits = 0

    for word in effective_words:
        # Locate occurrences in the MASKED text so that the reported lines
        # agree with the count; text already claimed by a longer phrase is
        # neither counted nor listed as evidence for this word.
        line_numbers: List[int] = []
        snippets: List[str] = []
        count = 0
        pos = masked_text.find(word)
        while pos != -1:
            count += 1
            line_no = bisect.bisect_right(line_starts, pos)  # 1-based line number
            # Offsets are visited in ascending order, so comparing with the
            # last recorded line is enough to list each line only once.
            if not line_numbers or line_numbers[-1] != line_no:
                line_numbers.append(line_no)
                if len(snippets) < 5:
                    # Snippets come from the original text, never the masked copy.
                    snippet = lines[line_no - 1].strip()
                    if len(snippet) > 160:
                        snippet = snippet[:160] + "…"
                    snippets.append(snippet)
            # Skip past this occurrence: non-overlapping, like str.count.
            pos = masked_text.find(word, pos + len(word))

        if count <= 0:
            continue
        total_hits += count

        hits.append(
            {
                "word": word,
                "count": count,
                "lines": line_numbers[:20],
                "snippets": snippets,
            }
        )

        # Mask matched word in working copy to prevent substring double-counting
        masked_text = masked_text.replace(word, "\x00" * len(word))

    # Most frequent first; ties broken alphabetically for deterministic output.
    hits.sort(key=lambda x: (-int(x["count"]), str(x["word"])))

    hits_per_kchars = 0.0
    if non_ws_chars > 0:
        hits_per_kchars = total_hits / (non_ws_chars / 1000.0)

    out: Dict[str, Any] = {
        "chapter_path": chapter_path,
        "blacklist_path": blacklist_path,
        "chars": non_ws_chars,
        "blacklist_words_count": len(words),
        "whitelist_words_count": len(whitelist),
        "effective_words_count": len(effective_words),
        "total_hits": total_hits,
        "hits_per_kchars": round(hits_per_kchars, 3),
        "hits": hits,
    }

    sys.stdout.write(json.dumps(out, ensure_ascii=False) + "\n")
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
# Script entry point: run the linter and map failures onto exit statuses.
try:
    main()
except SystemExit:
    # Deliberate exits (for example those raised by _die) keep their status.
    raise
except Exception as e:
    # Any other exception is reported on stderr and becomes exit status 2.
    sys.stderr.write(f"lint-blacklist.sh: unexpected error: {e}\n")
    raise SystemExit(2)
|
|
201
|
+
PY
|
|
@@ -0,0 +1,370 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
#
|
|
3
|
+
# Deterministic web-novel cliché linter (M6.4 extension point).
|
|
4
|
+
#
|
|
5
|
+
# Usage:
|
|
6
|
+
# lint-cliche.sh <chapter.md> <web-novel-cliche-lint.json>
|
|
7
|
+
#
|
|
8
|
+
# Output:
|
|
9
|
+
# stdout JSON (exit 0 on success)
|
|
10
|
+
#
|
|
11
|
+
# Exit codes:
|
|
12
|
+
# 0 = success (valid JSON emitted to stdout)
|
|
13
|
+
# 1 = validation failure (bad args, missing files, invalid JSON/schema)
|
|
14
|
+
# 2 = script exception (unexpected runtime error)
|
|
15
|
+
#
|
|
16
|
+
# Notes:
|
|
17
|
+
# - Treats whitelist and exemptions as "do not count as hits":
|
|
18
|
+
# - web-novel-cliche-lint.json.whitelist (list[str])
|
|
19
|
+
# - web-novel-cliche-lint.json.whitelist.words (list[str])
|
|
20
|
+
# - web-novel-cliche-lint.json.exemptions.exact (list[str])
|
|
21
|
+
# - web-novel-cliche-lint.json.exemptions.regex (list[str])
|
|
22
|
+
# - Hit rate is computed as "hits per 1000 non-whitespace characters" (次/千字).
|
|
23
|
+
|
|
24
|
+
# Exit on command failure, on use of an unset variable, and on a failure
# anywhere inside a pipeline.
set -euo pipefail

# Exactly two positional arguments are required; anything else is a usage
# error reported on stderr with exit status 1.
if [ "$#" -ne 2 ]; then
  echo "Usage: lint-cliche.sh <chapter.md> <web-novel-cliche-lint.json>" >&2
  exit 1
fi

chapter_path="$1"
config_path="$2"

# Both inputs must exist as regular files; a missing file exits with status 1.
if [ ! -f "$chapter_path" ]; then
  echo "lint-cliche.sh: chapter file not found: $chapter_path" >&2
  exit 1
fi

if [ ! -f "$config_path" ]; then
  echo "lint-cliche.sh: config file not found: $config_path" >&2
  exit 1
fi

# The lint logic runs in an embedded Python program, so python3 must be on
# PATH; its absence exits with status 2.
if ! command -v python3 >/dev/null 2>&1; then
  echo "lint-cliche.sh: python3 is required but not found" >&2
  exit 2
fi
|
|
48
|
+
|
|
49
|
+
python3 - "$chapter_path" "$config_path" <<'PY'
|
|
50
|
+
import json
|
|
51
|
+
import re
|
|
52
|
+
import sys
|
|
53
|
+
from datetime import datetime, timezone
|
|
54
|
+
from typing import Any, Dict, List, Optional, Set, Tuple
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _die(msg: str, exit_code: int = 1) -> None:
    """Write *msg* to stderr and abort with *exit_code*.

    The message has trailing whitespace stripped and one newline appended.
    The function always raises ``SystemExit`` and never returns normally.
    """
    sys.stderr.write("%s\n" % msg.rstrip())
    sys.exit(exit_code)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _load_json(path: str) -> Any:
    """Parse the UTF-8 JSON document stored at *path* and return it.

    Any failure while opening, decoding or parsing the file is reported
    through ``_die`` with exit status 1.
    """
    try:
        handle = open(path, "r", encoding="utf-8")
        with handle:
            document = json.load(handle)
    except Exception as err:
        _die(f"lint-cliche.sh: invalid JSON at {path}: {err}", 1)
    else:
        return document
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _as_str_list(value: Any) -> List[str]:
    """Return the non-blank strings found in *value*, each stripped.

    Anything that is not a list (including ``None``) yields an empty list.
    Non-string items and strings that are empty after stripping are dropped.
    """
    if not isinstance(value, list):
        # Covers None as well as every other non-list value.
        return []
    cleaned = (item.strip() for item in value if isinstance(item, str))
    return [item for item in cleaned if item]
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _unique_preserve_order(items: List[str]) -> List[str]:
    """Return *items* without duplicates, keeping each first occurrence in order."""
    # dict keys are unique and remember insertion order.
    first_seen = dict.fromkeys(items)
    return list(first_seen)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _sev_rank(sev: str) -> int:
    """Return the numeric rank of a severity label.

    ``"warn"`` is 1, ``"soft"`` is 2 and ``"hard"`` is 3; any other value
    ranks 0.
    """
    for rank, label in enumerate(("warn", "soft", "hard"), start=1):
        if sev == label:
            return rank
    return 0
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _max_sev(a: str, b: str) -> str:
    """Return whichever severity label ranks higher; *a* wins a tie."""
    if _sev_rank(b) > _sev_rank(a):
        return b
    return a
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _get_whitelist_words(cfg: Dict[str, Any]) -> Set[str]:
    """Return the whitelisted words declared in the config.

    ``whitelist`` may be a list of strings or an object with a ``words``
    list; a value of any other shape yields an empty set.
    """
    entry = cfg.get("whitelist")
    if isinstance(entry, dict):
        return set(_as_str_list(entry.get("words")))
    if isinstance(entry, list):
        return set(_as_str_list(entry))
    return set()
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _get_exemptions(cfg: Dict[str, Any]) -> Tuple[List[str], List[str]]:
    """Return the ``(exact, regex)`` exemption lists from the config.

    Both lists are read from the ``exemptions`` object, cleaned by
    ``_as_str_list`` and de-duplicated in first-seen order. Two empty lists
    are returned when ``exemptions`` is not an object.
    """
    section = cfg.get("exemptions")
    if isinstance(section, dict):
        literal_phrases = _as_str_list(section.get("exact"))
        patterns = _as_str_list(section.get("regex"))
        return (_unique_preserve_order(literal_phrases), _unique_preserve_order(patterns))
    return ([], [])
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _mask_literal(text: str, phrase: str) -> str:
    """Overwrite every occurrence of *phrase* in *text* with NUL characters.

    The replacement has the same length as the phrase, so the length of the
    text is unchanged. An empty phrase leaves the text untouched.
    """
    if phrase:
        filler = "\x00" * len(phrase)
        return text.replace(phrase, filler)
    return text
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _mask_exemptions(text: str, exact: List[str], regex: List[str]) -> str:
    """Return *text* with every exempted span overwritten by NUL characters.

    Literal phrases in *exact* are masked first, then every match of each
    pattern in *regex*. Each replacement has the same length as the span it
    covers, so character offsets in the result line up with *text*.

    A pattern that cannot be compiled is skipped, and a warning is written
    to stderr so that a typo in the config does not go unnoticed. stdout is
    not touched.
    """
    masked = text
    for phrase in exact:
        masked = _mask_literal(masked, phrase)
    for pattern in regex:
        try:
            re_obj = re.compile(pattern, flags=re.UNICODE)
        except (re.error, RecursionError, OverflowError) as exc:
            # Best effort: a bad pattern must not abort the lint run.
            sys.stderr.write(
                f"lint-cliche.sh: ignoring invalid exemption regex {pattern!r}: {exc}\n"
            )
            continue
        masked = re_obj.sub(lambda m: "\x00" * len(m.group(0)), masked)
    return masked
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _collect_line_evidence(text: str, phrase: str) -> Tuple[List[int], List[str]]:
    """Find the lines of *text* that contain *phrase*.

    Returns ``(line_numbers, snippets)``: the 1-based numbers of at most the
    first 20 matching lines, and the stripped text of at most the first 5 of
    them. A snippet longer than 160 characters is cut to 160 and given a
    trailing ellipsis.
    """
    matching = [
        (number, row)
        for number, row in enumerate(text.splitlines(), start=1)
        if phrase in row
    ][:20]
    line_numbers = [number for number, _ in matching]
    snippets: List[str] = []
    for _, row in matching[:5]:
        trimmed = row.strip()
        snippets.append(trimmed if len(trimmed) <= 160 else trimmed[:160] + "…")
    return (line_numbers, snippets)
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def main() -> None:
    """Lint one chapter against the cliché config and print a JSON report.

    The chapter path is taken from ``sys.argv[1]`` and the config path from
    ``sys.argv[2]``. Structural config problems (config not an object,
    ``words`` not a list of strings, ``categories`` or ``severity`` not an
    object) and an unreadable chapter are reported through ``_die`` with
    exit status 1.

    Processing steps:
      1. Collect words from ``words`` and ``categories``, skipping
         whitelisted and exactly-exempted entries.
      2. Resolve a severity per word: ``severity.per_word`` wins, otherwise
         the highest of the default and the word's category severities.
      3. Mask exempted spans in a working copy of the chapter, then count
         words longest first, masking each counted word so that a shorter
         word inside it is not counted again.
      4. Write a single JSON line to stdout with totals, per-severity and
         per-category rates, per-word hits and the ten top hits.
    """
    import bisect  # local import: maps a character offset to its line number

    chapter_path = sys.argv[1]
    config_path = sys.argv[2]

    cfg_raw = _load_json(config_path)
    if not isinstance(cfg_raw, dict):
        _die("lint-cliche.sh: web-novel-cliche-lint.json must be a JSON object", 1)
    cfg: Dict[str, Any] = cfg_raw

    # Config metadata is echoed in the report; malformed values fall back to
    # 0 and None.
    schema_version = cfg.get("schema_version")
    if not isinstance(schema_version, int):
        schema_version = 0
    last_updated = cfg.get("last_updated")
    if not isinstance(last_updated, str) or not last_updated.strip():
        last_updated = None
    else:
        last_updated = last_updated.strip()

    words_raw = cfg.get("words")
    if words_raw is None:
        words_raw = []
    if not isinstance(words_raw, list) or not all(isinstance(w, str) for w in words_raw):
        _die("lint-cliche.sh: web-novel-cliche-lint.json.words must be a list of strings", 1)
    words_flat = _unique_preserve_order([w.strip() for w in words_raw if isinstance(w, str) and w.strip()])

    categories_raw = cfg.get("categories")
    categories: Dict[str, List[str]] = {}
    if categories_raw is not None:
        if not isinstance(categories_raw, dict):
            _die("lint-cliche.sh: web-novel-cliche-lint.json.categories must be an object", 1)
        for k, v in categories_raw.items():
            if not isinstance(k, str) or not k.strip():
                continue
            categories[k] = _unique_preserve_order([w.strip() for w in _as_str_list(v) if w.strip()])

    severity_raw = cfg.get("severity")
    severity_default = "warn"
    per_category: Dict[str, str] = {}
    per_word: Dict[str, str] = {}
    if severity_raw is not None:
        if not isinstance(severity_raw, dict):
            _die("lint-cliche.sh: web-novel-cliche-lint.json.severity must be an object", 1)
        default_raw = severity_raw.get("default")
        if isinstance(default_raw, str) and default_raw in ("warn", "soft", "hard"):
            severity_default = default_raw
        pc_raw = severity_raw.get("per_category")
        if isinstance(pc_raw, dict):
            for k, v in pc_raw.items():
                if isinstance(k, str) and isinstance(v, str) and v in ("warn", "soft", "hard"):
                    per_category[k] = v
        pw_raw = severity_raw.get("per_word")
        if isinstance(pw_raw, dict):
            for k, v in pw_raw.items():
                if isinstance(k, str) and isinstance(v, str) and v in ("warn", "soft", "hard"):
                    per_word[k] = v

    whitelist = _get_whitelist_words(cfg)
    exemptions_exact, exemptions_regex = _get_exemptions(cfg)
    exemptions_exact_set = set(exemptions_exact)

    # Build index: word -> (categories, severity)
    index: Dict[str, Dict[str, Any]] = {}

    def _add_word(word: str, cat: Optional[str]) -> None:
        """Register *word* under *cat* unless it is blank, whitelisted or exempt."""
        w = word.strip()
        if not w:
            return
        if w in whitelist:
            return
        if w in exemptions_exact_set:
            return
        meta = index.get(w)
        if meta is None:
            meta = {"categories": set(), "severity": severity_default}
            index[w] = meta
        if cat:
            meta["categories"].add(cat)

    for w in words_flat:
        _add_word(w, None)
    for cat, lst in categories.items():
        for w in lst:
            _add_word(w, cat)

    # Resolve severities
    for w, meta in index.items():
        if w in per_word:
            meta["severity"] = per_word[w]
        else:
            sev = severity_default
            for cat in meta["categories"]:
                if cat in per_category:
                    sev = _max_sev(sev, per_category[cat])
            meta["severity"] = sev

    # Sort by length desc, then stable word sort for determinism
    effective_words = list(index.keys())
    effective_words.sort(key=lambda w: (-len(w), w))

    try:
        with open(chapter_path, "r", encoding="utf-8") as f:
            text = f.read()
    except Exception as e:
        _die(f"lint-cliche.sh: failed to read chapter: {e}", 1)

    masked_text = _mask_exemptions(text, exemptions_exact, exemptions_regex)
    non_ws_chars = len(re.sub(r"\s+", "", text))

    # Start offset of every line in the original text. Masking never changes
    # the text length, so an offset found in the masked copy identifies the
    # same line in the original.
    source_lines = text.splitlines()
    line_starts: List[int] = []
    offset = 0
    for raw_line in text.splitlines(keepends=True):
        line_starts.append(offset)
        offset += len(raw_line)

    def _evidence_at(positions: List[int]) -> Tuple[List[int], List[str]]:
        """Turn ascending match offsets into (<=20 line numbers, <=5 snippets)."""
        found_lines: List[int] = []
        found_snippets: List[str] = []
        for position in positions:
            line_no = bisect.bisect_right(line_starts, position)  # 1-based
            if found_lines and found_lines[-1] == line_no:
                continue  # several hits on one line are listed once
            found_lines.append(line_no)
            if len(found_snippets) < 5:
                # Snippets come from the original text, never the masked copy.
                snippet = source_lines[line_no - 1].strip()
                if len(snippet) > 160:
                    snippet = snippet[:160] + "…"
                found_snippets.append(snippet)
            if len(found_lines) >= 20:
                break
        return (found_lines, found_snippets)

    severity_counts: Dict[str, int] = {"warn": 0, "soft": 0, "hard": 0}
    category_counts: Dict[str, int] = {}
    hits: List[Dict[str, Any]] = []
    total_hits = 0

    for word in effective_words:
        # Locate occurrences in the MASKED text so that the reported evidence
        # agrees with the count: exempted spans and text already claimed by a
        # longer phrase are neither counted nor listed.
        positions: List[int] = []
        pos = masked_text.find(word)
        while pos != -1:
            positions.append(pos)
            # Skip past this occurrence: non-overlapping, like str.count.
            pos = masked_text.find(word, pos + len(word))
        count = len(positions)
        if count <= 0:
            continue
        total_hits += count
        masked_text = _mask_literal(masked_text, word)

        meta = index.get(word, {"categories": set(), "severity": severity_default})
        sev = meta.get("severity", severity_default)
        if sev not in ("warn", "soft", "hard"):
            sev = severity_default
        severity_counts[sev] = severity_counts.get(sev, 0) + count

        # A word in several categories is attributed to the alphabetically
        # first one for the per-category totals.
        cats_sorted = sorted(list(meta.get("categories", set())))
        primary_cat = cats_sorted[0] if len(cats_sorted) > 0 else None
        if primary_cat:
            category_counts[primary_cat] = category_counts.get(primary_cat, 0) + count

        lines, snippets = _evidence_at(positions)
        hits.append(
            {
                "word": word,
                "count": count,
                "severity": sev,
                "category": primary_cat,
                "categories": cats_sorted,
                "lines": lines,
                "snippets": snippets,
            }
        )

    def _sort_hits_key(h: Dict[str, Any]) -> Tuple[int, int, str]:
        """Order hits by count desc, then severity desc, then word."""
        return (-int(h.get("count", 0)), -_sev_rank(str(h.get("severity", "warn"))), str(h.get("word", "")))

    hits.sort(key=_sort_hits_key)

    def _per_k(n: int) -> float:
        """Convert a hit count into hits per 1000 non-whitespace characters."""
        if non_ws_chars <= 0:
            return 0.0
        return round(float(n) / (non_ws_chars / 1000.0), 3)

    hits_per_kchars = _per_k(total_hits)

    by_severity = {
        "warn": {"hits": int(severity_counts.get("warn", 0)), "hits_per_kchars": _per_k(int(severity_counts.get("warn", 0)))},
        "soft": {"hits": int(severity_counts.get("soft", 0)), "hits_per_kchars": _per_k(int(severity_counts.get("soft", 0)))},
        "hard": {"hits": int(severity_counts.get("hard", 0)), "hits_per_kchars": _per_k(int(severity_counts.get("hard", 0)))},
    }

    by_category: Dict[str, Any] = {}
    for cat, n in category_counts.items():
        by_category[cat] = {"hits": int(n), "hits_per_kchars": _per_k(int(n))}

    top_hits = [
        {"word": h["word"], "count": int(h["count"]), "severity": h["severity"], "category": h.get("category")}
        for h in hits[:10]
    ]

    has_hard_hits = int(severity_counts.get("hard", 0)) > 0

    # The chapter number is the first "chapter-<digits>" found in the path;
    # it stays 0 when the path has no such component.
    chapter_num = 0
    m = re.search(r"chapter-(\d+)", chapter_path)
    if m:
        try:
            chapter_num = int(m.group(1))
        except Exception:
            chapter_num = 0

    out: Dict[str, Any] = {
        "schema_version": 1,
        "generated_at": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
        "scope": {"chapter": chapter_num},
        "config": {"schema_version": int(schema_version), "last_updated": last_updated},
        "mode": "script",
        "chars": int(non_ws_chars),
        "total_hits": int(total_hits),
        "hits_per_kchars": float(hits_per_kchars),
        "by_severity": by_severity,
        "by_category": by_category,
        "hits": hits,
        "top_hits": top_hits,
        "has_hard_hits": bool(has_hard_hits),
    }

    sys.stdout.write(json.dumps(out, ensure_ascii=False) + "\n")
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
# Script entry point: run the linter and map failures onto exit statuses.
try:
    main()
except SystemExit:
    # Deliberate exits (for example those raised by _die) keep their status.
    raise
except Exception as e:
    # Any other exception is reported on stderr and becomes exit status 2.
    sys.stderr.write(f"lint-cliche.sh: unexpected error: {e}\n")
    raise SystemExit(2)
|
|
369
|
+
PY
|
|
370
|
+
|