@ctxr/skill-llm-wiki 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/CHANGELOG.md +134 -0
  2. package/LICENSE +21 -0
  3. package/README.md +484 -0
  4. package/SKILL.md +252 -0
  5. package/guide/basics/concepts.md +74 -0
  6. package/guide/basics/index.md +45 -0
  7. package/guide/basics/schema.md +140 -0
  8. package/guide/cli.md +256 -0
  9. package/guide/correctness/index.md +45 -0
  10. package/guide/correctness/invariants.md +89 -0
  11. package/guide/correctness/safety.md +96 -0
  12. package/guide/history/diff.md +110 -0
  13. package/guide/history/hidden-git.md +130 -0
  14. package/guide/history/index.md +52 -0
  15. package/guide/history/remote-sync.md +113 -0
  16. package/guide/index.md +134 -0
  17. package/guide/isolation/coexistence.md +134 -0
  18. package/guide/isolation/index.md +44 -0
  19. package/guide/isolation/scale.md +251 -0
  20. package/guide/layout/in-place-mode.md +97 -0
  21. package/guide/layout/index.md +53 -0
  22. package/guide/layout/layout-contract.md +131 -0
  23. package/guide/layout/layout-modes.md +115 -0
  24. package/guide/operations/index.md +76 -0
  25. package/guide/operations/ingest/build.md +75 -0
  26. package/guide/operations/ingest/extend.md +61 -0
  27. package/guide/operations/ingest/index.md +54 -0
  28. package/guide/operations/ingest/join.md +65 -0
  29. package/guide/operations/maintain/fix.md +66 -0
  30. package/guide/operations/maintain/index.md +47 -0
  31. package/guide/operations/maintain/rebuild.md +86 -0
  32. package/guide/operations/validate.md +48 -0
  33. package/guide/substrate/index.md +47 -0
  34. package/guide/substrate/operators.md +96 -0
  35. package/guide/substrate/tiered-ai.md +363 -0
  36. package/guide/ux/index.md +44 -0
  37. package/guide/ux/preflight.md +150 -0
  38. package/guide/ux/user-intent.md +135 -0
  39. package/package.json +55 -0
  40. package/scripts/cli.mjs +893 -0
  41. package/scripts/commands/remote.mjs +93 -0
  42. package/scripts/commands/review.mjs +253 -0
  43. package/scripts/commands/sync.mjs +84 -0
  44. package/scripts/lib/chunk.mjs +421 -0
  45. package/scripts/lib/cluster-detect.mjs +516 -0
  46. package/scripts/lib/decision-log.mjs +343 -0
  47. package/scripts/lib/draft.mjs +158 -0
  48. package/scripts/lib/embeddings.mjs +366 -0
  49. package/scripts/lib/frontmatter.mjs +497 -0
  50. package/scripts/lib/git-commands.mjs +155 -0
  51. package/scripts/lib/git.mjs +486 -0
  52. package/scripts/lib/gitignore.mjs +62 -0
  53. package/scripts/lib/history.mjs +331 -0
  54. package/scripts/lib/indices.mjs +510 -0
  55. package/scripts/lib/ingest.mjs +258 -0
  56. package/scripts/lib/intent.mjs +713 -0
  57. package/scripts/lib/interactive.mjs +99 -0
  58. package/scripts/lib/migrate.mjs +126 -0
  59. package/scripts/lib/nest-applier.mjs +260 -0
  60. package/scripts/lib/operators.mjs +1365 -0
  61. package/scripts/lib/orchestrator.mjs +718 -0
  62. package/scripts/lib/paths.mjs +197 -0
  63. package/scripts/lib/preflight.mjs +213 -0
  64. package/scripts/lib/provenance.mjs +672 -0
  65. package/scripts/lib/quality-metric.mjs +269 -0
  66. package/scripts/lib/query-fixture.mjs +71 -0
  67. package/scripts/lib/rollback.mjs +95 -0
  68. package/scripts/lib/shape-check.mjs +172 -0
  69. package/scripts/lib/similarity-cache.mjs +126 -0
  70. package/scripts/lib/similarity.mjs +230 -0
  71. package/scripts/lib/snapshot.mjs +54 -0
  72. package/scripts/lib/source-frontmatter.mjs +85 -0
  73. package/scripts/lib/tier2-protocol.mjs +470 -0
  74. package/scripts/lib/tiered.mjs +453 -0
  75. package/scripts/lib/validate.mjs +362 -0
@@ -0,0 +1,343 @@
1
+ // decision-log.mjs — append-only audit trail for tiered-AI decisions.
2
+ //
3
+ // Every non-trivial similarity / operator decision records:
4
+ //
5
+ // { op_id, operator, sources[], tier_used, similarity,
6
+ // confidence_band, decision, reason }
7
+ //
8
+ // Stored at `<wiki>/.llmwiki/decisions.yaml`. Same hand-rolled
9
+ // deterministic YAML emitter/parser pattern as history.mjs — no
10
+ // external dep. Atomic append via temp-file + rename.
11
+ //
12
+ // Claude-at-session-time reads this log when a user asks "why was
13
+ // this merged?" so the audit trail has to survive across operations
14
+ // unchanged. The log is intentionally NOT reset on rollback — if an
15
+ // op's decisions are a matter of historical record, they remain
16
+ // queryable even after the op is reset.
17
+
18
+ import {
19
+ existsSync,
20
+ mkdirSync,
21
+ readFileSync,
22
+ renameSync,
23
+ writeFileSync,
24
+ } from "node:fs";
25
+ import { dirname, join } from "node:path";
26
+
27
// Location of the decision log for a wiki rooted at `wikiRoot`:
// `<wikiRoot>/.llmwiki/decisions.yaml`.
export function decisionLogPath(wikiRoot) {
  const logLocation = [".llmwiki", "decisions.yaml"];
  return join(wikiRoot, ...logLocation);
}
30
+
31
// Keys that must be present (checked with `in`) on every entry
// before it may be logged.
const REQUIRED_FIELDS = [
  "op_id",
  "operator",
  "sources",
  "tier_used",
  "similarity",
  "decision",
];

// Guard run before an entry is serialised. Returns nothing when the
// entry is acceptable; otherwise throws an Error naming the first
// problem found. Checks, in order: entry is an object, every required
// key is present, `sources` is an array, `similarity` is a finite
// number, `tier_used` is an integer.
function validate(entry) {
  const isObject = Boolean(entry) && typeof entry === "object";
  if (!isObject) {
    throw new Error("decision-log: entry must be an object");
  }

  const missing = REQUIRED_FIELDS.find((name) => !(name in entry));
  if (missing !== undefined) {
    throw new Error(`decision-log: entry missing required field "${missing}"`);
  }

  if (!Array.isArray(entry.sources)) {
    throw new Error("decision-log: sources must be an array of strings");
  }

  // Number.isFinite refuses NaN, +/-Infinity and every non-number.
  // A non-finite score would be written as text such as "Infinity"
  // and come back from the parser as a string, changing its type.
  if (!Number.isFinite(entry.similarity)) {
    throw new Error(
      `decision-log: similarity must be a finite number (got ${entry.similarity})`,
    );
  }

  // Number.isInteger is already false for anything that is not a number.
  if (!Number.isInteger(entry.tier_used)) {
    throw new Error("decision-log: tier_used must be an integer");
  }
}
67
+
68
// Decide whether a string must be written in double-quoted form so
// that the reader in this module returns it unchanged, as a string.
//
// The base rules (empty string, YAML-significant characters, a leading
// `-`, space or `?`, integer-looking text, and the literals
// true/false/null) were written to mirror the op-log emitter in
// history.mjs. Two further cases are quoted here because the reader
// would otherwise alter the value:
//
//   * text shaped like a decimal or scientific-notation number
//     ("0.5", "1e5") — the reader converts any such bare scalar to a
//     Number, so an id like "12e4" would come back as 120000;
//   * text with leading or trailing whitespace — the reader trims the
//     op_id value and skips whitespace after every `key:`.
//
// @param {string} value  the string about to be emitted
// @returns {boolean} true when the value must go through the quoted emitter
function needsQuoting(value) {
  if (value === "") return true;
  if (/[:#{}\[\],&*!|>'"`\n\r\t]/.test(value)) return true;
  if (/^[- ?]/.test(value)) return true;
  // Integers, decimals and exponent forms: everything the reader's
  // numeric patterns accept must stay a string by being quoted.
  if (/^-?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?$/.test(value)) return true;
  // Surrounding whitespace does not survive the reader's trimming.
  if (value !== value.trim()) return true;
  if (value === "true" || value === "false" || value === "null") return true;
  return false;
}
79
+
80
// Render `value` as a double-quoted scalar. Backslash, double quote,
// newline, carriage return and tab are written as two-character
// escapes; every other character is copied through. Any other control
// character below U+0020 is refused with an Error, since the escape
// table has no representation for it.
function escapeQuoted(value) {
  // C0 controls except TAB (09), LF (0a) and CR (0d).
  const forbidden = /[\x00-\x08\x0b\x0c\x0e-\x1f]/.exec(value);
  if (forbidden !== null) {
    const code = forbidden[0].charCodeAt(0);
    throw new Error(
      `decision-log: control character U+${code.toString(16).padStart(4, "0")} is not round-trip-safe`,
    );
  }

  const escapes = new Map([
    ["\\", "\\\\"],
    ['"', '\\"'],
    ["\n", "\\n"],
    ["\r", "\\r"],
    ["\t", "\\t"],
  ]);
  const body = value.replace(/[\\"\n\r\t]/g, (ch) => escapes.get(ch));
  return `"${body}"`;
}
102
+
103
// Serialise one scalar. null and undefined both become the bare word
// `null`; booleans and numbers use their String() form; strings are
// quoted only when needsQuoting() says so. Any other type throws.
function emitScalar(value) {
  switch (typeof value) {
    case "boolean":
    case "number":
      return String(value);
    case "string":
      return needsQuoting(value) ? escapeQuoted(value) : value;
    default:
      // typeof null is "object", so null is handled here with undefined.
      if (value === null || value === undefined) return "null";
      throw new Error(
        `decision-log: unsupported scalar type ${typeof value}`,
      );
  }
}
114
+
115
// Serialise one decision entry as a block of lines joined with "\n"
// (no trailing newline). Field order is fixed: op_id, operator,
// sources (one list item per source), tier_used, similarity,
// confidence_band, decision, reason. A missing confidence_band or
// reason is written as `null`.
function emitEntry(entry) {
  return [
    `- op_id: ${emitScalar(entry.op_id)}`,
    ` operator: ${emitScalar(entry.operator)}`,
    " sources:",
    ...Array.from(entry.sources, (source) => ` - ${emitScalar(source)}`),
    ` tier_used: ${emitScalar(entry.tier_used)}`,
    ` similarity: ${emitScalar(entry.similarity)}`,
    ` confidence_band: ${emitScalar(entry.confidence_band ?? null)}`,
    ` decision: ${emitScalar(entry.decision)}`,
    ` reason: ${emitScalar(entry.reason ?? null)}`,
  ].join("\n");
}
132
+
133
// Validate `entry`, serialise it, and add it to the end of the wiki's
// decision log. The log directory is created when absent. A log that
// does not exist yet is started with the comment/version/entries
// header. The new content is written in full to a temp file beside
// the log and then renamed over it, so the log path never holds a
// partially written file.
export function appendDecision(wikiRoot, entry) {
  validate(entry);

  const logPath = decisionLogPath(wikiRoot);
  mkdirSync(dirname(logPath), { recursive: true });
  const serialised = `${emitEntry(entry)}\n`;

  let before;
  if (existsSync(logPath)) {
    const current = readFileSync(logPath, "utf8");
    // Make sure the previous content ends on a line boundary.
    before = current.endsWith("\n") ? current : `${current}\n`;
  } else {
    before = [
      "# skill-llm-wiki tiered-AI decision log (append-only)",
      "version: 1",
      "entries:",
      "",
    ].join("\n");
  }

  const staging = `${logPath}.tmp.${process.pid}.${Date.now()}`;
  writeFileSync(staging, before + serialised, "utf8");
  renameSync(staging, logPath);
}
155
+
156
// Record the outcome of a cluster-NEST proposal.
//
// The convergence loop calls this for every math-only proposal
// (together with its Tier 2 gate decision) and for every
// Tier-2-proposed cluster (with decision="tier2-approved" or
// "rejected-by-metric"). NEST entries share the entries[] list with
// pairwise decisions, so one op's audit trail reads as a single
// stream.
//
// How the incoming record maps onto the log schema:
//
//   op_id            copied as-is
//   operator         always "NEST"
//   sources          leaf ids in the cluster ([] when not an array)
//   tier_used        always 2 — every NEST decision touches Tier 2,
//                    via propose_structure or nest_decision
//   similarity       entry.similarity when finite, otherwise
//                    entry.average_affinity when finite, otherwise 0
//   confidence_band  one of "tier2-proposed", "math-gated",
//                    "empty-partition", "rejected-by-metric",
//                    "rejected-by-gate" (null when absent)
//   decision         one of "applied", "rejected-by-metric",
//                    "rejected-by-gate", "rejected-stale",
//                    "slug-renamed", "pending-tier2"
//   reason           free text (null when absent)
//
// The fallback to 0 exists because average_affinity may be undefined
// for Tier-2-proposed clusters and the validator only accepts a
// finite similarity.
export function appendNestDecision(wikiRoot, entry) {
  let similarity = 0;
  if (Number.isFinite(entry.similarity)) {
    similarity = entry.similarity;
  } else if (Number.isFinite(entry.average_affinity)) {
    similarity = entry.average_affinity;
  }

  const record = {
    op_id: entry.op_id,
    operator: "NEST",
    sources: Array.isArray(entry.sources) ? entry.sources : [],
    tier_used: 2,
    similarity,
    confidence_band: entry.confidence_band ?? null,
    decision: entry.decision,
    reason: entry.reason ?? null,
  };
  appendDecision(wikiRoot, record);
}
206
+
207
// Log the metric trajectory of one op, one entry per point, each with
// operator="METRIC_TRAJECTORY" so a reader can rebuild the op's cost
// curve by filtering entries[]. `trajectory` is the metric_trajectory
// array from runConvergence, i.e. `{ iteration, cost, event }`
// records. A single-point baseline trajectory is logged too, as
// evidence that the convergence loop ran. A non-array `trajectory` is
// ignored.
//
// Per point: a non-finite cost is logged as 0, a non-integer
// iteration as 0, a missing event as "unknown", a missing reason as
// null.
export function appendMetricTrajectory(wikiRoot, opId, trajectory) {
  if (!Array.isArray(trajectory)) return;

  for (let k = 0; k < trajectory.length; k++) {
    const { cost, iteration, event, reason } = trajectory[k];
    const loggedCost = Number.isFinite(cost) ? cost : 0;
    const loggedIteration = Number.isInteger(iteration) ? iteration : 0;
    appendDecision(wikiRoot, {
      op_id: opId,
      operator: "METRIC_TRAJECTORY",
      sources: [`iter-${loggedIteration}`],
      tier_used: 0,
      similarity: loggedCost,
      confidence_band: event ?? "unknown",
      decision: "measured",
      reason: reason ?? null,
    });
  }
}
232
+
233
// Lightweight reader for the decision log — it parses only what the
// tests and the `skill-llm-wiki history` subcommand need, and throws
// on any line it does not recognise.
//
// Recognised input, after comment lines (`#`) and empty lines are
// dropped:
//   * an optional `version:` line followed by an optional `entries:`
//     line;
//   * `- op_id: <value>` starts a new entry;
//   * ` - <value>` adds an item to the current entry's sources;
//   * ` <key>: <value>` sets a field on the current entry; the key
//     `sources` resets the list that the following items fill.
//
// Line numbers in error messages are 1-based positions in the file
// itself. They are carried alongside each retained line because
// dropped comment and blank lines would otherwise shift every number
// (the generated header comment alone makes them off by one).
//
// @param {string} wikiRoot  wiki root directory
// @returns {object[]} parsed entries in file order; [] when the log
//   file does not exist
// @throws {Error} on a line before the first entry, or on a line
//   matching none of the forms above
export function readDecisions(wikiRoot) {
  const path = decisionLogPath(wikiRoot);
  if (!existsSync(path)) return [];
  const raw = readFileSync(path, "utf8");

  // Keep each surviving line together with its real file line number.
  const lines = [];
  raw.split(/\r?\n/).forEach((text, index) => {
    if (text.length > 0 && !/^\s*#/.test(text)) {
      lines.push({ text, lineNo: index + 1 });
    }
  });

  const out = [];
  let i = 0;
  // Header: version + entries:
  if (i < lines.length && lines[i].text.startsWith("version:")) i++;
  if (i < lines.length && lines[i].text.trim() === "entries:") i++;

  let current = null;
  for (; i < lines.length; i++) {
    const { text: line, lineNo } = lines[i];

    if (line.startsWith("- op_id:")) {
      if (current) out.push(current);
      current = {
        op_id: parseValue(line.slice("- op_id:".length).trim()),
        sources: [],
      };
      continue;
    }
    if (!current) {
      throw new Error(`decision-log parser: stray line at ${lineNo}: ${line}`);
    }

    const listItem = /^ - (.*)$/.exec(line);
    if (listItem) {
      current.sources.push(parseValue(listItem[1]));
      continue;
    }

    const kv = /^ (\w+):\s*(.*)$/.exec(line);
    if (!kv) {
      throw new Error(
        `decision-log parser: unrecognised line at ${lineNo}: ${line}`,
      );
    }
    const [, key, rest] = kv;
    if (key === "sources") {
      // `sources:` alone introduces the list items that follow.
      current.sources = [];
      continue;
    }
    current[key] = parseValue(rest);
  }
  if (current) out.push(current);
  return out;
}
286
+
287
// Reverse the quoted-scalar escaping: `\n`, `\r` and `\t` become the
// control characters they name, and a backslash before any other
// character yields that character on its own (this covers `\\` and
// `\"`). A lone backslash at the very end of the body is kept.
function unescapeQuoted(body) {
  const named = new Map([
    ["n", "\n"],
    ["r", "\r"],
    ["t", "\t"],
  ]);
  // [\s\S] takes exactly one code unit after the backslash, newlines
  // included, so pairs are consumed left to right without overlap.
  return body.replace(/\\([\s\S])/g, (_pair, escaped) => named.get(escaped) ?? escaped);
}
307
+
308
// Pattern for decimal numbers with an optional fraction and optional
// exponent: `0.5`, `1e-10`, `-3.14`, `42`, `-5`. It deliberately does
// not accept `Infinity`, `NaN` or hexadecimal — those are either
// refused by the validator or representable as plain strings.
const NUMBER_RE = /^-?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?$/;

// Turn the raw text of one scalar into a JS value.
//   "null" or ""          -> null
//   "true" / "false"      -> boolean
//   integer-looking text  -> number (throws unless a safe integer)
//   other numeric text    -> number (throws unless finite)
//   "..." (quoted)        -> string with escapes decoded
//   anything else         -> the raw text itself
// Text that starts or ends with a double quote, but not both, throws.
function parseValue(raw) {
  switch (raw) {
    case "null":
    case "":
      return null;
    case "true":
      return true;
    case "false":
      return false;
    default:
      break;
  }

  const looksInteger = /^-?\d+$/.test(raw);
  if (looksInteger || NUMBER_RE.test(raw)) {
    const parsed = Number(raw);
    if (looksInteger) {
      if (!Number.isSafeInteger(parsed)) {
        throw new Error(
          `decision-log parser: integer ${raw} is not a safe integer`,
        );
      }
    } else if (!Number.isFinite(parsed)) {
      throw new Error(
        `decision-log parser: non-finite numeric value ${raw}`,
      );
    }
    return parsed;
  }

  const opensQuote = raw.startsWith('"');
  const closesQuote = raw.endsWith('"');
  if (opensQuote && closesQuote && raw.length >= 2) {
    return unescapeQuoted(raw.slice(1, -1));
  }
  if (opensQuote !== closesQuote) {
    throw new Error(`decision-log parser: unbalanced quote in: ${raw}`);
  }
  return raw;
}
@@ -0,0 +1,158 @@
1
+ // Draft frontmatter: deterministic extraction only.
2
+ //
3
+ // This is the script-side of the script-first + AI-fallback pipeline
4
+ // documented in methodology §9.6. It handles the "structured source" case
5
+ // where frontmatter can be derived mechanically from file metadata:
6
+ // - id from filename
7
+ // - focus from title or lead paragraph
8
+ // - covers[] from H2 sections or bulleted items in the lead
9
+ // - tags[] from filename prefixes or directory hints
10
+ // - activation from file_glob inferred from the source path
11
+ //
12
+ // When the source file ALREADY carries a frontmatter block (parsed at
13
+ // ingest time via gray-matter and stashed as
14
+ // `candidate.authored_frontmatter`), each AUTHORED_LEAF_FIELD is
15
+ // preferred over the heuristic — the drafter only fills gaps. This is
16
+ // what preserves `activation`, `covers`, `tags`, `focus`, `domains`,
17
+ // `shared_covers`, `aliases`, and friends when a hand-tuned corpus is
18
+ // re-built.
19
+ //
20
+ // Anything that needs semantic understanding (prose-heavy draft, ambiguous
21
+ // classification, cover synthesis from narrative) is left for Claude to
22
+ // handle inside its own execution context when running this skill. The
23
+ // `needs_ai` flag on the returned draft tells the caller which entries
24
+ // need AI review.
25
+
26
// Frontmatter fields that may be taken from the source file's own
// frontmatter when the author supplied them. `id`, `type`,
// `depth_role` and `source` are not listed: the drafter always sets
// those itself.
const AUTHORED_LEAF_FIELDS = [
  "focus",
  "covers",
  "tags",
  "domains",
  "aliases",
  "activation",
  "shared_covers",
  "overlay_targets",
  "links",
];

// Draft the frontmatter for one leaf candidate.
//
// Returns `{ data, confidence, needs_ai }`, where `needs_ai` is true
// when the confidence score is below 0.6.
//
// `focus`, `covers`, `tags` and `parents` follow the priority
// authored > drafted: a non-empty authored value wins, otherwise the
// heuristic value is used (for `parents` the fallback is the literal
// ["index.md"]). The other AUTHORED_LEAF_FIELDS have no heuristic;
// they are copied verbatim only when the candidate is flagged as
// having authored frontmatter and the value is neither undefined nor
// null, and are otherwise left out so the output stays compact.
//
// NOTE(review): the `categoryPath` option is accepted but not read
// anywhere in this function.
export function draftLeafFrontmatter(candidate, { categoryPath } = {}) {
  const authored = candidate.authored_frontmatter || {};

  // Heuristic values, used wherever the author left a gap.
  const heuristic = {
    covers: extractCovers(candidate),
    focus: candidate.title || candidate.id,
    tags: inferTags(candidate),
  };

  const data = {
    id: candidate.id,
    type: "primary",
    depth_role: "leaf",
    focus: pickAuthored(authored.focus, heuristic.focus),
    covers: pickAuthored(authored.covers, heuristic.covers),
    // Hand-authored parents are index.md paths relative to the leaf's
    // own directory (`index.md` for the same directory, `../index.md`
    // for one level up).
    parents: pickAuthored(authored.parents, ["index.md"]),
    tags: pickAuthored(authored.tags, heuristic.tags),
    source: {
      origin: "file",
      path: candidate.source_path,
      hash: candidate.hash,
    },
  };

  if (candidate.has_authored_frontmatter === true) {
    // These three were already resolved above with a heuristic fallback.
    const alreadyResolved = new Set(["focus", "covers", "tags"]);
    AUTHORED_LEAF_FIELDS
      .filter((field) => !alreadyResolved.has(field))
      .forEach((field) => {
        const supplied = authored[field];
        if (supplied !== undefined && supplied !== null) {
          data[field] = supplied;
        }
      });
  }

  const confidence = scoreConfidence(data, candidate);
  return { data, confidence, needs_ai: confidence < 0.6 };
}
89
+
90
// Choose between an authored value and a fallback. The fallback is
// returned when the authored value is undefined, null, an empty
// array, or a string that is blank after trimming; every other
// authored value (including 0, false and objects) is returned as-is.
function pickAuthored(authoredVal, fallback) {
  if (authoredVal == null) return fallback;

  let isEmpty = false;
  if (Array.isArray(authoredVal)) {
    isEmpty = authoredVal.length === 0;
  } else if (typeof authoredVal === "string") {
    isEmpty = authoredVal.trim() === "";
  }
  return isEmpty ? fallback : authoredVal;
}
100
+
101
// Heuristic `covers` list for a candidate. The text of up to ten
// level-2 headings is used when there is at least one. Otherwise the
// lead is split after sentence-ending punctuation, and up to five
// sentences longer than 10 characters are kept, each cut to 120
// characters.
function extractCovers(candidate) {
  const fromHeadings = candidate.headings
    .filter((heading) => heading.level === 2)
    .slice(0, 10)
    .map((heading) => heading.text);
  if (fromHeadings.length > 0) return fromHeadings;

  return (candidate.lead || "")
    .split(/(?<=[.!?])\s+/)
    .filter((sentence) => sentence.length > 10)
    .slice(0, 5)
    .map((sentence) => sentence.slice(0, 120));
}
119
+
120
// Heuristic tags for a candidate, at most eight, in first-seen order.
//
// Each directory component of `source_path` (everything before the
// final segment, split on `/` or `\`) becomes a tag after
// lower-casing and collapsing each run of characters outside
// [a-z0-9-] into a single "-". Empty components, ".", and all-digit
// components are skipped. A component whose slug has no ASCII letter
// or digit left (for example "..", "___", or a name written entirely
// in a non-Latin script) is skipped too; it would otherwise add a
// meaningless tag made only of dashes. A ".md" extension adds the tag
// "markdown".
//
// @param {{source_path: string, ext?: string}} candidate
// @returns {string[]} de-duplicated tag list
function inferTags(candidate) {
  const tags = new Set();
  // Directory components as tag hints.
  const parts = candidate.source_path.split(/[\/\\]/);
  for (const part of parts.slice(0, -1)) {
    if (!part || part === "." || /^\d+$/.test(part)) continue;
    const slug = part.toLowerCase().replace(/[^a-z0-9-]+/g, "-");
    // Nothing recognisable survived slugging: not a useful tag.
    if (!/[a-z0-9]/.test(slug)) continue;
    tags.add(slug);
  }
  // Extension hint.
  if (candidate.ext === ".md") tags.add("markdown");
  return [...tags].slice(0, 8);
}
133
+
134
// Confidence score for a drafted leaf, capped at 1. Contributions:
//   +0.3  focus is truthy and differs from the candidate id
//   +0.4  three or more covers (or +0.2 for one or two)
//   +0.2  the candidate has at least two level-2 headings
//   +0.1  candidate.size is greater than 200
function scoreConfidence(draft, candidate) {
  const coverCount = draft.covers.length;
  const h2Count = candidate.headings.reduce(
    (count, heading) => (heading.level === 2 ? count + 1 : count),
    0,
  );

  // Kept in this order and summed left to right so the floating-point
  // result does not depend on which contributions apply.
  const contributions = [
    draft.focus && draft.focus !== candidate.id ? 0.3 : 0,
    coverCount >= 3 ? 0.4 : coverCount >= 1 ? 0.2 : 0,
    h2Count >= 2 ? 0.2 : 0,
    candidate.size > 200 ? 0.1 : 0,
  ];

  let total = 0;
  for (const amount of contributions) {
    total += amount;
  }
  return Math.min(1, total);
}
143
+
144
// Script-first classifier: derive the category from the first
// directory component of the source path.
//
// A file that sits at the source root (no directory component) gets
// the empty category, so it is placed at the TARGET root instead of
// under a synthetic `general/` bucket — a flat authored guide stays
// flat in the output. Otherwise the first directory component,
// lower-cased and with each run of characters outside [a-z0-9-]
// collapsed into "-", becomes the top-level category (e.g.
// `operations/build.md` -> `operations`).
export function draftCategory(candidate) {
  const [topLevel, ...deeper] = candidate.source_path
    .split(/[\/\\]/)
    .filter(Boolean);
  if (deeper.length === 0) return "";
  return topLevel.toLowerCase().replace(/[^a-z0-9-]+/g, "-");
}