@ctxr/skill-llm-wiki 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +134 -0
- package/LICENSE +21 -0
- package/README.md +484 -0
- package/SKILL.md +252 -0
- package/guide/basics/concepts.md +74 -0
- package/guide/basics/index.md +45 -0
- package/guide/basics/schema.md +140 -0
- package/guide/cli.md +256 -0
- package/guide/correctness/index.md +45 -0
- package/guide/correctness/invariants.md +89 -0
- package/guide/correctness/safety.md +96 -0
- package/guide/history/diff.md +110 -0
- package/guide/history/hidden-git.md +130 -0
- package/guide/history/index.md +52 -0
- package/guide/history/remote-sync.md +113 -0
- package/guide/index.md +134 -0
- package/guide/isolation/coexistence.md +134 -0
- package/guide/isolation/index.md +44 -0
- package/guide/isolation/scale.md +251 -0
- package/guide/layout/in-place-mode.md +97 -0
- package/guide/layout/index.md +53 -0
- package/guide/layout/layout-contract.md +131 -0
- package/guide/layout/layout-modes.md +115 -0
- package/guide/operations/index.md +76 -0
- package/guide/operations/ingest/build.md +75 -0
- package/guide/operations/ingest/extend.md +61 -0
- package/guide/operations/ingest/index.md +54 -0
- package/guide/operations/ingest/join.md +65 -0
- package/guide/operations/maintain/fix.md +66 -0
- package/guide/operations/maintain/index.md +47 -0
- package/guide/operations/maintain/rebuild.md +86 -0
- package/guide/operations/validate.md +48 -0
- package/guide/substrate/index.md +47 -0
- package/guide/substrate/operators.md +96 -0
- package/guide/substrate/tiered-ai.md +363 -0
- package/guide/ux/index.md +44 -0
- package/guide/ux/preflight.md +150 -0
- package/guide/ux/user-intent.md +135 -0
- package/package.json +55 -0
- package/scripts/cli.mjs +893 -0
- package/scripts/commands/remote.mjs +93 -0
- package/scripts/commands/review.mjs +253 -0
- package/scripts/commands/sync.mjs +84 -0
- package/scripts/lib/chunk.mjs +421 -0
- package/scripts/lib/cluster-detect.mjs +516 -0
- package/scripts/lib/decision-log.mjs +343 -0
- package/scripts/lib/draft.mjs +158 -0
- package/scripts/lib/embeddings.mjs +366 -0
- package/scripts/lib/frontmatter.mjs +497 -0
- package/scripts/lib/git-commands.mjs +155 -0
- package/scripts/lib/git.mjs +486 -0
- package/scripts/lib/gitignore.mjs +62 -0
- package/scripts/lib/history.mjs +331 -0
- package/scripts/lib/indices.mjs +510 -0
- package/scripts/lib/ingest.mjs +258 -0
- package/scripts/lib/intent.mjs +713 -0
- package/scripts/lib/interactive.mjs +99 -0
- package/scripts/lib/migrate.mjs +126 -0
- package/scripts/lib/nest-applier.mjs +260 -0
- package/scripts/lib/operators.mjs +1365 -0
- package/scripts/lib/orchestrator.mjs +718 -0
- package/scripts/lib/paths.mjs +197 -0
- package/scripts/lib/preflight.mjs +213 -0
- package/scripts/lib/provenance.mjs +672 -0
- package/scripts/lib/quality-metric.mjs +269 -0
- package/scripts/lib/query-fixture.mjs +71 -0
- package/scripts/lib/rollback.mjs +95 -0
- package/scripts/lib/shape-check.mjs +172 -0
- package/scripts/lib/similarity-cache.mjs +126 -0
- package/scripts/lib/similarity.mjs +230 -0
- package/scripts/lib/snapshot.mjs +54 -0
- package/scripts/lib/source-frontmatter.mjs +85 -0
- package/scripts/lib/tier2-protocol.mjs +470 -0
- package/scripts/lib/tiered.mjs +453 -0
- package/scripts/lib/validate.mjs +362 -0
|
@@ -0,0 +1,343 @@
|
|
|
1
|
+
// decision-log.mjs — append-only audit trail for tiered-AI decisions.
|
|
2
|
+
//
|
|
3
|
+
// Every non-trivial similarity / operator decision records:
|
|
4
|
+
//
|
|
5
|
+
// { op_id, operator, sources[], tier_used, similarity,
|
|
6
|
+
// confidence_band, decision, reason }
|
|
7
|
+
//
|
|
8
|
+
// Stored at `<wiki>/.llmwiki/decisions.yaml`. Same hand-rolled
|
|
9
|
+
// deterministic YAML emitter/parser pattern as history.mjs — no
|
|
10
|
+
// external dep. Atomic append via temp-file + rename.
|
|
11
|
+
//
|
|
12
|
+
// Claude-at-session-time reads this log when a user asks "why was
|
|
13
|
+
// this merged?" so the audit trail has to survive across operations
|
|
14
|
+
// unchanged. The log is intentionally NOT reset on rollback — if an
|
|
15
|
+
// op's decisions are a matter of historical record, they remain
|
|
16
|
+
// queryable even after the op is reset.
|
|
17
|
+
|
|
18
|
+
import {
|
|
19
|
+
existsSync,
|
|
20
|
+
mkdirSync,
|
|
21
|
+
readFileSync,
|
|
22
|
+
renameSync,
|
|
23
|
+
writeFileSync,
|
|
24
|
+
} from "node:fs";
|
|
25
|
+
import { dirname, join } from "node:path";
|
|
26
|
+
|
|
27
|
+
/**
 * Resolve where the decision log of a wiki lives.
 *
 * @param {string} wikiRoot - Root directory of the wiki.
 * @returns {string} `<wikiRoot>/.llmwiki/decisions.yaml`.
 */
export function decisionLogPath(wikiRoot) {
  const segments = [wikiRoot, ".llmwiki", "decisions.yaml"];
  return join(...segments);
}
|
|
30
|
+
|
|
31
|
+
// Keys every decision entry must carry before it may be written.
const REQUIRED_FIELDS = [
  "op_id",
  "operator",
  "sources",
  "tier_used",
  "similarity",
  "decision",
];

/**
 * Throw unless `entry` is a well-formed decision record.
 *
 * Checks run in a fixed order: object shape, presence of every required
 * key, `sources` being an array, `similarity` being a finite number and
 * `tier_used` being an integer.
 *
 * @param {object} entry - Candidate decision record.
 * @throws {Error} Describing the first rule the entry violates.
 */
function validate(entry) {
  const isObject = Boolean(entry) && typeof entry === "object";
  if (!isObject) {
    throw new Error("decision-log: entry must be an object");
  }

  // Presence only: a key whose value is undefined still counts as present.
  const missing = REQUIRED_FIELDS.find((field) => !(field in entry));
  if (missing !== undefined) {
    throw new Error(`decision-log: entry missing required field "${missing}"`);
  }

  const { sources, similarity, tier_used: tier } = entry;

  if (!Array.isArray(sources)) {
    throw new Error("decision-log: sources must be an array of strings");
  }

  // NaN, +/-Infinity and non-numbers are all refused here: the emitter
  // would write them as bare words that the reader returns as strings.
  if (!Number.isFinite(similarity)) {
    throw new Error(
      `decision-log: similarity must be a finite number (got ${similarity})`,
    );
  }

  // Number.isInteger is already false for every non-number value.
  if (!Number.isInteger(tier)) {
    throw new Error("decision-log: tier_used must be an integer");
  }
}
|
|
67
|
+
|
|
68
|
+
// Quote any string that could be misread as YAML (same rules as
|
|
69
|
+
// history.mjs). We intentionally keep the scalar shape identical
|
|
70
|
+
// to the op-log's so a future consolidation is mechanical.
|
|
71
|
+
/**
 * Decide whether a string must be written as a double-quoted scalar so
 * that this module's reader returns it unchanged and as a string.
 *
 * @param {string} value - String about to be emitted.
 * @returns {boolean} True when the bare form would be misread.
 */
function needsQuoting(value) {
  if (value === "") return true;
  // YAML indicator characters, quotes and escapable whitespace anywhere.
  if (/[:#{}\[\],&*!|>'"`\n\r\t]/.test(value)) return true;
  if (/^[- ?]/.test(value)) return true;
  // The reader skips whitespace after the key's colon and trims the
  // op_id value, so edge whitespace only survives inside quotes.
  if (/^\s|\s$/.test(value)) return true;
  // Integer, decimal and scientific-notation look-alikes ("42", "0.5",
  // "1e-10"): the reader converts bare text of this shape to a number.
  if (/^-?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?$/.test(value)) return true;
  if (value === "true" || value === "false" || value === "null") return true;
  return false;
}
|
|
79
|
+
|
|
80
|
+
/**
 * Render a string as a double-quoted scalar.
 *
 * Backslash, double quote, newline, carriage return and tab are written
 * as two-character escapes; every other character is copied through.
 *
 * @param {string} value - Text to quote.
 * @returns {string} The quoted, escaped form including both quotes.
 * @throws {Error} When `value` contains a control character below
 *   U+0020 other than tab, LF or CR (reported for the first one found).
 */
function escapeQuoted(value) {
  const forbidden = /[\x00-\x08\x0b\x0c\x0e-\x1f]/.exec(value);
  if (forbidden !== null) {
    const hex = forbidden[0].charCodeAt(0).toString(16).padStart(4, "0");
    throw new Error(
      `decision-log: control character U+${hex} is not round-trip-safe`,
    );
  }

  const escapes = {
    "\\": "\\\\",
    '"': '\\"',
    "\n": "\\n",
    "\r": "\\r",
    "\t": "\\t",
  };
  const body = value.replace(/[\\"\n\r\t]/g, (ch) => escapes[ch]);
  return `"${body}"`;
}
|
|
102
|
+
|
|
103
|
+
/**
 * Serialise one scalar for the decision log.
 *
 * `null` and `undefined` become the word `null`; booleans and numbers
 * use their `String()` form; strings are quoted only when
 * `needsQuoting` says so.
 *
 * @param {null|undefined|boolean|number|string} value - Scalar to emit.
 * @returns {string} Text to place after the key or list marker.
 * @throws {Error} For any other type (objects, arrays, bigint, ...).
 */
function emitScalar(value) {
  if (value == null) return "null";
  switch (typeof value) {
    case "boolean":
    case "number":
      return String(value);
    case "string":
      return needsQuoting(value) ? escapeQuoted(value) : value;
    default:
      throw new Error(
        `decision-log: unsupported scalar type ${typeof value}`,
      );
  }
}
|
|
114
|
+
|
|
115
|
+
/**
 * Render one decision record as a list item of the log.
 *
 * Keys are written in a fixed order: op_id, operator, sources (one
 * list line per source), tier_used, similarity, confidence_band,
 * decision, reason. A missing confidence_band or reason is written as
 * `null`.
 *
 * @param {object} entry - Record to render; `entry.sources` is iterated.
 * @returns {string} The lines joined with "\n", no trailing newline.
 */
function emitEntry(entry) {
  // Array elements are evaluated top to bottom, so scalars are emitted
  // (and any emit error is raised) in the same order they are written.
  return [
    `- op_id: ${emitScalar(entry.op_id)}`,
    ` operator: ${emitScalar(entry.operator)}`,
    " sources:",
    ...Array.from(entry.sources, (src) => ` - ${emitScalar(src)}`),
    ` tier_used: ${emitScalar(entry.tier_used)}`,
    ` similarity: ${emitScalar(entry.similarity)}`,
    ` confidence_band: ${emitScalar(entry.confidence_band ?? null)}`,
    ` decision: ${emitScalar(entry.decision)}`,
    ` reason: ${emitScalar(entry.reason ?? null)}`,
  ].join("\n");
}
|
|
132
|
+
|
|
133
|
+
// Append an entry atomically.
|
|
134
|
+
/**
 * Validate `entry` and add it to the end of the wiki's decision log.
 *
 * The log directory is created if needed. A new file starts with a
 * comment line, `version: 1` and `entries:`; an existing file is read
 * in full and the entry is appended after it (a final newline is added
 * first when the file lacks one). The result is written to a sibling
 * temp file and then renamed over the log.
 *
 * @param {string} wikiRoot - Root directory of the wiki.
 * @param {object} entry - Decision record; must pass `validate`.
 * @throws {Error} When validation or serialisation rejects the entry,
 *   or when a filesystem call fails.
 */
export function appendDecision(wikiRoot, entry) {
  validate(entry);

  const logFile = decisionLogPath(wikiRoot);
  mkdirSync(dirname(logFile), { recursive: true });

  const rendered = `${emitEntry(entry)}\n`;

  let head;
  if (existsSync(logFile)) {
    const previous = readFileSync(logFile, "utf8");
    head = previous.endsWith("\n") ? previous : `${previous}\n`;
  } else {
    head = [
      "# skill-llm-wiki tiered-AI decision log (append-only)",
      "version: 1",
      "entries:",
      "",
    ].join("\n");
  }

  // Write the complete new content beside the log, then swap it in.
  const scratch = `${logFile}.tmp.${process.pid}.${Date.now()}`;
  writeFileSync(scratch, head + rendered, "utf8");
  renameSync(scratch, logFile);
}
|
|
155
|
+
|
|
156
|
+
// Convenience helper for cluster-NEST outcomes. The convergence
|
|
157
|
+
// loop calls this for every math-only proposal (with its
|
|
158
|
+
// Tier 2 gate decision) and for every Tier-2-proposed cluster
|
|
159
|
+
// (with decision="tier2-approved" or "rejected-by-metric"). The
|
|
160
|
+
// entry lands in the same entries[] list as pairwise decisions
|
|
161
|
+
// so the audit trail for one op is queryable as a single stream.
|
|
162
|
+
//
|
|
163
|
+
// Schema translation:
|
|
164
|
+
//
|
|
165
|
+
// op_id, operator="NEST" — as-is
|
|
166
|
+
// sources — leaf ids in the cluster
|
|
167
|
+
// tier_used — 2 (every NEST decision
|
|
168
|
+
// touches Tier 2 either
|
|
169
|
+
// via propose_structure
|
|
170
|
+
// or nest_decision)
|
|
171
|
+
// similarity — average_affinity
|
|
172
|
+
// confidence_band — one of:
|
|
173
|
+
// "tier2-proposed",
|
|
174
|
+
// "math-gated",
|
|
175
|
+
// "empty-partition",
|
|
176
|
+
// "rejected-by-metric",
|
|
177
|
+
// "rejected-by-gate"
|
|
178
|
+
// decision — one of:
|
|
179
|
+
// "applied",
|
|
180
|
+
// "rejected-by-metric",
|
|
181
|
+
// "rejected-by-gate",
|
|
182
|
+
// "rejected-stale",
|
|
183
|
+
// "slug-renamed",
|
|
184
|
+
// "pending-tier2"
|
|
185
|
+
// reason — free text
|
|
186
|
+
//
|
|
187
|
+
// Coercion: average_affinity may be undefined for Tier-2-proposed
|
|
188
|
+
// clusters; we coerce to 0 so the finite-number validator does
|
|
189
|
+
// not reject the entry.
|
|
190
|
+
/**
 * Record a cluster-NEST outcome in the decision log.
 *
 * Builds a record with `operator: "NEST"` and `tier_used: 2` and hands
 * it to `appendDecision`. The similarity is `entry.similarity` when it
 * is a finite number, otherwise `entry.average_affinity` when that is
 * finite, otherwise 0. A non-array `sources` becomes an empty list;
 * a missing `confidence_band` or `reason` becomes `null`.
 *
 * @param {string} wikiRoot - Root directory of the wiki.
 * @param {object} entry - NEST outcome (op_id, sources, similarity or
 *   average_affinity, confidence_band, decision, reason).
 */
export function appendNestDecision(wikiRoot, entry) {
  let similarity = 0;
  if (Number.isFinite(entry.similarity)) {
    similarity = entry.similarity;
  } else if (Number.isFinite(entry.average_affinity)) {
    similarity = entry.average_affinity;
  }

  const sources = Array.isArray(entry.sources) ? entry.sources : [];

  const record = {
    op_id: entry.op_id,
    operator: "NEST",
    sources,
    tier_used: 2,
    similarity,
    confidence_band: entry.confidence_band ?? null,
    decision: entry.decision,
    reason: entry.reason ?? null,
  };
  appendDecision(wikiRoot, record);
}
|
|
206
|
+
|
|
207
|
+
// Append the per-iteration metric trajectory for an op. Writes
|
|
208
|
+
// one entry per trajectory point with operator="METRIC_TRAJECTORY"
|
|
209
|
+
// so readers can recover the full cost curve for each op by
|
|
210
|
+
// filtering the entries[] list. `trajectory` is the
|
|
211
|
+
// metric_trajectory array produced by runConvergence: an array of
|
|
212
|
+
// `{ iteration, cost, event }` records. Writes even a single-
|
|
213
|
+
// point baseline trajectory so the log carries evidence that the
|
|
214
|
+
// convergence loop ran (rather than being silently skipped).
|
|
215
|
+
/**
 * Log one `METRIC_TRAJECTORY` entry per point of a metric trajectory.
 *
 * Each point is appended as soon as it is converted, in array order.
 * A non-finite `cost` is logged as 0 and a non-integer `iteration` as
 * 0; a missing `event` becomes "unknown" and a missing `reason`
 * becomes `null`.
 *
 * @param {string} wikiRoot - Root directory of the wiki.
 * @param {*} opId - Operation id stored as `op_id` on every entry.
 * @param {Array<object>} trajectory - Points with `iteration`, `cost`,
 *   `event` and optionally `reason`. Anything that is not an array is
 *   ignored and nothing is written.
 */
export function appendMetricTrajectory(wikiRoot, opId, trajectory) {
  if (Array.isArray(trajectory) === false) return;

  const toRecord = (sample) => {
    const similarity = Number.isFinite(sample.cost) ? sample.cost : 0;
    const step = Number.isInteger(sample.iteration) ? sample.iteration : 0;
    return {
      op_id: opId,
      operator: "METRIC_TRAJECTORY",
      sources: [`iter-${step}`],
      tier_used: 0,
      similarity,
      confidence_band: sample.event ?? "unknown",
      decision: "measured",
      reason: sample.reason ?? null,
    };
  };

  for (const sample of trajectory) {
    appendDecision(wikiRoot, toRecord(sample));
  }
}
|
|
232
|
+
|
|
233
|
+
// Lightweight reader — we parse only what we need for tests and the
|
|
234
|
+
// `skill-llm-wiki history` subcommand. Errors out loudly on any line
|
|
235
|
+
// the parser doesn't recognise.
|
|
236
|
+
/**
 * Read every entry of a wiki's decision log.
 *
 * Blank lines and lines whose first non-space character is `#` are
 * ignored. An optional `version:` line and an optional `entries:` line
 * are skipped at the top. Each `- op_id:` line starts a new entry; list
 * lines are collected into its `sources`; every other `key: value` line
 * is stored on the entry under that key.
 *
 * @param {string} wikiRoot - Root directory of the wiki.
 * @returns {Array<object>} Parsed entries in file order; an empty array
 *   when the log file does not exist.
 * @throws {Error} On a line outside any entry or a line of unknown
 *   shape. The message carries the line's 1-based position in the file.
 */
export function readDecisions(wikiRoot) {
  const path = decisionLogPath(wikiRoot);
  if (!existsSync(path)) return [];
  const raw = readFileSync(path, "utf8");
  // Number the lines BEFORE dropping comments and blanks, so that error
  // messages point at the real file line rather than at an index into
  // the filtered list.
  const lines = raw
    .split(/\r?\n/)
    .map((text, index) => ({ text, lineNo: index + 1 }))
    .filter(({ text }) => text.length > 0 && !/^\s*#/.test(text));
  const out = [];
  let i = 0;
  // Header: version + entries:
  if (i < lines.length && lines[i].text.startsWith("version:")) i++;
  if (i < lines.length && lines[i].text.trim() === "entries:") i++;
  let current = null;
  for (; i < lines.length; i++) {
    const { text: line, lineNo } = lines[i];
    if (line.startsWith("- op_id:")) {
      if (current) out.push(current);
      current = {
        op_id: parseValue(line.slice("- op_id:".length).trim()),
        sources: [],
      };
      continue;
    }
    if (!current) {
      throw new Error(`decision-log parser: stray line at ${lineNo}: ${line}`);
    }
    const listItem = /^ - (.*)$/.exec(line);
    if (listItem) {
      current.sources.push(parseValue(listItem[1]));
      continue;
    }
    const kv = /^ (\w+):\s*(.*)$/.exec(line);
    if (!kv) {
      throw new Error(
        `decision-log parser: unrecognised line at ${lineNo}: ${line}`,
      );
    }
    const [, key, rest] = kv;
    if (key === "sources") {
      // `sources:` alone introduces the list items; items start with ` - `.
      current.sources = [];
      continue;
    }
    current[key] = parseValue(rest);
  }
  if (current) out.push(current);
  return out;
}
|
|
286
|
+
|
|
287
|
+
/**
 * Undo the escaping of a double-quoted scalar's body.
 *
 * `\\`, `\"`, `\n`, `\r` and `\t` map back to their characters. Any
 * other escaped character is kept without the backslash, and a
 * backslash that is the very last character is kept as is.
 *
 * @param {string} body - Text between the two surrounding quotes.
 * @returns {string} The decoded string.
 */
function unescapeQuoted(body) {
  const decoded = new Map([
    ["\\", "\\"],
    ['"', '"'],
    ["n", "\n"],
    ["r", "\r"],
    ["t", "\t"],
  ]);
  // Matches are consumed left to right in backslash+character pairs,
  // so `\\n` decodes to a backslash followed by the letter n.
  return body.replace(/\\([\s\S])/g, (_pair, ch) => decoded.get(ch) ?? ch);
}
|
|
307
|
+
|
|
308
|
+
// Scientific-notation friendly number regex. Matches `0.5`, `1e-10`,
|
|
309
|
+
// `-3.14`, `42`, `-5`. Does NOT match `Infinity`, `NaN`, or
|
|
310
|
+
// hexadecimal — those are either forbidden by the validator or
|
|
311
|
+
// expressible as unambiguous strings.
|
|
312
|
+
const NUMBER_RE = /^-?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?$/;

/**
 * Convert the raw text of one scalar into a JavaScript value.
 *
 * `null` and the empty string become `null`; `true` / `false` become
 * booleans; digit-only text becomes an integer; decimal and
 * scientific-notation text becomes a number; text wrapped in double
 * quotes is unescaped; anything else is returned unchanged.
 *
 * @param {string} raw - Text after the key or list marker.
 * @returns {null|boolean|number|string} The decoded value.
 * @throws {Error} For an integer outside the safe-integer range, a
 *   number that overflows to a non-finite value, or a double quote at
 *   only one end of the text (including a lone `"`).
 */
function parseValue(raw) {
  if (raw === "null" || raw === "") return null;
  if (raw === "true") return true;
  if (raw === "false") return false;
  if (/^-?\d+$/.test(raw)) {
    const n = Number(raw);
    if (!Number.isSafeInteger(n)) {
      throw new Error(
        `decision-log parser: integer ${raw} is not a safe integer`,
      );
    }
    return n;
  }
  if (NUMBER_RE.test(raw)) {
    const n = Number(raw);
    if (!Number.isFinite(n)) {
      throw new Error(
        `decision-log parser: non-finite numeric value ${raw}`,
      );
    }
    return n;
  }
  const opens = raw.startsWith('"');
  const closes = raw.endsWith('"');
  if (opens || closes) {
    // A lone `"` both starts and ends with a quote, so only the length
    // check tells it apart from a real opening/closing pair.
    if (!(opens && closes && raw.length >= 2)) {
      throw new Error(`decision-log parser: unbalanced quote in: ${raw}`);
    }
    return unescapeQuoted(raw.slice(1, -1));
  }
  return raw;
}
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
// Draft frontmatter: deterministic extraction only.
|
|
2
|
+
//
|
|
3
|
+
// This is the script-side of the script-first + AI-fallback pipeline
|
|
4
|
+
// documented in methodology §9.6. It handles the "structured source" case
|
|
5
|
+
// where frontmatter can be derived mechanically from file metadata:
|
|
6
|
+
// - id from filename
|
|
7
|
+
// - focus from title or lead paragraph
|
|
8
|
+
// - covers[] from H2 sections or bulleted items in the lead
|
|
9
|
+
// - tags[] from filename prefixes or directory hints
|
|
10
|
+
// - activation from file_glob inferred from the source path
|
|
11
|
+
//
|
|
12
|
+
// When the source file ALREADY carries a frontmatter block (parsed at
|
|
13
|
+
// ingest time via gray-matter and stashed as
|
|
14
|
+
// `candidate.authored_frontmatter`), each AUTHORED_LEAF_FIELD is
|
|
15
|
+
// preferred over the heuristic — the drafter only fills gaps. This is
|
|
16
|
+
// what preserves `activation`, `covers`, `tags`, `focus`, `domains`,
|
|
17
|
+
// `shared_covers`, `aliases`, and friends when a hand-tuned corpus is
|
|
18
|
+
// re-built.
|
|
19
|
+
//
|
|
20
|
+
// Anything that needs semantic understanding (prose-heavy draft, ambiguous
|
|
21
|
+
// classification, cover synthesis from narrative) is left for Claude to
|
|
22
|
+
// handle inside its own execution context when running this skill. The
|
|
23
|
+
// `needs_ai` flag on the returned draft tells the caller which entries
|
|
24
|
+
// need AI review.
|
|
25
|
+
|
|
26
|
+
// Fields we copy straight from the source frontmatter when the author
|
|
27
|
+
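// supplied them. Fields NOT in this list (id / type / depth_role /
// source) are always re-derived because their authoritative source is
// the target-tree position, not the original source file. `parents` is
// also outside this list but is handled separately: an authored value
// is kept, and `["index.md"]` is used when the author supplied none.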
|
|
30
|
+
// Frontmatter keys taken from the source file's own frontmatter when
// the author supplied them.
const AUTHORED_LEAF_FIELDS = [
  "focus",
  "covers",
  "tags",
  "domains",
  "aliases",
  "activation",
  "shared_covers",
  "overlay_targets",
  "links",
];

/**
 * Draft the frontmatter of a leaf from a source candidate.
 *
 * `focus`, `covers`, `tags` and `parents` prefer a non-empty authored
 * value and otherwise fall back to a heuristic (title or id, extracted
 * covers, inferred tags, and `["index.md"]` respectively). The other
 * AUTHORED_LEAF_FIELDS are copied only when the candidate is flagged
 * with `has_authored_frontmatter === true` and the authored value is
 * neither null nor undefined; otherwise the key is omitted.
 *
 * @param {object} candidate - Source candidate (id, title, source_path,
 *   hash, authored_frontmatter, has_authored_frontmatter, plus whatever
 *   the cover/tag/confidence helpers read).
 * @param {object} [options] - `categoryPath` is accepted but not read
 *   by this function.
 * @returns {{data: object, confidence: number, needs_ai: boolean}}
 *   The draft, its confidence score, and whether the score is below 0.6.
 */
export function draftLeafFrontmatter(candidate, { categoryPath } = {}) {
  const authored = candidate.authored_frontmatter || {};

  // Heuristic values, used wherever the author left a field empty.
  const heuristic = {
    covers: extractCovers(candidate),
    focus: candidate.title || candidate.id,
    tags: inferTags(candidate),
  };

  const data = {
    id: candidate.id,
    type: "primary",
    depth_role: "leaf",
    focus: pickAuthored(authored.focus, heuristic.focus),
    covers: pickAuthored(authored.covers, heuristic.covers),
    parents: pickAuthored(authored.parents, ["index.md"]),
    tags: pickAuthored(authored.tags, heuristic.tags),
    source: {
      origin: "file",
      path: candidate.source_path,
      hash: candidate.hash,
    },
  };

  if (candidate.has_authored_frontmatter === true) {
    // These three were already resolved above with a heuristic fallback.
    const resolvedAbove = new Set(["focus", "covers", "tags"]);
    for (const field of AUTHORED_LEAF_FIELDS) {
      if (resolvedAbove.has(field)) continue;
      const authoredValue = authored[field];
      if (authoredValue == null) continue;
      data[field] = authoredValue;
    }
  }

  const confidence = scoreConfidence(data, candidate);
  const needs_ai = confidence < 0.6;
  return { data, confidence, needs_ai };
}
|
|
89
|
+
|
|
90
|
+
/**
 * Choose between an authored value and a fallback.
 *
 * The fallback wins when the authored value is null, undefined, an
 * empty array, or a string that is empty after trimming. Every other
 * authored value (including 0, false and objects) is returned as is.
 *
 * @param {*} authoredVal - Value supplied by the author, if any.
 * @param {*} fallback - Value to use when nothing usable was authored.
 * @returns {*} `authoredVal` or `fallback`.
 */
function pickAuthored(authoredVal, fallback) {
  if (authoredVal == null) return fallback;
  const isEmptyList = Array.isArray(authoredVal) && authoredVal.length === 0;
  const isBlankText =
    typeof authoredVal === "string" && authoredVal.trim() === "";
  return isEmptyList || isBlankText ? fallback : authoredVal;
}
|
|
100
|
+
|
|
101
|
+
/**
 * Derive `covers` entries from a candidate.
 *
 * The texts of the first ten level-2 headings are used. When there is
 * no level-2 heading, the lead is split after `.`, `!` or `?` followed
 * by whitespace; sentences longer than ten characters are kept, each
 * cut to 120 characters, and at most five are returned.
 *
 * @param {object} candidate - Needs an iterable `headings` of
 *   `{ level, text }` records and optionally a `lead` string.
 * @returns {string[]} A new array of cover strings (possibly empty).
 */
function extractCovers(candidate) {
  const sectionTitles = [...candidate.headings]
    .filter((heading) => heading.level === 2)
    .slice(0, 10)
    .map((heading) => heading.text);
  if (sectionTitles.length > 0) return sectionTitles;

  // No H2 at all: fall back to the opening sentences of the lead.
  const lead = candidate.lead || "";
  return lead
    .split(/(?<=[.!?])\s+/)
    .filter((sentence) => sentence.length > 10)
    .slice(0, 5)
    .map((sentence) => sentence.slice(0, 120));
}
|
|
119
|
+
|
|
120
|
+
/**
 * Infer tags from the directories of the candidate's source path.
 *
 * Every directory component (everything but the final path segment) is
 * lower-cased and slugged: runs of characters outside `a-z0-9-` become
 * a single dash, and dashes at either end are removed. Empty, `.` and
 * digit-only components are skipped, as is any component whose slug is
 * empty (for example `..`), so a bare "-" never becomes a tag. A
 * `.md` extension adds the tag "markdown". At most eight tags are
 * returned, in first-seen order without duplicates.
 *
 * @param {object} candidate - Needs `source_path` (string, `/` or `\`
 *   separated) and optionally `ext`.
 * @returns {string[]} The inferred tags.
 */
function inferTags(candidate) {
  const tags = new Set();
  // Directory components as tag hints.
  const parts = candidate.source_path.split(/[\/\\]/);
  for (const part of parts.slice(0, -1)) {
    if (!part || part === "." || /^\d+$/.test(part)) continue;
    const slug = part
      .toLowerCase()
      .replace(/[^a-z0-9-]+/g, "-")
      .replace(/^-+|-+$/g, "");
    // Punctuation-only or non-ASCII names slug down to nothing.
    if (slug) tags.add(slug);
  }
  // Extension hint.
  if (candidate.ext === ".md") tags.add("markdown");
  return [...tags].slice(0, 8);
}
|
|
133
|
+
|
|
134
|
+
/**
 * Score how trustworthy a heuristic draft is, from 0 to 1.
 *
 * Points are added in this order: 0.3 for a focus that is set and
 * differs from the candidate id; 0.4 for three or more covers, or 0.2
 * for one or two; 0.2 when the candidate has at least two level-2
 * headings; 0.1 when the candidate size exceeds 200.
 *
 * @param {object} draft - Needs `focus` and `covers`.
 * @param {object} candidate - Needs `id`, `headings` and `size`.
 * @returns {number} The summed score, capped at 1.
 */
function scoreConfidence(draft, candidate) {
  const hasDistinctFocus =
    Boolean(draft.focus) && draft.focus !== candidate.id;

  const coverCount = draft.covers.length;
  let coverPoints = 0;
  if (coverCount >= 3) {
    coverPoints = 0.4;
  } else if (coverCount >= 1) {
    coverPoints = 0.2;
  }

  const h2Count = candidate.headings.reduce(
    (count, heading) => (heading.level === 2 ? count + 1 : count),
    0,
  );

  // Summed left to right so the floating-point result stays the same.
  const contributions = [
    hasDistinctFocus ? 0.3 : 0,
    coverPoints,
    h2Count >= 2 ? 0.2 : 0,
    candidate.size > 200 ? 0.1 : 0,
  ];
  const total = contributions.reduce((sum, points) => sum + points, 0);
  return Math.min(1, total);
}
|
|
143
|
+
|
|
144
|
+
// Quick classification by directory prefix. Script-first classifier.
|
|
145
|
+
//
|
|
146
|
+
// When the source file lives at the source root (no directory
|
|
147
|
+
// component), the candidate is placed at the TARGET root — not under a
|
|
148
|
+
// synthetic `general/` bucket. This is what keeps a flat authored
|
|
149
|
+
// guide flat in the output: 17 top-level leaves stay at the wiki root
|
|
150
|
+
// instead of being nested under `general/`.
|
|
151
|
+
//
|
|
152
|
+
// Subdirectories in the source are preserved as top-level categories
|
|
153
|
+
// in the target (e.g. `operations/build.md` → `operations/build.md`).
|
|
154
|
+
/**
 * Pick the top-level category for a candidate from its source path.
 *
 * Empty and `.` components are ignored, so `file.md`, `./file.md` and
 * `/file.md` all count as root-level files and yield "". Otherwise the
 * first remaining component is lower-cased and slugged: runs of
 * characters outside `a-z0-9-` become a single dash and dashes at
 * either end are removed. A component that slugs down to nothing
 * (for example `..`) also yields "".
 *
 * @param {object} candidate - Needs `source_path` (string, `/` or `\`
 *   separated).
 * @returns {string} The category slug, or "" for the target root.
 */
export function draftCategory(candidate) {
  const parts = candidate.source_path
    .split(/[\/\\]/)
    .filter((part) => part !== "" && part !== ".");
  if (parts.length <= 1) return "";
  return parts[0]
    .toLowerCase()
    .replace(/[^a-z0-9-]+/g, "-")
    .replace(/^-+|-+$/g, "");
}
|