@ctxr/skill-llm-wiki 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +134 -0
- package/LICENSE +21 -0
- package/README.md +484 -0
- package/SKILL.md +252 -0
- package/guide/basics/concepts.md +74 -0
- package/guide/basics/index.md +45 -0
- package/guide/basics/schema.md +140 -0
- package/guide/cli.md +256 -0
- package/guide/correctness/index.md +45 -0
- package/guide/correctness/invariants.md +89 -0
- package/guide/correctness/safety.md +96 -0
- package/guide/history/diff.md +110 -0
- package/guide/history/hidden-git.md +130 -0
- package/guide/history/index.md +52 -0
- package/guide/history/remote-sync.md +113 -0
- package/guide/index.md +134 -0
- package/guide/isolation/coexistence.md +134 -0
- package/guide/isolation/index.md +44 -0
- package/guide/isolation/scale.md +251 -0
- package/guide/layout/in-place-mode.md +97 -0
- package/guide/layout/index.md +53 -0
- package/guide/layout/layout-contract.md +131 -0
- package/guide/layout/layout-modes.md +115 -0
- package/guide/operations/index.md +76 -0
- package/guide/operations/ingest/build.md +75 -0
- package/guide/operations/ingest/extend.md +61 -0
- package/guide/operations/ingest/index.md +54 -0
- package/guide/operations/ingest/join.md +65 -0
- package/guide/operations/maintain/fix.md +66 -0
- package/guide/operations/maintain/index.md +47 -0
- package/guide/operations/maintain/rebuild.md +86 -0
- package/guide/operations/validate.md +48 -0
- package/guide/substrate/index.md +47 -0
- package/guide/substrate/operators.md +96 -0
- package/guide/substrate/tiered-ai.md +363 -0
- package/guide/ux/index.md +44 -0
- package/guide/ux/preflight.md +150 -0
- package/guide/ux/user-intent.md +135 -0
- package/package.json +55 -0
- package/scripts/cli.mjs +893 -0
- package/scripts/commands/remote.mjs +93 -0
- package/scripts/commands/review.mjs +253 -0
- package/scripts/commands/sync.mjs +84 -0
- package/scripts/lib/chunk.mjs +421 -0
- package/scripts/lib/cluster-detect.mjs +516 -0
- package/scripts/lib/decision-log.mjs +343 -0
- package/scripts/lib/draft.mjs +158 -0
- package/scripts/lib/embeddings.mjs +366 -0
- package/scripts/lib/frontmatter.mjs +497 -0
- package/scripts/lib/git-commands.mjs +155 -0
- package/scripts/lib/git.mjs +486 -0
- package/scripts/lib/gitignore.mjs +62 -0
- package/scripts/lib/history.mjs +331 -0
- package/scripts/lib/indices.mjs +510 -0
- package/scripts/lib/ingest.mjs +258 -0
- package/scripts/lib/intent.mjs +713 -0
- package/scripts/lib/interactive.mjs +99 -0
- package/scripts/lib/migrate.mjs +126 -0
- package/scripts/lib/nest-applier.mjs +260 -0
- package/scripts/lib/operators.mjs +1365 -0
- package/scripts/lib/orchestrator.mjs +718 -0
- package/scripts/lib/paths.mjs +197 -0
- package/scripts/lib/preflight.mjs +213 -0
- package/scripts/lib/provenance.mjs +672 -0
- package/scripts/lib/quality-metric.mjs +269 -0
- package/scripts/lib/query-fixture.mjs +71 -0
- package/scripts/lib/rollback.mjs +95 -0
- package/scripts/lib/shape-check.mjs +172 -0
- package/scripts/lib/similarity-cache.mjs +126 -0
- package/scripts/lib/similarity.mjs +230 -0
- package/scripts/lib/snapshot.mjs +54 -0
- package/scripts/lib/source-frontmatter.mjs +85 -0
- package/scripts/lib/tier2-protocol.mjs +470 -0
- package/scripts/lib/tiered.mjs +453 -0
- package/scripts/lib/validate.mjs +362 -0
|
@@ -0,0 +1,1365 @@
|
|
|
1
|
+
// operators.mjs — the four (plus one) rewrite operators from
|
|
2
|
+
// methodology §3.5:
|
|
3
|
+
//
|
|
4
|
+
// DESCEND — push leaf-style content out of a parent index down
|
|
5
|
+
// into a new or existing child leaf.
|
|
6
|
+
// LIFT — flatten a folder that contains exactly one entry by
|
|
7
|
+
// moving the entry up to the parent directory.
|
|
8
|
+
// MERGE — fuse two siblings whose focus/covers overlap enough
|
|
9
|
+
// that keeping them separate is pure redundancy.
|
|
10
|
+
// NEST — extract multiple H2 specialisations out of one leaf
|
|
11
|
+
// into a child folder (stubbed in Phase 6 — H2 body
|
|
12
|
+
// reading is not yet wired through the chunked iterator
|
|
13
|
+
// so we detect but do not apply).
|
|
14
|
+
// DECOMPOSE — split one leaf into multiple peer entries when its
|
|
15
|
+
// `covers[]` cluster into disjoint groups (stubbed for
|
|
16
|
+
// the same reason).
|
|
17
|
+
//
|
|
18
|
+
// Tie-break priority (methodology §3.5): DESCEND > LIFT > MERGE >
|
|
19
|
+
// NEST > DECOMPOSE. Reducing moves (DESCEND, LIFT, MERGE) fire
|
|
20
|
+
// before expanding moves (NEST, DECOMPOSE) so expansion never
|
|
21
|
+
// wastes effort on structure that was going to collapse anyway.
|
|
22
|
+
//
|
|
23
|
+
// Phase 6 ships: LIFT, MERGE, DESCEND detection + application.
|
|
24
|
+
// NEST and DECOMPOSE are detected and reported as suggestions for
|
|
25
|
+
// the shape-check audit trail but NOT applied — their application
|
|
26
|
+
// requires frontmatter rewrites + folder creation that Phase 6's
|
|
27
|
+
// scope deliberately keeps out of the operator loop.
|
|
28
|
+
//
|
|
29
|
+
// Every similarity decision flows through `tiered.mjs`. Every
|
|
30
|
+
// operator application goes through `git add` + `git commit` via
|
|
31
|
+
// the orchestrator between iterations.
|
|
32
|
+
|
|
33
|
+
import {
|
|
34
|
+
existsSync,
|
|
35
|
+
mkdirSync,
|
|
36
|
+
readFileSync,
|
|
37
|
+
readdirSync,
|
|
38
|
+
renameSync,
|
|
39
|
+
rmSync,
|
|
40
|
+
writeFileSync,
|
|
41
|
+
} from "node:fs";
|
|
42
|
+
import { basename, dirname, join, relative } from "node:path";
|
|
43
|
+
import { parseFrontmatter, renderFrontmatter } from "./frontmatter.mjs";
|
|
44
|
+
import { collectFrontmatterOnly } from "./chunk.mjs";
|
|
45
|
+
import { listChildren, rebuildAllIndices } from "./indices.mjs";
|
|
46
|
+
import { buildComparisonModel } from "./similarity.mjs";
|
|
47
|
+
import {
|
|
48
|
+
countPendingRequests,
|
|
49
|
+
decide,
|
|
50
|
+
enqueuePending,
|
|
51
|
+
getResolvedResponse,
|
|
52
|
+
takePendingRequests,
|
|
53
|
+
} from "./tiered.mjs";
|
|
54
|
+
import {
|
|
55
|
+
buildProposeStructureRequest,
|
|
56
|
+
detectClusters,
|
|
57
|
+
MAX_CLUSTER_SIZE,
|
|
58
|
+
MIN_CLUSTER_SIZE,
|
|
59
|
+
MIN_MATH_CLUSTER_SIZE,
|
|
60
|
+
MIN_TIER2_CLUSTER_SIZE,
|
|
61
|
+
} from "./cluster-detect.mjs";
|
|
62
|
+
import { applyNest, resolveNestSlug, validateSlug } from "./nest-applier.mjs";
|
|
63
|
+
import { computeRoutingCost } from "./quality-metric.mjs";
|
|
64
|
+
import { loadFixture, resolveFromFixture } from "./tier2-protocol.mjs";
|
|
65
|
+
import { appendMetricTrajectory, appendNestDecision } from "./decision-log.mjs";
|
|
66
|
+
|
|
67
|
+
// Max iterations the convergence loop will run before declaring
|
|
68
|
+
// termination. The methodology's convergence argument proves it
|
|
69
|
+
// halts, but we still cap defensively in case two operators
|
|
70
|
+
// interact pathologically.
|
|
71
|
+
// Used as the default for `ctx.maxIterations` in runConvergence.
const MAX_CONVERGENCE_ITERATIONS = 20;
|
|
72
|
+
|
|
73
|
+
// Each operator returns an array of `Proposal` objects describing
|
|
74
|
+
// a change to apply. The loop priority-orders proposals, applies
|
|
75
|
+
// the highest-priority one, commits, and re-runs detection.
|
|
76
|
+
//
|
|
77
|
+
// A proposal has:
|
|
78
|
+
// operator — "LIFT" | "MERGE" | "DESCEND" | "NEST" | "DECOMPOSE"
|
|
79
|
+
// priority — numeric: higher = applied first
|
|
80
|
+
// sources — array of absolute paths affected
|
|
81
|
+
// apply — function({ wikiRoot, opId, decisionCtx }) → Promise<{ summary }>
|
|
82
|
+
// describe — short human-readable description for the commit message
|
|
83
|
+
|
|
84
|
+
// Tie-break weights for proposals: the convergence loop sorts by this
// value descending, so a larger number is applied first.
const PRIORITY = {
  // Reducing moves.
  DESCEND: 5,
  LIFT: 4,
  MERGE: 3,
  // Expanding moves.
  NEST: 2,
  DECOMPOSE: 1,
};
|
|
91
|
+
|
|
92
|
+
// ── LIFT ──────────────────────────────────────────────────────────────
|
|
93
|
+
//
|
|
94
|
+
// Detection: a non-root directory that contains exactly one leaf
|
|
95
|
+
// file and no indexed subdirs. Apply: move the leaf up one level,
|
|
96
|
+
// delete the now-empty folder.
|
|
97
|
+
/**
 * Collect LIFT proposals for the wiki.
 *
 * A directory qualifies when it is not the wiki root and `listChildren`
 * reports exactly one leaf and zero subdirs for it.
 *
 * @param {string} wikiRoot - root directory of the wiki being inspected.
 * @returns {Array<object>} one proposal per qualifying directory; each
 *   proposal's `apply` defers to `applyLift`.
 */
export function detectLift(wikiRoot) {
  const found = [];
  for (const folder of walkDirs(wikiRoot)) {
    // The root itself can never be lifted.
    if (folder === wikiRoot) continue;
    const children = listChildren(folder);
    const isSingleton =
      children.leaves.length === 1 && children.subdirs.length === 0;
    if (!isSingleton) continue;
    const onlyLeaf = children.leaves[0];
    found.push({
      operator: "LIFT",
      priority: PRIORITY.LIFT,
      sources: [onlyLeaf.path, folder],
      describe: `LIFT ${basename(onlyLeaf.path)} out of ${basename(folder)}/`,
      apply: async () => applyLift(wikiRoot, folder, onlyLeaf),
    });
  }
  return found;
}
|
|
116
|
+
|
|
117
|
+
/**
 * Move a folder's only leaf up into the folder's parent directory and
 * drop the folder if nothing is left in it.
 *
 * @param {string} wikiRoot - wiki root; used to pick the parent link form
 *   and to build the relative path in the summary.
 * @param {string} dir - the single-entry folder being flattened.
 * @param {{ path: string }} leaf - the leaf to move (only `path` is read).
 * @returns {Promise<{ summary: string }>} short description of the move.
 * @throws {Error} when a file already exists at the destination path.
 */
async function applyLift(wikiRoot, dir, leaf) {
  const destinationDir = dirname(dir);
  const destination = join(destinationDir, basename(leaf.path));
  if (existsSync(destination)) {
    throw new Error(
      `LIFT: target ${destination} already exists; refusing to overwrite`,
    );
  }

  // Re-point parents[] at the index the leaf will sit next to: the
  // sibling form `index.md` when it lands in the wiki root, otherwise
  // `../index.md`. Leaves without an array-valued parents[] are untouched.
  const { data, body } = parseFrontmatter(
    readFileSync(leaf.path, "utf8"),
    leaf.path,
  );
  if (Array.isArray(data.parents)) {
    data.parents = [destinationDir === wikiRoot ? "index.md" : "../index.md"];
  }

  // Write the new copy first, then delete the old one.
  writeFileSync(destination, renderFrontmatter(data, body), "utf8");
  rmSync(leaf.path, { force: true });

  // The folder's own index.md (if any) goes with it.
  const staleIndex = join(dir, "index.md");
  if (existsSync(staleIndex)) {
    rmSync(staleIndex, { force: true });
  }

  // Remove the folder only when it is really empty; anything that showed
  // up after detection is left alone. Failures here are non-fatal.
  try {
    if (readdirSync(dir).length === 0) {
      rmSync(dir, { recursive: true, force: true });
    }
  } catch {
    /* best effort */
  }

  return { summary: `lifted ${data.id} to ${relative(wikiRoot, destination)}` };
}
|
|
159
|
+
|
|
160
|
+
// ── MERGE ─────────────────────────────────────────────────────────────
|
|
161
|
+
//
|
|
162
|
+
// Detection: pairs of sibling leaves whose tiered similarity check
|
|
163
|
+
// says "same". Apply: produce a merged entry carrying the union
|
|
164
|
+
// of covers[] (deduped) on the sibling with the longer focus (the keeper);
|
|
165
|
+
// the other sibling's id becomes an alias and its leaf file is deleted.
|
|
166
|
+
/**
 * Collect MERGE proposals: every pair of sibling leaves for which the
 * tiered `decide` call answers "same".
 *
 * The comparison model is built once per directory from all sibling
 * frontmatter and handed to every pairwise `decide` call as
 * `precomputedModel`. Pairs are evaluated one at a time, in order.
 *
 * @param {string} wikiRoot - root directory of the wiki.
 * @param {object} ctx - supplies `opId`, `qualityMode`, `interactive` and
 *   `tier2Handler`, which are forwarded to `decide`.
 * @returns {Promise<Array<object>>} MERGE proposals whose `apply` defers
 *   to `applyMerge`.
 */
export async function detectMerge(wikiRoot, ctx) {
  const found = [];
  for (const dir of walkDirs(wikiRoot)) {
    const siblings = listChildren(dir).leaves;
    if (siblings.length < 2) continue;

    const corpus = siblings.map((entry) => entry.data);
    const sharedModel = buildComparisonModel(corpus);

    for (let first = 0; first < siblings.length; first += 1) {
      const a = siblings[first];
      for (let second = first + 1; second < siblings.length; second += 1) {
        const b = siblings[second];
        const verdict = await decide(a.data, b.data, corpus, {
          wikiRoot,
          opId: ctx.opId,
          operator: "MERGE",
          qualityMode: ctx.qualityMode,
          interactive: ctx.interactive,
          tier2Handler: ctx.tier2Handler,
          precomputedModel: sharedModel,
        });
        if (verdict.decision !== "same") continue;
        found.push({
          operator: "MERGE",
          priority: PRIORITY.MERGE,
          sources: [a.path, b.path],
          describe: `MERGE ${a.data.id} + ${b.data.id} (tier ${verdict.tier}, sim ${verdict.similarity.toFixed(2)})`,
          apply: async () => applyMerge(wikiRoot, a, b, verdict),
        });
      }
    }
  }
  return found;
}
|
|
206
|
+
|
|
207
|
+
/**
 * Fuse two sibling leaves into one.
 *
 * The entry with the longer `focus` survives (ties go to `a`). Its file
 * is re-read from disk and rewritten with the unions described below,
 * and the other entry's file is removed.
 *
 * @param {string} wikiRoot - unused here; kept for signature parity.
 * @param {{ path: string, data: object }} a - first sibling.
 * @param {{ path: string, data: object }} b - second sibling.
 * @param {{ tier: *, similarity: number }} decision - the similarity
 *   verdict; echoed into the summary.
 * @returns {Promise<{ summary: string }>} short description of the merge.
 * @throws {Error} when `type` or `depth_role` is set on both entries with
 *   different values.
 */
async function applyMerge(wikiRoot, a, b, decision) {
  const focusLength = (entry) => entry.data.focus?.length ?? 0;
  const aSurvives = focusLength(a) >= focusLength(b);
  const keeper = aSurvives ? a : b;
  const absorbed = aSurvives ? b : a;

  const { data, body } = parseFrontmatter(
    readFileSync(keeper.path, "utf8"),
    keeper.path,
  );

  // Structural metadata must agree whenever both sides declare it.
  for (const field of ["type", "depth_role"]) {
    const kept = data[field];
    const incoming = absorbed.data[field];
    const compatible =
      kept === undefined || incoming === undefined || kept === incoming;
    if (!compatible) {
      throw new Error(
        `MERGE: cannot merge ${keeper.data.id} and ${absorbed.data.id}: conflicting "${field}" (${kept} vs ${incoming})`,
      );
    }
  }

  // Ordered union: keeper's items first, then whatever absorbed adds.
  // Non-array inputs count as empty.
  const asList = (value) => (Array.isArray(value) ? value : []);
  const union = (mine, theirs) => [
    ...new Set([...asList(mine), ...asList(theirs)]),
  ];

  // covers is always written; tags/domains only if either side has them.
  data.covers = union(data.covers, absorbed.data.covers);
  for (const key of ["tags", "domains"]) {
    if (Array.isArray(data[key]) || Array.isArray(absorbed.data[key])) {
      data[key] = union(data[key], absorbed.data[key]);
    }
  }

  // parents[]: the keeper's entries stay in front, so its first parent
  // remains first; absorbed's extra parents are appended.
  if (Array.isArray(absorbed.data.parents)) {
    data.parents = [
      ...new Set([...(data.parents ?? []), ...absorbed.data.parents]),
    ];
  }

  // aliases: existing ones, then absorbed's id, then absorbed's aliases —
  // minus the keeper's own id.
  const aliasSet = new Set([
    ...(data.aliases ?? []),
    absorbed.data.id,
    ...(absorbed.data.aliases ?? []),
  ]);
  aliasSet.delete(data.id);
  data.aliases = [...aliasSet];

  writeFileSync(keeper.path, renderFrontmatter(data, body), "utf8");
  rmSync(absorbed.path, { force: true });

  const similarity = decision.similarity.toFixed(3);
  return {
    summary: `merged ${absorbed.data.id} into ${keeper.data.id} (tier ${decision.tier}, sim ${similarity})`,
  };
}
|
|
274
|
+
|
|
275
|
+
// ── DESCEND ───────────────────────────────────────────────────────────
|
|
276
|
+
//
|
|
277
|
+
// Detection: an index.md whose body authored zone exceeds a byte
|
|
278
|
+
// budget OR contains leaf-content signatures (code fences, checklists,
|
|
279
|
+
// multi-paragraph domain exposition).
|
|
280
|
+
//
|
|
281
|
+
// Apply: carve the authored zone into a new leaf file under the same
|
|
282
|
+
// directory, clear the authored zone on the index, and link the leaf
|
|
283
|
+
// from the parent.
|
|
284
|
+
// Size threshold for an index's authored zone; detectDescend proposes a
// DESCEND when the zone is larger than this.
const DESCEND_AUTHORED_BUDGET = 2048;

// Patterns that mark text as leaf-style content. All are multiline and
// none carry the `g`/`y` flag, so `.test()` is stateless and the array
// can be reused safely.
const LEAF_SIGNATURES = [
  // Code fence opening (or closing) a line.
  /^\s*```/m,
  // Task-list item, ticked or not, with any of the `-`, `*`, `+` bullets.
  /^\s*[-*+] \[[ xX]\]/m,
  // Numbered list with 2+ items. The first item may contain several
  // words and may end in CRLF — the earlier `\S+\n` form only matched a
  // single-token first item terminated by a bare LF.
  /^\s*\d+\.\s+\S.*\r?\n\s*\d+\.\s+\S/m,
];
|
|
290
|
+
|
|
291
|
+
/**
 * Collect DESCEND proposals: directories whose `index.md` authored zone
 * is over the byte budget or matches one of the leaf-content signatures.
 *
 * The budget is compared against the zone's UTF-8 encoded size, so the
 * "bytes" in the reason text is accurate for non-ASCII content too.
 * When both triggers fire, the size reason is the one reported.
 *
 * @param {string} wikiRoot - root directory of the wiki.
 * @returns {Array<object>} DESCEND proposals whose `apply` defers to
 *   `applyDescend`.
 */
export function detectDescend(wikiRoot) {
  const proposals = [];
  for (const dir of walkDirs(wikiRoot)) {
    const indexPath = join(dir, "index.md");
    if (!existsSync(indexPath)) continue;

    const raw = readFileSync(indexPath, "utf8");
    let parsed;
    try {
      parsed = parseFrontmatter(raw, indexPath);
    } catch {
      // An index whose frontmatter does not parse is skipped here, not
      // treated as a fatal error for detection.
      continue;
    }

    const authored = extractAuthoredZone(parsed.body);
    if (!authored) continue;

    // Measure encoded size, not UTF-16 code units.
    const authoredBytes = Buffer.byteLength(authored, "utf8");
    let reason = null;
    if (authoredBytes > DESCEND_AUTHORED_BUDGET) {
      reason = `authored zone is ${authoredBytes} bytes (budget ${DESCEND_AUTHORED_BUDGET})`;
    } else if (LEAF_SIGNATURES.some((re) => re.test(authored))) {
      reason = "authored zone contains leaf-content signature";
    }
    if (!reason) continue;

    proposals.push({
      operator: "DESCEND",
      priority: PRIORITY.DESCEND,
      sources: [indexPath],
      describe: `DESCEND content from ${relative(wikiRoot, indexPath)}: ${reason}`,
      apply: async () => applyDescend(wikiRoot, indexPath, parsed, authored, reason),
    });
  }
  return proposals;
}
|
|
327
|
+
|
|
328
|
+
/**
 * Return the trimmed text between the authored-orientation delimiter
 * comments of an index body.
 *
 * The END marker is looked up only after the first BEGIN marker, so a
 * stray END earlier in the body does not hide a well-formed zone. This
 * is the same span a non-greedy BEGIN…END regex replace would match.
 *
 * @param {string|null|undefined} body - index body text.
 * @returns {string} the zone's trimmed content, or "" when the body is
 *   empty or either marker is missing.
 */
function extractAuthoredZone(body) {
  if (!body) return "";
  const beginMarker = "<!-- BEGIN AUTHORED ORIENTATION -->";
  const endMarker = "<!-- END AUTHORED ORIENTATION -->";

  const start = body.indexOf(beginMarker);
  if (start === -1) return "";

  const contentStart = start + beginMarker.length;
  const end = body.indexOf(endMarker, contentStart);
  if (end === -1) return "";

  return body.slice(contentStart, end).trim();
}
|
|
335
|
+
|
|
336
|
+
/**
 * Move an index's authored-zone text into a new sibling leaf and empty
 * the zone in the index.
 *
 * @param {string} wikiRoot - wiki root; only used for the summary path.
 * @param {string} indexPath - path of the index.md being slimmed down.
 * @param {{ data: object, body: string }} parsed - parsed index contents.
 * @param {string} authored - text to relocate into the new leaf.
 * @param {string} reason - detection reason, echoed into the summary.
 * @returns {Promise<{ summary: string }>} short description of the move.
 */
async function applyDescend(wikiRoot, indexPath, parsed, authored, reason) {
  const dir = dirname(indexPath);

  // Pick the lowest N for which `descended-content-<N>.md` is free.
  let n = 1;
  while (existsSync(join(dir, `descended-content-${n}.md`))) {
    n += 1;
  }
  const id = `descended-content-${n}`;
  const leafPath = join(dir, `${id}.md`);

  const origin = parsed.data.id ?? basename(dir);
  const leafFrontmatter = {
    id,
    type: "primary",
    depth_role: "leaf",
    focus: `content descended from ${origin}`,
    covers: ["content moved from parent index authored zone"],
    parents: ["index.md"],
    tags: ["descended"],
  };
  writeFileSync(
    leafPath,
    renderFrontmatter(leafFrontmatter, `\n${authored}\n`),
    "utf8",
  );

  // Empty the zone but keep both delimiter comments in the index.
  const zonePattern =
    /<!-- BEGIN AUTHORED ORIENTATION -->[\s\S]*?<!-- END AUTHORED ORIENTATION -->/;
  const emptiedZone =
    "<!-- BEGIN AUTHORED ORIENTATION -->\n<!-- END AUTHORED ORIENTATION -->";
  const slimBody = parsed.body.replace(zonePattern, emptiedZone);
  writeFileSync(indexPath, renderFrontmatter(parsed.data, slimBody), "utf8");

  const indexRel = relative(wikiRoot, indexPath);
  return { summary: `descended content from ${indexRel} into ${id}.md (${reason})` };
}
|
|
369
|
+
|
|
370
|
+
// ── NEST + DECOMPOSE (detection only in Phase 6) ─────────────────────
|
|
371
|
+
|
|
372
|
+
/**
 * Report DECOMPOSE and NEST candidates without applying them.
 *
 * - DECOMPOSE: a leaf whose `covers` has 12 or more items.
 * - NEST: a leaf whose frontmatter has a non-empty `nests_into` array.
 *
 * Every proposal is flagged `detectOnly: true` and its `apply` resolves
 * to a fixed summary without touching the tree.
 *
 * @param {string} wikiRoot - root directory of the wiki.
 * @returns {Array<object>} detect-only proposals; for a leaf matching
 *   both signals, DECOMPOSE precedes NEST.
 */
export function detectNestAndDecompose(wikiRoot) {
  const deferredApply = async () => ({
    summary: "detect-only (Phase 6 defers application)",
  });
  const found = [];

  for (const dir of walkDirs(wikiRoot)) {
    for (const leaf of listChildren(dir).leaves) {
      const coverCount = (leaf.data.covers ?? []).length;
      if (coverCount >= 12) {
        found.push({
          operator: "DECOMPOSE",
          priority: PRIORITY.DECOMPOSE,
          sources: [leaf.path],
          describe: `DECOMPOSE candidate ${leaf.data.id} (covers=${coverCount}) — application deferred`,
          apply: deferredApply,
          detectOnly: true,
        });
      }

      const hint = leaf.data.nests_into;
      if (Array.isArray(hint) && hint.length > 0) {
        found.push({
          operator: "NEST",
          priority: PRIORITY.NEST,
          sources: [leaf.path],
          describe: `NEST candidate ${leaf.data.id} (nests_into ${hint.length}) — application deferred`,
          apply: deferredApply,
          detectOnly: true,
        });
      }
    }
  }
  return found;
}
|
|
410
|
+
|
|
411
|
+
// ── Orchestrator entry point ─────────────────────────────────────────
|
|
412
|
+
//
|
|
413
|
+
// `runConvergence(wikiRoot, ctx)` — called from orchestrator.mjs's
|
|
414
|
+
// operator-convergence phase. Runs detect → highest-priority apply
|
|
415
|
+
// → repeat until no applied proposals or the iteration budget is
|
|
416
|
+
// exhausted. Returns a summary of what happened for commit messages
|
|
417
|
+
// and phase records.
|
|
418
|
+
//
|
|
419
|
+
// Phase 8 overhaul: the loop now also runs the multi-signal
|
|
420
|
+
// cluster detector (cluster-detect.mjs) after the pairwise
|
|
421
|
+
// operators. When a cluster proposal survives and can be named
|
|
422
|
+
// (via fixture / runtime-resolved Tier 2 responses), the NEST
|
|
423
|
+
// applier rehouses the leaves into a new subcategory and the
|
|
424
|
+
// parent indices are regenerated. When naming cannot be resolved
|
|
425
|
+
// immediately, the request sits in the Tier 2 pending queue and
|
|
426
|
+
// the caller (orchestrator) can drain it into a batch + exit 7.
|
|
427
|
+
//
|
|
428
|
+
// Quality-metric gating: every proposed change is scored against
|
|
429
|
+
// the routing_cost metric. If the metric doesn't improve after
|
|
430
|
+
// applying the change, we roll back to the pre-change disk state
|
|
431
|
+
// and try the next proposal. This is the "let data pick the
|
|
432
|
+
// cluster" discipline — we never apply a cluster just because the
|
|
433
|
+
// affinity matrix liked it, we apply it only if the resulting
|
|
434
|
+
// tree routes queries more cheaply than the pre-change tree.
|
|
435
|
+
|
|
436
|
+
/**
 * Run the operator convergence loop over a wiki.
 *
 * Each iteration re-detects DESCEND, LIFT, MERGE and the detect-only
 * NEST/DECOMPOSE proposals, applies the single highest-priority
 * applicable proposal, calls `commitBetweenIterations`, and records the
 * routing cost. When no applicable proposal exists, one cluster-NEST
 * attempt is made (unless `skipClusterNest`); the loop ends unless that
 * attempt reports "applied".
 *
 * @param {string} wikiRoot - root directory of the wiki.
 * @param {object} [ctx]
 * @param {string} [ctx.opId] - operation id; the metric trajectory is
 *   only written to the decision log when this is set.
 * @param {string} [ctx.qualityMode="tiered-fast"] - forwarded to detectMerge.
 * @param {number} [ctx.maxIterations] - iteration cap
 *   (defaults to MAX_CONVERGENCE_ITERATIONS).
 * @param {boolean} [ctx.interactive=false] - forwarded to detectMerge.
 * @param {Function} [ctx.tier2Handler] - forwarded to detectMerge.
 * @param {Function} [ctx.commitBetweenIterations] - async hook called
 *   after each applied operator with `{ iteration, operator, summary }`.
 * @param {boolean} [ctx.skipClusterNest] - disable the cluster-NEST
 *   fallback; defaults to `LLM_WIKI_SKIP_CLUSTER_NEST === "1"`.
 * @returns {Promise<object>} `{ iterations, applied, suggestions,
 *   metric_trajectory, needs_tier2, pending_count, converged }`.
 * @throws {Error} when an operator's `apply` fails; the original error
 *   is attached as `cause`.
 */
export async function runConvergence(wikiRoot, ctx = {}) {
  const {
    opId,
    qualityMode = "tiered-fast",
    maxIterations = MAX_CONVERGENCE_ITERATIONS,
    interactive = false,
    tier2Handler,
    commitBetweenIterations = async () => {},
    // Tests that don't want cluster behaviour pass skipClusterNest
    // explicitly; the env var is the shorthand for the same switch.
    skipClusterNest = process.env.LLM_WIKI_SKIP_CLUSTER_NEST === "1",
  } = ctx;
  const applied = [];
  // NOTE(review): detect-only suggestions are re-pushed on every
  // iteration, so the same candidate can appear more than once —
  // confirm whether consumers expect de-duplicated entries.
  const suggestions = [];
  const metricTrajectory = [];
  // Passed through to tryClusterNestIteration; per the original notes it
  // tracks directories created by a NEST in this run so they are not
  // re-clustered.
  const nestedParents = new Set();
  let iteration = 0;

  // Append one point to the trajectory. A failing metric computation is
  // ignored so it can never abort convergence.
  const recordCost = (atIteration, event) => {
    try {
      metricTrajectory.push({
        iteration: atIteration,
        cost: computeRoutingCost(wikiRoot).cost,
        event,
      });
    } catch {
      /* metric is informational only; ignore */
    }
  };

  recordCost(0, "baseline");

  while (iteration < maxIterations) {
    iteration++;
    const proposals = [];
    // Detect in priority order. DESCEND first (reducing), then LIFT,
    // MERGE, and the detect-only NEST / DECOMPOSE at the bottom.
    proposals.push(...detectDescend(wikiRoot));
    proposals.push(...detectLift(wikiRoot));
    proposals.push(
      ...(await detectMerge(wikiRoot, { opId, qualityMode, interactive, tier2Handler })),
    );
    const nestDecompose = detectNestAndDecompose(wikiRoot);
    proposals.push(...nestDecompose);

    // Detect-only proposals go to the suggestion audit trail and are
    // kept out of the application queue.
    for (const p of nestDecompose) {
      suggestions.push({
        operator: p.operator,
        sources: p.sources,
        reason: p.describe,
      });
    }
    const applicable = proposals.filter((p) => !p.detectOnly);

    if (applicable.length > 0) {
      // Pick the highest-priority proposal and apply it.
      applicable.sort((a, b) => b.priority - a.priority);
      const chosen = applicable[0];
      let result;
      try {
        result = await chosen.apply({ wikiRoot, opId });
      } catch (err) {
        // Keep the original error (and its stack) reachable via `cause`.
        throw new Error(
          `operator-convergence: ${chosen.operator} failed: ${err.message}`,
          { cause: err },
        );
      }
      applied.push({
        iteration,
        operator: chosen.operator,
        sources: chosen.sources,
        describe: chosen.describe,
        result,
      });
      await commitBetweenIterations({
        iteration,
        operator: chosen.operator,
        summary: result.summary,
      });
      recordCost(iteration, chosen.operator);
      continue; // next iteration
    }

    // No pairwise operator fired. Try cluster-based NEST.
    if (skipClusterNest) break;
    const nestOutcome = await tryClusterNestIteration(wikiRoot, {
      opId,
      iteration,
      applied,
      suggestions,
      metricTrajectory,
      commitBetweenIterations,
      nestedParents,
    });
    if (nestOutcome === "applied") continue;
    // "pending-tier2": unresolved requests are parked on the pending
    // queue for the caller. "none": nothing else to do.
    // Either way the loop terminates here.
    break;
  }

  const pendingTier2 = countPendingRequests(wikiRoot);

  // Write the metric trajectory to the decision log even when it only
  // holds the baseline point, so a run that applied nothing still leaves
  // a record. Logging failures are deliberately non-fatal.
  if (opId && metricTrajectory.length > 0) {
    try {
      appendMetricTrajectory(wikiRoot, opId, metricTrajectory);
    } catch {
      /* best effort — decision log is a nice-to-have for rebuild */
    }
  }

  return {
    iterations: iteration,
    applied,
    suggestions,
    metric_trajectory: metricTrajectory,
    needs_tier2: pendingTier2 > 0,
    pending_count: pendingTier2,
    converged:
      applied.length === 0 ||
      (iteration < maxIterations && pendingTier2 === 0),
  };
}
|
|
585
|
+
|
|
586
|
+
// Helper: given a wiki, try to apply a cluster NEST through the
|
|
587
|
+
// multi-tier propose_structure + math-detect pipeline.
|
|
588
|
+
//
|
|
589
|
+
// Per-directory flow (depth-first, root → subcategories):
|
|
590
|
+
//
|
|
591
|
+
// 1. Emit a propose_structure Tier 2 request for the directory's
|
|
592
|
+
// leaves. Tier 2 proposes the "ideal" nested partition.
|
|
593
|
+
// 2. Run the math cluster detector (aggressive thresholds) as
|
|
594
|
+
// a sanity check + source of additional proposals Tier 2
|
|
595
|
+
// might have missed.
|
|
596
|
+
// 3. Merge: Tier 2 subcategories + math clusters, deduplicated
|
|
597
|
+
// by member set.
|
|
598
|
+
// 4. For each math-only candidate, emit a `nest_decision`
|
|
599
|
+
// request — Tier 2 must GO/NO-GO the math proposal before
|
|
600
|
+
// it is applied.
|
|
601
|
+
// 5. For each approved candidate, either use Tier 2's slug
|
|
602
|
+
// (propose_structure) or emit a `cluster_name` request
|
|
603
|
+
// (math-only with no existing slug).
|
|
604
|
+
// 6. Apply each approved NEST through the applyNest helper.
|
|
605
|
+
// The quality-metric gate rolls back any application that
|
|
606
|
+
// regresses routing_cost.
|
|
607
|
+
//
|
|
608
|
+
// Returns one of:
|
|
609
|
+
// "applied" — a NEST fired, commit emitted.
|
|
610
|
+
// "pending-tier2" — at least one Tier 2 request is still
|
|
611
|
+
// unresolved. The caller exits 7.
|
|
612
|
+
// "none" — no candidates, all candidates rejected, or
|
|
613
|
+
// all candidates failed the metric gate.
|
|
614
|
+
/**
 * Run one cluster-NEST pass over the wiki: collect Tier 2 and math cluster
 * candidates for every directory, gate and name them, then apply a set of
 * non-conflicting NESTs, each behind its own routing-cost gate.
 *
 * @param {string} wikiRoot - Root directory of the wiki being converged.
 * @param {object} ctx - Shared convergence state. Fields read here:
 *   `opId`, `iteration`, `applied` (array, appended to), `suggestions`
 *   (array, appended to), `metricTrajectory` (array, appended to),
 *   `commitBetweenIterations` (async callback fired once per applied NEST)
 *   and optional `nestedParents` (Set of directories to skip; grown with
 *   every directory this pass creates).
 * @returns {Promise<"applied"|"pending-tier2"|"none">} "applied" when at
 *   least one NEST landed, "pending-tier2" when nothing landed but the
 *   pending queue is non-empty, otherwise "none".
 */
async function tryClusterNestIteration(wikiRoot, ctx) {
  const {
    opId,
    iteration,
    applied,
    suggestions,
    metricTrajectory,
    commitBetweenIterations,
    nestedParents = new Set(),
  } = ctx;

  // Collect candidate proposals across every parent directory.
  // For each directory:
  //   - propose_structure → Tier 2 subcategories (tier2-proposed)
  //   - math detector     → math clusters        (math-gated)
  //
  // Phase 5 batching overhaul: the loop walks EVERY directory in a
  // single pass and accumulates every pending Tier 2 request into
  // the shared pending queue. It does NOT short-circuit when a
  // propose_structure request parks — math cluster detection still
  // runs for that directory so any math-only candidates can emit
  // their own gate/naming requests in the same batch. The tree
  // state is identical across every directory visited in this
  // pass (no NEST has been applied yet), so every response that
  // comes back is consistent with the same base tree.
  //
  // The old "skip math if propose_structure parked" short-circuit
  // was a size-minimisation heuristic: it avoided enqueuing math
  // gate/naming requests for clusters that propose_structure might
  // reject outright. In practice that optimisation costs MORE
  // round trips than it saves, because each parked dir forces a
  // separate exit-7 cycle instead of being batched with its
  // siblings. The cost of a few "wasted" gate/naming requests is
  // one sub-agent per request (cheap) — the cost of an extra exit-7
  // cycle is an entire CLI preflight + resume rebuild (expensive).
  // We batch maximally and let `mergeClusterProposals` + the
  // stale-candidate guard deduplicate the fallout.
  const fixture = loadFixture();
  const dirs = walkDirs(wikiRoot);
  const allCandidates = [];
  for (const dir of dirs) {
    if (nestedParents.has(dir)) continue;
    const { leaves } = listChildren(dir);
    // Skip directories that cannot produce a non-trivial partition.
    // `MIN_TIER2_CLUSTER_SIZE` is the floor on ONE cluster's members —
    // a directory with only that many leaves could at most fold them
    // all into a single subcategory, which would be a trivial
    // rename rather than a structural improvement. We therefore
    // require strictly more than `MIN_TIER2_CLUSTER_SIZE` leaves
    // (i.e., ≥ MIN_TIER2_CLUSTER_SIZE + 1) before we even ask
    // Tier 2 for a structure proposal. Skipping the ≤-floor case
    // cuts a documented source of wasted Tier 2 round trips: every
    // newly-created subcategory that convergence visits on its next
    // pass gets this trivial keep-flat answer for free without
    // paying for a propose_structure request.
    if (leaves.length < MIN_TIER2_CLUSTER_SIZE + 1) continue;

    const relDir = relative(wikiRoot, dir) || ".";

    // Step 1: propose_structure Tier 2 request. Park on pending
    // without short-circuiting the math phase below.
    let tier2Clusters = [];
    const proposeReq = buildProposeStructureRequest(relDir, leaves);
    const proposeResp = resolveTier2Response(wikiRoot, fixture, proposeReq);
    if (proposeResp === "pending") {
      enqueuePending(wikiRoot, proposeReq);
      suggestions.push({
        operator: "NEST",
        sources: leaves.map((l) => l.path),
        reason: `propose_structure parked for ${relDir} (awaiting Tier 2)`,
      });
      // Fall through — math still runs so any cluster this dir
      // carries is evaluated (and its gate/naming requests are
      // batched alongside every other directory's) before we exit 7.
    } else {
      tier2Clusters = extractTier2Clusters(proposeResp, leaves, dir);
    }

    // Step 2: math cluster detection (aggressive scan).
    const mathProposals = await detectClusters(wikiRoot, leaves, {
      returnEmptyMarker: false,
    });
    const mathClusters = mathProposals
      .filter((p) => !p.empty_partition)
      .map((p) => ({
        ...p,
        parent_dir: dir,
        source: "math",
        leaves_set: new Set(p.leaves.map((l) => l.data.id)),
      }));

    // Step 3: merge proposals, dedup by member set.
    const merged = mergeClusterProposals(tier2Clusters, mathClusters);
    for (const c of merged) c.parent_dir = dir;
    allCandidates.push(...merged);
  }

  if (allCandidates.length === 0) {
    return countPendingRequests(wikiRoot) > 0 ? "pending-tier2" : "none";
  }

  // Step 4: math-only candidates go through a mandatory
  // nest_decision gate. Candidates that came from propose_structure
  // are already structurally approved by Tier 2 — skip the gate.
  const gatedCandidates = [];
  for (const cand of allCandidates) {
    if (cand.source === "tier2" || cand.source === "both") {
      gatedCandidates.push(cand);
      continue;
    }
    // math-only: first validate staleness before emitting the gate
    // request. A math candidate computed in an earlier directory pass
    // (or in a prior invocation that restored from pending state) may
    // reference leaves that a subsequent NEST has already moved out
    // of the expected parent. Sending such a stale candidate to a
    // Tier 2 sub-agent wastes a round trip and almost always comes
    // back rejected with "these leaves are no longer siblings".
    // Drop the candidate here and log the reason for the audit trail.
    if (!mathCandidateIsFresh(cand)) {
      dropStaleMathCandidate(wikiRoot, cand, opId, suggestions);
      continue;
    }
    // math-only: run the gate.
    const gateReq = cand.gate_request;
    const gateResp = resolveTier2Response(wikiRoot, fixture, gateReq);
    if (gateResp === "pending") {
      enqueuePending(wikiRoot, gateReq);
      suggestions.push({
        operator: "NEST",
        sources: cand.leaves.map((l) => l.path),
        // Guarded like every other average_affinity read in this function:
        // a missing affinity must not throw while we are only logging.
        reason: `nest_decision parked (math cluster, avg_affinity=${(cand.average_affinity ?? 0).toFixed(3)})`,
      });
      continue;
    }
    const decision = typeof gateResp?.decision === "string" ? gateResp.decision : "undecidable";
    if (decision === "nest") {
      cand.gate_reason = gateResp.reason || "tier2 approved";
      gatedCandidates.push(cand);
    } else {
      // keep_flat / undecidable — skip, log, continue.
      suggestions.push({
        operator: "NEST",
        sources: cand.leaves.map((l) => l.path),
        reason: `cluster rejected by nest_decision (${decision}): ${gateResp.reason || ""}`,
      });
      appendNestDecision(wikiRoot, {
        op_id: opId,
        sources: cand.leaves.map((l) => l.data.id),
        similarity: cand.average_affinity ?? 0,
        confidence_band: "math-gated",
        decision: "rejected-by-gate",
        reason: `nest_decision=${decision}: ${gateResp.reason || ""}`,
      });
    }
  }

  // Step 5: resolve naming. propose_structure already supplied a
  // slug for tier2 clusters. math-only clusters need a
  // cluster_name request.
  const resolvedProposals = [];
  for (const cand of gatedCandidates) {
    if (cand.slug && validateSlug(cand.slug)) {
      resolvedProposals.push(cand);
      continue;
    }
    // Math-only path: cluster_name request.
    if (!cand.naming_request) continue;
    const namingResp = resolveTier2Response(wikiRoot, fixture, cand.naming_request);
    if (namingResp === "pending") {
      enqueuePending(wikiRoot, cand.naming_request);
      suggestions.push({
        operator: "NEST",
        sources: cand.leaves.map((l) => l.path),
        reason: `cluster_name parked (size=${cand.leaves.length})`,
      });
      continue;
    }
    if (namingResp?.decision === "reject") {
      suggestions.push({
        operator: "NEST",
        sources: cand.leaves.map((l) => l.path),
        reason: `cluster_name rejected (size=${cand.leaves.length})`,
      });
      appendNestDecision(wikiRoot, {
        op_id: opId,
        sources: cand.leaves.map((l) => l.data.id),
        similarity: cand.average_affinity ?? 0,
        confidence_band: cand.source === "math" ? "math-gated" : "tier2-proposed",
        decision: "rejected-by-gate",
        reason: "cluster_name=reject",
      });
      continue;
    }
    if (typeof namingResp?.slug === "string" && validateSlug(namingResp.slug)) {
      // Forward purpose from the naming response if Tier 2 included
      // one, otherwise keep whatever the candidate already had
      // (which will be empty for math-only clusters). The applier
      // uses this as the subcat's `focus:` line.
      const purpose =
        typeof namingResp.purpose === "string" && namingResp.purpose.trim()
          ? namingResp.purpose
          : cand.purpose || "";
      resolvedProposals.push({ ...cand, slug: namingResp.slug, purpose });
      continue;
    }
    // Neither a rejection nor a usable slug came back. Surface the drop
    // in `suggestions` instead of losing the candidate without a trace.
    suggestions.push({
      operator: "NEST",
      sources: cand.leaves.map((l) => l.path),
      reason: `cluster_name returned no valid slug (size=${cand.leaves.length})`,
    });
  }

  if (resolvedProposals.length === 0) {
    return countPendingRequests(wikiRoot) > 0 ? "pending-tier2" : "none";
  }

  // Apply proposals in confidence order. v6-multi-NEST: we now
  // apply a SET of non-conflicting proposals in a single iteration
  // instead of only the highest-confidence one. This collapses a
  // guide/-style 9-NEST convergence from ~9 iterations (= 8 exit-7
  // cycles on the Tier 2 fixture path) into 1–2 iterations and
  // fixes the novel-corpus partial-cluster bug where a second
  // cluster ("frontend/") was orphaned because its parent was
  // re-shaped by the first applied NEST before the re-scan.
  //
  // Selection rule for "non-conflicting":
  //
  //   - DISJOINT member sets. Two proposals that would move the
  //     same leaf into two different subdirs are obviously in
  //     conflict — whichever applied second would either fail in
  //     the applier or silently clobber the first.
  //
  // Same parent_dir is ALLOWED when members are disjoint. The
  // root-level 8-subcategory layout on guide/ is the canonical
  // example: every NEST proposal targets `.` as its parent, but
  // each carves out a disjoint subset of leaves, so applying
  // them in sequence within one iteration is safe. NEST #1 moves
  // its members + rewrites the parent index; NEST #2 takes a
  // fresh snapshot of the parent index (captured at apply time)
  // and moves its disjoint members on top of NEST #1's state.
  // A regression-triggered rollback on NEST #2 restores the
  // post-NEST-#1 snapshot, not the pre-NEST-#1 state, so
  // NEST #1's effects survive a NEST #2 rollback.
  //
  // The stricter "different parent_dir" rule is NOT enforced
  // because enforcing it would serialise the guide/ nesting pass
  // into 8 iterations = 7 exit-7 cycles — exactly the pre-v6
  // pain point we're fixing.
  //
  // Ordering: sort by confidence (source rank then avg affinity),
  // then greedily pick each candidate that doesn't conflict with
  // any already-picked one. The greedy pick preserves the original
  // tie-break: a higher-ranked proposal blocks a lower-ranked one
  // that overlaps with it.
  //
  // Per-apply gates: every picked candidate gets its own pre/post
  // routing_cost measurement and its own rollback snapshot. The
  // same math-strict / tier2-tolerance policy applies per apply.
  // A rolled-back pick does NOT cancel subsequent picks in the
  // same iteration — each is judged on its own metric delta
  // against the tree state AFTER previous picks have landed.
  //
  // Re-freshness check: before each apply, re-run
  // `mathCandidateIsFresh` on the candidate. If a prior pick in
  // this iteration invalidated the candidate's member set
  // (members moved out of parent_dir into a new subdir), the
  // stale candidate is dropped via `dropStaleMathCandidate`,
  // which writes a `rejected-stale` audit entry. This is the
  // subtle case the 3b audit-log path was built for — before v6
  // it was latent because single-NEST-per-iteration never
  // produced stale candidates; with multi-NEST it's reachable.
  //
  // Commit topology: one commit per applied NEST. The iteration
  // count stays the same across all picks in a single iteration
  // (they share the outer `iteration` value) but each apply fires
  // `commitBetweenIterations` so the private git history shows
  // one commit per rewrite, matching pre-v6 behaviour.
  const sourceRank = (s) => (s === "both" ? 2 : s === "tier2" ? 1 : 0);
  resolvedProposals.sort((a, b) => {
    const ra = sourceRank(a.source);
    const rb = sourceRank(b.source);
    if (ra !== rb) return rb - ra;
    return (b.average_affinity ?? 0) - (a.average_affinity ?? 0);
  });

  // Non-conflict selection — greedy pick sorted by confidence.
  // Only disjoint member sets are required; same-parent picks are
  // allowed (see the block comment above for why).
  const picked = [];
  const takenMembers = new Set();
  for (const proposal of resolvedProposals) {
    const memberIds = proposal.leaves.map((l) => l.data?.id).filter(Boolean);
    let overlap = false;
    for (const m of memberIds) {
      if (takenMembers.has(m)) {
        overlap = true;
        break;
      }
    }
    if (overlap) continue;
    picked.push(proposal);
    for (const m of memberIds) takenMembers.add(m);
  }

  // Ensure the routing graph exists before we measure the
  // baseline. Without this, the very first iteration would see
  // a non-existent root index.md (returns cost=0) and treat the
  // first NEST's legitimate cost measurement as a regression.
  // Bootstraps stub indices anywhere there are leaves but no
  // index.md, then rebuilds the entire tree so entries[] is
  // populated. Idempotent: subsequent calls are cheap.
  try {
    bootstrapStubIndicesForMetric(wikiRoot);
    rebuildAllIndices(wikiRoot);
  } catch {
    /* best effort: indices may not be set up yet on a fresh wiki */
  }

  let appliedCount = 0;
  for (const proposal of picked) {
    // Re-check freshness RIGHT BEFORE apply. An earlier pick in the
    // same iteration may have moved leaves out of this candidate's
    // parent (e.g. an ancestor-directory cluster swept up members
    // that were also part of a descendant-directory candidate). A
    // stale pick would otherwise fail inside applyNest or produce
    // garbage state, and the audit trail would lose the reason.
    if (proposal.source === "math" && !mathCandidateIsFresh(proposal)) {
      dropStaleMathCandidate(wikiRoot, proposal, opId, suggestions);
      continue;
    }
    const confBand =
      proposal.source === "both"
        ? "tier2-and-math"
        : proposal.source === "tier2"
          ? "tier2-proposed"
          : "math-gated";
    const preMetric = computeRoutingCost(wikiRoot).cost;
    // Snapshot the files we're about to mutate so we can roll back
    // on a metric regression. We ONLY need the old leaf contents
    // and the parent dir's index.md — the NEST applier touches
    // those and creates a new subdir.
    const rollback = snapshotForRollback(proposal, wikiRoot);
    // Pre-resolve the slug against member + sibling ids. A Tier 2 or
    // math-named slug that equals one of its member leaves' ids (or a
    // non-member sibling's id in the same parent) would pass applyNest
    // and then trip DUP-ID at validate time, forcing a full pipeline
    // rollback. resolveNestSlug auto-suffixes deterministically
    // (`-group`, then `-group-N`) so the NEST lands on the first try.
    // An unchanged slug is a no-op. The audit-log entry for the rename
    // is written AFTER applyNest succeeds so decisions.yaml never
    // records a rename for an op that ultimately failed.
    const originalSlug = proposal.slug;
    const resolvedSlug = resolveNestSlug(originalSlug, proposal);
    let result;
    try {
      result = applyNest(wikiRoot, proposal, resolvedSlug);
    } catch (err) {
      suggestions.push({
        operator: "NEST",
        sources: proposal.leaves.map((l) => l.path),
        reason: `cluster apply failed: ${err.message}`,
      });
      appendNestDecision(wikiRoot, {
        op_id: opId,
        sources: proposal.leaves.map((l) => l.data.id),
        similarity: proposal.average_affinity ?? 0,
        confidence_band: confBand,
        decision: "rejected-by-gate",
        reason: `applyNest threw: ${err.message}`,
      });
      continue;
    }
    rebuildAllIndices(wikiRoot);
    const postMetric = computeRoutingCost(wikiRoot).cost;
    // Acceptance policy — distinguishes Tier 2 structural proposals
    // from math-only candidates:
    //
    //   - Math-only candidates (`source === "math"`) need STRICT
    //     improvement (post < pre − 1e-9). Math signals alone are a
    //     weak proxy for whether a cluster is worth creating, so the
    //     metric must pay for the nest.
    //
    //   - Tier 2 proposals (`source === "tier2"` or `"both"`) are
    //     allowed to land on metric-NEUTRAL deltas: the cluster is a
    //     structural judgment call the model is making about
    //     conceptual organisation, and for hand-authored sparse-
    //     signal corpora the `routing_cost` metric is often neutral
    //     on such clusters because the authored activation keywords
    //     already disambiguate the leaves at the flat level. The
    //     metric stays as a REGRESSION safety net: Tier 2 nests are
    //     rolled back if they make routing worse by more than the
    //     tolerance (currently 5% relative), which prevents a model
    //     hallucination from wrecking the wiki but allows structural
    //     organisation to land.
    const isMathOnly = proposal.source === "math";
    const regressionTolerance = 0.05; // 5% relative slack for Tier 2 nests
    const postLimit = isMathOnly
      ? preMetric - 1e-9 // strict improvement
      : preMetric * (1 + regressionTolerance); // bounded regression
    if (postMetric > postLimit) {
      // Regression beyond policy. Roll back.
      restoreRollback(rollback, result);
      rebuildAllIndices(wikiRoot);
      const policyLabel = isMathOnly
        ? "strict-improvement"
        : `tier2-regression-tolerance<=${(regressionTolerance * 100).toFixed(0)}%`;
      suggestions.push({
        operator: "NEST",
        sources: proposal.leaves.map((l) => l.path),
        reason: `cluster rolled back: metric ${preMetric.toFixed(4)} → ${postMetric.toFixed(4)} (policy=${policyLabel})`,
      });
      appendNestDecision(wikiRoot, {
        op_id: opId,
        sources: proposal.leaves.map((l) => l.data.id),
        similarity: proposal.average_affinity ?? 0,
        confidence_band: confBand,
        decision: "rejected-by-metric",
        reason: `metric ${preMetric.toFixed(4)} → ${postMetric.toFixed(4)} exceeds ${policyLabel}`,
      });
      continue;
    }
    // Keep. Record application + commit.
    // If resolveNestSlug renamed the slug to dodge a collision, audit
    // the rename NOW (after the NEST has passed every gate and is
    // about to commit) so decisions.yaml never carries a slug-renamed
    // entry for an op that was subsequently rolled back by the metric
    // gate or rejected by applyNest.
    if (resolvedSlug !== originalSlug) {
      appendNestDecision(wikiRoot, {
        op_id: opId,
        sources: proposal.leaves.map((l) => l.data.id),
        similarity: proposal.average_affinity ?? 0,
        confidence_band: confBand,
        decision: "slug-renamed",
        reason: `slug "${originalSlug}" collided with existing id; renamed to "${resolvedSlug}"`,
      });
    }
    const affinityTag = Number.isFinite(proposal.average_affinity)
      ? `avg_affinity=${proposal.average_affinity.toFixed(3)}, `
      : "";
    applied.push({
      iteration,
      operator: "NEST",
      sources: proposal.leaves.map((l) => l.path),
      describe:
        `NEST ${proposal.leaves.length} leaves into ` +
        `${relative(wikiRoot, result.target_dir)} ` +
        `(${affinityTag}source=${proposal.source}, ` +
        `metric ${preMetric.toFixed(4)} → ${postMetric.toFixed(4)})`,
      result,
    });
    await commitBetweenIterations({
      iteration,
      operator: "NEST",
      summary: `nested ${proposal.leaves.length} leaves into ${relative(wikiRoot, result.target_dir)}`,
    });
    metricTrajectory.push({
      iteration,
      cost: postMetric,
      event: "NEST",
    });
    appendNestDecision(wikiRoot, {
      op_id: opId,
      sources: proposal.leaves.map((l) => l.data.id),
      similarity: proposal.average_affinity ?? 0,
      confidence_band: confBand,
      decision: "applied",
      // Log the slug that was actually handed to applyNest; after a
      // collision rename it differs from `proposal.slug`.
      reason:
        `slug=${resolvedSlug}, ` +
        `metric ${preMetric.toFixed(4)} → ${postMetric.toFixed(4)}`,
    });
    // Mark the freshly-created subdirectory so we do not
    // recursively sub-cluster it in later iterations of the
    // same run.
    nestedParents.add(result.target_dir);
    appliedCount++;
  }

  if (appliedCount > 0) return "applied";
  return countPendingRequests(wikiRoot) > 0 ? "pending-tier2" : "none";
}
|
|
1089
|
+
|
|
1090
|
+
// Enqueue a cluster-naming request through the shared tiered
|
|
1091
|
+
// pending queue. The orchestrator drains the queue after the
|
|
1092
|
+
// phase finishes and decides whether to write a Tier 2 batch +
|
|
1093
|
+
// exit 7 or to proceed (when a fixture resolved everything).
|
|
1094
|
+
/**
 * Park a cluster-naming request on the shared tiered pending queue.
 * Thin alias over `enqueuePending`; its return value is discarded.
 *
 * @param {string} wikiRoot - Wiki root passed through to the queue.
 * @param {object} request - The naming request to enqueue.
 * @returns {void}
 */
function enqueueNamingRequest(wikiRoot, request) {
  void enqueuePending(wikiRoot, request);
}
|
|
1097
|
+
|
|
1098
|
+
// Resolve a Tier 2 request: fixture → runtime-resolved map → "pending".
|
|
1099
|
+
// Returns the inner response object, or the literal string "pending"
|
|
1100
|
+
// when neither path carries an answer. Does NOT enqueue; callers
|
|
1101
|
+
// are responsible for calling `enqueuePending` on "pending".
|
|
1102
|
+
/**
 * Look up the answer to a Tier 2 request without enqueueing anything.
 *
 * Lookup order: the fixture (only when one is supplied), then the
 * runtime-resolved response keyed by `request.request_id`. Only `null`
 * and `undefined` count as "no answer"; any other value is returned as-is.
 *
 * @param {string} wikiRoot - Wiki root used for the runtime lookup.
 * @param {*} fixture - Fixture handle, or a falsy value to skip it.
 * @param {{request_id: *}} request - The Tier 2 request being resolved.
 * @returns {*} The resolved response, or the literal string "pending".
 */
function resolveTier2Response(wikiRoot, fixture, request) {
  const fromFixture = fixture ? resolveFromFixture(fixture, request) : undefined;
  if (fromFixture != null) return fromFixture;
  return getResolvedResponse(wikiRoot, request.request_id) ?? "pending";
}
|
|
1111
|
+
|
|
1112
|
+
// Convert a propose_structure response into a canonical list of
|
|
1113
|
+
// cluster candidates. Each candidate carries:
|
|
1114
|
+
//
|
|
1115
|
+
// operator: "NEST"
|
|
1116
|
+
// source: "tier2"
|
|
1117
|
+
// leaves: [<leaf>, ...]
|
|
1118
|
+
// slug: "<validated kebab-case>"
|
|
1119
|
+
// leaves_set: Set<leaf-id> (for dedup against math)
|
|
1120
|
+
//
|
|
1121
|
+
// Subcategories with invalid slugs, missing members, or fewer than
|
|
1122
|
+
// MIN_TIER2_CLUSTER_SIZE members are dropped. Members referencing leaf
|
|
1123
|
+
// ids that aren't in the directory are silently filtered.
|
|
1124
|
+
/**
 * Turn a propose_structure response into a list of NEST candidates.
 *
 * A subcategory is dropped when it is not an object, its slug is missing
 * or fails `validateSlug`, or it resolves to fewer than
 * `MIN_TIER2_CLUSTER_SIZE` distinct leaves of this directory. Member ids
 * that do not match a leaf are ignored, and repeated ids count once.
 *
 * @param {*} response - Tier 2 response; non-objects yield `[]`.
 * @param {Array<object>} leaves - Leaves of the directory; each is matched
 *   by `leaf.data.id`.
 * @param {string} parentDir - Unused; kept for call-site compatibility.
 * @returns {Array<object>} Candidates with `operator`, `source: "tier2"`,
 *   `leaves`, `slug`, `purpose`, `leaves_set` and `size`.
 */
function extractTier2Clusters(response, leaves, parentDir) {
  void parentDir;
  if (!response || typeof response !== "object") return [];
  const subcats = Array.isArray(response.subcategories)
    ? response.subcategories
    : [];
  const leafById = new Map();
  for (const l of leaves) {
    if (l.data && l.data.id) leafById.set(l.data.id, l);
  }
  const out = [];
  for (const sc of subcats) {
    if (!sc || typeof sc !== "object") continue;
    const slug = typeof sc.slug === "string" ? sc.slug : null;
    if (!slug || !validateSlug(slug)) continue;
    const members = Array.isArray(sc.members) ? sc.members : [];
    // Resolve each member id at most once. Without this, a response that
    // repeats an id (e.g. ["a", "a"]) would clear the size floor below
    // while really containing a single leaf, and `size` would disagree
    // with `leaves_set.size`.
    const resolved = [];
    const seenIds = new Set();
    for (const memberId of members) {
      if (seenIds.has(memberId)) continue;
      const leaf = leafById.get(memberId);
      if (!leaf) continue;
      seenIds.add(memberId);
      resolved.push(leaf);
    }
    // Tier 2 clusters can have as few as MIN_TIER2_CLUSTER_SIZE (2)
    // members. A language model that has read both frontmatters can
    // defend a pair on conceptual grounds — "invariants + safety are
    // the correctness substrate", "preflight + user-intent are UX
    // at op boundaries" — even when pairwise math similarity alone
    // would be noisy. The metric gate's 5% regression tolerance for
    // Tier 2 proposals catches hallucinations; size-2 pairs flow
    // through the same gate, so a genuinely bad pair still gets
    // rolled back.
    if (resolved.length < MIN_TIER2_CLUSTER_SIZE) continue;
    if (resolved.length > MAX_CLUSTER_SIZE) {
      // Oversized Tier 2 proposals get split — keep MAX members
      // and leave the rest for a subsequent iteration. This keeps
      // every nested subcategory in the 2..MAX_CLUSTER_SIZE band
      // without silently dropping members.
      resolved.length = MAX_CLUSTER_SIZE;
    }
    out.push({
      operator: "NEST",
      source: "tier2",
      leaves: resolved,
      slug,
      purpose: typeof sc.purpose === "string" ? sc.purpose : "",
      leaves_set: new Set(resolved.map((l) => l.data.id)),
      size: resolved.length,
    });
  }
  return out;
}
|
|
1174
|
+
|
|
1175
|
+
// Deduplicate Tier 2 + math cluster candidates by member set.
|
|
1176
|
+
// When Tier 2 and math propose the same cluster (set equality on
|
|
1177
|
+
// leaf ids), the merged candidate carries source="both" — this is
|
|
1178
|
+
// the strongest signal, applied first in the resolved-proposal
|
|
1179
|
+
// ordering. Math clusters that duplicate a Tier 2 cluster are
|
|
1180
|
+
// dropped (the tier2 entry already has a slug). Math clusters
|
|
1181
|
+
// that don't duplicate any Tier 2 cluster survive as source="math"
|
|
1182
|
+
// and go through the nest_decision gate.
|
|
1183
|
+
/**
 * Combine Tier 2 and math cluster candidates, collapsing duplicates.
 *
 * Each Tier 2 cluster is paired with the first not-yet-consumed math
 * cluster whose `leaves_set` is equal to its own; the pair becomes one
 * candidate with `source: "both"` carrying the math cluster's
 * `average_affinity`, `naming_request` and `gate_request`. Unpaired
 * Tier 2 clusters pass through unchanged, followed by the unpaired math
 * clusters in their original order.
 *
 * @param {Array<object>} tier2Clusters - Candidates from propose_structure.
 * @param {Array<object>} mathClusters - Candidates from the math detector.
 * @returns {Array<object>} Merged candidate list.
 */
function mergeClusterProposals(tier2Clusters, mathClusters) {
  const consumed = new Set();
  const tier2Side = tier2Clusters.map((tc) => {
    const hit = mathClusters.findIndex(
      (mc, idx) => !consumed.has(idx) && setsEqual(tc.leaves_set, mc.leaves_set),
    );
    if (hit === -1) return tc;
    consumed.add(hit);
    const twin = mathClusters[hit];
    return {
      ...tc,
      source: "both",
      average_affinity: twin.average_affinity,
      naming_request: twin.naming_request,
      gate_request: twin.gate_request,
    };
  });
  const mathOnly = mathClusters.filter((_, idx) => !consumed.has(idx));
  return [...tier2Side, ...mathOnly];
}
|
|
1212
|
+
|
|
1213
|
+
/**
 * Report whether two Sets hold exactly the same members.
 *
 * @param {Set<*>} a - First set.
 * @param {Set<*>} b - Second set.
 * @returns {boolean} True when the sizes match and every member of `a`
 *   is also in `b`.
 */
function setsEqual(a, b) {
  return a.size === b.size && [...a].every((member) => b.has(member));
}
|
|
1218
|
+
|
|
1219
|
+
// Snapshot just the files that NEST is about to mutate. We store
|
|
1220
|
+
// the raw bytes of each source leaf + the parent index (if any)
|
|
1221
|
+
// so a regression rollback can restore them byte-exact. The NEST
|
|
1222
|
+
// applier creates a new subdirectory — rollback deletes that dir.
|
|
1223
|
+
/**
 * Capture the on-disk state a NEST is about to change.
 *
 * Reads every proposal leaf as UTF-8 text, plus the `index.md` that sits
 * next to the first leaf when that file exists.
 *
 * @param {{leaves: Array<{path: string}>}} proposal - Candidate about to
 *   be applied; must contain at least one leaf.
 * @param {string} wikiRoot - Unused; kept for call-site compatibility.
 * @returns {{leafFiles: Array<{path: string, content: string}>,
 *   parentIndex: string, parentIndexContent: (string|null)}} Snapshot;
 *   `parentIndexContent` is null when no parent index exists.
 */
function snapshotForRollback(proposal, wikiRoot) {
  void wikiRoot;
  const leafFiles = [];
  for (const leaf of proposal.leaves) {
    leafFiles.push({ path: leaf.path, content: readFileSync(leaf.path, "utf8") });
  }
  const parentIndex = join(dirname(proposal.leaves[0].path), "index.md");
  const parentIndexContent = existsSync(parentIndex)
    ? readFileSync(parentIndex, "utf8")
    : null;
  return { leafFiles, parentIndex, parentIndexContent };
}
|
|
1237
|
+
|
|
1238
|
+
/**
 * Undo a NEST using a snapshot taken before it was applied.
 *
 * Order matters: the directory the NEST created is removed first, then
 * each snapshotted leaf is rewritten at its original path, and finally
 * the parent index is rewritten if one was captured.
 *
 * @param {{leafFiles: Array<{path: string, content: string}>,
 *   parentIndex: string, parentIndexContent: (string|null)}} rb - Snapshot
 *   of the pre-apply state.
 * @param {{target_dir: string}|null|undefined} applyResult - Result of the
 *   apply; its `target_dir` is deleted recursively when it exists.
 * @returns {void}
 */
function restoreRollback(rb, applyResult) {
  // Remove the freshly created subdirectory along with everything in it.
  const createdDir = applyResult?.target_dir;
  if (createdDir && existsSync(createdDir)) {
    rmSync(createdDir, { recursive: true, force: true });
  }
  // Put every leaf back where it was, recreating parent dirs if needed.
  for (const { path: leafPath, content } of rb.leafFiles) {
    mkdirSync(dirname(leafPath), { recursive: true });
    writeFileSync(leafPath, content, "utf8");
  }
  // Nothing was captured for the parent index: leave it untouched.
  if (!rb.parentIndex || rb.parentIndexContent === null) return;
  writeFileSync(rb.parentIndex, rb.parentIndexContent, "utf8");
}
|
|
1253
|
+
|
|
1254
|
+
// ── Local stub-index bootstrapper ────────────────────────────────────
//
// Writes a minimal index.md into every non-empty directory under the
// wiki root that does not already have one, so the routing-cost metric
// used by the cluster-NEST path always finds an index to parse.
//
// The idea mirrors orchestrator.mjs's bootstrapIndexStubs; it is
// duplicated here so runConvergence can call it without importing the
// orchestrator (which would create a dependency cycle). The
// orchestrator's version (called after convergence) carries more
// fields and handles hosted-mode markers; this one only guarantees a
// valid index file on disk.
function bootstrapStubIndicesForMetric(wikiRoot) {
  for (const dir of walkDirs(wikiRoot)) {
    const indexPath = join(dir, "index.md");
    if (existsSync(indexPath)) continue;

    // Directories with neither leaves nor subdirectories get no stub.
    const { leaves, subdirs } = listChildren(dir);
    const hasNoChildren = leaves.length === 0 && subdirs.length === 0;
    if (hasNoChildren) continue;

    // The stub id is the directory's own name; only the wiki root is
    // labelled as a category, everything below it as a subcategory.
    const id = basename(dir);
    const depthRole = dir === wikiRoot ? "category" : "subcategory";
    const frontmatter = [
      "---",
      `id: ${id}`,
      "type: index",
      `depth_role: ${depthRole}`,
      `focus: "subtree under ${id}"`,
      "generator: skill-llm-wiki/v1",
      "---",
    ];
    writeFileSync(indexPath, `${frontmatter.join("\n")}\n\n`, "utf8");
  }
}
|
|
1285
|
+
|
|
1286
|
+
// mathCandidateIsFresh (defined below, after dropStaleMathCandidate):
// validates that every leaf on a math candidate is still a direct
// child of the candidate's expected parent_dir. Returns `false` if
// any leaf has moved to a different directory, been deleted, or
// was never resident there to begin with. Called just before a
// math-source nest_decision gate request is emitted so stale
// candidates don't burn a Tier 2 round trip.
//
// A fresh candidate (one produced in the same iteration from a
// fresh `listChildren` scan) always passes this check; the guard
// only catches candidates whose members drifted between the pass
// that produced them and the pass that would have gated them.
//
|
|
1297
|
+
// Phase 5 audit-log hook for stale math-candidate drops. Intended to be
// called by `tryClusterNestIteration` when `mathCandidateIsFresh`
// returns false. It does two things:
//
//   1. When `suggestions` is an array, pushes a NEST record listing the
//      candidate's leaf paths so the convergence summary mentions the
//      drop.
//   2. Hands a `rejected-stale` entry (confidence_band="math-gated") to
//      `appendNestDecision` for the decision log.
//
// Exported so unit tests can exercise the append path without driving
// the full convergence loop.
//
// Error handling: the decision-log append is best-effort. Anything
// thrown while building or appending the entry is swallowed, so a
// missing audit record never fails a build.
export function dropStaleMathCandidate(wikiRoot, cand, opId, suggestions) {
  if (Array.isArray(suggestions)) {
    const sourcePaths = cand.leaves.map((leaf) => leaf.path);
    suggestions.push({
      operator: "NEST",
      sources: sourcePaths,
      reason: "math candidate dropped: members no longer co-resident in parent",
    });
  }

  try {
    // Leaves without a data.id are logged under a placeholder name.
    const sourceIds = cand.leaves.map((leaf) => leaf.data?.id ?? "anonymous");
    // A non-finite affinity is recorded as 0.
    const similarity = Number.isFinite(cand.average_affinity)
      ? cand.average_affinity
      : 0;
    appendNestDecision(wikiRoot, {
      op_id: opId,
      sources: sourceIds,
      similarity,
      confidence_band: "math-gated",
      decision: "rejected-stale",
      reason: "members no longer co-resident in parent",
    });
  } catch {
    /* best effort — audit log is a nice-to-have */
  }
}
|
|
1331
|
+
|
|
1332
|
+
/**
 * Check that a math candidate's leaves are all still on disk and all sit
 * directly inside the candidate's `parent_dir`.
 *
 * @param {{parent_dir?: string, leaves?: Array<{path: string}>}|null|undefined} cand
 *   Candidate to validate.
 * @returns {boolean} `false` when the candidate or its `parent_dir` is
 *   missing, when `leaves` is not a non-empty array, or when any leaf
 *   lacks a string `path`, no longer exists, or has a `dirname` that
 *   differs from `parent_dir`; `true` otherwise.
 */
export function mathCandidateIsFresh(cand) {
  const expectedParent = cand ? cand.parent_dir : undefined;
  if (!expectedParent) return false;

  const members = cand.leaves;
  if (!Array.isArray(members) || members.length === 0) return false;

  for (let i = 0; i < members.length; i += 1) {
    const leaf = members[i];
    const stillResident =
      Boolean(leaf) &&
      typeof leaf.path === "string" &&
      existsSync(leaf.path) &&
      // Plain string comparison: the leaf must sit directly in parent_dir.
      dirname(leaf.path) === expectedParent;
    if (!stillResident) return false;
  }
  return true;
}
|
|
1343
|
+
|
|
1344
|
+
// ── Directory walk helper ────────────────────────────────────────────
|
|
1345
|
+
|
|
1346
|
+
/**
 * Collect the wiki root and every non-hidden directory beneath it.
 *
 * @param {string} wikiRoot - Directory to start from; always the first
 *   element of the result, even when it cannot be read.
 * @returns {string[]} Paths of `wikiRoot` and all descendant directories
 *   whose names do not start with ".". Directories that cannot be listed
 *   are kept in the result but not descended into.
 */
function walkDirs(wikiRoot) {
  const found = [wikiRoot];
  const pending = [wikiRoot];

  while (pending.length > 0) {
    const current = pending.pop();
    try {
      const childDirs = readdirSync(current, { withFileTypes: true })
        .filter((entry) => !entry.name.startsWith(".") && entry.isDirectory())
        .map((entry) => join(current, entry.name));
      for (const child of childDirs) {
        found.push(child);
        pending.push(child);
      }
    } catch {
      /* skip */
    }
  }

  return found;
}
|