@ctxr/skill-llm-wiki 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/CHANGELOG.md +118 -0
  2. package/README.md +2 -2
  3. package/SKILL.md +7 -0
  4. package/guide/cli.md +6 -4
  5. package/guide/consumers/index.md +106 -0
  6. package/guide/consumers/quickstart.md +96 -0
  7. package/guide/consumers/recipes/ci-gate.md +125 -0
  8. package/guide/consumers/recipes/dated-wiki.md +131 -0
  9. package/guide/consumers/recipes/format-gate.md +126 -0
  10. package/guide/consumers/recipes/post-write-heal.md +125 -0
  11. package/guide/consumers/recipes/skill-absent.md +111 -0
  12. package/guide/consumers/recipes/subject-wiki.md +110 -0
  13. package/guide/consumers/recipes/testing.md +149 -0
  14. package/guide/index.md +9 -0
  15. package/guide/substrate/operators.md +1 -1
  16. package/guide/substrate/tiered-ai.md +6 -5
  17. package/guide/ux/user-intent.md +6 -5
  18. package/package.json +9 -3
  19. package/scripts/cli.mjs +565 -15
  20. package/scripts/lib/balance.mjs +579 -0
  21. package/scripts/lib/cluster-detect.mjs +482 -4
  22. package/scripts/lib/contract.mjs +257 -0
  23. package/scripts/lib/decision-log.mjs +121 -15
  24. package/scripts/lib/heal.mjs +167 -0
  25. package/scripts/lib/init.mjs +210 -0
  26. package/scripts/lib/intent.mjs +370 -4
  27. package/scripts/lib/join-constants.mjs +22 -0
  28. package/scripts/lib/join.mjs +917 -0
  29. package/scripts/lib/json-envelope.mjs +190 -0
  30. package/scripts/lib/nest-applier.mjs +395 -32
  31. package/scripts/lib/operators.mjs +472 -38
  32. package/scripts/lib/orchestrator.mjs +419 -12
  33. package/scripts/lib/root-containment.mjs +351 -0
  34. package/scripts/lib/similarity-cache.mjs +115 -20
  35. package/scripts/lib/similarity.mjs +11 -0
  36. package/scripts/lib/soft-dag.mjs +726 -0
  37. package/scripts/lib/templates.mjs +78 -0
  38. package/scripts/lib/tiered.mjs +42 -18
  39. package/scripts/lib/validate.mjs +22 -0
  40. package/scripts/lib/where.mjs +71 -0
  41. package/scripts/testkit/assert-frontmatter.mjs +171 -0
  42. package/scripts/testkit/cli-run.mjs +95 -0
  43. package/scripts/testkit/make-wiki-fixture.mjs +301 -0
  44. package/scripts/testkit/stub-skill.mjs +107 -0
  45. package/templates/adrs.llmwiki.layout.yaml +33 -0
  46. package/templates/plans.llmwiki.layout.yaml +34 -0
  47. package/templates/regressions.llmwiki.layout.yaml +34 -0
  48. package/templates/reports.llmwiki.layout.yaml +33 -0
  49. package/templates/runbooks.llmwiki.layout.yaml +33 -0
  50. package/templates/sessions.llmwiki.layout.yaml +34 -0
@@ -40,8 +40,10 @@ import {
40
40
  writeFileSync,
41
41
  } from "node:fs";
42
42
  import { basename, dirname, join, relative } from "node:path";
43
+ import pRetry, { AbortError } from "p-retry";
44
+ import pTimeout, { TimeoutError } from "p-timeout";
43
45
  import { parseFrontmatter, renderFrontmatter } from "./frontmatter.mjs";
44
- import { collectFrontmatterOnly } from "./chunk.mjs";
46
+ import { collectFrontmatterOnly, readFrontmatterStreaming } from "./chunk.mjs";
45
47
  import { listChildren, rebuildAllIndices } from "./indices.mjs";
46
48
  import { buildComparisonModel } from "./similarity.mjs";
47
49
  import {
@@ -53,13 +55,21 @@ import {
53
55
  } from "./tiered.mjs";
54
56
  import {
55
57
  buildProposeStructureRequest,
58
+ buildSiblingIdfContext,
56
59
  detectClusters,
60
+ deterministicPurpose,
61
+ generateDeterministicSlug,
57
62
  MAX_CLUSTER_SIZE,
58
63
  MIN_CLUSTER_SIZE,
59
64
  MIN_MATH_CLUSTER_SIZE,
60
65
  MIN_TIER2_CLUSTER_SIZE,
61
66
  } from "./cluster-detect.mjs";
62
- import { applyNest, resolveNestSlug, validateSlug } from "./nest-applier.mjs";
67
+ import {
68
+ applyNest,
69
+ buildWikiForbiddenIndex,
70
+ resolveNestSlug,
71
+ validateSlug,
72
+ } from "./nest-applier.mjs";
63
73
  import { computeRoutingCost } from "./quality-metric.mjs";
64
74
  import { loadFixture, resolveFromFixture } from "./tier2-protocol.mjs";
65
75
  import { appendMetricTrajectory, appendNestDecision } from "./decision-log.mjs";
@@ -68,7 +78,31 @@ import { appendMetricTrajectory, appendNestDecision } from "./decision-log.mjs";
68
78
  // termination. The methodology's convergence argument proves it
69
79
  // halts, but we still cap defensively in case two operators
70
80
  // interact pathologically.
71
- const MAX_CONVERGENCE_ITERATIONS = 20;
81
+ //
82
+ // Why 200, not 20: the architecture applies at most ONE PAIRWISE
83
+ // operator (DESCEND/LIFT/MERGE) per outer iteration, and pairwise
84
+ // ops always win priority over the cluster-NEST fallback
85
+ // (`tryClusterNestIteration` runs only when no pairwise op fires
86
+ // in a given iteration — see the while loop in `runConvergence`).
87
+ // Note that once cluster-NEST IS reached, it can apply multiple
88
+ // NEST commits in that single pass (multi-NEST selection inside
89
+ // `tryClusterNestIteration`); the scarcity is at the outer
90
+ // iteration level, not per-NEST. On a 596-leaf hand-authored
91
+ // corpus observed in the field (skill-code-review/reviewers.src),
92
+ // Tier 1 similarity produces ~20 viable MERGE pairs; the old cap
93
+ // of 20 iterations burned entirely on those MERGEs and cluster
94
+ // NEST — where the Phase X.10 coarse-partition emits ~75
95
+ // top-level NESTs — never got a chance to run even once. Result:
96
+ // the downstream balance phase saw a 576-leaf root, tried linear
97
+ // carving, hit its own 20-iter cap, and the whole build rolled
98
+ // back.
99
+ //
100
+ // 200 gives a typical large corpus (20 pairwise ops + ~75 cluster
101
+ // NESTs + some follow-up MERGEs after NESTs reveal new overlaps)
102
+ // plenty of headroom. Small wikis exit early via the "no ops
103
+ // fired this iteration" break and never approach the cap — so the
104
+ // raise is effectively free for them.
105
+ const MAX_CONVERGENCE_ITERATIONS = 200;
72
106
 
73
107
  // Each operator returns an array of `Proposal` objects describing
74
108
  // a change to apply. The loop priority-orders proposals, applies
@@ -94,11 +128,24 @@ const PRIORITY = {
94
128
  // Detection: a non-root directory that contains exactly one leaf
95
129
  // file and no indexed subdirs. Apply: move the leaf up one level,
96
130
  // delete the now-empty folder.
131
+ //
132
+ // X.11 constraint: LIFT refuses to land a leaf at the wiki root
133
+ // (`dirname(dir) === wikiRoot`). The root-containment invariant
134
+ // forbids non-index leaves at depth 0, and without this guard LIFT +
135
+ // X.11 root-containment (Phase 4.4.5) would oscillate forever on
136
+ // single-member subcategories that X.11 itself creates for outliers.
137
+ // Single-member subcategories
138
+ // produced by X.11 are a valid transient end state — they stay until
139
+ // future builds accrete topically-adjacent leaves that make the
140
+ // cluster worth merging up. Flatten-to-root is the only direction
141
+ // LIFT is forbidden; every deeper single-child passthrough is still
142
+ // fair game.
97
143
  export function detectLift(wikiRoot) {
98
144
  const proposals = [];
99
145
  const dirs = walkDirs(wikiRoot);
100
146
  for (const dir of dirs) {
101
147
  if (dir === wikiRoot) continue;
148
+ if (dirname(dir) === wikiRoot) continue;
102
149
  const { leaves, subdirs } = listChildren(dir);
103
150
  if (leaves.length === 1 && subdirs.length === 0) {
104
151
  const leaf = leaves[0];
@@ -163,6 +210,48 @@ async function applyLift(wikiRoot, dir, leaf) {
163
210
  // says "same". Apply: produce a merged entry carrying the union
164
211
  // of covers[] (deduped), the more general focus, both source ids as
165
212
  // aliases, and delete the second source leaf.
213
+ // Batch size for parallel pairwise `decide()` calls inside
214
+ // `detectMerge`. Each pair is independent (no cross-pair state;
215
+ // tiered.decide() reads/writes its own cache file + appends one
216
+ // decision-log entry), so an O(N²) sequential loop can be turned
217
+ // into ceil(N(N-1)/2 / BATCH) sequential batches of concurrent awaits.
218
+ // 32 is empirically chosen to saturate I/O without flooding the
219
+ // kernel's open-file table on flat similarity-cache layouts.
220
+ // Larger batches don't help because most of decide()'s latency is
221
+ // filesystem-bound, and the bottleneck is dir-lookup contention
222
+ // on the cache dir, not the number of in-flight readers.
223
+ export const DETECT_MERGE_PAIR_BATCH_SIZE = 32;
224
+
225
+ // Per-pair timeout for `decide()` calls wrapped with `pTimeout`.
226
+ // A healthy pair resolves in <200 ms (cache hit) or <2 s (Tier 1
227
+ // inference cold). 30 s is a 15× safety margin over the worst
228
+ // realistic case — catches genuine hangs (stuck model load, NFS
229
+ // mount stall, runaway fs write) without false-positiving on a
230
+ // slow-but-progressing embed call. When the timeout fires the
231
+ // inner `decide()` call is still running on the event loop; it
232
+ // will eventually settle, but its result is discarded. For a
233
+ // 178k-pair sweep that's negligible overhead.
234
+ export const DETECT_MERGE_PAIR_TIMEOUT_MS = 30_000;
235
+
236
+ // Retry budget per pair. Most "retryable" failures are transient
237
+ // fs glitches (EAGAIN on a contested shard dir, a partial write
238
+ // that parse rejected) — three attempts total (1 initial + 2
239
+ // retries) with 500 ms → 5 s exponential backoff handles those
240
+ // cleanly. Only timeouts (wrapped as AbortError) are explicitly
241
+ // non-retryable; all other errors (including deterministic
242
+ // validation errors from `decide()` — unknown qualityMode, missing
243
+ // ctx fields, etc.) currently consume the full retry budget. That
244
+ // wastes a few seconds on what would be a repeat failure, but
245
+ // classifying decide()'s error surface would require pattern-
246
+ // matching on error messages or a dedicated error-code layer, and
247
+ // the waste in practice is small vs the correctness risk of
248
+ // misclassifying a legitimate transient failure. Acceptable
249
+ // trade-off for now; a targeted shouldRetry() refinement is
250
+ // tracked as a follow-up.
251
+ export const DETECT_MERGE_PAIR_RETRIES = 2;
252
+ export const DETECT_MERGE_PAIR_RETRY_MIN_MS = 500;
253
+ export const DETECT_MERGE_PAIR_RETRY_MAX_MS = 5_000;
254
+
166
255
  export async function detectMerge(wikiRoot, ctx) {
167
256
  const proposals = [];
168
257
  const dirs = walkDirs(wikiRoot);
@@ -176,19 +265,136 @@ export async function detectMerge(wikiRoot, ctx) {
176
265
  // between 10⁹ and 10⁶ operations.
177
266
  const corpus = leaves.map((l) => l.data);
178
267
  const model = buildComparisonModel(corpus);
179
- for (let i = 0; i < leaves.length; i++) {
180
- for (let j = i + 1; j < leaves.length; j++) {
181
- const a = leaves[i];
182
- const b = leaves[j];
183
- const r = await decide(a.data, b.data, corpus, {
184
- wikiRoot,
185
- opId: ctx.opId,
186
- operator: "MERGE",
187
- qualityMode: ctx.qualityMode,
188
- interactive: ctx.interactive,
189
- tier2Handler: ctx.tier2Handler,
190
- precomputedModel: model,
191
- });
268
+ // Stream pairs in batches without ever materialising the full
269
+ // O(N²) pair list. The outer/inner (i, j) indices advance
270
+ // monotonically; once a batch of DETECT_MERGE_PAIR_BATCH_SIZE
271
+ // pairs is ready we run them concurrently, drain results in
272
+ // deterministic order, and continue. Memory stays O(batch
273
+ // size) instead of O(N²). For a flat 600-leaf directory the
274
+ // pre-allocation would have been ~180k pair objects at ~200
275
+ // bytes each ≈ 36 MB — not fatal but wasteful when all we need
276
+ // is a rolling 32-pair window.
277
+ //
278
+ // Reliability: each `decide()` call is wrapped with
279
+ // `pTimeout` (per-call deadline) + `pRetry` (transient retry
280
+ // with exponential backoff). The outer batch uses
281
+ // `Promise.allSettled` so a pair that exhausts retries logs
282
+ // and skips instead of aborting the whole batch — the previous
283
+ // `Promise.all` semantic would have discarded every
284
+ // successfully-completed pair alongside the one failure.
285
+ // Timeout is non-retryable: a `TimeoutError` is re-thrown as
286
+ // `AbortError` so pRetry gives up immediately on the first
287
+ // deadline hit. Rationale: the underlying `decide()` call is
288
+ // still running on the event loop when pTimeout fires and may
289
+ // still write its cache entry + decision log. A retry would
290
+ // run a second `decide()` for the same pair and duplicate
291
+ // those side effects. Fail-fast on timeout keeps the write
292
+ // footprint to one cache/log entry per pair, which matters for
293
+ // post-run auditability. Transient errors (EAGAIN, partial
294
+ // writes, parse rejects) still retry up to
295
+ // `DETECT_MERGE_PAIR_RETRIES` times with backoff.
296
+ //
297
+ // Determinism: each batch's pairs are processed in the order
298
+ // they were generated (pure i-ascending, j-ascending), and the
299
+ // within-batch allSettled[] preserves that order via positional
300
+ // result semantics. Cross-batch ordering is preserved by
301
+ // draining each batch before advancing.
302
+ const BATCH = DETECT_MERGE_PAIR_BATCH_SIZE;
303
+ const decideCtx = {
304
+ wikiRoot,
305
+ opId: ctx.opId,
306
+ operator: "MERGE",
307
+ qualityMode: ctx.qualityMode,
308
+ interactive: ctx.interactive,
309
+ tier2Handler: ctx.tier2Handler,
310
+ precomputedModel: model,
311
+ };
312
+ const pending = [];
313
+ let failureCount = 0;
314
+ const decideWithGuards = (a, b) =>
315
+ pRetry(
316
+ async () => {
317
+ try {
318
+ return await pTimeout(decide(a.data, b.data, corpus, decideCtx), {
319
+ milliseconds: DETECT_MERGE_PAIR_TIMEOUT_MS,
320
+ message:
321
+ `detectMerge: decide() timed out after ` +
322
+ `${DETECT_MERGE_PAIR_TIMEOUT_MS}ms for pair ` +
323
+ `(${a.data.id ?? "?"} ↔ ${b.data.id ?? "?"})`,
324
+ });
325
+ } catch (err) {
326
+ // Convert timeouts into non-retryable AbortErrors. The
327
+ // underlying `decide()` call is still running on the
328
+ // event loop when pTimeout rejects — it will eventually
329
+ // settle and may still write the similarity-cache entry
330
+ // AND append a decision-log entry for this pair. Running
331
+ // a retry attempt after that would trigger a SECOND
332
+ // `decide()` for the same pair, doubling the cache writes
333
+ // and decision-log entries (on the happy-if-slow path)
334
+ // or doubling the timeout cost (on the stuck path).
335
+ // Treating timeouts as fail-fast skips the pair cleanly
336
+ // so the only visible residue is whatever the dangling
337
+ // decide() eventually writes — one cache entry, one log
338
+ // entry, never amplified. True cancellation would
339
+ // require an AbortSignal plumbed through decide →
340
+ // similarity-cache → embeddings, which is a bigger
341
+ // refactor; the non-retry shortcut is the honest
342
+ // trade-off for this release.
343
+ if (err instanceof TimeoutError) throw new AbortError(err);
344
+ throw err;
345
+ }
346
+ },
347
+ {
348
+ retries: DETECT_MERGE_PAIR_RETRIES,
349
+ minTimeout: DETECT_MERGE_PAIR_RETRY_MIN_MS,
350
+ maxTimeout: DETECT_MERGE_PAIR_RETRY_MAX_MS,
351
+ onFailedAttempt: (err) => {
352
+ // Single-line breadcrumb per failure. Quiet-but-visible
353
+ // — an operator inspecting a slow run should SEE the
354
+ // retries happening rather than wonder why a batch
355
+ // ended up with a few skipped pairs.
356
+ process.stderr.write(
357
+ `[detectMerge] retry ${err.attemptNumber}/${err.retriesLeft + err.attemptNumber}` +
358
+ ` pair (${a.data.id ?? "?"} ↔ ${b.data.id ?? "?"}): ` +
359
+ `${err.message}\n`,
360
+ );
361
+ },
362
+ },
363
+ );
364
+ const flush = async () => {
365
+ if (pending.length === 0) return;
366
+ const settled = await Promise.allSettled(
367
+ pending.map(({ a, b }) => decideWithGuards(a, b)),
368
+ );
369
+ for (let k = 0; k < settled.length; k++) {
370
+ const outcome = settled[k];
371
+ const { a, b } = pending[k];
372
+ if (outcome.status === "rejected") {
373
+ failureCount++;
374
+ const reason = outcome.reason;
375
+ // Timeout path: decideWithGuards converts `TimeoutError`
376
+ // into `AbortError` so pRetry skips the pair
377
+ // immediately (see the note in decideWithGuards on why
378
+ // re-running decide() after a timeout would duplicate
379
+ // side effects). At this point `reason` is therefore an
380
+ // AbortError whose originalError is the TimeoutError —
381
+ // unwrap to distinguish "timed out and bailed" from
382
+ // "exhausted retries on a transient error". The phrasing
383
+ // in the breadcrumb matters for operators grepping logs
384
+ // after a slow run.
385
+ const original = reason?.originalError;
386
+ const isTimeout =
387
+ reason instanceof TimeoutError || original instanceof TimeoutError;
388
+ const kind = isTimeout ? "timeout" : "error";
389
+ const verb = isTimeout ? "aborted" : "exhausted retries";
390
+ const msg = (isTimeout ? original?.message : reason?.message) ?? reason;
391
+ process.stderr.write(
392
+ `[detectMerge] pair (${a.data.id ?? "?"} ↔ ${b.data.id ?? "?"}) ` +
393
+ `${verb} (${kind}): ${msg}\n`,
394
+ );
395
+ continue;
396
+ }
397
+ const r = outcome.value;
192
398
  if (r.decision === "same") {
193
399
  proposals.push({
194
400
  operator: "MERGE",
@@ -199,6 +405,19 @@ export async function detectMerge(wikiRoot, ctx) {
199
405
  });
200
406
  }
201
407
  }
408
+ pending.length = 0;
409
+ };
410
+ for (let i = 0; i < leaves.length; i++) {
411
+ for (let j = i + 1; j < leaves.length; j++) {
412
+ pending.push({ a: leaves[i], b: leaves[j] });
413
+ if (pending.length >= BATCH) await flush();
414
+ }
415
+ }
416
+ await flush(); // drain the tail
417
+ if (failureCount > 0) {
418
+ process.stderr.write(
419
+ `[detectMerge] finished ${dir}: ${failureCount} pair(s) skipped after exhausting retries (or on timeout)\n`,
420
+ );
202
421
  }
203
422
  }
204
423
  return proposals;
@@ -263,6 +482,51 @@ async function applyMerge(wikiRoot, a, b, decision) {
263
482
  aliases.delete(data.id);
264
483
  data.aliases = Array.from(aliases);
265
484
 
485
+ // Pre-apply alias-collision guard. If any alias the MERGE is about to
486
+ // introduce would collide with a live id elsewhere in the wiki, refuse
487
+ // the merge BEFORE writing the keeper or deleting the absorbed. The
488
+ // v1.0 validator catches this downstream as ALIAS-COLLIDES-ID, but by
489
+ // then the phase has committed and the whole convergence iteration
490
+ // has to roll back. Checking here pre-empts the rollback.
491
+ //
492
+ // Scope: only aliases the MERGE is *newly introducing* can create a
493
+ // new collision — that's absorbed's id and absorbed's pre-existing
494
+ // aliases, rolled into the keeper. The keeper's pre-existing aliases
495
+ // were already validated on their way into the tree, so re-checking
496
+ // them here would only produce misattributed errors. We filter the
497
+ // candidate set accordingly and report the absorbed leaf as the
498
+ // source of the conflict.
499
+ //
500
+ // We exclude absorbed.path from the live-id scan because the merge
501
+ // deletes the absorbed file, freeing its id namespace, so absorbed's
502
+ // own id shouldn't register as an "elsewhere" collision against
503
+ // itself. keeper.path does NOT need to be excluded: MERGE rewrites
504
+ // keeper in place but keeps its id unchanged, and the filter on
505
+ // `newlyIntroducedAliases` already drops keeper's id before the
506
+ // alias check (see `.delete(data.id)` below), so keeper's id
507
+ // appearing in `liveIds` is harmless and keeping the scan focused on
508
+ // "every other entry in the wiki" is clearer than two asymmetric
509
+ // exclusions. Anything else carrying a colliding id is a real
510
+ // conflict that would surface downstream as ALIAS-COLLIDES-ID if we
511
+ // proceeded.
512
+ const newlyIntroducedAliases = new Set([
513
+ absorbed.data.id,
514
+ ...(absorbed.data.aliases ?? []),
515
+ ]);
516
+ newlyIntroducedAliases.delete(data.id);
517
+ const liveIds = collectLiveIds(wikiRoot, new Set([absorbed.path]));
518
+ for (const alias of newlyIntroducedAliases) {
519
+ if (liveIds.has(alias)) {
520
+ throw new Error(
521
+ `MERGE: alias "${alias}" (from absorbed ${absorbed.data.id}) ` +
522
+ `collides with an existing live id; refusing to merge ` +
523
+ `${absorbed.data.id} into ${keeper.data.id}. ` +
524
+ `Resolve the conflict (rename the other leaf, or drop ` +
525
+ `the alias) and retry.`,
526
+ );
527
+ }
528
+ }
529
+
266
530
  writeFileSync(keeper.path, renderFrontmatter(data, body), "utf8");
267
531
  rmSync(absorbed.path, { force: true });
268
532
  return {
@@ -541,6 +805,7 @@ export async function runConvergence(wikiRoot, ctx = {}) {
541
805
  metricTrajectory,
542
806
  commitBetweenIterations,
543
807
  nestedParents,
808
+ qualityMode,
544
809
  });
545
810
  if (nestOutcome === "applied") continue;
546
811
  if (nestOutcome === "pending-tier2") {
@@ -620,7 +885,9 @@ async function tryClusterNestIteration(wikiRoot, ctx) {
620
885
  metricTrajectory,
621
886
  commitBetweenIterations,
622
887
  nestedParents = new Set(),
888
+ qualityMode = "tiered-fast",
623
889
  } = ctx;
890
+ const deterministic = qualityMode === "deterministic";
624
891
 
625
892
  // Collect candidate proposals across every parent directory.
626
893
  // For each directory:
@@ -670,23 +937,28 @@ async function tryClusterNestIteration(wikiRoot, ctx) {
670
937
 
671
938
  const relDir = relative(wikiRoot, dir) || ".";
672
939
 
673
- // Step 1: propose_structure Tier 2 request. Park on pending
674
- // without short-circuiting the math phase below.
940
+ // Step 1: propose_structure Tier 2 request. Skipped in
941
+ // deterministic mode — the whole point is no LLM in the loop.
942
+ // Math detection alone drives structural proposals; no Tier 2
943
+ // subcategory suggestions are ever ingested, so byte-reproducible
944
+ // outputs are guaranteed from the same inputs.
675
945
  let tier2Clusters = [];
676
- const proposeReq = buildProposeStructureRequest(relDir, leaves);
677
- const proposeResp = resolveTier2Response(wikiRoot, fixture, proposeReq);
678
- if (proposeResp === "pending") {
679
- enqueuePending(wikiRoot, proposeReq);
680
- suggestions.push({
681
- operator: "NEST",
682
- sources: leaves.map((l) => l.path),
683
- reason: `propose_structure parked for ${relDir} (awaiting Tier 2)`,
684
- });
685
- // Fall through — math still runs so any cluster this dir
686
- // carries is evaluated (and its gate/naming requests are
687
- // batched alongside every other directory's) before we exit 7.
688
- } else {
689
- tier2Clusters = extractTier2Clusters(proposeResp, leaves, dir);
946
+ if (!deterministic) {
947
+ const proposeReq = buildProposeStructureRequest(relDir, leaves);
948
+ const proposeResp = resolveTier2Response(wikiRoot, fixture, proposeReq);
949
+ if (proposeResp === "pending") {
950
+ enqueuePending(wikiRoot, proposeReq);
951
+ suggestions.push({
952
+ operator: "NEST",
953
+ sources: leaves.map((l) => l.path),
954
+ reason: `propose_structure parked for ${relDir} (awaiting Tier 2)`,
955
+ });
956
+ // Fall through — math still runs so any cluster this dir
957
+ // carries is evaluated (and its gate/naming requests are
958
+ // batched alongside every other directory's) before we exit 7.
959
+ } else {
960
+ tier2Clusters = extractTier2Clusters(proposeResp, leaves, dir);
961
+ }
690
962
  }
691
963
 
692
964
  // Step 2: math cluster detection (aggressive scan).
@@ -705,6 +977,40 @@ async function tryClusterNestIteration(wikiRoot, ctx) {
705
977
  // Step 3: merge proposals, dedup by member set.
706
978
  const merged = mergeClusterProposals(tier2Clusters, mathClusters);
707
979
  for (const c of merged) c.parent_dir = dir;
980
+ // Step 3b (deterministic only): pre-name every math candidate
981
+ // using `generateDeterministicSlug` so the naming loop below
982
+ // treats them as already-named and skips the cluster_name
983
+ // Tier 2 request. The sibling leaves at `dir` act as the IDF
984
+ // context for the slug generator so terms that are common
985
+ // across the entire directory get down-weighted relative to
986
+ // terms that distinguish the cluster from its siblings — the
987
+ // slug becomes the cluster's most-distinguishing feature.
988
+ //
989
+ // Precompute the IDF map ONCE per directory and reuse it across
990
+ // every candidate. Without this, each `generateDeterministicSlug`
991
+ // call would re-tokenize + recompute IDF over the full sibling
992
+ // corpus — an O(|siblings| × |candidates|) repeat on dirs with
993
+ // multiple candidates. A shared map collapses that to
994
+ // O(|siblings| + |candidates| × |cluster|). Same byte-output;
995
+ // faster for dense dirs.
996
+ if (deterministic) {
997
+ // Only build the sibling-IDF context when there's at least one
998
+ // math candidate that will actually consume it. Skipping the
999
+ // tokenisation + IDF computation on directories that produced
1000
+ // only tier2-sourced (or no) candidates avoids a hot-path cost
1001
+ // on large sibling sets for no gain.
1002
+ const hasMathCandidate = merged.some((c) => c.source === "math");
1003
+ if (hasMathCandidate) {
1004
+ const deterministicIdf = buildSiblingIdfContext(leaves);
1005
+ for (const cand of merged) {
1006
+ if (cand.source !== "math") continue;
1007
+ cand.slug = generateDeterministicSlug(cand.leaves, leaves, {
1008
+ precomputedIdf: deterministicIdf,
1009
+ });
1010
+ cand.purpose = deterministicPurpose(cand.leaves);
1011
+ }
1012
+ }
1013
+ }
708
1014
  allCandidates.push(...merged);
709
1015
  }
710
1016
 
@@ -715,6 +1021,12 @@ async function tryClusterNestIteration(wikiRoot, ctx) {
715
1021
  // Step 4: math-only candidates go through a mandatory
716
1022
  // nest_decision gate. Candidates that came from propose_structure
717
1023
  // are already structurally approved by Tier 2 — skip the gate.
1024
+ //
1025
+ // In deterministic mode the gate is also skipped: math candidates
1026
+ // produced by the aggressive-threshold scan are auto-approved
1027
+ // because the partition-shape score + metric regression gate
1028
+ // downstream are the algorithmic equivalents of the gate and are
1029
+ // fully deterministic.
718
1030
  const gatedCandidates = [];
719
1031
  for (const cand of allCandidates) {
720
1032
  if (cand.source === "tier2" || cand.source === "both") {
@@ -733,6 +1045,11 @@ async function tryClusterNestIteration(wikiRoot, ctx) {
733
1045
  dropStaleMathCandidate(wikiRoot, cand, opId, suggestions);
734
1046
  continue;
735
1047
  }
1048
+ if (deterministic) {
1049
+ cand.gate_reason = "deterministic mode: algorithmic auto-approve";
1050
+ gatedCandidates.push(cand);
1051
+ continue;
1052
+ }
736
1053
  // math-only: run the gate.
737
1054
  const gateReq = cand.gate_request;
738
1055
  const gateResp = resolveTier2Response(wikiRoot, fixture, gateReq);
@@ -923,6 +1240,23 @@ async function tryClusterNestIteration(wikiRoot, ctx) {
923
1240
  /* best effort: indices may not be set up yet on a fresh wiki */
924
1241
  }
925
1242
 
1243
+ // Precompute the wiki-wide forbidden-id index once per convergence
1244
+ // iteration, but ONLY when at least one picked proposal will consult
1245
+ // it. resolveNestSlug below reuses it via opts.wikiIndex so each
1246
+ // picked proposal's slug resolution is O(parent-dir) instead of
1247
+ // O(full-tree). After each successful apply, we mutate the index
1248
+ // (`wikiIndex.add(resolvedSlug)`) so the next proposal sees the new
1249
+ // directory/id as occupied. Total cost across a multi-NEST iteration
1250
+ // drops from O(#applies × #files) to O(#files + #applies).
1251
+ //
1252
+ // Guarding on `picked.length > 0` avoids an otherwise-pointless
1253
+ // full-tree walk on iterations where detection ran but the non-
1254
+ // conflict selection culled every candidate — a concrete saving on
1255
+ // the convergence loop's last iteration, where the picked set is
1256
+ // typically empty and we're one step away from breaking out.
1257
+ const wikiIndex =
1258
+ picked.length > 0 ? buildWikiForbiddenIndex(wikiRoot) : null;
1259
+
926
1260
  let appliedCount = 0;
927
1261
  for (const proposal of picked) {
928
1262
  // Re-check freshness RIGHT BEFORE apply. An earlier pick in the
@@ -935,12 +1269,28 @@ async function tryClusterNestIteration(wikiRoot, ctx) {
935
1269
  dropStaleMathCandidate(wikiRoot, proposal, opId, suggestions);
936
1270
  continue;
937
1271
  }
1272
+ // Confidence band + tier_used reflect how the candidate got here.
1273
+ // Under `--quality-mode deterministic` every math candidate was
1274
+ // auto-approved algorithmically (no sub-agent ever consulted),
1275
+ // so the audit trail must say so:
1276
+ // - `confidence_band: "deterministic-math"` distinguishes these
1277
+ // entries from math candidates that passed a Tier 2
1278
+ // `nest_decision` gate.
1279
+ // - `tier_used: 0` correctly records that no Tier was consulted
1280
+ // at decision time. Tooling/tests that interpret tier_used as
1281
+ // actual Tier usage no longer see a misleading `2`.
1282
+ // Under every other quality mode the paths that reach here
1283
+ // touched Tier 2 at least once (propose_structure, nest_decision,
1284
+ // or both), so tier_used stays at the legacy default of 2.
938
1285
  const confBand =
939
- proposal.source === "both"
940
- ? "tier2-and-math"
941
- : proposal.source === "tier2"
942
- ? "tier2-proposed"
943
- : "math-gated";
1286
+ deterministic && proposal.source === "math"
1287
+ ? "deterministic-math"
1288
+ : proposal.source === "both"
1289
+ ? "tier2-and-math"
1290
+ : proposal.source === "tier2"
1291
+ ? "tier2-proposed"
1292
+ : "math-gated";
1293
+ const tierUsed = confBand === "deterministic-math" ? 0 : 2;
944
1294
  const preMetric = computeRoutingCost(wikiRoot).cost;
945
1295
  // Snapshot the files we're about to mutate so we can roll back
946
1296
  // on a metric regression. We ONLY need the old leaf contents
@@ -957,7 +1307,12 @@ async function tryClusterNestIteration(wikiRoot, ctx) {
957
1307
  // is written AFTER applyNest succeeds so decisions.yaml never
958
1308
  // records a rename for an op that ultimately failed.
959
1309
  const originalSlug = proposal.slug;
960
- const resolvedSlug = resolveNestSlug(originalSlug, proposal);
1310
+ const resolvedSlug = resolveNestSlug(
1311
+ originalSlug,
1312
+ proposal,
1313
+ wikiRoot,
1314
+ wikiIndex ? { wikiIndex } : {},
1315
+ );
961
1316
  let result;
962
1317
  try {
963
1318
  result = applyNest(wikiRoot, proposal, resolvedSlug);
@@ -972,6 +1327,7 @@ async function tryClusterNestIteration(wikiRoot, ctx) {
972
1327
  sources: proposal.leaves.map((l) => l.data.id),
973
1328
  similarity: proposal.average_affinity ?? 0,
974
1329
  confidence_band: confBand,
1330
+ tier_used: tierUsed,
975
1331
  decision: "rejected-by-gate",
976
1332
  reason: `applyNest threw: ${err.message}`,
977
1333
  });
@@ -1021,6 +1377,7 @@ async function tryClusterNestIteration(wikiRoot, ctx) {
1021
1377
  sources: proposal.leaves.map((l) => l.data.id),
1022
1378
  similarity: proposal.average_affinity ?? 0,
1023
1379
  confidence_band: confBand,
1380
+ tier_used: tierUsed,
1024
1381
  decision: "rejected-by-metric",
1025
1382
  reason: `metric ${preMetric.toFixed(4)} → ${postMetric.toFixed(4)} exceeds ${policyLabel}`,
1026
1383
  });
@@ -1038,6 +1395,7 @@ async function tryClusterNestIteration(wikiRoot, ctx) {
1038
1395
  sources: proposal.leaves.map((l) => l.data.id),
1039
1396
  similarity: proposal.average_affinity ?? 0,
1040
1397
  confidence_band: confBand,
1398
+ tier_used: tierUsed,
1041
1399
  decision: "slug-renamed",
1042
1400
  reason: `slug "${originalSlug}" collided with existing id; renamed to "${resolvedSlug}"`,
1043
1401
  });
@@ -1071,6 +1429,7 @@ async function tryClusterNestIteration(wikiRoot, ctx) {
1071
1429
  sources: proposal.leaves.map((l) => l.data.id),
1072
1430
  similarity: proposal.average_affinity ?? 0,
1073
1431
  confidence_band: confBand,
1432
+ tier_used: tierUsed,
1074
1433
  decision: "applied",
1075
1434
  reason:
1076
1435
  `slug=${proposal.slug}, ` +
@@ -1080,6 +1439,12 @@ async function tryClusterNestIteration(wikiRoot, ctx) {
1080
1439
  // recursively sub-cluster it in later iterations of the
1081
1440
  // same run.
1082
1441
  nestedParents.add(result.target_dir);
1442
+ // Update the wiki-wide forbidden index with the new slug so later
1443
+ // picks in this same iteration see the directory as occupied and
1444
+ // auto-suffix against it. Only the slug needs adding — member leaf
1445
+ // ids were already in the index (NEST moves files but preserves
1446
+ // ids) and the applier doesn't delete anything.
1447
+ if (wikiIndex) wikiIndex.add(resolvedSlug);
1083
1448
  appliedCount++;
1084
1449
  }
1085
1450
 
@@ -1343,6 +1708,75 @@ export function mathCandidateIsFresh(cand) {
1343
1708
 
1344
1709
  // ── Directory walk helper ────────────────────────────────────────────
1345
1710
 
1711
// Gather the frontmatter `id` of every live entry under `wikiRoot`,
// leaving out any file whose path appears in `excludePaths`.
//
// What counts as an entry: every `.md` file that parses — ordinary
// leaves as well as the `index.md` found at each directory depth. The
// validator treats both shapes as id-bearing, so a MERGE alias that
// collides with either one is equally a conflict.
//
// Parameters:
//   wikiRoot     — directory the walk starts from.
//   excludePaths — Set of file paths to ignore. Membership is tested
//                  against `join(dir, name)` exactly as the walk builds
//                  it, so callers must supply paths in that same form.
//
// Returns a Set of the ids that were found.
//
// The walk is best-effort. A directory that cannot be listed, a file
// that cannot be read, and frontmatter that fails to parse are all
// skipped without an error. Skipping only ever REMOVES ids, so the
// result is a SUBSET of the ids truly live in the wiki, never a
// superset. A MERGE that clears this guard is protected against every
// cleanly-parseable entry but not against a collision hidden inside a
// malformed file (a possible false negative). Malformed entries also
// fail the validator's post-op pass, which is what actually enforces
// the invariant at commit time; this guard exists to pre-empt the
// common case of a healthy wiki hitting a runtime MERGE collision, not
// to replace validation. Callers needing a strict, complete live-id set
// should validate the wiki first.
//
// Used by applyMerge as its pre-apply alias-collision guard: before
// MERGE commits new aliases, none of them may already exist as a live
// id elsewhere in the corpus.
//
// Every dot-prefixed name is skipped — directories and files alike,
// because the name check runs before the directory check. This follows
// the same discipline as `scripts/lib/chunk.mjs::collectEntryPaths` and
// the other wiki-tree walks in the pipeline. It covers skill internals
// (`.llmwiki/`, `.work/`, `.shape/`) and arbitrary user dot-directories
// (`.git/`, `.github/`, `.cache/`, etc). Without it, ids sitting in
// unrelated dotfolders would cause false-positive collision refusals
// that the validator never reports.
//
// Frontmatter is obtained through `readFrontmatterStreaming`, so each
// file contributes a bounded read (≤ `MAX_FRONTMATTER_BYTES`) instead
// of a full-body slurp — the same pattern the other full-tree id walks
// in the pipeline use.
export function collectLiveIds(wikiRoot, excludePaths = new Set()) {
  // Directory listing that reports failure as `null` instead of throwing.
  const listDir = (dirPath) => {
    try {
      return readdirSync(dirPath, { withFileTypes: true });
    } catch {
      return null;
    }
  };

  // Frontmatter id of one file, or `undefined` when the file has no
  // frontmatter, no id, or cannot be read / parsed.
  const readId = (filePath) => {
    try {
      const captured = readFrontmatterStreaming(filePath);
      if (captured === null) return undefined;
      const parsed = parseFrontmatter(captured.frontmatterText, filePath);
      return parsed.data?.id;
    } catch {
      /* skip unreadable / malformed frontmatter */
      return undefined;
    }
  };

  const found = new Set();
  // Explicit LIFO work list: files of a directory are visited before
  // any of its subdirectories are descended into.
  const pending = [wikiRoot];

  while (pending.length !== 0) {
    const currentDir = pending.pop();
    const children = listDir(currentDir);
    if (children === null) continue;

    for (const child of children) {
      const { name } = child;
      if (name.startsWith(".")) continue;

      const childPath = join(currentDir, name);
      if (child.isDirectory()) {
        pending.push(childPath);
        continue;
      }

      if (!name.endsWith(".md") || excludePaths.has(childPath)) continue;

      const id = readId(childPath);
      if (id) found.add(id);
    }
  }

  return found;
}
1779
+
1346
1780
  function walkDirs(wikiRoot) {
1347
1781
  const out = [wikiRoot];
1348
1782
  const stack = [wikiRoot];