@ctxr/skill-llm-wiki 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/CHANGELOG.md +118 -0
  2. package/README.md +2 -2
  3. package/SKILL.md +7 -0
  4. package/guide/cli.md +6 -4
  5. package/guide/consumers/index.md +106 -0
  6. package/guide/consumers/quickstart.md +96 -0
  7. package/guide/consumers/recipes/ci-gate.md +125 -0
  8. package/guide/consumers/recipes/dated-wiki.md +131 -0
  9. package/guide/consumers/recipes/format-gate.md +126 -0
  10. package/guide/consumers/recipes/post-write-heal.md +125 -0
  11. package/guide/consumers/recipes/skill-absent.md +111 -0
  12. package/guide/consumers/recipes/subject-wiki.md +110 -0
  13. package/guide/consumers/recipes/testing.md +149 -0
  14. package/guide/index.md +9 -0
  15. package/guide/substrate/operators.md +1 -1
  16. package/guide/substrate/tiered-ai.md +6 -5
  17. package/guide/ux/user-intent.md +6 -5
  18. package/package.json +9 -3
  19. package/scripts/cli.mjs +565 -15
  20. package/scripts/lib/balance.mjs +579 -0
  21. package/scripts/lib/cluster-detect.mjs +482 -4
  22. package/scripts/lib/contract.mjs +257 -0
  23. package/scripts/lib/decision-log.mjs +121 -15
  24. package/scripts/lib/heal.mjs +167 -0
  25. package/scripts/lib/init.mjs +210 -0
  26. package/scripts/lib/intent.mjs +370 -4
  27. package/scripts/lib/join-constants.mjs +22 -0
  28. package/scripts/lib/join.mjs +917 -0
  29. package/scripts/lib/json-envelope.mjs +190 -0
  30. package/scripts/lib/nest-applier.mjs +395 -32
  31. package/scripts/lib/operators.mjs +472 -38
  32. package/scripts/lib/orchestrator.mjs +419 -12
  33. package/scripts/lib/root-containment.mjs +351 -0
  34. package/scripts/lib/similarity-cache.mjs +115 -20
  35. package/scripts/lib/similarity.mjs +11 -0
  36. package/scripts/lib/soft-dag.mjs +726 -0
  37. package/scripts/lib/templates.mjs +78 -0
  38. package/scripts/lib/tiered.mjs +42 -18
  39. package/scripts/lib/validate.mjs +22 -0
  40. package/scripts/lib/where.mjs +71 -0
  41. package/scripts/testkit/assert-frontmatter.mjs +171 -0
  42. package/scripts/testkit/cli-run.mjs +95 -0
  43. package/scripts/testkit/make-wiki-fixture.mjs +301 -0
  44. package/scripts/testkit/stub-skill.mjs +107 -0
  45. package/templates/adrs.llmwiki.layout.yaml +33 -0
  46. package/templates/plans.llmwiki.layout.yaml +34 -0
  47. package/templates/regressions.llmwiki.layout.yaml +34 -0
  48. package/templates/reports.llmwiki.layout.yaml +33 -0
  49. package/templates/runbooks.llmwiki.layout.yaml +33 -0
  50. package/templates/sessions.llmwiki.layout.yaml +34 -0
@@ -0,0 +1,579 @@
1
+ // balance.mjs — post-convergence structural rebalance.
2
+ //
3
+ // Runs after the main convergence loop when the caller passed
4
+ // `--fanout-target=N` and/or `--max-depth=D`. Iterates until fixed
5
+ // point (or a maxIterations cap) applying two transform classes:
6
+ //
7
+ // 1. Sub-cluster overfull directories. Any directory whose fan-out
8
+ // exceeds `fanout-target * 1.5` is a candidate: the math
9
+ // cluster-detector carves out one coherent cluster and NEST
10
+ // applies it. The "× 1.5" slack avoids thrashing on directories
11
+ // that are just above target by one or two children — only
12
+ // meaningfully-overfull dirs are touched.
13
+ //
14
+ // 2. Flatten overdeep single-child chains. Any branch that exceeds
15
+ // `max-depth` AND whose terminal segment is a single-child
16
+ // passthrough gets lifted. The collapsed segment is the nearest
17
+ // ancestor index.md whose only routable content is a single
18
+ // subcategory — these add zero routing value and were a
19
+ // frequent offender in early-corpus hand-authored drafts.
20
+ //
21
+ // Every operation is deterministic in the inputs (lex-sorted dir
22
+ // iteration, lex-sorted cluster-member iteration, deterministic slug
23
+ // naming reused from the Phase X.3 deterministic-mode helpers). Two
24
+ // runs on the same tree produce the same output.
25
+ //
26
+ // The caller — orchestrator.mjs — invokes runBalance between the main
27
+ // convergence phase (Phase 4) and the index-regeneration phase
28
+ // (Phase 5). An optional `nestedParents` set can be passed in to
29
+ // opt specific directories out of the sub-cluster pass (balance
30
+ // adds its own newly-created subdirs to the same set across
31
+ // iterations so a freshly-created subdir never gets re-clustered
32
+ // on the next pass). The current orchestrator call site doesn't
33
+ // thread convergence's own nestedParents through — it passes an
34
+ // empty set — because balance targets overfull / overdeep dirs
35
+ // specifically, and convergence leaves exactly those as its
36
+ // residual "we didn't nest this deep enough" surface.
37
+
38
+ import { existsSync, readdirSync, renameSync, rmSync, rmdirSync } from "node:fs";
39
+ import { basename, dirname, join, relative, sep } from "node:path";
40
+ import {
41
+ buildSiblingIdfContext,
42
+ deterministicPurpose,
43
+ detectClusters,
44
+ generateDeterministicSlug,
45
+ } from "./cluster-detect.mjs";
46
+ import { listChildren, rebuildAllIndices } from "./indices.mjs";
47
+ import {
48
+ applyNest,
49
+ buildWikiForbiddenIndex,
50
+ resolveNestSlug,
51
+ } from "./nest-applier.mjs";
52
+
53
+ // Balance-loop convergence cap. The rebalance is expected to terminate
54
+ // in a handful of iterations because each successful operation
55
+ // strictly reduces either |overfull dirs| or |overdeep branches|; the
56
+ // cap is defensive in case pathological inputs (e.g. a sub-clustering
57
+ // that produces a new overfull dir inside itself) trigger ping-pong.
58
+ export const MAX_BALANCE_ITERATIONS = 20;
59
+
60
+ // Fanout trigger: we apply sub-clustering only when a directory's
61
+ // child count EXCEEDS this multiple of the user's target. Bare-equal
62
+ // dirs are left alone so the target is the landing zone, not the
63
+ // rejection threshold.
64
+ export const FANOUT_OVERLOAD_MULTIPLIER = 1.5;
65
+
66
+ // Platform-stable sort key for absolute paths. `relative(wikiRoot, p)`
67
+ // returns OS-native separators — `\\` on Windows, `/` on POSIX — which
68
+ // means raw string comparison across those strings produces different
69
+ // lex orders on different platforms and breaks the phase's
70
+ // byte-reproducibility guarantee. Normalise every `sep` to `/` before
71
+ // comparing so the sort key is identical on ubuntu-latest and
72
+ // windows-latest.
73
/**
 * Build a separator-normalised sort key for `p`, expressed relative to
 * `wikiRoot`.
 *
 * `relative()` yields OS-native separators; every `sep` is rewritten to `/`
 * so the key compares the same way on every platform. Where `sep` is already
 * `/` the split/join round-trip leaves the string unchanged.
 *
 * @param {string} wikiRoot - Base directory the key is made relative to.
 * @param {string} p - Path to derive the key for.
 * @returns {string} `/`-separated relative path.
 */
function posixSortKey(wikiRoot, p) {
  return relative(wikiRoot, p).split(sep).join("/");
}
77
+
78
+ // Compute the depth of each directory reachable from wikiRoot.
79
+ // Depth is the number of path segments between wikiRoot and the
80
+ // directory, so wikiRoot itself is depth 0 and any direct child
81
+ // subdirectory is depth 1. Dot-prefixed directories are skipped on
82
+ // the same blanket rule used elsewhere in the pipeline. Returns a
83
+ // Map<absolutePath, number>.
84
+ //
85
+ // Implementation: directory-only scan via `readdirSync`. Walks every
86
+ // non-dot child directory, regardless of whether it has `index.md`
87
+ // yet — balance runs before Phase 5's `bootstrapIndexStubs`, so
88
+ // category dirs created in Phase 3 draft can exist on disk with
89
+ // leaves but no index.md. Requiring `index.md` here (as an earlier
90
+ // draft did) would silently hide them from the rebalance pass.
91
+ // Dot-prefixed directories are still skipped under the blanket
92
+ // dot-skip rule shared across the pipeline.
93
/**
 * Map every non-dot directory reachable from `wikiRoot` to its depth.
 *
 * `wikiRoot` itself is depth 0 and each nested level adds one. The scan is
 * purely directory-based (no `index.md` requirement). A directory that cannot
 * be read keeps the entry recorded for it but is not descended into.
 *
 * @param {string} wikiRoot - Directory the scan starts from.
 * @returns {Map<string, number>} Directory path → depth.
 */
export function computeDepthMap(wikiRoot) {
  const depthByDir = new Map([[wikiRoot, 0]]);
  const pending = [{ dir: wikiRoot, depth: 0 }];
  while (pending.length > 0) {
    const { dir, depth } = pending.pop();
    let dirents;
    try {
      dirents = readdirSync(dir, { withFileTypes: true });
    } catch {
      // Unreadable (or vanished) directory: nothing to descend into.
      continue;
    }
    // Sibling names are sorted so traversal order does not depend on the
    // order the filesystem happens to return entries in.
    const childNames = dirents
      .filter((entry) => !entry.name.startsWith(".") && entry.isDirectory())
      .map((entry) => entry.name)
      .sort((left, right) => left.localeCompare(right));
    const childDepth = depth + 1;
    for (const name of childNames) {
      const childDir = join(dir, name);
      depthByDir.set(childDir, childDepth);
      pending.push({ dir: childDir, depth: childDepth });
    }
  }
  return depthByDir;
}
121
+
122
+ // Get the maximum routable depth in the wiki. Small summary helper
123
+ // for diagnostics and unit tests — callers that only need the
124
+ // deepest reachable routable directory depth don't have to inflate
125
+ // a full `computeDepthMap` into userland. Not wired into the
126
+ // orchestrator today; kept on the exported surface so a future
127
+ // audit-trail or `--dry-run` pre-flight can surface "would balance
128
+ // do anything at all?" cheaply.
129
/**
 * Return the largest depth recorded by `computeDepthMap(wikiRoot)`.
 *
 * @param {string} wikiRoot - Directory the depth scan starts from.
 * @returns {number} Deepest depth found; 0 when no entry exceeds 0.
 */
export function getMaxDepth(wikiRoot) {
  const depths = [...computeDepthMap(wikiRoot).values()];
  return depths.reduce((deepest, depth) => (depth > deepest ? depth : deepest), 0);
}
136
+
137
+ // Compute fan-out statistics across every directory in a single
138
+ // traversal. Returns
139
+ //
140
+ // {
141
+ // maxFanout,
142
+ // avgFanout,
143
+ // perDir: Map<dir, number>, // combined leaves+subdirs
144
+ // leafCounts: Map<dir, number>, // leaves only
145
+ // }
146
+ //
147
+ // `perDir` counts the combined leaf + subdir children — the Claude-
148
+ // routing-cost view (an index lists both shapes). `leafCounts` holds
149
+ // leaves only — the movable-fanout view consumed by
150
+ // `detectFanoutOverload`. Both maps are produced in the same
151
+ // `readdirSync` sweep.
152
+ //
153
+ // Subdir traversal is filesystem-based (`readdirSync`, no index.md
154
+ // required). Balance runs after Phase 4 convergence but BEFORE
155
+ // Phase 5's `bootstrapIndexStubs` — category dirs created in Phase 3
156
+ // draft can have leaves without an index.md yet, and convergence's
157
+ // internal stub bootstrapper only fires on the cluster-NEST path
158
+ // (builds with no NEST picks skip it). So balance must walk every
159
+ // non-dot child dir regardless of index.md presence.
160
+ //
161
+ // Leaf counting, though, uses `listChildren`'s routable-leaf
162
+ // discipline (frontmatter parsed, `data.id` required). The
163
+ // sub-cluster pass can only move routable leaves — a `.md` file
164
+ // without valid frontmatter or without an `id` is inert — so
165
+ // counting all `.md` files would inflate the movable-fanout metric
166
+ // and flag dirs as overfull when they're actually un-actionable.
167
+ // `listChildren` is safe to call on an index-less parent dir (the
168
+ // index.md check is only for enumerating subdirs); it returns
169
+ // routable leaves even when the parent itself is pre-bootstrap.
170
/**
 * Collect fan-out statistics for every non-dot directory under `wikiRoot`
 * in one traversal.
 *
 * For each readable directory the fan-out is the number of leaves reported
 * by `listChildren` plus the number of non-dot child directories found on
 * disk. Unreadable directories are skipped and contribute nothing.
 *
 * @param {string} wikiRoot - Directory the scan starts from.
 * @returns {{ maxFanout: number, avgFanout: number,
 *   perDir: Map<string, number>, leafCounts: Map<string, number> }}
 *   `perDir` holds leaves + subdirs per directory, `leafCounts` leaves only;
 *   `avgFanout` is 0 when no directory could be read.
 */
export function computeFanoutStats(wikiRoot) {
  const perDir = new Map();
  const leafCounts = new Map();
  const pending = [wikiRoot];
  let fanoutSum = 0;
  let visited = 0;
  let maxFanout = 0;
  while (pending.length > 0) {
    const current = pending.pop();
    let dirents;
    try {
      dirents = readdirSync(current, { withFileTypes: true });
    } catch {
      // Unreadable directory: leave it out of the statistics entirely.
      continue;
    }
    const childDirs = dirents
      .filter((entry) => !entry.name.startsWith(".") && entry.isDirectory())
      .map((entry) => join(current, entry.name));
    const leafCount = listChildren(current).leaves.length;
    const fanout = leafCount + childDirs.length;
    perDir.set(current, fanout);
    leafCounts.set(current, leafCount);
    fanoutSum += fanout;
    visited += 1;
    if (fanout > maxFanout) maxFanout = fanout;
    for (const childDir of childDirs) pending.push(childDir);
  }
  const avgFanout = visited > 0 ? fanoutSum / visited : 0;
  return { maxFanout, avgFanout, perDir, leafCounts };
}
204
+
205
+ // Detect directories whose MOVABLE fan-out EXCEEDS fanoutTarget ×
206
+ // MULTIPLIER. Movable fan-out is leaf count alone, not leaves+subdirs:
207
+ // the sub-cluster pass can only carve clusters out of leaves (subdirs
208
+ // are structurally cemented by their own indexing) so a dir that is
209
+ // routing-overfull purely because it holds many subcategories is
210
+ // un-actionable here and would otherwise stall the loop at the
211
+ // lex-smallest un-actionable entry. The `computeFanoutStats` helper
212
+ // remains available for diagnostic / audit views that want the full
213
+ // routing-cost metric. Returned in lex order so the balance loop's
214
+ // apply sequence is byte-reproducible. `nestedParents` is an opt-out:
215
+ // directories created by the current balance pass (or by an earlier
216
+ // convergence pass in the same op) are left alone so we don't
217
+ // sub-cluster a freshly-created subcategory on the next iteration —
218
+ // that's the "let the op settle" discipline convergence already uses.
219
/**
 * List directories whose leaf count exceeds
 * `fanoutTarget * FANOUT_OVERLOAD_MULTIPLIER`.
 *
 * Only leaf counts (not leaves + subdirs) are compared, and any directory
 * present in `nestedParents` is excluded. A directory sitting exactly on the
 * threshold is not reported.
 *
 * @param {string} wikiRoot - Root of the wiki tree.
 * @param {number} fanoutTarget - Target fan-out supplied by the caller.
 * @param {Set<string>} [nestedParents] - Directories to leave alone.
 * @returns {string[]} Overloaded directories, ordered by their
 *   `posixSortKey` relative to `wikiRoot`.
 */
export function detectFanoutOverload(wikiRoot, fanoutTarget, nestedParents = new Set()) {
  const limit = fanoutTarget * FANOUT_OVERLOAD_MULTIPLIER;
  // One sweep gives the leaf count of every directory.
  const { leafCounts } = computeFanoutStats(wikiRoot);
  const overloaded = [];
  for (const [dir, leafCount] of leafCounts) {
    if (nestedParents.has(dir)) continue;
    if (leafCount > limit) overloaded.push(dir);
  }
  const byPosixKey = (left, right) =>
    posixSortKey(wikiRoot, left).localeCompare(posixSortKey(wikiRoot, right));
  overloaded.sort(byPosixKey);
  return overloaded;
}
233
+
234
+ // Detect branches that exceed maxDepth and whose terminal subdir is a
235
+ // pure single-child passthrough (one subdir, zero leaves). Only
236
+ // single-child passthroughs are candidates because flattening a
237
+ // multi-child subcategory would lose structure; passthroughs, by
238
+ // definition, carry no information the parent doesn't already hold.
239
+ // Returns the absolute paths of the passthrough directories to
240
+ // collapse, in lex order. The caller applies LIFT-chain-style
241
+ // flattening via `applyBalanceFlatten`.
242
+ //
243
+ // Subdir traversal uses readdir directly (no index.md required) so
244
+ // pre-bootstrap category dirs are visible — balance runs before
245
+ // Phase 5's bootstrap stubs. Leaf counting uses `listChildren`'s
246
+ // routable discipline: a passthrough is defined as zero ROUTABLE
247
+ // leaves + one subdir, so a stray `.md` file without valid frontmatter
248
+ // shouldn't disqualify the parent from being a passthrough (it's
249
+ // inert content, not a routable sibling). Aligns with the movable-leaf
250
+ // semantics used in computeFanoutStats.
251
/**
 * Find directories deeper than `maxDepth` that are single-child
 * passthroughs: exactly one non-dot child directory on disk and zero leaves
 * according to `listChildren`.
 *
 * Directories at or above `maxDepth`, and directories that cannot be read,
 * are never reported.
 *
 * @param {string} wikiRoot - Root of the wiki tree.
 * @param {number} maxDepth - Deepest depth that is still acceptable.
 * @returns {string[]} Passthrough directories, ordered by their
 *   `posixSortKey` relative to `wikiRoot`.
 */
export function detectDepthOverage(wikiRoot, maxDepth) {
  const isPassthrough = (dir) => {
    let dirents;
    try {
      dirents = readdirSync(dir, { withFileTypes: true });
    } catch {
      return false;
    }
    const childDirCount = dirents.filter(
      (entry) => !entry.name.startsWith(".") && entry.isDirectory(),
    ).length;
    const leafCount = listChildren(dir).leaves.length;
    return leafCount === 0 && childDirCount === 1;
  };

  const passthroughs = [];
  for (const [dir, depth] of computeDepthMap(wikiRoot)) {
    const withinLimit = depth <= maxDepth;
    if (!withinLimit && isPassthrough(dir)) passthroughs.push(dir);
  }
  passthroughs.sort((left, right) =>
    posixSortKey(wikiRoot, left).localeCompare(posixSortKey(wikiRoot, right)),
  );
  return passthroughs;
}
280
+
281
+ // Promote a single-child passthrough's only subdir up one level,
282
+ // replacing the passthrough itself. The passthrough's index.md is
283
+ // removed; the promoted subdir's contents live directly under the
284
+ // passthrough's parent dir.
285
+ //
286
+ // The `parents[]` frontmatter field on every descendant is a POSIX-
287
+ // RELATIVE path to the file's DIRECT parent index.md — in practice
288
+ // either `"index.md"` (for leaves, pointing to their same-dir index)
289
+ // or `"../index.md"` (for subcategory index.md files, pointing to
290
+ // the index in the dir above). Because every file in the promoted
291
+ // subtree moves up by exactly one level TOGETHER — both the file
292
+ // and its direct-parent index.md — the relative path between them
293
+ // is invariant. A leaf at `pass/child/leaf.md` with
294
+ // `parents: ["index.md"]` becomes `child/leaf.md` after flatten, and
295
+ // `"index.md"` still resolves to its direct parent (which also
296
+ // moved). A subcategory index at `pass/child/index.md` with
297
+ // `parents: ["../index.md"]` becomes `child/index.md` post-flatten;
298
+ // its `"../index.md"` semantically shifts from "pass/index.md"
299
+ // (which is being deleted) to "parent/index.md" (the new direct
300
+ // parent), which is exactly the right re-parenting.
301
+ //
302
+ // In other words: no parents[] rewrite is needed. An earlier draft
303
+ // of this function attempted to strip one leading `"../"` from every
304
+ // parents[] entry, but that was wrong — it would rewrite valid
305
+ // `"../index.md"` references on subcategory indices into
306
+ // `"index.md"`, self-pointing. Leaving parents[] alone is both
307
+ // simpler and correct.
308
+ //
309
+ // Returns { promoted, removed } where:
310
+ // promoted — the new absolute path of the promoted subdir.
311
+ // removed — the absolute path that no longer exists (the old
312
+ // passthrough).
313
/**
 * Promote a single-child passthrough's only subdirectory one level up and
 * remove the passthrough directory itself.
 *
 * The single child is identified from the filesystem (non-dot directories),
 * the same rule `detectDepthOverage` uses to select candidates, so a child
 * that has no `index.md` yet is still promoted. `listChildren` is consulted
 * only for the routable-leaf count.
 *
 * Every refusal check runs BEFORE the first mutation, so a refusal leaves the
 * tree untouched. No `parents[]` frontmatter is rewritten here.
 *
 * @param {string} wikiRoot - Wiki root; used only to build readable messages.
 * @param {string} passthroughDir - Directory to collapse.
 * @returns {{ promoted: string, removed: string }} `promoted` is the child's
 *   new path; `removed` is the passthrough path that no longer exists.
 * @throws {Error} When the directory is not a single-child passthrough, when
 *   the promote target already exists, or when the directory holds non-dot
 *   entries other than the child and `index.md`. Filesystem errors
 *   (`readdirSync`, `renameSync`, `rmdirSync`, ...) propagate unchanged.
 */
export function applyBalanceFlatten(wikiRoot, passthroughDir) {
  // Not caught on purpose: a readdir failure propagates to the caller.
  const dirents = readdirSync(passthroughDir, { withFileTypes: true });
  const dotNames = [];
  const visible = [];
  for (const entry of dirents) {
    if (entry.name.startsWith(".")) dotNames.push(entry.name);
    else visible.push(entry);
  }
  const childDirs = visible.filter((entry) => entry.isDirectory());
  const { leaves } = listChildren(passthroughDir);
  if (leaves.length !== 0 || childDirs.length !== 1) {
    throw new Error(
      `balance-flatten: ${relative(wikiRoot, passthroughDir)} is not a single-child passthrough (leaves=${leaves.length}, subdirs=${childDirs.length})`,
    );
  }
  const childName = childDirs[0].name;
  const child = join(passthroughDir, childName);
  const parent = dirname(passthroughDir);
  const promotedPath = join(parent, childName);
  if (existsSync(promotedPath) && promotedPath !== child) {
    throw new Error(
      `balance-flatten: promote target ${relative(wikiRoot, promotedPath)} already exists`,
    );
  }
  // Preflight: apart from dot-prefixed noise, the passthrough may contain
  // only the child and `index.md`. Anything else would be lost when the
  // directory goes away, so refuse before touching the filesystem.
  const allowed = new Set([childName, "index.md"]);
  const stray = visible.map((entry) => entry.name).filter((name) => !allowed.has(name));
  if (stray.length > 0) {
    throw new Error(
      `balance-flatten: ${relative(wikiRoot, passthroughDir)} holds unexpected ` +
        `non-listChildren content (stray: ${JSON.stringify(stray)}); ` +
        `refusing to flatten to avoid silent data loss`,
    );
  }
  // Dot-prefixed entries are removed up front so the non-recursive
  // `rmdirSync` below can succeed.
  for (const name of dotNames) {
    rmSync(join(passthroughDir, name), { recursive: true, force: true });
  }
  // Move the child next to the passthrough, then drop the passthrough's
  // index.md and the now-empty directory.
  renameSync(child, promotedPath);
  const passIdx = join(passthroughDir, "index.md");
  if (existsSync(passIdx)) rmSync(passIdx, { force: true });
  // Non-recursive on purpose: anything that appeared in the directory after
  // the preflight makes this fail (ENOTEMPTY) instead of being deleted.
  rmdirSync(passthroughDir);
  return { promoted: promotedPath, removed: passthroughDir };
}
388
+
389
+ // Run the balance phase to fixed point. Returns
390
+ //
391
+ // {
392
+ // iterations,
393
+ // applied: [{ iteration, operator, sources, describe }, ...],
394
+ // nestedParents: Set<absolutePath>, // augmented
395
+ // converged: boolean,
396
+ // }
397
+ //
398
+ // Contract with the caller (orchestrator.mjs):
399
+ // - `fanoutTarget` / `maxDepth` are the parsed flag values (already
400
+ // validated at intent time). Either or both may be null — if
401
+ // neither is set, runBalance is a no-op and returns
402
+ // `{ iterations: 0, applied: [], nestedParents, converged: true }`.
403
+ // - `nestedParents` is an optional opt-out set. Any directory in it
404
+ // is skipped by the fanout pass — useful for a caller that wants
405
+ // to protect newly-created subdirs from being immediately
406
+ // re-carved. The current orchestrator does NOT plumb convergence's
407
+ // internal `nestedParents` set through (runConvergence doesn't
408
+ // export it), so in practice balance starts with a fresh empty
409
+ // Set and augments it in-place as it creates new subdirs across
410
+ // its own iterations. Returned in the result so tests / future
411
+ // callers can observe what balance added. Pipe-through-from-
412
+ // convergence is a possible future enhancement, hence the shape.
413
+ // - `commitBetweenIterations({iteration, operator, summary})` is
414
+ // the same callback runConvergence uses; orchestrator wires it
415
+ // to the private-git commit machinery.
416
/**
 * Run the balance phase until a full pass finds no work, or until
 * `MAX_BALANCE_ITERATIONS` passes have run.
 *
 * Each pass applies at most one operation: a depth flatten when `maxDepth`
 * is set and an overdeep passthrough exists, otherwise a sub-cluster of the
 * first overfull directory that yields a cluster proposal (when
 * `fanoutTarget` is set). Errors thrown by the apply helpers are not caught
 * here.
 *
 * @param {string} wikiRoot - Root of the wiki tree.
 * @param {object} [ctx] - Options.
 * @param {number|null} [ctx.fanoutTarget] - Enables the fanout pass when set.
 * @param {number|null} [ctx.maxDepth] - Enables the depth pass when set.
 * @param {*} [ctx.opId] - Accepted but currently unused.
 * @param {string} [ctx.qualityMode] - Accepted but currently unused.
 * @param {Set<string>} [ctx.nestedParents] - Directories the fanout pass must
 *   skip; newly created subdirs are added to this same set.
 * @param {Function} [ctx.commitBetweenIterations] - Awaited after every
 *   applied operation with `{ iteration, operator, summary }`.
 * @returns {Promise<{ iterations: number, applied: object[],
 *   nestedParents: Set<string>, converged: boolean }>} `converged` is true
 *   only when a pass completed without applying anything.
 */
export async function runBalance(wikiRoot, ctx = {}) {
  const {
    fanoutTarget = null,
    maxDepth = null,
    opId,
    qualityMode = "tiered-fast",
    nestedParents = new Set(),
    commitBetweenIterations = async () => {},
  } = ctx;
  // Accepted for interface stability; neither value influences this phase.
  void qualityMode;
  void opId;

  const depthPassEnabled = maxDepth != null;
  const fanoutPassEnabled = fanoutTarget != null;
  if (!depthPassEnabled && !fanoutPassEnabled) {
    return { iterations: 0, applied: [], nestedParents, converged: true };
  }

  const applied = [];
  // The forbidden-slug index is only needed by the fanout pass, so it is
  // built only when that pass can fire.
  const wikiIndex = fanoutPassEnabled ? buildWikiForbiddenIndex(wikiRoot) : null;
  const rel = (target) => relative(wikiRoot, target);

  // Record an applied operation, then hand it to the commit callback.
  const logAndCommit = async ({ iteration, operator, sources, describe, summary }) => {
    applied.push({ iteration, operator, sources, describe });
    await commitBetweenIterations({ iteration, operator, summary });
  };

  // Depth pass: flatten the first overdeep passthrough, if any.
  const flattenOne = async (iteration) => {
    const overdeep = detectDepthOverage(wikiRoot, maxDepth);
    if (overdeep.length === 0) return false;
    const [passthrough] = overdeep;
    const { promoted } = applyBalanceFlatten(wikiRoot, passthrough);
    rebuildAllIndices(wikiRoot);
    await logAndCommit({
      iteration,
      operator: "BALANCE_FLATTEN",
      sources: [passthrough],
      describe:
        `flattened passthrough ${rel(passthrough)} ` +
        `(promoted ${rel(promoted)})`,
      summary: `balance: flattened ${rel(passthrough)} → ${rel(promoted)}`,
    });
    return true;
  };

  // Fanout pass: walk the overfull list until one directory yields a
  // proposal, apply that single proposal, and stop.
  const subclusterOne = async (iteration) => {
    const overfull = detectFanoutOverload(wikiRoot, fanoutTarget, nestedParents);
    for (const parentDir of overfull) {
      const { leaves } = listChildren(parentDir);
      const proposals = await detectClusters(wikiRoot, leaves, {
        returnEmptyMarker: false,
      });
      if (proposals.length === 0) continue; // nothing to carve here; try the next one
      // Highest average_affinity first.
      proposals.sort((a, b) => (b.average_affinity ?? 0) - (a.average_affinity ?? 0));
      const [best] = proposals;
      const slug = generateDeterministicSlug(best.leaves, leaves, {
        precomputedIdf: buildSiblingIdfContext(leaves),
      });
      const purpose = deterministicPurpose(best.leaves);
      Object.assign(best, { parent_dir: parentDir, source: "math", slug, purpose });
      const resolvedSlug = resolveNestSlug(slug, best, wikiRoot, { wikiIndex });
      // applyNest errors are deliberately left to propagate to the caller.
      const { target_dir: targetDir } = applyNest(wikiRoot, best, resolvedSlug);
      rebuildAllIndices(wikiRoot);
      nestedParents.add(targetDir);
      // Keep the pre-built index current so later slug resolution in this
      // run sees the new subdir as taken.
      wikiIndex.add(resolvedSlug);
      await logAndCommit({
        iteration,
        operator: "BALANCE_SUBCLUSTER",
        sources: best.leaves.map((l) => l.path),
        describe:
          `sub-clustered ${best.leaves.length} leaves from ` +
          `${rel(parentDir)} → ${rel(targetDir)} ` +
          `(avg_affinity=${(best.average_affinity ?? 0).toFixed(3)}, ` +
          `source=deterministic-math)`,
        summary:
          `balance: sub-clustered ${best.leaves.length} leaves into ${rel(targetDir)}`,
      });
      return true;
    }
    return false;
  };

  let passes = 0;
  let settled = false;
  while (!settled && passes < MAX_BALANCE_ITERATIONS) {
    passes += 1;
    // The depth pass runs first; the fanout pass is only tried when the
    // depth pass applied nothing in this pass.
    const flattened = depthPassEnabled && (await flattenOne(passes));
    const carved = !flattened && fanoutPassEnabled && (await subclusterOne(passes));
    // A pass with no work is the only clean exit; hitting the cap leaves
    // `settled` false.
    settled = !(flattened || carved);
  }

  return { iterations: passes, applied, nestedParents, converged: settled };
}