@ctxr/skill-llm-wiki 1.0.2 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +128 -0
- package/README.md +11 -8
- package/SKILL.md +11 -11
- package/guide/cli.md +3 -2
- package/guide/correctness/safety.md +2 -2
- package/guide/layout/in-place-mode.md +1 -1
- package/guide/substrate/operators.md +1 -1
- package/guide/substrate/tiered-ai.md +6 -5
- package/guide/ux/user-intent.md +1 -1
- package/package.json +13 -4
- package/scripts/cli.mjs +92 -2
- package/scripts/lib/balance.mjs +579 -0
- package/scripts/lib/cluster-detect.mjs +482 -4
- package/scripts/lib/contract.mjs +53 -4
- package/scripts/lib/decision-log.mjs +121 -15
- package/scripts/lib/draft.mjs +127 -20
- package/scripts/lib/frontmatter.mjs +45 -9
- package/scripts/lib/heal.mjs +5 -0
- package/scripts/lib/intent.mjs +370 -4
- package/scripts/lib/join-constants.mjs +22 -0
- package/scripts/lib/join.mjs +917 -0
- package/scripts/lib/nest-applier.mjs +395 -32
- package/scripts/lib/operators.mjs +472 -38
- package/scripts/lib/orchestrator.mjs +419 -12
- package/scripts/lib/root-containment.mjs +351 -0
- package/scripts/lib/similarity-cache.mjs +115 -20
- package/scripts/lib/similarity.mjs +11 -0
- package/scripts/lib/soft-dag.mjs +726 -0
- package/scripts/lib/tier2-protocol.mjs +169 -37
- package/scripts/lib/tiered.mjs +42 -18
- package/scripts/lib/validate.mjs +22 -0
|
@@ -0,0 +1,579 @@
|
|
|
1
|
+
// balance.mjs — post-convergence structural rebalance.
|
|
2
|
+
//
|
|
3
|
+
// Runs after the main convergence loop when the caller passed
|
|
4
|
+
// `--fanout-target=N` and/or `--max-depth=D`. Iterates until fixed
|
|
5
|
+
// point (or a maxIterations cap) applying two transform classes:
|
|
6
|
+
//
|
|
7
|
+
// 1. Sub-cluster overfull directories. Any directory whose fan-out
|
|
8
|
+
// exceeds `fanout-target * 1.5` is a candidate: the math
|
|
9
|
+
// cluster-detector carves out one coherent cluster and NEST
|
|
10
|
+
// applies it. The "× 1.5" slack avoids thrashing on directories
|
|
11
|
+
// that are just above target by one or two children — only
|
|
12
|
+
// meaningfully-overfull dirs are touched.
|
|
13
|
+
//
|
|
14
|
+
// 2. Flatten overdeep single-child chains. Any branch that exceeds
|
|
15
|
+
// `max-depth` AND whose terminal segment is a single-child
|
|
16
|
+
// passthrough gets lifted. The collapsed segment is the nearest
|
|
17
|
+
// ancestor index.md whose only routable content is a single
|
|
18
|
+
// subcategory — these add zero routing value and were a
|
|
19
|
+
// frequent offender in early-corpus hand-authored drafts.
|
|
20
|
+
//
|
|
21
|
+
// Every operation is deterministic in the inputs (lex-sorted dir
|
|
22
|
+
// iteration, lex-sorted cluster-member iteration, deterministic slug
|
|
23
|
+
// naming reused from the Phase X.3 deterministic-mode helpers). Two
|
|
24
|
+
// runs on the same tree produce the same output.
|
|
25
|
+
//
|
|
26
|
+
// The caller — orchestrator.mjs — invokes runBalance between the main
|
|
27
|
+
// convergence phase (Phase 4) and the index-regeneration phase
|
|
28
|
+
// (Phase 5). An optional `nestedParents` set can be passed in to
|
|
29
|
+
// opt specific directories out of the sub-cluster pass (balance
|
|
30
|
+
// adds its own newly-created subdirs to the same set across
|
|
31
|
+
// iterations so a freshly-created subdir never gets re-clustered
|
|
32
|
+
// on the next pass). The current orchestrator call site doesn't
|
|
33
|
+
// thread convergence's own nestedParents through — it passes an
|
|
34
|
+
// empty set — because balance targets overfull / overdeep dirs
|
|
35
|
+
// specifically, and convergence leaves exactly those as its
|
|
36
|
+
// residual "we didn't nest this deep enough" surface.
|
|
37
|
+
|
|
38
|
+
import { existsSync, readdirSync, renameSync, rmSync, rmdirSync } from "node:fs";
|
|
39
|
+
import { basename, dirname, join, relative, sep } from "node:path";
|
|
40
|
+
import {
|
|
41
|
+
buildSiblingIdfContext,
|
|
42
|
+
deterministicPurpose,
|
|
43
|
+
detectClusters,
|
|
44
|
+
generateDeterministicSlug,
|
|
45
|
+
} from "./cluster-detect.mjs";
|
|
46
|
+
import { listChildren, rebuildAllIndices } from "./indices.mjs";
|
|
47
|
+
import {
|
|
48
|
+
applyNest,
|
|
49
|
+
buildWikiForbiddenIndex,
|
|
50
|
+
resolveNestSlug,
|
|
51
|
+
} from "./nest-applier.mjs";
|
|
52
|
+
|
|
53
|
+
// Balance-loop convergence cap. The rebalance is expected to terminate
|
|
54
|
+
// in a handful of iterations because each successful operation
|
|
55
|
+
// strictly reduces either |overfull dirs| or |overdeep branches|; the
|
|
56
|
+
// cap is defensive in case pathological inputs (e.g. a sub-clustering
|
|
57
|
+
// that produces a new overfull dir inside itself) trigger ping-pong.
|
|
58
|
+
// Read by runBalance's loop guard; when the loop ends because this cap was
// reached (rather than a pass finding no work) the result reports
// `converged: false`.
export const MAX_BALANCE_ITERATIONS = 20;
|
|
59
|
+
|
|
60
|
+
// Fanout trigger: we apply sub-clustering only when a directory's
|
|
61
|
+
// child count EXCEEDS this multiple of the user's target. Bare-equal
|
|
62
|
+
// dirs are left alone so the target is the landing zone, not the
|
|
63
|
+
// rejection threshold.
|
|
64
|
+
// detectFanoutOverload multiplies the caller's fanoutTarget by this value and
// flags a directory only when its leaf count is strictly greater (`>`) than
// the product.
export const FANOUT_OVERLOAD_MULTIPLIER = 1.5;
|
|
65
|
+
|
|
66
|
+
// Platform-stable sort key for absolute paths. `relative(wikiRoot, p)`
|
|
67
|
+
// returns OS-native separators — `\\` on Windows, `/` on POSIX — which
|
|
68
|
+
// means raw string comparison across those strings produces different
|
|
69
|
+
// lex orders on different platforms and breaks the phase's
|
|
70
|
+
// byte-reproducibility guarantee. Normalise every `sep` to `/` before
|
|
71
|
+
// comparing so the sort key is identical on ubuntu-latest and
|
|
72
|
+
// windows-latest.
|
|
73
|
+
// Returns the path of `p` relative to `wikiRoot` with every platform
// separator replaced by "/", so the resulting string can be used as a sort
// key that does not depend on the host OS separator.
function posixSortKey(wikiRoot, p) {
  const relativePath = relative(wikiRoot, p);
  if (sep === "/") {
    // Already slash-separated; nothing to normalise.
    return relativePath;
  }
  return relativePath.split(sep).join("/");
}
|
|
77
|
+
|
|
78
|
+
// Compute the depth of each directory reachable from wikiRoot.
|
|
79
|
+
// Depth is the number of path segments between wikiRoot and the
|
|
80
|
+
// directory, so wikiRoot itself is depth 0 and any direct child
|
|
81
|
+
// subdirectory is depth 1. Dot-prefixed directories are skipped on
|
|
82
|
+
// the same blanket rule used elsewhere in the pipeline. Returns a
|
|
83
|
+
// Map<absolutePath, number>.
|
|
84
|
+
//
|
|
85
|
+
// Implementation: directory-only scan via `readdirSync`. Walks every
|
|
86
|
+
// non-dot child directory, regardless of whether it has `index.md`
|
|
87
|
+
// yet — balance runs before Phase 5's `bootstrapIndexStubs`, so
|
|
88
|
+
// category dirs created in Phase 3 draft can exist on disk with
|
|
89
|
+
// leaves but no index.md. Requiring `index.md` here (as an earlier
|
|
90
|
+
// draft did) would silently hide them from the rebalance pass.
|
|
91
|
+
// Dot-prefixed directories are still skipped under the blanket
|
|
92
|
+
// dot-skip rule shared across the pipeline.
|
|
93
|
+
// Walks the directory tree under `wikiRoot` and records how many levels
// below the root each directory sits (root = 0, direct child = 1, ...).
//
// - Only entries whose Dirent reports `isDirectory()` are followed.
// - Names starting with "." are ignored.
// - A directory that cannot be read is kept in the map but not descended.
//
// Returns a Map keyed by the joined directory path, valued by depth.
export function computeDepthMap(wikiRoot) {
  const depthByDir = new Map([[wikiRoot, 0]]);
  const pending = [{ dir: wikiRoot, depth: 0 }];
  while (pending.length > 0) {
    const { dir, depth } = pending.pop();
    let dirents;
    try {
      dirents = readdirSync(dir, { withFileTypes: true });
    } catch {
      // Unreadable directory: leave its own entry in place, skip children.
      continue;
    }
    // Children are ordered by basename so the traversal (and therefore the
    // Map's insertion order) is the same on every run.
    const childDirs = dirents
      .filter((entry) => !entry.name.startsWith(".") && entry.isDirectory())
      .map((entry) => join(dir, entry.name))
      .sort((left, right) => basename(left).localeCompare(basename(right)));
    const childDepth = depth + 1;
    for (const childDir of childDirs) {
      depthByDir.set(childDir, childDepth);
      pending.push({ dir: childDir, depth: childDepth });
    }
  }
  return depthByDir;
}
|
|
121
|
+
|
|
122
|
+
// Get the maximum routable depth in the wiki. Small summary helper
|
|
123
|
+
// for diagnostics and unit tests — callers that only need the
|
|
124
|
+
// deepest reachable routable directory depth don't have to inflate
|
|
125
|
+
// a full `computeDepthMap` into userland. Not wired into the
|
|
126
|
+
// orchestrator today; kept on the exported surface so a future
|
|
127
|
+
// audit-trail or `--dry-run` pre-flight can surface "would balance
|
|
128
|
+
// do anything at all?" cheaply.
|
|
129
|
+
// Returns the largest depth value found in computeDepthMap(wikiRoot).
// The root always contributes depth 0, so the result is never negative.
export function getMaxDepth(wikiRoot) {
  let deepest = 0;
  computeDepthMap(wikiRoot).forEach((depth) => {
    if (depth > deepest) {
      deepest = depth;
    }
  });
  return deepest;
}
|
|
136
|
+
|
|
137
|
+
// Compute fan-out statistics across every directory in a single
|
|
138
|
+
// traversal. Returns
|
|
139
|
+
//
|
|
140
|
+
// {
|
|
141
|
+
// maxFanout,
|
|
142
|
+
// avgFanout,
|
|
143
|
+
// perDir: Map<dir, number>, // combined leaves+subdirs
|
|
144
|
+
// leafCounts: Map<dir, number>, // leaves only
|
|
145
|
+
// }
|
|
146
|
+
//
|
|
147
|
+
// `perDir` counts the combined leaf + subdir children — the Claude-
|
|
148
|
+
// routing-cost view (an index lists both shapes). `leafCounts` holds
|
|
149
|
+
// leaves only — the movable-fanout view consumed by
|
|
150
|
+
// `detectFanoutOverload`. Both maps are produced in the same
|
|
151
|
+
// `readdirSync` sweep.
|
|
152
|
+
//
|
|
153
|
+
// Subdir traversal is filesystem-based (`readdirSync`, no index.md
|
|
154
|
+
// required). Balance runs after Phase 4 convergence but BEFORE
|
|
155
|
+
// Phase 5's `bootstrapIndexStubs` — category dirs created in Phase 3
|
|
156
|
+
// draft can have leaves without an index.md yet, and convergence's
|
|
157
|
+
// internal stub bootstrapper only fires on the cluster-NEST path
|
|
158
|
+
// (builds with no NEST picks skip it). So balance must walk every
|
|
159
|
+
// non-dot child dir regardless of index.md presence.
|
|
160
|
+
//
|
|
161
|
+
// Leaf counting, though, uses `listChildren`'s routable-leaf
|
|
162
|
+
// discipline (frontmatter parsed, `data.id` required). The
|
|
163
|
+
// sub-cluster pass can only move routable leaves — a `.md` file
|
|
164
|
+
// without valid frontmatter or without an `id` is inert — so
|
|
165
|
+
// counting all `.md` files would inflate the movable-fanout metric
|
|
166
|
+
// and flag dirs as overfull when they're actually un-actionable.
|
|
167
|
+
// `listChildren` is safe to call on an index-less parent dir (the
|
|
168
|
+
// index.md check is only for enumerating subdirs); it returns
|
|
169
|
+
// routable leaves even when the parent itself is pre-bootstrap.
|
|
170
|
+
// Sweeps every readable, non-dot directory under `wikiRoot` once and
// reports child-count statistics.
//
// For each visited directory:
//   fan-out = (leaves reported by listChildren) + (non-dot child directories)
//
// Returns { maxFanout, avgFanout, perDir, leafCounts } where `perDir` maps a
// directory to its fan-out and `leafCounts` maps it to the leaf count alone.
// Directories that cannot be read are skipped entirely and do not appear in
// either map. `avgFanout` is 0 when nothing was visited.
export function computeFanoutStats(wikiRoot) {
  const perDir = new Map();
  const leafCounts = new Map();
  let fanoutSum = 0;
  let visited = 0;
  let maxFanout = 0;
  const pending = [wikiRoot];
  while (pending.length > 0) {
    const current = pending.pop();
    let dirents;
    try {
      dirents = readdirSync(current, { withFileTypes: true });
    } catch {
      continue;
    }
    const childDirs = dirents
      .filter((entry) => !entry.name.startsWith(".") && entry.isDirectory())
      .map((entry) => join(current, entry.name));
    const leafCount = listChildren(current).leaves.length;
    const fanout = leafCount + childDirs.length;
    perDir.set(current, fanout);
    leafCounts.set(current, leafCount);
    fanoutSum += fanout;
    visited += 1;
    maxFanout = Math.max(maxFanout, fanout);
    for (const childDir of childDirs) {
      pending.push(childDir);
    }
  }
  const avgFanout = visited > 0 ? fanoutSum / visited : 0;
  return { maxFanout, avgFanout, perDir, leafCounts };
}
|
|
204
|
+
|
|
205
|
+
// Detect directories whose MOVABLE fan-out EXCEEDS fanoutTarget ×
|
|
206
|
+
// MULTIPLIER. Movable fan-out is leaf count alone, not leaves+subdirs:
|
|
207
|
+
// the sub-cluster pass can only carve clusters out of leaves (subdirs
|
|
208
|
+
// are structurally cemented by their own indexing) so a dir that is
|
|
209
|
+
// routing-overfull purely because it holds many subcategories is
|
|
210
|
+
// un-actionable here and would otherwise stall the loop at the
|
|
211
|
+
// lex-smallest un-actionable entry. The `computeFanoutStats` helper
|
|
212
|
+
// remains available for diagnostic / audit views that want the full
|
|
213
|
+
// routing-cost metric. Returned in lex order so the balance loop's
|
|
214
|
+
// apply sequence is byte-reproducible. `nestedParents` is an opt-out:
|
|
215
|
+
// directories created by the current balance pass (or by an earlier
|
|
216
|
+
// convergence pass in the same op) are left alone so we don't
|
|
217
|
+
// sub-cluster a freshly-created subcategory on the next iteration —
|
|
218
|
+
// that's the "let the op settle" discipline convergence already uses.
|
|
219
|
+
// Lists the directories whose leaf count (from computeFanoutStats) is
// strictly greater than fanoutTarget × FANOUT_OVERLOAD_MULTIPLIER.
//
// wikiRoot       root directory handed to computeFanoutStats.
// fanoutTarget   numeric target; scaled by the multiplier to get the cutoff.
// nestedParents  Set of directory paths to exclude from the result.
//
// Returns an array of directory paths ordered by their posixSortKey.
// NOTE(review): ordering uses String#localeCompare with no explicit locale,
// so it follows the runtime's default collation — confirm that is acceptable
// for the reproducibility guarantee this module documents.
export function detectFanoutOverload(wikiRoot, fanoutTarget, nestedParents = new Set()) {
  const threshold = fanoutTarget * FANOUT_OVERLOAD_MULTIPLIER;
  const { leafCounts } = computeFanoutStats(wikiRoot);
  const overloaded = [];
  for (const [dir, leafCount] of leafCounts) {
    if (nestedParents.has(dir)) continue;
    if (leafCount > threshold) overloaded.push(dir);
  }
  // Compute each sort key once instead of on every comparison.
  const sortKeys = new Map(overloaded.map((dir) => [dir, posixSortKey(wikiRoot, dir)]));
  overloaded.sort((left, right) => sortKeys.get(left).localeCompare(sortKeys.get(right)));
  return overloaded;
}
|
|
233
|
+
|
|
234
|
+
// Detect branches that exceed maxDepth and whose terminal subdir is a
|
|
235
|
+
// pure single-child passthrough (one subdir, zero leaves). Only
|
|
236
|
+
// single-child passthroughs are candidates because flattening a
|
|
237
|
+
// multi-child subcategory would lose structure; passthroughs, by
|
|
238
|
+
// definition, carry no information the parent doesn't already hold.
|
|
239
|
+
// Returns the absolute paths of the passthrough directories to
|
|
240
|
+
// collapse, in lex order. The caller applies LIFT-chain-style
|
|
241
|
+
// flattening via `applyBalanceFlatten`.
|
|
242
|
+
//
|
|
243
|
+
// Subdir traversal uses readdir directly (no index.md required) so
|
|
244
|
+
// pre-bootstrap category dirs are visible — balance runs before
|
|
245
|
+
// Phase 5's bootstrap stubs. Leaf counting uses `listChildren`'s
|
|
246
|
+
// routable discipline: a passthrough is defined as zero ROUTABLE
|
|
247
|
+
// leaves + one subdir, so a stray `.md` file without valid frontmatter
|
|
248
|
+
// shouldn't disqualify the parent from being a passthrough (it's
|
|
249
|
+
// inert content, not a routable sibling). Aligns the movable-leaf
|
|
250
|
+
// semantics used in computeFanoutStats.
|
|
251
|
+
// Finds flatten candidates: directories whose depth (per computeDepthMap) is
// greater than `maxDepth` AND that hold exactly one non-dot child directory
// and zero leaves as reported by listChildren.
//
// The child directory is counted straight from readdirSync, so it qualifies
// whether or not it contains an index.md. Directories that cannot be read are
// skipped.
//
// Returns the candidate paths ordered by their posixSortKey.
export function detectDepthOverage(wikiRoot, maxDepth) {
  const passthroughs = [];
  for (const [dir, depth] of computeDepthMap(wikiRoot)) {
    if (depth <= maxDepth) continue;
    let dirents;
    try {
      dirents = readdirSync(dir, { withFileTypes: true });
    } catch {
      continue;
    }
    const subdirCount = dirents.filter(
      (entry) => !entry.name.startsWith(".") && entry.isDirectory(),
    ).length;
    const leafCount = listChildren(dir).leaves.length;
    const isPassthrough = leafCount === 0 && subdirCount === 1;
    if (isPassthrough) {
      passthroughs.push(dir);
    }
  }
  const sortKeys = new Map(passthroughs.map((dir) => [dir, posixSortKey(wikiRoot, dir)]));
  passthroughs.sort((left, right) => sortKeys.get(left).localeCompare(sortKeys.get(right)));
  return passthroughs;
}
|
|
280
|
+
|
|
281
|
+
// Promote a single-child passthrough's only subdir up one level,
|
|
282
|
+
// replacing the passthrough itself. The passthrough's index.md is
|
|
283
|
+
// removed; the promoted subdir's contents live directly under the
|
|
284
|
+
// passthrough's parent dir.
|
|
285
|
+
//
|
|
286
|
+
// The `parents[]` frontmatter field on every descendant is a POSIX-
|
|
287
|
+
// RELATIVE path to the file's DIRECT parent index.md — in practice
|
|
288
|
+
// either `"index.md"` (for leaves, pointing to their same-dir index)
|
|
289
|
+
// or `"../index.md"` (for subcategory index.md files, pointing to
|
|
290
|
+
// the index in the dir above). Because every file in the promoted
|
|
291
|
+
// subtree moves up by exactly one level TOGETHER — both the file
|
|
292
|
+
// and its direct-parent index.md — the relative path between them
|
|
293
|
+
// is invariant. A leaf at `pass/child/leaf.md` with
|
|
294
|
+
// `parents: ["index.md"]` becomes `child/leaf.md` after flatten, and
|
|
295
|
+
// `"index.md"` still resolves to its direct parent (which also
|
|
296
|
+
// moved). A subcategory index at `pass/child/index.md` with
|
|
297
|
+
// `parents: ["../index.md"]` becomes `child/index.md` post-flatten;
|
|
298
|
+
// its `"../index.md"` semantically shifts from "pass/index.md"
|
|
299
|
+
// (which is being deleted) to "parent/index.md" (the new direct
|
|
300
|
+
// parent), which is exactly the right re-parenting.
|
|
301
|
+
//
|
|
302
|
+
// In other words: no parents[] rewrite is needed. An earlier draft
|
|
303
|
+
// of this function attempted to strip one leading `"../"` from every
|
|
304
|
+
// parents[] entry, but that was wrong — it would rewrite valid
|
|
305
|
+
// `"../index.md"` references on subcategory indices into
|
|
306
|
+
// `"index.md"`, self-pointing. Leaving parents[] alone is both
|
|
307
|
+
// simpler and correct.
|
|
308
|
+
//
|
|
309
|
+
// Returns { promoted, removed } where:
|
|
310
|
+
// promoted — the new absolute path of the promoted subdir.
|
|
311
|
+
// removed — the absolute path that no longer exists (the old
|
|
312
|
+
// passthrough).
|
|
313
|
+
// Lists the non-dot child directories physically present in `dir`, whether
// or not they contain an index.md. Returns [] when `dir` cannot be read.
function listOnDiskSubdirs(dir) {
  let dirents;
  try {
    dirents = readdirSync(dir, { withFileTypes: true });
  } catch {
    return [];
  }
  return dirents
    .filter((entry) => !entry.name.startsWith(".") && entry.isDirectory())
    .map((entry) => join(dir, entry.name));
}

// Collapses a single-child passthrough: moves the passthrough's only child
// directory up into the passthrough's parent, then deletes the passthrough
// (its index.md, any dot-prefixed entries, and the directory itself).
//
// wikiRoot        used only to render relative paths in error messages.
// passthroughDir  absolute path of the directory to collapse.
//
// Returns { promoted, removed }: the child's new path and the deleted
// passthrough path.
//
// Throws (before touching the filesystem) when:
//   - the directory has leaves, or does not have exactly one child directory;
//   - the promote target already exists in the parent (this includes the case
//     where the child has the same basename as the passthrough itself);
//   - the directory holds any non-dot entry other than the child and index.md.
// Filesystem errors from rename / rm / rmdir propagate unchanged.
export function applyBalanceFlatten(wikiRoot, passthroughDir) {
  const { subdirs, leaves } = listChildren(passthroughDir);
  // detectDepthOverage counts the child directory from readdirSync, so it
  // approves passthroughs whose child has no index.md yet. Per this module's
  // own notes, listChildren only reports subdirs that already contain
  // index.md, so when it reports nothing at all, fall back to the same
  // on-disk view the detector used instead of rejecting a candidate the
  // detector just produced.
  const childDirs =
    leaves.length === 0 && subdirs.length === 0
      ? listOnDiskSubdirs(passthroughDir)
      : subdirs;
  if (leaves.length !== 0 || childDirs.length !== 1) {
    throw new Error(
      `balance-flatten: ${relative(wikiRoot, passthroughDir)} is not a single-child passthrough (leaves=${leaves.length}, subdirs=${childDirs.length})`,
    );
  }
  const child = childDirs[0];
  const parent = dirname(passthroughDir);
  const promotedPath = join(parent, basename(child));
  if (existsSync(promotedPath) && promotedPath !== child) {
    throw new Error(
      `balance-flatten: promote target ${relative(wikiRoot, promotedPath)} already exists`,
    );
  }
  // Preflight, still before any mutation: everything in the passthrough must
  // be the child, index.md, or dot-prefixed noise. Anything else (assets,
  // extra directories, stray files) would be lost with the directory, so
  // refuse. A readdir failure here propagates to the caller.
  const entries = readdirSync(passthroughDir);
  const allowed = new Set([basename(child), "index.md"]);
  const stray = entries.filter((e) => !allowed.has(e) && !e.startsWith("."));
  if (stray.length > 0) {
    throw new Error(
      `balance-flatten: ${relative(wikiRoot, passthroughDir)} holds unexpected ` +
        `non-listChildren content (stray: ${JSON.stringify(stray)}); ` +
        `refusing to flatten to avoid silent data loss`,
    );
  }
  // Move the child first. If the rename fails nothing has been deleted yet,
  // so the passthrough is left exactly as it was found.
  renameSync(child, promotedPath);
  // Dot-prefixed entries are treated as noise; remove them so the final
  // rmdirSync sees an empty directory.
  for (const name of entries) {
    if (name.startsWith(".")) {
      rmSync(join(passthroughDir, name), { recursive: true, force: true });
    }
  }
  const passIdx = join(passthroughDir, "index.md");
  if (existsSync(passIdx)) rmSync(passIdx, { force: true });
  // Non-recursive on purpose: if anything appeared in the directory after the
  // preflight, rmdirSync fails instead of silently deleting it.
  rmdirSync(passthroughDir);
  return { promoted: promotedPath, removed: passthroughDir };
}
|
|
388
|
+
|
|
389
|
+
// Run the balance phase to fixed point. Returns
|
|
390
|
+
//
|
|
391
|
+
// {
|
|
392
|
+
// iterations,
|
|
393
|
+
// applied: [{ iteration, operator, sources, describe }, ...],
|
|
394
|
+
// nestedParents: Set<absolutePath>, // augmented
|
|
395
|
+
// converged: boolean,
|
|
396
|
+
// }
|
|
397
|
+
//
|
|
398
|
+
// Contract with the caller (orchestrator.mjs):
|
|
399
|
+
// - `fanoutTarget` / `maxDepth` are the parsed flag values (already
|
|
400
|
+
// validated at intent time). Either or both may be null — if
|
|
401
|
+
// neither is set, runBalance is a no-op and returns
|
|
402
|
+
// `{ iterations: 0, applied: [], nestedParents, converged: true }`.
|
|
403
|
+
// - `nestedParents` is an optional opt-out set. Any directory in it
|
|
404
|
+
// is skipped by the fanout pass — useful for a caller that wants
|
|
405
|
+
// to protect newly-created subdirs from being immediately
|
|
406
|
+
// re-carved. The current orchestrator does NOT plumb convergence's
|
|
407
|
+
// internal `nestedParents` set through (runConvergence doesn't
|
|
408
|
+
// export it), so in practice balance starts with a fresh empty
|
|
409
|
+
// Set and augments it in-place as it creates new subdirs across
|
|
410
|
+
// its own iterations. Returned in the result so tests / future
|
|
411
|
+
// callers can observe what balance added. Pipe-through-from-
|
|
412
|
+
// convergence is a possible future enhancement, hence the shape.
|
|
413
|
+
// - `commitBetweenIterations({iteration, operator, summary})` is
|
|
414
|
+
// the same callback runConvergence uses; orchestrator wires it
|
|
415
|
+
// to the private-git commit machinery.
|
|
416
|
+
// Rewrites `nestedParents` entries after a flatten. The promoted directory
// used to live at `<removedDir>/<basename(promotedDir)>`; any tracked path
// equal to or below that old location is replaced by the matching path under
// `promotedDir`, so the opt-out keeps following the directory after the move.
function remapNestedParentsAfterFlatten(nestedParents, removedDir, promotedDir) {
  const oldChildDir = join(removedDir, basename(promotedDir));
  const oldPrefix = oldChildDir + sep;
  for (const tracked of Array.from(nestedParents)) {
    if (typeof tracked !== "string") continue;
    if (tracked !== oldChildDir && !tracked.startsWith(oldPrefix)) continue;
    nestedParents.delete(tracked);
    nestedParents.add(promotedDir + tracked.slice(oldChildDir.length));
  }
}

// Depth pass: flattens the first candidate from detectDepthOverage, rebuilds
// indices, records the op and invokes the commit callback. Resolves to true
// when a flatten was applied, false when there was no candidate.
async function balanceFlattenPass(pass, maxDepth) {
  const { wikiRoot, iteration, applied, nestedParents, commitBetweenIterations } = pass;
  const overdeep = detectDepthOverage(wikiRoot, maxDepth);
  if (overdeep.length === 0) return false;
  const chosen = overdeep[0]; // first in sorted order, for determinism
  const result = applyBalanceFlatten(wikiRoot, chosen);
  // The promoted subtree changed absolute path; keep the opt-out set aligned
  // so the fanout pass still recognises directories it created earlier.
  remapNestedParentsAfterFlatten(nestedParents, result.removed, result.promoted);
  rebuildAllIndices(wikiRoot);
  applied.push({
    iteration,
    operator: "BALANCE_FLATTEN",
    sources: [chosen],
    describe:
      `flattened passthrough ${relative(wikiRoot, chosen)} ` +
      `(promoted ${relative(wikiRoot, result.promoted)})`,
  });
  await commitBetweenIterations({
    iteration,
    operator: "BALANCE_FLATTEN",
    summary:
      `balance: flattened ${relative(wikiRoot, chosen)} → ${relative(wikiRoot, result.promoted)}`,
  });
  return true;
}

// Fanout pass: walks the overfull directories in order and nests the
// highest-affinity cluster of the first directory that yields any proposal.
// Directories with no proposal are skipped. Resolves to true when a nest was
// applied, false when no overfull directory produced a proposal. Errors from
// applyNest are deliberately not caught here so the caller can roll back.
async function balanceSubclusterPass(pass, fanoutTarget, wikiIndex) {
  const { wikiRoot, iteration, applied, nestedParents, commitBetweenIterations } = pass;
  const overfull = detectFanoutOverload(wikiRoot, fanoutTarget, nestedParents);
  for (const parentDir of overfull) {
    const { leaves } = listChildren(parentDir);
    // `returnEmptyMarker: false` — an unsuccessful detection comes back as an
    // empty array, so a length check is enough to move to the next directory.
    const proposals = await detectClusters(wikiRoot, leaves, {
      returnEmptyMarker: false,
    });
    if (proposals.length === 0) continue;
    // Strongest proposal first (missing average_affinity counts as 0).
    proposals.sort((a, b) => (b.average_affinity ?? 0) - (a.average_affinity ?? 0));
    const chosen = proposals[0];
    const deterministicIdf = buildSiblingIdfContext(leaves);
    const slug = generateDeterministicSlug(chosen.leaves, leaves, {
      precomputedIdf: deterministicIdf,
    });
    const purpose = deterministicPurpose(chosen.leaves);
    chosen.parent_dir = parentDir;
    chosen.source = "math";
    chosen.slug = slug;
    chosen.purpose = purpose;
    const resolvedSlug = resolveNestSlug(slug, chosen, wikiRoot, {
      wikiIndex,
    });
    const result = applyNest(wikiRoot, chosen, resolvedSlug);
    rebuildAllIndices(wikiRoot);
    // Opt the new subdir out of later fanout passes in this run.
    nestedParents.add(result.target_dir);
    // Record the slug as taken so later resolveNestSlug calls see it without
    // rebuilding the wiki-wide index.
    wikiIndex.add(resolvedSlug);
    applied.push({
      iteration,
      operator: "BALANCE_SUBCLUSTER",
      sources: chosen.leaves.map((l) => l.path),
      describe:
        `sub-clustered ${chosen.leaves.length} leaves from ` +
        `${relative(wikiRoot, parentDir)} → ${relative(wikiRoot, result.target_dir)} ` +
        `(avg_affinity=${(chosen.average_affinity ?? 0).toFixed(3)}, ` +
        `source=deterministic-math)`,
    });
    await commitBetweenIterations({
      iteration,
      operator: "BALANCE_SUBCLUSTER",
      summary:
        `balance: sub-clustered ${chosen.leaves.length} leaves into ${relative(wikiRoot, result.target_dir)}`,
    });
    return true; // one apply per iteration — the caller reassesses next pass
  }
  return false;
}

// Runs the balance phase until a pass finds no work or the iteration cap
// (MAX_BALANCE_ITERATIONS) is reached.
//
// wikiRoot  root directory of the wiki being rebalanced.
// ctx       options object:
//   fanoutTarget             number | null — enables the sub-cluster pass.
//   maxDepth                 number | null — enables the flatten pass.
//   nestedParents            Set of directory paths the sub-cluster pass must
//                            skip; mutated in place (new subdirs are added,
//                            entries moved by a flatten are re-pointed).
//   commitBetweenIterations  async callback invoked after every applied op
//                            with { iteration, operator, summary }.
//   opId, qualityMode        accepted but currently unused.
//
// Returns { iterations, applied, nestedParents, converged }. `converged` is
// true only when a full iteration applied nothing; hitting the cap leaves it
// false. With both fanoutTarget and maxDepth null the function returns
// immediately with iterations 0 and converged true.
//
// Each iteration applies at most one operation. The flatten pass is tried
// first; the sub-cluster pass only runs when no flatten was applied.
// Errors thrown by the apply steps propagate to the caller.
export async function runBalance(wikiRoot, ctx = {}) {
  const {
    fanoutTarget = null,
    maxDepth = null,
    opId,
    qualityMode = "tiered-fast",
    nestedParents = new Set(),
    commitBetweenIterations = async () => {},
  } = ctx;
  // Kept in the signature for callers that already pass them; not consumed.
  void qualityMode;
  void opId;

  if (fanoutTarget == null && maxDepth == null) {
    return { iterations: 0, applied: [], nestedParents, converged: true };
  }

  const applied = [];
  // The forbidden-id index is only consulted by the sub-cluster pass, so
  // skip building it for depth-only runs.
  const wikiIndex = fanoutTarget != null
    ? buildWikiForbiddenIndex(wikiRoot)
    : null;
  let iteration = 0;
  let reachedFixedPoint = false;
  while (iteration < MAX_BALANCE_ITERATIONS) {
    iteration += 1;
    const pass = { wikiRoot, iteration, applied, nestedParents, commitBetweenIterations };
    if (maxDepth != null && (await balanceFlattenPass(pass, maxDepth))) {
      continue; // re-evaluate from scratch
    }
    if (fanoutTarget != null && (await balanceSubclusterPass(pass, fanoutTarget, wikiIndex))) {
      continue;
    }
    // Neither pass found work: this is the only clean exit.
    reachedFixedPoint = true;
    break;
  }

  return { iterations: iteration, applied, nestedParents, converged: reachedFixedPoint };
}
|