@ctxr/skill-llm-wiki 1.0.2 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +118 -0
- package/README.md +2 -2
- package/guide/cli.md +3 -2
- package/guide/substrate/operators.md +1 -1
- package/guide/substrate/tiered-ai.md +6 -5
- package/guide/ux/user-intent.md +1 -1
- package/package.json +4 -2
- package/scripts/cli.mjs +92 -2
- package/scripts/lib/balance.mjs +579 -0
- package/scripts/lib/cluster-detect.mjs +482 -4
- package/scripts/lib/contract.mjs +31 -3
- package/scripts/lib/decision-log.mjs +121 -15
- package/scripts/lib/heal.mjs +5 -0
- package/scripts/lib/intent.mjs +370 -4
- package/scripts/lib/join-constants.mjs +22 -0
- package/scripts/lib/join.mjs +917 -0
- package/scripts/lib/nest-applier.mjs +395 -32
- package/scripts/lib/operators.mjs +472 -38
- package/scripts/lib/orchestrator.mjs +419 -12
- package/scripts/lib/root-containment.mjs +351 -0
- package/scripts/lib/similarity-cache.mjs +115 -20
- package/scripts/lib/similarity.mjs +11 -0
- package/scripts/lib/soft-dag.mjs +726 -0
- package/scripts/lib/tiered.mjs +42 -18
- package/scripts/lib/validate.mjs +22 -0
|
@@ -33,6 +33,7 @@ import {
|
|
|
33
33
|
writeFileSync,
|
|
34
34
|
} from "node:fs";
|
|
35
35
|
import { basename, dirname, join } from "node:path";
|
|
36
|
+
import { readFrontmatterStreaming } from "./chunk.mjs";
|
|
36
37
|
import { parseFrontmatter, renderFrontmatter } from "./frontmatter.mjs";
|
|
37
38
|
|
|
38
39
|
const SLUG_RE = /^[a-z][a-z0-9-]{0,63}$/;
|
|
@@ -42,19 +43,38 @@ export function validateSlug(slug) {
|
|
|
42
43
|
return SLUG_RE.test(slug);
|
|
43
44
|
}
|
|
44
45
|
|
|
45
|
-
// Resolve a slug that won't collide with
|
|
46
|
-
//
|
|
47
|
-
//
|
|
48
|
-
//
|
|
49
|
-
//
|
|
50
|
-
//
|
|
51
|
-
//
|
|
52
|
-
//
|
|
53
|
-
//
|
|
46
|
+
// Resolve a slug that won't collide with any live id in the wiki. The
|
|
47
|
+
// original observed collision case (v0.4.1 novel-corpus run): Tier 2's
|
|
48
|
+
// propose_structure response picked slug="security" for a cluster whose
|
|
49
|
+
// members included a leaf with id="security", so after apply both the
|
|
50
|
+
// new subcategory's stub index.md AND the moved leaf carried
|
|
51
|
+
// id="security" — DUP-ID at validate time, forcing a full pipeline
|
|
52
|
+
// rollback. A later scenario added cross-depth collisions: a leaf at
|
|
53
|
+
// arch/event-patterns/index.md in one branch made the slug
|
|
54
|
+
// "event-patterns" unsafe for a cluster under design-patterns-group/
|
|
55
|
+
// in a different branch — even though the two are at different depths
|
|
56
|
+
// and not siblings.
|
|
57
|
+
//
|
|
58
|
+
// Pre-resolving here auto-suffixes the slug (deterministically:
|
|
59
|
+
// `-group`, then `-group-N`) until it's non-colliding, letting the
|
|
60
|
+
// NEST land on the first try. When wikiRoot is provided, the resolver
|
|
61
|
+
// checks the full-tree id namespace; when omitted (e.g. legacy unit
|
|
62
|
+
// tests that predate cross-depth awareness), it falls back to the
|
|
63
|
+
// parent-dir-only check for backward compatibility.
|
|
64
|
+
//
|
|
54
65
|
// Non-collision slugs are returned unchanged; invalid slugs are left
|
|
55
66
|
// alone so applyNest's own validation can reject them with its usual
|
|
56
67
|
// error message.
|
|
57
|
-
|
|
68
|
+
//
|
|
69
|
+
// `opts.wikiIndex` is an optional precomputed Set of every live id and
|
|
70
|
+
// directory basename in the wiki (see `buildWikiForbiddenIndex`). When
|
|
71
|
+
// supplied, the full-tree walk is skipped and the precomputed set is
|
|
72
|
+
// merged into the per-proposal forbidden set instead. A multi-NEST
|
|
73
|
+
// convergence iteration builds the index once before the apply loop
|
|
74
|
+
// and mutates it incrementally (adds the resolved slug after each
|
|
75
|
+
// successful apply), reducing the slug-resolver cost from
|
|
76
|
+
// O(#applies × #files) to O(#files + #applies).
|
|
77
|
+
export function resolveNestSlug(slug, proposal, wikiRoot, opts = {}) {
|
|
58
78
|
if (!validateSlug(slug)) return slug;
|
|
59
79
|
if (
|
|
60
80
|
!proposal ||
|
|
@@ -63,8 +83,12 @@ export function resolveNestSlug(slug, proposal) {
|
|
|
63
83
|
) {
|
|
64
84
|
return slug;
|
|
65
85
|
}
|
|
66
|
-
const
|
|
67
|
-
|
|
86
|
+
const isForbidden = collectForbiddenIdsPredicate(
|
|
87
|
+
proposal,
|
|
88
|
+
wikiRoot,
|
|
89
|
+
opts.wikiIndex,
|
|
90
|
+
);
|
|
91
|
+
if (!isForbidden(slug)) return slug;
|
|
68
92
|
// Try "-group" first (the natural human reading: "the group of X
|
|
69
93
|
// leaves"); fall back to numeric suffixes starting at -group-2
|
|
70
94
|
// because "-group" itself already occupies the slot that would
|
|
@@ -72,55 +96,394 @@ export function resolveNestSlug(slug, proposal) {
|
|
|
72
96
|
// "${slug}-group" overflows the 64-char SLUG_RE cap, short-circuit:
|
|
73
97
|
// all numeric candidates share the same prefix and will fail
|
|
74
98
|
// validation identically, so there's no point spinning the loop.
|
|
75
|
-
// Returning the original (colliding) slug
|
|
76
|
-
//
|
|
77
|
-
//
|
|
99
|
+
// Returning the original (colliding) slug in that overflow case
|
|
100
|
+
// propagates the collision downstream. Which failure surfaces
|
|
101
|
+
// depends on the collision class: an existing sibling directory
|
|
102
|
+
// makes `applyNest`'s `existsSync(targetDir)` check throw "target
|
|
103
|
+
// subcategory already exists"; a member-id / cross-depth /
|
|
104
|
+
// alias-id collision slips past applyNest (the directory does not
|
|
105
|
+
// pre-exist) and surfaces later as `DUP-ID` or
|
|
106
|
+
// `ALIAS-COLLIDES-ID` at validate time, triggering the usual
|
|
107
|
+
// rollback. Either way, failing loudly beats silently spinning
|
|
108
|
+
// through a hundred failed validateSlug() checks.
|
|
78
109
|
const primary = `${slug}-group`;
|
|
79
110
|
if (!validateSlug(primary)) return slug;
|
|
80
|
-
if (!
|
|
111
|
+
if (!isForbidden(primary)) return primary;
|
|
81
112
|
for (let i = 2; i < 100; i++) {
|
|
82
113
|
const candidate = `${slug}-group-${i}`;
|
|
83
|
-
|
|
114
|
+
// Re-check `validateSlug(candidate)` inside the loop because the
|
|
115
|
+
// numeric suffix widens `candidate` past `${slug}-group` — for a
|
|
116
|
+
// base slug near the 64-char SLUG_RE cap, `${slug}-group` can
|
|
117
|
+
// validate while `${slug}-group-2` overflows. Bailing out early
|
|
118
|
+
// avoids returning an invalid slug that applyNest would otherwise
|
|
119
|
+
// reject with the uninformative "invalid slug" error even though
|
|
120
|
+
// the original input was valid. Same fail-loud rationale as the
|
|
121
|
+
// primary-overflow short-circuit above.
|
|
122
|
+
if (!validateSlug(candidate)) return slug;
|
|
123
|
+
if (!isForbidden(candidate)) return candidate;
|
|
84
124
|
}
|
|
85
125
|
return slug;
|
|
86
126
|
}
|
|
87
127
|
|
|
88
|
-
|
|
89
|
-
|
|
128
|
+
// Build a predicate `(id) => boolean` that returns `true` when `id`
|
|
129
|
+
// collides with any already-claimed id in the wiki — member ids,
|
|
130
|
+
// parent-dir sibling ids, parent-dir subdir basenames, and either the
|
|
131
|
+
// caller's precomputed wiki-wide index (preferred) or a fresh
|
|
132
|
+
// walkWikiIds fallback (legacy path).
|
|
133
|
+
//
|
|
134
|
+
// Why a predicate instead of a materialized Set: when the caller
|
|
135
|
+
// passes a precomputed `wikiIndex`, that index can easily be 10⁴+
|
|
136
|
+
// entries on a large corpus. Copying the whole index into a new
|
|
137
|
+
// per-call Set costs O(|wikiIndex|) memory + time on every
|
|
138
|
+
// resolveNestSlug invocation, which defeats the entire point of the
|
|
139
|
+
// iteration-level precompute. A predicate keeps the wiki-wide index
|
|
140
|
+
// by reference and queries it directly, making each `isForbidden(x)`
|
|
141
|
+
// check O(1) and each resolveNestSlug call O(|members| + |parent-
|
|
142
|
+
// siblings|) regardless of wiki size.
|
|
143
|
+
function collectForbiddenIdsPredicate(
|
|
144
|
+
proposal,
|
|
145
|
+
wikiRoot,
|
|
146
|
+
precomputedWikiIndex = null,
|
|
147
|
+
) {
|
|
148
|
+
// Local set: member ids + parent-dir sibling ids/subdirs. Always
|
|
149
|
+
// small (bounded by one directory's children), so materializing it
|
|
150
|
+
// is fine.
|
|
151
|
+
const local = new Set();
|
|
90
152
|
for (const leaf of proposal.leaves) {
|
|
91
|
-
if (leaf?.data?.id)
|
|
153
|
+
if (leaf?.data?.id) local.add(leaf.data.id);
|
|
92
154
|
}
|
|
93
155
|
const parentDir = dirname(proposal.leaves[0].path);
|
|
94
156
|
const memberPaths = new Set(proposal.leaves.map((l) => l.path));
|
|
157
|
+
|
|
158
|
+
// Explicitly forbid the parent directory's OWN basename. Under the
|
|
159
|
+
// validator's invariant (`type: index` id === `basename(dirname(
|
|
160
|
+
// index.md))` at every depth), the parent's `index.md` carries
|
|
161
|
+
// id === basename(parentDir). The NEST applier writes the new
|
|
162
|
+
// subdir's stub `index.md` with id === slug, so a slug equal to the
|
|
163
|
+
// parent's basename produces TWO index.md files with the same id
|
|
164
|
+
// (parent + new child) and trips DUP-ID at validate time.
|
|
165
|
+
//
|
|
166
|
+
// Earlier versions of this function's documentation claimed
|
|
167
|
+
// applyNest's `existsSync(targetDir)` check caught this class, but
|
|
168
|
+
// that check only fires when `<parentDir>/<slug>/` already EXISTS —
|
|
169
|
+
// which it doesn't, because we're about to create it. The
|
|
170
|
+
// parent-name collision is only reachable at validate-time unless
|
|
171
|
+
// we pre-empt it here. Adding basename(parentDir) to the local set
|
|
172
|
+
// redirects the collision into the deterministic `-group` suffix
|
|
173
|
+
// branch above.
|
|
174
|
+
local.add(basename(parentDir));
|
|
175
|
+
|
|
176
|
+
// Parent-dir walk. When wikiRoot is not supplied this walk is the
|
|
177
|
+
// ONLY source of "live ids at this depth" — the legacy slot used by
|
|
178
|
+
// unit tests that predate cross-depth awareness. When wikiRoot IS
|
|
179
|
+
// supplied, the parent-dir walk runs first as a cheap O(siblings)
|
|
180
|
+
// seed before the wiki-wide walk / precomputed-index path below.
|
|
181
|
+
//
|
|
182
|
+
// Note: this loop now skips dot-prefixed entries (`.DS_Store`,
|
|
183
|
+
// `.foo.md`, `.git/`, etc.) to match the skill's repo-wide dot-skip
|
|
184
|
+
// convention (walkWikiIds, buildWikiForbiddenIndex, indices::
|
|
185
|
+
// listChildren all do the same). That IS a behavioural change vs.
|
|
186
|
+
// the v1.0.0 parent-dir-only path, which iterated every entry
|
|
187
|
+
// regardless of name prefix. The change is intentional: v1.0.0's
|
|
188
|
+
// non-skip was a latent bug — a stray dotfile carrying frontmatter
|
|
189
|
+
// with a conflicting id would have spuriously forced a valid slug
|
|
190
|
+
// to auto-suffix, a false positive the validator would never have
|
|
191
|
+
// caught. No production consumer has reported hitting that path,
|
|
192
|
+
// so aligning with the repo convention is strictly an improvement.
|
|
193
|
+
// Legacy callers that DO pass dotfiles into the cluster's parent
|
|
194
|
+
// directory see different (but correct) resolver output.
|
|
95
195
|
let entries;
|
|
96
196
|
try {
|
|
97
197
|
entries = readdirSync(parentDir, { withFileTypes: true });
|
|
98
198
|
} catch {
|
|
99
|
-
|
|
199
|
+
entries = [];
|
|
100
200
|
}
|
|
101
201
|
for (const entry of entries) {
|
|
102
|
-
// Skip
|
|
103
|
-
//
|
|
104
|
-
//
|
|
105
|
-
//
|
|
106
|
-
//
|
|
202
|
+
// Skip dot-prefixed entries (directories AND files) on the same
|
|
203
|
+
// blanket-rule basis the full-tree walkers use (`walkWikiIds`,
|
|
204
|
+
// `buildWikiForbiddenIndex`, `indices.mjs::listChildren`). Without
|
|
205
|
+
// this skip, a stray `.DS_Store` or a `.foo.md` dotfile carrying
|
|
206
|
+
// frontmatter could spuriously poison the forbidden set and force
|
|
207
|
+
// a valid slug to auto-suffix for no legitimate reason.
|
|
208
|
+
if (entry.name.startsWith(".")) continue;
|
|
209
|
+
// Skip the parent's own index.md inside this loop: its id is
|
|
210
|
+
// always `basename(parentDir)` (per the validator invariant) and
|
|
211
|
+
// that value was explicitly added to `local` above. The parent's
|
|
212
|
+
// `aliases[]` ARE harvested — separately, after this loop — so a
|
|
213
|
+
// slug matching a parent-index alias doesn't slip past the
|
|
214
|
+
// resolver and trip ALIAS-COLLIDES-ID at validate time. That has
|
|
215
|
+
// to be a one-shot streaming read rather than inline here because
|
|
216
|
+
// we deliberately skip the rest of index.md's record on this
|
|
217
|
+
// hot-path loop (no point re-parsing its id).
|
|
107
218
|
if (entry.name === "index.md") continue;
|
|
108
219
|
const entryPath = join(parentDir, entry.name);
|
|
109
220
|
if (memberPaths.has(entryPath)) continue;
|
|
110
221
|
if (entry.isDirectory()) {
|
|
111
|
-
|
|
222
|
+
local.add(entry.name);
|
|
112
223
|
continue;
|
|
113
224
|
}
|
|
114
225
|
if (!entry.name.endsWith(".md")) continue;
|
|
226
|
+
// Only regular files qualify. Without this guard a symlink or
|
|
227
|
+
// special dirent named `*.md` would get opened + parsed; the
|
|
228
|
+
// rest of the pipeline's walks (`chunk.mjs::collectEntryPaths`,
|
|
229
|
+
// `indices.mjs::listChildren`) already require `isFile()`, so
|
|
230
|
+
// this keeps the walk discipline symmetric and forecloses the
|
|
231
|
+
// "planted symlink poisons the forbidden set" class.
|
|
232
|
+
if (!entry.isFile()) continue;
|
|
233
|
+
try {
|
|
234
|
+
// Use the streaming frontmatter reader (bounded to
|
|
235
|
+
// MAX_FRONTMATTER_BYTES) instead of slurping the whole file
|
|
236
|
+
// via readFileSync. Keeps sibling-id collection scale-safe
|
|
237
|
+
// when a parent directory contains large leaves, and stays
|
|
238
|
+
// consistent with walkWikiIds + buildWikiForbiddenIndex
|
|
239
|
+
// which both read the same way.
|
|
240
|
+
const captured = readFrontmatterStreaming(entryPath);
|
|
241
|
+
if (captured === null) continue;
|
|
242
|
+
const { data } = parseFrontmatter(captured.frontmatterText, entryPath);
|
|
243
|
+
if (data?.id) local.add(data.id);
|
|
244
|
+
// Also reserve any declared aliases. A new NEST stub carrying
|
|
245
|
+
// id === slug would trip ALIAS-COLLIDES-ID at validate time if
|
|
246
|
+
// the slug matches an existing alias on any live entry, and
|
|
247
|
+
// the pre-apply guard is the only place we can pre-empt that
|
|
248
|
+
// class of rollback.
|
|
249
|
+
if (Array.isArray(data?.aliases)) {
|
|
250
|
+
for (const alias of data.aliases) {
|
|
251
|
+
if (typeof alias === "string" && alias) local.add(alias);
|
|
252
|
+
}
|
|
253
|
+
}
|
|
254
|
+
} catch {
|
|
255
|
+
/* skip unreadable / malformed frontmatter */
|
|
256
|
+
}
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
// Harvest the parent's own index.md aliases. The inline loop above
|
|
260
|
+
// skips `index.md` wholesale because its id is already covered by
|
|
261
|
+
// the explicit `basename(parentDir)` add, but the aliases it carries
|
|
262
|
+
// aren't reconstructible from the directory name. A slug matching a
|
|
263
|
+
// parent-index alias would slip past every other collision check
|
|
264
|
+
// here and surface only as ALIAS-COLLIDES-ID at validate time. One
|
|
265
|
+
// targeted streaming read closes that gap.
|
|
266
|
+
const parentIndexPath = join(parentDir, "index.md");
|
|
267
|
+
try {
|
|
268
|
+
const captured = readFrontmatterStreaming(parentIndexPath);
|
|
269
|
+
if (captured !== null) {
|
|
270
|
+
const { data } = parseFrontmatter(captured.frontmatterText, parentIndexPath);
|
|
271
|
+
if (Array.isArray(data?.aliases)) {
|
|
272
|
+
for (const alias of data.aliases) {
|
|
273
|
+
if (typeof alias === "string" && alias) local.add(alias);
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
}
|
|
277
|
+
} catch {
|
|
278
|
+
/* skip unreadable parent index (or no index.md at the root) */
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
// Wiki-wide path. Precomputed index short-circuits the walk AND we
|
|
282
|
+
// keep it by reference inside the predicate instead of copying it
|
|
283
|
+
// into `local`. Legacy callers without precomputedWikiIndex fall
|
|
284
|
+
// back to the one-shot walkWikiIds, which materializes into `local`
|
|
285
|
+
// (the walk's output is bounded to "this call only" so no memory
|
|
286
|
+
// concern there).
|
|
287
|
+
//
|
|
288
|
+
// Full-tree walk catches cross-depth collisions that the parent-dir
|
|
289
|
+
// walk alone misses. Observed case: a leaf at
|
|
290
|
+
// `arch/event-patterns/index.md` (id "event-patterns") makes the
|
|
291
|
+
// slug "event-patterns" unsafe for a cluster proposed under
|
|
292
|
+
// `design-patterns-group/` in a different branch — even though the
|
|
293
|
+
// two are at different depths and not siblings. Validation catches
|
|
294
|
+
// this post-apply as DUP-ID, forcing rollback; the pre-apply walk
|
|
295
|
+
// here prevents the wasted round-trip.
|
|
296
|
+
//
|
|
297
|
+
// wikiRoot is optional: when absent (legacy callers / unit tests
|
|
298
|
+
// that predate cross-depth awareness), the parent-dir-only walk
|
|
299
|
+
// above is the effective behaviour, preserving prior semantics.
|
|
300
|
+
if (!precomputedWikiIndex && wikiRoot) {
|
|
301
|
+
walkWikiIds(wikiRoot, parentDir, memberPaths, local);
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
if (precomputedWikiIndex) {
|
|
305
|
+
return (id) => local.has(id) || precomputedWikiIndex.has(id);
|
|
306
|
+
}
|
|
307
|
+
return (id) => local.has(id);
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
// Build a wiki-wide forbidden-id index: the set of every `.md`
|
|
311
|
+
// entry's frontmatter id + aliases (both leaves AND `index.md` at
|
|
312
|
+
// every depth), plus every non-hidden directory basename under
|
|
313
|
+
// `wikiRoot`. Exposed as a reusable snapshot the caller can build
|
|
314
|
+
// once and pass to `resolveNestSlug` via `opts.wikiIndex` instead of
|
|
315
|
+
// paying for a full-tree walk on every invocation.
|
|
316
|
+
//
|
|
317
|
+
// Index.md entries are included (not skipped) because the validator
|
|
318
|
+
// treats leaves and indices as the same entry class on the
|
|
319
|
+
// DUP-ID / ALIAS-COLLIDES-ID axes — a slug matching a nested
|
|
320
|
+
// subcategory's id or alias would trip validation just as a slug
|
|
321
|
+
// matching a leaf id would. Aliases are included for the same
|
|
322
|
+
// reason: the resolver pre-empts both DUP-ID (slug === some entry's
|
|
323
|
+
// `id`) AND ALIAS-COLLIDES-ID (slug === some entry's alias)
|
|
324
|
+
// validation failures in one pass. Directory basenames cover
|
|
325
|
+
// the sibling-subdirectory class — a NEST creating `<parent>/<slug>/`
|
|
326
|
+
// where `<slug>` matches an existing directory anywhere in the tree
|
|
327
|
+
// would collide on `type: index` id at validate time.
|
|
328
|
+
//
|
|
329
|
+
// Mutation contract: after a successful NEST apply, the caller must
|
|
330
|
+
// call `wikiIndex.add(resolvedSlug)` so subsequent `resolveNestSlug`
|
|
331
|
+
// calls in the same iteration see the new directory as occupied.
|
|
332
|
+
// No other mutations are needed — leaf ids don't change when leaves
|
|
333
|
+
// move into the new subdir, and nothing is deleted by a NEST apply.
|
|
334
|
+
//
|
|
335
|
+
// Dot-prefixed entries (directories AND files — anything whose name
|
|
336
|
+
// starts with `.`) are skipped under the same blanket rule as
|
|
337
|
+
// `walkWikiIds` / `collectEntryPaths`. Covers skill-owned internals
|
|
338
|
+
// (`.llmwiki/`, `.work/`, `.shape/`), the user's git metadata
|
|
339
|
+
// (`.git/`, `.github/`), transient dotfiles (`.DS_Store`, editor
|
|
340
|
+
// backups), and hypothetical `.foo.md` leaves. Per-file frontmatter
|
|
341
|
+
// is extracted via `readFrontmatterStreaming` for bounded reads on
|
|
342
|
+
// large corpora.
|
|
343
|
+
export function buildWikiForbiddenIndex(wikiRoot) {
|
|
344
|
+
const set = new Set();
|
|
345
|
+
if (!wikiRoot) return set;
|
|
346
|
+
const stack = [wikiRoot];
|
|
347
|
+
while (stack.length > 0) {
|
|
348
|
+
const dir = stack.pop();
|
|
349
|
+
let entries;
|
|
350
|
+
try {
|
|
351
|
+
entries = readdirSync(dir, { withFileTypes: true });
|
|
352
|
+
} catch {
|
|
353
|
+
continue;
|
|
354
|
+
}
|
|
355
|
+
for (const entry of entries) {
|
|
356
|
+
if (entry.name.startsWith(".")) continue;
|
|
357
|
+
const entryPath = join(dir, entry.name);
|
|
358
|
+
if (entry.isDirectory()) {
|
|
359
|
+
set.add(entry.name);
|
|
360
|
+
stack.push(entryPath);
|
|
361
|
+
continue;
|
|
362
|
+
}
|
|
363
|
+
if (!entry.name.endsWith(".md")) continue;
|
|
364
|
+
// Regular-file-only: keeps the walk discipline aligned with
|
|
365
|
+
// chunk.mjs::collectEntryPaths / indices.mjs::listChildren and
|
|
366
|
+
// prevents symlinks / special dirents from being opened.
|
|
367
|
+
if (!entry.isFile()) continue;
|
|
368
|
+
try {
|
|
369
|
+
const captured = readFrontmatterStreaming(entryPath);
|
|
370
|
+
if (captured === null) continue;
|
|
371
|
+
const { data } = parseFrontmatter(captured.frontmatterText, entryPath);
|
|
372
|
+
if (data?.id) set.add(data.id);
|
|
373
|
+
// Aliases are also reserved — a slug matching an existing
|
|
374
|
+
// alias is the other DUP-adjacent class the validator flags
|
|
375
|
+
// (ALIAS-COLLIDES-ID). Including them here keeps
|
|
376
|
+
// opts.wikiIndex callers in sync with the full-tree walk
|
|
377
|
+
// path's guarantee.
|
|
378
|
+
if (Array.isArray(data?.aliases)) {
|
|
379
|
+
for (const alias of data.aliases) {
|
|
380
|
+
if (typeof alias === "string" && alias) set.add(alias);
|
|
381
|
+
}
|
|
382
|
+
}
|
|
383
|
+
} catch {
|
|
384
|
+
/* skip unreadable / malformed frontmatter */
|
|
385
|
+
}
|
|
386
|
+
}
|
|
387
|
+
}
|
|
388
|
+
return set;
|
|
389
|
+
}
|
|
390
|
+
|
|
391
|
+
// Walk the entire wiki under wikiRoot, adding every `.md` entry's frontmatter
|
|
392
|
+
// id and aliases, plus every non-hidden directory basename, to `forbidden`.
|
|
393
|
+
// `parentDir` and `memberPaths` are the cluster's own context — leaves
|
|
394
|
+
// already inside the cluster are excluded because they'll be moved
|
|
395
|
+
// into the new subdirectory and their id will live there, not collide.
|
|
396
|
+
// The parent-dir walk above has already collected direct siblings;
|
|
397
|
+
// this pass covers every OTHER directory in the tree.
|
|
398
|
+
//
|
|
399
|
+
// Dot-prefixed entries (directories AND files) are skipped as a
|
|
400
|
+
// blanket rule — this matches the discipline in
|
|
401
|
+
// `scripts/lib/chunk.mjs::collectEntryPaths` and covers every
|
|
402
|
+
// metadata surface the skill owns (`.llmwiki/`, `.work/`, `.shape/`),
|
|
403
|
+
// any user dotfile directory the corpus might carry (`.git/`,
|
|
404
|
+
// `.github/`, etc.), AND any stray dotfiles (`.DS_Store`, hypothetical
|
|
405
|
+
// `.foo.md` leaves). There is no allow-list: if a dot-prefixed entry
|
|
406
|
+
// is worth considering as a routable leaf, rename it.
|
|
407
|
+
//
|
|
408
|
+
// Per-file frontmatter is extracted via the streaming reader so this
|
|
409
|
+
// collision pass reads bounded (≤ `MAX_FRONTMATTER_BYTES`) from each
|
|
410
|
+
// file rather than the full body — a real concern on large corpora
|
|
411
|
+
// (the frontmatter-bearing leaves at the consumer 596-leaf scale
|
|
412
|
+
// already parse through `readFrontmatterStreaming` elsewhere in the
|
|
413
|
+
// pipeline for the same reason).
|
|
414
|
+
function walkWikiIds(wikiRoot, parentDir, memberPaths, forbidden) {
|
|
415
|
+
const stack = [wikiRoot];
|
|
416
|
+
while (stack.length > 0) {
|
|
417
|
+
const dir = stack.pop();
|
|
418
|
+
let entries;
|
|
115
419
|
try {
|
|
116
|
-
|
|
117
|
-
const { data } = parseFrontmatter(raw, entryPath);
|
|
118
|
-
if (data?.id) forbidden.add(data.id);
|
|
420
|
+
entries = readdirSync(dir, { withFileTypes: true });
|
|
119
421
|
} catch {
|
|
120
|
-
|
|
422
|
+
continue;
|
|
423
|
+
}
|
|
424
|
+
for (const entry of entries) {
|
|
425
|
+
// Skip dot-prefixed entries (directories AND files — this is
|
|
426
|
+
// a blanket rule, not a directories-only skip). Covers skill
|
|
427
|
+
// internals (`.llmwiki/`, `.work/`), user metadata (`.git/`,
|
|
428
|
+
// `.github/`), and stray dotfiles (`.DS_Store`, hypothetical
|
|
429
|
+
// `.foo.md`). Matches the rest of the pipeline's walk
|
|
430
|
+
// discipline — see the walkWikiIds header comment above.
|
|
431
|
+
if (entry.name.startsWith(".")) continue;
|
|
432
|
+
const entryPath = join(dir, entry.name);
|
|
433
|
+
if (entry.isDirectory()) {
|
|
434
|
+
// Directory basename is a potential slug collision (a NEST
|
|
435
|
+
// elsewhere in the tree carrying the same slug would produce
|
|
436
|
+
// two directories with the same id).
|
|
437
|
+
forbidden.add(entry.name);
|
|
438
|
+
stack.push(entryPath);
|
|
439
|
+
continue;
|
|
440
|
+
}
|
|
441
|
+
if (!entry.name.endsWith(".md")) continue;
|
|
442
|
+
// Regular-file-only guard (matches the rest of the pipeline's
|
|
443
|
+
// walk discipline — chunk.mjs::collectEntryPaths and
|
|
444
|
+
// indices.mjs::listChildren both require isFile). Without this
|
|
445
|
+
// a symlink or special dirent named `*.md` could feed into
|
|
446
|
+
// readFrontmatterStreaming and poison the forbidden set.
|
|
447
|
+
if (!entry.isFile()) continue;
|
|
448
|
+
// Skip ordinary leaves that are in the cluster's own parent
|
|
449
|
+
// dir — the parent-dir walk above has already handled them
|
|
450
|
+
// (including the member-exclusion logic). Skipping avoids
|
|
451
|
+
// double-reading frontmatter on the hot path.
|
|
452
|
+
//
|
|
453
|
+
// EXCEPTION: parent's own index.md. Normally the parent-dir
|
|
454
|
+
// walk above already injects `basename(parentDir)` into the
|
|
455
|
+
// forbidden set (see the explicit `local.add(basename(parentDir))`
|
|
456
|
+
// in collectForbiddenIdsPredicate), so a slug matching the parent's id
|
|
457
|
+
// would auto-suffix without our help. But when
|
|
458
|
+
// parentDir === wikiRoot, this walk is the ONLY pass that sees
|
|
459
|
+
// the root at all — it starts AT wikiRoot and only visits
|
|
460
|
+
// children as directory entries, never wikiRoot itself, so the
|
|
461
|
+
// wiki-root basename isn't added via the `entry.isDirectory()`
|
|
462
|
+
// branch either. Parsing root/index.md picks up the canonical
|
|
463
|
+
// root id (also equal to basename(wikiRoot) under the validator
|
|
464
|
+
// invariant) via the frontmatter read; the explicit add above
|
|
465
|
+
// would cover the name but parsing also catches any alias
|
|
466
|
+
// entries the root might carry. Cheap: one extra
|
|
467
|
+
// frontmatter-stream read per walk.
|
|
468
|
+
if (dir === parentDir && entry.name !== "index.md") continue;
|
|
469
|
+
if (memberPaths.has(entryPath)) continue;
|
|
470
|
+
try {
|
|
471
|
+
const captured = readFrontmatterStreaming(entryPath);
|
|
472
|
+
if (captured === null) continue;
|
|
473
|
+
const { data } = parseFrontmatter(captured.frontmatterText, entryPath);
|
|
474
|
+
if (data?.id) forbidden.add(data.id);
|
|
475
|
+
// Reserve aliases too so the full-tree fallback path has the
|
|
476
|
+
// same ALIAS-COLLIDES-ID coverage as buildWikiForbiddenIndex.
|
|
477
|
+
if (Array.isArray(data?.aliases)) {
|
|
478
|
+
for (const alias of data.aliases) {
|
|
479
|
+
if (typeof alias === "string" && alias) forbidden.add(alias);
|
|
480
|
+
}
|
|
481
|
+
}
|
|
482
|
+
} catch {
|
|
483
|
+
/* skip unreadable / malformed frontmatter */
|
|
484
|
+
}
|
|
121
485
|
}
|
|
122
486
|
}
|
|
123
|
-
return forbidden;
|
|
124
487
|
}
|
|
125
488
|
|
|
126
489
|
export function applyNest(wikiRoot, proposal, slug, opts = {}) {
|