@ctxr/skill-llm-wiki 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/CHANGELOG.md +134 -0
  2. package/LICENSE +21 -0
  3. package/README.md +484 -0
  4. package/SKILL.md +252 -0
  5. package/guide/basics/concepts.md +74 -0
  6. package/guide/basics/index.md +45 -0
  7. package/guide/basics/schema.md +140 -0
  8. package/guide/cli.md +256 -0
  9. package/guide/correctness/index.md +45 -0
  10. package/guide/correctness/invariants.md +89 -0
  11. package/guide/correctness/safety.md +96 -0
  12. package/guide/history/diff.md +110 -0
  13. package/guide/history/hidden-git.md +130 -0
  14. package/guide/history/index.md +52 -0
  15. package/guide/history/remote-sync.md +113 -0
  16. package/guide/index.md +134 -0
  17. package/guide/isolation/coexistence.md +134 -0
  18. package/guide/isolation/index.md +44 -0
  19. package/guide/isolation/scale.md +251 -0
  20. package/guide/layout/in-place-mode.md +97 -0
  21. package/guide/layout/index.md +53 -0
  22. package/guide/layout/layout-contract.md +131 -0
  23. package/guide/layout/layout-modes.md +115 -0
  24. package/guide/operations/index.md +76 -0
  25. package/guide/operations/ingest/build.md +75 -0
  26. package/guide/operations/ingest/extend.md +61 -0
  27. package/guide/operations/ingest/index.md +54 -0
  28. package/guide/operations/ingest/join.md +65 -0
  29. package/guide/operations/maintain/fix.md +66 -0
  30. package/guide/operations/maintain/index.md +47 -0
  31. package/guide/operations/maintain/rebuild.md +86 -0
  32. package/guide/operations/validate.md +48 -0
  33. package/guide/substrate/index.md +47 -0
  34. package/guide/substrate/operators.md +96 -0
  35. package/guide/substrate/tiered-ai.md +363 -0
  36. package/guide/ux/index.md +44 -0
  37. package/guide/ux/preflight.md +150 -0
  38. package/guide/ux/user-intent.md +135 -0
  39. package/package.json +55 -0
  40. package/scripts/cli.mjs +893 -0
  41. package/scripts/commands/remote.mjs +93 -0
  42. package/scripts/commands/review.mjs +253 -0
  43. package/scripts/commands/sync.mjs +84 -0
  44. package/scripts/lib/chunk.mjs +421 -0
  45. package/scripts/lib/cluster-detect.mjs +516 -0
  46. package/scripts/lib/decision-log.mjs +343 -0
  47. package/scripts/lib/draft.mjs +158 -0
  48. package/scripts/lib/embeddings.mjs +366 -0
  49. package/scripts/lib/frontmatter.mjs +497 -0
  50. package/scripts/lib/git-commands.mjs +155 -0
  51. package/scripts/lib/git.mjs +486 -0
  52. package/scripts/lib/gitignore.mjs +62 -0
  53. package/scripts/lib/history.mjs +331 -0
  54. package/scripts/lib/indices.mjs +510 -0
  55. package/scripts/lib/ingest.mjs +258 -0
  56. package/scripts/lib/intent.mjs +713 -0
  57. package/scripts/lib/interactive.mjs +99 -0
  58. package/scripts/lib/migrate.mjs +126 -0
  59. package/scripts/lib/nest-applier.mjs +260 -0
  60. package/scripts/lib/operators.mjs +1365 -0
  61. package/scripts/lib/orchestrator.mjs +718 -0
  62. package/scripts/lib/paths.mjs +197 -0
  63. package/scripts/lib/preflight.mjs +213 -0
  64. package/scripts/lib/provenance.mjs +672 -0
  65. package/scripts/lib/quality-metric.mjs +269 -0
  66. package/scripts/lib/query-fixture.mjs +71 -0
  67. package/scripts/lib/rollback.mjs +95 -0
  68. package/scripts/lib/shape-check.mjs +172 -0
  69. package/scripts/lib/similarity-cache.mjs +126 -0
  70. package/scripts/lib/similarity.mjs +230 -0
  71. package/scripts/lib/snapshot.mjs +54 -0
  72. package/scripts/lib/source-frontmatter.mjs +85 -0
  73. package/scripts/lib/tier2-protocol.mjs +470 -0
  74. package/scripts/lib/tiered.mjs +453 -0
  75. package/scripts/lib/validate.mjs +362 -0
@@ -0,0 +1,269 @@
1
+ // quality-metric.mjs — routing_cost metric for convergence.
2
+ //
3
+ // Definition: given a wiki tree and a fixed query distribution
4
+ // (query-fixture.mjs), simulate the routing procedure from
5
+ // SKILL.md for each query. Starting at the root index.md, read its
6
+ // frontmatter's entries[] records, compute which ones are
7
+ // "matched" by the query, follow matched subcategory indices one
8
+ // level deeper, and sum the file bytes of every file the routing
9
+ // pass read (root index + matched subcategory indices + matched
10
+ // leaves).
11
+ //
12
+ // Routing-substrate note. The old (literal) router matched on
13
+ // aggregated `activation_defaults` lifted into entries[] at the
14
+ // parent level. That substrate is gone. Parent `entries[]` now
15
+ // carry only `id` / `file` / `type` / `focus` / `tags`, plus the
16
+ // parent index itself has an authored `focus` / `shared_covers` /
17
+ // `tags` block. The simulator matches entries using those
18
+ // parent-side fields only — it does NOT peek into leaves to make
19
+ // a descent decision.
20
+ //
21
+ // This is the critical property that makes nested wikis cheaper
22
+ // than flat wikis in the simulator: if the parent's description
23
+ // of a subcategory ("focus", shared covers, tags) doesn't match
24
+ // the query, the subcategory's whole subtree is skipped, and
25
+ // every leaf inside it stays unread. In the old literal-routing
26
+ // substrate the subcategory's `activation_defaults` was the gate;
27
+ // now it's the subcategory's authored `focus` and shared_covers.
28
+ //
29
+ // Per-leaf `activation` blocks on the leaves themselves are
30
+ // ignored by the simulator. They still round-trip through
31
+ // frontmatter and may inform Claude's judgment once a leaf is
32
+ // already open, but for the purposes of the routing-cost metric
33
+ // they're hidden behind the parent gate.
34
+ //
35
+ // The metric is:
36
+ //
37
+ // routing_cost = SUM over queries of bytes_read(query) / total_leaf_bytes
38
+ //
39
+ // where total_leaf_bytes is the total bytes of every .md file
40
+ // under the wiki (dot-prefixed entries such as .llmwiki / .work are skipped; index.md files are counted). Lower is
41
+ // better. A perfectly-shaped wiki where each query activates only
42
+ // the matching subcategory will score much lower than a flat wiki
43
+ // where every leaf is a peer at the root and every root lookup
44
+ // has to consider them all.
45
+ //
46
+ // This metric intentionally favours NESTED shapes over FLAT
47
+ // shapes for the same leaves — a flat wiki's root index.md is
48
+ // larger (lists all entries), and routing at the root level has
49
+ // to read every activated leaf directly (no subcategory index
50
+ // between). A nested wiki's root index.md is smaller (lists
51
+ // subcategories) and routing skips past non-matching
52
+ // subcategories without visiting their leaves. That's the
53
+ // behaviour we want to incentivise.
54
+
55
+ import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
56
+ import { join } from "node:path";
57
+ import { parseFrontmatter } from "./frontmatter.mjs";
58
+ import { REPRESENTATIVE_QUERIES } from "./query-fixture.mjs";
59
+
60
// Add up the on-disk size of every regular `.md` file beneath
// `wikiRoot`. Any directory entry whose name starts with "." (file or
// directory) is skipped. The result is the denominator of the
// routing-cost ratio.
//
// Best effort: a directory that cannot be listed, or a file that
// cannot be stat'ed, simply contributes nothing to the total.
export function totalLeafBytes(wikiRoot) {
  const pending = [wikiRoot];
  let byteCount = 0;
  while (pending.length !== 0) {
    const current = pending.pop();
    let children;
    try {
      children = readdirSync(current, { withFileTypes: true });
    } catch {
      // Unreadable or missing directory: nothing to count here.
      continue;
    }
    for (const child of children) {
      const { name } = child;
      if (name.startsWith(".")) continue;
      const childPath = join(current, name);
      if (child.isDirectory()) {
        // Visit later; the walk is iterative rather than recursive.
        pending.push(childPath);
      } else if (child.isFile() && name.endsWith(".md")) {
        try {
          byteCount += statSync(childPath).size;
        } catch {
          /* ignore */
        }
      }
    }
  }
  return byteCount;
}
91
+
92
// Does a parent-side `entries[]` record match a query?
//
// Only fields carried on the record itself are consulted:
//   - `tags`  : matches when any entry tag equals any query tag
//               (case-insensitive).
//   - `focus` : matches when any query activation keyword occurs as a
//               substring of the focus text (case-insensitive).
//
// Returns false for a missing entry. Keywords and tags are coerced
// with String() so a non-string value cannot throw, and empty
// keywords are dropped: "" is a substring of every string and would
// otherwise make every entry match.
function entryMatches(entry, query) {
  if (!entry) return false;

  const wantedTags = new Set(
    (query.tags || []).map((t) => String(t).toLowerCase()),
  );
  const entryTags = Array.isArray(entry.tags) ? entry.tags : [];
  if (entryTags.some((t) => wantedTags.has(String(t).toLowerCase()))) {
    return true;
  }

  const focus = typeof entry.focus === "string" ? entry.focus.toLowerCase() : "";
  const keywords = (query.activation_keywords || [])
    .map((k) => String(k).toLowerCase())
    .filter((k) => k.length > 0);
  return keywords.some((kw) => focus.includes(kw));
}
120
+
121
// Does a subcategory's own authored frontmatter match a query?
//
// `parsed` is the parse result for the subcategory's index.md; its
// `data` object is checked as follows (all comparisons are
// case-insensitive):
//   - `tags`          : any tag equal to any query tag.
//   - `focus`         : any query keyword occurring as a substring.
//   - `shared_covers` : any query keyword occurring as a substring of
//                       any cover line.
//
// Returns false when `parsed` or `parsed.data` is missing. Keywords
// and tags are coerced with String(), and empty keywords are dropped:
// "" is a substring of every string and would otherwise make every
// subcategory match.
function subcatOwnMatches(parsed, query) {
  if (!parsed || !parsed.data) return false;
  const { data } = parsed;

  const wantedTags = new Set(
    (query.tags || []).map((t) => String(t).toLowerCase()),
  );
  const ownTags = Array.isArray(data.tags) ? data.tags : [];
  if (ownTags.some((t) => wantedTags.has(String(t).toLowerCase()))) {
    return true;
  }

  const keywords = (query.activation_keywords || [])
    .map((k) => String(k).toLowerCase())
    .filter((k) => k.length > 0);
  if (keywords.length === 0) return false;

  // Every text a keyword may hit: the focus line plus each cover line.
  const focus = typeof data.focus === "string" ? data.focus.toLowerCase() : "";
  const covers = Array.isArray(data.shared_covers) ? data.shared_covers : [];
  const haystacks = [focus, ...covers.map((c) => String(c).toLowerCase())];
  return keywords.some((kw) => haystacks.some((text) => text.includes(kw)));
}
150
+
151
// Simulate routing for one query, starting at `dirPath`'s index.md.
//
// Returns `visited`: the set of file paths the simulated router read
// (every index.md it descended into plus every matched leaf). The
// same Set instance is threaded through the recursion and returned.
//
// Rules, as implemented below:
//   - Recursion stops once `depth` exceeds 10, and an index already in
//     `visited` is never re-read, so cyclic entries cannot loop.
//   - An entry with `type === "index"` is descended into when its
//     parent-side record matches (`entryMatches`) or, failing that,
//     when the subcategory's own index.md frontmatter matches
//     (`subcatOwnMatches`). An index that is only peeked at and does
//     not match is NOT added to `visited`.
//   - Any other entry is treated as a leaf and is added only when its
//     parent-side record matches; leaves are never opened.
//   - Unreadable or unparsable index files end the walk for that
//     branch without throwing.
//
// Malformed frontmatter is tolerated: a parse result without `data`
// is treated as having no entries, and null / non-object items in
// `entries[]` are skipped rather than crashing the metric.
function simulateQueryRouting(wikiRoot, query, dirPath = wikiRoot, depth = 0, visited = new Set()) {
  if (depth > 10) return visited;
  const indexPath = join(dirPath, "index.md");
  if (!existsSync(indexPath)) return visited;
  if (visited.has(indexPath)) return visited;
  visited.add(indexPath);

  let parsed;
  try {
    const raw = readFileSync(indexPath, "utf8");
    parsed = parseFrontmatter(raw, indexPath);
  } catch {
    return visited;
  }

  const entries =
    parsed && parsed.data && Array.isArray(parsed.data.entries)
      ? parsed.data.entries
      : [];
  for (const entry of entries) {
    // A YAML list can contain empty items; skip them instead of throwing.
    if (!entry) continue;
    const file = entry.file;
    if (!file || typeof file !== "string") continue;
    const full = join(dirPath, file);
    if (visited.has(full)) continue;

    if (entry.type !== "index") {
      // Leaf: gated purely by the parent-side record.
      if (entryMatches(entry, query)) visited.add(full);
      continue;
    }

    // Index entries may name either the directory or its index.md;
    // strip the trailing "<sep>index.md" to get the directory.
    const subDir = full.endsWith("index.md")
      ? full.slice(0, -"index.md".length - 1)
      : full;

    // First try the parent-side record (cheap, no extra I/O).
    let shouldDescend = entryMatches(entry, query);
    if (!shouldDescend) {
      // Fallback: check the subcategory's own authored
      // focus / tags / shared_covers. Still a parent gate — no leaf
      // inside the subcategory is inspected.
      const subIndexPath = join(subDir, "index.md");
      if (existsSync(subIndexPath)) {
        try {
          const subRaw = readFileSync(subIndexPath, "utf8");
          const subParsed = parseFrontmatter(subRaw, subIndexPath);
          if (subcatOwnMatches(subParsed, query)) shouldDescend = true;
        } catch {
          /* ignore — don't descend */
        }
      }
    }
    if (!shouldDescend) continue;
    simulateQueryRouting(wikiRoot, query, subDir, depth + 1, visited);
  }
  return visited;
}
212
+
213
// Total on-disk size of the given file paths. `files` is any iterable
// of paths; a path that cannot be stat'ed contributes zero.
function sumBytes(files) {
  let bytes = 0;
  for (const filePath of files) {
    let size;
    try {
      size = statSync(filePath).size;
    } catch {
      // Missing or unreadable file: count nothing for it.
      size = 0;
    }
    bytes += size;
  }
  return bytes;
}
225
+
226
// Compute the routing_cost metric for a wiki. Returns an object:
//
//   { cost, per_query, total_leaf_bytes, queries_matched }
//
// - `cost` is the sum over all queries of
//   bytes_read(query) / total_leaf_bytes (lower = better).
// - `per_query` holds one { query, files, bytes, ratio } record per
//   query, for debugging / logs.
// - `total_leaf_bytes` is the value returned by totalLeafBytes().
// - `queries_matched` counts queries whose simulated walk read
//   something beyond the starting index.md. The walk always records
//   the root index when it exists, so a plain "read at least one
//   file" test would count every query and carry no information.
//
// `options.queries` overrides the built-in query fixture. When the
// wiki has no countable bytes the function returns an all-zero result
// instead of dividing by zero.
export function computeRoutingCost(wikiRoot, options = {}) {
  const { queries = REPRESENTATIVE_QUERIES } = options;
  const total = totalLeafBytes(wikiRoot);
  if (total === 0) {
    return { cost: 0, per_query: [], total_leaf_bytes: 0, queries_matched: 0 };
  }

  let costSum = 0;
  let matched = 0;
  const per = [];
  for (const q of queries) {
    const files = simulateQueryRouting(wikiRoot, q);
    const bytes = sumBytes(files);
    // More than one file means at least one entry was followed.
    if (files.size > 1) matched++;
    const ratio = bytes / total;
    costSum += ratio;
    per.push({
      query: q.id,
      files: files.size,
      bytes,
      ratio,
    });
  }

  return {
    cost: costSum,
    per_query: per,
    total_leaf_bytes: total,
    queries_matched: matched,
  };
}
261
+
262
// Render a metric result (as returned by computeRoutingCost) as a
// single line for commit messages / logs. The cost is printed with
// four decimal places.
export function formatRoutingCost(metric) {
  const {
    cost,
    queries_matched: matchedCount,
    per_query: perQuery,
    total_leaf_bytes: leafBytes,
  } = metric;
  const costText = cost.toFixed(4);
  const queryCount = perQuery.length;
  return `routing_cost=${costText} (${matchedCount}/${queryCount} queries matched, total_leaf_bytes=${leafBytes})`;
}
@@ -0,0 +1,71 @@
1
+ // query-fixture.mjs — a fixed, representative query distribution
2
+ // the convergence loop uses to measure routing cost. Stored in the
3
+ // skill (NOT in user wikis) so the metric is corpus-agnostic and
4
+ // the same metric can be used to compare structural alternatives.
5
+ //
6
+ // Each query is a bag of activation keywords + tags the router
7
+ // would see in the real skill. The metric simulates one lookup
8
+ // pass at the wiki root: compute the activated set of entries[],
9
+ // follow the activated subcategory index.md files one level down,
10
+ // and sum the bytes of every file traversed (indices + leaves).
11
+ // Lower total bytes across all queries = better structure.
12
+ //
13
+ // The 10 queries below exercise different operation tags commonly
14
+ // seen in the skill's documentation: rebuild, build, extend,
15
+ // layout-mode reasoning, history/audit, validation, join, and a
16
+ // couple of "general how do I" questions. They deliberately span
17
+ // the tags the guide emits so both a flat-root wiki and a nested
18
+ // wiki will find matches.
19
+
20
// The fixed query distribution. Each record has:
//   id                  - stable identifier reported in per-query results
//   activation_keywords - words matched as substrings of focus / covers text
//   tags                - tags matched for equality against entry tags
//
// The list, every record, and every inner array are frozen. A shallow
// freeze on the outer array alone would still let a consumer mutate a
// record or its arrays and silently change the metric for every later
// caller in the same process.
export const REPRESENTATIVE_QUERIES = Object.freeze(
  [
    {
      id: "q-rebuild-basic",
      activation_keywords: ["rebuild", "optimize", "structure"],
      tags: ["rebuild", "operation"],
    },
    {
      id: "q-build-from-source",
      activation_keywords: ["build", "source", "ingest"],
      tags: ["build", "operation"],
    },
    {
      id: "q-extend-new-entries",
      activation_keywords: ["extend", "add", "new", "entries"],
      tags: ["extend", "operation"],
    },
    {
      id: "q-layout-mode-reasoning",
      activation_keywords: ["layout", "sibling", "in-place", "hosted", "mode"],
      tags: ["layout"],
    },
    {
      id: "q-history-audit",
      activation_keywords: ["history", "log", "commit", "audit", "blame"],
      tags: ["history", "git"],
    },
    {
      id: "q-validate-fix",
      activation_keywords: ["validate", "fix", "invariant", "broken"],
      tags: ["validation", "fix"],
    },
    {
      id: "q-join-wikis",
      activation_keywords: ["join", "merge", "wikis"],
      tags: ["join"],
    },
    {
      id: "q-rollback-to",
      activation_keywords: ["rollback", "restore", "previous"],
      tags: ["rollback"],
    },
    {
      id: "q-tiered-ai",
      activation_keywords: ["tiered", "similarity", "embeddings", "claude"],
      tags: ["tiered", "ai"],
    },
    {
      id: "q-scale-large",
      activation_keywords: ["scale", "large", "corpus", "ten", "thousand"],
      tags: ["scale", "performance"],
    },
  ].map((q) =>
    Object.freeze({
      id: q.id,
      activation_keywords: Object.freeze(q.activation_keywords),
      tags: Object.freeze(q.tags),
    }),
  ),
);
@@ -0,0 +1,95 @@
1
+ // rollback.mjs — resolve a rollback reference to a concrete git ref,
2
+ // verify it exists, then perform `git reset --hard` + `git clean -fd`.
3
+ //
4
+ // Tag namespace split (see snapshot.mjs): pre-op anchors live under
5
+ // `refs/tags/pre-op/<id>` and final tags live under `refs/tags/op/<id>`.
6
+ // The two namespaces exist so git's ref hierarchy doesn't collide.
7
+ //
8
+ // Accepted ref forms:
9
+ // genesis → op/genesis (always present after gitInit)
10
+ // <op-id> → op/<op-id> (state right after the op)
11
+ // pre-<op-id> → pre-op/<op-id> (state just before the op)
12
+ // HEAD, HEAD~N, etc. → passed through verbatim to git rev-parse
13
+ // pre-op/... → passed through verbatim
14
+ // op/... → passed through verbatim
15
+
16
+ import { gitClean, gitRefExists, gitResetHard, gitRevParse } from "./git.mjs";
17
+
18
// Op-id / bare-ref grammar. Refs that don't look like this are
// rejected outright so they cannot slip through to git as unintended
// command-line flags or path-like expressions.
const BARE_REF_RE = /^[A-Za-z0-9][A-Za-z0-9._-]*$/;
const HEAD_REF_RE = /^HEAD(~\d+|\^\d*)?$/;

// True when `body` is acceptable as the final component of a tag name.
// On top of the character grammar this rejects shapes git itself
// forbids in ref names and that could be read as something else:
// ".." (range syntax), a trailing ".", and a trailing ".lock".
function isSafeRefBody(body) {
  return (
    BARE_REF_RE.test(body) &&
    !body.includes("..") &&
    !body.endsWith(".") &&
    !body.endsWith(".lock")
  );
}

/**
 * Translate a user-supplied rollback reference into the ref string
 * handed to git.
 *
 * Mapping:
 *   "genesis"          -> "op/genesis"
 *   "HEAD", "HEAD~N", "HEAD^", "HEAD^N" -> returned unchanged
 *   "pre-op/<id>"      -> returned unchanged (body validated)
 *   "op/<id>"          -> returned unchanged (body validated)
 *   "pre-<id>"         -> "pre-op/<id>"
 *   "<id>"             -> "op/<id>"
 *
 * @param {string} raw Reference as typed by the caller.
 * @returns {string} The resolved ref.
 * @throws {Error} if `raw` is empty or not a string, starts with "-",
 *   or has a body that fails the ref grammar.
 */
export function resolveRollbackRef(raw) {
  if (!raw || typeof raw !== "string") {
    throw new Error("rollback reference must be a non-empty string");
  }
  // Never let a value that looks like an option reach git.
  if (raw.startsWith("-")) {
    throw new Error(`rollback reference must not start with '-': ${raw}`);
  }
  if (raw === "genesis") return "op/genesis";
  if (HEAD_REF_RE.test(raw)) return raw;

  // Namespace-prefixed tags: accept but validate the body.
  // "pre-op/" must be tested before "pre-" because it shares the prefix.
  if (raw.startsWith("pre-op/")) {
    if (!isSafeRefBody(raw.slice("pre-op/".length))) {
      throw new Error(`rollback: invalid pre-op ref body: ${raw}`);
    }
    return raw;
  }
  if (raw.startsWith("op/")) {
    if (!isSafeRefBody(raw.slice("op/".length))) {
      throw new Error(`rollback: invalid op ref body: ${raw}`);
    }
    return raw;
  }
  if (raw.startsWith("pre-")) {
    const rest = raw.slice("pre-".length);
    if (!isSafeRefBody(rest)) {
      throw new Error(`rollback: invalid pre-<op-id> ref body: ${raw}`);
    }
    return `pre-op/${rest}`;
  }

  // Bare op-id: interpret as "state right after the op finished".
  if (!isSafeRefBody(raw)) {
    throw new Error(`rollback: invalid op-id: ${raw}`);
  }
  return `op/${raw}`;
}
61
+
62
/**
 * Destructively move the wiki's working tree back to an earlier state.
 *
 * Steps, in order: resolve `rawRef` with `resolveRollbackRef`, confirm
 * the resolved ref exists (`gitRefExists`), record its SHA
 * (`gitRevParse`), then call `gitResetHard` followed by `gitClean`.
 *
 * WARNING: not reversible on its own. Per this module's header the
 * helpers perform `git reset --hard` and `git clean -fd`, so
 * uncommitted edits are discarded and untracked files that are not
 * ignored are removed. Callers should either take a fresh pre-op
 * snapshot first, or show the user the current HEAD SHA so a mistyped
 * ref can be recovered through the private repo's reflog.
 *
 * The clean step deliberately omits `-x`, so ignored scratch
 * directories (`.work/` and the `work/` dirs under `.shape/history/`,
 * listed in the internal `.llmwiki/git/info/exclude`) survive.
 *
 * @param {string} wikiRoot Absolute path to the wiki root
 * @param {string} rawRef   Any form accepted by `resolveRollbackRef`:
 *                          "genesis", "<op-id>", "pre-<op-id>",
 *                          "op/<op-id>", "pre-op/<op-id>", "HEAD",
 *                          "HEAD~N", ...
 * @returns {{ ref: string, sha: string | null }}
 * @throws if `rawRef` is rejected by `resolveRollbackRef`, or the
 *         resolved ref does not exist in the private repo
 */
export function rollbackOperation(wikiRoot, rawRef) {
  const ref = resolveRollbackRef(rawRef);

  const refIsKnown = gitRefExists(wikiRoot, ref);
  if (!refIsKnown) {
    const message = `rollback: ref "${ref}" not found in the wiki's private git repo`;
    throw new Error(message);
  }

  // Capture the SHA before touching the working tree.
  const sha = gitRevParse(wikiRoot, ref);

  gitResetHard(wikiRoot, ref);
  gitClean(wikiRoot);

  const outcome = { ref, sha };
  return outcome;
}
@@ -0,0 +1,172 @@
1
+ // Shape-check: detect which rewrite operators from section 3.5 of the
2
+ // methodology would currently apply. Non-mutating. Writes findings to
3
+ // `<wiki>/.shape/suggestions.md` and, if they cross a threshold, sets
4
+ // `rebuild_needed: true` on the root index.md.
5
+
6
+ import { existsSync, mkdirSync, readdirSync, readFileSync, writeFileSync, renameSync, statSync } from "node:fs";
7
+ import { basename, dirname, join, relative } from "node:path";
8
+ import { parseFrontmatter, renderFrontmatter } from "./frontmatter.mjs";
9
+ import { listChildren, readIndex } from "./indices.mjs";
10
+
11
// Number of suggestions at which the root index is flagged for rebuild.
const DEFAULT_THRESHOLD = 5;
// Minimum Jaccard overlap of two siblings' covers[] to suggest MERGE.
const MERGE_SIMILARITY = 0.7;
// Authored-zone length above which an index gets a DESCEND suggestion.
const DESCEND_AUTHORED_LIMIT = 2048;
// covers[] item count above which a leaf gets a DECOMPOSE suggestion.
const DECOMPOSE_COVERS_LIMIT = 12;

/**
 * Scan every indexed directory under `wikiRoot` and report which
 * rewrite operators currently look applicable.
 *
 * For each directory the checks run in a fixed order — LIFT, MERGE,
 * DESCEND, then per-leaf DECOMPOSE / NEST — and the suggestions are
 * appended in that order. The full list is written via
 * `writeSuggestions`. The root rebuild flag is then set to true when
 * the count reaches the threshold, cleared when there are no
 * suggestions at all, and left untouched in between.
 *
 * @param {string} wikiRoot Wiki root directory.
 * @param {{ threshold?: number }} [options] `threshold` overrides
 *   DEFAULT_THRESHOLD.
 * @returns {Array<{ operator: string, target: string | string[], reason: string }>}
 */
export function runShapeCheck(wikiRoot, options = {}) {
  const threshold = options.threshold ?? DEFAULT_THRESHOLD;
  const suggestions = [];
  const dirs = [];
  collectDirs(wikiRoot, dirs);

  for (const dir of dirs) {
    const { leaves, subdirs } = listChildren(dir);
    suggestLift(dir, wikiRoot, leaves, subdirs, suggestions);
    suggestMerges(leaves, suggestions);
    suggestDescend(dir, suggestions);
    // DECOMPOSE & NEST candidates are frontmatter-heuristic-heavy; we
    // report simpler signals here and leave semantic clustering to AI
    // review during Rebuild.
    suggestLeafSignals(leaves, suggestions);
  }

  // Write suggestions file and root flag.
  writeSuggestions(wikiRoot, suggestions);
  if (suggestions.length >= threshold) {
    setRootRebuildFlag(wikiRoot, suggestions, true);
  } else if (suggestions.length === 0) {
    setRootRebuildFlag(wikiRoot, [], false);
  }
  return suggestions;
}

// LIFT: a non-root folder holding exactly one leaf and no subfolders.
function suggestLift(dir, wikiRoot, leaves, subdirs, out) {
  if (dir === wikiRoot || leaves.length !== 1 || subdirs.length !== 0) return;
  out.push({
    operator: "LIFT",
    target: dir,
    reason: `folder contains exactly one entry (${leaves[0].data.id}); lift it to parent`,
  });
}

// MERGE: same-type sibling pairs whose covers[] sets have a Jaccard
// overlap of at least MERGE_SIMILARITY. Pairs where either side has
// no covers are skipped.
function suggestMerges(leaves, out) {
  for (let i = 0; i < leaves.length; i++) {
    for (let j = i + 1; j < leaves.length; j++) {
      const a = leaves[i].data;
      const b = leaves[j].data;
      if (a.type !== b.type) continue;
      const aCov = new Set(a.covers ?? []);
      const bCov = new Set(b.covers ?? []);
      if (aCov.size === 0 || bCov.size === 0) continue;
      let intersect = 0;
      for (const c of aCov) if (bCov.has(c)) intersect++;
      const union = aCov.size + bCov.size - intersect;
      if (union === 0) continue;
      const overlap = intersect / union;
      if (overlap >= MERGE_SIMILARITY) {
        out.push({
          operator: "MERGE",
          target: [leaves[i].path, leaves[j].path],
          reason: `siblings "${a.id}" and "${b.id}" have ${Math.round(overlap * 100)}% covers overlap`,
        });
      }
    }
  }
}

// DESCEND: the index's authored zone is over budget, or contains
// leaf-style content (a code fence or an unchecked checklist item).
// Both conditions can fire for the same index.
function suggestDescend(dir, out) {
  const indexPath = join(dir, "index.md");
  if (!existsSync(indexPath)) return;
  const raw = readFileSync(indexPath, "utf8");
  const { body } = parseFrontmatter(raw, indexPath);
  const authored = extractAuthoredZone(body);
  if (authored.length > DESCEND_AUTHORED_LIMIT) {
    out.push({
      operator: "DESCEND",
      target: indexPath,
      reason: `index authored zone is ${authored.length} bytes; push content to a leaf`,
    });
  }
  if (authored && (/^\s*```/m.test(authored) || /^\s*- \[ \]/m.test(authored))) {
    out.push({
      operator: "DESCEND",
      target: indexPath,
      reason: "index body contains leaf-style content (code fence or checklist)",
    });
  }
}

// DECOMPOSE: a leaf with more than DECOMPOSE_COVERS_LIMIT covers[]
// items. NEST: a leaf that declares a non-empty nests_into array.
function suggestLeafSignals(leaves, out) {
  for (const leaf of leaves) {
    const covers = leaf.data.covers ?? [];
    if (covers.length > DECOMPOSE_COVERS_LIMIT) {
      out.push({
        operator: "DECOMPOSE",
        target: leaf.path,
        reason: `leaf has ${covers.length} covers[] items; consider splitting by concern`,
      });
    }
    if (leaf.data.nests_into && Array.isArray(leaf.data.nests_into) && leaf.data.nests_into.length > 0) {
      out.push({
        operator: "NEST",
        target: leaf.path,
        reason: `leaf declares nests_into: ${leaf.data.nests_into.join(", ")}`,
      });
    }
  }
}
+
110
// Append `dirPath` and, recursively, every non-dot subdirectory that
// contains an index.md to `acc` (pre-order). A path that does not
// exist adds nothing. Subdirectories without an index.md are not
// entered, so nothing beneath them is collected either. Listing
// failures are swallowed: the directory itself stays in `acc` but
// contributes no children.
function collectDirs(dirPath, acc) {
  if (!existsSync(dirPath)) return;
  acc.push(dirPath);
  try {
    readdirSync(dirPath, { withFileTypes: true })
      .filter((child) => !child.name.startsWith(".") && child.isDirectory())
      .map((child) => join(dirPath, child.name))
      .filter((childDir) => existsSync(join(childDir, "index.md")))
      .forEach((childDir) => collectDirs(childDir, acc));
  } catch {
    /* skip */
  }
}
125
+
126
// Return the trimmed text between the BEGIN and END "AUTHORED
// ORIENTATION" markers in an index body, or "" when the zone is
// absent or unterminated.
//
// The END marker is searched for only after the BEGIN marker, so a
// stray END earlier in the body does not hide a well-formed zone. A
// non-string body is treated as having no zone.
function extractAuthoredZone(body) {
  const BEGIN_MARK = "<!-- BEGIN AUTHORED ORIENTATION -->";
  const END_MARK = "<!-- END AUTHORED ORIENTATION -->";
  if (typeof body !== "string") return "";
  const start = body.indexOf(BEGIN_MARK);
  if (start === -1) return "";
  const contentStart = start + BEGIN_MARK.length;
  const end = body.indexOf(END_MARK, contentStart);
  if (end === -1) return "";
  return body.slice(contentStart, end).trim();
}
132
+
133
// Write the report to `<wikiRoot>/.shape/suggestions.md`, creating
// the `.shape` directory if needed. The report is a heading, a
// timestamp line, then either a "no candidates" note or one bullet
// per suggestion (operator + target paths relative to the wiki root)
// followed by its reason. The file is overwritten on every call.
function writeSuggestions(wikiRoot, suggestions) {
  const shapeDir = join(wikiRoot, ".shape");
  mkdirSync(shapeDir, { recursive: true });
  const reportPath = join(shapeDir, "suggestions.md");
  const stamp = new Date().toISOString();

  const header = ["# Shape Suggestions", "", `_Last shape-check: ${stamp}_`, ""];

  let section;
  if (suggestions.length === 0) {
    section = ["_No pending operator candidates._", ""];
  } else {
    const toRelative = (target) => relative(wikiRoot, target);
    section = [`**${suggestions.length} pending candidate(s):**`, ""];
    for (const suggestion of suggestions) {
      const { target } = suggestion;
      // MERGE carries a pair of paths; every other operator a single one.
      const shown = Array.isArray(target)
        ? target.map(toRelative).join(", ")
        : toRelative(target);
      section.push(`- **${suggestion.operator}** — \`${shown}\``);
      section.push(` - ${suggestion.reason}`);
    }
    section.push("");
  }

  writeFileSync(reportPath, header.concat(section).join("\n"), "utf8");
}
158
+
159
// Update the rebuild hints in the root index.md frontmatter:
//   rebuild_needed  <- `needed`
//   rebuild_reasons <- "<operator>: <reason>" for the first 10 suggestions
//   rebuild_command <- filled with a default only when not already set
// Does nothing when the root index.md is missing. The new content is
// written to "<index>.tmp" and then renamed over the original.
function setRootRebuildFlag(wikiRoot, suggestions, needed) {
  const rootIndex = join(wikiRoot, "index.md");
  if (!existsSync(rootIndex)) return;

  const { data, body } = parseFrontmatter(readFileSync(rootIndex, "utf8"), rootIndex);

  const reasons = [];
  for (const suggestion of suggestions.slice(0, 10)) {
    reasons.push(`${suggestion.operator}: ${suggestion.reason}`);
  }

  // Assignment order is kept as-is in case it affects rendered key order.
  data.rebuild_needed = needed;
  data.rebuild_reasons = reasons;
  if (!data.rebuild_command) {
    data.rebuild_command = "skill-llm-wiki rebuild <wiki> --plan";
  }

  const staging = rootIndex + ".tmp";
  const rendered = renderFrontmatter(data, body);
  writeFileSync(staging, rendered, "utf8");
  renameSync(staging, rootIndex);
}