@ctxr/skill-llm-wiki 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/CHANGELOG.md +134 -0
  2. package/LICENSE +21 -0
  3. package/README.md +484 -0
  4. package/SKILL.md +252 -0
  5. package/guide/basics/concepts.md +74 -0
  6. package/guide/basics/index.md +45 -0
  7. package/guide/basics/schema.md +140 -0
  8. package/guide/cli.md +256 -0
  9. package/guide/correctness/index.md +45 -0
  10. package/guide/correctness/invariants.md +89 -0
  11. package/guide/correctness/safety.md +96 -0
  12. package/guide/history/diff.md +110 -0
  13. package/guide/history/hidden-git.md +130 -0
  14. package/guide/history/index.md +52 -0
  15. package/guide/history/remote-sync.md +113 -0
  16. package/guide/index.md +134 -0
  17. package/guide/isolation/coexistence.md +134 -0
  18. package/guide/isolation/index.md +44 -0
  19. package/guide/isolation/scale.md +251 -0
  20. package/guide/layout/in-place-mode.md +97 -0
  21. package/guide/layout/index.md +53 -0
  22. package/guide/layout/layout-contract.md +131 -0
  23. package/guide/layout/layout-modes.md +115 -0
  24. package/guide/operations/index.md +76 -0
  25. package/guide/operations/ingest/build.md +75 -0
  26. package/guide/operations/ingest/extend.md +61 -0
  27. package/guide/operations/ingest/index.md +54 -0
  28. package/guide/operations/ingest/join.md +65 -0
  29. package/guide/operations/maintain/fix.md +66 -0
  30. package/guide/operations/maintain/index.md +47 -0
  31. package/guide/operations/maintain/rebuild.md +86 -0
  32. package/guide/operations/validate.md +48 -0
  33. package/guide/substrate/index.md +47 -0
  34. package/guide/substrate/operators.md +96 -0
  35. package/guide/substrate/tiered-ai.md +363 -0
  36. package/guide/ux/index.md +44 -0
  37. package/guide/ux/preflight.md +150 -0
  38. package/guide/ux/user-intent.md +135 -0
  39. package/package.json +55 -0
  40. package/scripts/cli.mjs +893 -0
  41. package/scripts/commands/remote.mjs +93 -0
  42. package/scripts/commands/review.mjs +253 -0
  43. package/scripts/commands/sync.mjs +84 -0
  44. package/scripts/lib/chunk.mjs +421 -0
  45. package/scripts/lib/cluster-detect.mjs +516 -0
  46. package/scripts/lib/decision-log.mjs +343 -0
  47. package/scripts/lib/draft.mjs +158 -0
  48. package/scripts/lib/embeddings.mjs +366 -0
  49. package/scripts/lib/frontmatter.mjs +497 -0
  50. package/scripts/lib/git-commands.mjs +155 -0
  51. package/scripts/lib/git.mjs +486 -0
  52. package/scripts/lib/gitignore.mjs +62 -0
  53. package/scripts/lib/history.mjs +331 -0
  54. package/scripts/lib/indices.mjs +510 -0
  55. package/scripts/lib/ingest.mjs +258 -0
  56. package/scripts/lib/intent.mjs +713 -0
  57. package/scripts/lib/interactive.mjs +99 -0
  58. package/scripts/lib/migrate.mjs +126 -0
  59. package/scripts/lib/nest-applier.mjs +260 -0
  60. package/scripts/lib/operators.mjs +1365 -0
  61. package/scripts/lib/orchestrator.mjs +718 -0
  62. package/scripts/lib/paths.mjs +197 -0
  63. package/scripts/lib/preflight.mjs +213 -0
  64. package/scripts/lib/provenance.mjs +672 -0
  65. package/scripts/lib/quality-metric.mjs +269 -0
  66. package/scripts/lib/query-fixture.mjs +71 -0
  67. package/scripts/lib/rollback.mjs +95 -0
  68. package/scripts/lib/shape-check.mjs +172 -0
  69. package/scripts/lib/similarity-cache.mjs +126 -0
  70. package/scripts/lib/similarity.mjs +230 -0
  71. package/scripts/lib/snapshot.mjs +54 -0
  72. package/scripts/lib/source-frontmatter.mjs +85 -0
  73. package/scripts/lib/tier2-protocol.mjs +470 -0
  74. package/scripts/lib/tiered.mjs +453 -0
  75. package/scripts/lib/validate.mjs +362 -0
@@ -0,0 +1,453 @@
1
+ // tiered.mjs — the escalation orchestrator for the tiered AI ladder.
2
+ //
3
+ // Every similarity decision (classify, MERGE/DECOMPOSE/NEST
4
+ // detection in operator-convergence, cluster-name at NEST time,
5
+ // Rebuild plan-review, Join id-collision) flows through this
6
+ // module. It runs Tier 0 (TF-IDF), escalates to Tier 1 (local
7
+ // MiniLM embeddings — now a required dep) on mid-band results, and
8
+ // escalates to Tier 2 (sub-agent, via exit-7 handshake) only for
9
+ // the residual ambiguous cases. A similarity-cache hit short-
10
+ // circuits the whole ladder.
11
+ //
12
+ // Three quality modes, selected via --quality-mode or the
13
+ // LLM_WIKI_QUALITY_MODE env var:
14
+ //
15
+ // tiered-fast (default):
16
+ // Tier 0 → Tier 1 → Tier 2, the full ladder. Mid-band Tier 0
17
+ // escalates to Tier 1; mid-band Tier 1 escalates to Tier 2.
18
+ //
19
+ // claude-first:
20
+ // Tier 0 is still consulted for decisive cases (saves tokens on
21
+ // obvious decisions) but anything in the Tier 0 mid-band goes
22
+ // straight to Tier 2, skipping Tier 1.
23
+ //
24
+ // tier0-only:
25
+ // Tier 0 decisions only. Mid-band becomes an explicit
26
+ // "undecidable" marker that the caller must resolve manually.
27
+ //
28
+ // Tier 2 escalation contract: the skill's CLI runs under Node with
29
+ // no access to Claude Code's `Agent` tool, so it cannot spawn
30
+ // sub-agents directly. Instead, Tier 2 requests are accumulated in
31
+ // a per-batch pending queue (tier2-protocol.mjs). When a phase
32
+ // finishes, the caller writes the batch to
33
+ // `<wiki>/.work/tier2/pending-<batch-id>.json` and the CLI exits
34
+ // with code 7 (NEEDS_TIER2). The wiki-runner sub-agent spawns one
35
+ // `Agent` per request, writes the responses back, and re-invokes
36
+ // the CLI. On resume `tiered.decide` reads the responses from the
37
+ // fixture/response-map and returns inline.
38
+ //
39
+ // Test hermeticity: `LLM_WIKI_TIER2_FIXTURE=<path>` wires a
40
+ // pre-canned fixture into the decide() path so unit/e2e tests can
41
+ // drive Tier 2 decisions without exit-7.
42
+
43
+ import { createHash } from "node:crypto";
44
+ import { appendDecision } from "./decision-log.mjs";
45
+ import {
46
+ embed,
47
+ embeddingCosine,
48
+ TIER1_DECISIVE_DIFFERENT,
49
+ TIER1_DECISIVE_SAME,
50
+ } from "./embeddings.mjs";
51
+ import { readCached, writeCached } from "./similarity-cache.mjs";
52
+ import {
53
+ compareEntries,
54
+ entryText,
55
+ TIER0_DECISIVE_DIFFERENT,
56
+ TIER0_DECISIVE_SAME,
57
+ } from "./similarity.mjs";
58
+ import {
59
+ loadFixture,
60
+ makeRequest,
61
+ resolveFromFixture,
62
+ } from "./tier2-protocol.mjs";
63
+
64
/**
 * Quality modes accepted by the tiered decision ladder. Frozen so the
 * shared list cannot be mutated by importers.
 */
export const QUALITY_MODES = Object.freeze([
  "tiered-fast",
  "claude-first",
  "tier0-only",
]);

/** Mode used when neither the flag nor the environment selects one. */
export const DEFAULT_QUALITY_MODE = "tiered-fast";

/**
 * Resolve the effective quality mode.
 *
 * Precedence: `flags.quality_mode`, then the `LLM_WIKI_QUALITY_MODE`
 * environment variable, then `DEFAULT_QUALITY_MODE`. Empty strings
 * count as "not set" and fall through to the next source.
 *
 * @param {{ quality_mode?: string } | null} [flags] - parsed flags;
 *   `null` and `undefined` are both treated as "no flags".
 * @returns {string} one of `QUALITY_MODES`.
 * @throws {Error} when the selected value is not a known mode.
 */
export function resolveQualityMode(flags = {}) {
  // The parameter default only covers `undefined`; `?.` also tolerates
  // an explicit `null` instead of crashing with a TypeError.
  const fromFlag = flags?.quality_mode;
  const fromEnv = process.env.LLM_WIKI_QUALITY_MODE;
  // `||` (not `??`) is deliberate: an empty flag or env value must
  // fall through rather than be validated as a mode name.
  const raw = fromFlag || fromEnv || DEFAULT_QUALITY_MODE;
  if (!QUALITY_MODES.includes(raw)) {
    throw new Error(
      `tiered: unknown quality mode "${raw}" (valid: ${QUALITY_MODES.join(", ")})`,
    );
  }
  return raw;
}
83
+
84
// ── Tier 2 pending queue ────────────────────────────────────────────
//
// A lightweight per-wiki queue of Tier 2 requests that the caller
// accumulates during a phase. When the phase finishes, the caller
// drains the queue and writes the batch via tier2-protocol. The
// queues live in a module-level Map keyed by wikiRoot so multiple
// operations in the same process (tests) don't collide.
//
// Each per-wiki queue is itself a Map keyed by request_id. A Map
// iterates in insertion order, so draining yields requests in the
// order they were first enqueued, and the duplicate check is a
// constant-time lookup instead of a scan of the whole queue.

const PENDING_QUEUES = new Map();

/**
 * Drain and return every pending request for a wiki.
 *
 * @param {string} wikiRoot - key identifying the wiki's queue.
 * @returns {object[]} the queued requests in first-enqueued order;
 *   an empty array when nothing is queued. The queue is removed.
 */
export function takePendingRequests(wikiRoot) {
  const queue = PENDING_QUEUES.get(wikiRoot);
  PENDING_QUEUES.delete(wikiRoot);
  return queue ? [...queue.values()] : [];
}

/**
 * Count pending requests for a wiki without draining them.
 *
 * @param {string} wikiRoot - key identifying the wiki's queue.
 * @returns {number} number of distinct queued requests.
 */
export function countPendingRequests(wikiRoot) {
  return PENDING_QUEUES.get(wikiRoot)?.size ?? 0;
}

/** Drop every queue for every wiki. */
export function _resetPendingQueues() {
  PENDING_QUEUES.clear();
}

/**
 * Queue a Tier 2 request for a wiki.
 *
 * Dedup is by `request_id` — the same question asked twice is
 * answered once; the first request enqueued under an id wins.
 *
 * @param {string} wikiRoot - key identifying the wiki's queue.
 * @param {{ request_id: string }} request - request to queue.
 */
export function enqueuePending(wikiRoot, request) {
  let queue = PENDING_QUEUES.get(wikiRoot);
  if (!queue) {
    queue = new Map();
    PENDING_QUEUES.set(wikiRoot, queue);
  }
  if (queue.has(request.request_id)) return;
  queue.set(request.request_id, request);
}

// Backwards-compatible seam used by operators.mjs to push
// cluster_name requests onto the shared queue.
export const _appendPending = enqueuePending;
121
+
122
// ── Resolved-response cache ────────────────────────────────────────
//
// Once the wiki-runner has written responses, we load them once at
// phase-resume and then queries into this Map return the resolved
// value inline. Unit tests can seed via `seedTier2Responses`.

const RESOLVED_RESPONSES = new Map();

// Normalise a caller-supplied response collection into something
// with a `.get(requestId)` method, or reject it at the seeding site.
function toResponseLookup(map) {
  // Falsy values are stored untouched (lookups then return undefined),
  // and Map-like objects are kept by reference so later additions by
  // the caller stay visible.
  if (!map || typeof map.get === "function") return map;
  if (typeof map === "object") {
    // Plain object keyed by request_id, e.g. straight from JSON.parse.
    return new Map(Object.entries(map));
  }
  throw new TypeError(
    "tiered.seedTier2Responses: responses must be a Map or a plain object keyed by request_id",
  );
}

/**
 * Register the resolved Tier 2 responses for a wiki.
 *
 * @param {string} wikiRoot - key identifying the wiki.
 * @param {Map<string, object> | Record<string, object> | null} map -
 *   responses keyed by request_id. A Map is stored by reference; a
 *   plain object is converted to a Map over its own enumerable keys.
 * @throws {TypeError} when `map` is a truthy non-object value.
 */
export function seedTier2Responses(wikiRoot, map) {
  RESOLVED_RESPONSES.set(wikiRoot, toResponseLookup(map));
}

/**
 * Forget the resolved responses registered for a wiki.
 *
 * @param {string} wikiRoot - key identifying the wiki.
 */
export function clearTier2Responses(wikiRoot) {
  RESOLVED_RESPONSES.delete(wikiRoot);
}

// Internal lookup: undefined when the wiki has no seeded responses or
// the request id is not among them.
function resolvedResponseFor(wikiRoot, requestId) {
  const m = RESOLVED_RESPONSES.get(wikiRoot);
  if (!m) return undefined;
  return m.get(requestId);
}

// Public lookup used by operators.mjs's cluster path to check
// whether a naming request has already been answered by a prior
// wiki-runner response (seeded via seedTier2Responses during
// resume).
export function getResolvedResponse(wikiRoot, requestId) {
  return resolvedResponseFor(wikiRoot, requestId);
}
151
+
152
// Content-address a single entry: SHA-256 over the text that
// `entryText` extracts from it, rendered as "sha256:<hex>". The two
// per-entry hashes form the pairwise cache key, so neither ids nor
// paths influence it.
function entryHash(data) {
  const hasher = createHash("sha256");
  hasher.update(entryText(data));
  return `sha256:${hasher.digest("hex")}`;
}
158
+
159
/**
 * The main decision entry point: walk the tiered ladder for one pair
 * of entries.
 *
 * Order of evaluation:
 *   1. Pairwise cache (when `readCache`) — a hit is returned as-is
 *      with `reason: "cached"` and is neither re-logged nor re-cached.
 *   2. Tier 0 via `compareEntries` — always consulted. Anything other
 *      than "escalate" is final.
 *   3. Mid-band Tier 0, by quality mode:
 *        - "tier0-only"   → "undecidable" (logged, never cached);
 *        - "claude-first" → straight to Tier 2;
 *        - "tiered-fast"  → Tier 1 embeddings, then Tier 2 when the
 *          cosine falls strictly between the Tier 1 thresholds.
 *
 * `pending-tier2` means the decision was escalated to Tier 2 and the
 * response isn't available yet. The request has been queued; the
 * caller must trigger exit-7 at the end of the current phase. On the
 * re-invocation after the wiki-runner writes responses, `decide`
 * finds the answer in the resolved-responses map and returns it as a
 * regular `tier=2` decision.
 *
 * @param {object} a - first entry.
 * @param {object} b - second entry.
 * @param {object[]} contextEntries - shared context passed to Tier 0.
 * @param {object} [options]
 * @param {string} options.wikiRoot - wiki root (required).
 * @param {string} options.operator - operator name (required).
 * @param {string} [options.opId] - op id; without it no decision-log
 *   record is written.
 * @param {string} [options.qualityMode] - one of `QUALITY_MODES`.
 * @param {boolean} [options.writeLog=true]
 * @param {boolean} [options.readCache=true]
 * @param {boolean} [options.writeCache=true]
 * @param {Function|null} [options.tier2Handler] - legacy inline Tier 2
 *   handler for unit tests.
 * @param {*} [options.precomputedModel] - forwarded to `compareEntries`.
 * @returns {Promise<{tier: number, similarity: number, decision: string,
 *   confidence_band: string, reason: (string|null)}>} `tier` is the
 *   ladder step that produced the decision (cache hits report the
 *   cached tier); `decision` is "same" | "different" | "undecidable" |
 *   "pending-tier2".
 * @throws {Error} when `wikiRoot` or `operator` is missing, or
 *   `qualityMode` is not a known mode.
 */
export async function decide(
  a,
  b,
  contextEntries,
  options = {},
) {
  const {
    wikiRoot,
    opId,
    operator,
    qualityMode = DEFAULT_QUALITY_MODE,
    writeLog = true,
    readCache = true,
    writeCache = true,
    tier2Handler = null, // legacy custom handler for unit tests
  } = options;
  if (!wikiRoot) {
    throw new Error("tiered.decide requires { wikiRoot }");
  }
  if (!operator) {
    throw new Error("tiered.decide requires { operator }");
  }
  if (!QUALITY_MODES.includes(qualityMode)) {
    throw new Error(`tiered: unknown quality mode "${qualityMode}"`);
  }

  const hashA = entryHash(a);
  const hashB = entryHash(b);

  // Record a final decision (log + cache) and hand it back. Pass
  // `cacheable = false` for results that must not enter the cache.
  const settle = (result, cacheable = true) => {
    finaliseDecision(result, {
      a,
      b,
      hashA,
      hashB,
      wikiRoot,
      opId,
      operator,
      writeLog,
      writeCache: writeCache && cacheable,
    });
    return result;
  };

  // Cache short-circuit.
  if (readCache) {
    const cached = readCached(wikiRoot, hashA, hashB);
    if (cached) {
      return {
        tier: cached.tier,
        similarity: cached.similarity,
        decision: cached.decision,
        confidence_band: cached.confidence_band ?? "cached",
        reason: "cached",
      };
    }
  }

  // Tier 0 — always consulted. Cheap and deterministic.
  const t0 = compareEntries(a, b, contextEntries, {
    precomputedModel: options.precomputedModel ?? null,
  });
  if (t0.decision === "undecidable") {
    return settle({
      tier: 0,
      similarity: t0.similarity,
      decision: "undecidable",
      confidence_band: t0.confidence_band,
      reason: t0.reason,
    });
  }
  if (t0.decision !== "escalate") {
    return settle({
      tier: 0,
      similarity: t0.similarity,
      decision: t0.decision,
      confidence_band: t0.confidence_band,
      reason: null,
    });
  }

  // Mid-band Tier 0 → escalate. Behaviour depends on quality mode.
  if (qualityMode === "tier0-only") {
    // This "undecidable" is an artefact of the mode, not of the pair,
    // and the cache key carries no mode. Caching it would make a later
    // tiered-fast / claude-first run hit the cache and never escalate,
    // so it is logged but kept out of the cache.
    return settle(
      {
        tier: 0,
        similarity: t0.similarity,
        decision: "undecidable",
        confidence_band: t0.confidence_band,
        reason: "tier0-only quality mode — mid-band left unresolved",
      },
      false,
    );
  }

  if (qualityMode === "claude-first") {
    // Skip Tier 1 entirely, go straight to Tier 2.
    return await escalateToTier2(
      a, b, hashA, hashB, wikiRoot, opId, operator,
      t0.similarity, "claude-first mode", writeLog, writeCache,
      tier2Handler, t0, null,
    );
  }

  // tiered-fast — try Tier 1. There is deliberately no eager
  // `ensureTier1` precheck here: it used to trigger the
  // @xenova/transformers dynamic import on every mid-band pair even
  // when BOTH embeddings were already warm in the on-disk cache.
  // `embed()` short-circuits on a cache hit without touching the
  // loader and raises a clear "Tier 1 failed to load" error on a
  // cache miss when the module can't import, so the hard error still
  // surfaces on the actual miss path.
  //
  // Net effect: a resume cycle with a warm embedding cache never
  // loads the Tier 1 model at all, matching the Tier 1 lazy-load
  // contract documented in `guide/substrate/tiered-ai.md`.
  const [vecA, vecB] = await Promise.all([
    embed(wikiRoot, entryText(a)),
    embed(wikiRoot, entryText(b)),
  ]);
  const sim = embeddingCosine(vecA, vecB);
  if (sim >= TIER1_DECISIVE_SAME) {
    return settle({
      tier: 1,
      similarity: sim,
      decision: "same",
      confidence_band: "decisive-same",
      reason: null,
    });
  }
  if (sim <= TIER1_DECISIVE_DIFFERENT) {
    return settle({
      tier: 1,
      similarity: sim,
      decision: "different",
      confidence_band: "decisive-different",
      reason: null,
    });
  }
  // Mid-band Tier 1 → Tier 2.
  return await escalateToTier2(
    a, b, hashA, hashB, wikiRoot, opId, operator,
    sim, "tier1 mid-band", writeLog, writeCache,
    tier2Handler, t0, { similarity: sim },
  );
}
318
+
319
// ── Tier 2 escalation ──────────────────────────────────────────────
//
// Four paths, in priority order:
//
//   1. Unit-test `tier2Handler` option — a callback that runs
//      inline and returns the decision. Used by tests that want to
//      assert the escalation path fires with specific context.
//   2. LLM_WIKI_TIER2_FIXTURE — a fixture JSON file that pre-
//      resolves request_ids. Used by e2e tests.
//   3. Resolved-responses map seeded via seedTier2Responses() — the
//      runtime-resume path: the wiki-runner wrote responses after a
//      previous exit-7 and the orchestrator seeded them for the re-
//      invocation.
//   4. Otherwise: build a merge_decision request, enqueue it, and
//      return `{ decision: "pending-tier2" }`. The caller's phase
//      handler propagates this up to drain the queue and exit 7.
//
// Paths 1–3 yield a final `tier: 2` result that is passed through
// `finaliseDecision`; path 4 yields the pending marker (with the
// request_id attached) and records nothing.
async function escalateToTier2(
  a, b, hashA, hashB, wikiRoot, opId, operator,
  similarity, reason, writeLog, writeCache,
  tier2Handler, t0, t1,
) {
  // Shape an answer into a final tier-2 result, record it, return it.
  // Missing similarity/reason fall back to the escalation's own.
  const conclude = (answer, confidenceBand, fallbackReason) => {
    const outcome = {
      tier: 2,
      similarity: answer.similarity ?? similarity,
      decision: answer.decision,
      confidence_band: confidenceBand,
      reason: answer.reason ?? fallbackReason,
    };
    finaliseDecision(outcome, {
      a, b, hashA, hashB, wikiRoot, opId, operator, writeLog, writeCache,
    });
    return outcome;
  };
  // A response only counts when it carries a string decision.
  const hasDecision = (candidate) =>
    Boolean(candidate) && typeof candidate.decision === "string";

  // Path 1: unit-test inline handler.
  if (tier2Handler) {
    const handled = await tier2Handler({ a, b, t0, t1, reason });
    return conclude(
      handled,
      handled.confidence_band ?? "claude-resolved",
      reason,
    );
  }

  // The merge_decision request is built from the pair itself, so the
  // same pair asked twice is meant to produce the same request_id and
  // be answered by the wiki-runner only once.
  const prompt =
    "Are these two frontmatter blobs describing the SAME concept " +
    "(for MERGE), DIFFERENT concepts, or is it unclear? Answer " +
    "one of 'same' / 'different' / 'undecidable' with a one-line reason.";
  const request = makeRequest("merge_decision", {
    prompt,
    inputs: {
      a,
      b,
      operator,
      tier0_similarity: t0.similarity,
      tier1_similarity: t1?.similarity ?? null,
    },
  });

  // Path 2: fixture file.
  const fixture = loadFixture();
  const fromFixture = fixture ? resolveFromFixture(fixture, request) : null;
  if (hasDecision(fromFixture)) {
    return conclude(fromFixture, "fixture-resolved", "fixture");
  }

  // Path 3: response seeded from a previous exit-7 + wiki-runner cycle.
  const fromRunner = resolvedResponseFor(wikiRoot, request.request_id);
  if (hasDecision(fromRunner)) {
    return conclude(fromRunner, "runner-resolved", "wiki-runner");
  }

  // Path 4: nothing can answer yet — queue the request for the
  // wiki-runner. The caller's phase handler drains the queue, writes
  // a batch file, and exits 7.
  enqueuePending(wikiRoot, request);
  return {
    tier: 2,
    similarity,
    decision: "pending-tier2",
    confidence_band: "tier2-queued",
    reason,
    request_id: request.request_id,
  };
}
411
+
412
// Persist a finished decision: first the pairwise cache entry (when
// cache writes are enabled), then a decision-log record (when logging
// is enabled AND an op id is present). A "pending-tier2" result is
// not a decision yet, so it is never cached or logged.
function finaliseDecision(
  result,
  { a, b, hashA, hashB, wikiRoot, opId, operator, writeLog, writeCache },
) {
  const isPending = result.decision === "pending-tier2";
  if (isPending) return; // never cache pending state

  if (writeCache) {
    writeCached(wikiRoot, hashA, hashB, result);
  }

  if (!writeLog || !opId) return;
  // Entries without an id are logged under placeholder source names.
  const sources = [a?.id ?? "anonymous-a", b?.id ?? "anonymous-b"];
  appendDecision(wikiRoot, {
    op_id: opId,
    operator,
    sources,
    tier_used: result.tier,
    similarity: result.similarity,
    confidence_band: result.confidence_band,
    decision: result.decision,
    reason: result.reason,
  });
}
434
+
435
/**
 * Legacy compat: the old Tier 2 handler lives on as a stub that
 * always answers "undecidable", in case a test imports it directly.
 * New code should use the exit-7 protocol.
 *
 * Tolerates being called with no argument or without `t0`/`reason`
 * rather than throwing a TypeError.
 *
 * @param {object} [context]
 * @param {{ similarity?: number }} [context.t0] - Tier 0 result whose
 *   similarity is echoed back; `undefined` when absent.
 * @param {string} [context.reason] - why escalation happened.
 * @returns {Promise<{decision: string, similarity: (number|undefined),
 *   confidence_band: string, reason: string}>}
 */
export async function defaultTier2Handler({ t0, reason } = {}) {
  return {
    decision: "undecidable",
    similarity: t0?.similarity,
    confidence_band: "tier2-stub",
    // Avoid a literal "undefined" in the message when no reason is given.
    reason: `legacy stub — ${reason ?? "no reason given"}; use tier2-protocol.mjs for real escalation`,
  };
}
446
+
447
+ // Re-export thresholds for convenience.
448
+ export {
449
+ TIER0_DECISIVE_SAME,
450
+ TIER0_DECISIVE_DIFFERENT,
451
+ TIER1_DECISIVE_SAME,
452
+ TIER1_DECISIVE_DIFFERENT,
453
+ };