@pella-labs/pinakes 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. package/README.md +208 -0
  2. package/dist/cli/audit.d.ts +30 -0
  3. package/dist/cli/audit.d.ts.map +1 -0
  4. package/dist/cli/audit.js +49 -0
  5. package/dist/cli/audit.js.map +1 -0
  6. package/dist/cli/export.d.ts +32 -0
  7. package/dist/cli/export.d.ts.map +1 -0
  8. package/dist/cli/export.js +73 -0
  9. package/dist/cli/export.js.map +1 -0
  10. package/dist/cli/import.d.ts +24 -0
  11. package/dist/cli/import.d.ts.map +1 -0
  12. package/dist/cli/import.js +96 -0
  13. package/dist/cli/import.js.map +1 -0
  14. package/dist/cli/index.d.ts +3 -0
  15. package/dist/cli/index.d.ts.map +1 -0
  16. package/dist/cli/index.js +172 -0
  17. package/dist/cli/index.js.map +1 -0
  18. package/dist/cli/purge.d.ts +23 -0
  19. package/dist/cli/purge.d.ts.map +1 -0
  20. package/dist/cli/purge.js +57 -0
  21. package/dist/cli/purge.js.map +1 -0
  22. package/dist/cli/rebuild.d.ts +54 -0
  23. package/dist/cli/rebuild.d.ts.map +1 -0
  24. package/dist/cli/rebuild.js +113 -0
  25. package/dist/cli/rebuild.js.map +1 -0
  26. package/dist/cli/serve.d.ts +49 -0
  27. package/dist/cli/serve.d.ts.map +1 -0
  28. package/dist/cli/serve.js +296 -0
  29. package/dist/cli/serve.js.map +1 -0
  30. package/dist/cli/status.d.ts +39 -0
  31. package/dist/cli/status.d.ts.map +1 -0
  32. package/dist/cli/status.js +108 -0
  33. package/dist/cli/status.js.map +1 -0
  34. package/dist/db/client.d.ts +109 -0
  35. package/dist/db/client.d.ts.map +1 -0
  36. package/dist/db/client.js +175 -0
  37. package/dist/db/client.js.map +1 -0
  38. package/dist/db/repository.d.ts +82 -0
  39. package/dist/db/repository.d.ts.map +1 -0
  40. package/dist/db/repository.js +173 -0
  41. package/dist/db/repository.js.map +1 -0
  42. package/dist/db/schema.d.ts +990 -0
  43. package/dist/db/schema.d.ts.map +1 -0
  44. package/dist/db/schema.js +259 -0
  45. package/dist/db/schema.js.map +1 -0
  46. package/dist/db/types.d.ts +28 -0
  47. package/dist/db/types.d.ts.map +1 -0
  48. package/dist/db/types.js +11 -0
  49. package/dist/db/types.js.map +1 -0
  50. package/dist/gaps/detector.d.ts +67 -0
  51. package/dist/gaps/detector.d.ts.map +1 -0
  52. package/dist/gaps/detector.js +160 -0
  53. package/dist/gaps/detector.js.map +1 -0
  54. package/dist/gate/budget.d.ts +90 -0
  55. package/dist/gate/budget.d.ts.map +1 -0
  56. package/dist/gate/budget.js +145 -0
  57. package/dist/gate/budget.js.map +1 -0
  58. package/dist/ingest/chokidar.d.ts +33 -0
  59. package/dist/ingest/chokidar.d.ts.map +1 -0
  60. package/dist/ingest/chokidar.js +152 -0
  61. package/dist/ingest/chokidar.js.map +1 -0
  62. package/dist/ingest/ingester.d.ts +117 -0
  63. package/dist/ingest/ingester.d.ts.map +1 -0
  64. package/dist/ingest/ingester.js +312 -0
  65. package/dist/ingest/ingester.js.map +1 -0
  66. package/dist/ingest/manifest.d.ts +87 -0
  67. package/dist/ingest/manifest.d.ts.map +1 -0
  68. package/dist/ingest/manifest.js +223 -0
  69. package/dist/ingest/manifest.js.map +1 -0
  70. package/dist/ingest/memory-store.d.ts +55 -0
  71. package/dist/ingest/memory-store.d.ts.map +1 -0
  72. package/dist/ingest/memory-store.js +94 -0
  73. package/dist/ingest/memory-store.js.map +1 -0
  74. package/dist/ingest/parse/chunk.d.ts +15 -0
  75. package/dist/ingest/parse/chunk.d.ts.map +1 -0
  76. package/dist/ingest/parse/chunk.js +88 -0
  77. package/dist/ingest/parse/chunk.js.map +1 -0
  78. package/dist/ingest/parse/markdown.d.ts +64 -0
  79. package/dist/ingest/parse/markdown.d.ts.map +1 -0
  80. package/dist/ingest/parse/markdown.js +152 -0
  81. package/dist/ingest/parse/markdown.js.map +1 -0
  82. package/dist/ingest/queue.d.ts +21 -0
  83. package/dist/ingest/queue.d.ts.map +1 -0
  84. package/dist/ingest/queue.js +24 -0
  85. package/dist/ingest/queue.js.map +1 -0
  86. package/dist/ingest/source.d.ts +42 -0
  87. package/dist/ingest/source.d.ts.map +1 -0
  88. package/dist/ingest/source.js +19 -0
  89. package/dist/ingest/source.js.map +1 -0
  90. package/dist/mcp/envelope.d.ts +73 -0
  91. package/dist/mcp/envelope.d.ts.map +1 -0
  92. package/dist/mcp/envelope.js +46 -0
  93. package/dist/mcp/envelope.js.map +1 -0
  94. package/dist/mcp/tools/execute.d.ts +55 -0
  95. package/dist/mcp/tools/execute.d.ts.map +1 -0
  96. package/dist/mcp/tools/execute.js +232 -0
  97. package/dist/mcp/tools/execute.js.map +1 -0
  98. package/dist/mcp/tools/search.d.ts +53 -0
  99. package/dist/mcp/tools/search.d.ts.map +1 -0
  100. package/dist/mcp/tools/search.js +114 -0
  101. package/dist/mcp/tools/search.js.map +1 -0
  102. package/dist/observability/audit.d.ts +25 -0
  103. package/dist/observability/audit.d.ts.map +1 -0
  104. package/dist/observability/audit.js +38 -0
  105. package/dist/observability/audit.js.map +1 -0
  106. package/dist/observability/logger.d.ts +4 -0
  107. package/dist/observability/logger.d.ts.map +1 -0
  108. package/dist/observability/logger.js +56 -0
  109. package/dist/observability/logger.js.map +1 -0
  110. package/dist/observability/metrics.d.ts +38 -0
  111. package/dist/observability/metrics.d.ts.map +1 -0
  112. package/dist/observability/metrics.js +64 -0
  113. package/dist/observability/metrics.js.map +1 -0
  114. package/dist/retrieval/embedder.d.ts +130 -0
  115. package/dist/retrieval/embedder.d.ts.map +1 -0
  116. package/dist/retrieval/embedder.js +278 -0
  117. package/dist/retrieval/embedder.js.map +1 -0
  118. package/dist/retrieval/fts.d.ts +42 -0
  119. package/dist/retrieval/fts.d.ts.map +1 -0
  120. package/dist/retrieval/fts.js +46 -0
  121. package/dist/retrieval/fts.js.map +1 -0
  122. package/dist/retrieval/hybrid.d.ts +43 -0
  123. package/dist/retrieval/hybrid.d.ts.map +1 -0
  124. package/dist/retrieval/hybrid.js +120 -0
  125. package/dist/retrieval/hybrid.js.map +1 -0
  126. package/dist/retrieval/vec.d.ts +39 -0
  127. package/dist/retrieval/vec.d.ts.map +1 -0
  128. package/dist/retrieval/vec.js +50 -0
  129. package/dist/retrieval/vec.js.map +1 -0
  130. package/dist/sandbox/bindings/budget.d.ts +10 -0
  131. package/dist/sandbox/bindings/budget.d.ts.map +1 -0
  132. package/dist/sandbox/bindings/budget.js +44 -0
  133. package/dist/sandbox/bindings/budget.js.map +1 -0
  134. package/dist/sandbox/bindings/install.d.ts +23 -0
  135. package/dist/sandbox/bindings/install.d.ts.map +1 -0
  136. package/dist/sandbox/bindings/install.js +15 -0
  137. package/dist/sandbox/bindings/install.js.map +1 -0
  138. package/dist/sandbox/bindings/kg.d.ts +29 -0
  139. package/dist/sandbox/bindings/kg.d.ts.map +1 -0
  140. package/dist/sandbox/bindings/kg.js +323 -0
  141. package/dist/sandbox/bindings/kg.js.map +1 -0
  142. package/dist/sandbox/bindings/logger.d.ts +11 -0
  143. package/dist/sandbox/bindings/logger.d.ts.map +1 -0
  144. package/dist/sandbox/bindings/logger.js +33 -0
  145. package/dist/sandbox/bindings/logger.js.map +1 -0
  146. package/dist/sandbox/bindings/write.d.ts +34 -0
  147. package/dist/sandbox/bindings/write.d.ts.map +1 -0
  148. package/dist/sandbox/bindings/write.js +195 -0
  149. package/dist/sandbox/bindings/write.js.map +1 -0
  150. package/dist/sandbox/executor.d.ts +68 -0
  151. package/dist/sandbox/executor.d.ts.map +1 -0
  152. package/dist/sandbox/executor.js +280 -0
  153. package/dist/sandbox/executor.js.map +1 -0
  154. package/dist/sandbox/helpers.d.ts +26 -0
  155. package/dist/sandbox/helpers.d.ts.map +1 -0
  156. package/dist/sandbox/helpers.js +131 -0
  157. package/dist/sandbox/helpers.js.map +1 -0
  158. package/dist/sandbox/pool.d.ts +63 -0
  159. package/dist/sandbox/pool.d.ts.map +1 -0
  160. package/dist/sandbox/pool.js +98 -0
  161. package/dist/sandbox/pool.js.map +1 -0
  162. package/dist/sandbox/vendored-codemode.d.ts +99 -0
  163. package/dist/sandbox/vendored-codemode.d.ts.map +1 -0
  164. package/dist/sandbox/vendored-codemode.js +471 -0
  165. package/dist/sandbox/vendored-codemode.js.map +1 -0
  166. package/dist/server.d.ts +3 -0
  167. package/dist/server.d.ts.map +1 -0
  168. package/dist/server.js +74 -0
  169. package/dist/server.js.map +1 -0
  170. package/dist/spike.d.ts +15 -0
  171. package/dist/spike.d.ts.map +1 -0
  172. package/dist/spike.js +90 -0
  173. package/dist/spike.js.map +1 -0
  174. package/package.json +60 -0
@@ -0,0 +1,55 @@
1
+ /**
2
+ * Phase 1 in-memory document store.
3
+ *
4
+ * This is the spike's substitute for the real SQLite + FTS + vector stack
5
+ * that Phase 2-4 will land. It loads every `*.md` file under a given root
6
+ * directory once at startup, splits each file on blank lines into paragraph
7
+ * chunks, and exposes a substring-based `search()` + id-based `get()`.
8
+ *
9
+ * The store is intentionally dumb — no ranking, no stemming, no tokenizing,
10
+ * no caching beyond the initial load. The whole point of Phase 1 is to
11
+ * prove that the markdown → chunks → tool response round-trip works without
12
+ * getting distracted by retrieval quality. Phase 2 replaces this with the
13
+ * drizzle schema + FTS5 + sqlite-vec layered stack.
14
+ *
15
+ * The only non-trivial contract: chunk ids must be deterministic. Given the
16
+ * same root directory contents, loading the store twice must produce
17
+ * identical ids. Phase 2's ingester relies on this for idempotent upsert.
18
+ */
19
export interface Chunk {
    /** Deterministic sha1(`relative_path:index`) — stable across reloads. */
    id: string;
    /** Paragraph text, trimmed. Never empty. */
    text: string;
    /** `file://` URL of the source file. */
    source_uri: string;
    /** Position of this chunk within its source file, 0-based. */
    chunk_index: number;
}
export declare class MemoryStore {
    private readonly rootDir;
    private chunks;
    private byId;
    /** Private — construct via the static {@link MemoryStore.load} factory. */
    private constructor();
    /**
     * Construct and populate a store from the given wiki root directory.
     * Reads every `*.md` file recursively, splits on blank lines, and
     * indexes into a flat chunks array + id lookup map.
     */
    static load(rootDir: string): Promise<MemoryStore>;
    /**
     * Case-insensitive substring filter over every chunk's text. Returns all
     * matches in insertion order (file order, then chunk order within file).
     * An empty query matches nothing and returns `[]`.
     *
     * Phase 1 does no ranking — the spike only needs to prove the bindings
     * pipeline. Phase 4 replaces this with FTS5 + RRF.
     */
    search(query: string): Chunk[];
    /** Exact lookup by id. Returns `null` if not found (never throws). */
    get(id: string): Chunk | null;
    /** Total number of indexed chunks. */
    size(): number;
    /** The resolved absolute path this store was loaded from. */
    root(): string;
}
55
+ //# sourceMappingURL=memory-store.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"memory-store.d.ts","sourceRoot":"","sources":["../../src/ingest/memory-store.ts"],"names":[],"mappings":"AAKA;;;;;;;;;;;;;;;;;GAiBG;AAEH,MAAM,WAAW,KAAK;IACpB,yEAAyE;IACzE,EAAE,EAAE,MAAM,CAAC;IACX,4CAA4C;IAC5C,IAAI,EAAE,MAAM,CAAC;IACb,wCAAwC;IACxC,UAAU,EAAE,MAAM,CAAC;IACnB,8DAA8D;IAC9D,WAAW,EAAE,MAAM,CAAC;CACrB;AAED,qBAAa,WAAW;IAIF,OAAO,CAAC,QAAQ,CAAC,OAAO;IAH5C,OAAO,CAAC,MAAM,CAAe;IAC7B,OAAO,CAAC,IAAI,CAA4B;IAExC,OAAO;IAEP;;;;OAIG;WACU,IAAI,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC;IAuBxD;;;;;;OAMG;IACH,MAAM,CAAC,KAAK,EAAE,MAAM,GAAG,KAAK,EAAE;IAM9B,sEAAsE;IACtE,GAAG,CAAC,EAAE,EAAE,MAAM,GAAG,KAAK,GAAG,IAAI;IAI7B,sCAAsC;IACtC,IAAI,IAAI,MAAM;IAId,6DAA6D;IAC7D,IAAI,IAAI,MAAM;CAGf"}
@@ -0,0 +1,94 @@
1
+ import { createHash } from 'node:crypto';
2
+ import { readdir, readFile } from 'node:fs/promises';
3
+ import { resolve, relative } from 'node:path';
4
+ import { pathToFileURL } from 'node:url';
5
+ export class MemoryStore {
6
+ rootDir;
7
+ chunks = [];
8
+ byId = new Map();
9
+ constructor(rootDir) {
10
+ this.rootDir = rootDir;
11
+ }
12
+ /**
13
+ * Construct and populate a store from the given wiki root directory.
14
+ * Reads every `*.md` file recursively, splits on blank lines, and
15
+ * indexes into a flat chunks array + id lookup map.
16
+ */
17
+ static async load(rootDir) {
18
+ const abs = resolve(rootDir);
19
+ const store = new MemoryStore(abs);
20
+ const files = await collectMarkdownFiles(abs);
21
+ for (const file of files) {
22
+ const text = await readFile(file, 'utf8');
23
+ const rel = relative(abs, file);
24
+ const uri = pathToFileURL(file).href;
25
+ const paragraphs = splitParagraphs(text);
26
+ for (let i = 0; i < paragraphs.length; i++) {
27
+ const chunk = {
28
+ id: sha1(`${rel}:${i}`),
29
+ text: paragraphs[i],
30
+ source_uri: uri,
31
+ chunk_index: i,
32
+ };
33
+ store.chunks.push(chunk);
34
+ store.byId.set(chunk.id, chunk);
35
+ }
36
+ }
37
+ return store;
38
+ }
39
+ /**
40
+ * Case-insensitive substring filter over every chunk's text. Returns all
41
+ * matches in insertion order (file order, then chunk order within file).
42
+ *
43
+ * Phase 1 does no ranking — the spike only needs to prove the bindings
44
+ * pipeline. Phase 4 replaces this with FTS5 + RRF.
45
+ */
46
+ search(query) {
47
+ const q = query.toLowerCase();
48
+ if (!q)
49
+ return [];
50
+ return this.chunks.filter((c) => c.text.toLowerCase().includes(q));
51
+ }
52
+ /** Exact lookup by id. Returns `null` if not found (never throws). */
53
+ get(id) {
54
+ return this.byId.get(id) ?? null;
55
+ }
56
+ /** Total number of indexed chunks. */
57
+ size() {
58
+ return this.chunks.length;
59
+ }
60
+ /** The resolved absolute path this store was loaded from. */
61
+ root() {
62
+ return this.rootDir;
63
+ }
64
+ }
65
// ============================================================================
// helpers
// ============================================================================
/**
 * Recursively gather every `*.md` file under `root`, returned as absolute
 * paths in sorted (deterministic) order.
 */
async function collectMarkdownFiles(root) {
    const entries = await readdir(root, { withFileTypes: true, recursive: true });
    const files = [];
    for (const entry of entries) {
        if (!entry.isFile() || !entry.name.toLowerCase().endsWith('.md'))
            continue;
        // Node 20+ recursive readdir provides parentPath on each entry; older
        // builds exposed `path`; fall back to the root as a last resort.
        const parent = entry.parentPath ?? entry.path ?? root;
        files.push(resolve(parent, entry.name));
    }
    files.sort(); // deterministic order across filesystems
    return files;
}
85
/** Split on blank lines into trimmed, non-empty paragraph strings. */
function splitParagraphs(source) {
    return source
        .split(/\r?\n\r?\n+/)
        .flatMap((piece) => {
        const trimmed = piece.trim();
        return trimmed.length === 0 ? [] : [trimmed];
    });
}
91
/** Hex-encoded SHA-1 digest of `input` — the deterministic chunk-id hash. */
function sha1(input) {
    const hasher = createHash('sha1');
    hasher.update(input);
    return hasher.digest('hex');
}
94
+ //# sourceMappingURL=memory-store.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"memory-store.js","sourceRoot":"","sources":["../../src/ingest/memory-store.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AACrD,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAC9C,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAgCzC,MAAM,OAAO,WAAW;IAIe;IAH7B,MAAM,GAAY,EAAE,CAAC;IACrB,IAAI,GAAG,IAAI,GAAG,EAAiB,CAAC;IAExC,YAAqC,OAAe;QAAf,YAAO,GAAP,OAAO,CAAQ;IAAG,CAAC;IAExD;;;;OAIG;IACH,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,OAAe;QAC/B,MAAM,GAAG,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;QAC7B,MAAM,KAAK,GAAG,IAAI,WAAW,CAAC,GAAG,CAAC,CAAC;QACnC,MAAM,KAAK,GAAG,MAAM,oBAAoB,CAAC,GAAG,CAAC,CAAC;QAC9C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YAC1C,MAAM,GAAG,GAAG,QAAQ,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;YAChC,MAAM,GAAG,GAAG,aAAa,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC;YACrC,MAAM,UAAU,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;YACzC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC3C,MAAM,KAAK,GAAU;oBACnB,EAAE,EAAE,IAAI,CAAC,GAAG,GAAG,IAAI,CAAC,EAAE,CAAC;oBACvB,IAAI,EAAE,UAAU,CAAC,CAAC,CAAE;oBACpB,UAAU,EAAE,GAAG;oBACf,WAAW,EAAE,CAAC;iBACf,CAAC;gBACF,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBACzB,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,EAAE,KAAK,CAAC,CAAC;YAClC,CAAC;QACH,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC;IAED;;;;;;OAMG;IACH,MAAM,CAAC,KAAa;QAClB,MAAM,CAAC,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC;QAC9B,IAAI,CAAC,CAAC;YAAE,OAAO,EAAE,CAAC;QAClB,OAAO,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;IACrE,CAAC;IAED,sEAAsE;IACtE,GAAG,CAAC,EAAU;QACZ,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,IAAI,CAAC;IACnC,CAAC;IAED,sCAAsC;IACtC,IAAI;QACF,OAAO,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC;IAC5B,CAAC;IAED,6DAA6D;IAC7D,IAAI;QACF,OAAO,IAAI,CAAC,OAAO,CAAC;IACtB,CAAC;CACF;AAED,+EAA+E;AAC/E,UAAU;AACV,+EAA+E;AAE/E,KAAK,UAAU,oBAAoB,CAAC,IAAY;IAC9C,MAAM,GAAG,GAAa,EAAE,CAAC;IACzB,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,IA
AI,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC9E,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE;YAAE,SAAS;QAC9B,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC;YAAE,SAAS;QACxD,+DAA+D;QAC/D,MAAM,MAAM,GAAI,KAA2D,CAAC,UAAU;eAChF,KAAsC,CAAC,IAAI;eAC5C,IAAI,CAAC;QACV,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC;IACxC,CAAC;IACD,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC,yCAAyC;IACrD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,eAAe,CAAC,MAAc;IACrC,OAAO,MAAM;SACV,KAAK,CAAC,aAAa,CAAC;SACpB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;SACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACjC,CAAC;AAED,SAAS,IAAI,CAAC,KAAa;IACzB,OAAO,UAAU,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AACxD,CAAC"}
@@ -0,0 +1,15 @@
1
export interface Chunk {
    /** Chunk text — paragraphs joined by blank lines, original whitespace preserved */
    text: string;
    /** Cached token count — exposed so the ingester doesn't need to recount */
    token_count: number;
}
/**
 * Split a section's content into ~target_tokens-sized chunks on paragraph
 * boundaries. Returns an empty array for input with no non-whitespace content.
 *
 * A single paragraph that alone exceeds `targetTokens` is emitted as its own
 * oversize chunk rather than being split mid-paragraph.
 *
 * `targetTokens` defaults to 500. Pass a smaller value in tests if you want
 * to force a section to chunk at a predictable boundary.
 */
export declare function chunkSection(content: string, targetTokens?: number): Chunk[];
15
+ //# sourceMappingURL=chunk.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunk.d.ts","sourceRoot":"","sources":["../../../src/ingest/parse/chunk.ts"],"names":[],"mappings":"AAoCA,MAAM,WAAW,KAAK;IACpB,mFAAmF;IACnF,IAAI,EAAE,MAAM,CAAC;IACb,2EAA2E;IAC3E,WAAW,EAAE,MAAM,CAAC;CACrB;AAED;;;;;;GAMG;AACH,wBAAgB,YAAY,CAAC,OAAO,EAAE,MAAM,EAAE,YAAY,GAAE,MAA8B,GAAG,KAAK,EAAE,CAyCnG"}
@@ -0,0 +1,88 @@
1
+ import { countTokens } from '../../gate/budget.js';
2
+ /**
3
+ * Paragraph-aware chunker for KG-MCP Phase 2.
4
+ *
5
+ * Splits a section's body into chunks of approximately `targetTokens` tokens,
6
+ * never breaking a paragraph in half. Tokens are counted via the existing
7
+ * `countTokens()` from `gate/budget.ts`, which uses `js-tiktoken p50k_base`
8
+ * with the long-string fast path (D32 — see CLAUDE.md §API Rules #6 budget math).
9
+ *
10
+ * **Algorithm**:
11
+ * 1. Split the input on blank lines (`\n\n+`) into paragraphs
12
+ * 2. Iterate paragraphs left to right, accumulating into a current chunk
13
+ * 3. If adding the next paragraph would push the chunk past `targetTokens`,
14
+ * flush the current chunk and start a new one with that paragraph
15
+ * 4. A single oversize paragraph that ALONE exceeds `targetTokens` gets
16
+ * its own chunk (rather than being split mid-sentence — the LLM can
17
+ * still query it via FTS5, just slower)
18
+ *
19
+ * **Determinism**: same input → same output. The chunker is pure: no random
20
+ * tie-breaking, no time-based decisions, no environment lookups. The
21
+ * downstream `chunk_sha = sha1(chunk_text)` therefore stays stable across
22
+ * runs, which is the load-bearing assumption for the per-chunk skip-unchanged
23
+ * optimization (CLAUDE.md §Database Rules #3, Loop 6.5 A4).
24
+ *
25
+ * **Why ~500 tokens?** Empirical sweet spot for retrieval: small enough that
26
+ * each chunk is a focused topic, large enough that 1-2 chunks usually answer
27
+ * a query without needing to fetch a whole node. Phase 4's RRF + budget gate
28
+ * tunes around this size; deviating significantly will affect retrieval
29
+ * quality. The actual chunk sizes will fluctuate around this target since
30
+ * we won't break a paragraph — chunks may be smaller (single short paragraph)
31
+ * or larger (single long paragraph).
32
+ */
33
+ const DEFAULT_TARGET_TOKENS = 500;
34
+ /**
35
+ * Split a section's content into ~target_tokens-sized chunks on paragraph
36
+ * boundaries. Returns an empty array for input with no non-whitespace content.
37
+ *
38
+ * `targetTokens` defaults to 500. Pass a smaller value in tests if you want
39
+ * to force a section to chunk at a predictable boundary.
40
+ */
41
+ export function chunkSection(content, targetTokens = DEFAULT_TARGET_TOKENS) {
42
+ const paragraphs = splitParagraphs(content);
43
+ if (paragraphs.length === 0)
44
+ return [];
45
+ const chunks = [];
46
+ let current = [];
47
+ let currentTokens = 0;
48
+ for (const para of paragraphs) {
49
+ const paraTokens = countTokens(para);
50
+ // Edge case: paragraph alone exceeds target — emit it as its own chunk
51
+ // (rather than splitting mid-sentence, which would hurt retrieval and
52
+ // also break round-trip determinism).
53
+ if (paraTokens > targetTokens) {
54
+ if (current.length > 0) {
55
+ chunks.push({ text: current.join('\n\n'), token_count: currentTokens });
56
+ current = [];
57
+ currentTokens = 0;
58
+ }
59
+ chunks.push({ text: para, token_count: paraTokens });
60
+ continue;
61
+ }
62
+ // If adding this paragraph would exceed target, flush current and start fresh.
63
+ if (currentTokens + paraTokens > targetTokens && current.length > 0) {
64
+ chunks.push({ text: current.join('\n\n'), token_count: currentTokens });
65
+ current = [];
66
+ currentTokens = 0;
67
+ }
68
+ current.push(para);
69
+ currentTokens += paraTokens;
70
+ }
71
+ // Flush the trailing chunk.
72
+ if (current.length > 0) {
73
+ chunks.push({ text: current.join('\n\n'), token_count: currentTokens });
74
+ }
75
+ return chunks;
76
+ }
77
/**
 * Split a string on blank lines, trimming each paragraph and dropping empty
 * ones. Mirrors the Phase 1 splitParagraphs in memory-store.ts so chunking
 * behavior stays consistent across the swap.
 */
function splitParagraphs(source) {
    const paragraphs = [];
    for (const piece of source.split(/\r?\n\r?\n+/)) {
        const trimmed = piece.trim();
        if (trimmed.length > 0)
            paragraphs.push(trimmed);
    }
    return paragraphs;
}
88
+ //# sourceMappingURL=chunk.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunk.js","sourceRoot":"","sources":["../../../src/ingest/parse/chunk.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AAEnD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAEH,MAAM,qBAAqB,GAAG,GAAG,CAAC;AASlC;;;;;;GAMG;AACH,MAAM,UAAU,YAAY,CAAC,OAAe,EAAE,eAAuB,qBAAqB;IACxF,MAAM,UAAU,GAAG,eAAe,CAAC,OAAO,CAAC,CAAC;IAC5C,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEvC,MAAM,MAAM,GAAY,EAAE,CAAC;IAC3B,IAAI,OAAO,GAAa,EAAE,CAAC;IAC3B,IAAI,aAAa,GAAG,CAAC,CAAC;IAEtB,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,MAAM,UAAU,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC;QAErC,uEAAuE;QACvE,sEAAsE;QACtE,sCAAsC;QACtC,IAAI,UAAU,GAAG,YAAY,EAAE,CAAC;YAC9B,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACvB,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,WAAW,EAAE,aAAa,EAAE,CAAC,CAAC;gBACxE,OAAO,GAAG,EAAE,CAAC;gBACb,aAAa,GAAG,CAAC,CAAC;YACpB,CAAC;YACD,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,WAAW,EAAE,UAAU,EAAE,CAAC,CAAC;YACrD,SAAS;QACX,CAAC;QAED,+EAA+E;QAC/E,IAAI,aAAa,GAAG,UAAU,GAAG,YAAY,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpE,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,WAAW,EAAE,aAAa,EAAE,CAAC,CAAC;YACxE,OAAO,GAAG,EAAE,CAAC;YACb,aAAa,GAAG,CAAC,CAAC;QACpB,CAAC;QAED,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnB,aAAa,IAAI,UAAU,CAAC;IAC9B,CAAC;IAED,4BAA4B;IAC5B,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,WAAW,EAAE,aAAa,EAAE,CAAC,CAAC;IAC1E,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;;GAIG;AACH,SAAS,eAAe,CAAC,MAAc;IACrC,OAAO,MAAM;SACV,KAAK,CAAC,aAAa,CAAC;SACpB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;SACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACjC,CAAC"}
@@ -0,0 +1,64 @@
1
+ /**
2
+ * Markdown → section parser for KG-MCP Phase 2.
3
+ *
4
+ * Parses a markdown file into a flat array of `SectionNode` objects, one
5
+ * per ATX heading (`#`, `##`, …) plus an optional pre-heading section for
6
+ * any content above the first heading. The chunker (chunk.ts) then splits
7
+ * each section's `content` into ~500-token chunks.
8
+ *
9
+ * **Why mdast?** Phase 1 used a `\n\n` regex split that lost all heading
10
+ * structure. The Karpathy two-level wiki has nested H1/H2/H3 sections, and
11
+ * the LLM querying via `kg.search` benefits from being able to identify
12
+ * "this chunk lives under H2 'Login flow' which lives under H1 'Authentication'".
13
+ * The `section_path` field captures that hierarchy.
14
+ *
15
+ * **Section content slicing**: we use mdast `position.start.offset` to slice
16
+ * the original source for each section, so the stored content includes the
17
+ * exact original markdown (whitespace, formatting, code fences) — not a
18
+ * re-rendered approximation. This makes round-trip tests trivial: rebuild a
19
+ * file by joining all section contents and you should get back something
20
+ * structurally identical to the input.
21
+ *
22
+ * **Determinism**: same input → same output. Pinned `mdast-util-from-markdown@^2.0.0`
23
+ * + no plugins = stable mdast tree, stable section list, stable downstream
24
+ * chunk ids. Tests verify this by parsing twice and deep-equal-ing the result.
25
+ */
26
/**
 * One section of a markdown file. Sections are derived from ATX headings;
 * a `SectionNode { depth: 0 }` is the optional pre-heading content above
 * the first heading.
 */
export interface SectionNode {
    /** ATX heading hierarchy joined by ` / ` (e.g. `"Authentication / Login flow"`); empty for pre-heading content */
    section_path: string;
    /** The heading text itself (or empty string for pre-heading content) */
    title: string;
    /** Original markdown source for this section (heading + body), preserving whitespace */
    content: string;
    /** Always `'section'` for Phase 2 — Phase 4 may add other kinds (entity, decision) */
    kind: 'section';
    /** ATX depth: 0 = pre-heading, 1 = `#`, 2 = `##`, …, 6 = `######` */
    depth: number;
}
/** Confidence level for provenance tracking (Phase 6). */
export type Confidence = 'extracted' | 'inferred' | 'ambiguous';
/**
 * Detect confidence from YAML frontmatter in a markdown file.
 *
 * Rules:
 * - `source: haiku` or `source: ai` or `source: ai-generated` → `'inferred'`
 * - `status: ambiguous` or `status: needs-review` or `confidence: ambiguous` → `'ambiguous'`
 * - `confidence: inferred` → `'inferred'`
 * - Otherwise → `'extracted'`
 *
 * A file without a `---`-delimited frontmatter block always yields `'extracted'`.
 */
export declare function detectConfidence(source: string): Confidence;
/**
 * Parse a markdown source string into a flat list of sections.
 *
 * Sections are emitted in source order (top-to-bottom). Empty pre-heading
 * content (whitespace only) is skipped. Sections with no body (just a
 * heading and nothing after) are still emitted — the chunker handles them
 * by producing zero chunks for that section.
 */
export declare function parseMarkdown(source: string): SectionNode[];
64
+ //# sourceMappingURL=markdown.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../../src/ingest/parse/markdown.ts"],"names":[],"mappings":"AAiBA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAEH;;;;GAIG;AACH,MAAM,WAAW,WAAW;IAC1B,kHAAkH;IAClH,YAAY,EAAE,MAAM,CAAC;IACrB,wEAAwE;IACxE,KAAK,EAAE,MAAM,CAAC;IACd,wFAAwF;IACxF,OAAO,EAAE,MAAM,CAAC;IAChB,sFAAsF;IACtF,IAAI,EAAE,SAAS,CAAC;IAChB,qEAAqE;IACrE,KAAK,EAAE,MAAM,CAAC;CACf;AAED,0DAA0D;AAC1D,MAAM,MAAM,UAAU,GAAG,WAAW,GAAG,UAAU,GAAG,WAAW,CAAC;AAEhE;;;;;;;;GAQG;AACH,wBAAgB,gBAAgB,CAAC,MAAM,EAAE,MAAM,GAAG,UAAU,CAiB3D;AAuBD;;;;;;;GAOG;AACH,wBAAgB,aAAa,CAAC,MAAM,EAAE,MAAM,GAAG,WAAW,EAAE,CAuF3D"}
@@ -0,0 +1,152 @@
1
+ import { fromMarkdown } from 'mdast-util-from-markdown';
2
/**
 * Detect confidence from YAML frontmatter in a markdown file.
 *
 * Rules:
 * - `source: haiku` or `source: ai` or `source: ai-generated` → `'inferred'`
 * - `status: ambiguous` or `status: needs-review` or `confidence: ambiguous` → `'ambiguous'`
 * - `confidence: inferred` → `'inferred'`
 * - Otherwise → `'extracted'`
 *
 * All three fields are matched case-insensitively. (Fix: the explicit
 * `confidence:` field was previously compared case-sensitively while
 * `source`/`status` were lowercased first — the matching is now uniform.)
 */
export function detectConfidence(source) {
    const fm = parseFrontmatter(source);
    if (!fm)
        return 'extracted';
    // Check explicit confidence field first — it wins over source/status.
    const confidence = typeof fm.confidence === 'string' ? fm.confidence.toLowerCase() : '';
    if (confidence === 'inferred')
        return 'inferred';
    if (confidence === 'ambiguous')
        return 'ambiguous';
    // Check source field
    const src = typeof fm.source === 'string' ? fm.source.toLowerCase() : '';
    if (src === 'haiku' || src === 'ai' || src === 'ai-generated')
        return 'inferred';
    // Check status field
    const status = typeof fm.status === 'string' ? fm.status.toLowerCase() : '';
    if (status === 'ambiguous' || status === 'needs-review')
        return 'ambiguous';
    return 'extracted';
}
/**
 * Minimal YAML frontmatter parser. Extracts `key: value` pairs from a
 * `---\n...\n---` block at the start of a file. No dependency needed.
 *
 * Returns `null` when the file has no opening `---` on the first line or no
 * closing `---`. Values keep their raw trimmed string form — no YAML type
 * coercion, nesting, or quoting support.
 */
function parseFrontmatter(source) {
    if (!source.startsWith('---'))
        return null;
    const endIdx = source.indexOf('\n---', 3);
    if (endIdx === -1)
        return null;
    const block = source.slice(4, endIdx);
    const result = {};
    for (const line of block.split('\n')) {
        const colon = line.indexOf(':');
        if (colon === -1)
            continue;
        const key = line.slice(0, colon).trim();
        const val = line.slice(colon + 1).trim();
        if (key)
            result[key] = val;
    }
    return result;
}
53
+ /**
54
+ * Parse a markdown source string into a flat list of sections.
55
+ *
56
+ * Sections are emitted in source order (top-to-bottom). Empty pre-heading
57
+ * content (whitespace only) is skipped. Sections with no body (just a
58
+ * heading and nothing after) are still emitted — the chunker handles them
59
+ * by producing zero chunks for that section.
60
+ */
61
+ export function parseMarkdown(source) {
62
+ const tree = fromMarkdown(source);
63
+ const sections = [];
64
+ const rootChildren = tree.children ?? [];
65
+ // Pass 1: collect every top-level heading with its source offset.
66
+ // We only care about headings at the root of the mdast tree — nested
67
+ // headings inside blockquotes or lists are unusual and we treat them
68
+ // as part of the surrounding section's body.
69
+ const headings = [];
70
+ for (const child of rootChildren) {
71
+ if (child.type !== 'heading')
72
+ continue;
73
+ const start = child.position?.start.offset;
74
+ if (typeof start !== 'number')
75
+ continue; // defensive: should always be present
76
+ headings.push({
77
+ depth: child.depth ?? 1,
78
+ title: mdastNodeText(child).trim(),
79
+ startOffset: start,
80
+ });
81
+ }
82
+ // Edge case: no headings at all → one big pre-heading section (if non-empty).
83
+ if (headings.length === 0) {
84
+ if (source.trim().length > 0) {
85
+ sections.push({
86
+ section_path: '',
87
+ title: '',
88
+ content: source,
89
+ kind: 'section',
90
+ depth: 0,
91
+ });
92
+ }
93
+ return sections;
94
+ }
95
+ // Pre-heading content: anything before the first heading.
96
+ const firstStart = headings[0].startOffset;
97
+ if (firstStart > 0) {
98
+ const preContent = source.slice(0, firstStart);
99
+ if (preContent.trim().length > 0) {
100
+ sections.push({
101
+ section_path: '',
102
+ title: '',
103
+ content: preContent,
104
+ kind: 'section',
105
+ depth: 0,
106
+ });
107
+ }
108
+ }
109
+ // Pass 2: walk the heading list, building section_path via a depth stack
110
+ // and slicing content from this heading's offset to the next heading's
111
+ // offset (or EOF for the final section).
112
+ const stack = [];
113
+ for (let i = 0; i < headings.length; i++) {
114
+ const h = headings[i];
115
+ // Pop the stack until the top is a strict ancestor of `h`.
116
+ // This handles unusual cases like H1 → H3 → H2 (the H2 pops the H3
117
+ // but keeps the H1 ancestor).
118
+ while (stack.length > 0 && stack[stack.length - 1].depth >= h.depth) {
119
+ stack.pop();
120
+ }
121
+ stack.push({ depth: h.depth, title: h.title });
122
+ const sectionPath = stack.map((s) => s.title).join(' / ');
123
+ const startOffset = h.startOffset;
124
+ const endOffset = i + 1 < headings.length ? headings[i + 1].startOffset : source.length;
125
+ const content = source.slice(startOffset, endOffset);
126
+ sections.push({
127
+ section_path: sectionPath,
128
+ title: h.title,
129
+ content,
130
+ kind: 'section',
131
+ depth: h.depth,
132
+ });
133
+ }
134
+ return sections;
135
+ }
136
+ /**
137
+ * Recursively concatenate the text content of an mdast node, ignoring
138
+ * formatting. Equivalent to mdast-util-to-string but inlined to avoid the
139
+ * transitive-dep import issue.
140
+ */
141
+ function mdastNodeText(node) {
142
+ if (typeof node.value === 'string')
143
+ return node.value;
144
+ if (!node.children)
145
+ return '';
146
+ let out = '';
147
+ for (const child of node.children) {
148
+ out += mdastNodeText(child);
149
+ }
150
+ return out;
151
+ }
152
+ //# sourceMappingURL=markdown.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"markdown.js","sourceRoot":"","sources":["../../../src/ingest/parse/markdown.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AAgExD;;;;;;;;GAQG;AACH,MAAM,UAAU,gBAAgB,CAAC,MAAc;IAC7C,MAAM,EAAE,GAAG,gBAAgB,CAAC,MAAM,CAAC,CAAC;IACpC,IAAI,CAAC,EAAE;QAAE,OAAO,WAAW,CAAC;IAE5B,wCAAwC;IACxC,IAAI,EAAE,CAAC,UAAU,KAAK,UAAU;QAAE,OAAO,UAAU,CAAC;IACpD,IAAI,EAAE,CAAC,UAAU,KAAK,WAAW;QAAE,OAAO,WAAW,CAAC;IAEtD,qBAAqB;IACrB,MAAM,GAAG,GAAG,OAAO,EAAE,CAAC,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IACzE,IAAI,GAAG,KAAK,OAAO,IAAI,GAAG,KAAK,IAAI,IAAI,GAAG,KAAK,cAAc;QAAE,OAAO,UAAU,CAAC;IAEjF,qBAAqB;IACrB,MAAM,MAAM,GAAG,OAAO,EAAE,CAAC,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IAC5E,IAAI,MAAM,KAAK,WAAW,IAAI,MAAM,KAAK,cAAc;QAAE,OAAO,WAAW,CAAC;IAE5E,OAAO,WAAW,CAAC;AACrB,CAAC;AAED;;;GAGG;AACH,SAAS,gBAAgB,CAAC,MAAc;IACtC,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAC3C,MAAM,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;IAC1C,IAAI,MAAM,KAAK,CAAC,CAAC;QAAE,OAAO,IAAI,CAAC;IAE/B,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC;IACtC,MAAM,MAAM,GAA2B,EAAE,CAAC;IAC1C,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;QACrC,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QAChC,IAAI,KAAK,KAAK,CAAC,CAAC;YAAE,SAAS;QAC3B,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC;QACxC,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACzC,IAAI,GAAG;YAAE,MAAM,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC;IAC7B,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,aAAa,CAAC,MAAc;IAC1C,MAAM,IAAI,GAAG,YAAY,CAAC,MAAM,CAAyB,CAAC;IAC1D,MAAM,QAAQ,GAAkB,EAAE,CAAC;IACnC,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,IAAI,EAAE,CAAC;IAEzC,kEAAkE;IAClE,qEAAqE;IACrE,qEAAqE;IACrE,6CAA6C;IAC7C,MAAM,QAAQ,GAIT,EAAE,CAAC;IAER,KAAK,MAAM,KAAK,IAAI,YAAY,EAAE,CAAC;QACjC,IAAI,KAAK,CAAC,IAAI,KAAK,SAAS;YAAE,SAAS;QACvC,MAAM,KAAK,GAAG,KAAK,
CAAC,QAAQ,EAAE,KAAK,CAAC,MAAM,CAAC;QAC3C,IAAI,OAAO,KAAK,KAAK,QAAQ;YAAE,SAAS,CAAC,sCAAsC;QAC/E,QAAQ,CAAC,IAAI,CAAC;YACZ,KAAK,EAAE,KAAK,CAAC,KAAK,IAAI,CAAC;YACvB,KAAK,EAAE,aAAa,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE;YAClC,WAAW,EAAE,KAAK;SACnB,CAAC,CAAC;IACL,CAAC;IAED,8EAA8E;IAC9E,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1B,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC7B,QAAQ,CAAC,IAAI,CAAC;gBACZ,YAAY,EAAE,EAAE;gBAChB,KAAK,EAAE,EAAE;gBACT,OAAO,EAAE,MAAM;gBACf,IAAI,EAAE,SAAS;gBACf,KAAK,EAAE,CAAC;aACT,CAAC,CAAC;QACL,CAAC;QACD,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,0DAA0D;IAC1D,MAAM,UAAU,GAAG,QAAQ,CAAC,CAAC,CAAE,CAAC,WAAW,CAAC;IAC5C,IAAI,UAAU,GAAG,CAAC,EAAE,CAAC;QACnB,MAAM,UAAU,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;QAC/C,IAAI,UAAU,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACjC,QAAQ,CAAC,IAAI,CAAC;gBACZ,YAAY,EAAE,EAAE;gBAChB,KAAK,EAAE,EAAE;gBACT,OAAO,EAAE,UAAU;gBACnB,IAAI,EAAE,SAAS;gBACf,KAAK,EAAE,CAAC;aACT,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,yEAAyE;IACzE,uEAAuE;IACvE,yCAAyC;IACzC,MAAM,KAAK,GAA4C,EAAE,CAAC;IAE1D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACzC,MAAM,CAAC,GAAG,QAAQ,CAAC,CAAC,CAAE,CAAC;QAEvB,2DAA2D;QAC3D,mEAAmE;QACnE,8BAA8B;QAC9B,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAE,CAAC,KAAK,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC;YACrE,KAAK,CAAC,GAAG,EAAE,CAAC;QACd,CAAC;QACD,KAAK,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC;QAE/C,MAAM,WAAW,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAE1D,MAAM,WAAW,GAAG,CAAC,CAAC,WAAW,CAAC;QAClC,MAAM,SAAS,GAAG,CAAC,GAAG,CAAC,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAE,CAAC,WAAW,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC;QACzF,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,WAAW,EAAE,SAAS,CAAC,CAAC;QAErD,QAAQ,CAAC,IAAI,CAAC;YACZ,YAAY,EAAE,WAAW;YACzB,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,OAAO;YACP,IAAI,EAAE,SAAS;YACf,KAAK,EAAE,CAAC,CAAC,KAAK;SACf,CAAC,CAAC;IACL,CAAC;IAED,OAA
O,QAAQ,CAAC;AAClB,CAAC;AAED;;;;GAIG;AACH,SAAS,aAAa,CAAC,IAAe;IACpC,IAAI,OAAO,IAAI,CAAC,KAAK,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAC,KAAK,CAAC;IACtD,IAAI,CAAC,IAAI,CAAC,QAAQ;QAAE,OAAO,EAAE,CAAC;IAC9B,IAAI,GAAG,GAAG,EAAE,CAAC;IACb,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;QAClC,GAAG,IAAI,aAAa,CAAC,KAAK,CAAC,CAAC;IAC9B,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC"}
@@ -0,0 +1,21 @@
1
+ import type { IngestEvent, IngestSource } from './source.js';
2
+ /**
3
+ * `QueueSubscriber` — placeholder for the Phase 5+ orchestrator contract.
4
+ *
5
+ * The orchestration engineer (per the project memory) is separately
6
+ * building a message-queue contract that will feed events into KG-MCP.
7
+ * When that lands, this class becomes a real subscriber. Until then it
8
+ * exists so:
9
+ * 1. The `IngestSource` interface has both implementations the codebase
10
+ * expects, exercising the contract in type-check
11
+ * 2. `serve.ts` can document the swap-in seam (commented `// const source = new QueueSubscriber(...)`)
12
+ * 3. Phase 5 doesn't have to add a new file — only fill this one in
13
+ *
14
+ * Both methods throw `not implemented` — calling them is a programming
15
+ * error in Phase 2.
16
+ */
17
export declare class QueueSubscriber implements IngestSource {
    /**
     * Always rejects with `not implemented` — pending the orchestrator
     * message-queue contract. Calling this in Phase 2 is a programming error.
     */
    start(_onEvent: (ev: IngestEvent) => Promise<void>): Promise<void>;
    /**
     * Always rejects with `not implemented` — pending the orchestrator
     * message-queue contract. Calling this in Phase 2 is a programming error.
     */
    stop(): Promise<void>;
}
21
+ //# sourceMappingURL=queue.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"queue.d.ts","sourceRoot":"","sources":["../../src/ingest/queue.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE7D;;;;;;;;;;;;;;GAcG;AACH,qBAAa,eAAgB,YAAW,YAAY;IAC5C,KAAK,CAAC,QAAQ,EAAE,CAAC,EAAE,EAAE,WAAW,KAAK,OAAO,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAMlE,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;CAK5B"}
@@ -0,0 +1,24 @@
1
+ /**
2
+ * `QueueSubscriber` — placeholder for the Phase 5+ orchestrator contract.
3
+ *
4
+ * The orchestration engineer (per the project memory) is separately
5
+ * building a message-queue contract that will feed events into KG-MCP.
6
+ * When that lands, this class becomes a real subscriber. Until then it
7
+ * exists so:
8
+ * 1. The `IngestSource` interface has both implementations the codebase
9
+ * expects, exercising the contract in type-check
10
+ * 2. `serve.ts` can document the swap-in seam (commented `// const source = new QueueSubscriber(...)`)
11
+ * 3. Phase 5 doesn't have to add a new file — only fill this one in
12
+ *
13
+ * Both methods throw `not implemented` — calling them is a programming
14
+ * error in Phase 2.
15
+ */
16
+ export class QueueSubscriber {
17
+ async start(_onEvent) {
18
+ throw new Error('QueueSubscriber.start() not implemented — pending orchestrator message-queue contract');
19
+ }
20
+ async stop() {
21
+ throw new Error('QueueSubscriber.stop() not implemented — pending orchestrator message-queue contract');
22
+ }
23
+ }
24
+ //# sourceMappingURL=queue.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"queue.js","sourceRoot":"","sources":["../../src/ingest/queue.ts"],"names":[],"mappings":"AAEA;;;;;;;;;;;;;;GAcG;AACH,MAAM,OAAO,eAAe;IAC1B,KAAK,CAAC,KAAK,CAAC,QAA4C;QACtD,MAAM,IAAI,KAAK,CACb,uFAAuF,CACxF,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,IAAI;QACR,MAAM,IAAI,KAAK,CACb,sFAAsF,CACvF,CAAC;IACJ,CAAC;CACF"}
@@ -0,0 +1,42 @@
1
+ /**
2
+ * `IngestSource` interface for KG-MCP Phase 2.
3
+ *
4
+ * The data-plane seam between the file-system watcher (chokidar today,
5
+ * orchestrator message queue tomorrow) and the `IngesterService`.
6
+ *
7
+ * Phase 2 ships two implementations:
8
+ * - `ChokidarWatcher` (src/ingest/chokidar.ts) — watches a directory,
9
+ * emits events on file changes, with the mandatory 2-second debounce
10
+ * and per-path drop-oldest queue
11
+ * - `QueueSubscriber` (src/ingest/queue.ts) — stub that throws
12
+ * `not implemented`. Phase 5+ wires it to the orchestrator contract
13
+ * when that lands.
14
+ *
15
+ * The `ChokidarWatcher` ↔ `QueueSubscriber` swap is one line in
16
+ * `src/cli/serve.ts`. Per presearch.md §2.5 / D19.
17
+ */
18
/** Lifecycle events an `IngestSource` can emit for a watched markdown file. */
export type IngestEventKind = 'file:added' | 'file:changed' | 'file:removed';
19
/** Which knowledge graph an event targets: the shared project KG or the personal one. */
export type Scope = 'project' | 'personal';
20
/**
 * A single file-change notification, pumped by an `IngestSource` into the
 * `onEvent` callback registered via `IngestSource.start()`.
 */
export interface IngestEvent {
    /** What happened */
    kind: IngestEventKind;
    /** Absolute path to the markdown file */
    path: string;
    /** Which KG this event belongs to */
    scope: Scope;
}
28
+ /**
29
+ * The interface that `serve.ts` programs against. The watcher (or queue
30
+ * subscriber) calls `start(onEvent)` and pumps events into the callback;
31
+ * `stop()` cleans up.
32
+ *
33
+ * Errors thrown by the `onEvent` callback are caught by the source and
34
+ * logged — the source MUST NOT crash on a single failing event, since
35
+ * a single corrupt file shouldn't bring down the watcher for an entire
36
+ * wiki dir.
37
+ */
38
export interface IngestSource {
    /**
     * Begin producing events, delivering each one to `onEvent`. The source
     * must catch and log errors thrown by the callback rather than crash —
     * one failing event must not bring down the whole watcher.
     */
    start(onEvent: (ev: IngestEvent) => Promise<void>): Promise<void>;
    /** Stop producing events and clean up. */
    stop(): Promise<void>;
}
42
+ //# sourceMappingURL=source.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"source.d.ts","sourceRoot":"","sources":["../../src/ingest/source.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,MAAM,MAAM,eAAe,GAAG,YAAY,GAAG,cAAc,GAAG,cAAc,CAAC;AAE7E,MAAM,MAAM,KAAK,GAAG,SAAS,GAAG,UAAU,CAAC;AAE3C,MAAM,WAAW,WAAW;IAC1B,oBAAoB;IACpB,IAAI,EAAE,eAAe,CAAC;IACtB,yCAAyC;IACzC,IAAI,EAAE,MAAM,CAAC;IACb,qCAAqC;IACrC,KAAK,EAAE,KAAK,CAAC;CACd;AAED;;;;;;;;;GASG;AACH,MAAM,WAAW,YAAY;IAC3B,KAAK,CAAC,OAAO,EAAE,CAAC,EAAE,EAAE,WAAW,KAAK,OAAO,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAClE,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CACvB"}
@@ -0,0 +1,19 @@
1
+ /**
2
+ * `IngestSource` interface for KG-MCP Phase 2.
3
+ *
4
+ * The data-plane seam between the file-system watcher (chokidar today,
5
+ * orchestrator message queue tomorrow) and the `IngesterService`.
6
+ *
7
+ * Phase 2 ships two implementations:
8
+ * - `ChokidarWatcher` (src/ingest/chokidar.ts) — watches a directory,
9
+ * emits events on file changes, with the mandatory 2-second debounce
10
+ * and per-path drop-oldest queue
11
+ * - `QueueSubscriber` (src/ingest/queue.ts) — stub that throws
12
+ * `not implemented`. Phase 5+ wires it to the orchestrator contract
13
+ * when that lands.
14
+ *
15
+ * The `ChokidarWatcher` ↔ `QueueSubscriber` swap is one line in
16
+ * `src/cli/serve.ts`. Per presearch.md §2.5 / D19.
17
+ */
18
// Type-only module: `export {}` keeps this emitted file an ES module even
// though every declaration it held was erased at compile time.
export {};
19
+ //# sourceMappingURL=source.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"source.js","sourceRoot":"","sources":["../../src/ingest/source.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG"}