@pella-labs/pinakes 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. package/README.md +208 -0
  2. package/dist/cli/audit.d.ts +30 -0
  3. package/dist/cli/audit.d.ts.map +1 -0
  4. package/dist/cli/audit.js +49 -0
  5. package/dist/cli/audit.js.map +1 -0
  6. package/dist/cli/export.d.ts +32 -0
  7. package/dist/cli/export.d.ts.map +1 -0
  8. package/dist/cli/export.js +73 -0
  9. package/dist/cli/export.js.map +1 -0
  10. package/dist/cli/import.d.ts +24 -0
  11. package/dist/cli/import.d.ts.map +1 -0
  12. package/dist/cli/import.js +96 -0
  13. package/dist/cli/import.js.map +1 -0
  14. package/dist/cli/index.d.ts +3 -0
  15. package/dist/cli/index.d.ts.map +1 -0
  16. package/dist/cli/index.js +172 -0
  17. package/dist/cli/index.js.map +1 -0
  18. package/dist/cli/purge.d.ts +23 -0
  19. package/dist/cli/purge.d.ts.map +1 -0
  20. package/dist/cli/purge.js +57 -0
  21. package/dist/cli/purge.js.map +1 -0
  22. package/dist/cli/rebuild.d.ts +54 -0
  23. package/dist/cli/rebuild.d.ts.map +1 -0
  24. package/dist/cli/rebuild.js +113 -0
  25. package/dist/cli/rebuild.js.map +1 -0
  26. package/dist/cli/serve.d.ts +49 -0
  27. package/dist/cli/serve.d.ts.map +1 -0
  28. package/dist/cli/serve.js +296 -0
  29. package/dist/cli/serve.js.map +1 -0
  30. package/dist/cli/status.d.ts +39 -0
  31. package/dist/cli/status.d.ts.map +1 -0
  32. package/dist/cli/status.js +108 -0
  33. package/dist/cli/status.js.map +1 -0
  34. package/dist/db/client.d.ts +109 -0
  35. package/dist/db/client.d.ts.map +1 -0
  36. package/dist/db/client.js +175 -0
  37. package/dist/db/client.js.map +1 -0
  38. package/dist/db/repository.d.ts +82 -0
  39. package/dist/db/repository.d.ts.map +1 -0
  40. package/dist/db/repository.js +173 -0
  41. package/dist/db/repository.js.map +1 -0
  42. package/dist/db/schema.d.ts +990 -0
  43. package/dist/db/schema.d.ts.map +1 -0
  44. package/dist/db/schema.js +259 -0
  45. package/dist/db/schema.js.map +1 -0
  46. package/dist/db/types.d.ts +28 -0
  47. package/dist/db/types.d.ts.map +1 -0
  48. package/dist/db/types.js +11 -0
  49. package/dist/db/types.js.map +1 -0
  50. package/dist/gaps/detector.d.ts +67 -0
  51. package/dist/gaps/detector.d.ts.map +1 -0
  52. package/dist/gaps/detector.js +160 -0
  53. package/dist/gaps/detector.js.map +1 -0
  54. package/dist/gate/budget.d.ts +90 -0
  55. package/dist/gate/budget.d.ts.map +1 -0
  56. package/dist/gate/budget.js +145 -0
  57. package/dist/gate/budget.js.map +1 -0
  58. package/dist/ingest/chokidar.d.ts +33 -0
  59. package/dist/ingest/chokidar.d.ts.map +1 -0
  60. package/dist/ingest/chokidar.js +152 -0
  61. package/dist/ingest/chokidar.js.map +1 -0
  62. package/dist/ingest/ingester.d.ts +117 -0
  63. package/dist/ingest/ingester.d.ts.map +1 -0
  64. package/dist/ingest/ingester.js +312 -0
  65. package/dist/ingest/ingester.js.map +1 -0
  66. package/dist/ingest/manifest.d.ts +87 -0
  67. package/dist/ingest/manifest.d.ts.map +1 -0
  68. package/dist/ingest/manifest.js +223 -0
  69. package/dist/ingest/manifest.js.map +1 -0
  70. package/dist/ingest/memory-store.d.ts +55 -0
  71. package/dist/ingest/memory-store.d.ts.map +1 -0
  72. package/dist/ingest/memory-store.js +94 -0
  73. package/dist/ingest/memory-store.js.map +1 -0
  74. package/dist/ingest/parse/chunk.d.ts +15 -0
  75. package/dist/ingest/parse/chunk.d.ts.map +1 -0
  76. package/dist/ingest/parse/chunk.js +88 -0
  77. package/dist/ingest/parse/chunk.js.map +1 -0
  78. package/dist/ingest/parse/markdown.d.ts +64 -0
  79. package/dist/ingest/parse/markdown.d.ts.map +1 -0
  80. package/dist/ingest/parse/markdown.js +152 -0
  81. package/dist/ingest/parse/markdown.js.map +1 -0
  82. package/dist/ingest/queue.d.ts +21 -0
  83. package/dist/ingest/queue.d.ts.map +1 -0
  84. package/dist/ingest/queue.js +24 -0
  85. package/dist/ingest/queue.js.map +1 -0
  86. package/dist/ingest/source.d.ts +42 -0
  87. package/dist/ingest/source.d.ts.map +1 -0
  88. package/dist/ingest/source.js +19 -0
  89. package/dist/ingest/source.js.map +1 -0
  90. package/dist/mcp/envelope.d.ts +73 -0
  91. package/dist/mcp/envelope.d.ts.map +1 -0
  92. package/dist/mcp/envelope.js +46 -0
  93. package/dist/mcp/envelope.js.map +1 -0
  94. package/dist/mcp/tools/execute.d.ts +55 -0
  95. package/dist/mcp/tools/execute.d.ts.map +1 -0
  96. package/dist/mcp/tools/execute.js +232 -0
  97. package/dist/mcp/tools/execute.js.map +1 -0
  98. package/dist/mcp/tools/search.d.ts +53 -0
  99. package/dist/mcp/tools/search.d.ts.map +1 -0
  100. package/dist/mcp/tools/search.js +114 -0
  101. package/dist/mcp/tools/search.js.map +1 -0
  102. package/dist/observability/audit.d.ts +25 -0
  103. package/dist/observability/audit.d.ts.map +1 -0
  104. package/dist/observability/audit.js +38 -0
  105. package/dist/observability/audit.js.map +1 -0
  106. package/dist/observability/logger.d.ts +4 -0
  107. package/dist/observability/logger.d.ts.map +1 -0
  108. package/dist/observability/logger.js +56 -0
  109. package/dist/observability/logger.js.map +1 -0
  110. package/dist/observability/metrics.d.ts +38 -0
  111. package/dist/observability/metrics.d.ts.map +1 -0
  112. package/dist/observability/metrics.js +64 -0
  113. package/dist/observability/metrics.js.map +1 -0
  114. package/dist/retrieval/embedder.d.ts +130 -0
  115. package/dist/retrieval/embedder.d.ts.map +1 -0
  116. package/dist/retrieval/embedder.js +278 -0
  117. package/dist/retrieval/embedder.js.map +1 -0
  118. package/dist/retrieval/fts.d.ts +42 -0
  119. package/dist/retrieval/fts.d.ts.map +1 -0
  120. package/dist/retrieval/fts.js +46 -0
  121. package/dist/retrieval/fts.js.map +1 -0
  122. package/dist/retrieval/hybrid.d.ts +43 -0
  123. package/dist/retrieval/hybrid.d.ts.map +1 -0
  124. package/dist/retrieval/hybrid.js +120 -0
  125. package/dist/retrieval/hybrid.js.map +1 -0
  126. package/dist/retrieval/vec.d.ts +39 -0
  127. package/dist/retrieval/vec.d.ts.map +1 -0
  128. package/dist/retrieval/vec.js +50 -0
  129. package/dist/retrieval/vec.js.map +1 -0
  130. package/dist/sandbox/bindings/budget.d.ts +10 -0
  131. package/dist/sandbox/bindings/budget.d.ts.map +1 -0
  132. package/dist/sandbox/bindings/budget.js +44 -0
  133. package/dist/sandbox/bindings/budget.js.map +1 -0
  134. package/dist/sandbox/bindings/install.d.ts +23 -0
  135. package/dist/sandbox/bindings/install.d.ts.map +1 -0
  136. package/dist/sandbox/bindings/install.js +15 -0
  137. package/dist/sandbox/bindings/install.js.map +1 -0
  138. package/dist/sandbox/bindings/kg.d.ts +29 -0
  139. package/dist/sandbox/bindings/kg.d.ts.map +1 -0
  140. package/dist/sandbox/bindings/kg.js +323 -0
  141. package/dist/sandbox/bindings/kg.js.map +1 -0
  142. package/dist/sandbox/bindings/logger.d.ts +11 -0
  143. package/dist/sandbox/bindings/logger.d.ts.map +1 -0
  144. package/dist/sandbox/bindings/logger.js +33 -0
  145. package/dist/sandbox/bindings/logger.js.map +1 -0
  146. package/dist/sandbox/bindings/write.d.ts +34 -0
  147. package/dist/sandbox/bindings/write.d.ts.map +1 -0
  148. package/dist/sandbox/bindings/write.js +195 -0
  149. package/dist/sandbox/bindings/write.js.map +1 -0
  150. package/dist/sandbox/executor.d.ts +68 -0
  151. package/dist/sandbox/executor.d.ts.map +1 -0
  152. package/dist/sandbox/executor.js +280 -0
  153. package/dist/sandbox/executor.js.map +1 -0
  154. package/dist/sandbox/helpers.d.ts +26 -0
  155. package/dist/sandbox/helpers.d.ts.map +1 -0
  156. package/dist/sandbox/helpers.js +131 -0
  157. package/dist/sandbox/helpers.js.map +1 -0
  158. package/dist/sandbox/pool.d.ts +63 -0
  159. package/dist/sandbox/pool.d.ts.map +1 -0
  160. package/dist/sandbox/pool.js +98 -0
  161. package/dist/sandbox/pool.js.map +1 -0
  162. package/dist/sandbox/vendored-codemode.d.ts +99 -0
  163. package/dist/sandbox/vendored-codemode.d.ts.map +1 -0
  164. package/dist/sandbox/vendored-codemode.js +471 -0
  165. package/dist/sandbox/vendored-codemode.js.map +1 -0
  166. package/dist/server.d.ts +3 -0
  167. package/dist/server.d.ts.map +1 -0
  168. package/dist/server.js +74 -0
  169. package/dist/server.js.map +1 -0
  170. package/dist/spike.d.ts +15 -0
  171. package/dist/spike.d.ts.map +1 -0
  172. package/dist/spike.js +90 -0
  173. package/dist/spike.js.map +1 -0
  174. package/package.json +60 -0
@@ -0,0 +1 @@
1
+ {"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../src/db/schema.ts"],"names":[],"mappings":"AAGA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAMH;;;;;;;;GAQG;AACH,eAAO,MAAM,MAAM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAGjB,CAAC;AAOH;;;;;;;;;;;;;;;;GAgBG;AACH,eAAO,MAAM,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAkCnB,CAAC;AAOF;;;;;;;;GAQG;AACH,eAAO,MAAM,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAiBnB,CAAC;AAMF;;;;;;;;;;;;;;;;GAgBG;AACH,eAAO,MAAM,QAAQ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAqBpB,CAAC;AAMF;;;;;;;;GAQG;AACH,eAAO,MAAM,KAAK;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAgBjB,CAAC;AAMF;;;;;;;GAOG;AACH,eAAO,MAAM,MAAM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAOjB,CAAC;AAMH;;;;;;;;;;GAUG;AACH,eAAO,MAAM,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAQlB,CAAC;AAMH,MAAM,MAAM,MAAM,GAAG,OAAO,OAAO,CAAC,YAAY,CAAC;AACjD,MAAM,MAAM,SAAS,GAAG,OAAO,OAAO,CAAC,YAAY,CAAC;AACpD,MAAM,MAAM,OAAO,GAAG,OAAO,QAAQ,CAAC,YAAY,CAAC;AACnD,MAAM,MAAM,UAAU,GAAG,OAAO,QAAQ,CAAC,YAAY,CAAC;AACtD,MAAM,MAAM,MAAM,GAAG,OAAO,OAAO,CAAC,YAAY,CAAC;AACjD,MAAM,MAAM,SAAS,GAAG,OAAO,OAAO,CAAC,YAAY,CAAC;AACpD,MAAM,MAAM,QAAQ,GAAG,OAAO,KAAK,CAAC,YAAY,CAAC;AACjD,MAAM,MAAM,WAAW,GAAG,OAAO,KAAK,CAAC,YAAY,CAAC;AAEpD;;;GAGG;AACH,eAAO,MAAM,SAAS,4FAQZ,CAAC;AAEX;;;GAGG;AACH,eAAO,MAAM,iBAAiB,6CAA8C,CAAC"}
@@ -0,0 +1,259 @@
1
+ import { sql } from 'drizzle-orm';
2
+ import { sqliteTable, text, integer, index, primaryKey } from 'drizzle-orm/sqlite-core';
3
+ /**
4
+ * KG-MCP Drizzle schema (presearch.md §2.3, CLAUDE.md §Database Rules).
5
+ *
6
+ * 8 logical tables + `kg_meta` for schema versioning. Two of the eight are
7
+ * virtual tables (`kg_chunks_fts`, `kg_chunks_vec`) that drizzle-kit can't
8
+ * model — they're created via raw SQL appended to the initial migration in
9
+ * src/db/migrations. The drizzle code below covers the 7 regular tables
10
+ * plus kg_meta.
11
+ *
12
+ * Invariants this schema MUST preserve:
13
+ *
14
+ * - `kg_nodes.id` is `sha1(scope + ':' + source_uri + ':' + section_path)`,
15
+ * set by the ingester (NOT auto-generated). Re-ingesting the same
16
+ * markdown produces identical ids — Phase 2's idempotent upsert relies
17
+ * on this. The DB never sees the hashing logic; it just stores the value
18
+ * and enforces uniqueness via the PK.
19
+ *
20
+ * - `kg_chunks.id` is `sha1(node_id + ':' + chunk_index)`, same idea.
21
+ *
22
+ * - `chunk_sha = sha1(chunk_text)` is the LOAD-BEARING field for the
23
+ * per-chunk skip-unchanged optimization (CLAUDE.md §Database Rules #3,
24
+ * Loop 6.5 A4). On a Pharos wiki-updater whole-file rewrite we look up
25
+ * the existing chunk_shas for the file's nodes and only re-embed chunks
26
+ * whose sha changed. Without this, every turn re-embeds 60 chunks ×
27
+ * ~50ms = 3s of blocking work that competes with the active coding LLM
28
+ * for Ollama. Do not remove this column.
29
+ *
30
+ * - `last_accessed_at` on `kg_nodes` exists for the Phase 5 personal-KG
31
+ * LRU eviction (Loop 6.5 A2). Phase 2 just stamps it on insert/update.
32
+ *
33
+ * - `source_sha` on `kg_nodes` is the file-level hash; staleness detection
34
+ * on the query path compares this against the current on-disk hash.
35
+ *
36
+ * - All FK relationships use `ON DELETE CASCADE` so deleting a node cleans
37
+ * up its chunks and edges in one statement (verified by schema test #5).
38
+ * Foreign keys are enforced via PRAGMA foreign_keys=ON, mandatory on
39
+ * every connection in client.ts.
40
+ */
41
// ----------------------------------------------------------------------------
// kg_meta — schema versioning + bookkeeping
// ----------------------------------------------------------------------------
/**
 * Minimal key/value store for one-off bookkeeping values such as
 * `schema_version` and `last_full_rebuild`. Expected to hold only a
 * handful of rows.
 *
 * client.ts stamps the initial values on the first openDb() call when they
 * are missing. Comparing `schema_version` at startup lets us detect drift
 * and either apply pending migrations or, in the worst case (a breaking
 * sqlite-vec change), drop and rebuild the vec virtual table from markdown.
 */
export const kgMeta = sqliteTable('kg_meta', {
    key: text('key').primaryKey(),
    value: text('value'),
});
57
// ----------------------------------------------------------------------------
// kg_nodes — markdown sections + concept entities (Phase 2 only writes
// kind='section' rows; Phase 4 adds entities, Phase 6 adds gaps)
// ----------------------------------------------------------------------------
/**
 * One row per markdown section — the primary unit of the KG. Chunks belong
 * to nodes; edges connect nodes.
 *
 * `kind` is deliberately open-ended:
 *   - 'section' — the only kind written in Phase 2; one row per ATX heading
 *   - 'entity' | 'concept' | 'decision' | 'log_entry' | 'gap' — Phase 4-6
 *
 * `section_path` joins the ATX heading hierarchy with ' / ' (an `## Login
 * flow` under `# Authentication` becomes "Authentication / Login flow");
 * it is the empty string for content above the first heading.
 *
 * `content` holds the full section markdown (heading + body). Chunks are
 * derived from it and stored in kg_chunks; both are kept because kg_execute
 * callers may want the whole section via kg.get(node_id) rather than
 * paragraph-sized chunks.
 */
export const kgNodes = sqliteTable('kg_nodes', {
    /** sha1(scope + ':' + source_uri + ':' + section_path) — computed by the ingester, never auto-generated */
    id: text('id').primaryKey(),
    /** 'project' | 'personal'; validated in the app layer, not via a CHECK constraint */
    scope: text('scope').notNull(),
    /** file:// URL of the originating markdown file */
    sourceUri: text('source_uri').notNull(),
    /** Heading hierarchy joined with ' / '; '' for pre-heading content */
    sectionPath: text('section_path').notNull(),
    /** Always 'section' in Phase 2; widens in Phase 4+ */
    kind: text('kind').notNull().default('section'),
    /** Heading text (H1/H2/H3); null for pre-heading content */
    title: text('title'),
    /** Full section markdown (heading + body) */
    content: text('content').notNull(),
    /** sha1 of the whole source file — compared against the on-disk hash for staleness detection */
    sourceSha: text('source_sha').notNull(),
    /** Cached token count of `content`, so budget math avoids re-tokenizing */
    tokenCount: integer('token_count').notNull(),
    /** Provenance: 'extracted' (default) | 'inferred' (AI-generated) | 'ambiguous' (flagged) */
    confidence: text('confidence').notNull().default('extracted'),
    /** Unix epoch ms when the row was first inserted */
    createdAt: integer('created_at').notNull(),
    /** Unix epoch ms of the most recent update */
    updatedAt: integer('updated_at').notNull(),
    /** Unix epoch ms of the last read — feeds the Phase 5 personal-KG LRU eviction (Loop 6.5 A2) */
    lastAccessedAt: integer('last_accessed_at').notNull(),
}, (table) => [
    index('idx_kg_nodes_scope_uri').on(table.scope, table.sourceUri),
    index('idx_kg_nodes_last_accessed').on(table.lastAccessedAt),
]);
109
// ----------------------------------------------------------------------------
// kg_edges — wikilinks, citations, supersedes, etc. (Phase 4+ writes; Phase 2
// just creates the table for migration completeness)
// ----------------------------------------------------------------------------
/**
 * Directed node-to-node edges. The composite primary key
 * `(src_id, dst_id, edge_kind)` allows the same pair of nodes to carry
 * several edge kinds at once (e.g. one node both `cites` and `supersedes`
 * another).
 *
 * Phase 2 never inserts here — wikilink extraction lands in Phase 4. The
 * table is created now so the initial migration is complete and Phase 4
 * needs no follow-up migration.
 */
export const kgEdges = sqliteTable('kg_edges', {
    srcId: text('src_id')
        .notNull()
        .references(() => kgNodes.id, { onDelete: 'cascade' }),
    dstId: text('dst_id')
        .notNull()
        .references(() => kgNodes.id, { onDelete: 'cascade' }),
    /** 'wikilink' | 'cites' | 'supersedes' | 'contradicts' | 'mentions' | 'derived_from' */
    edgeKind: text('edge_kind').notNull(),
}, (table) => [
    primaryKey({ columns: [table.srcId, table.dstId, table.edgeKind] }),
    index('idx_kg_edges_src').on(table.srcId),
    index('idx_kg_edges_dst').on(table.dstId),
]);
136
// ----------------------------------------------------------------------------
// kg_chunks — paragraph-level splits of nodes
// ----------------------------------------------------------------------------
/**
 * One row per ~500-token chunk of a node's `content`. The chunker
 * (src/ingest/parse/chunk.ts) splits on paragraph boundaries and keeps
 * accumulating until the next paragraph would push past `target_tokens`.
 *
 * Because this table has no INTEGER PRIMARY KEY column, SQLite assigns an
 * implicit `rowid`, and that rowid is the join key for both virtual tables:
 *   - `kg_chunks_fts` is declared content='kg_chunks', content_rowid='rowid'
 *   - `kg_chunks_vec.rowid` mirrors `kg_chunks.rowid`
 *
 * `chunk_sha = sha1(text)` is the per-chunk skip-unchanged key. When a file
 * is rewritten, the ingester diffs each new chunk's sha against the shas
 * already stored for that file's nodes; unchanged chunks keep their
 * existing embedding and only changed chunks hit the embedder. This is the
 * load-bearing optimization for Pharos's whole-file-rewrite-per-turn
 * pattern (CLAUDE.md §Database Rules #3) — do not drop this column.
 */
export const kgChunks = sqliteTable('kg_chunks', {
    /** sha1(node_id + ':' + chunk_index) */
    id: text('id').primaryKey(),
    /** FK to kg_nodes; cascade delete removes chunks together with their node */
    nodeId: text('node_id')
        .notNull()
        .references(() => kgNodes.id, { onDelete: 'cascade' }),
    /** 0-based position within the node's chunk list */
    chunkIndex: integer('chunk_index').notNull(),
    /** The chunk's text content */
    text: text('text').notNull(),
    /** sha1(text) — load-bearing for per-chunk skip-unchanged */
    chunkSha: text('chunk_sha').notNull(),
    /** Cached token count for fast budget math */
    tokenCount: integer('token_count').notNull(),
    /** Unix epoch ms of insert */
    createdAt: integer('created_at').notNull(),
}, (table) => [index('idx_kg_chunks_node').on(table.nodeId)]);
174
// ----------------------------------------------------------------------------
// kg_log — append-only event log (Karpathy log.md materialized)
// ----------------------------------------------------------------------------
/**
 * Append-only event stream. The kinds written in Phase 2 are:
 *   - 'ingest:done'    — a file ingested successfully
 *   - 'ingest:error'   — a file ingest failed
 *   - 'rebuild:start' | 'rebuild:done' — full-rebuild markers
 *
 * `payload` is opaque JSON whose shape depends on `kind`. Phase 2 only
 * appends; the reader is the LLM via kg.log.recent(n, opts) in Phase 4+.
 */
export const kgLog = sqliteTable('kg_log', {
    id: integer('id').primaryKey({ autoIncrement: true }),
    /** Unix epoch ms */
    ts: integer('ts').notNull(),
    /** 'project' | 'personal' */
    scope: text('scope').notNull(),
    /** Event kind, e.g. 'ingest:done' */
    kind: text('kind').notNull(),
    /** Source URI for ingest events; null for non-file events */
    sourceUri: text('source_uri'),
    /** Opaque JSON payload, shape per `kind` */
    payload: text('payload'),
}, (table) => [index('idx_kg_log_ts').on(sql `${table.ts} DESC`)]);
199
// ----------------------------------------------------------------------------
// kg_gaps — detected concept gaps (Phase 6+ writes; Phase 2 creates table)
// ----------------------------------------------------------------------------
/**
 * Concept gaps found by the Phase 6 gap-detection sub-agent. Phase 2 only
 * creates the table; the writes arrive in Phase 6.
 *
 * `mentions_count` lets the dashboard rank gaps that recur across turns.
 * `resolved_at` is stamped when a gap closes — either the LLM wrote about
 * the topic or someone dismissed it manually.
 *
 * NOTE(review): the detector docs describe "upserting" by (scope, topic),
 * but no unique constraint exists on that pair here — presumably the app
 * layer does a select-then-insert/update rather than ON CONFLICT. Confirm
 * against the Phase 6 writer before relying on uniqueness.
 */
export const kgGaps = sqliteTable('kg_gaps', {
    id: integer('id').primaryKey({ autoIncrement: true }),
    scope: text('scope').notNull(),
    topic: text('topic').notNull(),
    firstSeenAt: integer('first_seen_at').notNull(),
    mentionsCount: integer('mentions_count').notNull().default(1),
    resolvedAt: integer('resolved_at'),
});
218
// ----------------------------------------------------------------------------
// kg_audit — every tool call (Phase 5 wires writes; Phase 2 creates table)
// ----------------------------------------------------------------------------
/**
 * Audit trail of MCP tool calls. Phase 5 wires the dispatcher to append one
 * row per call as part of the privacy-invariant verification surface
 * (CLAUDE.md §Security #7); Phase 2 only creates the table.
 *
 * NB (CLAUDE.md §Security #7): the JSONL mirror path depends on scope —
 * project rows mirror to `.pharos/kg-audit.jsonl` (inside the repo, safe
 * for `git add .`); personal/both rows mirror to
 * `~/.pharos/profile/kg-audit.jsonl`. Personal-scope rows live in a
 * separate kg_audit table in the personal DB, never the project DB. The
 * split is enforced in the app layer, not here.
 */
export const kgAudit = sqliteTable('kg_audit', {
    id: integer('id').primaryKey({ autoIncrement: true }),
    ts: integer('ts').notNull(),
    toolName: text('tool_name').notNull(),
    scopeRequested: text('scope_requested').notNull(),
    callerCtx: text('caller_ctx'),
    responseTokens: integer('response_tokens'),
    error: text('error'),
});
241
/**
 * Every schema-managed (non-virtual) table name. The schema test iterates
 * this list to verify the migration created each expected table.
 */
export const KG_TABLES = [
    'kg_meta',
    'kg_nodes',
    'kg_edges',
    'kg_chunks',
    'kg_log',
    'kg_gaps',
    'kg_audit',
];
/**
 * The two virtual tables the migration creates via raw SQL (drizzle-kit
 * cannot emit virtual-table DDL). The schema test checks they exist.
 */
export const KG_VIRTUAL_TABLES = ['kg_chunks_fts', 'kg_chunks_vec'];
259
+ //# sourceMappingURL=schema.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"schema.js","sourceRoot":"","sources":["../../src/db/schema.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,MAAM,aAAa,CAAC;AAClC,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AAExF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAEH,+EAA+E;AAC/E,4CAA4C;AAC5C,+EAA+E;AAE/E;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,MAAM,GAAG,WAAW,CAAC,SAAS,EAAE;IAC3C,GAAG,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,UAAU,EAAE;IAC7B,KAAK,EAAE,IAAI,CAAC,OAAO,CAAC;CACrB,CAAC,CAAC;AAEH,+EAA+E;AAC/E,uEAAuE;AACvE,iEAAiE;AACjE,+EAA+E;AAE/E;;;;;;;;;;;;;;;;GAgBG;AACH,MAAM,CAAC,MAAM,OAAO,GAAG,WAAW,CAChC,UAAU,EACV;IACE,2EAA2E;IAC3E,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,UAAU,EAAE;IAC3B,qEAAqE;IACrE,KAAK,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE;IAC9B,8CAA8C;IAC9C,SAAS,EAAE,IAAI,CAAC,YAAY,CAAC,CAAC,OAAO,EAAE;IACvC,8EAA8E;IAC9E,WAAW,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC,OAAO,EAAE;IAC3C,gDAAgD;IAChD,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC,OAAO,EAAE,CAAC,OAAO,CAAC,SAAS,CAAC;IAC/C,mEAAmE;IACnE,KAAK,EAAE,IAAI,CAAC,OAAO,CAAC;IACpB,6CAA6C;IAC7C,OAAO,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,OAAO,EAAE;IAClC,+DAA+D;IAC/D,SAAS,EAAE,IAAI,CAAC,YAAY,CAAC,CAAC,OAAO,EAAE;IACvC,2DAA2D;IAC3D,UAAU,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC,OAAO,EAAE;IAC5C,qGAAqG;IACrG,UAAU,EAAE,IAAI,CAAC,YAAY,CAAC,CAAC,OAAO,EAAE,CAAC,OAAO,CAAC,WAAW,CAAC;IAC7D,oCAAoC;IACpC,SAAS,EAAE,OAAO,CAAC,YAAY,CAAC,CAAC,OAAO,EAAE;IAC1C,mCAAmC;IACnC,SAAS,EAAE,OAAO,CAAC,YAAY,CAAC,CAAC,OAAO,EAAE;IAC1C,6EAA6E;IAC7E,cAAc,EAAE,OAAO,CAAC,kBAAkB,CAAC,CAAC,OAAO,EAAE;CACtD,EACD,CAAC,CAAC,EAAE,EAAE,CAAC;IACL,KAAK,CAAC,wBAAwB,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,SAAS,CAAC;IACxD,KAAK,CAAC,4BAA4B,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,cAAc,CAAC;CACzD,CACF,CAAC;AAEF,+EAA+E;AAC/E,8EAA8E;AAC9E,qDAAqD;AACrD,+EAA+E;AAE/E;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,OAAO,GAAG,WAAW,CAChC,UAAU,EACV;IACE,KAAK,EAAE,IAAI,CAAC,QAAQ,CAAC;SAClB,OAAO,EAAE;SACT,UAAU,CAAC,GAAG,EAAE,CAAC,OAAO,CAAC,EAAE,EAAE,EAAE,QAAQ,EAAE,SAAS,EAAE,CAAC;IACxD,KAAK,EAAE,IAAI,CAAC,QAAQ,CAAC;SAClB,OAAO,EAAE;SACT,UAAU,CAA
C,GAAG,EAAE,CAAC,OAAO,CAAC,EAAE,EAAE,EAAE,QAAQ,EAAE,SAAS,EAAE,CAAC;IACxD,wFAAwF;IACxF,QAAQ,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,OAAO,EAAE;CACtC,EACD,CAAC,CAAC,EAAE,EAAE,CAAC;IACL,UAAU,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC;IACvD,KAAK,CAAC,kBAAkB,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;IACrC,KAAK,CAAC,kBAAkB,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;CACtC,CACF,CAAC;AAEF,+EAA+E;AAC/E,8CAA8C;AAC9C,+EAA+E;AAE/E;;;;;;;;;;;;;;;;GAgBG;AACH,MAAM,CAAC,MAAM,QAAQ,GAAG,WAAW,CACjC,WAAW,EACX;IACE,wCAAwC;IACxC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,UAAU,EAAE;IAC3B,4EAA4E;IAC5E,MAAM,EAAE,IAAI,CAAC,SAAS,CAAC;SACpB,OAAO,EAAE;SACT,UAAU,CAAC,GAAG,EAAE,CAAC,OAAO,CAAC,EAAE,EAAE,EAAE,QAAQ,EAAE,SAAS,EAAE,CAAC;IACxD,oDAAoD;IACpD,UAAU,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC,OAAO,EAAE;IAC5C,+BAA+B;IAC/B,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC,OAAO,EAAE;IAC5B,6DAA6D;IAC7D,QAAQ,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,OAAO,EAAE;IACrC,8CAA8C;IAC9C,UAAU,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC,OAAO,EAAE;IAC5C,8BAA8B;IAC9B,SAAS,EAAE,OAAO,CAAC,YAAY,CAAC,CAAC,OAAO,EAAE;CAC3C,EACD,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,oBAAoB,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAClD,CAAC;AAEF,+EAA+E;AAC/E,gEAAgE;AAChE,+EAA+E;AAE/E;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,KAAK,GAAG,WAAW,CAC9B,QAAQ,EACR;IACE,EAAE,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC,UAAU,CAAC,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC;IACrD,oBAAoB;IACpB,EAAE,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE;IAC3B,6BAA6B;IAC7B,KAAK,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE;IAC9B,qCAAqC;IACrC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC,OAAO,EAAE;IAC5B,6DAA6D;IAC7D,SAAS,EAAE,IAAI,CAAC,YAAY,CAAC;IAC7B,4CAA4C;IAC5C,OAAO,EAAE,IAAI,CAAC,SAAS,CAAC;CACzB,EACD,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC,EAAE,CAAC,GAAG,CAAA,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,CACtD,CAAC;AAEF,+EAA+E;AAC/E,2EAA2E;AAC3E,+EAA+E;AAE/E;;;;;;;GAOG;AACH,MAAM,CAAC,MAAM,MAAM,GAAG,WAAW,CAAC,SAAS,EAAE;IAC3C,EAAE,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC,UAAU,CAAC,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC;IACrD,KAAK,EAAE,IAAI,CAAC,OAAO,CA
AC,CAAC,OAAO,EAAE;IAC9B,KAAK,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE;IAC9B,WAAW,EAAE,OAAO,CAAC,eAAe,CAAC,CAAC,OAAO,EAAE;IAC/C,aAAa,EAAE,OAAO,CAAC,gBAAgB,CAAC,CAAC,OAAO,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC;IAC7D,UAAU,EAAE,OAAO,CAAC,aAAa,CAAC;CACnC,CAAC,CAAC;AAEH,+EAA+E;AAC/E,2EAA2E;AAC3E,+EAA+E;AAE/E;;;;;;;;;;GAUG;AACH,MAAM,CAAC,MAAM,OAAO,GAAG,WAAW,CAAC,UAAU,EAAE;IAC7C,EAAE,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC,UAAU,CAAC,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC;IACrD,EAAE,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE;IAC3B,QAAQ,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,OAAO,EAAE;IACrC,cAAc,EAAE,IAAI,CAAC,iBAAiB,CAAC,CAAC,OAAO,EAAE;IACjD,SAAS,EAAE,IAAI,CAAC,YAAY,CAAC;IAC7B,cAAc,EAAE,OAAO,CAAC,iBAAiB,CAAC;IAC1C,KAAK,EAAE,IAAI,CAAC,OAAO,CAAC;CACrB,CAAC,CAAC;AAeH;;;GAGG;AACH,MAAM,CAAC,MAAM,SAAS,GAAG;IACvB,SAAS;IACT,UAAU;IACV,UAAU;IACV,WAAW;IACX,QAAQ;IACR,SAAS;IACT,UAAU;CACF,CAAC;AAEX;;;GAGG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAAG,CAAC,eAAe,EAAE,eAAe,CAAU,CAAC"}
@@ -0,0 +1,28 @@
1
+ /**
2
+ * Shared types for KG-MCP query results.
3
+ *
4
+ * Phase 2 introduces this file to decouple the tool handlers from any
5
+ * single store implementation. Phase 1's `MemoryStore` defined `Chunk`
6
+ * inline; Phase 2's `Repository` returns the same shape, and the tool
7
+ * handlers import from here. When `MemoryStore` is deleted in Pass 4
8
+ * step 32, this file becomes the canonical source.
9
+ */
10
+ export type Scope = 'project' | 'personal' | 'both';
11
+ /**
12
+ * A retrieval-shaped chunk. Mirrors what Phase 1's `MemoryStore` returned
13
+ * so the existing 13 spike tests stay green across the swap. Field shape
14
+ * is locked — adding fields is fine; renaming/removing breaks the
15
+ * sandbox host bindings (`kg.search` returns `{id, text, source_uri}`)
16
+ * and the spike test asserting that shape.
17
+ */
18
+ export interface Chunk {
19
+ /** Deterministic id — Phase 1: sha1(`relative_path:index`); Phase 2: sha1(`node_id:chunk_index`) */
20
+ id: string;
21
+ /** Chunk text content (paragraph(s) up to the chunker's target token count) */
22
+ text: string;
23
+ /** `file://` URL of the source markdown file */
24
+ source_uri: string;
25
+ /** 0-based position within the source node's chunk list */
26
+ chunk_index: number;
27
+ }
28
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/db/types.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,MAAM,MAAM,KAAK,GAAG,SAAS,GAAG,UAAU,GAAG,MAAM,CAAC;AAEpD;;;;;;GAMG;AACH,MAAM,WAAW,KAAK;IACpB,oGAAoG;IACpG,EAAE,EAAE,MAAM,CAAC;IACX,+EAA+E;IAC/E,IAAI,EAAE,MAAM,CAAC;IACb,gDAAgD;IAChD,UAAU,EAAE,MAAM,CAAC;IACnB,2DAA2D;IAC3D,WAAW,EAAE,MAAM,CAAC;CACrB"}
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Shared types for KG-MCP query results.
3
+ *
4
+ * Phase 2 introduces this file to decouple the tool handlers from any
5
+ * single store implementation. Phase 1's `MemoryStore` defined `Chunk`
6
+ * inline; Phase 2's `Repository` returns the same shape, and the tool
7
+ * handlers import from here. When `MemoryStore` is deleted in Pass 4
8
+ * step 32, this file becomes the canonical source.
9
+ */
10
+ export {};
11
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/db/types.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG"}
@@ -0,0 +1,67 @@
1
import type { Database as BetterSqliteDatabase } from 'better-sqlite3';
/**
 * Gap detector for KG-MCP Phase 6.
 *
 * After an ingest transaction commits, scans the new node's content for
 * concept mentions. A "concept" is a term referenced via bold (`**term**`),
 * wikilinks (`[[term]]`), or backtick-quoted identifiers that appears ≥3
 * times across the entire KG but has no dedicated `kg_nodes` row (i.e., no
 * node whose title matches the concept).
 *
 * Upserts into `kg_gaps` with `topic`, `first_seen_at`, `mentions_count`.
 * When a node is later created with a matching title, `resolved_at` is set.
 *
 * This is a read-only detection surface — the LLM fills gaps by calling
 * `kg.project.write()` to create wiki pages, and re-indexing resolves
 * the gap automatically.
 */
/**
 * Extract candidate concept strings from markdown content.
 *
 * Sources:
 * - Bold text: `**term**` or `__term__`
 * - Wikilinks: `[[term]]` or `[[term|display]]`
 * - Backtick-quoted terms: `` `term` `` (single backtick only, not code fences)
 *
 * Returns deduplicated, normalized (lowercase, trimmed) set.
 */
export declare function extractConcepts(content: string): Set<string>;
/**
 * Run gap detection after an ingest transaction commits.
 *
 * For each concept extracted from the ingested content:
 * 1. Count how many chunks across the KG contain the concept (case-insensitive)
 * 2. Check if a node with a matching title already exists
 * 3. If ≥3 mentions and no dedicated node → upsert into `kg_gaps`
 *
 * Also resolves any existing gaps that now have a dedicated node.
 *
 * @param writer The writer DB connection (same transaction context as ingest).
 *   NOTE(review): the module header says detection runs *after* the ingest
 *   transaction commits, which contradicts "same transaction context" here —
 *   confirm which wording is accurate.
 * @param scope 'project' or 'personal'.
 * @param content The full file content that was just ingested.
 * @param nodesTitles Titles of all nodes that were just ingested (for resolution check).
 * @returns Counts of gaps newly created and gaps resolved by this call.
 */
export declare function detectGaps(writer: BetterSqliteDatabase, scope: string, content: string, nodesTitles: string[]): {
    gaps_created: number;
    gaps_resolved: number;
};
/**
 * Resolve gaps whose topics match any of the given node titles.
 * Sets `resolved_at` to now for matching unresolved gaps.
 *
 * @returns Number of gap rows transitioned from open to resolved.
 */
export declare function resolveGaps(writer: BetterSqliteDatabase, scope: string, nodesTitles: string[]): number;
/** One row of the `kg_gaps` table as returned by {@link queryGaps}. */
export interface GapRow {
    /** Primary key of the gap row. */
    id: number;
    /** Normalized (lowercased, trimmed) concept term lacking a dedicated node. */
    topic: string;
    /** `Date.now()` (ms epoch) timestamp recorded when the gap was first detected. */
    first_seen_at: number;
    /** Chunk-mention count measured at the most recent detection pass. */
    mentions_count: number;
    /** Resolution timestamp (ms epoch), or null while the gap is still open. */
    resolved_at: number | null;
}
/**
 * Query gaps for a scope. Returns unresolved by default; pass
 * `resolved: true` to include resolved gaps.
 */
export declare function queryGaps(reader: BetterSqliteDatabase, scope: string, opts?: {
    resolved?: boolean;
}): GapRow[];
//# sourceMappingURL=detector.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"detector.d.ts","sourceRoot":"","sources":["../../src/gaps/detector.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,IAAI,oBAAoB,EAAE,MAAM,gBAAgB,CAAC;AAIvE;;;;;;;;;;;;;;;GAeG;AAMH;;;;;;;;;GASG;AACH,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC,CA6B5D;AAMD;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,UAAU,CACxB,MAAM,EAAE,oBAAoB,EAC5B,KAAK,EAAE,MAAM,EACb,OAAO,EAAE,MAAM,EACf,WAAW,EAAE,MAAM,EAAE,GACpB;IAAE,YAAY,EAAE,MAAM,CAAC;IAAC,aAAa,EAAE,MAAM,CAAA;CAAE,CAsEjD;AAED;;;GAGG;AACH,wBAAgB,WAAW,CACzB,MAAM,EAAE,oBAAoB,EAC5B,KAAK,EAAE,MAAM,EACb,WAAW,EAAE,MAAM,EAAE,GACpB,MAAM,CAkBR;AAMD,MAAM,WAAW,MAAM;IACrB,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,aAAa,EAAE,MAAM,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;IACvB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;CAC5B;AAED;;;GAGG;AACH,wBAAgB,SAAS,CACvB,MAAM,EAAE,oBAAoB,EAC5B,KAAK,EAAE,MAAM,EACb,IAAI,CAAC,EAAE;IAAE,QAAQ,CAAC,EAAE,OAAO,CAAA;CAAE,GAC5B,MAAM,EAAE,CAiBV"}
@@ -0,0 +1,160 @@
1
+ import { logger } from '../observability/logger.js';
2
+ /**
3
+ * Gap detector for KG-MCP Phase 6.
4
+ *
5
+ * After an ingest transaction commits, scans the new node's content for
6
+ * concept mentions. A "concept" is a term referenced via bold (`**term**`),
7
+ * wikilinks (`[[term]]`), or backtick-quoted identifiers that appears ≥3
8
+ * times across the entire KG but has no dedicated `kg_nodes` row (i.e., no
9
+ * node whose title matches the concept).
10
+ *
11
+ * Upserts into `kg_gaps` with `topic`, `first_seen_at`, `mentions_count`.
12
+ * When a node is later created with a matching title, `resolved_at` is set.
13
+ *
14
+ * This is a read-only detection surface — the LLM fills gaps by calling
15
+ * `kg.project.write()` to create wiki pages, and re-indexing resolves
16
+ * the gap automatically.
17
+ */
18
+ // ----------------------------------------------------------------------------
19
+ // Concept extraction
20
+ // ----------------------------------------------------------------------------
21
/**
 * Extract candidate concept strings from markdown content.
 *
 * Sources:
 * - Bold text: `**term**` or `__term__`
 * - Wikilinks: `[[term]]` or `[[term|display]]` (only the target half counts)
 * - Backtick-quoted terms: `` `term` `` (single backtick only — the
 *   lookaround guards exclude double backticks and code fences)
 *
 * Returns a deduplicated set of normalized (lowercased, trimmed) terms.
 */
export function extractConcepts(content) {
    const found = new Set();
    const normalize = (raw) => raw.trim().toLowerCase();

    // Bold spans: **term** / __term__ — keep short-to-medium phrases only
    for (const match of content.matchAll(/\*\*([^*]+)\*\*|__([^_]+)__/g)) {
        const term = normalize(match[1] ?? match[2] ?? '');
        if (term.length >= 2 && term.length <= 100) {
            found.add(term);
        }
    }

    // Wikilinks: [[term]] / [[term|display]]
    for (const match of content.matchAll(/\[\[([^\]|]+)(?:\|[^\]]+)?\]\]/g)) {
        const term = normalize(match[1] ?? '');
        if (term.length >= 2 && term.length <= 100) {
            found.add(term);
        }
    }

    // Inline code: single-backtick spans. Tighter length cap, and terms
    // containing code punctuation ({}();=) are treated as code, not concepts.
    for (const match of content.matchAll(/(?<!`)(`[^`\n]+?`)(?!`)/g)) {
        const span = match[1] ?? '';
        const term = normalize(span.slice(1, -1));
        if (term.length >= 2 && term.length <= 60 && !/[{}();=]/.test(term)) {
            found.add(term);
        }
    }

    return found;
}
59
+ // ----------------------------------------------------------------------------
60
+ // Gap detection + resolution
61
+ // ----------------------------------------------------------------------------
62
/**
 * Run gap detection after an ingest transaction commits.
 *
 * For each concept extracted from the ingested content:
 * 1. Count how many chunks across the KG contain the concept (case-insensitive)
 * 2. Check if a node with a matching title already exists
 * 3. If ≥3 mentions and no dedicated node → upsert into `kg_gaps`
 *
 * Also resolves any existing gaps that now have a dedicated node.
 *
 * @param writer The writer DB connection (same transaction context as ingest).
 * @param scope 'project' or 'personal'.
 * @param content The full file content that was just ingested.
 * @param nodesTitles Titles of all nodes that were just ingested (for resolution check).
 * @returns Counts of gaps newly created and gaps resolved by this call.
 */
export function detectGaps(writer, scope, content, nodesTitles) {
    let gapsCreated = 0;
    let gapsResolved = 0;
    // Phase 1: resolve existing gaps whose topics match newly-ingested node titles
    gapsResolved = resolveGaps(writer, scope, nodesTitles);
    // Phase 2: detect new gaps from concepts in the ingested content
    const concepts = extractConcepts(content);
    if (concepts.size === 0) {
        return { gaps_created: gapsCreated, gaps_resolved: gapsResolved };
    }
    // Concepts come from markdown bold/wikilink/backtick spans and routinely
    // contain `_` (snake_case identifiers) or `%`. Both are LIKE wildcards:
    // unescaped, `_` matches ANY single character and `%` any run, which
    // inflates mention counts and manufactures phantom gaps. Escape them
    // (and the escape character itself) and declare ESCAPE on the query.
    // SQLite's LIKE is ASCII case-insensitive by default; the previous
    // `COLLATE NOCASE` on the pattern operand was a no-op (LIKE does not
    // consult collating sequences) and has been dropped.
    const escapeLike = (s) => s.replace(/[\\%_]/g, (ch) => '\\' + ch);
    const countChunkMentions = writer.prepare(`SELECT count(*) AS c FROM kg_chunks ch
       JOIN kg_nodes n ON ch.node_id = n.id
       WHERE n.scope = ? AND ch.text LIKE '%' || ? || '%' ESCAPE '\\'`);
    // Concepts are already lowercased by extractConcepts, so LOWER(title)
    // compares like-for-like.
    const findDedicatedNode = writer.prepare(`SELECT id FROM kg_nodes WHERE scope = ? AND LOWER(title) = ? LIMIT 1`);
    // kg_gaps doesn't have a unique constraint on (scope, topic), so emulate
    // an upsert: look the row up first, then update or insert.
    const findGap = writer.prepare(`SELECT id, resolved_at FROM kg_gaps WHERE scope = ? AND topic = ? LIMIT 1`);
    const insertGap = writer.prepare(`INSERT INTO kg_gaps (scope, topic, first_seen_at, mentions_count)
     VALUES (?, ?, ?, ?)`);
    const updateGapCount = writer.prepare(`UPDATE kg_gaps SET mentions_count = ?, resolved_at = NULL WHERE id = ?`);
    const now = Date.now();
    for (const concept of concepts) {
        // Count mentions across the KG (wildcards inside the concept escaped)
        const row = countChunkMentions.get(scope, escapeLike(concept));
        const count = row?.c ?? 0;
        if (count < 3)
            continue;
        // A dedicated node already covers this concept — not a gap
        const dedicated = findDedicatedNode.get(scope, concept);
        if (dedicated)
            continue;
        // Upsert the gap
        const existing = findGap.get(scope, concept);
        if (existing) {
            // Refresh the mention count; clearing resolved_at reopens a
            // previously-resolved gap (reopens are not counted as "created")
            updateGapCount.run(count, existing.id);
        }
        else {
            insertGap.run(scope, concept, now, count);
            gapsCreated++;
        }
    }
    if (gapsCreated > 0 || gapsResolved > 0) {
        logger.info({ scope, gapsCreated, gapsResolved, conceptsScanned: concepts.size }, 'gap detection complete');
    }
    return { gaps_created: gapsCreated, gaps_resolved: gapsResolved };
}
124
/**
 * Resolve gaps whose topics match any of the given node titles.
 * Sets `resolved_at` to now for matching unresolved gaps.
 *
 * NOTE(review): the match uses SQL LOWER() against JS toLowerCase();
 * SQLite's LOWER folds ASCII only — confirm non-ASCII titles are not
 * expected here.
 *
 * @returns Number of gap rows transitioned from open to resolved.
 */
export function resolveGaps(writer, scope, nodesTitles) {
    if (nodesTitles.length === 0)
        return 0;
    const resolvedAt = Date.now();
    const markResolved = writer.prepare(`UPDATE kg_gaps SET resolved_at = ?
       WHERE scope = ? AND LOWER(topic) = ? AND resolved_at IS NULL`);
    // Skip empty/undefined titles, sum the affected-row counts
    return nodesTitles
        .filter((title) => Boolean(title))
        .reduce((total, title) => total + markResolved.run(resolvedAt, scope, title.toLowerCase()).changes, 0);
}
143
/**
 * Query gaps for a scope. Unresolved gaps only by default; pass
 * `resolved: true` to include resolved gaps as well. Rows come back
 * ordered by mention count, most-mentioned first.
 */
export function queryGaps(reader, scope, opts) {
    const sql = opts?.resolved
        ? `SELECT id, topic, first_seen_at, mentions_count, resolved_at
       FROM kg_gaps WHERE scope = ? ORDER BY mentions_count DESC`
        : `SELECT id, topic, first_seen_at, mentions_count, resolved_at
       FROM kg_gaps WHERE scope = ? AND resolved_at IS NULL
       ORDER BY mentions_count DESC`;
    return reader.prepare(sql).all(scope);
}
160
+ //# sourceMappingURL=detector.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"detector.js","sourceRoot":"","sources":["../../src/gaps/detector.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,MAAM,EAAE,MAAM,4BAA4B,CAAC;AAEpD;;;;;;;;;;;;;;;GAeG;AAEH,+EAA+E;AAC/E,qBAAqB;AACrB,+EAA+E;AAE/E;;;;;;;;;GASG;AACH,MAAM,UAAU,eAAe,CAAC,OAAe;IAC7C,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAC;IAEnC,6BAA6B;IAC7B,MAAM,MAAM,GAAG,8BAA8B,CAAC;IAC9C,KAAK,MAAM,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QACzC,MAAM,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QACvD,IAAI,IAAI,CAAC,MAAM,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,IAAI,GAAG;YAAE,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IACjE,CAAC;IAED,0CAA0C;IAC1C,MAAM,UAAU,GAAG,iCAAiC,CAAC;IACrD,KAAK,MAAM,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC;QAC7C,MAAM,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAC/C,IAAI,IAAI,CAAC,MAAM,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,IAAI,GAAG;YAAE,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IACjE,CAAC;IAED,oDAAoD;IACpD,MAAM,UAAU,GAAG,0BAA0B,CAAC;IAC9C,KAAK,MAAM,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC;QAC7C,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACvB,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QACnD,gFAAgF;QAChF,IAAI,IAAI,CAAC,MAAM,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,IAAI,EAAE,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YACpE,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACrB,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,+EAA+E;AAC/E,6BAA6B;AAC7B,+EAA+E;AAE/E;;;;;;;;;;;;;;GAcG;AACH,MAAM,UAAU,UAAU,CACxB,MAA4B,EAC5B,KAAa,EACb,OAAe,EACf,WAAqB;IAErB,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,IAAI,YAAY,GAAG,CAAC,CAAC;IAErB,+EAA+E;IAC/E,YAAY,GAAG,WAAW,CAAC,MAAM,EAAE,KAAK,EAAE,WAAW,CAAC,CAAC;IAEvD,iEAAiE;IACjE,MAAM,QAAQ,GAAG,eAAe,CAAC,OAAO,CAAC,CAAC;IAE1C,IAAI,QAAQ,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,aAAa,EAAE,YAAY,EAAE,CAAC;IACpE,CAAC;IAED,MAAM,kBAAkB,GAAG,MAAM,CAAC,OAAO,CACvC;;wEAEoE,CACrE,CAAC;IAEF,MAAM,iBAAiB,GAAG,MA
AM,CAAC,OAAO,CACtC,sEAAsE,CACvE,CAAC;IAEF,8DAA8D;IAC9D,oEAAoE;IACpE,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,CAC5B,2EAA2E,CAC5E,CAAC;IAEF,MAAM,SAAS,GAAG,MAAM,CAAC,OAAO,CAC9B;yBACqB,CACtB,CAAC;IAEF,MAAM,cAAc,GAAG,MAAM,CAAC,OAAO,CACnC,wEAAwE,CACzE,CAAC;IAEF,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAEvB,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,+BAA+B;QAC/B,MAAM,GAAG,GAAG,kBAAkB,CAAC,GAAG,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QACnD,MAAM,KAAK,GAAG,GAAG,EAAE,CAAC,IAAI,CAAC,CAAC;QAC1B,IAAI,KAAK,GAAG,CAAC;YAAE,SAAS;QAExB,6BAA6B;QAC7B,MAAM,SAAS,GAAG,iBAAiB,CAAC,GAAG,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QACxD,IAAI,SAAS;YAAE,SAAS;QAExB,iBAAiB;QACjB,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QAC7C,IAAI,QAAQ,EAAE,CAAC;YACb,sDAAsD;YACtD,cAAc,CAAC,GAAG,CAAC,KAAK,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAC;QACzC,CAAC;aAAM,CAAC;YACN,SAAS,CAAC,GAAG,CAAC,KAAK,EAAE,OAAO,EAAE,GAAG,EAAE,KAAK,CAAC,CAAC;YAC1C,WAAW,EAAE,CAAC;QAChB,CAAC;IACH,CAAC;IAED,IAAI,WAAW,GAAG,CAAC,IAAI,YAAY,GAAG,CAAC,EAAE,CAAC;QACxC,MAAM,CAAC,IAAI,CACT,EAAE,KAAK,EAAE,WAAW,EAAE,YAAY,EAAE,eAAe,EAAE,QAAQ,CAAC,IAAI,EAAE,EACpE,wBAAwB,CACzB,CAAC;IACJ,CAAC;IAED,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,aAAa,EAAE,YAAY,EAAE,CAAC;AACpE,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,WAAW,CACzB,MAA4B,EAC5B,KAAa,EACb,WAAqB;IAErB,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAEvC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACvB,IAAI,QAAQ,GAAG,CAAC,CAAC;IAEjB,MAAM,WAAW,GAAG,MAAM,CAAC,OAAO,CAChC;mEAC+D,CAChE,CAAC;IAEF,KAAK,MAAM,KAAK,IAAI,WAAW,EAAE,CAAC;QAChC,IAAI,CAAC,KAAK;YAAE,SAAS;QACrB,MAAM,IAAI,GAAG,WAAW,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,EAAE,KAAK,CAAC,WAAW,EAAE,CAAC,CAAC;QAC9D,QAAQ,IAAI,IAAI,CAAC,OAAO,CAAC;IAC3B,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAcD;;;GAGG;AACH,MAAM,UAAU,SAAS,CACvB,MAA4B,EAC5B,KAAa,EACb,IAA6B;IAE7B,IAAI,IAAI,EAAE,QAAQ,EAAE,CAAC;QACnB,OAAO,MAAM;aACV,OAAO,CACN;qEAC6D,CAC9D;aACA,GAAG,CAAC,KAAK,CAAC,CAAC;IAChB,CAAC;IAED,OAAO,MAAM;SACV,OAAO,CACN;;sCAEgC,CACjC;SACA,GAAG,CAAC,KAAK,CAAC,CAAC;AAChB,CAAC"}
@@ -0,0 +1,90 @@
1
+ /**
2
+ * Token-counting budget gate.
3
+ *
4
+ * Implements CLAUDE.md §API Rules #6 budget math:
5
+ *
6
+ * envelope_reserve = 500 // bytes set aside for meta/logs/stale_files
7
+ * safety_margin = 0.9 // js-tiktoken is an estimator, not an oracle
8
+ * available = floor((max_tokens - envelope_reserve) * safety_margin)
9
+ *
10
+ * At the default `max_tokens=5000` the available budget for result bodies is:
11
+ * floor((5000 - 500) * 0.9) = 4050 tokens
12
+ *
13
+ * Truncation is greedy by rank: keep the highest-ranked item whole if it fits;
14
+ * otherwise emit a `too_large` sentinel so the caller can re-query with a
15
+ * higher `max_tokens` or fetch the node directly by id.
16
+ *
17
+ * The sentinel pattern is Loop 6.5 A3 / presearch.md D22. A single oversize
18
+ * item must NOT blackhole the whole response — we report its id + uri and
19
+ * let the LLM decide what to do next.
20
+ *
21
+ * Token counting uses the `p50k_base` encoder — close enough to Claude's
22
+ * tokenization for budgeting purposes, and the 10% safety margin absorbs the
23
+ * estimation error between tokenizers.
24
+ */
25
+ export declare const ENVELOPE_RESERVE_TOKENS = 500;
26
+ export declare const SAFETY_MARGIN = 0.9;
27
+ /**
28
+ * Count tokens in a UTF-8 string.
29
+ *
30
+ * Fast path (long strings): return a character-based over-estimate. This
31
+ * is strictly a ceiling — we'd rather emit a few extra `results_truncated`
32
+ * responses than block the event loop for minutes on tokenization.
33
+ *
34
+ * Slow path (short strings): use the real p50k_base encoder for an exact
35
+ * count. This is what matters for normal-size response bodies.
36
+ *
37
+ * The encoder is initialized once at module load and shared across calls.
38
+ */
39
+ export declare function countTokens(text: string): number;
40
+ /**
41
+ * Given a user-facing `max_tokens` budget, compute the internal result-body
42
+ * budget after subtracting the envelope reserve and applying the safety
43
+ * margin. Always returns a non-negative integer.
44
+ */
45
+ export declare function computeInternalBudget(maxTokens: number): number;
46
+ /**
47
+ * A too-large sentinel replaces a single item that would exceed the budget
48
+ * on its own. The shape is deliberately minimal — id + source_uri so the
49
+ * caller can re-query, plus the original token count so they can size a new
50
+ * `max_tokens` request.
51
+ */
52
+ export interface TooLargeSentinel {
53
+ too_large: true;
54
+ id: string;
55
+ source_uri: string;
56
+ tokens: number;
57
+ }
58
+ export interface FitResult<T> {
59
+ kept: Array<T | TooLargeSentinel>;
60
+ truncated: boolean;
61
+ tokensUsed: number;
62
+ tokensBudgeted: number;
63
+ }
64
+ /**
65
+ * Greedy rank-order truncation. Iterates `items` in the order given (caller
66
+ * is responsible for ranking first), measures each one's serialized token
67
+ * count, and keeps items until the next one would exceed the internal
68
+ * budget.
69
+ *
70
+ * If a single item's token count alone exceeds the budget, it is replaced
71
+ * with a `too_large` sentinel and counted as zero body tokens (the sentinel
72
+ * itself is tiny — ~20 tokens). The iteration then continues so that smaller
73
+ * items after the oversize one can still land in the response.
74
+ *
75
+ * @param items Results, pre-ranked (highest rank first).
76
+ * @param maxTokens User-facing `max_tokens` budget from the tool call.
77
+ * @param serialize How to turn one item into the text we'll count. Usually
78
+ * `JSON.stringify`. Broken out so the caller can include
79
+ * framing (commas, wrapping object keys) in the count.
80
+ * @param idOf Read the item's id for sentinel construction.
81
+ * @param uriOf Read the item's source uri for sentinel construction.
82
+ */
83
+ export declare function fitResults<T>(items: T[], maxTokens: number, serialize: (item: T) => string, idOf: (item: T) => string, uriOf: (item: T) => string): FitResult<T>;
84
+ /**
85
+ * Count tokens in an already-serialized response body without running the
86
+ * fit loop. Used by the tool handlers to populate `meta.tokens_used` after
87
+ * the envelope has been built.
88
+ */
89
+ export declare function countEnvelopeTokens(envelopeJson: string): number;
90
+ //# sourceMappingURL=budget.d.ts.map