@pella-labs/pinakes 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +208 -0
- package/dist/cli/audit.d.ts +30 -0
- package/dist/cli/audit.d.ts.map +1 -0
- package/dist/cli/audit.js +49 -0
- package/dist/cli/audit.js.map +1 -0
- package/dist/cli/export.d.ts +32 -0
- package/dist/cli/export.d.ts.map +1 -0
- package/dist/cli/export.js +73 -0
- package/dist/cli/export.js.map +1 -0
- package/dist/cli/import.d.ts +24 -0
- package/dist/cli/import.d.ts.map +1 -0
- package/dist/cli/import.js +96 -0
- package/dist/cli/import.js.map +1 -0
- package/dist/cli/index.d.ts +3 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +172 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/cli/purge.d.ts +23 -0
- package/dist/cli/purge.d.ts.map +1 -0
- package/dist/cli/purge.js +57 -0
- package/dist/cli/purge.js.map +1 -0
- package/dist/cli/rebuild.d.ts +54 -0
- package/dist/cli/rebuild.d.ts.map +1 -0
- package/dist/cli/rebuild.js +113 -0
- package/dist/cli/rebuild.js.map +1 -0
- package/dist/cli/serve.d.ts +49 -0
- package/dist/cli/serve.d.ts.map +1 -0
- package/dist/cli/serve.js +296 -0
- package/dist/cli/serve.js.map +1 -0
- package/dist/cli/status.d.ts +39 -0
- package/dist/cli/status.d.ts.map +1 -0
- package/dist/cli/status.js +108 -0
- package/dist/cli/status.js.map +1 -0
- package/dist/db/client.d.ts +109 -0
- package/dist/db/client.d.ts.map +1 -0
- package/dist/db/client.js +175 -0
- package/dist/db/client.js.map +1 -0
- package/dist/db/repository.d.ts +82 -0
- package/dist/db/repository.d.ts.map +1 -0
- package/dist/db/repository.js +173 -0
- package/dist/db/repository.js.map +1 -0
- package/dist/db/schema.d.ts +990 -0
- package/dist/db/schema.d.ts.map +1 -0
- package/dist/db/schema.js +259 -0
- package/dist/db/schema.js.map +1 -0
- package/dist/db/types.d.ts +28 -0
- package/dist/db/types.d.ts.map +1 -0
- package/dist/db/types.js +11 -0
- package/dist/db/types.js.map +1 -0
- package/dist/gaps/detector.d.ts +67 -0
- package/dist/gaps/detector.d.ts.map +1 -0
- package/dist/gaps/detector.js +160 -0
- package/dist/gaps/detector.js.map +1 -0
- package/dist/gate/budget.d.ts +90 -0
- package/dist/gate/budget.d.ts.map +1 -0
- package/dist/gate/budget.js +145 -0
- package/dist/gate/budget.js.map +1 -0
- package/dist/ingest/chokidar.d.ts +33 -0
- package/dist/ingest/chokidar.d.ts.map +1 -0
- package/dist/ingest/chokidar.js +152 -0
- package/dist/ingest/chokidar.js.map +1 -0
- package/dist/ingest/ingester.d.ts +117 -0
- package/dist/ingest/ingester.d.ts.map +1 -0
- package/dist/ingest/ingester.js +312 -0
- package/dist/ingest/ingester.js.map +1 -0
- package/dist/ingest/manifest.d.ts +87 -0
- package/dist/ingest/manifest.d.ts.map +1 -0
- package/dist/ingest/manifest.js +223 -0
- package/dist/ingest/manifest.js.map +1 -0
- package/dist/ingest/memory-store.d.ts +55 -0
- package/dist/ingest/memory-store.d.ts.map +1 -0
- package/dist/ingest/memory-store.js +94 -0
- package/dist/ingest/memory-store.js.map +1 -0
- package/dist/ingest/parse/chunk.d.ts +15 -0
- package/dist/ingest/parse/chunk.d.ts.map +1 -0
- package/dist/ingest/parse/chunk.js +88 -0
- package/dist/ingest/parse/chunk.js.map +1 -0
- package/dist/ingest/parse/markdown.d.ts +64 -0
- package/dist/ingest/parse/markdown.d.ts.map +1 -0
- package/dist/ingest/parse/markdown.js +152 -0
- package/dist/ingest/parse/markdown.js.map +1 -0
- package/dist/ingest/queue.d.ts +21 -0
- package/dist/ingest/queue.d.ts.map +1 -0
- package/dist/ingest/queue.js +24 -0
- package/dist/ingest/queue.js.map +1 -0
- package/dist/ingest/source.d.ts +42 -0
- package/dist/ingest/source.d.ts.map +1 -0
- package/dist/ingest/source.js +19 -0
- package/dist/ingest/source.js.map +1 -0
- package/dist/mcp/envelope.d.ts +73 -0
- package/dist/mcp/envelope.d.ts.map +1 -0
- package/dist/mcp/envelope.js +46 -0
- package/dist/mcp/envelope.js.map +1 -0
- package/dist/mcp/tools/execute.d.ts +55 -0
- package/dist/mcp/tools/execute.d.ts.map +1 -0
- package/dist/mcp/tools/execute.js +232 -0
- package/dist/mcp/tools/execute.js.map +1 -0
- package/dist/mcp/tools/search.d.ts +53 -0
- package/dist/mcp/tools/search.d.ts.map +1 -0
- package/dist/mcp/tools/search.js +114 -0
- package/dist/mcp/tools/search.js.map +1 -0
- package/dist/observability/audit.d.ts +25 -0
- package/dist/observability/audit.d.ts.map +1 -0
- package/dist/observability/audit.js +38 -0
- package/dist/observability/audit.js.map +1 -0
- package/dist/observability/logger.d.ts +4 -0
- package/dist/observability/logger.d.ts.map +1 -0
- package/dist/observability/logger.js +56 -0
- package/dist/observability/logger.js.map +1 -0
- package/dist/observability/metrics.d.ts +38 -0
- package/dist/observability/metrics.d.ts.map +1 -0
- package/dist/observability/metrics.js +64 -0
- package/dist/observability/metrics.js.map +1 -0
- package/dist/retrieval/embedder.d.ts +130 -0
- package/dist/retrieval/embedder.d.ts.map +1 -0
- package/dist/retrieval/embedder.js +278 -0
- package/dist/retrieval/embedder.js.map +1 -0
- package/dist/retrieval/fts.d.ts +42 -0
- package/dist/retrieval/fts.d.ts.map +1 -0
- package/dist/retrieval/fts.js +46 -0
- package/dist/retrieval/fts.js.map +1 -0
- package/dist/retrieval/hybrid.d.ts +43 -0
- package/dist/retrieval/hybrid.d.ts.map +1 -0
- package/dist/retrieval/hybrid.js +120 -0
- package/dist/retrieval/hybrid.js.map +1 -0
- package/dist/retrieval/vec.d.ts +39 -0
- package/dist/retrieval/vec.d.ts.map +1 -0
- package/dist/retrieval/vec.js +50 -0
- package/dist/retrieval/vec.js.map +1 -0
- package/dist/sandbox/bindings/budget.d.ts +10 -0
- package/dist/sandbox/bindings/budget.d.ts.map +1 -0
- package/dist/sandbox/bindings/budget.js +44 -0
- package/dist/sandbox/bindings/budget.js.map +1 -0
- package/dist/sandbox/bindings/install.d.ts +23 -0
- package/dist/sandbox/bindings/install.d.ts.map +1 -0
- package/dist/sandbox/bindings/install.js +15 -0
- package/dist/sandbox/bindings/install.js.map +1 -0
- package/dist/sandbox/bindings/kg.d.ts +29 -0
- package/dist/sandbox/bindings/kg.d.ts.map +1 -0
- package/dist/sandbox/bindings/kg.js +323 -0
- package/dist/sandbox/bindings/kg.js.map +1 -0
- package/dist/sandbox/bindings/logger.d.ts +11 -0
- package/dist/sandbox/bindings/logger.d.ts.map +1 -0
- package/dist/sandbox/bindings/logger.js +33 -0
- package/dist/sandbox/bindings/logger.js.map +1 -0
- package/dist/sandbox/bindings/write.d.ts +34 -0
- package/dist/sandbox/bindings/write.d.ts.map +1 -0
- package/dist/sandbox/bindings/write.js +195 -0
- package/dist/sandbox/bindings/write.js.map +1 -0
- package/dist/sandbox/executor.d.ts +68 -0
- package/dist/sandbox/executor.d.ts.map +1 -0
- package/dist/sandbox/executor.js +280 -0
- package/dist/sandbox/executor.js.map +1 -0
- package/dist/sandbox/helpers.d.ts +26 -0
- package/dist/sandbox/helpers.d.ts.map +1 -0
- package/dist/sandbox/helpers.js +131 -0
- package/dist/sandbox/helpers.js.map +1 -0
- package/dist/sandbox/pool.d.ts +63 -0
- package/dist/sandbox/pool.d.ts.map +1 -0
- package/dist/sandbox/pool.js +98 -0
- package/dist/sandbox/pool.js.map +1 -0
- package/dist/sandbox/vendored-codemode.d.ts +99 -0
- package/dist/sandbox/vendored-codemode.d.ts.map +1 -0
- package/dist/sandbox/vendored-codemode.js +471 -0
- package/dist/sandbox/vendored-codemode.js.map +1 -0
- package/dist/server.d.ts +3 -0
- package/dist/server.d.ts.map +1 -0
- package/dist/server.js +74 -0
- package/dist/server.js.map +1 -0
- package/dist/spike.d.ts +15 -0
- package/dist/spike.d.ts.map +1 -0
- package/dist/spike.js +90 -0
- package/dist/spike.js.map +1 -0
- package/package.json +60 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../src/db/schema.ts"],"names":[],"mappings":"AAGA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAMH;;;;;;;;GAQG;AACH,eAAO,MAAM,MAAM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAGjB,CAAC;AAOH;;;;;;;;;;;;;;;;GAgBG;AACH,eAAO,MAAM,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAkCnB,CAAC;AAOF;;;;;;;;GAQG;AACH,eAAO,MAAM,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAiBnB,CAAC;AAMF;;;;;;;;;;;;;;;;GAgBG;AACH,eAAO,MAAM,QAAQ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAqBpB,CAAC;AAMF;;;;;;;;GAQG;AACH,eAAO,MAAM,KAAK;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAgBjB,CAAC;AAMF;;;;;;;GAOG;AACH,eAAO,MAAM,MAAM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAOjB,CAAC;AAMH;;;;;;;;;;GAUG;AACH,eAAO,MAAM,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAQlB,CAAC;AAMH,MAAM,MAAM,MAAM,GAAG,OAAO,OAAO,CAAC,YAAY,CAAC;AACjD,MAAM,MAAM,SAAS,GAAG,OAAO,OAAO,CAAC,YAAY,CAAC;AACpD,MAAM,MAAM,OAAO,GAAG,OAAO,QAAQ,CAAC,YAAY,CAAC;AACnD,MAAM,MAAM,UAAU,GAAG,OAAO,QAAQ,CAAC,YAAY,CAAC;AACtD,MAAM,MAAM,MAAM,GAAG,OAAO,OAAO,CAAC,YAAY,CAAC;AACjD,MAAM,MAAM,SAAS,GAAG,OAAO,OAAO,CAAC,YAAY,CAAC;AACpD,MAAM,MAAM,QAAQ,GAAG,OAAO,KAAK,CAAC,YAAY,CAAC;AACjD,MAAM,MAAM,WAAW,GAAG,OAAO,KAAK,CAAC,YAAY,CAAC;AAEpD;;;GAGG;AACH,eAAO,MAAM,SAAS,4FAQZ,CAAC;AAEX;;;GAGG;AACH,eAAO,MAAM,iBAAiB,6CAA8C,CAAC"}
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
import { sql } from 'drizzle-orm';
|
|
2
|
+
import { sqliteTable, text, integer, index, primaryKey } from 'drizzle-orm/sqlite-core';
|
|
3
|
+
/**
|
|
4
|
+
* KG-MCP Drizzle schema (presearch.md §2.3, CLAUDE.md §Database Rules).
|
|
5
|
+
*
|
|
6
|
+
* 8 logical tables + `kg_meta` for schema versioning. Two of the eight are
|
|
7
|
+
* virtual tables (`kg_chunks_fts`, `kg_chunks_vec`) that drizzle-kit can't
|
|
8
|
+
* model — they're created via raw SQL appended to the initial migration in
|
|
9
|
+
* src/db/migrations. The drizzle code below covers the 7 regular tables
|
|
10
|
+
* plus kg_meta.
|
|
11
|
+
*
|
|
12
|
+
* Invariants this schema MUST preserve:
|
|
13
|
+
*
|
|
14
|
+
* - `kg_nodes.id` is `sha1(scope + ':' + source_uri + ':' + section_path)`,
|
|
15
|
+
* set by the ingester (NOT auto-generated). Re-ingesting the same
|
|
16
|
+
* markdown produces identical ids — Phase 2's idempotent upsert relies
|
|
17
|
+
* on this. The DB never sees the hashing logic; it just stores the value
|
|
18
|
+
* and enforces uniqueness via the PK.
|
|
19
|
+
*
|
|
20
|
+
* - `kg_chunks.id` is `sha1(node_id + ':' + chunk_index)`, same idea.
|
|
21
|
+
*
|
|
22
|
+
* - `chunk_sha = sha1(chunk_text)` is the LOAD-BEARING field for the
|
|
23
|
+
* per-chunk skip-unchanged optimization (CLAUDE.md §Database Rules #3,
|
|
24
|
+
* Loop 6.5 A4). On a Pharos wiki-updater whole-file rewrite we look up
|
|
25
|
+
* the existing chunk_shas for the file's nodes and only re-embed chunks
|
|
26
|
+
* whose sha changed. Without this, every turn re-embeds 60 chunks ×
|
|
27
|
+
* ~50ms = 3s of blocking work that competes with the active coding LLM
|
|
28
|
+
* for Ollama. Do not remove this column.
|
|
29
|
+
*
|
|
30
|
+
* - `last_accessed_at` on `kg_nodes` exists for the Phase 5 personal-KG
|
|
31
|
+
* LRU eviction (Loop 6.5 A2). Phase 2 just stamps it on insert/update.
|
|
32
|
+
*
|
|
33
|
+
* - `source_sha` on `kg_nodes` is the file-level hash; staleness detection
|
|
34
|
+
* on the query path compares this against the current on-disk hash.
|
|
35
|
+
*
|
|
36
|
+
* - All FK relationships use `ON DELETE CASCADE` so deleting a node cleans
|
|
37
|
+
* up its chunks and edges in one statement (verified by schema test #5).
|
|
38
|
+
* Foreign keys are enforced via PRAGMA foreign_keys=ON, mandatory on
|
|
39
|
+
* every connection in client.ts.
|
|
40
|
+
*/
|
|
41
|
+
// ----------------------------------------------------------------------------
|
|
42
|
+
// kg_meta — schema versioning + bookkeeping
|
|
43
|
+
// ----------------------------------------------------------------------------
|
|
44
|
+
/**
 * Minimal key/value bookkeeping table: `schema_version`,
 * `last_full_rebuild`, and similar one-off entries. Holds only a
 * handful of rows.
 *
 * client.ts stamps the schema_version row on the first openDb() call
 * when it is absent. On startup the stored value is compared against
 * the code's expected version to detect drift — either new migrations
 * run, or (worst case, for sqlite-vec breaking changes) the vec
 * virtual table is dropped and rebuilt from markdown.
 */
export const kgMeta = sqliteTable('kg_meta', {
  key: text('key').primaryKey(),
  value: text('value'),
});
|
|
57
|
+
// ----------------------------------------------------------------------------
|
|
58
|
+
// kg_nodes — markdown sections + concept entities (Phase 2 only writes
|
|
59
|
+
// kind='section' rows; Phase 4 adds entities, Phase 6 adds gaps)
|
|
60
|
+
// ----------------------------------------------------------------------------
|
|
61
|
+
/**
 * One row per markdown section — the primary unit of the KG. Chunks
 * belong to nodes; edges connect nodes.
 *
 * `kind` stays open-ended for forward compatibility: Phase 2 only
 * writes 'section' rows (one per ATX heading); later phases add
 * 'entity' | 'concept' | 'decision' | 'log_entry' | 'gap'.
 *
 * `section_path` is the ATX heading hierarchy joined by ' / ' (e.g.
 * "Authentication / Login flow" for an `## Login flow` nested under
 * `# Authentication`); the empty string marks top-of-file content
 * above any heading.
 *
 * `content` keeps the full section markdown (heading + body) even
 * though chunks are derived from it and stored in kg_chunks, because
 * `kg_execute` callers may want the whole section (`kg.get(node_id)`)
 * rather than paragraph-sized chunks.
 */
export const kgNodes = sqliteTable(
  'kg_nodes',
  {
    /** sha1(scope + ':' + source_uri + ':' + section_path); computed by the ingester, never auto-generated */
    id: text('id').primaryKey(),
    /** 'project' | 'personal' — validated at the app layer, not via a CHECK constraint */
    scope: text('scope').notNull(),
    /** file:// URL of the originating markdown file */
    sourceUri: text('source_uri').notNull(),
    /** Heading hierarchy joined by ' / '; '' for pre-heading content */
    sectionPath: text('section_path').notNull(),
    /** Only 'section' in Phase 2; the set widens in Phase 4+ */
    kind: text('kind').notNull().default('section'),
    /** Heading text (H1/H2/H3); null when the row covers pre-heading content */
    title: text('title'),
    /** Full section markdown, heading included */
    content: text('content').notNull(),
    /** sha1 of the entire source file — compared on the query path for staleness detection */
    sourceSha: text('source_sha').notNull(),
    /** Cached token count of `content` so budget math avoids re-tokenizing */
    tokenCount: integer('token_count').notNull(),
    /** Provenance: 'extracted' (default) | 'inferred' (AI-generated) | 'ambiguous' (flagged) */
    confidence: text('confidence').notNull().default('extracted'),
    /** Unix epoch ms of the first insert */
    createdAt: integer('created_at').notNull(),
    /** Unix epoch ms of the most recent update */
    updatedAt: integer('updated_at').notNull(),
    /** Unix epoch ms of the last read — feeds the Phase 5 personal-KG LRU eviction (Loop 6.5 A2); Phase 2 just stamps it */
    lastAccessedAt: integer('last_accessed_at').notNull(),
  },
  (table) => [
    index('idx_kg_nodes_scope_uri').on(table.scope, table.sourceUri),
    index('idx_kg_nodes_last_accessed').on(table.lastAccessedAt),
  ],
);
|
|
109
|
+
// ----------------------------------------------------------------------------
|
|
110
|
+
// kg_edges — wikilinks, citations, supersedes, etc. (Phase 4+ writes; Phase 2
|
|
111
|
+
// just creates the table for migration completeness)
|
|
112
|
+
// ----------------------------------------------------------------------------
|
|
113
|
+
/**
 * Directed node-to-node edges. The composite primary key
 * (src_id, dst_id, edge_kind) lets one pair of nodes carry several
 * edge kinds at once — a node can both `cites` and `supersedes`
 * another.
 *
 * Phase 2 never writes here; wikilink extraction lands in Phase 4.
 * The table ships now so the initial migration is complete and
 * Phase 4 needs no follow-up migration.
 */
export const kgEdges = sqliteTable(
  'kg_edges',
  {
    srcId: text('src_id').notNull().references(() => kgNodes.id, { onDelete: 'cascade' }),
    dstId: text('dst_id').notNull().references(() => kgNodes.id, { onDelete: 'cascade' }),
    /** 'wikilink' | 'cites' | 'supersedes' | 'contradicts' | 'mentions' | 'derived_from' */
    edgeKind: text('edge_kind').notNull(),
  },
  (table) => [
    primaryKey({ columns: [table.srcId, table.dstId, table.edgeKind] }),
    index('idx_kg_edges_src').on(table.srcId),
    index('idx_kg_edges_dst').on(table.dstId),
  ],
);
|
|
136
|
+
// ----------------------------------------------------------------------------
|
|
137
|
+
// kg_chunks — paragraph-level splits of nodes
|
|
138
|
+
// ----------------------------------------------------------------------------
|
|
139
|
+
/**
 * One row per ~500-token chunk split out of a node's `content`.
 * The chunker (src/ingest/parse/chunk.ts) breaks on paragraph
 * boundaries, accumulating paragraphs until the next one would push
 * past `target_tokens`.
 *
 * FTS5 and sqlite-vec both join on the implicit SQLite `rowid`
 * (auto-assigned because this table has no INTEGER PRIMARY KEY):
 * - `kg_chunks_fts` declares content='kg_chunks', content_rowid='rowid'
 * - `kg_chunks_vec.rowid` mirrors `kg_chunks.rowid`
 *
 * `chunk_sha = sha1(text)` drives the per-chunk skip-unchanged
 * optimization: on a file rewrite the ingester diffs each new chunk's
 * sha against the stored shas for that file's nodes, reusing the
 * existing embedding for matches (no embedder call) and re-embedding
 * only changed chunks. This is the load-bearing optimization for
 * Pharos's whole-file-rewrite-per-turn pattern (CLAUDE.md §Database
 * Rules #3).
 */
export const kgChunks = sqliteTable(
  'kg_chunks',
  {
    /** sha1(node_id + ':' + chunk_index) */
    id: text('id').primaryKey(),
    /** Owning node; ON DELETE CASCADE removes chunks with their node */
    nodeId: text('node_id').notNull().references(() => kgNodes.id, { onDelete: 'cascade' }),
    /** 0-based position within the node's chunk list */
    chunkIndex: integer('chunk_index').notNull(),
    /** Raw chunk text */
    text: text('text').notNull(),
    /** sha1(text) — the per-chunk skip-unchanged key; do not remove */
    chunkSha: text('chunk_sha').notNull(),
    /** Cached token count for fast budget math */
    tokenCount: integer('token_count').notNull(),
    /** Unix epoch ms of insert */
    createdAt: integer('created_at').notNull(),
  },
  (table) => [index('idx_kg_chunks_node').on(table.nodeId)],
);
|
|
174
|
+
// ----------------------------------------------------------------------------
|
|
175
|
+
// kg_log — append-only event log (Karpathy log.md materialized)
|
|
176
|
+
// ----------------------------------------------------------------------------
|
|
177
|
+
/**
 * Append-only event stream (Karpathy-style log.md, materialized).
 * Phase 2 appends:
 * - 'ingest:done'  — a file ingested successfully
 * - 'ingest:error' — a file ingest failed
 * - 'rebuild:start' / 'rebuild:done' — full-rebuild markers
 *
 * `payload` is opaque JSON whose shape depends on `kind`. Reads come
 * later, via `kg.log.recent(n, opts)` in Phase 4+; Phase 2 only
 * appends.
 */
export const kgLog = sqliteTable(
  'kg_log',
  {
    id: integer('id').primaryKey({ autoIncrement: true }),
    /** Unix epoch ms */
    ts: integer('ts').notNull(),
    /** 'project' | 'personal' */
    scope: text('scope').notNull(),
    /** Event kind, e.g. 'ingest:done' */
    kind: text('kind').notNull(),
    /** Source URI for ingest events; null for non-file events */
    sourceUri: text('source_uri'),
    /** Opaque JSON payload, shaped per `kind` */
    payload: text('payload'),
  },
  (table) => [index('idx_kg_log_ts').on(sql`${table.ts} DESC`)],
);
|
|
199
|
+
// ----------------------------------------------------------------------------
|
|
200
|
+
// kg_gaps — detected concept gaps (Phase 6+ writes; Phase 2 creates table)
|
|
201
|
+
// ----------------------------------------------------------------------------
|
|
202
|
+
/**
 * Concept gaps found by the Phase 6 gap-detection sub-agent. The
 * table exists from Phase 2 onward; Phase 6 wires the writes.
 *
 * `mentions_count` lets the dashboard rank gaps that recur across
 * multiple turns. `resolved_at` is stamped when a gap closes —
 * either the LLM writes about the topic or someone dismisses it
 * manually.
 */
export const kgGaps = sqliteTable('kg_gaps', {
  id: integer('id').primaryKey({ autoIncrement: true }),
  scope: text('scope').notNull(),
  topic: text('topic').notNull(),
  firstSeenAt: integer('first_seen_at').notNull(),
  mentionsCount: integer('mentions_count').notNull().default(1),
  resolvedAt: integer('resolved_at'),
});
|
|
218
|
+
// ----------------------------------------------------------------------------
|
|
219
|
+
// kg_audit — every tool call (Phase 5 wires writes; Phase 2 creates table)
|
|
220
|
+
// ----------------------------------------------------------------------------
|
|
221
|
+
/**
 * One row per MCP tool call. Phase 5 wires the dispatcher to write
 * here as part of the privacy-invariant verification surface
 * (CLAUDE.md §Security #7); Phase 2 only creates the table.
 *
 * NB (CLAUDE.md §Security #7): the JSONL mirror path is scope-split —
 * project rows mirror to `.pharos/kg-audit.jsonl` (inside the repo,
 * safe for `git add .`) while personal/both rows mirror to
 * `~/.pharos/profile/kg-audit.jsonl`. Personal-scope audit rows live
 * in the personal DB's own kg_audit table, never the project DB; the
 * split is enforced at the app layer.
 */
export const kgAudit = sqliteTable('kg_audit', {
  id: integer('id').primaryKey({ autoIncrement: true }),
  ts: integer('ts').notNull(),
  toolName: text('tool_name').notNull(),
  scopeRequested: text('scope_requested').notNull(),
  callerCtx: text('caller_ctx'),
  responseTokens: integer('response_tokens'),
  error: text('error'),
});
|
|
241
|
+
/**
 * Every table the drizzle schema manages, in creation order. The
 * schema test walks this list to verify the migration created each
 * expected table.
 */
export const KG_TABLES = ['kg_meta', 'kg_nodes', 'kg_edges', 'kg_chunks', 'kg_log', 'kg_gaps', 'kg_audit'];
|
|
254
|
+
/**
 * Virtual tables created with raw SQL inside the migration —
 * drizzle-kit does not emit virtual-table DDL. The schema test
 * checks that both exist.
 */
export const KG_VIRTUAL_TABLES = [
  'kg_chunks_fts',
  'kg_chunks_vec',
];
|
|
259
|
+
//# sourceMappingURL=schema.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"schema.js","sourceRoot":"","sources":["../../src/db/schema.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,GAAG,EAAE,MAAM,aAAa,CAAC;AAClC,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AAExF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAEH,+EAA+E;AAC/E,4CAA4C;AAC5C,+EAA+E;AAE/E;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,MAAM,GAAG,WAAW,CAAC,SAAS,EAAE;IAC3C,GAAG,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,UAAU,EAAE;IAC7B,KAAK,EAAE,IAAI,CAAC,OAAO,CAAC;CACrB,CAAC,CAAC;AAEH,+EAA+E;AAC/E,uEAAuE;AACvE,iEAAiE;AACjE,+EAA+E;AAE/E;;;;;;;;;;;;;;;;GAgBG;AACH,MAAM,CAAC,MAAM,OAAO,GAAG,WAAW,CAChC,UAAU,EACV;IACE,2EAA2E;IAC3E,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,UAAU,EAAE;IAC3B,qEAAqE;IACrE,KAAK,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE;IAC9B,8CAA8C;IAC9C,SAAS,EAAE,IAAI,CAAC,YAAY,CAAC,CAAC,OAAO,EAAE;IACvC,8EAA8E;IAC9E,WAAW,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC,OAAO,EAAE;IAC3C,gDAAgD;IAChD,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC,OAAO,EAAE,CAAC,OAAO,CAAC,SAAS,CAAC;IAC/C,mEAAmE;IACnE,KAAK,EAAE,IAAI,CAAC,OAAO,CAAC;IACpB,6CAA6C;IAC7C,OAAO,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,OAAO,EAAE;IAClC,+DAA+D;IAC/D,SAAS,EAAE,IAAI,CAAC,YAAY,CAAC,CAAC,OAAO,EAAE;IACvC,2DAA2D;IAC3D,UAAU,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC,OAAO,EAAE;IAC5C,qGAAqG;IACrG,UAAU,EAAE,IAAI,CAAC,YAAY,CAAC,CAAC,OAAO,EAAE,CAAC,OAAO,CAAC,WAAW,CAAC;IAC7D,oCAAoC;IACpC,SAAS,EAAE,OAAO,CAAC,YAAY,CAAC,CAAC,OAAO,EAAE;IAC1C,mCAAmC;IACnC,SAAS,EAAE,OAAO,CAAC,YAAY,CAAC,CAAC,OAAO,EAAE;IAC1C,6EAA6E;IAC7E,cAAc,EAAE,OAAO,CAAC,kBAAkB,CAAC,CAAC,OAAO,EAAE;CACtD,EACD,CAAC,CAAC,EAAE,EAAE,CAAC;IACL,KAAK,CAAC,wBAAwB,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,SAAS,CAAC;IACxD,KAAK,CAAC,4BAA4B,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,cAAc,CAAC;CACzD,CACF,CAAC;AAEF,+EAA+E;AAC/E,8EAA8E;AAC9E,qDAAqD;AACrD,+EAA+E;AAE/E;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,OAAO,GAAG,WAAW,CAChC,UAAU,EACV;IACE,KAAK,EAAE,IAAI,CAAC,QAAQ,CAAC;SAClB,OAAO,EAAE;SACT,UAAU,CAAC,GAAG,EAAE,CAAC,OAAO,CAAC,EAAE,EAAE,EAAE,QAAQ,EAAE,SAAS,EAAE,CAAC;IACxD,KAAK,EAAE,IAAI,CAAC,QAAQ,CAAC;SAClB,OAAO,EAAE;SACT,UAAU,CAAC,
GAAG,EAAE,CAAC,OAAO,CAAC,EAAE,EAAE,EAAE,QAAQ,EAAE,SAAS,EAAE,CAAC;IACxD,wFAAwF;IACxF,QAAQ,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,OAAO,EAAE;CACtC,EACD,CAAC,CAAC,EAAE,EAAE,CAAC;IACL,UAAU,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC;IACvD,KAAK,CAAC,kBAAkB,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;IACrC,KAAK,CAAC,kBAAkB,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;CACtC,CACF,CAAC;AAEF,+EAA+E;AAC/E,8CAA8C;AAC9C,+EAA+E;AAE/E;;;;;;;;;;;;;;;;GAgBG;AACH,MAAM,CAAC,MAAM,QAAQ,GAAG,WAAW,CACjC,WAAW,EACX;IACE,wCAAwC;IACxC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,UAAU,EAAE;IAC3B,4EAA4E;IAC5E,MAAM,EAAE,IAAI,CAAC,SAAS,CAAC;SACpB,OAAO,EAAE;SACT,UAAU,CAAC,GAAG,EAAE,CAAC,OAAO,CAAC,EAAE,EAAE,EAAE,QAAQ,EAAE,SAAS,EAAE,CAAC;IACxD,oDAAoD;IACpD,UAAU,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC,OAAO,EAAE;IAC5C,+BAA+B;IAC/B,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC,OAAO,EAAE;IAC5B,6DAA6D;IAC7D,QAAQ,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,OAAO,EAAE;IACrC,8CAA8C;IAC9C,UAAU,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC,OAAO,EAAE;IAC5C,8BAA8B;IAC9B,SAAS,EAAE,OAAO,CAAC,YAAY,CAAC,CAAC,OAAO,EAAE;CAC3C,EACD,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,oBAAoB,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAClD,CAAC;AAEF,+EAA+E;AAC/E,gEAAgE;AAChE,+EAA+E;AAE/E;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,KAAK,GAAG,WAAW,CAC9B,QAAQ,EACR;IACE,EAAE,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC,UAAU,CAAC,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC;IACrD,oBAAoB;IACpB,EAAE,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE;IAC3B,6BAA6B;IAC7B,KAAK,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE;IAC9B,qCAAqC;IACrC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC,OAAO,EAAE;IAC5B,6DAA6D;IAC7D,SAAS,EAAE,IAAI,CAAC,YAAY,CAAC;IAC7B,4CAA4C;IAC5C,OAAO,EAAE,IAAI,CAAC,SAAS,CAAC;CACzB,EACD,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC,EAAE,CAAC,GAAG,CAAA,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,CACtD,CAAC;AAEF,+EAA+E;AAC/E,2EAA2E;AAC3E,+EAA+E;AAE/E;;;;;;;GAOG;AACH,MAAM,CAAC,MAAM,MAAM,GAAG,WAAW,CAAC,SAAS,EAAE;IAC3C,EAAE,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC,UAAU,CAAC,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC;IACrD,KAAK,EAAE,IAAI,CAAC,OAAO,CAAC
,CAAC,OAAO,EAAE;IAC9B,KAAK,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE;IAC9B,WAAW,EAAE,OAAO,CAAC,eAAe,CAAC,CAAC,OAAO,EAAE;IAC/C,aAAa,EAAE,OAAO,CAAC,gBAAgB,CAAC,CAAC,OAAO,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC;IAC7D,UAAU,EAAE,OAAO,CAAC,aAAa,CAAC;CACnC,CAAC,CAAC;AAEH,+EAA+E;AAC/E,2EAA2E;AAC3E,+EAA+E;AAE/E;;;;;;;;;;GAUG;AACH,MAAM,CAAC,MAAM,OAAO,GAAG,WAAW,CAAC,UAAU,EAAE;IAC7C,EAAE,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC,UAAU,CAAC,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC;IACrD,EAAE,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE;IAC3B,QAAQ,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,OAAO,EAAE;IACrC,cAAc,EAAE,IAAI,CAAC,iBAAiB,CAAC,CAAC,OAAO,EAAE;IACjD,SAAS,EAAE,IAAI,CAAC,YAAY,CAAC;IAC7B,cAAc,EAAE,OAAO,CAAC,iBAAiB,CAAC;IAC1C,KAAK,EAAE,IAAI,CAAC,OAAO,CAAC;CACrB,CAAC,CAAC;AAeH;;;GAGG;AACH,MAAM,CAAC,MAAM,SAAS,GAAG;IACvB,SAAS;IACT,UAAU;IACV,UAAU;IACV,WAAW;IACX,QAAQ;IACR,SAAS;IACT,UAAU;CACF,CAAC;AAEX;;;GAGG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAAG,CAAC,eAAe,EAAE,eAAe,CAAU,CAAC"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
 * Shared types for KG-MCP query results.
 *
 * Phase 2 introduces this file to decouple the tool handlers from any
 * single store implementation. Phase 1's `MemoryStore` defined `Chunk`
 * inline; Phase 2's `Repository` returns the same shape, and the tool
 * handlers import from here. When `MemoryStore` is deleted in Pass 4
 * step 32, this file becomes the canonical source of these types.
 */
export type Scope = 'project' | 'personal' | 'both';
/**
 * A retrieval-shaped chunk. Mirrors the shape Phase 1's `MemoryStore`
 * returned so the existing 13 spike tests stay green across the swap.
 * The field shape is locked: adding fields is fine; renaming or
 * removing them breaks the sandbox host bindings (`kg.search` returns
 * `{id, text, source_uri}`) and the spike test asserting that shape.
 * Note the snake_case field names — they are part of that wire shape,
 * not a style accident.
 */
export interface Chunk {
  /** Deterministic id — Phase 1: sha1(`relative_path:index`); Phase 2: sha1(`node_id:chunk_index`) */
  id: string;
  /** Chunk text content (paragraph(s) up to the chunker's target token count) */
  text: string;
  /** `file://` URL of the source markdown file */
  source_uri: string;
  /** 0-based position within the source node's chunk list */
  chunk_index: number;
}
|
|
28
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/db/types.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,MAAM,MAAM,KAAK,GAAG,SAAS,GAAG,UAAU,GAAG,MAAM,CAAC;AAEpD;;;;;;GAMG;AACH,MAAM,WAAW,KAAK;IACpB,oGAAoG;IACpG,EAAE,EAAE,MAAM,CAAC;IACX,+EAA+E;IAC/E,IAAI,EAAE,MAAM,CAAC;IACb,gDAAgD;IAChD,UAAU,EAAE,MAAM,CAAC;IACnB,2DAA2D;IAC3D,WAAW,EAAE,MAAM,CAAC;CACrB"}
|
package/dist/db/types.js
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
 * Shared types for KG-MCP query results.
 *
 * Phase 2 introduces this file to decouple the tool handlers from any
 * single store implementation. Phase 1's `MemoryStore` defined `Chunk`
 * inline; Phase 2's `Repository` returns the same shape, and the tool
 * handlers import from here. When `MemoryStore` is deleted in Pass 4
 * step 32, this file becomes the canonical source.
 *
 * This compiled module is intentionally empty at runtime — the source
 * file contains only type declarations, which are erased by tsc. The
 * bare `export {}` keeps it a module.
 */
export {};
|
|
11
|
+
//# sourceMappingURL=types.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/db/types.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG"}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import type { Database as BetterSqliteDatabase } from 'better-sqlite3';
|
|
2
|
+
/**
|
|
3
|
+
* Gap detector for KG-MCP Phase 6.
|
|
4
|
+
*
|
|
5
|
+
* After an ingest transaction commits, scans the new node's content for
|
|
6
|
+
* concept mentions. A "concept" is a term referenced via bold (`**term**`),
|
|
7
|
+
* wikilinks (`[[term]]`), or backtick-quoted identifiers that appears ≥3
|
|
8
|
+
* times across the entire KG but has no dedicated `kg_nodes` row (i.e., no
|
|
9
|
+
* node whose title matches the concept).
|
|
10
|
+
*
|
|
11
|
+
* Upserts into `kg_gaps` with `topic`, `first_seen_at`, `mentions_count`.
|
|
12
|
+
* When a node is later created with a matching title, `resolved_at` is set.
|
|
13
|
+
*
|
|
14
|
+
* This is a read-only detection surface — the LLM fills gaps by calling
|
|
15
|
+
* `kg.project.write()` to create wiki pages, and re-indexing resolves
|
|
16
|
+
* the gap automatically.
|
|
17
|
+
*/
|
|
18
|
+
/**
 * Extract candidate concept strings from markdown content.
 *
 * Sources:
 * - Bold text: `**term**` or `__term__`
 * - Wikilinks: `[[term]]` or `[[term|display]]`
 * - Backtick-quoted terms: `` `term` `` (single backtick only, not code fences)
 *
 * Returns deduplicated, normalized (lowercase, trimmed) set.
 *
 * @param content Raw markdown text to scan.
 * @returns Set of normalized candidate terms (2–100 chars; backtick terms capped at 60).
 */
export declare function extractConcepts(content: string): Set<string>;
|
|
29
|
+
/**
 * Run gap detection after an ingest transaction commits.
 *
 * For each concept extracted from the ingested content:
 * 1. Count how many chunks across the KG contain the concept (case-insensitive)
 * 2. Check if a node with a matching title already exists
 * 3. If ≥3 mentions and no dedicated node → upsert into `kg_gaps`
 *
 * Also resolves any existing gaps that now have a dedicated node.
 *
 * @param writer The writer DB connection (same transaction context as ingest).
 * @param scope 'project' or 'personal'.
 * @param content The full file content that was just ingested.
 * @param nodesTitles Titles of all nodes that were just ingested (for resolution check).
 * @returns Counts of gaps newly created and gaps resolved by this pass.
 */
export declare function detectGaps(writer: BetterSqliteDatabase, scope: string, content: string, nodesTitles: string[]): {
    gaps_created: number;
    gaps_resolved: number;
};
|
|
48
|
+
/**
 * Resolve gaps whose topics match any of the given node titles.
 * Sets `resolved_at` to now for matching unresolved gaps.
 *
 * @param writer The writer DB connection.
 * @param scope 'project' or 'personal'.
 * @param nodesTitles Node titles to match against gap topics (matched case-insensitively).
 * @returns Number of gap rows marked resolved.
 */
export declare function resolveGaps(writer: BetterSqliteDatabase, scope: string, nodesTitles: string[]): number;
|
|
53
|
+
/** One row of the `kg_gaps` table as returned by {@link queryGaps}. */
export interface GapRow {
    /** Primary key of the gap row. */
    id: number;
    /** Normalized (lowercased, trimmed) concept term the gap was detected for. */
    topic: string;
    /** Epoch-milliseconds timestamp when the gap was first detected (`Date.now()`). */
    first_seen_at: number;
    /** Number of KG chunks mentioning the topic at the last detection pass. */
    mentions_count: number;
    /** Epoch-ms timestamp when a dedicated node resolved the gap, or null while open. */
    resolved_at: number | null;
}
|
|
60
|
+
/**
 * Query gaps for a scope. Returns unresolved by default; pass
 * `resolved: true` to include resolved gaps.
 *
 * @param reader The reader DB connection.
 * @param scope 'project' or 'personal'.
 * @param opts Optional; set `resolved: true` to include resolved gaps.
 * @returns Gap rows ordered by `mentions_count` descending.
 */
export declare function queryGaps(reader: BetterSqliteDatabase, scope: string, opts?: {
    resolved?: boolean;
}): GapRow[];
|
|
67
|
+
//# sourceMappingURL=detector.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"detector.d.ts","sourceRoot":"","sources":["../../src/gaps/detector.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,IAAI,oBAAoB,EAAE,MAAM,gBAAgB,CAAC;AAIvE;;;;;;;;;;;;;;;GAeG;AAMH;;;;;;;;;GASG;AACH,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC,CA6B5D;AAMD;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,UAAU,CACxB,MAAM,EAAE,oBAAoB,EAC5B,KAAK,EAAE,MAAM,EACb,OAAO,EAAE,MAAM,EACf,WAAW,EAAE,MAAM,EAAE,GACpB;IAAE,YAAY,EAAE,MAAM,CAAC;IAAC,aAAa,EAAE,MAAM,CAAA;CAAE,CAsEjD;AAED;;;GAGG;AACH,wBAAgB,WAAW,CACzB,MAAM,EAAE,oBAAoB,EAC5B,KAAK,EAAE,MAAM,EACb,WAAW,EAAE,MAAM,EAAE,GACpB,MAAM,CAkBR;AAMD,MAAM,WAAW,MAAM;IACrB,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,aAAa,EAAE,MAAM,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;IACvB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;CAC5B;AAED;;;GAGG;AACH,wBAAgB,SAAS,CACvB,MAAM,EAAE,oBAAoB,EAC5B,KAAK,EAAE,MAAM,EACb,IAAI,CAAC,EAAE;IAAE,QAAQ,CAAC,EAAE,OAAO,CAAA;CAAE,GAC5B,MAAM,EAAE,CAiBV"}
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
import { logger } from '../observability/logger.js';
|
|
2
|
+
/**
|
|
3
|
+
* Gap detector for KG-MCP Phase 6.
|
|
4
|
+
*
|
|
5
|
+
* After an ingest transaction commits, scans the new node's content for
|
|
6
|
+
* concept mentions. A "concept" is a term referenced via bold (`**term**`),
|
|
7
|
+
* wikilinks (`[[term]]`), or backtick-quoted identifiers that appears ≥3
|
|
8
|
+
* times across the entire KG but has no dedicated `kg_nodes` row (i.e., no
|
|
9
|
+
* node whose title matches the concept).
|
|
10
|
+
*
|
|
11
|
+
* Upserts into `kg_gaps` with `topic`, `first_seen_at`, `mentions_count`.
|
|
12
|
+
* When a node is later created with a matching title, `resolved_at` is set.
|
|
13
|
+
*
|
|
14
|
+
* This is a read-only detection surface — the LLM fills gaps by calling
|
|
15
|
+
* `kg.project.write()` to create wiki pages, and re-indexing resolves
|
|
16
|
+
* the gap automatically.
|
|
17
|
+
*/
|
|
18
|
+
// ----------------------------------------------------------------------------
|
|
19
|
+
// Concept extraction
|
|
20
|
+
// ----------------------------------------------------------------------------
|
|
21
|
+
/**
 * Extract candidate concept strings from markdown content.
 *
 * Three sources are scanned: bold spans (`**term**` / `__term__`),
 * wikilinks (`[[term]]` / `[[term|display]]`), and single-backtick
 * inline code (`` `term` ``; triple-backtick fences are excluded by the
 * lookaround guards). Every candidate is trimmed and lowercased, and
 * length-bounded candidates are collected into a deduplicating Set.
 */
export function extractConcepts(content) {
    const found = new Set();
    const normalize = (s) => s.trim().toLowerCase();
    // Shared admission gate: minimum 2 chars, source-specific maximum,
    // plus an optional extra predicate (used by the backtick source).
    const admit = (term, maxLen, extraOk = true) => {
        if (term.length >= 2 && term.length <= maxLen && extraOk) {
            found.add(term);
        }
    };
    // Bold: **term** or __term__ (alternation puts the hit in group 1 or 2)
    for (const match of content.matchAll(/\*\*([^*]+)\*\*|__([^_]+)__/g)) {
        admit(normalize(match[1] ?? match[2] ?? ''), 100);
    }
    // Wikilinks: [[term]] or [[term|display]] — only the target (group 1) counts
    for (const match of content.matchAll(/\[\[([^\]|]+)(?:\|[^\]]+)?\]\]/g)) {
        admit(normalize(match[1] ?? ''), 100);
    }
    // Inline code: exactly one backtick on each side, no newlines inside
    for (const match of content.matchAll(/(?<!`)(`[^`\n]+?`)(?!`)/g)) {
        const inner = normalize((match[1] ?? '').slice(1, -1));
        // Reject code-looking terms (braces, parens, semicolons, assignment)
        admit(inner, 60, !/[{}();=]/.test(inner));
    }
    return found;
}
|
|
59
|
+
// ----------------------------------------------------------------------------
|
|
60
|
+
// Gap detection + resolution
|
|
61
|
+
// ----------------------------------------------------------------------------
|
|
62
|
+
/**
 * Run gap detection after an ingest transaction commits.
 *
 * For each concept extracted from the ingested content:
 * 1. Count how many chunks across the KG contain the concept (case-insensitive)
 * 2. Check if a node with a matching title already exists
 * 3. If ≥3 mentions and no dedicated node → upsert into `kg_gaps`
 *
 * Also resolves any existing gaps that now have a dedicated node.
 *
 * @param writer The writer DB connection (same transaction context as ingest).
 * @param scope 'project' or 'personal'.
 * @param content The full file content that was just ingested.
 * @param nodesTitles Titles of all nodes that were just ingested (for resolution check).
 * @returns Counts of gaps created and gaps resolved by this invocation.
 */
export function detectGaps(writer, scope, content, nodesTitles) {
    let gapsCreated = 0;
    let gapsResolved = 0;
    // Phase 1: resolve existing gaps whose topics match newly-ingested node titles
    gapsResolved = resolveGaps(writer, scope, nodesTitles);
    // Phase 2: detect new gaps from concepts in the ingested content
    const concepts = extractConcepts(content);
    if (concepts.size === 0) {
        return { gaps_created: gapsCreated, gaps_resolved: gapsResolved };
    }
    // FIX: the concept is bound into a LIKE pattern, so `%`/`_` inside a term
    // (common in backtick identifiers like `snake_case`) previously acted as
    // wildcards and inflated mention counts. Metacharacters are now escaped
    // (see escapeLike below) and an ESCAPE clause declares `\` as the escape
    // char. SQLite's LIKE is already ASCII case-insensitive by default, and
    // collation does not affect LIKE, so the former `COLLATE NOCASE` on the
    // pattern was a no-op and is dropped.
    const countChunkMentions = writer.prepare(`SELECT count(*) AS c FROM kg_chunks ch
     JOIN kg_nodes n ON ch.node_id = n.id
     WHERE n.scope = ? AND ch.text LIKE '%' || ? || '%' ESCAPE '\\'`);
    const findDedicatedNode = writer.prepare(`SELECT id FROM kg_nodes WHERE scope = ? AND LOWER(title) = ? LIMIT 1`);
    // kg_gaps doesn't have a unique constraint on (scope, topic).
    // Check if the gap already exists and update or insert accordingly.
    const findGap = writer.prepare(`SELECT id, resolved_at FROM kg_gaps WHERE scope = ? AND topic = ? LIMIT 1`);
    const insertGap = writer.prepare(`INSERT INTO kg_gaps (scope, topic, first_seen_at, mentions_count)
     VALUES (?, ?, ?, ?)`);
    const updateGapCount = writer.prepare(`UPDATE kg_gaps SET mentions_count = ?, resolved_at = NULL WHERE id = ?`);
    // Escape LIKE metacharacters (backslash, percent, underscore) so a concept
    // is matched as a literal substring.
    const escapeLike = (term) => term.replace(/[\\%_]/g, '\\$&');
    const now = Date.now();
    for (const concept of concepts) {
        // Count mentions across the KG (literal substring, case-insensitive)
        const row = countChunkMentions.get(scope, escapeLike(concept));
        const count = row?.c ?? 0;
        if (count < 3)
            continue;
        // Check for a dedicated node (concepts are already lowercased)
        const dedicated = findDedicatedNode.get(scope, concept);
        if (dedicated)
            continue;
        // Upsert the gap
        const existing = findGap.get(scope, concept);
        if (existing) {
            // Update mention count; reopen if previously resolved
            updateGapCount.run(count, existing.id);
        }
        else {
            insertGap.run(scope, concept, now, count);
            gapsCreated++;
        }
    }
    if (gapsCreated > 0 || gapsResolved > 0) {
        logger.info({ scope, gapsCreated, gapsResolved, conceptsScanned: concepts.size }, 'gap detection complete');
    }
    return { gaps_created: gapsCreated, gaps_resolved: gapsResolved };
}
|
|
124
|
+
/**
 * Mark open gaps as resolved when their topic matches one of the given
 * node titles (compared case-insensitively). Each matching unresolved
 * row gets `resolved_at` set to the current epoch-ms timestamp.
 */
export function resolveGaps(writer, scope, nodesTitles) {
    if (nodesTitles.length === 0)
        return 0;
    const resolveStmt = writer.prepare(`UPDATE kg_gaps SET resolved_at = ?
     WHERE scope = ? AND LOWER(topic) = ? AND resolved_at IS NULL`);
    const timestamp = Date.now();
    // Skip empty titles, run one UPDATE per title, and total the affected rows.
    return nodesTitles
        .filter((title) => Boolean(title))
        .reduce((total, title) => total + resolveStmt.run(timestamp, scope, title.toLowerCase()).changes, 0);
}
|
|
143
|
+
/**
 * Fetch gap rows for a scope, highest mention count first. Unresolved
 * gaps only by default; `opts.resolved` switches to the full list
 * including already-resolved rows.
 */
export function queryGaps(reader, scope, opts) {
    // Pick the statement up front instead of branching on the return.
    const sql = opts?.resolved
        ? `SELECT id, topic, first_seen_at, mentions_count, resolved_at
       FROM kg_gaps WHERE scope = ? ORDER BY mentions_count DESC`
        : `SELECT id, topic, first_seen_at, mentions_count, resolved_at
       FROM kg_gaps WHERE scope = ? AND resolved_at IS NULL
       ORDER BY mentions_count DESC`;
    return reader.prepare(sql).all(scope);
}
|
|
160
|
+
//# sourceMappingURL=detector.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"detector.js","sourceRoot":"","sources":["../../src/gaps/detector.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,MAAM,EAAE,MAAM,4BAA4B,CAAC;AAEpD;;;;;;;;;;;;;;;GAeG;AAEH,+EAA+E;AAC/E,qBAAqB;AACrB,+EAA+E;AAE/E;;;;;;;;;GASG;AACH,MAAM,UAAU,eAAe,CAAC,OAAe;IAC7C,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAC;IAEnC,6BAA6B;IAC7B,MAAM,MAAM,GAAG,8BAA8B,CAAC;IAC9C,KAAK,MAAM,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QACzC,MAAM,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QACvD,IAAI,IAAI,CAAC,MAAM,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,IAAI,GAAG;YAAE,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IACjE,CAAC;IAED,0CAA0C;IAC1C,MAAM,UAAU,GAAG,iCAAiC,CAAC;IACrD,KAAK,MAAM,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC;QAC7C,MAAM,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAC/C,IAAI,IAAI,CAAC,MAAM,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,IAAI,GAAG;YAAE,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IACjE,CAAC;IAED,oDAAoD;IACpD,MAAM,UAAU,GAAG,0BAA0B,CAAC;IAC9C,KAAK,MAAM,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC;QAC7C,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACvB,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QACnD,gFAAgF;QAChF,IAAI,IAAI,CAAC,MAAM,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,IAAI,EAAE,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YACpE,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACrB,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,+EAA+E;AAC/E,6BAA6B;AAC7B,+EAA+E;AAE/E;;;;;;;;;;;;;;GAcG;AACH,MAAM,UAAU,UAAU,CACxB,MAA4B,EAC5B,KAAa,EACb,OAAe,EACf,WAAqB;IAErB,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,IAAI,YAAY,GAAG,CAAC,CAAC;IAErB,+EAA+E;IAC/E,YAAY,GAAG,WAAW,CAAC,MAAM,EAAE,KAAK,EAAE,WAAW,CAAC,CAAC;IAEvD,iEAAiE;IACjE,MAAM,QAAQ,GAAG,eAAe,CAAC,OAAO,CAAC,CAAC;IAE1C,IAAI,QAAQ,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,aAAa,EAAE,YAAY,EAAE,CAAC;IACpE,CAAC;IAED,MAAM,kBAAkB,GAAG,MAAM,CAAC,OAAO,CACvC;;wEAEoE,CACrE,CAAC;IAEF,MAAM,iBAAiB,GAAG,MAAM
,CAAC,OAAO,CACtC,sEAAsE,CACvE,CAAC;IAEF,8DAA8D;IAC9D,oEAAoE;IACpE,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,CAC5B,2EAA2E,CAC5E,CAAC;IAEF,MAAM,SAAS,GAAG,MAAM,CAAC,OAAO,CAC9B;yBACqB,CACtB,CAAC;IAEF,MAAM,cAAc,GAAG,MAAM,CAAC,OAAO,CACnC,wEAAwE,CACzE,CAAC;IAEF,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAEvB,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,+BAA+B;QAC/B,MAAM,GAAG,GAAG,kBAAkB,CAAC,GAAG,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QACnD,MAAM,KAAK,GAAG,GAAG,EAAE,CAAC,IAAI,CAAC,CAAC;QAC1B,IAAI,KAAK,GAAG,CAAC;YAAE,SAAS;QAExB,6BAA6B;QAC7B,MAAM,SAAS,GAAG,iBAAiB,CAAC,GAAG,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QACxD,IAAI,SAAS;YAAE,SAAS;QAExB,iBAAiB;QACjB,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QAC7C,IAAI,QAAQ,EAAE,CAAC;YACb,sDAAsD;YACtD,cAAc,CAAC,GAAG,CAAC,KAAK,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAC;QACzC,CAAC;aAAM,CAAC;YACN,SAAS,CAAC,GAAG,CAAC,KAAK,EAAE,OAAO,EAAE,GAAG,EAAE,KAAK,CAAC,CAAC;YAC1C,WAAW,EAAE,CAAC;QAChB,CAAC;IACH,CAAC;IAED,IAAI,WAAW,GAAG,CAAC,IAAI,YAAY,GAAG,CAAC,EAAE,CAAC;QACxC,MAAM,CAAC,IAAI,CACT,EAAE,KAAK,EAAE,WAAW,EAAE,YAAY,EAAE,eAAe,EAAE,QAAQ,CAAC,IAAI,EAAE,EACpE,wBAAwB,CACzB,CAAC;IACJ,CAAC;IAED,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,aAAa,EAAE,YAAY,EAAE,CAAC;AACpE,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,WAAW,CACzB,MAA4B,EAC5B,KAAa,EACb,WAAqB;IAErB,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAEvC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACvB,IAAI,QAAQ,GAAG,CAAC,CAAC;IAEjB,MAAM,WAAW,GAAG,MAAM,CAAC,OAAO,CAChC;mEAC+D,CAChE,CAAC;IAEF,KAAK,MAAM,KAAK,IAAI,WAAW,EAAE,CAAC;QAChC,IAAI,CAAC,KAAK;YAAE,SAAS;QACrB,MAAM,IAAI,GAAG,WAAW,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,EAAE,KAAK,CAAC,WAAW,EAAE,CAAC,CAAC;QAC9D,QAAQ,IAAI,IAAI,CAAC,OAAO,CAAC;IAC3B,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAcD;;;GAGG;AACH,MAAM,UAAU,SAAS,CACvB,MAA4B,EAC5B,KAAa,EACb,IAA6B;IAE7B,IAAI,IAAI,EAAE,QAAQ,EAAE,CAAC;QACnB,OAAO,MAAM;aACV,OAAO,CACN;qEAC6D,CAC9D;aACA,GAAG,CAAC,KAAK,CAAC,CAAC;IAChB,CAAC;IAED,OAAO,MAAM;SACV,OAAO,CACN;;sCAEgC,CACjC;SACA,GAAG,CAAC,KAAK,CAAC,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Token-counting budget gate.
|
|
3
|
+
*
|
|
4
|
+
* Implements CLAUDE.md §API Rules #6 budget math:
|
|
5
|
+
*
|
|
6
|
+
* envelope_reserve = 500 // bytes set aside for meta/logs/stale_files
|
|
7
|
+
* safety_margin = 0.9 // js-tiktoken is an estimator, not an oracle
|
|
8
|
+
* available = floor((max_tokens - envelope_reserve) * safety_margin)
|
|
9
|
+
*
|
|
10
|
+
* At the default `max_tokens=5000` the available budget for result bodies is:
|
|
11
|
+
* floor((5000 - 500) * 0.9) = 4050 tokens
|
|
12
|
+
*
|
|
13
|
+
* Truncation is greedy by rank: keep the highest-ranked item whole if it fits;
|
|
14
|
+
* otherwise emit a `too_large` sentinel so the caller can re-query with a
|
|
15
|
+
* higher `max_tokens` or fetch the node directly by id.
|
|
16
|
+
*
|
|
17
|
+
* The sentinel pattern is Loop 6.5 A3 / presearch.md D22. A single oversize
|
|
18
|
+
* item must NOT blackhole the whole response — we report its id + uri and
|
|
19
|
+
* let the LLM decide what to do next.
|
|
20
|
+
*
|
|
21
|
+
* Token counting uses the `p50k_base` encoder — close enough to Claude's
|
|
22
|
+
* tokenization for budgeting purposes, and the 10% safety margin absorbs the
|
|
23
|
+
* estimation error between tokenizers.
|
|
24
|
+
*/
|
|
25
|
+
/** Tokens reserved out of `max_tokens` for the response envelope (meta/logs/stale_files). */
export declare const ENVELOPE_RESERVE_TOKENS = 500;
/** Safety multiplier absorbing estimation error between the budgeting tokenizer and the model's. */
export declare const SAFETY_MARGIN = 0.9;
|
|
27
|
+
/**
 * Count tokens in a UTF-8 string.
 *
 * Fast path (long strings): return a character-based over-estimate. This
 * is strictly a ceiling — we'd rather emit a few extra `results_truncated`
 * responses than block the event loop for minutes on tokenization.
 *
 * Slow path (short strings): use the real p50k_base encoder for an exact
 * count. This is what matters for normal-size response bodies.
 *
 * The encoder is initialized once at module load and shared across calls.
 *
 * @param text The string to measure.
 * @returns Exact (short input) or over-estimated (long input) token count.
 */
export declare function countTokens(text: string): number;
|
|
40
|
+
/**
 * Given a user-facing `max_tokens` budget, compute the internal result-body
 * budget after subtracting the envelope reserve and applying the safety
 * margin. Always returns a non-negative integer.
 *
 * @param maxTokens User-facing `max_tokens` from the tool call.
 * @returns floor((maxTokens - ENVELOPE_RESERVE_TOKENS) * SAFETY_MARGIN), never below 0.
 */
export declare function computeInternalBudget(maxTokens: number): number;
|
|
46
|
+
/**
 * A too-large sentinel replaces a single item that would exceed the budget
 * on its own. The shape is deliberately minimal — id + source_uri so the
 * caller can re-query, plus the original token count so they can size a new
 * `max_tokens` request.
 */
export interface TooLargeSentinel {
    /** Discriminant: always `true`, distinguishing the sentinel from a real item. */
    too_large: true;
    /** Id of the replaced item, for a direct follow-up fetch. */
    id: string;
    /** Source URI of the replaced item. */
    source_uri: string;
    /** Token count of the oversize item, for sizing a new `max_tokens` request. */
    tokens: number;
}
|
|
58
|
+
/** Outcome of the greedy budget-fit loop (see `fitResults`). */
export interface FitResult<T> {
    /** Items kept in rank order; oversize items appear as `too_large` sentinels. */
    kept: Array<T | TooLargeSentinel>;
    /** True when the fit loop could not keep every item whole within the budget. */
    truncated: boolean;
    /** Tokens consumed by the kept result bodies. */
    tokensUsed: number;
    /** Internal budget derived from the caller's `max_tokens`. */
    tokensBudgeted: number;
}
|
|
64
|
+
/**
 * Greedy rank-order truncation. Iterates `items` in the order given (caller
 * is responsible for ranking first), measures each one's serialized token
 * count, and keeps items until the next one would exceed the internal
 * budget.
 *
 * If a single item's token count alone exceeds the budget, it is replaced
 * with a `too_large` sentinel and counted as zero body tokens (the sentinel
 * itself is tiny — ~20 tokens). The iteration then continues so that smaller
 * items after the oversize one can still land in the response.
 *
 * @param items Results, pre-ranked (highest rank first).
 * @param maxTokens User-facing `max_tokens` budget from the tool call.
 * @param serialize How to turn one item into the text we'll count. Usually
 *                  `JSON.stringify`. Broken out so the caller can include
 *                  framing (commas, wrapping object keys) in the count.
 * @param idOf Read the item's id for sentinel construction.
 * @param uriOf Read the item's source uri for sentinel construction.
 * @returns The kept items (with sentinels), a truncation flag, and token accounting.
 */
export declare function fitResults<T>(items: T[], maxTokens: number, serialize: (item: T) => string, idOf: (item: T) => string, uriOf: (item: T) => string): FitResult<T>;
|
|
84
|
+
/**
 * Count tokens in an already-serialized response body without running the
 * fit loop. Used by the tool handlers to populate `meta.tokens_used` after
 * the envelope has been built.
 *
 * @param envelopeJson The fully serialized response envelope.
 * @returns Token count of the serialized envelope.
 */
export declare function countEnvelopeTokens(envelopeJson: string): number;
|
|
90
|
+
//# sourceMappingURL=budget.d.ts.map
|