@leanlabsinnov/codegraph 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/bin.js +2 -2
- package/dist/{chunk-VFE242Y7.js → chunk-2TORJYBO.js} +53 -41
- package/dist/chunk-2TORJYBO.js.map +1 -0
- package/dist/{chunk-AMJXGXLM.js → chunk-36AWRLQ6.js} +36 -42
- package/dist/chunk-36AWRLQ6.js.map +1 -0
- package/dist/index.js +2 -2
- package/dist/{src-7P6XREHJ.js → src-PDNTANJD.js} +4 -4
- package/package.json +3 -3
- package/dist/chunk-AMJXGXLM.js.map +0 -1
- package/dist/chunk-VFE242Y7.js.map +0 -1
- package/dist/{src-7P6XREHJ.js.map → src-PDNTANJD.js.map} +0 -0
package/README.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# @
|
|
1
|
+
# @leanlabsinnov/codegraph
|
|
2
2
|
|
|
3
3
|
Live, queryable knowledge graph for your codebase. Indexes JS/TS into an embedded graph
|
|
4
4
|
DB with vector embeddings, then exposes a local MCP server that Claude Code, Cursor, and
|
|
@@ -12,7 +12,7 @@ external services.
|
|
|
12
12
|
## Install
|
|
13
13
|
|
|
14
14
|
```bash
|
|
15
|
-
npm i -g @
|
|
15
|
+
npm i -g @leanlabsinnov/codegraph
|
|
16
16
|
```
|
|
17
17
|
|
|
18
18
|
Requires Node 20+.
|
|
@@ -29,7 +29,7 @@ codegraph serve
|
|
|
29
29
|
Then point Claude Code / Cursor / Windsurf at `http://127.0.0.1:3748/mcp` with the bearer
|
|
30
30
|
token from `~/.codegraph/config.json`.
|
|
31
31
|
|
|
32
|
-
See the full [README on GitHub](https://github.com/
|
|
32
|
+
See the full [README on GitHub](https://github.com/leanlabsinnov/codegraph) for client setup, all
|
|
33
33
|
10 MCP tools, and troubleshooting.
|
|
34
34
|
|
|
35
35
|
## Commands
|
package/dist/bin.js
CHANGED
|
@@ -77,13 +77,7 @@ function buildSchemaStatements(opts) {
|
|
|
77
77
|
}
|
|
78
78
|
return statements;
|
|
79
79
|
}
|
|
80
|
-
function buildVectorIndexStatements() {
|
|
81
|
-
return [
|
|
82
|
-
"INSTALL VECTOR",
|
|
83
|
-
"LOAD EXTENSION VECTOR",
|
|
84
|
-
"CALL CREATE_VECTOR_INDEX('Symbol', 'embedding_idx', 'embedding', metric := 'cosine')"
|
|
85
|
-
];
|
|
86
|
-
}
|
|
80
|
+
var SEMANTIC_SEARCH_MODE = "brute-force";
|
|
87
81
|
var DEFAULT_EMBEDDING_DIMENSION = 1536;
|
|
88
82
|
|
|
89
83
|
// ../graph-db/src/client.ts
|
|
@@ -95,7 +89,6 @@ var GraphDb = class {
|
|
|
95
89
|
embeddingDimension;
|
|
96
90
|
db = null;
|
|
97
91
|
conn = null;
|
|
98
|
-
vectorIndexReady = false;
|
|
99
92
|
/**
|
|
100
93
|
* Cache of `conn.prepare()` results keyed by Cypher source. Kuzu's Node SDK requires a
|
|
101
94
|
* prepared statement for any parameterized query - reusing the prepared object keeps
|
|
@@ -116,7 +109,6 @@ var GraphDb = class {
|
|
|
116
109
|
this.preparedCache.clear();
|
|
117
110
|
this.conn = null;
|
|
118
111
|
this.db = null;
|
|
119
|
-
this.vectorIndexReady = false;
|
|
120
112
|
}
|
|
121
113
|
requireConn() {
|
|
122
114
|
if (!this.conn) {
|
|
@@ -125,8 +117,11 @@ var GraphDb = class {
|
|
|
125
117
|
return this.conn;
|
|
126
118
|
}
|
|
127
119
|
/**
|
|
128
|
-
* Idempotent migration
|
|
129
|
-
*
|
|
120
|
+
* Idempotent migration: creates the `Symbol` node table and one REL table per
|
|
121
|
+
* `EdgeKind`. The `embedding FLOAT[N]` column lives on `Symbol` but we deliberately
|
|
122
|
+
* skip Kuzu's `CREATE_VECTOR_INDEX` - semantic search is brute-force via
|
|
123
|
+
* `array_cosine_similarity` to work around kuzudb/kuzu#5965 and kuzudb/kuzu#6040.
|
|
124
|
+
* See `schema.ts` for the full rationale.
|
|
130
125
|
*/
|
|
131
126
|
async migrate() {
|
|
132
127
|
await this.connect();
|
|
@@ -134,22 +129,6 @@ var GraphDb = class {
|
|
|
134
129
|
for (const stmt of schemaStmts) {
|
|
135
130
|
await this.exec(stmt);
|
|
136
131
|
}
|
|
137
|
-
for (const stmt of buildVectorIndexStatements()) {
|
|
138
|
-
try {
|
|
139
|
-
await this.exec(stmt);
|
|
140
|
-
} catch (err) {
|
|
141
|
-
const message = err instanceof Error ? err.message : String(err);
|
|
142
|
-
if (isAlreadyExistsError(message)) continue;
|
|
143
|
-
if (/extension/i.test(message) && /(not found|missing|unsupported|disabled)/i.test(message)) {
|
|
144
|
-
console.warn(
|
|
145
|
-
`[codegraph] vector extension unavailable; semantic search disabled. Underlying: ${message}`
|
|
146
|
-
);
|
|
147
|
-
return;
|
|
148
|
-
}
|
|
149
|
-
throw new Error(`migrate failed on \`${stmt}\`: ${message}`);
|
|
150
|
-
}
|
|
151
|
-
}
|
|
152
|
-
this.vectorIndexReady = true;
|
|
153
132
|
}
|
|
154
133
|
/**
|
|
155
134
|
* Typed Cypher escape hatch.
|
|
@@ -192,19 +171,39 @@ var GraphDb = class {
|
|
|
192
171
|
return conn.execute(prepared, params);
|
|
193
172
|
}
|
|
194
173
|
/**
|
|
195
|
-
*
|
|
196
|
-
*
|
|
197
|
-
*
|
|
174
|
+
* Inserts nodes via batched UNWIND + bare CREATE. The whole property map (including
|
|
175
|
+
* `embedding` when present) is set in the CREATE clause - we deliberately avoid `SET`
|
|
176
|
+
* because Kuzu rejects writes to an HNSW-indexed column even after the index is dropped
|
|
177
|
+
* (kuzudb/kuzu#6040). Callers must wipe pre-existing rows with `deleteByRepo` first.
|
|
178
|
+
*
|
|
179
|
+
* In-batch duplicates (same `id`) are coalesced to the last occurrence to keep CREATE
|
|
180
|
+
* from violating the primary-key uniqueness constraint.
|
|
198
181
|
*/
|
|
199
182
|
async upsertNodes(nodes) {
|
|
200
183
|
if (nodes.length === 0) return;
|
|
201
184
|
await this.connect();
|
|
185
|
+
const deduped = dedupeById(nodes);
|
|
186
|
+
const withEmbedding = [];
|
|
187
|
+
const withoutEmbedding = [];
|
|
188
|
+
for (const n of deduped) {
|
|
189
|
+
if (Array.isArray(n.embedding) && n.embedding.length > 0) {
|
|
190
|
+
withEmbedding.push(n);
|
|
191
|
+
} else {
|
|
192
|
+
withoutEmbedding.push(n);
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
await this.createSymbolBatch(withoutEmbedding, false);
|
|
196
|
+
await this.createSymbolBatch(withEmbedding, true);
|
|
197
|
+
}
|
|
198
|
+
async createSymbolBatch(nodes, withEmbedding) {
|
|
199
|
+
if (nodes.length === 0) return;
|
|
202
200
|
const BATCH = 200;
|
|
203
|
-
const
|
|
204
|
-
const
|
|
201
|
+
const columns = [...SYMBOL_COLUMNS, ...withEmbedding ? ["embedding"] : []];
|
|
202
|
+
const propMap = columns.map((c) => `${c}: r.${c}`).join(", ");
|
|
203
|
+
const cypher = `UNWIND $batch AS r CREATE (n:Symbol {${propMap}})`;
|
|
205
204
|
for (let i = 0; i < nodes.length; i += BATCH) {
|
|
206
205
|
const slice = nodes.slice(i, i + BATCH);
|
|
207
|
-
const payload = slice.map(buildSymbolRow);
|
|
206
|
+
const payload = slice.map((n) => buildSymbolRow(n, withEmbedding, this.embeddingDimension));
|
|
208
207
|
await this.exec(cypher, { batch: payload });
|
|
209
208
|
}
|
|
210
209
|
}
|
|
@@ -291,12 +290,17 @@ var GraphDb = class {
|
|
|
291
290
|
const coverage = total === 0 ? 0 : embedded / total;
|
|
292
291
|
return { nodes, edges, embeddingCoverage: coverage };
|
|
293
292
|
}
|
|
294
|
-
/**
|
|
293
|
+
/**
|
|
294
|
+
* v0.1.x never creates an HNSW index - semantic search is brute-force via
|
|
295
|
+
* `array_cosine_similarity`. Always returns `false`. Kept on the surface so callers
|
|
296
|
+
* (e.g. `codegraph doctor`) can branch on a single boolean once the upstream Kuzu
|
|
297
|
+
* fixes ship and we flip the index back on.
|
|
298
|
+
*/
|
|
295
299
|
hasVectorIndex() {
|
|
296
|
-
return this.vectorIndexReady;
|
|
300
|
+
return false;
|
|
297
301
|
}
|
|
298
302
|
};
|
|
299
|
-
function buildSymbolRow(node) {
|
|
303
|
+
function buildSymbolRow(node, withEmbedding, embeddingDimension) {
|
|
300
304
|
const src = node;
|
|
301
305
|
const row = {};
|
|
302
306
|
for (const col of SYMBOL_COLUMNS) {
|
|
@@ -305,8 +309,19 @@ function buildSymbolRow(node) {
|
|
|
305
309
|
}
|
|
306
310
|
row.id = node.id;
|
|
307
311
|
row.kind = node.kind;
|
|
312
|
+
if (withEmbedding) {
|
|
313
|
+
const vec = node.embedding;
|
|
314
|
+
row.embedding = Array.isArray(vec) && vec.length === embeddingDimension ? vec : new Array(embeddingDimension).fill(0);
|
|
315
|
+
}
|
|
308
316
|
return row;
|
|
309
317
|
}
|
|
318
|
+
function dedupeById(nodes) {
|
|
319
|
+
const seen = /* @__PURE__ */ new Map();
|
|
320
|
+
for (const n of nodes) {
|
|
321
|
+
seen.set(n.id, n);
|
|
322
|
+
}
|
|
323
|
+
return Array.from(seen.values());
|
|
324
|
+
}
|
|
310
325
|
function normalizeRow(row) {
|
|
311
326
|
if (row instanceof Map) {
|
|
312
327
|
const out = {};
|
|
@@ -342,9 +357,6 @@ async function collectAll(result) {
|
|
|
342
357
|
if (typeof getAll !== "function") return [];
|
|
343
358
|
return getAll.call(target);
|
|
344
359
|
}
|
|
345
|
-
function isAlreadyExistsError(message) {
|
|
346
|
-
return /already exists/i.test(message) || /already loaded/i.test(message) || /already installed/i.test(message) || /duplicate (table|index)/i.test(message);
|
|
347
|
-
}
|
|
348
360
|
|
|
349
361
|
export {
|
|
350
362
|
SYMBOL_COLUMN_SPEC,
|
|
@@ -352,9 +364,9 @@ export {
|
|
|
352
364
|
defaultFor,
|
|
353
365
|
EDGE_COLUMNS,
|
|
354
366
|
buildSchemaStatements,
|
|
355
|
-
buildVectorIndexStatements,
|
|
367
|
+
SEMANTIC_SEARCH_MODE,
|
|
356
368
|
DEFAULT_EMBEDDING_DIMENSION,
|
|
357
369
|
defaultDbPath,
|
|
358
370
|
GraphDb
|
|
359
371
|
};
|
|
360
|
-
//# sourceMappingURL=chunk-VFE242Y7.js.map
|
|
372
|
+
//# sourceMappingURL=chunk-2TORJYBO.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../graph-db/src/client.ts","../../graph-db/src/schema.ts"],"sourcesContent":["import { mkdir } from \"node:fs/promises\";\nimport { homedir } from \"node:os\";\nimport { dirname, resolve } from \"node:path\";\nimport { EDGE_KINDS, NODE_KINDS, type EdgeKind } from \"@codegraph/shared\";\nimport * as kuzu from \"kuzu\";\nimport {\n DEFAULT_EMBEDDING_DIMENSION,\n SYMBOL_COLUMNS,\n buildSchemaStatements,\n defaultFor,\n} from \"./schema.js\";\nimport type { QueryResult, UpsertEdgeInput, UpsertNodeInput } from \"./types.js\";\n\nexport interface GraphDbOptions {\n /** Directory where Kuzu stores its on-disk database files. Defaults to `~/.codegraph/graph`. */\n dbPath?: string;\n /**\n * Legacy `url` option kept for back-compat with Phase-1 callers. Ignored at runtime -\n * Kuzu is embedded - but accepted so existing call sites compile while the rest of the\n * codebase migrates to `dbPath`.\n */\n url?: string;\n /** Vector dimension for the `Symbol.embedding` column. Baked into the schema at create time. */\n embeddingDimension?: number;\n}\n\n/** Default on-disk location for the embedded graph. */\nexport function defaultDbPath(): string {\n return resolve(homedir(), \".codegraph\", \"graph\");\n}\n\n/**\n * Thin, typed wrapper around the embedded Kuzu database.\n *\n * Public surface (intentionally identical to the Phase-1 FalkorDB client so callers don't\n * change): connect / close / migrate / query / upsertNodes / upsertEdges / deleteByRepo /\n * stats. Internals are pure Kuzu.\n */\nexport class GraphDb {\n private readonly dbPath: string;\n private readonly embeddingDimension: number;\n private db: kuzu.Database | null = null;\n private conn: kuzu.Connection | null = null;\n /**\n * Cache of `conn.prepare()` results keyed by Cypher source. 
Kuzu's Node SDK requires a\n * prepared statement for any parameterized query - reusing the prepared object keeps\n * UNWIND-batched upserts fast.\n */\n private preparedCache = new Map<string, kuzu.PreparedStatement>();\n\n constructor(opts: GraphDbOptions = {}) {\n this.dbPath = opts.dbPath ?? defaultDbPath();\n this.embeddingDimension = opts.embeddingDimension ?? DEFAULT_EMBEDDING_DIMENSION;\n }\n\n async connect(): Promise<void> {\n if (this.conn) return;\n await mkdir(dirname(this.dbPath), { recursive: true });\n this.db = new kuzu.Database(this.dbPath);\n this.conn = new kuzu.Connection(this.db);\n }\n\n async close(): Promise<void> {\n // We deliberately do NOT call `conn.close()` / `db.close()` on Kuzu's Node bindings.\n // In 0.11.x those native handles are also disposed by the binding's process-exit hook,\n // and double-disposing can SIGSEGV the worker on cleanup. Dropping references is\n // enough for the GC to release the underlying memory before the process exits.\n this.preparedCache.clear();\n this.conn = null;\n this.db = null;\n }\n\n private requireConn(): kuzu.Connection {\n if (!this.conn) {\n throw new Error(\"GraphDb not connected. Call connect() first.\");\n }\n return this.conn;\n }\n\n /**\n * Idempotent migration: creates the `Symbol` node table and one REL table per\n * `EdgeKind`. 
The `embedding FLOAT[N]` column lives on `Symbol` but we deliberately\n * skip Kuzu's `CREATE_VECTOR_INDEX` - semantic search is brute-force via\n * `array_cosine_similarity` to work around kuzudb/kuzu#5965 and kuzudb/kuzu#6040.\n * See `schema.ts` for the full rationale.\n */\n async migrate(): Promise<void> {\n await this.connect();\n const schemaStmts = buildSchemaStatements({ embeddingDimension: this.embeddingDimension });\n for (const stmt of schemaStmts) {\n await this.exec(stmt);\n }\n }\n\n /**\n * Typed Cypher escape hatch.\n *\n * Kuzu returns BIGINT columns as native BigInt; we coerce to plain `number` when safe so\n * downstream JSON serialization (MCP responses, snapshot tests) does not need bespoke\n * handling.\n */\n async query<T = Record<string, unknown>>(\n cypher: string,\n params: Record<string, unknown> = {},\n ): Promise<QueryResult<T>> {\n const result = await this.runQuery(cypher, params);\n const raw = await collectAll(result);\n const data = raw.map((row) => normalizeRow(row)) as T[];\n const headers = raw.length > 0 ? Object.keys(raw[0] ?? {}) : [];\n return { data, headers, metadata: [] };\n }\n\n /** Fire-and-forget DDL/exec. */\n private async exec(cypher: string, params: Record<string, unknown> = {}): Promise<void> {\n await this.runQuery(cypher, params);\n }\n\n /**\n * Bridge to Kuzu's two execution paths:\n * - `conn.query(stmt)` for unparameterized statements (the second positional arg is a\n * `progressCallback`, NOT params - mistaking that is the #1 way to confuse the API).\n * - `conn.prepare(stmt) + conn.execute(prepared, params)` for anything with `$name`\n * placeholders. 
We cache the prepared statement so UNWIND batches reuse it.\n */\n private async runQuery(cypher: string, params: Record<string, unknown>): Promise<unknown> {\n const conn = this.requireConn();\n if (Object.keys(params).length === 0) {\n return conn.query(cypher);\n }\n let prepared = this.preparedCache.get(cypher);\n if (!prepared) {\n prepared = await conn.prepare(cypher);\n if (!prepared.isSuccess()) {\n throw new Error(prepared.getErrorMessage());\n }\n this.preparedCache.set(cypher, prepared);\n }\n // Cast through `unknown`: Kuzu's bindings advertise a strict `KuzuValue` union, but\n // we can pass through any JSON-serializable value the embedded engine accepts (nested\n // structs and lists are converted at the native layer).\n return conn.execute(prepared, params as unknown as Parameters<kuzu.Connection[\"execute\"]>[1]);\n }\n\n /**\n * Inserts nodes via batched UNWIND + bare CREATE. The whole property map (including\n * `embedding` when present) is set in the CREATE clause - we deliberately avoid `SET`\n * because Kuzu rejects writes to an HNSW-indexed column even after the index is dropped\n * (kuzudb/kuzu#6040). 
Callers must wipe pre-existing rows with `deleteByRepo` first.\n *\n * In-batch duplicates (same `id`) are coalesced to the last occurrence to keep CREATE\n * from violating the primary-key uniqueness constraint.\n */\n async upsertNodes(nodes: UpsertNodeInput[]): Promise<void> {\n if (nodes.length === 0) return;\n await this.connect();\n const deduped = dedupeById(nodes);\n const withEmbedding: UpsertNodeInput[] = [];\n const withoutEmbedding: UpsertNodeInput[] = [];\n for (const n of deduped) {\n if (Array.isArray(n.embedding) && n.embedding.length > 0) {\n withEmbedding.push(n);\n } else {\n withoutEmbedding.push(n);\n }\n }\n await this.createSymbolBatch(withoutEmbedding, false);\n await this.createSymbolBatch(withEmbedding, true);\n }\n\n private async createSymbolBatch(\n nodes: UpsertNodeInput[],\n withEmbedding: boolean,\n ): Promise<void> {\n if (nodes.length === 0) return;\n const BATCH = 200;\n const columns = [...SYMBOL_COLUMNS, ...(withEmbedding ? ([\"embedding\"] as const) : [])];\n const propMap = columns.map((c) => `${c}: r.${c}`).join(\", \");\n const cypher = `UNWIND $batch AS r CREATE (n:Symbol {${propMap}})`;\n for (let i = 0; i < nodes.length; i += BATCH) {\n const slice = nodes.slice(i, i + BATCH);\n const payload = slice.map((n) => buildSymbolRow(n, withEmbedding, this.embeddingDimension));\n await this.exec(cypher, { batch: payload });\n }\n }\n\n /**\n * Upserts edges. 
Both endpoints must already exist as `Symbol` nodes; rows where the\n * MATCH fails are silently dropped, matching Cypher semantics.\n *\n * Uses CREATE because the orchestrator wipes the repo's slice before writing, so\n * duplicates can't pre-exist within a single index pass.\n */\n async upsertEdges(edges: UpsertEdgeInput[]): Promise<void> {\n if (edges.length === 0) return;\n await this.connect();\n const byKind = new Map<EdgeKind, UpsertEdgeInput[]>();\n for (const e of edges) {\n const bucket = byKind.get(e.kind);\n if (bucket) bucket.push(e);\n else byKind.set(e.kind, [e]);\n }\n const BATCH = 500;\n for (const [kind, batch] of byKind) {\n const cypher = `UNWIND $batch AS r MATCH (a:Symbol {id: r.fromId}) MATCH (b:Symbol {id: r.toId}) CREATE (a)-[e:${kind} {line: r.line}]->(b)`;\n for (let i = 0; i < batch.length; i += BATCH) {\n const slice = batch.slice(i, i + BATCH);\n // Use 0 (not null) for missing line numbers so Kuzu can infer the struct field\n // as INT64 even when an entire batch happens to have no `line` set.\n const payload = slice.map((e) => ({\n fromId: e.fromId,\n toId: e.toId,\n line: typeof e.line === \"number\" ? e.line : 0,\n }));\n await this.exec(cypher, { batch: payload });\n }\n }\n }\n\n /**\n * Deletes all nodes (and incident edges via DETACH DELETE) for a repo. 
If `paths` is\n * provided, restricts the delete to nodes whose `path` is in the list - used by\n * incremental re-indexing.\n */\n async deleteByRepo(repoId: string, paths?: string[]): Promise<void> {\n await this.connect();\n if (paths && paths.length > 0) {\n await this.exec(\n \"MATCH (n:Symbol) WHERE n.repoId = $repoId AND n.path IN $paths DETACH DELETE n\",\n { repoId, paths },\n );\n return;\n }\n await this.exec(\"MATCH (n:Symbol) WHERE n.repoId = $repoId DETACH DELETE n\", { repoId });\n }\n\n /**\n * Returns counts of nodes (per kind) and edges (per kind) for a repo, plus the share of\n * non-File nodes that carry an embedding.\n */\n async stats(repoId: string): Promise<{\n nodes: Record<string, number>;\n edges: Record<string, number>;\n embeddingCoverage: number;\n }> {\n await this.connect();\n const nodes: Record<string, number> = {};\n for (const kind of NODE_KINDS) {\n const r = await this.query<{ count: number }>(\n \"MATCH (n:Symbol) WHERE n.repoId = $repoId AND n.kind = $kind RETURN count(n) AS count\",\n { repoId, kind },\n );\n nodes[kind] = Number(r.data[0]?.count ?? 0);\n }\n const edges: Record<string, number> = {};\n for (const kind of EDGE_KINDS) {\n const r = await this.query<{ count: number }>(\n `MATCH (a:Symbol)-[r:${kind}]->(b:Symbol)\n WHERE a.repoId = $repoId AND b.repoId = $repoId\n RETURN count(r) AS count`,\n { repoId },\n );\n edges[kind] = Number(r.data[0]?.count ?? 0);\n }\n const cov = await this.query<{ total: number | bigint; embedded: number | bigint }>(\n `MATCH (n:Symbol)\n WHERE n.repoId = $repoId AND n.kind <> 'File'\n RETURN count(n) AS total,\n count(n.embedding) AS embedded`,\n { repoId },\n );\n const row = cov.data[0];\n const total = Number(row?.total ?? 0);\n const embedded = Number(row?.embedded ?? 0);\n const coverage = total === 0 ? 
0 : embedded / total;\n return { nodes, edges, embeddingCoverage: coverage };\n }\n\n /**\n * v0.1.x never creates an HNSW index - semantic search is brute-force via\n * `array_cosine_similarity`. Always returns `false`. Kept on the surface so callers\n * (e.g. `codegraph doctor`) can branch on a single boolean once the upstream Kuzu\n * fixes ship and we flip the index back on.\n */\n hasVectorIndex(): boolean {\n return false;\n }\n}\n\n/**\n * Build a fully-populated row for a Kuzu UNWIND batch. Every column in `SYMBOL_COLUMNS`\n * is present (typed default when missing) so Kuzu can infer a homogeneous struct schema\n * for the batch parameter. When `withEmbedding` is true the `embedding` field is also\n * populated - we either use the provided vector or a zero-vector of the configured\n * dimension so the struct schema stays uniform across the batch.\n */\nfunction buildSymbolRow(\n node: UpsertNodeInput,\n withEmbedding: boolean,\n embeddingDimension: number,\n): Record<string, unknown> {\n const src = node as unknown as Record<string, unknown>;\n const row: Record<string, unknown> = {};\n for (const col of SYMBOL_COLUMNS) {\n const value = src[col];\n row[col] = value === undefined || value === null ? defaultFor(col) : value;\n }\n row.id = node.id;\n row.kind = node.kind;\n if (withEmbedding) {\n const vec = node.embedding;\n row.embedding =\n Array.isArray(vec) && vec.length === embeddingDimension\n ? vec\n : new Array<number>(embeddingDimension).fill(0);\n }\n return row;\n}\n\n/**\n * Coalesce same-id rows down to the last occurrence. 
Required because we now use bare\n * CREATE (not MERGE) for inserts and Kuzu rejects primary-key collisions inside a single\n * UNWIND batch.\n */\nfunction dedupeById(nodes: UpsertNodeInput[]): UpsertNodeInput[] {\n const seen = new Map<string, UpsertNodeInput>();\n for (const n of nodes) {\n seen.set(n.id, n);\n }\n return Array.from(seen.values());\n}\n\n/** Convert Kuzu's row representation (Map or plain object) into a plain JSON object. */\nfunction normalizeRow(row: unknown): Record<string, unknown> {\n if (row instanceof Map) {\n const out: Record<string, unknown> = {};\n for (const [k, v] of row) {\n out[String(k)] = coerceValue(v);\n }\n return out;\n }\n if (row && typeof row === \"object\") {\n const src = row as Record<string, unknown>;\n const out: Record<string, unknown> = {};\n for (const k of Object.keys(src)) {\n out[k] = coerceValue(src[k]);\n }\n return out;\n }\n return { value: coerceValue(row) };\n}\n\n/**\n * Kuzu returns BIGINT columns as JS BigInt. Coerce to `number` when within Number.MAX_SAFE\n * for JSON-friendly downstream consumption.\n */\nfunction coerceValue(value: unknown): unknown {\n if (typeof value === \"bigint\") {\n if (value <= BigInt(Number.MAX_SAFE_INTEGER) && value >= BigInt(Number.MIN_SAFE_INTEGER)) {\n return Number(value);\n }\n return value.toString();\n }\n if (Array.isArray(value)) return value.map(coerceValue);\n return value;\n}\n\n/** Drain a Kuzu QueryResult (or array of them) into an array of row objects. */\nasync function collectAll(result: unknown): Promise<unknown[]> {\n // Multi-statement queries return an array; we keep only the last one (matches how the\n // final statement is the one that carries a `RETURN`).\n const target = Array.isArray(result) ? 
result[result.length - 1] : result;\n if (!target) return [];\n const getAll = (target as { getAll?: () => Promise<unknown[]> }).getAll;\n if (typeof getAll !== \"function\") return [];\n return getAll.call(target);\n}\n\n","import { EDGE_KINDS } from \"@codegraph/shared\";\n\n/**\n * Kuzu is schema-first. Unlike FalkorDB which is schema-less, every column we ever want to\n * SET on a node must exist up-front. We use ONE `Symbol` node table with a `kind` column\n * (Kuzu does not support multi-labels), and one REL table per `EdgeKind`.\n *\n * Columns are the union of every field across the `GraphNode` discriminated union in\n * `@codegraph/shared` plus the two embedding-namespace fields. Fields that are not\n * relevant to a given kind stay NULL.\n */\n\n/**\n * Per-column metadata so the upserter can build batches with explicit typed defaults.\n *\n * Kuzu's struct parameter type inference fails when a column is null on every row in a\n * batch (it defaults to STRING and rejects assignment to a BOOL/INT64 column). Concrete\n * defaults keep inference deterministic and let us skip clunky CAST() clauses.\n *\n * Convention: optional booleans default to `false`, optional ints to `0`, optional strings\n * to `\"\"`. 
We never check `WHERE n.foo IS NULL` in queries, so the lost null-distinction\n * is acceptable for v0.1.0.\n */\nexport const SYMBOL_COLUMN_SPEC = {\n id: \"STRING\",\n kind: \"STRING\",\n repoId: \"STRING\",\n name: \"STRING\",\n path: \"STRING\",\n lineStart: \"INT64\",\n lineEnd: \"INT64\",\n signature: \"STRING\",\n leadingComment: \"STRING\",\n isExported: \"BOOLEAN\",\n // File-specific\n language: \"STRING\",\n sizeBytes: \"INT64\",\n contentHash: \"STRING\",\n // Function-specific\n isAsync: \"BOOLEAN\",\n isArrow: \"BOOLEAN\",\n // Route-specific\n method: \"STRING\",\n routePath: \"STRING\",\n framework: \"STRING\",\n // Embedding namespace tag\n embeddingNamespace: \"STRING\",\n} as const;\n\nexport type SymbolColumn = keyof typeof SYMBOL_COLUMN_SPEC;\n\nexport const SYMBOL_COLUMNS = Object.keys(SYMBOL_COLUMN_SPEC) as SymbolColumn[];\n\n/** Return the typed default for an unset optional column. */\nexport function defaultFor(column: SymbolColumn): unknown {\n const t = SYMBOL_COLUMN_SPEC[column];\n if (t === \"BOOLEAN\") return false;\n if (t === \"INT64\") return 0;\n return \"\";\n}\n\n/** Optional per-edge metadata. Currently only `line`. */\nexport const EDGE_COLUMNS = [\"line\"] as const;\n\nexport type EdgeColumn = (typeof EDGE_COLUMNS)[number];\n\n/**\n * DDL statements that bring an empty Kuzu database to the codegraph schema.\n * `IF NOT EXISTS` makes `migrate()` idempotent so it can run on every connect.\n *\n * `embedding` is a fixed-dimension column - dimension is configured at migrate time and\n * baked into the schema. If a user later switches to an embedding provider with a\n * different dimension they must delete the on-disk graph directory to recreate it. 
The\n * embedding-namespace tag ensures we never silently mix dimensions.\n */\nexport function buildSchemaStatements(opts: { embeddingDimension: number }): string[] {\n const columnDefs = [\n \"id STRING\",\n \"kind STRING\",\n \"repoId STRING\",\n \"name STRING\",\n \"path STRING\",\n \"lineStart INT64\",\n \"lineEnd INT64\",\n \"signature STRING\",\n \"leadingComment STRING\",\n \"isExported BOOLEAN\",\n \"language STRING\",\n \"sizeBytes INT64\",\n \"contentHash STRING\",\n \"isAsync BOOLEAN\",\n \"isArrow BOOLEAN\",\n \"method STRING\",\n \"routePath STRING\",\n \"framework STRING\",\n \"embeddingNamespace STRING\",\n `embedding FLOAT[${opts.embeddingDimension}]`,\n \"PRIMARY KEY (id)\",\n ];\n const statements: string[] = [\n `CREATE NODE TABLE IF NOT EXISTS Symbol(${columnDefs.join(\", \")})`,\n ];\n for (const kind of EDGE_KINDS) {\n statements.push(\n `CREATE REL TABLE IF NOT EXISTS ${kind}(FROM Symbol TO Symbol, line INT64)`,\n );\n }\n return statements;\n}\n\n/**\n * Semantic search in v0.1.x is intentionally brute-force via Kuzu's built-in\n * `array_cosine_similarity` function - we do NOT create an HNSW vector index.\n *\n * Why: Kuzu 0.11.x has two open issues that make the HNSW path unusable for a\n * mutable graph workload:\n * - kuzudb/kuzu#5965: SET on a vector-indexed column is rejected with\n * \"Cannot set property vec in table embeddings because it is used in one or more\n * indexes\". The Kuzu team's own recommended workaround in that thread is\n * \"delay creation of the index itself\".\n * - kuzudb/kuzu#6040: DROP_VECTOR_INDEX leaves stale on-disk metadata, so once a\n * column has ever been indexed it becomes permanently un-writable - even fresh\n * CREATEs fail with \"Catalog exception: _N_<index>_UPPER does not exist\".\n *\n * `array_cosine_similarity` is a core Kuzu function (not part of the vector extension)\n * and runs in microseconds for the corpus sizes Phase 1 targets. 
We will switch back\n * to `CALL CREATE_VECTOR_INDEX` / `QUERY_VECTOR_INDEX` once the upstream fixes ship.\n */\nexport const SEMANTIC_SEARCH_MODE = \"brute-force\" as const;\n\n/** Default embedding dimension when none is supplied. Matches `text-embedding-3-small`. */\nexport const DEFAULT_EMBEDDING_DIMENSION = 1536;\n"],"mappings":";;;;;;AAAA,SAAS,aAAa;AACtB,SAAS,eAAe;AACxB,SAAS,SAAS,eAAe;AAEjC,YAAY,UAAU;;;ACmBf,IAAM,qBAAqB;AAAA,EAChC,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,QAAQ;AAAA,EACR,MAAM;AAAA,EACN,MAAM;AAAA,EACN,WAAW;AAAA,EACX,SAAS;AAAA,EACT,WAAW;AAAA,EACX,gBAAgB;AAAA,EAChB,YAAY;AAAA;AAAA,EAEZ,UAAU;AAAA,EACV,WAAW;AAAA,EACX,aAAa;AAAA;AAAA,EAEb,SAAS;AAAA,EACT,SAAS;AAAA;AAAA,EAET,QAAQ;AAAA,EACR,WAAW;AAAA,EACX,WAAW;AAAA;AAAA,EAEX,oBAAoB;AACtB;AAIO,IAAM,iBAAiB,OAAO,KAAK,kBAAkB;AAGrD,SAAS,WAAW,QAA+B;AACxD,QAAM,IAAI,mBAAmB,MAAM;AACnC,MAAI,MAAM,UAAW,QAAO;AAC5B,MAAI,MAAM,QAAS,QAAO;AAC1B,SAAO;AACT;AAGO,IAAM,eAAe,CAAC,MAAM;AAa5B,SAAS,sBAAsB,MAAgD;AACpF,QAAM,aAAa;AAAA,IACjB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,mBAAmB,KAAK,kBAAkB;AAAA,IAC1C;AAAA,EACF;AACA,QAAM,aAAuB;AAAA,IAC3B,0CAA0C,WAAW,KAAK,IAAI,CAAC;AAAA,EACjE;AACA,aAAW,QAAQ,YAAY;AAC7B,eAAW;AAAA,MACT,kCAAkC,IAAI;AAAA,IACxC;AAAA,EACF;AACA,SAAO;AACT;AAoBO,IAAM,uBAAuB;AAG7B,IAAM,8BAA8B;;;ADxGpC,SAAS,gBAAwB;AACtC,SAAO,QAAQ,QAAQ,GAAG,cAAc,OAAO;AACjD;AASO,IAAM,UAAN,MAAc;AAAA,EACF;AAAA,EACA;AAAA,EACT,KAA2B;AAAA,EAC3B,OAA+B;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAM/B,gBAAgB,oBAAI,IAAoC;AAAA,EAEhE,YAAY,OAAuB,CAAC,GAAG;AACrC,SAAK,SAAS,KAAK,UAAU,cAAc;AAC3C,SAAK,qBAAqB,KAAK,sBAAsB;AAAA,EACvD;AAAA,EAEA,MAAM,UAAyB;AAC7B,QAAI,KAAK,KAAM;AACf,UAAM,MAAM,QAAQ,KAAK,MAAM,GAAG,EAAE,WAAW,KAAK,CAAC;AACrD,SAAK,KAAK,IAAS,cAAS,KAAK,MAAM;AACvC,SAAK,OAAO,IAAS,gBAAW,KAAK,EAAE;AAAA,EACzC;AAAA,EAEA,MAAM,QAAuB;AAK3B,SAAK,cAAc,MAAM;AACzB,SAAK,OAAO;AACZ,SAAK,KAAK;AAAA,EACZ;AAAA,EAEQ,cAA+B;AACrC,QAAI,CAAC,KAAK,MAAM;AACd
,YAAM,IAAI,MAAM,8CAA8C;AAAA,IAChE;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,UAAyB;AAC7B,UAAM,KAAK,QAAQ;AACnB,UAAM,cAAc,sBAAsB,EAAE,oBAAoB,KAAK,mBAAmB,CAAC;AACzF,eAAW,QAAQ,aAAa;AAC9B,YAAM,KAAK,KAAK,IAAI;AAAA,IACtB;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,MACJ,QACA,SAAkC,CAAC,GACV;AACzB,UAAM,SAAS,MAAM,KAAK,SAAS,QAAQ,MAAM;AACjD,UAAM,MAAM,MAAM,WAAW,MAAM;AACnC,UAAM,OAAO,IAAI,IAAI,CAAC,QAAQ,aAAa,GAAG,CAAC;AAC/C,UAAM,UAAU,IAAI,SAAS,IAAI,OAAO,KAAK,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC;AAC9D,WAAO,EAAE,MAAM,SAAS,UAAU,CAAC,EAAE;AAAA,EACvC;AAAA;AAAA,EAGA,MAAc,KAAK,QAAgB,SAAkC,CAAC,GAAkB;AACtF,UAAM,KAAK,SAAS,QAAQ,MAAM;AAAA,EACpC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAc,SAAS,QAAgB,QAAmD;AACxF,UAAM,OAAO,KAAK,YAAY;AAC9B,QAAI,OAAO,KAAK,MAAM,EAAE,WAAW,GAAG;AACpC,aAAO,KAAK,MAAM,MAAM;AAAA,IAC1B;AACA,QAAI,WAAW,KAAK,cAAc,IAAI,MAAM;AAC5C,QAAI,CAAC,UAAU;AACb,iBAAW,MAAM,KAAK,QAAQ,MAAM;AACpC,UAAI,CAAC,SAAS,UAAU,GAAG;AACzB,cAAM,IAAI,MAAM,SAAS,gBAAgB,CAAC;AAAA,MAC5C;AACA,WAAK,cAAc,IAAI,QAAQ,QAAQ;AAAA,IACzC;AAIA,WAAO,KAAK,QAAQ,UAAU,MAA8D;AAAA,EAC9F;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWA,MAAM,YAAY,OAAyC;AACzD,QAAI,MAAM,WAAW,EAAG;AACxB,UAAM,KAAK,QAAQ;AACnB,UAAM,UAAU,WAAW,KAAK;AAChC,UAAM,gBAAmC,CAAC;AAC1C,UAAM,mBAAsC,CAAC;AAC7C,eAAW,KAAK,SAAS;AACvB,UAAI,MAAM,QAAQ,EAAE,SAAS,KAAK,EAAE,UAAU,SAAS,GAAG;AACxD,sBAAc,KAAK,CAAC;AAAA,MACtB,OAAO;AACL,yBAAiB,KAAK,CAAC;AAAA,MACzB;AAAA,IACF;AACA,UAAM,KAAK,kBAAkB,kBAAkB,KAAK;AACpD,UAAM,KAAK,kBAAkB,eAAe,IAAI;AAAA,EAClD;AAAA,EAEA,MAAc,kBACZ,OACA,eACe;AACf,QAAI,MAAM,WAAW,EAAG;AACxB,UAAM,QAAQ;AACd,UAAM,UAAU,CAAC,GAAG,gBAAgB,GAAI,gBAAiB,CAAC,WAAW,IAAc,CAAC,CAAE;AACtF,UAAM,UAAU,QAAQ,IAAI,CAAC,MAAM,GAAG,CAAC,OAAO,CAAC,EAAE,EAAE,KAAK,IAAI;AAC5D,UAAM,SAAS,wCAAwC,OAAO;AAC9D,aAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK,OAAO;AAC5C,YAAM,QAAQ,MAAM,MAAM,GAAG,IAAI,KAAK;AACtC,YAAM,UAAU,MAAM,IAAI,CAAC,MAAM,eAAe,GAAG,eAAe,KAAK,kBAAkB,CAAC;AAC1F,YAAM,KAAK,KAAK,QAAQ,EAAE,OAAO,QAAQ,CAAC;AAAA,IAC5C;AAAA,EACF;AAAA;AAAA;AAA
A;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,YAAY,OAAyC;AACzD,QAAI,MAAM,WAAW,EAAG;AACxB,UAAM,KAAK,QAAQ;AACnB,UAAM,SAAS,oBAAI,IAAiC;AACpD,eAAW,KAAK,OAAO;AACrB,YAAM,SAAS,OAAO,IAAI,EAAE,IAAI;AAChC,UAAI,OAAQ,QAAO,KAAK,CAAC;AAAA,UACpB,QAAO,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC;AAAA,IAC7B;AACA,UAAM,QAAQ;AACd,eAAW,CAAC,MAAM,KAAK,KAAK,QAAQ;AAClC,YAAM,SAAS,kGAAkG,IAAI;AACrH,eAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK,OAAO;AAC5C,cAAM,QAAQ,MAAM,MAAM,GAAG,IAAI,KAAK;AAGtC,cAAM,UAAU,MAAM,IAAI,CAAC,OAAO;AAAA,UAChC,QAAQ,EAAE;AAAA,UACV,MAAM,EAAE;AAAA,UACR,MAAM,OAAO,EAAE,SAAS,WAAW,EAAE,OAAO;AAAA,QAC9C,EAAE;AACF,cAAM,KAAK,KAAK,QAAQ,EAAE,OAAO,QAAQ,CAAC;AAAA,MAC5C;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,aAAa,QAAgB,OAAiC;AAClE,UAAM,KAAK,QAAQ;AACnB,QAAI,SAAS,MAAM,SAAS,GAAG;AAC7B,YAAM,KAAK;AAAA,QACT;AAAA,QACA,EAAE,QAAQ,MAAM;AAAA,MAClB;AACA;AAAA,IACF;AACA,UAAM,KAAK,KAAK,6DAA6D,EAAE,OAAO,CAAC;AAAA,EACzF;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAM,MAAM,QAIT;AACD,UAAM,KAAK,QAAQ;AACnB,UAAM,QAAgC,CAAC;AACvC,eAAW,QAAQ,YAAY;AAC7B,YAAM,IAAI,MAAM,KAAK;AAAA,QACnB;AAAA,QACA,EAAE,QAAQ,KAAK;AAAA,MACjB;AACA,YAAM,IAAI,IAAI,OAAO,EAAE,KAAK,CAAC,GAAG,SAAS,CAAC;AAAA,IAC5C;AACA,UAAM,QAAgC,CAAC;AACvC,eAAW,QAAQ,YAAY;AAC7B,YAAM,IAAI,MAAM,KAAK;AAAA,QACnB,uBAAuB,IAAI;AAAA;AAAA;AAAA,QAG3B,EAAE,OAAO;AAAA,MACX;AACA,YAAM,IAAI,IAAI,OAAO,EAAE,KAAK,CAAC,GAAG,SAAS,CAAC;AAAA,IAC5C;AACA,UAAM,MAAM,MAAM,KAAK;AAAA,MACrB;AAAA;AAAA;AAAA;AAAA,MAIA,EAAE,OAAO;AAAA,IACX;AACA,UAAM,MAAM,IAAI,KAAK,CAAC;AACtB,UAAM,QAAQ,OAAO,KAAK,SAAS,CAAC;AACpC,UAAM,WAAW,OAAO,KAAK,YAAY,CAAC;AAC1C,UAAM,WAAW,UAAU,IAAI,IAAI,WAAW;AAC9C,WAAO,EAAE,OAAO,OAAO,mBAAmB,SAAS;AAAA,EACrD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,iBAA0B;AACxB,WAAO;AAAA,EACT;AACF;AASA,SAAS,eACP,MACA,eACA,oBACyB;AACzB,QAAM,MAAM;AACZ,QAAM,MAA+B,CAAC;AACtC,aAAW,OAAO,gBAAgB;AAChC,UAAM,QAAQ,IAAI,GAAG;AACrB,QAAI,GAAG,IAAI,UAAU,UAAa,UAAU,OAAO,WAAW,GAAG,IAAI;AAAA,EACvE;AACA,MAAI,KAAK,KAAK;AACd,MAAI,OAAO,KAAK;AAChB,MAAI,eAAe;AACjB,UAAM,MAAM,KAAK;AACjB,QAAI,YACF,MAAM,QAAQ,GAAG,KAAK,IAAI,WAAW,qBACjC,MACA,IAAI,MAA
c,kBAAkB,EAAE,KAAK,CAAC;AAAA,EACpD;AACA,SAAO;AACT;AAOA,SAAS,WAAW,OAA6C;AAC/D,QAAM,OAAO,oBAAI,IAA6B;AAC9C,aAAW,KAAK,OAAO;AACrB,SAAK,IAAI,EAAE,IAAI,CAAC;AAAA,EAClB;AACA,SAAO,MAAM,KAAK,KAAK,OAAO,CAAC;AACjC;AAGA,SAAS,aAAa,KAAuC;AAC3D,MAAI,eAAe,KAAK;AACtB,UAAM,MAA+B,CAAC;AACtC,eAAW,CAAC,GAAG,CAAC,KAAK,KAAK;AACxB,UAAI,OAAO,CAAC,CAAC,IAAI,YAAY,CAAC;AAAA,IAChC;AACA,WAAO;AAAA,EACT;AACA,MAAI,OAAO,OAAO,QAAQ,UAAU;AAClC,UAAM,MAAM;AACZ,UAAM,MAA+B,CAAC;AACtC,eAAW,KAAK,OAAO,KAAK,GAAG,GAAG;AAChC,UAAI,CAAC,IAAI,YAAY,IAAI,CAAC,CAAC;AAAA,IAC7B;AACA,WAAO;AAAA,EACT;AACA,SAAO,EAAE,OAAO,YAAY,GAAG,EAAE;AACnC;AAMA,SAAS,YAAY,OAAyB;AAC5C,MAAI,OAAO,UAAU,UAAU;AAC7B,QAAI,SAAS,OAAO,OAAO,gBAAgB,KAAK,SAAS,OAAO,OAAO,gBAAgB,GAAG;AACxF,aAAO,OAAO,KAAK;AAAA,IACrB;AACA,WAAO,MAAM,SAAS;AAAA,EACxB;AACA,MAAI,MAAM,QAAQ,KAAK,EAAG,QAAO,MAAM,IAAI,WAAW;AACtD,SAAO;AACT;AAGA,eAAe,WAAW,QAAqC;AAG7D,QAAM,SAAS,MAAM,QAAQ,MAAM,IAAI,OAAO,OAAO,SAAS,CAAC,IAAI;AACnE,MAAI,CAAC,OAAQ,QAAO,CAAC;AACrB,QAAM,SAAU,OAAiD;AACjE,MAAI,OAAO,WAAW,WAAY,QAAO,CAAC;AAC1C,SAAO,OAAO,KAAK,MAAM;AAC3B;","names":[]}
|
|
@@ -5,7 +5,7 @@ import {
|
|
|
5
5
|
import {
|
|
6
6
|
GraphDb,
|
|
7
7
|
defaultDbPath
|
|
8
|
-
} from "./chunk-
|
|
8
|
+
} from "./chunk-2TORJYBO.js";
|
|
9
9
|
import {
|
|
10
10
|
DEFAULT_CONFIG,
|
|
11
11
|
LLM_PRESETS,
|
|
@@ -433,17 +433,19 @@ async function selfTestKuzu(dbPath, embeddingDimension) {
|
|
|
433
433
|
try {
|
|
434
434
|
await db.connect();
|
|
435
435
|
await db.migrate();
|
|
436
|
-
const result = await db.query(
|
|
437
|
-
|
|
436
|
+
const result = await db.query(
|
|
437
|
+
`RETURN array_cosine_similarity([1.0, 0.0], [1.0, 0.0]) AS similarity, true AS ok`
|
|
438
|
+
);
|
|
438
439
|
await db.close();
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
status: vectorReady ? "ok" : "warn",
|
|
443
|
-
detail: vectorReady ? "ok (vector index ready)" : "ok (vector extension missing; semantic search disabled)"
|
|
444
|
-
};
|
|
440
|
+
const row = result.data[0];
|
|
441
|
+
if (row?.ok !== true || typeof row.similarity !== "number") {
|
|
442
|
+
return { name: "kuzu round-trip", status: "fail", detail: "unexpected result" };
|
|
445
443
|
}
|
|
446
|
-
return {
|
|
444
|
+
return {
|
|
445
|
+
name: "kuzu round-trip",
|
|
446
|
+
status: "ok",
|
|
447
|
+
detail: "ok (brute-force semantic search ready)"
|
|
448
|
+
};
|
|
447
449
|
} catch (err) {
|
|
448
450
|
await db.close().catch(() => {
|
|
449
451
|
});
|
|
@@ -1328,19 +1330,25 @@ async function indexRepo(opts) {
|
|
|
1328
1330
|
edges: allEdges,
|
|
1329
1331
|
knownFilePaths
|
|
1330
1332
|
});
|
|
1331
|
-
|
|
1332
|
-
await opts.graphDb.upsertNodes(allNodes);
|
|
1333
|
-
await opts.graphDb.upsertEdges(resolved);
|
|
1334
|
-
opts.onProgress?.({ type: "upsert", nodes: allNodes.length, edges: resolved.length });
|
|
1335
|
-
let embeddingCount = 0;
|
|
1333
|
+
let embeddedById = /* @__PURE__ */ new Map();
|
|
1336
1334
|
if (!opts.skipEmbeddings && opts.router) {
|
|
1337
1335
|
const embedded = await embedNodes(allNodes, {
|
|
1338
1336
|
router: opts.router,
|
|
1339
1337
|
onBatch: ({ embedded: embedded2, total }) => opts.onProgress?.({ type: "embed", embedded: embedded2, total })
|
|
1340
1338
|
});
|
|
1341
|
-
|
|
1342
|
-
embeddingCount = embedded.length;
|
|
1339
|
+
embeddedById = new Map(embedded.map((e) => [e.id, e]));
|
|
1343
1340
|
}
|
|
1341
|
+
const nodePayload = allNodes.map((n) => {
|
|
1342
|
+
const e = embeddedById.get(n.id);
|
|
1343
|
+
const base = n;
|
|
1344
|
+
if (!e) return base;
|
|
1345
|
+
return { ...base, embedding: e.embedding, embeddingNamespace: e.embeddingNamespace };
|
|
1346
|
+
});
|
|
1347
|
+
await opts.graphDb.deleteByRepo(opts.repoId);
|
|
1348
|
+
await opts.graphDb.upsertNodes(nodePayload);
|
|
1349
|
+
await opts.graphDb.upsertEdges(resolved);
|
|
1350
|
+
opts.onProgress?.({ type: "upsert", nodes: allNodes.length, edges: resolved.length });
|
|
1351
|
+
const embeddingCount = embeddedById.size;
|
|
1344
1352
|
return {
|
|
1345
1353
|
durationMs: Date.now() - start,
|
|
1346
1354
|
parsedFiles: parsedCount - failed,
|
|
@@ -1351,22 +1359,6 @@ async function indexRepo(opts) {
|
|
|
1351
1359
|
droppedEdges: dropped
|
|
1352
1360
|
};
|
|
1353
1361
|
}
|
|
1354
|
-
async function persistEmbeddings(graphDb, embedded) {
|
|
1355
|
-
if (embedded.length === 0) return;
|
|
1356
|
-
const BATCH = 100;
|
|
1357
|
-
for (let i = 0; i < embedded.length; i += BATCH) {
|
|
1358
|
-
const batch = embedded.slice(i, i + BATCH);
|
|
1359
|
-
await graphDb.query(
|
|
1360
|
-
`
|
|
1361
|
-
UNWIND $batch AS e
|
|
1362
|
-
MATCH (n:Symbol { id: e.id })
|
|
1363
|
-
SET n.embedding = e.embedding,
|
|
1364
|
-
n.embeddingNamespace = e.embeddingNamespace
|
|
1365
|
-
`,
|
|
1366
|
-
{ batch }
|
|
1367
|
-
);
|
|
1368
|
-
}
|
|
1369
|
-
}
|
|
1370
1362
|
async function runWithConcurrency(items, concurrency, fn) {
|
|
1371
1363
|
let cursor = 0;
|
|
1372
1364
|
const runners = Array.from({ length: Math.min(concurrency, items.length) }, async () => {
|
|
@@ -2005,14 +1997,16 @@ var searchSemanticTool = {
|
|
|
2005
1997
|
);
|
|
2006
1998
|
}
|
|
2007
1999
|
const namespace = `${deps.llm.embeddingNamespace.provider}:${deps.llm.embeddingNamespace.model}:${deps.llm.embeddingNamespace.dimension}`;
|
|
2000
|
+
const dim = deps.llm.embeddingNamespace.dimension;
|
|
2008
2001
|
return cachedJsonResult("search_semantic", { description, k, namespace }, deps, async () => {
|
|
2009
2002
|
const rows = await deps.graph.query(
|
|
2010
|
-
`
|
|
2011
|
-
|
|
2012
|
-
|
|
2013
|
-
|
|
2014
|
-
|
|
2015
|
-
ORDER BY
|
|
2003
|
+
`MATCH (s:Symbol)
|
|
2004
|
+
WHERE s.embeddingNamespace = $ns
|
|
2005
|
+
RETURN s.id AS id, s.name AS name, s.kind AS kind, s.path AS path,
|
|
2006
|
+
s.lineStart AS line, s.signature AS signature,
|
|
2007
|
+
array_cosine_similarity(s.embedding, CAST($vec AS FLOAT[${dim}])) AS score
|
|
2008
|
+
ORDER BY score DESC
|
|
2009
|
+
LIMIT $k`,
|
|
2016
2010
|
{ vec: embedding, k, ns: namespace }
|
|
2017
2011
|
);
|
|
2018
2012
|
return { description, k, namespace, count: rows.length, matches: rows };
|
|
@@ -2300,7 +2294,7 @@ async function startMcpServer(portOrOptions) {
|
|
|
2300
2294
|
async function loadGraphClient(dbPath) {
|
|
2301
2295
|
let mod;
|
|
2302
2296
|
try {
|
|
2303
|
-
mod = await import("./src-
|
|
2297
|
+
mod = await import("./src-PDNTANJD.js");
|
|
2304
2298
|
} catch (err) {
|
|
2305
2299
|
throw new Error(
|
|
2306
2300
|
`Failed to import @codegraph/graph-db. Run \`pnpm -r build\` first. Underlying error: ${err instanceof Error ? err.message : String(err)}`
|
|
@@ -2400,7 +2394,7 @@ shutting down (${signal})...
|
|
|
2400
2394
|
// src/program.ts
|
|
2401
2395
|
function buildProgram() {
|
|
2402
2396
|
const program = new Command();
|
|
2403
|
-
program.name("codegraph").description("Live, queryable knowledge graph for your codebase").version("0.1.
|
|
2397
|
+
program.name("codegraph").description("Live, queryable knowledge graph for your codebase").version("0.1.2").option("--verbose", "Print full stack traces on error").hook("preAction", (thisCommand) => {
|
|
2404
2398
|
const opts = thisCommand.optsWithGlobals();
|
|
2405
2399
|
if (opts.verbose) process.env.CODEGRAPH_VERBOSE = "1";
|
|
2406
2400
|
});
|
|
@@ -2446,4 +2440,4 @@ export {
|
|
|
2446
2440
|
renderError,
|
|
2447
2441
|
buildProgram
|
|
2448
2442
|
};
|
|
2449
|
-
//# sourceMappingURL=chunk-
|
|
2443
|
+
//# sourceMappingURL=chunk-36AWRLQ6.js.map
|