@leanlabsinnov/codegraph 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # @cyrilc/codegraph
1
+ # @leanlabsinnov/codegraph
2
2
 
3
3
  Live, queryable knowledge graph for your codebase. Indexes JS/TS into an embedded graph
4
4
  DB with vector embeddings, then exposes a local MCP server that Claude Code, Cursor, and
@@ -12,7 +12,7 @@ external services.
12
12
  ## Install
13
13
 
14
14
  ```bash
15
- npm i -g @cyrilc/codegraph
15
+ npm i -g @leanlabsinnov/codegraph
16
16
  ```
17
17
 
18
18
  Requires Node 20+.
@@ -29,7 +29,7 @@ codegraph serve
29
29
  Then point Claude Code / Cursor / Windsurf at `http://127.0.0.1:3748/mcp` with the bearer
30
30
  token from `~/.codegraph/config.json`.
31
31
 
32
- See the full [README on GitHub](https://github.com/cyrilc/codegraph) for client setup, all
32
+ See the full [README on GitHub](https://github.com/leanlabsinnov/codegraph) for client setup, all
33
33
  10 MCP tools, and troubleshooting.
34
34
 
35
35
  ## Commands
package/dist/bin.js CHANGED
@@ -2,9 +2,9 @@
2
2
  import {
3
3
  buildProgram,
4
4
  renderError
5
- } from "./chunk-O4ZO6CP5.js";
5
+ } from "./chunk-36AWRLQ6.js";
6
6
  import "./chunk-B2TIVKUB.js";
7
- import "./chunk-F5QKPRNW.js";
7
+ import "./chunk-2TORJYBO.js";
8
8
  import "./chunk-XGPZDCQ4.js";
9
9
 
10
10
  // src/bin.ts
@@ -10,31 +10,38 @@ import { dirname, resolve } from "path";
10
10
  import * as kuzu from "kuzu";
11
11
 
12
12
  // ../graph-db/src/schema.ts
13
- var SYMBOL_COLUMNS = [
14
- "id",
15
- "kind",
16
- "repoId",
17
- "name",
18
- "path",
19
- "lineStart",
20
- "lineEnd",
21
- "signature",
22
- "leadingComment",
23
- "isExported",
13
+ var SYMBOL_COLUMN_SPEC = {
14
+ id: "STRING",
15
+ kind: "STRING",
16
+ repoId: "STRING",
17
+ name: "STRING",
18
+ path: "STRING",
19
+ lineStart: "INT64",
20
+ lineEnd: "INT64",
21
+ signature: "STRING",
22
+ leadingComment: "STRING",
23
+ isExported: "BOOLEAN",
24
24
  // File-specific
25
- "language",
26
- "sizeBytes",
27
- "contentHash",
25
+ language: "STRING",
26
+ sizeBytes: "INT64",
27
+ contentHash: "STRING",
28
28
  // Function-specific
29
- "isAsync",
30
- "isArrow",
29
+ isAsync: "BOOLEAN",
30
+ isArrow: "BOOLEAN",
31
31
  // Route-specific
32
- "method",
33
- "routePath",
34
- "framework",
35
- // Embedding
36
- "embeddingNamespace"
37
- ];
32
+ method: "STRING",
33
+ routePath: "STRING",
34
+ framework: "STRING",
35
+ // Embedding namespace tag
36
+ embeddingNamespace: "STRING"
37
+ };
38
+ var SYMBOL_COLUMNS = Object.keys(SYMBOL_COLUMN_SPEC);
39
+ function defaultFor(column) {
40
+ const t = SYMBOL_COLUMN_SPEC[column];
41
+ if (t === "BOOLEAN") return false;
42
+ if (t === "INT64") return 0;
43
+ return "";
44
+ }
38
45
  var EDGE_COLUMNS = ["line"];
39
46
  function buildSchemaStatements(opts) {
40
47
  const columnDefs = [
@@ -70,13 +77,7 @@ function buildSchemaStatements(opts) {
70
77
  }
71
78
  return statements;
72
79
  }
73
- function buildVectorIndexStatements() {
74
- return [
75
- "INSTALL VECTOR",
76
- "LOAD EXTENSION VECTOR",
77
- "CALL CREATE_VECTOR_INDEX('Symbol', 'embedding_idx', 'embedding', metric := 'cosine')"
78
- ];
79
- }
80
+ var SEMANTIC_SEARCH_MODE = "brute-force";
80
81
  var DEFAULT_EMBEDDING_DIMENSION = 1536;
81
82
 
82
83
  // ../graph-db/src/client.ts
@@ -88,7 +89,6 @@ var GraphDb = class {
88
89
  embeddingDimension;
89
90
  db = null;
90
91
  conn = null;
91
- vectorIndexReady = false;
92
92
  /**
93
93
  * Cache of `conn.prepare()` results keyed by Cypher source. Kuzu's Node SDK requires a
94
94
  * prepared statement for any parameterized query - reusing the prepared object keeps
@@ -107,17 +107,8 @@ var GraphDb = class {
107
107
  }
108
108
  async close() {
109
109
  this.preparedCache.clear();
110
- try {
111
- this.conn?.close?.();
112
- } catch {
113
- }
114
- try {
115
- this.db?.close?.();
116
- } catch {
117
- }
118
110
  this.conn = null;
119
111
  this.db = null;
120
- this.vectorIndexReady = false;
121
112
  }
122
113
  requireConn() {
123
114
  if (!this.conn) {
@@ -126,8 +117,11 @@ var GraphDb = class {
126
117
  return this.conn;
127
118
  }
128
119
  /**
129
- * Idempotent migration. Runs all DDL through `IF NOT EXISTS` guards and tolerates the
130
- * vector-index "already exists" error from re-runs.
120
+ * Idempotent migration: creates the `Symbol` node table and one REL table per
121
+ * `EdgeKind`. The `embedding FLOAT[N]` column lives on `Symbol` but we deliberately
122
+ * skip Kuzu's `CREATE_VECTOR_INDEX` - semantic search is brute-force via
123
+ * `array_cosine_similarity` to work around kuzudb/kuzu#5965 and kuzudb/kuzu#6040.
124
+ * See `schema.ts` for the full rationale.
131
125
  */
132
126
  async migrate() {
133
127
  await this.connect();
@@ -135,22 +129,6 @@ var GraphDb = class {
135
129
  for (const stmt of schemaStmts) {
136
130
  await this.exec(stmt);
137
131
  }
138
- for (const stmt of buildVectorIndexStatements()) {
139
- try {
140
- await this.exec(stmt);
141
- } catch (err) {
142
- const message = err instanceof Error ? err.message : String(err);
143
- if (isAlreadyExistsError(message)) continue;
144
- if (/extension/i.test(message) && /(not found|missing|unsupported|disabled)/i.test(message)) {
145
- console.warn(
146
- `[codegraph] vector extension unavailable; semantic search disabled. Underlying: ${message}`
147
- );
148
- return;
149
- }
150
- throw new Error(`migrate failed on \`${stmt}\`: ${message}`);
151
- }
152
- }
153
- this.vectorIndexReady = true;
154
132
  }
155
133
  /**
156
134
  * Typed Cypher escape hatch.
@@ -193,19 +171,39 @@ var GraphDb = class {
193
171
  return conn.execute(prepared, params);
194
172
  }
195
173
  /**
196
- * Upserts nodes via batched UNWIND + MERGE. Kuzu requires every column we SET to exist
197
- * in the schema, so each row is normalized to include every `SYMBOL_COLUMNS` field (NULL
198
- * for fields not present).
174
+ * Inserts nodes via batched UNWIND + bare CREATE. The whole property map (including
175
+ * `embedding` when present) is set in the CREATE clause - we deliberately avoid `SET`
176
+ * because Kuzu rejects writes to an HNSW-indexed column even after the index is dropped
177
+ * (kuzudb/kuzu#6040). Callers must wipe pre-existing rows with `deleteByRepo` first.
178
+ *
179
+ * In-batch duplicates (same `id`) are coalesced to the last occurrence to keep CREATE
180
+ * from violating the primary-key uniqueness constraint.
199
181
  */
200
182
  async upsertNodes(nodes) {
201
183
  if (nodes.length === 0) return;
202
184
  await this.connect();
185
+ const deduped = dedupeById(nodes);
186
+ const withEmbedding = [];
187
+ const withoutEmbedding = [];
188
+ for (const n of deduped) {
189
+ if (Array.isArray(n.embedding) && n.embedding.length > 0) {
190
+ withEmbedding.push(n);
191
+ } else {
192
+ withoutEmbedding.push(n);
193
+ }
194
+ }
195
+ await this.createSymbolBatch(withoutEmbedding, false);
196
+ await this.createSymbolBatch(withEmbedding, true);
197
+ }
198
+ async createSymbolBatch(nodes, withEmbedding) {
199
+ if (nodes.length === 0) return;
203
200
  const BATCH = 200;
204
- const setClause = SYMBOL_COLUMNS.filter((c) => c !== "id").map((c) => `n.${c} = r.${c}`).join(", ");
205
- const cypher = `UNWIND $batch AS r MERGE (n:Symbol {id: r.id}) SET ${setClause}`;
201
+ const columns = [...SYMBOL_COLUMNS, ...withEmbedding ? ["embedding"] : []];
202
+ const propMap = columns.map((c) => `${c}: r.${c}`).join(", ");
203
+ const cypher = `UNWIND $batch AS r CREATE (n:Symbol {${propMap}})`;
206
204
  for (let i = 0; i < nodes.length; i += BATCH) {
207
205
  const slice = nodes.slice(i, i + BATCH);
208
- const payload = slice.map(buildSymbolRow);
206
+ const payload = slice.map((n) => buildSymbolRow(n, withEmbedding, this.embeddingDimension));
209
207
  await this.exec(cypher, { batch: payload });
210
208
  }
211
209
  }
@@ -233,7 +231,7 @@ var GraphDb = class {
233
231
  const payload = slice.map((e) => ({
234
232
  fromId: e.fromId,
235
233
  toId: e.toId,
236
- line: typeof e.line === "number" ? e.line : null
234
+ line: typeof e.line === "number" ? e.line : 0
237
235
  }));
238
236
  await this.exec(cypher, { batch: payload });
239
237
  }
@@ -292,22 +290,38 @@ var GraphDb = class {
292
290
  const coverage = total === 0 ? 0 : embedded / total;
293
291
  return { nodes, edges, embeddingCoverage: coverage };
294
292
  }
295
- /** True once `migrate()` confirmed the vector extension is loaded. */
293
+ /**
294
+ * v0.1.x never creates an HNSW index - semantic search is brute-force via
295
+ * `array_cosine_similarity`. Always returns `false`. Kept on the surface so callers
296
+ * (e.g. `codegraph doctor`) can branch on a single boolean once the upstream Kuzu
297
+ * fixes ship and we flip the index back on.
298
+ */
296
299
  hasVectorIndex() {
297
- return this.vectorIndexReady;
300
+ return false;
298
301
  }
299
302
  };
300
- function buildSymbolRow(node) {
303
+ function buildSymbolRow(node, withEmbedding, embeddingDimension) {
301
304
  const src = node;
302
305
  const row = {};
303
306
  for (const col of SYMBOL_COLUMNS) {
304
307
  const value = src[col];
305
- row[col] = value === void 0 ? null : value;
308
+ row[col] = value === void 0 || value === null ? defaultFor(col) : value;
306
309
  }
307
310
  row.id = node.id;
308
311
  row.kind = node.kind;
312
+ if (withEmbedding) {
313
+ const vec = node.embedding;
314
+ row.embedding = Array.isArray(vec) && vec.length === embeddingDimension ? vec : new Array(embeddingDimension).fill(0);
315
+ }
309
316
  return row;
310
317
  }
318
+ function dedupeById(nodes) {
319
+ const seen = /* @__PURE__ */ new Map();
320
+ for (const n of nodes) {
321
+ seen.set(n.id, n);
322
+ }
323
+ return Array.from(seen.values());
324
+ }
311
325
  function normalizeRow(row) {
312
326
  if (row instanceof Map) {
313
327
  const out = {};
@@ -343,17 +357,16 @@ async function collectAll(result) {
343
357
  if (typeof getAll !== "function") return [];
344
358
  return getAll.call(target);
345
359
  }
346
- function isAlreadyExistsError(message) {
347
- return /already exists/i.test(message) || /already loaded/i.test(message) || /already installed/i.test(message) || /duplicate (table|index)/i.test(message);
348
- }
349
360
 
350
361
  export {
362
+ SYMBOL_COLUMN_SPEC,
351
363
  SYMBOL_COLUMNS,
364
+ defaultFor,
352
365
  EDGE_COLUMNS,
353
366
  buildSchemaStatements,
354
- buildVectorIndexStatements,
367
+ SEMANTIC_SEARCH_MODE,
355
368
  DEFAULT_EMBEDDING_DIMENSION,
356
369
  defaultDbPath,
357
370
  GraphDb
358
371
  };
359
- //# sourceMappingURL=chunk-F5QKPRNW.js.map
372
+ //# sourceMappingURL=chunk-2TORJYBO.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../graph-db/src/client.ts","../../graph-db/src/schema.ts"],"sourcesContent":["import { mkdir } from \"node:fs/promises\";\nimport { homedir } from \"node:os\";\nimport { dirname, resolve } from \"node:path\";\nimport { EDGE_KINDS, NODE_KINDS, type EdgeKind } from \"@codegraph/shared\";\nimport * as kuzu from \"kuzu\";\nimport {\n DEFAULT_EMBEDDING_DIMENSION,\n SYMBOL_COLUMNS,\n buildSchemaStatements,\n defaultFor,\n} from \"./schema.js\";\nimport type { QueryResult, UpsertEdgeInput, UpsertNodeInput } from \"./types.js\";\n\nexport interface GraphDbOptions {\n /** Directory where Kuzu stores its on-disk database files. Defaults to `~/.codegraph/graph`. */\n dbPath?: string;\n /**\n * Legacy `url` option kept for back-compat with Phase-1 callers. Ignored at runtime -\n * Kuzu is embedded - but accepted so existing call sites compile while the rest of the\n * codebase migrates to `dbPath`.\n */\n url?: string;\n /** Vector dimension for the `Symbol.embedding` column. Baked into the schema at create time. */\n embeddingDimension?: number;\n}\n\n/** Default on-disk location for the embedded graph. */\nexport function defaultDbPath(): string {\n return resolve(homedir(), \".codegraph\", \"graph\");\n}\n\n/**\n * Thin, typed wrapper around the embedded Kuzu database.\n *\n * Public surface (intentionally identical to the Phase-1 FalkorDB client so callers don't\n * change): connect / close / migrate / query / upsertNodes / upsertEdges / deleteByRepo /\n * stats. Internals are pure Kuzu.\n */\nexport class GraphDb {\n private readonly dbPath: string;\n private readonly embeddingDimension: number;\n private db: kuzu.Database | null = null;\n private conn: kuzu.Connection | null = null;\n /**\n * Cache of `conn.prepare()` results keyed by Cypher source. Kuzu's Node SDK requires a\n * prepared statement for any parameterized query - reusing the prepared object keeps\n * UNWIND-batched upserts fast.\n */\n private preparedCache = new Map<string, kuzu.PreparedStatement>();\n\n constructor(opts: GraphDbOptions = {}) {\n this.dbPath = opts.dbPath ?? defaultDbPath();\n this.embeddingDimension = opts.embeddingDimension ?? DEFAULT_EMBEDDING_DIMENSION;\n }\n\n async connect(): Promise<void> {\n if (this.conn) return;\n await mkdir(dirname(this.dbPath), { recursive: true });\n this.db = new kuzu.Database(this.dbPath);\n this.conn = new kuzu.Connection(this.db);\n }\n\n async close(): Promise<void> {\n // We deliberately do NOT call `conn.close()` / `db.close()` on Kuzu's Node bindings.\n // In 0.11.x those native handles are also disposed by the binding's process-exit hook,\n // and double-disposing can SIGSEGV the worker on cleanup. Dropping references is\n // enough for the GC to release the underlying memory before the process exits.\n this.preparedCache.clear();\n this.conn = null;\n this.db = null;\n }\n\n private requireConn(): kuzu.Connection {\n if (!this.conn) {\n throw new Error(\"GraphDb not connected. Call connect() first.\");\n }\n return this.conn;\n }\n\n /**\n * Idempotent migration: creates the `Symbol` node table and one REL table per\n * `EdgeKind`. The `embedding FLOAT[N]` column lives on `Symbol` but we deliberately\n * skip Kuzu's `CREATE_VECTOR_INDEX` - semantic search is brute-force via\n * `array_cosine_similarity` to work around kuzudb/kuzu#5965 and kuzudb/kuzu#6040.\n * See `schema.ts` for the full rationale.\n */\n async migrate(): Promise<void> {\n await this.connect();\n const schemaStmts = buildSchemaStatements({ embeddingDimension: this.embeddingDimension });\n for (const stmt of schemaStmts) {\n await this.exec(stmt);\n }\n }\n\n /**\n * Typed Cypher escape hatch.\n *\n * Kuzu returns BIGINT columns as native BigInt; we coerce to plain `number` when safe so\n * downstream JSON serialization (MCP responses, snapshot tests) does not need bespoke\n * handling.\n */\n async query<T = Record<string, unknown>>(\n cypher: string,\n params: Record<string, unknown> = {},\n ): Promise<QueryResult<T>> {\n const result = await this.runQuery(cypher, params);\n const raw = await collectAll(result);\n const data = raw.map((row) => normalizeRow(row)) as T[];\n const headers = raw.length > 0 ? Object.keys(raw[0] ?? {}) : [];\n return { data, headers, metadata: [] };\n }\n\n /** Fire-and-forget DDL/exec. */\n private async exec(cypher: string, params: Record<string, unknown> = {}): Promise<void> {\n await this.runQuery(cypher, params);\n }\n\n /**\n * Bridge to Kuzu's two execution paths:\n * - `conn.query(stmt)` for unparameterized statements (the second positional arg is a\n * `progressCallback`, NOT params - mistaking that is the #1 way to confuse the API).\n * - `conn.prepare(stmt) + conn.execute(prepared, params)` for anything with `$name`\n * placeholders. We cache the prepared statement so UNWIND batches reuse it.\n */\n private async runQuery(cypher: string, params: Record<string, unknown>): Promise<unknown> {\n const conn = this.requireConn();\n if (Object.keys(params).length === 0) {\n return conn.query(cypher);\n }\n let prepared = this.preparedCache.get(cypher);\n if (!prepared) {\n prepared = await conn.prepare(cypher);\n if (!prepared.isSuccess()) {\n throw new Error(prepared.getErrorMessage());\n }\n this.preparedCache.set(cypher, prepared);\n }\n // Cast through `unknown`: Kuzu's bindings advertise a strict `KuzuValue` union, but\n // we can pass through any JSON-serializable value the embedded engine accepts (nested\n // structs and lists are converted at the native layer).\n return conn.execute(prepared, params as unknown as Parameters<kuzu.Connection[\"execute\"]>[1]);\n }\n\n /**\n * Inserts nodes via batched UNWIND + bare CREATE. The whole property map (including\n * `embedding` when present) is set in the CREATE clause - we deliberately avoid `SET`\n * because Kuzu rejects writes to an HNSW-indexed column even after the index is dropped\n * (kuzudb/kuzu#6040). Callers must wipe pre-existing rows with `deleteByRepo` first.\n *\n * In-batch duplicates (same `id`) are coalesced to the last occurrence to keep CREATE\n * from violating the primary-key uniqueness constraint.\n */\n async upsertNodes(nodes: UpsertNodeInput[]): Promise<void> {\n if (nodes.length === 0) return;\n await this.connect();\n const deduped = dedupeById(nodes);\n const withEmbedding: UpsertNodeInput[] = [];\n const withoutEmbedding: UpsertNodeInput[] = [];\n for (const n of deduped) {\n if (Array.isArray(n.embedding) && n.embedding.length > 0) {\n withEmbedding.push(n);\n } else {\n withoutEmbedding.push(n);\n }\n }\n await this.createSymbolBatch(withoutEmbedding, false);\n await this.createSymbolBatch(withEmbedding, true);\n }\n\n private async createSymbolBatch(\n nodes: UpsertNodeInput[],\n withEmbedding: boolean,\n ): Promise<void> {\n if (nodes.length === 0) return;\n const BATCH = 200;\n const columns = [...SYMBOL_COLUMNS, ...(withEmbedding ? ([\"embedding\"] as const) : [])];\n const propMap = columns.map((c) => `${c}: r.${c}`).join(\", \");\n const cypher = `UNWIND $batch AS r CREATE (n:Symbol {${propMap}})`;\n for (let i = 0; i < nodes.length; i += BATCH) {\n const slice = nodes.slice(i, i + BATCH);\n const payload = slice.map((n) => buildSymbolRow(n, withEmbedding, this.embeddingDimension));\n await this.exec(cypher, { batch: payload });\n }\n }\n\n /**\n * Upserts edges. Both endpoints must already exist as `Symbol` nodes; rows where the\n * MATCH fails are silently dropped, matching Cypher semantics.\n *\n * Uses CREATE because the orchestrator wipes the repo's slice before writing, so\n * duplicates can't pre-exist within a single index pass.\n */\n async upsertEdges(edges: UpsertEdgeInput[]): Promise<void> {\n if (edges.length === 0) return;\n await this.connect();\n const byKind = new Map<EdgeKind, UpsertEdgeInput[]>();\n for (const e of edges) {\n const bucket = byKind.get(e.kind);\n if (bucket) bucket.push(e);\n else byKind.set(e.kind, [e]);\n }\n const BATCH = 500;\n for (const [kind, batch] of byKind) {\n const cypher = `UNWIND $batch AS r MATCH (a:Symbol {id: r.fromId}) MATCH (b:Symbol {id: r.toId}) CREATE (a)-[e:${kind} {line: r.line}]->(b)`;\n for (let i = 0; i < batch.length; i += BATCH) {\n const slice = batch.slice(i, i + BATCH);\n // Use 0 (not null) for missing line numbers so Kuzu can infer the struct field\n // as INT64 even when an entire batch happens to have no `line` set.\n const payload = slice.map((e) => ({\n fromId: e.fromId,\n toId: e.toId,\n line: typeof e.line === \"number\" ? e.line : 0,\n }));\n await this.exec(cypher, { batch: payload });\n }\n }\n }\n\n /**\n * Deletes all nodes (and incident edges via DETACH DELETE) for a repo. If `paths` is\n * provided, restricts the delete to nodes whose `path` is in the list - used by\n * incremental re-indexing.\n */\n async deleteByRepo(repoId: string, paths?: string[]): Promise<void> {\n await this.connect();\n if (paths && paths.length > 0) {\n await this.exec(\n \"MATCH (n:Symbol) WHERE n.repoId = $repoId AND n.path IN $paths DETACH DELETE n\",\n { repoId, paths },\n );\n return;\n }\n await this.exec(\"MATCH (n:Symbol) WHERE n.repoId = $repoId DETACH DELETE n\", { repoId });\n }\n\n /**\n * Returns counts of nodes (per kind) and edges (per kind) for a repo, plus the share of\n * non-File nodes that carry an embedding.\n */\n async stats(repoId: string): Promise<{\n nodes: Record<string, number>;\n edges: Record<string, number>;\n embeddingCoverage: number;\n }> {\n await this.connect();\n const nodes: Record<string, number> = {};\n for (const kind of NODE_KINDS) {\n const r = await this.query<{ count: number }>(\n \"MATCH (n:Symbol) WHERE n.repoId = $repoId AND n.kind = $kind RETURN count(n) AS count\",\n { repoId, kind },\n );\n nodes[kind] = Number(r.data[0]?.count ?? 0);\n }\n const edges: Record<string, number> = {};\n for (const kind of EDGE_KINDS) {\n const r = await this.query<{ count: number }>(\n `MATCH (a:Symbol)-[r:${kind}]->(b:Symbol)\n WHERE a.repoId = $repoId AND b.repoId = $repoId\n RETURN count(r) AS count`,\n { repoId },\n );\n edges[kind] = Number(r.data[0]?.count ?? 0);\n }\n const cov = await this.query<{ total: number | bigint; embedded: number | bigint }>(\n `MATCH (n:Symbol)\n WHERE n.repoId = $repoId AND n.kind <> 'File'\n RETURN count(n) AS total,\n count(n.embedding) AS embedded`,\n { repoId },\n );\n const row = cov.data[0];\n const total = Number(row?.total ?? 0);\n const embedded = Number(row?.embedded ?? 0);\n const coverage = total === 0 ? 0 : embedded / total;\n return { nodes, edges, embeddingCoverage: coverage };\n }\n\n /**\n * v0.1.x never creates an HNSW index - semantic search is brute-force via\n * `array_cosine_similarity`. Always returns `false`. Kept on the surface so callers\n * (e.g. `codegraph doctor`) can branch on a single boolean once the upstream Kuzu\n * fixes ship and we flip the index back on.\n */\n hasVectorIndex(): boolean {\n return false;\n }\n}\n\n/**\n * Build a fully-populated row for a Kuzu UNWIND batch. Every column in `SYMBOL_COLUMNS`\n * is present (typed default when missing) so Kuzu can infer a homogeneous struct schema\n * for the batch parameter. When `withEmbedding` is true the `embedding` field is also\n * populated - we either use the provided vector or a zero-vector of the configured\n * dimension so the struct schema stays uniform across the batch.\n */\nfunction buildSymbolRow(\n node: UpsertNodeInput,\n withEmbedding: boolean,\n embeddingDimension: number,\n): Record<string, unknown> {\n const src = node as unknown as Record<string, unknown>;\n const row: Record<string, unknown> = {};\n for (const col of SYMBOL_COLUMNS) {\n const value = src[col];\n row[col] = value === undefined || value === null ? defaultFor(col) : value;\n }\n row.id = node.id;\n row.kind = node.kind;\n if (withEmbedding) {\n const vec = node.embedding;\n row.embedding =\n Array.isArray(vec) && vec.length === embeddingDimension\n ? vec\n : new Array<number>(embeddingDimension).fill(0);\n }\n return row;\n}\n\n/**\n * Coalesce same-id rows down to the last occurrence. Required because we now use bare\n * CREATE (not MERGE) for inserts and Kuzu rejects primary-key collisions inside a single\n * UNWIND batch.\n */\nfunction dedupeById(nodes: UpsertNodeInput[]): UpsertNodeInput[] {\n const seen = new Map<string, UpsertNodeInput>();\n for (const n of nodes) {\n seen.set(n.id, n);\n }\n return Array.from(seen.values());\n}\n\n/** Convert Kuzu's row representation (Map or plain object) into a plain JSON object. */\nfunction normalizeRow(row: unknown): Record<string, unknown> {\n if (row instanceof Map) {\n const out: Record<string, unknown> = {};\n for (const [k, v] of row) {\n out[String(k)] = coerceValue(v);\n }\n return out;\n }\n if (row && typeof row === \"object\") {\n const src = row as Record<string, unknown>;\n const out: Record<string, unknown> = {};\n for (const k of Object.keys(src)) {\n out[k] = coerceValue(src[k]);\n }\n return out;\n }\n return { value: coerceValue(row) };\n}\n\n/**\n * Kuzu returns BIGINT columns as JS BigInt. Coerce to `number` when within Number.MAX_SAFE\n * for JSON-friendly downstream consumption.\n */\nfunction coerceValue(value: unknown): unknown {\n if (typeof value === \"bigint\") {\n if (value <= BigInt(Number.MAX_SAFE_INTEGER) && value >= BigInt(Number.MIN_SAFE_INTEGER)) {\n return Number(value);\n }\n return value.toString();\n }\n if (Array.isArray(value)) return value.map(coerceValue);\n return value;\n}\n\n/** Drain a Kuzu QueryResult (or array of them) into an array of row objects. */\nasync function collectAll(result: unknown): Promise<unknown[]> {\n // Multi-statement queries return an array; we keep only the last one (matches how the\n // final statement is the one that carries a `RETURN`).\n const target = Array.isArray(result) ? result[result.length - 1] : result;\n if (!target) return [];\n const getAll = (target as { getAll?: () => Promise<unknown[]> }).getAll;\n if (typeof getAll !== \"function\") return [];\n return getAll.call(target);\n}\n\n","import { EDGE_KINDS } from \"@codegraph/shared\";\n\n/**\n * Kuzu is schema-first. Unlike FalkorDB which is schema-less, every column we ever want to\n * SET on a node must exist up-front. We use ONE `Symbol` node table with a `kind` column\n * (Kuzu does not support multi-labels), and one REL table per `EdgeKind`.\n *\n * Columns are the union of every field across the `GraphNode` discriminated union in\n * `@codegraph/shared` plus the two embedding-namespace fields. Fields that are not\n * relevant to a given kind stay NULL.\n */\n\n/**\n * Per-column metadata so the upserter can build batches with explicit typed defaults.\n *\n * Kuzu's struct parameter type inference fails when a column is null on every row in a\n * batch (it defaults to STRING and rejects assignment to a BOOL/INT64 column). Concrete\n * defaults keep inference deterministic and let us skip clunky CAST() clauses.\n *\n * Convention: optional booleans default to `false`, optional ints to `0`, optional strings\n * to `\"\"`. We never check `WHERE n.foo IS NULL` in queries, so the lost null-distinction\n * is acceptable for v0.1.0.\n */\nexport const SYMBOL_COLUMN_SPEC = {\n id: \"STRING\",\n kind: \"STRING\",\n repoId: \"STRING\",\n name: \"STRING\",\n path: \"STRING\",\n lineStart: \"INT64\",\n lineEnd: \"INT64\",\n signature: \"STRING\",\n leadingComment: \"STRING\",\n isExported: \"BOOLEAN\",\n // File-specific\n language: \"STRING\",\n sizeBytes: \"INT64\",\n contentHash: \"STRING\",\n // Function-specific\n isAsync: \"BOOLEAN\",\n isArrow: \"BOOLEAN\",\n // Route-specific\n method: \"STRING\",\n routePath: \"STRING\",\n framework: \"STRING\",\n // Embedding namespace tag\n embeddingNamespace: \"STRING\",\n} as const;\n\nexport type SymbolColumn = keyof typeof SYMBOL_COLUMN_SPEC;\n\nexport const SYMBOL_COLUMNS = Object.keys(SYMBOL_COLUMN_SPEC) as SymbolColumn[];\n\n/** Return the typed default for an unset optional column. */\nexport function defaultFor(column: SymbolColumn): unknown {\n const t = SYMBOL_COLUMN_SPEC[column];\n if (t === \"BOOLEAN\") return false;\n if (t === \"INT64\") return 0;\n return \"\";\n}\n\n/** Optional per-edge metadata. Currently only `line`. */\nexport const EDGE_COLUMNS = [\"line\"] as const;\n\nexport type EdgeColumn = (typeof EDGE_COLUMNS)[number];\n\n/**\n * DDL statements that bring an empty Kuzu database to the codegraph schema.\n * `IF NOT EXISTS` makes `migrate()` idempotent so it can run on every connect.\n *\n * `embedding` is a fixed-dimension column - dimension is configured at migrate time and\n * baked into the schema. If a user later switches to an embedding provider with a\n * different dimension they must delete the on-disk graph directory to recreate it. The\n * embedding-namespace tag ensures we never silently mix dimensions.\n */\nexport function buildSchemaStatements(opts: { embeddingDimension: number }): string[] {\n const columnDefs = [\n \"id STRING\",\n \"kind STRING\",\n \"repoId STRING\",\n \"name STRING\",\n \"path STRING\",\n \"lineStart INT64\",\n \"lineEnd INT64\",\n \"signature STRING\",\n \"leadingComment STRING\",\n \"isExported BOOLEAN\",\n \"language STRING\",\n \"sizeBytes INT64\",\n \"contentHash STRING\",\n \"isAsync BOOLEAN\",\n \"isArrow BOOLEAN\",\n \"method STRING\",\n \"routePath STRING\",\n \"framework STRING\",\n \"embeddingNamespace STRING\",\n `embedding FLOAT[${opts.embeddingDimension}]`,\n \"PRIMARY KEY (id)\",\n ];\n const statements: string[] = [\n `CREATE NODE TABLE IF NOT EXISTS Symbol(${columnDefs.join(\", \")})`,\n ];\n for (const kind of EDGE_KINDS) {\n statements.push(\n `CREATE REL TABLE IF NOT EXISTS ${kind}(FROM Symbol TO Symbol, line INT64)`,\n );\n }\n return statements;\n}\n\n/**\n * Semantic search in v0.1.x is intentionally brute-force via Kuzu's built-in\n * `array_cosine_similarity` function - we do NOT create an HNSW vector index.\n *\n * Why: Kuzu 0.11.x has two open issues that make the HNSW path unusable for a\n * mutable graph workload:\n * - kuzudb/kuzu#5965: SET on a vector-indexed column is rejected with\n * \"Cannot set property vec in table embeddings because it is used in one or more\n * indexes\". The Kuzu team's own recommended workaround in that thread is\n * \"delay creation of the index itself\".\n * - kuzudb/kuzu#6040: DROP_VECTOR_INDEX leaves stale on-disk metadata, so once a\n * column has ever been indexed it becomes permanently un-writable - even fresh\n * CREATEs fail with \"Catalog exception: _N_<index>_UPPER does not exist\".\n *\n * `array_cosine_similarity` is a core Kuzu function (not part of the vector extension)\n * and runs in microseconds for the corpus sizes Phase 1 targets. We will switch back\n * to `CALL CREATE_VECTOR_INDEX` / `QUERY_VECTOR_INDEX` once the upstream fixes ship.\n */\nexport const SEMANTIC_SEARCH_MODE = \"brute-force\" as const;\n\n/** Default embedding dimension when none is supplied. Matches `text-embedding-3-small`. */\nexport const DEFAULT_EMBEDDING_DIMENSION = 1536;\n"],"mappings":";;;;;;AAAA,SAAS,aAAa;AACtB,SAAS,eAAe;AACxB,SAAS,SAAS,eAAe;AAEjC,YAAY,UAAU;;;ACmBf,IAAM,qBAAqB;AAAA,EAChC,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,QAAQ;AAAA,EACR,MAAM;AAAA,EACN,MAAM;AAAA,EACN,WAAW;AAAA,EACX,SAAS;AAAA,EACT,WAAW;AAAA,EACX,gBAAgB;AAAA,EAChB,YAAY;AAAA;AAAA,EAEZ,UAAU;AAAA,EACV,WAAW;AAAA,EACX,aAAa;AAAA;AAAA,EAEb,SAAS;AAAA,EACT,SAAS;AAAA;AAAA,EAET,QAAQ;AAAA,EACR,WAAW;AAAA,EACX,WAAW;AAAA;AAAA,EAEX,oBAAoB;AACtB;AAIO,IAAM,iBAAiB,OAAO,KAAK,kBAAkB;AAGrD,SAAS,WAAW,QAA+B;AACxD,QAAM,IAAI,mBAAmB,MAAM;AACnC,MAAI,MAAM,UAAW,QAAO;AAC5B,MAAI,MAAM,QAAS,QAAO;AAC1B,SAAO;AACT;AAGO,IAAM,eAAe,CAAC,MAAM;AAa5B,SAAS,sBAAsB,MAAgD;AACpF,QAAM,aAAa;AAAA,IACjB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,mBAAmB,KAAK,kBAAkB;AAAA,IAC1C;AAAA,EACF;AACA,QAAM,aAAuB;AAAA,IAC3B,0CAA0C,WAAW,KAAK,IAAI,CAAC;AAAA,EACjE;AACA,aAAW,QAAQ,YAAY;AAC7B,eAAW;AAAA,MACT,kCAAkC,IAAI;AAAA,IACxC;AAAA,EACF;AACA,SAAO;AACT;AAoBO,IAAM,uBAAuB;AAG7B,IAAM,8BAA8B;;;ADxGpC,SAAS,gBAAwB;AACtC,SAAO,QAAQ,QAAQ,GAAG,cAAc,OAAO;AACjD;AASO,IAAM,UAAN,MAAc;AAAA,EACF;AAAA,EACA;AAAA,EACT,KAA2B;AAAA,EAC3B,OAA+B;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAM/B,gBAAgB,oBAAI,IAAoC;AAAA,EAEhE,YAAY,OAAuB,CAAC,GAAG;AACrC,SAAK,SAAS,KAAK,UAAU,cAAc;AAC3C,SAAK,qBAAqB,KAAK,sBAAsB;AAAA,EACvD;AAAA,EAEA,MAAM,UAAyB;AAC7B,QAAI,KAAK,KAAM;AACf,UAAM,MAAM,QAAQ,KAAK,MAAM,GAAG,EAAE,WAAW,KAAK,CAAC;AACrD,SAAK,KAAK,IAAS,cAAS,KAAK,MAAM;AACvC,SAAK,OAAO,IAAS,gBAAW,KAAK,EAAE;AAAA,EACzC;AAAA,EAEA,MAAM,QAAuB;AAK3B,SAAK,cAAc,MAAM;AACzB,SAAK,OAAO;AACZ,SAAK,KAAK;AAAA,EACZ;AAAA,EAEQ,cAA+B;AACrC,QAAI,CAAC,KAAK,MAAM;AACd,YAAM,IAAI,MAAM,8CAA8C;AAAA,IAChE;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,UAAyB;AAC7B,UAAM,KAAK,QAAQ;AACnB,UAAM,cAAc,sBAAsB,EAAE,oBAAoB,KAAK,mBAAmB,CAAC;AACzF,eAAW,QAAQ,aAAa;AAC9B,YAAM,KAAK,KAAK,IAAI;AAAA,IACtB;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,MACJ,QACA,SAAkC,CAAC,GACV;AACzB,UAAM,SAAS,MAAM,KAAK,SAAS,QAAQ,MAAM;AACjD,UAAM,MAAM,MAAM,WAAW,MAAM;AACnC,UAAM,OAAO,IAAI,IAAI,CAAC,QAAQ,aAAa,GAAG,CAAC;AAC/C,UAAM,UAAU,IAAI,SAAS,IAAI,OAAO,KAAK,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC;AAC9D,WAAO,EAAE,MAAM,SAAS,UAAU,CAAC,EAAE;AAAA,EACvC;AAAA;AAAA,EAGA,MAAc,KAAK,QAAgB,SAAkC,CAAC,GAAkB;AACtF,UAAM,KAAK,SAAS,QAAQ,MAAM;AAAA,EACpC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAc,SAAS,QAAgB,QAAmD;AACxF,UAAM,OAAO,KAAK,YAAY;AAC9B,QAAI,OAAO,KAAK,MAAM,EAAE,WAAW,GAAG;AACpC,aAAO,KAAK,MAAM,MAAM;AAAA,IAC1B;AACA,QAAI,WAAW,KAAK,cAAc,IAAI,MAAM;AAC5C,QAAI,CAAC,UAAU;AACb,iBAAW,MAAM,KAAK,QAAQ,MAAM;AACpC,UAAI,CAAC,SAAS,UAAU,GAAG;AACzB,cAAM,IAAI,MAAM,SAAS,gBAAgB,CAAC;AAAA,MAC5C;AACA,WAAK,cAAc,IAAI,QAAQ,QAAQ;AAAA,IACzC;AAIA,WAAO,KAAK,QAAQ,UAAU,MAA8D;AAAA,EAC9F;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWA,MAAM,YAAY,OAAyC;AACzD,QAAI,MAAM,WAAW,EAAG;AACxB,UAAM,KAAK,QAAQ;AACnB,UAAM,UAAU,WAAW,KAAK;AAChC,UAAM,gBAAmC,CAAC;AAC1C,UAAM,mBAAsC,CAAC;AAC7C,eAAW,KAAK,SAAS;AACvB,UAAI,MAAM,QAAQ,EAAE,SAAS,KAAK,EAAE,UAAU,SAAS,GAAG;AACxD,sBAAc,KAAK,CAAC;AAAA,MACtB,OAAO;AACL,yBAAiB,KAAK,CAAC;AAAA,MACzB;AAAA,IACF;AACA,UAAM,KAAK,kBAAkB,kBAAkB,KAAK;AACpD,UAAM,KAAK,kBAAkB,eAAe,IAAI;AAAA,EAClD;AAAA,EAEA,MAAc,kBACZ,OACA,eACe;AACf,QAAI,MAAM,WAAW,EAAG;AACxB,UAAM,QAAQ;AACd,UAAM,UAAU,CAAC,GAAG,gBAAgB,GAAI,gBAAiB,CAAC,WAAW,IAAc,CAAC,CAAE;AACtF,UAAM,UAAU,QAAQ,IAAI,CAAC,MAAM,GAAG,CAAC,OAAO,CAAC,EAAE,EAAE,KAAK,IAAI;AAC5D,UAAM,SAAS,wCAAwC,OAAO;AAC9D,aAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK,OAAO;AAC5C,YAAM,QAAQ,MAAM,MAAM,GAAG,IAAI,KAAK;AACtC,YAAM,UAAU,MAAM,IAAI,CAAC,MAAM,eAAe,GAAG,eAAe,KAAK,kBAAkB,CAAC;AAC1F,YAAM,KAAK,KAAK,QAAQ,EAAE,OAAO,QAAQ,CAAC;AAAA,IAC5C;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,YAAY,OAAyC;AACzD,QAAI,MAAM,WAAW,EAAG;AACxB,UAAM,KAAK,QAAQ;AACnB,UAAM,SAAS,oBAAI,IAAiC;AACpD,eAAW,KAAK,OAAO;AACrB,YAAM,SAAS,OAAO,IAAI,EAAE,IAAI;AAChC,UAAI,OAAQ,QAAO,KAAK,CAAC;AAAA,UACpB,QAAO,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC;AAAA,IAC7B;AACA,UAAM,QAAQ;AACd,eAAW,CAAC,MAAM,KAAK,KAAK,QAAQ;AAClC,YAAM,SAAS,kGAAkG,IAAI;AACrH,eAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK,OAAO;AAC5C,cAAM,QAAQ,MAAM,MAAM,GAAG,IAAI,KAAK;AAGtC,cAAM,UAAU,MAAM,IAAI,CAAC,OAAO;AAAA,UAChC,QAAQ,EAAE;AAAA,UACV,MAAM,EAAE;AAAA,UACR,MAAM,OAAO,EAAE,SAAS,WAAW,EAAE,OAAO;AAAA,QAC9C,EAAE;AACF,cAAM,KAAK,KAAK,QAAQ,EAAE,OAAO,QAAQ,CAAC;AAAA,MAC5C;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,aAAa,QAAgB,OAAiC;AAClE,UAAM,KAAK,QAAQ;AACnB,QAAI,SAAS,MAAM,SAAS,GAAG;AAC7B,YAAM,KAAK;AAAA,QACT;AAAA,QACA,EAAE,QAAQ,MAAM;AAAA,MAClB;AACA;AAAA,IACF;AACA,UAAM,KAAK,KAAK,6DAA6D,EAAE,OAAO,CAAC;AAAA,EACzF;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAM,MAAM,QAIT;AACD,UAAM,KAAK,QAAQ;AACnB,UAAM,QAAgC,CAAC;AACvC,eAAW,QAAQ,YAAY;AAC7B,YAAM,IAAI,MAAM,KAAK;AAAA,QACnB;AAAA,QACA,EAAE,QAAQ,KAAK;AAAA,MACjB;AACA,YAAM,IAAI,IAAI,OAAO,EAAE,KAAK,CAAC,GAAG,SAAS,CAAC;AAAA,IAC5C;AACA,UAAM,QAAgC,CAAC;AACvC,eAAW,QAAQ,YAAY;AAC7B,YAAM,IAAI,MAAM,KAAK;AAAA,QACnB,uBAAuB,IAAI;AAAA;AAAA;AAAA,QAG3B,EAAE,OAAO;AAAA,MACX;AACA,YAAM,IAAI,IAAI,OAAO,EAAE,KAAK,CAAC,GAAG,SAAS,CAAC;AAAA,IAC5C;AACA,UAAM,MAAM,MAAM,KAAK;AAAA,MACrB;AAAA;AAAA;AAAA;AAAA,MAIA,EAAE,OAAO;AAAA,IACX;AACA,UAAM,MAAM,IAAI,KAAK,CAAC;AACtB,UAAM,QAAQ,OAAO,KAAK,SAAS,CAAC;AACpC,UAAM,WAAW,OAAO,KAAK,YAAY,CAAC;AAC1C,UAAM,WAAW,UAAU,IAAI,IAAI,WAAW;AAC9C,WAAO,EAAE,OAAO,OAAO,mBAAmB,SAAS;AAAA,EACrD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,iBAA0B;AACxB,WAAO;AAAA,EACT;AACF;AASA,SAAS,eACP,MACA,eACA,oBACyB;AACzB,QAAM,MAAM;AACZ,QAAM,MAA+B,CAAC;AACtC,aAAW,OAAO,gBAAgB;AAChC,UAAM,QAAQ,IAAI,GAAG;AACrB,QAAI,GAAG,IAAI,UAAU,UAAa,UAAU,OAAO,WAAW,GAAG,IAAI;AAAA,EACvE;AACA,MAAI,KAAK,KAAK;AACd,MAAI,OAAO,KAAK;AAChB,MAAI,eAAe;AACjB,UAAM,MAAM,KAAK;AACjB,QAAI,YACF,MAAM,QAAQ,GAAG,KAAK,IAAI,WAAW,qBACjC,MACA,IAAI,MAAc,kBAAkB,EAAE,KAAK,CAAC;AAAA,EACpD;AACA,SAAO;AACT;AAOA,SAAS,WAAW,OAA6C;AAC/D,QAAM,OAAO,oBAAI,IAA6B;AAC9C,aAAW,KAAK,OAAO;AACrB,SAAK,IAAI,EAAE,IAAI,CAAC;AAAA,EAClB;AACA,SAAO,MAAM,KAAK,KAAK,OAAO,CAAC;AACjC;AAGA,SAAS,aAAa,KAAuC;AAC3D,MAAI,eAAe,KAAK;AACtB,UAAM,MAA+B,CAAC;AACtC,eAAW,CAAC,GAAG,CAAC,KAAK,KAAK;AACxB,UAAI,OAAO,CAAC,CAAC,IAAI,YAAY,CAAC;AAAA,IAChC;AACA,WAAO;AAAA,EACT;AACA,MAAI,OAAO,OAAO,QAAQ,UAAU;AAClC,UAAM,MAAM;AACZ,UAAM,MAA+B,CAAC;AACtC,eAAW,KAAK,OAAO,KAAK,GAAG,GAAG;AAChC,UAAI,CAAC,IAAI,YAAY,IAAI,CAAC,CAAC;AAAA,IAC7B;AACA,WAAO;AAAA,EACT;AACA,SAAO,EAAE,OAAO,YAAY,GAAG,EAAE;AACnC;AAMA,SAAS,YAAY,OAAyB;AAC5C,MAAI,OAAO,UAAU,UAAU;AAC7B,QAAI,SAAS,OAAO,OAAO,gBAAgB,KAAK,SAAS,OAAO,OAAO,gBAAgB,GAAG;AACxF,aAAO,OAAO,KAAK;AAAA,IACrB;AACA,WAAO,MAAM,SAAS;AAAA,EACxB;AACA,MAAI,MAAM,QAAQ,KAAK,EAAG,QAAO,MAAM,IAAI,WAAW;AACtD,SAAO;AACT;AAGA,eAAe,WAAW,QAAqC;AAG7D,QAAM,SAAS,MAAM,QAAQ,MAAM,IAAI,OAAO,OAAO,SAAS,CAAC,IAAI;AACnE,MAAI,CAAC,OAAQ,QAAO,CAAC;AACrB,QAAM,SAAU,OAAiD;AACjE,MAAI,OAAO,WAAW,WAAY,QAAO,CAAC;AAC1C,SAAO,OAAO,KAAK,MAAM;AAC3B;","names":[]}
@@ -5,7 +5,7 @@ import {
5
5
  import {
6
6
  GraphDb,
7
7
  defaultDbPath
8
- } from "./chunk-F5QKPRNW.js";
8
+ } from "./chunk-2TORJYBO.js";
9
9
  import {
10
10
  DEFAULT_CONFIG,
11
11
  LLM_PRESETS,
@@ -433,16 +433,19 @@ async function selfTestKuzu(dbPath, embeddingDimension) {
433
433
  try {
434
434
  await db.connect();
435
435
  await db.migrate();
436
- const result = await db.query("RETURN 1 AS result");
436
+ const result = await db.query(
437
+ `RETURN array_cosine_similarity([1.0, 0.0], [1.0, 0.0]) AS similarity, true AS ok`
438
+ );
437
439
  await db.close();
438
- if (result.data[0]?.result === 1) {
439
- return {
440
- name: "kuzu round-trip",
441
- status: db.hasVectorIndex() ? "ok" : "warn",
442
- detail: db.hasVectorIndex() ? "ok (vector index ready)" : "ok (vector extension missing; semantic search disabled)"
443
- };
440
+ const row = result.data[0];
441
+ if (row?.ok !== true || typeof row.similarity !== "number") {
442
+ return { name: "kuzu round-trip", status: "fail", detail: "unexpected result" };
444
443
  }
445
- return { name: "kuzu round-trip", status: "fail", detail: "unexpected result" };
444
+ return {
445
+ name: "kuzu round-trip",
446
+ status: "ok",
447
+ detail: "ok (brute-force semantic search ready)"
448
+ };
446
449
  } catch (err) {
447
450
  await db.close().catch(() => {
448
451
  });
@@ -1327,19 +1330,25 @@ async function indexRepo(opts) {
1327
1330
  edges: allEdges,
1328
1331
  knownFilePaths
1329
1332
  });
1330
- await opts.graphDb.deleteByRepo(opts.repoId);
1331
- await opts.graphDb.upsertNodes(allNodes);
1332
- await opts.graphDb.upsertEdges(resolved);
1333
- opts.onProgress?.({ type: "upsert", nodes: allNodes.length, edges: resolved.length });
1334
- let embeddingCount = 0;
1333
+ let embeddedById = /* @__PURE__ */ new Map();
1335
1334
  if (!opts.skipEmbeddings && opts.router) {
1336
1335
  const embedded = await embedNodes(allNodes, {
1337
1336
  router: opts.router,
1338
1337
  onBatch: ({ embedded: embedded2, total }) => opts.onProgress?.({ type: "embed", embedded: embedded2, total })
1339
1338
  });
1340
- await persistEmbeddings(opts.graphDb, embedded);
1341
- embeddingCount = embedded.length;
1339
+ embeddedById = new Map(embedded.map((e) => [e.id, e]));
1342
1340
  }
1341
+ const nodePayload = allNodes.map((n) => {
1342
+ const e = embeddedById.get(n.id);
1343
+ const base = n;
1344
+ if (!e) return base;
1345
+ return { ...base, embedding: e.embedding, embeddingNamespace: e.embeddingNamespace };
1346
+ });
1347
+ await opts.graphDb.deleteByRepo(opts.repoId);
1348
+ await opts.graphDb.upsertNodes(nodePayload);
1349
+ await opts.graphDb.upsertEdges(resolved);
1350
+ opts.onProgress?.({ type: "upsert", nodes: allNodes.length, edges: resolved.length });
1351
+ const embeddingCount = embeddedById.size;
1343
1352
  return {
1344
1353
  durationMs: Date.now() - start,
1345
1354
  parsedFiles: parsedCount - failed,
@@ -1350,22 +1359,6 @@ async function indexRepo(opts) {
1350
1359
  droppedEdges: dropped
1351
1360
  };
1352
1361
  }
1353
- async function persistEmbeddings(graphDb, embedded) {
1354
- if (embedded.length === 0) return;
1355
- const BATCH = 100;
1356
- for (let i = 0; i < embedded.length; i += BATCH) {
1357
- const batch = embedded.slice(i, i + BATCH);
1358
- await graphDb.query(
1359
- `
1360
- UNWIND $batch AS e
1361
- MATCH (n:Symbol { id: e.id })
1362
- SET n.embedding = e.embedding,
1363
- n.embeddingNamespace = e.embeddingNamespace
1364
- `,
1365
- { batch }
1366
- );
1367
- }
1368
- }
1369
1362
  async function runWithConcurrency(items, concurrency, fn) {
1370
1363
  let cursor = 0;
1371
1364
  const runners = Array.from({ length: Math.min(concurrency, items.length) }, async () => {
@@ -2004,14 +1997,16 @@ var searchSemanticTool = {
2004
1997
  );
2005
1998
  }
2006
1999
  const namespace = `${deps.llm.embeddingNamespace.provider}:${deps.llm.embeddingNamespace.model}:${deps.llm.embeddingNamespace.dimension}`;
2000
+ const dim = deps.llm.embeddingNamespace.dimension;
2007
2001
  return cachedJsonResult("search_semantic", { description, k, namespace }, deps, async () => {
2008
2002
  const rows = await deps.graph.query(
2009
- `CALL QUERY_VECTOR_INDEX('Symbol', 'embedding_idx', $vec, $k)
2010
- WITH node, distance
2011
- WHERE node.embeddingNamespace = $ns
2012
- RETURN node.id AS id, node.name AS name, node.kind AS kind, node.path AS path,
2013
- node.lineStart AS line, node.signature AS signature, distance AS score
2014
- ORDER BY distance ASC`,
2003
+ `MATCH (s:Symbol)
2004
+ WHERE s.embeddingNamespace = $ns
2005
+ RETURN s.id AS id, s.name AS name, s.kind AS kind, s.path AS path,
2006
+ s.lineStart AS line, s.signature AS signature,
2007
+ array_cosine_similarity(s.embedding, CAST($vec AS FLOAT[${dim}])) AS score
2008
+ ORDER BY score DESC
2009
+ LIMIT $k`,
2015
2010
  { vec: embedding, k, ns: namespace }
2016
2011
  );
2017
2012
  return { description, k, namespace, count: rows.length, matches: rows };
@@ -2299,7 +2294,7 @@ async function startMcpServer(portOrOptions) {
2299
2294
  async function loadGraphClient(dbPath) {
2300
2295
  let mod;
2301
2296
  try {
2302
- mod = await import("./src-467W2KXC.js");
2297
+ mod = await import("./src-PDNTANJD.js");
2303
2298
  } catch (err) {
2304
2299
  throw new Error(
2305
2300
  `Failed to import @codegraph/graph-db. Run \`pnpm -r build\` first. Underlying error: ${err instanceof Error ? err.message : String(err)}`
@@ -2336,8 +2331,10 @@ async function loadLlmRouter(configPath2) {
2336
2331
  throw new Error("@codegraph/llm-router has no `createLlmRouter` export.");
2337
2332
  }
2338
2333
  const fileConfig = await readCodegraphConfig(configPath2);
2334
+ const llmConfig = fileConfig.llm ?? void 0;
2335
+ const effectiveConfig = llmConfig && llmConfig.generation && llmConfig.embeddings ? llmConfig : DEFAULT_CONFIG.llm;
2339
2336
  const router = await createLlmRouter2({
2340
- config: fileConfig.llm ?? void 0,
2337
+ config: effectiveConfig,
2341
2338
  configPath: configPath2
2342
2339
  });
2343
2340
  return adaptLlmRouter(router);
@@ -2397,7 +2394,7 @@ shutting down (${signal})...
2397
2394
  // src/program.ts
2398
2395
  function buildProgram() {
2399
2396
  const program = new Command();
2400
- program.name("codegraph").description("Live, queryable knowledge graph for your codebase").version("0.1.0").option("--verbose", "Print full stack traces on error").hook("preAction", (thisCommand) => {
2397
+ program.name("codegraph").description("Live, queryable knowledge graph for your codebase").version("0.1.2").option("--verbose", "Print full stack traces on error").hook("preAction", (thisCommand) => {
2401
2398
  const opts = thisCommand.optsWithGlobals();
2402
2399
  if (opts.verbose) process.env.CODEGRAPH_VERBOSE = "1";
2403
2400
  });
@@ -2443,4 +2440,4 @@ export {
2443
2440
  renderError,
2444
2441
  buildProgram
2445
2442
  };
2446
- //# sourceMappingURL=chunk-O4ZO6CP5.js.map
2443
+ //# sourceMappingURL=chunk-36AWRLQ6.js.map