@leanlabsinnov/codegraph 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # @cyrilc/codegraph
1
+ # @leanlabsinnov/codegraph
2
2
 
3
3
  Live, queryable knowledge graph for your codebase. Indexes JS/TS into an embedded graph
4
4
  DB with vector embeddings, then exposes a local MCP server that Claude Code, Cursor, and
@@ -12,7 +12,7 @@ external services.
12
12
  ## Install
13
13
 
14
14
  ```bash
15
- npm i -g @cyrilc/codegraph
15
+ npm i -g @leanlabsinnov/codegraph
16
16
  ```
17
17
 
18
18
  Requires Node 20+.
@@ -29,7 +29,7 @@ codegraph serve
29
29
  Then point Claude Code / Cursor / Windsurf at `http://127.0.0.1:3748/mcp` with the bearer
30
30
  token from `~/.codegraph/config.json`.
31
31
 
32
- See the full [README on GitHub](https://github.com/cyrilc/codegraph) for client setup, all
32
+ See the full [README on GitHub](https://github.com/leanlabsinnov/codegraph) for client setup, all
33
33
  10 MCP tools, and troubleshooting.
34
34
 
35
35
  ## Commands
package/dist/bin.js CHANGED
@@ -2,9 +2,9 @@
2
2
  import {
3
3
  buildProgram,
4
4
  renderError
5
- } from "./chunk-AMJXGXLM.js";
5
+ } from "./chunk-36AWRLQ6.js";
6
6
  import "./chunk-B2TIVKUB.js";
7
- import "./chunk-VFE242Y7.js";
7
+ import "./chunk-2TORJYBO.js";
8
8
  import "./chunk-XGPZDCQ4.js";
9
9
 
10
10
  // src/bin.ts
@@ -77,13 +77,7 @@ function buildSchemaStatements(opts) {
77
77
  }
78
78
  return statements;
79
79
  }
80
- function buildVectorIndexStatements() {
81
- return [
82
- "INSTALL VECTOR",
83
- "LOAD EXTENSION VECTOR",
84
- "CALL CREATE_VECTOR_INDEX('Symbol', 'embedding_idx', 'embedding', metric := 'cosine')"
85
- ];
86
- }
80
+ var SEMANTIC_SEARCH_MODE = "brute-force";
87
81
  var DEFAULT_EMBEDDING_DIMENSION = 1536;
88
82
 
89
83
  // ../graph-db/src/client.ts
@@ -95,7 +89,6 @@ var GraphDb = class {
95
89
  embeddingDimension;
96
90
  db = null;
97
91
  conn = null;
98
- vectorIndexReady = false;
99
92
  /**
100
93
  * Cache of `conn.prepare()` results keyed by Cypher source. Kuzu's Node SDK requires a
101
94
  * prepared statement for any parameterized query - reusing the prepared object keeps
@@ -116,7 +109,6 @@ var GraphDb = class {
116
109
  this.preparedCache.clear();
117
110
  this.conn = null;
118
111
  this.db = null;
119
- this.vectorIndexReady = false;
120
112
  }
121
113
  requireConn() {
122
114
  if (!this.conn) {
@@ -125,8 +117,11 @@ var GraphDb = class {
125
117
  return this.conn;
126
118
  }
127
119
  /**
128
- * Idempotent migration. Runs all DDL through `IF NOT EXISTS` guards and tolerates the
129
- * vector-index "already exists" error from re-runs.
120
+ * Idempotent migration: creates the `Symbol` node table and one REL table per
121
+ * `EdgeKind`. The `embedding FLOAT[N]` column lives on `Symbol` but we deliberately
122
+ * skip Kuzu's `CREATE_VECTOR_INDEX` - semantic search is brute-force via
123
+ * `array_cosine_similarity` to work around kuzudb/kuzu#5965 and kuzudb/kuzu#6040.
124
+ * See `schema.ts` for the full rationale.
130
125
  */
131
126
  async migrate() {
132
127
  await this.connect();
@@ -134,22 +129,6 @@ var GraphDb = class {
134
129
  for (const stmt of schemaStmts) {
135
130
  await this.exec(stmt);
136
131
  }
137
- for (const stmt of buildVectorIndexStatements()) {
138
- try {
139
- await this.exec(stmt);
140
- } catch (err) {
141
- const message = err instanceof Error ? err.message : String(err);
142
- if (isAlreadyExistsError(message)) continue;
143
- if (/extension/i.test(message) && /(not found|missing|unsupported|disabled)/i.test(message)) {
144
- console.warn(
145
- `[codegraph] vector extension unavailable; semantic search disabled. Underlying: ${message}`
146
- );
147
- return;
148
- }
149
- throw new Error(`migrate failed on \`${stmt}\`: ${message}`);
150
- }
151
- }
152
- this.vectorIndexReady = true;
153
132
  }
154
133
  /**
155
134
  * Typed Cypher escape hatch.
@@ -192,19 +171,39 @@ var GraphDb = class {
192
171
  return conn.execute(prepared, params);
193
172
  }
194
173
  /**
195
- * Upserts nodes via batched UNWIND + MERGE. Kuzu requires every column we SET to exist
196
- * in the schema, so each row is normalized to include every `SYMBOL_COLUMNS` field (NULL
197
- * for fields not present).
174
+ * Inserts nodes via batched UNWIND + bare CREATE. The whole property map (including
175
+ * `embedding` when present) is set in the CREATE clause - we deliberately avoid `SET`
176
+ * because Kuzu rejects writes to an HNSW-indexed column even after the index is dropped
177
+ * (kuzudb/kuzu#6040). Callers must wipe pre-existing rows with `deleteByRepo` first.
178
+ *
179
+ * In-batch duplicates (same `id`) are coalesced to the last occurrence to keep CREATE
180
+ * from violating the primary-key uniqueness constraint.
198
181
  */
199
182
  async upsertNodes(nodes) {
200
183
  if (nodes.length === 0) return;
201
184
  await this.connect();
185
+ const deduped = dedupeById(nodes);
186
+ const withEmbedding = [];
187
+ const withoutEmbedding = [];
188
+ for (const n of deduped) {
189
+ if (Array.isArray(n.embedding) && n.embedding.length > 0) {
190
+ withEmbedding.push(n);
191
+ } else {
192
+ withoutEmbedding.push(n);
193
+ }
194
+ }
195
+ await this.createSymbolBatch(withoutEmbedding, false);
196
+ await this.createSymbolBatch(withEmbedding, true);
197
+ }
198
+ async createSymbolBatch(nodes, withEmbedding) {
199
+ if (nodes.length === 0) return;
202
200
  const BATCH = 200;
203
- const setClause = SYMBOL_COLUMNS.filter((c) => c !== "id").map((c) => `n.${c} = r.${c}`).join(", ");
204
- const cypher = `UNWIND $batch AS r MERGE (n:Symbol {id: r.id}) SET ${setClause}`;
201
+ const columns = [...SYMBOL_COLUMNS, ...withEmbedding ? ["embedding"] : []];
202
+ const propMap = columns.map((c) => `${c}: r.${c}`).join(", ");
203
+ const cypher = `UNWIND $batch AS r CREATE (n:Symbol {${propMap}})`;
205
204
  for (let i = 0; i < nodes.length; i += BATCH) {
206
205
  const slice = nodes.slice(i, i + BATCH);
207
- const payload = slice.map(buildSymbolRow);
206
+ const payload = slice.map((n) => buildSymbolRow(n, withEmbedding, this.embeddingDimension));
208
207
  await this.exec(cypher, { batch: payload });
209
208
  }
210
209
  }
@@ -291,12 +290,17 @@ var GraphDb = class {
291
290
  const coverage = total === 0 ? 0 : embedded / total;
292
291
  return { nodes, edges, embeddingCoverage: coverage };
293
292
  }
294
- /** True once `migrate()` confirmed the vector extension is loaded. */
293
+ /**
294
+ * v0.1.x never creates an HNSW index - semantic search is brute-force via
295
+ * `array_cosine_similarity`. Always returns `false`. Kept on the surface so callers
296
+ * (e.g. `codegraph doctor`) can branch on a single boolean once the upstream Kuzu
297
+ * fixes ship and we flip the index back on.
298
+ */
295
299
  hasVectorIndex() {
296
- return this.vectorIndexReady;
300
+ return false;
297
301
  }
298
302
  };
299
- function buildSymbolRow(node) {
303
+ function buildSymbolRow(node, withEmbedding, embeddingDimension) {
300
304
  const src = node;
301
305
  const row = {};
302
306
  for (const col of SYMBOL_COLUMNS) {
@@ -305,8 +309,19 @@ function buildSymbolRow(node) {
305
309
  }
306
310
  row.id = node.id;
307
311
  row.kind = node.kind;
312
+ if (withEmbedding) {
313
+ const vec = node.embedding;
314
+ row.embedding = Array.isArray(vec) && vec.length === embeddingDimension ? vec : new Array(embeddingDimension).fill(0);
315
+ }
308
316
  return row;
309
317
  }
318
+ function dedupeById(nodes) {
319
+ const seen = /* @__PURE__ */ new Map();
320
+ for (const n of nodes) {
321
+ seen.set(n.id, n);
322
+ }
323
+ return Array.from(seen.values());
324
+ }
310
325
  function normalizeRow(row) {
311
326
  if (row instanceof Map) {
312
327
  const out = {};
@@ -342,9 +357,6 @@ async function collectAll(result) {
342
357
  if (typeof getAll !== "function") return [];
343
358
  return getAll.call(target);
344
359
  }
345
- function isAlreadyExistsError(message) {
346
- return /already exists/i.test(message) || /already loaded/i.test(message) || /already installed/i.test(message) || /duplicate (table|index)/i.test(message);
347
- }
348
360
 
349
361
  export {
350
362
  SYMBOL_COLUMN_SPEC,
@@ -352,9 +364,9 @@ export {
352
364
  defaultFor,
353
365
  EDGE_COLUMNS,
354
366
  buildSchemaStatements,
355
- buildVectorIndexStatements,
367
+ SEMANTIC_SEARCH_MODE,
356
368
  DEFAULT_EMBEDDING_DIMENSION,
357
369
  defaultDbPath,
358
370
  GraphDb
359
371
  };
360
- //# sourceMappingURL=chunk-VFE242Y7.js.map
372
+ //# sourceMappingURL=chunk-2TORJYBO.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../graph-db/src/client.ts","../../graph-db/src/schema.ts"],"sourcesContent":["import { mkdir } from \"node:fs/promises\";\nimport { homedir } from \"node:os\";\nimport { dirname, resolve } from \"node:path\";\nimport { EDGE_KINDS, NODE_KINDS, type EdgeKind } from \"@codegraph/shared\";\nimport * as kuzu from \"kuzu\";\nimport {\n DEFAULT_EMBEDDING_DIMENSION,\n SYMBOL_COLUMNS,\n buildSchemaStatements,\n defaultFor,\n} from \"./schema.js\";\nimport type { QueryResult, UpsertEdgeInput, UpsertNodeInput } from \"./types.js\";\n\nexport interface GraphDbOptions {\n /** Directory where Kuzu stores its on-disk database files. Defaults to `~/.codegraph/graph`. */\n dbPath?: string;\n /**\n * Legacy `url` option kept for back-compat with Phase-1 callers. Ignored at runtime -\n * Kuzu is embedded - but accepted so existing call sites compile while the rest of the\n * codebase migrates to `dbPath`.\n */\n url?: string;\n /** Vector dimension for the `Symbol.embedding` column. Baked into the schema at create time. */\n embeddingDimension?: number;\n}\n\n/** Default on-disk location for the embedded graph. */\nexport function defaultDbPath(): string {\n return resolve(homedir(), \".codegraph\", \"graph\");\n}\n\n/**\n * Thin, typed wrapper around the embedded Kuzu database.\n *\n * Public surface (intentionally identical to the Phase-1 FalkorDB client so callers don't\n * change): connect / close / migrate / query / upsertNodes / upsertEdges / deleteByRepo /\n * stats. Internals are pure Kuzu.\n */\nexport class GraphDb {\n private readonly dbPath: string;\n private readonly embeddingDimension: number;\n private db: kuzu.Database | null = null;\n private conn: kuzu.Connection | null = null;\n /**\n * Cache of `conn.prepare()` results keyed by Cypher source. Kuzu's Node SDK requires a\n * prepared statement for any parameterized query - reusing the prepared object keeps\n * UNWIND-batched upserts fast.\n */\n private preparedCache = new Map<string, kuzu.PreparedStatement>();\n\n constructor(opts: GraphDbOptions = {}) {\n this.dbPath = opts.dbPath ?? defaultDbPath();\n this.embeddingDimension = opts.embeddingDimension ?? DEFAULT_EMBEDDING_DIMENSION;\n }\n\n async connect(): Promise<void> {\n if (this.conn) return;\n await mkdir(dirname(this.dbPath), { recursive: true });\n this.db = new kuzu.Database(this.dbPath);\n this.conn = new kuzu.Connection(this.db);\n }\n\n async close(): Promise<void> {\n // We deliberately do NOT call `conn.close()` / `db.close()` on Kuzu's Node bindings.\n // In 0.11.x those native handles are also disposed by the binding's process-exit hook,\n // and double-disposing can SIGSEGV the worker on cleanup. Dropping references is\n // enough for the GC to release the underlying memory before the process exits.\n this.preparedCache.clear();\n this.conn = null;\n this.db = null;\n }\n\n private requireConn(): kuzu.Connection {\n if (!this.conn) {\n throw new Error(\"GraphDb not connected. Call connect() first.\");\n }\n return this.conn;\n }\n\n /**\n * Idempotent migration: creates the `Symbol` node table and one REL table per\n * `EdgeKind`. The `embedding FLOAT[N]` column lives on `Symbol` but we deliberately\n * skip Kuzu's `CREATE_VECTOR_INDEX` - semantic search is brute-force via\n * `array_cosine_similarity` to work around kuzudb/kuzu#5965 and kuzudb/kuzu#6040.\n * See `schema.ts` for the full rationale.\n */\n async migrate(): Promise<void> {\n await this.connect();\n const schemaStmts = buildSchemaStatements({ embeddingDimension: this.embeddingDimension });\n for (const stmt of schemaStmts) {\n await this.exec(stmt);\n }\n }\n\n /**\n * Typed Cypher escape hatch.\n *\n * Kuzu returns BIGINT columns as native BigInt; we coerce to plain `number` when safe so\n * downstream JSON serialization (MCP responses, snapshot tests) does not need bespoke\n * handling.\n */\n async query<T = Record<string, unknown>>(\n cypher: string,\n params: Record<string, unknown> = {},\n ): Promise<QueryResult<T>> {\n const result = await this.runQuery(cypher, params);\n const raw = await collectAll(result);\n const data = raw.map((row) => normalizeRow(row)) as T[];\n const headers = raw.length > 0 ? Object.keys(raw[0] ?? {}) : [];\n return { data, headers, metadata: [] };\n }\n\n /** Fire-and-forget DDL/exec. */\n private async exec(cypher: string, params: Record<string, unknown> = {}): Promise<void> {\n await this.runQuery(cypher, params);\n }\n\n /**\n * Bridge to Kuzu's two execution paths:\n * - `conn.query(stmt)` for unparameterized statements (the second positional arg is a\n * `progressCallback`, NOT params - mistaking that is the #1 way to confuse the API).\n * - `conn.prepare(stmt) + conn.execute(prepared, params)` for anything with `$name`\n * placeholders. We cache the prepared statement so UNWIND batches reuse it.\n */\n private async runQuery(cypher: string, params: Record<string, unknown>): Promise<unknown> {\n const conn = this.requireConn();\n if (Object.keys(params).length === 0) {\n return conn.query(cypher);\n }\n let prepared = this.preparedCache.get(cypher);\n if (!prepared) {\n prepared = await conn.prepare(cypher);\n if (!prepared.isSuccess()) {\n throw new Error(prepared.getErrorMessage());\n }\n this.preparedCache.set(cypher, prepared);\n }\n // Cast through `unknown`: Kuzu's bindings advertise a strict `KuzuValue` union, but\n // we can pass through any JSON-serializable value the embedded engine accepts (nested\n // structs and lists are converted at the native layer).\n return conn.execute(prepared, params as unknown as Parameters<kuzu.Connection[\"execute\"]>[1]);\n }\n\n /**\n * Inserts nodes via batched UNWIND + bare CREATE. The whole property map (including\n * `embedding` when present) is set in the CREATE clause - we deliberately avoid `SET`\n * because Kuzu rejects writes to an HNSW-indexed column even after the index is dropped\n * (kuzudb/kuzu#6040). Callers must wipe pre-existing rows with `deleteByRepo` first.\n *\n * In-batch duplicates (same `id`) are coalesced to the last occurrence to keep CREATE\n * from violating the primary-key uniqueness constraint.\n */\n async upsertNodes(nodes: UpsertNodeInput[]): Promise<void> {\n if (nodes.length === 0) return;\n await this.connect();\n const deduped = dedupeById(nodes);\n const withEmbedding: UpsertNodeInput[] = [];\n const withoutEmbedding: UpsertNodeInput[] = [];\n for (const n of deduped) {\n if (Array.isArray(n.embedding) && n.embedding.length > 0) {\n withEmbedding.push(n);\n } else {\n withoutEmbedding.push(n);\n }\n }\n await this.createSymbolBatch(withoutEmbedding, false);\n await this.createSymbolBatch(withEmbedding, true);\n }\n\n private async createSymbolBatch(\n nodes: UpsertNodeInput[],\n withEmbedding: boolean,\n ): Promise<void> {\n if (nodes.length === 0) return;\n const BATCH = 200;\n const columns = [...SYMBOL_COLUMNS, ...(withEmbedding ? ([\"embedding\"] as const) : [])];\n const propMap = columns.map((c) => `${c}: r.${c}`).join(\", \");\n const cypher = `UNWIND $batch AS r CREATE (n:Symbol {${propMap}})`;\n for (let i = 0; i < nodes.length; i += BATCH) {\n const slice = nodes.slice(i, i + BATCH);\n const payload = slice.map((n) => buildSymbolRow(n, withEmbedding, this.embeddingDimension));\n await this.exec(cypher, { batch: payload });\n }\n }\n\n /**\n * Upserts edges. Both endpoints must already exist as `Symbol` nodes; rows where the\n * MATCH fails are silently dropped, matching Cypher semantics.\n *\n * Uses CREATE because the orchestrator wipes the repo's slice before writing, so\n * duplicates can't pre-exist within a single index pass.\n */\n async upsertEdges(edges: UpsertEdgeInput[]): Promise<void> {\n if (edges.length === 0) return;\n await this.connect();\n const byKind = new Map<EdgeKind, UpsertEdgeInput[]>();\n for (const e of edges) {\n const bucket = byKind.get(e.kind);\n if (bucket) bucket.push(e);\n else byKind.set(e.kind, [e]);\n }\n const BATCH = 500;\n for (const [kind, batch] of byKind) {\n const cypher = `UNWIND $batch AS r MATCH (a:Symbol {id: r.fromId}) MATCH (b:Symbol {id: r.toId}) CREATE (a)-[e:${kind} {line: r.line}]->(b)`;\n for (let i = 0; i < batch.length; i += BATCH) {\n const slice = batch.slice(i, i + BATCH);\n // Use 0 (not null) for missing line numbers so Kuzu can infer the struct field\n // as INT64 even when an entire batch happens to have no `line` set.\n const payload = slice.map((e) => ({\n fromId: e.fromId,\n toId: e.toId,\n line: typeof e.line === \"number\" ? e.line : 0,\n }));\n await this.exec(cypher, { batch: payload });\n }\n }\n }\n\n /**\n * Deletes all nodes (and incident edges via DETACH DELETE) for a repo. If `paths` is\n * provided, restricts the delete to nodes whose `path` is in the list - used by\n * incremental re-indexing.\n */\n async deleteByRepo(repoId: string, paths?: string[]): Promise<void> {\n await this.connect();\n if (paths && paths.length > 0) {\n await this.exec(\n \"MATCH (n:Symbol) WHERE n.repoId = $repoId AND n.path IN $paths DETACH DELETE n\",\n { repoId, paths },\n );\n return;\n }\n await this.exec(\"MATCH (n:Symbol) WHERE n.repoId = $repoId DETACH DELETE n\", { repoId });\n }\n\n /**\n * Returns counts of nodes (per kind) and edges (per kind) for a repo, plus the share of\n * non-File nodes that carry an embedding.\n */\n async stats(repoId: string): Promise<{\n nodes: Record<string, number>;\n edges: Record<string, number>;\n embeddingCoverage: number;\n }> {\n await this.connect();\n const nodes: Record<string, number> = {};\n for (const kind of NODE_KINDS) {\n const r = await this.query<{ count: number }>(\n \"MATCH (n:Symbol) WHERE n.repoId = $repoId AND n.kind = $kind RETURN count(n) AS count\",\n { repoId, kind },\n );\n nodes[kind] = Number(r.data[0]?.count ?? 0);\n }\n const edges: Record<string, number> = {};\n for (const kind of EDGE_KINDS) {\n const r = await this.query<{ count: number }>(\n `MATCH (a:Symbol)-[r:${kind}]->(b:Symbol)\n WHERE a.repoId = $repoId AND b.repoId = $repoId\n RETURN count(r) AS count`,\n { repoId },\n );\n edges[kind] = Number(r.data[0]?.count ?? 0);\n }\n const cov = await this.query<{ total: number | bigint; embedded: number | bigint }>(\n `MATCH (n:Symbol)\n WHERE n.repoId = $repoId AND n.kind <> 'File'\n RETURN count(n) AS total,\n count(n.embedding) AS embedded`,\n { repoId },\n );\n const row = cov.data[0];\n const total = Number(row?.total ?? 0);\n const embedded = Number(row?.embedded ?? 0);\n const coverage = total === 0 ? 0 : embedded / total;\n return { nodes, edges, embeddingCoverage: coverage };\n }\n\n /**\n * v0.1.x never creates an HNSW index - semantic search is brute-force via\n * `array_cosine_similarity`. Always returns `false`. Kept on the surface so callers\n * (e.g. `codegraph doctor`) can branch on a single boolean once the upstream Kuzu\n * fixes ship and we flip the index back on.\n */\n hasVectorIndex(): boolean {\n return false;\n }\n}\n\n/**\n * Build a fully-populated row for a Kuzu UNWIND batch. Every column in `SYMBOL_COLUMNS`\n * is present (typed default when missing) so Kuzu can infer a homogeneous struct schema\n * for the batch parameter. When `withEmbedding` is true the `embedding` field is also\n * populated - we either use the provided vector or a zero-vector of the configured\n * dimension so the struct schema stays uniform across the batch.\n */\nfunction buildSymbolRow(\n node: UpsertNodeInput,\n withEmbedding: boolean,\n embeddingDimension: number,\n): Record<string, unknown> {\n const src = node as unknown as Record<string, unknown>;\n const row: Record<string, unknown> = {};\n for (const col of SYMBOL_COLUMNS) {\n const value = src[col];\n row[col] = value === undefined || value === null ? defaultFor(col) : value;\n }\n row.id = node.id;\n row.kind = node.kind;\n if (withEmbedding) {\n const vec = node.embedding;\n row.embedding =\n Array.isArray(vec) && vec.length === embeddingDimension\n ? vec\n : new Array<number>(embeddingDimension).fill(0);\n }\n return row;\n}\n\n/**\n * Coalesce same-id rows down to the last occurrence. Required because we now use bare\n * CREATE (not MERGE) for inserts and Kuzu rejects primary-key collisions inside a single\n * UNWIND batch.\n */\nfunction dedupeById(nodes: UpsertNodeInput[]): UpsertNodeInput[] {\n const seen = new Map<string, UpsertNodeInput>();\n for (const n of nodes) {\n seen.set(n.id, n);\n }\n return Array.from(seen.values());\n}\n\n/** Convert Kuzu's row representation (Map or plain object) into a plain JSON object. */\nfunction normalizeRow(row: unknown): Record<string, unknown> {\n if (row instanceof Map) {\n const out: Record<string, unknown> = {};\n for (const [k, v] of row) {\n out[String(k)] = coerceValue(v);\n }\n return out;\n }\n if (row && typeof row === \"object\") {\n const src = row as Record<string, unknown>;\n const out: Record<string, unknown> = {};\n for (const k of Object.keys(src)) {\n out[k] = coerceValue(src[k]);\n }\n return out;\n }\n return { value: coerceValue(row) };\n}\n\n/**\n * Kuzu returns BIGINT columns as JS BigInt. Coerce to `number` when within Number.MAX_SAFE\n * for JSON-friendly downstream consumption.\n */\nfunction coerceValue(value: unknown): unknown {\n if (typeof value === \"bigint\") {\n if (value <= BigInt(Number.MAX_SAFE_INTEGER) && value >= BigInt(Number.MIN_SAFE_INTEGER)) {\n return Number(value);\n }\n return value.toString();\n }\n if (Array.isArray(value)) return value.map(coerceValue);\n return value;\n}\n\n/** Drain a Kuzu QueryResult (or array of them) into an array of row objects. */\nasync function collectAll(result: unknown): Promise<unknown[]> {\n // Multi-statement queries return an array; we keep only the last one (matches how the\n // final statement is the one that carries a `RETURN`).\n const target = Array.isArray(result) ? result[result.length - 1] : result;\n if (!target) return [];\n const getAll = (target as { getAll?: () => Promise<unknown[]> }).getAll;\n if (typeof getAll !== \"function\") return [];\n return getAll.call(target);\n}\n\n","import { EDGE_KINDS } from \"@codegraph/shared\";\n\n/**\n * Kuzu is schema-first. Unlike FalkorDB which is schema-less, every column we ever want to\n * SET on a node must exist up-front. We use ONE `Symbol` node table with a `kind` column\n * (Kuzu does not support multi-labels), and one REL table per `EdgeKind`.\n *\n * Columns are the union of every field across the `GraphNode` discriminated union in\n * `@codegraph/shared` plus the two embedding-namespace fields. Fields that are not\n * relevant to a given kind stay NULL.\n */\n\n/**\n * Per-column metadata so the upserter can build batches with explicit typed defaults.\n *\n * Kuzu's struct parameter type inference fails when a column is null on every row in a\n * batch (it defaults to STRING and rejects assignment to a BOOL/INT64 column). Concrete\n * defaults keep inference deterministic and let us skip clunky CAST() clauses.\n *\n * Convention: optional booleans default to `false`, optional ints to `0`, optional strings\n * to `\"\"`. We never check `WHERE n.foo IS NULL` in queries, so the lost null-distinction\n * is acceptable for v0.1.0.\n */\nexport const SYMBOL_COLUMN_SPEC = {\n id: \"STRING\",\n kind: \"STRING\",\n repoId: \"STRING\",\n name: \"STRING\",\n path: \"STRING\",\n lineStart: \"INT64\",\n lineEnd: \"INT64\",\n signature: \"STRING\",\n leadingComment: \"STRING\",\n isExported: \"BOOLEAN\",\n // File-specific\n language: \"STRING\",\n sizeBytes: \"INT64\",\n contentHash: \"STRING\",\n // Function-specific\n isAsync: \"BOOLEAN\",\n isArrow: \"BOOLEAN\",\n // Route-specific\n method: \"STRING\",\n routePath: \"STRING\",\n framework: \"STRING\",\n // Embedding namespace tag\n embeddingNamespace: \"STRING\",\n} as const;\n\nexport type SymbolColumn = keyof typeof SYMBOL_COLUMN_SPEC;\n\nexport const SYMBOL_COLUMNS = Object.keys(SYMBOL_COLUMN_SPEC) as SymbolColumn[];\n\n/** Return the typed default for an unset optional column. */\nexport function defaultFor(column: SymbolColumn): unknown {\n const t = SYMBOL_COLUMN_SPEC[column];\n if (t === \"BOOLEAN\") return false;\n if (t === \"INT64\") return 0;\n return \"\";\n}\n\n/** Optional per-edge metadata. Currently only `line`. */\nexport const EDGE_COLUMNS = [\"line\"] as const;\n\nexport type EdgeColumn = (typeof EDGE_COLUMNS)[number];\n\n/**\n * DDL statements that bring an empty Kuzu database to the codegraph schema.\n * `IF NOT EXISTS` makes `migrate()` idempotent so it can run on every connect.\n *\n * `embedding` is a fixed-dimension column - dimension is configured at migrate time and\n * baked into the schema. If a user later switches to an embedding provider with a\n * different dimension they must delete the on-disk graph directory to recreate it. The\n * embedding-namespace tag ensures we never silently mix dimensions.\n */\nexport function buildSchemaStatements(opts: { embeddingDimension: number }): string[] {\n const columnDefs = [\n \"id STRING\",\n \"kind STRING\",\n \"repoId STRING\",\n \"name STRING\",\n \"path STRING\",\n \"lineStart INT64\",\n \"lineEnd INT64\",\n \"signature STRING\",\n \"leadingComment STRING\",\n \"isExported BOOLEAN\",\n \"language STRING\",\n \"sizeBytes INT64\",\n \"contentHash STRING\",\n \"isAsync BOOLEAN\",\n \"isArrow BOOLEAN\",\n \"method STRING\",\n \"routePath STRING\",\n \"framework STRING\",\n \"embeddingNamespace STRING\",\n `embedding FLOAT[${opts.embeddingDimension}]`,\n \"PRIMARY KEY (id)\",\n ];\n const statements: string[] = [\n `CREATE NODE TABLE IF NOT EXISTS Symbol(${columnDefs.join(\", \")})`,\n ];\n for (const kind of EDGE_KINDS) {\n statements.push(\n `CREATE REL TABLE IF NOT EXISTS ${kind}(FROM Symbol TO Symbol, line INT64)`,\n );\n }\n return statements;\n}\n\n/**\n * Semantic search in v0.1.x is intentionally brute-force via Kuzu's built-in\n * `array_cosine_similarity` function - we do NOT create an HNSW vector index.\n *\n * Why: Kuzu 0.11.x has two open issues that make the HNSW path unusable for a\n * mutable graph workload:\n * - kuzudb/kuzu#5965: SET on a vector-indexed column is rejected with\n * \"Cannot set property vec in table embeddings because it is used in one or more\n * indexes\". The Kuzu team's own recommended workaround in that thread is\n * \"delay creation of the index itself\".\n * - kuzudb/kuzu#6040: DROP_VECTOR_INDEX leaves stale on-disk metadata, so once a\n * column has ever been indexed it becomes permanently un-writable - even fresh\n * CREATEs fail with \"Catalog exception: _N_<index>_UPPER does not exist\".\n *\n * `array_cosine_similarity` is a core Kuzu function (not part of the vector extension)\n * and runs in microseconds for the corpus sizes Phase 1 targets. We will switch back\n * to `CALL CREATE_VECTOR_INDEX` / `QUERY_VECTOR_INDEX` once the upstream fixes ship.\n */\nexport const SEMANTIC_SEARCH_MODE = \"brute-force\" as const;\n\n/** Default embedding dimension when none is supplied. Matches `text-embedding-3-small`. */\nexport const DEFAULT_EMBEDDING_DIMENSION = 1536;\n"],"mappings":";;;;;;AAAA,SAAS,aAAa;AACtB,SAAS,eAAe;AACxB,SAAS,SAAS,eAAe;AAEjC,YAAY,UAAU;;;ACmBf,IAAM,qBAAqB;AAAA,EAChC,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,QAAQ;AAAA,EACR,MAAM;AAAA,EACN,MAAM;AAAA,EACN,WAAW;AAAA,EACX,SAAS;AAAA,EACT,WAAW;AAAA,EACX,gBAAgB;AAAA,EAChB,YAAY;AAAA;AAAA,EAEZ,UAAU;AAAA,EACV,WAAW;AAAA,EACX,aAAa;AAAA;AAAA,EAEb,SAAS;AAAA,EACT,SAAS;AAAA;AAAA,EAET,QAAQ;AAAA,EACR,WAAW;AAAA,EACX,WAAW;AAAA;AAAA,EAEX,oBAAoB;AACtB;AAIO,IAAM,iBAAiB,OAAO,KAAK,kBAAkB;AAGrD,SAAS,WAAW,QAA+B;AACxD,QAAM,IAAI,mBAAmB,MAAM;AACnC,MAAI,MAAM,UAAW,QAAO;AAC5B,MAAI,MAAM,QAAS,QAAO;AAC1B,SAAO;AACT;AAGO,IAAM,eAAe,CAAC,MAAM;AAa5B,SAAS,sBAAsB,MAAgD;AACpF,QAAM,aAAa;AAAA,IACjB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,mBAAmB,KAAK,kBAAkB;AAAA,IAC1C;AAAA,EACF;AACA,QAAM,aAAuB;AAAA,IAC3B,0CAA0C,WAAW,KAAK,IAAI,CAAC;AAAA,EACjE;AACA,aAAW,QAAQ,YAAY;AAC7B,eAAW;AAAA,MACT,kCAAkC,IAAI;AAAA,IACxC;AAAA,EACF;AACA,SAAO;AACT;AAoBO,IAAM,uBAAuB;AAG7B,IAAM,8BAA8B;;;ADxGpC,SAAS,gBAAwB;AACtC,SAAO,QAAQ,QAAQ,GAAG,cAAc,OAAO;AACjD;AASO,IAAM,UAAN,MAAc;AAAA,EACF;AAAA,EACA;AAAA,EACT,KAA2B;AAAA,EAC3B,OAA+B;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAM/B,gBAAgB,oBAAI,IAAoC;AAAA,EAEhE,YAAY,OAAuB,CAAC,GAAG;AACrC,SAAK,SAAS,KAAK,UAAU,cAAc;AAC3C,SAAK,qBAAqB,KAAK,sBAAsB;AAAA,EACvD;AAAA,EAEA,MAAM,UAAyB;AAC7B,QAAI,KAAK,KAAM;AACf,UAAM,MAAM,QAAQ,KAAK,MAAM,GAAG,EAAE,WAAW,KAAK,CAAC;AACrD,SAAK,KAAK,IAAS,cAAS,KAAK,MAAM;AACvC,SAAK,OAAO,IAAS,gBAAW,KAAK,EAAE;AAAA,EACzC;AAAA,EAEA,MAAM,QAAuB;AAK3B,SAAK,cAAc,MAAM;AACzB,SAAK,OAAO;AACZ,SAAK,KAAK;AAAA,EACZ;AAAA,EAEQ,cAA+B;AACrC,QAAI,CAAC,KAAK,MAAM;AACd,YAAM,IAAI,MAAM,8CAA8C;AAAA,IAChE;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,UAAyB;AAC7B,UAAM,KAAK,QAAQ;AACnB,UAAM,cAAc,sBAAsB,EAAE,oBAAoB,KAAK,mBAAmB,CAAC;AACzF,eAAW,QAAQ,aAAa;AAC9B,YAAM,KAAK,KAAK,IAAI;AAAA,IACtB;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,MACJ,QACA,SAAkC,CAAC,GACV;AACzB,UAAM,SAAS,MAAM,KAAK,SAAS,QAAQ,MAAM;AACjD,UAAM,MAAM,MAAM,WAAW,MAAM;AACnC,UAAM,OAAO,IAAI,IAAI,CAAC,QAAQ,aAAa,GAAG,CAAC;AAC/C,UAAM,UAAU,IAAI,SAAS,IAAI,OAAO,KAAK,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC;AAC9D,WAAO,EAAE,MAAM,SAAS,UAAU,CAAC,EAAE;AAAA,EACvC;AAAA;AAAA,EAGA,MAAc,KAAK,QAAgB,SAAkC,CAAC,GAAkB;AACtF,UAAM,KAAK,SAAS,QAAQ,MAAM;AAAA,EACpC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAc,SAAS,QAAgB,QAAmD;AACxF,UAAM,OAAO,KAAK,YAAY;AAC9B,QAAI,OAAO,KAAK,MAAM,EAAE,WAAW,GAAG;AACpC,aAAO,KAAK,MAAM,MAAM;AAAA,IAC1B;AACA,QAAI,WAAW,KAAK,cAAc,IAAI,MAAM;AAC5C,QAAI,CAAC,UAAU;AACb,iBAAW,MAAM,KAAK,QAAQ,MAAM;AACpC,UAAI,CAAC,SAAS,UAAU,GAAG;AACzB,cAAM,IAAI,MAAM,SAAS,gBAAgB,CAAC;AAAA,MAC5C;AACA,WAAK,cAAc,IAAI,QAAQ,QAAQ;AAAA,IACzC;AAIA,WAAO,KAAK,QAAQ,UAAU,MAA8D;AAAA,EAC9F;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWA,MAAM,YAAY,OAAyC;AACzD,QAAI,MAAM,WAAW,EAAG;AACxB,UAAM,KAAK,QAAQ;AACnB,UAAM,UAAU,WAAW,KAAK;AAChC,UAAM,gBAAmC,CAAC;AAC1C,UAAM,mBAAsC,CAAC;AAC7C,eAAW,KAAK,SAAS;AACvB,UAAI,MAAM,QAAQ,EAAE,SAAS,KAAK,EAAE,UAAU,SAAS,GAAG;AACxD,sBAAc,KAAK,CAAC;AAAA,MACtB,OAAO;AACL,yBAAiB,KAAK,CAAC;AAAA,MACzB;AAAA,IACF;AACA,UAAM,KAAK,kBAAkB,kBAAkB,KAAK;AACpD,UAAM,KAAK,kBAAkB,eAAe,IAAI;AAAA,EAClD;AAAA,EAEA,MAAc,kBACZ,OACA,eACe;AACf,QAAI,MAAM,WAAW,EAAG;AACxB,UAAM,QAAQ;AACd,UAAM,UAAU,CAAC,GAAG,gBAAgB,GAAI,gBAAiB,CAAC,WAAW,IAAc,CAAC,CAAE;AACtF,UAAM,UAAU,QAAQ,IAAI,CAAC,MAAM,GAAG,CAAC,OAAO,CAAC,EAAE,EAAE,KAAK,IAAI;AAC5D,UAAM,SAAS,wCAAwC,OAAO;AAC9D,aAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK,OAAO;AAC5C,YAAM,QAAQ,MAAM,MAAM,GAAG,IAAI,KAAK;AACtC,YAAM,UAAU,MAAM,IAAI,CAAC,MAAM,eAAe,GAAG,eAAe,KAAK,kBAAkB,CAAC;AAC1F,YAAM,KAAK,KAAK,QAAQ,EAAE,OAAO,QAAQ,CAAC;AAAA,IAC5C;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,YAAY,OAAyC;AACzD,QAAI,MAAM,WAAW,EAAG;AACxB,UAAM,KAAK,QAAQ;AACnB,UAAM,SAAS,oBAAI,IAAiC;AACpD,eAAW,KAAK,OAAO;AACrB,YAAM,SAAS,OAAO,IAAI,EAAE,IAAI;AAChC,UAAI,OAAQ,QAAO,KAAK,CAAC;AAAA,UACpB,QAAO,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC;AAAA,IAC7B;AACA,UAAM,QAAQ;AACd,eAAW,CAAC,MAAM,KAAK,KAAK,QAAQ;AAClC,YAAM,SAAS,kGAAkG,IAAI;AACrH,eAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK,OAAO;AAC5C,cAAM,QAAQ,MAAM,MAAM,GAAG,IAAI,KAAK;AAGtC,cAAM,UAAU,MAAM,IAAI,CAAC,OAAO;AAAA,UAChC,QAAQ,EAAE;AAAA,UACV,MAAM,EAAE;AAAA,UACR,MAAM,OAAO,EAAE,SAAS,WAAW,EAAE,OAAO;AAAA,QAC9C,EAAE;AACF,cAAM,KAAK,KAAK,QAAQ,EAAE,OAAO,QAAQ,CAAC;AAAA,MAC5C;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,aAAa,QAAgB,OAAiC;AAClE,UAAM,KAAK,QAAQ;AACnB,QAAI,SAAS,MAAM,SAAS,GAAG;AAC7B,YAAM,KAAK;AAAA,QACT;AAAA,QACA,EAAE,QAAQ,MAAM;AAAA,MAClB;AACA;AAAA,IACF;AACA,UAAM,KAAK,KAAK,6DAA6D,EAAE,OAAO,CAAC;AAAA,EACzF;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAM,MAAM,QAIT;AACD,UAAM,KAAK,QAAQ;AACnB,UAAM,QAAgC,CAAC;AACvC,eAAW,QAAQ,YAAY;AAC7B,YAAM,IAAI,MAAM,KAAK;AAAA,QACnB;AAAA,QACA,EAAE,QAAQ,KAAK;AAAA,MACjB;AACA,YAAM,IAAI,IAAI,OAAO,EAAE,KAAK,CAAC,GAAG,SAAS,CAAC;AAAA,IAC5C;AACA,UAAM,QAAgC,CAAC;AACvC,eAAW,QAAQ,YAAY;AAC7B,YAAM,IAAI,MAAM,KAAK;AAAA,QACnB,uBAAuB,IAAI;AAAA;AAAA;AAAA,QAG3B,EAAE,OAAO;AAAA,MACX;AACA,YAAM,IAAI,IAAI,OAAO,EAAE,KAAK,CAAC,GAAG,SAAS,CAAC;AAAA,IAC5C;AACA,UAAM,MAAM,MAAM,KAAK;AAAA,MACrB;AAAA;AAAA;AAAA;AAAA,MAIA,EAAE,OAAO;AAAA,IACX;AACA,UAAM,MAAM,IAAI,KAAK,CAAC;AACtB,UAAM,QAAQ,OAAO,KAAK,SAAS,CAAC;AACpC,UAAM,WAAW,OAAO,KAAK,YAAY,CAAC;AAC1C,UAAM,WAAW,UAAU,IAAI,IAAI,WAAW;AAC9C,WAAO,EAAE,OAAO,OAAO,mBAAmB,SAAS;AAAA,EACrD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,iBAA0B;AACxB,WAAO;AAAA,EACT;AACF;AASA,SAAS,eACP,MACA,eACA,oBACyB;AACzB,QAAM,MAAM;AACZ,QAAM,MAA+B,CAAC;AACtC,aAAW,OAAO,gBAAgB;AAChC,UAAM,QAAQ,IAAI,GAAG;AACrB,QAAI,GAAG,IAAI,UAAU,UAAa,UAAU,OAAO,WAAW,GAAG,IAAI;AAAA,EACvE;AACA,MAAI,KAAK,KAAK;AACd,MAAI,OAAO,KAAK;AAChB,MAAI,eAAe;AACjB,UAAM,MAAM,KAAK;AACjB,QAAI,YACF,MAAM,QAAQ,GAAG,KAAK,IAAI,WAAW,qBACjC,MACA,IAAI,MAAc,kBAAkB,EAAE,KAAK,CAAC;AAAA,EACpD;AACA,SAAO;AACT;AAOA,SAAS,WAAW,OAA6C;AAC/D,QAAM,OAAO,oBAAI,IAA6B;AAC9C,aAAW,KAAK,OAAO;AACrB,SAAK,IAAI,EAAE,IAAI,CAAC;AAAA,EAClB;AACA,SAAO,MAAM,KAAK,KAAK,OAAO,CAAC;AACjC;AAGA,SAAS,aAAa,KAAuC;AAC3D,MAAI,eAAe,KAAK;AACtB,UAAM,MAA+B,CAAC;AACtC,eAAW,CAAC,GAAG,CAAC,KAAK,KAAK;AACxB,UAAI,OAAO,CAAC,CAAC,IAAI,YAAY,CAAC;AAAA,IAChC;AACA,WAAO;AAAA,EACT;AACA,MAAI,OAAO,OAAO,QAAQ,UAAU;AAClC,UAAM,MAAM;AACZ,UAAM,MAA+B,CAAC;AACtC,eAAW,KAAK,OAAO,KAAK,GAAG,GAAG;AAChC,UAAI,CAAC,IAAI,YAAY,IAAI,CAAC,CAAC;AAAA,IAC7B;AACA,WAAO;AAAA,EACT;AACA,SAAO,EAAE,OAAO,YAAY,GAAG,EAAE;AACnC;AAMA,SAAS,YAAY,OAAyB;AAC5C,MAAI,OAAO,UAAU,UAAU;AAC7B,QAAI,SAAS,OAAO,OAAO,gBAAgB,KAAK,SAAS,OAAO,OAAO,gBAAgB,GAAG;AACxF,aAAO,OAAO,KAAK;AAAA,IACrB;AACA,WAAO,MAAM,SAAS;AAAA,EACxB;AACA,MAAI,MAAM,QAAQ,KAAK,EAAG,QAAO,MAAM,IAAI,WAAW;AACtD,SAAO;AACT;AAGA,eAAe,WAAW,QAAqC;AAG7D,QAAM,SAAS,MAAM,QAAQ,MAAM,IAAI,OAAO,OAAO,SAAS,CAAC,IAAI;AACnE,MAAI,CAAC,OAAQ,QAAO,CAAC;AACrB,QAAM,SAAU,OAAiD;AACjE,MAAI,OAAO,WAAW,WAAY,QAAO,CAAC;AAC1C,SAAO,OAAO,KAAK,MAAM;AAC3B;","names":[]}
@@ -5,7 +5,7 @@ import {
5
5
  import {
6
6
  GraphDb,
7
7
  defaultDbPath
8
- } from "./chunk-VFE242Y7.js";
8
+ } from "./chunk-2TORJYBO.js";
9
9
  import {
10
10
  DEFAULT_CONFIG,
11
11
  LLM_PRESETS,
@@ -433,17 +433,19 @@ async function selfTestKuzu(dbPath, embeddingDimension) {
433
433
  try {
434
434
  await db.connect();
435
435
  await db.migrate();
436
- const result = await db.query("RETURN 1 AS result");
437
- const vectorReady = db.hasVectorIndex();
436
+ const result = await db.query(
437
+ `RETURN array_cosine_similarity([1.0, 0.0], [1.0, 0.0]) AS similarity, true AS ok`
438
+ );
438
439
  await db.close();
439
- if (result.data[0]?.result === 1) {
440
- return {
441
- name: "kuzu round-trip",
442
- status: vectorReady ? "ok" : "warn",
443
- detail: vectorReady ? "ok (vector index ready)" : "ok (vector extension missing; semantic search disabled)"
444
- };
440
+ const row = result.data[0];
441
+ if (row?.ok !== true || typeof row.similarity !== "number") {
442
+ return { name: "kuzu round-trip", status: "fail", detail: "unexpected result" };
445
443
  }
446
- return { name: "kuzu round-trip", status: "fail", detail: "unexpected result" };
444
+ return {
445
+ name: "kuzu round-trip",
446
+ status: "ok",
447
+ detail: "ok (brute-force semantic search ready)"
448
+ };
447
449
  } catch (err) {
448
450
  await db.close().catch(() => {
449
451
  });
@@ -1328,19 +1330,25 @@ async function indexRepo(opts) {
1328
1330
  edges: allEdges,
1329
1331
  knownFilePaths
1330
1332
  });
1331
- await opts.graphDb.deleteByRepo(opts.repoId);
1332
- await opts.graphDb.upsertNodes(allNodes);
1333
- await opts.graphDb.upsertEdges(resolved);
1334
- opts.onProgress?.({ type: "upsert", nodes: allNodes.length, edges: resolved.length });
1335
- let embeddingCount = 0;
1333
+ let embeddedById = /* @__PURE__ */ new Map();
1336
1334
  if (!opts.skipEmbeddings && opts.router) {
1337
1335
  const embedded = await embedNodes(allNodes, {
1338
1336
  router: opts.router,
1339
1337
  onBatch: ({ embedded: embedded2, total }) => opts.onProgress?.({ type: "embed", embedded: embedded2, total })
1340
1338
  });
1341
- await persistEmbeddings(opts.graphDb, embedded);
1342
- embeddingCount = embedded.length;
1339
+ embeddedById = new Map(embedded.map((e) => [e.id, e]));
1343
1340
  }
1341
+ const nodePayload = allNodes.map((n) => {
1342
+ const e = embeddedById.get(n.id);
1343
+ const base = n;
1344
+ if (!e) return base;
1345
+ return { ...base, embedding: e.embedding, embeddingNamespace: e.embeddingNamespace };
1346
+ });
1347
+ await opts.graphDb.deleteByRepo(opts.repoId);
1348
+ await opts.graphDb.upsertNodes(nodePayload);
1349
+ await opts.graphDb.upsertEdges(resolved);
1350
+ opts.onProgress?.({ type: "upsert", nodes: allNodes.length, edges: resolved.length });
1351
+ const embeddingCount = embeddedById.size;
1344
1352
  return {
1345
1353
  durationMs: Date.now() - start,
1346
1354
  parsedFiles: parsedCount - failed,
@@ -1351,22 +1359,6 @@ async function indexRepo(opts) {
1351
1359
  droppedEdges: dropped
1352
1360
  };
1353
1361
  }
1354
- async function persistEmbeddings(graphDb, embedded) {
1355
- if (embedded.length === 0) return;
1356
- const BATCH = 100;
1357
- for (let i = 0; i < embedded.length; i += BATCH) {
1358
- const batch = embedded.slice(i, i + BATCH);
1359
- await graphDb.query(
1360
- `
1361
- UNWIND $batch AS e
1362
- MATCH (n:Symbol { id: e.id })
1363
- SET n.embedding = e.embedding,
1364
- n.embeddingNamespace = e.embeddingNamespace
1365
- `,
1366
- { batch }
1367
- );
1368
- }
1369
- }
1370
1362
  async function runWithConcurrency(items, concurrency, fn) {
1371
1363
  let cursor = 0;
1372
1364
  const runners = Array.from({ length: Math.min(concurrency, items.length) }, async () => {
@@ -2005,14 +1997,16 @@ var searchSemanticTool = {
2005
1997
  );
2006
1998
  }
2007
1999
  const namespace = `${deps.llm.embeddingNamespace.provider}:${deps.llm.embeddingNamespace.model}:${deps.llm.embeddingNamespace.dimension}`;
2000
+ const dim = deps.llm.embeddingNamespace.dimension;
2008
2001
  return cachedJsonResult("search_semantic", { description, k, namespace }, deps, async () => {
2009
2002
  const rows = await deps.graph.query(
2010
- `CALL QUERY_VECTOR_INDEX('Symbol', 'embedding_idx', $vec, $k)
2011
- WITH node, distance
2012
- WHERE node.embeddingNamespace = $ns
2013
- RETURN node.id AS id, node.name AS name, node.kind AS kind, node.path AS path,
2014
- node.lineStart AS line, node.signature AS signature, distance AS score
2015
- ORDER BY distance ASC`,
2003
+ `MATCH (s:Symbol)
2004
+ WHERE s.embeddingNamespace = $ns
2005
+ RETURN s.id AS id, s.name AS name, s.kind AS kind, s.path AS path,
2006
+ s.lineStart AS line, s.signature AS signature,
2007
+ array_cosine_similarity(s.embedding, CAST($vec AS FLOAT[${dim}])) AS score
2008
+ ORDER BY score DESC
2009
+ LIMIT $k`,
2016
2010
  { vec: embedding, k, ns: namespace }
2017
2011
  );
2018
2012
  return { description, k, namespace, count: rows.length, matches: rows };
@@ -2300,7 +2294,7 @@ async function startMcpServer(portOrOptions) {
2300
2294
  async function loadGraphClient(dbPath) {
2301
2295
  let mod;
2302
2296
  try {
2303
- mod = await import("./src-7P6XREHJ.js");
2297
+ mod = await import("./src-PDNTANJD.js");
2304
2298
  } catch (err) {
2305
2299
  throw new Error(
2306
2300
  `Failed to import @codegraph/graph-db. Run \`pnpm -r build\` first. Underlying error: ${err instanceof Error ? err.message : String(err)}`
@@ -2400,7 +2394,7 @@ shutting down (${signal})...
2400
2394
  // src/program.ts
2401
2395
  function buildProgram() {
2402
2396
  const program = new Command();
2403
- program.name("codegraph").description("Live, queryable knowledge graph for your codebase").version("0.1.0").option("--verbose", "Print full stack traces on error").hook("preAction", (thisCommand) => {
2397
+ program.name("codegraph").description("Live, queryable knowledge graph for your codebase").version("0.1.2").option("--verbose", "Print full stack traces on error").hook("preAction", (thisCommand) => {
2404
2398
  const opts = thisCommand.optsWithGlobals();
2405
2399
  if (opts.verbose) process.env.CODEGRAPH_VERBOSE = "1";
2406
2400
  });
@@ -2446,4 +2440,4 @@ export {
2446
2440
  renderError,
2447
2441
  buildProgram
2448
2442
  };
2449
- //# sourceMappingURL=chunk-AMJXGXLM.js.map
2443
+ //# sourceMappingURL=chunk-36AWRLQ6.js.map