@leanlabsinnov/codegraph 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/bin.js +2 -2
- package/dist/{chunk-F5QKPRNW.js → chunk-2TORJYBO.js} +86 -73
- package/dist/chunk-2TORJYBO.js.map +1 -0
- package/dist/{chunk-O4ZO6CP5.js → chunk-36AWRLQ6.js} +39 -42
- package/dist/chunk-36AWRLQ6.js.map +1 -0
- package/dist/index.js +2 -2
- package/dist/{src-467W2KXC.js → src-PDNTANJD.js} +10 -6
- package/package.json +8 -8
- package/dist/chunk-F5QKPRNW.js.map +0 -1
- package/dist/chunk-O4ZO6CP5.js.map +0 -1
- /package/dist/{src-467W2KXC.js.map → src-PDNTANJD.js.map} +0 -0
package/README.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# @
|
|
1
|
+
# @leanlabsinnov/codegraph
|
|
2
2
|
|
|
3
3
|
Live, queryable knowledge graph for your codebase. Indexes JS/TS into an embedded graph
|
|
4
4
|
DB with vector embeddings, then exposes a local MCP server that Claude Code, Cursor, and
|
|
@@ -12,7 +12,7 @@ external services.
|
|
|
12
12
|
## Install
|
|
13
13
|
|
|
14
14
|
```bash
|
|
15
|
-
npm i -g @
|
|
15
|
+
npm i -g @leanlabsinnov/codegraph
|
|
16
16
|
```
|
|
17
17
|
|
|
18
18
|
Requires Node 20+.
|
|
@@ -29,7 +29,7 @@ codegraph serve
|
|
|
29
29
|
Then point Claude Code / Cursor / Windsurf at `http://127.0.0.1:3748/mcp` with the bearer
|
|
30
30
|
token from `~/.codegraph/config.json`.
|
|
31
31
|
|
|
32
|
-
See the full [README on GitHub](https://github.com/
|
|
32
|
+
See the full [README on GitHub](https://github.com/leanlabsinnov/codegraph) for client setup, all
|
|
33
33
|
10 MCP tools, and troubleshooting.
|
|
34
34
|
|
|
35
35
|
## Commands
|
package/dist/bin.js
CHANGED
|
@@ -10,31 +10,38 @@ import { dirname, resolve } from "path";
|
|
|
10
10
|
import * as kuzu from "kuzu";
|
|
11
11
|
|
|
12
12
|
// ../graph-db/src/schema.ts
|
|
13
|
-
var
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
13
|
+
var SYMBOL_COLUMN_SPEC = {
|
|
14
|
+
id: "STRING",
|
|
15
|
+
kind: "STRING",
|
|
16
|
+
repoId: "STRING",
|
|
17
|
+
name: "STRING",
|
|
18
|
+
path: "STRING",
|
|
19
|
+
lineStart: "INT64",
|
|
20
|
+
lineEnd: "INT64",
|
|
21
|
+
signature: "STRING",
|
|
22
|
+
leadingComment: "STRING",
|
|
23
|
+
isExported: "BOOLEAN",
|
|
24
24
|
// File-specific
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
25
|
+
language: "STRING",
|
|
26
|
+
sizeBytes: "INT64",
|
|
27
|
+
contentHash: "STRING",
|
|
28
28
|
// Function-specific
|
|
29
|
-
|
|
30
|
-
|
|
29
|
+
isAsync: "BOOLEAN",
|
|
30
|
+
isArrow: "BOOLEAN",
|
|
31
31
|
// Route-specific
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
// Embedding
|
|
36
|
-
|
|
37
|
-
|
|
32
|
+
method: "STRING",
|
|
33
|
+
routePath: "STRING",
|
|
34
|
+
framework: "STRING",
|
|
35
|
+
// Embedding namespace tag
|
|
36
|
+
embeddingNamespace: "STRING"
|
|
37
|
+
};
|
|
38
|
+
var SYMBOL_COLUMNS = Object.keys(SYMBOL_COLUMN_SPEC);
|
|
39
|
+
function defaultFor(column) {
|
|
40
|
+
const t = SYMBOL_COLUMN_SPEC[column];
|
|
41
|
+
if (t === "BOOLEAN") return false;
|
|
42
|
+
if (t === "INT64") return 0;
|
|
43
|
+
return "";
|
|
44
|
+
}
|
|
38
45
|
var EDGE_COLUMNS = ["line"];
|
|
39
46
|
function buildSchemaStatements(opts) {
|
|
40
47
|
const columnDefs = [
|
|
@@ -70,13 +77,7 @@ function buildSchemaStatements(opts) {
|
|
|
70
77
|
}
|
|
71
78
|
return statements;
|
|
72
79
|
}
|
|
73
|
-
|
|
74
|
-
return [
|
|
75
|
-
"INSTALL VECTOR",
|
|
76
|
-
"LOAD EXTENSION VECTOR",
|
|
77
|
-
"CALL CREATE_VECTOR_INDEX('Symbol', 'embedding_idx', 'embedding', metric := 'cosine')"
|
|
78
|
-
];
|
|
79
|
-
}
|
|
80
|
+
var SEMANTIC_SEARCH_MODE = "brute-force";
|
|
80
81
|
var DEFAULT_EMBEDDING_DIMENSION = 1536;
|
|
81
82
|
|
|
82
83
|
// ../graph-db/src/client.ts
|
|
@@ -88,7 +89,6 @@ var GraphDb = class {
|
|
|
88
89
|
embeddingDimension;
|
|
89
90
|
db = null;
|
|
90
91
|
conn = null;
|
|
91
|
-
vectorIndexReady = false;
|
|
92
92
|
/**
|
|
93
93
|
* Cache of `conn.prepare()` results keyed by Cypher source. Kuzu's Node SDK requires a
|
|
94
94
|
* prepared statement for any parameterized query - reusing the prepared object keeps
|
|
@@ -107,17 +107,8 @@ var GraphDb = class {
|
|
|
107
107
|
}
|
|
108
108
|
async close() {
|
|
109
109
|
this.preparedCache.clear();
|
|
110
|
-
try {
|
|
111
|
-
this.conn?.close?.();
|
|
112
|
-
} catch {
|
|
113
|
-
}
|
|
114
|
-
try {
|
|
115
|
-
this.db?.close?.();
|
|
116
|
-
} catch {
|
|
117
|
-
}
|
|
118
110
|
this.conn = null;
|
|
119
111
|
this.db = null;
|
|
120
|
-
this.vectorIndexReady = false;
|
|
121
112
|
}
|
|
122
113
|
requireConn() {
|
|
123
114
|
if (!this.conn) {
|
|
@@ -126,8 +117,11 @@ var GraphDb = class {
|
|
|
126
117
|
return this.conn;
|
|
127
118
|
}
|
|
128
119
|
/**
|
|
129
|
-
* Idempotent migration
|
|
130
|
-
*
|
|
120
|
+
* Idempotent migration: creates the `Symbol` node table and one REL table per
|
|
121
|
+
* `EdgeKind`. The `embedding FLOAT[N]` column lives on `Symbol` but we deliberately
|
|
122
|
+
* skip Kuzu's `CREATE_VECTOR_INDEX` - semantic search is brute-force via
|
|
123
|
+
* `array_cosine_similarity` to work around kuzudb/kuzu#5965 and kuzudb/kuzu#6040.
|
|
124
|
+
* See `schema.ts` for the full rationale.
|
|
131
125
|
*/
|
|
132
126
|
async migrate() {
|
|
133
127
|
await this.connect();
|
|
@@ -135,22 +129,6 @@ var GraphDb = class {
|
|
|
135
129
|
for (const stmt of schemaStmts) {
|
|
136
130
|
await this.exec(stmt);
|
|
137
131
|
}
|
|
138
|
-
for (const stmt of buildVectorIndexStatements()) {
|
|
139
|
-
try {
|
|
140
|
-
await this.exec(stmt);
|
|
141
|
-
} catch (err) {
|
|
142
|
-
const message = err instanceof Error ? err.message : String(err);
|
|
143
|
-
if (isAlreadyExistsError(message)) continue;
|
|
144
|
-
if (/extension/i.test(message) && /(not found|missing|unsupported|disabled)/i.test(message)) {
|
|
145
|
-
console.warn(
|
|
146
|
-
`[codegraph] vector extension unavailable; semantic search disabled. Underlying: ${message}`
|
|
147
|
-
);
|
|
148
|
-
return;
|
|
149
|
-
}
|
|
150
|
-
throw new Error(`migrate failed on \`${stmt}\`: ${message}`);
|
|
151
|
-
}
|
|
152
|
-
}
|
|
153
|
-
this.vectorIndexReady = true;
|
|
154
132
|
}
|
|
155
133
|
/**
|
|
156
134
|
* Typed Cypher escape hatch.
|
|
@@ -193,19 +171,39 @@ var GraphDb = class {
|
|
|
193
171
|
return conn.execute(prepared, params);
|
|
194
172
|
}
|
|
195
173
|
/**
|
|
196
|
-
*
|
|
197
|
-
*
|
|
198
|
-
*
|
|
174
|
+
* Inserts nodes via batched UNWIND + bare CREATE. The whole property map (including
|
|
175
|
+
* `embedding` when present) is set in the CREATE clause - we deliberately avoid `SET`
|
|
176
|
+
* because Kuzu rejects writes to an HNSW-indexed column even after the index is dropped
|
|
177
|
+
* (kuzudb/kuzu#6040). Callers must wipe pre-existing rows with `deleteByRepo` first.
|
|
178
|
+
*
|
|
179
|
+
* In-batch duplicates (same `id`) are coalesced to the last occurrence to keep CREATE
|
|
180
|
+
* from violating the primary-key uniqueness constraint.
|
|
199
181
|
*/
|
|
200
182
|
async upsertNodes(nodes) {
|
|
201
183
|
if (nodes.length === 0) return;
|
|
202
184
|
await this.connect();
|
|
185
|
+
const deduped = dedupeById(nodes);
|
|
186
|
+
const withEmbedding = [];
|
|
187
|
+
const withoutEmbedding = [];
|
|
188
|
+
for (const n of deduped) {
|
|
189
|
+
if (Array.isArray(n.embedding) && n.embedding.length > 0) {
|
|
190
|
+
withEmbedding.push(n);
|
|
191
|
+
} else {
|
|
192
|
+
withoutEmbedding.push(n);
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
await this.createSymbolBatch(withoutEmbedding, false);
|
|
196
|
+
await this.createSymbolBatch(withEmbedding, true);
|
|
197
|
+
}
|
|
198
|
+
async createSymbolBatch(nodes, withEmbedding) {
|
|
199
|
+
if (nodes.length === 0) return;
|
|
203
200
|
const BATCH = 200;
|
|
204
|
-
const
|
|
205
|
-
const
|
|
201
|
+
const columns = [...SYMBOL_COLUMNS, ...withEmbedding ? ["embedding"] : []];
|
|
202
|
+
const propMap = columns.map((c) => `${c}: r.${c}`).join(", ");
|
|
203
|
+
const cypher = `UNWIND $batch AS r CREATE (n:Symbol {${propMap}})`;
|
|
206
204
|
for (let i = 0; i < nodes.length; i += BATCH) {
|
|
207
205
|
const slice = nodes.slice(i, i + BATCH);
|
|
208
|
-
const payload = slice.map(buildSymbolRow);
|
|
206
|
+
const payload = slice.map((n) => buildSymbolRow(n, withEmbedding, this.embeddingDimension));
|
|
209
207
|
await this.exec(cypher, { batch: payload });
|
|
210
208
|
}
|
|
211
209
|
}
|
|
@@ -233,7 +231,7 @@ var GraphDb = class {
|
|
|
233
231
|
const payload = slice.map((e) => ({
|
|
234
232
|
fromId: e.fromId,
|
|
235
233
|
toId: e.toId,
|
|
236
|
-
line: typeof e.line === "number" ? e.line :
|
|
234
|
+
line: typeof e.line === "number" ? e.line : 0
|
|
237
235
|
}));
|
|
238
236
|
await this.exec(cypher, { batch: payload });
|
|
239
237
|
}
|
|
@@ -292,22 +290,38 @@ var GraphDb = class {
|
|
|
292
290
|
const coverage = total === 0 ? 0 : embedded / total;
|
|
293
291
|
return { nodes, edges, embeddingCoverage: coverage };
|
|
294
292
|
}
|
|
295
|
-
/**
|
|
293
|
+
/**
|
|
294
|
+
* v0.1.x never creates an HNSW index - semantic search is brute-force via
|
|
295
|
+
* `array_cosine_similarity`. Always returns `false`. Kept on the surface so callers
|
|
296
|
+
* (e.g. `codegraph doctor`) can branch on a single boolean once the upstream Kuzu
|
|
297
|
+
* fixes ship and we flip the index back on.
|
|
298
|
+
*/
|
|
296
299
|
hasVectorIndex() {
|
|
297
|
-
return
|
|
300
|
+
return false;
|
|
298
301
|
}
|
|
299
302
|
};
|
|
300
|
-
function buildSymbolRow(node) {
|
|
303
|
+
function buildSymbolRow(node, withEmbedding, embeddingDimension) {
|
|
301
304
|
const src = node;
|
|
302
305
|
const row = {};
|
|
303
306
|
for (const col of SYMBOL_COLUMNS) {
|
|
304
307
|
const value = src[col];
|
|
305
|
-
row[col] = value === void 0
|
|
308
|
+
row[col] = value === void 0 || value === null ? defaultFor(col) : value;
|
|
306
309
|
}
|
|
307
310
|
row.id = node.id;
|
|
308
311
|
row.kind = node.kind;
|
|
312
|
+
if (withEmbedding) {
|
|
313
|
+
const vec = node.embedding;
|
|
314
|
+
row.embedding = Array.isArray(vec) && vec.length === embeddingDimension ? vec : new Array(embeddingDimension).fill(0);
|
|
315
|
+
}
|
|
309
316
|
return row;
|
|
310
317
|
}
|
|
318
|
+
function dedupeById(nodes) {
|
|
319
|
+
const seen = /* @__PURE__ */ new Map();
|
|
320
|
+
for (const n of nodes) {
|
|
321
|
+
seen.set(n.id, n);
|
|
322
|
+
}
|
|
323
|
+
return Array.from(seen.values());
|
|
324
|
+
}
|
|
311
325
|
function normalizeRow(row) {
|
|
312
326
|
if (row instanceof Map) {
|
|
313
327
|
const out = {};
|
|
@@ -343,17 +357,16 @@ async function collectAll(result) {
|
|
|
343
357
|
if (typeof getAll !== "function") return [];
|
|
344
358
|
return getAll.call(target);
|
|
345
359
|
}
|
|
346
|
-
function isAlreadyExistsError(message) {
|
|
347
|
-
return /already exists/i.test(message) || /already loaded/i.test(message) || /already installed/i.test(message) || /duplicate (table|index)/i.test(message);
|
|
348
|
-
}
|
|
349
360
|
|
|
350
361
|
export {
|
|
362
|
+
SYMBOL_COLUMN_SPEC,
|
|
351
363
|
SYMBOL_COLUMNS,
|
|
364
|
+
defaultFor,
|
|
352
365
|
EDGE_COLUMNS,
|
|
353
366
|
buildSchemaStatements,
|
|
354
|
-
|
|
367
|
+
SEMANTIC_SEARCH_MODE,
|
|
355
368
|
DEFAULT_EMBEDDING_DIMENSION,
|
|
356
369
|
defaultDbPath,
|
|
357
370
|
GraphDb
|
|
358
371
|
};
|
|
359
|
-
//# sourceMappingURL=chunk-
|
|
372
|
+
//# sourceMappingURL=chunk-2TORJYBO.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../graph-db/src/client.ts","../../graph-db/src/schema.ts"],"sourcesContent":["import { mkdir } from \"node:fs/promises\";\nimport { homedir } from \"node:os\";\nimport { dirname, resolve } from \"node:path\";\nimport { EDGE_KINDS, NODE_KINDS, type EdgeKind } from \"@codegraph/shared\";\nimport * as kuzu from \"kuzu\";\nimport {\n DEFAULT_EMBEDDING_DIMENSION,\n SYMBOL_COLUMNS,\n buildSchemaStatements,\n defaultFor,\n} from \"./schema.js\";\nimport type { QueryResult, UpsertEdgeInput, UpsertNodeInput } from \"./types.js\";\n\nexport interface GraphDbOptions {\n /** Directory where Kuzu stores its on-disk database files. Defaults to `~/.codegraph/graph`. */\n dbPath?: string;\n /**\n * Legacy `url` option kept for back-compat with Phase-1 callers. Ignored at runtime -\n * Kuzu is embedded - but accepted so existing call sites compile while the rest of the\n * codebase migrates to `dbPath`.\n */\n url?: string;\n /** Vector dimension for the `Symbol.embedding` column. Baked into the schema at create time. */\n embeddingDimension?: number;\n}\n\n/** Default on-disk location for the embedded graph. */\nexport function defaultDbPath(): string {\n return resolve(homedir(), \".codegraph\", \"graph\");\n}\n\n/**\n * Thin, typed wrapper around the embedded Kuzu database.\n *\n * Public surface (intentionally identical to the Phase-1 FalkorDB client so callers don't\n * change): connect / close / migrate / query / upsertNodes / upsertEdges / deleteByRepo /\n * stats. Internals are pure Kuzu.\n */\nexport class GraphDb {\n private readonly dbPath: string;\n private readonly embeddingDimension: number;\n private db: kuzu.Database | null = null;\n private conn: kuzu.Connection | null = null;\n /**\n * Cache of `conn.prepare()` results keyed by Cypher source. 
Kuzu's Node SDK requires a\n * prepared statement for any parameterized query - reusing the prepared object keeps\n * UNWIND-batched upserts fast.\n */\n private preparedCache = new Map<string, kuzu.PreparedStatement>();\n\n constructor(opts: GraphDbOptions = {}) {\n this.dbPath = opts.dbPath ?? defaultDbPath();\n this.embeddingDimension = opts.embeddingDimension ?? DEFAULT_EMBEDDING_DIMENSION;\n }\n\n async connect(): Promise<void> {\n if (this.conn) return;\n await mkdir(dirname(this.dbPath), { recursive: true });\n this.db = new kuzu.Database(this.dbPath);\n this.conn = new kuzu.Connection(this.db);\n }\n\n async close(): Promise<void> {\n // We deliberately do NOT call `conn.close()` / `db.close()` on Kuzu's Node bindings.\n // In 0.11.x those native handles are also disposed by the binding's process-exit hook,\n // and double-disposing can SIGSEGV the worker on cleanup. Dropping references is\n // enough for the GC to release the underlying memory before the process exits.\n this.preparedCache.clear();\n this.conn = null;\n this.db = null;\n }\n\n private requireConn(): kuzu.Connection {\n if (!this.conn) {\n throw new Error(\"GraphDb not connected. Call connect() first.\");\n }\n return this.conn;\n }\n\n /**\n * Idempotent migration: creates the `Symbol` node table and one REL table per\n * `EdgeKind`. 
The `embedding FLOAT[N]` column lives on `Symbol` but we deliberately\n * skip Kuzu's `CREATE_VECTOR_INDEX` - semantic search is brute-force via\n * `array_cosine_similarity` to work around kuzudb/kuzu#5965 and kuzudb/kuzu#6040.\n * See `schema.ts` for the full rationale.\n */\n async migrate(): Promise<void> {\n await this.connect();\n const schemaStmts = buildSchemaStatements({ embeddingDimension: this.embeddingDimension });\n for (const stmt of schemaStmts) {\n await this.exec(stmt);\n }\n }\n\n /**\n * Typed Cypher escape hatch.\n *\n * Kuzu returns BIGINT columns as native BigInt; we coerce to plain `number` when safe so\n * downstream JSON serialization (MCP responses, snapshot tests) does not need bespoke\n * handling.\n */\n async query<T = Record<string, unknown>>(\n cypher: string,\n params: Record<string, unknown> = {},\n ): Promise<QueryResult<T>> {\n const result = await this.runQuery(cypher, params);\n const raw = await collectAll(result);\n const data = raw.map((row) => normalizeRow(row)) as T[];\n const headers = raw.length > 0 ? Object.keys(raw[0] ?? {}) : [];\n return { data, headers, metadata: [] };\n }\n\n /** Fire-and-forget DDL/exec. */\n private async exec(cypher: string, params: Record<string, unknown> = {}): Promise<void> {\n await this.runQuery(cypher, params);\n }\n\n /**\n * Bridge to Kuzu's two execution paths:\n * - `conn.query(stmt)` for unparameterized statements (the second positional arg is a\n * `progressCallback`, NOT params - mistaking that is the #1 way to confuse the API).\n * - `conn.prepare(stmt) + conn.execute(prepared, params)` for anything with `$name`\n * placeholders. 
We cache the prepared statement so UNWIND batches reuse it.\n */\n private async runQuery(cypher: string, params: Record<string, unknown>): Promise<unknown> {\n const conn = this.requireConn();\n if (Object.keys(params).length === 0) {\n return conn.query(cypher);\n }\n let prepared = this.preparedCache.get(cypher);\n if (!prepared) {\n prepared = await conn.prepare(cypher);\n if (!prepared.isSuccess()) {\n throw new Error(prepared.getErrorMessage());\n }\n this.preparedCache.set(cypher, prepared);\n }\n // Cast through `unknown`: Kuzu's bindings advertise a strict `KuzuValue` union, but\n // we can pass through any JSON-serializable value the embedded engine accepts (nested\n // structs and lists are converted at the native layer).\n return conn.execute(prepared, params as unknown as Parameters<kuzu.Connection[\"execute\"]>[1]);\n }\n\n /**\n * Inserts nodes via batched UNWIND + bare CREATE. The whole property map (including\n * `embedding` when present) is set in the CREATE clause - we deliberately avoid `SET`\n * because Kuzu rejects writes to an HNSW-indexed column even after the index is dropped\n * (kuzudb/kuzu#6040). 
Callers must wipe pre-existing rows with `deleteByRepo` first.\n *\n * In-batch duplicates (same `id`) are coalesced to the last occurrence to keep CREATE\n * from violating the primary-key uniqueness constraint.\n */\n async upsertNodes(nodes: UpsertNodeInput[]): Promise<void> {\n if (nodes.length === 0) return;\n await this.connect();\n const deduped = dedupeById(nodes);\n const withEmbedding: UpsertNodeInput[] = [];\n const withoutEmbedding: UpsertNodeInput[] = [];\n for (const n of deduped) {\n if (Array.isArray(n.embedding) && n.embedding.length > 0) {\n withEmbedding.push(n);\n } else {\n withoutEmbedding.push(n);\n }\n }\n await this.createSymbolBatch(withoutEmbedding, false);\n await this.createSymbolBatch(withEmbedding, true);\n }\n\n private async createSymbolBatch(\n nodes: UpsertNodeInput[],\n withEmbedding: boolean,\n ): Promise<void> {\n if (nodes.length === 0) return;\n const BATCH = 200;\n const columns = [...SYMBOL_COLUMNS, ...(withEmbedding ? ([\"embedding\"] as const) : [])];\n const propMap = columns.map((c) => `${c}: r.${c}`).join(\", \");\n const cypher = `UNWIND $batch AS r CREATE (n:Symbol {${propMap}})`;\n for (let i = 0; i < nodes.length; i += BATCH) {\n const slice = nodes.slice(i, i + BATCH);\n const payload = slice.map((n) => buildSymbolRow(n, withEmbedding, this.embeddingDimension));\n await this.exec(cypher, { batch: payload });\n }\n }\n\n /**\n * Upserts edges. 
Both endpoints must already exist as `Symbol` nodes; rows where the\n * MATCH fails are silently dropped, matching Cypher semantics.\n *\n * Uses CREATE because the orchestrator wipes the repo's slice before writing, so\n * duplicates can't pre-exist within a single index pass.\n */\n async upsertEdges(edges: UpsertEdgeInput[]): Promise<void> {\n if (edges.length === 0) return;\n await this.connect();\n const byKind = new Map<EdgeKind, UpsertEdgeInput[]>();\n for (const e of edges) {\n const bucket = byKind.get(e.kind);\n if (bucket) bucket.push(e);\n else byKind.set(e.kind, [e]);\n }\n const BATCH = 500;\n for (const [kind, batch] of byKind) {\n const cypher = `UNWIND $batch AS r MATCH (a:Symbol {id: r.fromId}) MATCH (b:Symbol {id: r.toId}) CREATE (a)-[e:${kind} {line: r.line}]->(b)`;\n for (let i = 0; i < batch.length; i += BATCH) {\n const slice = batch.slice(i, i + BATCH);\n // Use 0 (not null) for missing line numbers so Kuzu can infer the struct field\n // as INT64 even when an entire batch happens to have no `line` set.\n const payload = slice.map((e) => ({\n fromId: e.fromId,\n toId: e.toId,\n line: typeof e.line === \"number\" ? e.line : 0,\n }));\n await this.exec(cypher, { batch: payload });\n }\n }\n }\n\n /**\n * Deletes all nodes (and incident edges via DETACH DELETE) for a repo. 
If `paths` is\n * provided, restricts the delete to nodes whose `path` is in the list - used by\n * incremental re-indexing.\n */\n async deleteByRepo(repoId: string, paths?: string[]): Promise<void> {\n await this.connect();\n if (paths && paths.length > 0) {\n await this.exec(\n \"MATCH (n:Symbol) WHERE n.repoId = $repoId AND n.path IN $paths DETACH DELETE n\",\n { repoId, paths },\n );\n return;\n }\n await this.exec(\"MATCH (n:Symbol) WHERE n.repoId = $repoId DETACH DELETE n\", { repoId });\n }\n\n /**\n * Returns counts of nodes (per kind) and edges (per kind) for a repo, plus the share of\n * non-File nodes that carry an embedding.\n */\n async stats(repoId: string): Promise<{\n nodes: Record<string, number>;\n edges: Record<string, number>;\n embeddingCoverage: number;\n }> {\n await this.connect();\n const nodes: Record<string, number> = {};\n for (const kind of NODE_KINDS) {\n const r = await this.query<{ count: number }>(\n \"MATCH (n:Symbol) WHERE n.repoId = $repoId AND n.kind = $kind RETURN count(n) AS count\",\n { repoId, kind },\n );\n nodes[kind] = Number(r.data[0]?.count ?? 0);\n }\n const edges: Record<string, number> = {};\n for (const kind of EDGE_KINDS) {\n const r = await this.query<{ count: number }>(\n `MATCH (a:Symbol)-[r:${kind}]->(b:Symbol)\n WHERE a.repoId = $repoId AND b.repoId = $repoId\n RETURN count(r) AS count`,\n { repoId },\n );\n edges[kind] = Number(r.data[0]?.count ?? 0);\n }\n const cov = await this.query<{ total: number | bigint; embedded: number | bigint }>(\n `MATCH (n:Symbol)\n WHERE n.repoId = $repoId AND n.kind <> 'File'\n RETURN count(n) AS total,\n count(n.embedding) AS embedded`,\n { repoId },\n );\n const row = cov.data[0];\n const total = Number(row?.total ?? 0);\n const embedded = Number(row?.embedded ?? 0);\n const coverage = total === 0 ? 
0 : embedded / total;\n return { nodes, edges, embeddingCoverage: coverage };\n }\n\n /**\n * v0.1.x never creates an HNSW index - semantic search is brute-force via\n * `array_cosine_similarity`. Always returns `false`. Kept on the surface so callers\n * (e.g. `codegraph doctor`) can branch on a single boolean once the upstream Kuzu\n * fixes ship and we flip the index back on.\n */\n hasVectorIndex(): boolean {\n return false;\n }\n}\n\n/**\n * Build a fully-populated row for a Kuzu UNWIND batch. Every column in `SYMBOL_COLUMNS`\n * is present (typed default when missing) so Kuzu can infer a homogeneous struct schema\n * for the batch parameter. When `withEmbedding` is true the `embedding` field is also\n * populated - we either use the provided vector or a zero-vector of the configured\n * dimension so the struct schema stays uniform across the batch.\n */\nfunction buildSymbolRow(\n node: UpsertNodeInput,\n withEmbedding: boolean,\n embeddingDimension: number,\n): Record<string, unknown> {\n const src = node as unknown as Record<string, unknown>;\n const row: Record<string, unknown> = {};\n for (const col of SYMBOL_COLUMNS) {\n const value = src[col];\n row[col] = value === undefined || value === null ? defaultFor(col) : value;\n }\n row.id = node.id;\n row.kind = node.kind;\n if (withEmbedding) {\n const vec = node.embedding;\n row.embedding =\n Array.isArray(vec) && vec.length === embeddingDimension\n ? vec\n : new Array<number>(embeddingDimension).fill(0);\n }\n return row;\n}\n\n/**\n * Coalesce same-id rows down to the last occurrence. 
Required because we now use bare\n * CREATE (not MERGE) for inserts and Kuzu rejects primary-key collisions inside a single\n * UNWIND batch.\n */\nfunction dedupeById(nodes: UpsertNodeInput[]): UpsertNodeInput[] {\n const seen = new Map<string, UpsertNodeInput>();\n for (const n of nodes) {\n seen.set(n.id, n);\n }\n return Array.from(seen.values());\n}\n\n/** Convert Kuzu's row representation (Map or plain object) into a plain JSON object. */\nfunction normalizeRow(row: unknown): Record<string, unknown> {\n if (row instanceof Map) {\n const out: Record<string, unknown> = {};\n for (const [k, v] of row) {\n out[String(k)] = coerceValue(v);\n }\n return out;\n }\n if (row && typeof row === \"object\") {\n const src = row as Record<string, unknown>;\n const out: Record<string, unknown> = {};\n for (const k of Object.keys(src)) {\n out[k] = coerceValue(src[k]);\n }\n return out;\n }\n return { value: coerceValue(row) };\n}\n\n/**\n * Kuzu returns BIGINT columns as JS BigInt. Coerce to `number` when within Number.MAX_SAFE\n * for JSON-friendly downstream consumption.\n */\nfunction coerceValue(value: unknown): unknown {\n if (typeof value === \"bigint\") {\n if (value <= BigInt(Number.MAX_SAFE_INTEGER) && value >= BigInt(Number.MIN_SAFE_INTEGER)) {\n return Number(value);\n }\n return value.toString();\n }\n if (Array.isArray(value)) return value.map(coerceValue);\n return value;\n}\n\n/** Drain a Kuzu QueryResult (or array of them) into an array of row objects. */\nasync function collectAll(result: unknown): Promise<unknown[]> {\n // Multi-statement queries return an array; we keep only the last one (matches how the\n // final statement is the one that carries a `RETURN`).\n const target = Array.isArray(result) ? 
result[result.length - 1] : result;\n if (!target) return [];\n const getAll = (target as { getAll?: () => Promise<unknown[]> }).getAll;\n if (typeof getAll !== \"function\") return [];\n return getAll.call(target);\n}\n\n","import { EDGE_KINDS } from \"@codegraph/shared\";\n\n/**\n * Kuzu is schema-first. Unlike FalkorDB which is schema-less, every column we ever want to\n * SET on a node must exist up-front. We use ONE `Symbol` node table with a `kind` column\n * (Kuzu does not support multi-labels), and one REL table per `EdgeKind`.\n *\n * Columns are the union of every field across the `GraphNode` discriminated union in\n * `@codegraph/shared` plus the two embedding-namespace fields. Fields that are not\n * relevant to a given kind stay NULL.\n */\n\n/**\n * Per-column metadata so the upserter can build batches with explicit typed defaults.\n *\n * Kuzu's struct parameter type inference fails when a column is null on every row in a\n * batch (it defaults to STRING and rejects assignment to a BOOL/INT64 column). Concrete\n * defaults keep inference deterministic and let us skip clunky CAST() clauses.\n *\n * Convention: optional booleans default to `false`, optional ints to `0`, optional strings\n * to `\"\"`. 
We never check `WHERE n.foo IS NULL` in queries, so the lost null-distinction\n * is acceptable for v0.1.0.\n */\nexport const SYMBOL_COLUMN_SPEC = {\n id: \"STRING\",\n kind: \"STRING\",\n repoId: \"STRING\",\n name: \"STRING\",\n path: \"STRING\",\n lineStart: \"INT64\",\n lineEnd: \"INT64\",\n signature: \"STRING\",\n leadingComment: \"STRING\",\n isExported: \"BOOLEAN\",\n // File-specific\n language: \"STRING\",\n sizeBytes: \"INT64\",\n contentHash: \"STRING\",\n // Function-specific\n isAsync: \"BOOLEAN\",\n isArrow: \"BOOLEAN\",\n // Route-specific\n method: \"STRING\",\n routePath: \"STRING\",\n framework: \"STRING\",\n // Embedding namespace tag\n embeddingNamespace: \"STRING\",\n} as const;\n\nexport type SymbolColumn = keyof typeof SYMBOL_COLUMN_SPEC;\n\nexport const SYMBOL_COLUMNS = Object.keys(SYMBOL_COLUMN_SPEC) as SymbolColumn[];\n\n/** Return the typed default for an unset optional column. */\nexport function defaultFor(column: SymbolColumn): unknown {\n const t = SYMBOL_COLUMN_SPEC[column];\n if (t === \"BOOLEAN\") return false;\n if (t === \"INT64\") return 0;\n return \"\";\n}\n\n/** Optional per-edge metadata. Currently only `line`. */\nexport const EDGE_COLUMNS = [\"line\"] as const;\n\nexport type EdgeColumn = (typeof EDGE_COLUMNS)[number];\n\n/**\n * DDL statements that bring an empty Kuzu database to the codegraph schema.\n * `IF NOT EXISTS` makes `migrate()` idempotent so it can run on every connect.\n *\n * `embedding` is a fixed-dimension column - dimension is configured at migrate time and\n * baked into the schema. If a user later switches to an embedding provider with a\n * different dimension they must delete the on-disk graph directory to recreate it. 
The\n * embedding-namespace tag ensures we never silently mix dimensions.\n */\nexport function buildSchemaStatements(opts: { embeddingDimension: number }): string[] {\n const columnDefs = [\n \"id STRING\",\n \"kind STRING\",\n \"repoId STRING\",\n \"name STRING\",\n \"path STRING\",\n \"lineStart INT64\",\n \"lineEnd INT64\",\n \"signature STRING\",\n \"leadingComment STRING\",\n \"isExported BOOLEAN\",\n \"language STRING\",\n \"sizeBytes INT64\",\n \"contentHash STRING\",\n \"isAsync BOOLEAN\",\n \"isArrow BOOLEAN\",\n \"method STRING\",\n \"routePath STRING\",\n \"framework STRING\",\n \"embeddingNamespace STRING\",\n `embedding FLOAT[${opts.embeddingDimension}]`,\n \"PRIMARY KEY (id)\",\n ];\n const statements: string[] = [\n `CREATE NODE TABLE IF NOT EXISTS Symbol(${columnDefs.join(\", \")})`,\n ];\n for (const kind of EDGE_KINDS) {\n statements.push(\n `CREATE REL TABLE IF NOT EXISTS ${kind}(FROM Symbol TO Symbol, line INT64)`,\n );\n }\n return statements;\n}\n\n/**\n * Semantic search in v0.1.x is intentionally brute-force via Kuzu's built-in\n * `array_cosine_similarity` function - we do NOT create an HNSW vector index.\n *\n * Why: Kuzu 0.11.x has two open issues that make the HNSW path unusable for a\n * mutable graph workload:\n * - kuzudb/kuzu#5965: SET on a vector-indexed column is rejected with\n * \"Cannot set property vec in table embeddings because it is used in one or more\n * indexes\". The Kuzu team's own recommended workaround in that thread is\n * \"delay creation of the index itself\".\n * - kuzudb/kuzu#6040: DROP_VECTOR_INDEX leaves stale on-disk metadata, so once a\n * column has ever been indexed it becomes permanently un-writable - even fresh\n * CREATEs fail with \"Catalog exception: _N_<index>_UPPER does not exist\".\n *\n * `array_cosine_similarity` is a core Kuzu function (not part of the vector extension)\n * and runs in microseconds for the corpus sizes Phase 1 targets. 
We will switch back\n * to `CALL CREATE_VECTOR_INDEX` / `QUERY_VECTOR_INDEX` once the upstream fixes ship.\n */\nexport const SEMANTIC_SEARCH_MODE = \"brute-force\" as const;\n\n/** Default embedding dimension when none is supplied. Matches `text-embedding-3-small`. */\nexport const DEFAULT_EMBEDDING_DIMENSION = 1536;\n"],"mappings":";;;;;;AAAA,SAAS,aAAa;AACtB,SAAS,eAAe;AACxB,SAAS,SAAS,eAAe;AAEjC,YAAY,UAAU;;;ACmBf,IAAM,qBAAqB;AAAA,EAChC,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,QAAQ;AAAA,EACR,MAAM;AAAA,EACN,MAAM;AAAA,EACN,WAAW;AAAA,EACX,SAAS;AAAA,EACT,WAAW;AAAA,EACX,gBAAgB;AAAA,EAChB,YAAY;AAAA;AAAA,EAEZ,UAAU;AAAA,EACV,WAAW;AAAA,EACX,aAAa;AAAA;AAAA,EAEb,SAAS;AAAA,EACT,SAAS;AAAA;AAAA,EAET,QAAQ;AAAA,EACR,WAAW;AAAA,EACX,WAAW;AAAA;AAAA,EAEX,oBAAoB;AACtB;AAIO,IAAM,iBAAiB,OAAO,KAAK,kBAAkB;AAGrD,SAAS,WAAW,QAA+B;AACxD,QAAM,IAAI,mBAAmB,MAAM;AACnC,MAAI,MAAM,UAAW,QAAO;AAC5B,MAAI,MAAM,QAAS,QAAO;AAC1B,SAAO;AACT;AAGO,IAAM,eAAe,CAAC,MAAM;AAa5B,SAAS,sBAAsB,MAAgD;AACpF,QAAM,aAAa;AAAA,IACjB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,mBAAmB,KAAK,kBAAkB;AAAA,IAC1C;AAAA,EACF;AACA,QAAM,aAAuB;AAAA,IAC3B,0CAA0C,WAAW,KAAK,IAAI,CAAC;AAAA,EACjE;AACA,aAAW,QAAQ,YAAY;AAC7B,eAAW;AAAA,MACT,kCAAkC,IAAI;AAAA,IACxC;AAAA,EACF;AACA,SAAO;AACT;AAoBO,IAAM,uBAAuB;AAG7B,IAAM,8BAA8B;;;ADxGpC,SAAS,gBAAwB;AACtC,SAAO,QAAQ,QAAQ,GAAG,cAAc,OAAO;AACjD;AASO,IAAM,UAAN,MAAc;AAAA,EACF;AAAA,EACA;AAAA,EACT,KAA2B;AAAA,EAC3B,OAA+B;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAM/B,gBAAgB,oBAAI,IAAoC;AAAA,EAEhE,YAAY,OAAuB,CAAC,GAAG;AACrC,SAAK,SAAS,KAAK,UAAU,cAAc;AAC3C,SAAK,qBAAqB,KAAK,sBAAsB;AAAA,EACvD;AAAA,EAEA,MAAM,UAAyB;AAC7B,QAAI,KAAK,KAAM;AACf,UAAM,MAAM,QAAQ,KAAK,MAAM,GAAG,EAAE,WAAW,KAAK,CAAC;AACrD,SAAK,KAAK,IAAS,cAAS,KAAK,MAAM;AACvC,SAAK,OAAO,IAAS,gBAAW,KAAK,EAAE;AAAA,EACzC;AAAA,EAEA,MAAM,QAAuB;AAK3B,SAAK,cAAc,MAAM;AACzB,SAAK,OAAO;AACZ,SAAK,KAAK;AAAA,EACZ;AAAA,EAEQ,cAA+B;AACrC,QAAI,CAAC,KAAK,MAAM;AACd
,YAAM,IAAI,MAAM,8CAA8C;AAAA,IAChE;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,UAAyB;AAC7B,UAAM,KAAK,QAAQ;AACnB,UAAM,cAAc,sBAAsB,EAAE,oBAAoB,KAAK,mBAAmB,CAAC;AACzF,eAAW,QAAQ,aAAa;AAC9B,YAAM,KAAK,KAAK,IAAI;AAAA,IACtB;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,MACJ,QACA,SAAkC,CAAC,GACV;AACzB,UAAM,SAAS,MAAM,KAAK,SAAS,QAAQ,MAAM;AACjD,UAAM,MAAM,MAAM,WAAW,MAAM;AACnC,UAAM,OAAO,IAAI,IAAI,CAAC,QAAQ,aAAa,GAAG,CAAC;AAC/C,UAAM,UAAU,IAAI,SAAS,IAAI,OAAO,KAAK,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC;AAC9D,WAAO,EAAE,MAAM,SAAS,UAAU,CAAC,EAAE;AAAA,EACvC;AAAA;AAAA,EAGA,MAAc,KAAK,QAAgB,SAAkC,CAAC,GAAkB;AACtF,UAAM,KAAK,SAAS,QAAQ,MAAM;AAAA,EACpC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAc,SAAS,QAAgB,QAAmD;AACxF,UAAM,OAAO,KAAK,YAAY;AAC9B,QAAI,OAAO,KAAK,MAAM,EAAE,WAAW,GAAG;AACpC,aAAO,KAAK,MAAM,MAAM;AAAA,IAC1B;AACA,QAAI,WAAW,KAAK,cAAc,IAAI,MAAM;AAC5C,QAAI,CAAC,UAAU;AACb,iBAAW,MAAM,KAAK,QAAQ,MAAM;AACpC,UAAI,CAAC,SAAS,UAAU,GAAG;AACzB,cAAM,IAAI,MAAM,SAAS,gBAAgB,CAAC;AAAA,MAC5C;AACA,WAAK,cAAc,IAAI,QAAQ,QAAQ;AAAA,IACzC;AAIA,WAAO,KAAK,QAAQ,UAAU,MAA8D;AAAA,EAC9F;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWA,MAAM,YAAY,OAAyC;AACzD,QAAI,MAAM,WAAW,EAAG;AACxB,UAAM,KAAK,QAAQ;AACnB,UAAM,UAAU,WAAW,KAAK;AAChC,UAAM,gBAAmC,CAAC;AAC1C,UAAM,mBAAsC,CAAC;AAC7C,eAAW,KAAK,SAAS;AACvB,UAAI,MAAM,QAAQ,EAAE,SAAS,KAAK,EAAE,UAAU,SAAS,GAAG;AACxD,sBAAc,KAAK,CAAC;AAAA,MACtB,OAAO;AACL,yBAAiB,KAAK,CAAC;AAAA,MACzB;AAAA,IACF;AACA,UAAM,KAAK,kBAAkB,kBAAkB,KAAK;AACpD,UAAM,KAAK,kBAAkB,eAAe,IAAI;AAAA,EAClD;AAAA,EAEA,MAAc,kBACZ,OACA,eACe;AACf,QAAI,MAAM,WAAW,EAAG;AACxB,UAAM,QAAQ;AACd,UAAM,UAAU,CAAC,GAAG,gBAAgB,GAAI,gBAAiB,CAAC,WAAW,IAAc,CAAC,CAAE;AACtF,UAAM,UAAU,QAAQ,IAAI,CAAC,MAAM,GAAG,CAAC,OAAO,CAAC,EAAE,EAAE,KAAK,IAAI;AAC5D,UAAM,SAAS,wCAAwC,OAAO;AAC9D,aAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK,OAAO;AAC5C,YAAM,QAAQ,MAAM,MAAM,GAAG,IAAI,KAAK;AACtC,YAAM,UAAU,MAAM,IAAI,CAAC,MAAM,eAAe,GAAG,eAAe,KAAK,kBAAkB,CAAC;AAC1F,YAAM,KAAK,KAAK,QAAQ,EAAE,OAAO,QAAQ,CAAC;AAAA,IAC5C;AAAA,EACF;AAAA;AAAA;AAA
A;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,YAAY,OAAyC;AACzD,QAAI,MAAM,WAAW,EAAG;AACxB,UAAM,KAAK,QAAQ;AACnB,UAAM,SAAS,oBAAI,IAAiC;AACpD,eAAW,KAAK,OAAO;AACrB,YAAM,SAAS,OAAO,IAAI,EAAE,IAAI;AAChC,UAAI,OAAQ,QAAO,KAAK,CAAC;AAAA,UACpB,QAAO,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC;AAAA,IAC7B;AACA,UAAM,QAAQ;AACd,eAAW,CAAC,MAAM,KAAK,KAAK,QAAQ;AAClC,YAAM,SAAS,kGAAkG,IAAI;AACrH,eAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK,OAAO;AAC5C,cAAM,QAAQ,MAAM,MAAM,GAAG,IAAI,KAAK;AAGtC,cAAM,UAAU,MAAM,IAAI,CAAC,OAAO;AAAA,UAChC,QAAQ,EAAE;AAAA,UACV,MAAM,EAAE;AAAA,UACR,MAAM,OAAO,EAAE,SAAS,WAAW,EAAE,OAAO;AAAA,QAC9C,EAAE;AACF,cAAM,KAAK,KAAK,QAAQ,EAAE,OAAO,QAAQ,CAAC;AAAA,MAC5C;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,aAAa,QAAgB,OAAiC;AAClE,UAAM,KAAK,QAAQ;AACnB,QAAI,SAAS,MAAM,SAAS,GAAG;AAC7B,YAAM,KAAK;AAAA,QACT;AAAA,QACA,EAAE,QAAQ,MAAM;AAAA,MAClB;AACA;AAAA,IACF;AACA,UAAM,KAAK,KAAK,6DAA6D,EAAE,OAAO,CAAC;AAAA,EACzF;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAM,MAAM,QAIT;AACD,UAAM,KAAK,QAAQ;AACnB,UAAM,QAAgC,CAAC;AACvC,eAAW,QAAQ,YAAY;AAC7B,YAAM,IAAI,MAAM,KAAK;AAAA,QACnB;AAAA,QACA,EAAE,QAAQ,KAAK;AAAA,MACjB;AACA,YAAM,IAAI,IAAI,OAAO,EAAE,KAAK,CAAC,GAAG,SAAS,CAAC;AAAA,IAC5C;AACA,UAAM,QAAgC,CAAC;AACvC,eAAW,QAAQ,YAAY;AAC7B,YAAM,IAAI,MAAM,KAAK;AAAA,QACnB,uBAAuB,IAAI;AAAA;AAAA;AAAA,QAG3B,EAAE,OAAO;AAAA,MACX;AACA,YAAM,IAAI,IAAI,OAAO,EAAE,KAAK,CAAC,GAAG,SAAS,CAAC;AAAA,IAC5C;AACA,UAAM,MAAM,MAAM,KAAK;AAAA,MACrB;AAAA;AAAA;AAAA;AAAA,MAIA,EAAE,OAAO;AAAA,IACX;AACA,UAAM,MAAM,IAAI,KAAK,CAAC;AACtB,UAAM,QAAQ,OAAO,KAAK,SAAS,CAAC;AACpC,UAAM,WAAW,OAAO,KAAK,YAAY,CAAC;AAC1C,UAAM,WAAW,UAAU,IAAI,IAAI,WAAW;AAC9C,WAAO,EAAE,OAAO,OAAO,mBAAmB,SAAS;AAAA,EACrD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,iBAA0B;AACxB,WAAO;AAAA,EACT;AACF;AASA,SAAS,eACP,MACA,eACA,oBACyB;AACzB,QAAM,MAAM;AACZ,QAAM,MAA+B,CAAC;AACtC,aAAW,OAAO,gBAAgB;AAChC,UAAM,QAAQ,IAAI,GAAG;AACrB,QAAI,GAAG,IAAI,UAAU,UAAa,UAAU,OAAO,WAAW,GAAG,IAAI;AAAA,EACvE;AACA,MAAI,KAAK,KAAK;AACd,MAAI,OAAO,KAAK;AAChB,MAAI,eAAe;AACjB,UAAM,MAAM,KAAK;AACjB,QAAI,YACF,MAAM,QAAQ,GAAG,KAAK,IAAI,WAAW,qBACjC,MACA,IAAI,MAA
c,kBAAkB,EAAE,KAAK,CAAC;AAAA,EACpD;AACA,SAAO;AACT;AAOA,SAAS,WAAW,OAA6C;AAC/D,QAAM,OAAO,oBAAI,IAA6B;AAC9C,aAAW,KAAK,OAAO;AACrB,SAAK,IAAI,EAAE,IAAI,CAAC;AAAA,EAClB;AACA,SAAO,MAAM,KAAK,KAAK,OAAO,CAAC;AACjC;AAGA,SAAS,aAAa,KAAuC;AAC3D,MAAI,eAAe,KAAK;AACtB,UAAM,MAA+B,CAAC;AACtC,eAAW,CAAC,GAAG,CAAC,KAAK,KAAK;AACxB,UAAI,OAAO,CAAC,CAAC,IAAI,YAAY,CAAC;AAAA,IAChC;AACA,WAAO;AAAA,EACT;AACA,MAAI,OAAO,OAAO,QAAQ,UAAU;AAClC,UAAM,MAAM;AACZ,UAAM,MAA+B,CAAC;AACtC,eAAW,KAAK,OAAO,KAAK,GAAG,GAAG;AAChC,UAAI,CAAC,IAAI,YAAY,IAAI,CAAC,CAAC;AAAA,IAC7B;AACA,WAAO;AAAA,EACT;AACA,SAAO,EAAE,OAAO,YAAY,GAAG,EAAE;AACnC;AAMA,SAAS,YAAY,OAAyB;AAC5C,MAAI,OAAO,UAAU,UAAU;AAC7B,QAAI,SAAS,OAAO,OAAO,gBAAgB,KAAK,SAAS,OAAO,OAAO,gBAAgB,GAAG;AACxF,aAAO,OAAO,KAAK;AAAA,IACrB;AACA,WAAO,MAAM,SAAS;AAAA,EACxB;AACA,MAAI,MAAM,QAAQ,KAAK,EAAG,QAAO,MAAM,IAAI,WAAW;AACtD,SAAO;AACT;AAGA,eAAe,WAAW,QAAqC;AAG7D,QAAM,SAAS,MAAM,QAAQ,MAAM,IAAI,OAAO,OAAO,SAAS,CAAC,IAAI;AACnE,MAAI,CAAC,OAAQ,QAAO,CAAC;AACrB,QAAM,SAAU,OAAiD;AACjE,MAAI,OAAO,WAAW,WAAY,QAAO,CAAC;AAC1C,SAAO,OAAO,KAAK,MAAM;AAC3B;","names":[]}
|
|
@@ -5,7 +5,7 @@ import {
|
|
|
5
5
|
import {
|
|
6
6
|
GraphDb,
|
|
7
7
|
defaultDbPath
|
|
8
|
-
} from "./chunk-F5QKPRNW.js";
|
|
8
|
+
} from "./chunk-2TORJYBO.js";
|
|
9
9
|
import {
|
|
10
10
|
DEFAULT_CONFIG,
|
|
11
11
|
LLM_PRESETS,
|
|
@@ -433,16 +433,19 @@ async function selfTestKuzu(dbPath, embeddingDimension) {
|
|
|
433
433
|
try {
|
|
434
434
|
await db.connect();
|
|
435
435
|
await db.migrate();
|
|
436
|
-
const result = await db.query(
|
|
436
|
+
const result = await db.query(
|
|
437
|
+
`RETURN array_cosine_similarity([1.0, 0.0], [1.0, 0.0]) AS similarity, true AS ok`
|
|
438
|
+
);
|
|
437
439
|
await db.close();
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
status: db.hasVectorIndex() ? "ok" : "warn",
|
|
442
|
-
detail: db.hasVectorIndex() ? "ok (vector index ready)" : "ok (vector extension missing; semantic search disabled)"
|
|
443
|
-
};
|
|
440
|
+
const row = result.data[0];
|
|
441
|
+
if (row?.ok !== true || typeof row.similarity !== "number") {
|
|
442
|
+
return { name: "kuzu round-trip", status: "fail", detail: "unexpected result" };
|
|
444
443
|
}
|
|
445
|
-
return {
|
|
444
|
+
return {
|
|
445
|
+
name: "kuzu round-trip",
|
|
446
|
+
status: "ok",
|
|
447
|
+
detail: "ok (brute-force semantic search ready)"
|
|
448
|
+
};
|
|
446
449
|
} catch (err) {
|
|
447
450
|
await db.close().catch(() => {
|
|
448
451
|
});
|
|
@@ -1327,19 +1330,25 @@ async function indexRepo(opts) {
|
|
|
1327
1330
|
edges: allEdges,
|
|
1328
1331
|
knownFilePaths
|
|
1329
1332
|
});
|
|
1330
|
-
|
|
1331
|
-
await opts.graphDb.upsertNodes(allNodes);
|
|
1332
|
-
await opts.graphDb.upsertEdges(resolved);
|
|
1333
|
-
opts.onProgress?.({ type: "upsert", nodes: allNodes.length, edges: resolved.length });
|
|
1334
|
-
let embeddingCount = 0;
|
|
1333
|
+
let embeddedById = /* @__PURE__ */ new Map();
|
|
1335
1334
|
if (!opts.skipEmbeddings && opts.router) {
|
|
1336
1335
|
const embedded = await embedNodes(allNodes, {
|
|
1337
1336
|
router: opts.router,
|
|
1338
1337
|
onBatch: ({ embedded: embedded2, total }) => opts.onProgress?.({ type: "embed", embedded: embedded2, total })
|
|
1339
1338
|
});
|
|
1340
|
-
|
|
1341
|
-
embeddingCount = embedded.length;
|
|
1339
|
+
embeddedById = new Map(embedded.map((e) => [e.id, e]));
|
|
1342
1340
|
}
|
|
1341
|
+
const nodePayload = allNodes.map((n) => {
|
|
1342
|
+
const e = embeddedById.get(n.id);
|
|
1343
|
+
const base = n;
|
|
1344
|
+
if (!e) return base;
|
|
1345
|
+
return { ...base, embedding: e.embedding, embeddingNamespace: e.embeddingNamespace };
|
|
1346
|
+
});
|
|
1347
|
+
await opts.graphDb.deleteByRepo(opts.repoId);
|
|
1348
|
+
await opts.graphDb.upsertNodes(nodePayload);
|
|
1349
|
+
await opts.graphDb.upsertEdges(resolved);
|
|
1350
|
+
opts.onProgress?.({ type: "upsert", nodes: allNodes.length, edges: resolved.length });
|
|
1351
|
+
const embeddingCount = embeddedById.size;
|
|
1343
1352
|
return {
|
|
1344
1353
|
durationMs: Date.now() - start,
|
|
1345
1354
|
parsedFiles: parsedCount - failed,
|
|
@@ -1350,22 +1359,6 @@ async function indexRepo(opts) {
|
|
|
1350
1359
|
droppedEdges: dropped
|
|
1351
1360
|
};
|
|
1352
1361
|
}
|
|
1353
|
-
async function persistEmbeddings(graphDb, embedded) {
|
|
1354
|
-
if (embedded.length === 0) return;
|
|
1355
|
-
const BATCH = 100;
|
|
1356
|
-
for (let i = 0; i < embedded.length; i += BATCH) {
|
|
1357
|
-
const batch = embedded.slice(i, i + BATCH);
|
|
1358
|
-
await graphDb.query(
|
|
1359
|
-
`
|
|
1360
|
-
UNWIND $batch AS e
|
|
1361
|
-
MATCH (n:Symbol { id: e.id })
|
|
1362
|
-
SET n.embedding = e.embedding,
|
|
1363
|
-
n.embeddingNamespace = e.embeddingNamespace
|
|
1364
|
-
`,
|
|
1365
|
-
{ batch }
|
|
1366
|
-
);
|
|
1367
|
-
}
|
|
1368
|
-
}
|
|
1369
1362
|
async function runWithConcurrency(items, concurrency, fn) {
|
|
1370
1363
|
let cursor = 0;
|
|
1371
1364
|
const runners = Array.from({ length: Math.min(concurrency, items.length) }, async () => {
|
|
@@ -2004,14 +1997,16 @@ var searchSemanticTool = {
|
|
|
2004
1997
|
);
|
|
2005
1998
|
}
|
|
2006
1999
|
const namespace = `${deps.llm.embeddingNamespace.provider}:${deps.llm.embeddingNamespace.model}:${deps.llm.embeddingNamespace.dimension}`;
|
|
2000
|
+
const dim = deps.llm.embeddingNamespace.dimension;
|
|
2007
2001
|
return cachedJsonResult("search_semantic", { description, k, namespace }, deps, async () => {
|
|
2008
2002
|
const rows = await deps.graph.query(
|
|
2009
|
-
`
|
|
2010
|
-
|
|
2011
|
-
|
|
2012
|
-
|
|
2013
|
-
|
|
2014
|
-
ORDER BY
|
|
2003
|
+
`MATCH (s:Symbol)
|
|
2004
|
+
WHERE s.embeddingNamespace = $ns
|
|
2005
|
+
RETURN s.id AS id, s.name AS name, s.kind AS kind, s.path AS path,
|
|
2006
|
+
s.lineStart AS line, s.signature AS signature,
|
|
2007
|
+
array_cosine_similarity(s.embedding, CAST($vec AS FLOAT[${dim}])) AS score
|
|
2008
|
+
ORDER BY score DESC
|
|
2009
|
+
LIMIT $k`,
|
|
2015
2010
|
{ vec: embedding, k, ns: namespace }
|
|
2016
2011
|
);
|
|
2017
2012
|
return { description, k, namespace, count: rows.length, matches: rows };
|
|
@@ -2299,7 +2294,7 @@ async function startMcpServer(portOrOptions) {
|
|
|
2299
2294
|
async function loadGraphClient(dbPath) {
|
|
2300
2295
|
let mod;
|
|
2301
2296
|
try {
|
|
2302
|
-
mod = await import("./src-467W2KXC.js");
|
|
2297
|
+
mod = await import("./src-PDNTANJD.js");
|
|
2303
2298
|
} catch (err) {
|
|
2304
2299
|
throw new Error(
|
|
2305
2300
|
`Failed to import @codegraph/graph-db. Run \`pnpm -r build\` first. Underlying error: ${err instanceof Error ? err.message : String(err)}`
|
|
@@ -2336,8 +2331,10 @@ async function loadLlmRouter(configPath2) {
|
|
|
2336
2331
|
throw new Error("@codegraph/llm-router has no `createLlmRouter` export.");
|
|
2337
2332
|
}
|
|
2338
2333
|
const fileConfig = await readCodegraphConfig(configPath2);
|
|
2334
|
+
const llmConfig = fileConfig.llm ?? void 0;
|
|
2335
|
+
const effectiveConfig = llmConfig && llmConfig.generation && llmConfig.embeddings ? llmConfig : DEFAULT_CONFIG.llm;
|
|
2339
2336
|
const router = await createLlmRouter2({
|
|
2340
|
-
config:
|
|
2337
|
+
config: effectiveConfig,
|
|
2341
2338
|
configPath: configPath2
|
|
2342
2339
|
});
|
|
2343
2340
|
return adaptLlmRouter(router);
|
|
@@ -2397,7 +2394,7 @@ shutting down (${signal})...
|
|
|
2397
2394
|
// src/program.ts
|
|
2398
2395
|
function buildProgram() {
|
|
2399
2396
|
const program = new Command();
|
|
2400
|
-
program.name("codegraph").description("Live, queryable knowledge graph for your codebase").version("0.1.0").option("--verbose", "Print full stack traces on error").hook("preAction", (thisCommand) => {
|
|
2397
|
+
program.name("codegraph").description("Live, queryable knowledge graph for your codebase").version("0.1.2").option("--verbose", "Print full stack traces on error").hook("preAction", (thisCommand) => {
|
|
2401
2398
|
const opts = thisCommand.optsWithGlobals();
|
|
2402
2399
|
if (opts.verbose) process.env.CODEGRAPH_VERBOSE = "1";
|
|
2403
2400
|
});
|
|
@@ -2443,4 +2440,4 @@ export {
|
|
|
2443
2440
|
renderError,
|
|
2444
2441
|
buildProgram
|
|
2445
2442
|
};
|
|
2446
|
-
//# sourceMappingURL=chunk-O4ZO6CP5.js.map
|
|
2443
|
+
//# sourceMappingURL=chunk-36AWRLQ6.js.map
|