@leanlabsinnov/codegraph 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin.js +4 -4
- package/dist/{chunk-GOJIV25M.js → chunk-JJGCCH3V.js} +40 -3
- package/dist/chunk-JJGCCH3V.js.map +1 -0
- package/dist/{chunk-KYPDPBI5.js → chunk-OJ2SQ3YV.js} +3565 -2769
- package/dist/chunk-OJ2SQ3YV.js.map +1 -0
- package/dist/{chunk-Z6DQLXRR.js → chunk-W5VPP2CN.js} +11 -6
- package/dist/chunk-W5VPP2CN.js.map +1 -0
- package/dist/{chunk-C2AULDUQ.js → chunk-WDIPIIA4.js} +41 -3
- package/dist/chunk-WDIPIIA4.js.map +1 -0
- package/dist/index.js +4 -4
- package/dist/{src-IKWDKNPH.js → src-I6DIEVJ5.js} +3 -3
- package/dist/src-UJRJ3HLU.js +10 -0
- package/package.json +1 -1
- package/dist/chunk-C2AULDUQ.js.map +0 -1
- package/dist/chunk-GOJIV25M.js.map +0 -1
- package/dist/chunk-KYPDPBI5.js.map +0 -1
- package/dist/chunk-Z6DQLXRR.js.map +0 -1
- package/dist/src-HB4UDUBX.js +0 -10
- /package/dist/{src-HB4UDUBX.js.map → src-I6DIEVJ5.js.map} +0 -0
- /package/dist/{src-IKWDKNPH.js.map → src-UJRJ3HLU.js.map} +0 -0
|
@@ -47,7 +47,8 @@ var LLM_PRESETS = {
|
|
|
47
47
|
provider: "openai",
|
|
48
48
|
model: "text-embedding-3-small",
|
|
49
49
|
dimension: 1536
|
|
50
|
-
}
|
|
50
|
+
},
|
|
51
|
+
localReasoning: { provider: "ollama", model: "qwen2.5-coder:1.5b" }
|
|
51
52
|
},
|
|
52
53
|
"byo-openai": {
|
|
53
54
|
mode: "byo",
|
|
@@ -56,7 +57,8 @@ var LLM_PRESETS = {
|
|
|
56
57
|
provider: "openai",
|
|
57
58
|
model: "text-embedding-3-small",
|
|
58
59
|
dimension: 1536
|
|
59
|
-
}
|
|
60
|
+
},
|
|
61
|
+
localReasoning: { provider: "ollama", model: "qwen2.5-coder:1.5b" }
|
|
60
62
|
},
|
|
61
63
|
"byo-anthropic": {
|
|
62
64
|
mode: "byo",
|
|
@@ -66,7 +68,8 @@ var LLM_PRESETS = {
|
|
|
66
68
|
provider: "openai",
|
|
67
69
|
model: "text-embedding-3-small",
|
|
68
70
|
dimension: 1536
|
|
69
|
-
}
|
|
71
|
+
},
|
|
72
|
+
localReasoning: { provider: "ollama", model: "qwen2.5-coder:1.5b" }
|
|
70
73
|
},
|
|
71
74
|
"byo-google": {
|
|
72
75
|
mode: "byo",
|
|
@@ -75,7 +78,8 @@ var LLM_PRESETS = {
|
|
|
75
78
|
provider: "google",
|
|
76
79
|
model: "text-embedding-004",
|
|
77
80
|
dimension: 768
|
|
78
|
-
}
|
|
81
|
+
},
|
|
82
|
+
localReasoning: { provider: "ollama", model: "qwen2.5-coder:1.5b" }
|
|
79
83
|
},
|
|
80
84
|
"local-ollama": {
|
|
81
85
|
mode: "local",
|
|
@@ -84,7 +88,8 @@ var LLM_PRESETS = {
|
|
|
84
88
|
provider: "ollama",
|
|
85
89
|
model: "nomic-embed-text",
|
|
86
90
|
dimension: 768
|
|
87
|
-
}
|
|
91
|
+
},
|
|
92
|
+
localReasoning: { provider: "ollama", model: "qwen2.5-coder:1.5b" }
|
|
88
93
|
},
|
|
89
94
|
/**
|
|
90
95
|
* Any OpenAI-compatible local server (llama.cpp, LM Studio, vLLM, LocalAI).
|
|
@@ -124,4 +129,4 @@ export {
|
|
|
124
129
|
DEFAULT_CONFIG,
|
|
125
130
|
LLM_PRESETS
|
|
126
131
|
};
|
|
127
|
-
//# sourceMappingURL=chunk-
|
|
132
|
+
//# sourceMappingURL=chunk-W5VPP2CN.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../shared/src/kinds.ts","../../shared/src/embedding.ts","../../shared/src/config.ts","../../shared/src/ids.ts"],"sourcesContent":["export const NODE_KINDS = [\n \"File\",\n \"Function\",\n \"Class\",\n \"Interface\",\n \"Component\",\n \"Route\",\n \"Variable\",\n] as const;\n\nexport type NodeKind = (typeof NODE_KINDS)[number];\n\nexport const EDGE_KINDS = [\n \"IMPORTS\",\n \"CALLS\",\n \"RENDERS\",\n \"INHERITS\",\n \"DEFINES\",\n \"EXPORTS\",\n] as const;\n\nexport type EdgeKind = (typeof EDGE_KINDS)[number];\n\nexport const SUPPORTED_LANGUAGES = [\"typescript\", \"tsx\", \"javascript\", \"jsx\", \"python\"] as const;\nexport type Language = (typeof SUPPORTED_LANGUAGES)[number];\n","/**\n * Embedding namespace is the tuple that uniquely identifies a vector space.\n * Every stored embedding tags itself with this namespace string so switching\n * providers later triggers a re-embed instead of silent retrieval drift.\n *\n * Format: `${provider}:${model}:${dimension}`\n */\nexport interface EmbeddingNamespace {\n provider: string;\n model: string;\n dimension: number;\n}\n\nexport function namespaceToString(ns: EmbeddingNamespace): string {\n return `${ns.provider}:${ns.model}:${ns.dimension}`;\n}\n\nexport function parseNamespace(s: string): EmbeddingNamespace | null {\n const parts = s.split(\":\");\n if (parts.length !== 3) return null;\n const [provider, model, dimensionStr] = parts;\n if (!provider || !model || !dimensionStr) return null;\n const dimension = Number(dimensionStr);\n if (!Number.isFinite(dimension) || dimension <= 0) return null;\n return { provider, model, dimension };\n}\n","/**\n * Shape of `~/.codegraph/config.json` and the in-memory resolved config.\n *\n * The on-disk file is partial; defaults are applied on load. Keeping the\n * shape co-located with shared types so cli / ingestion / mcp-server all\n * speak the same vocabulary.\n */\n\nexport type LlmMode = \"managed\" | \"byo\" | \"local\";\n\nexport type LlmProvider = \"openai\" | \"anthropic\" | \"google\" | \"ollama\";\n\nexport interface LlmProviderConfig {\n provider: LlmProvider;\n model: string;\n}\n\nexport interface LlmEmbeddingConfig extends LlmProviderConfig {\n dimension: number;\n}\n\nexport interface LlmConfig {\n mode: LlmMode;\n generation: LlmProviderConfig;\n embeddings: LlmEmbeddingConfig;\n /**\n * Optional local reasoning tier. When set, `LlmRouter.localGenerate()` routes to this\n * model instead of the primary `generation` model. Intended for cheap, latency-tolerant\n * tasks like summarisation and classification that run inside tool handlers, so that the\n * expensive frontier model (Cursor/Claude) only sees pre-digested output.\n *\n * Defaults to `{ provider: \"ollama\", model: \"qwen2.5-coder:1.5b\" }` when unset but an\n * Ollama preset is active. Falls back to `generation` when Ollama is unavailable.\n */\n localReasoning?: LlmProviderConfig;\n /** Optional API base URL override (e.g. for self-hosted OpenAI-compatible). */\n baseUrl?: string;\n}\n\nexport interface ServerConfig {\n /** SSE port for the local MCP server. */\n port: number;\n /** Single bearer token used by Claude Code / Cursor to authenticate. */\n bearerToken: string;\n}\n\n/**\n * Where the embedded Kuzu graph and any auxiliary on-disk state live. `dbPath` is\n * optional - when missing the storage layer falls back to `~/.codegraph/graph`.\n */\nexport interface DataConfig {\n dbPath?: string;\n}\n\nexport interface CodegraphConfig {\n llm: LlmConfig;\n server: ServerConfig;\n data: DataConfig;\n}\n\nexport const DEFAULT_CONFIG: CodegraphConfig = {\n llm: {\n mode: \"byo\",\n generation: { provider: \"openai\", model: \"gpt-4o-mini\" },\n embeddings: {\n provider: \"openai\",\n model: \"text-embedding-3-small\",\n dimension: 1536,\n },\n },\n server: {\n port: 3748,\n bearerToken: \"\",\n },\n data: {},\n};\n\nexport const LLM_PRESETS: Record<string, Pick<LlmConfig, \"mode\" | \"generation\" | \"embeddings\" | \"localReasoning\">> = {\n \"managed-stub\": {\n mode: \"managed\",\n generation: { provider: \"openai\", model: \"gpt-4o-mini\" },\n embeddings: {\n provider: \"openai\",\n model: \"text-embedding-3-small\",\n dimension: 1536,\n },\n localReasoning: { provider: \"ollama\", model: \"qwen2.5-coder:1.5b\" },\n },\n \"byo-openai\": {\n mode: \"byo\",\n generation: { provider: \"openai\", model: \"gpt-4o-mini\" },\n embeddings: {\n provider: \"openai\",\n model: \"text-embedding-3-small\",\n dimension: 1536,\n },\n localReasoning: { provider: \"ollama\", model: \"qwen2.5-coder:1.5b\" },\n },\n \"byo-anthropic\": {\n mode: \"byo\",\n generation: { provider: \"anthropic\", model: \"claude-3-5-haiku-latest\" },\n // Anthropic has no embedding API; fall back to OpenAI for embeddings.\n embeddings: {\n provider: \"openai\",\n model: \"text-embedding-3-small\",\n dimension: 1536,\n },\n localReasoning: { provider: \"ollama\", model: \"qwen2.5-coder:1.5b\" },\n },\n \"byo-google\": {\n mode: \"byo\",\n generation: { provider: \"google\", model: \"gemini-1.5-flash-latest\" },\n embeddings: {\n provider: \"google\",\n model: \"text-embedding-004\",\n dimension: 768,\n },\n localReasoning: { provider: \"ollama\", model: \"qwen2.5-coder:1.5b\" },\n },\n \"local-ollama\": {\n mode: \"local\",\n generation: { provider: \"ollama\", model: \"qwen2.5-coder:1.5b\" },\n embeddings: {\n provider: \"ollama\",\n model: \"nomic-embed-text\",\n dimension: 768,\n },\n localReasoning: { provider: \"ollama\", model: \"qwen2.5-coder:1.5b\" },\n },\n /**\n * Any OpenAI-compatible local server (llama.cpp, LM Studio, vLLM, LocalAI).\n * `baseUrl` must be set separately in LlmConfig (e.g. http://localhost:8080/v1).\n * Model names default to common values but should be overridden to match whatever\n * the user has loaded in their server.\n */\n \"local-openai-compatible\": {\n mode: \"local\",\n generation: { provider: \"openai\", model: \"qwen2.5-coder\" },\n embeddings: {\n provider: \"openai\",\n model: \"nomic-embed-text\",\n dimension: 768,\n },\n },\n};\n","import { createHash } from \"node:crypto\";\nimport type { NodeKind } from \"./kinds.js\";\n\n/**\n * Compute a stable graph node ID of the form `${repoId}:${kind}:${sha1(path#name#line)}`.\n *\n * The hash is deterministic across runs as long as `path`, `name`, and `line` are stable,\n * so an `upsertNodes` pass over a repo will overwrite the same nodes rather than producing\n * duplicates.\n */\nexport function makeNodeId(input: {\n repoId: string;\n kind: NodeKind;\n path: string;\n name: string;\n line?: number;\n}): string {\n const { repoId, kind, path, name, line } = input;\n const fingerprint = `${path}#${name}#${line ?? 0}`;\n const hash = createHash(\"sha1\").update(fingerprint).digest(\"hex\");\n return `${repoId}:${kind}:${hash}`;\n}\n\n/** Compute a stable repo ID from an absolute path. */\nexport function makeRepoId(absolutePath: string): string {\n return createHash(\"sha1\").update(absolutePath).digest(\"hex\").slice(0, 12);\n}\n\n/** Convenience wrapper: stable ID for a File node. */\nexport function makeFileId(input: { repoId: string; path: string }): string {\n return makeNodeId({ repoId: input.repoId, kind: \"File\", path: input.path, name: input.path });\n}\n"],"mappings":";AAAO,IAAM,aAAa;AAAA,EACxB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAIO,IAAM,aAAa;AAAA,EACxB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;;;ACNO,SAAS,kBAAkB,IAAgC;AAChE,SAAO,GAAG,GAAG,QAAQ,IAAI,GAAG,KAAK,IAAI,GAAG,SAAS;AACnD;;;AC6CO,IAAM,iBAAkC;AAAA,EAC7C,KAAK;AAAA,IACH,MAAM;AAAA,IACN,YAAY,EAAE,UAAU,UAAU,OAAO,cAAc;AAAA,IACvD,YAAY;AAAA,MACV,UAAU;AAAA,MACV,OAAO;AAAA,MACP,WAAW;AAAA,IACb;AAAA,EACF;AAAA,EACA,QAAQ;AAAA,IACN,MAAM;AAAA,IACN,aAAa;AAAA,EACf;AAAA,EACA,MAAM,CAAC;AACT;AAEO,IAAM,cAAwG;AAAA,EACnH,gBAAgB;AAAA,IACd,MAAM;AAAA,IACN,YAAY,EAAE,UAAU,UAAU,OAAO,cAAc;AAAA,IACvD,YAAY;AAAA,MACV,UAAU;AAAA,MACV,OAAO;AAAA,MACP,WAAW;AAAA,IACb;AAAA,IACA,gBAAgB,EAAE,UAAU,UAAU,OAAO,qBAAqB;AAAA,EACpE;AAAA,EACA,cAAc;AAAA,IACZ,MAAM;AAAA,IACN,YAAY,EAAE,UAAU,UAAU,OAAO,cAAc;AAAA,IACvD,YAAY;AAAA,MACV,UAAU;AAAA,MACV,OAAO;AAAA,MACP,WAAW;AAAA,IACb;AAAA,IACA,gBAAgB,EAAE,UAAU,UAAU,OAAO,qBAAqB;AAAA,EACpE;AAAA,EACA,iBAAiB;AAAA,IACf,MAAM;AAAA,IACN,YAAY,EAAE,UAAU,aAAa,OAAO,0BAA0B;AAAA;AAAA,IAEtE,YAAY;AAAA,MACV,UAAU;AAAA,MACV,OAAO;AAAA,MACP,WAAW;AAAA,IACb;AAAA,IACA,gBAAgB,EAAE,UAAU,UAAU,OAAO,qBAAqB;AAAA,EACpE;AAAA,EACA,cAAc;AAAA,IACZ,MAAM;AAAA,IACN,YAAY,EAAE,UAAU,UAAU,OAAO,0BAA0B;AAAA,IACnE,YAAY;AAAA,MACV,UAAU;AAAA,MACV,OAAO;AAAA,MACP,WAAW;AAAA,IACb;AAAA,IACA,gBAAgB,EAAE,UAAU,UAAU,OAAO,qBAAqB;AAAA,EACpE;AAAA,EACA,gBAAgB;AAAA,IACd,MAAM;AAAA,IACN,YAAY,EAAE,UAAU,UAAU,OAAO,qBAAqB;AAAA,IAC9D,YAAY;AAAA,MACV,UAAU;AAAA,MACV,OAAO;AAAA,MACP,WAAW;AAAA,IACb;AAAA,IACA,gBAAgB,EAAE,UAAU,UAAU,OAAO,qBAAqB;AAAA,EACpE;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,2BAA2B;AAAA,IACzB,MAAM;AAAA,IACN,YAAY,EAAE,UAAU,UAAU,OAAO,gBAAgB;AAAA,IACzD,YAAY;AAAA,MACV,UAAU;AAAA,MACV,OAAO;AAAA,MACP,WAAW;AAAA,IACb;AAAA,EACF;AACF;;;AChJA,SAAS,kBAAkB;AAUpB,SAAS,WAAW,OAMhB;AACT,QAAM,EAAE,QAAQ,MAAM,MAAM,MAAM,KAAK,IAAI;AAC3C,QAAM,cAAc,GAAG,IAAI,IAAI,IAAI,IAAI,QAAQ,CAAC;AAChD,QAAM,OAAO,WAAW,MAAM,EAAE,OAAO,WAAW,EAAE,OAAO,KAAK;AAChE,SAAO,GAAG,MAAM,IAAI,IAAI,IAAI,IAAI;AAClC;AAQO,SAAS,WAAW,OAAiD;AAC1E,SAAO,WAAW,EAAE,QAAQ,MAAM,QAAQ,MAAM,QAAQ,MAAM,MAAM,MAAM,MAAM,MAAM,KAAK,CAAC;AAC9F;","names":[]}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import {
|
|
2
2
|
EDGE_KINDS,
|
|
3
3
|
NODE_KINDS
|
|
4
|
-
} from "./chunk-
|
|
4
|
+
} from "./chunk-W5VPP2CN.js";
|
|
5
5
|
|
|
6
6
|
// ../graph-db/src/client.ts
|
|
7
7
|
import { mkdir } from "fs/promises";
|
|
@@ -33,7 +33,14 @@ var SYMBOL_COLUMN_SPEC = {
|
|
|
33
33
|
routePath: "STRING",
|
|
34
34
|
framework: "STRING",
|
|
35
35
|
// Embedding namespace tag
|
|
36
|
-
embeddingNamespace: "STRING"
|
|
36
|
+
embeddingNamespace: "STRING",
|
|
37
|
+
// Enrichment columns (populated by enricher.ts after edge upsert)
|
|
38
|
+
fanIn: "INT64",
|
|
39
|
+
fanOut: "INT64",
|
|
40
|
+
isTest: "BOOLEAN",
|
|
41
|
+
isGenerated: "BOOLEAN",
|
|
42
|
+
layer: "STRING",
|
|
43
|
+
summary: "STRING"
|
|
37
44
|
};
|
|
38
45
|
var SYMBOL_COLUMNS = Object.keys(SYMBOL_COLUMN_SPEC);
|
|
39
46
|
function defaultFor(column) {
|
|
@@ -64,6 +71,13 @@ function buildSchemaStatements(opts) {
|
|
|
64
71
|
"routePath STRING",
|
|
65
72
|
"framework STRING",
|
|
66
73
|
"embeddingNamespace STRING",
|
|
74
|
+
// Enrichment columns
|
|
75
|
+
"fanIn INT64",
|
|
76
|
+
"fanOut INT64",
|
|
77
|
+
"isTest BOOLEAN",
|
|
78
|
+
"isGenerated BOOLEAN",
|
|
79
|
+
"layer STRING",
|
|
80
|
+
"summary STRING",
|
|
67
81
|
`embedding FLOAT[${opts.embeddingDimension}]`,
|
|
68
82
|
"PRIMARY KEY (id)"
|
|
69
83
|
];
|
|
@@ -247,6 +261,30 @@ var GraphDb = class {
|
|
|
247
261
|
}
|
|
248
262
|
}
|
|
249
263
|
}
|
|
264
|
+
/**
|
|
265
|
+
* Batch-update a fixed set of scalar properties on existing nodes, identified by id.
|
|
266
|
+
* Used by the enricher to write back fanIn/fanOut/isTest/isGenerated/layer/summary after
|
|
267
|
+
* the edge upsert phase completes. Only the keys present in `patches[0]` (beyond `id`)
|
|
268
|
+
* are SET; other columns are untouched.
|
|
269
|
+
*
|
|
270
|
+
* Kuzu's UNWIND+SET is safe here because we are not touching the `embedding` column (the
|
|
271
|
+
* one that triggers kuzudb/kuzu#5965).
|
|
272
|
+
*/
|
|
273
|
+
async patchNodes(patches) {
|
|
274
|
+
if (patches.length === 0) return;
|
|
275
|
+
await this.connect();
|
|
276
|
+
const sample = patches[0];
|
|
277
|
+
if (!sample) return;
|
|
278
|
+
const columns = Object.keys(sample).filter((k) => k !== "id");
|
|
279
|
+
if (columns.length === 0) return;
|
|
280
|
+
const setClause = columns.map((c) => `n.${c} = r.${c}`).join(", ");
|
|
281
|
+
const cypher = `UNWIND $batch AS r MATCH (n:Symbol {id: r.id}) SET ${setClause}`;
|
|
282
|
+
const BATCH = 500;
|
|
283
|
+
for (let i = 0; i < patches.length; i += BATCH) {
|
|
284
|
+
const slice = patches.slice(i, i + BATCH);
|
|
285
|
+
await this.exec(cypher, { batch: slice });
|
|
286
|
+
}
|
|
287
|
+
}
|
|
250
288
|
/**
|
|
251
289
|
* Deletes all nodes (and incident edges via DETACH DELETE) for a repo. If `paths` is
|
|
252
290
|
* provided, restricts the delete to nodes whose `path` is in the list - used by
|
|
@@ -387,4 +425,4 @@ export {
|
|
|
387
425
|
defaultDbPath,
|
|
388
426
|
GraphDb
|
|
389
427
|
};
|
|
390
|
-
//# sourceMappingURL=chunk-
|
|
428
|
+
//# sourceMappingURL=chunk-WDIPIIA4.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../graph-db/src/client.ts","../../graph-db/src/schema.ts"],"sourcesContent":["import { mkdir } from \"node:fs/promises\";\nimport { homedir } from \"node:os\";\nimport { dirname, resolve } from \"node:path\";\nimport { EDGE_KINDS, NODE_KINDS, type EdgeKind } from \"@codegraph/shared\";\nimport * as kuzu from \"kuzu\";\nimport {\n DEFAULT_EMBEDDING_DIMENSION,\n SYMBOL_COLUMNS,\n buildSchemaStatements,\n defaultFor,\n} from \"./schema.js\";\nimport type { QueryResult, UpsertEdgeInput, UpsertNodeInput } from \"./types.js\";\n\nexport interface GraphDbOptions {\n /** Directory where Kuzu stores its on-disk database files. Defaults to `~/.codegraph/graph`. */\n dbPath?: string;\n /**\n * Legacy `url` option kept for back-compat with Phase-1 callers. Ignored at runtime -\n * Kuzu is embedded - but accepted so existing call sites compile while the rest of the\n * codebase migrates to `dbPath`.\n */\n url?: string;\n /** Vector dimension for the `Symbol.embedding` column. Baked into the schema at create time. */\n embeddingDimension?: number;\n}\n\n/** Default on-disk location for the embedded graph. */\nexport function defaultDbPath(): string {\n return resolve(homedir(), \".codegraph\", \"graph\");\n}\n\n/**\n * Thin, typed wrapper around the embedded Kuzu database.\n *\n * Public surface (intentionally identical to the Phase-1 FalkorDB client so callers don't\n * change): connect / close / migrate / query / upsertNodes / upsertEdges / deleteByRepo /\n * stats. Internals are pure Kuzu.\n */\nexport class GraphDb {\n private readonly dbPath: string;\n private readonly embeddingDimension: number;\n private db: kuzu.Database | null = null;\n private conn: kuzu.Connection | null = null;\n /**\n * Cache of `conn.prepare()` results keyed by Cypher source. Kuzu's Node SDK requires a\n * prepared statement for any parameterized query - reusing the prepared object keeps\n * UNWIND-batched upserts fast.\n */\n private preparedCache = new Map<string, kuzu.PreparedStatement>();\n\n constructor(opts: GraphDbOptions = {}) {\n this.dbPath = opts.dbPath ?? defaultDbPath();\n this.embeddingDimension = opts.embeddingDimension ?? DEFAULT_EMBEDDING_DIMENSION;\n }\n\n async connect(): Promise<void> {\n if (this.conn) return;\n await mkdir(dirname(this.dbPath), { recursive: true });\n this.db = new kuzu.Database(this.dbPath);\n this.conn = new kuzu.Connection(this.db);\n }\n\n async close(): Promise<void> {\n // We deliberately do NOT call `conn.close()` / `db.close()` on Kuzu's Node bindings.\n // In 0.11.x those native handles are also disposed by the binding's process-exit hook,\n // and double-disposing can SIGSEGV the worker on cleanup. Dropping references is\n // enough for the GC to release the underlying memory before the process exits.\n this.preparedCache.clear();\n this.conn = null;\n this.db = null;\n }\n\n private requireConn(): kuzu.Connection {\n if (!this.conn) {\n throw new Error(\"GraphDb not connected. Call connect() first.\");\n }\n return this.conn;\n }\n\n /**\n * Idempotent migration: creates the `Symbol` node table and one REL table per\n * `EdgeKind`. The `embedding FLOAT[N]` column lives on `Symbol` but we deliberately\n * skip Kuzu's `CREATE_VECTOR_INDEX` - semantic search is brute-force via\n * `array_cosine_similarity` to work around kuzudb/kuzu#5965 and kuzudb/kuzu#6040.\n * See `schema.ts` for the full rationale.\n */\n async migrate(): Promise<void> {\n await this.connect();\n const schemaStmts = buildSchemaStatements({ embeddingDimension: this.embeddingDimension });\n for (const stmt of schemaStmts) {\n await this.exec(stmt);\n }\n }\n\n /**\n * Typed Cypher escape hatch.\n *\n * Kuzu returns BIGINT columns as native BigInt; we coerce to plain `number` when safe so\n * downstream JSON serialization (MCP responses, snapshot tests) does not need bespoke\n * handling.\n */\n async query<T = Record<string, unknown>>(\n cypher: string,\n params: Record<string, unknown> = {},\n ): Promise<QueryResult<T>> {\n const result = await this.runQuery(cypher, params);\n const raw = await collectAll(result);\n const data = raw.map((row) => normalizeRow(row)) as T[];\n const headers = raw.length > 0 ? Object.keys(raw[0] ?? {}) : [];\n return { data, headers, metadata: [] };\n }\n\n /** Fire-and-forget DDL/exec. */\n private async exec(cypher: string, params: Record<string, unknown> = {}): Promise<void> {\n await this.runQuery(cypher, params);\n }\n\n /**\n * Bridge to Kuzu's two execution paths:\n * - `conn.query(stmt)` for unparameterized statements (the second positional arg is a\n * `progressCallback`, NOT params - mistaking that is the #1 way to confuse the API).\n * - `conn.prepare(stmt) + conn.execute(prepared, params)` for anything with `$name`\n * placeholders. We cache the prepared statement so UNWIND batches reuse it.\n *\n * Kuzu's `execute` rejects bind maps that contain keys the prepared statement does not\n * reference (`Parameter <name> not found`). Tool authors often pass a uniform params bag\n * and let the Cypher branch internally on which placeholders to use, so we filter the\n * bind map down to the keys actually referenced by `$name` before handing it off.\n */\n private async runQuery(cypher: string, params: Record<string, unknown>): Promise<unknown> {\n const conn = this.requireConn();\n const referenced = extractParamNames(cypher);\n if (referenced.size === 0) {\n return conn.query(cypher);\n }\n const bound: Record<string, unknown> = {};\n for (const key of referenced) {\n if (key in params) bound[key] = params[key];\n }\n let prepared = this.preparedCache.get(cypher);\n if (!prepared) {\n prepared = await conn.prepare(cypher);\n if (!prepared.isSuccess()) {\n throw new Error(prepared.getErrorMessage());\n }\n this.preparedCache.set(cypher, prepared);\n }\n // Cast through `unknown`: Kuzu's bindings advertise a strict `KuzuValue` union, but\n // we can pass through any JSON-serializable value the embedded engine accepts (nested\n // structs and lists are converted at the native layer).\n return conn.execute(prepared, bound as unknown as Parameters<kuzu.Connection[\"execute\"]>[1]);\n }\n\n /**\n * Inserts nodes via batched UNWIND + bare CREATE. The whole property map (including\n * `embedding` when present) is set in the CREATE clause - we deliberately avoid `SET`\n * because Kuzu rejects writes to an HNSW-indexed column even after the index is dropped\n * (kuzudb/kuzu#6040). Callers must wipe pre-existing rows with `deleteByRepo` first.\n *\n * In-batch duplicates (same `id`) are coalesced to the last occurrence to keep CREATE\n * from violating the primary-key uniqueness constraint.\n */\n async upsertNodes(nodes: UpsertNodeInput[]): Promise<void> {\n if (nodes.length === 0) return;\n await this.connect();\n const deduped = dedupeById(nodes);\n const withEmbedding: UpsertNodeInput[] = [];\n const withoutEmbedding: UpsertNodeInput[] = [];\n for (const n of deduped) {\n if (Array.isArray(n.embedding) && n.embedding.length > 0) {\n withEmbedding.push(n);\n } else {\n withoutEmbedding.push(n);\n }\n }\n await this.createSymbolBatch(withoutEmbedding, false);\n await this.createSymbolBatch(withEmbedding, true);\n }\n\n private async createSymbolBatch(\n nodes: UpsertNodeInput[],\n withEmbedding: boolean,\n ): Promise<void> {\n if (nodes.length === 0) return;\n const BATCH = 200;\n const columns = [...SYMBOL_COLUMNS, ...(withEmbedding ? ([\"embedding\"] as const) : [])];\n const propMap = columns.map((c) => `${c}: r.${c}`).join(\", \");\n const cypher = `UNWIND $batch AS r CREATE (n:Symbol {${propMap}})`;\n for (let i = 0; i < nodes.length; i += BATCH) {\n const slice = nodes.slice(i, i + BATCH);\n const payload = slice.map((n) => buildSymbolRow(n, withEmbedding, this.embeddingDimension));\n await this.exec(cypher, { batch: payload });\n }\n }\n\n /**\n * Upserts edges. Both endpoints must already exist as `Symbol` nodes; rows where the\n * MATCH fails are silently dropped, matching Cypher semantics.\n *\n * Uses CREATE because the orchestrator wipes the repo's slice before writing, so\n * duplicates can't pre-exist within a single index pass.\n */\n async upsertEdges(edges: UpsertEdgeInput[]): Promise<void> {\n if (edges.length === 0) return;\n await this.connect();\n const byKind = new Map<EdgeKind, UpsertEdgeInput[]>();\n for (const e of edges) {\n const bucket = byKind.get(e.kind);\n if (bucket) bucket.push(e);\n else byKind.set(e.kind, [e]);\n }\n const BATCH = 500;\n for (const [kind, batch] of byKind) {\n const cypher = `UNWIND $batch AS r MATCH (a:Symbol {id: r.fromId}) MATCH (b:Symbol {id: r.toId}) CREATE (a)-[e:${kind} {line: r.line}]->(b)`;\n for (let i = 0; i < batch.length; i += BATCH) {\n const slice = batch.slice(i, i + BATCH);\n // Use 0 (not null) for missing line numbers so Kuzu can infer the struct field\n // as INT64 even when an entire batch happens to have no `line` set.\n const payload = slice.map((e) => ({\n fromId: e.fromId,\n toId: e.toId,\n line: typeof e.line === \"number\" ? e.line : 0,\n }));\n await this.exec(cypher, { batch: payload });\n }\n }\n }\n\n /**\n * Batch-update a fixed set of scalar properties on existing nodes, identified by id.\n * Used by the enricher to write back fanIn/fanOut/isTest/isGenerated/layer/summary after\n * the edge upsert phase completes. Only the keys present in `patches[0]` (beyond `id`)\n * are SET; other columns are untouched.\n *\n * Kuzu's UNWIND+SET is safe here because we are not touching the `embedding` column (the\n * one that triggers kuzudb/kuzu#5965).\n */\n async patchNodes(patches: Array<{ id: string } & Record<string, unknown>>): Promise<void> {\n if (patches.length === 0) return;\n await this.connect();\n const sample = patches[0];\n if (!sample) return;\n const columns = Object.keys(sample).filter((k) => k !== \"id\");\n if (columns.length === 0) return;\n const setClause = columns.map((c) => `n.${c} = r.${c}`).join(\", \");\n const cypher = `UNWIND $batch AS r MATCH (n:Symbol {id: r.id}) SET ${setClause}`;\n const BATCH = 500;\n for (let i = 0; i < patches.length; i += BATCH) {\n const slice = patches.slice(i, i + BATCH);\n await this.exec(cypher, { batch: slice });\n }\n }\n\n /**\n * Deletes all nodes (and incident edges via DETACH DELETE) for a repo. If `paths` is\n * provided, restricts the delete to nodes whose `path` is in the list - used by\n * incremental re-indexing.\n */\n async deleteByRepo(repoId: string, paths?: string[]): Promise<void> {\n await this.connect();\n if (paths && paths.length > 0) {\n await this.exec(\n \"MATCH (n:Symbol) WHERE n.repoId = $repoId AND n.path IN $paths DETACH DELETE n\",\n { repoId, paths },\n );\n return;\n }\n await this.exec(\"MATCH (n:Symbol) WHERE n.repoId = $repoId DETACH DELETE n\", { repoId });\n }\n\n /**\n * Returns counts of nodes (per kind) and edges (per kind) for a repo, plus the share of\n * non-File nodes that carry an embedding.\n */\n async stats(repoId: string): Promise<{\n nodes: Record<string, number>;\n edges: Record<string, number>;\n embeddingCoverage: number;\n }> {\n await this.connect();\n const nodes: Record<string, number> = {};\n for (const kind of NODE_KINDS) {\n const r = await this.query<{ count: number }>(\n \"MATCH (n:Symbol) WHERE n.repoId = $repoId AND n.kind = $kind RETURN count(n) AS count\",\n { repoId, kind },\n );\n nodes[kind] = Number(r.data[0]?.count ?? 0);\n }\n const edges: Record<string, number> = {};\n for (const kind of EDGE_KINDS) {\n const r = await this.query<{ count: number }>(\n `MATCH (a:Symbol)-[r:${kind}]->(b:Symbol)\n WHERE a.repoId = $repoId AND b.repoId = $repoId\n RETURN count(r) AS count`,\n { repoId },\n );\n edges[kind] = Number(r.data[0]?.count ?? 0);\n }\n const cov = await this.query<{ total: number | bigint; embedded: number | bigint }>(\n `MATCH (n:Symbol)\n WHERE n.repoId = $repoId AND n.kind <> 'File'\n RETURN count(n) AS total,\n count(n.embedding) AS embedded`,\n { repoId },\n );\n const row = cov.data[0];\n const total = Number(row?.total ?? 0);\n const embedded = Number(row?.embedded ?? 0);\n const coverage = total === 0 ? 0 : embedded / total;\n return { nodes, edges, embeddingCoverage: coverage };\n }\n\n /**\n * v0.1.x never creates an HNSW index - semantic search is brute-force via\n * `array_cosine_similarity`. Always returns `false`. Kept on the surface so callers\n * (e.g. `codegraph doctor`) can branch on a single boolean once the upstream Kuzu\n * fixes ship and we flip the index back on.\n */\n hasVectorIndex(): boolean {\n return false;\n }\n}\n\n/**\n * Build a fully-populated row for a Kuzu UNWIND batch. Every column in `SYMBOL_COLUMNS`\n * is present (typed default when missing) so Kuzu can infer a homogeneous struct schema\n * for the batch parameter. When `withEmbedding` is true the `embedding` field is also\n * populated - we either use the provided vector or a zero-vector of the configured\n * dimension so the struct schema stays uniform across the batch.\n */\nfunction buildSymbolRow(\n node: UpsertNodeInput,\n withEmbedding: boolean,\n embeddingDimension: number,\n): Record<string, unknown> {\n const src = node as unknown as Record<string, unknown>;\n const row: Record<string, unknown> = {};\n for (const col of SYMBOL_COLUMNS) {\n const value = src[col];\n row[col] = value === undefined || value === null ? defaultFor(col) : value;\n }\n row.id = node.id;\n row.kind = node.kind;\n if (withEmbedding) {\n const vec = node.embedding;\n row.embedding =\n Array.isArray(vec) && vec.length === embeddingDimension\n ? vec\n : new Array<number>(embeddingDimension).fill(0);\n }\n return row;\n}\n\n/**\n * Pull every `$name` placeholder out of a Cypher string. Skips occurrences inside\n * single/double-quoted string literals so a literal like `\"$10\"` is not mistaken for a\n * parameter. Returns a `Set` so callers can membership-test cheaply.\n */\nconst PARAM_PATTERN = /(?:\"(?:[^\"\\\\]|\\\\.)*\"|'(?:[^'\\\\]|\\\\.)*'|\\$([A-Za-z_][A-Za-z0-9_]*))/g;\nfunction extractParamNames(cypher: string): Set<string> {\n const out = new Set<string>();\n for (const match of cypher.matchAll(PARAM_PATTERN)) {\n if (match[1]) out.add(match[1]);\n }\n return out;\n}\n\n/**\n * Coalesce same-id rows down to the last occurrence. Required because we now use bare\n * CREATE (not MERGE) for inserts and Kuzu rejects primary-key collisions inside a single\n * UNWIND batch.\n */\nfunction dedupeById(nodes: UpsertNodeInput[]): UpsertNodeInput[] {\n const seen = new Map<string, UpsertNodeInput>();\n for (const n of nodes) {\n seen.set(n.id, n);\n }\n return Array.from(seen.values());\n}\n\n/** Convert Kuzu's row representation (Map or plain object) into a plain JSON object. */\nfunction normalizeRow(row: unknown): Record<string, unknown> {\n if (row instanceof Map) {\n const out: Record<string, unknown> = {};\n for (const [k, v] of row) {\n out[String(k)] = coerceValue(v);\n }\n return out;\n }\n if (row && typeof row === \"object\") {\n const src = row as Record<string, unknown>;\n const out: Record<string, unknown> = {};\n for (const k of Object.keys(src)) {\n out[k] = coerceValue(src[k]);\n }\n return out;\n }\n return { value: coerceValue(row) };\n}\n\n/**\n * Kuzu returns BIGINT columns as JS BigInt. Coerce to `number` when within Number.MAX_SAFE\n * for JSON-friendly downstream consumption.\n */\nfunction coerceValue(value: unknown): unknown {\n if (typeof value === \"bigint\") {\n if (value <= BigInt(Number.MAX_SAFE_INTEGER) && value >= BigInt(Number.MIN_SAFE_INTEGER)) {\n return Number(value);\n }\n return value.toString();\n }\n if (Array.isArray(value)) return value.map(coerceValue);\n return value;\n}\n\n/** Drain a Kuzu QueryResult (or array of them) into an array of row objects. */\nasync function collectAll(result: unknown): Promise<unknown[]> {\n // Multi-statement queries return an array; we keep only the last one (matches how the\n // final statement is the one that carries a `RETURN`).\n const target = Array.isArray(result) ? result[result.length - 1] : result;\n if (!target) return [];\n const getAll = (target as { getAll?: () => Promise<unknown[]> }).getAll;\n if (typeof getAll !== \"function\") return [];\n return getAll.call(target);\n}\n\n","import { EDGE_KINDS } from \"@codegraph/shared\";\n\n/**\n * Kuzu is schema-first. Unlike FalkorDB which is schema-less, every column we ever want to\n * SET on a node must exist up-front. We use ONE `Symbol` node table with a `kind` column\n * (Kuzu does not support multi-labels), and one REL table per `EdgeKind`.\n *\n * Columns are the union of every field across the `GraphNode` discriminated union in\n * `@codegraph/shared` plus the two embedding-namespace fields. Fields that are not\n * relevant to a given kind stay NULL.\n */\n\n/**\n * Per-column metadata so the upserter can build batches with explicit typed defaults.\n *\n * Kuzu's struct parameter type inference fails when a column is null on every row in a\n * batch (it defaults to STRING and rejects assignment to a BOOL/INT64 column). Concrete\n * defaults keep inference deterministic and let us skip clunky CAST() clauses.\n *\n * Convention: optional booleans default to `false`, optional ints to `0`, optional strings\n * to `\"\"`. We never check `WHERE n.foo IS NULL` in queries, so the lost null-distinction\n * is acceptable for v0.1.0.\n */\nexport const SYMBOL_COLUMN_SPEC = {\n id: \"STRING\",\n kind: \"STRING\",\n repoId: \"STRING\",\n name: \"STRING\",\n path: \"STRING\",\n lineStart: \"INT64\",\n lineEnd: \"INT64\",\n signature: \"STRING\",\n leadingComment: \"STRING\",\n isExported: \"BOOLEAN\",\n // File-specific\n language: \"STRING\",\n sizeBytes: \"INT64\",\n contentHash: \"STRING\",\n // Function-specific\n isAsync: \"BOOLEAN\",\n isArrow: \"BOOLEAN\",\n // Route-specific\n method: \"STRING\",\n routePath: \"STRING\",\n framework: \"STRING\",\n // Embedding namespace tag\n embeddingNamespace: \"STRING\",\n // Enrichment columns (populated by enricher.ts after edge upsert)\n fanIn: \"INT64\",\n fanOut: \"INT64\",\n isTest: \"BOOLEAN\",\n isGenerated: \"BOOLEAN\",\n layer: \"STRING\",\n summary: \"STRING\",\n} as const;\n\nexport type SymbolColumn = keyof typeof SYMBOL_COLUMN_SPEC;\n\nexport const SYMBOL_COLUMNS = Object.keys(SYMBOL_COLUMN_SPEC) as SymbolColumn[];\n\n/** Return the typed default for an unset optional column. */\nexport function defaultFor(column: SymbolColumn): unknown {\n const t = SYMBOL_COLUMN_SPEC[column];\n if (t === \"BOOLEAN\") return false;\n if (t === \"INT64\") return 0;\n return \"\";\n}\n\n/** Optional per-edge metadata. Currently only `line`. */\nexport const EDGE_COLUMNS = [\"line\"] as const;\n\nexport type EdgeColumn = (typeof EDGE_COLUMNS)[number];\n\n/**\n * DDL statements that bring an empty Kuzu database to the codegraph schema.\n * `IF NOT EXISTS` makes `migrate()` idempotent so it can run on every connect.\n *\n * `embedding` is a fixed-dimension column - dimension is configured at migrate time and\n * baked into the schema. If a user later switches to an embedding provider with a\n * different dimension they must delete the on-disk graph directory to recreate it. The\n * embedding-namespace tag ensures we never silently mix dimensions.\n */\nexport function buildSchemaStatements(opts: { embeddingDimension: number }): string[] {\n const columnDefs = [\n \"id STRING\",\n \"kind STRING\",\n \"repoId STRING\",\n \"name STRING\",\n \"path STRING\",\n \"lineStart INT64\",\n \"lineEnd INT64\",\n \"signature STRING\",\n \"leadingComment STRING\",\n \"isExported BOOLEAN\",\n \"language STRING\",\n \"sizeBytes INT64\",\n \"contentHash STRING\",\n \"isAsync BOOLEAN\",\n \"isArrow BOOLEAN\",\n \"method STRING\",\n \"routePath STRING\",\n \"framework STRING\",\n \"embeddingNamespace STRING\",\n // Enrichment columns\n \"fanIn INT64\",\n \"fanOut INT64\",\n \"isTest BOOLEAN\",\n \"isGenerated BOOLEAN\",\n \"layer STRING\",\n \"summary STRING\",\n `embedding FLOAT[${opts.embeddingDimension}]`,\n \"PRIMARY KEY (id)\",\n ];\n const statements: string[] = [\n `CREATE NODE TABLE IF NOT EXISTS Symbol(${columnDefs.join(\", \")})`,\n ];\n for (const kind of EDGE_KINDS) {\n statements.push(\n `CREATE REL TABLE IF NOT EXISTS ${kind}(FROM Symbol TO Symbol, line INT64)`,\n );\n }\n return statements;\n}\n\n/**\n * Semantic search in v0.1.x is intentionally brute-force via Kuzu's built-in\n * `array_cosine_similarity` function - we do NOT create an HNSW vector index.\n *\n * Why: Kuzu 0.11.x has two open issues that make the HNSW path unusable for a\n * mutable graph workload:\n * - kuzudb/kuzu#5965: SET on a vector-indexed column is rejected with\n * \"Cannot set property vec in table embeddings because it is used in one or more\n * indexes\". The Kuzu team's own recommended workaround in that thread is\n * \"delay creation of the index itself\".\n * - kuzudb/kuzu#6040: DROP_VECTOR_INDEX leaves stale on-disk metadata, so once a\n * column has ever been indexed it becomes permanently un-writable - even fresh\n * CREATEs fail with \"Catalog exception: _N_<index>_UPPER does not exist\".\n *\n * `array_cosine_similarity` is a core Kuzu function (not part of the vector extension)\n * and runs in microseconds for the corpus sizes Phase 1 targets. We will switch back\n * to `CALL CREATE_VECTOR_INDEX` / `QUERY_VECTOR_INDEX` once the upstream fixes ship.\n */\nexport const SEMANTIC_SEARCH_MODE = \"brute-force\" as const;\n\n/** Default embedding dimension when none is supplied. Matches `text-embedding-3-small`. */\nexport const DEFAULT_EMBEDDING_DIMENSION = 1536;\n"],"mappings":";;;;;;AAAA,SAAS,aAAa;AACtB,SAAS,eAAe;AACxB,SAAS,SAAS,eAAe;AAEjC,YAAY,UAAU;;;ACmBf,IAAM,qBAAqB;AAAA,EAChC,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,QAAQ;AAAA,EACR,MAAM;AAAA,EACN,MAAM;AAAA,EACN,WAAW;AAAA,EACX,SAAS;AAAA,EACT,WAAW;AAAA,EACX,gBAAgB;AAAA,EAChB,YAAY;AAAA;AAAA,EAEZ,UAAU;AAAA,EACV,WAAW;AAAA,EACX,aAAa;AAAA;AAAA,EAEb,SAAS;AAAA,EACT,SAAS;AAAA;AAAA,EAET,QAAQ;AAAA,EACR,WAAW;AAAA,EACX,WAAW;AAAA;AAAA,EAEX,oBAAoB;AAAA;AAAA,EAEpB,OAAO;AAAA,EACP,QAAQ;AAAA,EACR,QAAQ;AAAA,EACR,aAAa;AAAA,EACb,OAAO;AAAA,EACP,SAAS;AACX;AAIO,IAAM,iBAAiB,OAAO,KAAK,kBAAkB;AAGrD,SAAS,WAAW,QAA+B;AACxD,QAAM,IAAI,mBAAmB,MAAM;AACnC,MAAI,MAAM,UAAW,QAAO;AAC5B,MAAI,MAAM,QAAS,QAAO;AAC1B,SAAO;AACT;AAGO,IAAM,eAAe,CAAC,MAAM;AAa5B,SAAS,sBAAsB,MAAgD;AACpF,QAAM,aAAa;AAAA,IACjB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA;AAAA,IAEA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,mBAAmB,KAAK,kBAAkB;AAAA,IAC1C;AAAA,EACF;AACA,QAAM,aAAuB;AAAA,IAC3B,0CAA0C,WAAW,KAAK,IAAI,CAAC;AAAA,EACjE;AACA,aAAW,QAAQ,YAAY;AAC7B,eAAW;AAAA,MACT,kCAAkC,IAAI;AAAA,IACxC;AAAA,EACF;AACA,SAAO;AACT;AAoBO,IAAM,uBAAuB;AAG7B,IAAM,8BAA8B;;;ADtHpC,SAAS,gBAAwB;AACtC,SAAO,QAAQ,QAAQ,GAAG,cAAc,OAAO;AACjD;AASO,IAAM,UAAN,MAAc;AAAA,EACF;AAAA,EACA;AAAA,EACT,KAA2B;AAAA,EAC3B,OAA+B;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAM/B,gBAAgB,oBAAI,IAAoC;AAAA,EAEhE,YAAY,OAAuB,CAAC,GAAG;AACrC,SAAK,SAAS,KAAK,UAAU,cAAc;AAC3C,SAAK,qBAAqB,KAAK,sBAAsB;AAAA,EACvD;AAAA,EAEA,MAAM,UAAyB;AAC7B,QAAI,KAAK,KAAM;AACf,UAAM,MAAM,QAAQ,KAAK,MAAM,GAAG,EAAE,WAAW,KAAK,CAAC;AACrD,SAAK,KAAK,IAAS,cAAS,KAAK,MAAM;AACvC,SAAK,OAAO,IAAS,gBAAW,KAAK,EAAE;AAAA,EACzC;AAAA,EAEA,MAAM,QAAuB;AAK3B,SAAK,cAAc,MAAM;AACzB,SAAK,OAAO;AACZ,SAAK,KAAK;AAAA,EACZ;AAAA,EAEQ,cAA+B;AACrC,QAAI,CAAC,KAAK,MAAM;AACd,YAAM,IAAI,MAAM,8CAA8C;AAAA,IAChE;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,UAAyB;AAC7B,UAAM,KAAK,QAAQ;AACnB,UAAM,cAAc,sBAAsB,EAAE,oBAAoB,KAAK,mBAAmB,CAAC;AACzF,eAAW,QAAQ,aAAa;AAC9B,YAAM,KAAK,KAAK,IAAI;AAAA,IACtB;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,MACJ,QACA,SAAkC,CAAC,GACV;AACzB,UAAM,SAAS,MAAM,KAAK,SAAS,QAAQ,MAAM;AACjD,UAAM,MAAM,MAAM,WAAW,MAAM;AACnC,UAAM,OAAO,IAAI,IAAI,CAAC,QAAQ,aAAa,GAAG,CAAC;AAC/C,UAAM,UAAU,IAAI,SAAS,IAAI,OAAO,KAAK,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC;AAC9D,WAAO,EAAE,MAAM,SAAS,UAAU,CAAC,EAAE;AAAA,EACvC;AAAA;AAAA,EAGA,MAAc,KAAK,QAAgB,SAAkC,CAAC,GAAkB;AACtF,UAAM,KAAK,SAAS,QAAQ,MAAM;AAAA,EACpC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAcA,MAAc,SAAS,QAAgB,QAAmD;AACxF,UAAM,OAAO,KAAK,YAAY;AAC9B,UAAM,aAAa,kBAAkB,MAAM;AAC3C,QAAI,WAAW,SAAS,GAAG;AACzB,aAAO,KAAK,MAAM,MAAM;AAAA,IAC1B;AACA,UAAM,QAAiC,CAAC;AACxC,eAAW,OAAO,YAAY;AAC5B,UAAI,OAAO,OAAQ,OAAM,GAAG,IAAI,OAAO,GAAG;AAAA,IAC5C;AACA,QAAI,WAAW,KAAK,cAAc,IAAI,MAAM;AAC5C,QAAI,CAAC,UAAU;AACb,iBAAW,MAAM,KAAK,QAAQ,MAAM;AACpC,UAAI,CAAC,SAAS,UAAU,GAAG;AACzB,cAAM,IAAI,MAAM,SAAS,gBAAgB,CAAC;AAAA,MAC5C;AACA,WAAK,cAAc,IAAI,QAAQ,QAAQ;AAAA,IACzC;AAIA,WAAO,KAAK,QAAQ,UAAU,KAA6D;AAAA,EAC7F;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWA,MAAM,YAAY,OAAyC;AACzD,QAAI,MAAM,WAAW,EAAG;AACxB,UAAM,KAAK,QAAQ;AACnB,UAAM,UAAU,WAAW,KAAK;AAChC,UAAM,gBAAmC,CAAC;AAC1C,UAAM,mBAAsC,CAAC;AAC7C,eAAW,KAAK,SAAS;AACvB,UAAI,MAAM,QAAQ,EAAE,SAAS,KAAK,EAAE,UAAU,SAAS,GAAG;AACxD,sBAAc,KAAK,CAAC;AAAA,MACtB,OAAO;AACL,yBAAiB,KAAK,CAAC;AAAA,MACzB;AAAA,IACF;AACA,UAAM,KAAK,kBAAkB,kBAAkB,KAAK;AACpD,UAAM,KAAK,kBAAkB,eAAe,IAAI;AAAA,EAClD;AAAA,EAEA,MAAc,kBACZ,OACA,eACe;AACf,QAAI,MAAM,WAAW,EAAG;AACxB,UAAM,QAAQ;AACd,UAAM,UAAU,CAAC,GAAG,gBAAgB,GAAI,gBAAiB,CAAC,WAAW,IAAc,CAAC,CAAE;AACtF,UAAM,UAAU,QAAQ,IAAI,CAAC,MAAM,GAAG,CAAC,OAAO,CAAC,EAAE,EAAE,KAAK,IAAI;AAC5D,UAAM,SAAS,wCAAwC,OAAO;AAC9D,aAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK,OAAO;AAC5C,YAAM,QAAQ,MAAM,MAAM,GAAG,IAAI,KAAK;AACtC,YAAM,UAAU,MAAM,IAAI,CAAC,MAAM,eAAe,GAAG,eAAe,KAAK,kBAAkB,CAAC;AAC1F,YAAM,KAAK,KAAK,QAAQ,EAAE,OAAO,QAAQ,CAAC;AAAA,IAC5C;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,YAAY,OAAyC;AACzD,QAAI,MAAM,WAAW,EAAG;AACxB,UAAM,KAAK,QAAQ;AACnB,UAAM,SAAS,oBAAI,IAAiC;AACpD,eAAW,KAAK,OAAO;AACrB,YAAM,SAAS,OAAO,IAAI,EAAE,IAAI;AAChC,UAAI,OAAQ,QAAO,KAAK,CAAC;AAAA,UACpB,QAAO,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC;AAAA,IAC7B;AACA,UAAM,QAAQ;AACd,eAAW,CAAC,MAAM,KAAK,KAAK,QAAQ;AAClC,YAAM,SAAS,kGAAkG,IAAI;AACrH,eAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK,OAAO;AAC5C,cAAM,QAAQ,MAAM,MAAM,GAAG,IAAI,KAAK;AAGtC,cAAM,UAAU,MAAM,IAAI,CAAC,OAAO;AAAA,UAChC,QAAQ,EAAE;AAAA,UACV,MAAM,EAAE;AAAA,UACR,MAAM,OAAO,EAAE,SAAS,WAAW,EAAE,OAAO;AAAA,QAC9C,EAAE;AACF,cAAM,KAAK,KAAK,QAAQ,EAAE,OAAO,QAAQ,CAAC;AAAA,MAC5C;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWA,MAAM,WAAW,SAAyE;AACxF,QAAI,QAAQ,WAAW,EAAG;AAC1B,UAAM,KAAK,QAAQ;AACnB,UAAM,SAAS,QAAQ,CAAC;AACxB,QAAI,CAAC,OAAQ;AACb,UAAM,UAAU,OAAO,KAAK,MAAM,EAAE,OAAO,CAAC,MAAM,MAAM,IAAI;AAC5D,QAAI,QAAQ,WAAW,EAAG;AAC1B,UAAM,YAAY,QAAQ,IAAI,CAAC,MAAM,KAAK,CAAC,QAAQ,CAAC,EAAE,EAAE,KAAK,IAAI;AACjE,UAAM,SAAS,sDAAsD,SAAS;AAC9E,UAAM,QAAQ;AACd,aAAS,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK,OAAO;AAC9C,YAAM,QAAQ,QAAQ,MAAM,GAAG,IAAI,KAAK;AACxC,YAAM,KAAK,KAAK,QAAQ,EAAE,OAAO,MAAM,CAAC;AAAA,IAC1C;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,aAAa,QAAgB,OAAiC;AAClE,UAAM,KAAK,QAAQ;AACnB,QAAI,SAAS,MAAM,SAAS,GAAG;AAC7B,YAAM,KAAK;AAAA,QACT;AAAA,QACA,EAAE,QAAQ,MAAM;AAAA,MAClB;AACA;AAAA,IACF;AACA,UAAM,KAAK,KAAK,6DAA6D,EAAE,OAAO,CAAC;AAAA,EACzF;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAM,MAAM,QAIT;AACD,UAAM,KAAK,QAAQ;AACnB,UAAM,QAAgC,CAAC;AACvC,eAAW,QAAQ,YAAY;AAC7B,YAAM,IAAI,MAAM,KAAK;AAAA,QACnB;AAAA,QACA,EAAE,QAAQ,KAAK;AAAA,MACjB;AACA,YAAM,IAAI,IAAI,OAAO,EAAE,KAAK,CAAC,GAAG,SAAS,CAAC;AAAA,IAC5C;AACA,UAAM,QAAgC,CAAC;AACvC,eAAW,QAAQ,YAAY;AAC7B,YAAM,IAAI,MAAM,KAAK;AAAA,QACnB,uBAAuB,IAAI;AAAA;AAAA;AAAA,QAG3B,EAAE,OAAO;AAAA,MACX;AACA,YAAM,IAAI,IAAI,OAAO,EAAE,KAAK,CAAC,GAAG,SAAS,CAAC;AAAA,IAC5C;AACA,UAAM,MAAM,MAAM,KAAK;AAAA,MACrB;AAAA;AAAA;AAAA;AAAA,MAIA,EAAE,OAAO;AAAA,IACX;AACA,UAAM,MAAM,IAAI,KAAK,CAAC;AACtB,UAAM,QAAQ,OAAO,KAAK,SAAS,CAAC;AACpC,UAAM,WAAW,OAAO,KAAK,YAAY,CAAC;AAC1C,UAAM,WAAW,UAAU,IAAI,IAAI,WAAW;AAC9C,WAAO,EAAE,OAAO,OAAO,mBAAmB,SAAS;AAAA,EACrD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,iBAA0B;AACxB,WAAO;AAAA,EACT;AACF;AASA,SAAS,eACP,MACA,eACA,oBACyB;AACzB,QAAM,MAAM;AACZ,QAAM,MAA+B,CAAC;AACtC,aAAW,OAAO,gBAAgB;AAChC,UAAM,QAAQ,IAAI,GAAG;AACrB,QAAI,GAAG,IAAI,UAAU,UAAa,UAAU,OAAO,WAAW,GAAG,IAAI;AAAA,EACvE;AACA,MAAI,KAAK,KAAK;AACd,MAAI,OAAO,KAAK;AAChB,MAAI,eAAe;AACjB,UAAM,MAAM,KAAK;AACjB,QAAI,YACF,MAAM,QAAQ,GAAG,KAAK,IAAI,WAAW,qBACjC,MACA,IAAI,MAAc,kBAAkB,EAAE,KAAK,CAAC;AAAA,EACpD;AACA,SAAO;AACT;AAOA,IAAM,gBAAgB;AACtB,SAAS,kBAAkB,QAA6B;AACtD,QAAM,MAAM,oBAAI,IAAY;AAC5B,aAAW,SAAS,OAAO,SAAS,aAAa,GAAG;AAClD,QAAI,MAAM,CAAC,EAAG,KAAI,IAAI,MAAM,CAAC,CAAC;AAAA,EAChC;AACA,SAAO;AACT;AAOA,SAAS,WAAW,OAA6C;AAC/D,QAAM,OAAO,oBAAI,IAA6B;AAC9C,aAAW,KAAK,OAAO;AACrB,SAAK,IAAI,EAAE,IAAI,CAAC;AAAA,EAClB;AACA,SAAO,MAAM,KAAK,KAAK,OAAO,CAAC;AACjC;AAGA,SAAS,aAAa,KAAuC;AAC3D,MAAI,eAAe,KAAK;AACtB,UAAM,MAA+B,CAAC;AACtC,eAAW,CAAC,GAAG,CAAC,KAAK,KAAK;AACxB,UAAI,OAAO,CAAC,CAAC,IAAI,YAAY,CAAC;AAAA,IAChC;AACA,WAAO;AAAA,EACT;AACA,MAAI,OAAO,OAAO,QAAQ,UAAU;AAClC,UAAM,MAAM;AACZ,UAAM,MAA+B,CAAC;AACtC,eAAW,KAAK,OAAO,KAAK,GAAG,GAAG;AAChC,UAAI,CAAC,IAAI,YAAY,IAAI,CAAC,CAAC;AAAA,IAC7B;AACA,WAAO;AAAA,EACT;AACA,SAAO,EAAE,OAAO,YAAY,GAAG,EAAE;AACnC;AAMA,SAAS,YAAY,OAAyB;AAC5C,MAAI,OAAO,UAAU,UAAU;AAC7B,QAAI,SAAS,OAAO,OAAO,gBAAgB,KAAK,SAAS,OAAO,OAAO,gBAAgB,GAAG;AACxF,aAAO,OAAO,KAAK;AAAA,IACrB;AACA,WAAO,MAAM,SAAS;AAAA,EACxB;AACA,MAAI,MAAM,QAAQ,KAAK,EAAG,QAAO,MAAM,IAAI,WAAW;AACtD,SAAO;AACT;AAGA,eAAe,WAAW,QAAqC;AAG7D,QAAM,SAAS,MAAM,QAAQ,MAAM,IAAI,OAAO,OAAO,SAAS,CAAC,IAAI;AACnE,MAAI,CAAC,OAAQ,QAAO,CAAC;AACrB,QAAM,SAAU,OAAiD;AACjE,MAAI,OAAO,WAAW,WAAY,QAAO,CAAC;AAC1C,SAAO,OAAO,KAAK,MAAM;AAC3B;","names":[]}
|
package/dist/index.js
CHANGED
|
@@ -3,10 +3,10 @@ import {
|
|
|
3
3
|
configPath,
|
|
4
4
|
loadConfig,
|
|
5
5
|
saveConfig
|
|
6
|
-
} from "./chunk-
|
|
7
|
-
import "./chunk-
|
|
8
|
-
import "./chunk-
|
|
9
|
-
import "./chunk-
|
|
6
|
+
} from "./chunk-OJ2SQ3YV.js";
|
|
7
|
+
import "./chunk-WDIPIIA4.js";
|
|
8
|
+
import "./chunk-JJGCCH3V.js";
|
|
9
|
+
import "./chunk-W5VPP2CN.js";
|
|
10
10
|
export {
|
|
11
11
|
buildProgram,
|
|
12
12
|
configPath,
|
|
@@ -8,8 +8,8 @@ import {
|
|
|
8
8
|
buildSchemaStatements,
|
|
9
9
|
defaultDbPath,
|
|
10
10
|
defaultFor
|
|
11
|
-
} from "./chunk-
|
|
12
|
-
import "./chunk-
|
|
11
|
+
} from "./chunk-WDIPIIA4.js";
|
|
12
|
+
import "./chunk-W5VPP2CN.js";
|
|
13
13
|
export {
|
|
14
14
|
DEFAULT_EMBEDDING_DIMENSION,
|
|
15
15
|
EDGE_COLUMNS,
|
|
@@ -21,4 +21,4 @@ export {
|
|
|
21
21
|
defaultDbPath,
|
|
22
22
|
defaultFor
|
|
23
23
|
};
|
|
24
|
-
//# sourceMappingURL=src-
|
|
24
|
+
//# sourceMappingURL=src-I6DIEVJ5.js.map
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@leanlabsinnov/codegraph",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.7",
|
|
4
4
|
"description": "Live, queryable knowledge graph for your codebase. Indexes JS/TS into an embedded graph DB with embeddings and exposes an MCP server Claude Code and Cursor can call.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"mcp",
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../../graph-db/src/client.ts","../../graph-db/src/schema.ts"],"sourcesContent":["import { mkdir } from \"node:fs/promises\";\nimport { homedir } from \"node:os\";\nimport { dirname, resolve } from \"node:path\";\nimport { EDGE_KINDS, NODE_KINDS, type EdgeKind } from \"@codegraph/shared\";\nimport * as kuzu from \"kuzu\";\nimport {\n DEFAULT_EMBEDDING_DIMENSION,\n SYMBOL_COLUMNS,\n buildSchemaStatements,\n defaultFor,\n} from \"./schema.js\";\nimport type { QueryResult, UpsertEdgeInput, UpsertNodeInput } from \"./types.js\";\n\nexport interface GraphDbOptions {\n /** Directory where Kuzu stores its on-disk database files. Defaults to `~/.codegraph/graph`. */\n dbPath?: string;\n /**\n * Legacy `url` option kept for back-compat with Phase-1 callers. Ignored at runtime -\n * Kuzu is embedded - but accepted so existing call sites compile while the rest of the\n * codebase migrates to `dbPath`.\n */\n url?: string;\n /** Vector dimension for the `Symbol.embedding` column. Baked into the schema at create time. */\n embeddingDimension?: number;\n}\n\n/** Default on-disk location for the embedded graph. */\nexport function defaultDbPath(): string {\n return resolve(homedir(), \".codegraph\", \"graph\");\n}\n\n/**\n * Thin, typed wrapper around the embedded Kuzu database.\n *\n * Public surface (intentionally identical to the Phase-1 FalkorDB client so callers don't\n * change): connect / close / migrate / query / upsertNodes / upsertEdges / deleteByRepo /\n * stats. Internals are pure Kuzu.\n */\nexport class GraphDb {\n private readonly dbPath: string;\n private readonly embeddingDimension: number;\n private db: kuzu.Database | null = null;\n private conn: kuzu.Connection | null = null;\n /**\n * Cache of `conn.prepare()` results keyed by Cypher source. Kuzu's Node SDK requires a\n * prepared statement for any parameterized query - reusing the prepared object keeps\n * UNWIND-batched upserts fast.\n */\n private preparedCache = new Map<string, kuzu.PreparedStatement>();\n\n constructor(opts: GraphDbOptions = {}) {\n this.dbPath = opts.dbPath ?? defaultDbPath();\n this.embeddingDimension = opts.embeddingDimension ?? DEFAULT_EMBEDDING_DIMENSION;\n }\n\n async connect(): Promise<void> {\n if (this.conn) return;\n await mkdir(dirname(this.dbPath), { recursive: true });\n this.db = new kuzu.Database(this.dbPath);\n this.conn = new kuzu.Connection(this.db);\n }\n\n async close(): Promise<void> {\n // We deliberately do NOT call `conn.close()` / `db.close()` on Kuzu's Node bindings.\n // In 0.11.x those native handles are also disposed by the binding's process-exit hook,\n // and double-disposing can SIGSEGV the worker on cleanup. Dropping references is\n // enough for the GC to release the underlying memory before the process exits.\n this.preparedCache.clear();\n this.conn = null;\n this.db = null;\n }\n\n private requireConn(): kuzu.Connection {\n if (!this.conn) {\n throw new Error(\"GraphDb not connected. Call connect() first.\");\n }\n return this.conn;\n }\n\n /**\n * Idempotent migration: creates the `Symbol` node table and one REL table per\n * `EdgeKind`. The `embedding FLOAT[N]` column lives on `Symbol` but we deliberately\n * skip Kuzu's `CREATE_VECTOR_INDEX` - semantic search is brute-force via\n * `array_cosine_similarity` to work around kuzudb/kuzu#5965 and kuzudb/kuzu#6040.\n * See `schema.ts` for the full rationale.\n */\n async migrate(): Promise<void> {\n await this.connect();\n const schemaStmts = buildSchemaStatements({ embeddingDimension: this.embeddingDimension });\n for (const stmt of schemaStmts) {\n await this.exec(stmt);\n }\n }\n\n /**\n * Typed Cypher escape hatch.\n *\n * Kuzu returns BIGINT columns as native BigInt; we coerce to plain `number` when safe so\n * downstream JSON serialization (MCP responses, snapshot tests) does not need bespoke\n * handling.\n */\n async query<T = Record<string, unknown>>(\n cypher: string,\n params: Record<string, unknown> = {},\n ): Promise<QueryResult<T>> {\n const result = await this.runQuery(cypher, params);\n const raw = await collectAll(result);\n const data = raw.map((row) => normalizeRow(row)) as T[];\n const headers = raw.length > 0 ? Object.keys(raw[0] ?? {}) : [];\n return { data, headers, metadata: [] };\n }\n\n /** Fire-and-forget DDL/exec. */\n private async exec(cypher: string, params: Record<string, unknown> = {}): Promise<void> {\n await this.runQuery(cypher, params);\n }\n\n /**\n * Bridge to Kuzu's two execution paths:\n * - `conn.query(stmt)` for unparameterized statements (the second positional arg is a\n * `progressCallback`, NOT params - mistaking that is the #1 way to confuse the API).\n * - `conn.prepare(stmt) + conn.execute(prepared, params)` for anything with `$name`\n * placeholders. We cache the prepared statement so UNWIND batches reuse it.\n *\n * Kuzu's `execute` rejects bind maps that contain keys the prepared statement does not\n * reference (`Parameter <name> not found`). Tool authors often pass a uniform params bag\n * and let the Cypher branch internally on which placeholders to use, so we filter the\n * bind map down to the keys actually referenced by `$name` before handing it off.\n */\n private async runQuery(cypher: string, params: Record<string, unknown>): Promise<unknown> {\n const conn = this.requireConn();\n const referenced = extractParamNames(cypher);\n if (referenced.size === 0) {\n return conn.query(cypher);\n }\n const bound: Record<string, unknown> = {};\n for (const key of referenced) {\n if (key in params) bound[key] = params[key];\n }\n let prepared = this.preparedCache.get(cypher);\n if (!prepared) {\n prepared = await conn.prepare(cypher);\n if (!prepared.isSuccess()) {\n throw new Error(prepared.getErrorMessage());\n }\n this.preparedCache.set(cypher, prepared);\n }\n // Cast through `unknown`: Kuzu's bindings advertise a strict `KuzuValue` union, but\n // we can pass through any JSON-serializable value the embedded engine accepts (nested\n // structs and lists are converted at the native layer).\n return conn.execute(prepared, bound as unknown as Parameters<kuzu.Connection[\"execute\"]>[1]);\n }\n\n /**\n * Inserts nodes via batched UNWIND + bare CREATE. The whole property map (including\n * `embedding` when present) is set in the CREATE clause - we deliberately avoid `SET`\n * because Kuzu rejects writes to an HNSW-indexed column even after the index is dropped\n * (kuzudb/kuzu#6040). Callers must wipe pre-existing rows with `deleteByRepo` first.\n *\n * In-batch duplicates (same `id`) are coalesced to the last occurrence to keep CREATE\n * from violating the primary-key uniqueness constraint.\n */\n async upsertNodes(nodes: UpsertNodeInput[]): Promise<void> {\n if (nodes.length === 0) return;\n await this.connect();\n const deduped = dedupeById(nodes);\n const withEmbedding: UpsertNodeInput[] = [];\n const withoutEmbedding: UpsertNodeInput[] = [];\n for (const n of deduped) {\n if (Array.isArray(n.embedding) && n.embedding.length > 0) {\n withEmbedding.push(n);\n } else {\n withoutEmbedding.push(n);\n }\n }\n await this.createSymbolBatch(withoutEmbedding, false);\n await this.createSymbolBatch(withEmbedding, true);\n }\n\n private async createSymbolBatch(\n nodes: UpsertNodeInput[],\n withEmbedding: boolean,\n ): Promise<void> {\n if (nodes.length === 0) return;\n const BATCH = 200;\n const columns = [...SYMBOL_COLUMNS, ...(withEmbedding ? ([\"embedding\"] as const) : [])];\n const propMap = columns.map((c) => `${c}: r.${c}`).join(\", \");\n const cypher = `UNWIND $batch AS r CREATE (n:Symbol {${propMap}})`;\n for (let i = 0; i < nodes.length; i += BATCH) {\n const slice = nodes.slice(i, i + BATCH);\n const payload = slice.map((n) => buildSymbolRow(n, withEmbedding, this.embeddingDimension));\n await this.exec(cypher, { batch: payload });\n }\n }\n\n /**\n * Upserts edges. Both endpoints must already exist as `Symbol` nodes; rows where the\n * MATCH fails are silently dropped, matching Cypher semantics.\n *\n * Uses CREATE because the orchestrator wipes the repo's slice before writing, so\n * duplicates can't pre-exist within a single index pass.\n */\n async upsertEdges(edges: UpsertEdgeInput[]): Promise<void> {\n if (edges.length === 0) return;\n await this.connect();\n const byKind = new Map<EdgeKind, UpsertEdgeInput[]>();\n for (const e of edges) {\n const bucket = byKind.get(e.kind);\n if (bucket) bucket.push(e);\n else byKind.set(e.kind, [e]);\n }\n const BATCH = 500;\n for (const [kind, batch] of byKind) {\n const cypher = `UNWIND $batch AS r MATCH (a:Symbol {id: r.fromId}) MATCH (b:Symbol {id: r.toId}) CREATE (a)-[e:${kind} {line: r.line}]->(b)`;\n for (let i = 0; i < batch.length; i += BATCH) {\n const slice = batch.slice(i, i + BATCH);\n // Use 0 (not null) for missing line numbers so Kuzu can infer the struct field\n // as INT64 even when an entire batch happens to have no `line` set.\n const payload = slice.map((e) => ({\n fromId: e.fromId,\n toId: e.toId,\n line: typeof e.line === \"number\" ? e.line : 0,\n }));\n await this.exec(cypher, { batch: payload });\n }\n }\n }\n\n /**\n * Deletes all nodes (and incident edges via DETACH DELETE) for a repo. If `paths` is\n * provided, restricts the delete to nodes whose `path` is in the list - used by\n * incremental re-indexing.\n */\n async deleteByRepo(repoId: string, paths?: string[]): Promise<void> {\n await this.connect();\n if (paths && paths.length > 0) {\n await this.exec(\n \"MATCH (n:Symbol) WHERE n.repoId = $repoId AND n.path IN $paths DETACH DELETE n\",\n { repoId, paths },\n );\n return;\n }\n await this.exec(\"MATCH (n:Symbol) WHERE n.repoId = $repoId DETACH DELETE n\", { repoId });\n }\n\n /**\n * Returns counts of nodes (per kind) and edges (per kind) for a repo, plus the share of\n * non-File nodes that carry an embedding.\n */\n async stats(repoId: string): Promise<{\n nodes: Record<string, number>;\n edges: Record<string, number>;\n embeddingCoverage: number;\n }> {\n await this.connect();\n const nodes: Record<string, number> = {};\n for (const kind of NODE_KINDS) {\n const r = await this.query<{ count: number }>(\n \"MATCH (n:Symbol) WHERE n.repoId = $repoId AND n.kind = $kind RETURN count(n) AS count\",\n { repoId, kind },\n );\n nodes[kind] = Number(r.data[0]?.count ?? 0);\n }\n const edges: Record<string, number> = {};\n for (const kind of EDGE_KINDS) {\n const r = await this.query<{ count: number }>(\n `MATCH (a:Symbol)-[r:${kind}]->(b:Symbol)\n WHERE a.repoId = $repoId AND b.repoId = $repoId\n RETURN count(r) AS count`,\n { repoId },\n );\n edges[kind] = Number(r.data[0]?.count ?? 0);\n }\n const cov = await this.query<{ total: number | bigint; embedded: number | bigint }>(\n `MATCH (n:Symbol)\n WHERE n.repoId = $repoId AND n.kind <> 'File'\n RETURN count(n) AS total,\n count(n.embedding) AS embedded`,\n { repoId },\n );\n const row = cov.data[0];\n const total = Number(row?.total ?? 0);\n const embedded = Number(row?.embedded ?? 0);\n const coverage = total === 0 ? 0 : embedded / total;\n return { nodes, edges, embeddingCoverage: coverage };\n }\n\n /**\n * v0.1.x never creates an HNSW index - semantic search is brute-force via\n * `array_cosine_similarity`. Always returns `false`. Kept on the surface so callers\n * (e.g. `codegraph doctor`) can branch on a single boolean once the upstream Kuzu\n * fixes ship and we flip the index back on.\n */\n hasVectorIndex(): boolean {\n return false;\n }\n}\n\n/**\n * Build a fully-populated row for a Kuzu UNWIND batch. Every column in `SYMBOL_COLUMNS`\n * is present (typed default when missing) so Kuzu can infer a homogeneous struct schema\n * for the batch parameter. When `withEmbedding` is true the `embedding` field is also\n * populated - we either use the provided vector or a zero-vector of the configured\n * dimension so the struct schema stays uniform across the batch.\n */\nfunction buildSymbolRow(\n node: UpsertNodeInput,\n withEmbedding: boolean,\n embeddingDimension: number,\n): Record<string, unknown> {\n const src = node as unknown as Record<string, unknown>;\n const row: Record<string, unknown> = {};\n for (const col of SYMBOL_COLUMNS) {\n const value = src[col];\n row[col] = value === undefined || value === null ? defaultFor(col) : value;\n }\n row.id = node.id;\n row.kind = node.kind;\n if (withEmbedding) {\n const vec = node.embedding;\n row.embedding =\n Array.isArray(vec) && vec.length === embeddingDimension\n ? vec\n : new Array<number>(embeddingDimension).fill(0);\n }\n return row;\n}\n\n/**\n * Pull every `$name` placeholder out of a Cypher string. Skips occurrences inside\n * single/double-quoted string literals so a literal like `\"$10\"` is not mistaken for a\n * parameter. Returns a `Set` so callers can membership-test cheaply.\n */\nconst PARAM_PATTERN = /(?:\"(?:[^\"\\\\]|\\\\.)*\"|'(?:[^'\\\\]|\\\\.)*'|\\$([A-Za-z_][A-Za-z0-9_]*))/g;\nfunction extractParamNames(cypher: string): Set<string> {\n const out = new Set<string>();\n for (const match of cypher.matchAll(PARAM_PATTERN)) {\n if (match[1]) out.add(match[1]);\n }\n return out;\n}\n\n/**\n * Coalesce same-id rows down to the last occurrence. Required because we now use bare\n * CREATE (not MERGE) for inserts and Kuzu rejects primary-key collisions inside a single\n * UNWIND batch.\n */\nfunction dedupeById(nodes: UpsertNodeInput[]): UpsertNodeInput[] {\n const seen = new Map<string, UpsertNodeInput>();\n for (const n of nodes) {\n seen.set(n.id, n);\n }\n return Array.from(seen.values());\n}\n\n/** Convert Kuzu's row representation (Map or plain object) into a plain JSON object. */\nfunction normalizeRow(row: unknown): Record<string, unknown> {\n if (row instanceof Map) {\n const out: Record<string, unknown> = {};\n for (const [k, v] of row) {\n out[String(k)] = coerceValue(v);\n }\n return out;\n }\n if (row && typeof row === \"object\") {\n const src = row as Record<string, unknown>;\n const out: Record<string, unknown> = {};\n for (const k of Object.keys(src)) {\n out[k] = coerceValue(src[k]);\n }\n return out;\n }\n return { value: coerceValue(row) };\n}\n\n/**\n * Kuzu returns BIGINT columns as JS BigInt. Coerce to `number` when within Number.MAX_SAFE\n * for JSON-friendly downstream consumption.\n */\nfunction coerceValue(value: unknown): unknown {\n if (typeof value === \"bigint\") {\n if (value <= BigInt(Number.MAX_SAFE_INTEGER) && value >= BigInt(Number.MIN_SAFE_INTEGER)) {\n return Number(value);\n }\n return value.toString();\n }\n if (Array.isArray(value)) return value.map(coerceValue);\n return value;\n}\n\n/** Drain a Kuzu QueryResult (or array of them) into an array of row objects. */\nasync function collectAll(result: unknown): Promise<unknown[]> {\n // Multi-statement queries return an array; we keep only the last one (matches how the\n // final statement is the one that carries a `RETURN`).\n const target = Array.isArray(result) ? result[result.length - 1] : result;\n if (!target) return [];\n const getAll = (target as { getAll?: () => Promise<unknown[]> }).getAll;\n if (typeof getAll !== \"function\") return [];\n return getAll.call(target);\n}\n\n","import { EDGE_KINDS } from \"@codegraph/shared\";\n\n/**\n * Kuzu is schema-first. Unlike FalkorDB which is schema-less, every column we ever want to\n * SET on a node must exist up-front. We use ONE `Symbol` node table with a `kind` column\n * (Kuzu does not support multi-labels), and one REL table per `EdgeKind`.\n *\n * Columns are the union of every field across the `GraphNode` discriminated union in\n * `@codegraph/shared` plus the two embedding-namespace fields. Fields that are not\n * relevant to a given kind stay NULL.\n */\n\n/**\n * Per-column metadata so the upserter can build batches with explicit typed defaults.\n *\n * Kuzu's struct parameter type inference fails when a column is null on every row in a\n * batch (it defaults to STRING and rejects assignment to a BOOL/INT64 column). Concrete\n * defaults keep inference deterministic and let us skip clunky CAST() clauses.\n *\n * Convention: optional booleans default to `false`, optional ints to `0`, optional strings\n * to `\"\"`. We never check `WHERE n.foo IS NULL` in queries, so the lost null-distinction\n * is acceptable for v0.1.0.\n */\nexport const SYMBOL_COLUMN_SPEC = {\n id: \"STRING\",\n kind: \"STRING\",\n repoId: \"STRING\",\n name: \"STRING\",\n path: \"STRING\",\n lineStart: \"INT64\",\n lineEnd: \"INT64\",\n signature: \"STRING\",\n leadingComment: \"STRING\",\n isExported: \"BOOLEAN\",\n // File-specific\n language: \"STRING\",\n sizeBytes: \"INT64\",\n contentHash: \"STRING\",\n // Function-specific\n isAsync: \"BOOLEAN\",\n isArrow: \"BOOLEAN\",\n // Route-specific\n method: \"STRING\",\n routePath: \"STRING\",\n framework: \"STRING\",\n // Embedding namespace tag\n embeddingNamespace: \"STRING\",\n} as const;\n\nexport type SymbolColumn = keyof typeof SYMBOL_COLUMN_SPEC;\n\nexport const SYMBOL_COLUMNS = Object.keys(SYMBOL_COLUMN_SPEC) as SymbolColumn[];\n\n/** Return the typed default for an unset optional column. */\nexport function defaultFor(column: SymbolColumn): unknown {\n const t = SYMBOL_COLUMN_SPEC[column];\n if (t === \"BOOLEAN\") return false;\n if (t === \"INT64\") return 0;\n return \"\";\n}\n\n/** Optional per-edge metadata. Currently only `line`. */\nexport const EDGE_COLUMNS = [\"line\"] as const;\n\nexport type EdgeColumn = (typeof EDGE_COLUMNS)[number];\n\n/**\n * DDL statements that bring an empty Kuzu database to the codegraph schema.\n * `IF NOT EXISTS` makes `migrate()` idempotent so it can run on every connect.\n *\n * `embedding` is a fixed-dimension column - dimension is configured at migrate time and\n * baked into the schema. If a user later switches to an embedding provider with a\n * different dimension they must delete the on-disk graph directory to recreate it. The\n * embedding-namespace tag ensures we never silently mix dimensions.\n */\nexport function buildSchemaStatements(opts: { embeddingDimension: number }): string[] {\n const columnDefs = [\n \"id STRING\",\n \"kind STRING\",\n \"repoId STRING\",\n \"name STRING\",\n \"path STRING\",\n \"lineStart INT64\",\n \"lineEnd INT64\",\n \"signature STRING\",\n \"leadingComment STRING\",\n \"isExported BOOLEAN\",\n \"language STRING\",\n \"sizeBytes INT64\",\n \"contentHash STRING\",\n \"isAsync BOOLEAN\",\n \"isArrow BOOLEAN\",\n \"method STRING\",\n \"routePath STRING\",\n \"framework STRING\",\n \"embeddingNamespace STRING\",\n `embedding FLOAT[${opts.embeddingDimension}]`,\n \"PRIMARY KEY (id)\",\n ];\n const statements: string[] = [\n `CREATE NODE TABLE IF NOT EXISTS Symbol(${columnDefs.join(\", \")})`,\n ];\n for (const kind of EDGE_KINDS) {\n statements.push(\n `CREATE REL TABLE IF NOT EXISTS ${kind}(FROM Symbol TO Symbol, line INT64)`,\n );\n }\n return statements;\n}\n\n/**\n * Semantic search in v0.1.x is intentionally brute-force via Kuzu's built-in\n * `array_cosine_similarity` function - we do NOT create an HNSW vector index.\n *\n * Why: Kuzu 0.11.x has two open issues that make the HNSW path unusable for a\n * mutable graph workload:\n * - kuzudb/kuzu#5965: SET on a vector-indexed column is rejected with\n * \"Cannot set property vec in table embeddings because it is used in one or more\n * indexes\". The Kuzu team's own recommended workaround in that thread is\n * \"delay creation of the index itself\".\n * - kuzudb/kuzu#6040: DROP_VECTOR_INDEX leaves stale on-disk metadata, so once a\n * column has ever been indexed it becomes permanently un-writable - even fresh\n * CREATEs fail with \"Catalog exception: _N_<index>_UPPER does not exist\".\n *\n * `array_cosine_similarity` is a core Kuzu function (not part of the vector extension)\n * and runs in microseconds for the corpus sizes Phase 1 targets. We will switch back\n * to `CALL CREATE_VECTOR_INDEX` / `QUERY_VECTOR_INDEX` once the upstream fixes ship.\n */\nexport const SEMANTIC_SEARCH_MODE = \"brute-force\" as const;\n\n/** Default embedding dimension when none is supplied. Matches `text-embedding-3-small`. */\nexport const DEFAULT_EMBEDDING_DIMENSION = 1536;\n"],"mappings":";;;;;;AAAA,SAAS,aAAa;AACtB,SAAS,eAAe;AACxB,SAAS,SAAS,eAAe;AAEjC,YAAY,UAAU;;;ACmBf,IAAM,qBAAqB;AAAA,EAChC,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,QAAQ;AAAA,EACR,MAAM;AAAA,EACN,MAAM;AAAA,EACN,WAAW;AAAA,EACX,SAAS;AAAA,EACT,WAAW;AAAA,EACX,gBAAgB;AAAA,EAChB,YAAY;AAAA;AAAA,EAEZ,UAAU;AAAA,EACV,WAAW;AAAA,EACX,aAAa;AAAA;AAAA,EAEb,SAAS;AAAA,EACT,SAAS;AAAA;AAAA,EAET,QAAQ;AAAA,EACR,WAAW;AAAA,EACX,WAAW;AAAA;AAAA,EAEX,oBAAoB;AACtB;AAIO,IAAM,iBAAiB,OAAO,KAAK,kBAAkB;AAGrD,SAAS,WAAW,QAA+B;AACxD,QAAM,IAAI,mBAAmB,MAAM;AACnC,MAAI,MAAM,UAAW,QAAO;AAC5B,MAAI,MAAM,QAAS,QAAO;AAC1B,SAAO;AACT;AAGO,IAAM,eAAe,CAAC,MAAM;AAa5B,SAAS,sBAAsB,MAAgD;AACpF,QAAM,aAAa;AAAA,IACjB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,mBAAmB,KAAK,kBAAkB;AAAA,IAC1C;AAAA,EACF;AACA,QAAM,aAAuB;AAAA,IAC3B,0CAA0C,WAAW,KAAK,IAAI,CAAC;AAAA,EACjE;AACA,aAAW,QAAQ,YAAY;AAC7B,eAAW;AAAA,MACT,kCAAkC,IAAI;AAAA,IACxC;AAAA,EACF;AACA,SAAO;AACT;AAoBO,IAAM,uBAAuB;AAG7B,IAAM,8BAA8B;;;ADxGpC,SAAS,gBAAwB;AACtC,SAAO,QAAQ,QAAQ,GAAG,cAAc,OAAO;AACjD;AASO,IAAM,UAAN,MAAc;AAAA,EACF;AAAA,EACA;AAAA,EACT,KAA2B;AAAA,EAC3B,OAA+B;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAM/B,gBAAgB,oBAAI,IAAoC;AAAA,EAEhE,YAAY,OAAuB,CAAC,GAAG;AACrC,SAAK,SAAS,KAAK,UAAU,cAAc;AAC3C,SAAK,qBAAqB,KAAK,sBAAsB;AAAA,EACvD;AAAA,EAEA,MAAM,UAAyB;AAC7B,QAAI,KAAK,KAAM;AACf,UAAM,MAAM,QAAQ,KAAK,MAAM,GAAG,EAAE,WAAW,KAAK,CAAC;AACrD,SAAK,KAAK,IAAS,cAAS,KAAK,MAAM;AACvC,SAAK,OAAO,IAAS,gBAAW,KAAK,EAAE;AAAA,EACzC;AAAA,EAEA,MAAM,QAAuB;AAK3B,SAAK,cAAc,MAAM;AACzB,SAAK,OAAO;AACZ,SAAK,KAAK;AAAA,EACZ;AAAA,EAEQ,cAA+B;AACrC,QAAI,CAAC,KAAK,MAAM;AACd,YAAM,IAAI,MAAM,8CAA8C;AAAA,IAChE;AACA,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,UAAyB;AAC7B,UAAM,KAAK,QAAQ;AACnB,UAAM,cAAc,sBAAsB,EAAE,oBAAoB,KAAK,mBAAmB,CAAC;AACzF,eAAW,QAAQ,aAAa;AAC9B,YAAM,KAAK,KAAK,IAAI;AAAA,IACtB;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,MACJ,QACA,SAAkC,CAAC,GACV;AACzB,UAAM,SAAS,MAAM,KAAK,SAAS,QAAQ,MAAM;AACjD,UAAM,MAAM,MAAM,WAAW,MAAM;AACnC,UAAM,OAAO,IAAI,IAAI,CAAC,QAAQ,aAAa,GAAG,CAAC;AAC/C,UAAM,UAAU,IAAI,SAAS,IAAI,OAAO,KAAK,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC;AAC9D,WAAO,EAAE,MAAM,SAAS,UAAU,CAAC,EAAE;AAAA,EACvC;AAAA;AAAA,EAGA,MAAc,KAAK,QAAgB,SAAkC,CAAC,GAAkB;AACtF,UAAM,KAAK,SAAS,QAAQ,MAAM;AAAA,EACpC;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAcA,MAAc,SAAS,QAAgB,QAAmD;AACxF,UAAM,OAAO,KAAK,YAAY;AAC9B,UAAM,aAAa,kBAAkB,MAAM;AAC3C,QAAI,WAAW,SAAS,GAAG;AACzB,aAAO,KAAK,MAAM,MAAM;AAAA,IAC1B;AACA,UAAM,QAAiC,CAAC;AACxC,eAAW,OAAO,YAAY;AAC5B,UAAI,OAAO,OAAQ,OAAM,GAAG,IAAI,OAAO,GAAG;AAAA,IAC5C;AACA,QAAI,WAAW,KAAK,cAAc,IAAI,MAAM;AAC5C,QAAI,CAAC,UAAU;AACb,iBAAW,MAAM,KAAK,QAAQ,MAAM;AACpC,UAAI,CAAC,SAAS,UAAU,GAAG;AACzB,cAAM,IAAI,MAAM,SAAS,gBAAgB,CAAC;AAAA,MAC5C;AACA,WAAK,cAAc,IAAI,QAAQ,QAAQ;AAAA,IACzC;AAIA,WAAO,KAAK,QAAQ,UAAU,KAA6D;AAAA,EAC7F;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAWA,MAAM,YAAY,OAAyC;AACzD,QAAI,MAAM,WAAW,EAAG;AACxB,UAAM,KAAK,QAAQ;AACnB,UAAM,UAAU,WAAW,KAAK;AAChC,UAAM,gBAAmC,CAAC;AAC1C,UAAM,mBAAsC,CAAC;AAC7C,eAAW,KAAK,SAAS;AACvB,UAAI,MAAM,QAAQ,EAAE,SAAS,KAAK,EAAE,UAAU,SAAS,GAAG;AACxD,sBAAc,KAAK,CAAC;AAAA,MACtB,OAAO;AACL,yBAAiB,KAAK,CAAC;AAAA,MACzB;AAAA,IACF;AACA,UAAM,KAAK,kBAAkB,kBAAkB,KAAK;AACpD,UAAM,KAAK,kBAAkB,eAAe,IAAI;AAAA,EAClD;AAAA,EAEA,MAAc,kBACZ,OACA,eACe;AACf,QAAI,MAAM,WAAW,EAAG;AACxB,UAAM,QAAQ;AACd,UAAM,UAAU,CAAC,GAAG,gBAAgB,GAAI,gBAAiB,CAAC,WAAW,IAAc,CAAC,CAAE;AACtF,UAAM,UAAU,QAAQ,IAAI,CAAC,MAAM,GAAG,CAAC,OAAO,CAAC,EAAE,EAAE,KAAK,IAAI;AAC5D,UAAM,SAAS,wCAAwC,OAAO;AAC9D,aAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK,OAAO;AAC5C,YAAM,QAAQ,MAAM,MAAM,GAAG,IAAI,KAAK;AACtC,YAAM,UAAU,MAAM,IAAI,CAAC,MAAM,eAAe,GAAG,eAAe,KAAK,kBAAkB,CAAC;AAC1F,YAAM,KAAK,KAAK,QAAQ,EAAE,OAAO,QAAQ,CAAC;AAAA,IAC5C;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,YAAY,OAAyC;AACzD,QAAI,MAAM,WAAW,EAAG;AACxB,UAAM,KAAK,QAAQ;AACnB,UAAM,SAAS,oBAAI,IAAiC;AACpD,eAAW,KAAK,OAAO;AACrB,YAAM,SAAS,OAAO,IAAI,EAAE,IAAI;AAChC,UAAI,OAAQ,QAAO,KAAK,CAAC;AAAA,UACpB,QAAO,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC;AAAA,IAC7B;AACA,UAAM,QAAQ;AACd,eAAW,CAAC,MAAM,KAAK,KAAK,QAAQ;AAClC,YAAM,SAAS,kGAAkG,IAAI;AACrH,eAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK,OAAO;AAC5C,cAAM,QAAQ,MAAM,MAAM,GAAG,IAAI,KAAK;AAGtC,cAAM,UAAU,MAAM,IAAI,CAAC,OAAO;AAAA,UAChC,QAAQ,EAAE;AAAA,UACV,MAAM,EAAE;AAAA,UACR,MAAM,OAAO,EAAE,SAAS,WAAW,EAAE,OAAO;AAAA,QAC9C,EAAE;AACF,cAAM,KAAK,KAAK,QAAQ,EAAE,OAAO,QAAQ,CAAC;AAAA,MAC5C;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,aAAa,QAAgB,OAAiC;AAClE,UAAM,KAAK,QAAQ;AACnB,QAAI,SAAS,MAAM,SAAS,GAAG;AAC7B,YAAM,KAAK;AAAA,QACT;AAAA,QACA,EAAE,QAAQ,MAAM;AAAA,MAClB;AACA;AAAA,IACF;AACA,UAAM,KAAK,KAAK,6DAA6D,EAAE,OAAO,CAAC;AAAA,EACzF;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAM,MAAM,QAIT;AACD,UAAM,KAAK,QAAQ;AACnB,UAAM,QAAgC,CAAC;AACvC,eAAW,QAAQ,YAAY;AAC7B,YAAM,IAAI,MAAM,KAAK;AAAA,QACnB;AAAA,QACA,EAAE,QAAQ,KAAK;AAAA,MACjB;AACA,YAAM,IAAI,IAAI,OAAO,EAAE,KAAK,CAAC,GAAG,SAAS,CAAC;AAAA,IAC5C;AACA,UAAM,QAAgC,CAAC;AACvC,eAAW,QAAQ,YAAY;AAC7B,YAAM,IAAI,MAAM,KAAK;AAAA,QACnB,uBAAuB,IAAI;AAAA;AAAA;AAAA,QAG3B,EAAE,OAAO;AAAA,MACX;AACA,YAAM,IAAI,IAAI,OAAO,EAAE,KAAK,CAAC,GAAG,SAAS,CAAC;AAAA,IAC5C;AACA,UAAM,MAAM,MAAM,KAAK;AAAA,MACrB;AAAA;AAAA;AAAA;AAAA,MAIA,EAAE,OAAO;AAAA,IACX;AACA,UAAM,MAAM,IAAI,KAAK,CAAC;AACtB,UAAM,QAAQ,OAAO,KAAK,SAAS,CAAC;AACpC,UAAM,WAAW,OAAO,KAAK,YAAY,CAAC;AAC1C,UAAM,WAAW,UAAU,IAAI,IAAI,WAAW;AAC9C,WAAO,EAAE,OAAO,OAAO,mBAAmB,SAAS;AAAA,EACrD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,iBAA0B;AACxB,WAAO;AAAA,EACT;AACF;AASA,SAAS,eACP,MACA,eACA,oBACyB;AACzB,QAAM,MAAM;AACZ,QAAM,MAA+B,CAAC;AACtC,aAAW,OAAO,gBAAgB;AAChC,UAAM,QAAQ,IAAI,GAAG;AACrB,QAAI,GAAG,IAAI,UAAU,UAAa,UAAU,OAAO,WAAW,GAAG,IAAI;AAAA,EACvE;AACA,MAAI,KAAK,KAAK;AACd,MAAI,OAAO,KAAK;AAChB,MAAI,eAAe;AACjB,UAAM,MAAM,KAAK;AACjB,QAAI,YACF,MAAM,QAAQ,GAAG,KAAK,IAAI,WAAW,qBACjC,MACA,IAAI,MAAc,kBAAkB,EAAE,KAAK,CAAC;AAAA,EACpD;AACA,SAAO;AACT;AAOA,IAAM,gBAAgB;AACtB,SAAS,kBAAkB,QAA6B;AACtD,QAAM,MAAM,oBAAI,IAAY;AAC5B,aAAW,SAAS,OAAO,SAAS,aAAa,GAAG;AAClD,QAAI,MAAM,CAAC,EAAG,KAAI,IAAI,MAAM,CAAC,CAAC;AAAA,EAChC;AACA,SAAO;AACT;AAOA,SAAS,WAAW,OAA6C;AAC/D,QAAM,OAAO,oBAAI,IAA6B;AAC9C,aAAW,KAAK,OAAO;AACrB,SAAK,IAAI,EAAE,IAAI,CAAC;AAAA,EAClB;AACA,SAAO,MAAM,KAAK,KAAK,OAAO,CAAC;AACjC;AAGA,SAAS,aAAa,KAAuC;AAC3D,MAAI,eAAe,KAAK;AACtB,UAAM,MAA+B,CAAC;AACtC,eAAW,CAAC,GAAG,CAAC,KAAK,KAAK;AACxB,UAAI,OAAO,CAAC,CAAC,IAAI,YAAY,CAAC;AAAA,IAChC;AACA,WAAO;AAAA,EACT;AACA,MAAI,OAAO,OAAO,QAAQ,UAAU;AAClC,UAAM,MAAM;AACZ,UAAM,MAA+B,CAAC;AACtC,eAAW,KAAK,OAAO,KAAK,GAAG,GAAG;AAChC,UAAI,CAAC,IAAI,YAAY,IAAI,CAAC,CAAC;AAAA,IAC7B;AACA,WAAO;AAAA,EACT;AACA,SAAO,EAAE,OAAO,YAAY,GAAG,EAAE;AACnC;AAMA,SAAS,YAAY,OAAyB;AAC5C,MAAI,OAAO,UAAU,UAAU;AAC7B,QAAI,SAAS,OAAO,OAAO,gBAAgB,KAAK,SAAS,OAAO,OAAO,gBAAgB,GAAG;AACxF,aAAO,OAAO,KAAK;AAAA,IACrB;AACA,WAAO,MAAM,SAAS;AAAA,EACxB;AACA,MAAI,MAAM,QAAQ,KAAK,EAAG,QAAO,MAAM,IAAI,WAAW;AACtD,SAAO;AACT;AAGA,eAAe,WAAW,QAAqC;AAG7D,QAAM,SAAS,MAAM,QAAQ,MAAM,IAAI,OAAO,OAAO,SAAS,CAAC,IAAI;AACnE,MAAI,CAAC,OAAQ,QAAO,CAAC;AACrB,QAAM,SAAU,OAAiD;AACjE,MAAI,OAAO,WAAW,WAAY,QAAO,CAAC;AAC1C,SAAO,OAAO,KAAK,MAAM;AAC3B;","names":[]}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../../llm-router/src/router.ts"],"sourcesContent":["import type { EmbeddingModel, LanguageModel } from \"ai\";\nimport { embedMany, generateText } from \"ai\";\nimport type { LlmConfig, LlmEmbeddingConfig, LlmProviderConfig } from \"@codegraph/shared\";\nimport { namespaceToString } from \"@codegraph/shared\";\nimport type { GenerateOptions, LlmRouter, ResolvedLlmConfig } from \"./types.js\";\n\nexport interface CreateRouterOptions {\n config: LlmConfig;\n env?: NodeJS.ProcessEnv;\n}\n\nexport async function createLlmRouter(opts: CreateRouterOptions): Promise<LlmRouter> {\n const env = opts.env ?? process.env;\n const resolved: ResolvedLlmConfig = {\n ...opts.config,\n embeddingNamespace: {\n provider: opts.config.embeddings.provider,\n model: opts.config.embeddings.model,\n dimension: opts.config.embeddings.dimension,\n },\n };\n\n const generationModel = await buildGenerationModel(opts.config, env);\n const embeddingModel = await buildEmbeddingModel(opts.config.embeddings, env, opts.config.baseUrl);\n\n async function doEmbed(texts: string[]): Promise<number[][]> {\n if (texts.length === 0) return [];\n const { embeddings } = await embedMany({ model: embeddingModel, values: texts });\n return embeddings;\n }\n\n async function doGenerate(options: GenerateOptions): Promise<string> {\n const { text } = await generateText({\n model: generationModel,\n ...(options.system !== undefined ? { system: options.system } : {}),\n messages: options.messages,\n ...(options.temperature !== undefined ? { temperature: options.temperature } : {}),\n ...(options.maxTokens !== undefined ? { maxTokens: options.maxTokens } : {}),\n });\n return text;\n }\n\n const router: LlmRouter = {\n config: resolved,\n embed: doEmbed,\n generate: doGenerate,\n async selfTest() {\n const embedStart = Date.now();\n const embeds = await doEmbed([\"codegraph self-test\"]);\n const embedLatencyMs = Date.now() - embedStart;\n const genStart = Date.now();\n const out = await doGenerate({\n messages: [{ role: \"user\", content: \"Reply with the single word ok.\" }],\n maxTokens: 5,\n temperature: 0,\n });\n const generateLatencyMs = Date.now() - genStart;\n return {\n embedDims: embeds[0]?.length ?? 0,\n generationOk: typeof out === \"string\" && out.length > 0,\n embedLatencyMs,\n generateLatencyMs,\n };\n },\n };\n return router;\n}\n\n/**\n * Used by the CLI's `config llm` printer; surfaces the resolved namespace\n * tag without forcing a provider import upstream.\n */\nexport function namespaceLabel(config: LlmConfig): string {\n return namespaceToString({\n provider: config.embeddings.provider,\n model: config.embeddings.model,\n dimension: config.embeddings.dimension,\n });\n}\n\nasync function buildGenerationModel(\n config: LlmConfig,\n env: NodeJS.ProcessEnv,\n): Promise<LanguageModel> {\n const provider = config.generation.provider;\n if (provider === \"openai\") return openaiText(config.generation, env, config.baseUrl);\n if (provider === \"anthropic\") return anthropicText(config.generation, env, config.baseUrl);\n if (provider === \"google\") return googleText(config.generation, env, config.baseUrl);\n if (provider === \"ollama\") return ollamaText(config.generation, env, config.baseUrl);\n throw new Error(`Unsupported generation provider: ${provider}`);\n}\n\nasync function buildEmbeddingModel(\n config: LlmEmbeddingConfig,\n env: NodeJS.ProcessEnv,\n baseUrl?: string,\n): Promise<EmbeddingModel<string>> {\n if (config.provider === \"openai\") return openaiEmbedding(config, env, baseUrl);\n if (config.provider === \"google\") return googleEmbedding(config, env);\n if (config.provider === \"ollama\") return ollamaEmbedding(config, env);\n if (config.provider === \"anthropic\") {\n // Anthropic has no embedding endpoint; fall back to OpenAI so the\n // pipeline keeps working when generation is anthropic.\n return openaiEmbedding(\n { provider: \"openai\", model: \"text-embedding-3-small\", dimension: 1536 },\n env,\n );\n }\n throw new Error(`Unsupported embedding provider: ${config.provider}`);\n}\n\nasync function openaiText(\n config: LlmProviderConfig,\n env: NodeJS.ProcessEnv,\n baseUrl?: string,\n): Promise<LanguageModel> {\n const { createOpenAI } = await import(\"@ai-sdk/openai\");\n const openai = createOpenAI({\n ...(env.OPENAI_API_KEY ? { apiKey: env.OPENAI_API_KEY } : {}),\n ...(baseUrl ? { baseURL: baseUrl } : {}),\n });\n return openai(config.model);\n}\n\nasync function openaiEmbedding(\n config: LlmEmbeddingConfig,\n env: NodeJS.ProcessEnv,\n baseUrl?: string,\n): Promise<EmbeddingModel<string>> {\n const { createOpenAI } = await import(\"@ai-sdk/openai\");\n const openai = createOpenAI({\n ...(env.OPENAI_API_KEY ? { apiKey: env.OPENAI_API_KEY } : {}),\n ...(baseUrl ? { baseURL: baseUrl } : {}),\n });\n return openai.embedding(config.model);\n}\n\nasync function anthropicText(\n config: LlmProviderConfig,\n env: NodeJS.ProcessEnv,\n baseUrl?: string,\n): Promise<LanguageModel> {\n const { createAnthropic } = await import(\"@ai-sdk/anthropic\");\n const anthropic = createAnthropic({\n ...(env.ANTHROPIC_API_KEY ? { apiKey: env.ANTHROPIC_API_KEY } : {}),\n ...(baseUrl ? { baseURL: baseUrl } : {}),\n });\n return anthropic(config.model);\n}\n\nasync function googleText(\n config: LlmProviderConfig,\n env: NodeJS.ProcessEnv,\n baseUrl?: string,\n): Promise<LanguageModel> {\n const { createGoogleGenerativeAI } = await import(\"@ai-sdk/google\");\n const google = createGoogleGenerativeAI({\n ...(env.GOOGLE_GENERATIVE_AI_API_KEY\n ? { apiKey: env.GOOGLE_GENERATIVE_AI_API_KEY }\n : {}),\n ...(baseUrl ? { baseURL: baseUrl } : {}),\n });\n return google(config.model);\n}\n\nasync function googleEmbedding(\n config: LlmEmbeddingConfig,\n env: NodeJS.ProcessEnv,\n): Promise<EmbeddingModel<string>> {\n const { createGoogleGenerativeAI } = await import(\"@ai-sdk/google\");\n const google = createGoogleGenerativeAI(\n env.GOOGLE_GENERATIVE_AI_API_KEY\n ? { apiKey: env.GOOGLE_GENERATIVE_AI_API_KEY }\n : {},\n );\n return google.textEmbeddingModel(config.model);\n}\n\nasync function ollamaText(\n config: LlmProviderConfig,\n env: NodeJS.ProcessEnv,\n baseUrl?: string,\n): Promise<LanguageModel> {\n const { createOllama } = await import(\"ollama-ai-provider\");\n const base = baseUrl ?? env.OLLAMA_BASE_URL;\n const ollama = createOllama(base ? { baseURL: `${base.replace(/\\/$/, \"\")}/api` } : {});\n return ollama(config.model);\n}\n\nasync function ollamaEmbedding(\n config: LlmEmbeddingConfig,\n env: NodeJS.ProcessEnv,\n): Promise<EmbeddingModel<string>> {\n const { createOllama } = await import(\"ollama-ai-provider\");\n const base = env.OLLAMA_BASE_URL;\n const ollama = createOllama(base ? { baseURL: `${base.replace(/\\/$/, \"\")}/api` } : {});\n return ollama.embedding(config.model);\n}\n"],"mappings":";;;;;AACA,SAAS,WAAW,oBAAoB;AAUxC,eAAsB,gBAAgB,MAA+C;AACnF,QAAM,MAAM,KAAK,OAAO,QAAQ;AAChC,QAAM,WAA8B;AAAA,IAClC,GAAG,KAAK;AAAA,IACR,oBAAoB;AAAA,MAClB,UAAU,KAAK,OAAO,WAAW;AAAA,MACjC,OAAO,KAAK,OAAO,WAAW;AAAA,MAC9B,WAAW,KAAK,OAAO,WAAW;AAAA,IACpC;AAAA,EACF;AAEA,QAAM,kBAAkB,MAAM,qBAAqB,KAAK,QAAQ,GAAG;AACnE,QAAM,iBAAiB,MAAM,oBAAoB,KAAK,OAAO,YAAY,KAAK,KAAK,OAAO,OAAO;AAEjG,iBAAe,QAAQ,OAAsC;AAC3D,QAAI,MAAM,WAAW,EAAG,QAAO,CAAC;AAChC,UAAM,EAAE,WAAW,IAAI,MAAM,UAAU,EAAE,OAAO,gBAAgB,QAAQ,MAAM,CAAC;AAC/E,WAAO;AAAA,EACT;AAEA,iBAAe,WAAW,SAA2C;AACnE,UAAM,EAAE,KAAK,IAAI,MAAM,aAAa;AAAA,MAClC,OAAO;AAAA,MACP,GAAI,QAAQ,WAAW,SAAY,EAAE,QAAQ,QAAQ,OAAO,IAAI,CAAC;AAAA,MACjE,UAAU,QAAQ;AAAA,MAClB,GAAI,QAAQ,gBAAgB,SAAY,EAAE,aAAa,QAAQ,YAAY,IAAI,CAAC;AAAA,MAChF,GAAI,QAAQ,cAAc,SAAY,EAAE,WAAW,QAAQ,UAAU,IAAI,CAAC;AAAA,IAC5E,CAAC;AACD,WAAO;AAAA,EACT;AAEA,QAAM,SAAoB;AAAA,IACxB,QAAQ;AAAA,IACR,OAAO;AAAA,IACP,UAAU;AAAA,IACV,MAAM,WAAW;AACf,YAAM,aAAa,KAAK,IAAI;AAC5B,YAAM,SAAS,MAAM,QAAQ,CAAC,qBAAqB,CAAC;AACpD,YAAM,iBAAiB,KAAK,IAAI,IAAI;AACpC,YAAM,WAAW,KAAK,IAAI;AAC1B,YAAM,MAAM,MAAM,WAAW;AAAA,QAC3B,UAAU,CAAC,EAAE,MAAM,QAAQ,SAAS,iCAAiC,CAAC;AAAA,QACtE,WAAW;AAAA,QACX,aAAa;AAAA,MACf,CAAC;AACD,YAAM,oBAAoB,KAAK,IAAI,IAAI;AACvC,aAAO;AAAA,QACL,WAAW,OAAO,CAAC,GAAG,UAAU;AAAA,QAChC,cAAc,OAAO,QAAQ,YAAY,IAAI,SAAS;AAAA,QACtD;AAAA,QACA;AAAA,MACF;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;AAMO,SAAS,eAAe,QAA2B;AACxD,SAAO,kBAAkB;AAAA,IACvB,UAAU,OAAO,WAAW;AAAA,IAC5B,OAAO,OAAO,WAAW;AAAA,IACzB,WAAW,OAAO,WAAW;AAAA,EAC/B,CAAC;AACH;AAEA,eAAe,qBACb,QACA,KACwB;AACxB,QAAM,WAAW,OAAO,WAAW;AACnC,MAAI,aAAa,SAAU,QAAO,WAAW,OAAO,YAAY,KAAK,OAAO,OAAO;AACnF,MAAI,aAAa,YAAa,QAAO,cAAc,OAAO,YAAY,KAAK,OAAO,OAAO;AACzF,MAAI,aAAa,SAAU,QAAO,WAAW,OAAO,YAAY,KAAK,OAAO,OAAO;AACnF,MAAI,aAAa,SAAU,QAAO,WAAW,OAAO,YAAY,KAAK,OAAO,OAAO;AACnF,QAAM,IAAI,MAAM,oCAAoC,QAAQ,EAAE;AAChE;AAEA,eAAe,oBACb,QACA,KACA,SACiC;AACjC,MAAI,OAAO,aAAa,SAAU,QAAO,gBAAgB,QAAQ,KAAK,OAAO;AAC7E,MAAI,OAAO,aAAa,SAAU,QAAO,gBAAgB,QAAQ,GAAG;AACpE,MAAI,OAAO,aAAa,SAAU,QAAO,gBAAgB,QAAQ,GAAG;AACpE,MAAI,OAAO,aAAa,aAAa;AAGnC,WAAO;AAAA,MACL,EAAE,UAAU,UAAU,OAAO,0BAA0B,WAAW,KAAK;AAAA,MACvE;AAAA,IACF;AAAA,EACF;AACA,QAAM,IAAI,MAAM,mCAAmC,OAAO,QAAQ,EAAE;AACtE;AAEA,eAAe,WACb,QACA,KACA,SACwB;AACxB,QAAM,EAAE,aAAa,IAAI,MAAM,OAAO,gBAAgB;AACtD,QAAM,SAAS,aAAa;AAAA,IAC1B,GAAI,IAAI,iBAAiB,EAAE,QAAQ,IAAI,eAAe,IAAI,CAAC;AAAA,IAC3D,GAAI,UAAU,EAAE,SAAS,QAAQ,IAAI,CAAC;AAAA,EACxC,CAAC;AACD,SAAO,OAAO,OAAO,KAAK;AAC5B;AAEA,eAAe,gBACb,QACA,KACA,SACiC;AACjC,QAAM,EAAE,aAAa,IAAI,MAAM,OAAO,gBAAgB;AACtD,QAAM,SAAS,aAAa;AAAA,IAC1B,GAAI,IAAI,iBAAiB,EAAE,QAAQ,IAAI,eAAe,IAAI,CAAC;AAAA,IAC3D,GAAI,UAAU,EAAE,SAAS,QAAQ,IAAI,CAAC;AAAA,EACxC,CAAC;AACD,SAAO,OAAO,UAAU,OAAO,KAAK;AACtC;AAEA,eAAe,cACb,QACA,KACA,SACwB;AACxB,QAAM,EAAE,gBAAgB,IAAI,MAAM,OAAO,mBAAmB;AAC5D,QAAM,YAAY,gBAAgB;AAAA,IAChC,GAAI,IAAI,oBAAoB,EAAE,QAAQ,IAAI,kBAAkB,IAAI,CAAC;AAAA,IACjE,GAAI,UAAU,EAAE,SAAS,QAAQ,IAAI,CAAC;AAAA,EACxC,CAAC;AACD,SAAO,UAAU,OAAO,KAAK;AAC/B;AAEA,eAAe,WACb,QACA,KACA,SACwB;AACxB,QAAM,EAAE,yBAAyB,IAAI,MAAM,OAAO,gBAAgB;AAClE,QAAM,SAAS,yBAAyB;AAAA,IACtC,GAAI,IAAI,+BACJ,EAAE,QAAQ,IAAI,6BAA6B,IAC3C,CAAC;AAAA,IACL,GAAI,UAAU,EAAE,SAAS,QAAQ,IAAI,CAAC;AAAA,EACxC,CAAC;AACD,SAAO,OAAO,OAAO,KAAK;AAC5B;AAEA,eAAe,gBACb,QACA,KACiC;AACjC,QAAM,EAAE,yBAAyB,IAAI,MAAM,OAAO,gBAAgB;AAClE,QAAM,SAAS;AAAA,IACb,IAAI,+BACA,EAAE,QAAQ,IAAI,6BAA6B,IAC3C,CAAC;AAAA,EACP;AACA,SAAO,OAAO,mBAAmB,OAAO,KAAK;AAC/C;AAEA,eAAe,WACb,QACA,KACA,SACwB;AACxB,QAAM,EAAE,aAAa,IAAI,MAAM,OAAO,oBAAoB;AAC1D,QAAM,OAAO,WAAW,IAAI;AAC5B,QAAM,SAAS,aAAa,OAAO,EAAE,SAAS,GAAG,KAAK,QAAQ,OAAO,EAAE,CAAC,OAAO,IAAI,CAAC,CAAC;AACrF,SAAO,OAAO,OAAO,KAAK;AAC5B;AAEA,eAAe,gBACb,QACA,KACiC;AACjC,QAAM,EAAE,aAAa,IAAI,MAAM,OAAO,oBAAoB;AAC1D,QAAM,OAAO,IAAI;AACjB,QAAM,SAAS,aAAa,OAAO,EAAE,SAAS,GAAG,KAAK,QAAQ,OAAO,EAAE,CAAC,OAAO,IAAI,CAAC,CAAC;AACrF,SAAO,OAAO,UAAU,OAAO,KAAK;AACtC;","names":[]}
|