@codragraph/cli 1.6.4 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -0
- package/dist/_shared/cgdb/schema-constants.d.ts +16 -0
- package/dist/_shared/cgdb/schema-constants.d.ts.map +1 -0
- package/dist/_shared/cgdb/schema-constants.js +67 -0
- package/dist/_shared/cgdb/schema-constants.js.map +1 -0
- package/dist/_shared/index.d.ts +2 -2
- package/dist/_shared/index.js +1 -1
- package/dist/cli/analyze.d.ts +22 -0
- package/dist/cli/analyze.js +109 -6
- package/dist/cli/compress-stats.d.ts +29 -0
- package/dist/cli/compress-stats.js +97 -0
- package/dist/cli/graphstore.d.ts +6 -2
- package/dist/cli/graphstore.js +45 -23
- package/dist/cli/index-repo.js +3 -3
- package/dist/cli/index.js +16 -2
- package/dist/cli/profile-heap.d.ts +35 -0
- package/dist/cli/profile-heap.js +126 -0
- package/dist/cli/setup.d.ts +13 -0
- package/dist/cli/setup.js +22 -11
- package/dist/cli/skill-gen.d.ts +14 -2
- package/dist/cli/skill-gen.js +52 -19
- package/dist/cli/tool.js +4 -0
- package/dist/cli/wiki.js +3 -3
- package/dist/core/augmentation/engine.js +7 -7
- package/dist/core/cgdb/cgdb-adapter.d.ts +176 -0
- package/dist/core/cgdb/cgdb-adapter.js +1320 -0
- package/dist/core/cgdb/content-read.d.ts +46 -0
- package/dist/core/cgdb/content-read.js +64 -0
- package/dist/core/cgdb/csv-generator.d.ts +29 -0
- package/dist/core/cgdb/csv-generator.js +492 -0
- package/dist/core/cgdb/pool-adapter.d.ts +93 -0
- package/dist/core/cgdb/pool-adapter.js +550 -0
- package/dist/core/cgdb/schema.d.ts +62 -0
- package/dist/core/cgdb/schema.js +502 -0
- package/dist/core/embeddings/embedding-pipeline.js +27 -10
- package/dist/core/graphstore/cgdb-row-source.d.ts +19 -0
- package/dist/core/graphstore/cgdb-row-source.js +141 -0
- package/dist/core/graphstore/index.d.ts +1 -1
- package/dist/core/graphstore/index.js +3 -3
- package/dist/core/group/bridge-db.d.ts +2 -2
- package/dist/core/group/bridge-db.js +123 -36
- package/dist/core/group/bridge-schema.d.ts +4 -4
- package/dist/core/group/bridge-schema.js +4 -4
- package/dist/core/group/cross-impact.js +3 -3
- package/dist/core/group/sync.js +4 -4
- package/dist/core/lbug/content-read.d.ts +46 -0
- package/dist/core/lbug/content-read.js +64 -0
- package/dist/core/lbug/csv-generator.d.ts +2 -6
- package/dist/core/lbug/csv-generator.js +45 -12
- package/dist/core/lbug/lbug-adapter.d.ts +4 -1
- package/dist/core/lbug/lbug-adapter.js +153 -21
- package/dist/core/lbug/schema.d.ts +7 -7
- package/dist/core/lbug/schema.js +18 -0
- package/dist/core/run-analyze.d.ts +13 -0
- package/dist/core/run-analyze.js +114 -27
- package/dist/core/search/bm25-index.d.ts +3 -3
- package/dist/core/search/bm25-index.js +75 -23
- package/dist/core/search/hybrid-search.js +2 -2
- package/dist/core/wiki/generator.d.ts +2 -2
- package/dist/core/wiki/generator.js +4 -4
- package/dist/core/wiki/graph-queries.d.ts +2 -2
- package/dist/core/wiki/graph-queries.js +5 -5
- package/dist/mcp/core/cgdb-adapter.d.ts +5 -0
- package/dist/mcp/core/cgdb-adapter.js +5 -0
- package/dist/mcp/core/embedder.js +1 -1
- package/dist/mcp/local/local-backend.d.ts +2 -2
- package/dist/mcp/local/local-backend.js +36 -19
- package/dist/mcp/server.js +3 -3
- package/dist/mcp/tools.js +1 -1
- package/dist/server/analyze-worker.js +2 -2
- package/dist/server/api.js +34 -33
- package/dist/storage/repo-manager.d.ts +42 -3
- package/dist/storage/repo-manager.js +23 -4
- package/hooks/claude/codragraph-hook.cjs +98 -5
- package/package.json +4 -4
- package/scripts/build-tree-sitter-proto.cjs +15 -3
- package/scripts/build.js +8 -9
- package/scripts/patch-tree-sitter-swift.cjs +17 -4
- package/skills/codragraph-api-surface.md +110 -0
- package/skills/codragraph-config-audit.md +146 -0
- package/skills/codragraph-cross-repo-impact.md +135 -0
- package/skills/codragraph-data-lineage.md +137 -0
- package/skills/codragraph-dead-code.md +119 -0
- package/skills/codragraph-gh-actions-debug.md +162 -0
- package/skills/codragraph-gh-issue-workflow.md +178 -0
- package/skills/codragraph-gh-pr-workflow.md +176 -0
- package/skills/codragraph-gh-release-workflow.md +187 -0
- package/skills/codragraph-git-bisect.md +176 -0
- package/skills/codragraph-git-force-push.md +147 -0
- package/skills/codragraph-git-history-rewrite.md +174 -0
- package/skills/codragraph-git-rebase-vs-merge.md +138 -0
- package/skills/codragraph-git-recovery.md +181 -0
- package/skills/codragraph-git-worktree.md +145 -0
- package/skills/codragraph-migration-tracking.md +130 -0
- package/skills/codragraph-notebook-context.md +136 -0
- package/skills/codragraph-observability-coverage.md +125 -0
- package/skills/codragraph-onboarding.md +129 -0
- package/skills/codragraph-perf-hotspots.md +132 -0
- package/skills/codragraph-project-switcher.md +116 -0
- package/skills/codragraph-security-audit.md +144 -0
- package/skills/codragraph-sql-tracing.md +122 -0
- package/skills/codragraph-supply-chain-audit.md +153 -0
- package/skills/codragraph-test-coverage.md +97 -0
- package/vendor/tree-sitter-proto/bindings/node/index.js +3 -3
- package/vendor/tree-sitter-proto/src/node-types.json +1 -1
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
*/
|
|
8
8
|
import fs from 'fs/promises';
|
|
9
9
|
import path from 'path';
|
|
10
|
-
import {
|
|
10
|
+
import { initCgdb, executeQuery, executeParameterized, closeCgdb, isCgdbReady, isWriteQuery, } from '../../core/cgdb/pool-adapter.js';
|
|
11
11
|
export { isWriteQuery };
|
|
12
12
|
// Embedding imports are lazy (dynamic import) to avoid loading onnxruntime-node
|
|
13
13
|
// at MCP server startup — crashes on unsupported Node ABI versions (#89)
|
|
@@ -18,7 +18,8 @@ import { listRegisteredRepos, cleanupOldKuzuFiles, } from '../../storage/repo-ma
|
|
|
18
18
|
import { GroupService } from '../../core/group/service.js';
|
|
19
19
|
import { resolveAtGroupMemberRepoPath } from '../../core/group/resolve-at-member.js';
|
|
20
20
|
import { collectBestChunks } from '../../core/embeddings/types.js';
|
|
21
|
-
import { EMBEDDING_TABLE_NAME, EMBEDDING_INDEX_NAME } from '../../core/
|
|
21
|
+
import { EMBEDDING_TABLE_NAME, EMBEDDING_INDEX_NAME } from '../../core/cgdb/schema.js';
|
|
22
|
+
import { decodeContentField } from '../../core/cgdb/content-read.js';
|
|
22
23
|
import { PhaseTimer } from '../../core/search/phase-timer.js';
|
|
23
24
|
import { checkStaleness, checkCwdMatch } from '../../core/git-staleness.js';
|
|
24
25
|
// AI context generation is CLI-only (codragraph analyze)
|
|
@@ -188,7 +189,7 @@ export class LocalBackend {
|
|
|
188
189
|
}
|
|
189
190
|
/** Close all pooled LadybugDB connections (CLI one-shot; optional for long-lived MCP). */
|
|
190
191
|
async dispose() {
|
|
191
|
-
await
|
|
192
|
+
await closeCgdb();
|
|
192
193
|
}
|
|
193
194
|
// ─── Initialization ──────────────────────────────────────────────
|
|
194
195
|
/**
|
|
@@ -211,9 +212,9 @@ export class LocalBackend {
|
|
|
211
212
|
const id = this.repoId(entry.name, entry.path);
|
|
212
213
|
freshIds.add(id);
|
|
213
214
|
const storagePath = entry.storagePath;
|
|
214
|
-
const
|
|
215
|
+
const cgdbPath = path.join(storagePath, 'cgdb');
|
|
215
216
|
// Clean up any leftover KuzuDB files from before the LadybugDB migration.
|
|
216
|
-
// If kuzu exists but
|
|
217
|
+
// If kuzu exists but cgdb doesn't, warn so the user knows to re-analyze.
|
|
217
218
|
const kuzu = await cleanupOldKuzuFiles(storagePath);
|
|
218
219
|
if (kuzu.found && kuzu.needsReindex) {
|
|
219
220
|
console.error(`CodraGraph: "${entry.name}" has a stale KuzuDB index. Run: codragraph analyze ${entry.path}`);
|
|
@@ -223,7 +224,7 @@ export class LocalBackend {
|
|
|
223
224
|
name: entry.name,
|
|
224
225
|
repoPath: entry.path,
|
|
225
226
|
storagePath,
|
|
226
|
-
|
|
227
|
+
cgdbPath,
|
|
227
228
|
indexedAt: entry.indexedAt,
|
|
228
229
|
lastCommit: entry.lastCommit,
|
|
229
230
|
remoteUrl: entry.remoteUrl,
|
|
@@ -363,7 +364,7 @@ export class LocalBackend {
|
|
|
363
364
|
// Check if the index was rebuilt since we opened the connection (#297).
|
|
364
365
|
// Throttle staleness checks to at most once per 5 seconds per repo to
|
|
365
366
|
// avoid an fs.readFile round-trip on every tool invocation.
|
|
366
|
-
if (this.initializedRepos.has(repoId) &&
|
|
367
|
+
if (this.initializedRepos.has(repoId) && isCgdbReady(repoId)) {
|
|
367
368
|
const now = Date.now();
|
|
368
369
|
const lastCheck = this.lastStalenessCheck.get(repoId) ?? 0;
|
|
369
370
|
if (now - lastCheck < 5000)
|
|
@@ -379,10 +380,10 @@ export class LocalBackend {
|
|
|
379
380
|
// callers both detect staleness and double-close the pool.
|
|
380
381
|
const reinit = (async () => {
|
|
381
382
|
try {
|
|
382
|
-
await
|
|
383
|
+
await closeCgdb(repoId);
|
|
383
384
|
this.initializedRepos.delete(repoId);
|
|
384
385
|
handle.indexedAt = meta.indexedAt;
|
|
385
|
-
await
|
|
386
|
+
await initCgdb(repoId, handle.cgdbPath);
|
|
386
387
|
this.initializedRepos.add(repoId);
|
|
387
388
|
}
|
|
388
389
|
finally {
|
|
@@ -401,7 +402,7 @@ export class LocalBackend {
|
|
|
401
402
|
}
|
|
402
403
|
}
|
|
403
404
|
try {
|
|
404
|
-
await
|
|
405
|
+
await initCgdb(repoId, handle.cgdbPath);
|
|
405
406
|
this.initializedRepos.add(repoId);
|
|
406
407
|
}
|
|
407
408
|
catch (err) {
|
|
@@ -835,10 +836,12 @@ export class LocalBackend {
|
|
|
835
836
|
try {
|
|
836
837
|
const contentRows = await executeParameterized(repo.id, `
|
|
837
838
|
MATCH (n {id: $nodeId})
|
|
838
|
-
RETURN n.content AS content
|
|
839
|
+
RETURN n.content AS content, n.contentEncoding AS contentEncoding
|
|
839
840
|
`, { nodeId: sym.nodeId });
|
|
840
841
|
if (contentRows.length > 0) {
|
|
841
|
-
|
|
842
|
+
const raw = contentRows[0].content ?? contentRows[0][0];
|
|
843
|
+
const enc = contentRows[0].contentEncoding ?? contentRows[0][1];
|
|
844
|
+
content = decodeContentField(raw, enc);
|
|
842
845
|
}
|
|
843
846
|
}
|
|
844
847
|
catch (e) {
|
|
@@ -945,10 +948,10 @@ export class LocalBackend {
|
|
|
945
948
|
* BM25 keyword search helper - uses LadybugDB FTS for always-fresh results
|
|
946
949
|
*/
|
|
947
950
|
async bm25Search(repo, query, limit) {
|
|
948
|
-
const {
|
|
951
|
+
const { searchFTSFromCgdb } = await import('../../core/search/bm25-index.js');
|
|
949
952
|
let bm25Results;
|
|
950
953
|
try {
|
|
951
|
-
bm25Results = await
|
|
954
|
+
bm25Results = await searchFTSFromCgdb(query, limit, repo.id);
|
|
952
955
|
}
|
|
953
956
|
catch (err) {
|
|
954
957
|
console.error('CodraGraph: BM25/FTS search failed (FTS indexes may not exist) -', err.message);
|
|
@@ -1085,7 +1088,7 @@ export class LocalBackend {
|
|
|
1085
1088
|
}
|
|
1086
1089
|
async cypher(repo, params) {
|
|
1087
1090
|
await this.ensureInitialized(repo.id);
|
|
1088
|
-
if (!
|
|
1091
|
+
if (!isCgdbReady(repo.id)) {
|
|
1089
1092
|
return { error: 'LadybugDB not ready. Index may be corrupted.' };
|
|
1090
1093
|
}
|
|
1091
1094
|
// Block write operations (defense-in-depth — DB is already read-only)
|
|
@@ -1330,7 +1333,13 @@ export class LocalBackend {
|
|
|
1330
1333
|
*/
|
|
1331
1334
|
async resolveSymbolCandidates(repo, query, hints) {
|
|
1332
1335
|
const { uid, name, include_content } = query;
|
|
1333
|
-
|
|
1336
|
+
// RFC 0001 Phase 2: when fetching content, also fetch the per-row
|
|
1337
|
+
// encoding tag so `decodeContentField` can pass it through unchanged
|
|
1338
|
+
// (default 'none') or run brotli/zstd decode. Adding contentEncoding
|
|
1339
|
+
// to the SELECT shifts the numeric-index fallback for content from
|
|
1340
|
+
// r[6] to (still) r[6] — encoding lands at r[7] — but we read by name
|
|
1341
|
+
// first which is the documented preferred path on LadybugDB.
|
|
1342
|
+
const selectClause = `n.id AS id, n.name AS name, labels(n)[0] AS type, n.filePath AS filePath, n.startLine AS startLine, n.endLine AS endLine${include_content ? ', n.content AS content, n.contentEncoding AS contentEncoding' : ''}`;
|
|
1334
1343
|
// Direct UID — zero-ambiguity path.
|
|
1335
1344
|
if (uid) {
|
|
1336
1345
|
const rows = await executeParameterized(repo.id, `MATCH (n {id: $uid}) RETURN ${selectClause} LIMIT 1`, { uid });
|
|
@@ -1344,7 +1353,11 @@ export class LocalBackend {
|
|
|
1344
1353
|
filePath: (r.filePath ?? r[3]),
|
|
1345
1354
|
startLine: (r.startLine ?? r[4]),
|
|
1346
1355
|
endLine: (r.endLine ?? r[5]),
|
|
1347
|
-
...(include_content
|
|
1356
|
+
...(include_content
|
|
1357
|
+
? {
|
|
1358
|
+
content: decodeContentField(r.content ?? r[6], r.contentEncoding ?? r[7]),
|
|
1359
|
+
}
|
|
1360
|
+
: {}),
|
|
1348
1361
|
};
|
|
1349
1362
|
// Same LadybugDB label-enrichment as the name-based path: a UID
|
|
1350
1363
|
// pointing at a Class must still surface `type: 'Class'` so impact's
|
|
@@ -1380,7 +1393,11 @@ export class LocalBackend {
|
|
|
1380
1393
|
filePath: (r.filePath ?? r[3]),
|
|
1381
1394
|
startLine: (r.startLine ?? r[4]),
|
|
1382
1395
|
endLine: (r.endLine ?? r[5]),
|
|
1383
|
-
...(include_content
|
|
1396
|
+
...(include_content
|
|
1397
|
+
? {
|
|
1398
|
+
content: decodeContentField(r.content ?? r[6], r.contentEncoding ?? r[7]),
|
|
1399
|
+
}
|
|
1400
|
+
: {}),
|
|
1384
1401
|
}));
|
|
1385
1402
|
// Enrich labels for any candidates where `labels(n)[0]` came back empty.
|
|
1386
1403
|
// LadybugDB returns an empty string for that projection on certain node
|
|
@@ -3205,7 +3222,7 @@ export class LocalBackend {
|
|
|
3205
3222
|
};
|
|
3206
3223
|
}
|
|
3207
3224
|
async disconnect() {
|
|
3208
|
-
await
|
|
3225
|
+
await closeCgdb(); // close all connections
|
|
3209
3226
|
// Note: we intentionally do NOT call disposeEmbedder() here.
|
|
3210
3227
|
// ONNX Runtime's native cleanup segfaults on macOS and some Linux configs,
|
|
3211
3228
|
// and importing the embedder module on Node v24+ crashes if onnxruntime
|
package/dist/mcp/server.js
CHANGED
|
@@ -15,7 +15,7 @@ import { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
|
|
15
15
|
import { CompatibleStdioServerTransport } from './compatible-stdio-transport.js';
|
|
16
16
|
import { CallToolRequestSchema, ListToolsRequestSchema, ListResourcesRequestSchema, ReadResourceRequestSchema, ListResourceTemplatesRequestSchema, ListPromptsRequestSchema, GetPromptRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
|
|
17
17
|
import { CODRAGRAPH_TOOLS } from './tools.js';
|
|
18
|
-
import { realStdoutWrite } from './core/
|
|
18
|
+
import { realStdoutWrite } from './core/cgdb-adapter.js';
|
|
19
19
|
import { getResourceDefinitions, getResourceTemplates, readResource } from './resources.js';
|
|
20
20
|
/**
|
|
21
21
|
* Next-step hints appended to tool responses.
|
|
@@ -208,7 +208,7 @@ export function createMCPServer(backend) {
|
|
|
208
208
|
{ name: 'repo', description: 'Repository (omit if only one indexed)', required: false },
|
|
209
209
|
{
|
|
210
210
|
name: 'symbolId',
|
|
211
|
-
description: 'Stable symbol id (PK in the
|
|
211
|
+
description: 'Stable symbol id (PK in the cgdb node table)',
|
|
212
212
|
required: true,
|
|
213
213
|
},
|
|
214
214
|
{
|
|
@@ -484,7 +484,7 @@ Follow these steps:
|
|
|
484
484
|
export async function startMCPServer(backend) {
|
|
485
485
|
const server = createMCPServer(backend);
|
|
486
486
|
// Use the shared stdout reference captured at module-load time by the
|
|
487
|
-
//
|
|
487
|
+
// cgdb-adapter. Avoids divergence if anything patches stdout between
|
|
488
488
|
// module load and server start.
|
|
489
489
|
const _safeStdout = new Proxy(process.stdout, {
|
|
490
490
|
get(target, prop, receiver) {
|
package/dist/mcp/tools.js
CHANGED
|
@@ -806,7 +806,7 @@ Pair with codragraph_context first to get the symbol's stable id (e.g. \`fn:src/
|
|
|
806
806
|
type: 'object',
|
|
807
807
|
properties: {
|
|
808
808
|
repo: { type: 'string', description: 'Indexed repo (omit if only one).' },
|
|
809
|
-
symbolId: { type: 'string', description: 'Stable symbol id (PK in the
|
|
809
|
+
symbolId: { type: 'string', description: 'Stable symbol id (PK in the cgdb node table).' },
|
|
810
810
|
table: {
|
|
811
811
|
type: 'string',
|
|
812
812
|
description: 'Optional table hint to narrow the search (Function, Class, Method, Interface, …).',
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
* Child -> Parent: { type: 'error', message: string }
|
|
12
12
|
*/
|
|
13
13
|
import { runFullAnalysis } from '../core/run-analyze.js';
|
|
14
|
-
import {
|
|
14
|
+
import { closeCgdb } from '../core/cgdb/cgdb-adapter.js';
|
|
15
15
|
function send(msg) {
|
|
16
16
|
process.send?.(msg);
|
|
17
17
|
}
|
|
@@ -28,7 +28,7 @@ process.on('unhandledRejection', (reason) => {
|
|
|
28
28
|
process.on('SIGTERM', async () => {
|
|
29
29
|
send({ type: 'error', message: 'Analysis cancelled (worker received SIGTERM)' });
|
|
30
30
|
try {
|
|
31
|
-
await
|
|
31
|
+
await closeCgdb();
|
|
32
32
|
}
|
|
33
33
|
catch { }
|
|
34
34
|
process.exit(0);
|
package/dist/server/api.js
CHANGED
|
@@ -13,10 +13,11 @@ import path from 'path';
|
|
|
13
13
|
import fs from 'fs/promises';
|
|
14
14
|
import { createRequire } from 'node:module';
|
|
15
15
|
import { loadMeta, listRegisteredRepos, getStoragePath } from '../storage/repo-manager.js';
|
|
16
|
-
import { executeQuery, executePrepared, executeWithReusedStatement, streamQuery,
|
|
17
|
-
import { isWriteQuery } from '../core/
|
|
16
|
+
import { executeQuery, executePrepared, executeWithReusedStatement, streamQuery, closeCgdb, withCgdbDb, } from '../core/cgdb/cgdb-adapter.js';
|
|
17
|
+
import { isWriteQuery } from '../core/cgdb/pool-adapter.js';
|
|
18
|
+
import { decodeContentField } from '../core/cgdb/content-read.js';
|
|
18
19
|
import { NODE_TABLES } from '../_shared/index.js';
|
|
19
|
-
import {
|
|
20
|
+
import { searchFTSFromCgdb } from '../core/search/bm25-index.js';
|
|
20
21
|
import { hybridSearch } from '../core/search/hybrid-search.js';
|
|
21
22
|
// Embedding imports are lazy (dynamic import) to avoid loading onnxruntime-node
|
|
22
23
|
// at server startup — crashes on unsupported Node ABI versions (#89)
|
|
@@ -189,7 +190,7 @@ const getNodeQuery = (table, includeContent) => {
|
|
|
189
190
|
const tableLabel = quoteNodeTable(table);
|
|
190
191
|
if (table === 'File') {
|
|
191
192
|
return includeContent
|
|
192
|
-
? `MATCH (n:${tableLabel}) RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.content AS content`
|
|
193
|
+
? `MATCH (n:${tableLabel}) RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.content AS content, n.contentEncoding AS contentEncoding`
|
|
193
194
|
: `MATCH (n:${tableLabel}) RETURN n.id AS id, n.name AS name, n.filePath AS filePath`;
|
|
194
195
|
}
|
|
195
196
|
if (table === 'Folder') {
|
|
@@ -208,7 +209,7 @@ const getNodeQuery = (table, includeContent) => {
|
|
|
208
209
|
return `MATCH (n:${tableLabel}) RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.description AS description`;
|
|
209
210
|
}
|
|
210
211
|
return includeContent
|
|
211
|
-
? `MATCH (n:${tableLabel}) RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.startLine AS startLine, n.endLine AS endLine, n.content AS content`
|
|
212
|
+
? `MATCH (n:${tableLabel}) RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.startLine AS startLine, n.endLine AS endLine, n.content AS content, n.contentEncoding AS contentEncoding`
|
|
212
213
|
: `MATCH (n:${tableLabel}) RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.startLine AS startLine, n.endLine AS endLine`;
|
|
213
214
|
};
|
|
214
215
|
const mapGraphNodeRow = (table, row, includeContent) => ({
|
|
@@ -219,7 +220,7 @@ const mapGraphNodeRow = (table, row, includeContent) => ({
|
|
|
219
220
|
filePath: row.filePath ?? row[2],
|
|
220
221
|
startLine: row.startLine,
|
|
221
222
|
endLine: row.endLine,
|
|
222
|
-
content: includeContent ? row.content : undefined,
|
|
223
|
+
content: includeContent ? decodeContentField(row.content, row.contentEncoding) : undefined,
|
|
223
224
|
responseKeys: row.responseKeys,
|
|
224
225
|
errorKeys: row.errorKeys,
|
|
225
226
|
middleware: row.middleware,
|
|
@@ -387,7 +388,7 @@ export const createServer = async (port, host = '127.0.0.1') => {
|
|
|
387
388
|
const cleanupMcp = mountMCPEndpoints(app, backend);
|
|
388
389
|
const jobManager = new JobManager();
|
|
389
390
|
// Shared repo lock — prevents concurrent analyze + embed on the same repo path,
|
|
390
|
-
// which would corrupt LadybugDB (analyze calls
|
|
391
|
+
// which would corrupt LadybugDB (analyze calls closeCgdb + initCgdb while embed has queries in flight).
|
|
391
392
|
const activeRepoPaths = new Set();
|
|
392
393
|
const acquireRepoLock = (repoPath) => {
|
|
393
394
|
if (activeRepoPaths.has(repoPath)) {
|
|
@@ -570,7 +571,7 @@ export const createServer = async (port, host = '127.0.0.1') => {
|
|
|
570
571
|
try {
|
|
571
572
|
// Close any open LadybugDB handle before deleting files
|
|
572
573
|
try {
|
|
573
|
-
await
|
|
574
|
+
await closeCgdb();
|
|
574
575
|
}
|
|
575
576
|
catch { }
|
|
576
577
|
// 1. Delete the .codragraph index/storage directory
|
|
@@ -894,21 +895,21 @@ export const createServer = async (port, host = '127.0.0.1') => {
|
|
|
894
895
|
res.status(404).json({ error: 'Repository not found' });
|
|
895
896
|
return;
|
|
896
897
|
}
|
|
897
|
-
const
|
|
898
|
+
const cgdbPath = path.join(entry.storagePath, 'cgdb');
|
|
898
899
|
const includeContent = req.query.includeContent === 'true';
|
|
899
900
|
const stream = req.query.stream === 'true';
|
|
900
|
-
// Guard: when a repo has no materialized
|
|
901
|
-
// seeded CAS-only repos), or the
|
|
901
|
+
// Guard: when a repo has no materialized cgdb schema (fixture-
|
|
902
|
+
// seeded CAS-only repos), or the cgdb WAL is corrupt/stale from
|
|
902
903
|
// a prior failed analyze, LadybugDB native will abort with
|
|
903
904
|
// UNREACHABLE_CODE or an ANY-vector exception. Detect both
|
|
904
905
|
// shapes — missing file (cheap fs.access) AND empty/4096-byte
|
|
905
906
|
// schema-only file (fs.stat) — and return an empty graph so
|
|
906
907
|
// the dashboard doesn't blow up. The Graph tab keeps working
|
|
907
|
-
// for repos that actually have a real
|
|
908
|
-
const
|
|
908
|
+
// for repos that actually have a real cgdb.
|
|
909
|
+
const isCgdbMaterialized = await (async () => {
|
|
909
910
|
try {
|
|
910
|
-
const stat = await fs.stat(
|
|
911
|
-
// Schema-only
|
|
911
|
+
const stat = await fs.stat(cgdbPath);
|
|
912
|
+
// Schema-only cgdb is exactly 4096 bytes (one page, no real
|
|
912
913
|
// data). Real graphs are larger.
|
|
913
914
|
return stat.isFile() && stat.size > 4096;
|
|
914
915
|
}
|
|
@@ -916,14 +917,14 @@ export const createServer = async (port, host = '127.0.0.1') => {
|
|
|
916
917
|
return false;
|
|
917
918
|
}
|
|
918
919
|
})();
|
|
919
|
-
if (!
|
|
920
|
+
if (!isCgdbMaterialized) {
|
|
920
921
|
if (stream) {
|
|
921
922
|
res.setHeader('Content-Type', 'application/x-ndjson; charset=utf-8');
|
|
922
923
|
res.flushHeaders();
|
|
923
924
|
res.write(JSON.stringify({
|
|
924
925
|
type: 'meta',
|
|
925
926
|
repoName: entry.name,
|
|
926
|
-
note: 'no
|
|
927
|
+
note: 'no cgdb file — graph not yet materialized',
|
|
927
928
|
nodeCount: 0,
|
|
928
929
|
relationshipCount: 0,
|
|
929
930
|
}) + '\n');
|
|
@@ -936,7 +937,7 @@ export const createServer = async (port, host = '127.0.0.1') => {
|
|
|
936
937
|
nodes: [],
|
|
937
938
|
relationships: [],
|
|
938
939
|
stats: { nodes: 0, edges: 0 },
|
|
939
|
-
note: 'no
|
|
940
|
+
note: 'no cgdb file — graph not yet materialized',
|
|
940
941
|
});
|
|
941
942
|
return;
|
|
942
943
|
}
|
|
@@ -958,7 +959,7 @@ export const createServer = async (port, host = '127.0.0.1') => {
|
|
|
958
959
|
res.once('finish', markFinished);
|
|
959
960
|
res.once('close', abortStreaming);
|
|
960
961
|
try {
|
|
961
|
-
await
|
|
962
|
+
await withCgdbDb(cgdbPath, async () => streamGraphNdjson(res, includeContent, abortController.signal));
|
|
962
963
|
if (!abortController.signal.aborted && !res.writableEnded) {
|
|
963
964
|
res.end();
|
|
964
965
|
}
|
|
@@ -970,7 +971,7 @@ export const createServer = async (port, host = '127.0.0.1') => {
|
|
|
970
971
|
}
|
|
971
972
|
return;
|
|
972
973
|
}
|
|
973
|
-
const graph = await
|
|
974
|
+
const graph = await withCgdbDb(cgdbPath, async () => buildGraph(includeContent));
|
|
974
975
|
res.json(graph);
|
|
975
976
|
}
|
|
976
977
|
catch (err) {
|
|
@@ -1008,8 +1009,8 @@ export const createServer = async (port, host = '127.0.0.1') => {
|
|
|
1008
1009
|
res.status(404).json({ error: 'Repository not found' });
|
|
1009
1010
|
return;
|
|
1010
1011
|
}
|
|
1011
|
-
const
|
|
1012
|
-
const result = await
|
|
1012
|
+
const cgdbPath = path.join(entry.storagePath, 'cgdb');
|
|
1013
|
+
const result = await withCgdbDb(cgdbPath, () => executeQuery(cypher));
|
|
1013
1014
|
res.json({ result });
|
|
1014
1015
|
}
|
|
1015
1016
|
catch (err) {
|
|
@@ -1029,14 +1030,14 @@ export const createServer = async (port, host = '127.0.0.1') => {
|
|
|
1029
1030
|
res.status(404).json({ error: 'Repository not found' });
|
|
1030
1031
|
return;
|
|
1031
1032
|
}
|
|
1032
|
-
const
|
|
1033
|
+
const cgdbPath = path.join(entry.storagePath, 'cgdb');
|
|
1033
1034
|
const parsedLimit = Number(req.body.limit ?? 10);
|
|
1034
1035
|
const limit = Number.isFinite(parsedLimit)
|
|
1035
1036
|
? Math.max(1, Math.min(100, Math.trunc(parsedLimit)))
|
|
1036
1037
|
: 10;
|
|
1037
1038
|
const mode = req.body.mode ?? 'hybrid';
|
|
1038
1039
|
const enrich = req.body.enrich !== false; // default true
|
|
1039
|
-
const results = await
|
|
1040
|
+
const results = await withCgdbDb(cgdbPath, async () => {
|
|
1040
1041
|
let searchResults;
|
|
1041
1042
|
if (mode === 'semantic') {
|
|
1042
1043
|
const { isEmbedderReady } = await import('../core/embeddings/embedder.js');
|
|
@@ -1054,7 +1055,7 @@ export const createServer = async (port, host = '127.0.0.1') => {
|
|
|
1054
1055
|
}));
|
|
1055
1056
|
}
|
|
1056
1057
|
else if (mode === 'bm25') {
|
|
1057
|
-
searchResults = await
|
|
1058
|
+
searchResults = await searchFTSFromCgdb(query, limit);
|
|
1058
1059
|
searchResults = searchResults.map((r, i) => ({
|
|
1059
1060
|
...r,
|
|
1060
1061
|
rank: i + 1,
|
|
@@ -1069,7 +1070,7 @@ export const createServer = async (port, host = '127.0.0.1') => {
|
|
|
1069
1070
|
searchResults = await hybridSearch(query, limit, executeQuery, semSearch);
|
|
1070
1071
|
}
|
|
1071
1072
|
else {
|
|
1072
|
-
searchResults = await
|
|
1073
|
+
searchResults = await searchFTSFromCgdb(query, limit);
|
|
1073
1074
|
}
|
|
1074
1075
|
}
|
|
1075
1076
|
if (!enrich)
|
|
@@ -1229,8 +1230,8 @@ export const createServer = async (port, host = '127.0.0.1') => {
|
|
|
1229
1230
|
const results = [];
|
|
1230
1231
|
const repoRoot = path.resolve(entry.path);
|
|
1231
1232
|
// Get file paths from the graph (lightweight — no content loaded)
|
|
1232
|
-
const
|
|
1233
|
-
const fileRows = await
|
|
1233
|
+
const cgdbPath = path.join(entry.storagePath, 'cgdb');
|
|
1234
|
+
const fileRows = await withCgdbDb(cgdbPath, () => executeQuery(`MATCH (n:File) WHERE n.content IS NOT NULL RETURN n.filePath AS filePath`));
|
|
1234
1235
|
// Search files on disk one at a time (constant memory)
|
|
1235
1236
|
for (const row of fileRows) {
|
|
1236
1237
|
if (results.length >= limit)
|
|
@@ -1601,12 +1602,12 @@ export const createServer = async (port, host = '127.0.0.1') => {
|
|
|
1601
1602
|
// Run embedding pipeline asynchronously
|
|
1602
1603
|
(async () => {
|
|
1603
1604
|
try {
|
|
1604
|
-
const
|
|
1605
|
-
await
|
|
1605
|
+
const cgdbPath = path.join(entry.storagePath, 'cgdb');
|
|
1606
|
+
await withCgdbDb(cgdbPath, async () => {
|
|
1606
1607
|
const { runEmbeddingPipeline } = await import('../core/embeddings/embedding-pipeline.js');
|
|
1607
1608
|
// Fetch existing content hashes for incremental embedding.
|
|
1608
|
-
// Delegated to
|
|
1609
|
-
const { fetchExistingEmbeddingHashes } = await import('../core/
|
|
1609
|
+
// Delegated to cgdb-adapter which owns the DB query logic and legacy-fallback handling.
|
|
1610
|
+
const { fetchExistingEmbeddingHashes } = await import('../core/cgdb/cgdb-adapter.js');
|
|
1610
1611
|
const existingEmbeddings = await fetchExistingEmbeddingHashes(executeQuery);
|
|
1611
1612
|
if (existingEmbeddings && existingEmbeddings.size > 0) {
|
|
1612
1613
|
console.log(`[embed] ${existingEmbeddings.size} nodes already embedded — incremental run with content-hash comparison`);
|
|
@@ -1717,7 +1718,7 @@ export const createServer = async (port, host = '127.0.0.1') => {
|
|
|
1717
1718
|
jobManager.dispose();
|
|
1718
1719
|
embedJobManager.dispose();
|
|
1719
1720
|
await cleanupMcp();
|
|
1720
|
-
await
|
|
1721
|
+
await closeCgdb();
|
|
1721
1722
|
await backend.disconnect();
|
|
1722
1723
|
process.exit(0);
|
|
1723
1724
|
};
|
|
@@ -36,10 +36,49 @@
|
|
|
36
36
|
* so the registry stabilises over analyze/re-analyze cycles.
|
|
37
37
|
*/
|
|
38
38
|
export declare const canonicalizePath: (p: string) => string;
|
|
39
|
+
/**
|
|
40
|
+
* On-disk schema version for `.codragraph/cgdb` and `.codragraph/meta.json`.
|
|
41
|
+
*
|
|
42
|
+
* 1 — pre-RFC-0001-Phase-2 layout. Node tables have `content STRING`
|
|
43
|
+
* but no `contentEncoding` column. Implicit/missing on existing
|
|
44
|
+
* 1.6.x and 1.7.x indexes (RepoMeta.schemaVersion was undefined).
|
|
45
|
+
* 2 — RFC 0001 Phase 2: every node table that has `content` also has
|
|
46
|
+
* a `contentEncoding STRING DEFAULT 'none'` column. Writers may
|
|
47
|
+
* opt into compression via `--compress brotli|zstd` (compression
|
|
48
|
+
* is OFF by default, so existing readers keep working). Readers
|
|
49
|
+
* decode based on the per-row encoding tag.
|
|
50
|
+
*
|
|
51
|
+
* Bumping this is the migration trigger: `runFullAnalysis` forces a
|
|
52
|
+
* full re-analyze when an existing index has a missing or older
|
|
53
|
+
* `schemaVersion` field, because adding a column to an existing
|
|
54
|
+
* LadybugDB table via ALTER is not validated end-to-end yet — fresh
|
|
55
|
+
* `CREATE NODE TABLE` is the supported path.
|
|
56
|
+
*/
|
|
57
|
+
export declare const INDEX_SCHEMA_VERSION: 2;
|
|
39
58
|
export interface RepoMeta {
|
|
40
59
|
repoPath: string;
|
|
41
60
|
lastCommit: string;
|
|
42
61
|
indexedAt: string;
|
|
62
|
+
/**
|
|
63
|
+
* On-disk schema version (see {@link INDEX_SCHEMA_VERSION}). Absent on
|
|
64
|
+
* indexes written by 1.7.x or earlier; `runFullAnalysis` treats those
|
|
65
|
+
* as needing a full re-analyze when they're loaded by a 1.8+ CLI.
|
|
66
|
+
*/
|
|
67
|
+
schemaVersion?: number;
|
|
68
|
+
/**
|
|
69
|
+
* RFC 0001 Phase 2 — the per-row content encoding chosen at the last
|
|
70
|
+
* `analyze --compress` invocation. `'none'` (or absent) means rows
|
|
71
|
+
* carry plain text; `'brotli'` / `'zstd'` means rows are compressed
|
|
72
|
+
* and consumers must decode. Persisted so query-time tooling can
|
|
73
|
+
* detect the compressed mode without sampling rows.
|
|
74
|
+
*
|
|
75
|
+
* Phase 2.5 hooks: `core/search/bm25-index.ts` reads this field at
|
|
76
|
+
* FTS-create time and drops `content` from the FTS property list
|
|
77
|
+
* when set to a non-`'none'` value (full-text search falls back to
|
|
78
|
+
* symbol-name matches). Embeddings and graph queries are unaffected
|
|
79
|
+
* — they decode at the read boundary.
|
|
80
|
+
*/
|
|
81
|
+
compress?: 'none' | 'brotli' | 'zstd';
|
|
43
82
|
/**
|
|
44
83
|
* Canonical `origin` remote URL captured at index time. Used to
|
|
45
84
|
* fingerprint the same logical repo across multiple on-disk clones
|
|
@@ -71,7 +110,7 @@ export interface RepoMeta {
|
|
|
71
110
|
export interface IndexedRepo {
|
|
72
111
|
repoPath: string;
|
|
73
112
|
storagePath: string;
|
|
74
|
-
|
|
113
|
+
cgdbPath: string;
|
|
75
114
|
metaPath: string;
|
|
76
115
|
meta: RepoMeta;
|
|
77
116
|
}
|
|
@@ -101,7 +140,7 @@ export declare const getStoragePath: (repoPath: string) => string;
|
|
|
101
140
|
*/
|
|
102
141
|
export declare const getStoragePaths: (repoPath: string) => {
|
|
103
142
|
storagePath: string;
|
|
104
|
-
|
|
143
|
+
cgdbPath: string;
|
|
105
144
|
metaPath: string;
|
|
106
145
|
};
|
|
107
146
|
/**
|
|
@@ -114,7 +153,7 @@ export declare const hasKuzuIndex: (storagePath: string) => Promise<boolean>;
|
|
|
114
153
|
*
|
|
115
154
|
* Returns:
|
|
116
155
|
* found — true if .codragraph/kuzu existed and was deleted
|
|
117
|
-
* needsReindex — true if kuzu existed but
|
|
156
|
+
* needsReindex — true if kuzu existed but cgdb does not (re-analyze required)
|
|
118
157
|
*
|
|
119
158
|
* Callers own the user-facing messaging; this function only deletes files.
|
|
120
159
|
*/
|
|
@@ -49,6 +49,25 @@ export const canonicalizePath = (p) => {
|
|
|
49
49
|
return resolved;
|
|
50
50
|
}
|
|
51
51
|
};
|
|
52
|
+
/**
|
|
53
|
+
* On-disk schema version for `.codragraph/cgdb` and `.codragraph/meta.json`.
|
|
54
|
+
*
|
|
55
|
+
* 1 — pre-RFC-0001-Phase-2 layout. Node tables have `content STRING`
|
|
56
|
+
* but no `contentEncoding` column. Implicit/missing on existing
|
|
57
|
+
* 1.6.x and 1.7.x indexes (RepoMeta.schemaVersion was undefined).
|
|
58
|
+
* 2 — RFC 0001 Phase 2: every node table that has `content` also has
|
|
59
|
+
* a `contentEncoding STRING DEFAULT 'none'` column. Writers may
|
|
60
|
+
* opt into compression via `--compress brotli|zstd` (compression
|
|
61
|
+
* is OFF by default, so existing readers keep working). Readers
|
|
62
|
+
* decode based on the per-row encoding tag.
|
|
63
|
+
*
|
|
64
|
+
* Bumping this is the migration trigger: `runFullAnalysis` forces a
|
|
65
|
+
* full re-analyze when an existing index has a missing or older
|
|
66
|
+
* `schemaVersion` field, because adding a column to an existing
|
|
67
|
+
* LadybugDB table via ALTER is not validated end-to-end yet — fresh
|
|
68
|
+
* `CREATE NODE TABLE` is the supported path.
|
|
69
|
+
*/
|
|
70
|
+
export const INDEX_SCHEMA_VERSION = 2;
|
|
52
71
|
const CODRAGRAPH_DIR = '.codragraph';
|
|
53
72
|
// ─── Local Storage Helpers ─────────────────────────────────────────────
|
|
54
73
|
/**
|
|
@@ -64,7 +83,7 @@ export const getStoragePaths = (repoPath) => {
|
|
|
64
83
|
const storagePath = getStoragePath(repoPath);
|
|
65
84
|
return {
|
|
66
85
|
storagePath,
|
|
67
|
-
|
|
86
|
+
cgdbPath: path.join(storagePath, 'cgdb'),
|
|
68
87
|
metaPath: path.join(storagePath, 'meta.json'),
|
|
69
88
|
};
|
|
70
89
|
};
|
|
@@ -86,16 +105,16 @@ export const hasKuzuIndex = async (storagePath) => {
|
|
|
86
105
|
*
|
|
87
106
|
* Returns:
|
|
88
107
|
* found — true if .codragraph/kuzu existed and was deleted
|
|
89
|
-
* needsReindex — true if kuzu existed but
|
|
108
|
+
* needsReindex — true if kuzu existed but cgdb does not (re-analyze required)
|
|
90
109
|
*
|
|
91
110
|
* Callers own the user-facing messaging; this function only deletes files.
|
|
92
111
|
*/
|
|
93
112
|
export const cleanupOldKuzuFiles = async (storagePath) => {
|
|
94
113
|
const oldPath = path.join(storagePath, 'kuzu');
|
|
95
|
-
const newPath = path.join(storagePath, '
|
|
114
|
+
const newPath = path.join(storagePath, 'cgdb');
|
|
96
115
|
try {
|
|
97
116
|
await fs.stat(oldPath);
|
|
98
|
-
// Old kuzu file/dir exists — determine if
|
|
117
|
+
// Old kuzu file/dir exists — determine if cgdb is already present
|
|
99
118
|
let needsReindex = false;
|
|
100
119
|
try {
|
|
101
120
|
await fs.stat(newPath);
|