@codragraph/cli 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/_shared/cgdb/schema-constants.d.ts +16 -0
- package/dist/_shared/cgdb/schema-constants.d.ts.map +1 -0
- package/dist/_shared/cgdb/schema-constants.js +67 -0
- package/dist/_shared/cgdb/schema-constants.js.map +1 -0
- package/dist/_shared/index.d.ts +2 -2
- package/dist/_shared/index.js +1 -1
- package/dist/cli/analyze.js +3 -3
- package/dist/cli/graphstore.js +21 -21
- package/dist/cli/index-repo.js +3 -3
- package/dist/cli/wiki.js +3 -3
- package/dist/core/augmentation/engine.js +7 -7
- package/dist/core/cgdb/cgdb-adapter.d.ts +176 -0
- package/dist/core/cgdb/cgdb-adapter.js +1320 -0
- package/dist/core/cgdb/content-read.d.ts +46 -0
- package/dist/core/cgdb/content-read.js +64 -0
- package/dist/core/cgdb/csv-generator.d.ts +29 -0
- package/dist/core/cgdb/csv-generator.js +492 -0
- package/dist/core/cgdb/pool-adapter.d.ts +93 -0
- package/dist/core/cgdb/pool-adapter.js +550 -0
- package/dist/core/cgdb/schema.d.ts +62 -0
- package/dist/core/cgdb/schema.js +502 -0
- package/dist/core/embeddings/embedding-pipeline.js +4 -4
- package/dist/core/graphstore/cgdb-row-source.d.ts +19 -0
- package/dist/core/graphstore/cgdb-row-source.js +141 -0
- package/dist/core/graphstore/index.d.ts +1 -1
- package/dist/core/graphstore/index.js +3 -3
- package/dist/core/group/bridge-db.d.ts +2 -2
- package/dist/core/group/bridge-db.js +18 -18
- package/dist/core/group/bridge-schema.d.ts +4 -4
- package/dist/core/group/bridge-schema.js +4 -4
- package/dist/core/group/cross-impact.js +3 -3
- package/dist/core/group/sync.js +4 -4
- package/dist/core/run-analyze.js +24 -24
- package/dist/core/search/bm25-index.d.ts +3 -3
- package/dist/core/search/bm25-index.js +9 -9
- package/dist/core/search/hybrid-search.js +2 -2
- package/dist/core/wiki/generator.d.ts +2 -2
- package/dist/core/wiki/generator.js +4 -4
- package/dist/core/wiki/graph-queries.d.ts +2 -2
- package/dist/core/wiki/graph-queries.js +5 -5
- package/dist/mcp/core/cgdb-adapter.d.ts +5 -0
- package/dist/mcp/core/cgdb-adapter.js +5 -0
- package/dist/mcp/core/embedder.js +1 -1
- package/dist/mcp/local/local-backend.d.ts +2 -2
- package/dist/mcp/local/local-backend.js +15 -15
- package/dist/mcp/server.js +3 -3
- package/dist/mcp/tools.js +1 -1
- package/dist/server/analyze-worker.js +2 -2
- package/dist/server/api.js +31 -31
- package/dist/storage/repo-manager.d.ts +4 -4
- package/dist/storage/repo-manager.js +5 -5
- package/hooks/claude/codragraph-hook.cjs +4 -4
- package/package.json +3 -3
- package/scripts/build.js +8 -9
- package/vendor/tree-sitter-proto/bindings/node/index.js +3 -3
- package/vendor/tree-sitter-proto/src/node-types.json +1 -1
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Adapter exposing a live LadybugDB instance as a `@codragraph/graphstore`
|
|
3
|
+
* `RowSource`. Used by the analyze pipeline (Phase 4) to snapshot the
|
|
4
|
+
* loaded graph into the content-addressed store.
|
|
5
|
+
*
|
|
6
|
+
* Best-effort by design: any table that errors at query time is skipped
|
|
7
|
+
* (with the failure surfaced through the optional `onSkip` callback) so
|
|
8
|
+
* the surrounding analyze flow never breaks because the versioning hook
|
|
9
|
+
* misbehaves.
|
|
10
|
+
*/
|
|
11
|
+
import { NODE_TABLES, REL_TABLE_NAME } from '../../_shared/index.js';
|
|
12
|
+
import { executeQuery } from '../cgdb/cgdb-adapter.js';
|
|
13
|
+
export const createCgdbRowSource = (opts = {}) => {
|
|
14
|
+
const onSkip = opts.onSkip ?? (() => { });
|
|
15
|
+
const tables = opts.nodeTables ?? NODE_TABLES;
|
|
16
|
+
const listNodeTables = async () => {
|
|
17
|
+
return [...tables];
|
|
18
|
+
};
|
|
19
|
+
const streamNodeTable = async function* (tableName) {
|
|
20
|
+
let rows;
|
|
21
|
+
try {
|
|
22
|
+
// `MATCH (n:T) RETURN n` returns one row per node. The node value
|
|
23
|
+
// is reachable as either `row.n` (named-column form) or `row[0]`
|
|
24
|
+
// (positional form) depending on the LadybugDB result-shape mode;
|
|
25
|
+
// we accept both, mirroring the resilient pattern used by
|
|
26
|
+
// `core/search/bm25-index.ts` for FTS results. Tables that do not
|
|
27
|
+
// exist on disk for a given repo throw here — we treat that as
|
|
28
|
+
// "no rows" via the onSkip callback rather than a hard failure.
|
|
29
|
+
rows = await executeQuery(`MATCH (n:${tableName}) RETURN n`);
|
|
30
|
+
}
|
|
31
|
+
catch (err) {
|
|
32
|
+
onSkip(tableName, err);
|
|
33
|
+
return;
|
|
34
|
+
}
|
|
35
|
+
let yielded = 0;
|
|
36
|
+
for (const raw of rows) {
|
|
37
|
+
const node = unwrapNode(raw);
|
|
38
|
+
if (!node)
|
|
39
|
+
continue;
|
|
40
|
+
yield normalizeNodeRow(node);
|
|
41
|
+
yielded++;
|
|
42
|
+
}
|
|
43
|
+
// If the query reported rows but none unwrapped, surface that as a
|
|
44
|
+
// skip so the analyze log makes the silent-empty failure mode
|
|
45
|
+
// visible instead of producing a 0-row snapshot for the table.
|
|
46
|
+
if (rows.length > 0 && yielded === 0) {
|
|
47
|
+
onSkip(tableName, new Error(`cgdb-row-source: query returned ${rows.length} row(s) for "${tableName}" but none had an unwrappable node — ` +
|
|
48
|
+
`result shape changed? expected row.n or row[0] to be the node`));
|
|
49
|
+
}
|
|
50
|
+
};
|
|
51
|
+
const streamEdges = async function* () {
|
|
52
|
+
let rows;
|
|
53
|
+
try {
|
|
54
|
+
// Project `from`/`to`/`type` as scalar columns and the full rel as
|
|
55
|
+
// `rel`. Scalars give us a deterministic edge id even if the rel
|
|
56
|
+
// payload's shape changes; `rel` carries any extra properties for
|
|
57
|
+
// hashing.
|
|
58
|
+
rows = await executeQuery(`MATCH (a)-[r:${REL_TABLE_NAME}]->(b) RETURN a.id AS \`from\`, b.id AS \`to\`, r.type AS type, r AS rel`);
|
|
59
|
+
}
|
|
60
|
+
catch (err) {
|
|
61
|
+
onSkip(REL_TABLE_NAME, err);
|
|
62
|
+
return;
|
|
63
|
+
}
|
|
64
|
+
let yielded = 0;
|
|
65
|
+
for (const raw of rows) {
|
|
66
|
+
const r = raw;
|
|
67
|
+
const from = pickField(r, 'from', 0);
|
|
68
|
+
const to = pickField(r, 'to', 1);
|
|
69
|
+
const type = pickField(r, 'type', 2);
|
|
70
|
+
const rel = pickField(r, 'rel', 3);
|
|
71
|
+
if (typeof from !== 'string' || typeof to !== 'string')
|
|
72
|
+
continue;
|
|
73
|
+
yield normalizeEdgeRow({ from, to, type, rel: isPlainObject(rel) ? rel : null });
|
|
74
|
+
yielded++;
|
|
75
|
+
}
|
|
76
|
+
if (rows.length > 0 && yielded === 0) {
|
|
77
|
+
onSkip(REL_TABLE_NAME, new Error(`cgdb-row-source: edges query returned ${rows.length} row(s) but none had a string from/to — ` +
|
|
78
|
+
`result shape changed?`));
|
|
79
|
+
}
|
|
80
|
+
};
|
|
81
|
+
return { listNodeTables, streamNodeTable, streamEdges };
|
|
82
|
+
};
|
|
83
|
+
/**
|
|
84
|
+
* Pull the node out of an executeQuery result row, accepting either the
|
|
85
|
+
* named-column form (`row.n`) or the positional form (`row[0]`). Returns
|
|
86
|
+
* null when the row is missing or the node value isn't an object — the
|
|
87
|
+
* caller treats that as "skip and surface".
|
|
88
|
+
*/
|
|
89
|
+
const unwrapNode = (raw) => {
|
|
90
|
+
if (!raw || typeof raw !== 'object')
|
|
91
|
+
return null;
|
|
92
|
+
const r = raw;
|
|
93
|
+
const candidate = r['n'] ?? r[0];
|
|
94
|
+
return isPlainObject(candidate) ? candidate : null;
|
|
95
|
+
};
|
|
96
|
+
/** Read a field from an executeQuery row, falling back to the positional index. */
|
|
97
|
+
const pickField = (row, named, positional) => {
|
|
98
|
+
if (!row)
|
|
99
|
+
return undefined;
|
|
100
|
+
return row[named] ?? row[positional];
|
|
101
|
+
};
|
|
102
|
+
const isPlainObject = (v) => typeof v === 'object' && v !== null && !Array.isArray(v);
|
|
103
|
+
/**
|
|
104
|
+
* Sanitize a node row for canonical hashing:
|
|
105
|
+
* - Drop LadybugDB-specific internal fields (`_id`, `_label`) that are
|
|
106
|
+
* not content-bearing — including them would make the hash sensitive
|
|
107
|
+
* to internal storage offsets and break dedup across snapshots.
|
|
108
|
+
* - Sort keys deterministically (canonical JSON in the serializer
|
|
109
|
+
* already does this, but doing it once here keeps the row payload
|
|
110
|
+
* stable when we ever swap engines).
|
|
111
|
+
*/
|
|
112
|
+
const normalizeNodeRow = (node) => {
|
|
113
|
+
const out = {};
|
|
114
|
+
for (const key of Object.keys(node).sort()) {
|
|
115
|
+
if (key === '_id' || key === '_label')
|
|
116
|
+
continue;
|
|
117
|
+
out[key] = node[key];
|
|
118
|
+
}
|
|
119
|
+
return out;
|
|
120
|
+
};
|
|
121
|
+
const normalizeEdgeRow = (r) => {
|
|
122
|
+
const props = {};
|
|
123
|
+
if (r.rel && typeof r.rel === 'object') {
|
|
124
|
+
for (const key of Object.keys(r.rel).sort()) {
|
|
125
|
+
// Skip the synthetic from/to/type that show up under `rel` too —
|
|
126
|
+
// we already project them as top-level columns and don't want
|
|
127
|
+
// duplication in the canonical row.
|
|
128
|
+
if (key === 'from' || key === 'to' || key === 'type')
|
|
129
|
+
continue;
|
|
130
|
+
if (key.startsWith('_'))
|
|
131
|
+
continue;
|
|
132
|
+
props[key] = r.rel[key];
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
return {
|
|
136
|
+
from: String(r.from),
|
|
137
|
+
to: String(r.to),
|
|
138
|
+
type: typeof r.type === 'string' ? r.type : String(r.type ?? ''),
|
|
139
|
+
...props,
|
|
140
|
+
};
|
|
141
|
+
};
|
|
@@ -38,7 +38,7 @@ export interface RecordAnalysisSnapshotResult {
|
|
|
38
38
|
/**
|
|
39
39
|
* Snapshot the currently-loaded LadybugDB into the content-addressed
|
|
40
40
|
* store and advance the active branch's HEAD to the new commit. Caller
|
|
41
|
-
* is expected to have already initialized
|
|
41
|
+
* is expected to have already initialized cgdb with `initCgdb(...)`.
|
|
42
42
|
*
|
|
43
43
|
* Returns null if anything goes sideways (logged via `onSkipTable`); the
|
|
44
44
|
* analyze pipeline treats that as "no snapshot for this run".
|
|
@@ -9,13 +9,13 @@
|
|
|
9
9
|
*/
|
|
10
10
|
import path from 'node:path';
|
|
11
11
|
import { FsCAS, serializeSnapshot, createCommit, setHead, writeHeadBranch, resolveHeadCommit, DEFAULT_BRANCH, } from '@codragraph/graphstore';
|
|
12
|
-
import {
|
|
12
|
+
import { createCgdbRowSource } from './cgdb-row-source.js';
|
|
13
13
|
/** Subdirectory of `<repo>/.codragraph` that holds versioning artifacts. */
|
|
14
14
|
export const GRAPHSTORE_SUBDIR = 'graphstore';
|
|
15
15
|
/**
|
|
16
16
|
* Snapshot the currently-loaded LadybugDB into the content-addressed
|
|
17
17
|
* store and advance the active branch's HEAD to the new commit. Caller
|
|
18
|
-
* is expected to have already initialized
|
|
18
|
+
* is expected to have already initialized cgdb with `initCgdb(...)`.
|
|
19
19
|
*
|
|
20
20
|
* Returns null if anything goes sideways (logged via `onSkipTable`); the
|
|
21
21
|
* analyze pipeline treats that as "no snapshot for this run".
|
|
@@ -26,7 +26,7 @@ export const recordAnalysisSnapshot = async (opts) => {
|
|
|
26
26
|
let serialized;
|
|
27
27
|
try {
|
|
28
28
|
serialized = await serializeSnapshot({
|
|
29
|
-
source:
|
|
29
|
+
source: createCgdbRowSource({ onSkip: opts.onSkipTable }),
|
|
30
30
|
cas,
|
|
31
31
|
indexedRepoCommit: opts.indexedRepoCommit,
|
|
32
32
|
});
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { LbugValue } from '@ladybugdb/core';
|
|
1
|
+
import type { LbugValue as CgdbValue } from '@ladybugdb/core';
|
|
2
2
|
import type { BridgeHandle, BridgeMeta, StoredContract, CrossLink, RepoSnapshot } from './types.js';
|
|
3
3
|
export declare function contractNodeId(repo: string, contractId: string, role: string, filePath: string): string;
|
|
4
4
|
/**
|
|
@@ -46,7 +46,7 @@ export declare function indexContract(index: ContractLookupIndex, contract: Stor
|
|
|
46
46
|
export declare function findContractNode(index: ContractLookupIndex, repo: string, role: 'consumer' | 'provider', symbolUid: string, filePath: string, symbolName: string): string | null;
|
|
47
47
|
export declare function openBridgeDb(dbPath: string): Promise<BridgeHandle>;
|
|
48
48
|
export declare function ensureBridgeSchema(handle: BridgeHandle): Promise<void>;
|
|
49
|
-
export declare function queryBridge<T>(handle: BridgeHandle, cypher: string, params?: Record<string,
|
|
49
|
+
export declare function queryBridge<T>(handle: BridgeHandle, cypher: string, params?: Record<string, CgdbValue>): Promise<T[]>;
|
|
50
50
|
export declare function closeBridgeDb(handle: BridgeHandle): Promise<void>;
|
|
51
51
|
export declare function retryRename(src: string, dst: string, attempts?: number): Promise<void>;
|
|
52
52
|
export declare function writeBridgeMeta(groupDir: string, meta: BridgeMeta): Promise<void>;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import fsp from 'node:fs/promises';
|
|
2
2
|
import path from 'node:path';
|
|
3
3
|
import { createHash } from 'node:crypto';
|
|
4
|
-
import
|
|
4
|
+
import cgdb from '@ladybugdb/core';
|
|
5
5
|
import { BRIDGE_SCHEMA_QUERIES, BRIDGE_SCHEMA_VERSION } from './bridge-schema.js';
|
|
6
6
|
import { dedupeContracts, dedupeCrossLinks } from './normalization.js';
|
|
7
7
|
export function contractNodeId(repo, contractId, role, filePath) {
|
|
@@ -74,8 +74,8 @@ export function findContractNode(index, repo, role, symbolUid, filePath, symbolN
|
|
|
74
74
|
export async function openBridgeDb(dbPath) {
|
|
75
75
|
const parentDir = path.dirname(dbPath);
|
|
76
76
|
await fsp.mkdir(parentDir, { recursive: true });
|
|
77
|
-
const db = new
|
|
78
|
-
const conn = new
|
|
77
|
+
const db = new cgdb.Database(dbPath, 0, false, false); // writable
|
|
78
|
+
const conn = new cgdb.Connection(db);
|
|
79
79
|
return { _db: db, _conn: conn, groupDir: parentDir };
|
|
80
80
|
}
|
|
81
81
|
/**
|
|
@@ -83,10 +83,10 @@ export async function openBridgeDb(dbPath) {
|
|
|
83
83
|
* CREATE NODE TABLE or CREATE REL TABLE statement hits an already-existing
|
|
84
84
|
* table. LadybugDB DDL doesn't support IF NOT EXISTS, and its JS driver
|
|
85
85
|
* doesn't expose typed error codes, so we match on the message substring —
|
|
86
|
-
* the same pattern used by `core/
|
|
86
|
+
* the same pattern used by `core/cgdb/cgdb-adapter.ts`. If a future
|
|
87
87
|
* LadybugDB release changes the wording, update this constant.
|
|
88
88
|
*/
|
|
89
|
-
const
|
|
89
|
+
const CGDB_ALREADY_EXISTS_MSG = 'already exists';
|
|
90
90
|
export async function ensureBridgeSchema(handle) {
|
|
91
91
|
const conn = handle._conn;
|
|
92
92
|
for (const q of BRIDGE_SCHEMA_QUERIES) {
|
|
@@ -95,14 +95,14 @@ export async function ensureBridgeSchema(handle) {
|
|
|
95
95
|
}
|
|
96
96
|
catch (err) {
|
|
97
97
|
const msg = err instanceof Error ? err.message : String(err);
|
|
98
|
-
if (!msg.includes(
|
|
98
|
+
if (!msg.includes(CGDB_ALREADY_EXISTS_MSG))
|
|
99
99
|
throw err;
|
|
100
100
|
}
|
|
101
101
|
}
|
|
102
102
|
}
|
|
103
103
|
/**
|
|
104
104
|
* Close every QueryResult / PreparedStatement before letting V8 GC them.
|
|
105
|
-
* Same close-order discipline as `core/
|
|
105
|
+
* Same close-order discipline as `core/cgdb/cgdb-adapter.ts:closeQueryResult`
|
|
106
106
|
* — leaking these handles past `conn.close()` corrupts LadybugDB's native
|
|
107
107
|
* file lock on Windows ("Error 33: The process cannot access the file
|
|
108
108
|
* because it is being used by another process") and segfaults on
|
|
@@ -132,7 +132,7 @@ async function closeBridgeHandle(h) {
|
|
|
132
132
|
* fully released the exclusive lock yet. Retrying with backoff is the
|
|
133
133
|
* documented workaround for this class of Windows-fs interactions.
|
|
134
134
|
*/
|
|
135
|
-
function
|
|
135
|
+
function isTransientCgdbLockError(err) {
|
|
136
136
|
const msg = err?.message ?? '';
|
|
137
137
|
return (msg.includes('Error 33') ||
|
|
138
138
|
msg.includes('locked a portion of the file') ||
|
|
@@ -180,7 +180,7 @@ export async function queryBridge(handle, cypher, params) {
|
|
|
180
180
|
return await queryBridgeOnce(handle, cypher, params);
|
|
181
181
|
}
|
|
182
182
|
catch (err) {
|
|
183
|
-
if (!
|
|
183
|
+
if (!isTransientCgdbLockError(err) || attempt === ATTEMPTS - 1)
|
|
184
184
|
throw err;
|
|
185
185
|
await new Promise((r) => setTimeout(r, 50 * Math.pow(2, attempt)));
|
|
186
186
|
}
|
|
@@ -248,7 +248,7 @@ export async function writeBridgeMeta(groupDir, meta) {
|
|
|
248
248
|
// Use retryRename for consistency with writeBridge's atomic swap — on
|
|
249
249
|
// Windows a concurrent reader can cause EBUSY/EPERM even on a tiny
|
|
250
250
|
// meta.json, and we don't want meta write to be less robust than the
|
|
251
|
-
// bridge.
|
|
251
|
+
// bridge.cgdb swap it accompanies.
|
|
252
252
|
await retryRename(tmp, target);
|
|
253
253
|
}
|
|
254
254
|
export async function readBridgeMeta(groupDir) {
|
|
@@ -275,9 +275,9 @@ export async function writeBridge(groupDir, input) {
|
|
|
275
275
|
await fsp.mkdir(groupDir, { recursive: true });
|
|
276
276
|
const contracts = dedupeContracts(input.contracts);
|
|
277
277
|
const crossLinks = dedupeCrossLinks(input.crossLinks);
|
|
278
|
-
const finalPath = path.join(groupDir, 'bridge.
|
|
279
|
-
const tmpPath = path.join(groupDir, 'bridge.
|
|
280
|
-
const bakPath = path.join(groupDir, 'bridge.
|
|
278
|
+
const finalPath = path.join(groupDir, 'bridge.cgdb');
|
|
279
|
+
const tmpPath = path.join(groupDir, 'bridge.cgdb.tmp');
|
|
280
|
+
const bakPath = path.join(groupDir, 'bridge.cgdb.bak');
|
|
281
281
|
const report = {
|
|
282
282
|
contractsInserted: 0,
|
|
283
283
|
contractsFailed: 0,
|
|
@@ -466,7 +466,7 @@ export async function writeBridge(groupDir, input) {
|
|
|
466
466
|
/* openBridgeDbReadOnly */
|
|
467
467
|
/* ------------------------------------------------------------------ */
|
|
468
468
|
export async function openBridgeDbReadOnly(groupDir) {
|
|
469
|
-
const dbPath = path.join(groupDir, 'bridge.
|
|
469
|
+
const dbPath = path.join(groupDir, 'bridge.cgdb');
|
|
470
470
|
try {
|
|
471
471
|
await fsp.access(dbPath);
|
|
472
472
|
}
|
|
@@ -476,7 +476,7 @@ export async function openBridgeDbReadOnly(groupDir) {
|
|
|
476
476
|
// triggers bak recovery is an interrupted writer, which on Windows may
|
|
477
477
|
// still be holding an open handle on `.bak` for a few milliseconds when
|
|
478
478
|
// a reader races in. EBUSY/EPERM retries recover that case silently.
|
|
479
|
-
const bakPath = path.join(groupDir, 'bridge.
|
|
479
|
+
const bakPath = path.join(groupDir, 'bridge.cgdb.bak');
|
|
480
480
|
try {
|
|
481
481
|
await fsp.access(bakPath);
|
|
482
482
|
await retryRename(bakPath, dbPath);
|
|
@@ -506,8 +506,8 @@ export async function openBridgeDbReadOnly(groupDir) {
|
|
|
506
506
|
let db;
|
|
507
507
|
let conn;
|
|
508
508
|
try {
|
|
509
|
-
db = new
|
|
510
|
-
conn = new
|
|
509
|
+
db = new cgdb.Database(dbPath, 0, false, true); // readOnly
|
|
510
|
+
conn = new cgdb.Connection(db);
|
|
511
511
|
return { _db: db, _conn: conn, groupDir };
|
|
512
512
|
}
|
|
513
513
|
catch (err) {
|
|
@@ -527,7 +527,7 @@ export async function openBridgeDbReadOnly(groupDir) {
|
|
|
527
527
|
/* ignore */
|
|
528
528
|
}
|
|
529
529
|
}
|
|
530
|
-
if (!
|
|
530
|
+
if (!isTransientCgdbLockError(err) || attempt === ATTEMPTS - 1)
|
|
531
531
|
return null;
|
|
532
532
|
await new Promise((r) => setTimeout(r, 50 * Math.pow(2, attempt)));
|
|
533
533
|
continue;
|
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Bridge LadybugDB schema for cross-repo Contract Registry.
|
|
3
|
-
* Separate from per-repo schema in
|
|
3
|
+
* Separate from per-repo schema in cgdb/schema.ts.
|
|
4
4
|
*/
|
|
5
5
|
/**
|
|
6
|
-
* Version of the bridge.
|
|
6
|
+
* Version of the bridge.cgdb schema below. `openBridgeDbReadOnly` compares
|
|
7
7
|
* this against `meta.json`'s version field and returns `null` on mismatch,
|
|
8
8
|
* which trips the caller into either the JSON fallback path or a fresh
|
|
9
|
-
* `group sync` that rebuilds `bridge.
|
|
9
|
+
* `group sync` that rebuilds `bridge.cgdb` from scratch.
|
|
10
10
|
*
|
|
11
11
|
* Migration contract for contributors bumping this constant:
|
|
12
12
|
* 1. Bump the number (e.g. `1` → `2`).
|
|
13
13
|
* 2. Update the DDL below to match the new schema.
|
|
14
14
|
* 3. DO NOT attempt an online migration in this file — the version gate
|
|
15
15
|
* is intentionally a "discard and re-sync" strategy for V1. An old
|
|
16
|
-
* bridge.
|
|
16
|
+
* bridge.cgdb whose version doesn't match is treated as opaque and
|
|
17
17
|
* rebuilt by the next `group sync`.
|
|
18
18
|
* 4. If online migration becomes necessary (e.g. when groups accumulate
|
|
19
19
|
* large amounts of embedding data), add a migration path as a
|
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Bridge LadybugDB schema for cross-repo Contract Registry.
|
|
3
|
-
* Separate from per-repo schema in
|
|
3
|
+
* Separate from per-repo schema in cgdb/schema.ts.
|
|
4
4
|
*/
|
|
5
5
|
/**
|
|
6
|
-
* Version of the bridge.
|
|
6
|
+
* Version of the bridge.cgdb schema below. `openBridgeDbReadOnly` compares
|
|
7
7
|
* this against `meta.json`'s version field and returns `null` on mismatch,
|
|
8
8
|
* which trips the caller into either the JSON fallback path or a fresh
|
|
9
|
-
* `group sync` that rebuilds `bridge.
|
|
9
|
+
* `group sync` that rebuilds `bridge.cgdb` from scratch.
|
|
10
10
|
*
|
|
11
11
|
* Migration contract for contributors bumping this constant:
|
|
12
12
|
* 1. Bump the number (e.g. `1` → `2`).
|
|
13
13
|
* 2. Update the DDL below to match the new schema.
|
|
14
14
|
* 3. DO NOT attempt an online migration in this file — the version gate
|
|
15
15
|
* is intentionally a "discard and re-sync" strategy for V1. An old
|
|
16
|
-
* bridge.
|
|
16
|
+
* bridge.cgdb whose version doesn't match is treated as opaque and
|
|
17
17
|
* rebuilt by the next `group sync`.
|
|
18
18
|
* 4. If online migration becomes necessary (e.g. when groups accumulate
|
|
19
19
|
* large amounts of embedding data), add a migration path as a
|
|
@@ -199,19 +199,19 @@ async function ensureBridgeReady(groupDir) {
|
|
|
199
199
|
error: `Bridge schema version mismatch (meta.json has ${meta.version}, expected ${BRIDGE_SCHEMA_VERSION}). Run codragraph group sync for this group.`,
|
|
200
200
|
};
|
|
201
201
|
}
|
|
202
|
-
const dbPath = path.join(groupDir, 'bridge.
|
|
202
|
+
const dbPath = path.join(groupDir, 'bridge.cgdb');
|
|
203
203
|
try {
|
|
204
204
|
await fsp.access(dbPath);
|
|
205
205
|
}
|
|
206
206
|
catch {
|
|
207
207
|
return {
|
|
208
|
-
error: `No bridge.
|
|
208
|
+
error: `No bridge.cgdb in this group directory. Run codragraph group sync (schema ${BRIDGE_SCHEMA_VERSION}).`,
|
|
209
209
|
};
|
|
210
210
|
}
|
|
211
211
|
const handle = await openBridgeDbReadOnly(groupDir);
|
|
212
212
|
if (!handle) {
|
|
213
213
|
return {
|
|
214
|
-
error: `Could not open bridge.
|
|
214
|
+
error: `Could not open bridge.cgdb read-only (schema ${BRIDGE_SCHEMA_VERSION}). Run codragraph group sync.`,
|
|
215
215
|
};
|
|
216
216
|
}
|
|
217
217
|
return { handle };
|
package/dist/core/group/sync.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import fs from 'node:fs/promises';
|
|
2
2
|
import path from 'node:path';
|
|
3
3
|
import { Buffer } from 'node:buffer';
|
|
4
|
-
import {
|
|
4
|
+
import { initCgdb, closeCgdb, executeParameterized } from '../cgdb/pool-adapter.js';
|
|
5
5
|
import { readRegistry } from '../../storage/repo-manager.js';
|
|
6
6
|
import { HttpRouteExtractor } from './extractors/http-route-extractor.js';
|
|
7
7
|
import { GrpcExtractor } from './extractors/grpc-extractor.js';
|
|
@@ -78,9 +78,9 @@ export async function syncGroup(config, opts) {
|
|
|
78
78
|
continue;
|
|
79
79
|
}
|
|
80
80
|
const poolId = handle.id;
|
|
81
|
-
const
|
|
81
|
+
const cgdbPath = path.join(handle.storagePath, 'cgdb');
|
|
82
82
|
try {
|
|
83
|
-
await
|
|
83
|
+
await initCgdb(poolId, cgdbPath);
|
|
84
84
|
openPoolIds.push(poolId);
|
|
85
85
|
const executor = (query, params) => executeParameterized(poolId, query, params ?? {});
|
|
86
86
|
dbExecutors.set(groupPath, executor);
|
|
@@ -139,7 +139,7 @@ export async function syncGroup(config, opts) {
|
|
|
139
139
|
}
|
|
140
140
|
finally {
|
|
141
141
|
for (const id of [...new Set(openPoolIds)]) {
|
|
142
|
-
await
|
|
142
|
+
await closeCgdb(id).catch(() => { });
|
|
143
143
|
}
|
|
144
144
|
}
|
|
145
145
|
}
|
package/dist/core/run-analyze.js
CHANGED
|
@@ -13,13 +13,13 @@ import fs from 'fs/promises';
|
|
|
13
13
|
import * as fsSync from 'node:fs';
|
|
14
14
|
import * as v8 from 'node:v8';
|
|
15
15
|
import { runPipelineFromRepo } from './ingestion/pipeline.js';
|
|
16
|
-
import {
|
|
16
|
+
import { initCgdb, loadGraphToCgdb, getCgdbStats, executeQuery, executeWithReusedStatement, closeCgdb, loadCachedEmbeddings, } from './cgdb/cgdb-adapter.js';
|
|
17
17
|
import { getStoragePaths, saveMeta, loadMeta, addToGitignore, registerRepo, cleanupOldKuzuFiles, INDEX_SCHEMA_VERSION, } from '../storage/repo-manager.js';
|
|
18
18
|
import { getCurrentCommit, getRemoteUrl, hasGitDir, getInferredRepoName } from '../storage/git.js';
|
|
19
19
|
import { recordAnalysisSnapshot } from './graphstore/index.js';
|
|
20
20
|
import { generateAIContextFiles } from '../cli/ai-context.js';
|
|
21
|
-
import { EMBEDDING_TABLE_NAME } from './
|
|
22
|
-
import { STALE_HASH_SENTINEL } from './
|
|
21
|
+
import { EMBEDDING_TABLE_NAME } from './cgdb/schema.js';
|
|
22
|
+
import { STALE_HASH_SENTINEL } from './cgdb/schema.js';
|
|
23
23
|
/** Threshold: auto-skip embeddings for repos with more nodes than this */
|
|
24
24
|
const EMBEDDING_NODE_LIMIT = 50_000;
|
|
25
25
|
export const PHASE_LABELS = {
|
|
@@ -32,7 +32,7 @@ export const PHASE_LABELS = {
|
|
|
32
32
|
communities: 'Detecting communities',
|
|
33
33
|
processes: 'Detecting processes',
|
|
34
34
|
complete: 'Pipeline complete',
|
|
35
|
-
|
|
35
|
+
cgdb: 'Loading into LadybugDB',
|
|
36
36
|
fts: 'Creating search indexes',
|
|
37
37
|
embeddings: 'Generating embeddings',
|
|
38
38
|
done: 'Done',
|
|
@@ -81,7 +81,7 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
81
81
|
const progress = (phase, percent, message) => {
|
|
82
82
|
callbacks.onProgress(phase, percent, message);
|
|
83
83
|
// Only snapshot on phase transitions, not every tick. Phase strings come
|
|
84
|
-
// from runPipelineFromRepo /
|
|
84
|
+
// from runPipelineFromRepo / loadGraphToCgdb and are stable.
|
|
85
85
|
if (heapProfileEnabled && phase && phase !== lastProfilePhase) {
|
|
86
86
|
lastProfilePhase = phase;
|
|
87
87
|
const ts = Date.now();
|
|
@@ -117,7 +117,7 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
117
117
|
}
|
|
118
118
|
}
|
|
119
119
|
};
|
|
120
|
-
const { storagePath,
|
|
120
|
+
const { storagePath, cgdbPath } = getStoragePaths(repoPath);
|
|
121
121
|
// Clean up stale KuzuDB files from before the LadybugDB migration.
|
|
122
122
|
const kuzuResult = await cleanupOldKuzuFiles(storagePath);
|
|
123
123
|
if (kuzuResult.found && kuzuResult.needsReindex) {
|
|
@@ -159,15 +159,15 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
159
159
|
if (options.embeddings && existingMeta && !options.force) {
|
|
160
160
|
try {
|
|
161
161
|
progress('embeddings', 0, 'Caching embeddings...');
|
|
162
|
-
await
|
|
162
|
+
await initCgdb(cgdbPath);
|
|
163
163
|
const cached = await loadCachedEmbeddings();
|
|
164
164
|
cachedEmbeddingNodeIds = cached.embeddingNodeIds;
|
|
165
165
|
cachedEmbeddings = cached.embeddings;
|
|
166
|
-
await
|
|
166
|
+
await closeCgdb();
|
|
167
167
|
}
|
|
168
168
|
catch {
|
|
169
169
|
try {
|
|
170
|
-
await
|
|
170
|
+
await closeCgdb();
|
|
171
171
|
}
|
|
172
172
|
catch {
|
|
173
173
|
/* swallow */
|
|
@@ -181,10 +181,10 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
181
181
|
progress(p.phase, scaled, phaseLabel);
|
|
182
182
|
});
|
|
183
183
|
// ── Phase 2: LadybugDB (60–85%) ──────────────────────────────────
|
|
184
|
-
progress('
|
|
185
|
-
await
|
|
186
|
-
const
|
|
187
|
-
for (const f of
|
|
184
|
+
progress('cgdb', 60, 'Loading into LadybugDB...');
|
|
185
|
+
await closeCgdb();
|
|
186
|
+
const cgdbFiles = [cgdbPath, `${cgdbPath}.wal`, `${cgdbPath}.lock`];
|
|
187
|
+
for (const f of cgdbFiles) {
|
|
188
188
|
try {
|
|
189
189
|
await fs.rm(f, { recursive: true, force: true });
|
|
190
190
|
}
|
|
@@ -192,16 +192,16 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
192
192
|
/* swallow */
|
|
193
193
|
}
|
|
194
194
|
}
|
|
195
|
-
await
|
|
195
|
+
await initCgdb(cgdbPath);
|
|
196
196
|
try {
|
|
197
|
-
// All work after
|
|
197
|
+
// All work after initCgdb is wrapped in try/finally to ensure closeCgdb()
|
|
198
198
|
// is called even if an error occurs — the module-level singleton DB handle
|
|
199
199
|
// must be released to avoid blocking subsequent invocations.
|
|
200
|
-
let
|
|
201
|
-
await
|
|
202
|
-
|
|
203
|
-
const pct = Math.min(84, 60 + Math.round((
|
|
204
|
-
progress('
|
|
200
|
+
let cgdbMsgCount = 0;
|
|
201
|
+
await loadGraphToCgdb(pipelineResult.graph, pipelineResult.repoPath, storagePath, (msg) => {
|
|
202
|
+
cgdbMsgCount++;
|
|
203
|
+
const pct = Math.min(84, 60 + Math.round((cgdbMsgCount / (cgdbMsgCount + 10)) * 24));
|
|
204
|
+
progress('cgdb', pct, msg);
|
|
205
205
|
},
|
|
206
206
|
// RFC 0001 Phase 2: when --compress is set, every content row goes
|
|
207
207
|
// through encodeContent before hitting the CSV. Default 'none' is
|
|
@@ -244,7 +244,7 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
244
244
|
// ── Phase 3.5: Re-insert cached embeddings ────────────────────────
|
|
245
245
|
if (cachedEmbeddings.length > 0) {
|
|
246
246
|
const cachedDims = cachedEmbeddings[0].embedding.length;
|
|
247
|
-
const { EMBEDDING_DIMS } = await import('./
|
|
247
|
+
const { EMBEDDING_DIMS } = await import('./cgdb/schema.js');
|
|
248
248
|
if (cachedDims !== EMBEDDING_DIMS) {
|
|
249
249
|
// Dimensions changed (e.g. switched embedding model) — discard cache and re-embed all
|
|
250
250
|
log(`Embedding dimensions changed (${cachedDims}d -> ${EMBEDDING_DIMS}d), discarding cache`);
|
|
@@ -267,7 +267,7 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
267
267
|
}
|
|
268
268
|
}
|
|
269
269
|
// ── Phase 4: Embeddings (90–98%) ──────────────────────────────────
|
|
270
|
-
const stats = await
|
|
270
|
+
const stats = await getCgdbStats();
|
|
271
271
|
let embeddingSkipped = true;
|
|
272
272
|
if (options.embeddings) {
|
|
273
273
|
if (stats.nodes <= EMBEDDING_NODE_LIMIT) {
|
|
@@ -380,7 +380,7 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
380
380
|
// Best-effort — don't fail the entire analysis for context file issues
|
|
381
381
|
}
|
|
382
382
|
// ── Close LadybugDB ──────────────────────────────────────────────
|
|
383
|
-
await
|
|
383
|
+
await closeCgdb();
|
|
384
384
|
progress('done', 100, 'Done');
|
|
385
385
|
return {
|
|
386
386
|
repoName: projectName,
|
|
@@ -392,7 +392,7 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
392
392
|
catch (err) {
|
|
393
393
|
// Ensure LadybugDB is closed even on error
|
|
394
394
|
try {
|
|
395
|
-
await
|
|
395
|
+
await closeCgdb();
|
|
396
396
|
}
|
|
397
397
|
catch {
|
|
398
398
|
/* swallow */
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* Always reads from the database (no cached state to drift).
|
|
6
6
|
*
|
|
7
7
|
* FTS indexes are created lazily on first query (via `ensureFTSIndex`) — see
|
|
8
|
-
* `
|
|
8
|
+
* `cgdb-adapter.ts` for the rationale. This keeps `analyze` fast (the
|
|
9
9
|
* ~440 ms × 5 LadybugDB CREATE_FTS_INDEX cost dominates pipeline time on
|
|
10
10
|
* small repos / CI runners) at the cost of paying that overhead on the
|
|
11
11
|
* first `query`/`context` call in a session.
|
|
@@ -20,7 +20,7 @@ export interface BM25SearchResult {
|
|
|
20
20
|
* Drop all ensured-FTS cache entries for a given repoId.
|
|
21
21
|
*
|
|
22
22
|
* Called from the pool-close listener so that a pool teardown / recreation
|
|
23
|
-
* forces the next `
|
|
23
|
+
* forces the next `searchFTSFromCgdb` call to re-issue `CREATE_FTS_INDEX`
|
|
24
24
|
* against the fresh connection rather than trust stale ensure-state from a
|
|
25
25
|
* previous pool lifetime.
|
|
26
26
|
*
|
|
@@ -38,4 +38,4 @@ export declare function invalidateEnsuredFTSForRepo(repoId: string): void;
|
|
|
38
38
|
* @param repoId - If provided, queries will be routed via the MCP connection pool
|
|
39
39
|
* @returns Ranked search results from FTS indexes
|
|
40
40
|
*/
|
|
41
|
-
export declare const
|
|
41
|
+
export declare const searchFTSFromCgdb: (query: string, limit?: number, repoId?: string) => Promise<BM25SearchResult[]>;
|
|
@@ -5,14 +5,14 @@
|
|
|
5
5
|
* Always reads from the database (no cached state to drift).
|
|
6
6
|
*
|
|
7
7
|
* FTS indexes are created lazily on first query (via `ensureFTSIndex`) — see
|
|
8
|
-
* `
|
|
8
|
+
* `cgdb-adapter.ts` for the rationale. This keeps `analyze` fast (the
|
|
9
9
|
* ~440 ms × 5 LadybugDB CREATE_FTS_INDEX cost dominates pipeline time on
|
|
10
10
|
* small repos / CI runners) at the cost of paying that overhead on the
|
|
11
11
|
* first `query`/`context` call in a session.
|
|
12
12
|
*/
|
|
13
|
-
import { queryFTS, ensureFTSIndex, executeQuery as executeCoreQuery, } from '../
|
|
13
|
+
import { queryFTS, ensureFTSIndex, executeQuery as executeCoreQuery, } from '../cgdb/cgdb-adapter.js';
|
|
14
14
|
/**
|
|
15
|
-
* FTS table set served by `
|
|
15
|
+
* FTS table set served by `searchFTSFromCgdb`. Centralised so that both
|
|
16
16
|
* the CLI/pipeline path and the MCP pool path stay in lockstep.
|
|
17
17
|
*
|
|
18
18
|
* The properties list is computed at FTS-create time via `ftsPropertiesFor`
|
|
@@ -72,7 +72,7 @@ const FALLBACK_FIELD_WEIGHTS = {
|
|
|
72
72
|
/**
|
|
73
73
|
* Per-process cache for the MCP pool path: tracks which `(repoId, table)`
|
|
74
74
|
* pairs have been ensured. The CLI/pipeline path gets its own cache inside
|
|
75
|
-
* `
|
|
75
|
+
* `cgdb-adapter.ts` keyed by table/index, scoped to the singleton connection.
|
|
76
76
|
*
|
|
77
77
|
* IMPORTANT: an entry is added ONLY when the index was confirmed to exist
|
|
78
78
|
* (CREATE_FTS_INDEX succeeded, or failed with `'already exists'`). Other
|
|
@@ -80,14 +80,14 @@ const FALLBACK_FIELD_WEIGHTS = {
|
|
|
80
80
|
* unset so the next query retries instead of silently caching the failure.
|
|
81
81
|
*
|
|
82
82
|
* Entries for a given repoId are invalidated when its pool is closed —
|
|
83
|
-
* see the `addPoolCloseListener` registration in `
|
|
83
|
+
* see the `addPoolCloseListener` registration in `searchFTSFromCgdb`.
|
|
84
84
|
*/
|
|
85
85
|
const ensuredPoolFTS = new Set();
|
|
86
86
|
/**
|
|
87
87
|
* Drop all ensured-FTS cache entries for a given repoId.
|
|
88
88
|
*
|
|
89
89
|
* Called from the pool-close listener so that a pool teardown / recreation
|
|
90
|
-
* forces the next `
|
|
90
|
+
* forces the next `searchFTSFromCgdb` call to re-issue `CREATE_FTS_INDEX`
|
|
91
91
|
* against the fresh connection rather than trust stale ensure-state from a
|
|
92
92
|
* previous pool lifetime.
|
|
93
93
|
*
|
|
@@ -245,7 +245,7 @@ properties = ['name', 'content']) {
|
|
|
245
245
|
* @param repoId - If provided, queries will be routed via the MCP connection pool
|
|
246
246
|
* @returns Ranked search results from FTS indexes
|
|
247
247
|
*/
|
|
248
|
-
export const
|
|
248
|
+
export const searchFTSFromCgdb = async (query, limit = 20, repoId) => {
|
|
249
249
|
if (!query.trim() || limit <= 0)
|
|
250
250
|
return [];
|
|
251
251
|
let fileResults, functionResults, classResults, methodResults, interfaceResults;
|
|
@@ -253,7 +253,7 @@ export const searchFTSFromLbug = async (query, limit = 20, repoId) => {
|
|
|
253
253
|
// Use MCP connection pool via dynamic import
|
|
254
254
|
// IMPORTANT: FTS queries run sequentially to avoid connection contention.
|
|
255
255
|
// The MCP pool supports multiple connections, but FTS is best run serially.
|
|
256
|
-
const poolMod = await import('../
|
|
256
|
+
const poolMod = await import('../cgdb/pool-adapter.js');
|
|
257
257
|
const { executeQuery, addPoolCloseListener } = poolMod;
|
|
258
258
|
// Register the pool-close listener lazily on first use so a teardown of
|
|
259
259
|
// the pool entry (LRU eviction, idle timeout, explicit close) drops the
|
|
@@ -287,7 +287,7 @@ export const searchFTSFromLbug = async (query, limit = 20, repoId) => {
|
|
|
287
287
|
}
|
|
288
288
|
}
|
|
289
289
|
else {
|
|
290
|
-
// Use core
|
|
290
|
+
// Use core cgdb adapter (CLI / pipeline context) — also sequential for safety.
|
|
291
291
|
// Lazy-create FTS indexes on first query (analyze no longer does it).
|
|
292
292
|
// RFC 0001 Phase 2.5 — same `compress`-aware property selection as the MCP
|
|
293
293
|
// path; the CLI walks up from cwd to find the repo's meta.json.
|