@codragraph/cli 1.6.4 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -0
- package/dist/_shared/cgdb/schema-constants.d.ts +16 -0
- package/dist/_shared/cgdb/schema-constants.d.ts.map +1 -0
- package/dist/_shared/cgdb/schema-constants.js +67 -0
- package/dist/_shared/cgdb/schema-constants.js.map +1 -0
- package/dist/_shared/index.d.ts +2 -2
- package/dist/_shared/index.js +1 -1
- package/dist/cli/analyze.d.ts +22 -0
- package/dist/cli/analyze.js +109 -6
- package/dist/cli/compress-stats.d.ts +29 -0
- package/dist/cli/compress-stats.js +97 -0
- package/dist/cli/graphstore.d.ts +6 -2
- package/dist/cli/graphstore.js +45 -23
- package/dist/cli/index-repo.js +3 -3
- package/dist/cli/index.js +16 -2
- package/dist/cli/profile-heap.d.ts +35 -0
- package/dist/cli/profile-heap.js +126 -0
- package/dist/cli/setup.d.ts +13 -0
- package/dist/cli/setup.js +22 -11
- package/dist/cli/skill-gen.d.ts +14 -2
- package/dist/cli/skill-gen.js +52 -19
- package/dist/cli/tool.js +4 -0
- package/dist/cli/wiki.js +3 -3
- package/dist/core/augmentation/engine.js +7 -7
- package/dist/core/cgdb/cgdb-adapter.d.ts +176 -0
- package/dist/core/cgdb/cgdb-adapter.js +1320 -0
- package/dist/core/cgdb/content-read.d.ts +46 -0
- package/dist/core/cgdb/content-read.js +64 -0
- package/dist/core/cgdb/csv-generator.d.ts +29 -0
- package/dist/core/cgdb/csv-generator.js +492 -0
- package/dist/core/cgdb/pool-adapter.d.ts +93 -0
- package/dist/core/cgdb/pool-adapter.js +550 -0
- package/dist/core/cgdb/schema.d.ts +62 -0
- package/dist/core/cgdb/schema.js +502 -0
- package/dist/core/embeddings/embedding-pipeline.js +27 -10
- package/dist/core/graphstore/cgdb-row-source.d.ts +19 -0
- package/dist/core/graphstore/cgdb-row-source.js +141 -0
- package/dist/core/graphstore/index.d.ts +1 -1
- package/dist/core/graphstore/index.js +3 -3
- package/dist/core/group/bridge-db.d.ts +2 -2
- package/dist/core/group/bridge-db.js +123 -36
- package/dist/core/group/bridge-schema.d.ts +4 -4
- package/dist/core/group/bridge-schema.js +4 -4
- package/dist/core/group/cross-impact.js +3 -3
- package/dist/core/group/sync.js +4 -4
- package/dist/core/lbug/content-read.d.ts +46 -0
- package/dist/core/lbug/content-read.js +64 -0
- package/dist/core/lbug/csv-generator.d.ts +2 -6
- package/dist/core/lbug/csv-generator.js +45 -12
- package/dist/core/lbug/lbug-adapter.d.ts +4 -1
- package/dist/core/lbug/lbug-adapter.js +153 -21
- package/dist/core/lbug/schema.d.ts +7 -7
- package/dist/core/lbug/schema.js +18 -0
- package/dist/core/run-analyze.d.ts +13 -0
- package/dist/core/run-analyze.js +114 -27
- package/dist/core/search/bm25-index.d.ts +3 -3
- package/dist/core/search/bm25-index.js +75 -23
- package/dist/core/search/hybrid-search.js +2 -2
- package/dist/core/wiki/generator.d.ts +2 -2
- package/dist/core/wiki/generator.js +4 -4
- package/dist/core/wiki/graph-queries.d.ts +2 -2
- package/dist/core/wiki/graph-queries.js +5 -5
- package/dist/mcp/core/cgdb-adapter.d.ts +5 -0
- package/dist/mcp/core/cgdb-adapter.js +5 -0
- package/dist/mcp/core/embedder.js +1 -1
- package/dist/mcp/local/local-backend.d.ts +2 -2
- package/dist/mcp/local/local-backend.js +36 -19
- package/dist/mcp/server.js +3 -3
- package/dist/mcp/tools.js +1 -1
- package/dist/server/analyze-worker.js +2 -2
- package/dist/server/api.js +34 -33
- package/dist/storage/repo-manager.d.ts +42 -3
- package/dist/storage/repo-manager.js +23 -4
- package/hooks/claude/codragraph-hook.cjs +98 -5
- package/package.json +4 -4
- package/scripts/build-tree-sitter-proto.cjs +15 -3
- package/scripts/build.js +8 -9
- package/scripts/patch-tree-sitter-swift.cjs +17 -4
- package/skills/codragraph-api-surface.md +110 -0
- package/skills/codragraph-config-audit.md +146 -0
- package/skills/codragraph-cross-repo-impact.md +135 -0
- package/skills/codragraph-data-lineage.md +137 -0
- package/skills/codragraph-dead-code.md +119 -0
- package/skills/codragraph-gh-actions-debug.md +162 -0
- package/skills/codragraph-gh-issue-workflow.md +178 -0
- package/skills/codragraph-gh-pr-workflow.md +176 -0
- package/skills/codragraph-gh-release-workflow.md +187 -0
- package/skills/codragraph-git-bisect.md +176 -0
- package/skills/codragraph-git-force-push.md +147 -0
- package/skills/codragraph-git-history-rewrite.md +174 -0
- package/skills/codragraph-git-rebase-vs-merge.md +138 -0
- package/skills/codragraph-git-recovery.md +181 -0
- package/skills/codragraph-git-worktree.md +145 -0
- package/skills/codragraph-migration-tracking.md +130 -0
- package/skills/codragraph-notebook-context.md +136 -0
- package/skills/codragraph-observability-coverage.md +125 -0
- package/skills/codragraph-onboarding.md +129 -0
- package/skills/codragraph-perf-hotspots.md +132 -0
- package/skills/codragraph-project-switcher.md +116 -0
- package/skills/codragraph-security-audit.md +144 -0
- package/skills/codragraph-sql-tracing.md +122 -0
- package/skills/codragraph-supply-chain-audit.md +153 -0
- package/skills/codragraph-test-coverage.md +97 -0
- package/vendor/tree-sitter-proto/bindings/node/index.js +3 -3
- package/vendor/tree-sitter-proto/src/node-types.json +1 -1
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Adapter exposing a live LadybugDB instance as a `@codragraph/graphstore`
|
|
3
|
+
* `RowSource`. Used by the analyze pipeline (Phase 4) to snapshot the
|
|
4
|
+
* loaded graph into the content-addressed store.
|
|
5
|
+
*
|
|
6
|
+
* Best-effort by design: any table that errors at query time is skipped
|
|
7
|
+
* (with the failure surfaced through the optional `onSkip` callback) so
|
|
8
|
+
* the surrounding analyze flow never breaks because the versioning hook
|
|
9
|
+
* misbehaves.
|
|
10
|
+
*/
|
|
11
|
+
import { NODE_TABLES, REL_TABLE_NAME } from '../../_shared/index.js';
|
|
12
|
+
import { executeQuery } from '../cgdb/cgdb-adapter.js';
|
|
13
|
+
/**
 * Build a row source backed by the currently-open database.
 *
 * @param {object} [opts]
 * @param {(name: string, err: unknown) => void} [opts.onSkip]
 *   Invoked whenever a table (or the relationship table) cannot be read, or
 *   when its query returned rows but none could be used. Defaults to a no-op.
 * @param {Iterable<string>} [opts.nodeTables]
 *   Node-table names to expose. Defaults to `NODE_TABLES`.
 * @returns {{ listNodeTables: Function, streamNodeTable: Function, streamEdges: Function }}
 */
export const createCgdbRowSource = (opts = {}) => {
    const reportSkip = opts.onSkip ?? (() => { });
    const nodeTableNames = opts.nodeTables ?? NODE_TABLES;

    // Hand back a fresh copy on every call so callers cannot mutate our list.
    const listNodeTables = async () => [...nodeTableNames];

    const streamNodeTable = async function* (tableName) {
        let resultRows;
        try {
            // One result row per node. Depending on the result-shape mode the
            // node sits under `row.n` or `row[0]`; `unwrapNode` accepts both.
            // A query failure (e.g. the table is absent for this repo) is
            // reported through the skip callback and treated as "no rows".
            resultRows = await executeQuery(`MATCH (n:${tableName}) RETURN n`);
        }
        catch (err) {
            reportSkip(tableName, err);
            return;
        }
        let emitted = 0;
        for (const resultRow of resultRows) {
            const node = unwrapNode(resultRow);
            if (node) {
                yield normalizeNodeRow(node);
                emitted += 1;
            }
        }
        // Rows came back but nothing was usable: report it rather than let a
        // silent 0-row snapshot through for this table.
        if (emitted === 0 && resultRows.length > 0) {
            const detail = `cgdb-row-source: query returned ${resultRows.length} row(s) for "${tableName}" but none had an unwrappable node — ` +
                `result shape changed? expected row.n or row[0] to be the node`;
            reportSkip(tableName, new Error(detail));
        }
    };

    const streamEdges = async function* () {
        let resultRows;
        try {
            // `from` / `to` / `type` are projected as scalar columns so the
            // edge identity does not depend on the shape of the `rel`
            // payload; `rel` itself carries the remaining properties.
            resultRows = await executeQuery(`MATCH (a)-[r:${REL_TABLE_NAME}]->(b) RETURN a.id AS \`from\`, b.id AS \`to\`, r.type AS type, r AS rel`);
        }
        catch (err) {
            reportSkip(REL_TABLE_NAME, err);
            return;
        }
        let emitted = 0;
        for (const resultRow of resultRows) {
            const from = pickField(resultRow, 'from', 0);
            const to = pickField(resultRow, 'to', 1);
            const type = pickField(resultRow, 'type', 2);
            const rel = pickField(resultRow, 'rel', 3);
            // Only rows with string endpoints are emitted.
            if (typeof from === 'string' && typeof to === 'string') {
                const relPayload = isPlainObject(rel) ? rel : null;
                yield normalizeEdgeRow({ from, to, type, rel: relPayload });
                emitted += 1;
            }
        }
        if (emitted === 0 && resultRows.length > 0) {
            const detail = `cgdb-row-source: edges query returned ${resultRows.length} row(s) but none had a string from/to — ` +
                `result shape changed?`;
            reportSkip(REL_TABLE_NAME, new Error(detail));
        }
    };

    return { listNodeTables, streamNodeTable, streamEdges };
};
|
|
83
|
+
/**
 * Extract the node value from one query result row.
 *
 * The node is looked up under the named column `n` first and, when that is
 * null/undefined, under positional index `0`.
 *
 * @param {unknown} raw - A single result row.
 * @returns {object|null} The node, or null when the row is not an object or
 *   the located value is not a non-array object.
 */
const unwrapNode = (raw) => {
    const isObjectRow = Boolean(raw) && typeof raw === 'object';
    if (!isObjectRow) {
        return null;
    }
    const nodeValue = raw['n'] ?? raw[0];
    if (isPlainObject(nodeValue)) {
        return nodeValue;
    }
    return null;
};
|
|
96
|
+
/**
 * Read a value from a result row by column name, falling back to a
 * positional index when the named value is null/undefined.
 *
 * @param {object|Array|null|undefined} row - The result row; any falsy row yields undefined.
 * @param {string} named - Column name to try first.
 * @param {number} positional - Index to try when the named lookup is nullish.
 * @returns {*} The located value, or undefined.
 */
const pickField = (row, named, positional) => (row ? (row[named] ?? row[positional]) : undefined);
|
|
102
|
+
/**
 * True for any non-null, non-array value whose `typeof` is `'object'`.
 * Note this is a structural check only: class instances such as `Date`
 * also pass.
 */
const isPlainObject = (v) => {
    if (v === null || Array.isArray(v)) {
        return false;
    }
    return typeof v === 'object';
};
|
|
103
|
+
/**
 * Sanitize a node row for canonical hashing:
 *   - Drop the internal `_id` and `_label` fields. They are not
 *     content-bearing — including them would make the hash sensitive to
 *     internal storage offsets and break dedup across snapshots.
 *   - Emit keys in sorted order (canonical JSON in the serializer already
 *     does this, but doing it once here keeps the row payload stable if
 *     we ever swap engines).
 *
 * The result is assembled with `Object.fromEntries`, which defines every
 * key as an own data property. Plain assignment (`out[key] = value`) would
 * route a key named `__proto__` through the prototype setter, silently
 * dropping that property from the row and replacing the result's prototype.
 *
 * @param {object} node - Raw node object from a query result row.
 * @returns {object} A new object holding the remaining properties.
 */
const normalizeNodeRow = (node) => {
    const entries = Object.keys(node)
        .sort()
        .filter((key) => key !== '_id' && key !== '_label')
        .map((key) => [key, node[key]]);
    return Object.fromEntries(entries);
};
|
|
121
|
+
/**
 * Flatten a projected edge into its canonical row.
 *
 * The result always starts with string `from`, `to` and `type` fields,
 * followed by the properties of `r.rel` in sorted key order. Keys on `rel`
 * named `from` / `to` / `type` are skipped (they are already projected as
 * top-level columns), as is every key beginning with `_`.
 *
 * @param {{ from: *, to: *, type: *, rel?: object|null }} r - Projected edge.
 * @returns {object} The canonical edge row.
 */
const normalizeEdgeRow = (r) => {
    const payload = r.rel;
    const sortedKeys = payload && typeof payload === 'object' ? Object.keys(payload).sort() : [];
    const extras = {};
    for (const name of sortedKeys) {
        const isProjectedColumn = name === 'from' || name === 'to' || name === 'type';
        if (isProjectedColumn || name.startsWith('_')) {
            continue;
        }
        extras[name] = payload[name];
    }
    const from = String(r.from);
    const to = String(r.to);
    // A nullish type becomes the empty string; any other non-string is coerced.
    const type = typeof r.type === 'string' ? r.type : String(r.type ?? '');
    return { from, to, type, ...extras };
};
|
|
@@ -38,7 +38,7 @@ export interface RecordAnalysisSnapshotResult {
|
|
|
38
38
|
/**
|
|
39
39
|
* Snapshot the currently-loaded LadybugDB into the content-addressed
|
|
40
40
|
* store and advance the active branch's HEAD to the new commit. Caller
|
|
41
|
-
* is expected to have already initialized
|
|
41
|
+
* is expected to have already initialized cgdb with `initCgdb(...)`.
|
|
42
42
|
*
|
|
43
43
|
* Returns null if anything goes sideways (logged via `onSkipTable`); the
|
|
44
44
|
* analyze pipeline treats that as "no snapshot for this run".
|
|
@@ -9,13 +9,13 @@
|
|
|
9
9
|
*/
|
|
10
10
|
import path from 'node:path';
|
|
11
11
|
import { FsCAS, serializeSnapshot, createCommit, setHead, writeHeadBranch, resolveHeadCommit, DEFAULT_BRANCH, } from '@codragraph/graphstore';
|
|
12
|
-
import {
|
|
12
|
+
import { createCgdbRowSource } from './cgdb-row-source.js';
|
|
13
13
|
/** Subdirectory of `<repo>/.codragraph` that holds versioning artifacts. */
|
|
14
14
|
export const GRAPHSTORE_SUBDIR = 'graphstore';
|
|
15
15
|
/**
|
|
16
16
|
* Snapshot the currently-loaded LadybugDB into the content-addressed
|
|
17
17
|
* store and advance the active branch's HEAD to the new commit. Caller
|
|
18
|
-
* is expected to have already initialized
|
|
18
|
+
* is expected to have already initialized cgdb with `initCgdb(...)`.
|
|
19
19
|
*
|
|
20
20
|
* Returns null if anything goes sideways (logged via `onSkipTable`); the
|
|
21
21
|
* analyze pipeline treats that as "no snapshot for this run".
|
|
@@ -26,7 +26,7 @@ export const recordAnalysisSnapshot = async (opts) => {
|
|
|
26
26
|
let serialized;
|
|
27
27
|
try {
|
|
28
28
|
serialized = await serializeSnapshot({
|
|
29
|
-
source:
|
|
29
|
+
source: createCgdbRowSource({ onSkip: opts.onSkipTable }),
|
|
30
30
|
cas,
|
|
31
31
|
indexedRepoCommit: opts.indexedRepoCommit,
|
|
32
32
|
});
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { LbugValue } from '@ladybugdb/core';
|
|
1
|
+
import type { LbugValue as CgdbValue } from '@ladybugdb/core';
|
|
2
2
|
import type { BridgeHandle, BridgeMeta, StoredContract, CrossLink, RepoSnapshot } from './types.js';
|
|
3
3
|
export declare function contractNodeId(repo: string, contractId: string, role: string, filePath: string): string;
|
|
4
4
|
/**
|
|
@@ -46,7 +46,7 @@ export declare function indexContract(index: ContractLookupIndex, contract: Stor
|
|
|
46
46
|
export declare function findContractNode(index: ContractLookupIndex, repo: string, role: 'consumer' | 'provider', symbolUid: string, filePath: string, symbolName: string): string | null;
|
|
47
47
|
export declare function openBridgeDb(dbPath: string): Promise<BridgeHandle>;
|
|
48
48
|
export declare function ensureBridgeSchema(handle: BridgeHandle): Promise<void>;
|
|
49
|
-
export declare function queryBridge<T>(handle: BridgeHandle, cypher: string, params?: Record<string,
|
|
49
|
+
export declare function queryBridge<T>(handle: BridgeHandle, cypher: string, params?: Record<string, CgdbValue>): Promise<T[]>;
|
|
50
50
|
export declare function closeBridgeDb(handle: BridgeHandle): Promise<void>;
|
|
51
51
|
export declare function retryRename(src: string, dst: string, attempts?: number): Promise<void>;
|
|
52
52
|
export declare function writeBridgeMeta(groupDir: string, meta: BridgeMeta): Promise<void>;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import fsp from 'node:fs/promises';
|
|
2
2
|
import path from 'node:path';
|
|
3
3
|
import { createHash } from 'node:crypto';
|
|
4
|
-
import
|
|
4
|
+
import cgdb from '@ladybugdb/core';
|
|
5
5
|
import { BRIDGE_SCHEMA_QUERIES, BRIDGE_SCHEMA_VERSION } from './bridge-schema.js';
|
|
6
6
|
import { dedupeContracts, dedupeCrossLinks } from './normalization.js';
|
|
7
7
|
export function contractNodeId(repo, contractId, role, filePath) {
|
|
@@ -74,8 +74,8 @@ export function findContractNode(index, repo, role, symbolUid, filePath, symbolN
|
|
|
74
74
|
export async function openBridgeDb(dbPath) {
|
|
75
75
|
const parentDir = path.dirname(dbPath);
|
|
76
76
|
await fsp.mkdir(parentDir, { recursive: true });
|
|
77
|
-
const db = new
|
|
78
|
-
const conn = new
|
|
77
|
+
const db = new cgdb.Database(dbPath, 0, false, false); // writable
|
|
78
|
+
const conn = new cgdb.Connection(db);
|
|
79
79
|
return { _db: db, _conn: conn, groupDir: parentDir };
|
|
80
80
|
}
|
|
81
81
|
/**
|
|
@@ -83,10 +83,10 @@ export async function openBridgeDb(dbPath) {
|
|
|
83
83
|
* CREATE NODE TABLE or CREATE REL TABLE statement hits an already-existing
|
|
84
84
|
* table. LadybugDB DDL doesn't support IF NOT EXISTS, and its JS driver
|
|
85
85
|
* doesn't expose typed error codes, so we match on the message substring —
|
|
86
|
-
* the same pattern used by `core/
|
|
86
|
+
* the same pattern used by `core/cgdb/cgdb-adapter.ts`. If a future
|
|
87
87
|
* LadybugDB release changes the wording, update this constant.
|
|
88
88
|
*/
|
|
89
|
-
const
|
|
89
|
+
const CGDB_ALREADY_EXISTS_MSG = 'already exists';
|
|
90
90
|
export async function ensureBridgeSchema(handle) {
|
|
91
91
|
const conn = handle._conn;
|
|
92
92
|
for (const q of BRIDGE_SCHEMA_QUERIES) {
|
|
@@ -95,26 +95,98 @@ export async function ensureBridgeSchema(handle) {
|
|
|
95
95
|
}
|
|
96
96
|
catch (err) {
|
|
97
97
|
const msg = err instanceof Error ? err.message : String(err);
|
|
98
|
-
if (!msg.includes(
|
|
98
|
+
if (!msg.includes(CGDB_ALREADY_EXISTS_MSG))
|
|
99
99
|
throw err;
|
|
100
100
|
}
|
|
101
101
|
}
|
|
102
102
|
}
|
|
103
|
-
|
|
103
|
+
/**
 * Best-effort close of one native handle, or of every handle in an array.
 *
 * QueryResult / PreparedStatement objects should be closed explicitly
 * instead of being left to the garbage collector — the same close-order
 * discipline as `core/cgdb/cgdb-adapter.ts:closeQueryResult`. Leaking them
 * past `conn.close()` corrupts LadybugDB's native file lock on Windows
 * ("Error 33: The process cannot access the file because it is being used
 * by another process") and segfaults on process exit elsewhere.
 *
 * Entries without a callable `close` are ignored, and any error raised
 * while closing is swallowed so an already-finalized handle cannot poison
 * the caller's result.
 *
 * @param {object|object[]|null|undefined} h - Handle(s) to close.
 * @returns {Promise<void>}
 */
async function closeBridgeHandle(h) {
    if (!h) {
        return;
    }
    const handles = Array.isArray(h) ? h : [h];
    for (const handle of handles) {
        try {
            const closer = handle?.close;
            if (typeof closer !== 'function') {
                continue;
            }
            await Promise.resolve(closer.call(handle));
        }
        catch {
            /* best-effort */
        }
    }
}
|
|
127
|
+
/**
 * True iff the error is a Windows-only transient file-lock surfaced by
 * LadybugDB's native binding immediately after a writer process closes
 * the same DB file. Symptom is `Error 33` on the read path even though
 * `db.close()` returned cleanly at the JS layer — the kernel hasn't
 * fully released the exclusive lock yet. Retrying with backoff is the
 * documented workaround for this class of Windows-fs interactions.
 *
 * Detection is by message substring. The text inspected is `err.message`,
 * or `err` itself when a bare string was thrown. Anything without a string
 * message is reported as non-transient rather than throwing: this function
 * runs inside retry `catch` handlers, where a TypeError raised here would
 * mask the original failure.
 *
 * @param {unknown} err - The caught value.
 * @returns {boolean} Whether the caller should retry.
 */
function isTransientCgdbLockError(err) {
    const text = typeof err === 'string' ? err : err?.message;
    if (typeof text !== 'string') {
        return false;
    }
    return (text.includes('Error 33') ||
        text.includes('locked a portion of the file') ||
        text.includes('cannot access the file because it is being used by another process'));
}
|
|
141
|
+
/**
 * Run one bridge query (no retry) and return all of its rows.
 *
 * With a non-empty `params` object the query goes through
 * `conn.prepare` + `conn.execute`; otherwise it goes through `conn.query`.
 *
 * Every native handle obtained here is released in a `finally` block, so it
 * is closed even when `execute`, `unwrapQueryResult` or `getAll` throws.
 * `closeBridgeHandle` ignores nullish input, which is what makes it safe to
 * call for a handle that was never assigned.
 *
 * @param {{ _conn: object }} handle - Open bridge handle.
 * @param {string} cypher - Query text.
 * @param {object} [params] - Named parameters for a prepared statement.
 * @returns {Promise<Array>} The rows from `getAll()`.
 * @throws {Error} `Bridge query prepare failed: …` when the statement does
 *   not prepare successfully; any error from the driver is propagated.
 */
async function queryBridgeOnce(handle, cypher, params) {
    const conn = handle._conn;
    if (params && Object.keys(params).length > 0) {
        const stmt = await conn.prepare(cypher);
        let queryResult;
        try {
            if (!stmt.isSuccess()) {
                const errMsg = await stmt.getErrorMessage();
                throw new Error(`Bridge query prepare failed: ${errMsg}`);
            }
            queryResult = await conn.execute(stmt, params);
            const result = unwrapQueryResult(queryResult);
            return (await result.getAll());
        }
        finally {
            // Result first, then the statement it was produced from.
            await closeBridgeHandle(queryResult);
            await closeBridgeHandle(stmt);
        }
    }
    let queryResult;
    try {
        queryResult = await conn.query(cypher);
        const result = unwrapQueryResult(queryResult);
        return (await result.getAll());
    }
    finally {
        await closeBridgeHandle(queryResult);
    }
}
|
|
169
|
+
/**
 * Run a bridge query, retrying on transient Windows file-lock errors.
 *
 * Reads issued through a freshly-opened readonly Database can race the
 * writer's post-`db.close()` lock release on Windows + Node 22.14 (the
 * LadybugDB native binding holds the kernel lock briefly after the JS-level
 * close returns). Up to 7 attempts are made; the wait after a failed
 * attempt starts at 50 ms and doubles each time, for roughly 3 s in total.
 * Errors that are not recognised as transient, and the error from the final
 * attempt, are rethrown unchanged.
 *
 * @param {object} handle - Open bridge handle.
 * @param {string} cypher - Query text.
 * @param {object} [params] - Optional named parameters.
 * @returns {Promise<Array>} The query rows.
 */
export async function queryBridge(handle, cypher, params) {
    const ATTEMPTS = 7;
    const finalAttempt = ATTEMPTS - 1;
    let attempt = 0;
    while (attempt < ATTEMPTS) {
        try {
            return await queryBridgeOnce(handle, cypher, params);
        }
        catch (err) {
            const shouldRetry = isTransientCgdbLockError(err) && attempt !== finalAttempt;
            if (!shouldRetry) {
                throw err;
            }
        }
        const delayMs = 50 * 2 ** attempt;
        await new Promise((resolve) => setTimeout(resolve, delayMs));
        attempt += 1;
    }
    // Unreachable: every iteration either returns, throws, or schedules another attempt.
    throw new Error('queryBridge: retry loop exited unexpectedly');
}
|
|
119
191
|
/**
|
|
120
192
|
* LadybugDB's `conn.query` / `conn.execute` can return either a single
|
|
@@ -176,7 +248,7 @@ export async function writeBridgeMeta(groupDir, meta) {
|
|
|
176
248
|
// Use retryRename for consistency with writeBridge's atomic swap — on
|
|
177
249
|
// Windows a concurrent reader can cause EBUSY/EPERM even on a tiny
|
|
178
250
|
// meta.json, and we don't want meta write to be less robust than the
|
|
179
|
-
// bridge.
|
|
251
|
+
// bridge.cgdb swap it accompanies.
|
|
180
252
|
await retryRename(tmp, target);
|
|
181
253
|
}
|
|
182
254
|
export async function readBridgeMeta(groupDir) {
|
|
@@ -203,9 +275,9 @@ export async function writeBridge(groupDir, input) {
|
|
|
203
275
|
await fsp.mkdir(groupDir, { recursive: true });
|
|
204
276
|
const contracts = dedupeContracts(input.contracts);
|
|
205
277
|
const crossLinks = dedupeCrossLinks(input.crossLinks);
|
|
206
|
-
const finalPath = path.join(groupDir, 'bridge.
|
|
207
|
-
const tmpPath = path.join(groupDir, 'bridge.
|
|
208
|
-
const bakPath = path.join(groupDir, 'bridge.
|
|
278
|
+
const finalPath = path.join(groupDir, 'bridge.cgdb');
|
|
279
|
+
const tmpPath = path.join(groupDir, 'bridge.cgdb.tmp');
|
|
280
|
+
const bakPath = path.join(groupDir, 'bridge.cgdb.bak');
|
|
209
281
|
const report = {
|
|
210
282
|
contractsInserted: 0,
|
|
211
283
|
contractsFailed: 0,
|
|
@@ -394,7 +466,7 @@ export async function writeBridge(groupDir, input) {
|
|
|
394
466
|
/* openBridgeDbReadOnly */
|
|
395
467
|
/* ------------------------------------------------------------------ */
|
|
396
468
|
export async function openBridgeDbReadOnly(groupDir) {
|
|
397
|
-
const dbPath = path.join(groupDir, 'bridge.
|
|
469
|
+
const dbPath = path.join(groupDir, 'bridge.cgdb');
|
|
398
470
|
try {
|
|
399
471
|
await fsp.access(dbPath);
|
|
400
472
|
}
|
|
@@ -404,7 +476,7 @@ export async function openBridgeDbReadOnly(groupDir) {
|
|
|
404
476
|
// triggers bak recovery is an interrupted writer, which on Windows may
|
|
405
477
|
// still be holding an open handle on `.bak` for a few milliseconds when
|
|
406
478
|
// a reader races in. EBUSY/EPERM retries recover that case silently.
|
|
407
|
-
const bakPath = path.join(groupDir, 'bridge.
|
|
479
|
+
const bakPath = path.join(groupDir, 'bridge.cgdb.bak');
|
|
408
480
|
try {
|
|
409
481
|
await fsp.access(bakPath);
|
|
410
482
|
await retryRename(bakPath, dbPath);
|
|
@@ -421,32 +493,47 @@ export async function openBridgeDbReadOnly(groupDir) {
|
|
|
421
493
|
// Open the native handle. If Connection construction throws AFTER
|
|
422
494
|
// Database was successfully allocated, we'd leak the native Database
|
|
423
495
|
// object. Wrap each step separately and tear down the partial handle.
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
496
|
+
//
|
|
497
|
+
// Retry on the Windows-transient lock error: the LadybugDB native
|
|
498
|
+
// binding holds the kernel file lock briefly past `db.close()` on
|
|
499
|
+
// Windows + Node 22.14, so a reader that races a recent writer can
|
|
500
|
+
// hit "Error 33: locked a portion of the file" on the constructor's
|
|
501
|
+
// first 4 KB header read. Backoff up to ~3 s lets the writer's lock
|
|
502
|
+
// age out — enough headroom for any normal write→read sequence
|
|
503
|
+
// without becoming a user-visible delay.
|
|
504
|
+
const ATTEMPTS = 7;
|
|
505
|
+
for (let attempt = 0; attempt < ATTEMPTS; attempt++) {
|
|
506
|
+
let db;
|
|
507
|
+
let conn;
|
|
508
|
+
try {
|
|
509
|
+
db = new cgdb.Database(dbPath, 0, false, true); // readOnly
|
|
510
|
+
conn = new cgdb.Connection(db);
|
|
511
|
+
return { _db: db, _conn: conn, groupDir };
|
|
439
512
|
}
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
513
|
+
catch (err) {
|
|
514
|
+
if (conn) {
|
|
515
|
+
try {
|
|
516
|
+
await conn.close();
|
|
517
|
+
}
|
|
518
|
+
catch {
|
|
519
|
+
/* ignore */
|
|
520
|
+
}
|
|
443
521
|
}
|
|
444
|
-
|
|
445
|
-
|
|
522
|
+
if (db) {
|
|
523
|
+
try {
|
|
524
|
+
await db.close();
|
|
525
|
+
}
|
|
526
|
+
catch {
|
|
527
|
+
/* ignore */
|
|
528
|
+
}
|
|
446
529
|
}
|
|
530
|
+
if (!isTransientCgdbLockError(err) || attempt === ATTEMPTS - 1)
|
|
531
|
+
return null;
|
|
532
|
+
await new Promise((r) => setTimeout(r, 50 * Math.pow(2, attempt)));
|
|
533
|
+
continue;
|
|
447
534
|
}
|
|
448
|
-
return null;
|
|
449
535
|
}
|
|
536
|
+
return null;
|
|
450
537
|
}
|
|
451
538
|
/* ------------------------------------------------------------------ */
|
|
452
539
|
/* bridgeExists */
|
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Bridge LadybugDB schema for cross-repo Contract Registry.
|
|
3
|
-
* Separate from per-repo schema in
|
|
3
|
+
* Separate from per-repo schema in cgdb/schema.ts.
|
|
4
4
|
*/
|
|
5
5
|
/**
|
|
6
|
-
* Version of the bridge.
|
|
6
|
+
* Version of the bridge.cgdb schema below. `openBridgeDbReadOnly` compares
|
|
7
7
|
* this against `meta.json`'s version field and returns `null` on mismatch,
|
|
8
8
|
* which trips the caller into either the JSON fallback path or a fresh
|
|
9
|
-
* `group sync` that rebuilds `bridge.
|
|
9
|
+
* `group sync` that rebuilds `bridge.cgdb` from scratch.
|
|
10
10
|
*
|
|
11
11
|
* Migration contract for contributors bumping this constant:
|
|
12
12
|
* 1. Bump the number (e.g. `1` → `2`).
|
|
13
13
|
* 2. Update the DDL below to match the new schema.
|
|
14
14
|
* 3. DO NOT attempt an online migration in this file — the version gate
|
|
15
15
|
* is intentionally a "discard and re-sync" strategy for V1. An old
|
|
16
|
-
* bridge.
|
|
16
|
+
* bridge.cgdb whose version doesn't match is treated as opaque and
|
|
17
17
|
* rebuilt by the next `group sync`.
|
|
18
18
|
* 4. If online migration becomes necessary (e.g. when groups accumulate
|
|
19
19
|
* large amounts of embedding data), add a migration path as a
|
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Bridge LadybugDB schema for cross-repo Contract Registry.
|
|
3
|
-
* Separate from per-repo schema in
|
|
3
|
+
* Separate from per-repo schema in cgdb/schema.ts.
|
|
4
4
|
*/
|
|
5
5
|
/**
|
|
6
|
-
* Version of the bridge.
|
|
6
|
+
* Version of the bridge.cgdb schema below. `openBridgeDbReadOnly` compares
|
|
7
7
|
* this against `meta.json`'s version field and returns `null` on mismatch,
|
|
8
8
|
* which trips the caller into either the JSON fallback path or a fresh
|
|
9
|
-
* `group sync` that rebuilds `bridge.
|
|
9
|
+
* `group sync` that rebuilds `bridge.cgdb` from scratch.
|
|
10
10
|
*
|
|
11
11
|
* Migration contract for contributors bumping this constant:
|
|
12
12
|
* 1. Bump the number (e.g. `1` → `2`).
|
|
13
13
|
* 2. Update the DDL below to match the new schema.
|
|
14
14
|
* 3. DO NOT attempt an online migration in this file — the version gate
|
|
15
15
|
* is intentionally a "discard and re-sync" strategy for V1. An old
|
|
16
|
-
* bridge.
|
|
16
|
+
* bridge.cgdb whose version doesn't match is treated as opaque and
|
|
17
17
|
* rebuilt by the next `group sync`.
|
|
18
18
|
* 4. If online migration becomes necessary (e.g. when groups accumulate
|
|
19
19
|
* large amounts of embedding data), add a migration path as a
|
|
@@ -199,19 +199,19 @@ async function ensureBridgeReady(groupDir) {
|
|
|
199
199
|
error: `Bridge schema version mismatch (meta.json has ${meta.version}, expected ${BRIDGE_SCHEMA_VERSION}). Run codragraph group sync for this group.`,
|
|
200
200
|
};
|
|
201
201
|
}
|
|
202
|
-
const dbPath = path.join(groupDir, 'bridge.
|
|
202
|
+
const dbPath = path.join(groupDir, 'bridge.cgdb');
|
|
203
203
|
try {
|
|
204
204
|
await fsp.access(dbPath);
|
|
205
205
|
}
|
|
206
206
|
catch {
|
|
207
207
|
return {
|
|
208
|
-
error: `No bridge.
|
|
208
|
+
error: `No bridge.cgdb in this group directory. Run codragraph group sync (schema ${BRIDGE_SCHEMA_VERSION}).`,
|
|
209
209
|
};
|
|
210
210
|
}
|
|
211
211
|
const handle = await openBridgeDbReadOnly(groupDir);
|
|
212
212
|
if (!handle) {
|
|
213
213
|
return {
|
|
214
|
-
error: `Could not open bridge.
|
|
214
|
+
error: `Could not open bridge.cgdb read-only (schema ${BRIDGE_SCHEMA_VERSION}). Run codragraph group sync.`,
|
|
215
215
|
};
|
|
216
216
|
}
|
|
217
217
|
return { handle };
|
package/dist/core/group/sync.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import fs from 'node:fs/promises';
|
|
2
2
|
import path from 'node:path';
|
|
3
3
|
import { Buffer } from 'node:buffer';
|
|
4
|
-
import {
|
|
4
|
+
import { initCgdb, closeCgdb, executeParameterized } from '../cgdb/pool-adapter.js';
|
|
5
5
|
import { readRegistry } from '../../storage/repo-manager.js';
|
|
6
6
|
import { HttpRouteExtractor } from './extractors/http-route-extractor.js';
|
|
7
7
|
import { GrpcExtractor } from './extractors/grpc-extractor.js';
|
|
@@ -78,9 +78,9 @@ export async function syncGroup(config, opts) {
|
|
|
78
78
|
continue;
|
|
79
79
|
}
|
|
80
80
|
const poolId = handle.id;
|
|
81
|
-
const
|
|
81
|
+
const cgdbPath = path.join(handle.storagePath, 'cgdb');
|
|
82
82
|
try {
|
|
83
|
-
await
|
|
83
|
+
await initCgdb(poolId, cgdbPath);
|
|
84
84
|
openPoolIds.push(poolId);
|
|
85
85
|
const executor = (query, params) => executeParameterized(poolId, query, params ?? {});
|
|
86
86
|
dbExecutors.set(groupPath, executor);
|
|
@@ -139,7 +139,7 @@ export async function syncGroup(config, opts) {
|
|
|
139
139
|
}
|
|
140
140
|
finally {
|
|
141
141
|
for (const id of [...new Set(openPoolIds)]) {
|
|
142
|
-
await
|
|
142
|
+
await closeCgdb(id).catch(() => { });
|
|
143
143
|
}
|
|
144
144
|
}
|
|
145
145
|
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Read-side decoder for `content` columns in lbug node rows.
|
|
3
|
+
*
|
|
4
|
+
* RFC 0001 Phase 2 introduces an optional `contentEncoding` column on
|
|
5
|
+
* every node table that has `content`. Default is `'none'` (passthrough)
|
|
6
|
+
* so existing reads keep working unchanged. When a writer opts into
|
|
7
|
+
* `--compress brotli|zstd`, the column carries the encoding tag and the
|
|
8
|
+
* `content` column carries base64-encoded compressed bytes — readers
|
|
9
|
+
* MUST run those bytes back through `decodeContent` before handing them
|
|
10
|
+
* to a consumer (MCP tool result, HTTP API response, embedding model,
|
|
11
|
+
* LLM input).
|
|
12
|
+
*
|
|
13
|
+
* Centralizing the decode in one helper has two benefits:
|
|
14
|
+
* 1. Shim sites are 2-line changes: add `, n.contentEncoding AS
|
|
15
|
+
* contentEncoding` to the Cypher RETURN, and pipe the row through
|
|
16
|
+
* `decodeContentField` (or `decodeContentRow`) at the boundary.
|
|
17
|
+
* 2. Anyone hunting for "where does the read path decode compressed
|
|
18
|
+
* bytes" greps for `decodeContentField` and gets every site in one
|
|
19
|
+
* shot — no per-table feature detection scattered across files.
|
|
20
|
+
*/
|
|
21
|
+
/**
|
|
22
|
+
* Decode a single (content, contentEncoding) pair from a Cypher row.
|
|
23
|
+
*
|
|
24
|
+
* Returns the input content unchanged when:
|
|
25
|
+
* - the encoding is missing / empty / `'none'` (the common case for
|
|
26
|
+
* 1.6.x – 1.7.x indexes, plus any 1.8+ index written without
|
|
27
|
+
* `--compress`);
|
|
28
|
+
* - content is null/undefined (both come back as `undefined`; caller decides whether that's an error);
|
|
29
|
+
* - content is not a string (pre-Phase-2 indexes never wrote non-string
|
|
30
|
+
* content, but defensive: don't crash a read path on a malformed row).
|
|
31
|
+
*
|
|
32
|
+
* Throws (via `decodeContent`) only when the row claims an encoding this
|
|
33
|
+
* CLI build can't decode — that's a forward-compat error and the right
|
|
34
|
+
* behavior is to fail loudly rather than return wrong content.
|
|
35
|
+
*/
|
|
36
|
+
export declare function decodeContentField(content: unknown, encoding: unknown): string | undefined;
|
|
37
|
+
/**
|
|
38
|
+
* Apply `decodeContentField` to a row that carries `content` and
|
|
39
|
+
* `contentEncoding` keys (or their numeric column-index aliases).
|
|
40
|
+
*
|
|
41
|
+
* The numeric-fallback shape (`r[N]`) mirrors LadybugDB's row format —
|
|
42
|
+
* driver versions vary on whether named keys are populated, so existing
|
|
43
|
+
* read sites do `r.content ?? r[N]`. This helper accepts the same
|
|
44
|
+
* pattern. Never mutates the input: returns a shallow copy when a decode is applied, and the input row itself when no decode is needed.
|
|
45
|
+
*/
|
|
46
|
+
export declare function decodeContentRow<T extends Record<string, unknown>>(row: T, contentKey?: keyof T, encodingKey?: keyof T): T;
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Read-side decoder for `content` columns in lbug node rows.
|
|
3
|
+
*
|
|
4
|
+
* RFC 0001 Phase 2 introduces an optional `contentEncoding` column on
|
|
5
|
+
* every node table that has `content`. Default is `'none'` (passthrough)
|
|
6
|
+
* so existing reads keep working unchanged. When a writer opts into
|
|
7
|
+
* `--compress brotli|zstd`, the column carries the encoding tag and the
|
|
8
|
+
* `content` column carries base64-encoded compressed bytes — readers
|
|
9
|
+
* MUST run those bytes back through `decodeContent` before handing them
|
|
10
|
+
* to a consumer (MCP tool result, HTTP API response, embedding model,
|
|
11
|
+
* LLM input).
|
|
12
|
+
*
|
|
13
|
+
* Centralizing the decode in one helper has two benefits:
|
|
14
|
+
* 1. Shim sites are 2-line changes: add `, n.contentEncoding AS
|
|
15
|
+
* contentEncoding` to the Cypher RETURN, and pipe the row through
|
|
16
|
+
* `decodeContentField` (or `decodeContentRow`) at the boundary.
|
|
17
|
+
* 2. Anyone hunting for "where does the read path decode compressed
|
|
18
|
+
* bytes" greps for `decodeContentField` and gets every site in one
|
|
19
|
+
* shot — no per-table feature detection scattered across files.
|
|
20
|
+
*/
|
|
21
|
+
import { decodeContent } from '@codragraph/graphstore';
|
|
22
|
+
/**
 * Decode one `(content, contentEncoding)` pair taken from a query row.
 *
 * Result by case:
 *   - `content` is `null` or `undefined`  -> `undefined` (null is normalized;
 *     the caller decides whether a missing value is an error);
 *   - `content` is not a string           -> returned as-is, so a malformed
 *     row does not crash the read path;
 *   - `encoding` is not a string, is `''`, or is `'none'`
 *                                         -> `content` returned unchanged;
 *   - otherwise                           -> `decodeContent(content, encoding)`.
 *
 * Nothing is caught here: any error raised by `decodeContent` propagates to
 * the caller (presumably for an encoding tag this build cannot decode —
 * confirm against the graphstore package).
 *
 * @param {unknown} content  Raw value of the row's `content` column.
 * @param {unknown} encoding Raw value of the row's `contentEncoding` column.
 * @returns {string | undefined} Decoded or passed-through content.
 */
export function decodeContentField(content, encoding) {
    // Loose `== null` matches exactly null and undefined.
    if (content == null) {
        return undefined;
    }
    // Only string content carrying a real (non-empty, non-'none') encoding
    // tag is handed to the decoder; everything else passes through untouched.
    const needsDecode = typeof content === 'string' &&
        typeof encoding === 'string' &&
        encoding !== '' &&
        encoding !== 'none';
    return needsDecode ? decodeContent(content, encoding) : content;
}
|
|
47
|
+
/**
 * Run `decodeContentField` over a row that carries a content column and an
 * encoding column.
 *
 * The column keys default to `'content'` and `'contentEncoding'`; callers may
 * pass any other property keys instead (for example numeric column indexes
 * when the driver does not populate named keys).
 *
 * The input row is never mutated:
 *   - when the content value is `null`/`undefined`, or the encoding value is
 *     not a string, is `''`, or is `'none'`, the SAME row reference is
 *     returned untouched;
 *   - otherwise a shallow copy is returned whose content key holds the
 *     result of `decodeContentField`.
 *
 * @param {Record<string, unknown>} row Row object to inspect.
 * @param {PropertyKey} [contentKey='content'] Key of the content column.
 * @param {PropertyKey} [encodingKey='contentEncoding'] Key of the encoding column.
 * @returns {Record<string, unknown>} The original row, or a decoded shallow copy.
 */
export function decodeContentRow(row, contentKey = 'content', encodingKey = 'contentEncoding') {
    const rawContent = row[contentKey];
    // Nothing to decode when the content column is absent or null.
    if (rawContent == null) {
        return row;
    }
    const encodingTag = row[encodingKey];
    const isPassthrough = typeof encodingTag !== 'string' ||
        encodingTag === '' ||
        encodingTag === 'none';
    return isPassthrough
        ? row
        : { ...row, [contentKey]: decodeContentField(rawContent, encodingTag) };
}
|