@codragraph/cli 1.6.3 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -16
- package/dist/cli/ai-context.js +2 -2
- package/dist/cli/analyze.d.ts +22 -0
- package/dist/cli/analyze.js +111 -8
- package/dist/cli/compress-stats.d.ts +29 -0
- package/dist/cli/compress-stats.js +97 -0
- package/dist/cli/graphstore.d.ts +6 -2
- package/dist/cli/graphstore.js +24 -2
- package/dist/cli/index.js +17 -6
- package/dist/cli/profile-heap.d.ts +35 -0
- package/dist/cli/profile-heap.js +126 -0
- package/dist/cli/setup.d.ts +13 -0
- package/dist/cli/setup.js +75 -29
- package/dist/cli/skill-gen.d.ts +14 -2
- package/dist/cli/skill-gen.js +53 -20
- package/dist/cli/tool.js +4 -0
- package/dist/config/ignore-service.js +1 -1
- package/dist/core/embeddings/embedding-pipeline.js +24 -7
- package/dist/core/group/bridge-db.js +111 -24
- package/dist/core/group/extractors/grpc-patterns/proto.js +1 -12
- package/dist/core/ingestion/call-processor.js +2 -2
- package/dist/core/ingestion/cobol/cobol-preprocessor.js +1 -1
- package/dist/core/ingestion/cobol/jcl-parser.d.ts +1 -1
- package/dist/core/ingestion/cobol/jcl-parser.js +1 -1
- package/dist/core/ingestion/cobol-processor.d.ts +1 -1
- package/dist/core/ingestion/cobol-processor.js +1 -1
- package/dist/core/ingestion/heritage-extractors/generic.js +1 -1
- package/dist/core/ingestion/heritage-processor.js +1 -1
- package/dist/core/ingestion/import-processor.js +1 -1
- package/dist/core/ingestion/mro-processor.js +1 -1
- package/dist/core/ingestion/parsing-processor.js +1 -1
- package/dist/core/ingestion/type-extractors/c-cpp.js +1 -1
- package/dist/core/ingestion/type-extractors/python.js +1 -1
- package/dist/core/ingestion/type-extractors/shared.js +0 -3
- package/dist/core/lbug/content-read.d.ts +46 -0
- package/dist/core/lbug/content-read.js +64 -0
- package/dist/core/lbug/csv-generator.d.ts +2 -6
- package/dist/core/lbug/csv-generator.js +45 -12
- package/dist/core/lbug/lbug-adapter.d.ts +4 -1
- package/dist/core/lbug/lbug-adapter.js +157 -25
- package/dist/core/lbug/pool-adapter.js +51 -44
- package/dist/core/lbug/schema.d.ts +7 -7
- package/dist/core/lbug/schema.js +18 -0
- package/dist/core/run-analyze.d.ts +13 -0
- package/dist/core/run-analyze.js +91 -4
- package/dist/core/search/bm25-index.js +153 -12
- package/dist/core/wiki/generator.js +4 -4
- package/dist/mcp/local/local-backend.js +22 -5
- package/dist/mcp/resources.js +2 -3
- package/dist/server/api.js +4 -3
- package/dist/storage/repo-manager.d.ts +39 -0
- package/dist/storage/repo-manager.js +19 -0
- package/hooks/claude/codragraph-hook.cjs +108 -5
- package/hooks/claude/pre-tool-use.sh +6 -1
- package/package.json +4 -4
- package/scripts/build-tree-sitter-proto.cjs +15 -3
- package/scripts/patch-tree-sitter-swift.cjs +17 -4
- package/skills/codragraph-api-surface.md +110 -0
- package/skills/codragraph-cli.md +5 -5
- package/skills/codragraph-config-audit.md +146 -0
- package/skills/codragraph-cross-repo-impact.md +135 -0
- package/skills/codragraph-data-lineage.md +137 -0
- package/skills/codragraph-dead-code.md +119 -0
- package/skills/codragraph-debugging.md +1 -1
- package/skills/codragraph-exploring.md +1 -1
- package/skills/codragraph-gh-actions-debug.md +162 -0
- package/skills/codragraph-gh-issue-workflow.md +178 -0
- package/skills/codragraph-gh-pr-workflow.md +176 -0
- package/skills/codragraph-gh-release-workflow.md +187 -0
- package/skills/codragraph-git-bisect.md +176 -0
- package/skills/codragraph-git-force-push.md +147 -0
- package/skills/codragraph-git-history-rewrite.md +174 -0
- package/skills/codragraph-git-rebase-vs-merge.md +138 -0
- package/skills/codragraph-git-recovery.md +181 -0
- package/skills/codragraph-git-worktree.md +145 -0
- package/skills/codragraph-guide.md +1 -1
- package/skills/codragraph-impact-analysis.md +1 -1
- package/skills/codragraph-migration-tracking.md +130 -0
- package/skills/codragraph-notebook-context.md +136 -0
- package/skills/codragraph-observability-coverage.md +125 -0
- package/skills/codragraph-onboarding.md +129 -0
- package/skills/codragraph-perf-hotspots.md +132 -0
- package/skills/codragraph-pr-review.md +1 -1
- package/skills/codragraph-project-switcher.md +116 -0
- package/skills/codragraph-refactoring.md +1 -1
- package/skills/codragraph-security-audit.md +144 -0
- package/skills/codragraph-sql-tracing.md +122 -0
- package/skills/codragraph-supply-chain-audit.md +153 -0
- package/skills/codragraph-test-coverage.md +97 -0
|
@@ -100,21 +100,93 @@ export async function ensureBridgeSchema(handle) {
|
|
|
100
100
|
}
|
|
101
101
|
}
|
|
102
102
|
}
|
|
103
|
-
|
|
103
|
+
/**
 * Best-effort release of native LadybugDB handles (QueryResult /
 * PreparedStatement) so they are closed explicitly instead of being left
 * for the garbage collector.
 *
 * Follows the same close-order discipline as
 * `core/lbug/lbug-adapter.ts:closeQueryResult`: handles that outlive
 * `conn.close()` corrupt the native file lock on Windows ("Error 33: The
 * process cannot access the file because it is being used by another
 * process") and segfault on process exit elsewhere.
 *
 * @param h A single handle, an array of handles, or a falsy value (no-op).
 *   Entries without a callable `close` are skipped.
 * @returns Resolves once every `close()` has been attempted, in order.
 *   Never rejects: a failing close must not poison the caller's result.
 */
async function closeBridgeHandle(h) {
    if (!h) {
        return;
    }
    // Close one handle, swallowing any failure (including a throwing
    // `close` getter or a finalizer that already ran).
    const closeQuietly = async (target) => {
        try {
            const closeFn = target?.close;
            if (typeof closeFn !== 'function') {
                return;
            }
            await Promise.resolve(closeFn.call(target));
        }
        catch {
            /* best-effort */
        }
    };
    const handles = Array.isArray(h) ? h : [h];
    // Sequential on purpose: handles are released in the order given.
    for (const target of handles) {
        await closeQuietly(target);
    }
}
|
|
127
|
+
/**
 * Decide whether an error is the Windows-only transient file-lock failure
 * surfaced by LadybugDB's native binding right after a writer process
 * closes the same DB file. The symptom is `Error 33` on the read path even
 * though `db.close()` returned cleanly at the JS layer, because the kernel
 * has not fully released the exclusive lock yet. Callers retry with
 * backoff when this returns true.
 *
 * The message is coerced defensively: this predicate runs inside `catch`
 * handlers, so it must never throw itself (a non-string `message` would
 * otherwise raise a TypeError that replaces the real error).
 *
 * @param err Anything that was thrown: an Error, a plain string, an
 *   arbitrary object, or null/undefined.
 * @returns true iff the error text matches one of the known lock messages.
 */
function isTransientLbugLockError(err) {
    // Some layers throw bare strings; treat the string itself as the message.
    const raw = typeof err === 'string' ? err : err?.message;
    const msg = typeof raw === 'string' ? raw : String(raw ?? '');
    return (msg.includes('Error 33') ||
        msg.includes('locked a portion of the file') ||
        msg.includes('cannot access the file because it is being used by another process'));
}
|
|
141
|
+
/**
 * Run one Cypher query against the bridge DB connection, with no retry.
 *
 * Uses a prepared statement when `params` has at least one key, otherwise a
 * plain `conn.query`. Every native handle obtained here is closed before
 * the function settles, on success and on failure alike: the query result
 * first, then the prepared statement.
 *
 * @param handle Bridge handle; only `handle._conn` is used.
 * @param cypher Cypher query text.
 * @param params Optional parameter map; empty or missing means unparameterized.
 * @returns The rows produced by `getAll()` on the unwrapped query result.
 * @throws Error `Bridge query prepare failed: …` when preparation reports
 *   failure; otherwise whatever the connection or result object throws.
 */
async function queryBridgeOnce(handle, cypher, params) {
    const conn = handle._conn;
    const hasParams = Boolean(params) && Object.keys(params).length > 0;
    if (!hasParams) {
        const queryResult = await conn.query(cypher);
        try {
            // Unwrap inside the try so a failure there still closes the result.
            const result = unwrapQueryResult(queryResult);
            return (await result.getAll());
        }
        finally {
            await closeBridgeHandle(queryResult);
        }
    }
    const stmt = await conn.prepare(cypher);
    try {
        if (!stmt.isSuccess()) {
            const errMsg = await stmt.getErrorMessage();
            throw new Error(`Bridge query prepare failed: ${errMsg}`);
        }
        // If execute() itself throws, the outer finally still releases stmt.
        const queryResult = await conn.execute(stmt, params);
        try {
            const result = unwrapQueryResult(queryResult);
            return (await result.getAll());
        }
        finally {
            await closeBridgeHandle(queryResult);
        }
    }
    finally {
        // Runs after the inner finally, so the result closes before the statement.
        await closeBridgeHandle(stmt);
    }
}
|
|
169
|
+
/**
 * Run a Cypher query against the bridge DB, retrying transient lock errors.
 *
 * Reads issued through a freshly opened read-only Database can race the
 * writer's lock release after `db.close()` on Windows + Node 22.14, because
 * the native binding holds the kernel lock briefly after the JS-level close
 * returns. Only errors recognised by `isTransientLbugLockError` are retried;
 * anything else is rethrown immediately.
 *
 * Up to 7 attempts are made. The wait before a retry is 50 ms doubled per
 * attempt (50, 100, … 1600 ms), roughly 3 s in total.
 *
 * @param handle Bridge handle passed through to `queryBridgeOnce`.
 * @param cypher Cypher query text.
 * @param params Optional query parameters.
 * @returns The rows returned by `queryBridgeOnce`.
 * @throws The last error once retries are exhausted, or any non-transient error.
 */
export async function queryBridge(handle, cypher, params) {
    const ATTEMPTS = 7;
    let attempt = 0;
    while (attempt < ATTEMPTS) {
        try {
            return await queryBridgeOnce(handle, cypher, params);
        }
        catch (err) {
            const giveUp = !isTransientLbugLockError(err) || attempt === ATTEMPTS - 1;
            if (giveUp) {
                throw err;
            }
            const delayMs = 50 * Math.pow(2, attempt);
            await new Promise((resolve) => setTimeout(resolve, delayMs));
        }
        attempt += 1;
    }
    // Unreachable: the loop either returns or throws on the last attempt.
    throw new Error('queryBridge: retry loop exited unexpectedly');
}
|
|
119
191
|
/**
|
|
120
192
|
* LadybugDB's `conn.query` / `conn.execute` can return either a single
|
|
@@ -421,32 +493,47 @@ export async function openBridgeDbReadOnly(groupDir) {
|
|
|
421
493
|
// Open the native handle. If Connection construction throws AFTER
|
|
422
494
|
// Database was successfully allocated, we'd leak the native Database
|
|
423
495
|
// object. Wrap each step separately and tear down the partial handle.
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
496
|
+
//
|
|
497
|
+
// Retry on the Windows-transient lock error: the LadybugDB native
|
|
498
|
+
// binding holds the kernel file lock briefly past `db.close()` on
|
|
499
|
+
// Windows + Node 22.14, so a reader that races a recent writer can
|
|
500
|
+
// hit "Error 33: locked a portion of the file" on the constructor's
|
|
501
|
+
// first 4 KB header read. Backoff up to ~3 s lets the writer's lock
|
|
502
|
+
// age out — enough headroom for any normal write→read sequence
|
|
503
|
+
// without becoming a user-visible delay.
|
|
504
|
+
const ATTEMPTS = 7;
|
|
505
|
+
for (let attempt = 0; attempt < ATTEMPTS; attempt++) {
|
|
506
|
+
let db;
|
|
507
|
+
let conn;
|
|
508
|
+
try {
|
|
509
|
+
db = new lbug.Database(dbPath, 0, false, true); // readOnly
|
|
510
|
+
conn = new lbug.Connection(db);
|
|
511
|
+
return { _db: db, _conn: conn, groupDir };
|
|
439
512
|
}
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
513
|
+
catch (err) {
|
|
514
|
+
if (conn) {
|
|
515
|
+
try {
|
|
516
|
+
await conn.close();
|
|
517
|
+
}
|
|
518
|
+
catch {
|
|
519
|
+
/* ignore */
|
|
520
|
+
}
|
|
443
521
|
}
|
|
444
|
-
|
|
445
|
-
|
|
522
|
+
if (db) {
|
|
523
|
+
try {
|
|
524
|
+
await db.close();
|
|
525
|
+
}
|
|
526
|
+
catch {
|
|
527
|
+
/* ignore */
|
|
528
|
+
}
|
|
446
529
|
}
|
|
530
|
+
if (!isTransientLbugLockError(err) || attempt === ATTEMPTS - 1)
|
|
531
|
+
return null;
|
|
532
|
+
await new Promise((r) => setTimeout(r, 50 * Math.pow(2, attempt)));
|
|
533
|
+
continue;
|
|
447
534
|
}
|
|
448
|
-
return null;
|
|
449
535
|
}
|
|
536
|
+
return null;
|
|
450
537
|
}
|
|
451
538
|
/* ------------------------------------------------------------------ */
|
|
452
539
|
/* bridgeExists */
|
|
@@ -31,7 +31,6 @@ if (ProtoGrammar) {
|
|
|
31
31
|
// test runners (vitest forks) when SyntaxNode isn't fully initialized
|
|
32
32
|
// yet. Catching that here ensures `PROTO_GRPC_PLUGIN` stays null and
|
|
33
33
|
// the orchestrator falls back to the manual parser.
|
|
34
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
35
34
|
const _Parser = _require('tree-sitter');
|
|
36
35
|
// Smoke-test: parse + setLanguage to verify the grammar is
|
|
37
36
|
// end-to-end compatible with this tree-sitter runtime.
|
|
@@ -72,24 +71,14 @@ if (ProtoGrammar) {
|
|
|
72
71
|
}
|
|
73
72
|
}
|
|
74
73
|
function buildPlugin() {
|
|
75
|
-
if (!ProtoGrammar || !
|
|
74
|
+
if (!ProtoGrammar || !SERVICE_PATTERNS)
|
|
76
75
|
return null;
|
|
77
|
-
const pkgPatterns = PACKAGE_PATTERNS;
|
|
78
76
|
const svcPatterns = SERVICE_PATTERNS;
|
|
79
77
|
return {
|
|
80
78
|
name: 'proto-grpc',
|
|
81
79
|
language: ProtoGrammar,
|
|
82
80
|
scan(tree) {
|
|
83
81
|
const out = [];
|
|
84
|
-
// Extract `package` declaration (first match wins).
|
|
85
|
-
let pkg = '';
|
|
86
|
-
for (const match of runCompiledPatterns(pkgPatterns, tree)) {
|
|
87
|
-
const pkgNode = match.captures.pkg;
|
|
88
|
-
if (pkgNode) {
|
|
89
|
-
pkg = pkgNode.text;
|
|
90
|
-
break;
|
|
91
|
-
}
|
|
92
|
-
}
|
|
93
82
|
// Extract `service → rpc` pairs. The query returns one match per
|
|
94
83
|
// (service, rpc) combination thanks to the nested structure.
|
|
95
84
|
for (const match of runCompiledPatterns(svcPatterns, tree)) {
|
|
@@ -616,7 +616,7 @@ importedRawReturnTypesMap, heritageMap, bindingAccumulator) => {
|
|
|
616
616
|
bufferSize: getTreeSitterBufferSize(file.content.length),
|
|
617
617
|
});
|
|
618
618
|
}
|
|
619
|
-
catch (
|
|
619
|
+
catch (_parseError) {
|
|
620
620
|
continue;
|
|
621
621
|
}
|
|
622
622
|
astCache.set(file.path, tree);
|
|
@@ -704,7 +704,7 @@ importedRawReturnTypesMap, heritageMap, bindingAccumulator) => {
|
|
|
704
704
|
// loop above, so verifyConstructorBindings sees all provider bindings
|
|
705
705
|
// regardless of file processing order.
|
|
706
706
|
for (let i = 0; i < prepared.length; i++) {
|
|
707
|
-
const { file, language, provider, tree, matches, parentMap, typeEnv } = prepared[i];
|
|
707
|
+
const { file, language, provider, tree: _tree, matches, parentMap, typeEnv } = prepared[i];
|
|
708
708
|
enclosingFnExtractCache.clear();
|
|
709
709
|
onProgress?.(i + 1, files.length);
|
|
710
710
|
if (i % 20 === 0)
|
|
@@ -1404,7 +1404,7 @@ export function extractCobolSymbolsWithRegex(content, _filePath) {
|
|
|
1404
1404
|
if (anonRedefMatch) {
|
|
1405
1405
|
// Check it's truly anonymous: the second capture is not a valid data name
|
|
1406
1406
|
// followed by more clauses — it's the REDEFINES target directly after level
|
|
1407
|
-
const
|
|
1407
|
+
const _level = parseInt(anonRedefMatch[1], 10);
|
|
1408
1408
|
// Only skip if this is genuinely "NN REDEFINES target" with no name between
|
|
1409
1409
|
// We detect this by checking the full data item regex does NOT match
|
|
1410
1410
|
// (because RE_DATA_ITEM expects a name before any clauses)
|
|
@@ -65,4 +65,4 @@ export interface JclParseResults {
|
|
|
65
65
|
* @param filePath - Path for diagnostics (not used in extraction)
|
|
66
66
|
* @returns Parsed JCL results
|
|
67
67
|
*/
|
|
68
|
-
export declare function parseJcl(content: string,
|
|
68
|
+
export declare function parseJcl(content: string, _filePath: string): JclParseResults;
|
|
@@ -73,7 +73,7 @@ function extractDisp(params) {
|
|
|
73
73
|
* @param filePath - Path for diagnostics (not used in extraction)
|
|
74
74
|
* @returns Parsed JCL results
|
|
75
75
|
*/
|
|
76
|
-
export function parseJcl(content,
|
|
76
|
+
export function parseJcl(content, _filePath) {
|
|
77
77
|
const results = {
|
|
78
78
|
jobs: [],
|
|
79
79
|
steps: [],
|
|
@@ -50,5 +50,5 @@ export declare function isJclFile(filePath: string): boolean;
|
|
|
50
50
|
* @param allPathSet - Set of all file paths in the repository
|
|
51
51
|
* @returns Summary of what was extracted
|
|
52
52
|
*/
|
|
53
|
-
export declare const processCobol: (graph: KnowledgeGraph, files: CobolFile[],
|
|
53
|
+
export declare const processCobol: (graph: KnowledgeGraph, files: CobolFile[], _allPathSet: ReadonlySet<string>) => CobolProcessResult;
|
|
54
54
|
export {};
|
|
@@ -47,7 +47,7 @@ function isCopybook(filePath) {
|
|
|
47
47
|
* @param allPathSet - Set of all file paths in the repository
|
|
48
48
|
* @returns Summary of what was extracted
|
|
49
49
|
*/
|
|
50
|
-
export const processCobol = (graph, files,
|
|
50
|
+
export const processCobol = (graph, files, _allPathSet) => {
|
|
51
51
|
const result = {
|
|
52
52
|
programs: 0,
|
|
53
53
|
paragraphs: 0,
|
|
@@ -12,7 +12,7 @@ export function createHeritageExtractor(config) {
|
|
|
12
12
|
const callNameSet = actualConfig.callBasedHeritage?.callNames;
|
|
13
13
|
return {
|
|
14
14
|
language: actualConfig.language,
|
|
15
|
-
extract(captureMap,
|
|
15
|
+
extract(captureMap, _context) {
|
|
16
16
|
const classNode = captureMap['heritage.class'];
|
|
17
17
|
if (!classNode)
|
|
18
18
|
return [];
|
|
@@ -151,7 +151,7 @@ export const processHeritage = async (graph, files, astCache, ctx, onProgress) =
|
|
|
151
151
|
bufferSize: getTreeSitterBufferSize(file.content.length),
|
|
152
152
|
});
|
|
153
153
|
}
|
|
154
|
-
catch (
|
|
154
|
+
catch (_parseError) {
|
|
155
155
|
// Skip files that can't be parsed
|
|
156
156
|
continue;
|
|
157
157
|
}
|
|
@@ -316,7 +316,7 @@ function parameterTypesMatch(a, b, aParamCount, bParamCount) {
|
|
|
316
316
|
*/
|
|
317
317
|
function emitMethodImplementsEdges(graph, parentMap, methodMap, parentEdgeType, ancestorsMap, edgeTypesMap) {
|
|
318
318
|
let edgeCount = 0;
|
|
319
|
-
for (const [classId,
|
|
319
|
+
for (const [classId, _parentIds] of parentMap) {
|
|
320
320
|
const classNode = graph.getNode(classId);
|
|
321
321
|
if (!classNode)
|
|
322
322
|
continue;
|
|
@@ -273,7 +273,7 @@ const processParsingSequential = async (graph, files, symbolTable, astCache, sco
|
|
|
273
273
|
bufferSize: getTreeSitterBufferSize(parseContent.length),
|
|
274
274
|
});
|
|
275
275
|
}
|
|
276
|
-
catch (
|
|
276
|
+
catch (_parseError) {
|
|
277
277
|
console.warn(`Skipping unparseable file: ${file.path}`);
|
|
278
278
|
continue;
|
|
279
279
|
}
|
|
@@ -479,7 +479,7 @@ const inferLiteralType = (node) => {
|
|
|
479
479
|
};
|
|
480
480
|
/** C++: detect constructor type from smart pointer factory calls (make_shared<Dog>()).
|
|
481
481
|
* Extracts the template type argument as the constructor type for virtual dispatch. */
|
|
482
|
-
const detectCppConstructorType = (node,
|
|
482
|
+
const detectCppConstructorType = (node, _classNames) => {
|
|
483
483
|
// Navigate to the initializer value in the declaration
|
|
484
484
|
const declarator = node.childForFieldName('declarator');
|
|
485
485
|
const initDecl = declarator?.type === 'init_declarator' ? declarator : undefined;
|
|
@@ -149,7 +149,7 @@ const scanConstructorBinding = (node) => {
|
|
|
149
149
|
};
|
|
150
150
|
const FOR_LOOP_NODE_TYPES = new Set(['for_statement']);
|
|
151
151
|
/** Python function/method node types that carry a parameters list. */
|
|
152
|
-
const
|
|
152
|
+
const _PY_FUNCTION_NODE_TYPES = new Set(['function_definition', 'decorated_definition']);
|
|
153
153
|
/**
|
|
154
154
|
* Extract element type from a Python type annotation AST node.
|
|
155
155
|
* Handles:
|
|
@@ -564,16 +564,13 @@ export function extractElementTypeFromString(typeStr, pos = 'last') {
|
|
|
564
564
|
const openAngle = typeStr.indexOf('<');
|
|
565
565
|
const openSquare = typeStr.indexOf('[');
|
|
566
566
|
let openIdx = -1;
|
|
567
|
-
let openChar = '';
|
|
568
567
|
let closeChar = '';
|
|
569
568
|
if (openAngle >= 0 && (openSquare < 0 || openAngle < openSquare)) {
|
|
570
569
|
openIdx = openAngle;
|
|
571
|
-
openChar = '<';
|
|
572
570
|
closeChar = '>';
|
|
573
571
|
}
|
|
574
572
|
else if (openSquare >= 0) {
|
|
575
573
|
openIdx = openSquare;
|
|
576
|
-
openChar = '[';
|
|
577
574
|
closeChar = ']';
|
|
578
575
|
}
|
|
579
576
|
if (openIdx < 0)
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Read-side decoder for `content` columns in lbug node rows.
|
|
3
|
+
*
|
|
4
|
+
* RFC 0001 Phase 2 introduces an optional `contentEncoding` column on
|
|
5
|
+
* every node table that has `content`. Default is `'none'` (passthrough)
|
|
6
|
+
* so existing reads keep working unchanged. When a writer opts into
|
|
7
|
+
* `--compress brotli|zstd`, the column carries the encoding tag and the
|
|
8
|
+
* `content` column carries base64-encoded compressed bytes — readers
|
|
9
|
+
* MUST run those bytes back through `decodeContent` before handing them
|
|
10
|
+
* to a consumer (MCP tool result, HTTP API response, embedding model,
|
|
11
|
+
* LLM input).
|
|
12
|
+
*
|
|
13
|
+
* Centralizing the decode in one helper has two benefits:
|
|
14
|
+
* 1. Shim sites are 2-line changes: add `, n.contentEncoding AS
|
|
15
|
+
* contentEncoding` to the Cypher RETURN, and pipe the row through
|
|
16
|
+
* `decodeContentField` (or `decodeContentRow`) at the boundary.
|
|
17
|
+
* 2. Anyone hunting for "where does the read path decode compressed
|
|
18
|
+
* bytes" greps for `decodeContentField` and gets every site in one
|
|
19
|
+
* shot — no per-table feature detection scattered across files.
|
|
20
|
+
*/
|
|
21
|
+
/**
|
|
22
|
+
* Decode a single (content, contentEncoding) pair from a Cypher row.
|
|
23
|
+
*
|
|
24
|
+
* Returns the input content unchanged when:
|
|
25
|
+
* - the encoding is missing / empty / `'none'` (the common case for
|
|
26
|
+
* 1.6.x – 1.7.x indexes, plus any 1.8+ index written without
|
|
27
|
+
* `--compress`);
|
|
28
|
+
* - content is null/undefined (caller decides whether that's an error);
|
|
29
|
+
* - content is not a string (pre-Phase-2 indexes never wrote non-string
|
|
30
|
+
* content, but defensive: don't crash a read path on a malformed row).
|
|
31
|
+
*
|
|
32
|
+
* Throws (via `decodeContent`) only when the row claims an encoding this
|
|
33
|
+
* CLI build can't decode — that's a forward-compat error and the right
|
|
34
|
+
* behavior is to fail loudly rather than return wrong content.
|
|
35
|
+
*/
|
|
36
|
+
export declare function decodeContentField(content: unknown, encoding: unknown): string | undefined;
|
|
37
|
+
/**
|
|
38
|
+
* Apply `decodeContentField` to a row that carries `content` and
|
|
39
|
+
* `contentEncoding` keys (or their numeric column-index aliases).
|
|
40
|
+
*
|
|
41
|
+
* The numeric-fallback shape (`r[N]`) mirrors LadybugDB's row format —
|
|
42
|
+
* driver versions vary on whether named keys are populated, so existing
|
|
43
|
+
* read sites do `r.content ?? r[N]`. This helper accepts the same
|
|
44
|
+
* pattern. Never mutates the input: returns a shallow copy when a decode is applied, otherwise the original row.
|
|
45
|
+
*/
|
|
46
|
+
export declare function decodeContentRow<T extends Record<string, unknown>>(row: T, contentKey?: keyof T, encodingKey?: keyof T): T;
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Read-side decoder for `content` columns in lbug node rows.
|
|
3
|
+
*
|
|
4
|
+
* RFC 0001 Phase 2 introduces an optional `contentEncoding` column on
|
|
5
|
+
* every node table that has `content`. Default is `'none'` (passthrough)
|
|
6
|
+
* so existing reads keep working unchanged. When a writer opts into
|
|
7
|
+
* `--compress brotli|zstd`, the column carries the encoding tag and the
|
|
8
|
+
* `content` column carries base64-encoded compressed bytes — readers
|
|
9
|
+
* MUST run those bytes back through `decodeContent` before handing them
|
|
10
|
+
* to a consumer (MCP tool result, HTTP API response, embedding model,
|
|
11
|
+
* LLM input).
|
|
12
|
+
*
|
|
13
|
+
* Centralizing the decode in one helper has two benefits:
|
|
14
|
+
* 1. Shim sites are 2-line changes: add `, n.contentEncoding AS
|
|
15
|
+
* contentEncoding` to the Cypher RETURN, and pipe the row through
|
|
16
|
+
* `decodeContentField` (or `decodeContentRow`) at the boundary.
|
|
17
|
+
* 2. Anyone hunting for "where does the read path decode compressed
|
|
18
|
+
* bytes" greps for `decodeContentField` and gets every site in one
|
|
19
|
+
* shot — no per-table feature detection scattered across files.
|
|
20
|
+
*/
|
|
21
|
+
import { decodeContent } from '@codragraph/graphstore';
|
|
22
|
+
/**
 * Decode one (content, contentEncoding) pair taken from a Cypher row.
 *
 * Outcomes, in order of evaluation:
 *   - content is null or undefined: returns `undefined` (the caller decides
 *     whether a missing body is an error);
 *   - content is not a string: returned as-is, so a malformed row does not
 *     crash the read path;
 *   - encoding is not a string, is empty, or is `'none'`: content returned
 *     as-is (the case for indexes written without `--compress`);
 *   - otherwise: the result of `decodeContent(content, encoding)`.
 *
 * Any error raised by `decodeContent` propagates to the caller rather than
 * being turned into wrong content.
 *
 * @param content Raw value of the row's `content` column.
 * @param encoding Raw value of the row's `contentEncoding` column.
 * @returns The decoded or passed-through content, or `undefined`.
 */
export function decodeContentField(content, encoding) {
    if (content === null || content === undefined) {
        return undefined;
    }
    const hasEncodingTag = typeof encoding === 'string' && encoding !== '' && encoding !== 'none';
    if (typeof content !== 'string' || !hasEncodingTag) {
        return content;
    }
    return decodeContent(content, encoding);
}
|
|
47
|
+
/**
 * Row-level wrapper around `decodeContentField`.
 *
 * Looks up the content and encoding values under the given keys, which may
 * be named keys or numeric column indexes (read sites use the
 * `r.content ?? r[N]` pattern because driver versions differ on whether
 * named keys are populated).
 *
 * The input row is never mutated. When there is nothing to decode (content
 * is null/undefined, or the encoding is not a string, is empty, or is
 * `'none'`) the same row object is returned; otherwise a shallow copy with
 * the content key replaced by the decoded value is returned.
 *
 * @param row Row object from a query result.
 * @param contentKey Key holding the content; defaults to `'content'`.
 * @param encodingKey Key holding the encoding tag; defaults to `'contentEncoding'`.
 * @returns The original row, or a shallow copy carrying decoded content.
 */
export function decodeContentRow(row, contentKey = 'content', encodingKey = 'contentEncoding') {
    const rawContent = row[contentKey];
    if (rawContent === null || rawContent === undefined) {
        return row;
    }
    const tag = row[encodingKey];
    const needsDecode = typeof tag === 'string' && tag !== '' && tag !== 'none';
    if (!needsDecode) {
        return row;
    }
    const decodedRow = { ...row };
    decodedRow[contentKey] = decodeContentField(rawContent, tag);
    return decodedRow;
}
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
*/
|
|
14
14
|
import { KnowledgeGraph } from '../graph/types.js';
|
|
15
15
|
import { NodeTableName } from './schema.js';
|
|
16
|
+
import { type ContentEncoding } from '@codragraph/graphstore';
|
|
16
17
|
export declare const sanitizeUTF8: (str: string) => string;
|
|
17
18
|
export declare const escapeCSVField: (value: string | number | undefined | null) => string;
|
|
18
19
|
export declare const escapeCSVNumber: (value: number | undefined | null, defaultValue?: number) => string;
|
|
@@ -25,9 +26,4 @@ export interface StreamedCSVResult {
|
|
|
25
26
|
relCsvPath: string;
|
|
26
27
|
relRows: number;
|
|
27
28
|
}
|
|
28
|
-
|
|
29
|
-
* Stream all CSV data directly to disk files.
|
|
30
|
-
* Iterates graph nodes exactly ONCE — routes each node to the right writer.
|
|
31
|
-
* File contents are lazy-read from disk with a generous LRU cache.
|
|
32
|
-
*/
|
|
33
|
-
export declare const streamAllCSVsToDisk: (graph: KnowledgeGraph, repoPath: string, csvDir: string) => Promise<StreamedCSVResult>;
|
|
29
|
+
export declare const streamAllCSVsToDisk: (graph: KnowledgeGraph, repoPath: string, csvDir: string, compress?: ContentEncoding) => Promise<StreamedCSVResult>;
|
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
import fs from 'fs/promises';
|
|
15
15
|
import { createWriteStream } from 'fs';
|
|
16
16
|
import path from 'path';
|
|
17
|
+
import { encodeContent } from '@codragraph/graphstore';
|
|
17
18
|
/** Flush buffered rows to disk every N rows */
|
|
18
19
|
const FLUSH_EVERY = 500;
|
|
19
20
|
// ============================================================================
|
|
@@ -184,7 +185,26 @@ class BufferedCSVWriter {
|
|
|
184
185
|
* Iterates graph nodes exactly ONCE — routes each node to the right writer.
|
|
185
186
|
* File contents are lazy-read from disk with a generous LRU cache.
|
|
186
187
|
*/
|
|
187
|
-
|
|
188
|
+
/**
 * Produce the on-the-wire `content` string and its `contentEncoding` tag
 * for one CSV row (RFC 0001 Phase 2).
 *
 * A falsy `compress` or the literal `'none'` means passthrough: the content
 * is returned unchanged with the tag `'none'`, which matches the schema
 * DEFAULT. Any other value is handed to `encodeContent` and echoed back as
 * the tag.
 *
 * The tag is produced in every mode so the CSV / COPY / schema layouts stay
 * uniform whether or not compression is on; the cost is a few characters
 * per row.
 *
 * @param content Text destined for the `content` column.
 * @param compress Requested encoding, or undefined / `'none'` for passthrough.
 * @returns `{ wireContent, tag }` for the two adjacent CSV columns.
 */
const applyEncoding = (content, compress) => {
    const passthrough = !compress || compress === 'none';
    return passthrough
        ? { wireContent: content, tag: 'none' }
        : { wireContent: encodeContent(content, compress), tag: compress };
};
|
|
207
|
+
export const streamAllCSVsToDisk = async (graph, repoPath, csvDir, compress) => {
|
|
188
208
|
// Remove stale CSVs from previous crashed runs, then recreate
|
|
189
209
|
try {
|
|
190
210
|
await fs.rm(csvDir, { recursive: true, force: true });
|
|
@@ -196,26 +216,29 @@ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
|
|
|
196
216
|
const prevMax = process.getMaxListeners();
|
|
197
217
|
process.setMaxListeners(prevMax + 40);
|
|
198
218
|
const contentCache = new FileContentCache(repoPath);
|
|
199
|
-
// Create writers for every node type up-front
|
|
200
|
-
|
|
219
|
+
// Create writers for every node type up-front. Content-bearing tables
|
|
220
|
+
// carry an extra `contentEncoding` column right after `content` to
|
|
221
|
+
// match the Phase 2 schema layout. Tables without `content` (Folder,
|
|
222
|
+
// Community, Process, Route, Tool) are unchanged.
|
|
223
|
+
const fileWriter = new BufferedCSVWriter(path.join(csvDir, 'file.csv'), 'id,name,filePath,content,contentEncoding');
|
|
201
224
|
const folderWriter = new BufferedCSVWriter(path.join(csvDir, 'folder.csv'), 'id,name,filePath');
|
|
202
|
-
const codeElementHeader = 'id,name,filePath,startLine,endLine,isExported,content,description';
|
|
225
|
+
const codeElementHeader = 'id,name,filePath,startLine,endLine,isExported,content,contentEncoding,description';
|
|
203
226
|
const functionWriter = new BufferedCSVWriter(path.join(csvDir, 'function.csv'), codeElementHeader);
|
|
204
227
|
const classWriter = new BufferedCSVWriter(path.join(csvDir, 'class.csv'), codeElementHeader);
|
|
205
228
|
const interfaceWriter = new BufferedCSVWriter(path.join(csvDir, 'interface.csv'), codeElementHeader);
|
|
206
|
-
const methodHeader = 'id,name,filePath,startLine,endLine,isExported,content,description,parameterCount,returnType';
|
|
229
|
+
const methodHeader = 'id,name,filePath,startLine,endLine,isExported,content,contentEncoding,description,parameterCount,returnType';
|
|
207
230
|
const methodWriter = new BufferedCSVWriter(path.join(csvDir, 'method.csv'), methodHeader);
|
|
208
231
|
const codeElemWriter = new BufferedCSVWriter(path.join(csvDir, 'codeelement.csv'), codeElementHeader);
|
|
209
232
|
const communityWriter = new BufferedCSVWriter(path.join(csvDir, 'community.csv'), 'id,label,heuristicLabel,keywords,description,enrichedBy,cohesion,symbolCount');
|
|
210
233
|
const processWriter = new BufferedCSVWriter(path.join(csvDir, 'process.csv'), 'id,label,heuristicLabel,processType,stepCount,communities,entryPointId,terminalId');
|
|
211
234
|
// Section nodes have an extra 'level' column
|
|
212
|
-
const sectionWriter = new BufferedCSVWriter(path.join(csvDir, 'section.csv'), 'id,name,filePath,startLine,endLine,level,content,description');
|
|
235
|
+
const sectionWriter = new BufferedCSVWriter(path.join(csvDir, 'section.csv'), 'id,name,filePath,startLine,endLine,level,content,contentEncoding,description');
|
|
213
236
|
// Route nodes for API endpoint mapping
|
|
214
237
|
const routeWriter = new BufferedCSVWriter(path.join(csvDir, 'route.csv'), 'id,name,filePath,responseKeys,errorKeys,middleware');
|
|
215
238
|
// Tool nodes for MCP tool definitions
|
|
216
239
|
const toolWriter = new BufferedCSVWriter(path.join(csvDir, 'tool.csv'), 'id,name,filePath,description');
|
|
217
240
|
// Multi-language node types share the same CSV shape (no isExported column)
|
|
218
|
-
const multiLangHeader = 'id,name,filePath,startLine,endLine,content,description';
|
|
241
|
+
const multiLangHeader = 'id,name,filePath,startLine,endLine,content,contentEncoding,description';
|
|
219
242
|
const MULTI_LANG_TYPES = [
|
|
220
243
|
'Struct',
|
|
221
244
|
'Enum',
|
|
@@ -259,11 +282,13 @@ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
|
|
|
259
282
|
switch (node.label) {
|
|
260
283
|
case 'File': {
|
|
261
284
|
const content = await extractContent(node, contentCache);
|
|
285
|
+
const { wireContent, tag } = applyEncoding(content, compress);
|
|
262
286
|
await fileWriter.addRow([
|
|
263
287
|
escapeCSVField(node.id),
|
|
264
288
|
escapeCSVField(node.properties.name || ''),
|
|
265
289
|
escapeCSVField(node.properties.filePath || ''),
|
|
266
|
-
escapeCSVField(
|
|
290
|
+
escapeCSVField(wireContent),
|
|
291
|
+
escapeCSVField(tag),
|
|
267
292
|
].join(','));
|
|
268
293
|
break;
|
|
269
294
|
}
|
|
@@ -306,6 +331,7 @@ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
|
|
|
306
331
|
}
|
|
307
332
|
case 'Method': {
|
|
308
333
|
const content = await extractContent(node, contentCache);
|
|
334
|
+
const { wireContent, tag } = applyEncoding(content, compress);
|
|
309
335
|
await methodWriter.addRow([
|
|
310
336
|
escapeCSVField(node.id),
|
|
311
337
|
escapeCSVField(node.properties.name || ''),
|
|
@@ -313,7 +339,8 @@ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
|
|
|
313
339
|
escapeCSVNumber(node.properties.startLine, -1),
|
|
314
340
|
escapeCSVNumber(node.properties.endLine, -1),
|
|
315
341
|
node.properties.isExported ? 'true' : 'false',
|
|
316
|
-
escapeCSVField(
|
|
342
|
+
escapeCSVField(wireContent),
|
|
343
|
+
escapeCSVField(tag),
|
|
317
344
|
escapeCSVField(node.properties.description || ''),
|
|
318
345
|
escapeCSVNumber(node.properties.parameterCount, 0),
|
|
319
346
|
escapeCSVField(node.properties.returnType || ''),
|
|
@@ -322,6 +349,7 @@ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
|
|
|
322
349
|
}
|
|
323
350
|
case 'Section': {
|
|
324
351
|
const content = await extractContent(node, contentCache);
|
|
352
|
+
const { wireContent, tag } = applyEncoding(content, compress);
|
|
325
353
|
await sectionWriter.addRow([
|
|
326
354
|
escapeCSVField(node.id),
|
|
327
355
|
escapeCSVField(node.properties.name || ''),
|
|
@@ -329,7 +357,8 @@ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
|
|
|
329
357
|
escapeCSVNumber(node.properties.startLine, -1),
|
|
330
358
|
escapeCSVNumber(node.properties.endLine, -1),
|
|
331
359
|
escapeCSVNumber(node.properties.level, 1),
|
|
332
|
-
escapeCSVField(
|
|
360
|
+
escapeCSVField(wireContent),
|
|
361
|
+
escapeCSVField(tag),
|
|
333
362
|
escapeCSVField(node.properties.description || ''),
|
|
334
363
|
].join(','));
|
|
335
364
|
break;
|
|
@@ -366,6 +395,7 @@ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
|
|
|
366
395
|
const writer = codeWriterMap[node.label];
|
|
367
396
|
if (writer) {
|
|
368
397
|
const content = await extractContent(node, contentCache);
|
|
398
|
+
const { wireContent, tag } = applyEncoding(content, compress);
|
|
369
399
|
await writer.addRow([
|
|
370
400
|
escapeCSVField(node.id),
|
|
371
401
|
escapeCSVField(node.properties.name || ''),
|
|
@@ -373,7 +403,8 @@ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
|
|
|
373
403
|
escapeCSVNumber(node.properties.startLine, -1),
|
|
374
404
|
escapeCSVNumber(node.properties.endLine, -1),
|
|
375
405
|
node.properties.isExported ? 'true' : 'false',
|
|
376
|
-
escapeCSVField(
|
|
406
|
+
escapeCSVField(wireContent),
|
|
407
|
+
escapeCSVField(tag),
|
|
377
408
|
escapeCSVField(node.properties.description || ''),
|
|
378
409
|
].join(','));
|
|
379
410
|
}
|
|
@@ -382,13 +413,15 @@ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
|
|
|
382
413
|
const mlWriter = multiLangWriters.get(node.label);
|
|
383
414
|
if (mlWriter) {
|
|
384
415
|
const content = await extractContent(node, contentCache);
|
|
416
|
+
const { wireContent, tag } = applyEncoding(content, compress);
|
|
385
417
|
await mlWriter.addRow([
|
|
386
418
|
escapeCSVField(node.id),
|
|
387
419
|
escapeCSVField(node.properties.name || ''),
|
|
388
420
|
escapeCSVField(node.properties.filePath || ''),
|
|
389
421
|
escapeCSVNumber(node.properties.startLine, -1),
|
|
390
422
|
escapeCSVNumber(node.properties.endLine, -1),
|
|
391
|
-
escapeCSVField(
|
|
423
|
+
escapeCSVField(wireContent),
|
|
424
|
+
escapeCSVField(tag),
|
|
392
425
|
escapeCSVField(node.properties.description || ''),
|
|
393
426
|
].join(','));
|
|
394
427
|
}
|