npm - @codragraph/cli - Versions diffs - 1.6.3 → 2.0.0 - Mend

@codragraph/cli 1.6.3 → 2.0.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (89)

package/README.md +50 -16
package/dist/cli/ai-context.js +2 -2
package/dist/cli/analyze.d.ts +22 -0
package/dist/cli/analyze.js +111 -8
package/dist/cli/compress-stats.d.ts +29 -0
package/dist/cli/compress-stats.js +97 -0
package/dist/cli/graphstore.d.ts +6 -2
package/dist/cli/graphstore.js +24 -2
package/dist/cli/index.js +17 -6
package/dist/cli/profile-heap.d.ts +35 -0
package/dist/cli/profile-heap.js +126 -0
package/dist/cli/setup.d.ts +13 -0
package/dist/cli/setup.js +75 -29
package/dist/cli/skill-gen.d.ts +14 -2
package/dist/cli/skill-gen.js +53 -20
package/dist/cli/tool.js +4 -0
package/dist/config/ignore-service.js +1 -1
package/dist/core/embeddings/embedding-pipeline.js +24 -7
package/dist/core/group/bridge-db.js +111 -24
package/dist/core/group/extractors/grpc-patterns/proto.js +1 -12
package/dist/core/ingestion/call-processor.js +2 -2
package/dist/core/ingestion/cobol/cobol-preprocessor.js +1 -1
package/dist/core/ingestion/cobol/jcl-parser.d.ts +1 -1
package/dist/core/ingestion/cobol/jcl-parser.js +1 -1
package/dist/core/ingestion/cobol-processor.d.ts +1 -1
package/dist/core/ingestion/cobol-processor.js +1 -1
package/dist/core/ingestion/heritage-extractors/generic.js +1 -1
package/dist/core/ingestion/heritage-processor.js +1 -1
package/dist/core/ingestion/import-processor.js +1 -1
package/dist/core/ingestion/mro-processor.js +1 -1
package/dist/core/ingestion/parsing-processor.js +1 -1
package/dist/core/ingestion/type-extractors/c-cpp.js +1 -1
package/dist/core/ingestion/type-extractors/python.js +1 -1
package/dist/core/ingestion/type-extractors/shared.js +0 -3
package/dist/core/lbug/content-read.d.ts +46 -0
package/dist/core/lbug/content-read.js +64 -0
package/dist/core/lbug/csv-generator.d.ts +2 -6
package/dist/core/lbug/csv-generator.js +45 -12
package/dist/core/lbug/lbug-adapter.d.ts +4 -1
package/dist/core/lbug/lbug-adapter.js +157 -25
package/dist/core/lbug/pool-adapter.js +51 -44
package/dist/core/lbug/schema.d.ts +7 -7
package/dist/core/lbug/schema.js +18 -0
package/dist/core/run-analyze.d.ts +13 -0
package/dist/core/run-analyze.js +91 -4
package/dist/core/search/bm25-index.js +153 -12
package/dist/core/wiki/generator.js +4 -4
package/dist/mcp/local/local-backend.js +22 -5
package/dist/mcp/resources.js +2 -3
package/dist/server/api.js +4 -3
package/dist/storage/repo-manager.d.ts +39 -0
package/dist/storage/repo-manager.js +19 -0
package/hooks/claude/codragraph-hook.cjs +108 -5
package/hooks/claude/pre-tool-use.sh +6 -1
package/package.json +4 -4
package/scripts/build-tree-sitter-proto.cjs +15 -3
package/scripts/patch-tree-sitter-swift.cjs +17 -4
package/skills/codragraph-api-surface.md +110 -0
package/skills/codragraph-cli.md +5 -5
package/skills/codragraph-config-audit.md +146 -0
package/skills/codragraph-cross-repo-impact.md +135 -0
package/skills/codragraph-data-lineage.md +137 -0
package/skills/codragraph-dead-code.md +119 -0
package/skills/codragraph-debugging.md +1 -1
package/skills/codragraph-exploring.md +1 -1
package/skills/codragraph-gh-actions-debug.md +162 -0
package/skills/codragraph-gh-issue-workflow.md +178 -0
package/skills/codragraph-gh-pr-workflow.md +176 -0
package/skills/codragraph-gh-release-workflow.md +187 -0
package/skills/codragraph-git-bisect.md +176 -0
package/skills/codragraph-git-force-push.md +147 -0
package/skills/codragraph-git-history-rewrite.md +174 -0
package/skills/codragraph-git-rebase-vs-merge.md +138 -0
package/skills/codragraph-git-recovery.md +181 -0
package/skills/codragraph-git-worktree.md +145 -0
package/skills/codragraph-guide.md +1 -1
package/skills/codragraph-impact-analysis.md +1 -1
package/skills/codragraph-migration-tracking.md +130 -0
package/skills/codragraph-notebook-context.md +136 -0
package/skills/codragraph-observability-coverage.md +125 -0
package/skills/codragraph-onboarding.md +129 -0
package/skills/codragraph-perf-hotspots.md +132 -0
package/skills/codragraph-pr-review.md +1 -1
package/skills/codragraph-project-switcher.md +116 -0
package/skills/codragraph-refactoring.md +1 -1
package/skills/codragraph-security-audit.md +144 -0
package/skills/codragraph-sql-tracing.md +122 -0
package/skills/codragraph-supply-chain-audit.md +153 -0
package/skills/codragraph-test-coverage.md +97 -0

package/dist/core/group/bridge-db.js CHANGED Viewed

@@ -100,21 +100,93 @@ export async function ensureBridgeSchema(handle) {
         }
     }
 }
-export async function queryBridge(handle, cypher, params) {
+/**
+ * Close every QueryResult / PreparedStatement before letting V8 GC them.
+ * Same close-order discipline as `core/lbug/lbug-adapter.ts:closeQueryResult`
+ * — leaking these handles past `conn.close()` corrupts LadybugDB's native
+ * file lock on Windows ("Error 33: The process cannot access the file
+ * because it is being used by another process") and segfaults on
+ * process exit elsewhere. Best-effort: wrap close calls in try/catch so
+ * a finalizer that already ran doesn't poison the queryBridge return.
+ */
+async function closeBridgeHandle(h) {
+    if (!h)
+        return;
+    const candidates = Array.isArray(h) ? h : [h];
+    for (const r of candidates) {
+        try {
+            const close = r?.close;
+            if (typeof close === 'function')
+                await Promise.resolve(close.call(r));
+        }
+        catch {
+            /* best-effort */
+        }
+    }
+}
+/**
+ * True iff the error is a Windows-only transient file-lock surfaced by
+ * LadybugDB's native binding immediately after a writer process closes
+ * the same DB file. Symptom is `Error 33` on the read path even though
+ * `db.close()` returned cleanly at the JS layer — the kernel hasn't
+ * fully released the exclusive lock yet. Retrying with backoff is the
+ * documented workaround for this class of Windows-fs interactions.
+ */
+function isTransientLbugLockError(err) {
+    const msg = err?.message ?? '';
+    return (msg.includes('Error 33') ||
+        msg.includes('locked a portion of the file') ||
+        msg.includes('cannot access the file because it is being used by another process'));
+}
+async function queryBridgeOnce(handle, cypher, params) {
     const conn = handle._conn;
     if (params && Object.keys(params).length > 0) {
         const stmt = await conn.prepare(cypher);
         if (!stmt.isSuccess()) {
             const errMsg = await stmt.getErrorMessage();
+            await closeBridgeHandle(stmt);
             throw new Error(`Bridge query prepare failed: ${errMsg}`);
         }
         const queryResult = await conn.execute(stmt, params);
         const result = unwrapQueryResult(queryResult);
-        return (await result.getAll());
+        try {
+            return (await result.getAll());
+        }
+        finally {
+            await closeBridgeHandle(queryResult);
+            await closeBridgeHandle(stmt);
+        }
     }
     const queryResult = await conn.query(cypher);
     const result = unwrapQueryResult(queryResult);
-    return (await result.getAll());
+    try {
+        return (await result.getAll());
+    }
+    finally {
+        await closeBridgeHandle(queryResult);
+    }
+}
+export async function queryBridge(handle, cypher, params) {
+    // Retry on Windows-transient file-lock errors. Reads issued through a
+    // freshly-opened readonly Database can race the writer's
+    // post-`db.close()` lock release on Windows + Node 22.14 (LadybugDB
+    // native binding holds the kernel lock briefly after the JS-level
+    // close returns). Backoff doubles per attempt up to ~3 s total — well
+    // below any user-visible CLI delay budget but enough to absorb a slow
+    // Windows kernel lock release.
+    const ATTEMPTS = 7;
+    for (let attempt = 0; attempt < ATTEMPTS; attempt++) {
+        try {
+            return await queryBridgeOnce(handle, cypher, params);
+        }
+        catch (err) {
+            if (!isTransientLbugLockError(err) || attempt === ATTEMPTS - 1)
+                throw err;
+            await new Promise((r) => setTimeout(r, 50 * Math.pow(2, attempt)));
+        }
+    }
+    // Unreachable: the loop either returns or throws on the last attempt.
+    throw new Error('queryBridge: retry loop exited unexpectedly');
 }
 /**
  * LadybugDB's `conn.query` / `conn.execute` can return either a single
@@ -421,32 +493,47 @@ export async function openBridgeDbReadOnly(groupDir) {
     // Open the native handle. If Connection construction throws AFTER
     // Database was successfully allocated, we'd leak the native Database
     // object. Wrap each step separately and tear down the partial handle.
-    let db;
-    let conn;
-    try {
-        db = new lbug.Database(dbPath, 0, false, true); // readOnly
-        conn = new lbug.Connection(db);
-        return { _db: db, _conn: conn, groupDir };
-    }
-    catch {
-        if (conn) {
-            try {
-                await conn.close();
-            }
-            catch {
-                /* ignore */
-            }
+    //
+    // Retry on the Windows-transient lock error: the LadybugDB native
+    // binding holds the kernel file lock briefly past `db.close()` on
+    // Windows + Node 22.14, so a reader that races a recent writer can
+    // hit "Error 33: locked a portion of the file" on the constructor's
+    // first 4 KB header read. Backoff up to ~3 s lets the writer's lock
+    // age out — enough headroom for any normal write→read sequence
+    // without becoming a user-visible delay.
+    const ATTEMPTS = 7;
+    for (let attempt = 0; attempt < ATTEMPTS; attempt++) {
+        let db;
+        let conn;
+        try {
+            db = new lbug.Database(dbPath, 0, false, true); // readOnly
+            conn = new lbug.Connection(db);
+            return { _db: db, _conn: conn, groupDir };
         }
-        if (db) {
-            try {
-                await db.close();
+        catch (err) {
+            if (conn) {
+                try {
+                    await conn.close();
+                }
+                catch {
+                    /* ignore */
+                }
             }
-            catch {
-                /* ignore */
+            if (db) {
+                try {
+                    await db.close();
+                }
+                catch {
+                    /* ignore */
+                }
             }
+            if (!isTransientLbugLockError(err) || attempt === ATTEMPTS - 1)
+                return null;
+            await new Promise((r) => setTimeout(r, 50 * Math.pow(2, attempt)));
+            continue;
         }
-        return null;
     }
+    return null;
 }
 /* ------------------------------------------------------------------ */
 /*  bridgeExists                                                       */

package/dist/core/group/extractors/grpc-patterns/proto.js CHANGED Viewed

@@ -31,7 +31,6 @@ if (ProtoGrammar) {
         // test runners (vitest forks) when SyntaxNode isn't fully initialized
         // yet. Catching that here ensures `PROTO_GRPC_PLUGIN` stays null and
         // the orchestrator falls back to the manual parser.
-        // eslint-disable-next-line @typescript-eslint/no-explicit-any
         const _Parser = _require('tree-sitter');
         // Smoke-test: parse + setLanguage to verify the grammar is
         // end-to-end compatible with this tree-sitter runtime.
@@ -72,24 +71,14 @@ if (ProtoGrammar) {
     }
 }
 function buildPlugin() {
-    if (!ProtoGrammar || !PACKAGE_PATTERNS || !SERVICE_PATTERNS)
+    if (!ProtoGrammar || !SERVICE_PATTERNS)
         return null;
-    const pkgPatterns = PACKAGE_PATTERNS;
     const svcPatterns = SERVICE_PATTERNS;
     return {
         name: 'proto-grpc',
         language: ProtoGrammar,
         scan(tree) {
             const out = [];
-            // Extract `package` declaration (first match wins).
-            let pkg = '';
-            for (const match of runCompiledPatterns(pkgPatterns, tree)) {
-                const pkgNode = match.captures.pkg;
-                if (pkgNode) {
-                    pkg = pkgNode.text;
-                    break;
-                }
-            }
             // Extract `service → rpc` pairs. The query returns one match per
             // (service, rpc) combination thanks to the nested structure.
             for (const match of runCompiledPatterns(svcPatterns, tree)) {

package/dist/core/ingestion/call-processor.js CHANGED Viewed

@@ -616,7 +616,7 @@ importedRawReturnTypesMap, heritageMap, bindingAccumulator) => {
                     bufferSize: getTreeSitterBufferSize(file.content.length),
                 });
             }
-            catch (parseError) {
+            catch (_parseError) {
                 continue;
             }
             astCache.set(file.path, tree);
@@ -704,7 +704,7 @@ importedRawReturnTypesMap, heritageMap, bindingAccumulator) => {
     // loop above, so verifyConstructorBindings sees all provider bindings
     // regardless of file processing order.
     for (let i = 0; i < prepared.length; i++) {
-        const { file, language, provider, tree, matches, parentMap, typeEnv } = prepared[i];
+        const { file, language, provider, tree: _tree, matches, parentMap, typeEnv } = prepared[i];
         enclosingFnExtractCache.clear();
         onProgress?.(i + 1, files.length);
         if (i % 20 === 0)

package/dist/core/ingestion/cobol/cobol-preprocessor.js CHANGED Viewed

@@ -1404,7 +1404,7 @@ export function extractCobolSymbolsWithRegex(content, _filePath) {
         if (anonRedefMatch) {
             // Check it's truly anonymous: the second capture is not a valid data name
             // followed by more clauses — it's the REDEFINES target directly after level
-            const level = parseInt(anonRedefMatch[1], 10);
+            const _level = parseInt(anonRedefMatch[1], 10);
             // Only skip if this is genuinely "NN REDEFINES target" with no name between
             // We detect this by checking the full data item regex does NOT match
             // (because RE_DATA_ITEM expects a name before any clauses)

package/dist/core/ingestion/cobol/jcl-parser.d.ts CHANGED Viewed

@@ -65,4 +65,4 @@ export interface JclParseResults {
  * @param filePath - Path for diagnostics (not used in extraction)
  * @returns Parsed JCL results
  */
-export declare function parseJcl(content: string, filePath: string): JclParseResults;
+export declare function parseJcl(content: string, _filePath: string): JclParseResults;

package/dist/core/ingestion/cobol/jcl-parser.js CHANGED Viewed

@@ -73,7 +73,7 @@ function extractDisp(params) {
  * @param filePath - Path for diagnostics (not used in extraction)
  * @returns Parsed JCL results
  */
-export function parseJcl(content, filePath) {
+export function parseJcl(content, _filePath) {
     const results = {
         jobs: [],
         steps: [],

package/dist/core/ingestion/cobol-processor.d.ts CHANGED Viewed

@@ -50,5 +50,5 @@ export declare function isJclFile(filePath: string): boolean;
  * @param allPathSet - Set of all file paths in the repository
  * @returns Summary of what was extracted
  */
-export declare const processCobol: (graph: KnowledgeGraph, files: CobolFile[], allPathSet: ReadonlySet<string>) => CobolProcessResult;
+export declare const processCobol: (graph: KnowledgeGraph, files: CobolFile[], _allPathSet: ReadonlySet<string>) => CobolProcessResult;
 export {};

package/dist/core/ingestion/cobol-processor.js CHANGED Viewed

@@ -47,7 +47,7 @@ function isCopybook(filePath) {
  * @param allPathSet - Set of all file paths in the repository
  * @returns Summary of what was extracted
  */
-export const processCobol = (graph, files, allPathSet) => {
+export const processCobol = (graph, files, _allPathSet) => {
     const result = {
         programs: 0,
         paragraphs: 0,

package/dist/core/ingestion/heritage-extractors/generic.js CHANGED Viewed

@@ -12,7 +12,7 @@ export function createHeritageExtractor(config) {
     const callNameSet = actualConfig.callBasedHeritage?.callNames;
     return {
         language: actualConfig.language,
-        extract(captureMap, context) {
+        extract(captureMap, _context) {
             const classNode = captureMap['heritage.class'];
             if (!classNode)
                 return [];

package/dist/core/ingestion/heritage-processor.js CHANGED Viewed

@@ -151,7 +151,7 @@ export const processHeritage = async (graph, files, astCache, ctx, onProgress) =
                     bufferSize: getTreeSitterBufferSize(file.content.length),
                 });
             }
-            catch (parseError) {
+            catch (_parseError) {
                 // Skip files that can't be parsed
                 continue;
             }

package/dist/core/ingestion/import-processor.js CHANGED Viewed

@@ -245,7 +245,7 @@ export const processImports = async (graph, files, astCache, ctx, onProgress, re
                     bufferSize: getTreeSitterBufferSize(file.content.length),
                 });
             }
-            catch (parseError) {
+            catch (_parseError) {
                 continue;
             }
             wasReparsed = true;

package/dist/core/ingestion/mro-processor.js CHANGED Viewed

@@ -316,7 +316,7 @@ function parameterTypesMatch(a, b, aParamCount, bParamCount) {
  */
 function emitMethodImplementsEdges(graph, parentMap, methodMap, parentEdgeType, ancestorsMap, edgeTypesMap) {
     let edgeCount = 0;
-    for (const [classId, parentIds] of parentMap) {
+    for (const [classId, _parentIds] of parentMap) {
         const classNode = graph.getNode(classId);
         if (!classNode)
             continue;

package/dist/core/ingestion/parsing-processor.js CHANGED Viewed

@@ -273,7 +273,7 @@ const processParsingSequential = async (graph, files, symbolTable, astCache, sco
                 bufferSize: getTreeSitterBufferSize(parseContent.length),
             });
         }
-        catch (parseError) {
+        catch (_parseError) {
             console.warn(`Skipping unparseable file: ${file.path}`);
             continue;
         }

package/dist/core/ingestion/type-extractors/c-cpp.js CHANGED Viewed

@@ -479,7 +479,7 @@ const inferLiteralType = (node) => {
 };
 /** C++: detect constructor type from smart pointer factory calls (make_shared<Dog>()).
  *  Extracts the template type argument as the constructor type for virtual dispatch. */
-const detectCppConstructorType = (node, classNames) => {
+const detectCppConstructorType = (node, _classNames) => {
     // Navigate to the initializer value in the declaration
     const declarator = node.childForFieldName('declarator');
     const initDecl = declarator?.type === 'init_declarator' ? declarator : undefined;

package/dist/core/ingestion/type-extractors/python.js CHANGED Viewed

@@ -149,7 +149,7 @@ const scanConstructorBinding = (node) => {
 };
 const FOR_LOOP_NODE_TYPES = new Set(['for_statement']);
 /** Python function/method node types that carry a parameters list. */
-const PY_FUNCTION_NODE_TYPES = new Set(['function_definition', 'decorated_definition']);
+const _PY_FUNCTION_NODE_TYPES = new Set(['function_definition', 'decorated_definition']);
 /**
  * Extract element type from a Python type annotation AST node.
  * Handles:

package/dist/core/ingestion/type-extractors/shared.js CHANGED Viewed

@@ -564,16 +564,13 @@ export function extractElementTypeFromString(typeStr, pos = 'last') {
     const openAngle = typeStr.indexOf('<');
     const openSquare = typeStr.indexOf('[');
     let openIdx = -1;
-    let openChar = '';
     let closeChar = '';
     if (openAngle >= 0 && (openSquare < 0 || openAngle < openSquare)) {
         openIdx = openAngle;
-        openChar = '<';
         closeChar = '>';
     }
     else if (openSquare >= 0) {
         openIdx = openSquare;
-        openChar = '[';
         closeChar = ']';
     }
     if (openIdx < 0)

package/dist/core/lbug/content-read.d.ts ADDED Viewed

@@ -0,0 +1,46 @@
+/**
+ * Read-side decoder for `content` columns in lbug node rows.
+ *
+ * RFC 0001 Phase 2 introduces an optional `contentEncoding` column on
+ * every node table that has `content`. Default is `'none'` (passthrough)
+ * so existing reads keep working unchanged. When a writer opts into
+ * `--compress brotli|zstd`, the column carries the encoding tag and the
+ * `content` column carries base64-encoded compressed bytes — readers
+ * MUST run those bytes back through `decodeContent` before handing them
+ * to a consumer (MCP tool result, HTTP API response, embedding model,
+ * LLM input).
+ *
+ * Centralizing the decode in one helper has two benefits:
+ *   1. Shim sites are 2-line changes: add `, n.contentEncoding AS
+ *      contentEncoding` to the Cypher RETURN, and pipe the row through
+ *      `decodeContentField` (or `decodeContentRow`) at the boundary.
+ *   2. Anyone hunting for "where does the read path decode compressed
+ *      bytes" greps for `decodeContentField` and gets every site in one
+ *      shot — no per-table feature detection scattered across files.
+ */
+/**
+ * Decode a single (content, contentEncoding) pair from a Cypher row.
+ *
+ * Returns the input content unchanged when:
+ *   - the encoding is missing / empty / `'none'` (the common case for
+ *     1.6.x – 1.7.x indexes, plus any 1.8+ index written without
+ *     `--compress`);
+ *   - content is null/undefined (caller decides whether that's an error);
+ *   - content is not a string (pre-Phase-2 indexes never wrote non-string
+ *     content, but defensive: don't crash a read path on a malformed row).
+ *
+ * Throws (via `decodeContent`) only when the row claims an encoding this
+ * CLI build can't decode — that's a forward-compat error and the right
+ * behavior is to fail loudly rather than return wrong content.
+ */
+export declare function decodeContentField(content: unknown, encoding: unknown): string | undefined;
+/**
+ * Apply `decodeContentField` to a row that carries `content` and
+ * `contentEncoding` keys (or their numeric column-index aliases).
+ *
+ * The numeric-fallback shape (`r[N]`) mirrors LadybugDB's row format —
+ * driver versions vary on whether named keys are populated, so existing
+ * read sites do `r.content ?? r[N]`. This helper accepts the same
+ * pattern. Returns a NEW object (does not mutate input).
+ */
+export declare function decodeContentRow<T extends Record<string, unknown>>(row: T, contentKey?: keyof T, encodingKey?: keyof T): T;

package/dist/core/lbug/content-read.js ADDED Viewed

@@ -0,0 +1,64 @@
+/**
+ * Read-side decoder for `content` columns in lbug node rows.
+ *
+ * RFC 0001 Phase 2 introduces an optional `contentEncoding` column on
+ * every node table that has `content`. Default is `'none'` (passthrough)
+ * so existing reads keep working unchanged. When a writer opts into
+ * `--compress brotli|zstd`, the column carries the encoding tag and the
+ * `content` column carries base64-encoded compressed bytes — readers
+ * MUST run those bytes back through `decodeContent` before handing them
+ * to a consumer (MCP tool result, HTTP API response, embedding model,
+ * LLM input).
+ *
+ * Centralizing the decode in one helper has two benefits:
+ *   1. Shim sites are 2-line changes: add `, n.contentEncoding AS
+ *      contentEncoding` to the Cypher RETURN, and pipe the row through
+ *      `decodeContentField` (or `decodeContentRow`) at the boundary.
+ *   2. Anyone hunting for "where does the read path decode compressed
+ *      bytes" greps for `decodeContentField` and gets every site in one
+ *      shot — no per-table feature detection scattered across files.
+ */
+import { decodeContent } from '@codragraph/graphstore';
+/**
+ * Decode a single (content, contentEncoding) pair from a Cypher row.
+ *
+ * Returns the input content unchanged when:
+ *   - the encoding is missing / empty / `'none'` (the common case for
+ *     1.6.x – 1.7.x indexes, plus any 1.8+ index written without
+ *     `--compress`);
+ *   - content is null/undefined (caller decides whether that's an error);
+ *   - content is not a string (pre-Phase-2 indexes never wrote non-string
+ *     content, but defensive: don't crash a read path on a malformed row).
+ *
+ * Throws (via `decodeContent`) only when the row claims an encoding this
+ * CLI build can't decode — that's a forward-compat error and the right
+ * behavior is to fail loudly rather than return wrong content.
+ */
+export function decodeContentField(content, encoding) {
+    if (content === undefined || content === null)
+        return undefined;
+    if (typeof content !== 'string')
+        return content;
+    if (typeof encoding !== 'string' || encoding === '' || encoding === 'none') {
+        return content;
+    }
+    return decodeContent(content, encoding);
+}
+/**
+ * Apply `decodeContentField` to a row that carries `content` and
+ * `contentEncoding` keys (or their numeric column-index aliases).
+ *
+ * The numeric-fallback shape (`r[N]`) mirrors LadybugDB's row format —
+ * driver versions vary on whether named keys are populated, so existing
+ * read sites do `r.content ?? r[N]`. This helper accepts the same
+ * pattern. Returns a NEW object (does not mutate input).
+ */
+export function decodeContentRow(row, contentKey = 'content', encodingKey = 'contentEncoding') {
+    const content = row[contentKey];
+    if (content === undefined || content === null)
+        return row;
+    const encoding = row[encodingKey];
+    if (typeof encoding !== 'string' || encoding === '' || encoding === 'none')
+        return row;
+    return { ...row, [contentKey]: decodeContentField(content, encoding) };
+}

package/dist/core/lbug/csv-generator.d.ts CHANGED Viewed

@@ -13,6 +13,7 @@
  */
 import { KnowledgeGraph } from '../graph/types.js';
 import { NodeTableName } from './schema.js';
+import { type ContentEncoding } from '@codragraph/graphstore';
 export declare const sanitizeUTF8: (str: string) => string;
 export declare const escapeCSVField: (value: string | number | undefined | null) => string;
 export declare const escapeCSVNumber: (value: number | undefined | null, defaultValue?: number) => string;
@@ -25,9 +26,4 @@ export interface StreamedCSVResult {
     relCsvPath: string;
     relRows: number;
 }
-/**
- * Stream all CSV data directly to disk files.
- * Iterates graph nodes exactly ONCE — routes each node to the right writer.
- * File contents are lazy-read from disk with a generous LRU cache.
- */
-export declare const streamAllCSVsToDisk: (graph: KnowledgeGraph, repoPath: string, csvDir: string) => Promise<StreamedCSVResult>;
+export declare const streamAllCSVsToDisk: (graph: KnowledgeGraph, repoPath: string, csvDir: string, compress?: ContentEncoding) => Promise<StreamedCSVResult>;

package/dist/core/lbug/csv-generator.js CHANGED Viewed

@@ -14,6 +14,7 @@
 import fs from 'fs/promises';
 import { createWriteStream } from 'fs';
 import path from 'path';
+import { encodeContent } from '@codragraph/graphstore';
 /** Flush buffered rows to disk every N rows */
 const FLUSH_EVERY = 500;
 // ============================================================================
@@ -184,7 +185,26 @@ class BufferedCSVWriter {
  * Iterates graph nodes exactly ONCE — routes each node to the right writer.
  * File contents are lazy-read from disk with a generous LRU cache.
  */
-export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
+/**
+ * Apply RFC 0001 Phase 2 content encoding. Returns the on-the-wire string
+ * + the encoding tag to write into the per-row `contentEncoding` column.
+ *
+ * `compress: undefined | 'none'` is the default — content goes through
+ * unchanged and the tag is `'none'` (matches the schema DEFAULT, so older
+ * readers and the schema-default behavior stay in agreement).
+ *
+ * Always writing the tag column (even as 'none') keeps the CSV / COPY /
+ * schema layouts uniform regardless of compression mode. The wasted bytes
+ * are negligible — a few characters per row vs the kilobytes of content
+ * the column is alongside.
+ */
+const applyEncoding = (content, compress) => {
+    if (!compress || compress === 'none') {
+        return { wireContent: content, tag: 'none' };
+    }
+    return { wireContent: encodeContent(content, compress), tag: compress };
+};
+export const streamAllCSVsToDisk = async (graph, repoPath, csvDir, compress) => {
     // Remove stale CSVs from previous crashed runs, then recreate
     try {
         await fs.rm(csvDir, { recursive: true, force: true });
@@ -196,26 +216,29 @@ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
     const prevMax = process.getMaxListeners();
     process.setMaxListeners(prevMax + 40);
     const contentCache = new FileContentCache(repoPath);
-    // Create writers for every node type up-front
-    const fileWriter = new BufferedCSVWriter(path.join(csvDir, 'file.csv'), 'id,name,filePath,content');
+    // Create writers for every node type up-front. Content-bearing tables
+    // carry an extra `contentEncoding` column right after `content` to
+    // match the Phase 2 schema layout. Tables without `content` (Folder,
+    // Community, Process, Route, Tool) are unchanged.
+    const fileWriter = new BufferedCSVWriter(path.join(csvDir, 'file.csv'), 'id,name,filePath,content,contentEncoding');
     const folderWriter = new BufferedCSVWriter(path.join(csvDir, 'folder.csv'), 'id,name,filePath');
-    const codeElementHeader = 'id,name,filePath,startLine,endLine,isExported,content,description';
+    const codeElementHeader = 'id,name,filePath,startLine,endLine,isExported,content,contentEncoding,description';
     const functionWriter = new BufferedCSVWriter(path.join(csvDir, 'function.csv'), codeElementHeader);
     const classWriter = new BufferedCSVWriter(path.join(csvDir, 'class.csv'), codeElementHeader);
     const interfaceWriter = new BufferedCSVWriter(path.join(csvDir, 'interface.csv'), codeElementHeader);
-    const methodHeader = 'id,name,filePath,startLine,endLine,isExported,content,description,parameterCount,returnType';
+    const methodHeader = 'id,name,filePath,startLine,endLine,isExported,content,contentEncoding,description,parameterCount,returnType';
     const methodWriter = new BufferedCSVWriter(path.join(csvDir, 'method.csv'), methodHeader);
     const codeElemWriter = new BufferedCSVWriter(path.join(csvDir, 'codeelement.csv'), codeElementHeader);
     const communityWriter = new BufferedCSVWriter(path.join(csvDir, 'community.csv'), 'id,label,heuristicLabel,keywords,description,enrichedBy,cohesion,symbolCount');
     const processWriter = new BufferedCSVWriter(path.join(csvDir, 'process.csv'), 'id,label,heuristicLabel,processType,stepCount,communities,entryPointId,terminalId');
     // Section nodes have an extra 'level' column
-    const sectionWriter = new BufferedCSVWriter(path.join(csvDir, 'section.csv'), 'id,name,filePath,startLine,endLine,level,content,description');
+    const sectionWriter = new BufferedCSVWriter(path.join(csvDir, 'section.csv'), 'id,name,filePath,startLine,endLine,level,content,contentEncoding,description');
     // Route nodes for API endpoint mapping
     const routeWriter = new BufferedCSVWriter(path.join(csvDir, 'route.csv'), 'id,name,filePath,responseKeys,errorKeys,middleware');
     // Tool nodes for MCP tool definitions
     const toolWriter = new BufferedCSVWriter(path.join(csvDir, 'tool.csv'), 'id,name,filePath,description');
     // Multi-language node types share the same CSV shape (no isExported column)
-    const multiLangHeader = 'id,name,filePath,startLine,endLine,content,description';
+    const multiLangHeader = 'id,name,filePath,startLine,endLine,content,contentEncoding,description';
     const MULTI_LANG_TYPES = [
         'Struct',
         'Enum',
@@ -259,11 +282,13 @@ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
         switch (node.label) {
             case 'File': {
                 const content = await extractContent(node, contentCache);
+                const { wireContent, tag } = applyEncoding(content, compress);
                 await fileWriter.addRow([
                     escapeCSVField(node.id),
                     escapeCSVField(node.properties.name || ''),
                     escapeCSVField(node.properties.filePath || ''),
-                    escapeCSVField(content),
+                    escapeCSVField(wireContent),
+                    escapeCSVField(tag),
                 ].join(','));
                 break;
             }
@@ -306,6 +331,7 @@ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
             }
             case 'Method': {
                 const content = await extractContent(node, contentCache);
+                const { wireContent, tag } = applyEncoding(content, compress);
                 await methodWriter.addRow([
                     escapeCSVField(node.id),
                     escapeCSVField(node.properties.name || ''),
@@ -313,7 +339,8 @@ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
                     escapeCSVNumber(node.properties.startLine, -1),
                     escapeCSVNumber(node.properties.endLine, -1),
                     node.properties.isExported ? 'true' : 'false',
-                    escapeCSVField(content),
+                    escapeCSVField(wireContent),
+                    escapeCSVField(tag),
                     escapeCSVField(node.properties.description || ''),
                     escapeCSVNumber(node.properties.parameterCount, 0),
                     escapeCSVField(node.properties.returnType || ''),
@@ -322,6 +349,7 @@ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
             }
             case 'Section': {
                 const content = await extractContent(node, contentCache);
+                const { wireContent, tag } = applyEncoding(content, compress);
                 await sectionWriter.addRow([
                     escapeCSVField(node.id),
                     escapeCSVField(node.properties.name || ''),
@@ -329,7 +357,8 @@ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
                     escapeCSVNumber(node.properties.startLine, -1),
                     escapeCSVNumber(node.properties.endLine, -1),
                     escapeCSVNumber(node.properties.level, 1),
-                    escapeCSVField(content),
+                    escapeCSVField(wireContent),
+                    escapeCSVField(tag),
                     escapeCSVField(node.properties.description || ''),
                 ].join(','));
                 break;
@@ -366,6 +395,7 @@ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
                 const writer = codeWriterMap[node.label];
                 if (writer) {
                     const content = await extractContent(node, contentCache);
+                    const { wireContent, tag } = applyEncoding(content, compress);
                     await writer.addRow([
                         escapeCSVField(node.id),
                         escapeCSVField(node.properties.name || ''),
@@ -373,7 +403,8 @@ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
                         escapeCSVNumber(node.properties.startLine, -1),
                         escapeCSVNumber(node.properties.endLine, -1),
                         node.properties.isExported ? 'true' : 'false',
-                        escapeCSVField(content),
+                        escapeCSVField(wireContent),
+                        escapeCSVField(tag),
                         escapeCSVField(node.properties.description || ''),
                     ].join(','));
                 }
@@ -382,13 +413,15 @@ export const streamAllCSVsToDisk = async (graph, repoPath, csvDir) => {
                     const mlWriter = multiLangWriters.get(node.label);
                     if (mlWriter) {
                         const content = await extractContent(node, contentCache);
+                        const { wireContent, tag } = applyEncoding(content, compress);
                         await mlWriter.addRow([
                             escapeCSVField(node.id),
                             escapeCSVField(node.properties.name || ''),
                             escapeCSVField(node.properties.filePath || ''),
                             escapeCSVNumber(node.properties.startLine, -1),
                             escapeCSVNumber(node.properties.endLine, -1),
-                            escapeCSVField(content),
+                            escapeCSVField(wireContent),
+                            escapeCSVField(tag),
                             escapeCSVField(node.properties.description || ''),
                         ].join(','));
                     }