npm - @codragraph/cli - Versions diffs - 1.6.4 → 2.0.0 - Mend

@codragraph/cli 1.6.4 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61)

package/README.md +34 -0
package/dist/cli/analyze.d.ts +22 -0
package/dist/cli/analyze.js +107 -4
package/dist/cli/compress-stats.d.ts +29 -0
package/dist/cli/compress-stats.js +97 -0
package/dist/cli/graphstore.d.ts +6 -2
package/dist/cli/graphstore.js +24 -2
package/dist/cli/index.js +16 -2
package/dist/cli/profile-heap.d.ts +35 -0
package/dist/cli/profile-heap.js +126 -0
package/dist/cli/setup.d.ts +13 -0
package/dist/cli/setup.js +22 -11
package/dist/cli/skill-gen.d.ts +14 -2
package/dist/cli/skill-gen.js +52 -19
package/dist/cli/tool.js +4 -0
package/dist/core/embeddings/embedding-pipeline.js +24 -7
package/dist/core/group/bridge-db.js +111 -24
package/dist/core/lbug/content-read.d.ts +46 -0
package/dist/core/lbug/content-read.js +64 -0
package/dist/core/lbug/csv-generator.d.ts +2 -6
package/dist/core/lbug/csv-generator.js +45 -12
package/dist/core/lbug/lbug-adapter.d.ts +4 -1
package/dist/core/lbug/lbug-adapter.js +153 -21
package/dist/core/lbug/schema.d.ts +7 -7
package/dist/core/lbug/schema.js +18 -0
package/dist/core/run-analyze.d.ts +13 -0
package/dist/core/run-analyze.js +91 -4
package/dist/core/search/bm25-index.js +67 -15
package/dist/mcp/local/local-backend.js +22 -5
package/dist/server/api.js +4 -3
package/dist/storage/repo-manager.d.ts +39 -0
package/dist/storage/repo-manager.js +19 -0
package/hooks/claude/codragraph-hook.cjs +95 -2
package/package.json +4 -4
package/scripts/build-tree-sitter-proto.cjs +15 -3
package/scripts/patch-tree-sitter-swift.cjs +17 -4
package/skills/codragraph-api-surface.md +110 -0
package/skills/codragraph-config-audit.md +146 -0
package/skills/codragraph-cross-repo-impact.md +135 -0
package/skills/codragraph-data-lineage.md +137 -0
package/skills/codragraph-dead-code.md +119 -0
package/skills/codragraph-gh-actions-debug.md +162 -0
package/skills/codragraph-gh-issue-workflow.md +178 -0
package/skills/codragraph-gh-pr-workflow.md +176 -0
package/skills/codragraph-gh-release-workflow.md +187 -0
package/skills/codragraph-git-bisect.md +176 -0
package/skills/codragraph-git-force-push.md +147 -0
package/skills/codragraph-git-history-rewrite.md +174 -0
package/skills/codragraph-git-rebase-vs-merge.md +138 -0
package/skills/codragraph-git-recovery.md +181 -0
package/skills/codragraph-git-worktree.md +145 -0
package/skills/codragraph-migration-tracking.md +130 -0
package/skills/codragraph-notebook-context.md +136 -0
package/skills/codragraph-observability-coverage.md +125 -0
package/skills/codragraph-onboarding.md +129 -0
package/skills/codragraph-perf-hotspots.md +132 -0
package/skills/codragraph-project-switcher.md +116 -0
package/skills/codragraph-security-audit.md +144 -0
package/skills/codragraph-sql-tracing.md +122 -0
package/skills/codragraph-supply-chain-audit.md +153 -0
package/skills/codragraph-test-coverage.md +97 -0

package/dist/cli/setup.js CHANGED

@@ -519,12 +519,17 @@ async function installCodexSkills(result) {
         result.errors.push(`Codex skills: ${err.message}`);
     }
 }
-// ─── Main command ──────────────────────────────────────────────────
-export const setupCommand = async () => {
-    console.log('');
-    console.log('  CodraGraph Setup');
-    console.log('  ==============');
-    console.log('');
+export const runSetup = async (options = {}) => {
+    if (options.compactHeader) {
+        console.log('  CodraGraph: first-run editor setup');
+        console.log('');
+    }
+    else {
+        console.log('');
+        console.log('  CodraGraph Setup');
+        console.log('  ==============');
+        console.log('');
+    }
     // Ensure global directory exists
     const globalDir = getGlobalDir();
     await fs.mkdir(globalDir, { recursive: true });
@@ -569,10 +574,16 @@ export const setupCommand = async () => {
     console.log('  Summary:');
     console.log(`    MCP configured for: ${result.configured.filter((c) => !c.includes('skills')).join(', ') || 'none'}`);
     console.log(`    Skills installed to: ${result.configured.filter((c) => c.includes('skills')).length > 0 ? result.configured.filter((c) => c.includes('skills')).join(', ') : 'none'}`);
+    if (!options.skipNextSteps) {
+        console.log('');
+        console.log('  Next steps:');
+        console.log('    1. cd into any git repo');
+        console.log('    2. Run: codragraph analyze');
+        console.log('    3. Open the repo in your editor — MCP is ready!');
+    }
     console.log('');
-    console.log('  Next steps:');
-    console.log('    1. cd into any git repo');
-    console.log('    2. Run: codragraph analyze');
-    console.log('    3. Open the repo in your editor — MCP is ready!');
-    console.log('');
+    return result;
+};
+export const setupCommand = async () => {
+    await runSetup();
 };

package/dist/cli/skill-gen.d.ts CHANGED

@@ -13,14 +13,26 @@ export interface GeneratedSkillInfo {
     symbolCount: number;
     fileCount: number;
 }
+/**
+ * Supported skill targets. Project-relative output paths mirror each editor's
+ * convention: Claude / Cursor use `skills/`, OpenCode uses `skill/` (singular)
+ * to match its global config layout, Codex uses `skills/`. The trailing
+ * `generated/` segment isolates auto-generated skills from human-authored ones.
+ */
+export declare const SKILL_TARGETS: readonly ["claude", "cursor", "opencode", "codex"];
+export type SkillTarget = (typeof SKILL_TARGETS)[number];
 /**
  * @brief Generate repo-specific skill files from detected communities
  * @param {string} repoPath - Absolute path to the repository root
  * @param {string} projectName - Human-readable project name
  * @param {PipelineResult} pipelineResult - In-memory pipeline data with communities, processes, graph
- * @returns {Promise<{ skills: GeneratedSkillInfo[], outputPath: string }>} Generated skill metadata
+ * @param {SkillTarget[]} targets - Editor targets to emit to. Defaults to ['claude'].
+ * @returns {Promise<{ skills: GeneratedSkillInfo[], outputPath: string, outputPaths: string[] }>}
+ *          `outputPath` is the Claude path (or first target) for backwards compat;
+ *          `outputPaths` lists every directory written to.
  */
-export declare const generateSkillFiles: (repoPath: string, projectName: string, pipelineResult: PipelineResult) => Promise<{
+export declare const generateSkillFiles: (repoPath: string, projectName: string, pipelineResult: PipelineResult, targets?: SkillTarget[]) => Promise<{
     skills: GeneratedSkillInfo[];
     outputPath: string;
+    outputPaths: string[];
 }>;

package/dist/cli/skill-gen.js CHANGED

@@ -8,6 +8,20 @@
  */
 import fs from 'fs/promises';
 import path from 'path';
+import { estimateTokens } from './compress-stats.js';
+/**
+ * Supported skill targets. Project-relative output paths mirror each editor's
+ * convention: Claude / Cursor use `skills/`, OpenCode uses `skill/` (singular)
+ * to match its global config layout, Codex uses `skills/`. The trailing
+ * `generated/` segment isolates auto-generated skills from human-authored ones.
+ */
+export const SKILL_TARGETS = ['claude', 'cursor', 'opencode', 'codex'];
+const SKILL_OUTPUT_DIRS = {
+    claude: ['.claude', 'skills', 'generated'],
+    cursor: ['.cursor', 'skills', 'generated'],
+    opencode: ['.opencode', 'skill', 'generated'],
+    codex: ['.codex', 'skills', 'generated'],
+};
 // ============================================================================
 // MAIN EXPORT
 // ============================================================================
@@ -16,14 +30,24 @@ import path from 'path';
  * @param {string} repoPath - Absolute path to the repository root
  * @param {string} projectName - Human-readable project name
  * @param {PipelineResult} pipelineResult - In-memory pipeline data with communities, processes, graph
- * @returns {Promise<{ skills: GeneratedSkillInfo[], outputPath: string }>} Generated skill metadata
+ * @param {SkillTarget[]} targets - Editor targets to emit to. Defaults to ['claude'].
+ * @returns {Promise<{ skills: GeneratedSkillInfo[], outputPath: string, outputPaths: string[] }>}
+ *          `outputPath` is the Claude path (or first target) for backwards compat;
+ *          `outputPaths` lists every directory written to.
  */
-export const generateSkillFiles = async (repoPath, projectName, pipelineResult) => {
+export const generateSkillFiles = async (repoPath, projectName, pipelineResult, targets = ['claude']) => {
     const { communityResult, processResult, graph } = pipelineResult;
-    const outputDir = path.join(repoPath, '.claude', 'skills', 'generated');
+    // Resolve all output dirs once. The "primary" path is Claude (if requested)
+    // or the first target — kept for AGENTS.md / CLAUDE.md generators that link
+    // to skill files relative to .claude/.
+    const effectiveTargets = targets.length > 0 ? targets : ['claude'];
+    const outputDirs = effectiveTargets.map((t) => path.join(repoPath, ...SKILL_OUTPUT_DIRS[t]));
+    const primaryDir = effectiveTargets.includes('claude')
+        ? path.join(repoPath, ...SKILL_OUTPUT_DIRS.claude)
+        : outputDirs[0];
     if (!communityResult || !communityResult.memberships.length) {
         console.log('\n  Skills: no communities detected, skipping skill generation');
-        return { skills: [], outputPath: outputDir };
+        return { skills: [], outputPath: primaryDir, outputPaths: outputDirs };
     }
     console.log('\n  Generating repo-specific skills...');
     // Step 1: Build communities from memberships (not the filtered communities array).
@@ -42,19 +66,21 @@ export const generateSkillFiles = async (repoPath, projectName, pipelineResult)
         .slice(0, 20);
     if (significant.length === 0) {
         console.log('\n  Skills: no significant communities found (all below 3-symbol threshold)');
-        return { skills: [], outputPath: outputDir };
+        return { skills: [], outputPath: primaryDir, outputPaths: outputDirs };
     }
     // Step 3: Build lookup maps
     const membershipsByComm = buildMembershipMap(communityResult.memberships);
     const nodeIdToCommunityLabel = buildNodeCommunityLabelMap(communityResult.memberships, communities);
-    // Step 4: Clear and recreate output directory
-    try {
-        await fs.rm(outputDir, { recursive: true, force: true });
-    }
-    catch {
-        /* may not exist */
+    // Step 4: Clear and recreate every output directory we'll write to
+    for (const dir of outputDirs) {
+        try {
+            await fs.rm(dir, { recursive: true, force: true });
+        }
+        catch {
+            /* may not exist */
+        }
+        await fs.mkdir(dir, { recursive: true });
     }
-    await fs.mkdir(outputDir, { recursive: true });
     // Step 5: Generate skill files
     const skills = [];
     const usedNames = new Set();
@@ -76,10 +102,13 @@ export const generateSkillFiles = async (repoPath, projectName, pipelineResult)
         usedNames.add(kebabName);
         // Generate SKILL.md content
         const content = renderSkillMarkdown(community, projectName, members, files, entryPoints, flows, connections, kebabName);
-        // Write file
-        const skillDir = path.join(outputDir, kebabName);
-        await fs.mkdir(skillDir, { recursive: true });
-        await fs.writeFile(path.join(skillDir, 'SKILL.md'), content, 'utf-8');
+        // Write the same SKILL.md to each requested editor target
+        for (const dir of outputDirs) {
+            const skillDir = path.join(dir, kebabName);
+            await fs.mkdir(skillDir, { recursive: true });
+            await fs.writeFile(path.join(skillDir, 'SKILL.md'), content, 'utf-8');
+        }
+        const skillTokens = estimateTokens(content);
         const info = {
             name: kebabName,
             label: community.label,
@@ -87,10 +116,14 @@ export const generateSkillFiles = async (repoPath, projectName, pipelineResult)
             fileCount: files.length,
         };
         skills.push(info);
-        console.log(`    \u2713 ${community.label} (${community.symbolCount} symbols, ${files.length} files)`);
+        // Show the @codragraph/compress headline number per skill: how many
+        // tokens of distilled context this community boils down to.
+        console.log(`    \u2713 ${community.label} (${community.symbolCount} symbols, ${files.length} files) ` +
+            `\u2192 ~${skillTokens.toLocaleString()} tokens`);
     }
-    console.log(`\n  ${skills.length} skills generated \u2192 .claude/skills/generated/`);
-    return { skills, outputPath: outputDir };
+    const targetSummary = effectiveTargets.join(', ');
+    console.log(`\n  ${skills.length} skills generated \u2192 ${targetSummary}`);
+    return { skills, outputPath: primaryDir, outputPaths: outputDirs };
 };
 // ============================================================================
 // FALLBACK COMMUNITY BUILDER

package/dist/cli/tool.js CHANGED

@@ -16,6 +16,7 @@
  */
 import { writeSync } from 'node:fs';
 import { LocalBackend } from '../mcp/local/local-backend.js';
+import { emitTokenStats } from './compress-stats.js';
 let _backend = null;
 async function getBackend() {
     if (_backend)
@@ -68,6 +69,7 @@ export async function queryCommand(queryText, options) {
         repo: options?.repo,
     });
     output(result);
+    emitTokenStats(result);
 }
 export async function contextCommand(name, options) {
     if (!name?.trim() && !options?.uid) {
@@ -83,6 +85,7 @@ export async function contextCommand(name, options) {
         repo: options?.repo,
     });
     output(result);
+    emitTokenStats(result);
 }
 export async function impactCommand(target, options) {
     if (!target?.trim()) {
@@ -99,6 +102,7 @@ export async function impactCommand(target, options) {
             repo: options?.repo,
         });
         output(result);
+        emitTokenStats(result);
     }
     catch (err) {
         // Belt-and-suspenders: catch infrastructure failures (getBackend, callTool transport)

package/dist/core/embeddings/embedding-pipeline.js CHANGED

@@ -16,6 +16,7 @@ import { extractStructuralNames } from './structural-extractor.js';
 import { DEFAULT_EMBEDDING_CONFIG, EMBEDDABLE_LABELS, isShortLabel, LABEL_METHOD, LABELS_WITH_EXPORTED, STRUCTURAL_LABELS, collectBestChunks, } from './types.js';
 import { EMBEDDING_TABLE_NAME, EMBEDDING_INDEX_NAME, CREATE_VECTOR_INDEX_QUERY, STALE_HASH_SENTINEL, } from '../lbug/schema.js';
 import { loadVectorExtension } from '../lbug/lbug-adapter.js';
+import { decodeContentField } from '../lbug/content-read.js';
 const isDev = process.env.NODE_ENV === 'development';
 /**
  * Bump this when the embedding text template changes in a way that should
@@ -46,12 +47,17 @@ const queryEmbeddableNodes = async (executeQuery) => {
     for (const label of EMBEDDABLE_LABELS) {
         try {
             let query;
+            // RFC 0001 Phase 2: pull contentEncoding alongside content so we
+            // hand DECODED text to the embedder. Embedding compressed bytes
+            // would silently destroy semantic search quality without any
+            // visible error — decode is mandatory at this boundary.
             if (label === LABEL_METHOD) {
                 // Method has parameterCount and returnType
                 query = `
           MATCH (n:Method)
           RETURN n.id AS id, n.name AS name, 'Method' AS label,
                  n.filePath AS filePath, n.content AS content,
+                 n.contentEncoding AS contentEncoding,
                  n.startLine AS startLine, n.endLine AS endLine,
                  n.isExported AS isExported, n.description AS description,
                  n.parameterCount AS parameterCount, n.returnType AS returnType
@@ -63,6 +69,7 @@ const queryEmbeddableNodes = async (executeQuery) => {
           MATCH (n:\`${label}\`)
           RETURN n.id AS id, n.name AS name, '${label}' AS label,
                  n.filePath AS filePath, n.content AS content,
+                 n.contentEncoding AS contentEncoding,
                  n.startLine AS startLine, n.endLine AS endLine,
                  n.isExported AS isExported, n.description AS description
         `;
@@ -73,6 +80,7 @@ const queryEmbeddableNodes = async (executeQuery) => {
           MATCH (n:\`${label}\`)
           RETURN n.id AS id, n.name AS name, '${label}' AS label,
                  n.filePath AS filePath, n.content AS content,
+                 n.contentEncoding AS contentEncoding,
                  n.startLine AS startLine, n.endLine AS endLine,
                  n.description AS description
         `;
@@ -80,20 +88,29 @@ const queryEmbeddableNodes = async (executeQuery) => {
             const rows = await executeQuery(query);
             for (const row of rows) {
                 const hasExportedColumn = label === LABEL_METHOD || LABELS_WITH_EXPORTED.has(label);
+                // Column layout (every variant of the query above shares the
+                // first six positions; later columns differ by label):
+                //   0=id, 1=name, 2=label, 3=filePath,
+                //   4=content, 5=contentEncoding,
+                //   6=startLine, 7=endLine,
+                //   8=isExported  (Method + LABELS_WITH_EXPORTED only)
+                //   8 or 9=description (depending on isExported presence)
+                //   10=parameterCount, 11=returnType (Method only)
+                const decoded = decodeContentField(row.content ?? row[4], row.contentEncoding ?? row[5]);
                 allNodes.push({
                     id: row.id ?? row[0],
                     name: row.name ?? row[1],
                     label: row.label ?? row[2],
                     filePath: row.filePath ?? row[3],
-                    content: row.content ?? row[4] ?? '',
-                    startLine: row.startLine ?? row[5],
-                    endLine: row.endLine ?? row[6],
-                    isExported: hasExportedColumn ? (row.isExported ?? row[7]) : undefined,
-                    description: row.description ?? (hasExportedColumn ? row[8] : row[7]),
+                    content: decoded ?? '',
+                    startLine: row.startLine ?? row[6],
+                    endLine: row.endLine ?? row[7],
+                    isExported: hasExportedColumn ? (row.isExported ?? row[8]) : undefined,
+                    description: row.description ?? (hasExportedColumn ? row[9] : row[8]),
                     ...(label === LABEL_METHOD
                         ? {
-                            parameterCount: row.parameterCount ?? row[9],
-                            returnType: row.returnType ?? row[10],
+                            parameterCount: row.parameterCount ?? row[10],
+                            returnType: row.returnType ?? row[11],
                         }
                         : {}),
                 });

package/dist/core/group/bridge-db.js CHANGED

@@ -100,21 +100,93 @@ export async function ensureBridgeSchema(handle) {
         }
     }
 }
-export async function queryBridge(handle, cypher, params) {
+/**
+ * Close every QueryResult / PreparedStatement before letting V8 GC them.
+ * Same close-order discipline as `core/lbug/lbug-adapter.ts:closeQueryResult`
+ * — leaking these handles past `conn.close()` corrupts LadybugDB's native
+ * file lock on Windows ("Error 33: The process cannot access the file
+ * because it is being used by another process") and segfaults on
+ * process exit elsewhere. Best-effort: wrap close calls in try/catch so
+ * a finalizer that already ran doesn't poison the queryBridge return.
+ */
+async function closeBridgeHandle(h) {
+    if (!h)
+        return;
+    const candidates = Array.isArray(h) ? h : [h];
+    for (const r of candidates) {
+        try {
+            const close = r?.close;
+            if (typeof close === 'function')
+                await Promise.resolve(close.call(r));
+        }
+        catch {
+            /* best-effort */
+        }
+    }
+}
+/**
+ * True iff the error is a Windows-only transient file-lock surfaced by
+ * LadybugDB's native binding immediately after a writer process closes
+ * the same DB file. Symptom is `Error 33` on the read path even though
+ * `db.close()` returned cleanly at the JS layer — the kernel hasn't
+ * fully released the exclusive lock yet. Retrying with backoff is the
+ * documented workaround for this class of Windows-fs interactions.
+ */
+function isTransientLbugLockError(err) {
+    const msg = err?.message ?? '';
+    return (msg.includes('Error 33') ||
+        msg.includes('locked a portion of the file') ||
+        msg.includes('cannot access the file because it is being used by another process'));
+}
+async function queryBridgeOnce(handle, cypher, params) {
     const conn = handle._conn;
     if (params && Object.keys(params).length > 0) {
         const stmt = await conn.prepare(cypher);
         if (!stmt.isSuccess()) {
             const errMsg = await stmt.getErrorMessage();
+            await closeBridgeHandle(stmt);
             throw new Error(`Bridge query prepare failed: ${errMsg}`);
         }
         const queryResult = await conn.execute(stmt, params);
         const result = unwrapQueryResult(queryResult);
-        return (await result.getAll());
+        try {
+            return (await result.getAll());
+        }
+        finally {
+            await closeBridgeHandle(queryResult);
+            await closeBridgeHandle(stmt);
+        }
     }
     const queryResult = await conn.query(cypher);
     const result = unwrapQueryResult(queryResult);
-    return (await result.getAll());
+    try {
+        return (await result.getAll());
+    }
+    finally {
+        await closeBridgeHandle(queryResult);
+    }
+}
+export async function queryBridge(handle, cypher, params) {
+    // Retry on Windows-transient file-lock errors. Reads issued through a
+    // freshly-opened readonly Database can race the writer's
+    // post-`db.close()` lock release on Windows + Node 22.14 (LadybugDB
+    // native binding holds the kernel lock briefly after the JS-level
+    // close returns). Backoff doubles per attempt up to ~3 s total — well
+    // below any user-visible CLI delay budget but enough to absorb a slow
+    // Windows kernel lock release.
+    const ATTEMPTS = 7;
+    for (let attempt = 0; attempt < ATTEMPTS; attempt++) {
+        try {
+            return await queryBridgeOnce(handle, cypher, params);
+        }
+        catch (err) {
+            if (!isTransientLbugLockError(err) || attempt === ATTEMPTS - 1)
+                throw err;
+            await new Promise((r) => setTimeout(r, 50 * Math.pow(2, attempt)));
+        }
+    }
+    // Unreachable: the loop either returns or throws on the last attempt.
+    throw new Error('queryBridge: retry loop exited unexpectedly');
 }
 /**
  * LadybugDB's `conn.query` / `conn.execute` can return either a single
@@ -421,32 +493,47 @@ export async function openBridgeDbReadOnly(groupDir) {
     // Open the native handle. If Connection construction throws AFTER
     // Database was successfully allocated, we'd leak the native Database
     // object. Wrap each step separately and tear down the partial handle.
-    let db;
-    let conn;
-    try {
-        db = new lbug.Database(dbPath, 0, false, true); // readOnly
-        conn = new lbug.Connection(db);
-        return { _db: db, _conn: conn, groupDir };
-    }
-    catch {
-        if (conn) {
-            try {
-                await conn.close();
-            }
-            catch {
-                /* ignore */
-            }
+    //
+    // Retry on the Windows-transient lock error: the LadybugDB native
+    // binding holds the kernel file lock briefly past `db.close()` on
+    // Windows + Node 22.14, so a reader that races a recent writer can
+    // hit "Error 33: locked a portion of the file" on the constructor's
+    // first 4 KB header read. Backoff up to ~3 s lets the writer's lock
+    // age out — enough headroom for any normal write→read sequence
+    // without becoming a user-visible delay.
+    const ATTEMPTS = 7;
+    for (let attempt = 0; attempt < ATTEMPTS; attempt++) {
+        let db;
+        let conn;
+        try {
+            db = new lbug.Database(dbPath, 0, false, true); // readOnly
+            conn = new lbug.Connection(db);
+            return { _db: db, _conn: conn, groupDir };
         }
-        if (db) {
-            try {
-                await db.close();
+        catch (err) {
+            if (conn) {
+                try {
+                    await conn.close();
+                }
+                catch {
+                    /* ignore */
+                }
             }
-            catch {
-                /* ignore */
+            if (db) {
+                try {
+                    await db.close();
+                }
+                catch {
+                    /* ignore */
+                }
             }
+            if (!isTransientLbugLockError(err) || attempt === ATTEMPTS - 1)
+                return null;
+            await new Promise((r) => setTimeout(r, 50 * Math.pow(2, attempt)));
+            continue;
         }
-        return null;
     }
+    return null;
 }
 /* ------------------------------------------------------------------ */
 /*  bridgeExists                                                       */

package/dist/core/lbug/content-read.d.ts ADDED

@@ -0,0 +1,46 @@
+/**
+ * Read-side decoder for `content` columns in lbug node rows.
+ *
+ * RFC 0001 Phase 2 introduces an optional `contentEncoding` column on
+ * every node table that has `content`. Default is `'none'` (passthrough)
+ * so existing reads keep working unchanged. When a writer opts into
+ * `--compress brotli|zstd`, the column carries the encoding tag and the
+ * `content` column carries base64-encoded compressed bytes — readers
+ * MUST run those bytes back through `decodeContent` before handing them
+ * to a consumer (MCP tool result, HTTP API response, embedding model,
+ * LLM input).
+ *
+ * Centralizing the decode in one helper has two benefits:
+ *   1. Shim sites are 2-line changes: add `, n.contentEncoding AS
+ *      contentEncoding` to the Cypher RETURN, and pipe the row through
+ *      `decodeContentField` (or `decodeContentRow`) at the boundary.
+ *   2. Anyone hunting for "where does the read path decode compressed
+ *      bytes" greps for `decodeContentField` and gets every site in one
+ *      shot — no per-table feature detection scattered across files.
+ */
+/**
+ * Decode a single (content, contentEncoding) pair from a Cypher row.
+ *
+ * Returns the input content unchanged when:
+ *   - the encoding is missing / empty / `'none'` (the common case for
+ *     1.6.x – 1.7.x indexes, plus any 1.8+ index written without
+ *     `--compress`);
+ *   - content is null/undefined (caller decides whether that's an error);
+ *   - content is not a string (pre-Phase-2 indexes never wrote non-string
+ *     content, but defensive: don't crash a read path on a malformed row).
+ *
+ * Throws (via `decodeContent`) only when the row claims an encoding this
+ * CLI build can't decode — that's a forward-compat error and the right
+ * behavior is to fail loudly rather than return wrong content.
+ */
+export declare function decodeContentField(content: unknown, encoding: unknown): string | undefined;
+/**
+ * Apply `decodeContentField` to a row that carries `content` and
+ * `contentEncoding` keys (or their numeric column-index aliases).
+ *
+ * The numeric-fallback shape (`r[N]`) mirrors LadybugDB's row format —
+ * driver versions vary on whether named keys are populated, so existing
+ * read sites do `r.content ?? r[N]`. This helper accepts the same
+ * pattern. Returns a NEW object (does not mutate input).
+ */
+export declare function decodeContentRow<T extends Record<string, unknown>>(row: T, contentKey?: keyof T, encodingKey?: keyof T): T;

package/dist/core/lbug/content-read.js ADDED

@@ -0,0 +1,64 @@
+/**
+ * Read-side decoder for `content` columns in lbug node rows.
+ *
+ * RFC 0001 Phase 2 introduces an optional `contentEncoding` column on
+ * every node table that has `content`. Default is `'none'` (passthrough)
+ * so existing reads keep working unchanged. When a writer opts into
+ * `--compress brotli|zstd`, the column carries the encoding tag and the
+ * `content` column carries base64-encoded compressed bytes — readers
+ * MUST run those bytes back through `decodeContent` before handing them
+ * to a consumer (MCP tool result, HTTP API response, embedding model,
+ * LLM input).
+ *
+ * Centralizing the decode in one helper has two benefits:
+ *   1. Shim sites are 2-line changes: add `, n.contentEncoding AS
+ *      contentEncoding` to the Cypher RETURN, and pipe the row through
+ *      `decodeContentField` (or `decodeContentRow`) at the boundary.
+ *   2. Anyone hunting for "where does the read path decode compressed
+ *      bytes" greps for `decodeContentField` and gets every site in one
+ *      shot — no per-table feature detection scattered across files.
+ */
+import { decodeContent } from '@codragraph/graphstore';
+/**
+ * Decode a single (content, contentEncoding) pair from a Cypher row.
+ *
+ * Returns the input content unchanged when:
+ *   - the encoding is missing / empty / `'none'` (the common case for
+ *     1.6.x – 1.7.x indexes, plus any 1.8+ index written without
+ *     `--compress`);
+ *   - content is null/undefined (caller decides whether that's an error);
+ *   - content is not a string (pre-Phase-2 indexes never wrote non-string
+ *     content, but defensive: don't crash a read path on a malformed row).
+ *
+ * Throws (via `decodeContent`) only when the row claims an encoding this
+ * CLI build can't decode — that's a forward-compat error and the right
+ * behavior is to fail loudly rather than return wrong content.
+ */
+export function decodeContentField(content, encoding) {
+    if (content === undefined || content === null)
+        return undefined;
+    if (typeof content !== 'string')
+        return content;
+    if (typeof encoding !== 'string' || encoding === '' || encoding === 'none') {
+        return content;
+    }
+    return decodeContent(content, encoding);
+}
+/**
+ * Apply `decodeContentField` to a row that carries `content` and
+ * `contentEncoding` keys (or their numeric column-index aliases).
+ *
+ * The numeric-fallback shape (`r[N]`) mirrors LadybugDB's row format —
+ * driver versions vary on whether named keys are populated, so existing
+ * read sites do `r.content ?? r[N]`. This helper accepts the same
+ * pattern. Returns a NEW object (does not mutate input).
+ */
+export function decodeContentRow(row, contentKey = 'content', encodingKey = 'contentEncoding') {
+    const content = row[contentKey];
+    if (content === undefined || content === null)
+        return row;
+    const encoding = row[encodingKey];
+    if (typeof encoding !== 'string' || encoding === '' || encoding === 'none')
+        return row;
+    return { ...row, [contentKey]: decodeContentField(content, encoding) };
+}

package/dist/core/lbug/csv-generator.d.ts CHANGED

@@ -13,6 +13,7 @@
  */
 import { KnowledgeGraph } from '../graph/types.js';
 import { NodeTableName } from './schema.js';
+import { type ContentEncoding } from '@codragraph/graphstore';
 export declare const sanitizeUTF8: (str: string) => string;
 export declare const escapeCSVField: (value: string | number | undefined | null) => string;
 export declare const escapeCSVNumber: (value: number | undefined | null, defaultValue?: number) => string;
@@ -25,9 +26,4 @@ export interface StreamedCSVResult {
     relCsvPath: string;
     relRows: number;
 }
-/**
- * Stream all CSV data directly to disk files.
- * Iterates graph nodes exactly ONCE — routes each node to the right writer.
- * File contents are lazy-read from disk with a generous LRU cache.
- */
-export declare const streamAllCSVsToDisk: (graph: KnowledgeGraph, repoPath: string, csvDir: string) => Promise<StreamedCSVResult>;
+export declare const streamAllCSVsToDisk: (graph: KnowledgeGraph, repoPath: string, csvDir: string, compress?: ContentEncoding) => Promise<StreamedCSVResult>;