@codragraph/cli 1.6.4 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/README.md +34 -0
  2. package/dist/cli/analyze.d.ts +22 -0
  3. package/dist/cli/analyze.js +107 -4
  4. package/dist/cli/compress-stats.d.ts +29 -0
  5. package/dist/cli/compress-stats.js +97 -0
  6. package/dist/cli/graphstore.d.ts +6 -2
  7. package/dist/cli/graphstore.js +24 -2
  8. package/dist/cli/index.js +16 -2
  9. package/dist/cli/profile-heap.d.ts +35 -0
  10. package/dist/cli/profile-heap.js +126 -0
  11. package/dist/cli/setup.d.ts +13 -0
  12. package/dist/cli/setup.js +22 -11
  13. package/dist/cli/skill-gen.d.ts +14 -2
  14. package/dist/cli/skill-gen.js +52 -19
  15. package/dist/cli/tool.js +4 -0
  16. package/dist/core/embeddings/embedding-pipeline.js +24 -7
  17. package/dist/core/group/bridge-db.js +111 -24
  18. package/dist/core/lbug/content-read.d.ts +46 -0
  19. package/dist/core/lbug/content-read.js +64 -0
  20. package/dist/core/lbug/csv-generator.d.ts +2 -6
  21. package/dist/core/lbug/csv-generator.js +45 -12
  22. package/dist/core/lbug/lbug-adapter.d.ts +4 -1
  23. package/dist/core/lbug/lbug-adapter.js +153 -21
  24. package/dist/core/lbug/schema.d.ts +7 -7
  25. package/dist/core/lbug/schema.js +18 -0
  26. package/dist/core/run-analyze.d.ts +13 -0
  27. package/dist/core/run-analyze.js +91 -4
  28. package/dist/core/search/bm25-index.js +67 -15
  29. package/dist/mcp/local/local-backend.js +22 -5
  30. package/dist/server/api.js +4 -3
  31. package/dist/storage/repo-manager.d.ts +39 -0
  32. package/dist/storage/repo-manager.js +19 -0
  33. package/hooks/claude/codragraph-hook.cjs +95 -2
  34. package/package.json +4 -4
  35. package/scripts/build-tree-sitter-proto.cjs +15 -3
  36. package/scripts/patch-tree-sitter-swift.cjs +17 -4
  37. package/skills/codragraph-api-surface.md +110 -0
  38. package/skills/codragraph-config-audit.md +146 -0
  39. package/skills/codragraph-cross-repo-impact.md +135 -0
  40. package/skills/codragraph-data-lineage.md +137 -0
  41. package/skills/codragraph-dead-code.md +119 -0
  42. package/skills/codragraph-gh-actions-debug.md +162 -0
  43. package/skills/codragraph-gh-issue-workflow.md +178 -0
  44. package/skills/codragraph-gh-pr-workflow.md +176 -0
  45. package/skills/codragraph-gh-release-workflow.md +187 -0
  46. package/skills/codragraph-git-bisect.md +176 -0
  47. package/skills/codragraph-git-force-push.md +147 -0
  48. package/skills/codragraph-git-history-rewrite.md +174 -0
  49. package/skills/codragraph-git-rebase-vs-merge.md +138 -0
  50. package/skills/codragraph-git-recovery.md +181 -0
  51. package/skills/codragraph-git-worktree.md +145 -0
  52. package/skills/codragraph-migration-tracking.md +130 -0
  53. package/skills/codragraph-notebook-context.md +136 -0
  54. package/skills/codragraph-observability-coverage.md +125 -0
  55. package/skills/codragraph-onboarding.md +129 -0
  56. package/skills/codragraph-perf-hotspots.md +132 -0
  57. package/skills/codragraph-project-switcher.md +116 -0
  58. package/skills/codragraph-security-audit.md +144 -0
  59. package/skills/codragraph-sql-tracing.md +122 -0
  60. package/skills/codragraph-supply-chain-audit.md +153 -0
  61. package/skills/codragraph-test-coverage.md +97 -0
package/dist/cli/setup.js CHANGED
@@ -519,12 +519,17 @@ async function installCodexSkills(result) {
519
519
  result.errors.push(`Codex skills: ${err.message}`);
520
520
  }
521
521
  }
522
- // ─── Main command ──────────────────────────────────────────────────
523
- export const setupCommand = async () => {
524
- console.log('');
525
- console.log(' CodraGraph Setup');
526
- console.log(' ==============');
527
- console.log('');
522
+ export const runSetup = async (options = {}) => {
523
+ if (options.compactHeader) {
524
+ console.log(' CodraGraph: first-run editor setup');
525
+ console.log('');
526
+ }
527
+ else {
528
+ console.log('');
529
+ console.log(' CodraGraph Setup');
530
+ console.log(' ==============');
531
+ console.log('');
532
+ }
528
533
  // Ensure global directory exists
529
534
  const globalDir = getGlobalDir();
530
535
  await fs.mkdir(globalDir, { recursive: true });
@@ -569,10 +574,16 @@ export const setupCommand = async () => {
569
574
  console.log(' Summary:');
570
575
  console.log(` MCP configured for: ${result.configured.filter((c) => !c.includes('skills')).join(', ') || 'none'}`);
571
576
  console.log(` Skills installed to: ${result.configured.filter((c) => c.includes('skills')).length > 0 ? result.configured.filter((c) => c.includes('skills')).join(', ') : 'none'}`);
577
+ if (!options.skipNextSteps) {
578
+ console.log('');
579
+ console.log(' Next steps:');
580
+ console.log(' 1. cd into any git repo');
581
+ console.log(' 2. Run: codragraph analyze');
582
+ console.log(' 3. Open the repo in your editor — MCP is ready!');
583
+ }
572
584
  console.log('');
573
- console.log(' Next steps:');
574
- console.log(' 1. cd into any git repo');
575
- console.log(' 2. Run: codragraph analyze');
576
- console.log(' 3. Open the repo in your editor — MCP is ready!');
577
- console.log('');
585
+ return result;
586
+ };
587
+ export const setupCommand = async () => {
588
+ await runSetup();
578
589
  };
@@ -13,14 +13,26 @@ export interface GeneratedSkillInfo {
13
13
  symbolCount: number;
14
14
  fileCount: number;
15
15
  }
16
+ /**
17
+ * Supported skill targets. Project-relative output paths mirror each editor's
18
+ * convention: Claude / Cursor use `skills/`, OpenCode uses `skill/` (singular)
19
+ * to match its global config layout, Codex uses `skills/`. The trailing
20
+ * `generated/` segment isolates auto-generated skills from human-authored ones.
21
+ */
22
+ export declare const SKILL_TARGETS: readonly ["claude", "cursor", "opencode", "codex"];
23
+ export type SkillTarget = (typeof SKILL_TARGETS)[number];
16
24
  /**
17
25
  * @brief Generate repo-specific skill files from detected communities
18
26
  * @param {string} repoPath - Absolute path to the repository root
19
27
  * @param {string} projectName - Human-readable project name
20
28
  * @param {PipelineResult} pipelineResult - In-memory pipeline data with communities, processes, graph
21
- * @returns {Promise<{ skills: GeneratedSkillInfo[], outputPath: string }>} Generated skill metadata
29
+ * @param {SkillTarget[]} targets - Editor targets to emit to. Defaults to ['claude'] (also used when the array is empty).
30
+ * @returns {Promise<{ skills: GeneratedSkillInfo[], outputPath: string, outputPaths: string[] }>}
31
+ * `outputPath` is the Claude path (or first target) for backwards compat;
32
+ * `outputPaths` lists every directory written to.
22
33
  */
23
- export declare const generateSkillFiles: (repoPath: string, projectName: string, pipelineResult: PipelineResult) => Promise<{
34
+ export declare const generateSkillFiles: (repoPath: string, projectName: string, pipelineResult: PipelineResult, targets?: SkillTarget[]) => Promise<{
24
35
  skills: GeneratedSkillInfo[];
25
36
  outputPath: string;
37
+ outputPaths: string[];
26
38
  }>;
@@ -8,6 +8,20 @@
8
8
  */
9
9
  import fs from 'fs/promises';
10
10
  import path from 'path';
11
+ import { estimateTokens } from './compress-stats.js';
12
+ /**
13
+ * Supported skill targets. Project-relative output paths mirror each editor's
14
+ * convention: Claude / Cursor use `skills/`, OpenCode uses `skill/` (singular)
15
+ * to match its global config layout, Codex uses `skills/`. The trailing
16
+ * `generated/` segment isolates auto-generated skills from human-authored ones.
17
+ */
18
+ export const SKILL_TARGETS = ['claude', 'cursor', 'opencode', 'codex'];
19
+ const SKILL_OUTPUT_DIRS = {
20
+ claude: ['.claude', 'skills', 'generated'],
21
+ cursor: ['.cursor', 'skills', 'generated'],
22
+ opencode: ['.opencode', 'skill', 'generated'],
23
+ codex: ['.codex', 'skills', 'generated'],
24
+ };
11
25
  // ============================================================================
12
26
  // MAIN EXPORT
13
27
  // ============================================================================
@@ -16,14 +30,24 @@ import path from 'path';
16
30
  * @param {string} repoPath - Absolute path to the repository root
17
31
  * @param {string} projectName - Human-readable project name
18
32
  * @param {PipelineResult} pipelineResult - In-memory pipeline data with communities, processes, graph
19
- * @returns {Promise<{ skills: GeneratedSkillInfo[], outputPath: string }>} Generated skill metadata
33
+ * @param {SkillTarget[]} targets - Editor targets to emit to. Defaults to ['claude'] (also used when the array is empty).
34
+ * @returns {Promise<{ skills: GeneratedSkillInfo[], outputPath: string, outputPaths: string[] }>}
35
+ * `outputPath` is the Claude path (or first target) for backwards compat;
36
+ * `outputPaths` lists every directory written to.
20
37
  */
21
- export const generateSkillFiles = async (repoPath, projectName, pipelineResult) => {
38
+ export const generateSkillFiles = async (repoPath, projectName, pipelineResult, targets = ['claude']) => {
22
39
  const { communityResult, processResult, graph } = pipelineResult;
23
- const outputDir = path.join(repoPath, '.claude', 'skills', 'generated');
40
+ // Resolve all output dirs once. The "primary" path is Claude (if requested)
41
+ // or the first target — kept for AGENTS.md / CLAUDE.md generators that link
42
+ // to skill files relative to .claude/.
43
+ const effectiveTargets = targets.length > 0 ? targets : ['claude'];
44
+ const outputDirs = effectiveTargets.map((t) => path.join(repoPath, ...SKILL_OUTPUT_DIRS[t]));
45
+ const primaryDir = effectiveTargets.includes('claude')
46
+ ? path.join(repoPath, ...SKILL_OUTPUT_DIRS.claude)
47
+ : outputDirs[0];
24
48
  if (!communityResult || !communityResult.memberships.length) {
25
49
  console.log('\n Skills: no communities detected, skipping skill generation');
26
- return { skills: [], outputPath: outputDir };
50
+ return { skills: [], outputPath: primaryDir, outputPaths: outputDirs };
27
51
  }
28
52
  console.log('\n Generating repo-specific skills...');
29
53
  // Step 1: Build communities from memberships (not the filtered communities array).
@@ -42,19 +66,21 @@ export const generateSkillFiles = async (repoPath, projectName, pipelineResult)
42
66
  .slice(0, 20);
43
67
  if (significant.length === 0) {
44
68
  console.log('\n Skills: no significant communities found (all below 3-symbol threshold)');
45
- return { skills: [], outputPath: outputDir };
69
+ return { skills: [], outputPath: primaryDir, outputPaths: outputDirs };
46
70
  }
47
71
  // Step 3: Build lookup maps
48
72
  const membershipsByComm = buildMembershipMap(communityResult.memberships);
49
73
  const nodeIdToCommunityLabel = buildNodeCommunityLabelMap(communityResult.memberships, communities);
50
- // Step 4: Clear and recreate output directory
51
- try {
52
- await fs.rm(outputDir, { recursive: true, force: true });
53
- }
54
- catch {
55
- /* may not exist */
74
+ // Step 4: Clear and recreate every output directory we'll write to
75
+ for (const dir of outputDirs) {
76
+ try {
77
+ await fs.rm(dir, { recursive: true, force: true });
78
+ }
79
+ catch {
80
+ /* may not exist */
81
+ }
82
+ await fs.mkdir(dir, { recursive: true });
56
83
  }
57
- await fs.mkdir(outputDir, { recursive: true });
58
84
  // Step 5: Generate skill files
59
85
  const skills = [];
60
86
  const usedNames = new Set();
@@ -76,10 +102,13 @@ export const generateSkillFiles = async (repoPath, projectName, pipelineResult)
76
102
  usedNames.add(kebabName);
77
103
  // Generate SKILL.md content
78
104
  const content = renderSkillMarkdown(community, projectName, members, files, entryPoints, flows, connections, kebabName);
79
- // Write file
80
- const skillDir = path.join(outputDir, kebabName);
81
- await fs.mkdir(skillDir, { recursive: true });
82
- await fs.writeFile(path.join(skillDir, 'SKILL.md'), content, 'utf-8');
105
+ // Write the same SKILL.md to each requested editor target
106
+ for (const dir of outputDirs) {
107
+ const skillDir = path.join(dir, kebabName);
108
+ await fs.mkdir(skillDir, { recursive: true });
109
+ await fs.writeFile(path.join(skillDir, 'SKILL.md'), content, 'utf-8');
110
+ }
111
+ const skillTokens = estimateTokens(content);
83
112
  const info = {
84
113
  name: kebabName,
85
114
  label: community.label,
@@ -87,10 +116,14 @@ export const generateSkillFiles = async (repoPath, projectName, pipelineResult)
87
116
  fileCount: files.length,
88
117
  };
89
118
  skills.push(info);
90
- console.log(` \u2713 ${community.label} (${community.symbolCount} symbols, ${files.length} files)`);
119
+ // Show the @codragraph/compress headline number per skill: how many
120
+ // tokens of distilled context this community boils down to.
121
+ console.log(` \u2713 ${community.label} (${community.symbolCount} symbols, ${files.length} files) ` +
122
+ `\u2192 ~${skillTokens.toLocaleString()} tokens`);
91
123
  }
92
- console.log(`\n ${skills.length} skills generated \u2192 .claude/skills/generated/`);
93
- return { skills, outputPath: outputDir };
124
+ const targetSummary = effectiveTargets.join(', ');
125
+ console.log(`\n ${skills.length} skills generated \u2192 ${targetSummary}`);
126
+ return { skills, outputPath: primaryDir, outputPaths: outputDirs };
94
127
  };
95
128
  // ============================================================================
96
129
  // FALLBACK COMMUNITY BUILDER
package/dist/cli/tool.js CHANGED
@@ -16,6 +16,7 @@
16
16
  */
17
17
  import { writeSync } from 'node:fs';
18
18
  import { LocalBackend } from '../mcp/local/local-backend.js';
19
+ import { emitTokenStats } from './compress-stats.js';
19
20
  let _backend = null;
20
21
  async function getBackend() {
21
22
  if (_backend)
@@ -68,6 +69,7 @@ export async function queryCommand(queryText, options) {
68
69
  repo: options?.repo,
69
70
  });
70
71
  output(result);
72
+ emitTokenStats(result);
71
73
  }
72
74
  export async function contextCommand(name, options) {
73
75
  if (!name?.trim() && !options?.uid) {
@@ -83,6 +85,7 @@ export async function contextCommand(name, options) {
83
85
  repo: options?.repo,
84
86
  });
85
87
  output(result);
88
+ emitTokenStats(result);
86
89
  }
87
90
  export async function impactCommand(target, options) {
88
91
  if (!target?.trim()) {
@@ -99,6 +102,7 @@ export async function impactCommand(target, options) {
99
102
  repo: options?.repo,
100
103
  });
101
104
  output(result);
105
+ emitTokenStats(result);
102
106
  }
103
107
  catch (err) {
104
108
  // Belt-and-suspenders: catch infrastructure failures (getBackend, callTool transport)
@@ -16,6 +16,7 @@ import { extractStructuralNames } from './structural-extractor.js';
16
16
  import { DEFAULT_EMBEDDING_CONFIG, EMBEDDABLE_LABELS, isShortLabel, LABEL_METHOD, LABELS_WITH_EXPORTED, STRUCTURAL_LABELS, collectBestChunks, } from './types.js';
17
17
  import { EMBEDDING_TABLE_NAME, EMBEDDING_INDEX_NAME, CREATE_VECTOR_INDEX_QUERY, STALE_HASH_SENTINEL, } from '../lbug/schema.js';
18
18
  import { loadVectorExtension } from '../lbug/lbug-adapter.js';
19
+ import { decodeContentField } from '../lbug/content-read.js';
19
20
  const isDev = process.env.NODE_ENV === 'development';
20
21
  /**
21
22
  * Bump this when the embedding text template changes in a way that should
@@ -46,12 +47,17 @@ const queryEmbeddableNodes = async (executeQuery) => {
46
47
  for (const label of EMBEDDABLE_LABELS) {
47
48
  try {
48
49
  let query;
50
+ // RFC 0001 Phase 2: pull contentEncoding alongside content so we
51
+ // hand DECODED text to the embedder. Embedding compressed bytes
52
+ // would silently destroy semantic search quality without any
53
+ // visible error — decode is mandatory at this boundary.
49
54
  if (label === LABEL_METHOD) {
50
55
  // Method has parameterCount and returnType
51
56
  query = `
52
57
  MATCH (n:Method)
53
58
  RETURN n.id AS id, n.name AS name, 'Method' AS label,
54
59
  n.filePath AS filePath, n.content AS content,
60
+ n.contentEncoding AS contentEncoding,
55
61
  n.startLine AS startLine, n.endLine AS endLine,
56
62
  n.isExported AS isExported, n.description AS description,
57
63
  n.parameterCount AS parameterCount, n.returnType AS returnType
@@ -63,6 +69,7 @@ const queryEmbeddableNodes = async (executeQuery) => {
63
69
  MATCH (n:\`${label}\`)
64
70
  RETURN n.id AS id, n.name AS name, '${label}' AS label,
65
71
  n.filePath AS filePath, n.content AS content,
72
+ n.contentEncoding AS contentEncoding,
66
73
  n.startLine AS startLine, n.endLine AS endLine,
67
74
  n.isExported AS isExported, n.description AS description
68
75
  `;
@@ -73,6 +80,7 @@ const queryEmbeddableNodes = async (executeQuery) => {
73
80
  MATCH (n:\`${label}\`)
74
81
  RETURN n.id AS id, n.name AS name, '${label}' AS label,
75
82
  n.filePath AS filePath, n.content AS content,
83
+ n.contentEncoding AS contentEncoding,
76
84
  n.startLine AS startLine, n.endLine AS endLine,
77
85
  n.description AS description
78
86
  `;
@@ -80,20 +88,29 @@ const queryEmbeddableNodes = async (executeQuery) => {
80
88
  const rows = await executeQuery(query);
81
89
  for (const row of rows) {
82
90
  const hasExportedColumn = label === LABEL_METHOD || LABELS_WITH_EXPORTED.has(label);
91
+ // Column layout (every variant of the query above shares the
92
+ // first eight positions; later columns differ by label):
93
+ // 0=id, 1=name, 2=label, 3=filePath,
94
+ // 4=content, 5=contentEncoding,
95
+ // 6=startLine, 7=endLine,
96
+ // 8=isExported (Method + LABELS_WITH_EXPORTED only)
97
+ // 8 or 9=description (depending on isExported presence)
98
+ // 10=parameterCount, 11=returnType (Method only)
99
+ const decoded = decodeContentField(row.content ?? row[4], row.contentEncoding ?? row[5]);
83
100
  allNodes.push({
84
101
  id: row.id ?? row[0],
85
102
  name: row.name ?? row[1],
86
103
  label: row.label ?? row[2],
87
104
  filePath: row.filePath ?? row[3],
88
- content: row.content ?? row[4] ?? '',
89
- startLine: row.startLine ?? row[5],
90
- endLine: row.endLine ?? row[6],
91
- isExported: hasExportedColumn ? (row.isExported ?? row[7]) : undefined,
92
- description: row.description ?? (hasExportedColumn ? row[8] : row[7]),
105
+ content: decoded ?? '',
106
+ startLine: row.startLine ?? row[6],
107
+ endLine: row.endLine ?? row[7],
108
+ isExported: hasExportedColumn ? (row.isExported ?? row[8]) : undefined,
109
+ description: row.description ?? (hasExportedColumn ? row[9] : row[8]),
93
110
  ...(label === LABEL_METHOD
94
111
  ? {
95
- parameterCount: row.parameterCount ?? row[9],
96
- returnType: row.returnType ?? row[10],
112
+ parameterCount: row.parameterCount ?? row[10],
113
+ returnType: row.returnType ?? row[11],
97
114
  }
98
115
  : {}),
99
116
  });
@@ -100,21 +100,93 @@ export async function ensureBridgeSchema(handle) {
100
100
  }
101
101
  }
102
102
  }
103
- export async function queryBridge(handle, cypher, params) {
103
+ /**
104
+ * Close every QueryResult / PreparedStatement before letting V8 GC them.
105
+ * Same close-order discipline as `core/lbug/lbug-adapter.ts:closeQueryResult`
106
+ * — leaking these handles past `conn.close()` corrupts LadybugDB's native
107
+ * file lock on Windows ("Error 33: The process cannot access the file
108
+ * because it is being used by another process") and segfaults on
109
+ * process exit elsewhere. Best-effort: wrap close calls in try/catch so
110
+ * a finalizer that already ran doesn't poison the queryBridge return.
111
+ */
112
+ async function closeBridgeHandle(h) {
113
+ if (!h)
114
+ return;
115
+ const candidates = Array.isArray(h) ? h : [h];
116
+ for (const r of candidates) {
117
+ try {
118
+ const close = r?.close;
119
+ if (typeof close === 'function')
120
+ await Promise.resolve(close.call(r));
121
+ }
122
+ catch {
123
+ /* best-effort */
124
+ }
125
+ }
126
+ }
127
+ /**
128
+ * True iff the error is a Windows-only transient file-lock surfaced by
129
+ * LadybugDB's native binding immediately after a writer process closes
130
+ * the same DB file. Symptom is `Error 33` on the read path even though
131
+ * `db.close()` returned cleanly at the JS layer — the kernel hasn't
132
+ * fully released the exclusive lock yet. Retrying with backoff is the
133
+ * documented workaround for this class of Windows-fs interactions.
134
+ */
135
+ function isTransientLbugLockError(err) {
136
+ const msg = err?.message ?? '';
137
+ return (msg.includes('Error 33') ||
138
+ msg.includes('locked a portion of the file') ||
139
+ msg.includes('cannot access the file because it is being used by another process'));
140
+ }
141
+ async function queryBridgeOnce(handle, cypher, params) {
104
142
  const conn = handle._conn;
105
143
  if (params && Object.keys(params).length > 0) {
106
144
  const stmt = await conn.prepare(cypher);
107
145
  if (!stmt.isSuccess()) {
108
146
  const errMsg = await stmt.getErrorMessage();
147
+ await closeBridgeHandle(stmt);
109
148
  throw new Error(`Bridge query prepare failed: ${errMsg}`);
110
149
  }
111
150
  const queryResult = await conn.execute(stmt, params);
112
151
  const result = unwrapQueryResult(queryResult);
113
- return (await result.getAll());
152
+ try {
153
+ return (await result.getAll());
154
+ }
155
+ finally {
156
+ await closeBridgeHandle(queryResult);
157
+ await closeBridgeHandle(stmt);
158
+ }
114
159
  }
115
160
  const queryResult = await conn.query(cypher);
116
161
  const result = unwrapQueryResult(queryResult);
117
- return (await result.getAll());
162
+ try {
163
+ return (await result.getAll());
164
+ }
165
+ finally {
166
+ await closeBridgeHandle(queryResult);
167
+ }
168
+ }
169
+ export async function queryBridge(handle, cypher, params) {
170
+ // Retry on Windows-transient file-lock errors. Reads issued through a
171
+ // freshly-opened readonly Database can race the writer's
172
+ // post-`db.close()` lock release on Windows + Node 22.14 (LadybugDB
173
+ // native binding holds the kernel lock briefly after the JS-level
174
+ // close returns). Backoff doubles per attempt up to ~3 s total — well
175
+ // below any user-visible CLI delay budget but enough to absorb a slow
176
+ // Windows kernel lock release.
177
+ const ATTEMPTS = 7;
178
+ for (let attempt = 0; attempt < ATTEMPTS; attempt++) {
179
+ try {
180
+ return await queryBridgeOnce(handle, cypher, params);
181
+ }
182
+ catch (err) {
183
+ if (!isTransientLbugLockError(err) || attempt === ATTEMPTS - 1)
184
+ throw err;
185
+ await new Promise((r) => setTimeout(r, 50 * Math.pow(2, attempt)));
186
+ }
187
+ }
188
+ // Unreachable: the loop either returns or throws on the last attempt.
189
+ throw new Error('queryBridge: retry loop exited unexpectedly');
118
190
  }
119
191
  /**
120
192
  * LadybugDB's `conn.query` / `conn.execute` can return either a single
@@ -421,32 +493,47 @@ export async function openBridgeDbReadOnly(groupDir) {
421
493
  // Open the native handle. If Connection construction throws AFTER
422
494
  // Database was successfully allocated, we'd leak the native Database
423
495
  // object. Wrap each step separately and tear down the partial handle.
424
- let db;
425
- let conn;
426
- try {
427
- db = new lbug.Database(dbPath, 0, false, true); // readOnly
428
- conn = new lbug.Connection(db);
429
- return { _db: db, _conn: conn, groupDir };
430
- }
431
- catch {
432
- if (conn) {
433
- try {
434
- await conn.close();
435
- }
436
- catch {
437
- /* ignore */
438
- }
496
+ //
497
+ // Retry on the Windows-transient lock error: the LadybugDB native
498
+ // binding holds the kernel file lock briefly past `db.close()` on
499
+ // Windows + Node 22.14, so a reader that races a recent writer can
500
+ // hit "Error 33: locked a portion of the file" on the constructor's
501
+ // first 4 KB header read. Backoff up to ~3 s lets the writer's lock
502
+ // age out — enough headroom for any normal write→read sequence
503
+ // without becoming a user-visible delay.
504
+ const ATTEMPTS = 7;
505
+ for (let attempt = 0; attempt < ATTEMPTS; attempt++) {
506
+ let db;
507
+ let conn;
508
+ try {
509
+ db = new lbug.Database(dbPath, 0, false, true); // readOnly
510
+ conn = new lbug.Connection(db);
511
+ return { _db: db, _conn: conn, groupDir };
439
512
  }
440
- if (db) {
441
- try {
442
- await db.close();
513
+ catch (err) {
514
+ if (conn) {
515
+ try {
516
+ await conn.close();
517
+ }
518
+ catch {
519
+ /* ignore */
520
+ }
443
521
  }
444
- catch {
445
- /* ignore */
522
+ if (db) {
523
+ try {
524
+ await db.close();
525
+ }
526
+ catch {
527
+ /* ignore */
528
+ }
446
529
  }
530
+ if (!isTransientLbugLockError(err) || attempt === ATTEMPTS - 1)
531
+ return null;
532
+ await new Promise((r) => setTimeout(r, 50 * Math.pow(2, attempt)));
533
+ continue;
447
534
  }
448
- return null;
449
535
  }
536
+ return null;
450
537
  }
451
538
  /* ------------------------------------------------------------------ */
452
539
  /* bridgeExists */
@@ -0,0 +1,46 @@
1
+ /**
2
+ * Read-side decoder for `content` columns in lbug node rows.
3
+ *
4
+ * RFC 0001 Phase 2 introduces an optional `contentEncoding` column on
5
+ * every node table that has `content`. Default is `'none'` (passthrough)
6
+ * so existing reads keep working unchanged. When a writer opts into
7
+ * `--compress brotli|zstd`, the column carries the encoding tag and the
8
+ * `content` column carries base64-encoded compressed bytes — readers
9
+ * MUST run those bytes back through `decodeContent` before handing them
10
+ * to a consumer (MCP tool result, HTTP API response, embedding model,
11
+ * LLM input).
12
+ *
13
+ * Centralizing the decode in one helper has two benefits:
14
+ * 1. Shim sites are 2-line changes: add `, n.contentEncoding AS
15
+ * contentEncoding` to the Cypher RETURN, and pipe the row through
16
+ * `decodeContentField` (or `decodeContentRow`) at the boundary.
17
+ * 2. Anyone hunting for "where does the read path decode compressed
18
+ * bytes" greps for `decodeContentField` and gets every site in one
19
+ * shot — no per-table feature detection scattered across files.
20
+ */
21
+ /**
22
+ * Decode a single (content, contentEncoding) pair from a Cypher row.
23
+ *
24
+ * Returns the input content unchanged when:
25
+ * - the encoding is missing / empty / `'none'` (the common case for
26
+ * 1.6.x – 1.7.x indexes, plus any 1.8+ index written without
27
+ * `--compress`);
28
+ * - content is null/undefined (yields `undefined`; caller decides whether that's an error);
29
+ * - content is not a string (pre-Phase-2 indexes never wrote non-string
30
+ * content, but defensive: don't crash a read path on a malformed row).
31
+ *
32
+ * Throws (via `decodeContent`) only when the row claims an encoding this
33
+ * CLI build can't decode — that's a forward-compat error and the right
34
+ * behavior is to fail loudly rather than return wrong content.
35
+ */
36
+ export declare function decodeContentField(content: unknown, encoding: unknown): string | undefined;
37
+ /**
38
+ * Apply `decodeContentField` to a row that carries `content` and
39
+ * `contentEncoding` keys (or their numeric column-index aliases).
40
+ *
41
+ * The numeric-fallback shape (`r[N]`) mirrors LadybugDB's row format —
42
+ * driver versions vary on whether named keys are populated, so existing
43
+ * read sites do `r.content ?? r[N]`. This helper accepts the same
44
+ * pattern. Returns a NEW object (does not mutate input).
45
+ */
46
+ export declare function decodeContentRow<T extends Record<string, unknown>>(row: T, contentKey?: keyof T, encodingKey?: keyof T): T;
@@ -0,0 +1,64 @@
1
+ /**
2
+ * Read-side decoder for `content` columns in lbug node rows.
3
+ *
4
+ * RFC 0001 Phase 2 introduces an optional `contentEncoding` column on
5
+ * every node table that has `content`. Default is `'none'` (passthrough)
6
+ * so existing reads keep working unchanged. When a writer opts into
7
+ * `--compress brotli|zstd`, the column carries the encoding tag and the
8
+ * `content` column carries base64-encoded compressed bytes — readers
9
+ * MUST run those bytes back through `decodeContent` before handing them
10
+ * to a consumer (MCP tool result, HTTP API response, embedding model,
11
+ * LLM input).
12
+ *
13
+ * Centralizing the decode in one helper has two benefits:
14
+ * 1. Shim sites are 2-line changes: add `, n.contentEncoding AS
15
+ * contentEncoding` to the Cypher RETURN, and pipe the row through
16
+ * `decodeContentField` (or `decodeContentRow`) at the boundary.
17
+ * 2. Anyone hunting for "where does the read path decode compressed
18
+ * bytes" greps for `decodeContentField` and gets every site in one
19
+ * shot — no per-table feature detection scattered across files.
20
+ */
21
+ import { decodeContent } from '@codragraph/graphstore';
22
+ /**
23
+ * Decode a single (content, contentEncoding) pair from a Cypher row.
24
+ *
25
+ * Returns the input content unchanged when:
26
+ * - the encoding is missing / empty / `'none'` (the common case for
27
+ * 1.6.x – 1.7.x indexes, plus any 1.8+ index written without
28
+ * `--compress`);
29
+ * - content is null/undefined (yields `undefined`; caller decides whether that's an error);
30
+ * - content is not a string (pre-Phase-2 indexes never wrote non-string
31
+ * content, but defensive: don't crash a read path on a malformed row).
32
+ *
33
+ * Throws (via `decodeContent`) only when the row claims an encoding this
34
+ * CLI build can't decode — that's a forward-compat error and the right
35
+ * behavior is to fail loudly rather than return wrong content.
36
+ */
37
+ export function decodeContentField(content, encoding) {
38
+ if (content === undefined || content === null)
39
+ return undefined;
40
+ if (typeof content !== 'string')
41
+ return content;
42
+ if (typeof encoding !== 'string' || encoding === '' || encoding === 'none') {
43
+ return content;
44
+ }
45
+ return decodeContent(content, encoding);
46
+ }
47
+ /**
48
+ * Apply `decodeContentField` to a row that carries `content` and
49
+ * `contentEncoding` keys (or their numeric column-index aliases).
50
+ *
51
+ * The numeric-fallback shape (`r[N]`) mirrors LadybugDB's row format —
52
+ * driver versions vary on whether named keys are populated, so existing
53
+ * read sites do `r.content ?? r[N]`. This helper accepts the same
54
+ * pattern. Never mutates the input: returns a NEW object when it decodes, otherwise the same row.
55
+ */
56
+ export function decodeContentRow(row, contentKey = 'content', encodingKey = 'contentEncoding') {
57
+ const content = row[contentKey];
58
+ if (content === undefined || content === null)
59
+ return row;
60
+ const encoding = row[encodingKey];
61
+ if (typeof encoding !== 'string' || encoding === '' || encoding === 'none')
62
+ return row;
63
+ return { ...row, [contentKey]: decodeContentField(content, encoding) };
64
+ }
@@ -13,6 +13,7 @@
13
13
  */
14
14
  import { KnowledgeGraph } from '../graph/types.js';
15
15
  import { NodeTableName } from './schema.js';
16
+ import { type ContentEncoding } from '@codragraph/graphstore';
16
17
  export declare const sanitizeUTF8: (str: string) => string;
17
18
  export declare const escapeCSVField: (value: string | number | undefined | null) => string;
18
19
  export declare const escapeCSVNumber: (value: number | undefined | null, defaultValue?: number) => string;
@@ -25,9 +26,4 @@ export interface StreamedCSVResult {
25
26
  relCsvPath: string;
26
27
  relRows: number;
27
28
  }
28
- /**
29
- * Stream all CSV data directly to disk files.
30
- * Iterates graph nodes exactly ONCE — routes each node to the right writer.
31
- * File contents are lazy-read from disk with a generous LRU cache.
32
- */
33
- export declare const streamAllCSVsToDisk: (graph: KnowledgeGraph, repoPath: string, csvDir: string) => Promise<StreamedCSVResult>;
29
+ export declare const streamAllCSVsToDisk: (graph: KnowledgeGraph, repoPath: string, csvDir: string, compress?: ContentEncoding) => Promise<StreamedCSVResult>;