@codragraph/cli 1.6.4 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -0
- package/dist/cli/analyze.d.ts +22 -0
- package/dist/cli/analyze.js +107 -4
- package/dist/cli/compress-stats.d.ts +29 -0
- package/dist/cli/compress-stats.js +97 -0
- package/dist/cli/graphstore.d.ts +6 -2
- package/dist/cli/graphstore.js +24 -2
- package/dist/cli/index.js +16 -2
- package/dist/cli/profile-heap.d.ts +35 -0
- package/dist/cli/profile-heap.js +126 -0
- package/dist/cli/setup.d.ts +13 -0
- package/dist/cli/setup.js +22 -11
- package/dist/cli/skill-gen.d.ts +14 -2
- package/dist/cli/skill-gen.js +52 -19
- package/dist/cli/tool.js +4 -0
- package/dist/core/embeddings/embedding-pipeline.js +24 -7
- package/dist/core/group/bridge-db.js +111 -24
- package/dist/core/lbug/content-read.d.ts +46 -0
- package/dist/core/lbug/content-read.js +64 -0
- package/dist/core/lbug/csv-generator.d.ts +2 -6
- package/dist/core/lbug/csv-generator.js +45 -12
- package/dist/core/lbug/lbug-adapter.d.ts +4 -1
- package/dist/core/lbug/lbug-adapter.js +153 -21
- package/dist/core/lbug/schema.d.ts +7 -7
- package/dist/core/lbug/schema.js +18 -0
- package/dist/core/run-analyze.d.ts +13 -0
- package/dist/core/run-analyze.js +91 -4
- package/dist/core/search/bm25-index.js +67 -15
- package/dist/mcp/local/local-backend.js +22 -5
- package/dist/server/api.js +4 -3
- package/dist/storage/repo-manager.d.ts +39 -0
- package/dist/storage/repo-manager.js +19 -0
- package/hooks/claude/codragraph-hook.cjs +95 -2
- package/package.json +4 -4
- package/scripts/build-tree-sitter-proto.cjs +15 -3
- package/scripts/patch-tree-sitter-swift.cjs +17 -4
- package/skills/codragraph-api-surface.md +110 -0
- package/skills/codragraph-config-audit.md +146 -0
- package/skills/codragraph-cross-repo-impact.md +135 -0
- package/skills/codragraph-data-lineage.md +137 -0
- package/skills/codragraph-dead-code.md +119 -0
- package/skills/codragraph-gh-actions-debug.md +162 -0
- package/skills/codragraph-gh-issue-workflow.md +178 -0
- package/skills/codragraph-gh-pr-workflow.md +176 -0
- package/skills/codragraph-gh-release-workflow.md +187 -0
- package/skills/codragraph-git-bisect.md +176 -0
- package/skills/codragraph-git-force-push.md +147 -0
- package/skills/codragraph-git-history-rewrite.md +174 -0
- package/skills/codragraph-git-rebase-vs-merge.md +138 -0
- package/skills/codragraph-git-recovery.md +181 -0
- package/skills/codragraph-git-worktree.md +145 -0
- package/skills/codragraph-migration-tracking.md +130 -0
- package/skills/codragraph-notebook-context.md +136 -0
- package/skills/codragraph-observability-coverage.md +125 -0
- package/skills/codragraph-onboarding.md +129 -0
- package/skills/codragraph-perf-hotspots.md +132 -0
- package/skills/codragraph-project-switcher.md +116 -0
- package/skills/codragraph-security-audit.md +144 -0
- package/skills/codragraph-sql-tracing.md +122 -0
- package/skills/codragraph-supply-chain-audit.md +153 -0
- package/skills/codragraph-test-coverage.md +97 -0
package/dist/cli/setup.js
CHANGED
|
@@ -519,12 +519,17 @@ async function installCodexSkills(result) {
|
|
|
519
519
|
result.errors.push(`Codex skills: ${err.message}`);
|
|
520
520
|
}
|
|
521
521
|
}
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
522
|
+
export const runSetup = async (options = {}) => {
|
|
523
|
+
if (options.compactHeader) {
|
|
524
|
+
console.log(' CodraGraph: first-run editor setup');
|
|
525
|
+
console.log('');
|
|
526
|
+
}
|
|
527
|
+
else {
|
|
528
|
+
console.log('');
|
|
529
|
+
console.log(' CodraGraph Setup');
|
|
530
|
+
console.log(' ==============');
|
|
531
|
+
console.log('');
|
|
532
|
+
}
|
|
528
533
|
// Ensure global directory exists
|
|
529
534
|
const globalDir = getGlobalDir();
|
|
530
535
|
await fs.mkdir(globalDir, { recursive: true });
|
|
@@ -569,10 +574,16 @@ export const setupCommand = async () => {
|
|
|
569
574
|
console.log(' Summary:');
|
|
570
575
|
console.log(` MCP configured for: ${result.configured.filter((c) => !c.includes('skills')).join(', ') || 'none'}`);
|
|
571
576
|
console.log(` Skills installed to: ${result.configured.filter((c) => c.includes('skills')).length > 0 ? result.configured.filter((c) => c.includes('skills')).join(', ') : 'none'}`);
|
|
577
|
+
if (!options.skipNextSteps) {
|
|
578
|
+
console.log('');
|
|
579
|
+
console.log(' Next steps:');
|
|
580
|
+
console.log(' 1. cd into any git repo');
|
|
581
|
+
console.log(' 2. Run: codragraph analyze');
|
|
582
|
+
console.log(' 3. Open the repo in your editor — MCP is ready!');
|
|
583
|
+
}
|
|
572
584
|
console.log('');
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
console.log('');
|
|
585
|
+
return result;
|
|
586
|
+
};
|
|
587
|
+
export const setupCommand = async () => {
|
|
588
|
+
await runSetup();
|
|
578
589
|
};
|
package/dist/cli/skill-gen.d.ts
CHANGED
|
@@ -13,14 +13,26 @@ export interface GeneratedSkillInfo {
|
|
|
13
13
|
symbolCount: number;
|
|
14
14
|
fileCount: number;
|
|
15
15
|
}
|
|
16
|
+
/**
|
|
17
|
+
* Supported skill targets. Project-relative output paths mirror each editor's
|
|
18
|
+
* convention: Claude / Cursor use `skills/`, OpenCode uses `skill/` (singular)
|
|
19
|
+
* to match its global config layout, Codex uses `skills/`. The trailing
|
|
20
|
+
* `generated/` segment isolates auto-generated skills from human-authored ones.
|
|
21
|
+
*/
|
|
22
|
+
export declare const SKILL_TARGETS: readonly ["claude", "cursor", "opencode", "codex"];
|
|
23
|
+
export type SkillTarget = (typeof SKILL_TARGETS)[number];
|
|
16
24
|
/**
|
|
17
25
|
* @brief Generate repo-specific skill files from detected communities
|
|
18
26
|
* @param {string} repoPath - Absolute path to the repository root
|
|
19
27
|
* @param {string} projectName - Human-readable project name
|
|
20
28
|
* @param {PipelineResult} pipelineResult - In-memory pipeline data with communities, processes, graph
|
|
21
|
-
* @
|
|
29
|
+
* @param {SkillTarget[]} targets - Editor targets to emit to. Defaults to ['claude'].
|
|
30
|
+
* @returns {Promise<{ skills: GeneratedSkillInfo[], outputPath: string, outputPaths: string[] }>}
|
|
31
|
+
* `outputPath` is the Claude path (or first target) for backwards compat;
|
|
32
|
+
* `outputPaths` lists every directory written to.
|
|
22
33
|
*/
|
|
23
|
-
export declare const generateSkillFiles: (repoPath: string, projectName: string, pipelineResult: PipelineResult) => Promise<{
|
|
34
|
+
export declare const generateSkillFiles: (repoPath: string, projectName: string, pipelineResult: PipelineResult, targets?: SkillTarget[]) => Promise<{
|
|
24
35
|
skills: GeneratedSkillInfo[];
|
|
25
36
|
outputPath: string;
|
|
37
|
+
outputPaths: string[];
|
|
26
38
|
}>;
|
package/dist/cli/skill-gen.js
CHANGED
|
@@ -8,6 +8,20 @@
|
|
|
8
8
|
*/
|
|
9
9
|
import fs from 'fs/promises';
|
|
10
10
|
import path from 'path';
|
|
11
|
+
import { estimateTokens } from './compress-stats.js';
|
|
12
|
+
/**
|
|
13
|
+
* Supported skill targets. Project-relative output paths mirror each editor's
|
|
14
|
+
* convention: Claude / Cursor use `skills/`, OpenCode uses `skill/` (singular)
|
|
15
|
+
* to match its global config layout, Codex uses `skills/`. The trailing
|
|
16
|
+
* `generated/` segment isolates auto-generated skills from human-authored ones.
|
|
17
|
+
*/
|
|
18
|
+
export const SKILL_TARGETS = ['claude', 'cursor', 'opencode', 'codex'];
|
|
19
|
+
const SKILL_OUTPUT_DIRS = {
|
|
20
|
+
claude: ['.claude', 'skills', 'generated'],
|
|
21
|
+
cursor: ['.cursor', 'skills', 'generated'],
|
|
22
|
+
opencode: ['.opencode', 'skill', 'generated'],
|
|
23
|
+
codex: ['.codex', 'skills', 'generated'],
|
|
24
|
+
};
|
|
11
25
|
// ============================================================================
|
|
12
26
|
// MAIN EXPORT
|
|
13
27
|
// ============================================================================
|
|
@@ -16,14 +30,24 @@ import path from 'path';
|
|
|
16
30
|
* @param {string} repoPath - Absolute path to the repository root
|
|
17
31
|
* @param {string} projectName - Human-readable project name
|
|
18
32
|
* @param {PipelineResult} pipelineResult - In-memory pipeline data with communities, processes, graph
|
|
19
|
-
* @
|
|
33
|
+
* @param {SkillTarget[]} targets - Editor targets to emit to. Defaults to ['claude'].
|
|
34
|
+
* @returns {Promise<{ skills: GeneratedSkillInfo[], outputPath: string, outputPaths: string[] }>}
|
|
35
|
+
* `outputPath` is the Claude path (or first target) for backwards compat;
|
|
36
|
+
* `outputPaths` lists every directory written to.
|
|
20
37
|
*/
|
|
21
|
-
export const generateSkillFiles = async (repoPath, projectName, pipelineResult) => {
|
|
38
|
+
export const generateSkillFiles = async (repoPath, projectName, pipelineResult, targets = ['claude']) => {
|
|
22
39
|
const { communityResult, processResult, graph } = pipelineResult;
|
|
23
|
-
|
|
40
|
+
// Resolve all output dirs once. The "primary" path is Claude (if requested)
|
|
41
|
+
// or the first target — kept for AGENTS.md / CLAUDE.md generators that link
|
|
42
|
+
// to skill files relative to .claude/.
|
|
43
|
+
const effectiveTargets = targets.length > 0 ? targets : ['claude'];
|
|
44
|
+
const outputDirs = effectiveTargets.map((t) => path.join(repoPath, ...SKILL_OUTPUT_DIRS[t]));
|
|
45
|
+
const primaryDir = effectiveTargets.includes('claude')
|
|
46
|
+
? path.join(repoPath, ...SKILL_OUTPUT_DIRS.claude)
|
|
47
|
+
: outputDirs[0];
|
|
24
48
|
if (!communityResult || !communityResult.memberships.length) {
|
|
25
49
|
console.log('\n Skills: no communities detected, skipping skill generation');
|
|
26
|
-
return { skills: [], outputPath:
|
|
50
|
+
return { skills: [], outputPath: primaryDir, outputPaths: outputDirs };
|
|
27
51
|
}
|
|
28
52
|
console.log('\n Generating repo-specific skills...');
|
|
29
53
|
// Step 1: Build communities from memberships (not the filtered communities array).
|
|
@@ -42,19 +66,21 @@ export const generateSkillFiles = async (repoPath, projectName, pipelineResult)
|
|
|
42
66
|
.slice(0, 20);
|
|
43
67
|
if (significant.length === 0) {
|
|
44
68
|
console.log('\n Skills: no significant communities found (all below 3-symbol threshold)');
|
|
45
|
-
return { skills: [], outputPath:
|
|
69
|
+
return { skills: [], outputPath: primaryDir, outputPaths: outputDirs };
|
|
46
70
|
}
|
|
47
71
|
// Step 3: Build lookup maps
|
|
48
72
|
const membershipsByComm = buildMembershipMap(communityResult.memberships);
|
|
49
73
|
const nodeIdToCommunityLabel = buildNodeCommunityLabelMap(communityResult.memberships, communities);
|
|
50
|
-
// Step 4: Clear and recreate output directory
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
74
|
+
// Step 4: Clear and recreate every output directory we'll write to
|
|
75
|
+
for (const dir of outputDirs) {
|
|
76
|
+
try {
|
|
77
|
+
await fs.rm(dir, { recursive: true, force: true });
|
|
78
|
+
}
|
|
79
|
+
catch {
|
|
80
|
+
/* may not exist */
|
|
81
|
+
}
|
|
82
|
+
await fs.mkdir(dir, { recursive: true });
|
|
56
83
|
}
|
|
57
|
-
await fs.mkdir(outputDir, { recursive: true });
|
|
58
84
|
// Step 5: Generate skill files
|
|
59
85
|
const skills = [];
|
|
60
86
|
const usedNames = new Set();
|
|
@@ -76,10 +102,13 @@ export const generateSkillFiles = async (repoPath, projectName, pipelineResult)
|
|
|
76
102
|
usedNames.add(kebabName);
|
|
77
103
|
// Generate SKILL.md content
|
|
78
104
|
const content = renderSkillMarkdown(community, projectName, members, files, entryPoints, flows, connections, kebabName);
|
|
79
|
-
// Write
|
|
80
|
-
const
|
|
81
|
-
|
|
82
|
-
|
|
105
|
+
// Write the same SKILL.md to each requested editor target
|
|
106
|
+
for (const dir of outputDirs) {
|
|
107
|
+
const skillDir = path.join(dir, kebabName);
|
|
108
|
+
await fs.mkdir(skillDir, { recursive: true });
|
|
109
|
+
await fs.writeFile(path.join(skillDir, 'SKILL.md'), content, 'utf-8');
|
|
110
|
+
}
|
|
111
|
+
const skillTokens = estimateTokens(content);
|
|
83
112
|
const info = {
|
|
84
113
|
name: kebabName,
|
|
85
114
|
label: community.label,
|
|
@@ -87,10 +116,14 @@ export const generateSkillFiles = async (repoPath, projectName, pipelineResult)
|
|
|
87
116
|
fileCount: files.length,
|
|
88
117
|
};
|
|
89
118
|
skills.push(info);
|
|
90
|
-
|
|
119
|
+
// Show the @codragraph/compress headline number per skill: how many
|
|
120
|
+
// tokens of distilled context this community boils down to.
|
|
121
|
+
console.log(` \u2713 ${community.label} (${community.symbolCount} symbols, ${files.length} files) ` +
|
|
122
|
+
`\u2192 ~${skillTokens.toLocaleString()} tokens`);
|
|
91
123
|
}
|
|
92
|
-
|
|
93
|
-
|
|
124
|
+
const targetSummary = effectiveTargets.join(', ');
|
|
125
|
+
console.log(`\n ${skills.length} skills generated \u2192 ${targetSummary}`);
|
|
126
|
+
return { skills, outputPath: primaryDir, outputPaths: outputDirs };
|
|
94
127
|
};
|
|
95
128
|
// ============================================================================
|
|
96
129
|
// FALLBACK COMMUNITY BUILDER
|
package/dist/cli/tool.js
CHANGED
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
*/
|
|
17
17
|
import { writeSync } from 'node:fs';
|
|
18
18
|
import { LocalBackend } from '../mcp/local/local-backend.js';
|
|
19
|
+
import { emitTokenStats } from './compress-stats.js';
|
|
19
20
|
let _backend = null;
|
|
20
21
|
async function getBackend() {
|
|
21
22
|
if (_backend)
|
|
@@ -68,6 +69,7 @@ export async function queryCommand(queryText, options) {
|
|
|
68
69
|
repo: options?.repo,
|
|
69
70
|
});
|
|
70
71
|
output(result);
|
|
72
|
+
emitTokenStats(result);
|
|
71
73
|
}
|
|
72
74
|
export async function contextCommand(name, options) {
|
|
73
75
|
if (!name?.trim() && !options?.uid) {
|
|
@@ -83,6 +85,7 @@ export async function contextCommand(name, options) {
|
|
|
83
85
|
repo: options?.repo,
|
|
84
86
|
});
|
|
85
87
|
output(result);
|
|
88
|
+
emitTokenStats(result);
|
|
86
89
|
}
|
|
87
90
|
export async function impactCommand(target, options) {
|
|
88
91
|
if (!target?.trim()) {
|
|
@@ -99,6 +102,7 @@ export async function impactCommand(target, options) {
|
|
|
99
102
|
repo: options?.repo,
|
|
100
103
|
});
|
|
101
104
|
output(result);
|
|
105
|
+
emitTokenStats(result);
|
|
102
106
|
}
|
|
103
107
|
catch (err) {
|
|
104
108
|
// Belt-and-suspenders: catch infrastructure failures (getBackend, callTool transport)
|
|
@@ -16,6 +16,7 @@ import { extractStructuralNames } from './structural-extractor.js';
|
|
|
16
16
|
import { DEFAULT_EMBEDDING_CONFIG, EMBEDDABLE_LABELS, isShortLabel, LABEL_METHOD, LABELS_WITH_EXPORTED, STRUCTURAL_LABELS, collectBestChunks, } from './types.js';
|
|
17
17
|
import { EMBEDDING_TABLE_NAME, EMBEDDING_INDEX_NAME, CREATE_VECTOR_INDEX_QUERY, STALE_HASH_SENTINEL, } from '../lbug/schema.js';
|
|
18
18
|
import { loadVectorExtension } from '../lbug/lbug-adapter.js';
|
|
19
|
+
import { decodeContentField } from '../lbug/content-read.js';
|
|
19
20
|
const isDev = process.env.NODE_ENV === 'development';
|
|
20
21
|
/**
|
|
21
22
|
* Bump this when the embedding text template changes in a way that should
|
|
@@ -46,12 +47,17 @@ const queryEmbeddableNodes = async (executeQuery) => {
|
|
|
46
47
|
for (const label of EMBEDDABLE_LABELS) {
|
|
47
48
|
try {
|
|
48
49
|
let query;
|
|
50
|
+
// RFC 0001 Phase 2: pull contentEncoding alongside content so we
|
|
51
|
+
// hand DECODED text to the embedder. Embedding compressed bytes
|
|
52
|
+
// would silently destroy semantic search quality without any
|
|
53
|
+
// visible error — decode is mandatory at this boundary.
|
|
49
54
|
if (label === LABEL_METHOD) {
|
|
50
55
|
// Method has parameterCount and returnType
|
|
51
56
|
query = `
|
|
52
57
|
MATCH (n:Method)
|
|
53
58
|
RETURN n.id AS id, n.name AS name, 'Method' AS label,
|
|
54
59
|
n.filePath AS filePath, n.content AS content,
|
|
60
|
+
n.contentEncoding AS contentEncoding,
|
|
55
61
|
n.startLine AS startLine, n.endLine AS endLine,
|
|
56
62
|
n.isExported AS isExported, n.description AS description,
|
|
57
63
|
n.parameterCount AS parameterCount, n.returnType AS returnType
|
|
@@ -63,6 +69,7 @@ const queryEmbeddableNodes = async (executeQuery) => {
|
|
|
63
69
|
MATCH (n:\`${label}\`)
|
|
64
70
|
RETURN n.id AS id, n.name AS name, '${label}' AS label,
|
|
65
71
|
n.filePath AS filePath, n.content AS content,
|
|
72
|
+
n.contentEncoding AS contentEncoding,
|
|
66
73
|
n.startLine AS startLine, n.endLine AS endLine,
|
|
67
74
|
n.isExported AS isExported, n.description AS description
|
|
68
75
|
`;
|
|
@@ -73,6 +80,7 @@ const queryEmbeddableNodes = async (executeQuery) => {
|
|
|
73
80
|
MATCH (n:\`${label}\`)
|
|
74
81
|
RETURN n.id AS id, n.name AS name, '${label}' AS label,
|
|
75
82
|
n.filePath AS filePath, n.content AS content,
|
|
83
|
+
n.contentEncoding AS contentEncoding,
|
|
76
84
|
n.startLine AS startLine, n.endLine AS endLine,
|
|
77
85
|
n.description AS description
|
|
78
86
|
`;
|
|
@@ -80,20 +88,29 @@ const queryEmbeddableNodes = async (executeQuery) => {
|
|
|
80
88
|
const rows = await executeQuery(query);
|
|
81
89
|
for (const row of rows) {
|
|
82
90
|
const hasExportedColumn = label === LABEL_METHOD || LABELS_WITH_EXPORTED.has(label);
|
|
91
|
+
// Column layout (every variant of the query above shares the
|
|
92
|
+
// first eight positions, 0-7; later columns differ by label):
|
|
93
|
+
// 0=id, 1=name, 2=label, 3=filePath,
|
|
94
|
+
// 4=content, 5=contentEncoding,
|
|
95
|
+
// 6=startLine, 7=endLine,
|
|
96
|
+
// 8=isExported (Method + LABELS_WITH_EXPORTED only)
|
|
97
|
+
// 8 or 9=description (depending on isExported presence)
|
|
98
|
+
// 10=parameterCount, 11=returnType (Method only)
|
|
99
|
+
const decoded = decodeContentField(row.content ?? row[4], row.contentEncoding ?? row[5]);
|
|
83
100
|
allNodes.push({
|
|
84
101
|
id: row.id ?? row[0],
|
|
85
102
|
name: row.name ?? row[1],
|
|
86
103
|
label: row.label ?? row[2],
|
|
87
104
|
filePath: row.filePath ?? row[3],
|
|
88
|
-
content:
|
|
89
|
-
startLine: row.startLine ?? row[
|
|
90
|
-
endLine: row.endLine ?? row[
|
|
91
|
-
isExported: hasExportedColumn ? (row.isExported ?? row[
|
|
92
|
-
description: row.description ?? (hasExportedColumn ? row[
|
|
105
|
+
content: decoded ?? '',
|
|
106
|
+
startLine: row.startLine ?? row[6],
|
|
107
|
+
endLine: row.endLine ?? row[7],
|
|
108
|
+
isExported: hasExportedColumn ? (row.isExported ?? row[8]) : undefined,
|
|
109
|
+
description: row.description ?? (hasExportedColumn ? row[9] : row[8]),
|
|
93
110
|
...(label === LABEL_METHOD
|
|
94
111
|
? {
|
|
95
|
-
parameterCount: row.parameterCount ?? row[
|
|
96
|
-
returnType: row.returnType ?? row[
|
|
112
|
+
parameterCount: row.parameterCount ?? row[10],
|
|
113
|
+
returnType: row.returnType ?? row[11],
|
|
97
114
|
}
|
|
98
115
|
: {}),
|
|
99
116
|
});
|
|
@@ -100,21 +100,93 @@ export async function ensureBridgeSchema(handle) {
|
|
|
100
100
|
}
|
|
101
101
|
}
|
|
102
102
|
}
|
|
103
|
-
|
|
103
|
+
/**
|
|
104
|
+
* Close every QueryResult / PreparedStatement before letting V8 GC them.
|
|
105
|
+
* Same close-order discipline as `core/lbug/lbug-adapter.ts:closeQueryResult`
|
|
106
|
+
* — leaking these handles past `conn.close()` corrupts LadybugDB's native
|
|
107
|
+
* file lock on Windows ("Error 33: The process cannot access the file
|
|
108
|
+
* because it is being used by another process") and segfaults on
|
|
109
|
+
* process exit elsewhere. Best-effort: wrap close calls in try/catch so
|
|
110
|
+
* a finalizer that already ran doesn't poison the queryBridge return.
|
|
111
|
+
*/
|
|
112
|
+
async function closeBridgeHandle(h) {
|
|
113
|
+
if (!h)
|
|
114
|
+
return;
|
|
115
|
+
const candidates = Array.isArray(h) ? h : [h];
|
|
116
|
+
for (const r of candidates) {
|
|
117
|
+
try {
|
|
118
|
+
const close = r?.close;
|
|
119
|
+
if (typeof close === 'function')
|
|
120
|
+
await Promise.resolve(close.call(r));
|
|
121
|
+
}
|
|
122
|
+
catch {
|
|
123
|
+
/* best-effort */
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
/**
|
|
128
|
+
* True iff the error is a Windows-only transient file-lock error surfaced by
|
|
129
|
+
* LadybugDB's native binding immediately after a writer process closes
|
|
130
|
+
* the same DB file. Symptom is `Error 33` on the read path even though
|
|
131
|
+
* `db.close()` returned cleanly at the JS layer — the kernel hasn't
|
|
132
|
+
* fully released the exclusive lock yet. Retrying with backoff is the
|
|
133
|
+
* documented workaround for this class of Windows-fs interactions.
|
|
134
|
+
*/
|
|
135
|
+
function isTransientLbugLockError(err) {
|
|
136
|
+
const msg = err?.message ?? '';
|
|
137
|
+
return (msg.includes('Error 33') ||
|
|
138
|
+
msg.includes('locked a portion of the file') ||
|
|
139
|
+
msg.includes('cannot access the file because it is being used by another process'));
|
|
140
|
+
}
|
|
141
|
+
async function queryBridgeOnce(handle, cypher, params) {
|
|
104
142
|
const conn = handle._conn;
|
|
105
143
|
if (params && Object.keys(params).length > 0) {
|
|
106
144
|
const stmt = await conn.prepare(cypher);
|
|
107
145
|
if (!stmt.isSuccess()) {
|
|
108
146
|
const errMsg = await stmt.getErrorMessage();
|
|
147
|
+
await closeBridgeHandle(stmt);
|
|
109
148
|
throw new Error(`Bridge query prepare failed: ${errMsg}`);
|
|
110
149
|
}
|
|
111
150
|
const queryResult = await conn.execute(stmt, params);
|
|
112
151
|
const result = unwrapQueryResult(queryResult);
|
|
113
|
-
|
|
152
|
+
try {
|
|
153
|
+
return (await result.getAll());
|
|
154
|
+
}
|
|
155
|
+
finally {
|
|
156
|
+
await closeBridgeHandle(queryResult);
|
|
157
|
+
await closeBridgeHandle(stmt);
|
|
158
|
+
}
|
|
114
159
|
}
|
|
115
160
|
const queryResult = await conn.query(cypher);
|
|
116
161
|
const result = unwrapQueryResult(queryResult);
|
|
117
|
-
|
|
162
|
+
try {
|
|
163
|
+
return (await result.getAll());
|
|
164
|
+
}
|
|
165
|
+
finally {
|
|
166
|
+
await closeBridgeHandle(queryResult);
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
export async function queryBridge(handle, cypher, params) {
|
|
170
|
+
// Retry on Windows-transient file-lock errors. Reads issued through a
|
|
171
|
+
// freshly-opened readonly Database can race the writer's
|
|
172
|
+
// post-`db.close()` lock release on Windows + Node 22.14 (LadybugDB
|
|
173
|
+
// native binding holds the kernel lock briefly after the JS-level
|
|
174
|
+
// close returns). Backoff doubles per attempt up to ~3 s total — well
|
|
175
|
+
// below any user-visible CLI delay budget but enough to absorb a slow
|
|
176
|
+
// Windows kernel lock release.
|
|
177
|
+
const ATTEMPTS = 7;
|
|
178
|
+
for (let attempt = 0; attempt < ATTEMPTS; attempt++) {
|
|
179
|
+
try {
|
|
180
|
+
return await queryBridgeOnce(handle, cypher, params);
|
|
181
|
+
}
|
|
182
|
+
catch (err) {
|
|
183
|
+
if (!isTransientLbugLockError(err) || attempt === ATTEMPTS - 1)
|
|
184
|
+
throw err;
|
|
185
|
+
await new Promise((r) => setTimeout(r, 50 * Math.pow(2, attempt)));
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
// Unreachable: the loop either returns or throws on the last attempt.
|
|
189
|
+
throw new Error('queryBridge: retry loop exited unexpectedly');
|
|
118
190
|
}
|
|
119
191
|
/**
|
|
120
192
|
* LadybugDB's `conn.query` / `conn.execute` can return either a single
|
|
@@ -421,32 +493,47 @@ export async function openBridgeDbReadOnly(groupDir) {
|
|
|
421
493
|
// Open the native handle. If Connection construction throws AFTER
|
|
422
494
|
// Database was successfully allocated, we'd leak the native Database
|
|
423
495
|
// object. Wrap each step separately and tear down the partial handle.
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
496
|
+
//
|
|
497
|
+
// Retry on the Windows-transient lock error: the LadybugDB native
|
|
498
|
+
// binding holds the kernel file lock briefly past `db.close()` on
|
|
499
|
+
// Windows + Node 22.14, so a reader that races a recent writer can
|
|
500
|
+
// hit "Error 33: locked a portion of the file" on the constructor's
|
|
501
|
+
// first 4 KB header read. Backoff up to ~3 s lets the writer's lock
|
|
502
|
+
// age out — enough headroom for any normal write→read sequence
|
|
503
|
+
// without becoming a user-visible delay.
|
|
504
|
+
const ATTEMPTS = 7;
|
|
505
|
+
for (let attempt = 0; attempt < ATTEMPTS; attempt++) {
|
|
506
|
+
let db;
|
|
507
|
+
let conn;
|
|
508
|
+
try {
|
|
509
|
+
db = new lbug.Database(dbPath, 0, false, true); // readOnly
|
|
510
|
+
conn = new lbug.Connection(db);
|
|
511
|
+
return { _db: db, _conn: conn, groupDir };
|
|
439
512
|
}
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
513
|
+
catch (err) {
|
|
514
|
+
if (conn) {
|
|
515
|
+
try {
|
|
516
|
+
await conn.close();
|
|
517
|
+
}
|
|
518
|
+
catch {
|
|
519
|
+
/* ignore */
|
|
520
|
+
}
|
|
443
521
|
}
|
|
444
|
-
|
|
445
|
-
|
|
522
|
+
if (db) {
|
|
523
|
+
try {
|
|
524
|
+
await db.close();
|
|
525
|
+
}
|
|
526
|
+
catch {
|
|
527
|
+
/* ignore */
|
|
528
|
+
}
|
|
446
529
|
}
|
|
530
|
+
if (!isTransientLbugLockError(err) || attempt === ATTEMPTS - 1)
|
|
531
|
+
return null;
|
|
532
|
+
await new Promise((r) => setTimeout(r, 50 * Math.pow(2, attempt)));
|
|
533
|
+
continue;
|
|
447
534
|
}
|
|
448
|
-
return null;
|
|
449
535
|
}
|
|
536
|
+
return null;
|
|
450
537
|
}
|
|
451
538
|
/* ------------------------------------------------------------------ */
|
|
452
539
|
/* bridgeExists */
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Read-side decoder for `content` columns in lbug node rows.
|
|
3
|
+
*
|
|
4
|
+
* RFC 0001 Phase 2 introduces an optional `contentEncoding` column on
|
|
5
|
+
* every node table that has `content`. Default is `'none'` (passthrough)
|
|
6
|
+
* so existing reads keep working unchanged. When a writer opts into
|
|
7
|
+
* `--compress brotli|zstd`, the column carries the encoding tag and the
|
|
8
|
+
* `content` column carries base64-encoded compressed bytes — readers
|
|
9
|
+
* MUST run those bytes back through `decodeContent` before handing them
|
|
10
|
+
* to a consumer (MCP tool result, HTTP API response, embedding model,
|
|
11
|
+
* LLM input).
|
|
12
|
+
*
|
|
13
|
+
* Centralizing the decode in one helper has two benefits:
|
|
14
|
+
* 1. Shim sites are 2-line changes: add `, n.contentEncoding AS
|
|
15
|
+
* contentEncoding` to the Cypher RETURN, and pipe the row through
|
|
16
|
+
* `decodeContentField` (or `decodeContentRow`) at the boundary.
|
|
17
|
+
* 2. Anyone hunting for "where does the read path decode compressed
|
|
18
|
+
* bytes" greps for `decodeContentField` and gets every site in one
|
|
19
|
+
* shot — no per-table feature detection scattered across files.
|
|
20
|
+
*/
|
|
21
|
+
/**
|
|
22
|
+
* Decode a single (content, contentEncoding) pair from a Cypher row.
|
|
23
|
+
*
|
|
24
|
+
* Returns the input content unchanged when:
|
|
25
|
+
* - the encoding is missing / empty / `'none'` (the common case for
|
|
26
|
+
* 1.6.x – 1.7.x indexes, plus any 1.8+ index written without
|
|
27
|
+
* `--compress`);
|
|
28
|
+
* - content is null/undefined (caller decides whether that's an error);
|
|
29
|
+
* - content is not a string (pre-Phase-2 indexes never wrote non-string
|
|
30
|
+
* content, but defensive: don't crash a read path on a malformed row).
|
|
31
|
+
*
|
|
32
|
+
* Throws (via `decodeContent`) only when the row claims an encoding this
|
|
33
|
+
* CLI build can't decode — that's a forward-compat error and the right
|
|
34
|
+
* behavior is to fail loudly rather than return wrong content.
|
|
35
|
+
*/
|
|
36
|
+
export declare function decodeContentField(content: unknown, encoding: unknown): string | undefined;
|
|
37
|
+
/**
|
|
38
|
+
* Apply `decodeContentField` to a row that carries `content` and
|
|
39
|
+
* `contentEncoding` keys (or their numeric column-index aliases).
|
|
40
|
+
*
|
|
41
|
+
* The numeric-fallback shape (`r[N]`) mirrors LadybugDB's row format —
|
|
42
|
+
* driver versions vary on whether named keys are populated, so existing
|
|
43
|
+
* read sites do `r.content ?? r[N]`. This helper accepts the same
|
|
44
|
+
* pattern. Never mutates input: returns a NEW object when content was decoded, otherwise the input row itself.
|
|
45
|
+
*/
|
|
46
|
+
export declare function decodeContentRow<T extends Record<string, unknown>>(row: T, contentKey?: keyof T, encodingKey?: keyof T): T;
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Read-side decoder for `content` columns in lbug node rows.
|
|
3
|
+
*
|
|
4
|
+
* RFC 0001 Phase 2 introduces an optional `contentEncoding` column on
|
|
5
|
+
* every node table that has `content`. Default is `'none'` (passthrough)
|
|
6
|
+
* so existing reads keep working unchanged. When a writer opts into
|
|
7
|
+
* `--compress brotli|zstd`, the column carries the encoding tag and the
|
|
8
|
+
* `content` column carries base64-encoded compressed bytes — readers
|
|
9
|
+
* MUST run those bytes back through `decodeContent` before handing them
|
|
10
|
+
* to a consumer (MCP tool result, HTTP API response, embedding model,
|
|
11
|
+
* LLM input).
|
|
12
|
+
*
|
|
13
|
+
* Centralizing the decode in one helper has two benefits:
|
|
14
|
+
* 1. Shim sites are 2-line changes: add `, n.contentEncoding AS
|
|
15
|
+
* contentEncoding` to the Cypher RETURN, and pipe the row through
|
|
16
|
+
* `decodeContentField` (or `decodeContentRow`) at the boundary.
|
|
17
|
+
* 2. Anyone hunting for "where does the read path decode compressed
|
|
18
|
+
* bytes" greps for `decodeContentField` and gets every site in one
|
|
19
|
+
* shot — no per-table feature detection scattered across files.
|
|
20
|
+
*/
|
|
21
|
+
import { decodeContent } from '@codragraph/graphstore';
|
|
22
|
+
/**
|
|
23
|
+
* Decode a single (content, contentEncoding) pair from a Cypher row.
|
|
24
|
+
*
|
|
25
|
+
* Returns the input content unchanged when:
|
|
26
|
+
* - the encoding is missing / empty / `'none'` (the common case for
|
|
27
|
+
* 1.6.x – 1.7.x indexes, plus any 1.8+ index written without
|
|
28
|
+
* `--compress`);
|
|
29
|
+
* - content is null/undefined (caller decides whether that's an error);
|
|
30
|
+
* - content is not a string (pre-Phase-2 indexes never wrote non-string
|
|
31
|
+
* content, but defensive: don't crash a read path on a malformed row).
|
|
32
|
+
*
|
|
33
|
+
* Throws (via `decodeContent`) only when the row claims an encoding this
|
|
34
|
+
* CLI build can't decode — that's a forward-compat error and the right
|
|
35
|
+
* behavior is to fail loudly rather than return wrong content.
|
|
36
|
+
*/
|
|
37
|
+
export function decodeContentField(content, encoding) {
|
|
38
|
+
if (content === undefined || content === null)
|
|
39
|
+
return undefined;
|
|
40
|
+
if (typeof content !== 'string')
|
|
41
|
+
return content;
|
|
42
|
+
if (typeof encoding !== 'string' || encoding === '' || encoding === 'none') {
|
|
43
|
+
return content;
|
|
44
|
+
}
|
|
45
|
+
return decodeContent(content, encoding);
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
* Apply `decodeContentField` to a row that carries `content` and
|
|
49
|
+
* `contentEncoding` keys (or their numeric column-index aliases).
|
|
50
|
+
*
|
|
51
|
+
* The numeric-fallback shape (`r[N]`) mirrors LadybugDB's row format —
|
|
52
|
+
* driver versions vary on whether named keys are populated, so existing
|
|
53
|
+
* read sites do `r.content ?? r[N]`. This helper accepts the same
|
|
54
|
+
* pattern. Never mutates input: returns a NEW object when content was decoded, otherwise the input row itself.
|
|
55
|
+
*/
|
|
56
|
+
export function decodeContentRow(row, contentKey = 'content', encodingKey = 'contentEncoding') {
|
|
57
|
+
const content = row[contentKey];
|
|
58
|
+
if (content === undefined || content === null)
|
|
59
|
+
return row;
|
|
60
|
+
const encoding = row[encodingKey];
|
|
61
|
+
if (typeof encoding !== 'string' || encoding === '' || encoding === 'none')
|
|
62
|
+
return row;
|
|
63
|
+
return { ...row, [contentKey]: decodeContentField(content, encoding) };
|
|
64
|
+
}
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
*/
|
|
14
14
|
import { KnowledgeGraph } from '../graph/types.js';
|
|
15
15
|
import { NodeTableName } from './schema.js';
|
|
16
|
+
import { type ContentEncoding } from '@codragraph/graphstore';
|
|
16
17
|
export declare const sanitizeUTF8: (str: string) => string;
|
|
17
18
|
export declare const escapeCSVField: (value: string | number | undefined | null) => string;
|
|
18
19
|
export declare const escapeCSVNumber: (value: number | undefined | null, defaultValue?: number) => string;
|
|
@@ -25,9 +26,4 @@ export interface StreamedCSVResult {
|
|
|
25
26
|
relCsvPath: string;
|
|
26
27
|
relRows: number;
|
|
27
28
|
}
|
|
28
|
-
|
|
29
|
-
* Stream all CSV data directly to disk files.
|
|
30
|
-
* Iterates graph nodes exactly ONCE — routes each node to the right writer.
|
|
31
|
-
* File contents are lazy-read from disk with a generous LRU cache.
|
|
32
|
-
*/
|
|
33
|
-
export declare const streamAllCSVsToDisk: (graph: KnowledgeGraph, repoPath: string, csvDir: string) => Promise<StreamedCSVResult>;
|
|
29
|
+
export declare const streamAllCSVsToDisk: (graph: KnowledgeGraph, repoPath: string, csvDir: string, compress?: ContentEncoding) => Promise<StreamedCSVResult>;
|