@codragraph/cli 1.6.4 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -0
- package/dist/cli/analyze.d.ts +22 -0
- package/dist/cli/analyze.js +107 -4
- package/dist/cli/compress-stats.d.ts +29 -0
- package/dist/cli/compress-stats.js +97 -0
- package/dist/cli/graphstore.d.ts +6 -2
- package/dist/cli/graphstore.js +24 -2
- package/dist/cli/index.js +16 -2
- package/dist/cli/profile-heap.d.ts +35 -0
- package/dist/cli/profile-heap.js +126 -0
- package/dist/cli/setup.d.ts +13 -0
- package/dist/cli/setup.js +22 -11
- package/dist/cli/skill-gen.d.ts +14 -2
- package/dist/cli/skill-gen.js +52 -19
- package/dist/cli/tool.js +4 -0
- package/dist/core/embeddings/embedding-pipeline.js +24 -7
- package/dist/core/group/bridge-db.js +111 -24
- package/dist/core/lbug/content-read.d.ts +46 -0
- package/dist/core/lbug/content-read.js +64 -0
- package/dist/core/lbug/csv-generator.d.ts +2 -6
- package/dist/core/lbug/csv-generator.js +45 -12
- package/dist/core/lbug/lbug-adapter.d.ts +4 -1
- package/dist/core/lbug/lbug-adapter.js +153 -21
- package/dist/core/lbug/schema.d.ts +7 -7
- package/dist/core/lbug/schema.js +18 -0
- package/dist/core/run-analyze.d.ts +13 -0
- package/dist/core/run-analyze.js +91 -4
- package/dist/core/search/bm25-index.js +67 -15
- package/dist/mcp/local/local-backend.js +22 -5
- package/dist/server/api.js +4 -3
- package/dist/storage/repo-manager.d.ts +39 -0
- package/dist/storage/repo-manager.js +19 -0
- package/hooks/claude/codragraph-hook.cjs +95 -2
- package/package.json +4 -4
- package/scripts/build-tree-sitter-proto.cjs +15 -3
- package/scripts/patch-tree-sitter-swift.cjs +17 -4
- package/skills/codragraph-api-surface.md +110 -0
- package/skills/codragraph-config-audit.md +146 -0
- package/skills/codragraph-cross-repo-impact.md +135 -0
- package/skills/codragraph-data-lineage.md +137 -0
- package/skills/codragraph-dead-code.md +119 -0
- package/skills/codragraph-gh-actions-debug.md +162 -0
- package/skills/codragraph-gh-issue-workflow.md +178 -0
- package/skills/codragraph-gh-pr-workflow.md +176 -0
- package/skills/codragraph-gh-release-workflow.md +187 -0
- package/skills/codragraph-git-bisect.md +176 -0
- package/skills/codragraph-git-force-push.md +147 -0
- package/skills/codragraph-git-history-rewrite.md +174 -0
- package/skills/codragraph-git-rebase-vs-merge.md +138 -0
- package/skills/codragraph-git-recovery.md +181 -0
- package/skills/codragraph-git-worktree.md +145 -0
- package/skills/codragraph-migration-tracking.md +130 -0
- package/skills/codragraph-notebook-context.md +136 -0
- package/skills/codragraph-observability-coverage.md +125 -0
- package/skills/codragraph-onboarding.md +129 -0
- package/skills/codragraph-perf-hotspots.md +132 -0
- package/skills/codragraph-project-switcher.md +116 -0
- package/skills/codragraph-security-audit.md +144 -0
- package/skills/codragraph-sql-tracing.md +122 -0
- package/skills/codragraph-supply-chain-audit.md +153 -0
- package/skills/codragraph-test-coverage.md +97 -0
package/dist/core/run-analyze.js
CHANGED
|
@@ -10,9 +10,11 @@
|
|
|
10
10
|
*/
|
|
11
11
|
import path from 'path';
|
|
12
12
|
import fs from 'fs/promises';
|
|
13
|
+
import * as fsSync from 'node:fs';
|
|
14
|
+
import * as v8 from 'node:v8';
|
|
13
15
|
import { runPipelineFromRepo } from './ingestion/pipeline.js';
|
|
14
16
|
import { initLbug, loadGraphToLbug, getLbugStats, executeQuery, executeWithReusedStatement, closeLbug, loadCachedEmbeddings, } from './lbug/lbug-adapter.js';
|
|
15
|
-
import { getStoragePaths, saveMeta, loadMeta, addToGitignore, registerRepo, cleanupOldKuzuFiles, } from '../storage/repo-manager.js';
|
|
17
|
+
import { getStoragePaths, saveMeta, loadMeta, addToGitignore, registerRepo, cleanupOldKuzuFiles, INDEX_SCHEMA_VERSION, } from '../storage/repo-manager.js';
|
|
16
18
|
import { getCurrentCommit, getRemoteUrl, hasGitDir, getInferredRepoName } from '../storage/git.js';
|
|
17
19
|
import { recordAnalysisSnapshot } from './graphstore/index.js';
|
|
18
20
|
import { generateAIContextFiles } from '../cli/ai-context.js';
|
|
@@ -51,7 +53,70 @@ export const PHASE_LABELS = {
|
|
|
51
53
|
*/
|
|
52
54
|
export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
53
55
|
const log = (msg) => callbacks.onLog?.(msg);
|
|
54
|
-
|
|
56
|
+
// RFC 0002 Phase 1 — optional heap-profile instrumentation. Set
|
|
57
|
+
// CODRAGRAPH_HEAP_PROFILE=1 (or run `codragraph profile-heap`) to write a
|
|
58
|
+
// v8 heap snapshot at every phase boundary, plus a `profile-summary.jsonl`
|
|
59
|
+
// log of `process.memoryUsage()` at the same boundaries. Snapshots land in
|
|
60
|
+
// `<repo>/.codragraph/heap-profiles/`. Open snapshots in Chrome DevTools
|
|
61
|
+
// (Memory → Load) to find which constructors dominate retained set; the
|
|
62
|
+
// JSONL is the cheap RSS / heapUsed timeline. Off by default — snapshot
|
|
63
|
+
// writes pause the event loop ~2-5s and consume ~100-500MB of disk each.
|
|
64
|
+
const heapProfileEnabled = process.env.CODRAGRAPH_HEAP_PROFILE === '1';
|
|
65
|
+
let heapProfileDir = '';
|
|
66
|
+
let heapProfileSummaryPath = '';
|
|
67
|
+
let lastProfilePhase = '';
|
|
68
|
+
if (heapProfileEnabled) {
|
|
69
|
+
heapProfileDir = path.join(repoPath, '.codragraph', 'heap-profiles');
|
|
70
|
+
heapProfileSummaryPath = path.join(heapProfileDir, 'profile-summary.jsonl');
|
|
71
|
+
try {
|
|
72
|
+
fsSync.mkdirSync(heapProfileDir, { recursive: true });
|
|
73
|
+
// Truncate any prior summary so a single run produces a clean log.
|
|
74
|
+
// We append crash-safely on each phase boundary below.
|
|
75
|
+
fsSync.writeFileSync(heapProfileSummaryPath, '');
|
|
76
|
+
}
|
|
77
|
+
catch {
|
|
78
|
+
/* permission issue — best-effort */
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
const progress = (phase, percent, message) => {
|
|
82
|
+
callbacks.onProgress(phase, percent, message);
|
|
83
|
+
// Only snapshot on phase transitions, not every tick. Phase strings come
|
|
84
|
+
// from runPipelineFromRepo / loadGraphToLbug and are stable.
|
|
85
|
+
if (heapProfileEnabled && phase && phase !== lastProfilePhase) {
|
|
86
|
+
lastProfilePhase = phase;
|
|
87
|
+
const ts = Date.now();
|
|
88
|
+
const safe = phase.replace(/[^a-zA-Z0-9]+/g, '_').slice(0, 60);
|
|
89
|
+
const file = path.join(heapProfileDir, `${ts}-${safe}.heapsnapshot`);
|
|
90
|
+
// Capture the cheap memoryUsage timeline FIRST — even if writeHeapSnapshot
|
|
91
|
+
// crashes (out of disk, permissions), we still have the RSS curve which
|
|
92
|
+
// is the more useful artifact for the heap-pressure RFC.
|
|
93
|
+
try {
|
|
94
|
+
const mu = process.memoryUsage();
|
|
95
|
+
const entry = JSON.stringify({
|
|
96
|
+
ts,
|
|
97
|
+
phase,
|
|
98
|
+
percent,
|
|
99
|
+
rss: mu.rss,
|
|
100
|
+
heapUsed: mu.heapUsed,
|
|
101
|
+
heapTotal: mu.heapTotal,
|
|
102
|
+
external: mu.external,
|
|
103
|
+
arrayBuffers: mu.arrayBuffers,
|
|
104
|
+
snapshotFile: path.basename(file),
|
|
105
|
+
});
|
|
106
|
+
fsSync.appendFileSync(heapProfileSummaryPath, entry + '\n');
|
|
107
|
+
}
|
|
108
|
+
catch (err) {
|
|
109
|
+
log(`heap-profile: summary append failed (${err.message})`);
|
|
110
|
+
}
|
|
111
|
+
try {
|
|
112
|
+
v8.writeHeapSnapshot(file);
|
|
113
|
+
log(`heap-profile: wrote ${file}`);
|
|
114
|
+
}
|
|
115
|
+
catch (err) {
|
|
116
|
+
log(`heap-profile: write failed (${err.message})`);
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
};
|
|
55
120
|
const { storagePath, lbugPath } = getStoragePaths(repoPath);
|
|
56
121
|
// Clean up stale KuzuDB files from before the LadybugDB migration.
|
|
57
122
|
const kuzuResult = await cleanupOldKuzuFiles(storagePath);
|
|
@@ -62,7 +127,17 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
62
127
|
const currentCommit = repoHasGit ? getCurrentCommit(repoPath) : '';
|
|
63
128
|
const existingMeta = await loadMeta(storagePath);
|
|
64
129
|
// ── Early-return: already up to date ──────────────────────────────
|
|
65
|
-
|
|
130
|
+
// Schema-version mismatch forces a full re-analyze regardless of commit
|
|
131
|
+
// equality: existing 1.7.x indexes have no `schemaVersion` field at all,
|
|
132
|
+
// and 1.8+ readers expect every node table to carry a `contentEncoding`
|
|
133
|
+
// column (RFC 0001 Phase 2). LadybugDB ALTER on existing tables is not
|
|
134
|
+
// validated end-to-end yet, so the supported migration path is
|
|
135
|
+
// re-analyze → fresh CREATE NODE TABLE.
|
|
136
|
+
const schemaUpToDate = !!existingMeta && (existingMeta.schemaVersion ?? 0) >= INDEX_SCHEMA_VERSION;
|
|
137
|
+
if (existingMeta &&
|
|
138
|
+
schemaUpToDate &&
|
|
139
|
+
!options.force &&
|
|
140
|
+
existingMeta.lastCommit === currentCommit) {
|
|
66
141
|
// Non-git folders have currentCommit = '' — always rebuild since we can't detect changes
|
|
67
142
|
if (currentCommit !== '') {
|
|
68
143
|
return {
|
|
@@ -73,6 +148,11 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
73
148
|
};
|
|
74
149
|
}
|
|
75
150
|
}
|
|
151
|
+
if (existingMeta && !schemaUpToDate) {
|
|
152
|
+
log(`Index schema version ${existingMeta.schemaVersion ?? '<missing>'} is older than ` +
|
|
153
|
+
`${INDEX_SCHEMA_VERSION} (RFC 0001 Phase 2 — adds contentEncoding column). ` +
|
|
154
|
+
`Re-analyzing.`);
|
|
155
|
+
}
|
|
76
156
|
// ── Cache embeddings from existing index before rebuild ────────────
|
|
77
157
|
let cachedEmbeddingNodeIds = new Set();
|
|
78
158
|
let cachedEmbeddings = [];
|
|
@@ -122,7 +202,12 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
122
202
|
lbugMsgCount++;
|
|
123
203
|
const pct = Math.min(84, 60 + Math.round((lbugMsgCount / (lbugMsgCount + 10)) * 24));
|
|
124
204
|
progress('lbug', pct, msg);
|
|
125
|
-
}
|
|
205
|
+
},
|
|
206
|
+
// RFC 0001 Phase 2: when --compress is set, every content row goes
|
|
207
|
+
// through encodeContent before hitting the CSV. Default 'none' is
|
|
208
|
+
// a true passthrough, so the on-disk layout is byte-identical to
|
|
209
|
+
// pre-Phase-2 indexes when no compression flag is passed.
|
|
210
|
+
{ compress: options.compress });
|
|
126
211
|
// ── Phase 2.5: Versioned-graph snapshot (best-effort) ────────────
|
|
127
212
|
// Phase 4 hook: snapshot the freshly-loaded graph into the
|
|
128
213
|
// content-addressed `.codragraph/graphstore/`. Failures here do NOT
|
|
@@ -230,6 +315,8 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
230
315
|
repoPath,
|
|
231
316
|
lastCommit: currentCommit,
|
|
232
317
|
indexedAt: new Date().toISOString(),
|
|
318
|
+
schemaVersion: INDEX_SCHEMA_VERSION,
|
|
319
|
+
compress: options.compress ?? 'none',
|
|
233
320
|
// Captured here (not at registration) so it travels with the
|
|
234
321
|
// on-disk meta.json — sibling-clone fingerprinting works for
|
|
235
322
|
// out-of-tree consumers (group-status, future tooling) without
|
|
@@ -12,17 +12,56 @@
|
|
|
12
12
|
*/
|
|
13
13
|
import { queryFTS, ensureFTSIndex, executeQuery as executeCoreQuery, } from '../lbug/lbug-adapter.js';
|
|
14
14
|
/**
|
|
15
|
-
* FTS
|
|
16
|
-
* CLI/pipeline path and the MCP pool path
|
|
17
|
-
*
|
|
15
|
+
* FTS table set served by `searchFTSFromLbug`. Centralised so that both
|
|
16
|
+
* the CLI/pipeline path and the MCP pool path stay in lockstep.
|
|
17
|
+
*
|
|
18
|
+
* The properties list is computed at FTS-create time via `ftsPropertiesFor`
|
|
19
|
+
* — for repos that were analysed with `--compress brotli|zstd`, the
|
|
20
|
+
* `content` column holds base64-of-encoded-bytes and would tokenise to
|
|
21
|
+
* useless tokens. Those repos get name-only FTS so search at least
|
|
22
|
+
* matches function/class names instead of returning random hits on
|
|
23
|
+
* base64 alphabet. Plain (compress='none' / unset) repos get the full
|
|
24
|
+
* `name + content` index for body-text matches. RFC 0001 Phase 2.5.
|
|
18
25
|
*/
|
|
19
|
-
const
|
|
20
|
-
{ table: 'File', indexName: 'file_fts'
|
|
21
|
-
{ table: 'Function', indexName: 'function_fts'
|
|
22
|
-
{ table: 'Class', indexName: 'class_fts'
|
|
23
|
-
{ table: 'Method', indexName: 'method_fts'
|
|
24
|
-
{ table: 'Interface', indexName: 'interface_fts'
|
|
26
|
+
const FTS_TABLES = [
|
|
27
|
+
{ table: 'File', indexName: 'file_fts' },
|
|
28
|
+
{ table: 'Function', indexName: 'function_fts' },
|
|
29
|
+
{ table: 'Class', indexName: 'class_fts' },
|
|
30
|
+
{ table: 'Method', indexName: 'method_fts' },
|
|
31
|
+
{ table: 'Interface', indexName: 'interface_fts' },
|
|
25
32
|
];
|
|
33
|
+
const ftsPropertiesFor = (compress) => !compress || compress === 'none' ? ['name', 'content'] : ['name'];
|
|
34
|
+
/**
|
|
35
|
+
* Look up `meta.compress` for a repo. The MCP path passes `repoId`
|
|
36
|
+
* (registry-derived); the CLI path passes nothing and we walk up from
|
|
37
|
+
* cwd. Returns `'none'` whenever the lookup fails so the safe default
|
|
38
|
+
* (full FTS index) is used — the failure mode is reduced search
|
|
39
|
+
* quality, never wrong results.
|
|
40
|
+
*/
|
|
41
|
+
async function getCompressMode(repoId) {
|
|
42
|
+
try {
|
|
43
|
+
const repoMod = await import('../../storage/repo-manager.js');
|
|
44
|
+
if (repoId) {
|
|
45
|
+
// MCP path: registry name is the source of truth. The MCP
|
|
46
|
+
// backend's `repoId` is `entry.name.toLowerCase()` (or `${name}-${hash}`
|
|
47
|
+
// on collision); match conservatively against both forms.
|
|
48
|
+
const entries = await repoMod.listRegisteredRepos();
|
|
49
|
+
for (const entry of entries) {
|
|
50
|
+
const base = entry.name.toLowerCase();
|
|
51
|
+
if (base === repoId || repoId.startsWith(`${base}-`)) {
|
|
52
|
+
const meta = await repoMod.loadMeta(entry.storagePath);
|
|
53
|
+
return meta?.compress ?? 'none';
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
return 'none';
|
|
57
|
+
}
|
|
58
|
+
const repo = await repoMod.findRepo(process.cwd());
|
|
59
|
+
return repo?.meta?.compress ?? 'none';
|
|
60
|
+
}
|
|
61
|
+
catch {
|
|
62
|
+
return 'none';
|
|
63
|
+
}
|
|
64
|
+
}
|
|
26
65
|
const FALLBACK_SCAN_LIMIT = 50_000;
|
|
27
66
|
const BOOLEAN_QUERY_TOKENS = new Set(['and', 'or', 'not']);
|
|
28
67
|
const FALLBACK_FIELD_WEIGHTS = {
|
|
@@ -184,9 +223,13 @@ async function queryFallbackViaExecutor(executor, tableName, properties, query,
|
|
|
184
223
|
return [];
|
|
185
224
|
}
|
|
186
225
|
}
|
|
187
|
-
async function fallbackSearchAllTables(executor, query, limit
|
|
226
|
+
async function fallbackSearchAllTables(executor, query, limit,
|
|
227
|
+
// Same compress-aware property selection as the FTS path. Default keeps
|
|
228
|
+
// pre-Phase-2 behaviour (`['name', 'content']`) for callers that don't
|
|
229
|
+
// pass a value.
|
|
230
|
+
properties = ['name', 'content']) {
|
|
188
231
|
const results = [];
|
|
189
|
-
for (const { table
|
|
232
|
+
for (const { table } of FTS_TABLES) {
|
|
190
233
|
results.push(await queryFallbackViaExecutor(executor, table, properties, query, limit));
|
|
191
234
|
}
|
|
192
235
|
return results;
|
|
@@ -220,7 +263,12 @@ export const searchFTSFromLbug = async (query, limit = 20, repoId) => {
|
|
|
220
263
|
const executor = (cypher) => executeQuery(repoId, cypher);
|
|
221
264
|
// Lazy-create FTS indexes on first query for this repo (analyze no longer
|
|
222
265
|
// creates them up-front, so we ensure them here). Cached per-process.
|
|
223
|
-
|
|
266
|
+
// RFC 0001 Phase 2.5: drop `content` from FTS properties for repos
|
|
267
|
+
// analysed with --compress brotli|zstd — the column holds encoded
|
|
268
|
+
// bytes and would tokenise to garbage.
|
|
269
|
+
const compress = await getCompressMode(repoId);
|
|
270
|
+
const properties = ftsPropertiesFor(compress);
|
|
271
|
+
for (const { table, indexName } of FTS_TABLES) {
|
|
224
272
|
await ensureFTSIndexViaExecutor(executor, repoId, table, indexName, properties);
|
|
225
273
|
}
|
|
226
274
|
fileResults = await queryFTSViaExecutor(executor, 'File', 'file_fts', query, limit);
|
|
@@ -235,13 +283,17 @@ export const searchFTSFromLbug = async (query, limit = 20, repoId) => {
|
|
|
235
283
|
interfaceResults.length ===
|
|
236
284
|
0) {
|
|
237
285
|
[fileResults, functionResults, classResults, methodResults, interfaceResults] =
|
|
238
|
-
await fallbackSearchAllTables(executor, query, limit);
|
|
286
|
+
await fallbackSearchAllTables(executor, query, limit, properties);
|
|
239
287
|
}
|
|
240
288
|
}
|
|
241
289
|
else {
|
|
242
290
|
// Use core lbug adapter (CLI / pipeline context) — also sequential for safety.
|
|
243
291
|
// Lazy-create FTS indexes on first query (analyze no longer does it).
|
|
244
|
-
|
|
292
|
+
// RFC 0001 Phase 2.5 — same `compress`-aware property selection as the MCP
|
|
293
|
+
// path; the CLI walks up from cwd to find the repo's meta.json.
|
|
294
|
+
const compress = await getCompressMode();
|
|
295
|
+
const properties = ftsPropertiesFor(compress);
|
|
296
|
+
for (const { table, indexName } of FTS_TABLES) {
|
|
245
297
|
await ensureFTSIndex(table, indexName, [...properties]).catch(() => { });
|
|
246
298
|
}
|
|
247
299
|
fileResults = await queryFTS('File', 'file_fts', query, limit, false).catch(() => []);
|
|
@@ -256,7 +308,7 @@ export const searchFTSFromLbug = async (query, limit = 20, repoId) => {
|
|
|
256
308
|
interfaceResults.length ===
|
|
257
309
|
0) {
|
|
258
310
|
[fileResults, functionResults, classResults, methodResults, interfaceResults] =
|
|
259
|
-
await fallbackSearchAllTables(executeCoreQuery, query, limit);
|
|
311
|
+
await fallbackSearchAllTables(executeCoreQuery, query, limit, properties);
|
|
260
312
|
}
|
|
261
313
|
}
|
|
262
314
|
// Collect all node scores per filePath to track which nodes actually matched
|
|
@@ -19,6 +19,7 @@ import { GroupService } from '../../core/group/service.js';
|
|
|
19
19
|
import { resolveAtGroupMemberRepoPath } from '../../core/group/resolve-at-member.js';
|
|
20
20
|
import { collectBestChunks } from '../../core/embeddings/types.js';
|
|
21
21
|
import { EMBEDDING_TABLE_NAME, EMBEDDING_INDEX_NAME } from '../../core/lbug/schema.js';
|
|
22
|
+
import { decodeContentField } from '../../core/lbug/content-read.js';
|
|
22
23
|
import { PhaseTimer } from '../../core/search/phase-timer.js';
|
|
23
24
|
import { checkStaleness, checkCwdMatch } from '../../core/git-staleness.js';
|
|
24
25
|
// AI context generation is CLI-only (codragraph analyze)
|
|
@@ -835,10 +836,12 @@ export class LocalBackend {
|
|
|
835
836
|
try {
|
|
836
837
|
const contentRows = await executeParameterized(repo.id, `
|
|
837
838
|
MATCH (n {id: $nodeId})
|
|
838
|
-
RETURN n.content AS content
|
|
839
|
+
RETURN n.content AS content, n.contentEncoding AS contentEncoding
|
|
839
840
|
`, { nodeId: sym.nodeId });
|
|
840
841
|
if (contentRows.length > 0) {
|
|
841
|
-
|
|
842
|
+
const raw = contentRows[0].content ?? contentRows[0][0];
|
|
843
|
+
const enc = contentRows[0].contentEncoding ?? contentRows[0][1];
|
|
844
|
+
content = decodeContentField(raw, enc);
|
|
842
845
|
}
|
|
843
846
|
}
|
|
844
847
|
catch (e) {
|
|
@@ -1330,7 +1333,13 @@ export class LocalBackend {
|
|
|
1330
1333
|
*/
|
|
1331
1334
|
async resolveSymbolCandidates(repo, query, hints) {
|
|
1332
1335
|
const { uid, name, include_content } = query;
|
|
1333
|
-
|
|
1336
|
+
// RFC 0001 Phase 2: when fetching content, also fetch the per-row
|
|
1337
|
+
// encoding tag so `decodeContentField` can pass it through unchanged
|
|
1338
|
+
// (default 'none') or run brotli/zstd decode. Adding contentEncoding
|
|
1339
|
+
// to the SELECT shifts the numeric-index fallback for content from
|
|
1340
|
+
// r[6] to (still) r[6] — encoding lands at r[7] — but we read by name
|
|
1341
|
+
// first which is the documented preferred path on LadybugDB.
|
|
1342
|
+
const selectClause = `n.id AS id, n.name AS name, labels(n)[0] AS type, n.filePath AS filePath, n.startLine AS startLine, n.endLine AS endLine${include_content ? ', n.content AS content, n.contentEncoding AS contentEncoding' : ''}`;
|
|
1334
1343
|
// Direct UID — zero-ambiguity path.
|
|
1335
1344
|
if (uid) {
|
|
1336
1345
|
const rows = await executeParameterized(repo.id, `MATCH (n {id: $uid}) RETURN ${selectClause} LIMIT 1`, { uid });
|
|
@@ -1344,7 +1353,11 @@ export class LocalBackend {
|
|
|
1344
1353
|
filePath: (r.filePath ?? r[3]),
|
|
1345
1354
|
startLine: (r.startLine ?? r[4]),
|
|
1346
1355
|
endLine: (r.endLine ?? r[5]),
|
|
1347
|
-
...(include_content
|
|
1356
|
+
...(include_content
|
|
1357
|
+
? {
|
|
1358
|
+
content: decodeContentField(r.content ?? r[6], r.contentEncoding ?? r[7]),
|
|
1359
|
+
}
|
|
1360
|
+
: {}),
|
|
1348
1361
|
};
|
|
1349
1362
|
// Same LadybugDB label-enrichment as the name-based path: a UID
|
|
1350
1363
|
// pointing at a Class must still surface `type: 'Class'` so impact's
|
|
@@ -1380,7 +1393,11 @@ export class LocalBackend {
|
|
|
1380
1393
|
filePath: (r.filePath ?? r[3]),
|
|
1381
1394
|
startLine: (r.startLine ?? r[4]),
|
|
1382
1395
|
endLine: (r.endLine ?? r[5]),
|
|
1383
|
-
...(include_content
|
|
1396
|
+
...(include_content
|
|
1397
|
+
? {
|
|
1398
|
+
content: decodeContentField(r.content ?? r[6], r.contentEncoding ?? r[7]),
|
|
1399
|
+
}
|
|
1400
|
+
: {}),
|
|
1384
1401
|
}));
|
|
1385
1402
|
// Enrich labels for any candidates where `labels(n)[0]` came back empty.
|
|
1386
1403
|
// LadybugDB returns an empty string for that projection on certain node
|
package/dist/server/api.js
CHANGED
|
@@ -15,6 +15,7 @@ import { createRequire } from 'node:module';
|
|
|
15
15
|
import { loadMeta, listRegisteredRepos, getStoragePath } from '../storage/repo-manager.js';
|
|
16
16
|
import { executeQuery, executePrepared, executeWithReusedStatement, streamQuery, closeLbug, withLbugDb, } from '../core/lbug/lbug-adapter.js';
|
|
17
17
|
import { isWriteQuery } from '../core/lbug/pool-adapter.js';
|
|
18
|
+
import { decodeContentField } from '../core/lbug/content-read.js';
|
|
18
19
|
import { NODE_TABLES } from '../_shared/index.js';
|
|
19
20
|
import { searchFTSFromLbug } from '../core/search/bm25-index.js';
|
|
20
21
|
import { hybridSearch } from '../core/search/hybrid-search.js';
|
|
@@ -189,7 +190,7 @@ const getNodeQuery = (table, includeContent) => {
|
|
|
189
190
|
const tableLabel = quoteNodeTable(table);
|
|
190
191
|
if (table === 'File') {
|
|
191
192
|
return includeContent
|
|
192
|
-
? `MATCH (n:${tableLabel}) RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.content AS content`
|
|
193
|
+
? `MATCH (n:${tableLabel}) RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.content AS content, n.contentEncoding AS contentEncoding`
|
|
193
194
|
: `MATCH (n:${tableLabel}) RETURN n.id AS id, n.name AS name, n.filePath AS filePath`;
|
|
194
195
|
}
|
|
195
196
|
if (table === 'Folder') {
|
|
@@ -208,7 +209,7 @@ const getNodeQuery = (table, includeContent) => {
|
|
|
208
209
|
return `MATCH (n:${tableLabel}) RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.description AS description`;
|
|
209
210
|
}
|
|
210
211
|
return includeContent
|
|
211
|
-
? `MATCH (n:${tableLabel}) RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.startLine AS startLine, n.endLine AS endLine, n.content AS content`
|
|
212
|
+
? `MATCH (n:${tableLabel}) RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.startLine AS startLine, n.endLine AS endLine, n.content AS content, n.contentEncoding AS contentEncoding`
|
|
212
213
|
: `MATCH (n:${tableLabel}) RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.startLine AS startLine, n.endLine AS endLine`;
|
|
213
214
|
};
|
|
214
215
|
const mapGraphNodeRow = (table, row, includeContent) => ({
|
|
@@ -219,7 +220,7 @@ const mapGraphNodeRow = (table, row, includeContent) => ({
|
|
|
219
220
|
filePath: row.filePath ?? row[2],
|
|
220
221
|
startLine: row.startLine,
|
|
221
222
|
endLine: row.endLine,
|
|
222
|
-
content: includeContent ? row.content : undefined,
|
|
223
|
+
content: includeContent ? decodeContentField(row.content, row.contentEncoding) : undefined,
|
|
223
224
|
responseKeys: row.responseKeys,
|
|
224
225
|
errorKeys: row.errorKeys,
|
|
225
226
|
middleware: row.middleware,
|
|
@@ -36,10 +36,49 @@
|
|
|
36
36
|
* so the registry stabilises over analyze/re-analyze cycles.
|
|
37
37
|
*/
|
|
38
38
|
export declare const canonicalizePath: (p: string) => string;
|
|
39
|
+
/**
|
|
40
|
+
* On-disk schema version for `.codragraph/lbug` and `.codragraph/meta.json`.
|
|
41
|
+
*
|
|
42
|
+
* 1 — pre-RFC-0001-Phase-2 layout. Node tables have `content STRING`
|
|
43
|
+
* but no `contentEncoding` column. Implicit/missing on existing
|
|
44
|
+
* 1.6.x and 1.7.x indexes (RepoMeta.schemaVersion was undefined).
|
|
45
|
+
* 2 — RFC 0001 Phase 2: every node table that has `content` also has
|
|
46
|
+
* a `contentEncoding STRING DEFAULT 'none'` column. Writers may
|
|
47
|
+
* opt into compression via `--compress brotli|zstd` (compression
|
|
48
|
+
* is OFF by default, so existing readers keep working). Readers
|
|
49
|
+
* decode based on the per-row encoding tag.
|
|
50
|
+
*
|
|
51
|
+
* Bumping this is the migration trigger: `runFullAnalysis` forces a
|
|
52
|
+
* full re-analyze when an existing index has a missing or older
|
|
53
|
+
* `schemaVersion` field, because adding a column to an existing
|
|
54
|
+
* LadybugDB table via ALTER is not validated end-to-end yet — fresh
|
|
55
|
+
* `CREATE NODE TABLE` is the supported path.
|
|
56
|
+
*/
|
|
57
|
+
export declare const INDEX_SCHEMA_VERSION: 2;
|
|
39
58
|
export interface RepoMeta {
|
|
40
59
|
repoPath: string;
|
|
41
60
|
lastCommit: string;
|
|
42
61
|
indexedAt: string;
|
|
62
|
+
/**
|
|
63
|
+
* On-disk schema version (see {@link INDEX_SCHEMA_VERSION}). Absent on
|
|
64
|
+
* indexes written by 1.7.x or earlier; `runFullAnalysis` treats those
|
|
65
|
+
* as needing a full re-analyze when they're loaded by a 1.8+ CLI.
|
|
66
|
+
*/
|
|
67
|
+
schemaVersion?: number;
|
|
68
|
+
/**
|
|
69
|
+
* RFC 0001 Phase 2 — the per-row content encoding chosen at the last
|
|
70
|
+
* `analyze --compress` invocation. `'none'` (or absent) means rows
|
|
71
|
+
* carry plain text; `'brotli'` / `'zstd'` means rows are compressed
|
|
72
|
+
* and consumers must decode. Persisted so query-time tooling can
|
|
73
|
+
* detect the compressed mode without sampling rows.
|
|
74
|
+
*
|
|
75
|
+
* Phase 2.5 hooks: `core/search/bm25-index.ts` reads this field at
|
|
76
|
+
* FTS-create time and drops `content` from the FTS property list
|
|
77
|
+
* when set to a non-`'none'` value (full-text search falls back to
|
|
78
|
+
* symbol-name matches). Embeddings and graph queries are unaffected
|
|
79
|
+
* — they decode at the read boundary.
|
|
80
|
+
*/
|
|
81
|
+
compress?: 'none' | 'brotli' | 'zstd';
|
|
43
82
|
/**
|
|
44
83
|
* Canonical `origin` remote URL captured at index time. Used to
|
|
45
84
|
* fingerprint the same logical repo across multiple on-disk clones
|
|
@@ -49,6 +49,25 @@ export const canonicalizePath = (p) => {
|
|
|
49
49
|
return resolved;
|
|
50
50
|
}
|
|
51
51
|
};
|
|
52
|
+
/**
|
|
53
|
+
* On-disk schema version for `.codragraph/lbug` and `.codragraph/meta.json`.
|
|
54
|
+
*
|
|
55
|
+
* 1 — pre-RFC-0001-Phase-2 layout. Node tables have `content STRING`
|
|
56
|
+
* but no `contentEncoding` column. Implicit/missing on existing
|
|
57
|
+
* 1.6.x and 1.7.x indexes (RepoMeta.schemaVersion was undefined).
|
|
58
|
+
* 2 — RFC 0001 Phase 2: every node table that has `content` also has
|
|
59
|
+
* a `contentEncoding STRING DEFAULT 'none'` column. Writers may
|
|
60
|
+
* opt into compression via `--compress brotli|zstd` (compression
|
|
61
|
+
* is OFF by default, so existing readers keep working). Readers
|
|
62
|
+
* decode based on the per-row encoding tag.
|
|
63
|
+
*
|
|
64
|
+
* Bumping this is the migration trigger: `runFullAnalysis` forces a
|
|
65
|
+
* full re-analyze when an existing index has a missing or older
|
|
66
|
+
* `schemaVersion` field, because adding a column to an existing
|
|
67
|
+
* LadybugDB table via ALTER is not validated end-to-end yet — fresh
|
|
68
|
+
* `CREATE NODE TABLE` is the supported path.
|
|
69
|
+
*/
|
|
70
|
+
export const INDEX_SCHEMA_VERSION = 2;
|
|
52
71
|
const CODRAGRAPH_DIR = '.codragraph';
|
|
53
72
|
// ─── Local Storage Helpers ─────────────────────────────────────────────
|
|
54
73
|
/**
|
|
@@ -12,8 +12,27 @@
|
|
|
12
12
|
*/
|
|
13
13
|
|
|
14
14
|
const fs = require('fs');
|
|
15
|
+
const os = require('os');
|
|
15
16
|
const path = require('path');
|
|
16
|
-
const { spawnSync } = require('child_process');
|
|
17
|
+
const { spawnSync, spawn } = require('child_process');
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* Decide whether background auto-reindex is opted in. Two equivalent signals:
|
|
21
|
+
* 1. CODRAGRAPH_AUTO_REINDEX=1 in env (good for shells, CI)
|
|
22
|
+
* 2. `{ "autoReindex": true }` in ~/.codragraph/config.json (good for GUI
|
|
23
|
+
* editor launches on Windows, where shell env doesn't propagate to
|
|
24
|
+
* hook child processes reliably)
|
|
25
|
+
*/
|
|
26
|
+
function isAutoReindexEnabled() {
|
|
27
|
+
if (process.env.CODRAGRAPH_AUTO_REINDEX === '1') return true;
|
|
28
|
+
try {
|
|
29
|
+
const configPath = path.join(os.homedir(), '.codragraph', 'config.json');
|
|
30
|
+
const config = JSON.parse(fs.readFileSync(configPath, 'utf-8'));
|
|
31
|
+
return config && config.autoReindex === true;
|
|
32
|
+
} catch {
|
|
33
|
+
return false;
|
|
34
|
+
}
|
|
35
|
+
}
|
|
17
36
|
|
|
18
37
|
/**
|
|
19
38
|
* Read JSON input from stdin synchronously.
|
|
@@ -250,10 +269,84 @@ function handlePostToolUse(input) {
|
|
|
250
269
|
if (currentHead && currentHead === lastCommit) return;
|
|
251
270
|
|
|
252
271
|
const analyzeCmd = `npx @codragraph/cli analyze${hadEmbeddings ? ' --embeddings' : ''}`;
|
|
272
|
+
|
|
273
|
+
// Opt-in background auto-reindex.
|
|
274
|
+
// Default stays as notification-only because spawning analyze while an MCP
|
|
275
|
+
// server holds LadybugDB will fail with a database-busy error — the
|
|
276
|
+
// notification path lets the agent reindex at a quiet moment instead.
|
|
277
|
+
// Power users who run MCP outside Claude Code's lifecycle can opt in via
|
|
278
|
+
// CODRAGRAPH_AUTO_REINDEX=1 or `{ "autoReindex": true }` in
|
|
279
|
+
// ~/.codragraph/config.json.
|
|
280
|
+
if (isAutoReindexEnabled()) {
|
|
281
|
+
// The "coalesce" file is a single-process gate: it exists only while a
|
|
282
|
+
// reindex is in flight. The spawned analyze removes it on exit (success or
|
|
283
|
+
// failure) via CODRAGRAPH_REINDEX_LOCK_PATH; the 10-min mtime fallback
|
|
284
|
+
// catches the rare crash that bypasses analyze's exit handler.
|
|
285
|
+
const coalescePath = path.join(gitNexusDir, '.reindex.coalesce');
|
|
286
|
+
const crashSafetyTtlMs = 10 * 60 * 1000;
|
|
287
|
+
let inFlight = false;
|
|
288
|
+
try {
|
|
289
|
+
const stat = fs.statSync(coalescePath);
|
|
290
|
+
if (Date.now() - stat.mtimeMs < crashSafetyTtlMs) inFlight = true;
|
|
291
|
+
} catch {
|
|
292
|
+
/* no coalesce file — no reindex in flight */
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
if (!inFlight) {
|
|
296
|
+
try {
|
|
297
|
+
fs.writeFileSync(coalescePath, String(process.pid));
|
|
298
|
+
} catch {
|
|
299
|
+
/* best-effort — gate is for coalescing, not correctness */
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
const cliPath = resolveCliPath();
|
|
303
|
+
const reindexArgs = hadEmbeddings
|
|
304
|
+
? ['analyze', '--embeddings', '--no-setup']
|
|
305
|
+
: ['analyze', '--no-setup'];
|
|
306
|
+
const spawnEnv = { ...process.env, CODRAGRAPH_REINDEX_LOCK_PATH: coalescePath };
|
|
307
|
+
const spawnOpts = {
|
|
308
|
+
cwd,
|
|
309
|
+
detached: true,
|
|
310
|
+
stdio: 'ignore',
|
|
311
|
+
windowsHide: true,
|
|
312
|
+
env: spawnEnv,
|
|
313
|
+
};
|
|
314
|
+
try {
|
|
315
|
+
let child;
|
|
316
|
+
if (cliPath) {
|
|
317
|
+
child = spawn(process.execPath, [cliPath, ...reindexArgs], spawnOpts);
|
|
318
|
+
} else if (process.platform === 'win32') {
|
|
319
|
+
child = spawn('cmd', ['/c', 'npx', '-y', '@codragraph/cli', ...reindexArgs], spawnOpts);
|
|
320
|
+
} else {
|
|
321
|
+
child = spawn('npx', ['-y', '@codragraph/cli', ...reindexArgs], spawnOpts);
|
|
322
|
+
}
|
|
323
|
+
child.unref();
|
|
324
|
+
} catch {
|
|
325
|
+
/* spawn failed — fall through to notification */
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
sendHookResponse(
|
|
329
|
+
'PostToolUse',
|
|
330
|
+
`CodraGraph: auto-reindex started in background ` +
|
|
331
|
+
`(HEAD ${lastCommit ? lastCommit.slice(0, 7) : 'never'} → ${currentHead.slice(0, 7)}). ` +
|
|
332
|
+
`If an MCP server is currently holding the database, the reindex will fail silently — ` +
|
|
333
|
+
`run \`${analyzeCmd}\` manually after closing the agent session.`,
|
|
334
|
+
);
|
|
335
|
+
return;
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
sendHookResponse(
|
|
339
|
+
'PostToolUse',
|
|
340
|
+
`CodraGraph: auto-reindex coalesced — another reindex is in flight (will pick up your latest commit when it finishes).`,
|
|
341
|
+
);
|
|
342
|
+
return;
|
|
343
|
+
}
|
|
344
|
+
|
|
253
345
|
sendHookResponse(
|
|
254
346
|
'PostToolUse',
|
|
255
347
|
`CodraGraph index is stale (last indexed: ${lastCommit ? lastCommit.slice(0, 7) : 'never'}). ` +
|
|
256
|
-
`Run \`${analyzeCmd}\` to update the knowledge graph`,
|
|
348
|
+
`Run \`${analyzeCmd}\` to update the knowledge graph. ` +
|
|
349
|
+
`Set CODRAGRAPH_AUTO_REINDEX=1 (or autoReindex: true in ~/.codragraph/config.json) for background auto-reindex.`,
|
|
257
350
|
);
|
|
258
351
|
}
|
|
259
352
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@codragraph/cli",
|
|
3
|
-
"version": "1.6.4",
|
|
3
|
+
"version": "2.0.0",
|
|
4
4
|
"description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Anit Chaudhary",
|
|
@@ -56,10 +56,10 @@
|
|
|
56
56
|
"prepack": "node scripts/build.js"
|
|
57
57
|
},
|
|
58
58
|
"dependencies": {
|
|
59
|
+
"@codragraph/graphstore": "^1.0.0",
|
|
59
60
|
"@huggingface/transformers": "^4.1.0",
|
|
60
|
-
"@ladybugdb/core": "^0.
|
|
61
|
+
"@ladybugdb/core": "^0.16.0",
|
|
61
62
|
"@modelcontextprotocol/sdk": "^1.0.0",
|
|
62
|
-
"@codragraph/graphstore": "^0.1.1",
|
|
63
63
|
"@scarf/scarf": "^1.4.0",
|
|
64
64
|
"cli-progress": "^3.12.0",
|
|
65
65
|
"commander": "^14.0.3",
|
|
@@ -99,6 +99,7 @@
|
|
|
99
99
|
"tree-sitter-swift": "^0.6.0"
|
|
100
100
|
},
|
|
101
101
|
"devDependencies": {
|
|
102
|
+
"@codragraph/shared": "file:../codragraph-shared",
|
|
102
103
|
"@types/cli-progress": "^3.11.6",
|
|
103
104
|
"@types/cors": "^2.8.17",
|
|
104
105
|
"@types/express": "^4.17.21",
|
|
@@ -106,7 +107,6 @@
|
|
|
106
107
|
"@types/node": "^25.6.0",
|
|
107
108
|
"@types/uuid": "^11.0.0",
|
|
108
109
|
"@vitest/coverage-v8": "^4.0.18",
|
|
109
|
-
"@codragraph/shared": "file:../codragraph-shared",
|
|
110
110
|
"tsx": "^4.0.0",
|
|
111
111
|
"typescript": "^5.4.5",
|
|
112
112
|
"vitest": "^4.0.18"
|
|
package/scripts/build-tree-sitter-proto.cjs
CHANGED
|
@@ -34,14 +34,26 @@ const fs = require('fs');
|
|
|
34
34
|
const path = require('path');
|
|
35
35
|
const { execSync } = require('child_process');
|
|
36
36
|
|
|
37
|
-
|
|
37
|
+
// Resolve tree-sitter-proto from BOTH the codragraph package itself AND any
|
|
38
|
+
// monorepo root that hoisted the dep. npm workspaces hoist optional deps to
|
|
39
|
+
// the workspace root, so the package-local path doesn't exist on a workspace
|
|
40
|
+
// install. Same trap as patch-tree-sitter-swift.cjs — see that file for the
|
|
41
|
+
// full failure mode.
|
|
42
|
+
const protoCandidates = [
|
|
43
|
+
path.join(__dirname, '..', 'node_modules', 'tree-sitter-proto'),
|
|
44
|
+
path.join(__dirname, '..', '..', 'node_modules', 'tree-sitter-proto'),
|
|
45
|
+
];
|
|
46
|
+
const protoDir = protoCandidates.find((d) => fs.existsSync(path.join(d, 'binding.gyp')));
|
|
47
|
+
if (!protoDir) {
|
|
48
|
+
// tree-sitter-proto is an optionalDependency; absent when install
|
|
49
|
+
// skipped optional deps or the file: dep was not resolved.
|
|
50
|
+
process.exit(0);
|
|
51
|
+
}
|
|
38
52
|
const bindingGyp = path.join(protoDir, 'binding.gyp');
|
|
39
53
|
const bindingNode = path.join(protoDir, 'build', 'Release', 'tree_sitter_proto_binding.node');
|
|
40
54
|
|
|
41
55
|
try {
|
|
42
56
|
if (!fs.existsSync(bindingGyp)) {
|
|
43
|
-
// tree-sitter-proto is an optionalDependency; absent when install
|
|
44
|
-
// skipped optional deps or the file: dep was not resolved.
|
|
45
57
|
process.exit(0);
|
|
46
58
|
}
|
|
47
59
|
|
|
package/scripts/patch-tree-sitter-swift.cjs
CHANGED
|
@@ -29,13 +29,26 @@ const fs = require('fs');
|
|
|
29
29
|
const path = require('path');
|
|
30
30
|
const { execSync } = require('child_process');
|
|
31
31
|
|
|
32
|
-
|
|
32
|
+
// Resolve tree-sitter-swift from BOTH the codragraph package itself AND any
|
|
33
|
+
// monorepo root that hoisted the dep. npm workspaces hoist optional deps to
|
|
34
|
+
// the workspace root, so `codragraph/node_modules/tree-sitter-swift` doesn't
|
|
35
|
+
// exist when this script runs as the codragraph postinstall — checking only
|
|
36
|
+
// that path silently no-ops, which is exactly the failure that left
|
|
37
|
+
// Windows Node 22.14 users without a Swift parser.
|
|
38
|
+
//
|
|
39
|
+
// Order matters: the package-local dir takes precedence (standalone install),
|
|
40
|
+
// then the parent monorepo root (workspace install).
|
|
41
|
+
const candidateDirs = [
|
|
42
|
+
path.join(__dirname, '..', 'node_modules', 'tree-sitter-swift'),
|
|
43
|
+
path.join(__dirname, '..', '..', 'node_modules', 'tree-sitter-swift'),
|
|
44
|
+
];
|
|
45
|
+
const swiftDir = candidateDirs.find((d) => fs.existsSync(path.join(d, 'binding.gyp')));
|
|
46
|
+
if (!swiftDir) {
|
|
47
|
+
process.exit(0);
|
|
48
|
+
}
|
|
33
49
|
const bindingPath = path.join(swiftDir, 'binding.gyp');
|
|
34
50
|
|
|
35
51
|
try {
|
|
36
|
-
if (!fs.existsSync(bindingPath)) {
|
|
37
|
-
process.exit(0);
|
|
38
|
-
}
|
|
39
52
|
|
|
40
53
|
const content = fs.readFileSync(bindingPath, 'utf8');
|
|
41
54
|
let needsRebuild = false;
|