@codragraph/cli 1.6.3 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. package/README.md +50 -16
  2. package/dist/cli/ai-context.js +2 -2
  3. package/dist/cli/analyze.d.ts +22 -0
  4. package/dist/cli/analyze.js +111 -8
  5. package/dist/cli/compress-stats.d.ts +29 -0
  6. package/dist/cli/compress-stats.js +97 -0
  7. package/dist/cli/graphstore.d.ts +6 -2
  8. package/dist/cli/graphstore.js +24 -2
  9. package/dist/cli/index.js +17 -6
  10. package/dist/cli/profile-heap.d.ts +35 -0
  11. package/dist/cli/profile-heap.js +126 -0
  12. package/dist/cli/setup.d.ts +13 -0
  13. package/dist/cli/setup.js +75 -29
  14. package/dist/cli/skill-gen.d.ts +14 -2
  15. package/dist/cli/skill-gen.js +53 -20
  16. package/dist/cli/tool.js +4 -0
  17. package/dist/config/ignore-service.js +1 -1
  18. package/dist/core/embeddings/embedding-pipeline.js +24 -7
  19. package/dist/core/group/bridge-db.js +111 -24
  20. package/dist/core/group/extractors/grpc-patterns/proto.js +1 -12
  21. package/dist/core/ingestion/call-processor.js +2 -2
  22. package/dist/core/ingestion/cobol/cobol-preprocessor.js +1 -1
  23. package/dist/core/ingestion/cobol/jcl-parser.d.ts +1 -1
  24. package/dist/core/ingestion/cobol/jcl-parser.js +1 -1
  25. package/dist/core/ingestion/cobol-processor.d.ts +1 -1
  26. package/dist/core/ingestion/cobol-processor.js +1 -1
  27. package/dist/core/ingestion/heritage-extractors/generic.js +1 -1
  28. package/dist/core/ingestion/heritage-processor.js +1 -1
  29. package/dist/core/ingestion/import-processor.js +1 -1
  30. package/dist/core/ingestion/mro-processor.js +1 -1
  31. package/dist/core/ingestion/parsing-processor.js +1 -1
  32. package/dist/core/ingestion/type-extractors/c-cpp.js +1 -1
  33. package/dist/core/ingestion/type-extractors/python.js +1 -1
  34. package/dist/core/ingestion/type-extractors/shared.js +0 -3
  35. package/dist/core/lbug/content-read.d.ts +46 -0
  36. package/dist/core/lbug/content-read.js +64 -0
  37. package/dist/core/lbug/csv-generator.d.ts +2 -6
  38. package/dist/core/lbug/csv-generator.js +45 -12
  39. package/dist/core/lbug/lbug-adapter.d.ts +4 -1
  40. package/dist/core/lbug/lbug-adapter.js +157 -25
  41. package/dist/core/lbug/pool-adapter.js +51 -44
  42. package/dist/core/lbug/schema.d.ts +7 -7
  43. package/dist/core/lbug/schema.js +18 -0
  44. package/dist/core/run-analyze.d.ts +13 -0
  45. package/dist/core/run-analyze.js +91 -4
  46. package/dist/core/search/bm25-index.js +153 -12
  47. package/dist/core/wiki/generator.js +4 -4
  48. package/dist/mcp/local/local-backend.js +22 -5
  49. package/dist/mcp/resources.js +2 -3
  50. package/dist/server/api.js +4 -3
  51. package/dist/storage/repo-manager.d.ts +39 -0
  52. package/dist/storage/repo-manager.js +19 -0
  53. package/hooks/claude/codragraph-hook.cjs +108 -5
  54. package/hooks/claude/pre-tool-use.sh +6 -1
  55. package/package.json +4 -4
  56. package/scripts/build-tree-sitter-proto.cjs +15 -3
  57. package/scripts/patch-tree-sitter-swift.cjs +17 -4
  58. package/skills/codragraph-api-surface.md +110 -0
  59. package/skills/codragraph-cli.md +5 -5
  60. package/skills/codragraph-config-audit.md +146 -0
  61. package/skills/codragraph-cross-repo-impact.md +135 -0
  62. package/skills/codragraph-data-lineage.md +137 -0
  63. package/skills/codragraph-dead-code.md +119 -0
  64. package/skills/codragraph-debugging.md +1 -1
  65. package/skills/codragraph-exploring.md +1 -1
  66. package/skills/codragraph-gh-actions-debug.md +162 -0
  67. package/skills/codragraph-gh-issue-workflow.md +178 -0
  68. package/skills/codragraph-gh-pr-workflow.md +176 -0
  69. package/skills/codragraph-gh-release-workflow.md +187 -0
  70. package/skills/codragraph-git-bisect.md +176 -0
  71. package/skills/codragraph-git-force-push.md +147 -0
  72. package/skills/codragraph-git-history-rewrite.md +174 -0
  73. package/skills/codragraph-git-rebase-vs-merge.md +138 -0
  74. package/skills/codragraph-git-recovery.md +181 -0
  75. package/skills/codragraph-git-worktree.md +145 -0
  76. package/skills/codragraph-guide.md +1 -1
  77. package/skills/codragraph-impact-analysis.md +1 -1
  78. package/skills/codragraph-migration-tracking.md +130 -0
  79. package/skills/codragraph-notebook-context.md +136 -0
  80. package/skills/codragraph-observability-coverage.md +125 -0
  81. package/skills/codragraph-onboarding.md +129 -0
  82. package/skills/codragraph-perf-hotspots.md +132 -0
  83. package/skills/codragraph-pr-review.md +1 -1
  84. package/skills/codragraph-project-switcher.md +116 -0
  85. package/skills/codragraph-refactoring.md +1 -1
  86. package/skills/codragraph-security-audit.md +144 -0
  87. package/skills/codragraph-sql-tracing.md +122 -0
  88. package/skills/codragraph-supply-chain-audit.md +153 -0
  89. package/skills/codragraph-test-coverage.md +97 -0
@@ -10,9 +10,11 @@
10
10
  */
11
11
  import path from 'path';
12
12
  import fs from 'fs/promises';
13
+ import * as fsSync from 'node:fs';
14
+ import * as v8 from 'node:v8';
13
15
  import { runPipelineFromRepo } from './ingestion/pipeline.js';
14
16
  import { initLbug, loadGraphToLbug, getLbugStats, executeQuery, executeWithReusedStatement, closeLbug, loadCachedEmbeddings, } from './lbug/lbug-adapter.js';
15
- import { getStoragePaths, saveMeta, loadMeta, addToGitignore, registerRepo, cleanupOldKuzuFiles, } from '../storage/repo-manager.js';
17
+ import { getStoragePaths, saveMeta, loadMeta, addToGitignore, registerRepo, cleanupOldKuzuFiles, INDEX_SCHEMA_VERSION, } from '../storage/repo-manager.js';
16
18
  import { getCurrentCommit, getRemoteUrl, hasGitDir, getInferredRepoName } from '../storage/git.js';
17
19
  import { recordAnalysisSnapshot } from './graphstore/index.js';
18
20
  import { generateAIContextFiles } from '../cli/ai-context.js';
@@ -51,7 +53,70 @@ export const PHASE_LABELS = {
51
53
  */
52
54
  export async function runFullAnalysis(repoPath, options, callbacks) {
53
55
  const log = (msg) => callbacks.onLog?.(msg);
54
- const progress = (phase, percent, message) => callbacks.onProgress(phase, percent, message);
56
+ // RFC 0002 Phase 1 optional heap-profile instrumentation. Set
57
+ // CODRAGRAPH_HEAP_PROFILE=1 (or run `codragraph profile-heap`) to write a
58
+ // v8 heap snapshot at every phase boundary, plus a `profile-summary.jsonl`
59
+ // log of `process.memoryUsage()` at the same boundaries. Snapshots land in
60
+ // `<repo>/.codragraph/heap-profiles/`. Open snapshots in Chrome DevTools
61
+ // (Memory → Load) to find which constructors dominate the retained set; the
62
+ // JSONL is the cheap RSS / heapUsed timeline. Off by default — snapshot
63
+ // writes pause the event loop ~2-5s and consume ~100-500MB of disk each.
64
+ const heapProfileEnabled = process.env.CODRAGRAPH_HEAP_PROFILE === '1';
65
+ let heapProfileDir = '';
66
+ let heapProfileSummaryPath = '';
67
+ let lastProfilePhase = '';
68
+ if (heapProfileEnabled) {
69
+ heapProfileDir = path.join(repoPath, '.codragraph', 'heap-profiles');
70
+ heapProfileSummaryPath = path.join(heapProfileDir, 'profile-summary.jsonl');
71
+ try {
72
+ fsSync.mkdirSync(heapProfileDir, { recursive: true });
73
+ // Truncate any prior summary so a single run produces a clean log.
74
+ // We append crash-safely on each phase boundary below.
75
+ fsSync.writeFileSync(heapProfileSummaryPath, '');
76
+ }
77
+ catch {
78
+ /* permission issue — best-effort */
79
+ }
80
+ }
81
+ const progress = (phase, percent, message) => {
82
+ callbacks.onProgress(phase, percent, message);
83
+ // Only snapshot on phase transitions, not every tick. Phase strings come
84
+ // from runPipelineFromRepo / loadGraphToLbug and are stable.
85
+ if (heapProfileEnabled && phase && phase !== lastProfilePhase) {
86
+ lastProfilePhase = phase;
87
+ const ts = Date.now();
88
+ const safe = phase.replace(/[^a-zA-Z0-9]+/g, '_').slice(0, 60);
89
+ const file = path.join(heapProfileDir, `${ts}-${safe}.heapsnapshot`);
90
+ // Capture the cheap memoryUsage timeline FIRST — even if writeHeapSnapshot
91
+ // crashes (out of disk, permissions), we still have the RSS curve which
92
+ // is the more useful artifact for the heap-pressure RFC.
93
+ try {
94
+ const mu = process.memoryUsage();
95
+ const entry = JSON.stringify({
96
+ ts,
97
+ phase,
98
+ percent,
99
+ rss: mu.rss,
100
+ heapUsed: mu.heapUsed,
101
+ heapTotal: mu.heapTotal,
102
+ external: mu.external,
103
+ arrayBuffers: mu.arrayBuffers,
104
+ snapshotFile: path.basename(file),
105
+ });
106
+ fsSync.appendFileSync(heapProfileSummaryPath, entry + '\n');
107
+ }
108
+ catch (err) {
109
+ log(`heap-profile: summary append failed (${err.message})`);
110
+ }
111
+ try {
112
+ v8.writeHeapSnapshot(file);
113
+ log(`heap-profile: wrote ${file}`);
114
+ }
115
+ catch (err) {
116
+ log(`heap-profile: write failed (${err.message})`);
117
+ }
118
+ }
119
+ };
55
120
  const { storagePath, lbugPath } = getStoragePaths(repoPath);
56
121
  // Clean up stale KuzuDB files from before the LadybugDB migration.
57
122
  const kuzuResult = await cleanupOldKuzuFiles(storagePath);
@@ -62,7 +127,17 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
62
127
  const currentCommit = repoHasGit ? getCurrentCommit(repoPath) : '';
63
128
  const existingMeta = await loadMeta(storagePath);
64
129
  // ── Early-return: already up to date ──────────────────────────────
65
- if (existingMeta && !options.force && existingMeta.lastCommit === currentCommit) {
130
+ // Schema-version mismatch forces a full re-analyze regardless of commit
131
+ // equality: existing 1.7.x indexes have no `schemaVersion` field at all,
132
+ // and 1.8+ readers expect every node table to carry a `contentEncoding`
133
+ // column (RFC 0001 Phase 2). LadybugDB ALTER on existing tables is not
134
+ // validated end-to-end yet, so the supported migration path is
135
+ // re-analyze → fresh CREATE NODE TABLE.
136
+ const schemaUpToDate = !!existingMeta && (existingMeta.schemaVersion ?? 0) >= INDEX_SCHEMA_VERSION;
137
+ if (existingMeta &&
138
+ schemaUpToDate &&
139
+ !options.force &&
140
+ existingMeta.lastCommit === currentCommit) {
66
141
  // Non-git folders have currentCommit = '' — always rebuild since we can't detect changes
67
142
  if (currentCommit !== '') {
68
143
  return {
@@ -73,6 +148,11 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
73
148
  };
74
149
  }
75
150
  }
151
+ if (existingMeta && !schemaUpToDate) {
152
+ log(`Index schema version ${existingMeta.schemaVersion ?? '<missing>'} is older than ` +
153
+ `${INDEX_SCHEMA_VERSION} (RFC 0001 Phase 2 — adds contentEncoding column). ` +
154
+ `Re-analyzing.`);
155
+ }
76
156
  // ── Cache embeddings from existing index before rebuild ────────────
77
157
  let cachedEmbeddingNodeIds = new Set();
78
158
  let cachedEmbeddings = [];
@@ -122,7 +202,12 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
122
202
  lbugMsgCount++;
123
203
  const pct = Math.min(84, 60 + Math.round((lbugMsgCount / (lbugMsgCount + 10)) * 24));
124
204
  progress('lbug', pct, msg);
125
- });
205
+ },
206
+ // RFC 0001 Phase 2: when --compress is set, every content row goes
207
+ // through encodeContent before hitting the CSV. Default 'none' is
208
+ // a true passthrough, so the on-disk layout is byte-identical to
209
+ // pre-Phase-2 indexes when no compression flag is passed.
210
+ { compress: options.compress });
126
211
  // ── Phase 2.5: Versioned-graph snapshot (best-effort) ────────────
127
212
  // Phase 4 hook: snapshot the freshly-loaded graph into the
128
213
  // content-addressed `.codragraph/graphstore/`. Failures here do NOT
@@ -230,6 +315,8 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
230
315
  repoPath,
231
316
  lastCommit: currentCommit,
232
317
  indexedAt: new Date().toISOString(),
318
+ schemaVersion: INDEX_SCHEMA_VERSION,
319
+ compress: options.compress ?? 'none',
233
320
  // Captured here (not at registration) so it travels with the
234
321
  // on-disk meta.json — sibling-clone fingerprinting works for
235
322
  // out-of-tree consumers (group-status, future tooling) without
@@ -10,19 +10,65 @@
10
10
  * small repos / CI runners) at the cost of paying that overhead on the
11
11
  * first `query`/`context` call in a session.
12
12
  */
13
- import { queryFTS, ensureFTSIndex } from '../lbug/lbug-adapter.js';
13
+ import { queryFTS, ensureFTSIndex, executeQuery as executeCoreQuery, } from '../lbug/lbug-adapter.js';
14
14
  /**
15
- * FTS schema served by `searchFTSFromLbug`. Centralised so that both the
16
- * CLI/pipeline path and the MCP pool path use identical (table, index,
17
- * properties) tuples and the lazy-create logic stays in one place.
15
+ * FTS table set served by `searchFTSFromLbug`. Centralised so that both
16
+ * the CLI/pipeline path and the MCP pool path stay in lockstep.
17
+ *
18
+ * The properties list is computed at FTS-create time via `ftsPropertiesFor`
19
+ * — for repos that were analysed with `--compress brotli|zstd`, the
20
+ * `content` column holds base64-of-encoded-bytes and would tokenise to
21
+ * useless tokens. Those repos get name-only FTS so search at least
22
+ * matches function/class names instead of returning random hits on
23
+ * base64 alphabet. Plain (compress='none' / unset) repos get the full
24
+ * `name + content` index for body-text matches. RFC 0001 Phase 2.5.
18
25
  */
19
- const FTS_INDEXES = [
20
- { table: 'File', indexName: 'file_fts', properties: ['name', 'content'] },
21
- { table: 'Function', indexName: 'function_fts', properties: ['name', 'content'] },
22
- { table: 'Class', indexName: 'class_fts', properties: ['name', 'content'] },
23
- { table: 'Method', indexName: 'method_fts', properties: ['name', 'content'] },
24
- { table: 'Interface', indexName: 'interface_fts', properties: ['name', 'content'] },
26
+ const FTS_TABLES = [
27
+ { table: 'File', indexName: 'file_fts' },
28
+ { table: 'Function', indexName: 'function_fts' },
29
+ { table: 'Class', indexName: 'class_fts' },
30
+ { table: 'Method', indexName: 'method_fts' },
31
+ { table: 'Interface', indexName: 'interface_fts' },
25
32
  ];
33
+ const ftsPropertiesFor = (compress) => !compress || compress === 'none' ? ['name', 'content'] : ['name'];
34
+ /**
35
+ * Look up `meta.compress` for a repo. The MCP path passes `repoId`
36
+ * (registry-derived); the CLI path passes nothing and we walk up from
37
+ * cwd. Returns `'none'` whenever the lookup fails so the safe default
38
+ * (full FTS index) is used — the failure mode is reduced search
39
+ * quality, never wrong results.
40
+ */
41
+ async function getCompressMode(repoId) {
42
+ try {
43
+ const repoMod = await import('../../storage/repo-manager.js');
44
+ if (repoId) {
45
+ // MCP path: registry name is the source of truth. The MCP
46
+ // backend's `repoId` is `entry.name.toLowerCase()` (or `${name}-${hash}`
47
+ // on collision); match conservatively against both forms.
48
+ const entries = await repoMod.listRegisteredRepos();
49
+ for (const entry of entries) {
50
+ const base = entry.name.toLowerCase();
51
+ if (base === repoId || repoId.startsWith(`${base}-`)) {
52
+ const meta = await repoMod.loadMeta(entry.storagePath);
53
+ return meta?.compress ?? 'none';
54
+ }
55
+ }
56
+ return 'none';
57
+ }
58
+ const repo = await repoMod.findRepo(process.cwd());
59
+ return repo?.meta?.compress ?? 'none';
60
+ }
61
+ catch {
62
+ return 'none';
63
+ }
64
+ }
65
+ const FALLBACK_SCAN_LIMIT = 50_000;
66
+ const BOOLEAN_QUERY_TOKENS = new Set(['and', 'or', 'not']);
67
+ const FALLBACK_FIELD_WEIGHTS = {
68
+ name: 4,
69
+ content: 2,
70
+ description: 1,
71
+ };
26
72
  /**
27
73
  * Per-process cache for the MCP pool path: tracks which `(repoId, table)`
28
74
  * pairs have been ensured. The CLI/pipeline path gets its own cache inside
@@ -122,6 +168,72 @@ async function queryFTSViaExecutor(executor, tableName, indexName, query, limit)
122
168
  return [];
123
169
  }
124
170
  }
171
+ function searchTerms(query) {
172
+ const terms = query
173
+ .toLowerCase()
174
+ .match(/[\p{L}\p{N}_]+/gu)
175
+ ?.filter((term) => term.length > 1 && !BOOLEAN_QUERY_TOKENS.has(term));
176
+ return [...new Set(terms ?? [])];
177
+ }
178
+ function scoreFallbackNode(node, query, properties) {
179
+ const terms = searchTerms(query);
180
+ if (terms.length === 0)
181
+ return 0;
182
+ const phrase = query.trim().toLowerCase();
183
+ let score = 0;
184
+ for (const property of properties) {
185
+ const raw = node[property];
186
+ if (raw === null || raw === undefined)
187
+ continue;
188
+ const value = String(raw).toLowerCase();
189
+ if (!value)
190
+ continue;
191
+ const weight = FALLBACK_FIELD_WEIGHTS[property] ?? 1;
192
+ if (phrase.length > 1 && value.includes(phrase)) {
193
+ score += weight * (terms.length + 1);
194
+ }
195
+ for (const term of terms) {
196
+ if (value.includes(term))
197
+ score += weight;
198
+ }
199
+ }
200
+ return score;
201
+ }
202
+ async function queryFallbackViaExecutor(executor, tableName, properties, query, limit) {
203
+ try {
204
+ const rows = await executor(`
205
+ MATCH (node:${tableName})
206
+ RETURN node
207
+ LIMIT ${FALLBACK_SCAN_LIMIT}
208
+ `);
209
+ return rows
210
+ .map((row) => {
211
+ const node = row.node || row[0] || {};
212
+ return {
213
+ filePath: node.filePath || '',
214
+ score: scoreFallbackNode(node, query, properties),
215
+ nodeId: node.nodeId || node.id || '',
216
+ };
217
+ })
218
+ .filter((result) => result.filePath && result.score > 0)
219
+ .sort((a, b) => b.score - a.score)
220
+ .slice(0, limit);
221
+ }
222
+ catch {
223
+ return [];
224
+ }
225
+ }
226
+ async function fallbackSearchAllTables(executor, query, limit,
227
+ // Same compress-aware property selection as the FTS path. Default keeps
228
+ // pre-Phase-2 behaviour (`['name', 'content']`) for callers that don't
229
+ // pass a value.
230
+ properties = ['name', 'content']) {
231
+ const results = [];
232
+ for (const { table } of FTS_TABLES) {
233
+ results.push(await queryFallbackViaExecutor(executor, table, properties, query, limit));
234
+ }
235
+ return results;
236
+ }
125
237
  /**
126
238
  * Search using LadybugDB's built-in FTS (always fresh, reads from disk)
127
239
  *
@@ -134,6 +246,8 @@ async function queryFTSViaExecutor(executor, tableName, indexName, query, limit)
134
246
  * @returns Ranked search results from FTS indexes
135
247
  */
136
248
  export const searchFTSFromLbug = async (query, limit = 20, repoId) => {
249
+ if (!query.trim() || limit <= 0)
250
+ return [];
137
251
  let fileResults, functionResults, classResults, methodResults, interfaceResults;
138
252
  if (repoId) {
139
253
  // Use MCP connection pool via dynamic import
@@ -149,7 +263,12 @@ export const searchFTSFromLbug = async (query, limit = 20, repoId) => {
149
263
  const executor = (cypher) => executeQuery(repoId, cypher);
150
264
  // Lazy-create FTS indexes on first query for this repo (analyze no longer
151
265
  // creates them up-front, so we ensure them here). Cached per-process.
152
- for (const { table, indexName, properties } of FTS_INDEXES) {
266
+ // RFC 0001 Phase 2.5: drop `content` from FTS properties for repos
267
+ // analysed with --compress brotli|zstd — the column holds encoded
268
+ // bytes and would tokenise to garbage.
269
+ const compress = await getCompressMode(repoId);
270
+ const properties = ftsPropertiesFor(compress);
271
+ for (const { table, indexName } of FTS_TABLES) {
153
272
  await ensureFTSIndexViaExecutor(executor, repoId, table, indexName, properties);
154
273
  }
155
274
  fileResults = await queryFTSViaExecutor(executor, 'File', 'file_fts', query, limit);
@@ -157,11 +276,24 @@ export const searchFTSFromLbug = async (query, limit = 20, repoId) => {
157
276
  classResults = await queryFTSViaExecutor(executor, 'Class', 'class_fts', query, limit);
158
277
  methodResults = await queryFTSViaExecutor(executor, 'Method', 'method_fts', query, limit);
159
278
  interfaceResults = await queryFTSViaExecutor(executor, 'Interface', 'interface_fts', query, limit);
279
+ if (fileResults.length +
280
+ functionResults.length +
281
+ classResults.length +
282
+ methodResults.length +
283
+ interfaceResults.length ===
284
+ 0) {
285
+ [fileResults, functionResults, classResults, methodResults, interfaceResults] =
286
+ await fallbackSearchAllTables(executor, query, limit, properties);
287
+ }
160
288
  }
161
289
  else {
162
290
  // Use core lbug adapter (CLI / pipeline context) — also sequential for safety.
163
291
  // Lazy-create FTS indexes on first query (analyze no longer does it).
164
- for (const { table, indexName, properties } of FTS_INDEXES) {
292
+ // RFC 0001 Phase 2.5: same `compress`-aware property selection as the MCP
293
+ // path; the CLI walks up from cwd to find the repo's meta.json.
294
+ const compress = await getCompressMode();
295
+ const properties = ftsPropertiesFor(compress);
296
+ for (const { table, indexName } of FTS_TABLES) {
165
297
  await ensureFTSIndex(table, indexName, [...properties]).catch(() => { });
166
298
  }
167
299
  fileResults = await queryFTS('File', 'file_fts', query, limit, false).catch(() => []);
@@ -169,6 +301,15 @@ export const searchFTSFromLbug = async (query, limit = 20, repoId) => {
169
301
  classResults = await queryFTS('Class', 'class_fts', query, limit, false).catch(() => []);
170
302
  methodResults = await queryFTS('Method', 'method_fts', query, limit, false).catch(() => []);
171
303
  interfaceResults = await queryFTS('Interface', 'interface_fts', query, limit, false).catch(() => []);
304
+ if (fileResults.length +
305
+ functionResults.length +
306
+ classResults.length +
307
+ methodResults.length +
308
+ interfaceResults.length ===
309
+ 0) {
310
+ [fileResults, functionResults, classResults, methodResults, interfaceResults] =
311
+ await fallbackSearchAllTables(executeCoreQuery, query, limit, properties);
312
+ }
172
313
  }
173
314
  // Collect all node scores per filePath to track which nodes actually matched
174
315
  const fileNodeScores = new Map();
@@ -221,7 +221,7 @@ export class WikiGenerator {
221
221
  reportProgress(node.name);
222
222
  return 1;
223
223
  }
224
- catch (err) {
224
+ catch (_err) {
225
225
  this.failedModules.push(node.name);
226
226
  reportProgress(`Failed: ${node.name}`);
227
227
  return 0;
@@ -239,7 +239,7 @@ export class WikiGenerator {
239
239
  pagesGenerated++;
240
240
  reportProgress(node.name);
241
241
  }
242
- catch (err) {
242
+ catch (_err) {
243
243
  this.failedModules.push(node.name);
244
244
  reportProgress(`Failed: ${node.name}`);
245
245
  }
@@ -607,7 +607,7 @@ export class WikiGenerator {
607
607
  this.onProgress('incremental', percent, `${incProcessed}/${affectedNodes.length} — ${node.name}`);
608
608
  return 1;
609
609
  }
610
- catch (err) {
610
+ catch (_err) {
611
611
  this.failedModules.push(node.name);
612
612
  incProcessed++;
613
613
  return 0;
@@ -807,7 +807,7 @@ export class WikiGenerator {
807
807
  let activeConcurrency = this.concurrency;
808
808
  let running = 0;
809
809
  let idx = 0;
810
- return new Promise((resolve, reject) => {
810
+ return new Promise((resolve, _reject) => {
811
811
  const next = () => {
812
812
  while (running < activeConcurrency && idx < items.length) {
813
813
  const item = items[idx++];
@@ -19,6 +19,7 @@ import { GroupService } from '../../core/group/service.js';
19
19
  import { resolveAtGroupMemberRepoPath } from '../../core/group/resolve-at-member.js';
20
20
  import { collectBestChunks } from '../../core/embeddings/types.js';
21
21
  import { EMBEDDING_TABLE_NAME, EMBEDDING_INDEX_NAME } from '../../core/lbug/schema.js';
22
+ import { decodeContentField } from '../../core/lbug/content-read.js';
22
23
  import { PhaseTimer } from '../../core/search/phase-timer.js';
23
24
  import { checkStaleness, checkCwdMatch } from '../../core/git-staleness.js';
24
25
  // AI context generation is CLI-only (codragraph analyze)
@@ -835,10 +836,12 @@ export class LocalBackend {
835
836
  try {
836
837
  const contentRows = await executeParameterized(repo.id, `
837
838
  MATCH (n {id: $nodeId})
838
- RETURN n.content AS content
839
+ RETURN n.content AS content, n.contentEncoding AS contentEncoding
839
840
  `, { nodeId: sym.nodeId });
840
841
  if (contentRows.length > 0) {
841
- content = contentRows[0].content ?? contentRows[0][0];
842
+ const raw = contentRows[0].content ?? contentRows[0][0];
843
+ const enc = contentRows[0].contentEncoding ?? contentRows[0][1];
844
+ content = decodeContentField(raw, enc);
842
845
  }
843
846
  }
844
847
  catch (e) {
@@ -1330,7 +1333,13 @@ export class LocalBackend {
1330
1333
  */
1331
1334
  async resolveSymbolCandidates(repo, query, hints) {
1332
1335
  const { uid, name, include_content } = query;
1333
- const selectClause = `n.id AS id, n.name AS name, labels(n)[0] AS type, n.filePath AS filePath, n.startLine AS startLine, n.endLine AS endLine${include_content ? ', n.content AS content' : ''}`;
1336
+ // RFC 0001 Phase 2: when fetching content, also fetch the per-row
1337
+ // encoding tag so `decodeContentField` can pass it through unchanged
1338
+ // (default 'none') or run brotli/zstd decode. contentEncoding is
1339
+ // appended after content in the SELECT, so the numeric-index fallback
1340
+ // for content stays at r[6] and encoding lands at r[7]. We read by name
1341
+ // first, which is the documented preferred path on LadybugDB.
1342
+ const selectClause = `n.id AS id, n.name AS name, labels(n)[0] AS type, n.filePath AS filePath, n.startLine AS startLine, n.endLine AS endLine${include_content ? ', n.content AS content, n.contentEncoding AS contentEncoding' : ''}`;
1334
1343
  // Direct UID — zero-ambiguity path.
1335
1344
  if (uid) {
1336
1345
  const rows = await executeParameterized(repo.id, `MATCH (n {id: $uid}) RETURN ${selectClause} LIMIT 1`, { uid });
@@ -1344,7 +1353,11 @@ export class LocalBackend {
1344
1353
  filePath: (r.filePath ?? r[3]),
1345
1354
  startLine: (r.startLine ?? r[4]),
1346
1355
  endLine: (r.endLine ?? r[5]),
1347
- ...(include_content ? { content: (r.content ?? r[6]) } : {}),
1356
+ ...(include_content
1357
+ ? {
1358
+ content: decodeContentField(r.content ?? r[6], r.contentEncoding ?? r[7]),
1359
+ }
1360
+ : {}),
1348
1361
  };
1349
1362
  // Same LadybugDB label-enrichment as the name-based path: a UID
1350
1363
  // pointing at a Class must still surface `type: 'Class'` so impact's
@@ -1380,7 +1393,11 @@ export class LocalBackend {
1380
1393
  filePath: (r.filePath ?? r[3]),
1381
1394
  startLine: (r.startLine ?? r[4]),
1382
1395
  endLine: (r.endLine ?? r[5]),
1383
- ...(include_content ? { content: (r.content ?? r[6]) } : {}),
1396
+ ...(include_content
1397
+ ? {
1398
+ content: decodeContentField(r.content ?? r[6], r.contentEncoding ?? r[7]),
1399
+ }
1400
+ : {}),
1384
1401
  }));
1385
1402
  // Enrich labels for any candidates where `labels(n)[0]` came back empty.
1386
1403
  // LadybugDB returns an empty string for that projection on certain node
@@ -318,7 +318,7 @@ async function getContextResource(backend, repoName) {
318
318
  lines.push(' - cypher: Raw graph queries');
319
319
  lines.push(' - list_repos: Discover all indexed repositories');
320
320
  lines.push('');
321
- lines.push('re_index: Run `npx codragraph analyze` in terminal if data is stale');
321
+ lines.push('re_index: Run `npx @codragraph/cli analyze` in terminal if data is stale');
322
322
  lines.push('');
323
323
  lines.push('resources_available:');
324
324
  lines.push(' - codragraph://repos: All indexed repositories');
@@ -520,7 +520,7 @@ async function getProcessDetailResource(name, backend, repoName) {
520
520
  async function getSetupResource(backend) {
521
521
  const repos = await backend.listRepos();
522
522
  if (repos.length === 0) {
523
- return '# CodraGraph\n\nNo repositories indexed. Run: `npx codragraph analyze` in a repository.';
523
+ return '# CodraGraph\n\nNo repositories indexed. Run: `npx @codragraph/cli analyze` in a repository.';
524
524
  }
525
525
  const sections = [];
526
526
  for (const repo of repos) {
@@ -625,7 +625,6 @@ async function getRecipesResource(backend, repoName, taskFamily) {
625
625
  let result;
626
626
  try {
627
627
  const harnessModuleId = '@codragraph/harness/mcp/handler';
628
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
629
628
  const mod = (await import(/* @vite-ignore */ harnessModuleId));
630
629
  result = await mod.handleHarnessRecipesList({
631
630
  task_family: taskFamily,
@@ -15,6 +15,7 @@ import { createRequire } from 'node:module';
15
15
  import { loadMeta, listRegisteredRepos, getStoragePath } from '../storage/repo-manager.js';
16
16
  import { executeQuery, executePrepared, executeWithReusedStatement, streamQuery, closeLbug, withLbugDb, } from '../core/lbug/lbug-adapter.js';
17
17
  import { isWriteQuery } from '../core/lbug/pool-adapter.js';
18
+ import { decodeContentField } from '../core/lbug/content-read.js';
18
19
  import { NODE_TABLES } from '../_shared/index.js';
19
20
  import { searchFTSFromLbug } from '../core/search/bm25-index.js';
20
21
  import { hybridSearch } from '../core/search/hybrid-search.js';
@@ -189,7 +190,7 @@ const getNodeQuery = (table, includeContent) => {
189
190
  const tableLabel = quoteNodeTable(table);
190
191
  if (table === 'File') {
191
192
  return includeContent
192
- ? `MATCH (n:${tableLabel}) RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.content AS content`
193
+ ? `MATCH (n:${tableLabel}) RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.content AS content, n.contentEncoding AS contentEncoding`
193
194
  : `MATCH (n:${tableLabel}) RETURN n.id AS id, n.name AS name, n.filePath AS filePath`;
194
195
  }
195
196
  if (table === 'Folder') {
@@ -208,7 +209,7 @@ const getNodeQuery = (table, includeContent) => {
208
209
  return `MATCH (n:${tableLabel}) RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.description AS description`;
209
210
  }
210
211
  return includeContent
211
- ? `MATCH (n:${tableLabel}) RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.startLine AS startLine, n.endLine AS endLine, n.content AS content`
212
+ ? `MATCH (n:${tableLabel}) RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.startLine AS startLine, n.endLine AS endLine, n.content AS content, n.contentEncoding AS contentEncoding`
212
213
  : `MATCH (n:${tableLabel}) RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.startLine AS startLine, n.endLine AS endLine`;
213
214
  };
214
215
  const mapGraphNodeRow = (table, row, includeContent) => ({
@@ -219,7 +220,7 @@ const mapGraphNodeRow = (table, row, includeContent) => ({
219
220
  filePath: row.filePath ?? row[2],
220
221
  startLine: row.startLine,
221
222
  endLine: row.endLine,
222
- content: includeContent ? row.content : undefined,
223
+ content: includeContent ? decodeContentField(row.content, row.contentEncoding) : undefined,
223
224
  responseKeys: row.responseKeys,
224
225
  errorKeys: row.errorKeys,
225
226
  middleware: row.middleware,
@@ -36,10 +36,49 @@
36
36
  * so the registry stabilises over analyze/re-analyze cycles.
37
37
  */
38
38
  export declare const canonicalizePath: (p: string) => string;
39
+ /**
40
+ * On-disk schema version for `.codragraph/lbug` and `.codragraph/meta.json`.
41
+ *
42
+ * 1 — pre-RFC-0001-Phase-2 layout. Node tables have `content STRING`
43
+ * but no `contentEncoding` column. Implicit/missing on existing
44
+ * 1.6.x and 1.7.x indexes (RepoMeta.schemaVersion was undefined).
45
+ * 2 — RFC 0001 Phase 2: every node table that has `content` also has
46
+ * a `contentEncoding STRING DEFAULT 'none'` column. Writers may
47
+ * opt into compression via `--compress brotli|zstd` (compression
48
+ * is OFF by default, so existing readers keep working). Readers
49
+ * decode based on the per-row encoding tag.
50
+ *
51
+ * Bumping this is the migration trigger: `runFullAnalysis` forces a
52
+ * full re-analyze when an existing index has a missing or older
53
+ * `schemaVersion` field, because adding a column to an existing
54
+ * LadybugDB table via ALTER is not validated end-to-end yet — fresh
55
+ * `CREATE NODE TABLE` is the supported path.
56
+ */
57
+ export declare const INDEX_SCHEMA_VERSION: 2;
39
58
  export interface RepoMeta {
40
59
  repoPath: string;
41
60
  lastCommit: string;
42
61
  indexedAt: string;
62
+ /**
63
+ * On-disk schema version (see {@link INDEX_SCHEMA_VERSION}). Absent on
64
+ * indexes written by 1.7.x or earlier; `runFullAnalysis` treats those
65
+ * as needing a full re-analyze when they're loaded by a 1.8+ CLI.
66
+ */
67
+ schemaVersion?: number;
68
+ /**
69
+ * RFC 0001 Phase 2 — the per-row content encoding chosen at the last
70
+ * `analyze --compress` invocation. `'none'` (or absent) means rows
71
+ * carry plain text; `'brotli'` / `'zstd'` means rows are compressed
72
+ * and consumers must decode. Persisted so query-time tooling can
73
+ * detect the compressed mode without sampling rows.
74
+ *
75
+ * Phase 2.5 hooks: `core/search/bm25-index.ts` reads this field at
76
+ * FTS-create time and drops `content` from the FTS property list
77
+ * when set to a non-`'none'` value (full-text search falls back to
78
+ * symbol-name matches). Embeddings and graph queries are unaffected
79
+ * — they decode at the read boundary.
80
+ */
81
+ compress?: 'none' | 'brotli' | 'zstd';
43
82
  /**
44
83
  * Canonical `origin` remote URL captured at index time. Used to
45
84
  * fingerprint the same logical repo across multiple on-disk clones
@@ -49,6 +49,25 @@ export const canonicalizePath = (p) => {
49
49
  return resolved;
50
50
  }
51
51
  };
52
+ /**
53
+ * On-disk schema version for `.codragraph/lbug` and `.codragraph/meta.json`.
54
+ *
55
+ * 1 — pre-RFC-0001-Phase-2 layout. Node tables have `content STRING`
56
+ * but no `contentEncoding` column. Implicit/missing on existing
57
+ * 1.6.x and 1.7.x indexes (RepoMeta.schemaVersion was undefined).
58
+ * 2 — RFC 0001 Phase 2: every node table that has `content` also has
59
+ * a `contentEncoding STRING DEFAULT 'none'` column. Writers may
60
+ * opt into compression via `--compress brotli|zstd` (compression
61
+ * is OFF by default, so existing readers keep working). Readers
62
+ * decode based on the per-row encoding tag.
63
+ *
64
+ * Bumping this is the migration trigger: `runFullAnalysis` forces a
65
+ * full re-analyze when an existing index has a missing or older
66
+ * `schemaVersion` field, because adding a column to an existing
67
+ * LadybugDB table via ALTER is not validated end-to-end yet — fresh
68
+ * `CREATE NODE TABLE` is the supported path.
69
+ */
70
+ export const INDEX_SCHEMA_VERSION = 2;
52
71
  const CODRAGRAPH_DIR = '.codragraph';
53
72
  // ─── Local Storage Helpers ─────────────────────────────────────────────
54
73
  /**