@codragraph/cli 1.6.4 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. package/README.md +34 -0
  2. package/dist/_shared/cgdb/schema-constants.d.ts +16 -0
  3. package/dist/_shared/cgdb/schema-constants.d.ts.map +1 -0
  4. package/dist/_shared/cgdb/schema-constants.js +67 -0
  5. package/dist/_shared/cgdb/schema-constants.js.map +1 -0
  6. package/dist/_shared/index.d.ts +2 -2
  7. package/dist/_shared/index.js +1 -1
  8. package/dist/cli/analyze.d.ts +22 -0
  9. package/dist/cli/analyze.js +109 -6
  10. package/dist/cli/compress-stats.d.ts +29 -0
  11. package/dist/cli/compress-stats.js +97 -0
  12. package/dist/cli/graphstore.d.ts +6 -2
  13. package/dist/cli/graphstore.js +45 -23
  14. package/dist/cli/index-repo.js +3 -3
  15. package/dist/cli/index.js +16 -2
  16. package/dist/cli/profile-heap.d.ts +35 -0
  17. package/dist/cli/profile-heap.js +126 -0
  18. package/dist/cli/setup.d.ts +13 -0
  19. package/dist/cli/setup.js +22 -11
  20. package/dist/cli/skill-gen.d.ts +14 -2
  21. package/dist/cli/skill-gen.js +52 -19
  22. package/dist/cli/tool.js +4 -0
  23. package/dist/cli/wiki.js +3 -3
  24. package/dist/core/augmentation/engine.js +7 -7
  25. package/dist/core/cgdb/cgdb-adapter.d.ts +176 -0
  26. package/dist/core/cgdb/cgdb-adapter.js +1320 -0
  27. package/dist/core/cgdb/content-read.d.ts +46 -0
  28. package/dist/core/cgdb/content-read.js +64 -0
  29. package/dist/core/cgdb/csv-generator.d.ts +29 -0
  30. package/dist/core/cgdb/csv-generator.js +492 -0
  31. package/dist/core/cgdb/pool-adapter.d.ts +93 -0
  32. package/dist/core/cgdb/pool-adapter.js +550 -0
  33. package/dist/core/cgdb/schema.d.ts +62 -0
  34. package/dist/core/cgdb/schema.js +502 -0
  35. package/dist/core/embeddings/embedding-pipeline.js +27 -10
  36. package/dist/core/graphstore/cgdb-row-source.d.ts +19 -0
  37. package/dist/core/graphstore/cgdb-row-source.js +141 -0
  38. package/dist/core/graphstore/index.d.ts +1 -1
  39. package/dist/core/graphstore/index.js +3 -3
  40. package/dist/core/group/bridge-db.d.ts +2 -2
  41. package/dist/core/group/bridge-db.js +123 -36
  42. package/dist/core/group/bridge-schema.d.ts +4 -4
  43. package/dist/core/group/bridge-schema.js +4 -4
  44. package/dist/core/group/cross-impact.js +3 -3
  45. package/dist/core/group/sync.js +4 -4
  46. package/dist/core/lbug/content-read.d.ts +46 -0
  47. package/dist/core/lbug/content-read.js +64 -0
  48. package/dist/core/lbug/csv-generator.d.ts +2 -6
  49. package/dist/core/lbug/csv-generator.js +45 -12
  50. package/dist/core/lbug/lbug-adapter.d.ts +4 -1
  51. package/dist/core/lbug/lbug-adapter.js +153 -21
  52. package/dist/core/lbug/schema.d.ts +7 -7
  53. package/dist/core/lbug/schema.js +18 -0
  54. package/dist/core/run-analyze.d.ts +13 -0
  55. package/dist/core/run-analyze.js +114 -27
  56. package/dist/core/search/bm25-index.d.ts +3 -3
  57. package/dist/core/search/bm25-index.js +75 -23
  58. package/dist/core/search/hybrid-search.js +2 -2
  59. package/dist/core/wiki/generator.d.ts +2 -2
  60. package/dist/core/wiki/generator.js +4 -4
  61. package/dist/core/wiki/graph-queries.d.ts +2 -2
  62. package/dist/core/wiki/graph-queries.js +5 -5
  63. package/dist/mcp/core/cgdb-adapter.d.ts +5 -0
  64. package/dist/mcp/core/cgdb-adapter.js +5 -0
  65. package/dist/mcp/core/embedder.js +1 -1
  66. package/dist/mcp/local/local-backend.d.ts +2 -2
  67. package/dist/mcp/local/local-backend.js +36 -19
  68. package/dist/mcp/server.js +3 -3
  69. package/dist/mcp/tools.js +1 -1
  70. package/dist/server/analyze-worker.js +2 -2
  71. package/dist/server/api.js +34 -33
  72. package/dist/storage/repo-manager.d.ts +42 -3
  73. package/dist/storage/repo-manager.js +23 -4
  74. package/hooks/claude/codragraph-hook.cjs +98 -5
  75. package/package.json +4 -4
  76. package/scripts/build-tree-sitter-proto.cjs +15 -3
  77. package/scripts/build.js +8 -9
  78. package/scripts/patch-tree-sitter-swift.cjs +17 -4
  79. package/skills/codragraph-api-surface.md +110 -0
  80. package/skills/codragraph-config-audit.md +146 -0
  81. package/skills/codragraph-cross-repo-impact.md +135 -0
  82. package/skills/codragraph-data-lineage.md +137 -0
  83. package/skills/codragraph-dead-code.md +119 -0
  84. package/skills/codragraph-gh-actions-debug.md +162 -0
  85. package/skills/codragraph-gh-issue-workflow.md +178 -0
  86. package/skills/codragraph-gh-pr-workflow.md +176 -0
  87. package/skills/codragraph-gh-release-workflow.md +187 -0
  88. package/skills/codragraph-git-bisect.md +176 -0
  89. package/skills/codragraph-git-force-push.md +147 -0
  90. package/skills/codragraph-git-history-rewrite.md +174 -0
  91. package/skills/codragraph-git-rebase-vs-merge.md +138 -0
  92. package/skills/codragraph-git-recovery.md +181 -0
  93. package/skills/codragraph-git-worktree.md +145 -0
  94. package/skills/codragraph-migration-tracking.md +130 -0
  95. package/skills/codragraph-notebook-context.md +136 -0
  96. package/skills/codragraph-observability-coverage.md +125 -0
  97. package/skills/codragraph-onboarding.md +129 -0
  98. package/skills/codragraph-perf-hotspots.md +132 -0
  99. package/skills/codragraph-project-switcher.md +116 -0
  100. package/skills/codragraph-security-audit.md +144 -0
  101. package/skills/codragraph-sql-tracing.md +122 -0
  102. package/skills/codragraph-supply-chain-audit.md +153 -0
  103. package/skills/codragraph-test-coverage.md +97 -0
  104. package/vendor/tree-sitter-proto/bindings/node/index.js +3 -3
  105. package/vendor/tree-sitter-proto/src/node-types.json +1 -1
@@ -8,6 +8,7 @@
8
8
  * IMPORTANT: This module must NEVER call process.exit(). The caller (CLI
9
9
  * wrapper or server worker) is responsible for process lifecycle.
10
10
  */
11
+ import type { ContentEncoding } from '@codragraph/graphstore';
11
12
  export interface AnalyzeCallbacks {
12
13
  onProgress: (phase: string, percent: number, message: string) => void;
13
14
  onLog?: (message: string) => void;
@@ -41,6 +42,18 @@ export interface AnalyzeOptions {
41
42
  * of a pipeline re-index.
42
43
  */
43
44
  allowDuplicateName?: boolean;
45
+ /**
46
+ * RFC 0001 Phase 2 — opt into per-row content compression. `'none'`
47
+ * (or undefined) writes plain text and the schema-default tag, exactly
48
+ * as pre-Phase-2 indexes do. `'brotli'` and `'zstd'` route every
49
+ * content field through `encodeContent` before it hits the CSV; the
50
+ * read path decodes via the per-row `contentEncoding` tag.
51
+ *
52
+ * Choosing `'zstd'` requires Node ≥ 22.15 on the indexer (the runtime
53
+ * that wrote the rows). Readers on older Node will get a clear
54
+ * forward-compat error rather than silently bad content.
55
+ */
56
+ compress?: ContentEncoding;
44
57
  }
45
58
  export interface AnalyzeResult {
46
59
  repoName: string;
@@ -10,14 +10,16 @@
10
10
  */
11
11
  import path from 'path';
12
12
  import fs from 'fs/promises';
13
+ import * as fsSync from 'node:fs';
14
+ import * as v8 from 'node:v8';
13
15
  import { runPipelineFromRepo } from './ingestion/pipeline.js';
14
- import { initLbug, loadGraphToLbug, getLbugStats, executeQuery, executeWithReusedStatement, closeLbug, loadCachedEmbeddings, } from './lbug/lbug-adapter.js';
15
- import { getStoragePaths, saveMeta, loadMeta, addToGitignore, registerRepo, cleanupOldKuzuFiles, } from '../storage/repo-manager.js';
16
+ import { initCgdb, loadGraphToCgdb, getCgdbStats, executeQuery, executeWithReusedStatement, closeCgdb, loadCachedEmbeddings, } from './cgdb/cgdb-adapter.js';
17
+ import { getStoragePaths, saveMeta, loadMeta, addToGitignore, registerRepo, cleanupOldKuzuFiles, INDEX_SCHEMA_VERSION, } from '../storage/repo-manager.js';
16
18
  import { getCurrentCommit, getRemoteUrl, hasGitDir, getInferredRepoName } from '../storage/git.js';
17
19
  import { recordAnalysisSnapshot } from './graphstore/index.js';
18
20
  import { generateAIContextFiles } from '../cli/ai-context.js';
19
- import { EMBEDDING_TABLE_NAME } from './lbug/schema.js';
20
- import { STALE_HASH_SENTINEL } from './lbug/schema.js';
21
+ import { EMBEDDING_TABLE_NAME } from './cgdb/schema.js';
22
+ import { STALE_HASH_SENTINEL } from './cgdb/schema.js';
21
23
  /** Threshold: auto-skip embeddings for repos with more nodes than this */
22
24
  const EMBEDDING_NODE_LIMIT = 50_000;
23
25
  export const PHASE_LABELS = {
@@ -30,7 +32,7 @@ export const PHASE_LABELS = {
30
32
  communities: 'Detecting communities',
31
33
  processes: 'Detecting processes',
32
34
  complete: 'Pipeline complete',
33
- lbug: 'Loading into LadybugDB',
35
+ cgdb: 'Loading into LadybugDB',
34
36
  fts: 'Creating search indexes',
35
37
  embeddings: 'Generating embeddings',
36
38
  done: 'Done',
@@ -51,8 +53,71 @@ export const PHASE_LABELS = {
51
53
  */
52
54
  export async function runFullAnalysis(repoPath, options, callbacks) {
53
55
  const log = (msg) => callbacks.onLog?.(msg);
54
- const progress = (phase, percent, message) => callbacks.onProgress(phase, percent, message);
55
- const { storagePath, lbugPath } = getStoragePaths(repoPath);
56
+ // RFC 0002 Phase 1 optional heap-profile instrumentation. Set
57
+ // CODRAGRAPH_HEAP_PROFILE=1 (or run `codragraph profile-heap`) to write a
58
+ // v8 heap snapshot at every phase boundary, plus a `profile-summary.jsonl`
59
+ // log of `process.memoryUsage()` at the same boundaries. Snapshots land in
60
+ // `<repo>/.codragraph/heap-profiles/`. Open snapshots in Chrome DevTools
61
+ // (Memory → Load) to find which constructors dominate retained set; the
62
+ // JSONL is the cheap RSS / heapUsed timeline. Off by default — snapshot
63
+ // writes pause the event loop ~2-5s and consume ~100-500MB of disk each.
64
+ const heapProfileEnabled = process.env.CODRAGRAPH_HEAP_PROFILE === '1';
65
+ let heapProfileDir = '';
66
+ let heapProfileSummaryPath = '';
67
+ let lastProfilePhase = '';
68
+ if (heapProfileEnabled) {
69
+ heapProfileDir = path.join(repoPath, '.codragraph', 'heap-profiles');
70
+ heapProfileSummaryPath = path.join(heapProfileDir, 'profile-summary.jsonl');
71
+ try {
72
+ fsSync.mkdirSync(heapProfileDir, { recursive: true });
73
+ // Truncate any prior summary so a single run produces a clean log.
74
+ // We append crash-safely on each phase boundary below.
75
+ fsSync.writeFileSync(heapProfileSummaryPath, '');
76
+ }
77
+ catch {
78
+ /* permission issue — best-effort */
79
+ }
80
+ }
81
+ const progress = (phase, percent, message) => {
82
+ callbacks.onProgress(phase, percent, message);
83
+ // Only snapshot on phase transitions, not every tick. Phase strings come
84
+ // from runPipelineFromRepo / loadGraphToCgdb and are stable.
85
+ if (heapProfileEnabled && phase && phase !== lastProfilePhase) {
86
+ lastProfilePhase = phase;
87
+ const ts = Date.now();
88
+ const safe = phase.replace(/[^a-zA-Z0-9]+/g, '_').slice(0, 60);
89
+ const file = path.join(heapProfileDir, `${ts}-${safe}.heapsnapshot`);
90
+ // Capture the cheap memoryUsage timeline FIRST — even if writeHeapSnapshot
91
+ // crashes (out of disk, permissions), we still have the RSS curve which
92
+ // is the more useful artifact for the heap-pressure RFC.
93
+ try {
94
+ const mu = process.memoryUsage();
95
+ const entry = JSON.stringify({
96
+ ts,
97
+ phase,
98
+ percent,
99
+ rss: mu.rss,
100
+ heapUsed: mu.heapUsed,
101
+ heapTotal: mu.heapTotal,
102
+ external: mu.external,
103
+ arrayBuffers: mu.arrayBuffers,
104
+ snapshotFile: path.basename(file),
105
+ });
106
+ fsSync.appendFileSync(heapProfileSummaryPath, entry + '\n');
107
+ }
108
+ catch (err) {
109
+ log(`heap-profile: summary append failed (${err.message})`);
110
+ }
111
+ try {
112
+ v8.writeHeapSnapshot(file);
113
+ log(`heap-profile: wrote ${file}`);
114
+ }
115
+ catch (err) {
116
+ log(`heap-profile: write failed (${err.message})`);
117
+ }
118
+ }
119
+ };
120
+ const { storagePath, cgdbPath } = getStoragePaths(repoPath);
56
121
  // Clean up stale KuzuDB files from before the LadybugDB migration.
57
122
  const kuzuResult = await cleanupOldKuzuFiles(storagePath);
58
123
  if (kuzuResult.found && kuzuResult.needsReindex) {
@@ -62,7 +127,17 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
62
127
  const currentCommit = repoHasGit ? getCurrentCommit(repoPath) : '';
63
128
  const existingMeta = await loadMeta(storagePath);
64
129
  // ── Early-return: already up to date ──────────────────────────────
65
- if (existingMeta && !options.force && existingMeta.lastCommit === currentCommit) {
130
+ // Schema-version mismatch forces a full re-analyze regardless of commit
131
+ // equality: existing 1.7.x indexes have no `schemaVersion` field at all,
132
+ // and 1.8+ readers expect every node table to carry a `contentEncoding`
133
+ // column (RFC 0001 Phase 2). LadybugDB ALTER on existing tables is not
134
+ // validated end-to-end yet, so the supported migration path is
135
+ // re-analyze → fresh CREATE NODE TABLE.
136
+ const schemaUpToDate = !!existingMeta && (existingMeta.schemaVersion ?? 0) >= INDEX_SCHEMA_VERSION;
137
+ if (existingMeta &&
138
+ schemaUpToDate &&
139
+ !options.force &&
140
+ existingMeta.lastCommit === currentCommit) {
66
141
  // Non-git folders have currentCommit = '' — always rebuild since we can't detect changes
67
142
  if (currentCommit !== '') {
68
143
  return {
@@ -73,21 +148,26 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
73
148
  };
74
149
  }
75
150
  }
151
+ if (existingMeta && !schemaUpToDate) {
152
+ log(`Index schema version ${existingMeta.schemaVersion ?? '<missing>'} is older than ` +
153
+ `${INDEX_SCHEMA_VERSION} (RFC 0001 Phase 2 — adds contentEncoding column). ` +
154
+ `Re-analyzing.`);
155
+ }
76
156
  // ── Cache embeddings from existing index before rebuild ────────────
77
157
  let cachedEmbeddingNodeIds = new Set();
78
158
  let cachedEmbeddings = [];
79
159
  if (options.embeddings && existingMeta && !options.force) {
80
160
  try {
81
161
  progress('embeddings', 0, 'Caching embeddings...');
82
- await initLbug(lbugPath);
162
+ await initCgdb(cgdbPath);
83
163
  const cached = await loadCachedEmbeddings();
84
164
  cachedEmbeddingNodeIds = cached.embeddingNodeIds;
85
165
  cachedEmbeddings = cached.embeddings;
86
- await closeLbug();
166
+ await closeCgdb();
87
167
  }
88
168
  catch {
89
169
  try {
90
- await closeLbug();
170
+ await closeCgdb();
91
171
  }
92
172
  catch {
93
173
  /* swallow */
@@ -101,10 +181,10 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
101
181
  progress(p.phase, scaled, phaseLabel);
102
182
  });
103
183
  // ── Phase 2: LadybugDB (60–85%) ──────────────────────────────────
104
- progress('lbug', 60, 'Loading into LadybugDB...');
105
- await closeLbug();
106
- const lbugFiles = [lbugPath, `${lbugPath}.wal`, `${lbugPath}.lock`];
107
- for (const f of lbugFiles) {
184
+ progress('cgdb', 60, 'Loading into LadybugDB...');
185
+ await closeCgdb();
186
+ const cgdbFiles = [cgdbPath, `${cgdbPath}.wal`, `${cgdbPath}.lock`];
187
+ for (const f of cgdbFiles) {
108
188
  try {
109
189
  await fs.rm(f, { recursive: true, force: true });
110
190
  }
@@ -112,17 +192,22 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
112
192
  /* swallow */
113
193
  }
114
194
  }
115
- await initLbug(lbugPath);
195
+ await initCgdb(cgdbPath);
116
196
  try {
117
- // All work after initLbug is wrapped in try/finally to ensure closeLbug()
197
+ // All work after initCgdb is wrapped in try/finally to ensure closeCgdb()
118
198
  // is called even if an error occurs — the module-level singleton DB handle
119
199
  // must be released to avoid blocking subsequent invocations.
120
- let lbugMsgCount = 0;
121
- await loadGraphToLbug(pipelineResult.graph, pipelineResult.repoPath, storagePath, (msg) => {
122
- lbugMsgCount++;
123
- const pct = Math.min(84, 60 + Math.round((lbugMsgCount / (lbugMsgCount + 10)) * 24));
124
- progress('lbug', pct, msg);
125
- });
200
+ let cgdbMsgCount = 0;
201
+ await loadGraphToCgdb(pipelineResult.graph, pipelineResult.repoPath, storagePath, (msg) => {
202
+ cgdbMsgCount++;
203
+ const pct = Math.min(84, 60 + Math.round((cgdbMsgCount / (cgdbMsgCount + 10)) * 24));
204
+ progress('cgdb', pct, msg);
205
+ },
206
+ // RFC 0001 Phase 2: when --compress is set, every content row goes
207
+ // through encodeContent before hitting the CSV. Default 'none' is
208
+ // a true passthrough, so the on-disk layout is byte-identical to
209
+ // pre-Phase-2 indexes when no compression flag is passed.
210
+ { compress: options.compress });
126
211
  // ── Phase 2.5: Versioned-graph snapshot (best-effort) ────────────
127
212
  // Phase 4 hook: snapshot the freshly-loaded graph into the
128
213
  // content-addressed `.codragraph/graphstore/`. Failures here do NOT
@@ -159,7 +244,7 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
159
244
  // ── Phase 3.5: Re-insert cached embeddings ────────────────────────
160
245
  if (cachedEmbeddings.length > 0) {
161
246
  const cachedDims = cachedEmbeddings[0].embedding.length;
162
- const { EMBEDDING_DIMS } = await import('./lbug/schema.js');
247
+ const { EMBEDDING_DIMS } = await import('./cgdb/schema.js');
163
248
  if (cachedDims !== EMBEDDING_DIMS) {
164
249
  // Dimensions changed (e.g. switched embedding model) — discard cache and re-embed all
165
250
  log(`Embedding dimensions changed (${cachedDims}d -> ${EMBEDDING_DIMS}d), discarding cache`);
@@ -182,7 +267,7 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
182
267
  }
183
268
  }
184
269
  // ── Phase 4: Embeddings (90–98%) ──────────────────────────────────
185
- const stats = await getLbugStats();
270
+ const stats = await getCgdbStats();
186
271
  let embeddingSkipped = true;
187
272
  if (options.embeddings) {
188
273
  if (stats.nodes <= EMBEDDING_NODE_LIMIT) {
@@ -230,6 +315,8 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
230
315
  repoPath,
231
316
  lastCommit: currentCommit,
232
317
  indexedAt: new Date().toISOString(),
318
+ schemaVersion: INDEX_SCHEMA_VERSION,
319
+ compress: options.compress ?? 'none',
233
320
  // Captured here (not at registration) so it travels with the
234
321
  // on-disk meta.json — sibling-clone fingerprinting works for
235
322
  // out-of-tree consumers (group-status, future tooling) without
@@ -293,7 +380,7 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
293
380
  // Best-effort — don't fail the entire analysis for context file issues
294
381
  }
295
382
  // ── Close LadybugDB ──────────────────────────────────────────────
296
- await closeLbug();
383
+ await closeCgdb();
297
384
  progress('done', 100, 'Done');
298
385
  return {
299
386
  repoName: projectName,
@@ -305,7 +392,7 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
305
392
  catch (err) {
306
393
  // Ensure LadybugDB is closed even on error
307
394
  try {
308
- await closeLbug();
395
+ await closeCgdb();
309
396
  }
310
397
  catch {
311
398
  /* swallow */
@@ -5,7 +5,7 @@
5
5
  * Always reads from the database (no cached state to drift).
6
6
  *
7
7
  * FTS indexes are created lazily on first query (via `ensureFTSIndex`) — see
8
- * `lbug-adapter.ts` for the rationale. This keeps `analyze` fast (the
8
+ * `cgdb-adapter.ts` for the rationale. This keeps `analyze` fast (the
9
9
  * ~440 ms × 5 LadybugDB CREATE_FTS_INDEX cost dominates pipeline time on
10
10
  * small repos / CI runners) at the cost of paying that overhead on the
11
11
  * first `query`/`context` call in a session.
@@ -20,7 +20,7 @@ export interface BM25SearchResult {
20
20
  * Drop all ensured-FTS cache entries for a given repoId.
21
21
  *
22
22
  * Called from the pool-close listener so that a pool teardown / recreation
23
- * forces the next `searchFTSFromLbug` call to re-issue `CREATE_FTS_INDEX`
23
+ * forces the next `searchFTSFromCgdb` call to re-issue `CREATE_FTS_INDEX`
24
24
  * against the fresh connection rather than trust stale ensure-state from a
25
25
  * previous pool lifetime.
26
26
  *
@@ -38,4 +38,4 @@ export declare function invalidateEnsuredFTSForRepo(repoId: string): void;
38
38
  * @param repoId - If provided, queries will be routed via the MCP connection pool
39
39
  * @returns Ranked search results from FTS indexes
40
40
  */
41
- export declare const searchFTSFromLbug: (query: string, limit?: number, repoId?: string) => Promise<BM25SearchResult[]>;
41
+ export declare const searchFTSFromCgdb: (query: string, limit?: number, repoId?: string) => Promise<BM25SearchResult[]>;
@@ -5,24 +5,63 @@
5
5
  * Always reads from the database (no cached state to drift).
6
6
  *
7
7
  * FTS indexes are created lazily on first query (via `ensureFTSIndex`) — see
8
- * `lbug-adapter.ts` for the rationale. This keeps `analyze` fast (the
8
+ * `cgdb-adapter.ts` for the rationale. This keeps `analyze` fast (the
9
9
  * ~440 ms × 5 LadybugDB CREATE_FTS_INDEX cost dominates pipeline time on
10
10
  * small repos / CI runners) at the cost of paying that overhead on the
11
11
  * first `query`/`context` call in a session.
12
12
  */
13
- import { queryFTS, ensureFTSIndex, executeQuery as executeCoreQuery, } from '../lbug/lbug-adapter.js';
13
+ import { queryFTS, ensureFTSIndex, executeQuery as executeCoreQuery, } from '../cgdb/cgdb-adapter.js';
14
14
  /**
15
- * FTS schema served by `searchFTSFromLbug`. Centralised so that both the
16
- * CLI/pipeline path and the MCP pool path use identical (table, index,
17
- * properties) tuples and the lazy-create logic stays in one place.
15
+ * FTS table set served by `searchFTSFromCgdb`. Centralised so that both
16
+ * the CLI/pipeline path and the MCP pool path stay in lockstep.
17
+ *
18
+ * The properties list is computed at FTS-create time via `ftsPropertiesFor`
19
+ * — for repos that were analysed with `--compress brotli|zstd`, the
20
+ * `content` column holds base64-of-encoded-bytes and would tokenise to
21
+ * useless tokens. Those repos get name-only FTS so search at least
22
+ * matches function/class names instead of returning random hits on
23
+ * base64 alphabet. Plain (compress='none' / unset) repos get the full
24
+ * `name + content` index for body-text matches. RFC 0001 Phase 2.5.
18
25
  */
19
- const FTS_INDEXES = [
20
- { table: 'File', indexName: 'file_fts', properties: ['name', 'content'] },
21
- { table: 'Function', indexName: 'function_fts', properties: ['name', 'content'] },
22
- { table: 'Class', indexName: 'class_fts', properties: ['name', 'content'] },
23
- { table: 'Method', indexName: 'method_fts', properties: ['name', 'content'] },
24
- { table: 'Interface', indexName: 'interface_fts', properties: ['name', 'content'] },
26
+ const FTS_TABLES = [
27
+ { table: 'File', indexName: 'file_fts' },
28
+ { table: 'Function', indexName: 'function_fts' },
29
+ { table: 'Class', indexName: 'class_fts' },
30
+ { table: 'Method', indexName: 'method_fts' },
31
+ { table: 'Interface', indexName: 'interface_fts' },
25
32
  ];
33
+ const ftsPropertiesFor = (compress) => !compress || compress === 'none' ? ['name', 'content'] : ['name'];
34
+ /**
35
+ * Look up `meta.compress` for a repo. The MCP path passes `repoId`
36
+ * (registry-derived); the CLI path passes nothing and we walk up from
37
+ * cwd. Returns `'none'` whenever the lookup fails so the safe default
38
+ * (full FTS index) is used — the failure mode is reduced search
39
+ * quality, never wrong results.
40
+ */
41
+ async function getCompressMode(repoId) {
42
+ try {
43
+ const repoMod = await import('../../storage/repo-manager.js');
44
+ if (repoId) {
45
+ // MCP path: registry name is the source of truth. The MCP
46
+ // backend's `repoId` is `entry.name.toLowerCase()` (or `${name}-${hash}`
47
+ // on collision); match conservatively against both forms.
48
+ const entries = await repoMod.listRegisteredRepos();
49
+ for (const entry of entries) {
50
+ const base = entry.name.toLowerCase();
51
+ if (base === repoId || repoId.startsWith(`${base}-`)) {
52
+ const meta = await repoMod.loadMeta(entry.storagePath);
53
+ return meta?.compress ?? 'none';
54
+ }
55
+ }
56
+ return 'none';
57
+ }
58
+ const repo = await repoMod.findRepo(process.cwd());
59
+ return repo?.meta?.compress ?? 'none';
60
+ }
61
+ catch {
62
+ return 'none';
63
+ }
64
+ }
26
65
  const FALLBACK_SCAN_LIMIT = 50_000;
27
66
  const BOOLEAN_QUERY_TOKENS = new Set(['and', 'or', 'not']);
28
67
  const FALLBACK_FIELD_WEIGHTS = {
@@ -33,7 +72,7 @@ const FALLBACK_FIELD_WEIGHTS = {
33
72
  /**
34
73
  * Per-process cache for the MCP pool path: tracks which `(repoId, table)`
35
74
  * pairs have been ensured. The CLI/pipeline path gets its own cache inside
36
- * `lbug-adapter.ts` keyed by table/index, scoped to the singleton connection.
75
+ * `cgdb-adapter.ts` keyed by table/index, scoped to the singleton connection.
37
76
  *
38
77
  * IMPORTANT: an entry is added ONLY when the index was confirmed to exist
39
78
  * (CREATE_FTS_INDEX succeeded, or failed with `'already exists'`). Other
@@ -41,14 +80,14 @@ const FALLBACK_FIELD_WEIGHTS = {
41
80
  * unset so the next query retries instead of silently caching the failure.
42
81
  *
43
82
  * Entries for a given repoId are invalidated when its pool is closed —
44
- * see the `addPoolCloseListener` registration in `searchFTSFromLbug`.
83
+ * see the `addPoolCloseListener` registration in `searchFTSFromCgdb`.
45
84
  */
46
85
  const ensuredPoolFTS = new Set();
47
86
  /**
48
87
  * Drop all ensured-FTS cache entries for a given repoId.
49
88
  *
50
89
  * Called from the pool-close listener so that a pool teardown / recreation
51
- * forces the next `searchFTSFromLbug` call to re-issue `CREATE_FTS_INDEX`
90
+ * forces the next `searchFTSFromCgdb` call to re-issue `CREATE_FTS_INDEX`
52
91
  * against the fresh connection rather than trust stale ensure-state from a
53
92
  * previous pool lifetime.
54
93
  *
@@ -184,9 +223,13 @@ async function queryFallbackViaExecutor(executor, tableName, properties, query,
184
223
  return [];
185
224
  }
186
225
  }
187
- async function fallbackSearchAllTables(executor, query, limit) {
226
+ async function fallbackSearchAllTables(executor, query, limit,
227
+ // Same compress-aware property selection as the FTS path. Default keeps
228
+ // pre-Phase-2 behaviour (`['name', 'content']`) for callers that don't
229
+ // pass a value.
230
+ properties = ['name', 'content']) {
188
231
  const results = [];
189
- for (const { table, properties } of FTS_INDEXES) {
232
+ for (const { table } of FTS_TABLES) {
190
233
  results.push(await queryFallbackViaExecutor(executor, table, properties, query, limit));
191
234
  }
192
235
  return results;
@@ -202,7 +245,7 @@ async function fallbackSearchAllTables(executor, query, limit) {
202
245
  * @param repoId - If provided, queries will be routed via the MCP connection pool
203
246
  * @returns Ranked search results from FTS indexes
204
247
  */
205
- export const searchFTSFromLbug = async (query, limit = 20, repoId) => {
248
+ export const searchFTSFromCgdb = async (query, limit = 20, repoId) => {
206
249
  if (!query.trim() || limit <= 0)
207
250
  return [];
208
251
  let fileResults, functionResults, classResults, methodResults, interfaceResults;
@@ -210,7 +253,7 @@ export const searchFTSFromLbug = async (query, limit = 20, repoId) => {
210
253
  // Use MCP connection pool via dynamic import
211
254
  // IMPORTANT: FTS queries run sequentially to avoid connection contention.
212
255
  // The MCP pool supports multiple connections, but FTS is best run serially.
213
- const poolMod = await import('../lbug/pool-adapter.js');
256
+ const poolMod = await import('../cgdb/pool-adapter.js');
214
257
  const { executeQuery, addPoolCloseListener } = poolMod;
215
258
  // Register the pool-close listener lazily on first use so a teardown of
216
259
  // the pool entry (LRU eviction, idle timeout, explicit close) drops the
@@ -220,7 +263,12 @@ export const searchFTSFromLbug = async (query, limit = 20, repoId) => {
220
263
  const executor = (cypher) => executeQuery(repoId, cypher);
221
264
  // Lazy-create FTS indexes on first query for this repo (analyze no longer
222
265
  // creates them up-front, so we ensure them here). Cached per-process.
223
- for (const { table, indexName, properties } of FTS_INDEXES) {
266
+ // RFC 0001 Phase 2.5: drop `content` from FTS properties for repos
267
+ // analysed with --compress brotli|zstd — the column holds encoded
268
+ // bytes and would tokenise to garbage.
269
+ const compress = await getCompressMode(repoId);
270
+ const properties = ftsPropertiesFor(compress);
271
+ for (const { table, indexName } of FTS_TABLES) {
224
272
  await ensureFTSIndexViaExecutor(executor, repoId, table, indexName, properties);
225
273
  }
226
274
  fileResults = await queryFTSViaExecutor(executor, 'File', 'file_fts', query, limit);
@@ -235,13 +283,17 @@ export const searchFTSFromLbug = async (query, limit = 20, repoId) => {
235
283
  interfaceResults.length ===
236
284
  0) {
237
285
  [fileResults, functionResults, classResults, methodResults, interfaceResults] =
238
- await fallbackSearchAllTables(executor, query, limit);
286
+ await fallbackSearchAllTables(executor, query, limit, properties);
239
287
  }
240
288
  }
241
289
  else {
242
- // Use core lbug adapter (CLI / pipeline context) — also sequential for safety.
290
+ // Use core cgdb adapter (CLI / pipeline context) — also sequential for safety.
243
291
  // Lazy-create FTS indexes on first query (analyze no longer does it).
244
- for (const { table, indexName, properties } of FTS_INDEXES) {
292
+ // RFC 0001 Phase 2.5 same `compress`-aware property selection as the MCP
293
+ // path; the CLI walks up from cwd to find the repo's meta.json.
294
+ const compress = await getCompressMode();
295
+ const properties = ftsPropertiesFor(compress);
296
+ for (const { table, indexName } of FTS_TABLES) {
245
297
  await ensureFTSIndex(table, indexName, [...properties]).catch(() => { });
246
298
  }
247
299
  fileResults = await queryFTS('File', 'file_fts', query, limit, false).catch(() => []);
@@ -256,7 +308,7 @@ export const searchFTSFromLbug = async (query, limit = 20, repoId) => {
256
308
  interfaceResults.length ===
257
309
  0) {
258
310
  [fileResults, functionResults, classResults, methodResults, interfaceResults] =
259
- await fallbackSearchAllTables(executeCoreQuery, query, limit);
311
+ await fallbackSearchAllTables(executeCoreQuery, query, limit, properties);
260
312
  }
261
313
  }
262
314
  // Collect all node scores per filePath to track which nodes actually matched
@@ -7,7 +7,7 @@
7
7
  * This is the same approach used by Elasticsearch, Pinecone, and other
8
8
  * production search systems.
9
9
  */
10
- import { searchFTSFromLbug } from './bm25-index.js';
10
+ import { searchFTSFromCgdb } from './bm25-index.js';
11
11
  /**
12
12
  * RRF constant - standard value used in the literature
13
13
  * Higher values give more weight to lower-ranked results
@@ -112,7 +112,7 @@ export const formatHybridResults = (results) => {
112
112
  */
113
113
  export const hybridSearch = async (query, limit, executeQuery, semanticSearch) => {
114
114
  // Use LadybugDB FTS for always-fresh BM25 results
115
- const bm25Results = await searchFTSFromLbug(query, limit);
115
+ const bm25Results = await searchFTSFromCgdb(query, limit);
116
116
  const semanticResults = await semanticSearch(executeQuery, query, limit);
117
117
  return mergeWithRRF(bm25Results, semanticResults, limit);
118
118
  };
@@ -41,14 +41,14 @@ export declare class WikiGenerator {
41
41
  private repoPath;
42
42
  private storagePath;
43
43
  private wikiDir;
44
- private lbugPath;
44
+ private cgdbPath;
45
45
  private llmConfig;
46
46
  private maxTokensPerModule;
47
47
  private concurrency;
48
48
  private options;
49
49
  private onProgress;
50
50
  private failedModules;
51
- constructor(repoPath: string, storagePath: string, lbugPath: string, llmConfig: LLMConfig, options?: WikiOptions, onProgress?: ProgressCallback);
51
+ constructor(repoPath: string, storagePath: string, cgdbPath: string, llmConfig: LLMConfig, options?: WikiOptions, onProgress?: ProgressCallback);
52
52
  private lastPercent;
53
53
  /**
54
54
  * Create streaming options that report LLM progress to the progress bar.
@@ -26,18 +26,18 @@ export class WikiGenerator {
26
26
  repoPath;
27
27
  storagePath;
28
28
  wikiDir;
29
- lbugPath;
29
+ cgdbPath;
30
30
  llmConfig;
31
31
  maxTokensPerModule;
32
32
  concurrency;
33
33
  options;
34
34
  onProgress;
35
35
  failedModules = [];
36
- constructor(repoPath, storagePath, lbugPath, llmConfig, options = {}, onProgress) {
36
+ constructor(repoPath, storagePath, cgdbPath, llmConfig, options = {}, onProgress) {
37
37
  this.repoPath = repoPath;
38
38
  this.storagePath = storagePath;
39
39
  this.wikiDir = path.join(storagePath, WIKI_DIR);
40
- this.lbugPath = lbugPath;
40
+ this.cgdbPath = cgdbPath;
41
41
  this.options = options;
42
42
  this.llmConfig = llmConfig;
43
43
  this.maxTokensPerModule = options.maxTokensPerModule ?? DEFAULT_MAX_TOKENS_PER_MODULE;
@@ -134,7 +134,7 @@ export class WikiGenerator {
134
134
  }
135
135
  // Init graph
136
136
  this.onProgress('init', 2, 'Connecting to knowledge graph...');
137
- await initWikiDb(this.lbugPath);
137
+ await initWikiDb(this.cgdbPath);
138
138
  let result;
139
139
  try {
140
140
  if (!forceMode && existingMeta && existingMeta.fromCommit) {
@@ -2,7 +2,7 @@
2
2
  * Graph Queries for Wiki Generation
3
3
  *
4
4
  * Encapsulated Cypher queries against the CodraGraph knowledge graph.
5
- * Uses the MCP-style pooled lbug-adapter for connection management.
5
+ * Uses the MCP-style pooled cgdb-adapter for connection management.
6
6
  */
7
7
  /**
8
8
  * Touch the wiki DB connection to prevent idle timeout during long LLM calls.
@@ -36,7 +36,7 @@ export interface ProcessInfo {
36
36
  /**
37
37
  * Initialize the LadybugDB connection for wiki generation.
38
38
  */
39
- export declare function initWikiDb(lbugPath: string): Promise<void>;
39
+ export declare function initWikiDb(cgdbPath: string): Promise<void>;
40
40
  /**
41
41
  * Close the LadybugDB connection.
42
42
  */
@@ -2,9 +2,9 @@
2
2
  * Graph Queries for Wiki Generation
3
3
  *
4
4
  * Encapsulated Cypher queries against the CodraGraph knowledge graph.
5
- * Uses the MCP-style pooled lbug-adapter for connection management.
5
+ * Uses the MCP-style pooled cgdb-adapter for connection management.
6
6
  */
7
- import { initLbug, executeQuery, closeLbug, touchRepo } from '../lbug/pool-adapter.js';
7
+ import { initCgdb, executeQuery, closeCgdb, touchRepo } from '../cgdb/pool-adapter.js';
8
8
  const REPO_ID = '__wiki__';
9
9
  /**
10
10
  * Touch the wiki DB connection to prevent idle timeout during long LLM calls.
@@ -15,14 +15,14 @@ export function touchWikiDb() {
15
15
  /**
16
16
  * Initialize the LadybugDB connection for wiki generation.
17
17
  */
18
- export async function initWikiDb(lbugPath) {
19
- await initLbug(REPO_ID, lbugPath);
18
+ export async function initWikiDb(cgdbPath) {
19
+ await initCgdb(REPO_ID, cgdbPath);
20
20
  }
21
21
  /**
22
22
  * Close the LadybugDB connection.
23
23
  */
24
24
  export async function closeWikiDb() {
25
- await closeLbug(REPO_ID);
25
+ await closeCgdb(REPO_ID);
26
26
  }
27
27
  /**
28
28
  * Get all source files with their exported symbol names and types.
@@ -0,0 +1,5 @@
1
+ /**
2
+ * LadybugDB connection pool — re-exported from core.
3
+ * Prefer importing from `../../core/cgdb/pool-adapter.js` in new code.
4
+ */
5
+ export * from '../../core/cgdb/pool-adapter.js';
@@ -0,0 +1,5 @@
1
+ /**
2
+ * LadybugDB connection pool — re-exported from core.
3
+ * Prefer importing from `../../core/cgdb/pool-adapter.js` in new code.
4
+ */
5
+ export * from '../../core/cgdb/pool-adapter.js';
@@ -6,7 +6,7 @@
6
6
  */
7
7
  import { pipeline, env } from '@huggingface/transformers';
8
8
  import { isHttpMode, getHttpDimensions, httpEmbedQuery, } from '../../core/embeddings/http-client.js';
9
- import { silenceStdout, restoreStdout, realStderrWrite } from '../../core/lbug/pool-adapter.js';
9
+ import { silenceStdout, restoreStdout, realStderrWrite } from '../../core/cgdb/pool-adapter.js';
10
10
  // Model config
11
11
  const MODEL_ID = 'Snowflake/snowflake-arctic-embed-xs';
12
12
  // Module-level state for singleton pattern
@@ -5,7 +5,7 @@
5
5
  * Supports multiple indexed repositories via a global registry.
6
6
  * LadybugDB connections are opened lazily per repo on first query.
7
7
  */
8
- import { isWriteQuery } from '../../core/lbug/pool-adapter.js';
8
+ import { isWriteQuery } from '../../core/cgdb/pool-adapter.js';
9
9
  export { isWriteQuery };
10
10
  import { type RegistryEntry } from '../../storage/repo-manager.js';
11
11
  import { GroupService } from '../../core/group/service.js';
@@ -53,7 +53,7 @@ interface RepoHandle {
53
53
  name: string;
54
54
  repoPath: string;
55
55
  storagePath: string;
56
- lbugPath: string;
56
+ cgdbPath: string;
57
57
  indexedAt: string;
58
58
  lastCommit: string;
59
59
  remoteUrl?: string;