@codragraph/cli 1.6.3 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -16
- package/dist/cli/ai-context.js +2 -2
- package/dist/cli/analyze.d.ts +22 -0
- package/dist/cli/analyze.js +111 -8
- package/dist/cli/compress-stats.d.ts +29 -0
- package/dist/cli/compress-stats.js +97 -0
- package/dist/cli/graphstore.d.ts +6 -2
- package/dist/cli/graphstore.js +24 -2
- package/dist/cli/index.js +17 -6
- package/dist/cli/profile-heap.d.ts +35 -0
- package/dist/cli/profile-heap.js +126 -0
- package/dist/cli/setup.d.ts +13 -0
- package/dist/cli/setup.js +75 -29
- package/dist/cli/skill-gen.d.ts +14 -2
- package/dist/cli/skill-gen.js +53 -20
- package/dist/cli/tool.js +4 -0
- package/dist/config/ignore-service.js +1 -1
- package/dist/core/embeddings/embedding-pipeline.js +24 -7
- package/dist/core/group/bridge-db.js +111 -24
- package/dist/core/group/extractors/grpc-patterns/proto.js +1 -12
- package/dist/core/ingestion/call-processor.js +2 -2
- package/dist/core/ingestion/cobol/cobol-preprocessor.js +1 -1
- package/dist/core/ingestion/cobol/jcl-parser.d.ts +1 -1
- package/dist/core/ingestion/cobol/jcl-parser.js +1 -1
- package/dist/core/ingestion/cobol-processor.d.ts +1 -1
- package/dist/core/ingestion/cobol-processor.js +1 -1
- package/dist/core/ingestion/heritage-extractors/generic.js +1 -1
- package/dist/core/ingestion/heritage-processor.js +1 -1
- package/dist/core/ingestion/import-processor.js +1 -1
- package/dist/core/ingestion/mro-processor.js +1 -1
- package/dist/core/ingestion/parsing-processor.js +1 -1
- package/dist/core/ingestion/type-extractors/c-cpp.js +1 -1
- package/dist/core/ingestion/type-extractors/python.js +1 -1
- package/dist/core/ingestion/type-extractors/shared.js +0 -3
- package/dist/core/lbug/content-read.d.ts +46 -0
- package/dist/core/lbug/content-read.js +64 -0
- package/dist/core/lbug/csv-generator.d.ts +2 -6
- package/dist/core/lbug/csv-generator.js +45 -12
- package/dist/core/lbug/lbug-adapter.d.ts +4 -1
- package/dist/core/lbug/lbug-adapter.js +157 -25
- package/dist/core/lbug/pool-adapter.js +51 -44
- package/dist/core/lbug/schema.d.ts +7 -7
- package/dist/core/lbug/schema.js +18 -0
- package/dist/core/run-analyze.d.ts +13 -0
- package/dist/core/run-analyze.js +91 -4
- package/dist/core/search/bm25-index.js +153 -12
- package/dist/core/wiki/generator.js +4 -4
- package/dist/mcp/local/local-backend.js +22 -5
- package/dist/mcp/resources.js +2 -3
- package/dist/server/api.js +4 -3
- package/dist/storage/repo-manager.d.ts +39 -0
- package/dist/storage/repo-manager.js +19 -0
- package/hooks/claude/codragraph-hook.cjs +108 -5
- package/hooks/claude/pre-tool-use.sh +6 -1
- package/package.json +4 -4
- package/scripts/build-tree-sitter-proto.cjs +15 -3
- package/scripts/patch-tree-sitter-swift.cjs +17 -4
- package/skills/codragraph-api-surface.md +110 -0
- package/skills/codragraph-cli.md +5 -5
- package/skills/codragraph-config-audit.md +146 -0
- package/skills/codragraph-cross-repo-impact.md +135 -0
- package/skills/codragraph-data-lineage.md +137 -0
- package/skills/codragraph-dead-code.md +119 -0
- package/skills/codragraph-debugging.md +1 -1
- package/skills/codragraph-exploring.md +1 -1
- package/skills/codragraph-gh-actions-debug.md +162 -0
- package/skills/codragraph-gh-issue-workflow.md +178 -0
- package/skills/codragraph-gh-pr-workflow.md +176 -0
- package/skills/codragraph-gh-release-workflow.md +187 -0
- package/skills/codragraph-git-bisect.md +176 -0
- package/skills/codragraph-git-force-push.md +147 -0
- package/skills/codragraph-git-history-rewrite.md +174 -0
- package/skills/codragraph-git-rebase-vs-merge.md +138 -0
- package/skills/codragraph-git-recovery.md +181 -0
- package/skills/codragraph-git-worktree.md +145 -0
- package/skills/codragraph-guide.md +1 -1
- package/skills/codragraph-impact-analysis.md +1 -1
- package/skills/codragraph-migration-tracking.md +130 -0
- package/skills/codragraph-notebook-context.md +136 -0
- package/skills/codragraph-observability-coverage.md +125 -0
- package/skills/codragraph-onboarding.md +129 -0
- package/skills/codragraph-perf-hotspots.md +132 -0
- package/skills/codragraph-pr-review.md +1 -1
- package/skills/codragraph-project-switcher.md +116 -0
- package/skills/codragraph-refactoring.md +1 -1
- package/skills/codragraph-security-audit.md +144 -0
- package/skills/codragraph-sql-tracing.md +122 -0
- package/skills/codragraph-supply-chain-audit.md +153 -0
- package/skills/codragraph-test-coverage.md +97 -0
package/dist/core/run-analyze.js
CHANGED
|
@@ -10,9 +10,11 @@
|
|
|
10
10
|
*/
|
|
11
11
|
import path from 'path';
|
|
12
12
|
import fs from 'fs/promises';
|
|
13
|
+
import * as fsSync from 'node:fs';
|
|
14
|
+
import * as v8 from 'node:v8';
|
|
13
15
|
import { runPipelineFromRepo } from './ingestion/pipeline.js';
|
|
14
16
|
import { initLbug, loadGraphToLbug, getLbugStats, executeQuery, executeWithReusedStatement, closeLbug, loadCachedEmbeddings, } from './lbug/lbug-adapter.js';
|
|
15
|
-
import { getStoragePaths, saveMeta, loadMeta, addToGitignore, registerRepo, cleanupOldKuzuFiles, } from '../storage/repo-manager.js';
|
|
17
|
+
import { getStoragePaths, saveMeta, loadMeta, addToGitignore, registerRepo, cleanupOldKuzuFiles, INDEX_SCHEMA_VERSION, } from '../storage/repo-manager.js';
|
|
16
18
|
import { getCurrentCommit, getRemoteUrl, hasGitDir, getInferredRepoName } from '../storage/git.js';
|
|
17
19
|
import { recordAnalysisSnapshot } from './graphstore/index.js';
|
|
18
20
|
import { generateAIContextFiles } from '../cli/ai-context.js';
|
|
@@ -51,7 +53,70 @@ export const PHASE_LABELS = {
|
|
|
51
53
|
*/
|
|
52
54
|
export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
53
55
|
const log = (msg) => callbacks.onLog?.(msg);
|
|
54
|
-
|
|
56
|
+
// RFC 0002 Phase 1 — optional heap-profile instrumentation. Set
|
|
57
|
+
// CODRAGRAPH_HEAP_PROFILE=1 (or run `codragraph profile-heap`) to write a
|
|
58
|
+
// v8 heap snapshot at every phase boundary, plus a `profile-summary.jsonl`
|
|
59
|
+
// log of `process.memoryUsage()` at the same boundaries. Snapshots land in
|
|
60
|
+
// `<repo>/.codragraph/heap-profiles/`. Open snapshots in Chrome DevTools
|
|
61
|
+
// (Memory → Load) to find which constructors dominate retained set; the
|
|
62
|
+
// JSONL is the cheap RSS / heapUsed timeline. Off by default — snapshot
|
|
63
|
+
// writes pause the event loop ~2-5s and consume ~100-500MB of disk each.
|
|
64
|
+
const heapProfileEnabled = process.env.CODRAGRAPH_HEAP_PROFILE === '1';
|
|
65
|
+
let heapProfileDir = '';
|
|
66
|
+
let heapProfileSummaryPath = '';
|
|
67
|
+
let lastProfilePhase = '';
|
|
68
|
+
if (heapProfileEnabled) {
|
|
69
|
+
heapProfileDir = path.join(repoPath, '.codragraph', 'heap-profiles');
|
|
70
|
+
heapProfileSummaryPath = path.join(heapProfileDir, 'profile-summary.jsonl');
|
|
71
|
+
try {
|
|
72
|
+
fsSync.mkdirSync(heapProfileDir, { recursive: true });
|
|
73
|
+
// Truncate any prior summary so a single run produces a clean log.
|
|
74
|
+
// We append crash-safely on each phase boundary below.
|
|
75
|
+
fsSync.writeFileSync(heapProfileSummaryPath, '');
|
|
76
|
+
}
|
|
77
|
+
catch {
|
|
78
|
+
/* permission issue — best-effort */
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
const progress = (phase, percent, message) => {
|
|
82
|
+
callbacks.onProgress(phase, percent, message);
|
|
83
|
+
// Only snapshot on phase transitions, not every tick. Phase strings come
|
|
84
|
+
// from runPipelineFromRepo / loadGraphToLbug and are stable.
|
|
85
|
+
if (heapProfileEnabled && phase && phase !== lastProfilePhase) {
|
|
86
|
+
lastProfilePhase = phase;
|
|
87
|
+
const ts = Date.now();
|
|
88
|
+
const safe = phase.replace(/[^a-zA-Z0-9]+/g, '_').slice(0, 60);
|
|
89
|
+
const file = path.join(heapProfileDir, `${ts}-${safe}.heapsnapshot`);
|
|
90
|
+
// Capture the cheap memoryUsage timeline FIRST — even if writeHeapSnapshot
|
|
91
|
+
// crashes (out of disk, permissions), we still have the RSS curve which
|
|
92
|
+
// is the more useful artifact for the heap-pressure RFC.
|
|
93
|
+
try {
|
|
94
|
+
const mu = process.memoryUsage();
|
|
95
|
+
const entry = JSON.stringify({
|
|
96
|
+
ts,
|
|
97
|
+
phase,
|
|
98
|
+
percent,
|
|
99
|
+
rss: mu.rss,
|
|
100
|
+
heapUsed: mu.heapUsed,
|
|
101
|
+
heapTotal: mu.heapTotal,
|
|
102
|
+
external: mu.external,
|
|
103
|
+
arrayBuffers: mu.arrayBuffers,
|
|
104
|
+
snapshotFile: path.basename(file),
|
|
105
|
+
});
|
|
106
|
+
fsSync.appendFileSync(heapProfileSummaryPath, entry + '\n');
|
|
107
|
+
}
|
|
108
|
+
catch (err) {
|
|
109
|
+
log(`heap-profile: summary append failed (${err.message})`);
|
|
110
|
+
}
|
|
111
|
+
try {
|
|
112
|
+
v8.writeHeapSnapshot(file);
|
|
113
|
+
log(`heap-profile: wrote ${file}`);
|
|
114
|
+
}
|
|
115
|
+
catch (err) {
|
|
116
|
+
log(`heap-profile: write failed (${err.message})`);
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
};
|
|
55
120
|
const { storagePath, lbugPath } = getStoragePaths(repoPath);
|
|
56
121
|
// Clean up stale KuzuDB files from before the LadybugDB migration.
|
|
57
122
|
const kuzuResult = await cleanupOldKuzuFiles(storagePath);
|
|
@@ -62,7 +127,17 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
62
127
|
const currentCommit = repoHasGit ? getCurrentCommit(repoPath) : '';
|
|
63
128
|
const existingMeta = await loadMeta(storagePath);
|
|
64
129
|
// ── Early-return: already up to date ──────────────────────────────
|
|
65
|
-
|
|
130
|
+
// Schema-version mismatch forces a full re-analyze regardless of commit
|
|
131
|
+
// equality: existing 1.7.x indexes have no `schemaVersion` field at all,
|
|
132
|
+
// and 1.8+ readers expect every node table to carry a `contentEncoding`
|
|
133
|
+
// column (RFC 0001 Phase 2). LadybugDB ALTER on existing tables is not
|
|
134
|
+
// validated end-to-end yet, so the supported migration path is
|
|
135
|
+
// re-analyze → fresh CREATE NODE TABLE.
|
|
136
|
+
const schemaUpToDate = !!existingMeta && (existingMeta.schemaVersion ?? 0) >= INDEX_SCHEMA_VERSION;
|
|
137
|
+
if (existingMeta &&
|
|
138
|
+
schemaUpToDate &&
|
|
139
|
+
!options.force &&
|
|
140
|
+
existingMeta.lastCommit === currentCommit) {
|
|
66
141
|
// Non-git folders have currentCommit = '' — always rebuild since we can't detect changes
|
|
67
142
|
if (currentCommit !== '') {
|
|
68
143
|
return {
|
|
@@ -73,6 +148,11 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
73
148
|
};
|
|
74
149
|
}
|
|
75
150
|
}
|
|
151
|
+
if (existingMeta && !schemaUpToDate) {
|
|
152
|
+
log(`Index schema version ${existingMeta.schemaVersion ?? '<missing>'} is older than ` +
|
|
153
|
+
`${INDEX_SCHEMA_VERSION} (RFC 0001 Phase 2 — adds contentEncoding column). ` +
|
|
154
|
+
`Re-analyzing.`);
|
|
155
|
+
}
|
|
76
156
|
// ── Cache embeddings from existing index before rebuild ────────────
|
|
77
157
|
let cachedEmbeddingNodeIds = new Set();
|
|
78
158
|
let cachedEmbeddings = [];
|
|
@@ -122,7 +202,12 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
122
202
|
lbugMsgCount++;
|
|
123
203
|
const pct = Math.min(84, 60 + Math.round((lbugMsgCount / (lbugMsgCount + 10)) * 24));
|
|
124
204
|
progress('lbug', pct, msg);
|
|
125
|
-
}
|
|
205
|
+
},
|
|
206
|
+
// RFC 0001 Phase 2: when --compress is set, every content row goes
|
|
207
|
+
// through encodeContent before hitting the CSV. Default 'none' is
|
|
208
|
+
// a true passthrough, so the on-disk layout is byte-identical to
|
|
209
|
+
// pre-Phase-2 indexes when no compression flag is passed.
|
|
210
|
+
{ compress: options.compress });
|
|
126
211
|
// ── Phase 2.5: Versioned-graph snapshot (best-effort) ────────────
|
|
127
212
|
// Phase 4 hook: snapshot the freshly-loaded graph into the
|
|
128
213
|
// content-addressed `.codragraph/graphstore/`. Failures here do NOT
|
|
@@ -230,6 +315,8 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
230
315
|
repoPath,
|
|
231
316
|
lastCommit: currentCommit,
|
|
232
317
|
indexedAt: new Date().toISOString(),
|
|
318
|
+
schemaVersion: INDEX_SCHEMA_VERSION,
|
|
319
|
+
compress: options.compress ?? 'none',
|
|
233
320
|
// Captured here (not at registration) so it travels with the
|
|
234
321
|
// on-disk meta.json — sibling-clone fingerprinting works for
|
|
235
322
|
// out-of-tree consumers (group-status, future tooling) without
|
package/dist/core/search/bm25-index.js
CHANGED
|
@@ -10,19 +10,65 @@
|
|
|
10
10
|
* small repos / CI runners) at the cost of paying that overhead on the
|
|
11
11
|
* first `query`/`context` call in a session.
|
|
12
12
|
*/
|
|
13
|
-
import { queryFTS, ensureFTSIndex } from '../lbug/lbug-adapter.js';
|
|
13
|
+
import { queryFTS, ensureFTSIndex, executeQuery as executeCoreQuery, } from '../lbug/lbug-adapter.js';
|
|
14
14
|
/**
|
|
15
|
-
* FTS
|
|
16
|
-
* CLI/pipeline path and the MCP pool path
|
|
17
|
-
*
|
|
15
|
+
* FTS table set served by `searchFTSFromLbug`. Centralised so that both
|
|
16
|
+
* the CLI/pipeline path and the MCP pool path stay in lockstep.
|
|
17
|
+
*
|
|
18
|
+
* The properties list is computed at FTS-create time via `ftsPropertiesFor`
|
|
19
|
+
* — for repos that were analysed with `--compress brotli|zstd`, the
|
|
20
|
+
* `content` column holds base64-of-encoded-bytes and would tokenise to
|
|
21
|
+
* useless tokens. Those repos get name-only FTS so search at least
|
|
22
|
+
* matches function/class names instead of returning random hits on
|
|
23
|
+
* base64 alphabet. Plain (compress='none' / unset) repos get the full
|
|
24
|
+
* `name + content` index for body-text matches. RFC 0001 Phase 2.5.
|
|
18
25
|
*/
|
|
19
|
-
const
|
|
20
|
-
{ table: 'File', indexName: 'file_fts'
|
|
21
|
-
{ table: 'Function', indexName: 'function_fts'
|
|
22
|
-
{ table: 'Class', indexName: 'class_fts'
|
|
23
|
-
{ table: 'Method', indexName: 'method_fts'
|
|
24
|
-
{ table: 'Interface', indexName: 'interface_fts'
|
|
26
|
+
const FTS_TABLES = [
|
|
27
|
+
{ table: 'File', indexName: 'file_fts' },
|
|
28
|
+
{ table: 'Function', indexName: 'function_fts' },
|
|
29
|
+
{ table: 'Class', indexName: 'class_fts' },
|
|
30
|
+
{ table: 'Method', indexName: 'method_fts' },
|
|
31
|
+
{ table: 'Interface', indexName: 'interface_fts' },
|
|
25
32
|
];
|
|
33
|
+
const ftsPropertiesFor = (compress) => !compress || compress === 'none' ? ['name', 'content'] : ['name'];
|
|
34
|
+
/**
|
|
35
|
+
* Look up `meta.compress` for a repo. The MCP path passes `repoId`
|
|
36
|
+
* (registry-derived); the CLI path passes nothing and we walk up from
|
|
37
|
+
* cwd. Returns `'none'` whenever the lookup fails so the safe default
|
|
38
|
+
* (full FTS index) is used — the failure mode is reduced search
|
|
39
|
+
* quality, never wrong results.
|
|
40
|
+
*/
|
|
41
|
+
async function getCompressMode(repoId) {
|
|
42
|
+
try {
|
|
43
|
+
const repoMod = await import('../../storage/repo-manager.js');
|
|
44
|
+
if (repoId) {
|
|
45
|
+
// MCP path: registry name is the source of truth. The MCP
|
|
46
|
+
// backend's `repoId` is `entry.name.toLowerCase()` (or `${name}-${hash}`
|
|
47
|
+
// on collision); match conservatively against both forms.
|
|
48
|
+
const entries = await repoMod.listRegisteredRepos();
|
|
49
|
+
for (const entry of entries) {
|
|
50
|
+
const base = entry.name.toLowerCase();
|
|
51
|
+
if (base === repoId || repoId.startsWith(`${base}-`)) {
|
|
52
|
+
const meta = await repoMod.loadMeta(entry.storagePath);
|
|
53
|
+
return meta?.compress ?? 'none';
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
return 'none';
|
|
57
|
+
}
|
|
58
|
+
const repo = await repoMod.findRepo(process.cwd());
|
|
59
|
+
return repo?.meta?.compress ?? 'none';
|
|
60
|
+
}
|
|
61
|
+
catch {
|
|
62
|
+
return 'none';
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
const FALLBACK_SCAN_LIMIT = 50_000;
|
|
66
|
+
const BOOLEAN_QUERY_TOKENS = new Set(['and', 'or', 'not']);
|
|
67
|
+
const FALLBACK_FIELD_WEIGHTS = {
|
|
68
|
+
name: 4,
|
|
69
|
+
content: 2,
|
|
70
|
+
description: 1,
|
|
71
|
+
};
|
|
26
72
|
/**
|
|
27
73
|
* Per-process cache for the MCP pool path: tracks which `(repoId, table)`
|
|
28
74
|
* pairs have been ensured. The CLI/pipeline path gets its own cache inside
|
|
@@ -122,6 +168,72 @@ async function queryFTSViaExecutor(executor, tableName, indexName, query, limit)
|
|
|
122
168
|
return [];
|
|
123
169
|
}
|
|
124
170
|
}
|
|
171
|
+
function searchTerms(query) {
|
|
172
|
+
const terms = query
|
|
173
|
+
.toLowerCase()
|
|
174
|
+
.match(/[\p{L}\p{N}_]+/gu)
|
|
175
|
+
?.filter((term) => term.length > 1 && !BOOLEAN_QUERY_TOKENS.has(term));
|
|
176
|
+
return [...new Set(terms ?? [])];
|
|
177
|
+
}
|
|
178
|
+
function scoreFallbackNode(node, query, properties) {
|
|
179
|
+
const terms = searchTerms(query);
|
|
180
|
+
if (terms.length === 0)
|
|
181
|
+
return 0;
|
|
182
|
+
const phrase = query.trim().toLowerCase();
|
|
183
|
+
let score = 0;
|
|
184
|
+
for (const property of properties) {
|
|
185
|
+
const raw = node[property];
|
|
186
|
+
if (raw === null || raw === undefined)
|
|
187
|
+
continue;
|
|
188
|
+
const value = String(raw).toLowerCase();
|
|
189
|
+
if (!value)
|
|
190
|
+
continue;
|
|
191
|
+
const weight = FALLBACK_FIELD_WEIGHTS[property] ?? 1;
|
|
192
|
+
if (phrase.length > 1 && value.includes(phrase)) {
|
|
193
|
+
score += weight * (terms.length + 1);
|
|
194
|
+
}
|
|
195
|
+
for (const term of terms) {
|
|
196
|
+
if (value.includes(term))
|
|
197
|
+
score += weight;
|
|
198
|
+
}
|
|
199
|
+
}
|
|
200
|
+
return score;
|
|
201
|
+
}
|
|
202
|
+
async function queryFallbackViaExecutor(executor, tableName, properties, query, limit) {
|
|
203
|
+
try {
|
|
204
|
+
const rows = await executor(`
|
|
205
|
+
MATCH (node:${tableName})
|
|
206
|
+
RETURN node
|
|
207
|
+
LIMIT ${FALLBACK_SCAN_LIMIT}
|
|
208
|
+
`);
|
|
209
|
+
return rows
|
|
210
|
+
.map((row) => {
|
|
211
|
+
const node = row.node || row[0] || {};
|
|
212
|
+
return {
|
|
213
|
+
filePath: node.filePath || '',
|
|
214
|
+
score: scoreFallbackNode(node, query, properties),
|
|
215
|
+
nodeId: node.nodeId || node.id || '',
|
|
216
|
+
};
|
|
217
|
+
})
|
|
218
|
+
.filter((result) => result.filePath && result.score > 0)
|
|
219
|
+
.sort((a, b) => b.score - a.score)
|
|
220
|
+
.slice(0, limit);
|
|
221
|
+
}
|
|
222
|
+
catch {
|
|
223
|
+
return [];
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
async function fallbackSearchAllTables(executor, query, limit,
|
|
227
|
+
// Same compress-aware property selection as the FTS path. Default keeps
|
|
228
|
+
// pre-Phase-2 behaviour (`['name', 'content']`) for callers that don't
|
|
229
|
+
// pass a value.
|
|
230
|
+
properties = ['name', 'content']) {
|
|
231
|
+
const results = [];
|
|
232
|
+
for (const { table } of FTS_TABLES) {
|
|
233
|
+
results.push(await queryFallbackViaExecutor(executor, table, properties, query, limit));
|
|
234
|
+
}
|
|
235
|
+
return results;
|
|
236
|
+
}
|
|
125
237
|
/**
|
|
126
238
|
* Search using LadybugDB's built-in FTS (always fresh, reads from disk)
|
|
127
239
|
*
|
|
@@ -134,6 +246,8 @@ async function queryFTSViaExecutor(executor, tableName, indexName, query, limit)
|
|
|
134
246
|
* @returns Ranked search results from FTS indexes
|
|
135
247
|
*/
|
|
136
248
|
export const searchFTSFromLbug = async (query, limit = 20, repoId) => {
|
|
249
|
+
if (!query.trim() || limit <= 0)
|
|
250
|
+
return [];
|
|
137
251
|
let fileResults, functionResults, classResults, methodResults, interfaceResults;
|
|
138
252
|
if (repoId) {
|
|
139
253
|
// Use MCP connection pool via dynamic import
|
|
@@ -149,7 +263,12 @@ export const searchFTSFromLbug = async (query, limit = 20, repoId) => {
|
|
|
149
263
|
const executor = (cypher) => executeQuery(repoId, cypher);
|
|
150
264
|
// Lazy-create FTS indexes on first query for this repo (analyze no longer
|
|
151
265
|
// creates them up-front, so we ensure them here). Cached per-process.
|
|
152
|
-
|
|
266
|
+
// RFC 0001 Phase 2.5: drop `content` from FTS properties for repos
|
|
267
|
+
// analysed with --compress brotli|zstd — the column holds encoded
|
|
268
|
+
// bytes and would tokenise to garbage.
|
|
269
|
+
const compress = await getCompressMode(repoId);
|
|
270
|
+
const properties = ftsPropertiesFor(compress);
|
|
271
|
+
for (const { table, indexName } of FTS_TABLES) {
|
|
153
272
|
await ensureFTSIndexViaExecutor(executor, repoId, table, indexName, properties);
|
|
154
273
|
}
|
|
155
274
|
fileResults = await queryFTSViaExecutor(executor, 'File', 'file_fts', query, limit);
|
|
@@ -157,11 +276,24 @@ export const searchFTSFromLbug = async (query, limit = 20, repoId) => {
|
|
|
157
276
|
classResults = await queryFTSViaExecutor(executor, 'Class', 'class_fts', query, limit);
|
|
158
277
|
methodResults = await queryFTSViaExecutor(executor, 'Method', 'method_fts', query, limit);
|
|
159
278
|
interfaceResults = await queryFTSViaExecutor(executor, 'Interface', 'interface_fts', query, limit);
|
|
279
|
+
if (fileResults.length +
|
|
280
|
+
functionResults.length +
|
|
281
|
+
classResults.length +
|
|
282
|
+
methodResults.length +
|
|
283
|
+
interfaceResults.length ===
|
|
284
|
+
0) {
|
|
285
|
+
[fileResults, functionResults, classResults, methodResults, interfaceResults] =
|
|
286
|
+
await fallbackSearchAllTables(executor, query, limit, properties);
|
|
287
|
+
}
|
|
160
288
|
}
|
|
161
289
|
else {
|
|
162
290
|
// Use core lbug adapter (CLI / pipeline context) — also sequential for safety.
|
|
163
291
|
// Lazy-create FTS indexes on first query (analyze no longer does it).
|
|
164
|
-
|
|
292
|
+
// RFC 0001 Phase 2.5 — same `compress`-aware property selection as the MCP
|
|
293
|
+
// path; the CLI walks up from cwd to find the repo's meta.json.
|
|
294
|
+
const compress = await getCompressMode();
|
|
295
|
+
const properties = ftsPropertiesFor(compress);
|
|
296
|
+
for (const { table, indexName } of FTS_TABLES) {
|
|
165
297
|
await ensureFTSIndex(table, indexName, [...properties]).catch(() => { });
|
|
166
298
|
}
|
|
167
299
|
fileResults = await queryFTS('File', 'file_fts', query, limit, false).catch(() => []);
|
|
@@ -169,6 +301,15 @@ export const searchFTSFromLbug = async (query, limit = 20, repoId) => {
|
|
|
169
301
|
classResults = await queryFTS('Class', 'class_fts', query, limit, false).catch(() => []);
|
|
170
302
|
methodResults = await queryFTS('Method', 'method_fts', query, limit, false).catch(() => []);
|
|
171
303
|
interfaceResults = await queryFTS('Interface', 'interface_fts', query, limit, false).catch(() => []);
|
|
304
|
+
if (fileResults.length +
|
|
305
|
+
functionResults.length +
|
|
306
|
+
classResults.length +
|
|
307
|
+
methodResults.length +
|
|
308
|
+
interfaceResults.length ===
|
|
309
|
+
0) {
|
|
310
|
+
[fileResults, functionResults, classResults, methodResults, interfaceResults] =
|
|
311
|
+
await fallbackSearchAllTables(executeCoreQuery, query, limit, properties);
|
|
312
|
+
}
|
|
172
313
|
}
|
|
173
314
|
// Collect all node scores per filePath to track which nodes actually matched
|
|
174
315
|
const fileNodeScores = new Map();
|
package/dist/core/wiki/generator.js
CHANGED
|
@@ -221,7 +221,7 @@ export class WikiGenerator {
|
|
|
221
221
|
reportProgress(node.name);
|
|
222
222
|
return 1;
|
|
223
223
|
}
|
|
224
|
-
catch (
|
|
224
|
+
catch (_err) {
|
|
225
225
|
this.failedModules.push(node.name);
|
|
226
226
|
reportProgress(`Failed: ${node.name}`);
|
|
227
227
|
return 0;
|
|
@@ -239,7 +239,7 @@ export class WikiGenerator {
|
|
|
239
239
|
pagesGenerated++;
|
|
240
240
|
reportProgress(node.name);
|
|
241
241
|
}
|
|
242
|
-
catch (
|
|
242
|
+
catch (_err) {
|
|
243
243
|
this.failedModules.push(node.name);
|
|
244
244
|
reportProgress(`Failed: ${node.name}`);
|
|
245
245
|
}
|
|
@@ -607,7 +607,7 @@ export class WikiGenerator {
|
|
|
607
607
|
this.onProgress('incremental', percent, `${incProcessed}/${affectedNodes.length} — ${node.name}`);
|
|
608
608
|
return 1;
|
|
609
609
|
}
|
|
610
|
-
catch (
|
|
610
|
+
catch (_err) {
|
|
611
611
|
this.failedModules.push(node.name);
|
|
612
612
|
incProcessed++;
|
|
613
613
|
return 0;
|
|
@@ -807,7 +807,7 @@ export class WikiGenerator {
|
|
|
807
807
|
let activeConcurrency = this.concurrency;
|
|
808
808
|
let running = 0;
|
|
809
809
|
let idx = 0;
|
|
810
|
-
return new Promise((resolve,
|
|
810
|
+
return new Promise((resolve, _reject) => {
|
|
811
811
|
const next = () => {
|
|
812
812
|
while (running < activeConcurrency && idx < items.length) {
|
|
813
813
|
const item = items[idx++];
|
package/dist/mcp/local/local-backend.js
CHANGED
|
@@ -19,6 +19,7 @@ import { GroupService } from '../../core/group/service.js';
|
|
|
19
19
|
import { resolveAtGroupMemberRepoPath } from '../../core/group/resolve-at-member.js';
|
|
20
20
|
import { collectBestChunks } from '../../core/embeddings/types.js';
|
|
21
21
|
import { EMBEDDING_TABLE_NAME, EMBEDDING_INDEX_NAME } from '../../core/lbug/schema.js';
|
|
22
|
+
import { decodeContentField } from '../../core/lbug/content-read.js';
|
|
22
23
|
import { PhaseTimer } from '../../core/search/phase-timer.js';
|
|
23
24
|
import { checkStaleness, checkCwdMatch } from '../../core/git-staleness.js';
|
|
24
25
|
// AI context generation is CLI-only (codragraph analyze)
|
|
@@ -835,10 +836,12 @@ export class LocalBackend {
|
|
|
835
836
|
try {
|
|
836
837
|
const contentRows = await executeParameterized(repo.id, `
|
|
837
838
|
MATCH (n {id: $nodeId})
|
|
838
|
-
RETURN n.content AS content
|
|
839
|
+
RETURN n.content AS content, n.contentEncoding AS contentEncoding
|
|
839
840
|
`, { nodeId: sym.nodeId });
|
|
840
841
|
if (contentRows.length > 0) {
|
|
841
|
-
|
|
842
|
+
const raw = contentRows[0].content ?? contentRows[0][0];
|
|
843
|
+
const enc = contentRows[0].contentEncoding ?? contentRows[0][1];
|
|
844
|
+
content = decodeContentField(raw, enc);
|
|
842
845
|
}
|
|
843
846
|
}
|
|
844
847
|
catch (e) {
|
|
@@ -1330,7 +1333,13 @@ export class LocalBackend {
|
|
|
1330
1333
|
*/
|
|
1331
1334
|
async resolveSymbolCandidates(repo, query, hints) {
|
|
1332
1335
|
const { uid, name, include_content } = query;
|
|
1333
|
-
|
|
1336
|
+
// RFC 0001 Phase 2: when fetching content, also fetch the per-row
|
|
1337
|
+
// encoding tag so `decodeContentField` can pass it through unchanged
|
|
1338
|
+
// (default 'none') or run brotli/zstd decode. Adding contentEncoding
|
|
1339
|
+
// to the SELECT shifts the numeric-index fallback for content from
|
|
1340
|
+
// r[6] to (still) r[6] — encoding lands at r[7] — but we read by name
|
|
1341
|
+
// first which is the documented preferred path on LadybugDB.
|
|
1342
|
+
const selectClause = `n.id AS id, n.name AS name, labels(n)[0] AS type, n.filePath AS filePath, n.startLine AS startLine, n.endLine AS endLine${include_content ? ', n.content AS content, n.contentEncoding AS contentEncoding' : ''}`;
|
|
1334
1343
|
// Direct UID — zero-ambiguity path.
|
|
1335
1344
|
if (uid) {
|
|
1336
1345
|
const rows = await executeParameterized(repo.id, `MATCH (n {id: $uid}) RETURN ${selectClause} LIMIT 1`, { uid });
|
|
@@ -1344,7 +1353,11 @@ export class LocalBackend {
|
|
|
1344
1353
|
filePath: (r.filePath ?? r[3]),
|
|
1345
1354
|
startLine: (r.startLine ?? r[4]),
|
|
1346
1355
|
endLine: (r.endLine ?? r[5]),
|
|
1347
|
-
...(include_content
|
|
1356
|
+
...(include_content
|
|
1357
|
+
? {
|
|
1358
|
+
content: decodeContentField(r.content ?? r[6], r.contentEncoding ?? r[7]),
|
|
1359
|
+
}
|
|
1360
|
+
: {}),
|
|
1348
1361
|
};
|
|
1349
1362
|
// Same LadybugDB label-enrichment as the name-based path: a UID
|
|
1350
1363
|
// pointing at a Class must still surface `type: 'Class'` so impact's
|
|
@@ -1380,7 +1393,11 @@ export class LocalBackend {
|
|
|
1380
1393
|
filePath: (r.filePath ?? r[3]),
|
|
1381
1394
|
startLine: (r.startLine ?? r[4]),
|
|
1382
1395
|
endLine: (r.endLine ?? r[5]),
|
|
1383
|
-
...(include_content
|
|
1396
|
+
...(include_content
|
|
1397
|
+
? {
|
|
1398
|
+
content: decodeContentField(r.content ?? r[6], r.contentEncoding ?? r[7]),
|
|
1399
|
+
}
|
|
1400
|
+
: {}),
|
|
1384
1401
|
}));
|
|
1385
1402
|
// Enrich labels for any candidates where `labels(n)[0]` came back empty.
|
|
1386
1403
|
// LadybugDB returns an empty string for that projection on certain node
|
package/dist/mcp/resources.js
CHANGED
|
@@ -318,7 +318,7 @@ async function getContextResource(backend, repoName) {
|
|
|
318
318
|
lines.push(' - cypher: Raw graph queries');
|
|
319
319
|
lines.push(' - list_repos: Discover all indexed repositories');
|
|
320
320
|
lines.push('');
|
|
321
|
-
lines.push('re_index: Run `npx codragraph analyze` in terminal if data is stale');
|
|
321
|
+
lines.push('re_index: Run `npx @codragraph/cli analyze` in terminal if data is stale');
|
|
322
322
|
lines.push('');
|
|
323
323
|
lines.push('resources_available:');
|
|
324
324
|
lines.push(' - codragraph://repos: All indexed repositories');
|
|
@@ -520,7 +520,7 @@ async function getProcessDetailResource(name, backend, repoName) {
|
|
|
520
520
|
async function getSetupResource(backend) {
|
|
521
521
|
const repos = await backend.listRepos();
|
|
522
522
|
if (repos.length === 0) {
|
|
523
|
-
return '# CodraGraph\n\nNo repositories indexed. Run: `npx codragraph analyze` in a repository.';
|
|
523
|
+
return '# CodraGraph\n\nNo repositories indexed. Run: `npx @codragraph/cli analyze` in a repository.';
|
|
524
524
|
}
|
|
525
525
|
const sections = [];
|
|
526
526
|
for (const repo of repos) {
|
|
@@ -625,7 +625,6 @@ async function getRecipesResource(backend, repoName, taskFamily) {
|
|
|
625
625
|
let result;
|
|
626
626
|
try {
|
|
627
627
|
const harnessModuleId = '@codragraph/harness/mcp/handler';
|
|
628
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
629
628
|
const mod = (await import(/* @vite-ignore */ harnessModuleId));
|
|
630
629
|
result = await mod.handleHarnessRecipesList({
|
|
631
630
|
task_family: taskFamily,
|
package/dist/server/api.js
CHANGED
|
@@ -15,6 +15,7 @@ import { createRequire } from 'node:module';
|
|
|
15
15
|
import { loadMeta, listRegisteredRepos, getStoragePath } from '../storage/repo-manager.js';
|
|
16
16
|
import { executeQuery, executePrepared, executeWithReusedStatement, streamQuery, closeLbug, withLbugDb, } from '../core/lbug/lbug-adapter.js';
|
|
17
17
|
import { isWriteQuery } from '../core/lbug/pool-adapter.js';
|
|
18
|
+
import { decodeContentField } from '../core/lbug/content-read.js';
|
|
18
19
|
import { NODE_TABLES } from '../_shared/index.js';
|
|
19
20
|
import { searchFTSFromLbug } from '../core/search/bm25-index.js';
|
|
20
21
|
import { hybridSearch } from '../core/search/hybrid-search.js';
|
|
@@ -189,7 +190,7 @@ const getNodeQuery = (table, includeContent) => {
|
|
|
189
190
|
const tableLabel = quoteNodeTable(table);
|
|
190
191
|
if (table === 'File') {
|
|
191
192
|
return includeContent
|
|
192
|
-
? `MATCH (n:${tableLabel}) RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.content AS content`
|
|
193
|
+
? `MATCH (n:${tableLabel}) RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.content AS content, n.contentEncoding AS contentEncoding`
|
|
193
194
|
: `MATCH (n:${tableLabel}) RETURN n.id AS id, n.name AS name, n.filePath AS filePath`;
|
|
194
195
|
}
|
|
195
196
|
if (table === 'Folder') {
|
|
@@ -208,7 +209,7 @@ const getNodeQuery = (table, includeContent) => {
|
|
|
208
209
|
return `MATCH (n:${tableLabel}) RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.description AS description`;
|
|
209
210
|
}
|
|
210
211
|
return includeContent
|
|
211
|
-
? `MATCH (n:${tableLabel}) RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.startLine AS startLine, n.endLine AS endLine, n.content AS content`
|
|
212
|
+
? `MATCH (n:${tableLabel}) RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.startLine AS startLine, n.endLine AS endLine, n.content AS content, n.contentEncoding AS contentEncoding`
|
|
212
213
|
: `MATCH (n:${tableLabel}) RETURN n.id AS id, n.name AS name, n.filePath AS filePath, n.startLine AS startLine, n.endLine AS endLine`;
|
|
213
214
|
};
|
|
214
215
|
const mapGraphNodeRow = (table, row, includeContent) => ({
|
|
@@ -219,7 +220,7 @@ const mapGraphNodeRow = (table, row, includeContent) => ({
|
|
|
219
220
|
filePath: row.filePath ?? row[2],
|
|
220
221
|
startLine: row.startLine,
|
|
221
222
|
endLine: row.endLine,
|
|
222
|
-
content: includeContent ? row.content : undefined,
|
|
223
|
+
content: includeContent ? decodeContentField(row.content, row.contentEncoding) : undefined,
|
|
223
224
|
responseKeys: row.responseKeys,
|
|
224
225
|
errorKeys: row.errorKeys,
|
|
225
226
|
middleware: row.middleware,
|
|
@@ -36,10 +36,49 @@
|
|
|
36
36
|
* so the registry stabilises over analyze/re-analyze cycles.
|
|
37
37
|
*/
|
|
38
38
|
export declare const canonicalizePath: (p: string) => string;
|
|
39
|
+
/**
|
|
40
|
+
* On-disk schema version for `.codragraph/lbug` and `.codragraph/meta.json`.
|
|
41
|
+
*
|
|
42
|
+
* 1 — pre-RFC-0001-Phase-2 layout. Node tables have `content STRING`
|
|
43
|
+
* but no `contentEncoding` column. Implicit/missing on existing
|
|
44
|
+
* 1.6.x and 1.7.x indexes (RepoMeta.schemaVersion was undefined).
|
|
45
|
+
* 2 — RFC 0001 Phase 2: every node table that has `content` also has
|
|
46
|
+
* a `contentEncoding STRING DEFAULT 'none'` column. Writers may
|
|
47
|
+
* opt into compression via `--compress brotli|zstd` (compression
|
|
48
|
+
* is OFF by default, so existing readers keep working). Readers
|
|
49
|
+
* decode based on the per-row encoding tag.
|
|
50
|
+
*
|
|
51
|
+
* Bumping this is the migration trigger: `runFullAnalysis` forces a
|
|
52
|
+
* full re-analyze when an existing index has a missing or older
|
|
53
|
+
* `schemaVersion` field, because adding a column to an existing
|
|
54
|
+
* LadybugDB table via ALTER is not validated end-to-end yet — fresh
|
|
55
|
+
* `CREATE NODE TABLE` is the supported path.
|
|
56
|
+
*/
|
|
57
|
+
export declare const INDEX_SCHEMA_VERSION: 2;
|
|
39
58
|
export interface RepoMeta {
|
|
40
59
|
repoPath: string;
|
|
41
60
|
lastCommit: string;
|
|
42
61
|
indexedAt: string;
|
|
62
|
+
/**
|
|
63
|
+
* On-disk schema version (see {@link INDEX_SCHEMA_VERSION}). Absent on
|
|
64
|
+
* indexes written by 1.7.x or earlier; `runFullAnalysis` treats those
|
|
65
|
+
* as needing a full re-analyze when they're loaded by a 1.8+ CLI.
|
|
66
|
+
*/
|
|
67
|
+
schemaVersion?: number;
|
|
68
|
+
/**
|
|
69
|
+
* RFC 0001 Phase 2 — the per-row content encoding chosen at the last
|
|
70
|
+
* `analyze --compress` invocation. `'none'` (or absent) means rows
|
|
71
|
+
* carry plain text; `'brotli'` / `'zstd'` means rows are compressed
|
|
72
|
+
* and consumers must decode. Persisted so query-time tooling can
|
|
73
|
+
* detect the compressed mode without sampling rows.
|
|
74
|
+
*
|
|
75
|
+
* Phase 2.5 hooks: `core/search/bm25-index.ts` reads this field at
|
|
76
|
+
* FTS-create time and drops `content` from the FTS property list
|
|
77
|
+
* when set to a non-`'none'` value (full-text search falls back to
|
|
78
|
+
* symbol-name matches). Embeddings and graph queries are unaffected
|
|
79
|
+
* — they decode at the read boundary.
|
|
80
|
+
*/
|
|
81
|
+
compress?: 'none' | 'brotli' | 'zstd';
|
|
43
82
|
/**
|
|
44
83
|
* Canonical `origin` remote URL captured at index time. Used to
|
|
45
84
|
* fingerprint the same logical repo across multiple on-disk clones
|
|
@@ -49,6 +49,25 @@ export const canonicalizePath = (p) => {
|
|
|
49
49
|
return resolved;
|
|
50
50
|
}
|
|
51
51
|
};
|
|
52
|
+
/**
|
|
53
|
+
* On-disk schema version for `.codragraph/lbug` and `.codragraph/meta.json`.
|
|
54
|
+
*
|
|
55
|
+
* 1 — pre-RFC-0001-Phase-2 layout. Node tables have `content STRING`
|
|
56
|
+
* but no `contentEncoding` column. Implicit/missing on existing
|
|
57
|
+
* 1.6.x and 1.7.x indexes (RepoMeta.schemaVersion was undefined).
|
|
58
|
+
* 2 — RFC 0001 Phase 2: every node table that has `content` also has
|
|
59
|
+
* a `contentEncoding STRING DEFAULT 'none'` column. Writers may
|
|
60
|
+
* opt into compression via `--compress brotli|zstd` (compression
|
|
61
|
+
* is OFF by default, so existing readers keep working). Readers
|
|
62
|
+
* decode based on the per-row encoding tag.
|
|
63
|
+
*
|
|
64
|
+
* Bumping this is the migration trigger: `runFullAnalysis` forces a
|
|
65
|
+
* full re-analyze when an existing index has a missing or older
|
|
66
|
+
* `schemaVersion` field, because adding a column to an existing
|
|
67
|
+
* LadybugDB table via ALTER is not validated end-to-end yet — fresh
|
|
68
|
+
* `CREATE NODE TABLE` is the supported path.
|
|
69
|
+
*/
|
|
70
|
+
export const INDEX_SCHEMA_VERSION = 2;
|
|
52
71
|
const CODRAGRAPH_DIR = '.codragraph';
|
|
53
72
|
// ─── Local Storage Helpers ─────────────────────────────────────────────
|
|
54
73
|
/**
|