gitnexus 1.6.3-rc.21 → 1.6.3-rc.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/ai-context.js +1 -1
- package/dist/cli/group.js +73 -0
- package/dist/core/embeddings/chunker.js +30 -25
- package/dist/core/embeddings/embedding-pipeline.d.ts +6 -0
- package/dist/core/embeddings/embedding-pipeline.js +15 -6
- package/dist/core/embeddings/text-generator.d.ts +1 -1
- package/dist/core/embeddings/text-generator.js +33 -24
- package/dist/core/embeddings/types.d.ts +43 -1
- package/dist/core/embeddings/types.js +101 -29
- package/dist/core/group/cross-impact.d.ts +41 -0
- package/dist/core/group/cross-impact.js +454 -0
- package/dist/core/group/group-path-utils.d.ts +17 -0
- package/dist/core/group/group-path-utils.js +40 -0
- package/dist/core/group/resolve-at-member.d.ts +10 -0
- package/dist/core/group/resolve-at-member.js +31 -0
- package/dist/core/group/service.d.ts +9 -0
- package/dist/core/group/service.js +219 -20
- package/dist/core/group/types.d.ts +30 -0
- package/dist/core/lbug/lbug-adapter.d.ts +12 -0
- package/dist/core/lbug/lbug-adapter.js +30 -0
- package/dist/core/run-analyze.js +7 -12
- package/dist/core/search/bm25-index.d.ts +6 -0
- package/dist/core/search/bm25-index.js +54 -2
- package/dist/mcp/local/local-backend.d.ts +18 -3
- package/dist/mcp/local/local-backend.js +141 -15
- package/dist/mcp/resources.d.ts +31 -0
- package/dist/mcp/resources.js +100 -17
- package/dist/mcp/tools.d.ts +4 -1
- package/dist/mcp/tools.js +75 -54
- package/package.json +1 -1
|
@@ -2,15 +2,125 @@
|
|
|
2
2
|
* Group orchestration shared by MCP (LocalBackend) and CLI.
|
|
3
3
|
* DB access is injected via GroupToolPort so this module stays free of LocalBackend private API.
|
|
4
4
|
*/
|
|
5
|
+
import fsp from 'node:fs/promises';
|
|
6
|
+
import path from 'node:path';
|
|
5
7
|
import { checkStaleness } from '../git-staleness.js';
|
|
6
8
|
import { loadGroupConfig } from './config-parser.js';
|
|
9
|
+
import { fileMatchesServicePrefix, normalizeServicePrefix, repoInSubgroup, } from './group-path-utils.js';
|
|
7
10
|
import { getDefaultGitnexusDir, getGroupDir, listGroups, readContractRegistry } from './storage.js';
|
|
8
11
|
import { syncGroup } from './sync.js';
|
|
9
|
-
function
|
|
10
|
-
if (!
|
|
11
|
-
return
|
|
12
|
-
const
|
|
13
|
-
return
|
|
12
|
+
function isStoredContract(raw) {
|
|
13
|
+
if (!raw || typeof raw !== 'object')
|
|
14
|
+
return false;
|
|
15
|
+
const o = raw;
|
|
16
|
+
return (typeof o.contractId === 'string' &&
|
|
17
|
+
typeof o.type === 'string' &&
|
|
18
|
+
typeof o.repo === 'string' &&
|
|
19
|
+
typeof o.role === 'string' &&
|
|
20
|
+
(o.role === 'provider' || o.role === 'consumer') &&
|
|
21
|
+
typeof o.symbolUid === 'string' &&
|
|
22
|
+
typeof o.symbolName === 'string' &&
|
|
23
|
+
typeof o.confidence === 'number' &&
|
|
24
|
+
o.meta !== undefined &&
|
|
25
|
+
typeof o.meta === 'object' &&
|
|
26
|
+
o.meta !== null &&
|
|
27
|
+
o.symbolRef !== undefined &&
|
|
28
|
+
typeof o.symbolRef === 'object' &&
|
|
29
|
+
o.symbolRef !== null &&
|
|
30
|
+
typeof o.symbolRef.filePath === 'string' &&
|
|
31
|
+
typeof o.symbolRef.name === 'string');
|
|
32
|
+
}
|
|
33
|
+
function filterQueryByServicePrefix(queryResult, servicePrefix) {
|
|
34
|
+
const symbols = (queryResult.process_symbols || []).filter((s) => fileMatchesServicePrefix(typeof s.filePath === 'string' ? s.filePath : undefined, servicePrefix));
|
|
35
|
+
const allowed = new Set(symbols.map((s) => String(s.process_id ?? '')).filter(Boolean));
|
|
36
|
+
const processes = (queryResult.processes || []).filter((p) => allowed.has(String(p.id)));
|
|
37
|
+
return { processes, process_symbols: symbols };
|
|
38
|
+
}
|
|
39
|
+
function isCrossLink(raw) {
|
|
40
|
+
if (!raw || typeof raw !== 'object')
|
|
41
|
+
return false;
|
|
42
|
+
const o = raw;
|
|
43
|
+
const from = o.from;
|
|
44
|
+
const to = o.to;
|
|
45
|
+
if (!from || !to)
|
|
46
|
+
return false;
|
|
47
|
+
if (typeof from.repo !== 'string' || typeof to.repo !== 'string')
|
|
48
|
+
return false;
|
|
49
|
+
return typeof o.contractId === 'string' && typeof o.type === 'string';
|
|
50
|
+
}
|
|
51
|
+
async function loadContractRegistryResilient(groupDir) {
|
|
52
|
+
const filePath = path.join(groupDir, 'contracts.json');
|
|
53
|
+
let raw;
|
|
54
|
+
try {
|
|
55
|
+
raw = await fsp.readFile(filePath, 'utf-8');
|
|
56
|
+
}
|
|
57
|
+
catch (e) {
|
|
58
|
+
if (e.code === 'ENOENT') {
|
|
59
|
+
return { ok: false, error: `No contracts.json for this group. Run group_sync first.` };
|
|
60
|
+
}
|
|
61
|
+
return { ok: false, error: e instanceof Error ? e.message : String(e) };
|
|
62
|
+
}
|
|
63
|
+
let root;
|
|
64
|
+
try {
|
|
65
|
+
root = JSON.parse(raw);
|
|
66
|
+
}
|
|
67
|
+
catch {
|
|
68
|
+
return { ok: false, error: 'contracts.json is not valid JSON' };
|
|
69
|
+
}
|
|
70
|
+
if (!root || typeof root !== 'object' || Array.isArray(root)) {
|
|
71
|
+
return { ok: false, error: 'contracts.json has an invalid root object' };
|
|
72
|
+
}
|
|
73
|
+
const base = root;
|
|
74
|
+
const contractsRaw = base.contracts;
|
|
75
|
+
const crossRaw = base.crossLinks;
|
|
76
|
+
let skippedCorrupt = 0;
|
|
77
|
+
const contracts = [];
|
|
78
|
+
if (Array.isArray(contractsRaw)) {
|
|
79
|
+
for (const row of contractsRaw) {
|
|
80
|
+
try {
|
|
81
|
+
if (isStoredContract(row)) {
|
|
82
|
+
contracts.push(row);
|
|
83
|
+
}
|
|
84
|
+
else {
|
|
85
|
+
skippedCorrupt++;
|
|
86
|
+
console.warn('[group] skipping corrupt contract row in contracts.json');
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
catch {
|
|
90
|
+
skippedCorrupt++;
|
|
91
|
+
console.warn('[group] skipping corrupt contract row in contracts.json');
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
const crossLinks = [];
|
|
96
|
+
if (Array.isArray(crossRaw)) {
|
|
97
|
+
for (const row of crossRaw) {
|
|
98
|
+
try {
|
|
99
|
+
if (isCrossLink(row)) {
|
|
100
|
+
crossLinks.push(row);
|
|
101
|
+
}
|
|
102
|
+
else {
|
|
103
|
+
skippedCorrupt++;
|
|
104
|
+
console.warn('[group] skipping corrupt crossLinks row in contracts.json');
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
catch {
|
|
108
|
+
skippedCorrupt++;
|
|
109
|
+
console.warn('[group] skipping corrupt crossLinks row in contracts.json');
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
const registry = {
|
|
114
|
+
version: typeof base.version === 'number' ? base.version : 0,
|
|
115
|
+
generatedAt: typeof base.generatedAt === 'string' ? base.generatedAt : '',
|
|
116
|
+
repoSnapshots: base.repoSnapshots && typeof base.repoSnapshots === 'object' && base.repoSnapshots !== null
|
|
117
|
+
? base.repoSnapshots
|
|
118
|
+
: {},
|
|
119
|
+
missingRepos: Array.isArray(base.missingRepos) ? base.missingRepos : [],
|
|
120
|
+
contracts,
|
|
121
|
+
crossLinks,
|
|
122
|
+
};
|
|
123
|
+
return { ok: true, registry, skippedCorrupt };
|
|
14
124
|
}
|
|
15
125
|
export class GroupService {
|
|
16
126
|
port;
|
|
@@ -57,10 +167,14 @@ export class GroupService {
|
|
|
57
167
|
if (!name)
|
|
58
168
|
return { error: 'name is required' };
|
|
59
169
|
const groupDir = getGroupDir(getDefaultGitnexusDir(), name);
|
|
60
|
-
const
|
|
61
|
-
if (
|
|
62
|
-
|
|
170
|
+
const loaded = await loadContractRegistryResilient(groupDir);
|
|
171
|
+
if (loaded.ok === false) {
|
|
172
|
+
if (loaded.error.includes('No contracts.json')) {
|
|
173
|
+
return { error: `No contracts.json for group "${name}". Run group_sync first.` };
|
|
174
|
+
}
|
|
175
|
+
return { error: loaded.error };
|
|
63
176
|
}
|
|
177
|
+
const { registry, skippedCorrupt } = loaded;
|
|
64
178
|
let contracts = registry.contracts;
|
|
65
179
|
if (params.type)
|
|
66
180
|
contracts = contracts.filter((c) => c.type === params.type);
|
|
@@ -73,21 +187,106 @@ export class GroupService {
|
|
|
73
187
|
]));
|
|
74
188
|
contracts = contracts.filter((c) => !matchedIds.has(`${c.repo}::${c.contractId}`));
|
|
75
189
|
}
|
|
76
|
-
|
|
190
|
+
const out = { contracts, crossLinks: registry.crossLinks };
|
|
191
|
+
if (skippedCorrupt > 0)
|
|
192
|
+
out.skippedCorrupt = skippedCorrupt;
|
|
193
|
+
return out;
|
|
194
|
+
}
|
|
195
|
+
async groupImpact(params) {
|
|
196
|
+
const { runGroupImpact } = await import('./cross-impact.js');
|
|
197
|
+
return runGroupImpact({ port: this.port, gitnexusDir: getDefaultGitnexusDir() }, params);
|
|
198
|
+
}
|
|
199
|
+
async groupContext(params) {
|
|
200
|
+
const name = String(params.name ?? '').trim();
|
|
201
|
+
const target = typeof params.target === 'string' ? params.target.trim() : '';
|
|
202
|
+
const uid = typeof params.uid === 'string' ? params.uid.trim() : undefined;
|
|
203
|
+
const file_path = typeof params.file_path === 'string' ? params.file_path : undefined;
|
|
204
|
+
const include_content = Boolean(params.include_content);
|
|
205
|
+
if (params.service !== undefined &&
|
|
206
|
+
params.service !== null &&
|
|
207
|
+
String(params.service).trim() === '') {
|
|
208
|
+
return { group: name || '', error: 'service must not be an empty string', results: [] };
|
|
209
|
+
}
|
|
210
|
+
const servicePrefix = normalizeServicePrefix(params.service);
|
|
211
|
+
const subgroup = typeof params.subgroup === 'string' ? params.subgroup : undefined;
|
|
212
|
+
const subgroupExact = params.subgroupExact === true;
|
|
213
|
+
if (!name) {
|
|
214
|
+
return { group: '', error: 'name is required', results: [] };
|
|
215
|
+
}
|
|
216
|
+
if (!uid && !target) {
|
|
217
|
+
return { group: name, error: 'target or uid is required', results: [] };
|
|
218
|
+
}
|
|
219
|
+
const groupDir = getGroupDir(getDefaultGitnexusDir(), name);
|
|
220
|
+
let config;
|
|
221
|
+
try {
|
|
222
|
+
config = await loadGroupConfig(groupDir);
|
|
223
|
+
}
|
|
224
|
+
catch (e) {
|
|
225
|
+
return {
|
|
226
|
+
group: name,
|
|
227
|
+
target: target || uid,
|
|
228
|
+
service: servicePrefix,
|
|
229
|
+
error: e instanceof Error ? e.message : String(e),
|
|
230
|
+
results: [],
|
|
231
|
+
};
|
|
232
|
+
}
|
|
233
|
+
const memberEntries = Object.entries(config.repos).filter(([repoPath]) => repoInSubgroup(repoPath, subgroup, subgroupExact));
|
|
234
|
+
// Per-repo work is independent (each repo opens its own DB handle and the
|
|
235
|
+
// group-level result preserves repo iteration order via the indexed map).
|
|
236
|
+
// Errors are caught per repo so one slow/failed member does not block the rest.
|
|
237
|
+
const results = await Promise.all(memberEntries.map(async ([repoPath, registryName]) => {
|
|
238
|
+
try {
|
|
239
|
+
const repoObj = await this.port.resolveRepo(registryName);
|
|
240
|
+
const payload = await this.port.context(repoObj, {
|
|
241
|
+
name: target || undefined,
|
|
242
|
+
uid,
|
|
243
|
+
file_path,
|
|
244
|
+
include_content,
|
|
245
|
+
});
|
|
246
|
+
if (servicePrefix) {
|
|
247
|
+
const st = payload?.status;
|
|
248
|
+
const sym = payload?.symbol;
|
|
249
|
+
if (st === 'found' && !fileMatchesServicePrefix(sym?.filePath, servicePrefix)) {
|
|
250
|
+
return { repoPath, registryName, payload: {} };
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
return { repoPath, registryName, payload };
|
|
254
|
+
}
|
|
255
|
+
catch (e) {
|
|
256
|
+
return {
|
|
257
|
+
repoPath,
|
|
258
|
+
registryName,
|
|
259
|
+
payload: { error: e instanceof Error ? e.message : String(e) },
|
|
260
|
+
};
|
|
261
|
+
}
|
|
262
|
+
}));
|
|
263
|
+
return {
|
|
264
|
+
group: name,
|
|
265
|
+
target: target || uid,
|
|
266
|
+
service: servicePrefix,
|
|
267
|
+
results,
|
|
268
|
+
};
|
|
77
269
|
}
|
|
78
270
|
async groupQuery(params) {
|
|
79
271
|
const name = String(params.name ?? '').trim();
|
|
80
272
|
const queryText = String(params.query ?? '').trim();
|
|
81
273
|
if (!name || !queryText)
|
|
82
274
|
return { error: 'name and query are required' };
|
|
275
|
+
if (params.service !== undefined &&
|
|
276
|
+
params.service !== null &&
|
|
277
|
+
String(params.service).trim() === '') {
|
|
278
|
+
return { error: 'service must not be an empty string' };
|
|
279
|
+
}
|
|
280
|
+
const servicePrefix = normalizeServicePrefix(params.service);
|
|
83
281
|
const limit = typeof params.limit === 'number' && params.limit > 0 ? params.limit : 5;
|
|
84
282
|
const subgroup = typeof params.subgroup === 'string' ? params.subgroup : undefined;
|
|
283
|
+
const subgroupExact = params.subgroupExact === true;
|
|
85
284
|
const groupDir = getGroupDir(getDefaultGitnexusDir(), name);
|
|
86
285
|
const config = await loadGroupConfig(groupDir);
|
|
87
|
-
const
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
286
|
+
const memberEntries = Object.entries(config.repos).filter(([repoPath]) => repoInSubgroup(repoPath, subgroup, subgroupExact));
|
|
287
|
+
// Per-repo query is independent; run them concurrently and isolate
|
|
288
|
+
// failures so one slow/failed member does not block the rest.
|
|
289
|
+
const perRepo = await Promise.all(memberEntries.map(async ([repoPath, registryName]) => {
|
|
91
290
|
try {
|
|
92
291
|
const repoObj = await this.port.resolveRepo(registryName);
|
|
93
292
|
const queryResult = (await this.port.query(repoObj, {
|
|
@@ -96,18 +295,20 @@ export class GroupService {
|
|
|
96
295
|
max_symbols: 10,
|
|
97
296
|
include_content: false,
|
|
98
297
|
}));
|
|
99
|
-
const processes =
|
|
298
|
+
const processes = servicePrefix
|
|
299
|
+
? filterQueryByServicePrefix(queryResult, servicePrefix).processes
|
|
300
|
+
: queryResult.processes || [];
|
|
100
301
|
const scored = processes.map((p, idx) => ({
|
|
101
302
|
...p,
|
|
102
303
|
_rrf_score: 1 / (idx + 1 + 60),
|
|
103
304
|
_repo: repoPath,
|
|
104
305
|
}));
|
|
105
|
-
|
|
306
|
+
return { repo: repoPath, score: 0, processes: scored };
|
|
106
307
|
}
|
|
107
308
|
catch {
|
|
108
|
-
|
|
309
|
+
return { repo: repoPath, score: 0, processes: [] };
|
|
109
310
|
}
|
|
110
|
-
}
|
|
311
|
+
}));
|
|
111
312
|
const allProcesses = perRepo.flatMap((r) => r.processes);
|
|
112
313
|
allProcesses.sort((a, b) => b._rrf_score - a._rrf_score);
|
|
113
314
|
const topN = allProcesses.slice(0, limit);
|
|
@@ -126,12 +327,10 @@ export class GroupService {
|
|
|
126
327
|
const config = await loadGroupConfig(groupDir);
|
|
127
328
|
const registry = await readContractRegistry(groupDir);
|
|
128
329
|
const repoStatuses = {};
|
|
129
|
-
const fsp = await import('node:fs/promises');
|
|
130
|
-
const pathMod = await import('node:path');
|
|
131
330
|
for (const [repoPath, registryName] of Object.entries(config.repos)) {
|
|
132
331
|
try {
|
|
133
332
|
const repoObj = await this.port.resolveRepo(registryName);
|
|
134
|
-
const metaPath =
|
|
333
|
+
const metaPath = path.join(repoObj.storagePath, 'meta.json');
|
|
135
334
|
const metaRaw = await fsp.readFile(metaPath, 'utf-8').catch(() => '{}');
|
|
136
335
|
const meta = JSON.parse(metaRaw);
|
|
137
336
|
const staleness = meta.lastCommit
|
|
@@ -83,6 +83,8 @@ export interface RepoHandle {
|
|
|
83
83
|
repoPath: string;
|
|
84
84
|
storagePath: string;
|
|
85
85
|
}
|
|
86
|
+
/** Why local impact or fan-out stopped early (e.g. wall-clock budget exhausted). */
|
|
87
|
+
export type GroupImpactTruncationReason = 'timeout' | 'partial';
|
|
86
88
|
export interface GroupImpactResult {
|
|
87
89
|
local: unknown;
|
|
88
90
|
group: string;
|
|
@@ -97,6 +99,34 @@ export interface GroupImpactResult {
|
|
|
97
99
|
cross_repo_hits: number;
|
|
98
100
|
};
|
|
99
101
|
risk: string;
|
|
102
|
+
/**
|
|
103
|
+
* Milliseconds budget applied to the **Phase 1 local impact** leg (`safeLocalImpact`).
|
|
104
|
+
* If the walk hits this wall first, expect `truncationReason: 'timeout'` and a partial `local` payload.
|
|
105
|
+
*/
|
|
106
|
+
timeoutMs?: number;
|
|
107
|
+
/** Present when local impact or fan-out stopped early (timeout, graph cap, etc.). */
|
|
108
|
+
truncationReason?: GroupImpactTruncationReason;
|
|
109
|
+
/**
|
|
110
|
+
* Human-readable note when `crossDepth` was clamped (e.g. multi-hop not implemented yet).
|
|
111
|
+
*/
|
|
112
|
+
crossDepthWarning?: string;
|
|
113
|
+
}
|
|
114
|
+
/** One repo’s `context` tool payload in a group-scoped context run. */
|
|
115
|
+
export interface GroupContextRepoEntry {
|
|
116
|
+
repoPath: string;
|
|
117
|
+
registryName: string;
|
|
118
|
+
payload: unknown;
|
|
119
|
+
}
|
|
120
|
+
/**
|
|
121
|
+
* Aggregated group `context`: explicit per-repo rows (no merged symbol payloads).
|
|
122
|
+
* Use top-level `error` only for unrecoverable failures, not for “no matches” or service scope misses.
|
|
123
|
+
*/
|
|
124
|
+
export interface GroupContextResult {
|
|
125
|
+
group: string;
|
|
126
|
+
target?: string;
|
|
127
|
+
service?: string;
|
|
128
|
+
error?: string;
|
|
129
|
+
results: GroupContextRepoEntry[];
|
|
100
130
|
}
|
|
101
131
|
export interface CrossRepoImpact {
|
|
102
132
|
repo: string;
|
|
@@ -139,6 +139,18 @@ export declare const loadVectorExtension: () => Promise<void>;
|
|
|
139
139
|
* @param stemmer - Stemming algorithm (default: 'porter')
|
|
140
140
|
*/
|
|
141
141
|
export declare const createFTSIndex: (tableName: string, indexName: string, properties: string[], stemmer?: string) => Promise<void>;
|
|
142
|
+
/**
|
|
143
|
+
* Lazy-create an FTS index, caching the fact in-process.
|
|
144
|
+
*
|
|
145
|
+
* Used by `queryFTS` so that `analyze` doesn't pay the ~440 ms × 5 fixed
|
|
146
|
+
* LadybugDB cost up-front (it dominates analyze on small repos). Instead,
|
|
147
|
+
* the cost is moved to the first `query`/`context` call in a session,
|
|
148
|
+
* where it's amortised across many lookups.
|
|
149
|
+
*
|
|
150
|
+
* Safe to call repeatedly — the in-process Set guarantees only the first
|
|
151
|
+
* call hits LadybugDB. `closeLbug` clears the cache so re-init starts fresh.
|
|
152
|
+
*/
|
|
153
|
+
export declare const ensureFTSIndex: (tableName: string, indexName: string, properties: string[], stemmer?: string) => Promise<void>;
|
|
142
154
|
/**
|
|
143
155
|
* Query a full-text search index
|
|
144
156
|
* @param tableName - The node table name
|
|
@@ -110,6 +110,17 @@ let conn = null;
|
|
|
110
110
|
let currentDbPath = null;
|
|
111
111
|
let ftsLoaded = false;
|
|
112
112
|
let vectorExtensionLoaded = false;
|
|
113
|
+
/**
|
|
114
|
+
* In-process cache of FTS indexes that have been ensured against the current
|
|
115
|
+
* connection. Prevents repeated `CALL CREATE_FTS_INDEX` round-trips inside a
|
|
116
|
+
* single CLI/MCP session — the first call to `ensureFTSIndex` for a given
|
|
117
|
+
* `(tableName, indexName)` pays the LadybugDB cost (~440 ms even when the
|
|
118
|
+
* index already exists on disk), subsequent calls are a Set lookup. Cleared
|
|
119
|
+
* by `closeLbug` so a re-init starts fresh.
|
|
120
|
+
*
|
|
121
|
+
* Key format: `${tableName}:${indexName}`.
|
|
122
|
+
*/
|
|
123
|
+
const ensuredFTSIndexes = new Set();
|
|
113
124
|
/**
|
|
114
125
|
* Check if an error indicates a missing column or table (schema-level problem)
|
|
115
126
|
* rather than a transient/connection error. Used for legacy DB fallback logic.
|
|
@@ -935,6 +946,7 @@ export const closeLbug = async () => {
|
|
|
935
946
|
currentDbPath = null;
|
|
936
947
|
ftsLoaded = false;
|
|
937
948
|
vectorExtensionLoaded = false;
|
|
949
|
+
ensuredFTSIndexes.clear();
|
|
938
950
|
};
|
|
939
951
|
export const isLbugReady = () => conn !== null && db !== null;
|
|
940
952
|
/**
|
|
@@ -1097,6 +1109,24 @@ export const createFTSIndex = async (tableName, indexName, properties, stemmer =
|
|
|
1097
1109
|
}
|
|
1098
1110
|
}
|
|
1099
1111
|
};
|
|
1112
|
+
/**
|
|
1113
|
+
* Lazy-create an FTS index, caching the fact in-process.
|
|
1114
|
+
*
|
|
1115
|
+
* Used by `queryFTS` so that `analyze` doesn't pay the ~440 ms × 5 fixed
|
|
1116
|
+
* LadybugDB cost up-front (it dominates analyze on small repos). Instead,
|
|
1117
|
+
* the cost is moved to the first `query`/`context` call in a session,
|
|
1118
|
+
* where it's amortised across many lookups.
|
|
1119
|
+
*
|
|
1120
|
+
* Safe to call repeatedly — the in-process Set guarantees only the first
|
|
1121
|
+
* call hits LadybugDB. `closeLbug` clears the cache so re-init starts fresh.
|
|
1122
|
+
*/
|
|
1123
|
+
export const ensureFTSIndex = async (tableName, indexName, properties, stemmer = 'porter') => {
|
|
1124
|
+
const key = `${tableName}:${indexName}`;
|
|
1125
|
+
if (ensuredFTSIndexes.has(key))
|
|
1126
|
+
return;
|
|
1127
|
+
await createFTSIndex(tableName, indexName, properties, stemmer);
|
|
1128
|
+
ensuredFTSIndexes.add(key);
|
|
1129
|
+
};
|
|
1100
1130
|
/**
|
|
1101
1131
|
* Query a full-text search index
|
|
1102
1132
|
* @param tableName - The node table name
|
package/dist/core/run-analyze.js
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
import path from 'path';
|
|
12
12
|
import fs from 'fs/promises';
|
|
13
13
|
import { runPipelineFromRepo } from './ingestion/pipeline.js';
|
|
14
|
-
import { initLbug, loadGraphToLbug, getLbugStats, executeQuery, executeWithReusedStatement, closeLbug,
|
|
14
|
+
import { initLbug, loadGraphToLbug, getLbugStats, executeQuery, executeWithReusedStatement, closeLbug, loadCachedEmbeddings, } from './lbug/lbug-adapter.js';
|
|
15
15
|
import { getStoragePaths, saveMeta, loadMeta, addToGitignore, registerRepo, cleanupOldKuzuFiles, } from '../storage/repo-manager.js';
|
|
16
16
|
import { getCurrentCommit, hasGitDir, getInferredRepoName } from '../storage/git.js';
|
|
17
17
|
import { generateAIContextFiles } from '../cli/ai-context.js';
|
|
@@ -123,17 +123,12 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
123
123
|
progress('lbug', pct, msg);
|
|
124
124
|
});
|
|
125
125
|
// ── Phase 3: FTS (85–90%) ─────────────────────────────────────────
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
await createFTSIndex('Interface', 'interface_fts', ['name', 'content']);
|
|
133
|
-
}
|
|
134
|
-
catch {
|
|
135
|
-
// Non-fatal — FTS is best-effort
|
|
136
|
-
}
|
|
126
|
+
// FTS indexes are created lazily on first `query`/`context` call instead
|
|
127
|
+
// of eagerly here. On small repos / CI runners the LadybugDB
|
|
128
|
+
// CREATE_FTS_INDEX cost is ~440 ms × 5 (≈2 s) regardless of table size,
|
|
129
|
+
// which dominated `analyze` runtime and pushed Windows CI past its
|
|
130
|
+
// 30 s test budget. Lazy creation is implemented in
|
|
131
|
+
// `core/search/bm25-index.ts` via `ensureFTSIndex`.
|
|
137
132
|
// ── Phase 3.5: Re-insert cached embeddings ────────────────────────
|
|
138
133
|
if (cachedEmbeddings.length > 0) {
|
|
139
134
|
const cachedDims = cachedEmbeddings[0].embedding.length;
|
|
@@ -3,6 +3,12 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Uses LadybugDB's built-in full-text search indexes for keyword-based search.
|
|
5
5
|
* Always reads from the database (no cached state to drift).
|
|
6
|
+
*
|
|
7
|
+
* FTS indexes are created lazily on first query (via `ensureFTSIndex`) — see
|
|
8
|
+
* `lbug-adapter.ts` for the rationale. This keeps `analyze` fast (the
|
|
9
|
+
* ~440 ms × 5 LadybugDB CREATE_FTS_INDEX cost dominates pipeline time on
|
|
10
|
+
* small repos / CI runners) at the cost of paying that overhead on the
|
|
11
|
+
* first `query`/`context` call in a session.
|
|
6
12
|
*/
|
|
7
13
|
export interface BM25SearchResult {
|
|
8
14
|
filePath: string;
|
|
@@ -3,8 +3,51 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Uses LadybugDB's built-in full-text search indexes for keyword-based search.
|
|
5
5
|
* Always reads from the database (no cached state to drift).
|
|
6
|
+
*
|
|
7
|
+
* FTS indexes are created lazily on first query (via `ensureFTSIndex`) — see
|
|
8
|
+
* `lbug-adapter.ts` for the rationale. This keeps `analyze` fast (the
|
|
9
|
+
* ~440 ms × 5 LadybugDB CREATE_FTS_INDEX cost dominates pipeline time on
|
|
10
|
+
* small repos / CI runners) at the cost of paying that overhead on the
|
|
11
|
+
* first `query`/`context` call in a session.
|
|
12
|
+
*/
|
|
13
|
+
import { queryFTS, ensureFTSIndex } from '../lbug/lbug-adapter.js';
|
|
14
|
+
/**
|
|
15
|
+
* FTS schema served by `searchFTSFromLbug`. Centralised so that both the
|
|
16
|
+
* CLI/pipeline path and the MCP pool path use identical (table, index,
|
|
17
|
+
* properties) tuples and the lazy-create logic stays in one place.
|
|
6
18
|
*/
|
|
7
|
-
|
|
19
|
+
const FTS_INDEXES = [
|
|
20
|
+
{ table: 'File', indexName: 'file_fts', properties: ['name', 'content'] },
|
|
21
|
+
{ table: 'Function', indexName: 'function_fts', properties: ['name', 'content'] },
|
|
22
|
+
{ table: 'Class', indexName: 'class_fts', properties: ['name', 'content'] },
|
|
23
|
+
{ table: 'Method', indexName: 'method_fts', properties: ['name', 'content'] },
|
|
24
|
+
{ table: 'Interface', indexName: 'interface_fts', properties: ['name', 'content'] },
|
|
25
|
+
];
|
|
26
|
+
/**
|
|
27
|
+
* Per-process cache for the MCP pool path: tracks which `(repoId, table)`
|
|
28
|
+
* pairs have been ensured. The CLI/pipeline path gets its own cache inside
|
|
29
|
+
* `lbug-adapter.ts` keyed by table/index, scoped to the singleton connection.
|
|
30
|
+
*/
|
|
31
|
+
const ensuredPoolFTS = new Set();
|
|
32
|
+
async function ensureFTSIndexViaExecutor(executor, repoId, table, indexName, properties) {
|
|
33
|
+
const key = `${repoId}:${table}:${indexName}`;
|
|
34
|
+
if (ensuredPoolFTS.has(key))
|
|
35
|
+
return;
|
|
36
|
+
const propList = properties.map((p) => `'${p}'`).join(', ');
|
|
37
|
+
try {
|
|
38
|
+
await executor(`CALL CREATE_FTS_INDEX('${table}', '${indexName}', [${propList}], stemmer := 'porter')`);
|
|
39
|
+
}
|
|
40
|
+
catch (e) {
|
|
41
|
+
// 'already exists' is the happy path (index persists on disk between
|
|
42
|
+
// process invocations) — anything else we swallow because FTS is
|
|
43
|
+
// best-effort: queryFTS itself returns [] on missing-index errors.
|
|
44
|
+
const msg = String(e?.message ?? '');
|
|
45
|
+
if (!msg.includes('already exists')) {
|
|
46
|
+
// Best-effort — continue without index, queryFTS will fall back to [].
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
ensuredPoolFTS.add(key);
|
|
50
|
+
}
|
|
8
51
|
/**
|
|
9
52
|
* Execute a single FTS query via a custom executor (for MCP connection pool).
|
|
10
53
|
* Returns the same shape as core queryFTS (from LadybugDB adapter).
|
|
@@ -52,6 +95,11 @@ export const searchFTSFromLbug = async (query, limit = 20, repoId) => {
|
|
|
52
95
|
// The MCP pool supports multiple connections, but FTS is best run serially.
|
|
53
96
|
const { executeQuery } = await import('../lbug/pool-adapter.js');
|
|
54
97
|
const executor = (cypher) => executeQuery(repoId, cypher);
|
|
98
|
+
// Lazy-create FTS indexes on first query for this repo (analyze no longer
|
|
99
|
+
// creates them up-front, so we ensure them here). Cached per-process.
|
|
100
|
+
for (const { table, indexName, properties } of FTS_INDEXES) {
|
|
101
|
+
await ensureFTSIndexViaExecutor(executor, repoId, table, indexName, properties);
|
|
102
|
+
}
|
|
55
103
|
fileResults = await queryFTSViaExecutor(executor, 'File', 'file_fts', query, limit);
|
|
56
104
|
functionResults = await queryFTSViaExecutor(executor, 'Function', 'function_fts', query, limit);
|
|
57
105
|
classResults = await queryFTSViaExecutor(executor, 'Class', 'class_fts', query, limit);
|
|
@@ -59,7 +107,11 @@ export const searchFTSFromLbug = async (query, limit = 20, repoId) => {
|
|
|
59
107
|
interfaceResults = await queryFTSViaExecutor(executor, 'Interface', 'interface_fts', query, limit);
|
|
60
108
|
}
|
|
61
109
|
else {
|
|
62
|
-
// Use core lbug adapter (CLI / pipeline context) — also sequential for safety
|
|
110
|
+
// Use core lbug adapter (CLI / pipeline context) — also sequential for safety.
|
|
111
|
+
// Lazy-create FTS indexes on first query (analyze no longer does it).
|
|
112
|
+
for (const { table, indexName, properties } of FTS_INDEXES) {
|
|
113
|
+
await ensureFTSIndex(table, indexName, [...properties]).catch(() => { });
|
|
114
|
+
}
|
|
63
115
|
fileResults = await queryFTS('File', 'file_fts', query, limit, false).catch(() => []);
|
|
64
116
|
functionResults = await queryFTS('Function', 'function_fts', query, limit, false).catch(() => []);
|
|
65
117
|
classResults = await queryFTS('Class', 'class_fts', query, limit, false).catch(() => []);
|
|
@@ -236,11 +236,26 @@ export declare class LocalBackend {
|
|
|
236
236
|
includeTests: boolean;
|
|
237
237
|
}): Promise<any | null>;
|
|
238
238
|
private handleGroupTool;
|
|
239
|
+
/**
|
|
240
|
+
* Dispatch impact/query/context when `repo` is `@groupName` or `@groupName/memberPath`
|
|
241
|
+
* (group mode — not the global indexed-repo `repo` parameter).
|
|
242
|
+
*/
|
|
243
|
+
private callToolAtGroupRepo;
|
|
239
244
|
private groupList;
|
|
240
245
|
private groupSync;
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
246
|
+
/**
|
|
247
|
+
* MCP resource body for `gitnexus://group/{name}/contracts` (Issue #794).
|
|
248
|
+
*/
|
|
249
|
+
readGroupContractsResource(groupName: string, filter: {
|
|
250
|
+
type?: string;
|
|
251
|
+
repo?: string;
|
|
252
|
+
unmatchedOnly?: boolean;
|
|
253
|
+
}): Promise<string>;
|
|
254
|
+
/**
|
|
255
|
+
* MCP resource body for `gitnexus://group/{name}/status` (Issue #794).
|
|
256
|
+
*/
|
|
257
|
+
readGroupStatusResource(groupName: string): Promise<string>;
|
|
258
|
+
private static formatGroupResourcePayload;
|
|
244
259
|
/**
|
|
245
260
|
* Fetch Route nodes with their consumers in a single query.
|
|
246
261
|
* Shared by routeMap and shapeCheck to avoid N+1 query patterns.
|