brainclaw 1.9.1 → 1.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +47 -1
- package/dist/brainclaw-vscode.vsix +0 -0
- package/dist/cli.js +18 -1
- package/dist/commands/code-map.js +129 -0
- package/dist/commands/codev.js +7 -0
- package/dist/commands/mcp.js +121 -0
- package/dist/commands/run-profile.js +3 -2
- package/dist/commands/switch.js +100 -89
- package/dist/core/agent-files.js +12 -0
- package/dist/core/code-map/backend.js +123 -0
- package/dist/core/code-map/core.js +81 -0
- package/dist/core/code-map/drafts.js +2 -0
- package/dist/core/code-map/extractor.js +29 -0
- package/dist/core/code-map/finalizer.js +191 -0
- package/dist/core/code-map/freshness.js +108 -0
- package/dist/core/code-map/ids.js +0 -0
- package/dist/core/code-map/importable.js +35 -0
- package/dist/core/code-map/indexes.js +197 -0
- package/dist/core/code-map/lang/java/imports.scm +17 -0
- package/dist/core/code-map/lang/java/index.js +254 -0
- package/dist/core/code-map/lang/java/tags.scm +48 -0
- package/dist/core/code-map/lang/php/imports.scm +21 -0
- package/dist/core/code-map/lang/php/index.js +251 -0
- package/dist/core/code-map/lang/php/tags.scm +44 -0
- package/dist/core/code-map/lang/provider.js +9 -0
- package/dist/core/code-map/lang/providers.js +24 -0
- package/dist/core/code-map/lang/python/imports.scm +90 -0
- package/dist/core/code-map/lang/python/index.js +364 -0
- package/dist/core/code-map/lang/python/tags.scm +81 -0
- package/dist/core/code-map/lang/query-runtime.js +374 -0
- package/dist/core/code-map/lang/registry.js +125 -0
- package/dist/core/code-map/lang/typescript/imports.scm +90 -0
- package/dist/core/code-map/lang/typescript/index.js +306 -0
- package/dist/core/code-map/lang/typescript/tags.js.scm +106 -0
- package/dist/core/code-map/lang/typescript/tags.scm +151 -0
- package/dist/core/code-map/lock.js +210 -0
- package/dist/core/code-map/materialized.js +51 -0
- package/dist/core/code-map/memory-reader.js +59 -0
- package/dist/core/code-map/paths.js +53 -0
- package/dist/core/code-map/query.js +568 -0
- package/dist/core/code-map/refresh.js +0 -0
- package/dist/core/code-map/resolve.js +177 -0
- package/dist/core/code-map/store.js +206 -0
- package/dist/core/code-map/types.js +288 -0
- package/dist/core/code-map/vocabulary.js +57 -0
- package/dist/core/code-map/wasm-loader.js +294 -0
- package/dist/core/code-map/work-section.js +206 -0
- package/dist/core/codev-rounds.js +4 -0
- package/dist/core/execution-adapters.js +11 -10
- package/dist/core/execution-profile.js +58 -0
- package/dist/core/facade-schema.js +9 -0
- package/dist/core/instruction-templates.js +2 -0
- package/dist/core/mcp-command-resolution.js +3 -1
- package/dist/core/store-resolution.js +41 -4
- package/dist/facts.js +9 -5
- package/dist/facts.json +8 -4
- package/dist/vendor/web-tree-sitter/tree-sitter.js +3980 -0
- package/dist/vendor/web-tree-sitter/tree-sitter.wasm +0 -0
- package/dist/wasm/tree-sitter-java.wasm +0 -0
- package/dist/wasm/tree-sitter-javascript.wasm +0 -0
- package/dist/wasm/tree-sitter-php.wasm +0 -0
- package/dist/wasm/tree-sitter-python.wasm +0 -0
- package/dist/wasm/tree-sitter-tsx.wasm +0 -0
- package/dist/wasm/tree-sitter-typescript.wasm +0 -0
- package/dist/wasm/tree-sitter.wasm +0 -0
- package/docs/cli.md +46 -8
- package/docs/code-map.md +198 -0
- package/docs/integrations/mcp.md +13 -6
- package/docs/mcp-schema-changelog.md +7 -3
- package/docs/quickstart.md +1 -1
- package/package.json +11 -6
|
@@ -0,0 +1,568 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Code Map query logic (spec §6.1, §9, §11, §12.1) — the agent-facing
|
|
3
|
+
* `find()` / `brief()` implementations live here; backend.ts is the thin
|
|
4
|
+
* CodeQueryBackend adapter that wires this to the durable store.
|
|
5
|
+
*
|
|
6
|
+
* Everything reads from `indexes/**` + `files/**` (store.ts readers). No WASM,
|
|
7
|
+
* no graph DB. The hot correctness feature is the bounded lazy read-path
|
|
8
|
+
* freshness check (§6.1): before a shard selected from an index is trusted, we
|
|
9
|
+
* stat the live file and, when cheap, hash it — detecting modifications and
|
|
10
|
+
* deletions (NOT additions) within a per-query budget.
|
|
11
|
+
*/
|
|
12
|
+
import fs from 'node:fs';
|
|
13
|
+
import path from 'node:path';
|
|
14
|
+
import { hashContent } from './extractor.js';
|
|
15
|
+
import { readImportsIndex, readManifest, readResolutionIndex, readShard, readSymbolsIndex, } from './store.js';
|
|
16
|
+
// --- lazy read-path freshness budget (spec §6.1) ---
|
|
17
|
+
/** Default per-query budget for the lazy read-path freshness check (spec §6.1). */
export const LAZY_BUDGET = {
    maxFilesChecked: 32,
    maxWallMs: 2500,
};
/**
 * Create the mutable state shared by every freshness validation performed
 * while answering one query (spec §6.1).
 *
 * The returned object holds the budget, the wall-clock start time, a
 * per-path memo of validation verdicts, a counter of files checked so far,
 * and a sticky `exhausted` flag. The actual stat/hash comparison is done by
 * `validateEntry`; this object only carries what must persist between calls.
 *
 * @param {{maxFilesChecked: number, maxWallMs: number}} [budget] Limits to
 *   apply; defaults to `LAZY_BUDGET`.
 * @returns {{budget: object, startedAt: number, memo: Map, filesChecked: number, exhausted: boolean}}
 */
function makeLazyChecker(budget = LAZY_BUDGET) {
    const checker = {
        budget,
        startedAt: Date.now(),
        memo: new Map(),
        filesChecked: 0,
        exhausted: false,
    };
    return checker;
}
|
|
38
|
+
/**
 * Report whether the lazy-check budget is spent, latching the answer.
 *
 * The budget is spent once the number of files checked reaches
 * `budget.maxFilesChecked` or the time elapsed since `startedAt` reaches
 * `budget.maxWallMs`. Once either limit trips, `checker.exhausted` is set and
 * every later call returns true without re-evaluating the limits.
 *
 * @param {object} checker State produced by `makeLazyChecker`.
 * @returns {boolean} True when no further files may be hashed for this query.
 */
function budgetExhausted(checker) {
    if (checker.exhausted) {
        return true;
    }
    const { maxFilesChecked, maxWallMs } = checker.budget;
    const outOfFiles = checker.filesChecked >= maxFilesChecked;
    // The clock is only consulted when the file-count limit has not tripped.
    const outOfTime = () => Date.now() - checker.startedAt >= maxWallMs;
    if (outOfFiles || outOfTime()) {
        checker.exhausted = true;
    }
    return checker.exhausted;
}
|
|
50
|
+
/**
 * Create an empty per-query outcome accumulator.
 *
 * Each property is an independent `Set` of project-relative paths that the
 * validation step fills in: files confirmed changed, files missing on disk,
 * files that could not be verified, and files skipped because the budget ran
 * out.
 *
 * @returns {{staleChangedPaths: Set, missingPaths: Set, uncheckedPaths: Set, budgetSkippedPaths: Set}}
 */
function newAccumulator() {
    const bucketNames = ['staleChangedPaths', 'missingPaths', 'uncheckedPaths', 'budgetSkippedPaths'];
    return Object.fromEntries(bucketNames.map((name) => [name, new Set()]));
}
|
|
58
|
+
/**
 * Decide whether one index entry may be served as a confident (fresh) result
 * by comparing its stored shard against the live file on disk.
 *
 * Steps, in order:
 *  1. Return the memoized verdict when this path was already validated.
 *  2. Stat the live file; a failed stat is recorded as a deletion (§6.1.2).
 *  3. Load the shard; without one the path is recorded as unchecked.
 *  4. Matching mtime and size means fresh, with no hashing (§6.1.3).
 *  5. A file larger than `maxParseFileBytes` is never hashed here and is
 *     recorded as unchecked (§6.1.4).
 *  6. When the budget is exhausted the path is recorded as both unchecked and
 *     budget-skipped (§6.1.6).
 *  7. Otherwise one budget slot is spent and the content hash is compared; an
 *     unreadable file is unchecked, a mismatch is a confirmed change (§6.1.5).
 *
 * @param {{path: string, file_id: string}} entry Index entry to validate.
 * @param {object} checker Shared budget/memo state from `makeLazyChecker`.
 * @param {object} acc Outcome accumulator from `newAccumulator`.
 * @param {string} projectRoot Directory that `entry.path` is joined onto.
 * @param {number} maxParseFileBytes Size above which a file is not hashed.
 * @param {string} cwd Forwarded to `readShard`.
 * @param {string} preferredDirName Forwarded to `readShard`.
 * @returns {boolean} True only when the entry was verified fresh.
 */
function validateEntry(entry, checker, acc, projectRoot, maxParseFileBytes, cwd, preferredDirName) {
    const relPath = entry.path;
    const remembered = checker.memo.get(relPath);
    if (remembered !== undefined) {
        return remembered;
    }
    // Record the path in the given outcome sets, memoize the verdict, return it.
    const conclude = (verdict, ...outcomeSets) => {
        for (const outcomeSet of outcomeSets) {
            outcomeSet.add(relPath);
        }
        checker.memo.set(relPath, verdict);
        return verdict;
    };
    const abs = path.join(projectRoot, relPath);
    let stat;
    try {
        stat = fs.statSync(abs);
    }
    catch {
        return conclude(false, acc.missingPaths); // §6.1.2 — deletion.
    }
    const shard = readShard(entry.file_id, cwd, preferredDirName);
    if (!shard) {
        // Nothing stored to compare against: unchecked, not confident.
        return conclude(false, acc.uncheckedPaths);
    }
    // §6.1.3 — cheap gate: identical mtime and size count as fresh.
    const sameMtime = stat.mtimeMs === shard.mtime_ms;
    if (sameMtime && stat.size === shard.size_bytes) {
        return conclude(true);
    }
    // §6.1.4 — oversized files are structurally unverifiable on the read path.
    // This is kept separate from budget exhaustion so the badge reason is accurate.
    if (stat.size > maxParseFileBytes) {
        return conclude(false, acc.uncheckedPaths);
    }
    // §6.1.6 — out of budget: unchecked, and flagged as a budget skip.
    if (budgetExhausted(checker)) {
        return conclude(false, acc.uncheckedPaths, acc.budgetSkippedPaths);
    }
    checker.filesChecked += 1;
    let live;
    try {
        live = fs.readFileSync(abs, 'utf-8');
    }
    catch {
        return conclude(false, acc.uncheckedPaths);
    }
    if (hashContent(live) === shard.file_hash) {
        return conclude(true); // content identical despite the mtime/size difference.
    }
    return conclude(false, acc.staleChangedPaths); // §6.1.5 — confirmed content change.
}
|
|
123
|
+
/**
 * Compute the freshness badge returned with a query response (spec §6.1, §9).
 *
 * `details` lists, each sorted, the changed, deleted and unchecked paths that
 * were recorded, plus a refresh hint when `emptyIndex` is set and the base
 * status is not already `missing_index`.
 *
 * Status precedence:
 *  - `partial` when the lazy-check budget ran out (flag set, or any path was
 *    budget-skipped). Only budget exhaustion produces `partial`; paths that
 *    are unchecked for other reasons do not.
 *  - otherwise `stale_changed_files` when any change or deletion was seen;
 *  - otherwise the manifest's base status.
 *
 * @param {string} base Status taken from the manifest.
 * @param {object} acc Accumulator filled during validation.
 * @param {boolean} budgetExhausted Whether the checker's budget was spent.
 * @param {boolean} hadConfidentMatch Accepted for signature compatibility; unused.
 * @param {boolean} emptyIndex Whether the query produced no candidates at all.
 * @returns {{status: string, details: object}}
 */
function deriveBadge(base, acc, budgetExhausted, hadConfidentMatch, emptyIndex) {
    void hadConfidentMatch;
    const sortedList = (paths) => [...paths].sort();
    const hasStale = acc.staleChangedPaths.size > 0;
    const hasMissing = acc.missingPaths.size > 0;
    const details = {};
    if (hasStale) {
        details.stale_changed_files = sortedList(acc.staleChangedPaths);
    }
    if (hasMissing) {
        details.deleted_files = sortedList(acc.missingPaths);
    }
    if (acc.uncheckedPaths.size > 0) {
        details.unchecked_files = sortedList(acc.uncheckedPaths);
    }
    if (emptyIndex && base !== 'missing_index') {
        // Suggest a refresh instead of implying the symbol does not exist.
        details.hint = 'missing_index_or_refresh';
    }
    const budgetRanOut = budgetExhausted || acc.budgetSkippedPaths.size > 0;
    if (budgetRanOut) {
        // `partial` takes the top-line status; confirmed-stale paths still
        // ride along in `details.stale_changed_files`.
        details.partial_reason = 'lazy_check_budget_exhausted';
        details.budget = { ...LAZY_BUDGET };
        return { status: 'partial', details };
    }
    const status = hasStale || hasMissing ? 'stale_changed_files' : base;
    return { status, details };
}
|
|
164
|
+
const DEFAULT_FIND_LIMIT = 20;
/**
 * Expand a query into the lowercase lookup tokens used against the symbols
 * index (intended to mirror the key normalization in indexes.ts, spec §5.6).
 *
 * The first token is always the whole query lowercased. The query is then
 * split on runs of non-alphanumeric characters, each piece is split again at
 * lower/digit-to-upper camelCase boundaries, and every non-empty fragment is
 * added in lowercase. Duplicates are dropped; first-seen order is kept.
 *
 * @param {string} query Raw query text.
 * @returns {string[]} Unique lowercase tokens.
 */
function queryTokens(query) {
    const camelBoundary = /([a-z0-9])([A-Z])/g;
    const fragments = query
        .split(/[^A-Za-z0-9]+/)
        .filter((part) => part.length > 0)
        .flatMap((part) => part.replace(camelBoundary, '$1 $2').split(/\s+/))
        .filter((fragment) => fragment.length > 0)
        .map((fragment) => fragment.toLowerCase());
    return [...new Set([query.toLowerCase(), ...fragments])];
}
|
|
179
|
+
/**
 * Score a symbol index entry against a query (case-insensitive).
 *
 * Name match tiers: exact = 10, prefix = 6, substring = 3, anything else = 1
 * (the entry was reached only through a sub-token bucket). The tier is
 * multiplied by `entry.score_hint` (exported 1.0 vs internal 0.8), then
 * components and hooks receive a flat +1 bonus.
 *
 * A missing `score_hint` is treated as 1 so an incomplete index entry yields
 * a usable number instead of `NaN`, which would make the ranking comparator
 * treat the entry as equal to everything.
 *
 * @param {{name: string, score_hint?: number, subtype?: string}} entry
 * @param {string} query Raw query text.
 * @returns {number} Relevance score; higher is better.
 */
function scoreEntry(entry, query) {
    const q = query.toLowerCase();
    const name = entry.name.toLowerCase();
    let tier;
    if (name === q) {
        tier = 10;
    }
    else if (name.startsWith(q)) {
        tier = 6;
    }
    else if (name.includes(q)) {
        tier = 3;
    }
    else {
        tier = 1; // matched only via a sub-token bucket
    }
    const hint = entry.score_hint ?? 1;
    const bonus = entry.subtype === 'component' || entry.subtype === 'hook' ? 1 : 0;
    return tier * hint + bonus;
}
|
|
201
|
+
/**
 * Pick the directory that index paths are resolved against.
 *
 * Preference order: a truthy `ctx.projectRoot`, then the manifest's
 * `project_root`, then `ctx.cwd`, then the process working directory.
 *
 * @param {{projectRoot?: string, cwd?: string, preferredDirName?: string}} ctx
 * @returns {string}
 */
function resolveRoot(ctx) {
    const { projectRoot, cwd, preferredDirName } = ctx;
    if (projectRoot) {
        return projectRoot;
    }
    const manifestRoot = readManifest(cwd, preferredDirName)?.project_root;
    return manifestRoot ?? cwd ?? process.cwd();
}
|
|
207
|
+
/**
 * Read the size limit above which files are not hashed on the read path.
 *
 * Falls back to 1 MiB when there is no manifest, when the manifest has no
 * `extractor_config`, or when the setting itself is absent. The lookup is
 * chained through `extractor_config` so a partial manifest falls back to the
 * default instead of throwing.
 *
 * @param {{cwd?: string, preferredDirName?: string}} ctx
 * @returns {number} Limit in bytes.
 */
function maxParseBytes(ctx) {
    const manifest = readManifest(ctx.cwd, ctx.preferredDirName);
    return manifest?.extractor_config?.max_parse_file_bytes ?? 1024 * 1024;
}
|
|
211
|
+
/**
 * Return the manifest's recorded freshness status, or `'missing_index'` when
 * no manifest can be read.
 *
 * @param {{cwd?: string, preferredDirName?: string}} ctx
 * @returns {string}
 */
function baseStatus(ctx) {
    const manifest = readManifest(ctx.cwd, ctx.preferredDirName);
    if (!manifest) {
        return 'missing_index';
    }
    return manifest.freshness.status;
}
|
|
215
|
+
/**
 * Collect candidate symbol entries for a query from the symbols index.
 *
 * Each token produced by `queryTokens` is looked up as a key of
 * `index.entries`; the entries of every matching bucket are returned in
 * first-seen order, de-duplicated by `node_id`.
 *
 * Only own-property array buckets are accepted. `index.entries` is a plain
 * object, so a token such as `constructor` or `__proto__` would otherwise
 * resolve to an inherited `Object.prototype` member, and iterating that value
 * throws.
 *
 * @param {{entries: Object<string, Array<{node_id: string}>>}} index
 * @param {string} query Raw query text.
 * @returns {Array<object>} Unique candidate entries.
 */
function gatherSymbolEntries(index, query) {
    const seen = new Set();
    const out = [];
    const buckets = index.entries;
    for (const token of queryTokens(query)) {
        if (!Object.prototype.hasOwnProperty.call(buckets, token)) {
            continue;
        }
        const bucket = buckets[token];
        if (!Array.isArray(bucket)) {
            continue;
        }
        for (const entry of bucket) {
            if (seen.has(entry.node_id)) {
                continue;
            }
            seen.add(entry.node_id);
            out.push(entry);
        }
    }
    return out;
}
|
|
232
|
+
/**
 * Look up symbols matching `query` and return the ranked confident matches
 * together with a freshness badge.
 *
 * When no symbols index can be read, the result is an empty match list with
 * a `missing_index` badge. Otherwise every candidate is lazily validated
 * against the live file (§6.1) and only confident candidates are scored and
 * returned. Matches are ordered by score (descending), then path, then name,
 * and truncated to `limit` (default `DEFAULT_FIND_LIMIT`).
 *
 * @param {string} query Raw query text.
 * @param {number|null|undefined} limit Maximum number of matches.
 * @param {{cwd?: string, preferredDirName?: string, projectRoot?: string}} ctx
 * @returns {{query: string, matches: object[], freshness_badge: {status: string, details: object}}}
 */
export function find(query, limit, ctx) {
    const { cwd, preferredDirName } = ctx;
    const base = baseStatus(ctx);
    const index = readSymbolsIndex(cwd, preferredDirName);
    if (!index) {
        const missingBadge = { status: 'missing_index', details: { hint: 'run refresh' } };
        return { query, matches: [], freshness_badge: missingBadge };
    }
    const root = resolveRoot(ctx);
    const maxBytes = maxParseBytes(ctx);
    const checker = makeLazyChecker();
    const acc = newAccumulator();
    const candidates = gatherSymbolEntries(index, query);
    // §6.1 — only entries that pass lazy validation are served as confident.
    const isConfident = (entry) => validateEntry(entry, checker, acc, root, maxBytes, cwd, preferredDirName);
    const toMatch = (entry) => ({
        node_id: entry.node_id,
        name: entry.name,
        path: entry.path,
        file_id: entry.file_id,
        kind: entry.kind,
        subtype: entry.subtype ?? null,
        score: scoreEntry(entry, query),
    });
    const byRelevance = (a, b) => b.score - a.score || a.path.localeCompare(b.path) || a.name.localeCompare(b.name);
    const ranked = candidates.filter(isConfident).map(toMatch);
    ranked.sort(byRelevance);
    const matches = ranked.slice(0, limit ?? DEFAULT_FIND_LIMIT);
    const freshness_badge = deriveBadge(base, acc, checker.exhausted, matches.length > 0, candidates.length === 0);
    return { query, matches, freshness_badge };
}
|
|
268
|
+
/** spec §11 — cap related memory at top 5 by relevance. */
export const RELATED_MEMORY_CAP = 5;
/**
 * Select the memory items most relevant to a set of candidate files and
 * symbol names (spec §11).
 *
 * Scoring per item:
 *  - each `related_paths` entry equal to a candidate path: +5, or sharing only
 *    its file name with a candidate: +3;
 *  - each candidate path mentioned literally in `text`: +2;
 *  - each candidate file name mentioned in `text`: +1;
 *  - each tag equal (case-insensitively) to a symbol name: +2.
 * Items scoring zero are dropped. The rest are ordered by score (descending)
 * then `id`, and the top `RELATED_MEMORY_CAP` are returned.
 *
 * All paths are normalized to forward slashes BEFORE their file name is
 * taken, and the file name is computed with POSIX rules. On a POSIX host a
 * backslash is not a separator for `path.basename`, so un-normalized input
 * such as `src\a\foo.js` would never match by file name.
 * Empty path strings are ignored because they are contained in every text.
 *
 * @param {Array<{id: string, related_paths?: string[], text?: string, tags?: string[]}>} items
 * @param {string[]} paths Candidate file paths.
 * @param {string[]} symbolNames Symbol names to match against tags.
 * @returns {Array<object>} Up to `RELATED_MEMORY_CAP` items, most relevant first.
 */
export function attachRelatedMemory(items, paths, symbolNames) {
    const toPosix = (p) => p.replace(/\\/g, '/');
    const normalizedPaths = paths.map(toPosix).filter((p) => p.length > 0);
    const pathSet = new Set(normalizedPaths);
    const baseNames = new Set(normalizedPaths.map((p) => path.posix.basename(p)).filter((bn) => bn.length > 0));
    const symLower = new Set(symbolNames.map((s) => s.toLowerCase()));
    const scored = [];
    for (const item of items) {
        let score = 0;
        // related_paths — strongest signal.
        for (const rp of item.related_paths ?? []) {
            const norm = toPosix(rp);
            if (pathSet.has(norm)) {
                score += 5;
            }
            else if (baseNames.has(path.posix.basename(norm))) {
                score += 3;
            }
        }
        // literal file-path / file-name mention in the memory text.
        const text = item.text ?? '';
        for (const p of pathSet) {
            if (text.includes(p)) {
                score += 2;
            }
        }
        for (const bn of baseNames) {
            if (text.includes(bn)) {
                score += 1;
            }
        }
        // tags matching a symbol name (e.g. tag "App" / "useAuth").
        for (const tag of item.tags ?? []) {
            if (symLower.has(tag.toLowerCase())) {
                score += 2;
            }
        }
        if (score > 0) {
            scored.push({ item, score });
        }
    }
    scored.sort((a, b) => b.score - a.score || a.item.id.localeCompare(b.item.id));
    return scored.slice(0, RELATED_MEMORY_CAP).map((s) => s.item);
}
|
|
311
|
+
/** spec §9 — the brief reading list is capped at 12 files. */
export const BRIEF_FILE_CAP = 12;
/**
 * Build the ranked suggested_files_to_read for a brief (spec §9; P1d graph signals).
 *
 * Relevance signals, highest first:
 *  - defining file of the matching symbol (+12)
 *  - reverse dependent — a file that imports the target (+5, blast radius)
 *  - forward dependency — a file the target imports, resolved (+4)
 *  - import-specifier heuristic (+3, weak fallback): the specifier contains
 *    the query, or contains the name of a directory holding a defining file
 *  - same directory as a defining file (+1)
 *
 * Scores accumulate per path, but the reported `reason` is that of the
 * strongest single signal. `graphDerived` stays true only while every signal
 * for the path came from the graph (reverse/forward rows). Each signal class
 * bumps a given path at most once, which bounds score runaway.
 *
 * Directory names that are empty, `.` or `..` are excluded from the
 * import-specifier heuristic. A defining file at the repository root has
 * directory `.`, and `.` is a substring of nearly every relative specifier,
 * so including it would give every importing file the +3 bump.
 *
 * @param {Array<{path: string, file_id: string, name: string, subtype?: string}>} defining
 * @param {Array<{path: string, file_id: string, reason: string}>} forwardRows
 * @param {Array<{path: string, file_id: string, reason: string}>} reverseRows
 * @param {{entries: Object<string, Array<{path: string, file_id: string}>>}} symbolsIndex
 * @param {{entries: Object<string, Array<{path: string, file_id: string}>>}|null|undefined} importsIndex
 * @param {string} query The brief target.
 * @returns {Array<{path: string, file_id: string, reason: string, score: number, bestDelta: number, graphDerived: boolean}>}
 *   Rows sorted by score (descending), then path.
 */
function rankFiles(defining, forwardRows, reverseRows, symbolsIndex, importsIndex, query) {
    const byPath = new Map();
    const bump = (p, fileId, reason, delta, graph) => {
        const cur = byPath.get(p);
        if (cur) {
            cur.score += delta;
            if (delta > cur.bestDelta) {
                cur.bestDelta = delta;
                cur.reason = reason;
            }
            cur.graphDerived = cur.graphDerived && graph; // graph-only iff every signal is graph
        }
        else {
            byPath.set(p, { path: p, file_id: fileId, reason, score: delta, bestDelta: delta, graphDerived: graph });
        }
    };
    // 1. defining files — strongest, non-graph.
    const definingDirs = new Set();
    for (const entry of defining) {
        const subtypeNote = entry.subtype ? ` (${entry.subtype})` : '';
        bump(entry.path, entry.file_id, `defines matching symbol ${entry.name}${subtypeNote}`, 12, false);
        definingDirs.add(path.posix.dirname(entry.path.replace(/\\/g, '/')));
    }
    // 2. reverse dependents — who imports the target = blast radius.
    for (const r of reverseRows) {
        bump(r.path, r.file_id, r.reason, 5, true);
    }
    // 3. forward dependencies — files the target imports (resolved).
    for (const f of forwardRows) {
        bump(f.path, f.file_id, f.reason, 4, true);
    }
    // 4. import-specifier heuristic — weak fallback; real graph rows outrank it.
    //    A file is bumped ONCE even if it matches several specifiers, so the
    //    weak signal cannot accumulate.
    if (importsIndex) {
        const qLower = query.toLowerCase();
        // Computed once, not per specifier; degenerate names would match everything.
        const definingDirNames = [...definingDirs]
            .map((d) => path.posix.basename(d))
            .filter((name) => name !== '' && name !== '.' && name !== '..');
        const heuristicPaths = new Map();
        for (const [moduleSpec, entries] of Object.entries(importsIndex.entries)) {
            const specLower = moduleSpec.toLowerCase();
            const relevant = specLower.includes(qLower) ||
                definingDirNames.some((name) => moduleSpec.includes(name));
            if (!relevant) {
                continue;
            }
            for (const e of entries) {
                if (!heuristicPaths.has(e.path)) {
                    heuristicPaths.set(e.path, { fileId: e.file_id, reason: `imports ${moduleSpec}` });
                }
            }
        }
        for (const [p, { fileId, reason }] of heuristicPaths) {
            bump(p, fileId, reason, 3, false);
        }
    }
    // 5. files sharing a directory with a defining file — bumped ONCE per file.
    //    The symbols index repeats a file across every symbol and token bucket;
    //    without dedup a symbol-dense file would collect +1 many times over and
    //    bury the real graph signals.
    if (definingDirs.size > 0) {
        const sameDirPaths = new Map(); // path -> file_id
        for (const bucket of Object.values(symbolsIndex.entries)) {
            for (const entry of bucket) {
                const dir = path.posix.dirname(entry.path.replace(/\\/g, '/'));
                if (definingDirs.has(dir) && !sameDirPaths.has(entry.path)) {
                    sameDirPaths.set(entry.path, entry.file_id);
                }
            }
        }
        for (const [p, fid] of sameDirPaths) {
            bump(p, fid, `shares directory with the matching symbol`, 1, false);
        }
    }
    return [...byPath.values()].sort((a, b) => b.score - a.score || a.path.localeCompare(b.path));
}
|
|
394
|
+
/**
 * Index every symbol entry by its `node_id`.
 *
 * Entries repeat across token buckets in the symbols index; the first
 * occurrence of each `node_id` (in bucket iteration order) is kept.
 *
 * @param {{entries: Object<string, Array<{node_id: string}>>}} symbolsIndex
 * @returns {Map<string, object>} node_id -> symbol index entry.
 */
function buildNodeIdIndex(symbolsIndex) {
    const byNodeId = new Map();
    const allEntries = Object.values(symbolsIndex.entries).flat();
    allEntries.forEach((entry) => {
        if (!byNodeId.has(entry.node_id)) {
            byNodeId.set(entry.node_id, entry);
        }
    });
    return byNodeId;
}
|
|
404
|
+
/**
 * Collect the forward dependencies of the target: files that the confident
 * defining files import.
 *
 * For each distinct file id in `confidentDefiningFileIds`, the shard is
 * loaded and its `imports_symbol` edges are followed through `nodeIndex` to
 * the imported symbol's index entry. One row is produced per imported file
 * path (first edge wins). Edges whose target is not in `nodeIndex`, and file
 * ids without a readable shard, are skipped.
 *
 * The caller passes only defining files that passed validation, so a stale
 * importer shard's edge list is never consulted.
 *
 * @param {Map<string, string>} confidentDefiningFileIds path -> file_id.
 * @param {Map<string, {path: string, file_id: string, name: string}>} nodeIndex
 * @param {string} cwd Forwarded to `readShard`.
 * @param {string} preferredDirName Forwarded to `readShard`.
 * @returns {Array<{path: string, file_id: string, reason: string}>}
 */
function forwardDeps(confidentDefiningFileIds, nodeIndex, cwd, preferredDirName) {
    const rowsByPath = new Map();
    const uniqueFileIds = new Set(confidentDefiningFileIds.values());
    for (const fileId of uniqueFileIds) {
        const shard = readShard(fileId, cwd, preferredDirName);
        if (!shard) {
            continue;
        }
        const importEdges = shard.edges.filter((edge) => edge.kind === 'imports_symbol');
        for (const { to } of importEdges) {
            const target = nodeIndex.get(to);
            if (!target || rowsByPath.has(target.path)) {
                continue;
            }
            rowsByPath.set(target.path, {
                path: target.path,
                file_id: target.file_id,
                reason: `imported by the matching symbol (resolved): ${target.name}`,
            });
        }
    }
    return Array.from(rowsByPath.values());
}
|
|
435
|
+
/**
 * Collect the reverse dependents of the target (its blast radius) from the
 * resolution index.
 *
 * Symbol-level dependents (`dependents_by_symbol`, keyed by node id) are
 * added first because their reason names the symbol; file-level dependents
 * (`dependents_by_file`, keyed by defining path) follow and cover
 * default/namespace imports and path-target briefs. Rows are de-duplicated by
 * importer path, so the first reason recorded for a path is the one kept.
 *
 * @param {{dependents_by_symbol: object, dependents_by_file: object}|null|undefined} resolutionIndex
 * @param {Iterable<string>} definingPaths Paths of the defining files.
 * @param {Iterable<[string, {name: string}]>} definingByNodeId node_id -> defining entry.
 * @returns {Array<{path: string, file_id: string, reason: string}>} Empty when there is no resolution index.
 */
function reverseDeps(resolutionIndex, definingPaths, definingByNodeId) {
    if (!resolutionIndex) {
        return [];
    }
    const rowsByImporter = new Map();
    const remember = (dep, reason) => {
        if (rowsByImporter.has(dep.path)) {
            return;
        }
        rowsByImporter.set(dep.path, { path: dep.path, file_id: dep.file_id, reason });
    };
    // Symbol-level rows first: more precise, they name the symbol.
    for (const [nodeId, entry] of definingByNodeId) {
        const symbolDependents = resolutionIndex.dependents_by_symbol[nodeId] ?? [];
        for (const dep of symbolDependents) {
            remember(dep, `imports the matching symbol ${entry.name}`);
        }
    }
    // File-level rows second.
    for (const definingPath of definingPaths) {
        const fileName = path.posix.basename(definingPath.replace(/\\/g, '/'));
        const fileDependents = resolutionIndex.dependents_by_file[definingPath] ?? [];
        for (const dep of fileDependents) {
            remember(dep, `imports ${fileName}`);
        }
    }
    return [...rowsByImporter.values()];
}
|
|
463
|
+
/**
 * Find indexed files whose path matches the target directly (used for
 * path-target briefs).
 *
 * Both the target and each entry path are compared with backslashes
 * converted to forward slashes. A file matches when its normalized path
 * contains the normalized target anywhere; that also covers an exact match
 * and a trailing-segment match. One entry is returned per distinct file, the
 * first encountered in bucket iteration order.
 *
 * @param {{entries: Object<string, Array<{path: string}>>}} symbolsIndex
 * @param {string} target Path or path fragment to look for.
 * @returns {Array<object>} One symbol index entry per matching file.
 */
function filesMatchingPath(symbolsIndex, target) {
    const needle = target.replace(/\\/g, '/');
    const firstEntryByPath = new Map();
    for (const bucket of Object.values(symbolsIndex.entries)) {
        for (const entry of bucket) {
            if (firstEntryByPath.has(entry.path)) {
                continue;
            }
            const candidate = entry.path.replace(/\\/g, '/');
            if (candidate.includes(needle)) {
                firstEntryByPath.set(entry.path, entry);
            }
        }
    }
    return [...firstEntryByPath.values()];
}
|
|
479
|
+
/**
 * Produce an orientation brief for a target symbol or path: a ranked reading
 * list, related memory items, and a freshness badge.
 *
 * Target resolution prefers symbols whose name equals the target
 * (case-insensitively); otherwise it uses the fuzzy token matches, and only
 * when there are none at all does it fall back to a path match.
 *
 * Graph signals: forward dependencies are read only from defining shards that
 * passed validation; reverse dependents come from the resolution index. Every
 * ranked file is then lazily validated (§6.1): deleted files are dropped,
 * graph-only rows that are not confident are suppressed, and other
 * stale/unchecked rows are kept (the badge flags them). The list is capped at
 * `min(limit, BRIEF_FILE_CAP)`.
 *
 * Memory ids are attached per file by comparing forward-slash-normalized
 * paths and POSIX file names, so a `related_paths` entry written with
 * backslashes still matches on a POSIX host. A missing `memoryReader`, or one
 * returning null/undefined, is treated as "no memory items".
 *
 * @param {string} target Symbol name or path to orient on.
 * @param {number|null|undefined} limit Requested maximum number of files.
 * @param {{cwd?: string, preferredDirName?: string, projectRoot?: string}} ctx
 * @param {(ctx: object) => Array<object>} [memoryReader] Supplies memory items.
 * @returns {{target: string, suggested_files_to_read: object[], related_memory: object[], freshness_badge: {status: string, details: object}}}
 */
export function brief(target, limit, ctx, memoryReader) {
    const base = baseStatus(ctx);
    const symbolsIndex = readSymbolsIndex(ctx.cwd, ctx.preferredDirName);
    if (!symbolsIndex) {
        return {
            target,
            suggested_files_to_read: [],
            related_memory: [],
            freshness_badge: { status: 'missing_index', details: { hint: 'run refresh' } },
        };
    }
    const importsIndex = readImportsIndex(ctx.cwd, ctx.preferredDirName);
    const resolutionIndex = readResolutionIndex(ctx.cwd, ctx.preferredDirName);
    // Resolve target -> defining symbol entries. A brief orients on a SPECIFIC
    // target, so exact name matches win; otherwise the token index would flood
    // the result with unrelated same-token symbols and bury the real defining
    // file. (find() stays fuzzy by design.)
    let defining = gatherSymbolEntries(symbolsIndex, target);
    const targetLower = target.toLowerCase();
    const exact = defining.filter((e) => e.name.toLowerCase() === targetLower);
    if (exact.length > 0) {
        defining = exact;
    }
    else if (defining.length === 0) {
        defining = filesMatchingPath(symbolsIndex, target);
    }
    const root = resolveRoot(ctx);
    const maxBytes = maxParseBytes(ctx);
    const checker = makeLazyChecker();
    const acc = newAccumulator();
    // Validate defining files first: only confident shards feed forward deps.
    const definingPaths = new Set(defining.map((e) => e.path));
    const definingByNodeId = new Map(defining.map((e) => [e.node_id, e]));
    const confidentDefiningFileIds = new Map();
    for (const e of defining) {
        if (confidentDefiningFileIds.has(e.path)) {
            continue;
        }
        const ok = validateEntry({ path: e.path, file_id: e.file_id }, checker, acc, root, maxBytes, ctx.cwd, ctx.preferredDirName);
        if (ok) {
            confidentDefiningFileIds.set(e.path, e.file_id);
        }
    }
    const nodeIndex = buildNodeIdIndex(symbolsIndex);
    const fwd = forwardDeps(confidentDefiningFileIds, nodeIndex, ctx.cwd, ctx.preferredDirName);
    const rev = reverseDeps(resolutionIndex, definingPaths, definingByNodeId);
    const ranked = rankFiles(defining, fwd, rev, symbolsIndex, importsIndex, target);
    // §6.1 — lazily validate each suggested file.
    const confident = [];
    for (const rf of ranked) {
        const ok = validateEntry({ path: rf.path, file_id: rf.file_id }, checker, acc, root, maxBytes, ctx.cwd, ctx.preferredDirName);
        if (acc.missingPaths.has(rf.path)) {
            continue; // deletion: exclude entirely (still recorded in the badge).
        }
        if (rf.graphDerived && !ok) {
            continue; // graph-only + not confident: no silent stale graph hints.
        }
        // Non-graph stale/unchecked rows still appear (badge flags them) so the
        // agent knows the file exists but may be out of date.
        confident.push(rf);
    }
    const cap = Math.min(limit ?? BRIEF_FILE_CAP, BRIEF_FILE_CAP);
    const capped = confident.slice(0, cap);
    // Related memory (spec §11): match by the candidate paths + symbol names.
    const candidatePaths = capped.map((f) => f.path);
    const symbolNames = [...new Set(defining.map((e) => e.name))];
    if (symbolNames.length === 0) {
        symbolNames.push(target);
    }
    const memoryItems = (typeof memoryReader === 'function' ? memoryReader(ctx) : null) ?? [];
    const related = attachRelatedMemory(memoryItems, candidatePaths, symbolNames);
    // Attach matching memory ids per file (those whose related_paths/text name it).
    const toPosix = (p) => p.replace(/\\/g, '/');
    const suggested = capped.map((f) => {
        // Computed once per file rather than once per memory item.
        const fileNorm = toPosix(f.path);
        const fileBase = path.posix.basename(fileNorm);
        const ids = related
            .filter((m) => {
            const inPaths = (m.related_paths ?? []).some((rp) => {
                // Normalize BEFORE taking the file name so backslash paths match.
                const rpNorm = toPosix(rp);
                return rpNorm === fileNorm || path.posix.basename(rpNorm) === fileBase;
            });
            const text = m.text ?? '';
            return inPaths || text.includes(fileNorm) || text.includes(fileBase);
        })
            .map((m) => m.id);
        return { path: f.path, reason: f.reason, score: f.score, related_memory_ids: ids };
    });
    const badge = deriveBadge(base, acc, checker.exhausted, capped.length > 0, ranked.length === 0);
    return {
        target,
        suggested_files_to_read: suggested,
        related_memory: related,
        freshness_badge: badge,
    };
}
|
|
568
|
+
//# sourceMappingURL=query.js.map
|
|
Binary file
|