brainclaw 1.9.1 → 1.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +47 -1
- package/dist/brainclaw-vscode.vsix +0 -0
- package/dist/cli.js +18 -1
- package/dist/commands/code-map.js +129 -0
- package/dist/commands/codev.js +7 -0
- package/dist/commands/mcp.js +121 -0
- package/dist/commands/run-profile.js +3 -2
- package/dist/commands/switch.js +100 -89
- package/dist/core/agent-files.js +12 -0
- package/dist/core/code-map/backend.js +123 -0
- package/dist/core/code-map/core.js +81 -0
- package/dist/core/code-map/drafts.js +2 -0
- package/dist/core/code-map/extractor.js +29 -0
- package/dist/core/code-map/finalizer.js +191 -0
- package/dist/core/code-map/freshness.js +108 -0
- package/dist/core/code-map/ids.js +0 -0
- package/dist/core/code-map/importable.js +35 -0
- package/dist/core/code-map/indexes.js +197 -0
- package/dist/core/code-map/lang/java/imports.scm +17 -0
- package/dist/core/code-map/lang/java/index.js +254 -0
- package/dist/core/code-map/lang/java/tags.scm +48 -0
- package/dist/core/code-map/lang/php/imports.scm +21 -0
- package/dist/core/code-map/lang/php/index.js +251 -0
- package/dist/core/code-map/lang/php/tags.scm +44 -0
- package/dist/core/code-map/lang/provider.js +9 -0
- package/dist/core/code-map/lang/providers.js +24 -0
- package/dist/core/code-map/lang/python/imports.scm +90 -0
- package/dist/core/code-map/lang/python/index.js +364 -0
- package/dist/core/code-map/lang/python/tags.scm +81 -0
- package/dist/core/code-map/lang/query-runtime.js +374 -0
- package/dist/core/code-map/lang/registry.js +125 -0
- package/dist/core/code-map/lang/typescript/imports.scm +90 -0
- package/dist/core/code-map/lang/typescript/index.js +306 -0
- package/dist/core/code-map/lang/typescript/tags.js.scm +106 -0
- package/dist/core/code-map/lang/typescript/tags.scm +151 -0
- package/dist/core/code-map/lock.js +210 -0
- package/dist/core/code-map/materialized.js +51 -0
- package/dist/core/code-map/memory-reader.js +59 -0
- package/dist/core/code-map/paths.js +53 -0
- package/dist/core/code-map/query.js +568 -0
- package/dist/core/code-map/refresh.js +0 -0
- package/dist/core/code-map/resolve.js +177 -0
- package/dist/core/code-map/store.js +206 -0
- package/dist/core/code-map/types.js +288 -0
- package/dist/core/code-map/vocabulary.js +57 -0
- package/dist/core/code-map/wasm-loader.js +294 -0
- package/dist/core/code-map/work-section.js +206 -0
- package/dist/core/codev-rounds.js +4 -0
- package/dist/core/execution-adapters.js +11 -10
- package/dist/core/execution-profile.js +58 -0
- package/dist/core/facade-schema.js +9 -0
- package/dist/core/instruction-templates.js +2 -0
- package/dist/core/mcp-command-resolution.js +3 -1
- package/dist/core/store-resolution.js +41 -4
- package/dist/facts.js +9 -5
- package/dist/facts.json +8 -4
- package/dist/vendor/web-tree-sitter/tree-sitter.js +3980 -0
- package/dist/vendor/web-tree-sitter/tree-sitter.wasm +0 -0
- package/dist/wasm/tree-sitter-java.wasm +0 -0
- package/dist/wasm/tree-sitter-javascript.wasm +0 -0
- package/dist/wasm/tree-sitter-php.wasm +0 -0
- package/dist/wasm/tree-sitter-python.wasm +0 -0
- package/dist/wasm/tree-sitter-tsx.wasm +0 -0
- package/dist/wasm/tree-sitter-typescript.wasm +0 -0
- package/dist/wasm/tree-sitter.wasm +0 -0
- package/docs/cli.md +46 -8
- package/docs/code-map.md +198 -0
- package/docs/integrations/mcp.md +13 -6
- package/docs/mcp-schema-changelog.md +7 -3
- package/docs/quickstart.md +1 -1
- package/package.json +11 -6
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Code Map P1a — CORE finalizer (spec §3, §6; dec#108 #1, dec#109 P0 #4 / P1 #5/#6/#7).
|
|
3
|
+
*
|
|
4
|
+
* The finalizer is the ONE AND ONLY identity authority. Providers hand it typed,
|
|
5
|
+
* id-free {@link ExtractionDraft}s; this module reproduces TODAY'S exact legacy
|
|
6
|
+
* `extractor.ts` output: byte-identical node/edge IDs (via `ids.ts`), spans,
|
|
7
|
+
* confidence, the `exported` flag, and — critically — the LEGACY SOURCE-APPEND
|
|
8
|
+
* ORDER. Nothing here sorts node/edge content: draft items carry an `ordinal`
|
|
9
|
+
* (their traversal position) and the finalizer replays them in that order.
|
|
10
|
+
*
|
|
11
|
+
* Emission contract (mirrors the legacy `addSymbol` / `addModule` / `markOrAddExport`):
|
|
12
|
+
* - File node FIRST.
|
|
13
|
+
* - Per definition (ascending ordinal): symbol node, `contains` edge, `defines` edge.
|
|
14
|
+
* - Per import / re-export source: `module` node, `imports` edge.
|
|
15
|
+
* - Per export clause / default-id (`markOrAddExport`): if the name matches an
|
|
16
|
+
* already-emitted symbol, set that node `exported=true` + emit ONE `exports`
|
|
17
|
+
* edge to it (no new node); otherwise fabricate an `export`-subtype symbol
|
|
18
|
+
* (node + `contains` + `defines`) then emit the `exports` edge to it.
|
|
19
|
+
*
|
|
20
|
+
* The `exported` FLAG is NOT an `exports` EDGE: in-place exported declarations set
|
|
21
|
+
* only `node.exported=true`; only export clauses / default-identifier exports emit
|
|
22
|
+
* an `exports` edge (and they ALSO flip the referenced node's flag, matching legacy).
|
|
23
|
+
*
|
|
24
|
+
* Output nodes/edges are validated against the `types.ts` zod schemas before return.
|
|
25
|
+
*/
|
|
26
|
+
import { edgeId, fileNodeId, nodeId } from './ids.js';
|
|
27
|
+
import { EdgeSchema, NodeSchema } from './types.js';
|
|
28
|
+
/**
 * Build the `sym:`-prefixed node id for a symbol.
 *
 * The id is `nodeId(...)` applied to the symbol's identity fields (project,
 * file path, language, subtype, name and the span's start line/column),
 * prefixed with `sym:`. Only the START of the span participates.
 */
function symNodeId(projectId, path, lang, subtype, name, span) {
    const { start_line: startLine, start_col: startCol } = span;
    const digest = nodeId({
        projectId,
        path,
        lang,
        kind: 'symbol',
        subtype,
        name,
        startLine,
        startCol,
    });
    return `sym:${digest}`;
}
|
|
41
|
+
/**
 * Build the `module:`-prefixed node id for an imported module specifier.
 *
 * The id is `nodeId(...)` applied to the project, file path, language, the
 * specifier (`source`, used as the name) and the span's start line/column,
 * prefixed with `module:`. Module nodes never carry a subtype.
 */
function moduleNodeId(projectId, path, lang, source, span) {
    const { start_line: startLine, start_col: startCol } = span;
    const digest = nodeId({
        projectId,
        path,
        lang,
        kind: 'module',
        subtype: null,
        name: source,
        startLine,
        startCol,
    });
    return `module:${digest}`;
}
|
|
54
|
+
/**
 * Build the final extraction result (`parseStatus`, `nodes`, `edges`,
 * `diagnostics`) from an id-free provider draft.
 *
 * Every node/edge id is minted here from `input.projectId`, `input.path` and
 * `input.lang`. The file node is always emitted first; the draft's definitions,
 * imports and exports are then merged into one list, ordered by `ordinal`, and
 * replayed one by one:
 *   - definition: symbol node + `contains` edge + `defines` edge;
 *   - import: `module` node (carrying a copy of `importedNames`) + `imports` edge;
 *   - export: if a symbol with that name was already emitted, flag that node
 *     `exported` and point an `exports` edge at it; otherwise create an
 *     `export`-subtype symbol first and point the edge at that.
 *
 * `parseStatus` comes from `draft.attributes.parseStatus` (default `'parsed'`)
 * and `diagnostics` is a shallow copy of each entry of `draft.facts`. All nodes
 * and edges are passed through `NodeSchema.parse` / `EdgeSchema.parse` before
 * returning, so a schema violation surfaces as whatever those calls throw.
 */
export function finalize(draft, input) {
    const { projectId, path, lang } = input;
    const fileNode = fileNodeId(projectId, path, lang);
    // The file node always occupies index 0 of the output.
    const nodes = [
        {
            id: fileNode,
            kind: 'file',
            subtype: null,
            lang,
            name: path,
            path,
            span: null,
            exported: false,
            confidence: 1.0,
            related_memory_ids: [],
            imported_names: [],
        },
    ];
    const edges = [];
    // Symbol name -> id of the most recently emitted symbol with that name.
    const byName = new Map();
    // Node id -> position in `nodes`, so an export can flip `exported` later.
    const nodeIndexById = new Map();

    // Every edge produced here starts at the file node with confidence 1.0.
    const linkFromFile = (kind, to, line) => {
        edges.push({
            id: edgeId({ projectId, from: fileNode, to, kind }),
            from: fileNode,
            to,
            kind,
            confidence: 1.0,
            source: { path, line },
        });
    };
    // Append a node and remember where it landed.
    const appendNode = (node) => {
        nodeIndexById.set(node.id, nodes.length);
        nodes.push(node);
    };
    // Emit a symbol node followed by its `contains` and `defines` edges.
    const pushSymbol = (subtype, name, span, exported, confidence) => {
        const id = symNodeId(projectId, path, lang, subtype, name, span);
        appendNode({
            id,
            kind: 'symbol',
            subtype,
            lang,
            name,
            path,
            span,
            exported,
            confidence,
            related_memory_ids: [],
            imported_names: [],
        });
        byName.set(name, id);
        linkFromFile('contains', id, span.start_line);
        linkFromFile('defines', id, span.start_line);
        return id;
    };

    // Tag each draft item with its kind, then order the combined list by
    // ordinal. Ties keep the definitions -> imports -> exports concatenation
    // order because Array.prototype.sort is stable.
    const tag = (kind, refs) => refs.map((ref) => ({ kind, ordinal: ref.ordinal, ref }));
    const items = [
        ...tag('def', draft.definitions),
        ...tag('import', draft.imports),
        ...tag('export', draft.exports),
    ];
    items.sort((a, b) => a.ordinal - b.ordinal);

    for (const { kind, ref } of items) {
        switch (kind) {
            case 'def': {
                pushSymbol(ref.subtype, ref.name, ref.span, ref.exported === true, ref.confidence ?? 1.0);
                break;
            }
            case 'import': {
                const id = moduleNodeId(projectId, path, lang, ref.source, ref.span);
                appendNode({
                    id,
                    kind: 'module',
                    subtype: null,
                    lang,
                    name: ref.source,
                    path,
                    span: ref.span,
                    exported: false,
                    confidence: ref.confidence ?? 1.0,
                    related_memory_ids: [],
                    imported_names: [...ref.importedNames],
                });
                linkFromFile('imports', id, ref.span.start_line);
                break;
            }
            default: {
                // Export clause / default identifier: mark the existing symbol
                // or fabricate an `export`-subtype one, then link to it.
                const known = byName.get(ref.name);
                let target;
                if (known) {
                    const position = nodeIndexById.get(known);
                    if (position !== undefined) {
                        nodes[position] = { ...nodes[position], exported: true };
                    }
                    target = known;
                }
                else {
                    target = pushSymbol('export', ref.name, ref.span, true, ref.confidence ?? 1.0);
                }
                linkFromFile('exports', target, ref.span.start_line);
                break;
            }
        }
    }

    const parseStatus = draft.attributes?.parseStatus ?? 'parsed';
    const diagnostics = draft.facts.map((fact) => ({ ...fact }));
    // Validate everything against the durable schemas before handing it back.
    nodes.forEach((node) => {
        NodeSchema.parse(node);
    });
    edges.forEach((edge) => {
        EdgeSchema.parse(edge);
    });
    return { parseStatus, nodes, edges, diagnostics };
}
|
|
191
|
+
//# sourceMappingURL=finalizer.js.map
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Write-side freshness hashing + per-shard freshness classification
|
|
3
|
+
* (spec §5.1, §6.2, §12.4).
|
|
4
|
+
*
|
|
5
|
+
* The READ-path lazy freshness check (§6.1) is Sprint 3 — NOT implemented here.
|
|
6
|
+
* This module owns only:
|
|
7
|
+
* - `computeExtractorConfigHash` — sha256 of a stable serialization of
|
|
8
|
+
* extractor_config + the active language set + (P1a) the registry's
|
|
9
|
+
* `configHashInputs()` (provider versions + every query-asset hash). Changing
|
|
10
|
+
* ignore rules, size caps, supported extensions, query budget, active langs, a
|
|
11
|
+
* provider version, OR a tags/imports `.scm` => stale_extractor.
|
|
12
|
+
* NOTE: grammar/engine hashes are deliberately NOT folded in (spec §6.2):
|
|
13
|
+
* stale_grammar (changed parse binary) is kept separable from stale_extractor.
|
|
14
|
+
* - `shardFreshnessStatus` — classify a stored shard against the current
|
|
15
|
+
* extractor_config_hash + per-language grammar hashes.
|
|
16
|
+
*/
|
|
17
|
+
import crypto from 'node:crypto';
|
|
18
|
+
/**
 * Stable serialization used as hash input: object keys are sorted recursively
 * so the result does not depend on property insertion order.
 *
 * Values that JSON cannot represent (`undefined`, functions, symbols) follow
 * `JSON.stringify` semantics inside containers:
 *   - as an array element (including holes) they become `null`, so
 *     `[undefined]` no longer collapses to the same text as `[]`;
 *   - as an object property they are omitted, so `{ a: undefined }` and `{}`
 *     serialize (and therefore hash) identically.
 * Inputs that contain none of those values serialize exactly as before.
 *
 * @param {unknown} value - Any JSON-like value.
 * @returns {string | undefined} The serialized text; `undefined` only when the
 *   top-level value itself is not JSON-representable.
 */
function stableStringify(value) {
    if (value === null || typeof value !== 'object') {
        // Primitives: JSON.stringify yields undefined for undefined/function/symbol.
        return JSON.stringify(value);
    }
    if (Array.isArray(value)) {
        // Array.from visits holes too, so sparse arrays get explicit nulls.
        const items = Array.from(value, (item) => stableStringify(item) ?? 'null');
        return `[${items.join(',')}]`;
    }
    const members = [];
    for (const key of Object.keys(value).sort()) {
        const serialized = stableStringify(value[key]);
        // Drop non-representable members instead of emitting the text "undefined".
        if (serialized === undefined) {
            continue;
        }
        members.push(`${JSON.stringify(key)}:${serialized}`);
    }
    return `{${members.join(',')}}`;
}
|
|
28
|
+
/**
 * Hash the inputs that determine extractor output.
 *
 * The payload has three parts: the extractor config as given, a sorted copy of
 * the active language list, and the registry fingerprint. It is serialized with
 * `stableStringify` and digested with SHA-256. A `registryInputs` that is
 * omitted, `undefined` or `null` is hashed as `null`, so callers that pass only
 * the first two arguments keep getting the same hash for the same config.
 *
 * @returns {string} `sha256:` followed by the hex digest.
 */
export function computeExtractorConfigHash(config, activeLanguages, registryInputs) {
    // Sort a copy so the caller's list is left untouched.
    const sortedLanguages = [...activeLanguages];
    sortedLanguages.sort();
    const serialized = stableStringify({
        extractor_config: config,
        active_languages: sortedLanguages,
        registry: registryInputs ?? null,
    });
    const hasher = crypto.createHash('sha256');
    hasher.update(serialized);
    return `sha256:${hasher.digest('hex')}`;
}
|
|
46
|
+
/**
 * Classify one stored shard as `'stale_extractor'`, `'stale_grammar'` or `'fresh'`.
 *
 * Checks run in this order and the first hit wins:
 *   1. the shard's `extractor_config_hash` differs from
 *      `input.currentExtractorConfigHash` -> `'stale_extractor'`;
 *   2. `input.grammarHashFor(shard.lang)` returns something other than
 *      `undefined`, the shard recorded a grammar hash (not null/undefined), and
 *      the two differ -> `'stale_grammar'`;
 *   3. anything else -> `'fresh'`.
 * When either side of the grammar comparison is unavailable the shard is NOT
 * treated as stale.
 */
export function shardFreshnessStatus(input) {
    const stored = input.shard;
    const extractorMatches = stored.extractor_config_hash === input.currentExtractorConfigHash;
    if (!extractorMatches) {
        return 'stale_extractor';
    }
    const expected = input.grammarHashFor(stored.lang);
    const recorded = stored.tree_sitter_grammar_hash;
    // Only compare when both hashes are actually known.
    const comparable = expected !== undefined && recorded != null;
    return comparable && recorded !== expected ? 'stale_grammar' : 'fresh';
}
|
|
69
|
+
/**
 * Collapse per-shard freshness into one manifest-level summary object
 * `{ status, stale_file_count, partial_reason }` (`partial_reason` is always
 * `null` here).
 *
 *   - no shards at all        -> `missing_index`, count 0;
 *   - no shard has a stale_*  -> `fresh`, count 0;
 *   - otherwise the status with the highest tally wins and the count is the
 *     total number of stale shards. Ties resolve in the order
 *     `stale_extractor`, then `stale_grammar`, then `stale_changed_files`.
 * Shards with any other status are ignored by the tally.
 */
export function summarizeFreshness(shards) {
    const summary = (status, staleFileCount) => ({
        status,
        stale_file_count: staleFileCount,
        partial_reason: null,
    });
    if (shards.length === 0) {
        return summary('missing_index', 0);
    }
    const tally = new Map([
        ['stale_extractor', 0],
        ['stale_grammar', 0],
        ['stale_changed_files', 0],
    ]);
    for (const shard of shards) {
        const reason = shard.freshness.status;
        if (tally.has(reason)) {
            tally.set(reason, tally.get(reason) + 1);
        }
    }
    const extractor = tally.get('stale_extractor');
    const grammar = tally.get('stale_grammar');
    const changed = tally.get('stale_changed_files');
    const total = extractor + grammar + changed;
    if (total === 0) {
        return summary('fresh', 0);
    }
    // Pick the dominant reason for the manifest badge.
    let dominant;
    if (extractor >= grammar && extractor >= changed) {
        dominant = 'stale_extractor';
    }
    else if (grammar >= changed) {
        dominant = 'stale_grammar';
    }
    else {
        dominant = 'stale_changed_files';
    }
    return summary(dominant, total);
}
|
|
108
|
+
//# sourceMappingURL=freshness.js.map
|
|
Binary file
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { defaultImportableSymbol } from './lang/provider.js';
|
|
2
|
+
/**
 * Group one file's importable symbols by name.
 *
 * Only nodes with `kind: 'symbol'` are candidates. Each candidate is kept when
 * the provider's `isImportableSymbol(node, allSymbols)` hook accepts it; when
 * the provider (or the hook) is absent, `defaultImportableSymbol(node)` decides
 * instead. The hook receives the complete symbol list as its second argument.
 *
 * @returns {Map<string, object[]>} name -> accepted symbol nodes, in input order.
 */
export function buildImportableIndex(nodes, provider) {
    const symbols = nodes.filter((node) => node.kind === 'symbol');
    const useProviderHook = Boolean(provider?.isImportableSymbol);
    const accepts = (candidate) => {
        if (useProviderHook) {
            return provider.isImportableSymbol(candidate, symbols);
        }
        return defaultImportableSymbol(candidate);
    };
    const index = new Map();
    for (const candidate of symbols) {
        if (!accepts(candidate)) {
            continue;
        }
        const bucket = index.get(candidate.name);
        if (bucket) {
            bucket.push(candidate);
        }
        else {
            index.set(candidate.name, [candidate]);
        }
    }
    return index;
}
|
|
25
|
+
/**
 * Return the single importable symbol registered under `name`, or `null`.
 *
 * `null` covers both "no entry for this name" and "more than one candidate"
 * (an ambiguous name is never resolved); an empty candidate list also yields
 * `null`.
 */
export function lookupImportable(index, name) {
    const candidates = index.get(name);
    const isUnambiguous = Boolean(candidates) && candidates.length === 1;
    return isUnambiguous ? candidates[0] : null;
}
|
|
35
|
+
//# sourceMappingURL=importable.js.map
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Code Map index builders (spec §5.6, §5.7).
|
|
3
|
+
*
|
|
4
|
+
* Both indexes are derived purely from `files/**` shards and written atomically
|
|
5
|
+
* (store.ts uses writeFileAtomic). Queries answer from these + shards alone;
|
|
6
|
+
* materialized JSONL is never required.
|
|
7
|
+
*
|
|
8
|
+
* Ordering is deterministic so two refreshes over identical inputs produce
|
|
9
|
+
* byte-identical indexes (concurrency rule 5 spirit; helps "no JSONL committed"
|
|
10
|
+
* diffs stay clean).
|
|
11
|
+
*/
|
|
12
|
+
import { CODE_MAP_SCHEMA_VERSION, } from './types.js';
|
|
13
|
+
import { fileNodeId } from './ids.js';
|
|
14
|
+
/**
 * Produce the lowercase lookup tokens for a symbol name.
 *
 * The first token is always the whole name lowercased. It is followed by the
 * lowercased pieces obtained by splitting the name on runs of non-ASCII-
 * alphanumeric characters and then on lower/digit -> upper boundaries
 * (e.g. `getUserName` -> `get`, `user`, `name`). Duplicates are dropped while
 * keeping first-seen order.
 */
function tokenize(name) {
    const pieces = name
        .split(/[^A-Za-z0-9]+/)
        .filter((segment) => segment.length > 0)
        // Insert a space at each camelCase boundary, then split on it.
        .flatMap((segment) => segment.replace(/([a-z0-9])([A-Z])/g, '$1 $2').split(/\s+/))
        .filter((piece) => piece.length > 0)
        .map((piece) => piece.toLowerCase());
    return Array.from(new Set([name.toLowerCase(), ...pieces]));
}
|
|
31
|
+
/**
 * Build the token -> symbol-entry index from a set of file shards.
 *
 * Every `kind: 'symbol'` node contributes one entry object, filed under each
 * token `tokenize(node.name)` returns. Exported symbols get `score_hint` 1.0,
 * all others 0.8. Shards are visited in path order, each bucket is sorted by
 * path, then name, then node id, and the token keys are inserted in sorted
 * order. `updated_at` is the time of the call.
 */
export function buildSymbolsIndex(projectId, shards, extractorVersion) {
    // A Map keeps tokens such as `constructor` or `__proto__` from touching
    // any prototype while the buckets are being collected.
    const buckets = new Map();
    const shardsByPath = [...shards].sort((left, right) => left.path.localeCompare(right.path));
    for (const shard of shardsByPath) {
        const symbolNodes = shard.nodes.filter((node) => node.kind === 'symbol');
        for (const node of symbolNodes) {
            const entry = {
                node_id: node.id,
                name: node.name,
                kind: node.kind,
                subtype: node.subtype ?? null,
                path: node.path,
                file_id: shard.file_id,
                score_hint: node.exported ? 1.0 : 0.8,
            };
            for (const token of tokenize(node.name)) {
                const bucket = buckets.get(token);
                if (bucket === undefined) {
                    buckets.set(token, [entry]);
                }
                else {
                    bucket.push(entry);
                }
            }
        }
    }
    const compareEntries = (left, right) => left.path.localeCompare(right.path) ||
        left.name.localeCompare(right.name) ||
        left.node_id.localeCompare(right.node_id);
    // Null-prototype output object, keys added in sorted order.
    const entries = Object.create(null);
    for (const token of [...buckets.keys()].sort()) {
        entries[token] = buckets.get(token).sort(compareEntries);
    }
    return {
        schema_version: CODE_MAP_SCHEMA_VERSION,
        project_id: projectId,
        updated_at: new Date().toISOString(),
        extractor_version: extractorVersion,
        entries,
    };
}
|
|
75
|
+
/**
 * Build the module-specifier -> importing-files index from a set of shards.
 *
 * Each `kind: 'module'` node files its shard under the node's name (the
 * specifier). A file that imports the same specifier several times gets ONE
 * entry whose `imported` list is the de-duplicated, sorted union of every
 * node's `imported_names`. Specifier keys and each per-specifier file list are
 * emitted in sorted order; `updated_at` is the time of the call.
 */
export function buildImportsIndex(projectId, shards) {
    // specifier -> (importer path -> entry)
    const importersBySpecifier = new Map();
    const shardsByPath = [...shards].sort((left, right) => left.path.localeCompare(right.path));
    for (const shard of shardsByPath) {
        for (const node of shard.nodes) {
            if (node.kind !== 'module') {
                continue;
            }
            const specifier = node.name;
            let importers = importersBySpecifier.get(specifier);
            if (importers == null) {
                importers = new Map();
                importersBySpecifier.set(specifier, importers);
            }
            let record = importers.get(shard.path);
            if (record == null) {
                record = { path: shard.path, file_id: shard.file_id, imported: [] };
                importers.set(shard.path, record);
            }
            // Union of the names seen so far with this node's names, sorted.
            const names = new Set([...record.imported, ...(node.imported_names ?? [])]);
            record.imported = [...names].sort();
        }
    }
    // Null-prototype output object, keys added in sorted order.
    const entries = Object.create(null);
    for (const specifier of [...importersBySpecifier.keys()].sort()) {
        const records = [...importersBySpecifier.get(specifier).values()];
        records.sort((left, right) => left.path.localeCompare(right.path));
        entries[specifier] = records;
    }
    return {
        schema_version: CODE_MAP_SCHEMA_VERSION,
        project_id: projectId,
        updated_at: new Date().toISOString(),
        entries,
    };
}
|
|
112
|
+
/**
 * Build the reverse-dependency ("who imports this target") index.
 *
 * Two edge kinds found on the importer's shard feed it:
 *   - `resolves_to`: `edge.to` is a file NODE id; it is translated back to a
 *     path via the ids computed with `fileNodeId` for every given shard, and
 *     the importer is filed under `dependents_by_file[targetPath]`. Edges whose
 *     target is not one of the given shards are skipped.
 *   - `imports_symbol`: the importer is filed under
 *     `dependents_by_symbol[edge.to]`.
 *
 * There is one entry per (target, importer file). When several edges from the
 * same importer hit the same target, their imported names are unioned and
 * sorted, the lexicographically smallest non-empty specifier is kept, and the
 * highest numeric confidence wins. Keys and per-key lists are emitted in sorted
 * order; `updated_at` is the time of the call.
 */
export function buildResolutionIndex(projectId, shards) {
    // file node id -> path, so `resolves_to` targets can be keyed by path.
    const pathByFileNodeId = new Map();
    for (const shard of shards) {
        pathByFileNodeId.set(fileNodeId(projectId, shard.path, shard.lang), shard.path);
    }
    // target key -> (importer path -> merged entry)
    const byFile = new Map();
    const bySymbol = new Map();

    // Record (or merge into) the importer's entry for one target.
    const noteDependent = (bucket, targetKey, importer, moduleInfo, confidence) => {
        const specifier = moduleInfo?.name;
        const names = moduleInfo?.imported ?? [];
        if (!bucket.has(targetKey)) {
            bucket.set(targetKey, new Map());
        }
        const importers = bucket.get(targetKey);
        const existing = importers.get(importer.path);
        if (!existing) {
            importers.set(importer.path, {
                path: importer.path,
                file_id: importer.file_id,
                module: specifier,
                imported: [...new Set(names)].sort(),
                confidence,
            });
            return;
        }
        existing.imported = [...new Set([...existing.imported, ...names])].sort();
        // Keep the smallest specifier so the result is independent of edge order.
        if (specifier && (!existing.module || specifier < existing.module)) {
            existing.module = specifier;
        }
        if (typeof confidence === 'number') {
            existing.confidence = typeof existing.confidence === 'number'
                ? Math.max(existing.confidence, confidence)
                : confidence;
        }
    };

    const shardsByPath = [...shards].sort((left, right) => left.path.localeCompare(right.path));
    for (const shard of shardsByPath) {
        // module node id -> its specifier and imported names.
        const modules = new Map();
        for (const node of shard.nodes) {
            if (node.kind === 'module') {
                modules.set(node.id, { name: node.name, imported: node.imported_names ?? [] });
            }
        }
        for (const edge of shard.edges) {
            if (edge.kind === 'resolves_to') {
                const targetPath = pathByFileNodeId.get(edge.to);
                if (!targetPath) {
                    continue; // target is not one of the indexed files
                }
                noteDependent(byFile, targetPath, shard, modules.get(edge.from), edge.confidence);
            }
            else if (edge.kind === 'imports_symbol') {
                noteDependent(bySymbol, edge.to, shard, modules.get(edge.from), edge.confidence);
            }
        }
    }

    // Convert a bucket into a null-prototype object with sorted keys and lists.
    const toSortedObject = (bucket) => {
        const out = Object.create(null);
        for (const key of [...bucket.keys()].sort()) {
            const dependents = [...bucket.get(key).values()];
            dependents.sort((left, right) => left.path.localeCompare(right.path));
            out[key] = dependents;
        }
        return out;
    };
    return {
        schema_version: CODE_MAP_SCHEMA_VERSION,
        project_id: projectId,
        updated_at: new Date().toISOString(),
        dependents_by_file: toSortedObject(byFile),
        dependents_by_symbol: toSortedObject(bySymbol),
    };
}
|
|
197
|
+
//# sourceMappingURL=indexes.js.map
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
; Code Map — Java imports (imports.scm). Provider #4 (langs#3-4).
|
|
2
|
+
;
|
|
3
|
+
; enclosingStatementNodeTypes = [import_declaration] (provider declaration); that
|
|
4
|
+
; statement is the import span/ordinal anchor. Java import shapes (Codex R1):
|
|
5
|
+
; import a.b.C; -> module a.b.C
|
|
6
|
+
; import a.b.*; -> module a.b + imported name '*'
|
|
7
|
+
; import static a.b.C.m; -> module a.b.C + imported name 'm' (split type/member)
|
|
8
|
+
; import static a.b.C.*; -> module a.b.C + imported name '*'
|
|
9
|
+
;
|
|
10
|
+
; A single capture takes the scoped_identifier path verbatim (e.g. "a.b.C" or, for a
|
|
11
|
+
; wildcard import, the package "a.b" — the grammar puts the `*` in a sibling
|
|
12
|
+
; `asterisk` node, NOT inside scoped_identifier). The provider's refine() inspects
|
|
13
|
+
; each import_declaration for the `static` keyword and the `asterisk` sibling and
|
|
14
|
+
; rewrites source/imported-names accordingly (the static-split and wildcard rules
|
|
15
|
+
; can't be expressed structurally without overlapping matches).
|
|
16
|
+
|
|
17
|
+
; Capture the import_declaration's path child (a dotted scoped_identifier or a bare identifier) as @import.source.
(import_declaration [(scoped_identifier) (identifier)] @import.source)
|