brainclaw 1.9.1 → 1.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +78 -25
- package/dist/brainclaw-vscode.vsix +0 -0
- package/dist/cli.js +18 -1
- package/dist/commands/code-map.js +129 -0
- package/dist/commands/codev.js +7 -0
- package/dist/commands/dispatch-watch.js +1 -1
- package/dist/commands/doctor.js +3 -5
- package/dist/commands/loops-handlers.js +4 -1
- package/dist/commands/mcp-read-handlers.js +8 -0
- package/dist/commands/mcp.js +121 -1
- package/dist/commands/metrics.js +0 -1
- package/dist/commands/release-claims.js +1 -1
- package/dist/commands/run-profile.js +3 -2
- package/dist/commands/sequence.js +1 -1
- package/dist/commands/switch.js +100 -89
- package/dist/commands/sync.js +1 -1
- package/dist/commands/upgrade.js +0 -7
- package/dist/core/agent-context.js +1 -1
- package/dist/core/agent-files.js +13 -2
- package/dist/core/agent-integrations.js +3 -3
- package/dist/core/agent-registry.js +2 -2
- package/dist/core/assignments.js +12 -0
- package/dist/core/brainclaw-version.js +2 -2
- package/dist/core/code-map/backend.js +176 -0
- package/dist/core/code-map/core.js +81 -0
- package/dist/core/code-map/drafts.js +2 -0
- package/dist/core/code-map/extractor.js +29 -0
- package/dist/core/code-map/finalizer.js +191 -0
- package/dist/core/code-map/freshness.js +144 -0
- package/dist/core/code-map/ids.js +0 -0
- package/dist/core/code-map/importable.js +35 -0
- package/dist/core/code-map/indexes.js +197 -0
- package/dist/core/code-map/lang/java/imports.scm +17 -0
- package/dist/core/code-map/lang/java/index.js +254 -0
- package/dist/core/code-map/lang/java/tags.scm +48 -0
- package/dist/core/code-map/lang/php/imports.scm +21 -0
- package/dist/core/code-map/lang/php/index.js +251 -0
- package/dist/core/code-map/lang/php/tags.scm +44 -0
- package/dist/core/code-map/lang/provider.js +9 -0
- package/dist/core/code-map/lang/providers.js +24 -0
- package/dist/core/code-map/lang/python/imports.scm +90 -0
- package/dist/core/code-map/lang/python/index.js +364 -0
- package/dist/core/code-map/lang/python/tags.scm +81 -0
- package/dist/core/code-map/lang/query-runtime.js +374 -0
- package/dist/core/code-map/lang/registry.js +125 -0
- package/dist/core/code-map/lang/typescript/imports.scm +90 -0
- package/dist/core/code-map/lang/typescript/index.js +306 -0
- package/dist/core/code-map/lang/typescript/tags.js.scm +106 -0
- package/dist/core/code-map/lang/typescript/tags.scm +151 -0
- package/dist/core/code-map/lock.js +210 -0
- package/dist/core/code-map/materialized.js +51 -0
- package/dist/core/code-map/memory-reader.js +59 -0
- package/dist/core/code-map/paths.js +53 -0
- package/dist/core/code-map/query.js +599 -0
- package/dist/core/code-map/refresh.js +0 -0
- package/dist/core/code-map/resolve.js +177 -0
- package/dist/core/code-map/store.js +206 -0
- package/dist/core/code-map/types.js +293 -0
- package/dist/core/code-map/vocabulary.js +57 -0
- package/dist/core/code-map/wasm-loader.js +294 -0
- package/dist/core/code-map/work-section.js +206 -0
- package/dist/core/codev-rounds.js +4 -0
- package/dist/core/context.js +1 -1
- package/dist/core/cross-project.js +1 -1
- package/dist/core/dispatcher.js +0 -2
- package/dist/core/entity-operations.js +0 -3
- package/dist/core/execution-adapters.js +11 -10
- package/dist/core/execution-profile.js +58 -0
- package/dist/core/facade-schema.js +9 -0
- package/dist/core/ids.js +1 -1
- package/dist/core/instruction-templates.js +2 -0
- package/dist/core/instructions.js +0 -1
- package/dist/core/loops/lock.js +0 -3
- package/dist/core/mcp-command-resolution.js +3 -1
- package/dist/core/protocol-skills.js +5 -3
- package/dist/core/security-detectors.js +2 -2
- package/dist/core/security-extract.js +2 -2
- package/dist/core/store-resolution.js +41 -4
- package/dist/facts.js +9 -5
- package/dist/facts.json +8 -4
- package/dist/vendor/web-tree-sitter/tree-sitter.js +3980 -0
- package/dist/vendor/web-tree-sitter/tree-sitter.wasm +0 -0
- package/dist/wasm/tree-sitter-java.wasm +0 -0
- package/dist/wasm/tree-sitter-javascript.wasm +0 -0
- package/dist/wasm/tree-sitter-php.wasm +0 -0
- package/dist/wasm/tree-sitter-python.wasm +0 -0
- package/dist/wasm/tree-sitter-tsx.wasm +0 -0
- package/dist/wasm/tree-sitter-typescript.wasm +0 -0
- package/dist/wasm/tree-sitter.wasm +0 -0
- package/docs/cli.md +46 -8
- package/docs/code-map.md +209 -0
- package/docs/integrations/mcp.md +13 -6
- package/docs/mcp-schema-changelog.md +7 -3
- package/docs/quickstart.md +1 -1
- package/package.json +11 -6
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import { finalize } from './finalizer.js';
|
|
2
|
+
import { defaultRegistry } from './lang/providers.js';
|
|
3
|
+
/**
|
|
4
|
+
* The default registry is constructed + registered in `lang/providers.ts` (P1b
|
|
5
|
+
* §3.2) — the declared extension point for "which providers ship by default".
|
|
6
|
+
* Re-exported here so existing importers (`core.js`) keep working unchanged.
|
|
7
|
+
*/
|
|
8
|
+
export { defaultRegistry };
|
|
9
|
+
const SERVICES = { version: '0.1.0' };
|
|
10
|
+
/**
 * Build the file-only extraction result used when no provider handles a path.
 *
 * The draft handed to `finalize` has no definitions, imports, exports or tests,
 * so the file node id comes from the same code path as for a parsed file.
 *
 * @param input - extraction input; `input.path` names the file and the whole
 *   object is forwarded to `finalize` as the identity input.
 * @param parseStatus - value stored on the draft as `attributes.parseStatus`.
 * @returns the value `finalize` produces for the empty draft.
 */
function fileOnlyResult(input, parseStatus) {
    const { path } = input;
    const emptyDraft = {
        file: { path },
        definitions: [],
        imports: [],
        exports: [],
        tests: [],
        facts: [{ code: 'skipped_unsupported', message: `no provider for ${path}` }],
        attributes: { parseStatus },
    };
    return finalize(emptyDraft, input);
}
|
|
22
|
+
/**
 * Best-effort disposal of the parse tree a draft may carry on
 * `attributes.__tree`. Does nothing when no tree is present; anything thrown by
 * the tree's `delete()` is deliberately ignored.
 */
function releaseTree(draft) {
    const retained = draft.attributes?.__tree;
    if (!retained)
        return;
    try {
        retained.delete();
    }
    catch {
        /* best effort */
    }
}
|
|
34
|
+
/**
 * Run the provider pipeline for a single file: resolve a provider from
 * `input.path`, extract a draft, optionally refine it, then finalize.
 *
 * - When the registry returns no provider for the path, a file-only
 *   `skipped_unsupported` result is returned instead of throwing.
 * - The provider always receives the caller's `input.lang`; the language the
 *   registry resolved is not used here.
 * - A failure inside `provider.refine` is caught: the pre-refine draft is kept
 *   and a `refine_error` fact carrying the error message is appended.
 *   Errors from `provider.extractDraft` are not caught and propagate.
 * - The draft's retained parse tree (if any) is released after `finalize`,
 *   whether or not `finalize` throws.
 *
 * @param input - extraction input (`projectId`, `path`, `lang`, `source`,
 *   `sizeBytes`, `maxParseFileBytes`, `maxQueryWaitMs`).
 * @param registry - provider registry; defaults to `defaultRegistry`.
 * @returns the finalized extraction result.
 */
export async function extractFile(input, registry = defaultRegistry) {
    const resolved = registry.providerForPath(input.path);
    if (!resolved)
        return fileOnlyResult(input, 'skipped_unsupported');
    const { provider } = resolved;
    const { projectId, path, lang, source, sizeBytes, maxParseFileBytes, maxQueryWaitMs } = input;
    // Same field set and order the provider has always been handed.
    const providerInput = {
        projectId,
        path,
        lang,
        source,
        sizeBytes,
        maxParseFileBytes,
        maxQueryWaitMs,
    };
    let draft = await provider.extractDraft(providerInput, SERVICES);
    if (provider.refine) {
        try {
            draft = await provider.refine(draft, { input: providerInput, lang });
        }
        catch (err) {
            // Keep the pre-refine draft and record a loud diagnostic (never drop the file).
            const message = err instanceof Error ? err.message : String(err);
            draft = {
                ...draft,
                facts: [...draft.facts, { code: 'refine_error', message }],
            };
        }
    }
    try {
        return finalize(draft, input);
    }
    finally {
        releaseTree(draft);
    }
}
|
|
81
|
+
//# sourceMappingURL=core.js.map
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Code Map extractor — THIN backward-compat surface (spec §3, §9 cutover).
|
|
3
|
+
*
|
|
4
|
+
* P1a cutover (Sprint 4): the legacy 540-line imperative extractor has been
|
|
5
|
+
* replaced by the query-driven provider pipeline. The real `extractFile` now
|
|
6
|
+
* lives on the CORE (`core.ts` → registry → provider.extractDraft → refine →
|
|
7
|
+
* finalize). This module keeps the historical import surface stable:
|
|
8
|
+
*
|
|
9
|
+
* - `extractFile` re-exported from `core.ts` (provider pipeline).
|
|
10
|
+
* - `ExtractInput` / `ExtractResult` the public extraction shapes (owned here so
|
|
11
|
+
* `core.ts`, `finalizer.ts`, and the oracle tests keep
|
|
12
|
+
* importing them from this module).
|
|
13
|
+
* - `hashContent` sha256 of file contents (used by refresh.ts + query.ts).
|
|
14
|
+
*
|
|
15
|
+
* The legacy imperative bodies (handleFunctionDeclaration / handleClassDeclaration
|
|
16
|
+
* / classifySubtype / returnsJsx / handleImport / markOrAddExport / …) are GONE.
|
|
17
|
+
* The oracle (`oracle.test.ts`) now exercises this re-export and so doubles as a
|
|
18
|
+
* provider-path regression guard against the frozen `oracle-golden.json`.
|
|
19
|
+
*/
|
|
20
|
+
import crypto from 'node:crypto';
|
|
21
|
+
// The query-driven CORE entrypoint, re-exported under the historical name so all
|
|
22
|
+
// existing importers (refresh.ts, oracle.test.ts, …) keep resolving `extractFile`
|
|
23
|
+
// here. core.ts imports the types above (type-only → erased, so no runtime cycle).
|
|
24
|
+
export { extractFile } from './core.js';
|
|
25
|
+
/**
 * Hash file contents for the shard's `file_hash`.
 *
 * @param source - file contents; string input is hashed as UTF-8.
 * @returns `sha256:` followed by the hex digest.
 */
export function hashContent(source) {
    const hasher = crypto.createHash('sha256');
    hasher.update(source, 'utf-8');
    return `sha256:${hasher.digest('hex')}`;
}
|
|
29
|
+
//# sourceMappingURL=extractor.js.map
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Code Map P1a — CORE finalizer (spec §3, §6; dec#108 #1, dec#109 P0 #4 / P1 #5/#6/#7).
|
|
3
|
+
*
|
|
4
|
+
* The finalizer is the ONE AND ONLY identity authority. Providers hand it typed,
|
|
5
|
+
* id-free {@link ExtractionDraft}s; this module reproduces TODAY'S exact legacy
|
|
6
|
+
* `extractor.ts` output: byte-identical node/edge IDs (via `ids.ts`), spans,
|
|
7
|
+
* confidence, the `exported` flag, and — critically — the LEGACY SOURCE-APPEND
|
|
8
|
+
* ORDER. Nothing here sorts node/edge content: draft items carry an `ordinal`
|
|
9
|
+
* (their traversal position) and the finalizer replays them in that order.
|
|
10
|
+
*
|
|
11
|
+
* Emission contract (mirrors the legacy `addSymbol` / `addModule` / `markOrAddExport`):
|
|
12
|
+
* - File node FIRST.
|
|
13
|
+
* - Per definition (ascending ordinal): symbol node, `contains` edge, `defines` edge.
|
|
14
|
+
* - Per import / re-export source: `module` node, `imports` edge.
|
|
15
|
+
* - Per export clause / default-id (`markOrAddExport`): if the name matches an
|
|
16
|
+
* already-emitted symbol, set that node `exported=true` + emit ONE `exports`
|
|
17
|
+
* edge to it (no new node); otherwise fabricate an `export`-subtype symbol
|
|
18
|
+
* (node + `contains` + `defines`) then emit the `exports` edge to it.
|
|
19
|
+
*
|
|
20
|
+
* The `exported` FLAG is NOT an `exports` EDGE: in-place exported declarations set
|
|
21
|
+
* only `node.exported=true`; only export clauses / default-identifier exports emit
|
|
22
|
+
* an `exports` edge (and they ALSO flip the referenced node's flag, matching legacy).
|
|
23
|
+
*
|
|
24
|
+
* Output nodes/edges are validated against the `types.ts` zod schemas before return.
|
|
25
|
+
*/
|
|
26
|
+
import { edgeId, fileNodeId, nodeId } from './ids.js';
|
|
27
|
+
import { EdgeSchema, NodeSchema } from './types.js';
|
|
28
|
+
/**
 * Build the `sym:`-prefixed node id for a symbol.
 *
 * The hash comes from `nodeId` over the project, path, language, subtype, name
 * and the span's start line/column, with `kind` fixed to `'symbol'`.
 */
function symNodeId(projectId, path, lang, subtype, name, span) {
    const { start_line: startLine, start_col: startCol } = span;
    const hash = nodeId({
        projectId,
        path,
        lang,
        kind: 'symbol',
        subtype,
        name,
        startLine,
        startCol,
    });
    return `sym:${hash}`;
}
|
|
41
|
+
/**
 * Build the `module:`-prefixed node id for an import / re-export source.
 *
 * The hash comes from `nodeId` over the project, path, language, the module
 * specifier (as `name`) and the span's start line/column, with `kind` fixed to
 * `'module'` and `subtype` fixed to `null`.
 */
function moduleNodeId(projectId, path, lang, source, span) {
    const { start_line: startLine, start_col: startCol } = span;
    const hash = nodeId({
        projectId,
        path,
        lang,
        kind: 'module',
        subtype: null,
        name: source,
        startLine,
        startCol,
    });
    return `module:${hash}`;
}
|
|
54
|
+
/**
 * Convert a provider draft into the final extraction result
 * (`{ parseStatus, nodes, edges, diagnostics }`).
 *
 * Emission order:
 *   1. The file node is always first.
 *   2. Definitions, imports and exports are merged into one stream ordered by
 *      ascending `ordinal` and replayed in that order:
 *      - definition: symbol node, then `contains` and `defines` edges;
 *      - import: module node, then an `imports` edge;
 *      - export: if a symbol with that name was already emitted, flip its
 *        `exported` flag in place; otherwise emit a new `export`-subtype symbol
 *        (node + `contains` + `defines`). Either way, one `exports` edge follows.
 *
 * @param draft - provider draft (`definitions`, `imports`, `exports`, `facts`,
 *   optional `attributes.parseStatus`).
 * @param input - supplies `projectId`, `path` and `lang`, the inputs to every id.
 * @returns the finalized result; `parseStatus` defaults to `'parsed'` and
 *   `diagnostics` are shallow copies of `draft.facts`.
 * @throws whatever `NodeSchema.parse` / `EdgeSchema.parse` throw when an emitted
 *   node or edge fails validation.
 */
export function finalize(draft, input) {
    const { projectId, path, lang } = input;
    const fileNode = fileNodeId(projectId, path, lang);
    const nodes = [
        {
            id: fileNode,
            kind: 'file',
            subtype: null,
            lang,
            name: path,
            path,
            span: null,
            exported: false,
            confidence: 1.0,
            related_memory_ids: [],
            imported_names: [],
        },
    ];
    const edges = [];
    // Symbol name -> node id; a later symbol with the same name replaces the earlier one.
    const byName = new Map();
    // Node id -> position in `nodes`, so an export can flip `exported` in place.
    const nodeIndexById = new Map();
    // Every edge originates at the file node and has confidence 1.0.
    const fileEdge = (kind, to, line) => ({
        id: edgeId({ projectId, from: fileNode, to, kind }),
        from: fileNode,
        to,
        kind,
        confidence: 1.0,
        source: { path, line },
    });
    const emitSymbol = (subtype, name, span, exported, confidence) => {
        const id = symNodeId(projectId, path, lang, subtype, name, span);
        nodeIndexById.set(id, nodes.length);
        nodes.push({
            id,
            kind: 'symbol',
            subtype,
            lang,
            name,
            path,
            span,
            exported,
            confidence,
            related_memory_ids: [],
            imported_names: [],
        });
        byName.set(name, id);
        edges.push(fileEdge('contains', id, span.start_line), fileEdge('defines', id, span.start_line));
        return id;
    };
    const emitModule = (im) => {
        const id = moduleNodeId(projectId, path, lang, im.source, im.span);
        nodeIndexById.set(id, nodes.length);
        nodes.push({
            id,
            kind: 'module',
            subtype: null,
            lang,
            name: im.source,
            path,
            span: im.span,
            exported: false,
            confidence: im.confidence ?? 1.0,
            related_memory_ids: [],
            imported_names: [...im.importedNames],
        });
        edges.push(fileEdge('imports', id, im.span.start_line));
    };
    const emitExport = (ex) => {
        let target = byName.get(ex.name);
        if (target) {
            // Known symbol: mark it exported rather than adding a node.
            const idx = nodeIndexById.get(target);
            if (idx !== undefined)
                nodes[idx] = { ...nodes[idx], exported: true };
        }
        else {
            target = emitSymbol('export', ex.name, ex.span, true, ex.confidence ?? 1.0);
        }
        edges.push(fileEdge('exports', target, ex.span.start_line));
    };
    // One ordinal-ordered stream across all draft kinds; only this stream is
    // sorted, never the emitted node/edge content.
    const tagged = (kind, refs) => refs.map((ref) => ({ kind, ordinal: ref.ordinal, ref }));
    const items = [
        ...tagged('def', draft.definitions),
        ...tagged('import', draft.imports),
        ...tagged('export', draft.exports),
    ];
    items.sort((a, b) => a.ordinal - b.ordinal);
    for (const { kind, ref } of items) {
        switch (kind) {
            case 'def':
                emitSymbol(ref.subtype, ref.name, ref.span, ref.exported === true, ref.confidence ?? 1.0);
                break;
            case 'import':
                emitModule(ref);
                break;
            default:
                // export clause / default-identifier
                emitExport(ref);
                break;
        }
    }
    const parseStatus = draft.attributes?.parseStatus ?? 'parsed';
    const diagnostics = draft.facts.map((fact) => ({ ...fact }));
    // Validate everything that was emitted before handing it back.
    nodes.forEach((node) => NodeSchema.parse(node));
    edges.forEach((edge) => EdgeSchema.parse(edge));
    return { parseStatus, nodes, edges, diagnostics };
}
|
|
191
|
+
//# sourceMappingURL=finalizer.js.map
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Write-side freshness hashing + per-shard freshness classification
|
|
3
|
+
* (spec §5.1, §6.2, §12.4).
|
|
4
|
+
*
|
|
5
|
+
* The READ-path lazy freshness check (§6.1) is Sprint 3 — NOT implemented here.
|
|
6
|
+
* This module owns only:
|
|
7
|
+
* - `computeExtractorConfigHash` — sha256 of a stable serialization of
|
|
8
|
+
* extractor_config + the active language set + (P1a) the registry's
|
|
9
|
+
* `configHashInputs()` (provider versions + every query-asset hash). Changing
|
|
10
|
+
* ignore rules, size caps, supported extensions, query budget, active langs, a
|
|
11
|
+
* provider version, OR a tags/imports `.scm` => stale_extractor.
|
|
12
|
+
* NOTE: grammar/engine hashes are deliberately NOT folded in (spec §6.2):
|
|
13
|
+
* stale_grammar (changed parse binary) is kept separable from stale_extractor.
|
|
14
|
+
* - `shardFreshnessStatus` — classify a stored shard against the current
|
|
15
|
+
* extractor_config_hash + per-language grammar hashes.
|
|
16
|
+
*/
|
|
17
|
+
import crypto from 'node:crypto';
|
|
18
|
+
/**
 * Stable serialization: object keys are sorted recursively so the output (and
 * any hash of it) does not depend on key insertion order.
 *
 * Values JSON cannot represent (`undefined`, functions, symbols) follow
 * `JSON.stringify` semantics: an object member holding one is omitted, and an
 * array slot holding one (including a hole) becomes `null`. This keeps
 * `{ a: undefined }` equal to `{}` — so a config hashes the same before and
 * after a JSON round-trip — and keeps `[undefined]` distinct from `[]`.
 *
 * @param value - any JSON-like value.
 * @returns the serialized text, or `undefined` when `value` itself is not
 *   representable (same as `JSON.stringify`).
 */
function stableStringify(value) {
    if (value === null || typeof value !== 'object')
        return JSON.stringify(value);
    if (Array.isArray(value)) {
        // Array.from visits holes too, so sparse slots also serialize as null.
        const items = Array.from(value, (item) => stableStringify(item) ?? 'null');
        return `[${items.join(',')}]`;
    }
    const members = [];
    for (const key of Object.keys(value).sort()) {
        const encoded = stableStringify(value[key]);
        // Omitted and explicitly-undefined members must serialize identically.
        if (encoded !== undefined)
            members.push(`${JSON.stringify(key)}:${encoded}`);
    }
    return `{${members.join(',')}}`;
}
|
|
28
|
+
/**
 * Hash the inputs that determine extractor output.
 *
 * The hashed payload has three members: `extractor_config` (the config as
 * given), `active_languages` (a sorted copy of `activeLanguages`, so the order
 * languages are passed in does not matter) and `registry` (`registryInputs`,
 * or `null` when it is omitted or undefined, so both cases hash the same).
 *
 * @param config - extractor configuration object.
 * @param activeLanguages - iterable of enabled language names; not mutated.
 * @param registryInputs - optional registry fingerprint (per the spec notes,
 *   the value of `registry.configHashInputs()`).
 * @returns `sha256:` followed by the hex digest of the stable serialization.
 */
export function computeExtractorConfigHash(config, activeLanguages, registryInputs) {
    const sortedLanguages = [...activeLanguages];
    sortedLanguages.sort();
    const serialized = stableStringify({
        extractor_config: config,
        active_languages: sortedLanguages,
        registry: registryInputs ?? null,
    });
    const digest = crypto.createHash('sha256').update(serialized).digest('hex');
    return `sha256:${digest}`;
}
|
|
46
|
+
/**
 * Classify one stored shard against the current extractor/grammar hashes.
 *
 * - `stale_extractor` when the shard's `extractor_config_hash` differs from
 *   `input.currentExtractorConfigHash`. This is checked first, and the grammar
 *   lookup is not performed in that case.
 * - `stale_grammar` when `input.grammarHashFor(shard.lang)` is not `undefined`,
 *   the shard records a grammar hash (not null/undefined), and the two differ.
 * - `fresh` otherwise.
 *
 * @param input - `{ shard, currentExtractorConfigHash, grammarHashFor }`.
 * @returns `'stale_extractor'`, `'stale_grammar'` or `'fresh'`.
 */
export function shardFreshnessStatus(input) {
    const { shard, currentExtractorConfigHash } = input;
    if (shard.extractor_config_hash !== currentExtractorConfigHash)
        return 'stale_extractor';
    const expected = input.grammarHashFor(shard.lang);
    const recorded = shard.tree_sitter_grammar_hash;
    // Only comparable when both sides actually carry a grammar hash.
    const comparable = expected !== undefined && recorded != null;
    return comparable && recorded !== expected ? 'stale_grammar' : 'fresh';
}
|
|
69
|
+
/**
 * Roll per-shard freshness up into one manifest-level summary.
 *
 * - No shards at all: `missing_index`.
 * - No shard in a counted stale state: `fresh`.
 * - Otherwise the stale reason with the highest count. Ties resolve in the
 *   order `stale_extractor`, `stale_grammar`, `stale_changed_files`.
 *
 * Only those three statuses are counted; any other status is ignored.
 *
 * @param shards - array of shards, each exposing `freshness.status`.
 * @returns `{ status, stale_file_count, partial_reason }`, where
 *   `stale_file_count` is the total number of counted stale shards and
 *   `partial_reason` is always `null`.
 */
export function summarizeFreshness(shards) {
    if (shards.length === 0) {
        return { status: 'missing_index', stale_file_count: 0, partial_reason: null };
    }
    const tally = new Map([
        ['stale_extractor', 0],
        ['stale_grammar', 0],
        ['stale_changed_files', 0],
    ]);
    for (const shard of shards) {
        const reason = shard.freshness.status;
        if (tally.has(reason))
            tally.set(reason, tally.get(reason) + 1);
    }
    const extractor = tally.get('stale_extractor');
    const grammar = tally.get('stale_grammar');
    const changed = tally.get('stale_changed_files');
    const staleTotal = extractor + grammar + changed;
    if (staleTotal === 0) {
        return { status: 'fresh', stale_file_count: 0, partial_reason: null };
    }
    // Pick the dominant reason for the manifest badge.
    const dominant = () => {
        if (extractor >= grammar && extractor >= changed)
            return 'stale_extractor';
        if (grammar >= changed)
            return 'stale_grammar';
        return 'stale_changed_files';
    };
    return { status: dominant(), stale_file_count: staleTotal, partial_reason: null };
}
|
|
108
|
+
/**
 * Read-path git-HEAD drift: annotate a freshness badge when the index was built
 * at a different commit than the working tree's current HEAD.
 *
 * - No-op (the same `badge` object is returned) when either head is unknown
 *   or empty, or when the two heads match.
 * - Otherwise a NEW badge is returned; the input is not mutated:
 *   - a `fresh` status becomes `stale_git_head`; any other status is kept;
 *   - `details.git_head_changed` records `{ index_head, current_head }`
 *     alongside any details already present;
 *   - every other field on the badge is carried over unchanged, so only the
 *     status and the cause detail can differ from the input.
 *
 * `stale_git_head` is a separate status rather than `stale_changed_files`: a
 * HEAD move says the index was built at another commit, not that specific files
 * were confirmed changed.
 *
 * @param badge - freshness badge with a `status` and optional `details`.
 * @param indexHead - commit the index was built against, if known.
 * @param currentHead - the working tree's current commit, if known.
 * @returns the original badge, or an annotated copy.
 */
export function applyGitHeadDrift(badge, indexHead, currentHead) {
    if (!indexHead || !currentHead || indexHead === currentHead)
        return badge;
    return {
        // Spread first so fields beyond status/details survive the annotation.
        ...badge,
        status: badge.status === 'fresh' ? 'stale_git_head' : badge.status,
        details: {
            ...badge.details,
            git_head_changed: { index_head: indexHead, current_head: currentHead },
        },
    };
}
|
|
144
|
+
//# sourceMappingURL=freshness.js.map
|
|
Binary file
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { defaultImportableSymbol } from './lang/provider.js';
|
|
2
|
+
/**
 * Build a `name -> importable symbol candidates` map for one target file.
 *
 * Only nodes with `kind === 'symbol'` are considered. When the provider exposes
 * `isImportableSymbol`, it is called as `(node, allSymbols)` with the complete
 * symbol list; otherwise `defaultImportableSymbol(node)` decides.
 *
 * @param nodes - the target shard's node list.
 * @param provider - optional language provider.
 * @returns a `Map` from symbol name to the accepted nodes, in input order.
 */
export function buildImportableIndex(nodes, provider) {
    const symbols = nodes.filter((node) => node.kind === 'symbol');
    let isImportable;
    if (provider?.isImportableSymbol) {
        isImportable = (candidate) => provider.isImportableSymbol(candidate, symbols);
    }
    else {
        isImportable = defaultImportableSymbol;
    }
    const index = new Map();
    for (const symbol of symbols) {
        if (!isImportable(symbol))
            continue;
        const bucket = index.get(symbol.name);
        if (bucket)
            bucket.push(symbol);
        else
            index.set(symbol.name, [symbol]);
    }
    return index;
}
|
|
25
|
+
/**
 * Resolve an imported name to its single importable symbol.
 *
 * @param index - map from name to candidate list.
 * @param name - the imported name to look up.
 * @returns the sole candidate, or `null` when the name is absent or has any
 *   number of candidates other than one (ambiguous names are skipped).
 */
export function lookupImportable(index, name) {
    const candidates = index.get(name);
    return candidates?.length === 1 ? candidates[0] : null;
}
|
|
35
|
+
//# sourceMappingURL=importable.js.map
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Code Map index builders (spec §5.6, §5.7).
|
|
3
|
+
*
|
|
4
|
+
* Both indexes are derived purely from `files/**` shards and written atomically
|
|
5
|
+
* (store.ts uses writeFileAtomic). Queries answer from these + shards alone;
|
|
6
|
+
* materialized JSONL is never required.
|
|
7
|
+
*
|
|
8
|
+
* Ordering is deterministic so two refreshes over identical inputs produce
|
|
9
|
+
* byte-identical indexes (concurrency rule 5 spirit; helps "no JSONL committed"
|
|
10
|
+
* diffs stay clean).
|
|
11
|
+
*/
|
|
12
|
+
import { CODE_MAP_SCHEMA_VERSION, } from './types.js';
|
|
13
|
+
import { fileNodeId } from './ids.js';
|
|
14
|
+
/**
 * Produce the lowercase lookup tokens for a symbol name.
 *
 * The first token is always the whole name lowercased. The name is then cut at
 * runs of non-alphanumeric ASCII characters (snake_case, kebab-case, dots, …),
 * and each piece is cut again wherever a lowercase letter or digit is followed
 * directly by an uppercase letter (camelCase). Each sub-piece is added
 * lowercased.
 *
 * @param name - symbol name.
 * @returns the unique tokens in first-seen order.
 */
function tokenize(name) {
    const tokens = new Set([name.toLowerCase()]);
    const words = name.split(/[^A-Za-z0-9]+/).filter(Boolean);
    for (const word of words) {
        // Put a space at each lower/digit -> upper boundary, then split on it.
        const spaced = word.replace(/([a-z0-9])([A-Z])/g, '$1 $2');
        for (const piece of spaced.split(/\s+/)) {
            if (piece)
                tokens.add(piece.toLowerCase());
        }
    }
    return Array.from(tokens);
}
|
|
31
|
+
/**
 * Build the token -> symbol-entries index from file shards.
 *
 * Every `kind === 'symbol'` node yields one entry, and that same entry object
 * is filed under each token `tokenize(node.name)` produces. Exported symbols
 * get `score_hint` 1.0, others 0.8.
 *
 * Ordering: shards are visited in path order, each token bucket is sorted by
 * (path, name, node_id), and the token keys are inserted in sorted order.
 *
 * @param projectId - written to `project_id`.
 * @param shards - file shards (`path`, `file_id`, `nodes`); not mutated.
 * @param extractorVersion - written to `extractor_version`.
 * @returns the symbols index, with `updated_at` set to the current time.
 */
export function buildSymbolsIndex(projectId, shards, extractorVersion) {
    // Null-prototype object: tokens such as `constructor` or `__proto__` must be
    // ordinary keys, not collide with inherited Object.prototype members.
    const buckets = Object.create(null);
    const shardsByPath = [...shards].sort((a, b) => a.path.localeCompare(b.path));
    for (const shard of shardsByPath) {
        for (const node of shard.nodes) {
            if (node.kind !== 'symbol')
                continue;
            const entry = {
                node_id: node.id,
                name: node.name,
                kind: node.kind,
                subtype: node.subtype ?? null,
                path: node.path,
                file_id: shard.file_id,
                score_hint: node.exported ? 1.0 : 0.8,
            };
            for (const token of tokenize(node.name)) {
                const bucket = buckets[token] ?? (buckets[token] = []);
                bucket.push(entry);
            }
        }
    }
    const compareEntries = (a, b) => a.path.localeCompare(b.path) ||
        a.name.localeCompare(b.name) ||
        a.node_id.localeCompare(b.node_id);
    // Sort each bucket and re-insert the keys in sorted order.
    const sortedEntries = Object.create(null);
    for (const token of Object.keys(buckets).sort()) {
        buckets[token].sort(compareEntries);
        sortedEntries[token] = buckets[token];
    }
    return {
        schema_version: CODE_MAP_SCHEMA_VERSION,
        project_id: projectId,
        updated_at: new Date().toISOString(),
        extractor_version: extractorVersion,
        entries: sortedEntries,
    };
}
|
|
75
|
+
/**
 * Build the module-specifier -> importing-files index from file shards.
 *
 * Every `kind === 'module'` node contributes to the entry for
 * (module specifier, importing file path). When one file imports the same
 * specifier several times, the imported names are unioned, de-duplicated and
 * sorted. Specifier keys are inserted in sorted order and each key's list is
 * sorted by path.
 *
 * @param projectId - written to `project_id`.
 * @param shards - file shards (`path`, `file_id`, `nodes`); not mutated.
 * @returns the imports index, with `updated_at` set to the current time.
 */
export function buildImportsIndex(projectId, shards) {
    const comparePaths = (a, b) => a.path.localeCompare(b.path);
    // module specifier -> (importer path -> entry)
    const byModule = new Map();
    for (const shard of [...shards].sort(comparePaths)) {
        for (const node of shard.nodes) {
            if (node.kind !== 'module')
                continue;
            const specifier = node.name;
            let perPath = byModule.get(specifier);
            if (perPath == null) {
                perPath = new Map();
                byModule.set(specifier, perPath);
            }
            let entry = perPath.get(shard.path);
            if (entry == null) {
                entry = { path: shard.path, file_id: shard.file_id, imported: [] };
                perPath.set(shard.path, entry);
            }
            // Union with the names already collected for this (module, file) pair.
            const names = new Set([...entry.imported, ...(node.imported_names ?? [])]);
            entry.imported = [...names].sort();
        }
    }
    // Null-prototype object so any specifier string is a plain key.
    const entries = Object.create(null);
    for (const specifier of [...byModule.keys()].sort()) {
        entries[specifier] = [...byModule.get(specifier).values()].sort(comparePaths);
    }
    return {
        schema_version: CODE_MAP_SCHEMA_VERSION,
        project_id: projectId,
        updated_at: new Date().toISOString(),
        entries,
    };
}
|
|
112
|
+
/**
 * P1d reverse-dependency index ("who imports this target"), derived from the
 * P1c resolution edges stored on each IMPORTER's shard:
 *   - `resolves_to` edges feed `dependents_by_file`, keyed by the target's path;
 *   - `imports_symbol` edges feed `dependents_by_symbol`, keyed by `edge.to`.
 *
 * A `resolves_to` edge points at a file NODE ID rather than a path, so a
 * node-id -> path lookup is first built by computing `fileNodeId(...)` for every
 * shard; edges whose target is not found in that lookup are skipped.
 *
 * Each (target, importer path) pair yields exactly one entry. When several
 * edges from one importer reach the same target they are folded together:
 * imported names are unioned and sorted, the lexicographically smallest
 * specifier is kept, and the highest numeric confidence wins.
 *
 * @param projectId - Identifier copied into `project_id` and passed to `fileNodeId`.
 * @param shards - Iterable of per-file shards; reads `path`, `file_id`, `lang`,
 *   `nodes` (`id`, `kind`, `name`, optional `imported_names`) and `edges`
 *   (`kind`, `from`, `to`, optional `confidence`).
 * @returns `{ schema_version, project_id, updated_at, dependents_by_file,
 *   dependents_by_symbol }`; both dependents maps are prototype-less objects
 *   with sorted keys and path-ordered entry arrays.
 */
export function buildResolutionIndex(projectId, shards) {
    const comparePaths = (left, right) => left.path.localeCompare(right.path);
    // file-node id -> path, so reverse resolves_to can be keyed by target path.
    const pathByFileNodeId = new Map();
    for (const shard of shards) {
        pathByFileNodeId.set(fileNodeId(projectId, shard.path, shard.lang), shard.path);
    }
    // target key -> (importer path -> merged entry)
    const fileDependents = new Map();
    const symbolDependents = new Map();
    // Record one edge from `importer` to `target`, merging with any entry the
    // same importer already has for that target. `source` is the importing
    // module node's summary and may be undefined when the edge's origin is not
    // a module node of this shard.
    const recordDependent = (index, target, importer, source, confidence) => {
        const specifier = source?.name;
        const names = source?.imported ?? [];
        let importers = index.get(target);
        if (importers === undefined) {
            importers = new Map();
            index.set(target, importers);
        }
        const existing = importers.get(importer.path);
        if (existing) {
            // A further edge from the same importer to the same target.
            existing.imported = [...new Set([...existing.imported, ...names])].sort();
            if (specifier && (!existing.module || specifier < existing.module)) {
                existing.module = specifier; // smallest spec, stable
            }
            if (typeof confidence === 'number') {
                existing.confidence =
                    typeof existing.confidence === 'number'
                        ? Math.max(existing.confidence, confidence)
                        : confidence;
            }
            return;
        }
        importers.set(importer.path, {
            path: importer.path,
            file_id: importer.file_id,
            module: specifier,
            imported: [...new Set(names)].sort(),
            confidence,
        });
    };
    for (const importer of [...shards].sort(comparePaths)) {
        // module node id -> its specifier + imported names (for reason metadata).
        const modulesById = new Map();
        for (const node of importer.nodes) {
            if (node.kind === 'module') {
                modulesById.set(node.id, { name: node.name, imported: node.imported_names ?? [] });
            }
        }
        for (const edge of importer.edges) {
            switch (edge.kind) {
                case 'resolves_to': {
                    const targetPath = pathByFileNodeId.get(edge.to);
                    // Defensive: ignore targets that are not indexed files.
                    if (targetPath) {
                        recordDependent(fileDependents, targetPath, importer, modulesById.get(edge.from), edge.confidence);
                    }
                    break;
                }
                case 'imports_symbol':
                    recordDependent(symbolDependents, edge.to, importer, modulesById.get(edge.from), edge.confidence);
                    break;
                default:
                    // Every other edge kind is irrelevant to this index.
                    break;
            }
        }
    }
    // Flatten a target map into a key-sorted object of path-sorted arrays.
    const toSortedObject = (index) => {
        const result = Object.create(null);
        const targets = [...index.keys()].sort();
        for (const target of targets) {
            result[target] = [...index.get(target).values()].sort(comparePaths);
        }
        return result;
    };
    return {
        schema_version: CODE_MAP_SCHEMA_VERSION,
        project_id: projectId,
        updated_at: new Date().toISOString(),
        dependents_by_file: toSortedObject(fileDependents),
        dependents_by_symbol: toSortedObject(symbolDependents),
    };
}
|
|
197
|
+
//# sourceMappingURL=indexes.js.map
|