arcscope 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +9 -0
- package/dist/adoption/counter.js +23 -0
- package/dist/engine/discover.js +53 -0
- package/dist/engine/extract.js +71 -0
- package/dist/engine/glob.js +35 -0
- package/dist/engine/grammar-registry.js +89 -0
- package/dist/engine/import-graph.js +0 -0
- package/dist/engine/imports.js +89 -0
- package/dist/engine/index-store.js +196 -0
- package/dist/engine/module-resolver.js +134 -0
- package/dist/engine/ref-scan.js +88 -0
- package/dist/engine/types.js +2 -0
- package/dist/grammars/arcscope-extra-javascript.scm +9 -0
- package/dist/grammars/arcscope-extra-typescript.scm +23 -0
- package/dist/grammars/javascript-tags.scm +99 -0
- package/dist/grammars/tree-sitter-javascript.wasm +0 -0
- package/dist/grammars/tree-sitter-tsx.wasm +0 -0
- package/dist/grammars/tree-sitter-typescript.wasm +0 -0
- package/dist/grammars/typescript-tags.scm +23 -0
- package/dist/grammars/web-tree-sitter.wasm +0 -0
- package/dist/index.js +31 -0
- package/dist/init/init.js +66 -0
- package/dist/knowledge/drift.js +68 -0
- package/dist/knowledge/locator.js +24 -0
- package/dist/knowledge/resolver.js +45 -0
- package/dist/knowledge/types.js +2 -0
- package/dist/knowledge/vocab-loader.js +69 -0
- package/dist/log.js +9 -0
- package/dist/server/serve.js +179 -0
- package/dist/tools/arch-list.js +33 -0
- package/dist/tools/arch-query.js +81 -0
- package/dist/tools/dep-graph.js +0 -0
- package/dist/tools/find-def.js +46 -0
- package/dist/tools/find-refs.js +88 -0
- package/package.json +55 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Ilie Danila
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# arcscope
|
|
2
|
+
|
|
3
|
+
A fully-local MCP server that gives an AI coding agent three stacked views of an unfamiliar codebase — symbols, the module/dependency graph, and a repo-declared **architecture vocabulary** answered *live* against current code — so it stops re-deriving structure with grep every session.
|
|
4
|
+
|
|
5
|
+
**Status:** design complete, pre-build. See the full spec: [`docs/arcscope-spec.html`](docs/arcscope-spec.html).
|
|
6
|
+
|
|
7
|
+
The wedge: where existing tools keep project knowledge as static prose that silently rots, arcscope binds each named concept to an executable locator that is recomputed on every query — so drift is a signal, not a surprise.
|
|
8
|
+
|
|
9
|
+
> Building with Claude Code? Start at [`CLAUDE.md`](CLAUDE.md).
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import { appendFile, mkdir } from 'node:fs/promises';
|
|
2
|
+
import { dirname } from 'node:path';
|
|
3
|
+
import { logError } from '../log.js';
|
|
4
|
+
// Local-only invocation counter for the adoption gate. Appends one JSONL line per
|
|
5
|
+
// tool call to .arcscope/usage.jsonl so the grep-vs-tool ratio can be inspected
|
|
6
|
+
// during dogfooding. NEVER touches the network. Fire-and-forget: a failed write
|
|
7
|
+
// is logged to stderr and swallowed — instrumentation must never break the tool.
|
|
8
|
+
// Appends one JSON line per tool invocation to a local file. Recording is
// best-effort: any failure (serialization, mkdir, append) is reported through
// logError and swallowed, so `record()` never rejects.
export class InvocationCounter {
  // Path of the JSONL usage log this counter appends to.
  file;

  // file: path of the usage log; its parent directory is created on demand.
  constructor(file) {
    this.file = file;
  }

  // Append `{ ts, tool, args }` as a single JSONL line.
  //   tool: name of the invoked tool
  //   args: the call's arguments, serialized with JSON.stringify
  // Resolves to undefined whether or not the write succeeded.
  async record(tool, args) {
    try {
      // Serialize inside the try: JSON.stringify throws on circular structures
      // and BigInt values, and a bad `args` payload must not break the caller.
      const line = `${JSON.stringify({ ts: new Date().toISOString(), tool, args })}\n`;
      await mkdir(dirname(this.file), { recursive: true });
      await appendFile(this.file, line, 'utf8');
    } catch (err) {
      logError('usage counter write failed (non-fatal):', err);
    }
  }
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import { execFileSync } from 'node:child_process';
|
|
2
|
+
import { readdirSync } from 'node:fs';
|
|
3
|
+
import { join } from 'node:path';
|
|
4
|
+
// Source extensions arcscope parses in P0 (TS/JS family). .d.ts is excluded
|
|
5
|
+
// separately — declaration files hold no navigable definitions.
|
|
6
|
+
// Matches the TS/JS family of extensions: .ts/.mts/.cts, .tsx, .js/.mjs/.cjs, .jsx.
const SOURCE_RE = /\.(?:[cm]?ts|tsx|[cm]?js|jsx)$/;
// Matches declaration files: .d.ts, .d.mts, .d.cts.
const DECL_RE = /\.d\.[cm]?ts$/;
// Directory names the non-git fallback walk refuses to enter.
const SKIP_DIRS = new Set(['node_modules', '.git', 'dist', 'build', 'out', 'coverage', '.arcscope']);

// True when `name` (a file name or path) ends in a supported source extension
// and is not a declaration file.
export function isSourceFile(name) {
  if (DECL_RE.test(name)) {
    return false;
  }
  return SOURCE_RE.test(name);
}
|
|
13
|
+
// Returns absolute paths of source files under `root`.
|
|
14
|
+
//
|
|
15
|
+
// git-first: `git ls-files` (tracked + untracked-not-ignored) respects .gitignore
|
|
16
|
+
// for free, which is the single biggest correctness/perf lever — on a real repo it
|
|
17
|
+
// is the difference between ~1k real source files and ~18k including dist/ and
|
|
18
|
+
// tool caches. Falls back to a directory walk when `root` is not a git repo.
|
|
19
|
+
// Entry point for discovery: use the git listing when one is available
// (discoverViaGit returned a non-nullish value), otherwise walk the directory
// tree starting at `root`.
export function discoverFiles(root) {
  const tracked = discoverViaGit(root);
  if (tracked !== null && tracked !== undefined) {
    return tracked;
  }
  return walk(root, []);
}
|
|
22
|
+
// Lists source files through `git ls-files` (tracked plus untracked-not-ignored).
// Returns paths joined onto `root`, or null when the git command fails for any
// reason (not a repository, git not installed, ...) so the caller can fall back.
function discoverViaGit(root) {
  let raw;
  try {
    raw = execFileSync('git', ['-C', root, 'ls-files', '-z', '--cached', '--others', '--exclude-standard'], { encoding: 'utf8', maxBuffer: 64 * 1024 * 1024, stdio: ['ignore', 'pipe', 'ignore'] });
  } catch {
    return null; // not a git repo / git unavailable -> walk
  }
  // `--cached` can report an unmerged path once per index stage during a
  // conflicted merge; a Set keeps each file once while preserving git's order.
  const unique = new Set();
  for (const rel of raw.split('\0')) {
    // `-z` output is NUL-terminated, so the final split element is empty.
    if (rel.length > 0 && isSourceFile(rel)) {
      unique.add(join(root, rel));
    }
  }
  return [...unique];
}
|
|
34
|
+
// Recursive directory walk used when git is unavailable. Appends the path of
// every source file under `dir` to `out` and returns that same array.
// Directories that cannot be read are silently skipped; directories named in
// SKIP_DIRS or starting with a dot are never entered.
function walk(dir, out) {
  let listing;
  try {
    listing = readdirSync(dir, { withFileTypes: true });
  } catch {
    return out;
  }
  for (const entry of listing) {
    const { name } = entry;
    if (entry.isDirectory()) {
      const skipped = SKIP_DIRS.has(name) || name.startsWith('.');
      if (!skipped) {
        walk(join(dir, name), out);
      }
      continue;
    }
    if (entry.isFile() && isSourceFile(name)) {
      out.push(join(dir, name));
    }
  }
  return out;
}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
// Capture names of the form `definition.<kind>` mark a definition node.
const DEF_PREFIX = 'definition.';
// Upper bound, in characters, on a stored signature.
const SIGNATURE_MAX = 200;

// Collects symbol definitions from a parsed tree by running the tags `query`
// over its root node. The tree is only read here; the caller keeps ownership.
//   query: object exposing `matches(node)` (a compiled tags query)
//   file:  value stored verbatim in each record's `file` field
//   tree:  parsed tree exposing `rootNode`
// Returns one record per (symbol, line): the first match wins, except that a
// non-'constant' kind replaces an earlier 'constant' for the same key (an
// exported arrow function matches both a constant and a function pattern).
export function extractDefs(query, file, tree) {
  const records = new Map();
  for (const { captures } of query.matches(tree.rootNode)) {
    // Within one match the last capture of each role is the one used.
    let definition;
    let identifier;
    for (const capture of captures) {
      if (capture.name.startsWith(DEF_PREFIX)) {
        definition = capture;
      } else if (capture.name === 'name') {
        identifier = capture;
      }
    }
    if (!definition || !identifier) {
      continue;
    }

    const symbol = identifier.node.text;
    // Constructors are dropped here because the exclusion written in the tags
    // query is not applied automatically by the query engine.
    const ignorable = symbol.length === 0 || symbol === 'constructor';
    if (ignorable) {
      continue;
    }

    const kind = definition.name.slice(DEF_PREFIX.length);
    const line = identifier.node.startPosition.row + 1; // rows are 0-based
    const key = `${symbol}:${line}`;

    const prior = records.get(key);
    if (prior) {
      const upgradesConstant = prior.kind === 'constant' && kind !== 'constant';
      if (!upgradesConstant) {
        continue;
      }
    }

    records.set(key, {
      symbol,
      kind,
      file,
      line,
      signature: signatureOf(definition.node),
      precisionTier: 'tree-sitter',
    });
  }
  return Array.from(records.values());
}
|
|
46
|
+
// Honest "signature": the definition's real header text with any block body
|
|
47
|
+
// stripped using the AST (the body node's start) — NOT a text search for `{`,
|
|
48
|
+
// which truncates generic constraints (`<T extends { id }>`), object-typed params
|
|
49
|
+
// (`(x: { a })`), and object const values (`= { a: 1 }`). Definitions with no
|
|
50
|
+
// block body (type aliases, ambient signatures, expression-bodied arrows,
|
|
51
|
+
// non-function consts) keep their full text. Whitespace is collapsed so a wrapped
|
|
52
|
+
// multi-line signature reads on one line; the body is never included.
|
|
53
|
+
// Builds the one-line signature for a definition node: the node's text up to
// where its block body starts (the whole text when bodyStartIndex reports no
// body), with whitespace runs collapsed to single spaces, trimmed, and capped
// at SIGNATURE_MAX characters.
function signatureOf(node) {
  const cut = bodyStartIndex(node);
  const source = node.text;
  let head = source;
  if (cut !== null) {
    // `cut` is an index into the whole file; rebase it onto this node's text.
    head = source.slice(0, cut - node.startIndex);
  }
  const oneLine = head.replace(/\s+/g, ' ').trim();
  return oneLine.slice(0, SIGNATURE_MAX);
}
|
|
61
|
+
// Source index where the definition's block body begins, or null if it has none.
|
|
62
|
+
// Locates where a definition's body begins. Returns the `startIndex` of the
// node's own `body` field when it has one; otherwise the `startIndex` of the
// `body` of its `value` field, but only when that body is a `statement_block`;
// otherwise null.
function bodyStartIndex(node) {
  const own = node.childForFieldName('body');
  if (own) {
    return own.startIndex;
  }
  // Declarator-style nodes hold the function on `value`; an expression body
  // there does not count as a block.
  const nested = node.childForFieldName('value')?.childForFieldName('body');
  return nested && nested.type === 'statement_block' ? nested.startIndex : null;
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
// Minimal glob -> RegExp for scoping find_def results by path. Supports `**`
|
|
2
|
+
// (across path segments), `*` (within a segment) and `?`. Paths are posix-style.
|
|
3
|
+
// Intentionally NOT a full glob (no brace/extglob expansion) — sufficient for
|
|
4
|
+
// find_def's optional path scoping. A fuller matcher can replace this in P2 when
|
|
5
|
+
// vocabulary path-locators need richer semantics.
|
|
6
|
+
// Characters that must be backslash-escaped to appear literally in a RegExp.
const REGEX_SPECIAL = /[.+^${}()|[\]\\]/g;

// Compiles a minimal glob into an anchored RegExp over posix-style paths.
//   `**/` matches zero or more whole path segments (so `**/x` matches `x` and
//         `a/b/x`, but not `ax`)
//   `**`  (not followed by `/`) matches any characters, including `/`
//   `*`   matches any run of characters within one segment
//   `?`   matches exactly one character within a segment
// Every other character is matched literally. No brace or extglob support.
function globToRegExp(glob) {
  let re = '';
  for (let i = 0; i < glob.length; i++) {
    const c = glob[i];
    if (c === '*' && glob[i + 1] === '*') {
      i++; // consume the second `*`
      if (glob[i + 1] === '/') {
        // Keep the segment boundary: a bare `.*` here would let `**/index.ts`
        // match `src/myindex.ts`.
        re += '(?:.*/)?';
        i++; // consume the `/`
      } else {
        re += '.*';
      }
    } else if (c === '*') {
      re += '[^/]*';
    } else if (c === '?') {
      re += '[^/]';
    } else {
      re += c.replace(REGEX_SPECIAL, '\\$&');
    }
  }
  return new RegExp(`^${re}$`);
}
|
|
33
|
+
// Compiled patterns keyed by glob text. Callers test many paths against the
// same glob, so compiling once per glob avoids rebuilding the RegExp per path.
const GLOB_CACHE_MAX = 256;
const globCache = new Map();

// True when `path` matches `glob` under globToRegExp's rules.
export function matchGlob(path, glob) {
  let pattern = globCache.get(glob);
  if (pattern === undefined) {
    // Crude bound: drop everything rather than track recency.
    if (globCache.size >= GLOB_CACHE_MAX) {
      globCache.clear();
    }
    pattern = globToRegExp(glob);
    globCache.set(glob, pattern);
  }
  return pattern.test(path);
}
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import { Parser, Language, Query } from 'web-tree-sitter';
|
|
2
|
+
import { existsSync, readFileSync } from 'node:fs';
|
|
3
|
+
import { fileURLToPath } from 'node:url';
|
|
4
|
+
import { dirname, join } from 'node:path';
|
|
5
|
+
// Each grammar's tags query is the upstream javascript tags ++ (for TS) the
|
|
6
|
+
// upstream typescript tags ++ arcscope's vetted additions (type aliases, enums,
|
|
7
|
+
// namespaces, exported consts — forms the stock tags omit). The TS grammar is a
|
|
8
|
+
// superset of JS, so the JS patterns compile and match against it; running the TS
|
|
9
|
+
// tags alone would miss most real definitions. Verified by the engine tests.
|
|
10
|
+
// Builds the spec shared by the TypeScript-family grammars: they differ only in
// the WASM file and use the same three query files, in this order. Each call
// returns a fresh `queries` array.
const tsFamily = (wasm) => ({
  wasm,
  queries: ['javascript-tags.scm', 'typescript-tags.scm', 'arcscope-extra-typescript.scm'],
});

// Grammar id -> WASM file name plus the query files concatenated for it.
const GRAMMARS = {
  typescript: tsFamily('tree-sitter-typescript.wasm'),
  tsx: tsFamily('tree-sitter-tsx.wasm'),
  javascript: {
    wasm: 'tree-sitter-javascript.wasm',
    queries: ['javascript-tags.scm', 'arcscope-extra-javascript.scm'],
  },
};

// File extension (with leading dot) -> grammar id.
const EXT_TO_GRAMMAR = Object.fromEntries([
  ['.ts', 'typescript'],
  ['.mts', 'typescript'],
  ['.cts', 'typescript'],
  ['.tsx', 'tsx'],
  ['.js', 'javascript'],
  ['.jsx', 'javascript'],
  ['.mjs', 'javascript'],
  ['.cjs', 'javascript'],
]);
|
|
34
|
+
// The web-tree-sitter WASM runtime is a single global; init it once per process
|
|
35
|
+
// regardless of how many GrammarRegistry instances exist (tests create several).
|
|
36
|
+
// Promise for the one-time `Parser.init` call, shared by every registry in the
// process.
let runtimeInit;

// Lazily loads grammar WASM files and compiles their tag queries, all read
// from a local directory (`dir`). Loaded grammars are cached per instance.
export class GrammarRegistry {
  // Parser instance created on first ensureInit(); reused afterwards.
  parser;
  // grammar id -> { id, language, query } for grammars that finished loading.
  cache = new Map();
  // Directory holding the .wasm and .scm assets.
  dir;
  // grammar id -> in-flight load promise, so concurrent getGrammar calls for
  // the same id share one load instead of reading and compiling twice.
  #pending = new Map();

  // grammarsDir: optional asset directory; when nullish it is resolved by
  // resolveGrammarsDir(), which throws if no asset directory exists.
  constructor(grammarsDir) {
    this.dir = grammarsDir ?? resolveGrammarsDir();
  }

  // Grammar id registered for a file extension (with leading dot), or undefined.
  grammarIdForExt(ext) {
    return EXT_TO_GRAMMAR[ext];
  }

  // Initializes the parser runtime (once per process) and returns this
  // registry's Parser, creating it on first use.
  async ensureInit() {
    if (!runtimeInit) {
      // The first registry to get here decides where runtime files are located.
      runtimeInit = Parser.init({ locateFile: (name) => join(this.dir, name) });
    }
    await runtimeInit;
    if (!this.parser) {
      this.parser = new Parser();
    }
    return this.parser;
  }

  // Loaded grammar for a file extension, or undefined for unknown extensions.
  async getForExt(ext) {
    const id = EXT_TO_GRAMMAR[ext];
    return id ? this.getGrammar(id) : undefined;
  }

  // Returns `{ id, language, query }` for a grammar id, loading it on first use.
  // Rejects with `unknown grammar id: <id>` when `id` is not a key of GRAMMARS;
  // that check runs before any runtime initialization or file access.
  async getGrammar(id) {
    const cached = this.cache.get(id);
    if (cached) {
      return cached;
    }
    // Own-property check: a plain lookup would resolve inherited names such as
    // `constructor` to a non-spec value.
    if (!Object.hasOwn(GRAMMARS, id)) {
      throw new Error(`unknown grammar id: ${id}`);
    }
    let inflight = this.#pending.get(id);
    if (!inflight) {
      // Forget the attempt once it settles; on success `cache` holds the
      // result, on failure a later call may retry.
      inflight = this.#load(id).finally(() => this.#pending.delete(id));
      this.#pending.set(id, inflight);
    }
    return inflight;
  }

  // Reads the grammar's WASM and query files from `dir`, compiles the combined
  // query, and stores the result in `cache`.
  async #load(id) {
    await this.ensureInit();
    const spec = GRAMMARS[id];
    const language = await Language.load(new Uint8Array(readFileSync(join(this.dir, spec.wasm))));
    // Query files are concatenated in the order listed in the spec.
    const source = spec.queries.map((q) => readFileSync(join(this.dir, q), 'utf8')).join('\n');
    const loaded = { id, language, query: new Query(language, source) };
    this.cache.set(id, loaded);
    return loaded;
  }
}
|
|
77
|
+
// Grammars live in dist/grammars/ in the published package and vendor/grammars/
|
|
78
|
+
// in the source tree (tests run against the latter). Prefer the built location.
|
|
79
|
+
// Finds the grammar asset directory relative to this module's own location.
// Candidates are tried in order: `../grammars`, then `../../vendor/grammars`.
// Returns the first one that exists; throws when neither does.
function resolveGrammarsDir() {
  const moduleDir = dirname(fileURLToPath(import.meta.url));
  const candidates = [
    join(moduleDir, '..', 'grammars'), // dist/engine -> dist/grammars
    join(moduleDir, '..', '..', 'vendor', 'grammars'), // src/engine -> vendor/grammars
  ];
  const found = candidates.find((candidate) => existsSync(candidate));
  if (found === undefined) {
    throw new Error('arcscope: grammar assets not found (looked in dist/grammars and vendor/grammars). ' +
      'Run `npm run build`, or reinstall the package.');
  }
  return found;
}
|
|
Binary file
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
// Extract import + re-export edges from an already-parsed tree by walking the
|
|
2
|
+
// top-level statements — import/re-export statements are always top-level in ESM,
|
|
3
|
+
// so this is cheap and complete (no full query needed). Verified against real
|
|
4
|
+
// barrels in the de-risking spike. The caller owns the tree.
|
|
5
|
+
// Scans only the root node's named children and turns each import statement
// and each re-exporting export statement into an edge record. Statements that
// yield no edge (fromImport / fromReExport returned a falsy value) and all
// other node types are skipped. `file` is passed through to the edge builders.
export function extractImports(file, tree) {
  const edges = [];
  for (const statement of tree.rootNode.namedChildren) {
    let edge;
    switch (statement.type) {
      case 'import_statement':
        edge = fromImport(file, statement);
        break;
      case 'export_statement':
        edge = fromReExport(file, statement);
        break;
      default:
        continue;
    }
    if (edge) {
      edges.push(edge);
    }
  }
  return edges;
}
|
|
21
|
+
// Module specifier of an import/export statement: the text of its `source`
// field with the first and last character (the quotes) removed, or null when
// the statement has no `source` field.
function specifierOf(node) {
  const source = node.childForFieldName('source');
  if (!source) {
    return null;
  }
  const quoted = source.text;
  return quoted.slice(1, -1);
}
|
|
25
|
+
// Builds the edge record for one import statement, or returns null when the
// statement has no module specifier.
// The record is `{ file, specifier, kind: 'import', star, names, line }`:
//   names: one `{ imported, local }` per binding ('default' for a default
//          import, '*' for a namespace import, the exported name otherwise)
//   star:  true when a namespace import is present
//   line:  1-based line of the statement
// A statement without an import clause (side-effect import) has no names.
function fromImport(file, node) {
  const specifier = specifierOf(node);
  if (specifier === null) {
    return null;
  }
  const line = node.startPosition.row + 1;
  const names = [];
  let star = false;

  const clause = node.namedChildren.find((child) => child.type === 'import_clause');
  for (const part of clause ? clause.namedChildren : []) {
    switch (part.type) {
      case 'identifier':
        // A bare identifier in the clause is the default import's local name.
        names.push({ imported: 'default', local: part.text });
        break;
      case 'namespace_import': {
        star = true;
        const binding = part.namedChildren.find((child) => child.type === 'identifier');
        if (binding) {
          names.push({ imported: '*', local: binding.text });
        }
        break;
      }
      case 'named_imports':
        for (const spec of part.namedChildren) {
          const name = spec.type === 'import_specifier' ? spec.childForFieldName('name') : null;
          if (!name) {
            continue;
          }
          const alias = spec.childForFieldName('alias');
          names.push({ imported: name.text, local: (alias ?? name).text });
        }
        break;
      default:
        break;
    }
  }
  return { file, specifier, kind: 'import', star, names, line };
}
|
|
59
|
+
// Builds the edge record for one export statement that re-exports from another
// module, or returns null when the statement has no module specifier (a local
// export declaration).
// The record is `{ file, specifier, kind: 're-export', star, names, line }`:
//   with an export clause: star is false and names lists each specifier as
//     `{ imported, local }`, where `local` is the exported-as name
//   without one (`export * from` / `export * as X from`): star is true and
//     names holds `{ imported: '*', local: X }` only when an alias is present
function fromReExport(file, node) {
  const specifier = specifierOf(node);
  if (specifier === null) {
    return null;
  }
  const line = node.startPosition.row + 1;
  const clause = node.namedChildren.find((child) => child.type === 'export_clause');

  if (!clause) {
    const namespace = node.namedChildren.find((child) => child.type === 'namespace_export');
    const alias = namespace?.namedChildren.find((child) => child.type === 'identifier')?.text;
    const names = alias ? [{ imported: '*', local: alias }] : [];
    return { file, specifier, kind: 're-export', star: true, names, line };
  }

  const names = clause.namedChildren.flatMap((spec) => {
    if (spec.type !== 'export_specifier') {
      return [];
    }
    const name = spec.childForFieldName('name');
    if (!name) {
      return [];
    }
    const alias = spec.childForFieldName('alias');
    return [{ imported: name.text, local: (alias ?? name).text }];
  });
  return { file, specifier, kind: 're-export', star: false, names, line };
}
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
import { readFileSync, statSync } from 'node:fs';
|
|
2
|
+
import { extname, relative, sep } from 'node:path';
|
|
3
|
+
import { discoverFiles } from './discover.js';
|
|
4
|
+
import { extractDefs } from './extract.js';
|
|
5
|
+
import { extractImports } from './imports.js';
|
|
6
|
+
import { matchGlob } from './glob.js';
|
|
7
|
+
// The in-memory index: bare symbol name -> definitions across the repo. No
|
|
8
|
+
// persistent store, no file watcher (a deliberate slice cut). `sync()` is the
|
|
9
|
+
// single idempotent index lifecycle: the first call is a full build; later calls
|
|
10
|
+
// re-discover and re-extract only files whose mtime/size changed, and evict
|
|
11
|
+
// deleted ones. That's the lazy re-index — cheap because unchanged files are
|
|
12
|
+
// never re-read.
|
|
13
|
+
// In-memory index of symbol definitions and import/re-export edges for the
// source files under `root`. Nothing is persisted; `sync()` brings the index
// up to date with the files currently discovered on disk.
export class IndexStore {
  // Directory that discovery starts from and that stored paths are relative to.
  root;
  // Supplies grammars (`getForExt`) and the parser (`ensureInit`).
  registry;
  defs = new Map(); // bare symbol name -> definition records
  files = new Map(); // absolute path -> { mtimeMs, size, symbols }
  edges = new Map(); // absolute path -> import/re-export edges

  constructor(root, registry) {
    this.root = root;
    this.registry = registry;
  }

  // Number of files currently indexed.
  get fileCount() {
    return this.files.size;
  }

  // Total number of definition records across all symbols.
  get symbolCount() {
    let n = 0;
    for (const arr of this.defs.values()) {
      n += arr.length;
    }
    return n;
  }

  // Total number of import/re-export edges across all files.
  get edgeCount() {
    let n = 0;
    for (const arr of this.edges.values()) {
      n += arr.length;
    }
    return n;
  }

  // Every definition record in the index, as a new flat array.
  allDefs() {
    return [...this.defs.values()].flat();
  }

  // Every import/re-export edge in the index, as a new flat array.
  allEdges() {
    return [...this.edges.values()].flat();
  }

  // Set of indexed files as root-relative, `/`-separated paths.
  relFileSet() {
    const out = new Set();
    for (const abs of this.files.keys()) {
      out.add(this.relPath(abs));
    }
    return out;
  }

  // Reconciles the index with the files discovered under `root`:
  //   - evicts indexed files that are no longer discovered,
  //   - evicts indexed files that are discovered but can no longer be stat'ed,
  //   - (re)indexes files that are new or whose mtime/size changed.
  // Resolves to `{ fileCount, symbolCount, changed, removed, elapsedMs }`.
  async sync() {
    const start = performance.now();
    // A Set also collapses any duplicate paths in the discovery result.
    const current = new Set(discoverFiles(this.root));
    let removed = 0;
    for (const abs of [...this.files.keys()]) {
      if (!current.has(abs)) {
        this.evict(abs);
        removed++;
      }
    }
    let changed = 0;
    for (const abs of current) {
      let st;
      try {
        st = statSync(abs);
      } catch {
        // Discovery can list a path that is gone from disk (e.g. deleted from
        // the working tree but still in the git index). Without this eviction
        // its old definitions and edges would stay in the index indefinitely.
        if (this.files.has(abs)) {
          this.evict(abs);
          removed++;
        }
        continue;
      }
      const prev = this.files.get(abs);
      if (prev && prev.mtimeMs === st.mtimeMs && prev.size === st.size) {
        continue; // unchanged: never re-read
      }
      await this.indexFile(abs, st.mtimeMs, st.size);
      changed++;
    }
    return {
      fileCount: this.files.size,
      symbolCount: this.symbolCount,
      changed,
      removed,
      elapsedMs: Math.round(performance.now() - start),
    };
  }

  // Definitions whose name is exactly `symbol`, optionally restricted to files
  // matching `pathGlob`, sorted by file then line. Returns a new array.
  find(symbol, pathGlob) {
    const arr = this.defs.get(symbol) ?? [];
    const res = pathGlob ? arr.filter((r) => matchGlob(r.file, pathGlob)) : arr;
    return [...res].sort(byFileLine);
  }

  // Approximate, case-insensitive name matches; names equal to the query
  // (ignoring case) are skipped. Returns at most `limit` records, one per
  // matching symbol name (its first definition by file/line within the
  // optional `pathGlob` scope), ranked by:
  //   0: name starts with the query
  //   1: name contains the query
  //   2: query contains the name (names of 4+ characters only)
  // then by shorter name, then alphabetically.
  findFuzzy(symbol, pathGlob, limit = 12) {
    const q = symbol.toLowerCase();
    const matches = [];
    for (const [name, recs] of this.defs) {
      const lower = name.toLowerCase();
      if (lower === q) {
        continue;
      }
      let score;
      if (lower.startsWith(q)) {
        score = 0;
      } else if (lower.includes(q)) {
        score = 1;
      } else if (lower.length >= 4 && q.includes(lower)) {
        // Weakest signal; the length floor keeps tiny names from matching
        // every query.
        score = 2;
      } else {
        continue;
      }
      const inScope = pathGlob ? recs.filter((r) => matchGlob(r.file, pathGlob)) : recs;
      if (inScope.length === 0) {
        continue;
      }
      matches.push({ rec: [...inScope].sort(byFileLine)[0], score });
    }
    matches.sort((a, b) => a.score - b.score ||
      a.rec.symbol.length - b.rec.symbol.length ||
      (a.rec.symbol < b.rec.symbol ? -1 : 1));
    return matches.slice(0, limit).map((m) => m.rec);
  }

  // Replaces the index entries for one file: evicts its previous entries,
  // parses it, and records its definitions, edges and stat fingerprint.
  // Returns without recording anything when the extension has no grammar, the
  // file cannot be read, or the parser yields no tree.
  async indexFile(abs, mtimeMs, size) {
    this.evict(abs);
    const grammar = await this.registry.getForExt(extname(abs));
    if (!grammar) {
      return;
    }
    let source;
    try {
      source = readFileSync(abs, 'utf8');
    } catch {
      return;
    }
    const parser = await this.registry.ensureInit();
    parser.setLanguage(grammar.language);
    const tree = parser.parse(source);
    if (!tree) {
      return;
    }
    const rel = this.relPath(abs);
    try {
      // Both extractors run before the index is mutated, so a throw here
      // leaves no partial entries behind.
      const records = extractDefs(grammar.query, rel, tree);
      const fileEdges = extractImports(rel, tree);
      const symbols = new Set();
      for (const r of records) {
        let arr = this.defs.get(r.symbol);
        if (!arr) {
          arr = [];
          this.defs.set(r.symbol, arr);
        }
        arr.push(r);
        symbols.add(r.symbol);
      }
      if (fileEdges.length > 0) {
        this.edges.set(abs, fileEdges);
      }
      // `symbols` is what evict() later uses to find this file's records.
      this.files.set(abs, { mtimeMs, size, symbols: [...symbols] });
    } finally {
      tree.delete();
    }
  }

  // Removes every definition, edge and bookkeeping entry belonging to `abs`.
  // No-op when the file is not indexed.
  evict(abs) {
    const entry = this.files.get(abs);
    if (!entry) {
      return;
    }
    const rel = this.relPath(abs);
    for (const sym of entry.symbols) {
      const arr = this.defs.get(sym);
      if (!arr) {
        continue;
      }
      const kept = arr.filter((r) => r.file !== rel);
      if (kept.length) {
        this.defs.set(sym, kept);
      } else {
        this.defs.delete(sym);
      }
    }
    this.edges.delete(abs);
    this.files.delete(abs);
  }

  // Path of `abs` relative to `root`, using `/` separators on every platform.
  relPath(abs) {
    const rel = relative(this.root, abs);
    return sep === '/' ? rel : rel.split(sep).join('/');
  }
}
|
|
194
|
+
// Sort comparator for definition records: ascending by `file` (plain string
// comparison), then ascending by `line` within the same file.
function byFileLine(a, b) {
  if (a.file !== b.file) {
    return a.file < b.file ? -1 : 1;
  }
  return a.line - b.line;
}
|