universal-ast-mapper 1.24.0 → 1.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +66 -1
- package/README.md +26 -2
- package/dist/cli.js +38 -0
- package/dist/explorer.js +25 -8
- package/dist/index.js +45 -0
- package/dist/semantic.js +365 -0
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -6,6 +6,39 @@ since 1.0.0, guarantees a stable MCP tool / CLI surface across the 1.x line.
|
|
|
6
6
|
|
|
7
7
|
---
|
|
8
8
|
|
|
9
|
+
## [1.26.0] — 2026-06-11 · Coupling overlay in the explorer
|
|
10
|
+
- **`ast-map explore` color modes** — new toolbar dropdown: `color: folder`
|
|
11
|
+
(existing per-directory hues) or **`color: coupling`** — nodes shaded by
|
|
12
|
+
**instability** I = Ce/(Ca+Ce) on a green (0, stable) → yellow → red
|
|
13
|
+
(1, volatile) scale; orphan files stay gray.
|
|
14
|
+
- **Legend** (bottom-left, shown in coupling mode) explains the scale; the hover
|
|
15
|
+
tooltip and the detail sidebar now show **Ca / Ce / I** per file.
|
|
16
|
+
- Explorer nodes carry `ca` / `ce` / `inst` computed from the deduped file-level
|
|
17
|
+
import edges — same definition as `get_coupling` (Robert C. Martin metrics).
|
|
18
|
+
- Still a single self-contained HTML file, dark-mode aware, zero dependencies.
|
|
19
|
+
- Tests: +5 checks in `test/analysis.mjs` (144 total).
|
|
20
|
+
|
|
21
|
+
## [1.25.0] — 2026-06-11 · Semantic symbol search
|
|
22
|
+
- **New MCP tool `semantic_search`** + **CLI `ast-map find <query> [dir]`** — find
|
|
23
|
+
symbols by *meaning*, not exact name: "remove expired cache entries" →
|
|
24
|
+
`clearDiskCache`, "find unused exported code" → `findDeadExports`.
|
|
25
|
+
- Pure lexical semantics — **no embeddings, no network, no model downloads**:
|
|
26
|
+
- **Identifier tokenization**: camelCase / PascalCase / snake_case / kebab-case /
|
|
27
|
+
digit and acronym boundaries (`getHTTPServerByID` → `get http server by id`).
|
|
28
|
+
- **Programming thesaurus**: 60 synonym groups (`fetch≈get≈load≈retrieve`,
|
|
29
|
+
`remove≈delete≈clear`, `unused≈dead`, `auth≈login≈session`, …).
|
|
30
|
+
- **Light stemming** (plural/gerund/past: `users`→`user`) + **fuzzy matching**
|
|
31
|
+
(edit distance ≤ 1 on tokens ≥ 4 chars).
|
|
32
|
+
- **BM25-style ranking**: corpus IDF (rare tokens weigh more), field weights
|
|
33
|
+
(name 3× > doc 2× > signature 1.5× > path/kind 1×), match-type weights
|
|
34
|
+
(direct > synonym > fuzzy), coverage bonus, and length normalization so
|
|
35
|
+
focused names (`login`) outrank composites (`handleLogin`).
|
|
36
|
+
- Results include a normalized `score` (0–1) and `matchedTerms` explaining each hit
|
|
37
|
+
(`unused≈dead` = synonym, `cach~cache` = fuzzy).
|
|
38
|
+
- Options: `limit` (default 20), `kind` filter, `exportedOnly`.
|
|
39
|
+
- New module `semantic` (`semanticSearch`, `splitIdentifier`, `stem`). Tests: +8
|
|
40
|
+
checks in `test/analysis.mjs` (139 total). **29 MCP tools / 31 CLI commands.**
|
|
41
|
+
|
|
9
42
|
## [1.24.0] — 2026-06-10 · TS path-alias resolution
|
|
10
43
|
- Bare imports like `@/components/Button` now resolve through **`tsconfig.json` /
|
|
11
44
|
`jsconfig.json` `compilerOptions.paths`** (+ `baseUrl`): nearest-config lookup above
|
|
@@ -232,4 +265,36 @@ since 1.0.0, guarantees a stable MCP tool / CLI surface across the 1.x line.
|
|
|
232
265
|
declared in 2+ files.
|
|
233
266
|
|
|
234
267
|
## [0.8.3] — 2026-05-31 · TSX/React component props
|
|
235
|
-
- Component
|
|
268
|
+
- Component symbols carry `propsType` + `props[]`; detects `React.FC<P>` and
|
|
269
|
+
JSX-returning PascalCase functions. MCP server version now read from package.json.
|
|
270
|
+
|
|
271
|
+
## [0.8.2] — 2026-05-30 · Swift cross-file wiring
|
|
272
|
+
- `import <Module>` → that module's files (`Sources/<Module>/`). Completes
|
|
273
|
+
cross-file graph/resolver support for all four v0.8.0 languages.
|
|
274
|
+
|
|
275
|
+
## [0.8.1] — 2026-05-30 · Kotlin + C/C++ cross-file wiring
|
|
276
|
+
- Kotlin FQCN/package index; C/C++ `#include` resolution with header↔impl pairing.
|
|
277
|
+
- Fixes: parse-cache rel-path leak; Kotlin call-graph extraction.
|
|
278
|
+
|
|
279
|
+
---
|
|
280
|
+
|
|
281
|
+
## Earlier (pre-session history)
|
|
282
|
+
|
|
283
|
+
- **0.8.0** — +4 languages: C · C++ · Kotlin · Swift (symbol extraction + imports).
|
|
284
|
+
- **0.7.0** — Go full module resolution; C# reverse `calledBy`; 4-suite test harness.
|
|
285
|
+
- **0.6.0** — +3 languages: Rust · Java · C#; cross-language resolver.
|
|
286
|
+
- **0.5.x** — `/ast-map` skill auto-install; iterative DFS; barrel re-exports; parse cache; call-graph aliases; `.ast-map.config.json`.
|
|
287
|
+
- **0.4.0** — `search_symbol`, `get_file_deps`, `get_top_symbols`, dead-code tiers.
|
|
288
|
+
- **0.3.0** — CLI; `find_dead_code`, `find_circular_deps`, `get_change_impact`, `get_call_graph`.
|
|
289
|
+
- **0.2.0** — import extraction; `resolve_imports`; `build_symbol_graph`.
|
|
290
|
+
- **0.1.0** — `get_skeleton_json`, `generate_skeleton`, `get_symbol_context`, `validate_architecture`.
|
|
291
|
+
|
|
292
|
+
[1.13.0]: https://github.com/6ixthxense/AST-MCP/releases/tag/v1.13.0
|
|
293
|
+
[1.12.0]: https://github.com/6ixthxense/AST-MCP/releases/tag/v1.12.0
|
|
294
|
+
[1.11.0]: https://github.com/6ixthxense/AST-MCP/releases/tag/v1.11.0
|
|
295
|
+
[1.10.0]: https://github.com/6ixthxense/AST-MCP/releases/tag/v1.10.0
|
|
296
|
+
[1.9.0]: https://github.com/6ixthxense/AST-MCP/releases/tag/v1.9.0
|
|
297
|
+
[1.8.0]: https://github.com/6ixthxense/AST-MCP/releases/tag/v1.8.0
|
|
298
|
+
[1.7.0]: https://github.com/6ixthxense/AST-MCP/releases/tag/v1.7.0
|
|
299
|
+
[1.6.0]: https://github.com/6ixthxense/AST-MCP/releases/tag/v1.6.0
|
|
300
|
+
[1.5.0]: https://github.com/6ixthxense/AST-MCP/releases/tag/v1.5.0
|
package/README.md
CHANGED
|
@@ -4,7 +4,7 @@ An **MCP server + CLI tool** that turns source code into structured, machine-rea
|
|
|
4
4
|
|
|
5
5
|
Built on [tree-sitter](https://tree-sitter.github.io/) WASM grammars. Zero regex guessing — real AST parsing.
|
|
6
6
|
|
|
7
|
-
**
|
|
7
|
+
**29 MCP tools / 31 CLI commands / 5 MCP prompts** spanning skeletons, dependency graphs, and deep analysis — dead code, cycles, change-impact, complexity, duplicates, unused params, type-flow, decorators — plus monorepo support, an interactive **graph explorer** with a **coupling overlay** (`ast-map explore`), **watch mode**, a one-page **health dashboard** (`ast-map report`), a **persistent parse cache + parallel parsing** (warm re-scans skip parsing entirely), and a **CI quality gate** (`ast-map check`, baseline ratchet).
|
|
8
8
|
|
|
9
9
|
**Supported languages:** TypeScript · TSX · JavaScript (ESM/CJS) · Python · Go · Rust · Java · C# · C · C++ · Kotlin · Swift · Vue · Svelte (SFC `<script>`) · **PHP** · **Ruby**
|
|
10
10
|
|
|
@@ -127,6 +127,7 @@ ast-map modules [dir] # directory-level coupling + ed
|
|
|
127
127
|
ast-map cache [stats|clear] # persistent parse cache (.ast-map/cache)
|
|
128
128
|
ast-map check [dir] [--update-baseline] [--min-score N] [--max-cycles N] ...
|
|
129
129
|
ast-map search <pattern> [dir] [-m contains|exact|regex] [-k kind] [-e]
|
|
130
|
+
ast-map find <query> [dir] [-l N] [-k kind] [-e] # semantic: by meaning
|
|
130
131
|
ast-map deps <file> [--scan <dir>]
|
|
131
132
|
ast-map top <dir> [-n 10]
|
|
132
133
|
ast-map impact <file> <symbol> [--scan <dir>]
|
|
@@ -155,6 +156,9 @@ ast-map validate src/ --max-lines 300 --max-imports 20
|
|
|
155
156
|
# Find all symbols named like "handler" across the project
|
|
156
157
|
ast-map search handler src/ --exported
|
|
157
158
|
|
|
159
|
+
# Don't know the name? Search by meaning
|
|
160
|
+
ast-map find "remove expired cache entries" src/
|
|
161
|
+
|
|
158
162
|
# What does this file import / what imports it?
|
|
159
163
|
ast-map deps src/lib/auth.ts --scan src/
|
|
160
164
|
|
|
@@ -514,6 +518,21 @@ Find symbols by name across all source files in a directory.
|
|
|
514
518
|
|
|
515
519
|
---
|
|
516
520
|
|
|
521
|
+
### `semantic_search`
|
|
522
|
+
Find symbols by **meaning**, not exact name — for when you know what the code *does* but not what it's called.
|
|
523
|
+
|
|
524
|
+
No embeddings, no network: identifier tokenization (camelCase / snake_case / acronyms), a built-in programming thesaurus (`fetch≈get≈load`, `remove≈delete≈clear`, `unused≈dead`, …), light stemming, fuzzy matching, and BM25-style IDF ranking over symbol names, doc comments, signatures and file paths. Results carry a normalized `score` and `matchedTerms` explaining *why* each symbol matched.
|
|
525
|
+
|
|
526
|
+
```
|
|
527
|
+
semantic_search("find unused exported code") →
|
|
528
|
+
1.000 findDeadExports (find, unused≈dead, export)
|
|
529
|
+
0.557 DeadExport (unused≈dead, export)
|
|
530
|
+
```
|
|
531
|
+
|
|
532
|
+
**Params:** `path`, `query`, `limit` (default 20), `kind`, `exportedOnly`
|
|
533
|
+
|
|
534
|
+
---
|
|
535
|
+
|
|
517
536
|
### `get_file_deps`
|
|
518
537
|
For a single file, show what it imports and what imports it (with symbol names).
|
|
519
538
|
More focused than `build_symbol_graph` — use for quick dependency lookup.
|
|
@@ -801,6 +820,8 @@ Not part of the public API: the internal `src/` module layout and the generated
|
|
|
801
820
|
|
|
802
821
|
| Version | What changed |
|
|
803
822
|
|---------|--------------|
|
|
823
|
+
| **1.26.0** | **Coupling overlay in the explorer** — `ast-map explore` gains a `color: coupling` mode: nodes shaded by **instability** I = Ce/(Ca+Ce) on a green (stable) → red (volatile) scale, with a legend, and Ca / Ce / I readouts in the hover tooltip and detail sidebar. Spot load-bearing files and volatile hotspots at a glance. |
|
|
824
|
+
| **1.25.0** | **Semantic symbol search** — new MCP tool `semantic_search` + CLI `ast-map find <query>`: find symbols by *meaning* ("remove expired sessions" → `clearDiskCache`). Identifier tokenization + 60-group programming thesaurus + stemming + fuzzy matching + BM25-style IDF ranking over names, docs, signatures and paths. No embeddings, no network. (**29 tools / 31 commands**) |
|
|
804
825
|
| **1.24.0** | **TS path-alias resolution** — bare imports like `@/components/Button` now resolve via the **nearest** `tsconfig.json`/`jsconfig.json` (`compilerOptions.paths` + `baseUrl`, relative `extends` chains, longest-prefix matching, string-aware JSONC parser). Wired into `resolve_imports`, the symbol graph, and the call graph — on a real Next.js app this took the import graph from 31 to **324 edges** and cut false dead-exports by ~30%. |
|
|
805
826
|
| **1.23.0** | **Configurable root boundary** — `AST_MAP_ROOT` accepts **multiple roots** (path-delimiter separated) and `AST_MAP_UNLOCKED=1` allows analyzing **any absolute path** on request (default stays locked). Analysis/graph/report rel-paths now computed against the matched root, so cross-root results are correct. New `roots` module + 13-check test suite. |
|
|
806
827
|
| **1.22.0** | **PHP & Ruby support** — `.php` (classes, interfaces, traits, enums, methods with visibility, `use` imports incl. grouped, require/include) and `.rb`/`.rake` (classes, modules, methods, `self.` singleton methods, `private` section tracking, require/require_relative). Unblocked by upgrading `web-tree-sitter` 0.20.8 → 0.21.0 (all existing grammars re-verified). **16 languages**. |
|
|
@@ -834,4 +855,7 @@ Not part of the public API: the internal `src/` module layout and the generated
|
|
|
834
855
|
| **0.9.0** | **Scoped type-flow tracing** — new `trace_type` MCP tool + `ast-map trace-type` (alias `flow`) CLI: follow a named type through function params, return types, typed variables, and class fields across a directory. Completes the deeper-analysis suite (dead code · cycles · impact · complexity · duplicates · unused params · type flow). **18 MCP tools**. |
|
|
835
856
|
| **0.8.7** | **Python decorators in the call graph** — function/method symbols now carry a `decorators` field (`@router.get("/x")` → `router.get("/x")`), surfaced in skeletons (outline + full) and in `get_call_graph`. Traces framework wiring like FastAPI/Flask routes and `@staticmethod`/`@property` stacks to their handler. |
|
|
836
857
|
| **0.8.6** | **Unused parameter detection** — new `find_unused_params` MCP tool + `ast-map unused-params` (alias `unused`) CLI: named functions whose params are never referenced. Skips `_`-prefixed/destructured/anonymous and treats object-shorthand as a use (low false-positive). Server now 17 tools. |
|
|
837
|
-
| **0.8.5** | **Cyclomatic complexity** — new `get_complexity` MCP tool + `ast-map
|
|
858
|
+
| **0.8.5** | **Cyclomatic complexity** — new `get_complexity` MCP tool + `ast-map complexity` (alias `cx`) CLI: per-function cyclomatic complexity with low/moderate/high/very-high ratings, file or directory scope. |
|
|
859
|
+
| **0.8.4** | **Duplicate symbol detection** — `find_duplicate_symbols` / `ast-map duplicates` (alias `dupes`): symbol names exported from more than one file. |
|
|
860
|
+
| **0.8.1–0.8.3** | Kotlin + C/C++ cross-file wiring · Swift module resolution (`Sources/<Module>/`) · TSX/React component props (`propsType` + `props[]`, `React.FC<P>` detection). |
|
|
861
|
+
| **0.1.0–0.8.0** | Foundation: skeleton extraction (`get_skeleton_json`, `generate_skeleton`, `get_symbol_context`, `validate_architecture`) · import resolution + symbol graph · dead code / cycles / impact / call graph · CLI · 12 languages (+Rust · Java · C# · Go · C · C++ · Kotlin · Swift) · `/ast-map` skill auto-install · barrel re-exports · parse cache. |
|
package/dist/cli.js
CHANGED
|
@@ -27,6 +27,7 @@ import { findLayerViolations } from "./layers.js";
|
|
|
27
27
|
import { computeModuleCoupling } from "./modulecoupling.js";
|
|
28
28
|
import { buildCallGraph } from "./callgraph.js";
|
|
29
29
|
import { searchSymbols } from "./search.js";
|
|
30
|
+
import { semanticSearch } from "./semantic.js";
|
|
30
31
|
import { parseRootsFromEnv } from "./roots.js";
|
|
31
32
|
const ROOT = parseRootsFromEnv().roots[0]; // CLI is local — no boundary, primary root only
|
|
32
33
|
// Persistent parse cache (disable with AST_MAP_NO_CACHE=1 or "cache": false in config).
|
|
@@ -1092,6 +1093,43 @@ program
|
|
|
1092
1093
|
}
|
|
1093
1094
|
console.log();
|
|
1094
1095
|
});
|
|
1096
|
+
// ─── Command: find (semantic search) ─────────────────────────────────────────
|
|
1097
|
+
program
|
|
1098
|
+
.command("find <query> [dir]")
|
|
1099
|
+
.description("Semantic symbol search — find symbols by meaning, not exact name")
|
|
1100
|
+
.option("-l, --limit <n>", "Max results (default 20)", "20")
|
|
1101
|
+
.option("-k, --kind <kind>", "Filter by kind: function, class, interface, type, method, const…")
|
|
1102
|
+
.option("-e, --exported", "Only show exported symbols")
|
|
1103
|
+
.option("--json", "Output as JSON")
|
|
1104
|
+
.action(async (query, dir, opts) => {
|
|
1105
|
+
const searchDir = dir ?? ".";
|
|
1106
|
+
const { abs, rel } = resolveArg(searchDir);
|
|
1107
|
+
if (!fs.statSync(abs).isDirectory())
|
|
1108
|
+
die(`"${rel}" is not a directory`);
|
|
1109
|
+
const limit = Math.max(1, parseInt(opts.limit ?? "20", 10) || 20);
|
|
1110
|
+
const matches = await semanticSearch(abs, query, ROOT, {
|
|
1111
|
+
limit,
|
|
1112
|
+
kind: opts.kind,
|
|
1113
|
+
exportedOnly: opts.exported,
|
|
1114
|
+
});
|
|
1115
|
+
if (opts.json)
|
|
1116
|
+
return jsonOut({ directory: rel, query, matchCount: matches.length, matches });
|
|
1117
|
+
header(`Semantic Search — ${bold(`"${query}"`)} in ${rel}/`);
|
|
1118
|
+
if (matches.length === 0) {
|
|
1119
|
+
console.log(indent(dim("No matches found.")));
|
|
1120
|
+
}
|
|
1121
|
+
else {
|
|
1122
|
+
table(matches.map(m => [
|
|
1123
|
+
m.score.toFixed(3),
|
|
1124
|
+
m.file,
|
|
1125
|
+
m.symbol,
|
|
1126
|
+
m.kind,
|
|
1127
|
+
m.matchedTerms.slice(0, 4).join(", "),
|
|
1128
|
+
]), [["Score", 6], ["File", 34], ["Symbol", 26], ["Kind", 10], ["Matched", 30]]);
|
|
1129
|
+
console.log(`\n ${matches.length} match(es)`);
|
|
1130
|
+
}
|
|
1131
|
+
console.log();
|
|
1132
|
+
});
|
|
1095
1133
|
// ─── Command: deps ────────────────────────────────────────────────────────────
|
|
1096
1134
|
program
|
|
1097
1135
|
.command("deps <file>")
|
package/dist/explorer.js
CHANGED
|
@@ -20,7 +20,7 @@ function deriveFileGraph(graph) {
|
|
|
20
20
|
continue;
|
|
21
21
|
const f = n;
|
|
22
22
|
const parts = f.id.split("/");
|
|
23
|
-
nodes.push({ id: f.id, symbols: f.symbolCount, group: parts.length > 1 ? parts[0] : "(root)", lang: f.language, syms: fileSyms.get(f.id) ?? [] });
|
|
23
|
+
nodes.push({ id: f.id, symbols: f.symbolCount, group: parts.length > 1 ? parts[0] : "(root)", lang: f.language, syms: fileSyms.get(f.id) ?? [], ca: 0, ce: 0, inst: 0 });
|
|
24
24
|
}
|
|
25
25
|
const seen = new Set();
|
|
26
26
|
const links = [];
|
|
@@ -37,6 +37,18 @@ function deriveFileGraph(graph) {
|
|
|
37
37
|
seen.add(key);
|
|
38
38
|
links.push({ source: e.from, target: toFile });
|
|
39
39
|
}
|
|
40
|
+
// Per-file coupling (Ca = fan-in, Ce = fan-out, I = Ce/(Ca+Ce)) from the deduped links.
|
|
41
|
+
const outSet = new Map();
|
|
42
|
+
const inSet = new Map();
|
|
43
|
+
for (const l of links) {
|
|
44
|
+
(outSet.get(l.source) ?? outSet.set(l.source, new Set()).get(l.source)).add(l.target);
|
|
45
|
+
(inSet.get(l.target) ?? inSet.set(l.target, new Set()).get(l.target)).add(l.source);
|
|
46
|
+
}
|
|
47
|
+
for (const n of nodes) {
|
|
48
|
+
n.ce = outSet.get(n.id)?.size ?? 0;
|
|
49
|
+
n.ca = inSet.get(n.id)?.size ?? 0;
|
|
50
|
+
n.inst = n.ca + n.ce === 0 ? 0 : Math.round((n.ce / (n.ca + n.ce)) * 100) / 100;
|
|
51
|
+
}
|
|
40
52
|
return { nodes, links };
|
|
41
53
|
}
|
|
42
54
|
const STYLE = "body{margin:0;font-family:system-ui,sans-serif;color:#222;background:#fafafa}" +
|
|
@@ -51,7 +63,10 @@ const STYLE = "body{margin:0;font-family:system-ui,sans-serif;color:#222;backgro
|
|
|
51
63
|
"#panel .row{padding:3px 6px;border-radius:5px;cursor:pointer;word-break:break-all;line-height:1.5}#panel .row:hover{background:#f0f0f0}" +
|
|
52
64
|
"#panel .sym{color:#444;padding:2px 6px;word-break:break-all}#panel .k{color:#999;font-size:11px}" +
|
|
53
65
|
"#close{position:absolute;top:10px;right:12px;cursor:pointer;color:#999;font-size:18px;line-height:1;border:none;background:none}" +
|
|
54
|
-
"
|
|
66
|
+
"#mode{padding:5px 8px;border:1px solid #ddd;border-radius:6px;font-size:12px;background:#fff;color:#222}" +
|
|
67
|
+
"#leg{position:fixed;left:14px;bottom:14px;z-index:3;background:#fff;border:1px solid #e5e5e5;border-radius:8px;padding:8px 12px;font-size:11px;color:#555;display:none}" +
|
|
68
|
+
"#leg .bar{width:150px;height:8px;border-radius:4px;background:linear-gradient(90deg,hsl(120,65%,46%),hsl(60,75%,50%),hsl(0,70%,52%));margin:5px 0 3px}" +
|
|
69
|
+
"@media(prefers-color-scheme:dark){body{color:#ddd;background:#161616}#bar,#panel{background:#1e1e1e;border-color:#333}#q{background:#2a2a2a;border-color:#444;color:#ddd}#panel .row:hover{background:#2a2a2a}#panel .sym{color:#bbb}#mode{background:#2a2a2a;border-color:#444;color:#ddd}#leg{background:#1e1e1e;border-color:#333;color:#bbb}}";
|
|
55
70
|
const CLIENT = "var c=document.getElementById('cv'),ctx=c.getContext('2d'),tip=document.getElementById('tip'),panel=document.getElementById('panel');" +
|
|
56
71
|
"var PANELW=300,panelOpen=false;" +
|
|
57
72
|
"var W,H;function resize(){var r=devicePixelRatio||1;W=innerWidth||c.clientWidth||800;H=(innerHeight-48)||c.clientHeight||600;c.width=W*r;c.height=H*r;ctx.setTransform(r,0,0,r,0,0);}addEventListener('resize',function(){resize();});resize();" +
|
|
@@ -62,7 +77,8 @@ const CLIENT = "var c=document.getElementById('cv'),ctx=c.getContext('2d'),tip=d
|
|
|
62
77
|
"sim.forEach(function(n){n.x=W/2+(Math.random()-0.5)*240;n.y=H/2+(Math.random()-0.5)*240;});" +
|
|
63
78
|
"var groups={},gi=0;function color(g){if(groups[g]==null)groups[g]=gi++;return 'hsl('+((groups[g]*67)%360)+',58%,55%)';}" +
|
|
64
79
|
"var adj={};links.forEach(function(l){(adj[l.source]=adj[l.source]||[]).push(l.target);(adj[l.target]=adj[l.target]||[]).push(l.source);});" +
|
|
65
|
-
"var view={x:0,y:0,k:1},sel=null,hover=null,drag=null,pan=null,q='',autofit=true;" +
|
|
80
|
+
"var view={x:0,y:0,k:1},sel=null,hover=null,drag=null,pan=null,q='',autofit=true,mode='group';" +
|
|
81
|
+
"function instColor(i){return 'hsl('+Math.round((1-i)*120)+',65%,'+Math.round(46+i*8)+'%)';}" +
|
|
66
82
|
"function radius(n){return 4+Math.sqrt(n.symbols||0)*1.7;}" +
|
|
67
83
|
"function tick(){if(!sim.length)return;var k=0.0016;for(var i=0;i<sim.length;i++){var a=sim[i];a.vx+=(W/2-a.x)*k;a.vy+=(H/2-a.y)*k;for(var j=i+1;j<sim.length;j++){var b=sim[j];var dx=a.x-b.x,dy=a.y-b.y,d2=dx*dx+dy*dy;if(d2<100)d2=100;var d=Math.sqrt(d2),f=2200/d2,fx=f*dx/d,fy=f*dy/d;a.vx+=fx;a.vy+=fy;b.vx-=fx;b.vy-=fy;}}" +
|
|
68
84
|
"links.forEach(function(l){var a=byId[l.source],b=byId[l.target];if(!a||!b)return;var dx=b.x-a.x,dy=b.y-a.y,d=Math.sqrt(dx*dx+dy*dy)+0.01,f=(d-90)*0.02,fx=f*dx/d,fy=f*dy/d;a.vx+=fx;a.vy+=fy;b.vx-=fx;b.vy-=fy;});" +
|
|
@@ -74,12 +90,12 @@ const CLIENT = "var c=document.getElementById('cv'),ctx=c.getContext('2d'),tip=d
|
|
|
74
90
|
"function esc(t){return String(t).replace(/&/g,'&amp;').replace(/</g,'&lt;');}" +
|
|
75
91
|
"function rowList(ids){if(!ids||!ids.length)return '<div class=\"sym\" style=\"color:#aaa\">none</div>';return ids.slice().sort().map(function(id){return '<div class=\"row\" data-id=\"'+esc(id)+'\">'+esc(id)+'</div>';}).join('');}" +
|
|
76
92
|
"function showPanel(n){sel=n;panelOpen=true;var imp=out[n.id]||[],impBy=inn[n.id]||[];var syms=(n.syms||[]).map(function(s){var i=s.indexOf(' ');return '<div class=\"sym\"><span class=\"k\">'+esc(s.slice(0,i))+'</span> '+esc(s.slice(i+1))+'</div>';}).join('')||'<div class=\"sym\" style=\"color:#aaa\">none</div>';" +
|
|
77
|
-
"panel.innerHTML='<button id=\"close\">×</button>'+'<h2>'+esc(n.id.split('/').pop())+'</h2><div class=\"path\">'+esc(n.id)+'</div>'+'<div class=\"meta\">'+esc(n.lang)+' · '+(n.symbols||0)+' symbols'+(deg[n.id]?'':' · no in-scope deps')+'</div>'+'<h3>Imports ('+imp.length+')</h3>'+rowList(imp)+'<h3>Imported by ('+impBy.length+')</h3>'+rowList(impBy)+'<h3>Symbols</h3>'+syms;" +
|
|
93
|
+
"panel.innerHTML='<button id=\"close\">×</button>'+'<h2>'+esc(n.id.split('/').pop())+'</h2><div class=\"path\">'+esc(n.id)+'</div>'+'<div class=\"meta\">'+esc(n.lang)+' · '+(n.symbols||0)+' symbols'+(deg[n.id]?' · Ca '+(n.ca||0)+' · Ce '+(n.ce||0)+' · I '+(n.inst||0):' · no in-scope deps')+'</div>'+'<h3>Imports ('+imp.length+')</h3>'+rowList(imp)+'<h3>Imported by ('+impBy.length+')</h3>'+rowList(impBy)+'<h3>Symbols</h3>'+syms;" +
|
|
78
94
|
"panel.style.display='block';}" +
|
|
79
95
|
"panel.addEventListener('click',function(e){if(e.target.id==='close'){panelOpen=false;sel=null;panel.style.display='none';autofit=true;return;}var id=e.target.getAttribute('data-id');if(id&&byId[id]){showPanel(byId[id]);center(byId[id]);}});" +
|
|
80
96
|
"function draw(){ctx.clearRect(0,0,W,H);ctx.save();ctx.translate(view.x,view.y);ctx.scale(view.k,view.k);ctx.lineWidth=0.8;" +
|
|
81
97
|
"links.forEach(function(l){var a=byId[l.source],b=byId[l.target];if(!a||!b)return;var on=sel&&(l.source===sel.id||l.target===sel.id);ctx.strokeStyle=on?'rgba(110,110,240,0.9)':'rgba(150,150,150,0.18)';ctx.beginPath();ctx.moveTo(a.x,a.y);ctx.lineTo(b.x,b.y);ctx.stroke();});" +
|
|
82
|
-
"function dot(n,orphan){var dim=(sel&&n!==sel&&(adj[sel.id]||[]).indexOf(n.id)<0)||(q&&n.id.toLowerCase().indexOf(q)<0);ctx.globalAlpha=dim?0.14:(orphan?0.55:1);ctx.beginPath();ctx.arc(n.x,n.y,orphan?3.2:radius(n),0,6.2832);ctx.fillStyle=color(n.group);ctx.fill();if(n===sel||n===hover){ctx.lineWidth=2;ctx.strokeStyle='#fff';ctx.stroke();ctx.lineWidth=0.8;}}" +
|
|
98
|
+
"function dot(n,orphan){var dim=(sel&&n!==sel&&(adj[sel.id]||[]).indexOf(n.id)<0)||(q&&n.id.toLowerCase().indexOf(q)<0);ctx.globalAlpha=dim?0.14:(orphan?0.55:1);ctx.beginPath();ctx.arc(n.x,n.y,orphan?3.2:radius(n),0,6.2832);ctx.fillStyle=mode==='inst'?(deg[n.id]?instColor(n.inst):'#999'):color(n.group);ctx.fill();if(n===sel||n===hover){ctx.lineWidth=2;ctx.strokeStyle='#fff';ctx.stroke();ctx.lineWidth=0.8;}}" +
|
|
83
99
|
"orphans.forEach(function(n){dot(n,true);});sim.forEach(function(n){dot(n,false);});" +
|
|
84
100
|
"ctx.globalAlpha=1;ctx.fillStyle=getComputedStyle(document.body).color;ctx.font='11px system-ui';sim.forEach(function(n){if(n===sel||n===hover||n.symbols>=14){ctx.fillText(n.id.split('/').pop(),n.x+radius(n)+3,n.y+3);}});ctx.restore();}" +
|
|
85
101
|
"function loop(){var w=innerWidth,hh=innerHeight-48;if(w&&hh&&(w!==W||hh!==H))resize();tick();tick();layoutOrphans();if(autofit)fitView();draw();var bx=bb4(nodes);document.getElementById(\"dbg\").textContent=\"W=\"+W+\" H=\"+H+\" iw=\"+innerWidth+\"x\"+innerHeight+\" dpr=\"+(devicePixelRatio||1)+\" k=\"+view.k.toFixed(2)+\" vx=\"+Math.round(view.x)+\" vy=\"+Math.round(view.y)+\" fit=\"+autofit+\" sim=\"+sim.length+\" orph=\"+orphans.length+\" worldBox=\"+Math.round(bx[0])+\",\"+Math.round(bx[1])+\"..\"+Math.round(bx[2])+\",\"+Math.round(bx[3]);requestAnimationFrame(loop);}" +
|
|
@@ -87,10 +103,11 @@ const CLIENT = "var c=document.getElementById('cv'),ctx=c.getContext('2d'),tip=d
|
|
|
87
103
|
"function pick(p){var all=sim.concat(orphans);for(var i=all.length-1;i>=0;i--){var n=all[i];var r=(deg[n.id]?radius(n):3.2)+5;if((p.x-n.x)*(p.x-n.x)+(p.y-n.y)*(p.y-n.y)<=r*r)return n;}return null;}" +
|
|
88
104
|
"c.addEventListener('mousedown',function(e){autofit=false;var n=pick(world(e));if(n){drag=n;showPanel(n);}else{pan={x:e.clientX-view.x,y:e.clientY-view.y};}});" +
|
|
89
105
|
"c.addEventListener('dblclick',function(){panelOpen=false;sel=null;panel.style.display='none';autofit=true;});" +
|
|
90
|
-
"addEventListener('mousemove',function(e){var p=world(e);if(drag){drag.x=p.x;drag.y=p.y;drag.vx=0;drag.vy=0;}else if(pan){view.x=e.clientX-pan.x;view.y=e.clientY-pan.y;}else{hover=pick(p);if(hover){tip.style.display='block';tip.style.left=(e.clientX+12)+'px';tip.style.top=(e.clientY+12)+'px';tip.textContent=hover.id+' · '+(hover.symbols||0)+' symbols · '+hover.lang;}else tip.style.display='none';}});" +
|
|
106
|
+
"addEventListener('mousemove',function(e){var p=world(e);if(drag){drag.x=p.x;drag.y=p.y;drag.vx=0;drag.vy=0;}else if(pan){view.x=e.clientX-pan.x;view.y=e.clientY-pan.y;}else{hover=pick(p);if(hover){tip.style.display='block';tip.style.left=(e.clientX+12)+'px';tip.style.top=(e.clientY+12)+'px';tip.textContent=hover.id+' · '+(hover.symbols||0)+' symbols · '+hover.lang+(deg[hover.id]?' · Ca '+hover.ca+' Ce '+hover.ce+' I '+hover.inst:'');}else tip.style.display='none';}});" +
|
|
91
107
|
"addEventListener('mouseup',function(){drag=null;pan=null;});" +
|
|
92
108
|
"c.addEventListener('wheel',function(e){e.preventDefault();autofit=false;var s=e.deltaY<0?1.1:0.9;var mx=e.clientX,my=e.clientY-48;view.x=mx-(mx-view.x)*s;view.y=my-(my-view.y)*s;view.k*=s;},{passive:false});" +
|
|
93
109
|
"document.getElementById('q').addEventListener('input',function(e){q=e.target.value.toLowerCase();});" +
|
|
110
|
+
"document.getElementById('mode').addEventListener('change',function(e){mode=e.target.value;document.getElementById('leg').style.display=mode==='inst'?'block':'none';});" +
|
|
94
111
|
"addEventListener('keydown',function(e){if(e.key==='d'&&e.target.tagName!=='INPUT'){var x=document.getElementById('dbg');x.style.display=x.style.display==='none'?'block':'none';}});loop();";
|
|
95
112
|
/** Build a self-contained, dependency-free HTML graph explorer. */
|
|
96
113
|
export function buildExplorerHtml(graph, root) {
|
|
@@ -100,7 +117,7 @@ export function buildExplorerHtml(graph, root) {
|
|
|
100
117
|
return ("<!doctype html><html><head><meta charset=\"utf-8\"><meta name=\"viewport\" content=\"width=device-width,initial-scale=1\">" +
|
|
101
118
|
"<title>AST-MCP — " + title + " graph</title><style>" + STYLE + "</style></head><body>" +
|
|
102
119
|
"<div id=\"bar\"><h1>AST-MCP graph</h1><span class=\"muted\">" + data.nodes.length + " files · " + data.links.length + " edges · drag / scroll / click</span>" +
|
|
103
|
-
"<input id=\"q\" placeholder=\"filter files…\"
|
|
104
|
-
"<canvas id=\"cv\"></canvas><div id=\"tip\"></div><div id=\"panel\"></div><div id=\"dbg\" style=\"position:fixed;left:8px;bottom:8px;font:11px monospace;color:#e07;z-index:6;pointer-events:none;white-space:pre;display:none\"></div>" +
|
|
120
|
+
"<input id=\"q\" placeholder=\"filter files…\" /><select id=\"mode\"><option value=\"group\">color: folder</option><option value=\"inst\">color: coupling</option></select></div>" +
|
|
121
|
+
"<canvas id=\"cv\"></canvas><div id=\"tip\"></div><div id=\"panel\"></div><div id=\"leg\"><b>Instability I = Ce/(Ca+Ce)</b><div class=\"bar\"></div><div style=\"display:flex;justify-content:space-between\"><span>0 = stable</span><span>1 = volatile</span></div></div><div id=\"dbg\" style=\"position:fixed;left:8px;bottom:8px;font:11px monospace;color:#e07;z-index:6;pointer-events:none;white-space:pre;display:none\"></div>" +
|
|
105
122
|
"<script>var DATA=" + dataJson + ";</script><script>" + CLIENT + "</script></body></html>");
|
|
106
123
|
}
|
package/dist/index.js
CHANGED
|
@@ -17,6 +17,7 @@ import { buildSymbolGraph } from "./graph.js";
|
|
|
17
17
|
import { findDeadExports, findCircularDeps, getChangeImpact, getFileDeps, getTopSymbols, findDuplicateSymbols } from "./graph-analysis.js";
|
|
18
18
|
import { buildCallGraph } from "./callgraph.js";
|
|
19
19
|
import { searchSymbols } from "./search.js";
|
|
20
|
+
import { semanticSearch } from "./semantic.js";
|
|
20
21
|
import { computeFileComplexity } from "./complexity.js";
|
|
21
22
|
import { findUnusedParams } from "./unused-params.js";
|
|
22
23
|
import { traceTypeInFile } from "./typeflow.js";
|
|
@@ -1146,6 +1147,50 @@ server.registerTool("search_symbol", {
|
|
|
1146
1147
|
return errorText(describeError(err));
|
|
1147
1148
|
}
|
|
1148
1149
|
});
|
|
1150
|
+
/* ─────────────────── tool: semantic_search ─────────────────────── */
|
|
1151
|
+
server.registerTool("semantic_search", {
|
|
1152
|
+
title: "Search symbols by meaning",
|
|
1153
|
+
description: "Find symbols by *meaning*, not exact name. Tokenizes identifiers (camelCase/snake_case), " +
|
|
1154
|
+
"expands programming synonyms (fetch≈get≈load, remove≈delete≈destroy, …), applies light " +
|
|
1155
|
+
"stemming and fuzzy matching, and ranks with BM25-style IDF weighting over symbol names, " +
|
|
1156
|
+
"doc comments, signatures and file paths.\n" +
|
|
1157
|
+
'Use when you know what code *does* but not what it\'s called: "remove expired sessions", ' +
|
|
1158
|
+
'"parse config file", "validate user input".',
|
|
1159
|
+
inputSchema: {
|
|
1160
|
+
path: z
|
|
1161
|
+
.string()
|
|
1162
|
+
.describe("Directory to search in, relative to project root or absolute within it."),
|
|
1163
|
+
query: z
|
|
1164
|
+
.string()
|
|
1165
|
+
.describe('What the code does, e.g. "delete old cache entries" or "load user settings".'),
|
|
1166
|
+
limit: z.number().int().min(1).max(100).optional().describe("Max results. Default 20."),
|
|
1167
|
+
kind: z
|
|
1168
|
+
.enum(["function", "class", "interface", "type", "method", "const", "var", "enum", "struct", "field"])
|
|
1169
|
+
.optional()
|
|
1170
|
+
.describe("Filter by symbol kind."),
|
|
1171
|
+
exportedOnly: z
|
|
1172
|
+
.boolean()
|
|
1173
|
+
.optional()
|
|
1174
|
+
.describe("Only return exported symbols. Default false."),
|
|
1175
|
+
},
|
|
1176
|
+
}, async ({ path: input, query, limit, kind, exportedOnly }) => {
|
|
1177
|
+
try {
|
|
1178
|
+
const { abs, rel, root } = resolveInRoot(input);
|
|
1179
|
+
if (!fs.statSync(abs).isDirectory()) {
|
|
1180
|
+
return errorText(`"${input}" is not a directory. semantic_search requires a directory.`);
|
|
1181
|
+
}
|
|
1182
|
+
const matches = await semanticSearch(abs, query, root, { limit, kind, exportedOnly });
|
|
1183
|
+
return jsonText({
|
|
1184
|
+
directory: rel.split(path.sep).join("/"),
|
|
1185
|
+
query,
|
|
1186
|
+
matchCount: matches.length,
|
|
1187
|
+
matches,
|
|
1188
|
+
});
|
|
1189
|
+
}
|
|
1190
|
+
catch (err) {
|
|
1191
|
+
return errorText(describeError(err));
|
|
1192
|
+
}
|
|
1193
|
+
});
|
|
1149
1194
|
/* ─────────────────── tool: get_file_deps ───────────────────────────────── */
|
|
1150
1195
|
server.registerTool("get_file_deps", {
|
|
1151
1196
|
title: "Get file-level import dependencies",
|
package/dist/semantic.js
ADDED
|
@@ -0,0 +1,365 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Semantic symbol search — find symbols by *meaning*, not exact name.
|
|
3
|
+
*
|
|
4
|
+
* No embeddings, no network, no model downloads. Pure lexical semantics:
|
|
5
|
+
* 1. Identifier tokenization — camelCase / PascalCase / snake_case /
|
|
6
|
+
* kebab-case / digits / acronym boundaries ("HTTPServer" → http, server).
|
|
7
|
+
* 2. Concept expansion — a built-in thesaurus of programming
|
|
8
|
+
* synonym groups (fetch≈get≈load≈retrieve, remove≈delete≈destroy, …).
|
|
9
|
+
* 3. Light stemming — plural/gerund/past suffixes folded so
|
|
10
|
+
* "parsing" matches "parse", "users" matches "user".
|
|
11
|
+
* 4. BM25-style ranking — rare tokens weigh more (IDF over the
|
|
12
|
+
* scanned corpus); name hits outweigh doc/signature/path hits;
|
|
13
|
+
* direct hits outweigh synonym hits outweigh fuzzy hits.
|
|
14
|
+
*/
|
|
15
|
+
import path from "node:path";
|
|
16
|
+
import { buildSkeleton, collectSourceFiles } from "./skeleton.js";
|
|
17
|
+
import { resolveOptions, loadProjectConfig } from "./config.js";
|
|
18
|
+
// ─── Synonym groups (programming thesaurus) ────────────────────────────────────
|
|
19
|
+
// Tokens in the same group are considered semantically equivalent (at a small
|
|
20
|
+
// penalty vs. a direct match). Keep each group tight — over-broad groups cause
|
|
21
|
+
// noisy results.
|
|
22
|
+
// One synonym group per line, words separated by single spaces. A word may
// appear in more than one group (e.g. "join", "buffer", "identifier").
const SYNONYM_GROUPS = [
    "get fetch load retrieve read lookup resolve",
    "set update write assign put patch modify change edit",
    "create make build new generate construct init initialize spawn",
    "delete remove destroy drop clear purge erase",
    "find search query locate match scan discover",
    "send dispatch emit publish post broadcast notify",
    "receive consume subscribe listen handle process",
    "start begin launch run execute invoke trigger",
    "stop end halt kill terminate cancel abort shutdown close",
    "check validate verify test assert ensure confirm",
    "parse decode deserialize unmarshal extract tokenize",
    "format encode serialize marshal stringify render print",
    "convert transform map translate cast normalize",
    "user account member person profile customer",
    "auth authenticate login signin authorize session credential",
    "config configuration settings options preferences setup",
    "error exception fault failure err panic",
    "log logger logging trace audit",
    "cache memo memoize store buffer",
    "list enumerate all collection array items",
    "count total sum aggregate tally",
    "file document path filename",
    "dir directory folder",
    "request req call http",
    "response res reply result output",
    "message msg event signal",
    "connect connection link attach bind join",
    "disconnect detach unbind release unsubscribe",
    "save persist commit flush sync",
    "copy clone duplicate snapshot",
    "merge combine concat union join",
    "split divide partition chunk segment",
    "sort order rank arrange",
    "filter select exclude where",
    "compare diff equal equals cmp",
    "compute calculate calc derive evaluate measure",
    "watch observe monitor track poll",
    "wait sleep delay debounce throttle defer",
    "retry attempt backoff",
    "lock mutex semaphore guard",
    "queue stack heap pool buffer",
    "graph tree node edge vertex",
    "dependency dep import require",
    "token symbol identifier ident name",
    "database db storage repository repo dao",
    "key id identifier uuid guid",
    "string str text char",
    "number num int integer float numeric",
    "boolean bool flag toggle",
    "helper util utility utils tool common",
    "test spec mock stub fixture",
    "render draw paint display show view",
    "hide conceal mask suppress",
    "enable activate on",
    "disable deactivate off",
    "add insert append push register",
    "pop shift dequeue take",
    "circular cycle cyclic loop recursive",
    "dead unused orphan unreachable stale",
    "complexity complex cyclomatic cognitive",
    "coupling cohesion instability afferent efferent",
].map((line) => line.split(" "));
|
|
85
|
+
// Lookup table: word form → indices of every synonym group containing it.
// Both the raw word and its stem are registered, so stemmed corpus/query
// tokens ("setting", "item") still hit groups declared as "settings", "items".
const GROUP_OF = new Map();
for (const [groupIndex, members] of SYNONYM_GROUPS.entries()) {
    for (const member of members) {
        // The Set collapses the pair when stemming leaves the word unchanged.
        for (const form of new Set([member, stem(member)])) {
            const indices = GROUP_OF.get(form);
            if (!indices) {
                GROUP_OF.set(form, [groupIndex]);
                continue;
            }
            if (!indices.includes(groupIndex)) {
                indices.push(groupIndex);
            }
        }
    }
}
|
|
102
|
+
// ─── Tokenization ──────────────────────────────────────────────────────────────
|
|
103
|
+
/**
 * Collapse a doubled final letter left behind after stripping a suffix
 * ("mapp" → "map"). Strings of length ≤ 2 are returned unchanged.
 */
function dropDoubledFinalLetter(base) {
    const last = base.length - 1;
    return base.length > 2 && base[last] === base[last - 1] ? base.slice(0, -1) : base;
}
/**
 * Light stemmer: fold common English suffixes so inflected forms share a token.
 *
 * Rules, first match wins:
 *   - "-ies" (length > 4)  → "-y"              "entries" → "entry"
 *   - "-ing" (length > 4)  → stripped, doubled final letter collapsed
 *                                              "mapping" → "map", "parsing" → "pars"
 *   - "-ed"  (length > 4)  → stripped, doubled final letter collapsed
 *   - "-es"  (length > 3)  → "es" is removed only after a sibilant ending
 *                            (ss, x, z, ch, sh): "boxes" → "box",
 *                            "matches" → "match"; otherwise only the "s" is
 *                            removed so the base keeps its "e":
 *                            "files" → "file", "names" → "name"
 *   - "-s"   (length > 3, not "-ss") → stripped  "users" → "user"
 *
 * The silent "e" is not restored after "-ing"/"-ed" ("parsing" → "pars", not
 * "parse"), so those stems differ from the base form by one letter.
 *
 * @param {string} word Lowercase word token.
 * @returns {string} The stemmed token (the input itself when no rule applies).
 */
export function stem(word) {
    const length = word.length;
    if (length > 4 && word.endsWith("ies")) {
        return word.slice(0, -3) + "y";
    }
    if (length > 4 && word.endsWith("ing")) {
        return dropDoubledFinalLetter(word.slice(0, -3));
    }
    if (length > 4 && word.endsWith("ed")) {
        return dropDoubledFinalLetter(word.slice(0, -2));
    }
    if (length > 3 && word.endsWith("es")) {
        const base = word.slice(0, -2);
        // Stripping "es" unconditionally turned "files" into "fil", which can
        // never match "file". Only sibilant endings take the "-es" plural.
        // The regex stays inside the function: stem() is called during module
        // initialisation, before a module-level const declared here would exist.
        return /(?:ss|x|z|ch|sh)$/.test(base) ? base : word.slice(0, -1);
    }
    if (length > 3 && word.endsWith("s") && !word.endsWith("ss")) {
        return word.slice(0, -1);
    }
    return word;
}
|
|
127
|
+
/**
 * Break an identifier (or free text) into lowercase word tokens.
 *
 * Any run of non-alphanumeric characters separates chunks (so snake_case,
 * kebab-case, dots, slashes and spaces all split). Inside a chunk, boundaries
 * are inserted at lower/digit→upper transitions, between an acronym and a
 * following capitalised word, and between letters and digits:
 * "getHTTPServerByID" → [get, http, server, by, id].
 *
 * @param {string} identifier Text to tokenize.
 * @returns {string[]} Lowercase tokens in order of appearance.
 */
export function splitIdentifier(identifier) {
    return identifier
        .split(/[^A-Za-z0-9]+/)
        .filter((chunk) => chunk.length > 0)
        .flatMap((chunk) => chunk
            .replace(/([a-z0-9])([A-Z])/g, "$1 $2") // aA / 9A
            .replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2") // AAa: acronym → word
            .replace(/([A-Za-z])([0-9])/g, "$1 $2") // letter → digit
            .replace(/([0-9])([A-Za-z])/g, "$1 $2") // digit → letter
            .split(" "))
        .filter((word) => word.length > 0)
        .map((word) => word.toLowerCase());
}
|
|
150
|
+
/**
 * Levenshtein distance between `a` and `b` with a cut-off.
 *
 * Returns `max + 1` as soon as the distance is known to exceed `max`: either
 * the lengths differ by more than `max`, or every cell of a DP row is above
 * `max`. Otherwise returns the value of the final DP cell.
 *
 * @param {string} a
 * @param {string} b
 * @param {number} max Largest distance the caller cares about.
 * @returns {number}
 */
function editDistance(a, b, max) {
    const rows = a.length;
    const cols = b.length;
    if (Math.abs(rows - cols) > max) {
        return max + 1;
    }
    // Two rolling rows; `above` starts as the distances from the empty prefix.
    let above = Array.from({ length: cols + 1 }, (_, col) => col);
    let row = new Array(cols + 1);
    for (let r = 1; r <= rows; r++) {
        row[0] = r;
        let smallest = r;
        for (let c = 1; c <= cols; c++) {
            const substitution = above[c - 1] + (a[r - 1] === b[c - 1] ? 0 : 1);
            const cell = Math.min(above[c] + 1, row[c - 1] + 1, substitution);
            row[c] = cell;
            if (cell < smallest) {
                smallest = cell;
            }
        }
        if (smallest > max) {
            return max + 1;
        }
        // The finished row becomes the reference row for the next iteration.
        [above, row] = [row, above];
    }
    return above[cols];
}
|
|
174
|
+
/**
 * True when tokens `a` and `b` are both registered in GROUP_OF and have at
 * least one synonym-group index in common.
 */
function sharesGroup(a, b) {
    const groupsOfA = GROUP_OF.get(a);
    const groupsOfB = groupsOfA ? GROUP_OF.get(b) : undefined;
    if (!groupsOfA || !groupsOfB) {
        return false;
    }
    for (const groupIndex of groupsOfA) {
        if (groupsOfB.includes(groupIndex)) {
            return true;
        }
    }
    return false;
}
|
|
183
|
+
// Weight of a token by the field it was found in; a token seen in several
// fields keeps the highest weight.
const FIELD_WEIGHT = {
    name: 3,
    doc: 2,
    signature: 1.5,
    path: 1,
    kind: 1,
};
|
|
184
|
+
/**
 * Record the stemmed form of `raw` in `doc.tokens` with `weight`, keeping the
 * larger weight when the token is already present. Stems shorter than two
 * characters are ignored.
 */
function addToken(doc, raw, weight) {
    const token = stem(raw);
    if (token.length >= 2) {
        const known = doc.tokens.get(token);
        const improves = known === undefined || weight > known;
        if (improves) {
            doc.tokens.set(token, weight);
        }
    }
}
|
|
192
|
+
/**
 * Walk a symbol tree depth-first (parent before its children), yielding each
 * symbol together with its dotted qualified name ("Parent.child").
 *
 * `file` is only passed through to recursive calls; `parentName` is the
 * qualified name of the enclosing symbol, if any.
 */
function* flattenDocs(symbols, file, parentName) {
    for (const sym of symbols) {
        let fullName = sym.name;
        if (parentName) {
            fullName = `${parentName}.${sym.name}`;
        }
        yield { sym, fullName };
        const hasChildren = sym.children.length > 0;
        if (hasChildren) {
            yield* flattenDocs(sym.children, file, fullName);
        }
    }
}
|
|
200
|
+
/**
 * Build the searchable document for one symbol.
 *
 * `match` is the result payload (file, symbol, kind, exported, range and, when
 * present, signature). `tokens` maps each stemmed token to the highest field
 * weight it was seen with; `nameTokens` holds the stems of the qualified name.
 * Fields are indexed in the order name, kind, doc, signature, path.
 */
function buildDoc(sym, fullName, file) {
    const match = {
        file,
        symbol: fullName,
        kind: sym.kind,
        exported: sym.exported ?? false,
        range: sym.range,
    };
    if (sym.signature) {
        match.signature = sym.signature;
    }
    const doc = { match, tokens: new Map(), nameTokens: new Set() };
    // Tokenize `text` and index every token under the given field weight.
    const indexText = (text, weight) => {
        for (const part of splitIdentifier(text)) {
            addToken(doc, part, weight);
        }
    };
    for (const part of splitIdentifier(fullName)) {
        addToken(doc, part, FIELD_WEIGHT.name);
        doc.nameTokens.add(stem(part));
    }
    addToken(doc, sym.kind, FIELD_WEIGHT.kind);
    if (sym.doc) {
        indexText(sym.doc, FIELD_WEIGHT.doc);
    }
    if (sym.signature) {
        indexText(sym.signature, FIELD_WEIGHT.signature);
    }
    for (const segment of file.split("/")) {
        indexText(segment, FIELD_WEIGHT.path);
    }
    return doc;
}
|
|
232
|
+
// ─── Scoring ───────────────────────────────────────────────────────────────────
|
|
233
|
+
// Multiplier by how a query concept matched a token: exact stem, shared
// synonym group, or within one edit.
const MATCH_WEIGHT = {
    direct: 1,
    synonym: 0.7,
    fuzzy: 0.45,
};
|
|
234
|
+
// Words dropped from a query before matching: English function words plus
// generic filler such as "function", "code" or "thing".
const STOPWORDS = new Set([
    "a an the of in on for to with that this",
    "is are be and or by from at it its as",
    "do does how what which where when i we you",
    "function method code thing stuff something",
].flatMap((line) => line.split(" ")));
|
|
241
|
+
/**
 * Rank the symbols found under a directory by how well they match a
 * natural-language-ish query.
 *
 * Steps: (1) turn the query into stemmed, de-duplicated concepts, dropping
 * stopwords; (2) build one token document per symbol from every collected
 * source file (files that fail to load are skipped); (3) weight each concept
 * by a BM25-style IDF over the documents; (4) score every document, apply
 * coverage / name bonuses and a penalty for unexplained name tokens;
 * (5) sort, truncate to `limit` and rescale scores relative to the best hit.
 *
 * @param dirAbs  Absolute path of directory to scan.
 * @param query   Query text, e.g. "remove expired sessions".
 * @param root    Project root (for relative paths in results).
 * @param options `limit` (default 20), `kind` filter, `exportedOnly` (default false).
 * @returns Matches (symbol info + `score` + `matchedTerms`), best first.
 */
export async function semanticSearch(dirAbs, query, root, options = {}) {
    const { limit = 20, kind, exportedOnly = false } = options;

    // ── 1. Query → concepts (stemmed, unique, original order) ──
    const concepts = [];
    for (const word of splitIdentifier(query)) {
        if (STOPWORDS.has(word)) {
            continue;
        }
        const stemmed = stem(word);
        if (stemmed.length < 2 || concepts.includes(stemmed)) {
            continue;
        }
        concepts.push(stemmed);
    }
    if (concepts.length === 0) {
        return [];
    }

    // ── 2. Corpus (detail "full" so doc comments and signatures are available) ──
    const opts = resolveOptions({ detail: "full", emitHtml: false }, loadProjectConfig(root));
    const docs = [];
    for (const absFile of collectSourceFiles(dirAbs, opts)) {
        const relFile = path.relative(root, absFile).split(path.sep).join("/");
        try {
            const skeleton = await buildSkeleton(absFile, relFile, opts);
            for (const { sym, fullName } of flattenDocs(skeleton.symbols, skeleton.file)) {
                const kindOk = !kind || sym.kind === kind;
                const exportOk = !exportedOnly || (sym.exported ?? false);
                if (kindOk && exportOk) {
                    docs.push(buildDoc(sym, fullName, skeleton.file));
                }
            }
        }
        catch {
            // skip unreadable / unparseable files
        }
    }
    if (docs.length === 0) {
        return [];
    }

    // ── 3. IDF per concept, from direct-token document frequency ──
    const N = docs.length;
    const idf = new Map();
    for (const concept of concepts) {
        const df = docs.reduce((count, doc) => (doc.tokens.has(concept) ? count + 1 : count), 0);
        idf.set(concept, Math.log(1 + (N - df + 0.5) / (df + 0.5)));
    }

    // Fuzzy rule shared by scoring and the name-token penalty.
    const isFuzzyPair = (token, concept) => concept.length >= 4 && token.length >= 4 && editDistance(token, concept, 1) <= 1;

    // Strongest (match weight × field weight) hit of `concept` among the
    // document's tokens, plus a label describing how it matched.
    const strongestHit = (doc, concept) => {
        let best = 0;
        let how = null;
        for (const [token, fieldWeight] of doc.tokens) {
            let matchWeight;
            let label;
            if (token === concept) {
                matchWeight = MATCH_WEIGHT.direct;
                label = concept;
            }
            else if (sharesGroup(token, concept)) {
                matchWeight = MATCH_WEIGHT.synonym;
                label = `${concept}≈${token}`;
            }
            else if (isFuzzyPair(token, concept)) {
                matchWeight = MATCH_WEIGHT.fuzzy;
                label = `${concept}~${token}`;
            }
            else {
                continue; // no relation: contributes nothing
            }
            const contribution = matchWeight * fieldWeight;
            if (contribution > best) {
                best = contribution;
                how = label;
                if (fieldWeight >= FIELD_WEIGHT.name && matchWeight === MATCH_WEIGHT.direct) {
                    break; // can't beat this
                }
            }
        }
        return { best, how };
    };

    // ── 4. Score every document ──
    const flatQuery = concepts.join("");
    const scored = [];
    for (const doc of docs) {
        let score = 0;
        let nameHits = 0;
        const matchedTerms = [];
        for (const concept of concepts) {
            const { best, how } = strongestHit(doc, concept);
            if (best > 0 && how) {
                score += best * (idf.get(concept) ?? 1);
                matchedTerms.push(how);
                if (doc.nameTokens.has(concept)) {
                    nameHits++;
                }
            }
        }
        if (matchedTerms.length === 0) {
            continue;
        }
        // Coverage ratio, then bonuses: every concept in the name; the
        // concatenated concepts appearing as a substring of the symbol.
        const coverage = matchedTerms.length / concepts.length;
        score *= 0.5 + 0.5 * coverage;
        if (nameHits === concepts.length) {
            score *= 1.25;
        }
        if (doc.match.symbol.toLowerCase().includes(flatQuery)) {
            score *= 1.2;
        }
        // Length normalization: prefer focused names. Each name token that no
        // concept explains (directly, by synonym or fuzzily) costs a little.
        const unmatchedNameTokens = [...doc.nameTokens].filter((nameToken) => !concepts.some((concept) => nameToken === concept ||
            sharesGroup(nameToken, concept) ||
            isFuzzyPair(nameToken, concept))).length;
        score /= 1 + 0.15 * unmatchedNameTokens;
        scored.push({ ...doc.match, score, matchedTerms });
    }

    // ── 5. Rank, truncate, rescale scores against the best hit ──
    scored.sort((a, b) => b.score - a.score || a.symbol.localeCompare(b.symbol));
    const top = scored.slice(0, limit);
    const max = top.length > 0 ? top[0].score : 1;
    if (max > 0) {
        for (const hit of top) {
            hit.score = Math.round((hit.score / max) * 1000) / 1000;
        }
    }
    return top;
}
|
package/package.json
CHANGED