universal-ast-mapper 1.23.0 → 1.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +71 -1
- package/README.md +27 -3
- package/dist/callgraph.js +21 -7
- package/dist/cli.js +38 -0
- package/dist/graph.js +2 -2
- package/dist/index.js +45 -0
- package/dist/resolver.js +29 -0
- package/dist/semantic.js +365 -0
- package/dist/tsconfig.js +212 -0
- package/package.json +2 -2
package/CHANGELOG.md
CHANGED
|
@@ -6,6 +6,44 @@ since 1.0.0, guarantees a stable MCP tool / CLI surface across the 1.x line.
|
|
|
6
6
|
|
|
7
7
|
---
|
|
8
8
|
|
|
9
|
+
## [1.25.0] — 2026-06-11 · Semantic symbol search
|
|
10
|
+
- **New MCP tool `semantic_search`** + **CLI `ast-map find <query> [dir]`** — find
|
|
11
|
+
symbols by *meaning*, not exact name: "remove expired cache entries" →
|
|
12
|
+
`clearDiskCache`, "find unused exported code" → `findDeadExports`.
|
|
13
|
+
- Pure lexical semantics — **no embeddings, no network, no model downloads**:
|
|
14
|
+
- **Identifier tokenization**: camelCase / PascalCase / snake_case / kebab-case /
|
|
15
|
+
digit and acronym boundaries (`getHTTPServerByID` → `get http server by id`).
|
|
16
|
+
- **Programming thesaurus**: 60 synonym groups (`fetch≈get≈load≈retrieve`,
|
|
17
|
+
`remove≈delete≈clear`, `unused≈dead`, `auth≈login≈session`, …).
|
|
18
|
+
- **Light stemming** (plural/gerund/past: `users`→`user`) + **fuzzy matching**
|
|
19
|
+
(edit distance ≤ 1 on tokens ≥ 4 chars).
|
|
20
|
+
- **BM25-style ranking**: corpus IDF (rare tokens weigh more), field weights
|
|
21
|
+
(name 3× > doc 2× > signature 1.5× > path/kind 1×), match-type weights
|
|
22
|
+
(direct > synonym > fuzzy), coverage bonus, and length normalization so
|
|
23
|
+
focused names (`login`) outrank composites (`handleLogin`).
|
|
24
|
+
- Results include a normalized `score` (0–1) and `matchedTerms` explaining each hit
|
|
25
|
+
(`unused≈dead` = synonym, `cach~cache` = fuzzy).
|
|
26
|
+
- Options: `limit` (default 20), `kind` filter, `exportedOnly`.
|
|
27
|
+
- New module `semantic` (`semanticSearch`, `splitIdentifier`, `stem`). Tests: +8
|
|
28
|
+
checks in `test/analysis.mjs` (139 total). **29 MCP tools / 31 CLI commands.**
|
|
29
|
+
|
|
30
|
+
## [1.24.0] — 2026-06-10 · TS path-alias resolution
|
|
31
|
+
- Bare imports like `@/components/Button` now resolve through **`tsconfig.json` /
|
|
32
|
+
`jsconfig.json` `compilerOptions.paths`** (+ `baseUrl`): nearest-config lookup above
|
|
33
|
+
the importing file (monorepo-safe, per-process cached), relative `extends` chains
|
|
34
|
+
(child `paths` replace the parent's, per TS semantics), longest-prefix pattern
|
|
35
|
+
matching, candidate probing with the usual extension/index logic.
|
|
36
|
+
- **String-aware JSONC parser** — comments/trailing commas are stripped with a
|
|
37
|
+
character walk, not regex (naive stripping corrupts Next.js configs where `"@/*"`
|
|
38
|
+
pairs with the `*/` inside `"**/*.ts"` include globs).
|
|
39
|
+
- Wired into `resolve_imports` (aliased imports report `importKind: "relative"` +
|
|
40
|
+
resolved file), `build_symbol_graph` (alias edges before workspace-package fallback),
|
|
41
|
+
and the call graph (callee origin + reverse `calledBy`).
|
|
42
|
+
- Real-world effect (Next.js app, 186 files): import graph 31 → **324 edges**;
|
|
43
|
+
dead exports 210 → 153; god nodes now reflect true usage.
|
|
44
|
+
- New module `tsconfig` (`aliasCandidates`, `clearAliasCaches`) + `resolveAliasedImport`
|
|
45
|
+
in the resolver. Tests: new `test/tsalias-smoke.mjs` (15 checks), wired into `npm test`.
|
|
46
|
+
|
|
9
47
|
## [1.23.0] — 2026-06-10 · Configurable root boundary (multi-root + unlocked)
|
|
10
48
|
- **`AST_MAP_ROOT` accepts multiple roots**, separated by the OS path delimiter
|
|
11
49
|
(`;` Windows / `:` POSIX). The first root is primary; absolute paths inside any
|
|
@@ -215,4 +253,36 @@ since 1.0.0, guarantees a stable MCP tool / CLI surface across the 1.x line.
|
|
|
215
253
|
declared in 2+ files.
|
|
216
254
|
|
|
217
255
|
## [0.8.3] — 2026-05-31 · TSX/React component props
|
|
218
|
-
- Component
|
|
256
|
+
- Component symbols carry `propsType` + `props[]`; detects `React.FC<P>` and
|
|
257
|
+
JSX-returning PascalCase functions. MCP server version now read from package.json.
|
|
258
|
+
|
|
259
|
+
## [0.8.2] — 2026-05-30 · Swift cross-file wiring
|
|
260
|
+
- `import <Module>` → that module's files (`Sources/<Module>/`). Completes
|
|
261
|
+
cross-file graph/resolver support for all four v0.8.0 languages.
|
|
262
|
+
|
|
263
|
+
## [0.8.1] — 2026-05-30 · Kotlin + C/C++ cross-file wiring
|
|
264
|
+
- Kotlin FQCN/package index; C/C++ `#include` resolution with header↔impl pairing.
|
|
265
|
+
- Fixes: parse-cache rel-path leak; Kotlin call-graph extraction.
|
|
266
|
+
|
|
267
|
+
---
|
|
268
|
+
|
|
269
|
+
## Earlier (pre-session history)
|
|
270
|
+
|
|
271
|
+
- **0.8.0** — +4 languages: C · C++ · Kotlin · Swift (symbol extraction + imports).
|
|
272
|
+
- **0.7.0** — Go full module resolution; C# reverse `calledBy`; 4-suite test harness.
|
|
273
|
+
- **0.6.0** — +3 languages: Rust · Java · C#; cross-language resolver.
|
|
274
|
+
- **0.5.x** — `/ast-map` skill auto-install; iterative DFS; barrel re-exports; parse cache; call-graph aliases; `.ast-map.config.json`.
|
|
275
|
+
- **0.4.0** — `search_symbol`, `get_file_deps`, `get_top_symbols`, dead-code tiers.
|
|
276
|
+
- **0.3.0** — CLI; `find_dead_code`, `find_circular_deps`, `get_change_impact`, `get_call_graph`.
|
|
277
|
+
- **0.2.0** — import extraction; `resolve_imports`; `build_symbol_graph`.
|
|
278
|
+
- **0.1.0** — `get_skeleton_json`, `generate_skeleton`, `get_symbol_context`, `validate_architecture`.
|
|
279
|
+
|
|
280
|
+
[1.13.0]: https://github.com/6ixthxense/AST-MCP/releases/tag/v1.13.0
|
|
281
|
+
[1.12.0]: https://github.com/6ixthxense/AST-MCP/releases/tag/v1.12.0
|
|
282
|
+
[1.11.0]: https://github.com/6ixthxense/AST-MCP/releases/tag/v1.11.0
|
|
283
|
+
[1.10.0]: https://github.com/6ixthxense/AST-MCP/releases/tag/v1.10.0
|
|
284
|
+
[1.9.0]: https://github.com/6ixthxense/AST-MCP/releases/tag/v1.9.0
|
|
285
|
+
[1.8.0]: https://github.com/6ixthxense/AST-MCP/releases/tag/v1.8.0
|
|
286
|
+
[1.7.0]: https://github.com/6ixthxense/AST-MCP/releases/tag/v1.7.0
|
|
287
|
+
[1.6.0]: https://github.com/6ixthxense/AST-MCP/releases/tag/v1.6.0
|
|
288
|
+
[1.5.0]: https://github.com/6ixthxense/AST-MCP/releases/tag/v1.5.0
|
package/README.md
CHANGED
|
@@ -4,7 +4,7 @@ An **MCP server + CLI tool** that turns source code into structured, machine-rea
|
|
|
4
4
|
|
|
5
5
|
Built on [tree-sitter](https://tree-sitter.github.io/) WASM grammars. Zero regex guessing — real AST parsing.
|
|
6
6
|
|
|
7
|
-
**
|
|
7
|
+
**29 MCP tools / 31 CLI commands / 5 MCP prompts** spanning skeletons, dependency graphs, and deep analysis — dead code, cycles, change-impact, complexity, duplicates, unused params, type-flow, decorators — plus monorepo support, an interactive **graph explorer** (`ast-map explore`), **watch mode**, a one-page **health dashboard** (`ast-map report`), a **persistent parse cache + parallel parsing** (warm re-scans skip parsing entirely), and a **CI quality gate** (`ast-map check`, baseline ratchet).
|
|
8
8
|
|
|
9
9
|
**Supported languages:** TypeScript · TSX · JavaScript (ESM/CJS) · Python · Go · Rust · Java · C# · C · C++ · Kotlin · Swift · Vue · Svelte (SFC `<script>`) · **PHP** · **Ruby**
|
|
10
10
|
|
|
@@ -20,7 +20,7 @@ Built on [tree-sitter](https://tree-sitter.github.io/) WASM grammars. Zero regex
|
|
|
20
20
|
> As of v0.8.2, all four v0.8.0 languages have **cross-file graph + resolver** wiring: Kotlin (FQCN/package index), C/C++ (`#include` with header↔impl pairing), and Swift (module = directory under `Sources/`). Call-graph callee origin is resolved for Kotlin; for C/C++/Swift it stays limited because their imports don't name individual symbols. (PHP & Ruby landed in v1.22.0 — symbol extraction + imports; cross-file graph wiring for them is the next step. Ruby was unblocked by upgrading `web-tree-sitter` to 0.21.0.)
|
|
21
21
|
|
|
22
22
|
Each language uses the resolution strategy that fits it:
|
|
23
|
-
- **TS/JS/Python** — relative paths (`./foo`, `..mod`) resolved against the importing file's directory, with TS-ESM `.js` → `.ts` rewriting.
|
|
23
|
+
- **TS/JS/Python** — relative paths (`./foo`, `..mod`) resolved against the importing file's directory, with TS-ESM `.js` → `.ts` rewriting. **Path aliases** (`@/*` etc.) resolve via the nearest `tsconfig.json`/`jsconfig.json` (`paths` + `baseUrl`, relative `extends`). *(v1.24.0)*
|
|
24
24
|
- **Go** — `go.mod` ancestor lookup → module path prefix → package directory → all `.go` files (skips `_test.go`).
|
|
25
25
|
- **Rust** — `Cargo.toml` ancestor → `crate::` / `self::` / `super::` walks; supports `mod.rs` + Rust-2018 sibling-dir style.
|
|
26
26
|
- **Java** — project-wide FQCN index (`package + "." + className → file`) built lazily on first cross-lang call; supports wildcard imports.
|
|
@@ -127,6 +127,7 @@ ast-map modules [dir] # directory-level coupling + ed
|
|
|
127
127
|
ast-map cache [stats|clear] # persistent parse cache (.ast-map/cache)
|
|
128
128
|
ast-map check [dir] [--update-baseline] [--min-score N] [--max-cycles N] ...
|
|
129
129
|
ast-map search <pattern> [dir] [-m contains|exact|regex] [-k kind] [-e]
|
|
130
|
+
ast-map find <query> [dir] [-l N] [-k kind] [-e] # semantic: by meaning
|
|
130
131
|
ast-map deps <file> [--scan <dir>]
|
|
131
132
|
ast-map top <dir> [-n 10]
|
|
132
133
|
ast-map impact <file> <symbol> [--scan <dir>]
|
|
@@ -155,6 +156,9 @@ ast-map validate src/ --max-lines 300 --max-imports 20
|
|
|
155
156
|
# Find all symbols named like "handler" across the project
|
|
156
157
|
ast-map search handler src/ --exported
|
|
157
158
|
|
|
159
|
+
# Don't know the name? Search by meaning
|
|
160
|
+
ast-map find "remove expired cache entries" src/
|
|
161
|
+
|
|
158
162
|
# What does this file import / what imports it?
|
|
159
163
|
ast-map deps src/lib/auth.ts --scan src/
|
|
160
164
|
|
|
@@ -514,6 +518,21 @@ Find symbols by name across all source files in a directory.
|
|
|
514
518
|
|
|
515
519
|
---
|
|
516
520
|
|
|
521
|
+
### `semantic_search`
|
|
522
|
+
Find symbols by **meaning**, not exact name — for when you know what the code *does* but not what it's called.
|
|
523
|
+
|
|
524
|
+
No embeddings, no network: identifier tokenization (camelCase / snake_case / acronyms), a built-in programming thesaurus (`fetch≈get≈load`, `remove≈delete≈clear`, `unused≈dead`, …), light stemming, fuzzy matching, and BM25-style IDF ranking over symbol names, doc comments, signatures and file paths. Results carry a normalized `score` and `matchedTerms` explaining *why* each symbol matched.
|
|
525
|
+
|
|
526
|
+
```
|
|
527
|
+
semantic_search("find unused exported code") →
|
|
528
|
+
1.000 findDeadExports (find, unused≈dead, export)
|
|
529
|
+
0.557 DeadExport (unused≈dead, export)
|
|
530
|
+
```
|
|
531
|
+
|
|
532
|
+
**Params:** `path`, `query`, `limit` (default 20), `kind`, `exportedOnly`
|
|
533
|
+
|
|
534
|
+
---
|
|
535
|
+
|
|
517
536
|
### `get_file_deps`
|
|
518
537
|
For a single file, show what it imports and what imports it (with symbol names).
|
|
519
538
|
More focused than `build_symbol_graph` — use for quick dependency lookup.
|
|
@@ -801,6 +820,8 @@ Not part of the public API: the internal `src/` module layout and the generated
|
|
|
801
820
|
|
|
802
821
|
| Version | What changed |
|
|
803
822
|
|---------|--------------|
|
|
823
|
+
| **1.25.0** | **Semantic symbol search** — new MCP tool `semantic_search` + CLI `ast-map find <query>`: find symbols by *meaning* ("remove expired cache entries" → `clearDiskCache`). Identifier tokenization + 60-group programming thesaurus + stemming + fuzzy matching + BM25-style IDF ranking over names, docs, signatures and paths. No embeddings, no network. (**29 tools / 31 commands**) |
|
|
824
|
+
| **1.24.0** | **TS path-alias resolution** — bare imports like `@/components/Button` now resolve via the **nearest** `tsconfig.json`/`jsconfig.json` (`compilerOptions.paths` + `baseUrl`, relative `extends` chains, longest-prefix matching, string-aware JSONC parser). Wired into `resolve_imports`, the symbol graph, and the call graph — on a real Next.js app this took the import graph from 31 to **324 edges** and cut false dead-exports by ~30%. |
|
|
804
825
|
| **1.23.0** | **Configurable root boundary** — `AST_MAP_ROOT` accepts **multiple roots** (path-delimiter separated) and `AST_MAP_UNLOCKED=1` allows analyzing **any absolute path** on request (default stays locked). Analysis/graph/report rel-paths now computed against the matched root, so cross-root results are correct. New `roots` module + 13-check test suite. |
|
|
805
826
|
| **1.22.0** | **PHP & Ruby support** — `.php` (classes, interfaces, traits, enums, methods with visibility, `use` imports incl. grouped, require/include) and `.rb`/`.rake` (classes, modules, methods, `self.` singleton methods, `private` section tracking, require/require_relative). Unblocked by upgrading `web-tree-sitter` 0.20.8 → 0.21.0 (all existing grammars re-verified). **16 languages**. |
|
|
806
827
|
| **1.21.0** | **Quality gate** — `ast-map check` fails CI when quality regresses: **baseline ratchet** vs `.ast-map.baseline.json` (cycles · dead exports · SDP · very-high complexity · score; `--update-baseline` re-anchors) + absolute thresholds (flags or config `"check"`). New MCP tool `check_quality_gate` (**28 tools**); GitHub Action gains `mode: check`. |
|
|
@@ -833,4 +854,7 @@ Not part of the public API: the internal `src/` module layout and the generated
|
|
|
833
854
|
| **0.9.0** | **Scoped type-flow tracing** — new `trace_type` MCP tool + `ast-map trace-type` (alias `flow`) CLI: follow a named type through function params, return types, typed variables, and class fields across a directory. Completes the deeper-analysis suite (dead code · cycles · impact · complexity · duplicates · unused params · type flow). **18 MCP tools**. |
|
|
834
855
|
| **0.8.7** | **Python decorators in the call graph** — function/method symbols now carry a `decorators` field (`@router.get("/x")` → `router.get("/x")`), surfaced in skeletons (outline + full) and in `get_call_graph`. Traces framework wiring like FastAPI/Flask routes and `@staticmethod`/`@property` stacks to their handler. |
|
|
835
856
|
| **0.8.6** | **Unused parameter detection** — new `find_unused_params` MCP tool + `ast-map unused-params` (alias `unused`) CLI: named functions whose params are never referenced. Skips `_`-prefixed/destructured/anonymous and treats object-shorthand as a use (low false-positive). Server now 17 tools. |
|
|
836
|
-
| **0.8.5** | **Cyclomatic complexity** — new `get_complexity` MCP tool + `ast-map
|
|
857
|
+
| **0.8.5** | **Cyclomatic complexity** — new `get_complexity` MCP tool + `ast-map complexity` (alias `cx`) CLI: per-function cyclomatic complexity with low/moderate/high/very-high ratings, file or directory scope. |
|
|
858
|
+
| **0.8.4** | **Duplicate symbol detection** — `find_duplicate_symbols` / `ast-map duplicates` (alias `dupes`): symbol names exported from more than one file. |
|
|
859
|
+
| **0.8.1–0.8.3** | Kotlin + C/C++ cross-file wiring · Swift module resolution (`Sources/<Module>/`) · TSX/React component props (`propsType` + `props[]`, `React.FC<P>` detection). |
|
|
860
|
+
| **0.1.0–0.8.0** | Foundation: skeleton extraction (`get_skeleton_json`, `generate_skeleton`, `get_symbol_context`, `validate_architecture`) · import resolution + symbol graph · dead code / cycles / impact / call graph · CLI · 12 languages (+Rust · Java · C# · Go · C · C++ · Kotlin · Swift) · `/ast-map` skill auto-install · barrel re-exports · parse cache. |
|
package/dist/callgraph.js
CHANGED
|
@@ -4,7 +4,7 @@ import { parseSource } from "./parser.js";
|
|
|
4
4
|
import { buildSkeleton } from "./skeleton.js";
|
|
5
5
|
import { resolveOptions, loadProjectConfig } from "./config.js";
|
|
6
6
|
import { detectLanguage } from "./registry.js";
|
|
7
|
-
import { resolveImportPath, getOrBuildCrossLangIndex } from "./resolver.js";
|
|
7
|
+
import { resolveImportPath, resolveAliasedImport, getOrBuildCrossLangIndex } from "./resolver.js";
|
|
8
8
|
import { resolveCrossLangTarget } from "./crosslang.js";
|
|
9
9
|
const CROSS_LANG = new Set(["java", "csharp", "rust", "go", "kotlin", "c", "cpp", "swift"]);
|
|
10
10
|
function pushCall(out, callee, anchor) {
|
|
@@ -313,8 +313,14 @@ export async function buildCallGraph(filePath, funcName, root, allSkeletons) {
|
|
|
313
313
|
}
|
|
314
314
|
}
|
|
315
315
|
else {
|
|
316
|
-
|
|
317
|
-
|
|
316
|
+
const aliasAbs = resolveAliasedImport(importRef.from, filePath);
|
|
317
|
+
if (aliasAbs) {
|
|
318
|
+
call.calleeFileRel = path.relative(root, aliasAbs).split(path.sep).join("/");
|
|
319
|
+
}
|
|
320
|
+
else {
|
|
321
|
+
call.isExternal = true;
|
|
322
|
+
call.calleeFileRel = importRef.from;
|
|
323
|
+
}
|
|
318
324
|
}
|
|
319
325
|
}
|
|
320
326
|
else if (aliasOrigin) {
|
|
@@ -326,8 +332,14 @@ export async function buildCallGraph(filePath, funcName, root, allSkeletons) {
|
|
|
326
332
|
}
|
|
327
333
|
}
|
|
328
334
|
else {
|
|
329
|
-
|
|
330
|
-
|
|
335
|
+
const aliasAbs = resolveAliasedImport(aliasOrigin, filePath);
|
|
336
|
+
if (aliasAbs) {
|
|
337
|
+
call.calleeFileRel = path.relative(root, aliasAbs).split(path.sep).join("/");
|
|
338
|
+
}
|
|
339
|
+
else {
|
|
340
|
+
call.isExternal = true;
|
|
341
|
+
call.calleeFileRel = aliasOrigin;
|
|
342
|
+
}
|
|
331
343
|
}
|
|
332
344
|
}
|
|
333
345
|
else if (crossIndex && skel.language === "csharp") {
|
|
@@ -387,8 +399,10 @@ export async function buildCallGraph(filePath, funcName, root, allSkeletons) {
|
|
|
387
399
|
break;
|
|
388
400
|
}
|
|
389
401
|
}
|
|
390
|
-
else
|
|
391
|
-
const resolvedAbs =
|
|
402
|
+
else {
|
|
403
|
+
const resolvedAbs = imp.from.startsWith(".")
|
|
404
|
+
? resolveImportPath(imp.from, otherAbs)
|
|
405
|
+
: resolveAliasedImport(imp.from, otherAbs);
|
|
392
406
|
if (!resolvedAbs)
|
|
393
407
|
continue;
|
|
394
408
|
const resolvedRel = path.relative(root, resolvedAbs).split(path.sep).join("/");
|
package/dist/cli.js
CHANGED
|
@@ -27,6 +27,7 @@ import { findLayerViolations } from "./layers.js";
|
|
|
27
27
|
import { computeModuleCoupling } from "./modulecoupling.js";
|
|
28
28
|
import { buildCallGraph } from "./callgraph.js";
|
|
29
29
|
import { searchSymbols } from "./search.js";
|
|
30
|
+
import { semanticSearch } from "./semantic.js";
|
|
30
31
|
import { parseRootsFromEnv } from "./roots.js";
|
|
31
32
|
const ROOT = parseRootsFromEnv().roots[0]; // CLI is local — no boundary, primary root only
|
|
32
33
|
// Persistent parse cache (disable with AST_MAP_NO_CACHE=1 or "cache": false in config).
|
|
@@ -1092,6 +1093,43 @@ program
|
|
|
1092
1093
|
}
|
|
1093
1094
|
console.log();
|
|
1094
1095
|
});
|
|
1096
|
+
// ─── Command: find (semantic search) ─────────────────────────────────────────
|
|
1097
|
+
program
|
|
1098
|
+
.command("find <query> [dir]")
|
|
1099
|
+
.description("Semantic symbol search — find symbols by meaning, not exact name")
|
|
1100
|
+
.option("-l, --limit <n>", "Max results (default 20)", "20")
|
|
1101
|
+
.option("-k, --kind <kind>", "Filter by kind: function, class, interface, type, method, const…")
|
|
1102
|
+
.option("-e, --exported", "Only show exported symbols")
|
|
1103
|
+
.option("--json", "Output as JSON")
|
|
1104
|
+
.action(async (query, dir, opts) => {
|
|
1105
|
+
const searchDir = dir ?? ".";
|
|
1106
|
+
const { abs, rel } = resolveArg(searchDir);
|
|
1107
|
+
if (!fs.statSync(abs).isDirectory())
|
|
1108
|
+
die(`"${rel}" is not a directory`);
|
|
1109
|
+
const limit = Math.max(1, parseInt(opts.limit ?? "20", 10) || 20);
|
|
1110
|
+
const matches = await semanticSearch(abs, query, ROOT, {
|
|
1111
|
+
limit,
|
|
1112
|
+
kind: opts.kind,
|
|
1113
|
+
exportedOnly: opts.exported,
|
|
1114
|
+
});
|
|
1115
|
+
if (opts.json)
|
|
1116
|
+
return jsonOut({ directory: rel, query, matchCount: matches.length, matches });
|
|
1117
|
+
header(`Semantic Search — ${bold(`"${query}"`)} in ${rel}/`);
|
|
1118
|
+
if (matches.length === 0) {
|
|
1119
|
+
console.log(indent(dim("No matches found.")));
|
|
1120
|
+
}
|
|
1121
|
+
else {
|
|
1122
|
+
table(matches.map(m => [
|
|
1123
|
+
m.score.toFixed(3),
|
|
1124
|
+
m.file,
|
|
1125
|
+
m.symbol,
|
|
1126
|
+
m.kind,
|
|
1127
|
+
m.matchedTerms.slice(0, 4).join(", "),
|
|
1128
|
+
]), [["Score", 6], ["File", 34], ["Symbol", 26], ["Kind", 10], ["Matched", 30]]);
|
|
1129
|
+
console.log(`\n ${matches.length} match(es)`);
|
|
1130
|
+
}
|
|
1131
|
+
console.log();
|
|
1132
|
+
});
|
|
1095
1133
|
// ─── Command: deps ────────────────────────────────────────────────────────────
|
|
1096
1134
|
program
|
|
1097
1135
|
.command("deps <file>")
|
package/dist/graph.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import path from "node:path";
|
|
2
|
-
import { resolveImportPath } from "./resolver.js";
|
|
2
|
+
import { resolveImportPath, resolveAliasedImport } from "./resolver.js";
|
|
3
3
|
import { resolveWorkspaceImportCached } from "./workspace.js";
|
|
4
4
|
import { buildCrossLangIndex, resolveCrossLangTarget, } from "./crosslang.js";
|
|
5
5
|
// ─── Internal helpers ─────────────────────────────────────────────────────────
|
|
@@ -38,7 +38,7 @@ function wirePathImport(skel, imp, fromFileAbs, root, exportedSymbolMap, edges)
|
|
|
38
38
|
// Relative import → path resolve; bare specifier → tsconfig/jsconfig path alias, then monorepo workspace package.
|
|
39
39
|
const resolvedAbs = imp.from.startsWith(".")
|
|
40
40
|
? resolveImportPath(imp.from, fromFileAbs)
|
|
41
|
-
: resolveWorkspaceImportCached(imp.from, root);
|
|
41
|
+
: resolveAliasedImport(imp.from, fromFileAbs) ?? resolveWorkspaceImportCached(imp.from, root);
|
|
42
42
|
if (!resolvedAbs)
|
|
43
43
|
return;
|
|
44
44
|
const resolvedRel = path.relative(root, resolvedAbs).split(path.sep).join("/");
|
package/dist/index.js
CHANGED
|
@@ -17,6 +17,7 @@ import { buildSymbolGraph } from "./graph.js";
|
|
|
17
17
|
import { findDeadExports, findCircularDeps, getChangeImpact, getFileDeps, getTopSymbols, findDuplicateSymbols } from "./graph-analysis.js";
|
|
18
18
|
import { buildCallGraph } from "./callgraph.js";
|
|
19
19
|
import { searchSymbols } from "./search.js";
|
|
20
|
+
import { semanticSearch } from "./semantic.js";
|
|
20
21
|
import { computeFileComplexity } from "./complexity.js";
|
|
21
22
|
import { findUnusedParams } from "./unused-params.js";
|
|
22
23
|
import { traceTypeInFile } from "./typeflow.js";
|
|
@@ -1146,6 +1147,50 @@ server.registerTool("search_symbol", {
|
|
|
1146
1147
|
return errorText(describeError(err));
|
|
1147
1148
|
}
|
|
1148
1149
|
});
|
|
1150
|
+
/* ─────────────────── tool: semantic_search ─────────────────────── */
|
|
1151
|
+
server.registerTool("semantic_search", {
|
|
1152
|
+
title: "Search symbols by meaning",
|
|
1153
|
+
description: "Find symbols by *meaning*, not exact name. Tokenizes identifiers (camelCase/snake_case), " +
|
|
1154
|
+
"expands programming synonyms (fetch≈get≈load, remove≈delete≈destroy, …), applies light " +
|
|
1155
|
+
"stemming and fuzzy matching, and ranks with BM25-style IDF weighting over symbol names, " +
|
|
1156
|
+
"doc comments, signatures and file paths.\n" +
|
|
1157
|
+
'Use when you know what code *does* but not what it\'s called: "remove expired sessions", ' +
|
|
1158
|
+
'"parse config file", "validate user input".',
|
|
1159
|
+
inputSchema: {
|
|
1160
|
+
path: z
|
|
1161
|
+
.string()
|
|
1162
|
+
.describe("Directory to search in, relative to project root or absolute within it."),
|
|
1163
|
+
query: z
|
|
1164
|
+
.string()
|
|
1165
|
+
.describe('What the code does, e.g. "delete old cache entries" or "load user settings".'),
|
|
1166
|
+
limit: z.number().int().min(1).max(100).optional().describe("Max results. Default 20."),
|
|
1167
|
+
kind: z
|
|
1168
|
+
.enum(["function", "class", "interface", "type", "method", "const", "var", "enum", "struct", "field"])
|
|
1169
|
+
.optional()
|
|
1170
|
+
.describe("Filter by symbol kind."),
|
|
1171
|
+
exportedOnly: z
|
|
1172
|
+
.boolean()
|
|
1173
|
+
.optional()
|
|
1174
|
+
.describe("Only return exported symbols. Default false."),
|
|
1175
|
+
},
|
|
1176
|
+
}, async ({ path: input, query, limit, kind, exportedOnly }) => {
|
|
1177
|
+
try {
|
|
1178
|
+
const { abs, rel, root } = resolveInRoot(input);
|
|
1179
|
+
if (!fs.statSync(abs).isDirectory()) {
|
|
1180
|
+
return errorText(`"${input}" is not a directory. semantic_search requires a directory.`);
|
|
1181
|
+
}
|
|
1182
|
+
const matches = await semanticSearch(abs, query, root, { limit, kind, exportedOnly });
|
|
1183
|
+
return jsonText({
|
|
1184
|
+
directory: rel.split(path.sep).join("/"),
|
|
1185
|
+
query,
|
|
1186
|
+
matchCount: matches.length,
|
|
1187
|
+
matches,
|
|
1188
|
+
});
|
|
1189
|
+
}
|
|
1190
|
+
catch (err) {
|
|
1191
|
+
return errorText(describeError(err));
|
|
1192
|
+
}
|
|
1193
|
+
});
|
|
1149
1194
|
/* ─────────────────── tool: get_file_deps ───────────────────────────────── */
|
|
1150
1195
|
server.registerTool("get_file_deps", {
|
|
1151
1196
|
title: "Get file-level import dependencies",
|
package/dist/resolver.js
CHANGED
|
@@ -5,6 +5,7 @@ import { resolveOptions } from "./config.js";
|
|
|
5
5
|
import { findSymbol } from "./analysis.js";
|
|
6
6
|
import { buildCrossLangIndex, resolveCrossLangTarget, } from "./crosslang.js";
|
|
7
7
|
import { resolveWorkspaceImportCached } from "./workspace.js";
|
|
8
|
+
import { aliasCandidates } from "./tsconfig.js";
|
|
8
9
|
const SRC_EXTS = [".ts", ".tsx", ".js", ".jsx", ".mts", ".cts", ".mjs", ".cjs", ".vue", ".svelte"];
|
|
9
10
|
function extractParams(sig) {
|
|
10
11
|
const start = sig.indexOf("(");
|
|
@@ -46,6 +47,10 @@ export function resolveImportPath(importFrom, fromAbs) {
|
|
|
46
47
|
return p;
|
|
47
48
|
}
|
|
48
49
|
}
|
|
50
|
+
return probeCandidate(candidate);
|
|
51
|
+
}
|
|
52
|
+
/** Probe a path base: exact file → +extensions → /index.<ext>. */
|
|
53
|
+
function probeCandidate(candidate) {
|
|
49
54
|
try {
|
|
50
55
|
const stat = fs.statSync(candidate);
|
|
51
56
|
if (stat.isFile())
|
|
@@ -64,6 +69,28 @@ export function resolveImportPath(importFrom, fromAbs) {
|
|
|
64
69
|
}
|
|
65
70
|
return null;
|
|
66
71
|
}
|
|
72
|
+
/**
|
|
73
|
+
* Resolve a tsconfig/jsconfig path-aliased bare import (e.g. `@/components/X`
|
|
74
|
+
* with `"@/*": ["./src/*"]`) to an absolute file path, using the nearest
|
|
75
|
+
* config above the importing file. Returns null when the specifier matches no alias or no candidate file exists on disk.
|
|
76
|
+
*/
|
|
77
|
+
export function resolveAliasedImport(importFrom, fromAbs) {
|
|
78
|
+
for (const base of aliasCandidates(importFrom, fromAbs)) {
|
|
79
|
+
const declaredExt = path.extname(base).toLowerCase();
|
|
80
|
+
if (declaredExt && JS_TO_TS[declaredExt]) {
|
|
81
|
+
const stem = base.slice(0, base.length - declaredExt.length);
|
|
82
|
+
for (const ext of JS_TO_TS[declaredExt]) {
|
|
83
|
+
const p = stem + ext;
|
|
84
|
+
if (fs.existsSync(p))
|
|
85
|
+
return p;
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
const hit = probeCandidate(base);
|
|
89
|
+
if (hit)
|
|
90
|
+
return hit;
|
|
91
|
+
}
|
|
92
|
+
return null;
|
|
93
|
+
}
|
|
67
94
|
/* ─── Cross-language index cache ──────────────────────────────────────────── */
|
|
68
95
|
// Java/C# need a project-wide index to resolve fully-qualified imports.
|
|
69
96
|
// Built lazily on first cross-language resolve, then reused for the process
|
|
@@ -124,6 +151,8 @@ async function enrichRelativeImport(imp, fromAbs, root) {
|
|
|
124
151
|
const isBare = !imp.from.startsWith(".");
|
|
125
152
|
// Relative import → path resolve; bare specifier → try tsconfig/jsconfig path alias, then monorepo workspace.
|
|
126
153
|
let resolvedAbs = isBare ? null : resolveImportPath(imp.from, fromAbs);
|
|
154
|
+
if (!resolvedAbs && isBare)
|
|
155
|
+
resolvedAbs = resolveAliasedImport(imp.from, fromAbs);
|
|
127
156
|
if (!resolvedAbs && isBare)
|
|
128
157
|
resolvedAbs = resolveWorkspaceImportCached(imp.from, root);
|
|
129
158
|
const treatedExternal = isBare && !resolvedAbs;
|
package/dist/semantic.js
ADDED
|
@@ -0,0 +1,365 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Semantic symbol search — find symbols by *meaning*, not exact name.
|
|
3
|
+
*
|
|
4
|
+
* No embeddings, no network, no model downloads. Pure lexical semantics:
|
|
5
|
+
* 1. Identifier tokenization — camelCase / PascalCase / snake_case /
|
|
6
|
+
* kebab-case / digits / acronym boundaries ("HTTPServer" → http, server).
|
|
7
|
+
* 2. Concept expansion — a built-in thesaurus of programming
|
|
8
|
+
* synonym groups (fetch≈get≈load≈retrieve, remove≈delete≈destroy, …).
|
|
9
|
+
* 3. Light stemming — plural/gerund/past suffixes folded so
|
|
10
|
+
* "parsing" matches "parse", "users" matches "user".
|
|
11
|
+
* 4. BM25-style ranking — rare tokens weigh more (IDF over the
|
|
12
|
+
* scanned corpus); name hits outweigh doc/signature/path hits;
|
|
13
|
+
* direct hits outweigh synonym hits outweigh fuzzy hits.
|
|
14
|
+
*/
|
|
15
|
+
import path from "node:path";
|
|
16
|
+
import { buildSkeleton, collectSourceFiles } from "./skeleton.js";
|
|
17
|
+
import { resolveOptions, loadProjectConfig } from "./config.js";
|
|
18
|
+
// ─── Synonym groups (programming thesaurus) ────────────────────────────────────
// Every inner array is one concept: two tokens that appear in the same array
// are treated as synonyms by the scorer (weighted below a direct hit). A word
// may appear in more than one array. Groups are kept deliberately narrow,
// because a broad group makes unrelated symbols match each other.
const SYNONYM_GROUPS = [
    // Data access and lifecycle verbs
    ["get", "fetch", "load", "retrieve", "read", "lookup", "resolve"],
    ["set", "update", "write", "assign", "put", "patch", "modify", "change", "edit"],
    ["create", "make", "build", "new", "generate", "construct", "init", "initialize", "spawn"],
    ["delete", "remove", "destroy", "drop", "clear", "purge", "erase"],
    ["find", "search", "query", "locate", "match", "scan", "discover"],
    ["send", "dispatch", "emit", "publish", "post", "broadcast", "notify"],
    ["receive", "consume", "subscribe", "listen", "handle", "process"],
    ["start", "begin", "launch", "run", "execute", "invoke", "trigger"],
    ["stop", "end", "halt", "kill", "terminate", "cancel", "abort", "shutdown", "close"],
    ["check", "validate", "verify", "test", "assert", "ensure", "confirm"],
    ["parse", "decode", "deserialize", "unmarshal", "extract", "tokenize"],
    ["format", "encode", "serialize", "marshal", "stringify", "render", "print"],
    ["convert", "transform", "map", "translate", "cast", "normalize"],
    // Domain nouns
    ["user", "account", "member", "person", "profile", "customer"],
    ["auth", "authenticate", "login", "signin", "authorize", "session", "credential"],
    ["config", "configuration", "settings", "options", "preferences", "setup"],
    ["error", "exception", "fault", "failure", "err", "panic"],
    ["log", "logger", "logging", "trace", "audit"],
    ["cache", "memo", "memoize", "store", "buffer"],
    ["list", "enumerate", "all", "collection", "array", "items"],
    ["count", "total", "sum", "aggregate", "tally"],
    ["file", "document", "path", "filename"],
    ["dir", "directory", "folder"],
    ["request", "req", "call", "http"],
    ["response", "res", "reply", "result", "output"],
    ["message", "msg", "event", "signal"],
    // Wiring, persistence and collection operations
    ["connect", "connection", "link", "attach", "bind", "join"],
    ["disconnect", "detach", "unbind", "release", "unsubscribe"],
    ["save", "persist", "commit", "flush", "sync"],
    ["copy", "clone", "duplicate", "snapshot"],
    ["merge", "combine", "concat", "union", "join"],
    ["split", "divide", "partition", "chunk", "segment"],
    ["sort", "order", "rank", "arrange"],
    ["filter", "select", "exclude", "where"],
    ["compare", "diff", "equal", "equals", "cmp"],
    ["compute", "calculate", "calc", "derive", "evaluate", "measure"],
    // Timing and concurrency
    ["watch", "observe", "monitor", "track", "poll"],
    ["wait", "sleep", "delay", "debounce", "throttle", "defer"],
    ["retry", "attempt", "backoff"],
    ["lock", "mutex", "semaphore", "guard"],
    // Structures, identifiers and primitive types
    ["queue", "stack", "heap", "pool", "buffer"],
    ["graph", "tree", "node", "edge", "vertex"],
    ["dependency", "dep", "import", "require"],
    ["token", "symbol", "identifier", "ident", "name"],
    ["database", "db", "storage", "repository", "repo", "dao"],
    ["key", "id", "identifier", "uuid", "guid"],
    ["string", "str", "text", "char"],
    ["number", "num", "int", "integer", "float", "numeric"],
    ["boolean", "bool", "flag", "toggle"],
    // Miscellaneous
    ["helper", "util", "utility", "utils", "tool", "common"],
    ["test", "spec", "mock", "stub", "fixture"],
    ["render", "draw", "paint", "display", "show", "view"],
    ["hide", "conceal", "mask", "suppress"],
    ["enable", "activate", "on"],
    ["disable", "deactivate", "off"],
    ["add", "insert", "append", "push", "register"],
    ["pop", "shift", "dequeue", "take"],
    // Code-analysis vocabulary
    ["circular", "cycle", "cyclic", "loop", "recursive"],
    ["dead", "unused", "orphan", "unreachable", "stale"],
    ["complexity", "complex", "cyclomatic", "cognitive"],
    ["coupling", "cohesion", "instability", "afferent", "efferent"],
];
|
|
85
|
+
// Reverse index: token → indices of every synonym group that contains it.
// Each word is filed under its raw spelling and under its stem, so a stemmed
// query or corpus token ("setting", "item") still finds a group that was
// declared with the unstemmed spelling ("settings", "items").
const GROUP_OF = new Map();
for (const [groupIndex, members] of SYNONYM_GROUPS.entries()) {
    for (const member of members) {
        const spellings = new Set([member, stem(member)]);
        for (const spelling of spellings) {
            let owners = GROUP_OF.get(spelling);
            if (!owners) {
                owners = [];
                GROUP_OF.set(spelling, owners);
            }
            // A word can belong to several groups; record each group once.
            if (!owners.includes(groupIndex)) {
                owners.push(groupIndex);
            }
        }
    }
}
|
|
102
|
+
// ─── Tokenization ──────────────────────────────────────────────────────────────
|
|
103
|
+
/**
 * Light suffix-folding stemmer: "users" → "user", "files" → "file",
 * "mapping" → "map", "entries" → "entry".
 *
 * Rules are tried in order and the first one that applies wins:
 *   1. "-ies" (word longer than 4)  → "-y"
 *   2. "-ing" (word longer than 4)  → dropped, then a doubled final consonant
 *      is collapsed ("mapping" → "mapp" → "map")
 *   3. "-ed"  (word longer than 4)  → dropped, same collapsing
 *   4. "-es"  (word longer than 3)  → dropped only when the remainder ends in a
 *      sibilant (s, x, zz, sh, ch: "classes" → "class", "matches" → "match");
 *      otherwise only the "s" is dropped so the result equals the singular
 *      ("files" → "file", "sizes" → "size")
 *   5. "-s"   (word longer than 3, not "-ss") → dropped
 *
 * No "e" is restored after "-ing"/"-ed", so "parsing" yields "pars" rather
 * than "parse".
 *
 * @param {string} word Lowercase token.
 * @returns {string} The folded token (the input itself when no rule applies).
 */
export function stem(word) {
    // Collapse a doubled final letter left behind by "-ing"/"-ed" removal.
    // l, s and z are exempt because they are commonly doubled in the base word
    // itself ("call", "pass", "buzz"); collapsing them would produce a stem
    // that no longer equals the base word.
    const undouble = (base) => {
        const last = base[base.length - 1];
        const doubled = base.length > 2 && last === base[base.length - 2];
        return doubled && !"lsz".includes(last) ? base.slice(0, -1) : base;
    };
    if (word.length > 4 && word.endsWith("ies")) {
        return `${word.slice(0, -3)}y`;
    }
    if (word.length > 4 && word.endsWith("ing")) {
        return undouble(word.slice(0, -3));
    }
    if (word.length > 4 && word.endsWith("ed")) {
        return undouble(word.slice(0, -2));
    }
    if (word.length > 3 && word.endsWith("es")) {
        const base = word.slice(0, -2);
        // Only sibilant endings take a true "-es" plural; everything else is
        // an e-final word plus "s".
        return /(?:s|x|zz|sh|ch)$/.test(base) ? base : word.slice(0, -1);
    }
    if (word.length > 3 && word.endsWith("s") && !word.endsWith("ss")) {
        return word.slice(0, -1);
    }
    return word;
}
|
|
127
|
+
/**
 * Split an identifier (or a free-text phrase) into lowercase word tokens.
 *
 * Any run of characters that is neither a letter nor a decimal digit acts as a
 * separator (snake_case, kebab-case, dots, spaces, punctuation). Inside each
 * remaining chunk, boundaries are inserted at:
 *   - lower/digit → upper       ("getUser"    → get, user)
 *   - acronym → capitalized word ("HTTPServer" → http, server)
 *   - letter ↔ digit             ("v2"         → v, 2)
 *
 * Example: "getHTTPServerByID" → [get, http, server, by, id].
 *
 * Character classes are Unicode-aware, so non-ASCII letters stay inside their
 * token instead of being treated as separators; pure-ASCII input tokenizes
 * exactly as it would with ASCII-only classes.
 *
 * @param {string} identifier Text to tokenize; null/undefined yields [].
 * @returns {string[]} Lowercase tokens in order of appearance.
 */
export function splitIdentifier(identifier) {
    const text = String(identifier ?? "");
    const tokens = [];
    for (const chunk of text.split(/[^\p{L}\p{Nd}]+/u)) {
        if (!chunk) {
            continue;
        }
        const spaced = chunk
            .replace(/([\p{Ll}\p{Nd}])(\p{Lu})/gu, "$1 $2") // aA, 1A
            .replace(/(\p{Lu}+)(\p{Lu}\p{Ll})/gu, "$1 $2") // AAa: acronym → word
            .replace(/(\p{L})(\p{Nd})/gu, "$1 $2") // letter → digit
            .replace(/(\p{Nd})(\p{L})/gu, "$1 $2"); // digit → letter
        for (const word of spaced.split(" ")) {
            if (word) {
                tokens.push(word.toLowerCase());
            }
        }
    }
    return tokens;
}
|
|
150
|
+
/**
 * Levenshtein distance between `a` and `b`, giving up early once the result
 * is known to exceed `max`.
 *
 * @param {string} a
 * @param {string} b
 * @param {number} max Largest distance the caller cares about.
 * @returns {number} `max + 1` when the early exit fires (length gap larger
 *   than `max`, or a whole row above `max`); otherwise the computed distance.
 */
function editDistance(a, b, max) {
    const overBudget = max + 1;
    if (Math.abs(a.length - b.length) > max) {
        return overBudget;
    }
    const width = b.length + 1;
    // Two rolling rows of the DP table; `above` starts as the row for "".
    let above = Array.from({ length: width }, (_, col) => col);
    let row = new Array(width);
    for (let i = 1; i <= a.length; i++) {
        row[0] = i;
        let smallest = i;
        for (let j = 1; j < width; j++) {
            const substitute = above[j - 1] + (a[i - 1] === b[j - 1] ? 0 : 1);
            const cell = Math.min(above[j] + 1, row[j - 1] + 1, substitute);
            row[j] = cell;
            if (cell < smallest) {
                smallest = cell;
            }
        }
        // Every later cell derives from this row, so nothing can get cheaper.
        if (smallest > max) {
            return overBudget;
        }
        [above, row] = [row, above];
    }
    return above[b.length];
}
|
|
174
|
+
/**
 * True when tokens `a` and `b` are both listed in at least one common synonym
 * group. A token with no entry in GROUP_OF never matches.
 */
function sharesGroup(a, b) {
    const groupsOfA = GROUP_OF.get(a);
    const groupsOfB = groupsOfA ? GROUP_OF.get(b) : undefined;
    if (!groupsOfA || !groupsOfB) {
        return false;
    }
    for (const groupIndex of groupsOfA) {
        if (groupsOfB.includes(groupIndex)) {
            return true;
        }
    }
    return false;
}
|
|
183
|
+
// Weight of a token by the symbol field it was found in: a hit in the name
// counts most, then doc comment, then signature, then file path / kind.
const FIELD_WEIGHT = {
    name: 3,
    doc: 2,
    signature: 1.5,
    path: 1,
    kind: 1,
};
|
|
184
|
+
/**
 * Record the stemmed form of `raw` in `doc.tokens`, keeping the highest field
 * weight seen for that token. Stems shorter than two characters are ignored.
 */
function addToken(doc, raw, weight) {
    const token = stem(raw);
    if (token.length >= 2) {
        const current = doc.tokens.get(token);
        const improves = current === undefined || weight > current;
        if (improves) {
            doc.tokens.set(token, weight);
        }
    }
}
|
|
192
|
+
/**
 * Depth-first walk over a symbol tree, yielding every symbol together with its
 * dotted qualified name ("Parent.child.grandchild"). A parent is yielded
 * before its children.
 *
 * A symbol whose `children` is missing or not an array is treated as a leaf
 * rather than throwing, so one oddly shaped node does not abort the walk.
 *
 * @param symbols    Symbols at the current nesting level (nullish → nothing).
 * @param file       Unused by this function; kept for call-site compatibility.
 * @param parentName Qualified name of the enclosing symbol, if any.
 * @yields {{sym: object, fullName: string}}
 */
function* flattenDocs(symbols, file, parentName) {
    for (const sym of symbols ?? []) {
        const fullName = parentName ? `${parentName}.${sym.name}` : sym.name;
        yield { sym, fullName };
        if (Array.isArray(sym.children) && sym.children.length > 0) {
            yield* flattenDocs(sym.children, file, fullName);
        }
    }
}
|
|
200
|
+
/**
 * Build the searchable record for one symbol.
 *
 * The record holds the result payload (`match`), a token → best-field-weight
 * map (`tokens`) and the set of stemmed tokens that come from the symbol's
 * qualified name (`nameTokens`). Tokens are indexed in the order name, kind,
 * doc comment, signature, file path.
 */
function buildDoc(sym, fullName, file) {
    const match = {
        file,
        symbol: fullName,
        kind: sym.kind,
        exported: sym.exported ?? false,
        range: sym.range,
    };
    if (sym.signature) {
        match.signature = sym.signature;
    }
    const doc = { match, tokens: new Map(), nameTokens: new Set() };
    // Tokenize `text` and index every piece at the given field weight.
    const indexText = (text, weight) => {
        for (const piece of splitIdentifier(text)) {
            addToken(doc, piece, weight);
        }
    };

    for (const piece of splitIdentifier(fullName)) {
        addToken(doc, piece, FIELD_WEIGHT.name);
        doc.nameTokens.add(stem(piece));
    }
    addToken(doc, sym.kind, FIELD_WEIGHT.kind);
    if (sym.doc) {
        indexText(sym.doc, FIELD_WEIGHT.doc);
    }
    if (sym.signature) {
        indexText(sym.signature, FIELD_WEIGHT.signature);
    }
    file.split("/").forEach((segment) => indexText(segment, FIELD_WEIGHT.path));
    return doc;
}
|
|
232
|
+
// ─── Scoring ───────────────────────────────────────────────────────────────────
|
|
233
|
+
// Multiplier by how a query concept matched a token: identical, same synonym
// group, or within edit distance 1.
const MATCH_WEIGHT = {
    direct: 1,
    synonym: 0.7,
    fuzzy: 0.45,
};
// Query words that carry no search intent; they are dropped before stemming.
const STOPWORDS = new Set([
    // articles, prepositions, conjunctions
    "a", "an", "the", "of", "in", "on", "for", "to", "with", "that", "this",
    "is", "are", "be", "and", "or", "by", "from", "at", "it", "its", "as",
    // question words and pronouns
    "do", "does", "how", "what", "which", "where", "when", "i", "we", "you",
    // generic filler about code
    "function", "method", "code", "thing", "stuff", "something",
]);
|
|
241
|
+
/**
 * Rank the symbols found under a directory by how well they match a query.
 *
 * Steps: (1) turn the query into stemmed, de-duplicated concepts, ignoring
 * stopwords; (2) build one searchable record per symbol from every collected
 * source file, skipping files whose skeleton cannot be built; (3) compute an
 * IDF per concept from direct token presence; (4) score each record by its
 * best token match per concept, then apply coverage, all-in-name and
 * substring bonuses and a penalty for name tokens that no concept explains;
 * (5) sort, truncate to `limit`, and rescale scores relative to the top hit.
 *
 * @param dirAbs  Absolute path of the directory to scan.
 * @param query   Natural-language-ish query, e.g. "remove expired sessions".
 * @param root    Project root; result paths are made relative to it.
 * @param options `limit` (default 20), `kind` filter, `exportedOnly` (default false).
 * @returns Matches (best first), each with `score` and `matchedTerms`;
 *   an empty array when the query has no usable concept or nothing is indexed.
 */
export async function semanticSearch(dirAbs, query, root, options = {}) {
    const { limit = 20, kind, exportedOnly = false } = options;

    // 1. Query → concepts (order of first appearance is kept).
    const concepts = [];
    for (const word of splitIdentifier(query)) {
        if (STOPWORDS.has(word)) {
            continue;
        }
        const stemmed = stem(word);
        const usable = stemmed.length >= 2 && !concepts.includes(stemmed);
        if (usable) {
            concepts.push(stemmed);
        }
    }
    if (concepts.length === 0) {
        return [];
    }

    // 2. Corpus. Detail "full" so doc comments and signatures are available.
    const opts = resolveOptions({ detail: "full", emitHtml: false }, loadProjectConfig(root));
    const corpus = [];
    for (const absPath of collectSourceFiles(dirAbs, opts)) {
        const relPath = path.relative(root, absPath).split(path.sep).join("/");
        try {
            const skeleton = await buildSkeleton(absPath, relPath, opts);
            for (const { sym, fullName } of flattenDocs(skeleton.symbols, skeleton.file)) {
                const kindMatches = !kind || sym.kind === kind;
                const visibilityMatches = !exportedOnly || (sym.exported ?? false);
                if (kindMatches && visibilityMatches) {
                    corpus.push(buildDoc(sym, fullName, skeleton.file));
                }
            }
        }
        catch {
            // Best effort: a file that cannot be read or parsed is left out.
        }
    }
    const corpusSize = corpus.length;
    if (corpusSize === 0) {
        return [];
    }

    // 3. BM25-style IDF from the number of records holding the concept verbatim.
    const idf = new Map();
    for (const concept of concepts) {
        const df = corpus.reduce((n, doc) => n + (doc.tokens.has(concept) ? 1 : 0), 0);
        idf.set(concept, Math.log(1 + (corpusSize - df + 0.5) / (df + 0.5)));
    }

    // How `token` relates to `concept`: [match weight, label]; weight 0 = no match.
    const classify = (token, concept) => {
        if (token === concept) {
            return [MATCH_WEIGHT.direct, concept];
        }
        if (sharesGroup(token, concept)) {
            return [MATCH_WEIGHT.synonym, `${concept}≈${token}`];
        }
        const longEnough = concept.length >= 4 && token.length >= 4;
        if (longEnough && editDistance(token, concept, 1) <= 1) {
            return [MATCH_WEIGHT.fuzzy, `${concept}~${token}`];
        }
        return [0, null];
    };

    // 4. Score every record.
    const joinedQuery = concepts.join("");
    const ranked = [];
    for (const doc of corpus) {
        const matchedTerms = [];
        let score = 0;
        let nameHits = 0;
        for (const concept of concepts) {
            let best = 0;
            let bestLabel = null;
            for (const [token, fieldWeight] of doc.tokens) {
                const [matchWeight, label] = classify(token, concept);
                const contribution = matchWeight * fieldWeight;
                if (!(contribution > best)) {
                    continue;
                }
                best = contribution;
                bestLabel = label;
                // A direct hit in the name is the maximum possible contribution.
                if (fieldWeight >= FIELD_WEIGHT.name && matchWeight === MATCH_WEIGHT.direct) {
                    break;
                }
            }
            if (best > 0 && bestLabel) {
                score += best * (idf.get(concept) ?? 1);
                matchedTerms.push(bestLabel);
                if (doc.nameTokens.has(concept)) {
                    nameHits++;
                }
            }
        }
        if (matchedTerms.length === 0) {
            continue;
        }
        // Bonuses: share of concepts matched, every concept in the name, and
        // the concatenated concepts appearing inside the symbol name.
        const coverage = matchedTerms.length / concepts.length;
        score *= 0.5 + 0.5 * coverage;
        if (nameHits === concepts.length) {
            score *= 1.25;
        }
        if (doc.match.symbol.toLowerCase().includes(joinedQuery)) {
            score *= 1.2;
        }
        // Length normalization: each name token that no concept explains
        // costs a little, so "login" outranks "handleLogin" for the same hits.
        let unexplained = 0;
        for (const nameToken of doc.nameTokens) {
            const explained = concepts.some((concept) => classify(nameToken, concept)[0] > 0);
            if (!explained) {
                unexplained++;
            }
        }
        score /= 1 + 0.15 * unexplained;
        ranked.push({ ...doc.match, score, matchedTerms });
    }

    // 5. Order, truncate, and rescale so the best hit scores 1.
    ranked.sort((a, b) => b.score - a.score || a.symbol.localeCompare(b.symbol));
    const top = ranked.slice(0, limit);
    const max = top.length > 0 ? top[0].score : 1;
    if (max > 0) {
        for (const hit of top) {
            hit.score = Math.round((hit.score / max) * 1000) / 1000;
        }
    }
    return top;
}
|
package/dist/tsconfig.js
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
// Config file names probed in each directory; the first one that exists wins.
const CONFIG_NAMES = [
    "tsconfig.json",
    "jsconfig.json",
];
|
|
4
|
+
/**
 * Tolerant JSONC parse: strips `//` and block comments, trailing commas and a
 * leading byte-order mark, then hands the result to `JSON.parse`.
 *
 * The input is walked character by character and string literals are copied
 * verbatim, so comment-like or comma-like text inside a string (for example
 * the glob "**" + "/*.ts" or the alias key "@/*") is never touched.
 *
 * A comma counts as trailing when the next significant character is `}` or
 * `]`, where whitespace AND comments in between are insignificant. This
 * covers the common `"x": 1, // note` followed by a closing brace.
 *
 * @param {string} raw File contents.
 * @returns {*} The parsed value, or null when the text is still not valid JSON.
 */
function parseJsonc(raw) {
    // JSON.parse rejects a BOM, and readFileSync(..., "utf8") does not strip it.
    const text = raw.charCodeAt(0) === 0xfeff ? raw.slice(1) : raw;
    const out = [];
    // Index in `out` of a comma that may turn out to be trailing; -1 if none.
    let pendingComma = -1;
    let i = 0;
    while (i < text.length) {
        const c = text[i];
        if (c === '"') {
            // Copy the whole string literal, honouring backslash escapes.
            let end = i + 1;
            while (end < text.length && text[end] !== '"') {
                end += text[end] === "\\" ? 2 : 1;
            }
            out.push(text.slice(i, end + 1));
            pendingComma = -1;
            i = end + 1;
        }
        else if (c === "/" && text[i + 1] === "/") {
            // Line comment: skip to (not past) the newline.
            while (i < text.length && text[i] !== "\n") {
                i++;
            }
        }
        else if (c === "/" && text[i + 1] === "*") {
            // Block comment: skip past the terminator, or to EOF if unterminated.
            const close = text.indexOf("*/", i + 2);
            i = close === -1 ? text.length : close + 2;
        }
        else {
            if (c === ",") {
                pendingComma = out.length;
            }
            else if (c === "}" || c === "]") {
                if (pendingComma !== -1) {
                    out[pendingComma] = ""; // drop the trailing comma
                }
                pendingComma = -1;
            }
            else if (!/\s/.test(c)) {
                // Any other significant character means the comma separates values.
                pendingComma = -1;
            }
            out.push(c);
            i++;
        }
    }
    try {
        return JSON.parse(out.join(""));
    }
    catch {
        return null;
    }
}
|
|
66
|
+
/**
 * Read a tsconfig/jsconfig file and merge `baseUrl` / `paths` with whatever it
 * inherits through relative `extends` entries (child overrides parent).
 *
 * `extends` may be a single string or an array of strings; array entries are
 * applied in order so later ones override earlier ones. Only relative
 * specifiers (starting with ".") are followed, and ".json" is appended when
 * the specifier lacks it. Recursion stops after 5 levels.
 *
 * Each setting is anchored to the file that declares it: `baseUrl` is
 * returned already resolved to an absolute path against its declaring
 * config's directory, and `dir` is the directory of the config that declared
 * `paths`. So a child that overrides only `paths` keeps the parent's
 * `baseUrl` location, and vice versa.
 *
 * @param {string} configPath Absolute path of the config file.
 * @param {number} [depth=0]  Current `extends` nesting depth.
 * @returns {{baseUrl: (string|undefined), paths: (object|undefined), dir: string}|null}
 *   null when the file is unreadable, unparseable, or nested too deep.
 */
function readConfigChain(configPath, depth = 0) {
    if (depth > 5) {
        return null;
    }
    let raw;
    try {
        raw = fs.readFileSync(configPath, "utf8");
    }
    catch {
        return null;
    }
    const json = parseJsonc(raw);
    if (!json || typeof json !== "object") {
        return null;
    }
    const dir = path.dirname(configPath);
    let baseUrl;
    let paths;
    let pathsDir = dir;

    const parents = Array.isArray(json.extends) ? json.extends : [json.extends];
    for (const ext of parents) {
        if (typeof ext !== "string" || !ext.startsWith(".")) {
            continue;
        }
        let parentPath = path.resolve(dir, ext);
        if (!parentPath.endsWith(".json")) {
            parentPath += ".json";
        }
        const parent = readConfigChain(parentPath, depth + 1);
        if (!parent) {
            continue;
        }
        if (parent.baseUrl !== undefined) {
            baseUrl = parent.baseUrl; // already absolute
        }
        if (parent.paths) {
            paths = parent.paths;
            pathsDir = parent.dir; // inherited paths resolve against their own config
        }
    }

    const co = json.compilerOptions;
    if (co && typeof co === "object") {
        if (typeof co.baseUrl === "string") {
            baseUrl = path.resolve(dir, co.baseUrl);
        }
        if (co.paths && typeof co.paths === "object") {
            paths = co.paths;
            pathsDir = dir;
        }
    }
    return { baseUrl, paths, dir: pathsDir };
}
|
|
109
|
+
/**
 * Turn the merged `paths` mapping of a config into a list of match patterns.
 *
 * Each key becomes `{ prefix, suffix, exact, targets }`: the key is split
 * around its first "*" (no "*" means an exact-match pattern), and every string
 * target is resolved to an absolute path against the effective base
 * directory. Keys whose value is not an array, or has no string entries, are
 * skipped.
 *
 * Patterns are ordered longest prefix first. When an exact key and a wildcard
 * key have the same prefix length (e.g. "foo" and "foo*"), the exact key comes
 * first, so the result does not depend on declaration order in the config.
 *
 * @param {string} configPath Absolute path of the config file.
 * @returns {{patterns: object[]}|null} null when there is no usable pattern.
 */
function buildAliasConfig(configPath) {
    const merged = readConfigChain(configPath);
    if (!merged || !merged.paths) {
        return null;
    }
    const base = path.resolve(merged.dir, merged.baseUrl ?? ".");
    const patterns = [];
    for (const [key, targets] of Object.entries(merged.paths)) {
        if (!Array.isArray(targets)) {
            continue;
        }
        const abs = targets
            .filter((t) => typeof t === "string")
            .map((t) => path.resolve(base, t));
        if (abs.length === 0) {
            continue;
        }
        const star = key.indexOf("*");
        patterns.push(star === -1
            ? { prefix: key, suffix: "", exact: true, targets: abs }
            : { prefix: key.slice(0, star), suffix: key.slice(star + 1), exact: false, targets: abs });
    }
    // Longest prefix wins; exact beats wildcard on a tie.
    patterns.sort((a, b) => b.prefix.length - a.prefix.length || Number(b.exact) - Number(a.exact));
    return patterns.length > 0 ? { patterns } : null;
}
|
|
135
|
+
// Directory → path of the nearest config file found at or above it.
// `null` records that the walk found nothing.
const configPathCache = new Map();
// Config file path → alias patterns built from it.
// `null` records a config that yields no patterns.
const aliasCache = new Map();
|
|
139
|
+
/**
 * Locate the closest tsconfig.json / jsconfig.json at or above `fromDir`.
 *
 * Walks upward one directory at a time, checking CONFIG_NAMES in order at each
 * level. The walk ends at the filesystem root, at a cached ancestor, or as
 * soon as the directory just examined lies inside a `node_modules` folder;
 * such a directory is checked itself but the walk does not continue above it.
 * `node_modules` is matched as a whole path segment, so a directory that
 * merely contains that text in its name (e.g. "my_node_modules_demo") does
 * not stop the walk.
 *
 * Every directory visited is cached with the final answer.
 *
 * @param {string} fromDir Absolute directory to start from.
 * @returns {string|null} Absolute config path, or null when none is found.
 */
function findNearestConfig(fromDir) {
    const visited = [];
    let result = null;
    let dir = fromDir;
    for (;;) {
        const cached = configPathCache.get(dir);
        if (cached !== undefined) {
            result = cached;
            break;
        }
        visited.push(dir);
        const found = CONFIG_NAMES
            .map((name) => path.join(dir, name))
            .find((candidate) => fs.existsSync(candidate));
        if (found) {
            result = found;
            break;
        }
        const parent = path.dirname(dir);
        const insideNodeModules = dir.split(/[\\/]/).includes("node_modules");
        if (parent === dir || insideNodeModules) {
            break;
        }
        dir = parent;
    }
    for (const d of visited) {
        configPathCache.set(d, result);
    }
    return result;
}
|
|
176
|
+
/** Test-only: empty both per-process caches (config lookup, then alias patterns). */
export function clearAliasCaches() {
    for (const cache of [configPathCache, aliasCache]) {
        cache.clear();
    }
}
|
|
181
|
+
/**
 * Map an aliased bare import to absolute candidate base paths (no extension
 * probing).
 *
 * The nearest config above `fromAbs` supplies the patterns; they are tried in
 * their stored order and the first one that matches decides the result. For a
 * wildcard pattern, the part of the specifier between prefix and suffix is
 * substituted for the first "*" of each target. The substitution is literal:
 * "$" sequences in the specifier (e.g. a "$&" or "$$" in a route file name)
 * are not interpreted as replacement patterns.
 *
 * @param {string} importFrom Import specifier as written in the source.
 * @param {string} fromAbs    Absolute path of the importing file.
 * @returns {string[]} A fresh array of candidates; empty when the specifier is
 *   relative/absolute, no config is found, or no pattern matches.
 */
export function aliasCandidates(importFrom, fromAbs) {
    if (importFrom.startsWith(".") || path.isAbsolute(importFrom)) {
        return [];
    }
    const configPath = findNearestConfig(path.dirname(fromAbs));
    if (!configPath) {
        return [];
    }
    let cfg = aliasCache.get(configPath);
    if (cfg === undefined) {
        cfg = buildAliasConfig(configPath);
        aliasCache.set(configPath, cfg); // null is cached too: "no aliases here"
    }
    if (!cfg) {
        return [];
    }
    for (const p of cfg.patterns) {
        if (p.exact) {
            if (importFrom === p.prefix) {
                // Copy so callers cannot mutate the cached pattern.
                return [...p.targets];
            }
            continue;
        }
        const fits = importFrom.length >= p.prefix.length + p.suffix.length &&
            importFrom.startsWith(p.prefix) &&
            importFrom.endsWith(p.suffix);
        if (fits) {
            const star = importFrom.slice(p.prefix.length, importFrom.length - p.suffix.length);
            // Function replacer: the captured text is inserted verbatim.
            return p.targets.map((t) => t.replace("*", () => star));
        }
    }
    return [];
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "universal-ast-mapper",
|
|
3
|
-
"version": "1.23.0",
|
|
3
|
+
"version": "1.25.0",
|
|
4
4
|
"description": "MCP server that maps source files into a normalized code skeleton (JSON + HTML) using tree-sitter.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
"build": "tsc",
|
|
20
20
|
"start": "node dist/index.js",
|
|
21
21
|
"smoke": "node test/smoke.mjs",
|
|
22
|
-
"test": "node test/smoke.mjs && node test/analysis.mjs && node test/cache-smoke.mjs && node test/check-smoke.mjs && node test/roots-smoke.mjs",
|
|
22
|
+
"test": "node test/smoke.mjs && node test/analysis.mjs && node test/cache-smoke.mjs && node test/check-smoke.mjs && node test/roots-smoke.mjs && node test/tsalias-smoke.mjs",
|
|
23
23
|
"postinstall": "node scripts/install-skill.mjs"
|
|
24
24
|
},
|
|
25
25
|
"engines": {
|