seer-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.vscode/settings.json +3 -0
- package/LICENSE +176 -0
- package/README.md +272 -0
- package/README_dev.md +199 -0
- package/dist/bundle/ci.d.ts +47 -0
- package/dist/bundle/ci.d.ts.map +1 -0
- package/dist/bundle/ci.js +113 -0
- package/dist/bundle/ci.js.map +1 -0
- package/dist/bundle/contract.d.ts +111 -0
- package/dist/bundle/contract.d.ts.map +1 -0
- package/dist/bundle/contract.js +352 -0
- package/dist/bundle/contract.js.map +1 -0
- package/dist/bundle/export.d.ts +36 -0
- package/dist/bundle/export.d.ts.map +1 -0
- package/dist/bundle/export.js +152 -0
- package/dist/bundle/export.js.map +1 -0
- package/dist/bundle/external.d.ts +66 -0
- package/dist/bundle/external.d.ts.map +1 -0
- package/dist/bundle/external.js +238 -0
- package/dist/bundle/external.js.map +1 -0
- package/dist/bundle/format.d.ts +94 -0
- package/dist/bundle/format.d.ts.map +1 -0
- package/dist/bundle/format.js +42 -0
- package/dist/bundle/format.js.map +1 -0
- package/dist/bundle/import.d.ts +49 -0
- package/dist/bundle/import.d.ts.map +1 -0
- package/dist/bundle/import.js +116 -0
- package/dist/bundle/import.js.map +1 -0
- package/dist/cli/index.d.ts +3 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +1402 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/cli/init.d.ts +48 -0
- package/dist/cli/init.d.ts.map +1 -0
- package/dist/cli/init.js +284 -0
- package/dist/cli/init.js.map +1 -0
- package/dist/db/schema.d.ts +3 -0
- package/dist/db/schema.d.ts.map +1 -0
- package/dist/db/schema.js +616 -0
- package/dist/db/schema.js.map +1 -0
- package/dist/db/store.d.ts +1011 -0
- package/dist/db/store.d.ts.map +1 -0
- package/dist/db/store.js +3888 -0
- package/dist/db/store.js.map +1 -0
- package/dist/graph/pagerank.d.ts +9 -0
- package/dist/graph/pagerank.d.ts.map +1 -0
- package/dist/graph/pagerank.js +47 -0
- package/dist/graph/pagerank.js.map +1 -0
- package/dist/indexer/architecture.d.ts +72 -0
- package/dist/indexer/architecture.d.ts.map +1 -0
- package/dist/indexer/architecture.js +112 -0
- package/dist/indexer/architecture.js.map +1 -0
- package/dist/indexer/behavior.d.ts +75 -0
- package/dist/indexer/behavior.d.ts.map +1 -0
- package/dist/indexer/behavior.js +395 -0
- package/dist/indexer/behavior.js.map +1 -0
- package/dist/indexer/boundaries.d.ts +60 -0
- package/dist/indexer/boundaries.d.ts.map +1 -0
- package/dist/indexer/boundaries.js +366 -0
- package/dist/indexer/boundaries.js.map +1 -0
- package/dist/indexer/churn.d.ts +15 -0
- package/dist/indexer/churn.d.ts.map +1 -0
- package/dist/indexer/churn.js +49 -0
- package/dist/indexer/churn.js.map +1 -0
- package/dist/indexer/classify.d.ts +9 -0
- package/dist/indexer/classify.d.ts.map +1 -0
- package/dist/indexer/classify.js +90 -0
- package/dist/indexer/classify.js.map +1 -0
- package/dist/indexer/context.d.ts +176 -0
- package/dist/indexer/context.d.ts.map +1 -0
- package/dist/indexer/context.js +193 -0
- package/dist/indexer/context.js.map +1 -0
- package/dist/indexer/continuity.d.ts +67 -0
- package/dist/indexer/continuity.d.ts.map +1 -0
- package/dist/indexer/continuity.js +288 -0
- package/dist/indexer/continuity.js.map +1 -0
- package/dist/indexer/detectchanges.d.ts +32 -0
- package/dist/indexer/detectchanges.d.ts.map +1 -0
- package/dist/indexer/detectchanges.js +74 -0
- package/dist/indexer/detectchanges.js.map +1 -0
- package/dist/indexer/discovery.d.ts +37 -0
- package/dist/indexer/discovery.d.ts.map +1 -0
- package/dist/indexer/discovery.js +136 -0
- package/dist/indexer/discovery.js.map +1 -0
- package/dist/indexer/externaldeps.d.ts +18 -0
- package/dist/indexer/externaldeps.d.ts.map +1 -0
- package/dist/indexer/externaldeps.js +288 -0
- package/dist/indexer/externaldeps.js.map +1 -0
- package/dist/indexer/freshness.d.ts +48 -0
- package/dist/indexer/freshness.d.ts.map +1 -0
- package/dist/indexer/freshness.js +128 -0
- package/dist/indexer/freshness.js.map +1 -0
- package/dist/indexer/git.d.ts +144 -0
- package/dist/indexer/git.d.ts.map +1 -0
- package/dist/indexer/git.js +444 -0
- package/dist/indexer/git.js.map +1 -0
- package/dist/indexer/index.d.ts +145 -0
- package/dist/indexer/index.d.ts.map +1 -0
- package/dist/indexer/index.js +930 -0
- package/dist/indexer/index.js.map +1 -0
- package/dist/indexer/modules.d.ts +62 -0
- package/dist/indexer/modules.d.ts.map +1 -0
- package/dist/indexer/modules.js +293 -0
- package/dist/indexer/modules.js.map +1 -0
- package/dist/indexer/preflight.d.ts +154 -0
- package/dist/indexer/preflight.d.ts.map +1 -0
- package/dist/indexer/preflight.js +399 -0
- package/dist/indexer/preflight.js.map +1 -0
- package/dist/indexer/protoScanner.d.ts +34 -0
- package/dist/indexer/protoScanner.d.ts.map +1 -0
- package/dist/indexer/protoScanner.js +133 -0
- package/dist/indexer/protoScanner.js.map +1 -0
- package/dist/indexer/risk.d.ts +115 -0
- package/dist/indexer/risk.d.ts.map +1 -0
- package/dist/indexer/risk.js +194 -0
- package/dist/indexer/risk.js.map +1 -0
- package/dist/indexer/serviceHostScanner.d.ts +25 -0
- package/dist/indexer/serviceHostScanner.d.ts.map +1 -0
- package/dist/indexer/serviceHostScanner.js +95 -0
- package/dist/indexer/serviceHostScanner.js.map +1 -0
- package/dist/indexer/serviceLinks.d.ts +105 -0
- package/dist/indexer/serviceLinks.d.ts.map +1 -0
- package/dist/indexer/serviceLinks.js +509 -0
- package/dist/indexer/serviceLinks.js.map +1 -0
- package/dist/indexer/shapehash.d.ts +98 -0
- package/dist/indexer/shapehash.d.ts.map +1 -0
- package/dist/indexer/shapehash.js +354 -0
- package/dist/indexer/shapehash.js.map +1 -0
- package/dist/indexer/skeleton.d.ts +15 -0
- package/dist/indexer/skeleton.d.ts.map +1 -0
- package/dist/indexer/skeleton.js +136 -0
- package/dist/indexer/skeleton.js.map +1 -0
- package/dist/indexer/symbolhistory.d.ts +41 -0
- package/dist/indexer/symbolhistory.d.ts.map +1 -0
- package/dist/indexer/symbolhistory.js +124 -0
- package/dist/indexer/symbolhistory.js.map +1 -0
- package/dist/indexer/watcher.d.ts +68 -0
- package/dist/indexer/watcher.d.ts.map +1 -0
- package/dist/indexer/watcher.js +179 -0
- package/dist/indexer/watcher.js.map +1 -0
- package/dist/mcp/server.d.ts +80 -0
- package/dist/mcp/server.d.ts.map +1 -0
- package/dist/mcp/server.js +1610 -0
- package/dist/mcp/server.js.map +1 -0
- package/dist/parser/index.d.ts +8 -0
- package/dist/parser/index.d.ts.map +1 -0
- package/dist/parser/index.js +33 -0
- package/dist/parser/index.js.map +1 -0
- package/dist/parser/languages/cpp.d.ts +3 -0
- package/dist/parser/languages/cpp.d.ts.map +1 -0
- package/dist/parser/languages/cpp.js +350 -0
- package/dist/parser/languages/cpp.js.map +1 -0
- package/dist/parser/languages/csharp.d.ts +3 -0
- package/dist/parser/languages/csharp.d.ts.map +1 -0
- package/dist/parser/languages/csharp.js +239 -0
- package/dist/parser/languages/csharp.js.map +1 -0
- package/dist/parser/languages/go.d.ts +3 -0
- package/dist/parser/languages/go.d.ts.map +1 -0
- package/dist/parser/languages/go.js +259 -0
- package/dist/parser/languages/go.js.map +1 -0
- package/dist/parser/languages/java.d.ts +3 -0
- package/dist/parser/languages/java.d.ts.map +1 -0
- package/dist/parser/languages/java.js +391 -0
- package/dist/parser/languages/java.js.map +1 -0
- package/dist/parser/languages/python.d.ts +3 -0
- package/dist/parser/languages/python.d.ts.map +1 -0
- package/dist/parser/languages/python.js +396 -0
- package/dist/parser/languages/python.js.map +1 -0
- package/dist/parser/languages/rust.d.ts +3 -0
- package/dist/parser/languages/rust.d.ts.map +1 -0
- package/dist/parser/languages/rust.js +159 -0
- package/dist/parser/languages/rust.js.map +1 -0
- package/dist/parser/languages/typescript.d.ts +3 -0
- package/dist/parser/languages/typescript.d.ts.map +1 -0
- package/dist/parser/languages/typescript.js +1442 -0
- package/dist/parser/languages/typescript.js.map +1 -0
- package/dist/parser/parserContext.d.ts +77 -0
- package/dist/parser/parserContext.d.ts.map +1 -0
- package/dist/parser/parserContext.js +354 -0
- package/dist/parser/parserContext.js.map +1 -0
- package/dist/parser/walker.d.ts +81 -0
- package/dist/parser/walker.d.ts.map +1 -0
- package/dist/parser/walker.js +217 -0
- package/dist/parser/walker.js.map +1 -0
- package/dist/parser/worker.d.ts +66 -0
- package/dist/parser/worker.d.ts.map +1 -0
- package/dist/parser/worker.js +129 -0
- package/dist/parser/worker.js.map +1 -0
- package/dist/parser/workerpool.d.ts +107 -0
- package/dist/parser/workerpool.d.ts.map +1 -0
- package/dist/parser/workerpool.js +383 -0
- package/dist/parser/workerpool.js.map +1 -0
- package/dist/scip/format.d.ts +87 -0
- package/dist/scip/format.d.ts.map +1 -0
- package/dist/scip/format.js +31 -0
- package/dist/scip/format.js.map +1 -0
- package/dist/scip/import.d.ts +37 -0
- package/dist/scip/import.d.ts.map +1 -0
- package/dist/scip/import.js +180 -0
- package/dist/scip/import.js.map +1 -0
- package/dist/types.d.ts +392 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +4 -0
- package/dist/types.js.map +1 -0
- package/docs/architecture.md +105 -0
- package/docs/benchmarks/methodology.md +134 -0
- package/docs/benchmarks/raw-results.md +71 -0
- package/docs/benchmarks.md +74 -0
- package/docs/cli.md +148 -0
- package/docs/examples/behavior-tests.md +70 -0
- package/docs/examples/change-history.md +85 -0
- package/docs/examples/pre-edit-context.md +81 -0
- package/docs/examples/service-links.md +88 -0
- package/docs/examples.md +80 -0
- package/docs/faq.md +70 -0
- package/docs/internals.md +104 -0
- package/docs/languages.md +70 -0
- package/docs/limits.md +52 -0
- package/docs/mcp.md +199 -0
- package/docs/quickstart.md +119 -0
- package/docs/testing.md +123 -0
- package/docs/tools.md +115 -0
- package/package.json +52 -0
- package/research-codebase.md +578 -0
- package/seer-cli-docs.md +326 -0
- package/seer-master-guide.md +246 -0
- package/src/bundle/ci.ts +141 -0
- package/src/bundle/contract.ts +387 -0
- package/src/bundle/export.ts +175 -0
- package/src/bundle/external.ts +285 -0
- package/src/bundle/format.ts +92 -0
- package/src/bundle/import.ts +157 -0
- package/src/cli/index.ts +1249 -0
- package/src/cli/init.ts +389 -0
- package/src/db/schema.ts +614 -0
- package/src/db/store.ts +4306 -0
- package/src/graph/pagerank.ts +53 -0
- package/src/indexer/architecture.ts +148 -0
- package/src/indexer/behavior.ts +466 -0
- package/src/indexer/boundaries.ts +374 -0
- package/src/indexer/churn.ts +58 -0
- package/src/indexer/classify.ts +96 -0
- package/src/indexer/context.ts +340 -0
- package/src/indexer/continuity.ts +322 -0
- package/src/indexer/detectchanges.ts +94 -0
- package/src/indexer/discovery.ts +176 -0
- package/src/indexer/externaldeps.ts +243 -0
- package/src/indexer/freshness.ts +166 -0
- package/src/indexer/git.ts +453 -0
- package/src/indexer/index.ts +1092 -0
- package/src/indexer/modules.ts +358 -0
- package/src/indexer/preflight.ts +548 -0
- package/src/indexer/protoScanner.ts +147 -0
- package/src/indexer/risk.ts +304 -0
- package/src/indexer/serviceHostScanner.ts +92 -0
- package/src/indexer/serviceLinks.ts +543 -0
- package/src/indexer/shapehash.ts +370 -0
- package/src/indexer/skeleton.ts +169 -0
- package/src/indexer/symbolhistory.ts +172 -0
- package/src/indexer/watcher.ts +206 -0
- package/src/mcp/server.ts +1659 -0
- package/src/parser/index.ts +37 -0
- package/src/parser/languages/cpp.ts +361 -0
- package/src/parser/languages/csharp.ts +235 -0
- package/src/parser/languages/go.ts +259 -0
- package/src/parser/languages/java.ts +382 -0
- package/src/parser/languages/python.ts +370 -0
- package/src/parser/languages/rust.ts +164 -0
- package/src/parser/languages/typescript.ts +1435 -0
- package/src/parser/parserContext.ts +392 -0
- package/src/parser/walker.ts +306 -0
- package/src/parser/worker.ts +181 -0
- package/src/parser/workerpool.ts +448 -0
- package/src/scip/format.ts +83 -0
- package/src/scip/import.ts +216 -0
- package/src/types.ts +457 -0
- package/tests/benchmark-service-links.ts +244 -0
- package/tests/bug-regressions.ts +626 -0
- package/tests/filters.ts +264 -0
- package/tests/fixtures/Counter.tsx +38 -0
- package/tests/fixtures/caller.ts +7 -0
- package/tests/fixtures/collisions.ts +23 -0
- package/tests/fixtures/local_helper.ts +5 -0
- package/tests/fixtures/overloads.java +17 -0
- package/tests/fixtures/remote_helper.ts +4 -0
- package/tests/fixtures/sample.c +15 -0
- package/tests/fixtures/sample.cpp +47 -0
- package/tests/fixtures/sample.cs +62 -0
- package/tests/fixtures/sample.go +68 -0
- package/tests/fixtures/sample.h +30 -0
- package/tests/fixtures/sample.java +85 -0
- package/tests/fixtures/sample.py +46 -0
- package/tests/fixtures/sample.rs +78 -0
- package/tests/fixtures/sample.ts +76 -0
- package/tests/fixtures-service/HttpClients.cs +30 -0
- package/tests/fixtures-service/HttpClients.java +24 -0
- package/tests/fixtures-service/billing.ts +15 -0
- package/tests/fixtures-service/docker-compose.yml +15 -0
- package/tests/fixtures-service/gateway.ts +10 -0
- package/tests/fixtures-service/get_user.ts +11 -0
- package/tests/fixtures-service/graphql_client.ts +63 -0
- package/tests/fixtures-service/graphql_server.ts +30 -0
- package/tests/fixtures-service/grpc_client.go +30 -0
- package/tests/fixtures-service/http_clients.go +23 -0
- package/tests/fixtures-service/http_clients.py +38 -0
- package/tests/fixtures-service/http_clients.ts +49 -0
- package/tests/fixtures-service/k8s/payment-service.yaml +22 -0
- package/tests/fixtures-service/k8s_calls.ts +20 -0
- package/tests/fixtures-service/messaging.ts +87 -0
- package/tests/fixtures-service/trpc_client.ts +39 -0
- package/tests/fixtures-service/trpc_server.ts +39 -0
- package/tests/fixtures-service/user_service.proto +33 -0
- package/tests/fixtures-trackcd/Cargo.toml +11 -0
- package/tests/fixtures-trackcd/SpringController.java +36 -0
- package/tests/fixtures-trackcd/auth_service.ts +19 -0
- package/tests/fixtures-trackcd/complex_module.py +50 -0
- package/tests/fixtures-trackcd/express_app.js +30 -0
- package/tests/fixtures-trackcd/fastapi_app.py +49 -0
- package/tests/fixtures-trackcd/fastify_object_routes.js +32 -0
- package/tests/fixtures-trackcd/go.mod +8 -0
- package/tests/fixtures-trackcd/package.json +15 -0
- package/tests/fixtures-trackcd/requirements.txt +4 -0
- package/tests/fixtures-trackcd/tests/auth_service.test.ts +13 -0
- package/tests/fixtures-tracke/auth/AuthService.ts +23 -0
- package/tests/fixtures-tracke/auth/crypto.ts +7 -0
- package/tests/fixtures-tracke/billing/Billing.ts +20 -0
- package/tests/fixtures-tracke/billing/Invoice.ts +10 -0
- package/tests/fixtures-tracke/billing/server.ts +17 -0
- package/tests/fixtures-tracke/package.json +7 -0
- package/tests/fixtures-tracke/tests/auth.test.ts +23 -0
- package/tests/fixtures-tracke/tests/billing.test.ts +14 -0
- package/tests/fixtures-trackf/package.json +5 -0
- package/tests/fixtures-trackf/src/auth.ts +26 -0
- package/tests/fixtures-trackf/src/handlers.ts +35 -0
- package/tests/fixtures-tracki/billing/routes.ts +12 -0
- package/tests/fixtures-tracki/gateway/client.ts +13 -0
- package/tests/git-features.ts +267 -0
- package/tests/init.ts +141 -0
- package/tests/mcp-jit.ts +130 -0
- package/tests/mcp-smoke.ts +191 -0
- package/tests/mcp-trackcd.ts +169 -0
- package/tests/mcp-tracke.ts +229 -0
- package/tests/mcp-trackf.ts +330 -0
- package/tests/mcp-trackg.ts +219 -0
- package/tests/mcp-tracki.ts +174 -0
- package/tests/mcp-watcher.ts +126 -0
- package/tests/optspec.ts +194 -0
- package/tests/parallel-index.ts +333 -0
- package/tests/parallel-read.ts +125 -0
- package/tests/parallel-recovery.ts +241 -0
- package/tests/perf-callers.ts +145 -0
- package/tests/query-parity.ts +184 -0
- package/tests/query-perf.ts +55 -0
- package/tests/scale-parallel-parity.ts +225 -0
- package/tests/scale-test.ts +523 -0
- package/tests/smoke.ts +396 -0
- package/tests/trackcd.ts +325 -0
- package/tests/tracke-collisions.ts +255 -0
- package/tests/tracke.ts +314 -0
- package/tests/trackf-bugs.ts +406 -0
- package/tests/trackf.ts +390 -0
- package/tests/trackg.ts +1372 -0
- package/tests/tracki-boundaries.ts +202 -0
- package/tests/tracki-continuity.ts +253 -0
- package/tests/tracki-contract-diff.ts +249 -0
- package/tests/tracki-external-bundles.ts +341 -0
- package/tests/tracki-preflight.ts +251 -0
- package/tests/verify-roles.ts +51 -0
- package/tests/worker-parity.ts +286 -0
- package/tests/worker-pool.ts +262 -0
- package/tsconfig.json +20 -0
|
@@ -0,0 +1,392 @@
|
|
|
1
|
+
import Parser from 'web-tree-sitter';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import type { FileExtraction, Language } from '../types.js';
|
|
4
|
+
import type { LanguageExtractor } from './walker.js';
|
|
5
|
+
import { walkTree } from './walker.js';
|
|
6
|
+
import { pythonExtractor } from './languages/python.js';
|
|
7
|
+
import { typescriptExtractor } from './languages/typescript.js';
|
|
8
|
+
import { goExtractor } from './languages/go.js';
|
|
9
|
+
import { javaExtractor } from './languages/java.js';
|
|
10
|
+
import { rustExtractor } from './languages/rust.js';
|
|
11
|
+
import { cppExtractor } from './languages/cpp.js';
|
|
12
|
+
import { csharpExtractor } from './languages/csharp.js';
|
|
13
|
+
|
|
14
|
+
// ── Extension → language mapping ───────────────────────────────────────────────
|
|
15
|
+
|
|
16
|
+
export const EXT_TO_LANGUAGE: Record<string, Language> = {
  // Python
  '.py': 'python',
  '.pyw': 'python',

  // TypeScript
  '.ts': 'typescript',
  '.tsx': 'typescript',

  // JavaScript (including ESM / CommonJS specific extensions)
  '.js': 'javascript',
  '.jsx': 'javascript',
  '.mjs': 'javascript',
  '.cjs': 'javascript',

  '.go': 'go',
  '.java': 'java',
  '.rs': 'rust',

  // Plain C sources get the dedicated C grammar, paired with the C-family
  // symbol logic shared with the C++ extractor. A `.h` header could be either
  // C or C++, so headers default to the C++ grammar.
  '.c': 'c',

  // C++ sources
  '.cpp': 'cpp',
  '.cc': 'cpp',
  '.cxx': 'cpp',
  '.c++': 'cpp',

  // C / C++ headers
  '.hpp': 'cpp',
  '.hh': 'cpp',
  '.h++': 'cpp',
  '.h': 'cpp',

  '.cs': 'csharp',
};
|
|
31
|
+
|
|
32
|
+
/**
 * Look up the language for a file based on its extension.
 *
 * The extension is lower-cased before the lookup, so `Foo.PY` and `foo.py`
 * resolve the same way.
 *
 * @param filePath Path (or bare file name) whose extension is inspected.
 * @returns The mapped language, or null when the extension has no entry in
 *   `EXT_TO_LANGUAGE` (including files with no extension at all).
 */
export function detectLanguage(filePath: string): Language | null {
  const language = EXT_TO_LANGUAGE[path.extname(filePath).toLowerCase()];
  if (language == null) return null;
  return language;
}
|
|
36
|
+
|
|
37
|
+
// v9 Track-H — stand-in extractor for `.proto` files. They never pass through
// tree-sitter (protoScanner.ts handles them); this entry only exists so that
// `Record<Language, LanguageExtractor>` has a value for the `proto` key.
// Every hook reports "nothing found".
const noopProtoExtractor: LanguageExtractor = {
  languageName: 'proto',
  extensions: ['.proto'],
  tryExtractDefinition: () => null,
  tryExtractCallName: () => null,
  tryExtractImport: () => null,
};
|
|
47
|
+
|
|
48
|
+
// JavaScript reuses the TypeScript extractor's logic under its own language
// name; only the WASM grammar differs (and `.tsx` needs the tsx variant).
const javascriptExtractor: LanguageExtractor = {
  ...typescriptExtractor,
  languageName: 'javascript',
};

// C reuses the C++ extractor's logic, re-labelled and restricted to `.c`.
const cExtractor: LanguageExtractor = {
  ...cppExtractor,
  languageName: 'c',
  extensions: ['.c'],
};

/** Extractor used for each supported language. */
export const EXTRACTORS: Record<Language, LanguageExtractor> = {
  python: pythonExtractor,
  typescript: typescriptExtractor,
  javascript: javascriptExtractor,
  go: goExtractor,
  java: javaExtractor,
  rust: rustExtractor,
  c: cExtractor,
  cpp: cppExtractor,
  csharp: csharpExtractor,
  proto: noopProtoExtractor,
};
|
|
62
|
+
|
|
63
|
+
// Language → default grammar name. `grammarForExtension` consults this table
// when an extension has no dedicated override. The C# entry is spelled
// `c_sharp` (with an underscore) because the WASM file is named
// `tree-sitter-c_sharp.wasm`.
export const GRAMMAR_NAME: Record<Language, string> = {
  python: 'python',
  typescript: 'typescript',
  javascript: 'javascript',
  go: 'go',
  java: 'java',
  rust: 'rust',
  c: 'c',
  cpp: 'cpp',
  csharp: 'c_sharp',
  // Unused: proto files never enter the tree-sitter pipeline.
  proto: 'proto',
};
|
|
79
|
+
|
|
80
|
+
/**
 * Pick the WASM grammar name for a file extension.
 *
 * The TS/JS family is routed explicitly because one language maps to several
 * grammars:
 *   .tsx                 → `tsx` (TypeScript + JSX)
 *   .js .jsx .mjs .cjs   → `javascript` (handles JSX natively)
 *   .ts                  → `typescript` (no JSX)
 * Every other extension goes through `EXT_TO_LANGUAGE` and then the
 * language's default entry in `GRAMMAR_NAME`.
 *
 * @param ext Extension including the leading dot; compared as given (callers
 *   are expected to lower-case it first).
 * @returns The grammar name, or null when the extension is not mapped.
 */
export function grammarForExtension(ext: string): string | null {
  switch (ext) {
    case '.tsx':
      return 'tsx';
    case '.jsx':
    case '.mjs':
    case '.cjs':
    case '.js':
      return 'javascript';
    case '.ts':
      return 'typescript';
    default: {
      const language = EXT_TO_LANGUAGE[ext];
      return language ? GRAMMAR_NAME[language] : null;
    }
  }
}
|
|
95
|
+
|
|
96
|
+
/**
 * Report whether `s` is exactly one of the language keys of `GRAMMAR_NAME`.
 *
 * Uses an own-property check rather than the `in` operator: `in` also matches
 * names inherited from `Object.prototype` (`constructor`, `toString`, ...),
 * which would make an extension-less file with such a name look like a
 * language identifier to `parseFile`'s legacy `(content, language)` detection.
 *
 * @param s Candidate string (a language name or a file path).
 * @returns true only when `s` is an own key of `GRAMMAR_NAME`.
 */
export function isLanguageString(s: string): boolean {
  return Object.prototype.hasOwnProperty.call(GRAMMAR_NAME, s);
}
|
|
99
|
+
|
|
100
|
+
// ── ParserContext ──────────────────────────────────────────────────────────────
|
|
101
|
+
//
|
|
102
|
+
// web-tree-sitter shares ONE WebAssembly module across all `Parser` instances.
|
|
103
|
+
// When tree-sitter aborts inside WASM (printing "Aborted()" to stderr — usually
|
|
104
|
+
// from a memory exhaustion on a pathological input), the WASM module is left
|
|
105
|
+
// poisoned and every subsequent parse fails. Creating a fresh `new Parser()`
|
|
106
|
+
// does NOT recover, because the underlying module is shared and broken.
|
|
107
|
+
//
|
|
108
|
+
// The recovery path is to throw away the entire WASM runtime and re-initialize:
|
|
109
|
+
// re-call `Parser.init()` and reload every grammar from disk. That's what
|
|
110
|
+
// `resetWasmRuntime` does. It's expensive (~100-300ms) but only runs after a
|
|
111
|
+
// real failure — the per-file size cap keeps it rare.
|
|
112
|
+
//
|
|
113
|
+
// IMPORTANT: this class is NOT a true isolation boundary. web-tree-sitter
|
|
114
|
+
// stores its WASM module at process/isolate scope, so two `ParserContext`
|
|
115
|
+
// instances in the same V8 isolate share the underlying runtime and a poison
|
|
116
|
+
// in one will poison the other. The real isolation boundary is the
|
|
117
|
+
// `worker_threads.Worker` — each worker gets its own V8 isolate and its own
|
|
118
|
+
// independent WASM module. Construct exactly one `ParserContext` per isolate.
|
|
119
|
+
//
|
|
120
|
+
// The class exists so the same parsing logic can be run from the main thread
|
|
121
|
+
// (via the lazy default instance in `./index.ts`) and inside a parser worker
|
|
122
|
+
// (which constructs its own instance), without keeping a tangle of
|
|
123
|
+
// module-level state that would have to be re-exported twice.
|
|
124
|
+
|
|
125
|
+
/** Construction options for `ParserContext`. */
export interface ParserContextOptions {
  /**
   * When true, every parse goes through the baseline walker and the
   * candidate-query fast path is skipped. When omitted, the constructor
   * falls back to the `SEER_USE_CANDIDATE_QUERY` environment variable, where
   * `'0'` forces the baseline walker. The parity tests use this switch to
   * compare the two paths.
   */
  forceBaselineWalker?: boolean;
}
|
|
133
|
+
|
|
134
|
+
export class ParserContext {
|
|
135
|
+
private _initialized = false;
|
|
136
|
+
private _parser: Parser | null = null;
|
|
137
|
+
private _languages = new Map<string, Parser.Language>();
|
|
138
|
+
// Per-grammar compiled candidate Query, or null if compilation failed (in
|
|
139
|
+
// which case we permanently fall back to the baseline walker for that grammar).
|
|
140
|
+
// Cache key matches `loadLanguage`'s grammar name.
|
|
141
|
+
private _candidateQueries = new Map<string, Parser.Query | null>();
|
|
142
|
+
private _wasmResets = 0;
|
|
143
|
+
private _consecutiveFailures = 0;
|
|
144
|
+
private static readonly RESET_AFTER_N_FAILURES = 3;
|
|
145
|
+
|
|
146
|
+
// Test/diagnostic override: when true, every parseFile() call uses the
|
|
147
|
+
// baseline walker even if the extractor has candidateNodeTypes. The parity
|
|
148
|
+
// test in tests/query-parity.ts flips this to compare both paths on the same
|
|
149
|
+
// fixtures. The env var SEER_USE_CANDIDATE_QUERY=0 has the same effect for
|
|
150
|
+
// users who want to skip the query path system-wide (e.g. if a future
|
|
151
|
+
// web-tree-sitter regression makes query.captures() expensive on their
|
|
152
|
+
// workload).
|
|
153
|
+
private _forceBaselineWalker: boolean;
|
|
154
|
+
|
|
155
|
+
/**
 * @param options Optional settings. An explicit `forceBaselineWalker` value
 *   (true or false) wins; when it is absent the flag is taken from the
 *   environment: `SEER_USE_CANDIDATE_QUERY === '0'` forces the baseline walker.
 */
constructor(options: ParserContextOptions = {}) {
  // Guarded so the class still constructs where `process` or `process.env`
  // is not available.
  const disabledByEnv =
    typeof process !== 'undefined' &&
    process.env?.SEER_USE_CANDIDATE_QUERY === '0';
  this._forceBaselineWalker = options.forceBaselineWalker ?? disabledByEnv;
}
|
|
161
|
+
|
|
162
|
+
/**
 * Override the baseline-walker switch after construction.
 *
 * @param force true to make subsequent parses use the baseline walker,
 *   false to allow the candidate-query path again.
 */
setForceBaselineWalker(force: boolean): void {
  this._forceBaselineWalker = force;
}
|
|
165
|
+
|
|
166
|
+
/**
 * Number of hard WASM runtime resets this context has performed so far.
 * Exposed for stats reporting.
 */
wasmResetCount(): number {
  const resets = this._wasmResets;
  return resets;
}
|
|
170
|
+
|
|
171
|
+
/**
 * Initialise web-tree-sitter on first use. Later calls are no-ops until
 * `resetWasmRuntime` clears the `_initialized` flag again.
 */
private async ensureReady(): Promise<void> {
  if (!this._initialized) {
    await Parser.init();
    // Only flagged after init resolves, so a failed init is retried next time.
    this._initialized = true;
  }
}
|
|
176
|
+
|
|
177
|
+
/**
 * Return this context's `Parser`, creating it on first access. The same
 * instance is reused until `resetWasmRuntime` drops it.
 */
private getParser(): Parser {
  if (this._parser === null) {
    this._parser = new Parser();
  }
  return this._parser;
}
|
|
181
|
+
|
|
182
|
+
/**
 * Load (and cache) the grammar called `grammarName`.
 *
 * @param grammarName The WASM filename suffix, i.e. the `<name>` in
 *   `tree-sitter-<name>.wasm`. tree-sitter-wasms uses underscores in some
 *   filenames (e.g. `c_sharp`), so the name is used verbatim.
 * @returns The cached language if one exists, otherwise a freshly loaded one.
 */
private async loadLanguage(grammarName: string): Promise<Parser.Language> {
  await this.ensureReady();

  const existing = this._languages.get(grammarName);
  if (existing) return existing;

  // Grammars ship in the `out/` directory of the tree-sitter-wasms package.
  const packageRoot = path.dirname(require.resolve('tree-sitter-wasms/package.json'));
  const wasmFile = path.join(packageRoot, 'out', `tree-sitter-${grammarName}.wasm`);

  const loaded = await Parser.Language.load(wasmFile);
  this._languages.set(grammarName, loaded);
  return loaded;
}
|
|
195
|
+
|
|
196
|
+
/** Record a successful parse: clears the consecutive-failure streak. */
private noteParseSuccess(): void {
  this._consecutiveFailures = 0;
}
|
|
199
|
+
|
|
200
|
+
/**
 * Record a failed parse. Once `RESET_AFTER_N_FAILURES` failures have happened
 * in a row, the streak is cleared and the WASM runtime is reset. A failure of
 * the reset itself is deliberately swallowed (best effort).
 */
private async noteParseFailureMaybeReset(): Promise<void> {
  this._consecutiveFailures += 1;
  if (this._consecutiveFailures < ParserContext.RESET_AFTER_N_FAILURES) return;

  // Clear the streak before resetting so the next failure starts a new count.
  this._consecutiveFailures = 0;
  try {
    await this.resetWasmRuntime();
  } catch {
    /* best effort */
  }
}
|
|
207
|
+
|
|
208
|
+
/**
 * Discard all parser state held by this context and rebuild it: bump the
 * reset counter, drop the parser and every cached language, re-run
 * initialisation, then reload each grammar that was loaded before the reset.
 *
 * The candidate-query cache is invalidated as well. Those queries were
 * compiled from the `Parser.Language` objects being discarded here, so
 * reusing them with the reloaded grammars would pair a query with a language
 * it was not built from. Cached `null` entries (earlier compile failures) are
 * dropped too, so compilation is retried after the reset.
 *
 * @throws Whatever `ensureReady` or `loadLanguage` throws; grammars that were
 *   not yet reloaded at that point are loaded lazily on their next use.
 */
private async resetWasmRuntime(): Promise<void> {
  this._wasmResets++;
  const grammarNames = Array.from(this._languages.keys());

  // Release stale queries. delete() is wrapped because the runtime may be in
  // a bad state when a reset is triggered.
  for (const staleQuery of this._candidateQueries.values()) {
    try { staleQuery?.delete(); } catch { /* best effort */ }
  }
  this._candidateQueries.clear();

  this._initialized = false;
  this._parser = null;
  this._languages.clear();

  await this.ensureReady();
  for (const name of grammarNames) {
    await this.loadLanguage(name);
  }
}
|
|
219
|
+
|
|
220
|
+
/**
 * Return the candidate-collection query for `grammarName`, compiling and
 * caching it on first request. The result is null when the extractor lists
 * no candidate node types or when the grammar rejects all of them; a null is
 * cached too, so that grammar stays on the baseline walker.
 *
 * Compilation steps:
 *   1. Compile one query covering every candidate type (the cheap path).
 *   2. If that throws — typically because the grammar does not know one of
 *      the node types (e.g. `class_specifier` in tree-sitter-c) — probe each
 *      type on its own and keep those that compile.
 *   3. Compile a combined query from the surviving types; if none survive or
 *      that compile also throws, cache null.
 *
 * Every candidate is captured under the single name `@c`, so the walker only
 * needs a set-membership test; deciding what a node means stays with the
 * extractor's `tryExtract*` callbacks.
 *
 * @param grammarName Cache key; the same name `loadLanguage` uses.
 * @param lang Language the query is compiled against.
 * @param candidateNodeTypes Node type names declared by the extractor.
 */
private getOrCompileCandidateQuery(
  grammarName: string,
  lang: Parser.Language,
  candidateNodeTypes: readonly string[],
): Parser.Query | null {
  const cache = this._candidateQueries;
  if (cache.has(grammarName)) {
    return cache.get(grammarName) ?? null;
  }

  // Store the outcome (query or null) under this grammar and hand it back.
  const remember = (outcome: Parser.Query | null): Parser.Query | null => {
    cache.set(grammarName, outcome);
    return outcome;
  };
  // One `(type) @c` pattern per line.
  const toQuerySource = (nodeTypes: readonly string[]): string =>
    nodeTypes.map(nodeType => `(${nodeType}) @c`).join('\n');
  // Compile a query, turning a thrown compile error into null.
  const tryCompile = (source: string): Parser.Query | null => {
    try {
      return lang.query(source);
    } catch {
      return null;
    }
  };

  if (candidateNodeTypes.length === 0) return remember(null);

  // Step 1: the full combined query.
  const combined = tryCompile(toQuerySource(candidateNodeTypes));
  if (combined) return remember(combined);

  // Step 2: keep only the node types this grammar accepts on their own.
  const accepted = candidateNodeTypes.filter(nodeType => {
    const probe = tryCompile(`(${nodeType}) @c`);
    if (!probe) return false; // type not in this grammar; skip
    try { probe.delete(); } catch { /* */ }
    return true;
  });
  if (accepted.length === 0) return remember(null);

  // Step 3: combined query over the survivors (null if even that fails).
  return remember(tryCompile(toQuerySource(accepted)));
}
|
|
283
|
+
|
|
284
|
+
/**
 * Execute `query` over the tree rooted at `rootNode` and gather the id of
 * every captured node.
 *
 * @returns The set of captured node ids, or null if running the query (or
 *   reading a capture) throws — the caller then uses the baseline walker.
 */
private collectCandidateNodeIds(
  query: Parser.Query,
  rootNode: Parser.SyntaxNode,
): Set<number> | null {
  try {
    const capturedIds = query.captures(rootNode).map(capture => capture.node.id);
    return new Set<number>(capturedIds);
  } catch {
    return null;
  }
}
|
|
302
|
+
|
|
303
|
+
/**
 * Parse one file's content and extract its symbols, references, and imports.
 *
 * The second argument is normally a file path; its extension selects the
 * grammar variant (e.g. `.tsx` → the tsx grammar, which is required to parse
 * JSX). For backward compatibility it may also be a bare language name, in
 * which case the language's default grammar is used.
 *
 * @param content            Source text to parse.
 * @param filePathOrLanguage File path, or (legacy two-arg form) a language name.
 * @param languageOverride   If given, used as the language instead of detecting
 *                           one; the extension of `filePathOrLanguage` still
 *                           picks the grammar variant.
 * @returns The extraction, or null when no language can be determined, the
 *          parser returns no tree, or anything throws along the way (the
 *          caller decides whether to warn).
 */
async parseFile(
  content: string,
  filePathOrLanguage: string,
  languageOverride?: Language,
): Promise<FileExtraction | null> {
  try {
    // Older callers pass (content, language); newer ones pass (content, filePath).
    // Tell them apart by checking whether the string is a known Language.
    const isLegacyLanguageArg = !languageOverride && isLanguageString(filePathOrLanguage);

    let language: Language | null;
    if (languageOverride) {
      language = languageOverride;
    } else if (isLegacyLanguageArg) {
      language = filePathOrLanguage as Language;
    } else {
      language = detectLanguage(filePathOrLanguage);
    }
    // The legacy form carries no path, hence no extension to key the grammar on.
    const ext = isLegacyLanguageArg ? '' : path.extname(filePathOrLanguage).toLowerCase();

    if (!language) return null;

    // Extension-specific grammar wins; otherwise the language's default grammar.
    const grammarName = (ext ? grammarForExtension(ext) : null) ?? GRAMMAR_NAME[language];
    const lang = await this.loadLanguage(grammarName);
    const parser = this.getParser();
    parser.setLanguage(lang);

    // Limit each parse to 10 seconds. Per the original authors, tree-sitter
    // gives up on pathological input by returning null, which is preferable
    // to exhausting the WASM heap by letting the parse run unbounded.
    try {
      // Older web-tree-sitter versions may lack setTimeoutMicros.
      const timed = parser as unknown as { setTimeoutMicros?: (us: number) => void };
      timed.setTimeoutMicros?.(10_000_000);
    } catch { /* best effort */ }

    const tree = parser.parse(content);
    if (!tree) {
      await this.noteParseFailureMaybeReset();
      return null;
    }

    const extractor = EXTRACTORS[language];
    try {
      // Query-assisted candidate collection: if the extractor lists its
      // candidate node types, compile (or reuse) a query for this grammar,
      // collect the matching node ids in one pass, and hand them to the walker
      // so it can skip the tryExtract* calls on every other node. When the
      // query cannot be compiled or evaluated, both helpers yield null and the
      // walker runs in baseline mode, which still produces correct results.
      const nodeTypes = this._forceBaselineWalker ? undefined : extractor.candidateNodeTypes;
      const query = nodeTypes && nodeTypes.length > 0
        ? this.getOrCompileCandidateQuery(grammarName, lang, nodeTypes)
        : null;
      const candidateIds = query ? this.collectCandidateNodeIds(query, tree.rootNode) : null;

      const result = walkTree(tree.rootNode, extractor, candidateIds ?? undefined);
      this.noteParseSuccess();
      return result;
    } finally {
      // CRITICAL: trees own WASM-allocated memory and must be deleted
      // explicitly. Skipping this leaks on every parse; on very large
      // codebases the heap eventually aborts and poisons all later parses.
      try { (tree as { delete?: () => void }).delete?.(); } catch { /* */ }
    }
  } catch {
    // A WASM abort can poison the shared module, not just this Parser. The
    // reset is opportunistic: it only triggers after several consecutive
    // failures, so an isolated legitimate parse error stays cheap.
    await this.noteParseFailureMaybeReset();
    return null;
  }
}
|
|
392
|
+
}
|
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
import type Parser from 'web-tree-sitter';
|
|
2
|
+
import type { SymbolDef, SymbolRef, FileExtraction, RouteDef, ConfigKeyRead, ServiceCallDef } from '../types.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Per-language recognizer plugged into `walkTree`. The walker owns traversal
 * and naming context; the extractor decides, node by node, what (if anything)
 * a node represents.
 */
export interface LanguageExtractor {
  /**
   * tree-sitter language name. `walkTree` copies this value into
   * `FileExtraction.language`.
   */
  languageName: string;
  /** File extensions associated with this language. */
  extensions: string[];
  /**
   * Return a SymbolDef if this node is a definition, else null. The walker
   * calls this at every depth, not only at the top level; nested definitions
   * are qualified with the names of their enclosing definitions.
   */
  tryExtractDefinition(node: Parser.SyntaxNode): SymbolDef | null;
  /** Return the callee name if this node is a call/reference, else null. */
  tryExtractCallName(node: Parser.SyntaxNode): string | null;
  /** Return the imported module/path if this node is an import, else null. */
  tryExtractImport(node: Parser.SyntaxNode): string | null;
  /**
   * Optional: return a name for nodes that supply naming context but are not
   * themselves symbols (e.g. Rust `impl AuthService { ... }`). The name is
   * pushed onto the walker's definition stack for the node's subtree.
   */
  tryExtractContextName?(node: Parser.SyntaxNode): string | null;

  /**
   * Optional: extract one or more HTTP routes from a node. Languages return
   * different shapes (Python decorators vs JS call_expression on app), so the
   * extractor owns the recognizer.
   */
  tryExtractRoute?(node: Parser.SyntaxNode): RouteDef[] | null;

  /** Optional: extract a config/env key read from a node. */
  tryExtractConfigKey?(node: Parser.SyntaxNode): ConfigKeyRead | null;

  /**
   * Optional: extract zero-or-more outbound service calls from a node.
   * Languages return different shapes for clients (Python `requests.get('/x')`
   * vs JS `fetch('/x')` vs Go `http.Get('/x')`), so the extractor owns the
   * recognizer. Returns null when the node is not a service-call site. The
   * walker does not call this on a node that already produced routes.
   */
  tryExtractServiceCalls?(node: Parser.SyntaxNode): ServiceCallDef[] | null;

  /**
   * Set of tree-sitter node types that count as control-flow branches for
   * cyclomatic/cognitive complexity. Optional; when omitted the walker does
   * not compute cyclomatic, cognitive, or nesting metrics and only fills in
   * `loc` for function/method/constructor symbols.
   */
  branchNodeTypes?: ReadonlySet<string>;

  /**
   * Optional set of node types that increase nesting depth for cognitive
   * complexity. If omitted, the complexity walker falls back to the full
   * `branchNodeTypes` set, so every branch type listed there (including
   * logical operators or ternaries, if present) also deepens nesting. Supply
   * this set explicitly to keep such "non-nesting" branches from doing so.
   */
  nestingNodeTypes?: ReadonlySet<string>;

  /**
   * Optional: list of tree-sitter node types that may produce a
   * definition / call / import / route / config-key / service-call / context
   * name on this extractor. Used by the parser to compile a Tree-Sitter Query
   * that bulk-collects candidate nodes in one tree pass, so the walker can
   * skip the `tryExtract*` calls on nodes that can never match anything.
   *
   * This is purely a performance optimization — the extractor's `tryExtract*`
   * functions still own all semantic decisions (body gates, qualified-name
   * resolution, overload disambiguation, route vs prefix, etc.). The list
   * must be a SUPERSET of every node type any `tryExtract*` callback may
   * accept, `tryExtractServiceCalls` and `tryExtractContextName` included;
   * missing a type means whole categories of extracted things go silently
   * unindexed. The fallback walker (`walkTree` with no candidate set) stays
   * available for languages that omit this list or for diagnostics.
   */
  candidateNodeTypes?: readonly string[];
}
|
|
69
|
+
|
|
70
|
+
/**
 * Walk the tree in document (pre-)order, tracking the enclosing definition
 * stack so that calls, config reads, and service calls can be attributed to
 * their containing function/method.
 *
 * For each visited node the extractor is consulted in this order:
 *   1. `tryExtractDefinition` — on a hit the definition gets its qualified
 *      name (and, for function/method/constructor kinds, complexity or LOC),
 *      becomes the enclosing scope for its subtree, and no other callback is
 *      run on that node.
 *   2. `tryExtractContextName` — on a hit the name becomes the enclosing
 *      scope for the subtree without producing a symbol.
 *   3. Otherwise routes, config keys, service calls, call names, and imports
 *      are collected from the node.
 *
 * The traversal uses an explicit stack instead of recursion, so arbitrarily
 * deep trees (long operator or call chains) cannot exhaust the JS call stack.
 *
 * @param root       Root node of the parsed tree.
 * @param extractor  Language-specific recognizers.
 * @param candidates Optional set of node ids. When provided, `tryExtract*`
 *                   callbacks run only on nodes whose id is in the set; the
 *                   whole tree is still traversed so the definition stack
 *                   stays accurate. Omit to run every callback on every node.
 * @returns The accumulated extraction for the file.
 */
export function walkTree(
  root: Parser.SyntaxNode,
  extractor: LanguageExtractor,
  candidates?: ReadonlySet<number>,
): FileExtraction {
  // Held in locals so pushes need no non-null assertions on optional fields.
  const routes: RouteDef[] = [];
  const configKeys: ConfigKeyRead[] = [];
  const serviceCalls: ServiceCallDef[] = [];
  const extraction: FileExtraction = {
    language: extractor.languageName as FileExtraction['language'],
    definitions: [],
    references: [],
    importedModules: [],
    routes,
    configKeys,
    serviceCalls,
  };

  const defStack: string[] = [];
  // One counter-map per nesting level tracks how many times each short name
  // has been pushed as a direct child, so overloads get distinct suffixes.
  const siblingCounts: Map<string, number>[] = [new Map()];

  function pushName(name: string): string {
    const counters = siblingCounts[siblingCounts.length - 1];
    const seen = counters.get(name) ?? 0;
    counters.set(name, seen + 1);
    const disambig = seen === 0 ? name : `${name}#${seen}`;
    defStack.push(disambig);
    siblingCounts.push(new Map());
    return disambig;
  }

  function popName(): void {
    defStack.pop();
    siblingCounts.pop();
  }

  // Joining an empty stack yields '', which is the "no enclosing def" caller.
  const currentCaller = (): string => defStack.join('.');

  // Work stack. A `null` entry is a scope-exit marker: it sits beneath the
  // children of a node that opened a scope and pops that scope once the whole
  // subtree has been processed.
  const pending: (Parser.SyntaxNode | null)[] = [root];

  // Push children right-to-left so they are popped (visited) left-to-right.
  function scheduleChildren(node: Parser.SyntaxNode): void {
    const kids = node.children;
    for (let i = kids.length - 1; i >= 0; i--) pending.push(kids[i]);
  }

  while (pending.length > 0) {
    const node = pending.pop();
    if (node === undefined) break;
    if (node === null) {
      popName();
      continue;
    }

    const isCandidate = candidates === undefined || candidates.has(node.id);

    const def = isCandidate ? extractor.tryExtractDefinition(node) : null;
    if (def) {
      // Out-of-line / qualified definitions (e.g. C++ `Vec::dot` defined at
      // namespace scope) carry owning-scope segments that are not on the
      // lexical def stack. Fold them into the local name so the qualified name
      // reflects the true owner, and so overload disambiguation is keyed on
      // scope + name (`Foo::bar` and `Baz::bar` must not collapse together).
      const localName = def.scopePath && def.scopePath.length > 0
        ? `${def.scopePath.join('.')}.${def.name}`
        : def.name;
      const disambig = pushName(localName);
      def.qualifiedName = defStack.length === 1
        ? disambig
        : `${defStack.slice(0, -1).join('.')}.${disambig}`;

      // Complexity only makes sense for behavior-bearing symbols.
      const isBehavior =
        def.kind === 'function' || def.kind === 'method' || def.kind === 'constructor';
      if (isBehavior) {
        if (extractor.branchNodeTypes) {
          const m = measureComplexity(node, extractor.branchNodeTypes, extractor.nestingNodeTypes);
          def.cyclomatic = m.cyclomatic;
          def.cognitive = m.cognitive;
          def.maxNesting = m.maxNesting;
          def.loc = m.loc;
        } else {
          // LOC even without branchNodeTypes — cheap and useful.
          def.loc = countNonBlankLines(node);
        }
      }

      extraction.definitions.push(def);
      pending.push(null);
      scheduleChildren(node);
      continue;
    }

    const ctxName = isCandidate ? extractor.tryExtractContextName?.(node) : null;
    if (ctxName) {
      // Context names are pushed verbatim (no overload suffix).
      defStack.push(ctxName);
      siblingCounts.push(new Map());
      pending.push(null);
      scheduleChildren(node);
      continue;
    }

    if (isCandidate) {
      // Routes are checked before calls because route registrations are
      // themselves call expressions in JS frameworks (`app.get("/x", handler)`).
      // Finding routes does not stop the call from being recorded below — the
      // registration call is itself useful in the call graph.
      const found = extractor.tryExtractRoute?.(node);
      const wasRoute = !!found && found.length > 0;
      if (found && wasRoute) {
        for (const r of found) routes.push(r);
      }

      const configKey = extractor.tryExtractConfigKey?.(node);
      if (configKey) {
        configKey.callerName = currentCaller();
        configKeys.push(configKey);
      }

      // Skip service-call extraction on nodes already classified as route
      // registrations — `app.post('/api/x', handler)` is a server-side mount,
      // not a client dialing /api/x. Without this guard every route would also
      // yield a service call linking the handler to its own registration site.
      const svcCalls = wasRoute ? null : extractor.tryExtractServiceCalls?.(node);
      if (svcCalls && svcCalls.length > 0) {
        const callerName = currentCaller();
        for (const sc of svcCalls) {
          // An extractor-supplied caller name takes precedence.
          sc.callerName = sc.callerName ?? callerName;
          serviceCalls.push(sc);
        }
      }

      const callee = extractor.tryExtractCallName(node);
      if (callee) {
        extraction.references.push({
          calleeName: callee,
          callerName: currentCaller(),
          kind: 'call',
          line: node.startPosition.row,
        });
      }

      const importPath = extractor.tryExtractImport(node);
      if (importPath) {
        extraction.importedModules.push(importPath);
      }
    }

    scheduleChildren(node);
  }

  return extraction;
}
|
|
231
|
+
|
|
232
|
+
/**
 * Look up the child stored under a grammar field and return its source text.
 *
 * @param node      Node to query.
 * @param fieldName Grammar field name to look up.
 * @returns The child's text, or null when the field is absent or has no text.
 */
export function fieldText(
  node: Parser.SyntaxNode,
  fieldName: string,
): string | null {
  const child = node.childForFieldName(fieldName);
  if (child == null) return null;
  return child.text ?? null;
}
|
|
238
|
+
|
|
239
|
+
/**
 * Return the first line of a node's source text, trimmed and capped at
 * `maxLen` UTF-16 code units.
 *
 * @param node   Node whose `text` supplies the source.
 * @param maxLen Maximum length of the result (default 120). Non-positive or
 *               NaN limits yield an empty string.
 * @returns The trimmed first line; when truncated, the cut never falls
 *          between the two halves of a surrogate pair.
 */
export function firstLine(node: Parser.SyntaxNode, maxLen = 120): string {
  // `slice(0, negative)` would count from the end of the string; treat any
  // non-positive (or NaN) limit as "nothing fits" instead.
  if (!(maxLen > 0)) return '';
  const limit = Math.floor(maxLen);

  const text = node.text;
  const newline = text.indexOf('\n');
  // trim() also strips the '\r' left behind by CRLF line endings.
  const line = (newline === -1 ? text : text.slice(0, newline)).trim();
  if (line.length <= limit) return line;

  // If the last kept code unit is a high surrogate, its partner would be cut
  // off and leave an unpaired surrogate (invalid Unicode text); drop it too.
  const lastKept = line.charCodeAt(limit - 1);
  const cut = lastKept >= 0xd800 && lastKept <= 0xdbff ? limit - 1 : limit;
  return line.slice(0, cut);
}
|
|
245
|
+
|
|
246
|
+
// ── Complexity computation ─────────────────────────────────────────────────────
|
|
247
|
+
|
|
248
|
+
/**
 * Traverse a definition's subtree once and compute:
 *   - cyclomatic: 1 + the number of descendant nodes whose type is a branch type
 *   - cognitive:  for each branch node, 1 + the nesting depth at which it sits
 *                 (so deeply nested branches cost more)
 *   - maxNesting: deepest nesting level reached below the definition node
 *   - loc:        non-blank lines in the definition's source span
 *
 * What counts as a "branch" comes from the extractor, since grammars name the
 * same constructs differently. The definition node itself is never counted;
 * its direct children start at depth 0.
 *
 * The traversal uses an explicit stack rather than recursion so that very
 * deep subtrees cannot overflow the JS call stack. Visit order does not
 * affect any of the results.
 *
 * @param defNode      Root node of the definition being measured.
 * @param branchTypes  Node types that count as branches.
 * @param nestingTypes Node types that deepen nesting. When omitted, the full
 *                     `branchTypes` set is used, i.e. every branch nests.
 * @returns The four metrics described above.
 */
function measureComplexity(
  defNode: Parser.SyntaxNode,
  branchTypes: ReadonlySet<string>,
  nestingTypes?: ReadonlySet<string>,
): { cyclomatic: number; cognitive: number; maxNesting: number; loc: number } {
  const nesting = nestingTypes ?? branchTypes;
  let cyclomatic = 1;
  let cognitive = 0;
  let maxNesting = 0;

  // Parallel stacks: nodes[i] is visited at nesting depth depths[i].
  const nodes: Parser.SyntaxNode[] = [...defNode.children];
  const depths: number[] = nodes.map(() => 0);

  while (nodes.length > 0) {
    const node = nodes.pop();
    const depth = depths.pop();
    if (node === undefined || depth === undefined) break;

    if (branchTypes.has(node.type)) {
      cyclomatic++;
      cognitive += 1 + depth;
    }

    // A nesting node is itself scored at `depth`; only its children go deeper.
    let childDepth = depth;
    if (nesting.has(node.type)) {
      childDepth = depth + 1;
      if (childDepth > maxNesting) maxNesting = childDepth;
    }

    for (const child of node.children) {
      nodes.push(child);
      depths.push(childDepth);
    }
  }

  return { cyclomatic, cognitive, maxNesting, loc: countNonBlankLines(defNode) };
}
|
|
292
|
+
|
|
293
|
+
/**
 * Count the lines in a node's source text that contain at least one
 * non-whitespace character. Lines are separated by '\n'.
 *
 * @param node Node whose `text` is measured.
 * @returns Number of non-blank lines; 0 when the text is empty.
 */
function countNonBlankLines(node: Parser.SyntaxNode): number {
  const source = node.text;
  if (!source) return 0;
  return source
    .split('\n')
    .reduce((total, line) => (line.trim().length > 0 ? total + 1 : total), 0);
}
|