causantic 0.9.3 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +70 -56
- package/dist/cli/skill-templates.d.ts.map +1 -1
- package/dist/cli/skill-templates.js +23 -18
- package/dist/cli/skill-templates.js.map +1 -1
- package/dist/clusters/cluster-manager.d.ts +16 -0
- package/dist/clusters/cluster-manager.d.ts.map +1 -1
- package/dist/clusters/cluster-manager.js +119 -1
- package/dist/clusters/cluster-manager.js.map +1 -1
- package/dist/config/loader.d.ts +16 -0
- package/dist/config/loader.d.ts.map +1 -1
- package/dist/config/loader.js +51 -0
- package/dist/config/loader.js.map +1 -1
- package/dist/config/memory-config.d.ts +26 -0
- package/dist/config/memory-config.d.ts.map +1 -1
- package/dist/config/memory-config.js +22 -0
- package/dist/config/memory-config.js.map +1 -1
- package/dist/eval/experiments/embedding-model-comparison/run-experiment.d.ts +20 -0
- package/dist/eval/experiments/embedding-model-comparison/run-experiment.d.ts.map +1 -0
- package/dist/eval/experiments/embedding-model-comparison/run-experiment.js +289 -0
- package/dist/eval/experiments/embedding-model-comparison/run-experiment.js.map +1 -0
- package/dist/eval/experiments/index-differentiation/alignment-analysis.d.ts +53 -0
- package/dist/eval/experiments/index-differentiation/alignment-analysis.d.ts.map +1 -0
- package/dist/eval/experiments/index-differentiation/alignment-analysis.js +91 -0
- package/dist/eval/experiments/index-differentiation/alignment-analysis.js.map +1 -0
- package/dist/eval/experiments/index-differentiation/discrimination-test.d.ts +24 -0
- package/dist/eval/experiments/index-differentiation/discrimination-test.d.ts.map +1 -0
- package/dist/eval/experiments/index-differentiation/discrimination-test.js +79 -0
- package/dist/eval/experiments/index-differentiation/discrimination-test.js.map +1 -0
- package/dist/eval/experiments/index-differentiation/index.d.ts +11 -0
- package/dist/eval/experiments/index-differentiation/index.d.ts.map +1 -0
- package/dist/eval/experiments/index-differentiation/index.js +8 -0
- package/dist/eval/experiments/index-differentiation/index.js.map +1 -0
- package/dist/eval/experiments/index-differentiation/refinement-test.d.ts +32 -0
- package/dist/eval/experiments/index-differentiation/refinement-test.d.ts.map +1 -0
- package/dist/eval/experiments/index-differentiation/refinement-test.js +203 -0
- package/dist/eval/experiments/index-differentiation/refinement-test.js.map +1 -0
- package/dist/eval/experiments/index-differentiation/run-experiment.d.ts +20 -0
- package/dist/eval/experiments/index-differentiation/run-experiment.d.ts.map +1 -0
- package/dist/eval/experiments/index-differentiation/run-experiment.js +338 -0
- package/dist/eval/experiments/index-differentiation/run-experiment.js.map +1 -0
- package/dist/eval/experiments/index-differentiation/similarity-analysis.d.ts +31 -0
- package/dist/eval/experiments/index-differentiation/similarity-analysis.d.ts.map +1 -0
- package/dist/eval/experiments/index-differentiation/similarity-analysis.js +60 -0
- package/dist/eval/experiments/index-differentiation/similarity-analysis.js.map +1 -0
- package/dist/eval/experiments/index-differentiation/types.d.ts +114 -0
- package/dist/eval/experiments/index-differentiation/types.d.ts.map +1 -0
- package/dist/eval/experiments/index-differentiation/types.js +8 -0
- package/dist/eval/experiments/index-differentiation/types.js.map +1 -0
- package/dist/eval/experiments/index-vs-chunk/jeopardy-experiment.d.ts +19 -0
- package/dist/eval/experiments/index-vs-chunk/jeopardy-experiment.d.ts.map +1 -0
- package/dist/eval/experiments/index-vs-chunk/jeopardy-experiment.js +328 -0
- package/dist/eval/experiments/index-vs-chunk/jeopardy-experiment.js.map +1 -0
- package/dist/eval/experiments/index-vs-chunk/jeopardy-generator.d.ts +27 -0
- package/dist/eval/experiments/index-vs-chunk/jeopardy-generator.d.ts.map +1 -0
- package/dist/eval/experiments/index-vs-chunk/jeopardy-generator.js +154 -0
- package/dist/eval/experiments/index-vs-chunk/jeopardy-generator.js.map +1 -0
- package/dist/eval/experiments/index-vs-chunk/query-generator.d.ts +23 -0
- package/dist/eval/experiments/index-vs-chunk/query-generator.d.ts.map +1 -0
- package/dist/eval/experiments/index-vs-chunk/query-generator.js +113 -0
- package/dist/eval/experiments/index-vs-chunk/query-generator.js.map +1 -0
- package/dist/eval/experiments/index-vs-chunk/run-experiment.d.ts +17 -0
- package/dist/eval/experiments/index-vs-chunk/run-experiment.d.ts.map +1 -0
- package/dist/eval/experiments/index-vs-chunk/run-experiment.js +341 -0
- package/dist/eval/experiments/index-vs-chunk/run-experiment.js.map +1 -0
- package/dist/eval/experiments/index-vs-chunk/types.d.ts +71 -0
- package/dist/eval/experiments/index-vs-chunk/types.d.ts.map +1 -0
- package/dist/eval/experiments/index-vs-chunk/types.js +8 -0
- package/dist/eval/experiments/index-vs-chunk/types.js.map +1 -0
- package/dist/eval/experiments/pipeline-dropout/run-experiment.d.ts +18 -0
- package/dist/eval/experiments/pipeline-dropout/run-experiment.d.ts.map +1 -0
- package/dist/eval/experiments/pipeline-dropout/run-experiment.js +347 -0
- package/dist/eval/experiments/pipeline-dropout/run-experiment.js.map +1 -0
- package/dist/eval/experiments/rescorer-ceiling/analyze-misses.d.ts +17 -0
- package/dist/eval/experiments/rescorer-ceiling/analyze-misses.d.ts.map +1 -0
- package/dist/eval/experiments/rescorer-ceiling/analyze-misses.js +247 -0
- package/dist/eval/experiments/rescorer-ceiling/analyze-misses.js.map +1 -0
- package/dist/eval/experiments/rescorer-ceiling/benchmark-rescorers.d.ts +18 -0
- package/dist/eval/experiments/rescorer-ceiling/benchmark-rescorers.d.ts.map +1 -0
- package/dist/eval/experiments/rescorer-ceiling/benchmark-rescorers.js +443 -0
- package/dist/eval/experiments/rescorer-ceiling/benchmark-rescorers.js.map +1 -0
- package/dist/eval/experiments/rescorer-ceiling/run-experiment.d.ts +16 -0
- package/dist/eval/experiments/rescorer-ceiling/run-experiment.d.ts.map +1 -0
- package/dist/eval/experiments/rescorer-ceiling/run-experiment.js +226 -0
- package/dist/eval/experiments/rescorer-ceiling/run-experiment.js.map +1 -0
- package/dist/index-entries/index-generator.d.ts +74 -0
- package/dist/index-entries/index-generator.d.ts.map +1 -0
- package/dist/index-entries/index-generator.js +323 -0
- package/dist/index-entries/index-generator.js.map +1 -0
- package/dist/index-entries/index-refresher.d.ts +54 -0
- package/dist/index-entries/index-refresher.d.ts.map +1 -0
- package/dist/index-entries/index-refresher.js +203 -0
- package/dist/index-entries/index-refresher.js.map +1 -0
- package/dist/index-entries/index.d.ts +6 -0
- package/dist/index-entries/index.d.ts.map +1 -0
- package/dist/index-entries/index.js +6 -0
- package/dist/index-entries/index.js.map +1 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +5 -0
- package/dist/index.js.map +1 -1
- package/dist/ingest/index-entry-hook.d.ts +15 -0
- package/dist/ingest/index-entry-hook.d.ts.map +1 -0
- package/dist/ingest/index-entry-hook.js +84 -0
- package/dist/ingest/index-entry-hook.js.map +1 -0
- package/dist/ingest/ingest-session.d.ts.map +1 -1
- package/dist/ingest/ingest-session.js +72 -18
- package/dist/ingest/ingest-session.js.map +1 -1
- package/dist/ingest/session-state.d.ts +49 -0
- package/dist/ingest/session-state.d.ts.map +1 -0
- package/dist/ingest/session-state.js +158 -0
- package/dist/ingest/session-state.js.map +1 -0
- package/dist/maintenance/scheduler.d.ts.map +1 -1
- package/dist/maintenance/scheduler.js +25 -0
- package/dist/maintenance/scheduler.js.map +1 -1
- package/dist/maintenance/tasks/backfill-index.d.ts +27 -0
- package/dist/maintenance/tasks/backfill-index.d.ts.map +1 -0
- package/dist/maintenance/tasks/backfill-index.js +44 -0
- package/dist/maintenance/tasks/backfill-index.js.map +1 -0
- package/dist/mcp/tools.d.ts +4 -0
- package/dist/mcp/tools.d.ts.map +1 -1
- package/dist/mcp/tools.js +115 -7
- package/dist/mcp/tools.js.map +1 -1
- package/dist/models/embedder.js +2 -2
- package/dist/models/embedder.js.map +1 -1
- package/dist/models/model-registry.d.ts +2 -0
- package/dist/models/model-registry.d.ts.map +1 -1
- package/dist/models/model-registry.js +15 -0
- package/dist/models/model-registry.js.map +1 -1
- package/dist/repomap/cache.d.ts +58 -0
- package/dist/repomap/cache.d.ts.map +1 -0
- package/dist/repomap/cache.js +101 -0
- package/dist/repomap/cache.js.map +1 -0
- package/dist/repomap/graph.d.ts +54 -0
- package/dist/repomap/graph.d.ts.map +1 -0
- package/dist/repomap/graph.js +113 -0
- package/dist/repomap/graph.js.map +1 -0
- package/dist/repomap/index.d.ts +83 -0
- package/dist/repomap/index.d.ts.map +1 -0
- package/dist/repomap/index.js +99 -0
- package/dist/repomap/index.js.map +1 -0
- package/dist/repomap/parser.d.ts +43 -0
- package/dist/repomap/parser.d.ts.map +1 -0
- package/dist/repomap/parser.js +994 -0
- package/dist/repomap/parser.js.map +1 -0
- package/dist/repomap/regex-parser.d.ts +24 -0
- package/dist/repomap/regex-parser.d.ts.map +1 -0
- package/dist/repomap/regex-parser.js +190 -0
- package/dist/repomap/regex-parser.js.map +1 -0
- package/dist/repomap/renderer.d.ts +40 -0
- package/dist/repomap/renderer.d.ts.map +1 -0
- package/dist/repomap/renderer.js +163 -0
- package/dist/repomap/renderer.js.map +1 -0
- package/dist/repomap/scanner.d.ts +32 -0
- package/dist/repomap/scanner.d.ts.map +1 -0
- package/dist/repomap/scanner.js +171 -0
- package/dist/repomap/scanner.js.map +1 -0
- package/dist/retrieval/chain-assembler.d.ts.map +1 -1
- package/dist/retrieval/chain-assembler.js +22 -3
- package/dist/retrieval/chain-assembler.js.map +1 -1
- package/dist/retrieval/index.d.ts +2 -0
- package/dist/retrieval/index.d.ts.map +1 -1
- package/dist/retrieval/index.js +2 -0
- package/dist/retrieval/index.js.map +1 -1
- package/dist/retrieval/mmr.d.ts +1 -0
- package/dist/retrieval/mmr.d.ts.map +1 -1
- package/dist/retrieval/mmr.js +35 -1
- package/dist/retrieval/mmr.js.map +1 -1
- package/dist/retrieval/search-assembler.d.ts +10 -1
- package/dist/retrieval/search-assembler.d.ts.map +1 -1
- package/dist/retrieval/search-assembler.js +249 -81
- package/dist/retrieval/search-assembler.js.map +1 -1
- package/dist/retrieval/session-reconstructor.d.ts +36 -0
- package/dist/retrieval/session-reconstructor.d.ts.map +1 -1
- package/dist/retrieval/session-reconstructor.js +126 -0
- package/dist/retrieval/session-reconstructor.js.map +1 -1
- package/dist/storage/db.d.ts.map +1 -1
- package/dist/storage/db.js +15 -0
- package/dist/storage/db.js.map +1 -1
- package/dist/storage/index-entry-store.d.ts +71 -0
- package/dist/storage/index-entry-store.d.ts.map +1 -0
- package/dist/storage/index-entry-store.js +275 -0
- package/dist/storage/index-entry-store.js.map +1 -0
- package/dist/storage/index.d.ts +5 -2
- package/dist/storage/index.d.ts.map +1 -1
- package/dist/storage/index.js +5 -1
- package/dist/storage/index.js.map +1 -1
- package/dist/storage/migrations.d.ts.map +1 -1
- package/dist/storage/migrations.js +102 -0
- package/dist/storage/migrations.js.map +1 -1
- package/dist/storage/schema.sql +68 -2
- package/dist/storage/session-state-store.d.ts +61 -0
- package/dist/storage/session-state-store.d.ts.map +1 -0
- package/dist/storage/session-state-store.js +119 -0
- package/dist/storage/session-state-store.js.map +1 -0
- package/dist/storage/types.d.ts +50 -0
- package/dist/storage/types.d.ts.map +1 -1
- package/dist/storage/vector-store.d.ts +17 -2
- package/dist/storage/vector-store.d.ts.map +1 -1
- package/dist/storage/vector-store.js +96 -36
- package/dist/storage/vector-store.js.map +1 -1
- package/package.json +4 -2
|
@@ -0,0 +1,994 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tree-sitter AST parser for extracting definitions and references.
|
|
3
|
+
*
|
|
4
|
+
* Uses web-tree-sitter (WASM) to parse source files and extract:
|
|
5
|
+
* - Definitions: classes, functions, methods, interfaces, type aliases, exports
|
|
6
|
+
* - References: imports, identifiers used in code
|
|
7
|
+
*
|
|
8
|
+
* Supported languages: TypeScript, JavaScript, Python, Java, C, C++,
|
|
9
|
+
* Rust, Go, Ruby, C#, PHP, Bash (tree-sitter).
|
|
10
|
+
* Fallback regex parsing: Scala, Kotlin, Swift, Haskell, Lua, Dart,
|
|
11
|
+
* Zig, Elixir, Perl, R.
|
|
12
|
+
*/
|
|
13
|
+
import { readFileSync } from 'fs';
|
|
14
|
+
import { join, dirname } from 'path';
|
|
15
|
+
import { fileURLToPath } from 'url';
|
|
16
|
+
import { parseFileRegex, isRegexSupportedExtension, getRegexLanguageForExtension, } from './regex-parser.js';
|
|
17
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
18
|
+
/**
 * File extension (with leading dot) → tree-sitter grammar name.
 *
 * Declared as one row per grammar so it is obvious which extensions share a
 * parser. C sources and headers are routed to the C++ grammar because
 * @vscode/tree-sitter-wasm ships no standalone tree-sitter-c.wasm.
 */
const EXTENSION_TO_LANGUAGE = Object.fromEntries([
    // TypeScript / JavaScript
    ['typescript', ['.ts', '.mts']],
    ['tsx', ['.tsx']],
    ['javascript', ['.js', '.mjs', '.jsx']],
    // Python
    ['python', ['.py', '.pyi']],
    // Java
    ['java', ['.java']],
    // C (via the C++ grammar) and C++
    ['cpp', ['.c', '.h', '.cpp', '.cc', '.cxx', '.hpp', '.hh', '.hxx']],
    // Rust
    ['rust', ['.rs']],
    // Go
    ['go', ['.go']],
    // Ruby
    ['ruby', ['.rb']],
    // C#
    ['c-sharp', ['.cs']],
    // PHP
    ['php', ['.php']],
    // Bash / Shell
    ['bash', ['.sh', '.bash']],
].flatMap(([grammar, extensions]) => extensions.map((extension) => [extension, grammar])));
|
|
56
|
+
// Lazily populated tree-sitter state. Nothing is loaded until the first
// call to ensureInit().
// eslint-disable-next-line @typescript-eslint/no-explicit-any
let ParserClass = null;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
let LanguageClass = null;
// In-flight (or completed) initialization; shared so concurrent callers
// await the same work instead of initializing twice.
let initPromise = null;
// Grammar name → loaded tree-sitter Language object.
const languageCache = new Map();
/**
 * Initialize tree-sitter. Must be called before parsing.
 *
 * Idempotent: returns immediately once the parser class is available, and
 * concurrent callers share a single in-flight initialization.
 *
 * If initialization fails, the cached promise is cleared so that a later
 * call retries instead of re-throwing the same stale rejection forever.
 *
 * @returns {Promise<void>} Resolves once ParserClass and LanguageClass are set.
 * @throws Re-throws whatever the dynamic import or `Parser.init` rejected with.
 */
async function ensureInit() {
    if (ParserClass)
        return;
    if (!initPromise) {
        initPromise = (async () => {
            // eslint-disable-next-line @typescript-eslint/no-explicit-any
            const mod = (await import('web-tree-sitter'));
            // Accept both module shapes: a named `Parser` export, or the
            // Parser class as the default export.
            const P = mod.Parser ?? mod.default;
            // NOTE(review): this path assumes web-tree-sitter is installed in a
            // node_modules directory two levels above this file — confirm this
            // still holds when the dependency is hoisted by the package manager.
            const wasmPath = join(__dirname, '..', '..', 'node_modules', 'web-tree-sitter', 'web-tree-sitter.wasm');
            await P.init({ locateFile: () => wasmPath });
            ParserClass = P;
            // With the default-export shape there is no top-level `Language`
            // export; fall back to the one attached to the Parser class.
            LanguageClass = mod.Language ?? P.Language;
        })().catch((err) => {
            // Do not keep a rejected promise cached: allow the next caller to retry.
            initPromise = null;
            throw err;
        });
    }
    return initPromise;
}
|
|
83
|
+
/**
 * Load (and memoize) the tree-sitter grammar for a language.
 *
 * The grammar is read from the `wasm` directory of @vscode/tree-sitter-wasm
 * as `tree-sitter-<languageName>.wasm`. Results are stored in languageCache,
 * so each grammar is loaded at most once per successful call chain.
 *
 * @param languageName - Grammar name, e.g. 'typescript' or 'c-sharp'
 * @returns The loaded Language object
 */
async function loadLanguage(languageName) {
    const alreadyLoaded = languageCache.get(languageName);
    if (!alreadyLoaded) {
        await ensureInit();
        const grammarPath = join(__dirname, '..', '..', 'node_modules', '@vscode', 'tree-sitter-wasm', 'wasm', `tree-sitter-${languageName}.wasm`);
        const grammar = await LanguageClass.load(grammarPath);
        languageCache.set(languageName, grammar);
        return grammar;
    }
    return alreadyLoaded;
}
|
|
98
|
+
// ---------------------------------------------------------------------------
|
|
99
|
+
// Per-language definition node types
|
|
100
|
+
// ---------------------------------------------------------------------------
|
|
101
|
+
// Each set below lists the tree-sitter node types that count as a
// "definition" for one grammar. Membership is tested with Set#has while
// walking the syntax tree.

/** Definition node types shared by the TypeScript, TSX and JavaScript grammars. */
const DEFINITION_TYPES_TS = new Set([
    'class_declaration', 'abstract_class_declaration',
    'function_declaration', 'generator_function_declaration',
    'method_definition',
    'interface_declaration', 'type_alias_declaration', 'enum_declaration',
    'variable_declarator',
    'export_statement',
]);

/** Definition node types for Python. */
const DEFINITION_TYPES_PYTHON = new Set([
    'class_definition', 'function_definition', 'decorated_definition',
]);

/** Definition node types for Java. */
const DEFINITION_TYPES_JAVA = new Set([
    'class_declaration', 'interface_declaration',
    'method_declaration',
    'enum_declaration',
    'constructor_declaration',
    'annotation_type_declaration',
]);

/** Definition node types for C++ (C files are parsed with this grammar too). */
const DEFINITION_TYPES_CPP = new Set([
    'function_definition',
    'struct_specifier', 'class_specifier', 'enum_specifier',
    'type_definition',
    'namespace_definition',
    'template_declaration',
    'preproc_function_def',
]);

/** Definition node types for Rust. */
const DEFINITION_TYPES_RUST = new Set([
    'struct_item', 'enum_item', 'trait_item', 'impl_item',
    'function_item',
    'type_item', 'const_item', 'static_item',
    'mod_item',
    'macro_definition',
]);

/** Definition node types for Go. */
const DEFINITION_TYPES_GO = new Set([
    'type_declaration',
    'function_declaration', 'method_declaration',
    'const_declaration', 'var_declaration',
]);

/** Definition node types for Ruby. */
const DEFINITION_TYPES_RUBY = new Set([
    'class', 'module', 'method', 'singleton_method',
]);

/** Definition node types for C#. */
const DEFINITION_TYPES_CSHARP = new Set([
    'class_declaration', 'interface_declaration', 'struct_declaration', 'enum_declaration',
    'method_declaration', 'constructor_declaration',
    'namespace_declaration',
    'delegate_declaration',
]);

/** Definition node types for PHP. */
const DEFINITION_TYPES_PHP = new Set([
    'class_declaration', 'interface_declaration', 'trait_declaration', 'enum_declaration',
    'function_definition', 'method_declaration',
]);

/** Definition node types for Bash. */
const DEFINITION_TYPES_BASH = new Set(['function_definition']);

/** Grammar name → set of definition node types for that grammar. */
const DEFINITION_TYPES_BY_LANGUAGE = {
    // The three JS-family grammars deliberately share one set.
    typescript: DEFINITION_TYPES_TS,
    tsx: DEFINITION_TYPES_TS,
    javascript: DEFINITION_TYPES_TS,
    python: DEFINITION_TYPES_PYTHON,
    java: DEFINITION_TYPES_JAVA,
    cpp: DEFINITION_TYPES_CPP,
    rust: DEFINITION_TYPES_RUST,
    go: DEFINITION_TYPES_GO,
    ruby: DEFINITION_TYPES_RUBY,
    'c-sharp': DEFINITION_TYPES_CSHARP,
    php: DEFINITION_TYPES_PHP,
    bash: DEFINITION_TYPES_BASH,
};
|
|
207
|
+
// ---------------------------------------------------------------------------
|
|
208
|
+
// Name extraction
|
|
209
|
+
// ---------------------------------------------------------------------------
|
|
210
|
+
/**
 * Extract the declared name from a definition node.
 *
 * Resolution order:
 *  1. `variable_declarator` — accepted only when the binding is a plain
 *     identifier. Destructuring patterns (`const { a, b } = x`) also sit in
 *     the `name` field, and their raw text is not a usable symbol name, so
 *     they yield null.
 *  2. The node's `name` field, which covers most declarations across the
 *     supported grammars.
 *  3. Node-type-specific fallbacks for wrappers and grammars that do not
 *     expose a `name` field (decorated definitions, C/C++ declarator chains,
 *     anonymous-capable specifiers, templates, Go declaration groups).
 *
 * @param node - Syntax node whose type is one of the definition types
 * @param languageName - Grammar name; only forwarded on recursive calls
 * @returns The definition name, or null when none can be determined
 */
function extractDefinitionName(node, languageName) {
    const nameNode = node.childForFieldName('name');
    // Must run before the generic `name` shortcut: for a variable declarator
    // the `name` field may hold an object/array pattern instead of an identifier.
    if (node.type === 'variable_declarator') {
        const binding = nameNode ?? node.firstChild;
        const isIdentifier = binding != null &&
            (binding.type === 'identifier' || binding.type === 'type_identifier');
        return isIdentifier ? binding.text : null;
    }
    // Most declarations have a 'name' field.
    if (nameNode)
        return nameNode.text;
    switch (node.type) {
        // Python: decorated_definition — unwrap to the inner definition.
        case 'decorated_definition': {
            const inner = node.childForFieldName('definition');
            return inner ? extractDefinitionName(inner, languageName) : null;
        }
        // C/C++: function_definition — the name is buried in the declarator chain.
        case 'function_definition': {
            const declarator = node.childForFieldName('declarator');
            return declarator ? extractFunctionDeclaratorName(declarator) : null;
        }
        // C/C++: type_definition — the declarator holds the alias name.
        case 'type_definition': {
            const declarator = node.childForFieldName('declarator');
            if (!declarator)
                return null;
            if (declarator.type === 'type_identifier' || declarator.type === 'identifier') {
                return declarator.text;
            }
            return extractFunctionDeclaratorName(declarator);
        }
        // C/C++: struct/enum/class specifiers — first type_identifier child.
        // Anonymous structs/enums have none and fall through to null.
        case 'struct_specifier':
        case 'enum_specifier':
        case 'class_specifier': {
            for (let i = 0; i < node.childCount; i++) {
                const child = node.child(i);
                if (child?.type === 'type_identifier')
                    return child.text;
            }
            return null;
        }
        // C++: template_declaration — unwrap to the first named child that is
        // not the parameter list (skips keywords and punctuation).
        case 'template_declaration': {
            for (let i = 0; i < node.childCount; i++) {
                const child = node.child(i);
                if (child?.isNamed && child.type !== 'template_parameter_list') {
                    return extractDefinitionName(child, languageName);
                }
            }
            return null;
        }
        // Go: type_declaration wraps type_spec — take the first named spec.
        case 'type_declaration': {
            for (let i = 0; i < node.childCount; i++) {
                const spec = node.child(i);
                if (spec?.type !== 'type_spec')
                    continue;
                const specName = spec.childForFieldName('name');
                if (specName)
                    return specName.text;
            }
            return null;
        }
        // Go: const_declaration / var_declaration — take the first spec's name,
        // falling back to its first identifier child.
        case 'const_declaration':
        case 'var_declaration': {
            for (let i = 0; i < node.childCount; i++) {
                const spec = node.child(i);
                if (spec?.type !== 'const_spec' && spec?.type !== 'var_spec')
                    continue;
                const specName = spec.childForFieldName('name');
                if (specName)
                    return specName.text;
                for (let j = 0; j < spec.childCount; j++) {
                    const grandchild = spec.child(j);
                    if (grandchild?.type === 'identifier')
                        return grandchild.text;
                }
            }
            return null;
        }
        // Every other node type exposes its name (if any) through the `name`
        // field, which was already checked above.
        default:
            return null;
    }
}
|
|
322
|
+
/**
 * Walk a C/C++ declarator chain down to the identifier it declares.
 *
 * Search order for each node:
 *  - identifier-like nodes return their own text;
 *  - a qualified identifier (namespace::name) returns its `name` field;
 *  - otherwise follow the `declarator` field if there is one;
 *  - otherwise try each named child in order (covers wrappers such as
 *    parenthesized or pointer declarators) and return the first hit.
 *
 * @param node - Declarator node to search
 * @returns The identifier text, or null when nothing is found
 */
function extractFunctionDeclaratorName(node) {
    switch (node.type) {
        case 'identifier':
        case 'field_identifier':
        case 'type_identifier':
            return node.text;
        case 'qualified_identifier': {
            const unqualified = node.childForFieldName('name');
            if (unqualified)
                return unqualified.text;
            break;
        }
        default:
            break;
    }
    const nested = node.childForFieldName('declarator');
    if (nested)
        return extractFunctionDeclaratorName(nested);
    // No declarator field: search the named children depth-first.
    let index = 0;
    while (index < node.childCount) {
        const candidate = node.child(index);
        index += 1;
        if (!candidate.isNamed)
            continue;
        const found = extractFunctionDeclaratorName(candidate);
        if (found)
            return found;
    }
    return null;
}
|
|
351
|
+
// ---------------------------------------------------------------------------
|
|
352
|
+
// Type categorization
|
|
353
|
+
// ---------------------------------------------------------------------------
|
|
354
|
+
/**
 * Tree-sitter node type → definition kind, grouped by kind.
 *
 * Several node types are shared between grammars (for example
 * `function_definition` is used by Python, C/C++, PHP and Bash, and
 * `method_declaration` by Java, Go, C# and PHP), so the table is keyed by
 * node type only. A Map is used rather than a plain object so that node
 * types such as 'constructor' can never collide with inherited properties.
 */
const DEFINITION_KIND_BY_NODE_TYPE = new Map(Object.entries({
    class: [
        'class_declaration', 'abstract_class_declaration', // TS/JS, Java, C#, PHP
        'class_definition', // Python
        'struct_specifier', 'class_specifier', 'namespace_definition', 'template_declaration', // C / C++
        'struct_item', 'impl_item', 'mod_item', // Rust
        'class', 'module', // Ruby
        'struct_declaration', 'namespace_declaration', // C#
        'trait_declaration', // PHP
    ],
    function: [
        'function_declaration', 'generator_function_declaration', // TS/JS, Go
        'function_definition', // Python, C/C++, PHP, Bash
        'decorated_definition', // Python placeholder; see getDecoratedDefinitionType
        'preproc_function_def', // C macros
        'function_item', 'macro_definition', // Rust
    ],
    method: [
        'method_definition', // TS/JS
        'method_declaration', 'constructor_declaration', // Java, Go, C#, PHP
        'method', 'singleton_method', // Ruby
    ],
    interface: [
        'interface_declaration',
        'annotation_type_declaration', // Java
        'trait_item', // Rust
    ],
    type: [
        'type_alias_declaration', // TS
        'type_definition', // C / C++
        'type_item', // Rust
        'type_declaration', // Go; the original note says this is refined by the caller for struct/interface
        'delegate_declaration', // C#
    ],
    enum: ['enum_declaration', 'enum_specifier', 'enum_item'],
    variable: [
        'variable_declarator', // TS/JS
        'const_item', 'static_item', // Rust
        'const_declaration', 'var_declaration', // Go
    ],
    export: ['export_statement'],
}).flatMap(([kind, nodeTypes]) => nodeTypes.map((nodeType) => [nodeType, kind])));
/**
 * Get the definition kind for a tree-sitter node type.
 *
 * @param nodeType - The `type` of a syntax node
 * @returns One of 'class', 'function', 'method', 'interface', 'type',
 *          'enum', 'variable' or 'export'; unknown node types map to 'variable'
 */
function getDefinitionType(nodeType) {
    return DEFINITION_KIND_BY_NODE_TYPE.get(nodeType) ?? 'variable';
}
|
|
455
|
+
/**
 * Resolve the definition kind of a Python `decorated_definition`.
 *
 * The kind is taken from the node in the `definition` field (the decorated
 * class or function). When that field is absent the result is 'function'.
 *
 * @param node - A decorated_definition syntax node
 * @returns The definition kind of the wrapped node, or 'function'
 */
function getDecoratedDefinitionType(node) {
    const wrapped = node.childForFieldName('definition');
    return wrapped ? getDefinitionType(wrapped.type) : 'function';
}
|
|
464
|
+
// ---------------------------------------------------------------------------
|
|
465
|
+
// Variable interest filter (TS/JS only)
|
|
466
|
+
// ---------------------------------------------------------------------------
|
|
467
|
+
/** Initializer node types that make a variable declarator worth indexing. */
const INTERESTING_INITIALIZER_TYPES = new Set([
    'arrow_function',
    'function_expression',
    'class',
    'call_expression',
    'new_expression',
    'object',
    'array',
]);
/**
 * Decide whether a variable declarator is worth recording as a definition.
 *
 * A declarator with no initializer is always kept. Otherwise it is kept only
 * when the initializer is a function, class, call, `new` expression, object
 * or array; anything else (such as a simple primitive) is dropped as noise.
 *
 * @param node - A variable_declarator syntax node
 * @returns true if the declarator should be kept
 */
function isInterestingVariable(node) {
    const initializer = node.childForFieldName('value');
    return !initializer || INTERESTING_INITIALIZER_TYPES.has(initializer.type);
}
|
|
488
|
+
// ---------------------------------------------------------------------------
|
|
489
|
+
// Main parse function
|
|
490
|
+
// ---------------------------------------------------------------------------
|
|
491
|
+
/**
 * Parse a source file and extract tags (definitions + references).
 *
 * Files whose extension has no tree-sitter language are delegated to the
 * regex parser. Otherwise the file is parsed with tree-sitter, definitions and
 * import references are collected in a first cursor walk, and identifier
 * references are collected in a second pass by `collectReferences`.
 *
 * The parser, the tree and the tree cursor are always released, including when
 * the file cannot be read or when tag extraction throws.
 *
 * @param filePath - Absolute path to the source file
 * @param relativePath - Relative path for tag metadata
 * @returns Array of tags extracted from the file
 */
export async function parseFile(filePath, relativePath) {
    const ext = filePath.slice(filePath.lastIndexOf('.'));
    const languageName = EXTENSION_TO_LANGUAGE[ext];
    // Fall back to regex parser for languages without tree-sitter grammars
    if (!languageName) {
        return parseFileRegex(filePath, relativePath);
    }
    const defTypes = DEFINITION_TYPES_BY_LANGUAGE[languageName];
    if (!defTypes)
        return [];
    await ensureInit();
    const language = await loadLanguage(languageName);
    const parser = new ParserClass();
    let tree = null;
    let cursor = null;
    try {
        parser.setLanguage(language);
        let source;
        try {
            source = readFileSync(filePath, 'utf-8');
        }
        catch {
            // Unreadable file: report no tags (the parser is released in `finally`).
            return [];
        }
        tree = parser.parse(source);
        if (!tree) {
            return [];
        }
        const tags = [];
        const definedNames = new Set();
        // Note: despite the name, this also covers plain JavaScript.
        const isTS = languageName === 'typescript' || languageName === 'tsx' || languageName === 'javascript';
        // Record a definition tag and remember its name for the reference pass.
        const addDefinition = (name, line, type) => {
            tags.push({ name, kind: 'def', line, file: relativePath, type });
            definedNames.add(name);
        };
        // Record an import reference tag.
        const addImport = (name, line) => {
            tags.push({ name, kind: 'ref', line, file: relativePath, type: 'import' });
        };
        // Walk the tree to extract definitions
        cursor = tree.walk();
        const visitNode = () => {
            const node = cursor.currentNode;
            // Extract definitions
            if (defTypes.has(node.type)) {
                // TS/JS: export_statement — look at the child declaration
                if (node.type === 'export_statement') {
                    const declaration = node.childForFieldName('declaration');
                    if (declaration) {
                        if (defTypes.has(declaration.type)) {
                            const name = extractDefinitionName(declaration, languageName);
                            if (name && name.length > 1) {
                                addDefinition(name, declaration.startPosition.row + 1, getDefinitionType(declaration.type));
                            }
                        }
                        else if (declaration.type === 'lexical_declaration' ||
                            declaration.type === 'variable_declaration') {
                            // export const/let/var — inspect the variable_declarator children
                            for (let i = 0; i < declaration.childCount; i++) {
                                const child = declaration.child(i);
                                if (child.type === 'variable_declarator' && isInterestingVariable(child)) {
                                    const name = extractDefinitionName(child, languageName);
                                    if (name && name.length > 1) {
                                        addDefinition(name, child.startPosition.row + 1, getDefinitionType(child.type));
                                    }
                                }
                            }
                        }
                    }
                    // Don't recurse further into the export statement for this branch
                    return;
                }
                // Python: decorated_definition — unwrap
                if (node.type === 'decorated_definition') {
                    const name = extractDefinitionName(node, languageName);
                    if (name && name.length > 1) {
                        addDefinition(name, node.startPosition.row + 1, getDecoratedDefinitionType(node));
                    }
                    // Don't recurse into the decorated definition — we extracted the inner name
                    return;
                }
                // C++: template_declaration — unwrap
                if (node.type === 'template_declaration') {
                    const name = extractDefinitionName(node, languageName);
                    if (name && name.length > 1) {
                        // Determine type from the first named child that is not the parameter list
                        let innerType = 'class';
                        for (let i = 0; i < node.childCount; i++) {
                            const child = node.child(i);
                            if (child.isNamed && child.type !== 'template_parameter_list') {
                                innerType = getDefinitionType(child.type);
                                break;
                            }
                        }
                        addDefinition(name, node.startPosition.row + 1, innerType);
                    }
                    return;
                }
                // TS/JS: skip boring variable declarations (primitives, simple assignments)
                if (node.type === 'variable_declarator' && !isInterestingVariable(node)) {
                    return;
                }
                // Go: type_declaration — refine type based on inner type_spec
                if (node.type === 'type_declaration') {
                    const name = extractDefinitionName(node, languageName);
                    if (name && name.length > 1) {
                        let defType = 'type';
                        for (let i = 0; i < node.childCount; i++) {
                            const child = node.child(i);
                            if (child.type === 'type_spec') {
                                const typeField = child.childForFieldName('type');
                                if (typeField) {
                                    if (typeField.type === 'struct_type')
                                        defType = 'class';
                                    else if (typeField.type === 'interface_type')
                                        defType = 'interface';
                                }
                                // Only the first type_spec decides the type.
                                break;
                            }
                        }
                        addDefinition(name, node.startPosition.row + 1, defType);
                    }
                    return;
                }
                // Rust: impl_item is not a named symbol, so no definition is added
                // and the walk does not descend into it.
                if (node.type === 'impl_item') {
                    return;
                }
                const name = extractDefinitionName(node, languageName);
                if (name && name.length > 1) {
                    addDefinition(name, node.startPosition.row + 1, getDefinitionType(node.type));
                }
            }
            // Extract import references (TS/JS)
            if (isTS && node.type === 'import_specifier') {
                const nameNode = node.childForFieldName('name');
                const name = nameNode?.text ?? node.firstChild?.text;
                if (name && name.length > 1) {
                    addImport(name, node.startPosition.row + 1);
                }
            }
            // Extract import references (Python)
            if (languageName === 'python') {
                // from X import name1, name2 — capture the imported names
                if (node.type === 'import_from_statement') {
                    for (let i = 0; i < node.childCount; i++) {
                        const child = node.child(i);
                        if (child.type === 'dotted_name' && i > 1) {
                            // Imported name (after 'from X import')
                            const name = child.text;
                            if (name && name.length > 1) {
                                addImport(name, child.startPosition.row + 1);
                            }
                        }
                        if (child.type === 'aliased_import') {
                            const nameChild = child.childForFieldName('name');
                            if (nameChild && nameChild.text.length > 1) {
                                addImport(nameChild.text, nameChild.startPosition.row + 1);
                            }
                        }
                    }
                }
            }
            // Extract import references (Java)
            if (languageName === 'java' && node.type === 'import_declaration') {
                // import com.example.ClassName; — capture the last identifier
                const lastChild = findLastIdentifier(node);
                if (lastChild && lastChild.length > 1) {
                    addImport(lastChild, node.startPosition.row + 1);
                }
            }
            // Extract import references (Rust)
            if (languageName === 'rust' && node.type === 'use_declaration') {
                collectRustUseIdentifiers(node, relativePath, tags);
            }
            // Extract import references (Go)
            if (languageName === 'go' && node.type === 'import_declaration') {
                collectGoImportIdentifiers(node, relativePath, tags);
            }
            // Extract import references (C#)
            if (languageName === 'c-sharp' && node.type === 'using_directive') {
                const lastId = findLastNameInTree(node);
                if (lastId && lastId.length > 1) {
                    addImport(lastId, node.startPosition.row + 1);
                }
            }
            // Extract import references (PHP)
            if (languageName === 'php' && node.type === 'namespace_use_declaration') {
                collectPhpUseIdentifiers(node, relativePath, tags);
            }
            // Recurse into children
            if (cursor.gotoFirstChild()) {
                do {
                    visitNode();
                } while (cursor.gotoNextSibling());
                cursor.gotoParent();
            }
        };
        visitNode();
        // Second pass: collect identifier references (type references, call expressions)
        collectReferences(tree.rootNode, relativePath, definedNames, tags, languageName);
        return tags;
    }
    finally {
        // Release parser-owned resources on every exit path.
        if (cursor)
            cursor.delete();
        if (tree)
            tree.delete();
        parser.delete();
    }
}
|
|
737
|
+
/**
 * Return the text of the last identifier among a node's direct children
 * (used for Java import declarations), or null when there is none.
 *
 * A direct `identifier` child contributes its own text; a `scoped_identifier`
 * child (e.g. com.example.ClassName) contributes the text of its `name` field.
 */
function findLastIdentifier(node) {
    let found = null;
    for (let idx = 0; idx < node.childCount; idx += 1) {
        const current = node.child(idx);
        switch (current.type) {
            case 'identifier':
                found = current.text;
                break;
            case 'scoped_identifier': {
                const tail = current.childForFieldName('name');
                if (tail) {
                    found = tail.text;
                }
                break;
            }
            default:
                break;
        }
    }
    return found;
}
|
|
756
|
+
/**
 * Return the text of the last `identifier` node met in a pre-order walk of the
 * whole subtree (used for C# qualified_name chains), or null when the subtree
 * contains no identifier.
 */
function findLastNameInTree(node) {
    let found = null;
    // Explicit stack; children are pushed in reverse so they pop in source order.
    const pending = [node];
    while (pending.length > 0) {
        const current = pending.pop();
        if (current.type === 'identifier') {
            found = current.text;
        }
        for (let idx = current.childCount - 1; idx >= 0; idx -= 1) {
            pending.push(current.child(idx));
        }
    }
    return found;
}
|
|
770
|
+
/**
 * Collect imported names from a Rust use declaration and push them onto `tags`
 * as 'import' references.
 *
 * Handles:
 *   use std::collections::HashMap;          → HashMap
 *   use crate::module::{Foo, Bar};          → Foo, Bar
 *   use crate::module::{inner::Baz, Q as R} → Baz, Q
 *   use a::{b::{C, D}};                     → C, D
 *   use a::B as C;                          → B
 *
 * Only the final path segment is recorded; for `as` clauses the original name
 * (not the alias) is recorded. Names of one character are skipped. A bare
 * top-level identifier (`use foo;`) is not recorded.
 *
 * @param node - The `use_declaration` node
 * @param relativePath - Relative path stored in each tag's `file` field
 * @param tags - Output array; tags are appended in source order
 */
function collectRustUseIdentifiers(node, relativePath, tags) {
    const pushImport = (nameNode) => {
        if (nameNode && nameNode.text.length > 1) {
            tags.push({ name: nameNode.text, kind: 'ref', line: nameNode.startPosition.row + 1, file: relativePath, type: 'import' });
        }
    };
    // `insideList` is true for clauses nested in a `{...}` list, where a bare
    // identifier is itself an imported name.
    const visitClause = (clause, insideList) => {
        switch (clause.type) {
            case 'identifier':
                if (insideList) {
                    pushImport(clause);
                }
                break;
            case 'scoped_identifier': {
                // use std::collections::HashMap → only HashMap
                let nameNode = clause.childForFieldName('name');
                while (nameNode && nameNode.type === 'scoped_identifier') {
                    nameNode = nameNode.childForFieldName('name');
                }
                pushImport(nameNode);
                break;
            }
            case 'use_as_clause': {
                // `path as alias` — record the imported item, not the alias
                const imported = clause.childForFieldName('path');
                if (imported) {
                    visitClause(imported, insideList);
                }
                break;
            }
            case 'scoped_use_list':
                // use crate::module::{Foo, Bar} — skip the path prefix, visit the list
                for (let j = 0; j < clause.childCount; j++) {
                    const listChild = clause.child(j);
                    if (listChild.type === 'use_list') {
                        visitClause(listChild, true);
                    }
                }
                break;
            case 'use_list':
                for (let k = 0; k < clause.childCount; k++) {
                    visitClause(clause.child(k), true);
                }
                break;
            default:
                // Keywords, punctuation, `self`, wildcards, visibility modifiers, …
                break;
        }
    };
    for (let i = 0; i < node.childCount; i++) {
        visitClause(node.child(i), false);
    }
}
|
|
809
|
+
/**
 * Collect imported package names from a Go import declaration.
 *
 * Each import path is a quoted string such as "github.com/pkg/errors"; the
 * quotes are stripped and the last path component ("errors") is recorded as an
 * 'import' reference. Both single specs and parenthesised spec lists are
 * handled. Names of one character or less are skipped.
 */
function collectGoImportIdentifiers(node, relativePath, tags) {
    const recordSpec = (spec) => {
        for (let idx = 0; idx < spec.childCount; idx += 1) {
            const literal = spec.child(idx);
            if (literal.type !== 'interpreted_string_literal') {
                continue;
            }
            // "github.com/pkg/errors" → errors
            const importPath = literal.text.split('"').join('');
            const name = importPath.slice(importPath.lastIndexOf('/') + 1);
            if (name.length > 1) {
                tags.push({ name, kind: 'ref', line: literal.startPosition.row + 1, file: relativePath, type: 'import' });
            }
        }
    };
    for (let idx = 0; idx < node.childCount; idx += 1) {
        const entry = node.child(idx);
        if (entry.type === 'import_spec') {
            recordSpec(entry);
        }
        else if (entry.type === 'import_spec_list') {
            for (let inner = 0; inner < entry.childCount; inner += 1) {
                const candidate = entry.child(inner);
                if (candidate.type === 'import_spec') {
                    recordSpec(candidate);
                }
            }
        }
    }
}
|
|
841
|
+
/**
 * Collect class/interface names from a PHP use declaration.
 *
 * For every `qualified_name` inside a `namespace_use_clause`, the last `name`
 * segment is recorded as an 'import' reference:
 *   use App\Base\Model → "Model"
 * Names of one character or less are skipped.
 */
function collectPhpUseIdentifiers(node, relativePath, tags) {
    for (let i = 0; i < node.childCount; i += 1) {
        const clause = node.child(i);
        if (clause.type !== 'namespace_use_clause') {
            continue;
        }
        for (let j = 0; j < clause.childCount; j += 1) {
            const qualified = clause.child(j);
            if (qualified.type !== 'qualified_name') {
                continue;
            }
            // Scan from the end: the first `name` found is the trailing segment.
            let tail = null;
            for (let k = qualified.childCount - 1; k >= 0 && tail === null; k -= 1) {
                const segment = qualified.child(k);
                if (segment.type === 'name') {
                    tail = segment;
                }
            }
            if (tail !== null && tail.text.length > 1) {
                tags.push({ name: tail.text, kind: 'ref', line: tail.startPosition.row + 1, file: relativePath, type: 'import' });
            }
        }
    }
}
|
|
871
|
+
// ---------------------------------------------------------------------------
|
|
872
|
+
// Reference collection
|
|
873
|
+
// ---------------------------------------------------------------------------
|
|
874
|
+
/**
 * Collect reference identifiers from the AST and append them to `tags`.
 *
 * The whole tree is walked in pre-order. Type identifiers, call targets,
 * constructor invocations, Rust macro names, Go composite literal types and
 * Go selector operands are recorded as 'identifier' references. Each name is
 * recorded at most once (at its first occurrence) and names of one character
 * or less are ignored. Most call-like forms skip names present in
 * `localNames`; plain type identifiers and `new` expressions do not.
 */
function collectReferences(rootNode, relativePath, localNames, tags, languageName) {
    const emitted = new Set();
    const lineOf = (n) => n.startPosition.row + 1;
    const record = (name, line) => {
        if (name.length <= 1 || emitted.has(name)) {
            return;
        }
        emitted.add(name);
        tags.push({ name, kind: 'ref', line, file: relativePath, type: 'identifier' });
    };
    // Record `candidate` when it is an identifier that is not defined in this file.
    const recordForeignIdentifier = (candidate) => {
        if (candidate && candidate.type === 'identifier' && !localNames.has(candidate.text)) {
            record(candidate.text, lineOf(candidate));
        }
    };
    const inspect = (node) => {
        switch (node.type) {
            // Type references (TS/JS/Java/C/C++)
            case 'type_identifier':
                record(node.text, lineOf(node));
                break;
            // Call expressions (TS/JS/C/C++)
            case 'call_expression': {
                const callee = node.childForFieldName('function');
                if (!callee) {
                    break;
                }
                if (callee.type === 'identifier') {
                    recordForeignIdentifier(callee);
                }
                else if (callee.type === 'member_expression' || callee.type === 'field_expression') {
                    recordForeignIdentifier(callee.childForFieldName('object') ?? callee.childForFieldName('argument'));
                }
                break;
            }
            // Python calls
            case 'call': {
                const callee = node.childForFieldName('function');
                if (!callee) {
                    break;
                }
                if (callee.type === 'identifier') {
                    recordForeignIdentifier(callee);
                }
                else if (callee.type === 'attribute') {
                    recordForeignIdentifier(callee.childForFieldName('object'));
                }
                break;
            }
            // Java: method invocations
            case 'method_invocation':
                recordForeignIdentifier(node.childForFieldName('object'));
                break;
            // Java / C#: object creation (new ClassName())
            case 'object_creation_expression': {
                const created = node.childForFieldName('type');
                if (created && created.type === 'type_identifier') {
                    record(created.text, lineOf(created));
                }
                // C# exposes the created type as a plain identifier.
                if (languageName === 'c-sharp' && created && created.type === 'identifier') {
                    record(created.text, lineOf(created));
                }
                break;
            }
            // TS/JS/C++: new expressions
            case 'new_expression': {
                const ctor = node.childForFieldName('constructor');
                if (ctor && ctor.type === 'identifier') {
                    record(ctor.text, lineOf(ctor));
                }
                break;
            }
            // Rust: macro invocations (macro_name!)
            case 'macro_invocation':
                recordForeignIdentifier(node.childForFieldName('macro'));
                break;
            // Go: composite literals (Point{...})
            case 'composite_literal': {
                const literalType = node.childForFieldName('type');
                if (literalType && literalType.type === 'type_identifier' && !localNames.has(literalType.text)) {
                    record(literalType.text, lineOf(literalType));
                }
                break;
            }
            // Go: selector expressions (pkg.Function)
            case 'selector_expression':
                recordForeignIdentifier(node.childForFieldName('operand'));
                break;
            default:
                break;
        }
    };
    const visit = (node) => {
        inspect(node);
        for (let idx = 0; idx < node.childCount; idx += 1) {
            visit(node.child(idx));
        }
    };
    visit(rootNode);
}
|
|
978
|
+
// ---------------------------------------------------------------------------
|
|
979
|
+
// Public utilities
|
|
980
|
+
// ---------------------------------------------------------------------------
|
|
981
|
+
/**
 * Get the language name for a file extension.
 * Checks tree-sitter languages first, then regex-based fallback languages.
 *
 * Only own keys of the tree-sitter extension table are consulted, so inputs
 * such as 'constructor' or 'toString' never resolve to an inherited
 * Object.prototype member.
 *
 * @param ext - File extension to look up
 * @returns The language name, or whatever the regex fallback lookup returns
 */
export function getLanguageForExtension(ext) {
    const treeSitterLanguage = Object.prototype.hasOwnProperty.call(EXTENSION_TO_LANGUAGE, ext)
        ? EXTENSION_TO_LANGUAGE[ext]
        : undefined;
    return treeSitterLanguage ?? getRegexLanguageForExtension(ext);
}
|
|
988
|
+
/**
 * Check if a file extension is supported (tree-sitter or regex fallback).
 *
 * Uses an own-property check rather than `in`, which would also match members
 * inherited from Object.prototype (e.g. 'toString').
 *
 * @param ext - File extension to check
 * @returns true when the extension is an own key of the tree-sitter table;
 *          otherwise the result of the regex fallback check
 */
export function isSupportedExtension(ext) {
    return Object.prototype.hasOwnProperty.call(EXTENSION_TO_LANGUAGE, ext) || isRegexSupportedExtension(ext);
}
|
|
994
|
+
//# sourceMappingURL=parser.js.map
|