@mishasinitcyn/betterrank 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/parser.js ADDED
@@ -0,0 +1,333 @@
1
+ import { createRequire } from 'module';
2
+ const require = createRequire(import.meta.url);
3
+
4
+ const Parser = require('tree-sitter');
5
+
6
+ // Load native grammars eagerly — no WASM, no async init needed
7
+ const tsGrammars = require('tree-sitter-typescript');
8
+ const phpModule = require('tree-sitter-php');
9
+
10
// Language name -> native tree-sitter grammar object, all loaded at module
// evaluation time. The TypeScript package supplies two grammars; for PHP the
// `php` property of the package is used when present, else the package itself.
const GRAMMARS = Object.fromEntries([
  ['javascript', require('tree-sitter-javascript')],
  ['typescript', tsGrammars.typescript],
  ['tsx', tsGrammars.tsx],
  ['python', require('tree-sitter-python')],
  ['rust', require('tree-sitter-rust')],
  ['go', require('tree-sitter-go')],
  ['ruby', require('tree-sitter-ruby')],
  ['java', require('tree-sitter-java')],
  ['c', require('tree-sitter-c')],
  ['cpp', require('tree-sitter-cpp')],
  ['c_sharp', require('tree-sitter-c-sharp')],
  ['php', phpModule.php ? phpModule.php : phpModule],
]);
24
+
25
// File extension (including the leading dot) -> language name used as the key
// into the grammar and query tables. Built from per-language extension groups;
// the group order fixes the key order, which SUPPORTED_EXTENSIONS exposes.
const LANG_MAP = Object.fromEntries(
  [
    ['javascript', ['.js', '.mjs', '.cjs', '.jsx']],
    ['typescript', ['.ts']],
    ['tsx', ['.tsx']],
    ['python', ['.py']],
    ['rust', ['.rs']],
    ['go', ['.go']],
    ['ruby', ['.rb']],
    ['java', ['.java']],
    ['c', ['.c', '.h']],
    ['cpp', ['.cpp', '.hpp', '.cc']],
    ['c_sharp', ['.cs']],
    ['php', ['.php']],
  ].flatMap(([langName, extensions]) => extensions.map((extension) => [extension, langName])),
);

// Every extension that has an entry in LANG_MAP, in table order.
const SUPPORTED_EXTENSIONS = Object.keys(LANG_MAP);
47
+
48
/**
 * Resolve a file extension to its loaded grammar object.
 *
 * @param {string} ext - Extension including the leading dot, e.g. '.ts'.
 * @returns {object|null} The GRAMMARS entry for the mapped language, or null
 *   when the extension is not in LANG_MAP or the grammar entry is falsy.
 */
function getLanguage(ext) {
  const langName = LANG_MAP[ext];
  return (langName && GRAMMARS[langName]) || null;
}
53
+
54
/**
 * Resolve a file extension to the language name used to key the query tables.
 *
 * @param {string} ext - Extension including the leading dot, e.g. '.py'.
 * @returns {string|null} The LANG_MAP entry, or null when there is none.
 */
function getLangName(ext) {
  const langName = LANG_MAP[ext];
  return langName ? langName : null;
}
57
+
58
+ // --- Tree-sitter query strings per language ---
59
+
60
// Definition queries, keyed by language name. Every pattern captures the
// defined identifier as @name and the enclosing declaration node as
// @definition; parseFile turns each match into one definition record.
//
// Tree-sitter applies a pattern to matching nodes at any depth, so a bare
// `(function_declaration ...)` pattern already matches declarations nested in
// an `export_statement` (JS/TS) or a `decorated_definition` (Python). Separate
// wrapper patterns that capture the same inner node as @definition would make
// every exported/decorated symbol match twice and be reported twice, so they
// are deliberately not listed here.
const DEF_QUERIES = {
  javascript: `
    (function_declaration name: (identifier) @name) @definition
    (lexical_declaration (variable_declarator name: (identifier) @name value: (arrow_function) @_val)) @definition
    (class_declaration name: (identifier) @name) @definition
    (method_definition name: (property_identifier) @name) @definition
  `,

  typescript: `
    (function_declaration name: (identifier) @name) @definition
    (lexical_declaration (variable_declarator name: (identifier) @name value: (arrow_function) @_val)) @definition
    (class_declaration name: (type_identifier) @name) @definition
    (method_definition name: (property_identifier) @name) @definition
    (interface_declaration name: (type_identifier) @name) @definition
    (type_alias_declaration name: (type_identifier) @name) @definition
    (enum_declaration name: (identifier) @name) @definition
  `,

  python: `
    (function_definition name: (identifier) @name) @definition
    (class_definition name: (identifier) @name) @definition
  `,

  rust: `
    (function_item name: (identifier) @name) @definition
    (struct_item name: (type_identifier) @name) @definition
    (enum_item name: (type_identifier) @name) @definition
    (trait_item name: (type_identifier) @name) @definition
    (impl_item trait: (type_identifier) @name) @definition
    (type_item name: (type_identifier) @name) @definition
  `,

  go: `
    (function_declaration name: (identifier) @name) @definition
    (method_declaration name: (field_identifier) @name) @definition
    (type_declaration (type_spec name: (type_identifier) @name)) @definition
  `,

  java: `
    (class_declaration name: (identifier) @name) @definition
    (interface_declaration name: (identifier) @name) @definition
    (method_declaration name: (identifier) @name) @definition
    (enum_declaration name: (identifier) @name) @definition
  `,

  ruby: `
    (method name: (identifier) @name) @definition
    (singleton_method name: (identifier) @name) @definition
    (class name: (constant) @name) @definition
  `,

  c: `
    (function_definition declarator: (function_declarator declarator: (identifier) @name)) @definition
    (struct_specifier name: (type_identifier) @name) @definition
    (enum_specifier name: (type_identifier) @name) @definition
  `,

  cpp: `
    (function_definition declarator: (function_declarator declarator: (identifier) @name)) @definition
    (class_specifier name: (type_identifier) @name) @definition
    (struct_specifier name: (type_identifier) @name) @definition
    (enum_specifier name: (type_identifier) @name) @definition
    (namespace_definition name: (namespace_identifier) @name) @definition
  `,

  c_sharp: `
    (class_declaration name: (identifier) @name) @definition
    (interface_declaration name: (identifier) @name) @definition
    (method_declaration name: (identifier) @name) @definition
    (struct_declaration name: (identifier) @name) @definition
    (enum_declaration name: (identifier) @name) @definition
  `,

  php: `
    (function_definition name: (name) @name) @definition
    (class_declaration name: (name) @name) @definition
    (method_declaration name: (name) @name) @definition
    (interface_declaration name: (name) @name) @definition
  `,
};

// TSX uses the same queries as TypeScript
DEF_QUERIES.tsx = DEF_QUERIES.typescript;
154
+
155
// Reference queries, keyed by language name. Every pattern captures the
// referenced identifier as @ref. Languages without an entry use `default`.
const REF_QUERIES = {
  // NOTE: We intentionally omit obj.method() patterns (member_expression,
  // attribute calls, selector_expression) because without type information,
  // common method names (get, close, execute, run) create massive spurious
  // cross-wiring. Bare function calls and imports provide the structural
  // backbone; IMPORTS edges from import statements connect files.
  javascript: `
    (call_expression function: (identifier) @ref)
    (import_specifier name: (identifier) @ref)
    (import_clause (identifier) @ref)
  `,

  typescript: `
    (call_expression function: (identifier) @ref)
    (import_specifier name: (identifier) @ref)
    (import_clause (identifier) @ref)
    (type_identifier) @ref
  `,

  python: `
    (call function: (identifier) @ref)
    (decorator (identifier) @ref)
  `,

  rust: `
    (call_expression function: (identifier) @ref)
    (call_expression function: (scoped_identifier name: (identifier) @ref))
    (type_identifier) @ref
  `,

  go: `
    (call_expression function: (identifier) @ref)
    (type_identifier) @ref
  `,

  java: `
    (object_creation_expression type: (type_identifier) @ref)
    (type_identifier) @ref
  `,

  // NOTE(review): the Ruby, C# and PHP grammars do not appear to define a
  // `call_expression` node, so the `default` query cannot compile for them and
  // the failure is swallowed by the caller, yielding no references at all.
  // The three entries below use each grammar's own call node and keep the
  // "bare calls only" policy described above. Node and field names follow the
  // grammars' published node types; confirm against the installed versions.
  ruby: `
    (call !receiver method: (identifier) @ref)
  `,

  c_sharp: `
    (invocation_expression function: (identifier) @ref)
  `,

  php: `
    (function_call_expression function: (name) @ref)
  `,

  default: `
    (call_expression function: (identifier) @ref)
  `,
};

// TSX uses the same queries as TypeScript
REF_QUERIES.tsx = REF_QUERIES.typescript;
201
+
202
// Tree-sitter node type -> coarse symbol kind reported on definition records.
const KIND_MAP = {
  function_declaration: 'function',
  function_definition: 'function',
  arrow_function: 'function',
  method_definition: 'function',
  method_declaration: 'function',
  function_item: 'function',
  singleton_method: 'function',
  // Ruby's definition queries capture `method` and `class` nodes.
  method: 'function',
  class_declaration: 'class',
  class_definition: 'class',
  class_specifier: 'class',
  class: 'class',
  struct_item: 'class',
  struct_specifier: 'class',
  struct_declaration: 'class',
  interface_declaration: 'type',
  type_alias_declaration: 'type',
  type_item: 'type',
  enum_declaration: 'type',
  enum_item: 'type',
  enum_specifier: 'type',
  trait_item: 'type',
  impl_item: 'type',
  type_declaration: 'type',
  type_spec: 'type',
  lexical_declaration: 'variable',
  variable_declaration: 'variable',
  variable_declarator: 'variable',
  namespace_definition: 'namespace',
  decorated_definition: 'function',
};

/**
 * Classify a tree-sitter node type into a coarse symbol kind.
 *
 * @param {string} nodeType - A node's `type` string.
 * @returns {string} The KIND_MAP entry for the type, or 'other' when the type
 *   has no entry of its own.
 */
function nodeKind(nodeType) {
  // Own-property check: a plain lookup would return inherited members of
  // Object.prototype (e.g. `constructor`) instead of falling back to 'other'.
  return Object.prototype.hasOwnProperty.call(KIND_MAP, nodeType)
    ? KIND_MAP[nodeType]
    : 'other';
}
236
+
237
/**
 * Find the colon that ends a Python `def`/`class` header.
 *
 * Scans left to right and returns the first ':' that sits outside every
 * (), [] and {} pair, outside string literals, and outside '#' comments, so
 * colons in parameter annotations, defaults, slices and lambdas are skipped.
 *
 * @param {string} text - Source text starting at the `def`/`class` keyword.
 * @returns {number} Index of the terminating colon, or -1 if none is found.
 */
function findPythonHeaderColon(text) {
  let depth = 0;
  let quote = null;
  for (let i = 0; i < text.length; i += 1) {
    const ch = text[i];
    if (quote !== null) {
      if (ch === '\\') {
        i += 1; // skip the escaped character
      } else if (ch === quote) {
        quote = null;
      }
      continue;
    }
    if (ch === '"' || ch === "'") {
      quote = ch;
    } else if (ch === '#') {
      // A comment runs to the end of the line; nothing in it is structural.
      const lineEnd = text.indexOf('\n', i);
      if (lineEnd === -1) return -1;
      i = lineEnd;
    } else if (ch === '(' || ch === '[' || ch === '{') {
      depth += 1;
    } else if (ch === ')' || ch === ']' || ch === '}') {
      if (depth > 0) depth -= 1;
    } else if (ch === ':' && depth === 0) {
      return i;
    }
  }
  return -1;
}

/**
 * Build a one-line signature for a definition node.
 *
 * Python: everything up to and including the colon that ends the header, with
 * line breaks collapsed to single spaces and the result capped at 300
 * characters; if no such colon exists, the trimmed first line.
 * Other languages: the first line, cut at the first '{' when one appears on
 * that line, trimmed and capped at 200 characters.
 *
 * @param {{text: string}} node - Node whose `text` is the definition source.
 * @param {string} langName - Language name; only 'python' is special-cased.
 * @returns {string} The signature; '...' is appended when it was truncated.
 */
function extractSignature(node, langName) {
  const text = node.text;

  if (langName === 'python') {
    // The previous heuristic (first ')' then the next ':') stopped at an
    // annotation colon whenever a default value contained a call, e.g.
    // `def f(a=g(), b: int = 1)`. Bracket-depth scanning finds the real end.
    const colonIdx = findPythonHeaderColon(text);
    if (colonIdx === -1) {
      return text.split('\n')[0].trim();
    }
    // Collapse multiline signature into a single line
    const header = text.substring(0, colonIdx + 1).replace(/\s*\n\s*/g, ' ').trim();
    return header.length > 300 ? header.substring(0, 300) + '...' : header;
  }

  const newlineIdx = text.indexOf('\n');
  let end = newlineIdx === -1 ? text.length : newlineIdx;
  const braceIdx = text.indexOf('{');
  if (braceIdx !== -1 && braceIdx < end) end = braceIdx;
  const sig = text.substring(0, end).trim();
  return sig.length > 200 ? sig.substring(0, 200) + '...' : sig;
}
266
+
267
/**
 * Parse one source file and collect its definitions and references.
 *
 * The text after the last '.' in `filePath` selects the grammar and the two
 * query strings. Each definition match becomes
 * `{ name, kind, file, lineStart, lineEnd, signature }` and each reference
 * match becomes `{ name, file, line }`; line numbers are 1-based.
 *
 * @param {string} filePath - Path of the file; also stored on every record.
 * @param {string} source - Source text handed to the parser.
 * @returns {{file: string, definitions: object[], references: object[]}|null}
 *   Null when the path contains no '.' or the extension has no grammar.
 */
function parseFile(filePath, source) {
  const extStart = filePath.lastIndexOf('.');
  if (extStart === -1) return null;

  const extension = filePath.substring(extStart);
  const grammar = getLanguage(extension);
  if (!grammar) return null;
  const langName = getLangName(extension);

  const parser = new Parser();
  parser.setLanguage(grammar);
  const tree = parser.parse(source);

  const definitions = [];
  const references = [];

  // Compile `queryText` and hand each match's captures to `visit`. Any error,
  // whether from compiling the query or from visiting a match, is swallowed on
  // purpose: a query may fail on some grammar versions and the file should
  // still yield whatever was collected up to that point.
  const forEachMatch = (queryText, visit) => {
    if (!queryText) return;
    try {
      const query = new Parser.Query(grammar, queryText);
      for (const match of query.matches(tree.rootNode)) {
        visit(match.captures);
      }
    } catch (e) {
      // Degrade gracefully
    }
  };

  forEachMatch(DEF_QUERIES[langName] || null, (captures) => {
    const nameCapture = captures.find((capture) => capture.name === 'name');
    if (!nameCapture) return;
    // Without a @definition capture the name node doubles as the definition.
    const defCapture = captures.find((capture) => capture.name === 'definition') || nameCapture;
    const defNode = defCapture.node;

    definitions.push({
      name: nameCapture.node.text,
      kind: nodeKind(defNode.type),
      file: filePath,
      lineStart: defNode.startPosition.row + 1,
      lineEnd: defNode.endPosition.row + 1,
      signature: extractSignature(defNode, langName),
    });
  });

  forEachMatch(REF_QUERIES[langName] || REF_QUERIES.default, (captures) => {
    const refCapture = captures.find((capture) => capture.name === 'ref');
    if (!refCapture) return;
    const refNode = refCapture.node;

    references.push({
      name: refNode.text,
      file: filePath,
      line: refNode.startPosition.row + 1,
    });
  });

  // No tree.delete()/parser.delete() needed — native GC handles cleanup

  return { file: filePath, definitions, references };
}
332
+
333
+ export { parseFile, SUPPORTED_EXTENSIONS, LANG_MAP };