codedeep-mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +177 -0
  3. package/dist/config.js +223 -0
  4. package/dist/git/analyzer.js +177 -0
  5. package/dist/git/git-service.js +568 -0
  6. package/dist/git/head-watcher.js +113 -0
  7. package/dist/git/runner.js +204 -0
  8. package/dist/index.js +138 -0
  9. package/dist/indexer/code-index.js +1801 -0
  10. package/dist/indexer/complexity.js +633 -0
  11. package/dist/indexer/extractor.js +354 -0
  12. package/dist/indexer/languages/cpp.js +934 -0
  13. package/dist/indexer/languages/csharp.js +854 -0
  14. package/dist/indexer/languages/dart.js +777 -0
  15. package/dist/indexer/languages/go.js +665 -0
  16. package/dist/indexer/languages/java.js +507 -0
  17. package/dist/indexer/languages/kotlin.js +709 -0
  18. package/dist/indexer/languages/objc.js +397 -0
  19. package/dist/indexer/languages/php.js +771 -0
  20. package/dist/indexer/languages/python.js +455 -0
  21. package/dist/indexer/languages/ruby.js +697 -0
  22. package/dist/indexer/languages/rust.js +754 -0
  23. package/dist/indexer/languages/swift.js +691 -0
  24. package/dist/indexer/languages/typescript.js +485 -0
  25. package/dist/indexer/parser.js +175 -0
  26. package/dist/indexer/pipeline.js +342 -0
  27. package/dist/indexer/scanner.js +279 -0
  28. package/dist/indexer/watcher.js +353 -0
  29. package/dist/logger.js +16 -0
  30. package/dist/server.js +170 -0
  31. package/dist/tools/common.js +207 -0
  32. package/dist/tools/find-references.js +224 -0
  33. package/dist/tools/find-symbol.js +94 -0
  34. package/dist/tools/get-context.js +370 -0
  35. package/dist/tools/impact.js +218 -0
  36. package/dist/tools/overview.js +482 -0
  37. package/dist/tools/search-structure.js +303 -0
  38. package/dist/types.js +61 -0
  39. package/grammars/tree-sitter-c.wasm +0 -0
  40. package/grammars/tree-sitter-c_sharp.wasm +0 -0
  41. package/grammars/tree-sitter-cpp.wasm +0 -0
  42. package/grammars/tree-sitter-dart.wasm +0 -0
  43. package/grammars/tree-sitter-go.wasm +0 -0
  44. package/grammars/tree-sitter-java.wasm +0 -0
  45. package/grammars/tree-sitter-javascript.wasm +0 -0
  46. package/grammars/tree-sitter-kotlin.wasm +0 -0
  47. package/grammars/tree-sitter-objc.wasm +0 -0
  48. package/grammars/tree-sitter-php.wasm +0 -0
  49. package/grammars/tree-sitter-python.wasm +0 -0
  50. package/grammars/tree-sitter-ruby.wasm +0 -0
  51. package/grammars/tree-sitter-rust.wasm +0 -0
  52. package/grammars/tree-sitter-swift.wasm +0 -0
  53. package/grammars/tree-sitter-tsx.wasm +0 -0
  54. package/grammars/tree-sitter-typescript.wasm +0 -0
  55. package/package.json +67 -0
@@ -0,0 +1,485 @@
1
+ import { IMPORT_DEFAULT, IMPORT_NAMESPACE, RECEIVER_OPAQUE } from '../../types.js';
2
+ import { SIGNATURE_DISPLAY_CAP, bareDecoratorIdentifier, commentDocLine, declSignature, normalizeSignature, resolveCalls, symbolId, } from '../extractor.js';
3
+ import { cFamilyBooleanOperatorKind, computeComplexity, isCFamilyBooleanOperator, } from '../complexity.js';
4
// Node types that open a function-like body. Calls found inside such a body
// belong to that function and must not be attributed to whatever encloses it.
// walkDecorators stops at exactly this subset rather than at the wider
// TS_SKIP_TYPES: it has to keep descending through class bodies (a top-level
// decorator on an inner class attributes to the enclosing function), but a
// nested function's decorators only fire once that nested function is called.
const TS_FUNCTION_BODY_SKIP_TYPES = new Set([
    'function_declaration', 'function_expression', 'arrow_function', 'method_definition',
]);
// Full walk boundary: every function-like node above, plus the class forms.
const TS_SKIP_TYPES = new Set(TS_FUNCTION_BODY_SKIP_TYPES);
for (const classLike of ['class_declaration', 'class_expression', 'abstract_class_declaration']) {
    TS_SKIP_TYPES.add(classLike);
}
22
// Call-site selectors handed to resolveCalls. Each row pairs a node type that
// counts as a call site with the accessor returning the node that names the
// callee; the rows are expanded into `{ nodeType, getCallee }` records.
const TS_SELECTORS = [
    ['call_expression', (node) => node.childForFieldName('function')],
    ['new_expression', (node) => node.childForFieldName('constructor')],
    ['jsx_opening_element', jsxComponentName],
    ['jsx_self_closing_element', jsxComponentName],
    ['decorator', bareDecoratorIdentifier],
].map(([nodeType, getCallee]) => ({ nodeType, getCallee }));
29
// Returns the name node of a JSX element when it refers to a component, or
// null otherwise. Only plain `identifier` names qualify, and a name whose
// first character is an ASCII lowercase letter is an HTML element (`<div>`,
// `<span>`), which is not tracked as a symbol reference — components are
// PascalCase by convention.
function jsxComponentName(node) {
    const tag = node.childForFieldName('name');
    if (tag?.type !== 'identifier') {
        return null;
    }
    const startsLowercase = /^[a-z]/.test(tag.text);
    return startsLowercase ? null : tag;
}
40
// Fluent / stdlib method names (4+ characters) whose UNRESOLVED member calls
// are suppressed. Capturing chained calls would otherwise flood the name-keyed
// store with `.then()` / `.filter()`-style noise. Domain method names (zod's
// `.optional` / `.nullable` / `.refine`, …) are deliberately left out — those
// are the recall win. Names of 3 characters or fewer (`.map`, `.get`, `.set`)
// are gated downstream by SHORT_NAME_THRESHOLD, so they are not listed here.
const TS_IGNORED_MEMBER_CALLEES = new Set([
    // promise chaining
    'then', 'catch', 'finally',
    // array iteration / mutation / search
    'filter', 'forEach', 'reduce', 'flatMap',
    'concat', 'slice', 'splice',
    'indexOf', 'lastIndexOf', 'includes', 'join',
    'find', 'findIndex', 'some', 'every',
    'sort', 'reverse', 'push',
    // string manipulation
    'replace', 'replaceAll', 'trim', 'split',
    'startsWith', 'endsWith', 'substring',
    'toLowerCase', 'toUpperCase',
    // object protocol / enumeration
    'toString', 'valueOf',
    'keys', 'values', 'entries', 'hasOwnProperty',
    // more string helpers
    'charAt', 'padStart', 'padEnd', 'repeat',
    // collection removal
    'delete',
]);
56
// The four TS loop node types, shared by the cyclomatic decision set and the
// cognitive configuration. `for_in_statement` covers both for-of and for-in.
const TS_LOOP_NODE_TYPES = new Set(['for_statement', 'for_in_statement', 'while_statement', 'do_statement']);
// Cyclomatic decision nodes, each worth +1 — verified against the SonarJS
// source (S1541 rule.ts):
//   - every loop above, `if`, and the ternary;
//   - `switch_case` once per non-default case label (a SwitchCase WITH a test);
//     `switch_default` and the `switch_statement` container do NOT count;
//   - `&&` / `||` / `??` are counted separately through the shared
//     isCFamilyBooleanOperator predicate.
// Deliberate omissions that follow SonarJS rather than the textbook set:
// `throw` and `catch` (ThrowStatement / CatchClause are absent from SonarJS's
// cyclomatic switch), `else` / `finally` / `default`, and the logical
// assignments `&&=` / `||=` / `??=`.
const TS_DECISION_NODE_TYPES = new Set(
    [...TS_LOOP_NODE_TYPES].concat(['if_statement', 'switch_case', 'ternary_expression']),
);
79
// Cognitive-complexity configuration for TS/JS, verified exact against SonarJS
// S3776 (eslint-plugin-sonarjs `cjs/S3776/rule.js`, clean-room read plus a
// threshold-0 oracle). It differs MATERIALLY from sonar-java, so the Java
// config must not be assumed to transfer — see the boolean and JSX notes
// below. Every node name was AST-dumped against the bundled grammars. See
// complexity.ts and the project docs' "Cognitive Complexity Rules".
const TS_COGNITIVE_OPTIONS = {
    // ---- if / else ----
    ifType: 'if_statement',
    conditionField: 'condition',
    consequenceField: 'consequence',
    alternativeField: 'alternative',
    // Unlike Java's direct `alternative`, tree-sitter-typescript wraps
    // else / else-if in an `else_clause` node, which the engine unwraps.
    // An else-if is a flat +1.
    elseClauseType: 'else_clause',
    // ---- loops, switch, ternary, catch ----
    loopTypes: TS_LOOP_NODE_TYPES,
    // `switch_statement` here (Java uses `switch_expression`). The WHOLE switch
    // costs +1 however many cases it has — the cognitive/cyclomatic divergence.
    switchTypes: new Set(['switch_statement']),
    ternaryType: 'ternary_expression',
    catchType: 'catch_clause',
    // Intentionally EMPTY. Nested functions/arrows already sit in TS_SKIP_TYPES,
    // so the cognitive walk prunes them at its boundary: each top-level
    // function / method / arrow-const gets its own standalone number and
    // nested-function control flow counts toward nobody. That matches SonarJS's
    // per-function report and the TS cyclomatic arrow-callback gap; listing
    // arrows here would double-count them into the enclosing symbol.
    nestOnlyTypes: new Set(),
    // ---- labeled jumps ----
    labeledJumpTypes: new Set(['break_statement', 'continue_statement']),
    // Inspect the `label` FIELD rather than the named-child count: an unlabeled
    // `break /*c*/;` has a comment as a named child, and counting children
    // would misread it as labeled and add a spurious +1.
    hasLabel: (jump) => {
        const label = jump.childForFieldName('label');
        return label !== null && label !== undefined;
    },
    // ---- boolean operators ----
    // SonarJS counts ONLY maximal `&&` runs. The kind function still reports
    // `||` / `??` so they stay in the source-order run as breakers, while
    // booleanRunStarts admits only the first `&&` of each run. Cyclomatic DOES
    // count `||` / `??` — an expected divergence between the two metrics.
    booleanOperatorKind: cFamilyBooleanOperatorKind,
    booleanRunStarts: (operator, previous) => !(operator !== '&&' || previous === '&&'),
    excludeBooleanRun: tsBooleanRunExcluded,
    parenthesizedType: 'parenthesized_expression',
};
119
// Decides whether a logical expression is a JSX short-circuit that SonarJS
// S3776 leaves out of the cognitive count.
//
// SonarJS excludes a UNIFORM-operator logical expression whose immediate
// parent is a JSX `{...}` container (`jsx_expression`, which covers both JSX
// children and attribute values): `{cond && <X/>}`, `{a && b}`,
// `{foo() && bar()}` and `<div x={a && a}/>` all score 0 (oracle-confirmed).
// A MIXED-operator tree is NOT excluded (`{(a || b) && <X/>}` = 1). This
// mirrors the plugin's `flattenJsxShortCircuitNodes`: bail on a ternary or on
// a logical node with a different operator, recurse into same-operator
// operands, and accept any other leaf.
//
// `root` is the outermost logical-expression node of the run.
// Returns true when the run must be excluded, false otherwise.
function tsBooleanRunExcluded(root) {
    // Walk up through parenthesized_expression wrappers before testing the
    // container. SonarJS runs on ESTree, which has no paren nodes, so a
    // whole-expression-parenthesized short-circuit (`{(cond && <X/>)}`, a
    // common conditional-render idiom) sits DIRECTLY under the JSX container
    // there and IS excluded. tree-sitter keeps the paren node in between, so
    // without this walk the run would be over-counted.
    let container = root.parent;
    while (container?.type === 'parenthesized_expression')
        container = container.parent;
    if (container?.type !== 'jsx_expression')
        return false;
    const rootOp = cFamilyBooleanOperatorKind(root);
    if (rootOp === null)
        return false;
    const uniform = (node) => {
        // Unwrap parens (ESTree has none, so operands there are the raw
        // children). The wrapped expression is the first NON-COMMENT named
        // child: an inline comment such as `(/*c*/ a || b)` is itself a named
        // node, and stopping on it would hide a different-operator operand
        // behind what looks like a harmless leaf.
        let n = node;
        while (n && n.type === 'parenthesized_expression') {
            let inner = n.firstNamedChild;
            while (inner && inner.type === 'comment')
                inner = inner.nextNamedSibling;
            n = inner;
        }
        if (!n)
            return true;
        if (n.type === 'ternary_expression')
            return false;
        const k = cFamilyBooleanOperatorKind(n);
        if (k === null)
            return true; // non-logical leaf
        if (k !== rootOp)
            return false; // different operator → not a JSX short-circuit
        return uniform(n.childForFieldName('left')) && uniform(n.childForFieldName('right'));
    };
    return uniform(root);
}
159
// Strips wrappers that do not change WHICH object a receiver is:
// `non_null_expression` (`a!`) and `parenthesized_expression` (`(a)`). The
// wrapped expression is the first named child that is not a comment — the `!`
// and the parentheses are anonymous tokens, whereas a leading inline comment
// (`(/*c*/ a)`) is a NAMED node and has to be stepped over (the same
// comment-skip the Go receiver unwrap does). So `a!.x()` and `(a).x()` yield
// the inner `a` / `this` and resolve like `a.x()`. A genuinely chained
// receiver (`a.b().c()` → call_expression) is not a wrapper and is returned
// untouched, which keeps it opaque. A wrapper with nothing but comments inside
// is returned as-is.
function unwrapReceiver(node) {
    const isTransparent = (type) => type === 'parenthesized_expression' || type === 'non_null_expression';
    let current = node;
    while (isTransparent(current.type)) {
        let wrapped = current.firstNamedChild;
        while (wrapped && wrapped.type === 'comment') {
            wrapped = wrapped.nextNamedSibling;
        }
        if (!wrapped) {
            return current;
        }
        current = wrapped;
    }
    return current;
}
179
// Describes the receiver and method name of a member call, or returns null
// when the callee yields no reference.
//
//   this.x()              → { receiver: 'this', isSelf: true }
//   obj.x()               → { receiver: 'obj',  isSelf: false }
//   a!.x() / (a).x()      → unwrapped first, so they behave like `a.x()`
//   a.b().c(), arr[0].f() → RECEIVER_OPAQUE: the method stays findable by name
//                           (recall) but never resolves
//   super.x()             → null (parent-class call)
//   foo()[k]()            → null (computed property, no clean property name)
//
// Anything that is not a `member_expression` also returns null.
function tsMemberCallInfo(callee) {
    if (callee.type !== 'member_expression') {
        return null;
    }
    const objectNode = callee.childForFieldName('object');
    const propertyNode = callee.childForFieldName('property');
    if (!objectNode || !propertyNode) {
        return null;
    }
    const hasCleanName = propertyNode.type === 'property_identifier'
        || propertyNode.type === 'private_property_identifier';
    if (!hasCleanName) {
        return null;
    }
    const receiverNode = unwrapReceiver(objectNode);
    const property = propertyNode.text;
    switch (receiverNode.type) {
        case 'this':
            return { receiver: 'this', property, isSelf: true };
        case 'identifier':
            return { receiver: receiverNode.text, property, isSelf: false };
        case 'super':
            return null;
        default:
            return { receiver: RECEIVER_OPAQUE, property, isSelf: false };
    }
}
205
// Entry point of the TS/JS extractor.
//
// Walks the top-level statements of `tree`, collecting declared symbols,
// import records and the bodies that need call/complexity analysis. It then
// resolves call references across those bodies and runs computeComplexity over
// them with the TS decision/cognitive configuration.
//
// Returns `{ symbols, references, imports }`.
export function extractTypeScript(tree, content, fileInfo) {
    const symbols = [];
    const imports = [];
    const bodies = [];
    for (const statement of tree.rootNode.namedChildren) {
        const isExport = statement.type === 'export_statement';
        // Re-exports (`export { x }`, `export * from '...'`) carry no
        // `declaration` field and introduce no new symbols, so they are skipped.
        const declaration = isExport ? statement.childForFieldName('declaration') : statement;
        if (!declaration) {
            continue;
        }
        extractTopLevel(declaration, statement, content, fileInfo, isExport, symbols, imports, bodies);
    }
    const resolveOptions = { ignoredMemberCallees: TS_IGNORED_MEMBER_CALLEES };
    const references = resolveCalls(bodies, tree.rootNode, symbols, fileInfo, TS_SELECTORS, TS_SKIP_TYPES, TS_FUNCTION_BODY_SKIP_TYPES, tsMemberCallInfo, resolveOptions);
    const complexityOptions = {
        decisionNodeTypes: TS_DECISION_NODE_TYPES,
        extraDecisionPredicate: isCFamilyBooleanOperator,
        skipTypes: TS_SKIP_TYPES,
        cognitive: TS_COGNITIVE_OPTIONS,
    };
    computeComplexity(bodies, symbols, complexityOptions);
    return { symbols, references, imports };
}
232
// Extracts whatever a single top-level declaration contributes.
//
//   target     - the declaration node itself (already unwrapped from `export`)
//   outer      - the statement as it appears at file level; passed to
//                makeSymbol as the documentation anchor
//   content    - full source text, used for signature slicing
//   exported   - whether the declaration sits under an `export`
//   outSymbols / outImports / outBodies - accumulators appended to in place
//
// Node types not handled below contribute nothing.
function extractTopLevel(target, outer, content, fileInfo, exported, outSymbols, outImports, outBodies) {
    const recurse = (node) => extractTopLevel(node, outer, content, fileInfo, exported, outSymbols, outImports, outBodies);
    // Builds a file-level symbol (`<path>:<name>`), records it and returns it.
    const declare = (node, kind, name, signature) => {
        const symbol = makeSymbol(node, outer, signature, fileInfo, kind, name, `${fileInfo.path}:${name}`, exported);
        outSymbols.push(symbol);
        return symbol;
    };
    const nodeType = target.type;

    // `declare …` — look through the wrapper at the declaration it carries.
    if (nodeType === 'ambient_declaration') {
        const wrapped = target.firstNamedChild;
        if (wrapped) {
            recurse(wrapped);
        }
        return;
    }

    if (nodeType === 'function_declaration' || nodeType === 'function_signature') {
        const fnName = target.childForFieldName('name')?.text;
        if (!fnName) {
            return;
        }
        const fnSymbol = declare(target, 'function', fnName, declSignature(target, content));
        const fnBody = target.childForFieldName('body');
        if (fnBody) {
            outBodies.push({ symbolId: fnSymbol.id, body: fnBody });
        }
        return;
    }

    if (nodeType === 'class_declaration' || nodeType === 'abstract_class_declaration') {
        const className = target.childForFieldName('name')?.text;
        if (!className) {
            return;
        }
        const classSymbol = declare(target, 'class', className, declSignature(target, content));
        const classBody = target.childForFieldName('body');
        if (!classBody) {
            return;
        }
        classBody.namedChildren.forEach((member) => {
            extractClassMember(member, content, fileInfo, className, exported, outSymbols, outBodies);
        });
        // The class body itself is also walked so calls in static blocks and in
        // non-callable field initializers (`static x = helper()`,
        // `field = helper()`) attribute to the class. TS_SKIP_TYPES holds
        // method_definition and the function/arrow forms, so calls inside
        // member function bodies stay attributed to the member.
        outBodies.push({ symbolId: classSymbol.id, body: classBody, className });
        return;
    }

    // Declarations that only ever yield one name-keyed symbol. `enum_declaration`
    // covers `enum`, `const enum` (the const modifier lives inside the node)
    // and — through the ambient recursion above — `declare enum`. Enum MEMBERS
    // are not extracted (scope: top-level and class-level declarations only).
    const plainKind = nodeType === 'interface_declaration' ? 'interface'
        : nodeType === 'type_alias_declaration' ? 'type'
            : nodeType === 'enum_declaration' ? 'enum'
                : null;
    if (plainKind !== null) {
        const declName = target.childForFieldName('name')?.text;
        if (declName) {
            declare(target, plainKind, declName, declSignature(target, content));
        }
        return;
    }

    // A bare `namespace X {}` parses as expression_statement > internal_module
    // (grammar quirk); `export namespace` and `declare namespace` surface
    // internal_module directly via the declaration field / ambient recursion.
    if (nodeType === 'expression_statement') {
        const expression = target.firstNamedChild;
        const isNamespace = expression?.type === 'internal_module' || expression?.type === 'module';
        if (isNamespace) {
            recurse(expression);
        }
        return;
    }

    // `namespace X { … }` (internal_module) and the legacy `module X { … }`.
    if (nodeType === 'internal_module' || nodeType === 'module') {
        const moduleName = target.childForFieldName('name');
        // Simple identifiers only. A dotted `namespace A.B` (nested_identifier)
        // would put a '.' in the FQN and trip classNameFromFqn's member parsing
        // (isClassMember → true → dropped from file outlines), and a string
        // name (`declare module "pkg"`) names a package, not a symbol.
        // Declaration-only: namespace MEMBERS are not extracted this round — a
        // member FQN `file:Ns.fn` would collide with class-member semantics.
        if (moduleName?.type === 'identifier') {
            declare(target, 'module', moduleName.text, declSignature(target, content));
        }
        return;
    }

    if (nodeType === 'lexical_declaration' || nodeType === 'variable_declaration') {
        for (const declarator of target.namedChildren) {
            if (declarator.type !== 'variable_declarator') {
                continue;
            }
            // Destructuring patterns (array_pattern, object_pattern) are skipped.
            const binding = declarator.childForFieldName('name');
            if (binding?.type !== 'identifier') {
                continue;
            }
            const initializer = declarator.childForFieldName('value');
            const isFunction = initializer?.type === 'arrow_function' || initializer?.type === 'function_expression';
            const symbol = declare(declarator, isFunction ? 'function' : 'variable', binding.text, variableSignature(declarator, initializer, content));
            if (!isFunction) {
                continue;
            }
            const fnBody = initializer.childForFieldName('body');
            if (fnBody) {
                outBodies.push({ symbolId: symbol.id, body: fnBody });
            }
        }
        return;
    }

    if (nodeType === 'import_statement') {
        extractImport(target, fileInfo, outImports);
    }
}
353
// Extracts the symbol (and, where present, the analyzable body) of one class
// member.
//
// Methods, method signatures and abstract method signatures become `method`
// symbols. A `public_field_definition` becomes a `method` when its initializer
// is an arrow function or function expression, and a `variable` otherwise.
// Members without a name and every other member type contribute nothing. The
// FQN is `<path>:<Class>.<member>`, and the member node doubles as its own
// documentation anchor.
function extractClassMember(member, content, fileInfo, className, exported, outSymbols, outBodies) {
    const memberType = member.type;
    const isMethodLike = memberType === 'method_definition'
        || memberType === 'method_signature'
        || memberType === 'abstract_method_signature';
    if (!isMethodLike && memberType !== 'public_field_definition') {
        return;
    }
    const memberName = member.childForFieldName('name')?.text;
    if (!memberName) {
        return;
    }
    let kind = 'method';
    let signature;
    // Node whose `body` field holds the code to analyze; null when there is none.
    let bodyOwner = member;
    if (isMethodLike) {
        signature = declSignature(member, content);
    }
    else {
        const initializer = member.childForFieldName('value');
        const isCallable = initializer?.type === 'arrow_function' || initializer?.type === 'function_expression';
        if (!isCallable) {
            kind = 'variable';
        }
        signature = variableSignature(member, initializer, content);
        bodyOwner = isCallable ? initializer : null;
    }
    const fqn = `${fileInfo.path}:${className}.${memberName}`;
    const symbol = makeSymbol(member, member, signature, fileInfo, kind, memberName, fqn, exported, className);
    outSymbols.push(symbol);
    const body = bodyOwner?.childForFieldName('body');
    if (body) {
        outBodies.push({ symbolId: symbol.id, body, className });
    }
}
390
+ function extractImport(stmt, fileInfo, out) {
391
+ const sourceNode = stmt.childForFieldName('source');
392
+ if (!sourceNode)
393
+ return;
394
+ const sourceModule = sourceNode.text.replace(/^['"`]|['"`]$/g, '');
395
+ const importedNames = [];
396
+ // `import type { ... }` — the `type` token is an unnamed child of
397
+ // import_statement, so it isn't surfaced via namedChildren.
398
+ const wholeIsTypeOnly = hasTypeKeyword(stmt);
399
+ for (const child of stmt.namedChildren) {
400
+ if (child.type !== 'import_clause')
401
+ continue;
402
+ for (const item of child.namedChildren) {
403
+ if (item.type === 'identifier') {
404
+ const named = { name: IMPORT_DEFAULT, alias: item.text };
405
+ if (wholeIsTypeOnly)
406
+ named.kind = 'type';
407
+ importedNames.push(named);
408
+ }
409
+ else if (item.type === 'namespace_import') {
410
+ for (const nsChild of item.namedChildren) {
411
+ if (nsChild.type !== 'identifier')
412
+ continue;
413
+ const named = { name: IMPORT_NAMESPACE, alias: nsChild.text };
414
+ named.kind = wholeIsTypeOnly ? 'type' : 'namespace';
415
+ importedNames.push(named);
416
+ }
417
+ }
418
+ else if (item.type === 'named_imports') {
419
+ for (const spec of item.namedChildren) {
420
+ if (spec.type !== 'import_specifier')
421
+ continue;
422
+ const specName = spec.childForFieldName('name')?.text;
423
+ if (!specName)
424
+ continue;
425
+ const specAlias = spec.childForFieldName('alias')?.text;
426
+ const named = { name: specName };
427
+ if (specAlias)
428
+ named.alias = specAlias;
429
+ // `import { type X, Y }` — per-specifier `type` keyword sits
430
+ // as an unnamed child before the name identifier.
431
+ if (wholeIsTypeOnly || hasTypeKeyword(spec))
432
+ named.kind = 'type';
433
+ importedNames.push(named);
434
+ }
435
+ }
436
+ }
437
+ }
438
+ out.push({
439
+ file: fileInfo.path,
440
+ sourceModule,
441
+ importedNames,
442
+ line: stmt.startPosition.row + 1,
443
+ });
444
+ }
445
// Reports whether `node` has a direct child — named or anonymous — whose node
// type is `type`, i.e. the `type` keyword token of `import type …` or of an
// `import { type X }` specifier.
function hasTypeKeyword(node) {
    const total = node.childCount;
    let index = 0;
    while (index < total) {
        if (node.child(index)?.type === 'type') {
            return true;
        }
        index += 1;
    }
    return false;
}
452
// Assembles the symbol record for one declaration.
//
//   decl      - node supplying the 1-based start/end lines
//   docNode   - node whose preceding comment (if any) becomes `doc`
//   signature - FULL signature text
//   qualifier - optional extra id component (defaults to '')
//
// The id is derived from the full signature; only the stored copy is cut to
// SIGNATURE_DISPLAY_CAP. Hashing the capped text instead would let overloads
// that differ only past the cap share an id (JG1).
function makeSymbol(decl, docNode, signature, fileInfo, kind, name, fqn, exported, qualifier = '') {
    const { path: file, language } = fileInfo;
    const id = symbolId(file, name, kind, signature, qualifier);
    const oneBased = (position) => position.row + 1;
    const startLine = oneBased(decl.startPosition);
    const endLine = oneBased(decl.endPosition);
    const displaySignature = signature.slice(0, SIGNATURE_DISPLAY_CAP);
    const doc = extractDoc(docNode);
    return { id, name, fqn, kind, file, startLine, endLine, signature: displaySignature, doc, exported, language };
}
469
// Builds the signature text of a variable declarator or class field.
//
// When the initializer is an arrow function or function expression, the
// signature runs from the start of the declarator up to the start of the
// function body (or to the end of the declarator when there is no body node);
// for arrows the trailing `=>` is dropped as well. Any other declarator is
// rendered in full. The sliced text always goes through normalizeSignature.
function variableSignature(declarator, value, content) {
    const from = declarator.startIndex;
    const valueType = value?.type;
    const isArrow = valueType === 'arrow_function';
    if (!isArrow && valueType !== 'function_expression') {
        return normalizeSignature(content.slice(from, declarator.endIndex));
    }
    const body = value.childForFieldName('body');
    const to = body ? body.startIndex : declarator.endIndex;
    const head = normalizeSignature(content.slice(from, to));
    return isArrow ? head.replace(/=>\s*$/, '').trimEnd() : head;
}
480
// Returns the documentation line for `node`: the comment that is its
// immediately preceding named sibling, reduced by commentDocLine. Yields null
// when there is no preceding sibling or it is not a comment.
function extractDoc(node) {
    const preceding = node.previousNamedSibling;
    return preceding?.type === 'comment' ? commentDocLine(preceding.text) : null;
}
@@ -0,0 +1,175 @@
1
+ import { fileURLToPath } from 'node:url';
2
+ import path from 'node:path';
3
+ import { Parser, Language } from 'web-tree-sitter';
4
+ import { log } from '../logger.js';
5
// Absolute path of this module file and of the directory that contains it.
const moduleFile = fileURLToPath(import.meta.url);
const here = path.dirname(moduleFile);
// Whether this runs as dist/indexer/parser.js or src/indexer/parser.ts, the
// bundled grammars live two levels up, at the repo root.
const grammarsDir = path.resolve(here, '..', '..', 'grammars');
// Language id → grammar file name inside `grammarsDir`.
const LANG_TO_WASM = {
    // JS / TS family
    typescript: 'tree-sitter-typescript.wasm',
    tsx: 'tree-sitter-tsx.wasm',
    javascript: 'tree-sitter-javascript.wasm',
    // general-purpose languages
    python: 'tree-sitter-python.wasm',
    java: 'tree-sitter-java.wasm',
    go: 'tree-sitter-go.wasm',
    rust: 'tree-sitter-rust.wasm',
    swift: 'tree-sitter-swift.wasm',
    kotlin: 'tree-sitter-kotlin.wasm',
    dart: 'tree-sitter-dart.wasm',
    // @repomix ships this one as tree-sitter-c_sharp.wasm (underscore), not
    // tree-sitter-csharp.wasm.
    csharp: 'tree-sitter-c_sharp.wasm',
    php: 'tree-sitter-php.wasm',
    ruby: 'tree-sitter-ruby.wasm',
    // C family. `.c` gets the dedicated C grammar because tree-sitter-cpp
    // errors on K&R code and on C code that uses C++ keywords as identifiers;
    // the extractor is shared with cpp.
    cpp: 'tree-sitter-cpp.wasm',
    c: 'tree-sitter-c.wasm',
    objc: 'tree-sitter-objc.wasm',
};
// Language id → ready-to-use parser; filled by initParser().
const parsers = new Map();
// In-flight or completed initialization promise; null until initParser() runs.
let initPromise = null;
31
// One conditional-compilation directive line (#if / #elseif / #else / #endif),
// leading indentation included. `m` makes ^/$ match per line; with no `s`
// flag, `.*` cannot run past the end of its line.
const SWIFT_DIRECTIVE_LINE = /^[ \t]*#(?:if|elseif|else|endif)\b.*$/gm;
// Blanks every Swift directive line to whitespace of the same length.
//
// Why: tree-sitter-swift cannot parse a #if/#endif block INSIDE a type or
// function body. It emits ERROR nodes that drop the enclosing type AND its
// first guarded member and hoist the remaining members to the top level with
// the wrong kind/FQN.
//
// How: each directive LINE becomes equal-length whitespace and newlines are
// left alone, so offsets and line numbers are preserved. The extractor's
// signature slices and get_context body slices therefore still match the file
// on disk, while the declarations of every #if branch parse as ordinary
// consecutive members. A member defined in more than one branch is extracted
// once per branch — over-extraction (the symbol-id occurrence counter keeps
// ids unique) rather than the silent type loss the raw grammar produces.
//
// Content without any `#if` is returned untouched. parseFile calls this ONLY
// when the raw parse errors, and adopts the result only when it parses clean
// (see the call site).
function neutralizeSwiftDirectives(content) {
    const mayHaveDirectives = content.includes('#if');
    return mayHaveDirectives
        ? content.replace(SWIFT_DIRECTIVE_LINE, (directive) => ''.padEnd(directive.length))
        : content;
}
51
// A STANDALONE `NS_ASSUME_NONNULL_BEGIN` / `NS_ASSUME_NONNULL_END` line: the
// macro, optionally followed by whitespace or a `//` line comment. A line
// carrying real code after the macro never matches, which keeps equal-length
// blanking safe — it removes only the stray macro token and can neither shift
// braces nor drop a declaration sharing the line.
const OBJC_NULLABILITY_LINE = /^[ \t]*NS_ASSUME_NONNULL_(?:BEGIN|END)\b[ \t]*(?:\/\/.*)?$/gm;
// Blanks every standalone nullability-macro line to whitespace of the same
// length.
//
// Why: tree-sitter-objc cannot parse a bare `NS_ASSUME_NONNULL_BEGIN` (no
// trailing semicolon) in front of an `@interface`. It mis-parses the macro as
// a type and DROPS THE WHOLE interface (degrading to a labeled/expression
// statement, 0 symbols) — and that macro brackets virtually every modern ObjC
// header.
//
// How: same pattern as the Swift `#if` workaround. Newlines are untouched, so
// offsets and line numbers are preserved and the extractor's slices still
// match the file on disk. parseFile applies it ONLY when the raw parse errors
// and decides at the call site whether to adopt the result.
//
// Content without the macro prefix is returned untouched. Residual
// NS_ENUM / NS_OPTIONS / FOUNDATION_EXPORT macro opacity is a documented
// recall-only gap and is not handled here.
function neutralizeObjcDirectives(content) {
    const mayHaveMacro = content.includes('NS_ASSUME_NONNULL_');
    return mayHaveMacro
        ? content.replace(OBJC_NULLABILITY_LINE, (macroLine) => ''.padEnd(macroLine.length))
        : content;
}
71
// Counts the ERROR and MISSING nodes in the tree rooted at `root`.
//
// parseFile uses it to judge the ObjC nullability-neutralized parse against
// the raw one. An ObjC header commonly keeps OTHER macro opacity
// (NS_ENUM / FOUNDATION_EXPORT) after the NS_ASSUME_NONNULL fix, so that
// comparison is about error counts rather than "zero errors" (unlike Swift's
// brace-sensitive #if, where only a fully clean reparse is safe to adopt).
//
// The walk is iterative and only descends into children flagged `hasError`
// or `isMissing`, so clean subtrees are pruned and the walk stays cheap.
function countParseErrors(root) {
    let errors = 0;
    const pending = [root];
    for (let node = pending.pop(); node !== undefined; node = pending.pop()) {
        if (node.isMissing || node.type === 'ERROR') {
            errors += 1;
        }
        const suspects = node.children.filter((child) => child.hasError || child.isMissing);
        for (const suspect of suspects) {
            pending.push(suspect);
        }
    }
    return errors;
}
91
// Initializes the tree-sitter runtime and loads one parser per entry of
// LANG_TO_WASM into `parsers`.
//
// Calls made while an initialization is pending or after it succeeded get the
// same promise back. All grammars are loaded concurrently and `parsers` is
// filled only once every one of them has loaded. The returned promise resolves
// to undefined.
export function initParser() {
    if (initPromise) {
        return initPromise;
    }
    const loadParser = async ([lang, wasm]) => {
        const language = await Language.load(path.join(grammarsDir, wasm));
        const parser = new Parser();
        parser.setLanguage(language);
        return [lang, parser];
    };
    const pending = (async () => {
        await Parser.init();
        const ready = await Promise.all(Object.entries(LANG_TO_WASM).map(loadParser));
        ready.forEach(([lang, parser]) => {
            parsers.set(lang, parser);
        });
    })();
    initPromise = pending;
    // Forget a failed attempt so the next call retries. A cached rejection
    // would otherwise disable parsing (and pattern validation) for the rest of
    // the process lifetime after one transient failure, e.g. EMFILE during the
    // WASM reads.
    pending.catch(() => {
        initPromise = null;
    });
    return pending;
}
114
// Parses `content` with the parser registered for `language`.
//
// Returns the syntax tree, or null (after a warning) when the language has no
// parser or the parser yields no tree. Throws when no parser has been loaded
// at all. The returned Tree holds WASM memory: callers must call
// `tree.delete()` when finished — JS GC will not free it.
//
// Two grammar workarounds apply, both CONDITIONAL on the raw parse reporting
// errors and on the neutralizer actually changing the text:
//
// - swift, content containing `#if`: tree-sitter-swift mis-parses in-body #if
//   blocks. The directive-neutralized parse replaces the raw one only when it
//   is completely CLEAN. A file that already parses — including one with a
//   `#if`-looking line inside a multi-line string literal — is never
//   rewritten, and a neutralization that unbalances braces (a guard straddling
//   an opening brace) is discarded rather than producing wrong nesting.
//
// - objc, content containing `NS_ASSUME_NONNULL_`: the neutralized parse
//   replaces the raw one when it has NO MORE errors than the raw one. A header
//   may carry other unfixable macro opacity (NS_ENUM etc.), so the bar is
//   no-more-errors rather than zero. The comparison is deliberately `<=`, not
//   `<`: when the raw mis-parse buries a whole @interface inside ONE giant
//   ERROR node and the neutralized parse hoists that interface cleanly but
//   still errors on an intervening NS_ENUM, the error-NODE counts tie although
//   the neutralized parse is strictly better. Blanking a balanced STANDALONE
//   macro line only removes a stray token (never adds one or shifts braces),
//   so adopting a tie is safe.
//
// Both neutralizers blank text to equal length, so an adopted tree's offsets
// stay aligned with the ORIGINAL content the caller passes to extractSymbols.
// Whichever tree is not returned is deleted here.
export function parseFile(content, language) {
    if (parsers.size === 0) {
        throw new Error('parser not initialized; call initParser() first');
    }
    const parser = parsers.get(language);
    if (!parser) {
        log.warn(`parseFile: unsupported language "${language}"`);
        return null;
    }
    const raw = parser.parse(content);
    if (!raw) {
        log.warn(`parseFile: parser returned null for language "${language}"`);
        return null;
    }
    if (!raw.rootNode.hasError) {
        return raw;
    }
    // Pick the workaround (if any) for this language: the rewritten text plus
    // the rule deciding whether its parse may replace the raw one.
    let fallback = null;
    if (language === 'swift' && content.includes('#if')) {
        fallback = {
            text: neutralizeSwiftDirectives(content),
            accept: (candidate) => !candidate.rootNode.hasError,
        };
    }
    else if (language === 'objc' && content.includes('NS_ASSUME_NONNULL_')) {
        fallback = {
            text: neutralizeObjcDirectives(content),
            accept: (candidate) => countParseErrors(candidate.rootNode) <= countParseErrors(raw.rootNode),
        };
    }
    if (fallback === null || fallback.text === content) {
        return raw;
    }
    const candidate = parser.parse(fallback.text);
    if (candidate && fallback.accept(candidate)) {
        raw.delete();
        return candidate;
    }
    candidate?.delete();
    return raw;
}