codedeep-mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55):
  1. package/LICENSE +21 -0
  2. package/README.md +177 -0
  3. package/dist/config.js +223 -0
  4. package/dist/git/analyzer.js +177 -0
  5. package/dist/git/git-service.js +568 -0
  6. package/dist/git/head-watcher.js +113 -0
  7. package/dist/git/runner.js +204 -0
  8. package/dist/index.js +138 -0
  9. package/dist/indexer/code-index.js +1801 -0
  10. package/dist/indexer/complexity.js +633 -0
  11. package/dist/indexer/extractor.js +354 -0
  12. package/dist/indexer/languages/cpp.js +934 -0
  13. package/dist/indexer/languages/csharp.js +854 -0
  14. package/dist/indexer/languages/dart.js +777 -0
  15. package/dist/indexer/languages/go.js +665 -0
  16. package/dist/indexer/languages/java.js +507 -0
  17. package/dist/indexer/languages/kotlin.js +709 -0
  18. package/dist/indexer/languages/objc.js +397 -0
  19. package/dist/indexer/languages/php.js +771 -0
  20. package/dist/indexer/languages/python.js +455 -0
  21. package/dist/indexer/languages/ruby.js +697 -0
  22. package/dist/indexer/languages/rust.js +754 -0
  23. package/dist/indexer/languages/swift.js +691 -0
  24. package/dist/indexer/languages/typescript.js +485 -0
  25. package/dist/indexer/parser.js +175 -0
  26. package/dist/indexer/pipeline.js +342 -0
  27. package/dist/indexer/scanner.js +279 -0
  28. package/dist/indexer/watcher.js +353 -0
  29. package/dist/logger.js +16 -0
  30. package/dist/server.js +170 -0
  31. package/dist/tools/common.js +207 -0
  32. package/dist/tools/find-references.js +224 -0
  33. package/dist/tools/find-symbol.js +94 -0
  34. package/dist/tools/get-context.js +370 -0
  35. package/dist/tools/impact.js +218 -0
  36. package/dist/tools/overview.js +482 -0
  37. package/dist/tools/search-structure.js +303 -0
  38. package/dist/types.js +61 -0
  39. package/grammars/tree-sitter-c.wasm +0 -0
  40. package/grammars/tree-sitter-c_sharp.wasm +0 -0
  41. package/grammars/tree-sitter-cpp.wasm +0 -0
  42. package/grammars/tree-sitter-dart.wasm +0 -0
  43. package/grammars/tree-sitter-go.wasm +0 -0
  44. package/grammars/tree-sitter-java.wasm +0 -0
  45. package/grammars/tree-sitter-javascript.wasm +0 -0
  46. package/grammars/tree-sitter-kotlin.wasm +0 -0
  47. package/grammars/tree-sitter-objc.wasm +0 -0
  48. package/grammars/tree-sitter-php.wasm +0 -0
  49. package/grammars/tree-sitter-python.wasm +0 -0
  50. package/grammars/tree-sitter-ruby.wasm +0 -0
  51. package/grammars/tree-sitter-rust.wasm +0 -0
  52. package/grammars/tree-sitter-swift.wasm +0 -0
  53. package/grammars/tree-sitter-tsx.wasm +0 -0
  54. package/grammars/tree-sitter-typescript.wasm +0 -0
  55. package/package.json +67 -0
@@ -0,0 +1,507 @@
1
+ import { IMPORT_NAMESPACE, RECEIVER_OPAQUE } from '../../types.js';
2
+ import { SIGNATURE_DISPLAY_CAP, collectAmbiguousTypeNames, commentDocLine, isTrailingComment, normalizeSignature, resolveCalls, symbolId, } from '../extractor.js';
3
+ import { cFamilyBooleanOperatorKind, computeComplexity, isCFamilyBooleanOperator } from '../complexity.js';
4
// Function-like nodes whose bodies own their calls, so those calls must not be
// attributed to an enclosing body. `lambda_expression` is deliberately absent:
// a Java lambda can never be a symbol of its own (unlike a TS arrow assigned to
// a const), so pruning it would drop its calls entirely — calls inside
// `x -> f(x)` attribute to the enclosing method instead. A documented
// divergence from the TS arrow rule.
const JAVA_FUNCTION_BODY_SKIP_TYPES = new Set([
    'method_declaration',
    'constructor_declaration',
    'compact_constructor_declaration',
]);

// Every subtree a body walk refuses to enter: the callables above plus the
// five type declarations and `class_body`.
const JAVA_SKIP_TYPES = new Set([
    ...JAVA_FUNCTION_BODY_SKIP_TYPES,
    'class_declaration',
    'interface_declaration',
    'enum_declaration',
    'record_declaration',
    'annotation_type_declaration',
    // Anonymous classes: object_creation_expression carries a field-less
    // class_body child; pruning keeps anonymous internals (including field
    // initializers) out of every walk. Harmless as a PendingBody root —
    // walkCalls never checks the root's own type, only children.
    'class_body',
]);

// The four Java loop nodes — shared by the cyclomatic decision set and the
// cognitive surcharge set.
const JAVA_LOOP_NODE_TYPES = new Set([
    'for_statement',
    'enhanced_for_statement',
    'while_statement',
    'do_statement',
]);

// CYCLOMATIC decision nodes (sonar-java ComplexityVisitor, clean-room verified +
// oracled). Each adds +1. `switch_expression` (container), `catch`/`throw`/
// `finally`/`default`/plain break-continue, AND `lambda_expression` are
// deliberately absent; non-default `switch_label` and `&&`/`||` come via the
// javaCyclomaticExtra predicate. LAMBDAS are excluded entirely from a METHOD's
// cyclomatic number: sonar-java's ComplexityVisitor, when its root is the
// method, counts neither the lambda arrow nor the lambda body (a lambda is a
// separate unit) — confirmed by the oracle diff (counting them over-reported
// every lambda-bearing method). See JAVA_CYCLOMATIC_SKIP_TYPES below; cognitive
// still DESCENDS lambdas (with a nesting bump), the metric asymmetry.
const JAVA_DECISION_NODE_TYPES = new Set([
    ...JAVA_LOOP_NODE_TYPES,
    'if_statement',
    'ternary_expression',
]);

// Cyclomatic-only skip set: JAVA_SKIP_TYPES plus `lambda_expression`, so a
// lambda's arrow + body are excluded from the enclosing method's cyclomatic
// number (sonar's per-method behavior). NOT added to JAVA_SKIP_TYPES itself —
// that set is shared with resolveCalls (which attributes lambda calls to the
// method) and with the cognitive walk (which descends lambdas).
const JAVA_CYCLOMATIC_SKIP_TYPES = new Set([
    ...JAVA_SKIP_TYPES,
    'lambda_expression',
]);

// The "+1 per node" cyclomatic cases a flat type set can't express, composed
// into the engine's `extraDecisionPredicate` slot:
//   (1) a NON-DEFAULT `switch_label` — a `default` label has zero named
//       children (just the keyword token), a `case X`/`case X,Y` label has >=1;
//       this counts each case label like sonar-java's CASE_LABEL, default
//       excluded;
//   (2) the C-family `&&`/`||` (one binary_expression node for all operators —
//       the shared helper reads the operator token).
// Returns true when `node` contributes one cyclomatic decision point.
function javaCyclomaticExtra(node) {
    if (node.type === 'switch_label') {
        return node.namedChildCount > 0;
    }
    return isCFamilyBooleanOperator(node);
}

// COGNITIVE config (sonar-java CognitiveComplexityVisitor, clean-room verified;
// all node names AST-dumped against the bundled grammar). See complexity.ts.
const JAVA_COGNITIVE_OPTIONS = {
    ifType: 'if_statement',
    conditionField: 'condition',
    consequenceField: 'consequence',
    alternativeField: 'alternative',
    loopTypes: JAVA_LOOP_NODE_TYPES,
    // Colon AND Java-14 arrow switch are BOTH `switch_expression` (no
    // `switch_statement` node); the whole switch is +1 regardless of case count.
    switchTypes: new Set(['switch_expression']),
    ternaryType: 'ternary_expression',
    // Catch is recognized by its own node type, so this covers BOTH plain
    // `try_statement` and `try_with_resources_statement` for free.
    catchType: 'catch_clause',
    // Lambdas raise nesting but add nothing (NOT "+1 hybrid").
    nestOnlyTypes: new Set(['lambda_expression']),
    labeledJumpTypes: new Set(['break_statement', 'continue_statement']),
    // A labeled break/continue carries its label as an `identifier` child.
    // Comments are NAMED extras and may sit between the keyword and the `;`
    // (`break /* done */;`), so a bare named-child count would misreport that
    // unlabeled jump as labeled — look for the identifier itself instead.
    hasLabel: (node) => node.namedChildren.some((child) => child?.type === 'identifier'),
    // Java has `&&`/`||` (no `??`); reuse the shared C-family token reader and
    // discard any other operator it may report.
    booleanOperatorKind: (node) => {
        const op = cFamilyBooleanOperatorKind(node);
        return op === '&&' || op === '||' ? op : null;
    },
    parenthesizedType: 'parenthesized_expression',
};
98
// Node types accepted as a bare callee. `object_creation_expression`'s callee
// is a type_identifier, never a plain identifier — without the second entry,
// every `new X()` ref would be dropped.
const JAVA_BARE_CALLEE_TYPES = new Set([
    'identifier',
    'type_identifier',
]);

// Intentionally empty: a bare `foo()` in Java is ALWAYS a method call — fields
// and classes are never bare-callable — so identifier callees bind only
// through the enclosing-class fallback, never the callable-name map.
const JAVA_BARE_CALLABLE_KINDS = new Set();

// Symbol kinds a `new X()` may bind to: classes (records included) and
// interfaces — anonymous implementations (`new Iface() { ... }`) are real
// instantiation sites. Enums can't be instantiated, so they stay out.
const JAVA_CONSTRUCTOR_KINDS = new Set([
    'class',
    'interface',
]);

// Type kinds sharing the simple-name FQN namespace — duplicates among these
// are excluded from extract-time resolution (collectAmbiguousTypeNames).
const JAVA_TYPE_KINDS = new Set([
    'class',
    'interface',
    'enum',
]);
112
// Call-site selectors: which node types are calls and how to reach the callee.
const JAVA_SELECTORS = [
    {
        nodeType: 'method_invocation',
        // method_invocation has no single callee field: bare calls expose only
        // `name:`, member calls `object:` + `name:`. The member form hands back
        // the invocation node itself so javaMemberCallInfo can read both fields.
        getCallee(invocation) {
            const hasReceiver = Boolean(invocation.childForFieldName('object'));
            return hasReceiver ? invocation : invocation.childForFieldName('name');
        },
    },
    {
        nodeType: 'object_creation_expression',
        getCallee: objectCreationCallee,
    },
];
122
// Symbol kinds for the five type-declaration node types; doubles as the
// "is this a type declaration" test during body iteration
// (`TYPE_KIND[node.type] !== undefined`). Records map to 'class' and
// annotation types to 'interface'.
//
// Built on a null prototype and frozen: with a plain object literal the
// membership test would also succeed for inherited keys such as `constructor`
// or `toString`, so an unexpected node type could be mistaken for a type
// declaration.
const TYPE_KIND = Object.freeze(Object.assign(Object.create(null), {
    class_declaration: 'class',
    interface_declaration: 'interface',
    enum_declaration: 'enum',
    record_declaration: 'class',
    annotation_type_declaration: 'interface',
}));
131
// Picks the callee node out of an object_creation_expression:
//   `new Widget()`            → type_identifier (bare path, binds to the class
//                               symbol);
//   `new ArrayList<String>()` → generic_type wrapping the real type, which is
//                               unwrapped one level here;
//   `new pkg.Thing()` / `new Outer.Inner()` → scoped_type_identifier (member
//                               path, single level only).
// Any other shape, or a missing `type` field, yields null.
function objectCreationCallee(node) {
    const declared = node.childForFieldName('type');
    const target = declared?.type === 'generic_type' ? declared.firstNamedChild : declared;
    switch (target?.type) {
        case 'type_identifier':
        case 'scoped_type_identifier':
            return target;
        default:
            return null;
    }
}
145
// True for either of the two comment node types this file recognizes.
function isComment(node) {
    switch (node.type) {
        case 'line_comment':
        case 'block_comment':
            return true;
        default:
            return false;
    }
}
148
// Dominant Java stdlib/collection/stream/string method names (>=4 chars)
// suppressed when a member call to them is unresolved — capturing chained
// calls otherwise floods the name-keyed store with `.stream().filter()`-style
// noise. Domain method names are deliberately absent. <=3-char names (`.add`,
// `.get`, `.put`) are gated downstream by SHORT_NAME_THRESHOLD.
const JAVA_IGNORED_MEMBER_CALLEES = new Set([
    'stream',
    'filter',
    'collect',
    'forEach',
    'flatMap',
    'reduce',
    'sorted',
    'distinct',
    'limit',
    'count',
    'anyMatch',
    'allMatch',
    'noneMatch',
    'findFirst',
    'findAny',
    'toList',
    'toArray',
    'contains',
    'containsKey',
    'containsValue',
    'isEmpty',
    'remove',
    'clear',
    'size',
    'keySet',
    'values',
    'entrySet',
    'iterator',
    'hasNext',
    'append',
    'toString',
    'equals',
    'hashCode',
    'length',
    'substring',
    'indexOf',
    'replace',
    'trim',
    'split',
    'startsWith',
    'endsWith',
    'charAt',
    'matches',
    'format',
    'valueOf',
    'getOrDefault',
    'putIfAbsent',
    'orElse',
    'orElseGet',
    'orElseThrow',
    'isPresent',
    'ifPresent',
    'getClass',
    'println',
    'print',
    'printf',
]);
164
// Peels transparent receiver wrappers off a member-call receiver so a wrapped
// receiver resolves like the bare form: parenthesized `(a).m()` and the classic
// downcast `((T)a).m()` (a `cast_expression` whose operand is its `value` field
// — Java has no force-unwrap operator, so the cast is its analog). Leading
// comment nodes inside the parens are skipped via isComment (tree-sitter-java
// names them `line_comment`/`block_comment`, never `comment`).
//
// Returns the innermost node reached; a wrapper with nothing inside it is
// returned as-is. Each step strictly descends a finite tree, so the loop
// always terminates.
function unwrapJavaReceiver(node) {
    // One peeling step: the node wrapped by `wrapper`, or a falsy value when
    // `wrapper` is not a wrapper or has nothing usable inside.
    const peel = (wrapper) => {
        if (wrapper.type === 'cast_expression') {
            return wrapper.childForFieldName('value');
        }
        if (wrapper.type !== 'parenthesized_expression') {
            return null;
        }
        let candidate = wrapper.firstNamedChild;
        while (candidate && isComment(candidate)) {
            candidate = candidate.nextNamedSibling;
        }
        return candidate;
    };
    let current = node;
    for (let inner = peel(current); inner; inner = peel(current)) {
        current = inner;
    }
    return current;
}
193
// Describes a member-style callee as `{ receiver, property, isSelf }`, or
// returns null when nothing should be emitted.
//
// `this.x()` and `obj.x()` carry their literal receiver token; chained and
// computed receivers (`a.b.c()`, `System.out.println()`, `foo().bar()`) carry
// RECEIVER_OPAQUE so the called method stays findable by name (recall) but
// never resolves. `super.x()` (object is a `super` node — the grammar has no
// super_method_invocation) and computed (non-`identifier` name) emit nothing.
function javaMemberCallInfo(callee) {
    switch (callee.type) {
        case 'method_invocation': {
            const receiverNode = callee.childForFieldName('object');
            const nameNode = callee.childForFieldName('name');
            if (!receiverNode || nameNode?.type !== 'identifier') {
                return null;
            }
            // Unwrap parens/cast first so `(a).m()` and `((T)a).m()` resolve
            // like `a.m()`. The super-drop is decided AFTER the unwrap so a
            // super receiver is dropped regardless of wrapping — a bare `super`
            // is the only real case (`(super)` / `((T)super)` are illegal Java
            // and error-parse to a method call whose object is already a bare
            // `super`).
            const receiver = unwrapJavaReceiver(receiverNode);
            const property = nameNode.text;
            switch (receiver.type) {
                case 'this':
                    return { receiver: 'this', property, isSelf: true };
                case 'identifier':
                    return { receiver: receiver.text, property, isSelf: false };
                case 'super':
                    return null;
                default:
                    return { receiver: RECEIVER_OPAQUE, property, isSelf: false };
            }
        }
        case 'scoped_type_identifier': {
            // Positional children, no fields — and comments are NAMED extras
            // that can sit between the two type_identifiers, so drop them
            // before taking the first two slots. Deeper qualification nests
            // another scoped_type_identifier in slot 0 and is skipped (chained
            // analog).
            const [scope, name] = callee.namedChildren.filter((child) => !isComment(child));
            const bothSimple = scope?.type === 'type_identifier' && name?.type === 'type_identifier';
            return bothSimple
                ? { receiver: scope.text, property: name.text, isSelf: false }
                : null;
        }
        default:
            return null;
    }
}
233
// Entry point for one parsed Java file: collects symbols and imports from the
// top-level declarations, resolves call references over the collected bodies,
// runs the complexity pass, and returns `{ symbols, references, imports }`.
export function extractJava(tree, content, fileInfo) {
    const symbols = [];
    const imports = [];
    const bodies = [];
    for (const topLevel of tree.rootNode.namedChildren) {
        const { type } = topLevel;
        if (type === 'import_declaration') {
            extractImport(topLevel, fileInfo, imports);
            continue;
        }
        // package_declaration, comments, module_declaration — no symbols.
        if (TYPE_KIND[type] === undefined) {
            continue;
        }
        extractType(topLevel, content, fileInfo, '', true, false, symbols, bodies);
    }
    const resolveOptions = {
        bareCalleeTypes: JAVA_BARE_CALLEE_TYPES,
        // Implicit this: a bare `foo()` inside a class body is a method call
        // on the enclosing class (Java has no top-level functions), so bare
        // calls resolve against the enclosing class's methods and nothing else.
        bareCallsBindToEnclosingClass: true,
        bareCallableKinds: JAVA_BARE_CALLABLE_KINDS,
        constructorKinds: JAVA_CONSTRUCTOR_KINDS,
        // Two same-named types in one file (e.g. a `Builder` under two
        // different outers) share the simple-name FQN; resolving through them
        // first-wins would bind calls to the WRONG class, so their names are
        // excluded from extract-time resolution entirely (calls stay
        // unresolved instead).
        ambiguousClassNames: collectAmbiguousTypeNames(symbols, JAVA_TYPE_KINDS),
        ignoredMemberCallees: JAVA_IGNORED_MEMBER_CALLEES,
    };
    const references = resolveCalls(bodies, tree.rootNode, symbols, fileInfo, JAVA_SELECTORS, JAVA_SKIP_TYPES, JAVA_FUNCTION_BODY_SKIP_TYPES, javaMemberCallInfo, resolveOptions);
    const complexityOptions = {
        decisionNodeTypes: JAVA_DECISION_NODE_TYPES,
        extraDecisionPredicate: javaCyclomaticExtra,
        skipTypes: JAVA_SKIP_TYPES,
        cyclomaticSkipTypes: JAVA_CYCLOMATIC_SKIP_TYPES,
        cognitive: JAVA_COGNITIVE_OPTIONS,
    };
    computeComplexity(bodies, symbols, complexityOptions);
    return { symbols, references, imports };
}
272
// Extracts a type declaration and recurses through its body. Recursion only
// ever enters type bodies (class/interface/enum) — local classes inside
// method blocks and anonymous classes are never reached, which implements
// the "top-level and class-level only" scope rule structurally.
//
// Pushes the type's symbol onto `outSymbols` and, when it has a body, a
// PendingBody for it onto `outBodies`; members are delegated to extractMember
// and nested types to a recursive call. Declarations without a name are
// ignored.
function extractType(decl, content, fileInfo, qualifier, containerExported, inInterface, outSymbols, outBodies) {
    const typeName = decl.childForFieldName('name')?.text;
    if (!typeName) {
        return;
    }
    const modifiers = findModifiers(decl);
    // Member types of interfaces are implicitly public (JLS 9.5 — and unlike
    // methods, they can't be declared private).
    const exported = containerExported && (inInterface || hasModifier(modifiers, 'public', 'protected'));
    // Nested types keep a simple-name FQN (`file:Inner` — a deeper dotted FQN
    // would trip classNameFromFqn's member parsing); the enclosing chain goes
    // into the hashed qualifier instead, so same-named nested types in one
    // file keep distinct ids.
    const typeSymbol = makeJavaSymbol(decl, javaSignature(decl, content, modifiers), fileInfo, TYPE_KIND[decl.type], typeName, `${fileInfo.path}:${typeName}`, exported, qualifier);
    outSymbols.push(typeSymbol);
    // @interface is declaration-only: elements mirror the enum-constant
    // exclusion, and annotation bodies carry no executable code — so its body
    // is never looked at.
    const body = decl.type === 'annotation_type_declaration' ? null : decl.childForFieldName('body');
    if (!body) {
        return;
    }
    // Walk the type body as the type's own PendingBody: field initializers,
    // static/instance initializer blocks, and enum constant arguments
    // (`RED(2)`) attribute to the type symbol. JAVA_SKIP_TYPES keeps
    // method-body calls attributed to the methods.
    outBodies.push({ symbolId: typeSymbol.id, body, className: typeName });
    const nestedQualifier = qualifier ? `${qualifier}.${typeName}` : typeName;
    const isInterfaceBody = decl.type === 'interface_declaration';
    // Enum members hide one level deeper: enum_body holds enum_constants plus
    // an enum_body_declarations section after the `;`. Constants are never
    // symbols (the enum-member rule); constant bodies (`BLUE { ... }`) are
    // class_body nodes pruned like anonymous classes.
    const members = decl.type === 'enum_declaration' ? enumMemberNodes(body) : body.namedChildren;
    for (const member of members) {
        const isNestedType = TYPE_KIND[member.type] !== undefined;
        if (isNestedType) {
            extractType(member, content, fileInfo, nestedQualifier, exported, isInterfaceBody, outSymbols, outBodies);
        }
        else {
            extractMember(member, content, fileInfo, typeName, nestedQualifier, exported, isInterfaceBody, outSymbols, outBodies);
        }
    }
}
319
// Returns the named children of the first `enum_body_declarations` child of an
// enum body, or an empty array when the enum has no such section.
function enumMemberNodes(enumBody) {
    const declarations = enumBody.namedChildren.find((child) => child.type === 'enum_body_declarations');
    return declarations ? declarations.namedChildren : [];
}
326
// Extracts the symbol(s) for one member of a type body. Methods and
// constructors go through extractCallable; field/constant declarations yield
// one 'variable' symbol per declarator. Everything else — static_initializer,
// enum constants, annotation elements, stray `;` — produces no symbol;
// initializer-block calls attribute via the type-body walk.
function extractMember(member, content, fileInfo, className, qualifier, containerExported, inInterface, outSymbols, outBodies) {
    // Interface members are implicitly public — except explicitly `private`
    // ones (legal on interface methods since Java 9). Elsewhere a member is
    // exported only when it carries its own public/protected modifier AND
    // every enclosing type is exported.
    const mods = findModifiers(member);
    const exported = containerExported &&
        (inInterface ? !hasModifier(mods, 'private') : hasModifier(mods, 'public', 'protected'));
    const emitCallable = (symName) => {
        extractCallable(member, symName, content, fileInfo, className, qualifier, exported, mods, outSymbols, outBodies);
    };
    const { type } = member;
    if (type === 'method_declaration') {
        const methodName = member.childForFieldName('name')?.text;
        if (methodName) {
            emitCallable(methodName);
        }
        return;
    }
    if (type === 'constructor_declaration' || type === 'compact_constructor_declaration') {
        // Named `constructor` per the established convention (FQN
        // `file:Class.constructor`) — the AST name field repeats the class
        // name, which would pair a same-named method with the class symbol in
        // every lookup. `new C()` refs bind to the CLASS symbol instead.
        emitCallable('constructor');
        return;
    }
    if (type !== 'field_declaration' && type !== 'constant_declaration') {
        return;
    }
    // constant_declaration is the interface-constant variant — a distinct
    // node type with the same internal shape. One field_declaration can carry
    // multiple declarator: fields (`int a = 1, b;`) — one symbol per
    // variable_declarator; the shared signature is fine, ids differ by name.
    const declarationText = content.slice(signatureStart(member, mods), member.endIndex);
    const signature = normalizeSignature(declarationText.replace(/;\s*$/, ''));
    for (const declarator of member.childrenForFieldName('declarator')) {
        if (declarator?.type !== 'variable_declarator') {
            continue;
        }
        const fieldName = declarator.childForFieldName('name')?.text;
        if (!fieldName) {
            continue;
        }
        const fqn = `${fileInfo.path}:${className}.${fieldName}`;
        outSymbols.push(makeJavaSymbol(member, signature, fileInfo, 'variable', fieldName, fqn, exported, qualifier));
    }
}
374
// Emits the 'method' symbol for a method or constructor under the name
// `symName`, and queues its body (when it has one) for the call/complexity
// walks.
function extractCallable(member, symName, content, fileInfo, className, qualifier, exported, mods, outSymbols, outBodies) {
    const signature = javaSignature(member, content, mods);
    const fqn = `${fileInfo.path}:${className}.${symName}`;
    const callable = makeJavaSymbol(member, signature, fileInfo, 'method', symName, fqn, exported, qualifier);
    outSymbols.push(callable);
    // The body field is `block` for methods and compact record constructors
    // but `constructor_body` for constructors; abstract/interface methods
    // have none (the symbol is still extracted, mirroring TS signatures).
    const body = member.childForFieldName('body');
    if (!body) {
        return;
    }
    outBodies.push({ symbolId: callable.id, body, className });
}
384
// Appends one import record for an import_declaration to `out`; does nothing
// when the statement has no recognizable payload.
//
// Payload is a scoped_identifier (fields scope:/name:) or a bare identifier;
// wildcard imports add a named `asterisk` child. The `static` keyword is an
// anonymous token and needs no special handling — the scope/name split
// already yields `a.b.C` + `m` for static imports.
function extractImport(stmt, fileInfo, out) {
    let payload = null;
    let wildcard = false;
    for (const child of stmt.namedChildren) {
        switch (child.type) {
            case 'scoped_identifier':
            case 'identifier':
                payload = child;
                break;
            case 'asterisk':
                wildcard = true;
                break;
            default:
                break;
        }
    }
    if (!payload) {
        return;
    }
    // Every form imports exactly one name from one source module.
    const record = (sourceModule, name) => {
        out.push({
            file: fileInfo.path,
            sourceModule,
            importedNames: [{ name }],
            line: stmt.startPosition.row + 1,
        });
    };
    if (wildcard) {
        // `import a.b.*;` — the whole payload is the module.
        record(payload.text, IMPORT_NAMESPACE);
        return;
    }
    if (payload.type !== 'scoped_identifier') {
        // Bare `import Foo;` — default-package import, rare/legacy.
        record(payload.text, payload.text);
        return;
    }
    const nameNode = payload.childForFieldName('name');
    if (!nameNode) {
        return;
    }
    record(payload.childForFieldName('scope')?.text ?? '', nameNode.text);
}
424
// Builds the symbol record for a declaration. `signature` is the full
// normalized signature; `qualifier` (default '') is passed to symbolId along
// with the file path, name, kind and signature.
function makeJavaSymbol(decl, signature, fileInfo, kind, name, fqn, exported, qualifier = '') {
    const { path: file, language } = fileInfo;
    // The id hashes the FULL signature; only the stored copy is capped —
    // otherwise overloads differing past the cap share an id (JG1: rxjava's
    // 10 `just` overloads collapsed to 5 ids).
    const id = symbolId(file, name, kind, signature, qualifier);
    // Annotations live inside the declaration node (its modifiers child), so
    // startLine is the first annotation's line — same as Python's
    // decorated_definition range.
    const startLine = decl.startPosition.row + 1;
    const endLine = decl.endPosition.row + 1;
    const displaySignature = signature.slice(0, SIGNATURE_DISPLAY_CAP);
    const doc = extractJavaDoc(decl);
    return {
        id,
        name,
        fqn,
        kind,
        file,
        startLine,
        endLine,
        signature: displaySignature,
        doc,
        exported,
        language,
    };
}
445
// Returns the declaration's `modifiers` node, or null when it has none.
//
// `modifiers` is a named CHILD, not a field — childForFieldName('modifiers')
// returns null despite "modifiers" appearing in the grammar's field table.
// It is absent entirely on modifier-less declarations, so never address
// children by index. Each declaration finds its modifiers ONCE and threads the
// node through the exported/signature helpers.
function findModifiers(decl) {
    return decl.namedChildren.find((child) => child.type === 'modifiers') ?? null;
}
457
// True when `mods` has a direct child whose type is one of `wanted`; always
// false for a missing (null) modifiers node.
//
// Keyword tokens inside `modifiers` are anonymous children whose type IS the
// literal text; annotations are named marker_annotation/annotation children.
function hasModifier(mods, ...wanted) {
    if (!mods) {
        return false;
    }
    return mods.children.some((child) => Boolean(child) && wanted.includes(child.type));
}
468
// Returns the normalized signature text of a declaration.
//
// Signature runs from the first non-annotation modifier token (or the
// declaration start) to the body start. Annotations are excluded — unlike
// Python's decorators-in-signature — because Spring/JUnit annotation blocks
// routinely exceed the 120-char cap, which would truncate the declaration
// proper out of the display and let same-name overloads collide on identical
// truncated signatures (= identical symbol ids). Body-less callables
// (abstract/interface methods) run to the declaration end with the trailing
// `;` stripped, matching the field path.
function javaSignature(decl, content, mods) {
    const body = decl.childForFieldName('body');
    if (body) {
        return normalizeSignature(content.slice(signatureStart(decl, mods), body.startIndex));
    }
    const declarationText = content.slice(signatureStart(decl, mods), decl.endIndex);
    return normalizeSignature(declarationText.replace(/;\s*$/, ''));
}
483
// Start index of a declaration's signature text: the declaration start when
// there is no modifiers node, otherwise the first modifiers child that is
// neither an annotation nor a comment.
function signatureStart(decl, mods) {
    if (!mods) {
        return decl.startIndex;
    }
    const firstKeyword = mods.children.find((child) => child &&
        child.type !== 'marker_annotation' &&
        child.type !== 'annotation' &&
        !isComment(child));
    // All-annotation modifiers (`@Override void f()`): start past them.
    return firstKeyword ? firstKeyword.startIndex : mods.endIndex;
}
495
// Returns the doc line for a declaration, or null when it has none.
//
// Javadoc (and plain comments) precede the declaration as named
// block_comment/line_comment siblings — annotations don't break adjacency
// because they live inside the declaration's modifiers child. A comment
// trailing an earlier statement on its own line is not doc for the next
// declaration and is rejected.
function extractJavaDoc(decl) {
    const preceding = decl.previousNamedSibling;
    const isDocCandidate = Boolean(preceding) && isComment(preceding) && !isTrailingComment(preceding);
    return isDocCandidate ? commentDocLine(preceding.text) : null;
}