codedeep-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +177 -0
- package/dist/config.js +223 -0
- package/dist/git/analyzer.js +177 -0
- package/dist/git/git-service.js +568 -0
- package/dist/git/head-watcher.js +113 -0
- package/dist/git/runner.js +204 -0
- package/dist/index.js +138 -0
- package/dist/indexer/code-index.js +1801 -0
- package/dist/indexer/complexity.js +633 -0
- package/dist/indexer/extractor.js +354 -0
- package/dist/indexer/languages/cpp.js +934 -0
- package/dist/indexer/languages/csharp.js +854 -0
- package/dist/indexer/languages/dart.js +777 -0
- package/dist/indexer/languages/go.js +665 -0
- package/dist/indexer/languages/java.js +507 -0
- package/dist/indexer/languages/kotlin.js +709 -0
- package/dist/indexer/languages/objc.js +397 -0
- package/dist/indexer/languages/php.js +771 -0
- package/dist/indexer/languages/python.js +455 -0
- package/dist/indexer/languages/ruby.js +697 -0
- package/dist/indexer/languages/rust.js +754 -0
- package/dist/indexer/languages/swift.js +691 -0
- package/dist/indexer/languages/typescript.js +485 -0
- package/dist/indexer/parser.js +175 -0
- package/dist/indexer/pipeline.js +342 -0
- package/dist/indexer/scanner.js +279 -0
- package/dist/indexer/watcher.js +353 -0
- package/dist/logger.js +16 -0
- package/dist/server.js +170 -0
- package/dist/tools/common.js +207 -0
- package/dist/tools/find-references.js +224 -0
- package/dist/tools/find-symbol.js +94 -0
- package/dist/tools/get-context.js +370 -0
- package/dist/tools/impact.js +218 -0
- package/dist/tools/overview.js +482 -0
- package/dist/tools/search-structure.js +303 -0
- package/dist/types.js +61 -0
- package/grammars/tree-sitter-c.wasm +0 -0
- package/grammars/tree-sitter-c_sharp.wasm +0 -0
- package/grammars/tree-sitter-cpp.wasm +0 -0
- package/grammars/tree-sitter-dart.wasm +0 -0
- package/grammars/tree-sitter-go.wasm +0 -0
- package/grammars/tree-sitter-java.wasm +0 -0
- package/grammars/tree-sitter-javascript.wasm +0 -0
- package/grammars/tree-sitter-kotlin.wasm +0 -0
- package/grammars/tree-sitter-objc.wasm +0 -0
- package/grammars/tree-sitter-php.wasm +0 -0
- package/grammars/tree-sitter-python.wasm +0 -0
- package/grammars/tree-sitter-ruby.wasm +0 -0
- package/grammars/tree-sitter-rust.wasm +0 -0
- package/grammars/tree-sitter-swift.wasm +0 -0
- package/grammars/tree-sitter-tsx.wasm +0 -0
- package/grammars/tree-sitter-typescript.wasm +0 -0
- package/package.json +67 -0
|
@@ -0,0 +1,507 @@
|
|
|
1
|
+
import { IMPORT_NAMESPACE, RECEIVER_OPAQUE } from '../../types.js';
|
|
2
|
+
import { SIGNATURE_DISPLAY_CAP, collectAmbiguousTypeNames, commentDocLine, isTrailingComment, normalizeSignature, resolveCalls, symbolId, } from '../extractor.js';
|
|
3
|
+
import { cFamilyBooleanOperatorKind, computeComplexity, isCFamilyBooleanOperator } from '../complexity.js';
|
|
4
|
+
// Function-like nodes whose bodies contain calls that shouldn't attribute
|
|
5
|
+
// to an enclosing body. lambda_expression is deliberately absent: Java
|
|
6
|
+
// lambdas can never be symbols of their own (unlike TS arrows assigned to
|
|
7
|
+
// consts), so pruning them would drop their calls entirely — calls inside
|
|
8
|
+
// `x -> f(x)` attribute to the enclosing method instead. A documented
|
|
9
|
+
// divergence from the TS arrow rule.
|
|
10
|
+
// Callable declarations. Calls found inside one of these bodies belong to that
// callable, never to whatever encloses it. `lambda_expression` is left out on
// purpose: a Java lambda is never a symbol of its own, so pruning it would
// lose its calls — they are attributed to the enclosing method instead.
const JAVA_FUNCTION_BODY_SKIP_TYPES = new Set([
    'method_declaration', 'constructor_declaration', 'compact_constructor_declaration',
]);
// Full prune set for body walks: the callables above, the five type
// declarations, and `class_body`. The last entry covers anonymous classes
// (object_creation_expression carries a class_body child), keeping their
// internals — field initializers included — out of every walk.
const JAVA_SKIP_TYPES = new Set(JAVA_FUNCTION_BODY_SKIP_TYPES);
for (const nodeType of [
    'class_declaration',
    'interface_declaration',
    'enum_declaration',
    'record_declaration',
    'annotation_type_declaration',
    'class_body',
]) {
    JAVA_SKIP_TYPES.add(nodeType);
}
// Java's four loop statements; used both as cyclomatic decision nodes and as
// the cognitive-complexity loop set.
const JAVA_LOOP_NODE_TYPES = new Set(['for_statement', 'enhanced_for_statement', 'while_statement', 'do_statement']);
// Node types that each add +1 to the cyclomatic number: the loops plus `if`
// and the ternary. Non-default switch labels and `&&`/`||` are counted by the
// javaCyclomaticExtra predicate rather than by this set; `switch_expression`,
// catch/throw/finally/default, plain break/continue and lambdas are
// intentionally not listed.
const JAVA_DECISION_NODE_TYPES = new Set([...JAVA_LOOP_NODE_TYPES].concat('if_statement', 'ternary_expression'));
// Prune set for the cyclomatic walk only: JAVA_SKIP_TYPES plus
// `lambda_expression`, so a lambda's arrow and body do not count toward the
// enclosing method. Built as a copy so JAVA_SKIP_TYPES itself (also handed to
// resolveCalls and used as the general skip set) never gains the lambda entry.
const JAVA_CYCLOMATIC_SKIP_TYPES = new Set(JAVA_SKIP_TYPES).add('lambda_expression');
|
|
60
|
+
// The "+1 per node" cyclomatic cases a flat type set can't express, composed
|
|
61
|
+
// into the engine's `extraDecisionPredicate` slot:
|
|
62
|
+
// (1) a NON-DEFAULT `switch_label` — a `default` label has zero named children
|
|
63
|
+
// (just the keyword token), a `case X`/`case X,Y` label has ≥1; this counts each
|
|
64
|
+
// case label like sonar-java's CASE_LABEL, default excluded; (2) the C-family
|
|
65
|
+
// `&&`/`||` (one binary_expression node for all operators — read the op token).
|
|
66
|
+
/**
 * Extra "+1" cyclomatic predicate for cases a flat node-type set cannot
 * express.
 *
 * A `switch_label` counts only when it has at least one named child: a
 * `default` label is just the keyword token, while `case X` / `case X, Y`
 * carries its values as named children. Every other node is delegated to the
 * shared C-family `&&` / `||` check.
 *
 * @param {object} node - tree-sitter node being visited.
 * @returns {boolean} result of the label test, or whatever
 *   isCFamilyBooleanOperator returns for non-label nodes.
 */
function javaCyclomaticExtra(node) {
    const isSwitchLabel = node.type === 'switch_label';
    return isSwitchLabel ? node.namedChildCount > 0 : isCFamilyBooleanOperator(node);
}
|
|
71
|
+
// COGNITIVE config (sonar-java CognitiveComplexityVisitor, clean-room verified;
|
|
72
|
+
// all node names AST-dumped against the bundled grammar). See complexity.ts.
|
|
73
|
+
// Cognitive-complexity configuration for Java, passed to computeComplexity as
// its `cognitive` option. Values are tree-sitter-java node/field names.
const JAVA_COGNITIVE_OPTIONS = {
    // `if` statement and the fields holding its condition and branches.
    ifType: 'if_statement',
    conditionField: 'condition',
    consequenceField: 'consequence',
    alternativeField: 'alternative',
    loopTypes: JAVA_LOOP_NODE_TYPES,
    // Both the colon form and the Java-14 arrow form parse as
    // `switch_expression` (the grammar has no `switch_statement`); a switch is
    // +1 as a whole, independent of how many cases it has.
    switchTypes: new Set(['switch_expression']),
    ternaryType: 'ternary_expression',
    // Matching on the clause itself covers plain try and try-with-resources.
    catchType: 'catch_clause',
    // A lambda deepens nesting without adding an increment of its own.
    nestOnlyTypes: new Set(['lambda_expression']),
    labeledJumpTypes: new Set(['break_statement', 'continue_statement']),
    // The optional label identifier is the only named child of break/continue.
    hasLabel(jump) {
        return jump.namedChildCount > 0;
    },
    // Java only has `&&` and `||`; any other token reported by the shared
    // C-family reader is mapped to null.
    booleanOperatorKind(expr) {
        const token = cFamilyBooleanOperatorKind(expr);
        if (token === '&&' || token === '||') {
            return token;
        }
        return null;
    },
    parenthesizedType: 'parenthesized_expression',
};
|
|
98
|
+
// `object_creation_expression`'s callee is a type_identifier, never a plain
|
|
99
|
+
// identifier — without this, every `new X()` ref would be dropped.
|
|
100
|
+
// Node types accepted as a "bare" callee. `type_identifier` is required
// because the callee of `new X()` is a type_identifier, never an identifier.
const JAVA_BARE_CALLEE_TYPES = new Set([
    'identifier',
    'type_identifier',
]);
// Deliberately empty: a bare `foo()` in Java is always a method call, so
// identifier callees bind only through the enclosing-class fallback and never
// through the callable-name map.
const JAVA_BARE_CALLABLE_KINDS = new Set([]);
// Symbol kinds `new X()` may bind to. Interfaces are included because
// `new Iface() { ... }` is a real instantiation site; enums cannot be
// instantiated and are left out.
const JAVA_CONSTRUCTOR_KINDS = new Set([
    'class',
    'interface',
]);
// Kinds that share the simple-name FQN namespace; duplicate names among them
// are excluded from extract-time resolution via collectAmbiguousTypeNames.
const JAVA_TYPE_KINDS = new Set([
    'class',
    'interface',
    'enum',
]);
|
|
112
|
+
// Call-site selectors handed to resolveCalls: which node types are calls and
// how to obtain the callee node from each.
const JAVA_SELECTORS = [
    {
        nodeType: 'method_invocation',
        // method_invocation has no single callee field. A bare call exposes
        // only `name:`; a member call has `object:` + `name:`, in which case
        // the invocation node itself is returned so javaMemberCallInfo can
        // read both fields.
        getCallee(invocation) {
            const hasReceiver = invocation.childForFieldName('object');
            return hasReceiver ? invocation : invocation.childForFieldName('name');
        },
    },
    {
        nodeType: 'object_creation_expression',
        getCallee: objectCreationCallee,
    },
];
|
|
122
|
+
// Symbol kinds for the five type-declaration node types; doubles as the
|
|
123
|
+
// "is this a type declaration" test during body iteration.
|
|
124
|
+
// Maps each of the five type-declaration node types to the symbol kind it is
// stored under. A lookup yielding `undefined` also serves as the "not a type
// declaration" test. Records are stored as classes and annotation types as
// interfaces.
const TYPE_KIND = Object.fromEntries([
    ['class_declaration', 'class'],
    ['interface_declaration', 'interface'],
    ['enum_declaration', 'enum'],
    ['record_declaration', 'class'],
    ['annotation_type_declaration', 'interface'],
]);
|
|
131
|
+
// `new Widget()` → type_identifier (bare path, binds to the class symbol);
|
|
132
|
+
// `new ArrayList<String>()` → generic_type wrapping the real type;
|
|
133
|
+
// `new pkg.Thing()` / `new Outer.Inner()` → scoped_type_identifier
|
|
134
|
+
// (member path, single level only).
|
|
135
|
+
/**
 * Picks the callee node of an `object_creation_expression`.
 *
 * Reads the `type` field; when it is a `generic_type` (e.g.
 * `new ArrayList<String>()`) the first named child is used instead. Only a
 * `type_identifier` (`new Widget()`) or a `scoped_type_identifier`
 * (`new Outer.Inner()`) is accepted.
 *
 * @param {object} node - the object_creation_expression node.
 * @returns {object|null} the accepted type node, or null for any other shape.
 */
function objectCreationCallee(node) {
    const declared = node.childForFieldName('type');
    const target = declared?.type === 'generic_type' ? declared.firstNamedChild : declared;
    switch (target?.type) {
        case 'type_identifier':
        case 'scoped_type_identifier':
            return target;
        default:
            return null;
    }
}
|
|
145
|
+
/**
 * Tells whether a node is a Java comment, i.e. its type is `line_comment` or
 * `block_comment`.
 *
 * @param {object} node - tree-sitter node.
 * @returns {boolean}
 */
function isComment(node) {
    switch (node.type) {
        case 'line_comment':
        case 'block_comment':
            return true;
        default:
            return false;
    }
}
|
|
148
|
+
// Dominant Java stdlib/collection/stream/string method names (>=4 chars)
|
|
149
|
+
// suppressed when a member call to them is unresolved — capturing chained
|
|
150
|
+
// calls otherwise floods the name-keyed store with `.stream().filter()`-style
|
|
151
|
+
// noise. Domain method names are deliberately absent. <=3-char names (`.add`,
|
|
152
|
+
// `.get`, `.put`) are gated downstream by SHORT_NAME_THRESHOLD.
|
|
153
|
+
// Very common JDK method names (4+ characters), passed to resolveCalls as
// `ignoredMemberCallees`: per the design note, an unresolved member call to
// one of these is suppressed so chained stdlib calls such as
// `.stream().filter()` do not flood the name-keyed store. Domain-specific
// names are intentionally absent.
const JAVA_IGNORED_MEMBER_CALLEES = new Set([
    'stream', 'filter', 'collect', 'forEach', 'flatMap',
    'reduce', 'sorted', 'distinct', 'limit', 'count',
    'anyMatch', 'allMatch', 'noneMatch', 'findFirst', 'findAny',
    'toList', 'toArray', 'contains', 'containsKey', 'containsValue',
    'isEmpty', 'remove', 'clear', 'size', 'keySet',
    'values', 'entrySet', 'iterator', 'hasNext', 'append',
    'toString', 'equals', 'hashCode', 'length', 'substring',
    'indexOf', 'replace', 'trim', 'split', 'startsWith',
    'endsWith', 'charAt', 'matches', 'format', 'valueOf',
    'getOrDefault', 'putIfAbsent', 'orElse', 'orElseGet', 'orElseThrow',
    'isPresent', 'ifPresent', 'getClass', 'println', 'print',
    'printf',
]);
|
|
164
|
+
// Peels transparent receiver wrappers off a member-call receiver so a wrapped
|
|
165
|
+
// receiver resolves like the bare form: parenthesized `(a).m()` and the classic
|
|
166
|
+
// downcast `((T)a).m()` (a `cast_expression` whose operand is its `value` field
|
|
167
|
+
// — Java has no force-unwrap operator, so the cast is its analog). Leading
|
|
168
|
+
// comment nodes inside the parens are skipped via isComment (tree-sitter-java
|
|
169
|
+
// names them `line_comment`/`block_comment`, never `comment`). Each step
|
|
170
|
+
// strictly descends a finite tree, so the loop always terminates.
|
|
171
|
+
/**
 * Strips transparent wrappers from a member-call receiver so `(a).m()` and
 * `((T) a).m()` are treated like `a.m()`.
 *
 * - `parenthesized_expression`: step into the first named child that is not
 *   a comment.
 * - `cast_expression`: step into its `value` field.
 *
 * Unwrapping stops at the first node of any other type, or when a wrapper has
 * nothing to step into (the wrapper itself is then returned).
 *
 * @param {object} node - receiver node of a method invocation.
 * @returns {object} the innermost unwrapped node.
 */
function unwrapJavaReceiver(node) {
    let current = node;
    while (true) {
        let next = null;
        switch (current.type) {
            case 'parenthesized_expression':
                next = current.firstNamedChild;
                // Comments are named nodes and may precede the expression.
                while (next && isComment(next)) {
                    next = next.nextNamedSibling;
                }
                break;
            case 'cast_expression':
                next = current.childForFieldName('value');
                break;
            default:
                return current;
        }
        if (!next) {
            return current;
        }
        current = next;
    }
}
|
|
193
|
+
// `this.x()` and `obj.x()` carry their literal receiver token; chained and
|
|
194
|
+
// computed receivers (`a.b.c()`, `System.out.println()`, `foo().bar()`) carry
|
|
195
|
+
// RECEIVER_OPAQUE so the called method stays findable by name (recall) but
|
|
196
|
+
// never resolves. `super.x()` (object is a `super` node — the grammar has no
|
|
197
|
+
// super_method_invocation) and computed (non-`identifier` name) emit nothing.
|
|
198
|
+
/**
 * Describes a member-style callee as `{ receiver, property, isSelf }`.
 *
 * `method_invocation` (needs an `object` field and an `identifier` name):
 *   the receiver is unwrapped (parens / casts) first, then
 *   - `this`        -> receiver 'this', isSelf true
 *   - `identifier`  -> receiver is the identifier text
 *   - `super`       -> null (nothing emitted)
 *   - anything else -> receiver RECEIVER_OPAQUE (chained/computed receivers)
 *
 * `scoped_type_identifier` (`new Outer.Inner()`): children are positional and
 *   comments may sit between them, so comments are filtered out; both
 *   remaining leading parts must be `type_identifier`s, otherwise null (this
 *   also rejects deeper qualification, where slot 0 is another
 *   scoped_type_identifier).
 *
 * @param {object} callee - node produced by a JAVA_SELECTORS getCallee.
 * @returns {{receiver: *, property: string, isSelf: boolean}|null}
 */
function javaMemberCallInfo(callee) {
    switch (callee.type) {
        case 'method_invocation': {
            const receiverNode = callee.childForFieldName('object');
            const nameNode = callee.childForFieldName('name');
            if (!receiverNode || nameNode?.type !== 'identifier') {
                return null;
            }
            // The `super` check runs after unwrapping so it applies however
            // the receiver was wrapped.
            const receiver = unwrapJavaReceiver(receiverNode);
            switch (receiver.type) {
                case 'this':
                    return { receiver: 'this', property: nameNode.text, isSelf: true };
                case 'identifier':
                    return { receiver: receiver.text, property: nameNode.text, isSelf: false };
                case 'super':
                    return null;
                default:
                    return { receiver: RECEIVER_OPAQUE, property: nameNode.text, isSelf: false };
            }
        }
        case 'scoped_type_identifier': {
            const [scope, name] = callee.namedChildren.filter((child) => !isComment(child));
            const bothSimple = scope?.type === 'type_identifier' && name?.type === 'type_identifier';
            if (!bothSimple) {
                return null;
            }
            return { receiver: scope.text, property: name.text, isSelf: false };
        }
        default:
            return null;
    }
}
|
|
233
|
+
/**
 * Entry point of the Java extractor: turns a parsed Java file into symbols,
 * call references and imports.
 *
 * Walks the root's named children, recording `import_declaration`s and
 * descending into type declarations; everything else at the top level
 * (package/module declarations, comments) yields no symbols. Afterwards calls
 * are resolved over the collected bodies and complexity is computed for them.
 *
 * @param {object} tree - tree-sitter parse tree (only `rootNode` is read).
 * @param {string} content - source text of the file.
 * @param {object} fileInfo - file descriptor; forwarded to the helpers.
 * @returns {{symbols: object[], references: *, imports: object[]}}
 */
export function extractJava(tree, content, fileInfo) {
    const root = tree.rootNode;
    const symbols = [];
    const imports = [];
    const bodies = [];
    for (const topLevel of root.namedChildren) {
        if (topLevel.type === 'import_declaration') {
            extractImport(topLevel, fileInfo, imports);
            continue;
        }
        if (TYPE_KIND[topLevel.type] === undefined) {
            continue;
        }
        // Top-level types start with no qualifier, an exported container and
        // no enclosing interface.
        extractType(topLevel, content, fileInfo, '', true, false, symbols, bodies);
    }
    // Same-named types in one file (e.g. a `Builder` under two outers) share
    // a simple-name FQN; they are collected here and handed to resolveCalls
    // so calls are left unresolved rather than bound to the wrong class.
    const ambiguousTypeNames = collectAmbiguousTypeNames(symbols, JAVA_TYPE_KINDS);
    // Implicit `this`: Java has no top-level functions, so a bare `foo()` is
    // resolved against the enclosing class's methods only.
    const callOptions = {
        bareCalleeTypes: JAVA_BARE_CALLEE_TYPES,
        bareCallsBindToEnclosingClass: true,
        bareCallableKinds: JAVA_BARE_CALLABLE_KINDS,
        constructorKinds: JAVA_CONSTRUCTOR_KINDS,
        ambiguousClassNames: ambiguousTypeNames,
        ignoredMemberCallees: JAVA_IGNORED_MEMBER_CALLEES,
    };
    const references = resolveCalls(bodies, root, symbols, fileInfo, JAVA_SELECTORS, JAVA_SKIP_TYPES, JAVA_FUNCTION_BODY_SKIP_TYPES, javaMemberCallInfo, callOptions);
    const complexityOptions = {
        decisionNodeTypes: JAVA_DECISION_NODE_TYPES,
        extraDecisionPredicate: javaCyclomaticExtra,
        skipTypes: JAVA_SKIP_TYPES,
        cyclomaticSkipTypes: JAVA_CYCLOMATIC_SKIP_TYPES,
        cognitive: JAVA_COGNITIVE_OPTIONS,
    };
    computeComplexity(bodies, symbols, complexityOptions);
    return { symbols, references, imports };
}
|
|
272
|
+
// Extracts a type declaration and recurses through its body. Recursion only
|
|
273
|
+
// ever enters type bodies (class/interface/enum) — local classes inside
|
|
274
|
+
// method blocks and anonymous classes are never reached, which implements
|
|
275
|
+
// the "top-level and class-level only" scope rule structurally.
|
|
276
|
+
/**
 * Emits the symbol for one type declaration and recurses through its body.
 *
 * Only type bodies are ever entered, so local classes inside method blocks
 * and anonymous classes are never reached.
 *
 * @param {object} decl - type declaration node (a key of TYPE_KIND).
 * @param {string} content - source text of the file.
 * @param {object} fileInfo - file descriptor (`path` is used for the FQN).
 * @param {string} qualifier - dotted chain of enclosing type names, '' at top
 *   level; passed to makeJavaSymbol rather than into the FQN.
 * @param {boolean} containerExported - whether every enclosing type is exported.
 * @param {boolean} inInterface - whether the direct container is an interface
 *   (member types of interfaces are implicitly public).
 * @param {object[]} outSymbols - receives the created symbols.
 * @param {object[]} outBodies - receives `{ symbolId, body, className }` entries.
 * @returns {void}
 */
function extractType(decl, content, fileInfo, qualifier, containerExported, inInterface, outSymbols, outBodies) {
    const typeName = decl.childForFieldName('name')?.text;
    if (!typeName) {
        return;
    }
    const modifiers = findModifiers(decl);
    const isExported = containerExported && (inInterface || hasModifier(modifiers, 'public', 'protected'));
    // Nested types keep the simple-name FQN `file:Name`; the enclosing chain
    // travels in `qualifier` so same-named nested types still get distinct ids.
    const typeSymbol = makeJavaSymbol(decl, javaSignature(decl, content, modifiers), fileInfo, TYPE_KIND[decl.type], typeName, `${fileInfo.path}:${typeName}`, isExported, qualifier);
    outSymbols.push(typeSymbol);
    // Annotation types are declaration-only: no members, no body walk.
    if (decl.type === 'annotation_type_declaration') {
        return;
    }
    const body = decl.childForFieldName('body');
    if (!body) {
        return;
    }
    // The type body is registered as the type's own body, so field
    // initializers, initializer blocks and enum constant arguments are
    // attributed to the type symbol.
    outBodies.push({ symbolId: typeSymbol.id, body, className: typeName });
    const nestedQualifier = qualifier ? `${qualifier}.${typeName}` : typeName;
    const bodyIsInterface = decl.type === 'interface_declaration';
    // Enum members sit one level deeper, inside enum_body_declarations.
    const memberNodes = decl.type === 'enum_declaration' ? enumMemberNodes(body) : body.namedChildren;
    for (const node of memberNodes) {
        if (TYPE_KIND[node.type] === undefined) {
            extractMember(node, content, fileInfo, typeName, nestedQualifier, isExported, bodyIsInterface, outSymbols, outBodies);
            continue;
        }
        extractType(node, content, fileInfo, nestedQualifier, isExported, bodyIsInterface, outSymbols, outBodies);
    }
}
|
|
319
|
+
/**
 * Returns the member nodes of an enum body: the named children of its first
 * `enum_body_declarations` child, or an empty array when there is none.
 *
 * @param {object} enumBody - the enum's body node.
 * @returns {object[]}
 */
function enumMemberNodes(enumBody) {
    const section = enumBody.namedChildren.find((child) => child.type === 'enum_body_declarations');
    return section ? section.namedChildren : [];
}
|
|
326
|
+
/**
 * Emits symbols for one non-type member of a type body.
 *
 * Handles methods, (compact) constructors and field / interface-constant
 * declarations; every other member type (initializer blocks, enum constants,
 * stray `;`, ...) produces no symbol.
 *
 * Export rule: inside an interface a member is exported unless it is
 * explicitly `private`; elsewhere it needs its own `public`/`protected`
 * modifier. In both cases every enclosing type must be exported too.
 *
 * @param {object} member - member node from the type body.
 * @param {string} content - source text of the file.
 * @param {object} fileInfo - file descriptor (`path` is used for FQNs).
 * @param {string} className - simple name of the directly enclosing type.
 * @param {string} qualifier - dotted chain of enclosing type names.
 * @param {boolean} containerExported - whether the enclosing chain is exported.
 * @param {boolean} inInterface - whether the enclosing type is an interface.
 * @param {object[]} outSymbols - receives the created symbols.
 * @param {object[]} outBodies - receives callable bodies.
 * @returns {void}
 */
function extractMember(member, content, fileInfo, className, qualifier, containerExported, inInterface, outSymbols, outBodies) {
    const modifiers = findModifiers(member);
    let exported = containerExported;
    if (exported) {
        exported = inInterface
            ? !hasModifier(modifiers, 'private')
            : hasModifier(modifiers, 'public', 'protected');
    }
    const memberType = member.type;
    if (memberType === 'method_declaration') {
        const methodName = member.childForFieldName('name')?.text;
        if (methodName) {
            extractCallable(member, methodName, content, fileInfo, className, qualifier, exported, modifiers, outSymbols, outBodies);
        }
        return;
    }
    if (memberType === 'constructor_declaration' || memberType === 'compact_constructor_declaration') {
        // Constructors are always named `constructor` (FQN
        // `file:Class.constructor`): the AST name repeats the class name,
        // which would pair the constructor with the class symbol in lookups.
        extractCallable(member, 'constructor', content, fileInfo, className, qualifier, exported, modifiers, outSymbols, outBodies);
        return;
    }
    if (memberType !== 'field_declaration' && memberType !== 'constant_declaration') {
        return;
    }
    // One declaration may declare several variables (`int a = 1, b;`): each
    // variable_declarator becomes its own symbol, all sharing one signature.
    const declarationText = content.slice(signatureStart(member, modifiers), member.endIndex);
    const signature = normalizeSignature(declarationText.replace(/;\s*$/, ''));
    for (const declarator of member.childrenForFieldName('declarator')) {
        const fieldName = declarator?.type === 'variable_declarator'
            ? declarator.childForFieldName('name')?.text
            : undefined;
        if (!fieldName) {
            continue;
        }
        outSymbols.push(makeJavaSymbol(member, signature, fileInfo, 'variable', fieldName, `${fileInfo.path}:${className}.${fieldName}`, exported, qualifier));
    }
}
|
|
374
|
+
/**
 * Emits a `method` symbol for a method or constructor and, when the node has
 * a `body` field, registers that body for the later call/complexity passes.
 * Body-less callables (abstract / interface methods) still get a symbol.
 *
 * @param {object} member - method or constructor declaration node.
 * @param {string} symName - symbol name (`constructor` for constructors).
 * @param {string} content - source text of the file.
 * @param {object} fileInfo - file descriptor (`path` is used for the FQN).
 * @param {string} className - simple name of the enclosing type.
 * @param {string} qualifier - dotted chain of enclosing type names.
 * @param {boolean} exported - precomputed export flag.
 * @param {object|null} mods - the member's `modifiers` node, if any.
 * @param {object[]} outSymbols - receives the created symbol.
 * @param {object[]} outBodies - receives `{ symbolId, body, className }`.
 * @returns {void}
 */
function extractCallable(member, symName, content, fileInfo, className, qualifier, exported, mods, outSymbols, outBodies) {
    const signature = javaSignature(member, content, mods);
    const fqn = `${fileInfo.path}:${className}.${symName}`;
    const callable = makeJavaSymbol(member, signature, fileInfo, 'method', symName, fqn, exported, qualifier);
    outSymbols.push(callable);
    const body = member.childForFieldName('body');
    if (!body) {
        return;
    }
    outBodies.push({ symbolId: callable.id, body, className });
}
|
|
384
|
+
/**
 * Converts one `import_declaration` into an import record pushed onto `out`.
 *
 * The payload is the last `scoped_identifier` / `identifier` named child; an
 * `asterisk` named child marks a wildcard import.
 *   - wildcard:          module = whole payload, name = IMPORT_NAMESPACE
 *   - scoped identifier: module = `scope` text ('' if absent), name = `name`
 *   - bare identifier:   module and name are both the identifier text
 * Static imports need no special case because the scope/name split already
 * yields `a.b.C` + `m`. Nothing is pushed when there is no payload or a
 * scoped payload has no `name` field.
 *
 * @param {object} stmt - the import_declaration node.
 * @param {object} fileInfo - file descriptor (`path` is recorded).
 * @param {object[]} out - receives `{ file, sourceModule, importedNames, line }`.
 * @returns {void}
 */
function extractImport(stmt, fileInfo, out) {
    let payload = null;
    let wildcard = false;
    for (const part of stmt.namedChildren) {
        switch (part.type) {
            case 'scoped_identifier':
            case 'identifier':
                payload = part;
                break;
            case 'asterisk':
                wildcard = true;
                break;
            default:
                break;
        }
    }
    if (!payload) {
        return;
    }
    let sourceModule;
    let importedName;
    if (wildcard) {
        sourceModule = payload.text;
        importedName = IMPORT_NAMESPACE;
    }
    else if (payload.type === 'scoped_identifier') {
        const nameNode = payload.childForFieldName('name');
        if (!nameNode) {
            return;
        }
        sourceModule = payload.childForFieldName('scope')?.text ?? '';
        importedName = nameNode.text;
    }
    else {
        // Bare `import Foo;` — a default-package import.
        sourceModule = payload.text;
        importedName = payload.text;
    }
    out.push({
        file: fileInfo.path,
        sourceModule,
        importedNames: [{ name: importedName }],
        line: stmt.startPosition.row + 1,
    });
}
|
|
424
|
+
/**
 * Builds the symbol record for a Java declaration.
 *
 * @param {object} decl - declaration node; supplies the line range and doc.
 * @param {string} signature - full normalized signature.
 * @param {object} fileInfo - file descriptor (`path` and `language` are read).
 * @param {string} kind - symbol kind.
 * @param {string} name - symbol name.
 * @param {string} fqn - fully qualified name.
 * @param {boolean} exported - export flag.
 * @param {string} [qualifier=''] - enclosing type chain, passed to symbolId.
 * @returns {object} the symbol record.
 */
function makeJavaSymbol(decl, signature, fileInfo, kind, name, fqn, exported, qualifier = '') {
    // The id is derived from the FULL signature; only the stored copy below is
    // capped. Otherwise overloads that differ only past the cap would end up
    // with the same id.
    const id = symbolId(fileInfo.path, name, kind, signature, qualifier);
    // Annotations are part of the declaration node (inside its modifiers
    // child), so the range starts at the first annotation.
    const startLine = decl.startPosition.row + 1;
    const endLine = decl.endPosition.row + 1;
    const displaySignature = signature.slice(0, SIGNATURE_DISPLAY_CAP);
    const doc = extractJavaDoc(decl);
    return {
        id,
        name,
        fqn,
        kind,
        file: fileInfo.path,
        startLine,
        endLine,
        signature: displaySignature,
        doc,
        exported,
        language: fileInfo.language,
    };
}
|
|
445
|
+
// `modifiers` is a named CHILD, not a field — childForFieldName('modifiers')
|
|
446
|
+
// returns null despite "modifiers" appearing in the grammar's field table.
|
|
447
|
+
// Absent entirely on modifier-less declarations, so never address children
|
|
448
|
+
// by index. Each declaration finds its modifiers ONCE and threads the node
|
|
449
|
+
// through the exported/signature helpers.
|
|
450
|
+
/**
 * Finds a declaration's `modifiers` node by scanning its named children.
 * (`modifiers` is a child, not a field, and is absent on modifier-less
 * declarations — so it is located by type, never by index.)
 *
 * @param {object} decl - declaration node.
 * @returns {object|null} the first `modifiers` child, or null.
 */
function findModifiers(decl) {
    const found = decl.namedChildren.find((child) => child.type === 'modifiers');
    return found ?? null;
}
|
|
457
|
+
// Keyword tokens inside `modifiers` are anonymous children whose type IS the
|
|
458
|
+
// literal text; annotations are named marker_annotation/annotation children.
|
|
459
|
+
/**
 * Tests whether a `modifiers` node contains any of the given keywords.
 * Keyword tokens are anonymous children whose node type is the keyword text
 * itself, so all children (not just named ones) are inspected.
 *
 * @param {object|null} mods - the `modifiers` node, or null when absent.
 * @param {...string} wanted - keywords to look for, e.g. 'public'.
 * @returns {boolean} true if any child's type is one of `wanted`.
 */
function hasModifier(mods, ...wanted) {
    if (!mods) {
        return false;
    }
    return mods.children.some((token) => Boolean(token) && wanted.includes(token.type));
}
|
|
468
|
+
// Signature runs from the first non-annotation modifier token (or the
|
|
469
|
+
// declaration start) to the body start. Annotations are excluded — unlike
|
|
470
|
+
// Python's decorators-in-signature — because Spring/JUnit annotation blocks
|
|
471
|
+
// routinely exceed the 120-char cap, which would truncate the declaration
|
|
472
|
+
// proper out of the display and let same-name overloads collide on
|
|
473
|
+
// identical truncated signatures (= identical symbol ids). Body-less
|
|
474
|
+
// callables (abstract/interface methods) run to the declaration end with
|
|
475
|
+
// the trailing `;` stripped, matching the field path.
|
|
476
|
+
/**
 * Builds the normalized signature text of a declaration.
 *
 * The text starts where signatureStart says (annotations excluded) and runs
 * to the start of the `body` field. Body-less declarations (abstract /
 * interface methods) run to the declaration end with a trailing `;` removed.
 *
 * @param {object} decl - declaration node.
 * @param {string} content - source text of the file.
 * @param {object|null} mods - the declaration's `modifiers` node, if any.
 * @returns {*} the result of normalizeSignature on the sliced text.
 */
function javaSignature(decl, content, mods) {
    const body = decl.childForFieldName('body');
    const from = signatureStart(decl, mods);
    if (body) {
        return normalizeSignature(content.slice(from, body.startIndex));
    }
    const bodyless = content.slice(from, decl.endIndex).replace(/;\s*$/, '');
    return normalizeSignature(bodyless);
}
|
|
483
|
+
/**
 * Computes the source offset at which a declaration's signature begins.
 *
 * - no modifiers node: the declaration's own start;
 * - otherwise: the start of the first modifiers child that is neither an
 *   annotation (`marker_annotation` / `annotation`) nor a comment;
 * - modifiers made only of annotations/comments (`@Override void f()`): the
 *   end of the modifiers node, i.e. just past them.
 *
 * @param {object} decl - declaration node.
 * @param {object|null} mods - the declaration's `modifiers` node, if any.
 * @returns {number} offset into the file content.
 */
function signatureStart(decl, mods) {
    if (!mods) {
        return decl.startIndex;
    }
    const firstKeyword = mods.children.find((child) => Boolean(child)
        && child.type !== 'marker_annotation'
        && child.type !== 'annotation'
        && !isComment(child));
    return firstKeyword ? firstKeyword.startIndex : mods.endIndex;
}
|
|
495
|
+
// Javadoc (and plain comments) precede the declaration as named
|
|
496
|
+
// block_comment/line_comment siblings — annotations don't break adjacency
|
|
497
|
+
// because they live inside the declaration's modifiers child.
|
|
498
|
+
/**
 * Extracts the doc line for a declaration from the comment directly before it.
 *
 * Only the previous named sibling is considered, and only if it is a comment
 * that isTrailingComment does not flag (a comment trailing an earlier
 * statement is not documentation for the next declaration). Annotations do
 * not break adjacency because they live inside the declaration node.
 *
 * @param {object} decl - declaration node.
 * @returns {*} commentDocLine of the comment text, or null when no
 *   qualifying comment precedes the declaration.
 */
function extractJavaDoc(decl) {
    const preceding = decl.previousNamedSibling;
    const isDocComment = Boolean(preceding) && isComment(preceding) && !isTrailingComment(preceding);
    return isDocComment ? commentDocLine(preceding.text) : null;
}
|