codedeep-mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +177 -0
  3. package/dist/config.js +223 -0
  4. package/dist/git/analyzer.js +177 -0
  5. package/dist/git/git-service.js +568 -0
  6. package/dist/git/head-watcher.js +113 -0
  7. package/dist/git/runner.js +204 -0
  8. package/dist/index.js +138 -0
  9. package/dist/indexer/code-index.js +1801 -0
  10. package/dist/indexer/complexity.js +633 -0
  11. package/dist/indexer/extractor.js +354 -0
  12. package/dist/indexer/languages/cpp.js +934 -0
  13. package/dist/indexer/languages/csharp.js +854 -0
  14. package/dist/indexer/languages/dart.js +777 -0
  15. package/dist/indexer/languages/go.js +665 -0
  16. package/dist/indexer/languages/java.js +507 -0
  17. package/dist/indexer/languages/kotlin.js +709 -0
  18. package/dist/indexer/languages/objc.js +397 -0
  19. package/dist/indexer/languages/php.js +771 -0
  20. package/dist/indexer/languages/python.js +455 -0
  21. package/dist/indexer/languages/ruby.js +697 -0
  22. package/dist/indexer/languages/rust.js +754 -0
  23. package/dist/indexer/languages/swift.js +691 -0
  24. package/dist/indexer/languages/typescript.js +485 -0
  25. package/dist/indexer/parser.js +175 -0
  26. package/dist/indexer/pipeline.js +342 -0
  27. package/dist/indexer/scanner.js +279 -0
  28. package/dist/indexer/watcher.js +353 -0
  29. package/dist/logger.js +16 -0
  30. package/dist/server.js +170 -0
  31. package/dist/tools/common.js +207 -0
  32. package/dist/tools/find-references.js +224 -0
  33. package/dist/tools/find-symbol.js +94 -0
  34. package/dist/tools/get-context.js +370 -0
  35. package/dist/tools/impact.js +218 -0
  36. package/dist/tools/overview.js +482 -0
  37. package/dist/tools/search-structure.js +303 -0
  38. package/dist/types.js +61 -0
  39. package/grammars/tree-sitter-c.wasm +0 -0
  40. package/grammars/tree-sitter-c_sharp.wasm +0 -0
  41. package/grammars/tree-sitter-cpp.wasm +0 -0
  42. package/grammars/tree-sitter-dart.wasm +0 -0
  43. package/grammars/tree-sitter-go.wasm +0 -0
  44. package/grammars/tree-sitter-java.wasm +0 -0
  45. package/grammars/tree-sitter-javascript.wasm +0 -0
  46. package/grammars/tree-sitter-kotlin.wasm +0 -0
  47. package/grammars/tree-sitter-objc.wasm +0 -0
  48. package/grammars/tree-sitter-php.wasm +0 -0
  49. package/grammars/tree-sitter-python.wasm +0 -0
  50. package/grammars/tree-sitter-ruby.wasm +0 -0
  51. package/grammars/tree-sitter-rust.wasm +0 -0
  52. package/grammars/tree-sitter-swift.wasm +0 -0
  53. package/grammars/tree-sitter-tsx.wasm +0 -0
  54. package/grammars/tree-sitter-typescript.wasm +0 -0
  55. package/package.json +67 -0
@@ -0,0 +1,665 @@
1
+ import { IMPORT_NAMESPACE, RECEIVER_OPAQUE } from '../../types.js';
2
+ import { SIGNATURE_DISPLAY_CAP, collectAmbiguousTypeNames, commentDocLine, declSignature, isTrailingComment, normalizeSignature, resolveCalls, symbolId, } from '../extractor.js';
3
+ import { cFamilyBooleanOperatorKind, computeComplexity, isCFamilyBooleanOperator, } from '../complexity.js';
4
+ // Function-like nodes whose bodies contain calls that shouldn't attribute
5
+ // to an enclosing body. func_literal is deliberately absent (the Java
6
+ // lambda rule, not the TS arrow rule): an anonymous literal can never be a
7
+ // symbol, so calls inside `go func() { f() }()` attribute to the enclosing
8
+ // function. The one literal that IS a symbol — `var f = func() {...}` —
9
+ // still attributes correctly because its body is walked as f's own
10
+ // PendingBody first and the seen-set drops the moduleRoot duplicate.
11
const GO_FUNCTION_BODY_SKIP_TYPES = new Set(['function_declaration', 'method_declaration']);
// Go reuses the very same set for the general skip list: type bodies hold no
// executable code, and function/method declarations cannot nest — the entries
// exist only as parse-error tolerance, mirroring Java's structure.
const GO_SKIP_TYPES = GO_FUNCTION_BODY_SKIP_TYPES;
// A `composite_literal` callee is a type_identifier, never a plain
// identifier — leaving it out would drop every `Server{}` ref.
const GO_BARE_CALLEE_TYPES = new Set([
    'identifier',
    'type_identifier',
]);
// A bare `foo()` binds to top-level functions only (incl. `var f = func()`
// promotions). Conversions such as `MyInt(3)` parse as the same
// call_expression, so type/class/variable kinds are excluded — a conversion
// becomes an unresolved name-keyed ref, never a confidently-wrong edge.
const GO_BARE_CALLABLE_KINDS = new Set([
    'function',
]);
// Composite literals bind to structs and to named non-struct types
// (`type Pairs map[string]int; Pairs{...}`). 'type' is required: unresolved
// refs to 'type'-kind symbols are rejected at query time (NON_CALLABLE), so
// those literals would otherwise be invisible. Interfaces are excluded —
// they cannot be composite-literal constructed.
const GO_CONSTRUCTOR_KINDS = new Set([
    'class',
    'type',
]);
// Kinds whose names share the simple-name FQN namespace — duplicates among
// them are excluded from extract-time call resolution. (struct→class,
// interface→interface, defined/alias→type.)
const GO_TYPE_KINDS = new Set([
    'class',
    'interface',
    'type',
]);
37
+ // Predeclared builtins are package-less bare names; unresolved calls to
38
+ // them would flood the name-keyed reference store. The set also covers the
39
+ // predeclared TYPE names: a conversion `string(b)` / `int64(n)` parses as a
40
+ // call_expression with an identifier callee, identical to a builtin call,
41
+ // so without these every conversion site would persist a junk ref. Resolved
42
+ // calls escape the filter (see ignoredBareCallees), so a file-local
43
+ // `clear()` / pre-1.21 `max()` — or a user type shadowing a predeclared
44
+ // name — keeps its refs.
45
const GO_IGNORED_BARE_CALLEES = new Set([
    // Predeclared builtin functions.
    ...[
        'append', 'cap', 'clear', 'close', 'complex', 'copy', 'delete', 'imag',
        'len', 'make', 'max', 'min', 'new', 'panic', 'print', 'println', 'real',
        'recover',
    ],
    // Predeclared type names, which show up as conversion callees.
    ...[
        'bool', 'byte', 'rune', 'string', 'error', 'any', 'uintptr',
        'int', 'int8', 'int16', 'int32', 'int64',
        'uint', 'uint8', 'uint16', 'uint32', 'uint64',
        'float32', 'float64', 'complex64', 'complex128',
    ],
]);
56
// Call-site selectors: which node types count as a call and where their
// callee node lives.
const GO_SELECTORS = [
    {
        nodeType: 'call_expression',
        getCallee: (n) => n.childForFieldName('function'),
    },
    {
        nodeType: 'composite_literal',
        getCallee: compositeLiteralCallee,
    },
];
// `Server{}` → type_identifier (bare constructor form, binds via
// constructorKinds); `Pair[K, V]{}` → generic_type wrapping the real type;
// `pkg.Config{}` → qualified_type (member path). Slice/map/array literal
// types (`[]Server{...}`) yield null — the element type is buried and the
// inner typed literals fire their own selectors.
function compositeLiteralCallee(node) {
    const declared = node.childForFieldName('type');
    // Look through one generic instantiation to reach the named type.
    const target = declared?.type === 'generic_type'
        ? declared.childForFieldName('type')
        : declared;
    const bindable = target?.type === 'type_identifier' || target?.type === 'qualified_type';
    return bindable ? target : null;
}
75
+ // Go member suppression is EMPTY by design. The ignoredMemberCallees gate keys
76
+ // on the PROPERTY NAME alone (extractor.ts), never the receiver — so suppressing
77
+ // a stdlib-looking name (Println/String/Write) would ALSO drop the legitimate
78
+ // package-qualified call that shares it, and package-qualified calls
79
+ // (`fmt.Println`, `strings.Join`) are the dominant RESOLVABLE Go cross-file
80
+ // pattern. The two forms DO differ by receiver — `fmt.Println` keeps receiver
81
+ // `fmt`, only chained `x.fmt().Println()` goes opaque — but the property-keyed
82
+ // gate can't exploit that, so any non-empty set sacrifices the resolved
83
+ // package-qualified refs. Chained opaque refs to hot names (String/Close/Write,
84
+ // all >=4 chars) are therefore NOT suppressed: they stay tier-5 weak member
85
+ // rows, display-capped (WEAK_MEMBER_ROW_CAP) — recall over precision, the
86
+ // documented Go tradeoff. (Only <=3-char names are gated by SHORT_NAME_THRESHOLD.)
87
// Intentionally empty: no member callee names are suppressed for Go.
const GO_IGNORED_MEMBER_CALLEES = new Set([]);
88
+ // Cyclomatic decision nodes — codedeep-mcp's convention, since Go is undocumented by
89
+ // SonarQube. `for_statement` is Go's ONLY loop node (covers 3-clause, range, and
90
+ // infinite forms). All THREE switch arms count — `expression_case`, `type_case`,
91
+ // AND `communication_case` (select) — while the switch CONTAINERS and
92
+ // `default_case` do NOT. Go has no ternary/`while`/`catch`. `&&`/`||` count via
93
+ // the shared isCFamilyBooleanOperator. Closures (`func_literal`) are descended
94
+ // (GO_SKIP_TYPES omits them), so a closure's branches count toward the enclosing
95
+ // func. VERIFIED divergences from the two reference tools (which disagree with
96
+ // each other): sonar-go DROPS select-`case`s (its Go→SLANG converter maps select
97
+ // CommClauses to nil, so they never count) — codedeep-mcp counts them as genuine
98
+ // branches, matching gocyclo. gocyclo, conversely, counts `default` and every
99
+ // case incl. select — codedeep-mcp excludes `default` (the SonarQube/McCabe convention),
100
+ // matching sonar-go on that point. So codedeep-mcp = "count each non-default case of all
101
+ // three switch forms," a deliberate hybrid of the two.
102
const GO_DECISION_NODE_TYPES = new Set([
    // Branch and loop statements.
    'if_statement', 'for_statement',
    // The non-default arms of switch, type switch and select.
    'expression_case', 'type_case', 'communication_case',
]);
109
+ // Cognitive-complexity config — gocognit-aligned (uudashr/gocognit, gocyclo's
110
+ // cognitive sibling), the same convention call the cyclomatic side made for
111
+ // gocyclo. ORACLE-VERIFIED EXACT against gocognit v1.2.1: 376/376 functions on
112
+ // spf13/cobra (157) + gin-gonic/gin (213) + a synthetic edge-case fixture (6).
113
+ // gocognit's nesting math is the shared whitepaper algorithm, but it DIVERGES
114
+ // from sonar-java (the engine default) in FOUR oracle-confirmed ways — each
115
+ // handled here without touching Java/TS:
116
+ // 1. nestElseBody:false — a plain `else { … }` body stays at the if's BASE
117
+ // nesting (gocognit decNesting's after the then-body), vs sonar's nesting+1.
118
+ // 2. initField — the `if x := f(); cond {}` init is walked (gocognit), where
119
+ // Go's `if err := recurse(); err != nil` idiom hides the recursive call.
120
+ // 3. parenthesizedType sentinel — gocognit does NOT unwrap parens in a boolean
121
+ // chain, so `(a&&b)&&c` = 2 (the inner && is its own run), not sonar's 1.
122
+ // 4. recursion — +1 per bare self-call site (gocognit counts direct recursion;
123
+ // sonar-java/SonarJS don't). function-only (methods self-call via selector).
124
+ // The 3 switch forms + select are each whole-switch +1 (cases add nothing — the
125
+ // cognitive/cyclomatic divergence, since GO_DECISION_NODE_TYPES counts each
126
+ // case). `for_statement` is Go's only loop; `func_literal` raises nesting (+0),
127
+ // matching the cyclomatic side which also descends closures (gocyclo-aligned).
128
+ // RESIDUAL DIVERGENCE (rare, deferred — the only place codedeep-mcp ≠ gocognit on Go):
129
+ // the engine's loop/switch branch bumps nesting for ALL children incl. the
130
+ // HEADER, while gocognit walks a for-clause (init/cond/post) and a switch/select
131
+ // init/tag at BASE nesting (incNesting runs only before the body). `initField`
132
+ // fixes this for the `if` header but the for/switch/select header still
133
+ // overbumps a nested STRUCTURAL construct (a closure-with-control-flow in a
134
+ // loop/switch header) — `if`-init vs for/switch-init asymmetry. Booleans in a
135
+ // header are flat (unaffected). 0 cases in the 376-fn oracle; this is the Go
136
+ // aperture of the pre-existing, accepted loop-header overbump (complexity.ts
137
+ // "KNOWN DIVERGENCE" note), shared with Java/TS and deferred to a dedicated
138
+ // engine pass (per-construct body fields + re-oracling Java).
139
const GO_COGNITIVE_OPTIONS = {
    // Shape of Go's `if`: the else-branch (if or block) sits directly in
    // `alternative`, with no wrapper node.
    ifType: 'if_statement',
    conditionField: 'condition',
    consequenceField: 'consequence',
    alternativeField: 'alternative',
    // The initializer of `if x := f(); cond {}` is walked too, at base nesting.
    initField: 'initializer',
    // A terminal `else` body keeps the if's base nesting rather than nesting+1.
    nestElseBody: false,
    loopTypes: new Set(['for_statement']),
    switchTypes: new Set([
        'expression_switch_statement',
        'type_switch_statement',
        'select_statement',
    ]),
    // Go has neither a ternary nor try/catch; these sentinels match no node.
    ternaryType: '__go_no_ternary__',
    catchType: '__go_no_catch__',
    // Closures raise nesting and fold their control flow into the enclosing func.
    nestOnlyTypes: new Set(['func_literal']),
    // break/continue (optionally labeled) and goto (always labeled).
    labeledJumpTypes: new Set(['break_statement', 'continue_statement', 'goto_statement']),
    // The label is a positional `label_name` named child, not a field, so it is
    // found by child type — interleaved comments cannot shift it.
    hasLabel(n) {
        for (const child of n.namedChildren) {
            if (child?.type === 'label_name') {
                return true;
            }
        }
        return false;
    },
    // `&&` / `||` via the shared C-family reader. booleanRunStarts is left
    // unset, so the default (+1 at every operator-kind change) applies.
    booleanOperatorKind: cFamilyBooleanOperatorKind,
    // Sentinel: parentheses are never unwrapped, so each parenthesized boolean
    // counts as its own run.
    parenthesizedType: '__go_no_paren__',
    // Direct recursion: +1 per bare self-call site, for 'function' symbols only.
    // A method self-call is `s.m()` — a selector callee, which yields null here.
    recursion: {
        callType: 'call_expression',
        bareCalleeName(n) {
            const callee = n.childForFieldName('function');
            if (callee?.type !== 'identifier') {
                return null;
            }
            return callee.text;
        },
        eligibleKinds: new Set(['function']),
    },
};
185
+ // `s.log()` and `pkg.Func()` carry their literal receiver token; chained and
186
+ // computed receivers (`s.conn.Close()`, `f().g()`, indexed) carry
187
+ // RECEIVER_OPAQUE so the called method stays findable by name (recall) but
188
+ // never resolves. A non-`field_identifier` field name emits nothing.
189
+ // Never returns isSelf — Go has no this/self token; selfness is decided
190
+ // in the engine by matching the receiver token against the enclosing
191
+ // method's PendingBody.selfReceiverName (opaque receivers never match it).
192
function goMemberCallInfo(callee) {
    switch (callee.type) {
        case 'selector_expression': {
            const property = callee.childForFieldName('field');
            if (property?.type !== 'field_identifier') {
                return null;
            }
            // No operand at all means a malformed parse — emit nothing.
            const target = callee.childForFieldName('operand');
            if (!target) {
                return null;
            }
            // A plain identifier keeps its literal token; any chained or
            // computed operand (selector/call/index) becomes opaque.
            const receiver = target.type === 'identifier' ? target.text : RECEIVER_OPAQUE;
            return { receiver, property: property.text, isSelf: false };
        }
        case 'qualified_type': {
            // Qualified composite literal `pkg.Config{}`.
            const pkg = callee.childForFieldName('package');
            const name = callee.childForFieldName('name');
            return pkg && name
                ? { receiver: pkg.text, property: name.text, isSelf: false }
                : null;
        }
        default:
            return null;
    }
}
219
// Entry point for one Go file: walks the top-level declarations, then resolves
// calls and computes complexity over the collected bodies.
export function extractGo(tree, content, fileInfo) {
    const symbols = [];
    const imports = [];
    const bodies = [];
    // Shared per-file counter handed to every symbol constructor.
    const occurrences = new Map();
    for (const node of tree.rootNode.namedChildren) {
        const nodeType = node.type;
        if (nodeType === 'import_declaration') {
            extractImport(node, fileInfo, imports);
        }
        else if (nodeType === 'function_declaration') {
            extractFunction(node, content, fileInfo, occurrences, symbols, bodies);
        }
        else if (nodeType === 'method_declaration') {
            extractMethod(node, content, fileInfo, occurrences, symbols, bodies);
        }
        else if (nodeType === 'type_declaration') {
            extractTypeDeclaration(node, content, fileInfo, occurrences, symbols);
        }
        else if (nodeType === 'const_declaration' || nodeType === 'var_declaration') {
            const kindWord = nodeType === 'const_declaration' ? 'const' : 'var';
            extractConstVar(node, kindWord, content, fileInfo, occurrences, symbols, bodies);
        }
        // package_clause, comments and anything else contribute no symbols.
    }
    // Same-file duplicate type names are invalid Go, so this only triggers on
    // broken parses — where refusing resolution beats binding through a
    // half-parsed type.
    const ambiguousTypeNames = collectAmbiguousTypeNames(symbols, GO_TYPE_KINDS);
    const callOptions = {
        bareCalleeTypes: GO_BARE_CALLEE_TYPES,
        // Go has no implicit method receiver: a bare `foo()` inside a method
        // body is a package-level call.
        bareCallsBindToEnclosingClass: false,
        bareCallableKinds: GO_BARE_CALLABLE_KINDS,
        constructorKinds: GO_CONSTRUCTOR_KINDS,
        ambiguousClassNames: ambiguousTypeNames,
        ignoredBareCallees: GO_IGNORED_BARE_CALLEES,
        ignoredMemberCallees: GO_IGNORED_MEMBER_CALLEES,
    };
    const references = resolveCalls(bodies, tree.rootNode, symbols, fileInfo, GO_SELECTORS, GO_SKIP_TYPES, GO_FUNCTION_BODY_SKIP_TYPES, goMemberCallInfo, callOptions);
    const complexityOptions = {
        decisionNodeTypes: GO_DECISION_NODE_TYPES,
        extraDecisionPredicate: isCFamilyBooleanOperator,
        skipTypes: GO_SKIP_TYPES,
        cognitive: GO_COGNITIVE_OPTIONS,
    };
    computeComplexity(bodies, symbols, complexityOptions);
    return { symbols, references, imports };
}
272
// Emits one 'function' symbol for a top-level func declaration and, when the
// declaration has a body, queues that body for call resolution.
function extractFunction(decl, content, fileInfo, occurrences, outSymbols, outBodies) {
    const nameNode = decl.childForFieldName('name');
    const name = nameNode?.text;
    // `func _() { ... }` is legal Go (compile-time assertions) but the blank
    // identifier is never a real symbol.
    if (name === '_' || !name) {
        return;
    }
    const signature = declSignature(decl, content);
    const exported = isExportedName(name);
    const doc = goDoc(decl);
    const sym = makeGoSymbol(decl, signature, fileInfo, 'function', name, `${fileInfo.path}:${name}`, exported, doc, occurrences);
    outSymbols.push(sym);
    // Body-less declarations (e.g. assembly stubs) still yield the symbol;
    // there is simply nothing to queue.
    const body = decl.childForFieldName('body');
    if (!body) {
        return;
    }
    outBodies.push({ symbolId: sym.id, body });
}
287
// Emits one 'method' symbol for a method declaration, keyed under its receiver
// base type, and queues the body (if any) with the receiver variable name.
function extractMethod(decl, content, fileInfo, occurrences, outSymbols, outBodies) {
    const name = decl.childForFieldName('name')?.text;
    // `func (s *T) _() {}` is legal (blank method); never a real symbol.
    if (name === '_' || !name) {
        return;
    }
    const recv = receiverInfo(decl);
    if (!recv) {
        return;
    }
    const { typeName, varName } = recv;
    const signature = declSignature(decl, content);
    // The FQN uses the receiver base type as the "class". Export status follows
    // the METHOD name only — an exported method on an unexported type is still
    // reachable through interfaces and embedding promotion.
    const fqn = `${fileInfo.path}:${typeName}.${name}`;
    const exported = isExportedName(name);
    const doc = goDoc(decl);
    // The receiver type is also passed as the hashed qualifier so ids stay
    // distinct per receiver.
    const sym = makeGoSymbol(decl, signature, fileInfo, 'method', name, fqn, exported, doc, occurrences, typeName);
    outSymbols.push(sym);
    const body = decl.childForFieldName('body');
    if (!body) {
        return;
    }
    outBodies.push({
        symbolId: sym.id,
        body,
        className: typeName,
        selfReceiverName: varName,
    });
}
316
+ // First named child that isn't a comment. pointer_type / parenthesized_type
317
+ // hold their wrapped type positionally, and tree-sitter-go attaches comments
318
+ // as named extras, so a naive firstNamedChild can return the comment.
319
function firstTypeChild(node) {
    // Comments appear among the named children, so skip past them; null when
    // nothing else is present.
    const match = node.namedChildren.find((child) => child && child.type !== 'comment');
    return match ?? null;
}
326
+ // Receiver base type and variable name. `func (s *Server)` → {Server, s};
327
+ // `func (S) f()` / `func (_ *S) f()` → varName undefined (no token can
328
+ // reference the receiver, so no self-call resolution either).
329
function receiverInfo(decl) {
    const receiverList = decl.childForFieldName('receiver');
    const param = receiverList?.namedChildren.find((c) => c?.type === 'parameter_declaration');
    if (!param) {
        return null;
    }
    // Peel wrappers in whatever order they nest:
    //   `*Server`         → pointer_type (wrapped type is positional),
    //   `(T)` / `(*T)`    → parenthesized_type (also positional),
    //   `List[T]`         → generic_type (base name in its `type` field).
    // Positional wrappers go through firstTypeChild so an interleaved comment
    // (`* /*x*/ Server`) cannot be mistaken for the type.
    let base = param.childForFieldName('type');
    while (base) {
        const wrapper = base.type;
        if (wrapper === 'pointer_type' || wrapper === 'parenthesized_type') {
            base = firstTypeChild(base);
        }
        else if (wrapper === 'generic_type') {
            base = base.childForFieldName('type');
        }
        else {
            break;
        }
    }
    if (base?.type !== 'type_identifier') {
        return null;
    }
    // An absent or blank receiver name leaves varName undefined.
    const binding = param.childForFieldName('name');
    const varName = binding && binding.text !== '_' ? binding.text : undefined;
    return { typeName: base.text, varName };
}
359
+ // Symbol kinds for type_spec by the shape of its `type` field; anything
360
+ // that isn't a struct or interface (defined types, function types, map
361
+ // types...) is a plain 'type'.
362
function typeSpecKind(typeNode) {
    // struct → 'class', interface → 'interface', everything else (including a
    // missing type node) → 'type'.
    switch (typeNode?.type) {
        case 'struct_type':
            return 'class';
        case 'interface_type':
            return 'interface';
        default:
            return 'type';
    }
}
369
// Emits one symbol per type spec / type alias in a `type` declaration, plus the
// member symbols of struct and interface types.
//
// decl        - the type_declaration node.
// content     - full file text, forwarded to the signature helper.
// fileInfo    - file descriptor; its `path` prefixes every FQN.
// occurrences - per-file counter map forwarded to makeGoSymbol.
// outSymbols  - array the new symbols are appended to.
function extractTypeDeclaration(decl, content, fileInfo, occurrences, outSymbols) {
    // Grouped `type ( A struct{...} ; B int )` puts the specs as direct
    // children; `type A = B` is a distinct type_alias node, same fields.
    const specs = decl.namedChildren.filter((c) => c?.type === 'type_spec' || c?.type === 'type_alias');
    for (const spec of specs) {
        const name = spec.childForFieldName('name')?.text;
        // `type _ struct{...}` is legal Go, but the blank identifier can never
        // be referenced — skip it (and its members) exactly as the function,
        // method and const/var paths do, instead of emitting a `_` symbol.
        if (!name || name === '_') {
            continue;
        }
        const typeNode = spec.childForFieldName('type');
        const kind = spec.type === 'type_alias' ? 'type' : typeSpecKind(typeNode);
        const exported = isExportedName(name);
        outSymbols.push(makeGoSymbol(spec, typeSpecSignature(spec, typeNode, content), fileInfo, kind, name, `${fileInfo.path}:${name}`, exported,
        // Ungrouped specs carry no preceding sibling inside the decl, so
        // the doc sits on the declaration; grouped specs document
        // individually (no group-comment fan-out — const/var rule).
        goDoc(spec) ?? (specs.length === 1 ? goDoc(decl) : null), occurrences));
        // Type bodies carry no executable code (no field initializers in Go),
        // so there is no type-body PendingBody — only member symbols.
        if (typeNode?.type === 'struct_type') {
            extractStructFields(typeNode, content, fileInfo, name, exported, occurrences, outSymbols);
        }
        else if (typeNode?.type === 'interface_type') {
            extractInterfaceMembers(typeNode, content, fileInfo, name, exported, occurrences, outSymbols);
        }
    }
}
395
// Emits one 'variable' symbol per named struct field, qualified by the struct
// name. `content` is accepted for signature parity with the sibling extractors
// and is not read here.
function extractStructFields(structType, content, fileInfo, typeName, typeExported, occurrences, outSymbols) {
    const fieldList = structType.namedChildren.find((c) => c?.type === 'field_declaration_list');
    if (!fieldList) {
        return;
    }
    const declarations = fieldList.namedChildren.filter((c) => c?.type === 'field_declaration');
    for (const declaration of declarations) {
        // Embedded fields (`io.Reader`, `*Conn`) carry no name children, so the
        // inner loop simply does not run for them. Anonymous nested struct
        // types are not recursed into either.
        const signature = normalizeSignature(declaration.text);
        const doc = goDoc(declaration);
        // `a, b int` declares several names that share one signature and doc.
        for (const nameNode of declaration.childrenForFieldName('name')) {
            const fieldName = nameNode?.text;
            if (fieldName === '_' || !fieldName) {
                continue;
            }
            // A field is exported only when both the struct and the field are.
            const exported = typeExported && isExportedName(fieldName);
            const fqn = `${fileInfo.path}:${typeName}.${fieldName}`;
            outSymbols.push(makeGoSymbol(declaration, signature, fileInfo, 'variable', fieldName, fqn, exported, doc, occurrences, typeName));
        }
    }
}
416
+ // Interface method specs are declaration-only members (Java-interface
417
+ // precedent): they populate methodsByClass under the interface name, so
418
+ // method expressions (`Shape.Area`) and same-named lookups resolve.
419
+ // Embedded interfaces and type-set elements (type_elem) carry no name of
420
+ // their own and are skipped.
421
function extractInterfaceMembers(ifaceType, content, fileInfo, ifaceName, ifaceExported, occurrences, outSymbols) {
    // Only `method_elem` children are members; every other child kind is skipped.
    const methodSpecs = ifaceType.namedChildren.filter((member) => member?.type === 'method_elem');
    for (const spec of methodSpecs) {
        const name = spec.childForFieldName('name')?.text;
        if (!name) {
            continue;
        }
        const signature = normalizeSignature(spec.text);
        // Exported only when both the interface and the method are.
        const exported = ifaceExported && isExportedName(name);
        const doc = goDoc(spec);
        const fqn = `${fileInfo.path}:${ifaceName}.${name}`;
        outSymbols.push(makeGoSymbol(spec, signature, fileInfo, 'method', name, fqn, exported, doc, occurrences, ifaceName));
    }
}
431
// Emits symbols for a `const` / `var` declaration: one 'variable' per declared
// name, except that a single-name `var f = func(...) {...}` becomes a
// 'function' symbol whose literal body is queued for call resolution.
function extractConstVar(decl, kindWord, content, fileInfo, occurrences, outSymbols, outBodies) {
    // Grammar asymmetry: const_declaration holds const_spec children directly
    // even when grouped; var_declaration wraps grouped specs in a var_spec_list.
    const specs = decl.namedChildren.flatMap((child) => {
        if (!child) {
            return [];
        }
        if (child.type === 'const_spec' || child.type === 'var_spec') {
            return [child];
        }
        if (child.type !== 'var_spec_list') {
            return [];
        }
        return child.namedChildren.filter((inner) => inner?.type === 'var_spec');
    });
    const isLoneSpec = specs.length === 1;
    for (const spec of specs) {
        // const_spec puts the WHOLE name list under `name:`, anonymous `,`
        // tokens included — keep identifiers only, or `const A, B = 1, 2`
        // grows a phantom symbol named ','.
        const identifiers = spec
            .childrenForFieldName('name')
            .filter((n) => n?.type === 'identifier');
        // A lone spec may be documented on the declaration itself.
        const doc = goDoc(spec) ?? (isLoneSpec ? goDoc(decl) : null);
        // Function promotion applies to single-name `var` specs only: const
        // cannot hold a func value, and multi-name specs stay variables.
        const funcLiteral = kindWord === 'var' && identifiers.length === 1
            ? singleFuncLiteralValue(spec)
            : null;
        const soleName = identifiers[0]?.text;
        if (funcLiteral && soleName && soleName !== '_') {
            const literalBody = funcLiteral.childForFieldName('body');
            // Signature = everything from the spec start up to the literal's body.
            const head = literalBody
                ? content.slice(spec.startIndex, literalBody.startIndex)
                : spec.text;
            const sym = makeGoSymbol(spec, normalizeSignature(`${kindWord} ${head}`), fileInfo, 'function', soleName, `${fileInfo.path}:${soleName}`, isExportedName(soleName), doc, occurrences);
            outSymbols.push(sym);
            if (literalBody) {
                outBodies.push({ symbolId: sym.id, body: literalBody });
            }
            continue;
        }
        // One symbol per name (`var x, y = 1, 2` → two). They share the spec
        // signature; ids still differ by name.
        const signature = normalizeSignature(`${kindWord} ${spec.text}`);
        for (const { text: name } of identifiers) {
            if (name === '_' || !name) {
                continue;
            }
            outSymbols.push(makeGoSymbol(spec, signature, fileInfo, 'variable', name, `${fileInfo.path}:${name}`, isExportedName(name), doc, occurrences));
        }
    }
}
486
+ // The spec's value expression list when it is exactly one func_literal.
487
function singleFuncLiteralValue(spec) {
    const value = spec.childForFieldName('value');
    if (!value) {
        return null;
    }
    // Exactly one value expression, and it must be a func literal.
    const exprs = value.namedChildren;
    const only = exprs.length === 1 ? exprs[0] : null;
    return only?.type === 'func_literal' ? only : null;
}
496
// Appends one import record per import spec to `out`, so each spec keeps its
// own line attribution.
function extractImport(decl, fileInfo, out) {
    // A single import is a direct import_spec child; a grouped import nests its
    // specs inside an import_spec_list.
    const specs = decl.namedChildren.flatMap((child) => {
        if (child?.type === 'import_spec') {
            return [child];
        }
        if (child?.type === 'import_spec_list') {
            return child.namedChildren.filter((inner) => inner?.type === 'import_spec');
        }
        return [];
    });
    for (const spec of specs) {
        // The path is an interpreted or raw string literal — strip its quotes.
        const sourceModule = spec.childForFieldName('path')?.text.replace(/^["`]|["`]$/g, '');
        if (!sourceModule) {
            continue;
        }
        // Binding shape by import form:
        //   plain  → the guessed package name,
        //   `.`    → namespace (wildcard) import,
        //   `_`    → inert '_' binding,
        //   alias  → guessed package name plus the alias.
        const alias = spec.childForFieldName('name');
        let binding;
        if (!alias) {
            binding = { name: defaultPackageName(sourceModule), kind: 'module' };
        }
        else if (alias.type === 'dot') {
            binding = { name: IMPORT_NAMESPACE };
        }
        else if (alias.text === '_') {
            binding = { name: '_', kind: 'module' };
        }
        else {
            binding = { name: defaultPackageName(sourceModule), alias: alias.text, kind: 'module' };
        }
        out.push({
            file: fileInfo.path,
            sourceModule,
            importedNames: [binding],
            line: spec.startPosition.row + 1,
        });
    }
}
545
// Guesses the package name an import path binds to. This is a heuristic on
// the path text only: take the final path segment, step back one segment
// when that final segment is a bare major-version directory, then drop a
// gopkg.in-style version suffix.
function defaultPackageName(importPath) {
    const parts = importPath.split('/');
    const tailIndex = parts.length - 1;
    // `github.com/x/y/v2` → the package lives one segment up (`y`). A path
    // that is nothing but `vN` has no parent segment and is kept as-is.
    const endsInMajorVersion = tailIndex > 0 && /^v\d+$/.test(parts[tailIndex]);
    const candidate = parts[endsInMajorVersion ? tailIndex - 1 : tailIndex];
    // `gopkg.in/yaml.v2` → `yaml`.
    return candidate.replace(/\.v\d+$/, '');
}
558
// Go exports an identifier when its first character is an uppercase letter.
// The check is a single anchored Unicode-property match (\p{Lu}), so it
// covers non-ASCII capitals too; anything else (lowercase, digit, '_',
// empty string) is unexported.
function isExportedName(name) {
    const leadingUppercase = /^\p{Lu}/u;
    return leadingUppercase.exec(name) !== null;
}
563
// Builds the symbol record for one Go declaration. `occurrences` is a Map
// shared across the calls for a file: it counts how many times the same
// (name, kind, signature, qualifier) tuple has been seen, and is updated on
// every call.
function makeGoSymbol(node, signature, fileInfo, kind, name, fqn, exported, doc, occurrences, qualifier = '') {
    const tupleKey = `${name}\0${kind}\0${signature}\0${qualifier}`;
    const ordinal = (occurrences.get(tupleKey) ?? 0) + 1;
    occurrences.set(tupleKey, ordinal);
    // The first occurrence keeps the plain qualifier. A repeat of the same
    // tuple gets `#<ordinal>` appended so the id inputs still differ.
    let idQualifier = qualifier;
    if (ordinal !== 1) {
        idQualifier = `${qualifier}#${ordinal}`;
    }
    // The id is derived from the complete signature; only the copy stored
    // on the record is truncated for display.
    const id = symbolId(fileInfo.path, name, kind, signature, idQualifier);
    const { startPosition, endPosition } = node;
    return {
        id,
        name,
        fqn,
        kind,
        file: fileInfo.path,
        // Node rows are converted to 1-based line numbers.
        startLine: startPosition.row + 1,
        endLine: endPosition.row + 1,
        signature: signature.slice(0, SIGNATURE_DISPLAY_CAP),
        doc,
        exported,
        language: fileInfo.language,
    };
}
585
// Signature text for a type spec. When typeBodyStart reports a member
// block, the signature is the source from the start of the spec up to that
// block (`type Server struct`, `type Handler interface`, type parameters
// included). Otherwise the spec's whole text is used (`type MyInt int`,
// `type A = B`). The 'type ' keyword is prepended here in every case,
// since a spec inside a grouped declaration does not contain it.
function typeSpecSignature(spec, typeNode, content) {
    const bodyOffset = typeBodyStart(typeNode);
    let declared;
    if (bodyOffset === null) {
        declared = spec.text;
    }
    else {
        declared = content.slice(spec.startIndex, bodyOffset);
    }
    return normalizeSignature(`type ${declared}`);
}
594
// Source offset at which a struct or interface member block begins, or
// null when the node is neither kind or has no such block. For a
// struct_type the block is its field_declaration_list named child. For an
// interface_type the marker is the anonymous '{' token among all children;
// looking at child nodes rather than scanning text keeps braces inside
// generic constraints from being mistaken for the body.
function typeBodyStart(typeNode) {
    switch (typeNode?.type) {
        case 'struct_type': {
            const members = typeNode.namedChildren.find((child) => child?.type === 'field_declaration_list');
            return members ? members.startIndex : null;
        }
        case 'interface_type': {
            const openBrace = typeNode.children.find((child) => child?.type === '{');
            return openBrace ? openBrace.startIndex : null;
        }
        default:
            return null;
    }
}
612
// Doc summary for a declaration, godoc-style. Returns null unless the
// previous named sibling is a comment that ends on the line directly above
// the declaration (a blank line detaches it) and is not a trailing comment
// on earlier code. A non-`//` comment is handed to commentDocLine as-is.
// For `//` comments, the run of consecutive single-line comments above the
// declaration is gathered, and the summary is the FIRST line of that run
// that is neither a directive nor empty: consecutive `//` lines are
// separate siblings, and godoc's summary is the block's opening sentence,
// not the line nearest the declaration.
function goDoc(decl) {
    const directlyAbove = (upper, lower) => upper.endPosition.row === lower.startPosition.row - 1;
    const nearest = decl.previousNamedSibling;
    if (nearest?.type !== 'comment') {
        return null;
    }
    if (!directlyAbove(nearest, decl) || isTrailingComment(nearest)) {
        return null;
    }
    if (!nearest.text.startsWith('//')) {
        return commentDocLine(nearest.text);
    }
    // Climb from the comment nearest the declaration. `run` is therefore
    // bottom-up: index 0 is the closest line, the last entry is the top.
    const run = [];
    let cursor = nearest;
    while (cursor) {
        run.push(cursor);
        const above = cursor.previousNamedSibling;
        const continuesRun = above &&
            above.type === 'comment' &&
            above.text.startsWith('//') &&
            directlyAbove(above, cursor) &&
            !isTrailingComment(above);
        cursor = continuesRun ? above : null;
    }
    // Read the run top-down (document order).
    for (let i = run.length - 1; i >= 0; i--) {
        const { text } = run[i];
        if (isDirectiveComment(text)) {
            continue;
        }
        // A bare `//` separator yields no line; keep looking further down.
        const summary = commentDocLine(text);
        if (summary) {
            return summary;
        }
    }
    return null;
}
657
// go/ast's directive rule (what go/doc strips from doc text). With no space
// after the slashes, a comment is a directive when `//` is immediately
// followed by either:
//   - `line `, `extern ` or `export ` (line directives, gccgo, cgo), or
//   - `word:x` — a [a-z0-9]+ tag, a colon, then a [a-z0-9] char
//     (//go:embed, //nolint:gocyclo).
// The trailing-char requirement matters: `//see: RFC` (space after colon)
// and `//https://x` (slash after colon) are prose go/doc keeps, not
// directives. Likewise `// export Foo` (space after the slashes) and
// `//exported` (no space after the keyword) are ordinary comments.
function isDirectiveComment(text) {
    return /^\/\/(?:(?:line|extern|export) |[a-z0-9]+:[a-z0-9])/.test(text);
}