codedeep-mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/LICENSE +21 -0
  2. package/README.md +177 -0
  3. package/dist/config.js +223 -0
  4. package/dist/git/analyzer.js +177 -0
  5. package/dist/git/git-service.js +568 -0
  6. package/dist/git/head-watcher.js +113 -0
  7. package/dist/git/runner.js +204 -0
  8. package/dist/index.js +138 -0
  9. package/dist/indexer/code-index.js +1801 -0
  10. package/dist/indexer/complexity.js +633 -0
  11. package/dist/indexer/extractor.js +354 -0
  12. package/dist/indexer/languages/cpp.js +934 -0
  13. package/dist/indexer/languages/csharp.js +854 -0
  14. package/dist/indexer/languages/dart.js +777 -0
  15. package/dist/indexer/languages/go.js +665 -0
  16. package/dist/indexer/languages/java.js +507 -0
  17. package/dist/indexer/languages/kotlin.js +709 -0
  18. package/dist/indexer/languages/objc.js +397 -0
  19. package/dist/indexer/languages/php.js +771 -0
  20. package/dist/indexer/languages/python.js +455 -0
  21. package/dist/indexer/languages/ruby.js +697 -0
  22. package/dist/indexer/languages/rust.js +754 -0
  23. package/dist/indexer/languages/swift.js +691 -0
  24. package/dist/indexer/languages/typescript.js +485 -0
  25. package/dist/indexer/parser.js +175 -0
  26. package/dist/indexer/pipeline.js +342 -0
  27. package/dist/indexer/scanner.js +279 -0
  28. package/dist/indexer/watcher.js +353 -0
  29. package/dist/logger.js +16 -0
  30. package/dist/server.js +170 -0
  31. package/dist/tools/common.js +207 -0
  32. package/dist/tools/find-references.js +224 -0
  33. package/dist/tools/find-symbol.js +94 -0
  34. package/dist/tools/get-context.js +370 -0
  35. package/dist/tools/impact.js +218 -0
  36. package/dist/tools/overview.js +482 -0
  37. package/dist/tools/search-structure.js +303 -0
  38. package/dist/types.js +61 -0
  39. package/grammars/tree-sitter-c.wasm +0 -0
  40. package/grammars/tree-sitter-c_sharp.wasm +0 -0
  41. package/grammars/tree-sitter-cpp.wasm +0 -0
  42. package/grammars/tree-sitter-dart.wasm +0 -0
  43. package/grammars/tree-sitter-go.wasm +0 -0
  44. package/grammars/tree-sitter-java.wasm +0 -0
  45. package/grammars/tree-sitter-javascript.wasm +0 -0
  46. package/grammars/tree-sitter-kotlin.wasm +0 -0
  47. package/grammars/tree-sitter-objc.wasm +0 -0
  48. package/grammars/tree-sitter-php.wasm +0 -0
  49. package/grammars/tree-sitter-python.wasm +0 -0
  50. package/grammars/tree-sitter-ruby.wasm +0 -0
  51. package/grammars/tree-sitter-rust.wasm +0 -0
  52. package/grammars/tree-sitter-swift.wasm +0 -0
  53. package/grammars/tree-sitter-tsx.wasm +0 -0
  54. package/grammars/tree-sitter-typescript.wasm +0 -0
  55. package/package.json +67 -0
@@ -0,0 +1,934 @@
1
+ import { collectAmbiguousTypeNames } from '../extractor.js';
2
+ import { cFamilyBooleanOperatorKind, computeComplexity } from '../complexity.js';
3
+ import { RECEIVER_OPAQUE } from '../../types.js';
4
+ import { SIGNATURE_DISPLAY_CAP, commentDocLine, isTrailingComment, normalizeSignature, resolveCalls, symbolId, } from '../extractor.js';
5
+ // ── skip sets ────────────────────────────────────────────────────────────────
6
+ // walkDecorators uses this — C++ has no decorator selector, so it never runs;
7
+ // keep nested function_definition here for parity (local functions own a scope).
8
/** Skip set handed to resolveCalls for the function-body walk: nested function definitions only. */
const CPP_FUNCTION_BODY_SKIP_TYPES = new Set().add('function_definition');
9
+ // Preprocessor conditional group nodes (`#if`/`#ifdef`/…). The member walk
10
+ // (extractScope/handleMember) recurses through these transparently to extract
11
+ // guarded members; the resolveCalls walk does NOT (they're in CPP_SKIP_TYPES,
12
+ // below). The continuations (`#else`/`#elif`) are nested CHILDREN of the head
13
+ // `#if` (under its `alternative:` field), each compiled independently.
14
+ // tree-sitter-cpp maps BOTH `#ifdef` and `#ifndef` to `preproc_ifdef`, and BOTH
15
+ // `#elifdef` and `#elifndef` to `preproc_elifdef` (verified) — so there is no
16
+ // `preproc_ifndef`/`preproc_elifndef` node type to list.
17
/** Node types of preprocessor conditional groups: the `#if`/`#ifdef` heads and their continuation branches. */
const PREPROC_GROUPS = new Set(
    ['preproc_if', 'preproc_ifdef', 'preproc_else', 'preproc_elif', 'preproc_elifdef'],
);
24
+ // `#else`/`#elif`-family continuation branches (a subset of PREPROC_GROUPS) — each
25
+ // compiles independently of the then-branch, so visibility resets to the enclosing
26
+ // baseline when entering one (see handleMember).
27
/** The `#else` / `#elif`-family branch node types; visibility is reset when one of these is entered. */
const PREPROC_CONTINUATIONS = new Set(
    ['preproc_else', 'preproc_elif', 'preproc_elifdef'],
);
32
+ // walkCalls skip set: each function/method/variable owns a per-member PendingBody,
33
+ // so the call walk must NOT re-descend into a function body or a type body (their
34
+ // calls are attributed via their own PendingBody) — pruning these also stops a
35
+ // nested local class/function's calls from mis-attributing to an enclosing body.
36
+ // The `preproc_*` conditionals are pruned HERE (but NOT from the member walk): a
37
+ // function-like-macro CONDITION (`#if FOO(3)`) parses as a `call_expression`, so
38
+ // descending into it would emit a SPURIOUS resolved call edge to a same-named real
39
+ // function. Guarded members keep their own PendingBodies (created by extractScope,
40
+ // which DOES descend preproc), so pruning here loses only top-level free-statement
41
+ // calls inside a guard (none exist in valid C++). `namespace_definition` stays
42
+ // ABSENT → DESCENDED (namespace-level free calls reach the module-root walk).
43
+ // `lambda_expression` is ABSENT → DESCENDED (a closure's calls roll into the
44
+ // enclosing function — the Go func_literal / Java lambda rule).
45
/**
 * Node types the call walk does not descend into: function and type bodies
 * (covered by their own PendingBody) followed by every preprocessor group type.
 */
const CPP_SKIP_TYPES = new Set([
    'function_definition', 'class_specifier', 'struct_specifier',
    'union_specifier', 'enum_specifier', 'template_declaration',
].concat(Array.from(PREPROC_GROUPS)));
54
+ // ── complexity (cyclomatic + cognitive) ────────────────────────────────────────
55
+ // Cyclomatic = McCabe's decision-count; cognitive = the published SonarSource
56
+ // Cognitive Complexity whitepaper (a free spec, also implemented by gocognit,
57
+ // eslint-plugin-sonarjs, rust-code-analysis). The increment shapes are behaviorally
58
+ // compatible with SonarQube's C-family rules, verified against in-repo tools
59
+ // (rust-code-analysis for cyclomatic; the public whitepaper fixtures for cognitive).
60
+ // ONE shared options set drives cpp + c + objc (the AST dump confirmed
61
+ // tree-sitter-objc reuses the C control-flow node names AND names its catch node
62
+ // `catch_clause`, identical to cpp — so no objc fork and no engine change).
63
+ // Cyclomatic decision nodes (McCabe decision-count). C-family is the 2nd boolean-free
64
+ // language after Swift (the C-family cyclomatic convention counts no
65
+ // `&&`/`||` cyclomatically), so there is NO `extraDecisionPredicate`. Each `case` AND `default` adds +1 (a
66
+ // `default:` is a `case_statement` with no `value:` — verified — so one node type
67
+ // covers both; the Swift `switch_entry` precedent, a deliberate divergence from the
68
+ // Java/Go/TS default-EXCLUDED rule). Each C++ `lambda_expression` adds +1 AND is
69
+ // descended (the Go func_literal rule — NOT in the cyclomatic skip set), so its inner
70
+ // decisions also count toward the function. `for_range_loop` (C++ range-for) and
71
+ // `lambda_expression` are inert on c/objc (absent); `case_statement` covers all three.
72
+ // NOT counted: the `switch` container, try/catch, goto, break, continue, and `&&`/`||`.
73
+ // ObjC `^{}` blocks (`block_literal`) are NOT counted cyclomatically (only LambdaExpr is).
74
/** Node types that each add +1 to the cyclomatic count for the C-family extractors. */
const CFAMILY_DECISION_NODE_TYPES = new Set([
    // branches
    'if_statement',
    // loops
    'for_statement', 'for_range_loop', 'while_statement', 'do_statement',
    // switch labels (`case` and `default` share this node type), ternary, lambda
    'case_statement', 'conditional_expression', 'lambda_expression',
]);
84
+ // The complexity boundary — a SEPARATE skip set from CPP_SKIP_TYPES (the resolveCalls
85
+ // boundary). TWO deliberate differences:
86
+ // (1) function_definition / method_definition are ABSENT. The PendingBody.body for a
87
+ // cpp function IS the `function_definition` node and for an objc method IS the
88
+ // `method_definition` node (cpp.ts extractFunction / objc.ts extractMethod). The
89
+ // engine's root guards (computeComplexity's `skipTypes.has(body.type)` continue +
90
+ // computeCognitive's `visit` early-return) would SKIP the whole function if its own
91
+ // node type were in this set — zeroing every C-family function. (resolveCalls is
92
+ // unaffected: walkCalls skip-tests only CHILDREN, never the root body.) Local
93
+ // *type* bodies are still pruned (the `*_specifier` entries) so a local class's
94
+ // members don't leak in; a GNU nested function (a `function_definition` inside a
95
+ // body — a non-standard extension, absent from valid C/C++) then rolls INTO the
96
+ // enclosing function — a rare documented over-count, matching the C#/Dart per-member
97
+ // model for local functions.
98
+ // (2) PREPROC_GROUPS are ABSENT → preproc conditionals are DESCENDED, so control flow
99
+ // guarded by `#if`/`#else` IS counted (both branches → a syntactic over-count vs
100
+ // a preprocessor-evaluated active-branch count — the documented C# `#if`
101
+ // divergence, inherent to a tree-sitter extractor). resolveCalls prunes them (the
102
+ // spurious `#if FOO(3)`-as-call-expression edge), but for complexity a macro-call
103
+ // condition is not a decision node.
104
+ // lambda_expression / block_literal stay ABSENT → DESCENDED (their inner flow counts).
105
/** Node types the complexity walk prunes: local type bodies and template declarations. */
const CFAMILY_COMPLEXITY_SKIP_TYPES = new Set(
    ['class_specifier', 'struct_specifier', 'union_specifier', 'enum_specifier', 'template_declaration'],
);
112
+ // Cognitive config (the C-family cognitive rule is the SonarSource Cognitive
113
+ // Complexity whitepaper verbatim, so it maps onto
114
+ // the EXISTING engine knobs with no additions). Surcharge+nest: head `if`, `?:`, the
115
+ // whole `switch` once (cases FREE — the opposite of cyclomatic), the loops, and each
116
+ // `catch` (cpp `catch_clause`; objc `@catch` is ALSO a `catch_clause`, so a single string
117
+ // covers both — the predicted set-widening proved unnecessary). FLAT +1: else/else-if
118
+ // (the `else_clause` chain — tree-sitter-c/cpp/objc all wrap the else in an `else_clause`,
119
+ // the TS shape), `goto` (FLAT, DIVERGING from C#'s goto-surcharge), and each `&&`/`||`
120
+ // operator-CHANGE in a source-order chain (paren-TRANSPARENT → unwrap). Nest-only (+0):
121
+ // C++ lambdas + ObjC `^{}` blocks. Recursion: +1 per direct self-call SITE (the
122
+ // whitepaper's per-call-site rule). The `try` container is free (only catches score). NO
123
+ // baseline +1 in the C-family cognitive model; codedeep-mcp keeps its `1 + decisionPoints` (the documented constant offset).
124
/**
 * Cognitive-complexity configuration shared by the C-family extractors.
 * Surcharge + nest: `if`, `?:`, the loops, each `switch` (once) and each catch.
 * Flat +1: else / else-if, `goto`, and each `&&`/`||` operator change.
 * Nest-only (+0): C++ lambdas and ObjC blocks.
 */
const CFAMILY_COGNITIVE_OPTIONS = {
    ifType: 'if_statement',
    // The condition lives in a `condition_clause` (cpp) or a `parenthesized_expression`
    // (c/objc); both are descended, so booleans inside the condition count. The cpp
    // clause also holds a C++17 `if (init; cond)` init statement, which is descended as
    // well — hence no separate `initField`.
    conditionField: 'condition',
    consequenceField: 'consequence',
    alternativeField: 'alternative',
    // Wrapper node around else / else-if.
    elseClauseType: 'else_clause',
    loopTypes: new Set(['for_statement', 'for_range_loop', 'while_statement', 'do_statement']),
    // +1 for the switch as a whole; its case labels are free.
    switchTypes: new Set(['switch_statement']),
    ternaryType: 'conditional_expression',
    // A `catch_clause` contains its body, so it takes the generic catch branch
    // (surcharge + nest); the same node type covers cpp and objc `@catch`.
    // DOCUMENTED GAP: MSVC `__except` parses as a distinct `seh_except_clause`, so it is
    // NOT surcharged here (control flow in its body still counts, at the try's nesting).
    // That is an under-count only; `__try`/`__finally` are free. Left unhandled rather
    // than widening the single-string `catchType` for a Windows-only extension.
    catchType: 'catch_clause',
    // +0, but deepens nesting.
    nestOnlyTypes: new Set(['lambda_expression', 'block_literal']),
    // `goto` is a flat +1. A goto always targets a label, so hasLabel is always true.
    labeledJumpTypes: new Set(['goto_statement']),
    hasLabel() {
        return true;
    },
    // Reads the operator field of a binary_expression.
    booleanOperatorKind: cFamilyBooleanOperatorKind,
    // Parentheses are unwrapped, so boolean chains are read in source order.
    parenthesizedType: 'parenthesized_expression',
    // Direct recursion: +1 per self-call site. Only the 'function' kind is eligible — the
    // bare-name match has no arity guard, so admitting 'method' would risk a false
    // positive when a method calls a same-named free function.
    // Accepted over-counts (name-only match, no resolver): a free function calling a
    // same-named sibling overload, or a same-named local function pointer that shadows it.
    // Documented under-counts (the callee is not a bare `identifier`): C++ method
    // self-recursion, ObjC `[self f]` (a `message_expression`), and a qualified
    // `N::f()` self-call (a `qualified_identifier` callee).
    recursion: {
        callType: 'call_expression',
        bareCalleeName(callNode) {
            const callee = callNode.childForFieldName('function');
            if (callee?.type !== 'identifier') {
                return null;
            }
            return callee.text;
        },
        eligibleKinds: new Set(['function']),
    },
};
177
+ // The assembled C-family ComplexityOptions — the SINGLE source of truth, used at BOTH
178
+ // call sites (extractCpp here AND extractObjc in objc.ts; `c` reaches extractCpp via the
179
+ // folded `case 'c'` dispatch). The C-family is the only extractor with two call sites, so
180
+ // exporting the assembled object (rather than the three building-block consts above) keeps
181
+ // cpp and objc provably in lock-step — a future tweak lands in one place.
182
/**
 * Assembled complexity options for the C-family extractors: the cyclomatic
 * decision node types, the complexity skip set, and the cognitive configuration.
 * Passed to computeComplexity by extractCpp.
 */
export const CFAMILY_COMPLEXITY_OPTS = {
    decisionNodeTypes: CFAMILY_DECISION_NODE_TYPES,
    skipTypes: CFAMILY_COMPLEXITY_SKIP_TYPES,
    cognitive: CFAMILY_COGNITIVE_OPTIONS,
};
187
+ // ── call resolution ──────────────────────────────────────────────────────────
188
+ // Bare callee is the engine-default `identifier`; `type_identifier` is added ONLY
189
+ // so the `new Foo()` callee (`new_expression`'s `type:` is a `type_identifier`)
190
+ // passes the bare-callee gate — `constructorSelectorTypes` then routes it through
191
+ // `typeNameToId` (constructorKinds={class}) by NODE type, so a `new Foo()` can
192
+ // never mis-bind to an enclosing method named Foo (the C# precedent). A call
193
+ // NEVER has a `type_identifier` callee (the grammar is syntactic — `Foo()` value
194
+ // construction parses with an `identifier` callee), so this only affects `new`.
195
/** Callee node types accepted as a bare callee by the call resolver. */
const CPP_BARE_CALLEE_TYPES = new Set([
    'identifier',
    'type_identifier', // only reached through the `new Foo()` selector
]);
196
+ // A bare `foo()` is either a free-function call or — inside a class body — an
197
+ // implicit-`this` member call, so it binds to the enclosing class first
198
+ // (bareCallsBindToEnclosingClass) then the callable-name map over {function}.
199
+ // Methods are NOT bare-callable (they need a receiver) — they resolve only via
200
+ // methodsByClass. Classes are NOT bare-callable, so a bare `Foo()` value
201
+ // construction stays unresolved (a documented recall gap), never a wrong edge.
202
/** Symbol kinds a bare `foo()` call may resolve to through the callable-name map. */
const CPP_BARE_CALLABLE_KINDS = new Set().add('function');
203
+ // `new Foo()` resolves to a 'class'-kind symbol via the constructor-form path.
204
/** Symbol kinds a `new Foo()` construction may resolve to. */
const CPP_CONSTRUCTOR_KINDS = new Set().add('class');
205
+ // `new X()` is the distinct construction NODE (the C# object_creation precedent):
206
+ // route it through constructorKinds/typeNameToId by node type so it never flows
207
+ // through the enclosing-class/nameToId path.
208
/** Selector node types that are routed through the constructor-form resolution path. */
const CPP_CONSTRUCTOR_SELECTORS = new Set().add('new_expression');
209
+ // C / C++ standard-library free functions that parse as bare `identifier` callees
210
+ // but never resolve to a local symbol — they would flood the name-keyed reference
211
+ // store. Suppressed ONLY when unresolved (a file-local shadow keeps its refs).
212
+ // START small + tune by dogfood (the measure-don't-guess method).
213
/** Standard-library free-function names passed to resolveCalls as `ignoredBareCallees`. */
const CPP_IGNORED_BARE_CALLEES = new Set([
    // formatted / stream I/O
    'printf', 'fprintf', 'sprintf', 'snprintf',
    'scanf', 'fscanf', 'sscanf',
    'puts', 'fputs',
    'fopen', 'fclose', 'fread', 'fwrite', 'fflush', 'perror',
    // memory and process control
    'malloc', 'calloc', 'realloc', 'free',
    'abort', 'exit', 'atexit', 'getenv',
    // raw memory and C strings
    'memcpy', 'memset', 'memmove', 'memcmp',
    'strlen', 'strcmp', 'strncmp', 'strcpy', 'strncpy',
    'strcat', 'strncat', 'strchr', 'strstr',
    // numeric conversion
    'atoi', 'atof',
    // assertions and common <algorithm>/<utility> globals reached via `using`
    'assert', 'static_assert',
    'move', 'forward', 'swap', 'min', 'max',
    'make_unique', 'make_shared', 'make_pair', 'make_tuple', 'to_string',
]);
224
+ // STL container / iterator / smart-pointer / string method names whose chained
225
+ // captures are pure noise (`.push_back()`, `.begin()`, `.size()`, …). Suppressed
226
+ // only when UNRESOLVED, so a same-file `this->size()` that bound to a real
227
+ // sibling keeps its ref. Keep these to >=4 chars (SHORT_NAME_THRESHOLD gates the
228
+ // rest downstream). START small + tune by dogfood.
229
/** STL-style member names passed to resolveCalls as `ignoredMemberCallees`. */
const CPP_IGNORED_MEMBER_CALLEES = new Set([
    // sequence push / pop
    'push_back', 'emplace_back', 'push_front', 'emplace_front', 'pop_back', 'pop_front',
    // generic modification
    'insert', 'emplace', 'erase', 'clear',
    // size queries
    'empty', 'size',
    // iterators
    'begin', 'end', 'cbegin', 'cend', 'rbegin', 'rend',
    // lookup
    'find', 'count',
    // capacity and element access
    'reserve', 'resize', 'data', 'front', 'back',
    // strings and streams
    'c_str', 'length', 'substr', 'append', 'compare', 'str',
    // pairs
    'first', 'second',
    // smart pointers
    'reset', 'release', 'lock',
    // optional-like values
    'value', 'value_or', 'has_value',
    // miscellaneous
    'get', 'count_if', 'contains',
]);
237
/** Call-site selectors: which node types are call sites and how to reach each one's callee node. */
const CPP_SELECTORS = [
    {
        // Ordinary calls — bare, member (`.` / `->`) and scope-resolution (`::`) —
        // are told apart later by the type of the callee node returned here.
        nodeType: 'call_expression',
        getCallee(callNode) {
            return callNode.childForFieldName('function');
        },
    },
    {
        // `new Foo()`: only a plain `type_identifier` target is returned; any other
        // target shape (qualified name, template type) yields null and is dropped.
        nodeType: 'new_expression',
        getCallee(newNode) {
            const target = newNode.childForFieldName('type');
            if (target?.type !== 'type_identifier') {
                return null;
            }
            return target;
        },
    },
];
253
+ // Reduces a member-expression / scope-resolution callee to {receiver, property}.
254
+ // `this->m()` → self-call (resolve against the enclosing class)
255
+ // `obj.m()` / `ptr->m()` → receiver = `obj`/`ptr` (an identifier; unresolved
256
+ // name-keyed member ref unless a class shares the name)
257
+ // `Foo::bar()` / `ns::f()`→ receiver = the innermost scope segment (the
258
+ // Rust/PHP/Ruby `::` single-level member-ref pattern)
259
+ // chained `a.b().c()` / `f()->g()` / computed → RECEIVER_OPAQUE (findable, never
260
+ // resolved)
261
/**
 * Reduces a member-access or scope-resolution callee node to
 * `{ receiver, property, isSelf }`.
 *
 * @param callee the callee node of a call site
 * @returns the reduced info, or null when the callee is neither a
 *   `field_expression` with a `field_identifier` field and an argument, nor a
 *   `qualified_identifier` that `qualifiedName` can parse
 */
function cppMemberCallInfo(callee) {
    switch (callee.type) {
        case 'field_expression': {
            const fieldNode = callee.childForFieldName('field');
            if (fieldNode?.type !== 'field_identifier') {
                return null; // e.g. a destructor call `p->~Foo()`
            }
            const property = fieldNode.text;
            const receiverNode = callee.childForFieldName('argument');
            if (!receiverNode) {
                return null;
            }
            if (receiverNode.type === 'this') {
                return { receiver: 'this', property, isSelf: true };
            }
            // A plain identifier keeps its name; any other receiver shape
            // (chained, parenthesized, subscript, call) is opaque.
            const receiver = receiverNode.type === 'identifier' ? receiverNode.text : RECEIVER_OPAQUE;
            return { receiver, property, isSelf: false };
        }
        case 'qualified_identifier': {
            // The innermost scope segment is the receiver, the final name the property.
            const parsed = qualifiedName(callee);
            return parsed
                ? { receiver: parsed.classScope ?? RECEIVER_OPAQUE, property: parsed.name, isSelf: false }
                : null;
        }
        default:
            return null;
    }
}
287
/**
 * Extracts symbols, call references and imports from one parsed C/C++ file.
 *
 * @param tree the parsed syntax tree (its `rootNode` is walked)
 * @param content the file's source text
 * @param fileInfo file descriptor forwarded to the shared resolver
 * @returns `{ symbols, references, imports }`
 */
export function extractCpp(tree, content, fileInfo) {
    const ctx = { content, fileInfo, occurrences: new Map(), symbols: [], imports: [], bodies: [] };
    // File scope: not inside a class, members exported, visibility 'public'.
    const fileScope = {
        className: null,
        qualifier: '',
        exported: true,
        inClass: false,
        defaultVisibility: 'public',
    };
    extractScope(ctx, tree.rootNode.namedChildren, fileScope);
    // Class names declared more than once are handed to the resolver as ambiguous, so
    // resolution never binds through them first-wins. Function overloads are not excluded.
    const ambiguousClassNames = collectAmbiguousTypeNames(ctx.symbols, new Set(['class']));
    const resolveOptions = {
        bareCalleeTypes: CPP_BARE_CALLEE_TYPES,
        plainCalleeType: 'identifier',
        bareCallableKinds: CPP_BARE_CALLABLE_KINDS,
        bareCallsBindToEnclosingClass: true, // implicit `this`
        constructorKinds: CPP_CONSTRUCTOR_KINDS,
        constructorSelectorTypes: CPP_CONSTRUCTOR_SELECTORS,
        ambiguousClassNames,
        ignoredBareCallees: CPP_IGNORED_BARE_CALLEES,
        ignoredMemberCallees: CPP_IGNORED_MEMBER_CALLEES,
    };
    const references = resolveCalls(
        ctx.bodies,
        tree.rootNode,
        ctx.symbols,
        fileInfo,
        CPP_SELECTORS,
        CPP_SKIP_TYPES,
        CPP_FUNCTION_BODY_SKIP_TYPES,
        cppMemberCallInfo,
        resolveOptions,
    );
    // Complexity is computed after call resolution, from the same collected bodies.
    computeComplexity(ctx.bodies, ctx.symbols, CFAMILY_COMPLEXITY_OPTS);
    const { symbols, imports } = ctx;
    return { symbols, references, imports };
}
327
+ // Walks a translation-unit / namespace / class body in document order, tracking
328
+ // the current access visibility (C++ `public:`/`private:` are stateful positional
329
+ // labels — the Ruby visibility pattern). `preproc_*` conditionals are recursed
330
+ // through transparently (an `#ifndef` include guard wraps the whole header, and
331
+ // `#if`-guarded members must still be extracted; both branches are extracted, the
332
+ // OccurrenceCounter keeps ids unique).
333
/**
 * Visits the members of one scope in order, sharing a single mutable
 * visibility state that starts at the scope's default visibility.
 *
 * @param ctx extraction context
 * @param children the scope's member nodes, in document order
 * @param enclosing descriptor of the scope being walked
 */
function extractScope(ctx, children, enclosing) {
    const visibilityState = { visibility: enclosing.defaultVisibility };
    for (const member of children) {
        handleMember(ctx, member, enclosing, visibilityState);
    }
}
338
/** Node types of record definitions (class, struct, union). */
const RECORD_SPECIFIERS = new Set(['class_specifier', 'struct_specifier', 'union_specifier']);
343
/**
 * Dispatches one member node of a translation unit / namespace / class body to
 * the matching extractor, updating the positional visibility state on the way.
 *
 * @param ctx extraction context
 * @param child the member node to handle
 * @param enclosing descriptor of the scope the member belongs to
 * @param state mutable `{ visibility }` shared by all members of the scope
 * @param wrap `{ prefix, docNode }` when the member sits under a `template<…>`
 *   preamble, otherwise null
 */
function handleMember(ctx, child, enclosing, state, wrap = null) {
    const t = child.type;
    if (PREPROC_GROUPS.has(t)) {
        // Transparent: the guarded members are flattened into the current scope and
        // see the enclosing visibility. An `access_specifier` inside the guard must
        // not leak past `#endif`, so the visibility on entry is snapshotted, restored
        // after the whole group, and also reset before each `#else`/`#elif`
        // continuation (every preprocessor branch compiles independently).
        const saved = state.visibility;
        for (const c of child.namedChildren) {
            if (PREPROC_CONTINUATIONS.has(c.type))
                state.visibility = saved;
            handleMember(ctx, c, enclosing, state, wrap); // thread wrap (a templated guarded decl)
        }
        state.visibility = saved;
        return;
    }
    if (t === 'linkage_specification') {
        // `extern "C" { … }` / `extern "C" <decl>` — a transparent grouping node (no
        // new scope): recurse its body declarations with the same enclosing/state.
        const body = child.childForFieldName('body');
        if (body?.type === 'declaration_list') {
            for (const c of body.namedChildren)
                handleMember(ctx, c, enclosing, state, wrap);
        }
        else if (body) {
            handleMember(ctx, body, enclosing, state, wrap);
        }
        return;
    }
    switch (t) {
        case 'access_specifier':
            // Access labels only have meaning inside a class body.
            if (enclosing.inClass) {
                const v = accessText(child);
                if (v)
                    state.visibility = v;
            }
            return;
        case 'preproc_include':
            extractInclude(ctx, child);
            return;
        case 'namespace_definition':
            extractNamespace(ctx, child, enclosing);
            return;
        case 'class_specifier':
        case 'struct_specifier':
        case 'union_specifier':
            extractRecord(ctx, child, enclosing, state.visibility, wrap);
            return;
        case 'enum_specifier':
            extractEnum(ctx, child, enclosing, state.visibility, wrap);
            return;
        case 'template_declaration':
            // Pass the current wrap along: for a nested `template<…> template<…> decl`
            // the inner template must keep the outer preamble and doc anchor.
            handleTemplate(ctx, child, enclosing, state, wrap);
            return;
        case 'type_definition':
            extractTypedef(ctx, child, enclosing, state.visibility, wrap);
            return;
        case 'alias_declaration':
            extractAlias(ctx, child, enclosing, state.visibility, wrap);
            return;
        case 'function_definition':
            extractFunctionDef(ctx, child, enclosing, state.visibility, wrap);
            return;
        case 'declaration':
        case 'field_declaration':
            extractDeclaration(ctx, child, enclosing, state.visibility, wrap);
            return;
        default:
            // using_declaration / friend_declaration / static_assert / expression
            // statements / etc. — not symbols in v1.
            return;
    }
}
/**
 * `template<…> <decl>`: unwraps to the inner declaration, carrying the
 * `template<…>` preamble as a signature prefix and a doc-anchor node.
 *
 * @param ctx extraction context
 * @param decl the `template_declaration` node
 * @param enclosing descriptor of the scope the template belongs to
 * @param state mutable `{ visibility }` shared by the scope's members
 * @param outer the wrap of an enclosing `template<…>` when this template is
 *   nested directly inside another one, otherwise null. Its prefix is placed
 *   before this template's own preamble and its docNode (the outermost
 *   template_declaration) stays the doc anchor.
 */
function handleTemplate(ctx, decl, enclosing, state, outer = null) {
    const params = decl.childForFieldName('parameters');
    const ownPrefix = params ? `template${normalizeSignature(params.text)} ` : 'template ';
    const wrap = {
        prefix: `${outer?.prefix ?? ''}${ownPrefix}`,
        docNode: outer?.docNode ?? decl,
    };
    for (const c of decl.namedChildren) {
        // The parameter list and requires-clause are part of the preamble, not members.
        if (c.type === 'template_parameter_list' || c.type === 'requires_clause')
            continue;
        handleMember(ctx, c, enclosing, state, wrap);
    }
}
432
/**
 * Walks the members of a namespace body as a new non-class scope whose
 * qualifier is extended by the namespace name.
 *
 * @param ctx extraction context
 * @param decl the `namespace_definition` node
 * @param enclosing descriptor of the scope containing the namespace
 */
function extractNamespace(ctx, decl, enclosing) {
    const nameNode = decl.childForFieldName('name');
    const block = decl.childForFieldName('body');
    if (block) {
        const members = block.namedChildren;
        // An anonymous namespace contributes an empty segment.
        const segment = nameNode ? nameNode.text : '';
        const scope = {
            className: null,
            qualifier: joinQualifier(enclosing.qualifier, segment),
            exported: enclosing.exported,
            inClass: false,
            defaultVisibility: 'public',
        };
        extractScope(ctx, members, scope);
    }
    // No body block (alias / extension form): nothing to walk.
}
446
/**
 * Emits a 'class'-kind symbol for a named class/struct/union and, when the
 * record has a body, walks its members as a class scope.
 *
 * @param ctx extraction context
 * @param decl the record specifier node
 * @param enclosing descriptor of the scope containing the record
 * @param visibility the visibility in effect at the record's position
 * @param wrap template wrap `{ prefix, docNode }`, or null/undefined
 */
function extractRecord(ctx, decl, enclosing, visibility, wrap) {
    const name = recordName(decl);
    if (!name) {
        return; // anonymous struct/union (a field's inline type) — not a symbol
    }
    const exported = memberExported(enclosing, visibility);
    const anchor = wrap?.docNode ?? decl;
    const signature = cppSignature(ctx, decl, wrap?.prefix);
    const fqn = topFqn(ctx, name);
    const doc = cppDoc(anchor);
    ctx.symbols.push(makeCppSymbol(ctx, anchor, signature, 'class', name, fqn, exported, doc, enclosing.qualifier));
    const body = decl.childForFieldName('body');
    if (!body) {
        return; // forward declaration `class Foo;`
    }
    // Members of a `class` start private; struct/union members start public.
    const defaultVisibility = decl.type === 'class_specifier' ? 'private' : 'public';
    extractScope(ctx, body.namedChildren, {
        className: name,
        qualifier: joinQualifier(enclosing.qualifier, name),
        exported,
        inClass: true,
        defaultVisibility,
    });
}
463
/**
 * Emits an 'enum'-kind symbol for a named enum. Enumerators are not extracted.
 *
 * @param ctx extraction context
 * @param decl the `enum_specifier` node
 * @param enclosing descriptor of the scope containing the enum
 * @param visibility the visibility in effect at the enum's position
 * @param wrap template wrap `{ prefix, docNode }`, or null/undefined
 */
function extractEnum(ctx, decl, enclosing, visibility, wrap) {
    const name = recordName(decl);
    if (!name) {
        return; // anonymous enum — enumerators leak to the enclosing scope (v1 gap)
    }
    const anchor = wrap?.docNode ?? decl;
    const signature = cppSignature(ctx, decl, wrap?.prefix);
    const fqn = topFqn(ctx, name);
    const exported = memberExported(enclosing, visibility);
    const doc = cppDoc(anchor);
    ctx.symbols.push(makeCppSymbol(ctx, anchor, signature, 'enum', name, fqn, exported, doc, enclosing.qualifier));
}
470
+ // When a `declaration`/`field_declaration`/`typedef`'s `type:` slot is an inline
471
+ // record/enum definition (`struct Named {…} g;`, `typedef struct Pt {…} Point;`, a
472
+ // nested type), that type is ALSO defined here — extract it alongside the declarators.
473
/**
 * Extracts a record or enum that is defined inline in a declaration's `type`
 * field. Any other kind of type node is ignored.
 *
 * @param ctx extraction context
 * @param decl the declaration / field declaration / typedef node
 * @param enclosing descriptor of the scope containing the declaration
 * @param visibility the visibility in effect at the declaration's position
 */
function extractInlineTypeInSlot(ctx, decl, enclosing, visibility) {
    const slot = decl.childForFieldName('type');
    if (!slot) {
        return;
    }
    // The inline type is always extracted without a template wrap.
    if (RECORD_SPECIFIERS.has(slot.type)) {
        extractRecord(ctx, slot, enclosing, visibility, null);
        return;
    }
    if (slot.type === 'enum_specifier') {
        extractEnum(ctx, slot, enclosing, visibility, null);
    }
}
482
/**
 * Handles a `typedef`: extracts an inline record/enum from its type slot, then
 * emits one 'type'-kind symbol per unqualified declarator.
 *
 * @param ctx extraction context
 * @param decl the `type_definition` node
 * @param enclosing descriptor of the scope containing the typedef
 * @param visibility the visibility in effect at the typedef's position
 * @param wrap template wrap `{ prefix, docNode }`, or null/undefined
 */
function extractTypedef(ctx, decl, enclosing, visibility, wrap) {
    // A typedef whose type slot is an inline record/enum definition also defines that type.
    extractInlineTypeInSlot(ctx, decl, enclosing, visibility);
    // Signature and doc are shared by every declarator of this typedef.
    const anchor = wrap?.docNode ?? decl;
    const signature = cppSignature(ctx, decl, wrap?.prefix);
    const doc = cppDoc(anchor);
    for (const declarator of decl.childrenForFieldName('declarator')) {
        const info = analyze(declarator);
        if (info && !info.qualified) {
            const fqn = topFqn(ctx, info.name);
            const exported = memberExported(enclosing, visibility);
            ctx.symbols.push(makeCppSymbol(ctx, anchor, signature, 'type', info.name, fqn, exported, doc, enclosing.qualifier));
        }
    }
}
495
/**
 * Emits a 'type'-kind symbol for an alias declaration that has a name.
 *
 * @param ctx extraction context
 * @param decl the `alias_declaration` node
 * @param enclosing descriptor of the scope containing the alias
 * @param visibility the visibility in effect at the alias's position
 * @param wrap template wrap `{ prefix, docNode }`, or null/undefined
 */
function extractAlias(ctx, decl, enclosing, visibility, wrap) {
    const name = decl.childForFieldName('name')?.text;
    if (!name) {
        return;
    }
    const anchor = wrap?.docNode ?? decl;
    const signature = cppSignature(ctx, decl, wrap?.prefix);
    const fqn = topFqn(ctx, name);
    const exported = memberExported(enclosing, visibility);
    const doc = cppDoc(anchor);
    ctx.symbols.push(makeCppSymbol(ctx, anchor, signature, 'type', name, fqn, exported, doc, enclosing.qualifier));
}
501
/**
 * Handles a `function_definition` (has a body, or is `= default`/`= delete`).
 *
 * Emits the function/method symbol and, when a `body` field exists, queues the
 * whole definition as a pending body. Using the whole definition (not just the
 * compound statement) means calls in parameter defaults and in a constructor's
 * member-init list (`: v_(compute(x))`) are attributed to this symbol too.
 * `= default` / `= delete` definitions get the symbol but no pending body.
 *
 * @param ctx        extraction context (`symbols`, `bodies` are appended to)
 * @param decl       the function_definition node
 * @param enclosing  enclosing-scope descriptor
 * @param visibility current access level
 * @param wrap       optional wrapper info (`prefix`, `docNode`); may be null/undefined
 */
function extractFunctionDef(ctx, decl, enclosing, visibility, wrap) {
    const declarator = decl.childForFieldName('declarator');
    const info = declarator ? analyze(declarator) : null;
    if (!info || !info.isFunction) {
        return;
    }
    const target = resolveFunctionTarget(ctx, info, enclosing);
    // Scope-qualified (out-of-line) definitions are always treated as exported.
    let exported = true;
    if (!info.qualified) {
        exported = declExported(decl, enclosing, visibility);
    }
    const anchor = wrap?.docNode ?? decl;
    const signature = cppSignature(ctx, decl, wrap?.prefix);
    const docLine = cppDoc(anchor);
    const symbol = makeCppSymbol(ctx, anchor, signature, target.kind, info.name, target.fqn, exported, docLine, target.qualifier);
    ctx.symbols.push(symbol);
    if (!decl.childForFieldName('body')) {
        return;
    }
    ctx.bodies.push({ symbolId: symbol.id, body: decl, className: target.className ?? undefined });
}
521
/**
 * Handles a `declaration` / `field_declaration`: function declaration(s),
 * variable/field declaration(s), and/or an inline record/enum definition in
 * the `type:` slot (`struct Named {…} g;` / a nested type).
 *
 * Signature, doc line and anchor are declaration-level, so they are computed
 * once and shared by the function branch and every variable.
 *
 * @param ctx        extraction context; symbols are appended to `ctx.symbols`
 * @param decl       the declaration node
 * @param enclosing  enclosing-scope descriptor
 * @param visibility current access level
 * @param wrap       optional wrapper info (`prefix`, `docNode`); may be null/undefined
 */
function extractDeclaration(ctx, decl, enclosing, visibility, wrap) {
    extractInlineTypeInSlot(ctx, decl, enclosing, visibility);
    const declarators = decl.childrenForFieldName('declarator');
    const anchor = wrap?.docNode ?? decl;
    const signature = cppSignature(ctx, decl, wrap?.prefix);
    const docLine = cppDoc(anchor);
    const isSole = declarators.length === 1;
    for (const declarator of declarators) {
        const info = analyze(declarator);
        if (!info) {
            continue;
        }
        if (!info.isFunction) {
            extractVariable(ctx, decl, declarator, info, enclosing, visibility, isSole, docLine);
            continue;
        }
        // Bodiless function declaration: a symbol only, never a pending body.
        const target = resolveFunctionTarget(ctx, info, enclosing);
        let exported = true;
        if (!info.qualified) {
            exported = declExported(decl, enclosing, visibility);
        }
        ctx.symbols.push(makeCppSymbol(ctx, anchor, signature, target.kind, info.name, target.fqn, exported, docLine, target.qualifier));
    }
}
546
/**
 * Emits a `variable` symbol for one non-function declarator.
 *
 * Scope-qualified declarators (an out-of-line static-member definition such as
 * `int C::n = …;`) are skipped because the in-class declaration already exists.
 * Inside a class the FQN is member-style; otherwise it is file-level.
 *
 * Initializer calls (`Logger log = makeLogger();`) are attributed to the
 * variable only when it is the sole declarator, where the initializer pairs
 * unambiguously.
 *
 * @param ctx            extraction context (`symbols`, `bodies` are appended to)
 * @param decl           the enclosing declaration node
 * @param declarator     the declarator being processed
 * @param info           analysis result for `declarator` (`name`, `qualified`)
 * @param enclosing      enclosing-scope descriptor
 * @param visibility     current access level
 * @param soleDeclarator true when `decl` has exactly one declarator
 * @param doc            declaration-level doc line (or null)
 */
function extractVariable(ctx, decl, declarator, info, enclosing, visibility, soleDeclarator, doc) {
    if (info.qualified) {
        return;
    }
    let className = null;
    if (enclosing.inClass) {
        className = enclosing.className;
    }
    const fqn = className ? memberFqn(ctx, className, info.name) : topFqn(ctx, info.name);
    const signature = variableSig(decl, declarator);
    const exported = declExported(decl, enclosing, visibility);
    const symbol = makeCppSymbol(ctx, decl, signature, 'variable', info.name, fqn, exported, doc, enclosing.qualifier);
    ctx.symbols.push(symbol);
    if (!soleDeclarator) {
        return;
    }
    // An in-class field carries its initializer on the declaration's `default_value` field.
    const initializer = initializerNode(declarator) ?? decl.childForFieldName('default_value');
    if (initializer) {
        ctx.bodies.push({ symbolId: symbol.id, body: initializer, className: className ?? undefined });
    }
}
561
/**
 * Declarator node types that merely wrap an inner declarator.
 *
 * `block_pointer_declarator` covers Objective-C / Clang blocks:
 * `void (^handler)(int)` nests the name under it, and it carries a
 * `declarator:` field so innerDeclaratorChild reaches the inner identifier.
 * It is never produced by tree-sitter-c/-cpp, so it is inert there.
 */
const WRAPPER_DECLARATORS = new Set([
    'pointer_declarator', 'reference_declarator', 'parenthesized_declarator',
    'array_declarator', 'init_declarator', 'block_pointer_declarator',
]);
/**
 * Innermost name-bearing node types: everything in DECL_OR_NAME that is NOT a
 * wrapper or function declarator. Used to tell a REAL function (its name sits
 * DIRECTLY under the function_declarator) from a function POINTER, where a
 * parenthesized_declarator is interposed (`int (*fp)(int)` is a variable).
 */
const NAME_NODES = new Set([
    'identifier', 'field_identifier', 'qualified_identifier',
    'operator_name', 'destructor_name', 'operator_cast',
    'type_identifier', 'template_function', 'template_method',
]);
/** Every node type accepted as a positional inner declarator or name. */
const DECL_OR_NAME = new Set(
    [...WRAPPER_DECLARATORS].concat(
        ['function_declarator', 'abstract_function_declarator'],
        [...NAME_NODES],
    ),
);
593
/**
 * Descends pointer/reference/parenthesized/array/init wrappers and the
 * function_declarator down to the innermost name node, recording whether a
 * function_declarator was crossed (meaning it is a function/method).
 *
 * A REAL function/method has its name DIRECTLY under the function_declarator
 * (`int foo()`, `int* C::bar()`). A function POINTER interposes a
 * parenthesized_declarator (`int (*cb)(int)`), so it is descended WITHOUT
 * being marked a function and ends up routed as a variable/field.
 *
 * The descent is capped at 24 steps.
 *
 * @param declarator the declarator node to analyze
 * @returns `{ name, isFunction, qualified, classScope, nsScopes }`, or null
 *          when no usable name is found. An empty name counts as "no name":
 *          e.g. the empty `identifier` tree-sitter inserts for an unbraced
 *          `extern "C" struct S {…};` record, whose record + fields are
 *          emitted separately by extractInlineTypeInSlot.
 */
function analyze(declarator) {
    let current = declarator;
    let crossedFunction = false;
    let steps = 0;
    while (current && steps < 24) {
        steps += 1;
        const kind = current.type;
        const isFuncDeclarator = kind === 'function_declarator' || kind === 'abstract_function_declarator';
        if (!isFuncDeclarator && !WRAPPER_DECLARATORS.has(kind)) {
            break;
        }
        const inner = innerDeclaratorChild(current);
        if (isFuncDeclarator && inner && NAME_NODES.has(inner.type)) {
            crossedFunction = true;
        }
        current = inner;
    }
    if (!current) {
        return null;
    }
    const isQualified = current.type === 'qualified_identifier';
    const resolved = isQualified ? qualifiedName(current) : simpleName(current);
    if (!resolved || !resolved.name) {
        return null;
    }
    return {
        name: resolved.name,
        isFunction: crossedFunction || resolved.isConversion,
        qualified: isQualified,
        classScope: isQualified ? resolved.classScope : null,
        nsScopes: isQualified ? resolved.nsScopes : [],
    };
}
634
/**
 * Returns the declarator nested inside `node`.
 *
 * The `declarator` field wins when present. Otherwise the first named child
 * whose type is in DECL_OR_NAME is used, because reference_declarator holds
 * its inner declarator positionally, without a field name.
 *
 * @param node a declarator node
 * @returns the inner node, or null when there is none
 */
function innerDeclaratorChild(node) {
    const viaField = node.childForFieldName('declarator');
    const positional = () => node.namedChildren.find((child) => DECL_OR_NAME.has(child.type));
    return viaField || positional() || null;
}
644
/**
 * Reads the name from an unqualified name node.
 *
 * - plain identifiers, operator names and destructor names: trimmed node text;
 * - `operator_cast` (`operator bool() const`): named `operator <type>`, with
 *   params/qualifiers dropped, and flagged as a conversion;
 * - `template_function` / `template_method`: resolved through the `name` field.
 *
 * @param node a name-bearing node
 * @returns `{ name, isConversion }`, or null for any other node type
 */
function simpleName(node) {
    const kind = node.type;
    if (kind === 'operator_cast') {
        const castType = node.childForFieldName('type');
        return { name: normalizeSignature(`operator ${castType?.text ?? ''}`), isConversion: true };
    }
    if (kind === 'template_function' || kind === 'template_method') {
        const inner = node.childForFieldName('name');
        return inner ? simpleName(inner) : null;
    }
    const isPlainName = kind === 'identifier' ||
        kind === 'field_identifier' ||
        kind === 'type_identifier' ||
        kind === 'operator_name' ||
        kind === 'destructor_name';
    return isPlainName ? { name: node.text.trim(), isConversion: false } : null;
}
666
/**
 * Walks the right-nested `qualified_identifier` scope chain.
 *
 * The final `name` segment is the symbol; the immediately-enclosing scope is
 * the "class"; earlier scopes are namespaces. Scope segments may be
 * namespace_identifier, type_identifier, or template_type (`Box<T>::get`).
 * Scopes that cannot be named are dropped from the chain. The walk is capped
 * at 24 levels.
 *
 * @param qi the outermost qualified_identifier node
 * @returns `{ name, classScope, nsScopes, isConversion }`, or null when no
 *          final name segment can be read
 */
function qualifiedName(qi) {
    const segments = [];
    let cursor = qi;
    let leaf = null;
    let hops = 0;
    while (cursor && hops < 24) {
        hops += 1;
        if (cursor.type !== 'qualified_identifier') {
            leaf = cursor;
            break;
        }
        const scopeNode = cursor.childForFieldName('scope');
        const scopeText = scopeNode ? scopeSimpleName(scopeNode) : null;
        if (scopeText) {
            segments.push(scopeText);
        }
        const next = cursor.childForFieldName('name');
        if (!next) {
            break;
        }
        if (next.type !== 'qualified_identifier') {
            leaf = next;
            break;
        }
        cursor = next;
    }
    const base = leaf ? simpleName(leaf) : null;
    if (!base) {
        return null;
    }
    return {
        name: base.name,
        classScope: segments.length > 0 ? segments[segments.length - 1] : null,
        nsScopes: segments.slice(0, -1),
        isConversion: base.isConversion,
    };
}
703
/**
 * Reads the textual name of one scope segment of a qualified identifier.
 *
 * @param node the `scope` node (namespace_identifier, type_identifier,
 *             identifier, or template_type)
 * @returns the segment text (for a template_type, the text of its `name`
 *          field), or null when the node type is not one of those or the
 *          template has no name
 */
function scopeSimpleName(node) {
    const kind = node.type;
    if (kind === 'template_type') {
        return node.childForFieldName('name')?.text ?? null;
    }
    const isPlainScope = kind === 'namespace_identifier' || kind === 'type_identifier' || kind === 'identifier';
    return isPlainScope ? node.text : null;
}
715
/**
 * Kind / FQN / qualifier for a function-shaped declarator.
 *
 * - A qualified `Class::name` is an out-of-line method keyed on the class
 *   scope (the Go-receiver pattern, but cross-file). A `ns::freeFn`
 *   (namespace-qualified free function) is also keyed as a method on its last
 *   scope segment: a known, wrong-edge-free imperfection (the qualified call
 *   `ns::freeFn()` still resolves correctly).
 * - In-class declarators are methods.
 * - Everything else is a free function.
 *
 * Callers mark out-of-line defs/decls exported=true: the in-class declaration
 * in the header carries the real access, and marking the def exported only
 * widens cross-file recall, never adds a wrong edge.
 *
 * @param ctx       extraction context (used to build FQNs)
 * @param info      result of `analyze` for the declarator
 * @param enclosing enclosing-scope descriptor
 * @returns `{ kind, className, fqn, qualifier }`
 */
function resolveFunctionTarget(ctx, info, enclosing) {
    if (!info.qualified) {
        const asMethod = Boolean(enclosing.inClass);
        return {
            kind: asMethod ? 'method' : 'function',
            className: asMethod ? enclosing.className : null,
            fqn: asMethod ? memberFqn(ctx, enclosing.className, info.name) : topFqn(ctx, info.name),
            qualifier: enclosing.qualifier,
        };
    }
    const scopePath = [...info.nsScopes, info.classScope].filter(Boolean).join('::');
    const qualifier = joinQualifier(enclosing.qualifier, scopePath);
    if (info.classScope) {
        return { kind: 'method', className: info.classScope, fqn: memberFqn(ctx, info.classScope, info.name), qualifier };
    }
    // Qualified but with no nameable scope: treated as a free function.
    return { kind: 'function', className: null, fqn: topFqn(ctx, info.name), qualifier };
}
742
+ // ── helpers ────────────────────────────────────────────────────────────────
743
/**
 * Normalizes an `access_specifier` node to its keyword.
 *
 * The node text is `public` / `private` / `protected`; the trailing `:` is a
 * separate token, but one colon is stripped defensively before comparing.
 *
 * @param node the access_specifier node
 * @returns `'public'`, `'private'`, `'protected'`, or null for anything else
 */
function accessText(node) {
    const keyword = node.text.replace(':', '').trim();
    const recognized = ['public', 'private', 'protected'];
    return recognized.includes(keyword) ? keyword : null;
}
749
/**
 * Exported = public access.
 *
 * Free functions / top-level / namespace declarations are always exported;
 * class members need public visibility AND an exported container.
 * Private/protected members are NOT exported (the handoff rule; class default
 * is private, struct/union default public).
 *
 * @param enclosing  enclosing-scope descriptor (`inClass`, `exported`)
 * @param visibility current access level of the member
 * @returns whether the declaration counts as exported
 */
function memberExported(enclosing, visibility) {
    if (enclosing.inClass) {
        return enclosing.exported && visibility === 'public';
    }
    return true;
}
758
/**
 * Detects a direct `static` storage-class specifier
 * (`static int f(){}` / `static int g;`).
 *
 * `static inline` carries two specifiers and only the `static` one matches;
 * `extern` / `register` / `inline` / `thread_local` never do.
 *
 * @param decl the declaration node whose named children are scanned
 * @returns true when a `storage_class_specifier` child reads exactly `static`
 */
function hasStaticStorage(decl) {
    for (const child of decl.namedChildren) {
        if (child.type === 'storage_class_specifier' && child.text === 'static') {
            return true;
        }
    }
    return false;
}
764
/**
 * Exportedness for a function/variable DECLARATION, accounting for file-scope
 * `static` internal linkage.
 *
 * At file scope (not in a class), a `static` free function or global has
 * internal linkage (C's primary privacy mechanism, and also internal for a
 * C++ file-scope free function), so it is NOT exported. A class-member
 * `static` (inClass) means "no implicit this", NOT internal linkage, so it
 * routes through the visibility-based `memberExported` untouched. Records,
 * enums, and typedefs cannot legally take `static`, so they keep
 * `memberExported` directly.
 *
 * Why flipping `exported` here is safe: `exported` is never hashed into the
 * symbol id, and the extract-time engine (`resolveCalls`) never reads it. The
 * only query-time edge consumer is `isCallerOf`'s cross-file member-ref gate
 * (code-index.ts), which drops an unexported target. A flipped file-scope
 * `static` CAN newly hit that gate: a cross-file member ref `obj.foo()` paired
 * by name with a now-unexported top-level static `foo()` would be dropped.
 * But such a member-call to free-function match is itself a wrong-kind false
 * positive (a free function cannot be called via `obj.`), so the drop is a
 * precision win, never a lost or wrong edge. Otherwise `exported` feeds only
 * the overview exports/internal split, search ranking boost, entry-point
 * detection, and get_context.
 *
 * @param decl       the declaration node (scanned for `static`)
 * @param enclosing  enclosing-scope descriptor
 * @param visibility current access level
 * @returns whether the declaration counts as exported
 */
function declExported(decl, enclosing, visibility) {
    return enclosing.inClass
        ? memberExported(enclosing, visibility)
        : !hasStaticStorage(decl);
}
787
/**
 * Reads the declared name of a record/enum specifier.
 *
 * @param decl the specifier node
 * @returns the text of the `name` field; for a `template_type` name
 *          (a `Box<int>` specialization) the inner template name `Box`;
 *          null when the node has no name
 */
function recordName(decl) {
    const nameNode = decl.childForFieldName('name');
    if (!nameNode) {
        return null;
    }
    if (nameNode.type !== 'template_type') {
        // type_identifier: covers scoped/unscoped enums and records.
        return nameNode.text;
    }
    return nameNode.childForFieldName('name')?.text ?? null;
}
795
/**
 * Records an include directive as an import whose `sourceModule` is the
 * header path. Handles both the angle-bracket form (`system_lib_string`,
 * brackets stripped) and the quoted form (`string_literal`, read from its
 * `string_content` child). Any other path form produces no import.
 *
 * @param ctx  extraction context; the import is appended to `ctx.imports`
 * @param node the include directive node (must expose a `path` field)
 */
function extractInclude(ctx, node) {
    const pathNode = node.childForFieldName('path');
    if (!pathNode) {
        return;
    }
    let header = null;
    switch (pathNode.type) {
        case 'system_lib_string':
            header = pathNode.text.replace(/^<|>$/g, '');
            break;
        case 'string_literal': {
            const content = pathNode.namedChildren.find((child) => child.type === 'string_content');
            header = content?.text ?? null;
            break;
        }
        default:
            break;
    }
    if (!header) {
        return;
    }
    const entry = {
        file: ctx.fileInfo.path,
        sourceModule: header,
        // A header has no named binding, so it is recorded namespace-style.
        importedNames: [{ name: '*' }],
        line: node.startPosition.row + 1,
    };
    ctx.imports.push(entry);
}
816
/**
 * Doc = a line or block comment immediately above the declaration (the
 * Go/Dart `commentDocLine` + `isTrailingComment` pattern). For a templated
 * declaration the anchor is the template_declaration, because the comment is
 * that node's sibling.
 *
 * The previous named sibling qualifies only when it is a `comment` ending on
 * the row directly above the anchor and is not a trailing comment.
 *
 * @param anchor the node whose preceding sibling is inspected
 * @returns the doc line produced by `commentDocLine`, or null
 */
function cppDoc(anchor) {
    const candidate = anchor.previousNamedSibling;
    if (!candidate || candidate.type !== 'comment') {
        return null;
    }
    const endsOnRowAbove = candidate.endPosition.row === anchor.startPosition.row - 1;
    if (!endsOnRowAbove || isTrailingComment(candidate)) {
        return null;
    }
    return commentDocLine(candidate.text);
}
829
/**
 * Signature = source text from the declaration start (after an optional
 * `template<…>` prefix) up to the body / member-init list, with one trailing
 * `;` or `{` stripped. Keeps the `template<…>` preamble and trailing
 * `const` / `noexcept` / `override` / `= default` / `= 0` qualifiers for
 * display.
 *
 * The text is cut at the earliest of: the declaration end, the `body` field,
 * any `field_initializer_list` child, and (function definitions only) any
 * `declaration` child. The last case covers K&R old-style C, where parameter
 * declarations (`int a;`) sit between the function_declarator and the body,
 * so `int sum(a, b)` is the signature rather than `int sum(a, b) int a; int b`.
 *
 * @param ctx    extraction context (`ctx.content` is the file text)
 * @param decl   the declaration node
 * @param prefix text prepended before normalization (default `''`)
 * @returns the normalized signature string
 */
function cppSignature(ctx, decl, prefix = '') {
    const cutPoints = [decl.endIndex];
    const body = decl.childForFieldName('body');
    if (body) {
        cutPoints.push(body.startIndex);
    }
    const isFunctionDefinition = decl.type === 'function_definition';
    for (const child of decl.namedChildren) {
        const clamps = child.type === 'field_initializer_list' ||
            (isFunctionDefinition && child.type === 'declaration');
        if (clamps) {
            cutPoints.push(child.startIndex);
        }
    }
    const cut = Math.min(...cutPoints);
    const raw = prefix + ctx.content.slice(decl.startIndex, cut);
    return normalizeSignature(raw).replace(/[;{]\s*$/, '').trimEnd();
}
852
/**
 * Variable signature = `<storage/quals> <type> <declarator-core>`, dropping
 * the `= initializer`.
 *
 * It is built from the type + declarator, so a multi-declarator `int a = 1, b`
 * yields a clean "int a" / "int b" per symbol. When the type slot is an inline
 * record/enum (`struct Named {…} g;`), the type's NAME is used rather than its
 * whole `{…}` body, which would bloat the signature and the hashed id. An
 * unnamed inline type is rendered as `(anonymous)`.
 *
 * @param decl       the declaration node
 * @param declarator the declarator of the variable being described
 * @returns the normalized signature string
 */
function variableSig(decl, declarator) {
    const pieces = decl.namedChildren
        .filter((child) => child.type === 'storage_class_specifier' || child.type === 'type_qualifier')
        .map((child) => child.text);
    const typeNode = decl.childForFieldName('type');
    if (typeNode) {
        const inlineDefinition = RECORD_SPECIFIERS.has(typeNode.type) || typeNode.type === 'enum_specifier';
        if (inlineDefinition) {
            const typeName = typeNode.childForFieldName('name');
            pieces.push(typeName ? typeName.text : '(anonymous)');
        }
        else {
            pieces.push(typeNode.text);
        }
    }
    pieces.push(declaratorCore(declarator));
    return normalizeSignature(pieces.join(' '));
}
872
/**
 * The declarator text without a trailing `= initializer`, so the
 * signature/hash stay stable across initializer edits.
 *
 * @param d a declarator node
 * @returns for an `init_declarator`, the text of its inner `declarator` field
 *          (falling back to the whole text when that field is absent);
 *          otherwise the node's own text
 */
function declaratorCore(d) {
    const inner = d.type === 'init_declarator' ? d.childForFieldName('declarator') : null;
    return (inner || d).text;
}
881
/**
 * The initializer expression of a variable declarator, if any (used for call
 * attribution). An `init_declarator` carries it in its `value:` field; an
 * in-class field carries it as the declaration's `default_value:`, which the
 * caller handles.
 *
 * @param d a declarator node
 * @returns the `value` field of an `init_declarator`, otherwise null
 */
function initializerNode(d) {
    return d.type === 'init_declarator' ? d.childForFieldName('value') : null;
}
889
/**
 * Builds the fully-qualified name of a file-level symbol: `<file path>:<name>`.
 *
 * @param ctx  extraction context (`ctx.fileInfo.path` is the file path)
 * @param name the symbol name
 * @returns the FQN string
 */
function topFqn(ctx, name) {
    const { path } = ctx.fileInfo;
    return `${path}:${name}`;
}
892
/**
 * Builds the fully-qualified name of a class member:
 * `<file path>:<class name>.<member name>`.
 *
 * @param ctx       extraction context (`ctx.fileInfo.path` is the file path)
 * @param className the owning class/scope name
 * @param name      the member name
 * @returns the FQN string
 */
function memberFqn(ctx, className, name) {
    const { path } = ctx.fileInfo;
    return `${path}:${className}.${name}`;
}
895
/**
 * Joins two qualifier fragments with `::`. The qualifier only disambiguates
 * hashed ids, so any unique join works.
 *
 * @param a outer qualifier (may be empty)
 * @param b inner qualifier (may be empty)
 * @returns `a::b` when both are non-empty; otherwise whichever one is
 *          non-empty (or `b` when both are empty)
 */
function joinQualifier(a, b) {
    if (a && b) {
        return `${a}::${b}`;
    }
    return a ? a : b;
}
903
/**
 * Builds a symbol record and assigns it a collision-free id.
 *
 * Occurrences are counted per (name, kind, signature, qualifier) in
 * `ctx.occurrences`. The first occurrence hashes the qualifier as-is; each
 * later one hashes `<qualifier>#<n>`, so repeated identical declarations in
 * one file get distinct ids. The id is computed from the full signature,
 * while the stored `signature` is truncated to SIGNATURE_DISPLAY_CAP.
 *
 * @param ctx       extraction context (`fileInfo`, `occurrences`)
 * @param node      node supplying the start/end lines (stored 1-based)
 * @param signature full signature text
 * @param kind      symbol kind
 * @param name      symbol name
 * @param fqn       fully-qualified name
 * @param exported  exported flag
 * @param doc       doc line or null
 * @param qualifier id-disambiguating qualifier (default `''`)
 * @returns the symbol record
 */
function makeCppSymbol(ctx, node, signature, kind, name, fqn, exported, doc, qualifier = '') {
    const { path: file, language } = ctx.fileInfo;
    const occurrenceKey = `${name}\0${kind}\0${signature}\0${qualifier}`;
    const ordinal = (ctx.occurrences.get(occurrenceKey) ?? 0) + 1;
    ctx.occurrences.set(occurrenceKey, ordinal);
    let idQualifier = qualifier;
    if (ordinal !== 1) {
        idQualifier = `${qualifier}#${ordinal}`;
    }
    const id = symbolId(file, name, kind, signature, idQualifier);
    return {
        id,
        name,
        fqn,
        kind,
        file,
        startLine: node.startPosition.row + 1,
        endLine: node.endPosition.row + 1,
        signature: signature.slice(0, SIGNATURE_DISPLAY_CAP),
        doc,
        exported,
        language,
    };
}
922
+ // ── shared with the Objective-C extractor ───────────────────────────────────
923
+ // Objective-C is a C SUPERSET: `objc.ts` reuses this module's C-subset machinery
924
+ // and implements only the OO surface (`@interface`/`@implementation`/`@protocol`/
925
+ // `@property`/methods/message sends) itself. For every NON-OO top-level node (C
926
+ // functions, structs, enums, typedefs, globals, `#import`) objc.ts delegates to the
927
+ // shared `handleMember` dispatcher at FILE SCOPE — so the static-linkage gate, inline
928
+ // `struct Named {…} g;` types, K&R signatures, and `#import` all behave identically to
929
+ // the C extractor with zero divergence. It also reuses `cppMemberCallInfo` (C-subset
930
+ // `p->fn()`/`Foo::bar()` calls), `analyze` (the `@property` name), the FQN + symbol
931
+ // constructors, the doc reader, and the skip sets (incl. the `preproc_*` group set, so
932
+ // `#ifndef` include guards are recursed transparently). These are pure module-scope
933
+ // helpers; re-exporting them is inert for the cpp/c extractors (re-dogfood confirms).
934
+ export { handleMember, analyze, cppDoc, cppMemberCallInfo, topFqn, memberFqn, makeCppSymbol, CPP_SKIP_TYPES, CPP_FUNCTION_BODY_SKIP_TYPES, PREPROC_GROUPS, };