codedeep-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +177 -0
- package/dist/config.js +223 -0
- package/dist/git/analyzer.js +177 -0
- package/dist/git/git-service.js +568 -0
- package/dist/git/head-watcher.js +113 -0
- package/dist/git/runner.js +204 -0
- package/dist/index.js +138 -0
- package/dist/indexer/code-index.js +1801 -0
- package/dist/indexer/complexity.js +633 -0
- package/dist/indexer/extractor.js +354 -0
- package/dist/indexer/languages/cpp.js +934 -0
- package/dist/indexer/languages/csharp.js +854 -0
- package/dist/indexer/languages/dart.js +777 -0
- package/dist/indexer/languages/go.js +665 -0
- package/dist/indexer/languages/java.js +507 -0
- package/dist/indexer/languages/kotlin.js +709 -0
- package/dist/indexer/languages/objc.js +397 -0
- package/dist/indexer/languages/php.js +771 -0
- package/dist/indexer/languages/python.js +455 -0
- package/dist/indexer/languages/ruby.js +697 -0
- package/dist/indexer/languages/rust.js +754 -0
- package/dist/indexer/languages/swift.js +691 -0
- package/dist/indexer/languages/typescript.js +485 -0
- package/dist/indexer/parser.js +175 -0
- package/dist/indexer/pipeline.js +342 -0
- package/dist/indexer/scanner.js +279 -0
- package/dist/indexer/watcher.js +353 -0
- package/dist/logger.js +16 -0
- package/dist/server.js +170 -0
- package/dist/tools/common.js +207 -0
- package/dist/tools/find-references.js +224 -0
- package/dist/tools/find-symbol.js +94 -0
- package/dist/tools/get-context.js +370 -0
- package/dist/tools/impact.js +218 -0
- package/dist/tools/overview.js +482 -0
- package/dist/tools/search-structure.js +303 -0
- package/dist/types.js +61 -0
- package/grammars/tree-sitter-c.wasm +0 -0
- package/grammars/tree-sitter-c_sharp.wasm +0 -0
- package/grammars/tree-sitter-cpp.wasm +0 -0
- package/grammars/tree-sitter-dart.wasm +0 -0
- package/grammars/tree-sitter-go.wasm +0 -0
- package/grammars/tree-sitter-java.wasm +0 -0
- package/grammars/tree-sitter-javascript.wasm +0 -0
- package/grammars/tree-sitter-kotlin.wasm +0 -0
- package/grammars/tree-sitter-objc.wasm +0 -0
- package/grammars/tree-sitter-php.wasm +0 -0
- package/grammars/tree-sitter-python.wasm +0 -0
- package/grammars/tree-sitter-ruby.wasm +0 -0
- package/grammars/tree-sitter-rust.wasm +0 -0
- package/grammars/tree-sitter-swift.wasm +0 -0
- package/grammars/tree-sitter-tsx.wasm +0 -0
- package/grammars/tree-sitter-typescript.wasm +0 -0
- package/package.json +67 -0
|
@@ -0,0 +1,934 @@
|
|
|
1
|
+
import { collectAmbiguousTypeNames } from '../extractor.js';
|
|
2
|
+
import { cFamilyBooleanOperatorKind, computeComplexity } from '../complexity.js';
|
|
3
|
+
import { RECEIVER_OPAQUE } from '../../types.js';
|
|
4
|
+
import { SIGNATURE_DISPLAY_CAP, commentDocLine, isTrailingComment, normalizeSignature, resolveCalls, symbolId, } from '../extractor.js';
|
|
5
|
+
// ── skip sets ────────────────────────────────────────────────────────────────
|
|
6
|
+
// Function-body skip set handed to resolveCalls (consulted by walkDecorators).
// C++ has no decorator selector, so that walk never runs; a nested
// `function_definition` is still listed for parity with the other extractors,
// because a local function owns its own scope.
const CPP_FUNCTION_BODY_SKIP_TYPES = new Set([
    'function_definition',
]);
|
|
9
|
+
// Preprocessor conditional group nodes (`#if`/`#ifdef`/…). The member walk
|
|
10
|
+
// (extractScope/handleMember) recurses through these transparently to extract
|
|
11
|
+
// guarded members; the resolveCalls walk does NOT (they're in CPP_SKIP_TYPES,
|
|
12
|
+
// below). The continuations (`#else`/`#elif`) are nested CHILDREN of the head
|
|
13
|
+
// `#if` (under its `alternative:` field), each compiled independently.
|
|
14
|
+
// tree-sitter-cpp maps BOTH `#ifdef` and `#ifndef` to `preproc_ifdef`, and BOTH
|
|
15
|
+
// `#elifdef` and `#elifndef` to `preproc_elifdef` (verified) — so there is no
|
|
16
|
+
// `preproc_ifndef`/`preproc_elifndef` node type to list.
|
|
17
|
+
const PREPROC_GROUPS = new Set([
    // Heads: `#if`, and `#ifdef`/`#ifndef` (both map to `preproc_ifdef`).
    'preproc_if',
    'preproc_ifdef',
    // Continuations nested under the head: `#else`, `#elif`, and
    // `#elifdef`/`#elifndef` (both map to `preproc_elifdef`).
    'preproc_else',
    'preproc_elif',
    'preproc_elifdef',
]);
|
|
24
|
+
// `#else`/`#elif`-family continuation branches (a subset of PREPROC_GROUPS) — each
|
|
25
|
+
// compiles independently of the then-branch, so visibility resets to the enclosing
|
|
26
|
+
// baseline when entering one (see handleMember).
|
|
27
|
+
const PREPROC_CONTINUATIONS = new Set([
    'preproc_else', // `#else`
    'preproc_elif', // `#elif`
    'preproc_elifdef', // `#elifdef` / `#elifndef`
]);
|
|
32
|
+
// walkCalls skip set: each function/method/variable owns a per-member PendingBody,
|
|
33
|
+
// so the call walk must NOT re-descend into a function body or a type body (their
|
|
34
|
+
// calls are attributed via their own PendingBody) — pruning these also stops a
|
|
35
|
+
// nested local class/function's calls from mis-attributing to an enclosing body.
|
|
36
|
+
// The `preproc_*` conditionals are pruned HERE (but NOT from the member walk): a
|
|
37
|
+
// function-like-macro CONDITION (`#if FOO(3)`) parses as a `call_expression`, so
|
|
38
|
+
// descending into it would emit a SPURIOUS resolved call edge to a same-named real
|
|
39
|
+
// function. Guarded members keep their own PendingBodies (created by extractScope,
|
|
40
|
+
// which DOES descend preproc), so pruning here loses only top-level free-statement
|
|
41
|
+
// calls inside a guard (none exist in valid C++). `namespace_definition` stays
|
|
42
|
+
// ABSENT → DESCENDED (namespace-level free calls reach the module-root walk).
|
|
43
|
+
// `lambda_expression` is ABSENT → DESCENDED (a closure's calls roll into the
|
|
44
|
+
// enclosing function — the Go func_literal / Java lambda rule).
|
|
45
|
+
const CPP_SKIP_TYPES = new Set([
    // Function and type bodies: their calls are attributed through their own
    // PendingBody, so the call walk must not descend into them again.
    'function_definition',
    'class_specifier',
    'struct_specifier',
    'union_specifier',
    'enum_specifier',
    'template_declaration',
    // Preprocessor conditionals: a function-like-macro condition (`#if FOO(3)`)
    // parses as a `call_expression` and would otherwise yield a spurious edge.
    ...PREPROC_GROUPS,
]);
|
|
54
|
+
// ── complexity (cyclomatic + cognitive) ────────────────────────────────────────
|
|
55
|
+
// Cyclomatic = McCabe's decision-count; cognitive = the published SonarSource
|
|
56
|
+
// Cognitive Complexity whitepaper (a free spec, also implemented by gocognit,
|
|
57
|
+
// eslint-plugin-sonarjs, rust-code-analysis). The increment shapes are behaviorally
|
|
58
|
+
// compatible with SonarQube's C-family rules, verified against in-repo tools
|
|
59
|
+
// (rust-code-analysis for cyclomatic; the public whitepaper fixtures for cognitive).
|
|
60
|
+
// ONE shared options set drives cpp + c + objc (the AST dump confirmed
|
|
61
|
+
// tree-sitter-objc reuses the C control-flow node names AND names its catch node
|
|
62
|
+
// `catch_clause`, identical to cpp — so no objc fork and no engine change).
|
|
63
|
+
// Cyclomatic decision nodes (McCabe decision-count). C-family is the 2nd boolean-free
|
|
64
|
+
// language after Swift (the C-family cyclomatic convention counts no
|
|
65
|
+
// `&&`/`||` cyclomatically), so there is NO `extraDecisionPredicate`. Each `case` AND `default` adds +1 (a
|
|
66
|
+
// `default:` is a `case_statement` with no `value:` — verified — so one node type
|
|
67
|
+
// covers both; the Swift `switch_entry` precedent, a deliberate divergence from the
|
|
68
|
+
// Java/Go/TS default-EXCLUDED rule). Each C++ `lambda_expression` adds +1 AND is
|
|
69
|
+
// descended (the Go func_literal rule — NOT in the cyclomatic skip set), so its inner
|
|
70
|
+
// decisions also count toward the function. `for_range_loop` (C++ range-for) and
|
|
71
|
+
// `lambda_expression` are inert on c/objc (absent); `case_statement` covers all three.
|
|
72
|
+
// NOT counted: the `switch` container, try/catch, goto, break, continue, and `&&`/`||`.
|
|
73
|
+
// ObjC `^{}` blocks (`block_literal`) are NOT counted cyclomatically (only LambdaExpr is).
|
|
74
|
+
const CFAMILY_DECISION_NODE_TYPES = new Set([
    // Branches.
    'if_statement',
    // Loops (`for_range_loop` is the C++ range-for; absent on c/objc).
    'for_statement',
    'for_range_loop',
    'while_statement',
    'do_statement',
    // Each `case` AND `default` label (a `default:` is a `case_statement` without `value:`).
    'case_statement',
    // Ternary `?:`.
    'conditional_expression',
    // C++ lambdas: +1 each, and their inner decisions are counted as well.
    'lambda_expression',
]);
|
|
84
|
+
// The complexity boundary — a SEPARATE skip set from CPP_SKIP_TYPES (the resolveCalls
|
|
85
|
+
// boundary). TWO deliberate differences:
|
|
86
|
+
// (1) function_definition / method_definition are ABSENT. The PendingBody.body for a
|
|
87
|
+
// cpp function IS the `function_definition` node and for an objc method IS the
|
|
88
|
+
// `method_definition` node (cpp.ts extractFunction / objc.ts extractMethod). The
|
|
89
|
+
// engine's root guards (computeComplexity's `skipTypes.has(body.type)` continue +
|
|
90
|
+
// computeCognitive's `visit` early-return) would SKIP the whole function if its own
|
|
91
|
+
// node type were in this set — zeroing every C-family function. (resolveCalls is
|
|
92
|
+
// unaffected: walkCalls skip-tests only CHILDREN, never the root body.) Local
|
|
93
|
+
// *type* bodies are still pruned (the `*_specifier` entries) so a local class's
|
|
94
|
+
// members don't leak in; a GNU nested function (a `function_definition` inside a
|
|
95
|
+
// body — a non-standard extension, absent from valid C/C++) then rolls INTO the
|
|
96
|
+
// enclosing function — a rare documented over-count, matching the C#/Dart per-member
|
|
97
|
+
// model for local functions.
|
|
98
|
+
// (2) PREPROC_GROUPS are ABSENT → preproc conditionals are DESCENDED, so control flow
|
|
99
|
+
// guarded by `#if`/`#else` IS counted (both branches → a syntactic over-count vs
|
|
100
|
+
// a preprocessor-evaluated active-branch count — the documented C# `#if`
|
|
101
|
+
// divergence, inherent to a tree-sitter extractor). resolveCalls prunes them (the
|
|
102
|
+
// spurious `#if FOO(3)`-as-call-expression edge), but for complexity a macro-call
|
|
103
|
+
// condition is not a decision node.
|
|
104
|
+
// lambda_expression / block_literal stay ABSENT → DESCENDED (their inner flow counts).
|
|
105
|
+
const CFAMILY_COMPLEXITY_SKIP_TYPES = new Set([
    // Local type bodies are pruned so a local class's members do not leak into
    // the enclosing function's score. `function_definition`, the preprocessor
    // groups, lambdas and ObjC blocks are deliberately NOT listed here.
    'class_specifier',
    'struct_specifier',
    'union_specifier',
    'enum_specifier',
    'template_declaration',
]);
|
|
112
|
+
// Cognitive config (the C-family cognitive rule is the SonarSource Cognitive
|
|
113
|
+
// Complexity whitepaper verbatim, so it maps onto
|
|
114
|
+
// the EXISTING engine knobs with no additions). Surcharge+nest: head `if`, `?:`, the
|
|
115
|
+
// whole `switch` once (cases FREE — the opposite of cyclomatic), the loops, and each
|
|
116
|
+
// `catch` (cpp `catch_clause`; objc `@catch` is ALSO a `catch_clause`, so a single string
|
|
117
|
+
// covers both — the predicted set-widening proved unnecessary). FLAT +1: else/else-if
|
|
118
|
+
// (the `else_clause` chain — tree-sitter-c/cpp/objc all wrap the else in an `else_clause`,
|
|
119
|
+
// the TS shape), `goto` (FLAT, DIVERGING from C#'s goto-surcharge), and each `&&`/`||`
|
|
120
|
+
// operator-CHANGE in a source-order chain (paren-TRANSPARENT → unwrap). Nest-only (+0):
|
|
121
|
+
// C++ lambdas + ObjC `^{}` blocks. Recursion: +1 per direct self-call SITE (the
|
|
122
|
+
// whitepaper's per-call-site rule). The `try` container is free (only catches score). NO
|
|
123
|
+
// baseline +1 in the C-family cognitive model; codedeep-mcp keeps its `1 + decisionPoints` (the documented constant offset).
|
|
124
|
+
const CFAMILY_COGNITIVE_OPTIONS = {
    ifType: 'if_statement',
    // The condition wrapper differs per grammar (`condition_clause` in cpp,
    // `parenthesized_expression` in c/objc); both are descended, so booleans in
    // the condition are counted. The cpp `condition_clause` also carries the
    // C++17 `if (init; cond)` init_statement, which is descended with it, so no
    // separate `initField` is configured.
    conditionField: 'condition',
    consequenceField: 'consequence',
    alternativeField: 'alternative',
    // Wrapper node around else / else-if (the TS shape; same on all three grammars).
    elseClauseType: 'else_clause',
    loopTypes: new Set(['for_statement', 'for_range_loop', 'while_statement', 'do_statement']),
    // The whole switch scores +1; individual case labels are free.
    switchTypes: new Set(['switch_statement']),
    ternaryType: 'conditional_expression',
    // `catch_clause` contains its body, so it takes the generic catch branch
    // (surcharge + nest). One string covers cpp `catch` and objc `@catch`.
    // DOCUMENTED GAP: MSVC `__except` parses as a distinct `seh_except_clause`,
    // so it is NOT surcharged (its body's control flow still counts, at the
    // try's nesting level). That is a recall-only under-count, accepted rather
    // than widening the shared engine's single-string `catchType`.
    catchType: 'catch_clause',
    // C++ lambdas and ObjC `^{}` blocks: +0, but they deepen nesting.
    nestOnlyTypes: new Set(['lambda_expression', 'block_literal']),
    // `goto` is a FLAT +1. A goto always targets a label, so `hasLabel` is
    // unconditionally true.
    labeledJumpTypes: new Set(['goto_statement']),
    hasLabel: () => true,
    // Reads the `binary_expression` operator field.
    booleanOperatorKind: cFamilyBooleanOperatorKind,
    // Parentheses are transparent: unwrapped, source order.
    parenthesizedType: 'parenthesized_expression',
    // Direct recursion: +1 per self-call SITE. Only 'function' symbols are
    // eligible, because the bare-name match has no arity guard and admitting
    // 'method' would risk a false positive when a method calls a same-named
    // free function.
    // Accepted over-counts (name-only matching): a call to a same-named sibling
    // overload, or to a local function pointer that shadows the function.
    // Documented under-counts: C++ method self-recursion, ObjC `[self f]`
    // (a `message_expression`), and a qualified `N::f()` self-call
    // (a `qualified_identifier` callee).
    recursion: {
        callType: 'call_expression',
        // Returns the callee text only for a plain `identifier` callee, else null.
        bareCalleeName: (callNode) => {
            const callee = callNode.childForFieldName('function');
            if (callee?.type !== 'identifier')
                return null;
            return callee.text;
        },
        eligibleKinds: new Set(['function']),
    },
};
|
|
177
|
+
// The assembled C-family ComplexityOptions — the SINGLE source of truth, used at BOTH
|
|
178
|
+
// call sites (extractCpp here AND extractObjc in objc.ts; `c` reaches extractCpp via the
|
|
179
|
+
// folded `case 'c'` dispatch). The C-family is the only extractor with two call sites, so
|
|
180
|
+
// exporting the assembled object (rather than the three building-block consts above) keeps
|
|
181
|
+
// cpp and objc provably in lock-step — a future tweak lands in one place.
|
|
182
|
+
export const CFAMILY_COMPLEXITY_OPTS = {
    // Cyclomatic: node types that each add one decision.
    decisionNodeTypes: CFAMILY_DECISION_NODE_TYPES,
    // Complexity boundary: subtrees that are pruned.
    skipTypes: CFAMILY_COMPLEXITY_SKIP_TYPES,
    // Cognitive-complexity engine configuration.
    cognitive: CFAMILY_COGNITIVE_OPTIONS,
};
|
|
187
|
+
// ── call resolution ──────────────────────────────────────────────────────────
|
|
188
|
+
// Bare callee is the engine-default `identifier`; `type_identifier` is added ONLY
|
|
189
|
+
// so the `new Foo()` callee (`new_expression`'s `type:` is a `type_identifier`)
|
|
190
|
+
// passes the bare-callee gate — `constructorSelectorTypes` then routes it through
|
|
191
|
+
// `typeNameToId` (constructorKinds={class}) by NODE type, so a `new Foo()` can
|
|
192
|
+
// never mis-bind to an enclosing method named Foo (the C# precedent). A call
|
|
193
|
+
// NEVER has a `type_identifier` callee (the grammar is syntactic — `Foo()` value
|
|
194
|
+
// construction parses with an `identifier` callee), so this only affects `new`.
|
|
195
|
+
const CPP_BARE_CALLEE_TYPES = new Set(['identifier', 'type_identifier']);
// Symbol kinds a bare `foo()` may bind to through the callable-name map. A bare
// call is either a free-function call or, inside a class body, an implicit-`this`
// member call, so it is tried against the enclosing class first
// (bareCallsBindToEnclosingClass) and then against this set. Methods are not
// bare-callable (they resolve only via methodsByClass), and neither are classes,
// so a bare `Foo()` value construction stays unresolved: a documented recall
// gap, never a wrong edge.
const CPP_BARE_CALLABLE_KINDS = new Set(['function']);
// `new Foo()` resolves to a 'class'-kind symbol via the constructor-form path.
const CPP_CONSTRUCTOR_KINDS = new Set(['class']);
// `new X()` is a distinct construction NODE (the C# object_creation precedent):
// it is routed through constructorKinds/typeNameToId by node type, so it never
// flows through the enclosing-class/nameToId path.
const CPP_CONSTRUCTOR_SELECTORS = new Set(['new_expression']);
|
|
209
|
+
// C / C++ standard-library free functions that parse as bare `identifier` callees
|
|
210
|
+
// but never resolve to a local symbol — they would flood the name-keyed reference
|
|
211
|
+
// store. Suppressed ONLY when unresolved (a file-local shadow keeps its refs).
|
|
212
|
+
// START small + tune by dogfood (the measure-don't-guess method).
|
|
213
|
+
const CPP_IGNORED_BARE_CALLEES = new Set([
    // <cstdio>
    'printf', 'fprintf', 'sprintf', 'snprintf', 'scanf', 'fscanf', 'sscanf',
    'puts', 'fputs', 'fopen', 'fclose', 'fread', 'fwrite', 'fflush', 'perror',
    // <cstdlib>
    'malloc', 'calloc', 'realloc', 'free', 'abort', 'exit', 'atexit', 'getenv',
    // <cstring> (plus the atoi/atof conversions listed alongside them)
    'memcpy', 'memset', 'memmove', 'memcmp', 'strlen', 'strcmp', 'strncmp',
    'strcpy', 'strncpy', 'strcat', 'strncat', 'strchr', 'strstr', 'atoi', 'atof',
    // Assertions and common <algorithm>/<utility> globals reached via `using`.
    'assert', 'static_assert', 'move', 'forward', 'swap', 'min', 'max',
    'make_unique', 'make_shared', 'make_pair', 'make_tuple', 'to_string',
]);
|
|
224
|
+
// STL container / iterator / smart-pointer / string method names whose chained
|
|
225
|
+
// captures are pure noise (`.push_back()`, `.begin()`, `.size()`, …). Suppressed
|
|
226
|
+
// only when UNRESOLVED, so a same-file `this->size()` that bound to a real
|
|
227
|
+
// sibling keeps its ref. Keep these to >=4 chars (SHORT_NAME_THRESHOLD gates the
|
|
228
|
+
// rest downstream). START small + tune by dogfood.
|
|
229
|
+
const CPP_IGNORED_MEMBER_CALLEES = new Set([
    // Container mutation / capacity.
    'push_back', 'emplace_back', 'push_front', 'emplace_front', 'pop_back',
    'pop_front', 'insert', 'emplace', 'erase', 'clear', 'empty', 'size',
    // Iteration / lookup.
    'begin', 'end', 'cbegin', 'cend', 'rbegin', 'rend', 'find', 'count',
    // Storage access and string helpers.
    'reserve', 'resize', 'data', 'front', 'back', 'c_str', 'length', 'substr',
    'append', 'compare', 'str', 'first', 'second', 'reset', 'release', 'lock',
    // optional / smart-pointer style accessors and misc.
    'value', 'value_or', 'has_value', 'get', 'count_if', 'contains',
]);
|
|
237
|
+
const CPP_SELECTORS = [
    // Ordinary calls: bare `foo()`, member `obj.m()`/`ptr->m()`/`this->m()`, and
    // scope-resolution `Foo::bar()`/`ns::f()`. The callee node tells them apart.
    {
        nodeType: 'call_expression',
        getCallee: (callNode) => callNode.childForFieldName('function'),
    },
    // Construction `new Foo()` (a distinct node). Only a simple `type_identifier`
    // target resolves; `new ns::Widget()` (qualified_identifier) and
    // `new Box<int>()` (template_type) are DROPPED. That is a documented recall
    // gap, never a wrong cross-namespace edge.
    {
        nodeType: 'new_expression',
        getCallee: (newNode) => {
            const target = newNode.childForFieldName('type');
            if (target?.type === 'type_identifier')
                return target;
            return null;
        },
    },
];
|
|
253
|
+
// Reduces a member-expression / scope-resolution callee to {receiver, property}.
|
|
254
|
+
// `this->m()` → self-call (resolve against the enclosing class)
|
|
255
|
+
// `obj.m()` / `ptr->m()` → receiver = `obj`/`ptr` (an identifier; unresolved
|
|
256
|
+
// name-keyed member ref unless a class shares the name)
|
|
257
|
+
// `Foo::bar()` / `ns::f()`→ receiver = the innermost scope segment (the
|
|
258
|
+
// Rust/PHP/Ruby `::` single-level member-ref pattern)
|
|
259
|
+
// chained `a.b().c()` / `f()->g()` / computed → RECEIVER_OPAQUE (findable, never
|
|
260
|
+
// resolved)
|
|
261
|
+
// Reduces a callee node to `{ receiver, property, isSelf }`, or null when the
// callee is not a member / scope-resolution form this extractor understands.
//   callee: the node returned by a selector's getCallee.
function cppMemberCallInfo(callee) {
    switch (callee.type) {
        case 'field_expression': {
            // `obj.m` / `ptr->m` / `this->m`
            const fieldNode = callee.childForFieldName('field');
            if (fieldNode?.type !== 'field_identifier')
                return null; // `p->~Foo()` etc.
            const property = fieldNode.text;
            const receiverNode = callee.childForFieldName('argument');
            if (!receiverNode)
                return null;
            if (receiverNode.type === 'this')
                return { receiver: 'this', property, isSelf: true };
            // A plain identifier keeps its name; a chained / parenthesized /
            // subscript / call receiver is opaque.
            const receiver = receiverNode.type === 'identifier' ? receiverNode.text : RECEIVER_OPAQUE;
            return { receiver, property, isSelf: false };
        }
        case 'qualified_identifier': {
            const qualified = qualifiedName(callee);
            if (!qualified)
                return null;
            // The innermost scope segment is the receiver class; the final name is
            // the method. `Foo::bar()` → {Foo, bar}; `ns::Foo::bar()` → {Foo, bar}.
            return {
                receiver: qualified.classScope ?? RECEIVER_OPAQUE,
                property: qualified.name,
                isSelf: false,
            };
        }
        default:
            return null;
    }
}
|
|
287
|
+
// Extracts symbols, call references and imports from one parsed C-family file.
//   tree:     the parsed syntax tree (its `rootNode` is walked).
//   content:  the file's source text, stored on the extraction context.
//   fileInfo: file descriptor, stored on the context and passed to resolveCalls.
// Returns `{ symbols, references, imports }`.
export function extractCpp(tree, content, fileInfo) {
    const ctx = {
        content,
        fileInfo,
        occurrences: new Map(),
        symbols: [],
        imports: [],
        bodies: [],
    };
    // Translation-unit scope: no enclosing class, everything exported/public.
    const fileScope = {
        className: null,
        qualifier: '',
        exported: true,
        inClass: false,
        defaultVisibility: 'public',
    };
    extractScope(ctx, tree.rootNode.namedChildren, fileScope);
    // Same-name classes/structs share the simple-name FQN; resolving through them
    // first-wins would bind to the WRONG type, so they are excluded from
    // extract-time resolution. Function overloads are NOT excluded: they form
    // one logical family, and first-wins binding to an overload is a same-kind,
    // accepted imprecision.
    const ambiguousClassNames = collectAmbiguousTypeNames(ctx.symbols, new Set(['class']));
    const resolveOptions = {
        bareCalleeTypes: CPP_BARE_CALLEE_TYPES,
        plainCalleeType: 'identifier',
        bareCallableKinds: CPP_BARE_CALLABLE_KINDS,
        bareCallsBindToEnclosingClass: true, // implicit this
        constructorKinds: CPP_CONSTRUCTOR_KINDS,
        constructorSelectorTypes: CPP_CONSTRUCTOR_SELECTORS,
        ambiguousClassNames,
        ignoredBareCallees: CPP_IGNORED_BARE_CALLEES,
        ignoredMemberCallees: CPP_IGNORED_MEMBER_CALLEES,
    };
    const references = resolveCalls(ctx.bodies, tree.rootNode, ctx.symbols, fileInfo, CPP_SELECTORS, CPP_SKIP_TYPES, CPP_FUNCTION_BODY_SKIP_TYPES, cppMemberCallInfo, resolveOptions);
    // Cyclomatic + cognitive complexity, computed while the tree is alive. Plain
    // `c` files reach this function too, so they get complexity as well.
    computeComplexity(ctx.bodies, ctx.symbols, CFAMILY_COMPLEXITY_OPTS);
    return { symbols: ctx.symbols, references, imports: ctx.imports };
}
|
|
327
|
+
// Walks a translation-unit / namespace / class body in document order, tracking
|
|
328
|
+
// the current access visibility (C++ `public:`/`private:` are stateful positional
|
|
329
|
+
// labels — the Ruby visibility pattern). `preproc_*` conditionals are recursed
|
|
330
|
+
// through transparently (an `#ifndef` include guard wraps the whole header, and
|
|
331
|
+
// `#if`-guarded members must still be extracted; both branches are extracted, the
|
|
332
|
+
// OccurrenceCounter keeps ids unique).
|
|
333
|
+
// Feeds every child of one scope to handleMember, in document order.
//   children:  the scope's named child nodes.
//   enclosing: scope descriptor; its `defaultVisibility` seeds the walk.
// A single mutable `state` object is shared across the whole scope so that an
// access label seen on one member applies to the members that follow it.
function extractScope(ctx, children, enclosing) {
    const state = { visibility: enclosing.defaultVisibility };
    for (const member of children) {
        handleMember(ctx, member, enclosing, state);
    }
}
|
|
338
|
+
// Node types of class-like record specifiers (class / struct / union).
const RECORD_SPECIFIERS = new Set([
    'class_specifier',
    'struct_specifier',
    'union_specifier',
]);
|
|
343
|
+
// Dispatches one scope member to the matching extractor.
//   child:     the member node.
//   enclosing: descriptor of the enclosing scope (`inClass` is read here).
//   state:     mutable per-scope state; `state.visibility` is read and updated.
//   wrap:      optional `{ prefix, docNode }` from an enclosing template
//              declaration, forwarded to the extractors (null when absent).
function handleMember(ctx, child, enclosing, state, wrap = null) {
    const nodeType = child.type;
    if (PREPROC_GROUPS.has(nodeType)) {
        // Transparent: a guarded region's members are flattened into the current
        // scope so they see the enclosing visibility. An `access_specifier`
        // INSIDE the guard must not leak past `#endif`: the preprocessor cannot
        // be evaluated here, so a guarded `private:` must not silently de-export
        // every following member. The enclosing visibility is therefore
        // snapshotted, restored after the whole group, AND restored before each
        // `#else`/`#elif` continuation (each branch compiles independently).
        const baseline = state.visibility;
        for (const inner of child.namedChildren) {
            if (PREPROC_CONTINUATIONS.has(inner.type)) {
                state.visibility = baseline;
            }
            // `wrap` is threaded through for a templated guarded declaration.
            handleMember(ctx, inner, enclosing, state, wrap);
        }
        state.visibility = baseline;
        return;
    }
    if (nodeType === 'linkage_specification') {
        // `extern "C" { … }` / `extern "C" <decl>`: a transparent grouping node
        // (no new scope), so its declarations keep the same enclosing/state.
        const body = child.childForFieldName('body');
        if (body?.type === 'declaration_list') {
            for (const inner of body.namedChildren) {
                handleMember(ctx, inner, enclosing, state, wrap);
            }
        }
        else if (body) {
            handleMember(ctx, body, enclosing, state, wrap);
        }
        return;
    }
    switch (nodeType) {
        case 'access_specifier': {
            // Access labels only mean something inside a class body.
            if (enclosing.inClass) {
                const label = accessText(child);
                if (label)
                    state.visibility = label;
            }
            return;
        }
        case 'preproc_include':
            extractInclude(ctx, child);
            return;
        case 'namespace_definition':
            extractNamespace(ctx, child, enclosing);
            return;
        case 'template_declaration':
            handleTemplate(ctx, child, enclosing, state);
            return;
        case 'class_specifier':
        case 'struct_specifier':
        case 'union_specifier':
            extractRecord(ctx, child, enclosing, state.visibility, wrap);
            return;
        case 'enum_specifier':
            extractEnum(ctx, child, enclosing, state.visibility, wrap);
            return;
        case 'type_definition':
            extractTypedef(ctx, child, enclosing, state.visibility, wrap);
            return;
        case 'alias_declaration':
            extractAlias(ctx, child, enclosing, state.visibility, wrap);
            return;
        case 'function_definition':
            extractFunctionDef(ctx, child, enclosing, state.visibility, wrap);
            return;
        case 'declaration':
        case 'field_declaration':
            extractDeclaration(ctx, child, enclosing, state.visibility, wrap);
            return;
        default:
            // using_declaration / friend_declaration / static_assert / expression
            // statements / etc. are not symbols in v1.
            return;
    }
}
|
|
420
|
+
// `template<…> <decl>`: unwrap to the inner decl, carrying the `template<…>`
|
|
421
|
+
// preamble into the signature and the template_declaration as the doc anchor.
|
|
422
|
+
// Unwraps a `template_declaration` to its inner declaration(s).
//   decl:  the `template_declaration` node.
//   state: the enclosing scope's mutable visibility state, passed through.
// The inner declarations are handed to handleMember with a `wrap` carrying the
// `template<…> ` signature prefix and the template node as the doc anchor.
function handleTemplate(ctx, decl, enclosing, state) {
    const paramList = decl.childForFieldName('parameters');
    let prefix = 'template ';
    if (paramList) {
        prefix = `template${normalizeSignature(paramList.text)} `;
    }
    const wrap = { prefix, docNode: decl };
    for (const inner of decl.namedChildren) {
        // The parameter list and any requires-clause belong to the preamble, not
        // to the declaration being templated.
        const isPreamble = inner.type === 'template_parameter_list' || inner.type === 'requires_clause';
        if (!isPreamble) {
            handleMember(ctx, inner, enclosing, state, wrap);
        }
    }
}
|
|
432
|
+
// Walks the body of a `namespace_definition` as a new, non-class scope.
//   decl:      the namespace node.
//   enclosing: the surrounding scope; its qualifier and `exported` flag carry over.
// Nothing is extracted for a namespace without a body block (an alias or
// extension form).
function extractNamespace(ctx, decl, enclosing) {
    const nameNode = decl.childForFieldName('name');
    const body = decl.childForFieldName('body');
    if (!body) {
        return;
    }
    // An anonymous namespace contributes no qualifier segment.
    const segment = nameNode ? nameNode.text : '';
    const scope = {
        className: null,
        qualifier: joinQualifier(enclosing.qualifier, segment),
        exported: enclosing.exported,
        inClass: false,
        defaultVisibility: 'public',
    };
    extractScope(ctx, body.namedChildren, scope);
}
|
|
446
|
+
// Records a class/struct/union as a 'class' symbol, then walks its body.
//   decl:       the record specifier node.
//   visibility: the access level in force where the record is declared.
//   wrap:       optional `{ prefix, docNode }` from an enclosing template; when
//               present its node anchors the symbol and doc, and its prefix
//               goes into the signature.
// Unnamed records are skipped. A forward declaration (no body) yields the
// symbol only.
function extractRecord(ctx, decl, enclosing, visibility, wrap) {
    const name = recordName(decl);
    if (!name) {
        return; // anonymous struct/union (a field's inline type) is not a symbol
    }
    const exported = memberExported(enclosing, visibility);
    const anchor = wrap?.docNode ?? decl;
    const signature = cppSignature(ctx, decl, wrap?.prefix);
    const fqn = topFqn(ctx, name);
    const doc = cppDoc(anchor);
    ctx.symbols.push(makeCppSymbol(ctx, anchor, signature, 'class', name, fqn, exported, doc, enclosing.qualifier));
    const body = decl.childForFieldName('body');
    if (!body) {
        return; // forward declaration `class Foo;`
    }
    extractScope(ctx, body.namedChildren, {
        className: name,
        qualifier: joinQualifier(enclosing.qualifier, name),
        exported,
        inClass: true,
        // `class` members default to private; `struct`/`union` members to public.
        defaultVisibility: decl.type === 'class_specifier' ? 'private' : 'public',
    });
}
|
|
463
|
+
// Emits an `enum` symbol for a named enum specifier. Enumerators are NOT
// extracted (the universal enum rule). `wrap`, when present, supplies the
// signature prefix and the anchor node used for position and doc lookup.
function extractEnum(ctx, decl, enclosing, visibility, wrap) {
    const name = recordName(decl);
    if (!name) {
        // Anonymous enum — enumerators leak to the enclosing scope (v1 gap).
        return;
    }
    const anchor = wrap?.docNode ?? decl;
    const signature = cppSignature(ctx, decl, wrap?.prefix);
    const fqn = topFqn(ctx, name);
    const exported = memberExported(enclosing, visibility);
    const doc = cppDoc(anchor);
    ctx.symbols.push(makeCppSymbol(ctx, anchor, signature, 'enum', name, fqn, exported, doc, enclosing.qualifier));
}
|
|
470
|
+
// A `declaration` / `field_declaration` / `typedef` may define a record or enum
// inline in its `type:` slot (`struct Named {…} g;`, `typedef struct Pt {…} Point;`,
// a nested type). That type is ALSO defined at this point, so it is extracted
// here in addition to whatever the declarators produce. No template wrap applies.
function extractInlineTypeInSlot(ctx, decl, enclosing, visibility) {
    const typeNode = decl.childForFieldName('type');
    if (typeNode) {
        if (RECORD_SPECIFIERS.has(typeNode.type)) {
            extractRecord(ctx, typeNode, enclosing, visibility, null);
            return;
        }
        if (typeNode.type === 'enum_specifier') {
            extractEnum(ctx, typeNode, enclosing, visibility, null);
        }
    }
}
|
|
482
|
+
// Emits one `type` symbol per declarator of a typedef. Signature, doc and anchor
// are declaration-level and therefore shared by every declarator.
function extractTypedef(ctx, decl, enclosing, visibility, wrap) {
    // A `typedef <type> Name;` whose <type> is a named record also defines that record.
    extractInlineTypeInSlot(ctx, decl, enclosing, visibility);
    const signature = cppSignature(ctx, decl, wrap?.prefix);
    const doc = cppDoc(wrap?.docNode ?? decl);
    const anchor = wrap?.docNode ?? decl;
    for (const declarator of decl.childrenForFieldName('declarator')) {
        const info = analyze(declarator);
        // Unanalyzable or qualified declarators produce no symbol.
        if (info && !info.qualified) {
            const exported = memberExported(enclosing, visibility);
            ctx.symbols.push(makeCppSymbol(ctx, anchor, signature, 'type', info.name, topFqn(ctx, info.name), exported, doc, enclosing.qualifier));
        }
    }
}
|
|
495
|
+
// Emits a `type` symbol for a declaration that carries its name in a `name:`
// field. Declarations without a (non-empty) name are ignored.
function extractAlias(ctx, decl, enclosing, visibility, wrap) {
    const nameNode = decl.childForFieldName('name');
    const name = nameNode?.text;
    if (name) {
        const anchor = wrap?.docNode ?? decl;
        const signature = cppSignature(ctx, decl, wrap?.prefix);
        const fqn = topFqn(ctx, name);
        const exported = memberExported(enclosing, visibility);
        const doc = cppDoc(anchor);
        ctx.symbols.push(makeCppSymbol(ctx, anchor, signature, 'type', name, fqn, exported, doc, enclosing.qualifier));
    }
}
|
|
501
|
+
// Handles a `function_definition` (has a body, or is `= default` / `= delete`):
// emits the function/method symbol and, when a body exists, queues it for call
// attribution.
function extractFunctionDef(ctx, decl, enclosing, visibility, wrap) {
    const declarator = decl.childForFieldName('declarator');
    const info = declarator ? analyze(declarator) : null;
    if (!info || !info.isFunction) {
        return;
    }
    const target = resolveFunctionTarget(ctx, info, enclosing);
    // Qualified (out-of-line) definitions are always exported; otherwise the
    // declaration-level rule decides.
    let exported = true;
    if (!info.qualified) {
        exported = declExported(decl, enclosing, visibility);
    }
    const anchor = wrap?.docNode ?? decl;
    const signature = cppSignature(ctx, decl, wrap?.prefix);
    const symbol = makeCppSymbol(ctx, anchor, signature, target.kind, info.name, target.fqn, exported, cppDoc(anchor), target.qualifier);
    ctx.symbols.push(symbol);
    // The whole function_definition is the PendingBody so that calls in parameter
    // defaults AND in the ctor member-init list (`: v_(compute(x))`) are attributed
    // here together with the body. Only a definition with a `body:` field gets one;
    // `= default` / `= delete` get the symbol but no body.
    const hasBody = Boolean(decl.childForFieldName('body'));
    if (hasBody) {
        ctx.bodies.push({ symbolId: symbol.id, body: decl, className: target.className ?? undefined });
    }
}
|
|
521
|
+
// Handles a `declaration` / `field_declaration`, which can hold function
// declaration(s), variable/field declaration(s), and/or an inline record/enum
// definition in its `type:` slot.
function extractDeclaration(ctx, decl, enclosing, visibility, wrap) {
    // `struct Named {…} g;` / nested type: the inline type is defined here as well.
    extractInlineTypeInSlot(ctx, decl, enclosing, visibility);
    const declarators = decl.childrenForFieldName('declarator');
    // Signature and doc are declaration-level (shared by the function branch and
    // every variable), so they are computed once rather than per declarator.
    const signature = cppSignature(ctx, decl, wrap?.prefix);
    const doc = cppDoc(wrap?.docNode ?? decl);
    const anchor = wrap?.docNode ?? decl;
    for (const declarator of declarators) {
        const info = analyze(declarator);
        if (!info) {
            continue;
        }
        if (!info.isFunction) {
            extractVariable(ctx, decl, declarator, info, enclosing, visibility, declarators.length === 1, doc);
            continue;
        }
        const target = resolveFunctionTarget(ctx, info, enclosing);
        let exported = true;
        if (!info.qualified) {
            exported = declExported(decl, enclosing, visibility);
        }
        // Bodiless declaration → symbol only, no PendingBody.
        ctx.symbols.push(makeCppSymbol(ctx, anchor, signature, target.kind, info.name, target.fqn, exported, doc, target.qualifier));
    }
}
|
|
546
|
+
// Emits a `variable` symbol for one non-function declarator of `decl` and, for a
// sole declarator with an initializer, queues that initializer for call
// attribution.
function extractVariable(ctx, decl, declarator, info, enclosing, visibility, soleDeclarator, doc) {
    if (info.qualified) {
        // Out-of-line static-member definition `int C::n = …;` — skipped (the
        // in-class declaration already exists).
        return;
    }
    const className = enclosing.inClass ? enclosing.className : null;
    let fqn;
    if (className) {
        fqn = memberFqn(ctx, className, info.name);
    }
    else {
        fqn = topFqn(ctx, info.name);
    }
    const signature = variableSig(decl, declarator);
    const exported = declExported(decl, enclosing, visibility);
    const symbol = makeCppSymbol(ctx, decl, signature, 'variable', info.name, fqn, exported, doc, enclosing.qualifier);
    ctx.symbols.push(symbol);
    // Initializer calls (`Logger log = makeLogger();`) are attributed to the
    // variable only for a sole declarator, where the pairing is unambiguous.
    if (!soleDeclarator) {
        return;
    }
    const initializer = initializerNode(declarator) ?? decl.childForFieldName('default_value');
    if (initializer) {
        ctx.bodies.push({ symbolId: symbol.id, body: initializer, className: className ?? undefined });
    }
}
|
|
561
|
+
// Declarator node types that only wrap an inner declarator.
// `block_pointer_declarator` covers Objective-C / Clang blocks: `void (^handler)(int)`
// nests the name under it (it carries a `declarator:` field, so innerDeclaratorChild
// reaches the inner identifier). It is never produced by tree-sitter-c/-cpp, so it
// is inert there.
const WRAPPER_DECLARATORS = new Set(['pointer_declarator', 'reference_declarator', 'parenthesized_declarator', 'array_declarator', 'init_declarator', 'block_pointer_declarator']);

// Innermost name-bearing node types: everything in DECL_OR_NAME that is NOT a
// wrapper or function declarator. They distinguish a REAL function (name sits
// DIRECTLY under the function_declarator) from a function POINTER, where a
// parenthesized_declarator is interposed (`int (*fp)(int)` → a variable).
const NAME_NODES = new Set(['identifier', 'field_identifier', 'qualified_identifier', 'operator_name', 'destructor_name', 'operator_cast', 'type_identifier', 'template_function', 'template_method']);

// Every node type accepted as "the inner declarator or name" of a declarator:
// the wrappers, the two function-declarator kinds, and the name nodes.
const DECL_OR_NAME = new Set([...WRAPPER_DECLARATORS, 'function_declarator', 'abstract_function_declarator', ...NAME_NODES]);
|
|
593
|
+
// Peels pointer/reference/parenthesized/array/init wrappers and function
// declarators off `declarator` until the innermost name node is reached, and
// reports whether the declarator denotes a function/method.
//
// Returns `{ name, isFunction, qualified, classScope, nsScopes }`, or null when
// no usable name is found. At most 24 levels are descended.
function analyze(declarator) {
    let cursor = declarator;
    let isRealFunction = false;
    let hops = 0;
    while (cursor && hops < 24) {
        hops += 1;
        const kind = cursor.type;
        const isFuncDeclarator = kind === 'function_declarator' || kind === 'abstract_function_declarator';
        if (!isFuncDeclarator && !WRAPPER_DECLARATORS.has(kind)) {
            break;
        }
        const inner = innerDeclaratorChild(cursor);
        // A REAL function/method has its name DIRECTLY under the function_declarator
        // (`int foo()`, `int* C::bar()`). A function POINTER interposes a
        // parenthesized_declarator (`int (*cb)(int)`) and is a VARIABLE/field, so the
        // descent continues WITHOUT flagging it as a function.
        if (isFuncDeclarator && inner && NAME_NODES.has(inner.type)) {
            isRealFunction = true;
        }
        cursor = inner;
    }
    if (!cursor) {
        return null;
    }
    if (cursor.type === 'qualified_identifier') {
        const qualified = qualifiedName(cursor);
        if (!qualified?.name) {
            return null;
        }
        return {
            name: qualified.name,
            isFunction: isRealFunction || qualified.isConversion,
            qualified: true,
            classScope: qualified.classScope,
            nsScopes: qualified.nsScopes,
        };
    }
    const simple = simpleName(cursor);
    // Empty name = a synthetic/degenerate declarator (e.g. the empty `identifier`
    // tree-sitter inserts for an unbraced `extern "C" struct S {…};` record) — no
    // symbol; the record + fields are emitted separately by extractInlineTypeInSlot.
    if (!simple?.name) {
        return null;
    }
    return {
        name: simple.name,
        isFunction: isRealFunction || simple.isConversion,
        qualified: false,
        classScope: null,
        nsScopes: [],
    };
}
|
|
634
|
+
// Returns the declarator (or name) nested inside `node`: the `declarator:` field
// when present, otherwise the first named child whose type is in DECL_OR_NAME
// (positional — a reference_declarator holds it un-fielded). Null when neither exists.
function innerDeclaratorChild(node) {
    const fielded = node.childForFieldName('declarator');
    if (fielded) {
        return fielded;
    }
    const positional = node.namedChildren.find((child) => DECL_OR_NAME.has(child.type));
    return positional ?? null;
}
|
|
644
|
+
// Resolves an unqualified name node to `{ name, isConversion }`, or null when
// the node type carries no name this extractor understands.
function simpleName(node) {
    const kind = node.type;
    if (kind === 'operator_cast') {
        // `operator bool() const` — named `operator <type>` (params/qualifiers dropped).
        const target = node.childForFieldName('type');
        return { name: normalizeSignature(`operator ${target?.text ?? ''}`), isConversion: true };
    }
    if (kind === 'template_function' || kind === 'template_method') {
        // The template-argument list is ignored; only the `name:` child matters.
        const inner = node.childForFieldName('name');
        return inner ? simpleName(inner) : null;
    }
    const isPlainName = kind === 'identifier' ||
        kind === 'field_identifier' ||
        kind === 'type_identifier' ||
        kind === 'operator_name' ||
        kind === 'destructor_name';
    if (isPlainName) {
        return { name: node.text.trim(), isConversion: false };
    }
    return null;
}
|
|
666
|
+
// Walks the right-nested `qualified_identifier` scope chain. The final `name`
// segment is the symbol; the scope immediately enclosing it is treated as the
// "class"; all earlier scopes are namespaces. Scope segments are resolved by
// `scopeSimpleName` (e.g. namespace_identifier, type_identifier, or a
// template_type such as `Box<T>::get`); unresolvable segments are dropped.
//
// Returns `{ name, classScope, nsScopes, isConversion }`, or null when no final
// name can be resolved. At most 24 levels are walked.
function qualifiedName(qi) {
    const scopes = [];
    let leaf = null;
    let cursor = qi;
    let depth = 0;
    while (cursor && depth < 24) {
        depth += 1;
        if (cursor.type !== 'qualified_identifier') {
            leaf = cursor;
            break;
        }
        const scopeNode = cursor.childForFieldName('scope');
        if (scopeNode) {
            const segment = scopeSimpleName(scopeNode);
            if (segment) {
                scopes.push(segment);
            }
        }
        const next = cursor.childForFieldName('name');
        if (!next) {
            break;
        }
        if (next.type !== 'qualified_identifier') {
            leaf = next;
            break;
        }
        cursor = next;
    }
    if (!leaf) {
        return null;
    }
    const base = simpleName(leaf);
    if (!base) {
        return null;
    }
    const lastIndex = scopes.length - 1;
    return {
        name: base.name,
        classScope: scopes.length ? scopes[lastIndex] : null,
        nsScopes: scopes.slice(0, -1),
        isConversion: base.isConversion,
    };
}
|
|
703
|
+
// Text of one scope segment of a qualified name: the node text for plain
// identifiers, the `name:` child's text for a `template_type`, null otherwise.
function scopeSimpleName(node) {
    const kind = node.type;
    if (kind === 'template_type') {
        return node.childForFieldName('name')?.text ?? null;
    }
    const isPlain = kind === 'namespace_identifier' || kind === 'type_identifier' || kind === 'identifier';
    return isPlain ? node.text : null;
}
|
|
715
|
+
// Computes kind / className / FQN / qualifier for a function-shaped declarator.
//
// - Qualified `Class::name`: an out-of-line method keyed on the class scope (the
//   Go-receiver pattern, but cross-file). A namespace-qualified free function
//   `ns::freeFn` is ALSO keyed as a method on its last scope segment — a known,
//   wrong-edge-free imperfection (the qualified call `ns::freeFn()` still resolves
//   correctly).
// - Unqualified inside a class: a method of the enclosing class.
// - Everything else: a free function.
//
// Callers mark out-of-line defs/decls exported=true: the in-class declaration in
// the header carries the real access, and marking the def exported only widens
// cross-file recall, never adds a wrong edge.
function resolveFunctionTarget(ctx, info, enclosing) {
    if (!info.qualified) {
        if (!enclosing.inClass) {
            return { kind: 'function', className: null, fqn: topFqn(ctx, info.name), qualifier: enclosing.qualifier };
        }
        return {
            kind: 'method',
            className: enclosing.className,
            fqn: memberFqn(ctx, enclosing.className, info.name),
            qualifier: enclosing.qualifier,
        };
    }
    // Qualified name: the id qualifier is the enclosing qualifier plus the written scope path.
    const scopePath = [...info.nsScopes, info.classScope].filter(Boolean).join('::');
    const qualifier = joinQualifier(enclosing.qualifier, scopePath);
    if (info.classScope) {
        return { kind: 'method', className: info.classScope, fqn: memberFqn(ctx, info.classScope, info.name), qualifier };
    }
    return { kind: 'function', className: null, fqn: topFqn(ctx, info.name), qualifier };
}
|
|
742
|
+
// ── helpers ────────────────────────────────────────────────────────────────
|
|
743
|
+
// Normalizes an `access_specifier` node to `public` / `private` / `protected`.
// The first `:` (normally a separate token) and surrounding whitespace are
// stripped; any other text yields null.
function accessText(node) {
    const label = node.text.replace(':', '').trim();
    switch (label) {
        case 'public':
        case 'private':
        case 'protected':
            return label;
        default:
            return null;
    }
}
|
|
749
|
+
// Exported = public access. Anything outside a class (free functions, top-level
// and namespace declarations) is always exported; a class member is exported only
// when its visibility is `public` AND its container is itself exported.
// Private/protected members are NOT exported (the handoff rule; class default is
// private, struct/union default public).
function memberExported(enclosing, visibility) {
    return enclosing.inClass ? enclosing.exported && visibility === 'public' : true;
}
|
|
758
|
+
// True when `decl` has a direct `static` storage-class specifier
// (`static int f(){}` / `static int g;`). `static inline` carries two
// specifiers and only the `static` one matches; `extern` / `register` /
// `inline` / `thread_local` never do.
function hasStaticStorage(decl) {
    for (const child of decl.namedChildren) {
        if (child.type === 'storage_class_specifier' && child.text === 'static') {
            return true;
        }
    }
    return false;
}
|
|
764
|
+
// Exportedness for a function/variable DECLARATION, accounting for file-scope
|
|
765
|
+
// `static` internal linkage. At file scope (not in a class), a `static` free
|
|
766
|
+
// function or global is internal linkage — C's primary privacy mechanism, and
|
|
767
|
+
// also internal for a C++ file-scope free function — so NOT exported. A class
|
|
768
|
+
// member `static` (inClass) means "no implicit this", NOT internal linkage, so
|
|
769
|
+
// it routes through the visibility-based `memberExported` untouched. Records,
|
|
770
|
+
// enums, and typedefs cannot legally take `static`, so they keep
|
|
771
|
+
// `memberExported` directly. `exported` is never hashed into the symbol id, and
|
|
772
|
+
// the extract-time engine (`resolveCalls`) never reads it. The only query-time
|
|
773
|
+
// edge consumer is `isCallerOf`'s cross-file member-ref gate (code-index.ts),
|
|
774
|
+
// which drops an unexported target. A flipped file-scope `static` CAN newly hit
|
|
775
|
+
// that gate — a cross-file member ref `obj.foo()` paired by name with a
|
|
776
|
+
// now-unexported top-level static `foo()` would be dropped — but such a
|
|
777
|
+
// member-call→free-function match is itself a wrong-kind false positive (you
|
|
778
|
+
// cannot call a free function via `obj.`), so the drop is a precision win, never
|
|
779
|
+
// a lost or wrong edge. Otherwise `exported` feeds only the overview
|
|
780
|
+
// exports/internal split, search ranking boost, entry-point detection, and
|
|
781
|
+
// get_context.
|
|
782
|
+
function declExported(decl, enclosing, visibility) {
    // Inside a class `static` does not affect exportedness: visibility decides.
    // At file scope a `static` declaration is treated as not exported.
    return enclosing.inClass ? memberExported(enclosing, visibility) : !hasStaticStorage(decl);
}
|
|
787
|
+
// Name of a record/enum specifier, or null when it is anonymous. For a
// `template_type` name (`Box<int>` specialization) the bare template name
// (`Box`) is returned.
function recordName(decl) {
    const nameNode = decl.childForFieldName('name');
    if (!nameNode) {
        return null;
    }
    if (nameNode.type !== 'template_type') {
        // type_identifier (covers scoped/unscoped enums and records)
        return nameNode.text;
    }
    return nameNode.childForFieldName('name')?.text ?? null;
}
|
|
795
|
+
// Records `#include <foo>` / `#include "foo.h"` as an import whose sourceModule
// is the header path. Includes whose path is neither form, or is empty, are ignored.
function extractInclude(ctx, node) {
    const pathNode = node.childForFieldName('path');
    if (!pathNode) {
        return;
    }
    let header;
    switch (pathNode.type) {
        case 'system_lib_string':
            // strip the angle brackets
            header = pathNode.text.replace(/^<|>$/g, '');
            break;
        case 'string_literal':
            header = pathNode.namedChildren.find((child) => child.type === 'string_content')?.text ?? null;
            break;
        default:
            header = null;
    }
    if (!header) {
        return;
    }
    const entry = {
        file: ctx.fileInfo.path,
        sourceModule: header,
        // a header has no named binding — namespace-style
        importedNames: [{ name: '*' }],
        line: node.startPosition.row + 1,
    };
    ctx.imports.push(entry);
}
|
|
816
|
+
// Doc = the `//` or `/** */` / `/* */` comment that ends on the line directly
// above the declaration and is not a trailing comment (the Go/Dart
// `commentDocLine` + `isTrailingComment` pattern). For a templated declaration
// the anchor is the template_declaration, since the comment is ITS sibling.
// Returns null when no such comment exists.
function cppDoc(anchor) {
    const above = anchor.previousNamedSibling;
    if (!above || above.type !== 'comment') {
        return null;
    }
    const endsDirectlyAbove = above.endPosition.row === anchor.startPosition.row - 1;
    if (!endsDirectlyAbove) {
        return null;
    }
    if (isTrailingComment(above)) {
        return null;
    }
    return commentDocLine(above.text);
}
|
|
829
|
+
// Signature = source text from the declaration start (after an optional
// `template<…>` prefix) up to the body / member-init list, normalized, with a
// trailing `;` or `{` stripped. The `template<…>` preamble and trailing
// `const` / `noexcept` / `override` / `= default` / `= 0` qualifiers are kept
// for display.
function cppSignature(ctx, decl, prefix = '') {
    let cut = decl.endIndex;
    const clampTo = (index) => {
        cut = Math.min(cut, index);
    };
    const body = decl.childForFieldName('body');
    if (body) {
        clampTo(body.startIndex);
    }
    const isFunctionDefinition = decl.type === 'function_definition';
    for (const child of decl.namedChildren) {
        if (child.type === 'field_initializer_list') {
            clampTo(child.startIndex);
        }
        // K&R old-style C: the parameter declarations (`int a;`) are `declaration`
        // children between the function_declarator and the body. Stopping before
        // them keeps `int sum(a, b)` rather than `int sum(a, b) int a; int b`.
        if (isFunctionDefinition && child.type === 'declaration') {
            clampTo(child.startIndex);
        }
    }
    const raw = prefix + ctx.content.slice(decl.startIndex, cut);
    return normalizeSignature(raw).replace(/[;{]\s*$/, '').trimEnd();
}
|
|
852
|
+
// Variable signature = `<storage/quals> <type> <declarator-core>` without the
// `= initializer`. It is assembled from the type and the individual declarator so
// a multi-declarator `int a = 1, b` gives a clean "int a" / "int b" per symbol.
// When the type slot is an inline record/enum (`struct Named {…} g;`) the type's
// NAME is used instead of its whole `{…}` body, which would bloat both the
// signature and the hashed id.
function variableSig(decl, declarator) {
    const pieces = decl.namedChildren
        .filter((child) => child.type === 'storage_class_specifier' || child.type === 'type_qualifier')
        .map((child) => child.text);
    const typeNode = decl.childForFieldName('type');
    if (typeNode) {
        const inlineDefinition = RECORD_SPECIFIERS.has(typeNode.type) || typeNode.type === 'enum_specifier';
        if (inlineDefinition) {
            const nameNode = typeNode.childForFieldName('name');
            pieces.push(nameNode ? nameNode.text : '(anonymous)');
        }
        else {
            pieces.push(typeNode.text);
        }
    }
    pieces.push(declaratorCore(declarator));
    return normalizeSignature(pieces.join(' '));
}
|
|
872
|
+
// Declarator text without a trailing `= initializer`, so the signature/hash stay
// stable across initializer edits. Falls back to the full text when an
// `init_declarator` has no inner `declarator:` field.
function declaratorCore(d) {
    const inner = d.type === 'init_declarator' ? d.childForFieldName('declarator') : null;
    return (inner || d).text;
}
|
|
881
|
+
// Initializer expression of a variable declarator, if any (used for call
// attribution). Only an `init_declarator` carries one, in its `value:` field; an
// in-class field keeps it on the declaration's `default_value:` field, which the
// caller handles.
function initializerNode(d) {
    return d.type === 'init_declarator' ? d.childForFieldName('value') : null;
}
|
|
889
|
+
// FQN of a non-member symbol: `<file path>:<name>`.
function topFqn(ctx, name) {
    const { path } = ctx.fileInfo;
    return `${path}:${name}`;
}
|
|
892
|
+
// FQN of a class member: `<file path>:<class>.<name>`.
function memberFqn(ctx, className, name) {
    const { path } = ctx.fileInfo;
    return `${path}:${className}.${name}`;
}
|
|
895
|
+
// Joins two qualifier parts with `::`, returning the other part unchanged when
// one is empty. The qualifier only disambiguates hashed ids, so any unique join
// would work.
function joinQualifier(a, b) {
    if (a && b) {
        return `${a}::${b}`;
    }
    return a ? a : b;
}
|
|
903
|
+
// Builds a symbol record anchored at `node`. Symbols sharing the same
// name/kind/signature/qualifier within `ctx` are told apart by appending `#<n>`
// (n = 2, 3, …) to the qualifier fed into the id hash; the first occurrence keeps
// the plain qualifier. The id hashes the full signature, while the stored
// signature is truncated to SIGNATURE_DISPLAY_CAP characters.
function makeCppSymbol(ctx, node, signature, kind, name, fqn, exported, doc, qualifier = '') {
    const occurrenceKey = `${name}\0${kind}\0${signature}\0${qualifier}`;
    const seenSoFar = ctx.occurrences.get(occurrenceKey) ?? 0;
    const ordinal = seenSoFar + 1;
    ctx.occurrences.set(occurrenceKey, ordinal);
    const idQualifier = ordinal !== 1 ? `${qualifier}#${ordinal}` : qualifier;
    const id = symbolId(ctx.fileInfo.path, name, kind, signature, idQualifier);
    return {
        id,
        name,
        fqn,
        kind,
        file: ctx.fileInfo.path,
        // Tree-sitter rows are 0-based; stored lines are 1-based.
        startLine: node.startPosition.row + 1,
        endLine: node.endPosition.row + 1,
        signature: signature.slice(0, SIGNATURE_DISPLAY_CAP),
        doc,
        exported,
        language: ctx.fileInfo.language,
    };
}
|
|
922
|
+
// ── shared with the Objective-C extractor ───────────────────────────────────
|
|
923
|
+
// Objective-C is a C SUPERSET: `objc.ts` reuses this module's C-subset machinery
|
|
924
|
+
// and implements only the OO surface (`@interface`/`@implementation`/`@protocol`/
|
|
925
|
+
// `@property`/methods/message sends) itself. For every NON-OO top-level node (C
|
|
926
|
+
// functions, structs, enums, typedefs, globals, `#import`) objc.ts delegates to the
|
|
927
|
+
// shared `handleMember` dispatcher at FILE SCOPE — so the static-linkage gate, inline
|
|
928
|
+
// `struct Named {…} g;` types, K&R signatures, and `#import` all behave identically to
|
|
929
|
+
// the C extractor with zero divergence. It also reuses `cppMemberCallInfo` (C-subset
|
|
930
|
+
// `p->fn()`/`Foo::bar()` calls), `analyze` (the `@property` name), the FQN + symbol
|
|
931
|
+
// constructors, the doc reader, and the skip sets (incl. the `preproc_*` group set, so
|
|
932
|
+
// `#ifndef` include guards are recursed transparently). These are pure module-scope
|
|
933
|
+
// helpers; re-exporting them is inert for the cpp/c extractors (re-dogfood confirms).
|
|
934
|
+
export { handleMember, analyze, cppDoc, cppMemberCallInfo, topFqn, memberFqn, makeCppSymbol, CPP_SKIP_TYPES, CPP_FUNCTION_BODY_SKIP_TYPES, PREPROC_GROUPS, };
|