codedeep-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +177 -0
- package/dist/config.js +223 -0
- package/dist/git/analyzer.js +177 -0
- package/dist/git/git-service.js +568 -0
- package/dist/git/head-watcher.js +113 -0
- package/dist/git/runner.js +204 -0
- package/dist/index.js +138 -0
- package/dist/indexer/code-index.js +1801 -0
- package/dist/indexer/complexity.js +633 -0
- package/dist/indexer/extractor.js +354 -0
- package/dist/indexer/languages/cpp.js +934 -0
- package/dist/indexer/languages/csharp.js +854 -0
- package/dist/indexer/languages/dart.js +777 -0
- package/dist/indexer/languages/go.js +665 -0
- package/dist/indexer/languages/java.js +507 -0
- package/dist/indexer/languages/kotlin.js +709 -0
- package/dist/indexer/languages/objc.js +397 -0
- package/dist/indexer/languages/php.js +771 -0
- package/dist/indexer/languages/python.js +455 -0
- package/dist/indexer/languages/ruby.js +697 -0
- package/dist/indexer/languages/rust.js +754 -0
- package/dist/indexer/languages/swift.js +691 -0
- package/dist/indexer/languages/typescript.js +485 -0
- package/dist/indexer/parser.js +175 -0
- package/dist/indexer/pipeline.js +342 -0
- package/dist/indexer/scanner.js +279 -0
- package/dist/indexer/watcher.js +353 -0
- package/dist/logger.js +16 -0
- package/dist/server.js +170 -0
- package/dist/tools/common.js +207 -0
- package/dist/tools/find-references.js +224 -0
- package/dist/tools/find-symbol.js +94 -0
- package/dist/tools/get-context.js +370 -0
- package/dist/tools/impact.js +218 -0
- package/dist/tools/overview.js +482 -0
- package/dist/tools/search-structure.js +303 -0
- package/dist/types.js +61 -0
- package/grammars/tree-sitter-c.wasm +0 -0
- package/grammars/tree-sitter-c_sharp.wasm +0 -0
- package/grammars/tree-sitter-cpp.wasm +0 -0
- package/grammars/tree-sitter-dart.wasm +0 -0
- package/grammars/tree-sitter-go.wasm +0 -0
- package/grammars/tree-sitter-java.wasm +0 -0
- package/grammars/tree-sitter-javascript.wasm +0 -0
- package/grammars/tree-sitter-kotlin.wasm +0 -0
- package/grammars/tree-sitter-objc.wasm +0 -0
- package/grammars/tree-sitter-php.wasm +0 -0
- package/grammars/tree-sitter-python.wasm +0 -0
- package/grammars/tree-sitter-ruby.wasm +0 -0
- package/grammars/tree-sitter-rust.wasm +0 -0
- package/grammars/tree-sitter-swift.wasm +0 -0
- package/grammars/tree-sitter-tsx.wasm +0 -0
- package/grammars/tree-sitter-typescript.wasm +0 -0
- package/package.json +67 -0
|
@@ -0,0 +1,754 @@
|
|
|
1
|
+
import { IMPORT_NAMESPACE, RECEIVER_OPAQUE } from '../../types.js';
|
|
2
|
+
import { SIGNATURE_DISPLAY_CAP, collectAmbiguousTypeNames, declSignature, normalizeSignature, resolveCalls, symbolId, } from '../extractor.js';
|
|
3
|
+
import { cFamilyBooleanOperatorKind, computeComplexity, isCFamilyBooleanOperator, } from '../complexity.js';
|
|
4
|
+
// Nested `fn` items create their own scope — their calls must NOT attribute to
|
|
5
|
+
// an enclosing function, so they're pruned from the body walk (and aren't
|
|
6
|
+
// extracted as symbols, the "top-level + member only" rule). closure_expression
|
|
7
|
+
// is deliberately ABSENT: a closure can't be a symbol, so calls inside
|
|
8
|
+
// `it.map(|x| foo(x))` attribute to the enclosing fn (the Go func_literal / Java
|
|
9
|
+
// lambda rule). function_signature_item has no body, so it never reaches here.
|
|
10
|
+
// Summary: the only node type pruned from a function-body walk is a nested `function_item`.
const RUST_FUNCTION_BODY_SKIP_TYPES = new Set(['function_item']);
|
|
11
|
+
// Same set — Rust impl/trait/struct/enum/mod bodies are descended on the
|
|
12
|
+
// module-root walk (their methods are already PendingBodies, dropped by the
|
|
13
|
+
// seen-set; their const/static initializers attribute as module-level calls).
|
|
14
|
+
// ALSO the complexity boundary (cyclomatic DFS + cognitive walk): nested
|
|
15
|
+
// `function_item` is pruned (its control flow counts toward no symbol — the
|
|
16
|
+
// per-symbol model, like Java anon-classes / Python nested fns), while
|
|
17
|
+
// `closure_expression` is DESCENDED (absent here), so a closure's branches roll
|
|
18
|
+
// into the enclosing fn (matching rust-code-analysis, which merges the closure
|
|
19
|
+
// func-space upward).
|
|
20
|
+
// NOTE: this is an alias of the SAME Set instance, not a copy — a mutation through either name is visible through both.
const RUST_SKIP_TYPES = RUST_FUNCTION_BODY_SKIP_TYPES;
|
|
21
|
+
// Cyclomatic decision set — pinned to Mozilla's `rust-code-analysis` (the
|
|
22
|
+
// empirical `rust-code-analysis-cli` oracle), DELIBERATELY divergent from
|
|
23
|
+
// SonarSource's sonar-rust on two points the two analyzers disagree on: (1) the
|
|
24
|
+
// `?` try operator (`try_expression`) COUNTS — rust-code-analysis treats each `?`
|
|
25
|
+
// as a decision point (an implicit early-return-on-`Err`), sonar-rust does NOT;
|
|
26
|
+
// pinned to count it (it's the dominant control construct in Result-heavy Rust,
|
|
27
|
+
// and a McCabe-faithful branch — the Go precedent, where codedeep-mcp pinned gocyclo
|
|
28
|
+
// over sonar-go for counting select-cases sonar-go dropped). (2) EVERY `match_arm`
|
|
29
|
+
// counts (incl. the wildcard `_`; an or-pattern `A | B =>` is ONE arm), where
|
|
30
|
+
// sonar-rust filters empty-bodied arms. `if let`/`while let` are plain
|
|
31
|
+
// `if_expression`/`while_expression` (the `let` lives in a `let_condition`/
|
|
32
|
+
// `let_chain` in the `condition` field) so they're counted automatically;
|
|
33
|
+
// `else if` is a nested `if_expression`. `closure_expression` counts +1 (the
|
|
34
|
+
// func-space base rust-code-analysis merges upward) and is descended (its inner
|
|
35
|
+
// branches also count). `match_expression` is NOT here (only its arms count);
|
|
36
|
+
// `let … else` (`let_declaration` + `alternative` block) is NOT counted (neither
|
|
37
|
+
// analyzer does). `&&`/`||` count via the shared isCFamilyBooleanOperator (Rust
|
|
38
|
+
// has no `??`, so that branch is inert, as in Go). VERIFIED against
|
|
39
|
+
// rust-code-analysis-cli on ripgrep + serde.
|
|
40
|
+
// Node types that each contribute +1 to cyclomatic complexity.
// `match_expression` is intentionally absent: only its arms are counted.
const RUST_DECISION_NODE_TYPES = new Set([
    // Branches and loops. `if let` / `while let` / `else if` are these same
    // node types, so they need no entries of their own.
    'if_expression',
    'while_expression',
    'for_expression',
    'loop_expression',
    // One per arm, the wildcard `_` arm included.
    'match_arm',
    // The `?` operator.
    'try_expression',
    // +1 for the closure itself; its body is descended as well.
    'closure_expression',
]);
|
|
49
|
+
// Cognitive-complexity config. The CYCLOMATIC side pins to rust-code-analysis;
|
|
50
|
+
// the COGNITIVE side is whitepaper/sonar-rust-aligned and DELIBERATELY does NOT
|
|
51
|
+
// replicate two rust-code-analysis BUGS the empirical oracle surfaced (both make
|
|
52
|
+
// rca's cognitive number indefensible, so codedeep-mcp stays whitepaper-correct — the
|
|
53
|
+
// SonarJS-ternary-bug precedent): (1) rca's Rust cognitive visitor OMITS
|
|
54
|
+
// `loop_expression` entirely (it counts `while`/`for` but a bare `loop {}` adds
|
|
55
|
+
// nothing and doesn't nest its body — an obvious omission, inconsistent with rca's
|
|
56
|
+
// own cyclomatic which DOES count `Loop`); codedeep-mcp counts all three loops. (2) rca's
|
|
57
|
+
// boolean handling carries its run-state across the whole function (reset only on a
|
|
58
|
+
// nesting bump, unlike rca's own Python impl which resets per clause), so it both
|
|
59
|
+
// under-counts (merged else-if conditions: `if (c||d){} else if (e||f){}` scores 3
|
|
60
|
+
// vs codedeep-mcp's 4) and over-counts (`a && b && c || d || e` scores 3 vs codedeep-mcp's 2,
|
|
61
|
+
// worse with `!`/longer chains);
|
|
62
|
+
// codedeep-mcp counts per maximal same-kind run per expression = 2 (the whitepaper rule).
|
|
63
|
+
// Where rca is NOT buggy the two agree. Also: `?` is NOT counted
|
|
64
|
+
// cognitively (both analyzers agree — unlike cyclomatic), recursion is NOT counted,
|
|
65
|
+
// a whole `match` is +1 with arms nesting (the cyc/cog divergence), closures raise
|
|
66
|
+
// nesting (+0, the lambda rule) and roll into the enclosing fn (descended), and the
|
|
67
|
+
// plain-`else` body NESTS (the whitepaper/sonar default — rca agrees: its `Else`
|
|
68
|
+
// node inherits the if's bumped nesting via the nesting_map). VERIFIED against
|
|
69
|
+
// rust-code-analysis-cli on ripgrep + serde: every divergence decomposes into the
|
|
70
|
+
// two rca bugs above, `let … else` (rca's grammar parses its `else` as a counted
|
|
71
|
+
// node, codedeep-mcp's as a plain block — documented gap), macro-internal control flow
|
|
72
|
+
// (codedeep-mcp's grammar treats macro token-trees as opaque — documented), and nested
|
|
73
|
+
// fn/impl bodies (the per-symbol model, like Java anon-classes).
|
|
74
|
+
// Rust configuration for the shared cognitive-complexity walk.
const RUST_COGNITIVE_OPTIONS = {
    // Shape of an `if`: its node type and the field names of its three parts.
    ifType: 'if_expression',
    conditionField: 'condition',
    consequenceField: 'consequence',
    alternativeField: 'alternative',
    // tree-sitter-rust wraps else / else-if in an `else_clause` (the TS shape,
    // NOT Java's direct alternative); it is unwrapped to reach the inner
    // if_expression/block. nestElseBody is left unset (true), so a plain `else`
    // body nests one level — the sonar/whitepaper default, which rca matches
    // via its Else-inherits-bumped-nesting.
    elseClauseType: 'else_clause',
    loopTypes: new Set(['loop_expression', 'while_expression', 'for_expression']),
    // loopBodyField is UNSET → nesting is bumped for ALL children of a loop
    // (rust-code-analysis raises nesting for the whole loop subtree). This keeps
    // the accepted loop-header overbump shared with Java/TS/Go, rather than
    // Python's body-only nesting.
    switchTypes: new Set(['match_expression']),
    // Rust has neither a ternary (`if` is an expression) nor try/catch (`?` is
    // a separate construct, uncounted cognitively). Both slots get sentinel
    // strings that never match a real node type.
    ternaryType: '__rust_no_ternary__',
    catchType: '__rust_no_catch__',
    // Closures add +0 themselves but raise nesting, and their control flow rolls
    // into the enclosing fn (the cyclomatic side counts them +1 and descends
    // them too).
    nestOnlyTypes: new Set(['closure_expression']),
    // Rust break/continue are expressions. `break 'outer` / `continue 'outer`
    // carry a `label` NAMED CHILD; unlabeled jumps add nothing.
    labeledJumpTypes: new Set(['break_expression', 'continue_expression']),
    hasLabel: (jump) => jump.namedChildren.some((child) => child?.type === 'label'),
    // `&&` / `||` are classified by the shared C-family reader (Rust has no
    // `??`). booleanRunStarts is unset → the default, +1 at every operator-kind
    // change.
    // NOTE(review): this comment previously said the default "matches both
    // analyzers", which conflicts with the rca boolean run-state divergence
    // documented above this declaration — confirm which statement is current.
    booleanOperatorKind: cFamilyBooleanOperatorKind,
    // Parentheses are unwrapped while linearizing a boolean run:
    // rust-code-analysis does NOT end a run at a parenthesized expression
    // (unlike Go's sentinel).
    parenthesizedType: 'parenthesized_expression',
    // `let … else` (a `let_declaration` that has an `alternative` block) adds
    // +1 FLAT. It is the irrefutable-binding analog of `if let … else` (which
    // IS counted), so counting it is the whitepaper-correct choice, and it
    // matches rust-code-analysis, whose grammar parses the `else` as a counted
    // node. CYCLOMATIC does not count it (neither analyzer does; a let-else
    // introduces no If/loop/&&/|| node of its own).
    flatIncrement: (decl) => {
        if (decl.type !== 'let_declaration') {
            return false;
        }
        return decl.childForFieldName('alternative') !== null;
    },
    // No initField: Rust's `if` has no init clause (a let-chain lives in
    // `condition`). No recursion option: rust-code-analysis does not count
    // direct recursion.
};
|
|
116
|
+
// Cyclomatic extra-decision predicate (the engine's `extraDecisionPredicate` slot,
|
|
117
|
+
// as Java reuses it for switch labels). Counts `&&`/`||` via the
|
|
118
|
+
// shared C-family reader, PLUS a MATCH-ARM GUARD (`pat if cond => …`). The guard
|
|
119
|
+
// is a decision point rust-code-analysis counts (its grammar yields an `If` for
|
|
120
|
+
// the guard), but it has no dedicated node type here — it's a `condition` field
|
|
121
|
+
// on `match_pattern` (`match_arm → match_pattern{pattern, condition: <expr>}`), so
|
|
122
|
+
// a flat node-type set can't catch it without also counting every unguarded arm.
|
|
123
|
+
// CYCLOMATIC ONLY: rust-code-analysis does NOT count the guard cognitively (only
|
|
124
|
+
// its inner booleans, which the cognitive walk already descends). The guard's own
|
|
125
|
+
// `&&`/`||` are still counted separately by the C-family reader (so `A if c && d`
|
|
126
|
+
// is +2: the guard +1 and the `&&` +1 — matching rca).
|
|
127
|
+
//
|
|
128
|
+
// KNOWN RECALL GAP (safe under-count, grammar-driven, like macro-opacity): the
|
|
129
|
+
// `&&`/`||` that join an `if let`/`while let` LET-CHAIN (`if let Some(x) = o && x > 0`)
|
|
130
|
+
// are ANONYMOUS tokens inside the `let_chain` node, NOT `binary_expression` nodes, so
|
|
131
|
+
// neither this predicate nor the DFS (which walks named children) sees them — codedeep-mcp
|
|
132
|
+
// under-counts such a chain by its `&&`/`||` count where rust-code-analysis (whose
|
|
133
|
+
// grammar exposes them) counts each. The `if`/`while` itself is still counted, and
|
|
134
|
+
// the cognitive side agrees (rca's cognitive boolean reader also misses let-chain
|
|
135
|
+
// `&&`). Never an over-count; counting them would need anonymous-token machinery for
|
|
136
|
+
// a low-frequency pattern.
|
|
137
|
+
// Cyclomatic `extraDecisionPredicate`: true when `node` is a decision point the
// flat node-type set cannot express. Two cases qualify:
//   1. anything the shared C-family reader classifies as a boolean operator;
//   2. a `match_pattern` carrying a `condition` field, i.e. a guarded match arm
//      (`pat if cond => …`).
function rustCyclomaticExtra(node) {
    const isGuardedArmPattern = () => node.type === 'match_pattern' && node.childForFieldName('condition') !== null;
    return isCFamilyBooleanOperator(node) ? true : isGuardedArmPattern();
}
|
|
142
|
+
// `struct_expression`'s callee is a type_identifier (`Point { .. }`), never a
|
|
143
|
+
// plain identifier — without this, every brace-construction ref is dropped.
|
|
144
|
+
// Handed to resolveCalls as its `bareCalleeTypes` option.
const RUST_BARE_CALLEE_TYPES = new Set(['identifier', 'type_identifier']);
|
|
145
|
+
// A bare `foo()` binds to free functions only. Tuple-struct / enum-variant
|
|
146
|
+
// constructors (`Tuple(1, 2)`, `Variant(x)`) parse as identical call_expressions
|
|
147
|
+
// with identifier callees, so struct/enum kinds stay out — a constructor is
|
|
148
|
+
// emitted as an unresolved name-keyed ref, never a confidently-wrong edge (the
|
|
149
|
+
// Go type-conversion rule).
|
|
150
|
+
// Handed to resolveCalls as its `bareCallableKinds` option.
const RUST_BARE_CALLABLE_KINDS = new Set(['function']);
|
|
151
|
+
// Brace construction `Point { .. }` binds to structs/unions (both 'class').
|
|
152
|
+
// Enums can't be brace-constructed (only their variants, via a scoped name that
|
|
153
|
+
// reaches memberCallInfo); traits/type-aliases never can.
|
|
154
|
+
// Handed to resolveCalls as its `constructorKinds` option.
const RUST_CONSTRUCTOR_KINDS = new Set(['class']);
|
|
155
|
+
// Kinds sharing the simple-name FQN namespace — duplicates among these are
|
|
156
|
+
// excluded from extract-time resolution. (struct/union→class, trait→interface,
|
|
157
|
+
// enum→enum, type alias→type.)
|
|
158
|
+
// Handed to collectAmbiguousTypeNames to pick which symbol kinds are checked for duplicate names.
const RUST_TYPE_KINDS = new Set(['class', 'interface', 'enum', 'type']);
|
|
159
|
+
// Prelude enum-variant constructors used as bare calls everywhere
|
|
160
|
+
// (`Ok(x)`, `Some(v)`, `Err(e)`). They parse as call_expression with an
|
|
161
|
+
// identifier callee and would otherwise flood the name-keyed reference store —
|
|
162
|
+
// the Rust analog of Go's builtins. Suppressed ONLY when unresolved (a file-
|
|
163
|
+
// local function shadowing the name still keeps its refs). Extend after
|
|
164
|
+
// measuring on real repos.
|
|
165
|
+
// Handed to resolveCalls as its `ignoredBareCallees` option.
const RUST_IGNORED_BARE_CALLEES = new Set(['Some', 'Ok', 'Err']);
|
|
166
|
+
// Call-site selectors: which node types count as a call, and how to reach the
// callee node from each.
const RUST_SELECTORS = [
    {
        // Ordinary call: the callee is the `function` field.
        nodeType: 'call_expression',
        getCallee(callNode) {
            return callNode.childForFieldName('function');
        },
    },
    {
        // Brace construction: the callee is derived by structExpressionCallee.
        nodeType: 'struct_expression',
        getCallee: structExpressionCallee,
    },
];
|
|
170
|
+
// `Point { .. }` → type_identifier (bare constructor-form, binds via
|
|
171
|
+
// constructorKinds); `Shape::Circle { .. }` / `mod::Type { .. }` →
|
|
172
|
+
// scoped_type_identifier (member path, handled by rustMemberCallInfo).
|
|
173
|
+
// `Self { .. }` is suppressed: as a bare type_identifier it would resolve
|
|
174
|
+
// against a (non-existent) symbol literally named `Self` and flood the ref
|
|
175
|
+
// store with junk `Self` targets — ignoredBareCallees can't catch it (that
|
|
176
|
+
// gate is identifier-only). Self-construction edges aren't worth the noise.
|
|
177
|
+
// Picks the callee node of a `struct_expression`, or null when the construction
// should not produce a reference.
//   - `Point { .. }`            → the type_identifier (bare constructor form)
//   - `Self { .. }`             → null (suppressed)
//   - `Enum::Variant { .. }`    → the scoped_type_identifier (member path)
//   - `Pair::<T> { .. }`        → the turbofish is unwrapped to its `type`
//                                 field first, then classified as above
//                                 (type_identifier, or scoped_identifier for
//                                 `m::Pair::<T>`)
//   - anything else / no name   → null
function structExpressionCallee(node) {
    const declared = node.childForFieldName('name');
    // `generic_type_with_turbofish` wraps the real type in its `type` field.
    const target = declared?.type === 'generic_type_with_turbofish'
        ? declared.childForFieldName('type')
        : declared;
    if (!target) {
        return null;
    }
    switch (target.type) {
        case 'type_identifier':
            return target.text === 'Self' ? null : target;
        case 'scoped_type_identifier':
        case 'scoped_identifier':
            return target;
        default:
            return null;
    }
}
|
|
193
|
+
// Reduces a `.`/`::` call callee to {receiver, property}. Mirrors TS/Python/
|
|
194
|
+
// Java/Go for the `.` form; the `::` form additionally captures MULTI-segment
|
|
195
|
+
// paths so fully-qualified calls aren't dropped (the cross-file recall gap).
|
|
196
|
+
// - field_expression: `self.x()` (value is the fixed `self` token → isSelf),
|
|
197
|
+
// `obj.x()` (identifier receiver). Chained/computed receivers (`a.b().c()`,
|
|
198
|
+
// `obj.inner.m()`) → RECEIVER_OPAQUE (findable by method name, never resolved).
|
|
199
|
+
// - scoped_identifier / scoped_type_identifier (the `::` path form):
|
|
200
|
+
// * single-segment `foo::bar()` / `Type::assoc()` / `Enum::Variant {}` —
|
|
201
|
+
// `Self::x()` → isSelf (resolve against the enclosing impl type);
|
|
202
|
+
// * multi-segment, crate-rooted `crate::defs::f()` / `super::a::b::f()` —
|
|
203
|
+
// the IMMEDIATE qualifier (the path's own last name) is the receiver, so
|
|
204
|
+
// the ref takes the same shape as `qualifier::name()` and resolves
|
|
205
|
+
// cross-file through the existing member-ref machinery. Multi-segment paths
|
|
206
|
+
// rooted at any other name (`std::mem::swap()`, `a::b::c::d()`) are DROPPED;
|
|
207
|
+
// * root-relative `crate::f()` / `super::f()` / `self::f()` — the root
|
|
208
|
+
// keyword is the only receiver token available.
|
|
209
|
+
// Rust needs no PendingBody.selfReceiverName (Go's mechanism): `self` is a
|
|
210
|
+
// fixed token and `Self` is a fixed identifier, decided here like Python.
|
|
211
|
+
//
|
|
212
|
+
// Dominant Rust stdlib/iterator/Option/Result/string method names (>=4 chars)
|
|
213
|
+
// suppressed when a member call to them is unresolved — capturing chained
|
|
214
|
+
// `.iter().map().collect()` calls otherwise floods the name-keyed store. Domain
|
|
215
|
+
// method names are deliberately absent. <=3-char names (`.len`, `.get`) are
|
|
216
|
+
// gated downstream by SHORT_NAME_THRESHOLD.
|
|
217
|
+
//
|
|
218
|
+
// Composition trimmed after a ripgrep dogfood measured flood-vs-recall PER name
|
|
219
|
+
// (member-call sites vs in-repo `pub fn` defs). The kept names are
|
|
220
|
+
// canonical-by-usage: even where ripgrep also defines one, ~0–28% of `.name()`
|
|
221
|
+
// sites target it, so capturing would inject mostly-FALSE weak-include callers
|
|
222
|
+
// onto a boilerplate/look-alike method (the precision-over-recall stance that
|
|
223
|
+
// already drops external multi-segment path calls). Notably `parse` stays —
|
|
224
|
+
// its in-repo defs are private/free-fns and every `.parse()` site is stdlib
|
|
225
|
+
// `str::parse`, so trimming it surfaces ZERO real callers. `bytes`/`remove` were
|
|
226
|
+
// REMOVED (now captured): distinctive domain methods (Sink/printer accessors;
|
|
227
|
+
// ByteSet/Dir mutators) with real in-repo recall stake and ~0 stdlib false-include.
|
|
228
|
+
// Member-call names that are suppressed when the call is unresolved.
// Insertion order is kept exactly as before; only the line wrapping changed.
const RUST_IGNORED_MEMBER_CALLEES = new Set([
    'unwrap', 'expect', 'unwrap_or', 'unwrap_or_else', 'unwrap_or_default', 'clone',
    'into', 'into_iter', 'iter', 'iter_mut', 'collect', 'map_err',
    'filter', 'filter_map', 'flat_map', 'fold', 'reduce', 'for_each',
    'count', 'take', 'skip', 'chain', 'enumerate', 'next',
    'peekable', 'cloned', 'copied', 'as_ref', 'as_mut', 'as_str',
    'as_slice', 'to_string', 'to_owned', 'to_vec', 'borrow', 'borrow_mut',
    'push', 'insert', 'contains', 'contains_key', 'is_empty', 'is_some',
    'is_none', 'is_ok', 'is_err', 'ok_or', 'and_then', 'or_else',
    'ok_or_else', 'trim', 'split', 'splitn', 'replace', 'starts_with',
    'ends_with', 'parse', 'lock', 'read', 'write', 'lines',
    'chars',
]);
|
|
240
|
+
// Reduces a member-style callee to `{ receiver, property, isSelf }` (plus
// `pathQualified: true` for the `::` form), or null when the callee is not a
// member call this extractor records.
function rustMemberCallInfo(callee) {
    const calleeType = callee.type;

    // ---- `.` form: `recv.method()` ----
    if (calleeType === 'field_expression') {
        const receiverNode = callee.childForFieldName('value');
        const methodNode = callee.childForFieldName('field');
        if (!receiverNode || methodNode?.type !== 'field_identifier') {
            return null;
        }
        const property = methodNode.text;
        switch (receiverNode.type) {
            case 'self':
                return { receiver: 'self', property, isSelf: true };
            case 'identifier':
                return { receiver: receiverNode.text, property, isSelf: false };
            default:
                // Chained/computed receiver (`obj.inner.method()`, `f().g()`):
                // findable by the called method name, never resolved.
                return { receiver: RECEIVER_OPAQUE, property, isSelf: false };
        }
    }

    // ---- `::` form: `path::name()` ----
    if (calleeType !== 'scoped_identifier' && calleeType !== 'scoped_type_identifier') {
        return null;
    }
    const nameNode = callee.childForFieldName('name');
    const pathNode = callee.childForFieldName('path');
    if (!nameNode || !pathNode) {
        return null;
    }
    const property = nameNode.text;
    // Every `::` result is tagged pathQualified: these calls are a small
    // intra-crate population (not the dot-method flood), so emit() exempts them
    // from RUST_IGNORED_MEMBER_CALLEES — `crate::cfg::parse()` to an in-repo
    // `fn parse` stays findable even though `.parse()` method calls are
    // suppressed.
    switch (pathNode.type) {
        case 'identifier':
            // `Type::assoc()` / `foo::bar()`; `Self::x()` is flagged isSelf.
            return {
                receiver: pathNode.text,
                property,
                isSelf: pathNode.text === 'Self',
                pathQualified: true,
            };
        case 'scoped_identifier':
        case 'scoped_type_identifier': {
            // Multi-segment path (`A::B::name()`): recorded ONLY when rooted at
            // crate/self/super, which is reliably intra-crate. Paths rooted at
            // an external/workspace crate name (`std::io::stdout()`, `grep::…`)
            // are dropped: their final segment routinely collides with a
            // same-named in-repo member and the weak member-include cannot see
            // the receiver, so capturing them injects false cross-file callers.
            if (!isCrateRooted(pathNode)) {
                return null;
            }
            // The immediate qualifier (the path's own last name) is the receiver.
            const qualifier = pathNode.childForFieldName('name');
            return qualifier
                ? { receiver: qualifier.text, property, isSelf: false, pathQualified: true }
                : null;
        }
        case 'crate':
        case 'super':
        case 'self':
            // Root-relative path (`crate::f()`, `super::f()`, `self::f()`): the
            // path IS the keyword node, and the keyword is the receiver.
            return { receiver: pathNode.text, property, isSelf: false, pathQualified: true };
        default:
            return null;
    }
}
|
|
295
|
+
// True when a multi-segment path's deepest root is the crate/self/super keyword
|
|
296
|
+
// (reliably the current crate), as opposed to an external/workspace crate name.
|
|
297
|
+
// Reports whether a scoped path bottoms out at one of the `crate` / `super` /
// `self` keyword nodes.
//
// Follows the `path` field down through nested scoped_identifier /
// scoped_type_identifier nodes until it reaches a node of another type (or
// nothing), then checks that terminal node's type.
//
// Returns false — rather than throwing — when the walk ends on a missing node,
// whether the binding reports it as `null` or `undefined`. (The previous
// `node !== null` check dereferenced `undefined` in the latter case.)
function isCrateRooted(scoped) {
    let root = scoped;
    while (root?.type === 'scoped_identifier' || root?.type === 'scoped_type_identifier') {
        root = root.childForFieldName('path');
    }
    if (!root) {
        return false;
    }
    return root.type === 'crate' || root.type === 'super' || root.type === 'self';
}
|
|
304
|
+
// Entry point of the Rust extractor. Runs three passes over one parsed file:
//   1. extractItems fills `symbols`, `imports` and the pending `bodies`;
//   2. resolveCalls turns the bodies into `references`;
//   3. computeComplexity is run over the bodies and symbols.
// Returns `{ symbols, references, imports }`.
export function extractRust(tree, content, fileInfo) {
    const root = tree.rootNode;
    const symbols = [];
    const imports = [];
    const bodies = [];

    // Pass 1: top-level walk — empty module path, exported container, and a
    // fresh occurrences map.
    extractItems(root.namedChildren, content, fileInfo, '', true, new Map(), symbols, imports, bodies);

    // Pass 2: call resolution.
    const callOptions = {
        bareCalleeTypes: RUST_BARE_CALLEE_TYPES,
        // A bare `foo()` inside a method body is a free-function call — Rust
        // has no implicit `self` receiver (the Go rule, opposite of Java).
        bareCallsBindToEnclosingClass: false,
        bareCallableKinds: RUST_BARE_CALLABLE_KINDS,
        constructorKinds: RUST_CONSTRUCTOR_KINDS,
        // Same-name types in one file are invalid Rust, so this only fires on
        // broken parses — where refusing resolution beats binding through a
        // half-parsed type.
        ambiguousClassNames: collectAmbiguousTypeNames(symbols, RUST_TYPE_KINDS),
        ignoredBareCallees: RUST_IGNORED_BARE_CALLEES,
        ignoredMemberCallees: RUST_IGNORED_MEMBER_CALLEES,
    };
    const references = resolveCalls(bodies, root, symbols, fileInfo, RUST_SELECTORS, RUST_SKIP_TYPES, RUST_FUNCTION_BODY_SKIP_TYPES, rustMemberCallInfo, callOptions);

    // Pass 3: complexity.
    const complexityOptions = {
        decisionNodeTypes: RUST_DECISION_NODE_TYPES,
        extraDecisionPredicate: rustCyclomaticExtra,
        skipTypes: RUST_SKIP_TYPES,
        cognitive: RUST_COGNITIVE_OPTIONS,
    };
    computeComplexity(bodies, symbols, complexityOptions);

    return { symbols, references, imports };
}
|
|
332
|
+
// Walks a list of items (source_file children, or a mod/foreign-mod body) and
|
|
333
|
+
// dispatches by node type. `modulePath` is the `::`-joined enclosing module
|
|
334
|
+
// chain (folded into hashed qualifiers for id uniqueness, never into the FQN).
|
|
335
|
+
// `containerExported` is false inside a private module, so its `pub` items
|
|
336
|
+
// aren't reachable from outside and stay exported=false.
|
|
337
|
+
// Dispatches each item in `children` to the extractor for its node type.
// Node types without an entry (attribute_item, line_comment, inner attributes,
// …) produce nothing.
function extractItems(children, content, fileInfo, modulePath, containerExported, occurrences, outSymbols, outImports, outBodies) {
    // Argument runs shared by most extractors.
    const scope = [modulePath, containerExported, occurrences, outSymbols];

    const asFunction = (item) => extractFunction(item, content, fileInfo, ...scope, outBodies);
    const asConstStatic = (item) => extractConstStatic(item, content, fileInfo, ...scope);
    const asStructLike = (item) => extractStructLike(item, content, fileInfo, ...scope);
    const asSimpleType = (kind) => (item) => extractSimpleType(item, content, fileInfo, kind, ...scope);

    const handlers = new Map([
        ['use_declaration', (item) => extractUse(item, fileInfo, outImports)],
        // function_signature_item shows up here only inside an extern block
        // (which is transparent — see foreign_mod_item); both forms are
        // top-level fns.
        ['function_item', asFunction],
        ['function_signature_item', asFunction],
        ['const_item', asConstStatic],
        ['static_item', asConstStatic],
        ['struct_item', asStructLike],
        ['union_item', asStructLike],
        ['enum_item', asSimpleType('enum')],
        ['type_item', asSimpleType('type')],
        ['trait_item', (item) => extractTrait(item, content, fileInfo, ...scope, outBodies)],
        ['impl_item', (item) => extractImpl(item, content, fileInfo, ...scope, outBodies)],
        ['mod_item', (item) => extractMod(item, content, fileInfo, ...scope, outImports, outBodies)],
        ['foreign_mod_item', (item) => {
            // `extern "C" { .. }` is transparent: its fns/statics live in the
            // enclosing module's namespace, so recurse with the same path.
            const externBody = item.childForFieldName('body');
            if (externBody) {
                extractItems(externBody.namedChildren, content, fileInfo, modulePath, containerExported, occurrences, outSymbols, outImports, outBodies);
            }
        }],
        // Macros do not receive containerExported.
        ['macro_definition', (item) => extractMacro(item, content, fileInfo, modulePath, occurrences, outSymbols)],
    ]);

    for (const item of children) {
        handlers.get(item.type)?.(item);
    }
}
|
|
390
|
+
// Emits a 'function' symbol for a `function_item` / `function_signature_item`
// and, when the declaration has a body, queues that body under the new
// symbol's id. Declarations without a name are ignored.
function extractFunction(decl, content, fileInfo, modulePath, containerExported, occurrences, outSymbols, outBodies) {
    const nameNode = decl.childForFieldName('name');
    const name = nameNode?.text;
    if (!name) {
        return;
    }

    const isPublic = containerExported && hasPubVisibility(decl);
    // `fn main` at the crate root is the entry point even without `pub`.
    const isCrateRootMain = modulePath === '' && name === 'main';
    const exported = isPublic || isCrateRootMain;

    const signature = declSignature(decl, content);
    const fqn = `${fileInfo.path}:${name}`;
    const doc = rustDoc(decl);
    const symbol = makeRustSymbol(decl, signature, fileInfo, 'function', name, fqn, exported, doc, occurrences, modulePath);
    outSymbols.push(symbol);

    // function_signature_item (extern / trait-required) is bodiless — symbol only.
    const body = decl.childForFieldName('body');
    if (body) {
        outBodies.push({ symbolId: symbol.id, body });
    }
}
|
|
403
|
+
// Emits one 'variable' symbol for a `const_item` / `static_item`. The symbol is
// exported only when both the container is exported and the item is `pub`.
// Declarations without a name are ignored.
function extractConstStatic(decl, content, fileInfo, modulePath, containerExported, occurrences, outSymbols) {
    const nameNode = decl.childForFieldName('name');
    const name = nameNode?.text;
    if (!name) {
        return;
    }
    const signature = declSignature(decl, content);
    const fqn = `${fileInfo.path}:${name}`;
    const exported = containerExported && hasPubVisibility(decl);
    const doc = rustDoc(decl);
    const symbol = makeRustSymbol(decl, signature, fileInfo, 'variable', name, fqn, exported, doc, occurrences, modulePath);
    outSymbols.push(symbol);
}
|
|
409
|
+
// struct / union → 'class'; fields (named structs/unions only) → 'variable'
|
|
410
|
+
// members. Tuple structs (ordered_field_declaration_list) and unit structs
|
|
411
|
+
// have no named members.
|
|
412
|
+
// Emits a 'class' symbol for a `struct_item` / `union_item`, followed by one
// 'variable' symbol per named field. Fields are read only when the body is a
// `field_declaration_list`; tuple structs and unit structs have no named
// members and yield the type symbol alone. A field is exported only when the
// type itself is exported and the field is `pub`.
function extractStructLike(decl, content, fileInfo, modulePath, containerExported, occurrences, outSymbols) {
    const typeName = decl.childForFieldName('name')?.text;
    if (!typeName) {
        return;
    }

    const typeExported = containerExported && hasPubVisibility(decl);
    const typeSignature = declSignature(decl, content);
    const typeDoc = rustDoc(decl);
    outSymbols.push(makeRustSymbol(decl, typeSignature, fileInfo, 'class', typeName, `${fileInfo.path}:${typeName}`, typeExported, typeDoc, occurrences, modulePath));

    const fieldList = decl.childForFieldName('body');
    if (fieldList?.type !== 'field_declaration_list') {
        return;
    }

    for (const member of fieldList.namedChildren) {
        const memberName = member.type === 'field_declaration'
            ? member.childForFieldName('name')?.text
            : undefined;
        if (!memberName) {
            continue;
        }
        const signature = normalizeSignature(member.text);
        const fqn = `${fileInfo.path}:${typeName}.${memberName}`;
        const memberExported = typeExported && hasPubVisibility(member);
        const doc = rustDoc(member);
        // The field's qualifier is the module path joined with the owning type.
        const qualifier = joinQualifier(modulePath, typeName);
        outSymbols.push(makeRustSymbol(member, signature, fileInfo, 'variable', memberName, fqn, memberExported, doc, occurrences, qualifier));
    }
}
|
|
430
|
+
// enum / type-alias: a single declaration-only symbol (enum variants are not
|
|
431
|
+
// extracted — the TS/Java/Go enum-member rule).
|
|
432
|
+
/**
 * Records a single declaration-only symbol of the caller-supplied `kind`
 * for a named declaration node. No members are extracted.
 *
 * @param {object} decl - syntax node of the declaration
 * @param {string} content - source text handed to `declSignature`
 * @param {object} fileInfo - file descriptor (`path` is read here)
 * @param {string} kind - symbol kind to assign
 * @param {string} modulePath - enclosing module path, used as the id qualifier
 * @param {boolean} containerExported - whether the enclosing container is exported
 * @param {Map} occurrences - per-file duplicate counter used by `makeRustSymbol`
 * @param {Array} outSymbols - receives the created symbol
 */
function extractSimpleType(decl, content, fileInfo, kind, modulePath, containerExported, occurrences, outSymbols) {
    const typeName = decl.childForFieldName('name')?.text;
    if (!typeName) {
        return;
    }
    const signature = declSignature(decl, content);
    const fqn = `${fileInfo.path}:${typeName}`;
    const exported = containerExported && hasPubVisibility(decl);
    const doc = rustDoc(decl);
    const symbol = makeRustSymbol(decl, signature, fileInfo, kind, typeName, fqn, exported, doc, occurrences, modulePath);
    outSymbols.push(symbol);
}
|
|
438
|
+
// trait → 'interface'; its body members are declaration-only (or default-bodied)
|
|
439
|
+
// methods + associated consts/types. Trait items carry no visibility modifier —
|
|
440
|
+
// they inherit the trait's visibility.
|
|
441
|
+
/**
 * Records an 'interface' symbol for a trait declaration and, when its body is
 * a `declaration_list`, hands the body to `extractMembers`. Every member
 * receives the trait's own exported flag.
 *
 * @param {object} decl - syntax node of the trait declaration
 * @param {string} content - source text handed to `declSignature`
 * @param {object} fileInfo - file descriptor (`path` is read here)
 * @param {string} modulePath - enclosing module path
 * @param {boolean} containerExported - whether the enclosing container is exported
 * @param {Map} occurrences - per-file duplicate counter used by `makeRustSymbol`
 * @param {Array} outSymbols - receives the trait symbol and member symbols
 * @param {Array} outBodies - receives bodies of members that have one
 */
function extractTrait(decl, content, fileInfo, modulePath, containerExported, occurrences, outSymbols, outBodies) {
    const traitName = decl.childForFieldName('name')?.text;
    if (!traitName) {
        return;
    }
    const traitExported = containerExported && hasPubVisibility(decl);
    const signature = declSignature(decl, content);
    const doc = rustDoc(decl);
    outSymbols.push(makeRustSymbol(decl, signature, fileInfo, 'interface', traitName, `${fileInfo.path}:${traitName}`, traitExported, doc, occurrences, modulePath));

    const traitBody = decl.childForFieldName('body');
    if (!traitBody || traitBody.type !== 'declaration_list') {
        return;
    }
    // Members inherit the trait's visibility, so the per-member callback is constant.
    const inheritTraitVisibility = () => traitExported;
    extractMembers(traitBody, content, fileInfo, traitName, modulePath, inheritTraitVisibility, occurrences, outSymbols, outBodies);
}
|
|
452
|
+
// impl block: not a symbol itself. Its methods become `file:ImplType.method`
|
|
453
|
+
// (keyed on the IMPLEMENTING type, not the trait — `impl Drawable for Point`
|
|
454
|
+
// gives `Point.draw`, so `self.draw()` resolves against Point). Members carry
|
|
455
|
+
// their own `pub` (inherent `pub fn`); trait-impl conformance methods have no
|
|
456
|
+
// `pub` and stay exported=false (discoverable via the type, not the API surface).
|
|
457
|
+
/**
 * Extracts the members of an impl block. The block itself produces no symbol;
 * members are keyed under the name returned by `implTypeName`. An impl whose
 * target has no resolvable name, or whose body is not a `declaration_list`,
 * is skipped entirely.
 *
 * A member is exported when the container is exported and the member itself
 * has `pub` visibility.
 *
 * @param {object} decl - syntax node of the impl block
 * @param {string} content - source text forwarded to `extractMembers`
 * @param {object} fileInfo - file descriptor forwarded to `extractMembers`
 * @param {string} modulePath - enclosing module path
 * @param {boolean} containerExported - whether the enclosing container is exported
 * @param {Map} occurrences - per-file duplicate counter
 * @param {Array} outSymbols - receives member symbols
 * @param {Array} outBodies - receives bodies of members that have one
 */
function extractImpl(decl, content, fileInfo, modulePath, containerExported, occurrences, outSymbols, outBodies) {
    const targetName = implTypeName(decl);
    if (!targetName) {
        // Non-nominal impl target (&T, (A,B), dyn Trait, [T]): no name to key methods on.
        return;
    }
    const implBody = decl.childForFieldName('body');
    if (!implBody || implBody.type !== 'declaration_list') {
        return;
    }
    const ownVisibility = (member) => containerExported && hasPubVisibility(member);
    extractMembers(implBody, content, fileInfo, targetName, modulePath, ownVisibility, occurrences, outSymbols, outBodies);
}
|
|
466
|
+
// Shared trait/impl body extraction. `memberExported` decides exportedness per
|
|
467
|
+
// member (traits: constant; impls: own-pub). Methods key into methodsByClass
|
|
468
|
+
// under `className`; their bodies (default trait methods, impl methods) become
|
|
469
|
+
// PendingBodies with className set so self-calls resolve.
|
|
470
|
+
/**
 * Shared extraction for trait and impl bodies. Each recognised member becomes
 * a symbol with FQN `<file>:<className>.<name>`:
 *   - function_item / function_signature_item -> 'method'
 *   - const_item / static_item                -> 'variable'
 *   - associated_type / type_item             -> 'type'
 * Any other node type produces no symbol. Methods that have a `body` field
 * additionally append `{ symbolId, body, className }` to `outBodies`.
 *
 * @param {object} body - declaration-list node whose named children are scanned
 * @param {string} content - source text handed to `declSignature`
 * @param {object} fileInfo - file descriptor (`path` is read here)
 * @param {string} className - name the members are keyed under
 * @param {string} modulePath - enclosing module path
 * @param {function(object): boolean} memberExported - decides the exported flag per member
 * @param {Map} occurrences - per-file duplicate counter used by `makeRustSymbol`
 * @param {Array} outSymbols - receives member symbols
 * @param {Array} outBodies - receives method bodies
 */
function extractMembers(body, content, fileInfo, className, modulePath, memberExported, occurrences, outSymbols, outBodies) {
    const kindByNodeType = new Map([
        ['function_item', 'method'],
        ['function_signature_item', 'method'],
        ['const_item', 'variable'],
        ['static_item', 'variable'],
        // Associated types: `type Output;` (trait) or `type Output = T;` (impl).
        ['associated_type', 'type'],
        ['type_item', 'type'],
    ]);
    const qualifier = joinQualifier(modulePath, className);
    for (const member of body.namedChildren) {
        const kind = kindByNodeType.get(member.type);
        if (kind === undefined) {
            // macro_invocation, attribute_item, comments: no member symbol.
            continue;
        }
        const memberName = member.childForFieldName('name')?.text;
        if (!memberName) {
            continue;
        }
        const signature = declSignature(member, content);
        const fqn = `${fileInfo.path}:${className}.${memberName}`;
        const exported = memberExported(member);
        const doc = rustDoc(member);
        const symbol = makeRustSymbol(member, signature, fileInfo, kind, memberName, fqn, exported, doc, occurrences, qualifier);
        outSymbols.push(symbol);
        if (kind !== 'method') {
            continue;
        }
        const methodBody = member.childForFieldName('body');
        if (methodBody) {
            outBodies.push({ symbolId: symbol.id, body: methodBody, className });
        }
    }
}
|
|
509
|
+
// Inline `mod m { .. }` → a 'module' symbol plus recursion into the body with
|
|
510
|
+
// `m` appended to the module path. External `mod m;` (no body) → a declaration-
|
|
511
|
+
// only 'module' symbol (the target file is indexed separately via the .rs scan).
|
|
512
|
+
/**
 * Records a 'module' symbol for a `mod` declaration. When the declaration has
 * an inline `declaration_list` body, its items are extracted recursively via
 * `extractItems` with the module name appended to the module path and the
 * module's exported flag as the new container flag. A bodiless `mod m;`
 * yields the symbol only.
 *
 * @param {object} decl - syntax node of the mod declaration
 * @param {string} content - source text handed to `declSignature`
 * @param {object} fileInfo - file descriptor (`path` is read here)
 * @param {string} modulePath - enclosing module path
 * @param {boolean} containerExported - whether the enclosing container is exported
 * @param {Map} occurrences - per-file duplicate counter used by `makeRustSymbol`
 * @param {Array} outSymbols - receives the module symbol and nested symbols
 * @param {Array} outImports - forwarded to `extractItems`
 * @param {Array} outBodies - forwarded to `extractItems`
 */
function extractMod(decl, content, fileInfo, modulePath, containerExported, occurrences, outSymbols, outImports, outBodies) {
    const modName = decl.childForFieldName('name')?.text;
    if (!modName) {
        return;
    }
    const modExported = containerExported && hasPubVisibility(decl);
    const signature = declSignature(decl, content);
    const doc = rustDoc(decl);
    outSymbols.push(makeRustSymbol(decl, signature, fileInfo, 'module', modName, `${fileInfo.path}:${modName}`, modExported, doc, occurrences, modulePath));

    const modBody = decl.childForFieldName('body');
    if (!modBody || modBody.type !== 'declaration_list') {
        return;
    }
    const nestedPath = joinQualifier(modulePath, modName);
    extractItems(modBody.namedChildren, content, fileInfo, nestedPath, modExported, occurrences, outSymbols, outImports, outBodies);
}
|
|
523
|
+
// `macro_rules! m { .. }` → a findable 'function'-kind symbol. Macros carry no
|
|
524
|
+
// visibility node (even `#[macro_export]` is a separate attribute_item sibling),
|
|
525
|
+
// so they're always exported=false. Macro INVOCATIONS emit no refs (token-tree
|
|
526
|
+
// args are opaque to tree-sitter) — a documented recall gap.
|
|
527
|
+
/**
 * Records a 'function'-kind symbol for a macro definition node. The symbol is
 * always created with exported=false.
 *
 * @param {object} decl - syntax node of the macro definition
 * @param {string} content - source text handed to `declSignature`
 * @param {object} fileInfo - file descriptor (`path` is read here)
 * @param {string} modulePath - enclosing module path, used as the id qualifier
 * @param {Map} occurrences - per-file duplicate counter used by `makeRustSymbol`
 * @param {Array} outSymbols - receives the created symbol
 */
function extractMacro(decl, content, fileInfo, modulePath, occurrences, outSymbols) {
    const macroName = decl.childForFieldName('name')?.text;
    if (!macroName) {
        return;
    }
    const signature = declSignature(decl, content);
    const fqn = `${fileInfo.path}:${macroName}`;
    const doc = rustDoc(decl);
    const symbol = makeRustSymbol(decl, signature, fileInfo, 'function', macroName, fqn, false, doc, occurrences, modulePath);
    outSymbols.push(symbol);
}
|
|
533
|
+
// Impl target type name: unwrap the `type:` field (NOT `trait:` — methods
|
|
534
|
+
// belong to the implementing type). generic_type → its `type` field;
|
|
535
|
+
// scoped_type_identifier → last `name` segment. Non-nominal targets
|
|
536
|
+
// (reference/tuple/array/dynamic/pointer types) return null and their methods
|
|
537
|
+
// are skipped (no name to key on).
|
|
538
|
+
/**
 * Resolves the name of the type an impl block targets, read from the node's
 * `type` field (never the `trait` field).
 *
 * A `generic_type` is unwrapped to its own `type` field first. A
 * `type_identifier` yields its text; a `scoped_type_identifier` yields the
 * text of its `name` field. Anything else yields null.
 *
 * @param {object} decl - syntax node of the impl block
 * @returns {string|null} the target type name, or null when there is none
 */
function implTypeName(decl) {
    const declared = decl.childForFieldName('type');
    const target = declared?.type === 'generic_type'
        ? declared.childForFieldName('type')
        : declared;
    if (!target) {
        return null;
    }
    switch (target.type) {
        case 'type_identifier':
            return target.text;
        case 'scoped_type_identifier':
            return target.childForFieldName('name')?.text ?? null;
        default:
            return null;
    }
}
|
|
550
|
+
/**
 * Reports whether a declaration node has a visibility modifier whose text
 * starts with `pub`. Only the first `visibility_modifier` named child is
 * consulted; a node without one is not public.
 *
 * @param {object} decl - syntax node to inspect
 * @returns {boolean} true when the first visibility modifier starts with `pub`
 */
function hasPubVisibility(decl) {
    const modifier = decl.namedChildren.find((child) => child.type === 'visibility_modifier');
    return modifier ? modifier.text.startsWith('pub') : false;
}
|
|
557
|
+
// Module path and enclosing-type chain are opaque to FQN parsing — they only
|
|
558
|
+
// disambiguate hashed ids — so any unique join works.
|
|
559
|
+
/**
 * Joins two qualifier parts with `::`. When either part is empty the other
 * is returned unchanged (and `b` is returned when both are empty).
 *
 * @param {string} a - leading part (e.g. a module path)
 * @param {string} b - trailing part (e.g. a type name)
 * @returns {string} the combined qualifier
 */
function joinQualifier(a, b) {
    if (a && b) {
        return `${a}::${b}`;
    }
    return a ? a : b;
}
|
|
566
|
+
/**
 * Builds a symbol record for a syntax node.
 *
 * Identical (name, kind, signature, qualifier) tuples are counted in
 * `occurrences`; from the second occurrence on, `#<n>` is appended to the
 * qualifier fed into `symbolId` so ids stay unique within the file. The id is
 * derived from the full signature, while the stored `signature` is cut to
 * SIGNATURE_DISPLAY_CAP characters.
 *
 * @param {object} node - syntax node supplying start/end rows
 * @param {string} signature - full signature text
 * @param {object} fileInfo - file descriptor (`path` and `language` are read)
 * @param {string} kind - symbol kind
 * @param {string} name - symbol name
 * @param {string} fqn - fully qualified name stored on the symbol
 * @param {boolean} exported - exported flag stored on the symbol
 * @param {string|null} doc - doc text stored on the symbol
 * @param {Map} occurrences - duplicate counter, mutated by this call
 * @param {string} [qualifier=''] - disambiguating qualifier for the id
 * @returns {object} the symbol record
 */
function makeRustSymbol(node, signature, fileInfo, kind, name, fqn, exported, doc, occurrences, qualifier = '') {
    const tupleKey = `${name}\0${kind}\0${signature}\0${qualifier}`;
    const previouslySeen = occurrences.get(tupleKey) ?? 0;
    const ordinal = previouslySeen + 1;
    occurrences.set(tupleKey, ordinal);

    // First occurrence keeps the plain qualifier; repeats get an ordinal suffix.
    let idQualifier = qualifier;
    if (ordinal !== 1) {
        idQualifier = `${qualifier}#${ordinal}`;
    }

    return {
        // Hash the FULL signature; only the stored copy below is capped.
        id: symbolId(fileInfo.path, name, kind, signature, idQualifier),
        name,
        fqn,
        kind,
        file: fileInfo.path,
        // Node rows are zero-based; stored lines are one-based.
        startLine: node.startPosition.row + 1,
        endLine: node.endPosition.row + 1,
        signature: signature.slice(0, SIGNATURE_DISPLAY_CAP),
        doc,
        exported,
        language: fileInfo.language,
    };
}
|
|
589
|
+
// Rustdoc — diverges from BOTH Go and Java: outer doc comments (`///`, `/**`)
|
|
590
|
+
// sit ABOVE any `#[attr]` siblings (attributes are separate nodes, not inside
|
|
591
|
+
// the decl), so walk back over attribute_item nodes first. `///` parses as a
|
|
592
|
+
// line_comment with a `doc:` field child (text pre-stripped of the slashes) and
|
|
593
|
+
// an `outer:` marker; `//!` carries an `inner:` marker and documents the
|
|
594
|
+
// ENCLOSING item, so it's excluded. Plain `//` / `/* */` have a null `doc`
|
|
595
|
+
// field. Take the first content line of the contiguous outer-doc block.
|
|
596
|
+
/**
 * Finds the doc text for a declaration: the first non-empty line of the
 * contiguous block of outer doc comments sitting directly above the
 * declaration (or above its leading run of `attribute_item` siblings).
 *
 * A comment belongs to the block only if it is an outer doc comment, is not a
 * trailing comment of an earlier sibling, and its last content row is exactly
 * one row above whatever sits below it.
 *
 * @param {object} decl - syntax node of the declaration
 * @returns {string|null} first doc line, or null when there is no doc block
 */
function rustDoc(decl) {
    // Skip upwards over attributes; the doc block must touch the topmost one.
    let anchorRow = decl.startPosition.row;
    let candidate = decl.previousNamedSibling;
    for (; candidate && candidate.type === 'attribute_item'; candidate = candidate.previousNamedSibling) {
        anchorRow = candidate.startPosition.row;
    }

    // True when `comment` is an outer doc comment ending on the row just above `rowBelow`.
    const sitsDirectlyAbove = (comment, rowBelow) => Boolean(comment) &&
        isOuterDocComment(comment) &&
        !rustIsTrailingComment(comment) &&
        commentEndContentRow(comment) === rowBelow - 1;

    if (!sitsDirectlyAbove(candidate, anchorRow)) {
        return null;
    }

    // Grow the block upwards, keeping it in document order (earliest first).
    const docBlock = [candidate];
    let topmost = candidate;
    let above = topmost.previousNamedSibling;
    while (sitsDirectlyAbove(above, topmost.startPosition.row)) {
        docBlock.unshift(above);
        topmost = above;
        above = topmost.previousNamedSibling;
    }

    for (const comment of docBlock) {
        const firstLine = docCommentFirstLine(comment);
        if (firstLine) {
            return firstLine;
        }
    }
    return null;
}
|
|
631
|
+
// tree-sitter-rust line_comment nodes INCLUDE their trailing newline (a `///`
|
|
632
|
+
// on row N reports endPosition.row N+1), while block_comment nodes do not. Use
|
|
633
|
+
// the last row that actually holds comment text so adjacency math is uniform.
|
|
634
|
+
/**
 * Returns the last row that holds text of the given node. When the node's
 * text ends with a newline its reported end row is one past the content, so
 * one row is subtracted; otherwise the end row is returned as-is.
 *
 * @param {object} node - syntax node with `text` and `endPosition.row`
 * @returns {number} zero-based row of the node's last content line
 */
function commentEndContentRow(node) {
    const includesTrailingNewline = node.text.endsWith('\n');
    const reportedEndRow = node.endPosition.row;
    if (includesTrailingNewline) {
        return reportedEndRow - 1;
    }
    return reportedEndRow;
}
|
|
637
|
+
// A comment sharing its line with the END of an earlier sibling is a trailing
|
|
638
|
+
// comment on that statement, not doc for the next item. The shared
|
|
639
|
+
// isTrailingComment can't be reused: a preceding `///` line's newline-inflated
|
|
640
|
+
// endPosition.row equals the next comment's startPosition.row, which would
|
|
641
|
+
// misflag every second line of a multi-line doc block as trailing. Comparing
|
|
642
|
+
// content-end rows (newline-stripped) fixes it and still catches real trailing
|
|
643
|
+
// comments (`let x = 1; // c`), whose code sibling has no trailing newline.
|
|
644
|
+
/**
 * Reports whether a comment starts on the row where its previous sibling's
 * content ends, i.e. it trails that sibling rather than documenting the next
 * item. A comment with no previous sibling is never trailing.
 *
 * @param {object} comment - comment syntax node
 * @returns {boolean} true when the comment shares a row with the end of the prior sibling
 */
function rustIsTrailingComment(comment) {
    const precedingNode = comment.previousSibling;
    if (precedingNode) {
        // Compare against the newline-stripped end row so a preceding `///` line is not misread.
        return commentEndContentRow(precedingNode) === comment.startPosition.row;
    }
    return false;
}
|
|
650
|
+
/**
 * Reports whether a node is an outer doc comment: a `line_comment` or
 * `block_comment` that has a `doc` field and no `inner` field.
 *
 * @param {object} node - syntax node to classify
 * @returns {boolean} true for outer doc comments only
 */
function isOuterDocComment(node) {
    const isComment = node.type === 'line_comment' || node.type === 'block_comment';
    if (!isComment) {
        return false;
    }
    if (node.childForFieldName('doc') === null) {
        // Plain (non-doc) comment.
        return false;
    }
    // An `inner` marker means the comment documents the enclosing item instead.
    return node.childForFieldName('inner') === null;
}
|
|
655
|
+
// First non-empty line of a doc comment's `doc` field. The field text keeps a
|
|
656
|
+
// leading space, a trailing newline, and (for `/** */`) ` * ` continuation
|
|
657
|
+
// markers — strip a leading `*` and surrounding whitespace per line.
|
|
658
|
+
/**
 * Returns the first non-empty line of a doc comment's `doc` field, with
 * surrounding whitespace removed.
 *
 * For `block_comment` nodes a single leading `*` continuation marker is
 * stripped from each line. For line comments the text is only trimmed: a
 * leading `*` there is content (Markdown emphasis or a bullet), so
 * `/// *Deprecated*: use foo` keeps its opening asterisk.
 *
 * @param {object} node - comment syntax node
 * @returns {string|null} the first non-empty doc line, or null when the node
 *   has no `doc` field or the field holds only blank lines
 */
function docCommentFirstLine(node) {
    const doc = node.childForFieldName('doc');
    if (!doc)
        return null;
    // Only `/** */` bodies carry ` * ` continuation markers.
    const hasContinuationMarkers = node.type === 'block_comment';
    for (const raw of doc.text.split('\n')) {
        const cleaned = hasContinuationMarkers
            ? raw.replace(/^\s*\*?\s?/, '').trim()
            : raw.trim();
        if (cleaned)
            return cleaned;
    }
    return null;
}
|
|
669
|
+
// `use` declarations → one ImportInfo per imported leaf. The argument is a
|
|
670
|
+
// scoped_identifier, scoped_use_list, use_as_clause, use_wildcard, use_list, or
|
|
671
|
+
// bare identifier; nested `{ .. }` lists recurse, accumulating the path prefix.
|
|
672
|
+
/**
 * Collects import entries for one `use` declaration by walking its
 * `argument` field with an empty path prefix. Every entry produced carries
 * the declaration's one-based start line.
 *
 * @param {object} decl - syntax node of the use declaration
 * @param {object} fileInfo - file descriptor forwarded to `walkUse`
 * @param {Array} out - receives the import entries
 */
function extractUse(decl, fileInfo, out) {
    const useTree = decl.childForFieldName('argument');
    if (useTree) {
        const declLine = decl.startPosition.row + 1;
        walkUse(useTree, '', fileInfo, declLine, out);
    }
}
|
|
678
|
+
/**
 * Walks one node of a `use` tree and appends one import entry per imported
 * leaf to `out`. Nested use lists recurse with the accumulated path prefix.
 *
 * Each entry is `{ file, sourceModule, importedNames: [imported], line }`,
 * where `sourceModule` has passed through `stripPathAnchor`.
 *
 * `self` inside a use list binds the last segment of the prefix as a module.
 * This holds for the aliased form too: `use a::b::{self as c}` records name
 * `b` with alias `c`, rather than the literal keyword `self`.
 *
 * @param {object} node - current node of the use tree
 * @param {string} prefix - `::`-joined path accumulated from enclosing lists
 * @param {object} fileInfo - file descriptor (`path` is read here)
 * @param {number} line - one-based line of the enclosing use declaration
 * @param {Array} out - receives the import entries
 */
function walkUse(node, prefix, fileInfo, line, out) {
    const push = (sourceModule, imported) => {
        out.push({ file: fileInfo.path, sourceModule: stripPathAnchor(sourceModule), importedNames: [imported], line });
    };
    // Last `::` segment of the prefix: the module a `self` list item refers to.
    const prefixTail = () => {
        const sep = prefix.lastIndexOf('::');
        return sep === -1 ? prefix : prefix.slice(sep + 2);
    };
    switch (node.type) {
        case 'identifier':
            push(prefix, { name: node.text });
            return;
        case 'scoped_identifier': {
            const name = node.childForFieldName('name');
            const path = node.childForFieldName('path');
            if (!name)
                return;
            push(joinPath(prefix, path?.text), { name: name.text });
            return;
        }
        case 'self': {
            // `use a::b::{self}` binds the module `b` (last segment of the prefix).
            const seg = prefixTail();
            if (seg)
                push(prefix, { name: seg, kind: 'module' });
            return;
        }
        case 'use_as_clause': {
            const path = node.childForFieldName('path');
            const alias = node.childForFieldName('alias');
            if (path?.type === 'self') {
                // `use a::b::{self as c}` binds module `b` under the alias `c`.
                const seg = prefixTail();
                if (seg)
                    push(prefix, { name: seg, alias: alias?.text, kind: 'module' });
            }
            else if (path?.type === 'scoped_identifier') {
                const inner = path.childForFieldName('name');
                const innerPath = path.childForFieldName('path');
                if (inner)
                    push(joinPath(prefix, innerPath?.text), { name: inner.text, alias: alias?.text });
            }
            else if (path) {
                push(prefix, { name: path.text, alias: alias?.text });
            }
            return;
        }
        case 'use_wildcard': {
            // The scoped path is a positional child (no field).
            const inner = node.namedChild(0);
            push(joinPath(prefix, inner?.text), { name: IMPORT_NAMESPACE });
            return;
        }
        case 'scoped_use_list': {
            const path = node.childForFieldName('path');
            const list = node.childForFieldName('list');
            const newPrefix = joinPath(prefix, path?.text);
            if (list)
                for (const item of list.namedChildren)
                    walkUse(item, newPrefix, fileInfo, line, out);
            return;
        }
        case 'use_list':
            for (const item of node.namedChildren)
                walkUse(item, prefix, fileInfo, line, out);
            return;
        // crate / super / metavariable path roots — no usable binding.
        default:
            return;
    }
}
|
|
740
|
+
/**
 * Appends a path segment to a `::`-separated prefix. An empty or missing
 * segment leaves the prefix untouched; an empty prefix yields the segment
 * alone.
 *
 * @param {string} prefix - path accumulated so far
 * @param {string|undefined} seg - segment to append
 * @returns {string} the joined path
 */
function joinPath(prefix, seg) {
    if (prefix && seg) {
        return `${prefix}::${seg}`;
    }
    return seg ? seg : prefix;
}
|
|
747
|
+
// Drops a leading crate::/self::/super:: anchor chain from a use-path's module.
|
|
748
|
+
// These anchors locate the path but don't identify the module for the name-
|
|
749
|
+
// based cross-file resolution Rust uses, so `use crate::foo::Bar` and a
|
|
750
|
+
// re-exported `use foo::Bar` normalize to the same sourceModule ('foo') instead
|
|
751
|
+
// of the inconsistent 'crate::foo' vs 'foo'.
|
|
752
|
+
/**
 * Removes a leading run of `crate::` / `self::` / `super::` anchors from a
 * use-path module string. Anchors elsewhere in the string, and a string that
 * is only a bare anchor with no trailing `::`, are left unchanged.
 *
 * @param {string} mod - `::`-separated module path
 * @returns {string} the path without its leading anchor chain
 */
function stripPathAnchor(mod) {
    const leadingAnchorChain = /^(?:(?:crate|self|super)::)+/;
    return mod.replace(leadingAnchorChain, '');
}
|