codedeep-mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +177 -0
  3. package/dist/config.js +223 -0
  4. package/dist/git/analyzer.js +177 -0
  5. package/dist/git/git-service.js +568 -0
  6. package/dist/git/head-watcher.js +113 -0
  7. package/dist/git/runner.js +204 -0
  8. package/dist/index.js +138 -0
  9. package/dist/indexer/code-index.js +1801 -0
  10. package/dist/indexer/complexity.js +633 -0
  11. package/dist/indexer/extractor.js +354 -0
  12. package/dist/indexer/languages/cpp.js +934 -0
  13. package/dist/indexer/languages/csharp.js +854 -0
  14. package/dist/indexer/languages/dart.js +777 -0
  15. package/dist/indexer/languages/go.js +665 -0
  16. package/dist/indexer/languages/java.js +507 -0
  17. package/dist/indexer/languages/kotlin.js +709 -0
  18. package/dist/indexer/languages/objc.js +397 -0
  19. package/dist/indexer/languages/php.js +771 -0
  20. package/dist/indexer/languages/python.js +455 -0
  21. package/dist/indexer/languages/ruby.js +697 -0
  22. package/dist/indexer/languages/rust.js +754 -0
  23. package/dist/indexer/languages/swift.js +691 -0
  24. package/dist/indexer/languages/typescript.js +485 -0
  25. package/dist/indexer/parser.js +175 -0
  26. package/dist/indexer/pipeline.js +342 -0
  27. package/dist/indexer/scanner.js +279 -0
  28. package/dist/indexer/watcher.js +353 -0
  29. package/dist/logger.js +16 -0
  30. package/dist/server.js +170 -0
  31. package/dist/tools/common.js +207 -0
  32. package/dist/tools/find-references.js +224 -0
  33. package/dist/tools/find-symbol.js +94 -0
  34. package/dist/tools/get-context.js +370 -0
  35. package/dist/tools/impact.js +218 -0
  36. package/dist/tools/overview.js +482 -0
  37. package/dist/tools/search-structure.js +303 -0
  38. package/dist/types.js +61 -0
  39. package/grammars/tree-sitter-c.wasm +0 -0
  40. package/grammars/tree-sitter-c_sharp.wasm +0 -0
  41. package/grammars/tree-sitter-cpp.wasm +0 -0
  42. package/grammars/tree-sitter-dart.wasm +0 -0
  43. package/grammars/tree-sitter-go.wasm +0 -0
  44. package/grammars/tree-sitter-java.wasm +0 -0
  45. package/grammars/tree-sitter-javascript.wasm +0 -0
  46. package/grammars/tree-sitter-kotlin.wasm +0 -0
  47. package/grammars/tree-sitter-objc.wasm +0 -0
  48. package/grammars/tree-sitter-php.wasm +0 -0
  49. package/grammars/tree-sitter-python.wasm +0 -0
  50. package/grammars/tree-sitter-ruby.wasm +0 -0
  51. package/grammars/tree-sitter-rust.wasm +0 -0
  52. package/grammars/tree-sitter-swift.wasm +0 -0
  53. package/grammars/tree-sitter-tsx.wasm +0 -0
  54. package/grammars/tree-sitter-typescript.wasm +0 -0
  55. package/package.json +67 -0
@@ -0,0 +1,697 @@
1
+ import { collectAmbiguousTypeNames } from '../extractor.js';
2
+ import { computeComplexity } from '../complexity.js';
3
+ import { RECEIVER_OPAQUE } from '../../types.js';
4
+ import { SIGNATURE_DISPLAY_CAP, commentDocLine, isTrailingComment, normalizeSignature, resolveCalls, symbolId, } from '../extractor.js';
5
+ // ── skip sets ────────────────────────────────────────────────────────────────
6
+ // Nested `def`/`singleton_method` create their own scope — their calls must NOT
7
+ // attribute to an enclosing body, and they are not extracted (the global
8
+ // no-nested-functions rule). Blocks (`block`/`do_block`) are deliberately ABSENT
9
+ // → DESCENDED, so their calls attribute to the enclosing method (the Go/Kotlin/
10
+ // PHP lambda rule — a block closes over the method's `self`).
11
// Node types a function-body walk refuses to descend into: nested method
// definitions. Blocks are intentionally not listed here.
const RUBY_FUNCTION_BODY_SKIP_TYPES = new Set(['method', 'singleton_method']);
15
+ // walkCalls skip set: nested defs (own scope) + the type/namespace nodes
16
+ // (`class`/`module`/`singleton_class`). Each Ruby member owns a per-member
17
+ // PendingBody, so the module-root walk never needs to descend INTO a type to find
18
+ // member calls; pruning the type nodes also stops a nested `class {}`'s method
19
+ // calls from mis-attributing to an enclosing body. Top-level script calls are
20
+ // direct program children, so they stay reachable.
21
// Call-walk skip set: the nested-def types first, then the type/namespace nodes.
const RUBY_SKIP_TYPES = new Set(
    Array.from(RUBY_FUNCTION_BODY_SKIP_TYPES).concat(['class', 'module', 'singleton_class']),
);
27
+ // ── call resolution ──────────────────────────────────────────────────────────
28
+ // Ruby's bare callee is the engine-default `identifier`; `constant` is ALSO a bare
29
+ // callee type ONLY so that `Foo.new` construction (a member call whose receiver is
30
+ // a constant) routes through the constructor-form path: getCallee returns the
31
+ // `constant`, and because `constant` !== `plainCalleeType` ('identifier') the
32
+ // engine treats it as constructor-form → typeNameToId (constructorKinds={class}).
33
+ // No `constructorSelectorTypes` is needed (construction has no distinct call NODE
34
+ // type — it is an ordinary `call`).
35
// The plain (non-constructor) bare callee node type.
const RUBY_PLAIN_CALLEE_TYPE = 'identifier';
// Bare callee node types: the plain type plus `constant` (the `Foo.new` route).
const RUBY_BARE_CALLEE_TYPES = new Set([RUBY_PLAIN_CALLEE_TYPE, 'constant']);
37
+ // A bare `foo`/`foo(x)` is EITHER an implicit-self method call OR a top-level
38
+ // function call — Ruby has both (a top-level `def` is modelled as 'function').
39
+ // So bare calls bind to the enclosing class first (bareCallsBindToEnclosingClass)
40
+ // then the callable-name map over {function, method}. This is the C#-enclosing-
41
+ // class × PHP-top-level-function hybrid.
42
// Symbol kinds a bare `foo(...)` call may bind to.
const RUBY_BARE_CALLABLE_KINDS = new Set([
    'function',
    'method',
]);
43
+ // `Foo.new` resolves to a 'class'-kind symbol (routed via the constant-callee
44
+ // constructor-form path above). A class can never be reached by a bare `foo()`
45
+ // (bareCallableKinds excludes 'class'), so a construction can never bind to a
46
+ // method/function and vice-versa.
47
// Symbol kinds a constructor-form call (`Foo.new`) may bind to.
const RUBY_CONSTRUCTOR_KINDS = new Set([
    'class',
]);
48
+ // Kernel / core built-ins that parse as bare `identifier`/`call` callees but never
49
+ // resolve to a local symbol — they would flood the name-keyed reference store.
50
+ // Suppressed ONLY when unresolved (a file-local method shadowing the name keeps
51
+ // its refs). START small + tune by dogfood (the Kotlin/PHP measure-don't-guess
52
+ // method). The visibility/`attr_*`/mixin keywords are handled structurally before
53
+ // resolve, but listing them is belt-and-suspenders for any that slip through as
54
+ // plain `call` nodes (`private :foo` etc.).
55
// Bare callee names that are dropped from the reference store when unresolved.
const RUBY_IGNORED_BARE_CALLEES = new Set([
    // IO / Kernel
    'puts', 'print', 'pp', 'warn', 'gets',
    'sleep', 'exit', 'abort', 'raise', 'fail',
    'throw', 'catch', 'loop', 'lambda', 'proc',
    'format', 'sprintf', 'printf', 'rand', 'srand',
    'require', 'require_relative', 'load', 'autoload',
    'at_exit', 'binding', 'caller', 'eval',
    'freeze', 'frozen?', 'block_given?', 'gsub', 'sub',
    // metaprogramming / definition
    'attr_accessor', 'attr_reader', 'attr_writer', 'attr',
    'include', 'extend', 'prepend',
    'private', 'protected', 'public',
    'module_function', 'define_method', 'alias_method',
    'private_class_method', 'public_class_method', 'private_constant',
    'send', '__send__', 'public_send', 'respond_to?',
    'instance_variable_get', 'instance_variable_set',
    'instance_of?', 'is_a?', 'kind_of?',
    // type coercion (Kernel conversion methods)
    'Integer', 'Float', 'String', 'Array', 'Hash',
]);
70
+ // Enumerable / core-protocol method names whose chained captures are pure noise
71
+ // (`.map`/`.each`/`.to_s`/…). Suppressed only when UNRESOLVED. Deliberately small —
72
+ // Ruby instance methods are overwhelmingly domain — and tuned by dogfood. <=3-char
73
+ // names (`map`, `to_a`) are gated downstream by SHORT_NAME_THRESHOLD regardless.
74
// Member (receiver) callee names that are dropped when unresolved.
const RUBY_IGNORED_MEMBER_CALLEES = new Set([
    // iteration / Enumerable
    'each', 'each_with_index', 'each_with_object',
    'map', 'flat_map', 'collect',
    'select', 'reject', 'filter', 'find', 'detect',
    'reduce', 'inject', 'group_by',
    'sort_by', 'min_by', 'max_by', 'partition',
    // conversions
    'to_a', 'to_h', 'to_s', 'to_sym', 'to_i', 'to_f', 'to_proc',
    // predicates
    'include?', 'key?', 'empty?', 'any?', 'all?', 'none?',
    // mutation / copying
    'push', 'pop', 'shift', 'unshift',
    'freeze', 'frozen?', 'dup', 'clone', 'tap',
    // misc protocol
    'respond_to?', 'present?', 'blank?', 'nil?', 'call',
]);
82
+ // One selector: every Ruby call is a `call` node (bare calls WITH args/parens, and
83
+ // every receiver call); a bare no-arg no-paren call is a lone `identifier` —
84
+ // indistinguishable from a local read, so NOT captured (a documented recall gap).
85
+ // `super`/`yield` are their own nodes, not selected.
86
// The single call selector: every `call` node, with its callee picked by rubyCallCallee.
const RUBY_SELECTORS = [{ nodeType: 'call', getCallee: rubyCallCallee }];
89
+ // Discriminate the three call shapes by what getCallee returns:
90
+ // bare `foo(x)` → the `method` identifier (bare branch)
91
+ // `Foo.new` / `A::B.new` → the receiver `constant` (constructor-form: constant
92
+ // type !== plainCalleeType → typeNameToId)
93
+ // `obj.m()` / `Foo.bar()` → the call NODE itself (member branch via
94
+ // rubyMemberCallInfo)
95
+ // `super(x)` (method=super) → null (super-like, the Java `base`/PHP `parent::` rule).
96
// Picks the callee node for a `call` node.
//   - no `method` field, or a `super` method      → null
//   - no receiver (bare call)                      → the method node
//   - `<constant>.new` / `<scope>::<constant>.new` → that constant node
//   - anything else with a receiver                → the call node itself
function rubyCallCallee(node) {
    const methodNode = node.childForFieldName('method');
    if (!methodNode || methodNode.type === 'super') {
        return null;
    }
    const receiverNode = node.childForFieldName('receiver');
    if (!receiverNode) {
        return methodNode;
    }
    if (methodNode.text !== 'new') {
        return node;
    }
    switch (receiverNode.type) {
        case 'constant':
            return receiverNode;
        case 'scope_resolution': {
            // A::B.new → B, but only when the trailing name is a constant.
            const lastName = receiverNode.childForFieldName('name');
            return lastName && lastName.type === 'constant' ? lastName : node;
        }
        default:
            // self.new / obj.new → ordinary member call.
            return node;
    }
}
115
+ // Reduces a member `call` node to {receiver, property, isSelf}.
116
+ // `self.m()` → self-call (resolve against the enclosing class)
117
+ // `Foo.m()` → receiver = `Foo` (resolves a class method via methodsByClass)
118
+ // `obj.m()` / `obj&.m()` → receiver = `obj` (a lowercase local — never collides
119
+ // with a Capitalized class name, so it stays an
120
+ // unresolved name-keyed member ref; Ruby needs no PHP-
121
+ // style `$` sigil guard)
122
+ // `A::B.m()` → receiver = `B` (the scope's last constant)
123
+ // chained `a.b.c()` / `f().g()` / `a[0].m()` → RECEIVER_OPAQUE (findable, never resolved)
124
// Describes a member call as { receiver, property, isSelf }, or returns null
// when `callee` is not a `call` node with an identifier method and a receiver.
function rubyMemberCallInfo(callee) {
    if (callee.type !== 'call') {
        return null;
    }
    const methodNode = callee.childForFieldName('method');
    if (methodNode?.type !== 'identifier') {
        return null; // operators parse as `binary`, not `call`
    }
    const receiverNode = callee.childForFieldName('receiver');
    if (!receiverNode) {
        return null; // bare call — resolved through getCallee instead
    }
    const property = methodNode.text;
    switch (receiverNode.type) {
        case 'self':
            return { receiver: 'self', property, isSelf: true };
        case 'identifier':
        case 'constant':
            return { receiver: receiverNode.text, property, isSelf: false };
        case 'scope_resolution': {
            // A::B.m() → receiver is the scope's `name` field when it has one.
            const scopedName = receiverNode.childForFieldName('name');
            return { receiver: scopedName?.text ?? RECEIVER_OPAQUE, property, isSelf: false };
        }
        default:
            // chained / indexed / parenthesized / computed receiver → opaque.
            return { receiver: RECEIVER_OPAQUE, property, isSelf: false };
    }
}
146
+ // ── complexity (cyclomatic + cognitive) ──────────────────────────────────────
147
+ //
148
+ // BOTH metrics pinned EXACT to sonar-ruby — SonarSource's SLANG-based analyzer —
149
+ // via a RUNNABLE per-function oracle: the sonar-ruby-plugin's `RubyConverter`
150
+ // (JRuby + whitequark/parser) builds the SLANG tree, then the shared
151
+ // `org.sonarsource.slang` `CyclomaticComplexityVisitor` / `CognitiveComplexity`
152
+ // score each function. The increments below were MEASURED against that oracle on a
153
+ // per-construct battery + the sinatra/rack/liquid/devise corpus, never guessed —
154
+ // the campaign standard (oracle the PIN). See the project docs' "Cyclomatic /
155
+ // Cognitive Complexity Rules".
156
+ // CYCLOMATIC (SLANG `CyclomaticComplexityVisitor`): base +1 per named function,
157
+ // then +1 per IfTree (if/unless/elsif/ternary/modifier-if/unless), +1 per LoopTree
158
+ // (while/until/for/modifier-while/until), +1 per MatchCaseTree-with-expression (a
159
+ // `when` arm — NOT the `case` container, NOT the `else` arm), and +1 per
160
+ // CONDITIONAL_AND/OR binary (the rubyCyclomaticExtra predicate). NOTABLY ABSENT
161
+ // (measured, matching the pin — sonar-ruby is the "compare to SonarQube" north
162
+ // star): `rescue`/`rescue_modifier` (SLANG registers no CatchTree cyclomatically —
163
+ // consistent with sonar-java/JS omitting catch; defensible, so NOT forked toward
164
+ // rubocop, which DOES count rescue), `case/in` pattern matching (`case_match`/
165
+ // `in_clause` map to an uncounted native tree — a converter limitation, rare in
166
+ // real Ruby), and `&.` safe-navigation. These are deliberate pin-faithful
167
+ // divergences from rubocop/McCabe, documented in the project docs.
168
// Node types that each add +1 to cyclomatic complexity.
const RUBY_DECISION_NODE_TYPES = new Set([
    // IfTree (+1 each)
    'if',
    'elsif',
    'unless',
    'if_modifier',
    'unless_modifier',
    // LoopTree (+1 each)
    'while',
    'until',
    'for',
    'while_modifier',
    'until_modifier',
    // MatchCaseTree with expression (the case container + `else` arm add nothing)
    'when',
    // ternary `?:` → IfTree (+1)
    'conditional',
]);
174
+ // Boolean operators counted by SLANG (BinaryExpressionTree CONDITIONAL_AND/OR):
175
+ // the symbolic `&&`/`||` AND the keyword `and`/`or` (both are `binary` nodes). The
176
+ // shared isCFamilyBooleanOperator only matches `&&`/`||`/`??`, so Ruby reads the
177
+ // `operator` field token itself. Returns the raw operator TEXT (not a normalized
178
+ // kind) because SLANG's cognitive run-collapse compares operator TEXT — so `&&`
179
+ // and `and` are DISTINCT runs (`a && b and c` = cog 2), oracle-verified.
180
// Operator tokens treated as short-circuit boolean operators.
const RUBY_BOOLEAN_OPS = new Set(['&&', '||', 'and', 'or']);

// Returns the raw operator token of a boolean `binary` node, or null for any
// other node (non-binary, missing operator field, or a non-boolean operator).
function rubyBooleanKind(node) {
    if (node.type !== 'binary') {
        return null;
    }
    const opType = node.childForFieldName('operator')?.type;
    if (opType === undefined) {
        return null;
    }
    return RUBY_BOOLEAN_OPS.has(opType) ? opType : null;
}

// Extra cyclomatic decision predicate: true exactly for boolean `binary` nodes.
function rubyCyclomaticExtra(node) {
    return rubyBooleanKind(node) !== null;
}
190
+ // Complexity body boundary — SEPARATE from RUBY_SKIP_TYPES (the resolveCalls
191
+ // boundary), which includes `method`/`singleton_method`. A method's PendingBody.body
192
+ // IS a `method`/`singleton_method` node, so reusing RUBY_SKIP_TYPES would root-skip
193
+ // the whole body (the engine's root guard + the cognitive walk both bail on a
194
+ // skip-typed root) → every method reads trivial. So the cognitive + root boundary
195
+ // lists ONLY the type/namespace nodes; blocks (`block`/`do_block`) DESCEND (roll
196
+ // into the enclosing method — they are SLANG NativeTrees, transparent, no nesting),
197
+ // and a NESTED `def` is descended pass-through cognitively (its control flow rolls
198
+ // into the encloser WITHOUT a nesting bump — a documented minor cognitive
199
+ // under-count vs the oracle's per-funcTree roll-in; nested defs are rare in Ruby,
200
+ // the Java-anon-class / PHP-nested-fn precedent).
201
// Complexity boundary: only the type/namespace nodes (no method node types).
const RUBY_COMPLEXITY_SKIP_TYPES = new Set([
    'class',
    'module',
    'singleton_class',
]);
204
+ // CYCLOMATIC-only child skip: additionally exclude nested `def`/`def self.x` so a
205
+ // nested method's decisions don't count toward the encloser (the per-symbol model —
206
+ // SLANG's per-funcTree oracle rolls them in, a rare documented divergence). The
207
+ // root method body is checked against RUBY_COMPLEXITY_SKIP_TYPES (which lacks
208
+ // `method`), so it survives; only nested-method CHILDREN are skipped here.
209
// Cyclomatic child-skip set: the complexity boundary plus nested method definitions.
const RUBY_CYCLOMATIC_SKIP_TYPES = new Set(
    Array.from(RUBY_COMPLEXITY_SKIP_TYPES).concat(['method', 'singleton_method']),
);
213
+ // Never-matching paren sentinel: SLANG's `flattenOperators` does NOT skip
214
+ // parentheses (the `// TODO parentheses` in CognitiveComplexity.java), so a
215
+ // parenthesized boolean is its OWN run — `(a && b) && c` = cog 2, oracle-verified.
216
+ // The sentinel makes the engine's skipParens a no-op (the gocognit/sonar-python
217
+ // convention).
218
// Sentinel node-type name used as the "parenthesized" type; intended never to
// match a real node type.
const RUBY_NO_PAREN = '__ruby_no_paren__';
219
+ // Statement-list container PARENT node types where a `?:`/`if`-else sits in STATEMENT
220
+ // position (→ if-else, +2). Maps to a SLANG BlockTree: an if/loop/case body (`then`/
221
+ // `else`/`ensure`/`do`), a `begin` body, the top level, AND a string `interpolation`
222
+ // (`"#{a ? x : y}"` is +2 — measured; the interpolation embeds a statement list). NOT
223
+ // included (→ EXPRESSION position, +1 ternary): `block_body` (a brace `{ }` block, a
224
+ // SLANG NativeTree) and a `body_statement` whose grandparent is a `do_block` (a
225
+ // `do…end` block, also a NativeTree) — both handled in rubyInStatementPosition.
226
// Parent node types that put an if-family child in statement position.
const RUBY_STATEMENT_PARENTS = new Set([
    'then',
    'else',
    'ensure',
    'do',
    'begin',
    'program',
    'interpolation',
]);
229
+ // True when an if-family node is in STATEMENT position (value discarded / a statement
230
+ // in a SLANG BlockTree), false when in EXPRESSION position (assignment RHS, arg,
231
+ // operator operand, OR the SOLE statement of a do/brace block — a NativeTree whose
232
+ // value is the block's result). The block-body distinction is the subtle bit, measured
233
+ // against the oracle: a method/class/module/begin body is ALWAYS a BlockTree (a
234
+ // single-statement method body is still +2), but a BLOCK body (brace `block_body` or a
235
+ // `do_block`'s `body_statement`) is a NativeTree → EXPRESSION only when it holds ONE
236
+ // statement (the if-else is the block's return value); a MULTI-statement block body is
237
+ // a BlockTree → STATEMENT. (A ternary nested in an arg/expression inside a multi-stmt
238
+ // block is still expression — its immediate parent is the arg, not the block body.)
239
// Decides whether `node` sits in statement position, based on its parent:
//   - no parent                        → false
//   - `block_body` parent              → only when the body has more than one named child
//   - `body_statement` under a do_block → same multi-child rule
//   - any other `body_statement`       → always true
//   - otherwise                        → membership in RUBY_STATEMENT_PARENTS
function rubyInStatementPosition(node) {
    const container = node.parent;
    if (!container) {
        return false;
    }
    switch (container.type) {
        case 'block_body':
            // brace block: sole stmt → expr, multi → stmt
            return container.namedChildCount > 1;
        case 'body_statement':
            if (container.parent?.type !== 'do_block') {
                return true;
            }
            // do_block: sole stmt → expr, multi → stmt
            return container.namedChildCount > 1;
        default:
            return RUBY_STATEMENT_PARENTS.has(container.type);
    }
}
251
+ // SLANG BlockTree node types: a STATEMENT-LIST that, when it holds >1 statement, is a
252
+ // BlockTree (a single statement is unwrapped to the bare expression). A string
253
+ // `interpolation` (`#{…}`) is ALWAYS a BlockTree (it embeds a statement list).
254
// Statement-list node types that count as a block tree once they hold more
// than one named child.
const RUBY_BLOCK_LIST_NODES = new Set([
    'then',
    'else',
    'ensure',
    'do',
    'block_body',
    'body_statement',
    'begin',
]);
257
+ // True if the subtree under `node` contains a SLANG BlockTree — an `interpolation`, or
258
+ // a multi-statement statement-list. This is `isTernaryOperator`'s final condition
259
+ // (`tree.descendants().noneMatch(BlockTree)`): an if-with-else whose branches embed a
260
+ // BlockTree (e.g. a string interpolation `["#{x}"] + super`, or a multi-statement
261
+ // then/else) is NOT a ternary even in expression position. The DFS INTENTIONALLY
262
+ // descends EVERYTHING — including a nested `def`/`class` in a branch — because SLANG's
263
+ // `tree.descendants()` does too; a nested scope's multi-statement body IS a BlockTree
264
+ // that disqualifies the ternary (oracle-confirmed: `v = if a; def g; x; y; end; else;
265
+ // z; end` is cog 2, not 1). This is DELIBERATELY a different boundary from the
266
+ // per-symbol cognitive walk (which skips nested scopes) — do NOT add a skip here, it
267
+ // would diverge from the pin. It early-returns on the first BlockTree (a nested scope
268
+ // is almost always multi-statement → detected immediately), so the bounded DFS stays
269
+ // cheap on the small if-else subtree.
270
// Searches every named descendant of `node` (the node itself is not tested)
// and reports whether any is an `interpolation` or a statement-list node
// (RUBY_BLOCK_LIST_NODES) holding more than one named child. Stops at the
// first hit. Uses an explicit worklist rather than recursion.
function rubyHasBlockTree(node) {
    const pending = Array.from(node.namedChildren);
    for (let current = pending.pop(); current !== undefined; current = pending.pop()) {
        const isInterpolation = current.type === 'interpolation';
        if (isInterpolation || (RUBY_BLOCK_LIST_NODES.has(current.type) && current.namedChildCount > 1)) {
            return true;
        }
        for (const child of current.namedChildren) {
            pending.push(child);
        }
    }
    return false;
}
283
+ // True when an if-family node is an EXPRESSION-TERNARY (sonar-ruby `isTernaryOperator`):
284
+ // an if-with-else used as an EXPRESSION (not statement position) with NO nested BlockTree
285
+ // (single-statement branches, no string interpolation) → its `else` +1 is SUPPRESSED
286
+ // (cog 1, not 2). Measured against the SLANG oracle:
287
+ // v = cond ? a : b → ternary (+1) def f; cond ? a : b; end → NOT (+2)
288
+ // v = if a; x; else; y end → ternary (+1) if a; x; else; y; end → NOT (+2)
289
+ // arr.each { if a;x else y } → ternary (+1) "#{cond ? a : b}" → NOT (+2, interpolation)
290
+ // v = if a; x; elsif… → NOT (elsif-chain, +3) v = if a; p; q; else… → NOT (multi-stmt, +2)
291
+ // @x ||= if a; ["#{p}"]; else; q; end → NOT (+2, interpolation in a branch)
292
+ // A `?:` (`conditional`) and an `if`/`unless` with a PLAIN `else` (no elsif) both qualify
293
+ // when expression-position AND BlockTree-free; `elsif` is never a standalone ternary.
294
// True when `node` qualifies as an expression-ternary: it is NOT in statement
// position, it is either a `conditional` or an `if`/`unless` whose
// `alternative` is a plain `else`, and its subtree contains no block tree.
function rubyIsExpressionTernary(node) {
    if (rubyInStatementPosition(node)) {
        return false; // statement position → if-else, charge else
    }
    const kind = node.type;
    if (kind !== 'conditional') {
        if (kind !== 'if' && kind !== 'unless') {
            return false; // elsif / others: never standalone
        }
        const elseBranch = node.childForFieldName('alternative');
        if (elseBranch?.type !== 'else') {
            return false; // no else, or an elsif-chain
        }
    }
    // Both the `?:` form and the if/unless-with-else form end on the same check.
    return !rubyHasBlockTree(node);
}
306
+ // COGNITIVE (SLANG `CognitiveComplexity`). SLANG's nesting is ANCESTOR-based — every
307
+ // IfTree(non-elseif)/LoopTree/MatchTree/CatchTree ancestor adds a level (reset at a
308
+ // class), so a construct's CONDITION nests too (unlike sonar-java, where the engine
309
+ // visits if-conditions at base). Consequences of that, all oracle-measured:
310
+ // - `unless`/`elsif` are if-like (the collectionIfType SET — the engine widening
311
+ // this slice): `unless` surcharges + handles its `else`; `elsif` is the chain
312
+ // link recursed by handleAlternative. (`if a; x; elsif b; y; else z` = cog 3.)
313
+ // - Modifier `if`/`unless` go in loopTypes (surcharge + bump ALL children — the engine's
314
+ // loop branch nests every child at nesting+1 because Ruby leaves `loopBodyField` unset,
315
+ // unlike Python which nests only the body), NOT collectionIfType: SLANG's ancestor-nesting
316
+ // bumps a modifier's CONDITION too (`x if (a?b:c)` = cog 3), and their then-branch lives
317
+ // under `body`, not `consequence`. Modifier loops bump-all likewise. So loopTypes carries
318
+ // the 3 real loops + the 4 modifier forms.
319
+ // - `conditional` (ternary) is +1 (ternaryType) — the dominant EXPRESSION-ternary
320
+ // form (`x = a?1:2` = cog 1); a rare BARE-statement ternary is +2 in SLANG (it
321
+ // becomes an if-else) → a documented near-zero under-count.
322
+ // - whole `case` is +1 (switchTypes/MatchTree); `case/in` is uncounted (not MatchTree).
323
+ // - block `rescue` is +1+nesting (catchType/CatchTree); modifier `x rescue y` is
324
+ // UNCOUNTED cognitively (not a CatchTree).
325
+ // - Ruby BLOCKS (`{}`/`do..end`) are SLANG NativeTrees → TRANSPARENT (no nesting):
326
+ // a 3-deep block keeps an inner `if` at base nesting → nestOnlyTypes is EMPTY.
327
+ // - booleans: source-order, +1 per operator-TEXT change, NO paren skip (above).
328
+ // - no recursion (SLANG doesn't count it), no labeled jumps (Ruby has none).
329
// Cognitive-complexity options passed to computeComplexity for Ruby.
const RUBY_COGNITIVE_OPTIONS = {
    // if-family: plain `if`, with `unless`/`elsif` as additional if-like types.
    ifType: 'if',
    collectionIfType: new Set(['unless', 'elsif']),
    conditionField: 'condition',
    consequenceField: 'consequence',
    alternativeField: 'alternative',
    // The three real loops, then the four modifier forms.
    loopTypes: new Set([
        'while',
        'until',
        'for',
        'while_modifier',
        'until_modifier',
        'if_modifier',
        'unless_modifier',
    ]),
    switchTypes: new Set(['case']),
    ternaryType: 'conditional',
    // A simple if-with-else used as an EXPRESSION suppresses its `else` +1
    // (applies to `?:` AND `if`/`unless`); the helper holds the rule.
    isExpressionTernary: rubyIsExpressionTernary,
    catchType: 'rescue',
    // Only a `rescue` whose parent is `begin` counts; a method-level rescue
    // (parent `body_statement`) does not.
    catchPredicate: ({ parent }) => parent?.type === 'begin',
    nestOnlyTypes: new Set(),
    labeledJumpTypes: new Set(),
    hasLabel: () => false,
    booleanOperatorKind: rubyBooleanKind,
    parenthesizedType: RUBY_NO_PAREN,
};
355
// Entry point for Ruby extraction. Walks the top-level body to collect
// symbols, imports and pending bodies, resolves calls into references, runs
// the complexity pass over the collected bodies/symbols, and returns
// { symbols, references, imports }.
export function extractRuby(tree, content, fileInfo) {
    const ctx = {
        content,
        fileInfo,
        occurrences: new Map(),
        symbols: [],
        imports: [],
        bodies: [],
    };
    // Top level: no enclosing class, empty qualifier, container exported.
    extractBody(ctx, tree.rootNode.namedChildren, null, '', true);

    // Same-name classes/modules and same-name top-level functions are excluded
    // from extract-time resolution (first-wins would bind to the wrong one).
    const typeKinds = new Set(['class', 'module']);
    const functionKinds = new Set(['function']);
    const ambiguousClassNames = collectAmbiguousTypeNames(ctx.symbols, typeKinds);
    const ambiguousBareCallees = collectAmbiguousTypeNames(ctx.symbols, functionKinds);

    const resolveOptions = {
        bareCalleeTypes: RUBY_BARE_CALLEE_TYPES,
        plainCalleeType: RUBY_PLAIN_CALLEE_TYPE,
        bareCallableKinds: RUBY_BARE_CALLABLE_KINDS,
        bareCallsBindToEnclosingClass: true, // implicit self
        constructorKinds: RUBY_CONSTRUCTOR_KINDS,
        ambiguousClassNames,
        ambiguousBareCallees,
        ignoredBareCallees: RUBY_IGNORED_BARE_CALLEES,
        ignoredMemberCallees: RUBY_IGNORED_MEMBER_CALLEES,
    };
    const references = resolveCalls(
        ctx.bodies,
        tree.rootNode,
        ctx.symbols,
        fileInfo,
        RUBY_SELECTORS,
        RUBY_SKIP_TYPES,
        RUBY_FUNCTION_BODY_SKIP_TYPES,
        rubyMemberCallInfo,
        resolveOptions,
    );

    // Complexity uses its own skip sets, separate from the call-resolution ones;
    // the cyclomatic set additionally excludes nested method definitions.
    const complexityOptions = {
        decisionNodeTypes: RUBY_DECISION_NODE_TYPES,
        extraDecisionPredicate: rubyCyclomaticExtra,
        skipTypes: RUBY_COMPLEXITY_SKIP_TYPES,
        cyclomaticSkipTypes: RUBY_CYCLOMATIC_SKIP_TYPES,
        cognitive: RUBY_COGNITIVE_OPTIONS,
    };
    computeComplexity(ctx.bodies, ctx.symbols, complexityOptions);

    return { symbols: ctx.symbols, references, imports: ctx.imports };
}
396
+ // Walks a class/module/top-level body in DOCUMENT ORDER, tracking the current
397
+ // visibility (Ruby's `private`/`protected`/`public` are stateful positional
398
+ // modifiers). `className === null` is the top level (no class, defs are functions,
399
+ // always exported — no visibility state). A class/module body resets visibility to
400
+ // public at entry; a per-body name→symbol map supports the retroactive
401
+ // `private :foo` form.
402
// Walks one body's children in order, dispatching on node type and tracking
// the running visibility (starts at 'public' for every body).
//   ctx               extraction context passed through to the helpers
//   children          the body's named child nodes
//   className         enclosing class/module name, or null at the top level
//   qualifier         member qualifier passed through to the helpers
//   containerExported whether the enclosing container is exported
function extractBody(ctx, children, className, qualifier, containerExported) {
    const insideType = className !== null;
    // name → symbols emitted under that name in THIS body (for `private :foo`).
    const byName = new Map();
    let visibility = 'public';

    // Top-level members are always exported; inside a type, only 'private' hides.
    const exportedNow = () => !insideType || (containerExported && visibility !== 'private');
    const emitMethod = (decl) => {
        const sym = extractMethod(ctx, decl, className, qualifier, exportedNow());
        if (sym) {
            pushByName(byName, sym);
        }
    };

    for (const child of children) {
        const kind = child.type;
        if (kind === 'method') {
            emitMethod(child);
        }
        else if (kind === 'singleton_method') {
            // Only `def self.x` inside a class/module is kept; `def obj.x` is skipped.
            if (insideType && child.childForFieldName('object')?.type === 'self') {
                emitMethod(child);
            }
        }
        else if (kind === 'class' || kind === 'module') {
            extractClassOrModule(ctx, child, qualifier, containerExported);
        }
        else if (kind === 'singleton_class') {
            // `class << self` — its members are walked as members of the enclosing class.
            const inner = insideType ? child.childForFieldName('body') : null;
            if (inner) {
                extractBody(ctx, inner.namedChildren, className, qualifier, containerExported);
            }
        }
        else if (kind === 'assignment') {
            extractConstant(ctx, child, className, qualifier, exportedNow());
        }
        else if (kind === 'identifier') {
            // A bare `private`/`protected`/`public` flips visibility for the
            // following siblings (class/module bodies only).
            if (insideType) {
                visibility = visibilityKeyword(child.text) || visibility;
            }
        }
        else if (kind === 'call') {
            visibility = handleBodyCall(ctx, child, className, qualifier, containerExported, visibility, byName);
        }
        // Every other node type is ignored.
    }
}
459
+ // A `call` in a class/module/top-level body. Handles the call-FORM visibility
460
+ // modifiers (`private :foo`, `private def foo`, a bare-call `private`), `attr_*`
461
+ // accessor synthesis, and `require`-family imports. Returns the (possibly updated)
462
+ // visibility. Other calls (mixins `include M`, executable class-level code) are
463
+ // not symbols.
464
// Handles a receiver-less `name(...)` call found directly in a body and
// returns the running visibility to use for the following siblings.
//
//   ctx               extraction context passed through to the helpers
//   call              the `call` node
//   className         enclosing class/module name, or null at the top level
//   qualifier         member qualifier passed through to the helpers
//   containerExported whether the enclosing container is exported
//   visibility        the current running visibility
//   byName            name → symbols emitted so far in this body
//
// Inside a class/module:
//   - `private`/`protected`/`public` with NO arguments → returns the new
//     running visibility.
//   - the same keywords WITH arguments → applies to the named members only
//     (`private :a, :b` retroactively; `private def foo` at emit time) and
//     leaves the running visibility untouched.
//   - `attr_accessor`/`attr_reader`/`attr_writer`/`attr` → delegated to
//     extractAttrAccessors.
// Anywhere: `require`/`require_relative`/`load`/`autoload` → extractRequire.
function handleBodyCall(ctx, call, className, qualifier, containerExported, visibility, byName) {
    const method = call.childForFieldName('method');
    if (!method || method.type !== 'identifier' || call.childForFieldName('receiver')) {
        return visibility; // only bare `name(...)` forms matter here
    }
    const name = method.text;
    const args = call.childForFieldName('arguments');
    if (className !== null) {
        const v = visibilityKeyword(name);
        if (v) {
            const argNodes = args?.namedChildren ?? [];
            // Only the argument-less form changes the default visibility. Any
            // argument — including forms not understood below, such as
            // `private attr_reader :x`, `private "foo"` or `private(*NAMES)` —
            // targets specific methods and must NOT flip the running state.
            if (argNodes.length === 0) {
                return v;
            }
            const exported = containerExported && v !== 'private';
            for (const arg of argNodes) {
                if (arg.type === 'simple_symbol') {
                    // `private :foo` — retroactively update already-emitted members.
                    for (const s of byName.get(symbolName(arg.text)) ?? []) {
                        s.exported = exported;
                    }
                }
                else if (arg.type === 'method'
                    || (arg.type === 'singleton_method' && arg.childForFieldName('object')?.type === 'self')) {
                    // `private def foo` / `private def self.foo` — emit with the
                    // keyword's visibility. `def obj.x` on another object is skipped.
                    const s = extractMethod(ctx, arg, className, qualifier, exported);
                    if (s) {
                        pushByName(byName, s);
                    }
                }
            }
            return visibility;
        }
        if (name === 'attr_accessor' || name === 'attr_reader' || name === 'attr_writer' || name === 'attr') {
            extractAttrAccessors(ctx, call, name, args, className, qualifier, containerExported && visibility !== 'private', byName);
            return visibility;
        }
    }
    if (name === 'require' || name === 'require_relative' || name === 'load' || name === 'autoload') {
        extractRequire(ctx, call, name, args);
    }
    return visibility;
}
513
// Emits a symbol for a `def` / `def self.x` declaration. With no enclosing
// class/module (className === null) the symbol is a top-level 'function';
// otherwise it is a 'method' whose fqn is built from the class name.
// Returns the pushed symbol, or null when the declaration yields no name.
function extractMethod(ctx, decl, className, qualifier, exported) {
    const name = methodName(decl);
    if (!name) {
        return null;
    }
    let kind;
    let fqn;
    if (className === null) {
        kind = 'function';
        fqn = topFqn(ctx, name);
    }
    else {
        kind = 'method';
        fqn = memberFqn(ctx, className, name);
    }
    const signature = rubySig(ctx, decl);
    const doc = rubyDoc(decl);
    const sym = makeRubySymbol(ctx, decl, signature, kind, name, fqn, exported, doc, qualifier);
    ctx.symbols.push(sym);
    // The pending body is the whole declaration (not just its `body` field) so
    // calls inside parameter defaults are attributed to this symbol as well.
    // className is carried along for self-call resolution.
    ctx.bodies.push({
        symbolId: sym.id,
        body: decl,
        className: className ?? undefined,
    });
    return sym;
}
527
// Emits a 'class' or 'module' symbol for a `class C < S` / `module M`
// declaration and then recurses into its body.
//
// The simple name is the `name` field's text when it is a `constant`, or the
// trailing constant of a `scope_resolution` (`class A::B` → `B`). Declarations
// with any other name shape are skipped. Type-level privacy is not modelled
// (`private_constant` is a documented v1 gap), so the type simply inherits
// `containerExported`. The type name is folded into the qualifier handed to
// the members so members of same-name types get distinct hashed ids.
function extractClassOrModule(ctx, decl, qualifier, containerExported) {
    const nameNode = decl.childForFieldName('name');
    let name = null;
    if (nameNode?.type === 'constant') {
        name = nameNode.text;
    }
    else if (nameNode?.type === 'scope_resolution') {
        name = nameNode.childForFieldName('name')?.text ?? null;
    }
    if (!name) {
        return;
    }
    const kind = decl.type === 'module' ? 'module' : 'class';
    const exported = containerExported; // type-level privacy not modelled
    const typeSymbol = makeRubySymbol(ctx, decl, rubySig(ctx, decl), kind, name, topFqn(ctx, name), exported, rubyDoc(decl), qualifier);
    ctx.symbols.push(typeSymbol);
    const body = decl.childForFieldName('body');
    if (body) {
        extractBody(ctx, body.namedChildren, name, joinQualifier(qualifier, name), exported);
    }
}
550
// Emits a 'variable' symbol for a `NAME = ...` assignment whose left-hand side
// is a `constant` node. Any other left-hand side (lowercase locals, `@ivar`s,
// `@@cvar`s) is not a symbol and is ignored. The whole assignment is registered
// as the pending body so calls on the right-hand side attribute to the constant.
function extractConstant(ctx, assign, className, qualifier, exported) {
    const left = assign.childForFieldName('left');
    const isConstantTarget = Boolean(left) && left.type === 'constant';
    if (!isConstantTarget) {
        return;
    }
    const name = left.text;
    const signature = rubyConstSig(ctx, assign);
    const fqn = className === null ? topFqn(ctx, name) : memberFqn(ctx, className, name);
    const sym = makeRubySymbol(ctx, assign, signature, 'variable', name, fqn, exported, rubyDoc(assign), qualifier);
    ctx.symbols.push(sym);
    ctx.bodies.push({
        symbolId: sym.id,
        body: assign,
        className: className ?? undefined,
    });
}
562
// Synthesises one 'method' symbol per `:sym` argument of an `attr_accessor` /
// `attr_reader` / `attr_writer` call. Accessors are call targets (`obj.a`), hence
// method-kind rather than variable. Arguments that are not `simple_symbol` nodes,
// or whose name is empty, are skipped. No pending body is registered because
// there is no body to attribute calls to. `kind` is the macro name and is only
// used to build the synthetic signature (`<macro> :<name>`).
function extractAttrAccessors(ctx, call, kind, args, className, qualifier, exported, byName) {
    const symbolArgs = (args?.namedChildren ?? []).filter((arg) => arg.type === 'simple_symbol');
    for (const symbolArg of symbolArgs) {
        const name = symbolName(symbolArg.text);
        if (name) {
            const signature = normalizeSignature(`${kind} :${name}`);
            const fqn = memberFqn(ctx, className, name);
            const accessor = makeRubySymbol(ctx, call, signature, 'method', name, fqn, exported, null, qualifier);
            ctx.symbols.push(accessor);
            pushByName(byName, accessor);
        }
    }
}
578
// Records an import for `require 'set'` / `require_relative '../lib/foo'` /
// `autoload :Bar, 'bar'`. The first `string` argument supplies `sourceModule`;
// calls without a string argument, or whose string yields no content, are
// ignored. The imported name is always the wildcard `*` because `require` has no
// named binding. `_kind` (the macro name) is accepted but not used.
function extractRequire(ctx, call, _kind, args) {
    let literal;
    for (const candidate of args?.namedChildren ?? []) {
        if (candidate.type === 'string') {
            literal = candidate;
            break;
        }
    }
    if (!literal) {
        return;
    }
    const sourceModule = stringContent(literal);
    if (!sourceModule) {
        return;
    }
    const line = call.startPosition.row + 1; // tree-sitter rows are 0-based
    ctx.imports.push({
        file: ctx.fileInfo.path,
        sourceModule,
        importedNames: [{ name: '*' }], // IMPORT_NAMESPACE-style — require has no named binding
        line,
    });
}
595
+ // ── helpers ────────────────────────────────────────────────────────────────
596
// Returns `text` itself when it is exactly one of Ruby's visibility keywords
// (`private`, `protected`, `public`); returns null for anything else.
function visibilityKeyword(text) {
    switch (text) {
        case 'private':
        case 'protected':
        case 'public':
            return text;
        default:
            return null;
    }
}
599
// Adds `sym` to the per-body name → symbols map, creating the list for
// `sym.name` on first use. The map is the only state shared by the separate
// functions that record symbols for later retroactive visibility updates.
function pushByName(byName, sym) {
    const bucket = byName.get(sym.name);
    if (!bucket) {
        byName.set(sym.name, [sym]);
        return;
    }
    bucket.push(sym);
}
609
// Reads the method name from the `name` field of a `method` /
// `singleton_method` node. The field may be an `identifier` (including
// predicate `valid?` and bang `save!` forms), a `setter` (`name=`) or an
// `operator` (`+`, `[]`); in each case the node text is already the full name.
// Returns null when the field or its text is absent.
function methodName(decl) {
    const nameNode = decl.childForFieldName('name');
    const text = nameNode?.text;
    return text ?? null;
}
615
// Strips the single leading colon from a `simple_symbol` node's text
// (`:name` → `name`). Text without a leading colon is returned unchanged.
function symbolName(text) {
    if (text[0] === ':') {
        return text.substring(1);
    }
    return text;
}
619
// Returns the text of the first `string_content` child of a string literal
// node (the part between the quotes); other children such as interpolations
// are skipped. Returns null when there is no such child.
function stringContent(str) {
    for (const child of str.namedChildren) {
        if (child.type === 'string_content') {
            return child.text ?? null;
        }
    }
    return null;
}
624
// Builds a declaration's signature: the source text from the start of the decl
// up to its `body` field, passed through normalizeSignature. An EMPTY body has
// no `body` field (`class Widget\nend`), so in that case the cut is made at the
// trailing `end` keyword token — cutting by token rather than stripping the
// string "end" keeps names such as `class Friend` intact. If neither is present
// the whole declaration is used.
function rubySig(ctx, decl) {
    const start = decl.startIndex;
    const body = decl.childForFieldName('body');
    if (body) {
        return normalizeSignature(ctx.content.slice(start, body.startIndex));
    }
    const endKeyword = decl.children.find((child) => child?.type === 'end');
    const stop = endKeyword ? endKeyword.startIndex : decl.endIndex;
    return normalizeSignature(ctx.content.slice(start, stop));
}
639
// Builds a constant's signature from just the `NAME` head of the assignment.
// The source is cut at the `right` field (or at the end of the assignment when
// there is none), normalised, and a trailing `=` is removed. The right-hand
// side is left out because it can be large and would bloat the id hash; a
// same-name redefinition is told apart by the occurrence counter instead.
function rubyConstSig(ctx, assign) {
    const rhs = assign.childForFieldName('right');
    const stop = rhs ? rhs.startIndex : assign.endIndex;
    const head = normalizeSignature(ctx.content.slice(assign.startIndex, stop));
    return head.endsWith('=') ? head.slice(0, -1).trimEnd() : head;
}
651
// Returns the doc line for a declaration, taken from the `#` comment on the
// line immediately above it (the RDoc convention), or null when there is none.
// Consecutive `#` lines are separate comment nodes, so only the nearest one is
// consulted. `=begin/=end` blocks are not handled in v1, and a comment that
// trails other code on its line does not count as documentation.
function rubyDoc(decl) {
    const prev = decl.previousNamedSibling;
    if (!prev || prev.type !== 'comment') {
        return null;
    }
    if (!prev.text.startsWith('#')) {
        return null;
    }
    // The comment must end on the line directly above the declaration.
    const directlyAbove = prev.endPosition.row === decl.startPosition.row - 1;
    if (!directlyAbove) {
        return null;
    }
    if (isTrailingComment(prev)) {
        return null;
    }
    return commentDocLine(prev.text);
}
665
// Fully-qualified name of a top-level symbol: `<file path>:<name>`.
function topFqn(ctx, name) {
    const { path } = ctx.fileInfo;
    return `${path}:${name}`;
}
668
// Fully-qualified name of a class/module member:
// `<file path>:<class name>.<member name>`.
function memberFqn(ctx, className, name) {
    const { path } = ctx.fileInfo;
    return `${path}:${className}.${name}`;
}
671
// Joins two qualifier parts with `::`. When either part is empty the other one
// is returned as-is. The qualifier only disambiguates hashed ids, so any
// unique join would do.
function joinQualifier(a, b) {
    if (a && b) {
        return `${a}::${b}`;
    }
    return a ? a : b;
}
679
// Builds the symbol record for a Ruby declaration.
//
// Symbols sharing the same (name, kind, signature, qualifier) tuple are counted
// in `ctx.occurrences`; from the second occurrence on, `#<n>` is appended to
// the qualifier that feeds the id so each redefinition gets its own id. Line
// numbers are converted from tree-sitter's 0-based rows to 1-based lines. The
// full signature goes into the id, while the stored `signature` is truncated to
// SIGNATURE_DISPLAY_CAP characters.
function makeRubySymbol(ctx, node, signature, kind, name, fqn, exported, doc, qualifier = '') {
    const occurrenceKey = `${name}\0${kind}\0${signature}\0${qualifier}`;
    const previousCount = ctx.occurrences.get(occurrenceKey) ?? 0;
    const ordinal = previousCount + 1;
    ctx.occurrences.set(occurrenceKey, ordinal);
    let idQualifier = qualifier;
    if (ordinal !== 1) {
        idQualifier = `${qualifier}#${ordinal}`;
    }
    const id = symbolId(ctx.fileInfo.path, name, kind, signature, idQualifier);
    return {
        id,
        name,
        fqn,
        kind,
        file: ctx.fileInfo.path,
        startLine: node.startPosition.row + 1,
        endLine: node.endPosition.row + 1,
        signature: signature.slice(0, SIGNATURE_DISPLAY_CAP),
        doc,
        exported,
        language: ctx.fileInfo.language,
    };
}