codedeep-mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +177 -0
  3. package/dist/config.js +223 -0
  4. package/dist/git/analyzer.js +177 -0
  5. package/dist/git/git-service.js +568 -0
  6. package/dist/git/head-watcher.js +113 -0
  7. package/dist/git/runner.js +204 -0
  8. package/dist/index.js +138 -0
  9. package/dist/indexer/code-index.js +1801 -0
  10. package/dist/indexer/complexity.js +633 -0
  11. package/dist/indexer/extractor.js +354 -0
  12. package/dist/indexer/languages/cpp.js +934 -0
  13. package/dist/indexer/languages/csharp.js +854 -0
  14. package/dist/indexer/languages/dart.js +777 -0
  15. package/dist/indexer/languages/go.js +665 -0
  16. package/dist/indexer/languages/java.js +507 -0
  17. package/dist/indexer/languages/kotlin.js +709 -0
  18. package/dist/indexer/languages/objc.js +397 -0
  19. package/dist/indexer/languages/php.js +771 -0
  20. package/dist/indexer/languages/python.js +455 -0
  21. package/dist/indexer/languages/ruby.js +697 -0
  22. package/dist/indexer/languages/rust.js +754 -0
  23. package/dist/indexer/languages/swift.js +691 -0
  24. package/dist/indexer/languages/typescript.js +485 -0
  25. package/dist/indexer/parser.js +175 -0
  26. package/dist/indexer/pipeline.js +342 -0
  27. package/dist/indexer/scanner.js +279 -0
  28. package/dist/indexer/watcher.js +353 -0
  29. package/dist/logger.js +16 -0
  30. package/dist/server.js +170 -0
  31. package/dist/tools/common.js +207 -0
  32. package/dist/tools/find-references.js +224 -0
  33. package/dist/tools/find-symbol.js +94 -0
  34. package/dist/tools/get-context.js +370 -0
  35. package/dist/tools/impact.js +218 -0
  36. package/dist/tools/overview.js +482 -0
  37. package/dist/tools/search-structure.js +303 -0
  38. package/dist/types.js +61 -0
  39. package/grammars/tree-sitter-c.wasm +0 -0
  40. package/grammars/tree-sitter-c_sharp.wasm +0 -0
  41. package/grammars/tree-sitter-cpp.wasm +0 -0
  42. package/grammars/tree-sitter-dart.wasm +0 -0
  43. package/grammars/tree-sitter-go.wasm +0 -0
  44. package/grammars/tree-sitter-java.wasm +0 -0
  45. package/grammars/tree-sitter-javascript.wasm +0 -0
  46. package/grammars/tree-sitter-kotlin.wasm +0 -0
  47. package/grammars/tree-sitter-objc.wasm +0 -0
  48. package/grammars/tree-sitter-php.wasm +0 -0
  49. package/grammars/tree-sitter-python.wasm +0 -0
  50. package/grammars/tree-sitter-ruby.wasm +0 -0
  51. package/grammars/tree-sitter-rust.wasm +0 -0
  52. package/grammars/tree-sitter-swift.wasm +0 -0
  53. package/grammars/tree-sitter-tsx.wasm +0 -0
  54. package/grammars/tree-sitter-typescript.wasm +0 -0
  55. package/package.json +67 -0
@@ -0,0 +1,771 @@
1
+ import { collectAmbiguousTypeNames } from '../extractor.js';
2
+ import { RECEIVER_OPAQUE } from '../../types.js';
3
+ import { SIGNATURE_DISPLAY_CAP, commentDocLine, isTrailingComment, normalizeSignature, resolveCalls, symbolId, } from '../extractor.js';
4
+ import { computeComplexity } from '../complexity.js';
5
+ // Type-declaration node types → SymbolKind. A `trait` maps to **class** (PHP
6
+ // traits are concrete, stateful, member-bearing mixins — unlike Rust's
7
+ // interface-like traits — so 'class' fits, and the sliced signature still
8
+ // carries the literal `trait` keyword for display, the Go struct → 'class'
9
+ // rule). enum → enum (cases NOT extracted, the universal rule). PHP has no
10
+ // nested type declarations, so there is no in-body type recursion.
11
/**
 * Lookup from a PHP type-declaration node type to the symbol kind recorded
 * for it. Traits are recorded with the 'class' kind.
 */
const PHP_TYPE_KIND = Object.fromEntries([
    ['class_declaration', 'class'],
    ['trait_declaration', 'class'],
    ['interface_declaration', 'interface'],
    ['enum_declaration', 'enum'],
]);
17
+ // Nested named `function_definition`s create their own scope — their calls must
18
+ // NOT attribute to an enclosing body, and they are not extracted as symbols
19
+ // (the global no-nested-functions rule). Closures (`arrow_function`,
20
+ // `anonymous_function`) are deliberately ABSENT → DESCENDED so their calls
21
+ // attribute to the enclosing body (the Go/Kotlin/Dart lambda rule).
22
// Node types that open their own scope inside a function body: only nested
// named functions. Closures are intentionally not listed, so they are descended.
const PHP_FUNCTION_BODY_SKIP_TYPES = new Set(['function_definition']);

// Skip set for the call walk. It starts from the function-body skip set and
// adds the node types below. Class members each own a per-member PendingBody,
// so the module-root walk never has to descend into a type to find their
// calls; pruning type nodes also keeps a function-nested class's method calls
// from being attributed to the enclosing body. Top-level script calls are
// direct program children and are unaffected.
const PHP_SKIP_TYPES = new Set(PHP_FUNCTION_BODY_SKIP_TYPES);
for (const prunedType of [
    // PHP-8 `#[Attr(Bar())]` parses a real call inside the attribute node.
    'attribute_list',
    // The named type declarations themselves.
    'class_declaration',
    'interface_declaration',
    'trait_declaration',
    'enum_declaration',
    // Body of an ANONYMOUS class (`new class {}`). The anonymous class is not a
    // symbol and its methods own no PendingBody, so descending would attribute
    // the inner calls (including `$this->` / `self::` self-edges) to the
    // enclosing method. Pruning the `declaration_list` rather than the whole
    // anonymous class keeps the outermost anonymous class's constructor-argument
    // calls (`new class(make()){}`), which live in the sibling `arguments`
    // node. A `new class(call()){}` nested inside another anonymous body loses
    // its constructor-argument edge as well: a missed edge, never a wrong one.
    // A named type's declaration_list is never reached, because its parent
    // declaration is pruned above.
    'declaration_list',
]) {
    PHP_SKIP_TYPES.add(prunedType);
}
52
+ // PHP's bare/construction callee is a `name` node (NOT the engine-default
53
+ // `identifier`, NOR Swift's `simple_identifier`) → reuse `plainCalleeType`.
54
// Node type of a plain (bare / construction) callee in PHP.
const PHP_PLAIN_CALLEE_TYPE = 'name';
// Callee node types treated as bare calls: just the plain callee type.
const PHP_BARE_CALLEE_TYPES = new Set([PHP_PLAIN_CALLEE_TYPE]);
56
+ // A bare `foo()` is a FREE-FUNCTION call (PHP has top-level functions; an
57
+ // instance method always needs `$this->`/`self::`), so bare calls bind to
58
+ // functions ONLY — never a class (construction is routed separately) and never
59
+ // the enclosing class (no implicit-this). This is the TS/Py model, opposite of
60
+ // C#/Kotlin.
61
// Symbol kinds a bare `foo()` call may bind to: functions only.
const PHP_BARE_CALLABLE_KINDS = new Set().add('function');
62
+ // `new Foo()` resolves to a 'class'-kind symbol. Its callee is a plain `name`
63
+ // (indistinguishable from a bare call by callee type — the C# problem), so it is
64
+ // recognized by CALL-NODE type and routed through constructorKinds. Combined
65
+ // with the function-only bareCallableKinds above, BOTH wrong-edge directions are
66
+ // structurally impossible: a bare `foo()` can never reach a class, and a
67
+ // `new Foo()` can never reach a function or method.
68
// Symbol kinds a `new Foo()` construction may bind to.
const PHP_CONSTRUCTOR_KINDS = new Set().add('class');
// Call-node types recognised as construction calls.
const PHP_CONSTRUCTOR_SELECTORS = new Set().add('object_creation_expression');
70
+ // Self-construction keywords: `new self()`/`new static()`/`new parent()` parse
71
+ // with a `name` callee whose text is one of these. They are not type names, and
72
+ // the engine's constructor-form path has no enclosing-class context to resolve
73
+ // them, so the selector drops them (a documented minor recall gap) rather than
74
+ // emit junk `self`/`static`-targeted construction refs.
75
// Keywords that may follow `new` without naming a type.
const PHP_SELF_CONSTRUCT = new Set([
    'self',
    'static',
    'parent',
]);
76
+ // Common PHP global built-ins: they parse as bare `name` calls but never resolve
77
+ // to a local symbol and would otherwise flood the name-keyed reference store.
78
+ // Suppressed ONLY when unresolved (a file-local function shadowing the name keeps
79
+ // its refs). PHP's stdlib is procedural, so this is larger than the C#/Go sets;
80
+ // tune further from dogfood measurement (the Kotlin/Dart empirical method).
81
+ // Several constructs that LOOK like calls parse as their own nodes (no entry
82
+ // needed): `unset`→unset_statement, `list`→list_assignment, `array`→
83
+ // array_creation, `include`/`require`→*_expression, `echo`→echo_statement,
84
+ // `print`→print_intrinsic, `exit`→exit_statement. But `isset`/`empty`/`eval`/
85
+ // `die` DO parse as function_call_expression with a `name` callee, so they ARE
86
+ // listed below — isset/empty especially are among the most frequent PHP tokens
87
+ // and would otherwise flood the store.
88
// Bare callee names that are suppressed when unresolved. Grouped by category;
// the groups are spread into one Set in the listed order.
const PHP_IGNORED_BARE_CALLEES = new Set([
    // call-shaped language constructs (function_call_expression, must be listed)
    ...['isset', 'empty', 'eval', 'die'],
    // strings
    ...[
        'strlen', 'strpos', 'stripos', 'strrpos', 'substr', 'substr_count',
        'str_replace', 'str_ireplace', 'str_repeat', 'str_split', 'str_pad',
        'str_contains', 'str_starts_with', 'str_ends_with', 'strtolower',
        'strtoupper', 'ucfirst', 'ucwords', 'lcfirst', 'trim', 'ltrim', 'rtrim',
        'sprintf', 'printf', 'vsprintf', 'vprintf', 'number_format', 'nl2br',
        'wordwrap', 'htmlspecialchars', 'htmlentities', 'html_entity_decode',
        'strip_tags', 'addslashes', 'stripslashes', 'strrev', 'strtr',
        'chunk_split', 'mb_strlen', 'mb_substr', 'mb_strtolower',
        'mb_strtoupper', 'preg_match', 'preg_match_all', 'preg_replace',
        'preg_replace_callback', 'preg_split', 'preg_quote',
    ],
    // arrays
    ...[
        'count', 'sizeof', 'array_map', 'array_filter', 'array_merge',
        'array_merge_recursive', 'array_keys', 'array_values',
        'array_key_exists', 'array_key_first', 'array_key_last',
        'array_search', 'array_slice', 'array_splice', 'array_push',
        'array_pop', 'array_shift', 'array_unshift', 'array_reverse',
        'array_unique', 'array_flip', 'array_combine', 'array_fill',
        'array_fill_keys', 'array_column', 'array_diff', 'array_diff_key',
        'array_intersect', 'array_intersect_key', 'array_reduce', 'array_walk',
        'array_sum', 'array_product', 'array_chunk', 'array_pad', 'in_array',
        'implode', 'explode', 'join', 'sort', 'rsort', 'usort', 'uasort',
        'uksort', 'ksort', 'krsort', 'asort', 'arsort', 'natsort', 'shuffle',
        'range', 'compact', 'extract', 'current', 'reset', 'end', 'key', 'next',
        'prev',
    ],
    // type/var
    ...[
        'is_array', 'is_string', 'is_int', 'is_integer', 'is_long', 'is_bool',
        'is_float', 'is_double', 'is_numeric', 'is_null', 'is_object',
        'is_callable', 'is_scalar', 'is_iterable', 'is_countable', 'is_a',
        'is_subclass_of', 'gettype', 'settype', 'intval', 'floatval',
        'doubleval', 'strval', 'boolval', 'var_dump', 'var_export', 'print_r',
        'get_class', 'get_parent_class', 'get_object_vars', 'get_class_methods',
        'property_exists', 'method_exists', 'function_exists', 'class_exists',
        'interface_exists', 'trait_exists', 'enum_exists', 'defined',
        'constant', 'spl_object_id', 'spl_object_hash', 'spl_autoload_register',
    ],
    // math
    ...[
        'abs', 'ceil', 'floor', 'round', 'min', 'max', 'pow', 'sqrt', 'rand',
        'mt_rand', 'random_int', 'random_bytes', 'intdiv', 'fmod', 'pi', 'log',
        'exp',
    ],
    // json / serialize / encoding
    ...[
        'json_encode', 'json_decode', 'serialize', 'unserialize',
        'base64_encode', 'base64_decode', 'http_build_query', 'urlencode',
        'urldecode', 'rawurlencode',
    ],
    // callables / reflection
    ...[
        'call_user_func', 'call_user_func_array', 'func_get_args',
        'func_num_args', 'func_get_arg', 'define',
    ],
    // misc / fs / time
    ...[
        'dirname', 'basename', 'realpath', 'pathinfo', 'file_exists',
        'file_get_contents', 'file_put_contents', 'fopen', 'fclose', 'fwrite',
        'fread', 'fgets', 'is_dir', 'is_file', 'mkdir', 'unlink', 'getenv',
        'putenv', 'error_reporting', 'trigger_error', 'set_error_handler',
        'date', 'time', 'mktime', 'strtotime', 'microtime', 'sleep', 'usleep',
        'header', 'http_response_code', 'iterator_to_array', 'ctype_digit',
        'ctype_alpha', 'dd', 'dump',
    ],
]);
138
+ // PHP instance methods are overwhelmingly domain/framework, so the member
139
+ // suppression set is deliberately small — only clearly-stdlib SPL-protocol /
140
+ // Throwable / magic methods whose chained captures would be pure noise (Iterator
141
+ // current/next/rewind/valid, ArrayAccess offset*, IteratorAggregate/ArrayObject
142
+ // getIterator/getArrayCopy, Throwable get*, jsonSerialize/__toString).
143
+ // DELIBERATELY EXCLUDED: `format` — the measure-don't-guess rule (dogfood on
144
+ // Carbon/php-parser/symfony-console) found 1166 `->format()` call-sites against 8
145
+ // real DOMAIN definitions (Carbon dates, translators, formatters) and zero stdlib
146
+ // protocol — i.e. a distinctive fluent domain method this feature exists to
147
+ // capture, not the DateTime::format stdlib noise its name suggests. (current/next
148
+ // /count are kept despite a few Carbon/collection domain defs: SPL-protocol-
149
+ // dominant, small flood, and find_references caps tier-5 rows anyway.)
150
+ // <=3-char names are gated downstream by SHORT_NAME_THRESHOLD.
151
// Member-call names that are suppressed, one per line and grouped by the
// protocol they belong to.
const PHP_IGNORED_MEMBER_CALLEES = new Set([
    // Throwable getters
    'getMessage',
    'getCode',
    'getPrevious',
    'getTrace',
    'getTraceAsString',
    'getFile',
    'getLine',
    // Iterator / Countable
    'current',
    'next',
    'rewind',
    'valid',
    'count',
    // ArrayAccess
    'offsetGet',
    'offsetSet',
    'offsetExists',
    'offsetUnset',
    // IteratorAggregate / ArrayObject
    'getIterator',
    'getArrayCopy',
    // serialization / magic
    'jsonSerialize',
    '__toString',
]);
157
+ // ── call selectors ─────────────────────────────────────────────────────────
158
+ // Bare `foo()` callee = the `function:` field, kept only when it is a plain
159
+ // `name` (drop `qualified_name` namespaced calls — cross-namespace, final
160
+ // segment collides — and `variable_name` dynamic `$fn()` calls). A first-class-
161
+ // callable `strlen(...)` (sole `variadic_placeholder` argument) is closure
162
+ // creation, not a call — suppressed.
163
/**
 * Selects the callee of a `function_call_expression`.
 *
 * @param node the call node.
 * @returns the node in the `function` field when it is a plain `name` and the
 *   call is not first-class-callable syntax; otherwise `null`.
 */
function phpFunctionCallCallee(node) {
    const candidate = node.childForFieldName('function');
    const isPlainName = Boolean(candidate) && candidate.type === 'name';
    // The first-class-callable check only runs for plain-name callees.
    return isPlainName && !isFirstClassCallable(node) ? candidate : null;
}
171
+ // PHP 8.1 first-class-callable syntax (`strlen(...)`, `$obj->m(...)`,
172
+ // `C::m(...)`) is closure CREATION, not an invocation: the sole argument is a
173
+ // `variadic_placeholder`. Suppressed on every call form so it never emits a
174
+ // (wrong-kind) `calls` edge to the wrapped callable.
175
/**
 * Tells whether a call node uses first-class-callable syntax, i.e. its
 * `arguments` child has exactly one named child and that child is a
 * `variadic_placeholder`.
 *
 * @param callNode any call-shaped node.
 * @returns `true` for the placeholder form, `false` otherwise (including when
 *   the node has no `arguments` child).
 */
function isFirstClassCallable(callNode) {
    const argumentList = childOfType(callNode, 'arguments');
    if (!argumentList)
        return false;
    const named = argumentList.namedChildren;
    if (named.length !== 1)
        return false;
    return named[0]?.type === 'variadic_placeholder';
}
181
+ // `new Foo()` callee = the first named child (the `new` keyword is anonymous):
182
+ // kept only when a plain `name`. `qualified_name` (`new \App\X()`),
183
+ // `variable_name` (`new $cls()`), and anonymous-class bodies are dropped, as are
184
+ // the self-construction keywords `self`/`static`/`parent`.
185
/**
 * Selects the callee of an `object_creation_expression` (`new Foo()`).
 *
 * The callee is the first named child that is not a comment. It is returned
 * only when it is a plain `name`; every other callee shape is dropped, and so
 * are the self-construction keywords in PHP_SELF_CONSTRUCT.
 *
 * @param node the `object_creation_expression` node.
 * @returns the `name` node to resolve as a construction target, or `null`.
 */
function phpObjectCreationCallee(node) {
    // Comment nodes are named children too; a leading `/* ... */` must not hide
    // the real callee.
    const callee = node.namedChildren.find((child) => child.type !== 'comment');
    if (!callee || callee.type !== 'name')
        return null;
    // PHP matches `self` / `static` / `parent` case-insensitively, so `new SELF()`
    // and `new Static()` are self-construction as well, not type names.
    if (PHP_SELF_CONSTRUCT.has(callee.text.toLowerCase()))
        return null;
    return callee;
}
193
+ // Member / nullsafe / scoped call nodes carry their receiver+property directly,
194
+ // so the selector returns the call NODE itself; phpMemberCallInfo reads it.
195
/**
 * Identity selector: hands back the call node it was given, unchanged.
 *
 * @param callNode the call node.
 * @returns the same node.
 */
function selfNode(callNode) {
    const selected = callNode;
    return selected;
}
198
// Call selectors, in order: each pairs a call-node type with the function that
// extracts its callee.
const PHP_SELECTORS = [
    ['function_call_expression', phpFunctionCallCallee],
    ['object_creation_expression', phpObjectCreationCallee],
    ['member_call_expression', selfNode],
    ['nullsafe_member_call_expression', selfNode],
    ['scoped_call_expression', selfNode],
].map(([nodeType, getCallee]) => ({ nodeType, getCallee }));
205
+ // Reduces a member/nullsafe/scoped call to {receiver, property, isSelf}.
206
+ // `$this->m()` → self-call (resolve against the enclosing class)
207
+ // `$obj->m()` / `$o?->m()`→ unresolvable instance call (receiver keeps the `$`)
208
+ // `Class::m()` → receiver = `Class` (resolves via methodsByClass!)
209
+ // `self::m()`/`static::m()` → self-call; `parent::m()` → null (super-like)
210
+ // chained/computed `$a->b->c()`, `$cls::m()`, `Foo::bar()::baz()` → RECEIVER_OPAQUE
211
+ // `qualified\Ns::m()` → null (cross-namespace static path, identity-bearing)
212
/**
 * Reduces a member / nullsafe-member / scoped call node to
 * `{ receiver, property, isSelf }`, or `null` when the call is dropped.
 *
 *   `$this->m()`                 → receiver 'this', isSelf true
 *   `$obj->m()` / `$o?->m()`     → receiver keeps its `$` sigil, isSelf false
 *   `Class::m()`                 → receiver 'Class', isSelf false
 *   `self::m()` / `static::m()`  → receiver is the lower-cased keyword, isSelf true
 *   `parent::m()`                → null
 *   qualified / namespace-relative scope → null
 *   any other object or scope    → receiver RECEIVER_OPAQUE, isSelf false
 *
 * The `self` / `static` / `parent` keywords are matched case-insensitively,
 * as PHP does, whether they reach this function as a `relative_scope` or as a
 * plain `name` scope.
 *
 * @param callee the call node itself (the selector returns it unchanged).
 * @returns the reduced call info, or `null`.
 */
function phpMemberCallInfo(callee) {
    // `C::m(...)` / `$o->m(...)` is closure creation, not an invocation.
    if (isFirstClassCallable(callee))
        return null;
    const callType = callee.type;
    if (callType === 'member_call_expression' || callType === 'nullsafe_member_call_expression') {
        const obj = callee.childForFieldName('object');
        const nameNode = callee.childForFieldName('name');
        // A dynamic method name (`$obj->$prop()`) is a `variable_name`, not a
        // `name`; drop it so its `$`-text never enters the reference store.
        if (!obj || nameNode?.type !== 'name')
            return null;
        const prop = nameNode.text;
        // Any object that is not a plain variable (chained, indexed,
        // parenthesized, ...) is a runtime-computed receiver: opaque.
        if (obj.type !== 'variable_name') {
            return { receiver: RECEIVER_OPAQUE, property: prop, isSelf: false };
        }
        if (innerVarName(obj) === 'this')
            return { receiver: 'this', property: prop, isSelf: true };
        // Keep the `$` sigil so a variable named like a class (`$Request`) can
        // never match a sigil-free class name.
        return { receiver: obj.text, property: prop, isSelf: false };
    }
    if (callType === 'scoped_call_expression') {
        const scope = callee.childForFieldName('scope');
        const nameNode = callee.childForFieldName('name');
        // `Other::$dynMethod()` has a `variable_name` name: drop the dynamic call.
        if (!scope || nameNode?.type !== 'name')
            return null;
        const prop = nameNode.text;
        if (scope.type === 'name' || scope.type === 'relative_scope') {
            // Compare the keyword case-insensitively; the recorded receiver is the
            // canonical lower-case spelling.
            const keyword = scope.text.toLowerCase();
            if (keyword === 'self' || keyword === 'static')
                return { receiver: keyword, property: prop, isSelf: true };
            // `parent::` (super-like) and any other relative scope are dropped.
            if (keyword === 'parent' || scope.type === 'relative_scope')
                return null;
            return { receiver: scope.text, property: prop, isSelf: false };
        }
        // `\Ns\C::m()` and `namespace\C::m()` are identity-bearing: their final
        // segment can collide with a same-named local class, so they stay null.
        if (scope.type === 'qualified_name' || scope.type === 'relative_name')
            return null;
        // Every other scope (chained call, `$cls::create()`, subscript,
        // parenthesized, function call, ...) is runtime-computed: opaque.
        return { receiver: RECEIVER_OPAQUE, property: prop, isSelf: false };
    }
    return null;
}
276
+ // The inner identifier of a `variable_name` (`$this` → 'this', `$obj` → 'obj').
277
+ // The `$` is an anonymous token, so the name is the first NAMED child.
278
/**
 * Reads the text of the first named child of a `variable_name` node.
 *
 * @param vn the `variable_name` node.
 * @returns that child's text, or `null` when the child or its text is missing.
 */
function innerVarName(vn) {
    const firstNamed = vn.namedChildren[0];
    if (firstNamed === undefined || firstNamed === null)
        return null;
    const text = firstNamed.text;
    return text === undefined || text === null ? null : text;
}
281
+ // ── complexity (cyclomatic + cognitive) — pinned EXACT to SonarPHP 3.38.0.12239 ──
282
+ // (php-frontend's ComplexityVisitor / CognitiveComplexityVisitor, run as a per-function
283
+ // oracle; see the project docs' "Cyclomatic/Cognitive Complexity Rules"). Both metrics MEASURED
284
+ // against the real analyzer. TWO master-vs-3.38 divergences are pinned to the RELEASED
285
+ // 3.38 (the runnable version users compare against): one-word `elseif` is NOT counted
286
+ // cyclomatically (3.38's ComplexityVisitor has no visitElseifClause; master added it),
287
+ // and the bitwise `|` (PIPE) is NOT counted cognitively (master added it). PHP also
288
+ // FORKS on `match`: it counts each arm like a switch case — a DELIBERATE divergence from
289
+ // SonarPHP (which counts `match` in NEITHER metric), user-chosen for McCabe-truth +
290
+ // consistency with switch.
291
+ // Cyclomatic decision nodes (+1 each). `else_if_clause` is DELIBERATELY ABSENT (3.38
292
+ // does not count one-word `elseif`; the inner `if` of a two-word `else if` still counts
293
+ // via `if_statement`). `default_statement` and the switch/match CONTAINERS are absent.
294
+ // `match_conditional_expression` (each non-default arm) is the match FORK
295
+ // (`match_default_expression` excluded). `conditional_expression` covers BOTH the full
296
+ // ternary and the elvis `?:` (same node).
297
// Node types that each add one cyclomatic decision point.
const PHP_DECISION_NODE_TYPES = new Set([
    // branching
    'if_statement',
    // loops
    'for_statement',
    'foreach_statement',
    'while_statement',
    'do_statement',
    // ternary and elvis `?:` share this node
    'conditional_expression',
    // switch case; `default_statement` is not listed
    'case_statement',
    // non-default match arm; `match_default_expression` is not listed
    'match_conditional_expression',
]);
304
+ // Cyclomatic booleans: `&&`/`||` AND the word operators `and`/`or` (SonarPHP counts
305
+ // CONDITIONAL_AND/OR + ALTERNATIVE_CONDITIONAL_AND/OR), but NOT `xor`, `??`, or `|`.
306
+ // One `binary_expression` covers all binary ops, so read the `operator` field token.
307
// Operator tokens that count as a cyclomatic decision.
const PHP_CYCLOMATIC_BOOLEAN_OPS = new Set([
    '&&',
    '||',
    'and',
    'or',
]);
/**
 * Extra cyclomatic predicate: a `binary_expression` counts when the type of
 * its `operator` field node is one of PHP_CYCLOMATIC_BOOLEAN_OPS.
 *
 * @param node any syntax node.
 * @returns `true` when the node adds a decision point, else `false`.
 */
function phpCyclomaticExtra(node) {
    if (node.type === 'binary_expression') {
        const token = node.childForFieldName('operator')?.type;
        if (token !== undefined)
            return PHP_CYCLOMATIC_BOOLEAN_OPS.has(token);
    }
    return false;
}
314
+ // Cognitive boolean-run kind: `&&`/`||` ONLY (3.38 cognitive tests only CONDITIONAL_AND/
315
+ // CONDITIONAL_OR — NOT the word `and`/`or`, NOT `xor`/`??`, NOT the PIPE `|` master
316
+ // added). Source-order + kind-change + paren-unwrap is the engine default.
317
// Operator tokens that form cognitive boolean runs.
const PHP_COGNITIVE_BOOLEAN_OPS = new Set([
    '&&',
    '||',
]);
/**
 * Classifies a node for cognitive boolean-run counting.
 *
 * @param node any syntax node.
 * @returns the operator token (`'&&'` or `'||'`) when the node is a
 *   `binary_expression` using one of PHP_COGNITIVE_BOOLEAN_OPS; otherwise `null`.
 */
function phpCognitiveBooleanKind(node) {
    if (node.type === 'binary_expression') {
        const token = node.childForFieldName('operator')?.type;
        if (token !== undefined && PHP_COGNITIVE_BOOLEAN_OPS.has(token))
            return token;
    }
    return null;
}
324
+ // Cognitive jump: PHP `break`/`continue` take an optional numeric LEVEL (`break 2;`) —
325
+ // SonarPHP charges +1 FLAT only when that argument is present (bare `break;` adds 0).
326
+ // The argument is the statement's lone non-comment named child.
327
/**
 * Tells whether a `break` / `continue` statement node carries an argument,
 * i.e. has at least one named child that is not a comment.
 *
 * @param node the jump statement node.
 * @returns `true` when such a child exists, else `false`.
 */
function phpJumpHasArgument(node) {
    for (const child of node.namedChildren) {
        if (child.type !== 'comment')
            return true;
    }
    return false;
}
330
+ // Complexity body boundary — SEPARATE from PHP_SKIP_TYPES (the resolveCalls boundary).
331
+ // Closures (`anonymous_function`) and arrow fns (`arrow_function`) are DELIBERATELY
332
+ // ABSENT → DESCENDED, so they roll into the enclosing function cognitively (nestOnly,
333
+ // +1 nesting — SonarPHP's per-function model via visitWithNesting). The type
334
+ // declarations + their `declaration_list` body + `attribute_list` ARE skipped (an
335
+ // anon-class member's control flow then rolls into nobody — a rare documented
336
+ // per-symbol-model under-count, the Java anon-class precedent). `function_definition`
337
+ // is NOT listed: a top-level function's PendingBody IS a function_definition, so
338
+ // skipping it would root-skip the whole function; nested NAMED functions (vanishingly
339
+ // rare in PHP) are instead descended pass-through — a documented minor cognitive
340
+ // under-count (cyclomatic still excludes them via PHP_CYCLOMATIC_SKIP_TYPES).
341
// Shared complexity boundary: attribute lists, the named type declarations and
// the `declaration_list` body are not descended.
const PHP_COMPLEXITY_SKIP_TYPES = new Set([
    'attribute_list',
    'class_declaration',
    'interface_declaration',
    'trait_declaration',
    'enum_declaration',
    'declaration_list',
]);
// Cyclomatic-only boundary: everything above plus ALL nested functions
// (named functions, closures, arrow functions), so each nested function gets
// its own number. Cognitive complexity uses the narrower set above and
// therefore still descends closures and arrow functions.
const PHP_CYCLOMATIC_SKIP_TYPES = new Set(PHP_COMPLEXITY_SKIP_TYPES);
for (const nestedFunctionType of ['function_definition', 'anonymous_function', 'arrow_function']) {
    PHP_CYCLOMATIC_SKIP_TYPES.add(nestedFunctionType);
}
355
+ // Cognitive config (SonarPHP 3.38 CognitiveComplexityVisitor). PHP's `if_statement`
356
+ // holds elseif/else under a REPEATED `alternative` field (the Python elifClauseType
357
+ // shape) PLUS the two-word `else if` = else-clause-contains-if hybrid (elseChainsIf, one
358
+ // of the two new engine knobs this slice — see complexity.ts; the other is
359
+ // ternaryBranchFields below). Loops nest body only (loopBodyField).
360
+ // switch + match (the FORK) are whole-+1. catch surcharges; try/finally pass through.
361
+ // break/continue WITH a level argument + goto are +1 flat. Booleans `&&`/`||`
362
+ // source-order. No recursion (SonarPHP doesn't count it).
363
// Cognitive-complexity configuration for PHP, grouped by concern.
const PHP_COGNITIVE_OPTIONS = {
    // --- if / elseif / else ---
    ifType: 'if_statement',
    conditionField: 'condition',
    consequenceField: 'body',
    alternativeField: 'alternative',
    elifClauseType: 'else_if_clause',
    elseClauseType: 'else_clause',
    // Two-word `else if` (else_clause containing an if_statement): flatten +
    // extra nesting.
    elseChainsIf: true,

    // --- loops ---
    loopTypes: new Set([
        'for_statement',
        'foreach_statement',
        'while_statement',
        'do_statement',
    ]),
    loopBodyField: 'body',

    // --- switch / match (match is counted whole, +1, like switch) ---
    switchTypes: new Set([
        'switch_statement',
        'match_expression',
    ]),

    // --- ternary ---
    ternaryType: 'conditional_expression',
    // Only the true/false branches are nested; the condition stays at ambient
    // nesting so a chained elvis `a ?: b ?: c` does not compound. Elvis `?:`
    // has only the `alternative` branch; the full ternary adds `body`.
    ternaryBranchFields: ['body', 'alternative'],

    // --- catch / closures ---
    catchType: 'catch_clause',
    // Closures and arrow functions roll into the enclosing function (+0).
    nestOnlyTypes: new Set([
        'anonymous_function',
        'arrow_function',
    ]),

    // --- jumps ---
    labeledJumpTypes: new Set([
        'break_statement',
        'continue_statement',
    ]),
    // `break 2;` / `continue 2;` → +1 flat; a bare jump adds nothing.
    hasLabel: phpJumpHasArgument,
    // goto → +1 flat.
    flatIncrement: (node) => {
        return node.type === 'goto_statement';
    },

    // --- boolean operators ---
    booleanOperatorKind: phpCognitiveBooleanKind,
    // Parentheses are unwrapped.
    parenthesizedType: 'parenthesized_expression',
    // Deliberately unset: recursion / booleanByTreeParent / booleanRunStarts /
    // initField / nestElseBody / conditionFromNamedChildren / positional-if
    // knobs — PHP is field-based.
};
390
/**
 * Extracts symbols, call references and imports from a parsed PHP file.
 *
 * Steps, in order: walk the top-level members into a fresh context, collect
 * the ambiguous type names and ambiguous bare-callee names, resolve calls, and
 * compute complexity for the collected bodies.
 *
 * @param tree parsed syntax tree; its `rootNode` is walked.
 * @param content source text of the file (stored on the context).
 * @param fileInfo file descriptor passed through to the helpers.
 * @returns `{ symbols, references, imports }`.
 */
export function extractPHP(tree, content, fileInfo) {
    const ctx = {
        content,
        fileInfo,
        occurrences: new Map(),
        symbols: [],
        imports: [],
        bodies: [],
    };
    extractMembers(ctx, tree.rootNode.namedChildren, '');

    // Same-name types (e.g. two classes of one name in different namespaces of
    // the same file) share a simple-name FQN; first-wins resolution would bind
    // to the wrong one, so they are excluded from extract-time resolution.
    const typeKinds = new Set(['class', 'interface', 'enum']);
    const ambiguousClassNames = collectAmbiguousTypeNames(ctx.symbols, typeKinds);
    // The same applies to same-name top-level functions across namespaces in
    // one file: a bare call to such a name is kept unresolved.
    const functionKinds = new Set(['function']);
    const ambiguousBareCallees = collectAmbiguousTypeNames(ctx.symbols, functionKinds);

    const resolveOptions = {
        bareCalleeTypes: PHP_BARE_CALLEE_TYPES,
        plainCalleeType: PHP_PLAIN_CALLEE_TYPE,
        bareCallableKinds: PHP_BARE_CALLABLE_KINDS,
        // PHP has no implicit-this: a bare call is never a sibling-method call.
        bareCallsBindToEnclosingClass: false,
        constructorKinds: PHP_CONSTRUCTOR_KINDS,
        constructorSelectorTypes: PHP_CONSTRUCTOR_SELECTORS,
        ambiguousClassNames,
        ambiguousBareCallees,
        ignoredBareCallees: PHP_IGNORED_BARE_CALLEES,
        ignoredMemberCallees: PHP_IGNORED_MEMBER_CALLEES,
    };
    const references = resolveCalls(
        ctx.bodies,
        tree.rootNode,
        ctx.symbols,
        fileInfo,
        PHP_SELECTORS,
        PHP_SKIP_TYPES,
        PHP_FUNCTION_BODY_SKIP_TYPES,
        phpMemberCallInfo,
        resolveOptions,
    );

    // Complexity is computed here, while the tree is still alive. Cyclomatic
    // uses its own skip set (nested functions excluded); cognitive descends
    // closures and arrow functions via PHP_COMPLEXITY_SKIP_TYPES.
    const complexityOptions = {
        decisionNodeTypes: PHP_DECISION_NODE_TYPES,
        extraDecisionPredicate: phpCyclomaticExtra,
        skipTypes: PHP_COMPLEXITY_SKIP_TYPES,
        cyclomaticSkipTypes: PHP_CYCLOMATIC_SKIP_TYPES,
        cognitive: PHP_COGNITIVE_OPTIONS,
    };
    computeComplexity(ctx.bodies, ctx.symbols, complexityOptions);

    return { symbols: ctx.symbols, references, imports: ctx.imports };
}
434
+ // Processes a list of program / namespace-body children, threading the current
435
+ // namespace qualifier. A file-level `namespace X;` (no body) updates the
436
+ // qualifier for all FOLLOWING siblings; a block `namespace X { }` recurses into
437
+ // its body with the joined qualifier. Namespaces are NOT symbols — PHP FQNs are
438
+ // file-path based, so the namespace path only disambiguates hashed ids.
439
/**
 * Walks a list of program / namespace-body children and dispatches each one
 * to the matching extractor, threading the current namespace qualifier.
 *
 * A body-less `namespace X;` replaces the qualifier used for all FOLLOWING
 * siblings; a block `namespace X { }` recurses into its body with the joined
 * qualifier. Node types with no matching extractor are ignored, so
 * declarations wrapped in a top-level guard (`if (!function_exists(..)) { .. }`)
 * are not descended into.
 *
 * @param ctx extraction context, passed through to the extractors.
 * @param children the nodes to process, in source order.
 * @param nsQualifier namespace qualifier in effect on entry.
 */
function extractMembers(ctx, children, nsQualifier) {
    let currentNs = nsQualifier;
    for (const member of children) {
        const memberType = member.type;
        if (memberType === 'namespace_use_declaration') {
            extractImport(ctx, member);
        }
        else if (memberType === 'namespace_definition') {
            const nsName = member.childForFieldName('name')?.text ?? '';
            const nsBody = member.childForFieldName('body');
            if (nsBody) {
                // Block form: only the body sees the joined qualifier.
                extractMembers(ctx, nsBody.namedChildren, joinQualifier(currentNs, nsName));
            }
            else {
                // File-level form: joined onto the qualifier passed in, not onto
                // a previous file-level namespace.
                currentNs = joinQualifier(nsQualifier, nsName);
            }
        }
        else if (memberType === 'function_definition') {
            extractFunction(ctx, member, currentNs);
        }
        else if (memberType === 'const_declaration') {
            // Top-level / namespace-level constants: no owner, always exported.
            extractConsts(ctx, member, null, currentNs, true);
        }
        else if (PHP_TYPE_KIND[memberType] !== undefined) {
            extractType(ctx, member, currentNs);
        }
    }
}
473
// Records a top-level function as a 'function' symbol. It is always exported,
// since PHP has no private top-level functions. Declarations without a name
// are ignored.
function extractFunction(ctx, decl, ns) {
    const fnName = decl.childForFieldName('name')?.text;
    if (fnName) {
        const signature = phpSig(ctx, decl);
        const fqn = topFqn(ctx, fnName);
        const doc = phpDoc(decl);
        const symbol = makePhpSymbol(ctx, decl, signature, 'function', fnName, fqn, true, doc, ns);
        ctx.symbols.push(symbol);
        // The WHOLE declaration is the PendingBody, so calls in parameter defaults
        // (`function f($x = make())`) attribute here alongside the body.
        ctx.bodies.push({ symbolId: symbol.id, body: decl });
    }
}
485
// Records a class / interface / trait / enum declaration and then visits its
// body members. Top-level types are always exported (PHP types carry no private
// visibility). There is no nested-type recursion because PHP has none; enum
// cases are skipped by extractMember.
function extractType(ctx, decl, ns) {
    const typeName = decl.childForFieldName('name')?.text;
    if (!typeName) {
        return;
    }
    const typeKind = PHP_TYPE_KIND[decl.type];
    const typeSymbol = makePhpSymbol(ctx, decl, phpSig(ctx, decl), typeKind, typeName, topFqn(ctx, typeName), true, phpDoc(decl), ns);
    ctx.symbols.push(typeSymbol);
    // declaration_list / enum_declaration_list
    const bodyNode = decl.childForFieldName('body');
    if (!bodyNode) {
        return;
    }
    // Members fold the class name into the hashed qualifier (the C#/Kotlin rule)
    // so that same-(name,kind,signature) members of two classes get distinct ids.
    // Otherwise the class name lives only in the FQN, which is NOT hashed.
    const memberQualifier = joinQualifier(ns, typeName);
    bodyNode.namedChildren.forEach((bodyMember) => {
        extractMember(ctx, bodyMember, typeName, memberQualifier, true);
    });
}
506
// Dispatches one member of a class/interface/trait/enum body. Methods,
// properties, and constants become symbols. Everything else produces no symbol:
// enum_case (universal rule), use_declaration (trait use — v1 recall gap),
// comments, and stray tokens.
function extractMember(ctx, member, className, qualifier, containerExported) {
    const memberType = member.type;
    if (memberType === 'property_declaration') {
        extractProperties(ctx, member, className, qualifier, containerExported);
        return;
    }
    if (memberType === 'const_declaration') {
        extractConsts(ctx, member, className, qualifier, containerExported);
        return;
    }
    if (memberType !== 'method_declaration') {
        return;
    }
    const methodName = member.childForFieldName('name')?.text;
    if (!methodName) {
        return;
    }
    const isExported = phpExported(member, containerExported);
    // The WHOLE declaration is the PendingBody, so parameter-default calls (and,
    // for the constructor, promoted-param defaults + `: parent::__construct(...)`
    // delegation) attribute to the method alongside its body.
    extractCallable(ctx, member, methodName, className, qualifier, isExported);
    if (methodName === '__construct') {
        extractPromotedProperties(ctx, member, className, qualifier, containerExported);
    }
}
536
// Records a method or constructor as a 'method' symbol keyed `file:Class.name`.
// The constructor keeps its real name `__construct` (the Swift `init` precedent
// — findable, and `self::__construct()` self-delegation resolves); `new C()`
// binds to the CLASS via constructorKinds regardless.
function extractCallable(ctx, decl, name, className, qualifier, exported) {
    const signature = phpSig(ctx, decl);
    const fqn = memberFqn(ctx, className, name);
    const doc = phpDoc(decl);
    const methodSymbol = makePhpSymbol(ctx, decl, signature, 'method', name, fqn, exported, doc, qualifier);
    ctx.symbols.push(methodSymbol);
    // The whole declaration is always the PendingBody (param-default + body calls).
    ctx.bodies.push({ symbolId: methodSymbol.id, body: decl, className });
}
546
// Constructor property promotion (`function __construct(private int $x)`): each
// `property_promotion_parameter` is also recorded as a class property
// ('variable' symbol). Each is exported by its OWN visibility. No PendingBody is
// pushed here — the constructor owns the initializer/default calls.
function extractPromotedProperties(ctx, ctorDecl, className, qualifier, containerExported) {
    const paramList = ctorDecl.childForFieldName('parameters');
    if (!paramList) {
        return;
    }
    const promotedParams = paramList.namedChildren.filter((p) => p.type === 'property_promotion_parameter');
    for (const param of promotedParams) {
        const nameNode = param.childForFieldName('name') ?? childOfType(param, 'variable_name');
        if (!nameNode) {
            continue;
        }
        const propName = innerVarName(nameNode);
        if (!propName) {
            continue;
        }
        const isExported = phpExported(param, containerExported);
        // The signature runs from the start of the parameter (past any leading
        // attributes) through the end of the variable name.
        const rawSignature = ctx.content.slice(signatureStart(param), nameNode.endIndex);
        const signature = normalizeSignature(rawSignature);
        const fqn = memberFqn(ctx, className, propName);
        ctx.symbols.push(makePhpSymbol(ctx, param, signature, 'variable', propName, fqn, isExported, null, qualifier));
    }
}
565
// `[mods] [type] $a = .., $b;` → one 'variable' symbol per `property_element`
// (name without the `$`). Each element owns a per-binding PendingBody (its
// default initializer's calls); the signature drops the `= default`.
function extractProperties(ctx, member, className, qualifier, containerExported) {
    const isExported = phpExported(member, containerExported);
    const docLine = phpDoc(member);
    const elements = member.namedChildren.filter((c) => c.type === 'property_element');
    if (elements.length === 0) {
        return;
    }
    // Shared signature prefix: everything from the declaration start up to the
    // first element. signatureStart skips a leading PHP-8 `#[Attr]` block,
    // matching phpSig.
    const prefix = ctx.content.slice(signatureStart(member), elements[0].startIndex);
    for (const element of elements) {
        const nameNode = childOfType(element, 'variable_name');
        const propName = nameNode ? innerVarName(nameNode) : null;
        if (!propName) {
            continue;
        }
        const signature = normalizeSignature(`${prefix}$${propName}`);
        const fqn = memberFqn(ctx, className, propName);
        const propSymbol = makePhpSymbol(ctx, member, signature, 'variable', propName, fqn, isExported, docLine, qualifier);
        ctx.symbols.push(propSymbol);
        ctx.bodies.push({ symbolId: propSymbol.id, body: element, className });
    }
}
589
// `[visibility] const A = .., B = ..;` → one 'variable' symbol per
// `const_element` (its name is the element's first named child — there is no
// `name:` field). Each owns a per-binding PendingBody (the value expression's
// calls). `className === null` marks a top-level / namespace-level const: FQN
// `file:NAME`, always exported, no enclosing class for self-resolution.
function extractConsts(ctx, member, className, qualifier, containerExported) {
    const isTopLevel = className === null;
    const isExported = isTopLevel ? true : phpExported(member, containerExported);
    const docLine = phpDoc(member);
    const elements = member.namedChildren.filter((c) => c.type === 'const_element');
    if (elements.length === 0) {
        return;
    }
    // Shared signature prefix up to the first element. signatureStart skips a
    // leading PHP-8 `#[Attr]` block, matching phpSig.
    const prefix = ctx.content.slice(signatureStart(member), elements[0].startIndex);
    for (const element of elements) {
        const constName = element.namedChildren[0]?.text;
        if (!constName) {
            continue;
        }
        const signature = normalizeSignature(`${prefix}${constName}`);
        const fqn = isTopLevel ? topFqn(ctx, constName) : memberFqn(ctx, className, constName);
        const constSymbol = makePhpSymbol(ctx, member, signature, 'variable', constName, fqn, isExported, docLine, qualifier);
        ctx.symbols.push(constSymbol);
        ctx.bodies.push({ symbolId: constSymbol.id, body: element, className: className ?? undefined });
    }
}
616
// `use App\Model\User;` / `use App\{A, B as C};` / `use Foo as Bar;` → one
// ImportInfo per clause. Low cross-file value (PHP namespaces don't map to
// indexed file paths — the Rust/Kotlin framing); the `use function`/`use const`
// distinction is not tracked.
function extractImport(ctx, node) {
    const line = node.startPosition.row + 1;
    const group = childOfType(node, 'namespace_use_group');
    // Group form: the clauses live inside the group and share the declaration's
    // namespace_name as prefix. Plain form: the clauses are direct children and
    // carry no prefix.
    const prefix = group ? (childOfType(node, 'namespace_name')?.text ?? '') : '';
    const clauseParent = group ?? node;
    for (const clause of clauseParent.namedChildren) {
        if (clause.type === 'namespace_use_clause') {
            addUseClause(ctx, clause, prefix, line);
        }
    }
}
636
// Pushes one import record for a single `namespace_use_clause`. The imported
// name is the last backslash-separated segment of the (optionally prefixed)
// path; the remaining segments form `sourceModule`. An `alias` field on the
// clause is recorded alongside the name.
function addUseClause(ctx, clause, prefix, line) {
    const aliasNode = clause.childForFieldName('alias');
    const aliasId = aliasNode?.id;
    // The path is the first name-like child that is not the alias node itself.
    const isPathNode = (c) => c.id !== aliasId && (c.type === 'qualified_name' || c.type === 'name');
    const pathNode = clause.namedChildren.find(isPathNode);
    // An empty group-use clause (`use App\{};`) parses to a MISSING name (text "")
    // — skip it rather than emit a phantom import named for the bare prefix.
    if (!pathNode?.text) {
        return;
    }
    const fullPath = prefix ? `${prefix}\\${pathNode.text}` : pathNode.text;
    const segments = fullPath.split('\\').filter(Boolean);
    const name = segments.length > 0 ? segments[segments.length - 1] : fullPath;
    const sourceModule = segments.slice(0, -1).join('\\');
    const imported = { name };
    if (aliasNode) {
        imported.alias = aliasNode.text;
    }
    ctx.imports.push({ file: ctx.fileInfo.path, sourceModule, importedNames: [imported], line });
}
650
+ // ── helpers ──────────────────────────────────────────────────────────────
651
// PHP's member default is PUBLIC, so a declaration exports unless it carries a
// `private` visibility_modifier (absent / public / protected all export —
// protected is inheritance API; no namespace carve-out). The caller passes the
// container's exportedness, which is AND-ed in first.
function phpExported(decl, containerExported) {
    if (!containerExported) {
        return false;
    }
    // PHP 8.4 asymmetric visibility emits TWO visibility_modifiers; the READ
    // (exportedness) visibility is the first one WITHOUT a `(set)` suffix, since a
    // `(set)` modifier governs writes only.
    for (const child of decl.namedChildren) {
        if (child.type !== 'visibility_modifier') {
            continue;
        }
        const modifierText = child.text;
        if (modifierText.endsWith('(set)')) {
            continue;
        }
        return modifierText !== 'private';
    }
    // No read-visibility modifier at all (none, or only `(set)` ones): GET
    // defaults to public.
    return true;
}
666
// Signature = source from the first non-attribute token (modifiers/keyword
// included; leading PHP-8 `#[Attr]` blocks excluded — they blow the 120-char cap
// and collide overload ids, the Java rationale) up to the `body:` field
// (compound_statement / declaration_list / enum_declaration_list), or to the
// declaration end when bodiless (abstract/interface methods). A trailing `;` is
// stripped.
function phpSig(ctx, node) {
    const from = signatureStart(node);
    const bodyNode = node.childForFieldName('body');
    const to = bodyNode ? bodyNode.startIndex : node.endIndex;
    const normalized = normalizeSignature(ctx.content.slice(from, to));
    return normalized.endsWith(';') ? normalized.slice(0, -1).trimEnd() : normalized;
}
680
// Returns the source offset just past the leading `attribute_list` children
// (PHP-8 `#[Attr]` sits before the modifiers/keyword; the keyword is anonymous,
// so the signature starts right after them). A `comment` that FOLLOWS an
// attribute is relocated INSIDE the declaration by tree-sitter (see phpDoc), so
// it is skipped too — but ONLY after an attribute. A comment with no preceding
// attribute (`function /*c*/ name()`) sits AFTER the anonymous
// `function`/`const` keyword, so skipping it would drop the keyword.
function signatureStart(decl) {
    let offset = decl.startIndex;
    let afterAttribute = false;
    for (const child of decl.namedChildren) {
        const isAttribute = child.type === 'attribute_list';
        const isRelocatedComment = child.type === 'comment' && afterAttribute;
        if (!isAttribute && !isRelocatedComment) {
            // First real token: the signature begins at the last recorded offset.
            break;
        }
        offset = child.endIndex;
        if (isAttribute) {
            afterAttribute = true;
        }
    }
    return offset;
}
702
// Doc = a `/** */` PHPDoc block (a single `comment` node). Plain `//` and `#`
// comments are NOT docs (the PHPDoc convention; the Java/Kotlin `/** */`-only
// rule). Normally the block is the declaration's previousNamedSibling. When a
// leading `#[Attr]` precedes it, tree-sitter relocates the comment INTO the
// declaration (an inner child after the attribute_list(s)), so that position is
// checked as well. Returns null when no doc block is found.
function phpDoc(decl) {
    const prev = decl.previousNamedSibling;
    const hasAdjacentDoc = prev?.type === 'comment' &&
        prev.endPosition.row === decl.startPosition.row - 1 && // adjacency
        !isTrailingComment(prev) &&
        prev.text.startsWith('/**');
    if (hasAdjacentDoc) {
        return commentDocLine(prev.text);
    }
    // Attribute-then-doc ordering: the doc is a leading inner child AFTER an
    // attribute_list. Non-doc `//`/`#` comments are scanned past in search of a
    // later `/**` block; the scan stops at the first real token. As in
    // signatureStart, an attribute must have been seen first — a leading inner
    // comment with NO preceding attribute (`function /** x */ f()`) is a
    // mid-header comment, not a relocated doc.
    let afterAttribute = false;
    for (const child of decl.namedChildren) {
        if (child.type === 'attribute_list') {
            afterAttribute = true;
        }
        else if (child.type === 'comment' && afterAttribute) {
            if (child.text.startsWith('/**')) {
                return commentDocLine(child.text);
            }
        }
        else {
            break;
        }
    }
    return null;
}
734
// FQN of a top-level symbol: `<file path>:<name>`.
function topFqn(ctx, name) {
    const { path } = ctx.fileInfo;
    return `${path}:${name}`;
}
737
// FQN of a class member: `<file path>:<class name>.<member name>`.
function memberFqn(ctx, className, name) {
    const { path } = ctx.fileInfo;
    return `${path}:${className}.${name}`;
}
740
// Returns the first named child of `node` whose type is one of `types`, or null
// when there is none.
function childOfType(node, ...types) {
    for (const child of node.namedChildren) {
        if (types.includes(child.type)) {
            return child;
        }
    }
    return null;
}
743
// Joins two qualifier parts with a backslash; when either part is empty the
// other is returned unchanged. The namespace path only disambiguates hashed ids
// — it never reaches FQN parsing — so any unique join works.
function joinQualifier(a, b) {
    if (a && b) {
        return `${a}\\${b}`;
    }
    return a ? a : b;
}
752
// Builds a symbol record for `node`. `ctx.occurrences` counts how often each
// (name, kind, signature, qualifier) tuple has been seen in this context; from
// the second occurrence on, `#<n>` is appended to the qualifier passed to
// symbolId. Line numbers are 1-based (tree-sitter rows + 1).
function makePhpSymbol(ctx, node, signature, kind, name, fqn, exported, doc, qualifier = '') {
    const occurrenceKey = `${name}\0${kind}\0${signature}\0${qualifier}`;
    const ordinal = (ctx.occurrences.get(occurrenceKey) ?? 0) + 1;
    ctx.occurrences.set(occurrenceKey, ordinal);
    const hashedQualifier = ordinal !== 1 ? `${qualifier}#${ordinal}` : qualifier;
    const { path: file, language } = ctx.fileInfo;
    // The id hashes the FULL signature; only the stored copy is capped.
    const id = symbolId(file, name, kind, signature, hashedQualifier);
    const startLine = node.startPosition.row + 1;
    const endLine = node.endPosition.row + 1;
    return {
        id,
        name,
        fqn,
        kind,
        file,
        startLine,
        endLine,
        signature: signature.slice(0, SIGNATURE_DISPLAY_CAP),
        doc,
        exported,
        language,
    };
}