flurryx-code-memory 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. code_memory/__init__.py +1 -0
  2. code_memory/claims/__init__.py +32 -0
  3. code_memory/claims/extractor.py +325 -0
  4. code_memory/claims/indexer.py +258 -0
  5. code_memory/claims/resolver.py +186 -0
  6. code_memory/claims/store.py +424 -0
  7. code_memory/cli.py +1192 -0
  8. code_memory/config.py +268 -0
  9. code_memory/embed/__init__.py +224 -0
  10. code_memory/embed/cache.py +204 -0
  11. code_memory/embed/m3.py +174 -0
  12. code_memory/embed/ollama.py +92 -0
  13. code_memory/embed/tei.py +106 -0
  14. code_memory/episodic/__init__.py +3 -0
  15. code_memory/episodic/sqlite_store.py +278 -0
  16. code_memory/extractor/__init__.py +3 -0
  17. code_memory/extractor/csproj.py +166 -0
  18. code_memory/extractor/dll.py +385 -0
  19. code_memory/extractor/gitignore.py +162 -0
  20. code_memory/extractor/nuget.py +275 -0
  21. code_memory/extractor/sanity.py +124 -0
  22. code_memory/extractor/sln.py +108 -0
  23. code_memory/extractor/treesitter.py +1172 -0
  24. code_memory/graph/__init__.py +3 -0
  25. code_memory/graph/falkor_store.py +740 -0
  26. code_memory/mcp_server.py +1816 -0
  27. code_memory/metrics.py +260 -0
  28. code_memory/orchestrator/__init__.py +13 -0
  29. code_memory/orchestrator/git_delta.py +211 -0
  30. code_memory/orchestrator/ingest_state.py +71 -0
  31. code_memory/orchestrator/pipeline.py +1478 -0
  32. code_memory/orchestrator/reset.py +130 -0
  33. code_memory/orchestrator/resolver.py +825 -0
  34. code_memory/orchestrator/retrieve.py +505 -0
  35. code_memory/resilience.py +73 -0
  36. code_memory/sync/__init__.py +20 -0
  37. code_memory/sync/autostart/__init__.py +42 -0
  38. code_memory/sync/autostart/base.py +106 -0
  39. code_memory/sync/autostart/launchd.py +115 -0
  40. code_memory/sync/autostart/schtasks.py +155 -0
  41. code_memory/sync/autostart/systemd.py +113 -0
  42. code_memory/sync/hooks.py +164 -0
  43. code_memory/sync/safety.py +65 -0
  44. code_memory/sync/snapshot.py +461 -0
  45. code_memory/sync/store.py +399 -0
  46. code_memory/sync/sync.py +405 -0
  47. code_memory/sync/watcher.py +320 -0
  48. code_memory/vector/__init__.py +3 -0
  49. code_memory/vector/qdrant_store.py +302 -0
  50. flurryx_code_memory-0.4.0.dist-info/METADATA +26 -0
  51. flurryx_code_memory-0.4.0.dist-info/RECORD +53 -0
  52. flurryx_code_memory-0.4.0.dist-info/WHEEL +4 -0
  53. flurryx_code_memory-0.4.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,1172 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from dataclasses import dataclass, field
5
+ from functools import lru_cache
6
+ from pathlib import Path
7
+
8
+ from tree_sitter import Language, Node, Parser
9
+ from tree_sitter_language_pack import get_language
10
+
11
# File suffix -> grammar name. ``lang_for`` lowercases the suffix before the
# lookup, and the resulting name is what ``_parser_for`` hands to
# ``get_language``. A suffix missing from this table makes ``extract_file``
# return ``None`` for that file.
LANG_BY_EXT: dict[str, str] = {
    ".ts": "typescript",
    ".tsx": "tsx",
    ".js": "javascript",
    ".jsx": "javascript",
    ".mjs": "javascript",
    ".cjs": "javascript",
    ".py": "python",
    # .NET ecosystem
    ".cs": "csharp",
    ".cshtml": "razor",
    ".razor": "razor",
    ".vb": "vb",
    ".fs": "fsharp",
    ".fsi": "fsharp",
    ".fsx": "fsharp",
    # PHP — ``.phtml`` is the legacy template extension still used by
    # Laravel/Zend/WordPress for view files mixing PHP + HTML; same grammar.
    ".php": "php",
    ".phtml": "php",
}
32
+
33
# Node types that ``_walk`` records as a ``Symbol`` (provided ``_symbol_name``
# can find a name for the node). The node type string itself becomes
# ``Symbol.kind``. The set is shared across all grammars, so a name listed
# under one language heading also matches the same node type elsewhere.
SYMBOL_NODE_TYPES = {
    "function_declaration",
    "function_definition",
    "method_definition",
    "class_declaration",
    "class_definition",
    # TypeScript ``abstract class`` parses as its own node type; missing
    # it makes Angular clean-arch ports invisible to the graph, which
    # in turn leaves every ``inject(Port)`` edge unresolved.
    "abstract_class_declaration",
    "abstract_method_signature",
    "arrow_function",
    "export_statement",
    # C# / Razor (Razor embeds C#)
    "method_declaration",
    "interface_declaration",
    "struct_declaration",
    "record_declaration",
    "enum_declaration",
    "constructor_declaration",
    "delegate_declaration",
    "property_declaration",
    # VB.NET
    "class_block",
    "module_block",
    "namespace_block",
    # F#
    "function_or_value_defn",
    "type_definition",
    "method_or_prop_defn",
    "named_module",
    # PHP — trait_declaration is the only one not already covered by
    # the C#/TS/Py names above (class_declaration, interface_declaration,
    # enum_declaration, method_declaration, function_definition are reused).
    "trait_declaration",
}
69
+
70
# Node types that ``_walk`` treats as a call site. Each one is first offered
# to ``_angular_inject_token`` (becoming an inject entry if it matches);
# otherwise it is recorded as a ``Call`` when ``_callee_name`` yields a name.
CALL_NODE_TYPES = {
    "call_expression",
    "call",
    "invocation_expression",
    "invocation",  # VB
    # C# / VB / Razor: ``new Foo()`` parses as ``object_creation_expression``
    # rather than ``invocation_expression``. Without this, calls to
    # constructors (factories, DI registrations, ``new Builder().X()``) never
    # become CALLS edges, which is the #1 reason the call graph looks empty
    # on real .NET codebases.
    "object_creation_expression",
    # PHP — function call (``foo($x)``), instance method (``$obj->bar($x)``),
    # static method (``Foo::baz($x)``). PHP ``new Foo()`` is also
    # ``object_creation_expression`` (shared name).
    "function_call_expression",
    "member_call_expression",
    "scoped_call_expression",
}
88
+
89
# Nodes that carry a type expression via a field named "type" (or "returns").
# When walking, look up these fields and harvest every identifier inside the
# type subtree. Covers C# parameter/field/property/variable/cast/typeof/is/as
# plus TypeScript/JS type annotations.
#
# Each node type is listed exactly once: ``as_expression`` exists in both the
# C# and the TS grammars, and the single entry below covers both.
TYPE_FIELD_NODE_TYPES = {
    # C# declarations
    "parameter", "variable_declaration", "property_declaration",
    "field_declaration", "event_declaration", "indexer_declaration",
    "delegate_declaration", "method_declaration",
    # C# expressions referencing a type (``as_expression`` is shared with TS)
    "cast_expression", "as_expression", "is_expression",
    "typeof_expression", "sizeof_expression", "default_expression",
    "array_creation_expression", "stack_alloc_array_creation_expression",
    # TS / JS
    "type_annotation", "type_alias_declaration",
    "satisfies_expression",
}
107
+
108
# Nodes whose direct (non-punctuation) children ARE type expressions —
# walk every child as a type tree. ``base_list`` (`class X : Foo, IBar`),
# generic arguments, and constraint clauses fall here.
# ``_walk`` filters out punctuation and keyword tokens (``,``, ``:``, ``(``,
# ``)``, ``<``, ``>``, ``where``, ``extends``, ``implements``, ``|``, ``&``)
# before handing each remaining child to ``_collect_type_refs``.
TYPE_CHILDREN_NODE_TYPES = {
    "base_list",  # C#
    "type_argument_list",  # C# generics
    "type_arguments",  # TS/JS generics
    "type_parameter_constraints_clause",  # C# `where T : Foo`
    "implements_clause",  # TS `implements Foo, Bar`
    "extends_clause",  # TS `extends Foo`
    "extends_type_clause",  # TS interface extends
    "heritage_clause",  # TS class heritage
    "tuple_type",  # C# `(int, Foo)` — walk for Foo
    "tuple_element",
    # PHP
    "base_clause",  # ``extends Bar``
    "class_interface_clause",  # ``implements I1, I2``
}

# PHP type-expression wrapper nodes — recurse to find inner ``name``s.
# ``primitive_type`` (``int``, ``string``, ``array``, ...) is skipped at
# the top of ``_collect_type_refs`` because primitives carry no graph
# value; they'd otherwise pollute "who touches type X" queries.
_PHP_TYPE_WRAPPER_NODE_TYPES = {
    "named_type",
    "optional_type",  # ``?Foo``
    "union_type",  # ``Foo|Bar``
    "intersection_type",  # ``Foo&Bar``
    "disjunctive_normal_form_type",  # PHP 8.2 ``(Foo&Bar)|Baz``
}

# Parent nodes whose children include a PHP type expression in a
# positional slot (no ``type`` field). Walk each child whose type is a
# wrapper and collect the references. This is in addition to
# ``TYPE_FIELD_NODE_TYPES`` (field-name lookup) so C#/TS keep working.
# Note that ``property_declaration`` and ``method_declaration`` appear in
# both sets, so ``_walk`` runs both lookups on those nodes.
_PHP_TYPED_PARENT_NODE_TYPES = {
    "property_declaration",
    "simple_parameter",
    "variadic_parameter",
    "property_promotion_parameter",
    "method_declaration",  # return type after ``:``
    "function_definition",  # free-function return type
}
151
+
152
# Primitive / language-built-in type tokens — never emit as a reference.
# These usually appear as `predefined_type` nodes (skipped structurally) but
# some grammars emit them as bare identifiers in odd positions.
# ``_collect_type_refs`` compares the stripped identifier text against this
# set (exact, case-sensitive match) before appending a reference.
_PRIMITIVE_TYPE_NAMES: frozenset[str] = frozenset({
    # C#
    "void", "bool", "byte", "sbyte", "short", "ushort", "int", "uint",
    "long", "ulong", "float", "double", "decimal", "char", "string",
    "object", "dynamic", "var", "nint", "nuint",
    # TS/JS
    "any", "unknown", "never", "number", "boolean", "undefined", "null",
    "this", "symbol", "bigint",
})

# Node types recorded as imports by ``_walk``. ``namespace_use_declaration``
# goes through ``_php_use_imports`` (which may yield several names); every
# other type goes through ``_import_module`` (at most one name).
IMPORT_NODE_TYPES = {
    "import_statement",
    "import_from_statement",
    "using_directive",  # C#
    "razor_using_directive",  # Razor
    "imports_statement",  # VB
    "import_decl",  # F#
    "namespace_use_declaration",  # PHP ``use Foo\Bar;``
}

# Razor / Blazor ``@inject TypeName Member`` directives. Each one is
# a DI dependency declaration that we want as a graph edge from the
# file to the injected type.
# ``_walk`` passes these nodes to ``_inject_type`` and appends the result to
# ``ExtractedFile.injects``.
INJECT_NODE_TYPES = {
    "razor_inject_directive",
}
181
+
182
+
183
@dataclass
class Symbol:
    """One named definition found while walking a parsed file."""

    # Text of the node's name as returned by ``_symbol_name``.
    name: str
    # The tree-sitter node type of the definition (``_walk`` stores it as-is).
    kind: str
    # 1-based line numbers (``_walk`` adds 1 to the node's row indices).
    start_line: int
    end_line: int
    # Source text covered by the node's byte range.
    snippet: str
    # Enclosing namespaces joined with "."; ``None`` when there are none.
    namespace: str | None = None
    # ``True`` only for kinds in ``_PARTIAL_CAPABLE_KINDS`` that carry a
    # ``partial`` modifier child.
    partial: bool = False
    # Parameter count for callable kinds (method_declaration,
    # function_declaration, ...). ``None`` when the kind is not
    # callable (class_declaration, etc.) or when the parser couldn't
    # locate a parameter_list child.
    param_count: int | None = None
197
+
198
+
199
@dataclass(frozen=True)
class Call:
    """One call site: ``name(args)`` with arity captured.

    Arity feeds the resolver's overload-disambiguation tier: when
    multiple definitions share the same name (classic C# / Java
    overload pattern), prefer the one whose parameter count matches.

    ``receiver_type`` is the inferred type of the call's receiver, set
    for TS ``this.<field>.<method>()`` patterns where the field's type
    can be read off a member initializer or annotation. The resolver
    uses it to narrow ``<method>`` to the methods defined on that type
    — without it, every Angular use case's call to its port collapses
    to an ambiguous bare identifier.
    """

    # Callee name as produced by ``_callee_name``.
    name: str
    # Argument count as produced by ``_call_arity`` (0 when no argument list).
    arity: int
    # Looked up in the innermost enclosing class's field-type map; stays
    # ``None`` outside a class or when the receiver field is unknown.
    receiver_type: str | None = None
218
+
219
+
220
@dataclass
class ExtractedFile:
    """Everything ``extract_file`` harvested from one source file."""

    # Resolved path of the file, stored as a string.
    path: str
    # Grammar name chosen by ``lang_for`` (a value of ``LANG_BY_EXT``).
    lang: str
    # Definitions in the order ``_walk`` encountered them.
    symbols: list[Symbol] = field(default_factory=list)
    # Imported module / namespace names, one entry per import found.
    imports: list[str] = field(default_factory=list)
    # Call sites, excluding calls recognised as Angular ``inject(Token)``.
    calls: list[Call] = field(default_factory=list)
    # DI declarations: injected type names. ``_walk`` appends both Razor
    # ``@inject TypeName Member`` directives and tokens recognised by
    # ``_angular_inject_token`` at call sites.
    injects: list[str] = field(default_factory=list)
    # Type-position name references: base lists (`class X : IFoo`), parameter
    # types, field/property types, generic args, type constraints, cast/is/as/
    # typeof targets, etc. Powers "who touches type X" queries (callers + refs).
    references: list[str] = field(default_factory=list)
    # File contents decoded as UTF-8 (undecodable bytes replaced, BOM removed).
    source: str = ""
    # Result of ``looks_generated`` for this path and content.
    generated: bool = False
236
+
237
+
238
@lru_cache(maxsize=16)
def _parser_for(lang: str) -> Parser:
    """Return a parser for grammar ``lang``, memoised per grammar name.

    The cache holds up to 16 entries, so repeated calls with the same
    name hand back the same ``Parser`` instance.
    """
    return Parser(get_language(lang))
242
+
243
+
244
def lang_for(path: str | Path) -> str | None:
    """Return the grammar name for ``path``'s suffix, or ``None`` if unknown.

    The suffix is lowercased first, so ``Foo.CS`` and ``foo.cs`` resolve
    to the same grammar.
    """
    suffix = Path(path).suffix.lower()
    return LANG_BY_EXT.get(suffix)
246
+
247
+
248
# ``extract_file`` compares this against the on-disk size from ``stat()``.
MAX_FILE_BYTES = 500_000  # skip files larger than ~500KB (bundles, minified)
MAX_LINE_LEN = 2000  # likely minified if any line is this long
# NOTE: ``extract_file`` applies this as a slice on the *decoded* text, so it
# is effectively a character count rather than a byte count.
MINIFIED_SNIFF_BYTES = 4096  # bytes to inspect for minified-file heuristic
MINIFIED_AVG_LINE = 200  # avg line length above this in sniff window => minified

# Substrings that, when present in the first ~2KB of a file, mark it as
# auto-generated. These are case-insensitive contains checks.
# Entries must be lowercase: ``_has_generated_header`` lowercases the sample
# but not the markers.
GENERATED_HEADER_MARKERS = (
    "@generated",
    "auto-generated",
    "autogenerated",
    "code generated by",
    "do not edit",
    "this file was generated",
    "generated by openapi",
    "generated by swagger",
    "generated by ng-openapi-gen",
    "generated by openapi-generator",
)

# Path segments / suffixes that indicate generated output.
# Segments are matched against whole, lowercased path components; suffixes
# are matched against the lowercased file name.
_GENERATED_PATH_PARTS = ("generated", "__generated__", "openapi-gen", "swagger-gen")
_GENERATED_PATH_SUFFIXES = (".generated.ts", ".generated.js", ".g.ts", ".g.dart")
271
+
272
+
273
def _has_generated_header(sample: str) -> bool:
    """Report whether the first 2048 characters contain a generated marker.

    Matching is a case-insensitive substring test against each entry of
    ``GENERATED_HEADER_MARKERS``.
    """
    head = sample[:2048].lower()
    for marker in GENERATED_HEADER_MARKERS:
        if marker in head:
            return True
    return False
276
+
277
+
278
def _has_generated_path(path: Path) -> bool:
    """Report whether ``path`` looks like generated output by name alone.

    True when any whole path component (case-insensitively) equals one of
    ``_GENERATED_PATH_PARTS``, or when the lowercased file name ends with
    one of ``_GENERATED_PATH_SUFFIXES``.
    """
    lowered_components = {component.lower() for component in path.parts}
    if not lowered_components.isdisjoint(_GENERATED_PATH_PARTS):
        return True
    # ``str.endswith`` accepts a tuple and tests every suffix in one call.
    return path.name.lower().endswith(_GENERATED_PATH_SUFFIXES)
284
+
285
+
286
def looks_generated(path: str | Path, sample: str) -> bool:
    """Return ``True`` when the file appears to be auto-generated.

    The path heuristics are tried first; the content header markers are
    only consulted when the path gives no verdict.
    """
    if _has_generated_path(Path(path)):
        return True
    return _has_generated_header(sample)
290
+
291
+
292
def looks_minified(sample: str) -> bool:
    """Detect minified / pre-bundled JS without parsing.

    Triggers when:
    - any line in the sniffed window exceeds ``MAX_LINE_LEN``, or
    - the average line length within the sniffed window exceeds
      ``MINIFIED_AVG_LINE``.

    A window with no line break at all is treated as a single line and
    goes through the same two thresholds. A short one-line file (for
    example a re-export with no trailing newline) is therefore NOT
    reported as minified, while one giant unbroken line still is.

    Args:
        sample: Leading portion of the decoded file contents.

    Returns:
        ``True`` if the sample looks minified; ``False`` otherwise,
        including for an empty sample.
    """
    if not sample:
        return False
    # ``splitlines`` yields a single element when there is no line break,
    # so the no-newline case needs no special handling below.
    lines = sample.splitlines()
    if any(len(line) > MAX_LINE_LEN for line in lines):
        return True
    average_line_length = len(sample) / max(len(lines), 1)
    return average_line_length > MINIFIED_AVG_LINE
312
+
313
+
314
def extract_file(path: str | Path) -> ExtractedFile | None:
    """Parse one source file and collect its symbols, imports, calls and refs.

    Args:
        path: Location of the file to parse.

    Returns:
        The populated ``ExtractedFile``, or ``None`` when the file is
        skipped: unknown suffix, not stat-able or not readable, larger
        than ``MAX_FILE_BYTES``, or judged minified by ``looks_minified``.
    """
    p = Path(path)
    lang = lang_for(p)
    if lang is None:
        return None
    try:
        if p.stat().st_size > MAX_FILE_BYTES:
            return None
        # The read sits inside the same guard as ``stat``: a file can
        # disappear or become unreadable between the two calls, and that
        # should skip the file just like a failed ``stat`` does.
        raw = p.read_bytes()
    except OSError:
        return None
    # Strip a UTF-8 BOM if present so tree-sitter's byte offsets line up
    # with our slicing buffer. Some Windows-authored C# files ship one.
    if raw.startswith(b"\xef\xbb\xbf"):
        raw = raw[3:]
    source = raw.decode("utf-8", errors="replace")
    if looks_minified(source[:MINIFIED_SNIFF_BYTES]):
        return None  # minified / bundled
    parser = _parser_for(lang)
    tree = parser.parse(raw)
    root = tree.root_node
    ex = ExtractedFile(
        path=str(p.resolve()),
        lang=lang,
        source=source,
        generated=looks_generated(p, source),
    )
    # Fresh stacks per file: namespace and class context never leak
    # from one file into the next.
    _walk(root, raw, ex, ns_stack=[], class_stack=[])
    return ex
344
+
345
+
346
# C# block-scoped ``namespace Foo { ... }``. Pushed while walking the
# block's children and popped on exit.
_BLOCK_NAMESPACE_NODE_TYPES = {"namespace_declaration"}

# C# 10 ``namespace Foo;`` (file-scoped). One per file by spec; applies
# to *everything after it*. We push without popping.
_FILE_SCOPED_NAMESPACE_NODE_TYPES = {"file_scoped_namespace_declaration"}

# PHP ``namespace X;`` (file-scoped, persists for the rest of the file) vs
# ``namespace X { ... }`` (block, scopes only its body). Tree-sitter emits
# the same ``namespace_definition`` node for both — we disambiguate by
# checking for a ``compound_statement`` child.
_PHP_NAMESPACE_NODE_TYPE = "namespace_definition"

# Symbol kinds that can carry a ``partial`` modifier in C#. Partial
# classes / structs / interfaces / records get merged into a single
# logical entity in the graph; non-partial symbols stay file-scoped.
# ``_walk`` only calls ``_has_partial_modifier`` for kinds listed here.
_PARTIAL_CAPABLE_KINDS = {
    "class_declaration",
    "struct_declaration",
    "interface_declaration",
    "record_declaration",
}

# Symbol kinds that take parameters — we record their arity for the
# resolver's overload disambiguation tier. Non-callable kinds
# (classes, modules, enums) skip the count.
# ``_walk`` only calls ``_param_count`` for kinds listed here; every other
# kind gets ``param_count=None``.
_CALLABLE_KINDS = {
    "function_declaration",
    "function_definition",
    "method_definition",
    "method_declaration",
    "constructor_declaration",
    "delegate_declaration",
    "arrow_function",
    "function_or_value_defn",  # F#
}
383
+
384
+
385
def _is_partial_modifier(node: Node, source: bytes) -> bool:
    """Tell whether ``node`` is a ``modifier`` whose text is ``partial``.

    Surrounding whitespace in the node text is ignored.
    """
    return node.type == "modifier" and _slice(source, node).strip() == "partial"
391
+
392
+
393
def _has_partial_modifier(node: Node, source: bytes) -> bool:
    """Tell whether any direct child of ``node`` is a ``partial`` modifier."""
    for candidate in node.children:
        if _is_partial_modifier(candidate, source):
            return True
    return False
395
+
396
+
397
def _namespace_name(node: Node, source: bytes) -> str | None:
    """Return the name text of a namespace declaration (C#/PHP).

    The text of the first direct child that is a ``qualified_name``,
    ``identifier`` or ``namespace_name`` is returned verbatim, so the
    separator is whatever the source used. ``namespace_name`` is PHP's
    wrapper for ``Foo\\Bar\\Baz``; ``qualified_name`` is C# / PHP ``use``
    clauses. Returns ``None`` when no such child exists.
    """
    name_node = next(
        (
            candidate
            for candidate in node.children
            if candidate.type in {"qualified_name", "identifier", "namespace_name"}
        ),
        None,
    )
    if name_node is None:
        return None
    return _slice(source, name_node)
405
+
406
+
407
# Class-like node types for which ``_walk`` looks for a ``class_body`` child
# and, if found, pushes that body's field-type map onto ``class_stack``.
_CLASS_DECL_NODE_TYPES = frozenset(
    {"class_declaration", "abstract_class_declaration", "class"}
)
410
+
411
+
412
def _walk(
    node: Node,
    source: bytes,
    ex: ExtractedFile,
    ns_stack: list[str],
    class_stack: list[dict[str, str]],
) -> None:
    """Recursively visit ``node`` and its descendants, filling in ``ex``.

    Args:
        node: Current syntax-tree node.
        source: The exact byte buffer that was parsed (used for slicing text).
        ex: Accumulator; symbols, imports, injects, calls and references
            are appended to its lists in visit order.
        ns_stack: Enclosing namespace names, outermost first. Mutated in
            place; block-scoped namespaces are popped on exit, file-scoped
            ones are left on the stack.
        class_stack: One field-name -> type-name map per enclosing class
            that has a ``class_body``. Mutated in place, popped on exit.
    """
    t = node.type
    # Track what THIS invocation pushed so only that is popped at the end.
    pushed_ns = False
    pushed_class = False
    if t in _CLASS_DECL_NODE_TYPES:
        body = None
        for child in node.children:
            if child.type == "class_body":
                body = child
                break
        if body is not None:
            class_stack.append(_ts_class_field_types(body, source))
            pushed_class = True
    if t in _BLOCK_NAMESPACE_NODE_TYPES:
        ns = _namespace_name(node, source)
        if ns:
            ns_stack.append(ns)
            pushed_ns = True
    elif t in _FILE_SCOPED_NAMESPACE_NODE_TYPES:
        # C# 10 file-scoped namespace scopes the rest of the file.
        # Push and never pop within this walk — there is at most one.
        ns = _namespace_name(node, source)
        if ns:
            ns_stack.append(ns)
    elif t == _PHP_NAMESPACE_NODE_TYPE:
        # PHP ``namespace X { ... }`` has a ``compound_statement`` body —
        # push+pop so symbols outside the braces stay unqualified.
        # ``namespace X;`` has no body — push without pop so the rest of
        # the file (parsed as sibling nodes of the ``program``) inherits it.
        ns = _namespace_name(node, source)
        if ns:
            ns_stack.append(ns)
            if any(c.type == "compound_statement" for c in node.children):
                pushed_ns = True

    # A node is classified as at most one of: symbol, import, inject, call.
    if t in SYMBOL_NODE_TYPES:
        name = _symbol_name(node, source)
        if name:
            partial = (
                t in _PARTIAL_CAPABLE_KINDS and _has_partial_modifier(node, source)
            )
            param_count = _param_count(node) if t in _CALLABLE_KINDS else None
            ex.symbols.append(
                Symbol(
                    name=name,
                    kind=t,
                    # tree-sitter rows are 0-based; stored lines are 1-based.
                    start_line=node.start_point[0] + 1,
                    end_line=node.end_point[0] + 1,
                    snippet=_slice(source, node),
                    namespace=".".join(ns_stack) if ns_stack else None,
                    partial=partial,
                    param_count=param_count,
                )
            )
    elif t in IMPORT_NODE_TYPES:
        if t == "namespace_use_declaration":
            # PHP allows multiple clauses per statement; emit each FQCN.
            ex.imports.extend(_php_use_imports(node, source))
        else:
            mod = _import_module(node, source)
            if mod:
                ex.imports.append(mod)
    elif t in INJECT_NODE_TYPES:
        injected = _inject_type(node, source)
        if injected:
            ex.injects.append(injected)
    elif t in CALL_NODE_TYPES:
        # Angular DI: ``inject(Token)`` becomes an INJECTS edge instead
        # of a (stoplisted) CALL. Without this, the entire DI graph for
        # Angular 14+ codebases is invisible.
        token = _angular_inject_token(node, source)
        if token:
            ex.injects.append(token)
        else:
            callee = _callee_name(node, source)
            if callee:
                receiver_type: str | None = None
                if class_stack:
                    # NOTE(review): this local shadows the ``field`` helper
                    # imported from ``dataclasses`` for the rest of this
                    # function; harmless here since ``_walk`` never calls it.
                    field = _this_field_receiver(node, source)
                    if field:
                        # Only the innermost class's field map is consulted.
                        receiver_type = class_stack[-1].get(field)
                ex.calls.append(
                    Call(
                        name=callee,
                        arity=_call_arity(node),
                        receiver_type=receiver_type,
                    )
                )

    # Type-reference harvesting is independent of the classification above:
    # the checks below run for every node, including symbols and calls.
    if t in TYPE_FIELD_NODE_TYPES:
        # ``method_declaration`` exposes the return type via ``returns``
        # in some grammars; everything else uses ``type``.
        type_node = node.child_by_field_name("type") or node.child_by_field_name(
            "returns"
        )
        if type_node is not None:
            _collect_type_refs(type_node, source, ex.references)
    if t in TYPE_CHILDREN_NODE_TYPES:
        for child in node.children:
            if child.type in {
                ",", ":", "(", ")", "<", ">",
                "where", "extends", "implements",
                "|", "&",  # PHP union/intersection separators
            }:
                continue
            _collect_type_refs(child, source, ex.references)
    if t in _PHP_TYPED_PARENT_NODE_TYPES:
        # PHP property/parameter/return types are positional children
        # (no ``type`` field). Find any type-wrapper child and harvest
        # the inner identifiers. The wrapper-only filter keeps us from
        # over-walking unrelated children like ``visibility_modifier``
        # or ``variable_name`` that share the parent node.
        for child in node.children:
            if child.type in _PHP_TYPE_WRAPPER_NODE_TYPES:
                _collect_type_refs(child, source, ex.references)
    # C# pattern / cast / typeof: tree-sitter doesn't expose a `type`
    # field on these, so collect the type child positionally.
    # NOTE(review): most of these node types are also listed in
    # ``TYPE_FIELD_NODE_TYPES``; if a grammar does expose a ``type`` field
    # on them, the same name may be appended twice — confirm whether
    # duplicates in ``ex.references`` matter downstream.
    if t == "cast_expression":
        # `(Type)expr` — type is the single child between `(` and `)`.
        between = []
        opened = False
        for child in node.children:
            if child.type == "(":
                opened = True
                continue
            if child.type == ")":
                break
            if opened:
                between.append(child)
        for c in between:
            _collect_type_refs(c, source, ex.references)
    elif t in {"as_expression", "is_expression"}:
        # `value as Type` / `value is Type` — type follows the keyword.
        keyword = "as" if t == "as_expression" else "is"
        seen_kw = False
        for child in node.children:
            if not seen_kw:
                # Skip the operand and the keyword token itself.
                if child.type == keyword:
                    seen_kw = True
                continue
            _collect_type_refs(child, source, ex.references)
    elif t == "is_pattern_expression":
        # `value is Pattern` — find declaration_pattern / type_pattern
        # children and pick their type identifier(s).
        for child in node.children:
            if child.type in {"declaration_pattern", "type_pattern", "recursive_pattern"}:
                # First identifier-bearing sub is the type name.
                for sub in child.children:
                    if sub.type in {"identifier", "type_identifier", "qualified_name", "generic_name"}:
                        _collect_type_refs(sub, source, ex.references)
                        break
    elif t in {"typeof_expression", "sizeof_expression", "default_expression"}:
        # `typeof(Type)` — type between the parens.
        opened = False
        for child in node.children:
            if child.type == "(":
                opened = True
                continue
            if child.type == ")":
                break
            if opened and child.type not in {","}:
                _collect_type_refs(child, source, ex.references)

    # Descend with the same (shared, mutable) stacks so children see the
    # namespace / class context established above.
    for child in node.children:
        _walk(child, source, ex, ns_stack, class_stack)

    if pushed_ns:
        ns_stack.pop()
    if pushed_class:
        class_stack.pop()
588
+
589
+
590
+ def _slice(source: bytes, node: Node) -> str:
591
+ """Return UTF-8 text at the node's byte range.
592
+
593
+ Tree-sitter reports byte offsets into the parsed buffer, not
594
+ character offsets. Slicing a Python ``str`` with those offsets
595
+ silently chops identifiers on files that contain any non-ASCII
596
+ bytes (e.g. French C# with accents). Slicing ``bytes`` then
597
+ decoding fixes the off-by-many-bytes drift.
598
+ """
599
+ return source[node.start_byte : node.end_byte].decode("utf-8", errors="replace")
600
+
601
+
602
# Symbol node types for which ``_symbol_name`` falls back to a breadth-first
# search of the whole subtree (``_first_identifier_deep``) when the node has
# no ``name`` field, instead of looking only at direct children.
_FSHARP_DEEP_NAME_NODES = {
    "function_or_value_defn",
    "type_definition",
}
606
+
607
+
608
def _first_identifier_deep(node: Node, source: bytes) -> str | None:
    """BFS for the first identifier-bearing token inside ``node``.

    Nodes are visited level by level, left to right. The first child of
    type ``identifier`` or ``type_identifier`` that is encountered wins.
    ``node`` itself is never a candidate, only its descendants.

    Args:
        node: Root of the subtree to search.
        source: The byte buffer the tree was parsed from.

    Returns:
        The identifier's text, or ``None`` when the subtree has none.
    """
    # A per-level frontier gives the same visit order as a FIFO queue but
    # avoids ``list.pop(0)``, which shifts the whole list on every call
    # and makes the search quadratic on large subtrees.
    frontier: list[Node] = [node]
    while frontier:
        next_frontier: list[Node] = []
        for current in frontier:
            for child in current.children:
                if child.type in {"identifier", "type_identifier"}:
                    return _slice(source, child)
                next_frontier.append(child)
        frontier = next_frontier
    return None
618
+
619
+
620
def _symbol_name(node: Node, source: bytes) -> str | None:
    """Work out the declared name of a symbol node.

    Lookup order:
      1. the grammar's ``name`` field, when present;
      2. for the F# node types in ``_FSHARP_DEEP_NAME_NODES``, the first
         identifier anywhere in the subtree;
      3. otherwise the first direct child that is an ``identifier``,
         ``type_identifier`` or ``property_identifier``.

    Returns ``None`` when none of these produce a name.
    """
    named = node.child_by_field_name("name")
    if named is not None:
        return _slice(source, named)
    if node.type in _FSHARP_DEEP_NAME_NODES:
        return _first_identifier_deep(node, source)
    direct = next(
        (
            candidate
            for candidate in node.children
            if candidate.type
            in {"identifier", "type_identifier", "property_identifier"}
        ),
        None,
    )
    return None if direct is None else _slice(source, direct)
630
+
631
+
632
def _php_use_imports(node: Node, source: bytes) -> list[str]:
    """Collect the imported names of a PHP ``use`` statement.

    Every ``namespace_use_clause`` child contributes at most one entry:
    the stripped text of its first ``qualified_name`` or ``name`` child.
    That covers ``use Foo\\Bar;``, ``use A\\B, C\\D;`` and
    ``use A\\B as Alias;`` — the alias comes after the imported name and
    is therefore ignored, since the graph records what is imported, not
    the local rebinding. A bare ``use Alias;`` yields its ``name`` child.
    """
    imported: list[str] = []
    for clause in node.children:
        if clause.type != "namespace_use_clause":
            continue
        target = next(
            (part for part in clause.children if part.type in {"qualified_name", "name"}),
            None,
        )
        if target is not None:
            imported.append(_slice(source, target).strip())
    return imported
651
+
652
+
653
def _import_module(node: Node, source: bytes) -> str | None:
    """Return the module / namespace named by an import node.

    The ``module_name`` field (then the ``name`` field) takes priority.
    Python's ``from X import Y`` exposes the module that way, whereas a
    positional scan of ``from ..pkg.mod import Sym`` would hit ``Sym``
    first and file the import under the wrong key. Only when neither
    field exists is the first child of a known name-bearing type used.
    Surrounding quote characters are stripped from the result. Returns
    ``None`` when nothing matches.
    """
    by_field = node.child_by_field_name("module_name") or node.child_by_field_name("name")
    if by_field is not None:
        return _slice(source, by_field).strip("'\"")
    name_bearing_types = {
        "string",
        "string_fragment",
        "dotted_name",
        "module_name",
        "relative_import",  # Python ``..pkg.mod``
        "qualified_name",
        "namespace_name",  # VB
        "long_identifier",  # F#
        "identifier",
    }
    positional = next(
        (candidate for candidate in node.children if candidate.type in name_bearing_types),
        None,
    )
    if positional is None:
        return None
    return _slice(source, positional).strip("'\"")
677
+
678
+
679
# Node types that ``_param_count`` accepts as "the parameter list" of a
# callable; only the first matching direct child is inspected.
_PARAMETER_LIST_TYPES = {
    "parameter_list",
    "formal_parameters",
    "parameters",  # F# / Python
}

# Children of a parameter list that count as one parameter each. Anything
# not listed here (punctuation, and any node type missing from this set)
# is ignored by ``_param_count``.
_PARAMETER_NODE_TYPES = {
    "parameter",
    "required_parameter",
    "optional_parameter",
    "rest_parameter",
    "typed_parameter",
    "typed_default_parameter",
    "default_parameter",
    "identifier",  # F# value bindings expose bare identifiers
    # PHP
    "simple_parameter",
    "variadic_parameter",
    "property_promotion_parameter",  # PHP 8 ctor promotion
}
699
+
700
+
701
def _param_count(node: Node) -> int | None:
    """Return how many parameters a callable declaration declares.

    The first direct child whose type is in ``_PARAMETER_LIST_TYPES`` is
    taken as the parameter list, and its children whose type is in
    ``_PARAMETER_NODE_TYPES`` are counted (so ``(``, ``)`` and ``,`` do
    not contribute). When the node has no parameter-list child the
    result is ``None`` rather than 0, so callers can tell "unknown"
    apart from "takes nothing".
    """
    for candidate in node.children:
        if candidate.type not in _PARAMETER_LIST_TYPES:
            continue
        return sum(
            1 for entry in candidate.children if entry.type in _PARAMETER_NODE_TYPES
        )
    return None
718
+
719
+
720
+ def _call_arity(node: Node) -> int:
721
+ """Count arguments at a call site.
722
+
723
+ Returns the number of argument children in the call's argument
724
+ list. Falls back to ``0`` when we can't find one — that matches
725
+ what tree-sitter reports for property/field references parsed as
726
+ invocation_expression (rare, but happens in C# generated code).
727
+ """
728
+ for child in node.children:
729
+ if child.type in {"argument_list", "arguments"}:
730
+ count = 0
731
+ for sub in child.children:
732
+ if sub.type in {"argument", "spread_element"}:
733
+ count += 1
734
+ elif sub.type not in {"(", ")", ",", "{", "}"}:
735
+ # Some grammars (Python) emit expression children
736
+ # directly without an ``argument`` wrapper.
737
+ count += 1
738
+ return count
739
+ return 0
740
+
741
+
742
def _collect_type_refs(node: Node, source: bytes, out: list[str]) -> None:
    """Append every type name referenced inside a type-expression subtree.

    Dispatch by node type, first match wins:

    - ``predefined_type`` / ``implicit_type`` / ``this_type`` /
      ``primitive_type``: ignored (no graph value).
    - ``identifier`` / ``type_identifier`` / ``name`` (PHP's identifier
      node): emit the text unless it is empty or a known primitive.
    - PHP wrapper types (``?Foo``, ``Foo|Bar``, ``Foo&Bar``,
      ``(A&B)|C``): recurse into every non-punctuation child.
    - ``qualified_name``: recurse into the right-most name-bearing child
      only; left segments are usually namespaces, not types.
    - ``generic_name``: emit the generic's own name, then recurse into
      each type argument.
    - anything else (nullable, array, pointer, tuple, ...): recurse into
      all children.

    Results are appended to ``out`` in encounter order; nothing is
    returned and duplicates are not removed.
    """
    kind = node.type
    if kind in {"predefined_type", "implicit_type", "this_type", "primitive_type"}:
        return

    if kind in {"identifier", "type_identifier", "name"}:
        text = _slice(source, node).strip()
        if text and text not in _PRIMITIVE_TYPE_NAMES:
            out.append(text)
        return

    if kind in _PHP_TYPE_WRAPPER_NODE_TYPES:
        for part in node.children:
            if part.type not in {"?", "|", "&", "(", ")"}:
                _collect_type_refs(part, source, out)
        return

    if kind == "qualified_name":
        # The trailing segment may be a plain identifier (``Foo.Bar``), a
        # ``generic_name`` (``Foo.Bar.List<T>``), PHP's ``name``
        # (``App\Repo\UserRepo``), or a nested ``qualified_name`` when
        # the grammar builds a left-leaning tree.
        segments = [
            part
            for part in node.children
            if part.type
            in {"identifier", "type_identifier", "generic_name", "qualified_name", "name"}
        ]
        if segments:
            _collect_type_refs(segments[-1], source, out)
        return

    if kind == "generic_name":
        # ``List<int, Foo>``: the generic's own name first ...
        head = next(
            (
                part
                for part in node.children
                if part.type in {"identifier", "type_identifier"}
            ),
            None,
        )
        if head is not None:
            text = _slice(source, head).strip()
            if text and text not in _PRIMITIVE_TYPE_NAMES:
                out.append(text)
        # ... then whatever its type arguments reference.
        for arg_list in node.children:
            if arg_list.type not in {"type_argument_list", "type_arguments"}:
                continue
            for arg in arg_list.children:
                if arg.type not in {"<", ">", ","}:
                    _collect_type_refs(arg, source, out)
        return

    # Wrapper / composite type nodes: the names live somewhere below.
    for part in node.children:
        _collect_type_refs(part, source, out)
809
+
810
+
811
+ _CLASS_BODY_NODE_TYPES = frozenset({"class_body", "object_type"})
812
+ _TS_FIELD_DECL_TYPES = frozenset(
813
+ {
814
+ "public_field_definition",
815
+ "property_definition",
816
+ "property_signature",
817
+ "abstract_method_signature",
818
+ }
819
+ )
820
+
821
+
822
def _ts_class_field_types(body: Node, source: bytes) -> dict[str, str]:
    """Map of ``field_name → type_name`` for a TS class body.

    Two kinds of members contribute entries:

    1. Field-like declarations (node types in ``_TS_FIELD_DECL_TYPES``).
       The type is read from the ``: Type`` annotation (``private foo: Bar``)
       when one is found; otherwise from an initializer of the form
       ``inject(Token)`` (Angular 14+ DI), so an injected dependency
       surfaces its type even when no explicit annotation is written.
    2. Constructor parameter properties
       (``constructor(private foo: Bar) {}``), which TypeScript treats as
       fields. A parameter counts as a property when it carries an
       accessibility modifier (``private`` / ``public`` / ``protected``)
       **or** ``readonly`` — TypeScript accepts either (or both) as
       parameter-property syntax. Plain constructor parameters live in
       local scope and are ignored.

    Members whose name or type cannot be determined are omitted. When two
    members share a name, the one appearing later in the body wins.

    Args:
        body: The class body node whose direct children are scanned.
        source: Source bytes the node offsets refer to.

    Returns:
        Dict mapping each resolvable field name to its type name.
    """
    # Child node types that turn a constructor parameter into a property.
    # ``readonly`` is matched as a bare keyword token alongside the named
    # ``accessibility_modifier`` node.
    property_modifiers = {"accessibility_modifier", "readonly"}

    field_types: dict[str, str] = {}
    for member in body.children:
        if member.type in _TS_FIELD_DECL_TYPES:
            # Prefer the grammar's ``name`` field; fall back to the first
            # ``property_identifier`` child when the field is not exposed.
            name_node = member.child_by_field_name("name")
            if name_node is None:
                name_node = next(
                    (c for c in member.children if c.type == "property_identifier"),
                    None,
                )
            field_name = _slice(source, name_node) if name_node is not None else None
            if not field_name:
                continue
            type_name = _ts_field_type_from_annotation(member, source)
            if type_name is None:
                type_name = _ts_field_type_from_inject_init(member, source)
            if type_name:
                field_types[field_name] = type_name
            continue

        if member.type != "method_definition":
            continue

        # Constructor parameter properties live on the formal_parameters.
        name_node = member.child_by_field_name("name")
        if name_node is None or _slice(source, name_node) != "constructor":
            continue
        params = member.child_by_field_name("parameters")
        if params is None:
            params = next(
                (c for c in member.children if c.type == "formal_parameters"),
                None,
            )
        if params is None:
            continue

        for param in params.children:
            if param.type not in {"required_parameter", "optional_parameter"}:
                continue
            if not any(c.type in property_modifiers for c in param.children):
                continue
            ident = next((c for c in param.children if c.type == "identifier"), None)
            pname = _slice(source, ident) if ident is not None else None
            if not pname:
                continue
            type_name = _ts_field_type_from_annotation(param, source)
            if type_name:
                field_types[pname] = type_name
    return field_types
893
+
894
+
895
def _ts_field_type_from_annotation(node: Node, source: bytes) -> str | None:
    """Read ``: <Type>`` annotation off a field / param node.

    Only the first ``type_annotation`` child of ``node`` is inspected. Within
    it, a bare ``type_identifier`` / ``identifier`` is returned as-is; for a
    ``generic_type`` the first identifier beneath it is returned, so the type
    arguments are dropped.

    Args:
        node: Field or parameter node that may carry a type annotation.
        source: Source bytes the node offsets refer to.

    Returns:
        The annotated type's name, or ``None`` when there is no annotation
        or it has no identifier in one of the shapes above.
    """
    for child in node.children:
        if child.type == "type_annotation":
            for sub in child.children:
                if sub.type in {"type_identifier", "identifier"}:
                    return _slice(source, sub)
                if sub.type == "generic_type":
                    # Generic annotation: report the outer name only.
                    for inner in sub.children:
                        if inner.type in {"type_identifier", "identifier"}:
                            return _slice(source, inner)
            # Annotation present but not in a recognised shape.
            return None
    return None
908
+
909
+
910
def _ts_field_type_from_inject_init(node: Node, source: bytes) -> str | None:
    """Return the DI token of a field's ``= inject(Token)`` initializer.

    Only the first ``call_expression`` that is a direct child of ``node`` is
    considered; it is handed to ``_angular_inject_token``. Returns ``None``
    when the field has no such child or the call is not an ``inject`` call.
    """
    first_call = next(
        (kid for kid in node.children if kid.type == "call_expression"),
        None,
    )
    if first_call is None:
        return None
    return _angular_inject_token(first_call, source)
916
+
917
+
918
def _this_field_receiver(node: Node, source: bytes) -> str | None:
    """Return ``<field>`` for a call whose callee is ``this.<field>.<method>``.

    The callee must be a ``member_expression`` whose object is itself a
    ``member_expression`` of the form ``this.<property_identifier>``. Every
    other receiver shape (chained calls, computed members, bare identifiers)
    yields ``None`` — too ambiguous for the receiver-type table to help.
    """
    callee = node.child_by_field_name("function") or node.child_by_field_name("callee")
    if callee is None or callee.type != "member_expression":
        return None

    receiver = callee.child_by_field_name("object")
    if receiver is None or receiver.type != "member_expression":
        return None

    base = receiver.child_by_field_name("object")
    field = receiver.child_by_field_name("property")
    rooted_at_this = base is not None and base.type == "this"
    plain_field = field is not None and field.type == "property_identifier"
    if rooted_at_this and plain_field:
        return _slice(source, field)
    return None
938
+
939
+
940
def _angular_inject_token(node: Node, source: bytes) -> str | None:
    """Extract the DI token from an Angular ``inject(Token)`` call node.

    Angular 14+ replaced constructor-DI with the ``inject()`` primitive.
    ``inject`` is listed in ``CALLEE_STOPLIST``, so without this hook the
    call would be dropped and the DI edge lost.

    The callee text (with anything from the first ``<`` onward removed) must
    be exactly ``inject``; qualified forms such as ``foo.inject`` are
    rejected. The result is the trailing identifier of the first argument
    that has one, or ``None`` when the call does not match or no argument
    yields an identifier.
    """
    callee = node.child_by_field_name("function") or node.child_by_field_name("callee")
    if callee is None:
        return None
    callee_text = _slice(source, callee).strip()
    if callee_text.partition("<")[0] != "inject":
        return None

    arg_list = next(
        (kid for kid in node.children if kid.type in {"arguments", "argument_list"}),
        None,
    )
    if arg_list is None:
        return None

    for arg in arg_list.children:
        if arg.type in {"(", ")", ","}:
            continue
        # Some grammars wrap each arg in `argument`; look at its children
        # instead of the wrapper itself.
        candidates = arg.children if arg.type == "argument" else (arg,)
        for candidate in candidates:
            token = _last_identifier(_slice(source, candidate).strip())
            if token:
                return token
    return None
978
+
979
+
980
def _inject_type(node: Node, source: bytes) -> str | None:
    """Pull the injected type name out of a Razor ``@inject`` directive.

    Grammar: ``@inject <Type> <Member>``. Tree-sitter wraps the
    `<Type> <Member>` pair in a ``variable_declaration``; the type is
    the first ``identifier`` / ``qualified_name`` / ``generic_name``
    child. We capture the **type name only** — for ``ILogger<Foo>``
    that's ``ILogger`` (the resolver matches by bare identifier;
    generic parameters live at the call site, not in the graph).

    NOTE(review): a ``qualified_name`` is returned as its full sliced text,
    not reduced to its right-most segment — confirm the resolver expects that.

    Args:
        node: The directive node; only its first ``variable_declaration``
            child is examined.
        source: Source bytes the node offsets refer to.

    Returns:
        The type's text, or ``None`` when no ``variable_declaration`` child
        exists or it holds no recognisable type node.
    """
    for child in node.children:
        if child.type == "variable_declaration":
            for sub in child.children:
                if sub.type in {"identifier", "qualified_name", "type_identifier"}:
                    return _slice(source, sub)
                if sub.type == "generic_name":
                    # Drop the ``<T, ...>`` tail by finding the first
                    # plain identifier under it.
                    for inner in sub.children:
                        if inner.type in {"identifier", "type_identifier"}:
                            return _slice(source, inner)
            # Only the first declaration is considered.
            break
    return None
1003
+
1004
+
1005
# Callee names that belong to the JS runtime, Angular, RxJS or common test
# frameworks. ``_callee_name`` returns ``None`` for these at extract time so
# they never enter the graph as CALLS edges; they pollute "who calls X"
# queries with high-frequency noise.
CALLEE_STOPLIST: frozenset[str] = frozenset(
    # JS builtins
    {
        "console", "JSON", "Math", "Object", "Array", "Promise", "Number",
        "String", "Boolean", "Date", "RegExp", "Symbol", "Map", "Set",
        "parseInt", "parseFloat", "isNaN", "isFinite",
        "setTimeout", "setInterval", "clearTimeout", "clearInterval",
        "fetch", "structuredClone", "queueMicrotask",
    }
    # Angular DI / lifecycle
    | {
        "inject", "Injectable", "Component", "Directive", "Pipe", "NgModule",
        "Input", "Output", "ViewChild", "ContentChild", "HostListener",
        "HostBinding",
    }
    # RxJS operators commonly chained via .pipe()
    | {
        "pipe", "subscribe", "map", "filter", "tap", "switchMap", "mergeMap",
        "concatMap", "exhaustMap", "catchError", "take", "takeUntil", "first",
        "of", "from", "EMPTY", "throwError", "combineLatest", "forkJoin",
        "BehaviorSubject", "Subject", "ReplaySubject",
    }
    # Generic test helpers
    | {
        "describe", "it", "test", "expect", "beforeEach", "afterEach",
        "beforeAll", "afterAll", "jest", "vi", "spyOn",
    }
)
1030
+
1031
+
1032
def _callee_name(node: Node, source: bytes) -> str | None:
    """Return the trailing identifier of a call expression's callee.

    ``foo()`` → ``foo``; ``this.svc.method()`` → ``method``;
    ``a.b.c()`` → ``c``. Computed (``a[b]()``) and chained (``f()()``)
    callees collapse to ``None`` — too ambiguous to resolve.

    Names listed in :data:`CALLEE_STOPLIST` also produce ``None`` so they
    don't enter the graph as noise.
    """
    # Field lookup order matters:
    #   * ``type`` — ``new Foo()`` exposes the constructor target here;
    #     without it the first-child fallback would land on the ``new``
    #     keyword and miss every constructor invocation.
    #   * ``function`` / ``callee`` — plain calls.
    #   * ``name`` — PHP ``member_call_expression`` and
    #     ``scoped_call_expression`` expose the method name here. Without
    #     it, ``$this->repo->byId(...)`` would fall through to the
    #     first-child fallback, land on the ``member_access_expression``
    #     text (``$this->repo``), and ``_last_identifier`` would reject the
    #     ``->`` separator — every PHP method call would disappear from the
    #     call graph.
    target = None
    for field in ("type", "function", "callee", "name"):
        target = node.child_by_field_name(field)
        if target:
            break

    if target is None and node.type == "object_creation_expression":
        # PHP ``new Foo()`` / ``new App\Foo()`` — no field names on
        # children. The first child is the ``new`` keyword; the class
        # name follows. Without this, the first-child fallback returns
        # ``new`` as the callee for every PHP ctor call.
        target = next(
            (
                kid
                for kid in node.children
                if kid.type in {"name", "identifier", "qualified_name", "type_identifier"}
            ),
            None,
        )

    # Last resort: whatever the first child is.
    if target is None and node.children:
        target = node.children[0]
    if target is None:
        return None

    callee_text = _slice(source, target).partition("(")[0].strip()
    ident = _last_identifier(callee_text)
    if ident is None or ident in CALLEE_STOPLIST:
        return None
    return ident
1078
+
1079
+
1080
+ _IDENT_RE = re.compile(r"[A-Za-z_$][\w$]*")
1081
+
1082
+
1083
+ def _last_identifier(expr: str) -> str | None:
1084
+ """Extract the trailing identifier from a (possibly chained) expression.
1085
+
1086
+ ``this.foo.bar`` → ``bar``
1087
+ ``MyClass.staticFn`` → ``staticFn``
1088
+ ``foo`` → ``foo``
1089
+ ``arr[i]`` → ``None`` (computed)
1090
+ ``f()`` → ``None`` (chained call; shouldn't normally hit)
1091
+ """
1092
+ # Reject anything with brackets or calls in the trailing position.
1093
+ if expr.endswith("]") or expr.endswith(")"):
1094
+ return None
1095
+ # PHP fully-qualified names use ``\`` as the namespace separator
1096
+ # (``App\Repo\UserRepo``). Normalize to ``.`` so the chained-name
1097
+ # split below picks the trailing class/method identifier.
1098
+ parts = expr.replace("\\", ".").split(".")
1099
+ last = parts[-1].strip()
1100
+ if not last:
1101
+ return None
1102
+ m = _IDENT_RE.fullmatch(last)
1103
+ return m.group(0) if m else None
1104
+
1105
+
1106
# Directory names skipped by default when ``Extractor`` walks a tree.
DEFAULT_IGNORE_DIRS: tuple[str, ...] = (
    # General-purpose entries
    (
        ".git", "node_modules", ".venv", "venv", "dist", "build",
        ".next", ".nuxt", "out", "coverage", ".turbo", ".cache",
        "__pycache__", ".mypy_cache", ".pytest_cache", ".ruff_cache",
        "target",
    )
    # Angular / Vite / Nx / Yarn / Parcel / SvelteKit caches and tarballs
    + (
        ".angular", ".nx", ".yarn", ".parcel-cache", ".svelte-kit",
        "bower_components", "vendor", "tmp",
    )
    # .NET build output / IDE caches
    + ("bin", "obj", "packages", "TestResults", ".vs", "artifacts")
)
1141
+
1142
+
1143
class Extractor:
    """Convenience wrapper to walk a directory.

    Attributes are plain configuration:

    * ``ignore_dirs`` — directory names that exclude everything beneath
      them, wherever they occur below the walk root.
    * ``respect_gitignore`` — when true, files matched by the root's
      ``GitignoreMatcher`` are skipped as well.
    """

    def __init__(
        self,
        ignore_dirs: tuple[str, ...] = DEFAULT_IGNORE_DIRS,
        *,
        respect_gitignore: bool = True,
    ) -> None:
        self.ignore_dirs = ignore_dirs
        self.respect_gitignore = respect_gitignore

    def walk(self, root: str | Path):
        """Yield the ``extract_file`` result for every eligible file under ``root``.

        A file is skipped when any directory between ``root`` and the file
        is named in ``ignore_dirs``, when the gitignore matcher (if enabled)
        matches it, or when ``extract_file`` returns ``None``.

        Only path components *below* ``root`` are tested against
        ``ignore_dirs``. Testing the absolute path would silently drop every
        file whenever the checkout itself lives under a directory such as
        ``/tmp``, ``build`` or ``vendor``. The file's own name is not tested
        either, since the entries are directory names.

        Args:
            root: Directory to walk; resolved to an absolute path first.

        Yields:
            Each non-``None`` value returned by ``extract_file``.
        """
        from .gitignore import GitignoreMatcher

        root_path = Path(root).resolve()
        matcher = (
            GitignoreMatcher.from_root(root_path) if self.respect_gitignore else None
        )
        ignore_set = set(self.ignore_dirs)
        for p in root_path.rglob("*"):
            # Cheap name test first: avoids a stat() for everything that
            # sits inside an ignored directory.
            parent_dirs = p.relative_to(root_path).parts[:-1]
            if not ignore_set.isdisjoint(parent_dirs):
                continue
            if not p.is_file():
                continue
            if matcher is not None and matcher.match(p, is_dir=False):
                continue
            ex = extract_file(p)
            if ex is not None:
                yield ex