chisel-test-impact 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
chisel/__init__.py ADDED
@@ -0,0 +1 @@
1
# Version string of the ``chisel`` package.
__version__ = "0.5.0"
chisel/ast_utils.py ADDED
@@ -0,0 +1,578 @@
1
+ """Multi-language AST extraction for Chisel.
2
+
3
+ Extracts code units (functions, classes, structs, etc.) from source files
4
+ across Python, JavaScript/TypeScript, Go, Rust, C#, Java, C/C++, Kotlin,
5
+ Swift, PHP, Ruby, and Dart. Fully self-contained with zero external
6
+ dependencies beyond the Python standard library.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import ast
12
+ import hashlib
13
+ import re
14
+ from dataclasses import dataclass
15
+ from functools import partial
16
+ from pathlib import Path
17
+
18
# Directory names that are always skipped when walking the project tree.
# Kept as a set so membership tests are cheap; listed alphabetically.
_SKIP_DIRS = {
    ".eggs",
    ".git",
    ".mypy_cache",
    ".pytest_cache",
    ".ruff_cache",
    ".tox",
    ".venv",
    "Pods",
    "__pycache__",
    "build",
    "dist",
    "env",
    "node_modules",
    "target",
    "vendor",
    "venv",
}
24
+
25
+
26
@dataclass
class CodeUnit:
    """A named span of source code (function, class, struct, ...) in one file.

    The extractors in this module fill ``line_start`` and ``line_end`` with
    1-based line numbers.
    """

    # Path of the file the unit was found in, as handed to the extractor.
    file_path: str
    # Identifier of the unit.
    name: str
    # Kind of unit: "function", "async_function", "class", "struct",
    # "enum", "impl", etc.
    unit_type: str
    # Line on which the unit's declaration starts.
    line_start: int
    # Line on which the unit ends.
    line_end: int
35
+
36
+
37
+ # ---------------------------------------------------------------------------
38
+ # Language detection
39
+ # ---------------------------------------------------------------------------
40
+
41
# Lower-case file suffix (including the dot) -> language identifier.
# Declared per language and flattened, so each language's suffixes sit together.
_EXTENSION_MAP = {
    ext: language
    for language, extensions in (
        ("python", (".py", ".pyw")),
        ("javascript", (".js", ".jsx", ".mjs", ".cjs")),
        ("typescript", (".ts", ".tsx")),
        ("go", (".go",)),
        ("rust", (".rs",)),
        ("csharp", (".cs",)),
        ("java", (".java",)),
        ("c", (".c", ".h")),
        ("cpp", (".cc", ".cpp", ".cxx", ".hpp", ".hxx")),
        ("kotlin", (".kt", ".kts")),
        ("swift", (".swift",)),
        ("php", (".php",)),
        ("ruby", (".rb",)),
        ("dart", (".dart",)),
    )
    for ext in extensions
}


def detect_language(file_path: str) -> str | None:
    """Map *file_path* to a language identifier using its final suffix.

    The suffix is compared case-insensitively.  Returns ``None`` when the
    path has no suffix or the suffix is not in ``_EXTENSION_MAP``.
    """
    suffix = Path(file_path).suffix
    return _EXTENSION_MAP.get(suffix.lower())
75
+
76
+
77
+ # ---------------------------------------------------------------------------
78
+ # File hashing
79
+ # ---------------------------------------------------------------------------
80
+
81
+
82
def compute_file_hash(file_path: str) -> str:
    """Hash the file at *file_path* with SHA-256 and return the hex digest.

    The file is opened in binary mode and fed to the hasher in 8192-byte
    reads, so the whole file is never held in memory at once.
    """
    digest = hashlib.sha256()
    with open(file_path, "rb") as stream:
        while True:
            piece = stream.read(8192)
            if not piece:
                # An empty read signals end of file.
                break
            digest.update(piece)
    return digest.hexdigest()
89
+
90
+
91
+ # ---------------------------------------------------------------------------
92
+ # Brace-matching helper (shared by all brace-delimited languages)
93
+ # ---------------------------------------------------------------------------
94
+
95
+
96
def _find_block_end(lines: list[str], start_idx: int) -> int:
    """Find the line number (1-based) of the closing brace for a block.

    Scans forward from *start_idx* (0-based index into *lines*) looking for
    the first ``{``.  Once found, tracks brace depth and returns the 1-based
    line number where depth returns to zero.

    Returns ``start_idx + 1`` (the 1-based line of the start line itself)
    when the declaration has no body: either no ``{`` exists in the rest of
    *lines*, or a ``;`` outside parentheses is reached before any ``{``
    (prototype, forward declaration, one-line statement).  Without that
    check a bodiless declaration would be given the extent of whatever
    block happens to follow it.  If a block is opened but never closed,
    ``len(lines)`` is returned.

    String literals and comments are removed from each line with
    ``_strip_strings_and_comments`` before counting, so ``"{"`` or ``// }``
    do not cause false matches.
    """
    depth = 0
    paren_depth = 0
    found_open = False

    for i in range(start_idx, len(lines)):
        cleaned = _strip_strings_and_comments(lines[i])
        for ch in cleaned:
            if ch == "{":
                depth += 1
                found_open = True
            elif ch == "}":
                if not found_open:
                    # A closer seen before our own opener belongs to some
                    # other block (e.g. "} else {"); it must not unbalance
                    # the count for the block we are measuring.
                    continue
                depth -= 1
                if depth == 0:
                    return i + 1  # 1-based
            elif not found_open:
                if ch == "(":
                    paren_depth += 1
                elif ch == ")":
                    if paren_depth > 0:
                        paren_depth -= 1
                elif ch == ";" and paren_depth == 0:
                    # Statement ended before any body was opened.
                    return start_idx + 1

    if found_open:
        return len(lines)
    return start_idx + 1


def _strip_strings_and_comments(line: str) -> str:
    """Remove string literals, ``//`` comments, and ``/* */`` blocks from a line.

    Works on a single line in isolation: a ``/*`` that is not closed on the
    same line drops the remainder of that line, and continuation lines of a
    multi-line comment or string are not recognised as such.

    ``"``, ``'`` and backtick all open a literal; a backslash inside a
    literal escapes the following character.  An unterminated ``"`` or
    backtick literal drops the rest of the line.  An unterminated ``'`` is
    treated as a lone apostrophe (for example a Rust lifetime such as
    ``'a``) and only that one character is dropped, so a trailing ``{`` on
    the same line survives.
    """
    result: list[str] = []
    i = 0
    length = len(line)
    while i < length:
        ch = line[i]
        if ch == "/" and i + 1 < length:
            nxt = line[i + 1]
            if nxt == "/":
                # Single-line comment: nothing after it counts.
                break
            if nxt == "*":
                end = line.find("*/", i + 2)
                if end == -1:
                    break  # unclosed block comment: ignore rest of line
                i = end + 2
                continue
        if ch in ('"', "'", "`"):
            # Look for the matching quote, honouring backslash escapes.
            j = i + 1
            while j < length and line[j] != ch:
                if line[j] == "\\" and j + 1 < length:
                    j += 2
                else:
                    j += 1
            if j < length:
                i = j + 1  # resume after the closing quote
            elif ch == "'":
                i += 1  # lone apostrophe: skip just this character
            else:
                break  # unterminated string: ignore rest of line
            continue
        result.append(ch)
        i += 1
    return "".join(result)
157
+
158
+
159
def _extract_brace_lang(
    file_path: str, content: str, patterns: list,
) -> list[CodeUnit]:
    """Scan *content* line by line and collect units of a brace-delimited language.

    Each line is tried against *patterns* in order; the first pattern that
    matches decides the unit and the remaining patterns are skipped for that
    line.  The unit's end line comes from ``_find_block_end``.

    Args:
        file_path: stored unchanged on every produced ``CodeUnit``.
        content: full source text of the file.
        patterns: list of (compiled_regex, unit_type) tuples.
            unit_type is a string, OR a callable(match) -> (name, type).
            With a string, the name is taken from the regex's ``name`` group.

    Returns:
        The units in the order their declaration lines appear.
    """
    source_lines = content.splitlines()
    found: list[CodeUnit] = []

    for row, text in enumerate(source_lines):
        for pattern, kind_spec in patterns:
            match = pattern.match(text)
            if match is None:
                continue
            last_line = _find_block_end(source_lines, row)
            if callable(kind_spec):
                unit_name, unit_kind = kind_spec(match)
            else:
                unit_name, unit_kind = match.group("name"), kind_spec
            # row is 0-based; CodeUnit line numbers are 1-based.
            found.append(CodeUnit(file_path, unit_name, unit_kind, row + 1, last_line))
            break

    return found
186
+
187
+
188
+ # ---------------------------------------------------------------------------
189
+ # Python extraction
190
+ # ---------------------------------------------------------------------------
191
+
192
# ``def`` / ``async def`` header: captures the leading whitespace as
# ``indent`` and the function identifier as ``name``.
_PY_FUNC_RE = re.compile(
    r"^(?P<indent>\s*)"
    r"(?:async\s+)?def\s+"
    r"(?P<name>[A-Za-z_]\w*)\s*\("
)

# ``class`` header: the name must be followed by ``(`` or ``:``.
_PY_CLASS_RE = re.compile(
    r"^(?P<indent>\s*)"
    r"class\s+"
    r"(?P<name>[A-Za-z_]\w*)\s*[\(:]"
)
198
+
199
+
200
def _extract_python_ast(file_path: str, content: str) -> list[CodeUnit]:
    """Extract code units from Python source using the ``ast`` module.

    Every ``def`` / ``async def`` becomes a ``"function"`` /
    ``"async_function"`` unit and every ``class`` a ``"class"`` unit.
    A function defined directly in a class body is named ``Class.func``;
    any other function keeps its bare name.  Units are appended in the
    order ``ast.walk`` yields the nodes.

    If *content* cannot be parsed, the work is delegated to
    ``_extract_python_regex``.  Besides ``SyntaxError`` this also covers
    ``ValueError``, which the compiler raises for source containing NUL
    bytes on interpreters older than 3.12.
    """
    try:
        tree = ast.parse(content, filename=file_path)
    except (SyntaxError, ValueError):
        return _extract_python_regex(file_path, content)

    units: list[CodeUnit] = []

    # id() of each node sitting directly in a class body -> that class's name.
    # Keys stay valid because ``tree`` keeps every node alive for the call.
    parent_map: dict[int, str] = {}
    for cls_node in ast.walk(tree):
        if isinstance(cls_node, ast.ClassDef):
            for child in ast.iter_child_nodes(cls_node):
                parent_map[id(child)] = cls_node.name

    for node in ast.walk(tree):
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            parent_class = parent_map.get(id(node))
            name = f"{parent_class}.{node.name}" if parent_class else node.name
            unit_type = (
                "async_function"
                if isinstance(node, ast.AsyncFunctionDef)
                else "function"
            )
        elif isinstance(node, ast.ClassDef):
            name = node.name
            unit_type = "class"
        else:
            continue

        # ``end_lineno`` may be absent or None; fall back to a one-line span.
        end = getattr(node, "end_lineno", None) or node.lineno
        units.append(CodeUnit(file_path, name, unit_type, node.lineno, end))

    return units
232
+
233
+
234
def _extract_python_regex(file_path: str, content: str) -> list[CodeUnit]:
    """Line-oriented fallback extractor for Python source that ``ast`` rejects.

    Each line is tested against ``_PY_CLASS_RE`` and then ``_PY_FUNC_RE``.
    A function indented deeper than the most recently seen class is named
    ``Class.func``; a function at or left of that class's indentation ends
    the class context.  End lines are estimated by ``_py_block_end``.
    """
    source_lines = content.splitlines()
    found: list[CodeUnit] = []
    # Most recent class header and the width of its leading whitespace.
    owner: str | None = None
    owner_indent = -1

    for row, text in enumerate(source_lines):
        class_match = _PY_CLASS_RE.match(text)
        if class_match is not None:
            owner = class_match.group("name")
            owner_indent = len(class_match.group("indent"))
            last = _py_block_end(source_lines, row, owner_indent)
            found.append(CodeUnit(file_path, owner, "class", row + 1, last))
            continue

        func_match = _PY_FUNC_RE.match(text)
        if func_match is None:
            continue

        depth = len(func_match.group("indent"))
        label = func_match.group("name")
        if owner and depth > owner_indent:
            label = f"{owner}.{label}"
        else:
            # Not nested under the remembered class: forget it.
            owner, owner_indent = None, -1

        kind = "async_function" if text.lstrip().startswith("async ") else "function"
        last = _py_block_end(source_lines, row, depth)
        found.append(CodeUnit(file_path, label, kind, row + 1, last))

    return found
271
+
272
+
273
def _py_block_end(lines: list[str], start_idx: int, indent: int) -> int:
    """Estimate where the Python block whose header is at *start_idx* ends.

    Walks the lines after the header, ignoring blank lines and lines whose
    first non-blank character is ``#``.  The first remaining line indented
    by *indent* characters or fewer stops the scan, and its 0-based index
    is returned (numerically the 1-based number of the line just before
    it).  If no such line exists, ``len(lines)`` is returned.
    """
    for pos in range(start_idx + 1, len(lines)):
        raw = lines[pos]
        body = raw.lstrip()
        if not body or body[0] == "#":
            continue
        leading = len(raw) - len(body)
        if leading <= indent:
            return pos
    return len(lines)
283
+
284
+
285
+ # ---------------------------------------------------------------------------
286
+ # JavaScript / TypeScript
287
+ # ---------------------------------------------------------------------------
288
+
289
# JavaScript identifiers may contain ``$`` at any position, which ``\w`` does
# not cover, so the name groups below use ``[\w$]`` for the tail.

# Named function declaration.  Accepts ``export`` / ``export default``,
# ``async``, generator syntax (``function*``) and a TypeScript type-parameter
# list directly after the name (``function f<T>(``).
_JS_NAMED_FUNC_RE = re.compile(
    r"^\s*(?:export\s+)?(?:default\s+)?(?:async\s+)?"
    r"function(?:\s+|\s*\*\s*)"
    r"(?P<name>[A-Za-z_$][\w$]*)\s*[<(]",
)

# Class declaration, optionally exported (``export`` / ``export default``)
# and optionally ``abstract`` (TypeScript).
_JS_CLASS_RE = re.compile(
    r"^\s*(?:export\s+)?(?:default\s+)?(?:abstract\s+)?"
    r"class\s+(?P<name>[A-Za-z_$][\w$]*)",
)

# ``const``/``let``/``var`` binding whose initializer is an arrow function
# with either a parenthesised parameter list or a single bare parameter.
_JS_ARROW_RE = re.compile(
    r"^\s*(?:export\s+)?(?:const|let|var)\s+(?P<name>[A-Za-z_$][\w$]*)"
    r"\s*=\s*(?:async\s+)?(?:\([^)]*\)|[A-Za-z_$][\w$]*)\s*=>",
)
299
+
300
+ # ---------------------------------------------------------------------------
301
+ # Go
302
+ # ---------------------------------------------------------------------------
303
+
304
# Function or method declaration.  The optional receiver is matched as any
# parenthesised group, so unnamed receivers (``(Point)``) and generic ones
# (``(s *Stack[T])``) are accepted; the name may be followed by ``(`` or by
# the ``[`` of a type-parameter list (``func Map[T any](``).
_GO_FUNC_RE = re.compile(
    r"^\s*func\s+(?:\([^)]*\)\s*)?(?P<name>[A-Za-z_]\w*)\s*[\[(]",
)

# ``type Name struct`` / ``type Name interface``, with an optional
# type-parameter list between the name and the kind keyword.
_GO_TYPE_RE = re.compile(
    r"^\s*type\s+(?P<name>[A-Za-z_]\w*)(?:\[[^\]]*\])?\s+(?P<kind>struct|interface)\b",
)
310
+
311
+ # ---------------------------------------------------------------------------
312
+ # Rust
313
+ # ---------------------------------------------------------------------------
314
+
315
# Optional visibility prefix shared by the item patterns below:
# ``pub``, ``pub(crate)``, ``pub(super)``, ``pub(in some::path)``.
_RS_VIS = r"(?:pub(?:\s*\([^)]*\))?\s+)?"

# Function item.  Accepts the qualifiers ``default``, ``const``, ``async``,
# ``unsafe`` and ``extern "ABI"`` in that order before ``fn``; the name must
# be followed by ``<`` (generics) or ``(``.
_RS_FN_RE = re.compile(
    r"^\s*" + _RS_VIS +
    r"(?:default\s+)?(?:const\s+)?(?:async\s+)?(?:unsafe\s+)?"
    r'(?:extern\s+(?:"[^"]*"\s+)?)?'
    r"fn\s+(?P<name>[A-Za-z_]\w*)\s*[<(]",
)

_RS_STRUCT_RE = re.compile(
    r"^\s*" + _RS_VIS + r"struct\s+(?P<name>[A-Za-z_]\w*)",
)

_RS_ENUM_RE = re.compile(
    r"^\s*" + _RS_VIS + r"enum\s+(?P<name>[A-Za-z_]\w*)",
)

# ``impl`` block.  ``name`` is the implementing type, including its generic
# arguments if present; a leading ``Trait for`` part is skipped.
_RS_IMPL_RE = re.compile(
    r"^\s*impl(?:\s*<[^>]*>)?\s+"
    r"(?:[A-Za-z_]\w*(?:\s*<[^>]*>)?\s+for\s+)?"
    r"(?P<name>[A-Za-z_]\w*(?:\s*<[^>]*>)?)",
)
329
+
330
+ # ---------------------------------------------------------------------------
331
+ # C#
332
+ # ---------------------------------------------------------------------------
333
+
334
# Type declaration: optional ``[Attribute]`` groups, one optional access
# modifier, any number of type modifiers, then the kind keyword and the name.
_CS_CLASS_RE = re.compile(
    r"^(?:\s*\[[^\]]*\]\s*)*"
    r"\s*(?:(?:public|private|protected|internal)\s+)?"
    r"(?:(?:static|abstract|sealed|partial)\s+)*"
    r"(?P<kind>class|struct|interface|enum|record)\s+(?P<name>[A-Za-z_]\w*)",
)

# Method declaration: attributes, access modifier, member modifiers, a return
# type (dotted name, optional generic arguments, ``[]`` and ``?`` suffixes),
# then the method name followed by ``<`` or ``(``.
#
# The negative lookahead rejects lines where the "return type" position holds
# a statement keyword.  Without it, statements such as ``return Foo(x);``,
# ``else if (x)`` or ``new Foo(x);`` have the same shape as a declaration
# and would be reported as methods.
_CS_METHOD_RE = re.compile(
    r"^(?:\s*\[[^\]]*\]\s*)*"
    r"\s*(?:(?:public|private|protected|internal)\s+)?"
    r"(?:(?:static|virtual|override|abstract|async|new|partial|extern|sealed|unsafe)\s+)*"
    r"(?!(?:return|else|new|throw|await|case|goto)\b)"
    r"(?:[A-Za-z_]\w*(?:\.[A-Za-z_]\w*)*(?:<(?:[^<>]|<[^>]*>)*>)?(?:\[\])*\??\s+)"
    r"(?P<name>[A-Za-z_]\w*)\s*[<(]",
)
347
+
348
+ # ---------------------------------------------------------------------------
349
+ # Java
350
+ # ---------------------------------------------------------------------------
351
+
352
# Type declaration: optional annotations (with optional argument list), one
# optional access modifier, any number of modifiers, then the kind keyword
# and the name.
_JAVA_CLASS_RE = re.compile(
    r"^(?:\s*@\w+(?:\s*\([^)]*\))?\s*)*"
    r"\s*(?:(?:public|private|protected)\s+)?"
    r"(?:(?:static|final|abstract|sealed)\s+)*"
    r"(?P<kind>class|interface|enum|record)\s+(?P<name>[A-Za-z_]\w*)",
)

# Method declaration: annotations, access modifier, modifiers, a return type
# (identifier with optional generic arguments and ``[]`` suffixes), then the
# method name followed by ``(``.
#
# The negative lookahead rejects lines where the "return type" position holds
# a statement keyword.  Without it, statements such as ``return foo(x);``,
# ``else if (x) {`` or ``new Thread(r)`` have the same shape as a
# declaration and would be reported as methods.
_JAVA_METHOD_RE = re.compile(
    r"^(?:\s*@\w+(?:\s*\([^)]*\))?\s*)*"
    r"\s*(?:(?:public|private|protected)\s+)?"
    r"(?:(?:static|final|abstract|synchronized|native|default)\s+)*"
    r"(?!(?:return|else|new|throw|case|assert)\b)"
    r"(?:[A-Za-z_]\w*(?:<(?:[^<>]|<[^>]*>)*>)?(?:\[\])*\s+)"
    r"(?P<name>[A-Za-z_]\w*)\s*\(",
)
365
+
366
+ # ---------------------------------------------------------------------------
367
+ # C / C++
368
+ # ---------------------------------------------------------------------------
369
+
370
# ``class`` / ``struct`` / ``namespace`` declaration, optionally preceded by
# a ``template <...>`` header on the same line.
_CPP_CLASS_RE = re.compile(
    r"^\s*(?:template\s*<[^>]*>\s*)?"
    r"(?P<kind>class|struct|namespace)\s+(?P<name>[A-Za-z_]\w*)",
)

# ``enum Name`` / ``enum class Name``.
_CPP_ENUM_RE = re.compile(
    r"^\s*enum\s+(?:class\s+)?(?P<name>[A-Za-z_]\w*)",
)

# Function definition or declaration:
#   * optional ``template <...>`` header;
#   * any number of leading specifiers / type qualifiers (``static``,
#     ``const``, ``unsigned`` ...), so ``static const char *f(`` and
#     ``unsigned long f(`` are recognised;
#   * a return type (optionally ``::``-qualified, optionally with template
#     arguments) that is not a statement keyword -- the negative lookahead
#     keeps ``return f(x);``, ``else if (x)`` or ``delete f(x);`` from
#     being reported as functions;
#   * pointer/reference markers attached to either the type (``T* f``) or
#     the name (``T *f``);
#   * the (optionally ``Class::``-qualified) name followed by ``(``.
_CPP_FUNC_RE = re.compile(
    r"^\s*(?:template\s*<[^>]*>\s+)?"
    r"(?:(?:static|inline|virtual|explicit|constexpr|extern|friend"
    r"|const|volatile|unsigned|signed|long|short)\s+)*"
    r"(?!(?:return|else|new|delete|throw|case|goto)\b)"
    r"(?:[A-Za-z_]\w*(?:::\w+)*(?:\s*<(?:[^<>]|<[^>]*>)*>)?(?:\s*[*&]+\s+|\s+[*&]*))"
    r"(?P<name>~?[A-Za-z_]\w*(?:::[A-Za-z_]\w*)?)\s*\(",
)
383
+
384
+ # ---------------------------------------------------------------------------
385
+ # Kotlin
386
+ # ---------------------------------------------------------------------------
387
+
388
# ``class`` / ``object`` / ``interface`` declaration preceded by any number
# of the listed modifiers.
_KT_CLASS_RE = re.compile(
    r"^\s*"
    r"(?:(?:private|public|internal|protected|open|abstract|sealed|data|enum|inner|value|inline)\s+)*"
    r"(?P<kind>class|object|interface)\s+"
    r"(?P<name>[A-Za-z_]\w*)"
)

# ``fun`` declaration preceded by any number of the listed modifiers.  An
# optional ``Receiver.`` (with optional generic arguments) before the name is
# skipped; the name must be followed by ``<`` or ``(``.
_KT_FUN_RE = re.compile(
    r"^\s*"
    r"(?:(?:private|public|internal|protected|open|override|suspend|inline|tailrec)\s+)*"
    r"fun\s+"
    r"(?:[A-Za-z_]\w*(?:<[^>]*>)?\.)?"
    r"(?P<name>[A-Za-z_]\w*)\s*[<(]"
)
396
+
397
+ # ---------------------------------------------------------------------------
398
+ # Swift
399
+ # ---------------------------------------------------------------------------
400
+
401
# Type declaration: optional ``@attribute`` groups (with optional argument
# list), any number of the listed modifiers, then the kind keyword and name.
_SWIFT_TYPE_RE = re.compile(
    r"^(?:\s*@\w+(?:\s*\([^)]*\))?\s*)*\s*"
    r"(?:(?:private|public|internal|fileprivate|open|final)\s+)*"
    r"(?P<kind>class|struct|enum|protocol|actor)\s+"
    r"(?P<name>[A-Za-z_]\w*)"
)

# ``func`` declaration: optional attributes, any number of the listed
# modifiers, then ``func`` and a name followed by ``<`` or ``(``.
_SWIFT_FUNC_RE = re.compile(
    r"^(?:\s*@\w+(?:\s*\([^)]*\))?\s*)*\s*"
    r"(?:(?:private|public|internal|fileprivate|open|static|class|override|mutating|final)\s+)*"
    r"func\s+"
    r"(?P<name>[A-Za-z_]\w*)\s*[<(]"
)
411
+
412
+ # ---------------------------------------------------------------------------
413
+ # PHP
414
+ # ---------------------------------------------------------------------------
415
+
416
# ``class`` / ``interface`` / ``trait`` / ``enum`` declaration, optionally
# preceded by ``abstract`` or ``final``.
_PHP_CLASS_RE = re.compile(
    r"^\s*(?:(?:abstract|final)\s+)?"
    r"(?P<kind>class|interface|trait|enum)\s+"
    r"(?P<name>[A-Za-z_]\w*)"
)

# ``function`` declaration with one optional visibility keyword and an
# optional ``static``; the name must be followed by ``(``.
_PHP_FUNC_RE = re.compile(
    r"^\s*(?:(?:public|private|protected)\s+)?"
    r"(?:static\s+)?"
    r"function\s+(?P<name>[A-Za-z_]\w*)\s*\("
)
422
+
423
+ # ---------------------------------------------------------------------------
424
+ # Dart
425
+ # ---------------------------------------------------------------------------
426
+
427
# ``class`` / ``mixin`` / ``extension`` declaration, optionally ``abstract``.
_DART_CLASS_RE = re.compile(
    r"^\s*(?:abstract\s+)?"
    r"(?P<kind>class|mixin|extension)\s+"
    r"(?P<name>[A-Za-z_]\w*)"
)

# Function-like member: optional ``static``/``external``; then either
# ``factory`` or an optional type (with optional generic arguments and ``?``)
# plus an optional ``get``/``set``; then a name followed by one of
# ``<``, ``(``, ``=`` or ``{``.
_DART_FUNC_RE = re.compile(
    r"^\s*(?:(?:static|external)\s+)?"
    r"(?:factory\s+"
    r"|(?:[A-Za-z_]\w*(?:<[^>]*>)?\??\s+)?(?:(?:get|set)\s+)?)"
    r"(?P<name>[A-Za-z_]\w*)\s*[<(={]"
)
435
+
436
+ # ---------------------------------------------------------------------------
437
+ # Ruby (end-delimited, not brace-delimited)
438
+ # ---------------------------------------------------------------------------
439
+
440
# ``class`` / ``module`` header.  ``indent`` captures the leading whitespace;
# ``name`` may be ``::``-qualified (``Outer::Inner``).
_RB_CLASS_RE = re.compile(
    r"^(?P<indent>\s*)"
    r"(?P<kind>class|module)\s+"
    r"(?P<name>[A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)"
)

# ``def`` header.  A ``self.`` prefix is skipped and the name may end in
# ``?``, ``!`` or ``=``.
_RB_DEF_RE = re.compile(
    r"^(?P<indent>\s*)"
    r"def\s+(?:self\.)?"
    r"(?P<name>[A-Za-z_]\w*[?!=]?)\s*[\(;\n]?"
)
446
+
447
+
448
def _ruby_block_end(lines: list[str], start_idx: int, indent: int) -> int:
    """Locate the ``end`` keyword that closes the Ruby block opened at *start_idx*.

    Lines after the header are examined in order.  Blank lines and lines
    whose first non-blank character is ``#`` are ignored.  The first line
    that is exactly ``end`` (or starts with ``end `` once stripped) and is
    indented by *indent* characters or fewer closes the block; its 1-based
    line number is returned.  Without such a line, ``len(lines)`` is
    returned.
    """
    for pos in range(start_idx + 1, len(lines)):
        raw = lines[pos]
        text = raw.strip()
        if text == "" or text[0] == "#":
            continue
        if text != "end" and not text.startswith("end "):
            continue
        if len(raw) - len(raw.lstrip()) <= indent:
            return pos + 1  # 1-based
    return len(lines)
458
+
459
+
460
def _extract_ruby(file_path: str, content: str) -> list[CodeUnit]:
    """Collect ``class``/``module``/``def`` units from Ruby source.

    Each line is tested against ``_RB_CLASS_RE`` first and ``_RB_DEF_RE``
    second.  A class or module unit takes its type from the matched keyword;
    a ``def`` is always reported as ``"function"``.  The end line is looked
    up with ``_ruby_block_end`` using the header's indentation.
    """
    source_lines = content.splitlines()
    found: list[CodeUnit] = []

    for row, text in enumerate(source_lines):
        header = _RB_CLASS_RE.match(text)
        if header is not None:
            kind = header.group("kind")
        else:
            header = _RB_DEF_RE.match(text)
            if header is None:
                continue
            kind = "function"

        last = _ruby_block_end(source_lines, row, len(header.group("indent")))
        found.append(CodeUnit(file_path, header.group("name"), kind, row + 1, last))

    return found
480
+
481
+
482
+ # ---------------------------------------------------------------------------
483
+ # Per-language pattern tables
484
+ # ---------------------------------------------------------------------------
485
+
486
+
487
def _name_kind(m):
    """Return the ``(name, kind)`` pair captured by match *m*.

    Used as the callable ``unit_type`` entry in pattern tables whose regex
    defines both a ``name`` and a ``kind`` group.
    """
    # Requesting two groups at once yields them as a tuple, in argument order.
    return m.group("name", "kind")
490
+
491
+
492
# Each table is an ordered list of (compiled_regex, unit_type) pairs for
# ``_extract_brace_lang``.  ``unit_type`` is either a fixed string or a
# callable taking the match and returning (name, type).  Order matters:
# the first pattern that matches a line wins.


def _rs_impl_unit(m):
    """Return the matched ``name`` paired with the fixed unit type ``"impl"``."""
    return m.group("name"), "impl"


_JS_TS_PATTERNS = [
    (_JS_NAMED_FUNC_RE, "function"),
    (_JS_CLASS_RE, "class"),
    (_JS_ARROW_RE, "function"),
]

_GO_PATTERNS = [
    (_GO_FUNC_RE, "function"),
    (_GO_TYPE_RE, _name_kind),
]

_RS_PATTERNS = [
    (_RS_FN_RE, "function"),
    (_RS_STRUCT_RE, "struct"),
    (_RS_ENUM_RE, "enum"),
    (_RS_IMPL_RE, _rs_impl_unit),
]

# The remaining languages all follow one shape: a type-declaration pattern
# whose ``kind`` group names the unit type, then a function pattern.
_CS_PATTERNS = [(_CS_CLASS_RE, _name_kind), (_CS_METHOD_RE, "function")]

_JAVA_PATTERNS = [(_JAVA_CLASS_RE, _name_kind), (_JAVA_METHOD_RE, "function")]

# C and C++ share this table; it has an extra ``enum`` pattern in between.
_CPP_PATTERNS = [
    (_CPP_CLASS_RE, _name_kind),
    (_CPP_ENUM_RE, "enum"),
    (_CPP_FUNC_RE, "function"),
]

_KT_PATTERNS = [(_KT_CLASS_RE, _name_kind), (_KT_FUN_RE, "function")]

_SWIFT_PATTERNS = [(_SWIFT_TYPE_RE, _name_kind), (_SWIFT_FUNC_RE, "function")]

_PHP_PATTERNS = [(_PHP_CLASS_RE, _name_kind), (_PHP_FUNC_RE, "function")]

_DART_PATTERNS = [(_DART_CLASS_RE, _name_kind), (_DART_FUNC_RE, "function")]
545
+
546
+
547
+ # ---------------------------------------------------------------------------
548
+ # Dispatcher
549
+ # ---------------------------------------------------------------------------
550
+
551
def _brace_extractor(patterns):
    """Bind *patterns* to ``_extract_brace_lang``, leaving (file_path, content)."""
    return partial(_extract_brace_lang, patterns=patterns)


# Language identifier (as produced by ``detect_language``) -> extractor
# callable taking (file_path, content).  Python and Ruby have dedicated
# extractors; every other language uses the brace extractor with its table.
_EXTRACTORS = {
    "python": _extract_python_ast,
    "javascript": _brace_extractor(_JS_TS_PATTERNS),
    "typescript": _brace_extractor(_JS_TS_PATTERNS),
    "go": _brace_extractor(_GO_PATTERNS),
    "rust": _brace_extractor(_RS_PATTERNS),
    "csharp": _brace_extractor(_CS_PATTERNS),
    "java": _brace_extractor(_JAVA_PATTERNS),
    "c": _brace_extractor(_CPP_PATTERNS),
    "cpp": _brace_extractor(_CPP_PATTERNS),
    "kotlin": _brace_extractor(_KT_PATTERNS),
    "swift": _brace_extractor(_SWIFT_PATTERNS),
    "php": _brace_extractor(_PHP_PATTERNS),
    "ruby": _extract_ruby,
    "dart": _brace_extractor(_DART_PATTERNS),
}
567
+
568
+
569
def extract_code_units(file_path: str, content: str) -> list[CodeUnit]:
    """Extract the code units found in *content*.

    The language is chosen from *file_path* via ``detect_language`` and the
    matching entry of ``_EXTRACTORS`` is called with ``(file_path, content)``.
    An empty list is returned when the language is unknown or has no
    registered extractor.
    """
    extractor = _EXTRACTORS.get(detect_language(file_path))
    if extractor is None:
        return []
    return extractor(file_path, content)