raise-cli 2.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (264) hide show
  1. raise_cli/__init__.py +38 -0
  2. raise_cli/__main__.py +30 -0
  3. raise_cli/adapters/__init__.py +91 -0
  4. raise_cli/adapters/declarative/__init__.py +26 -0
  5. raise_cli/adapters/declarative/adapter.py +267 -0
  6. raise_cli/adapters/declarative/discovery.py +94 -0
  7. raise_cli/adapters/declarative/expressions.py +150 -0
  8. raise_cli/adapters/declarative/reference/__init__.py +1 -0
  9. raise_cli/adapters/declarative/reference/github.yaml +143 -0
  10. raise_cli/adapters/declarative/schema.py +98 -0
  11. raise_cli/adapters/filesystem.py +299 -0
  12. raise_cli/adapters/mcp_bridge.py +10 -0
  13. raise_cli/adapters/mcp_confluence.py +246 -0
  14. raise_cli/adapters/mcp_jira.py +405 -0
  15. raise_cli/adapters/models.py +205 -0
  16. raise_cli/adapters/protocols.py +180 -0
  17. raise_cli/adapters/registry.py +90 -0
  18. raise_cli/adapters/sync.py +149 -0
  19. raise_cli/agents/__init__.py +14 -0
  20. raise_cli/agents/antigravity.yaml +8 -0
  21. raise_cli/agents/claude.yaml +8 -0
  22. raise_cli/agents/copilot.yaml +8 -0
  23. raise_cli/agents/copilot_plugin.py +124 -0
  24. raise_cli/agents/cursor.yaml +7 -0
  25. raise_cli/agents/roo.yaml +8 -0
  26. raise_cli/agents/windsurf.yaml +8 -0
  27. raise_cli/artifacts/__init__.py +30 -0
  28. raise_cli/artifacts/models.py +43 -0
  29. raise_cli/artifacts/reader.py +55 -0
  30. raise_cli/artifacts/renderer.py +104 -0
  31. raise_cli/artifacts/story_design.py +69 -0
  32. raise_cli/artifacts/writer.py +45 -0
  33. raise_cli/backlog/__init__.py +1 -0
  34. raise_cli/backlog/sync.py +115 -0
  35. raise_cli/cli/__init__.py +3 -0
  36. raise_cli/cli/commands/__init__.py +3 -0
  37. raise_cli/cli/commands/_resolve.py +153 -0
  38. raise_cli/cli/commands/adapters.py +362 -0
  39. raise_cli/cli/commands/artifact.py +137 -0
  40. raise_cli/cli/commands/backlog.py +333 -0
  41. raise_cli/cli/commands/base.py +31 -0
  42. raise_cli/cli/commands/discover.py +551 -0
  43. raise_cli/cli/commands/docs.py +130 -0
  44. raise_cli/cli/commands/doctor.py +177 -0
  45. raise_cli/cli/commands/gate.py +223 -0
  46. raise_cli/cli/commands/graph.py +1086 -0
  47. raise_cli/cli/commands/info.py +81 -0
  48. raise_cli/cli/commands/init.py +746 -0
  49. raise_cli/cli/commands/journal.py +167 -0
  50. raise_cli/cli/commands/mcp.py +524 -0
  51. raise_cli/cli/commands/memory.py +467 -0
  52. raise_cli/cli/commands/pattern.py +348 -0
  53. raise_cli/cli/commands/profile.py +59 -0
  54. raise_cli/cli/commands/publish.py +80 -0
  55. raise_cli/cli/commands/release.py +338 -0
  56. raise_cli/cli/commands/session.py +528 -0
  57. raise_cli/cli/commands/signal.py +410 -0
  58. raise_cli/cli/commands/skill.py +350 -0
  59. raise_cli/cli/commands/skill_set.py +145 -0
  60. raise_cli/cli/error_handler.py +158 -0
  61. raise_cli/cli/main.py +163 -0
  62. raise_cli/compat.py +66 -0
  63. raise_cli/config/__init__.py +41 -0
  64. raise_cli/config/agent_plugin.py +105 -0
  65. raise_cli/config/agent_registry.py +233 -0
  66. raise_cli/config/agents.py +120 -0
  67. raise_cli/config/ide.py +32 -0
  68. raise_cli/config/paths.py +379 -0
  69. raise_cli/config/settings.py +180 -0
  70. raise_cli/context/__init__.py +42 -0
  71. raise_cli/context/analyzers/__init__.py +16 -0
  72. raise_cli/context/analyzers/models.py +36 -0
  73. raise_cli/context/analyzers/protocol.py +43 -0
  74. raise_cli/context/analyzers/python.py +292 -0
  75. raise_cli/context/builder.py +1569 -0
  76. raise_cli/context/diff.py +213 -0
  77. raise_cli/context/extractors/__init__.py +13 -0
  78. raise_cli/context/extractors/skills.py +121 -0
  79. raise_cli/core/__init__.py +37 -0
  80. raise_cli/core/files.py +66 -0
  81. raise_cli/core/text.py +174 -0
  82. raise_cli/core/tools.py +441 -0
  83. raise_cli/discovery/__init__.py +50 -0
  84. raise_cli/discovery/analyzer.py +691 -0
  85. raise_cli/discovery/drift.py +355 -0
  86. raise_cli/discovery/scanner.py +1687 -0
  87. raise_cli/doctor/__init__.py +4 -0
  88. raise_cli/doctor/checks/__init__.py +1 -0
  89. raise_cli/doctor/checks/environment.py +110 -0
  90. raise_cli/doctor/checks/project.py +238 -0
  91. raise_cli/doctor/fix.py +80 -0
  92. raise_cli/doctor/models.py +56 -0
  93. raise_cli/doctor/protocol.py +43 -0
  94. raise_cli/doctor/registry.py +100 -0
  95. raise_cli/doctor/report.py +141 -0
  96. raise_cli/doctor/runner.py +95 -0
  97. raise_cli/engines/__init__.py +3 -0
  98. raise_cli/exceptions.py +215 -0
  99. raise_cli/gates/__init__.py +19 -0
  100. raise_cli/gates/builtin/__init__.py +1 -0
  101. raise_cli/gates/builtin/coverage.py +52 -0
  102. raise_cli/gates/builtin/lint.py +48 -0
  103. raise_cli/gates/builtin/tests.py +48 -0
  104. raise_cli/gates/builtin/types.py +48 -0
  105. raise_cli/gates/models.py +40 -0
  106. raise_cli/gates/protocol.py +41 -0
  107. raise_cli/gates/registry.py +141 -0
  108. raise_cli/governance/__init__.py +11 -0
  109. raise_cli/governance/extractor.py +412 -0
  110. raise_cli/governance/models.py +134 -0
  111. raise_cli/governance/parsers/__init__.py +35 -0
  112. raise_cli/governance/parsers/_convert.py +38 -0
  113. raise_cli/governance/parsers/adr.py +274 -0
  114. raise_cli/governance/parsers/backlog.py +356 -0
  115. raise_cli/governance/parsers/constitution.py +119 -0
  116. raise_cli/governance/parsers/epic.py +323 -0
  117. raise_cli/governance/parsers/glossary.py +316 -0
  118. raise_cli/governance/parsers/guardrails.py +345 -0
  119. raise_cli/governance/parsers/prd.py +112 -0
  120. raise_cli/governance/parsers/roadmap.py +118 -0
  121. raise_cli/governance/parsers/vision.py +116 -0
  122. raise_cli/graph/__init__.py +1 -0
  123. raise_cli/graph/backends/__init__.py +57 -0
  124. raise_cli/graph/backends/api.py +137 -0
  125. raise_cli/graph/backends/dual.py +139 -0
  126. raise_cli/graph/backends/pending.py +84 -0
  127. raise_cli/handlers/__init__.py +3 -0
  128. raise_cli/hooks/__init__.py +54 -0
  129. raise_cli/hooks/builtin/__init__.py +1 -0
  130. raise_cli/hooks/builtin/backlog.py +216 -0
  131. raise_cli/hooks/builtin/gate_bridge.py +83 -0
  132. raise_cli/hooks/builtin/jira_sync.py +127 -0
  133. raise_cli/hooks/builtin/memory.py +117 -0
  134. raise_cli/hooks/builtin/telemetry.py +72 -0
  135. raise_cli/hooks/emitter.py +184 -0
  136. raise_cli/hooks/events.py +262 -0
  137. raise_cli/hooks/protocol.py +38 -0
  138. raise_cli/hooks/registry.py +117 -0
  139. raise_cli/mcp/__init__.py +33 -0
  140. raise_cli/mcp/bridge.py +218 -0
  141. raise_cli/mcp/models.py +43 -0
  142. raise_cli/mcp/registry.py +77 -0
  143. raise_cli/mcp/schema.py +41 -0
  144. raise_cli/memory/__init__.py +58 -0
  145. raise_cli/memory/loader.py +247 -0
  146. raise_cli/memory/migration.py +241 -0
  147. raise_cli/memory/models.py +169 -0
  148. raise_cli/memory/writer.py +598 -0
  149. raise_cli/onboarding/__init__.py +103 -0
  150. raise_cli/onboarding/bootstrap.py +324 -0
  151. raise_cli/onboarding/claudemd.py +17 -0
  152. raise_cli/onboarding/conventions.py +742 -0
  153. raise_cli/onboarding/detection.py +374 -0
  154. raise_cli/onboarding/governance.py +443 -0
  155. raise_cli/onboarding/instructions.py +672 -0
  156. raise_cli/onboarding/manifest.py +201 -0
  157. raise_cli/onboarding/memory_md.py +399 -0
  158. raise_cli/onboarding/migration.py +207 -0
  159. raise_cli/onboarding/profile.py +624 -0
  160. raise_cli/onboarding/skill_conflict.py +100 -0
  161. raise_cli/onboarding/skill_manifest.py +176 -0
  162. raise_cli/onboarding/skills.py +437 -0
  163. raise_cli/onboarding/workflows.py +101 -0
  164. raise_cli/output/__init__.py +28 -0
  165. raise_cli/output/console.py +394 -0
  166. raise_cli/output/formatters/__init__.py +9 -0
  167. raise_cli/output/formatters/adapters.py +135 -0
  168. raise_cli/output/formatters/discover.py +439 -0
  169. raise_cli/output/formatters/skill.py +298 -0
  170. raise_cli/publish/__init__.py +3 -0
  171. raise_cli/publish/changelog.py +80 -0
  172. raise_cli/publish/check.py +179 -0
  173. raise_cli/publish/version.py +172 -0
  174. raise_cli/rai_base/__init__.py +22 -0
  175. raise_cli/rai_base/framework/__init__.py +7 -0
  176. raise_cli/rai_base/framework/methodology.yaml +233 -0
  177. raise_cli/rai_base/governance/__init__.py +1 -0
  178. raise_cli/rai_base/governance/architecture/__init__.py +1 -0
  179. raise_cli/rai_base/governance/architecture/domain-model.md +20 -0
  180. raise_cli/rai_base/governance/architecture/system-context.md +34 -0
  181. raise_cli/rai_base/governance/architecture/system-design.md +24 -0
  182. raise_cli/rai_base/governance/backlog.md +8 -0
  183. raise_cli/rai_base/governance/guardrails.md +17 -0
  184. raise_cli/rai_base/governance/prd.md +25 -0
  185. raise_cli/rai_base/governance/vision.md +16 -0
  186. raise_cli/rai_base/identity/__init__.py +8 -0
  187. raise_cli/rai_base/identity/core.md +119 -0
  188. raise_cli/rai_base/identity/perspective.md +119 -0
  189. raise_cli/rai_base/memory/__init__.py +7 -0
  190. raise_cli/rai_base/memory/patterns-base.jsonl +55 -0
  191. raise_cli/schemas/__init__.py +3 -0
  192. raise_cli/schemas/journal.py +49 -0
  193. raise_cli/schemas/session_state.py +117 -0
  194. raise_cli/session/__init__.py +5 -0
  195. raise_cli/session/bundle.py +820 -0
  196. raise_cli/session/close.py +268 -0
  197. raise_cli/session/journal.py +119 -0
  198. raise_cli/session/resolver.py +126 -0
  199. raise_cli/session/state.py +187 -0
  200. raise_cli/skills/__init__.py +44 -0
  201. raise_cli/skills/locator.py +141 -0
  202. raise_cli/skills/name_checker.py +199 -0
  203. raise_cli/skills/parser.py +145 -0
  204. raise_cli/skills/scaffold.py +212 -0
  205. raise_cli/skills/schema.py +132 -0
  206. raise_cli/skills/skillsets.py +195 -0
  207. raise_cli/skills/validator.py +197 -0
  208. raise_cli/skills_base/__init__.py +80 -0
  209. raise_cli/skills_base/contract-template.md +60 -0
  210. raise_cli/skills_base/preamble.md +37 -0
  211. raise_cli/skills_base/rai-architecture-review/SKILL.md +137 -0
  212. raise_cli/skills_base/rai-debug/SKILL.md +171 -0
  213. raise_cli/skills_base/rai-discover/SKILL.md +167 -0
  214. raise_cli/skills_base/rai-discover-document/SKILL.md +128 -0
  215. raise_cli/skills_base/rai-discover-scan/SKILL.md +147 -0
  216. raise_cli/skills_base/rai-discover-start/SKILL.md +145 -0
  217. raise_cli/skills_base/rai-discover-validate/SKILL.md +142 -0
  218. raise_cli/skills_base/rai-docs-update/SKILL.md +142 -0
  219. raise_cli/skills_base/rai-doctor/SKILL.md +120 -0
  220. raise_cli/skills_base/rai-epic-close/SKILL.md +165 -0
  221. raise_cli/skills_base/rai-epic-close/templates/retrospective.md +68 -0
  222. raise_cli/skills_base/rai-epic-design/SKILL.md +146 -0
  223. raise_cli/skills_base/rai-epic-design/templates/design.md +24 -0
  224. raise_cli/skills_base/rai-epic-design/templates/scope.md +76 -0
  225. raise_cli/skills_base/rai-epic-plan/SKILL.md +153 -0
  226. raise_cli/skills_base/rai-epic-plan/_references/sequencing-strategies.md +67 -0
  227. raise_cli/skills_base/rai-epic-plan/templates/plan-section.md +49 -0
  228. raise_cli/skills_base/rai-epic-run/SKILL.md +208 -0
  229. raise_cli/skills_base/rai-epic-start/SKILL.md +136 -0
  230. raise_cli/skills_base/rai-epic-start/templates/brief.md +34 -0
  231. raise_cli/skills_base/rai-mcp-add/SKILL.md +176 -0
  232. raise_cli/skills_base/rai-mcp-remove/SKILL.md +120 -0
  233. raise_cli/skills_base/rai-mcp-status/SKILL.md +147 -0
  234. raise_cli/skills_base/rai-problem-shape/SKILL.md +138 -0
  235. raise_cli/skills_base/rai-project-create/SKILL.md +144 -0
  236. raise_cli/skills_base/rai-project-onboard/SKILL.md +162 -0
  237. raise_cli/skills_base/rai-quality-review/SKILL.md +189 -0
  238. raise_cli/skills_base/rai-research/SKILL.md +143 -0
  239. raise_cli/skills_base/rai-research/references/research-prompt-template.md +317 -0
  240. raise_cli/skills_base/rai-session-close/SKILL.md +176 -0
  241. raise_cli/skills_base/rai-session-start/SKILL.md +110 -0
  242. raise_cli/skills_base/rai-story-close/SKILL.md +198 -0
  243. raise_cli/skills_base/rai-story-design/SKILL.md +203 -0
  244. raise_cli/skills_base/rai-story-design/references/tech-design-story-v2.md +293 -0
  245. raise_cli/skills_base/rai-story-implement/SKILL.md +115 -0
  246. raise_cli/skills_base/rai-story-plan/SKILL.md +135 -0
  247. raise_cli/skills_base/rai-story-review/SKILL.md +178 -0
  248. raise_cli/skills_base/rai-story-run/SKILL.md +282 -0
  249. raise_cli/skills_base/rai-story-start/SKILL.md +166 -0
  250. raise_cli/skills_base/rai-story-start/templates/story.md +38 -0
  251. raise_cli/skills_base/rai-welcome/SKILL.md +134 -0
  252. raise_cli/telemetry/__init__.py +42 -0
  253. raise_cli/telemetry/schemas.py +285 -0
  254. raise_cli/telemetry/writer.py +217 -0
  255. raise_cli/tier/__init__.py +0 -0
  256. raise_cli/tier/context.py +134 -0
  257. raise_cli/viz/__init__.py +7 -0
  258. raise_cli/viz/generator.py +406 -0
  259. raise_cli-2.2.1.dist-info/METADATA +433 -0
  260. raise_cli-2.2.1.dist-info/RECORD +264 -0
  261. raise_cli-2.2.1.dist-info/WHEEL +4 -0
  262. raise_cli-2.2.1.dist-info/entry_points.txt +40 -0
  263. raise_cli-2.2.1.dist-info/licenses/LICENSE +190 -0
  264. raise_cli-2.2.1.dist-info/licenses/NOTICE +4 -0
@@ -0,0 +1,1687 @@
1
+ """Code scanner for symbol extraction.
2
+
3
+ This module extracts classes, functions, and module-level information
4
+ from source files. Supports:
5
+ - Python (via built-in ast module)
6
+ - TypeScript/JavaScript (via tree-sitter)
7
+
8
+ Example:
9
+ >>> from raise_cli.discovery.scanner import extract_python_symbols
10
+ >>> symbols = extract_python_symbols("class Foo: pass", "example.py")
11
+ >>> symbols[0].name
12
+ 'Foo'
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import ast
18
+ from pathlib import Path
19
+ from typing import TYPE_CHECKING, Literal
20
+
21
+ from pydantic import BaseModel, Field
22
+
23
+ from raise_cli.compat import portable_path
24
+
25
+ if TYPE_CHECKING:
26
+ from tree_sitter import Node, Parser
27
+
28
# Symbol kinds that can be extracted.
# This is the closed set of values accepted by ``Symbol.kind``.
SymbolKind = Literal[
    "class",
    "function",
    "method",
    "module",
    "interface",
    "enum",
    "type_alias",
    "constant",
    "trait",
    "component",
]

# Supported languages for scanning.
# These are the values that ``EXTENSION_TO_LANGUAGE`` maps file suffixes to.
Language = Literal[
    "python", "typescript", "javascript", "php", "svelte", "csharp", "dart"
]

# File extension to language mapping.
# Keys are lowercase suffixes including the leading dot.
EXTENSION_TO_LANGUAGE: dict[str, Language] = {
    ".py": "python",
    ".ts": "typescript",
    ".tsx": "typescript",
    ".js": "javascript",
    ".jsx": "javascript",
    ".mjs": "javascript",
    ".cjs": "javascript",
    ".php": "php",
    ".svelte": "svelte",
    ".cs": "csharp",
    ".dart": "dart",
}
61
+
62
+
63
class Symbol(BaseModel):
    """A code symbol extracted from source.

    Attributes:
        name: Symbol name (e.g., "UserService", "get_user").
        kind: Symbol type; one of the ``SymbolKind`` literals (class,
            function, method, module, interface, enum, type_alias,
            constant, trait, component).
        file: Relative path to source file.
        line: Line number where symbol is defined (1-indexed).
        signature: Full signature (e.g., "class UserService(BaseService)").
            Defaults to an empty string.
        docstring: Symbol's docstring if present, otherwise ``None``.
        parent: Parent symbol name for methods (e.g., class name),
            otherwise ``None``.

    Examples:
        >>> symbol = Symbol(
        ...     name="UserService",
        ...     kind="class",
        ...     file="src/services/user.py",
        ...     line=15,
        ...     signature="class UserService(BaseService)",
        ... )
        >>> symbol.name
        'UserService'
    """

    # Required fields: every extracted symbol has a name, kind, file and line.
    name: str = Field(..., description="Symbol name")
    kind: SymbolKind = Field(..., description="Symbol type")
    file: str = Field(..., description="Relative path to source file")
    line: int = Field(..., description="Line number (1-indexed)")
    # Optional metadata; extractors fill these in when available.
    signature: str = Field(default="", description="Full signature")
    docstring: str | None = Field(default=None, description="Symbol docstring")
    parent: str | None = Field(default=None, description="Parent symbol name")
94
+
95
+
96
class ScanResult(BaseModel):
    """Result of scanning a directory or file.

    All fields have defaults, so ``ScanResult()`` is an empty result.

    Attributes:
        symbols: List of extracted symbols (defaults to an empty list).
        files_scanned: Number of files processed (defaults to 0).
        errors: List of files that failed to parse (defaults to an empty list).

    Examples:
        >>> result = ScanResult(symbols=[], files_scanned=5, errors=[])
        >>> result.files_scanned
        5
    """

    # default_factory gives each instance its own list instead of a shared one.
    symbols: list[Symbol] = Field(default_factory=list)  # pyright: ignore[reportUnknownVariableType]
    files_scanned: int = Field(default=0)
    errors: list[str] = Field(default_factory=list)  # pyright: ignore[reportUnknownVariableType]
113
+
114
+
115
def _get_signature(node: ast.ClassDef | ast.FunctionDef | ast.AsyncFunctionDef) -> str:
    """Extract signature from an AST node.

    For classes the signature lists the positional bases followed by any
    keyword arguments of the class statement (e.g. ``metaclass=ABCMeta``),
    so the header is reproduced in full. For functions it contains the
    ``def``/``async def`` prefix, the unparsed argument list and, when
    present, the return annotation.

    Args:
        node: AST node for class or function definition.

    Returns:
        Signature string (e.g., "class Foo(Bar)", "class Foo(Bar, metaclass=M)"
        or "def func(a, b) -> int").
    """
    if isinstance(node, ast.ClassDef):
        # Bases first, then keywords, mirroring the order in source code.
        header_args = [ast.unparse(base) for base in node.bases]
        header_args.extend(ast.unparse(keyword) for keyword in node.keywords)
        if header_args:
            return f"class {node.name}({', '.join(header_args)})"
        return f"class {node.name}"

    # FunctionDef or AsyncFunctionDef
    args_str = ast.unparse(node.args)
    prefix = "async def" if isinstance(node, ast.AsyncFunctionDef) else "def"

    if node.returns:
        return_annotation = ast.unparse(node.returns)
        return f"{prefix} {node.name}({args_str}) -> {return_annotation}"
    return f"{prefix} {node.name}({args_str})"
138
+
139
+
140
def _extract_module_symbol(tree: ast.Module, file_path: str) -> Symbol | None:
    """Build a ``module`` symbol for the file when it has a docstring.

    Args:
        tree: Parsed module.
        file_path: Path to the source file; its stem becomes the symbol name.

    Returns:
        A module-level Symbol anchored at line 1, or ``None`` when the module
        has no (or an empty) docstring.
    """
    doc = ast.get_docstring(tree)
    if doc:
        stem = Path(file_path).stem
        return Symbol(
            name=stem,
            kind="module",
            file=file_path,
            line=1,
            signature=f"module {stem}",
            docstring=doc,
        )
    return None
153
+
154
+
155
def _extract_class_symbols(node: ast.ClassDef, file_path: str) -> list[Symbol]:
    """Return the symbol for a class followed by one symbol per direct method.

    Only ``def``/``async def`` statements that sit directly in the class body
    are reported; each carries the class name as its ``parent``.

    Args:
        node: Class definition to describe.
        file_path: Path recorded on every produced symbol.

    Returns:
        The class symbol first, then its methods in body order.
    """
    class_symbol = Symbol(
        name=node.name,
        kind="class",
        file=file_path,
        line=node.lineno,
        signature=_get_signature(node),
        docstring=ast.get_docstring(node),
    )
    method_symbols = [
        Symbol(
            name=member.name,
            kind="method",
            file=file_path,
            line=member.lineno,
            signature=_get_signature(member),
            docstring=ast.get_docstring(member),
            parent=node.name,
        )
        for member in node.body
        if isinstance(member, (ast.FunctionDef, ast.AsyncFunctionDef))
    ]
    return [class_symbol, *method_symbols]
181
+
182
+
183
def extract_python_symbols(source: str, file_path: str) -> list[Symbol]:
    """Extract symbols from Python source code.

    The result is assembled in three stages: an optional module symbol (only
    when the module has a docstring), then every class found anywhere in the
    tree together with its direct methods, then the functions defined at the
    top level of the module.

    Args:
        source: Python source code as string.
        file_path: Path to the source file (for metadata).

    Returns:
        List of Symbol objects.

    Raises:
        SyntaxError: If source code cannot be parsed.

    Examples:
        >>> source = '''
        ... class MyClass:
        ...     \"\"\"A sample class.\"\"\"
        ...     def method(self):
        ...         pass
        ... '''
        >>> symbols = extract_python_symbols(source, "example.py")
        >>> len(symbols)
        2
        >>> symbols[0].kind
        'class'
    """
    module_ast = ast.parse(source)
    found: list[Symbol] = []

    # Stage 1: the module itself, when documented.
    module_entry = _extract_module_symbol(module_ast, file_path)
    if module_entry:
        found.append(module_entry)

    # Stage 2: classes at any depth, each followed by its methods.
    for candidate in ast.walk(module_ast):
        if isinstance(candidate, ast.ClassDef):
            found.extend(_extract_class_symbols(candidate, file_path))

    # Stage 3: only direct children of the module, so methods are not
    # reported a second time as plain functions.
    found.extend(
        Symbol(
            name=top_level.name,
            kind="function",
            file=file_path,
            line=top_level.lineno,
            signature=_get_signature(top_level),
            docstring=ast.get_docstring(top_level),
        )
        for top_level in ast.iter_child_nodes(module_ast)
        if isinstance(top_level, (ast.FunctionDef, ast.AsyncFunctionDef))
    )

    return found
240
+
241
+
242
+ # -----------------------------------------------------------------------------
243
+ # TypeScript/JavaScript Extraction (tree-sitter)
244
+ # -----------------------------------------------------------------------------
245
+
246
+
247
def _get_ts_parser(language: Language, *, file_path: str = "") -> Parser:
    """Get a tree-sitter parser for TypeScript or JavaScript.

    Args:
        language: Either "typescript" or "javascript". Any value other than
            "typescript" selects the JavaScript grammar.
        file_path: File path (used to dispatch .tsx to TSX parser).

    Returns:
        Configured tree-sitter Parser.

    Raises:
        ImportError: If tree-sitter or the grammar package needed for
            ``language`` is not installed. The message includes the install
            command in both cases.
    """
    try:
        from tree_sitter import Language as TSLanguage
        from tree_sitter import Parser

        # The grammar packages are separate distributions; import them inside
        # the same guard so a missing grammar yields the same actionable
        # message as a missing tree-sitter core.
        if language == "typescript":
            import tree_sitter_typescript as grammar_module
        else:
            import tree_sitter_javascript as grammar_module
    except ImportError as e:
        raise ImportError(
            "tree-sitter is required for TypeScript/JavaScript scanning. "
            "Install with: uv add tree-sitter tree-sitter-typescript tree-sitter-javascript"
        ) from e

    if language == "typescript":
        # TSX needs its own grammar; plain .ts uses the TypeScript grammar.
        if file_path.endswith(".tsx"):
            grammar = grammar_module.language_tsx()
        else:
            grammar = grammar_module.language_typescript()
    else:
        grammar = grammar_module.language()

    return Parser(TSLanguage(grammar))
283
+
284
+
285
def _find_child_by_type(node: Node, *types: str) -> Node | None:
    """Return the first direct child whose ``type`` is one of ``types``.

    Children are examined in order; ``None`` is returned when nothing matches
    (including when no types are given).
    """
    return next((kid for kid in node.children if kid.type in types), None)
291
+
292
+
293
def _get_node_text(node: Node, source: bytes) -> str:
    """Decode the bytes of ``source`` spanned by ``node`` as UTF-8 text."""
    start, end = node.start_byte, node.end_byte
    raw = source[start:end]
    return raw.decode("utf-8")
296
+
297
+
298
def _extract_ts_signature(node: Node, source: bytes) -> str:
    """Build a short signature string for a TypeScript/JavaScript node.

    Supported node types are class, function (including generator),
    method, interface, enum and type-alias declarations. Any other node
    type yields an empty string. A missing name is rendered as "unknown"
    and a missing parameter list as "()".

    Args:
        node: tree-sitter node to describe.
        source: Source bytes the node offsets refer to.

    Returns:
        The signature text, or "" for unsupported node types.
    """

    def text_of(*child_types: str, default: str) -> str:
        # Text of the first matching direct child, or ``default`` if absent.
        match = _find_child_by_type(node, *child_types)
        return _get_node_text(match, source) if match else default

    kind = node.type

    if kind == "class_declaration":
        class_name = text_of("type_identifier", "identifier", default="unknown")
        # The heritage clause is appended verbatim when the class has one.
        heritage = _find_child_by_type(node, "class_heritage")
        if heritage:
            return f"class {class_name} {_get_node_text(heritage, source)}"
        return f"class {class_name}"

    if kind in ("function_declaration", "generator_function_declaration"):
        func_name = text_of("identifier", default="unknown")
        param_list = text_of("formal_parameters", default="()")
        # A return type annotation, when present, is appended as written.
        annotation = text_of("type_annotation", default="")
        return f"function {func_name}{param_list}{annotation}"

    if kind in ("method_definition", "method_signature"):
        method_name = text_of("property_identifier", "identifier", default="unknown")
        param_list = text_of("formal_parameters", default="()")
        return f"{method_name}{param_list}"

    if kind == "interface_declaration":
        return f"interface {text_of('type_identifier', 'identifier', default='unknown')}"

    if kind == "enum_declaration":
        return f"enum {text_of('identifier', default='unknown')}"

    if kind == "type_alias_declaration":
        return f"type {text_of('type_identifier', 'identifier', default='unknown')}"

    return ""
345
+
346
+
347
def extract_typescript_symbols(source: str, file_path: str) -> list[Symbol]:
    """Extract symbols from TypeScript source code.

    The source is parsed with a tree-sitter TypeScript parser (chosen using
    ``file_path``) and the resulting tree is handed to the shared TS/JS
    symbol walker.

    Args:
        source: TypeScript source code as string.
        file_path: Path to the source file (for metadata).

    Returns:
        List of Symbol objects.

    Examples:
        >>> source = '''
        ... export class MyClass {
        ...     myMethod(): void {}
        ... }
        ... '''
        >>> symbols = extract_typescript_symbols(source, "example.ts")
        >>> symbols[0].kind
        'class'
    """
    ts_parser = _get_ts_parser("typescript", file_path=file_path)
    # tree-sitter works on bytes; node offsets index into this buffer.
    encoded = source.encode("utf-8")
    syntax_tree = ts_parser.parse(encoded)
    return _extract_ts_js_symbols(syntax_tree.root_node, encoded, file_path)
375
+
376
+
377
def extract_javascript_symbols(source: str, file_path: str) -> list[Symbol]:
    """Extract symbols from JavaScript source code.

    The source is parsed with a tree-sitter JavaScript parser and the
    resulting tree is handed to the shared TS/JS symbol walker.

    Args:
        source: JavaScript source code as string.
        file_path: Path to the source file (for metadata).

    Returns:
        List of Symbol objects.

    Examples:
        >>> source = '''
        ... class MyClass {
        ...     myMethod() {}
        ... }
        ... '''
        >>> symbols = extract_javascript_symbols(source, "example.js")
        >>> symbols[0].kind
        'class'
    """
    js_parser = _get_ts_parser("javascript")
    # tree-sitter works on bytes; node offsets index into this buffer.
    encoded = source.encode("utf-8")
    syntax_tree = js_parser.parse(encoded)
    return _extract_ts_js_symbols(syntax_tree.root_node, encoded, file_path)
405
+
406
+
407
def _extract_ts_js_symbols(
    root: Node,
    source: bytes,
    file_path: str,
) -> list[Symbol]:
    """Extract symbols from a tree-sitter parse tree.

    Performs a depth-first walk from ``root`` and records classes, functions
    declared outside any class, methods declared inside a class, interfaces,
    enums, type aliases and exported ``const`` declarations.

    Args:
        root: Root node of the tree-sitter parse tree.
        source: Source code as bytes.
        file_path: Path to the source file.

    Returns:
        List of Symbol objects.
    """
    collected: list[Symbol] = []

    function_node_types = ("function_declaration", "generator_function_declaration")
    method_node_types = ("method_definition", "method_signature")
    # Declarations recorded the same way wherever they appear:
    # node type -> (symbol kind, child types that may carry the name).
    standalone_kinds: dict[str, tuple[SymbolKind, tuple[str, ...]]] = {
        "interface_declaration": ("interface", ("type_identifier", "identifier")),
        "enum_declaration": ("enum", ("identifier",)),
        "type_alias_declaration": ("type_alias", ("type_identifier", "identifier")),
    }

    def name_of(node: Node, name_types: tuple[str, ...]) -> str:
        """Return the declared name of ``node`` or "unknown" when absent."""
        ident = _find_child_by_type(node, *name_types)
        return _get_node_text(ident, source) if ident else "unknown"

    def record(node: Node, kind: SymbolKind, name: str) -> None:
        """Append a parent-less symbol for ``node``."""
        collected.append(
            Symbol(
                name=name,
                kind=kind,
                file=file_path,
                line=node.start_point[0] + 1,  # tree-sitter is 0-indexed
                signature=_extract_ts_signature(node, source),
            )
        )

    def collect_exported_constants(node: Node) -> None:
        """Record each ``const`` declarator found under an export statement."""
        declaration = node
        if node.type == "export_statement":
            declaration = _find_child_by_type(node, "lexical_declaration")
            if declaration is None:
                return

        if declaration.type != "lexical_declaration":
            return

        # Only 'const' declarations are recorded (not 'let' or 'var').
        members = declaration.children
        keyword = members[0] if members else None
        if keyword is None or _get_node_text(keyword, source) != "const":
            return

        for declarator in declaration.children:
            if declarator.type != "variable_declarator":
                continue
            ident = _find_child_by_type(declarator, "identifier")
            if not ident:
                continue
            const_name = _get_node_text(ident, source)
            collected.append(
                Symbol(
                    name=const_name,
                    kind="constant",
                    file=file_path,
                    line=declarator.start_point[0] + 1,
                    signature=f"const {const_name}",
                )
            )

    def visit(node: Node, enclosing_class: str | None = None) -> None:
        kind = node.type

        if kind == "class_declaration":
            class_name = name_of(node, ("type_identifier", "identifier"))
            record(node, "class", class_name)

            # Only the class body is visited, with the class as context.
            body = _find_child_by_type(node, "class_body")
            if body:
                for member in body.children:
                    visit(member, enclosing_class=class_name)
            return  # Don't recurse further into class

        if kind in function_node_types and enclosing_class is None:
            record(node, "function", name_of(node, ("identifier",)))
        elif kind in method_node_types and enclosing_class is not None:
            method_name = name_of(node, ("property_identifier", "identifier"))
            collected.append(
                Symbol(
                    name=method_name,
                    kind="method",
                    file=file_path,
                    line=node.start_point[0] + 1,
                    signature=_extract_ts_signature(node, source),
                    parent=enclosing_class,
                )
            )
        elif kind in standalone_kinds:
            symbol_kind, name_types = standalone_kinds[kind]
            record(node, symbol_kind, name_of(node, name_types))
        elif kind == "export_statement":
            # Exported const declarations; other exported declarations are
            # picked up by the recursion below.
            collect_exported_constants(node)

        # Recurse into children, keeping the current class context.
        for child in node.children:
            visit(child, enclosing_class)

    visit(root)
    return collected
572
+
573
+
574
def _get_php_parser() -> Parser:
    """Get a tree-sitter parser for PHP.

    Returns:
        Configured tree-sitter Parser.

    Raises:
        ImportError: If tree-sitter or tree-sitter-php is not installed. The
            message includes the install command in both cases.
    """
    try:
        from tree_sitter import Language as TSLanguage
        from tree_sitter import Parser

        # The PHP grammar is a separate distribution; keep it inside the same
        # guard so a missing grammar yields the actionable install hint too.
        import tree_sitter_php as ts_php
    except ImportError as e:
        raise ImportError(
            "tree-sitter is required for PHP scanning. "
            "Install with: uv add tree-sitter tree-sitter-php"
        ) from e

    lang = TSLanguage(ts_php.language_php())
    return Parser(lang)
596
+
597
+
598
def _extract_php_signature(node: Node, source: bytes) -> str:
    """Render a short, human-readable signature for a PHP declaration.

    Handles ``class_declaration``, ``interface_declaration``,
    ``trait_declaration``, ``function_definition``, ``method_declaration``
    and ``enum_declaration`` nodes.

    Args:
        node: Declaration node from the PHP parse tree.
        source: Source code as bytes (used to slice node text).

    Returns:
        The signature string, or ``""`` for any other node type.
    """

    def child_text(*child_types: str) -> str | None:
        """Return the text of the first matching child, or None if absent."""
        found = _find_child_by_type(node, *child_types)
        return _get_node_text(found, source) if found else None

    def declared_name() -> str:
        """Return the declared name, falling back to ``"unknown"``."""
        found = child_text("name")
        return "unknown" if found is None else found

    def parameter_list() -> str:
        """Return the parameter list text, or ``"()"`` when there is none."""
        found = child_text("formal_parameters")
        return "()" if found is None else found

    kind = node.type

    if kind == "class_declaration":
        # Name first, then the verbatim base / interface clauses when present.
        pieces = [f"class {declared_name()}"]
        for clause_type in ("base_clause", "class_interface_clause"):
            clause = child_text(clause_type)
            if clause is not None:
                pieces.append(clause)
        return " ".join(pieces)

    if kind == "interface_declaration":
        return f"interface {declared_name()}"

    if kind == "trait_declaration":
        return f"trait {declared_name()}"

    if kind == "function_definition":
        signature = f"function {declared_name()}{parameter_list()}"
        return_type = child_text("primitive_type", "named_type")
        if return_type is not None:
            signature += f": {return_type}"
        return signature

    if kind == "method_declaration":
        pieces = []
        visibility = child_text("visibility_modifier")
        if visibility is not None:
            pieces.append(visibility)
        if _find_child_by_type(node, "static_modifier"):
            pieces.append("static")
        pieces.append(f"function {declared_name()}")
        # Parameters are glued directly onto the name, without a space.
        signature = " ".join(pieces) + parameter_list()
        return_type = child_text("primitive_type", "named_type")
        if return_type is not None:
            signature += f": {return_type}"
        return signature

    if kind == "enum_declaration":
        signature = f"enum {declared_name()}"
        # Backed enum type (e.g., ": string")
        backing_type = child_text("primitive_type")
        if backing_type is not None:
            signature += f": {backing_type}"
        return signature

    return ""
664
+
665
+
666
def _extract_php_symbols(
    root: Node,
    source: bytes,
    file_path: str,
) -> list[Symbol]:
    """Extract symbols from a PHP tree-sitter parse tree.

    Walks the AST and extracts classes, interfaces, traits, functions,
    methods, and enums. The most recently seen namespace is used to
    qualify class-like, function and enum names (dot-separated); methods
    keep their local name and carry the qualified container as ``parent``.

    Args:
        root: Root node of the tree-sitter parse tree.
        source: Source code as bytes.
        file_path: Path to the source file.

    Returns:
        List of Symbol objects.
    """
    symbols: list[Symbol] = []
    namespace = ""

    # Container types whose children include methods, mapped to symbol kind.
    container_kinds: dict[str, SymbolKind] = {
        "class_declaration": "class",
        "interface_declaration": "interface",
        "trait_declaration": "trait",
    }

    def _qualify(name: str) -> str:
        # Use dot separator for internal IDs — PHP uses backslash for namespaces
        # but backslashes in component IDs break JSON, graph queries, and ID dedup.
        return f"{namespace}.{name}" if namespace else name

    def _local_name(node: Node) -> str:
        name_node = _find_child_by_type(node, "name")
        return _get_node_text(name_node, source) if name_node else "unknown"

    def walk(node: Node, parent_name: str | None = None) -> None:
        nonlocal namespace
        node_type = node.type

        if node_type == "namespace_definition":
            ns_node = _find_child_by_type(node, "namespace_name")
            if ns_node:
                # Normalize PHP backslash separators to dots for graph IDs
                namespace = _get_node_text(ns_node, source).replace("\\", ".")
            else:
                # An unnamed ``namespace { ... }`` block holds global code, so
                # a previously seen namespace must not leak into it.
                namespace = ""
            # Continue walking children (declarations inside namespace)
            for child in node.children:
                walk(child, parent_name)
            return

        if node_type in container_kinds:
            qualified = _qualify(_local_name(node))
            symbols.append(
                Symbol(
                    name=qualified,
                    kind=container_kinds[node_type],
                    file=file_path,
                    line=node.start_point[0] + 1,
                    signature=_extract_php_signature(node, source),
                )
            )

            # Walk declaration_list for methods
            body = _find_child_by_type(node, "declaration_list")
            if body:
                for child in body.children:
                    walk(child, parent_name=qualified)
            return

        if node_type == "method_declaration" and parent_name is not None:
            symbols.append(
                Symbol(
                    name=_local_name(node),
                    kind="method",
                    file=file_path,
                    line=node.start_point[0] + 1,
                    signature=_extract_php_signature(node, source),
                    parent=parent_name,
                )
            )
            return

        # Only functions outside a container are recorded as functions.
        if node_type == "function_definition" and parent_name is None:
            symbols.append(
                Symbol(
                    name=_qualify(_local_name(node)),
                    kind="function",
                    file=file_path,
                    line=node.start_point[0] + 1,
                    signature=_extract_php_signature(node, source),
                )
            )
            return

        if node_type == "enum_declaration":
            symbols.append(
                Symbol(
                    name=_qualify(_local_name(node)),
                    kind="enum",
                    file=file_path,
                    line=node.start_point[0] + 1,
                    signature=_extract_php_signature(node, source),
                )
            )
            return

        # Recurse into children
        for child in node.children:
            walk(child, parent_name)

    walk(root)
    return symbols
793
+
794
+
795
def extract_php_symbols(source: str, file_path: str) -> list[Symbol]:
    """Parse PHP source text and return the symbols it declares.

    The text is parsed with tree-sitter and the resulting tree is handed to
    ``_extract_php_symbols``, which collects classes, interfaces, traits,
    functions, methods, and enums.

    Args:
        source: PHP source code as string.
        file_path: Path to the source file (for metadata).

    Returns:
        List of Symbol objects.

    Examples:
        >>> source = '''
        ... <?php
        ... class User {
        ...     public function getName(): string {}
        ... }
        ... '''
        >>> symbols = extract_php_symbols(source, "User.php")
        >>> symbols[0].kind
        'class'
    """
    php_parser = _get_php_parser()
    # The same byte string is used for parsing and for slicing node text.
    encoded = source.encode("utf-8")
    root_node = php_parser.parse(encoded).root_node
    return _extract_php_symbols(root_node, encoded, file_path)
824
+
825
+
826
+ # -----------------------------------------------------------------------------
827
+ # Svelte Extraction (tree-sitter-svelte + JS/TS re-parse)
828
+ # -----------------------------------------------------------------------------
829
+
830
+
831
def _get_svelte_parser() -> Parser:
    """Build a tree-sitter parser configured for Svelte.

    Both ``tree-sitter`` and the ``tree-sitter-svelte`` grammar are imported
    lazily, so the dependency is only needed when Svelte files are scanned.

    Returns:
        Configured tree-sitter Parser.

    Raises:
        ImportError: If tree-sitter or tree-sitter-svelte is not installed.
    """
    try:
        from tree_sitter import Language as TSLanguage
        from tree_sitter import Parser

        # Imported inside the guard so that a missing grammar package yields
        # the same install hint as a missing tree-sitter core.
        import tree_sitter_svelte
    except ImportError as e:
        raise ImportError(
            "tree-sitter is required for Svelte scanning. "
            "Install with: uv add tree-sitter tree-sitter-svelte"
        ) from e

    lang = TSLanguage(tree_sitter_svelte.language())
    return Parser(lang)
853
+
854
+
855
def _detect_svelte_script_lang(script_element: Node, source: bytes) -> Language:
    """Detect whether a Svelte script block uses TypeScript or JavaScript.

    Looks for a ``lang`` attribute on the element's ``start_tag`` whose value
    is ``ts`` or ``typescript``. Both the quoted form (``lang="ts"``) and the
    unquoted form (``lang=ts``) are recognised.

    Args:
        script_element: The ``script_element`` node from tree-sitter-svelte.
        source: Source code as bytes.

    Returns:
        ``"typescript"`` if lang attribute indicates TS, else ``"javascript"``.
    """

    def node_text(node: Node) -> str:
        return source[node.start_byte : node.end_byte].decode("utf-8")

    for tag in script_element.children:
        if tag.type != "start_tag":
            continue
        for attr in tag.children:
            if attr.type != "attribute":
                continue

            name_node: Node | None = None
            values: list[str] = []
            for part in attr.children:
                if part.type == "attribute_name":
                    name_node = part
                elif part.type == "attribute_value":
                    # Unquoted form: the value is a direct child of the attribute.
                    values.append(node_text(part))
                elif part.type == "quoted_attribute_value":
                    # Quoted form: the value is nested inside the quotes node.
                    values.extend(
                        node_text(inner)
                        for inner in part.children
                        if inner.type == "attribute_value"
                    )

            if name_node is None or node_text(name_node) != "lang":
                continue
            if any(value in ("ts", "typescript") for value in values):
                return "typescript"

    return "javascript"
897
+
898
+
899
def extract_svelte_symbols(source: str, file_path: str) -> list[Symbol]:
    """Extract symbols from a Svelte component file.

    Works in two passes:
    1. Parse the file with tree-sitter-svelte and locate top-level
       ``<script>`` blocks.
    2. Re-parse each block's text with the JS or TS parser and collect its
       symbols, shifting their line numbers back into file coordinates.

    The first symbol returned is always a ``"component"`` symbol named after
    the file stem.

    Args:
        source: Svelte source code as string.
        file_path: Path to the source file (for metadata).

    Returns:
        List of Symbol objects.

    Examples:
        >>> source = '''
        ... <script>
        ... function hello() {}
        ... </script>
        ... '''
        >>> symbols = extract_svelte_symbols(source, "App.svelte")
        >>> symbols[0].kind
        'component'
    """
    source_bytes = source.encode("utf-8")
    component_name = Path(file_path).stem

    symbols: list[Symbol] = [
        Symbol(
            name=component_name,
            kind="component",
            file=file_path,
            line=1,
            signature=f"component {component_name}",
        )
    ]

    root = _get_svelte_parser().parse(source_bytes).root_node
    script_elements = (el for el in root.children if el.type == "script_element")

    for script_el in script_elements:
        # Detect lang="ts" on this specific script element
        script_lang = _detect_svelte_script_lang(script_el, source_bytes)

        # The script body is the first raw_text child, if any.
        raw_text_node = next(
            (sub for sub in script_el.children if sub.type == "raw_text"), None
        )
        if raw_text_node is None:
            continue

        content = source_bytes[raw_text_node.start_byte : raw_text_node.end_byte]
        if not content.strip():
            continue

        # 0-based row where raw_text begins; added to the 1-based line numbers
        # produced by the inner parse to map them back onto the .svelte file.
        line_offset = raw_text_node.start_point[0]

        # Parse script content with JS or TS parser
        inner_tree = _get_ts_parser(script_lang, file_path=file_path).parse(content)
        inner_symbols = _extract_ts_js_symbols(inner_tree.root_node, content, file_path)

        symbols.extend(
            Symbol(
                name=inner.name,
                kind=inner.kind,
                file=inner.file,
                line=inner.line + line_offset,
                signature=inner.signature,
                docstring=inner.docstring,
                parent=inner.parent,
            )
            for inner in inner_symbols
        )

    return symbols
984
+
985
+
986
+ # -----------------------------------------------------------------------------
987
+ # C# Extraction (tree-sitter-c-sharp)
988
+ # -----------------------------------------------------------------------------
989
+
990
+
991
def _get_csharp_parser() -> Parser:
    """Build a tree-sitter parser configured for C#.

    Both ``tree-sitter`` and the ``tree-sitter-c-sharp`` grammar are imported
    lazily, so the dependency is only needed when C# files are scanned.

    Returns:
        Configured tree-sitter Parser.

    Raises:
        ImportError: If tree-sitter or tree-sitter-c-sharp is not installed.
    """
    try:
        from tree_sitter import Language as TSLanguage
        from tree_sitter import Parser

        # Imported inside the guard so that a missing grammar package yields
        # the same install hint as a missing tree-sitter core.
        import tree_sitter_c_sharp
    except ImportError as e:
        raise ImportError(
            "tree-sitter is required for C# scanning. "
            "Install with: uv add tree-sitter tree-sitter-c-sharp"
        ) from e

    lang = TSLanguage(tree_sitter_c_sharp.language())
    return Parser(lang)
1013
+
1014
+
1015
def _extract_csharp_signature(node: Node, source: bytes) -> str:
    """Render a short, human-readable signature for a C# declaration.

    Handles class, interface, struct, record, enum, method and property
    declarations.

    Args:
        node: Declaration node from the C# parse tree.
        source: Source code as bytes (used to slice node text).

    Returns:
        The signature string, or ``""`` for any other node type.
    """

    def text(target: Node) -> str:
        return _get_node_text(target, source)

    def first(*child_types: str) -> Node | None:
        return _find_child_by_type(node, *child_types)

    def identifier() -> str:
        """Return the first identifier's text, or ``"unknown"``."""
        found = first("identifier")
        return text(found) if found else "unknown"

    def modifiers() -> list[str]:
        """Return the text of every ``modifier`` child, in source order."""
        return [text(child) for child in node.children if child.type == "modifier"]

    kind = node.type

    # Declarations rendered as "<keyword> <name> [<base list>]".
    with_base_list = {"class_declaration": "class", "struct_declaration": "struct"}
    # Declarations rendered as "<keyword> <name>".
    name_only = {
        "interface_declaration": "interface",
        "record_declaration": "record",
        "enum_declaration": "enum",
    }

    if kind in with_base_list:
        header = f"{with_base_list[kind]} {identifier()}"
        base_list = first("base_list")
        return f"{header} {text(base_list)}" if base_list else header

    if kind in name_only:
        return f"{name_only[kind]} {identifier()}"

    if kind == "method_declaration":
        tokens = modifiers()
        # Return type
        return_type = first(
            "predefined_type", "identifier", "generic_name", "void_keyword"
        )
        if return_type:
            tokens.append(text(return_type))
        fallback = first("identifier")
        if fallback:
            # The method name is the last identifier before parameter_list;
            # the first identifier is only the starting value.
            method_name = text(fallback)
            for child in node.children:
                if child.type == "parameter_list":
                    break
                if child.type == "identifier":
                    method_name = text(child)
            tokens.append(method_name)
        parameters = first("parameter_list")
        if parameters:
            tokens.append(text(parameters))
        return " ".join(tokens)

    if kind == "property_declaration":
        tokens = modifiers()
        property_type = first("predefined_type", "identifier", "generic_name")
        if property_type:
            tokens.append(text(property_type))
        name_node = first("identifier")
        if name_node:
            tokens.append(text(name_node))
        return " ".join(tokens)

    return ""
1094
+
1095
+
1096
def _extract_csharp_symbols_from_tree(
    root: Node,
    source: bytes,
    file_path: str,
) -> list[Symbol]:
    """Collect symbols from a C# tree-sitter parse tree.

    Emits one symbol per class, interface, struct, record and enum, plus one
    per method and property found while walking a type's
    ``declaration_list``. Structs and records are reported with kind
    ``"class"`` and properties with kind ``"method"``. Symbol names are the
    local identifiers.

    Args:
        root: Root node of the tree-sitter parse tree.
        source: Source code as bytes.
        file_path: Path to the source file.

    Returns:
        List of Symbol objects.
    """
    collected: list[Symbol] = []
    # NOTE(review): the namespace is recorded while walking but is not
    # currently applied to symbol names — confirm whether that is intended.
    namespace = ""

    type_kinds: dict[str, SymbolKind] = {
        "class_declaration": "class",
        "interface_declaration": "interface",
        "struct_declaration": "class",
        "record_declaration": "class",
    }

    def identifier_of(node: Node) -> str:
        name_node = _find_child_by_type(node, "identifier")
        return _get_node_text(name_node, source) if name_node else "unknown"

    def method_name_of(node: Node) -> str:
        # Last identifier before parameter_list (earlier ones may be types).
        found = "unknown"
        for child in node.children:
            if child.type == "parameter_list":
                break
            if child.type == "identifier":
                found = _get_node_text(child, source)
        return found

    def emit(node: Node, name: str, kind: SymbolKind, **extra: str) -> None:
        collected.append(
            Symbol(
                name=name,
                kind=kind,
                file=file_path,
                line=node.start_point[0] + 1,
                signature=_extract_csharp_signature(node, source),
                **extra,
            )
        )

    def walk(node: Node, parent_name: str | None = None) -> None:
        nonlocal namespace
        node_type = node.type

        if node_type == "namespace_declaration":
            ns_node = _find_child_by_type(node, "qualified_name", "identifier")
            if ns_node:
                namespace = _get_node_text(ns_node, source)
            members = _find_child_by_type(node, "declaration_list")
            if members:
                for member in members.children:
                    walk(member, parent_name)
            return

        if node_type in type_kinds:
            type_name = identifier_of(node)
            emit(node, type_name, type_kinds[node_type])
            members = _find_child_by_type(node, "declaration_list")
            if members:
                for member in members.children:
                    walk(member, parent_name=type_name)
            return

        if parent_name is not None and node_type == "method_declaration":
            emit(node, method_name_of(node), "method", parent=parent_name)
            return

        if parent_name is not None and node_type == "property_declaration":
            emit(node, identifier_of(node), "method", parent=parent_name)
            return

        if node_type == "enum_declaration":
            emit(node, identifier_of(node), "enum")
            return

        for child in node.children:
            walk(child, parent_name)

    walk(root)
    return collected
1218
+
1219
+
1220
def extract_csharp_symbols(source: str, file_path: str) -> list[Symbol]:
    """Parse C# source text and return the symbols it declares.

    The text is parsed with tree-sitter and the resulting tree is handed to
    ``_extract_csharp_symbols_from_tree``, which collects classes,
    interfaces, structs, records, enums, methods, and properties.

    Args:
        source: C# source code as string.
        file_path: Path to the source file (for metadata).

    Returns:
        List of Symbol objects.

    Examples:
        >>> source = '''
        ... public class UserService {
        ...     public void Process() { }
        ... }
        ... '''
        >>> symbols = extract_csharp_symbols(source, "UserService.cs")
        >>> symbols[0].kind
        'class'
    """
    csharp_parser = _get_csharp_parser()
    # The same byte string is used for parsing and for slicing node text.
    encoded = source.encode("utf-8")
    root_node = csharp_parser.parse(encoded).root_node
    return _extract_csharp_symbols_from_tree(root_node, encoded, file_path)
1248
+
1249
+
1250
+ # ── Dart / Flutter ───────────────────────────────────────────────────────
1251
+
1252
+
1253
def _get_dart_parser() -> Parser:
    """Return a tree-sitter parser for Dart.

    The parser is obtained from ``tree-sitter-language-pack``; the original
    author notes that no standalone tree-sitter-dart package exists on PyPI.

    Returns:
        Configured tree-sitter Parser for Dart.

    Raises:
        ImportError: If tree-sitter-language-pack is not installed.
    """
    try:
        from tree_sitter_language_pack import get_parser as load_parser
    except ImportError as err:
        hint = (
            "tree-sitter-language-pack is required for Dart scanning. "
            "Install with: uv add tree-sitter-language-pack"
        )
        raise ImportError(hint) from err

    return load_parser("dart")
1274
+
1275
+
1276
def _extract_dart_signature(node: Node, source: bytes) -> str:
    """Render a short, human-readable signature for a Dart declaration.

    Handles ``class_definition``, ``mixin_declaration``,
    ``extension_declaration``, ``enum_declaration``, ``function_signature``
    and ``method_signature`` nodes.

    Args:
        node: Declaration node from the Dart parse tree.
        source: Source code as bytes (used to slice node text).

    Returns:
        The signature string, or ``""`` for any other node type.
    """

    def first(*child_types: str) -> Node | None:
        return _find_child_by_type(node, *child_types)

    def identifier() -> str:
        """Return the first identifier's text, or ``"unknown"``."""
        found = first("identifier")
        return _get_node_text(found, source) if found else "unknown"

    kind = node.type

    if kind in ("function_signature", "method_signature"):
        # Signature nodes are rendered verbatim, minus surrounding whitespace.
        return _get_node_text(node, source).strip()

    if kind == "class_definition":
        name = identifier()
        # Check for abstract modifier
        keyword = "abstract class" if first("abstract") else "class"
        superclass = first("superclass")
        if superclass:
            return f"{keyword} {name} {_get_node_text(superclass, source)}"
        return f"{keyword} {name}"

    if kind == "mixin_declaration":
        return f"mixin {identifier()}"

    if kind == "extension_declaration":
        name = identifier()
        # Find the "on" type
        extended_type = first("type_identifier")
        if extended_type:
            return f"extension {name} on {_get_node_text(extended_type, source)}"
        return f"extension {name}"

    if kind == "enum_declaration":
        return f"enum {identifier()}"

    return ""
1314
+
1315
+
1316
def _extract_dart_symbols_from_tree(
    root: Node,
    source: bytes,
    file_path: str,
) -> list[Symbol]:
    """Collect symbols from a Dart tree-sitter parse tree.

    Emits one symbol per class, mixin, extension and enum, one per
    ``function_signature`` seen outside a container, and one per
    ``method_signature`` seen inside a container body. Mixins are reported
    with kind ``"trait"`` and extensions with kind ``"class"``.

    Args:
        root: Root node of the tree-sitter parse tree.
        source: Source code as bytes.
        file_path: Path to the source file.

    Returns:
        List of Symbol objects.
    """
    collected: list[Symbol] = []

    container_kinds: dict[str, SymbolKind] = {
        "class_definition": "class",
        "mixin_declaration": "trait",
        "extension_declaration": "class",
    }

    def identifier_of(node: Node) -> str:
        name_node = _find_child_by_type(node, "identifier")
        return _get_node_text(name_node, source) if name_node else "unknown"

    def emit(node: Node, name: str, kind: SymbolKind, **extra: str) -> None:
        collected.append(
            Symbol(
                name=name,
                kind=kind,
                file=file_path,
                line=node.start_point[0] + 1,
                signature=_extract_dart_signature(node, source),
                **extra,
            )
        )

    def walk(node: Node, parent_name: str | None = None) -> None:
        node_type = node.type

        if node_type in container_kinds:
            container_name = identifier_of(node)
            emit(node, container_name, container_kinds[node_type])

            # Walk into body for methods
            body = _find_child_by_type(node, "class_body", "extension_body")
            if body:
                for member in body.children:
                    walk(member, parent_name=container_name)
            return

        if node_type == "enum_declaration":
            emit(node, identifier_of(node), "enum")
            return

        inside_container = parent_name is not None

        # Top-level function: function_signature at program level
        if node_type == "function_signature" and not inside_container:
            emit(node, identifier_of(node), "function")
            return

        # Method inside a container
        if node_type == "method_signature" and inside_container:
            # method_signature contains function_signature or getter_signature
            inner = _find_child_by_type(node, "function_signature", "getter_signature")
            emit(
                node,
                identifier_of(inner if inner else node),
                "method",
                parent=parent_name,
            )
            return

        for child in node.children:
            walk(child, parent_name)

    walk(root)
    return collected
1423
+
1424
+
1425
def extract_dart_symbols(source: str, file_path: str) -> list[Symbol]:
    """Parse Dart source text and return the symbols it declares.

    The text is parsed with tree-sitter and the resulting tree is handed to
    ``_extract_dart_symbols_from_tree``, which collects classes, mixins,
    extensions, enums, top-level functions, and methods.

    Args:
        source: Dart source code as string.
        file_path: Path to the source file (for metadata).

    Returns:
        List of Symbol objects.

    Examples:
        >>> source = '''
        ... class UserService {
        ...     void process() {}
        ... }
        ... '''
        >>> symbols = extract_dart_symbols(source, "user_service.dart")
        >>> symbols[0].kind
        'class'
    """
    dart_parser = _get_dart_parser()
    # The same byte string is used for parsing and for slicing node text.
    encoded = source.encode("utf-8")
    root_node = dart_parser.parse(encoded).root_node
    return _extract_dart_symbols_from_tree(root_node, encoded, file_path)
1453
+
1454
+
1455
def detect_language(file_path: str | Path) -> Language | None:
    """Map a file's extension to a supported language.

    The lookup is case-insensitive: the suffix is lower-cased before it is
    looked up in ``EXTENSION_TO_LANGUAGE``.

    Args:
        file_path: Path to the file.

    Returns:
        Language literal or None if not supported.

    Examples:
        >>> detect_language("foo.py")
        'python'
        >>> detect_language("bar.ts")
        'typescript'
        >>> detect_language("baz.rs")  # Returns None
    """
    suffix = Path(file_path).suffix
    return EXTENSION_TO_LANGUAGE.get(suffix.lower())
1473
+
1474
+
1475
def extract_symbols(source: str, file_path: str, language: Language) -> list[Symbol]:
    """Dispatch symbol extraction to the extractor for *language*.

    Args:
        source: Source code as string.
        file_path: Path to the source file (for metadata).
        language: Language of the source code.

    Returns:
        List of Symbol objects.

    Raises:
        ValueError: If language is not supported.
        SyntaxError: If source code cannot be parsed (Python only).

    Examples:
        >>> symbols = extract_symbols("class Foo: pass", "foo.py", "python")
        >>> symbols[0].kind
        'class'
    """
    # One guard per supported language; anything else is rejected below.
    if language == "python":
        return extract_python_symbols(source, file_path)
    if language == "typescript":
        return extract_typescript_symbols(source, file_path)
    if language == "javascript":
        return extract_javascript_symbols(source, file_path)
    if language == "php":
        return extract_php_symbols(source, file_path)
    if language == "svelte":
        return extract_svelte_symbols(source, file_path)
    if language == "csharp":
        return extract_csharp_symbols(source, file_path)
    if language == "dart":
        return extract_dart_symbols(source, file_path)

    raise ValueError(f"Unsupported language: {language}")
1511
+
1512
+
1513
# Default patterns to exclude when scanning directories.
# Entries shaped like ``**/<name>/**`` are recognised by
# ``_is_directory_pattern`` and exclude any path that has ``<name>`` as one of
# its components; the remaining entries are filename globs that
# ``_should_exclude`` checks with ``Path.match``.
DEFAULT_EXCLUDE_PATTERNS: list[str] = [
    "**/__pycache__/**",
    "**/.venv/**",
    "**/venv/**",
    "**/node_modules/**",
    "**/vendor/**",
    "**/dist/**",
    "**/build/**",
    "**/.git/**",
    "**/*.blade.php",  # presumably Blade templates rather than plain PHP — confirm
    "*.Designer.cs",  # presumably generated designer code — confirm
]

# Language-specific default glob patterns (list to support multiple extensions).
# Keyed by language; the ``None`` key holds the patterns used when no
# language is given.
DEFAULT_LANGUAGE_PATTERNS: dict[Language | None, list[str]] = {
    "python": ["**/*.py"],
    "typescript": ["**/*.ts", "**/*.tsx"],
    "javascript": ["**/*.js", "**/*.jsx", "**/*.mjs", "**/*.cjs"],
    "php": ["**/*.php"],
    "svelte": ["**/*.svelte"],
    "csharp": ["**/*.cs"],
    "dart": ["**/*.dart"],
    None: ["**/*"],  # Auto-detect: scan all files
}
1538
+
1539
+
1540
def _read_gitignore(root: Path) -> list[str]:
    """Read .gitignore from *root* and convert entries to glob patterns.

    Only simple .gitignore semantics are supported: blank lines, comments
    (``#``) and negation entries (``!``) are skipped, a trailing ``/`` is
    dropped, and every remaining entry is wrapped as ``**/<entry>/**``.
    Anchoring (a leading ``/``) gets no special treatment.

    Reading is best effort: a missing, unreadable or non-UTF-8 file yields
    no patterns instead of an error.

    Args:
        root: Directory expected to contain the ``.gitignore`` file.

    Returns:
        Glob patterns in file order; empty if nothing could be read.
    """
    gitignore = root / ".gitignore"
    if not gitignore.is_file():
        return []

    try:
        raw_lines = gitignore.read_text(encoding="utf-8").splitlines()
    except (OSError, UnicodeDecodeError):
        # UnicodeDecodeError is not an OSError; without it a .gitignore in a
        # legacy encoding would abort the scan.
        return []

    patterns: list[str] = []
    for raw_line in raw_lines:
        line = raw_line.strip()
        # Skip blanks, comments, negation
        if not line or line.startswith(("#", "!")):
            continue
        # Strip trailing slash (directory marker)
        entry = line.rstrip("/")
        if not entry:
            # A line made only of slashes names nothing.
            continue
        # Wrap with **/ ... /** so the entry can match anywhere in the tree
        patterns.append(f"**/{entry}/**")
    return patterns
1568
+
1569
+
1570
+ def _is_directory_pattern(pattern: str) -> str | None:
1571
+ """Extract directory name from patterns like ``**/node_modules/**``.
1572
+
1573
+ Returns the bare directory name if the pattern represents a directory
1574
+ exclusion, or None if it's a file-level pattern.
1575
+ """
1576
+ stripped = pattern.strip("*").strip("/")
1577
+ if "/" not in stripped and pattern.endswith("/**"):
1578
+ return stripped
1579
+ return None
1580
+
1581
+
1582
+ def _should_exclude(file_path: Path, exclude_patterns: list[str]) -> bool:
1583
+ """Check if a file should be excluded based on patterns.
1584
+
1585
+ Directory patterns (``**/name/**``) are matched by checking whether
1586
+ the directory name appears anywhere in the path parts. File patterns
1587
+ (``**/test_*``) use ``PurePath.match`` which handles ``**`` correctly
1588
+ for filename globbing.
1589
+ """
1590
+ parts = file_path.parts
1591
+ for pattern in exclude_patterns:
1592
+ dir_name = _is_directory_pattern(pattern)
1593
+ if dir_name is not None:
1594
+ if dir_name in parts:
1595
+ return True
1596
+ else:
1597
+ if file_path.match(pattern):
1598
+ return True
1599
+ return False
1600
+
1601
+
1602
+ def _process_source_file(
1603
+ file_path: Path, rel_str: str, language: Language, result: ScanResult
1604
+ ) -> None:
1605
+ """Extract symbols from a source file and update result."""
1606
+ try:
1607
+ source = file_path.read_text(encoding="utf-8")
1608
+ symbols = extract_symbols(source, rel_str, language)
1609
+ result.symbols.extend(symbols)
1610
+ result.files_scanned += 1
1611
+ except SyntaxError as e:
1612
+ result.errors.append(f"{rel_str}: {e}")
1613
+ except UnicodeDecodeError as e:
1614
+ result.errors.append(f"{rel_str}: {e}")
1615
+ except Exception as e:
1616
+ result.errors.append(f"{rel_str}: {e}")
1617
+
1618
+
1619
def scan_directory(
    path: Path,
    *,
    language: Language | None = None,
    pattern: str | None = None,
    exclude_patterns: list[str] | None = None,
) -> ScanResult:
    """Scan a directory tree and collect the code symbols it contains.

    Files are discovered with ``path.glob``, de-duplicated by resolved path,
    filtered through the exclude patterns, and passed one by one to
    ``_process_source_file``.  The languages with built-in glob defaults are
    the keys of ``DEFAULT_LANGUAGE_PATTERNS``.

    Args:
        path: Directory path to scan.
        language: Language to scan for. If None, the language is detected
            per file and files with no detected language are skipped.
        pattern: Glob pattern for files. If None, uses the language-specific
            default.
        exclude_patterns: Patterns to exclude (e.g., ["**/test_*"]). If None,
            ``DEFAULT_EXCLUDE_PATTERNS`` is used. Patterns derived from a
            ``.gitignore`` in *path* are appended in either case.

    Returns:
        ScanResult with all extracted symbols.

    Examples:
        >>> result = scan_directory(Path("src/"))  # Auto-detect
        >>> result = scan_directory(Path("src/"), language="typescript")
    """
    # NOTE(review): exclusion is evaluated on the globbed path, which still
    # carries the *path* prefix, so a scan root located inside e.g. ``build/``
    # appears to exclude every file -- confirm whether that is intended.
    excludes = (
        list(DEFAULT_EXCLUDE_PATTERNS) if exclude_patterns is None else exclude_patterns
    )

    # Fold in whatever the scan root's .gitignore contributes.
    from_gitignore = _read_gitignore(path)
    if from_gitignore:
        excludes = [*excludes, *from_gitignore]

    # An explicit pattern wins; otherwise fall back to the per-language defaults.
    if pattern is None:
        globs = DEFAULT_LANGUAGE_PATTERNS.get(language, ["**/*"])
    else:
        globs = [pattern]

    result = ScanResult()
    root = path.resolve()

    # Several globs may yield the same file; track resolved paths to visit
    # each file only once.
    visited: set[Path] = set()
    for glob_pattern in globs:
        for candidate in path.glob(glob_pattern):
            if candidate.is_dir():
                continue

            canonical = candidate.resolve()
            if canonical in visited:
                continue
            visited.add(canonical)

            if _should_exclude(candidate, excludes):
                continue

            rel_str = (
                portable_path(candidate, root)
                if candidate.is_relative_to(root)
                else candidate.as_posix()
            )

            file_language = language or detect_language(candidate)
            if file_language is not None:
                _process_source_file(candidate, rel_str, file_language, result)

    return result