raise-cli 2.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- raise_cli/__init__.py +38 -0
- raise_cli/__main__.py +30 -0
- raise_cli/adapters/__init__.py +91 -0
- raise_cli/adapters/declarative/__init__.py +26 -0
- raise_cli/adapters/declarative/adapter.py +267 -0
- raise_cli/adapters/declarative/discovery.py +94 -0
- raise_cli/adapters/declarative/expressions.py +150 -0
- raise_cli/adapters/declarative/reference/__init__.py +1 -0
- raise_cli/adapters/declarative/reference/github.yaml +143 -0
- raise_cli/adapters/declarative/schema.py +98 -0
- raise_cli/adapters/filesystem.py +299 -0
- raise_cli/adapters/mcp_bridge.py +10 -0
- raise_cli/adapters/mcp_confluence.py +246 -0
- raise_cli/adapters/mcp_jira.py +405 -0
- raise_cli/adapters/models.py +205 -0
- raise_cli/adapters/protocols.py +180 -0
- raise_cli/adapters/registry.py +90 -0
- raise_cli/adapters/sync.py +149 -0
- raise_cli/agents/__init__.py +14 -0
- raise_cli/agents/antigravity.yaml +8 -0
- raise_cli/agents/claude.yaml +8 -0
- raise_cli/agents/copilot.yaml +8 -0
- raise_cli/agents/copilot_plugin.py +124 -0
- raise_cli/agents/cursor.yaml +7 -0
- raise_cli/agents/roo.yaml +8 -0
- raise_cli/agents/windsurf.yaml +8 -0
- raise_cli/artifacts/__init__.py +30 -0
- raise_cli/artifacts/models.py +43 -0
- raise_cli/artifacts/reader.py +55 -0
- raise_cli/artifacts/renderer.py +104 -0
- raise_cli/artifacts/story_design.py +69 -0
- raise_cli/artifacts/writer.py +45 -0
- raise_cli/backlog/__init__.py +1 -0
- raise_cli/backlog/sync.py +115 -0
- raise_cli/cli/__init__.py +3 -0
- raise_cli/cli/commands/__init__.py +3 -0
- raise_cli/cli/commands/_resolve.py +153 -0
- raise_cli/cli/commands/adapters.py +362 -0
- raise_cli/cli/commands/artifact.py +137 -0
- raise_cli/cli/commands/backlog.py +333 -0
- raise_cli/cli/commands/base.py +31 -0
- raise_cli/cli/commands/discover.py +551 -0
- raise_cli/cli/commands/docs.py +130 -0
- raise_cli/cli/commands/doctor.py +177 -0
- raise_cli/cli/commands/gate.py +223 -0
- raise_cli/cli/commands/graph.py +1086 -0
- raise_cli/cli/commands/info.py +81 -0
- raise_cli/cli/commands/init.py +746 -0
- raise_cli/cli/commands/journal.py +167 -0
- raise_cli/cli/commands/mcp.py +524 -0
- raise_cli/cli/commands/memory.py +467 -0
- raise_cli/cli/commands/pattern.py +348 -0
- raise_cli/cli/commands/profile.py +59 -0
- raise_cli/cli/commands/publish.py +80 -0
- raise_cli/cli/commands/release.py +338 -0
- raise_cli/cli/commands/session.py +528 -0
- raise_cli/cli/commands/signal.py +410 -0
- raise_cli/cli/commands/skill.py +350 -0
- raise_cli/cli/commands/skill_set.py +145 -0
- raise_cli/cli/error_handler.py +158 -0
- raise_cli/cli/main.py +163 -0
- raise_cli/compat.py +66 -0
- raise_cli/config/__init__.py +41 -0
- raise_cli/config/agent_plugin.py +105 -0
- raise_cli/config/agent_registry.py +233 -0
- raise_cli/config/agents.py +120 -0
- raise_cli/config/ide.py +32 -0
- raise_cli/config/paths.py +379 -0
- raise_cli/config/settings.py +180 -0
- raise_cli/context/__init__.py +42 -0
- raise_cli/context/analyzers/__init__.py +16 -0
- raise_cli/context/analyzers/models.py +36 -0
- raise_cli/context/analyzers/protocol.py +43 -0
- raise_cli/context/analyzers/python.py +292 -0
- raise_cli/context/builder.py +1569 -0
- raise_cli/context/diff.py +213 -0
- raise_cli/context/extractors/__init__.py +13 -0
- raise_cli/context/extractors/skills.py +121 -0
- raise_cli/core/__init__.py +37 -0
- raise_cli/core/files.py +66 -0
- raise_cli/core/text.py +174 -0
- raise_cli/core/tools.py +441 -0
- raise_cli/discovery/__init__.py +50 -0
- raise_cli/discovery/analyzer.py +691 -0
- raise_cli/discovery/drift.py +355 -0
- raise_cli/discovery/scanner.py +1687 -0
- raise_cli/doctor/__init__.py +4 -0
- raise_cli/doctor/checks/__init__.py +1 -0
- raise_cli/doctor/checks/environment.py +110 -0
- raise_cli/doctor/checks/project.py +238 -0
- raise_cli/doctor/fix.py +80 -0
- raise_cli/doctor/models.py +56 -0
- raise_cli/doctor/protocol.py +43 -0
- raise_cli/doctor/registry.py +100 -0
- raise_cli/doctor/report.py +141 -0
- raise_cli/doctor/runner.py +95 -0
- raise_cli/engines/__init__.py +3 -0
- raise_cli/exceptions.py +215 -0
- raise_cli/gates/__init__.py +19 -0
- raise_cli/gates/builtin/__init__.py +1 -0
- raise_cli/gates/builtin/coverage.py +52 -0
- raise_cli/gates/builtin/lint.py +48 -0
- raise_cli/gates/builtin/tests.py +48 -0
- raise_cli/gates/builtin/types.py +48 -0
- raise_cli/gates/models.py +40 -0
- raise_cli/gates/protocol.py +41 -0
- raise_cli/gates/registry.py +141 -0
- raise_cli/governance/__init__.py +11 -0
- raise_cli/governance/extractor.py +412 -0
- raise_cli/governance/models.py +134 -0
- raise_cli/governance/parsers/__init__.py +35 -0
- raise_cli/governance/parsers/_convert.py +38 -0
- raise_cli/governance/parsers/adr.py +274 -0
- raise_cli/governance/parsers/backlog.py +356 -0
- raise_cli/governance/parsers/constitution.py +119 -0
- raise_cli/governance/parsers/epic.py +323 -0
- raise_cli/governance/parsers/glossary.py +316 -0
- raise_cli/governance/parsers/guardrails.py +345 -0
- raise_cli/governance/parsers/prd.py +112 -0
- raise_cli/governance/parsers/roadmap.py +118 -0
- raise_cli/governance/parsers/vision.py +116 -0
- raise_cli/graph/__init__.py +1 -0
- raise_cli/graph/backends/__init__.py +57 -0
- raise_cli/graph/backends/api.py +137 -0
- raise_cli/graph/backends/dual.py +139 -0
- raise_cli/graph/backends/pending.py +84 -0
- raise_cli/handlers/__init__.py +3 -0
- raise_cli/hooks/__init__.py +54 -0
- raise_cli/hooks/builtin/__init__.py +1 -0
- raise_cli/hooks/builtin/backlog.py +216 -0
- raise_cli/hooks/builtin/gate_bridge.py +83 -0
- raise_cli/hooks/builtin/jira_sync.py +127 -0
- raise_cli/hooks/builtin/memory.py +117 -0
- raise_cli/hooks/builtin/telemetry.py +72 -0
- raise_cli/hooks/emitter.py +184 -0
- raise_cli/hooks/events.py +262 -0
- raise_cli/hooks/protocol.py +38 -0
- raise_cli/hooks/registry.py +117 -0
- raise_cli/mcp/__init__.py +33 -0
- raise_cli/mcp/bridge.py +218 -0
- raise_cli/mcp/models.py +43 -0
- raise_cli/mcp/registry.py +77 -0
- raise_cli/mcp/schema.py +41 -0
- raise_cli/memory/__init__.py +58 -0
- raise_cli/memory/loader.py +247 -0
- raise_cli/memory/migration.py +241 -0
- raise_cli/memory/models.py +169 -0
- raise_cli/memory/writer.py +598 -0
- raise_cli/onboarding/__init__.py +103 -0
- raise_cli/onboarding/bootstrap.py +324 -0
- raise_cli/onboarding/claudemd.py +17 -0
- raise_cli/onboarding/conventions.py +742 -0
- raise_cli/onboarding/detection.py +374 -0
- raise_cli/onboarding/governance.py +443 -0
- raise_cli/onboarding/instructions.py +672 -0
- raise_cli/onboarding/manifest.py +201 -0
- raise_cli/onboarding/memory_md.py +399 -0
- raise_cli/onboarding/migration.py +207 -0
- raise_cli/onboarding/profile.py +624 -0
- raise_cli/onboarding/skill_conflict.py +100 -0
- raise_cli/onboarding/skill_manifest.py +176 -0
- raise_cli/onboarding/skills.py +437 -0
- raise_cli/onboarding/workflows.py +101 -0
- raise_cli/output/__init__.py +28 -0
- raise_cli/output/console.py +394 -0
- raise_cli/output/formatters/__init__.py +9 -0
- raise_cli/output/formatters/adapters.py +135 -0
- raise_cli/output/formatters/discover.py +439 -0
- raise_cli/output/formatters/skill.py +298 -0
- raise_cli/publish/__init__.py +3 -0
- raise_cli/publish/changelog.py +80 -0
- raise_cli/publish/check.py +179 -0
- raise_cli/publish/version.py +172 -0
- raise_cli/rai_base/__init__.py +22 -0
- raise_cli/rai_base/framework/__init__.py +7 -0
- raise_cli/rai_base/framework/methodology.yaml +233 -0
- raise_cli/rai_base/governance/__init__.py +1 -0
- raise_cli/rai_base/governance/architecture/__init__.py +1 -0
- raise_cli/rai_base/governance/architecture/domain-model.md +20 -0
- raise_cli/rai_base/governance/architecture/system-context.md +34 -0
- raise_cli/rai_base/governance/architecture/system-design.md +24 -0
- raise_cli/rai_base/governance/backlog.md +8 -0
- raise_cli/rai_base/governance/guardrails.md +17 -0
- raise_cli/rai_base/governance/prd.md +25 -0
- raise_cli/rai_base/governance/vision.md +16 -0
- raise_cli/rai_base/identity/__init__.py +8 -0
- raise_cli/rai_base/identity/core.md +119 -0
- raise_cli/rai_base/identity/perspective.md +119 -0
- raise_cli/rai_base/memory/__init__.py +7 -0
- raise_cli/rai_base/memory/patterns-base.jsonl +55 -0
- raise_cli/schemas/__init__.py +3 -0
- raise_cli/schemas/journal.py +49 -0
- raise_cli/schemas/session_state.py +117 -0
- raise_cli/session/__init__.py +5 -0
- raise_cli/session/bundle.py +820 -0
- raise_cli/session/close.py +268 -0
- raise_cli/session/journal.py +119 -0
- raise_cli/session/resolver.py +126 -0
- raise_cli/session/state.py +187 -0
- raise_cli/skills/__init__.py +44 -0
- raise_cli/skills/locator.py +141 -0
- raise_cli/skills/name_checker.py +199 -0
- raise_cli/skills/parser.py +145 -0
- raise_cli/skills/scaffold.py +212 -0
- raise_cli/skills/schema.py +132 -0
- raise_cli/skills/skillsets.py +195 -0
- raise_cli/skills/validator.py +197 -0
- raise_cli/skills_base/__init__.py +80 -0
- raise_cli/skills_base/contract-template.md +60 -0
- raise_cli/skills_base/preamble.md +37 -0
- raise_cli/skills_base/rai-architecture-review/SKILL.md +137 -0
- raise_cli/skills_base/rai-debug/SKILL.md +171 -0
- raise_cli/skills_base/rai-discover/SKILL.md +167 -0
- raise_cli/skills_base/rai-discover-document/SKILL.md +128 -0
- raise_cli/skills_base/rai-discover-scan/SKILL.md +147 -0
- raise_cli/skills_base/rai-discover-start/SKILL.md +145 -0
- raise_cli/skills_base/rai-discover-validate/SKILL.md +142 -0
- raise_cli/skills_base/rai-docs-update/SKILL.md +142 -0
- raise_cli/skills_base/rai-doctor/SKILL.md +120 -0
- raise_cli/skills_base/rai-epic-close/SKILL.md +165 -0
- raise_cli/skills_base/rai-epic-close/templates/retrospective.md +68 -0
- raise_cli/skills_base/rai-epic-design/SKILL.md +146 -0
- raise_cli/skills_base/rai-epic-design/templates/design.md +24 -0
- raise_cli/skills_base/rai-epic-design/templates/scope.md +76 -0
- raise_cli/skills_base/rai-epic-plan/SKILL.md +153 -0
- raise_cli/skills_base/rai-epic-plan/_references/sequencing-strategies.md +67 -0
- raise_cli/skills_base/rai-epic-plan/templates/plan-section.md +49 -0
- raise_cli/skills_base/rai-epic-run/SKILL.md +208 -0
- raise_cli/skills_base/rai-epic-start/SKILL.md +136 -0
- raise_cli/skills_base/rai-epic-start/templates/brief.md +34 -0
- raise_cli/skills_base/rai-mcp-add/SKILL.md +176 -0
- raise_cli/skills_base/rai-mcp-remove/SKILL.md +120 -0
- raise_cli/skills_base/rai-mcp-status/SKILL.md +147 -0
- raise_cli/skills_base/rai-problem-shape/SKILL.md +138 -0
- raise_cli/skills_base/rai-project-create/SKILL.md +144 -0
- raise_cli/skills_base/rai-project-onboard/SKILL.md +162 -0
- raise_cli/skills_base/rai-quality-review/SKILL.md +189 -0
- raise_cli/skills_base/rai-research/SKILL.md +143 -0
- raise_cli/skills_base/rai-research/references/research-prompt-template.md +317 -0
- raise_cli/skills_base/rai-session-close/SKILL.md +176 -0
- raise_cli/skills_base/rai-session-start/SKILL.md +110 -0
- raise_cli/skills_base/rai-story-close/SKILL.md +198 -0
- raise_cli/skills_base/rai-story-design/SKILL.md +203 -0
- raise_cli/skills_base/rai-story-design/references/tech-design-story-v2.md +293 -0
- raise_cli/skills_base/rai-story-implement/SKILL.md +115 -0
- raise_cli/skills_base/rai-story-plan/SKILL.md +135 -0
- raise_cli/skills_base/rai-story-review/SKILL.md +178 -0
- raise_cli/skills_base/rai-story-run/SKILL.md +282 -0
- raise_cli/skills_base/rai-story-start/SKILL.md +166 -0
- raise_cli/skills_base/rai-story-start/templates/story.md +38 -0
- raise_cli/skills_base/rai-welcome/SKILL.md +134 -0
- raise_cli/telemetry/__init__.py +42 -0
- raise_cli/telemetry/schemas.py +285 -0
- raise_cli/telemetry/writer.py +217 -0
- raise_cli/tier/__init__.py +0 -0
- raise_cli/tier/context.py +134 -0
- raise_cli/viz/__init__.py +7 -0
- raise_cli/viz/generator.py +406 -0
- raise_cli-2.2.1.dist-info/METADATA +433 -0
- raise_cli-2.2.1.dist-info/RECORD +264 -0
- raise_cli-2.2.1.dist-info/WHEEL +4 -0
- raise_cli-2.2.1.dist-info/entry_points.txt +40 -0
- raise_cli-2.2.1.dist-info/licenses/LICENSE +190 -0
- raise_cli-2.2.1.dist-info/licenses/NOTICE +4 -0
|
@@ -0,0 +1,1687 @@
|
|
|
1
|
+
"""Code scanner for symbol extraction.
|
|
2
|
+
|
|
3
|
+
This module extracts classes, functions, and module-level information
|
|
4
|
+
from source files. Supports:
|
|
5
|
+
- Python (via built-in ast module)
|
|
6
|
+
- TypeScript/JavaScript (via tree-sitter)
|
|
7
|
+
|
|
8
|
+
Example:
|
|
9
|
+
>>> from raise_cli.discovery.scanner import extract_python_symbols
|
|
10
|
+
>>> symbols = extract_python_symbols("class Foo: pass", "example.py")
|
|
11
|
+
>>> symbols[0].name
|
|
12
|
+
'Foo'
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import ast
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import TYPE_CHECKING, Literal
|
|
20
|
+
|
|
21
|
+
from pydantic import BaseModel, Field
|
|
22
|
+
|
|
23
|
+
from raise_cli.compat import portable_path
|
|
24
|
+
|
|
25
|
+
if TYPE_CHECKING:
|
|
26
|
+
from tree_sitter import Node, Parser
|
|
27
|
+
|
|
28
|
+
# Closed set of symbol categories a scanner may report
SymbolKind = Literal[
    "class", "function", "method", "module", "interface",
    "enum", "type_alias", "constant", "trait", "component",
]

# Closed set of languages the scanner recognises
Language = Literal[
    "python",
    "typescript",
    "javascript",
    "php",
    "svelte",
    "csharp",
    "dart",
]

# Lookup from file suffix (leading dot included) to scanner language
EXTENSION_TO_LANGUAGE: dict[str, Language] = {
    # Python
    ".py": "python",
    # TypeScript (plain and TSX)
    ".ts": "typescript",
    ".tsx": "typescript",
    # JavaScript (plain, JSX, ES module, CommonJS)
    ".js": "javascript",
    ".jsx": "javascript",
    ".mjs": "javascript",
    ".cjs": "javascript",
    # Other languages
    ".php": "php",
    ".svelte": "svelte",
    ".cs": "csharp",
    ".dart": "dart",
}
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class Symbol(BaseModel):
    """A single named code element found while scanning source.

    Attributes:
        name: Symbol name (e.g., "UserService", "get_user").
        kind: One of the ``SymbolKind`` literals (e.g., "class", "function",
            "method", "module").
        file: Relative path to source file.
        line: Line number where symbol is defined (1-indexed).
        signature: Full signature (e.g., "class UserService(BaseService)").
            Defaults to an empty string.
        docstring: Symbol's docstring if present, otherwise None.
        parent: Parent symbol name for methods (e.g., class name), otherwise
            None.

    Examples:
        >>> symbol = Symbol(
        ...     name="UserService",
        ...     kind="class",
        ...     file="src/services/user.py",
        ...     line=15,
        ...     signature="class UserService(BaseService)",
        ... )
        >>> symbol.name
        'UserService'
    """

    # Required fields
    name: str = Field(..., description="Symbol name")
    kind: SymbolKind = Field(..., description="Symbol type")
    file: str = Field(..., description="Relative path to source file")
    line: int = Field(..., description="Line number (1-indexed)")

    # Optional fields
    signature: str = Field(description="Full signature", default="")
    docstring: str | None = Field(description="Symbol docstring", default=None)
    parent: str | None = Field(description="Parent symbol name", default=None)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class ScanResult(BaseModel):
    """Aggregate outcome of scanning a file or directory.

    Attributes:
        symbols: List of extracted symbols (empty by default).
        files_scanned: Number of files processed (0 by default).
        errors: List of files that failed to parse (empty by default).

    Examples:
        >>> result = ScanResult(symbols=[], files_scanned=5, errors=[])
        >>> result.files_scanned
        5
    """

    # default_factory gives every instance its own fresh list
    symbols: list[Symbol] = Field(default_factory=list)  # pyright: ignore[reportUnknownVariableType]
    files_scanned: int = 0
    errors: list[str] = Field(default_factory=list)  # pyright: ignore[reportUnknownVariableType]
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _get_signature(node: ast.ClassDef | ast.FunctionDef | ast.AsyncFunctionDef) -> str:
    """Build a one-line signature string for a class or function definition.

    Args:
        node: AST node for a class, function, or async function definition.

    Returns:
        For classes, ``class Name`` or ``class Name(...)`` where the
        parentheses list the base classes followed by any class keywords
        (e.g., ``class Foo(Bar, metaclass=Meta)``). For functions,
        ``def name(args)`` (``async def`` for async functions), with
        ``-> annotation`` appended when a return annotation is present.
    """
    if isinstance(node, ast.ClassDef):
        # Class keywords such as ``metaclass=...`` are stored in node.keywords,
        # separately from node.bases; both are needed for the full header.
        header_args = ", ".join(
            ast.unparse(arg) for arg in (*node.bases, *node.keywords)
        )
        if header_args:
            return f"class {node.name}({header_args})"
        return f"class {node.name}"

    # FunctionDef or AsyncFunctionDef
    prefix = "async def" if isinstance(node, ast.AsyncFunctionDef) else "def"
    signature = f"{prefix} {node.name}({ast.unparse(node.args)})"
    if node.returns:
        signature += f" -> {ast.unparse(node.returns)}"
    return signature
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _extract_module_symbol(tree: ast.Module, file_path: str) -> Symbol | None:
    """Return a "module" symbol for the file, or None without a docstring.

    The symbol is named after the file stem and is always placed on line 1.
    A missing or empty module docstring yields None.
    """
    doc = ast.get_docstring(tree)
    if doc:
        stem = Path(file_path).stem
        return Symbol(
            name=stem,
            kind="module",
            file=file_path,
            line=1,
            signature=f"module {stem}",
            docstring=doc,
        )
    return None
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _extract_class_symbols(node: ast.ClassDef, file_path: str) -> list[Symbol]:
    """Return the class symbol followed by one symbol per direct method.

    Only function definitions (sync or async) that sit directly in the class
    body are reported; each one records the class name as its parent.
    """
    class_symbol = Symbol(
        name=node.name,
        kind="class",
        file=file_path,
        line=node.lineno,
        signature=_get_signature(node),
        docstring=ast.get_docstring(node),
    )
    method_symbols = [
        Symbol(
            name=member.name,
            kind="method",
            file=file_path,
            line=member.lineno,
            signature=_get_signature(member),
            docstring=ast.get_docstring(member),
            parent=node.name,
        )
        for member in node.body
        if isinstance(member, (ast.FunctionDef, ast.AsyncFunctionDef))
    ]
    return [class_symbol, *method_symbols]
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def extract_python_symbols(source: str, file_path: str) -> list[Symbol]:
    """Collect module, class, method, and function symbols from Python code.

    The result lists, in order: the module symbol (only when the module has a
    docstring), every class found anywhere in the tree together with its
    direct methods, and finally the functions defined at module level.

    Args:
        source: Python source code as string.
        file_path: Path to the source file (for metadata).

    Returns:
        List of Symbol objects.

    Raises:
        SyntaxError: If source code cannot be parsed.

    Examples:
        >>> source = '''
        ... class MyClass:
        ...     \"\"\"A sample class.\"\"\"
        ...     def method(self):
        ...         pass
        ... '''
        >>> symbols = extract_python_symbols(source, "example.py")
        >>> len(symbols)
        2
        >>> symbols[0].kind
        'class'
    """
    module_tree = ast.parse(source)
    found: list[Symbol] = []

    # Module entry comes first, when there is a docstring to report
    module_entry = _extract_module_symbol(module_tree, file_path)
    if module_entry is not None:
        found.append(module_entry)

    # ast.walk visits the whole tree, so nested classes are included too
    for candidate in ast.walk(module_tree):
        if not isinstance(candidate, ast.ClassDef):
            continue
        found.extend(_extract_class_symbols(candidate, file_path))

    # Only direct children of the module count as functions, so methods
    # already reported above are not duplicated here
    found.extend(
        Symbol(
            name=func.name,
            kind="function",
            file=file_path,
            line=func.lineno,
            signature=_get_signature(func),
            docstring=ast.get_docstring(func),
        )
        for func in ast.iter_child_nodes(module_tree)
        if isinstance(func, (ast.FunctionDef, ast.AsyncFunctionDef))
    )

    return found
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
# -----------------------------------------------------------------------------
|
|
243
|
+
# TypeScript/JavaScript Extraction (tree-sitter)
|
|
244
|
+
# -----------------------------------------------------------------------------
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def _get_ts_parser(language: Language, *, file_path: str = "") -> Parser:
    """Get a tree-sitter parser for TypeScript or JavaScript.

    Args:
        language: "typescript" selects the TypeScript grammar; any other value
            selects the JavaScript grammar.
        file_path: File path; a path ending in ".tsx" selects the TSX grammar
            when ``language`` is "typescript". Ignored for JavaScript.

    Returns:
        Configured tree-sitter Parser.

    Raises:
        ImportError: If tree-sitter or the grammar package needed for
            ``language`` is not installed. The message includes the install
            command in both cases.
    """
    # All optional imports share one handler so a missing grammar package
    # produces the same install hint as a missing tree-sitter core.
    try:
        from tree_sitter import Language as TSLanguage
        from tree_sitter import Parser

        if language == "typescript":
            import tree_sitter_typescript as grammar_module
        else:
            import tree_sitter_javascript as grammar_module
    except ImportError as e:
        raise ImportError(
            "tree-sitter is required for TypeScript/JavaScript scanning. "
            "Install with: uv add tree-sitter tree-sitter-typescript tree-sitter-javascript"
        ) from e

    if language == "typescript":
        # The TypeScript package ships separate grammars for .ts and .tsx
        if file_path.endswith(".tsx"):
            grammar = grammar_module.language_tsx()
        else:
            grammar = grammar_module.language_typescript()
    else:
        grammar = grammar_module.language()

    return Parser(TSLanguage(grammar))
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def _find_child_by_type(node: Node, *types: str) -> Node | None:
    """Return the first direct child whose type is one of ``types``.

    Children are checked in order; None is returned when nothing matches.
    """
    return next((kid for kid in node.children if kid.type in types), None)
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def _get_node_text(node: Node, source: bytes) -> str:
    """Decode the slice of ``source`` covered by ``node`` as UTF-8 text."""
    first, last = node.start_byte, node.end_byte
    covered = source[first:last]
    return covered.decode("utf-8")
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def _extract_ts_signature(node: Node, source: bytes) -> str:
    """Build a short signature string for a TypeScript/JavaScript node.

    Handles class, function, generator function, method, interface, enum, and
    type alias nodes. Any other node type yields an empty string. A missing
    name is rendered as "unknown" and missing parameters as "()".
    """

    def child_text(*child_types: str, default: str) -> str:
        # Text of the first matching direct child, or the fallback
        match = _find_child_by_type(node, *child_types)
        return _get_node_text(match, source) if match else default

    kind = node.type

    if kind == "class_declaration":
        class_name = child_text("type_identifier", "identifier", default="unknown")
        # The heritage clause text is appended as written in the source
        heritage = _find_child_by_type(node, "class_heritage")
        if not heritage:
            return f"class {class_name}"
        return f"class {class_name} {_get_node_text(heritage, source)}"

    if kind in ("function_declaration", "generator_function_declaration"):
        func_name = child_text("identifier", default="unknown")
        params = child_text("formal_parameters", default="()")
        # Return type annotation text, when present, follows the parameters
        annotation = child_text("type_annotation", default="")
        return f"function {func_name}{params}{annotation}"

    if kind in ("method_definition", "method_signature"):
        method_name = child_text("property_identifier", "identifier", default="unknown")
        params = child_text("formal_parameters", default="()")
        return f"{method_name}{params}"

    # Remaining declarations are rendered as "<keyword> <name>"
    named_declarations = {
        "interface_declaration": ("interface", ("type_identifier", "identifier")),
        "enum_declaration": ("enum", ("identifier",)),
        "type_alias_declaration": ("type", ("type_identifier", "identifier")),
    }
    if kind in named_declarations:
        keyword, name_types = named_declarations[kind]
        decl_name = child_text(*name_types, default="unknown")
        return f"{keyword} {decl_name}"

    return ""
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
def extract_typescript_symbols(source: str, file_path: str) -> list[Symbol]:
    """Parse TypeScript source with tree-sitter and return its symbols.

    The parser is chosen from ``file_path``, the source is encoded as UTF-8,
    and the resulting parse tree is handed to the shared TS/JS extractor.

    Args:
        source: TypeScript source code as string.
        file_path: Path to the source file (for metadata).

    Returns:
        List of Symbol objects.

    Examples:
        >>> source = '''
        ... export class MyClass {
        ...     myMethod(): void {}
        ... }
        ... '''
        >>> symbols = extract_typescript_symbols(source, "example.ts")
        >>> symbols[0].kind
        'class'
    """
    ts_parser = _get_ts_parser("typescript", file_path=file_path)
    encoded = source.encode("utf-8")
    parse_tree = ts_parser.parse(encoded)
    root = parse_tree.root_node
    return _extract_ts_js_symbols(root, encoded, file_path)
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
def extract_javascript_symbols(source: str, file_path: str) -> list[Symbol]:
    """Parse JavaScript source with tree-sitter and return its symbols.

    The source is encoded as UTF-8, parsed with the JavaScript parser, and the
    resulting parse tree is handed to the shared TS/JS extractor.

    Args:
        source: JavaScript source code as string.
        file_path: Path to the source file (for metadata).

    Returns:
        List of Symbol objects.

    Examples:
        >>> source = '''
        ... class MyClass {
        ...     myMethod() {}
        ... }
        ... '''
        >>> symbols = extract_javascript_symbols(source, "example.js")
        >>> symbols[0].kind
        'class'
    """
    js_parser = _get_ts_parser("javascript")
    encoded = source.encode("utf-8")
    parse_tree = js_parser.parse(encoded)
    root = parse_tree.root_node
    return _extract_ts_js_symbols(root, encoded, file_path)
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
def _extract_ts_js_symbols(
|
|
408
|
+
root: Node,
|
|
409
|
+
source: bytes,
|
|
410
|
+
file_path: str,
|
|
411
|
+
) -> list[Symbol]:
|
|
412
|
+
"""Extract symbols from a tree-sitter parse tree.
|
|
413
|
+
|
|
414
|
+
Walks the AST and extracts classes, functions, methods, and interfaces.
|
|
415
|
+
|
|
416
|
+
Args:
|
|
417
|
+
root: Root node of the tree-sitter parse tree.
|
|
418
|
+
source: Source code as bytes.
|
|
419
|
+
file_path: Path to the source file.
|
|
420
|
+
|
|
421
|
+
Returns:
|
|
422
|
+
List of Symbol objects.
|
|
423
|
+
"""
|
|
424
|
+
symbols: list[Symbol] = []
|
|
425
|
+
|
|
426
|
+
# Node types we care about
|
|
427
|
+
class_types = {"class_declaration"}
|
|
428
|
+
function_types = {"function_declaration", "generator_function_declaration"}
|
|
429
|
+
method_types = {"method_definition", "method_signature"}
|
|
430
|
+
interface_types = {"interface_declaration"}
|
|
431
|
+
enum_types = {"enum_declaration"}
|
|
432
|
+
type_alias_types = {"type_alias_declaration"}
|
|
433
|
+
|
|
434
|
+
def _extract_exported_const(node: Node) -> None:
|
|
435
|
+
"""Extract exported const variable declarations as constants."""
|
|
436
|
+
# export_statement → declaration → lexical_declaration → variable_declarator
|
|
437
|
+
# Also handle top-level lexical_declaration directly
|
|
438
|
+
decl = node
|
|
439
|
+
if node.type == "export_statement":
|
|
440
|
+
decl = _find_child_by_type(node, "lexical_declaration")
|
|
441
|
+
if decl is None:
|
|
442
|
+
return
|
|
443
|
+
|
|
444
|
+
if decl.type != "lexical_declaration":
|
|
445
|
+
return
|
|
446
|
+
|
|
447
|
+
# Only extract 'const' (not 'let' or 'var')
|
|
448
|
+
first_child = decl.children[0] if decl.children else None
|
|
449
|
+
if first_child is None or _get_node_text(first_child, source) != "const":
|
|
450
|
+
return
|
|
451
|
+
|
|
452
|
+
for child in decl.children:
|
|
453
|
+
if child.type == "variable_declarator":
|
|
454
|
+
name_node = _find_child_by_type(child, "identifier")
|
|
455
|
+
if name_node:
|
|
456
|
+
name = _get_node_text(name_node, source)
|
|
457
|
+
symbols.append(
|
|
458
|
+
Symbol(
|
|
459
|
+
name=name,
|
|
460
|
+
kind="constant",
|
|
461
|
+
file=file_path,
|
|
462
|
+
line=child.start_point[0] + 1,
|
|
463
|
+
signature=f"const {name}",
|
|
464
|
+
)
|
|
465
|
+
)
|
|
466
|
+
|
|
467
|
+
def walk(node: Node, parent_class: str | None = None) -> None:
|
|
468
|
+
node_type = node.type
|
|
469
|
+
|
|
470
|
+
if node_type in class_types:
|
|
471
|
+
name_node = _find_child_by_type(node, "type_identifier", "identifier")
|
|
472
|
+
name = _get_node_text(name_node, source) if name_node else "unknown"
|
|
473
|
+
|
|
474
|
+
symbols.append(
|
|
475
|
+
Symbol(
|
|
476
|
+
name=name,
|
|
477
|
+
kind="class",
|
|
478
|
+
file=file_path,
|
|
479
|
+
line=node.start_point[0] + 1, # tree-sitter is 0-indexed
|
|
480
|
+
signature=_extract_ts_signature(node, source),
|
|
481
|
+
)
|
|
482
|
+
)
|
|
483
|
+
|
|
484
|
+
# Walk class body for methods
|
|
485
|
+
body = _find_child_by_type(node, "class_body")
|
|
486
|
+
if body:
|
|
487
|
+
for child in body.children:
|
|
488
|
+
walk(child, parent_class=name)
|
|
489
|
+
return # Don't recurse further into class
|
|
490
|
+
|
|
491
|
+
elif node_type in function_types and parent_class is None:
|
|
492
|
+
name_node = _find_child_by_type(node, "identifier")
|
|
493
|
+
name = _get_node_text(name_node, source) if name_node else "unknown"
|
|
494
|
+
|
|
495
|
+
symbols.append(
|
|
496
|
+
Symbol(
|
|
497
|
+
name=name,
|
|
498
|
+
kind="function",
|
|
499
|
+
file=file_path,
|
|
500
|
+
line=node.start_point[0] + 1,
|
|
501
|
+
signature=_extract_ts_signature(node, source),
|
|
502
|
+
)
|
|
503
|
+
)
|
|
504
|
+
|
|
505
|
+
elif node_type in method_types and parent_class is not None:
|
|
506
|
+
name_node = _find_child_by_type(node, "property_identifier", "identifier")
|
|
507
|
+
name = _get_node_text(name_node, source) if name_node else "unknown"
|
|
508
|
+
|
|
509
|
+
symbols.append(
|
|
510
|
+
Symbol(
|
|
511
|
+
name=name,
|
|
512
|
+
kind="method",
|
|
513
|
+
file=file_path,
|
|
514
|
+
line=node.start_point[0] + 1,
|
|
515
|
+
signature=_extract_ts_signature(node, source),
|
|
516
|
+
parent=parent_class,
|
|
517
|
+
)
|
|
518
|
+
)
|
|
519
|
+
|
|
520
|
+
elif node_type in interface_types:
|
|
521
|
+
name_node = _find_child_by_type(node, "type_identifier", "identifier")
|
|
522
|
+
name = _get_node_text(name_node, source) if name_node else "unknown"
|
|
523
|
+
|
|
524
|
+
symbols.append(
|
|
525
|
+
Symbol(
|
|
526
|
+
name=name,
|
|
527
|
+
kind="interface",
|
|
528
|
+
file=file_path,
|
|
529
|
+
line=node.start_point[0] + 1,
|
|
530
|
+
signature=_extract_ts_signature(node, source),
|
|
531
|
+
)
|
|
532
|
+
)
|
|
533
|
+
|
|
534
|
+
elif node_type in enum_types:
|
|
535
|
+
name_node = _find_child_by_type(node, "identifier")
|
|
536
|
+
name = _get_node_text(name_node, source) if name_node else "unknown"
|
|
537
|
+
|
|
538
|
+
symbols.append(
|
|
539
|
+
Symbol(
|
|
540
|
+
name=name,
|
|
541
|
+
kind="enum",
|
|
542
|
+
file=file_path,
|
|
543
|
+
line=node.start_point[0] + 1,
|
|
544
|
+
signature=_extract_ts_signature(node, source),
|
|
545
|
+
)
|
|
546
|
+
)
|
|
547
|
+
|
|
548
|
+
elif node_type in type_alias_types:
|
|
549
|
+
name_node = _find_child_by_type(node, "type_identifier", "identifier")
|
|
550
|
+
name = _get_node_text(name_node, source) if name_node else "unknown"
|
|
551
|
+
|
|
552
|
+
symbols.append(
|
|
553
|
+
Symbol(
|
|
554
|
+
name=name,
|
|
555
|
+
kind="type_alias",
|
|
556
|
+
file=file_path,
|
|
557
|
+
line=node.start_point[0] + 1,
|
|
558
|
+
signature=_extract_ts_signature(node, source),
|
|
559
|
+
)
|
|
560
|
+
)
|
|
561
|
+
|
|
562
|
+
elif node_type == "export_statement":
|
|
563
|
+
# Handle exported const declarations
|
|
564
|
+
_extract_exported_const(node)
|
|
565
|
+
|
|
566
|
+
# Recurse into children
|
|
567
|
+
for child in node.children:
|
|
568
|
+
walk(child, parent_class)
|
|
569
|
+
|
|
570
|
+
walk(root)
|
|
571
|
+
return symbols
|
|
572
|
+
|
|
573
|
+
|
|
574
|
+
def _get_php_parser() -> Parser:
|
|
575
|
+
"""Get a tree-sitter parser for PHP.
|
|
576
|
+
|
|
577
|
+
Returns:
|
|
578
|
+
Configured tree-sitter Parser.
|
|
579
|
+
|
|
580
|
+
Raises:
|
|
581
|
+
ImportError: If tree-sitter-php is not installed.
|
|
582
|
+
"""
|
|
583
|
+
try:
|
|
584
|
+
from tree_sitter import Language as TSLanguage
|
|
585
|
+
from tree_sitter import Parser
|
|
586
|
+
except ImportError as e:
|
|
587
|
+
raise ImportError(
|
|
588
|
+
"tree-sitter is required for PHP scanning. "
|
|
589
|
+
"Install with: uv add tree-sitter tree-sitter-php"
|
|
590
|
+
) from e
|
|
591
|
+
|
|
592
|
+
import tree_sitter_php as ts_php
|
|
593
|
+
|
|
594
|
+
lang = TSLanguage(ts_php.language_php())
|
|
595
|
+
return Parser(lang)
|
|
596
|
+
|
|
597
|
+
|
|
598
|
+
def _extract_php_signature(node: Node, source: bytes) -> str:
    """Build a one-line signature string for a PHP declaration node.

    Handles class, interface, trait, function, method and enum
    declarations; any other node type yields an empty string.

    Args:
        node: Declaration node from the PHP parse tree.
        source: Source code as bytes.

    Returns:
        The signature text, or ``""`` for unsupported node types.
    """
    kind = node.type

    def child_text(*child_types: str, fallback: str | None) -> str | None:
        # Text of the matching direct child, or ``fallback`` when there is none.
        hit = _find_child_by_type(node, *child_types)
        return _get_node_text(hit, source) if hit else fallback

    if kind == "class_declaration":
        name = child_text("name", fallback="unknown")
        pieces = [f"class {name}"]
        # Optional "extends ..." and "implements ..." clauses, in that order.
        for clause_type in ("base_clause", "class_interface_clause"):
            clause = child_text(clause_type, fallback=None)
            if clause is not None:
                pieces.append(clause)
        return " ".join(pieces)

    simple_keywords = {
        "interface_declaration": "interface",
        "trait_declaration": "trait",
    }
    if kind in simple_keywords:
        name = child_text("name", fallback="unknown")
        return f"{simple_keywords[kind]} {name}"

    if kind == "function_definition":
        name = child_text("name", fallback="unknown")
        params = child_text("formal_parameters", fallback="()")
        returns = child_text("primitive_type", "named_type", fallback=None)
        head = f"function {name}{params}"
        return head if returns is None else f"{head}: {returns}"

    if kind == "method_declaration":
        prefix: list[str] = []
        visibility = child_text("visibility_modifier", fallback=None)
        if visibility is not None:
            prefix.append(visibility)
        if _find_child_by_type(node, "static_modifier"):
            prefix.append("static")
        name = child_text("name", fallback="unknown")
        prefix.append(f"function {name}")
        params = child_text("formal_parameters", fallback="()")
        signature = " ".join(prefix) + params
        returns = child_text("primitive_type", "named_type", fallback=None)
        if returns is not None:
            signature += f": {returns}"
        return signature

    if kind == "enum_declaration":
        name = child_text("name", fallback="unknown")
        # Backed enum type (e.g., ": string")
        backing = child_text("primitive_type", fallback=None)
        if backing is None:
            return f"enum {name}"
        return f"enum {name}: {backing}"

    return ""
|
|
664
|
+
|
|
665
|
+
|
|
666
|
+
def _extract_php_symbols(
    root: Node,
    source: bytes,
    file_path: str,
) -> list[Symbol]:
    """Collect symbols from a PHP tree-sitter parse tree.

    Traverses the tree depth-first and records classes, interfaces, traits,
    functions outside any container, methods and enums. Every name except a
    method's is prefixed with the most recently seen namespace; methods keep
    their local name and carry the qualified container name as ``parent``.

    Args:
        root: Root node of the tree-sitter parse tree.
        source: Source code as bytes.
        file_path: Path to the source file.

    Returns:
        List of Symbol objects.
    """
    found: list[Symbol] = []
    active_namespace = ""

    # Declarations whose ``declaration_list`` is searched for methods,
    # mapped to the symbol kind they are reported as.
    container_kinds: dict[str, SymbolKind] = {
        "class_declaration": "class",
        "interface_declaration": "interface",
        "trait_declaration": "trait",
    }

    def _local_name(decl: Node) -> str:
        ident = _find_child_by_type(decl, "name")
        return _get_node_text(ident, source) if ident else "unknown"

    def _qualify(local: str) -> str:
        # Dots rather than PHP's backslashes: backslashes in component IDs
        # break JSON, graph queries, and ID dedup.
        if active_namespace:
            return f"{active_namespace}.{local}"
        return local

    def _record(decl: Node, name: str, kind: SymbolKind, **extra: str) -> None:
        found.append(
            Symbol(
                name=name,
                kind=kind,
                file=file_path,
                line=decl.start_point[0] + 1,  # tree-sitter rows are 0-based
                signature=_extract_php_signature(decl, source),
                **extra,
            )
        )

    def visit(current: Node, owner: str | None = None) -> None:
        nonlocal active_namespace
        current_type = current.type

        if current_type == "namespace_definition":
            ns_name = _find_child_by_type(current, "namespace_name")
            if ns_name:
                # Normalize PHP backslash separators to dots for graph IDs
                active_namespace = _get_node_text(ns_name, source).replace("\\", ".")
            # The declarations inside the namespace are reached by the
            # generic descent at the bottom of this function.

        elif current_type in container_kinds:
            container_name = _qualify(_local_name(current))
            _record(current, container_name, container_kinds[current_type])

            # Only the declaration list is searched, with this container as owner.
            members = _find_child_by_type(current, "declaration_list")
            if members:
                for member in members.children:
                    visit(member, owner=container_name)
            return

        elif current_type == "method_declaration" and owner is not None:
            _record(current, _local_name(current), "method", parent=owner)
            return

        elif current_type == "function_definition" and owner is None:
            _record(current, _qualify(_local_name(current)), "function")
            return

        elif current_type == "enum_declaration":
            _record(current, _qualify(_local_name(current)), "enum")
            return

        for child in current.children:
            visit(child, owner)

    visit(root)
    return found
|
|
793
|
+
|
|
794
|
+
|
|
795
|
+
def extract_php_symbols(source: str, file_path: str) -> list[Symbol]:
    """Parse PHP source text and return the symbols it declares.

    The text is parsed with tree-sitter and the resulting tree is scanned
    for classes, interfaces, traits, functions, methods, and enums.

    Args:
        source: PHP source code as string.
        file_path: Path to the source file (for metadata).

    Returns:
        List of Symbol objects.

    Examples:
        >>> source = '''
        ... <?php
        ... class User {
        ...     public function getName(): string {}
        ... }
        ... '''
        >>> symbols = extract_php_symbols(source, "User.php")
        >>> symbols[0].kind
        'class'
    """
    php_parser = _get_php_parser()
    # tree-sitter works on bytes; the same buffer is reused for text lookups.
    encoded = source.encode("utf-8")
    return _extract_php_symbols(php_parser.parse(encoded).root_node, encoded, file_path)
|
|
824
|
+
|
|
825
|
+
|
|
826
|
+
# -----------------------------------------------------------------------------
|
|
827
|
+
# Svelte Extraction (tree-sitter-svelte + JS/TS re-parse)
|
|
828
|
+
# -----------------------------------------------------------------------------
|
|
829
|
+
|
|
830
|
+
|
|
831
|
+
def _get_svelte_parser() -> Parser:
|
|
832
|
+
"""Get a tree-sitter parser for Svelte.
|
|
833
|
+
|
|
834
|
+
Returns:
|
|
835
|
+
Configured tree-sitter Parser.
|
|
836
|
+
|
|
837
|
+
Raises:
|
|
838
|
+
ImportError: If tree-sitter-svelte is not installed.
|
|
839
|
+
"""
|
|
840
|
+
try:
|
|
841
|
+
from tree_sitter import Language as TSLanguage
|
|
842
|
+
from tree_sitter import Parser
|
|
843
|
+
except ImportError as e:
|
|
844
|
+
raise ImportError(
|
|
845
|
+
"tree-sitter is required for Svelte scanning. "
|
|
846
|
+
"Install with: uv add tree-sitter tree-sitter-svelte"
|
|
847
|
+
) from e
|
|
848
|
+
|
|
849
|
+
import tree_sitter_svelte
|
|
850
|
+
|
|
851
|
+
lang = TSLanguage(tree_sitter_svelte.language())
|
|
852
|
+
return Parser(lang)
|
|
853
|
+
|
|
854
|
+
|
|
855
|
+
def _detect_svelte_script_lang(script_element: Node, source: bytes) -> Language:
|
|
856
|
+
"""Detect whether a Svelte script block uses TypeScript or JavaScript.
|
|
857
|
+
|
|
858
|
+
Checks for ``lang="ts"`` or ``lang="typescript"`` attribute on the
|
|
859
|
+
``<script>`` tag.
|
|
860
|
+
|
|
861
|
+
Args:
|
|
862
|
+
script_element: The ``script_element`` node from tree-sitter-svelte.
|
|
863
|
+
source: Source code as bytes.
|
|
864
|
+
|
|
865
|
+
Returns:
|
|
866
|
+
``"typescript"`` if lang attribute indicates TS, else ``"javascript"``.
|
|
867
|
+
"""
|
|
868
|
+
for child in script_element.children:
|
|
869
|
+
if child.type != "start_tag":
|
|
870
|
+
continue
|
|
871
|
+
for attr in child.children:
|
|
872
|
+
if attr.type != "attribute":
|
|
873
|
+
continue
|
|
874
|
+
attr_name: Node | None = None
|
|
875
|
+
attr_value: Node | None = None
|
|
876
|
+
for part in attr.children:
|
|
877
|
+
if part.type == "attribute_name":
|
|
878
|
+
attr_name = part
|
|
879
|
+
elif part.type == "quoted_attribute_value":
|
|
880
|
+
attr_value = part
|
|
881
|
+
if attr_name is None or attr_value is None:
|
|
882
|
+
continue
|
|
883
|
+
name_text = source[attr_name.start_byte : attr_name.end_byte].decode(
|
|
884
|
+
"utf-8"
|
|
885
|
+
)
|
|
886
|
+
if name_text != "lang":
|
|
887
|
+
continue
|
|
888
|
+
# Extract value from quoted_attribute_value → attribute_value
|
|
889
|
+
for val_child in attr_value.children:
|
|
890
|
+
if val_child.type == "attribute_value":
|
|
891
|
+
val_text = source[val_child.start_byte : val_child.end_byte].decode(
|
|
892
|
+
"utf-8"
|
|
893
|
+
)
|
|
894
|
+
if val_text in ("ts", "typescript"):
|
|
895
|
+
return "typescript"
|
|
896
|
+
return "javascript"
|
|
897
|
+
|
|
898
|
+
|
|
899
|
+
def extract_svelte_symbols(source: str, file_path: str) -> list[Symbol]:
    """Return the symbols declared by a Svelte component file.

    The file itself is always reported first as a ``"component"`` symbol
    named after the file stem. Script content is then handled in two passes:

    1. Parse with tree-sitter-svelte to find top-level ``<script>`` blocks
    2. Re-parse each block's content with the JS or TS tree-sitter parser

    Line numbers of script symbols are shifted so they refer to the
    ``.svelte`` file rather than to the extracted script text.

    Args:
        source: Svelte source code as string.
        file_path: Path to the source file (for metadata).

    Returns:
        List of Symbol objects.

    Examples:
        >>> source = '''
        ... <script>
        ... function hello() {}
        ... </script>
        ... '''
        >>> symbols = extract_svelte_symbols(source, "App.svelte")
        >>> symbols[0].kind
        'component'
    """
    raw = source.encode("utf-8")
    stem = Path(file_path).stem

    results: list[Symbol] = [
        Symbol(
            name=stem,
            kind="component",
            file=file_path,
            line=1,
            signature=f"component {stem}",
        )
    ]

    document = _get_svelte_parser().parse(raw).root_node

    for element in document.children:
        if element.type != "script_element":
            continue

        # The dialect is decided per script element (lang="ts" attribute).
        dialect = _detect_svelte_script_lang(element, raw)

        script_body = next(
            (part for part in element.children if part.type == "raw_text"),
            None,
        )
        if script_body is None:
            continue

        script_src = raw[script_body.start_byte : script_body.end_byte]
        if not script_src.strip():
            continue

        # Row of the file on which the script text begins; symbol lines from
        # the re-parse are relative to that text and get shifted by this.
        row_shift = script_body.start_point[0]

        script_root = _get_ts_parser(dialect, file_path=file_path).parse(script_src).root_node
        inner_symbols = _extract_ts_js_symbols(script_root, script_src, file_path)

        results.extend(
            [
                Symbol(
                    name=inner.name,
                    kind=inner.kind,
                    file=inner.file,
                    line=inner.line + row_shift,
                    signature=inner.signature,
                    docstring=inner.docstring,
                    parent=inner.parent,
                )
                for inner in inner_symbols
            ]
        )

    return results
|
|
984
|
+
|
|
985
|
+
|
|
986
|
+
# -----------------------------------------------------------------------------
|
|
987
|
+
# C# Extraction (tree-sitter-c-sharp)
|
|
988
|
+
# -----------------------------------------------------------------------------
|
|
989
|
+
|
|
990
|
+
|
|
991
|
+
def _get_csharp_parser() -> Parser:
|
|
992
|
+
"""Get a tree-sitter parser for C#.
|
|
993
|
+
|
|
994
|
+
Returns:
|
|
995
|
+
Configured tree-sitter Parser.
|
|
996
|
+
|
|
997
|
+
Raises:
|
|
998
|
+
ImportError: If tree-sitter-c-sharp is not installed.
|
|
999
|
+
"""
|
|
1000
|
+
try:
|
|
1001
|
+
from tree_sitter import Language as TSLanguage
|
|
1002
|
+
from tree_sitter import Parser
|
|
1003
|
+
except ImportError as e:
|
|
1004
|
+
raise ImportError(
|
|
1005
|
+
"tree-sitter is required for C# scanning. "
|
|
1006
|
+
"Install with: uv add tree-sitter tree-sitter-c-sharp"
|
|
1007
|
+
) from e
|
|
1008
|
+
|
|
1009
|
+
import tree_sitter_c_sharp
|
|
1010
|
+
|
|
1011
|
+
lang = TSLanguage(tree_sitter_c_sharp.language())
|
|
1012
|
+
return Parser(lang)
|
|
1013
|
+
|
|
1014
|
+
|
|
1015
|
+
def _extract_csharp_signature(node: Node, source: bytes) -> str:
    """Extract a signature from a C# AST node.

    Supported node types are class / interface / struct / record / enum
    declarations (``<keyword> <Name>``, plus the base list for classes and
    structs), method declarations (modifiers, return type, name, parameter
    list) and property declarations (modifiers, type, name).

    For methods and properties the name is the last ``identifier`` child
    that precedes the parameter list / accessors, and the type is the first
    named child before that name that is not a modifier, attribute list or
    comment. This keeps the name correct when the type is itself an
    identifier (``public User Owner``) and picks up return types of any node
    kind (arrays, nullable types, qualified names, ...).

    Args:
        node: Declaration node from the C# parse tree.
        source: Source code as bytes.

    Returns:
        The signature text, or an empty string for unsupported node types.
    """
    node_type = node.type

    def _declared_name() -> str:
        name_node = _find_child_by_type(node, "identifier")
        return _get_node_text(name_node, source) if name_node else "unknown"

    def _member_parts(stop_types: tuple[str, ...]) -> list[str]:
        # Modifiers, type and name of a method/property, in source order.
        children = node.children

        # Only the children before the parameter list / accessors can hold
        # the type and the name; an initializer after them may contain
        # unrelated identifiers.
        head = []
        for child in children:
            if child.type in stop_types:
                break
            head.append(child)

        name_index = None
        for index, child in enumerate(head):
            if child.type == "identifier":
                name_index = index

        type_node = None
        for child in head[:name_index]:
            if child.is_named and child.type not in ("modifier", "attribute_list", "comment"):
                type_node = child
                break

        parts = [_get_node_text(child, source) for child in children if child.type == "modifier"]
        if type_node is not None:
            parts.append(_get_node_text(type_node, source))
        if name_index is not None:
            parts.append(_get_node_text(head[name_index], source))
        return parts

    if node_type in ("class_declaration", "struct_declaration"):
        keyword = "class" if node_type == "class_declaration" else "struct"
        name = _declared_name()
        base_list = _find_child_by_type(node, "base_list")
        if base_list:
            return f"{keyword} {name} {_get_node_text(base_list, source)}"
        return f"{keyword} {name}"

    if node_type == "interface_declaration":
        return f"interface {_declared_name()}"

    if node_type == "record_declaration":
        return f"record {_declared_name()}"

    if node_type == "enum_declaration":
        return f"enum {_declared_name()}"

    if node_type == "method_declaration":
        parts = _member_parts(("parameter_list",))
        params_node = _find_child_by_type(node, "parameter_list")
        if params_node:
            parts.append(_get_node_text(params_node, source))
        return " ".join(parts)

    if node_type == "property_declaration":
        return " ".join(_member_parts(("accessor_list", "arrow_expression_clause", "=")))

    return ""
|
|
1094
|
+
|
|
1095
|
+
|
|
1096
|
+
def _extract_csharp_symbols_from_tree(
    root: Node,
    source: bytes,
    file_path: str,
) -> list[Symbol]:
    """Extract symbols from a C# tree-sitter parse tree.

    Walks the AST and extracts classes, interfaces, structs, records,
    enums, methods, and properties. Structs and records are reported with
    kind ``"class"`` and properties with kind ``"method"``. Namespace bodies
    are traversed, but symbol names are local (not namespace-qualified).

    Args:
        root: Root node of the tree-sitter parse tree.
        source: Source code as bytes.
        file_path: Path to the source file.

    Returns:
        List of Symbol objects.
    """
    symbols: list[Symbol] = []

    container_types = {
        "class_declaration",
        "interface_declaration",
        "struct_declaration",
        "record_declaration",
    }

    def _get_name(node: Node) -> str:
        name_node = _find_child_by_type(node, "identifier")
        return _get_node_text(name_node, source) if name_node else "unknown"

    def _member_name(node: Node, stop_types: tuple[str, ...]) -> str:
        # Last identifier before the first child whose type is in stop_types.
        # The first identifier cannot be used: when the member's type is
        # itself an identifier (``User Owner``), it comes before the name.
        member_name = "unknown"
        for child in node.children:
            if child.type in stop_types:
                break
            if child.type == "identifier":
                member_name = _get_node_text(child, source)
        return member_name

    def walk(node: Node, parent_name: str | None = None) -> None:
        node_type = node.type

        if node_type == "namespace_declaration":
            # Only the declarations inside the namespace body are visited.
            body = _find_child_by_type(node, "declaration_list")
            if body:
                for child in body.children:
                    walk(child, parent_name)
            return

        if node_type in container_types:
            local_name = _get_name(node)

            kind: SymbolKind = "class"
            if node_type == "interface_declaration":
                kind = "interface"

            symbols.append(
                Symbol(
                    name=local_name,
                    kind=kind,
                    file=file_path,
                    line=node.start_point[0] + 1,
                    signature=_extract_csharp_signature(node, source),
                )
            )

            body = _find_child_by_type(node, "declaration_list")
            if body:
                for child in body.children:
                    walk(child, parent_name=local_name)
            return

        if node_type == "method_declaration" and parent_name is not None:
            symbols.append(
                Symbol(
                    name=_member_name(node, ("parameter_list",)),
                    kind="method",
                    file=file_path,
                    line=node.start_point[0] + 1,
                    signature=_extract_csharp_signature(node, source),
                    parent=parent_name,
                )
            )
            return

        if node_type == "property_declaration" and parent_name is not None:
            # Stop before the accessors / expression body / initializer so
            # identifiers inside them are never mistaken for the name.
            symbols.append(
                Symbol(
                    name=_member_name(
                        node, ("accessor_list", "arrow_expression_clause", "=")
                    ),
                    kind="method",
                    file=file_path,
                    line=node.start_point[0] + 1,
                    signature=_extract_csharp_signature(node, source),
                    parent=parent_name,
                )
            )
            return

        if node_type == "enum_declaration":
            symbols.append(
                Symbol(
                    name=_get_name(node),
                    kind="enum",
                    file=file_path,
                    line=node.start_point[0] + 1,
                    signature=_extract_csharp_signature(node, source),
                )
            )
            return

        for child in node.children:
            walk(child, parent_name)

    walk(root)
    return symbols
|
|
1218
|
+
|
|
1219
|
+
|
|
1220
|
+
def extract_csharp_symbols(source: str, file_path: str) -> list[Symbol]:
    """Parse C# source text and return the symbols it declares.

    The text is parsed with tree-sitter and the resulting tree is scanned
    for classes, interfaces, structs, records, enums, methods, and
    properties.

    Args:
        source: C# source code as string.
        file_path: Path to the source file (for metadata).

    Returns:
        List of Symbol objects.

    Examples:
        >>> source = '''
        ... public class UserService {
        ...     public void Process() { }
        ... }
        ... '''
        >>> symbols = extract_csharp_symbols(source, "UserService.cs")
        >>> symbols[0].kind
        'class'
    """
    csharp_parser = _get_csharp_parser()
    # tree-sitter works on bytes; the same buffer is reused for text lookups.
    encoded = source.encode("utf-8")
    return _extract_csharp_symbols_from_tree(
        csharp_parser.parse(encoded).root_node, encoded, file_path
    )
|
|
1248
|
+
|
|
1249
|
+
|
|
1250
|
+
# ── Dart / Flutter ───────────────────────────────────────────────────────
|
|
1251
|
+
|
|
1252
|
+
|
|
1253
|
+
def _get_dart_parser() -> Parser:
|
|
1254
|
+
"""Create a tree-sitter parser for Dart.
|
|
1255
|
+
|
|
1256
|
+
Uses tree-sitter-language-pack since no standalone tree-sitter-dart
|
|
1257
|
+
package exists on PyPI.
|
|
1258
|
+
|
|
1259
|
+
Returns:
|
|
1260
|
+
Configured tree-sitter Parser for Dart.
|
|
1261
|
+
|
|
1262
|
+
Raises:
|
|
1263
|
+
ImportError: If tree-sitter-language-pack is not installed.
|
|
1264
|
+
"""
|
|
1265
|
+
try:
|
|
1266
|
+
from tree_sitter_language_pack import get_parser
|
|
1267
|
+
except ImportError as e:
|
|
1268
|
+
raise ImportError(
|
|
1269
|
+
"tree-sitter-language-pack is required for Dart scanning. "
|
|
1270
|
+
"Install with: uv add tree-sitter-language-pack"
|
|
1271
|
+
) from e
|
|
1272
|
+
|
|
1273
|
+
return get_parser("dart")
|
|
1274
|
+
|
|
1275
|
+
|
|
1276
|
+
def _extract_dart_signature(node: Node, source: bytes) -> str:
    """Build a one-line signature string for a Dart declaration node.

    Function and method signature nodes are returned as their own source
    text (stripped). Classes, mixins, extensions and enums are rendered as
    ``<keyword> <Name>`` with the superclass clause (classes) or the ``on``
    type (extensions) appended when present.

    Args:
        node: Declaration node from the Dart parse tree.
        source: Source code as bytes.

    Returns:
        The signature text, or ``""`` for unsupported node types.
    """
    kind = node.type

    if kind in ("function_signature", "method_signature"):
        return _get_node_text(node, source).strip()

    if kind not in (
        "class_definition",
        "mixin_declaration",
        "extension_declaration",
        "enum_declaration",
    ):
        return ""

    ident = _find_child_by_type(node, "identifier")
    name = _get_node_text(ident, source) if ident else "unknown"

    if kind == "class_definition":
        # Check for abstract modifier
        keyword = "abstract class" if _find_child_by_type(node, "abstract") else "class"
        parent = _find_child_by_type(node, "superclass")
        suffix = f" {_get_node_text(parent, source)}" if parent else ""
        return f"{keyword} {name}{suffix}"

    if kind == "extension_declaration":
        # Find the "on" type
        target = _find_child_by_type(node, "type_identifier")
        if target:
            return f"extension {name} on {_get_node_text(target, source)}"
        return f"extension {name}"

    keyword = "mixin" if kind == "mixin_declaration" else "enum"
    return f"{keyword} {name}"
|
|
1314
|
+
|
|
1315
|
+
|
|
1316
|
+
def _extract_dart_symbols_from_tree(
    root: Node,
    source: bytes,
    file_path: str,
) -> list[Symbol]:
    """Collect Dart symbols by walking a tree-sitter parse tree.

    Classes, mixins and extensions are recorded and their bodies are then
    searched for methods. Enums and top-level function signatures are
    recorded without descending further. All other nodes are traversed
    transparently.

    Args:
        root: Root node of the tree-sitter parse tree.
        source: Source code as bytes.
        file_path: Path stored on every emitted symbol.

    Returns:
        Symbols in the order they are encountered during the walk.
    """
    found: list[Symbol] = []

    containers = {
        "class_definition",
        "mixin_declaration",
        "extension_declaration",
    }

    def _name_of(node: Node) -> str:
        """Return the text of the node's identifier child, or "unknown"."""
        ident = _find_child_by_type(node, "identifier")
        if not ident:
            return "unknown"
        return _get_node_text(ident, source)

    def _record(node: Node, name: str, kind: "SymbolKind", **extra: str) -> None:
        """Append a Symbol for *node*; *extra* carries optional fields."""
        found.append(
            Symbol(
                name=name,
                kind=kind,
                file=file_path,
                # start_point[0] is the node's row; +1 gives the reported line.
                line=node.start_point[0] + 1,
                signature=_extract_dart_signature(node, source),
                **extra,
            )
        )

    def walk(node: Node, parent_name: str | None = None) -> None:
        node_type = node.type

        if node_type in containers:
            owner = _name_of(node)
            # Mixins are reported as traits; classes and extensions as classes.
            _record(
                node,
                owner,
                "trait" if node_type == "mixin_declaration" else "class",
            )
            # Only the body is searched, with this container as the parent.
            body = _find_child_by_type(node, "class_body", "extension_body")
            if body:
                for member in body.children:
                    walk(member, parent_name=owner)
            return

        if node_type == "enum_declaration":
            _record(node, _name_of(node), "enum")
            return

        # A function_signature seen outside any container is a top-level function.
        if node_type == "function_signature" and parent_name is None:
            _record(node, _name_of(node), "function")
            return

        # A method_signature seen inside a container is a method; its name
        # comes from the wrapped function/getter signature when present.
        if node_type == "method_signature" and parent_name is not None:
            inner = _find_child_by_type(
                node, "function_signature", "getter_signature"
            )
            method_name = _name_of(inner) if inner else _name_of(node)
            _record(node, method_name, "method", parent=parent_name)
            return

        for child in node.children:
            walk(child, parent_name)

    walk(root)
    return found
|
|
1423
|
+
|
|
1424
|
+
|
|
1425
|
+
def extract_dart_symbols(source: str, file_path: str) -> list[Symbol]:
    """Parse Dart source text and return the symbols it declares.

    The text is encoded as UTF-8, parsed with the Dart tree-sitter parser,
    and the resulting tree is handed to the tree walker.

    Args:
        source: Dart source code as string.
        file_path: Path to the source file (for metadata).

    Returns:
        List of Symbol objects.

    Examples:
        >>> source = '''
        ... class UserService {
        ...   void process() {}
        ... }
        ... '''
        >>> symbols = extract_dart_symbols(source, "user_service.dart")
        >>> symbols[0].kind
        'class'
    """
    dart_parser = _get_dart_parser()
    # The walker slices node text out of these exact bytes, so the same
    # buffer is passed to both the parser and the extractor.
    encoded = source.encode("utf-8")
    return _extract_dart_symbols_from_tree(
        dart_parser.parse(encoded).root_node, encoded, file_path
    )
|
|
1453
|
+
|
|
1454
|
+
|
|
1455
|
+
def detect_language(file_path: str | Path) -> Language | None:
    """Map a file's extension to a supported language.

    The final suffix of the path is lower-cased and looked up in
    ``EXTENSION_TO_LANGUAGE``.

    Args:
        file_path: Path to the file.

    Returns:
        Language literal, or None when the extension is not in the table.

    Examples:
        >>> detect_language("foo.py")
        'python'
        >>> detect_language("bar.ts")
        'typescript'
        >>> detect_language("baz.rs")  # Returns None
    """
    suffix = Path(file_path).suffix
    return EXTENSION_TO_LANGUAGE.get(suffix.lower())
|
|
1473
|
+
|
|
1474
|
+
|
|
1475
|
+
def extract_symbols(source: str, file_path: str, language: Language) -> list[Symbol]:
    """Dispatch symbol extraction to the extractor for *language*.

    Args:
        source: Source code as string.
        file_path: Path to the source file (for metadata).
        language: Language of the source code.

    Returns:
        List of Symbol objects produced by the language-specific extractor.

    Raises:
        ValueError: If language is not one of the handled languages.

        Exceptions raised by the selected extractor propagate unchanged.

    Examples:
        >>> symbols = extract_symbols("class Foo: pass", "foo.py", "python")
        >>> symbols[0].kind
        'class'
    """
    if language == "python":
        return extract_python_symbols(source, file_path)
    if language == "typescript":
        return extract_typescript_symbols(source, file_path)
    if language == "javascript":
        return extract_javascript_symbols(source, file_path)
    if language == "php":
        return extract_php_symbols(source, file_path)
    if language == "svelte":
        return extract_svelte_symbols(source, file_path)
    if language == "csharp":
        return extract_csharp_symbols(source, file_path)
    if language == "dart":
        return extract_dart_symbols(source, file_path)

    # Nothing matched: the language is not supported.
    raise ValueError(f"Unsupported language: {language}")
|
|
1511
|
+
|
|
1512
|
+
|
|
1513
|
+
# Paths skipped by default when scanning directories: whole directories first
# (each wrapped as ``**/<name>/**``), then individual file-name globs.
DEFAULT_EXCLUDE_PATTERNS: list[str] = [
    *(
        f"**/{directory}/**"
        for directory in (
            "__pycache__",
            ".venv",
            "venv",
            "node_modules",
            "vendor",
            "dist",
            "build",
            ".git",
        )
    ),
    "**/*.blade.php",
    "*.Designer.cs",
]
|
|
1526
|
+
|
|
1527
|
+
# Language-specific default glob patterns, used by scan_directory when the
# caller passes no explicit ``pattern``. Values are lists so one language can
# cover several file extensions. The ``None`` key is the "no language given"
# case: every file is globbed and scan_directory detects the language per file.
DEFAULT_LANGUAGE_PATTERNS: dict[Language | None, list[str]] = {
    "python": ["**/*.py"],
    "typescript": ["**/*.ts", "**/*.tsx"],
    "javascript": ["**/*.js", "**/*.jsx", "**/*.mjs", "**/*.cjs"],
    "php": ["**/*.php"],
    "svelte": ["**/*.svelte"],
    "csharp": ["**/*.cs"],
    "dart": ["**/*.dart"],
    None: ["**/*"],  # Auto-detect: scan all files
}
|
|
1538
|
+
|
|
1539
|
+
|
|
1540
|
+
def _read_gitignore(root: Path) -> list[str]:
|
|
1541
|
+
"""Read .gitignore from *root* and convert entries to glob patterns.
|
|
1542
|
+
|
|
1543
|
+
Only handles simple .gitignore entries (directory names, file globs).
|
|
1544
|
+
Negation patterns (``!``) and anchored paths are ignored — they cover
|
|
1545
|
+
edge cases that don't affect typical exclude behaviour.
|
|
1546
|
+
"""
|
|
1547
|
+
gitignore = root / ".gitignore"
|
|
1548
|
+
if not gitignore.is_file():
|
|
1549
|
+
return []
|
|
1550
|
+
|
|
1551
|
+
patterns: list[str] = []
|
|
1552
|
+
try:
|
|
1553
|
+
for raw_line in gitignore.read_text(encoding="utf-8").splitlines():
|
|
1554
|
+
line = raw_line.strip()
|
|
1555
|
+
# Skip blanks, comments, negation
|
|
1556
|
+
if not line or line.startswith("#") or line.startswith("!"):
|
|
1557
|
+
continue
|
|
1558
|
+
# Strip trailing slash (directory marker)
|
|
1559
|
+
entry = line.rstrip("/")
|
|
1560
|
+
# Convert to glob: wrap bare names with **/ so they match anywhere
|
|
1561
|
+
if "/" not in entry:
|
|
1562
|
+
patterns.append(f"**/{entry}/**")
|
|
1563
|
+
else:
|
|
1564
|
+
patterns.append(f"**/{entry}/**")
|
|
1565
|
+
except OSError:
|
|
1566
|
+
return []
|
|
1567
|
+
return patterns
|
|
1568
|
+
|
|
1569
|
+
|
|
1570
|
+
def _is_directory_pattern(pattern: str) -> str | None:
|
|
1571
|
+
"""Extract directory name from patterns like ``**/node_modules/**``.
|
|
1572
|
+
|
|
1573
|
+
Returns the bare directory name if the pattern represents a directory
|
|
1574
|
+
exclusion, or None if it's a file-level pattern.
|
|
1575
|
+
"""
|
|
1576
|
+
stripped = pattern.strip("*").strip("/")
|
|
1577
|
+
if "/" not in stripped and pattern.endswith("/**"):
|
|
1578
|
+
return stripped
|
|
1579
|
+
return None
|
|
1580
|
+
|
|
1581
|
+
|
|
1582
|
+
def _should_exclude(file_path: Path, exclude_patterns: list[str]) -> bool:
|
|
1583
|
+
"""Check if a file should be excluded based on patterns.
|
|
1584
|
+
|
|
1585
|
+
Directory patterns (``**/name/**``) are matched by checking whether
|
|
1586
|
+
the directory name appears anywhere in the path parts. File patterns
|
|
1587
|
+
(``**/test_*``) use ``PurePath.match`` which handles ``**`` correctly
|
|
1588
|
+
for filename globbing.
|
|
1589
|
+
"""
|
|
1590
|
+
parts = file_path.parts
|
|
1591
|
+
for pattern in exclude_patterns:
|
|
1592
|
+
dir_name = _is_directory_pattern(pattern)
|
|
1593
|
+
if dir_name is not None:
|
|
1594
|
+
if dir_name in parts:
|
|
1595
|
+
return True
|
|
1596
|
+
else:
|
|
1597
|
+
if file_path.match(pattern):
|
|
1598
|
+
return True
|
|
1599
|
+
return False
|
|
1600
|
+
|
|
1601
|
+
|
|
1602
|
+
def _process_source_file(
|
|
1603
|
+
file_path: Path, rel_str: str, language: Language, result: ScanResult
|
|
1604
|
+
) -> None:
|
|
1605
|
+
"""Extract symbols from a source file and update result."""
|
|
1606
|
+
try:
|
|
1607
|
+
source = file_path.read_text(encoding="utf-8")
|
|
1608
|
+
symbols = extract_symbols(source, rel_str, language)
|
|
1609
|
+
result.symbols.extend(symbols)
|
|
1610
|
+
result.files_scanned += 1
|
|
1611
|
+
except SyntaxError as e:
|
|
1612
|
+
result.errors.append(f"{rel_str}: {e}")
|
|
1613
|
+
except UnicodeDecodeError as e:
|
|
1614
|
+
result.errors.append(f"{rel_str}: {e}")
|
|
1615
|
+
except Exception as e:
|
|
1616
|
+
result.errors.append(f"{rel_str}: {e}")
|
|
1617
|
+
|
|
1618
|
+
|
|
1619
|
+
def scan_directory(
    path: Path,
    *,
    language: Language | None = None,
    pattern: str | None = None,
    exclude_patterns: list[str] | None = None,
) -> ScanResult:
    """Scan a directory tree and aggregate the code symbols found in it.

    Files are globbed under *path*, de-duplicated by resolved path, filtered
    through the exclude patterns, and passed to the symbol extractor for
    their language. Per-file failures are collected in the result rather
    than raised.

    Args:
        path: Directory path to scan.
        language: Language to scan for. If None, the language is detected
            per file from its extension and unrecognised files are skipped.
        pattern: Glob pattern for files. If None, uses the language-specific
            defaults from ``DEFAULT_LANGUAGE_PATTERNS``.
        exclude_patterns: List of patterns to exclude (e.g., ["**/test_*"]).
            If None, ``DEFAULT_EXCLUDE_PATTERNS`` is used. Patterns derived
            from a ``.gitignore`` in *path* are appended in either case.

    Returns:
        ScanResult with all extracted symbols.

    Examples:
        >>> result = scan_directory(Path("src/"))  # Auto-detect
        >>> result = scan_directory(Path("src/"), language="typescript")
    """
    excludes = (
        list(DEFAULT_EXCLUDE_PATTERNS) if exclude_patterns is None else exclude_patterns
    )

    # Merge .gitignore patterns when present (on a copy, so the caller's
    # list is never mutated).
    from_gitignore = _read_gitignore(path)
    if from_gitignore:
        excludes = list(excludes) + from_gitignore

    # An explicit pattern wins over the language-specific defaults.
    globs = (
        [pattern]
        if pattern is not None
        else DEFAULT_LANGUAGE_PATTERNS.get(language, ["**/*"])
    )

    result = ScanResult()
    root = path.resolve()

    # Several globs may yield the same file; track resolved paths already seen.
    visited: set[Path] = set()
    for glob_pattern in globs:
        for candidate in path.glob(glob_pattern):
            if candidate.is_dir():
                continue

            real = candidate.resolve()
            if real in visited:
                continue
            visited.add(real)

            # Exclusion is evaluated on the path as globbed, not the resolved one.
            if _should_exclude(candidate, excludes):
                continue

            rel_str = (
                portable_path(candidate, root)
                if candidate.is_relative_to(root)
                else candidate.as_posix()
            )

            file_language = language or detect_language(candidate)
            if file_language is not None:
                _process_source_file(candidate, rel_str, file_language, result)

    return result
|