kodit 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/app.py +51 -23
- kodit/application/factories/reporting_factory.py +6 -2
- kodit/application/factories/server_factory.py +353 -0
- kodit/application/services/code_search_application_service.py +144 -0
- kodit/application/services/commit_indexing_application_service.py +700 -0
- kodit/application/services/indexing_worker_service.py +13 -44
- kodit/application/services/queue_service.py +24 -3
- kodit/application/services/reporting.py +0 -2
- kodit/application/services/sync_scheduler.py +15 -31
- kodit/cli.py +2 -753
- kodit/cli_utils.py +2 -9
- kodit/config.py +4 -97
- kodit/database.py +38 -1
- kodit/domain/enrichments/__init__.py +1 -0
- kodit/domain/enrichments/architecture/__init__.py +1 -0
- kodit/domain/enrichments/architecture/architecture.py +20 -0
- kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
- kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
- kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
- kodit/domain/enrichments/architecture/physical/physical.py +17 -0
- kodit/domain/enrichments/development/__init__.py +1 -0
- kodit/domain/enrichments/development/development.py +18 -0
- kodit/domain/enrichments/development/snippet/__init__.py +1 -0
- kodit/domain/enrichments/development/snippet/snippet.py +21 -0
- kodit/domain/enrichments/enricher.py +17 -0
- kodit/domain/enrichments/enrichment.py +39 -0
- kodit/domain/enrichments/request.py +12 -0
- kodit/domain/enrichments/response.py +11 -0
- kodit/domain/enrichments/usage/__init__.py +1 -0
- kodit/domain/enrichments/usage/api_docs.py +19 -0
- kodit/domain/enrichments/usage/usage.py +18 -0
- kodit/domain/{entities.py → entities/__init__.py} +50 -195
- kodit/domain/entities/git.py +190 -0
- kodit/domain/factories/__init__.py +1 -0
- kodit/domain/factories/git_repo_factory.py +76 -0
- kodit/domain/protocols.py +264 -64
- kodit/domain/services/bm25_service.py +5 -1
- kodit/domain/services/embedding_service.py +3 -0
- kodit/domain/services/enrichment_service.py +9 -30
- kodit/domain/services/git_repository_service.py +429 -0
- kodit/domain/services/git_service.py +300 -0
- kodit/domain/services/physical_architecture_service.py +182 -0
- kodit/domain/services/task_status_query_service.py +2 -2
- kodit/domain/value_objects.py +87 -135
- kodit/infrastructure/api/client/__init__.py +0 -2
- kodit/infrastructure/api/v1/__init__.py +0 -4
- kodit/infrastructure/api/v1/dependencies.py +92 -46
- kodit/infrastructure/api/v1/routers/__init__.py +0 -6
- kodit/infrastructure/api/v1/routers/commits.py +352 -0
- kodit/infrastructure/api/v1/routers/queue.py +2 -2
- kodit/infrastructure/api/v1/routers/repositories.py +282 -0
- kodit/infrastructure/api/v1/routers/search.py +31 -14
- kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
- kodit/infrastructure/api/v1/schemas/commit.py +96 -0
- kodit/infrastructure/api/v1/schemas/context.py +2 -0
- kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
- kodit/infrastructure/api/v1/schemas/repository.py +128 -0
- kodit/infrastructure/api/v1/schemas/search.py +12 -9
- kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
- kodit/infrastructure/api/v1/schemas/tag.py +31 -0
- kodit/infrastructure/api/v1/schemas/task_status.py +2 -0
- kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
- kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
- kodit/infrastructure/cloning/git/git_python_adaptor.py +534 -0
- kodit/infrastructure/cloning/git/working_copy.py +1 -1
- kodit/infrastructure/embedding/embedding_factory.py +3 -2
- kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
- kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
- kodit/infrastructure/enricher/__init__.py +1 -0
- kodit/infrastructure/enricher/enricher_factory.py +53 -0
- kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +36 -56
- kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
- kodit/infrastructure/enricher/null_enricher.py +36 -0
- kodit/infrastructure/indexing/fusion_service.py +1 -1
- kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
- kodit/infrastructure/mappers/git_mapper.py +193 -0
- kodit/infrastructure/mappers/snippet_mapper.py +104 -0
- kodit/infrastructure/mappers/task_mapper.py +5 -44
- kodit/infrastructure/physical_architecture/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
- kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
- kodit/infrastructure/reporting/log_progress.py +8 -5
- kodit/infrastructure/reporting/telemetry_progress.py +21 -0
- kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
- kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
- kodit/infrastructure/slicing/slicer.py +87 -421
- kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
- kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
- kodit/infrastructure/sqlalchemy/entities.py +402 -158
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +274 -0
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +346 -0
- kodit/infrastructure/sqlalchemy/git_repository.py +262 -0
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +268 -0
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +479 -0
- kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
- kodit/infrastructure/sqlalchemy/task_status_repository.py +24 -12
- kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
- kodit/mcp.py +12 -30
- kodit/migrations/env.py +1 -0
- kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
- kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
- kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
- kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
- kodit/py.typed +0 -0
- kodit/utils/dump_config.py +361 -0
- kodit/utils/dump_openapi.py +6 -4
- kodit/utils/path_utils.py +29 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/METADATA +3 -3
- kodit-0.5.1.dist-info/RECORD +168 -0
- kodit/application/factories/code_indexing_factory.py +0 -195
- kodit/application/services/auto_indexing_service.py +0 -99
- kodit/application/services/code_indexing_application_service.py +0 -410
- kodit/domain/services/index_query_service.py +0 -70
- kodit/domain/services/index_service.py +0 -269
- kodit/infrastructure/api/client/index_client.py +0 -57
- kodit/infrastructure/api/v1/routers/indexes.py +0 -164
- kodit/infrastructure/api/v1/schemas/index.py +0 -101
- kodit/infrastructure/bm25/bm25_factory.py +0 -28
- kodit/infrastructure/cloning/__init__.py +0 -1
- kodit/infrastructure/cloning/metadata.py +0 -98
- kodit/infrastructure/enrichment/__init__.py +0 -1
- kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
- kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
- kodit/infrastructure/mappers/index_mapper.py +0 -345
- kodit/infrastructure/reporting/tdqm_progress.py +0 -38
- kodit/infrastructure/slicing/language_detection_service.py +0 -18
- kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
- kodit-0.4.3.dist-info/RECORD +0 -125
- /kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/WHEEL +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/entry_points.txt +0 -0
- {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,1128 @@
|
|
|
1
|
+
"""AST analyzer for extracting code definitions across multiple languages.
|
|
2
|
+
|
|
3
|
+
This module provides language-agnostic AST parsing and analysis using tree-sitter.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from collections.abc import Generator
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any, ClassVar
|
|
10
|
+
|
|
11
|
+
import structlog
|
|
12
|
+
from tree_sitter import Node, Parser, Tree
|
|
13
|
+
from tree_sitter_language_pack import get_language
|
|
14
|
+
|
|
15
|
+
from kodit.domain.entities.git import GitFile
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class LanguageConfig:
    """Registry of per-language tree-sitter settings.

    ``CONFIGS`` maps a lower-case language name to a settings dict with the
    keys ``function_nodes``, ``method_nodes``, ``call_node``,
    ``import_nodes``, ``extension`` and ``name_field``.
    """

    CONFIGS: ClassVar[dict[str, dict[str, Any]]] = {
        "python": {
            "function_nodes": ["function_definition"],
            "method_nodes": [],
            "call_node": "call",
            "import_nodes": ["import_statement", "import_from_statement"],
            "extension": ".py",
            "name_field": None,  # Use identifier child
        },
        "java": {
            "function_nodes": ["method_declaration"],
            "method_nodes": [],
            "call_node": "method_invocation",
            "import_nodes": ["import_declaration"],
            "extension": ".java",
            "name_field": None,
        },
        "c": {
            "function_nodes": ["function_definition"],
            "method_nodes": [],
            "call_node": "call_expression",
            "import_nodes": ["preproc_include"],
            "extension": ".c",
            "name_field": "declarator",
        },
        "cpp": {
            "function_nodes": ["function_definition"],
            "method_nodes": [],
            "call_node": "call_expression",
            "import_nodes": ["preproc_include", "using_declaration"],
            "extension": ".cpp",
            "name_field": "declarator",
        },
        "rust": {
            "function_nodes": ["function_item"],
            "method_nodes": [],
            "call_node": "call_expression",
            "import_nodes": ["use_declaration", "extern_crate_declaration"],
            "extension": ".rs",
            "name_field": "name",
        },
        "go": {
            "function_nodes": ["function_declaration"],
            "method_nodes": ["method_declaration"],
            "call_node": "call_expression",
            "import_nodes": ["import_declaration"],
            "extension": ".go",
            "name_field": None,
        },
        "javascript": {
            "function_nodes": [
                "function_declaration",
                "function_expression",
                "arrow_function",
            ],
            "method_nodes": [],
            "call_node": "call_expression",
            "import_nodes": ["import_statement", "import_declaration"],
            "extension": ".js",
            "name_field": None,
        },
        "csharp": {
            "function_nodes": ["method_declaration"],
            "method_nodes": ["constructor_declaration"],
            "call_node": "invocation_expression",
            "import_nodes": ["using_directive"],
            "extension": ".cs",
            "name_field": None,
        },
        "html": {
            "function_nodes": ["script_element", "style_element"],
            "method_nodes": ["element"],  # Elements with id/class attributes
            "call_node": "attribute",
            "import_nodes": ["script_element", "element"],  # script and link elements
            "extension": ".html",
            "name_field": None,
        },
        "css": {
            "function_nodes": ["rule_set", "keyframes_statement"],
            "method_nodes": ["media_statement"],
            "call_node": "call_expression",
            "import_nodes": ["import_statement"],
            "extension": ".css",
            "name_field": None,
        },
    }

    # Aliases: every alternate spelling points at the *same* settings dict as
    # its canonical entry (no copy is made).
    for _alias, _canonical in (
        ("c++", "cpp"),
        ("typescript", "javascript"),
        ("ts", "javascript"),
        ("js", "javascript"),
        ("c#", "csharp"),
        ("cs", "csharp"),
    ):
        CONFIGS[_alias] = CONFIGS[_canonical]
    # Keep the loop variables out of the class namespace.
    del _alias, _canonical
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
@dataclass
class ParsedFile:
    """One source file together with its tree-sitter parse result."""

    # Location the bytes were read from.
    path: Path
    # The git file record this parse result was produced for.
    git_file: GitFile
    # Syntax tree returned by the tree-sitter parser.
    tree: Tree
    # Raw file contents, exactly as handed to the parser.
    source_code: bytes
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
@dataclass
class FunctionDefinition:
    """Description of a single function or method found in a parsed file."""

    # File the definition was found in.
    file: Path
    # Syntax node of the definition itself.
    node: Node
    # (start_byte, end_byte) of the node within the file contents.
    span: tuple[int, int]
    # Dotted name prefixed with the file stem.
    qualified_name: str
    # Bare name of the function or method.
    simple_name: str
    # Whether the definition counts as public.
    is_public: bool
    # Whether the definition is a method rather than a free function.
    is_method: bool
    # Documentation text attached to the definition, if any.
    docstring: str | None
    # Parameter descriptions as strings.
    parameters: list[str]
    # Return type as text, if any.
    return_type: str | None
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
@dataclass
class ClassDefinition:
    """Description of a single class found in a parsed file."""

    # File the class was found in.
    file: Path
    # Syntax node of the class definition.
    node: Node
    # (start_byte, end_byte) of the node within the file contents.
    span: tuple[int, int]
    # Dotted name prefixed with the file stem.
    qualified_name: str
    # Bare class name.
    simple_name: str
    # Whether the class counts as public.
    is_public: bool
    # Documentation text attached to the class, if any.
    docstring: str | None
    # Methods defined in the class body.
    methods: list[FunctionDefinition]
    # Names of the base classes.
    base_classes: list[str]
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
@dataclass
class TypeDefinition:
    """Description of a type definition (enum, interface, type alias)."""

    # File the type was found in.
    file: Path
    # Syntax node of the type definition.
    node: Node
    # (start_byte, end_byte) of the node within the file contents.
    span: tuple[int, int]
    # Dotted name of the type.
    qualified_name: str
    # Bare name of the type.
    simple_name: str
    # Whether the type counts as public.
    is_public: bool
    # Documentation text attached to the type, if any.
    docstring: str | None
    # Category of the definition.
    # NOTE(review): presumably one of "enum" / "interface" / "type alias";
    # the accepted values are not visible here - confirm.
    kind: str
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
@dataclass
class ModuleDefinition:
    """Everything extracted from the files that make up one module."""

    # Module path derived from the first file of the group.
    module_path: str
    # Parsed files that belong to the module.
    files: list[ParsedFile]
    # Functions collected from all files of the module.
    functions: list[FunctionDefinition]
    # Classes collected from all files of the module.
    classes: list[ClassDefinition]
    # Type definitions collected from all files of the module.
    types: list[TypeDefinition]
    # Constants as (str, node) pairs.
    # NOTE(review): the string is presumably the constant's name - confirm.
    constants: list[tuple[str, Node]]
    # Module-level documentation text, if any.
    module_docstring: str | None
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
class ASTAnalyzer:
|
|
186
|
+
"""Language-agnostic AST analyzer.
|
|
187
|
+
|
|
188
|
+
Parses files with tree-sitter and extracts structured information about
|
|
189
|
+
definitions (functions, classes, types). Used by both Slicer (for code
|
|
190
|
+
snippets) and other consumers (e.g., API documentation extraction, module
|
|
191
|
+
hierarchy analysis).
|
|
192
|
+
"""
|
|
193
|
+
|
|
194
|
+
def __init__(self, language: str) -> None:
    """Set up the analyzer and its tree-sitter parser for one language.

    Args:
        language: Language name or alias; it is lower-cased before being
            looked up in ``LanguageConfig.CONFIGS``.

    Raises:
        ValueError: If no configuration exists for ``language``.
    """
    normalized = language.lower()
    self.language = normalized

    settings = LanguageConfig.CONFIGS.get(normalized)
    if not settings:
        raise ValueError(f"Unsupported language: {language}")
    self.config = settings

    # The grammar is looked up under its tree-sitter name, which can differ
    # from the alias the caller supplied.
    grammar = get_language(self._get_tree_sitter_name())  # type: ignore[arg-type]
    self.parser = Parser(grammar)
    self.log = structlog.get_logger(__name__)
|
|
205
|
+
|
|
206
|
+
def parse_files(self, files: list[GitFile]) -> list[ParsedFile]:
    """Read every file from disk and parse it with the configured parser.

    A file whose path does not exist is skipped with a debug log entry; a
    file that raises ``OSError`` while being processed is skipped with a
    warning.

    Args:
        files: Git file records; each record's ``path`` is used as the
            location on disk.

    Returns:
        One ``ParsedFile`` per successfully parsed input, in input order.
    """
    results = []
    for entry in files:
        location = Path(entry.path)
        if not location.exists():
            self.log.debug("Skipping non-existent file", path=str(location))
            continue

        try:
            with location.open("rb") as handle:
                raw = handle.read()

            syntax_tree = self.parser.parse(raw)
            results.append(
                ParsedFile(
                    path=location,
                    git_file=entry,
                    tree=syntax_tree,
                    source_code=raw,
                )
            )
        except OSError as exc:
            self.log.warning(
                "Failed to parse file", path=str(location), error=str(exc)
            )
            continue

    return results
|
|
233
|
+
|
|
234
|
+
def extract_definitions(
    self,
    parsed_files: list[ParsedFile],
    *,
    include_private: bool = True,
) -> tuple[list[FunctionDefinition], list[ClassDefinition], list[TypeDefinition]]:
    """Collect functions, classes and types from all parsed files.

    Args:
        parsed_files: Files previously produced by ``parse_files``.
        include_private: Forwarded to the per-kind extractors; when false,
            non-public definitions are left out.

    Returns:
        A ``(functions, classes, types)`` tuple of flat lists, each in
        file order.
    """
    all_functions = []
    all_classes = []
    all_types = []

    for item in parsed_files:
        all_functions += self._extract_functions(
            item, include_private=include_private
        )
        all_classes += self._extract_classes(item, include_private=include_private)
        all_types += self._extract_types(item, include_private=include_private)

    return all_functions, all_classes, all_types
|
|
257
|
+
|
|
258
|
+
def extract_module_definitions(
    self, parsed_files: list[ParsedFile], *, include_private: bool = False
) -> list[ModuleDefinition]:
    """Build one ``ModuleDefinition`` per group of files.

    Files are grouped with ``_group_by_module``; for every group the
    functions, classes, types and constants of all its files are collected.

    Args:
        parsed_files: Files previously produced by ``parse_files``.
        include_private: Forwarded to the per-kind extractors; when false,
            non-public definitions are left out.

    Returns:
        The module definitions, in the iteration order of the grouping.
    """
    grouped = self._group_by_module(parsed_files)

    definitions = []
    for group in grouped.values():
        module_functions = []
        module_classes = []
        module_types = []
        module_constants = []

        for item in group:
            module_functions += self._extract_functions(
                item, include_private=include_private
            )
            module_classes += self._extract_classes(
                item, include_private=include_private
            )
            module_types += self._extract_types(
                item, include_private=include_private
            )
            module_constants += self._extract_constants(
                item, include_private=include_private
            )

        docstring = self._extract_module_docstring(group)

        # The module path is derived from the first file of the group only.
        path_of_module = self._extract_module_path(group[0])

        definitions.append(
            ModuleDefinition(
                module_path=path_of_module,
                files=group,
                functions=module_functions,
                classes=module_classes,
                types=module_types,
                constants=module_constants,
                module_docstring=docstring,
            )
        )

    return definitions
|
|
303
|
+
|
|
304
|
+
def _get_tree_sitter_name(self) -> str:
|
|
305
|
+
"""Map language name to tree-sitter language name."""
|
|
306
|
+
mapping = {
|
|
307
|
+
"c++": "cpp",
|
|
308
|
+
"c#": "csharp",
|
|
309
|
+
"cs": "csharp",
|
|
310
|
+
"js": "javascript",
|
|
311
|
+
"ts": "typescript",
|
|
312
|
+
}
|
|
313
|
+
return mapping.get(self.language, self.language)
|
|
314
|
+
|
|
315
|
+
def _walk_tree(self, node: Node) -> Generator[Node, None, None]:
|
|
316
|
+
"""Walk the AST tree, yielding all nodes."""
|
|
317
|
+
queue = [node]
|
|
318
|
+
visited: set[int] = set()
|
|
319
|
+
|
|
320
|
+
while queue:
|
|
321
|
+
current = queue.pop(0)
|
|
322
|
+
node_id = id(current)
|
|
323
|
+
if node_id in visited:
|
|
324
|
+
continue
|
|
325
|
+
visited.add(node_id)
|
|
326
|
+
|
|
327
|
+
yield current
|
|
328
|
+
queue.extend(current.children)
|
|
329
|
+
|
|
330
|
+
def _is_function_definition(self, node: Node) -> bool:
    """Tell whether ``node`` has one of the configured definition types.

    Both the ``function_nodes`` and the ``method_nodes`` of the active
    language configuration count.
    """
    definition_types = self.config["function_nodes"] + self.config["method_nodes"]
    return node.type in definition_types
|
|
335
|
+
|
|
336
|
+
def _extract_function_name(self, node: Node) -> str | None:
|
|
337
|
+
"""Extract function name from a function definition node."""
|
|
338
|
+
if self.language == "html":
|
|
339
|
+
return self._extract_html_element_name(node)
|
|
340
|
+
if self.language == "css":
|
|
341
|
+
return self._extract_css_rule_name(node)
|
|
342
|
+
if self.language == "go" and node.type == "method_declaration":
|
|
343
|
+
return self._extract_go_method_name(node)
|
|
344
|
+
if self.language in ["c", "cpp"] and self.config["name_field"]:
|
|
345
|
+
return self._extract_c_cpp_function_name(node)
|
|
346
|
+
if self.language == "rust" and self.config["name_field"]:
|
|
347
|
+
return self._extract_rust_function_name(node)
|
|
348
|
+
return self._extract_default_function_name(node)
|
|
349
|
+
|
|
350
|
+
def _extract_go_method_name(self, node: Node) -> str | None:
    """Return the text of the first ``field_identifier`` child, if any.

    Args:
        node: A Go ``method_declaration`` node.

    Returns:
        The decoded method name, or ``None`` when no such child has text.
    """
    names = (
        part.text.decode("utf-8")
        for part in node.children
        if part.type == "field_identifier" and part.text is not None
    )
    return next(names, None)
|
|
356
|
+
|
|
357
|
+
def _extract_c_cpp_function_name(self, node: Node) -> str | None:
    """Return the function name found through the configured declarator field.

    Args:
        node: A C or C++ function definition node.

    Returns:
        The first ``identifier`` inside a ``function_declarator``, or the
        declarator's own text when it is a plain ``identifier``; ``None``
        in every other case.
    """
    declarator = node.child_by_field_name(self.config["name_field"])
    if not declarator:
        return None

    if declarator.type == "function_declarator":
        names = (
            part.text.decode("utf-8")
            for part in declarator.children
            if part.type == "identifier" and part.text is not None
        )
        return next(names, None)

    if declarator.type == "identifier" and declarator.text is not None:
        return declarator.text.decode("utf-8")
    return None
|
|
370
|
+
|
|
371
|
+
def _extract_rust_function_name(self, node: Node) -> str | None:
    """Return the function name found through the configured name field.

    Args:
        node: A Rust function definition node.

    Returns:
        The decoded text of the field when it is an ``identifier`` with
        text, otherwise ``None``.
    """
    candidate = node.child_by_field_name(self.config["name_field"])
    if not candidate:
        return None
    if candidate.type != "identifier" or candidate.text is None:
        return None
    return candidate.text.decode("utf-8")
|
|
377
|
+
|
|
378
|
+
def _extract_html_element_name(self, node: Node) -> str | None:
    """Return a display name for an HTML node.

    Script and style elements get the fixed names ``"script"`` and
    ``"style"``; ordinary elements are described by
    ``_extract_html_element_info``; any other node type yields ``None``.
    """
    kind = node.type
    fixed_names = {"script_element": "script", "style_element": "style"}
    if kind in fixed_names:
        return fixed_names[kind]
    if kind == "element":
        return self._extract_html_element_info(node)
    return None
|
|
387
|
+
|
|
388
|
+
def _extract_html_element_info(self, node: Node) -> str | None:
    """Describe an element as ``tag#id``, ``tag.class`` or plain ``tag``.

    The ID takes precedence over the class; ``"element"`` stands in for a
    missing tag name in the first two forms.

    Returns:
        The description built from the first ``start_tag`` child that yields
        one, or ``None`` when there is none.
    """
    for part in node.children:
        if part.type != "start_tag":
            continue

        tag = self._get_tag_name(part)
        identifier = self._get_element_id(part)
        css_class = self._get_element_class(part)

        if identifier:
            return f"{tag or 'element'}#{identifier}"
        if css_class:
            return f"{tag or 'element'}.{css_class}"
        if tag:
            return tag
    return None
|
|
403
|
+
|
|
404
|
+
def _get_tag_name(self, start_tag: Node) -> str | None:
    """Return the decoded text of the first non-empty ``tag_name`` child.

    Returns:
        The tag name, or ``None`` when there is no such child or its bytes
        are not valid UTF-8.
    """
    for part in start_tag.children:
        if part.type != "tag_name" or not part.text:
            continue
        try:
            return part.text.decode("utf-8")
        except UnicodeDecodeError:
            return None
    return None
|
|
413
|
+
|
|
414
|
+
def _get_element_id(self, start_tag: Node) -> str | None:
    """Return the value of the ``id`` attribute of a start tag, if present."""
    element_id = self._get_attribute_value(start_tag, "id")
    return element_id
|
|
417
|
+
|
|
418
|
+
def _get_element_class(self, start_tag: Node) -> str | None:
|
|
419
|
+
"""Get first class name from start_tag node."""
|
|
420
|
+
class_value = self._get_attribute_value(start_tag, "class")
|
|
421
|
+
return class_value.split()[0] if class_value else None
|
|
422
|
+
|
|
423
|
+
def _get_attribute_value(self, start_tag: Node, attr_name: str) -> str | None:
    """Return the value of the first attribute called ``attr_name``.

    Args:
        start_tag: The ``start_tag`` node whose attributes are searched.
        attr_name: Attribute name to match exactly.

    Returns:
        Whatever ``_get_attr_value`` reports for the first matching
        attribute, or ``None`` when no attribute matches.
    """
    for part in start_tag.children:
        if part.type == "attribute" and self._get_attr_name(part) == attr_name:
            return self._get_attr_value(part)
    return None
|
|
431
|
+
|
|
432
|
+
def _get_attr_name(self, attr_node: Node) -> str | None:
    """Return the decoded text of the first non-empty ``attribute_name`` child.

    Returns:
        The attribute name, or ``None`` when there is no such child or its
        bytes are not valid UTF-8.
    """
    for part in attr_node.children:
        if part.type != "attribute_name" or not part.text:
            continue
        try:
            return part.text.decode("utf-8")
        except UnicodeDecodeError:
            return None
    return None
|
|
441
|
+
|
|
442
|
+
def _get_attr_value(self, attr_node: Node) -> str | None:
    """Return the text of a quoted attribute value.

    Only ``attribute_value`` nodes nested inside a
    ``quoted_attribute_value`` child are considered.

    Returns:
        The decoded value, or ``None`` when no such value exists or its
        bytes are not valid UTF-8.
    """
    for part in attr_node.children:
        if part.type != "quoted_attribute_value":
            continue
        for inner in part.children:
            if inner.type != "attribute_value" or not inner.text:
                continue
            try:
                return inner.text.decode("utf-8")
            except UnicodeDecodeError:
                return None
    return None
|
|
453
|
+
|
|
454
|
+
def _extract_css_rule_name(self, node: Node) -> str | None:
    """Return a display name for a CSS node.

    Rule sets are named after their selector, keyframes after their
    animation name, and media statements get the fixed name ``"@media"``;
    any other node type yields ``None``.
    """
    kind = node.type
    if kind == "rule_set":
        return self._extract_css_selector(node)
    if kind == "keyframes_statement":
        return self._extract_keyframes_name(node)
    return "@media" if kind == "media_statement" else None
|
|
463
|
+
|
|
464
|
+
def _extract_css_selector(self, rule_node: Node) -> str | None:
    """Return a short selector string for a rule set.

    The recognised parts of a ``selectors`` child are collected and at most
    the first two are concatenated without a separator.

    Returns:
        The selector string from the first ``selectors`` child that has a
        recognised part, or ``None`` when there is none.
    """
    for part in rule_node.children:
        if part.type != "selectors":
            continue
        pieces = [
            piece for piece in map(self._get_selector_part, part.children) if piece
        ]
        if pieces:
            return "".join(pieces[:2])
    return None
|
|
476
|
+
|
|
477
|
+
def _get_selector_part(self, selector_node: Node) -> str | None:
    """Return the text for one class, ID or type selector.

    Returns:
        ``.name`` for a class selector, ``#name`` for an ID selector, the raw
        text for a type selector, and ``None`` for anything else.
    """
    kind = selector_node.type
    if kind == "class_selector":
        return self._extract_class_selector(selector_node)
    if kind == "id_selector":
        return self._extract_id_selector(selector_node)
    if kind == "type_selector" and selector_node.text:
        return selector_node.text.decode("utf-8")
    return None
|
|
486
|
+
|
|
487
|
+
def _extract_class_selector(self, node: Node) -> str | None:
    """Return ``.name`` for a class selector node.

    The name is the first non-empty ``identifier`` found under a
    ``class_name`` child; ``None`` is returned when there is none.
    """
    for part in node.children:
        if part.type != "class_name":
            continue
        for inner in part.children:
            if inner.type == "identifier" and inner.text:
                return f".{inner.text.decode('utf-8')}"
    return None
|
|
495
|
+
|
|
496
|
+
def _extract_id_selector(self, node: Node) -> str | None:
    """Return ``#name`` for an ID selector node.

    The name is the first non-empty ``identifier`` found under an
    ``id_name`` child; ``None`` is returned when there is none.
    """
    for part in node.children:
        if part.type != "id_name":
            continue
        for inner in part.children:
            if inner.type == "identifier" and inner.text:
                return f"#{inner.text.decode('utf-8')}"
    return None
|
|
504
|
+
|
|
505
|
+
def _extract_keyframes_name(self, node: Node) -> str | None:
    """Return ``@keyframes-<name>`` for a keyframes statement.

    The name is taken from the first non-empty ``keyframes_name`` child;
    ``None`` is returned when there is none.
    """
    labels = (
        f"@keyframes-{part.text.decode('utf-8')}"
        for part in node.children
        if part.type == "keyframes_name" and part.text
    )
    return next(labels, None)
|
|
511
|
+
|
|
512
|
+
def _extract_default_function_name(self, node: Node) -> str | None:
    """Return the text of the first direct ``identifier`` child.

    Only immediate children are inspected; ``None`` is returned when none of
    them is an identifier with text.
    """
    names = (
        part.text.decode("utf-8")
        for part in node.children
        if part.type == "identifier" and part.text is not None
    )
    return next(names, None)
|
|
518
|
+
|
|
519
|
+
def _qualify_name(self, node: Node, file_path: Path) -> str | None:
    """Build ``<file stem>.<function name>`` for a definition node.

    Args:
        node: The definition node to name.
        file_path: File the node comes from; only its stem is used.

    Returns:
        The qualified name, or ``None`` when the node has no extractable
        name.
    """
    simple = self._extract_function_name(node)
    if not simple:
        return None
    return f"{file_path.stem}.{simple}"
|
|
527
|
+
|
|
528
|
+
def _extract_functions(
    self, parsed: ParsedFile, *, include_private: bool
) -> list[FunctionDefinition]:
    """Collect every function definition found anywhere in a parsed file.

    Args:
        parsed: The file to inspect.
        include_private: When false, definitions that ``_is_public`` rejects
            are left out.

    Returns:
        The definitions in the order the tree walk encounters them. Nodes
        without an extractable name are skipped.
    """
    collected = []

    for candidate in self._walk_tree(parsed.tree.root_node):
        if not self._is_function_definition(candidate):
            continue

        qualified = self._qualify_name(candidate, parsed.path)
        if not qualified:
            continue

        simple = self._extract_function_name(candidate)
        if not simple:
            continue

        public = self._is_public(candidate, simple)
        if not (include_private or public):
            continue

        byte_span = (candidate.start_byte, candidate.end_byte)
        doc = self._extract_docstring(candidate)
        params = self._extract_parameters(candidate)
        returns = self._extract_return_type(candidate)
        method_flag = self._is_method(candidate)

        collected.append(
            FunctionDefinition(
                file=parsed.path,
                node=candidate,
                span=byte_span,
                qualified_name=qualified,
                simple_name=simple,
                is_public=public,
                is_method=method_flag,
                docstring=doc,
                parameters=params,
                return_type=returns,
            )
        )

    return collected
|
|
570
|
+
|
|
571
|
+
def _extract_classes(
    self, parsed: ParsedFile, *, include_private: bool
) -> list[ClassDefinition]:
    """Collect class definitions, including their methods, from a parsed file.

    Only Python is handled; every other language yields an empty list.
    """
    if self.language != "python":
        # For other languages, not yet implemented
        return []
    return self._extract_python_classes(parsed, include_private=include_private)
|
|
579
|
+
|
|
580
|
+
def _extract_python_classes(
    self, parsed: ParsedFile, *, include_private: bool
) -> list[ClassDefinition]:
    """Collect every Python ``class_definition`` found in a parsed file.

    Args:
        parsed: The file to inspect.
        include_private: When false, classes that ``_is_public`` rejects are
            left out; the flag is also passed on to the method extraction.

    Returns:
        The class definitions in the order the tree walk encounters them.
        Classes without a name identifier are skipped.
    """
    collected = []

    for candidate in self._walk_tree(parsed.tree.root_node):
        if candidate.type != "class_definition":
            continue

        # The class name is the first direct identifier child with text.
        name = next(
            (
                part.text.decode("utf-8")
                for part in candidate.children
                if part.type == "identifier" and part.text
            ),
            None,
        )
        if not name:
            continue

        public = self._is_public(candidate, name)
        if not (include_private or public):
            continue

        doc = self._extract_docstring(candidate)

        # Methods are the functions defined inside the class.
        class_methods = self._extract_class_methods(
            candidate, parsed, include_private
        )
        bases = self._extract_base_classes(candidate)

        collected.append(
            ClassDefinition(
                file=parsed.path,
                node=candidate,
                span=(candidate.start_byte, candidate.end_byte),
                qualified_name=f"{parsed.path.stem}.{name}",
                simple_name=name,
                is_public=public,
                docstring=doc,
                methods=class_methods,
                base_classes=bases,
            )
        )

    return collected
|
|
630
|
+
|
|
631
|
+
def _extract_class_methods(  # noqa: C901
    self, class_node: Node, parsed: ParsedFile, include_private: bool  # noqa: FBT001
) -> list[FunctionDefinition]:
    """Extract the methods defined directly in a class body.

    Only functions that are direct children of the class's ``block`` are
    returned. Decorated methods (``@property``, ``@classmethod``, ...) are
    wrapped by the grammar in a ``decorated_definition`` node; the wrapper
    is unwrapped so those methods are reported too, using the inner
    ``function_definition`` as the definition node.

    Args:
        class_node: The ``class_definition`` node to inspect.
        parsed: The file the class belongs to; its path supplies ``file``
            and the stem used in the qualified name.
        include_private: When false, methods rejected by
            ``self._is_public`` are left out.

    Returns:
        One FunctionDefinition per method in source order, each with
        ``is_method=True``, an empty parameter list and no return type.
    """
    # The owning class name is the same for every method, so resolve it once
    # instead of rescanning the class node's children per method.
    class_name = next(
        (
            child.text.decode("utf-8")
            for child in class_node.children
            if child.type == "identifier" and child.text
        ),
        None,
    )

    methods = []
    for child in class_node.children:
        if child.type != "block":
            continue

        for block_child in child.children:
            func_node = block_child
            if block_child.type == "decorated_definition":
                # A decorated class has no function_definition child and
                # therefore resolves to None here.
                func_node = next(
                    (
                        inner
                        for inner in block_child.children
                        if inner.type == "function_definition"
                    ),
                    None,
                )
            if func_node is None or func_node.type != "function_definition":
                continue

            method_name = next(
                (
                    part.text.decode("utf-8")
                    for part in func_node.children
                    if part.type == "identifier" and part.text
                ),
                None,
            )
            if not method_name:
                continue

            is_public = self._is_public(func_node, method_name)
            if not include_private and not is_public:
                continue

            methods.append(
                FunctionDefinition(
                    file=parsed.path,
                    node=func_node,
                    span=(func_node.start_byte, func_node.end_byte),
                    qualified_name=(
                        f"{parsed.path.stem}.{class_name}.{method_name}"
                    ),
                    simple_name=method_name,
                    is_public=is_public,
                    is_method=True,
                    docstring=self._extract_docstring(func_node),
                    parameters=[],
                    return_type=None,
                )
            )

    return methods
|
|
688
|
+
|
|
689
|
+
def _extract_base_classes(self, class_node: Node) -> list[str]:
    """Extract base class names from a class definition.

    The bases live in the class's ``argument_list`` child. Besides bare
    names (``identifier``), dotted bases such as ``enum.Enum``
    (``attribute``) and subscripted bases such as ``Generic[T]``
    (``subscript``) are returned as their source text. Keyword arguments
    like ``metaclass=Meta`` are not bases and are left out.

    Args:
        class_node: The ``class_definition`` node to inspect.

    Returns:
        The source text of every base, in declaration order; empty when the
        class has no inheritance list.
    """
    base_node_types = {"identifier", "attribute", "subscript"}
    return [
        arg.text.decode("utf-8")
        for child in class_node.children
        if child.type == "argument_list"
        for arg in child.children
        if arg.type in base_node_types and arg.text
    ]
|
|
703
|
+
|
|
704
|
+
def _extract_types(
    self, parsed: ParsedFile, *, include_private: bool
) -> list[TypeDefinition]:
    """Return the type definitions (structs, interfaces, aliases, ...) of a file.

    Only Go has an extractor; every other language yields an empty list.
    """
    if self.language != "go":
        # No type extractor exists for this language yet.
        return []
    return self._extract_go_types(parsed, include_private=include_private)
|
|
712
|
+
|
|
713
|
+
def _extract_go_types(
    self, parsed: ParsedFile, *, include_private: bool
) -> list[TypeDefinition]:
    """Collect the Go types declared anywhere in the parse tree.

    Each ``type_spec`` child of a ``type_declaration`` node is handed to
    ``self._extract_go_type_from_spec``; falsy results are dropped.
    """
    # The spec node, not the declaration, carries the actual type details.
    specs = (
        spec
        for declaration in self._walk_tree(parsed.tree.root_node)
        if declaration.type == "type_declaration"
        for spec in declaration.children
        if spec.type == "type_spec"
    )
    candidates = (
        self._extract_go_type_from_spec(
            spec, parsed, include_private=include_private
        )
        for spec in specs
    )
    return [type_def for type_def in candidates if type_def]
|
|
731
|
+
|
|
732
|
+
def _extract_go_type_from_spec(
    self, type_spec_node: Node, parsed: ParsedFile, *, include_private: bool
) -> TypeDefinition | None:
    """Build a TypeDefinition from a single Go ``type_spec`` node.

    Args:
        type_spec_node: The ``type_spec`` node describing one declared type.
        parsed: The file the spec belongs to; its path supplies ``file``
            and the stem used in the qualified name.
        include_private: When false, types rejected by ``self._is_public``
            are skipped.

    Returns:
        The definition, or None when the spec has no name, when the type is
        filtered out as private, or when the spec has no parent node.
    """
    # Underlying-type node -> the ``kind`` label stored on the definition.
    # Anything not listed keeps the generic "type" label.
    kind_by_node_type = {
        "struct_type": "struct",
        "interface_type": "interface",
        "slice_type": "alias",
        "array_type": "alias",
        "pointer_type": "alias",
        "map_type": "map",
    }

    type_name = None
    type_kind = "type"
    for child in type_spec_node.children:
        if child.type == "type_identifier":
            # The first type_identifier is the declared name. In
            # ``type Celsius float64`` a second one names the underlying
            # type and must not replace the declared name.
            if type_name is None and child.text:
                type_name = child.text.decode("utf-8")
        elif child.type in kind_by_node_type:
            type_kind = kind_by_node_type[child.type]

    if not type_name:
        return None

    is_public = self._is_public(type_spec_node, type_name)
    if not include_private and not is_public:
        return None

    # The enclosing type_declaration includes the ``type`` keyword and is
    # the node the doc comment is looked up against.
    parent_node = type_spec_node.parent
    if not parent_node:
        return None

    return TypeDefinition(
        file=parsed.path,
        node=parent_node,
        span=(parent_node.start_byte, parent_node.end_byte),
        qualified_name=f"{parsed.path.stem}.{type_name}",
        simple_name=type_name,
        is_public=is_public,
        docstring=self._extract_go_type_comment(parent_node),
        kind=type_kind,
    )
|
|
781
|
+
|
|
782
|
+
def _extract_go_type_comment(self, type_decl_node: Node) -> str | None:
    """Extract the comment that precedes a Go type declaration.

    Looking up the comment sibling in front of a declaration works the same
    way for types and functions, so this defers to
    ``_extract_go_function_comment`` rather than keeping a second copy of
    the sibling scan.

    Args:
        type_decl_node: The ``type_declaration`` node whose comment is wanted.

    Returns:
        The cleaned comment text, or None when no comment precedes the node.
    """
    return self._extract_go_function_comment(type_decl_node)
|
|
807
|
+
|
|
808
|
+
def _extract_constants(
    self, parsed: ParsedFile, *, include_private: bool
) -> list[tuple[str, Node]]:
    """Return the constants of a file; currently a placeholder that finds none."""
    # Both arguments are accepted for the interface but not consulted yet.
    del parsed, include_private
    return []
|
|
814
|
+
|
|
815
|
+
def _group_by_module(
    self, parsed_files: list[ParsedFile]
) -> dict[str, list[ParsedFile]]:
    """Place every parsed file in a module group of its own.

    The key is ``"<path>#<position>"``; appending the position keeps keys
    unique even when two entries share a path. The module path shown to
    users is derived elsewhere, not from this key.
    """
    return {
        f"{parsed.path}#{position}": [parsed]
        for position, parsed in enumerate(parsed_files)
    }
|
|
830
|
+
|
|
831
|
+
def _extract_module_path(self, parsed: ParsedFile) -> str:
    """Return the module/package path of a file for the configured language.

    Go, Java and Python each have a dedicated extractor; any other language
    falls back to a path derived from the file location.
    """
    # Language -> name of the extractor method on this object.
    extractor_names = {
        "go": "_extract_go_package_path",
        "java": "_extract_java_package_name",
        "python": "_extract_python_module_path",
    }
    chosen = extractor_names.get(self.language, "_extract_path_based_module")
    return getattr(self, chosen)(parsed)
|
|
846
|
+
|
|
847
|
+
def _extract_go_package_name(self, parsed: ParsedFile) -> str:
    """Return the name declared by the file's Go ``package`` clause.

    Only top-level children of the root node are inspected. When no
    package identifier with text is found, the file stem is used instead.
    """
    clauses = (
        top_level
        for top_level in parsed.tree.root_node.children
        if top_level.type == "package_clause"
    )
    for clause in clauses:
        for part in clause.children:
            if part.type == "package_identifier" and part.text:
                return part.text.decode("utf-8")

    # No usable package clause: fall back to the file name without suffix.
    return parsed.path.stem
|
|
860
|
+
|
|
861
|
+
def _extract_go_package_path(self, parsed: ParsedFile) -> str:
    """Return a Go import-style path for the file's package.

    The path is the cleaned directory of the file, written with ``/``
    separators, followed by the declared package name. The name is not
    appended when the directory already ends with it, and a file without a
    directory yields the bare package name.
    """
    package_name = self._extract_go_package_name(parsed)

    cleaned = self._clean_path_for_module(Path(parsed.git_file.path))
    directory = str(cleaned.parent)
    if directory == ".":
        return package_name

    import_dir = directory.replace("\\", "/")
    # Avoid results such as "agent/agent" when the directory is the package.
    ends_with_package = import_dir == package_name or import_dir.endswith(
        "/" + package_name
    )
    return import_dir if ends_with_package else f"{import_dir}/{package_name}"
|
|
884
|
+
|
|
885
|
+
def _extract_python_module_path(self, parsed: ParsedFile) -> str:
    """Return the dotted Python module path derived from the file location.

    The path is first cleaned with ``self._clean_path_for_module``. Its
    directory parts, plus the file stem for anything other than
    ``__init__.py``, are joined with dots. An ``__init__.py`` stands for
    its directory; when the cleaned path has no directory left, the parent
    directory name of the original path is used, or an empty string when
    there is none.
    """
    source_path = Path(parsed.git_file.path)
    trimmed = self._clean_path_for_module(source_path)
    is_package_init = trimmed.name == "__init__.py"

    segments = list(trimmed.parts[:-1])
    if not is_package_init:
        segments.append(trimmed.stem)

    # Empty and "." segments carry no module information.
    dotted = ".".join(seg for seg in segments if seg and seg != ".")
    if dotted:
        return dotted
    if not is_package_init:
        return trimmed.stem

    # Top-level __init__.py: name the package after its real parent folder.
    enclosing = source_path.parent.name
    return enclosing if enclosing and enclosing != "." else ""
|
|
919
|
+
|
|
920
|
+
def _extract_path_based_module(self, parsed: ParsedFile) -> str:
    """Return a dotted module path built purely from the file location.

    Used for languages that have no dedicated extractor: the cleaned
    path's directories and the file stem are joined with dots.
    """
    trimmed = self._clean_path_for_module(Path(parsed.git_file.path))

    # Directories first, then the file name without its extension.
    segments = [*trimmed.parts[:-1], trimmed.stem]
    dotted = ".".join(seg for seg in segments if seg and seg != ".")
    return dotted or trimmed.stem
|
|
934
|
+
|
|
935
|
+
def _clean_path_for_module(self, file_path: Path) -> Path:
    """Reduce an absolute file path to the part usable as a module path.

    Relative paths are returned untouched. For absolute paths the rules
    are tried in this order:

    1. ``.../data/<language>/rest`` (test fixtures): return ``rest``.
    2. The first ``src``/``lib``/``pkg``/``internal``/``app`` component:
       return what follows it, or the original path if nothing follows.
    3. Otherwise return the last two components, or the original path when
       there are fewer than two.
    """
    if not file_path.is_absolute():
        return file_path

    segments = file_path.parts
    total = len(segments)

    fixture_languages = {
        "go",
        "python",
        "java",
        "javascript",
        "typescript",
        "c",
        "cpp",
        "rust",
        "csharp",
    }
    for position, segment in enumerate(segments[:-1]):
        if segment != "data":
            continue
        if segments[position + 1] not in fixture_languages:
            continue
        # Something must remain after the language directory.
        if position + 2 < total:
            return Path(*segments[position + 2 :])

    root_markers = {"src", "lib", "pkg", "internal", "app"}
    marker_position = next(
        (
            position
            for position, segment in enumerate(segments)
            if segment in root_markers
        ),
        None,
    )
    if marker_position is not None:
        remainder = segments[marker_position + 1 :]
        return Path(*remainder) if remainder else file_path

    # No marker found; project files (go.mod, pyproject.toml, ...) are not
    # consulted, so keep just the parent directory and the file name.
    return Path(*segments[-2:]) if total >= 2 else file_path
|
|
988
|
+
|
|
989
|
+
def _extract_java_package_name(self, parsed: ParsedFile) -> str:
    """Return the package named by the file's Java ``package`` declaration.

    Only top-level children of the root node are inspected. The first
    ``scoped_identifier`` or ``identifier`` child with text wins; without
    one the file stem is returned.
    """
    name_node_types = {"scoped_identifier", "identifier"}

    for declaration in parsed.tree.root_node.children:
        if declaration.type != "package_declaration":
            continue
        for part in declaration.children:
            if part.text and part.type in name_node_types:
                return part.text.decode("utf-8")

    # No usable package declaration: fall back to the file name.
    return parsed.path.stem
|
|
1001
|
+
|
|
1002
|
+
def _extract_module_docstring(
    self, module_files: list[ParsedFile]
) -> str | None:
    """Return the module-level documentation for a group of files.

    Only Python is supported: the docstring is read from the first
    ``__init__.py`` in the group, or from the first file when there is no
    ``__init__.py``. Other languages and empty groups yield None.
    """
    if self.language != "python" or not module_files:
        return None

    # Prefer the package initializer; otherwise use the first file.
    chosen = next(
        (parsed for parsed in module_files if parsed.path.name == "__init__.py"),
        module_files[0],
    )
    return self._extract_python_docstring(chosen.tree.root_node)
|
|
1016
|
+
|
|
1017
|
+
def _is_public(self, node: Node, name: str) -> bool:
    """Tell whether a definition counts as public for the current language.

    Python: the name does not start with an underscore. Go: the name starts
    with an uppercase character. Every other language: always public.
    """
    del node  # Accepted for the interface; not consulted yet.

    language = self.language
    if language == "python":
        return not name.startswith("_")
    if language == "go":
        # An empty name can never be exported.
        return bool(name) and name[0].isupper()
    return True
|
|
1025
|
+
|
|
1026
|
+
def _extract_docstring(self, node: Node) -> str | None:
    """Return the documentation attached to a definition node.

    Go definitions use the comment in front of them, Python definitions use
    their docstring; other languages have no extractor and yield None.
    """
    # Language -> name of the extractor method on this object.
    extractor_names = {
        "go": "_extract_go_function_comment",
        "python": "_extract_python_docstring",
    }
    chosen = extractor_names.get(self.language)
    if chosen is None:
        return None
    return getattr(self, chosen)(node)
|
|
1034
|
+
|
|
1035
|
+
def _extract_python_docstring(self, node: Node) -> str | None:  # noqa: C901
    """Extract the docstring of a Python function, class, or module node.

    A docstring is a string literal that forms the first statement of the
    body. Comments in front of it are skipped, but any other leading
    statement means there is no docstring: a bare string further down the
    body is not reported. An ``r``/``u`` literal prefix is removed together
    with the quotes.

    Args:
        node: A ``function_definition``, ``class_definition`` or ``module``
            node. Any other node type yields None.

    Returns:
        The docstring text without quotes and surrounding whitespace, or
        None when the body has no docstring or its bytes are not valid
        UTF-8.
    """
    if node.type in {"function_definition", "class_definition"}:
        body_node = next(
            (child for child in node.children if child.type == "block"), None
        )
    elif node.type == "module":
        # A module node is its own body.
        body_node = node
    else:
        body_node = None
    if body_node is None:
        return None

    # Comments are not statements, so they may precede the docstring.
    first_statement = next(
        (child for child in body_node.children if child.type != "comment"),
        None,
    )
    if first_statement is None or first_statement.type != "expression_statement":
        return None

    literal = next(
        (
            part
            for part in first_statement.children
            if part.type == "string" and part.text
        ),
        None,
    )
    if literal is None:
        return None

    try:
        text = literal.text.decode("utf-8").strip()
    except UnicodeDecodeError:
        return None

    # Drop a raw/unicode prefix such as r"""...""" so the quotes are found.
    prefix_length = len(text) - len(text.lstrip("rRuU"))
    if text[prefix_length : prefix_length + 1] in ('"', "'"):
        text = text[prefix_length:]

    # Triple quotes are tried first so they are not mistaken for single ones.
    for quote in ('"""', "'''", '"', "'"):
        if text.startswith(quote) and text.endswith(quote):
            text = text[len(quote) : -len(quote)]
            break
    return text.strip()
|
|
1086
|
+
|
|
1087
|
+
def _extract_go_function_comment(self, func_node: Node) -> str | None:
    """Extract the doc comment in front of a Go function or method.

    The comment is taken from the sibling nodes that directly precede the
    declaration. Consecutive comment nodes on adjacent rows are joined, so
    a doc comment written as several ``//`` lines is returned in full
    rather than only its last line. ``//`` markers are stripped from line
    comments and ``/*`` / ``*/`` from block comments.

    Args:
        func_node: The declaration node whose comment is wanted.

    Returns:
        The comment text with one line per comment node, or None when the
        node has no parent, is not among its parent's children, is the
        first child, or is not preceded by a comment with text.
    """
    parent = func_node.parent
    if not parent:
        return None

    siblings = parent.children
    func_index = next(
        (index for index, child in enumerate(siblings) if child == func_node),
        None,
    )
    if not func_index:
        # None: node not found. 0: nothing can precede the first child.
        return None

    # Walk backwards from the declaration, collecting the unbroken run of
    # comments. A gap between two comments ends the run so that an
    # unrelated comment block further up is not merged in.
    comment_run = []
    for sibling in reversed(siblings[:func_index]):
        if sibling.type != "comment" or not sibling.text:
            break
        if comment_run and sibling.end_point[0] + 1 != comment_run[-1].start_point[0]:
            break
        comment_run.append(sibling)
    if not comment_run:
        return None

    lines = []
    for comment in reversed(comment_run):
        text = comment.text.decode("utf-8")
        if text.startswith("/*"):
            text = text[2:]
            if text.endswith("*/"):
                text = text[:-2]
        else:
            text = text.lstrip("/")
        lines.append(text.strip())
    return "\n".join(lines).strip()
|
|
1111
|
+
|
|
1112
|
+
def _extract_parameters(self, node: Node) -> list[str]:
    """Return the parameter names of a function; placeholder that finds none."""
    del node  # Accepted for the interface; not consulted yet.
    return []
|
|
1116
|
+
|
|
1117
|
+
def _extract_return_type(self, node: Node) -> str | None:
    """Return the declared return type of a function; placeholder yielding None."""
    del node  # Accepted for the interface; not consulted yet.
    return None
|
|
1121
|
+
|
|
1122
|
+
def _is_method(self, node: Node) -> bool:
    """Tell whether a function node is a method.

    Only Go is recognised so far, where methods have their own
    ``method_declaration`` node type. Every other language reports False;
    checking for an enclosing class node is not implemented.
    """
    return self.language == "go" and node.type == "method_declaration"
|