ai-codeindex 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_codeindex-0.7.0.dist-info/METADATA +966 -0
- ai_codeindex-0.7.0.dist-info/RECORD +41 -0
- ai_codeindex-0.7.0.dist-info/WHEEL +4 -0
- ai_codeindex-0.7.0.dist-info/entry_points.txt +2 -0
- ai_codeindex-0.7.0.dist-info/licenses/LICENSE +21 -0
- codeindex/README_AI.md +767 -0
- codeindex/__init__.py +11 -0
- codeindex/adaptive_config.py +83 -0
- codeindex/adaptive_selector.py +171 -0
- codeindex/ai_helper.py +48 -0
- codeindex/cli.py +40 -0
- codeindex/cli_common.py +10 -0
- codeindex/cli_config.py +97 -0
- codeindex/cli_docs.py +66 -0
- codeindex/cli_hooks.py +765 -0
- codeindex/cli_scan.py +562 -0
- codeindex/cli_symbols.py +295 -0
- codeindex/cli_tech_debt.py +238 -0
- codeindex/config.py +479 -0
- codeindex/directory_tree.py +229 -0
- codeindex/docstring_processor.py +342 -0
- codeindex/errors.py +62 -0
- codeindex/extractors/__init__.py +9 -0
- codeindex/extractors/thinkphp.py +132 -0
- codeindex/file_classifier.py +148 -0
- codeindex/framework_detect.py +323 -0
- codeindex/hierarchical.py +428 -0
- codeindex/incremental.py +278 -0
- codeindex/invoker.py +260 -0
- codeindex/parallel.py +155 -0
- codeindex/parser.py +740 -0
- codeindex/route_extractor.py +98 -0
- codeindex/route_registry.py +77 -0
- codeindex/scanner.py +167 -0
- codeindex/semantic_extractor.py +408 -0
- codeindex/smart_writer.py +737 -0
- codeindex/symbol_index.py +199 -0
- codeindex/symbol_scorer.py +283 -0
- codeindex/tech_debt.py +619 -0
- codeindex/tech_debt_formatters.py +234 -0
- codeindex/writer.py +164 -0
codeindex/parser.py
ADDED
|
@@ -0,0 +1,740 @@
|
|
|
1
|
+
"""Multi-language AST parser using tree-sitter."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Dict
|
|
6
|
+
|
|
7
|
+
import tree_sitter_php as tsphp
|
|
8
|
+
import tree_sitter_python as tspython
|
|
9
|
+
from tree_sitter import Language, Parser
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class Symbol:
    """A single code symbol (class, function, method, ...) found in a file."""

    name: str
    kind: str  # class, function, method
    signature: str = ""
    docstring: str = ""
    line_start: int = 0
    line_end: int = 0

    def to_dict(self) -> dict:
        """Return the symbol's fields as a JSON-serializable dict."""
        exported = ("name", "kind", "signature", "docstring", "line_start", "line_end")
        return {key: getattr(self, key) for key in exported}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class Import:
    """One import-like statement extracted from a source file."""

    module: str
    names: list[str] = field(default_factory=list)
    is_from: bool = False

    def to_dict(self) -> dict:
        """Return the import's fields as a JSON-serializable dict."""
        # ``names`` is handed out as-is (not copied).
        return dict(module=self.module, names=self.names, is_from=self.is_from)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclass
class ParseResult:
    """Outcome of parsing one source file.

    ``error`` is ``None`` unless something went wrong, in which case it
    carries a human-readable message.
    """

    path: Path
    symbols: list[Symbol] = field(default_factory=list)
    imports: list[Import] = field(default_factory=list)
    module_docstring: str = ""
    namespace: str = ""  # PHP namespace
    error: str | None = None
    file_lines: int = 0  # Number of lines in the file

    def to_dict(self) -> dict:
        """Return a JSON-serializable dict; nested symbols/imports are converted too."""
        payload: dict = {"path": str(self.path)}
        payload["symbols"] = [entry.to_dict() for entry in self.symbols]
        payload["imports"] = [entry.to_dict() for entry in self.imports]
        for key in ("module_docstring", "namespace", "error", "file_lines"):
            payload[key] = getattr(self, key)
        return payload
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# Compiled tree-sitter grammars, one per supported language.
PY_LANGUAGE = Language(tspython.language())
PHP_LANGUAGE = Language(tsphp.language_php())

# One parser per language, keyed by the language name used throughout this module.
PARSERS: Dict[str, Parser] = {
    lang_name: Parser(grammar)
    for lang_name, grammar in (("python", PY_LANGUAGE), ("php", PHP_LANGUAGE))
}

# File suffix (with the leading dot) -> language name.
FILE_EXTENSIONS: Dict[str, str] = dict(
    [
        (".py", "python"),
        (".php", "php"),
        (".phtml", "php"),
    ]
)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _get_node_text(node, source_bytes: bytes) -> str:
|
|
96
|
+
"""Extract text from a tree-sitter node."""
|
|
97
|
+
return source_bytes[node.start_byte : node.end_byte].decode("utf-8")
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _extract_docstring(node, source_bytes: bytes) -> str:
    """Extract the docstring of a Python function or class definition node.

    Only the first non-comment statement of the definition's ``block`` is
    considered, so a bare string literal that appears later in the body is
    never mistaken for a docstring.

    Args:
        node: A definition node whose children include a ``block``.
        source_bytes: Raw bytes of the file the node was parsed from.

    Returns:
        The docstring with its quotes removed and surrounding whitespace
        stripped, or ``""`` when there is none.
    """
    if node.child_count == 0:
        return ""

    for child in node.children:
        if child.type != "block":
            continue

        for statement in child.children:
            if statement.type == "comment":
                # Comments may precede the docstring; keep looking.
                continue

            if statement.type == "expression_statement":
                for expr_child in statement.children:
                    if expr_child.type != "string":
                        continue
                    text = _get_node_text(expr_child, source_bytes)
                    # Remove quotes (triple-quoted forms must be tested first).
                    if text.startswith(('"""', "'''")):
                        return text[3:-3].strip()
                    if text.startswith(('"', "'")):
                        return text[1:-1].strip()

            # Whatever the first real statement was, nothing after it can be
            # a docstring.
            break

        # Only the first block belongs to this definition.
        break

    return ""
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _parse_function(
    node,
    source_bytes: bytes,
    class_name: str = "",
    decorators: list[str] | None = None
) -> Symbol:
    """Build a Symbol from a Python ``function_definition`` node.

    Args:
        node: The function definition node.
        source_bytes: Raw bytes of the file the node was parsed from.
        class_name: Name of the enclosing class; when non-empty the symbol is
            a ``"method"`` named ``Class.func`` instead of a ``"function"``.
        decorators: Accepted for callers' convenience; not used here.

    Returns:
        The Symbol, with 1-based start/end line numbers.
    """
    func_name = ""
    tail = ""  # parameter list followed by the optional " -> type"

    for part in node.children:
        part_type = part.type
        if part_type == "identifier":  # function name is 'identifier', not 'name'
            func_name = _get_node_text(part, source_bytes)
        elif part_type == "parameters":
            tail += _get_node_text(part, source_bytes)
        elif part_type == "type":
            tail += " -> " + _get_node_text(part, source_bytes)

    if class_name:
        kind, qualified_name = "method", f"{class_name}.{func_name}"
    else:
        kind, qualified_name = "function", func_name

    return Symbol(
        name=qualified_name,
        kind=kind,
        signature=f"def {func_name}{tail}",
        docstring=_extract_docstring(node, source_bytes),
        line_start=node.start_point[0] + 1,
        line_end=node.end_point[0] + 1,
    )
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _parse_class(node, source_bytes: bytes) -> list[Symbol]:
    """Parse a Python ``class_definition`` node and its methods.

    Args:
        node: The class definition node.
        source_bytes: Raw bytes of the file the node was parsed from.

    Returns:
        A list whose first element is the class Symbol, followed by one
        ``"method"`` Symbol per function defined directly in the class body.
        Decorated methods (``@property``, ``@staticmethod``, ...) are
        included; nested classes are not descended into.
    """
    class_name = ""
    bases = []

    for child in node.children:
        if child.type == "identifier":  # class name is 'identifier', not 'name'
            class_name = _get_node_text(child, source_bytes)
        elif child.type == "argument_list":
            # Text includes the surrounding parentheses, e.g. "(Base, Mixin)".
            bases.append(_get_node_text(child, source_bytes))

    symbols = [
        Symbol(
            name=class_name,
            kind="class",
            signature=f"class {class_name}" + "".join(bases),
            docstring=_extract_docstring(node, source_bytes),
            line_start=node.start_point[0] + 1,
            line_end=node.end_point[0] + 1,
        )
    ]

    # Parse methods
    for child in node.children:
        if child.type != "block":
            continue
        for member in child.children:
            # A decorated method is wrapped in a decorated_definition node;
            # look inside it so the method is not silently dropped.
            if member.type == "decorated_definition":
                candidates = member.children
            else:
                candidates = (member,)
            for candidate in candidates:
                if candidate.type == "function_definition":
                    symbols.append(_parse_function(candidate, source_bytes, class_name))

    return symbols
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def _parse_import(node, source_bytes: bytes) -> Import | None:
    """Convert a Python import node into an Import record.

    For ``import a, b`` the first module becomes ``module`` and the remaining
    ones are stored in ``names``. For ``from m import x, y`` the module is
    ``m`` (or the relative-import text) and ``names`` holds the imported
    names; for aliased entries the original name is recorded, not the alias.

    Returns:
        The Import, or ``None`` when the node is not an import statement or
        no module name could be found.
    """

    def first_text(parent, kinds):
        # Text of the first child whose type is one of ``kinds``, else None.
        for sub in parent.children:
            if sub.type in kinds:
                return _get_node_text(sub, source_bytes)
        return None

    if node.type == "import_statement":
        # import foo, bar
        modules = []
        for child in node.children:
            if child.type == "dotted_name":
                modules.append(_get_node_text(child, source_bytes))
            elif child.type == "aliased_import":
                target = first_text(child, ("dotted_name",))
                if target is not None:
                    modules.append(target)
        if not modules:
            return None
        return Import(module=modules[0], names=modules[1:], is_from=False)

    if node.type == "import_from_statement":
        # from foo import bar, baz
        module = ""
        imported = []
        for child in node.children:
            kind = child.type
            if kind == "dotted_name":
                text = _get_node_text(child, source_bytes)
                # The first dotted name is the module; later ones are imports.
                if module:
                    imported.append(text)
                else:
                    module = text
            elif kind == "relative_import":
                module = _get_node_text(child, source_bytes)
            elif kind == "aliased_import":
                target = first_text(child, ("dotted_name", "identifier"))
                if target is not None:
                    imported.append(target)
            elif kind == "identifier":
                imported.append(_get_node_text(child, source_bytes))

        if module:
            return Import(module=module, names=imported, is_from=True)

    return None
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def _extract_module_docstring(tree, source_bytes: bytes) -> str:
    """Return the module docstring of a parsed Python file, or ``""``.

    Leading comments are skipped; the first other top-level statement must be
    a string expression for a docstring to be reported.
    """
    for statement in tree.root_node.children:
        if statement.type == "comment":
            continue
        if statement.type != "expression_statement":
            # First real statement is not an expression: no docstring.
            return ""

        for piece in statement.children:
            if piece.type != "string":
                continue
            literal = _get_node_text(piece, source_bytes)
            if literal.startswith('"""') or literal.startswith("'''"):
                return literal[3:-3].strip()
            if literal.startswith('"') or literal.startswith("'"):
                return literal[1:-1].strip()
        # Only the first expression statement is ever examined.
        return ""

    return ""
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def parse_file(path: Path, language: str | None = None) -> ParseResult:
    """
    Parse a source file (Python or PHP) and extract symbols and imports.

    Args:
        path: Path to the source file
        language: Optional language override ("python" or "php").
                 If None, language is detected from file extension.

    Returns:
        ParseResult containing symbols, imports, and docstrings. Failures are
        reported through ``ParseResult.error`` rather than raised: unreadable
        file, unsupported extension or language, parser exception, or a tree
        that contains syntax errors.
    """
    try:
        source_bytes = path.read_bytes()
    except Exception as e:
        return ParseResult(path=path, error=str(e), file_lines=0)

    # Calculate file lines; a final line without a trailing newline still counts.
    file_lines = source_bytes.count(b"\n")
    if source_bytes and not source_bytes.endswith(b"\n"):
        file_lines += 1

    # Determine language
    if language is None:
        language = _get_language(path)
        if not language:
            return ParseResult(
                path=path, error=f"Unsupported file type: {path.suffix}", file_lines=file_lines
            )

    # Validate language and fetch its parser in a single lookup
    parser = PARSERS.get(language)
    if parser is None:
        return ParseResult(
            path=path, error=f"Unsupported language: {language}", file_lines=file_lines
        )

    try:
        tree = parser.parse(source_bytes)
    except Exception as e:
        return ParseResult(path=path, error=f"Parse error: {e}", file_lines=file_lines)

    # Check for syntax errors
    if tree.root_node.has_error:
        return ParseResult(
            path=path,
            error="Syntax error in file (tree-sitter parse failure)",
            file_lines=file_lines,
        )

    symbols: list[Symbol] = []
    imports: list[Import] = []
    module_docstring = ""
    namespace = ""  # stays empty for Python files
    root = tree.root_node

    # Language-specific parsing
    if language == "python":
        module_docstring = _extract_module_docstring(tree, source_bytes)
        for child in root.children:
            if child.type == "function_definition":
                symbols.append(_parse_function(child, source_bytes))
            elif child.type == "class_definition":
                symbols.extend(_parse_class(child, source_bytes))
            elif child.type == "decorated_definition":
                # The real definition sits next to its decorator nodes.
                for dec_child in child.children:
                    if dec_child.type == "function_definition":
                        symbols.append(_parse_function(dec_child, source_bytes))
                    elif dec_child.type == "class_definition":
                        symbols.extend(_parse_class(dec_child, source_bytes))
            elif child.type in ("import_statement", "import_from_statement"):
                imp = _parse_import(child, source_bytes)
                if imp:
                    imports.append(imp)

    elif language == "php":
        include_types = (
            "include_expression",
            "include_once_expression",
            "require_expression",
            "require_once_expression",
        )

        for child in root.children:
            if child.type == "namespace_definition":
                namespace = _parse_php_namespace(child, source_bytes)
            elif child.type == "namespace_use_declaration":
                imports.extend(_parse_php_use(child, source_bytes))
            elif child.type == "class_declaration":
                symbols.extend(_parse_php_class(child, source_bytes))
            elif child.type == "function_definition":
                symbols.append(_parse_php_function(child, source_bytes))
            elif child.type in include_types or child.type == "expression_statement":
                # A top-level `require 'x.php';` is an expression wrapped in
                # an expression_statement, so look one level down as well.
                if child.type == "expression_statement":
                    candidates = child.children
                else:
                    candidates = (child,)
                for candidate in candidates:
                    if candidate.type in include_types:
                        imp = _parse_php_include(candidate, source_bytes)
                        if imp:
                            imports.append(imp)

        # Extract module docstring from the first top-level PHPDoc comment.
        # The comment node *is* the docblock, so parse its own text; asking
        # _extract_php_docstring would inspect the comment's children and
        # previous sibling instead.
        for child in root.children:
            if child.type == "comment" and child.text.startswith(b"/**"):
                module_docstring = _parse_phpdoc_text(_get_node_text(child, source_bytes))
                break

    return ParseResult(
        path=path,
        symbols=symbols,
        imports=imports,
        module_docstring=module_docstring,
        namespace=namespace,
        file_lines=file_lines,
    )
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
def parse_directory(paths: list[Path]) -> list[ParseResult]:
    """Run ``parse_file`` on every path and return the results in input order."""
    return list(map(parse_file, paths))
|
|
388
|
+
|
|
389
|
+
def _get_language(file_path: Path) -> str | None:
    """Determine language from file extension.

    Args:
        file_path: Path whose suffix is looked up case-insensitively.

    Returns:
        The language name registered in ``FILE_EXTENSIONS``, or ``None`` when
        the suffix is not registered.
    """
    return FILE_EXTENSIONS.get(file_path.suffix.lower())
|
|
393
|
+
|
|
394
|
+
def _extract_php_docstring(node, source_bytes: bytes) -> str:
    """
    Find the documentation comment attached to a PHP declaration node.

    Comment children of the node are tried first, then the node's previous
    sibling. Two comment forms are recognised:
    - PHPDoc blocks: /** ... */ (reduced to their description text)
    - Inline comments: // ... (marker removed, whitespace stripped)

    Returns "" when no recognised comment is found.
    """

    def describe(comment_node):
        # Description carried by a comment node, or None if its form is not recognised.
        raw = _get_node_text(comment_node, source_bytes)
        if raw.startswith("/**"):
            return _parse_phpdoc_text(raw)
        if raw.startswith("//"):
            return raw[2:].strip()
        return None

    # First check children (for class-level comments)
    for child in node.children:
        if child.type != "comment":
            continue
        found = describe(child)
        if found is not None:
            return found

    # Check previous sibling (for method-level comments)
    previous = node.prev_sibling
    if previous and previous.type == "comment":
        found = describe(previous)
        if found is not None:
            return found

    return ""
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
def _parse_phpdoc_text(text: str) -> str:
|
|
428
|
+
"""
|
|
429
|
+
Parse PHPDoc comment text and extract description.
|
|
430
|
+
|
|
431
|
+
Extracts the first non-annotation line(s) from PHPDoc.
|
|
432
|
+
Skips @param, @return, @throws, etc.
|
|
433
|
+
|
|
434
|
+
Args:
|
|
435
|
+
text: Raw PHPDoc comment text (/** ... */)
|
|
436
|
+
|
|
437
|
+
Returns:
|
|
438
|
+
Cleaned description text
|
|
439
|
+
"""
|
|
440
|
+
# Handle single-line PHPDoc: /** Description */
|
|
441
|
+
if "\n" not in text:
|
|
442
|
+
# Remove /** and */
|
|
443
|
+
content = text.strip()
|
|
444
|
+
if content.startswith("/**"):
|
|
445
|
+
content = content[3:]
|
|
446
|
+
if content.endswith("*/"):
|
|
447
|
+
content = content[:-2]
|
|
448
|
+
content = content.strip()
|
|
449
|
+
# Skip if it's only annotations
|
|
450
|
+
if content.startswith("@"):
|
|
451
|
+
return ""
|
|
452
|
+
return content
|
|
453
|
+
|
|
454
|
+
# Handle multi-line PHPDoc
|
|
455
|
+
lines = text.split("\n")
|
|
456
|
+
description_lines = []
|
|
457
|
+
|
|
458
|
+
for line in lines[1:-1]: # Skip first (/**) and last (*/) lines
|
|
459
|
+
line = line.strip()
|
|
460
|
+
# Remove leading * and whitespace
|
|
461
|
+
if line.startswith("*"):
|
|
462
|
+
line = line[1:].strip()
|
|
463
|
+
|
|
464
|
+
# Skip empty lines
|
|
465
|
+
if not line:
|
|
466
|
+
continue
|
|
467
|
+
|
|
468
|
+
# Skip annotation lines (@param, @return, etc.)
|
|
469
|
+
if line.startswith("@"):
|
|
470
|
+
break # Stop at first annotation
|
|
471
|
+
|
|
472
|
+
description_lines.append(line)
|
|
473
|
+
|
|
474
|
+
return " ".join(description_lines)
|
|
475
|
+
|
|
476
|
+
def _parse_php_function(node, source_bytes: bytes, class_name: str = "") -> Symbol:
    """Build a ``"function"`` Symbol from a standalone PHP function definition.

    ``class_name`` is accepted but not used; methods go through
    ``_parse_php_method``.
    """
    # Child node type -> slot it fills; a later child of the same kind overwrites.
    slot_for_type = {
        "name": "name",
        "formal_parameters": "params",
        "named_type": "returns",
        "primitive_type": "returns",
        "optional_type": "returns",
    }
    found = {"name": "", "params": "", "returns": ""}

    for part in node.children:
        slot = slot_for_type.get(part.type)
        if slot is not None:
            found[slot] = _get_node_text(part, source_bytes)

    signature = "function " + found["name"] + found["params"]
    if found["returns"]:
        signature = signature + ": " + found["returns"]

    return Symbol(
        name=found["name"],
        kind="function",
        signature=signature,
        docstring=_extract_php_docstring(node, source_bytes),
        line_start=node.start_point[0] + 1,
        line_end=node.end_point[0] + 1,
    )
|
|
504
|
+
|
|
505
|
+
|
|
506
|
+
def _parse_php_method(node, source_bytes: bytes, class_name: str) -> Symbol:
    """Build a ``"method"`` Symbol named ``Class::method`` from a PHP method node.

    The signature has the shape
    ``[visibility] [static] function name(params)[: return_type]``.
    """
    method_name = ""
    param_text = ""
    returns = ""
    visibility = ""
    static = False

    for part in node.children:
        part_type = part.type
        if part_type == "static_modifier":
            static = True
            continue
        if part_type == "visibility_modifier":
            visibility = _get_node_text(part, source_bytes)
        elif part_type == "name":
            method_name = _get_node_text(part, source_bytes)
        elif part_type == "formal_parameters":
            param_text = _get_node_text(part, source_bytes)
        elif part_type in ("named_type", "primitive_type", "optional_type"):
            returns = _get_node_text(part, source_bytes)

    prefix = [visibility] if visibility else []
    if static:
        prefix.append("static")
    signature = " ".join(prefix + [f"function {method_name}{param_text}"])
    if returns:
        signature = f"{signature}: {returns}"

    return Symbol(
        name=f"{class_name}::{method_name}",
        kind="method",
        signature=signature,
        docstring=_extract_php_docstring(node, source_bytes),
        line_start=node.start_point[0] + 1,
        line_end=node.end_point[0] + 1,
    )
|
|
548
|
+
|
|
549
|
+
|
|
550
|
+
def _parse_php_property(node, source_bytes: bytes, class_name: str) -> Symbol:
    """Build a ``"property"`` Symbol named ``Class::$prop`` from a PHP property node.

    The signature has the shape ``[visibility] [static] [type] $name``. When
    a declaration lists several variables, the last one wins. No docstring is
    extracted for properties.
    """
    visibility = ""
    declared_type = ""
    static = False
    variable = ""

    for part in node.children:
        part_type = part.type
        if part_type == "visibility_modifier":
            visibility = _get_node_text(part, source_bytes)
        elif part_type == "static_modifier":
            static = True
        elif part_type in ("named_type", "primitive_type", "optional_type"):
            declared_type = _get_node_text(part, source_bytes)
        elif part_type == "property_element":
            for element_part in part.children:
                if element_part.type == "variable_name":
                    variable = _get_node_text(element_part, source_bytes)

    # Optional pieces are dropped when empty; the variable name is always emitted.
    optional_pieces = (visibility, "static" if static else "", declared_type)
    signature = " ".join([piece for piece in optional_pieces if piece] + [variable])

    return Symbol(
        name=f"{class_name}::{variable}",
        kind="property",
        signature=signature,
        docstring="",
        line_start=node.start_point[0] + 1,
        line_end=node.end_point[0] + 1,
    )
|
|
590
|
+
|
|
591
|
+
def _parse_php_class(node, source_bytes: bytes) -> list[Symbol]:
    """Parse a PHP class declaration with extends, implements, properties and methods.

    Args:
        node: The ``class_declaration`` node.
        source_bytes: Raw bytes of the file the node was parsed from.

    Returns:
        A list whose first element is the class Symbol, followed by one
        Symbol per property and method found in the class's declaration
        list, in source order. The class signature has the shape
        ``[abstract|final] class Name [extends Base] [implements I1, I2]``.
    """
    # Parent classes and interfaces may be written as a bare name or as a
    # namespaced name (e.g. \App\Base); accept both node kinds.
    type_name_kinds = ("name", "qualified_name")

    class_name = ""
    extends = ""
    implements = []
    is_abstract = False
    is_final = False

    for child in node.children:
        if child.type == "name":
            class_name = _get_node_text(child, source_bytes)
        elif child.type == "abstract_modifier":
            is_abstract = True
        elif child.type == "final_modifier":
            is_final = True
        elif child.type == "base_clause":
            # extends BaseClass
            for bc_child in child.children:
                if bc_child.type in type_name_kinds:
                    extends = _get_node_text(bc_child, source_bytes)
        elif child.type == "class_interface_clause":
            # implements Interface1, Interface2
            implements.extend(
                _get_node_text(ic_child, source_bytes)
                for ic_child in child.children
                if ic_child.type in type_name_kinds
            )

    # Build signature: [abstract|final] class Name [extends Base] [implements I1, I2]
    sig_parts = []
    if is_abstract:
        sig_parts.append("abstract")
    elif is_final:
        sig_parts.append("final")
    sig_parts.append(f"class {class_name}")
    if extends:
        sig_parts.append(f"extends {extends}")
    if implements:
        sig_parts.append(f"implements {', '.join(implements)}")

    symbols = [
        Symbol(
            name=class_name,
            kind="class",
            signature=" ".join(sig_parts),
            docstring=_extract_php_docstring(node, source_bytes),
            line_start=node.start_point[0] + 1,
            line_end=node.end_point[0] + 1,
        )
    ]

    # Parse properties and methods from declaration_list
    for child in node.children:
        if child.type != "declaration_list":
            continue
        for decl in child.children:
            if decl.type == "property_declaration":
                symbols.append(_parse_php_property(decl, source_bytes, class_name))
            elif decl.type == "method_declaration":
                symbols.append(_parse_php_method(decl, source_bytes, class_name))

    return symbols
|
|
656
|
+
|
|
657
|
+
def _parse_php_include(node, source_bytes: bytes) -> Import | None:
    """Parse PHP include/require statements.

    Handles ``include``, ``include_once``, ``require`` and ``require_once``
    when the target is a plain string literal (single- or double-quoted).

    Args:
        node: The include/require expression node.
        source_bytes: Raw bytes of the file the node was parsed from.

    Returns:
        An Import whose ``module`` is the path with its quotes removed, or
        ``None`` when the node is of another kind or the target is not a
        string literal (e.g. ``__DIR__ . '/x.php'``).
    """
    include_types = (
        "include_expression",
        "include_once_expression",
        "require_expression",
        "require_once_expression",
    )
    if node.type not in include_types:
        return None

    for child in node.children:
        # NOTE(review): "encapsed_string" is assumed to be the grammar's node
        # type for double-quoted strings; verify against node-types.json.
        if child.type in ("string", "encapsed_string"):
            # Remove quotes
            module = _get_node_text(child, source_bytes).strip('\'"')
            return Import(module=module, names=[], is_from=False)

    return None
|
|
667
|
+
|
|
668
|
+
|
|
669
|
+
def _parse_php_namespace(node, source_bytes: bytes) -> str:
    """Return the name declared by a PHP namespace definition, or "" if it has none."""
    name_node = next(
        (part for part in node.children if part.type == "namespace_name"),
        None,
    )
    if name_node is None:
        return ""
    return _get_node_text(name_node, source_bytes)
|
|
675
|
+
|
|
676
|
+
|
|
677
|
+
def _parse_php_use(node, source_bytes: bytes) -> list[Import]:
    """
    Parse PHP use statement.

    Handles:
    - use App\\Service\\UserService;
    - use PDO;  (unqualified name)
    - use App\\Model\\User as UserModel;
    - use App\\Repository\\{UserRepository, OrderRepository};

    Args:
        node: The ``namespace_use_declaration`` node.
        source_bytes: Raw bytes of the file the node was parsed from.

    Returns:
        One Import per imported name. ``module`` is the fully qualified name
        (group prefix prepended where present), ``names`` holds the alias if
        one was given, and ``is_from`` is always True.
    """

    def read_clause(clause):
        # Return (imported name, alias) for one namespace_use_clause.
        target = ""
        alias = ""
        for part in clause.children:
            if part.type == "qualified_name":
                target = _get_node_text(part, source_bytes)
            elif part.type == "name":
                # The first bare name is the import itself (e.g. `use PDO;`);
                # a name that follows the target is the alias after `as`.
                if not target:
                    target = _get_node_text(part, source_bytes)
                else:
                    alias = _get_node_text(part, source_bytes)
            elif part.type == "namespace_aliasing_clause":
                # Some grammar versions nest `as Alias` in its own node.
                for alias_part in part.children:
                    if alias_part.type == "name":
                        alias = _get_node_text(alias_part, source_bytes)
        return target, alias

    def to_import(clause, prefix):
        # Build the Import for one clause, or None if it names nothing.
        target, alias = read_clause(clause)
        if not target:
            return None
        module = f"{prefix}\\{target}" if prefix else target
        return Import(
            module=module,
            names=[alias] if alias else [],
            is_from=True,  # PHP use is similar to Python's from...import
        )

    imports = []
    base_namespace = ""

    for child in node.children:
        if child.type == "namespace_name":
            # Group import base: use App\Repository\{...}
            base_namespace = _get_node_text(child, source_bytes)
        elif child.type == "namespace_use_clause":
            # Single import
            clauses = (child,)
        elif child.type == "namespace_use_group":
            # Group import: {UserRepository, OrderRepository}
            clauses = tuple(
                group_child
                for group_child in child.children
                if group_child.type == "namespace_use_clause"
            )
        else:
            continue

        if child.type == "namespace_name":
            continue

        for clause in clauses:
            imp = to_import(clause, base_namespace)
            if imp is not None:
                imports.append(imp)

    return imports
|