mcp-vector-search 0.15.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcp-vector-search might be problematic. Click here for more details.
- mcp_vector_search/__init__.py +10 -0
- mcp_vector_search/cli/__init__.py +1 -0
- mcp_vector_search/cli/commands/__init__.py +1 -0
- mcp_vector_search/cli/commands/auto_index.py +397 -0
- mcp_vector_search/cli/commands/chat.py +534 -0
- mcp_vector_search/cli/commands/config.py +393 -0
- mcp_vector_search/cli/commands/demo.py +358 -0
- mcp_vector_search/cli/commands/index.py +762 -0
- mcp_vector_search/cli/commands/init.py +658 -0
- mcp_vector_search/cli/commands/install.py +869 -0
- mcp_vector_search/cli/commands/install_old.py +700 -0
- mcp_vector_search/cli/commands/mcp.py +1254 -0
- mcp_vector_search/cli/commands/reset.py +393 -0
- mcp_vector_search/cli/commands/search.py +796 -0
- mcp_vector_search/cli/commands/setup.py +1133 -0
- mcp_vector_search/cli/commands/status.py +584 -0
- mcp_vector_search/cli/commands/uninstall.py +404 -0
- mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
- mcp_vector_search/cli/commands/visualize/cli.py +265 -0
- mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
- mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
- mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +29 -0
- mcp_vector_search/cli/commands/visualize/graph_builder.py +709 -0
- mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
- mcp_vector_search/cli/commands/visualize/server.py +201 -0
- mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
- mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
- mcp_vector_search/cli/commands/visualize/templates/base.py +218 -0
- mcp_vector_search/cli/commands/visualize/templates/scripts.py +3670 -0
- mcp_vector_search/cli/commands/visualize/templates/styles.py +779 -0
- mcp_vector_search/cli/commands/visualize.py.original +2536 -0
- mcp_vector_search/cli/commands/watch.py +287 -0
- mcp_vector_search/cli/didyoumean.py +520 -0
- mcp_vector_search/cli/export.py +320 -0
- mcp_vector_search/cli/history.py +295 -0
- mcp_vector_search/cli/interactive.py +342 -0
- mcp_vector_search/cli/main.py +484 -0
- mcp_vector_search/cli/output.py +414 -0
- mcp_vector_search/cli/suggestions.py +375 -0
- mcp_vector_search/config/__init__.py +1 -0
- mcp_vector_search/config/constants.py +24 -0
- mcp_vector_search/config/defaults.py +200 -0
- mcp_vector_search/config/settings.py +146 -0
- mcp_vector_search/core/__init__.py +1 -0
- mcp_vector_search/core/auto_indexer.py +298 -0
- mcp_vector_search/core/config_utils.py +394 -0
- mcp_vector_search/core/connection_pool.py +360 -0
- mcp_vector_search/core/database.py +1237 -0
- mcp_vector_search/core/directory_index.py +318 -0
- mcp_vector_search/core/embeddings.py +294 -0
- mcp_vector_search/core/exceptions.py +89 -0
- mcp_vector_search/core/factory.py +318 -0
- mcp_vector_search/core/git_hooks.py +345 -0
- mcp_vector_search/core/indexer.py +1002 -0
- mcp_vector_search/core/llm_client.py +453 -0
- mcp_vector_search/core/models.py +294 -0
- mcp_vector_search/core/project.py +350 -0
- mcp_vector_search/core/scheduler.py +330 -0
- mcp_vector_search/core/search.py +952 -0
- mcp_vector_search/core/watcher.py +322 -0
- mcp_vector_search/mcp/__init__.py +5 -0
- mcp_vector_search/mcp/__main__.py +25 -0
- mcp_vector_search/mcp/server.py +752 -0
- mcp_vector_search/parsers/__init__.py +8 -0
- mcp_vector_search/parsers/base.py +296 -0
- mcp_vector_search/parsers/dart.py +605 -0
- mcp_vector_search/parsers/html.py +413 -0
- mcp_vector_search/parsers/javascript.py +643 -0
- mcp_vector_search/parsers/php.py +694 -0
- mcp_vector_search/parsers/python.py +502 -0
- mcp_vector_search/parsers/registry.py +223 -0
- mcp_vector_search/parsers/ruby.py +678 -0
- mcp_vector_search/parsers/text.py +186 -0
- mcp_vector_search/parsers/utils.py +265 -0
- mcp_vector_search/py.typed +1 -0
- mcp_vector_search/utils/__init__.py +42 -0
- mcp_vector_search/utils/gitignore.py +250 -0
- mcp_vector_search/utils/gitignore_updater.py +212 -0
- mcp_vector_search/utils/monorepo.py +339 -0
- mcp_vector_search/utils/timing.py +338 -0
- mcp_vector_search/utils/version.py +47 -0
- mcp_vector_search-0.15.7.dist-info/METADATA +884 -0
- mcp_vector_search-0.15.7.dist-info/RECORD +86 -0
- mcp_vector_search-0.15.7.dist-info/WHEEL +4 -0
- mcp_vector_search-0.15.7.dist-info/entry_points.txt +3 -0
- mcp_vector_search-0.15.7.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,694 @@
|
|
|
1
|
+
"""PHP parser using Tree-sitter for MCP Vector Search."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from loguru import logger
|
|
7
|
+
|
|
8
|
+
from ..core.models import CodeChunk
|
|
9
|
+
from .base import BaseParser
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class PHPParser(BaseParser):
|
|
13
|
+
"""PHP parser using Tree-sitter for AST-based code analysis."""
|
|
14
|
+
|
|
15
|
+
def __init__(self) -> None:
    """Create a PHP parser and try to attach a Tree-sitter backend.

    The base class is initialised with the language name ``"php"``. The
    parser and language handles start out as ``None`` and are only replaced
    if ``_initialize_parser`` succeeds in loading Tree-sitter.
    """
    super().__init__("php")
    # Both handles stay None unless _initialize_parser() can load Tree-sitter.
    self._parser = self._language = None
    self._initialize_parser()
|
|
21
|
+
|
|
22
|
+
def _initialize_parser(self) -> None:
    """Load the Tree-sitter PHP grammar, or leave the parser unset.

    Tries the optional ``tree_sitter_language_pack`` package. On success
    ``self._language`` and ``self._parser`` are both populated and the
    method returns. On any failure both attributes are set to ``None`` so
    that ``parse_content`` takes the regex fallback path.

    The language and parser are assigned together, only after both have
    been obtained, so a failure between the two lookups cannot leave one
    attribute set and the other unset.
    """
    try:
        # Imported lazily: the package is an optional dependency.
        from tree_sitter_language_pack import get_language, get_parser

        language = get_language("php")
        parser = get_parser("php")
    except Exception as e:
        # Best-effort: any import or loading problem means "no Tree-sitter".
        logger.debug(f"tree-sitter-language-pack failed: {e}")
    else:
        self._language = language
        self._parser = parser
        logger.debug(
            "PHP Tree-sitter parser initialized via tree-sitter-language-pack"
        )
        return

    # There is no manual grammar-loading path yet; record that and make the
    # "unavailable" state explicit.
    logger.debug("Manual tree-sitter setup not implemented yet")
    self._parser = None
    self._language = None

    logger.info(
        "Using fallback regex-based parsing for PHP (Tree-sitter unavailable)"
    )
|
|
55
|
+
|
|
56
|
+
async def parse_file(self, file_path: Path) -> list[CodeChunk]:
    """Read a PHP file as UTF-8 and return the chunks found in it.

    Any exception raised while reading the file or while parsing its
    content is logged as an error and results in an empty list.
    """
    try:
        with open(file_path, encoding="utf-8") as source_file:
            source_text = source_file.read()
        found = await self.parse_content(source_text, file_path)
    except Exception as exc:
        logger.error(f"Failed to read file {file_path}: {exc}")
        found = []
    return found
|
|
65
|
+
|
|
66
|
+
async def parse_content(self, content: str, file_path: Path) -> list[CodeChunk]:
    """Turn PHP source text into code chunks.

    Whitespace-only input yields an empty list. When a Tree-sitter parser
    is available it is tried first; if it is missing, or if parsing or
    chunk extraction raises, the regex-based fallback is used instead.
    """
    if not content.strip():
        return []

    if self._parser:
        try:
            syntax_tree = self._parser.parse(content.encode("utf-8"))
            return self._extract_chunks_from_tree(syntax_tree, content, file_path)
        except Exception as exc:
            logger.warning(f"Tree-sitter parsing failed for {file_path}: {exc}")

    # Reached when there is no parser or the Tree-sitter attempt failed.
    return await self._fallback_parse(content, file_path)
|
|
82
|
+
|
|
83
|
+
def _extract_chunks_from_tree(
    self, tree, content: str, file_path: Path
) -> list[CodeChunk]:
    """Walk a Tree-sitter PHP syntax tree and collect code chunks.

    Functions, classes (plus their methods), interfaces, traits and a
    module-level "imports" chunk are extracted. If nothing is found, the
    whole file is returned as a single ``"module"`` chunk.

    Namespace handling covers both PHP forms:

    * ``namespace Foo { ... }`` - the declarations are descendants of the
      namespace node and inherit its name while its children are visited.
    * ``namespace Foo;`` - the declarations are *siblings* that follow the
      namespace node, so the name is carried forward while iterating the
      children of the ``program`` node.

    Args:
        tree: Parsed Tree-sitter tree; only ``tree.root_node`` is used.
        content: Full source text of the file.
        file_path: Path recorded on every chunk.

    Returns:
        The list of extracted chunks (never empty).
    """
    chunks = []
    lines = self._split_into_lines(content)

    def has_braced_body(namespace_node) -> bool:
        """Tell ``namespace Foo { ... }`` apart from ``namespace Foo;``."""
        # NOTE(review): the braced form is expected to carry a
        # ``compound_statement`` body in tree-sitter-php; ``declaration_list``
        # is accepted too as a precaution - confirm against the grammar in use.
        return any(
            child.type in ("compound_statement", "declaration_list")
            for child in namespace_node.children
        )

    def visit_node(node, current_class=None, current_namespace=None):
        """Recursively visit AST nodes."""
        node_type = node.type

        if node_type == "function_definition":
            chunks.extend(
                self._extract_function(
                    node, lines, file_path, current_class, current_namespace
                )
            )
        elif node_type == "class_declaration":
            class_chunks = self._extract_class(
                node, lines, file_path, current_namespace
            )
            chunks.extend(class_chunks)

            # Visit class methods with class context
            class_name = self._get_node_name(node)
            for child in node.children:
                visit_node(child, class_name, current_namespace)
        elif node_type == "interface_declaration":
            chunks.extend(
                self._extract_interface(node, lines, file_path, current_namespace)
            )
        elif node_type == "trait_declaration":
            chunks.extend(
                self._extract_trait(node, lines, file_path, current_namespace)
            )
        elif node_type == "method_declaration":
            chunks.extend(
                self._extract_method(
                    node, lines, file_path, current_class, current_namespace
                )
            )
        elif node_type == "namespace_definition":
            namespace_name = self._get_namespace_name(node)
            # Visit children with namespace context
            for child in node.children:
                visit_node(child, current_class, namespace_name)
        elif node_type == "program":
            # Extract module-level code
            module_chunk = self._extract_module_chunk(node, lines, file_path)
            if module_chunk:
                chunks.append(module_chunk)

            # An unbraced ``namespace Foo;`` applies to every declaration
            # after it, so remember it for the following siblings.
            active_namespace = None
            for child in node.children:
                if child.type == "namespace_definition" and not has_braced_body(
                    child
                ):
                    active_namespace = self._get_namespace_name(child)
                    continue
                visit_node(child, None, active_namespace)
        else:
            # Visit children for other node types
            for child in node.children:
                visit_node(child, current_class, current_namespace)

    # Start traversal from root
    visit_node(tree.root_node)

    # If no specific chunks found, create a single chunk for the whole file
    if not chunks:
        chunks.append(
            self._create_chunk(
                content=content,
                file_path=file_path,
                start_line=1,
                end_line=len(lines),
                chunk_type="module",
            )
        )

    return chunks
|
|
159
|
+
|
|
160
|
+
def _extract_function(
    self,
    node,
    lines: list[str],
    file_path: Path,
    class_name: str | None = None,
    namespace: str | None = None,
) -> list[CodeChunk]:
    """Build the chunk for a ``function_definition`` node.

    The function name is prefixed with ``namespace`` (backslash-separated)
    only when a namespace is given and no ``class_name`` is set.

    Returns:
        A one-element list holding the ``"function"`` chunk.
    """
    name = self._get_node_name(node)
    # Shift the node's row indices by one to obtain the chunk's line numbers.
    first_line, last_line = node.start_point[0] + 1, node.end_point[0] + 1

    source_text = self._get_line_range(lines, first_line, last_line)
    doc = self._extract_phpdoc(node, lines)

    qualified = f"{namespace}\\{name}" if namespace and not class_name else name

    return [
        self._create_chunk(
            content=source_text,
            file_path=file_path,
            start_line=first_line,
            end_line=last_line,
            chunk_type="function",
            function_name=qualified,
            class_name=class_name,
            docstring=doc,
        )
    ]
|
|
199
|
+
|
|
200
|
+
def _extract_method(
    self,
    node,
    lines: list[str],
    file_path: Path,
    class_name: str | None = None,
    namespace: str | None = None,
) -> list[CodeChunk]:
    """Build the chunk for a ``method_declaration`` node.

    ``class_name`` is stored on the chunk exactly as given. ``namespace``
    is accepted but not used by this method.

    Returns:
        A one-element list holding the ``"method"`` chunk.
    """
    name = self._get_node_name(node)
    # Shift the node's row indices by one to obtain the chunk's line numbers.
    first_line, last_line = node.start_point[0] + 1, node.end_point[0] + 1

    source_text = self._get_line_range(lines, first_line, last_line)
    doc = self._extract_phpdoc(node, lines)

    return [
        self._create_chunk(
            content=source_text,
            file_path=file_path,
            start_line=first_line,
            end_line=last_line,
            chunk_type="method",
            function_name=name,
            class_name=class_name,
            docstring=doc,
        )
    ]
|
|
234
|
+
|
|
235
|
+
def _extract_class(
    self, node, lines: list[str], file_path: Path, namespace: str | None = None
) -> list[CodeChunk]:
    """Build the chunk for a ``class_declaration`` node.

    When ``namespace`` is given, the class name stored on the chunk is
    ``namespace`` + backslash + class name.

    Returns:
        A one-element list holding the ``"class"`` chunk.
    """
    name = self._get_node_name(node)
    # Shift the node's row indices by one to obtain the chunk's line numbers.
    first_line, last_line = node.start_point[0] + 1, node.end_point[0] + 1

    source_text = self._get_line_range(lines, first_line, last_line)
    doc = self._extract_phpdoc(node, lines)

    qualified = f"{namespace}\\{name}" if namespace else name

    return [
        self._create_chunk(
            content=source_text,
            file_path=file_path,
            start_line=first_line,
            end_line=last_line,
            chunk_type="class",
            class_name=qualified,
            docstring=doc,
        )
    ]
|
|
268
|
+
|
|
269
|
+
def _extract_interface(
    self, node, lines: list[str], file_path: Path, namespace: str | None = None
) -> list[CodeChunk]:
    """Build the chunk for an ``interface_declaration`` node.

    When ``namespace`` is given, the interface name stored on the chunk
    (in its ``class_name`` field) is namespace-qualified with a backslash.

    Returns:
        A one-element list holding the ``"interface"`` chunk.
    """
    name = self._get_node_name(node)
    # Shift the node's row indices by one to obtain the chunk's line numbers.
    first_line, last_line = node.start_point[0] + 1, node.end_point[0] + 1

    source_text = self._get_line_range(lines, first_line, last_line)
    doc = self._extract_phpdoc(node, lines)

    qualified = f"{namespace}\\{name}" if namespace else name

    return [
        self._create_chunk(
            content=source_text,
            file_path=file_path,
            start_line=first_line,
            end_line=last_line,
            chunk_type="interface",
            class_name=qualified,
            docstring=doc,
        )
    ]
|
|
302
|
+
|
|
303
|
+
def _extract_trait(
    self, node, lines: list[str], file_path: Path, namespace: str | None = None
) -> list[CodeChunk]:
    """Build the chunk for a ``trait_declaration`` node.

    When ``namespace`` is given, the trait name stored on the chunk (in its
    ``class_name`` field) is namespace-qualified with a backslash.

    Returns:
        A one-element list holding the ``"trait"`` chunk.
    """
    name = self._get_node_name(node)
    # Shift the node's row indices by one to obtain the chunk's line numbers.
    first_line, last_line = node.start_point[0] + 1, node.end_point[0] + 1

    source_text = self._get_line_range(lines, first_line, last_line)
    doc = self._extract_phpdoc(node, lines)

    qualified = f"{namespace}\\{name}" if namespace else name

    return [
        self._create_chunk(
            content=source_text,
            file_path=file_path,
            start_line=first_line,
            end_line=last_line,
            chunk_type="trait",
            class_name=qualified,
            docstring=doc,
        )
    ]
|
|
336
|
+
|
|
337
|
+
def _extract_module_chunk(
    self, node, lines: list[str], file_path: Path
) -> CodeChunk | None:
    """Collect top-level ``namespace``/``use`` statements into one chunk.

    Only direct children of ``node`` whose type is
    ``namespace_use_declaration`` or ``namespace_definition`` are
    considered. Their source text is stripped and joined with newlines.

    The chunk's line range runs from the first line of the earliest
    collected statement to the last line of the latest one, so it points at
    real positions in the file. Lines between the statements are inside the
    range but not part of the chunk's content.

    Args:
        node: The node whose direct children are scanned (the caller passes
            the ``program`` root).
        lines: Source split into lines.
        file_path: Path recorded on the chunk.

    Returns:
        An ``"imports"`` chunk, or ``None`` when no such statement exists.
    """
    statements = []
    first_line = None
    last_line = None

    for child in node.children:
        if child.type not in ("namespace_use_declaration", "namespace_definition"):
            continue

        # Shift the node's row indices by one to obtain line numbers.
        start_line = child.start_point[0] + 1
        end_line = child.end_point[0] + 1
        statements.append(self._get_line_range(lines, start_line, end_line).strip())

        first_line = start_line if first_line is None else min(first_line, start_line)
        last_line = end_line if last_line is None else max(last_line, end_line)

    if not statements:
        return None

    return self._create_chunk(
        content="\n".join(statements),
        file_path=file_path,
        start_line=first_line,
        end_line=last_line,
        chunk_type="imports",
    )
|
|
362
|
+
|
|
363
|
+
def _get_node_name(self, node) -> str | None:
|
|
364
|
+
"""Extract name from a named node (function, class, etc.)."""
|
|
365
|
+
for child in node.children:
|
|
366
|
+
if child.type in ["name", "identifier"]:
|
|
367
|
+
return child.text.decode("utf-8")
|
|
368
|
+
return None
|
|
369
|
+
|
|
370
|
+
def _get_namespace_name(self, node) -> str | None:
|
|
371
|
+
"""Extract namespace name from namespace definition."""
|
|
372
|
+
for child in node.children:
|
|
373
|
+
if child.type == "namespace_name":
|
|
374
|
+
return child.text.decode("utf-8")
|
|
375
|
+
return None
|
|
376
|
+
|
|
377
|
+
def _extract_phpdoc(self, node, lines: list[str]) -> str | None:
|
|
378
|
+
"""Extract PHPDoc from a function or class node."""
|
|
379
|
+
# Look for comment node before the definition
|
|
380
|
+
start_line = node.start_point[0]
|
|
381
|
+
|
|
382
|
+
# Check a few lines before the node for PHPDoc comments
|
|
383
|
+
phpdoc_lines = []
|
|
384
|
+
in_phpdoc = False
|
|
385
|
+
|
|
386
|
+
for i in range(max(0, start_line - 20), start_line):
|
|
387
|
+
if i >= len(lines):
|
|
388
|
+
continue
|
|
389
|
+
|
|
390
|
+
line = lines[i].strip()
|
|
391
|
+
|
|
392
|
+
if line.startswith("/**"):
|
|
393
|
+
in_phpdoc = True
|
|
394
|
+
# Extract content after /**
|
|
395
|
+
content = line[3:].strip()
|
|
396
|
+
if content and content != "*":
|
|
397
|
+
phpdoc_lines.append(content)
|
|
398
|
+
elif in_phpdoc and line.startswith("*/"):
|
|
399
|
+
in_phpdoc = False
|
|
400
|
+
break
|
|
401
|
+
elif in_phpdoc:
|
|
402
|
+
# Remove leading * and whitespace
|
|
403
|
+
content = line.lstrip("*").strip()
|
|
404
|
+
if content:
|
|
405
|
+
phpdoc_lines.append(content)
|
|
406
|
+
elif line and not line.startswith("//") and not in_phpdoc:
|
|
407
|
+
# Reset if we hit non-comment code
|
|
408
|
+
phpdoc_lines = []
|
|
409
|
+
|
|
410
|
+
if phpdoc_lines:
|
|
411
|
+
return " ".join(phpdoc_lines)
|
|
412
|
+
|
|
413
|
+
return None
|
|
414
|
+
|
|
415
|
+
async def _fallback_parse(self, content: str, file_path: Path) -> list[CodeChunk]:
    """Extract chunks with regular expressions instead of Tree-sitter.

    Chunks are produced in this order: classes, interfaces, traits, then
    functions whose first line does not fall inside any of the previously
    found class/interface/trait regions. Functions whose name starts with
    ``__`` are skipped. Names are prefixed with the first ``namespace``
    declaration found in the file. If nothing matches, the whole file is
    returned as one ``"module"`` chunk.

    Every produced chunk (except the whole-file one) gets its ``imports``
    attribute set to the same list object holding the matched ``use``
    statements.
    """
    chunks = []
    lines = self._split_into_lines(content)

    def keyword_line(match, keyword: str) -> int:
        """1-based line on which ``keyword`` sits inside ``match``."""
        # The patterns allow leading whitespace (including newlines) before
        # the keyword, so the match itself may start on an earlier line.
        keyword_pos = match.start() + match.group(0).find(keyword)
        return content[:keyword_pos].count("\n") + 1

    # Extract namespace (only the first declaration is used)
    namespace_match = re.compile(r"^\s*namespace\s+([\w\\]+)", re.MULTILINE).search(
        content
    )
    current_namespace = namespace_match.group(1) if namespace_match else None

    def qualify(name: str) -> str:
        """Prefix ``name`` with the file's namespace when there is one."""
        return f"{current_namespace}\\{name}" if current_namespace else name

    # Extract use statements
    use_statements = [
        match.group(0).strip()
        for match in re.compile(r"^\s*use\s+([\w\\]+)", re.MULTILINE).finditer(
            content
        )
    ]

    # Class-like declarations; the keyword doubles as the chunk type.
    type_patterns = (
        (
            "class",
            re.compile(r"^\s*(?:abstract\s+|final\s+)?class\s+(\w+)", re.MULTILINE),
        ),
        ("interface", re.compile(r"^\s*interface\s+(\w+)", re.MULTILINE)),
        ("trait", re.compile(r"^\s*trait\s+(\w+)", re.MULTILINE)),
    )

    for keyword, pattern in type_patterns:
        for match in pattern.finditer(content):
            start_line = keyword_line(match, keyword)
            # Simple brace-matching heuristic for the end of the body.
            end_line = self._find_class_end(lines, start_line)
            body = self._get_line_range(lines, start_line, end_line)
            if not body.strip():
                continue

            phpdoc = self._extract_phpdoc_regex(lines, start_line)
            chunk = self._create_chunk(
                content=body,
                file_path=file_path,
                start_line=start_line,
                end_line=end_line,
                chunk_type=keyword,
                class_name=qualify(match.group(1)),
                docstring=phpdoc,
            )
            chunk.imports = use_statements
            chunks.append(chunk)

    # Find functions (excluding methods inside classes)
    type_regions = [(found.start_line, found.end_line) for found in chunks]
    function_pattern = re.compile(
        r"^\s*(?:public\s+|private\s+|protected\s+)?(?:static\s+)?function\s+(\w+)\s*\(",
        re.MULTILINE,
    )

    for match in function_pattern.finditer(content):
        function_name = match.group(1)

        # Skip magic methods and constructors
        if function_name.startswith("__"):
            continue

        start_line = keyword_line(match, "function")

        # Skip if this function is inside a class
        if any(low <= start_line <= high for low, high in type_regions):
            continue

        end_line = self._find_function_end(lines, start_line)
        body = self._get_line_range(lines, start_line, end_line)
        if not body.strip():
            continue

        phpdoc = self._extract_phpdoc_regex(lines, start_line)
        chunk = self._create_chunk(
            content=body,
            file_path=file_path,
            start_line=start_line,
            end_line=end_line,
            chunk_type="function",
            function_name=qualify(function_name),
            docstring=phpdoc,
        )
        chunk.imports = use_statements
        chunks.append(chunk)

    # If no functions or classes found, create chunks for the whole file
    if not chunks:
        chunks.append(
            self._create_chunk(
                content=content,
                file_path=file_path,
                start_line=1,
                end_line=len(lines),
                chunk_type="module",
            )
        )

    return chunks
|
|
609
|
+
|
|
610
|
+
def _find_function_end(self, lines: list[str], start_line: int) -> int:
|
|
611
|
+
"""Find the end line of a function using brace matching."""
|
|
612
|
+
if start_line > len(lines):
|
|
613
|
+
return len(lines)
|
|
614
|
+
|
|
615
|
+
start_idx = start_line - 1
|
|
616
|
+
if start_idx >= len(lines):
|
|
617
|
+
return len(lines)
|
|
618
|
+
|
|
619
|
+
# For PHP, we need to count braces
|
|
620
|
+
brace_count = 0
|
|
621
|
+
found_opening_brace = False
|
|
622
|
+
|
|
623
|
+
for i in range(start_idx, len(lines)):
|
|
624
|
+
line = lines[i]
|
|
625
|
+
|
|
626
|
+
for char in line:
|
|
627
|
+
if char == "{":
|
|
628
|
+
brace_count += 1
|
|
629
|
+
found_opening_brace = True
|
|
630
|
+
elif char == "}":
|
|
631
|
+
brace_count -= 1
|
|
632
|
+
if found_opening_brace and brace_count == 0:
|
|
633
|
+
return i + 1 # Return 1-based line number
|
|
634
|
+
|
|
635
|
+
return len(lines)
|
|
636
|
+
|
|
637
|
+
def _find_class_end(self, lines: list[str], start_line: int) -> int:
    """Return the 1-based line that closes a class-like body.

    This is a thin wrapper: the result is whatever ``_find_function_end``
    returns for the same arguments.
    """
    closing_line = self._find_function_end(lines, start_line)
    return closing_line
|
|
640
|
+
|
|
641
|
+
def _extract_phpdoc_regex(self, lines: list[str], start_line: int) -> str | None:
|
|
642
|
+
"""Extract PHPDoc using regex patterns."""
|
|
643
|
+
# Look for /** ... */ comments before the definition
|
|
644
|
+
phpdoc_lines = []
|
|
645
|
+
in_phpdoc = False
|
|
646
|
+
|
|
647
|
+
# Check a few lines before the start_line
|
|
648
|
+
for i in range(max(0, start_line - 20), start_line - 1):
|
|
649
|
+
if i >= len(lines):
|
|
650
|
+
continue
|
|
651
|
+
|
|
652
|
+
line = lines[i].strip()
|
|
653
|
+
|
|
654
|
+
if line.startswith("/**"):
|
|
655
|
+
in_phpdoc = True
|
|
656
|
+
# Extract content after /**
|
|
657
|
+
content = line[3:].strip()
|
|
658
|
+
if content and content not in ("*", "*/"):
|
|
659
|
+
phpdoc_lines.append(content)
|
|
660
|
+
|
|
661
|
+
# Check for single-line PHPDoc
|
|
662
|
+
if line.endswith("*/") and len(line) > 5:
|
|
663
|
+
# Single line PHPDoc
|
|
664
|
+
content = line[3:-2].strip()
|
|
665
|
+
if content and content != "*":
|
|
666
|
+
return content
|
|
667
|
+
in_phpdoc = False
|
|
668
|
+
elif in_phpdoc and line.endswith("*/"):
|
|
669
|
+
# End of multi-line PHPDoc
|
|
670
|
+
content = line[:-2].lstrip("*").strip()
|
|
671
|
+
if content:
|
|
672
|
+
phpdoc_lines.append(content)
|
|
673
|
+
in_phpdoc = False
|
|
674
|
+
break
|
|
675
|
+
elif in_phpdoc:
|
|
676
|
+
# Inside PHPDoc - remove leading * and whitespace
|
|
677
|
+
content = line.lstrip("*").strip()
|
|
678
|
+
if content:
|
|
679
|
+
phpdoc_lines.append(content)
|
|
680
|
+
elif line and not line.startswith("//") and not in_phpdoc and phpdoc_lines:
|
|
681
|
+
# If we hit non-comment code after finding PHPDoc, we're done
|
|
682
|
+
break
|
|
683
|
+
elif line and not line.startswith("//") and not in_phpdoc:
|
|
684
|
+
# Reset if we hit code before finding PHPDoc
|
|
685
|
+
phpdoc_lines = []
|
|
686
|
+
|
|
687
|
+
if phpdoc_lines:
|
|
688
|
+
return " ".join(phpdoc_lines)
|
|
689
|
+
|
|
690
|
+
return None
|
|
691
|
+
|
|
692
|
+
def get_supported_extensions(self) -> list[str]:
    """List the file suffixes handled by this parser.

    A new list is built on every call.
    """
    extensions = [".php", ".phtml"]
    return extensions
|