mcp-vector-search 0.15.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcp-vector-search might be problematic. Click here for more details.

Files changed (86)
  1. mcp_vector_search/__init__.py +10 -0
  2. mcp_vector_search/cli/__init__.py +1 -0
  3. mcp_vector_search/cli/commands/__init__.py +1 -0
  4. mcp_vector_search/cli/commands/auto_index.py +397 -0
  5. mcp_vector_search/cli/commands/chat.py +534 -0
  6. mcp_vector_search/cli/commands/config.py +393 -0
  7. mcp_vector_search/cli/commands/demo.py +358 -0
  8. mcp_vector_search/cli/commands/index.py +762 -0
  9. mcp_vector_search/cli/commands/init.py +658 -0
  10. mcp_vector_search/cli/commands/install.py +869 -0
  11. mcp_vector_search/cli/commands/install_old.py +700 -0
  12. mcp_vector_search/cli/commands/mcp.py +1254 -0
  13. mcp_vector_search/cli/commands/reset.py +393 -0
  14. mcp_vector_search/cli/commands/search.py +796 -0
  15. mcp_vector_search/cli/commands/setup.py +1133 -0
  16. mcp_vector_search/cli/commands/status.py +584 -0
  17. mcp_vector_search/cli/commands/uninstall.py +404 -0
  18. mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
  19. mcp_vector_search/cli/commands/visualize/cli.py +265 -0
  20. mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
  21. mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
  22. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +29 -0
  23. mcp_vector_search/cli/commands/visualize/graph_builder.py +709 -0
  24. mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
  25. mcp_vector_search/cli/commands/visualize/server.py +201 -0
  26. mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
  27. mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
  28. mcp_vector_search/cli/commands/visualize/templates/base.py +218 -0
  29. mcp_vector_search/cli/commands/visualize/templates/scripts.py +3670 -0
  30. mcp_vector_search/cli/commands/visualize/templates/styles.py +779 -0
  31. mcp_vector_search/cli/commands/visualize.py.original +2536 -0
  32. mcp_vector_search/cli/commands/watch.py +287 -0
  33. mcp_vector_search/cli/didyoumean.py +520 -0
  34. mcp_vector_search/cli/export.py +320 -0
  35. mcp_vector_search/cli/history.py +295 -0
  36. mcp_vector_search/cli/interactive.py +342 -0
  37. mcp_vector_search/cli/main.py +484 -0
  38. mcp_vector_search/cli/output.py +414 -0
  39. mcp_vector_search/cli/suggestions.py +375 -0
  40. mcp_vector_search/config/__init__.py +1 -0
  41. mcp_vector_search/config/constants.py +24 -0
  42. mcp_vector_search/config/defaults.py +200 -0
  43. mcp_vector_search/config/settings.py +146 -0
  44. mcp_vector_search/core/__init__.py +1 -0
  45. mcp_vector_search/core/auto_indexer.py +298 -0
  46. mcp_vector_search/core/config_utils.py +394 -0
  47. mcp_vector_search/core/connection_pool.py +360 -0
  48. mcp_vector_search/core/database.py +1237 -0
  49. mcp_vector_search/core/directory_index.py +318 -0
  50. mcp_vector_search/core/embeddings.py +294 -0
  51. mcp_vector_search/core/exceptions.py +89 -0
  52. mcp_vector_search/core/factory.py +318 -0
  53. mcp_vector_search/core/git_hooks.py +345 -0
  54. mcp_vector_search/core/indexer.py +1002 -0
  55. mcp_vector_search/core/llm_client.py +453 -0
  56. mcp_vector_search/core/models.py +294 -0
  57. mcp_vector_search/core/project.py +350 -0
  58. mcp_vector_search/core/scheduler.py +330 -0
  59. mcp_vector_search/core/search.py +952 -0
  60. mcp_vector_search/core/watcher.py +322 -0
  61. mcp_vector_search/mcp/__init__.py +5 -0
  62. mcp_vector_search/mcp/__main__.py +25 -0
  63. mcp_vector_search/mcp/server.py +752 -0
  64. mcp_vector_search/parsers/__init__.py +8 -0
  65. mcp_vector_search/parsers/base.py +296 -0
  66. mcp_vector_search/parsers/dart.py +605 -0
  67. mcp_vector_search/parsers/html.py +413 -0
  68. mcp_vector_search/parsers/javascript.py +643 -0
  69. mcp_vector_search/parsers/php.py +694 -0
  70. mcp_vector_search/parsers/python.py +502 -0
  71. mcp_vector_search/parsers/registry.py +223 -0
  72. mcp_vector_search/parsers/ruby.py +678 -0
  73. mcp_vector_search/parsers/text.py +186 -0
  74. mcp_vector_search/parsers/utils.py +265 -0
  75. mcp_vector_search/py.typed +1 -0
  76. mcp_vector_search/utils/__init__.py +42 -0
  77. mcp_vector_search/utils/gitignore.py +250 -0
  78. mcp_vector_search/utils/gitignore_updater.py +212 -0
  79. mcp_vector_search/utils/monorepo.py +339 -0
  80. mcp_vector_search/utils/timing.py +338 -0
  81. mcp_vector_search/utils/version.py +47 -0
  82. mcp_vector_search-0.15.7.dist-info/METADATA +884 -0
  83. mcp_vector_search-0.15.7.dist-info/RECORD +86 -0
  84. mcp_vector_search-0.15.7.dist-info/WHEEL +4 -0
  85. mcp_vector_search-0.15.7.dist-info/entry_points.txt +3 -0
  86. mcp_vector_search-0.15.7.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,694 @@
1
+ """PHP parser using Tree-sitter for MCP Vector Search."""
2
+
3
+ import re
4
+ from pathlib import Path
5
+
6
+ from loguru import logger
7
+
8
+ from ..core.models import CodeChunk
9
+ from .base import BaseParser
10
+
11
+
12
+ class PHPParser(BaseParser):
13
+ """PHP parser using Tree-sitter for AST-based code analysis."""
14
+
15
def __init__(self) -> None:
    """Register this parser for the "php" language and run parser setup."""
    super().__init__("php")
    # Both handles start out empty; _initialize_parser() fills them in
    # when a Tree-sitter PHP grammar can be loaded.
    self._language = None
    self._parser = None
    self._initialize_parser()
21
+
22
def _initialize_parser(self) -> None:
    """Load the Tree-sitter PHP language and parser, if available.

    On success ``self._language`` and ``self._parser`` are both set. On any
    failure (package missing, grammar unavailable, ...) both are left as
    ``None`` so callers fall back to regex-based parsing; the two
    attributes are never left in a half-initialised state.
    """
    try:
        # Imported lazily so the module still loads when the optional
        # tree-sitter-language-pack package is not installed.
        from tree_sitter_language_pack import get_language, get_parser

        language = get_language("php")
        parser = get_parser("php")
    except Exception as e:
        logger.debug(f"tree-sitter-language-pack failed: {e}")
    else:
        # Assign only after both lookups succeeded, so a failing
        # get_parser() cannot leave _language set with _parser unset.
        self._language = language
        self._parser = parser
        logger.debug(
            "PHP Tree-sitter parser initialized via tree-sitter-language-pack"
        )
        return

    # A manual tree-sitter setup would require language binaries to be
    # available; it is not implemented, so regex parsing is used instead.
    logger.debug("Manual tree-sitter setup not implemented yet")
    self._parser = None
    self._language = None

    logger.info(
        "Using fallback regex-based parsing for PHP (Tree-sitter unavailable)"
    )
55
+
56
async def parse_file(self, file_path: Path) -> list[CodeChunk]:
    """Read ``file_path`` as UTF-8 and return the chunks parsed from it.

    Any exception raised while reading or parsing is logged and turned
    into an empty result.
    """
    try:
        with open(file_path, encoding="utf-8") as handle:
            source_text = handle.read()
            parsed = await self.parse_content(source_text, file_path)
            return parsed
    except Exception as e:
        logger.error(f"Failed to read file {file_path}: {e}")
        return []
65
+
66
async def parse_content(self, content: str, file_path: Path) -> list[CodeChunk]:
    """Turn PHP source text into code chunks.

    Whitespace-only input yields no chunks. Tree-sitter is used when a
    parser was loaded; without one, or when Tree-sitter parsing raises,
    the regex-based fallback produces the chunks.
    """
    if not content.strip():
        return []

    ts_parser = self._parser
    if not ts_parser:
        # No Tree-sitter parser available: go straight to regex parsing.
        return await self._fallback_parse(content, file_path)

    try:
        source_bytes = content.encode("utf-8")
        syntax_tree = ts_parser.parse(source_bytes)
        return self._extract_chunks_from_tree(syntax_tree, content, file_path)
    except Exception as e:
        logger.warning(f"Tree-sitter parsing failed for {file_path}: {e}")
        return await self._fallback_parse(content, file_path)
82
+
83
def _extract_chunks_from_tree(
    self, tree, content: str, file_path: Path
) -> list[CodeChunk]:
    """Walk the Tree-sitter AST and collect a chunk per PHP declaration.

    Args:
        tree: Parsed tree; only ``tree.root_node`` is used.
        content: Full source text the tree was parsed from.
        file_path: Path recorded on every chunk.

    Returns:
        Chunks in traversal order. When the walk produces nothing, a
        single "module" chunk covering the whole file is returned.
    """
    chunks = []
    lines = self._split_into_lines(content)

    def has_braced_body(namespace_node) -> bool:
        """Tell ``namespace Foo { ... }`` apart from ``namespace Foo;``."""
        return any(
            child.type == "compound_statement"
            for child in namespace_node.children
        )

    def visit_children(node, current_class, current_namespace) -> None:
        """Visit every child of ``node`` with the given context."""
        for child in node.children:
            visit_node(child, current_class, current_namespace)

    def visit_node(node, current_class=None, current_namespace=None) -> None:
        """Recursively visit AST nodes, emitting chunks for declarations."""
        node_type = node.type

        if node_type == "function_definition":
            chunks.extend(
                self._extract_function(
                    node, lines, file_path, current_class, current_namespace
                )
            )
        elif node_type == "class_declaration":
            chunks.extend(
                self._extract_class(node, lines, file_path, current_namespace)
            )
            # Descend so methods are chunked with their class as context.
            visit_children(node, self._get_node_name(node), current_namespace)
        elif node_type == "interface_declaration":
            chunks.extend(
                self._extract_interface(node, lines, file_path, current_namespace)
            )
        elif node_type == "trait_declaration":
            chunks.extend(
                self._extract_trait(node, lines, file_path, current_namespace)
            )
        elif node_type == "method_declaration":
            chunks.extend(
                self._extract_method(
                    node, lines, file_path, current_class, current_namespace
                )
            )
        elif node_type == "namespace_definition":
            # Braced form: the declarations live inside this node.
            visit_children(node, current_class, self._get_namespace_name(node))
        elif node_type == "program":
            # Module-level code (use statements, namespace declarations).
            module_chunk = self._extract_module_chunk(node, lines, file_path)
            if module_chunk:
                chunks.append(module_chunk)

            # A bare ``namespace Foo;`` statement has no body of its own:
            # the declarations it governs follow it as siblings. Remember
            # the most recent one so those siblings are qualified too.
            active_namespace = None
            for child in node.children:
                if child.type == "namespace_definition" and not has_braced_body(
                    child
                ):
                    active_namespace = self._get_namespace_name(child)
                visit_node(child, None, active_namespace)
        else:
            # Any other node: keep looking for declarations further down.
            visit_children(node, current_class, current_namespace)

    # Start traversal from root
    visit_node(tree.root_node)

    # If no specific chunks found, create a single chunk for the whole file
    if not chunks:
        chunks.append(
            self._create_chunk(
                content=content,
                file_path=file_path,
                start_line=1,
                end_line=len(lines),
                chunk_type="module",
            )
        )

    return chunks
159
+
160
def _extract_function(
    self,
    node,
    lines: list[str],
    file_path: Path,
    class_name: str | None = None,
    namespace: str | None = None,
) -> list[CodeChunk]:
    """Build the "function" chunk for a function definition node.

    The function name is prefixed with ``namespace`` only when a
    namespace is given and no ``class_name`` is. The result is a
    one-element list.
    """
    name = self._get_node_name(node)
    # Tree-sitter rows are 0-based; chunks use 1-based line numbers.
    first_line = node.start_point[0] + 1
    last_line = node.end_point[0] + 1

    source_text = self._get_line_range(lines, first_line, last_line)
    doc = self._extract_phpdoc(node, lines)

    if namespace and not class_name:
        qualified_name = f"{namespace}\\{name}"
    else:
        qualified_name = name

    return [
        self._create_chunk(
            content=source_text,
            file_path=file_path,
            start_line=first_line,
            end_line=last_line,
            chunk_type="function",
            function_name=qualified_name,
            class_name=class_name,
            docstring=doc,
        )
    ]
199
+
200
def _extract_method(
    self,
    node,
    lines: list[str],
    file_path: Path,
    class_name: str | None = None,
    namespace: str | None = None,
) -> list[CodeChunk]:
    """Build the "method" chunk for a method declaration node.

    The method name is stored unqualified next to ``class_name``;
    ``namespace`` is accepted but not used. The result is a one-element
    list.
    """
    name = self._get_node_name(node)
    # Tree-sitter rows are 0-based; chunks use 1-based line numbers.
    first_line = node.start_point[0] + 1
    last_line = node.end_point[0] + 1

    source_text = self._get_line_range(lines, first_line, last_line)
    doc = self._extract_phpdoc(node, lines)

    return [
        self._create_chunk(
            content=source_text,
            file_path=file_path,
            start_line=first_line,
            end_line=last_line,
            chunk_type="method",
            function_name=name,
            class_name=class_name,
            docstring=doc,
        )
    ]
234
+
235
def _extract_class(
    self, node, lines: list[str], file_path: Path, namespace: str | None = None
) -> list[CodeChunk]:
    """Build the "class" chunk for a class declaration node.

    The class name is prefixed with ``namespace`` when one is given. The
    result is a one-element list.
    """
    name = self._get_node_name(node)
    # Tree-sitter rows are 0-based; chunks use 1-based line numbers.
    first_line = node.start_point[0] + 1
    last_line = node.end_point[0] + 1

    source_text = self._get_line_range(lines, first_line, last_line)
    doc = self._extract_phpdoc(node, lines)

    qualified_name = f"{namespace}\\{name}" if namespace else name

    return [
        self._create_chunk(
            content=source_text,
            file_path=file_path,
            start_line=first_line,
            end_line=last_line,
            chunk_type="class",
            class_name=qualified_name,
            docstring=doc,
        )
    ]
268
+
269
def _extract_interface(
    self, node, lines: list[str], file_path: Path, namespace: str | None = None
) -> list[CodeChunk]:
    """Build the "interface" chunk for an interface declaration node.

    The interface name is prefixed with ``namespace`` when one is given
    and stored in the chunk's ``class_name``. The result is a one-element
    list.
    """
    name = self._get_node_name(node)
    # Tree-sitter rows are 0-based; chunks use 1-based line numbers.
    first_line = node.start_point[0] + 1
    last_line = node.end_point[0] + 1

    source_text = self._get_line_range(lines, first_line, last_line)
    doc = self._extract_phpdoc(node, lines)

    qualified_name = f"{namespace}\\{name}" if namespace else name

    return [
        self._create_chunk(
            content=source_text,
            file_path=file_path,
            start_line=first_line,
            end_line=last_line,
            chunk_type="interface",
            class_name=qualified_name,
            docstring=doc,
        )
    ]
302
+
303
def _extract_trait(
    self, node, lines: list[str], file_path: Path, namespace: str | None = None
) -> list[CodeChunk]:
    """Build the "trait" chunk for a trait declaration node.

    The trait name is prefixed with ``namespace`` when one is given and
    stored in the chunk's ``class_name``. The result is a one-element
    list.
    """
    name = self._get_node_name(node)
    # Tree-sitter rows are 0-based; chunks use 1-based line numbers.
    first_line = node.start_point[0] + 1
    last_line = node.end_point[0] + 1

    source_text = self._get_line_range(lines, first_line, last_line)
    doc = self._extract_phpdoc(node, lines)

    qualified_name = f"{namespace}\\{name}" if namespace else name

    return [
        self._create_chunk(
            content=source_text,
            file_path=file_path,
            start_line=first_line,
            end_line=last_line,
            chunk_type="trait",
            class_name=qualified_name,
            docstring=doc,
        )
    ]
336
+
337
def _extract_module_chunk(
    self, node, lines: list[str], file_path: Path
) -> CodeChunk | None:
    """Collect top-level ``use``/``namespace`` statements into one chunk.

    Only direct children of ``node`` are inspected. The chunk content is
    the stripped text of each matching statement joined by newlines, and
    its line range spans from the first matching statement to the end of
    the last one.

    Returns:
        An "imports" chunk, or ``None`` when no such statement exists.
    """
    module_lines = []
    first_line = None
    last_line = None

    for child in node.children:
        if child.type not in ("namespace_use_declaration", "namespace_definition"):
            continue

        # Tree-sitter rows are 0-based; chunks use 1-based line numbers.
        start_line = child.start_point[0] + 1
        end_line = child.end_point[0] + 1
        module_lines.append(
            self._get_line_range(lines, start_line, end_line).strip()
        )

        # Track the real source span; the number of collected statements
        # says nothing about where they sit in the file.
        first_line = start_line if first_line is None else min(first_line, start_line)
        last_line = end_line if last_line is None else max(last_line, end_line)

    if not module_lines:
        return None

    return self._create_chunk(
        content="\n".join(module_lines),
        file_path=file_path,
        start_line=first_line,
        end_line=last_line,
        chunk_type="imports",
    )
362
+
363
+ def _get_node_name(self, node) -> str | None:
364
+ """Extract name from a named node (function, class, etc.)."""
365
+ for child in node.children:
366
+ if child.type in ["name", "identifier"]:
367
+ return child.text.decode("utf-8")
368
+ return None
369
+
370
+ def _get_namespace_name(self, node) -> str | None:
371
+ """Extract namespace name from namespace definition."""
372
+ for child in node.children:
373
+ if child.type == "namespace_name":
374
+ return child.text.decode("utf-8")
375
+ return None
376
+
377
+ def _extract_phpdoc(self, node, lines: list[str]) -> str | None:
378
+ """Extract PHPDoc from a function or class node."""
379
+ # Look for comment node before the definition
380
+ start_line = node.start_point[0]
381
+
382
+ # Check a few lines before the node for PHPDoc comments
383
+ phpdoc_lines = []
384
+ in_phpdoc = False
385
+
386
+ for i in range(max(0, start_line - 20), start_line):
387
+ if i >= len(lines):
388
+ continue
389
+
390
+ line = lines[i].strip()
391
+
392
+ if line.startswith("/**"):
393
+ in_phpdoc = True
394
+ # Extract content after /**
395
+ content = line[3:].strip()
396
+ if content and content != "*":
397
+ phpdoc_lines.append(content)
398
+ elif in_phpdoc and line.startswith("*/"):
399
+ in_phpdoc = False
400
+ break
401
+ elif in_phpdoc:
402
+ # Remove leading * and whitespace
403
+ content = line.lstrip("*").strip()
404
+ if content:
405
+ phpdoc_lines.append(content)
406
+ elif line and not line.startswith("//") and not in_phpdoc:
407
+ # Reset if we hit non-comment code
408
+ phpdoc_lines = []
409
+
410
+ if phpdoc_lines:
411
+ return " ".join(phpdoc_lines)
412
+
413
+ return None
414
+
415
async def _fallback_parse(self, content: str, file_path: Path) -> list[CodeChunk]:
    """Extract chunks with regular expressions when Tree-sitter is unavailable.

    Classes, interfaces and traits are located first (in that order),
    then functions that do not start inside any of them. Names are
    prefixed with the first ``namespace`` found in the file, and every
    chunk receives its own copy of the file's ``use`` statements.

    Args:
        content: Full PHP source text.
        file_path: Path recorded on every chunk.

    Returns:
        The extracted chunks, or a single "module" chunk covering the
        whole file when nothing was recognised.
    """
    chunks = []
    lines = self._split_into_lines(content)

    namespace_pattern = re.compile(r"^\s*namespace\s+([\w\\]+)", re.MULTILINE)
    # Keyword doubles as the chunk type. Class modifiers may be stacked
    # (e.g. ``final readonly class``), so any run of them is accepted.
    type_patterns = (
        (
            "class",
            re.compile(
                r"^\s*(?:(?:abstract|final|readonly)\s+)*class\s+(\w+)",
                re.MULTILINE,
            ),
        ),
        ("interface", re.compile(r"^\s*interface\s+(\w+)", re.MULTILINE)),
        ("trait", re.compile(r"^\s*trait\s+(\w+)", re.MULTILINE)),
    )
    function_pattern = re.compile(
        r"^\s*(?:public\s+|private\s+|protected\s+)?(?:static\s+)?function\s+(\w+)\s*\(",
        re.MULTILINE,
    )
    use_pattern = re.compile(r"^\s*use\s+([\w\\]+)", re.MULTILINE)

    # Only the first namespace declaration is honoured.
    namespace_match = namespace_pattern.search(content)
    current_namespace = namespace_match.group(1) if namespace_match else None

    use_statements = [
        match.group(0).strip() for match in use_pattern.finditer(content)
    ]

    def keyword_line(match, keyword: str) -> int:
        """Return the 1-based line holding ``keyword`` within ``match``.

        ``^\\s*`` can swallow preceding blank lines, so the match start
        is not necessarily on the declaration line itself.
        """
        keyword_pos = match.start() + match.group(0).find(keyword)
        return content[:keyword_pos].count("\n") + 1

    def qualify(name: str) -> str:
        """Prefix ``name`` with the file's namespace, if there is one."""
        return f"{current_namespace}\\{name}" if current_namespace else name

    # Classes, interfaces and traits share the same extraction steps.
    for keyword, pattern in type_patterns:
        for match in pattern.finditer(content):
            start_line = keyword_line(match, keyword)
            # Simple heuristic: brace matching from the declaration line.
            end_line = self._find_class_end(lines, start_line)
            type_content = self._get_line_range(lines, start_line, end_line)
            if not type_content.strip():
                continue

            chunk = self._create_chunk(
                content=type_content,
                file_path=file_path,
                start_line=start_line,
                end_line=end_line,
                chunk_type=keyword,
                class_name=qualify(match.group(1)),
                docstring=self._extract_phpdoc_regex(lines, start_line),
            )
            # Copy, so editing one chunk's imports cannot affect the others.
            chunk.imports = list(use_statements)
            chunks.append(chunk)

    # Functions starting inside any region found above are methods and
    # are covered by the enclosing chunk.
    class_regions = [(chunk.start_line, chunk.end_line) for chunk in chunks]

    for match in function_pattern.finditer(content):
        function_name = match.group(1)

        # Skip magic methods and constructors
        if function_name.startswith("__"):
            continue

        start_line = keyword_line(match, "function")
        if any(start <= start_line <= end for start, end in class_regions):
            continue

        end_line = self._find_function_end(lines, start_line)
        func_content = self._get_line_range(lines, start_line, end_line)
        if not func_content.strip():
            continue

        chunk = self._create_chunk(
            content=func_content,
            file_path=file_path,
            start_line=start_line,
            end_line=end_line,
            chunk_type="function",
            function_name=qualify(function_name),
            docstring=self._extract_phpdoc_regex(lines, start_line),
        )
        chunk.imports = list(use_statements)
        chunks.append(chunk)

    # If no functions or classes found, create a chunk for the whole file
    if not chunks:
        chunks.append(
            self._create_chunk(
                content=content,
                file_path=file_path,
                start_line=1,
                end_line=len(lines),
                chunk_type="module",
            )
        )

    return chunks
609
+
610
+ def _find_function_end(self, lines: list[str], start_line: int) -> int:
611
+ """Find the end line of a function using brace matching."""
612
+ if start_line > len(lines):
613
+ return len(lines)
614
+
615
+ start_idx = start_line - 1
616
+ if start_idx >= len(lines):
617
+ return len(lines)
618
+
619
+ # For PHP, we need to count braces
620
+ brace_count = 0
621
+ found_opening_brace = False
622
+
623
+ for i in range(start_idx, len(lines)):
624
+ line = lines[i]
625
+
626
+ for char in line:
627
+ if char == "{":
628
+ brace_count += 1
629
+ found_opening_brace = True
630
+ elif char == "}":
631
+ brace_count -= 1
632
+ if found_opening_brace and brace_count == 0:
633
+ return i + 1 # Return 1-based line number
634
+
635
+ return len(lines)
636
+
637
+ def _find_class_end(self, lines: list[str], start_line: int) -> int:
638
+ """Find the end line of a class using brace matching."""
639
+ return self._find_function_end(lines, start_line)
640
+
641
+ def _extract_phpdoc_regex(self, lines: list[str], start_line: int) -> str | None:
642
+ """Extract PHPDoc using regex patterns."""
643
+ # Look for /** ... */ comments before the definition
644
+ phpdoc_lines = []
645
+ in_phpdoc = False
646
+
647
+ # Check a few lines before the start_line
648
+ for i in range(max(0, start_line - 20), start_line - 1):
649
+ if i >= len(lines):
650
+ continue
651
+
652
+ line = lines[i].strip()
653
+
654
+ if line.startswith("/**"):
655
+ in_phpdoc = True
656
+ # Extract content after /**
657
+ content = line[3:].strip()
658
+ if content and content not in ("*", "*/"):
659
+ phpdoc_lines.append(content)
660
+
661
+ # Check for single-line PHPDoc
662
+ if line.endswith("*/") and len(line) > 5:
663
+ # Single line PHPDoc
664
+ content = line[3:-2].strip()
665
+ if content and content != "*":
666
+ return content
667
+ in_phpdoc = False
668
+ elif in_phpdoc and line.endswith("*/"):
669
+ # End of multi-line PHPDoc
670
+ content = line[:-2].lstrip("*").strip()
671
+ if content:
672
+ phpdoc_lines.append(content)
673
+ in_phpdoc = False
674
+ break
675
+ elif in_phpdoc:
676
+ # Inside PHPDoc - remove leading * and whitespace
677
+ content = line.lstrip("*").strip()
678
+ if content:
679
+ phpdoc_lines.append(content)
680
+ elif line and not line.startswith("//") and not in_phpdoc and phpdoc_lines:
681
+ # If we hit non-comment code after finding PHPDoc, we're done
682
+ break
683
+ elif line and not line.startswith("//") and not in_phpdoc:
684
+ # Reset if we hit code before finding PHPDoc
685
+ phpdoc_lines = []
686
+
687
+ if phpdoc_lines:
688
+ return " ".join(phpdoc_lines)
689
+
690
+ return None
691
+
692
def get_supported_extensions(self) -> list[str]:
    """Return the file suffixes handled by this parser."""
    php_suffixes = (".php", ".phtml")
    return list(php_suffixes)