mcp-vector-search 0.15.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcp-vector-search might be problematic. Click here for more details.
- mcp_vector_search/__init__.py +10 -0
- mcp_vector_search/cli/__init__.py +1 -0
- mcp_vector_search/cli/commands/__init__.py +1 -0
- mcp_vector_search/cli/commands/auto_index.py +397 -0
- mcp_vector_search/cli/commands/chat.py +534 -0
- mcp_vector_search/cli/commands/config.py +393 -0
- mcp_vector_search/cli/commands/demo.py +358 -0
- mcp_vector_search/cli/commands/index.py +762 -0
- mcp_vector_search/cli/commands/init.py +658 -0
- mcp_vector_search/cli/commands/install.py +869 -0
- mcp_vector_search/cli/commands/install_old.py +700 -0
- mcp_vector_search/cli/commands/mcp.py +1254 -0
- mcp_vector_search/cli/commands/reset.py +393 -0
- mcp_vector_search/cli/commands/search.py +796 -0
- mcp_vector_search/cli/commands/setup.py +1133 -0
- mcp_vector_search/cli/commands/status.py +584 -0
- mcp_vector_search/cli/commands/uninstall.py +404 -0
- mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
- mcp_vector_search/cli/commands/visualize/cli.py +265 -0
- mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
- mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
- mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +29 -0
- mcp_vector_search/cli/commands/visualize/graph_builder.py +709 -0
- mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
- mcp_vector_search/cli/commands/visualize/server.py +201 -0
- mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
- mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
- mcp_vector_search/cli/commands/visualize/templates/base.py +218 -0
- mcp_vector_search/cli/commands/visualize/templates/scripts.py +3670 -0
- mcp_vector_search/cli/commands/visualize/templates/styles.py +779 -0
- mcp_vector_search/cli/commands/visualize.py.original +2536 -0
- mcp_vector_search/cli/commands/watch.py +287 -0
- mcp_vector_search/cli/didyoumean.py +520 -0
- mcp_vector_search/cli/export.py +320 -0
- mcp_vector_search/cli/history.py +295 -0
- mcp_vector_search/cli/interactive.py +342 -0
- mcp_vector_search/cli/main.py +484 -0
- mcp_vector_search/cli/output.py +414 -0
- mcp_vector_search/cli/suggestions.py +375 -0
- mcp_vector_search/config/__init__.py +1 -0
- mcp_vector_search/config/constants.py +24 -0
- mcp_vector_search/config/defaults.py +200 -0
- mcp_vector_search/config/settings.py +146 -0
- mcp_vector_search/core/__init__.py +1 -0
- mcp_vector_search/core/auto_indexer.py +298 -0
- mcp_vector_search/core/config_utils.py +394 -0
- mcp_vector_search/core/connection_pool.py +360 -0
- mcp_vector_search/core/database.py +1237 -0
- mcp_vector_search/core/directory_index.py +318 -0
- mcp_vector_search/core/embeddings.py +294 -0
- mcp_vector_search/core/exceptions.py +89 -0
- mcp_vector_search/core/factory.py +318 -0
- mcp_vector_search/core/git_hooks.py +345 -0
- mcp_vector_search/core/indexer.py +1002 -0
- mcp_vector_search/core/llm_client.py +453 -0
- mcp_vector_search/core/models.py +294 -0
- mcp_vector_search/core/project.py +350 -0
- mcp_vector_search/core/scheduler.py +330 -0
- mcp_vector_search/core/search.py +952 -0
- mcp_vector_search/core/watcher.py +322 -0
- mcp_vector_search/mcp/__init__.py +5 -0
- mcp_vector_search/mcp/__main__.py +25 -0
- mcp_vector_search/mcp/server.py +752 -0
- mcp_vector_search/parsers/__init__.py +8 -0
- mcp_vector_search/parsers/base.py +296 -0
- mcp_vector_search/parsers/dart.py +605 -0
- mcp_vector_search/parsers/html.py +413 -0
- mcp_vector_search/parsers/javascript.py +643 -0
- mcp_vector_search/parsers/php.py +694 -0
- mcp_vector_search/parsers/python.py +502 -0
- mcp_vector_search/parsers/registry.py +223 -0
- mcp_vector_search/parsers/ruby.py +678 -0
- mcp_vector_search/parsers/text.py +186 -0
- mcp_vector_search/parsers/utils.py +265 -0
- mcp_vector_search/py.typed +1 -0
- mcp_vector_search/utils/__init__.py +42 -0
- mcp_vector_search/utils/gitignore.py +250 -0
- mcp_vector_search/utils/gitignore_updater.py +212 -0
- mcp_vector_search/utils/monorepo.py +339 -0
- mcp_vector_search/utils/timing.py +338 -0
- mcp_vector_search/utils/version.py +47 -0
- mcp_vector_search-0.15.7.dist-info/METADATA +884 -0
- mcp_vector_search-0.15.7.dist-info/RECORD +86 -0
- mcp_vector_search-0.15.7.dist-info/WHEEL +4 -0
- mcp_vector_search-0.15.7.dist-info/entry_points.txt +3 -0
- mcp_vector_search-0.15.7.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,678 @@
|
|
|
1
|
+
"""Ruby parser using Tree-sitter for MCP Vector Search."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from loguru import logger
|
|
7
|
+
|
|
8
|
+
from ..core.models import CodeChunk
|
|
9
|
+
from .base import BaseParser
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class RubyParser(BaseParser):
|
|
13
|
+
"""Ruby parser using Tree-sitter for AST-based code analysis."""
|
|
14
|
+
|
|
15
|
+
def __init__(self) -> None:
    """Create the parser for the "ruby" language and try to load Tree-sitter.

    The Tree-sitter handles start out unset; ``_initialize_parser`` fills
    them in when the optional dependency is usable.
    """
    super().__init__("ruby")
    # Both handles stay None until _initialize_parser succeeds.
    self._parser = self._language = None
    self._initialize_parser()
|
|
21
|
+
|
|
22
|
+
def _initialize_parser(self) -> None:
    """Load the Tree-sitter Ruby grammar, or fall back to regex parsing.

    On success ``self._language`` and ``self._parser`` hold the objects
    returned by ``tree_sitter_language_pack``. On any failure both are
    reset to ``None``, which makes ``parse_content`` use the regex fallback.
    """
    try:
        # Imported lazily so that a missing optional package only disables
        # AST-based parsing instead of breaking module import.
        from tree_sitter_language_pack import get_language, get_parser

        self._language = get_language("ruby")
        self._parser = get_parser("ruby")

        logger.debug(
            "Ruby Tree-sitter parser initialized via tree-sitter-language-pack"
        )
        return
    except Exception as e:
        logger.debug(f"tree-sitter-language-pack failed: {e}")

    # Loading grammar binaries by hand is not implemented; clear any
    # half-initialised state so callers see a consistent "no parser".
    logger.debug("Manual tree-sitter setup not implemented yet")
    self._parser = None
    self._language = None

    logger.info(
        "Using fallback regex-based parsing for Ruby (Tree-sitter unavailable)"
    )
|
|
55
|
+
|
|
56
|
+
async def parse_file(self, file_path: Path) -> list[CodeChunk]:
    """Read a Ruby file as UTF-8 and return the chunks found in it.

    Args:
        file_path: Location of the file to parse.

    Returns:
        The chunks produced by ``parse_content``; an empty list when
        reading or parsing raises.
    """
    try:
        with open(file_path, encoding="utf-8") as source:
            text = source.read()
        parsed = await self.parse_content(text, file_path)
    except Exception as e:
        logger.error(f"Failed to read file {file_path}: {e}")
        return []
    return parsed
|
|
65
|
+
|
|
66
|
+
async def parse_content(self, content: str, file_path: Path) -> list[CodeChunk]:
    """Split Ruby source text into code chunks.

    Args:
        content: Full text of the Ruby source.
        file_path: Path recorded on every produced chunk.

    Returns:
        An empty list for blank input; otherwise chunks from the
        Tree-sitter AST, or from the regex fallback when no parser is
        loaded or AST extraction raises.
    """
    if not content.strip():
        return []

    if self._parser:
        try:
            # Tree-sitter works on bytes, so encode before parsing.
            syntax_tree = self._parser.parse(content.encode("utf-8"))
            return self._extract_chunks_from_tree(syntax_tree, content, file_path)
        except Exception as e:
            logger.warning(f"Tree-sitter parsing failed for {file_path}: {e}")

    # Reached when there is no parser or the AST path failed.
    return await self._fallback_parse(content, file_path)
|
|
82
|
+
|
|
83
|
+
def _extract_chunks_from_tree(
    self, tree, content: str, file_path: Path
) -> list[CodeChunk]:
    """Walk the Tree-sitter AST and collect one chunk per definition.

    Args:
        tree: Parsed tree exposing ``root_node``.
        content: Source text the tree was built from.
        file_path: Path recorded on every produced chunk.

    Returns:
        Chunks in traversal order. When the walk yields nothing, a single
        "module" chunk covering the whole file is returned instead.
    """
    lines = self._split_into_lines(content)
    collected = []

    def walk(node, enclosing_class=None, enclosing_module=None):
        """Visit ``node`` and its descendants, tracking the enclosing names."""
        kind = node.type

        # Method nodes are leaves for this walk: their bodies are not
        # descended into.
        if kind == "method":
            collected.extend(
                self._extract_method(
                    node, lines, file_path, enclosing_class, enclosing_module
                )
            )
            return
        if kind == "singleton_method":
            collected.extend(
                self._extract_class_method(
                    node, lines, file_path, enclosing_class, enclosing_module
                )
            )
            return

        if kind == "class":
            collected.extend(
                self._extract_class(node, lines, file_path, enclosing_module)
            )
            # Children see this class as their enclosing class.
            child_class = self._get_node_name(node)
            child_module = enclosing_module
        elif kind == "module":
            collected.extend(self._extract_module(node, lines, file_path))
            # Children see this module as their enclosing module.
            child_class = enclosing_class
            child_module = self._get_node_name(node)
        elif kind == "program":
            top_level = self._extract_module_level_chunk(node, lines, file_path)
            if top_level:
                collected.append(top_level)
            # The root starts with no enclosing context.
            child_class = child_module = None
        else:
            child_class = enclosing_class
            child_module = enclosing_module

        for child in node.children:
            walk(child, child_class, child_module)

    walk(tree.root_node)

    if not collected:
        # Nothing recognisable: index the file as a single chunk.
        collected.append(
            self._create_chunk(
                content=content,
                file_path=file_path,
                start_line=1,
                end_line=len(lines),
                chunk_type="module",
            )
        )

    return collected
|
|
154
|
+
|
|
155
|
+
def _extract_method(
    self,
    node,
    lines: list[str],
    file_path: Path,
    class_name: str | None = None,
    module_name: str | None = None,
) -> list[CodeChunk]:
    """Build the chunk for an instance method node.

    Args:
        node: AST node of the method.
        lines: Source split into lines.
        file_path: Path recorded on the chunk.
        class_name: Name of the enclosing class, if any.
        module_name: Name of the enclosing module, if any.

    Returns:
        A one-element list holding the "method" chunk.
    """
    # Tree-sitter rows are 0-based; chunks use 1-based line numbers.
    first_line = node.start_point[0] + 1
    last_line = node.end_point[0] + 1

    return [
        self._create_chunk(
            content=self._get_line_range(lines, first_line, last_line),
            file_path=file_path,
            start_line=first_line,
            end_line=last_line,
            chunk_type="method",
            function_name=self._get_node_name(node),
            class_name=self._build_qualified_name(module_name, class_name),
            docstring=self._extract_rdoc(node, lines),
        )
    ]
|
|
192
|
+
|
|
193
|
+
def _extract_class_method(
    self,
    node,
    lines: list[str],
    file_path: Path,
    class_name: str | None = None,
    module_name: str | None = None,
) -> list[CodeChunk]:
    """Build the chunk for a singleton (class-level) method node.

    Args:
        node: AST node of the singleton method.
        lines: Source split into lines.
        file_path: Path recorded on the chunk.
        class_name: Name of the enclosing class, if any.
        module_name: Name of the enclosing module, if any.

    Returns:
        A one-element list holding the "class_method" chunk; its function
        name is the method name prefixed with ``self.``.
    """
    # Tree-sitter rows are 0-based; chunks use 1-based line numbers.
    first_line = node.start_point[0] + 1
    last_line = node.end_point[0] + 1
    bare_name = self._get_node_name(node)

    return [
        self._create_chunk(
            content=self._get_line_range(lines, first_line, last_line),
            file_path=file_path,
            start_line=first_line,
            end_line=last_line,
            chunk_type="class_method",
            function_name=f"self.{bare_name}",
            class_name=self._build_qualified_name(module_name, class_name),
            docstring=self._extract_rdoc(node, lines),
        )
    ]
|
|
230
|
+
|
|
231
|
+
def _extract_class(
    self, node, lines: list[str], file_path: Path, module_name: str | None = None
) -> list[CodeChunk]:
    """Build the chunk for a class node.

    Args:
        node: AST node of the class.
        lines: Source split into lines.
        file_path: Path recorded on the chunk.
        module_name: Name of the enclosing module, if any; it is joined
            with the class name using ``::``.

    Returns:
        A one-element list holding the "class" chunk.
    """
    # Tree-sitter rows are 0-based; chunks use 1-based line numbers.
    first_line = node.start_point[0] + 1
    last_line = node.end_point[0] + 1
    qualified = self._build_qualified_name(module_name, self._get_node_name(node))

    return [
        self._create_chunk(
            content=self._get_line_range(lines, first_line, last_line),
            file_path=file_path,
            start_line=first_line,
            end_line=last_line,
            chunk_type="class",
            class_name=qualified,
            docstring=self._extract_rdoc(node, lines),
        )
    ]
|
|
262
|
+
|
|
263
|
+
def _extract_module(
    self, node, lines: list[str], file_path: Path
) -> list[CodeChunk]:
    """Build the chunk for a module node.

    Args:
        node: AST node of the module.
        lines: Source split into lines.
        file_path: Path recorded on the chunk.

    Returns:
        A one-element list holding the "module" chunk; the module name is
        stored in the chunk's ``class_name`` field.
    """
    # Tree-sitter rows are 0-based; chunks use 1-based line numbers.
    first_line = node.start_point[0] + 1
    last_line = node.end_point[0] + 1

    return [
        self._create_chunk(
            content=self._get_line_range(lines, first_line, last_line),
            file_path=file_path,
            start_line=first_line,
            end_line=last_line,
            chunk_type="module",
            class_name=self._get_node_name(node),
            docstring=self._extract_rdoc(node, lines),
        )
    ]
|
|
291
|
+
|
|
292
|
+
def _extract_module_level_chunk(
    self, node, lines: list[str], file_path: Path
) -> CodeChunk | None:
    """Collect top-level ``require`` / ``require_relative`` calls into one chunk.

    Only direct children of ``node`` with type "call" are considered, and
    only when the call text begins with the exact word ``require`` or
    ``require_relative`` (so ``required_params(...)`` is not picked up).

    Args:
        node: Root ("program") node whose direct children are scanned.
        lines: Source split into lines.
        file_path: Path recorded on the chunk.

    Returns:
        A "requires" chunk whose content is the require statements joined
        by newlines and whose line span runs from the first require to the
        last one, or ``None`` when the file has no such calls.
    """
    require_call = re.compile(r"require(?:_relative)?\b")

    statements = []
    first_line = None
    last_line = None

    for child in node.children:
        if child.type != "call":
            continue
        if not require_call.match(child.text.decode("utf-8")):
            continue

        # Tree-sitter rows are 0-based; chunks use 1-based line numbers.
        start_line = child.start_point[0] + 1
        end_line = child.end_point[0] + 1
        statements.append(self._get_line_range(lines, start_line, end_line).strip())

        # Track the real span so the chunk points at where the requires live.
        if first_line is None:
            first_line = start_line
        last_line = end_line

    if not statements:
        return None

    return self._create_chunk(
        content="\n".join(statements),
        file_path=file_path,
        start_line=first_line,
        end_line=last_line,
        chunk_type="requires",
    )
|
|
320
|
+
|
|
321
|
+
def _get_node_name(self, node) -> str | None:
|
|
322
|
+
"""Extract name from a named node (method, class, module, etc.)."""
|
|
323
|
+
for child in node.children:
|
|
324
|
+
if child.type in [
|
|
325
|
+
"identifier",
|
|
326
|
+
"constant",
|
|
327
|
+
"instance_variable",
|
|
328
|
+
"class_variable",
|
|
329
|
+
]:
|
|
330
|
+
return child.text.decode("utf-8")
|
|
331
|
+
return None
|
|
332
|
+
|
|
333
|
+
def _extract_rdoc(self, node, lines: list[str]) -> str | None:
|
|
334
|
+
"""Extract RDoc from a method or class node."""
|
|
335
|
+
# Look for comment nodes before the definition
|
|
336
|
+
start_line = node.start_point[0]
|
|
337
|
+
|
|
338
|
+
# Check a few lines before the node for # comments
|
|
339
|
+
rdoc_lines = []
|
|
340
|
+
for i in range(max(0, start_line - 15), start_line):
|
|
341
|
+
line = lines[i].strip()
|
|
342
|
+
if line.startswith("#"):
|
|
343
|
+
# Remove # and strip whitespace
|
|
344
|
+
rdoc_lines.append(line[1:].strip())
|
|
345
|
+
elif line and not rdoc_lines:
|
|
346
|
+
# Reset if we hit non-comment code before finding rdoc
|
|
347
|
+
continue
|
|
348
|
+
elif line and rdoc_lines:
|
|
349
|
+
# Stop if we hit non-comment code after finding rdoc
|
|
350
|
+
break
|
|
351
|
+
|
|
352
|
+
if rdoc_lines:
|
|
353
|
+
return " ".join(rdoc_lines)
|
|
354
|
+
|
|
355
|
+
# Check for =begin...=end block comments
|
|
356
|
+
for i in range(max(0, start_line - 20), start_line):
|
|
357
|
+
line = lines[i].strip()
|
|
358
|
+
if line == "=begin":
|
|
359
|
+
# Found start of block comment
|
|
360
|
+
block_lines = []
|
|
361
|
+
for j in range(i + 1, min(len(lines), start_line)):
|
|
362
|
+
block_line = lines[j].strip()
|
|
363
|
+
if block_line == "=end":
|
|
364
|
+
break
|
|
365
|
+
block_lines.append(block_line)
|
|
366
|
+
if block_lines:
|
|
367
|
+
return " ".join(block_lines)
|
|
368
|
+
|
|
369
|
+
return None
|
|
370
|
+
|
|
371
|
+
def _build_qualified_name(
|
|
372
|
+
self, module_name: str | None, class_name: str | None
|
|
373
|
+
) -> str | None:
|
|
374
|
+
"""Build a fully qualified name from module and class names."""
|
|
375
|
+
if module_name and class_name:
|
|
376
|
+
return f"{module_name}::{class_name}"
|
|
377
|
+
return class_name or module_name
|
|
378
|
+
|
|
379
|
+
async def _fallback_parse(self, content: str, file_path: Path) -> list[CodeChunk]:
    """Extract chunks with regular expressions when Tree-sitter is unavailable.

    Modules are located first, then classes, then methods, then
    ``attr_accessor`` / ``attr_reader`` / ``attr_writer`` declarations.
    Block ends are estimated by ``_find_block_end`` / ``_find_method_end``.

    Args:
        content: Full text of the Ruby source.
        file_path: Path recorded on every produced chunk.

    Returns:
        Chunks in discovery order (modules, classes, methods, attributes).
        When nothing matches, a single "module" chunk covering the whole
        file is returned.
    """
    chunks = []
    lines = self._split_into_lines(content)

    module_pattern = re.compile(r"^\s*module\s+(\w+(?:::\w+)*)", re.MULTILINE)
    class_pattern = re.compile(
        r"^\s*class\s+(\w+)(?:\s+<\s+(\w+(?:::\w+)*))?", re.MULTILINE
    )
    method_pattern = re.compile(r"^\s*def\s+(self\.)?(\w+[?!]?)", re.MULTILINE)
    attr_pattern = re.compile(
        r"^\s*attr_(accessor|reader|writer)\s+:(\w+)(?:\s*,\s*:(\w+))*",
        re.MULTILINE,
    )

    def keyword_line(match, keyword):
        """Return the 1-based line on which ``keyword`` appears in ``match``."""
        # The leading ``^\s*`` can swallow blank lines before the keyword,
        # so the line is counted from the keyword, not from match.start().
        keyword_pos = match.start() + match.group(0).find(keyword)
        return content[:keyword_pos].count("\n") + 1

    # NOTE(review): unlike the Tree-sitter path, this path does not emit a
    # separate "requires" chunk for require/require_relative lines.

    # Modules
    modules = {}
    for match in module_pattern.finditer(content):
        module_name = match.group(1)
        start_line = keyword_line(match, "module")
        end_line = self._find_block_end(lines, start_line)
        module_content = self._get_line_range(lines, start_line, end_line)

        if module_content.strip():
            chunks.append(
                self._create_chunk(
                    content=module_content,
                    file_path=file_path,
                    start_line=start_line,
                    end_line=end_line,
                    chunk_type="module",
                    class_name=module_name,
                    docstring=self._extract_rdoc_regex(lines, start_line),
                )
            )
            modules[module_name] = (start_line, end_line)

    # Classes
    for match in class_pattern.finditer(content):
        class_name = match.group(1)
        start_line = keyword_line(match, "class")
        end_line = self._find_block_end(lines, start_line)
        class_content = self._get_line_range(lines, start_line, end_line)

        if class_content.strip():
            rdoc = self._extract_rdoc_regex(lines, start_line)
            # Prefix the class with the module it is declared in, if any.
            module_name = self._find_containing_module(start_line, modules)
            chunks.append(
                self._create_chunk(
                    content=class_content,
                    file_path=file_path,
                    start_line=start_line,
                    end_line=end_line,
                    chunk_type="class",
                    class_name=self._build_qualified_name(module_name, class_name),
                    docstring=rdoc,
                )
            )

    # Line spans of every module/class found so far, keyed by name, used
    # to attribute methods and attributes to their container.
    classes_and_modules = {}
    for chunk in chunks:
        if chunk.class_name:
            classes_and_modules[chunk.class_name] = (
                chunk.start_line,
                chunk.end_line,
            )

    # Methods
    for match in method_pattern.finditer(content):
        is_class_method = match.group(1) is not None
        method_name = match.group(2)
        start_line = keyword_line(match, "def")
        end_line = self._find_method_end(lines, start_line)
        method_content = self._get_line_range(lines, start_line, end_line)

        if method_content.strip():
            rdoc = self._extract_rdoc_regex(lines, start_line)
            containing_class = self._find_containing_class(
                start_line, classes_and_modules
            )
            if is_class_method:
                method_name = f"self.{method_name}"

            chunks.append(
                self._create_chunk(
                    content=method_content,
                    file_path=file_path,
                    start_line=start_line,
                    end_line=end_line,
                    chunk_type="class_method" if is_class_method else "method",
                    function_name=method_name,
                    class_name=containing_class,
                    docstring=rdoc,
                )
            )

    # attr_accessor / attr_reader / attr_writer
    for match in attr_pattern.finditer(content):
        attr_type = match.group(1)
        attr_name = match.group(2)

        match_text = match.group(0)
        attr_offset = match_text.find("attr_")
        start_line = keyword_line(match, "attr_")
        # A declaration may continue over several lines after a comma.
        end_line = start_line + match_text[attr_offset:].count("\n")

        # Keep the declaration's own indentation but drop any blank lines
        # that the leading ``^\s*`` matched before it.
        line_begin = match_text.rfind("\n", 0, attr_offset) + 1
        attr_content = match_text[line_begin:]

        containing_class = self._find_containing_class(
            start_line, classes_and_modules
        )

        chunks.append(
            self._create_chunk(
                content=attr_content,
                file_path=file_path,
                start_line=start_line,
                end_line=end_line,
                chunk_type="attribute",
                function_name=f"attr_{attr_type} :{attr_name}",
                class_name=containing_class,
            )
        )

    if not chunks:
        # Nothing recognisable: index the file as a single chunk.
        chunks.append(
            self._create_chunk(
                content=content,
                file_path=file_path,
                start_line=1,
                end_line=len(lines),
                chunk_type="module",
            )
        )

    return chunks
|
|
556
|
+
|
|
557
|
+
def _find_block_end(self, lines: list[str], start_line: int) -> int:
|
|
558
|
+
"""Find the end line of a block (module/class) using 'end' keyword matching."""
|
|
559
|
+
if start_line > len(lines):
|
|
560
|
+
return len(lines)
|
|
561
|
+
|
|
562
|
+
start_idx = start_line - 1
|
|
563
|
+
if start_idx >= len(lines):
|
|
564
|
+
return len(lines)
|
|
565
|
+
|
|
566
|
+
# Count nested blocks
|
|
567
|
+
block_count = 0
|
|
568
|
+
keywords_start = [
|
|
569
|
+
"module",
|
|
570
|
+
"class",
|
|
571
|
+
"def",
|
|
572
|
+
"do",
|
|
573
|
+
"begin",
|
|
574
|
+
"case",
|
|
575
|
+
"if",
|
|
576
|
+
"unless",
|
|
577
|
+
"while",
|
|
578
|
+
"until",
|
|
579
|
+
"for",
|
|
580
|
+
]
|
|
581
|
+
|
|
582
|
+
for i in range(start_idx, len(lines)):
|
|
583
|
+
line = lines[i].strip()
|
|
584
|
+
|
|
585
|
+
# Skip comments and empty lines
|
|
586
|
+
if not line or line.startswith("#"):
|
|
587
|
+
continue
|
|
588
|
+
|
|
589
|
+
# Check for block-starting keywords
|
|
590
|
+
for keyword in keywords_start:
|
|
591
|
+
# Use word boundaries to avoid matching substrings
|
|
592
|
+
if re.search(rf"\b{keyword}\b", line):
|
|
593
|
+
block_count += 1
|
|
594
|
+
break
|
|
595
|
+
|
|
596
|
+
# Check for 'end' keyword
|
|
597
|
+
if re.search(r"\bend\b", line):
|
|
598
|
+
block_count -= 1
|
|
599
|
+
if block_count == 0:
|
|
600
|
+
return i + 1 # Return 1-based line number
|
|
601
|
+
|
|
602
|
+
return len(lines)
|
|
603
|
+
|
|
604
|
+
def _find_method_end(self, lines: list[str], start_line: int) -> int:
    """Return the 1-based line that ends the method starting at ``start_line``.

    Methods are delimited exactly like any other block, so this simply
    defers to ``_find_block_end``.
    """
    method_end = self._find_block_end(lines, start_line)
    return method_end
|
|
607
|
+
|
|
608
|
+
def _find_containing_module(
|
|
609
|
+
self, line_number: int, modules: dict[str, tuple[int, int]]
|
|
610
|
+
) -> str | None:
|
|
611
|
+
"""Find the module containing a given line number."""
|
|
612
|
+
for module_name, (start, end) in modules.items():
|
|
613
|
+
if start < line_number < end:
|
|
614
|
+
return module_name
|
|
615
|
+
return None
|
|
616
|
+
|
|
617
|
+
def _find_containing_class(
|
|
618
|
+
self, line_number: int, classes_and_modules: dict[str, tuple[int, int]]
|
|
619
|
+
) -> str | None:
|
|
620
|
+
"""Find the class/module containing a given line number."""
|
|
621
|
+
# Find the most specific (innermost) containing class
|
|
622
|
+
containing = None
|
|
623
|
+
smallest_range = float("inf")
|
|
624
|
+
|
|
625
|
+
for name, (start, end) in classes_and_modules.items():
|
|
626
|
+
if start < line_number < end:
|
|
627
|
+
range_size = end - start
|
|
628
|
+
if range_size < smallest_range:
|
|
629
|
+
smallest_range = range_size
|
|
630
|
+
containing = name
|
|
631
|
+
|
|
632
|
+
return containing
|
|
633
|
+
|
|
634
|
+
def _extract_rdoc_regex(self, lines: list[str], start_line: int) -> str | None:
|
|
635
|
+
"""Extract RDoc using regex patterns."""
|
|
636
|
+
# Look for # comments before the definition
|
|
637
|
+
rdoc_lines = []
|
|
638
|
+
|
|
639
|
+
# Check lines before the start_line
|
|
640
|
+
for i in range(max(0, start_line - 15), start_line - 1):
|
|
641
|
+
if i >= len(lines):
|
|
642
|
+
continue
|
|
643
|
+
|
|
644
|
+
line = lines[i].strip()
|
|
645
|
+
if line.startswith("#"):
|
|
646
|
+
rdoc_lines.append(line[1:].strip())
|
|
647
|
+
elif line and rdoc_lines:
|
|
648
|
+
# If we hit non-comment code after finding rdoc, stop
|
|
649
|
+
break
|
|
650
|
+
elif line and not rdoc_lines:
|
|
651
|
+
# Reset if we hit code before finding rdoc
|
|
652
|
+
rdoc_lines = []
|
|
653
|
+
|
|
654
|
+
if rdoc_lines:
|
|
655
|
+
return " ".join(rdoc_lines)
|
|
656
|
+
|
|
657
|
+
# Check for =begin...=end block comments
|
|
658
|
+
for i in range(max(0, start_line - 20), start_line - 1):
|
|
659
|
+
if i >= len(lines):
|
|
660
|
+
continue
|
|
661
|
+
|
|
662
|
+
line = lines[i].strip()
|
|
663
|
+
if line == "=begin":
|
|
664
|
+
# Found start of block comment
|
|
665
|
+
block_lines = []
|
|
666
|
+
for j in range(i + 1, min(len(lines), start_line - 1)):
|
|
667
|
+
block_line = lines[j].strip()
|
|
668
|
+
if block_line == "=end":
|
|
669
|
+
break
|
|
670
|
+
block_lines.append(block_line)
|
|
671
|
+
if block_lines:
|
|
672
|
+
return " ".join(block_lines)
|
|
673
|
+
|
|
674
|
+
return None
|
|
675
|
+
|
|
676
|
+
def get_supported_extensions(self) -> list[str]:
    """Return the file suffixes this parser handles."""
    ruby_suffixes = [".rb", ".rake", ".gemspec"]
    return ruby_suffixes
|