mcp-vector-search 0.15.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcp-vector-search might be problematic. Click here for more details.

Files changed (86)
  1. mcp_vector_search/__init__.py +10 -0
  2. mcp_vector_search/cli/__init__.py +1 -0
  3. mcp_vector_search/cli/commands/__init__.py +1 -0
  4. mcp_vector_search/cli/commands/auto_index.py +397 -0
  5. mcp_vector_search/cli/commands/chat.py +534 -0
  6. mcp_vector_search/cli/commands/config.py +393 -0
  7. mcp_vector_search/cli/commands/demo.py +358 -0
  8. mcp_vector_search/cli/commands/index.py +762 -0
  9. mcp_vector_search/cli/commands/init.py +658 -0
  10. mcp_vector_search/cli/commands/install.py +869 -0
  11. mcp_vector_search/cli/commands/install_old.py +700 -0
  12. mcp_vector_search/cli/commands/mcp.py +1254 -0
  13. mcp_vector_search/cli/commands/reset.py +393 -0
  14. mcp_vector_search/cli/commands/search.py +796 -0
  15. mcp_vector_search/cli/commands/setup.py +1133 -0
  16. mcp_vector_search/cli/commands/status.py +584 -0
  17. mcp_vector_search/cli/commands/uninstall.py +404 -0
  18. mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
  19. mcp_vector_search/cli/commands/visualize/cli.py +265 -0
  20. mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
  21. mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
  22. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +29 -0
  23. mcp_vector_search/cli/commands/visualize/graph_builder.py +709 -0
  24. mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
  25. mcp_vector_search/cli/commands/visualize/server.py +201 -0
  26. mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
  27. mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
  28. mcp_vector_search/cli/commands/visualize/templates/base.py +218 -0
  29. mcp_vector_search/cli/commands/visualize/templates/scripts.py +3670 -0
  30. mcp_vector_search/cli/commands/visualize/templates/styles.py +779 -0
  31. mcp_vector_search/cli/commands/visualize.py.original +2536 -0
  32. mcp_vector_search/cli/commands/watch.py +287 -0
  33. mcp_vector_search/cli/didyoumean.py +520 -0
  34. mcp_vector_search/cli/export.py +320 -0
  35. mcp_vector_search/cli/history.py +295 -0
  36. mcp_vector_search/cli/interactive.py +342 -0
  37. mcp_vector_search/cli/main.py +484 -0
  38. mcp_vector_search/cli/output.py +414 -0
  39. mcp_vector_search/cli/suggestions.py +375 -0
  40. mcp_vector_search/config/__init__.py +1 -0
  41. mcp_vector_search/config/constants.py +24 -0
  42. mcp_vector_search/config/defaults.py +200 -0
  43. mcp_vector_search/config/settings.py +146 -0
  44. mcp_vector_search/core/__init__.py +1 -0
  45. mcp_vector_search/core/auto_indexer.py +298 -0
  46. mcp_vector_search/core/config_utils.py +394 -0
  47. mcp_vector_search/core/connection_pool.py +360 -0
  48. mcp_vector_search/core/database.py +1237 -0
  49. mcp_vector_search/core/directory_index.py +318 -0
  50. mcp_vector_search/core/embeddings.py +294 -0
  51. mcp_vector_search/core/exceptions.py +89 -0
  52. mcp_vector_search/core/factory.py +318 -0
  53. mcp_vector_search/core/git_hooks.py +345 -0
  54. mcp_vector_search/core/indexer.py +1002 -0
  55. mcp_vector_search/core/llm_client.py +453 -0
  56. mcp_vector_search/core/models.py +294 -0
  57. mcp_vector_search/core/project.py +350 -0
  58. mcp_vector_search/core/scheduler.py +330 -0
  59. mcp_vector_search/core/search.py +952 -0
  60. mcp_vector_search/core/watcher.py +322 -0
  61. mcp_vector_search/mcp/__init__.py +5 -0
  62. mcp_vector_search/mcp/__main__.py +25 -0
  63. mcp_vector_search/mcp/server.py +752 -0
  64. mcp_vector_search/parsers/__init__.py +8 -0
  65. mcp_vector_search/parsers/base.py +296 -0
  66. mcp_vector_search/parsers/dart.py +605 -0
  67. mcp_vector_search/parsers/html.py +413 -0
  68. mcp_vector_search/parsers/javascript.py +643 -0
  69. mcp_vector_search/parsers/php.py +694 -0
  70. mcp_vector_search/parsers/python.py +502 -0
  71. mcp_vector_search/parsers/registry.py +223 -0
  72. mcp_vector_search/parsers/ruby.py +678 -0
  73. mcp_vector_search/parsers/text.py +186 -0
  74. mcp_vector_search/parsers/utils.py +265 -0
  75. mcp_vector_search/py.typed +1 -0
  76. mcp_vector_search/utils/__init__.py +42 -0
  77. mcp_vector_search/utils/gitignore.py +250 -0
  78. mcp_vector_search/utils/gitignore_updater.py +212 -0
  79. mcp_vector_search/utils/monorepo.py +339 -0
  80. mcp_vector_search/utils/timing.py +338 -0
  81. mcp_vector_search/utils/version.py +47 -0
  82. mcp_vector_search-0.15.7.dist-info/METADATA +884 -0
  83. mcp_vector_search-0.15.7.dist-info/RECORD +86 -0
  84. mcp_vector_search-0.15.7.dist-info/WHEEL +4 -0
  85. mcp_vector_search-0.15.7.dist-info/entry_points.txt +3 -0
  86. mcp_vector_search-0.15.7.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,678 @@
1
+ """Ruby parser using Tree-sitter for MCP Vector Search."""
2
+
3
+ import re
4
+ from pathlib import Path
5
+
6
+ from loguru import logger
7
+
8
+ from ..core.models import CodeChunk
9
+ from .base import BaseParser
10
+
11
+
12
+ class RubyParser(BaseParser):
13
+ """Ruby parser using Tree-sitter for AST-based code analysis."""
14
+
15
def __init__(self) -> None:
    """Create a parser registered for the "ruby" language.

    The base class is initialised with the language name, the Tree-sitter
    handles start out unset, and ``_initialize_parser`` is then given the
    chance to populate them.
    """
    super().__init__("ruby")
    # Both remain None unless _initialize_parser() loads Tree-sitter.
    self._language = None
    self._parser = None
    self._initialize_parser()
21
+
22
def _initialize_parser(self) -> None:
    """Load the Ruby Tree-sitter language and parser when available.

    On success ``self._language`` and ``self._parser`` are both set and the
    method returns. On any failure both are left as ``None`` so that
    ``parse_content`` takes the regex fallback path.
    """
    try:
        # tree-sitter-language-pack is optional; import lazily so a missing
        # package only disables AST parsing instead of breaking the module.
        from tree_sitter_language_pack import get_language, get_parser

        language = get_language("ruby")
        parser = get_parser("ruby")
    except Exception as e:
        # Broad on purpose: import errors and loader errors are treated
        # alike, both simply mean "use the fallback parser".
        logger.debug(f"tree-sitter-language-pack failed: {e}")
    else:
        # Assign only after both lookups succeeded, so the instance never
        # holds a language without a matching parser.
        self._language = language
        self._parser = parser
        logger.debug(
            "Ruby Tree-sitter parser initialized via tree-sitter-language-pack"
        )
        return

    # A manual tree-sitter setup would need language binaries; it is not
    # implemented, so make the "unavailable" state explicit.
    logger.debug("Manual tree-sitter setup not implemented yet")
    self._parser = None
    self._language = None

    logger.info(
        "Using fallback regex-based parsing for Ruby (Tree-sitter unavailable)"
    )
55
+
56
async def parse_file(self, file_path: Path) -> list[CodeChunk]:
    """Read *file_path* as UTF-8 text and return the chunks parsed from it.

    Any exception raised while reading or parsing is logged as an error and
    results in an empty list rather than propagating to the caller.
    """
    try:
        with open(file_path, encoding="utf-8") as source_file:
            text = source_file.read()
        return await self.parse_content(text, file_path)
    except Exception as exc:
        logger.error(f"Failed to read file {file_path}: {exc}")
        return []
65
+
66
async def parse_content(self, content: str, file_path: Path) -> list[CodeChunk]:
    """Turn Ruby source text into code chunks.

    Whitespace-only content yields an empty list. When a Tree-sitter parser
    is present it is tried first; if it is absent, or if AST parsing raises,
    the regex-based fallback produces the result instead.
    """
    if not content.strip():
        return []

    if self._parser:
        try:
            tree = self._parser.parse(content.encode("utf-8"))
            return self._extract_chunks_from_tree(tree, content, file_path)
        except Exception as exc:
            logger.warning(f"Tree-sitter parsing failed for {file_path}: {exc}")

    # Reached when no parser is available or AST parsing failed above.
    return await self._fallback_parse(content, file_path)
82
+
83
def _extract_chunks_from_tree(
    self, tree, content: str, file_path: Path
) -> list[CodeChunk]:
    """Walk the Tree-sitter AST and collect a chunk for each definition.

    Methods and singleton methods become leaf chunks. Classes and modules
    emit their own chunk and then pass their name down as context for their
    children. The ``program`` root contributes an optional "requires" chunk.
    If nothing is collected, the whole file is returned as a single
    "module" chunk.
    """
    source_lines = self._split_into_lines(content)
    collected = []

    def walk(node, class_ctx=None, module_ctx=None):
        """Visit *node*, then its children with the appropriate context."""
        kind = node.type

        # Method nodes are leaves: their children are not traversed.
        if kind == "method":
            collected.extend(
                self._extract_method(
                    node, source_lines, file_path, class_ctx, module_ctx
                )
            )
            return
        if kind == "singleton_method":
            collected.extend(
                self._extract_class_method(
                    node, source_lines, file_path, class_ctx, module_ctx
                )
            )
            return

        if kind == "class":
            collected.extend(
                self._extract_class(node, source_lines, file_path, module_ctx)
            )
            # Children see this class's name as their class context.
            class_ctx = self._get_node_name(node)
        elif kind == "module":
            collected.extend(self._extract_module(node, source_lines, file_path))
            # Children see this module's name as their module context.
            module_ctx = self._get_node_name(node)
        elif kind == "program":
            requires_chunk = self._extract_module_level_chunk(
                node, source_lines, file_path
            )
            if requires_chunk:
                collected.append(requires_chunk)
            # Top-level children start without any enclosing context.
            class_ctx = None
            module_ctx = None

        for child in node.children:
            walk(child, class_ctx, module_ctx)

    walk(tree.root_node)

    if not collected:
        # Nothing recognisable was found: index the file as one chunk.
        collected.append(
            self._create_chunk(
                content=content,
                file_path=file_path,
                start_line=1,
                end_line=len(source_lines),
                chunk_type="module",
            )
        )

    return collected
154
+
155
def _extract_method(
    self,
    node,
    lines: list[str],
    file_path: Path,
    class_name: str | None = None,
    module_name: str | None = None,
) -> list[CodeChunk]:
    """Build a one-element list holding the "method" chunk for *node*.

    The chunk spans the node's start and end rows (each incremented by one
    to form line numbers), carries the node's name as ``function_name``,
    the module/class qualified name as ``class_name`` and any preceding
    comment text as ``docstring``.
    """
    first_line = node.start_point[0] + 1
    last_line = node.end_point[0] + 1

    return [
        self._create_chunk(
            content=self._get_line_range(lines, first_line, last_line),
            file_path=file_path,
            start_line=first_line,
            end_line=last_line,
            chunk_type="method",
            function_name=self._get_node_name(node),
            class_name=self._build_qualified_name(module_name, class_name),
            docstring=self._extract_rdoc(node, lines),
        )
    ]
192
+
193
def _extract_class_method(
    self,
    node,
    lines: list[str],
    file_path: Path,
    class_name: str | None = None,
    module_name: str | None = None,
) -> list[CodeChunk]:
    """Build a one-element list holding the "class_method" chunk for *node*.

    Works like ``_extract_method`` except that the chunk type is
    "class_method" and the reported function name is prefixed with
    ``self.``.
    """
    first_line = node.start_point[0] + 1
    last_line = node.end_point[0] + 1
    bare_name = self._get_node_name(node)

    return [
        self._create_chunk(
            content=self._get_line_range(lines, first_line, last_line),
            file_path=file_path,
            start_line=first_line,
            end_line=last_line,
            chunk_type="class_method",
            function_name=f"self.{bare_name}",
            class_name=self._build_qualified_name(module_name, class_name),
            docstring=self._extract_rdoc(node, lines),
        )
    ]
230
+
231
def _extract_class(
    self, node, lines: list[str], file_path: Path, module_name: str | None = None
) -> list[CodeChunk]:
    """Build a one-element list holding the "class" chunk for *node*.

    The chunk covers the full class body; its ``class_name`` is the class
    name qualified with *module_name* when one is given, and its
    ``docstring`` is any comment text found just above the definition.
    """
    first_line = node.start_point[0] + 1
    last_line = node.end_point[0] + 1
    qualified = self._build_qualified_name(module_name, self._get_node_name(node))

    return [
        self._create_chunk(
            content=self._get_line_range(lines, first_line, last_line),
            file_path=file_path,
            start_line=first_line,
            end_line=last_line,
            chunk_type="class",
            class_name=qualified,
            docstring=self._extract_rdoc(node, lines),
        )
    ]
262
+
263
def _extract_module(
    self, node, lines: list[str], file_path: Path
) -> list[CodeChunk]:
    """Build a one-element list holding the "module" chunk for *node*.

    The module's own (unqualified) name is stored in the chunk's
    ``class_name`` field, and any comment text found just above the
    definition becomes its ``docstring``.
    """
    first_line = node.start_point[0] + 1
    last_line = node.end_point[0] + 1

    return [
        self._create_chunk(
            content=self._get_line_range(lines, first_line, last_line),
            file_path=file_path,
            start_line=first_line,
            end_line=last_line,
            chunk_type="module",
            class_name=self._get_node_name(node),
            docstring=self._extract_rdoc(node, lines),
        )
    ]
291
+
292
def _extract_module_level_chunk(
    self, node, lines: list[str], file_path: Path
) -> CodeChunk | None:
    """Collect top-level ``require``/``require_relative`` calls into one chunk.

    Only direct ``call`` children of *node* are inspected. The returned
    "requires" chunk contains the stripped source of every matching call,
    one per line, and spans from the first matching call's start line to
    the last matching call's end line. Returns ``None`` when no such call
    exists.
    """
    statements = []
    first_line = None
    last_line = None

    for child in node.children:
        if child.type != "call":
            continue

        call_text = child.text.decode("utf-8")
        if not (call_text.startswith("require") or "require_relative" in call_text):
            continue

        start_line = child.start_point[0] + 1
        end_line = child.end_point[0] + 1
        statements.append(self._get_line_range(lines, start_line, end_line).strip())

        # Track the real source span; the number of statements says nothing
        # about where in the file they live.
        first_line = start_line if first_line is None else min(first_line, start_line)
        last_line = end_line if last_line is None else max(last_line, end_line)

    if not statements:
        return None

    return self._create_chunk(
        content="\n".join(statements),
        file_path=file_path,
        start_line=first_line,
        end_line=last_line,
        chunk_type="requires",
    )
320
+
321
+ def _get_node_name(self, node) -> str | None:
322
+ """Extract name from a named node (method, class, module, etc.)."""
323
+ for child in node.children:
324
+ if child.type in [
325
+ "identifier",
326
+ "constant",
327
+ "instance_variable",
328
+ "class_variable",
329
+ ]:
330
+ return child.text.decode("utf-8")
331
+ return None
332
+
333
+ def _extract_rdoc(self, node, lines: list[str]) -> str | None:
334
+ """Extract RDoc from a method or class node."""
335
+ # Look for comment nodes before the definition
336
+ start_line = node.start_point[0]
337
+
338
+ # Check a few lines before the node for # comments
339
+ rdoc_lines = []
340
+ for i in range(max(0, start_line - 15), start_line):
341
+ line = lines[i].strip()
342
+ if line.startswith("#"):
343
+ # Remove # and strip whitespace
344
+ rdoc_lines.append(line[1:].strip())
345
+ elif line and not rdoc_lines:
346
+ # Reset if we hit non-comment code before finding rdoc
347
+ continue
348
+ elif line and rdoc_lines:
349
+ # Stop if we hit non-comment code after finding rdoc
350
+ break
351
+
352
+ if rdoc_lines:
353
+ return " ".join(rdoc_lines)
354
+
355
+ # Check for =begin...=end block comments
356
+ for i in range(max(0, start_line - 20), start_line):
357
+ line = lines[i].strip()
358
+ if line == "=begin":
359
+ # Found start of block comment
360
+ block_lines = []
361
+ for j in range(i + 1, min(len(lines), start_line)):
362
+ block_line = lines[j].strip()
363
+ if block_line == "=end":
364
+ break
365
+ block_lines.append(block_line)
366
+ if block_lines:
367
+ return " ".join(block_lines)
368
+
369
+ return None
370
+
371
+ def _build_qualified_name(
372
+ self, module_name: str | None, class_name: str | None
373
+ ) -> str | None:
374
+ """Build a fully qualified name from module and class names."""
375
+ if module_name and class_name:
376
+ return f"{module_name}::{class_name}"
377
+ return class_name or module_name
378
+
379
async def _fallback_parse(self, content: str, file_path: Path) -> list[CodeChunk]:
    """Extract chunks with regular expressions when Tree-sitter is unavailable.

    Produces, in this order: "module" chunks, "class" chunks (qualified
    with a containing module when there is one), "method"/"class_method"
    chunks (attributed to the innermost containing class or module) and
    "attribute" chunks for ``attr_accessor``/``attr_reader``/``attr_writer``
    declarations. When nothing matches, the whole file is returned as one
    "module" chunk. ``require`` lines are not emitted as chunks here.
    """
    chunks = []
    lines = self._split_into_lines(content)

    # Enhanced regex patterns for Ruby
    module_pattern = re.compile(r"^\s*module\s+(\w+(?:::\w+)*)", re.MULTILINE)
    class_pattern = re.compile(
        r"^\s*class\s+(\w+)(?:\s+<\s+(\w+(?:::\w+)*))?", re.MULTILINE
    )
    method_pattern = re.compile(r"^\s*def\s+(self\.)?(\w+[?!]?)", re.MULTILINE)
    attr_pattern = re.compile(
        r"^\s*attr_(accessor|reader|writer)\s+:(\w+)(?:\s*,\s*:(\w+))*",
        re.MULTILINE,
    )

    def keyword_offset(match, keyword):
        """Return the absolute offset of *keyword* inside *match*.

        The leading whitespace run of every pattern may swallow blank
        lines, so the match start can sit on an earlier line than the
        keyword itself.
        """
        return match.start() + match.group(0).find(keyword)

    def line_at(offset):
        """Return the 1-based line number containing *offset*."""
        return content.count("\n", 0, offset) + 1

    # Find modules
    modules = {}
    for match in module_pattern.finditer(content):
        module_name = match.group(1)
        start_line = line_at(keyword_offset(match, "module"))
        end_line = self._find_block_end(lines, start_line)
        module_content = self._get_line_range(lines, start_line, end_line)

        if module_content.strip():
            chunks.append(
                self._create_chunk(
                    content=module_content,
                    file_path=file_path,
                    start_line=start_line,
                    end_line=end_line,
                    chunk_type="module",
                    class_name=module_name,
                    docstring=self._extract_rdoc_regex(lines, start_line),
                )
            )
            modules[module_name] = (start_line, end_line)

    # Find classes
    for match in class_pattern.finditer(content):
        class_name = match.group(1)
        # match.group(2) holds the superclass, currently unused.
        start_line = line_at(keyword_offset(match, "class"))
        end_line = self._find_block_end(lines, start_line)
        class_content = self._get_line_range(lines, start_line, end_line)

        if class_content.strip():
            module_name = self._find_containing_module(start_line, modules)
            chunks.append(
                self._create_chunk(
                    content=class_content,
                    file_path=file_path,
                    start_line=start_line,
                    end_line=end_line,
                    chunk_type="class",
                    class_name=self._build_qualified_name(module_name, class_name),
                    docstring=self._extract_rdoc_regex(lines, start_line),
                )
            )

    # Line spans of every module/class found so far, keyed by chunk name,
    # used to attribute methods and attributes to their container.
    classes_and_modules = {}
    for chunk in chunks:
        if chunk.class_name:
            classes_and_modules[chunk.class_name] = (
                chunk.start_line,
                chunk.end_line,
            )

    # Find methods
    for match in method_pattern.finditer(content):
        is_class_method = match.group(1) is not None
        method_name = match.group(2)
        start_line = line_at(keyword_offset(match, "def"))
        end_line = self._find_method_end(lines, start_line)
        method_content = self._get_line_range(lines, start_line, end_line)

        if method_content.strip():
            if is_class_method:
                method_name = f"self.{method_name}"

            chunks.append(
                self._create_chunk(
                    content=method_content,
                    file_path=file_path,
                    start_line=start_line,
                    end_line=end_line,
                    chunk_type="class_method" if is_class_method else "method",
                    function_name=method_name,
                    class_name=self._find_containing_class(
                        start_line, classes_and_modules
                    ),
                    docstring=self._extract_rdoc_regex(lines, start_line),
                )
            )

    # Find attr_accessor/reader/writer
    for match in attr_pattern.finditer(content):
        attr_type = match.group(1)
        attr_name = match.group(2)  # first attribute of the declaration

        # Locate the declaration itself rather than the match start, which
        # may point at a preceding blank line.
        attr_offset = keyword_offset(match, "attr_")
        start_line = line_at(attr_offset)
        # A comma-separated list may continue on following lines.
        end_line = line_at(match.end())
        # Keep the declaration line's indentation, drop swallowed blank lines.
        line_start = content.rfind("\n", 0, attr_offset) + 1
        attr_content = content[line_start : match.end()]

        chunks.append(
            self._create_chunk(
                content=attr_content,
                file_path=file_path,
                start_line=start_line,
                end_line=end_line,
                chunk_type="attribute",
                function_name=f"attr_{attr_type} :{attr_name}",
                class_name=self._find_containing_class(
                    start_line, classes_and_modules
                ),
            )
        )

    # If no functions or classes found, create chunks for the whole file
    if not chunks:
        chunks.append(
            self._create_chunk(
                content=content,
                file_path=file_path,
                start_line=1,
                end_line=len(lines),
                chunk_type="module",
            )
        )

    return chunks
556
+
557
+ def _find_block_end(self, lines: list[str], start_line: int) -> int:
558
+ """Find the end line of a block (module/class) using 'end' keyword matching."""
559
+ if start_line > len(lines):
560
+ return len(lines)
561
+
562
+ start_idx = start_line - 1
563
+ if start_idx >= len(lines):
564
+ return len(lines)
565
+
566
+ # Count nested blocks
567
+ block_count = 0
568
+ keywords_start = [
569
+ "module",
570
+ "class",
571
+ "def",
572
+ "do",
573
+ "begin",
574
+ "case",
575
+ "if",
576
+ "unless",
577
+ "while",
578
+ "until",
579
+ "for",
580
+ ]
581
+
582
+ for i in range(start_idx, len(lines)):
583
+ line = lines[i].strip()
584
+
585
+ # Skip comments and empty lines
586
+ if not line or line.startswith("#"):
587
+ continue
588
+
589
+ # Check for block-starting keywords
590
+ for keyword in keywords_start:
591
+ # Use word boundaries to avoid matching substrings
592
+ if re.search(rf"\b{keyword}\b", line):
593
+ block_count += 1
594
+ break
595
+
596
+ # Check for 'end' keyword
597
+ if re.search(r"\bend\b", line):
598
+ block_count -= 1
599
+ if block_count == 0:
600
+ return i + 1 # Return 1-based line number
601
+
602
+ return len(lines)
603
+
604
+ def _find_method_end(self, lines: list[str], start_line: int) -> int:
605
+ """Find the end line of a method using 'end' keyword matching."""
606
+ return self._find_block_end(lines, start_line)
607
+
608
+ def _find_containing_module(
609
+ self, line_number: int, modules: dict[str, tuple[int, int]]
610
+ ) -> str | None:
611
+ """Find the module containing a given line number."""
612
+ for module_name, (start, end) in modules.items():
613
+ if start < line_number < end:
614
+ return module_name
615
+ return None
616
+
617
+ def _find_containing_class(
618
+ self, line_number: int, classes_and_modules: dict[str, tuple[int, int]]
619
+ ) -> str | None:
620
+ """Find the class/module containing a given line number."""
621
+ # Find the most specific (innermost) containing class
622
+ containing = None
623
+ smallest_range = float("inf")
624
+
625
+ for name, (start, end) in classes_and_modules.items():
626
+ if start < line_number < end:
627
+ range_size = end - start
628
+ if range_size < smallest_range:
629
+ smallest_range = range_size
630
+ containing = name
631
+
632
+ return containing
633
+
634
+ def _extract_rdoc_regex(self, lines: list[str], start_line: int) -> str | None:
635
+ """Extract RDoc using regex patterns."""
636
+ # Look for # comments before the definition
637
+ rdoc_lines = []
638
+
639
+ # Check lines before the start_line
640
+ for i in range(max(0, start_line - 15), start_line - 1):
641
+ if i >= len(lines):
642
+ continue
643
+
644
+ line = lines[i].strip()
645
+ if line.startswith("#"):
646
+ rdoc_lines.append(line[1:].strip())
647
+ elif line and rdoc_lines:
648
+ # If we hit non-comment code after finding rdoc, stop
649
+ break
650
+ elif line and not rdoc_lines:
651
+ # Reset if we hit code before finding rdoc
652
+ rdoc_lines = []
653
+
654
+ if rdoc_lines:
655
+ return " ".join(rdoc_lines)
656
+
657
+ # Check for =begin...=end block comments
658
+ for i in range(max(0, start_line - 20), start_line - 1):
659
+ if i >= len(lines):
660
+ continue
661
+
662
+ line = lines[i].strip()
663
+ if line == "=begin":
664
+ # Found start of block comment
665
+ block_lines = []
666
+ for j in range(i + 1, min(len(lines), start_line - 1)):
667
+ block_line = lines[j].strip()
668
+ if block_line == "=end":
669
+ break
670
+ block_lines.append(block_line)
671
+ if block_lines:
672
+ return " ".join(block_lines)
673
+
674
+ return None
675
+
676
def get_supported_extensions(self) -> list[str]:
    """Return the file suffixes this parser handles, as a new list."""
    ruby_suffixes = (".rb", ".rake", ".gemspec")
    return list(ruby_suffixes)