mcp-vector-search 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcp-vector-search might be problematic. Click here for more details.

Files changed (35) hide show
  1. mcp_vector_search/__init__.py +9 -0
  2. mcp_vector_search/cli/__init__.py +1 -0
  3. mcp_vector_search/cli/commands/__init__.py +1 -0
  4. mcp_vector_search/cli/commands/config.py +303 -0
  5. mcp_vector_search/cli/commands/index.py +304 -0
  6. mcp_vector_search/cli/commands/init.py +212 -0
  7. mcp_vector_search/cli/commands/search.py +395 -0
  8. mcp_vector_search/cli/commands/status.py +340 -0
  9. mcp_vector_search/cli/commands/watch.py +288 -0
  10. mcp_vector_search/cli/main.py +117 -0
  11. mcp_vector_search/cli/output.py +242 -0
  12. mcp_vector_search/config/__init__.py +1 -0
  13. mcp_vector_search/config/defaults.py +175 -0
  14. mcp_vector_search/config/settings.py +108 -0
  15. mcp_vector_search/core/__init__.py +1 -0
  16. mcp_vector_search/core/database.py +431 -0
  17. mcp_vector_search/core/embeddings.py +250 -0
  18. mcp_vector_search/core/exceptions.py +66 -0
  19. mcp_vector_search/core/indexer.py +310 -0
  20. mcp_vector_search/core/models.py +174 -0
  21. mcp_vector_search/core/project.py +304 -0
  22. mcp_vector_search/core/search.py +324 -0
  23. mcp_vector_search/core/watcher.py +320 -0
  24. mcp_vector_search/mcp/__init__.py +1 -0
  25. mcp_vector_search/parsers/__init__.py +1 -0
  26. mcp_vector_search/parsers/base.py +180 -0
  27. mcp_vector_search/parsers/javascript.py +238 -0
  28. mcp_vector_search/parsers/python.py +407 -0
  29. mcp_vector_search/parsers/registry.py +187 -0
  30. mcp_vector_search/py.typed +1 -0
  31. mcp_vector_search-0.0.3.dist-info/METADATA +333 -0
  32. mcp_vector_search-0.0.3.dist-info/RECORD +35 -0
  33. mcp_vector_search-0.0.3.dist-info/WHEEL +4 -0
  34. mcp_vector_search-0.0.3.dist-info/entry_points.txt +2 -0
  35. mcp_vector_search-0.0.3.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,238 @@
1
+ """JavaScript/TypeScript parser for MCP Vector Search."""
2
+
3
+ import re
4
+ from pathlib import Path
5
+ from typing import List, Optional
6
+
7
+ from loguru import logger
8
+
9
+ from ..core.models import CodeChunk
10
+ from .base import BaseParser
11
+
12
+
13
class JavaScriptParser(BaseParser):
    """JavaScript/TypeScript parser with fallback regex-based parsing.

    Purely heuristic: declarations are located with line-anchored regexes and
    block ends are found by brace matching. Braces inside string literals or
    comments can skew the detected end line — acceptable for search indexing.
    """

    # Reserved words that the bare "name() {" method pattern would otherwise
    # misidentify as function names (e.g. "if (x) {", "for (...) {").
    _JS_KEYWORDS = frozenset(
        {
            "if",
            "for",
            "while",
            "switch",
            "catch",
            "do",
            "else",
            "try",
            "finally",
            "return",
            "with",
            "new",
            "typeof",
            "delete",
            "in",
            "of",
            "function",
        }
    )

    def __init__(self, language: str = "javascript") -> None:
        """Initialize JavaScript parser.

        Args:
            language: "javascript" (default) or "typescript". The TypeScript
                mode additionally extracts ``interface`` declarations and
                reports TypeScript file extensions.
        """
        super().__init__(language)

    async def parse_file(self, file_path: Path) -> List[CodeChunk]:
        """Parse a JavaScript/TypeScript file and extract code chunks.

        Returns an empty list (and logs an error) when the file cannot be
        read or decoded as UTF-8.
        """
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                content = f.read()
            return await self.parse_content(content, file_path)
        except Exception as e:
            logger.error(f"Failed to read file {file_path}: {e}")
            return []

    async def parse_content(self, content: str, file_path: Path) -> List[CodeChunk]:
        """Parse JavaScript/TypeScript content and extract code chunks."""
        if not content.strip():
            return []
        return await self._regex_parse(content, file_path)

    async def _regex_parse(self, content: str, file_path: Path) -> List[CodeChunk]:
        """Parse JavaScript/TypeScript using regex patterns.

        Extracts functions, classes and (for TypeScript) interfaces; if none
        are found, the whole file becomes a single "module" chunk.
        """
        lines = self._split_into_lines(content)

        # Declaration patterns — group 1 always captures the name.
        function_patterns = [
            re.compile(r"^\s*function\s+(\w+)\s*\(", re.MULTILINE),  # function name()
            re.compile(r"^\s*const\s+(\w+)\s*=\s*\([^)]*\)\s*=>\s*{", re.MULTILINE),  # const name = () => {
            re.compile(r"^\s*const\s+(\w+)\s*=\s*function\s*\(", re.MULTILINE),  # const name = function(
            re.compile(r"^\s*(\w+)\s*:\s*function\s*\(", re.MULTILINE),  # name: function(
            re.compile(r"^\s*(\w+)\s*\([^)]*\)\s*{", re.MULTILINE),  # name() { (method)
            re.compile(r"^\s*async\s+function\s+(\w+)\s*\(", re.MULTILINE),  # async function name()
            re.compile(r"^\s*async\s+(\w+)\s*\([^)]*\)\s*{", re.MULTILINE),  # async name() {
        ]
        class_patterns = [
            re.compile(r"^\s*class\s+(\w+)", re.MULTILINE),  # class Name
            re.compile(r"^\s*export\s+class\s+(\w+)", re.MULTILINE),  # export class Name
            re.compile(r"^\s*export\s+default\s+class\s+(\w+)", re.MULTILINE),  # export default class Name
        ]
        interface_patterns = [
            re.compile(r"^\s*interface\s+(\w+)", re.MULTILINE),  # interface Name (TypeScript)
            re.compile(r"^\s*export\s+interface\s+(\w+)", re.MULTILINE),  # export interface Name
        ]

        # Module-level import/export lines, attached to every emitted chunk.
        import_pattern = re.compile(r"^\s*(import|export).*", re.MULTILINE)
        imports = [m.group(0).strip() for m in import_pattern.finditer(content)]

        chunks = self._collect_chunks(
            content, lines, file_path, function_patterns, "function", imports
        )
        chunks.extend(
            self._collect_chunks(content, lines, file_path, class_patterns, "class", imports)
        )
        if self.language == "typescript":
            chunks.extend(
                self._collect_chunks(
                    content, lines, file_path, interface_patterns, "interface", imports
                )
            )

        # If no specific chunks found, create a single chunk for the whole file.
        if not chunks:
            chunks.append(
                self._create_chunk(
                    content=content,
                    file_path=file_path,
                    start_line=1,
                    end_line=len(lines),
                    chunk_type="module",
                )
            )

        return chunks

    def _collect_chunks(
        self,
        content: str,
        lines: List[str],
        file_path: Path,
        patterns: List["re.Pattern[str]"],
        chunk_type: str,
        imports: List[str],
    ) -> List[CodeChunk]:
        """Collect all chunks of one kind (function/class/interface).

        Skips JS keywords misidentified as declaration names and deduplicates
        declarations matched by more than one pattern on the same line.
        """
        chunks: List[CodeChunk] = []
        seen = set()  # (start_line, name) pairs already emitted
        for pattern in patterns:
            for match in pattern.finditer(content):
                name = match.group(1)
                if name in self._JS_KEYWORDS:
                    continue  # control-flow statement, not a declaration
                start_line = content[: match.start()].count("\n") + 1
                if (start_line, name) in seen:
                    continue
                seen.add((start_line, name))

                # Find end of the declaration body by brace matching.
                end_line = self._find_block_end(lines, start_line, "{", "}")
                body = self._get_line_range(lines, start_line, end_line)
                if not body.strip():
                    continue

                # Extract the JSDoc comment immediately above, if any.
                jsdoc = self._extract_jsdoc(lines, start_line)
                name_field = (
                    {"function_name": name}
                    if chunk_type == "function"
                    else {"class_name": name}  # interfaces reuse class_name
                )
                chunk = self._create_chunk(
                    content=body,
                    file_path=file_path,
                    start_line=start_line,
                    end_line=end_line,
                    chunk_type=chunk_type,
                    docstring=jsdoc,
                    **name_field,
                )
                chunk.imports = imports
                chunks.append(chunk)
        return chunks

    def _find_block_end(
        self, lines: List[str], start_line: int, open_char: str, close_char: str
    ) -> int:
        """Find the end of a block by matching braces.

        Returns the 1-based number of the line containing the balancing
        close_char, or len(lines) if the block never balances. Braces inside
        strings/comments are counted too — a known heuristic limitation.
        """
        if start_line > len(lines):
            return len(lines)

        brace_count = 0
        found_opening = False
        for i in range(start_line - 1, len(lines)):
            for char in lines[i]:
                if char == open_char:
                    brace_count += 1
                    found_opening = True
                elif char == close_char:
                    brace_count -= 1
            if found_opening and brace_count == 0:
                return i + 1  # convert 0-based index to 1-based line number
        return len(lines)

    def _extract_jsdoc(self, lines: List[str], start_line: int) -> Optional[str]:
        """Extract the JSDoc comment preceding a function/class.

        Looks back up to ~8 lines for a closing "*/", then collects upward to
        the opening "/**". Returns the comment flattened to a single line, or
        None when no JSDoc is found before real code is hit.
        """
        if start_line <= 1:
            return None

        for i in range(start_line - 2, max(-1, start_line - 10), -1):
            line = lines[i].strip()

            if line.endswith("*/"):
                # Found end of JSDoc; collect lines upward to the opener.
                jsdoc_lines = []
                for j in range(i, -1, -1):
                    comment_line = lines[j].strip()
                    jsdoc_lines.insert(0, comment_line)

                    if comment_line.startswith("/**"):
                        # Strip comment markers and join into one line.
                        cleaned_lines = []
                        for raw in jsdoc_lines:
                            cleaned = (
                                raw.replace("/**", "")
                                .replace("*/", "")
                                .replace("*", "")
                                .strip()
                            )
                            if cleaned:
                                cleaned_lines.append(cleaned)
                        return " ".join(cleaned_lines) if cleaned_lines else None

            # If we hit non-comment code, stop looking.
            elif line and not line.startswith("//") and not line.startswith("*"):
                break

        return None

    def get_supported_extensions(self) -> List[str]:
        """Get supported file extensions for the configured language."""
        if self.language == "typescript":
            return [".ts", ".tsx"]
        return [".js", ".jsx", ".mjs"]
231
+
232
+
233
class TypeScriptParser(JavaScriptParser):
    """TypeScript parser extending JavaScript parser.

    Identical parsing logic; only the configured language differs, which
    switches on interface extraction and the TypeScript file extensions.
    """

    def __init__(self) -> None:
        """Create the parser preconfigured for TypeScript."""
        super().__init__(language="typescript")
@@ -0,0 +1,407 @@
1
+ """Python parser using Tree-sitter for MCP Vector Search."""
2
+
3
+ import re
4
+ from pathlib import Path
5
+ from typing import List, Optional
6
+
7
+ from loguru import logger
8
+
9
+ from ..core.models import CodeChunk
10
+ from .base import BaseParser
11
+
12
+
13
class PythonParser(BaseParser):
    """Python parser using Tree-sitter for AST-based code analysis.

    Uses the tree-sitter-languages bindings when importable; otherwise falls
    back to regex- and indentation-based heuristic parsing.
    """

    def __init__(self) -> None:
        """Initialize Python parser and attempt Tree-sitter setup."""
        super().__init__("python")
        self._parser = None  # Tree-sitter Parser, or None when unavailable
        self._language = None  # Tree-sitter Language handle, or None
        self._initialize_parser()

    def _initialize_parser(self) -> None:
        """Initialize Tree-sitter parser for Python.

        On any failure, leaves self._parser/self._language as None so that
        parse_content() falls back to regex-based parsing.
        """
        try:
            # Try the tree-sitter-languages package first
            import tree_sitter_languages

            self._language = tree_sitter_languages.get_language("python")
            self._parser = tree_sitter_languages.get_parser("python")
            logger.debug("Python Tree-sitter parser initialized via tree-sitter-languages")
            return
        except Exception as e:
            logger.debug(f"tree-sitter-languages failed: {e}")

        try:
            # Fallback to manual tree-sitter setup (requires language binaries)
            import tree_sitter

            # This would require language binaries to be available
            # For now, we'll skip this and rely on fallback parsing
            logger.debug("Manual tree-sitter setup not implemented yet")
            self._parser = None
            self._language = None
        except Exception as e:
            logger.debug(f"Manual tree-sitter setup failed: {e}")
            self._parser = None
            self._language = None

        logger.info("Using fallback regex-based parsing for Python (Tree-sitter unavailable)")

    async def parse_file(self, file_path: Path) -> List[CodeChunk]:
        """Parse a Python file and extract code chunks.

        Returns an empty list (and logs an error) when the file cannot be
        read or decoded as UTF-8.
        """
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                content = f.read()
            return await self.parse_content(content, file_path)
        except Exception as e:
            logger.error(f"Failed to read file {file_path}: {e}")
            return []

    async def parse_content(self, content: str, file_path: Path) -> List[CodeChunk]:
        """Parse Python content and extract code chunks."""
        if not content.strip():
            return []

        # If Tree-sitter is not available, fall back to simple parsing
        if not self._parser:
            return await self._fallback_parse(content, file_path)

        try:
            # Parse with Tree-sitter
            tree = self._parser.parse(content.encode("utf-8"))
            return self._extract_chunks_from_tree(tree, content, file_path)
        except Exception as e:
            logger.warning(f"Tree-sitter parsing failed for {file_path}: {e}")
            return await self._fallback_parse(content, file_path)

    def _extract_chunks_from_tree(
        self, tree, content: str, file_path: Path
    ) -> List[CodeChunk]:
        """Extract code chunks from a Tree-sitter AST.

        Walks the tree recursively; functions and classes become chunks, and
        methods are tagged with the enclosing class name. If nothing is
        extracted, the whole file becomes a single "module" chunk.
        """
        chunks: List[CodeChunk] = []
        lines = self._split_into_lines(content)

        def visit_node(node, current_class: Optional[str] = None) -> None:
            """Recursively visit AST nodes, carrying the enclosing class name."""
            node_type = node.type

            if node_type == "function_definition":
                chunks.extend(
                    self._extract_function(node, lines, file_path, current_class)
                )
            elif node_type == "class_definition":
                chunks.extend(self._extract_class(node, lines, file_path))

                # Visit class methods with class context
                class_name = self._get_node_name(node)
                for child in node.children:
                    visit_node(child, class_name)
            elif node_type == "module":
                # Extract module-level code (imports)
                module_chunk = self._extract_module_chunk(node, lines, file_path)
                if module_chunk:
                    chunks.append(module_chunk)

                for child in node.children:
                    visit_node(child)
            else:
                # Other node types: just recurse, preserving class context
                for child in node.children:
                    visit_node(child, current_class)

        visit_node(tree.root_node)

        # If no specific chunks found, create a single chunk for the whole file
        if not chunks:
            chunks.append(
                self._create_chunk(
                    content=content,
                    file_path=file_path,
                    start_line=1,
                    end_line=len(lines),
                    chunk_type="module",
                )
            )

        return chunks

    def _extract_function(
        self, node, lines: List[str], file_path: Path, class_name: Optional[str] = None
    ) -> List[CodeChunk]:
        """Extract a function definition node as a single-element chunk list."""
        function_name = self._get_node_name(node)
        # Tree-sitter points are 0-based (row, col); chunks use 1-based lines.
        start_line = node.start_point[0] + 1
        end_line = node.end_point[0] + 1

        content = self._get_line_range(lines, start_line, end_line)
        docstring = self._extract_docstring(node, lines)

        return [
            self._create_chunk(
                content=content,
                file_path=file_path,
                start_line=start_line,
                end_line=end_line,
                chunk_type="function",
                function_name=function_name,
                class_name=class_name,
                docstring=docstring,
            )
        ]

    def _extract_class(self, node, lines: List[str], file_path: Path) -> List[CodeChunk]:
        """Extract a class definition node as a single-element chunk list."""
        class_name = self._get_node_name(node)
        # Tree-sitter points are 0-based (row, col); chunks use 1-based lines.
        start_line = node.start_point[0] + 1
        end_line = node.end_point[0] + 1

        content = self._get_line_range(lines, start_line, end_line)
        docstring = self._extract_docstring(node, lines)

        return [
            self._create_chunk(
                content=content,
                file_path=file_path,
                start_line=start_line,
                end_line=end_line,
                chunk_type="class",
                class_name=class_name,
                docstring=docstring,
            )
        ]

    def _extract_module_chunk(
        self, node, lines: List[str], file_path: Path
    ) -> Optional[CodeChunk]:
        """Extract module-level import statements as one "imports" chunk.

        Returns None when the module has no imports.
        """
        module_lines = []

        for child in node.children:
            if child.type in ["import_statement", "import_from_statement"]:
                start_line = child.start_point[0] + 1
                end_line = child.end_point[0] + 1
                import_content = self._get_line_range(lines, start_line, end_line)
                module_lines.append(import_content.strip())

        if module_lines:
            # NOTE: end_line here is the count of collected import lines, not
            # the original file position — kept for compatibility with callers.
            return self._create_chunk(
                content="\n".join(module_lines),
                file_path=file_path,
                start_line=1,
                end_line=len(module_lines),
                chunk_type="imports",
            )

        return None

    def _get_node_name(self, node) -> Optional[str]:
        """Return the identifier child of a named node, or None."""
        for child in node.children:
            if child.type == "identifier":
                return child.text.decode("utf-8")
        return None

    def _extract_docstring(self, node, lines: List[str]) -> Optional[str]:
        """Extract the docstring of a function/class node, if present.

        Looks for a string expression that is the first statement of the
        node's block child.
        """
        for child in node.children:
            if child.type == "block":
                for stmt in child.children:
                    if stmt.type == "expression_statement":
                        for expr_child in stmt.children:
                            if expr_child.type == "string":
                                start_line = expr_child.start_point[0] + 1
                                end_line = expr_child.end_point[0] + 1
                                docstring = self._get_line_range(lines, start_line, end_line)
                                return self._clean_docstring(docstring)
        return None

    def _clean_docstring(self, docstring: str) -> str:
        """Strip surrounding quotes and whitespace from a raw docstring.

        Removes leading/trailing triple quotes first, then any remaining
        single leading/trailing quote characters.
        """
        # BUG FIX: the original pattern used "{{3}}", which matches a literal
        # "{3}" (brace escaping artifact) instead of repeating the quote
        # class three times, so triple quotes were never stripped.
        cleaned = re.sub(r'^["\']{3}|["\']{3}$', "", docstring.strip())
        cleaned = re.sub(r'^["\']|["\']$', "", cleaned.strip())
        return cleaned.strip()

    async def _fallback_parse(self, content: str, file_path: Path) -> List[CodeChunk]:
        """Fallback parsing using regex when Tree-sitter is not available.

        Finds def/class headers with regexes and delimits bodies by
        indentation. If nothing is found, the whole file becomes one
        "module" chunk.
        """
        chunks: List[CodeChunk] = []
        lines = self._split_into_lines(content)

        # BUG FIX: also match "async def ..." — the original pattern only
        # matched plain "def" and silently skipped coroutine functions.
        function_pattern = re.compile(r"^\s*(?:async\s+)?def\s+(\w+)\s*\(", re.MULTILINE)
        class_pattern = re.compile(r"^\s*class\s+(\w+)\s*[:\(]", re.MULTILINE)
        import_pattern = re.compile(r"^\s*(from\s+\S+\s+)?import\s+(.+)", re.MULTILINE)

        # Extract imports first; they are attached to every chunk.
        imports = [m.group(0).strip() for m in import_pattern.finditer(content)]

        # Find functions
        for match in function_pattern.finditer(content):
            function_name = match.group(1)
            # Locate the 'def' keyword inside the match to compute the line
            # number of the header itself.
            def_pos_in_match = match.group(0).find('def')
            actual_def_pos = match.start() + def_pos_in_match
            start_line = content[:actual_def_pos].count("\n") + 1

            # Find end of function (simple indentation heuristic)
            end_line = self._find_function_end(lines, start_line)
            func_content = self._get_line_range(lines, start_line, end_line)

            if func_content.strip():  # Only add if content is not empty
                docstring = self._extract_docstring_regex(func_content)
                chunk = self._create_chunk(
                    content=func_content,
                    file_path=file_path,
                    start_line=start_line,
                    end_line=end_line,
                    chunk_type="function",
                    function_name=function_name,
                    docstring=docstring,
                )
                chunk.imports = imports
                chunks.append(chunk)

        # Find classes
        for match in class_pattern.finditer(content):
            class_name = match.group(1)
            # Locate the 'class' keyword inside the match, as above.
            class_pos_in_match = match.group(0).find('class')
            actual_class_pos = match.start() + class_pos_in_match
            start_line = content[:actual_class_pos].count("\n") + 1

            # Find end of class (simple indentation heuristic)
            end_line = self._find_class_end(lines, start_line)
            class_content = self._get_line_range(lines, start_line, end_line)

            if class_content.strip():  # Only add if content is not empty
                docstring = self._extract_docstring_regex(class_content)
                chunk = self._create_chunk(
                    content=class_content,
                    file_path=file_path,
                    start_line=start_line,
                    end_line=end_line,
                    chunk_type="class",
                    class_name=class_name,
                    docstring=docstring,
                )
                chunk.imports = imports
                chunks.append(chunk)

        # If no functions or classes found, create one chunk for the whole file
        if not chunks:
            chunks.append(
                self._create_chunk(
                    content=content,
                    file_path=file_path,
                    start_line=1,
                    end_line=len(lines),
                    chunk_type="module",
                )
            )

        return chunks

    def _find_function_end(self, lines: List[str], start_line: int) -> int:
        """Find the end line of a function using indentation.

        Returns the 1-based number of the last line belonging to the
        definition (the line before the first non-empty line whose indent is
        <= the def line's indent). Decorators and multi-line signatures are
        not specially handled — heuristic only.
        """
        if start_line > len(lines):
            return len(lines)

        start_idx = start_line - 1
        if start_idx >= len(lines):
            return len(lines)

        def_line = lines[start_idx]
        def_indent = len(def_line) - len(def_line.lstrip())

        # Scan forward for the first non-empty line dedented back to (or past)
        # the def line's indentation level.
        for i in range(start_idx + 1, len(lines)):
            line = lines[i]
            if line.strip():  # skip blank lines
                current_indent = len(line) - len(line.lstrip())
                if current_indent <= def_indent:
                    # i is the 0-based index of the dedented line, so as a
                    # 1-based number it names the previous (last body) line.
                    return i

        # Function runs to the end of the file
        return len(lines)

    def _find_class_end(self, lines: List[str], start_line: int) -> int:
        """Find the end line of a class — same indentation heuristic as defs."""
        return self._find_function_end(lines, start_line)

    def _extract_docstring_regex(self, content: str) -> Optional[str]:
        """Extract a docstring from a def/class chunk using string scanning.

        Looks at the first few lines after the header for a triple-quoted
        string; multi-line docstrings are flattened to one line. Returns
        None if real code appears before any docstring.
        """
        lines = content.splitlines()
        if len(lines) < 2:
            return None

        # Skip the def/class line and look for a docstring in the next lines.
        for i in range(1, min(len(lines), 5)):
            line = lines[i].strip()
            if not line:
                continue

            if line.startswith('"""') or line.startswith("'''"):
                quote_type = line[:3]

                # Single-line docstring (opening and closing on one line)
                if line.endswith(quote_type) and len(line) > 6:
                    return line[3:-3].strip()

                # Multi-line docstring: collect until the closing quotes
                docstring_lines = [line[3:]]
                for j in range(i + 1, len(lines)):
                    next_line = lines[j].strip()
                    if next_line.endswith(quote_type):
                        docstring_lines.append(next_line[:-3])
                        break
                    docstring_lines.append(next_line)

                return " ".join(docstring_lines).strip()

            # If we hit non-docstring, non-comment code, stop looking
            if line and not line.startswith('#'):
                break

        return None

    def get_supported_extensions(self) -> List[str]:
        """Get supported file extensions."""
        return [".py", ".pyw"]