mcp-vector-search 0.0.3__py3-none-any.whl → 0.4.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcp-vector-search might be problematic.
- mcp_vector_search/__init__.py +3 -2
- mcp_vector_search/cli/commands/auto_index.py +397 -0
- mcp_vector_search/cli/commands/config.py +88 -40
- mcp_vector_search/cli/commands/index.py +198 -52
- mcp_vector_search/cli/commands/init.py +472 -58
- mcp_vector_search/cli/commands/install.py +284 -0
- mcp_vector_search/cli/commands/mcp.py +495 -0
- mcp_vector_search/cli/commands/search.py +241 -87
- mcp_vector_search/cli/commands/status.py +184 -58
- mcp_vector_search/cli/commands/watch.py +34 -35
- mcp_vector_search/cli/didyoumean.py +184 -0
- mcp_vector_search/cli/export.py +320 -0
- mcp_vector_search/cli/history.py +292 -0
- mcp_vector_search/cli/interactive.py +342 -0
- mcp_vector_search/cli/main.py +163 -26
- mcp_vector_search/cli/output.py +63 -45
- mcp_vector_search/config/defaults.py +50 -36
- mcp_vector_search/config/settings.py +49 -35
- mcp_vector_search/core/auto_indexer.py +298 -0
- mcp_vector_search/core/connection_pool.py +322 -0
- mcp_vector_search/core/database.py +335 -25
- mcp_vector_search/core/embeddings.py +73 -29
- mcp_vector_search/core/exceptions.py +19 -2
- mcp_vector_search/core/factory.py +310 -0
- mcp_vector_search/core/git_hooks.py +345 -0
- mcp_vector_search/core/indexer.py +237 -73
- mcp_vector_search/core/models.py +21 -19
- mcp_vector_search/core/project.py +73 -58
- mcp_vector_search/core/scheduler.py +330 -0
- mcp_vector_search/core/search.py +574 -86
- mcp_vector_search/core/watcher.py +48 -46
- mcp_vector_search/mcp/__init__.py +4 -0
- mcp_vector_search/mcp/__main__.py +25 -0
- mcp_vector_search/mcp/server.py +701 -0
- mcp_vector_search/parsers/base.py +30 -31
- mcp_vector_search/parsers/javascript.py +74 -48
- mcp_vector_search/parsers/python.py +57 -49
- mcp_vector_search/parsers/registry.py +47 -32
- mcp_vector_search/parsers/text.py +179 -0
- mcp_vector_search/utils/__init__.py +40 -0
- mcp_vector_search/utils/gitignore.py +229 -0
- mcp_vector_search/utils/timing.py +334 -0
- mcp_vector_search/utils/version.py +47 -0
- {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/METADATA +173 -7
- mcp_vector_search-0.4.11.dist-info/RECORD +54 -0
- mcp_vector_search-0.0.3.dist-info/RECORD +0 -35
- {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/WHEEL +0 -0
- {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/entry_points.txt +0 -0
- {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/licenses/LICENSE +0 -0

mcp_vector_search/parsers/python.py (removed lines appear truncated where the source diff view collapsed them)

@@ -2,7 +2,6 @@
 
 import re
 from pathlib import Path
-from typing import List, Optional
 
 from loguru import logger
 
@@ -23,19 +22,22 @@ class PythonParser(BaseParser):
     def _initialize_parser(self) -> None:
         """Initialize Tree-sitter parser for Python."""
         try:
-            # Try the tree-sitter-
-            import
+            # Try the tree-sitter-language-pack package (maintained alternative)
+            from tree_sitter_language_pack import get_language, get_parser
 
-
-            self.
-
+            # Get the language and parser objects
+            self._language = get_language("python")
+            self._parser = get_parser("python")
+
+            logger.debug(
+                "Python Tree-sitter parser initialized via tree-sitter-language-pack"
+            )
             return
         except Exception as e:
-            logger.debug(f"tree-sitter-
+            logger.debug(f"tree-sitter-language-pack failed: {e}")
 
         try:
             # Fallback to manual tree-sitter setup (requires language binaries)
-            import tree_sitter
 
             # This would require language binaries to be available
             # For now, we'll skip this and rely on fallback parsing
@@ -47,19 +49,21 @@ class PythonParser(BaseParser):
             self._parser = None
             self._language = None
 
-        logger.info(
+        logger.info(
+            "Using fallback regex-based parsing for Python (Tree-sitter unavailable)"
+        )
 
-    async def parse_file(self, file_path: Path) ->
+    async def parse_file(self, file_path: Path) -> list[CodeChunk]:
         """Parse a Python file and extract code chunks."""
         try:
-            with open(file_path,
+            with open(file_path, encoding="utf-8") as f:
                 content = f.read()
             return await self.parse_content(content, file_path)
         except Exception as e:
             logger.error(f"Failed to read file {file_path}: {e}")
             return []
 
-    async def parse_content(self, content: str, file_path: Path) ->
+    async def parse_content(self, content: str, file_path: Path) -> list[CodeChunk]:
         """Parse Python content and extract code chunks."""
         if not content.strip():
             return []
@@ -78,7 +82,7 @@ class PythonParser(BaseParser):
 
     def _extract_chunks_from_tree(
         self, tree, content: str, file_path: Path
-    ) ->
+    ) -> list[CodeChunk]:
         """Extract code chunks from Tree-sitter AST."""
         chunks = []
         lines = self._split_into_lines(content)
@@ -94,7 +98,7 @@ class PythonParser(BaseParser):
             elif node_type == "class_definition":
                 class_chunks = self._extract_class(node, lines, file_path)
                 chunks.extend(class_chunks)
-
+
                 # Visit class methods with class context
                 class_name = self._get_node_name(node)
                 for child in node.children:
@@ -104,7 +108,7 @@ class PythonParser(BaseParser):
                 module_chunk = self._extract_module_chunk(node, lines, file_path)
                 if module_chunk:
                     chunks.append(module_chunk)
-
+
             # Visit all children
             for child in node.children:
                 visit_node(child)
@@ -115,7 +119,7 @@ class PythonParser(BaseParser):
 
         # Start traversal from root
         visit_node(tree.root_node)
-
+
         # If no specific chunks found, create a single chunk for the whole file
         if not chunks:
             chunks.append(
@@ -131,21 +135,21 @@ class PythonParser(BaseParser):
         return chunks
 
     def _extract_function(
-        self, node, lines:
-    ) ->
+        self, node, lines: list[str], file_path: Path, class_name: str | None = None
+    ) -> list[CodeChunk]:
         """Extract function definition as a chunk."""
         chunks = []
-
+
         function_name = self._get_node_name(node)
         start_line = node.start_point[0] + 1
         end_line = node.end_point[0] + 1
-
+
         # Get function content
         content = self._get_line_range(lines, start_line, end_line)
-
+
         # Extract docstring if present
         docstring = self._extract_docstring(node, lines)
-
+
         chunk = self._create_chunk(
             content=content,
             file_path=file_path,
@@ -157,23 +161,25 @@ class PythonParser(BaseParser):
             docstring=docstring,
         )
         chunks.append(chunk)
-
+
         return chunks
 
-    def _extract_class(
+    def _extract_class(
+        self, node, lines: list[str], file_path: Path
+    ) -> list[CodeChunk]:
         """Extract class definition as a chunk."""
         chunks = []
-
+
         class_name = self._get_node_name(node)
         start_line = node.start_point[0] + 1
         end_line = node.end_point[0] + 1
-
+
         # Get class content
         content = self._get_line_range(lines, start_line, end_line)
-
+
         # Extract docstring if present
         docstring = self._extract_docstring(node, lines)
-
+
         chunk = self._create_chunk(
             content=content,
             file_path=file_path,
@@ -184,23 +190,23 @@ class PythonParser(BaseParser):
             docstring=docstring,
         )
         chunks.append(chunk)
-
+
         return chunks
 
     def _extract_module_chunk(
-        self, node, lines:
-    ) ->
+        self, node, lines: list[str], file_path: Path
+    ) -> CodeChunk | None:
        """Extract module-level code (imports, constants, etc.)."""
         # Look for module-level statements (not inside functions/classes)
         module_lines = []
-
+
         for child in node.children:
             if child.type in ["import_statement", "import_from_statement"]:
                 start_line = child.start_point[0] + 1
                 end_line = child.end_point[0] + 1
                 import_content = self._get_line_range(lines, start_line, end_line)
                 module_lines.append(import_content.strip())
-
+
         if module_lines:
             content = "\n".join(module_lines)
             return self._create_chunk(
@@ -210,17 +216,17 @@ class PythonParser(BaseParser):
                 end_line=len(module_lines),
                 chunk_type="imports",
             )
-
+
         return None
 
-    def _get_node_name(self, node) ->
+    def _get_node_name(self, node) -> str | None:
         """Extract name from a named node (function, class, etc.)."""
         for child in node.children:
             if child.type == "identifier":
                 return child.text.decode("utf-8")
         return None
 
-    def _extract_docstring(self, node, lines:
+    def _extract_docstring(self, node, lines: list[str]) -> str | None:
         """Extract docstring from a function or class node."""
         # Look for string literal as first statement in body
         for child in node.children:
@@ -232,7 +238,9 @@ class PythonParser(BaseParser):
                     # Extract string content
                     start_line = expr_child.start_point[0] + 1
                     end_line = expr_child.end_point[0] + 1
-                    docstring = self._get_line_range(
+                    docstring = self._get_line_range(
+                        lines, start_line, end_line
+                    )
                     # Clean up docstring (remove quotes)
                     return self._clean_docstring(docstring)
         return None
@@ -244,7 +252,7 @@ class PythonParser(BaseParser):
         cleaned = re.sub(r'^["\']|["\']$', "", cleaned.strip())
         return cleaned.strip()
 
-    async def _fallback_parse(self, content: str, file_path: Path) ->
+    async def _fallback_parse(self, content: str, file_path: Path) -> list[CodeChunk]:
         """Fallback parsing using regex when Tree-sitter is not available."""
         chunks = []
         lines = self._split_into_lines(content)
@@ -259,13 +267,13 @@ class PythonParser(BaseParser):
         for match in import_pattern.finditer(content):
             import_line = match.group(0).strip()
             imports.append(import_line)
-
+
         # Find functions
         for match in function_pattern.finditer(content):
             function_name = match.group(1)
             # Find the actual line with 'def' by looking for it in the match
             match_text = match.group(0)
-            def_pos_in_match = match_text.find(
+            def_pos_in_match = match_text.find("def")
             actual_def_pos = match.start() + def_pos_in_match
             start_line = content[:actual_def_pos].count("\n") + 1
 
@@ -289,13 +297,13 @@ class PythonParser(BaseParser):
             )
             chunk.imports = imports  # Add imports to chunk
             chunks.append(chunk)
-
+
         # Find classes
         for match in class_pattern.finditer(content):
             class_name = match.group(1)
             # Find the actual line with 'class' by looking for it in the match
             match_text = match.group(0)
-            class_pos_in_match = match_text.find(
+            class_pos_in_match = match_text.find("class")
             actual_class_pos = match.start() + class_pos_in_match
             start_line = content[:actual_class_pos].count("\n") + 1
 
@@ -319,7 +327,7 @@ class PythonParser(BaseParser):
             )
             chunk.imports = imports  # Add imports to chunk
             chunks.append(chunk)
-
+
         # If no functions or classes found, create chunks for the whole file
         if not chunks:
             chunks.append(
@@ -331,10 +339,10 @@ class PythonParser(BaseParser):
                     chunk_type="module",
                 )
             )
-
+
         return chunks
 
-    def _find_function_end(self, lines:
+    def _find_function_end(self, lines: list[str], start_line: int) -> int:
         """Find the end line of a function using indentation."""
         if start_line > len(lines):
             return len(lines)
@@ -359,11 +367,11 @@ class PythonParser(BaseParser):
         # If we reach here, the function goes to the end of the file
         return len(lines)
 
-    def _find_class_end(self, lines:
+    def _find_class_end(self, lines: list[str], start_line: int) -> int:
         """Find the end line of a class using indentation."""
         return self._find_function_end(lines, start_line)
 
-    def _extract_docstring_regex(self, content: str) ->
+    def _extract_docstring_regex(self, content: str) -> str | None:
         """Extract docstring using regex patterns."""
         # Look for triple-quoted strings at the beginning of the content
         # after the def/class line
@@ -397,11 +405,11 @@ class PythonParser(BaseParser):
                 return " ".join(docstring_lines).strip()
 
             # If we hit non-docstring code, stop looking
-            if line and not line.startswith(
+            if line and not line.startswith("#"):
                 break
 
         return None
 
-    def get_supported_extensions(self) ->
+    def get_supported_extensions(self) -> list[str]:
         """Get supported file extensions."""
         return [".py", ".pyw"]
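
The initialization change above swaps the previous Tree-sitter setup for the tree_sitter_language_pack helpers get_language and get_parser. A minimal sketch of how those two calls are typically used, assuming the package is installed; the sample source, node-type filter, and print loop are illustrative and are not code from mcp-vector-search:

from tree_sitter_language_pack import get_language, get_parser

# The same two helpers the new _initialize_parser stores on the parser instance.
language = get_language("python")
parser = get_parser("python")

# Tree-sitter parses bytes; this source string is purely an example.
source = b'def greet(name):\n    """Say hello."""\n    return name\n'
tree = parser.parse(source)

# Walk top-level nodes, locating definitions the way _extract_chunks_from_tree does:
# start_point/end_point are zero-based (row, column) pairs, hence the +1 for line numbers.
for node in tree.root_node.children:
    if node.type in ("function_definition", "class_definition"):
        print(node.type, node.start_point[0] + 1, node.end_point[0] + 1)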
mcp_vector_search/parsers/registry.py

@@ -1,13 +1,13 @@
 """Parser registry for MCP Vector Search."""
 
 from pathlib import Path
-from typing import Dict, List, Optional
 
 from loguru import logger
 
 from .base import BaseParser, FallbackParser
-from .python import PythonParser
 from .javascript import JavaScriptParser, TypeScriptParser
+from .python import PythonParser
+from .text import TextParser
 
 
 class ParserRegistry:
@@ -15,10 +15,16 @@ class ParserRegistry:
 
     def __init__(self) -> None:
         """Initialize parser registry."""
-        self._parsers:
-        self._extension_map:
+        self._parsers: dict[str, BaseParser] = {}
+        self._extension_map: dict[str, str] = {}
         self._fallback_parser = FallbackParser()
-        self.
+        self._initialized = False
+
+    def _ensure_initialized(self) -> None:
+        """Ensure parsers are initialized (lazy initialization)."""
+        if not self._initialized:
+            self._register_default_parsers()
+            self._initialized = True
 
     def _register_default_parsers(self) -> None:
         """Register default parsers for supported languages."""
@@ -33,72 +39,79 @@ class ParserRegistry:
         # Register TypeScript parser
         typescript_parser = TypeScriptParser()
         self.register_parser("typescript", typescript_parser)
+
+        # Register Text parser for .txt files
+        text_parser = TextParser()
+        self.register_parser("text", text_parser)
 
     def register_parser(self, language: str, parser: BaseParser) -> None:
         """Register a parser for a specific language.
-
+
         Args:
             language: Language name
             parser: Parser instance
         """
         self._parsers[language] = parser
-
+
         # Map file extensions to language
         for ext in parser.get_supported_extensions():
             if ext != "*":  # Skip fallback marker
                 self._extension_map[ext.lower()] = language
-
+
         logger.debug(f"Registered parser for {language}: {parser.__class__.__name__}")
 
     def get_parser(self, file_extension: str) -> BaseParser:
         """Get parser for a file extension.
-
+
         Args:
             file_extension: File extension (including dot)
-
+
         Returns:
             Parser instance (fallback parser if no specific parser found)
         """
+        self._ensure_initialized()
         language = self._extension_map.get(file_extension.lower())
         if language and language in self._parsers:
             return self._parsers[language]
-
+
         # Return fallback parser for unsupported extensions
         return self._fallback_parser
 
     def get_parser_for_file(self, file_path: Path) -> BaseParser:
         """Get parser for a specific file.
-
+
         Args:
             file_path: Path to the file
-
+
         Returns:
             Parser instance
         """
         return self.get_parser(file_path.suffix)
 
-    def get_supported_languages(self) ->
+    def get_supported_languages(self) -> list[str]:
         """Get list of supported languages.
-
+
         Returns:
             List of language names
         """
+        self._ensure_initialized()
         return list(self._parsers.keys())
 
-    def get_supported_extensions(self) ->
+    def get_supported_extensions(self) -> list[str]:
         """Get list of supported file extensions.
-
+
         Returns:
             List of file extensions
         """
+        self._ensure_initialized()
         return list(self._extension_map.keys())
 
     def is_supported(self, file_extension: str) -> bool:
         """Check if a file extension is supported.
-
+
         Args:
             file_extension: File extension to check
-
+
         Returns:
             True if supported (always True due to fallback parser)
         """
@@ -106,37 +119,39 @@ class ParserRegistry:
 
     def get_language_for_extension(self, file_extension: str) -> str:
         """Get language name for a file extension.
-
+
         Args:
             file_extension: File extension
-
+
         Returns:
             Language name (or "text" for unsupported extensions)
         """
+        self._ensure_initialized()
         return self._extension_map.get(file_extension.lower(), "text")
 
-    def get_parser_info(self) ->
+    def get_parser_info(self) -> dict[str, dict[str, any]]:
         """Get information about registered parsers.
-
+
         Returns:
             Dictionary with parser information
         """
+        self._ensure_initialized()
         info = {}
-
+
         for language, parser in self._parsers.items():
             info[language] = {
                 "class": parser.__class__.__name__,
                 "extensions": parser.get_supported_extensions(),
                 "language": parser.language,
             }
-
+
         # Add fallback parser info
         info["fallback"] = {
             "class": self._fallback_parser.__class__.__name__,
             "extensions": ["*"],
             "language": self._fallback_parser.language,
         }
-
+
         return info
 
 
@@ -146,7 +161,7 @@ _registry = ParserRegistry()
 
 def get_parser_registry() -> ParserRegistry:
     """Get the global parser registry instance.
-
+
     Returns:
         Parser registry instance
     """
@@ -155,7 +170,7 @@ def get_parser_registry() -> ParserRegistry:
 
 def register_parser(language: str, parser: BaseParser) -> None:
     """Register a parser in the global registry.
-
+
     Args:
         language: Language name
         parser: Parser instance
@@ -165,10 +180,10 @@ def register_parser(language: str, parser: BaseParser) -> None:
 
 def get_parser(file_extension: str) -> BaseParser:
     """Get parser for a file extension from the global registry.
-
+
     Args:
         file_extension: File extension
-
+
     Returns:
         Parser instance
     """
@@ -177,10 +192,10 @@ def get_parser(file_extension: str) -> BaseParser:
 
 def get_parser_for_file(file_path: Path) -> BaseParser:
     """Get parser for a file from the global registry.
-
+
     Args:
         file_path: File path
-
+
     Returns:
         Parser instance
     """