mcp-vector-search 0.0.3 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcp-vector-search might be problematic.
- mcp_vector_search/__init__.py +9 -0
- mcp_vector_search/cli/__init__.py +1 -0
- mcp_vector_search/cli/commands/__init__.py +1 -0
- mcp_vector_search/cli/commands/config.py +303 -0
- mcp_vector_search/cli/commands/index.py +304 -0
- mcp_vector_search/cli/commands/init.py +212 -0
- mcp_vector_search/cli/commands/search.py +395 -0
- mcp_vector_search/cli/commands/status.py +340 -0
- mcp_vector_search/cli/commands/watch.py +288 -0
- mcp_vector_search/cli/main.py +117 -0
- mcp_vector_search/cli/output.py +242 -0
- mcp_vector_search/config/__init__.py +1 -0
- mcp_vector_search/config/defaults.py +175 -0
- mcp_vector_search/config/settings.py +108 -0
- mcp_vector_search/core/__init__.py +1 -0
- mcp_vector_search/core/database.py +431 -0
- mcp_vector_search/core/embeddings.py +250 -0
- mcp_vector_search/core/exceptions.py +66 -0
- mcp_vector_search/core/indexer.py +310 -0
- mcp_vector_search/core/models.py +174 -0
- mcp_vector_search/core/project.py +304 -0
- mcp_vector_search/core/search.py +324 -0
- mcp_vector_search/core/watcher.py +320 -0
- mcp_vector_search/mcp/__init__.py +1 -0
- mcp_vector_search/parsers/__init__.py +1 -0
- mcp_vector_search/parsers/base.py +180 -0
- mcp_vector_search/parsers/javascript.py +238 -0
- mcp_vector_search/parsers/python.py +407 -0
- mcp_vector_search/parsers/registry.py +187 -0
- mcp_vector_search/py.typed +1 -0
- mcp_vector_search-0.0.3.dist-info/METADATA +333 -0
- mcp_vector_search-0.0.3.dist-info/RECORD +35 -0
- mcp_vector_search-0.0.3.dist-info/WHEEL +4 -0
- mcp_vector_search-0.0.3.dist-info/entry_points.txt +2 -0
- mcp_vector_search-0.0.3.dist-info/licenses/LICENSE +21 -0

mcp_vector_search/core/watcher.py
@@ -0,0 +1,320 @@
+"""File system watcher for incremental indexing."""
+
+import asyncio
+from pathlib import Path
+from typing import List, Optional, Set, Callable, Awaitable, Union
+from threading import Thread
+import time
+from concurrent.futures import Future
+
+from loguru import logger
+from watchdog.observers import Observer
+from watchdog.events import FileSystemEventHandler, FileSystemEvent
+
+from ..config.settings import ProjectConfig
+from .indexer import SemanticIndexer
+from .database import ChromaVectorDatabase
+
+
+class CodeFileHandler(FileSystemEventHandler):
+    """Handler for code file changes."""
+
+    def __init__(
+        self,
+        file_extensions: List[str],
+        ignore_patterns: List[str],
+        callback: Callable[[str, str], Awaitable[None]],
+        loop: asyncio.AbstractEventLoop,
+        debounce_delay: float = 1.0,
+    ):
+        """Initialize file handler.
+
+        Args:
+            file_extensions: List of file extensions to watch
+            ignore_patterns: List of patterns to ignore
+            callback: Async callback function for file changes
+            loop: Event loop to schedule tasks on
+            debounce_delay: Delay in seconds to debounce rapid changes
+        """
+        super().__init__()
+        self.file_extensions = set(file_extensions)
+        self.ignore_patterns = ignore_patterns
+        self.callback = callback
+        self.loop = loop
+        self.debounce_delay = debounce_delay
+        self.pending_changes: Set[str] = set()
+        self.last_change_time: float = 0
+        self.debounce_task: Optional[Union[asyncio.Task, Future]] = None
+
+    def should_process_file(self, file_path: str) -> bool:
+        """Check if file should be processed."""
+        path = Path(file_path)
+
+        # Check file extension
+        if path.suffix not in self.file_extensions:
+            return False
+
+        # Check ignore patterns
+        for pattern in self.ignore_patterns:
+            if pattern in str(path):
+                return False
+
+        return True
+
+    def on_modified(self, event: FileSystemEvent) -> None:
+        """Handle file modification."""
+        if not event.is_directory and self.should_process_file(event.src_path):
+            self._schedule_change(event.src_path, "modified")
+
+    def on_created(self, event: FileSystemEvent) -> None:
+        """Handle file creation."""
+        if not event.is_directory and self.should_process_file(event.src_path):
+            self._schedule_change(event.src_path, "created")
+
+    def on_deleted(self, event: FileSystemEvent) -> None:
+        """Handle file deletion."""
+        if not event.is_directory and self.should_process_file(event.src_path):
+            self._schedule_change(event.src_path, "deleted")
+
+    def on_moved(self, event: FileSystemEvent) -> None:
+        """Handle file move/rename."""
+        if hasattr(event, 'dest_path'):
+            # Handle rename/move
+            if not event.is_directory:
+                if self.should_process_file(event.src_path):
+                    self._schedule_change(event.src_path, "deleted")
+                if self.should_process_file(event.dest_path):
+                    self._schedule_change(event.dest_path, "created")
+
+    def _schedule_change(self, file_path: str, change_type: str) -> None:
+        """Schedule a file change for processing with debouncing."""
+        self.pending_changes.add(f"{change_type}:{file_path}")
+        self.last_change_time = time.time()
+
+        # Cancel existing debounce task
+        if self.debounce_task and not self.debounce_task.done():
+            self.debounce_task.cancel()
+
+        # Schedule new debounce task using the stored loop
+        future = asyncio.run_coroutine_threadsafe(
+            self._debounced_process(), self.loop
+        )
+        # Store the future as our task (it has a done() method)
+        self.debounce_task = future
+
+    async def _debounced_process(self) -> None:
+        """Process pending changes after debounce delay."""
+        await asyncio.sleep(self.debounce_delay)
+
+        # Check if more changes occurred during debounce
+        if time.time() - self.last_change_time < self.debounce_delay:
+            return
+
+        # Process all pending changes
+        changes = self.pending_changes.copy()
+        self.pending_changes.clear()
+
+        for change in changes:
+            change_type, file_path = change.split(":", 1)
+            try:
+                await self.callback(file_path, change_type)
+            except Exception as e:
+                logger.error(f"Error processing file change {file_path}: {e}")
+
+
+class FileWatcher:
+    """File system watcher for incremental indexing."""
+
+    def __init__(
+        self,
+        project_root: Path,
+        config: ProjectConfig,
+        indexer: SemanticIndexer,
+        database: ChromaVectorDatabase,
+    ):
+        """Initialize file watcher.
+
+        Args:
+            project_root: Root directory to watch
+            config: Project configuration
+            indexer: Semantic indexer instance
+            database: Vector database instance
+        """
+        self.project_root = project_root
+        self.config = config
+        self.indexer = indexer
+        self.database = database
+        self.observer: Optional[Observer] = None
+        self.handler: Optional[CodeFileHandler] = None
+        self.is_running = False
+
+    async def start(self) -> None:
+        """Start watching for file changes."""
+        if self.is_running:
+            logger.warning("File watcher is already running")
+            return
+
+        logger.info(f"Starting file watcher for {self.project_root}")
+
+        # Create handler
+        loop = asyncio.get_running_loop()
+        self.handler = CodeFileHandler(
+            file_extensions=self.config.file_extensions,
+            ignore_patterns=self._get_ignore_patterns(),
+            callback=self._handle_file_change,
+            loop=loop,
+            debounce_delay=1.0,
+        )
+
+        # Create observer
+        self.observer = Observer()
+        self.observer.schedule(
+            self.handler,
+            str(self.project_root),
+            recursive=True
+        )
+
+        # Start observer in a separate thread
+        self.observer.start()
+        self.is_running = True
+
+        logger.info("File watcher started successfully")
+
+    async def stop(self) -> None:
+        """Stop watching for file changes."""
+        if not self.is_running:
+            return
+
+        logger.info("Stopping file watcher")
+
+        if self.observer:
+            self.observer.stop()
+            self.observer.join()
+            self.observer = None
+
+        self.handler = None
+        self.is_running = False
+
+        logger.info("File watcher stopped")
+
+    def _get_ignore_patterns(self) -> List[str]:
+        """Get patterns to ignore during watching."""
+        default_patterns = [
+            ".git", ".svn", ".hg",
+            "__pycache__", ".pytest_cache",
+            "node_modules", ".venv", "venv",
+            ".DS_Store", "Thumbs.db",
+            ".idea", ".vscode",
+            "build", "dist", "target",
+            ".mcp-vector-search",  # Ignore our own index directory
+        ]
+
+        # Add any custom ignore patterns from config
+        # TODO: Add custom ignore patterns to config
+        return default_patterns
+
+    async def _handle_file_change(self, file_path: str, change_type: str) -> None:
+        """Handle a file change event.
+
+        Args:
+            file_path: Path to the changed file
+            change_type: Type of change (created, modified, deleted)
+        """
+        path = Path(file_path)
+        logger.debug(f"Processing file change: {change_type} {path}")
+
+        try:
+            if change_type == "deleted":
+                # Remove chunks for deleted file
+                await self._remove_file_chunks(path)
+            elif change_type in ("created", "modified"):
+                # Re-index the file
+                await self._reindex_file(path)
+
+            logger.info(f"Processed {change_type} for {path.name}")
+
+        except Exception as e:
+            logger.error(f"Failed to process {change_type} for {path}: {e}")
+
+    async def _remove_file_chunks(self, file_path: Path) -> None:
+        """Remove all chunks for a deleted file."""
+        # Get relative path for consistent IDs
+        try:
+            relative_path = file_path.relative_to(self.project_root)
+        except ValueError:
+            relative_path = file_path
+
+        # Remove chunks from database
+        await self.database.remove_file_chunks(str(relative_path))
+        logger.debug(f"Removed chunks for deleted file: {relative_path}")
+
+    async def _reindex_file(self, file_path: Path) -> None:
+        """Re-index a single file."""
+        if not file_path.exists():
+            logger.warning(f"File no longer exists: {file_path}")
+            return
+
+        # Remove existing chunks first
+        await self._remove_file_chunks(file_path)
+
+        # Index the file
+        chunks_indexed = await self.indexer.index_file(file_path)
+        logger.debug(f"Re-indexed {file_path.name}: {chunks_indexed} chunks")
+
+    async def __aenter__(self):
+        """Async context manager entry."""
+        await self.start()
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Async context manager exit."""
+        await self.stop()
+
+
+class WatcherManager:
+    """Manager for file watchers across multiple projects."""
+
+    def __init__(self):
+        """Initialize watcher manager."""
+        self.watchers: dict[str, FileWatcher] = {}
+
+    async def start_watcher(
+        self,
+        project_root: Path,
+        config: ProjectConfig,
+        indexer: SemanticIndexer,
+        database: ChromaVectorDatabase,
+    ) -> FileWatcher:
+        """Start a file watcher for a project."""
+        project_key = str(project_root)
+
+        if project_key in self.watchers:
+            logger.warning(f"Watcher already exists for {project_root}")
+            return self.watchers[project_key]
+
+        watcher = FileWatcher(project_root, config, indexer, database)
+        await watcher.start()
+
+        self.watchers[project_key] = watcher
+        return watcher
+
+    async def stop_watcher(self, project_root: Path) -> None:
+        """Stop a file watcher for a project."""
+        project_key = str(project_root)
+
+        if project_key not in self.watchers:
+            logger.warning(f"No watcher found for {project_root}")
+            return
+
+        watcher = self.watchers.pop(project_key)
+        await watcher.stop()
+
+    async def stop_all(self) -> None:
+        """Stop all file watchers."""
+        for watcher in list(self.watchers.values()):
+            await watcher.stop()
+        self.watchers.clear()
+
+    def is_watching(self, project_root: Path) -> bool:
+        """Check if a project is being watched."""
+        return str(project_root) in self.watchers
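
For orientation, here is a minimal sketch of driving this watcher from an async entrypoint. FileWatcher's async-context-manager behavior is taken directly from the code above; the ProjectConfig, ChromaVectorDatabase, and SemanticIndexer constructor arguments are assumptions, since their real signatures live in other files of this release.

    import asyncio
    from pathlib import Path

    from mcp_vector_search.config.settings import ProjectConfig
    from mcp_vector_search.core.database import ChromaVectorDatabase
    from mcp_vector_search.core.indexer import SemanticIndexer
    from mcp_vector_search.core.watcher import FileWatcher

    async def main() -> None:
        root = Path(".")
        # Hypothetical constructor arguments; the actual signatures are
        # defined in settings.py, database.py, and indexer.py above.
        config = ProjectConfig(file_extensions=[".py"])
        database = ChromaVectorDatabase(root / ".mcp-vector-search")
        indexer = SemanticIndexer(database, config)

        # FileWatcher is an async context manager: start() on enter,
        # stop() on exit. Changes are debounced for 1 second before the
        # affected files are re-indexed.
        async with FileWatcher(root, config, indexer, database):
            await asyncio.sleep(3600)  # watch for an hour

    asyncio.run(main())

Note the threading handoff this file implements: watchdog delivers events on its observer thread, and _schedule_change uses asyncio.run_coroutine_threadsafe to hop back onto the event loop that start() captured.
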
mcp_vector_search/mcp/__init__.py
@@ -0,0 +1 @@
+"""MCP server integration for MCP Vector Search."""

mcp_vector_search/parsers/__init__.py
@@ -0,0 +1 @@
+"""Language parsers for MCP Vector Search."""

mcp_vector_search/parsers/base.py
@@ -0,0 +1,180 @@
+"""Base parser interface for MCP Vector Search."""
+
+from abc import ABC, abstractmethod
+from pathlib import Path
+from typing import List, Optional
+
+from ..core.models import CodeChunk
+
+
+class BaseParser(ABC):
+    """Abstract base class for language parsers."""
+
+    def __init__(self, language: str) -> None:
+        """Initialize parser for a specific language.
+
+        Args:
+            language: Programming language name
+        """
+        self.language = language
+
+    @abstractmethod
+    async def parse_file(self, file_path: Path) -> List[CodeChunk]:
+        """Parse a file and extract code chunks.
+
+        Args:
+            file_path: Path to the file to parse
+
+        Returns:
+            List of code chunks extracted from the file
+        """
+        ...
+
+    @abstractmethod
+    async def parse_content(self, content: str, file_path: Path) -> List[CodeChunk]:
+        """Parse content and extract code chunks.
+
+        Args:
+            content: File content to parse
+            file_path: Path to the source file (for metadata)
+
+        Returns:
+            List of code chunks extracted from the content
+        """
+        ...
+
+    def supports_file(self, file_path: Path) -> bool:
+        """Check if this parser supports the given file.
+
+        Args:
+            file_path: Path to check
+
+        Returns:
+            True if this parser can handle the file
+        """
+        return file_path.suffix.lower() in self.get_supported_extensions()
+
+    @abstractmethod
+    def get_supported_extensions(self) -> List[str]:
+        """Get list of file extensions supported by this parser.
+
+        Returns:
+            List of file extensions (including the dot)
+        """
+        ...
+
+    def _create_chunk(
+        self,
+        content: str,
+        file_path: Path,
+        start_line: int,
+        end_line: int,
+        chunk_type: str = "code",
+        function_name: Optional[str] = None,
+        class_name: Optional[str] = None,
+        docstring: Optional[str] = None,
+    ) -> CodeChunk:
+        """Create a code chunk with metadata.
+
+        Args:
+            content: Code content
+            file_path: Source file path
+            start_line: Starting line number (1-based)
+            end_line: Ending line number (1-based)
+            chunk_type: Type of chunk (code, function, class, etc.)
+            function_name: Function name if applicable
+            class_name: Class name if applicable
+            docstring: Docstring if applicable
+
+        Returns:
+            CodeChunk instance
+        """
+        return CodeChunk(
+            content=content.strip(),
+            file_path=file_path,
+            start_line=start_line,
+            end_line=end_line,
+            language=self.language,
+            chunk_type=chunk_type,
+            function_name=function_name,
+            class_name=class_name,
+            docstring=docstring,
+        )
+
+    def _split_into_lines(self, content: str) -> List[str]:
+        """Split content into lines, preserving line endings.
+
+        Args:
+            content: Content to split
+
+        Returns:
+            List of lines
+        """
+        return content.splitlines(keepends=True)
+
+    def _get_line_range(self, lines: List[str], start_line: int, end_line: int) -> str:
+        """Extract a range of lines from content.
+
+        Args:
+            lines: List of lines
+            start_line: Starting line number (1-based)
+            end_line: Ending line number (1-based)
+
+        Returns:
+            Content for the specified line range
+        """
+        # Convert to 0-based indexing
+        start_idx = max(0, start_line - 1)
+        end_idx = min(len(lines), end_line)
+
+        return "".join(lines[start_idx:end_idx])
+
+
+class FallbackParser(BaseParser):
+    """Fallback parser for unsupported languages using simple text chunking."""
+
+    def __init__(self, language: str = "text") -> None:
+        """Initialize fallback parser."""
+        super().__init__(language)
+
+    async def parse_file(self, file_path: Path) -> List[CodeChunk]:
+        """Parse file using simple text chunking."""
+        try:
+            with open(file_path, "r", encoding="utf-8") as f:
+                content = f.read()
+            return await self.parse_content(content, file_path)
+        except Exception:
+            # Return empty list if file can't be read
+            return []
+
+    async def parse_content(self, content: str, file_path: Path) -> List[CodeChunk]:
+        """Parse content using simple text chunking."""
+        if not content.strip():
+            return []
+
+        lines = self._split_into_lines(content)
+        chunks = []
+
+        # Simple chunking: split into chunks of ~50 lines
+        chunk_size = 50
+        for i in range(0, len(lines), chunk_size):
+            start_line = i + 1
+            end_line = min(i + chunk_size, len(lines))
+
+            chunk_content = self._get_line_range(lines, start_line, end_line)
+
+            if chunk_content.strip():
+                chunk = self._create_chunk(
+                    content=chunk_content,
+                    file_path=file_path,
+                    start_line=start_line,
+                    end_line=end_line,
+                    chunk_type="text",
+                )
+                chunks.append(chunk)
+
+        return chunks
+
+    def get_supported_extensions(self) -> List[str]:
+        """Fallback parser supports all extensions."""
+        return ["*"]  # Special marker for "all extensions"
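
To illustrate the contract BaseParser defines, here is a minimal hypothetical subclass, not part of this release: a Markdown parser that emits the whole file as one chunk, using only the base-class helpers shown above.

    from pathlib import Path
    from typing import List

    from mcp_vector_search.core.models import CodeChunk
    from mcp_vector_search.parsers.base import BaseParser

    class MarkdownParser(BaseParser):
        """Hypothetical parser: one chunk per file, just to show the contract."""

        def __init__(self) -> None:
            super().__init__("markdown")

        def get_supported_extensions(self) -> List[str]:
            # supports_file() matches on these lowercase suffixes.
            return [".md", ".markdown"]

        async def parse_file(self, file_path: Path) -> List[CodeChunk]:
            content = file_path.read_text(encoding="utf-8")
            return await self.parse_content(content, file_path)

        async def parse_content(self, content: str, file_path: Path) -> List[CodeChunk]:
            lines = self._split_into_lines(content)
            if not lines:
                return []
            # Emit the whole file as a single "text" chunk via the base helper.
            return [
                self._create_chunk(
                    content=content,
                    file_path=file_path,
                    start_line=1,
                    end_line=len(lines),
                    chunk_type="text",
                )
            ]

A real implementation would chunk by heading or by syntax node (as the tree-sitter-based python.py and javascript.py parsers in this release presumably do), but the required surface is just these four methods.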