mcp-code-indexer 4.0.2-py3-none-any.whl → 4.2.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_code_indexer/database/models.py +125 -1
- mcp_code_indexer/main.py +60 -0
- mcp_code_indexer/migrations/006_vector_mode.sql +189 -0
- mcp_code_indexer/server/mcp_server.py +3 -0
- mcp_code_indexer/vector_mode/__init__.py +36 -0
- mcp_code_indexer/vector_mode/chunking/__init__.py +19 -0
- mcp_code_indexer/vector_mode/chunking/ast_chunker.py +403 -0
- mcp_code_indexer/vector_mode/chunking/chunk_optimizer.py +500 -0
- mcp_code_indexer/vector_mode/chunking/language_handlers.py +478 -0
- mcp_code_indexer/vector_mode/config.py +167 -0
- mcp_code_indexer/vector_mode/daemon.py +335 -0
- mcp_code_indexer/vector_mode/monitoring/__init__.py +19 -0
- mcp_code_indexer/vector_mode/monitoring/change_detector.py +312 -0
- mcp_code_indexer/vector_mode/monitoring/file_watcher.py +445 -0
- mcp_code_indexer/vector_mode/monitoring/merkle_tree.py +418 -0
- mcp_code_indexer/vector_mode/providers/__init__.py +17 -0
- mcp_code_indexer/vector_mode/providers/turbopuffer_client.py +217 -0
- mcp_code_indexer/vector_mode/providers/voyage_client.py +119 -0
- mcp_code_indexer/vector_mode/security/__init__.py +11 -0
- mcp_code_indexer/vector_mode/security/patterns.py +297 -0
- mcp_code_indexer/vector_mode/security/redactor.py +368 -0
- {mcp_code_indexer-4.0.2.dist-info → mcp_code_indexer-4.2.0.dist-info}/METADATA +66 -5
- {mcp_code_indexer-4.0.2.dist-info → mcp_code_indexer-4.2.0.dist-info}/RECORD +26 -8
- {mcp_code_indexer-4.0.2.dist-info → mcp_code_indexer-4.2.0.dist-info}/LICENSE +0 -0
- {mcp_code_indexer-4.0.2.dist-info → mcp_code_indexer-4.2.0.dist-info}/WHEEL +0 -0
- {mcp_code_indexer-4.0.2.dist-info → mcp_code_indexer-4.2.0.dist-info}/entry_points.txt +0 -0
mcp_code_indexer/vector_mode/daemon.py (new file)
@@ -0,0 +1,335 @@
+"""
+Vector Mode Daemon.
+
+Runs as a background process to monitor file changes and maintain vector indexes.
+Handles embedding generation, change detection, and vector database synchronization.
+"""
+
+import asyncio
+import logging
+import signal
+import sys
+from pathlib import Path
+from typing import Optional, Set
+import json
+import time
+
+from ..database.database import DatabaseManager
+from .config import VectorConfig, load_vector_config
+
+logger = logging.getLogger(__name__)
+
+class VectorDaemon:
+    """
+    Background daemon for vector mode operations.
+
+    Monitors file changes, generates embeddings, and maintains vector indexes
+    for all projects with vector mode enabled.
+    """
+
+    def __init__(
+        self,
+        config: VectorConfig,
+        db_manager: DatabaseManager,
+        cache_dir: Path,
+    ):
+        """Initialize vector daemon."""
+        self.config = config
+        self.db_manager = db_manager
+        self.cache_dir = cache_dir
+        self.is_running = False
+        self.shutdown_requested = False
+
+        # Process tracking
+        self.monitored_projects: Set[str] = set()
+        self.processing_queue: asyncio.Queue = asyncio.Queue(maxsize=config.max_queue_size)
+        self.workers: list[asyncio.Task] = []
+
+        # Statistics
+        self.stats = {
+            "start_time": time.time(),
+            "files_processed": 0,
+            "embeddings_generated": 0,
+            "errors_count": 0,
+            "last_activity": time.time(),
+        }
+
+        # Setup signal handlers
+        self._setup_signal_handlers()
+
+    def _setup_signal_handlers(self) -> None:
+        """Setup signal handlers for graceful shutdown."""
+        try:
+            signal.signal(signal.SIGTERM, self._signal_handler)
+            signal.signal(signal.SIGINT, self._signal_handler)
+            if hasattr(signal, 'SIGHUP'):
+                signal.signal(signal.SIGHUP, self._signal_handler)
+        except Exception as e:
+            logger.warning(f"Could not setup signal handlers: {e}")
+
+    def _signal_handler(self, signum: int, frame) -> None:
+        """Handle shutdown signals."""
+        logger.info(f"Received signal {signum}, initiating graceful shutdown")
+        self.shutdown_requested = True
+
+    async def start(self) -> None:
+        """Start the vector daemon."""
+        if self.is_running:
+            logger.warning("Daemon is already running")
+            return
+
+        self.is_running = True
+        logger.info(
+            "Starting vector daemon",
+            extra={
+                "structured_data": {
+                    "config": {
+                        "worker_count": self.config.worker_count,
+                        "batch_size": self.config.batch_size,
+                        "poll_interval": self.config.daemon_poll_interval,
+                    }
+                }
+            }
+        )
+
+        try:
+            # Start worker tasks
+            for i in range(self.config.worker_count):
+                worker = asyncio.create_task(self._worker(f"worker-{i}"))
+                self.workers.append(worker)
+
+            # Start monitoring tasks; tracked in self.workers so _cleanup() cancels them
+            self.workers.append(asyncio.create_task(self._monitor_projects()))
+            self.workers.append(asyncio.create_task(self._stats_reporter()))
+
+            # Wait for shutdown signal
+            await self._run_until_shutdown()
+
+        except Exception as e:
+            logger.error(f"Daemon error: {e}", exc_info=True)
+            self.stats["errors_count"] += 1
+        finally:
+            await self._cleanup()
+
+    async def _run_until_shutdown(self) -> None:
+        """Run daemon until shutdown is requested."""
+        while not self.shutdown_requested:
+            try:
+                await asyncio.sleep(1.0)
+            except asyncio.CancelledError:
+                break
+
+    async def _monitor_projects(self) -> None:
+        """Monitor projects for vector indexing requirements."""
+        logger.info("Starting project monitoring")
+
+        while not self.shutdown_requested:
+            try:
+                # Get all projects that need vector indexing
+                projects = await self.db_manager.get_all_projects()
+
+                for project in projects:
+                    if project.name not in self.monitored_projects:
+                        logger.info(f"Adding project to monitoring: {project.name}")
+                        self.monitored_projects.add(project.name)
+
+                        # Queue initial indexing task
+                        await self._queue_project_scan(project.name, project.folder_path)
+
+                await asyncio.sleep(self.config.daemon_poll_interval)
+
+            except Exception as e:
+                logger.error(f"Error in project monitoring: {e}")
+                self.stats["errors_count"] += 1
+                await asyncio.sleep(5.0)  # Back off on error
+
+    async def _queue_project_scan(self, project_name: str, folder_path: str) -> None:
+        """Queue a project for scanning and indexing."""
+        task = {
+            "type": "scan_project",
+            "project_name": project_name,
+            "folder_path": folder_path,
+            "timestamp": time.time(),
+        }
+
+        try:
+            self.processing_queue.put_nowait(task)  # put_nowait() raises QueueFull; await put() would block instead
+            logger.debug(f"Queued project scan: {project_name}")
+        except asyncio.QueueFull:
+            logger.warning(f"Processing queue full, dropping scan task for {project_name}")
+
+    async def _worker(self, worker_id: str) -> None:
+        """Worker task to process queued items."""
+        logger.info(f"Starting worker: {worker_id}")
+
+        while not self.shutdown_requested:
+            try:
+                # Get task from queue with timeout
+                try:
+                    task = await asyncio.wait_for(
+                        self.processing_queue.get(),
+                        timeout=5.0
+                    )
+                except asyncio.TimeoutError:
+                    continue
+
+                # Process the task
+                await self._process_task(task, worker_id)
+                self.stats["last_activity"] = time.time()
+
+            except Exception as e:
+                logger.error(f"Worker {worker_id} error: {e}")
+                self.stats["errors_count"] += 1
+                await asyncio.sleep(1.0)  # Brief pause on error
+
+    async def _process_task(self, task: dict, worker_id: str) -> None:
+        """Process a queued task."""
+        task_type = task.get("type")
+
+        if task_type == "scan_project":
+            await self._process_project_scan(task, worker_id)
+        else:
+            logger.warning(f"Unknown task type: {task_type}")
+
+    async def _process_project_scan(self, task: dict, worker_id: str) -> None:
+        """Process a project scan task."""
+        project_name = task["project_name"]
+        folder_path = task["folder_path"]
+
+        logger.debug(f"Worker {worker_id} processing project: {project_name}")
+
+        try:
+            # Check if vector mode components are available
+            # For now, just log that we would process this project
+            logger.info(
+                f"Vector processing for project {project_name}",
+                extra={
+                    "structured_data": {
+                        "project_name": project_name,
+                        "folder_path": folder_path,
+                        "worker_id": worker_id,
+                    }
+                }
+            )
+
+            self.stats["files_processed"] += 1
+
+            # TODO: Implement actual vector processing:
+            # 1. Scan for file changes using Merkle tree
+            # 2. Chunk modified files using AST
+            # 3. Apply secret redaction
+            # 4. Generate embeddings via Voyage
+            # 5. Store in Turbopuffer
+            # 6. Update database metadata
+
+        except Exception as e:
+            logger.error(f"Error processing project {project_name}: {e}")
+            self.stats["errors_count"] += 1
+
+    async def _stats_reporter(self) -> None:
+        """Periodically report daemon statistics."""
+        while not self.shutdown_requested:
+            try:
+                uptime = time.time() - self.stats["start_time"]
+
+                logger.info(
+                    "Daemon statistics",
+                    extra={
+                        "structured_data": {
+                            "uptime_seconds": uptime,
+                            "monitored_projects": len(self.monitored_projects),
+                            "queue_size": self.processing_queue.qsize(),
+                            "files_processed": self.stats["files_processed"],
+                            "embeddings_generated": self.stats["embeddings_generated"],
+                            "errors_count": self.stats["errors_count"],
+                        }
+                    }
+                )
+
+                await asyncio.sleep(60.0)  # Report every minute
+
+            except Exception as e:
+                logger.error(f"Error in stats reporting: {e}")
+                await asyncio.sleep(10.0)
+
+    async def _cleanup(self) -> None:
+        """Clean up resources and shut down workers."""
+        logger.info("Starting daemon cleanup")
+        self.is_running = False
+
+        # Cancel all workers
+        for worker in self.workers:
+            worker.cancel()
+
+        # Wait for workers to finish
+        if self.workers:
+            await asyncio.gather(*self.workers, return_exceptions=True)
+
+        logger.info("Vector daemon shutdown complete")
+
+    def get_status(self) -> dict:
+        """Get current daemon status."""
+        return {
+            "is_running": self.is_running,
+            "uptime": time.time() - self.stats["start_time"] if self.is_running else 0,
+            "monitored_projects": len(self.monitored_projects),
+            "queue_size": self.processing_queue.qsize(),
+            "stats": self.stats.copy(),
+        }
+
+async def start_vector_daemon(
+    config_path: Optional[Path] = None,
+    db_path: Optional[Path] = None,
+    cache_dir: Optional[Path] = None,
+) -> None:
+    """Start the vector daemon process."""
+
+    # Load configuration
+    config = load_vector_config(config_path)
+
+    # Setup database
+    if db_path is None:
+        db_path = Path.home() / ".mcp-code-index" / "tracker.db"
+    if cache_dir is None:
+        cache_dir = Path.home() / ".mcp-code-index" / "cache"
+
+    db_manager = DatabaseManager(db_path)
+    await db_manager.initialize()
+
+    # Create and start daemon
+    daemon = VectorDaemon(config, db_manager, cache_dir)
+
+    try:
+        await daemon.start()
+    finally:
+        # Clean up database connections
+        await db_manager.close_pool()
+
+def main() -> None:
+    """CLI entry point for vector daemon."""
+    import argparse
+
+    parser = argparse.ArgumentParser(description="MCP Code Indexer Vector Daemon")
+    parser.add_argument("--config", type=Path, help="Path to config file")
+    parser.add_argument("--db-path", type=Path, help="Path to database")
+    parser.add_argument("--cache-dir", type=Path, help="Cache directory")
+    parser.add_argument("--log-level", default="INFO", help="Logging level")
+
+    args = parser.parse_args()

+    # Setup logging
+    logging.basicConfig(
+        level=getattr(logging, args.log_level.upper()),
+        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+    )
+
+    try:
+        asyncio.run(start_vector_daemon(args.config, args.db_path, args.cache_dir))
+    except KeyboardInterrupt:
+        logger.info("Daemon interrupted by user")
+    except Exception as e:
+        logger.error(f"Daemon failed: {e}", exc_info=True)
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main()
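For orientation, here is a minimal sketch of driving the daemon programmatically rather than through `main()`. It mirrors the wiring in `start_vector_daemon()` above and reuses the same default paths that function falls back to; the absolute import paths are assumptions based on the package layout in the file list.

```python
# Minimal sketch: programmatic equivalent of start_vector_daemon() above.
# Import paths are assumed from the package layout shown in the diff summary.
import asyncio
from pathlib import Path

from mcp_code_indexer.database.database import DatabaseManager
from mcp_code_indexer.vector_mode.config import load_vector_config
from mcp_code_indexer.vector_mode.daemon import VectorDaemon


async def run_daemon() -> None:
    config = load_vector_config(None)  # None -> default config, as in main()
    db_manager = DatabaseManager(Path.home() / ".mcp-code-index" / "tracker.db")
    await db_manager.initialize()

    daemon = VectorDaemon(config, db_manager, Path.home() / ".mcp-code-index" / "cache")
    try:
        await daemon.start()  # runs until a signal sets shutdown_requested
    finally:
        await db_manager.close_pool()


asyncio.run(run_daemon())
```

Since the module guards `main()` behind `if __name__ == "__main__"`, the same flow should also be reachable from the shell as `python -m mcp_code_indexer.vector_mode.daemon --log-level DEBUG`.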
mcp_code_indexer/vector_mode/monitoring/__init__.py (new file)
@@ -0,0 +1,19 @@
+"""
+File system monitoring for vector mode.
+
+Provides real-time file change detection using watchdog and efficient
+change tracking using Merkle trees.
+"""
+
+from .file_watcher import FileWatcher
+from .merkle_tree import MerkleTree, MerkleNode
+from .change_detector import ChangeDetector, FileChange, ChangeType
+
+__all__ = [
+    "FileWatcher",
+    "MerkleTree",
+    "MerkleNode",
+    "ChangeDetector",
+    "FileChange",
+    "ChangeType",
+]
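The re-exports above mean consumers can pull the monitoring primitives from the package root rather than the individual modules:

```python
# Everything listed in __all__ is importable from the subpackage root.
from mcp_code_indexer.vector_mode.monitoring import (
    ChangeDetector,
    ChangeType,
    FileChange,
    FileWatcher,
    MerkleNode,
    MerkleTree,
)
```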
mcp_code_indexer/vector_mode/monitoring/change_detector.py (new file)
@@ -0,0 +1,312 @@
+"""
+Change detection utilities for file system monitoring.
+
+Provides high-level change detection and classification for the vector mode
+file monitoring system.
+"""
+
+import logging
+from enum import Enum
+from typing import List, Dict, Set, Optional, NamedTuple
+from pathlib import Path
+from dataclasses import dataclass
+from datetime import datetime
+
+logger = logging.getLogger(__name__)
+
+class ChangeType(str, Enum):
+    """Types of file system changes."""
+    CREATED = "created"
+    MODIFIED = "modified"
+    DELETED = "deleted"
+    MOVED = "moved"
+
+class FileChange(NamedTuple):
+    """Represents a file system change."""
+    path: str
+    change_type: ChangeType
+    timestamp: datetime
+    old_path: Optional[str] = None  # For moves
+    size: Optional[int] = None
+    hash: Optional[str] = None
+
+@dataclass
+class ChangeStats:
+    """Statistics about detected changes."""
+    total_changes: int = 0
+    creates: int = 0
+    modifications: int = 0
+    deletions: int = 0
+    moves: int = 0
+    start_time: Optional[datetime] = None
+    last_change: Optional[datetime] = None
+
+class ChangeDetector:
+    """
+    High-level change detection and classification.
+
+    Processes raw file system events and provides structured change information
+    for the vector indexing pipeline.
+    """
+
+    def __init__(
+        self,
+        project_root: Path,
+        ignore_patterns: Optional[List[str]] = None,
+        debounce_interval: float = 0.1,
+    ):
+        """
+        Initialize change detector.
+
+        Args:
+            project_root: Root directory to monitor
+            ignore_patterns: Patterns to ignore (glob-style)
+            debounce_interval: Minimum time between processing same file
+        """
+        self.project_root = Path(project_root).resolve()
+        self.ignore_patterns = ignore_patterns or [
+            "*.log", "*.tmp", "*~", ".git/*", "__pycache__/*",
+            "node_modules/*", "*.pyc", "*.pyo", ".DS_Store", "Thumbs.db"
+        ]
+        self.debounce_interval = debounce_interval
+
+        # Change tracking
+        self.recent_changes: List[FileChange] = []
+        self.pending_changes: Dict[str, FileChange] = {}
+        self.last_change_time: Dict[str, datetime] = {}
+
+        # Statistics
+        self.stats = ChangeStats(start_time=datetime.utcnow())
+
+        # Compile ignore patterns for performance
+        import fnmatch
+        self._compiled_patterns = [
+            fnmatch.translate(pattern) for pattern in self.ignore_patterns
+        ]
+
+    def should_ignore_path(self, path: Path) -> bool:
+        """Check if a path should be ignored based on patterns."""
+        try:
+            relative_path = path.relative_to(self.project_root)
+            path_str = str(relative_path)
+
+            import re
+            for pattern in self._compiled_patterns:
+                if re.match(pattern, path_str):
+                    return True
+
+            return False
+
+        except ValueError:
+            # Path is not relative to project root
+            return True
+
+    def _should_debounce(self, file_path: str) -> bool:
+        """Check if change should be debounced."""
+        now = datetime.utcnow()
+
+        if file_path in self.last_change_time:
+            elapsed = (now - self.last_change_time[file_path]).total_seconds()
+            if elapsed < self.debounce_interval:
+                return True
+
+        self.last_change_time[file_path] = now
+        return False
+
+    def _get_file_info(self, path: Path) -> Dict[str, Optional[int]]:
+        """Get file information (size, etc.)."""
+        try:
+            if path.exists() and path.is_file():
+                stat = path.stat()
+                return {"size": stat.st_size}
+            else:
+                return {"size": None}
+        except (OSError, PermissionError):
+            return {"size": None}
+
+    def _classify_change(
+        self,
+        path: Path,
+        event_type: str,
+        old_path: Optional[Path] = None
+    ) -> Optional[FileChange]:
+        """Classify a file system event into a structured change."""
+
+        # Convert to relative path
+        try:
+            relative_path = str(path.relative_to(self.project_root))
+        except ValueError:
+            # Path outside project root
+            return None
+
+        # Check if should be ignored
+        if self.should_ignore_path(path):
+            logger.debug(f"Ignoring change to {relative_path} (matches ignore pattern)")
+            return None
+
+        # Check debouncing
+        if self._should_debounce(relative_path):
+            logger.debug(f"Debouncing change to {relative_path}")
+            return None
+
+        # Get file info
+        file_info = self._get_file_info(path)
+
+        # Map event types to change types
+        if event_type in ["created", "added"]:
+            change_type = ChangeType.CREATED
+        elif event_type in ["modified", "changed"]:
+            change_type = ChangeType.MODIFIED
+        elif event_type in ["deleted", "removed"]:
+            change_type = ChangeType.DELETED
+        elif event_type in ["moved", "renamed"]:
+            change_type = ChangeType.MOVED
+        else:
+            logger.warning(f"Unknown event type: {event_type}")
+            return None
+
+        # Create change object
+        old_relative_path = None
+        if old_path:
+            try:
+                old_relative_path = str(old_path.relative_to(self.project_root))
+            except ValueError:
+                pass
+
+        change = FileChange(
+            path=relative_path,
+            change_type=change_type,
+            timestamp=datetime.utcnow(),
+            old_path=old_relative_path,
+            size=file_info.get("size"),
+            hash=None  # Will be computed later if needed
+        )
+
+        return change
+
+    def process_fs_event(
+        self,
+        event_type: str,
+        path: Path,
+        old_path: Optional[Path] = None
+    ) -> Optional[FileChange]:
+        """
+        Process a file system event and return structured change.
+
+        Args:
+            event_type: Type of event (created, modified, deleted, moved)
+            path: Path that changed
+            old_path: Old path (for moves)
+
+        Returns:
+            FileChange object or None if ignored
+        """
+        change = self._classify_change(path, event_type, old_path)
+
+        if change:
+            self.recent_changes.append(change)
+
+            # Update statistics
+            self.stats.total_changes += 1
+            self.stats.last_change = change.timestamp
+
+            if change.change_type == ChangeType.CREATED:
+                self.stats.creates += 1
+            elif change.change_type == ChangeType.MODIFIED:
+                self.stats.modifications += 1
+            elif change.change_type == ChangeType.DELETED:
+                self.stats.deletions += 1
+            elif change.change_type == ChangeType.MOVED:
+                self.stats.moves += 1
+
+            logger.info(f"Detected change: {change.change_type.value} {change.path}")
+
+        return change
+
+    def get_recent_changes(
+        self,
+        limit: Optional[int] = None,
+        change_types: Optional[List[ChangeType]] = None
+    ) -> List[FileChange]:
+        """
+        Get recent changes with optional filtering.
+
+        Args:
+            limit: Maximum number of changes to return
+            change_types: Filter by change types
+
+        Returns:
+            List of recent changes
+        """
+        changes = self.recent_changes
+
+        # Filter by change types
+        if change_types:
+            changes = [c for c in changes if c.change_type in change_types]
+
+        # Sort by timestamp (most recent first)
+        changes = sorted(changes, key=lambda c: c.timestamp, reverse=True)
+
+        # Apply limit
+        if limit:
+            changes = changes[:limit]
+
+        return changes
+
+    def clear_recent_changes(self) -> int:
+        """Clear recent changes and return count cleared."""
+        count = len(self.recent_changes)
+        self.recent_changes.clear()
+        return count
+
+    def get_changes_since(self, since: datetime) -> List[FileChange]:
+        """Get all changes since a specific timestamp."""
+        return [
+            change for change in self.recent_changes
+            if change.timestamp >= since
+        ]
+
+    def get_stats(self) -> ChangeStats:
+        """Get change detection statistics."""
+        return self.stats
+
+    def reset_stats(self) -> None:
+        """Reset change detection statistics."""
+        self.stats = ChangeStats(start_time=datetime.utcnow())
+
+    def get_changed_files(self, since: Optional[datetime] = None) -> Set[str]:
+        """Get set of file paths that have changed."""
+        changes = self.recent_changes
+
+        if since:
+            changes = [c for c in changes if c.timestamp >= since]
+
+        # Collect unique file paths
+        changed_files = set()
+        for change in changes:
+            changed_files.add(change.path)
+            if change.old_path:  # For moves
+                changed_files.add(change.old_path)
+
+        return changed_files
+
+    def is_code_file(self, path: str) -> bool:
+        """Check if a file is likely a code file."""
+        code_extensions = {
+            '.py', '.js', '.ts', '.jsx', '.tsx', '.java', '.cpp', '.c', '.h',
+            '.cs', '.php', '.rb', '.go', '.rs', '.swift', '.kt', '.scala',
+            '.clj', '.cljs', '.hs', '.ml', '.fs', '.ex', '.exs', '.cr',
+            '.dart', '.lua', '.pl', '.sh', '.bash', '.zsh', '.fish',
+            '.sql', '.r', '.m', '.mm', '.vim', '.el', '.lisp', '.scm'
+        }
+
+        return Path(path).suffix.lower() in code_extensions
+
+    def get_code_changes(self, since: Optional[datetime] = None) -> List[FileChange]:
+        """Get changes to code files only."""
+        changes = self.get_recent_changes()
+
+        if since:
+            changes = [c for c in changes if c.timestamp >= since]
+
+        return [c for c in changes if self.is_code_file(c.path)]
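To make the event flow concrete, here is a short sketch of feeding raw watcher events through `ChangeDetector` and querying the structured results; the project root and file path are illustrative.

```python
from datetime import datetime, timedelta
from pathlib import Path

from mcp_code_indexer.vector_mode.monitoring import ChangeDetector, ChangeType

root = Path("/tmp/example-project")  # illustrative project root
detector = ChangeDetector(root, debounce_interval=0.5)

# A file watcher would call this once per raw event; ignored, debounced,
# or out-of-tree paths come back as None.
change = detector.process_fs_event("modified", root / "src" / "app.py")
if change is not None:
    print(change.change_type.value, change.path, change.size)

# Query helpers layered over recent_changes:
recent = detector.get_recent_changes(limit=10, change_types=[ChangeType.MODIFIED])
code_only = detector.get_code_changes(since=datetime.utcnow() - timedelta(minutes=5))
touched_paths = detector.get_changed_files()
```

Note that debouncing is keyed on the project-relative path, so rapid duplicate events for the same file within `debounce_interval` seconds collapse into one `FileChange`.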