mcp-code-indexer 4.0.2__py3-none-any.whl → 4.2.0__py3-none-any.whl

This diff compares publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (26)
  1. mcp_code_indexer/database/models.py +125 -1
  2. mcp_code_indexer/main.py +60 -0
  3. mcp_code_indexer/migrations/006_vector_mode.sql +189 -0
  4. mcp_code_indexer/server/mcp_server.py +3 -0
  5. mcp_code_indexer/vector_mode/__init__.py +36 -0
  6. mcp_code_indexer/vector_mode/chunking/__init__.py +19 -0
  7. mcp_code_indexer/vector_mode/chunking/ast_chunker.py +403 -0
  8. mcp_code_indexer/vector_mode/chunking/chunk_optimizer.py +500 -0
  9. mcp_code_indexer/vector_mode/chunking/language_handlers.py +478 -0
  10. mcp_code_indexer/vector_mode/config.py +167 -0
  11. mcp_code_indexer/vector_mode/daemon.py +335 -0
  12. mcp_code_indexer/vector_mode/monitoring/__init__.py +19 -0
  13. mcp_code_indexer/vector_mode/monitoring/change_detector.py +312 -0
  14. mcp_code_indexer/vector_mode/monitoring/file_watcher.py +445 -0
  15. mcp_code_indexer/vector_mode/monitoring/merkle_tree.py +418 -0
  16. mcp_code_indexer/vector_mode/providers/__init__.py +17 -0
  17. mcp_code_indexer/vector_mode/providers/turbopuffer_client.py +217 -0
  18. mcp_code_indexer/vector_mode/providers/voyage_client.py +119 -0
  19. mcp_code_indexer/vector_mode/security/__init__.py +11 -0
  20. mcp_code_indexer/vector_mode/security/patterns.py +297 -0
  21. mcp_code_indexer/vector_mode/security/redactor.py +368 -0
  22. {mcp_code_indexer-4.0.2.dist-info → mcp_code_indexer-4.2.0.dist-info}/METADATA +66 -5
  23. {mcp_code_indexer-4.0.2.dist-info → mcp_code_indexer-4.2.0.dist-info}/RECORD +26 -8
  24. {mcp_code_indexer-4.0.2.dist-info → mcp_code_indexer-4.2.0.dist-info}/LICENSE +0 -0
  25. {mcp_code_indexer-4.0.2.dist-info → mcp_code_indexer-4.2.0.dist-info}/WHEEL +0 -0
  26. {mcp_code_indexer-4.0.2.dist-info → mcp_code_indexer-4.2.0.dist-info}/entry_points.txt +0 -0
mcp_code_indexer/vector_mode/daemon.py
@@ -0,0 +1,335 @@
+ """
+ Vector Mode Daemon.
+
+ Runs as a background process to monitor file changes and maintain vector indexes.
+ Handles embedding generation, change detection, and vector database synchronization.
+ """
+
+ import asyncio
+ import logging
+ import signal
+ import sys
+ import time
+ from pathlib import Path
+ from typing import Optional, Set
+
+ from ..database.database import DatabaseManager
+ from .config import VectorConfig, load_vector_config
+
+ logger = logging.getLogger(__name__)
+
+
+ class VectorDaemon:
+     """
+     Background daemon for vector mode operations.
+
+     Monitors file changes, generates embeddings, and maintains vector indexes
+     for all projects with vector mode enabled.
+     """
+
+     def __init__(
+         self,
+         config: VectorConfig,
+         db_manager: DatabaseManager,
+         cache_dir: Path,
+     ):
+         """Initialize the vector daemon."""
+         self.config = config
+         self.db_manager = db_manager
+         self.cache_dir = cache_dir
+         self.is_running = False
+         self.shutdown_requested = False
+
+         # Process tracking
+         self.monitored_projects: Set[str] = set()
+         self.processing_queue: asyncio.Queue = asyncio.Queue(maxsize=config.max_queue_size)
+         self.workers: list[asyncio.Task] = []
+
+         # Statistics
+         self.stats = {
+             "start_time": time.time(),
+             "files_processed": 0,
+             "embeddings_generated": 0,
+             "errors_count": 0,
+             "last_activity": time.time(),
+         }
+
+         # Set up signal handlers
+         self._setup_signal_handlers()
+
+     def _setup_signal_handlers(self) -> None:
+         """Set up signal handlers for graceful shutdown."""
+         try:
+             signal.signal(signal.SIGTERM, self._signal_handler)
+             signal.signal(signal.SIGINT, self._signal_handler)
+             if hasattr(signal, 'SIGHUP'):
+                 signal.signal(signal.SIGHUP, self._signal_handler)
+         except Exception as e:
+             logger.warning(f"Could not set up signal handlers: {e}")
+
+     def _signal_handler(self, signum: int, frame) -> None:
+         """Handle shutdown signals."""
+         logger.info(f"Received signal {signum}, initiating graceful shutdown")
+         self.shutdown_requested = True
+
+     async def start(self) -> None:
+         """Start the vector daemon."""
+         if self.is_running:
+             logger.warning("Daemon is already running")
+             return
+
+         self.is_running = True
+         logger.info(
+             "Starting vector daemon",
+             extra={
+                 "structured_data": {
+                     "config": {
+                         "worker_count": self.config.worker_count,
+                         "batch_size": self.config.batch_size,
+                         "poll_interval": self.config.daemon_poll_interval,
+                     }
+                 }
+             },
+         )
+
+         try:
+             # Start worker tasks
+             for i in range(self.config.worker_count):
+                 worker = asyncio.create_task(self._worker(f"worker-{i}"))
+                 self.workers.append(worker)
+
+             # Start monitoring tasks; track them alongside the workers so
+             # that _cleanup() cancels them as well
+             self.workers.append(asyncio.create_task(self._monitor_projects()))
+             self.workers.append(asyncio.create_task(self._stats_reporter()))
+
+             # Wait for shutdown signal
+             await self._run_until_shutdown()
+
+         except Exception as e:
+             logger.error(f"Daemon error: {e}", exc_info=True)
+             self.stats["errors_count"] += 1
+         finally:
+             await self._cleanup()
+
+     async def _run_until_shutdown(self) -> None:
+         """Run the daemon until shutdown is requested."""
+         while not self.shutdown_requested:
+             try:
+                 await asyncio.sleep(1.0)
+             except asyncio.CancelledError:
+                 break
+
+     async def _monitor_projects(self) -> None:
+         """Monitor projects for vector indexing requirements."""
+         logger.info("Starting project monitoring")
+
+         while not self.shutdown_requested:
+             try:
+                 # Get all projects that need vector indexing
+                 projects = await self.db_manager.get_all_projects()
+
+                 for project in projects:
+                     if project.name not in self.monitored_projects:
+                         logger.info(f"Adding project to monitoring: {project.name}")
+                         self.monitored_projects.add(project.name)
+
+                         # Queue initial indexing task
+                         await self._queue_project_scan(project.name, project.folder_path)
+
+                 await asyncio.sleep(self.config.daemon_poll_interval)
+
+             except Exception as e:
+                 logger.error(f"Error in project monitoring: {e}")
+                 self.stats["errors_count"] += 1
+                 await asyncio.sleep(5.0)  # Back off on error
+
+     async def _queue_project_scan(self, project_name: str, folder_path: str) -> None:
+         """Queue a project for scanning and indexing."""
+         task = {
+             "type": "scan_project",
+             "project_name": project_name,
+             "folder_path": folder_path,
+             "timestamp": time.time(),
+         }
+
+         try:
+             # put_nowait() raises QueueFull when the queue is at capacity;
+             # a blocking put() would never raise and would stall the monitor loop
+             self.processing_queue.put_nowait(task)
+             logger.debug(f"Queued project scan: {project_name}")
+         except asyncio.QueueFull:
+             logger.warning(f"Processing queue full, dropping scan task for {project_name}")
+
+     async def _worker(self, worker_id: str) -> None:
+         """Worker task to process queued items."""
+         logger.info(f"Starting worker: {worker_id}")
+
+         while not self.shutdown_requested:
+             try:
+                 # Get a task from the queue, timing out so shutdown is noticed
+                 try:
+                     task = await asyncio.wait_for(
+                         self.processing_queue.get(),
+                         timeout=5.0,
+                     )
+                 except asyncio.TimeoutError:
+                     continue
+
+                 # Process the task
+                 await self._process_task(task, worker_id)
+                 self.stats["last_activity"] = time.time()
+
+             except Exception as e:
+                 logger.error(f"Worker {worker_id} error: {e}")
+                 self.stats["errors_count"] += 1
+                 await asyncio.sleep(1.0)  # Brief pause on error
+
+     async def _process_task(self, task: dict, worker_id: str) -> None:
+         """Process a queued task."""
+         task_type = task.get("type")
+
+         if task_type == "scan_project":
+             await self._process_project_scan(task, worker_id)
+         else:
+             logger.warning(f"Unknown task type: {task_type}")
+
+     async def _process_project_scan(self, task: dict, worker_id: str) -> None:
+         """Process a project scan task."""
+         project_name = task["project_name"]
+         folder_path = task["folder_path"]
+
+         logger.debug(f"Worker {worker_id} processing project: {project_name}")
+
+         try:
+             # Vector mode components are not wired in yet; for now, just log
+             # that this project would be processed
+             logger.info(
+                 f"Vector processing for project {project_name}",
+                 extra={
+                     "structured_data": {
+                         "project_name": project_name,
+                         "folder_path": folder_path,
+                         "worker_id": worker_id,
+                     }
+                 },
+             )
+
+             self.stats["files_processed"] += 1
+
+             # TODO: Implement actual vector processing:
+             # 1. Scan for file changes using Merkle tree
+             # 2. Chunk modified files using AST
+             # 3. Apply secret redaction
+             # 4. Generate embeddings via Voyage
+             # 5. Store in Turbopuffer
+             # 6. Update database metadata
+
+         except Exception as e:
+             logger.error(f"Error processing project {project_name}: {e}")
+             self.stats["errors_count"] += 1
+
+     async def _stats_reporter(self) -> None:
+         """Periodically report daemon statistics."""
+         while not self.shutdown_requested:
+             try:
+                 uptime = time.time() - self.stats["start_time"]
+
+                 logger.info(
+                     "Daemon statistics",
+                     extra={
+                         "structured_data": {
+                             "uptime_seconds": uptime,
+                             "monitored_projects": len(self.monitored_projects),
+                             "queue_size": self.processing_queue.qsize(),
+                             "files_processed": self.stats["files_processed"],
+                             "embeddings_generated": self.stats["embeddings_generated"],
+                             "errors_count": self.stats["errors_count"],
+                         }
+                     },
+                 )
+
+                 await asyncio.sleep(60.0)  # Report every minute
+
+             except Exception as e:
+                 logger.error(f"Error in stats reporting: {e}")
+                 await asyncio.sleep(10.0)
+
+     async def _cleanup(self) -> None:
+         """Clean up resources and shut down workers."""
+         logger.info("Starting daemon cleanup")
+         self.is_running = False
+
+         # Cancel all tracked tasks (workers, monitor, stats reporter)
+         for worker in self.workers:
+             worker.cancel()
+
+         # Wait for them to finish
+         if self.workers:
+             await asyncio.gather(*self.workers, return_exceptions=True)
+
+         logger.info("Vector daemon shutdown complete")
+
+     def get_status(self) -> dict:
+         """Get current daemon status."""
+         return {
+             "is_running": self.is_running,
+             "uptime": time.time() - self.stats["start_time"] if self.is_running else 0,
+             "monitored_projects": len(self.monitored_projects),
+             "queue_size": self.processing_queue.qsize(),
+             "stats": self.stats.copy(),
+         }
+
+
+ async def start_vector_daemon(
+     config_path: Optional[Path] = None,
+     db_path: Optional[Path] = None,
+     cache_dir: Optional[Path] = None,
+ ) -> None:
+     """Start the vector daemon process."""
+
+     # Load configuration
+     config = load_vector_config(config_path)
+
+     # Set up default paths
+     if db_path is None:
+         db_path = Path.home() / ".mcp-code-index" / "tracker.db"
+     if cache_dir is None:
+         cache_dir = Path.home() / ".mcp-code-index" / "cache"
+
+     db_manager = DatabaseManager(db_path)
+     await db_manager.initialize()
+
+     # Create and start the daemon
+     daemon = VectorDaemon(config, db_manager, cache_dir)
+
+     try:
+         await daemon.start()
+     finally:
+         # Clean up database connections
+         await db_manager.close_pool()
+
+
+ def main() -> None:
+     """CLI entry point for the vector daemon."""
+     import argparse
+
+     parser = argparse.ArgumentParser(description="MCP Code Indexer Vector Daemon")
+     parser.add_argument("--config", type=Path, help="Path to config file")
+     parser.add_argument("--db-path", type=Path, help="Path to database")
+     parser.add_argument("--cache-dir", type=Path, help="Cache directory")
+     parser.add_argument("--log-level", default="INFO", help="Logging level")
+
+     args = parser.parse_args()
+
+     # Set up logging
+     logging.basicConfig(
+         level=getattr(logging, args.log_level.upper()),
+         format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+     )
+
+     try:
+         asyncio.run(start_vector_daemon(args.config, args.db_path, args.cache_dir))
+     except KeyboardInterrupt:
+         logger.info("Daemon interrupted by user")
+     except Exception as e:
+         logger.error(f"Daemon failed: {e}", exc_info=True)
+         sys.exit(1)
+
+
+ if __name__ == "__main__":
+     main()
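
The daemon exposes two entry points: start_vector_daemon() for running inside an existing asyncio program, and main() as the CLI entry (--config, --db-path, --cache-dir, --log-level). A minimal sketch of the programmatic route, assuming the default ~/.mcp-code-index layout shown above:

    # Sketch: embedding the daemon in an existing asyncio program.
    # The paths are the defaults from start_vector_daemon(); override as needed.
    import asyncio
    from pathlib import Path

    from mcp_code_indexer.vector_mode.daemon import start_vector_daemon

    asyncio.run(
        start_vector_daemon(
            config_path=None,  # falls through to load_vector_config() defaults
            db_path=Path.home() / ".mcp-code-index" / "tracker.db",
            cache_dir=Path.home() / ".mcp-code-index" / "cache",
        )
    )

Either route runs until a SIGTERM/SIGINT arrives, then cancels the workers and closes the database pool.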
mcp_code_indexer/vector_mode/monitoring/__init__.py
@@ -0,0 +1,19 @@
+ """
+ File system monitoring for vector mode.
+
+ Provides real-time file change detection using watchdog and efficient
+ change tracking using Merkle trees.
+ """
+
+ from .file_watcher import FileWatcher
+ from .merkle_tree import MerkleTree, MerkleNode
+ from .change_detector import ChangeDetector, FileChange, ChangeType
+
+ __all__ = [
+     "FileWatcher",
+     "MerkleTree",
+     "MerkleNode",
+     "ChangeDetector",
+     "FileChange",
+     "ChangeType",
+ ]
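
Because the subpackage re-exports its public surface via __all__, downstream code can pull the monitoring primitives from the package root rather than reaching into the submodules:

    # Sketch: importing the monitoring primitives from the package root.
    from mcp_code_indexer.vector_mode.monitoring import (
        ChangeDetector,
        ChangeType,
        FileChange,
        FileWatcher,
        MerkleTree,
    )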
mcp_code_indexer/vector_mode/monitoring/change_detector.py
@@ -0,0 +1,312 @@
+ """
+ Change detection utilities for file system monitoring.
+
+ Provides high-level change detection and classification for the vector mode
+ file monitoring system.
+ """
+
+ import fnmatch
+ import logging
+ import re
+ from enum import Enum
+ from typing import List, Dict, Set, Optional, NamedTuple
+ from pathlib import Path
+ from dataclasses import dataclass
+ from datetime import datetime
+
+ logger = logging.getLogger(__name__)
+
+
+ class ChangeType(str, Enum):
+     """Types of file system changes."""
+     CREATED = "created"
+     MODIFIED = "modified"
+     DELETED = "deleted"
+     MOVED = "moved"
+
+
+ class FileChange(NamedTuple):
+     """Represents a file system change."""
+     path: str
+     change_type: ChangeType
+     timestamp: datetime
+     old_path: Optional[str] = None  # For moves
+     size: Optional[int] = None
+     hash: Optional[str] = None
+
+
+ @dataclass
+ class ChangeStats:
+     """Statistics about detected changes."""
+     total_changes: int = 0
+     creates: int = 0
+     modifications: int = 0
+     deletions: int = 0
+     moves: int = 0
+     start_time: Optional[datetime] = None
+     last_change: Optional[datetime] = None
+
+
+ class ChangeDetector:
+     """
+     High-level change detection and classification.
+
+     Processes raw file system events and provides structured change information
+     for the vector indexing pipeline.
+     """
+
+     def __init__(
+         self,
+         project_root: Path,
+         ignore_patterns: Optional[List[str]] = None,
+         debounce_interval: float = 0.1,
+     ):
+         """
+         Initialize the change detector.
+
+         Args:
+             project_root: Root directory to monitor
+             ignore_patterns: Patterns to ignore (glob-style)
+             debounce_interval: Minimum time in seconds between processing
+                 the same file
+         """
+         self.project_root = Path(project_root).resolve()
+         self.ignore_patterns = ignore_patterns or [
+             "*.log", "*.tmp", "*~", ".git/*", "__pycache__/*",
+             "node_modules/*", "*.pyc", "*.pyo", ".DS_Store", "Thumbs.db",
+         ]
+         self.debounce_interval = debounce_interval
+
+         # Change tracking
+         self.recent_changes: List[FileChange] = []
+         self.pending_changes: Dict[str, FileChange] = {}
+         self.last_change_time: Dict[str, datetime] = {}
+
+         # Statistics
+         self.stats = ChangeStats(start_time=datetime.utcnow())
+
+         # Pre-compile ignore patterns (glob -> regex) for fast matching
+         self._compiled_patterns = [
+             re.compile(fnmatch.translate(pattern)) for pattern in self.ignore_patterns
+         ]
+
+     def should_ignore_path(self, path: Path) -> bool:
+         """Check if a path should be ignored based on the ignore patterns."""
+         try:
+             relative_path = path.relative_to(self.project_root)
+             path_str = str(relative_path)
+
+             for pattern in self._compiled_patterns:
+                 if pattern.match(path_str):
+                     return True
+
+             return False
+
+         except ValueError:
+             # Path is not relative to the project root
+             return True
+
+     def _should_debounce(self, file_path: str) -> bool:
+         """Check if a change should be debounced."""
+         now = datetime.utcnow()
+
+         if file_path in self.last_change_time:
+             elapsed = (now - self.last_change_time[file_path]).total_seconds()
+             if elapsed < self.debounce_interval:
+                 return True
+
+         self.last_change_time[file_path] = now
+         return False
+
+     def _get_file_info(self, path: Path) -> Dict[str, Optional[int]]:
+         """Get file information (currently just size)."""
+         try:
+             if path.exists() and path.is_file():
+                 stat = path.stat()
+                 return {"size": stat.st_size}
+             else:
+                 return {"size": None}
+         except (OSError, PermissionError):
+             return {"size": None}
+
+     def _classify_change(
+         self,
+         path: Path,
+         event_type: str,
+         old_path: Optional[Path] = None,
+     ) -> Optional[FileChange]:
+         """Classify a file system event into a structured change."""
+
+         # Convert to a path relative to the project root
+         try:
+             relative_path = str(path.relative_to(self.project_root))
+         except ValueError:
+             # Path outside project root
+             return None
+
+         # Check whether the path should be ignored
+         if self.should_ignore_path(path):
+             logger.debug(f"Ignoring change to {relative_path} (matches ignore pattern)")
+             return None
+
+         # Check debouncing
+         if self._should_debounce(relative_path):
+             logger.debug(f"Debouncing change to {relative_path}")
+             return None
+
+         # Get file info
+         file_info = self._get_file_info(path)
+
+         # Map event types to change types
+         if event_type in ["created", "added"]:
+             change_type = ChangeType.CREATED
+         elif event_type in ["modified", "changed"]:
+             change_type = ChangeType.MODIFIED
+         elif event_type in ["deleted", "removed"]:
+             change_type = ChangeType.DELETED
+         elif event_type in ["moved", "renamed"]:
+             change_type = ChangeType.MOVED
+         else:
+             logger.warning(f"Unknown event type: {event_type}")
+             return None
+
+         # Create the change object
+         old_relative_path = None
+         if old_path:
+             try:
+                 old_relative_path = str(old_path.relative_to(self.project_root))
+             except ValueError:
+                 pass
+
+         change = FileChange(
+             path=relative_path,
+             change_type=change_type,
+             timestamp=datetime.utcnow(),
+             old_path=old_relative_path,
+             size=file_info.get("size"),
+             hash=None,  # Will be computed later if needed
+         )
+
+         return change
+
+     def process_fs_event(
+         self,
+         event_type: str,
+         path: Path,
+         old_path: Optional[Path] = None,
+     ) -> Optional[FileChange]:
+         """
+         Process a file system event and return a structured change.
+
+         Args:
+             event_type: Type of event (created, modified, deleted, moved)
+             path: Path that changed
+             old_path: Old path (for moves)
+
+         Returns:
+             FileChange object, or None if the event was ignored
+         """
+         change = self._classify_change(path, event_type, old_path)
+
+         if change:
+             self.recent_changes.append(change)
+
+             # Update statistics
+             self.stats.total_changes += 1
+             self.stats.last_change = change.timestamp
+
+             if change.change_type == ChangeType.CREATED:
+                 self.stats.creates += 1
+             elif change.change_type == ChangeType.MODIFIED:
+                 self.stats.modifications += 1
+             elif change.change_type == ChangeType.DELETED:
+                 self.stats.deletions += 1
+             elif change.change_type == ChangeType.MOVED:
+                 self.stats.moves += 1
+
+             logger.info(f"Detected change: {change.change_type.value} {change.path}")
+
+         return change
+
+     def get_recent_changes(
+         self,
+         limit: Optional[int] = None,
+         change_types: Optional[List[ChangeType]] = None,
+     ) -> List[FileChange]:
+         """
+         Get recent changes with optional filtering.
+
+         Args:
+             limit: Maximum number of changes to return
+             change_types: Filter by change types
+
+         Returns:
+             List of recent changes
+         """
+         changes = self.recent_changes
+
+         # Filter by change types
+         if change_types:
+             changes = [c for c in changes if c.change_type in change_types]
+
+         # Sort by timestamp (most recent first)
+         changes = sorted(changes, key=lambda c: c.timestamp, reverse=True)
+
+         # Apply the limit (None means no limit)
+         if limit is not None:
+             changes = changes[:limit]
+
+         return changes
+
+     def clear_recent_changes(self) -> int:
+         """Clear recent changes and return the number cleared."""
+         count = len(self.recent_changes)
+         self.recent_changes.clear()
+         return count
+
+     def get_changes_since(self, since: datetime) -> List[FileChange]:
+         """Get all changes since a specific timestamp."""
+         return [
+             change for change in self.recent_changes
+             if change.timestamp >= since
+         ]
+
+     def get_stats(self) -> ChangeStats:
+         """Get change detection statistics."""
+         return self.stats
+
+     def reset_stats(self) -> None:
+         """Reset change detection statistics."""
+         self.stats = ChangeStats(start_time=datetime.utcnow())
+
+     def get_changed_files(self, since: Optional[datetime] = None) -> Set[str]:
+         """Get the set of file paths that have changed."""
+         changes = self.recent_changes
+
+         if since:
+             changes = [c for c in changes if c.timestamp >= since]
+
+         # Collect unique file paths
+         changed_files = set()
+         for change in changes:
+             changed_files.add(change.path)
+             if change.old_path:  # For moves
+                 changed_files.add(change.old_path)
+
+         return changed_files
+
+     def is_code_file(self, path: str) -> bool:
+         """Check if a file is likely a code file."""
+         code_extensions = {
+             '.py', '.js', '.ts', '.jsx', '.tsx', '.java', '.cpp', '.c', '.h',
+             '.cs', '.php', '.rb', '.go', '.rs', '.swift', '.kt', '.scala',
+             '.clj', '.cljs', '.hs', '.ml', '.fs', '.ex', '.exs', '.cr',
+             '.dart', '.lua', '.pl', '.sh', '.bash', '.zsh', '.fish',
+             '.sql', '.r', '.m', '.mm', '.vim', '.el', '.lisp', '.scm',
+         }
+
+         return Path(path).suffix.lower() in code_extensions
+
+     def get_code_changes(self, since: Optional[datetime] = None) -> List[FileChange]:
+         """Get changes to code files only."""
+         changes = self.get_recent_changes()
+
+         if since:
+             changes = [c for c in changes if c.timestamp >= since]
+
+         return [c for c in changes if self.is_code_file(c.path)]
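
To tie the pieces together, here is a minimal sketch of driving a ChangeDetector by hand, using the event-type strings that _classify_change() accepts; the project path and file names are hypothetical:

    # Sketch: feeding raw events through ChangeDetector and reading back
    # only the code-file changes. Paths below are hypothetical.
    from pathlib import Path

    from mcp_code_indexer.vector_mode.monitoring import ChangeDetector

    root = Path("/srv/demo-project")
    detector = ChangeDetector(project_root=root)

    # The detector relativizes each path, applies the ignore patterns,
    # and debounces repeated events within debounce_interval seconds.
    detector.process_fs_event("created", root / "src" / "app.py")
    detector.process_fs_event("modified", root / "build.log")  # ignored: matches "*.log"

    # Only files with a recognized code extension survive this filter.
    for change in detector.get_code_changes():
        print(change.change_type.value, change.path)  # -> created src/app.py

In the running system these events would come from the FileWatcher (backed by watchdog, per the subpackage docstring) rather than from manual calls.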