shotgun-sh 0.1.0.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of shotgun-sh might be problematic.

Files changed (94)
  1. shotgun/__init__.py +3 -0
  2. shotgun/agents/__init__.py +1 -0
  3. shotgun/agents/agent_manager.py +196 -0
  4. shotgun/agents/common.py +295 -0
  5. shotgun/agents/config/__init__.py +13 -0
  6. shotgun/agents/config/manager.py +215 -0
  7. shotgun/agents/config/models.py +120 -0
  8. shotgun/agents/config/provider.py +91 -0
  9. shotgun/agents/history/__init__.py +5 -0
  10. shotgun/agents/history/history_processors.py +213 -0
  11. shotgun/agents/models.py +94 -0
  12. shotgun/agents/plan.py +119 -0
  13. shotgun/agents/research.py +131 -0
  14. shotgun/agents/tasks.py +122 -0
  15. shotgun/agents/tools/__init__.py +26 -0
  16. shotgun/agents/tools/codebase/__init__.py +28 -0
  17. shotgun/agents/tools/codebase/codebase_shell.py +256 -0
  18. shotgun/agents/tools/codebase/directory_lister.py +141 -0
  19. shotgun/agents/tools/codebase/file_read.py +144 -0
  20. shotgun/agents/tools/codebase/models.py +252 -0
  21. shotgun/agents/tools/codebase/query_graph.py +67 -0
  22. shotgun/agents/tools/codebase/retrieve_code.py +81 -0
  23. shotgun/agents/tools/file_management.py +130 -0
  24. shotgun/agents/tools/user_interaction.py +36 -0
  25. shotgun/agents/tools/web_search.py +69 -0
  26. shotgun/cli/__init__.py +1 -0
  27. shotgun/cli/codebase/__init__.py +5 -0
  28. shotgun/cli/codebase/commands.py +202 -0
  29. shotgun/cli/codebase/models.py +21 -0
  30. shotgun/cli/config.py +261 -0
  31. shotgun/cli/models.py +10 -0
  32. shotgun/cli/plan.py +65 -0
  33. shotgun/cli/research.py +78 -0
  34. shotgun/cli/tasks.py +71 -0
  35. shotgun/cli/utils.py +25 -0
  36. shotgun/codebase/__init__.py +12 -0
  37. shotgun/codebase/core/__init__.py +46 -0
  38. shotgun/codebase/core/change_detector.py +358 -0
  39. shotgun/codebase/core/code_retrieval.py +243 -0
  40. shotgun/codebase/core/ingestor.py +1497 -0
  41. shotgun/codebase/core/language_config.py +297 -0
  42. shotgun/codebase/core/manager.py +1554 -0
  43. shotgun/codebase/core/nl_query.py +327 -0
  44. shotgun/codebase/core/parser_loader.py +152 -0
  45. shotgun/codebase/models.py +107 -0
  46. shotgun/codebase/service.py +148 -0
  47. shotgun/logging_config.py +172 -0
  48. shotgun/main.py +73 -0
  49. shotgun/prompts/__init__.py +5 -0
  50. shotgun/prompts/agents/__init__.py +1 -0
  51. shotgun/prompts/agents/partials/codebase_understanding.j2 +79 -0
  52. shotgun/prompts/agents/partials/common_agent_system_prompt.j2 +10 -0
  53. shotgun/prompts/agents/partials/interactive_mode.j2 +8 -0
  54. shotgun/prompts/agents/plan.j2 +57 -0
  55. shotgun/prompts/agents/research.j2 +38 -0
  56. shotgun/prompts/agents/state/codebase/codebase_graphs_available.j2 +13 -0
  57. shotgun/prompts/agents/state/system_state.j2 +1 -0
  58. shotgun/prompts/agents/tasks.j2 +67 -0
  59. shotgun/prompts/codebase/__init__.py +1 -0
  60. shotgun/prompts/codebase/cypher_query_patterns.j2 +221 -0
  61. shotgun/prompts/codebase/cypher_system.j2 +28 -0
  62. shotgun/prompts/codebase/enhanced_query_context.j2 +10 -0
  63. shotgun/prompts/codebase/partials/cypher_rules.j2 +24 -0
  64. shotgun/prompts/codebase/partials/graph_schema.j2 +28 -0
  65. shotgun/prompts/codebase/partials/temporal_context.j2 +21 -0
  66. shotgun/prompts/history/__init__.py +1 -0
  67. shotgun/prompts/history/summarization.j2 +46 -0
  68. shotgun/prompts/loader.py +140 -0
  69. shotgun/prompts/user/research.j2 +5 -0
  70. shotgun/py.typed +0 -0
  71. shotgun/sdk/__init__.py +13 -0
  72. shotgun/sdk/codebase.py +195 -0
  73. shotgun/sdk/exceptions.py +17 -0
  74. shotgun/sdk/models.py +189 -0
  75. shotgun/sdk/services.py +23 -0
  76. shotgun/telemetry.py +68 -0
  77. shotgun/tui/__init__.py +0 -0
  78. shotgun/tui/app.py +49 -0
  79. shotgun/tui/components/prompt_input.py +69 -0
  80. shotgun/tui/components/spinner.py +86 -0
  81. shotgun/tui/components/splash.py +25 -0
  82. shotgun/tui/components/vertical_tail.py +28 -0
  83. shotgun/tui/screens/chat.py +415 -0
  84. shotgun/tui/screens/chat.tcss +28 -0
  85. shotgun/tui/screens/provider_config.py +221 -0
  86. shotgun/tui/screens/splash.py +31 -0
  87. shotgun/tui/styles.tcss +10 -0
  88. shotgun/utils/__init__.py +5 -0
  89. shotgun/utils/file_system_utils.py +31 -0
  90. shotgun_sh-0.1.0.dev1.dist-info/METADATA +318 -0
  91. shotgun_sh-0.1.0.dev1.dist-info/RECORD +94 -0
  92. shotgun_sh-0.1.0.dev1.dist-info/WHEEL +4 -0
  93. shotgun_sh-0.1.0.dev1.dist-info/entry_points.txt +3 -0
  94. shotgun_sh-0.1.0.dev1.dist-info/licenses/LICENSE +21 -0
shotgun/codebase/core/manager.py
@@ -0,0 +1,1554 @@
+ """Kuzu graph database manager for code knowledge graphs."""
+
+ from __future__ import annotations
+
+ import asyncio
+ import hashlib
+ import json
+ import time
+ import uuid
+ from collections.abc import Awaitable, Callable
+ from pathlib import Path
+ from typing import Any, ClassVar
+
+ import anyio
+ import kuzu
+ from watchdog.events import FileSystemEvent, FileSystemEventHandler
+ from watchdog.observers import Observer
+
+ from shotgun.codebase.models import (
+     CodebaseGraph,
+     FileChange,
+     GraphStatus,
+     OperationStats,
+ )
+ from shotgun.logging_config import get_logger
+
+ logger = get_logger(__name__)
+
+
+ class CodebaseFileHandler(FileSystemEventHandler):
+     """Handles file system events for code graph updates."""
+
+     def __init__(
+         self,
+         graph_id: str,
+         callback: Callable[[str, list[FileChange]], Awaitable[None]] | None,
+         loop: asyncio.AbstractEventLoop,
+         ignore_patterns: set[str] | None = None,
+     ):
+         self.graph_id = graph_id
+         self.callback = callback
+         self.loop = loop
+         self.pending_changes: list[FileChange] = []
+         self._lock = anyio.Lock()
+         # Import default ignore patterns from ingestor
+         from shotgun.codebase.core.ingestor import IGNORE_PATTERNS
+
+         self.ignore_patterns = ignore_patterns or IGNORE_PATTERNS
+
+     def on_any_event(self, event: FileSystemEvent) -> None:
+         """Handle any file system event."""
+         if event.is_directory:
+             return
+
+         # Filter out temporary files
+         src_path_str = (
+             event.src_path.decode("utf-8")
+             if isinstance(event.src_path, bytes)
+             else event.src_path
+         )
+         path = Path(src_path_str)
+         filename = path.name
+
+         # Check if any parent directory should be ignored
+         for parent in path.parents:
+             if parent.name in self.ignore_patterns:
+                 logger.debug(
+                     f"Ignoring file in ignored directory: {parent.name} - path: {src_path_str}"
+                 )
+                 return
+
+         # Skip various temporary files
+         if any(
+             [
+                 filename.startswith("."),  # Hidden files
+                 filename.endswith(".swp"),  # Vim swap files
+                 filename.endswith(".tmp"),  # Generic temp files
+                 filename.endswith("~"),  # Backup files
+                 "#" in filename,  # Emacs temp files
+                 filename.startswith("__pycache__"),  # Python cache
+                 path.suffix in [".pyc", ".pyo"],  # Python compiled files
+                 # Numeric temp files (like test_watcher_fix.py.tmp.27477.1755109972829)
+                 any(part.isdigit() and len(part) > 4 for part in filename.split(".")),
+             ]
+         ):
+             logger.debug(
+                 f"Ignoring temporary file: {filename} - event_type: {event.event_type}"
+             )
+             return
+
+         # For move events, also check destination path
+         dest_path_str = None
+         if hasattr(event, "dest_path") and event.dest_path:
+             dest_path_str = (
+                 event.dest_path.decode("utf-8")
+                 if isinstance(event.dest_path, bytes)
+                 else event.dest_path
+             )
+             dest_path = Path(dest_path_str)
+             for parent in dest_path.parents:
+                 if parent.name in self.ignore_patterns:
+                     logger.debug(
+                         f"Ignoring move to ignored directory: {parent.name} - dest_path: {dest_path_str}"
+                     )
+                     return
+
+         # Map event types
+         event_type_map = {
+             "created": "created",
+             "modified": "modified",
+             "deleted": "deleted",
+             "moved": "moved",
+         }
+
+         mapped_type = event_type_map.get(event.event_type, event.event_type)
+
+         # Log the event with type
+         logger.info(
+             f"File watcher detected {mapped_type} event - graph_id: {self.graph_id}, path: {src_path_str}, event_type: {mapped_type}"
+         )
+
+         change = FileChange(
+             event_type=mapped_type,
+             src_path=src_path_str,
+             dest_path=dest_path_str,
+             is_directory=event.is_directory,
+         )
+
+         # Queue change for batch processing
+         # Use asyncio.run_coroutine_threadsafe to schedule async work from watchdog thread
+         future = asyncio.run_coroutine_threadsafe(self._queue_change(change), self.loop)
+         # Handle any errors
+         try:
+             future.result(timeout=1.0)  # Wait briefly to ensure it's scheduled
+         except Exception as e:
+             logger.error(
+                 f"Failed to queue file change: {e} - graph_id: {self.graph_id}, path: {change.src_path}"
+             )
+
+     async def _queue_change(self, change: FileChange) -> None:
+         """Queue a change for processing."""
+         async with self._lock:
+             self.pending_changes.append(change)
+
+         # Trigger callback
+         if self.callback:
+             await self.callback(self.graph_id, [change])
+
+
+ class CodebaseGraphManager:
+     """Manages Kuzu code knowledge graphs with class-level connection pooling."""
+
+     # Class-level storage to ensure single connection per graph
+     _connections: ClassVar[dict[str, kuzu.Connection]] = {}
+     _databases: ClassVar[dict[str, kuzu.Database]] = {}
+     _watchers: ClassVar[dict[str, Any]] = {}
+     _handlers: ClassVar[dict[str, CodebaseFileHandler]] = {}
+     _lock: ClassVar[anyio.Lock | None] = None
+
+     # Operation tracking for async operations
+     _operations: ClassVar[dict[str, asyncio.Task[Any]]] = {}
+     _operation_stats: ClassVar[dict[str, OperationStats]] = {}
+
+     def __init__(self, storage_dir: Path):
+         """Initialize graph manager.
+
+         Args:
+             storage_dir: Directory to store graph databases
+         """
+         self.storage_dir = storage_dir
+         self.storage_dir.mkdir(parents=True, exist_ok=True)
+
+     @classmethod
+     async def _get_lock(cls) -> anyio.Lock:
+         """Get or create the class-level lock."""
+         if cls._lock is None:
+             cls._lock = anyio.Lock()
+         return cls._lock
+
+     @classmethod
+     def _generate_graph_id(cls, repo_path: str) -> str:
+         """Generate deterministic graph ID from repository path."""
+         normalized = str(Path(repo_path).resolve())
+         return hashlib.sha256(normalized.encode()).hexdigest()[:12]
+
+     async def _update_graph_status(
+         self, graph_id: str, status: GraphStatus, operation_id: str | None = None
+     ) -> None:
+         """Update the status of a graph in the database."""
+         try:
+             # First check if the Project node exists
+             results = await self._execute_query(
+                 graph_id,
+                 "MATCH (p:Project {graph_id: $graph_id}) RETURN p",
+                 {"graph_id": graph_id},
+             )
+
+             if not results:
+                 # Project node doesn't exist yet, skip update
+                 logger.warning(
+                     f"Project node not found for graph {graph_id}, skipping status update"
+                 )
+                 return
+
+             await self._execute_query(
+                 graph_id,
+                 """
+                 MATCH (p:Project {graph_id: $graph_id})
+                 SET p.status = $status, p.current_operation_id = $operation_id
+                 """,
+                 {
+                     "graph_id": graph_id,
+                     "status": status.value,
+                     "operation_id": operation_id,
+                 },
+             )
+         except Exception as e:
+             logger.error(
+                 f"Failed to update graph status: {e} - graph_id: {graph_id}, status: {status}"
+             )
+
+     async def _store_operation_stats(
+         self, graph_id: str, stats: OperationStats
+     ) -> None:
+         """Store operation statistics in the database."""
+         try:
+             await self._execute_query(
+                 graph_id,
+                 """
+                 MATCH (p:Project {graph_id: $graph_id})
+                 SET p.last_operation = $stats
+                 """,
+                 {"graph_id": graph_id, "stats": stats.model_dump_json()},
+             )
+             # Also store in memory for quick access
+             self._operation_stats[graph_id] = stats
+         except Exception as e:
+             logger.error(f"Failed to store operation stats: {e} - graph_id: {graph_id}")
+
+     async def _initialize_graph_metadata(
+         self,
+         graph_id: str,
+         repo_path: str,
+         name: str,
+         languages: list[str] | None,
+         exclude_patterns: list[str] | None,
+     ) -> None:
+         """Initialize the graph database and create initial metadata.
+
+         This creates the database and Project node immediately so that
+         status can be tracked during the build process.
+         """
+         graph_path = self.storage_dir / f"{graph_id}.kuzu"
+
+         # Create database and connection
+         lock = await self._get_lock()
+         async with lock:
+             db = kuzu.Database(str(graph_path))
+             conn = kuzu.Connection(db)
+             self._databases[graph_id] = db
+             self._connections[graph_id] = conn
+
+         # Create the schema
+         from shotgun.codebase.core import Ingestor
+
+         def _create_schema() -> None:
+             ingestor = Ingestor(conn)
+             ingestor.create_schema()
+
+         await anyio.to_thread.run_sync(_create_schema)
+
+         # Create initial Project node with BUILDING status
+         await self._execute_query(
+             graph_id,
+             """
+             CREATE (p:Project {
+                 name: $name,
+                 repo_path: $repo_path,
+                 graph_id: $graph_id,
+                 created_at: $created_at,
+                 updated_at: $updated_at,
+                 schema_version: $schema_version,
+                 build_options: $build_options,
+                 status: $status,
+                 current_operation_id: $current_operation_id,
+                 last_operation: $last_operation,
+                 node_count: 0,
+                 relationship_count: 0,
+                 stats_updated_at: $stats_updated_at
+             })
+             """,
+             {
+                 "name": name,
+                 "repo_path": repo_path,
+                 "graph_id": graph_id,
+                 "created_at": int(time.time()),
+                 "updated_at": int(time.time()),
+                 "schema_version": "1.0.0",
+                 "build_options": json.dumps(
+                     {"languages": languages, "exclude_patterns": exclude_patterns}
+                 ),
+                 "status": GraphStatus.BUILDING.value,
+                 "current_operation_id": None,
+                 "last_operation": None,
+                 "stats_updated_at": int(time.time()),
+             },
+         )
+
+         # Ensure the Project node is committed
+         logger.info(f"Created initial Project node for graph {graph_id}")
+
+     async def build_graph(
+         self,
+         repo_path: str,
+         name: str | None = None,
+         languages: list[str] | None = None,
+         exclude_patterns: list[str] | None = None,
+     ) -> CodebaseGraph:
+         """Build a new code knowledge graph.
+
+         Args:
+             repo_path: Path to repository
+             name: Optional human-readable name
+             languages: Languages to parse (default: all supported)
+             exclude_patterns: Patterns to exclude
+
+         Returns:
+             Created graph metadata
+         """
+         repo_path = str(Path(repo_path).resolve())
+         graph_id = self._generate_graph_id(repo_path)
+
+         # Use repository name as default name
+         if not name:
+             name = Path(repo_path).name
+
+         # Determine graph path
+         graph_path = self.storage_dir / f"{graph_id}.kuzu"
+
+         # Check if graph already exists
+         if graph_path.exists():
+             raise ValueError(
+                 f"Graph already exists for {repo_path}. Use update_graph() to modify it."
+             )
+
+         # Import the builder from local core module
+         from shotgun.codebase.core import CodebaseIngestor
+
+         # Build the graph
+         logger.info(
+             f"Building code graph - graph_id: {graph_id}, repo_path: {repo_path}"
+         )
+
+         # Create database and connection
+         lock = await self._get_lock()
+         async with lock:
+             if graph_id in self._databases:
+                 # Close existing connections
+                 if graph_id in self._connections:
+                     self._connections[graph_id].close()
+                     del self._connections[graph_id]
+                 self._databases[graph_id].close()
+                 del self._databases[graph_id]
+
+         # Build using the local ingestor
+         ingestor = CodebaseIngestor(
+             db_path=str(graph_path),
+             project_name=name,
+             exclude_patterns=exclude_patterns or [],
+         )
+
+         # Run build in thread pool
+         await anyio.to_thread.run_sync(ingestor.build_graph_from_directory, repo_path)
+
+         # Get statistics
+         lock = await self._get_lock()
+         async with lock:
+             db = kuzu.Database(str(graph_path))
+             conn = kuzu.Connection(db)
+             self._databases[graph_id] = db
+             self._connections[graph_id] = conn
+
+         # Create Project node with metadata BEFORE printing statistics
+         await self._execute_query(
+             graph_id,
+             """
+             CREATE (p:Project {
+                 name: $name,
+                 repo_path: $repo_path,
+                 graph_id: $graph_id,
+                 created_at: $created_at,
+                 updated_at: $updated_at,
+                 schema_version: $schema_version,
+                 build_options: $build_options
+             })
+             """,
+             {
+                 "name": name,
+                 "repo_path": repo_path,
+                 "graph_id": graph_id,
+                 "created_at": int(time.time()),
+                 "updated_at": int(time.time()),
+                 "schema_version": "1.0.0",
+                 "build_options": json.dumps(
+                     {"languages": languages, "exclude_patterns": exclude_patterns}
+                 ),
+             },
+         )
+
+         # Now print detailed statistics (will include Project: 1)
+         await self._print_graph_statistics(graph_id)
+
+         # Get language statistics
+         lang_stats = await self._execute_query(
+             graph_id,
+             """
+             MATCH (f:File)
+             WHERE f.extension IS NOT NULL
+             RETURN f.extension as extension, COUNT(f) as count
+             """,
+         )
+
+         language_stats = {}
+         if lang_stats:
+             for row in lang_stats:
+                 ext = row.get("extension", "").lower()
+                 if ext:
+                     # Map extensions to languages
+                     lang_map = {
+                         ".py": "Python",
+                         ".js": "JavaScript",
+                         ".ts": "TypeScript",
+                         ".go": "Go",
+                         ".rs": "Rust",
+                         ".java": "Java",
+                         ".cpp": "C++",
+                         ".c": "C",
+                         ".cs": "C#",
+                         ".rb": "Ruby",
+                     }
+                     lang = lang_map.get(ext, ext)
+                     language_stats[lang] = row.get("count", 0)
+
+         # Get counts dynamically
+         node_count = await self._execute_query(
+             graph_id, "MATCH (n) RETURN COUNT(n) as count"
+         )
+         relationship_count = await self._execute_query(
+             graph_id, "MATCH ()-[r]->() RETURN COUNT(r) as count"
+         )
+
+         graph = CodebaseGraph(
+             graph_id=graph_id,
+             repo_path=repo_path,
+             graph_path=str(graph_path),
+             name=name,
+             created_at=time.time(),
+             updated_at=time.time(),
+             build_options={
+                 "languages": languages,
+                 "exclude_patterns": exclude_patterns,
+             },
+             node_count=node_count[0]["count"] if node_count else 0,
+             relationship_count=relationship_count[0]["count"]
+             if relationship_count
+             else 0,
+             language_stats=language_stats,
+             is_watching=False,
+             status=GraphStatus.READY,
+             last_operation=None,
+             current_operation_id=None,
+         )
+
+         # Update status to READY
+         await self._update_graph_status(graph_id, GraphStatus.READY)
+
+         return graph
+
+     async def update_graph(
+         self, graph_id: str, changes: list[FileChange] | None = None
+     ) -> dict[str, Any]:
+         """Update graph based on file changes.
+
+         Args:
+             graph_id: Graph to update
+             changes: List of file changes (if None, will auto-detect)
+
+         Returns:
+             Update statistics
+         """
+         # If no changes provided, use incremental update
+         if changes is None:
+             return await self.update_graph_incremental(graph_id)
+
+         start_time = time.time()
+
+         # Get graph metadata
+         graph = await self.get_graph(graph_id)
+         if not graph:
+             raise ValueError(f"Graph {graph_id} not found")
+
+         # Process changes
+         stats = {
+             "nodes_added": 0,
+             "nodes_removed": 0,
+             "relationships_added": 0,
+             "relationships_removed": 0,
+         }
+
+         lock = await self._get_lock()
+         async with lock:
+             if graph_id not in self._connections:
+                 db = kuzu.Database(graph.graph_path)
+                 conn = kuzu.Connection(db)
+                 self._databases[graph_id] = db
+                 self._connections[graph_id] = conn
+
+         # Group changes by type
+         for change in changes:
+             if change.event_type == "deleted":
+                 # Remove nodes for deleted files
+                 await self._execute_query(
+                     graph_id,
+                     "MATCH (n) WHERE n.path = $path DELETE n",
+                     {"path": change.src_path},
+                 )
+                 stats["nodes_removed"] += 1
+             elif change.event_type in ["created", "modified"]:
+                 # Re-parse and update the file
+                 # This is simplified - the actual implementation would use the ingestor
+                 logger.info(f"Updating file in graph - path: {change.src_path}")
+
+         update_time = (time.time() - start_time) * 1000
+
+         # Update metadata
+         await self._execute_query(
+             graph_id,
+             """
+             MATCH (p:Project {graph_id: $graph_id})
+             SET p.updated_at = $updated_at
+             """,
+             {"graph_id": graph_id, "updated_at": int(time.time())},
+         )
+
+         return {"update_time_ms": update_time, **stats}
+
+     async def update_graph_incremental(self, graph_id: str) -> dict[str, Any]:
+         """Update graph by automatically detecting changes.
+
+         Args:
+             graph_id: Graph to update
+
+         Returns:
+             Update statistics
+         """
+         start_time = time.time()
+
+         # Get graph metadata
+         graph = await self.get_graph(graph_id)
+         if not graph:
+             raise ValueError(f"Graph {graph_id} not found")
+
+         # Validate that the repository path still exists
+         repo_path = Path(graph.repo_path)
+         if not repo_path.exists():
+             logger.error(f"Repository path no longer exists: {graph.repo_path}")
+             raise ValueError(f"Repository path no longer exists: {graph.repo_path}")
+         if not repo_path.is_dir():
+             logger.error(f"Repository path is not a directory: {graph.repo_path}")
+             raise ValueError(f"Repository path is not a directory: {graph.repo_path}")
+
+         # Parse build options
+         build_options = graph.build_options if graph.build_options else {}
+
+         languages = build_options.get("languages")
+         exclude_patterns = build_options.get("exclude_patterns")
+
+         lock = await self._get_lock()
+         async with lock:
+             if graph_id not in self._connections:
+                 db = kuzu.Database(graph.graph_path)
+                 self._connections[graph_id] = kuzu.Connection(db)
+
+             conn = self._connections[graph_id]
+
+         # Create change detector
+         from shotgun.codebase.core.change_detector import ChangeDetector, ChangeType
+
+         detector = ChangeDetector(conn, Path(graph.repo_path))
+
+         # Load parsers first to know what languages we can actually process
+         from shotgun.codebase.core.parser_loader import load_parsers
+
+         parsers, queries = load_parsers()
+         available_languages = list(parsers.keys())
+
+         # If no languages were specified in build options, use all available parsers
+         # Otherwise, filter to intersection of requested and available languages
+         if languages is None or languages == []:
+             effective_languages = available_languages
+         else:
+             effective_languages = [
+                 lang for lang in languages if lang in available_languages
+             ]
+
+         if not effective_languages:
+             logger.warning(
+                 f"No parsers available for requested languages - requested: {languages}, available: {available_languages}"
+             )
+             return {
+                 "update_time_ms": (time.time() - start_time) * 1000,
+                 "nodes_added": 0,
+                 "nodes_removed": 0,
+                 "nodes_modified": 0,
+                 "relationships_added": 0,
+                 "relationships_removed": 0,
+                 "files_added": 0,
+                 "files_modified": 0,
+                 "files_deleted": 0,
+                 "files_skipped": 0,
+             }
+
+         # Log what languages we're using for update
+         logger.info(f"Updating graph with languages: {effective_languages}")
+
+         # Detect changes only for languages we can process
+         changes = detector.detect_changes(effective_languages, exclude_patterns)
+
+         # Also detect ALL changes to report on skipped files
+         if languages is None or (
+             languages and len(languages) > len(effective_languages)
+         ):
+             all_changes = detector.detect_changes(None, exclude_patterns)
+             skipped_count = len(all_changes) - len(changes)
+             if skipped_count > 0:
+                 logger.info(
+                     f"Skipping {skipped_count} files due to missing parsers - available_parsers: {available_languages}, requested_languages: {languages}"
+                 )
+                 # Log some examples of skipped files
+                 skipped_files = set(all_changes.keys()) - set(changes.keys())
+                 examples = list(skipped_files)[:5]
+                 if examples:
+                     logger.info(f"Examples of skipped files: {examples}")
+         else:
+             skipped_count = 0
+
+         if not changes:
+             logger.info(f"No changes detected for graph {graph_id}")
+             return {
+                 "update_time_ms": (time.time() - start_time) * 1000,
+                 "nodes_added": 0,
+                 "nodes_removed": 0,
+                 "nodes_modified": 0,
+                 "relationships_added": 0,
+                 "relationships_removed": 0,
+                 "files_added": 0,
+                 "files_modified": 0,
+                 "files_deleted": 0,
+                 "files_skipped": skipped_count,
+             }
+
+         logger.info(f"Processing {len(changes)} file changes for graph {graph_id}")
+
+         # Initialize stats
+         stats = {
+             "nodes_added": 0,
+             "nodes_removed": 0,
+             "nodes_modified": 0,
+             "relationships_added": 0,
+             "relationships_removed": 0,
+             "files_added": 0,
+             "files_modified": 0,
+             "files_deleted": 0,
+             "files_skipped": 0,
+         }
+
+         # Initialize ingestor and builder
+         from shotgun.codebase.core.ingestor import Ingestor, SimpleGraphBuilder
+
+         ingestor = Ingestor(conn)
+
+         builder = SimpleGraphBuilder(
+             ingestor, Path(graph.repo_path), parsers, queries, exclude_patterns
+         )
+
+         # Process changes by type
+         deletions = []
+         modifications = []
+         additions = []
+
+         for filepath, change_type in changes.items():
+             if change_type == ChangeType.DELETED:
+                 deletions.append(filepath)
+                 stats["files_deleted"] += 1
+             elif change_type == ChangeType.MODIFIED:
+                 modifications.append(filepath)
+                 stats["files_modified"] += 1
+             elif change_type == ChangeType.ADDED:
+                 additions.append(filepath)
+                 stats["files_added"] += 1
+
+         # Process deletions first
+         for filepath in deletions:
+             logger.debug(f"Processing deletion: {filepath}")
+             deletion_stats = ingestor.delete_file_nodes(filepath)
+             stats["nodes_removed"] += sum(deletion_stats.values())
+
+         # Process modifications (as delete + add)
+         for filepath in modifications:
+             logger.debug(f"Processing modification: {filepath}")
+             # Delete old nodes
+             deletion_stats = ingestor.delete_file_nodes(filepath)
+             stats["nodes_removed"] += sum(deletion_stats.values())
+
+             # Re-process the file
+             full_path = Path(graph.repo_path) / filepath
+             if full_path.exists():
+                 # Determine language from file extension
+                 from shotgun.codebase.core.language_config import (
+                     get_language_config,
+                 )
+
+                 lang_config = get_language_config(full_path.suffix)
+                 if lang_config and lang_config.name in parsers:
+                     builder._process_single_file(full_path, lang_config.name)
+                     stats["nodes_modified"] += 1  # Approximate
+
+         # Process additions
+         for filepath in additions:
+             logger.debug(f"Processing addition: {filepath}")
+             full_path = Path(graph.repo_path) / filepath
+             if full_path.exists():
+                 # Determine language from file extension
+                 from shotgun.codebase.core.language_config import (
+                     get_language_config,
+                 )
+
+                 lang_config = get_language_config(full_path.suffix)
+                 if lang_config and lang_config.name in parsers:
+                     builder._process_single_file(full_path, lang_config.name)
+                     stats["nodes_added"] += 1  # Approximate
+
+         # Flush all pending operations
+         ingestor.flush_all()
+
+         # Update graph metadata
+         current_time = int(time.time())
+         conn.execute(
+             """
+             MATCH (p:Project {name: $name})
+             SET p.updated_at = $time
+             """,
+             {"name": graph.name, "time": current_time},
+         )
+
+         stats["update_time_ms"] = int((time.time() - start_time) * 1000)
+         stats["files_skipped"] = skipped_count
+         logger.info(f"Incremental update complete for graph {graph_id}: {stats}")
+         return stats
+
+     async def _update_graph_impl(
+         self, graph_id: str, changes: list[FileChange] | None = None
+     ) -> dict[str, Any]:
+         """Internal implementation of graph update (runs in background)."""
+         operation_id = str(uuid.uuid4())
+         start_time = time.time()
+
+         # Create operation stats
+         operation_stats = OperationStats(
+             operation_type="update",
+             started_at=start_time,
+             completed_at=None,
+             success=False,
+             error=None,
+             stats={},
+         )
+
+         try:
+             # Update status to UPDATING
+             await self._update_graph_status(
+                 graph_id, GraphStatus.UPDATING, operation_id
+             )
+
+             # Do the actual update work
+             if changes is None:
+                 stats = await self.update_graph_incremental(graph_id)
+             else:
+                 stats = await self.update_graph(graph_id, changes)
+
+             # Update operation stats
+             operation_stats.completed_at = time.time()
+             operation_stats.success = True
+             operation_stats.stats = stats
+
+             # Update status to READY
+             await self._update_graph_status(graph_id, GraphStatus.READY, None)
+
+             # Store operation stats
+             await self._store_operation_stats(graph_id, operation_stats)
+
+             return stats
+
+         except Exception as e:
+             # Update operation stats with error
+             operation_stats.completed_at = time.time()
+             operation_stats.success = False
+             operation_stats.error = str(e)
+             operation_stats.stats["update_time_ms"] = (time.time() - start_time) * 1000
+
+             # Update status to ERROR
+             await self._update_graph_status(graph_id, GraphStatus.ERROR, None)
+
+             # Store operation stats
+             await self._store_operation_stats(graph_id, operation_stats)
+
+             logger.error(f"Update failed for graph {graph_id}: {e}")
+             raise
+         finally:
+             # Clean up operation tracking
+             if graph_id in self._operations:
+                 del self._operations[graph_id]
+
+     async def get_operation_status(self, graph_id: str) -> dict[str, Any]:
+         """Get the current operation status for a graph.
+
+         Args:
+             graph_id: Graph ID to check
+
+         Returns:
+             Dictionary with status information
+
+         Raises:
+             ValueError: If graph not found
+         """
+         graph = await self.get_graph(graph_id)
+         if not graph:
+             raise ValueError(f"Graph {graph_id} not found")
+
+         # Build response
+         response: dict[str, Any] = {
+             "graph_id": graph_id,
+             "status": graph.status.value,
+             "current_operation_id": graph.current_operation_id,
+         }
+
+         # Add last operation details if available
+         if graph.last_operation:
+             response["last_operation"] = {
+                 "operation_type": graph.last_operation.operation_type,
+                 "started_at": graph.last_operation.started_at,
+                 "completed_at": graph.last_operation.completed_at,
+                 "success": graph.last_operation.success,
+                 "error": graph.last_operation.error,
+                 "stats": graph.last_operation.stats,
+             }
+
+         # Check if there's an active operation
+         if graph_id in self._operations:
+             task = self._operations[graph_id]
+             if not task.done():
+                 response["operation_in_progress"] = True
+             else:
+                 # Operation finished but not cleaned up yet
+                 response["operation_in_progress"] = False
+                 # Try to get the result or exception
+                 try:
+                     task.result()
+                 except Exception as e:
+                     response["operation_error"] = str(e)
+         else:
+             response["operation_in_progress"] = False
+
+         return response
+
+     async def update_graph_async(
+         self, graph_id: str, changes: list[FileChange] | None = None
+     ) -> str:
+         """Start updating a graph asynchronously.
+
+         Returns:
+             Graph ID of the graph being updated
+         """
+         # Check if graph exists
+         graph = await self.get_graph(graph_id)
+         if not graph:
+             raise ValueError(f"Graph {graph_id} not found")
+
+         # Check if already updating
+         if graph_id in self._operations:
+             raise ValueError(f"Graph {graph_id} is already being updated.")
+
+         # Start the update operation in background
+         task = asyncio.create_task(self._update_graph_impl(graph_id, changes))
+         self._operations[graph_id] = task
+
+         return graph_id
+
+     async def start_watcher(
+         self,
+         graph_id: str,
+         callback: Callable[[str, list[FileChange]], Awaitable[None]] | None = None,
+         patterns: list[str] | None = None,
+         ignore_patterns: list[str] | None = None,
+     ) -> None:
+         """Start watching repository for changes.
+
+         Args:
+             graph_id: Graph to watch
+             callback: Async callback for changes
+             patterns: File patterns to watch
+             ignore_patterns: Patterns to ignore
+         """
+         graph = await self.get_graph(graph_id)
+         if not graph:
+             raise ValueError(f"Graph {graph_id} not found")
+
+         lock = await self._get_lock()
+         async with lock:
+             if graph_id in self._watchers:
+                 logger.warning(f"Watcher already running - graph_id: {graph_id}")
+                 return
+
+             # Get current event loop for thread-safe async calls
+             loop = asyncio.get_running_loop()
+
+             # Combine default ignore patterns with any custom ones
+             from shotgun.codebase.core.ingestor import IGNORE_PATTERNS
+
+             combined_ignore = IGNORE_PATTERNS.copy()
+             if ignore_patterns:
+                 combined_ignore.update(ignore_patterns)
+
+             # Create handler with loop reference and ignore patterns
+             handler = CodebaseFileHandler(graph_id, callback, loop, combined_ignore)
+             self._handlers[graph_id] = handler
+
+             # Create and start observer
+             observer = Observer()
+             observer.schedule(handler, graph.repo_path, recursive=True)
+             observer.start()
+
+             self._watchers[graph_id] = observer
+
+         logger.info(
+             f"Started file watcher - graph_id: {graph_id}, repo_path: {graph.repo_path}"
+         )
+
+     async def stop_watcher(self, graph_id: str) -> int:
+         """Stop watching repository.
+
+         Args:
+             graph_id: Graph to stop watching
+
+         Returns:
+             Number of changes processed
+         """
+         lock = await self._get_lock()
+         async with lock:
+             if graph_id not in self._watchers:
+                 logger.warning(f"No watcher running - graph_id: {graph_id}")
+                 return 0
+
+             observer = self._watchers[graph_id]
+             observer.stop()
+             observer.join(timeout=5)
+
+             # Get change count
+             handler = self._handlers.get(graph_id)
+             change_count = len(handler.pending_changes) if handler else 0
+
+             # Clean up
+             del self._watchers[graph_id]
+             if graph_id in self._handlers:
+                 del self._handlers[graph_id]
+
+         logger.info(
+             f"Stopped file watcher - graph_id: {graph_id}, changes_processed: {change_count}"
+         )
+         return change_count
+
+     async def execute_query(
+         self, graph_id: str, query: str, parameters: dict[str, Any] | None = None
+     ) -> list[dict[str, Any]]:
+         """Execute Cypher query on graph.
+
+         Args:
+             graph_id: Graph to query
+             query: Cypher query
+             parameters: Query parameters
+
+         Returns:
+             Query results
+         """
+         return await self._execute_query(graph_id, query, parameters)
+
+     async def _execute_query(
+         self, graph_id: str, query: str, parameters: dict[str, Any] | None = None
+     ) -> list[dict[str, Any]]:
+         """Internal query execution with connection management."""
+         lock = await self._get_lock()
+         async with lock:
+             if graph_id not in self._connections:
+                 # Open connection if needed
+                 graph_path = self.storage_dir / f"{graph_id}.kuzu"
+                 if not graph_path.exists():
+                     raise ValueError(f"Graph {graph_id} not found")
+
+                 db = kuzu.Database(str(graph_path))
+                 conn = kuzu.Connection(db)
+                 self._databases[graph_id] = db
+                 self._connections[graph_id] = conn
+
+             conn = self._connections[graph_id]
+
+         # Execute query in thread pool
+         def _run_query() -> list[dict[str, Any]]:
+             if parameters:
+                 result = conn.execute(query, parameters)
+             else:
+                 result = conn.execute(query)
+
+             # Collect results
+             rows = []
+             columns = (
+                 result.get_column_names() if hasattr(result, "get_column_names") else []
+             )
+
+             if hasattr(result, "has_next") and not isinstance(result, list):
+                 while result.has_next():
+                     row = result.get_next()
+                     row_dict = {}
+                     for i, col in enumerate(columns):
+                         if isinstance(row, tuple | list) and i < len(row):
+                             row_dict[col] = row[i]
+                         elif hasattr(row, col):
+                             row_dict[col] = getattr(row, col)
+                     rows.append(row_dict)
+             elif isinstance(result, list):
+                 # Convert list of QueryResult objects to list of dicts
+                 for query_result in result:
+                     row_dict = {}
+                     for col in columns:
+                         if hasattr(query_result, col):
+                             row_dict[col] = getattr(query_result, col)
+                     rows.append(row_dict)
+
+             return rows
+
+         return await anyio.to_thread.run_sync(_run_query)
+
+     async def get_graph(self, graph_id: str) -> CodebaseGraph | None:
+         """Get graph metadata.
+
+         Args:
+             graph_id: Graph ID
+
+         Returns:
+             Graph metadata or None if not found
+         """
+         graph_path = self.storage_dir / f"{graph_id}.kuzu"
+         if not graph_path.exists():
+             return None
+
+         # Query metadata from Project node
+         try:
+             results = await self._execute_query(
+                 graph_id,
+                 "MATCH (p:Project {graph_id: $graph_id}) RETURN p",
+                 {"graph_id": graph_id},
+             )
+
+             if not results:
+                 return None
+
+             project = results[0]["p"]
+
+             # Check if watcher is active
+             is_watching = graph_id in self._watchers
+
+             # Get language statistics
+             lang_stats = await self._execute_query(
+                 graph_id,
+                 """
+                 MATCH (f:File)
+                 WHERE f.extension IS NOT NULL
+                 RETURN f.extension as extension, COUNT(f) as count
+                 """,
+             )
+
+             language_stats = {}
+             if lang_stats:
+                 for row in lang_stats:
+                     ext = row.get("extension", "").lower()
+                     if ext:
+                         # Map extensions to languages
+                         lang_map = {
+                             ".py": "Python",
+                             ".js": "JavaScript",
+                             ".ts": "TypeScript",
+                             ".go": "Go",
+                             ".rs": "Rust",
+                             ".java": "Java",
+                             ".cpp": "C++",
+                             ".c": "C",
+                             ".cs": "C#",
+                             ".rb": "Ruby",
+                         }
+                         lang = lang_map.get(ext, ext)
+                         language_stats[lang] = row.get("count", 0)
+
+             # Get counts dynamically
+             node_count = await self._execute_query(
+                 graph_id, "MATCH (n) RETURN COUNT(n) as count"
+             )
+             relationship_count = await self._execute_query(
+                 graph_id, "MATCH ()-[r]->() RETURN COUNT(r) as count"
+             )
+
+             # Get detailed statistics
+             node_stats, relationship_stats = await self._get_graph_statistics(graph_id)
+
+             # Parse status
+             status_str = project.get("status", GraphStatus.READY.value)
+             try:
+                 status = GraphStatus(status_str)
+             except ValueError:
+                 status = GraphStatus.READY
+
+             # Parse last operation
+             last_operation = None
+             last_op_str = project.get("last_operation")
+             if last_op_str:
+                 try:
+                     last_op_data = json.loads(last_op_str)
+                     last_operation = OperationStats(**last_op_data)
+                 except Exception as e:
+                     logger.debug(f"Failed to parse last operation stats: {e}")
+                     last_operation = None
+
+             return CodebaseGraph(
+                 graph_id=graph_id,
+                 repo_path=project.get("repo_path", ""),
+                 graph_path=str(graph_path),
+                 name=project.get("name", ""),
+                 created_at=float(project.get("created_at", 0)),
+                 updated_at=float(project.get("updated_at", 0)),
+                 schema_version=project.get("schema_version", "1.0.0"),
+                 build_options=json.loads(project.get("build_options", "{}")),
+                 node_count=node_count[0]["count"] if node_count else 0,
+                 relationship_count=relationship_count[0]["count"]
+                 if relationship_count
+                 else 0,
+                 node_stats=node_stats,
+                 relationship_stats=relationship_stats,
+                 language_stats=language_stats,
+                 is_watching=is_watching,
+                 status=status,
+                 last_operation=last_operation,
+                 current_operation_id=project.get("current_operation_id"),
+             )
+         except Exception as e:
+             logger.error(
+                 f"Failed to get graph metadata - graph_id: {graph_id}, error: {str(e)}"
+             )
+             return None
+
+     async def list_graphs(self) -> list[CodebaseGraph]:
+         """List all available graphs.
+
+         Returns:
+             List of graph metadata
+         """
+         graphs = []
+
+         # Find all .kuzu files
+         for path in self.storage_dir.glob("*.kuzu"):
+             if path.is_file():
+                 graph_id = path.stem
+                 graph = await self.get_graph(graph_id)
+                 if graph:
+                     graphs.append(graph)
+
+         return sorted(graphs, key=lambda g: g.updated_at, reverse=True)
+
+     async def delete_graph(self, graph_id: str) -> None:
+         """Delete a graph.
+
+         Args:
+             graph_id: Graph to delete
+         """
+         # Stop watcher if running
+         if graph_id in self._watchers:
+             await self.stop_watcher(graph_id)
+
+         # Close connections
+         lock = await self._get_lock()
+         async with lock:
+             if graph_id in self._connections:
+                 self._connections[graph_id].close()
+                 del self._connections[graph_id]
+             if graph_id in self._databases:
+                 self._databases[graph_id].close()
+                 del self._databases[graph_id]
+
+         # Delete files
+         graph_path = self.storage_dir / f"{graph_id}.kuzu"
+         if graph_path.exists():
+             # Delete the database file
+             await anyio.to_thread.run_sync(graph_path.unlink)
+
+         # Also delete the WAL file if it exists
+         wal_path = self.storage_dir / f"{graph_id}.kuzu.wal"
+         if wal_path.exists():
+             await anyio.to_thread.run_sync(wal_path.unlink)
+
+         logger.info(f"Deleted graph - graph_id: {graph_id}")
+
+     async def _get_graph_statistics(
+         self, graph_id: str
+     ) -> tuple[dict[str, int], dict[str, int]]:
+         """Get detailed statistics about the graph.
+
+         Returns:
+             Tuple of (node_stats, relationship_stats)
+         """
+         node_stats = {}
+
+         # Count each node type
+         node_types = [
+             "Project",
+             "Package",
+             "Module",
+             "Class",
+             "Function",
+             "Method",
+             "File",
+             "Folder",
+             "FileMetadata",
+             "DeletionLog",
+         ]
+
+         for node_type in node_types:
+             try:
+                 result = await self._execute_query(
+                     graph_id, f"MATCH (n:{node_type}) RETURN COUNT(n) as count"
+                 )
+                 count = result[0]["count"] if result else 0
+                 if count > 0:
+                     node_stats[node_type] = count
+             except Exception as e:
+                 logger.debug(f"Failed to count {node_type} nodes: {e}")
+
+         # Count relationships - need to handle multiple tables for each type
+         rel_counts = {}
+
+         # CONTAINS relationships
+         for prefix in [
+             "CONTAINS_PACKAGE",
+             "CONTAINS_FOLDER",
+             "CONTAINS_FILE",
+             "CONTAINS_MODULE",
+         ]:
+             count = 0
+             for suffix in ["", "_PKG", "_FOLDER"]:
+                 table = f"{prefix}{suffix}"
+                 try:
+                     result = await self._execute_query(
+                         graph_id, f"MATCH ()-[r:{table}]->() RETURN COUNT(r) as count"
+                     )
+                     if result:
+                         count += result[0]["count"]
+                 except Exception as e:
+                     logger.debug(f"Failed to count {table} relationships: {e}")
+             if count > 0:
+                 rel_counts[prefix] = count
+
+         # Other relationships
+         for rel_type in [
+             "DEFINES",
+             "DEFINES_FUNC",
+             "DEFINES_METHOD",
+             "INHERITS",
+             "OVERRIDES",
+             "DEPENDS_ON_EXTERNAL",
+             "IMPORTS",
+         ]:
+             try:
+                 result = await self._execute_query(
+                     graph_id, f"MATCH ()-[r:{rel_type}]->() RETURN COUNT(r) as count"
+                 )
+                 if result and result[0]["count"] > 0:
+                     rel_counts[rel_type] = result[0]["count"]
+             except Exception as e:
+                 logger.debug(f"Failed to count {rel_type} relationships: {e}")
+
+         # CALLS relationships (multiple tables)
+         calls_count = 0
+         for table in ["CALLS", "CALLS_FM", "CALLS_MF", "CALLS_MM"]:
+             try:
+                 result = await self._execute_query(
+                     graph_id, f"MATCH ()-[r:{table}]->() RETURN COUNT(r) as count"
+                 )
+                 if result:
+                     calls_count += result[0]["count"]
+             except Exception as e:
+                 logger.debug(f"Failed to count {table} relationships: {e}")
+         if calls_count > 0:
+             rel_counts["CALLS (total)"] = calls_count
+
+         # TRACKS relationships
+         tracks_count = 0
+         for entity in ["Module", "Class", "Function", "Method"]:
+             try:
+                 result = await self._execute_query(
+                     graph_id,
+                     f"MATCH ()-[r:TRACKS_{entity}]->() RETURN COUNT(r) as count",
+                 )
+                 if result:
+                     tracks_count += result[0]["count"]
+             except Exception as e:
+                 logger.debug(f"Failed to count TRACKS_{entity} relationships: {e}")
+         if tracks_count > 0:
+             rel_counts["TRACKS (total)"] = tracks_count
+
+         return node_stats, rel_counts
+
+     async def _print_graph_statistics(self, graph_id: str) -> None:
+         """Print detailed statistics about the graph."""
+         logger.info("\n=== Graph Statistics ===")
+
+         node_stats, rel_stats = await self._get_graph_statistics(graph_id)
+
+         # Print node stats
+         for node_type in [
+             "Project",
+             "Package",
+             "Module",
+             "Class",
+             "Function",
+             "Method",
+             "File",
+             "Folder",
+             "FileMetadata",
+             "DeletionLog",
+         ]:
+             count = node_stats.get(node_type, 0)
+             logger.info(f"{node_type}: {count}")
+
+         logger.info("\nRelationship counts:")
+         for rel_type, count in sorted(rel_stats.items()):
+             logger.info(f"{rel_type}: {count}")
+
+     async def _build_graph_impl(
+         self,
+         graph_id: str,
+         repo_path: str,
+         name: str,
+         languages: list[str] | None,
+         exclude_patterns: list[str] | None,
+     ) -> CodebaseGraph:
+         """Internal implementation of graph building (runs in background)."""
+         operation_id = str(uuid.uuid4())
+         start_time = time.time()
+
+         # Create operation stats
+         operation_stats = OperationStats(
+             operation_type="build",
+             started_at=start_time,
+             completed_at=None,
+             success=False,
+             error=None,
+             stats={},
+         )
+
+         try:
+             # Update status to BUILDING
+             await self._update_graph_status(
+                 graph_id, GraphStatus.BUILDING, operation_id
+             )
+
+             # Do the actual build work
+             graph = await self._do_build_graph(
+                 graph_id, repo_path, name, languages, exclude_patterns
+             )
+
+             # Update operation stats
+             operation_stats.completed_at = time.time()
+             operation_stats.success = True
+             operation_stats.stats = {
+                 "node_count": graph.node_count,
+                 "relationship_count": graph.relationship_count,
+                 "language_stats": graph.language_stats,
+                 "build_time_ms": (time.time() - start_time) * 1000,
+             }
+
+             # Update status to READY
+             await self._update_graph_status(graph_id, GraphStatus.READY, None)
+
+             # Store operation stats
+             await self._store_operation_stats(graph_id, operation_stats)
+
+             return graph
+
+         except Exception as e:
+             # Update operation stats with error
+             operation_stats.completed_at = time.time()
+             operation_stats.success = False
+             operation_stats.error = str(e)
+             operation_stats.stats["build_time_ms"] = (time.time() - start_time) * 1000
+
+             # Update status to ERROR
+             await self._update_graph_status(graph_id, GraphStatus.ERROR, None)
+
+             # Store operation stats
+             await self._store_operation_stats(graph_id, operation_stats)
+
+             logger.error(f"Build failed for graph {graph_id}: {e}")
+             raise
+         finally:
+             # Clean up operation tracking
+             if graph_id in self._operations:
+                 del self._operations[graph_id]
+
+     async def _do_build_graph(
+         self,
+         graph_id: str,
+         repo_path: str,
+         name: str,
+         languages: list[str] | None,
+         exclude_patterns: list[str] | None,
+     ) -> CodebaseGraph:
+         """Execute the actual graph building logic (extracted from original build_graph)."""
+         # The database and Project node already exist from _initialize_graph_metadata
+
+         # Get existing connection
+         lock = await self._get_lock()
+         async with lock:
+             if graph_id not in self._connections:
+                 raise RuntimeError(f"Connection not found for graph {graph_id}")
+             conn = self._connections[graph_id]
+
+         # Build the graph
+         logger.info(
+             f"Building code graph - graph_id: {graph_id}, repo_path: {repo_path}"
+         )
+
+         # Build the graph using our existing connection
+         def _build_graph() -> None:
+             from shotgun.codebase.core import Ingestor, SimpleGraphBuilder
+             from shotgun.codebase.core.parser_loader import load_parsers
+
+             # Load parsers for requested languages
+             parsers, queries = load_parsers()
+
+             # Log available parsers before filtering
+             logger.info(f"Available parsers: {list(parsers.keys())}")
+
+             # Filter parsers to requested languages if specified
+             if languages:
+                 parsers = {
+                     lang: parser
+                     for lang, parser in parsers.items()
+                     if lang in languages
+                 }
+                 queries = {
+                     lang: query for lang, query in queries.items() if lang in languages
+                 }
+                 logger.info(
+                     f"Filtered parsers to requested languages {languages}: {list(parsers.keys())}"
+                 )
+             else:
+                 logger.info(f"Using all available parsers: {list(parsers.keys())}")
+
+             # Create ingestor with existing connection
+             ingestor = Ingestor(conn)
+
+             # Create builder
+             builder = SimpleGraphBuilder(
+                 ingestor=ingestor,
+                 repo_path=Path(repo_path),
+                 parsers=parsers,
+                 queries=queries,
+                 exclude_patterns=exclude_patterns,
+             )
+
+             # Build the graph
+             builder.run()
+
+         # Run build in thread pool
+         await anyio.to_thread.run_sync(_build_graph)
+
+         # Now print detailed statistics (will include Project: 1)
+         await self._print_graph_statistics(graph_id)
+
+         # Get the updated graph metadata
+         graph = await self.get_graph(graph_id)
+         if not graph:
+             raise RuntimeError(f"Failed to retrieve graph {graph_id} after build")
+
+         return graph
+
+     async def build_graph_async(
+         self,
+         repo_path: str,
+         name: str | None = None,
+         languages: list[str] | None = None,
+         exclude_patterns: list[str] | None = None,
+     ) -> str:
+         """Start building a new code knowledge graph asynchronously.
+
+         Returns:
+             Graph ID of the graph being built
+         """
+         repo_path = str(Path(repo_path).resolve())
+         graph_id = self._generate_graph_id(repo_path)
+
+         # Use repository name as default name
+         if not name:
+             name = Path(repo_path).name
+
+         # Check if graph already exists
+         graph_path = self.storage_dir / f"{graph_id}.kuzu"
+         if graph_path.exists():
+             raise ValueError(
+                 f"Graph already exists for {repo_path}. Use update_graph() to modify it."
+             )
+
+         # Check if already building
+         if graph_id in self._operations:
+             raise ValueError(f"Graph {graph_id} is already being built.")
+
+         # Create the database and initial Project node immediately
+         # This allows status tracking during the build
+         await self._initialize_graph_metadata(
+             graph_id=graph_id,
+             repo_path=repo_path,
+             name=name,
+             languages=languages,
+             exclude_patterns=exclude_patterns,
+         )
+
+         # Start the build operation in background
+         task = asyncio.create_task(
+             self._build_graph_impl(
+                 graph_id, repo_path, name, languages, exclude_patterns
+             )
+         )
+         self._operations[graph_id] = task
+
+         return graph_id
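
A minimal usage sketch of the async API added above (illustrative only, not part of the package: the storage directory and repository path are placeholders, and the f.path property in the example query is inferred from the n.path usage elsewhere in this file):

import asyncio
from pathlib import Path

from shotgun.codebase.core.manager import CodebaseGraphManager
from shotgun.codebase.models import GraphStatus


async def main() -> None:
    manager = CodebaseGraphManager(storage_dir=Path("/tmp/shotgun-graphs"))

    # Kick off a background build; returns the graph ID immediately while
    # _build_graph_impl runs as an asyncio task and updates the Project node.
    graph_id = await manager.build_graph_async(repo_path=".")

    # Poll the persisted status until the build leaves BUILDING.
    while True:
        status = await manager.get_operation_status(graph_id)
        if status["status"] != GraphStatus.BUILDING.value:
            break
        await asyncio.sleep(1)

    # Once READY, issue Cypher against the Kuzu graph.
    if status["status"] == GraphStatus.READY.value:
        rows = await manager.execute_query(
            graph_id, "MATCH (f:File) RETURN f.path AS path LIMIT 5"
        )
        for row in rows:
            print(row["path"])


if __name__ == "__main__":
    asyncio.run(main())

Because build status is persisted on the Project node rather than held only in memory, the same polling loop should also work from a manager instance created later.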