shotgun_sh-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of shotgun-sh has been flagged as potentially problematic.
Files changed (130)
  1. shotgun/__init__.py +5 -0
  2. shotgun/agents/__init__.py +1 -0
  3. shotgun/agents/agent_manager.py +651 -0
  4. shotgun/agents/common.py +549 -0
  5. shotgun/agents/config/__init__.py +13 -0
  6. shotgun/agents/config/constants.py +17 -0
  7. shotgun/agents/config/manager.py +294 -0
  8. shotgun/agents/config/models.py +185 -0
  9. shotgun/agents/config/provider.py +206 -0
  10. shotgun/agents/conversation_history.py +106 -0
  11. shotgun/agents/conversation_manager.py +105 -0
  12. shotgun/agents/export.py +96 -0
  13. shotgun/agents/history/__init__.py +5 -0
  14. shotgun/agents/history/compaction.py +85 -0
  15. shotgun/agents/history/constants.py +19 -0
  16. shotgun/agents/history/context_extraction.py +108 -0
  17. shotgun/agents/history/history_building.py +104 -0
  18. shotgun/agents/history/history_processors.py +426 -0
  19. shotgun/agents/history/message_utils.py +84 -0
  20. shotgun/agents/history/token_counting.py +429 -0
  21. shotgun/agents/history/token_estimation.py +138 -0
  22. shotgun/agents/messages.py +35 -0
  23. shotgun/agents/models.py +275 -0
  24. shotgun/agents/plan.py +98 -0
  25. shotgun/agents/research.py +108 -0
  26. shotgun/agents/specify.py +98 -0
  27. shotgun/agents/tasks.py +96 -0
  28. shotgun/agents/tools/__init__.py +34 -0
  29. shotgun/agents/tools/codebase/__init__.py +28 -0
  30. shotgun/agents/tools/codebase/codebase_shell.py +256 -0
  31. shotgun/agents/tools/codebase/directory_lister.py +141 -0
  32. shotgun/agents/tools/codebase/file_read.py +144 -0
  33. shotgun/agents/tools/codebase/models.py +252 -0
  34. shotgun/agents/tools/codebase/query_graph.py +67 -0
  35. shotgun/agents/tools/codebase/retrieve_code.py +81 -0
  36. shotgun/agents/tools/file_management.py +218 -0
  37. shotgun/agents/tools/user_interaction.py +37 -0
  38. shotgun/agents/tools/web_search/__init__.py +60 -0
  39. shotgun/agents/tools/web_search/anthropic.py +144 -0
  40. shotgun/agents/tools/web_search/gemini.py +85 -0
  41. shotgun/agents/tools/web_search/openai.py +98 -0
  42. shotgun/agents/tools/web_search/utils.py +20 -0
  43. shotgun/build_constants.py +20 -0
  44. shotgun/cli/__init__.py +1 -0
  45. shotgun/cli/codebase/__init__.py +5 -0
  46. shotgun/cli/codebase/commands.py +202 -0
  47. shotgun/cli/codebase/models.py +21 -0
  48. shotgun/cli/config.py +275 -0
  49. shotgun/cli/export.py +81 -0
  50. shotgun/cli/models.py +10 -0
  51. shotgun/cli/plan.py +73 -0
  52. shotgun/cli/research.py +85 -0
  53. shotgun/cli/specify.py +69 -0
  54. shotgun/cli/tasks.py +78 -0
  55. shotgun/cli/update.py +152 -0
  56. shotgun/cli/utils.py +25 -0
  57. shotgun/codebase/__init__.py +12 -0
  58. shotgun/codebase/core/__init__.py +46 -0
  59. shotgun/codebase/core/change_detector.py +358 -0
  60. shotgun/codebase/core/code_retrieval.py +243 -0
  61. shotgun/codebase/core/ingestor.py +1497 -0
  62. shotgun/codebase/core/language_config.py +297 -0
  63. shotgun/codebase/core/manager.py +1662 -0
  64. shotgun/codebase/core/nl_query.py +331 -0
  65. shotgun/codebase/core/parser_loader.py +128 -0
  66. shotgun/codebase/models.py +111 -0
  67. shotgun/codebase/service.py +206 -0
  68. shotgun/logging_config.py +227 -0
  69. shotgun/main.py +167 -0
  70. shotgun/posthog_telemetry.py +158 -0
  71. shotgun/prompts/__init__.py +5 -0
  72. shotgun/prompts/agents/__init__.py +1 -0
  73. shotgun/prompts/agents/export.j2 +350 -0
  74. shotgun/prompts/agents/partials/codebase_understanding.j2 +87 -0
  75. shotgun/prompts/agents/partials/common_agent_system_prompt.j2 +37 -0
  76. shotgun/prompts/agents/partials/content_formatting.j2 +65 -0
  77. shotgun/prompts/agents/partials/interactive_mode.j2 +26 -0
  78. shotgun/prompts/agents/plan.j2 +144 -0
  79. shotgun/prompts/agents/research.j2 +69 -0
  80. shotgun/prompts/agents/specify.j2 +51 -0
  81. shotgun/prompts/agents/state/codebase/codebase_graphs_available.j2 +19 -0
  82. shotgun/prompts/agents/state/system_state.j2 +31 -0
  83. shotgun/prompts/agents/tasks.j2 +143 -0
  84. shotgun/prompts/codebase/__init__.py +1 -0
  85. shotgun/prompts/codebase/cypher_query_patterns.j2 +223 -0
  86. shotgun/prompts/codebase/cypher_system.j2 +28 -0
  87. shotgun/prompts/codebase/enhanced_query_context.j2 +10 -0
  88. shotgun/prompts/codebase/partials/cypher_rules.j2 +24 -0
  89. shotgun/prompts/codebase/partials/graph_schema.j2 +30 -0
  90. shotgun/prompts/codebase/partials/temporal_context.j2 +21 -0
  91. shotgun/prompts/history/__init__.py +1 -0
  92. shotgun/prompts/history/incremental_summarization.j2 +53 -0
  93. shotgun/prompts/history/summarization.j2 +46 -0
  94. shotgun/prompts/loader.py +140 -0
  95. shotgun/py.typed +0 -0
  96. shotgun/sdk/__init__.py +13 -0
  97. shotgun/sdk/codebase.py +219 -0
  98. shotgun/sdk/exceptions.py +17 -0
  99. shotgun/sdk/models.py +189 -0
  100. shotgun/sdk/services.py +23 -0
  101. shotgun/sentry_telemetry.py +87 -0
  102. shotgun/telemetry.py +93 -0
  103. shotgun/tui/__init__.py +0 -0
  104. shotgun/tui/app.py +116 -0
  105. shotgun/tui/commands/__init__.py +76 -0
  106. shotgun/tui/components/prompt_input.py +69 -0
  107. shotgun/tui/components/spinner.py +86 -0
  108. shotgun/tui/components/splash.py +25 -0
  109. shotgun/tui/components/vertical_tail.py +13 -0
  110. shotgun/tui/screens/chat.py +782 -0
  111. shotgun/tui/screens/chat.tcss +43 -0
  112. shotgun/tui/screens/chat_screen/__init__.py +0 -0
  113. shotgun/tui/screens/chat_screen/command_providers.py +219 -0
  114. shotgun/tui/screens/chat_screen/hint_message.py +40 -0
  115. shotgun/tui/screens/chat_screen/history.py +221 -0
  116. shotgun/tui/screens/directory_setup.py +113 -0
  117. shotgun/tui/screens/provider_config.py +221 -0
  118. shotgun/tui/screens/splash.py +31 -0
  119. shotgun/tui/styles.tcss +10 -0
  120. shotgun/tui/utils/__init__.py +5 -0
  121. shotgun/tui/utils/mode_progress.py +257 -0
  122. shotgun/utils/__init__.py +5 -0
  123. shotgun/utils/env_utils.py +35 -0
  124. shotgun/utils/file_system_utils.py +36 -0
  125. shotgun/utils/update_checker.py +375 -0
  126. shotgun_sh-0.1.0.dist-info/METADATA +466 -0
  127. shotgun_sh-0.1.0.dist-info/RECORD +130 -0
  128. shotgun_sh-0.1.0.dist-info/WHEEL +4 -0
  129. shotgun_sh-0.1.0.dist-info/entry_points.txt +2 -0
  130. shotgun_sh-0.1.0.dist-info/licenses/LICENSE +21 -0
shotgun/codebase/core/manager.py
@@ -0,0 +1,1662 @@
1
+ """Kuzu graph database manager for code knowledge graphs."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import hashlib
7
+ import json
8
+ import time
9
+ import uuid
10
+ from collections.abc import Awaitable, Callable
11
+ from pathlib import Path
12
+ from typing import Any, ClassVar
13
+
14
+ import anyio
15
+ import kuzu
16
+ from watchdog.events import FileSystemEvent, FileSystemEventHandler
17
+ from watchdog.observers import Observer
18
+
19
+ from shotgun.codebase.models import (
20
+ CodebaseGraph,
21
+ FileChange,
22
+ GraphStatus,
23
+ OperationStats,
24
+ )
25
+ from shotgun.logging_config import get_logger
26
+
27
+ logger = get_logger(__name__)
28
+
29
+
30
+ class CodebaseAlreadyIndexedError(Exception):
31
+ """Raised when a codebase is already indexed."""
32
+
33
+ def __init__(self, repo_path: str):
34
+ self.repo_path = repo_path
35
+ super().__init__(f"Codebase already indexed: {repo_path}")
36
+
37
+
38
+ class CodebaseFileHandler(FileSystemEventHandler):
39
+ """Handles file system events for code graph updates."""
40
+
41
+ def __init__(
42
+ self,
43
+ graph_id: str,
44
+ callback: Callable[[str, list[FileChange]], Awaitable[None]] | None,
45
+ loop: asyncio.AbstractEventLoop,
46
+ ignore_patterns: set[str] | None = None,
47
+ ):
48
+ self.graph_id = graph_id
49
+ self.callback = callback
50
+ self.loop = loop
51
+ self.pending_changes: list[FileChange] = []
52
+ self._lock = anyio.Lock()
53
+ # Import default ignore patterns from ingestor
54
+ from shotgun.codebase.core.ingestor import IGNORE_PATTERNS
55
+
56
+ self.ignore_patterns = ignore_patterns or IGNORE_PATTERNS
57
+
58
+ def on_any_event(self, event: FileSystemEvent) -> None:
59
+ """Handle any file system event."""
60
+ if event.is_directory:
61
+ return
62
+
63
+ # Filter out temporary files
64
+ src_path_str = (
65
+ event.src_path.decode("utf-8")
66
+ if isinstance(event.src_path, bytes)
67
+ else event.src_path
68
+ )
69
+ path = Path(src_path_str)
70
+ filename = path.name
71
+
72
+ # Check if any parent directory should be ignored
73
+ for parent in path.parents:
74
+ if parent.name in self.ignore_patterns:
75
+ logger.debug(
76
+ f"Ignoring file in ignored directory: {parent.name} - path: {src_path_str}"
77
+ )
78
+ return
79
+
80
+ # Skip various temporary files
81
+ if any(
82
+ [
83
+ filename.startswith("."), # Hidden files
84
+ filename.endswith(".swp"), # Vim swap files
85
+ filename.endswith(".tmp"), # Generic temp files
86
+ filename.endswith("~"), # Backup files
87
+ "#" in filename, # Emacs temp files
88
+ filename.startswith("__pycache__"), # Python cache
89
+ path.suffix in [".pyc", ".pyo"], # Python compiled files
90
+ # Numeric temp files (like test_watcher_fix.py.tmp.27477.1755109972829)
91
+ any(part.isdigit() and len(part) > 4 for part in filename.split(".")),
92
+ ]
93
+ ):
94
+ logger.debug(
95
+ f"Ignoring temporary file: {filename} - event_type: {event.event_type}"
96
+ )
97
+ return
98
+
99
+ # For move events, also check destination path
100
+ dest_path_str = None
101
+ if hasattr(event, "dest_path") and event.dest_path:
102
+ dest_path_str = (
103
+ event.dest_path.decode("utf-8")
104
+ if isinstance(event.dest_path, bytes)
105
+ else event.dest_path
106
+ )
107
+ dest_path = Path(dest_path_str)
108
+ for parent in dest_path.parents:
109
+ if parent.name in self.ignore_patterns:
110
+ logger.debug(
111
+ f"Ignoring move to ignored directory: {parent.name} - dest_path: {dest_path_str}"
112
+ )
113
+ return
114
+
115
+ # Map event types
116
+ event_type_map = {
117
+ "created": "created",
118
+ "modified": "modified",
119
+ "deleted": "deleted",
120
+ "moved": "moved",
121
+ }
122
+
123
+ mapped_type = event_type_map.get(event.event_type, event.event_type)
124
+
125
+ # Log the event with type
126
+ logger.info(
127
+ f"File watcher detected {mapped_type} event - graph_id: {self.graph_id}, path: {src_path_str}, event_type: {mapped_type}"
128
+ )
129
+
130
+ change = FileChange(
131
+ event_type=mapped_type,
132
+ src_path=src_path_str,
133
+ dest_path=dest_path_str,
134
+ is_directory=event.is_directory,
135
+ )
136
+
137
+ # Queue change for batch processing
138
+ # Use asyncio.run_coroutine_threadsafe to schedule async work from watchdog thread
139
+ future = asyncio.run_coroutine_threadsafe(self._queue_change(change), self.loop)
140
+ # Handle any errors
141
+ try:
142
+ future.result(timeout=1.0) # Wait briefly to ensure it's scheduled
143
+ except Exception as e:
144
+ logger.error(
145
+ f"Failed to queue file change: {e} - graph_id: {self.graph_id}, path: {change.src_path}"
146
+ )
147
+
148
+ async def _queue_change(self, change: FileChange) -> None:
149
+ """Queue a change for processing."""
150
+ async with self._lock:
151
+ self.pending_changes.append(change)
152
+
153
+ # Trigger callback
154
+ if self.callback:
155
+ await self.callback(self.graph_id, [change])
156
+
157
+
158
+ class CodebaseGraphManager:
159
+ """Manages Kuzu code knowledge graphs with class-level connection pooling."""
160
+
161
+ # Class-level storage to ensure single connection per graph
162
+ _connections: ClassVar[dict[str, kuzu.Connection]] = {}
163
+ _databases: ClassVar[dict[str, kuzu.Database]] = {}
164
+ _watchers: ClassVar[dict[str, Any]] = {}
165
+ _handlers: ClassVar[dict[str, CodebaseFileHandler]] = {}
166
+ _lock: ClassVar[anyio.Lock | None] = None
167
+
168
+ # Operation tracking for async operations
169
+ _operations: ClassVar[dict[str, asyncio.Task[Any]]] = {}
170
+ _operation_stats: ClassVar[dict[str, OperationStats]] = {}
171
+
172
+ def __init__(self, storage_dir: Path):
173
+ """Initialize graph manager.
174
+
175
+ Args:
176
+ storage_dir: Directory to store graph databases
177
+ """
178
+ self.storage_dir = storage_dir
179
+ self.storage_dir.mkdir(parents=True, exist_ok=True)
180
+
181
+ @classmethod
182
+ async def _get_lock(cls) -> anyio.Lock:
183
+ """Get or create the class-level lock."""
184
+ if cls._lock is None:
185
+ cls._lock = anyio.Lock()
186
+ return cls._lock
187
+
188
+ @classmethod
189
+ def _generate_graph_id(cls, repo_path: str) -> str:
190
+ """Generate deterministic graph ID from repository path."""
191
+ normalized = str(Path(repo_path).resolve())
192
+ return hashlib.sha256(normalized.encode()).hexdigest()[:12]
193
+
194
+ async def _update_graph_status(
195
+ self, graph_id: str, status: GraphStatus, operation_id: str | None = None
196
+ ) -> None:
197
+ """Update the status of a graph in the database."""
198
+ try:
199
+ # First check if the Project node exists
200
+ results = await self._execute_query(
201
+ graph_id,
202
+ "MATCH (p:Project {graph_id: $graph_id}) RETURN p",
203
+ {"graph_id": graph_id},
204
+ )
205
+
206
+ if not results:
207
+ # Project node doesn't exist yet, skip update
208
+ logger.warning(
209
+ f"Project node not found for graph {graph_id}, skipping status update"
210
+ )
211
+ return
212
+
213
+ await self._execute_query(
214
+ graph_id,
215
+ """
216
+ MATCH (p:Project {graph_id: $graph_id})
217
+ SET p.status = $status, p.current_operation_id = $operation_id
218
+ """,
219
+ {
220
+ "graph_id": graph_id,
221
+ "status": status.value,
222
+ "operation_id": operation_id,
223
+ },
224
+ )
225
+ except Exception as e:
226
+ logger.error(
227
+ f"Failed to update graph status: {e} - graph_id: {graph_id}, status: {status}"
228
+ )
229
+
230
+ async def _store_operation_stats(
231
+ self, graph_id: str, stats: OperationStats
232
+ ) -> None:
233
+ """Store operation statistics in the database."""
234
+ try:
235
+ await self._execute_query(
236
+ graph_id,
237
+ """
238
+ MATCH (p:Project {graph_id: $graph_id})
239
+ SET p.last_operation = $stats
240
+ """,
241
+ {"graph_id": graph_id, "stats": stats.model_dump_json()},
242
+ )
243
+ # Also store in memory for quick access
244
+ self._operation_stats[graph_id] = stats
245
+ except Exception as e:
246
+ logger.error(f"Failed to store operation stats: {e} - graph_id: {graph_id}")
247
+
248
+ async def _initialize_graph_metadata(
249
+ self,
250
+ graph_id: str,
251
+ repo_path: str,
252
+ name: str,
253
+ languages: list[str] | None,
254
+ exclude_patterns: list[str] | None,
255
+ indexed_from_cwd: str | None = None,
256
+ ) -> None:
257
+ """Initialize the graph database and create initial metadata.
258
+
259
+ This creates the database and Project node immediately so that
260
+ status can be tracked during the build process.
261
+ """
262
+ graph_path = self.storage_dir / f"{graph_id}.kuzu"
263
+
264
+ # Create database and connection
265
+ lock = await self._get_lock()
266
+ async with lock:
267
+ db = kuzu.Database(str(graph_path))
268
+ conn = kuzu.Connection(db)
269
+ self._databases[graph_id] = db
270
+ self._connections[graph_id] = conn
271
+
272
+ # Create the schema
273
+ from shotgun.codebase.core import Ingestor
274
+
275
+ def _create_schema() -> None:
276
+ ingestor = Ingestor(conn)
277
+ ingestor.create_schema()
278
+
279
+ await anyio.to_thread.run_sync(_create_schema)
280
+
281
+ # Create initial Project node with BUILDING status
282
+ await self._execute_query(
283
+ graph_id,
284
+ """
285
+ CREATE (p:Project {
286
+ name: $name,
287
+ repo_path: $repo_path,
288
+ graph_id: $graph_id,
289
+ created_at: $created_at,
290
+ updated_at: $updated_at,
291
+ schema_version: $schema_version,
292
+ build_options: $build_options,
293
+ status: $status,
294
+ current_operation_id: $current_operation_id,
295
+ last_operation: $last_operation,
296
+ node_count: 0,
297
+ relationship_count: 0,
298
+ stats_updated_at: $stats_updated_at,
299
+ indexed_from_cwds: $indexed_from_cwds
300
+ })
301
+ """,
302
+ {
303
+ "name": name,
304
+ "repo_path": repo_path,
305
+ "graph_id": graph_id,
306
+ "created_at": int(time.time()),
307
+ "updated_at": int(time.time()),
308
+ "schema_version": "1.0.0",
309
+ "build_options": json.dumps(
310
+ {"languages": languages, "exclude_patterns": exclude_patterns}
311
+ ),
312
+ "status": GraphStatus.BUILDING.value,
313
+ "current_operation_id": None,
314
+ "last_operation": None,
315
+ "stats_updated_at": int(time.time()),
316
+ "indexed_from_cwds": json.dumps(
317
+ [indexed_from_cwd] if indexed_from_cwd else []
318
+ ),
319
+ },
320
+ )
321
+
322
+ # Ensure the Project node is committed
323
+ logger.info(f"Created initial Project node for graph {graph_id}")
324
+
325
+ async def build_graph(
326
+ self,
327
+ repo_path: str,
328
+ name: str | None = None,
329
+ languages: list[str] | None = None,
330
+ exclude_patterns: list[str] | None = None,
331
+ indexed_from_cwd: str | None = None,
332
+ ) -> CodebaseGraph:
333
+ """Build a new code knowledge graph.
334
+
335
+ Args:
336
+ repo_path: Path to repository
337
+ name: Optional human-readable name
338
+ languages: Languages to parse (default: all supported)
339
+ exclude_patterns: Patterns to exclude
340
+
341
+ Returns:
342
+ Created graph metadata
343
+ """
344
+ repo_path = str(Path(repo_path).resolve())
345
+ graph_id = self._generate_graph_id(repo_path)
346
+
347
+ # Use repository name as default name
348
+ if not name:
349
+ name = Path(repo_path).name
350
+
351
+ # Determine graph path
352
+ graph_path = self.storage_dir / f"{graph_id}.kuzu"
353
+
354
+ # Check if graph already exists
355
+ if graph_path.exists():
356
+ raise CodebaseAlreadyIndexedError(repo_path)
357
+
358
+ # Import the builder from local core module
359
+ from shotgun.codebase.core import CodebaseIngestor
360
+
361
+ # Build the graph
362
+ logger.info(
363
+ f"Building code graph - graph_id: {graph_id}, repo_path: {repo_path}"
364
+ )
365
+
366
+ # Create database and connection
367
+ lock = await self._get_lock()
368
+ async with lock:
369
+ if graph_id in self._databases:
370
+ # Close existing connections
371
+ if graph_id in self._connections:
372
+ self._connections[graph_id].close()
373
+ del self._connections[graph_id]
374
+ self._databases[graph_id].close()
375
+ del self._databases[graph_id]
376
+
377
+ # Build using the local ingestor
378
+ ingestor = CodebaseIngestor(
379
+ db_path=str(graph_path),
380
+ project_name=name,
381
+ exclude_patterns=exclude_patterns or [],
382
+ )
383
+
384
+ # Run build in thread pool
385
+ await anyio.to_thread.run_sync(ingestor.build_graph_from_directory, repo_path)
386
+
387
+ # Get statistics
388
+ lock = await self._get_lock()
389
+ async with lock:
390
+ db = kuzu.Database(str(graph_path))
391
+ conn = kuzu.Connection(db)
392
+ self._databases[graph_id] = db
393
+ self._connections[graph_id] = conn
394
+
395
+ # Create Project node with metadata BEFORE printing statistics
396
+ await self._execute_query(
397
+ graph_id,
398
+ """
399
+ CREATE (p:Project {
400
+ name: $name,
401
+ repo_path: $repo_path,
402
+ graph_id: $graph_id,
403
+ created_at: $created_at,
404
+ updated_at: $updated_at,
405
+ schema_version: $schema_version,
406
+ build_options: $build_options,
407
+ indexed_from_cwds: $indexed_from_cwds
408
+ })
409
+ """,
410
+ {
411
+ "name": name,
412
+ "repo_path": repo_path,
413
+ "graph_id": graph_id,
414
+ "created_at": int(time.time()),
415
+ "updated_at": int(time.time()),
416
+ "schema_version": "1.0.0",
417
+ "build_options": json.dumps(
418
+ {"languages": languages, "exclude_patterns": exclude_patterns}
419
+ ),
420
+ "indexed_from_cwds": json.dumps(
421
+ [indexed_from_cwd] if indexed_from_cwd else []
422
+ ),
423
+ },
424
+ )
425
+
426
+ # Now print detailed statistics (will include Project: 1)
427
+ await self._print_graph_statistics(graph_id)
428
+
429
+ # Get language statistics
430
+ lang_stats = await self._execute_query(
431
+ graph_id,
432
+ """
433
+ MATCH (f:File)
434
+ WHERE f.extension IS NOT NULL
435
+ RETURN f.extension as extension, COUNT(f) as count
436
+ """,
437
+ )
438
+
439
+ language_stats = {}
440
+ if lang_stats:
441
+ for row in lang_stats:
442
+ ext = row.get("extension", "").lower()
443
+ if ext:
444
+ # Map extensions to languages
445
+ lang_map = {
446
+ ".py": "Python",
447
+ ".js": "JavaScript",
448
+ ".ts": "TypeScript",
449
+ ".go": "Go",
450
+ ".rs": "Rust",
451
+ ".java": "Java",
452
+ ".cpp": "C++",
453
+ ".c": "C",
454
+ ".cs": "C#",
455
+ ".rb": "Ruby",
456
+ }
457
+ lang = lang_map.get(ext, ext)
458
+ language_stats[lang] = row.get("count", 0)
459
+
460
+ # Get counts dynamically
461
+ node_count = await self._execute_query(
462
+ graph_id, "MATCH (n) RETURN COUNT(n) as count"
463
+ )
464
+ relationship_count = await self._execute_query(
465
+ graph_id, "MATCH ()-[r]->() RETURN COUNT(r) as count"
466
+ )
467
+
468
+ graph = CodebaseGraph(
469
+ graph_id=graph_id,
470
+ repo_path=repo_path,
471
+ graph_path=str(graph_path),
472
+ name=name,
473
+ created_at=time.time(),
474
+ updated_at=time.time(),
475
+ build_options={
476
+ "languages": languages,
477
+ "exclude_patterns": exclude_patterns,
478
+ },
479
+ node_count=node_count[0]["count"] if node_count else 0,
480
+ relationship_count=relationship_count[0]["count"]
481
+ if relationship_count
482
+ else 0,
483
+ language_stats=language_stats,
484
+ is_watching=False,
485
+ status=GraphStatus.READY,
486
+ last_operation=None,
487
+ current_operation_id=None,
488
+ indexed_from_cwds=[indexed_from_cwd] if indexed_from_cwd else [],
489
+ )
490
+
491
+ # Update status to READY
492
+ await self._update_graph_status(graph_id, GraphStatus.READY)
493
+
494
+ return graph
495
+
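# Illustrative usage sketch (not part of the published file; the storage path,
# repo path, and name below are hypothetical):
#
#   manager = CodebaseGraphManager(Path.home() / ".shotgun" / "graphs")
#   graph = await manager.build_graph("/path/to/repo", name="my-repo")
#   print(graph.graph_id, graph.node_count, graph.relationship_count)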
496
+ async def update_graph(
497
+ self, graph_id: str, changes: list[FileChange] | None = None
498
+ ) -> dict[str, Any]:
499
+ """Update graph based on file changes.
500
+
501
+ Args:
502
+ graph_id: Graph to update
503
+ changes: List of file changes (if None, will auto-detect)
504
+
505
+ Returns:
506
+ Update statistics
507
+ """
508
+ # If no changes provided, use incremental update
509
+ if changes is None:
510
+ return await self.update_graph_incremental(graph_id)
511
+
512
+ start_time = time.time()
513
+
514
+ # Get graph metadata
515
+ graph = await self.get_graph(graph_id)
516
+ if not graph:
517
+ raise ValueError(f"Graph {graph_id} not found")
518
+
519
+ # Import is already done at the top of the method
520
+
521
+ # Process changes
522
+ stats = {
523
+ "nodes_added": 0,
524
+ "nodes_removed": 0,
525
+ "relationships_added": 0,
526
+ "relationships_removed": 0,
527
+ }
528
+
529
+ lock = await self._get_lock()
530
+ async with lock:
531
+ if graph_id not in self._connections:
532
+ db = kuzu.Database(graph.graph_path)
533
+ conn = kuzu.Connection(db)
534
+ self._databases[graph_id] = db
535
+ self._connections[graph_id] = conn
536
+
537
+ # Group changes by type
538
+ for change in changes:
539
+ if change.event_type == "deleted":
540
+ # Remove nodes for deleted files
541
+ await self._execute_query(
542
+ graph_id,
543
+ "MATCH (n) WHERE n.path = $path DELETE n",
544
+ {"path": change.src_path},
545
+ )
546
+ stats["nodes_removed"] += 1
547
+ elif change.event_type in ["created", "modified"]:
548
+ # Re-parse and update the file
549
+ # This is simplified - the actual implementation would use the ingestor
550
+ logger.info(f"Updating file in graph - path: {change.src_path}")
551
+
552
+ update_time = (time.time() - start_time) * 1000
553
+
554
+ # Update metadata
555
+ await self._execute_query(
556
+ graph_id,
557
+ """
558
+ MATCH (p:Project {graph_id: $graph_id})
559
+ SET p.updated_at = $updated_at
560
+ """,
561
+ {"graph_id": graph_id, "updated_at": int(time.time())},
562
+ )
563
+
564
+ return {"update_time_ms": update_time, **stats}
565
+
566
+ async def update_graph_incremental(self, graph_id: str) -> dict[str, Any]:
567
+ """Update graph by automatically detecting changes.
568
+
569
+ Args:
570
+ graph_id: Graph to update
571
+
572
+ Returns:
573
+ Update statistics
574
+ """
575
+ start_time = time.time()
576
+
577
+ # Get graph metadata
578
+ graph = await self.get_graph(graph_id)
579
+ if not graph:
580
+ raise ValueError(f"Graph {graph_id} not found")
581
+
582
+ # Validate that the repository path still exists
583
+ repo_path = Path(graph.repo_path)
584
+ if not repo_path.exists():
585
+ logger.error(f"Repository path no longer exists: {graph.repo_path}")
586
+ raise ValueError(f"Repository path no longer exists: {graph.repo_path}")
587
+ if not repo_path.is_dir():
588
+ logger.error(f"Repository path is not a directory: {graph.repo_path}")
589
+ raise ValueError(f"Repository path is not a directory: {graph.repo_path}")
590
+
591
+ # Parse build options
592
+ build_options = graph.build_options if graph.build_options else {}
593
+
594
+ languages = build_options.get("languages")
595
+ exclude_patterns = build_options.get("exclude_patterns")
596
+
597
+ lock = await self._get_lock()
598
+ async with lock:
599
+ if graph_id not in self._connections:
600
+ db = kuzu.Database(graph.graph_path)
601
+ self._connections[graph_id] = kuzu.Connection(db)
602
+
603
+ conn = self._connections[graph_id]
604
+
605
+ # Create change detector
606
+ from shotgun.codebase.core.change_detector import ChangeDetector, ChangeType
607
+
608
+ detector = ChangeDetector(conn, Path(graph.repo_path))
609
+
610
+ # Load parsers first to know what languages we can actually process
611
+ from shotgun.codebase.core.parser_loader import load_parsers
612
+
613
+ parsers, queries = load_parsers()
614
+ available_languages = list(parsers.keys())
615
+
616
+ # If no languages were specified in build options, use all available parsers
617
+ # Otherwise, filter to intersection of requested and available languages
618
+ if languages is None or languages == []:
619
+ effective_languages = available_languages
620
+ else:
621
+ effective_languages = [
622
+ lang for lang in languages if lang in available_languages
623
+ ]
624
+
625
+ if not effective_languages:
626
+ logger.warning(
627
+ f"No parsers available for requested languages - requested: {languages}, available: {available_languages}"
628
+ )
629
+ return {
630
+ "update_time_ms": (time.time() - start_time) * 1000,
631
+ "nodes_added": 0,
632
+ "nodes_removed": 0,
633
+ "nodes_modified": 0,
634
+ "relationships_added": 0,
635
+ "relationships_removed": 0,
636
+ "files_added": 0,
637
+ "files_modified": 0,
638
+ "files_deleted": 0,
639
+ "files_skipped": 0,
640
+ }
641
+
642
+ # Log what languages we're using for update
643
+ logger.info(f"Updating graph with languages: {effective_languages}")
644
+
645
+ # Detect changes only for languages we can process
646
+ changes = detector.detect_changes(effective_languages, exclude_patterns)
647
+
648
+ # Also detect ALL changes to report on skipped files
649
+ if languages is None or (
650
+ languages and len(languages) > len(effective_languages)
651
+ ):
652
+ all_changes = detector.detect_changes(None, exclude_patterns)
653
+ skipped_count = len(all_changes) - len(changes)
654
+ if skipped_count > 0:
655
+ logger.info(
656
+ f"Skipping {skipped_count} files due to missing parsers - available_parsers: {available_languages}, requested_languages: {languages}"
657
+ )
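# Illustrative sketch (not part of the published file): watchdog delivers events on
# its own observer thread, so the handler bridges into asyncio with
# run_coroutine_threadsafe against the loop captured at construction time.
#
#   loop = asyncio.get_running_loop()          # captured when the watcher starts
#   handler = CodebaseFileHandler(graph_id, callback, loop)
#   # inside on_any_event (observer thread):
#   asyncio.run_coroutine_threadsafe(handler._queue_change(change), loop)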
658
+ # Log some examples of skipped files
659
+ skipped_files = set(all_changes.keys()) - set(changes.keys())
660
+ examples = list(skipped_files)[:5]
661
+ if examples:
662
+ logger.info(f"Examples of skipped files: {examples}")
663
+ else:
664
+ skipped_count = 0
665
+
666
+ if not changes:
667
+ logger.info(f"No changes detected for graph {graph_id}")
668
+ return {
669
+ "update_time_ms": (time.time() - start_time) * 1000,
670
+ "nodes_added": 0,
671
+ "nodes_removed": 0,
672
+ "nodes_modified": 0,
673
+ "relationships_added": 0,
674
+ "relationships_removed": 0,
675
+ "files_added": 0,
676
+ "files_modified": 0,
677
+ "files_deleted": 0,
678
+ "files_skipped": skipped_count,
679
+ }
680
+
681
+ logger.info(f"Processing {len(changes)} file changes for graph {graph_id}")
682
+
683
+ # Initialize stats
684
+ stats = {
685
+ "nodes_added": 0,
686
+ "nodes_removed": 0,
687
+ "nodes_modified": 0,
688
+ "relationships_added": 0,
689
+ "relationships_removed": 0,
690
+ "files_added": 0,
691
+ "files_modified": 0,
692
+ "files_deleted": 0,
693
+ "files_skipped": 0,
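# Illustrative sketch (not part of the published file): graph IDs are deterministic,
# so re-indexing the same repository always targets the same .kuzu file.
#
#   import hashlib
#   from pathlib import Path
#   normalized = str(Path("/path/to/repo").resolve())   # hypothetical repo path
#   graph_id = hashlib.sha256(normalized.encode()).hexdigest()[:12]
#   db_file = f"{graph_id}.kuzu"                         # stored under storage_dir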
694
+ }
695
+
696
+ # Initialize ingestor and builder
697
+ from shotgun.codebase.core.ingestor import Ingestor, SimpleGraphBuilder
698
+
699
+ ingestor = Ingestor(conn)
700
+
701
+ builder = SimpleGraphBuilder(
702
+ ingestor, Path(graph.repo_path), parsers, queries, exclude_patterns
703
+ )
704
+
705
+ # Process changes by type
706
+ deletions = []
707
+ modifications = []
708
+ additions = []
709
+
710
+ for filepath, change_type in changes.items():
711
+ if change_type == ChangeType.DELETED:
712
+ deletions.append(filepath)
713
+ stats["files_deleted"] += 1
714
+ elif change_type == ChangeType.MODIFIED:
715
+ modifications.append(filepath)
716
+ stats["files_modified"] += 1
717
+ elif change_type == ChangeType.ADDED:
718
+ additions.append(filepath)
719
+ stats["files_added"] += 1
720
+
721
+ # Process deletions first
722
+ for filepath in deletions:
723
+ logger.debug(f"Processing deletion: {filepath}")
724
+ deletion_stats = ingestor.delete_file_nodes(filepath)
725
+ stats["nodes_removed"] += sum(deletion_stats.values())
726
+
727
+ # Process modifications (as delete + add)
728
+ for filepath in modifications:
729
+ logger.debug(f"Processing modification: {filepath}")
730
+ # Delete old nodes
731
+ deletion_stats = ingestor.delete_file_nodes(filepath)
732
+ stats["nodes_removed"] += sum(deletion_stats.values())
733
+
734
+ # Re-process the file
735
+ full_path = Path(graph.repo_path) / filepath
736
+ if full_path.exists():
737
+ # Determine language from file extension
738
+ from shotgun.codebase.core.language_config import (
739
+ get_language_config,
740
+ )
741
+
742
+ lang_config = get_language_config(full_path.suffix)
743
+ if lang_config and lang_config.name in parsers:
744
+ builder._process_single_file(full_path, lang_config.name)
745
+ stats["nodes_modified"] += 1 # Approximate
746
+
747
+ # Process additions
748
+ for filepath in additions:
749
+ logger.debug(f"Processing addition: {filepath}")
750
+ full_path = Path(graph.repo_path) / filepath
751
+ if full_path.exists():
752
+ # Determine language from file extension
753
+ from shotgun.codebase.core.language_config import (
754
+ get_language_config,
755
+ )
756
+
757
+ lang_config = get_language_config(full_path.suffix)
758
+ if lang_config and lang_config.name in parsers:
759
+ builder._process_single_file(full_path, lang_config.name)
760
+ stats["nodes_added"] += 1 # Approximate
761
+
762
+ # Flush all pending operations
763
+ ingestor.flush_all()
764
+
765
+ # Update graph metadata
766
+ current_time = int(time.time())
767
+ conn.execute(
768
+ """
769
+ MATCH (p:Project {name: $name})
770
+ SET p.updated_at = $time
771
+ """,
772
+ {"name": graph.name, "time": current_time},
773
+ )
774
+
775
+ stats["update_time_ms"] = int((time.time() - start_time) * 1000)
776
+ stats["files_skipped"] = skipped_count
777
+ logger.info(f"Incremental update complete for graph {graph_id}: {stats}")
778
+ return stats
779
+
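# Illustrative note (not part of the published file): modified files are handled as
# delete-then-reingest, and the returned stats dict is flat, e.g. (values made up):
#
#   {
#       "update_time_ms": 128,
#       "nodes_added": 3, "nodes_removed": 7, "nodes_modified": 2,
#       "relationships_added": 0, "relationships_removed": 0,
#       "files_added": 3, "files_modified": 2, "files_deleted": 1, "files_skipped": 0,
#   }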
780
+ async def _update_graph_impl(
781
+ self, graph_id: str, changes: list[FileChange] | None = None
782
+ ) -> dict[str, Any]:
783
+ """Internal implementation of graph update (runs in background)."""
784
+ operation_id = str(uuid.uuid4())
785
+ start_time = time.time()
786
+
787
+ # Create operation stats
788
+ operation_stats = OperationStats(
789
+ operation_type="update",
790
+ started_at=start_time,
791
+ completed_at=None,
792
+ success=False,
793
+ error=None,
794
+ stats={},
795
+ )
796
+
797
+ try:
798
+ # Update status to UPDATING
799
+ await self._update_graph_status(
800
+ graph_id, GraphStatus.UPDATING, operation_id
801
+ )
802
+
803
+ # Do the actual update work
804
+ if changes is None:
805
+ stats = await self.update_graph_incremental(graph_id)
806
+ else:
807
+ stats = await self.update_graph(graph_id, changes)
808
+
809
+ # Update operation stats
810
+ operation_stats.completed_at = time.time()
811
+ operation_stats.success = True
812
+ operation_stats.stats = stats
813
+
814
+ # Update status to READY
815
+ await self._update_graph_status(graph_id, GraphStatus.READY, None)
816
+
817
+ # Store operation stats
818
+ await self._store_operation_stats(graph_id, operation_stats)
819
+
820
+ return stats
821
+
822
+ except Exception as e:
823
+ # Update operation stats with error
824
+ operation_stats.completed_at = time.time()
825
+ operation_stats.success = False
826
+ operation_stats.error = str(e)
827
+ operation_stats.stats["update_time_ms"] = (time.time() - start_time) * 1000
828
+
829
+ # Update status to ERROR
830
+ await self._update_graph_status(graph_id, GraphStatus.ERROR, None)
831
+
832
+ # Store operation stats
833
+ await self._store_operation_stats(graph_id, operation_stats)
834
+
835
+ logger.error(f"Update failed for graph {graph_id}: {e}")
836
+ raise
837
+ finally:
838
+ # Clean up operation tracking
839
+ if graph_id in self._operations:
840
+ del self._operations[graph_id]
841
+
842
+ async def get_operation_status(self, graph_id: str) -> dict[str, Any]:
843
+ """Get the current operation status for a graph.
844
+
845
+ Args:
846
+ graph_id: Graph ID to check
847
+
848
+ Returns:
849
+ Dictionary with status information
850
+
851
+ Raises:
852
+ ValueError: If graph not found
853
+ """
854
+ graph = await self.get_graph(graph_id)
855
+ if not graph:
856
+ raise ValueError(f"Graph {graph_id} not found")
857
+
858
+ # Build response
859
+ response: dict[str, Any] = {
860
+ "graph_id": graph_id,
861
+ "status": graph.status.value,
862
+ "current_operation_id": graph.current_operation_id,
863
+ }
864
+
865
+ # Add last operation details if available
866
+ if graph.last_operation:
867
+ response["last_operation"] = {
868
+ "operation_type": graph.last_operation.operation_type,
869
+ "started_at": graph.last_operation.started_at,
870
+ "completed_at": graph.last_operation.completed_at,
871
+ "success": graph.last_operation.success,
872
+ "error": graph.last_operation.error,
873
+ "stats": graph.last_operation.stats,
874
+ }
875
+
876
+ # Check if there's an active operation
877
+ if graph_id in self._operations:
878
+ task = self._operations[graph_id]
879
+ if not task.done():
880
+ response["operation_in_progress"] = True
881
+ else:
882
+ # Operation finished but not cleaned up yet
883
+ response["operation_in_progress"] = False
884
+ # Try to get the result or exception
885
+ try:
886
+ task.result()
887
+ except Exception as e:
888
+ response["operation_error"] = str(e)
889
+ else:
890
+ response["operation_in_progress"] = False
891
+
892
+ return response
893
+
894
+ async def update_graph_async(
895
+ self, graph_id: str, changes: list[FileChange] | None = None
896
+ ) -> str:
897
+ """Start updating a graph asynchronously.
898
+
899
+ Returns:
900
+ Operation ID
901
+ """
902
+ # Check if graph exists
903
+ graph = await self.get_graph(graph_id)
904
+ if not graph:
905
+ raise ValueError(f"Graph {graph_id} not found")
906
+
907
+ # Check if already updating
908
+ if graph_id in self._operations:
909
+ raise ValueError(f"Graph {graph_id} is already being updated.")
910
+
911
+ # Start the update operation in background
912
+ task = asyncio.create_task(self._update_graph_impl(graph_id, changes))
913
+ self._operations[graph_id] = task
914
+
915
+ return graph_id
916
+
917
+ async def start_watcher(
918
+ self,
919
+ graph_id: str,
920
+ callback: Callable[[str, list[FileChange]], Awaitable[None]] | None = None,
921
+ patterns: list[str] | None = None,
922
+ ignore_patterns: list[str] | None = None,
923
+ ) -> None:
924
+ """Start watching repository for changes.
925
+
926
+ Args:
927
+ graph_id: Graph to watch
928
+ callback: Async callback for changes
929
+ patterns: File patterns to watch
930
+ ignore_patterns: Patterns to ignore
931
+ """
932
+ graph = await self.get_graph(graph_id)
933
+ if not graph:
934
+ raise ValueError(f"Graph {graph_id} not found")
935
+
936
+ lock = await self._get_lock()
937
+ async with lock:
938
+ if graph_id in self._watchers:
939
+ logger.warning(f"Watcher already running - graph_id: {graph_id}")
940
+ return
941
+
942
+ # Get current event loop for thread-safe async calls
943
+ loop = asyncio.get_running_loop()
944
+
945
+ # Combine default ignore patterns with any custom ones
946
+ from shotgun.codebase.core.ingestor import IGNORE_PATTERNS
947
+
948
+ combined_ignore = IGNORE_PATTERNS.copy()
949
+ if ignore_patterns:
950
+ combined_ignore.update(ignore_patterns)
951
+
952
+ # Create handler with loop reference and ignore patterns
953
+ handler = CodebaseFileHandler(graph_id, callback, loop, combined_ignore)
954
+ self._handlers[graph_id] = handler
955
+
956
+ # Create and start observer
957
+ observer = Observer()
958
+ observer.schedule(handler, graph.repo_path, recursive=True)
959
+ observer.start()
960
+
961
+ self._watchers[graph_id] = observer
962
+
963
+ logger.info(
964
+ f"Started file watcher - graph_id: {graph_id}, repo_path: {graph.repo_path}"
965
+ )
966
+
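# Illustrative callback sketch (not part of the published file; wiring is
# hypothetical): the callback receives the graph id and a one-element change list
# per file system event.
#
#   async def on_changes(graph_id: str, changes: list[FileChange]) -> None:
#       await manager.update_graph(graph_id, changes)
#
#   await manager.start_watcher(graph.graph_id, callback=on_changes)
#   ...
#   processed = await manager.stop_watcher(graph.graph_id)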
967
+ async def stop_watcher(self, graph_id: str) -> int:
968
+ """Stop watching repository.
969
+
970
+ Args:
971
+ graph_id: Graph to stop watching
972
+
973
+ Returns:
974
+ Number of changes processed
975
+ """
976
+ lock = await self._get_lock()
977
+ async with lock:
978
+ if graph_id not in self._watchers:
979
+ logger.warning(f"No watcher running - graph_id: {graph_id}")
980
+ return 0
981
+
982
+ observer = self._watchers[graph_id]
983
+ observer.stop()
984
+ observer.join(timeout=5)
985
+
986
+ # Get change count
987
+ handler = self._handlers.get(graph_id)
988
+ change_count = len(handler.pending_changes) if handler else 0
989
+
990
+ # Clean up
991
+ del self._watchers[graph_id]
992
+ if graph_id in self._handlers:
993
+ del self._handlers[graph_id]
994
+
995
+ logger.info(
996
+ f"Stopped file watcher - graph_id: {graph_id}, changes_processed: {change_count}"
997
+ )
998
+ return change_count
999
+
1000
+ async def execute_query(
1001
+ self, graph_id: str, query: str, parameters: dict[str, Any] | None = None
1002
+ ) -> list[dict[str, Any]]:
1003
+ """Execute Cypher query on graph.
1004
+
1005
+ Args:
1006
+ graph_id: Graph to query
1007
+ query: Cypher query
1008
+ parameters: Query parameters
1009
+
1010
+ Returns:
1011
+ Query results
1012
+ """
1013
+ return await self._execute_query(graph_id, query, parameters)
1014
+
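# Illustrative query (not part of the published file): execute_query returns a list
# of dicts keyed by the Cypher column aliases, e.g. the per-extension file count
# used elsewhere in this module:
#
#   rows = await manager.execute_query(
#       graph.graph_id,
#       "MATCH (f:File) WHERE f.extension IS NOT NULL "
#       "RETURN f.extension AS extension, COUNT(f) AS count",
#   )
#   # e.g. [{"extension": ".py", "count": 42}, ...]   (values made up)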
1015
+ async def _execute_query(
1016
+ self, graph_id: str, query: str, parameters: dict[str, Any] | None = None
1017
+ ) -> list[dict[str, Any]]:
1018
+ """Internal query execution with connection management."""
1019
+ lock = await self._get_lock()
1020
+ async with lock:
1021
+ if graph_id not in self._connections:
1022
+ # Open connection if needed
1023
+ graph_path = self.storage_dir / f"{graph_id}.kuzu"
1024
+ if not graph_path.exists():
1025
+ raise ValueError(f"Graph {graph_id} not found")
1026
+
1027
+ db = kuzu.Database(str(graph_path))
1028
+ conn = kuzu.Connection(db)
1029
+ self._databases[graph_id] = db
1030
+ self._connections[graph_id] = conn
1031
+
1032
+ conn = self._connections[graph_id]
1033
+
1034
+ # Execute query in thread pool
1035
+ def _run_query() -> list[dict[str, Any]]:
1036
+ if parameters:
1037
+ result = conn.execute(query, parameters)
1038
+ else:
1039
+ result = conn.execute(query)
1040
+
1041
+ # Collect results
1042
+ rows = []
1043
+ columns = (
1044
+ result.get_column_names() if hasattr(result, "get_column_names") else []
1045
+ )
1046
+
1047
+ if hasattr(result, "has_next") and not isinstance(result, list):
1048
+ while result.has_next():
1049
+ row = result.get_next()
1050
+ row_dict = {}
1051
+ for i, col in enumerate(columns):
1052
+ if isinstance(row, tuple | list) and i < len(row):
1053
+ row_dict[col] = row[i]
1054
+ elif hasattr(row, col):
1055
+ row_dict[col] = getattr(row, col)
1056
+ rows.append(row_dict)
1057
+ elif isinstance(result, list):
1058
+ # Convert list of QueryResult objects to list of dicts
1059
+ for query_result in result:
1060
+ row_dict = {}
1061
+ for col in columns:
1062
+ if hasattr(query_result, col):
1063
+ row_dict[col] = getattr(query_result, col)
1064
+ rows.append(row_dict)
1065
+
1066
+ return rows
1067
+
1068
+ return await anyio.to_thread.run_sync(_run_query)
1069
+
1070
+ async def get_graph(self, graph_id: str) -> CodebaseGraph | None:
1071
+ """Get graph metadata.
1072
+
1073
+ Args:
1074
+ graph_id: Graph ID
1075
+
1076
+ Returns:
1077
+ Graph metadata or None if not found
1078
+ """
1079
+ graph_path = self.storage_dir / f"{graph_id}.kuzu"
1080
+ if not graph_path.exists():
1081
+ return None
1082
+
1083
+ # Query metadata from Project node
1084
+ try:
1085
+ results = await self._execute_query(
1086
+ graph_id,
1087
+ "MATCH (p:Project {graph_id: $graph_id}) RETURN p",
1088
+ {"graph_id": graph_id},
1089
+ )
1090
+
1091
+ if not results:
1092
+ return None
1093
+
1094
+ project = results[0]["p"]
1095
+
1096
+ # Check if watcher is active
1097
+ is_watching = graph_id in self._watchers
1098
+
1099
+ # Get language statistics
1100
+ lang_stats = await self._execute_query(
1101
+ graph_id,
1102
+ """
1103
+ MATCH (f:File)
1104
+ WHERE f.extension IS NOT NULL
1105
+ RETURN f.extension as extension, COUNT(f) as count
1106
+ """,
1107
+ )
1108
+
1109
+ language_stats = {}
1110
+ if lang_stats:
1111
+ for row in lang_stats:
1112
+ ext = row.get("extension", "").lower()
1113
+ if ext:
1114
+ # Map extensions to languages
1115
+ lang_map = {
1116
+ ".py": "Python",
1117
+ ".js": "JavaScript",
1118
+ ".ts": "TypeScript",
1119
+ ".go": "Go",
1120
+ ".rs": "Rust",
1121
+ ".java": "Java",
1122
+ ".cpp": "C++",
1123
+ ".c": "C",
1124
+ ".cs": "C#",
1125
+ ".rb": "Ruby",
1126
+ }
1127
+ lang = lang_map.get(ext, ext)
1128
+ language_stats[lang] = row.get("count", 0)
1129
+
1130
+ # Get counts dynamically
1131
+ node_count = await self._execute_query(
1132
+ graph_id, "MATCH (n) RETURN COUNT(n) as count"
1133
+ )
1134
+ relationship_count = await self._execute_query(
1135
+ graph_id, "MATCH ()-[r]->() RETURN COUNT(r) as count"
1136
+ )
1137
+
1138
+ # Get detailed statistics
1139
+ node_stats, relationship_stats = await self._get_graph_statistics(graph_id)
1140
+
1141
+ # Parse status
1142
+ status_str = project.get("status", GraphStatus.READY.value)
1143
+ try:
1144
+ status = GraphStatus(status_str)
1145
+ except ValueError:
1146
+ status = GraphStatus.READY
1147
+
1148
+ # Parse last operation
1149
+ last_operation = None
1150
+ last_op_str = project.get("last_operation")
1151
+ if last_op_str:
1152
+ try:
1153
+ last_op_data = json.loads(last_op_str)
1154
+ last_operation = OperationStats(**last_op_data)
1155
+ except Exception as e:
1156
+ logger.debug(f"Failed to parse last operation stats: {e}")
1157
+ last_operation = None
1158
+
1159
+ # Parse indexed_from_cwds - handle backward compatibility
1160
+ indexed_from_cwds_json = project.get("indexed_from_cwds", "[]")
1161
+ try:
1162
+ indexed_from_cwds = (
1163
+ json.loads(indexed_from_cwds_json) if indexed_from_cwds_json else []
1164
+ )
1165
+ except (json.JSONDecodeError, TypeError):
1166
+ indexed_from_cwds = []
1167
+
1168
+ return CodebaseGraph(
1169
+ graph_id=graph_id,
1170
+ repo_path=project.get("repo_path", ""),
1171
+ graph_path=str(graph_path),
1172
+ name=project.get("name", ""),
1173
+ created_at=float(project.get("created_at", 0)),
1174
+ updated_at=float(project.get("updated_at", 0)),
1175
+ schema_version=project.get("schema_version", "1.0.0"),
1176
+ build_options=json.loads(project.get("build_options", "{}")),
1177
+ node_count=node_count[0]["count"] if node_count else 0,
1178
+ relationship_count=relationship_count[0]["count"]
1179
+ if relationship_count
1180
+ else 0,
1181
+ node_stats=node_stats,
1182
+ relationship_stats=relationship_stats,
1183
+ language_stats=language_stats,
1184
+ is_watching=is_watching,
1185
+ status=status,
1186
+ last_operation=last_operation,
1187
+ current_operation_id=project.get("current_operation_id"),
1188
+ indexed_from_cwds=indexed_from_cwds,
1189
+ )
1190
+ except Exception as e:
1191
+ logger.error(
1192
+ f"Failed to get graph metadata - graph_id: {graph_id}, error: {str(e)}"
1193
+ )
1194
+ return None
1195
+
1196
+ async def list_graphs(self) -> list[CodebaseGraph]:
1197
+ """List all available graphs.
1198
+
1199
+ Returns:
1200
+ List of graph metadata
1201
+ """
1202
+ graphs = []
1203
+
1204
+ # Find all .kuzu files
1205
+ for path in self.storage_dir.glob("*.kuzu"):
1206
+ if path.is_file():
1207
+ graph_id = path.stem
1208
+ graph = await self.get_graph(graph_id)
1209
+ if graph:
1210
+ graphs.append(graph)
1211
+
1212
+ return sorted(graphs, key=lambda g: g.updated_at, reverse=True)
1213
+
1214
+ async def add_cwd_access(self, graph_id: str, cwd: str | None = None) -> None:
1215
+ """Add a working directory to a graph's access list.
1216
+
1217
+ Args:
1218
+ graph_id: Graph ID to update
1219
+ cwd: Working directory to add. If None, uses current working directory.
1220
+ """
1221
+ from pathlib import Path
1222
+
1223
+ if cwd is None:
1224
+ cwd = str(Path.cwd().resolve())
1225
+ else:
1226
+ cwd = str(Path(cwd).resolve())
1227
+
1228
+ # Get current graph
1229
+ graph = await self.get_graph(graph_id)
1230
+ if not graph:
1231
+ raise ValueError(f"Graph {graph_id} not found")
1232
+
1233
+ # Get current list
1234
+ current_cwds = graph.indexed_from_cwds.copy()
1235
+
1236
+ # Add new CWD if not already present
1237
+ if cwd not in current_cwds:
1238
+ current_cwds.append(cwd)
1239
+
1240
+ # Update in database
1241
+ await self._execute_query(
1242
+ graph_id,
1243
+ """
1244
+ MATCH (p:Project {graph_id: $graph_id})
1245
+ SET p.indexed_from_cwds = $indexed_from_cwds
1246
+ """,
1247
+ {
1248
+ "graph_id": graph_id,
1249
+ "indexed_from_cwds": json.dumps(current_cwds),
1250
+ },
1251
+ )
1252
+ logger.info(f"Added CWD access for {cwd} to graph {graph_id}")
1253
+
1254
+ async def remove_cwd_access(self, graph_id: str, cwd: str) -> None:
1255
+ """Remove a working directory from a graph's access list.
1256
+
1257
+ Args:
1258
+ graph_id: Graph ID to update
1259
+ cwd: Working directory to remove
1260
+ """
1261
+ from pathlib import Path
1262
+
1263
+ cwd = str(Path(cwd).resolve())
1264
+
1265
+ # Get current graph
1266
+ graph = await self.get_graph(graph_id)
1267
+ if not graph:
1268
+ raise ValueError(f"Graph {graph_id} not found")
1269
+
1270
+ # Get current list
1271
+ current_cwds = graph.indexed_from_cwds.copy()
1272
+
1273
+ # Remove CWD if present
1274
+ if cwd in current_cwds:
1275
+ current_cwds.remove(cwd)
1276
+
1277
+ # Update in database
1278
+ await self._execute_query(
1279
+ graph_id,
1280
+ """
1281
+ MATCH (p:Project {graph_id: $graph_id})
1282
+ SET p.indexed_from_cwds = $indexed_from_cwds
1283
+ """,
1284
+ {
1285
+ "graph_id": graph_id,
1286
+ "indexed_from_cwds": json.dumps(current_cwds),
1287
+ },
1288
+ )
1289
+ logger.info(f"Removed CWD access for {cwd} from graph {graph_id}")
1290
+
1291
+ async def delete_graph(self, graph_id: str) -> None:
1292
+ """Delete a graph.
1293
+
1294
+ Args:
1295
+ graph_id: Graph to delete
1296
+ """
1297
+ # Stop watcher if running
1298
+ if graph_id in self._watchers:
1299
+ await self.stop_watcher(graph_id)
1300
+
1301
+ # Close connections
1302
+ lock = await self._get_lock()
1303
+ async with lock:
1304
+ if graph_id in self._connections:
1305
+ self._connections[graph_id].close()
1306
+ del self._connections[graph_id]
1307
+ if graph_id in self._databases:
1308
+ self._databases[graph_id].close()
1309
+ del self._databases[graph_id]
1310
+
1311
+ # Delete files
1312
+ graph_path = self.storage_dir / f"{graph_id}.kuzu"
1313
+ if graph_path.exists():
1314
+ # Delete the database file
1315
+ await anyio.to_thread.run_sync(graph_path.unlink)
1316
+
1317
+ # Also delete the WAL file if it exists
1318
+ wal_path = self.storage_dir / f"{graph_id}.kuzu.wal"
1319
+ if wal_path.exists():
1320
+ await anyio.to_thread.run_sync(wal_path.unlink)
1321
+
1322
+ logger.info(f"Deleted graph - graph_id: {graph_id}")
1323
+
1324
+ async def _get_graph_statistics(
1325
+ self, graph_id: str
1326
+ ) -> tuple[dict[str, int], dict[str, int]]:
1327
+ """Get detailed statistics about the graph.
1328
+
1329
+ Returns:
1330
+ Tuple of (node_stats, relationship_stats)
1331
+ """
1332
+ node_stats = {}
1333
+
1334
+ # Count each node type
1335
+ node_types = [
1336
+ "Project",
1337
+ "Package",
1338
+ "Module",
1339
+ "Class",
1340
+ "Function",
1341
+ "Method",
1342
+ "File",
1343
+ "Folder",
1344
+ "FileMetadata",
1345
+ "DeletionLog",
1346
+ ]
1347
+
1348
+ for node_type in node_types:
1349
+ try:
1350
+ result = await self._execute_query(
1351
+ graph_id, f"MATCH (n:{node_type}) RETURN COUNT(n) as count"
1352
+ )
1353
+ count = result[0]["count"] if result else 0
1354
+ if count > 0:
1355
+ node_stats[node_type] = count
1356
+ except Exception as e:
1357
+ logger.debug(f"Failed to count {node_type} nodes: {e}")
1358
+
1359
+ # Count relationships - need to handle multiple tables for each type
1360
+ rel_counts = {}
1361
+
1362
+ # CONTAINS relationships
1363
+ for prefix in [
1364
+ "CONTAINS_PACKAGE",
1365
+ "CONTAINS_FOLDER",
1366
+ "CONTAINS_FILE",
1367
+ "CONTAINS_MODULE",
1368
+ ]:
1369
+ count = 0
1370
+ for suffix in ["", "_PKG", "_FOLDER"]:
1371
+ table = f"{prefix}{suffix}"
1372
+ try:
1373
+ result = await self._execute_query(
1374
+ graph_id, f"MATCH ()-[r:{table}]->() RETURN COUNT(r) as count"
1375
+ )
1376
+ if result:
1377
+ count += result[0]["count"]
1378
+ except Exception as e:
1379
+ logger.debug(f"Failed to count {table} relationships: {e}")
1380
+ if count > 0:
1381
+ rel_counts[prefix] = count
1382
+
1383
+ # Other relationships
1384
+ for rel_type in [
1385
+ "DEFINES",
1386
+ "DEFINES_FUNC",
1387
+ "DEFINES_METHOD",
1388
+ "INHERITS",
1389
+ "OVERRIDES",
1390
+ "DEPENDS_ON_EXTERNAL",
1391
+ "IMPORTS",
1392
+ ]:
1393
+ try:
1394
+ result = await self._execute_query(
1395
+ graph_id, f"MATCH ()-[r:{rel_type}]->() RETURN COUNT(r) as count"
1396
+ )
1397
+ if result and result[0]["count"] > 0:
1398
+ rel_counts[rel_type] = result[0]["count"]
1399
+ except Exception as e:
1400
+ logger.debug(f"Failed to count {rel_type} relationships: {e}")
1401
+
1402
+ # CALLS relationships (multiple tables)
1403
+ calls_count = 0
1404
+ for table in ["CALLS", "CALLS_FM", "CALLS_MF", "CALLS_MM"]:
1405
+ try:
1406
+ result = await self._execute_query(
1407
+ graph_id, f"MATCH ()-[r:{table}]->() RETURN COUNT(r) as count"
1408
+ )
1409
+ if result:
1410
+ calls_count += result[0]["count"]
1411
+ except Exception as e:
1412
+ logger.debug(f"Failed to count {table} relationships: {e}")
1413
+ if calls_count > 0:
1414
+ rel_counts["CALLS (total)"] = calls_count
1415
+
1416
+ # TRACKS relationships
1417
+ tracks_count = 0
1418
+ for entity in ["Module", "Class", "Function", "Method"]:
1419
+ try:
1420
+ result = await self._execute_query(
1421
+ graph_id,
1422
+ f"MATCH ()-[r:TRACKS_{entity}]->() RETURN COUNT(r) as count",
1423
+ )
1424
+ if result:
1425
+ tracks_count += result[0]["count"]
1426
+ except Exception as e:
1427
+ logger.debug(f"Failed to count TRACKS_{entity} relationships: {e}")
1428
+ if tracks_count > 0:
1429
+ rel_counts["TRACKS (total)"] = tracks_count
1430
+
1431
+ return node_stats, rel_counts
1432
+
1433
+ async def _print_graph_statistics(self, graph_id: str) -> None:
1434
+ """Print detailed statistics about the graph."""
1435
+ logger.info("\n=== Graph Statistics ===")
1436
+
1437
+ node_stats, rel_stats = await self._get_graph_statistics(graph_id)
1438
+
1439
+ # Print node stats
1440
+ for node_type in [
1441
+ "Project",
1442
+ "Package",
1443
+ "Module",
1444
+ "Class",
1445
+ "Function",
1446
+ "Method",
1447
+ "File",
1448
+ "Folder",
1449
+ "FileMetadata",
1450
+ "DeletionLog",
1451
+ ]:
1452
+ count = node_stats.get(node_type, 0)
1453
+ logger.info(f"{node_type}: {count}")
1454
+
1455
+ logger.info("\nRelationship counts:")
1456
+ for rel_type, count in sorted(rel_stats.items()):
1457
+ logger.info(f"{rel_type}: {count}")
1458
+
1459
+ async def _build_graph_impl(
1460
+ self,
1461
+ graph_id: str,
1462
+ repo_path: str,
1463
+ name: str,
1464
+ languages: list[str] | None,
1465
+ exclude_patterns: list[str] | None,
1466
+ indexed_from_cwd: str | None = None,
1467
+ ) -> CodebaseGraph:
1468
+ """Internal implementation of graph building (runs in background)."""
1469
+ operation_id = str(uuid.uuid4())
1470
+ start_time = time.time()
1471
+
1472
+ # Create operation stats
1473
+ operation_stats = OperationStats(
1474
+ operation_type="build",
1475
+ started_at=start_time,
1476
+ completed_at=None,
1477
+ success=False,
1478
+ error=None,
1479
+ stats={},
1480
+ )
1481
+
1482
+ try:
1483
+ # Update status to BUILDING
1484
+ await self._update_graph_status(
1485
+ graph_id, GraphStatus.BUILDING, operation_id
1486
+ )
1487
+
1488
+ # Do the actual build work
1489
+ graph = await self._do_build_graph(
1490
+ graph_id, repo_path, name, languages, exclude_patterns, indexed_from_cwd
1491
+ )
1492
+
1493
+ # Update operation stats
1494
+ operation_stats.completed_at = time.time()
1495
+ operation_stats.success = True
1496
+ operation_stats.stats = {
1497
+ "node_count": graph.node_count,
1498
+ "relationship_count": graph.relationship_count,
1499
+ "language_stats": graph.language_stats,
1500
+ "build_time_ms": (time.time() - start_time) * 1000,
1501
+ }
1502
+
1503
+ # Update status to READY
1504
+ await self._update_graph_status(graph_id, GraphStatus.READY, None)
1505
+
1506
+ # Store operation stats
1507
+ await self._store_operation_stats(graph_id, operation_stats)
1508
+
1509
+ return graph
1510
+
1511
+ except Exception as e:
1512
+ # Update operation stats with error
1513
+ operation_stats.completed_at = time.time()
1514
+ operation_stats.success = False
1515
+ operation_stats.error = str(e)
1516
+ operation_stats.stats["build_time_ms"] = (time.time() - start_time) * 1000
1517
+
1518
+ # Update status to ERROR
1519
+ await self._update_graph_status(graph_id, GraphStatus.ERROR, None)
1520
+
1521
+ # Store operation stats
1522
+ await self._store_operation_stats(graph_id, operation_stats)
1523
+
1524
+ logger.error(f"Build failed for graph {graph_id}: {e}")
1525
+ raise
1526
+ finally:
1527
+ # Clean up operation tracking
1528
+ if graph_id in self._operations:
1529
+ del self._operations[graph_id]
1530
+
1531
+ async def _do_build_graph(
1532
+ self,
1533
+ graph_id: str,
1534
+ repo_path: str,
1535
+ name: str,
1536
+ languages: list[str] | None,
1537
+ exclude_patterns: list[str] | None,
1538
+ indexed_from_cwd: str | None = None,
1539
+ ) -> CodebaseGraph:
1540
+ """Execute the actual graph building logic (extracted from original build_graph)."""
1541
+ # The database and Project node already exist from _initialize_graph_metadata
1542
+
1543
+ # Get existing connection
1544
+ lock = await self._get_lock()
1545
+ async with lock:
1546
+ if graph_id not in self._connections:
1547
+ raise RuntimeError(f"Connection not found for graph {graph_id}")
1548
+ conn = self._connections[graph_id]
1549
+
1550
+ # Import the builder from local core module
1551
+
1552
+ # Build the graph
1553
+ logger.info(
1554
+ f"Building code graph - graph_id: {graph_id}, repo_path: {repo_path}"
1555
+ )
1556
+
1557
+ # Build the graph using our existing connection
1558
+ def _build_graph() -> None:
1559
+ from shotgun.codebase.core import Ingestor, SimpleGraphBuilder
1560
+ from shotgun.codebase.core.parser_loader import load_parsers
1561
+
1562
+ # Load parsers for requested languages
1563
+ parsers, queries = load_parsers()
1564
+
1565
+ # Log available parsers before filtering
1566
+ logger.info(f"Available parsers: {list(parsers.keys())}")
1567
+
1568
+ # Filter parsers to requested languages if specified
1569
+ if languages:
1570
+ parsers = {
1571
+ lang: parser
1572
+ for lang, parser in parsers.items()
1573
+ if lang in languages
1574
+ }
1575
+ queries = {
1576
+ lang: query for lang, query in queries.items() if lang in languages
1577
+ }
1578
+ logger.info(
1579
+ f"Filtered parsers to requested languages {languages}: {list(parsers.keys())}"
1580
+ )
1581
+ else:
1582
+ logger.info(f"Using all available parsers: {list(parsers.keys())}")
1583
+
1584
+ # Create ingestor with existing connection
1585
+ ingestor = Ingestor(conn)
1586
+
1587
+ # Create builder
1588
+ builder = SimpleGraphBuilder(
1589
+ ingestor=ingestor,
1590
+ repo_path=Path(repo_path),
1591
+ parsers=parsers,
1592
+ queries=queries,
1593
+ exclude_patterns=exclude_patterns,
1594
+ )
1595
+
1596
+ # Build the graph
1597
+ builder.run()
1598
+
1599
+ # Run build in thread pool
1600
+ await anyio.to_thread.run_sync(_build_graph)
1601
+
1602
+ # Now print detailed statistics (will include Project: 1)
1603
+ await self._print_graph_statistics(graph_id)
1604
+
1605
+ # Get the updated graph metadata
1606
+ graph = await self.get_graph(graph_id)
1607
+ if not graph:
1608
+ raise RuntimeError(f"Failed to retrieve graph {graph_id} after build")
1609
+
1610
+ return graph
1611
+
1612
+ async def build_graph_async(
1613
+ self,
1614
+ repo_path: str,
1615
+ name: str | None = None,
1616
+ languages: list[str] | None = None,
1617
+ exclude_patterns: list[str] | None = None,
1618
+ indexed_from_cwd: str | None = None,
1619
+ ) -> str:
1620
+ """Start building a new code knowledge graph asynchronously.
1621
+
1622
+ Returns:
1623
+ Graph ID of the graph being built
1624
+ """
1625
+ repo_path = str(Path(repo_path).resolve())
1626
+ graph_id = self._generate_graph_id(repo_path)
1627
+
1628
+ # Use repository name as default name
1629
+ if not name:
1630
+ name = Path(repo_path).name
1631
+
1632
+ # Check if graph already exists
1633
+ graph_path = self.storage_dir / f"{graph_id}.kuzu"
1634
+ if graph_path.exists():
1635
+ raise ValueError(
1636
+ f"Graph already exists for {repo_path}. Use update_graph() to modify it."
1637
+ )
1638
+
1639
+ # Check if already building
1640
+ if graph_id in self._operations:
1641
+ raise ValueError(f"Graph {graph_id} is already being built.")
1642
+
1643
+ # Create the database and initial Project node immediately
1644
+ # This allows status tracking during the build
1645
+ await self._initialize_graph_metadata(
1646
+ graph_id=graph_id,
1647
+ repo_path=repo_path,
1648
+ name=name,
1649
+ languages=languages,
1650
+ exclude_patterns=exclude_patterns,
1651
+ indexed_from_cwd=indexed_from_cwd,
1652
+ )
1653
+
1654
+ # Start the build operation in background
1655
+ task = asyncio.create_task(
1656
+ self._build_graph_impl(
1657
+ graph_id, repo_path, name, languages, exclude_patterns, indexed_from_cwd
1658
+ )
1659
+ )
1660
+ self._operations[graph_id] = task
1661
+
1662
+ return graph_id
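# Illustrative async flow (not part of the published file): build_graph_async
# returns immediately with the graph id; callers can poll get_operation_status
# until the background task flips the Project node from BUILDING to READY (or
# ERROR). A minimal polling sketch:
#
#   graph_id = await manager.build_graph_async("/path/to/repo")
#   while True:
#       status = await manager.get_operation_status(graph_id)
#       if status["status"] != GraphStatus.BUILDING.value:
#           break
#       await asyncio.sleep(1.0)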