shotgun-sh 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of shotgun-sh might be problematic.
- shotgun/__init__.py +5 -0
- shotgun/agents/__init__.py +1 -0
- shotgun/agents/agent_manager.py +651 -0
- shotgun/agents/common.py +549 -0
- shotgun/agents/config/__init__.py +13 -0
- shotgun/agents/config/constants.py +17 -0
- shotgun/agents/config/manager.py +294 -0
- shotgun/agents/config/models.py +185 -0
- shotgun/agents/config/provider.py +206 -0
- shotgun/agents/conversation_history.py +106 -0
- shotgun/agents/conversation_manager.py +105 -0
- shotgun/agents/export.py +96 -0
- shotgun/agents/history/__init__.py +5 -0
- shotgun/agents/history/compaction.py +85 -0
- shotgun/agents/history/constants.py +19 -0
- shotgun/agents/history/context_extraction.py +108 -0
- shotgun/agents/history/history_building.py +104 -0
- shotgun/agents/history/history_processors.py +426 -0
- shotgun/agents/history/message_utils.py +84 -0
- shotgun/agents/history/token_counting.py +429 -0
- shotgun/agents/history/token_estimation.py +138 -0
- shotgun/agents/messages.py +35 -0
- shotgun/agents/models.py +275 -0
- shotgun/agents/plan.py +98 -0
- shotgun/agents/research.py +108 -0
- shotgun/agents/specify.py +98 -0
- shotgun/agents/tasks.py +96 -0
- shotgun/agents/tools/__init__.py +34 -0
- shotgun/agents/tools/codebase/__init__.py +28 -0
- shotgun/agents/tools/codebase/codebase_shell.py +256 -0
- shotgun/agents/tools/codebase/directory_lister.py +141 -0
- shotgun/agents/tools/codebase/file_read.py +144 -0
- shotgun/agents/tools/codebase/models.py +252 -0
- shotgun/agents/tools/codebase/query_graph.py +67 -0
- shotgun/agents/tools/codebase/retrieve_code.py +81 -0
- shotgun/agents/tools/file_management.py +218 -0
- shotgun/agents/tools/user_interaction.py +37 -0
- shotgun/agents/tools/web_search/__init__.py +60 -0
- shotgun/agents/tools/web_search/anthropic.py +144 -0
- shotgun/agents/tools/web_search/gemini.py +85 -0
- shotgun/agents/tools/web_search/openai.py +98 -0
- shotgun/agents/tools/web_search/utils.py +20 -0
- shotgun/build_constants.py +20 -0
- shotgun/cli/__init__.py +1 -0
- shotgun/cli/codebase/__init__.py +5 -0
- shotgun/cli/codebase/commands.py +202 -0
- shotgun/cli/codebase/models.py +21 -0
- shotgun/cli/config.py +275 -0
- shotgun/cli/export.py +81 -0
- shotgun/cli/models.py +10 -0
- shotgun/cli/plan.py +73 -0
- shotgun/cli/research.py +85 -0
- shotgun/cli/specify.py +69 -0
- shotgun/cli/tasks.py +78 -0
- shotgun/cli/update.py +152 -0
- shotgun/cli/utils.py +25 -0
- shotgun/codebase/__init__.py +12 -0
- shotgun/codebase/core/__init__.py +46 -0
- shotgun/codebase/core/change_detector.py +358 -0
- shotgun/codebase/core/code_retrieval.py +243 -0
- shotgun/codebase/core/ingestor.py +1497 -0
- shotgun/codebase/core/language_config.py +297 -0
- shotgun/codebase/core/manager.py +1662 -0
- shotgun/codebase/core/nl_query.py +331 -0
- shotgun/codebase/core/parser_loader.py +128 -0
- shotgun/codebase/models.py +111 -0
- shotgun/codebase/service.py +206 -0
- shotgun/logging_config.py +227 -0
- shotgun/main.py +167 -0
- shotgun/posthog_telemetry.py +158 -0
- shotgun/prompts/__init__.py +5 -0
- shotgun/prompts/agents/__init__.py +1 -0
- shotgun/prompts/agents/export.j2 +350 -0
- shotgun/prompts/agents/partials/codebase_understanding.j2 +87 -0
- shotgun/prompts/agents/partials/common_agent_system_prompt.j2 +37 -0
- shotgun/prompts/agents/partials/content_formatting.j2 +65 -0
- shotgun/prompts/agents/partials/interactive_mode.j2 +26 -0
- shotgun/prompts/agents/plan.j2 +144 -0
- shotgun/prompts/agents/research.j2 +69 -0
- shotgun/prompts/agents/specify.j2 +51 -0
- shotgun/prompts/agents/state/codebase/codebase_graphs_available.j2 +19 -0
- shotgun/prompts/agents/state/system_state.j2 +31 -0
- shotgun/prompts/agents/tasks.j2 +143 -0
- shotgun/prompts/codebase/__init__.py +1 -0
- shotgun/prompts/codebase/cypher_query_patterns.j2 +223 -0
- shotgun/prompts/codebase/cypher_system.j2 +28 -0
- shotgun/prompts/codebase/enhanced_query_context.j2 +10 -0
- shotgun/prompts/codebase/partials/cypher_rules.j2 +24 -0
- shotgun/prompts/codebase/partials/graph_schema.j2 +30 -0
- shotgun/prompts/codebase/partials/temporal_context.j2 +21 -0
- shotgun/prompts/history/__init__.py +1 -0
- shotgun/prompts/history/incremental_summarization.j2 +53 -0
- shotgun/prompts/history/summarization.j2 +46 -0
- shotgun/prompts/loader.py +140 -0
- shotgun/py.typed +0 -0
- shotgun/sdk/__init__.py +13 -0
- shotgun/sdk/codebase.py +219 -0
- shotgun/sdk/exceptions.py +17 -0
- shotgun/sdk/models.py +189 -0
- shotgun/sdk/services.py +23 -0
- shotgun/sentry_telemetry.py +87 -0
- shotgun/telemetry.py +93 -0
- shotgun/tui/__init__.py +0 -0
- shotgun/tui/app.py +116 -0
- shotgun/tui/commands/__init__.py +76 -0
- shotgun/tui/components/prompt_input.py +69 -0
- shotgun/tui/components/spinner.py +86 -0
- shotgun/tui/components/splash.py +25 -0
- shotgun/tui/components/vertical_tail.py +13 -0
- shotgun/tui/screens/chat.py +782 -0
- shotgun/tui/screens/chat.tcss +43 -0
- shotgun/tui/screens/chat_screen/__init__.py +0 -0
- shotgun/tui/screens/chat_screen/command_providers.py +219 -0
- shotgun/tui/screens/chat_screen/hint_message.py +40 -0
- shotgun/tui/screens/chat_screen/history.py +221 -0
- shotgun/tui/screens/directory_setup.py +113 -0
- shotgun/tui/screens/provider_config.py +221 -0
- shotgun/tui/screens/splash.py +31 -0
- shotgun/tui/styles.tcss +10 -0
- shotgun/tui/utils/__init__.py +5 -0
- shotgun/tui/utils/mode_progress.py +257 -0
- shotgun/utils/__init__.py +5 -0
- shotgun/utils/env_utils.py +35 -0
- shotgun/utils/file_system_utils.py +36 -0
- shotgun/utils/update_checker.py +375 -0
- shotgun_sh-0.1.0.dist-info/METADATA +466 -0
- shotgun_sh-0.1.0.dist-info/RECORD +130 -0
- shotgun_sh-0.1.0.dist-info/WHEEL +4 -0
- shotgun_sh-0.1.0.dist-info/entry_points.txt +2 -0
- shotgun_sh-0.1.0.dist-info/licenses/LICENSE +21 -0
shotgun/codebase/core/manager.py
@@ -0,0 +1,1662 @@
"""Kuzu graph database manager for code knowledge graphs."""

from __future__ import annotations

import asyncio
import hashlib
import json
import time
import uuid
from collections.abc import Awaitable, Callable
from pathlib import Path
from typing import Any, ClassVar

import anyio
import kuzu
from watchdog.events import FileSystemEvent, FileSystemEventHandler
from watchdog.observers import Observer

from shotgun.codebase.models import (
    CodebaseGraph,
    FileChange,
    GraphStatus,
    OperationStats,
)
from shotgun.logging_config import get_logger

logger = get_logger(__name__)

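# Module layout: a sentinel exception, a watchdog event handler that feeds
# file-system changes back into asyncio, and the manager itself, which holds
# databases, connections, watchers and background tasks as process-wide state.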
class CodebaseAlreadyIndexedError(Exception):
    """Raised when a codebase is already indexed."""

    def __init__(self, repo_path: str):
        self.repo_path = repo_path
        super().__init__(f"Codebase already indexed: {repo_path}")


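# watchdog delivers events on its own observer thread, never on the asyncio
# event loop, so this handler is constructed with an explicit loop reference
# and hops back onto it via asyncio.run_coroutine_threadsafe in on_any_event.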
class CodebaseFileHandler(FileSystemEventHandler):
    """Handles file system events for code graph updates."""

    def __init__(
        self,
        graph_id: str,
        callback: Callable[[str, list[FileChange]], Awaitable[None]] | None,
        loop: asyncio.AbstractEventLoop,
        ignore_patterns: set[str] | None = None,
    ):
        self.graph_id = graph_id
        self.callback = callback
        self.loop = loop
        self.pending_changes: list[FileChange] = []
        self._lock = anyio.Lock()
        # Import default ignore patterns from ingestor
        from shotgun.codebase.core.ingestor import IGNORE_PATTERNS

        self.ignore_patterns = ignore_patterns or IGNORE_PATTERNS

    def on_any_event(self, event: FileSystemEvent) -> None:
        """Handle any file system event."""
        if event.is_directory:
            return

        # Filter out temporary files
        src_path_str = (
            event.src_path.decode("utf-8")
            if isinstance(event.src_path, bytes)
            else event.src_path
        )
        path = Path(src_path_str)
        filename = path.name

        # Check if any parent directory should be ignored
        for parent in path.parents:
            if parent.name in self.ignore_patterns:
                logger.debug(
                    f"Ignoring file in ignored directory: {parent.name} - path: {src_path_str}"
                )
                return

        # Skip various temporary files
        if any(
            [
                filename.startswith("."),  # Hidden files
                filename.endswith(".swp"),  # Vim swap files
                filename.endswith(".tmp"),  # Generic temp files
                filename.endswith("~"),  # Backup files
                "#" in filename,  # Emacs temp files
                filename.startswith("__pycache__"),  # Python cache
                path.suffix in [".pyc", ".pyo"],  # Python compiled files
                # Numeric temp files (like test_watcher_fix.py.tmp.27477.1755109972829)
                any(part.isdigit() and len(part) > 4 for part in filename.split(".")),
            ]
        ):
            logger.debug(
                f"Ignoring temporary file: {filename} - event_type: {event.event_type}"
            )
            return

        # For move events, also check destination path
        dest_path_str = None
        if hasattr(event, "dest_path") and event.dest_path:
            dest_path_str = (
                event.dest_path.decode("utf-8")
                if isinstance(event.dest_path, bytes)
                else event.dest_path
            )
            dest_path = Path(dest_path_str)
            for parent in dest_path.parents:
                if parent.name in self.ignore_patterns:
                    logger.debug(
                        f"Ignoring move to ignored directory: {parent.name} - dest_path: {dest_path_str}"
                    )
                    return

        # Map event types
        event_type_map = {
            "created": "created",
            "modified": "modified",
            "deleted": "deleted",
            "moved": "moved",
        }

        mapped_type = event_type_map.get(event.event_type, event.event_type)

        # Log the event with type
        logger.info(
            f"File watcher detected {mapped_type} event - graph_id: {self.graph_id}, path: {src_path_str}, event_type: {mapped_type}"
        )

        change = FileChange(
            event_type=mapped_type,
            src_path=src_path_str,
            dest_path=dest_path_str,
            is_directory=event.is_directory,
        )

        # Queue change for batch processing
        # Use asyncio.run_coroutine_threadsafe to schedule async work from watchdog thread
        future = asyncio.run_coroutine_threadsafe(self._queue_change(change), self.loop)
        # Handle any errors
        try:
            future.result(timeout=1.0)  # Wait briefly to ensure it's scheduled
        except Exception as e:
            logger.error(
                f"Failed to queue file change: {e} - graph_id: {self.graph_id}, path: {change.src_path}"
            )

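    # _queue_change always runs on the event loop thread (scheduled from the
    # watchdog thread above), so the anyio.Lock is enough to keep
    # pending_changes consistent across overlapping events.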
    async def _queue_change(self, change: FileChange) -> None:
        """Queue a change for processing."""
        async with self._lock:
            self.pending_changes.append(change)

        # Trigger callback
        if self.callback:
            await self.callback(self.graph_id, [change])


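# All manager state below is deliberately class-level (ClassVar): every
# CodebaseGraphManager instance shares one Database/Connection pair per graph,
# one watcher per graph, and one lock, so two instances pointing at the same
# graph share a single Kuzu handle instead of opening the file twice.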
class CodebaseGraphManager:
    """Manages Kuzu code knowledge graphs with class-level connection pooling."""

    # Class-level storage to ensure single connection per graph
    _connections: ClassVar[dict[str, kuzu.Connection]] = {}
    _databases: ClassVar[dict[str, kuzu.Database]] = {}
    _watchers: ClassVar[dict[str, Any]] = {}
    _handlers: ClassVar[dict[str, CodebaseFileHandler]] = {}
    _lock: ClassVar[anyio.Lock | None] = None

    # Operation tracking for async operations
    _operations: ClassVar[dict[str, asyncio.Task[Any]]] = {}
    _operation_stats: ClassVar[dict[str, OperationStats]] = {}

    def __init__(self, storage_dir: Path):
        """Initialize graph manager.

        Args:
            storage_dir: Directory to store graph databases
        """
        self.storage_dir = storage_dir
        self.storage_dir.mkdir(parents=True, exist_ok=True)

    @classmethod
    async def _get_lock(cls) -> anyio.Lock:
        """Get or create the class-level lock."""
        if cls._lock is None:
            cls._lock = anyio.Lock()
        return cls._lock

    @classmethod
    def _generate_graph_id(cls, repo_path: str) -> str:
        """Generate deterministic graph ID from repository path."""
        normalized = str(Path(repo_path).resolve())
        return hashlib.sha256(normalized.encode()).hexdigest()[:12]

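    # The id above is just the first 12 hex digits of the SHA-256 of the
    # resolved path, so "/repo" and "/repo/../repo" map to the same graph and
    # the id is stable across runs for the same absolute path. Illustrative
    # only (the literal value depends on the path):
    #
    #     CodebaseGraphManager._generate_graph_id("/tmp/demo")
    #     # -> a 12-character hex string such as "1a2b3c4d5e6f"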
    async def _update_graph_status(
        self, graph_id: str, status: GraphStatus, operation_id: str | None = None
    ) -> None:
        """Update the status of a graph in the database."""
        try:
            # First check if the Project node exists
            results = await self._execute_query(
                graph_id,
                "MATCH (p:Project {graph_id: $graph_id}) RETURN p",
                {"graph_id": graph_id},
            )

            if not results:
                # Project node doesn't exist yet, skip update
                logger.warning(
                    f"Project node not found for graph {graph_id}, skipping status update"
                )
                return

            await self._execute_query(
                graph_id,
                """
                MATCH (p:Project {graph_id: $graph_id})
                SET p.status = $status, p.current_operation_id = $operation_id
                """,
                {
                    "graph_id": graph_id,
                    "status": status.value,
                    "operation_id": operation_id,
                },
            )
        except Exception as e:
            logger.error(
                f"Failed to update graph status: {e} - graph_id: {graph_id}, status: {status}"
            )

    async def _store_operation_stats(
        self, graph_id: str, stats: OperationStats
    ) -> None:
        """Store operation statistics in the database."""
        try:
            await self._execute_query(
                graph_id,
                """
                MATCH (p:Project {graph_id: $graph_id})
                SET p.last_operation = $stats
                """,
                {"graph_id": graph_id, "stats": stats.model_dump_json()},
            )
            # Also store in memory for quick access
            self._operation_stats[graph_id] = stats
        except Exception as e:
            logger.error(f"Failed to store operation stats: {e} - graph_id: {graph_id}")

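    # Graph lifecycle: _initialize_graph_metadata creates the Project node in
    # BUILDING state up front, and _update_graph_status later flips it to
    # READY, UPDATING or ERROR, so status survives process restarts inside the
    # graph database itself.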
    async def _initialize_graph_metadata(
        self,
        graph_id: str,
        repo_path: str,
        name: str,
        languages: list[str] | None,
        exclude_patterns: list[str] | None,
        indexed_from_cwd: str | None = None,
    ) -> None:
        """Initialize the graph database and create initial metadata.

        This creates the database and Project node immediately so that
        status can be tracked during the build process.
        """
        graph_path = self.storage_dir / f"{graph_id}.kuzu"

        # Create database and connection
        lock = await self._get_lock()
        async with lock:
            db = kuzu.Database(str(graph_path))
            conn = kuzu.Connection(db)
            self._databases[graph_id] = db
            self._connections[graph_id] = conn

        # Create the schema
        from shotgun.codebase.core import Ingestor

        def _create_schema() -> None:
            ingestor = Ingestor(conn)
            ingestor.create_schema()

        await anyio.to_thread.run_sync(_create_schema)

        # Create initial Project node with BUILDING status
        await self._execute_query(
            graph_id,
            """
            CREATE (p:Project {
                name: $name,
                repo_path: $repo_path,
                graph_id: $graph_id,
                created_at: $created_at,
                updated_at: $updated_at,
                schema_version: $schema_version,
                build_options: $build_options,
                status: $status,
                current_operation_id: $current_operation_id,
                last_operation: $last_operation,
                node_count: 0,
                relationship_count: 0,
                stats_updated_at: $stats_updated_at,
                indexed_from_cwds: $indexed_from_cwds
            })
            """,
            {
                "name": name,
                "repo_path": repo_path,
                "graph_id": graph_id,
                "created_at": int(time.time()),
                "updated_at": int(time.time()),
                "schema_version": "1.0.0",
                "build_options": json.dumps(
                    {"languages": languages, "exclude_patterns": exclude_patterns}
                ),
                "status": GraphStatus.BUILDING.value,
                "current_operation_id": None,
                "last_operation": None,
                "stats_updated_at": int(time.time()),
                "indexed_from_cwds": json.dumps(
                    [indexed_from_cwd] if indexed_from_cwd else []
                ),
            },
        )

        # Ensure the Project node is committed
        logger.info(f"Created initial Project node for graph {graph_id}")

    async def build_graph(
        self,
        repo_path: str,
        name: str | None = None,
        languages: list[str] | None = None,
        exclude_patterns: list[str] | None = None,
        indexed_from_cwd: str | None = None,
    ) -> CodebaseGraph:
        """Build a new code knowledge graph.

        Args:
            repo_path: Path to repository
            name: Optional human-readable name
            languages: Languages to parse (default: all supported)
            exclude_patterns: Patterns to exclude

        Returns:
            Created graph metadata
        """
        repo_path = str(Path(repo_path).resolve())
        graph_id = self._generate_graph_id(repo_path)

        # Use repository name as default name
        if not name:
            name = Path(repo_path).name

        # Determine graph path
        graph_path = self.storage_dir / f"{graph_id}.kuzu"

        # Check if graph already exists
        if graph_path.exists():
            raise CodebaseAlreadyIndexedError(repo_path)

        # Import the builder from local core module
        from shotgun.codebase.core import CodebaseIngestor

        # Build the graph
        logger.info(
            f"Building code graph - graph_id: {graph_id}, repo_path: {repo_path}"
        )

        # Create database and connection
        lock = await self._get_lock()
        async with lock:
            if graph_id in self._databases:
                # Close existing connections
                if graph_id in self._connections:
                    self._connections[graph_id].close()
                    del self._connections[graph_id]
                self._databases[graph_id].close()
                del self._databases[graph_id]

        # Build using the local ingestor
        ingestor = CodebaseIngestor(
            db_path=str(graph_path),
            project_name=name,
            exclude_patterns=exclude_patterns or [],
        )

        # Run build in thread pool
        await anyio.to_thread.run_sync(ingestor.build_graph_from_directory, repo_path)

        # Get statistics
        lock = await self._get_lock()
        async with lock:
            db = kuzu.Database(str(graph_path))
            conn = kuzu.Connection(db)
            self._databases[graph_id] = db
            self._connections[graph_id] = conn

        # Create Project node with metadata BEFORE printing statistics
        await self._execute_query(
            graph_id,
            """
            CREATE (p:Project {
                name: $name,
                repo_path: $repo_path,
                graph_id: $graph_id,
                created_at: $created_at,
                updated_at: $updated_at,
                schema_version: $schema_version,
                build_options: $build_options,
                indexed_from_cwds: $indexed_from_cwds
            })
            """,
            {
                "name": name,
                "repo_path": repo_path,
                "graph_id": graph_id,
                "created_at": int(time.time()),
                "updated_at": int(time.time()),
                "schema_version": "1.0.0",
                "build_options": json.dumps(
                    {"languages": languages, "exclude_patterns": exclude_patterns}
                ),
                "indexed_from_cwds": json.dumps(
                    [indexed_from_cwd] if indexed_from_cwd else []
                ),
            },
        )

        # Now print detailed statistics (will include Project: 1)
        await self._print_graph_statistics(graph_id)

        # Get language statistics
        lang_stats = await self._execute_query(
            graph_id,
            """
            MATCH (f:File)
            WHERE f.extension IS NOT NULL
            RETURN f.extension as extension, COUNT(f) as count
            """,
        )

        language_stats = {}
        if lang_stats:
            for row in lang_stats:
                ext = row.get("extension", "").lower()
                if ext:
                    # Map extensions to languages
                    lang_map = {
                        ".py": "Python",
                        ".js": "JavaScript",
                        ".ts": "TypeScript",
                        ".go": "Go",
                        ".rs": "Rust",
                        ".java": "Java",
                        ".cpp": "C++",
                        ".c": "C",
                        ".cs": "C#",
                        ".rb": "Ruby",
                    }
                    lang = lang_map.get(ext, ext)
                    language_stats[lang] = row.get("count", 0)

        # Get counts dynamically
        node_count = await self._execute_query(
            graph_id, "MATCH (n) RETURN COUNT(n) as count"
        )
        relationship_count = await self._execute_query(
            graph_id, "MATCH ()-[r]->() RETURN COUNT(r) as count"
        )

        graph = CodebaseGraph(
            graph_id=graph_id,
            repo_path=repo_path,
            graph_path=str(graph_path),
            name=name,
            created_at=time.time(),
            updated_at=time.time(),
            build_options={
                "languages": languages,
                "exclude_patterns": exclude_patterns,
            },
            node_count=node_count[0]["count"] if node_count else 0,
            relationship_count=relationship_count[0]["count"]
            if relationship_count
            else 0,
            language_stats=language_stats,
            is_watching=False,
            status=GraphStatus.READY,
            last_operation=None,
            current_operation_id=None,
            indexed_from_cwds=[indexed_from_cwd] if indexed_from_cwd else [],
        )

        # Update status to READY
        await self._update_graph_status(graph_id, GraphStatus.READY)

        return graph

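    # Illustrative call sequence for the method above (sketch only; the
    # storage path is a placeholder, not something this module defines):
    #
    #     manager = CodebaseGraphManager(Path("/tmp/shotgun-graphs"))
    #     graph = await manager.build_graph("/path/to/repo")
    #     assert graph.status is GraphStatus.READY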
    async def update_graph(
        self, graph_id: str, changes: list[FileChange] | None = None
    ) -> dict[str, Any]:
        """Update graph based on file changes.

        Args:
            graph_id: Graph to update
            changes: List of file changes (if None, will auto-detect)

        Returns:
            Update statistics
        """
        # If no changes provided, use incremental update
        if changes is None:
            return await self.update_graph_incremental(graph_id)

        start_time = time.time()

        # Get graph metadata
        graph = await self.get_graph(graph_id)
        if not graph:
            raise ValueError(f"Graph {graph_id} not found")

        # Import is already done at the top of the method

        # Process changes
        stats = {
            "nodes_added": 0,
            "nodes_removed": 0,
            "relationships_added": 0,
            "relationships_removed": 0,
        }

        lock = await self._get_lock()
        async with lock:
            if graph_id not in self._connections:
                db = kuzu.Database(graph.graph_path)
                conn = kuzu.Connection(db)
                self._databases[graph_id] = db
                self._connections[graph_id] = conn

        # Group changes by type
        for change in changes:
            if change.event_type == "deleted":
                # Remove nodes for deleted files
                await self._execute_query(
                    graph_id,
                    "MATCH (n) WHERE n.path = $path DELETE n",
                    {"path": change.src_path},
                )
                stats["nodes_removed"] += 1
            elif change.event_type in ["created", "modified"]:
                # Re-parse and update the file
                # This is simplified - the actual implementation would use the ingestor
                logger.info(f"Updating file in graph - path: {change.src_path}")

        update_time = (time.time() - start_time) * 1000

        # Update metadata
        await self._execute_query(
            graph_id,
            """
            MATCH (p:Project {graph_id: $graph_id})
            SET p.updated_at = $updated_at
            """,
            {"graph_id": graph_id, "updated_at": int(time.time())},
        )

        return {"update_time_ms": update_time, **stats}

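    # The incremental path below treats a modified file as delete-then-re-ingest
    # and only counts whole files, which is why nodes_added and nodes_modified
    # are marked "approximate" in the body.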
    async def update_graph_incremental(self, graph_id: str) -> dict[str, Any]:
        """Update graph by automatically detecting changes.

        Args:
            graph_id: Graph to update

        Returns:
            Update statistics
        """
        start_time = time.time()

        # Get graph metadata
        graph = await self.get_graph(graph_id)
        if not graph:
            raise ValueError(f"Graph {graph_id} not found")

        # Validate that the repository path still exists
        repo_path = Path(graph.repo_path)
        if not repo_path.exists():
            logger.error(f"Repository path no longer exists: {graph.repo_path}")
            raise ValueError(f"Repository path no longer exists: {graph.repo_path}")
        if not repo_path.is_dir():
            logger.error(f"Repository path is not a directory: {graph.repo_path}")
            raise ValueError(f"Repository path is not a directory: {graph.repo_path}")

        # Parse build options
        build_options = graph.build_options if graph.build_options else {}

        languages = build_options.get("languages")
        exclude_patterns = build_options.get("exclude_patterns")

        lock = await self._get_lock()
        async with lock:
            if graph_id not in self._connections:
                db = kuzu.Database(graph.graph_path)
                self._connections[graph_id] = kuzu.Connection(db)

        conn = self._connections[graph_id]

        # Create change detector
        from shotgun.codebase.core.change_detector import ChangeDetector, ChangeType

        detector = ChangeDetector(conn, Path(graph.repo_path))

        # Load parsers first to know what languages we can actually process
        from shotgun.codebase.core.parser_loader import load_parsers

        parsers, queries = load_parsers()
        available_languages = list(parsers.keys())

        # If no languages were specified in build options, use all available parsers
        # Otherwise, filter to intersection of requested and available languages
        if languages is None or languages == []:
            effective_languages = available_languages
        else:
            effective_languages = [
                lang for lang in languages if lang in available_languages
            ]

        if not effective_languages:
            logger.warning(
                f"No parsers available for requested languages - requested: {languages}, available: {available_languages}"
            )
            return {
                "update_time_ms": (time.time() - start_time) * 1000,
                "nodes_added": 0,
                "nodes_removed": 0,
                "nodes_modified": 0,
                "relationships_added": 0,
                "relationships_removed": 0,
                "files_added": 0,
                "files_modified": 0,
                "files_deleted": 0,
                "files_skipped": 0,
            }

        # Log what languages we're using for update
        logger.info(f"Updating graph with languages: {effective_languages}")

        # Detect changes only for languages we can process
        changes = detector.detect_changes(effective_languages, exclude_patterns)

        # Also detect ALL changes to report on skipped files
        if languages is None or (
            languages and len(languages) > len(effective_languages)
        ):
            all_changes = detector.detect_changes(None, exclude_patterns)
            skipped_count = len(all_changes) - len(changes)
            if skipped_count > 0:
                logger.info(
                    f"Skipping {skipped_count} files due to missing parsers - available_parsers: {available_languages}, requested_languages: {languages}"
                )
                # Log some examples of skipped files
                skipped_files = set(all_changes.keys()) - set(changes.keys())
                examples = list(skipped_files)[:5]
                if examples:
                    logger.info(f"Examples of skipped files: {examples}")
        else:
            skipped_count = 0

        if not changes:
            logger.info(f"No changes detected for graph {graph_id}")
            return {
                "update_time_ms": (time.time() - start_time) * 1000,
                "nodes_added": 0,
                "nodes_removed": 0,
                "nodes_modified": 0,
                "relationships_added": 0,
                "relationships_removed": 0,
                "files_added": 0,
                "files_modified": 0,
                "files_deleted": 0,
                "files_skipped": skipped_count,
            }

        logger.info(f"Processing {len(changes)} file changes for graph {graph_id}")

        # Initialize stats
        stats = {
            "nodes_added": 0,
            "nodes_removed": 0,
            "nodes_modified": 0,
            "relationships_added": 0,
            "relationships_removed": 0,
            "files_added": 0,
            "files_modified": 0,
            "files_deleted": 0,
            "files_skipped": 0,
        }

        # Initialize ingestor and builder
        from shotgun.codebase.core.ingestor import Ingestor, SimpleGraphBuilder

        ingestor = Ingestor(conn)

        builder = SimpleGraphBuilder(
            ingestor, Path(graph.repo_path), parsers, queries, exclude_patterns
        )

        # Process changes by type
        deletions = []
        modifications = []
        additions = []

        for filepath, change_type in changes.items():
            if change_type == ChangeType.DELETED:
                deletions.append(filepath)
                stats["files_deleted"] += 1
            elif change_type == ChangeType.MODIFIED:
                modifications.append(filepath)
                stats["files_modified"] += 1
            elif change_type == ChangeType.ADDED:
                additions.append(filepath)
                stats["files_added"] += 1

        # Process deletions first
        for filepath in deletions:
            logger.debug(f"Processing deletion: {filepath}")
            deletion_stats = ingestor.delete_file_nodes(filepath)
            stats["nodes_removed"] += sum(deletion_stats.values())

        # Process modifications (as delete + add)
        for filepath in modifications:
            logger.debug(f"Processing modification: {filepath}")
            # Delete old nodes
            deletion_stats = ingestor.delete_file_nodes(filepath)
            stats["nodes_removed"] += sum(deletion_stats.values())

            # Re-process the file
            full_path = Path(graph.repo_path) / filepath
            if full_path.exists():
                # Determine language from file extension
                from shotgun.codebase.core.language_config import (
                    get_language_config,
                )

                lang_config = get_language_config(full_path.suffix)
                if lang_config and lang_config.name in parsers:
                    builder._process_single_file(full_path, lang_config.name)
                    stats["nodes_modified"] += 1  # Approximate

        # Process additions
        for filepath in additions:
            logger.debug(f"Processing addition: {filepath}")
            full_path = Path(graph.repo_path) / filepath
            if full_path.exists():
                # Determine language from file extension
                from shotgun.codebase.core.language_config import (
                    get_language_config,
                )

                lang_config = get_language_config(full_path.suffix)
                if lang_config and lang_config.name in parsers:
                    builder._process_single_file(full_path, lang_config.name)
                    stats["nodes_added"] += 1  # Approximate

        # Flush all pending operations
        ingestor.flush_all()

        # Update graph metadata
        current_time = int(time.time())
        conn.execute(
            """
            MATCH (p:Project {name: $name})
            SET p.updated_at = $time
            """,
            {"name": graph.name, "time": current_time},
        )

        stats["update_time_ms"] = int((time.time() - start_time) * 1000)
        stats["files_skipped"] = skipped_count
        logger.info(f"Incremental update complete for graph {graph_id}: {stats}")
        return stats

    async def _update_graph_impl(
        self, graph_id: str, changes: list[FileChange] | None = None
    ) -> dict[str, Any]:
        """Internal implementation of graph update (runs in background)."""
        operation_id = str(uuid.uuid4())
        start_time = time.time()

        # Create operation stats
        operation_stats = OperationStats(
            operation_type="update",
            started_at=start_time,
            completed_at=None,
            success=False,
            error=None,
            stats={},
        )

        try:
            # Update status to UPDATING
            await self._update_graph_status(
                graph_id, GraphStatus.UPDATING, operation_id
            )

            # Do the actual update work
            if changes is None:
                stats = await self.update_graph_incremental(graph_id)
            else:
                stats = await self.update_graph(graph_id, changes)

            # Update operation stats
            operation_stats.completed_at = time.time()
            operation_stats.success = True
            operation_stats.stats = stats

            # Update status to READY
            await self._update_graph_status(graph_id, GraphStatus.READY, None)

            # Store operation stats
            await self._store_operation_stats(graph_id, operation_stats)

            return stats

        except Exception as e:
            # Update operation stats with error
            operation_stats.completed_at = time.time()
            operation_stats.success = False
            operation_stats.error = str(e)
            operation_stats.stats["update_time_ms"] = (time.time() - start_time) * 1000

            # Update status to ERROR
            await self._update_graph_status(graph_id, GraphStatus.ERROR, None)

            # Store operation stats
            await self._store_operation_stats(graph_id, operation_stats)

            logger.error(f"Update failed for graph {graph_id}: {e}")
            raise
        finally:
            # Clean up operation tracking
            if graph_id in self._operations:
                del self._operations[graph_id]

    async def get_operation_status(self, graph_id: str) -> dict[str, Any]:
        """Get the current operation status for a graph.

        Args:
            graph_id: Graph ID to check

        Returns:
            Dictionary with status information

        Raises:
            ValueError: If graph not found
        """
        graph = await self.get_graph(graph_id)
        if not graph:
            raise ValueError(f"Graph {graph_id} not found")

        # Build response
        response: dict[str, Any] = {
            "graph_id": graph_id,
            "status": graph.status.value,
            "current_operation_id": graph.current_operation_id,
        }

        # Add last operation details if available
        if graph.last_operation:
            response["last_operation"] = {
                "operation_type": graph.last_operation.operation_type,
                "started_at": graph.last_operation.started_at,
                "completed_at": graph.last_operation.completed_at,
                "success": graph.last_operation.success,
                "error": graph.last_operation.error,
                "stats": graph.last_operation.stats,
            }

        # Check if there's an active operation
        if graph_id in self._operations:
            task = self._operations[graph_id]
            if not task.done():
                response["operation_in_progress"] = True
            else:
                # Operation finished but not cleaned up yet
                response["operation_in_progress"] = False
                # Try to get the result or exception
                try:
                    task.result()
                except Exception as e:
                    response["operation_error"] = str(e)
        else:
            response["operation_in_progress"] = False

        return response

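    # update_graph_async fires _update_graph_impl as a background task and
    # returns immediately; note that despite the docstring it returns the
    # graph_id, which doubles as the polling key. Illustrative polling loop:
    #
    #     await manager.update_graph_async(graph_id)
    #     while (await manager.get_operation_status(graph_id))["operation_in_progress"]:
    #         await asyncio.sleep(1.0)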
    async def update_graph_async(
        self, graph_id: str, changes: list[FileChange] | None = None
    ) -> str:
        """Start updating a graph asynchronously.

        Returns:
            Operation ID
        """
        # Check if graph exists
        graph = await self.get_graph(graph_id)
        if not graph:
            raise ValueError(f"Graph {graph_id} not found")

        # Check if already updating
        if graph_id in self._operations:
            raise ValueError(f"Graph {graph_id} is already being updated.")

        # Start the update operation in background
        task = asyncio.create_task(self._update_graph_impl(graph_id, changes))
        self._operations[graph_id] = task

        return graph_id

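    # start_watcher merges the ingestor's default IGNORE_PATTERNS with any
    # caller-supplied ones and hands the running event loop to the handler so
    # watchdog's observer thread can schedule coroutines on it; the `patterns`
    # argument is accepted but not currently applied to the observer.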
    async def start_watcher(
        self,
        graph_id: str,
        callback: Callable[[str, list[FileChange]], Awaitable[None]] | None = None,
        patterns: list[str] | None = None,
        ignore_patterns: list[str] | None = None,
    ) -> None:
        """Start watching repository for changes.

        Args:
            graph_id: Graph to watch
            callback: Async callback for changes
            patterns: File patterns to watch
            ignore_patterns: Patterns to ignore
        """
        graph = await self.get_graph(graph_id)
        if not graph:
            raise ValueError(f"Graph {graph_id} not found")

        lock = await self._get_lock()
        async with lock:
            if graph_id in self._watchers:
                logger.warning(f"Watcher already running - graph_id: {graph_id}")
                return

            # Get current event loop for thread-safe async calls
            loop = asyncio.get_running_loop()

            # Combine default ignore patterns with any custom ones
            from shotgun.codebase.core.ingestor import IGNORE_PATTERNS

            combined_ignore = IGNORE_PATTERNS.copy()
            if ignore_patterns:
                combined_ignore.update(ignore_patterns)

            # Create handler with loop reference and ignore patterns
            handler = CodebaseFileHandler(graph_id, callback, loop, combined_ignore)
            self._handlers[graph_id] = handler

            # Create and start observer
            observer = Observer()
            observer.schedule(handler, graph.repo_path, recursive=True)
            observer.start()

            self._watchers[graph_id] = observer

        logger.info(
            f"Started file watcher - graph_id: {graph_id}, repo_path: {graph.repo_path}"
        )

    async def stop_watcher(self, graph_id: str) -> int:
        """Stop watching repository.

        Args:
            graph_id: Graph to stop watching

        Returns:
            Number of changes processed
        """
        lock = await self._get_lock()
        async with lock:
            if graph_id not in self._watchers:
                logger.warning(f"No watcher running - graph_id: {graph_id}")
                return 0

            observer = self._watchers[graph_id]
            observer.stop()
            observer.join(timeout=5)

            # Get change count
            handler = self._handlers.get(graph_id)
            change_count = len(handler.pending_changes) if handler else 0

            # Clean up
            del self._watchers[graph_id]
            if graph_id in self._handlers:
                del self._handlers[graph_id]

        logger.info(
            f"Stopped file watcher - graph_id: {graph_id}, changes_processed: {change_count}"
        )
        return change_count

    async def execute_query(
        self, graph_id: str, query: str, parameters: dict[str, Any] | None = None
    ) -> list[dict[str, Any]]:
        """Execute Cypher query on graph.

        Args:
            graph_id: Graph to query
            query: Cypher query
            parameters: Query parameters

        Returns:
            Query results
        """
        return await self._execute_query(graph_id, query, parameters)

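    # All reads and writes funnel through _execute_query below: the class lock
    # only guards connection setup, and the blocking Kuzu call itself runs in a
    # worker thread via anyio.to_thread.run_sync so the event loop stays
    # responsive.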
    async def _execute_query(
        self, graph_id: str, query: str, parameters: dict[str, Any] | None = None
    ) -> list[dict[str, Any]]:
        """Internal query execution with connection management."""
        lock = await self._get_lock()
        async with lock:
            if graph_id not in self._connections:
                # Open connection if needed
                graph_path = self.storage_dir / f"{graph_id}.kuzu"
                if not graph_path.exists():
                    raise ValueError(f"Graph {graph_id} not found")

                db = kuzu.Database(str(graph_path))
                conn = kuzu.Connection(db)
                self._databases[graph_id] = db
                self._connections[graph_id] = conn

        conn = self._connections[graph_id]

        # Execute query in thread pool
        def _run_query() -> list[dict[str, Any]]:
            if parameters:
                result = conn.execute(query, parameters)
            else:
                result = conn.execute(query)

            # Collect results
            rows = []
            columns = (
                result.get_column_names() if hasattr(result, "get_column_names") else []
            )

            if hasattr(result, "has_next") and not isinstance(result, list):
                while result.has_next():
                    row = result.get_next()
                    row_dict = {}
                    for i, col in enumerate(columns):
                        if isinstance(row, tuple | list) and i < len(row):
                            row_dict[col] = row[i]
                        elif hasattr(row, col):
                            row_dict[col] = getattr(row, col)
                    rows.append(row_dict)
            elif isinstance(result, list):
                # Convert list of QueryResult objects to list of dicts
                for query_result in result:
                    row_dict = {}
                    for col in columns:
                        if hasattr(query_result, col):
                            row_dict[col] = getattr(query_result, col)
                    rows.append(row_dict)

            return rows

        return await anyio.to_thread.run_sync(_run_query)

    async def get_graph(self, graph_id: str) -> CodebaseGraph | None:
        """Get graph metadata.

        Args:
            graph_id: Graph ID

        Returns:
            Graph metadata or None if not found
        """
        graph_path = self.storage_dir / f"{graph_id}.kuzu"
        if not graph_path.exists():
            return None

        # Query metadata from Project node
        try:
            results = await self._execute_query(
                graph_id,
                "MATCH (p:Project {graph_id: $graph_id}) RETURN p",
                {"graph_id": graph_id},
            )

            if not results:
                return None

            project = results[0]["p"]

            # Check if watcher is active
            is_watching = graph_id in self._watchers

            # Get language statistics
            lang_stats = await self._execute_query(
                graph_id,
                """
                MATCH (f:File)
                WHERE f.extension IS NOT NULL
                RETURN f.extension as extension, COUNT(f) as count
                """,
            )

            language_stats = {}
            if lang_stats:
                for row in lang_stats:
                    ext = row.get("extension", "").lower()
                    if ext:
                        # Map extensions to languages
                        lang_map = {
                            ".py": "Python",
                            ".js": "JavaScript",
                            ".ts": "TypeScript",
                            ".go": "Go",
                            ".rs": "Rust",
                            ".java": "Java",
                            ".cpp": "C++",
                            ".c": "C",
                            ".cs": "C#",
                            ".rb": "Ruby",
                        }
                        lang = lang_map.get(ext, ext)
                        language_stats[lang] = row.get("count", 0)

            # Get counts dynamically
            node_count = await self._execute_query(
                graph_id, "MATCH (n) RETURN COUNT(n) as count"
            )
            relationship_count = await self._execute_query(
                graph_id, "MATCH ()-[r]->() RETURN COUNT(r) as count"
            )

            # Get detailed statistics
            node_stats, relationship_stats = await self._get_graph_statistics(graph_id)

            # Parse status
            status_str = project.get("status", GraphStatus.READY.value)
            try:
                status = GraphStatus(status_str)
            except ValueError:
                status = GraphStatus.READY

            # Parse last operation
            last_operation = None
            last_op_str = project.get("last_operation")
            if last_op_str:
                try:
                    last_op_data = json.loads(last_op_str)
                    last_operation = OperationStats(**last_op_data)
                except Exception as e:
                    logger.debug(f"Failed to parse last operation stats: {e}")
                    last_operation = None

            # Parse indexed_from_cwds - handle backward compatibility
            indexed_from_cwds_json = project.get("indexed_from_cwds", "[]")
            try:
                indexed_from_cwds = (
                    json.loads(indexed_from_cwds_json) if indexed_from_cwds_json else []
                )
            except (json.JSONDecodeError, TypeError):
                indexed_from_cwds = []

            return CodebaseGraph(
                graph_id=graph_id,
                repo_path=project.get("repo_path", ""),
                graph_path=str(graph_path),
                name=project.get("name", ""),
                created_at=float(project.get("created_at", 0)),
                updated_at=float(project.get("updated_at", 0)),
                schema_version=project.get("schema_version", "1.0.0"),
                build_options=json.loads(project.get("build_options", "{}")),
                node_count=node_count[0]["count"] if node_count else 0,
                relationship_count=relationship_count[0]["count"]
                if relationship_count
                else 0,
                node_stats=node_stats,
                relationship_stats=relationship_stats,
                language_stats=language_stats,
                is_watching=is_watching,
                status=status,
                last_operation=last_operation,
                current_operation_id=project.get("current_operation_id"),
                indexed_from_cwds=indexed_from_cwds,
            )
        except Exception as e:
            logger.error(
                f"Failed to get graph metadata - graph_id: {graph_id}, error: {str(e)}"
            )
            return None

    async def list_graphs(self) -> list[CodebaseGraph]:
        """List all available graphs.

        Returns:
            List of graph metadata
        """
        graphs = []

        # Find all .kuzu files
        for path in self.storage_dir.glob("*.kuzu"):
            if path.is_file():
                graph_id = path.stem
                graph = await self.get_graph(graph_id)
                if graph:
                    graphs.append(graph)

        return sorted(graphs, key=lambda g: g.updated_at, reverse=True)

    async def add_cwd_access(self, graph_id: str, cwd: str | None = None) -> None:
        """Add a working directory to a graph's access list.

        Args:
            graph_id: Graph ID to update
            cwd: Working directory to add. If None, uses current working directory.
        """
        from pathlib import Path

        if cwd is None:
            cwd = str(Path.cwd().resolve())
        else:
            cwd = str(Path(cwd).resolve())

        # Get current graph
        graph = await self.get_graph(graph_id)
        if not graph:
            raise ValueError(f"Graph {graph_id} not found")

        # Get current list
        current_cwds = graph.indexed_from_cwds.copy()

        # Add new CWD if not already present
        if cwd not in current_cwds:
            current_cwds.append(cwd)

            # Update in database
            await self._execute_query(
                graph_id,
                """
                MATCH (p:Project {graph_id: $graph_id})
                SET p.indexed_from_cwds = $indexed_from_cwds
                """,
                {
                    "graph_id": graph_id,
                    "indexed_from_cwds": json.dumps(current_cwds),
                },
            )
            logger.info(f"Added CWD access for {cwd} to graph {graph_id}")

    async def remove_cwd_access(self, graph_id: str, cwd: str) -> None:
        """Remove a working directory from a graph's access list.

        Args:
            graph_id: Graph ID to update
            cwd: Working directory to remove
        """
        from pathlib import Path

        cwd = str(Path(cwd).resolve())

        # Get current graph
        graph = await self.get_graph(graph_id)
        if not graph:
            raise ValueError(f"Graph {graph_id} not found")

        # Get current list
        current_cwds = graph.indexed_from_cwds.copy()

        # Remove CWD if present
        if cwd in current_cwds:
            current_cwds.remove(cwd)

            # Update in database
            await self._execute_query(
                graph_id,
                """
                MATCH (p:Project {graph_id: $graph_id})
                SET p.indexed_from_cwds = $indexed_from_cwds
                """,
                {
                    "graph_id": graph_id,
                    "indexed_from_cwds": json.dumps(current_cwds),
                },
            )
            logger.info(f"Removed CWD access for {cwd} from graph {graph_id}")

    async def delete_graph(self, graph_id: str) -> None:
        """Delete a graph.

        Args:
            graph_id: Graph to delete
        """
        # Stop watcher if running
        if graph_id in self._watchers:
            await self.stop_watcher(graph_id)

        # Close connections
        lock = await self._get_lock()
        async with lock:
            if graph_id in self._connections:
                self._connections[graph_id].close()
                del self._connections[graph_id]
            if graph_id in self._databases:
                self._databases[graph_id].close()
                del self._databases[graph_id]

        # Delete files
        graph_path = self.storage_dir / f"{graph_id}.kuzu"
        if graph_path.exists():
            # Delete the database file
            await anyio.to_thread.run_sync(graph_path.unlink)

        # Also delete the WAL file if it exists
        wal_path = self.storage_dir / f"{graph_id}.kuzu.wal"
        if wal_path.exists():
            await anyio.to_thread.run_sync(wal_path.unlink)

        logger.info(f"Deleted graph - graph_id: {graph_id}")

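    # Kuzu relationship tables are typed, so one logical edge kind can be
    # split across several tables (e.g. CALLS, CALLS_FM, CALLS_MF, CALLS_MM,
    # presumably one per source/target node-table pair); the statistics helper
    # below therefore sums the per-table counts into a single figure.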
    async def _get_graph_statistics(
        self, graph_id: str
    ) -> tuple[dict[str, int], dict[str, int]]:
        """Get detailed statistics about the graph.

        Returns:
            Tuple of (node_stats, relationship_stats)
        """
        node_stats = {}

        # Count each node type
        node_types = [
            "Project",
            "Package",
            "Module",
            "Class",
            "Function",
            "Method",
            "File",
            "Folder",
            "FileMetadata",
            "DeletionLog",
        ]

        for node_type in node_types:
            try:
                result = await self._execute_query(
                    graph_id, f"MATCH (n:{node_type}) RETURN COUNT(n) as count"
                )
                count = result[0]["count"] if result else 0
                if count > 0:
                    node_stats[node_type] = count
            except Exception as e:
                logger.debug(f"Failed to count {node_type} nodes: {e}")

        # Count relationships - need to handle multiple tables for each type
        rel_counts = {}

        # CONTAINS relationships
        for prefix in [
            "CONTAINS_PACKAGE",
            "CONTAINS_FOLDER",
            "CONTAINS_FILE",
            "CONTAINS_MODULE",
        ]:
            count = 0
            for suffix in ["", "_PKG", "_FOLDER"]:
                table = f"{prefix}{suffix}"
                try:
                    result = await self._execute_query(
                        graph_id, f"MATCH ()-[r:{table}]->() RETURN COUNT(r) as count"
                    )
                    if result:
                        count += result[0]["count"]
                except Exception as e:
                    logger.debug(f"Failed to count {table} relationships: {e}")
            if count > 0:
                rel_counts[prefix] = count

        # Other relationships
        for rel_type in [
            "DEFINES",
            "DEFINES_FUNC",
            "DEFINES_METHOD",
            "INHERITS",
            "OVERRIDES",
            "DEPENDS_ON_EXTERNAL",
            "IMPORTS",
        ]:
            try:
                result = await self._execute_query(
                    graph_id, f"MATCH ()-[r:{rel_type}]->() RETURN COUNT(r) as count"
                )
                if result and result[0]["count"] > 0:
                    rel_counts[rel_type] = result[0]["count"]
            except Exception as e:
                logger.debug(f"Failed to count {rel_type} relationships: {e}")

        # CALLS relationships (multiple tables)
        calls_count = 0
        for table in ["CALLS", "CALLS_FM", "CALLS_MF", "CALLS_MM"]:
            try:
                result = await self._execute_query(
                    graph_id, f"MATCH ()-[r:{table}]->() RETURN COUNT(r) as count"
                )
                if result:
                    calls_count += result[0]["count"]
            except Exception as e:
                logger.debug(f"Failed to count {table} relationships: {e}")
        if calls_count > 0:
            rel_counts["CALLS (total)"] = calls_count

        # TRACKS relationships
        tracks_count = 0
        for entity in ["Module", "Class", "Function", "Method"]:
            try:
                result = await self._execute_query(
                    graph_id,
                    f"MATCH ()-[r:TRACKS_{entity}]->() RETURN COUNT(r) as count",
                )
                if result:
                    tracks_count += result[0]["count"]
            except Exception as e:
                logger.debug(f"Failed to count TRACKS_{entity} relationships: {e}")
        if tracks_count > 0:
            rel_counts["TRACKS (total)"] = tracks_count

        return node_stats, rel_counts

|
|
1433
|
+
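
    # NOTE (added comment): each count above is one Cypher aggregate against a
    # single table, e.g. the queries produced by the f-strings look like:
    #
    #     MATCH (n:Class) RETURN COUNT(n) as count
    #     MATCH ()-[r:CALLS_FM]->() RETURN COUNT(r) as count
    #
    # Every probe is wrapped in try/except and logged only at debug level,
    # presumably because querying a relationship table that was never created
    # for this graph raises instead of returning zero.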

    async def _print_graph_statistics(self, graph_id: str) -> None:
        """Print detailed statistics about the graph."""
        logger.info("\n=== Graph Statistics ===")

        node_stats, rel_stats = await self._get_graph_statistics(graph_id)

        # Print node stats
        for node_type in [
            "Project",
            "Package",
            "Module",
            "Class",
            "Function",
            "Method",
            "File",
            "Folder",
            "FileMetadata",
            "DeletionLog",
        ]:
            count = node_stats.get(node_type, 0)
            logger.info(f"{node_type}: {count}")

        logger.info("\nRelationship counts:")
        for rel_type, count in sorted(rel_stats.items()):
            logger.info(f"{rel_type}: {count}")
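
    # Sample output shape (added comment; values are illustrative only, not
    # taken from any real run):
    #
    #     === Graph Statistics ===
    #     Project: 1
    #     Package: 4
    #     Module: 73
    #     Class: 41
    #     ...
    #     Relationship counts:
    #     CALLS (total): 612
    #     DEFINES: 114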

    async def _build_graph_impl(
        self,
        graph_id: str,
        repo_path: str,
        name: str,
        languages: list[str] | None,
        exclude_patterns: list[str] | None,
        indexed_from_cwd: str | None = None,
    ) -> CodebaseGraph:
        """Internal implementation of graph building (runs in background)."""
        operation_id = str(uuid.uuid4())
        start_time = time.time()

        # Create operation stats
        operation_stats = OperationStats(
            operation_type="build",
            started_at=start_time,
            completed_at=None,
            success=False,
            error=None,
            stats={},
        )

        try:
            # Update status to BUILDING
            await self._update_graph_status(
                graph_id, GraphStatus.BUILDING, operation_id
            )

            # Do the actual build work
            graph = await self._do_build_graph(
                graph_id, repo_path, name, languages, exclude_patterns, indexed_from_cwd
            )

            # Update operation stats
            operation_stats.completed_at = time.time()
            operation_stats.success = True
            operation_stats.stats = {
                "node_count": graph.node_count,
                "relationship_count": graph.relationship_count,
                "language_stats": graph.language_stats,
                "build_time_ms": (time.time() - start_time) * 1000,
            }

            # Update status to READY
            await self._update_graph_status(graph_id, GraphStatus.READY, None)

            # Store operation stats
            await self._store_operation_stats(graph_id, operation_stats)

            return graph

        except Exception as e:
            # Update operation stats with error
            operation_stats.completed_at = time.time()
            operation_stats.success = False
            operation_stats.error = str(e)
            operation_stats.stats["build_time_ms"] = (time.time() - start_time) * 1000

            # Update status to ERROR
            await self._update_graph_status(graph_id, GraphStatus.ERROR, None)

            # Store operation stats
            await self._store_operation_stats(graph_id, operation_stats)

            logger.error(f"Build failed for graph {graph_id}: {e}")
            raise
        finally:
            # Clean up operation tracking
            if graph_id in self._operations:
                del self._operations[graph_id]
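
    # NOTE (added comment): the status lifecycle implemented above is
    # BUILDING -> READY on success and BUILDING -> ERROR on failure, with an
    # OperationStats record persisted on both paths and the in-flight task
    # always removed from self._operations in the finally block.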

    async def _do_build_graph(
        self,
        graph_id: str,
        repo_path: str,
        name: str,
        languages: list[str] | None,
        exclude_patterns: list[str] | None,
        indexed_from_cwd: str | None = None,
    ) -> CodebaseGraph:
        """Execute the actual graph building logic (extracted from original build_graph)."""
        # The database and Project node already exist from _initialize_graph_metadata

        # Get existing connection
        lock = await self._get_lock()
        async with lock:
            if graph_id not in self._connections:
                raise RuntimeError(f"Connection not found for graph {graph_id}")
            conn = self._connections[graph_id]

        # Import the builder from local core module

        # Build the graph
        logger.info(
            f"Building code graph - graph_id: {graph_id}, repo_path: {repo_path}"
        )

        # Build the graph using our existing connection
        def _build_graph() -> None:
            from shotgun.codebase.core import Ingestor, SimpleGraphBuilder
            from shotgun.codebase.core.parser_loader import load_parsers

            # Load parsers for requested languages
            parsers, queries = load_parsers()

            # Log available parsers before filtering
            logger.info(f"Available parsers: {list(parsers.keys())}")

            # Filter parsers to requested languages if specified
            if languages:
                parsers = {
                    lang: parser
                    for lang, parser in parsers.items()
                    if lang in languages
                }
                queries = {
                    lang: query for lang, query in queries.items() if lang in languages
                }
                logger.info(
                    f"Filtered parsers to requested languages {languages}: {list(parsers.keys())}"
                )
            else:
                logger.info(f"Using all available parsers: {list(parsers.keys())}")

            # Create ingestor with existing connection
            ingestor = Ingestor(conn)

            # Create builder
            builder = SimpleGraphBuilder(
                ingestor=ingestor,
                repo_path=Path(repo_path),
                parsers=parsers,
                queries=queries,
                exclude_patterns=exclude_patterns,
            )

            # Build the graph
            builder.run()

        # Run build in thread pool
        await anyio.to_thread.run_sync(_build_graph)

        # Now print detailed statistics (will include Project: 1)
        await self._print_graph_statistics(graph_id)

        # Get the updated graph metadata
        graph = await self.get_graph(graph_id)
        if not graph:
            raise RuntimeError(f"Failed to retrieve graph {graph_id} after build")

        return graph
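
    # NOTE (added comment): parsing and ingestion inside _build_graph() are
    # synchronous, so the whole closure is pushed onto a worker thread with
    # anyio.to_thread.run_sync; the closure captures `conn`, the per-graph
    # connection fetched under the manager lock above, along with the
    # languages/exclude_patterns arguments.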

    async def build_graph_async(
        self,
        repo_path: str,
        name: str | None = None,
        languages: list[str] | None = None,
        exclude_patterns: list[str] | None = None,
        indexed_from_cwd: str | None = None,
    ) -> str:
        """Start building a new code knowledge graph asynchronously.

        Returns:
            Graph ID of the graph being built
        """
        repo_path = str(Path(repo_path).resolve())
        graph_id = self._generate_graph_id(repo_path)

        # Use repository name as default name
        if not name:
            name = Path(repo_path).name

        # Check if graph already exists
        graph_path = self.storage_dir / f"{graph_id}.kuzu"
        if graph_path.exists():
            raise ValueError(
                f"Graph already exists for {repo_path}. Use update_graph() to modify it."
            )

        # Check if already building
        if graph_id in self._operations:
            raise ValueError(f"Graph {graph_id} is already being built.")

        # Create the database and initial Project node immediately
        # This allows status tracking during the build
        await self._initialize_graph_metadata(
            graph_id=graph_id,
            repo_path=repo_path,
            name=name,
            languages=languages,
            exclude_patterns=exclude_patterns,
            indexed_from_cwd=indexed_from_cwd,
        )

        # Start the build operation in background
        task = asyncio.create_task(
            self._build_graph_impl(
                graph_id, repo_path, name, languages, exclude_patterns, indexed_from_cwd
            )
        )
        self._operations[graph_id] = task

        return graph_id
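
    # Usage sketch (added comment; `manager` is a hypothetical initialized
    # instance, and the polling pattern is an assumption, not shown here):
    #
    #     graph_id = await manager.build_graph_async("/home/user/project")
    #     # build_graph_async returns immediately; poll get_graph(graph_id)
    #     # until its stored status reaches READY or ERROR
    #
    # The graph_id is derived from the resolved repo_path via
    # _generate_graph_id, which is why the already-exists and already-building
    # checks above can key off it before any work starts.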