shotgun-sh 0.1.9__py3-none-any.whl → 0.2.11__py3-none-any.whl

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.

Potentially problematic release.


This version of shotgun-sh might be problematic. Click here for more details.

Files changed (150)
  1. shotgun/agents/agent_manager.py +761 -52
  2. shotgun/agents/common.py +80 -75
  3. shotgun/agents/config/constants.py +21 -10
  4. shotgun/agents/config/manager.py +322 -97
  5. shotgun/agents/config/models.py +114 -84
  6. shotgun/agents/config/provider.py +232 -88
  7. shotgun/agents/context_analyzer/__init__.py +28 -0
  8. shotgun/agents/context_analyzer/analyzer.py +471 -0
  9. shotgun/agents/context_analyzer/constants.py +9 -0
  10. shotgun/agents/context_analyzer/formatter.py +115 -0
  11. shotgun/agents/context_analyzer/models.py +212 -0
  12. shotgun/agents/conversation_history.py +125 -2
  13. shotgun/agents/conversation_manager.py +57 -19
  14. shotgun/agents/export.py +6 -7
  15. shotgun/agents/history/compaction.py +23 -3
  16. shotgun/agents/history/context_extraction.py +93 -6
  17. shotgun/agents/history/history_processors.py +179 -11
  18. shotgun/agents/history/token_counting/__init__.py +31 -0
  19. shotgun/agents/history/token_counting/anthropic.py +127 -0
  20. shotgun/agents/history/token_counting/base.py +78 -0
  21. shotgun/agents/history/token_counting/openai.py +90 -0
  22. shotgun/agents/history/token_counting/sentencepiece_counter.py +127 -0
  23. shotgun/agents/history/token_counting/tokenizer_cache.py +92 -0
  24. shotgun/agents/history/token_counting/utils.py +144 -0
  25. shotgun/agents/history/token_estimation.py +12 -12
  26. shotgun/agents/llm.py +62 -0
  27. shotgun/agents/models.py +59 -4
  28. shotgun/agents/plan.py +6 -7
  29. shotgun/agents/research.py +7 -8
  30. shotgun/agents/specify.py +6 -7
  31. shotgun/agents/tasks.py +6 -7
  32. shotgun/agents/tools/__init__.py +0 -2
  33. shotgun/agents/tools/codebase/codebase_shell.py +6 -0
  34. shotgun/agents/tools/codebase/directory_lister.py +6 -0
  35. shotgun/agents/tools/codebase/file_read.py +11 -2
  36. shotgun/agents/tools/codebase/query_graph.py +6 -0
  37. shotgun/agents/tools/codebase/retrieve_code.py +6 -0
  38. shotgun/agents/tools/file_management.py +82 -16
  39. shotgun/agents/tools/registry.py +217 -0
  40. shotgun/agents/tools/web_search/__init__.py +55 -16
  41. shotgun/agents/tools/web_search/anthropic.py +76 -51
  42. shotgun/agents/tools/web_search/gemini.py +50 -27
  43. shotgun/agents/tools/web_search/openai.py +26 -17
  44. shotgun/agents/tools/web_search/utils.py +2 -2
  45. shotgun/agents/usage_manager.py +164 -0
  46. shotgun/api_endpoints.py +15 -0
  47. shotgun/cli/clear.py +53 -0
  48. shotgun/cli/codebase/commands.py +71 -2
  49. shotgun/cli/compact.py +186 -0
  50. shotgun/cli/config.py +41 -67
  51. shotgun/cli/context.py +111 -0
  52. shotgun/cli/export.py +1 -1
  53. shotgun/cli/feedback.py +50 -0
  54. shotgun/cli/models.py +3 -2
  55. shotgun/cli/plan.py +1 -1
  56. shotgun/cli/research.py +1 -1
  57. shotgun/cli/specify.py +1 -1
  58. shotgun/cli/tasks.py +1 -1
  59. shotgun/cli/update.py +18 -5
  60. shotgun/codebase/core/change_detector.py +5 -3
  61. shotgun/codebase/core/code_retrieval.py +4 -2
  62. shotgun/codebase/core/ingestor.py +169 -19
  63. shotgun/codebase/core/manager.py +177 -13
  64. shotgun/codebase/core/nl_query.py +1 -1
  65. shotgun/codebase/models.py +28 -3
  66. shotgun/codebase/service.py +14 -2
  67. shotgun/exceptions.py +32 -0
  68. shotgun/llm_proxy/__init__.py +19 -0
  69. shotgun/llm_proxy/clients.py +44 -0
  70. shotgun/llm_proxy/constants.py +15 -0
  71. shotgun/logging_config.py +18 -27
  72. shotgun/main.py +91 -4
  73. shotgun/posthog_telemetry.py +87 -40
  74. shotgun/prompts/agents/export.j2 +18 -1
  75. shotgun/prompts/agents/partials/common_agent_system_prompt.j2 +5 -1
  76. shotgun/prompts/agents/partials/interactive_mode.j2 +24 -7
  77. shotgun/prompts/agents/plan.j2 +1 -1
  78. shotgun/prompts/agents/research.j2 +1 -1
  79. shotgun/prompts/agents/specify.j2 +270 -3
  80. shotgun/prompts/agents/state/system_state.j2 +4 -0
  81. shotgun/prompts/agents/tasks.j2 +1 -1
  82. shotgun/prompts/codebase/partials/cypher_rules.j2 +13 -0
  83. shotgun/prompts/loader.py +2 -2
  84. shotgun/prompts/tools/web_search.j2 +14 -0
  85. shotgun/sdk/codebase.py +60 -2
  86. shotgun/sentry_telemetry.py +28 -21
  87. shotgun/settings.py +238 -0
  88. shotgun/shotgun_web/__init__.py +19 -0
  89. shotgun/shotgun_web/client.py +138 -0
  90. shotgun/shotgun_web/constants.py +21 -0
  91. shotgun/shotgun_web/models.py +47 -0
  92. shotgun/telemetry.py +24 -36
  93. shotgun/tui/app.py +275 -23
  94. shotgun/tui/commands/__init__.py +1 -1
  95. shotgun/tui/components/context_indicator.py +179 -0
  96. shotgun/tui/components/mode_indicator.py +70 -0
  97. shotgun/tui/components/status_bar.py +48 -0
  98. shotgun/tui/components/vertical_tail.py +6 -0
  99. shotgun/tui/containers.py +91 -0
  100. shotgun/tui/dependencies.py +39 -0
  101. shotgun/tui/filtered_codebase_service.py +46 -0
  102. shotgun/tui/protocols.py +45 -0
  103. shotgun/tui/screens/chat/__init__.py +5 -0
  104. shotgun/tui/screens/chat/chat.tcss +54 -0
  105. shotgun/tui/screens/chat/chat_screen.py +1234 -0
  106. shotgun/tui/screens/chat/codebase_index_prompt_screen.py +64 -0
  107. shotgun/tui/screens/chat/codebase_index_selection.py +12 -0
  108. shotgun/tui/screens/chat/help_text.py +40 -0
  109. shotgun/tui/screens/chat/prompt_history.py +48 -0
  110. shotgun/tui/screens/chat.tcss +11 -0
  111. shotgun/tui/screens/chat_screen/command_providers.py +226 -11
  112. shotgun/tui/screens/chat_screen/history/__init__.py +22 -0
  113. shotgun/tui/screens/chat_screen/history/agent_response.py +66 -0
  114. shotgun/tui/screens/chat_screen/history/chat_history.py +116 -0
  115. shotgun/tui/screens/chat_screen/history/formatters.py +115 -0
  116. shotgun/tui/screens/chat_screen/history/partial_response.py +43 -0
  117. shotgun/tui/screens/chat_screen/history/user_question.py +42 -0
  118. shotgun/tui/screens/confirmation_dialog.py +151 -0
  119. shotgun/tui/screens/feedback.py +193 -0
  120. shotgun/tui/screens/github_issue.py +102 -0
  121. shotgun/tui/screens/model_picker.py +352 -0
  122. shotgun/tui/screens/onboarding.py +431 -0
  123. shotgun/tui/screens/pipx_migration.py +153 -0
  124. shotgun/tui/screens/provider_config.py +156 -39
  125. shotgun/tui/screens/shotgun_auth.py +295 -0
  126. shotgun/tui/screens/welcome.py +198 -0
  127. shotgun/tui/services/__init__.py +5 -0
  128. shotgun/tui/services/conversation_service.py +184 -0
  129. shotgun/tui/state/__init__.py +7 -0
  130. shotgun/tui/state/processing_state.py +185 -0
  131. shotgun/tui/utils/mode_progress.py +14 -7
  132. shotgun/tui/widgets/__init__.py +5 -0
  133. shotgun/tui/widgets/widget_coordinator.py +262 -0
  134. shotgun/utils/datetime_utils.py +77 -0
  135. shotgun/utils/env_utils.py +13 -0
  136. shotgun/utils/file_system_utils.py +22 -2
  137. shotgun/utils/marketing.py +110 -0
  138. shotgun/utils/source_detection.py +16 -0
  139. shotgun/utils/update_checker.py +73 -21
  140. shotgun_sh-0.2.11.dist-info/METADATA +130 -0
  141. shotgun_sh-0.2.11.dist-info/RECORD +194 -0
  142. {shotgun_sh-0.1.9.dist-info → shotgun_sh-0.2.11.dist-info}/entry_points.txt +1 -0
  143. {shotgun_sh-0.1.9.dist-info → shotgun_sh-0.2.11.dist-info}/licenses/LICENSE +1 -1
  144. shotgun/agents/history/token_counting.py +0 -429
  145. shotgun/agents/tools/user_interaction.py +0 -37
  146. shotgun/tui/screens/chat.py +0 -818
  147. shotgun/tui/screens/chat_screen/history.py +0 -222
  148. shotgun_sh-0.1.9.dist-info/METADATA +0 -466
  149. shotgun_sh-0.1.9.dist-info/RECORD +0 -131
  150. {shotgun_sh-0.1.9.dist-info → shotgun_sh-0.2.11.dist-info}/WHEEL +0 -0
@@ -1,5 +1,6 @@
1
1
  """Kuzu graph ingestor for building code knowledge graphs."""
2
2
 
3
+ import asyncio
3
4
  import hashlib
4
5
  import os
5
6
  import time
@@ -8,6 +9,7 @@ from collections import defaultdict
8
9
  from pathlib import Path
9
10
  from typing import Any
10
11
 
12
+ import aiofiles
11
13
  import kuzu
12
14
  from tree_sitter import Node, Parser, QueryCursor
13
15
 
@@ -18,15 +20,12 @@ from shotgun.logging_config import get_logger
18
20
  logger = get_logger(__name__)
19
21
 
20
22
 
21
- # Default ignore patterns
22
- IGNORE_PATTERNS = {
23
+ # Directories that should never be traversed during indexing
24
+ BASE_IGNORE_DIRECTORIES = {
23
25
  ".git",
24
26
  "venv",
25
27
  ".venv",
26
28
  "__pycache__",
27
- "node_modules",
28
- "build",
29
- "dist",
30
29
  ".eggs",
31
30
  ".pytest_cache",
32
31
  ".mypy_cache",
@@ -36,6 +35,46 @@ IGNORE_PATTERNS = {
36
35
  ".vscode",
37
36
  }
38
37
 
38
+ # Well-known build output directories to skip when determining source files
39
+ BUILD_ARTIFACT_DIRECTORIES = {
40
+ "node_modules",
41
+ ".next",
42
+ ".nuxt",
43
+ ".vite",
44
+ ".yarn",
45
+ ".svelte-kit",
46
+ ".output",
47
+ ".turbo",
48
+ ".parcel-cache",
49
+ ".vercel",
50
+ ".serverless",
51
+ "build",
52
+ "dist",
53
+ "out",
54
+ "tmp",
55
+ "coverage",
56
+ }
57
+
58
+ # Default ignore patterns combines base directories and build artifacts
59
+ IGNORE_PATTERNS = BASE_IGNORE_DIRECTORIES | BUILD_ARTIFACT_DIRECTORIES
60
+
61
+ # Directory prefixes that should always be ignored
62
+ IGNORED_DIRECTORY_PREFIXES = (".",)
63
+
64
+
65
+ def should_ignore_directory(name: str, ignore_patterns: set[str] | None = None) -> bool:
66
+ """Return True if the directory name should be ignored."""
67
+ patterns = IGNORE_PATTERNS if ignore_patterns is None else ignore_patterns
68
+ if name in patterns:
69
+ return True
70
+ return name.startswith(IGNORED_DIRECTORY_PREFIXES)
71
+
72
+
73
+ def is_path_ignored(path: Path, ignore_patterns: set[str] | None = None) -> bool:
74
+ """Return True if any part of the path should be ignored."""
75
+ patterns = IGNORE_PATTERNS if ignore_patterns is None else ignore_patterns
76
+ return any(should_ignore_directory(part, patterns) for part in path.parts)
77
+
39
78
 
40
79
  class Ingestor:
41
80
  """Handles all communication and ingestion with the Kuzu database."""
@@ -535,6 +574,7 @@ class SimpleGraphBuilder:
535
574
  parsers: dict[str, Parser],
536
575
  queries: dict[str, Any],
537
576
  exclude_patterns: list[str] | None = None,
577
+ progress_callback: Any | None = None,
538
578
  ):
539
579
  self.ingestor = ingestor
540
580
  self.repo_path = repo_path
@@ -544,6 +584,7 @@ class SimpleGraphBuilder:
544
584
  self.ignore_dirs = IGNORE_PATTERNS
545
585
  if exclude_patterns:
546
586
  self.ignore_dirs = self.ignore_dirs.union(set(exclude_patterns))
587
+ self.progress_callback = progress_callback
547
588
 
548
589
  # Caches
549
590
  self.structural_elements: dict[Path, str | None] = {}
@@ -552,7 +593,35 @@ class SimpleGraphBuilder:
552
593
  self.simple_name_lookup: dict[str, set[str]] = defaultdict(set)
553
594
  self.class_inheritance: dict[str, list[str]] = {} # class_qn -> [parent_qns]
554
595
 
555
- def run(self) -> None:
596
+ def _report_progress(
597
+ self,
598
+ phase: str,
599
+ phase_name: str,
600
+ current: int,
601
+ total: int | None = None,
602
+ phase_complete: bool = False,
603
+ ) -> None:
604
+ """Report progress via callback if available."""
605
+ if not self.progress_callback:
606
+ return
607
+
608
+ try:
609
+ # Import here to avoid circular dependency
610
+ from shotgun.codebase.models import IndexProgress, ProgressPhase
611
+
612
+ progress = IndexProgress(
613
+ phase=ProgressPhase(phase),
614
+ phase_name=phase_name,
615
+ current=current,
616
+ total=total,
617
+ phase_complete=phase_complete,
618
+ )
619
+ self.progress_callback(progress)
620
+ except Exception as e:
621
+ # Don't let progress callback errors crash the build
622
+ logger.debug(f"Progress callback error: {e}")
623
+
624
+ async def run(self) -> None:
556
625
  """Run the three-pass graph building process."""
557
626
  logger.info(f"Building graph for project: {self.project_name}")
558
627
 
@@ -562,7 +631,7 @@ class SimpleGraphBuilder:
562
631
 
563
632
  # Pass 2: Definitions
564
633
  logger.info("Pass 2: Processing files and extracting definitions...")
565
- self._process_files()
634
+ await self._process_files()
566
635
 
567
636
  # Pass 3: Relationships
568
637
  logger.info("Pass 3: Processing relationships (calls, imports)...")
@@ -575,8 +644,11 @@ class SimpleGraphBuilder:
575
644
 
576
645
  def _identify_structure(self) -> None:
577
646
  """First pass: Walk directory to find packages and folders."""
647
+ dir_count = 0
578
648
  for root_str, dirs, _ in os.walk(self.repo_path, topdown=True):
579
- dirs[:] = [d for d in dirs if d not in self.ignore_dirs]
649
+ dirs[:] = [
650
+ d for d in dirs if not should_ignore_directory(d, self.ignore_dirs)
651
+ ]
580
652
  root = Path(root_str)
581
653
  relative_root = root.relative_to(self.repo_path)
582
654
 
@@ -584,6 +656,13 @@ class SimpleGraphBuilder:
584
656
  if root == self.repo_path:
585
657
  continue
586
658
 
659
+ dir_count += 1
660
+ # Report progress every 10 directories
661
+ if dir_count % 10 == 0:
662
+ self._report_progress(
663
+ "structure", "Identifying packages and folders", dir_count
664
+ )
665
+
587
666
  parent_rel_path = relative_root.parent
588
667
  parent_container_qn = self.structural_elements.get(parent_rel_path)
589
668
 
@@ -686,14 +765,40 @@ class SimpleGraphBuilder:
686
765
 
687
766
  self.structural_elements[relative_root] = None
688
767
 
689
- def _process_files(self) -> None:
768
+ # Report phase completion
769
+ self._report_progress(
770
+ "structure",
771
+ "Identifying packages and folders",
772
+ dir_count,
773
+ phase_complete=True,
774
+ )
775
+
776
+ async def _process_files(self) -> None:
690
777
  """Second pass: Process files and extract definitions."""
778
+ # First pass: Count total files
779
+ total_files = 0
780
+ for root_str, _, files in os.walk(self.repo_path):
781
+ root = Path(root_str)
782
+
783
+ # Skip ignored directories
784
+ if is_path_ignored(root, self.ignore_dirs):
785
+ continue
786
+
787
+ for filename in files:
788
+ filepath = root / filename
789
+ ext = filepath.suffix
790
+ lang_config = get_language_config(ext)
791
+
792
+ if lang_config and lang_config.name in self.parsers:
793
+ total_files += 1
794
+
795
+ # Second pass: Process files with progress reporting
691
796
  file_count = 0
692
797
  for root_str, _, files in os.walk(self.repo_path):
693
798
  root = Path(root_str)
694
799
 
695
800
  # Skip ignored directories
696
- if any(part in self.ignore_dirs for part in root.parts):
801
+ if is_path_ignored(root, self.ignore_dirs):
697
802
  continue
698
803
 
699
804
  for filename in files:
@@ -704,15 +809,32 @@ class SimpleGraphBuilder:
704
809
  lang_config = get_language_config(ext)
705
810
 
706
811
  if lang_config and lang_config.name in self.parsers:
707
- self._process_single_file(filepath, lang_config.name)
812
+ await self._process_single_file(filepath, lang_config.name)
708
813
  file_count += 1
709
814
 
815
+ # Report progress after each file
816
+ self._report_progress(
817
+ "definitions",
818
+ "Processing files and extracting definitions",
819
+ file_count,
820
+ total_files,
821
+ )
822
+
710
823
  if file_count % 100 == 0:
711
- logger.info(f" Processed {file_count} files...")
824
+ logger.info(f" Processed {file_count}/{total_files} files...")
825
+
826
+ logger.info(f" Total files processed: {file_count}/{total_files}")
712
827
 
713
- logger.info(f" Total files processed: {file_count}")
828
+ # Report phase completion
829
+ self._report_progress(
830
+ "definitions",
831
+ "Processing files and extracting definitions",
832
+ file_count,
833
+ total_files,
834
+ phase_complete=True,
835
+ )
714
836
 
715
- def _process_single_file(self, filepath: Path, language: str) -> None:
837
+ async def _process_single_file(self, filepath: Path, language: str) -> None:
716
838
  """Process a single file."""
717
839
  relative_path = filepath.relative_to(self.repo_path)
718
840
  relative_path_str = str(relative_path).replace(os.sep, "/")
@@ -753,8 +875,8 @@ class SimpleGraphBuilder:
753
875
 
754
876
  # Parse file
755
877
  try:
756
- with open(filepath, "rb") as f:
757
- content = f.read()
878
+ async with aiofiles.open(filepath, "rb") as f:
879
+ content = await f.read()
758
880
 
759
881
  parser = self.parsers[language]
760
882
  tree = parser.parse(content)
@@ -1143,7 +1265,8 @@ class SimpleGraphBuilder:
1143
1265
  self._process_inheritance()
1144
1266
 
1145
1267
  # Then process function calls
1146
- logger.info(f"Processing function calls for {len(self.ast_cache)} files...")
1268
+ total_files = len(self.ast_cache)
1269
+ logger.info(f"Processing function calls for {total_files} files...")
1147
1270
  logger.info(f"Function registry has {len(self.function_registry)} entries")
1148
1271
  logger.info(
1149
1272
  f"Simple name lookup has {len(self.simple_name_lookup)} unique names"
@@ -1157,10 +1280,29 @@ class SimpleGraphBuilder:
1157
1280
  f" Example: '{name}' -> {list(self.simple_name_lookup[name])[:3]}"
1158
1281
  )
1159
1282
 
1283
+ file_count = 0
1160
1284
  for filepath, (root_node, language) in self.ast_cache.items():
1161
1285
  self._process_calls(filepath, root_node, language)
1162
1286
  # NOTE: Add import processing. wtf does this mean?
1163
1287
 
1288
+ file_count += 1
1289
+ # Report progress after each file
1290
+ self._report_progress(
1291
+ "relationships",
1292
+ "Processing relationships (calls, imports)",
1293
+ file_count,
1294
+ total_files,
1295
+ )
1296
+
1297
+ # Report phase completion
1298
+ self._report_progress(
1299
+ "relationships",
1300
+ "Processing relationships (calls, imports)",
1301
+ file_count,
1302
+ total_files,
1303
+ phase_complete=True,
1304
+ )
1305
+
1164
1306
  def _process_inheritance(self) -> None:
1165
1307
  """Process inheritance relationships between classes."""
1166
1308
  logger.info("Processing inheritance relationships...")
@@ -1444,6 +1586,7 @@ class CodebaseIngestor:
1444
1586
  db_path: str,
1445
1587
  project_name: str | None = None,
1446
1588
  exclude_patterns: list[str] | None = None,
1589
+ progress_callback: Any | None = None,
1447
1590
  ):
1448
1591
  """Initialize the ingestor.
1449
1592
 
@@ -1451,10 +1594,12 @@ class CodebaseIngestor:
1451
1594
  db_path: Path to Kuzu database
1452
1595
  project_name: Optional project name
1453
1596
  exclude_patterns: Patterns to exclude from processing
1597
+ progress_callback: Optional callback for progress reporting
1454
1598
  """
1455
1599
  self.db_path = Path(db_path)
1456
1600
  self.project_name = project_name
1457
1601
  self.exclude_patterns = exclude_patterns or []
1602
+ self.progress_callback = progress_callback
1458
1603
 
1459
1604
  def build_graph_from_directory(self, repo_path: str) -> None:
1460
1605
  """Build a code knowledge graph from a directory.
@@ -1484,11 +1629,16 @@ class CodebaseIngestor:
1484
1629
 
1485
1630
  # Build graph
1486
1631
  builder = SimpleGraphBuilder(
1487
- ingestor, repo_path_obj, parsers, queries, self.exclude_patterns
1632
+ ingestor,
1633
+ repo_path_obj,
1634
+ parsers,
1635
+ queries,
1636
+ self.exclude_patterns,
1637
+ self.progress_callback,
1488
1638
  )
1489
1639
  if self.project_name:
1490
1640
  builder.project_name = self.project_name
1491
- builder.run()
1641
+ asyncio.run(builder.run())
1492
1642
 
1493
1643
  logger.info(f"Graph successfully created at: {self.db_path}")
1494
1644
 
@@ -51,9 +51,13 @@ class CodebaseFileHandler(FileSystemEventHandler):
51
51
  self.pending_changes: list[FileChange] = []
52
52
  self._lock = anyio.Lock()
53
53
  # Import default ignore patterns from ingestor
54
- from shotgun.codebase.core.ingestor import IGNORE_PATTERNS
54
+ from shotgun.codebase.core.ingestor import (
55
+ IGNORE_PATTERNS,
56
+ should_ignore_directory,
57
+ )
55
58
 
56
59
  self.ignore_patterns = ignore_patterns or IGNORE_PATTERNS
60
+ self._should_ignore_directory = should_ignore_directory
57
61
 
58
62
  def on_any_event(self, event: FileSystemEvent) -> None:
59
63
  """Handle any file system event."""
@@ -71,7 +75,7 @@ class CodebaseFileHandler(FileSystemEventHandler):
71
75
 
72
76
  # Check if any parent directory should be ignored
73
77
  for parent in path.parents:
74
- if parent.name in self.ignore_patterns:
78
+ if self._should_ignore_directory(parent.name, self.ignore_patterns):
75
79
  logger.debug(
76
80
  f"Ignoring file in ignored directory: {parent.name} - path: {src_path_str}"
77
81
  )
@@ -106,7 +110,7 @@ class CodebaseFileHandler(FileSystemEventHandler):
106
110
  )
107
111
  dest_path = Path(dest_path_str)
108
112
  for parent in dest_path.parents:
109
- if parent.name in self.ignore_patterns:
113
+ if self._should_ignore_directory(parent.name, self.ignore_patterns):
110
114
  logger.debug(
111
115
  f"Ignoring move to ignored directory: {parent.name} - dest_path: {dest_path_str}"
112
116
  )
@@ -329,6 +333,7 @@ class CodebaseGraphManager:
329
333
  languages: list[str] | None = None,
330
334
  exclude_patterns: list[str] | None = None,
331
335
  indexed_from_cwd: str | None = None,
336
+ progress_callback: Any | None = None,
332
337
  ) -> CodebaseGraph:
333
338
  """Build a new code knowledge graph.
334
339
 
@@ -337,6 +342,7 @@ class CodebaseGraphManager:
337
342
  name: Optional human-readable name
338
343
  languages: Languages to parse (default: all supported)
339
344
  exclude_patterns: Patterns to exclude
345
+ progress_callback: Optional callback for progress reporting
340
346
 
341
347
  Returns:
342
348
  Created graph metadata
@@ -353,7 +359,28 @@ class CodebaseGraphManager:
353
359
 
354
360
  # Check if graph already exists
355
361
  if graph_path.exists():
356
- raise CodebaseAlreadyIndexedError(repo_path)
362
+ # Verify it's not corrupted by checking if we can load the Project node
363
+ existing_graph = await self.get_graph(graph_id)
364
+ if existing_graph:
365
+ # Valid existing graph
366
+ raise CodebaseAlreadyIndexedError(repo_path)
367
+ else:
368
+ # Corrupted database - remove and re-index
369
+ logger.warning(
370
+ f"Found corrupted database at {graph_path}, removing for re-indexing..."
371
+ )
372
+ import shutil
373
+
374
+ # Handle both files and directories (kuzu v0.11.2+ uses files)
375
+ if graph_path.is_file():
376
+ graph_path.unlink() # Delete file
377
+ # Also delete WAL file if it exists
378
+ wal_path = graph_path.with_suffix(graph_path.suffix + ".wal")
379
+ if wal_path.exists():
380
+ wal_path.unlink()
381
+ logger.debug(f"Deleted WAL file: {wal_path}")
382
+ else:
383
+ shutil.rmtree(graph_path) # Delete directory
357
384
 
358
385
  # Import the builder from local core module
359
386
  from shotgun.codebase.core import CodebaseIngestor
@@ -379,6 +406,7 @@ class CodebaseGraphManager:
379
406
  db_path=str(graph_path),
380
407
  project_name=name,
381
408
  exclude_patterns=exclude_patterns or [],
409
+ progress_callback=progress_callback,
382
410
  )
383
411
 
384
412
  # Run build in thread pool
@@ -741,7 +769,7 @@ class CodebaseGraphManager:
741
769
 
742
770
  lang_config = get_language_config(full_path.suffix)
743
771
  if lang_config and lang_config.name in parsers:
744
- builder._process_single_file(full_path, lang_config.name)
772
+ await builder._process_single_file(full_path, lang_config.name)
745
773
  stats["nodes_modified"] += 1 # Approximate
746
774
 
747
775
  # Process additions
@@ -756,7 +784,7 @@ class CodebaseGraphManager:
756
784
 
757
785
  lang_config = get_language_config(full_path.suffix)
758
786
  if lang_config and lang_config.name in parsers:
759
- builder._process_single_file(full_path, lang_config.name)
787
+ await builder._process_single_file(full_path, lang_config.name)
760
788
  stats["nodes_added"] += 1 # Approximate
761
789
 
762
790
  # Flush all pending operations
@@ -1193,6 +1221,121 @@ class CodebaseGraphManager:
1193
1221
  )
1194
1222
  return None
1195
1223
 
1224
+ async def cleanup_corrupted_databases(self) -> list[str]:
1225
+ """Detect and remove corrupted Kuzu databases.
1226
+
1227
+ This method iterates through all .kuzu files in the storage directory,
1228
+ attempts to open them, and removes any that are corrupted or unreadable.
1229
+
1230
+ Returns:
1231
+ List of graph_ids that were removed due to corruption
1232
+ """
1233
+ import shutil
1234
+
1235
+ removed_graphs = []
1236
+
1237
+ # Find all .kuzu databases (files in v0.11.2, directories in newer versions)
1238
+ for path in self.storage_dir.glob("*.kuzu"):
1239
+ graph_id = path.stem
1240
+
1241
+ # Try to open and validate the database
1242
+ try:
1243
+ # Try to open the database with a timeout to prevent hanging
1244
+ async def try_open_database(
1245
+ gid: str = graph_id, db_path: Path = path
1246
+ ) -> bool:
1247
+ lock = await self._get_lock()
1248
+ async with lock:
1249
+ # Close existing connections if any
1250
+ if gid in self._connections:
1251
+ try:
1252
+ self._connections[gid].close()
1253
+ except Exception as e:
1254
+ logger.debug(
1255
+ f"Failed to close connection for {gid}: {e}"
1256
+ )
1257
+ del self._connections[gid]
1258
+ if gid in self._databases:
1259
+ try:
1260
+ self._databases[gid].close()
1261
+ except Exception as e:
1262
+ logger.debug(f"Failed to close database for {gid}: {e}")
1263
+ del self._databases[gid]
1264
+
1265
+ # Try to open the database
1266
+ def _open_and_query(g: str = gid, p: Path = db_path) -> bool:
1267
+ db = kuzu.Database(str(p))
1268
+ conn = kuzu.Connection(db)
1269
+ try:
1270
+ result = conn.execute(
1271
+ "MATCH (p:Project {graph_id: $graph_id}) RETURN p",
1272
+ {"graph_id": g},
1273
+ )
1274
+ has_results = (
1275
+ result.has_next()
1276
+ if hasattr(result, "has_next")
1277
+ else False
1278
+ )
1279
+ return has_results
1280
+ finally:
1281
+ conn.close()
1282
+ db.close()
1283
+
1284
+ return await anyio.to_thread.run_sync(_open_and_query)
1285
+
1286
+ # Try to open with 5 second timeout
1287
+ has_project = await asyncio.wait_for(try_open_database(), timeout=5.0)
1288
+
1289
+ if not has_project:
1290
+ # Database exists but has no Project node - consider it corrupted
1291
+ raise ValueError("No Project node found in database")
1292
+
1293
+ except (Exception, asyncio.TimeoutError) as e:
1294
+ # Database is corrupted or timed out - remove it
1295
+ error_type = (
1296
+ "timed out" if isinstance(e, asyncio.TimeoutError) else "corrupted"
1297
+ )
1298
+ logger.warning(
1299
+ f"Detected {error_type} database at {path}, removing it. "
1300
+ f"Error: {str(e) if not isinstance(e, asyncio.TimeoutError) else 'Operation timed out after 5 seconds'}"
1301
+ )
1302
+
1303
+ try:
1304
+ # Clean up any open connections
1305
+ lock = await self._get_lock()
1306
+ async with lock:
1307
+ if graph_id in self._connections:
1308
+ try:
1309
+ self._connections[graph_id].close()
1310
+ except Exception as e:
1311
+ logger.debug(
1312
+ f"Failed to close connection during cleanup for {graph_id}: {e}"
1313
+ )
1314
+ del self._connections[graph_id]
1315
+ if graph_id in self._databases:
1316
+ try:
1317
+ self._databases[graph_id].close()
1318
+ except Exception as e:
1319
+ logger.debug(
1320
+ f"Failed to close database during cleanup for {graph_id}: {e}"
1321
+ )
1322
+ del self._databases[graph_id]
1323
+
1324
+ # Remove the database (could be file or directory)
1325
+ if path.is_dir():
1326
+ await anyio.to_thread.run_sync(shutil.rmtree, path)
1327
+ else:
1328
+ await anyio.to_thread.run_sync(path.unlink)
1329
+ removed_graphs.append(graph_id)
1330
+ logger.info(f"Removed {error_type} database: {graph_id}")
1331
+
1332
+ except Exception as cleanup_error:
1333
+ logger.error(
1334
+ f"Failed to remove corrupted database {graph_id}: {cleanup_error}"
1335
+ )
1336
+
1337
+ return removed_graphs
1338
+
1196
1339
  async def list_graphs(self) -> list[CodebaseGraph]:
1197
1340
  """List all available graphs.
1198
1341
 
@@ -1201,7 +1344,7 @@ class CodebaseGraphManager:
1201
1344
  """
1202
1345
  graphs = []
1203
1346
 
1204
- # Find all .kuzu files
1347
+ # Find all .kuzu database files (Kuzu v0.11.2 creates files, not directories)
1205
1348
  for path in self.storage_dir.glob("*.kuzu"):
1206
1349
  if path.is_file():
1207
1350
  graph_id = path.stem
@@ -1294,6 +1437,8 @@ class CodebaseGraphManager:
1294
1437
  Args:
1295
1438
  graph_id: Graph to delete
1296
1439
  """
1440
+ import shutil
1441
+
1297
1442
  # Stop watcher if running
1298
1443
  if graph_id in self._watchers:
1299
1444
  await self.stop_watcher(graph_id)
@@ -1308,11 +1453,14 @@ class CodebaseGraphManager:
1308
1453
  self._databases[graph_id].close()
1309
1454
  del self._databases[graph_id]
1310
1455
 
1311
- # Delete files
1456
+ # Delete database (files in v0.11.2, directories in newer versions)
1312
1457
  graph_path = self.storage_dir / f"{graph_id}.kuzu"
1313
1458
  if graph_path.exists():
1314
- # Delete the database file
1315
- await anyio.to_thread.run_sync(graph_path.unlink)
1459
+ if graph_path.is_dir():
1460
+ await anyio.to_thread.run_sync(shutil.rmtree, graph_path)
1461
+ else:
1462
+ # File-based database (Kuzu v0.11.2)
1463
+ await anyio.to_thread.run_sync(graph_path.unlink)
1316
1464
 
1317
1465
  # Also delete the WAL file if it exists
1318
1466
  wal_path = self.storage_dir / f"{graph_id}.kuzu.wal"
@@ -1464,6 +1612,7 @@ class CodebaseGraphManager:
1464
1612
  languages: list[str] | None,
1465
1613
  exclude_patterns: list[str] | None,
1466
1614
  indexed_from_cwd: str | None = None,
1615
+ progress_callback: Any | None = None,
1467
1616
  ) -> CodebaseGraph:
1468
1617
  """Internal implementation of graph building (runs in background)."""
1469
1618
  operation_id = str(uuid.uuid4())
@@ -1487,7 +1636,13 @@ class CodebaseGraphManager:
1487
1636
 
1488
1637
  # Do the actual build work
1489
1638
  graph = await self._do_build_graph(
1490
- graph_id, repo_path, name, languages, exclude_patterns, indexed_from_cwd
1639
+ graph_id,
1640
+ repo_path,
1641
+ name,
1642
+ languages,
1643
+ exclude_patterns,
1644
+ indexed_from_cwd,
1645
+ progress_callback,
1491
1646
  )
1492
1647
 
1493
1648
  # Update operation stats
@@ -1536,6 +1691,7 @@ class CodebaseGraphManager:
1536
1691
  languages: list[str] | None,
1537
1692
  exclude_patterns: list[str] | None,
1538
1693
  indexed_from_cwd: str | None = None,
1694
+ progress_callback: Any | None = None,
1539
1695
  ) -> CodebaseGraph:
1540
1696
  """Execute the actual graph building logic (extracted from original build_graph)."""
1541
1697
  # The database and Project node already exist from _initialize_graph_metadata
@@ -1591,10 +1747,11 @@ class CodebaseGraphManager:
1591
1747
  parsers=parsers,
1592
1748
  queries=queries,
1593
1749
  exclude_patterns=exclude_patterns,
1750
+ progress_callback=progress_callback,
1594
1751
  )
1595
1752
 
1596
1753
  # Build the graph
1597
- builder.run()
1754
+ asyncio.run(builder.run())
1598
1755
 
1599
1756
  # Run build in thread pool
1600
1757
  await anyio.to_thread.run_sync(_build_graph)
@@ -1616,6 +1773,7 @@ class CodebaseGraphManager:
1616
1773
  languages: list[str] | None = None,
1617
1774
  exclude_patterns: list[str] | None = None,
1618
1775
  indexed_from_cwd: str | None = None,
1776
+ progress_callback: Any | None = None,
1619
1777
  ) -> str:
1620
1778
  """Start building a new code knowledge graph asynchronously.
1621
1779
 
@@ -1654,7 +1812,13 @@ class CodebaseGraphManager:
1654
1812
  # Start the build operation in background
1655
1813
  task = asyncio.create_task(
1656
1814
  self._build_graph_impl(
1657
- graph_id, repo_path, name, languages, exclude_patterns, indexed_from_cwd
1815
+ graph_id,
1816
+ repo_path,
1817
+ name,
1818
+ languages,
1819
+ exclude_patterns,
1820
+ indexed_from_cwd,
1821
+ progress_callback,
1658
1822
  )
1659
1823
  )
1660
1824
  self._operations[graph_id] = task
@@ -34,7 +34,7 @@ async def llm_cypher_prompt(
34
34
  Returns:
35
35
  CypherGenerationResponse with cypher_query, can_generate flag, and reason if not
36
36
  """
37
- model_config = get_provider_model()
37
+ model_config = await get_provider_model()
38
38
 
39
39
  # Create an agent with structured output for Cypher generation
40
40
  cypher_agent = Agent(