shotgun-sh 0.1.9__py3-none-any.whl → 0.2.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of shotgun-sh might be problematic. Click here for more details.
- shotgun/agents/agent_manager.py +761 -52
- shotgun/agents/common.py +80 -75
- shotgun/agents/config/constants.py +21 -10
- shotgun/agents/config/manager.py +322 -97
- shotgun/agents/config/models.py +114 -84
- shotgun/agents/config/provider.py +232 -88
- shotgun/agents/context_analyzer/__init__.py +28 -0
- shotgun/agents/context_analyzer/analyzer.py +471 -0
- shotgun/agents/context_analyzer/constants.py +9 -0
- shotgun/agents/context_analyzer/formatter.py +115 -0
- shotgun/agents/context_analyzer/models.py +212 -0
- shotgun/agents/conversation_history.py +125 -2
- shotgun/agents/conversation_manager.py +57 -19
- shotgun/agents/export.py +6 -7
- shotgun/agents/history/compaction.py +23 -3
- shotgun/agents/history/context_extraction.py +93 -6
- shotgun/agents/history/history_processors.py +179 -11
- shotgun/agents/history/token_counting/__init__.py +31 -0
- shotgun/agents/history/token_counting/anthropic.py +127 -0
- shotgun/agents/history/token_counting/base.py +78 -0
- shotgun/agents/history/token_counting/openai.py +90 -0
- shotgun/agents/history/token_counting/sentencepiece_counter.py +127 -0
- shotgun/agents/history/token_counting/tokenizer_cache.py +92 -0
- shotgun/agents/history/token_counting/utils.py +144 -0
- shotgun/agents/history/token_estimation.py +12 -12
- shotgun/agents/llm.py +62 -0
- shotgun/agents/models.py +59 -4
- shotgun/agents/plan.py +6 -7
- shotgun/agents/research.py +7 -8
- shotgun/agents/specify.py +6 -7
- shotgun/agents/tasks.py +6 -7
- shotgun/agents/tools/__init__.py +0 -2
- shotgun/agents/tools/codebase/codebase_shell.py +6 -0
- shotgun/agents/tools/codebase/directory_lister.py +6 -0
- shotgun/agents/tools/codebase/file_read.py +11 -2
- shotgun/agents/tools/codebase/query_graph.py +6 -0
- shotgun/agents/tools/codebase/retrieve_code.py +6 -0
- shotgun/agents/tools/file_management.py +82 -16
- shotgun/agents/tools/registry.py +217 -0
- shotgun/agents/tools/web_search/__init__.py +55 -16
- shotgun/agents/tools/web_search/anthropic.py +76 -51
- shotgun/agents/tools/web_search/gemini.py +50 -27
- shotgun/agents/tools/web_search/openai.py +26 -17
- shotgun/agents/tools/web_search/utils.py +2 -2
- shotgun/agents/usage_manager.py +164 -0
- shotgun/api_endpoints.py +15 -0
- shotgun/cli/clear.py +53 -0
- shotgun/cli/codebase/commands.py +71 -2
- shotgun/cli/compact.py +186 -0
- shotgun/cli/config.py +41 -67
- shotgun/cli/context.py +111 -0
- shotgun/cli/export.py +1 -1
- shotgun/cli/feedback.py +50 -0
- shotgun/cli/models.py +3 -2
- shotgun/cli/plan.py +1 -1
- shotgun/cli/research.py +1 -1
- shotgun/cli/specify.py +1 -1
- shotgun/cli/tasks.py +1 -1
- shotgun/cli/update.py +18 -5
- shotgun/codebase/core/change_detector.py +5 -3
- shotgun/codebase/core/code_retrieval.py +4 -2
- shotgun/codebase/core/ingestor.py +169 -19
- shotgun/codebase/core/manager.py +177 -13
- shotgun/codebase/core/nl_query.py +1 -1
- shotgun/codebase/models.py +28 -3
- shotgun/codebase/service.py +14 -2
- shotgun/exceptions.py +32 -0
- shotgun/llm_proxy/__init__.py +19 -0
- shotgun/llm_proxy/clients.py +44 -0
- shotgun/llm_proxy/constants.py +15 -0
- shotgun/logging_config.py +18 -27
- shotgun/main.py +91 -4
- shotgun/posthog_telemetry.py +87 -40
- shotgun/prompts/agents/export.j2 +18 -1
- shotgun/prompts/agents/partials/common_agent_system_prompt.j2 +5 -1
- shotgun/prompts/agents/partials/interactive_mode.j2 +24 -7
- shotgun/prompts/agents/plan.j2 +1 -1
- shotgun/prompts/agents/research.j2 +1 -1
- shotgun/prompts/agents/specify.j2 +270 -3
- shotgun/prompts/agents/state/system_state.j2 +4 -0
- shotgun/prompts/agents/tasks.j2 +1 -1
- shotgun/prompts/codebase/partials/cypher_rules.j2 +13 -0
- shotgun/prompts/loader.py +2 -2
- shotgun/prompts/tools/web_search.j2 +14 -0
- shotgun/sdk/codebase.py +60 -2
- shotgun/sentry_telemetry.py +28 -21
- shotgun/settings.py +238 -0
- shotgun/shotgun_web/__init__.py +19 -0
- shotgun/shotgun_web/client.py +138 -0
- shotgun/shotgun_web/constants.py +21 -0
- shotgun/shotgun_web/models.py +47 -0
- shotgun/telemetry.py +24 -36
- shotgun/tui/app.py +275 -23
- shotgun/tui/commands/__init__.py +1 -1
- shotgun/tui/components/context_indicator.py +179 -0
- shotgun/tui/components/mode_indicator.py +70 -0
- shotgun/tui/components/status_bar.py +48 -0
- shotgun/tui/components/vertical_tail.py +6 -0
- shotgun/tui/containers.py +91 -0
- shotgun/tui/dependencies.py +39 -0
- shotgun/tui/filtered_codebase_service.py +46 -0
- shotgun/tui/protocols.py +45 -0
- shotgun/tui/screens/chat/__init__.py +5 -0
- shotgun/tui/screens/chat/chat.tcss +54 -0
- shotgun/tui/screens/chat/chat_screen.py +1234 -0
- shotgun/tui/screens/chat/codebase_index_prompt_screen.py +64 -0
- shotgun/tui/screens/chat/codebase_index_selection.py +12 -0
- shotgun/tui/screens/chat/help_text.py +40 -0
- shotgun/tui/screens/chat/prompt_history.py +48 -0
- shotgun/tui/screens/chat.tcss +11 -0
- shotgun/tui/screens/chat_screen/command_providers.py +226 -11
- shotgun/tui/screens/chat_screen/history/__init__.py +22 -0
- shotgun/tui/screens/chat_screen/history/agent_response.py +66 -0
- shotgun/tui/screens/chat_screen/history/chat_history.py +116 -0
- shotgun/tui/screens/chat_screen/history/formatters.py +115 -0
- shotgun/tui/screens/chat_screen/history/partial_response.py +43 -0
- shotgun/tui/screens/chat_screen/history/user_question.py +42 -0
- shotgun/tui/screens/confirmation_dialog.py +151 -0
- shotgun/tui/screens/feedback.py +193 -0
- shotgun/tui/screens/github_issue.py +102 -0
- shotgun/tui/screens/model_picker.py +352 -0
- shotgun/tui/screens/onboarding.py +431 -0
- shotgun/tui/screens/pipx_migration.py +153 -0
- shotgun/tui/screens/provider_config.py +156 -39
- shotgun/tui/screens/shotgun_auth.py +295 -0
- shotgun/tui/screens/welcome.py +198 -0
- shotgun/tui/services/__init__.py +5 -0
- shotgun/tui/services/conversation_service.py +184 -0
- shotgun/tui/state/__init__.py +7 -0
- shotgun/tui/state/processing_state.py +185 -0
- shotgun/tui/utils/mode_progress.py +14 -7
- shotgun/tui/widgets/__init__.py +5 -0
- shotgun/tui/widgets/widget_coordinator.py +262 -0
- shotgun/utils/datetime_utils.py +77 -0
- shotgun/utils/env_utils.py +13 -0
- shotgun/utils/file_system_utils.py +22 -2
- shotgun/utils/marketing.py +110 -0
- shotgun/utils/source_detection.py +16 -0
- shotgun/utils/update_checker.py +73 -21
- shotgun_sh-0.2.11.dist-info/METADATA +130 -0
- shotgun_sh-0.2.11.dist-info/RECORD +194 -0
- {shotgun_sh-0.1.9.dist-info → shotgun_sh-0.2.11.dist-info}/entry_points.txt +1 -0
- {shotgun_sh-0.1.9.dist-info → shotgun_sh-0.2.11.dist-info}/licenses/LICENSE +1 -1
- shotgun/agents/history/token_counting.py +0 -429
- shotgun/agents/tools/user_interaction.py +0 -37
- shotgun/tui/screens/chat.py +0 -818
- shotgun/tui/screens/chat_screen/history.py +0 -222
- shotgun_sh-0.1.9.dist-info/METADATA +0 -466
- shotgun_sh-0.1.9.dist-info/RECORD +0 -131
- {shotgun_sh-0.1.9.dist-info → shotgun_sh-0.2.11.dist-info}/WHEEL +0 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Kuzu graph ingestor for building code knowledge graphs."""
|
|
2
2
|
|
|
3
|
+
import asyncio
|
|
3
4
|
import hashlib
|
|
4
5
|
import os
|
|
5
6
|
import time
|
|
@@ -8,6 +9,7 @@ from collections import defaultdict
|
|
|
8
9
|
from pathlib import Path
|
|
9
10
|
from typing import Any
|
|
10
11
|
|
|
12
|
+
import aiofiles
|
|
11
13
|
import kuzu
|
|
12
14
|
from tree_sitter import Node, Parser, QueryCursor
|
|
13
15
|
|
|
@@ -18,15 +20,12 @@ from shotgun.logging_config import get_logger
|
|
|
18
20
|
logger = get_logger(__name__)
|
|
19
21
|
|
|
20
22
|
|
|
21
|
-
#
|
|
22
|
-
|
|
23
|
+
# Directories that should never be traversed during indexing
|
|
24
|
+
BASE_IGNORE_DIRECTORIES = {
|
|
23
25
|
".git",
|
|
24
26
|
"venv",
|
|
25
27
|
".venv",
|
|
26
28
|
"__pycache__",
|
|
27
|
-
"node_modules",
|
|
28
|
-
"build",
|
|
29
|
-
"dist",
|
|
30
29
|
".eggs",
|
|
31
30
|
".pytest_cache",
|
|
32
31
|
".mypy_cache",
|
|
@@ -36,6 +35,46 @@ IGNORE_PATTERNS = {
|
|
|
36
35
|
".vscode",
|
|
37
36
|
}
|
|
38
37
|
|
|
38
|
+
# Well-known build output directories to skip when determining source files
|
|
39
|
+
BUILD_ARTIFACT_DIRECTORIES = {
|
|
40
|
+
"node_modules",
|
|
41
|
+
".next",
|
|
42
|
+
".nuxt",
|
|
43
|
+
".vite",
|
|
44
|
+
".yarn",
|
|
45
|
+
".svelte-kit",
|
|
46
|
+
".output",
|
|
47
|
+
".turbo",
|
|
48
|
+
".parcel-cache",
|
|
49
|
+
".vercel",
|
|
50
|
+
".serverless",
|
|
51
|
+
"build",
|
|
52
|
+
"dist",
|
|
53
|
+
"out",
|
|
54
|
+
"tmp",
|
|
55
|
+
"coverage",
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
# Default ignore patterns combines base directories and build artifacts
|
|
59
|
+
IGNORE_PATTERNS = BASE_IGNORE_DIRECTORIES | BUILD_ARTIFACT_DIRECTORIES
|
|
60
|
+
|
|
61
|
+
# Directory prefixes that should always be ignored
|
|
62
|
+
IGNORED_DIRECTORY_PREFIXES = (".",)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def should_ignore_directory(name: str, ignore_patterns: set[str] | None = None) -> bool:
|
|
66
|
+
"""Return True if the directory name should be ignored."""
|
|
67
|
+
patterns = IGNORE_PATTERNS if ignore_patterns is None else ignore_patterns
|
|
68
|
+
if name in patterns:
|
|
69
|
+
return True
|
|
70
|
+
return name.startswith(IGNORED_DIRECTORY_PREFIXES)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def is_path_ignored(path: Path, ignore_patterns: set[str] | None = None) -> bool:
|
|
74
|
+
"""Return True if any part of the path should be ignored."""
|
|
75
|
+
patterns = IGNORE_PATTERNS if ignore_patterns is None else ignore_patterns
|
|
76
|
+
return any(should_ignore_directory(part, patterns) for part in path.parts)
|
|
77
|
+
|
|
39
78
|
|
|
40
79
|
class Ingestor:
|
|
41
80
|
"""Handles all communication and ingestion with the Kuzu database."""
|
|
@@ -535,6 +574,7 @@ class SimpleGraphBuilder:
|
|
|
535
574
|
parsers: dict[str, Parser],
|
|
536
575
|
queries: dict[str, Any],
|
|
537
576
|
exclude_patterns: list[str] | None = None,
|
|
577
|
+
progress_callback: Any | None = None,
|
|
538
578
|
):
|
|
539
579
|
self.ingestor = ingestor
|
|
540
580
|
self.repo_path = repo_path
|
|
@@ -544,6 +584,7 @@ class SimpleGraphBuilder:
|
|
|
544
584
|
self.ignore_dirs = IGNORE_PATTERNS
|
|
545
585
|
if exclude_patterns:
|
|
546
586
|
self.ignore_dirs = self.ignore_dirs.union(set(exclude_patterns))
|
|
587
|
+
self.progress_callback = progress_callback
|
|
547
588
|
|
|
548
589
|
# Caches
|
|
549
590
|
self.structural_elements: dict[Path, str | None] = {}
|
|
@@ -552,7 +593,35 @@ class SimpleGraphBuilder:
|
|
|
552
593
|
self.simple_name_lookup: dict[str, set[str]] = defaultdict(set)
|
|
553
594
|
self.class_inheritance: dict[str, list[str]] = {} # class_qn -> [parent_qns]
|
|
554
595
|
|
|
555
|
-
def
|
|
596
|
+
def _report_progress(
|
|
597
|
+
self,
|
|
598
|
+
phase: str,
|
|
599
|
+
phase_name: str,
|
|
600
|
+
current: int,
|
|
601
|
+
total: int | None = None,
|
|
602
|
+
phase_complete: bool = False,
|
|
603
|
+
) -> None:
|
|
604
|
+
"""Report progress via callback if available."""
|
|
605
|
+
if not self.progress_callback:
|
|
606
|
+
return
|
|
607
|
+
|
|
608
|
+
try:
|
|
609
|
+
# Import here to avoid circular dependency
|
|
610
|
+
from shotgun.codebase.models import IndexProgress, ProgressPhase
|
|
611
|
+
|
|
612
|
+
progress = IndexProgress(
|
|
613
|
+
phase=ProgressPhase(phase),
|
|
614
|
+
phase_name=phase_name,
|
|
615
|
+
current=current,
|
|
616
|
+
total=total,
|
|
617
|
+
phase_complete=phase_complete,
|
|
618
|
+
)
|
|
619
|
+
self.progress_callback(progress)
|
|
620
|
+
except Exception as e:
|
|
621
|
+
# Don't let progress callback errors crash the build
|
|
622
|
+
logger.debug(f"Progress callback error: {e}")
|
|
623
|
+
|
|
624
|
+
async def run(self) -> None:
|
|
556
625
|
"""Run the three-pass graph building process."""
|
|
557
626
|
logger.info(f"Building graph for project: {self.project_name}")
|
|
558
627
|
|
|
@@ -562,7 +631,7 @@ class SimpleGraphBuilder:
|
|
|
562
631
|
|
|
563
632
|
# Pass 2: Definitions
|
|
564
633
|
logger.info("Pass 2: Processing files and extracting definitions...")
|
|
565
|
-
self._process_files()
|
|
634
|
+
await self._process_files()
|
|
566
635
|
|
|
567
636
|
# Pass 3: Relationships
|
|
568
637
|
logger.info("Pass 3: Processing relationships (calls, imports)...")
|
|
@@ -575,8 +644,11 @@ class SimpleGraphBuilder:
|
|
|
575
644
|
|
|
576
645
|
def _identify_structure(self) -> None:
|
|
577
646
|
"""First pass: Walk directory to find packages and folders."""
|
|
647
|
+
dir_count = 0
|
|
578
648
|
for root_str, dirs, _ in os.walk(self.repo_path, topdown=True):
|
|
579
|
-
dirs[:] = [
|
|
649
|
+
dirs[:] = [
|
|
650
|
+
d for d in dirs if not should_ignore_directory(d, self.ignore_dirs)
|
|
651
|
+
]
|
|
580
652
|
root = Path(root_str)
|
|
581
653
|
relative_root = root.relative_to(self.repo_path)
|
|
582
654
|
|
|
@@ -584,6 +656,13 @@ class SimpleGraphBuilder:
|
|
|
584
656
|
if root == self.repo_path:
|
|
585
657
|
continue
|
|
586
658
|
|
|
659
|
+
dir_count += 1
|
|
660
|
+
# Report progress every 10 directories
|
|
661
|
+
if dir_count % 10 == 0:
|
|
662
|
+
self._report_progress(
|
|
663
|
+
"structure", "Identifying packages and folders", dir_count
|
|
664
|
+
)
|
|
665
|
+
|
|
587
666
|
parent_rel_path = relative_root.parent
|
|
588
667
|
parent_container_qn = self.structural_elements.get(parent_rel_path)
|
|
589
668
|
|
|
@@ -686,14 +765,40 @@ class SimpleGraphBuilder:
|
|
|
686
765
|
|
|
687
766
|
self.structural_elements[relative_root] = None
|
|
688
767
|
|
|
689
|
-
|
|
768
|
+
# Report phase completion
|
|
769
|
+
self._report_progress(
|
|
770
|
+
"structure",
|
|
771
|
+
"Identifying packages and folders",
|
|
772
|
+
dir_count,
|
|
773
|
+
phase_complete=True,
|
|
774
|
+
)
|
|
775
|
+
|
|
776
|
+
async def _process_files(self) -> None:
|
|
690
777
|
"""Second pass: Process files and extract definitions."""
|
|
778
|
+
# First pass: Count total files
|
|
779
|
+
total_files = 0
|
|
780
|
+
for root_str, _, files in os.walk(self.repo_path):
|
|
781
|
+
root = Path(root_str)
|
|
782
|
+
|
|
783
|
+
# Skip ignored directories
|
|
784
|
+
if is_path_ignored(root, self.ignore_dirs):
|
|
785
|
+
continue
|
|
786
|
+
|
|
787
|
+
for filename in files:
|
|
788
|
+
filepath = root / filename
|
|
789
|
+
ext = filepath.suffix
|
|
790
|
+
lang_config = get_language_config(ext)
|
|
791
|
+
|
|
792
|
+
if lang_config and lang_config.name in self.parsers:
|
|
793
|
+
total_files += 1
|
|
794
|
+
|
|
795
|
+
# Second pass: Process files with progress reporting
|
|
691
796
|
file_count = 0
|
|
692
797
|
for root_str, _, files in os.walk(self.repo_path):
|
|
693
798
|
root = Path(root_str)
|
|
694
799
|
|
|
695
800
|
# Skip ignored directories
|
|
696
|
-
if
|
|
801
|
+
if is_path_ignored(root, self.ignore_dirs):
|
|
697
802
|
continue
|
|
698
803
|
|
|
699
804
|
for filename in files:
|
|
@@ -704,15 +809,32 @@ class SimpleGraphBuilder:
|
|
|
704
809
|
lang_config = get_language_config(ext)
|
|
705
810
|
|
|
706
811
|
if lang_config and lang_config.name in self.parsers:
|
|
707
|
-
self._process_single_file(filepath, lang_config.name)
|
|
812
|
+
await self._process_single_file(filepath, lang_config.name)
|
|
708
813
|
file_count += 1
|
|
709
814
|
|
|
815
|
+
# Report progress after each file
|
|
816
|
+
self._report_progress(
|
|
817
|
+
"definitions",
|
|
818
|
+
"Processing files and extracting definitions",
|
|
819
|
+
file_count,
|
|
820
|
+
total_files,
|
|
821
|
+
)
|
|
822
|
+
|
|
710
823
|
if file_count % 100 == 0:
|
|
711
|
-
logger.info(f" Processed {file_count} files...")
|
|
824
|
+
logger.info(f" Processed {file_count}/{total_files} files...")
|
|
825
|
+
|
|
826
|
+
logger.info(f" Total files processed: {file_count}/{total_files}")
|
|
712
827
|
|
|
713
|
-
|
|
828
|
+
# Report phase completion
|
|
829
|
+
self._report_progress(
|
|
830
|
+
"definitions",
|
|
831
|
+
"Processing files and extracting definitions",
|
|
832
|
+
file_count,
|
|
833
|
+
total_files,
|
|
834
|
+
phase_complete=True,
|
|
835
|
+
)
|
|
714
836
|
|
|
715
|
-
def _process_single_file(self, filepath: Path, language: str) -> None:
|
|
837
|
+
async def _process_single_file(self, filepath: Path, language: str) -> None:
|
|
716
838
|
"""Process a single file."""
|
|
717
839
|
relative_path = filepath.relative_to(self.repo_path)
|
|
718
840
|
relative_path_str = str(relative_path).replace(os.sep, "/")
|
|
@@ -753,8 +875,8 @@ class SimpleGraphBuilder:
|
|
|
753
875
|
|
|
754
876
|
# Parse file
|
|
755
877
|
try:
|
|
756
|
-
with open(filepath, "rb") as f:
|
|
757
|
-
content = f.read()
|
|
878
|
+
async with aiofiles.open(filepath, "rb") as f:
|
|
879
|
+
content = await f.read()
|
|
758
880
|
|
|
759
881
|
parser = self.parsers[language]
|
|
760
882
|
tree = parser.parse(content)
|
|
@@ -1143,7 +1265,8 @@ class SimpleGraphBuilder:
|
|
|
1143
1265
|
self._process_inheritance()
|
|
1144
1266
|
|
|
1145
1267
|
# Then process function calls
|
|
1146
|
-
|
|
1268
|
+
total_files = len(self.ast_cache)
|
|
1269
|
+
logger.info(f"Processing function calls for {total_files} files...")
|
|
1147
1270
|
logger.info(f"Function registry has {len(self.function_registry)} entries")
|
|
1148
1271
|
logger.info(
|
|
1149
1272
|
f"Simple name lookup has {len(self.simple_name_lookup)} unique names"
|
|
@@ -1157,10 +1280,29 @@ class SimpleGraphBuilder:
|
|
|
1157
1280
|
f" Example: '{name}' -> {list(self.simple_name_lookup[name])[:3]}"
|
|
1158
1281
|
)
|
|
1159
1282
|
|
|
1283
|
+
file_count = 0
|
|
1160
1284
|
for filepath, (root_node, language) in self.ast_cache.items():
|
|
1161
1285
|
self._process_calls(filepath, root_node, language)
|
|
1162
1286
|
# NOTE: Add import processing. wtf does this mean?
|
|
1163
1287
|
|
|
1288
|
+
file_count += 1
|
|
1289
|
+
# Report progress after each file
|
|
1290
|
+
self._report_progress(
|
|
1291
|
+
"relationships",
|
|
1292
|
+
"Processing relationships (calls, imports)",
|
|
1293
|
+
file_count,
|
|
1294
|
+
total_files,
|
|
1295
|
+
)
|
|
1296
|
+
|
|
1297
|
+
# Report phase completion
|
|
1298
|
+
self._report_progress(
|
|
1299
|
+
"relationships",
|
|
1300
|
+
"Processing relationships (calls, imports)",
|
|
1301
|
+
file_count,
|
|
1302
|
+
total_files,
|
|
1303
|
+
phase_complete=True,
|
|
1304
|
+
)
|
|
1305
|
+
|
|
1164
1306
|
def _process_inheritance(self) -> None:
|
|
1165
1307
|
"""Process inheritance relationships between classes."""
|
|
1166
1308
|
logger.info("Processing inheritance relationships...")
|
|
@@ -1444,6 +1586,7 @@ class CodebaseIngestor:
|
|
|
1444
1586
|
db_path: str,
|
|
1445
1587
|
project_name: str | None = None,
|
|
1446
1588
|
exclude_patterns: list[str] | None = None,
|
|
1589
|
+
progress_callback: Any | None = None,
|
|
1447
1590
|
):
|
|
1448
1591
|
"""Initialize the ingestor.
|
|
1449
1592
|
|
|
@@ -1451,10 +1594,12 @@ class CodebaseIngestor:
|
|
|
1451
1594
|
db_path: Path to Kuzu database
|
|
1452
1595
|
project_name: Optional project name
|
|
1453
1596
|
exclude_patterns: Patterns to exclude from processing
|
|
1597
|
+
progress_callback: Optional callback for progress reporting
|
|
1454
1598
|
"""
|
|
1455
1599
|
self.db_path = Path(db_path)
|
|
1456
1600
|
self.project_name = project_name
|
|
1457
1601
|
self.exclude_patterns = exclude_patterns or []
|
|
1602
|
+
self.progress_callback = progress_callback
|
|
1458
1603
|
|
|
1459
1604
|
def build_graph_from_directory(self, repo_path: str) -> None:
|
|
1460
1605
|
"""Build a code knowledge graph from a directory.
|
|
@@ -1484,11 +1629,16 @@ class CodebaseIngestor:
|
|
|
1484
1629
|
|
|
1485
1630
|
# Build graph
|
|
1486
1631
|
builder = SimpleGraphBuilder(
|
|
1487
|
-
ingestor,
|
|
1632
|
+
ingestor,
|
|
1633
|
+
repo_path_obj,
|
|
1634
|
+
parsers,
|
|
1635
|
+
queries,
|
|
1636
|
+
self.exclude_patterns,
|
|
1637
|
+
self.progress_callback,
|
|
1488
1638
|
)
|
|
1489
1639
|
if self.project_name:
|
|
1490
1640
|
builder.project_name = self.project_name
|
|
1491
|
-
builder.run()
|
|
1641
|
+
asyncio.run(builder.run())
|
|
1492
1642
|
|
|
1493
1643
|
logger.info(f"Graph successfully created at: {self.db_path}")
|
|
1494
1644
|
|
shotgun/codebase/core/manager.py
CHANGED
|
@@ -51,9 +51,13 @@ class CodebaseFileHandler(FileSystemEventHandler):
|
|
|
51
51
|
self.pending_changes: list[FileChange] = []
|
|
52
52
|
self._lock = anyio.Lock()
|
|
53
53
|
# Import default ignore patterns from ingestor
|
|
54
|
-
from shotgun.codebase.core.ingestor import
|
|
54
|
+
from shotgun.codebase.core.ingestor import (
|
|
55
|
+
IGNORE_PATTERNS,
|
|
56
|
+
should_ignore_directory,
|
|
57
|
+
)
|
|
55
58
|
|
|
56
59
|
self.ignore_patterns = ignore_patterns or IGNORE_PATTERNS
|
|
60
|
+
self._should_ignore_directory = should_ignore_directory
|
|
57
61
|
|
|
58
62
|
def on_any_event(self, event: FileSystemEvent) -> None:
|
|
59
63
|
"""Handle any file system event."""
|
|
@@ -71,7 +75,7 @@ class CodebaseFileHandler(FileSystemEventHandler):
|
|
|
71
75
|
|
|
72
76
|
# Check if any parent directory should be ignored
|
|
73
77
|
for parent in path.parents:
|
|
74
|
-
if parent.name
|
|
78
|
+
if self._should_ignore_directory(parent.name, self.ignore_patterns):
|
|
75
79
|
logger.debug(
|
|
76
80
|
f"Ignoring file in ignored directory: {parent.name} - path: {src_path_str}"
|
|
77
81
|
)
|
|
@@ -106,7 +110,7 @@ class CodebaseFileHandler(FileSystemEventHandler):
|
|
|
106
110
|
)
|
|
107
111
|
dest_path = Path(dest_path_str)
|
|
108
112
|
for parent in dest_path.parents:
|
|
109
|
-
if parent.name
|
|
113
|
+
if self._should_ignore_directory(parent.name, self.ignore_patterns):
|
|
110
114
|
logger.debug(
|
|
111
115
|
f"Ignoring move to ignored directory: {parent.name} - dest_path: {dest_path_str}"
|
|
112
116
|
)
|
|
@@ -329,6 +333,7 @@ class CodebaseGraphManager:
|
|
|
329
333
|
languages: list[str] | None = None,
|
|
330
334
|
exclude_patterns: list[str] | None = None,
|
|
331
335
|
indexed_from_cwd: str | None = None,
|
|
336
|
+
progress_callback: Any | None = None,
|
|
332
337
|
) -> CodebaseGraph:
|
|
333
338
|
"""Build a new code knowledge graph.
|
|
334
339
|
|
|
@@ -337,6 +342,7 @@ class CodebaseGraphManager:
|
|
|
337
342
|
name: Optional human-readable name
|
|
338
343
|
languages: Languages to parse (default: all supported)
|
|
339
344
|
exclude_patterns: Patterns to exclude
|
|
345
|
+
progress_callback: Optional callback for progress reporting
|
|
340
346
|
|
|
341
347
|
Returns:
|
|
342
348
|
Created graph metadata
|
|
@@ -353,7 +359,28 @@ class CodebaseGraphManager:
|
|
|
353
359
|
|
|
354
360
|
# Check if graph already exists
|
|
355
361
|
if graph_path.exists():
|
|
356
|
-
|
|
362
|
+
# Verify it's not corrupted by checking if we can load the Project node
|
|
363
|
+
existing_graph = await self.get_graph(graph_id)
|
|
364
|
+
if existing_graph:
|
|
365
|
+
# Valid existing graph
|
|
366
|
+
raise CodebaseAlreadyIndexedError(repo_path)
|
|
367
|
+
else:
|
|
368
|
+
# Corrupted database - remove and re-index
|
|
369
|
+
logger.warning(
|
|
370
|
+
f"Found corrupted database at {graph_path}, removing for re-indexing..."
|
|
371
|
+
)
|
|
372
|
+
import shutil
|
|
373
|
+
|
|
374
|
+
# Handle both files and directories (kuzu v0.11.2+ uses files)
|
|
375
|
+
if graph_path.is_file():
|
|
376
|
+
graph_path.unlink() # Delete file
|
|
377
|
+
# Also delete WAL file if it exists
|
|
378
|
+
wal_path = graph_path.with_suffix(graph_path.suffix + ".wal")
|
|
379
|
+
if wal_path.exists():
|
|
380
|
+
wal_path.unlink()
|
|
381
|
+
logger.debug(f"Deleted WAL file: {wal_path}")
|
|
382
|
+
else:
|
|
383
|
+
shutil.rmtree(graph_path) # Delete directory
|
|
357
384
|
|
|
358
385
|
# Import the builder from local core module
|
|
359
386
|
from shotgun.codebase.core import CodebaseIngestor
|
|
@@ -379,6 +406,7 @@ class CodebaseGraphManager:
|
|
|
379
406
|
db_path=str(graph_path),
|
|
380
407
|
project_name=name,
|
|
381
408
|
exclude_patterns=exclude_patterns or [],
|
|
409
|
+
progress_callback=progress_callback,
|
|
382
410
|
)
|
|
383
411
|
|
|
384
412
|
# Run build in thread pool
|
|
@@ -741,7 +769,7 @@ class CodebaseGraphManager:
|
|
|
741
769
|
|
|
742
770
|
lang_config = get_language_config(full_path.suffix)
|
|
743
771
|
if lang_config and lang_config.name in parsers:
|
|
744
|
-
builder._process_single_file(full_path, lang_config.name)
|
|
772
|
+
await builder._process_single_file(full_path, lang_config.name)
|
|
745
773
|
stats["nodes_modified"] += 1 # Approximate
|
|
746
774
|
|
|
747
775
|
# Process additions
|
|
@@ -756,7 +784,7 @@ class CodebaseGraphManager:
|
|
|
756
784
|
|
|
757
785
|
lang_config = get_language_config(full_path.suffix)
|
|
758
786
|
if lang_config and lang_config.name in parsers:
|
|
759
|
-
builder._process_single_file(full_path, lang_config.name)
|
|
787
|
+
await builder._process_single_file(full_path, lang_config.name)
|
|
760
788
|
stats["nodes_added"] += 1 # Approximate
|
|
761
789
|
|
|
762
790
|
# Flush all pending operations
|
|
@@ -1193,6 +1221,121 @@ class CodebaseGraphManager:
|
|
|
1193
1221
|
)
|
|
1194
1222
|
return None
|
|
1195
1223
|
|
|
1224
|
+
async def cleanup_corrupted_databases(self) -> list[str]:
|
|
1225
|
+
"""Detect and remove corrupted Kuzu databases.
|
|
1226
|
+
|
|
1227
|
+
This method iterates through all .kuzu files in the storage directory,
|
|
1228
|
+
attempts to open them, and removes any that are corrupted or unreadable.
|
|
1229
|
+
|
|
1230
|
+
Returns:
|
|
1231
|
+
List of graph_ids that were removed due to corruption
|
|
1232
|
+
"""
|
|
1233
|
+
import shutil
|
|
1234
|
+
|
|
1235
|
+
removed_graphs = []
|
|
1236
|
+
|
|
1237
|
+
# Find all .kuzu databases (files in v0.11.2, directories in newer versions)
|
|
1238
|
+
for path in self.storage_dir.glob("*.kuzu"):
|
|
1239
|
+
graph_id = path.stem
|
|
1240
|
+
|
|
1241
|
+
# Try to open and validate the database
|
|
1242
|
+
try:
|
|
1243
|
+
# Try to open the database with a timeout to prevent hanging
|
|
1244
|
+
async def try_open_database(
|
|
1245
|
+
gid: str = graph_id, db_path: Path = path
|
|
1246
|
+
) -> bool:
|
|
1247
|
+
lock = await self._get_lock()
|
|
1248
|
+
async with lock:
|
|
1249
|
+
# Close existing connections if any
|
|
1250
|
+
if gid in self._connections:
|
|
1251
|
+
try:
|
|
1252
|
+
self._connections[gid].close()
|
|
1253
|
+
except Exception as e:
|
|
1254
|
+
logger.debug(
|
|
1255
|
+
f"Failed to close connection for {gid}: {e}"
|
|
1256
|
+
)
|
|
1257
|
+
del self._connections[gid]
|
|
1258
|
+
if gid in self._databases:
|
|
1259
|
+
try:
|
|
1260
|
+
self._databases[gid].close()
|
|
1261
|
+
except Exception as e:
|
|
1262
|
+
logger.debug(f"Failed to close database for {gid}: {e}")
|
|
1263
|
+
del self._databases[gid]
|
|
1264
|
+
|
|
1265
|
+
# Try to open the database
|
|
1266
|
+
def _open_and_query(g: str = gid, p: Path = db_path) -> bool:
|
|
1267
|
+
db = kuzu.Database(str(p))
|
|
1268
|
+
conn = kuzu.Connection(db)
|
|
1269
|
+
try:
|
|
1270
|
+
result = conn.execute(
|
|
1271
|
+
"MATCH (p:Project {graph_id: $graph_id}) RETURN p",
|
|
1272
|
+
{"graph_id": g},
|
|
1273
|
+
)
|
|
1274
|
+
has_results = (
|
|
1275
|
+
result.has_next()
|
|
1276
|
+
if hasattr(result, "has_next")
|
|
1277
|
+
else False
|
|
1278
|
+
)
|
|
1279
|
+
return has_results
|
|
1280
|
+
finally:
|
|
1281
|
+
conn.close()
|
|
1282
|
+
db.close()
|
|
1283
|
+
|
|
1284
|
+
return await anyio.to_thread.run_sync(_open_and_query)
|
|
1285
|
+
|
|
1286
|
+
# Try to open with 5 second timeout
|
|
1287
|
+
has_project = await asyncio.wait_for(try_open_database(), timeout=5.0)
|
|
1288
|
+
|
|
1289
|
+
if not has_project:
|
|
1290
|
+
# Database exists but has no Project node - consider it corrupted
|
|
1291
|
+
raise ValueError("No Project node found in database")
|
|
1292
|
+
|
|
1293
|
+
except (Exception, asyncio.TimeoutError) as e:
|
|
1294
|
+
# Database is corrupted or timed out - remove it
|
|
1295
|
+
error_type = (
|
|
1296
|
+
"timed out" if isinstance(e, asyncio.TimeoutError) else "corrupted"
|
|
1297
|
+
)
|
|
1298
|
+
logger.warning(
|
|
1299
|
+
f"Detected {error_type} database at {path}, removing it. "
|
|
1300
|
+
f"Error: {str(e) if not isinstance(e, asyncio.TimeoutError) else 'Operation timed out after 5 seconds'}"
|
|
1301
|
+
)
|
|
1302
|
+
|
|
1303
|
+
try:
|
|
1304
|
+
# Clean up any open connections
|
|
1305
|
+
lock = await self._get_lock()
|
|
1306
|
+
async with lock:
|
|
1307
|
+
if graph_id in self._connections:
|
|
1308
|
+
try:
|
|
1309
|
+
self._connections[graph_id].close()
|
|
1310
|
+
except Exception as e:
|
|
1311
|
+
logger.debug(
|
|
1312
|
+
f"Failed to close connection during cleanup for {graph_id}: {e}"
|
|
1313
|
+
)
|
|
1314
|
+
del self._connections[graph_id]
|
|
1315
|
+
if graph_id in self._databases:
|
|
1316
|
+
try:
|
|
1317
|
+
self._databases[graph_id].close()
|
|
1318
|
+
except Exception as e:
|
|
1319
|
+
logger.debug(
|
|
1320
|
+
f"Failed to close database during cleanup for {graph_id}: {e}"
|
|
1321
|
+
)
|
|
1322
|
+
del self._databases[graph_id]
|
|
1323
|
+
|
|
1324
|
+
# Remove the database (could be file or directory)
|
|
1325
|
+
if path.is_dir():
|
|
1326
|
+
await anyio.to_thread.run_sync(shutil.rmtree, path)
|
|
1327
|
+
else:
|
|
1328
|
+
await anyio.to_thread.run_sync(path.unlink)
|
|
1329
|
+
removed_graphs.append(graph_id)
|
|
1330
|
+
logger.info(f"Removed {error_type} database: {graph_id}")
|
|
1331
|
+
|
|
1332
|
+
except Exception as cleanup_error:
|
|
1333
|
+
logger.error(
|
|
1334
|
+
f"Failed to remove corrupted database {graph_id}: {cleanup_error}"
|
|
1335
|
+
)
|
|
1336
|
+
|
|
1337
|
+
return removed_graphs
|
|
1338
|
+
|
|
1196
1339
|
async def list_graphs(self) -> list[CodebaseGraph]:
|
|
1197
1340
|
"""List all available graphs.
|
|
1198
1341
|
|
|
@@ -1201,7 +1344,7 @@ class CodebaseGraphManager:
|
|
|
1201
1344
|
"""
|
|
1202
1345
|
graphs = []
|
|
1203
1346
|
|
|
1204
|
-
# Find all .kuzu files
|
|
1347
|
+
# Find all .kuzu database files (Kuzu v0.11.2 creates files, not directories)
|
|
1205
1348
|
for path in self.storage_dir.glob("*.kuzu"):
|
|
1206
1349
|
if path.is_file():
|
|
1207
1350
|
graph_id = path.stem
|
|
@@ -1294,6 +1437,8 @@ class CodebaseGraphManager:
|
|
|
1294
1437
|
Args:
|
|
1295
1438
|
graph_id: Graph to delete
|
|
1296
1439
|
"""
|
|
1440
|
+
import shutil
|
|
1441
|
+
|
|
1297
1442
|
# Stop watcher if running
|
|
1298
1443
|
if graph_id in self._watchers:
|
|
1299
1444
|
await self.stop_watcher(graph_id)
|
|
@@ -1308,11 +1453,14 @@ class CodebaseGraphManager:
|
|
|
1308
1453
|
self._databases[graph_id].close()
|
|
1309
1454
|
del self._databases[graph_id]
|
|
1310
1455
|
|
|
1311
|
-
# Delete files
|
|
1456
|
+
# Delete database (files in v0.11.2, directories in newer versions)
|
|
1312
1457
|
graph_path = self.storage_dir / f"{graph_id}.kuzu"
|
|
1313
1458
|
if graph_path.exists():
|
|
1314
|
-
|
|
1315
|
-
|
|
1459
|
+
if graph_path.is_dir():
|
|
1460
|
+
await anyio.to_thread.run_sync(shutil.rmtree, graph_path)
|
|
1461
|
+
else:
|
|
1462
|
+
# File-based database (Kuzu v0.11.2)
|
|
1463
|
+
await anyio.to_thread.run_sync(graph_path.unlink)
|
|
1316
1464
|
|
|
1317
1465
|
# Also delete the WAL file if it exists
|
|
1318
1466
|
wal_path = self.storage_dir / f"{graph_id}.kuzu.wal"
|
|
@@ -1464,6 +1612,7 @@ class CodebaseGraphManager:
|
|
|
1464
1612
|
languages: list[str] | None,
|
|
1465
1613
|
exclude_patterns: list[str] | None,
|
|
1466
1614
|
indexed_from_cwd: str | None = None,
|
|
1615
|
+
progress_callback: Any | None = None,
|
|
1467
1616
|
) -> CodebaseGraph:
|
|
1468
1617
|
"""Internal implementation of graph building (runs in background)."""
|
|
1469
1618
|
operation_id = str(uuid.uuid4())
|
|
@@ -1487,7 +1636,13 @@ class CodebaseGraphManager:
|
|
|
1487
1636
|
|
|
1488
1637
|
# Do the actual build work
|
|
1489
1638
|
graph = await self._do_build_graph(
|
|
1490
|
-
graph_id,
|
|
1639
|
+
graph_id,
|
|
1640
|
+
repo_path,
|
|
1641
|
+
name,
|
|
1642
|
+
languages,
|
|
1643
|
+
exclude_patterns,
|
|
1644
|
+
indexed_from_cwd,
|
|
1645
|
+
progress_callback,
|
|
1491
1646
|
)
|
|
1492
1647
|
|
|
1493
1648
|
# Update operation stats
|
|
@@ -1536,6 +1691,7 @@ class CodebaseGraphManager:
|
|
|
1536
1691
|
languages: list[str] | None,
|
|
1537
1692
|
exclude_patterns: list[str] | None,
|
|
1538
1693
|
indexed_from_cwd: str | None = None,
|
|
1694
|
+
progress_callback: Any | None = None,
|
|
1539
1695
|
) -> CodebaseGraph:
|
|
1540
1696
|
"""Execute the actual graph building logic (extracted from original build_graph)."""
|
|
1541
1697
|
# The database and Project node already exist from _initialize_graph_metadata
|
|
@@ -1591,10 +1747,11 @@ class CodebaseGraphManager:
|
|
|
1591
1747
|
parsers=parsers,
|
|
1592
1748
|
queries=queries,
|
|
1593
1749
|
exclude_patterns=exclude_patterns,
|
|
1750
|
+
progress_callback=progress_callback,
|
|
1594
1751
|
)
|
|
1595
1752
|
|
|
1596
1753
|
# Build the graph
|
|
1597
|
-
builder.run()
|
|
1754
|
+
asyncio.run(builder.run())
|
|
1598
1755
|
|
|
1599
1756
|
# Run build in thread pool
|
|
1600
1757
|
await anyio.to_thread.run_sync(_build_graph)
|
|
@@ -1616,6 +1773,7 @@ class CodebaseGraphManager:
|
|
|
1616
1773
|
languages: list[str] | None = None,
|
|
1617
1774
|
exclude_patterns: list[str] | None = None,
|
|
1618
1775
|
indexed_from_cwd: str | None = None,
|
|
1776
|
+
progress_callback: Any | None = None,
|
|
1619
1777
|
) -> str:
|
|
1620
1778
|
"""Start building a new code knowledge graph asynchronously.
|
|
1621
1779
|
|
|
@@ -1654,7 +1812,13 @@ class CodebaseGraphManager:
|
|
|
1654
1812
|
# Start the build operation in background
|
|
1655
1813
|
task = asyncio.create_task(
|
|
1656
1814
|
self._build_graph_impl(
|
|
1657
|
-
graph_id,
|
|
1815
|
+
graph_id,
|
|
1816
|
+
repo_path,
|
|
1817
|
+
name,
|
|
1818
|
+
languages,
|
|
1819
|
+
exclude_patterns,
|
|
1820
|
+
indexed_from_cwd,
|
|
1821
|
+
progress_callback,
|
|
1658
1822
|
)
|
|
1659
1823
|
)
|
|
1660
1824
|
self._operations[graph_id] = task
|
|
@@ -34,7 +34,7 @@ async def llm_cypher_prompt(
|
|
|
34
34
|
Returns:
|
|
35
35
|
CypherGenerationResponse with cypher_query, can_generate flag, and reason if not
|
|
36
36
|
"""
|
|
37
|
-
model_config = get_provider_model()
|
|
37
|
+
model_config = await get_provider_model()
|
|
38
38
|
|
|
39
39
|
# Create an agent with structured output for Cypher generation
|
|
40
40
|
cypher_agent = Agent(
|