shotgun-sh 0.3.3.dev1__py3-none-any.whl → 0.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159) hide show
  1. shotgun/agents/agent_manager.py +497 -30
  2. shotgun/agents/cancellation.py +103 -0
  3. shotgun/agents/common.py +90 -77
  4. shotgun/agents/config/README.md +0 -1
  5. shotgun/agents/config/manager.py +52 -8
  6. shotgun/agents/config/models.py +21 -27
  7. shotgun/agents/config/provider.py +44 -27
  8. shotgun/agents/conversation/history/file_content_deduplication.py +66 -43
  9. shotgun/agents/conversation/history/token_counting/base.py +51 -9
  10. shotgun/agents/export.py +12 -13
  11. shotgun/agents/file_read.py +176 -0
  12. shotgun/agents/messages.py +15 -3
  13. shotgun/agents/models.py +90 -2
  14. shotgun/agents/plan.py +12 -13
  15. shotgun/agents/research.py +13 -10
  16. shotgun/agents/router/__init__.py +47 -0
  17. shotgun/agents/router/models.py +384 -0
  18. shotgun/agents/router/router.py +185 -0
  19. shotgun/agents/router/tools/__init__.py +18 -0
  20. shotgun/agents/router/tools/delegation_tools.py +557 -0
  21. shotgun/agents/router/tools/plan_tools.py +403 -0
  22. shotgun/agents/runner.py +17 -2
  23. shotgun/agents/specify.py +12 -13
  24. shotgun/agents/tasks.py +12 -13
  25. shotgun/agents/tools/__init__.py +8 -0
  26. shotgun/agents/tools/codebase/directory_lister.py +27 -39
  27. shotgun/agents/tools/codebase/file_read.py +26 -35
  28. shotgun/agents/tools/codebase/query_graph.py +9 -0
  29. shotgun/agents/tools/codebase/retrieve_code.py +9 -0
  30. shotgun/agents/tools/file_management.py +81 -3
  31. shotgun/agents/tools/file_read_tools/__init__.py +7 -0
  32. shotgun/agents/tools/file_read_tools/multimodal_file_read.py +167 -0
  33. shotgun/agents/tools/markdown_tools/__init__.py +62 -0
  34. shotgun/agents/tools/markdown_tools/insert_section.py +148 -0
  35. shotgun/agents/tools/markdown_tools/models.py +86 -0
  36. shotgun/agents/tools/markdown_tools/remove_section.py +114 -0
  37. shotgun/agents/tools/markdown_tools/replace_section.py +119 -0
  38. shotgun/agents/tools/markdown_tools/utils.py +453 -0
  39. shotgun/agents/tools/registry.py +46 -6
  40. shotgun/agents/tools/web_search/__init__.py +1 -2
  41. shotgun/agents/tools/web_search/gemini.py +1 -3
  42. shotgun/agents/tools/web_search/openai.py +42 -23
  43. shotgun/attachments/__init__.py +41 -0
  44. shotgun/attachments/errors.py +60 -0
  45. shotgun/attachments/models.py +107 -0
  46. shotgun/attachments/parser.py +257 -0
  47. shotgun/attachments/processor.py +193 -0
  48. shotgun/build_constants.py +4 -7
  49. shotgun/cli/clear.py +2 -2
  50. shotgun/cli/codebase/commands.py +181 -65
  51. shotgun/cli/compact.py +2 -2
  52. shotgun/cli/context.py +2 -2
  53. shotgun/cli/error_handler.py +2 -2
  54. shotgun/cli/run.py +90 -0
  55. shotgun/cli/spec/backup.py +2 -1
  56. shotgun/codebase/__init__.py +2 -0
  57. shotgun/codebase/benchmarks/__init__.py +35 -0
  58. shotgun/codebase/benchmarks/benchmark_runner.py +309 -0
  59. shotgun/codebase/benchmarks/exporters.py +119 -0
  60. shotgun/codebase/benchmarks/formatters/__init__.py +49 -0
  61. shotgun/codebase/benchmarks/formatters/base.py +34 -0
  62. shotgun/codebase/benchmarks/formatters/json_formatter.py +106 -0
  63. shotgun/codebase/benchmarks/formatters/markdown.py +136 -0
  64. shotgun/codebase/benchmarks/models.py +129 -0
  65. shotgun/codebase/core/__init__.py +4 -0
  66. shotgun/codebase/core/call_resolution.py +91 -0
  67. shotgun/codebase/core/change_detector.py +11 -6
  68. shotgun/codebase/core/errors.py +159 -0
  69. shotgun/codebase/core/extractors/__init__.py +23 -0
  70. shotgun/codebase/core/extractors/base.py +138 -0
  71. shotgun/codebase/core/extractors/factory.py +63 -0
  72. shotgun/codebase/core/extractors/go/__init__.py +7 -0
  73. shotgun/codebase/core/extractors/go/extractor.py +122 -0
  74. shotgun/codebase/core/extractors/javascript/__init__.py +7 -0
  75. shotgun/codebase/core/extractors/javascript/extractor.py +132 -0
  76. shotgun/codebase/core/extractors/protocol.py +109 -0
  77. shotgun/codebase/core/extractors/python/__init__.py +7 -0
  78. shotgun/codebase/core/extractors/python/extractor.py +141 -0
  79. shotgun/codebase/core/extractors/rust/__init__.py +7 -0
  80. shotgun/codebase/core/extractors/rust/extractor.py +139 -0
  81. shotgun/codebase/core/extractors/types.py +15 -0
  82. shotgun/codebase/core/extractors/typescript/__init__.py +7 -0
  83. shotgun/codebase/core/extractors/typescript/extractor.py +92 -0
  84. shotgun/codebase/core/gitignore.py +252 -0
  85. shotgun/codebase/core/ingestor.py +644 -354
  86. shotgun/codebase/core/kuzu_compat.py +119 -0
  87. shotgun/codebase/core/language_config.py +239 -0
  88. shotgun/codebase/core/manager.py +256 -46
  89. shotgun/codebase/core/metrics_collector.py +310 -0
  90. shotgun/codebase/core/metrics_types.py +347 -0
  91. shotgun/codebase/core/parallel_executor.py +424 -0
  92. shotgun/codebase/core/work_distributor.py +254 -0
  93. shotgun/codebase/core/worker.py +768 -0
  94. shotgun/codebase/indexing_state.py +86 -0
  95. shotgun/codebase/models.py +94 -0
  96. shotgun/codebase/service.py +13 -0
  97. shotgun/exceptions.py +9 -9
  98. shotgun/main.py +3 -16
  99. shotgun/posthog_telemetry.py +165 -24
  100. shotgun/prompts/agents/export.j2 +2 -0
  101. shotgun/prompts/agents/file_read.j2 +48 -0
  102. shotgun/prompts/agents/partials/common_agent_system_prompt.j2 +19 -52
  103. shotgun/prompts/agents/partials/content_formatting.j2 +12 -33
  104. shotgun/prompts/agents/partials/interactive_mode.j2 +9 -32
  105. shotgun/prompts/agents/partials/router_delegation_mode.j2 +35 -0
  106. shotgun/prompts/agents/plan.j2 +38 -12
  107. shotgun/prompts/agents/research.j2 +70 -31
  108. shotgun/prompts/agents/router.j2 +713 -0
  109. shotgun/prompts/agents/specify.j2 +53 -16
  110. shotgun/prompts/agents/state/codebase/codebase_graphs_available.j2 +14 -1
  111. shotgun/prompts/agents/state/system_state.j2 +24 -13
  112. shotgun/prompts/agents/tasks.j2 +72 -34
  113. shotgun/settings.py +49 -10
  114. shotgun/tui/app.py +154 -24
  115. shotgun/tui/commands/__init__.py +9 -1
  116. shotgun/tui/components/attachment_bar.py +87 -0
  117. shotgun/tui/components/mode_indicator.py +120 -25
  118. shotgun/tui/components/prompt_input.py +25 -28
  119. shotgun/tui/components/status_bar.py +14 -7
  120. shotgun/tui/dependencies.py +58 -8
  121. shotgun/tui/protocols.py +55 -0
  122. shotgun/tui/screens/chat/chat.tcss +24 -1
  123. shotgun/tui/screens/chat/chat_screen.py +1376 -213
  124. shotgun/tui/screens/chat/codebase_index_prompt_screen.py +8 -4
  125. shotgun/tui/screens/chat_screen/attachment_hint.py +40 -0
  126. shotgun/tui/screens/chat_screen/command_providers.py +0 -97
  127. shotgun/tui/screens/chat_screen/history/agent_response.py +7 -3
  128. shotgun/tui/screens/chat_screen/history/chat_history.py +58 -6
  129. shotgun/tui/screens/chat_screen/history/formatters.py +75 -15
  130. shotgun/tui/screens/chat_screen/history/partial_response.py +11 -1
  131. shotgun/tui/screens/chat_screen/history/user_question.py +25 -3
  132. shotgun/tui/screens/chat_screen/messages.py +219 -0
  133. shotgun/tui/screens/database_locked_dialog.py +219 -0
  134. shotgun/tui/screens/database_timeout_dialog.py +158 -0
  135. shotgun/tui/screens/kuzu_error_dialog.py +135 -0
  136. shotgun/tui/screens/model_picker.py +1 -3
  137. shotgun/tui/screens/models.py +11 -0
  138. shotgun/tui/state/processing_state.py +19 -0
  139. shotgun/tui/utils/mode_progress.py +20 -86
  140. shotgun/tui/widgets/__init__.py +2 -1
  141. shotgun/tui/widgets/approval_widget.py +152 -0
  142. shotgun/tui/widgets/cascade_confirmation_widget.py +203 -0
  143. shotgun/tui/widgets/plan_panel.py +129 -0
  144. shotgun/tui/widgets/step_checkpoint_widget.py +180 -0
  145. shotgun/tui/widgets/widget_coordinator.py +18 -0
  146. shotgun/utils/file_system_utils.py +4 -1
  147. {shotgun_sh-0.3.3.dev1.dist-info → shotgun_sh-0.6.2.dist-info}/METADATA +88 -35
  148. shotgun_sh-0.6.2.dist-info/RECORD +291 -0
  149. shotgun/cli/export.py +0 -81
  150. shotgun/cli/plan.py +0 -73
  151. shotgun/cli/research.py +0 -93
  152. shotgun/cli/specify.py +0 -70
  153. shotgun/cli/tasks.py +0 -78
  154. shotgun/sentry_telemetry.py +0 -232
  155. shotgun/tui/screens/onboarding.py +0 -580
  156. shotgun_sh-0.3.3.dev1.dist-info/RECORD +0 -229
  157. {shotgun_sh-0.3.3.dev1.dist-info → shotgun_sh-0.6.2.dist-info}/WHEEL +0 -0
  158. {shotgun_sh-0.3.3.dev1.dist-info → shotgun_sh-0.6.2.dist-info}/entry_points.txt +0 -0
  159. {shotgun_sh-0.3.3.dev1.dist-info → shotgun_sh-0.6.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,136 @@
1
+ """Markdown formatter for benchmark results.
2
+
3
+ This module provides the MarkdownFormatter class for displaying benchmark results
4
+ as GitHub-compatible markdown.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import TYPE_CHECKING
10
+
11
+ if TYPE_CHECKING:
12
+ from shotgun.codebase.benchmarks.models import (
13
+ BenchmarkResults,
14
+ MetricsDisplayOptions,
15
+ )
16
+
17
+
18
class MarkdownFormatter:
    """Render benchmark results as a GitHub-compatible markdown report."""

    def format_results(
        self,
        results: BenchmarkResults,
        options: MetricsDisplayOptions,
    ) -> str:
        """Build the markdown report for one benchmark execution.

        The report always contains a header and a summary table. The phase
        breakdown and per-file tables are added only when metrics from a
        measured run exist, the matching option is enabled, and
        ``options.show_summary_only`` is false.

        Args:
            results: Benchmark results to format
            options: Display options

        Returns:
            Markdown string (sections joined with newlines)
        """
        out: list[str] = []
        cfg = results.config

        # Header: title, path, execution mode and iteration counts.
        mode_label = cfg.mode.capitalize()
        if cfg.mode == "parallel":
            # A missing/zero worker count is reported as "auto".
            worker_count = cfg.worker_count or "auto"
            mode_label = f"{mode_label} ({worker_count} workers)"
        out.extend(
            [
                f"# Indexing Benchmark: {results.codebase_name}",
                "",
                f"**Path:** `{results.codebase_path}`",
                f"**Mode:** {mode_label}",
                f"**Iterations:** {cfg.iterations} ({cfg.warmup_iterations} warmup)",
                "",
            ]
        )

        # Summary statistics table.
        out.extend(["## Summary", "", "| Metric | Value |", "|--------|-------|"])
        if cfg.iterations > 1:
            out.extend(
                [
                    f"| Duration (avg) | {results.avg_duration_seconds:.2f}s |",
                    f"| Duration (min) | {results.min_duration_seconds:.2f}s |",
                    f"| Duration (max) | {results.max_duration_seconds:.2f}s |",
                    f"| Duration (std dev) | {results.std_dev_seconds:.2f}s |",
                ]
            )
        else:
            # With a single iteration min/max/std-dev carry no information.
            out.append(f"| Duration | {results.avg_duration_seconds:.2f}s |")
        out.extend(
            [
                f"| Throughput | {results.avg_throughput:.1f} files/s |",
                f"| Peak Memory | {results.avg_memory_mb:.1f} MB |",
            ]
        )

        metrics = results.get_last_metrics()
        if metrics:
            out.extend(
                [
                    f"| Files Processed | {metrics.total_files:,} |",
                    f"| Nodes Created | {metrics.total_nodes:,} |",
                    f"| Relationships | {metrics.total_relationships:,} |",
                ]
            )
        if results.efficiency:
            out.append(f"| Parallelism Efficiency | {results.efficiency * 100:.0f}% |")
        if results.speedup_factor:
            out.append(f"| Speedup | {results.speedup_factor:.2f}x |")
        out.append("")

        # Phase breakdown table.
        if metrics and options.show_phase_metrics and not options.show_summary_only:
            out.extend(
                [
                    "## Phase Breakdown",
                    "",
                    "| Phase | Duration | Items | Throughput | Memory |",
                    "|-------|----------|-------|------------|--------|",
                ]
            )
            out.extend(
                f"| {phase_name} | {phase.duration_seconds:.2f}s | "
                f"{phase.items_processed} | {phase.throughput:.1f}/s | "
                f"{phase.memory_mb:.1f} MB |"
                for phase_name, phase in metrics.phase_metrics.items()
            )
            out.append("")

        # Per-file table, slowest parse time first.
        if (
            metrics
            and options.show_file_metrics
            and metrics.file_metrics
            and not options.show_summary_only
        ):
            slowest_first = sorted(
                metrics.file_metrics,
                key=lambda entry: entry.parse_time_ms,
                reverse=True,
            )
            limit = options.top_n_files
            if limit:
                slowest_first = slowest_first[:limit]

            if slowest_first:
                heading = (
                    f"Top {len(slowest_first)} Slowest Files"
                    if limit
                    else "File Metrics"
                )
                out.extend(
                    [
                        f"## {heading}",
                        "",
                        "| File | Language | Size | Duration | Definitions |",
                        "|------|----------|------|----------|-------------|",
                    ]
                )
                for entry in slowest_first:
                    kilobytes = entry.file_size_bytes / 1024
                    out.append(
                        f"| `{entry.file_path}` | {entry.language} | "
                        f"{kilobytes:.1f} KB | {entry.parse_time_ms:.1f}ms | "
                        f"{entry.definitions_extracted} |"
                    )
                out.append("")

        return "\n".join(out)
@@ -0,0 +1,129 @@
1
+ """Pydantic models for benchmark system.
2
+
3
+ This module contains all data models used by the benchmark system.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import statistics
9
+ from enum import StrEnum
10
+
11
+ from pydantic import BaseModel, Field
12
+
13
+ from shotgun.codebase.core.metrics_types import IndexingMetrics
14
+
15
+
16
+ class BenchmarkMode(StrEnum):
17
+ """Execution mode for benchmarks."""
18
+
19
+ PARALLEL = "parallel"
20
+ SEQUENTIAL = "sequential"
21
+
22
+
23
+ class OutputFormat(StrEnum):
24
+ """Output format for benchmark results."""
25
+
26
+ JSON = "json"
27
+ MARKDOWN = "markdown"
28
+
29
+
30
class BenchmarkConfig(BaseModel):
    """Settings that control how a benchmark is executed."""

    # Execution strategy; parallel unless stated otherwise.
    mode: BenchmarkMode = BenchmarkMode.PARALLEL
    # Number of workers; None leaves the count unspecified.
    worker_count: int | None = None
    # Number of measured iterations.
    iterations: int = 1
    # Number of warmup iterations.
    warmup_iterations: int = 0
    # Toggles for the optional per-file and per-worker metric collection.
    collect_file_metrics: bool = True
    collect_worker_metrics: bool = True
39
+
40
+
41
class BenchmarkRun(BaseModel):
    """Outcome of one benchmark iteration."""

    # Identifier of this run.
    run_id: int
    # True for warmup runs; BenchmarkResults.add_run stores those separately.
    is_warmup: bool
    # Indexing metrics captured for this run.
    metrics: IndexingMetrics
47
+
48
+
49
class BenchmarkResults(BaseModel):
    """Complete results from benchmark execution.

    Holds the individual runs (warmup and measured, kept separately) together
    with aggregate statistics that ``calculate_statistics`` derives from the
    measured runs only.
    """

    codebase_name: str
    codebase_path: str
    config: BenchmarkConfig
    warmup_runs: list[BenchmarkRun] = Field(default_factory=list)
    measured_runs: list[BenchmarkRun] = Field(default_factory=list)

    # Aggregate statistics (calculated after runs)
    avg_duration_seconds: float = 0.0
    min_duration_seconds: float = 0.0
    max_duration_seconds: float = 0.0
    std_dev_seconds: float = 0.0
    avg_throughput: float = 0.0
    avg_memory_mb: float = 0.0

    # Comparison data
    baseline_duration: float | None = None
    speedup_factor: float | None = None
    efficiency: float | None = None

    def add_run(self, run: BenchmarkRun) -> None:
        """Add a benchmark run to results.

        Args:
            run: Benchmark run to add; routed to ``warmup_runs`` or
                ``measured_runs`` according to ``run.is_warmup``.
        """
        target = self.warmup_runs if run.is_warmup else self.measured_runs
        target.append(run)

    def calculate_statistics(self) -> None:
        """Calculate aggregate statistics from measured runs.

        Does nothing when there are no measured runs. Speedup and efficiency
        are only derived in parallel mode with a known worker count, a
        baseline duration, and a non-zero mean duration.
        """
        if not self.measured_runs:
            return

        durations = [r.metrics.total_duration_seconds for r in self.measured_runs]
        throughputs = [r.metrics.avg_throughput for r in self.measured_runs]
        memories = [r.metrics.peak_memory_mb for r in self.measured_runs]

        self.avg_duration_seconds = statistics.mean(durations)
        self.min_duration_seconds = min(durations)
        self.max_duration_seconds = max(durations)
        # statistics.stdev needs at least two data points.
        self.std_dev_seconds = (
            statistics.stdev(durations) if len(durations) > 1 else 0.0
        )
        self.avg_throughput = statistics.mean(throughputs)
        self.avg_memory_mb = statistics.mean(memories)

        # Calculate efficiency if parallel mode with known worker count.
        # The mean duration is checked as well so that a zero mean cannot
        # raise ZeroDivisionError.
        if (
            self.config.mode == BenchmarkMode.PARALLEL
            and self.config.worker_count
            and self.baseline_duration
            and self.avg_duration_seconds > 0
        ):
            speedup = self.baseline_duration / self.avg_duration_seconds
            self.speedup_factor = speedup
            self.efficiency = speedup / self.config.worker_count

    def get_last_metrics(self) -> IndexingMetrics | None:
        """Get metrics from the last measured run.

        Returns:
            IndexingMetrics from last run, or None if no runs
        """
        if self.measured_runs:
            return self.measured_runs[-1].metrics
        return None
119
+
120
+
121
class MetricsDisplayOptions(BaseModel):
    """Switches that select which metric sections a formatter renders."""

    # Section toggles.
    show_phase_metrics: bool = True
    show_worker_metrics: bool = False
    show_file_metrics: bool = False
    # Summary-only flag.
    show_summary_only: bool = False
    # Optional limits for the per-file listing; None means unset.
    top_n_files: int | None = None
    min_file_duration_ms: float | None = None
@@ -5,6 +5,7 @@ from shotgun.codebase.core.code_retrieval import (
5
5
  retrieve_code_by_cypher,
6
6
  retrieve_code_by_qualified_name,
7
7
  )
8
+ from shotgun.codebase.core.gitignore import GitignoreManager, load_gitignore_for_repo
8
9
  from shotgun.codebase.core.ingestor import (
9
10
  CodebaseIngestor,
10
11
  Ingestor,
@@ -29,6 +30,9 @@ __all__ = [
29
30
  "Ingestor",
30
31
  "SimpleGraphBuilder",
31
32
  "CodebaseGraphManager",
33
+ # Gitignore support
34
+ "GitignoreManager",
35
+ "load_gitignore_for_repo",
32
36
  # Language configuration
33
37
  "LanguageConfig",
34
38
  "LANGUAGE_CONFIGS",
@@ -0,0 +1,91 @@
1
+ """Call resolution utilities for function/method call graph building.
2
+
3
+ This module provides shared utilities for resolving function calls
4
+ and calculating confidence scores for potential callee matches.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from collections.abc import Collection, Mapping
10
+
11
+
12
def calculate_callee_confidence(
    caller_qn: str,
    callee_qn: str,
    module_qn: str,
    object_name: str | None,
    simple_name_lookup: Mapping[str, Collection[str]],
) -> float:
    """Calculate confidence score for a potential callee match.

    Uses multiple additive heuristics to determine how likely a given callee
    is the correct target of a function call:
    1. Module locality - callee lives in the caller's module (+0.5), and
       additionally shares the caller's enclosing scope (+0.2)
    2. Package locality - callee lives in the module's parent package (+0.3);
       only considered when the module-locality check did not match
    3. Object/class match for method calls (+0.3 for a name match, +0.4 for
       ``self`` calls into the caller's own enclosing scope)
    4. Standard library boost (+0.1)
    5. Name uniqueness boost (+0.2 for a single candidate, +0.1 for up to 3)

    Args:
        caller_qn: Qualified name of the calling function
        callee_qn: Qualified name of the potential callee
        module_qn: Qualified name of the current module
        object_name: Object name for method calls (e.g., 'obj' in obj.method())
        simple_name_lookup: Mapping from simple names to qualified names
            (supports both set[str] and list[str] values)

    Returns:
        Confidence score between 0.0 and 1.0
    """
    score = 0.0
    callee_parts = callee_qn.split(".")

    # 1. Module locality - functions in the same module are most likely
    if callee_qn.startswith(module_qn + "."):
        score += 0.5

        # Even higher if caller and callee share the same enclosing scope
        # (e.g. two methods of one class).
        caller_parts = caller_qn.split(".")
        if (
            len(caller_parts) >= 3
            and len(callee_parts) >= 3
            and caller_parts[:-1] == callee_parts[:-1]
        ):
            score += 0.2

    # 2. Package locality - functions in the same package hierarchy
    elif "." in module_qn:
        package = module_qn.rsplit(".", 1)[0]
        if callee_qn.startswith(package + "."):
            score += 0.3

    # 3. Object/class match for method calls
    if object_name and len(callee_parts) >= 2:
        # Simple heuristic: check if class name matches object name
        # (In reality, we'd need type inference for accuracy)
        class_name = callee_parts[-2]
        if class_name.lower() == object_name.lower():
            score += 0.3
        elif object_name == "self":
            # 'self' refers to the caller's own class. Compare against the
            # scope followed by a dot so that "pkg.Foo" does not match an
            # unrelated "pkg.FooBar.method".
            caller_scope = caller_qn.rsplit(".", 1)[0]
            if callee_qn.startswith(caller_scope + "."):
                score += 0.4

    # 4. Standard library boost
    # Give a small boost to standard library functions
    if callee_qn.startswith(("builtins.", "typing.", "collections.")):
        score += 0.1

    # 5. Name uniqueness boost
    # If function names are unique enough, boost confidence
    possible_count = len(simple_name_lookup.get(callee_parts[-1], []))
    if possible_count == 1:
        score += 0.2
    elif possible_count <= 3:
        score += 0.1

    # Normalize to [0, 1]
    return min(score, 1.0)
@@ -1,16 +1,21 @@
1
1
  """Change detection for incremental graph updates."""
2
2
 
3
+ from __future__ import annotations
4
+
3
5
  import hashlib
4
6
  import os
5
7
  from enum import Enum
6
8
  from pathlib import Path
7
- from typing import Any, cast
9
+ from typing import TYPE_CHECKING, Any, cast
8
10
 
9
11
  import aiofiles
10
- import kuzu
11
12
 
13
+ from shotgun.codebase.models import NodeLabel, RelationshipType
12
14
  from shotgun.logging_config import get_logger
13
15
 
16
+ if TYPE_CHECKING:
17
+ import real_ladybug as kuzu
18
+
14
19
  logger = get_logger(__name__)
15
20
 
16
21
 
@@ -332,10 +337,10 @@ class ChangeDetector:
332
337
 
333
338
  # Query each TRACKS relationship type
334
339
  for node_type, rel_type in [
335
- ("Module", "TRACKS_Module"),
336
- ("Class", "TRACKS_Class"),
337
- ("Function", "TRACKS_Function"),
338
- ("Method", "TRACKS_Method"),
340
+ (NodeLabel.MODULE, RelationshipType.TRACKS_MODULE),
341
+ (NodeLabel.CLASS, RelationshipType.TRACKS_CLASS),
342
+ (NodeLabel.FUNCTION, RelationshipType.TRACKS_FUNCTION),
343
+ (NodeLabel.METHOD, RelationshipType.TRACKS_METHOD),
339
344
  ]:
340
345
  try:
341
346
  result = self.conn.execute(
@@ -0,0 +1,159 @@
1
+ """Error classification for Kuzu database operations.
2
+
3
+ This module provides error classification for Kuzu database errors,
4
+ allowing the application to distinguish between different failure modes
5
+ (lock contention, corruption, permissions, etc.) and handle each appropriately.
6
+ """
7
+
8
+ from enum import StrEnum
9
+ from pathlib import Path
10
+
11
+ from pydantic import BaseModel
12
+
13
+
14
+ class KuzuErrorType(StrEnum):
15
+ """Classification of Kuzu database errors."""
16
+
17
+ LOCKED = "locked" # Another process has DB open
18
+ CORRUPTION = "corruption" # Database file is invalid/corrupted
19
+ PERMISSION = "permission" # Permission denied (transient)
20
+ MISSING = "missing" # File not found
21
+ SCHEMA = "schema" # Table doesn't exist (incomplete build)
22
+ TIMEOUT = "timeout" # Operation timed out
23
+ UNKNOWN = "unknown" # Unrecognized error
24
+
25
+
26
def classify_kuzu_error(exception: Exception) -> KuzuErrorType:
    """Map an exception raised by Kuzu onto a KuzuErrorType category.

    Note: Kuzu only throws generic RuntimeError exceptions with no error codes
    or custom exception types, so substring matching on the message is the only
    way to tell failure modes apart. Categories are tested in a fixed order
    (lock, corruption, permission, missing, schema); the first match wins.

    Args:
        exception: The exception to classify

    Returns:
        KuzuErrorType indicating the category of error, or
        KuzuErrorType.UNKNOWN when no known pattern matches
    """
    text = str(exception)
    lowered = text.lower()

    # Lock contention - another process has the database open
    if "Could not set lock" in text:
        return KuzuErrorType.LOCKED

    # True corruption (invalid database file) and C++ internal errors that
    # most likely indicate corruption.
    exact_corruption_markers = (
        "Unable to open database",
        "Reading past the end of the file",
        "unordered_map",
        "std::exception",
    )
    if (
        any(marker in text for marker in exact_corruption_markers)
        or ("not a valid" in lowered and "database" in lowered)
        or "key not found" in lowered
    ):
        return KuzuErrorType.CORRUPTION

    # Permission errors - transient, may resolve on retry
    if "Permission denied" in text:
        return KuzuErrorType.PERMISSION

    # Missing file - nothing to delete
    if "No such file or directory" in text:
        return KuzuErrorType.MISSING

    # Schema errors - incomplete build, table doesn't exist
    if "does not exist" in text and ("Table" in text or "Binder exception" in text):
        return KuzuErrorType.SCHEMA

    return KuzuErrorType.UNKNOWN
76
+
77
+
78
class DatabaseIssue(BaseModel):
    """Structured record describing a problem with one graph database.

    Attributes:
        graph_id: The ID of the affected graph
        graph_path: Path to the database file
        error_type: Classification of the error
        message: Human-readable error message
    """

    model_config = {"arbitrary_types_allowed": True}

    # Which graph is affected and where its database file lives.
    graph_id: str
    graph_path: Path
    # What went wrong: the error category plus a human-readable message.
    error_type: KuzuErrorType
    message: str
94
+
95
+
96
class KuzuDatabaseError(Exception):
    """Root of the classified Kuzu database exception hierarchy.

    Besides the message, each instance carries the affected graph's id and
    path and the error category as attributes.
    """

    def __init__(
        self, message: str, graph_id: str, graph_path: str, error_type: KuzuErrorType
    ) -> None:
        # Only the message is passed to Exception; the context is stored
        # as plain attributes.
        self.graph_id, self.graph_path, self.error_type = (
            graph_id,
            graph_path,
            error_type,
        )
        super().__init__(message)
106
+
107
+
108
class DatabaseLockedError(KuzuDatabaseError):
    """Signals that another process currently holds the database lock."""

    def __init__(self, graph_id: str, graph_path: str) -> None:
        # Always classified as LOCKED.
        message = (
            f"Database '{graph_id}' is locked by another process. "
            "Only one shotgun instance can access a codebase at a time."
        )
        super().__init__(
            message,
            graph_id=graph_id,
            graph_path=graph_path,
            error_type=KuzuErrorType.LOCKED,
        )
119
+
120
+
121
class DatabaseCorruptedError(KuzuDatabaseError):
    """Signals that the database file is corrupted."""

    def __init__(self, graph_id: str, graph_path: str, details: str = "") -> None:
        # Non-empty details are appended to the message after a colon.
        suffix = f": {details}" if details else ""
        super().__init__(
            f"Database '{graph_id}' is corrupted" + suffix,
            graph_id=graph_id,
            graph_path=graph_path,
            error_type=KuzuErrorType.CORRUPTION,
        )
134
+
135
+
136
class DatabaseSchemaError(KuzuDatabaseError):
    """Signals an incomplete database schema left by an interrupted build."""

    def __init__(self, graph_id: str, graph_path: str) -> None:
        # Always classified as SCHEMA.
        message = f"Database '{graph_id}' has incomplete schema (build was interrupted)"
        super().__init__(
            message,
            graph_id=graph_id,
            graph_path=graph_path,
            error_type=KuzuErrorType.SCHEMA,
        )
146
+
147
+
148
class DatabaseTimeoutError(KuzuDatabaseError):
    """Signals that a database operation exceeded its time limit."""

    def __init__(self, graph_id: str, graph_path: str, timeout_seconds: float) -> None:
        message = (
            f"Database '{graph_id}' operation timed out after {timeout_seconds}s. "
            "This can happen with large codebases."
        )
        super().__init__(
            message,
            graph_id=graph_id,
            graph_path=graph_path,
            error_type=KuzuErrorType.TIMEOUT,
        )
        # The limit that was exceeded is kept as an attribute.
        self.timeout_seconds = timeout_seconds
@@ -0,0 +1,23 @@
1
+ """Language-specific AST extraction framework.
2
+
3
+ This module provides a Protocol-based architecture for extracting
4
+ definitions, relationships, and metadata from source code ASTs.
5
+
6
+ Usage:
7
+ from shotgun.codebase.core.extractors import get_extractor, SupportedLanguage
8
+
9
+ extractor = get_extractor(SupportedLanguage.PYTHON)
10
+ decorators = extractor.extract_decorators(node)
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from .factory import get_extractor
16
+ from .protocol import LanguageExtractor
17
+ from .types import SupportedLanguage
18
+
19
+ __all__ = [
20
+ "SupportedLanguage",
21
+ "LanguageExtractor",
22
+ "get_extractor",
23
+ ]