shotgun-sh 0.4.0.dev1__py3-none-any.whl → 0.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- shotgun/agents/agent_manager.py +307 -8
- shotgun/agents/cancellation.py +103 -0
- shotgun/agents/common.py +12 -0
- shotgun/agents/config/README.md +0 -1
- shotgun/agents/config/manager.py +10 -7
- shotgun/agents/config/models.py +5 -27
- shotgun/agents/config/provider.py +44 -27
- shotgun/agents/conversation/history/token_counting/base.py +51 -9
- shotgun/agents/file_read.py +176 -0
- shotgun/agents/messages.py +15 -3
- shotgun/agents/models.py +24 -1
- shotgun/agents/router/models.py +8 -0
- shotgun/agents/router/tools/delegation_tools.py +55 -1
- shotgun/agents/router/tools/plan_tools.py +88 -7
- shotgun/agents/runner.py +17 -2
- shotgun/agents/tools/__init__.py +8 -0
- shotgun/agents/tools/codebase/directory_lister.py +27 -39
- shotgun/agents/tools/codebase/file_read.py +26 -35
- shotgun/agents/tools/codebase/query_graph.py +9 -0
- shotgun/agents/tools/codebase/retrieve_code.py +9 -0
- shotgun/agents/tools/file_management.py +32 -2
- shotgun/agents/tools/file_read_tools/__init__.py +7 -0
- shotgun/agents/tools/file_read_tools/multimodal_file_read.py +167 -0
- shotgun/agents/tools/markdown_tools/__init__.py +62 -0
- shotgun/agents/tools/markdown_tools/insert_section.py +148 -0
- shotgun/agents/tools/markdown_tools/models.py +86 -0
- shotgun/agents/tools/markdown_tools/remove_section.py +114 -0
- shotgun/agents/tools/markdown_tools/replace_section.py +119 -0
- shotgun/agents/tools/markdown_tools/utils.py +453 -0
- shotgun/agents/tools/registry.py +44 -6
- shotgun/agents/tools/web_search/openai.py +42 -23
- shotgun/attachments/__init__.py +41 -0
- shotgun/attachments/errors.py +60 -0
- shotgun/attachments/models.py +107 -0
- shotgun/attachments/parser.py +257 -0
- shotgun/attachments/processor.py +193 -0
- shotgun/build_constants.py +4 -7
- shotgun/cli/clear.py +2 -2
- shotgun/cli/codebase/commands.py +181 -65
- shotgun/cli/compact.py +2 -2
- shotgun/cli/context.py +2 -2
- shotgun/cli/error_handler.py +2 -2
- shotgun/cli/run.py +90 -0
- shotgun/cli/spec/backup.py +2 -1
- shotgun/codebase/__init__.py +2 -0
- shotgun/codebase/benchmarks/__init__.py +35 -0
- shotgun/codebase/benchmarks/benchmark_runner.py +309 -0
- shotgun/codebase/benchmarks/exporters.py +119 -0
- shotgun/codebase/benchmarks/formatters/__init__.py +49 -0
- shotgun/codebase/benchmarks/formatters/base.py +34 -0
- shotgun/codebase/benchmarks/formatters/json_formatter.py +106 -0
- shotgun/codebase/benchmarks/formatters/markdown.py +136 -0
- shotgun/codebase/benchmarks/models.py +129 -0
- shotgun/codebase/core/__init__.py +4 -0
- shotgun/codebase/core/call_resolution.py +91 -0
- shotgun/codebase/core/change_detector.py +11 -6
- shotgun/codebase/core/errors.py +159 -0
- shotgun/codebase/core/extractors/__init__.py +23 -0
- shotgun/codebase/core/extractors/base.py +138 -0
- shotgun/codebase/core/extractors/factory.py +63 -0
- shotgun/codebase/core/extractors/go/__init__.py +7 -0
- shotgun/codebase/core/extractors/go/extractor.py +122 -0
- shotgun/codebase/core/extractors/javascript/__init__.py +7 -0
- shotgun/codebase/core/extractors/javascript/extractor.py +132 -0
- shotgun/codebase/core/extractors/protocol.py +109 -0
- shotgun/codebase/core/extractors/python/__init__.py +7 -0
- shotgun/codebase/core/extractors/python/extractor.py +141 -0
- shotgun/codebase/core/extractors/rust/__init__.py +7 -0
- shotgun/codebase/core/extractors/rust/extractor.py +139 -0
- shotgun/codebase/core/extractors/types.py +15 -0
- shotgun/codebase/core/extractors/typescript/__init__.py +7 -0
- shotgun/codebase/core/extractors/typescript/extractor.py +92 -0
- shotgun/codebase/core/gitignore.py +252 -0
- shotgun/codebase/core/ingestor.py +644 -354
- shotgun/codebase/core/kuzu_compat.py +119 -0
- shotgun/codebase/core/language_config.py +239 -0
- shotgun/codebase/core/manager.py +256 -46
- shotgun/codebase/core/metrics_collector.py +310 -0
- shotgun/codebase/core/metrics_types.py +347 -0
- shotgun/codebase/core/parallel_executor.py +424 -0
- shotgun/codebase/core/work_distributor.py +254 -0
- shotgun/codebase/core/worker.py +768 -0
- shotgun/codebase/indexing_state.py +86 -0
- shotgun/codebase/models.py +94 -0
- shotgun/codebase/service.py +13 -0
- shotgun/exceptions.py +9 -9
- shotgun/main.py +3 -16
- shotgun/posthog_telemetry.py +165 -24
- shotgun/prompts/agents/file_read.j2 +48 -0
- shotgun/prompts/agents/partials/common_agent_system_prompt.j2 +19 -47
- shotgun/prompts/agents/partials/content_formatting.j2 +12 -33
- shotgun/prompts/agents/partials/interactive_mode.j2 +9 -32
- shotgun/prompts/agents/partials/router_delegation_mode.j2 +21 -22
- shotgun/prompts/agents/plan.j2 +14 -0
- shotgun/prompts/agents/router.j2 +531 -258
- shotgun/prompts/agents/specify.j2 +14 -0
- shotgun/prompts/agents/state/codebase/codebase_graphs_available.j2 +14 -1
- shotgun/prompts/agents/state/system_state.j2 +13 -11
- shotgun/prompts/agents/tasks.j2 +14 -0
- shotgun/settings.py +49 -10
- shotgun/tui/app.py +149 -18
- shotgun/tui/commands/__init__.py +9 -1
- shotgun/tui/components/attachment_bar.py +87 -0
- shotgun/tui/components/prompt_input.py +25 -28
- shotgun/tui/components/status_bar.py +14 -7
- shotgun/tui/dependencies.py +3 -8
- shotgun/tui/protocols.py +18 -0
- shotgun/tui/screens/chat/chat.tcss +15 -0
- shotgun/tui/screens/chat/chat_screen.py +766 -235
- shotgun/tui/screens/chat/codebase_index_prompt_screen.py +8 -4
- shotgun/tui/screens/chat_screen/attachment_hint.py +40 -0
- shotgun/tui/screens/chat_screen/command_providers.py +0 -10
- shotgun/tui/screens/chat_screen/history/chat_history.py +54 -14
- shotgun/tui/screens/chat_screen/history/formatters.py +22 -0
- shotgun/tui/screens/chat_screen/history/user_question.py +25 -3
- shotgun/tui/screens/database_locked_dialog.py +219 -0
- shotgun/tui/screens/database_timeout_dialog.py +158 -0
- shotgun/tui/screens/kuzu_error_dialog.py +135 -0
- shotgun/tui/screens/model_picker.py +1 -3
- shotgun/tui/screens/models.py +11 -0
- shotgun/tui/state/processing_state.py +19 -0
- shotgun/tui/widgets/widget_coordinator.py +18 -0
- shotgun/utils/file_system_utils.py +4 -1
- {shotgun_sh-0.4.0.dev1.dist-info → shotgun_sh-0.6.2.dist-info}/METADATA +87 -34
- {shotgun_sh-0.4.0.dev1.dist-info → shotgun_sh-0.6.2.dist-info}/RECORD +128 -79
- shotgun/cli/export.py +0 -81
- shotgun/cli/plan.py +0 -73
- shotgun/cli/research.py +0 -93
- shotgun/cli/specify.py +0 -70
- shotgun/cli/tasks.py +0 -78
- shotgun/sentry_telemetry.py +0 -232
- shotgun/tui/screens/onboarding.py +0 -584
- {shotgun_sh-0.4.0.dev1.dist-info → shotgun_sh-0.6.2.dist-info}/WHEEL +0 -0
- {shotgun_sh-0.4.0.dev1.dist-info → shotgun_sh-0.6.2.dist-info}/entry_points.txt +0 -0
- {shotgun_sh-0.4.0.dev1.dist-info → shotgun_sh-0.6.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
"""Markdown formatter for benchmark results.
|
|
2
|
+
|
|
3
|
+
This module provides the MarkdownFormatter class for displaying benchmark results
|
|
4
|
+
as GitHub-compatible markdown.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from typing import TYPE_CHECKING
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from shotgun.codebase.benchmarks.models import (
|
|
13
|
+
BenchmarkResults,
|
|
14
|
+
MetricsDisplayOptions,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class MarkdownFormatter:
    """Render benchmark results as GitHub-compatible markdown text."""

    def format_results(
        self,
        results: BenchmarkResults,
        options: MetricsDisplayOptions,
    ) -> str:
        """Build the markdown report for one benchmark execution.

        The report always contains a title block and a "Summary" table.
        A "Phase Breakdown" table and a per-file table are appended when
        metrics from a measured run exist, the matching option is enabled
        and ``options.show_summary_only`` is off.

        Args:
            results: Benchmark results to format
            options: Display options

        Returns:
            Markdown string (sections are separated by blank lines)
        """
        cfg = results.config

        # Only parallel runs report a worker count; an unset count is
        # rendered as "auto".
        workers = (
            f" ({cfg.worker_count or 'auto'} workers)"
            if cfg.mode == "parallel"
            else ""
        )

        out = [
            f"# Indexing Benchmark: {results.codebase_name}",
            "",
            f"**Path:** `{results.codebase_path}`",
            f"**Mode:** {cfg.mode.capitalize()}{workers}",
            f"**Iterations:** {cfg.iterations} ({cfg.warmup_iterations} warmup)",
            "",
            "## Summary",
            "",
            "| Metric | Value |",
            "|--------|-------|",
        ]

        # With a single iteration min/max/std-dev carry no information,
        # so only one duration row is shown.
        if cfg.iterations > 1:
            out.extend(
                [
                    f"| Duration (avg) | {results.avg_duration_seconds:.2f}s |",
                    f"| Duration (min) | {results.min_duration_seconds:.2f}s |",
                    f"| Duration (max) | {results.max_duration_seconds:.2f}s |",
                    f"| Duration (std dev) | {results.std_dev_seconds:.2f}s |",
                ]
            )
        else:
            out.append(f"| Duration | {results.avg_duration_seconds:.2f}s |")

        out.extend(
            [
                f"| Throughput | {results.avg_throughput:.1f} files/s |",
                f"| Peak Memory | {results.avg_memory_mb:.1f} MB |",
            ]
        )

        metrics = results.get_last_metrics()
        if metrics:
            out.extend(
                [
                    f"| Files Processed | {metrics.total_files:,} |",
                    f"| Nodes Created | {metrics.total_nodes:,} |",
                    f"| Relationships | {metrics.total_relationships:,} |",
                ]
            )

        # Comparison rows appear only when a (non-zero) value was computed.
        if results.efficiency:
            out.append(f"| Parallelism Efficiency | {results.efficiency * 100:.0f}% |")
        if results.speedup_factor:
            out.append(f"| Speedup | {results.speedup_factor:.2f}x |")

        out.append("")

        # Phase breakdown
        if metrics and options.show_phase_metrics and not options.show_summary_only:
            out.extend(
                [
                    "## Phase Breakdown",
                    "",
                    "| Phase | Duration | Items | Throughput | Memory |",
                    "|-------|----------|-------|------------|--------|",
                ]
            )
            out.extend(
                f"| {phase_name} | {phase.duration_seconds:.2f}s | "
                f"{phase.items_processed} | {phase.throughput:.1f}/s | "
                f"{phase.memory_mb:.1f} MB |"
                for phase_name, phase in metrics.phase_metrics.items()
            )
            out.append("")

        # File metrics, slowest parse time first
        if (
            metrics
            and options.show_file_metrics
            and metrics.file_metrics
            and not options.show_summary_only
        ):
            limit = options.top_n_files
            slowest_first = sorted(
                metrics.file_metrics,
                key=lambda fm: fm.parse_time_ms,
                reverse=True,
            )
            shown = slowest_first[:limit] if limit else slowest_first

            if shown:
                heading = f"Top {len(shown)} Slowest Files" if limit else "File Metrics"
                out.extend(
                    [
                        f"## {heading}",
                        "",
                        "| File | Language | Size | Duration | Definitions |",
                        "|------|----------|------|----------|-------------|",
                    ]
                )
                out.extend(
                    f"| `{fm.file_path}` | {fm.language} | "
                    f"{fm.file_size_bytes / 1024:.1f} KB | {fm.parse_time_ms:.1f}ms | "
                    f"{fm.definitions_extracted} |"
                    for fm in shown
                )
                out.append("")

        return "\n".join(out)
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"""Pydantic models for benchmark system.
|
|
2
|
+
|
|
3
|
+
This module contains all data models used by the benchmark system.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import statistics
|
|
9
|
+
from enum import StrEnum
|
|
10
|
+
|
|
11
|
+
from pydantic import BaseModel, Field
|
|
12
|
+
|
|
13
|
+
from shotgun.codebase.core.metrics_types import IndexingMetrics
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class BenchmarkMode(StrEnum):
|
|
17
|
+
"""Execution mode for benchmarks."""
|
|
18
|
+
|
|
19
|
+
PARALLEL = "parallel"
|
|
20
|
+
SEQUENTIAL = "sequential"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class OutputFormat(StrEnum):
|
|
24
|
+
"""Output format for benchmark results."""
|
|
25
|
+
|
|
26
|
+
JSON = "json"
|
|
27
|
+
MARKDOWN = "markdown"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class BenchmarkConfig(BaseModel):
    """Settings that control how a benchmark is executed."""

    # Execution strategy; parallel unless overridden.
    mode: BenchmarkMode = BenchmarkMode.PARALLEL
    # Number of workers; None leaves the count unspecified (reports show "auto").
    worker_count: int | None = None
    # Number of measured runs.
    iterations: int = 1
    # Number of warmup runs, recorded separately from the measured runs.
    warmup_iterations: int = 0
    # Toggles for collecting per-file and per-worker metrics.
    collect_file_metrics: bool = True
    collect_worker_metrics: bool = True
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class BenchmarkRun(BaseModel):
    """Outcome of one benchmark iteration."""

    # Identifier of this run.
    run_id: int
    # True for warmup iterations; BenchmarkResults.add_run stores those
    # separately from measured runs.
    is_warmup: bool
    # Indexing metrics captured for this run.
    metrics: IndexingMetrics
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class BenchmarkResults(BaseModel):
    """Complete results from benchmark execution.

    Runs are collected with :meth:`add_run`; the aggregate fields stay at
    their defaults until :meth:`calculate_statistics` is called.
    """

    codebase_name: str
    codebase_path: str
    config: BenchmarkConfig
    warmup_runs: list[BenchmarkRun] = Field(default_factory=list)
    measured_runs: list[BenchmarkRun] = Field(default_factory=list)

    # Aggregate statistics (calculated after runs)
    avg_duration_seconds: float = 0.0
    min_duration_seconds: float = 0.0
    max_duration_seconds: float = 0.0
    std_dev_seconds: float = 0.0
    avg_throughput: float = 0.0
    # Mean of each measured run's peak memory.
    avg_memory_mb: float = 0.0

    # Comparison data
    baseline_duration: float | None = None
    speedup_factor: float | None = None
    efficiency: float | None = None

    def add_run(self, run: BenchmarkRun) -> None:
        """Add a benchmark run to results.

        Warmup runs and measured runs are kept in separate lists so that
        warmups never influence the aggregate statistics.

        Args:
            run: Benchmark run to add
        """
        if run.is_warmup:
            self.warmup_runs.append(run)
        else:
            self.measured_runs.append(run)

    def calculate_statistics(self) -> None:
        """Calculate aggregate statistics from measured runs.

        Does nothing when there are no measured runs. Speedup and
        efficiency are only computed for parallel runs with a known worker
        count, a baseline duration and a non-zero average duration.
        """
        if not self.measured_runs:
            return

        durations = [r.metrics.total_duration_seconds for r in self.measured_runs]
        throughputs = [r.metrics.avg_throughput for r in self.measured_runs]
        memories = [r.metrics.peak_memory_mb for r in self.measured_runs]

        self.avg_duration_seconds = statistics.mean(durations)
        self.min_duration_seconds = min(durations)
        self.max_duration_seconds = max(durations)
        # statistics.stdev needs at least two data points.
        self.std_dev_seconds = (
            statistics.stdev(durations) if len(durations) > 1 else 0.0
        )
        self.avg_throughput = statistics.mean(throughputs)
        self.avg_memory_mb = statistics.mean(memories)

        # Calculate efficiency if parallel mode with known worker count.
        # The average duration must be positive: a zero average (e.g. runs
        # too short to measure) would otherwise raise ZeroDivisionError.
        if (
            self.config.mode == BenchmarkMode.PARALLEL
            and self.config.worker_count
            and self.baseline_duration
            and self.avg_duration_seconds > 0
        ):
            speedup = self.baseline_duration / self.avg_duration_seconds
            self.speedup_factor = speedup
            self.efficiency = speedup / self.config.worker_count

    def get_last_metrics(self) -> IndexingMetrics | None:
        """Get metrics from the last measured run.

        Returns:
            IndexingMetrics from last run, or None if no runs
        """
        if self.measured_runs:
            return self.measured_runs[-1].metrics
        return None
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class MetricsDisplayOptions(BaseModel):
    """Switches that decide which metric sections a formatter renders."""

    # Per-phase table; on by default.
    show_phase_metrics: bool = True
    # Per-worker section; off by default.
    show_worker_metrics: bool = False
    # Per-file table; off by default.
    show_file_metrics: bool = False
    # When set, detail sections are suppressed and only the summary is shown.
    show_summary_only: bool = False
    # Cap on the number of files listed; None means no cap.
    top_n_files: int | None = None
    # Minimum per-file duration threshold in milliseconds; None disables it.
    min_file_duration_ms: float | None = None
|
|
@@ -5,6 +5,7 @@ from shotgun.codebase.core.code_retrieval import (
|
|
|
5
5
|
retrieve_code_by_cypher,
|
|
6
6
|
retrieve_code_by_qualified_name,
|
|
7
7
|
)
|
|
8
|
+
from shotgun.codebase.core.gitignore import GitignoreManager, load_gitignore_for_repo
|
|
8
9
|
from shotgun.codebase.core.ingestor import (
|
|
9
10
|
CodebaseIngestor,
|
|
10
11
|
Ingestor,
|
|
@@ -29,6 +30,9 @@ __all__ = [
|
|
|
29
30
|
"Ingestor",
|
|
30
31
|
"SimpleGraphBuilder",
|
|
31
32
|
"CodebaseGraphManager",
|
|
33
|
+
# Gitignore support
|
|
34
|
+
"GitignoreManager",
|
|
35
|
+
"load_gitignore_for_repo",
|
|
32
36
|
# Language configuration
|
|
33
37
|
"LanguageConfig",
|
|
34
38
|
"LANGUAGE_CONFIGS",
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""Call resolution utilities for function/method call graph building.
|
|
2
|
+
|
|
3
|
+
This module provides shared utilities for resolving function calls
|
|
4
|
+
and calculating confidence scores for potential callee matches.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from collections.abc import Collection, Mapping
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def calculate_callee_confidence(
    caller_qn: str,
    callee_qn: str,
    module_qn: str,
    object_name: str | None,
    simple_name_lookup: Mapping[str, Collection[str]],
) -> float:
    """Calculate confidence score for a potential callee match.

    Uses multiple additive heuristics to determine how likely a given
    callee is the correct target of a function call:
    1. Module locality (+0.5), plus +0.2 when caller and callee share the
       same parent scope
    2. Package locality (+0.3), only considered when the callee is not in
       the caller's module
    3. Object/class match for method calls (+0.3), or a ``self`` call that
       resolves inside the caller's own scope (+0.4)
    4. Standard library boost (+0.1)
    5. Name uniqueness boost (+0.2 for a single candidate, +0.1 for up to
       three candidates)

    Args:
        caller_qn: Qualified name of the calling function
        callee_qn: Qualified name of the potential callee
        module_qn: Qualified name of the current module
        object_name: Object name for method calls (e.g., 'obj' in obj.method())
        simple_name_lookup: Mapping from simple names to qualified names
            (supports both set[str] and list[str] values)

    Returns:
        Confidence score between 0.0 and 1.0
    """
    score = 0.0
    caller_parts = caller_qn.split(".")
    callee_parts = callee_qn.split(".")

    # 1. Module locality - functions in the same module are most likely
    if callee_qn.startswith(module_qn + "."):
        score += 0.5

        # Even higher if both names share the same parent scope
        if len(caller_parts) >= 3 and len(callee_parts) >= 3:
            if caller_parts[:-1] == callee_parts[:-1]:
                score += 0.2

    # 2. Package locality - functions in the same package hierarchy
    elif "." in module_qn:
        package = module_qn.rsplit(".", 1)[0]
        if callee_qn.startswith(package + "."):
            score += 0.3

    # 3. Object/class match for method calls
    if object_name and len(callee_parts) >= 2:
        # Simple heuristic: check if class name matches object name
        # (In reality, we'd need type inference for accuracy)
        class_name = callee_parts[-2]
        if class_name.lower() == object_name.lower():
            score += 0.3
        elif object_name == "self":
            # 'self' refers to the caller's own class. The trailing "."
            # makes this a scope-boundary match, so class "Foo" does not
            # also match the unrelated sibling "FooBar".
            caller_scope = caller_qn.rsplit(".", 1)[0]
            if callee_qn.startswith(caller_scope + "."):
                score += 0.4

    # 4. Standard library boost
    # Give a small boost to standard library functions
    if callee_qn.startswith(("builtins.", "typing.", "collections.")):
        score += 0.1

    # 5. Name uniqueness boost
    # If function names are unique enough, boost confidence.
    # NOTE(review): a name that is absent from the lookup (count 0) also
    # receives the +0.1 boost; kept as-is to avoid shifting existing scores.
    possible_count = len(simple_name_lookup.get(callee_parts[-1], []))
    if possible_count == 1:
        score += 0.2
    elif possible_count <= 3:
        score += 0.1

    # Normalize to [0, 1]
    return min(score, 1.0)
|
|
@@ -1,16 +1,21 @@
|
|
|
1
1
|
"""Change detection for incremental graph updates."""
|
|
2
2
|
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
3
5
|
import hashlib
|
|
4
6
|
import os
|
|
5
7
|
from enum import Enum
|
|
6
8
|
from pathlib import Path
|
|
7
|
-
from typing import Any, cast
|
|
9
|
+
from typing import TYPE_CHECKING, Any, cast
|
|
8
10
|
|
|
9
11
|
import aiofiles
|
|
10
|
-
import real_ladybug as kuzu
|
|
11
12
|
|
|
13
|
+
from shotgun.codebase.models import NodeLabel, RelationshipType
|
|
12
14
|
from shotgun.logging_config import get_logger
|
|
13
15
|
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
import real_ladybug as kuzu
|
|
18
|
+
|
|
14
19
|
logger = get_logger(__name__)
|
|
15
20
|
|
|
16
21
|
|
|
@@ -332,10 +337,10 @@ class ChangeDetector:
|
|
|
332
337
|
|
|
333
338
|
# Query each TRACKS relationship type
|
|
334
339
|
for node_type, rel_type in [
|
|
335
|
-
(
|
|
336
|
-
(
|
|
337
|
-
(
|
|
338
|
-
(
|
|
340
|
+
(NodeLabel.MODULE, RelationshipType.TRACKS_MODULE),
|
|
341
|
+
(NodeLabel.CLASS, RelationshipType.TRACKS_CLASS),
|
|
342
|
+
(NodeLabel.FUNCTION, RelationshipType.TRACKS_FUNCTION),
|
|
343
|
+
(NodeLabel.METHOD, RelationshipType.TRACKS_METHOD),
|
|
339
344
|
]:
|
|
340
345
|
try:
|
|
341
346
|
result = self.conn.execute(
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
"""Error classification for Kuzu database operations.
|
|
2
|
+
|
|
3
|
+
This module provides error classification for Kuzu database errors,
|
|
4
|
+
allowing the application to distinguish between different failure modes
|
|
5
|
+
(lock contention, corruption, permissions, etc.) and handle each appropriately.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from enum import StrEnum
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from pydantic import BaseModel
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class KuzuErrorType(StrEnum):
|
|
15
|
+
"""Classification of Kuzu database errors."""
|
|
16
|
+
|
|
17
|
+
LOCKED = "locked" # Another process has DB open
|
|
18
|
+
CORRUPTION = "corruption" # Database file is invalid/corrupted
|
|
19
|
+
PERMISSION = "permission" # Permission denied (transient)
|
|
20
|
+
MISSING = "missing" # File not found
|
|
21
|
+
SCHEMA = "schema" # Table doesn't exist (incomplete build)
|
|
22
|
+
TIMEOUT = "timeout" # Operation timed out
|
|
23
|
+
UNKNOWN = "unknown" # Unrecognized error
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def classify_kuzu_error(exception: Exception) -> KuzuErrorType:
    """Map a Kuzu exception onto a KuzuErrorType using its message text.

    Note: Kuzu only throws generic RuntimeError exceptions with no error codes
    or custom exception types, so substring matching on the message is the
    only way to tell failure modes apart. Checks run in priority order and
    the first match wins: lock contention, corruption, permission, missing
    file, schema. This function never returns ``KuzuErrorType.TIMEOUT``.

    Args:
        exception: The exception to classify

    Returns:
        KuzuErrorType indicating the category of error
    """
    text = str(exception)
    folded = text.lower()

    # Lock contention - another process has the database open
    if "Could not set lock" in text:
        return KuzuErrorType.LOCKED

    # Corruption: invalid database files plus C++ internal errors that
    # most likely stem from a damaged database. The first group is matched
    # case-sensitively, the last two case-insensitively.
    exact_corruption_markers = (
        "Unable to open database",
        "Reading past the end of the file",
        "unordered_map",
        "std::exception",
    )
    if (
        any(marker in text for marker in exact_corruption_markers)
        or ("not a valid" in folded and "database" in folded)
        or "key not found" in folded
    ):
        return KuzuErrorType.CORRUPTION

    # Permission errors - transient, may resolve on retry
    if "Permission denied" in text:
        return KuzuErrorType.PERMISSION

    # Missing file - nothing to delete
    if "No such file or directory" in text:
        return KuzuErrorType.MISSING

    # Schema errors - incomplete build, table doesn't exist
    if "does not exist" in text and ("Table" in text or "Binder exception" in text):
        return KuzuErrorType.SCHEMA

    return KuzuErrorType.UNKNOWN
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class DatabaseIssue(BaseModel):
    """Structured description of a problem found with a graph database.

    Attributes:
        graph_id: The ID of the affected graph
        graph_path: Path to the database file
        error_type: Classification of the error
        message: Human-readable error message
    """

    # Allows field types pydantic has no built-in validator for.
    model_config = {"arbitrary_types_allowed": True}

    graph_id: str
    graph_path: Path
    error_type: KuzuErrorType
    message: str
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class KuzuDatabaseError(Exception):
    """Root of the classified Kuzu database exception hierarchy.

    Only ``message`` is forwarded to ``Exception``; the graph identity and
    the error classification are exposed as attributes.
    """

    def __init__(
        self, message: str, graph_id: str, graph_path: str, error_type: KuzuErrorType
    ) -> None:
        super().__init__(message)
        # Context for handlers that need to know which graph failed and why.
        self.graph_id, self.graph_path, self.error_type = (
            graph_id,
            graph_path,
            error_type,
        )
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class DatabaseLockedError(KuzuDatabaseError):
    """The database is held open by another process."""

    def __init__(self, graph_id: str, graph_path: str) -> None:
        message = (
            f"Database '{graph_id}' is locked by another process. "
            "Only one shotgun instance can access a codebase at a time."
        )
        super().__init__(
            message,
            graph_id=graph_id,
            graph_path=graph_path,
            error_type=KuzuErrorType.LOCKED,
        )
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class DatabaseCorruptedError(KuzuDatabaseError):
    """The database file is corrupted."""

    def __init__(self, graph_id: str, graph_path: str, details: str = "") -> None:
        # Optional details are appended after a colon; an empty string
        # leaves the base message untouched.
        suffix = f": {details}" if details else ""
        super().__init__(
            f"Database '{graph_id}' is corrupted" + suffix,
            graph_id=graph_id,
            graph_path=graph_path,
            error_type=KuzuErrorType.CORRUPTION,
        )
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class DatabaseSchemaError(KuzuDatabaseError):
    """The database schema is incomplete because a build was interrupted."""

    def __init__(self, graph_id: str, graph_path: str) -> None:
        message = f"Database '{graph_id}' has incomplete schema (build was interrupted)"
        super().__init__(
            message,
            graph_id=graph_id,
            graph_path=graph_path,
            error_type=KuzuErrorType.SCHEMA,
        )
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
class DatabaseTimeoutError(KuzuDatabaseError):
    """A database operation exceeded its time limit."""

    def __init__(self, graph_id: str, graph_path: str, timeout_seconds: float) -> None:
        message = (
            f"Database '{graph_id}' operation timed out after {timeout_seconds}s. "
            "This can happen with large codebases."
        )
        super().__init__(
            message,
            graph_id=graph_id,
            graph_path=graph_path,
            error_type=KuzuErrorType.TIMEOUT,
        )
        # Kept so handlers can report or adjust the limit that was exceeded.
        self.timeout_seconds = timeout_seconds
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""Language-specific AST extraction framework.
|
|
2
|
+
|
|
3
|
+
This module provides a Protocol-based architecture for extracting
|
|
4
|
+
definitions, relationships, and metadata from source code ASTs.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
from shotgun.codebase.core.extractors import get_extractor, SupportedLanguage
|
|
8
|
+
|
|
9
|
+
extractor = get_extractor(SupportedLanguage.PYTHON)
|
|
10
|
+
decorators = extractor.extract_decorators(node)
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from .factory import get_extractor
|
|
16
|
+
from .protocol import LanguageExtractor
|
|
17
|
+
from .types import SupportedLanguage
|
|
18
|
+
|
|
19
|
+
__all__ = [
|
|
20
|
+
"SupportedLanguage",
|
|
21
|
+
"LanguageExtractor",
|
|
22
|
+
"get_extractor",
|
|
23
|
+
]
|