codespy-ai 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. codespy/__init__.py +3 -0
  2. codespy/agents/__init__.py +21 -0
  3. codespy/agents/cost_tracker.py +255 -0
  4. codespy/agents/dspy_config.py +158 -0
  5. codespy/agents/reviewer/__init__.py +17 -0
  6. codespy/agents/reviewer/models.py +338 -0
  7. codespy/agents/reviewer/modules/__init__.py +17 -0
  8. codespy/agents/reviewer/modules/bug_detector.py +197 -0
  9. codespy/agents/reviewer/modules/deduplicator.py +105 -0
  10. codespy/agents/reviewer/modules/doc_reviewer.py +195 -0
  11. codespy/agents/reviewer/modules/domain_expert.py +246 -0
  12. codespy/agents/reviewer/modules/helpers.py +119 -0
  13. codespy/agents/reviewer/modules/scope_identifier.py +330 -0
  14. codespy/agents/reviewer/modules/security_auditor.py +355 -0
  15. codespy/agents/reviewer/reporters/__init__.py +11 -0
  16. codespy/agents/reviewer/reporters/base.py +18 -0
  17. codespy/agents/reviewer/reporters/github_pr.py +304 -0
  18. codespy/agents/reviewer/reporters/stdout.py +38 -0
  19. codespy/agents/reviewer/reviewer.py +202 -0
  20. codespy/cli.py +241 -0
  21. codespy/config.py +361 -0
  22. codespy/config_dspy.py +103 -0
  23. codespy/config_git.py +93 -0
  24. codespy/config_io.py +40 -0
  25. codespy/config_llm.py +271 -0
  26. codespy/tools/__init__.py +24 -0
  27. codespy/tools/cyber/__init__.py +17 -0
  28. codespy/tools/cyber/osv/__init__.py +54 -0
  29. codespy/tools/cyber/osv/client.py +451 -0
  30. codespy/tools/cyber/osv/models.py +335 -0
  31. codespy/tools/cyber/osv/server.py +236 -0
  32. codespy/tools/filesystem/__init__.py +21 -0
  33. codespy/tools/filesystem/client.py +343 -0
  34. codespy/tools/filesystem/models.py +111 -0
  35. codespy/tools/filesystem/server.py +154 -0
  36. codespy/tools/github/__init__.py +6 -0
  37. codespy/tools/github/client.py +249 -0
  38. codespy/tools/github/models.py +269 -0
  39. codespy/tools/github/server.py +104 -0
  40. codespy/tools/mcp_utils.py +75 -0
  41. codespy/tools/parsers/__init__.py +18 -0
  42. codespy/tools/parsers/ripgrep/__init__.py +5 -0
  43. codespy/tools/parsers/ripgrep/client.py +309 -0
  44. codespy/tools/parsers/ripgrep/server.py +193 -0
  45. codespy/tools/parsers/treesitter/__init__.py +15 -0
  46. codespy/tools/parsers/treesitter/base_extractor.py +67 -0
  47. codespy/tools/parsers/treesitter/extractors/__init__.py +23 -0
  48. codespy/tools/parsers/treesitter/extractors/go.py +86 -0
  49. codespy/tools/parsers/treesitter/extractors/java.py +73 -0
  50. codespy/tools/parsers/treesitter/extractors/javascript.py +95 -0
  51. codespy/tools/parsers/treesitter/extractors/kotlin.py +49 -0
  52. codespy/tools/parsers/treesitter/extractors/objc.py +81 -0
  53. codespy/tools/parsers/treesitter/extractors/python.py +69 -0
  54. codespy/tools/parsers/treesitter/extractors/rust.py +72 -0
  55. codespy/tools/parsers/treesitter/extractors/swift.py +49 -0
  56. codespy/tools/parsers/treesitter/extractors/terraform.py +494 -0
  57. codespy/tools/parsers/treesitter/models.py +153 -0
  58. codespy/tools/parsers/treesitter/parser.py +478 -0
  59. codespy/tools/parsers/treesitter/server.py +347 -0
  60. codespy/tools/web/__init__.py +11 -0
  61. codespy/tools/web/client.py +295 -0
  62. codespy/tools/web/models.py +58 -0
  63. codespy/tools/web/server.py +77 -0
  64. codespy_ai-0.1.0.dist-info/METADATA +620 -0
  65. codespy_ai-0.1.0.dist-info/RECORD +68 -0
  66. codespy_ai-0.1.0.dist-info/WHEEL +4 -0
  67. codespy_ai-0.1.0.dist-info/entry_points.txt +3 -0
  68. codespy_ai-0.1.0.dist-info/licenses/LICENSE +21 -0
codespy/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """codespy - Code review agent powered by DSPy."""
2
+
3
+ __version__ = "0.1.0"
codespy/agents/__init__.py ADDED
@@ -0,0 +1,21 @@
1
+ """Agents module - shared utilities and agent implementations."""
2
+
3
+ from codespy.agents.cost_tracker import (
4
+ CostTracker,
5
+ SignatureContext,
6
+ SignatureStats,
7
+ get_cost_tracker,
8
+ )
9
+ from codespy.agents.dspy_config import (
10
+ configure_dspy,
11
+ verify_model_access,
12
+ )
13
+
14
+ __all__ = [
15
+ "CostTracker",
16
+ "SignatureContext",
17
+ "SignatureStats",
18
+ "get_cost_tracker",
19
+ "configure_dspy",
20
+ "verify_model_access",
21
+ ]
codespy/agents/cost_tracker.py ADDED
@@ -0,0 +1,255 @@
1
+ """Thread-safe cost tracking for LLM calls with per-signature attribution.
2
+
3
+ Uses DSPy's internal LM history mechanism for reliable per-signature attribution,
4
+ even during parallel execution with dspy.Parallel.
5
+ """
6
+
7
+ import threading
8
+ import time
9
+ from dataclasses import dataclass
10
+ from typing import Optional
11
+
12
+ import dspy # type: ignore[import-untyped]
13
+
14
+
15
@dataclass
class SignatureStats:
    """Accumulated LLM usage figures for one named signature.

    Attributes:
        name: Identifier of the signature the figures belong to.
        cost: Summed cost of the signature's LLM calls.
        tokens: Summed token count of the signature's LLM calls.
        call_count: Number of LLM calls attributed to the signature.
        start_time: Start timestamp in seconds on the ``time.time()`` clock,
            or None if the signature has not been started.
        end_time: End timestamp on the same clock, or None while the
            signature is still running.
    """

    name: str
    cost: float = 0.0
    tokens: int = 0
    call_count: int = 0
    start_time: Optional[float] = None
    end_time: Optional[float] = None

    @property
    def duration_seconds(self) -> float:
        """Return the elapsed time in seconds.

        Returns:
            0.0 if the signature was never started; the time elapsed so far
            if it was started but has not ended; otherwise
            ``end_time - start_time``. The result is never negative.
        """
        if self.start_time is None:
            return 0.0
        end = self.end_time if self.end_time is not None else time.time()
        # Timestamps come from the wall clock, which can step backwards;
        # a negative duration is meaningless, so clamp at zero.
        return max(0.0, end - self.start_time)

    def to_dict(self) -> dict:
        """Return a plain-dict view suitable for serialization.

        The raw timestamps are replaced by the derived ``duration_seconds``.
        """
        return {
            "name": self.name,
            "cost": self.cost,
            "tokens": self.tokens,
            "call_count": self.call_count,
            "duration_seconds": self.duration_seconds,
        }
43
+
44
+
45
class CostTracker:
    """Thread-safe accumulator of LLM cost, token and call totals per signature.

    Every access to the internal ``name -> SignatureStats`` mapping is
    serialized through one lock. The accessors hand out copies rather than
    the live records, so a caller can neither observe a half-applied update
    nor modify tracker state without holding the lock.
    """

    def __init__(self) -> None:
        """Create an empty tracker."""
        self._lock = threading.Lock()
        self._signature_stats: dict[str, SignatureStats] = {}

    def reset(self) -> None:
        """Discard the records of every signature."""
        with self._lock:
            self._signature_stats.clear()

    def _stats_for(self, signature_name: str) -> SignatureStats:
        """Return the live record for a signature, creating it on first use.

        The caller must already hold ``self._lock``.
        """
        stats = self._signature_stats.get(signature_name)
        if stats is None:
            stats = SignatureStats(name=signature_name)
            self._signature_stats[signature_name] = stats
        return stats

    @staticmethod
    def _snapshot(stats: SignatureStats) -> SignatureStats:
        """Return an independent copy of a record."""
        return SignatureStats(
            name=stats.name,
            cost=stats.cost,
            tokens=stats.tokens,
            call_count=stats.call_count,
            start_time=stats.start_time,
            end_time=stats.end_time,
        )

    def start_signature(self, signature_name: str) -> None:
        """Mark the start of a signature's execution.

        Stamps the start time and clears any previous end time; the totals
        accumulated by earlier runs of the same signature are kept.

        Args:
            signature_name: Name of the signature starting execution.
        """
        with self._lock:
            stats = self._stats_for(signature_name)
            stats.start_time = time.time()
            stats.end_time = None

    def end_signature(self, signature_name: str, cost: float, tokens: int, call_count: int) -> None:
        """Mark the end of a signature's execution and add its usage.

        The figures are added to whatever the signature has accumulated so
        far. A signature that was never started gets a record created here.

        Args:
            signature_name: Name of the signature ending execution.
            cost: Cost of the LLM calls made during this run.
            tokens: Tokens used during this run.
            call_count: Number of LLM calls made during this run.
        """
        with self._lock:
            stats = self._stats_for(signature_name)
            stats.end_time = time.time()
            stats.cost += cost
            stats.tokens += tokens
            stats.call_count += call_count

    @property
    def total_cost(self) -> float:
        """Return the cost summed across all signatures."""
        with self._lock:
            return sum(s.cost for s in self._signature_stats.values())

    @property
    def total_tokens(self) -> int:
        """Return the token count summed across all signatures."""
        with self._lock:
            return sum(s.tokens for s in self._signature_stats.values())

    @property
    def call_count(self) -> int:
        """Return the number of LLM calls summed across all signatures."""
        with self._lock:
            return sum(s.call_count for s in self._signature_stats.values())

    def get_signature_stats(self, signature_name: str) -> Optional[SignatureStats]:
        """Return a snapshot of one signature's stats.

        Args:
            signature_name: Name of the signature.

        Returns:
            A copy of the signature's record, or None if the signature is
            unknown. A copy is returned (as in ``get_all_signature_stats``)
            so the caller never holds a record that another thread is
            updating under the lock.
        """
        with self._lock:
            stats = self._signature_stats.get(signature_name)
            return None if stats is None else self._snapshot(stats)

    def get_all_signature_stats(self) -> dict[str, SignatureStats]:
        """Return snapshots of the stats of every signature.

        Returns:
            A new dictionary mapping signature name to a copy of its record.
        """
        with self._lock:
            return {
                name: self._snapshot(stats)
                for name, stats in self._signature_stats.items()
            }
138
+
139
+
140
def _get_history_entries() -> list[dict]:
    """Return a snapshot of the configured DSPy LM's history entries.

    Returns:
        A new list holding the entries of ``dspy.settings.lm.history``, or
        an empty list when no LM is configured, the LM has no (or an empty)
        history, or the lookup fails.
    """
    try:
        lm = dspy.settings.lm
        history = getattr(lm, "history", None)
        # Copy rather than expose the LM's own list: callers iterate the
        # result, and LLM calls on other threads may still be mutating the
        # original while they do.
        return list(history) if history else []
    except Exception:
        # Deliberately best-effort: cost tracking must never break a review
        # just because the LM state could not be read.
        return []
153
+
154
+
155
def _get_history_uuids() -> set[str]:
    """Collect the ``uuid`` of every current history entry.

    Entries whose ``uuid`` is missing or empty are ignored.

    Returns:
        Set of the non-empty UUIDs found in the current history.
    """
    found: set[str] = set()
    for item in _get_history_entries():
        ident = item.get("uuid")
        if ident:
            found.add(ident)
    return found
163
+
164
+
165
+ def _calculate_costs_from_entries(entries: list[dict], exclude_uuids: set[str]) -> tuple[float, int, int]:
166
+ """Calculate costs from history entries, excluding specific UUIDs.
167
+
168
+ Args:
169
+ entries: List of history entries
170
+ exclude_uuids: Set of UUIDs to exclude from calculation
171
+
172
+ Returns:
173
+ Tuple of (total_cost, total_tokens, call_count)
174
+ """
175
+ total_cost = 0.0
176
+ total_tokens = 0
177
+ call_count = 0
178
+
179
+ for entry in entries:
180
+ entry_uuid = entry.get("uuid", "")
181
+ if entry_uuid and entry_uuid not in exclude_uuids:
182
+ # Get cost
183
+ cost = entry.get("cost")
184
+ if cost is not None:
185
+ total_cost += cost
186
+
187
+ # Get tokens from usage
188
+ usage = entry.get("usage", {})
189
+ if usage:
190
+ prompt_tokens = usage.get("prompt_tokens", 0) or 0
191
+ completion_tokens = usage.get("completion_tokens", 0) or 0
192
+ total_tokens += prompt_tokens + completion_tokens
193
+
194
+ call_count += 1
195
+
196
+ return total_cost, total_tokens, call_count
197
+
198
+
199
class SignatureContext:
    """Context manager that attributes LLM usage to a named signature.

    Attribution is a before/after diff of the DSPy LM history:
    1. On entry, the UUIDs already present in the history are remembered.
    2. On exit, every history entry whose UUID was not seen on entry is
       treated as new.
    3. The cost, tokens and call count of the new entries are reported to
       the tracker under the signature's name.

    NOTE(review): any entry that appears in the history between entry and
    exit is counted, whichever caller produced it - confirm that concurrent
    signatures do not share one LM history.

    Usage:
        with SignatureContext("bug_detection", cost_tracker):
            # All LLM calls here will be attributed to bug_detection
            result = await agent.acall(...)
    """

    def __init__(self, signature_name: str, tracker: "CostTracker") -> None:
        """Bind the context to a signature name and a tracker.

        Args:
            signature_name: Name under which usage is recorded.
            tracker: CostTracker instance that receives the figures.
        """
        self.signature_name = signature_name
        self.tracker = tracker
        # UUIDs present in the history when the context was entered.
        self._before_uuids: set[str] = set()

    def __enter__(self) -> "SignatureContext":
        """Remember the current history UUIDs and mark the signature started."""
        self._before_uuids = _get_history_uuids()
        self.tracker.start_signature(self.signature_name)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Report the usage of entries added since entry; never suppresses exceptions."""
        usage_totals = _calculate_costs_from_entries(
            _get_history_entries(),
            self._before_uuids,
        )
        self.tracker.end_signature(self.signature_name, *usage_totals)

    async def __aenter__(self) -> "SignatureContext":
        """Async counterpart of ``__enter__``; delegates to it."""
        return self.__enter__()

    async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
        """Async counterpart of ``__exit__``; delegates to it."""
        self.__exit__(exc_type, exc_val, exc_tb)
247
+
248
+
249
# Single module-level tracker, created at import time and handed out by
# get_cost_tracker().
_cost_tracker = CostTracker()


def get_cost_tracker() -> CostTracker:
    """Return the shared module-level CostTracker instance."""
    return _cost_tracker
codespy/agents/dspy_config.py ADDED
@@ -0,0 +1,158 @@
1
+ """DSPy and LiteLLM configuration utilities."""
2
+
3
+ import logging
4
+
5
+ import dspy # type: ignore[import-untyped]
6
+ from dspy.adapters.two_step_adapter import TwoStepAdapter # type: ignore[import-untyped]
7
+ import litellm # type: ignore[import-untyped]
8
+
9
+ from codespy.config import Settings
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
def configure_dspy(settings: Settings) -> None:
    """Configure DSPy and LiteLLM globally from the application settings.

    Steps performed:
    - Copy the OpenAI / Anthropic API keys, when set, onto ``litellm``.
    - Export the AWS region and credentials as environment variables when
      either the main model or the extraction model is a ``bedrock/`` model.
    - Build the main LM with the configured timeout and retry count, plus
      provider-side prompt caching of system messages when enabled.
    - Build a second LM from ``settings.extraction_model`` and install it in
      a ``TwoStepAdapter``: the main LM answers free-form, the extraction LM
      turns that answer into the structured output fields.
    - Enable DSPy's in-memory response cache and disable its disk cache.

    Args:
        settings: Application settings containing model and API key configuration.
    """
    model = settings.default_model
    extraction_model = settings.extraction_model

    # Configure LiteLLM API keys if provided
    if settings.openai_api_key:
        litellm.openai_key = settings.openai_api_key
    if settings.anthropic_api_key:
        litellm.anthropic_key = settings.anthropic_api_key

    # Both LMs built below may talk to Bedrock, so the credentials are needed
    # when either model uses it - not only the default one.
    uses_bedrock = any(
        isinstance(name, str) and name.startswith("bedrock/")
        for name in (model, extraction_model)
    )
    if uses_bedrock:
        import os

        os.environ["AWS_REGION_NAME"] = settings.aws_region
        if settings.aws_access_key_id:
            os.environ["AWS_ACCESS_KEY_ID"] = settings.aws_access_key_id
        if settings.aws_secret_access_key:
            os.environ["AWS_SECRET_ACCESS_KEY"] = settings.aws_secret_access_key

    # Build LM kwargs with reliability settings
    lm_kwargs: dict = {
        "model": model,
        "timeout": settings.llm_timeout,
        "num_retries": settings.llm_retries,
    }

    # Provider-side prompt caching: mark system messages as cacheable so the
    # provider can reuse them across calls.
    if settings.enable_prompt_caching:
        lm_kwargs["cache_control_injection_points"] = [
            {"location": "message", "role": "system"}
        ]

    lm = dspy.LM(**lm_kwargs)

    # Second-stage LM used by TwoStepAdapter to extract the structured fields
    # from the main LM's free-form response.
    extraction_lm = dspy.LM(
        model=extraction_model,
        timeout=settings.llm_timeout,
        num_retries=settings.llm_retries,
    )

    dspy.settings.configure(
        lm=lm,
        adapter=TwoStepAdapter(extraction_lm),
    )

    # Enable memory-only caching for LLM calls (no disk caching)
    dspy.configure_cache(enable_memory_cache=True, enable_disk_cache=False, memory_max_entries=10000)

    prompt_cache_status = "enabled" if settings.enable_prompt_caching else "disabled"
    logger.info(
        "Configured DSPy with model: %s "
        "(TwoStepAdapter with extraction_model=%s, "
        "timeout=%ss, retries=%s, "
        "provider prompt caching %s)",
        model,
        extraction_model,
        settings.llm_timeout,
        settings.llm_retries,
        prompt_cache_status,
    )
87
+
88
+
89
def verify_model_access(settings: Settings) -> tuple[bool, str]:
    """Verify that every configured model is accessible.

    Sends a minimal completion request to the default model, the extraction
    model (when one is configured) and every per-signature model override.
    Each distinct model is probed once, in sorted order so the resulting
    message is deterministic.

    Args:
        settings: Application settings containing model configuration.

    Returns:
        Tuple of (success, message). ``success`` is False if any model could
        not be reached; the message then lists every failure.
    """
    models_to_check: set[str] = {settings.default_model}

    # The extraction model is called on every structured request, so a
    # misconfiguration there must be caught here as well.
    extraction_model = getattr(settings, "extraction_model", None)
    if extraction_model:
        models_to_check.add(extraction_model)

    # Per-signature overrides
    models_to_check.update(
        sig_config.model
        for sig_config in settings.signatures.values()
        if sig_config.model
    )

    # Bound each probe so an unreachable endpoint cannot hang the check.
    probe_timeout = getattr(settings, "llm_timeout", None)

    verified: list[str] = []
    failed: list[str] = []

    for model in sorted(models_to_check, key=str):
        try:
            litellm.completion(
                model=model,
                messages=[{"role": "user", "content": "Hi"}],
                max_tokens=5,
                timeout=probe_timeout,
            )
        except litellm.AuthenticationError as e:
            failed.append(f"{model}: authentication failed - {e}")
        except litellm.RateLimitError as e:
            failed.append(f"{model}: rate limit exceeded - {e}")
        except litellm.APIConnectionError as e:
            failed.append(f"{model}: connection error - {e}")
        except Exception as e:
            # Any other failure still means the model is unusable; report it
            # instead of aborting the remaining checks.
            failed.append(f"{model}: {e}")
        else:
            verified.append(model)
            logger.info("Model verified: %s", model)

    if failed:
        return False, f"Model verification failed: {'; '.join(failed)}"

    return True, f"Verified {len(verified)} model(s): {', '.join(verified)}"
134
+
135
+
136
+ class _TaskDestroyedFilter(logging.Filter):
137
+ """Filter to suppress 'Task was destroyed' messages from asyncio."""
138
+
139
+ def filter(self, record: logging.LogRecord) -> bool:
140
+ msg = record.getMessage()
141
+ if "Task was destroyed" in msg and "LoggingWorker" in msg:
142
+ return False
143
+ return True
144
+
145
+
146
+ class _MCPRequestFilter(logging.Filter):
147
+ """Filter to suppress all noisy 'Processing request of type' MCP server messages."""
148
+
149
+ def filter(self, record: logging.LogRecord) -> bool:
150
+ return "Processing request of type" not in record.getMessage()
151
+
152
+
153
# Suppress LiteLLM's async logging worker warnings that occur during multi-threaded execution
logging.getLogger("asyncio").addFilter(_TaskDestroyedFilter())

# Suppress noisy MCP server "Processing request" messages.
# A filter attached to a logger only sees records logged directly on that
# logger, not records propagated up from its descendants, so the filter is
# attached to each logger that may emit the message.
# NOTE(review): "mcp.server.lowlevel.server" assumes the MCP SDK logs this
# message under its module name - confirm against the installed SDK.
logging.getLogger("mcp.server").addFilter(_MCPRequestFilter())
logging.getLogger("mcp.server.lowlevel").addFilter(_MCPRequestFilter())
logging.getLogger("mcp.server.lowlevel.server").addFilter(_MCPRequestFilter())
codespy/agents/reviewer/__init__.py ADDED
@@ -0,0 +1,17 @@
1
+ """Reviewer agent - AI-powered code review."""
2
+
3
+ from codespy.agents.reviewer.models import (
4
+ Issue,
5
+ IssueCategory,
6
+ IssueSeverity,
7
+ ReviewResult,
8
+ )
9
+ from codespy.agents.reviewer.reviewer import ReviewPipeline
10
+
11
+ __all__ = [
12
+ "ReviewPipeline",
13
+ "ReviewResult",
14
+ "Issue",
15
+ "IssueCategory",
16
+ "IssueSeverity",
17
+ ]