PyPI - codespy-ai - Versions diffs - 0.1.0__py3-none-any.whl - Mend

codespy-ai 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68)

codespy/__init__.py +3 -0
codespy/agents/__init__.py +21 -0
codespy/agents/cost_tracker.py +255 -0
codespy/agents/dspy_config.py +158 -0
codespy/agents/reviewer/__init__.py +17 -0
codespy/agents/reviewer/models.py +338 -0
codespy/agents/reviewer/modules/__init__.py +17 -0
codespy/agents/reviewer/modules/bug_detector.py +197 -0
codespy/agents/reviewer/modules/deduplicator.py +105 -0
codespy/agents/reviewer/modules/doc_reviewer.py +195 -0
codespy/agents/reviewer/modules/domain_expert.py +246 -0
codespy/agents/reviewer/modules/helpers.py +119 -0
codespy/agents/reviewer/modules/scope_identifier.py +330 -0
codespy/agents/reviewer/modules/security_auditor.py +355 -0
codespy/agents/reviewer/reporters/__init__.py +11 -0
codespy/agents/reviewer/reporters/base.py +18 -0
codespy/agents/reviewer/reporters/github_pr.py +304 -0
codespy/agents/reviewer/reporters/stdout.py +38 -0
codespy/agents/reviewer/reviewer.py +202 -0
codespy/cli.py +241 -0
codespy/config.py +361 -0
codespy/config_dspy.py +103 -0
codespy/config_git.py +93 -0
codespy/config_io.py +40 -0
codespy/config_llm.py +271 -0
codespy/tools/__init__.py +24 -0
codespy/tools/cyber/__init__.py +17 -0
codespy/tools/cyber/osv/__init__.py +54 -0
codespy/tools/cyber/osv/client.py +451 -0
codespy/tools/cyber/osv/models.py +335 -0
codespy/tools/cyber/osv/server.py +236 -0
codespy/tools/filesystem/__init__.py +21 -0
codespy/tools/filesystem/client.py +343 -0
codespy/tools/filesystem/models.py +111 -0
codespy/tools/filesystem/server.py +154 -0
codespy/tools/github/__init__.py +6 -0
codespy/tools/github/client.py +249 -0
codespy/tools/github/models.py +269 -0
codespy/tools/github/server.py +104 -0
codespy/tools/mcp_utils.py +75 -0
codespy/tools/parsers/__init__.py +18 -0
codespy/tools/parsers/ripgrep/__init__.py +5 -0
codespy/tools/parsers/ripgrep/client.py +309 -0
codespy/tools/parsers/ripgrep/server.py +193 -0
codespy/tools/parsers/treesitter/__init__.py +15 -0
codespy/tools/parsers/treesitter/base_extractor.py +67 -0
codespy/tools/parsers/treesitter/extractors/__init__.py +23 -0
codespy/tools/parsers/treesitter/extractors/go.py +86 -0
codespy/tools/parsers/treesitter/extractors/java.py +73 -0
codespy/tools/parsers/treesitter/extractors/javascript.py +95 -0
codespy/tools/parsers/treesitter/extractors/kotlin.py +49 -0
codespy/tools/parsers/treesitter/extractors/objc.py +81 -0
codespy/tools/parsers/treesitter/extractors/python.py +69 -0
codespy/tools/parsers/treesitter/extractors/rust.py +72 -0
codespy/tools/parsers/treesitter/extractors/swift.py +49 -0
codespy/tools/parsers/treesitter/extractors/terraform.py +494 -0
codespy/tools/parsers/treesitter/models.py +153 -0
codespy/tools/parsers/treesitter/parser.py +478 -0
codespy/tools/parsers/treesitter/server.py +347 -0
codespy/tools/web/__init__.py +11 -0
codespy/tools/web/client.py +295 -0
codespy/tools/web/models.py +58 -0
codespy/tools/web/server.py +77 -0
codespy_ai-0.1.0.dist-info/METADATA +620 -0
codespy_ai-0.1.0.dist-info/RECORD +68 -0
codespy_ai-0.1.0.dist-info/WHEEL +4 -0
codespy_ai-0.1.0.dist-info/entry_points.txt +3 -0
codespy_ai-0.1.0.dist-info/licenses/LICENSE +21 -0

codespy/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""codespy - Code review agent powered by DSPy."""
+# Version string of the codespy package.
+__version__ = "0.1.0"

codespy/agents/__init__.py ADDED Viewed

@@ -0,0 +1,21 @@
+"""Agents module - shared utilities and agent implementations."""
+from codespy.agents.cost_tracker import (
+    CostTracker,
+    SignatureContext,
+    SignatureStats,
+    get_cost_tracker,
+)
+from codespy.agents.dspy_config import (
+    configure_dspy,
+    verify_model_access,
+)
+# Public names of this package: the cost-tracking helpers and the DSPy
+# configuration helpers imported above.
+__all__ = [
+    "CostTracker",
+    "SignatureContext",
+    "SignatureStats",
+    "get_cost_tracker",
+    "configure_dspy",
+    "verify_model_access",
+]

codespy/agents/cost_tracker.py ADDED Viewed

@@ -0,0 +1,255 @@
+"""Thread-safe cost tracking for LLM calls with per-signature attribution.
+Uses DSPy's internal LM history mechanism for reliable per-signature attribution,
+even during parallel execution with dspy.Parallel.
+"""
+import threading
+import time
+from dataclasses import dataclass
+from typing import Optional
+import dspy  # type: ignore[import-untyped]
+@dataclass
+class SignatureStats:
+    """Accumulated LLM usage figures for one named signature."""
+    name: str
+    cost: float = 0.0
+    tokens: int = 0
+    call_count: int = 0
+    start_time: Optional[float] = None
+    end_time: Optional[float] = None
+    @property
+    def duration_seconds(self) -> float:
+        """Return the elapsed seconds between start and end.
+        Yields 0.0 when no start time has been recorded. When the end time
+        is still unset, the current ``time.time()`` is used as the end point.
+        """
+        started = self.start_time
+        if started is None:
+            return 0.0
+        finished = self.end_time
+        if finished is None:
+            finished = time.time()
+        return finished - started
+    def to_dict(self) -> dict:
+        """Build a plain dictionary of the stats for serialization."""
+        # The tuple order fixes the key order of the resulting dictionary.
+        exported = ("name", "cost", "tokens", "call_count", "duration_seconds")
+        return {key: getattr(self, key) for key in exported}
+class CostTracker:
+    """Track LLM costs across multiple calls with per-signature attribution.
+    Per-signature totals live in a dictionary guarded by a single lock.
+    Accessors return copies of the stored ``SignatureStats`` so callers never
+    hold a reference to an object the tracker may still be updating.
+    """
+    def __init__(self) -> None:
+        """Initialize an empty cost tracker."""
+        self._lock = threading.Lock()
+        self._signature_stats: dict[str, SignatureStats] = {}
+    @staticmethod
+    def _copy_stats(stats: SignatureStats) -> SignatureStats:
+        """Return a detached copy of ``stats``."""
+        return SignatureStats(
+            name=stats.name,
+            cost=stats.cost,
+            tokens=stats.tokens,
+            call_count=stats.call_count,
+            start_time=stats.start_time,
+            end_time=stats.end_time,
+        )
+    def _get_or_create(self, signature_name: str) -> SignatureStats:
+        """Return the stored stats for a signature, creating them if absent.
+        Must be called with ``self._lock`` held.
+        """
+        stats = self._signature_stats.get(signature_name)
+        if stats is None:
+            stats = SignatureStats(name=signature_name)
+            self._signature_stats[signature_name] = stats
+        return stats
+    def reset(self) -> None:
+        """Discard all recorded per-signature stats."""
+        with self._lock:
+            self._signature_stats.clear()
+    def start_signature(self, signature_name: str) -> None:
+        """Mark the start of a signature's execution.
+        Sets the start time to now and clears any previous end time; totals
+        accumulated by earlier runs of the same signature are kept.
+        Args:
+            signature_name: Name of the signature starting execution
+        """
+        with self._lock:
+            stats = self._get_or_create(signature_name)
+            stats.start_time = time.time()
+            stats.end_time = None
+    def end_signature(self, signature_name: str, cost: float, tokens: int, call_count: int) -> None:
+        """Mark the end of a signature's execution and add its usage.
+        The given figures are added to whatever the signature has already
+        accumulated. A signature that was never started is created here.
+        Args:
+            signature_name: Name of the signature ending execution
+            cost: Total cost for this signature's LLM calls
+            tokens: Total tokens used by this signature
+            call_count: Number of LLM calls made by this signature
+        """
+        with self._lock:
+            stats = self._get_or_create(signature_name)
+            stats.end_time = time.time()
+            stats.cost += cost
+            stats.tokens += tokens
+            stats.call_count += call_count
+    @property
+    def total_cost(self) -> float:
+        """Get total cost across all signatures."""
+        with self._lock:
+            return sum(s.cost for s in self._signature_stats.values())
+    @property
+    def total_tokens(self) -> int:
+        """Get total tokens used across all signatures."""
+        with self._lock:
+            return sum(s.tokens for s in self._signature_stats.values())
+    @property
+    def call_count(self) -> int:
+        """Get total number of LLM calls across all signatures."""
+        with self._lock:
+            return sum(s.call_count for s in self._signature_stats.values())
+    def get_signature_stats(self, signature_name: str) -> Optional[SignatureStats]:
+        """Get stats for a specific signature.
+        Args:
+            signature_name: Name of the signature
+        Returns:
+            A copy of the signature's SignatureStats, or None if the
+            signature is not known
+        """
+        with self._lock:
+            stats = self._signature_stats.get(signature_name)
+            # Copy under the lock, matching get_all_signature_stats(), so the
+            # caller never reads an object another thread is updating.
+            return None if stats is None else self._copy_stats(stats)
+    def get_all_signature_stats(self) -> dict[str, SignatureStats]:
+        """Get stats for all signatures.
+        Returns:
+            Dictionary of signature name to a copy of its SignatureStats
+        """
+        with self._lock:
+            # Return copies to avoid concurrent modification issues
+            return {
+                name: self._copy_stats(stats)
+                for name, stats in self._signature_stats.items()
+            }
+def _get_history_entries() -> list[dict]:
+    """Get a snapshot of the current LM history entries from DSPy.
+    A shallow copy of the history list is returned, so callers can iterate
+    it without being affected by entries added or removed afterwards.
+    Returns:
+        List of history entries, or an empty list if no LM is configured,
+        the LM has no usable ``history``, or reading it fails
+    """
+    try:
+        lm = dspy.settings.lm
+        history = getattr(lm, "history", None)
+        if history is not None:
+            return list(history)
+    except Exception:
+        # Deliberate best-effort: cost tracking must never break the caller,
+        # so any failure while reading the history counts as "no entries".
+        pass
+    return []
+def _get_history_uuids() -> set[str]:
+    """Collect the UUIDs of the entries currently in the LM history.
+    Entries whose "uuid" value is missing or falsy are ignored.
+    Returns:
+        Set of UUIDs from current history
+    """
+    collected: set[str] = set()
+    for item in _get_history_entries():
+        identifier = item.get("uuid")
+        if identifier:
+            collected.add(identifier)
+    return collected
+def _calculate_costs_from_entries(entries: list[dict], exclude_uuids: set[str]) -> tuple[float, int, int]:
+    """Sum cost, tokens and call count over entries not listed for exclusion.
+    An entry is counted only when it has a truthy "uuid" that is not in
+    ``exclude_uuids``. A missing or None "cost" adds nothing to the cost;
+    missing or None token counts are treated as 0.
+    Args:
+        entries: List of history entries
+        exclude_uuids: Set of UUIDs to exclude from calculation
+    Returns:
+        Tuple of (total_cost, total_tokens, call_count)
+    """
+    cost_sum = 0.0
+    token_sum = 0
+    calls = 0
+    for record in entries:
+        record_uuid = record.get("uuid", "")
+        # Skip entries without a UUID as well as the excluded ones
+        if not record_uuid or record_uuid in exclude_uuids:
+            continue
+        record_cost = record.get("cost")
+        if record_cost is not None:
+            cost_sum += record_cost
+        usage = record.get("usage", {})
+        if usage:
+            prompt_part = usage.get("prompt_tokens", 0) or 0
+            completion_part = usage.get("completion_tokens", 0) or 0
+            token_sum += prompt_part + completion_part
+        calls += 1
+    return cost_sum, token_sum, calls
+class SignatureContext:
+    """Context manager that attributes LLM usage to one named signature.
+    On entry it remembers the UUIDs already present in DSPy's LM history and
+    marks the signature as started on the tracker. On exit it sums cost,
+    tokens and call count over the history entries whose UUID was not seen
+    on entry and reports them to the tracker. Exceptions raised inside the
+    block are not suppressed.
+    NOTE(review): the history is read from the globally configured LM, so
+    entries added by other work running at the same time would also count
+    as new here - confirm attribution under parallel execution.
+    Usage:
+        with SignatureContext("bug_detection", cost_tracker):
+            # All LLM calls here will be attributed to bug_detection
+            result = await agent.acall(...)
+    """
+    def __init__(self, signature_name: str, tracker: "CostTracker") -> None:
+        """Store the signature name and the tracker to report to.
+        Args:
+            signature_name: Name of the signature
+            tracker: CostTracker instance
+        """
+        self._before_uuids: set[str] = set()
+        self.tracker = tracker
+        self.signature_name = signature_name
+    def __enter__(self) -> "SignatureContext":
+        """Snapshot the existing history UUIDs and mark the signature started."""
+        # Take the snapshot first so that only later entries count as new
+        self._before_uuids = _get_history_uuids()
+        self.tracker.start_signature(self.signature_name)
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+        """Report the usage of entries added since entry to the tracker."""
+        # The totals tuple is (cost, tokens, call_count), matching the
+        # positional parameters of end_signature after the name
+        new_totals = _calculate_costs_from_entries(
+            _get_history_entries(),
+            self._before_uuids,
+        )
+        self.tracker.end_signature(self.signature_name, *new_totals)
+    async def __aenter__(self) -> "SignatureContext":
+        """Async variant of entering; delegates to ``__enter__``."""
+        return self.__enter__()
+    async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
+        """Async variant of exiting; delegates to ``__exit__``."""
+        self.__exit__(exc_type, exc_val, exc_tb)
+# Global cost tracker instance, created once when this module is imported
+_cost_tracker = CostTracker()
+def get_cost_tracker() -> CostTracker:
+    """Get the global cost tracker instance.
+    Returns:
+        The module-level CostTracker; every call returns the same object
+    """
+    return _cost_tracker

codespy/agents/dspy_config.py ADDED Viewed

@@ -0,0 +1,158 @@
+"""DSPy and LiteLLM configuration utilities."""
+import logging
+import dspy  # type: ignore[import-untyped]
+from dspy.adapters.two_step_adapter import TwoStepAdapter  # type: ignore[import-untyped]
+import litellm  # type: ignore[import-untyped]
+from codespy.config import Settings
+logger = logging.getLogger(__name__)
+def configure_dspy(settings: Settings) -> None:
+    """Set up DSPy and LiteLLM from the application settings.
+    The steps, in order:
+    - hand the OpenAI / Anthropic API keys to LiteLLM when they are set
+    - export AWS region and credentials when the default model is a
+      ``bedrock/`` model
+    - build the main LM with the configured timeout and retry count, plus
+      cache-control injection on system messages when prompt caching is on
+    - build a second LM from ``settings.extraction_model`` and pass it to
+      TwoStepAdapter
+    - enable DSPy's in-memory cache and disable its disk cache
+    TwoStepAdapter is used so that the main LM can answer free-form while
+    the extraction LM pulls the structured fields out of that answer.
+    Args:
+        settings: Application settings containing model and API key configuration.
+    """
+    primary_model = settings.default_model
+    # LiteLLM reads provider keys from module attributes; set only those given
+    for attr_name, key_value in (
+        ("openai_key", settings.openai_api_key),
+        ("anthropic_key", settings.anthropic_api_key),
+    ):
+        if key_value:
+            setattr(litellm, attr_name, key_value)
+    # Bedrock credentials are passed through environment variables
+    if primary_model.startswith("bedrock/"):
+        import os
+        os.environ["AWS_REGION_NAME"] = settings.aws_region
+        optional_credentials = {
+            "AWS_ACCESS_KEY_ID": settings.aws_access_key_id,
+            "AWS_SECRET_ACCESS_KEY": settings.aws_secret_access_key,
+        }
+        os.environ.update(
+            {env_name: value for env_name, value in optional_credentials.items() if value}
+        )
+    # Timeout and retry count are shared by both language models
+    reliability_kwargs = {
+        "timeout": settings.llm_timeout,
+        "num_retries": settings.llm_retries,
+    }
+    main_kwargs: dict = {"model": primary_model, **reliability_kwargs}
+    if settings.enable_prompt_caching:
+        # Ask the provider to cache the system prompt
+        main_kwargs["cache_control_injection_points"] = [
+            {"location": "message", "role": "system"}
+        ]
+    main_lm = dspy.LM(**main_kwargs)
+    # Second-stage LM that TwoStepAdapter uses for field extraction
+    extraction_lm = dspy.LM(model=settings.extraction_model, **reliability_kwargs)
+    dspy.settings.configure(lm=main_lm, adapter=TwoStepAdapter(extraction_lm))
+    # Memory-only caching for LLM calls (no disk caching)
+    dspy.configure_cache(enable_memory_cache=True, enable_disk_cache=False, memory_max_entries=10000)
+    if settings.enable_prompt_caching:
+        caching_label = "enabled"
+    else:
+        caching_label = "disabled"
+    logger.info(
+        f"Configured DSPy with model: {primary_model} "
+        f"(TwoStepAdapter with extraction_model={settings.extraction_model}, "
+        f"timeout={settings.llm_timeout}s, retries={settings.llm_retries}, "
+        f"provider prompt caching {caching_label})"
+    )
+def verify_model_access(settings: Settings) -> tuple[bool, str]:
+    """Verify that all configured models are accessible.
+    Sends a minimal completion request to the default model, the extraction
+    model (when the settings define one) and every per-signature model
+    override. Each distinct model is checked once, in a stable order that
+    starts with the default model.
+    Args:
+        settings: Application settings containing model configuration.
+    Returns:
+        Tuple of (success, message). On failure the message lists every
+        model that could not be used together with the error.
+    """
+    candidates: list[str] = [settings.default_model]
+    # configure_dspy() builds a second LM from settings.extraction_model, so
+    # that model has to be reachable too.
+    extraction_model = getattr(settings, "extraction_model", None)
+    if extraction_model:
+        candidates.append(extraction_model)
+    candidates.extend(
+        sig_config.model
+        for sig_config in settings.signatures.values()
+        if sig_config.model
+    )
+    # dict.fromkeys drops duplicates while keeping first-seen order, which
+    # keeps the resulting messages deterministic (a set would not).
+    models_to_check = list(dict.fromkeys(candidates))
+    verified: list[str] = []
+    failed: list[str] = []
+    for model in models_to_check:
+        try:
+            litellm.completion(
+                model=model,
+                messages=[{"role": "user", "content": "Hi"}],
+                max_tokens=5,
+            )
+        except litellm.AuthenticationError as e:
+            failed.append(f"{model}: authentication failed - {e}")
+        except litellm.RateLimitError as e:
+            failed.append(f"{model}: rate limit exceeded - {e}")
+        except litellm.APIConnectionError as e:
+            failed.append(f"{model}: connection error - {e}")
+        except Exception as e:
+            # Any other failure still means the model cannot be used.
+            failed.append(f"{model}: {e}")
+        else:
+            verified.append(model)
+            logger.info(f"Model verified: {model}")
+    if failed:
+        return False, f"Model verification failed: {'; '.join(failed)}"
+    return True, f"Verified {len(verified)} model(s): {', '.join(verified)}"
+class _TaskDestroyedFilter(logging.Filter):
+    """Drop asyncio 'Task was destroyed' records that mention LoggingWorker."""
+    def filter(self, record: logging.LogRecord) -> bool:
+        """Return False for records containing both marker texts, True otherwise."""
+        text = record.getMessage()
+        is_worker_noise = "Task was destroyed" in text and "LoggingWorker" in text
+        return not is_worker_noise
+class _MCPRequestFilter(logging.Filter):
+    """Drop every log record whose message contains 'Processing request of type'."""
+    def filter(self, record: logging.LogRecord) -> bool:
+        """Return False for the request-processing messages, True otherwise."""
+        text = record.getMessage()
+        if "Processing request of type" in text:
+            return False
+        return True
+# Runs at import time: importing this module installs the filters below.
+# Suppress LiteLLM's async logging worker warnings that occur during multi-threaded execution
+logging.getLogger("asyncio").addFilter(_TaskDestroyedFilter())
+# Suppress noisy MCP server "Processing request" messages.
+# A filter attached to a logger is not applied to records created by its
+# descendant loggers, so "mcp.server.lowlevel" gets its own filter instance.
+logging.getLogger("mcp.server").addFilter(_MCPRequestFilter())
+logging.getLogger("mcp.server.lowlevel").addFilter(_MCPRequestFilter())

codespy/agents/reviewer/__init__.py ADDED Viewed

@@ -0,0 +1,17 @@
+"""Reviewer agent - AI-powered code review."""
+from codespy.agents.reviewer.models import (
+    Issue,
+    IssueCategory,
+    IssueSeverity,
+    ReviewResult,
+)
+from codespy.agents.reviewer.reviewer import ReviewPipeline
+# Public names of this package: the review pipeline plus the result and
+# issue models imported above.
+__all__ = [
+    "ReviewPipeline",
+    "ReviewResult",
+    "Issue",
+    "IssueCategory",
+    "IssueSeverity",
+]