PyPI - codeshift - Versions diffs - 0.2.0__py3-none-any.whl - Mend

codeshift 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (65) hide show

codeshift/__init__.py +8 -0
codeshift/analyzer/__init__.py +5 -0
codeshift/analyzer/risk_assessor.py +388 -0
codeshift/api/__init__.py +1 -0
codeshift/api/auth.py +182 -0
codeshift/api/config.py +73 -0
codeshift/api/database.py +215 -0
codeshift/api/main.py +103 -0
codeshift/api/models/__init__.py +55 -0
codeshift/api/models/auth.py +108 -0
codeshift/api/models/billing.py +92 -0
codeshift/api/models/migrate.py +42 -0
codeshift/api/models/usage.py +116 -0
codeshift/api/routers/__init__.py +5 -0
codeshift/api/routers/auth.py +440 -0
codeshift/api/routers/billing.py +395 -0
codeshift/api/routers/migrate.py +304 -0
codeshift/api/routers/usage.py +291 -0
codeshift/api/routers/webhooks.py +289 -0
codeshift/cli/__init__.py +5 -0
codeshift/cli/commands/__init__.py +7 -0
codeshift/cli/commands/apply.py +352 -0
codeshift/cli/commands/auth.py +842 -0
codeshift/cli/commands/diff.py +221 -0
codeshift/cli/commands/scan.py +368 -0
codeshift/cli/commands/upgrade.py +436 -0
codeshift/cli/commands/upgrade_all.py +518 -0
codeshift/cli/main.py +221 -0
codeshift/cli/quota.py +210 -0
codeshift/knowledge/__init__.py +50 -0
codeshift/knowledge/cache.py +167 -0
codeshift/knowledge/generator.py +231 -0
codeshift/knowledge/models.py +151 -0
codeshift/knowledge/parser.py +270 -0
codeshift/knowledge/sources.py +388 -0
codeshift/knowledge_base/__init__.py +17 -0
codeshift/knowledge_base/loader.py +102 -0
codeshift/knowledge_base/models.py +110 -0
codeshift/migrator/__init__.py +23 -0
codeshift/migrator/ast_transforms.py +256 -0
codeshift/migrator/engine.py +395 -0
codeshift/migrator/llm_migrator.py +320 -0
codeshift/migrator/transforms/__init__.py +19 -0
codeshift/migrator/transforms/fastapi_transformer.py +174 -0
codeshift/migrator/transforms/pandas_transformer.py +236 -0
codeshift/migrator/transforms/pydantic_v1_to_v2.py +637 -0
codeshift/migrator/transforms/requests_transformer.py +218 -0
codeshift/migrator/transforms/sqlalchemy_transformer.py +175 -0
codeshift/scanner/__init__.py +6 -0
codeshift/scanner/code_scanner.py +352 -0
codeshift/scanner/dependency_parser.py +473 -0
codeshift/utils/__init__.py +5 -0
codeshift/utils/api_client.py +266 -0
codeshift/utils/cache.py +318 -0
codeshift/utils/config.py +71 -0
codeshift/utils/llm_client.py +221 -0
codeshift/validator/__init__.py +6 -0
codeshift/validator/syntax_checker.py +183 -0
codeshift/validator/test_runner.py +224 -0
codeshift-0.2.0.dist-info/METADATA +326 -0
codeshift-0.2.0.dist-info/RECORD +65 -0
codeshift-0.2.0.dist-info/WHEEL +5 -0
codeshift-0.2.0.dist-info/entry_points.txt +2 -0
codeshift-0.2.0.dist-info/licenses/LICENSE +21 -0
codeshift-0.2.0.dist-info/top_level.txt +1 -0

codeshift/knowledge/generator.py ADDED Viewed

@@ -0,0 +1,231 @@
+"""Knowledge base generator - orchestrates the knowledge acquisition pipeline."""
+from collections.abc import Callable
+from codeshift.knowledge.cache import KnowledgeCache, get_knowledge_cache
+from codeshift.knowledge.models import (
+    BreakingChange,
+    Confidence,
+    GeneratedKnowledgeBase,
+)
+from codeshift.knowledge.parser import ChangelogParser, get_changelog_parser
+from codeshift.knowledge.sources import SourceFetcher, get_source_fetcher
+class KnowledgeGenerator:
+    """Orchestrates knowledge base generation from multiple sources.
+    The pipeline is: cache lookup, source discovery, per-version changelog
+    extraction, LLM parsing, confidence scoring, and cache write-back.
+    """
+    def __init__(
+        self,
+        fetcher: SourceFetcher | None = None,
+        parser: ChangelogParser | None = None,
+        cache: KnowledgeCache | None = None,
+        use_cache: bool = True,
+    ):
+        """Initialize the generator.
+        Args:
+            fetcher: Source fetcher instance. Defaults to get_source_fetcher().
+            parser: Changelog parser instance. Defaults to get_changelog_parser().
+            cache: Knowledge cache instance. Defaults to get_knowledge_cache().
+            use_cache: Whether to use caching. When False, no cache is kept
+                even if one is passed in.
+        """
+        # Compare against None rather than relying on truthiness so that an
+        # injected collaborator is never silently replaced by the default.
+        self.fetcher = fetcher if fetcher is not None else get_source_fetcher()
+        self.parser = parser if parser is not None else get_changelog_parser()
+        self.cache: KnowledgeCache | None = None
+        if use_cache:
+            self.cache = cache if cache is not None else get_knowledge_cache()
+        self.use_cache = use_cache
+    def generate(
+        self,
+        package: str,
+        old_version: str,
+        new_version: str,
+        progress_callback: Callable[[str], None] | None = None,
+    ) -> GeneratedKnowledgeBase:
+        """Generate a knowledge base for a package migration.
+        Args:
+            package: Package name.
+            old_version: Starting version.
+            new_version: Target version.
+            progress_callback: Optional callback for progress updates.
+        Returns:
+            GeneratedKnowledgeBase with detected breaking changes. When no
+            sources are found, or the LLM parser is unavailable, the result
+            has no breaking changes and LOW confidence and is not cached.
+        """
+        def report(msg: str) -> None:
+            if progress_callback:
+                progress_callback(msg)
+        caching = self.use_cache and self.cache is not None
+        # Check cache first
+        if caching:
+            cached = self.cache.get(package, old_version, new_version)
+            if cached:
+                report("Using cached knowledge base")
+                return cached
+        report("Fetching changelog sources...")
+        # Discover sources
+        sources = self.fetcher.discover_sources_sync(package, new_version)
+        if not sources:
+            report("No changelog sources found")
+            return GeneratedKnowledgeBase(
+                package=package,
+                old_version=old_version,
+                new_version=new_version,
+                overall_confidence=Confidence.LOW,
+            )
+        source_urls = [s.url for s in sources]
+        report(f"Found {len(sources)} source(s)")
+        # Extract version-specific content from changelogs (other source
+        # types are passed to the parser unmodified)
+        for source in sources:
+            if source.source_type == "changelog":
+                source.content = self.fetcher.extract_version_changelog(
+                    source.content,
+                    old_version,
+                    new_version,
+                )
+        # Parse sources with LLM
+        breaking_changes: list[BreakingChange] = []
+        llm_parsed = self.parser.is_available
+        if llm_parsed:
+            report("Parsing changelog with LLM...")
+            breaking_changes = self.parser.parse_multiple_sources(
+                sources,
+                package,
+                old_version,
+                new_version,
+            )
+            report(f"Found {len(breaking_changes)} breaking change(s)")
+        else:
+            report("LLM not available - skipping changelog parsing")
+        # Determine overall confidence
+        overall_confidence = self._calculate_overall_confidence(breaking_changes, sources)
+        # Create knowledge base
+        kb = GeneratedKnowledgeBase(
+            package=package,
+            old_version=old_version,
+            new_version=new_version,
+            breaking_changes=breaking_changes,
+            sources=source_urls,
+            overall_confidence=overall_confidence,
+        )
+        # Cache result. Skip this when the LLM step did not run: the empty
+        # result only reflects a missing LLM, and caching it would hide real
+        # breaking changes from later runs that do have an LLM configured.
+        if caching and llm_parsed:
+            self.cache.set(kb)
+            report("Cached knowledge base")
+        return kb
+    def _calculate_overall_confidence(
+        self,
+        changes: list[BreakingChange],
+        sources: list,
+    ) -> Confidence:
+        """Calculate overall confidence based on changes and sources.
+        Args:
+            changes: List of breaking changes.
+            sources: List of sources used (objects with a source_type attribute).
+        Returns:
+            LOW when there are no changes; HIGH when any source is a
+            migration guide or at least half the changes are HIGH; MEDIUM
+            when at least half are MEDIUM or HIGH; LOW otherwise.
+        """
+        if not changes:
+            return Confidence.LOW
+        # A migration guide is treated as a high confidence source regardless
+        # of the per-change confidence values
+        if any(s.source_type == "migration_guide" for s in sources):
+            return Confidence.HIGH
+        # Count confidence levels
+        half = len(changes) / 2
+        high_count = sum(1 for c in changes if c.confidence == Confidence.HIGH)
+        medium_count = sum(1 for c in changes if c.confidence == Confidence.MEDIUM)
+        if high_count >= half:
+            return Confidence.HIGH
+        if medium_count + high_count >= half:
+            return Confidence.MEDIUM
+        return Confidence.LOW
+# Libraries covered by deterministic AST transforms ("Tier 1")
+TIER_1_LIBRARIES = {"pydantic", "fastapi", "sqlalchemy", "pandas", "requests"}
+def is_tier_1_library(library: str) -> bool:
+    """Tell whether a library belongs to Tier 1.
+    The lookup is case-insensitive: the name is lowercased before it is
+    matched against TIER_1_LIBRARIES.
+    Args:
+        library: Library name.
+    Returns:
+        True if the lowercased name is in TIER_1_LIBRARIES.
+    """
+    normalized = library.lower()
+    return normalized in TIER_1_LIBRARIES
+async def generate_knowledge_base(
+    package: str,
+    old_version: str,
+    new_version: str,
+    progress_callback: Callable[[str], None] | None = None,
+) -> GeneratedKnowledgeBase:
+    """Coroutine wrapper around KnowledgeGenerator.generate.
+    A fresh KnowledgeGenerator is built for every call. Nothing is awaited
+    inside: the generation itself runs synchronously within the coroutine.
+    Args:
+        package: Package name.
+        old_version: Starting version.
+        new_version: Target version.
+        progress_callback: Optional callback for progress updates.
+    Returns:
+        GeneratedKnowledgeBase with detected breaking changes.
+    """
+    return KnowledgeGenerator().generate(
+        package,
+        old_version,
+        new_version,
+        progress_callback,
+    )
+def generate_knowledge_base_sync(
+    package: str,
+    old_version: str,
+    new_version: str,
+    progress_callback: Callable[[str], None] | None = None,
+) -> GeneratedKnowledgeBase:
+    """Blocking convenience wrapper around KnowledgeGenerator.generate.
+    A fresh KnowledgeGenerator is built for every call.
+    Args:
+        package: Package name.
+        old_version: Starting version.
+        new_version: Target version.
+        progress_callback: Optional callback for progress updates.
+    Returns:
+        GeneratedKnowledgeBase with detected breaking changes.
+    """
+    return KnowledgeGenerator().generate(
+        package,
+        old_version,
+        new_version,
+        progress_callback,
+    )
+# Module-wide generator, created lazily by get_knowledge_generator()
+_default_generator: KnowledgeGenerator | None = None
+def get_knowledge_generator() -> KnowledgeGenerator:
+    """Return the module-wide KnowledgeGenerator, building it on first use."""
+    global _default_generator
+    if _default_generator is not None:
+        return _default_generator
+    _default_generator = KnowledgeGenerator()
+    return _default_generator

codeshift/knowledge/models.py ADDED Viewed

@@ -0,0 +1,151 @@
+"""Data models for auto-generated knowledge bases."""
+from dataclasses import dataclass, field
+from enum import Enum
+class ChangeCategory(Enum):
+    """Kinds of breaking change a knowledge base entry can describe.
+    The member values are the strings used in serialized entries and in the
+    LLM response format.
+    """
+    # The API no longer exists
+    REMOVED = "removed"
+    # The API is available under a different name
+    RENAMED = "renamed"
+    # The API's call signature differs
+    SIGNATURE_CHANGED = "signature_changed"
+    # The API's behavior differs
+    BEHAVIOR_CHANGED = "behavior_changed"
+class Confidence(Enum):
+    """Confidence levels for detected changes.
+    Members are ordered LOW < MEDIUM < HIGH and support the four rich
+    ordering comparisons. Comparing against anything that is not a
+    Confidence returns NotImplemented, so Python raises its usual TypeError
+    instead of an unrelated KeyError.
+    """
+    HIGH = "high"  # From migration guide or explicit changelog
+    MEDIUM = "medium"  # From changelog parsing
+    LOW = "low"  # From AST diff only
+    def _rank(self) -> int:
+        """Return this member's position on the LOW < MEDIUM < HIGH scale."""
+        # Defined as a method: a plain dict or tuple assigned in the class
+        # body would itself be turned into an enum member.
+        return ("low", "medium", "high").index(self.value)
+    def __ge__(self, other: object) -> bool:
+        if not isinstance(other, Confidence):
+            return NotImplemented
+        return self._rank() >= other._rank()
+    def __gt__(self, other: object) -> bool:
+        if not isinstance(other, Confidence):
+            return NotImplemented
+        return self._rank() > other._rank()
+    def __le__(self, other: object) -> bool:
+        if not isinstance(other, Confidence):
+            return NotImplemented
+        return self._rank() <= other._rank()
+    def __lt__(self, other: object) -> bool:
+        if not isinstance(other, Confidence):
+            return NotImplemented
+        return self._rank() < other._rank()
+@dataclass
+class BreakingChange:
+    """One breaking change extracted from a changelog-like source."""
+    category: ChangeCategory
+    old_api: str
+    new_api: str | None
+    description: str
+    confidence: Confidence
+    source: str | None = None  # Where this change was detected from
+    def to_dict(self) -> dict:
+        """Serialize to a plain dict; enum fields are stored by value."""
+        payload: dict = {}
+        payload["category"] = self.category.value
+        payload["old_api"] = self.old_api
+        payload["new_api"] = self.new_api
+        payload["description"] = self.description
+        payload["confidence"] = self.confidence.value
+        payload["source"] = self.source
+        return payload
+    @classmethod
+    def from_dict(cls, data: dict) -> "BreakingChange":
+        """Rebuild an instance from the dict layout produced by to_dict.
+        "new_api" and "source" are optional and default to None; the other
+        keys are required.
+        """
+        category = ChangeCategory(data["category"])
+        old_api = data["old_api"]
+        new_api = data.get("new_api")
+        description = data["description"]
+        confidence = Confidence(data["confidence"])
+        origin = data.get("source")
+        return cls(
+            category=category,
+            old_api=old_api,
+            new_api=new_api,
+            description=description,
+            confidence=confidence,
+            source=origin,
+        )
+@dataclass
+class ChangelogSource:
+    """A fetched document that may describe changes between versions."""
+    url: str
+    source_type: str  # "changelog", "migration_guide", "release_notes"
+    content: str
+    version_range: tuple[str, str] | None = None  # (from_version, to_version)
+    def to_dict(self) -> dict:
+        """Serialize to a plain dict keyed by field name."""
+        field_names = ("url", "source_type", "content", "version_range")
+        return {name: getattr(self, name) for name in field_names}
+    @classmethod
+    def from_dict(cls, data: dict) -> "ChangelogSource":
+        """Rebuild an instance from the dict layout produced by to_dict.
+        A missing or empty "version_range" becomes None; any other value is
+        converted to a tuple.
+        """
+        url = data["url"]
+        source_type = data["source_type"]
+        content = data["content"]
+        raw_range = data.get("version_range")
+        version_range = tuple(raw_range) if raw_range else None
+        return cls(
+            url=url,
+            source_type=source_type,
+            content=content,
+            version_range=version_range,
+        )
+@dataclass
+class GeneratedKnowledgeBase:
+    """Breaking changes collected for one package between two versions."""
+    package: str
+    old_version: str
+    new_version: str
+    breaking_changes: list[BreakingChange] = field(default_factory=list)
+    sources: list[str] = field(default_factory=list)  # URLs of sources used
+    overall_confidence: Confidence = Confidence.LOW
+    @property
+    def has_changes(self) -> bool:
+        """True when at least one breaking change is recorded."""
+        return bool(self.breaking_changes)
+    def get_changes_by_confidence(self, min_confidence: Confidence) -> list[BreakingChange]:
+        """Return the changes whose confidence is min_confidence or higher."""
+        selected = []
+        for change in self.breaking_changes:
+            if change.confidence >= min_confidence:
+                selected.append(change)
+        return selected
+    def get_changes_by_category(self, category: ChangeCategory) -> list[BreakingChange]:
+        """Return the changes whose category equals the given one."""
+        selected = []
+        for change in self.breaking_changes:
+            if change.category == category:
+                selected.append(change)
+        return selected
+    def to_dict(self) -> dict:
+        """Serialize to a plain dict; changes are serialized via to_dict()."""
+        serialized_changes = [change.to_dict() for change in self.breaking_changes]
+        return {
+            "package": self.package,
+            "old_version": self.old_version,
+            "new_version": self.new_version,
+            "breaking_changes": serialized_changes,
+            "sources": self.sources,
+            "overall_confidence": self.overall_confidence.value,
+        }
+    @classmethod
+    def from_dict(cls, data: dict) -> "GeneratedKnowledgeBase":
+        """Rebuild an instance from the dict layout produced by to_dict.
+        "breaking_changes" and "sources" default to empty lists and
+        "overall_confidence" defaults to "low" when absent.
+        """
+        package = data["package"]
+        old_version = data["old_version"]
+        new_version = data["new_version"]
+        changes = [
+            BreakingChange.from_dict(entry) for entry in data.get("breaking_changes", [])
+        ]
+        source_urls = data.get("sources", [])
+        confidence = Confidence(data.get("overall_confidence", "low"))
+        return cls(
+            package=package,
+            old_version=old_version,
+            new_version=new_version,
+            breaking_changes=changes,
+            sources=source_urls,
+            overall_confidence=confidence,
+        )

codeshift/knowledge/parser.py ADDED Viewed

@@ -0,0 +1,270 @@
+"""LLM-based changelog parser for extracting breaking changes."""
+import json
+import re
+from typing import cast
+from codeshift.knowledge.models import (
+    BreakingChange,
+    ChangeCategory,
+    ChangelogSource,
+    Confidence,
+)
+from codeshift.utils.llm_client import LLMClient, get_llm_client
+class ChangelogParser:
+    """Parses changelog content using LLM to extract breaking changes."""
+    SYSTEM_PROMPT = """You are an expert at analyzing Python library changelogs and migration guides.
+Your task is to extract breaking changes from the provided changelog content.
+For each breaking change, identify:
+1. category: One of "removed", "renamed", "signature_changed", "behavior_changed"
+2. old_api: The old API that is affected (function name, class name, parameter, etc.)
+3. new_api: The new API to use instead (if applicable, null otherwise)
+4. description: A brief description of the change
+Focus only on BREAKING changes that would require code modifications.
+Do not include new features, bug fixes, or deprecation warnings unless they affect existing code.
+Respond with a JSON array of breaking changes. Example:
+[
+  {
+    "category": "renamed",
+    "old_api": ".dict()",
+    "new_api": ".model_dump()",
+    "description": "The .dict() method has been renamed to .model_dump()"
+  },
+  {
+    "category": "removed",
+    "old_api": "parse_obj()",
+    "new_api": "model_validate()",
+    "description": "parse_obj() has been removed, use model_validate() instead"
+  }
+]
+If there are no breaking changes, respond with an empty array: []"""
+    def __init__(self, client: LLMClient | None = None):
+        """Initialize the parser.
+        Args:
+            client: LLM client to use. Defaults to get_llm_client().
+        """
+        self.client = client or get_llm_client()
+    @property
+    def is_available(self) -> bool:
+        """Check if the parser is available (mirrors the client's is_available)."""
+        return self.client.is_available
+    def parse_changelog(
+        self,
+        source: ChangelogSource,
+        package: str,
+        from_version: str,
+        to_version: str,
+    ) -> list[BreakingChange]:
+        """Parse a changelog source to extract breaking changes.
+        Args:
+            source: The changelog source to parse.
+            package: Package name.
+            from_version: Starting version.
+            to_version: Target version.
+        Returns:
+            List of detected breaking changes. Empty when the parser is not
+            available, the LLM call does not succeed, or the response has
+            no usable JSON array.
+        """
+        if not self.is_available:
+            return []
+        # Truncate content if too long
+        content = source.content
+        max_length = 15000  # Leave room for prompts and response
+        if len(content) > max_length:
+            content = content[:max_length] + "\n\n[Content truncated...]"
+        prompt = f"""Analyze the following {source.source_type} for the Python package "{package}".
+Extract all breaking changes between version {from_version} and {to_version}.
+{source.source_type.upper()} CONTENT:
+```
+{content}
+```
+Extract breaking changes as a JSON array:"""
+        response = self.client.generate(
+            prompt=prompt,
+            system_prompt=self.SYSTEM_PROMPT,
+            temperature=0.0,
+        )
+        if not response.success:
+            return []
+        return self._parse_response(response.content, source)
+    def parse_multiple_sources(
+        self,
+        sources: list[ChangelogSource],
+        package: str,
+        from_version: str,
+        to_version: str,
+    ) -> list[BreakingChange]:
+        """Parse multiple changelog sources and merge results.
+        Changes are deduplicated by old_api. The first occurrence is kept;
+        a later duplicate only raises its confidence and source when the
+        duplicate comes with a strictly higher confidence.
+        Args:
+            sources: List of changelog sources to parse.
+            package: Package name.
+            from_version: Starting version.
+            to_version: Target version.
+        Returns:
+            Merged list of breaking changes in first-seen order.
+        """
+        # dicts preserve insertion order, so this gives both the O(1)
+        # duplicate lookup and the first-seen ordering of the result.
+        merged: dict[str, BreakingChange] = {}
+        for source in sources:
+            for change in self.parse_changelog(source, package, from_version, to_version):
+                existing = merged.get(change.old_api)
+                if existing is None:
+                    merged[change.old_api] = change
+                elif change.confidence > existing.confidence:
+                    # Same change found again in a better source
+                    existing.confidence = change.confidence
+                    existing.source = change.source
+        return list(merged.values())
+    @staticmethod
+    def _as_text(value: object) -> str:
+        """Coerce a decoded JSON value into a string ("" for null)."""
+        if value is None:
+            return ""
+        return value if isinstance(value, str) else str(value)
+    def _parse_response(
+        self,
+        content: str,
+        source: ChangelogSource,
+    ) -> list[BreakingChange]:
+        """Parse LLM response into BreakingChange objects.
+        Args:
+            content: Raw LLM response.
+            source: The source this was parsed from.
+        Returns:
+            List of BreakingChange objects. Non-dict array items are
+            skipped and an unknown category falls back to BEHAVIOR_CHANGED.
+        """
+        # Extract JSON from response
+        json_content = self._extract_json(content)
+        if not json_content:
+            return []
+        try:
+            data = json.loads(json_content)
+        except json.JSONDecodeError:
+            return []
+        if not isinstance(data, list):
+            return []
+        # Every change from one source shares that source's confidence
+        confidence = self._get_source_confidence(source.source_type)
+        changes = []
+        for item in data:
+            if not isinstance(item, dict):
+                continue
+            try:
+                category = ChangeCategory(item.get("category", "behavior_changed"))
+            except ValueError:
+                category = ChangeCategory.BEHAVIOR_CHANGED
+            # The model may emit null or a non-string here; old_api is later
+            # used as a dedupe key, so it must always be a string.
+            new_api = item.get("new_api")
+            if new_api is not None:
+                new_api = self._as_text(new_api)
+            changes.append(
+                BreakingChange(
+                    category=category,
+                    old_api=self._as_text(item.get("old_api")),
+                    new_api=new_api,
+                    description=self._as_text(item.get("description")),
+                    confidence=confidence,
+                    source=source.url,
+                )
+            )
+        return changes
+    def _extract_json(self, content: str) -> str | None:
+        """Extract JSON array from LLM response.
+        Candidates are tried in this order: the response itself when it
+        starts with "[", then fenced code blocks whose content starts with
+        "[", then any "[" elsewhere in the text (longest valid array wins).
+        Each candidate is validated with the JSON decoder, so brackets
+        inside string values and trailing prose do not cut the array short.
+        Args:
+            content: Raw LLM response.
+        Returns:
+            JSON string or None.
+        """
+        content = content.strip()
+        decoder = json.JSONDecoder()
+        def array_at(text: str, start: int) -> str | None:
+            """Return the JSON array that begins at text[start], if any."""
+            try:
+                value, end = decoder.raw_decode(text, start)
+            except json.JSONDecodeError:
+                return None
+            return text[start:end] if isinstance(value, list) else None
+        # Try direct parse first
+        if content.startswith("["):
+            found = array_at(content, 0)
+            if found is not None:
+                return found
+        # Try to find JSON in code blocks
+        code_block_pattern = r"```(?:json)?\s*([\s\S]*?)```"
+        for block in re.findall(code_block_pattern, content):
+            block_text = block.strip()
+            if block_text.startswith("["):
+                found = array_at(block_text, 0)
+                if found is not None:
+                    return found
+        # Try to find bare JSON array anywhere in the text
+        best: str | None = None
+        pos = content.find("[")
+        while pos != -1:
+            found = array_at(content, pos)
+            if found is None:
+                pos = content.find("[", pos + 1)
+                continue
+            if best is None or len(found) > len(best):
+                best = found
+            # Resume after this array so nested arrays are not re-reported
+            pos = content.find("[", pos + len(found))
+        return best
+    def _get_source_confidence(self, source_type: str) -> Confidence:
+        """Get confidence level based on source type.
+        Args:
+            source_type: Type of source.
+        Returns:
+            HIGH for "migration_guide" and "release_notes", MEDIUM for
+            "changelog", LOW for any other value.
+        """
+        confidence_map = {
+            "migration_guide": Confidence.HIGH,
+            "release_notes": Confidence.HIGH,
+            "changelog": Confidence.MEDIUM,
+        }
+        return confidence_map.get(source_type, Confidence.LOW)
+# Module-wide parser, created lazily by get_changelog_parser()
+_default_parser: ChangelogParser | None = None
+def get_changelog_parser() -> ChangelogParser:
+    """Return the module-wide ChangelogParser, building it on first use."""
+    global _default_parser
+    if _default_parser is not None:
+        return _default_parser
+    _default_parser = ChangelogParser()
+    return _default_parser