codeshift-0.4.0-py3-none-any.whl → codeshift-0.7.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codeshift/__init__.py +1 -1
- codeshift/cli/commands/auth.py +41 -25
- codeshift/cli/commands/health.py +244 -0
- codeshift/cli/commands/upgrade.py +68 -55
- codeshift/cli/main.py +2 -0
- codeshift/health/__init__.py +50 -0
- codeshift/health/calculator.py +217 -0
- codeshift/health/metrics/__init__.py +63 -0
- codeshift/health/metrics/documentation.py +209 -0
- codeshift/health/metrics/freshness.py +180 -0
- codeshift/health/metrics/migration_readiness.py +142 -0
- codeshift/health/metrics/security.py +225 -0
- codeshift/health/metrics/test_coverage.py +191 -0
- codeshift/health/models.py +284 -0
- codeshift/health/report.py +310 -0
- codeshift/knowledge/generator.py +6 -0
- codeshift/knowledge_base/libraries/aiohttp.yaml +3 -3
- codeshift/knowledge_base/libraries/httpx.yaml +4 -4
- codeshift/knowledge_base/libraries/pytest.yaml +1 -1
- codeshift/knowledge_base/models.py +1 -0
- codeshift/migrator/transforms/marshmallow_transformer.py +50 -0
- codeshift/migrator/transforms/pydantic_v1_to_v2.py +191 -22
- codeshift/scanner/code_scanner.py +22 -2
- codeshift/utils/api_client.py +144 -4
- codeshift/utils/credential_store.py +393 -0
- codeshift/utils/llm_client.py +111 -9
- {codeshift-0.4.0.dist-info → codeshift-0.7.0.dist-info}/METADATA +4 -1
- {codeshift-0.4.0.dist-info → codeshift-0.7.0.dist-info}/RECORD +32 -20
- {codeshift-0.4.0.dist-info → codeshift-0.7.0.dist-info}/WHEEL +0 -0
- {codeshift-0.4.0.dist-info → codeshift-0.7.0.dist-info}/entry_points.txt +0 -0
- {codeshift-0.4.0.dist-info → codeshift-0.7.0.dist-info}/licenses/LICENSE +0 -0
- {codeshift-0.4.0.dist-info → codeshift-0.7.0.dist-info}/top_level.txt +0 -0
codeshift/migrator/transforms/marshmallow_transformer.py
CHANGED

@@ -191,6 +191,10 @@ class MarshmallowTransformer(BaseTransformer):
         - default -> dump_default
         - load_from -> data_key
         - dump_to -> data_key
+
+        Special handling: When both load_from and dump_to are present, only one data_key
+        is kept (preferring load_from) and a warning comment is added about the removed
+        dump_to value.
         """
         # Check if this is a fields.* call or a Field-like call
         func_name = self._get_call_func_name(node.func)

@@ -232,6 +236,27 @@ class MarshmallowTransformer(BaseTransformer):
         if func_name not in field_types:
             return node

+        # First pass: detect if both load_from and dump_to are present
+        load_from_arg = None
+        dump_to_arg = None
+        load_from_value = None
+        dump_to_value = None
+
+        for arg in node.args:
+            if isinstance(arg.keyword, cst.Name):
+                if arg.keyword.value == "load_from":
+                    load_from_arg = arg
+                    # Extract the value for comparison/warning
+                    if isinstance(arg.value, cst.SimpleString):
+                        load_from_value = arg.value.value
+                elif arg.keyword.value == "dump_to":
+                    dump_to_arg = arg
+                    # Extract the value for comparison/warning
+                    if isinstance(arg.value, cst.SimpleString):
+                        dump_to_value = arg.value.value
+
+        has_both_load_from_and_dump_to = load_from_arg is not None and dump_to_arg is not None
+
         new_args = []
         changed = False
         param_mappings = {

@@ -245,6 +270,31 @@ class MarshmallowTransformer(BaseTransformer):
             if isinstance(arg.keyword, cst.Name) and arg.keyword.value in param_mappings:
                 old_name = arg.keyword.value
                 new_name = param_mappings[old_name]
+
+                # Special case: skip dump_to when both load_from and dump_to exist
+                if old_name == "dump_to" and has_both_load_from_and_dump_to:
+                    changed = True
+                    # Record that dump_to was removed due to conflict
+                    self.record_change(
+                        description=(
+                            f"Remove '{old_name}' parameter - Marshmallow 3.x uses single "
+                            f"data_key for both load/dump. load_from value kept, dump_to="
+                            f"{dump_to_value} removed. Manual review may be needed if "
+                            f"load_from ({load_from_value}) != dump_to ({dump_to_value})."
+                        ),
+                        line_number=1,
+                        original=f"{func_name}(load_from=..., dump_to=...)",
+                        replacement=f"{func_name}(data_key=...)",
+                        transform_name="remove_dump_to_conflict",
+                        notes=(
+                            f"dump_to={dump_to_value} was removed because load_from="
+                            f"{load_from_value} was also present. In Marshmallow 3.x, "
+                            "data_key serves both purposes."
+                        ),
+                    )
+                    # Skip adding this arg
+                    continue
+
                 new_arg = arg.with_changes(keyword=cst.Name(new_name))
                 new_args.append(new_arg)
                 changed = True
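To illustrate the effect of these transforms, here is a hypothetical Marshmallow 2.x field declaration and the 3.x form the transformer aims to produce (the field and key names are invented for the example):

    # Before (Marshmallow 2.x keywords)
    name = fields.String(load_from="userName", dump_to="user_name", default="anon")

    # After the transform (Marshmallow 3.x keywords)
    name = fields.String(data_key="userName", dump_default="anon")

Because data_key now covers both serialization directions, the dump_to="user_name" value is dropped and a change record with a warning is emitted so the conflict can be reviewed manually.
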
codeshift/migrator/transforms/pydantic_v1_to_v2.py
CHANGED

@@ -24,12 +24,151 @@ class PydanticV1ToV2Transformer(BaseTransformer):
         self._current_class: str | None = None
         # Track position info
         self._line_offset = 0
+        # Track Pydantic model classes defined in this file
+        self._pydantic_model_classes: set[str] = set()
+        # Track variables known to be Pydantic model instances
+        self._pydantic_instance_vars: set[str] = set()
+        # Track function parameters with Pydantic model type hints
+        self._pydantic_param_vars: set[str] = set()
+        # Track if BaseModel is imported from pydantic
+        self._has_basemodel_import = False
+
+    def visit_ImportFrom(self, node: cst.ImportFrom) -> bool:
+        """Track Pydantic imports to identify model base classes."""
+        if node.module is None:
+            return True
+
+        module_name = self._get_module_name(node.module)
+        if module_name == "pydantic" or module_name.startswith("pydantic."):
+            if isinstance(node.names, cst.ImportStar):
+                # With star import, assume BaseModel is available
+                self._has_basemodel_import = True
+            elif isinstance(node.names, tuple):
+                for name in node.names:
+                    if isinstance(name, cst.ImportAlias):
+                        imported_name = self._get_name_value(name.name)
+                        if imported_name == "BaseModel":
+                            self._has_basemodel_import = True
+        return True

     def visit_ClassDef(self, node: cst.ClassDef) -> bool:
-        """Track the current class being visited."""
+        """Track the current class being visited and detect Pydantic models."""
         self._current_class = node.name.value
+
+        # Check if this class inherits from BaseModel or another known Pydantic model
+        for base in node.bases:
+            base_name = self._get_base_class_name(base.value)
+            if (
+                base_name in ("BaseModel", "pydantic.BaseModel")
+                or base_name in self._pydantic_model_classes
+            ):
+                self._pydantic_model_classes.add(node.name.value)
+                break
+        return True
+
+    def _get_base_class_name(self, node: cst.BaseExpression) -> str:
+        """Get the name of a base class from its AST node."""
+        if isinstance(node, cst.Name):
+            return node.value
+        if isinstance(node, cst.Attribute):
+            return f"{self._get_base_class_name(node.value)}.{node.attr.value}"
+        if isinstance(node, cst.Subscript):
+            # Handle Generic[T] style - get the base
+            return self._get_base_class_name(node.value)
+        return ""
+
+    def visit_Assign(self, node: cst.Assign) -> bool:
+        """Track assignments of Pydantic model instances to variables."""
+        # Check if the value is a call to a Pydantic model class
+        if isinstance(node.value, cst.Call):
+            class_name = self._get_call_func_name(node.value.func)
+            if class_name in self._pydantic_model_classes:
+                # Track all assigned variable names
+                for target in node.targets:
+                    if isinstance(target.target, cst.Name):
+                        self._pydantic_instance_vars.add(target.target.value)
+        return True
+
+    def visit_AnnAssign(self, node: cst.AnnAssign) -> bool:
+        """Track annotated assignments with Pydantic model type hints."""
+        if isinstance(node.target, cst.Name):
+            type_name = self._get_annotation_name(node.annotation.annotation)
+            if type_name in self._pydantic_model_classes:
+                self._pydantic_instance_vars.add(node.target.value)
+        return True
+
+    def visit_FunctionDef(self, node: cst.FunctionDef) -> bool:
+        """Track function parameters with Pydantic model type annotations."""
+        for param in node.params.params:
+            if param.annotation is not None:
+                type_name = self._get_annotation_name(param.annotation.annotation)
+                if type_name in self._pydantic_model_classes:
+                    self._pydantic_param_vars.add(param.name.value)
         return True

+    def leave_FunctionDef_params(self, node: cst.FunctionDef) -> None:
+        """Clear function-scoped parameter tracking when leaving function."""
+        # Note: This is a simplified approach - ideally we'd use proper scope analysis
+        pass
+
+    def _get_call_func_name(self, node: cst.BaseExpression) -> str:
+        """Get the function/class name from a Call's func attribute."""
+        if isinstance(node, cst.Name):
+            return node.value
+        if isinstance(node, cst.Attribute):
+            return node.attr.value  # Return just the class name part
+        return ""
+
+    def _get_annotation_name(self, node: cst.BaseExpression) -> str:
+        """Extract the type name from a type annotation."""
+        if isinstance(node, cst.Name):
+            return node.value
+        if isinstance(node, cst.Attribute):
+            return node.attr.value  # Return just the class name part
+        if isinstance(node, cst.Subscript):
+            # Handle Optional[Model], List[Model], etc.
+            return self._get_annotation_name(node.value)
+        return ""
+
+    def _is_pydantic_instance(self, node: cst.BaseExpression) -> bool:
+        """Check if an expression is known to be a Pydantic model instance.
+
+        Returns True if we can confirm it's a Pydantic instance.
+        Returns False if we cannot confirm (either unknown or definitely not Pydantic).
+        """
+        if isinstance(node, cst.Name):
+            var_name = node.value
+            # Check if it's a known Pydantic instance variable
+            if var_name in self._pydantic_instance_vars:
+                return True
+            # Check if it's a function parameter with Pydantic type hint
+            if var_name in self._pydantic_param_vars:
+                return True
+            # Heuristic: variable name matches a model class name (case-insensitive)
+            for model_class in self._pydantic_model_classes:
+                if var_name.lower() == model_class.lower():
+                    return True
+            return False
+        if isinstance(node, cst.Call):
+            # Direct call like Model().json() - check if the function is a Pydantic class
+            func_name = self._get_call_func_name(node.func)
+            return func_name in self._pydantic_model_classes
+        if isinstance(node, cst.Attribute):
+            # Could be accessing an attribute that returns a Pydantic model
+            # This is harder to determine without full type analysis
+            return False
+        return False
+
+    def _is_class_method_call(self, node: cst.BaseExpression) -> bool:
+        """Check if this is a call on a class rather than an instance (e.g., Model.parse_obj).
+
+        Class methods like parse_obj, schema, etc. are called on the class itself.
+        """
+        if isinstance(node, cst.Name):
+            # Check if the name is a known Pydantic model class
+            return node.value in self._pydantic_model_classes
+        return False
+
     def leave_ClassDef(
         self, original_node: cst.ClassDef, updated_node: cst.ClassDef
     ) -> cst.ClassDef:
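As a rough sketch of what this new tracking pass records, consider a hypothetical input module (names invented for illustration):

    from pydantic import BaseModel

    class User(BaseModel):         # recorded in _pydantic_model_classes
        name: str

    user = User(name="Ada")        # "user" recorded in _pydantic_instance_vars

    def greet(u: User) -> str:     # "u" recorded in _pydantic_param_vars
        return u.name

Only receivers that end up in these sets (or that match the class-name heuristic in _is_pydantic_instance) become eligible for the method renames in the hunks below.
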
@@ -372,11 +511,17 @@ class PydanticV1ToV2Transformer(BaseTransformer):
         # Handle method calls on objects
         if isinstance(updated_node.func, cst.Attribute):
             method_name = updated_node.func.attr.value
+            obj = updated_node.func.value

-
+            # Methods that can only be called on instances
+            instance_method_mappings = {
                 "dict": "model_dump",
                 "json": "model_dump_json",
                 "copy": "model_copy",
+            }
+
+            # Methods that are typically called on the class (class methods)
+            class_method_mappings = {
                 "parse_obj": "model_validate",
                 "parse_raw": "model_validate_json",
                 "schema": "model_json_schema",

@@ -384,19 +529,40 @@ class PydanticV1ToV2Transformer(BaseTransformer):
                 "update_forward_refs": "model_rebuild",
             }

-
-
-
+            # Handle instance methods - need to verify the object is a Pydantic instance
+            if method_name in instance_method_mappings:
+                # Only transform if we can confirm this is a Pydantic model instance
+                if self._is_pydantic_instance(obj):
+                    new_method = instance_method_mappings[method_name]
+                    new_attr = updated_node.func.with_changes(attr=cst.Name(new_method))

-
-
-
-
-
-
-
+                    self.record_change(
+                        description=f"Convert .{method_name}() to .{new_method}()",
+                        line_number=1,
+                        original=f".{method_name}()",
+                        replacement=f".{new_method}()",
+                        transform_name=f"{method_name}_to_{new_method}",
+                    )
+
+                    return updated_node.with_changes(func=new_attr)
+                # If we can't confirm it's a Pydantic instance, skip transformation
+                # This prevents false positives like response.json() on requests.Response

-
+            # Handle class methods - verify the object is a Pydantic model class
+            if method_name in class_method_mappings:
+                if self._is_class_method_call(obj):
+                    new_method = class_method_mappings[method_name]
+                    new_attr = updated_node.func.with_changes(attr=cst.Name(new_method))
+
+                    self.record_change(
+                        description=f"Convert .{method_name}() to .{new_method}()",
+                        line_number=1,
+                        original=f".{method_name}()",
+                        replacement=f".{new_method}()",
+                        transform_name=f"{method_name}_to_{new_method}",
+                    )
+
+                    return updated_node.with_changes(func=new_attr)

         # Handle Field(regex=...) -> Field(pattern=...)
         if isinstance(updated_node.func, cst.Name) and updated_node.func.value == "Field":

@@ -461,17 +627,20 @@ class PydanticV1ToV2Transformer(BaseTransformer):
         }

         if attr_name in attr_mappings:
-
+            # Only transform if the object is a known Pydantic model class
+            obj = updated_node.value
+            if self._is_class_method_call(obj):
+                new_attr = attr_mappings[attr_name]

-
-
-
-
-
-
-
+                self.record_change(
+                    description=f"Convert {attr_name} to {new_attr}",
+                    line_number=1,
+                    original=attr_name,
+                    replacement=new_attr,
+                    transform_name=f"{attr_name}_rename",
+                )

-
+                return updated_node.with_changes(attr=cst.Name(new_attr))

         return updated_node

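The practical effect, shown on a hypothetical snippet, is that the renames now fire only where the receiver is a known Pydantic model, which is what the false-positive comment above refers to:

    user = User(name="Ada")
    user.dict()               # rewritten to user.model_dump()
    user.json()               # rewritten to user.model_dump_json()
    User.parse_obj(payload)   # rewritten to User.model_validate(payload)

    response = requests.get(url)
    response.json()           # left unchanged: "response" is not a tracked Pydantic instance
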
codeshift/scanner/code_scanner.py
CHANGED

@@ -6,6 +6,17 @@ from pathlib import Path
 import libcst as cst
 from libcst.metadata import MetadataWrapper, PositionProvider

+# Mapping of package names to their actual import names
+# Some packages have different import names than their package names
+PACKAGE_IMPORT_ALIASES: dict[str, list[str]] = {
+    "attrs": ["attr", "attrs"],  # attrs package can be imported as "attr" or "attrs"
+    "pillow": ["PIL"],  # pillow package is imported as PIL
+    "scikit-learn": ["sklearn"],  # scikit-learn is imported as sklearn
+    "beautifulsoup4": ["bs4"],  # beautifulsoup4 is imported as bs4
+    "pyyaml": ["yaml"],  # pyyaml is imported as yaml
+    "python-dateutil": ["dateutil"],  # python-dateutil is imported as dateutil
+}
+

 @dataclass
 class ImportInfo:

@@ -53,15 +64,24 @@ class ImportVisitor(cst.CSTVisitor):

     def __init__(self, target_library: str):
         self.target_library = target_library
+        # Get all possible import names for this library
+        self.import_names = PACKAGE_IMPORT_ALIASES.get(target_library.lower(), [target_library])
         self.imports: list[ImportInfo] = []
         self._imported_names: set[str] = set()

+    def _matches_target_library(self, module_name: str) -> bool:
+        """Check if a module name matches the target library or its aliases."""
+        for import_name in self.import_names:
+            if module_name == import_name or module_name.startswith(f"{import_name}."):
+                return True
+        return False
+
     def visit_Import(self, node: cst.Import) -> None:
         """Visit import statements like 'import pydantic'."""
         for name in node.names if isinstance(node.names, tuple) else []:
             if isinstance(name, cst.ImportAlias):
                 module_name = self._get_name_value(name.name)
-                if module_name and
+                if module_name and self._matches_target_library(module_name):
                     alias = None
                     if name.asname and isinstance(name.asname, cst.AsName):
                         alias = self._get_name_value(name.asname.name)

@@ -84,7 +104,7 @@ class ImportVisitor(cst.CSTVisitor):
             return

         module_name = self._get_name_value(node.module)
-        if not module_name or not
+        if not module_name or not self._matches_target_library(module_name):
             return

         names = []
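A minimal sketch of how the alias table changes scanning behavior; the source string and driver code here are illustrative, not part of the package:

    import libcst as cst
    from libcst.metadata import MetadataWrapper

    wrapper = MetadataWrapper(cst.parse_module("from PIL import Image\nimport yaml\n"))

    visitor = ImportVisitor("pillow")   # import_names resolves to ["PIL"]
    wrapper.visit(visitor)

    # visitor.imports now records the PIL import; the yaml import is ignored
    # because it matches neither "pillow" nor any of its aliases.
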
codeshift/utils/api_client.py
CHANGED
@@ -4,12 +4,109 @@ This client calls the Codeshift API instead of Anthropic directly,
 ensuring that LLM features are gated behind the subscription model.
 """

+import logging
 from dataclasses import dataclass
+from urllib.parse import urlparse

 import httpx

 from codeshift.cli.commands.auth import get_api_key, get_api_url

+logger = logging.getLogger(__name__)
+
+
+class InsecureURLError(Exception):
+    """Raised when an insecure (non-HTTPS) URL is used for API communication.
+
+    HTTPS is required to protect API keys and sensitive data in transit.
+    Man-in-the-middle attacks could intercept API keys if HTTP is used.
+    """
+
+    def __init__(self, url: str, message: str | None = None):
+        self.url = url
+        default_msg = (
+            f"Insecure URL detected: {url}. "
+            "HTTPS is required for API communication to protect your API key. "
+            "Use HTTPS or set CODESHIFT_ALLOW_INSECURE=true for local development only."
+        )
+        super().__init__(message or default_msg)
+
+
+def validate_api_url(url: str) -> str:
+    """Validate and normalize the API URL.
+
+    Enforces HTTPS for all non-localhost hosts to prevent API key interception.
+
+    Args:
+        url: The API URL to validate
+
+    Returns:
+        The validated and normalized URL
+
+    Raises:
+        InsecureURLError: If the URL uses HTTP for a non-localhost host
+        ValueError: If the URL is malformed
+    """
+    if not url:
+        raise ValueError("API URL cannot be empty")
+
+    # Parse the URL
+    try:
+        parsed = urlparse(url)
+    except Exception as e:
+        raise ValueError(f"Malformed URL: {url}") from e
+
+    if not parsed.scheme:
+        raise ValueError(f"URL must include a scheme (http/https): {url}")
+
+    if not parsed.netloc:
+        raise ValueError(f"URL must include a host: {url}")
+
+    # Define localhost patterns
+    localhost_patterns = (
+        "localhost",
+        "127.0.0.1",
+        "::1",
+        "0.0.0.0",
+    )
+
+    host = parsed.hostname or ""
+    is_localhost = any(
+        host == pattern or host.startswith(f"{pattern}:") for pattern in localhost_patterns
+    )
+
+    # Allow HTTP only for localhost (development)
+    if parsed.scheme == "http":
+        # Check for explicit override (development only)
+        import os
+
+        allow_insecure = os.environ.get("CODESHIFT_ALLOW_INSECURE", "").lower() == "true"
+
+        if is_localhost:
+            logger.warning(
+                "Using HTTP for localhost development. " "This should not be used in production."
+            )
+        elif allow_insecure:
+            logger.warning(
+                "SECURITY WARNING: CODESHIFT_ALLOW_INSECURE is set. "
+                "HTTP is being used for API communication. "
+                "Your API key may be exposed to network interception. "
+                "This should ONLY be used for local testing."
+            )
+        else:
+            raise InsecureURLError(
+                url,
+                f"HTTP is not allowed for non-localhost hosts: {host}. "
+                "Use HTTPS to protect your API key from interception.",
+            )
+
+    # Validate HTTPS URLs
+    if parsed.scheme not in ("http", "https"):
+        raise ValueError(f"URL scheme must be http or https, got: {parsed.scheme}")
+
+    # Remove trailing slash for consistency
+    return url.rstrip("/")
+

 @dataclass
 class APIResponse:
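The hostnames below are placeholders; they just illustrate the three paths through validate_api_url:

    validate_api_url("https://api.example.com/")   # returns "https://api.example.com"
    validate_api_url("http://localhost:8000")      # allowed, logs a development warning
    validate_api_url("http://api.example.com")     # raises InsecureURLError unless
                                                   # CODESHIFT_ALLOW_INSECURE=true is set
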
@@ -30,24 +127,46 @@ class CodeshiftAPIClient:
     - Authentication and authorization
     - Quota checking and billing
     - Server-side Anthropic API calls
+
+    Security features:
+    - HTTPS enforcement for all non-localhost URLs
+    - API key protection via secure headers
+    - SSL verification enabled by default
     """

     def __init__(
         self,
         api_key: str | None = None,
         api_url: str | None = None,
-        timeout: int =
+        timeout: int = 180,
+        verify_ssl: bool = True,
     ):
         """Initialize the API client.

         Args:
             api_key: Codeshift API key. Defaults to stored credentials.
             api_url: API base URL. Defaults to stored URL.
-            timeout: Request timeout in seconds.
+            timeout: Request timeout in seconds (default 180 for LLM calls).
+            verify_ssl: Whether to verify SSL certificates (default True).
+
+        Raises:
+            InsecureURLError: If the URL uses HTTP for a non-localhost host.
         """
         self.api_key = api_key or get_api_key()
-
+
+        # Validate and normalize the API URL
+        raw_url = api_url or get_api_url()
+        self.api_url = validate_api_url(raw_url)
+
         self.timeout = timeout
+        self.verify_ssl = verify_ssl
+
+        # Log SSL verification status
+        if not verify_ssl:
+            logger.warning(
+                "SSL verification is disabled. "
+                "This exposes the connection to man-in-the-middle attacks."
+            )

     @property
     def is_available(self) -> bool:

@@ -76,9 +195,13 @@ class CodeshiftAPIClient:

         return httpx.post(
             f"{self.api_url}{endpoint}",
-            headers={
+            headers={
+                "X-API-Key": self.api_key,
+                "Content-Type": "application/json",
+            },
             json=payload,
             timeout=self.timeout,
+            verify=self.verify_ssl,
         )

     def migrate_code(
@@ -157,6 +280,15 @@ class CodeshiftAPIClient:
                 error="LLM migrations require Pro tier or higher. Run 'codeshift upgrade-plan' to upgrade.",
             )

+        elif response.status_code == 429:
+            # Rate limited
+            retry_after = response.headers.get("Retry-After", "60")
+            return APIResponse(
+                success=False,
+                content=code,
+                error=f"Rate limited. Please wait {retry_after} seconds before retrying.",
+            )
+
         elif response.status_code == 503:
             return APIResponse(
                 success=False,

@@ -233,6 +365,14 @@ class CodeshiftAPIClient:
                 error="This feature requires Pro tier or higher.",
             )

+        elif response.status_code == 429:
+            retry_after = response.headers.get("Retry-After", "60")
+            return APIResponse(
+                success=False,
+                content="",
+                error=f"Rate limited. Please wait {retry_after} seconds before retrying.",
+            )
+
         else:
             return APIResponse(
                 success=False,