PyPI - data-contract-validator - Versions diffs - 1.0.3__tar.gz → 1.0.4a0__tar.gz - Mend

data-contract-validator 1.0.3.tar.gz → 1.0.4a0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

{data_contract_validator-1.0.3/data_contract_validator.egg-info → data_contract_validator-1.0.4a0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: data-contract-validator
-Version: 1.0.3
+Version: 1.0.4a0
 Summary: Adding pre-commit-fixes
 Author-email: Ogunniran Siji <ogunniransiji@gmail.com>
 Maintainer-email: Ogunniran Siji <ogunniransiji@gmail.com>

{data_contract_validator-1.0.3 → data_contract_validator-1.0.4a0}/data_contract_validator/cli.py RENAMED Viewed

@@ -71,7 +71,7 @@ def init(interactive: bool, framework: str, dbt_path: str, output_dir: str):
 def _interactive_setup() -> Dict[str, Any]:
-    """Interactive setup wizard - 3 simple questions."""
+    """Interactive setup wizard with directory support."""
     click.echo("📋 Quick Setup (3 questions):")
     click.echo()
@@ -100,63 +100,100 @@ def _interactive_setup() -> Dict[str, Any]:
         show_default=True,
     )
-    # Question 3: API models location
+    # Question 3: API models location with directory support
     click.echo()
     if framework == "fastapi":
-        default_path = "app/models.py"
-        prompt_text = "3️⃣  Where are your Pydantic models?"
+        default_path = "app/models"  # Default to directory
+        prompt_text = "3️⃣  Where are your Pydantic models? (file or directory)"
+        help_text = (
+            "   💡 Examples: 'app/models.py' (single file) or 'app/models' (directory)"
+        )
     elif framework == "django":
         default_path = "models.py"
         prompt_text = "3️⃣  Where are your Django models?"
+        help_text = "   💡 Examples: 'myapp/models.py' or 'models'"
     else:
-        default_path = "models.py"
+        default_path = "models"
         prompt_text = "3️⃣  Where are your API models?"
+        help_text = "   💡 Can be a file (models.py) or directory (models/)"
+    click.echo(help_text)
     api_location = click.prompt(prompt_text, default=default_path, show_default=True)
-    # Auto-detect if it's local file or GitHub repo
+    # Auto-detect if it's local file/directory or GitHub repo
     is_github_repo = "/" in api_location and not api_location.startswith((".", "/"))
     if is_github_repo:
-        # Format: "org/repo" or "org/repo/path/to/file.py"
+        # Format: "org/repo" or "org/repo/path/to/models"
         parts = api_location.split("/")
         if len(parts) >= 2:
             repo = "/".join(parts[:2])
-            path = "/".join(parts[2:]) if len(parts) > 2 else "models.py"
+            path = "/".join(parts[2:]) if len(parts) > 2 else "app/models"
         else:
             repo = api_location
-            path = "models.py"
+            path = "app/models"
         api_config = {"type": "github", "repo": repo, "path": path}
         click.echo(f"   🐙 GitHub repo detected: {repo}/{path}")
     else:
         api_config = {"type": "local", "path": api_location}
-        # Check if local file exists
-        if Path(api_location).exists():
-            click.echo("   ✅ Local file found")
+        # Check if local file/directory exists and provide feedback
+        local_path = Path(api_location)
+        if local_path.exists():
+            if local_path.is_file():
+                click.echo(f"   ✅ Local file found: {api_location}")
+            elif local_path.is_dir():
+                # Count Python files in directory
+                py_files = list(local_path.rglob("*.py"))
+                py_files = [
+                    f
+                    for f in py_files
+                    if not f.name.startswith("test_") and f.name != "__init__.py"
+                ]
+                click.echo(
+                    f"   ✅ Local directory found: {api_location} ({len(py_files)} Python files)"
+                )
+            else:
+                click.echo(
+                    f"   ⚠️  Path exists but is neither file nor directory: {api_location}"
+                )
         else:
-            click.echo(f"   ⚠️  File not found: {api_location}")
+            click.echo(f"   ⚠️  Path not found: {api_location}")
             if not click.confirm("   Continue anyway?"):
                 sys.exit(1)
+            # New question about manifest parsing
+    click.echo()
+    disable_manifest = click.confirm(
+        "4️⃣  Disable manifest.json parsing? (recommended if you have CTE-based models)",
+        default=True
+    )
+    if disable_manifest:
+        click.echo("   📄 Will use SQL file parsing (better for complex models)")
+    else:
+        click.echo("   📋 Will try manifest.json first, fallback to SQL parsing")
     return {
         "version": "1.0",
         "name": f"contracts-{Path.cwd().name}",
-        "description": "Auto-generated data contract validation",
         "source": {
-            "dbt": {"project_path": dbt_path, "auto_compile": True, "timeout": 120}
+            "dbt": {
+                "project_path": dbt_path,
+                "auto_compile": True,
+                "disable_manifest": disable_manifest  # NEW
+            }
         },
         "target": {framework: api_config},
         "validation": {
             "fail_on": ["missing_tables", "missing_required_columns"],
-            "warn_on": ["type_mismatches", "missing_optional_columns"],
-            "mode": "strict",
+            "warn_on": ["type_mismatches"]
         },
-        "output": {"format": "terminal", "show_suggestions": True, "max_issues": 20},
     }
 def _quick_setup(framework: str, dbt_path: str) -> Dict[str, Any]:
     """Quick non-interactive setup with smart defaults."""
@@ -454,17 +491,27 @@ def _test_setup(config_file: Path) -> bool:
     "--output", type=click.Choice(["terminal", "json", "github"]), default="terminal"
 )
 @click.option("--dbt-project", help="Override DBT project path")
-@click.option("--fastapi-local", help="Override FastAPI models path")
+@click.option(
+    "--fastapi-local", help="Override FastAPI models path (file or directory)"
+)
+@click.option("--fastapi-directory", help="Override FastAPI models directory path")
 @click.option("--fastapi-repo", help="Override FastAPI repo (org/repo)")
-@click.option("--fastapi-path", default="app/models.py", help="Path in FastAPI repo")
+@click.option(
+    "--fastapi-path",
+    default="app/models",
+    help="Path in FastAPI repo (file or directory)",
+)
+@click.option("--disable-manifest", is_flag=True, help="Force SQL parsing, ignore manifest.json")
 def validate(
     config: str,
     dry_run: bool,
     output: str,
     dbt_project: str,
     fastapi_local: str,
+    fastapi_directory: str,
     fastapi_repo: str,
     fastapi_path: str,
+    disable_manifest: bool,
 ):
     """🔍 Validate data contracts (prevents production breaks)."""
@@ -486,12 +533,20 @@ def validate(
     if dry_run:
         click.echo("🧪 Dry run - testing configuration only")
-        _test_configuration(config_data, dbt_project, fastapi_local, fastapi_repo)
+        _test_configuration(
+            config_data, dbt_project, fastapi_local, fastapi_directory, fastapi_repo, disable_manifest
+        )
         return
     # Run actual validation
     _run_validation(
-        config_data, output, dbt_project, fastapi_local, fastapi_repo, fastapi_path
+        config_data,
+        output,
+        dbt_project,
+        fastapi_local,
+        fastapi_directory,
+        fastapi_repo,
+        fastapi_path,
     )
@@ -530,27 +585,64 @@ def _run_validation(
     fastapi_local: str,
     fastapi_repo: str,
     fastapi_path: str,
+    disable_manifest: bool = False,
 ):
-    """Run the actual validation."""
+    """Run the actual validation with manifest disable option."""
     # Get DBT project path
     dbt_path = dbt_project or config_data.get("source", {}).get("dbt", {}).get(
         "project_path", "."
     )
-    # Initialize DBT extractor
+    # Get disable_manifest from config file OR command line flag
+    config_disable_manifest = config_data.get("source", {}).get("dbt", {}).get("disable_manifest", False)
+    use_disable_manifest = disable_manifest or config_disable_manifest  # CLI flag takes precedence
+    if use_disable_manifest:
+        click.echo("📄 Manifest parsing disabled")
+        if disable_manifest:
+            click.echo("   (via --disable-manifest flag)")
+        else:
+            click.echo("   (via .retl-validator.yml config)")
+    # Initialize DBT extractor with disable_manifest option
     try:
-        dbt_extractor = DBTExtractor(dbt_path)
+        dbt_extractor = DBTExtractor(dbt_path, disable_manifest=use_disable_manifest)
     except Exception as e:
         click.echo(f"❌ Error initializing DBT extractor: {e}")
         sys.exit(1)
-    # Initialize FastAPI extractor
+    # Initialize FastAPI extractor with directory support
     try:
         if fastapi_local:
-            fastapi_extractor = FastAPIExtractor.from_local_file(fastapi_local)
+            # Use local path (file or directory)
+            local_path = fastapi_local
+            # Auto-detect if it's a file or directory
+            path = Path(local_path)
+            if path.is_file():
+                click.echo(f"📄 Using FastAPI models file: {local_path}")
+                fastapi_extractor = FastAPIExtractor.from_local_file(local_path)
+            elif path.is_dir():
+                click.echo(f"📁 Using FastAPI models directory: {local_path}")
+                fastapi_extractor = FastAPIExtractor.from_local_directory(local_path)
+            else:
+                raise ValueError(f"Path does not exist: {local_path}")
         elif fastapi_repo:
+            # Use GitHub repository
             github_token = os.environ.get("GITHUB_TOKEN")
+            # Check if fastapi_path ends with .py (file) or not (directory)
+            if fastapi_path.endswith(".py"):
+                click.echo(
+                    f"📄 Using FastAPI models file: {fastapi_repo}/{fastapi_path}"
+                )
+            else:
+                click.echo(
+                    f"📁 Using FastAPI models directory: {fastapi_repo}/{fastapi_path}"
+                )
             fastapi_extractor = FastAPIExtractor.from_github_repo(
                 repo=fastapi_repo, path=fastapi_path, token=github_token
             )
@@ -558,14 +650,23 @@ def _run_validation(
             # Get from config
             target_config = list(config_data.get("target", {}).values())[0]
             if target_config.get("type") == "local":
-                fastapi_extractor = FastAPIExtractor.from_local_file(
-                    target_config.get("path")
-                )
+                local_path = target_config.get("path")
+                path = Path(local_path)
+                if path.is_file():
+                    fastapi_extractor = FastAPIExtractor.from_local_file(local_path)
+                elif path.is_dir():
+                    fastapi_extractor = FastAPIExtractor.from_local_directory(
+                        local_path
+                    )
+                else:
+                    raise ValueError(f"Path does not exist: {local_path}")
             elif target_config.get("type") == "github":
                 github_token = os.environ.get("GITHUB_TOKEN")
                 fastapi_extractor = FastAPIExtractor.from_github_repo(
                     repo=target_config.get("repo"),
-                    path=target_config.get("path", "app/models.py"),
+                    path=target_config.get("path", "app/models"),
                     token=github_token,
                 )
             else:

{data_contract_validator-1.0.3 → data_contract_validator-1.0.4a0}/data_contract_validator/extractors/dbt.py RENAMED Viewed

@@ -1,6 +1,6 @@
 # data_contract_validator/extractors/dbt.py
 """
-DBT schema extractor - simplified version of your working code.
+DBT schema extractor
 """
 import json
@@ -16,11 +16,12 @@ from ..core.models import Schema
 class DBTExtractor(BaseExtractor):
     """Extract schemas from DBT projects."""
-    def __init__(self, project_path: str = "."):
+    def __init__(self, project_path: str = ".", disable_manifest: bool = False):
         self.project_path = Path(project_path)
         self.target_dir = self.project_path / "target"
         self.manifest_path = self.target_dir / "manifest.json"
         self.models_path = self.project_path / "models"
+        self.disable_manifest = disable_manifest
     def extract_schemas(self) -> Dict[str, Schema]:
         """Extract schemas from DBT project."""

data_contract_validator-1.0.4a0/data_contract_validator/extractors/fastapi.py ADDED Viewed

@@ -0,0 +1,439 @@
+# data_contract_validator/extractors/fastapi.py
+"""
+Enhanced FastAPI/Pydantic schema extractor with directory support
+"""
+import ast
+import re
+import requests
+import os
+from pathlib import Path
+from typing import Dict, List, Any, Optional, Union, get_type_hints
+from .base import BaseExtractor
+from ..core.models import Schema
+class FastAPIExtractor(BaseExtractor):
+    """Extract schemas from FastAPI/Pydantic models - supports files and directories."""
+    def __init__(
+        self, content: str = None, source: str = "unknown", file_path: str = None
+    ):
+        self.content = content
+        self.source = source
+        self.file_path = file_path
+        self.all_files_content = {}  # For directory mode
+    @classmethod
+    def from_local_file(cls, file_path: str) -> "FastAPIExtractor":
+        """Create extractor from local file."""
+        file_path = Path(file_path)
+        if not file_path.exists():
+            raise ValueError(f"Path does not exist: {file_path}")
+        if file_path.is_file():
+            # Single file mode (existing behavior)
+            with open(file_path, "r", encoding="utf-8") as f:
+                content = f.read()
+            return cls(
+                content=content, source=f"local:{file_path}", file_path=str(file_path)
+            )
+        elif file_path.is_dir():
+            # Directory mode (new functionality)
+            return cls._from_local_directory(file_path)
+        else:
+            raise ValueError(f"Path is neither file nor directory: {file_path}")
+    @classmethod
+    def from_local_directory(cls, directory_path: str) -> "FastAPIExtractor":
+        """Create extractor from local directory containing model files."""
+        return cls._from_local_directory(Path(directory_path))
+    @classmethod
+    def _from_local_directory(cls, dir_path: Path) -> "FastAPIExtractor":
+        """Internal method to handle directory extraction."""
+        if not dir_path.is_dir():
+            raise ValueError(f"Not a directory: {dir_path}")
+        # Find all Python files in the directory and subdirectories
+        python_files = list(dir_path.rglob("*.py"))
+        if not python_files:
+            raise ValueError(f"No Python files found in directory: {dir_path}")
+        print(f"🔍 Found {len(python_files)} Python files in {dir_path}")
+        # Read all files
+        all_files_content = {}
+        for py_file in python_files:
+            # Skip common non-model files
+            if py_file.name in [
+                "__init__.py",
+                "test_",
+                "tests.py",
+            ] or py_file.name.startswith("test_"):
+                continue
+            try:
+                with open(py_file, "r", encoding="utf-8") as f:
+                    content = f.read()
+                    relative_path = py_file.relative_to(dir_path)
+                    all_files_content[str(relative_path)] = content
+                    print(f"   📄 Loaded: {relative_path}")
+            except Exception as e:
+                print(f"   ⚠️  Could not read {py_file}: {e}")
+        if not all_files_content:
+            raise ValueError(f"Could not read any Python files from: {dir_path}")
+        # Create extractor instance for directory mode
+        extractor = cls(source=f"local_directory:{dir_path}")
+        extractor.all_files_content = all_files_content
+        return extractor
+    @classmethod
+    def from_github_repo(
+        cls, repo: str, path: str, token: str = None
+    ) -> "FastAPIExtractor":
+        """Create extractor from GitHub repository - supports files and directories."""
+        # First, check if it's a file or directory
+        if path.endswith(".py"):
+            # Single file
+            content = cls._fetch_github_file(repo, path, token)
+            if not content:
+                raise ValueError(f"Could not fetch {repo}/{path} from GitHub")
+            return cls(content, source=f"github:{repo}/{path}")
+        else:
+            # Assume it's a directory
+            return cls._from_github_directory(repo, path, token)
+    @classmethod
+    def _from_github_directory(
+        cls, repo: str, dir_path: str, token: str = None
+    ) -> "FastAPIExtractor":
+        """Fetch all Python files from a GitHub directory."""
+        # Get directory contents from GitHub API
+        url = f"https://api.github.com/repos/{repo}/contents/{dir_path}"
+        headers = {}
+        if token:
+            headers["Authorization"] = f"token {token}"
+        try:
+            response = requests.get(url, headers=headers)
+            if response.status_code != 200:
+                raise ValueError(
+                    f"Could not fetch directory {repo}/{dir_path}: {response.status_code}"
+                )
+            contents = response.json()
+            if not isinstance(contents, list):
+                raise ValueError(f"Path {dir_path} is not a directory")
+            all_files_content = {}
+            for item in contents:
+                if item["type"] == "file" and item["name"].endswith(".py"):
+                    # Skip common non-model files
+                    if item["name"] in ["__init__.py"] or item["name"].startswith(
+                        "test_"
+                    ):
+                        continue
+                    file_content = cls._fetch_github_file(repo, item["path"], token)
+                    if file_content:
+                        all_files_content[item["name"]] = file_content
+                        print(f"   📄 Downloaded: {item['name']}")
+                elif item["type"] == "dir":
+                    # Recursively fetch subdirectories
+                    try:
+                        subdir_files = cls._fetch_github_directory_recursive(
+                            repo, item["path"], token
+                        )
+                        for sub_path, sub_content in subdir_files.items():
+                            all_files_content[f"{item['name']}/{sub_path}"] = (
+                                sub_content
+                            )
+                    except Exception as e:
+                        print(f"   ⚠️  Could not fetch subdirectory {item['name']}: {e}")
+            if not all_files_content:
+                raise ValueError(f"No Python model files found in {repo}/{dir_path}")
+            print(
+                f"   ✅ Downloaded {len(all_files_content)} files from {repo}/{dir_path}"
+            )
+            extractor = cls(source=f"github_directory:{repo}/{dir_path}")
+            extractor.all_files_content = all_files_content
+            return extractor
+        except Exception as e:
+            raise ValueError(f"Error fetching GitHub directory {repo}/{dir_path}: {e}")
+    @classmethod
+    def _fetch_github_directory_recursive(
+        cls, repo: str, dir_path: str, token: str = None
+    ) -> Dict[str, str]:
+        """Recursively fetch Python files from GitHub directory."""
+        url = f"https://api.github.com/repos/{repo}/contents/{dir_path}"
+        headers = {}
+        if token:
+            headers["Authorization"] = f"token {token}"
+        files_content = {}
+        try:
+            response = requests.get(url, headers=headers)
+            if response.status_code == 200:
+                contents = response.json()
+                for item in contents:
+                    if item["type"] == "file" and item["name"].endswith(".py"):
+                        if (
+                            not item["name"].startswith("test_")
+                            and item["name"] != "__init__.py"
+                        ):
+                            file_content = cls._fetch_github_file(
+                                repo, item["path"], token
+                            )
+                            if file_content:
+                                files_content[item["name"]] = file_content
+                    elif item["type"] == "dir":
+                        # Recursive call for subdirectories
+                        subdir_files = cls._fetch_github_directory_recursive(
+                            repo, item["path"], token
+                        )
+                        for sub_path, sub_content in subdir_files.items():
+                            files_content[f"{item['name']}/{sub_path}"] = sub_content
+        except Exception as e:
+            print(f"   ⚠️  Error fetching subdirectory {dir_path}: {e}")
+        return files_content
+    @staticmethod
+    def _fetch_github_file(repo: str, path: str, token: str = None) -> Optional[str]:
+        """Fetch file content from GitHub API."""
+        url = f"https://api.github.com/repos/{repo}/contents/{path}"
+        headers = {}
+        if token:
+            headers["Authorization"] = f"token {token}"
+        try:
+            response = requests.get(url, headers=headers)
+            if response.status_code == 200:
+                import base64
+                content = base64.b64decode(response.json()["content"]).decode("utf-8")
+                return content
+            else:
+                print(f"   ❌ GitHub API error for {path}: {response.status_code}")
+                return None
+        except Exception as e:
+            print(f"   ❌ Error fetching {path} from GitHub: {e}")
+            return None
+    def extract_schemas(self) -> Dict[str, Schema]:
+        """Extract schemas from FastAPI/Pydantic models."""
+        if self.all_files_content:
+            # Directory mode - extract from multiple files
+            return self._extract_schemas_from_directory()
+        else:
+            # Single file mode - existing behavior
+            return self._extract_schemas_from_single_file()
+    def _extract_schemas_from_single_file(self) -> Dict[str, Schema]:
+        """Extract schemas from a single file (existing behavior)."""
+        print(f"🔍 Extracting FastAPI schemas from {self.source}")
+        try:
+            schemas = self._parse_pydantic_models(self.content)
+            print(f"   ✅ Found {len(schemas)} models")
+            return schemas
+        except Exception as e:
+            print(f"   ❌ Error parsing models: {e}")
+            return {}
+    def _extract_schemas_from_directory(self) -> Dict[str, Schema]:
+        """Extract schemas from multiple files in a directory."""
+        print(f"🔍 Extracting FastAPI schemas from directory {self.source}")
+        all_schemas = {}
+        total_models = 0
+        for file_path, file_content in self.all_files_content.items():
+            try:
+                print(f"   📄 Processing: {file_path}")
+                file_schemas = self._parse_pydantic_models(
+                    file_content, file_source=file_path
+                )
+                # Check for duplicate model names across files
+                for schema_name, schema in file_schemas.items():
+                    if schema_name in all_schemas:
+                        print(
+                            f"   ⚠️  Duplicate model name '{schema_name}' found in {file_path}"
+                        )
+                        print(f"       Previous: {all_schemas[schema_name].source}")
+                        print(f"       Current:  {schema.source}")
+                        # Use a unique name by including file path
+                        unique_name = f"{schema_name}_{file_path.replace('/', '_').replace('.py', '')}"
+                        all_schemas[unique_name] = schema
+                        print(f"       Renamed to: {unique_name}")
+                    else:
+                        all_schemas[schema_name] = schema
+                if file_schemas:
+                    print(f"       ✅ Found {len(file_schemas)} models")
+                    total_models += len(file_schemas)
+                else:
+                    print(f"       ⚪ No Pydantic models found")
+            except Exception as e:
+                print(f"   ❌ Error parsing {file_path}: {e}")
+        print(
+            f"   ✅ Total: {total_models} models from {len(self.all_files_content)} files"
+        )
+        return all_schemas
+    def _parse_pydantic_models(
+        self, content: str, file_source: str = None
+    ) -> Dict[str, Schema]:
+        """Parse Pydantic models from Python code."""
+        try:
+            tree = ast.parse(content)
+            schemas = {}
+            for node in ast.walk(tree):
+                if isinstance(node, ast.ClassDef):
+                    # Check if it's a Pydantic model
+                    if self._is_pydantic_model(node):
+                        schema = self._analyze_pydantic_class(node, file_source)
+                        if schema:
+                            table_name = schema.name
+                            schemas[table_name] = schema
+            return schemas
+        except Exception as e:
+            print(f"   ❌ Error parsing Python code: {e}")
+            return {}
+    def _is_pydantic_model(self, node: ast.ClassDef) -> bool:
+        """Check if class inherits from BaseModel or SQLModel."""
+        for base in node.bases:
+            if isinstance(base, ast.Name) and base.id in ["BaseModel", "SQLModel"]:
+                return True
+            elif isinstance(base, ast.Attribute) and base.attr in [
+                "BaseModel",
+                "SQLModel",
+            ]:
+                return True
+        return False
+    def _analyze_pydantic_class(
+        self, node: ast.ClassDef, file_source: str = None
+    ) -> Optional[Schema]:
+        """Analyze a Pydantic class to extract schema."""
+        # Convert class name to table name
+        table_name = self._class_to_table_name(node.name)
+        # Skip SQLModel tables (database models, not API models)
+        if self._is_sqlmodel_table(node):
+            return None
+        columns = []
+        # Parse type annotations
+        for item in node.body:
+            if isinstance(item, ast.AnnAssign) and isinstance(item.target, ast.Name):
+                field_name = item.target.id
+                field_type = self._parse_type_annotation(item.annotation)
+                is_required = not self._is_optional_type(item.annotation)
+                columns.append(
+                    {
+                        "name": field_name,
+                        "type": self._python_to_sql_type(field_type),
+                        "required": is_required,
+                        "nullable": not is_required,
+                    }
+                )
+        if not columns:
+            return None
+        # Create source identifier
+        if file_source:
+            source = f"pydantic:{node.name}@{file_source}"
+        else:
+            source = f"pydantic:{node.name}"
+        return Schema(name=table_name, columns=columns, source=source)
+    def _is_sqlmodel_table(self, node: ast.ClassDef) -> bool:
+        """Check if this is a SQLModel table (database model, not API model)."""
+        # Look for table=True in the class definition
+        for base in node.bases:
+            if isinstance(base, ast.Call):
+                for keyword in base.keywords:
+                    if (
+                        keyword.arg == "table"
+                        and isinstance(keyword.value, ast.Constant)
+                        and keyword.value.value is True
+                    ):
+                        return True
+        return False
+    def _class_to_table_name(self, class_name: str) -> str:
+        """Convert CamelCase class name to snake_case table name."""
+        # Insert underscore before capital letters
+        table_name = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", class_name)
+        table_name = re.sub("([a-z0-9])([A-Z])", r"\1_\2", table_name).lower()
+        # Remove common suffixes
+        for suffix in ["_model", "_schema", "_response", "_request"]:
+            if table_name.endswith(suffix):
+                table_name = table_name[: -len(suffix)]
+                break
+        return table_name
+    def _parse_type_annotation(self, annotation) -> str:
+        """Parse type annotation to string."""
+        if isinstance(annotation, ast.Name):
+            return annotation.id
+        elif isinstance(annotation, ast.Subscript):
+            if isinstance(annotation.value, ast.Name):
+                # Handle Optional[Type], List[Type], etc.
+                inner_type = self._parse_type_annotation(annotation.slice)
+                return f"{annotation.value.id}[{inner_type}]"
+        elif isinstance(annotation, ast.Attribute):
+            # Handle datetime.datetime, etc.
+            if hasattr(annotation.value, "id"):
+                return f"{annotation.value.id}.{annotation.attr}"
+            return annotation.attr
+        return "unknown"
+    def _is_optional_type(self, annotation) -> bool:
+        """Check if type annotation is Optional."""
+        if isinstance(annotation, ast.Subscript):
+            if isinstance(annotation.value, ast.Name):
+                # Check for Optional[Type] or Union[Type, None]
+                if annotation.value.id in ["Optional", "Union"]:
+                    return True
+        return False

{data_contract_validator-1.0.3 → data_contract_validator-1.0.4a0/data_contract_validator.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: data-contract-validator
-Version: 1.0.3
+Version: 1.0.4a0
 Summary: Adding pre-commit-fixes
 Author-email: Ogunniran Siji <ogunniransiji@gmail.com>
 Maintainer-email: Ogunniran Siji <ogunniransiji@gmail.com>

{data_contract_validator-1.0.3 → data_contract_validator-1.0.4a0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "data-contract-validator"
-version = "1.0.3"
+version = "1.0.4a"
 description = "Adding pre-commit-fixes"
 readme = "README.md"
 license = {text = "MIT"}

data_contract_validator-1.0.3/data_contract_validator/extractors/fastapi.py DELETED Viewed

@@ -1,200 +0,0 @@
-# data_contract_validator/extractors/fastapi.py
-"""
-FastAPI/Pydantic schema extractor - simplified version of your working code.
-"""
-import ast
-import re
-import requests
-import os
-from pathlib import Path
-from typing import Dict, List, Any, Optional, Union, get_type_hints
-from .base import BaseExtractor
-from ..core.models import Schema
-class FastAPIExtractor(BaseExtractor):
-    """Extract schemas from FastAPI/Pydantic models."""
-    def __init__(self, content: str, source: str = "unknown"):
-        self.content = content
-        self.source = source
-    @classmethod
-    def from_local_file(cls, file_path: str) -> "FastAPIExtractor":
-        """Create extractor from local file."""
-        file_path = Path(file_path)
-        with open(file_path, "r", encoding="utf-8") as f:
-            content = f.read()
-        return cls(content, source=f"local:{file_path}")
-    @classmethod
-    def from_github_repo(
-        cls, repo: str, path: str, token: str = None
-    ) -> "FastAPIExtractor":
-        """Create extractor from GitHub repository."""
-        content = cls._fetch_github_file(repo, path, token)
-        if not content:
-            raise ValueError(f"Could not fetch {repo}/{path} from GitHub")
-        return cls(content, source=f"github:{repo}/{path}")
-    @staticmethod
-    def _fetch_github_file(repo: str, path: str, token: str = None) -> Optional[str]:
-        """Fetch file content from GitHub API."""
-        url = f"https://api.github.com/repos/{repo}/contents/{path}"
-        headers = {}
-        if token:
-            headers["Authorization"] = f"token {token}"
-        try:
-            response = requests.get(url, headers=headers)
-            if response.status_code == 200:
-                import base64
-                content = base64.b64decode(response.json()["content"]).decode("utf-8")
-                print(f"   ✅ Downloaded {path} from {repo}")
-                return content
-            else:
-                print(f"   ❌ GitHub API error: {response.status_code}")
-                return None
-        except Exception as e:
-            print(f"   ❌ Error fetching from GitHub: {e}")
-            return None
-    def extract_schemas(self) -> Dict[str, Schema]:
-        """Extract schemas from FastAPI/Pydantic models."""
-        print(f"🔍 Extracting FastAPI schemas from {self.source}")
-        try:
-            schemas = self._parse_pydantic_models(self.content)
-            print(f"   ✅ Found {len(schemas)} models")
-            return schemas
-        except Exception as e:
-            print(f"   ❌ Error parsing models: {e}")
-            return {}
-    def _parse_pydantic_models(self, content: str) -> Dict[str, Schema]:
-        """Parse Pydantic models from Python code."""
-        try:
-            tree = ast.parse(content)
-            schemas = {}
-            for node in ast.walk(tree):
-                if isinstance(node, ast.ClassDef):
-                    # Check if it's a Pydantic model
-                    if self._is_pydantic_model(node):
-                        schema = self._analyze_pydantic_class(node)
-                        if schema:
-                            table_name = schema.name
-                            schemas[table_name] = schema
-                            print(f"   ✅ Found model: {node.name} -> {table_name}")
-            return schemas
-        except Exception as e:
-            print(f"   ❌ Error parsing Python code: {e}")
-            return {}
-    def _is_pydantic_model(self, node: ast.ClassDef) -> bool:
-        """Check if class inherits from BaseModel or SQLModel."""
-        for base in node.bases:
-            if isinstance(base, ast.Name) and base.id in ["BaseModel", "SQLModel"]:
-                return True
-            elif isinstance(base, ast.Attribute) and base.attr in [
-                "BaseModel",
-                "SQLModel",
-            ]:
-                return True
-        return False
-    def _analyze_pydantic_class(self, node: ast.ClassDef) -> Optional[Schema]:
-        """Analyze a Pydantic class to extract schema."""
-        # Convert class name to table name
-        table_name = self._class_to_table_name(node.name)
-        # Skip SQLModel tables (database models, not API models)
-        if self._is_sqlmodel_table(node):
-            return None
-        columns = []
-        # Parse type annotations
-        for item in node.body:
-            if isinstance(item, ast.AnnAssign) and isinstance(item.target, ast.Name):
-                field_name = item.target.id
-                field_type = self._parse_type_annotation(item.annotation)
-                is_required = not self._is_optional_type(item.annotation)
-                columns.append(
-                    {
-                        "name": field_name,
-                        "type": self._python_to_sql_type(field_type),
-                        "required": is_required,
-                        "nullable": not is_required,
-                    }
-                )
-        if not columns:
-            return None
-        return Schema(name=table_name, columns=columns, source=f"pydantic:{node.name}")
-    def _is_sqlmodel_table(self, node: ast.ClassDef) -> bool:
-        """Check if this is a SQLModel table (database model, not API model)."""
-        # Look for table=True in the class definition
-        for base in node.bases:
-            if isinstance(base, ast.Call):
-                for keyword in base.keywords:
-                    if (
-                        keyword.arg == "table"
-                        and isinstance(keyword.value, ast.Constant)
-                        and keyword.value.value is True
-                    ):
-                        return True
-        return False
-    def _class_to_table_name(self, class_name: str) -> str:
-        """Convert CamelCase class name to snake_case table name."""
-        # Insert underscore before capital letters
-        table_name = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", class_name)
-        table_name = re.sub("([a-z0-9])([A-Z])", r"\1_\2", table_name).lower()
-        # Remove common suffixes
-        for suffix in ["_model", "_schema", "_response", "_request"]:
-            if table_name.endswith(suffix):
-                table_name = table_name[: -len(suffix)]
-                break
-        # Pluralize if it doesn't end with 's'
-        # if not table_name.endswith('s') and not table_name.endswith('_data'):
-        #     table_name += 's'
-        return table_name
-    def _parse_type_annotation(self, annotation) -> str:
-        """Parse type annotation to string."""
-        if isinstance(annotation, ast.Name):
-            return annotation.id
-        elif isinstance(annotation, ast.Subscript):
-            if isinstance(annotation.value, ast.Name):
-                # Handle Optional[Type], List[Type], etc.
-                inner_type = self._parse_type_annotation(annotation.slice)
-                return f"{annotation.value.id}[{inner_type}]"
-        elif isinstance(annotation, ast.Attribute):
-            # Handle datetime.datetime, etc.
-            if hasattr(annotation.value, "id"):
-                return f"{annotation.value.id}.{annotation.attr}"
-            return annotation.attr
-        return "unknown"
-    def _is_optional_type(self, annotation) -> bool:
-        """Check if type annotation is Optional."""
-        if isinstance(annotation, ast.Subscript):
-            if isinstance(annotation.value, ast.Name):
-                # Check for Optional[Type] or Union[Type, None]
-                if annotation.value.id in ["Optional", "Union"]:
-                    return True
-        return False