PyPI - data-contract-validator - Versions diffs - 1.0.3__tar.gz → 1.0.4__tar.gz - Mend

data-contract-validator 1.0.3.tar.gz → 1.0.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

{data_contract_validator-1.0.3/data_contract_validator.egg-info → data_contract_validator-1.0.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: data-contract-validator
-Version: 1.0.3
+Version: 1.0.4
 Summary: Adding pre-commit-fixes
 Author-email: Ogunniran Siji <ogunniransiji@gmail.com>
 Maintainer-email: Ogunniran Siji <ogunniransiji@gmail.com>

{data_contract_validator-1.0.3 → data_contract_validator-1.0.4}/data_contract_validator/cli.py RENAMED Viewed

@@ -71,7 +71,7 @@ def init(interactive: bool, framework: str, dbt_path: str, output_dir: str):
 def _interactive_setup() -> Dict[str, Any]:
-    """Interactive setup wizard - 3 simple questions."""
+    """Interactive setup wizard with directory support."""
     click.echo("📋 Quick Setup (3 questions):")
     click.echo()
@@ -100,43 +100,66 @@ def _interactive_setup() -> Dict[str, Any]:
         show_default=True,
     )
-    # Question 3: API models location
+    # Question 3: API models location with directory support
     click.echo()
     if framework == "fastapi":
-        default_path = "app/models.py"
-        prompt_text = "3️⃣  Where are your Pydantic models?"
+        default_path = "app/models"  # Default to directory
+        prompt_text = "3️⃣  Where are your Pydantic models? (file or directory)"
+        help_text = (
+            "   💡 Examples: 'app/models.py' (single file) or 'app/models' (directory)"
+        )
     elif framework == "django":
         default_path = "models.py"
         prompt_text = "3️⃣  Where are your Django models?"
+        help_text = "   💡 Examples: 'myapp/models.py' or 'models'"
     else:
-        default_path = "models.py"
+        default_path = "models"
         prompt_text = "3️⃣  Where are your API models?"
+        help_text = "   💡 Can be a file (models.py) or directory (models/)"
+    click.echo(help_text)
     api_location = click.prompt(prompt_text, default=default_path, show_default=True)
-    # Auto-detect if it's local file or GitHub repo
+    # Auto-detect if it's local file/directory or GitHub repo
     is_github_repo = "/" in api_location and not api_location.startswith((".", "/"))
     if is_github_repo:
-        # Format: "org/repo" or "org/repo/path/to/file.py"
+        # Format: "org/repo" or "org/repo/path/to/models"
         parts = api_location.split("/")
         if len(parts) >= 2:
             repo = "/".join(parts[:2])
-            path = "/".join(parts[2:]) if len(parts) > 2 else "models.py"
+            path = "/".join(parts[2:]) if len(parts) > 2 else "app/models"
         else:
             repo = api_location
-            path = "models.py"
+            path = "app/models"
         api_config = {"type": "github", "repo": repo, "path": path}
         click.echo(f"   🐙 GitHub repo detected: {repo}/{path}")
     else:
         api_config = {"type": "local", "path": api_location}
-        # Check if local file exists
-        if Path(api_location).exists():
-            click.echo("   ✅ Local file found")
+        # Check if local file/directory exists and provide feedback
+        local_path = Path(api_location)
+        if local_path.exists():
+            if local_path.is_file():
+                click.echo(f"   ✅ Local file found: {api_location}")
+            elif local_path.is_dir():
+                # Count Python files in directory
+                py_files = list(local_path.rglob("*.py"))
+                py_files = [
+                    f
+                    for f in py_files
+                    if not f.name.startswith("test_") and f.name != "__init__.py"
+                ]
+                click.echo(
+                    f"   ✅ Local directory found: {api_location} ({len(py_files)} Python files)"
+                )
+            else:
+                click.echo(
+                    f"   ⚠️  Path exists but is neither file nor directory: {api_location}"
+                )
         else:
-            click.echo(f"   ⚠️  File not found: {api_location}")
+            click.echo(f"   ⚠️  Path not found: {api_location}")
             if not click.confirm("   Continue anyway?"):
                 sys.exit(1)
@@ -454,15 +477,23 @@ def _test_setup(config_file: Path) -> bool:
     "--output", type=click.Choice(["terminal", "json", "github"]), default="terminal"
 )
 @click.option("--dbt-project", help="Override DBT project path")
-@click.option("--fastapi-local", help="Override FastAPI models path")
+@click.option(
+    "--fastapi-local", help="Override FastAPI models path (file or directory)"
+)
+@click.option("--fastapi-directory", help="Override FastAPI models directory path")
 @click.option("--fastapi-repo", help="Override FastAPI repo (org/repo)")
-@click.option("--fastapi-path", default="app/models.py", help="Path in FastAPI repo")
+@click.option(
+    "--fastapi-path",
+    default="app/models",
+    help="Path in FastAPI repo (file or directory)",
+)
 def validate(
     config: str,
     dry_run: bool,
     output: str,
     dbt_project: str,
     fastapi_local: str,
+    fastapi_directory: str,
     fastapi_repo: str,
     fastapi_path: str,
 ):
@@ -486,12 +517,20 @@ def validate(
     if dry_run:
         click.echo("🧪 Dry run - testing configuration only")
-        _test_configuration(config_data, dbt_project, fastapi_local, fastapi_repo)
+        _test_configuration(
+            config_data, dbt_project, fastapi_local, fastapi_directory, fastapi_repo
+        )
         return
     # Run actual validation
     _run_validation(
-        config_data, output, dbt_project, fastapi_local, fastapi_repo, fastapi_path
+        config_data,
+        output,
+        dbt_project,
+        fastapi_local,
+        fastapi_directory,
+        fastapi_repo,
+        fastapi_path,
     )
@@ -528,10 +567,11 @@ def _run_validation(
     output: str,
     dbt_project: str,
     fastapi_local: str,
+    fastapi_directory: str,
     fastapi_repo: str,
     fastapi_path: str,
 ):
-    """Run the actual validation."""
+    """Run the actual validation with directory support."""
     # Get DBT project path
     dbt_path = dbt_project or config_data.get("source", {}).get("dbt", {}).get(
@@ -545,12 +585,37 @@ def _run_validation(
         click.echo(f"❌ Error initializing DBT extractor: {e}")
         sys.exit(1)
-    # Initialize FastAPI extractor
+    # Initialize FastAPI extractor with directory support
     try:
-        if fastapi_local:
-            fastapi_extractor = FastAPIExtractor.from_local_file(fastapi_local)
+        if fastapi_local or fastapi_directory:
+            # Use local path (file or directory)
+            local_path = fastapi_local or fastapi_directory
+            # Auto-detect if it's a file or directory
+            path = Path(local_path)
+            if path.is_file():
+                click.echo(f"📄 Using FastAPI models file: {local_path}")
+                fastapi_extractor = FastAPIExtractor.from_local_file(local_path)
+            elif path.is_dir():
+                click.echo(f"📁 Using FastAPI models directory: {local_path}")
+                fastapi_extractor = FastAPIExtractor.from_local_directory(local_path)
+            else:
+                raise ValueError(f"Path does not exist: {local_path}")
         elif fastapi_repo:
+            # Use GitHub repository
             github_token = os.environ.get("GITHUB_TOKEN")
+            # Check if fastapi_path ends with .py (file) or not (directory)
+            if fastapi_path.endswith(".py"):
+                click.echo(
+                    f"📄 Using FastAPI models file: {fastapi_repo}/{fastapi_path}"
+                )
+            else:
+                click.echo(
+                    f"📁 Using FastAPI models directory: {fastapi_repo}/{fastapi_path}"
+                )
             fastapi_extractor = FastAPIExtractor.from_github_repo(
                 repo=fastapi_repo, path=fastapi_path, token=github_token
             )
@@ -558,14 +623,23 @@ def _run_validation(
             # Get from config
             target_config = list(config_data.get("target", {}).values())[0]
             if target_config.get("type") == "local":
-                fastapi_extractor = FastAPIExtractor.from_local_file(
-                    target_config.get("path")
-                )
+                local_path = target_config.get("path")
+                path = Path(local_path)
+                if path.is_file():
+                    fastapi_extractor = FastAPIExtractor.from_local_file(local_path)
+                elif path.is_dir():
+                    fastapi_extractor = FastAPIExtractor.from_local_directory(
+                        local_path
+                    )
+                else:
+                    raise ValueError(f"Path does not exist: {local_path}")
             elif target_config.get("type") == "github":
                 github_token = os.environ.get("GITHUB_TOKEN")
                 fastapi_extractor = FastAPIExtractor.from_github_repo(
                     repo=target_config.get("repo"),
-                    path=target_config.get("path", "app/models.py"),
+                    path=target_config.get("path", "app/models"),
                     token=github_token,
                 )
             else:

{data_contract_validator-1.0.3 → data_contract_validator-1.0.4}/data_contract_validator/extractors/dbt.py RENAMED Viewed

@@ -1,6 +1,6 @@
 # data_contract_validator/extractors/dbt.py
 """
-DBT schema extractor - simplified version of your working code.
+DBT schema extractor
 """
 import json

data_contract_validator-1.0.4/data_contract_validator/extractors/fastapi.py ADDED Viewed

@@ -0,0 +1,439 @@
+# data_contract_validator/extractors/fastapi.py
+"""
+Enhanced FastAPI/Pydantic schema extractor with directory support
+"""
+import ast
+import re
+import requests
+import os
+from pathlib import Path
+from typing import Dict, List, Any, Optional, Union, get_type_hints
+from .base import BaseExtractor
+from ..core.models import Schema
+class FastAPIExtractor(BaseExtractor):
+    """Extract schemas from FastAPI/Pydantic models - supports files and directories."""
+    def __init__(
+        self, content: str = None, source: str = "unknown", file_path: str = None
+    ):
+        self.content = content
+        self.source = source
+        self.file_path = file_path
+        self.all_files_content = {}  # For directory mode
+    @classmethod
+    def from_local_file(cls, file_path: str) -> "FastAPIExtractor":
+        """Create extractor from local file."""
+        file_path = Path(file_path)
+        if not file_path.exists():
+            raise ValueError(f"Path does not exist: {file_path}")
+        if file_path.is_file():
+            # Single file mode (existing behavior)
+            with open(file_path, "r", encoding="utf-8") as f:
+                content = f.read()
+            return cls(
+                content=content, source=f"local:{file_path}", file_path=str(file_path)
+            )
+        elif file_path.is_dir():
+            # Directory mode (new functionality)
+            return cls._from_local_directory(file_path)
+        else:
+            raise ValueError(f"Path is neither file nor directory: {file_path}")
+    @classmethod
+    def from_local_directory(cls, directory_path: str) -> "FastAPIExtractor":
+        """Create extractor from local directory containing model files."""
+        return cls._from_local_directory(Path(directory_path))
+    @classmethod
+    def _from_local_directory(cls, dir_path: Path) -> "FastAPIExtractor":
+        """Internal method to handle directory extraction."""
+        if not dir_path.is_dir():
+            raise ValueError(f"Not a directory: {dir_path}")
+        # Find all Python files in the directory and subdirectories
+        python_files = list(dir_path.rglob("*.py"))
+        if not python_files:
+            raise ValueError(f"No Python files found in directory: {dir_path}")
+        print(f"🔍 Found {len(python_files)} Python files in {dir_path}")
+        # Read all files
+        all_files_content = {}
+        for py_file in python_files:
+            # Skip common non-model files
+            if py_file.name in [
+                "__init__.py",
+                "test_",
+                "tests.py",
+            ] or py_file.name.startswith("test_"):
+                continue
+            try:
+                with open(py_file, "r", encoding="utf-8") as f:
+                    content = f.read()
+                    relative_path = py_file.relative_to(dir_path)
+                    all_files_content[str(relative_path)] = content
+                    print(f"   📄 Loaded: {relative_path}")
+            except Exception as e:
+                print(f"   ⚠️  Could not read {py_file}: {e}")
+        if not all_files_content:
+            raise ValueError(f"Could not read any Python files from: {dir_path}")
+        # Create extractor instance for directory mode
+        extractor = cls(source=f"local_directory:{dir_path}")
+        extractor.all_files_content = all_files_content
+        return extractor
+    @classmethod
+    def from_github_repo(
+        cls, repo: str, path: str, token: str = None
+    ) -> "FastAPIExtractor":
+        """Create extractor from GitHub repository - supports files and directories."""
+        # First, check if it's a file or directory
+        if path.endswith(".py"):
+            # Single file
+            content = cls._fetch_github_file(repo, path, token)
+            if not content:
+                raise ValueError(f"Could not fetch {repo}/{path} from GitHub")
+            return cls(content, source=f"github:{repo}/{path}")
+        else:
+            # Assume it's a directory
+            return cls._from_github_directory(repo, path, token)
+    @classmethod
+    def _from_github_directory(
+        cls, repo: str, dir_path: str, token: str = None
+    ) -> "FastAPIExtractor":
+        """Fetch all Python files from a GitHub directory."""
+        # Get directory contents from GitHub API
+        url = f"https://api.github.com/repos/{repo}/contents/{dir_path}"
+        headers = {}
+        if token:
+            headers["Authorization"] = f"token {token}"
+        try:
+            response = requests.get(url, headers=headers)
+            if response.status_code != 200:
+                raise ValueError(
+                    f"Could not fetch directory {repo}/{dir_path}: {response.status_code}"
+                )
+            contents = response.json()
+            if not isinstance(contents, list):
+                raise ValueError(f"Path {dir_path} is not a directory")
+            all_files_content = {}
+            for item in contents:
+                if item["type"] == "file" and item["name"].endswith(".py"):
+                    # Skip common non-model files
+                    if item["name"] in ["__init__.py"] or item["name"].startswith(
+                        "test_"
+                    ):
+                        continue
+                    file_content = cls._fetch_github_file(repo, item["path"], token)
+                    if file_content:
+                        all_files_content[item["name"]] = file_content
+                        print(f"   📄 Downloaded: {item['name']}")
+                elif item["type"] == "dir":
+                    # Recursively fetch subdirectories
+                    try:
+                        subdir_files = cls._fetch_github_directory_recursive(
+                            repo, item["path"], token
+                        )
+                        for sub_path, sub_content in subdir_files.items():
+                            all_files_content[f"{item['name']}/{sub_path}"] = (
+                                sub_content
+                            )
+                    except Exception as e:
+                        print(f"   ⚠️  Could not fetch subdirectory {item['name']}: {e}")
+            if not all_files_content:
+                raise ValueError(f"No Python model files found in {repo}/{dir_path}")
+            print(
+                f"   ✅ Downloaded {len(all_files_content)} files from {repo}/{dir_path}"
+            )
+            extractor = cls(source=f"github_directory:{repo}/{dir_path}")
+            extractor.all_files_content = all_files_content
+            return extractor
+        except Exception as e:
+            raise ValueError(f"Error fetching GitHub directory {repo}/{dir_path}: {e}")
+    @classmethod
+    def _fetch_github_directory_recursive(
+        cls, repo: str, dir_path: str, token: str = None
+    ) -> Dict[str, str]:
+        """Recursively fetch Python files from GitHub directory."""
+        url = f"https://api.github.com/repos/{repo}/contents/{dir_path}"
+        headers = {}
+        if token:
+            headers["Authorization"] = f"token {token}"
+        files_content = {}
+        try:
+            response = requests.get(url, headers=headers)
+            if response.status_code == 200:
+                contents = response.json()
+                for item in contents:
+                    if item["type"] == "file" and item["name"].endswith(".py"):
+                        if (
+                            not item["name"].startswith("test_")
+                            and item["name"] != "__init__.py"
+                        ):
+                            file_content = cls._fetch_github_file(
+                                repo, item["path"], token
+                            )
+                            if file_content:
+                                files_content[item["name"]] = file_content
+                    elif item["type"] == "dir":
+                        # Recursive call for subdirectories
+                        subdir_files = cls._fetch_github_directory_recursive(
+                            repo, item["path"], token
+                        )
+                        for sub_path, sub_content in subdir_files.items():
+                            files_content[f"{item['name']}/{sub_path}"] = sub_content
+        except Exception as e:
+            print(f"   ⚠️  Error fetching subdirectory {dir_path}: {e}")
+        return files_content
+    @staticmethod
+    def _fetch_github_file(repo: str, path: str, token: str = None) -> Optional[str]:
+        """Fetch file content from GitHub API."""
+        url = f"https://api.github.com/repos/{repo}/contents/{path}"
+        headers = {}
+        if token:
+            headers["Authorization"] = f"token {token}"
+        try:
+            response = requests.get(url, headers=headers)
+            if response.status_code == 200:
+                import base64
+                content = base64.b64decode(response.json()["content"]).decode("utf-8")
+                return content
+            else:
+                print(f"   ❌ GitHub API error for {path}: {response.status_code}")
+                return None
+        except Exception as e:
+            print(f"   ❌ Error fetching {path} from GitHub: {e}")
+            return None
+    def extract_schemas(self) -> Dict[str, Schema]:
+        """Extract schemas from FastAPI/Pydantic models."""
+        if self.all_files_content:
+            # Directory mode - extract from multiple files
+            return self._extract_schemas_from_directory()
+        else:
+            # Single file mode - existing behavior
+            return self._extract_schemas_from_single_file()
+    def _extract_schemas_from_single_file(self) -> Dict[str, Schema]:
+        """Extract schemas from a single file (existing behavior)."""
+        print(f"🔍 Extracting FastAPI schemas from {self.source}")
+        try:
+            schemas = self._parse_pydantic_models(self.content)
+            print(f"   ✅ Found {len(schemas)} models")
+            return schemas
+        except Exception as e:
+            print(f"   ❌ Error parsing models: {e}")
+            return {}
+    def _extract_schemas_from_directory(self) -> Dict[str, Schema]:
+        """Extract schemas from multiple files in a directory."""
+        print(f"🔍 Extracting FastAPI schemas from directory {self.source}")
+        all_schemas = {}
+        total_models = 0
+        for file_path, file_content in self.all_files_content.items():
+            try:
+                print(f"   📄 Processing: {file_path}")
+                file_schemas = self._parse_pydantic_models(
+                    file_content, file_source=file_path
+                )
+                # Check for duplicate model names across files
+                for schema_name, schema in file_schemas.items():
+                    if schema_name in all_schemas:
+                        print(
+                            f"   ⚠️  Duplicate model name '{schema_name}' found in {file_path}"
+                        )
+                        print(f"       Previous: {all_schemas[schema_name].source}")
+                        print(f"       Current:  {schema.source}")
+                        # Use a unique name by including file path
+                        unique_name = f"{schema_name}_{file_path.replace('/', '_').replace('.py', '')}"
+                        all_schemas[unique_name] = schema
+                        print(f"       Renamed to: {unique_name}")
+                    else:
+                        all_schemas[schema_name] = schema
+                if file_schemas:
+                    print(f"       ✅ Found {len(file_schemas)} models")
+                    total_models += len(file_schemas)
+                else:
+                    print(f"       ⚪ No Pydantic models found")
+            except Exception as e:
+                print(f"   ❌ Error parsing {file_path}: {e}")
+        print(
+            f"   ✅ Total: {total_models} models from {len(self.all_files_content)} files"
+        )
+        return all_schemas
+    def _parse_pydantic_models(
+        self, content: str, file_source: str = None
+    ) -> Dict[str, Schema]:
+        """Parse Pydantic models from Python code."""
+        try:
+            tree = ast.parse(content)
+            schemas = {}
+            for node in ast.walk(tree):
+                if isinstance(node, ast.ClassDef):
+                    # Check if it's a Pydantic model
+                    if self._is_pydantic_model(node):
+                        schema = self._analyze_pydantic_class(node, file_source)
+                        if schema:
+                            table_name = schema.name
+                            schemas[table_name] = schema
+            return schemas
+        except Exception as e:
+            print(f"   ❌ Error parsing Python code: {e}")
+            return {}
+    def _is_pydantic_model(self, node: ast.ClassDef) -> bool:
+        """Check if class inherits from BaseModel or SQLModel."""
+        for base in node.bases:
+            if isinstance(base, ast.Name) and base.id in ["BaseModel", "SQLModel"]:
+                return True
+            elif isinstance(base, ast.Attribute) and base.attr in [
+                "BaseModel",
+                "SQLModel",
+            ]:
+                return True
+        return False
+    def _analyze_pydantic_class(
+        self, node: ast.ClassDef, file_source: str = None
+    ) -> Optional[Schema]:
+        """Analyze a Pydantic class to extract schema."""
+        # Convert class name to table name
+        table_name = self._class_to_table_name(node.name)
+        # Skip SQLModel tables (database models, not API models)
+        if self._is_sqlmodel_table(node):
+            return None
+        columns = []
+        # Parse type annotations
+        for item in node.body:
+            if isinstance(item, ast.AnnAssign) and isinstance(item.target, ast.Name):
+                field_name = item.target.id
+                field_type = self._parse_type_annotation(item.annotation)
+                is_required = not self._is_optional_type(item.annotation)
+                columns.append(
+                    {
+                        "name": field_name,
+                        "type": self._python_to_sql_type(field_type),
+                        "required": is_required,
+                        "nullable": not is_required,
+                    }
+                )
+        if not columns:
+            return None
+        # Create source identifier
+        if file_source:
+            source = f"pydantic:{node.name}@{file_source}"
+        else:
+            source = f"pydantic:{node.name}"
+        return Schema(name=table_name, columns=columns, source=source)
+    def _is_sqlmodel_table(self, node: ast.ClassDef) -> bool:
+        """Check if this is a SQLModel table (database model, not API model)."""
+        # Look for table=True in the class definition
+        for base in node.bases:
+            if isinstance(base, ast.Call):
+                for keyword in base.keywords:
+                    if (
+                        keyword.arg == "table"
+                        and isinstance(keyword.value, ast.Constant)
+                        and keyword.value.value is True
+                    ):
+                        return True
+        return False
+    def _class_to_table_name(self, class_name: str) -> str:
+        """Convert CamelCase class name to snake_case table name."""
+        # Insert underscore before capital letters
+        table_name = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", class_name)
+        table_name = re.sub("([a-z0-9])([A-Z])", r"\1_\2", table_name).lower()
+        # Remove common suffixes
+        for suffix in ["_model", "_schema", "_response", "_request"]:
+            if table_name.endswith(suffix):
+                table_name = table_name[: -len(suffix)]
+                break
+        return table_name
+    def _parse_type_annotation(self, annotation) -> str:
+        """Parse type annotation to string."""
+        if isinstance(annotation, ast.Name):
+            return annotation.id
+        elif isinstance(annotation, ast.Subscript):
+            if isinstance(annotation.value, ast.Name):
+                # Handle Optional[Type], List[Type], etc.
+                inner_type = self._parse_type_annotation(annotation.slice)
+                return f"{annotation.value.id}[{inner_type}]"
+        elif isinstance(annotation, ast.Attribute):
+            # Handle datetime.datetime, etc.
+            if hasattr(annotation.value, "id"):
+                return f"{annotation.value.id}.{annotation.attr}"
+            return annotation.attr
+        return "unknown"
+    def _is_optional_type(self, annotation) -> bool:
+        """Check if type annotation is Optional."""
+        if isinstance(annotation, ast.Subscript):
+            if isinstance(annotation.value, ast.Name):
+                # Check for Optional[Type] or Union[Type, None]
+                if annotation.value.id in ["Optional", "Union"]:
+                    return True
+        return False

{data_contract_validator-1.0.3 → data_contract_validator-1.0.4/data_contract_validator.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: data-contract-validator
-Version: 1.0.3
+Version: 1.0.4
 Summary: Adding pre-commit-fixes
 Author-email: Ogunniran Siji <ogunniransiji@gmail.com>
 Maintainer-email: Ogunniran Siji <ogunniransiji@gmail.com>

{data_contract_validator-1.0.3 → data_contract_validator-1.0.4}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "data-contract-validator"
-version = "1.0.3"
+version = "1.0.4"
 description = "Adding pre-commit-fixes"
 readme = "README.md"
 license = {text = "MIT"}

data_contract_validator-1.0.3/data_contract_validator/extractors/fastapi.py DELETED Viewed

@@ -1,200 +0,0 @@
-# data_contract_validator/extractors/fastapi.py
-"""
-FastAPI/Pydantic schema extractor - simplified version of your working code.
-"""
-import ast
-import re
-import requests
-import os
-from pathlib import Path
-from typing import Dict, List, Any, Optional, Union, get_type_hints
-from .base import BaseExtractor
-from ..core.models import Schema
-class FastAPIExtractor(BaseExtractor):
-    """Extract schemas from FastAPI/Pydantic models."""
-    def __init__(self, content: str, source: str = "unknown"):
-        self.content = content
-        self.source = source
-    @classmethod
-    def from_local_file(cls, file_path: str) -> "FastAPIExtractor":
-        """Create extractor from local file."""
-        file_path = Path(file_path)
-        with open(file_path, "r", encoding="utf-8") as f:
-            content = f.read()
-        return cls(content, source=f"local:{file_path}")
-    @classmethod
-    def from_github_repo(
-        cls, repo: str, path: str, token: str = None
-    ) -> "FastAPIExtractor":
-        """Create extractor from GitHub repository."""
-        content = cls._fetch_github_file(repo, path, token)
-        if not content:
-            raise ValueError(f"Could not fetch {repo}/{path} from GitHub")
-        return cls(content, source=f"github:{repo}/{path}")
-    @staticmethod
-    def _fetch_github_file(repo: str, path: str, token: str = None) -> Optional[str]:
-        """Fetch file content from GitHub API."""
-        url = f"https://api.github.com/repos/{repo}/contents/{path}"
-        headers = {}
-        if token:
-            headers["Authorization"] = f"token {token}"
-        try:
-            response = requests.get(url, headers=headers)
-            if response.status_code == 200:
-                import base64
-                content = base64.b64decode(response.json()["content"]).decode("utf-8")
-                print(f"   ✅ Downloaded {path} from {repo}")
-                return content
-            else:
-                print(f"   ❌ GitHub API error: {response.status_code}")
-                return None
-        except Exception as e:
-            print(f"   ❌ Error fetching from GitHub: {e}")
-            return None
-    def extract_schemas(self) -> Dict[str, Schema]:
-        """Extract schemas from FastAPI/Pydantic models."""
-        print(f"🔍 Extracting FastAPI schemas from {self.source}")
-        try:
-            schemas = self._parse_pydantic_models(self.content)
-            print(f"   ✅ Found {len(schemas)} models")
-            return schemas
-        except Exception as e:
-            print(f"   ❌ Error parsing models: {e}")
-            return {}
-    def _parse_pydantic_models(self, content: str) -> Dict[str, Schema]:
-        """Parse Pydantic models from Python code."""
-        try:
-            tree = ast.parse(content)
-            schemas = {}
-            for node in ast.walk(tree):
-                if isinstance(node, ast.ClassDef):
-                    # Check if it's a Pydantic model
-                    if self._is_pydantic_model(node):
-                        schema = self._analyze_pydantic_class(node)
-                        if schema:
-                            table_name = schema.name
-                            schemas[table_name] = schema
-                            print(f"   ✅ Found model: {node.name} -> {table_name}")
-            return schemas
-        except Exception as e:
-            print(f"   ❌ Error parsing Python code: {e}")
-            return {}
-    def _is_pydantic_model(self, node: ast.ClassDef) -> bool:
-        """Check if class inherits from BaseModel or SQLModel."""
-        for base in node.bases:
-            if isinstance(base, ast.Name) and base.id in ["BaseModel", "SQLModel"]:
-                return True
-            elif isinstance(base, ast.Attribute) and base.attr in [
-                "BaseModel",
-                "SQLModel",
-            ]:
-                return True
-        return False
-    def _analyze_pydantic_class(self, node: ast.ClassDef) -> Optional[Schema]:
-        """Analyze a Pydantic class to extract schema."""
-        # Convert class name to table name
-        table_name = self._class_to_table_name(node.name)
-        # Skip SQLModel tables (database models, not API models)
-        if self._is_sqlmodel_table(node):
-            return None
-        columns = []
-        # Parse type annotations
-        for item in node.body:
-            if isinstance(item, ast.AnnAssign) and isinstance(item.target, ast.Name):
-                field_name = item.target.id
-                field_type = self._parse_type_annotation(item.annotation)
-                is_required = not self._is_optional_type(item.annotation)
-                columns.append(
-                    {
-                        "name": field_name,
-                        "type": self._python_to_sql_type(field_type),
-                        "required": is_required,
-                        "nullable": not is_required,
-                    }
-                )
-        if not columns:
-            return None
-        return Schema(name=table_name, columns=columns, source=f"pydantic:{node.name}")
-    def _is_sqlmodel_table(self, node: ast.ClassDef) -> bool:
-        """Check if this is a SQLModel table (database model, not API model)."""
-        # Look for table=True in the class definition
-        for base in node.bases:
-            if isinstance(base, ast.Call):
-                for keyword in base.keywords:
-                    if (
-                        keyword.arg == "table"
-                        and isinstance(keyword.value, ast.Constant)
-                        and keyword.value.value is True
-                    ):
-                        return True
-        return False
-    def _class_to_table_name(self, class_name: str) -> str:
-        """Convert CamelCase class name to snake_case table name."""
-        # Insert underscore before capital letters
-        table_name = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", class_name)
-        table_name = re.sub("([a-z0-9])([A-Z])", r"\1_\2", table_name).lower()
-        # Remove common suffixes
-        for suffix in ["_model", "_schema", "_response", "_request"]:
-            if table_name.endswith(suffix):
-                table_name = table_name[: -len(suffix)]
-                break
-        # Pluralize if it doesn't end with 's'
-        # if not table_name.endswith('s') and not table_name.endswith('_data'):
-        #     table_name += 's'
-        return table_name
-    def _parse_type_annotation(self, annotation) -> str:
-        """Parse type annotation to string."""
-        if isinstance(annotation, ast.Name):
-            return annotation.id
-        elif isinstance(annotation, ast.Subscript):
-            if isinstance(annotation.value, ast.Name):
-                # Handle Optional[Type], List[Type], etc.
-                inner_type = self._parse_type_annotation(annotation.slice)
-                return f"{annotation.value.id}[{inner_type}]"
-        elif isinstance(annotation, ast.Attribute):
-            # Handle datetime.datetime, etc.
-            if hasattr(annotation.value, "id"):
-                return f"{annotation.value.id}.{annotation.attr}"
-            return annotation.attr
-        return "unknown"
-    def _is_optional_type(self, annotation) -> bool:
-        """Check if type annotation is Optional."""
-        if isinstance(annotation, ast.Subscript):
-            if isinstance(annotation.value, ast.Name):
-                # Check for Optional[Type] or Union[Type, None]
-                if annotation.value.id in ["Optional", "Union"]:
-                    return True
-        return False