data-contract-validator 1.0.3__tar.gz → 1.0.4a0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. {data_contract_validator-1.0.3/data_contract_validator.egg-info → data_contract_validator-1.0.4a0}/PKG-INFO +1 -1
  2. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4a0}/data_contract_validator/cli.py +132 -31
  3. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4a0}/data_contract_validator/extractors/dbt.py +3 -2
  4. data_contract_validator-1.0.4a0/data_contract_validator/extractors/fastapi.py +439 -0
  5. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4a0/data_contract_validator.egg-info}/PKG-INFO +1 -1
  6. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4a0}/pyproject.toml +1 -1
  7. data_contract_validator-1.0.3/data_contract_validator/extractors/fastapi.py +0 -200
  8. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4a0}/CHANGELOG.md +0 -0
  9. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4a0}/LICENSE +0 -0
  10. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4a0}/MANIFEST.in +0 -0
  11. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4a0}/README.md +0 -0
  12. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4a0}/data_contract_validator/__init__.py +0 -0
  13. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4a0}/data_contract_validator/core/__init__.py +0 -0
  14. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4a0}/data_contract_validator/core/models.py +0 -0
  15. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4a0}/data_contract_validator/core/validator.py +0 -0
  16. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4a0}/data_contract_validator/extractors/__init__.py +0 -0
  17. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4a0}/data_contract_validator/extractors/base.py +0 -0
  18. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4a0}/data_contract_validator/integrations/__init__.py +0 -0
  19. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4a0}/data_contract_validator/py.typed +0 -0
  20. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4a0}/data_contract_validator/templates/github-actions-template.yml +0 -0
  21. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4a0}/data_contract_validator.egg-info/SOURCES.txt +0 -0
  22. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4a0}/data_contract_validator.egg-info/dependency_links.txt +0 -0
  23. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4a0}/data_contract_validator.egg-info/entry_points.txt +0 -0
  24. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4a0}/data_contract_validator.egg-info/requires.txt +0 -0
  25. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4a0}/data_contract_validator.egg-info/top_level.txt +0 -0
  26. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4a0}/requirements.txt +0 -0
  27. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4a0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-contract-validator
3
- Version: 1.0.3
3
+ Version: 1.0.4a0
4
4
  Summary: Adding pre-commit-fixes
5
5
  Author-email: Ogunniran Siji <ogunniransiji@gmail.com>
6
6
  Maintainer-email: Ogunniran Siji <ogunniransiji@gmail.com>
@@ -71,7 +71,7 @@ def init(interactive: bool, framework: str, dbt_path: str, output_dir: str):
71
71
 
72
72
 
73
73
  def _interactive_setup() -> Dict[str, Any]:
74
- """Interactive setup wizard - 3 simple questions."""
74
+ """Interactive setup wizard with directory support."""
75
75
  click.echo("📋 Quick Setup (3 questions):")
76
76
  click.echo()
77
77
 
@@ -100,63 +100,100 @@ def _interactive_setup() -> Dict[str, Any]:
100
100
  show_default=True,
101
101
  )
102
102
 
103
- # Question 3: API models location
103
+ # Question 3: API models location with directory support
104
104
  click.echo()
105
105
  if framework == "fastapi":
106
- default_path = "app/models.py"
107
- prompt_text = "3️⃣ Where are your Pydantic models?"
106
+ default_path = "app/models" # Default to directory
107
+ prompt_text = "3️⃣ Where are your Pydantic models? (file or directory)"
108
+ help_text = (
109
+ " 💡 Examples: 'app/models.py' (single file) or 'app/models' (directory)"
110
+ )
108
111
  elif framework == "django":
109
112
  default_path = "models.py"
110
113
  prompt_text = "3️⃣ Where are your Django models?"
114
+ help_text = " 💡 Examples: 'myapp/models.py' or 'models'"
111
115
  else:
112
- default_path = "models.py"
116
+ default_path = "models"
113
117
  prompt_text = "3️⃣ Where are your API models?"
118
+ help_text = " 💡 Can be a file (models.py) or directory (models/)"
114
119
 
120
+ click.echo(help_text)
115
121
  api_location = click.prompt(prompt_text, default=default_path, show_default=True)
116
122
 
117
- # Auto-detect if it's local file or GitHub repo
123
+ # Auto-detect if it's local file/directory or GitHub repo
118
124
  is_github_repo = "/" in api_location and not api_location.startswith((".", "/"))
119
125
 
120
126
  if is_github_repo:
121
- # Format: "org/repo" or "org/repo/path/to/file.py"
127
+ # Format: "org/repo" or "org/repo/path/to/models"
122
128
  parts = api_location.split("/")
123
129
  if len(parts) >= 2:
124
130
  repo = "/".join(parts[:2])
125
- path = "/".join(parts[2:]) if len(parts) > 2 else "models.py"
131
+ path = "/".join(parts[2:]) if len(parts) > 2 else "app/models"
126
132
  else:
127
133
  repo = api_location
128
- path = "models.py"
134
+ path = "app/models"
129
135
 
130
136
  api_config = {"type": "github", "repo": repo, "path": path}
131
137
  click.echo(f" 🐙 GitHub repo detected: {repo}/{path}")
132
138
  else:
133
139
  api_config = {"type": "local", "path": api_location}
134
140
 
135
- # Check if local file exists
136
- if Path(api_location).exists():
137
- click.echo(" ✅ Local file found")
141
+ # Check if local file/directory exists and provide feedback
142
+ local_path = Path(api_location)
143
+ if local_path.exists():
144
+ if local_path.is_file():
145
+ click.echo(f" ✅ Local file found: {api_location}")
146
+ elif local_path.is_dir():
147
+ # Count Python files in directory
148
+ py_files = list(local_path.rglob("*.py"))
149
+ py_files = [
150
+ f
151
+ for f in py_files
152
+ if not f.name.startswith("test_") and f.name != "__init__.py"
153
+ ]
154
+ click.echo(
155
+ f" ✅ Local directory found: {api_location} ({len(py_files)} Python files)"
156
+ )
157
+ else:
158
+ click.echo(
159
+ f" ⚠️ Path exists but is neither file nor directory: {api_location}"
160
+ )
138
161
  else:
139
- click.echo(f" ⚠️ File not found: {api_location}")
162
+ click.echo(f" ⚠️ Path not found: {api_location}")
140
163
  if not click.confirm(" Continue anyway?"):
141
164
  sys.exit(1)
142
165
 
166
+ # New question about manifest parsing
167
+ click.echo()
168
+ disable_manifest = click.confirm(
169
+ "4️⃣ Disable manifest.json parsing? (recommended if you have CTE-based models)",
170
+ default=True
171
+ )
172
+
173
+ if disable_manifest:
174
+ click.echo(" 📄 Will use SQL file parsing (better for complex models)")
175
+ else:
176
+ click.echo(" 📋 Will try manifest.json first, fallback to SQL parsing")
177
+
143
178
  return {
144
179
  "version": "1.0",
145
180
  "name": f"contracts-{Path.cwd().name}",
146
- "description": "Auto-generated data contract validation",
147
181
  "source": {
148
- "dbt": {"project_path": dbt_path, "auto_compile": True, "timeout": 120}
182
+ "dbt": {
183
+ "project_path": dbt_path,
184
+ "auto_compile": True,
185
+ "disable_manifest": disable_manifest # NEW
186
+ }
149
187
  },
150
188
  "target": {framework: api_config},
151
189
  "validation": {
152
190
  "fail_on": ["missing_tables", "missing_required_columns"],
153
- "warn_on": ["type_mismatches", "missing_optional_columns"],
154
- "mode": "strict",
191
+ "warn_on": ["type_mismatches"]
155
192
  },
156
- "output": {"format": "terminal", "show_suggestions": True, "max_issues": 20},
157
193
  }
158
194
 
159
195
 
196
+
160
197
  def _quick_setup(framework: str, dbt_path: str) -> Dict[str, Any]:
161
198
  """Quick non-interactive setup with smart defaults."""
162
199
 
@@ -454,17 +491,27 @@ def _test_setup(config_file: Path) -> bool:
454
491
  "--output", type=click.Choice(["terminal", "json", "github"]), default="terminal"
455
492
  )
456
493
  @click.option("--dbt-project", help="Override DBT project path")
457
- @click.option("--fastapi-local", help="Override FastAPI models path")
494
+ @click.option(
495
+ "--fastapi-local", help="Override FastAPI models path (file or directory)"
496
+ )
497
+ @click.option("--fastapi-directory", help="Override FastAPI models directory path")
458
498
  @click.option("--fastapi-repo", help="Override FastAPI repo (org/repo)")
459
- @click.option("--fastapi-path", default="app/models.py", help="Path in FastAPI repo")
499
+ @click.option(
500
+ "--fastapi-path",
501
+ default="app/models",
502
+ help="Path in FastAPI repo (file or directory)",
503
+ )
504
+ @click.option("--disable-manifest", is_flag=True, help="Force SQL parsing, ignore manifest.json")
460
505
  def validate(
461
506
  config: str,
462
507
  dry_run: bool,
463
508
  output: str,
464
509
  dbt_project: str,
465
510
  fastapi_local: str,
511
+ fastapi_directory: str,
466
512
  fastapi_repo: str,
467
513
  fastapi_path: str,
514
+ disable_manifest: bool,
468
515
  ):
469
516
  """🔍 Validate data contracts (prevents production breaks)."""
470
517
 
@@ -486,12 +533,20 @@ def validate(
486
533
 
487
534
  if dry_run:
488
535
  click.echo("🧪 Dry run - testing configuration only")
489
- _test_configuration(config_data, dbt_project, fastapi_local, fastapi_repo)
536
+ _test_configuration(
537
+ config_data, dbt_project, fastapi_local, fastapi_directory, fastapi_repo, disable_manifest
538
+ )
490
539
  return
491
540
 
492
541
  # Run actual validation
493
542
  _run_validation(
494
- config_data, output, dbt_project, fastapi_local, fastapi_repo, fastapi_path
543
+ config_data,
544
+ output,
545
+ dbt_project,
546
+ fastapi_local,
547
+ fastapi_directory,
548
+ fastapi_repo,
549
+ fastapi_path,
495
550
  )
496
551
 
497
552
 
@@ -530,27 +585,64 @@ def _run_validation(
530
585
  fastapi_local: str,
531
586
  fastapi_repo: str,
532
587
  fastapi_path: str,
588
+ disable_manifest: bool = False,
533
589
  ):
534
- """Run the actual validation."""
590
+ """Run the actual validation with manifest disable option."""
535
591
 
536
592
  # Get DBT project path
537
593
  dbt_path = dbt_project or config_data.get("source", {}).get("dbt", {}).get(
538
594
  "project_path", "."
539
595
  )
540
596
 
541
- # Initialize DBT extractor
597
+ # Get disable_manifest from config file OR command line flag
598
+ config_disable_manifest = config_data.get("source", {}).get("dbt", {}).get("disable_manifest", False)
599
+ use_disable_manifest = disable_manifest or config_disable_manifest # CLI flag takes precedence
600
+
601
+ if use_disable_manifest:
602
+ click.echo("📄 Manifest parsing disabled")
603
+ if disable_manifest:
604
+ click.echo(" (via --disable-manifest flag)")
605
+ else:
606
+ click.echo(" (via .retl-validator.yml config)")
607
+
608
+ # Initialize DBT extractor with disable_manifest option
542
609
  try:
543
- dbt_extractor = DBTExtractor(dbt_path)
610
+ dbt_extractor = DBTExtractor(dbt_path, disable_manifest=use_disable_manifest)
544
611
  except Exception as e:
545
612
  click.echo(f"❌ Error initializing DBT extractor: {e}")
546
613
  sys.exit(1)
547
614
 
548
- # Initialize FastAPI extractor
615
+ # Initialize FastAPI extractor with directory support
549
616
  try:
550
617
  if fastapi_local:
551
- fastapi_extractor = FastAPIExtractor.from_local_file(fastapi_local)
618
+ # Use local path (file or directory)
619
+ local_path = fastapi_local
620
+
621
+ # Auto-detect if it's a file or directory
622
+ path = Path(local_path)
623
+ if path.is_file():
624
+ click.echo(f"📄 Using FastAPI models file: {local_path}")
625
+ fastapi_extractor = FastAPIExtractor.from_local_file(local_path)
626
+ elif path.is_dir():
627
+ click.echo(f"📁 Using FastAPI models directory: {local_path}")
628
+ fastapi_extractor = FastAPIExtractor.from_local_directory(local_path)
629
+ else:
630
+ raise ValueError(f"Path does not exist: {local_path}")
631
+
552
632
  elif fastapi_repo:
633
+ # Use GitHub repository
553
634
  github_token = os.environ.get("GITHUB_TOKEN")
635
+
636
+ # Check if fastapi_path ends with .py (file) or not (directory)
637
+ if fastapi_path.endswith(".py"):
638
+ click.echo(
639
+ f"📄 Using FastAPI models file: {fastapi_repo}/{fastapi_path}"
640
+ )
641
+ else:
642
+ click.echo(
643
+ f"📁 Using FastAPI models directory: {fastapi_repo}/{fastapi_path}"
644
+ )
645
+
554
646
  fastapi_extractor = FastAPIExtractor.from_github_repo(
555
647
  repo=fastapi_repo, path=fastapi_path, token=github_token
556
648
  )
@@ -558,14 +650,23 @@ def _run_validation(
558
650
  # Get from config
559
651
  target_config = list(config_data.get("target", {}).values())[0]
560
652
  if target_config.get("type") == "local":
561
- fastapi_extractor = FastAPIExtractor.from_local_file(
562
- target_config.get("path")
563
- )
653
+ local_path = target_config.get("path")
654
+ path = Path(local_path)
655
+
656
+ if path.is_file():
657
+ fastapi_extractor = FastAPIExtractor.from_local_file(local_path)
658
+ elif path.is_dir():
659
+ fastapi_extractor = FastAPIExtractor.from_local_directory(
660
+ local_path
661
+ )
662
+ else:
663
+ raise ValueError(f"Path does not exist: {local_path}")
664
+
564
665
  elif target_config.get("type") == "github":
565
666
  github_token = os.environ.get("GITHUB_TOKEN")
566
667
  fastapi_extractor = FastAPIExtractor.from_github_repo(
567
668
  repo=target_config.get("repo"),
568
- path=target_config.get("path", "app/models.py"),
669
+ path=target_config.get("path", "app/models"),
569
670
  token=github_token,
570
671
  )
571
672
  else:
@@ -1,6 +1,6 @@
1
1
  # data_contract_validator/extractors/dbt.py
2
2
  """
3
- DBT schema extractor - simplified version of your working code.
3
+ DBT schema extractor
4
4
  """
5
5
 
6
6
  import json
@@ -16,11 +16,12 @@ from ..core.models import Schema
16
16
  class DBTExtractor(BaseExtractor):
17
17
  """Extract schemas from DBT projects."""
18
18
 
19
- def __init__(self, project_path: str = "."):
19
+ def __init__(self, project_path: str = ".", disable_manifest: bool = False):
20
20
  self.project_path = Path(project_path)
21
21
  self.target_dir = self.project_path / "target"
22
22
  self.manifest_path = self.target_dir / "manifest.json"
23
23
  self.models_path = self.project_path / "models"
24
+ self.disable_manifest = disable_manifest
24
25
 
25
26
  def extract_schemas(self) -> Dict[str, Schema]:
26
27
  """Extract schemas from DBT project."""
@@ -0,0 +1,439 @@
1
+ # data_contract_validator/extractors/fastapi.py
2
+ """
3
+ Enhanced FastAPI/Pydantic schema extractor with directory support
4
+ """
5
+
6
+ import ast
7
+ import re
8
+ import requests
9
+ import os
10
+ from pathlib import Path
11
+ from typing import Dict, List, Any, Optional, Union, get_type_hints
12
+
13
+ from .base import BaseExtractor
14
+ from ..core.models import Schema
15
+
16
+
17
+ class FastAPIExtractor(BaseExtractor):
18
+ """Extract schemas from FastAPI/Pydantic models - supports files and directories."""
19
+
20
+ def __init__(
21
+ self, content: str = None, source: str = "unknown", file_path: str = None
22
+ ):
23
+ self.content = content
24
+ self.source = source
25
+ self.file_path = file_path
26
+ self.all_files_content = {} # For directory mode
27
+
28
+ @classmethod
29
+ def from_local_file(cls, file_path: str) -> "FastAPIExtractor":
30
+ """Create extractor from local file."""
31
+ file_path = Path(file_path)
32
+
33
+ if not file_path.exists():
34
+ raise ValueError(f"Path does not exist: {file_path}")
35
+
36
+ if file_path.is_file():
37
+ # Single file mode (existing behavior)
38
+ with open(file_path, "r", encoding="utf-8") as f:
39
+ content = f.read()
40
+ return cls(
41
+ content=content, source=f"local:{file_path}", file_path=str(file_path)
42
+ )
43
+
44
+ elif file_path.is_dir():
45
+ # Directory mode (new functionality)
46
+ return cls._from_local_directory(file_path)
47
+
48
+ else:
49
+ raise ValueError(f"Path is neither file nor directory: {file_path}")
50
+
51
+ @classmethod
52
+ def from_local_directory(cls, directory_path: str) -> "FastAPIExtractor":
53
+ """Create extractor from local directory containing model files."""
54
+ return cls._from_local_directory(Path(directory_path))
55
+
56
+ @classmethod
57
+ def _from_local_directory(cls, dir_path: Path) -> "FastAPIExtractor":
58
+ """Internal method to handle directory extraction."""
59
+ if not dir_path.is_dir():
60
+ raise ValueError(f"Not a directory: {dir_path}")
61
+
62
+ # Find all Python files in the directory and subdirectories
63
+ python_files = list(dir_path.rglob("*.py"))
64
+
65
+ if not python_files:
66
+ raise ValueError(f"No Python files found in directory: {dir_path}")
67
+
68
+ print(f"🔍 Found {len(python_files)} Python files in {dir_path}")
69
+
70
+ # Read all files
71
+ all_files_content = {}
72
+ for py_file in python_files:
73
+ # Skip common non-model files
74
+ if py_file.name in [
75
+ "__init__.py",
76
+ "test_",
77
+ "tests.py",
78
+ ] or py_file.name.startswith("test_"):
79
+ continue
80
+
81
+ try:
82
+ with open(py_file, "r", encoding="utf-8") as f:
83
+ content = f.read()
84
+ relative_path = py_file.relative_to(dir_path)
85
+ all_files_content[str(relative_path)] = content
86
+ print(f" 📄 Loaded: {relative_path}")
87
+ except Exception as e:
88
+ print(f" ⚠️ Could not read {py_file}: {e}")
89
+
90
+ if not all_files_content:
91
+ raise ValueError(f"Could not read any Python files from: {dir_path}")
92
+
93
+ # Create extractor instance for directory mode
94
+ extractor = cls(source=f"local_directory:{dir_path}")
95
+ extractor.all_files_content = all_files_content
96
+ return extractor
97
+
98
+ @classmethod
99
+ def from_github_repo(
100
+ cls, repo: str, path: str, token: str = None
101
+ ) -> "FastAPIExtractor":
102
+ """Create extractor from GitHub repository - supports files and directories."""
103
+
104
+ # First, check if it's a file or directory
105
+ if path.endswith(".py"):
106
+ # Single file
107
+ content = cls._fetch_github_file(repo, path, token)
108
+ if not content:
109
+ raise ValueError(f"Could not fetch {repo}/{path} from GitHub")
110
+ return cls(content, source=f"github:{repo}/{path}")
111
+ else:
112
+ # Assume it's a directory
113
+ return cls._from_github_directory(repo, path, token)
114
+
115
+ @classmethod
116
+ def _from_github_directory(
117
+ cls, repo: str, dir_path: str, token: str = None
118
+ ) -> "FastAPIExtractor":
119
+ """Fetch all Python files from a GitHub directory."""
120
+
121
+ # Get directory contents from GitHub API
122
+ url = f"https://api.github.com/repos/{repo}/contents/{dir_path}"
123
+ headers = {}
124
+
125
+ if token:
126
+ headers["Authorization"] = f"token {token}"
127
+
128
+ try:
129
+ response = requests.get(url, headers=headers)
130
+ if response.status_code != 200:
131
+ raise ValueError(
132
+ f"Could not fetch directory {repo}/{dir_path}: {response.status_code}"
133
+ )
134
+
135
+ contents = response.json()
136
+ if not isinstance(contents, list):
137
+ raise ValueError(f"Path {dir_path} is not a directory")
138
+
139
+ all_files_content = {}
140
+
141
+ for item in contents:
142
+ if item["type"] == "file" and item["name"].endswith(".py"):
143
+ # Skip common non-model files
144
+ if item["name"] in ["__init__.py"] or item["name"].startswith(
145
+ "test_"
146
+ ):
147
+ continue
148
+
149
+ file_content = cls._fetch_github_file(repo, item["path"], token)
150
+ if file_content:
151
+ all_files_content[item["name"]] = file_content
152
+ print(f" 📄 Downloaded: {item['name']}")
153
+
154
+ elif item["type"] == "dir":
155
+ # Recursively fetch subdirectories
156
+ try:
157
+ subdir_files = cls._fetch_github_directory_recursive(
158
+ repo, item["path"], token
159
+ )
160
+ for sub_path, sub_content in subdir_files.items():
161
+ all_files_content[f"{item['name']}/{sub_path}"] = (
162
+ sub_content
163
+ )
164
+ except Exception as e:
165
+ print(f" ⚠️ Could not fetch subdirectory {item['name']}: {e}")
166
+
167
+ if not all_files_content:
168
+ raise ValueError(f"No Python model files found in {repo}/{dir_path}")
169
+
170
+ print(
171
+ f" ✅ Downloaded {len(all_files_content)} files from {repo}/{dir_path}"
172
+ )
173
+
174
+ extractor = cls(source=f"github_directory:{repo}/{dir_path}")
175
+ extractor.all_files_content = all_files_content
176
+ return extractor
177
+
178
+ except Exception as e:
179
+ raise ValueError(f"Error fetching GitHub directory {repo}/{dir_path}: {e}")
180
+
181
+ @classmethod
182
+ def _fetch_github_directory_recursive(
183
+ cls, repo: str, dir_path: str, token: str = None
184
+ ) -> Dict[str, str]:
185
+ """Recursively fetch Python files from GitHub directory."""
186
+ url = f"https://api.github.com/repos/{repo}/contents/{dir_path}"
187
+ headers = {}
188
+
189
+ if token:
190
+ headers["Authorization"] = f"token {token}"
191
+
192
+ files_content = {}
193
+
194
+ try:
195
+ response = requests.get(url, headers=headers)
196
+ if response.status_code == 200:
197
+ contents = response.json()
198
+
199
+ for item in contents:
200
+ if item["type"] == "file" and item["name"].endswith(".py"):
201
+ if (
202
+ not item["name"].startswith("test_")
203
+ and item["name"] != "__init__.py"
204
+ ):
205
+ file_content = cls._fetch_github_file(
206
+ repo, item["path"], token
207
+ )
208
+ if file_content:
209
+ files_content[item["name"]] = file_content
210
+
211
+ elif item["type"] == "dir":
212
+ # Recursive call for subdirectories
213
+ subdir_files = cls._fetch_github_directory_recursive(
214
+ repo, item["path"], token
215
+ )
216
+ for sub_path, sub_content in subdir_files.items():
217
+ files_content[f"{item['name']}/{sub_path}"] = sub_content
218
+
219
+ except Exception as e:
220
+ print(f" ⚠️ Error fetching subdirectory {dir_path}: {e}")
221
+
222
+ return files_content
223
+
224
+ @staticmethod
225
+ def _fetch_github_file(repo: str, path: str, token: str = None) -> Optional[str]:
226
+ """Fetch file content from GitHub API."""
227
+ url = f"https://api.github.com/repos/{repo}/contents/{path}"
228
+ headers = {}
229
+
230
+ if token:
231
+ headers["Authorization"] = f"token {token}"
232
+
233
+ try:
234
+ response = requests.get(url, headers=headers)
235
+ if response.status_code == 200:
236
+ import base64
237
+
238
+ content = base64.b64decode(response.json()["content"]).decode("utf-8")
239
+ return content
240
+ else:
241
+ print(f" ❌ GitHub API error for {path}: {response.status_code}")
242
+ return None
243
+ except Exception as e:
244
+ print(f" ❌ Error fetching {path} from GitHub: {e}")
245
+ return None
246
+
247
+ def extract_schemas(self) -> Dict[str, Schema]:
248
+ """Extract schemas from FastAPI/Pydantic models."""
249
+
250
+ if self.all_files_content:
251
+ # Directory mode - extract from multiple files
252
+ return self._extract_schemas_from_directory()
253
+ else:
254
+ # Single file mode - existing behavior
255
+ return self._extract_schemas_from_single_file()
256
+
257
+ def _extract_schemas_from_single_file(self) -> Dict[str, Schema]:
258
+ """Extract schemas from a single file (existing behavior)."""
259
+ print(f"🔍 Extracting FastAPI schemas from {self.source}")
260
+
261
+ try:
262
+ schemas = self._parse_pydantic_models(self.content)
263
+ print(f" ✅ Found {len(schemas)} models")
264
+ return schemas
265
+ except Exception as e:
266
+ print(f" ❌ Error parsing models: {e}")
267
+ return {}
268
+
269
+ def _extract_schemas_from_directory(self) -> Dict[str, Schema]:
270
+ """Extract schemas from multiple files in a directory."""
271
+ print(f"🔍 Extracting FastAPI schemas from directory {self.source}")
272
+
273
+ all_schemas = {}
274
+ total_models = 0
275
+
276
+ for file_path, file_content in self.all_files_content.items():
277
+ try:
278
+ print(f" 📄 Processing: {file_path}")
279
+ file_schemas = self._parse_pydantic_models(
280
+ file_content, file_source=file_path
281
+ )
282
+
283
+ # Check for duplicate model names across files
284
+ for schema_name, schema in file_schemas.items():
285
+ if schema_name in all_schemas:
286
+ print(
287
+ f" ⚠️ Duplicate model name '{schema_name}' found in {file_path}"
288
+ )
289
+ print(f" Previous: {all_schemas[schema_name].source}")
290
+ print(f" Current: {schema.source}")
291
+ # Use a unique name by including file path
292
+ unique_name = f"{schema_name}_{file_path.replace('/', '_').replace('.py', '')}"
293
+ all_schemas[unique_name] = schema
294
+ print(f" Renamed to: {unique_name}")
295
+ else:
296
+ all_schemas[schema_name] = schema
297
+
298
+ if file_schemas:
299
+ print(f" ✅ Found {len(file_schemas)} models")
300
+ total_models += len(file_schemas)
301
+ else:
302
+ print(f" ⚪ No Pydantic models found")
303
+
304
+ except Exception as e:
305
+ print(f" ❌ Error parsing {file_path}: {e}")
306
+
307
+ print(
308
+ f" ✅ Total: {total_models} models from {len(self.all_files_content)} files"
309
+ )
310
+ return all_schemas
311
+
312
+ def _parse_pydantic_models(
313
+ self, content: str, file_source: str = None
314
+ ) -> Dict[str, Schema]:
315
+ """Parse Pydantic models from Python code."""
316
+ try:
317
+ tree = ast.parse(content)
318
+ schemas = {}
319
+
320
+ for node in ast.walk(tree):
321
+ if isinstance(node, ast.ClassDef):
322
+ # Check if it's a Pydantic model
323
+ if self._is_pydantic_model(node):
324
+ schema = self._analyze_pydantic_class(node, file_source)
325
+ if schema:
326
+ table_name = schema.name
327
+ schemas[table_name] = schema
328
+
329
+ return schemas
330
+
331
+ except Exception as e:
332
+ print(f" ❌ Error parsing Python code: {e}")
333
+ return {}
334
+
335
+ def _is_pydantic_model(self, node: ast.ClassDef) -> bool:
336
+ """Check if class inherits from BaseModel or SQLModel."""
337
+ for base in node.bases:
338
+ if isinstance(base, ast.Name) and base.id in ["BaseModel", "SQLModel"]:
339
+ return True
340
+ elif isinstance(base, ast.Attribute) and base.attr in [
341
+ "BaseModel",
342
+ "SQLModel",
343
+ ]:
344
+ return True
345
+ return False
346
+
347
+ def _analyze_pydantic_class(
348
+ self, node: ast.ClassDef, file_source: str = None
349
+ ) -> Optional[Schema]:
350
+ """Analyze a Pydantic class to extract schema."""
351
+ # Convert class name to table name
352
+ table_name = self._class_to_table_name(node.name)
353
+
354
+ # Skip SQLModel tables (database models, not API models)
355
+ if self._is_sqlmodel_table(node):
356
+ return None
357
+
358
+ columns = []
359
+
360
+ # Parse type annotations
361
+ for item in node.body:
362
+ if isinstance(item, ast.AnnAssign) and isinstance(item.target, ast.Name):
363
+ field_name = item.target.id
364
+ field_type = self._parse_type_annotation(item.annotation)
365
+ is_required = not self._is_optional_type(item.annotation)
366
+
367
+ columns.append(
368
+ {
369
+ "name": field_name,
370
+ "type": self._python_to_sql_type(field_type),
371
+ "required": is_required,
372
+ "nullable": not is_required,
373
+ }
374
+ )
375
+
376
+ if not columns:
377
+ return None
378
+
379
+ # Create source identifier
380
+ if file_source:
381
+ source = f"pydantic:{node.name}@{file_source}"
382
+ else:
383
+ source = f"pydantic:{node.name}"
384
+
385
+ return Schema(name=table_name, columns=columns, source=source)
386
+
387
+ def _is_sqlmodel_table(self, node: ast.ClassDef) -> bool:
388
+ """Check if this is a SQLModel table (database model, not API model)."""
389
+ # Look for table=True in the class definition
390
+ for base in node.bases:
391
+ if isinstance(base, ast.Call):
392
+ for keyword in base.keywords:
393
+ if (
394
+ keyword.arg == "table"
395
+ and isinstance(keyword.value, ast.Constant)
396
+ and keyword.value.value is True
397
+ ):
398
+ return True
399
+ return False
400
+
401
+ def _class_to_table_name(self, class_name: str) -> str:
402
+ """Convert CamelCase class name to snake_case table name."""
403
+ # Insert underscore before capital letters
404
+ table_name = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", class_name)
405
+ table_name = re.sub("([a-z0-9])([A-Z])", r"\1_\2", table_name).lower()
406
+
407
+ # Remove common suffixes
408
+ for suffix in ["_model", "_schema", "_response", "_request"]:
409
+ if table_name.endswith(suffix):
410
+ table_name = table_name[: -len(suffix)]
411
+ break
412
+
413
+ return table_name
414
+
415
+ def _parse_type_annotation(self, annotation) -> str:
416
+ """Parse type annotation to string."""
417
+ if isinstance(annotation, ast.Name):
418
+ return annotation.id
419
+ elif isinstance(annotation, ast.Subscript):
420
+ if isinstance(annotation.value, ast.Name):
421
+ # Handle Optional[Type], List[Type], etc.
422
+ inner_type = self._parse_type_annotation(annotation.slice)
423
+ return f"{annotation.value.id}[{inner_type}]"
424
+ elif isinstance(annotation, ast.Attribute):
425
+ # Handle datetime.datetime, etc.
426
+ if hasattr(annotation.value, "id"):
427
+ return f"{annotation.value.id}.{annotation.attr}"
428
+ return annotation.attr
429
+
430
+ return "unknown"
431
+
432
+ def _is_optional_type(self, annotation) -> bool:
433
+ """Check if type annotation is Optional."""
434
+ if isinstance(annotation, ast.Subscript):
435
+ if isinstance(annotation.value, ast.Name):
436
+ # Check for Optional[Type] or Union[Type, None]
437
+ if annotation.value.id in ["Optional", "Union"]:
438
+ return True
439
+ return False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-contract-validator
3
- Version: 1.0.3
3
+ Version: 1.0.4a0
4
4
  Summary: Adding pre-commit-fixes
5
5
  Author-email: Ogunniran Siji <ogunniransiji@gmail.com>
6
6
  Maintainer-email: Ogunniran Siji <ogunniransiji@gmail.com>
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "data-contract-validator"
7
- version = "1.0.3"
7
+ version = "1.0.4a"
8
8
  description = "Adding pre-commit-fixes"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -1,200 +0,0 @@
1
- # data_contract_validator/extractors/fastapi.py
2
- """
3
- FastAPI/Pydantic schema extractor - simplified version of your working code.
4
- """
5
-
6
- import ast
7
- import re
8
- import requests
9
- import os
10
- from pathlib import Path
11
- from typing import Dict, List, Any, Optional, Union, get_type_hints
12
-
13
- from .base import BaseExtractor
14
- from ..core.models import Schema
15
-
16
-
17
class FastAPIExtractor(BaseExtractor):
    """Extract schemas from FastAPI/Pydantic model source code.

    The source text is analysed statically with ``ast``; it is parsed but
    never imported or executed.
    """

    def __init__(self, content: str, source: str = "unknown"):
        """Store the Python source to analyse.

        Args:
            content: Python source code containing the model classes.
            source: Label describing where ``content`` came from; only used
                in progress messages.
        """
        self.content = content
        self.source = source

    @classmethod
    def from_local_file(cls, file_path: str) -> "FastAPIExtractor":
        """Create extractor from local file (read as UTF-8)."""
        file_path = Path(file_path)
        content = file_path.read_text(encoding="utf-8")
        return cls(content, source=f"local:{file_path}")

    @classmethod
    def from_github_repo(
        cls, repo: str, path: str, token: Optional[str] = None
    ) -> "FastAPIExtractor":
        """Create extractor from a file in a GitHub repository.

        Args:
            repo: Repository in ``owner/name`` form.
            path: Path of the file inside the repository.
            token: Optional GitHub token sent in the Authorization header.

        Raises:
            ValueError: If the file could not be fetched.
        """
        content = cls._fetch_github_file(repo, path, token)
        # Only None signals a failed fetch; an empty file is valid input and
        # simply yields no models.
        if content is None:
            raise ValueError(f"Could not fetch {repo}/{path} from GitHub")
        return cls(content, source=f"github:{repo}/{path}")

    @staticmethod
    def _fetch_github_file(
        repo: str, path: str, token: Optional[str] = None
    ) -> Optional[str]:
        """Fetch file content from the GitHub contents API.

        Returns:
            The decoded file text, or ``None`` on any HTTP, network or
            decoding failure (the failure is printed, not raised).
        """
        url = f"https://api.github.com/repos/{repo}/contents/{path}"
        headers = {}

        if token:
            headers["Authorization"] = f"token {token}"

        try:
            # requests has no default timeout; without one a stalled
            # connection would block the caller indefinitely.
            response = requests.get(url, headers=headers, timeout=30)
            if response.status_code == 200:
                import base64

                content = base64.b64decode(response.json()["content"]).decode("utf-8")
                print(f" ✅ Downloaded {path} from {repo}")
                return content

            print(f" ❌ GitHub API error: {response.status_code}")
            return None
        except Exception as e:
            # Deliberate best-effort boundary: report and signal failure via
            # None so from_github_repo can raise a single ValueError.
            print(f" ❌ Error fetching from GitHub: {e}")
            return None

    def extract_schemas(self) -> Dict[str, Schema]:
        """Extract schemas from FastAPI/Pydantic models.

        Returns:
            Mapping of table name to ``Schema``; empty when nothing could be
            parsed (the error is printed, not raised).
        """
        print(f"🔍 Extracting FastAPI schemas from {self.source}")

        try:
            schemas = self._parse_pydantic_models(self.content)
            print(f" ✅ Found {len(schemas)} models")
            return schemas
        except Exception as e:
            print(f" ❌ Error parsing models: {e}")
            return {}

    def _parse_pydantic_models(self, content: str) -> Dict[str, Schema]:
        """Parse Pydantic models from Python code.

        Every class definition in the module (including nested ones, since
        ``ast.walk`` visits the whole tree) is considered. When two classes
        map to the same table name, the one visited later wins.
        """
        try:
            tree = ast.parse(content)
            schemas = {}

            for node in ast.walk(tree):
                if not isinstance(node, ast.ClassDef):
                    continue
                if not self._is_pydantic_model(node):
                    continue

                schema = self._analyze_pydantic_class(node)
                if schema:
                    table_name = schema.name
                    schemas[table_name] = schema
                    print(f" ✅ Found model: {node.name} -> {table_name}")

            return schemas

        except Exception as e:
            print(f" ❌ Error parsing Python code: {e}")
            return {}

    def _is_pydantic_model(self, node: ast.ClassDef) -> bool:
        """Check if class directly lists BaseModel or SQLModel as a base.

        Both the bare (``BaseModel``) and the qualified
        (``pydantic.BaseModel``) spellings are accepted. Indirect
        inheritance through another model class is not detected.
        """
        model_bases = ("BaseModel", "SQLModel")
        for base in node.bases:
            if isinstance(base, ast.Name) and base.id in model_bases:
                return True
            if isinstance(base, ast.Attribute) and base.attr in model_bases:
                return True
        return False

    def _analyze_pydantic_class(self, node: ast.ClassDef) -> Optional[Schema]:
        """Analyze a Pydantic class to extract schema.

        Returns:
            A ``Schema`` built from the class's annotated fields, or ``None``
            for SQLModel tables and for classes without annotated fields.
        """
        # Skip SQLModel tables (database models, not API models)
        if self._is_sqlmodel_table(node):
            return None

        # Convert class name to table name
        table_name = self._class_to_table_name(node.name)

        columns = []

        # Parse type annotations
        for item in node.body:
            if isinstance(item, ast.AnnAssign) and isinstance(item.target, ast.Name):
                field_type = self._parse_type_annotation(item.annotation)
                is_required = not self._is_optional_type(item.annotation)

                columns.append(
                    {
                        "name": item.target.id,
                        # NOTE(review): _python_to_sql_type is not defined in
                        # this class; presumably inherited from BaseExtractor
                        # - confirm.
                        "type": self._python_to_sql_type(field_type),
                        "required": is_required,
                        "nullable": not is_required,
                    }
                )

        if not columns:
            return None

        return Schema(name=table_name, columns=columns, source=f"pydantic:{node.name}")

    def _is_sqlmodel_table(self, node: ast.ClassDef) -> bool:
        """Check if this is a SQLModel table (database model, not API model).

        ``class Hero(SQLModel, table=True)`` stores ``table=True`` in the
        class definition's ``keywords``, not in ``bases``, so that is where
        the flag is looked up. Keywords of call-expression bases are still
        inspected as well, as the previous implementation did.
        """

        def is_table_true(keyword) -> bool:
            return (
                keyword.arg == "table"
                and isinstance(keyword.value, ast.Constant)
                and keyword.value.value is True
            )

        if any(is_table_true(keyword) for keyword in node.keywords):
            return True

        return any(
            isinstance(base, ast.Call)
            and any(is_table_true(keyword) for keyword in base.keywords)
            for base in node.bases
        )

    def _class_to_table_name(self, class_name: str) -> str:
        """Convert CamelCase class name to snake_case table name.

        After conversion, at most one trailing ``_model``, ``_schema``,
        ``_response`` or ``_request`` suffix is removed, e.g.
        ``UserProfileResponse`` -> ``user_profile``. Names are not pluralized.
        """
        # Insert underscore before capital letters
        table_name = re.sub(r"(.)([A-Z][a-z]+)", r"\1_\2", class_name)
        table_name = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", table_name).lower()

        # Remove common suffixes
        for suffix in ["_model", "_schema", "_response", "_request"]:
            if table_name.endswith(suffix):
                table_name = table_name[: -len(suffix)]
                break

        return table_name

    def _parse_type_annotation(self, annotation) -> str:
        """Render a type-annotation AST node as a string.

        Supported forms:
          * plain names: ``int`` -> ``"int"``
          * dotted names: ``datetime.datetime`` -> ``"datetime.datetime"``
          * generics, including nested and multi-argument ones:
            ``Dict[str, List[int]]`` -> ``"Dict[str, List[int]]"``
          * qualified generics, reduced to the unqualified generic name:
            ``typing.Optional[int]`` -> ``"Optional[int]"``
          * ``None`` and string forward references (``"User"`` -> ``"User"``)
          * PEP 604 unions: ``int | None`` -> ``"int | None"``

        Returns:
            The rendered annotation, or ``"unknown"`` when the node (or the
            base of a subscript) is not one of the supported forms.
        """

        def render(node) -> str:
            # Python < 3.9 wraps subscript slices in ast.Index. Match by
            # class name so no deprecated ast attribute is touched.
            if type(node).__name__ == "Index":
                node = node.value

            if isinstance(node, ast.Name):
                return node.id

            if isinstance(node, ast.Attribute):
                # Handle datetime.datetime, etc.
                if isinstance(node.value, ast.Name):
                    return f"{node.value.id}.{node.attr}"
                return node.attr

            if isinstance(node, ast.Subscript):
                # Handle Optional[Type], List[Type], typing.Optional[Type]
                if isinstance(node.value, ast.Name):
                    base = node.value.id
                elif isinstance(node.value, ast.Attribute):
                    base = node.value.attr
                else:
                    return "unknown"
                return f"{base}[{render(node.slice)}]"

            if isinstance(node, ast.Tuple):
                # Arguments of a multi-parameter generic such as Dict[str, int]
                return ", ".join(render(element) for element in node.elts)

            if isinstance(node, ast.BinOp) and isinstance(node.op, ast.BitOr):
                return f"{render(node.left)} | {render(node.right)}"

            if isinstance(node, ast.Constant):
                if node.value is None:
                    return "None"
                if isinstance(node.value, str):
                    # String forward reference, e.g. "User".
                    return node.value

            return "unknown"

        return render(annotation)

    def _is_optional_type(self, annotation) -> bool:
        """Report whether a type annotation admits ``None``.

        ``Optional[T]``, ``Union[..., None]`` (bare or qualified, nested
        unions included) and PEP 604 unions with a ``None`` member count as
        optional. A ``Union`` without ``None`` (``Union[int, str]``) does not.
        """

        def is_none(node) -> bool:
            return isinstance(node, ast.Constant) and node.value is None

        def generic_name(node):
            if isinstance(node, ast.Name):
                return node.id
            if isinstance(node, ast.Attribute):
                return node.attr
            return None

        def admits_none(node) -> bool:
            if isinstance(node, ast.BinOp) and isinstance(node.op, ast.BitOr):
                return any(
                    is_none(side) or admits_none(side)
                    for side in (node.left, node.right)
                )

            if isinstance(node, ast.Subscript):
                name = generic_name(node.value)
                if name == "Optional":
                    return True
                if name == "Union":
                    members = node.slice
                    # Python < 3.9 wraps subscript slices in ast.Index.
                    if type(members).__name__ == "Index":
                        members = members.value
                    if isinstance(members, ast.Tuple):
                        members = members.elts
                    else:
                        members = [members]
                    return any(is_none(m) or admits_none(m) for m in members)

            return False

        return admits_none(annotation)