data-contract-validator 1.0.3__tar.gz → 1.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. {data_contract_validator-1.0.3/data_contract_validator.egg-info → data_contract_validator-1.0.4}/PKG-INFO +1 -1
  2. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4}/data_contract_validator/cli.py +99 -25
  3. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4}/data_contract_validator/extractors/dbt.py +1 -1
  4. data_contract_validator-1.0.4/data_contract_validator/extractors/fastapi.py +439 -0
  5. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4/data_contract_validator.egg-info}/PKG-INFO +1 -1
  6. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4}/pyproject.toml +1 -1
  7. data_contract_validator-1.0.3/data_contract_validator/extractors/fastapi.py +0 -200
  8. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4}/CHANGELOG.md +0 -0
  9. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4}/LICENSE +0 -0
  10. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4}/MANIFEST.in +0 -0
  11. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4}/README.md +0 -0
  12. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4}/data_contract_validator/__init__.py +0 -0
  13. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4}/data_contract_validator/core/__init__.py +0 -0
  14. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4}/data_contract_validator/core/models.py +0 -0
  15. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4}/data_contract_validator/core/validator.py +0 -0
  16. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4}/data_contract_validator/extractors/__init__.py +0 -0
  17. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4}/data_contract_validator/extractors/base.py +0 -0
  18. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4}/data_contract_validator/integrations/__init__.py +0 -0
  19. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4}/data_contract_validator/py.typed +0 -0
  20. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4}/data_contract_validator/templates/github-actions-template.yml +0 -0
  21. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4}/data_contract_validator.egg-info/SOURCES.txt +0 -0
  22. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4}/data_contract_validator.egg-info/dependency_links.txt +0 -0
  23. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4}/data_contract_validator.egg-info/entry_points.txt +0 -0
  24. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4}/data_contract_validator.egg-info/requires.txt +0 -0
  25. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4}/data_contract_validator.egg-info/top_level.txt +0 -0
  26. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4}/requirements.txt +0 -0
  27. {data_contract_validator-1.0.3 → data_contract_validator-1.0.4}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-contract-validator
3
- Version: 1.0.3
3
+ Version: 1.0.4
4
4
  Summary: Adding pre-commit-fixes
5
5
  Author-email: Ogunniran Siji <ogunniransiji@gmail.com>
6
6
  Maintainer-email: Ogunniran Siji <ogunniransiji@gmail.com>
@@ -71,7 +71,7 @@ def init(interactive: bool, framework: str, dbt_path: str, output_dir: str):
71
71
 
72
72
 
73
73
  def _interactive_setup() -> Dict[str, Any]:
74
- """Interactive setup wizard - 3 simple questions."""
74
+ """Interactive setup wizard with directory support."""
75
75
  click.echo("📋 Quick Setup (3 questions):")
76
76
  click.echo()
77
77
 
@@ -100,43 +100,66 @@ def _interactive_setup() -> Dict[str, Any]:
100
100
  show_default=True,
101
101
  )
102
102
 
103
- # Question 3: API models location
103
+ # Question 3: API models location with directory support
104
104
  click.echo()
105
105
  if framework == "fastapi":
106
- default_path = "app/models.py"
107
- prompt_text = "3️⃣ Where are your Pydantic models?"
106
+ default_path = "app/models" # Default to directory
107
+ prompt_text = "3️⃣ Where are your Pydantic models? (file or directory)"
108
+ help_text = (
109
+ " 💡 Examples: 'app/models.py' (single file) or 'app/models' (directory)"
110
+ )
108
111
  elif framework == "django":
109
112
  default_path = "models.py"
110
113
  prompt_text = "3️⃣ Where are your Django models?"
114
+ help_text = " 💡 Examples: 'myapp/models.py' or 'models'"
111
115
  else:
112
- default_path = "models.py"
116
+ default_path = "models"
113
117
  prompt_text = "3️⃣ Where are your API models?"
118
+ help_text = " 💡 Can be a file (models.py) or directory (models/)"
114
119
 
120
+ click.echo(help_text)
115
121
  api_location = click.prompt(prompt_text, default=default_path, show_default=True)
116
122
 
117
- # Auto-detect if it's local file or GitHub repo
123
+ # Auto-detect if it's local file/directory or GitHub repo
118
124
  is_github_repo = "/" in api_location and not api_location.startswith((".", "/"))
119
125
 
120
126
  if is_github_repo:
121
- # Format: "org/repo" or "org/repo/path/to/file.py"
127
+ # Format: "org/repo" or "org/repo/path/to/models"
122
128
  parts = api_location.split("/")
123
129
  if len(parts) >= 2:
124
130
  repo = "/".join(parts[:2])
125
- path = "/".join(parts[2:]) if len(parts) > 2 else "models.py"
131
+ path = "/".join(parts[2:]) if len(parts) > 2 else "app/models"
126
132
  else:
127
133
  repo = api_location
128
- path = "models.py"
134
+ path = "app/models"
129
135
 
130
136
  api_config = {"type": "github", "repo": repo, "path": path}
131
137
  click.echo(f" 🐙 GitHub repo detected: {repo}/{path}")
132
138
  else:
133
139
  api_config = {"type": "local", "path": api_location}
134
140
 
135
- # Check if local file exists
136
- if Path(api_location).exists():
137
- click.echo(" ✅ Local file found")
141
+ # Check if local file/directory exists and provide feedback
142
+ local_path = Path(api_location)
143
+ if local_path.exists():
144
+ if local_path.is_file():
145
+ click.echo(f" ✅ Local file found: {api_location}")
146
+ elif local_path.is_dir():
147
+ # Count Python files in directory
148
+ py_files = list(local_path.rglob("*.py"))
149
+ py_files = [
150
+ f
151
+ for f in py_files
152
+ if not f.name.startswith("test_") and f.name != "__init__.py"
153
+ ]
154
+ click.echo(
155
+ f" ✅ Local directory found: {api_location} ({len(py_files)} Python files)"
156
+ )
157
+ else:
158
+ click.echo(
159
+ f" ⚠️ Path exists but is neither file nor directory: {api_location}"
160
+ )
138
161
  else:
139
- click.echo(f" ⚠️ File not found: {api_location}")
162
+ click.echo(f" ⚠️ Path not found: {api_location}")
140
163
  if not click.confirm(" Continue anyway?"):
141
164
  sys.exit(1)
142
165
 
@@ -454,15 +477,23 @@ def _test_setup(config_file: Path) -> bool:
454
477
  "--output", type=click.Choice(["terminal", "json", "github"]), default="terminal"
455
478
  )
456
479
  @click.option("--dbt-project", help="Override DBT project path")
457
- @click.option("--fastapi-local", help="Override FastAPI models path")
480
+ @click.option(
481
+ "--fastapi-local", help="Override FastAPI models path (file or directory)"
482
+ )
483
+ @click.option("--fastapi-directory", help="Override FastAPI models directory path")
458
484
  @click.option("--fastapi-repo", help="Override FastAPI repo (org/repo)")
459
- @click.option("--fastapi-path", default="app/models.py", help="Path in FastAPI repo")
485
+ @click.option(
486
+ "--fastapi-path",
487
+ default="app/models",
488
+ help="Path in FastAPI repo (file or directory)",
489
+ )
460
490
  def validate(
461
491
  config: str,
462
492
  dry_run: bool,
463
493
  output: str,
464
494
  dbt_project: str,
465
495
  fastapi_local: str,
496
+ fastapi_directory: str,
466
497
  fastapi_repo: str,
467
498
  fastapi_path: str,
468
499
  ):
@@ -486,12 +517,20 @@ def validate(
486
517
 
487
518
  if dry_run:
488
519
  click.echo("🧪 Dry run - testing configuration only")
489
- _test_configuration(config_data, dbt_project, fastapi_local, fastapi_repo)
520
+ _test_configuration(
521
+ config_data, dbt_project, fastapi_local, fastapi_directory, fastapi_repo
522
+ )
490
523
  return
491
524
 
492
525
  # Run actual validation
493
526
  _run_validation(
494
- config_data, output, dbt_project, fastapi_local, fastapi_repo, fastapi_path
527
+ config_data,
528
+ output,
529
+ dbt_project,
530
+ fastapi_local,
531
+ fastapi_directory,
532
+ fastapi_repo,
533
+ fastapi_path,
495
534
  )
496
535
 
497
536
 
@@ -528,10 +567,11 @@ def _run_validation(
528
567
  output: str,
529
568
  dbt_project: str,
530
569
  fastapi_local: str,
570
+ fastapi_directory: str,
531
571
  fastapi_repo: str,
532
572
  fastapi_path: str,
533
573
  ):
534
- """Run the actual validation."""
574
+ """Run the actual validation with directory support."""
535
575
 
536
576
  # Get DBT project path
537
577
  dbt_path = dbt_project or config_data.get("source", {}).get("dbt", {}).get(
@@ -545,12 +585,37 @@ def _run_validation(
545
585
  click.echo(f"❌ Error initializing DBT extractor: {e}")
546
586
  sys.exit(1)
547
587
 
548
- # Initialize FastAPI extractor
588
+ # Initialize FastAPI extractor with directory support
549
589
  try:
550
- if fastapi_local:
551
- fastapi_extractor = FastAPIExtractor.from_local_file(fastapi_local)
590
+ if fastapi_local or fastapi_directory:
591
+ # Use local path (file or directory)
592
+ local_path = fastapi_local or fastapi_directory
593
+
594
+ # Auto-detect if it's a file or directory
595
+ path = Path(local_path)
596
+ if path.is_file():
597
+ click.echo(f"📄 Using FastAPI models file: {local_path}")
598
+ fastapi_extractor = FastAPIExtractor.from_local_file(local_path)
599
+ elif path.is_dir():
600
+ click.echo(f"📁 Using FastAPI models directory: {local_path}")
601
+ fastapi_extractor = FastAPIExtractor.from_local_directory(local_path)
602
+ else:
603
+ raise ValueError(f"Path does not exist: {local_path}")
604
+
552
605
  elif fastapi_repo:
606
+ # Use GitHub repository
553
607
  github_token = os.environ.get("GITHUB_TOKEN")
608
+
609
+ # Check if fastapi_path ends with .py (file) or not (directory)
610
+ if fastapi_path.endswith(".py"):
611
+ click.echo(
612
+ f"📄 Using FastAPI models file: {fastapi_repo}/{fastapi_path}"
613
+ )
614
+ else:
615
+ click.echo(
616
+ f"📁 Using FastAPI models directory: {fastapi_repo}/{fastapi_path}"
617
+ )
618
+
554
619
  fastapi_extractor = FastAPIExtractor.from_github_repo(
555
620
  repo=fastapi_repo, path=fastapi_path, token=github_token
556
621
  )
@@ -558,14 +623,23 @@ def _run_validation(
558
623
  # Get from config
559
624
  target_config = list(config_data.get("target", {}).values())[0]
560
625
  if target_config.get("type") == "local":
561
- fastapi_extractor = FastAPIExtractor.from_local_file(
562
- target_config.get("path")
563
- )
626
+ local_path = target_config.get("path")
627
+ path = Path(local_path)
628
+
629
+ if path.is_file():
630
+ fastapi_extractor = FastAPIExtractor.from_local_file(local_path)
631
+ elif path.is_dir():
632
+ fastapi_extractor = FastAPIExtractor.from_local_directory(
633
+ local_path
634
+ )
635
+ else:
636
+ raise ValueError(f"Path does not exist: {local_path}")
637
+
564
638
  elif target_config.get("type") == "github":
565
639
  github_token = os.environ.get("GITHUB_TOKEN")
566
640
  fastapi_extractor = FastAPIExtractor.from_github_repo(
567
641
  repo=target_config.get("repo"),
568
- path=target_config.get("path", "app/models.py"),
642
+ path=target_config.get("path", "app/models"),
569
643
  token=github_token,
570
644
  )
571
645
  else:
@@ -1,6 +1,6 @@
1
1
  # data_contract_validator/extractors/dbt.py
2
2
  """
3
- DBT schema extractor - simplified version of your working code.
3
+ DBT schema extractor
4
4
  """
5
5
 
6
6
  import json
@@ -0,0 +1,439 @@
1
+ # data_contract_validator/extractors/fastapi.py
2
+ """
3
+ Enhanced FastAPI/Pydantic schema extractor with directory support
4
+ """
5
+
6
+ import ast
7
+ import re
8
+ import requests
9
+ import os
10
+ from pathlib import Path
11
+ from typing import Dict, List, Any, Optional, Union, get_type_hints
12
+
13
+ from .base import BaseExtractor
14
+ from ..core.models import Schema
15
+
16
+
17
class FastAPIExtractor(BaseExtractor):
    """Extract schemas from FastAPI/Pydantic models - supports files and directories.

    An extractor works in one of two modes:

    * single-file mode: ``content`` holds the source text of one module;
    * directory mode: ``all_files_content`` maps a relative file path to the
      source text of every model file that was loaded.

    Use the ``from_*`` constructors rather than calling ``__init__`` directly.
    """

    # Seconds to wait for each GitHub API call. ``requests`` has no default
    # timeout, so a stalled connection would otherwise block forever.
    _REQUEST_TIMEOUT = 30

    # File names that are never treated as model files (besides ``test_*``).
    _SKIPPED_FILE_NAMES = ("__init__.py", "tests.py")

    def __init__(
        self,
        content: Optional[str] = None,
        source: str = "unknown",
        file_path: Optional[str] = None,
    ):
        """Store the raw source text and a label describing where it came from.

        Args:
            content: Python source of a single module (single-file mode).
            source: Human-readable origin label used in log output.
            file_path: Path of the single file, when known.
        """
        self.content = content
        self.source = source
        self.file_path = file_path
        # Populated only in directory mode: {relative path: source text}.
        self.all_files_content: Dict[str, str] = {}

    @staticmethod
    def _is_model_candidate(file_name: str) -> bool:
        """Return True unless *file_name* is a package marker or a test module."""
        if file_name in FastAPIExtractor._SKIPPED_FILE_NAMES:
            return False
        return not file_name.startswith("test_")

    @classmethod
    def from_local_file(cls, file_path: str) -> "FastAPIExtractor":
        """Create extractor from a local path.

        A directory is accepted as well and is handled like
        ``from_local_directory``.

        Raises:
            ValueError: If the path does not exist or is neither a regular
                file nor a directory.
        """
        path = Path(file_path)

        if not path.exists():
            raise ValueError(f"Path does not exist: {path}")

        if path.is_file():
            # Single file mode
            content = path.read_text(encoding="utf-8")
            return cls(content=content, source=f"local:{path}", file_path=str(path))

        if path.is_dir():
            # Directory mode
            return cls._from_local_directory(path)

        raise ValueError(f"Path is neither file nor directory: {path}")

    @classmethod
    def from_local_directory(cls, directory_path: str) -> "FastAPIExtractor":
        """Create extractor from local directory containing model files."""
        return cls._from_local_directory(Path(directory_path))

    @classmethod
    def _from_local_directory(cls, dir_path: Path) -> "FastAPIExtractor":
        """Load every candidate ``*.py`` file below *dir_path* (recursively).

        Raises:
            ValueError: If *dir_path* is not a directory, contains no Python
                files, or none of the candidate files could be read.
        """
        if not dir_path.is_dir():
            raise ValueError(f"Not a directory: {dir_path}")

        # Sorted so that the load order - and therefore which of two
        # same-named models gets renamed later - does not depend on the
        # file system's enumeration order.
        python_files = sorted(dir_path.rglob("*.py"))

        if not python_files:
            raise ValueError(f"No Python files found in directory: {dir_path}")

        print(f"🔍 Found {len(python_files)} Python files in {dir_path}")

        all_files_content = {}
        for py_file in python_files:
            # Skip common non-model files
            if not cls._is_model_candidate(py_file.name):
                continue

            try:
                content = py_file.read_text(encoding="utf-8")
            except (OSError, UnicodeDecodeError) as e:
                # Best effort: one unreadable file must not abort the scan.
                print(f" ⚠️ Could not read {py_file}: {e}")
                continue

            # POSIX-style keys keep local and GitHub directory modes consistent.
            relative_path = py_file.relative_to(dir_path).as_posix()
            all_files_content[relative_path] = content
            print(f" 📄 Loaded: {relative_path}")

        if not all_files_content:
            raise ValueError(f"Could not read any Python files from: {dir_path}")

        # Create extractor instance for directory mode
        extractor = cls(source=f"local_directory:{dir_path}")
        extractor.all_files_content = all_files_content
        return extractor

    @classmethod
    def from_github_repo(
        cls, repo: str, path: str, token: str = None
    ) -> "FastAPIExtractor":
        """Create extractor from GitHub repository - supports files and directories.

        A *path* ending in ``.py`` is fetched as a single file; anything else
        is assumed to be a directory.

        Raises:
            ValueError: If the file or directory cannot be fetched.
        """
        if path.endswith(".py"):
            # Single file
            content = cls._fetch_github_file(repo, path, token)
            if not content:
                raise ValueError(f"Could not fetch {repo}/{path} from GitHub")
            return cls(content, source=f"github:{repo}/{path}")

        # Assume it's a directory
        return cls._from_github_directory(repo, path, token)

    @classmethod
    def _from_github_directory(
        cls, repo: str, dir_path: str, token: str = None
    ) -> "FastAPIExtractor":
        """Fetch all Python files from a GitHub directory.

        Raises:
            ValueError: On any failure (HTTP error, path is not a directory,
                no model files found); the original error is chained.
        """
        # Get directory contents from GitHub API
        url = f"https://api.github.com/repos/{repo}/contents/{dir_path}"
        headers = {}

        if token:
            headers["Authorization"] = f"token {token}"

        try:
            response = requests.get(
                url, headers=headers, timeout=cls._REQUEST_TIMEOUT
            )
            if response.status_code != 200:
                raise ValueError(
                    f"Could not fetch directory {repo}/{dir_path}: {response.status_code}"
                )

            contents = response.json()
            if not isinstance(contents, list):
                raise ValueError(f"Path {dir_path} is not a directory")

            all_files_content = {}

            for item in contents:
                if item["type"] == "file" and item["name"].endswith(".py"):
                    # Skip common non-model files
                    if not cls._is_model_candidate(item["name"]):
                        continue

                    file_content = cls._fetch_github_file(repo, item["path"], token)
                    if file_content:
                        all_files_content[item["name"]] = file_content
                        print(f" 📄 Downloaded: {item['name']}")

                elif item["type"] == "dir":
                    # Recursively fetch subdirectories
                    try:
                        subdir_files = cls._fetch_github_directory_recursive(
                            repo, item["path"], token
                        )
                        for sub_path, sub_content in subdir_files.items():
                            all_files_content[f"{item['name']}/{sub_path}"] = (
                                sub_content
                            )
                    except Exception as e:
                        print(f" ⚠️ Could not fetch subdirectory {item['name']}: {e}")

            if not all_files_content:
                raise ValueError(f"No Python model files found in {repo}/{dir_path}")

            print(
                f" ✅ Downloaded {len(all_files_content)} files from {repo}/{dir_path}"
            )

            extractor = cls(source=f"github_directory:{repo}/{dir_path}")
            extractor.all_files_content = all_files_content
            return extractor

        except Exception as e:
            # Callers get a single exception type; ``from e`` keeps the cause.
            raise ValueError(
                f"Error fetching GitHub directory {repo}/{dir_path}: {e}"
            ) from e

    @classmethod
    def _fetch_github_directory_recursive(
        cls, repo: str, dir_path: str, token: str = None
    ) -> Dict[str, str]:
        """Recursively fetch Python files from GitHub directory.

        Best effort: failures are reported on stdout and whatever was
        collected so far is returned.

        Returns:
            Mapping of path relative to *dir_path* to file content.
        """
        url = f"https://api.github.com/repos/{repo}/contents/{dir_path}"
        headers = {}

        if token:
            headers["Authorization"] = f"token {token}"

        files_content = {}

        try:
            response = requests.get(
                url, headers=headers, timeout=cls._REQUEST_TIMEOUT
            )
            if response.status_code != 200:
                # Previously dropped silently; surface it like other failures.
                print(
                    f" ⚠️ Error fetching subdirectory {dir_path}: {response.status_code}"
                )
                return files_content

            for item in response.json():
                if item["type"] == "file" and item["name"].endswith(".py"):
                    if cls._is_model_candidate(item["name"]):
                        file_content = cls._fetch_github_file(
                            repo, item["path"], token
                        )
                        if file_content:
                            files_content[item["name"]] = file_content

                elif item["type"] == "dir":
                    # Recursive call for subdirectories
                    subdir_files = cls._fetch_github_directory_recursive(
                        repo, item["path"], token
                    )
                    for sub_path, sub_content in subdir_files.items():
                        files_content[f"{item['name']}/{sub_path}"] = sub_content

        except Exception as e:
            print(f" ⚠️ Error fetching subdirectory {dir_path}: {e}")

        return files_content

    @staticmethod
    def _fetch_github_file(repo: str, path: str, token: str = None) -> Optional[str]:
        """Fetch file content from GitHub API.

        Returns:
            The decoded UTF-8 text, or ``None`` when the request fails or the
            API answers with a non-200 status.
        """
        url = f"https://api.github.com/repos/{repo}/contents/{path}"
        headers = {}

        if token:
            headers["Authorization"] = f"token {token}"

        try:
            response = requests.get(
                url, headers=headers, timeout=FastAPIExtractor._REQUEST_TIMEOUT
            )
            if response.status_code == 200:
                import base64

                # The contents API returns the file body base64-encoded.
                return base64.b64decode(response.json()["content"]).decode("utf-8")

            print(f" ❌ GitHub API error for {path}: {response.status_code}")
            return None
        except Exception as e:
            print(f" ❌ Error fetching {path} from GitHub: {e}")
            return None

    def extract_schemas(self) -> Dict[str, Schema]:
        """Extract schemas from FastAPI/Pydantic models.

        Uses directory mode when ``all_files_content`` is non-empty,
        otherwise parses ``content`` as a single file.
        """
        if self.all_files_content:
            return self._extract_schemas_from_directory()
        return self._extract_schemas_from_single_file()

    def _extract_schemas_from_single_file(self) -> Dict[str, Schema]:
        """Extract schemas from a single file; returns ``{}`` on failure."""
        print(f"🔍 Extracting FastAPI schemas from {self.source}")

        try:
            schemas = self._parse_pydantic_models(self.content)
            print(f" ✅ Found {len(schemas)} models")
            return schemas
        except Exception as e:
            print(f" ❌ Error parsing models: {e}")
            return {}

    def _extract_schemas_from_directory(self) -> Dict[str, Schema]:
        """Extract schemas from multiple files in a directory.

        When two files define a model that maps to the same schema name, the
        later one is stored under ``<name>_<file path with separators
        replaced by underscores>`` so neither is lost.
        """
        print(f"🔍 Extracting FastAPI schemas from directory {self.source}")

        all_schemas = {}
        total_models = 0

        for file_path, file_content in self.all_files_content.items():
            try:
                print(f" 📄 Processing: {file_path}")
                file_schemas = self._parse_pydantic_models(
                    file_content, file_source=file_path
                )

                # Check for duplicate model names across files
                for schema_name, schema in file_schemas.items():
                    if schema_name not in all_schemas:
                        all_schemas[schema_name] = schema
                        continue

                    print(
                        f" ⚠️ Duplicate model name '{schema_name}' found in {file_path}"
                    )
                    print(f" Previous: {all_schemas[schema_name].source}")
                    print(f" Current: {schema.source}")

                    # Strip only the trailing extension: a blanket
                    # ``replace('.py', '')`` would also eat ".py" inside a
                    # directory name.
                    stem = file_path[:-3] if file_path.endswith(".py") else file_path
                    suffix = stem.replace("\\", "_").replace("/", "_")
                    base_name = f"{schema_name}_{suffix}"

                    # Never overwrite an entry that already uses the new name.
                    unique_name = base_name
                    counter = 2
                    while unique_name in all_schemas:
                        unique_name = f"{base_name}_{counter}"
                        counter += 1

                    all_schemas[unique_name] = schema
                    print(f" Renamed to: {unique_name}")

                if file_schemas:
                    print(f" ✅ Found {len(file_schemas)} models")
                    total_models += len(file_schemas)
                else:
                    print(" ⚪ No Pydantic models found")

            except Exception as e:
                print(f" ❌ Error parsing {file_path}: {e}")

        print(
            f" ✅ Total: {total_models} models from {len(self.all_files_content)} files"
        )
        return all_schemas

    def _parse_pydantic_models(
        self, content: str, file_source: str = None
    ) -> Dict[str, Schema]:
        """Parse Pydantic models from Python code.

        Args:
            content: Python source text.
            file_source: Optional file label recorded in each schema's source.

        Returns:
            Mapping of schema name to ``Schema``; ``{}`` if parsing fails.
        """
        try:
            tree = ast.parse(content)
            schemas = {}

            # ast.walk also visits classes nested in other classes/functions.
            for node in ast.walk(tree):
                if not isinstance(node, ast.ClassDef):
                    continue
                if not self._is_pydantic_model(node):
                    continue

                schema = self._analyze_pydantic_class(node, file_source)
                if schema:
                    schemas[schema.name] = schema

            return schemas

        except Exception as e:
            print(f" ❌ Error parsing Python code: {e}")
            return {}

    def _is_pydantic_model(self, node: ast.ClassDef) -> bool:
        """Check if class inherits directly from BaseModel or SQLModel.

        Both the bare name (``BaseModel``) and a dotted form
        (``pydantic.BaseModel``) are recognised. Classes that inherit from
        another model class are not detected.
        """
        model_bases = ("BaseModel", "SQLModel")
        for base in node.bases:
            if isinstance(base, ast.Name) and base.id in model_bases:
                return True
            if isinstance(base, ast.Attribute) and base.attr in model_bases:
                return True
        return False

    def _analyze_pydantic_class(
        self, node: ast.ClassDef, file_source: str = None
    ) -> Optional[Schema]:
        """Analyze a Pydantic class to extract schema.

        Returns:
            A ``Schema`` built from the annotated class attributes, or
            ``None`` for SQLModel table classes and for classes without any
            annotated attribute.
        """
        # Skip SQLModel tables (database models, not API models)
        if self._is_sqlmodel_table(node):
            return None

        # Convert class name to table name
        table_name = self._class_to_table_name(node.name)

        columns = []

        # Parse type annotations
        for item in node.body:
            if not (
                isinstance(item, ast.AnnAssign) and isinstance(item.target, ast.Name)
            ):
                continue

            field_type = self._parse_type_annotation(item.annotation)
            is_required = not self._is_optional_type(item.annotation)

            # NOTE(review): _python_to_sql_type is not defined in this class;
            # presumably inherited from BaseExtractor - confirm.
            columns.append(
                {
                    "name": item.target.id,
                    "type": self._python_to_sql_type(field_type),
                    "required": is_required,
                    "nullable": not is_required,
                }
            )

        if not columns:
            return None

        # Create source identifier
        if file_source:
            source = f"pydantic:{node.name}@{file_source}"
        else:
            source = f"pydantic:{node.name}"

        return Schema(name=table_name, columns=columns, source=source)

    def _is_sqlmodel_table(self, node: ast.ClassDef) -> bool:
        """Check if this is a SQLModel table (database model, not API model).

        ``class Hero(SQLModel, table=True)`` stores ``table=True`` in the
        class definition's *keywords*, not in its bases, so the keywords are
        what must be inspected. Call-style bases are still checked as before.
        """

        def _is_table_true(keyword: ast.keyword) -> bool:
            return (
                keyword.arg == "table"
                and isinstance(keyword.value, ast.Constant)
                and keyword.value.value is True
            )

        if any(_is_table_true(keyword) for keyword in node.keywords):
            return True

        for base in node.bases:
            if isinstance(base, ast.Call) and any(
                _is_table_true(keyword) for keyword in base.keywords
            ):
                return True
        return False

    def _class_to_table_name(self, class_name: str) -> str:
        """Convert CamelCase class name to snake_case table name.

        One trailing ``_model``, ``_schema``, ``_response`` or ``_request``
        suffix is removed, e.g. ``UserProfileResponse`` -> ``user_profile``.
        """
        # Insert underscore before capital letters
        table_name = re.sub(r"(.)([A-Z][a-z]+)", r"\1_\2", class_name)
        table_name = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", table_name).lower()

        # Remove common suffixes
        for suffix in ["_model", "_schema", "_response", "_request"]:
            if table_name.endswith(suffix):
                table_name = table_name[: -len(suffix)]
                break

        return table_name

    def _parse_type_annotation(self, annotation) -> str:
        """Parse type annotation to string.

        Handles plain names (``int``), subscripted names (``Optional[int]``)
        and dotted names (``datetime.datetime``); anything else yields
        ``"unknown"``.
        """
        if isinstance(annotation, ast.Name):
            return annotation.id
        elif isinstance(annotation, ast.Subscript):
            if isinstance(annotation.value, ast.Name):
                # Handle Optional[Type], List[Type], etc.
                inner_type = self._parse_type_annotation(annotation.slice)
                return f"{annotation.value.id}[{inner_type}]"
        elif isinstance(annotation, ast.Attribute):
            # Handle datetime.datetime, etc.
            if hasattr(annotation.value, "id"):
                return f"{annotation.value.id}.{annotation.attr}"
            return annotation.attr

        return "unknown"

    def _is_optional_type(self, annotation) -> bool:
        """Check if type annotation admits ``None``.

        Recognised forms: ``Optional[X]``, ``Union[..., None]``, their
        ``typing.``-qualified spellings, and ``X | None``. A ``Union``
        without ``None`` is not optional.
        """

        def _is_none(node) -> bool:
            return isinstance(node, ast.Constant) and node.value is None

        def _admits_none(node) -> bool:
            return _is_none(node) or self._is_optional_type(node)

        # PEP 604 form: X | None
        if isinstance(annotation, ast.BinOp) and isinstance(annotation.op, ast.BitOr):
            return _admits_none(annotation.left) or _admits_none(annotation.right)

        if not isinstance(annotation, ast.Subscript):
            return False

        head = annotation.value
        if isinstance(head, ast.Name):
            head_name = head.id
        elif isinstance(head, ast.Attribute):
            head_name = head.attr
        else:
            return False

        if head_name == "Optional":
            return True

        if head_name == "Union":
            members = annotation.slice
            # Python 3.8 wraps the subscript in ast.Index; unwrap it there.
            index_cls = getattr(ast, "Index", None)
            if index_cls is not None and isinstance(members, index_cls):
                members = members.value
            if isinstance(members, ast.Tuple):
                return any(_admits_none(member) for member in members.elts)
            return _admits_none(members)

        return False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-contract-validator
3
- Version: 1.0.3
3
+ Version: 1.0.4
4
4
  Summary: Adding pre-commit-fixes
5
5
  Author-email: Ogunniran Siji <ogunniransiji@gmail.com>
6
6
  Maintainer-email: Ogunniran Siji <ogunniransiji@gmail.com>
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "data-contract-validator"
7
- version = "1.0.3"
7
+ version = "1.0.4"
8
8
  description = "Adding pre-commit-fixes"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -1,200 +0,0 @@
1
- # data_contract_validator/extractors/fastapi.py
2
- """
3
- FastAPI/Pydantic schema extractor - simplified version of your working code.
4
- """
5
-
6
- import ast
7
- import re
8
- import requests
9
- import os
10
- from pathlib import Path
11
- from typing import Dict, List, Any, Optional, Union, get_type_hints
12
-
13
- from .base import BaseExtractor
14
- from ..core.models import Schema
15
-
16
-
17
class FastAPIExtractor(BaseExtractor):
    """Extract schemas from FastAPI/Pydantic models held in a single module."""

    # Seconds to wait for the GitHub API call. ``requests`` has no default
    # timeout, so a stalled connection would otherwise block forever.
    _REQUEST_TIMEOUT = 30

    def __init__(self, content: str, source: str = "unknown"):
        """Store the module source text and a label describing its origin."""
        self.content = content
        self.source = source

    @classmethod
    def from_local_file(cls, file_path: str) -> "FastAPIExtractor":
        """Create extractor from local file."""
        path = Path(file_path)
        content = path.read_text(encoding="utf-8")
        return cls(content, source=f"local:{path}")

    @classmethod
    def from_github_repo(
        cls, repo: str, path: str, token: str = None
    ) -> "FastAPIExtractor":
        """Create extractor from GitHub repository.

        Raises:
            ValueError: If the file could not be fetched.
        """
        content = cls._fetch_github_file(repo, path, token)
        if not content:
            raise ValueError(f"Could not fetch {repo}/{path} from GitHub")
        return cls(content, source=f"github:{repo}/{path}")

    @staticmethod
    def _fetch_github_file(repo: str, path: str, token: str = None) -> Optional[str]:
        """Fetch file content from GitHub API.

        Returns:
            The decoded UTF-8 text, or ``None`` when the request fails or the
            API answers with a non-200 status.
        """
        url = f"https://api.github.com/repos/{repo}/contents/{path}"
        headers = {}

        if token:
            headers["Authorization"] = f"token {token}"

        try:
            response = requests.get(
                url, headers=headers, timeout=FastAPIExtractor._REQUEST_TIMEOUT
            )
            if response.status_code == 200:
                import base64

                # The contents API returns the file body base64-encoded.
                content = base64.b64decode(response.json()["content"]).decode("utf-8")
                print(f" ✅ Downloaded {path} from {repo}")
                return content

            print(f" ❌ GitHub API error: {response.status_code}")
            return None
        except Exception as e:
            print(f" ❌ Error fetching from GitHub: {e}")
            return None

    def extract_schemas(self) -> Dict[str, Schema]:
        """Extract schemas from FastAPI/Pydantic models; ``{}`` on failure."""
        print(f"🔍 Extracting FastAPI schemas from {self.source}")

        try:
            schemas = self._parse_pydantic_models(self.content)
            print(f" ✅ Found {len(schemas)} models")
            return schemas
        except Exception as e:
            print(f" ❌ Error parsing models: {e}")
            return {}

    def _parse_pydantic_models(self, content: str) -> Dict[str, Schema]:
        """Parse Pydantic models from Python code.

        Returns:
            Mapping of schema name to ``Schema``; ``{}`` if parsing fails.
        """
        try:
            tree = ast.parse(content)
            schemas = {}

            # ast.walk also visits classes nested in other classes/functions.
            for node in ast.walk(tree):
                if not isinstance(node, ast.ClassDef):
                    continue
                if not self._is_pydantic_model(node):
                    continue

                schema = self._analyze_pydantic_class(node)
                if schema:
                    table_name = schema.name
                    schemas[table_name] = schema
                    print(f" ✅ Found model: {node.name} -> {table_name}")

            return schemas

        except Exception as e:
            print(f" ❌ Error parsing Python code: {e}")
            return {}

    def _is_pydantic_model(self, node: ast.ClassDef) -> bool:
        """Check if class inherits directly from BaseModel or SQLModel."""
        model_bases = ("BaseModel", "SQLModel")
        for base in node.bases:
            if isinstance(base, ast.Name) and base.id in model_bases:
                return True
            if isinstance(base, ast.Attribute) and base.attr in model_bases:
                return True
        return False

    def _analyze_pydantic_class(self, node: ast.ClassDef) -> Optional[Schema]:
        """Analyze a Pydantic class to extract schema.

        Returns:
            A ``Schema`` built from the annotated class attributes, or
            ``None`` for SQLModel table classes and for classes without any
            annotated attribute.
        """
        # Skip SQLModel tables (database models, not API models)
        if self._is_sqlmodel_table(node):
            return None

        # Convert class name to table name
        table_name = self._class_to_table_name(node.name)

        columns = []

        # Parse type annotations
        for item in node.body:
            if not (
                isinstance(item, ast.AnnAssign) and isinstance(item.target, ast.Name)
            ):
                continue

            field_type = self._parse_type_annotation(item.annotation)
            is_required = not self._is_optional_type(item.annotation)

            # NOTE(review): _python_to_sql_type is not defined in this class;
            # presumably inherited from BaseExtractor - confirm.
            columns.append(
                {
                    "name": item.target.id,
                    "type": self._python_to_sql_type(field_type),
                    "required": is_required,
                    "nullable": not is_required,
                }
            )

        if not columns:
            return None

        return Schema(name=table_name, columns=columns, source=f"pydantic:{node.name}")

    def _is_sqlmodel_table(self, node: ast.ClassDef) -> bool:
        """Check if this is a SQLModel table (database model, not API model).

        ``class Hero(SQLModel, table=True)`` stores ``table=True`` in the
        class definition's *keywords*, not in its bases, so the keywords are
        what must be inspected. Call-style bases are still checked as before.
        """

        def _is_table_true(keyword: ast.keyword) -> bool:
            return (
                keyword.arg == "table"
                and isinstance(keyword.value, ast.Constant)
                and keyword.value.value is True
            )

        if any(_is_table_true(keyword) for keyword in node.keywords):
            return True

        for base in node.bases:
            if isinstance(base, ast.Call) and any(
                _is_table_true(keyword) for keyword in base.keywords
            ):
                return True
        return False

    def _class_to_table_name(self, class_name: str) -> str:
        """Convert CamelCase class name to snake_case table name.

        One trailing ``_model``, ``_schema``, ``_response`` or ``_request``
        suffix is removed.
        """
        # Insert underscore before capital letters
        table_name = re.sub(r"(.)([A-Z][a-z]+)", r"\1_\2", class_name)
        table_name = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", table_name).lower()

        # Remove common suffixes
        for suffix in ["_model", "_schema", "_response", "_request"]:
            if table_name.endswith(suffix):
                table_name = table_name[: -len(suffix)]
                break

        return table_name

    def _parse_type_annotation(self, annotation) -> str:
        """Parse type annotation to string; unsupported forms give "unknown"."""
        if isinstance(annotation, ast.Name):
            return annotation.id
        elif isinstance(annotation, ast.Subscript):
            if isinstance(annotation.value, ast.Name):
                # Handle Optional[Type], List[Type], etc.
                inner_type = self._parse_type_annotation(annotation.slice)
                return f"{annotation.value.id}[{inner_type}]"
        elif isinstance(annotation, ast.Attribute):
            # Handle datetime.datetime, etc.
            if hasattr(annotation.value, "id"):
                return f"{annotation.value.id}.{annotation.attr}"
            return annotation.attr

        return "unknown"

    def _is_optional_type(self, annotation) -> bool:
        """Check if type annotation admits ``None``.

        Recognised forms: ``Optional[X]``, ``Union[..., None]``, their
        ``typing.``-qualified spellings, and ``X | None``. A ``Union``
        without ``None`` is not optional.
        """

        def _is_none(node) -> bool:
            return isinstance(node, ast.Constant) and node.value is None

        def _admits_none(node) -> bool:
            return _is_none(node) or self._is_optional_type(node)

        # PEP 604 form: X | None
        if isinstance(annotation, ast.BinOp) and isinstance(annotation.op, ast.BitOr):
            return _admits_none(annotation.left) or _admits_none(annotation.right)

        if not isinstance(annotation, ast.Subscript):
            return False

        head = annotation.value
        if isinstance(head, ast.Name):
            head_name = head.id
        elif isinstance(head, ast.Attribute):
            head_name = head.attr
        else:
            return False

        if head_name == "Optional":
            return True

        if head_name == "Union":
            members = annotation.slice
            # Python 3.8 wraps the subscript in ast.Index; unwrap it there.
            index_cls = getattr(ast, "Index", None)
            if index_cls is not None and isinstance(members, index_cls):
                members = members.value
            if isinstance(members, ast.Tuple):
                return any(_admits_none(member) for member in members.elts)
            return _admits_none(members)

        return False