data-contract-validator 1.0.4a0__tar.gz → 1.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. data_contract_validator-1.0.5/CHANGELOG.md +71 -0
  2. {data_contract_validator-1.0.4a0/data_contract_validator.egg-info → data_contract_validator-1.0.5}/PKG-INFO +2 -2
  3. {data_contract_validator-1.0.4a0 → data_contract_validator-1.0.5}/data_contract_validator/cli.py +48 -12
  4. {data_contract_validator-1.0.4a0 → data_contract_validator-1.0.5}/data_contract_validator/extractors/dbt.py +18 -5
  5. {data_contract_validator-1.0.4a0 → data_contract_validator-1.0.5}/data_contract_validator/extractors/fastapi.py +25 -1
  6. {data_contract_validator-1.0.4a0 → data_contract_validator-1.0.5/data_contract_validator.egg-info}/PKG-INFO +2 -2
  7. {data_contract_validator-1.0.4a0 → data_contract_validator-1.0.5}/pyproject.toml +2 -2
  8. data_contract_validator-1.0.4a0/CHANGELOG.md +0 -38
  9. {data_contract_validator-1.0.4a0 → data_contract_validator-1.0.5}/LICENSE +0 -0
  10. {data_contract_validator-1.0.4a0 → data_contract_validator-1.0.5}/MANIFEST.in +0 -0
  11. {data_contract_validator-1.0.4a0 → data_contract_validator-1.0.5}/README.md +0 -0
  12. {data_contract_validator-1.0.4a0 → data_contract_validator-1.0.5}/data_contract_validator/__init__.py +0 -0
  13. {data_contract_validator-1.0.4a0 → data_contract_validator-1.0.5}/data_contract_validator/core/__init__.py +0 -0
  14. {data_contract_validator-1.0.4a0 → data_contract_validator-1.0.5}/data_contract_validator/core/models.py +0 -0
  15. {data_contract_validator-1.0.4a0 → data_contract_validator-1.0.5}/data_contract_validator/core/validator.py +0 -0
  16. {data_contract_validator-1.0.4a0 → data_contract_validator-1.0.5}/data_contract_validator/extractors/__init__.py +0 -0
  17. {data_contract_validator-1.0.4a0 → data_contract_validator-1.0.5}/data_contract_validator/extractors/base.py +0 -0
  18. {data_contract_validator-1.0.4a0 → data_contract_validator-1.0.5}/data_contract_validator/integrations/__init__.py +0 -0
  19. {data_contract_validator-1.0.4a0 → data_contract_validator-1.0.5}/data_contract_validator/py.typed +0 -0
  20. {data_contract_validator-1.0.4a0 → data_contract_validator-1.0.5}/data_contract_validator/templates/github-actions-template.yml +0 -0
  21. {data_contract_validator-1.0.4a0 → data_contract_validator-1.0.5}/data_contract_validator.egg-info/SOURCES.txt +0 -0
  22. {data_contract_validator-1.0.4a0 → data_contract_validator-1.0.5}/data_contract_validator.egg-info/dependency_links.txt +0 -0
  23. {data_contract_validator-1.0.4a0 → data_contract_validator-1.0.5}/data_contract_validator.egg-info/entry_points.txt +0 -0
  24. {data_contract_validator-1.0.4a0 → data_contract_validator-1.0.5}/data_contract_validator.egg-info/requires.txt +0 -0
  25. {data_contract_validator-1.0.4a0 → data_contract_validator-1.0.5}/data_contract_validator.egg-info/top_level.txt +0 -0
  26. {data_contract_validator-1.0.4a0 → data_contract_validator-1.0.5}/requirements.txt +0 -0
  27. {data_contract_validator-1.0.4a0 → data_contract_validator-1.0.5}/setup.cfg +0 -0
@@ -0,0 +1,71 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ## [1.0.5] - 2025-01-24
11
+
12
+ ### Fixed
13
+ - **CRITICAL**: Fixed a missing return statement in `DBTExtractor.extract_schemas()` that could cause it to return `None` instead of a dictionary
14
+ - Added fallback to SQL file parsing when manifest.json is unavailable
15
+ - Now works reliably with or without DBT CLI installed
16
+ - **HIGH**: Fixed a function signature mismatch in `_test_configuration()` that caused a `TypeError` when the `--dry-run` option was used
17
+ - Added missing `disable_manifest` parameter
18
+ - Enhanced to display manifest parsing status
19
+ - **MEDIUM**: Replaced bare exception handler in `_try_compile_dbt()` with specific exception types
20
+ - Now properly handles `subprocess.TimeoutExpired` and `FileNotFoundError`
21
+ - Provides helpful error messages instead of silent failures
22
+ - Respects keyboard interrupts
23
+ - **MEDIUM**: Removed unused `fastapi_directory` parameter from CLI
24
+ - Simplified API - use `--fastapi-local` for both files and directories
25
+ - **MEDIUM**: Added comprehensive YAML error handling with user-friendly messages
26
+ - Catches malformed YAML files with helpful suggestions
27
+ - Validates required configuration sections
28
+ - Provides clear error messages instead of Python tracebacks
29
+ - **LOW**: Added GitHub API rate limiting detection and handling
30
+ - Monitors rate limit headers and warns when limits are low
31
+ - Provides helpful guidance to use GITHUB_TOKEN for higher limits
32
+ - Better error messages for 403 and 404 responses
33
+
34
+ ### Improved
35
+ - Enhanced error messages throughout the application
36
+ - Better support for different use-cases:
37
+ - DBT projects with or without manifest.json
38
+ - Local files and directories for FastAPI models
39
+ - GitHub repositories with rate limit awareness
40
+ - Configuration validation with clear error reporting
41
+
42
+ ## [1.0.0] - 2025-01-XX
43
+
44
+ ### Added
45
+ - Initial release of Data Contract Validator
46
+ - DBT schema extraction from SQL files and manifest.json
47
+ - FastAPI/Pydantic model extraction from local files and GitHub repos
48
+ - Command-line interface with multiple output formats
49
+ - GitHub Actions integration
50
+ - Contract validation with critical/warning/info severity levels
51
+ - Support for multiple repositories and complex validation scenarios
52
+
53
+ ### Features
54
+ - ✅ DBT model schema extraction
55
+ - ✅ FastAPI/Pydantic schema extraction
56
+ - ✅ Cross-repository validation
57
+ - ✅ GitHub Actions workflows
58
+ - ✅ Multiple output formats (terminal, JSON, GitHub Actions)
59
+ - ✅ Comprehensive error reporting with suggested fixes
60
+ - ✅ Type compatibility checking
61
+ - ✅ Missing table/column detection
62
+
63
+ ### Known Limitations
64
+ - Only supports DBT and FastAPI currently
65
+ - Requires manual installation of DBT CLI
66
+ - Limited type inference from SQL
67
+ - No support for complex nested types
68
+
69
+ [Unreleased]: https://github.com/OGsiji/data-contract-validator/compare/v1.0.5...HEAD
70
+ [1.0.5]: https://github.com/OGsiji/data-contract-validator/releases/tag/v1.0.5
71
+ [1.0.0]: https://github.com/OGsiji/data-contract-validator/releases/tag/v1.0.0
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-contract-validator
3
- Version: 1.0.4a0
4
- Summary: Adding pre-commit-fixes
3
+ Version: 1.0.5
4
+ Summary: Critical bug fixes: DBT extractor return paths, function signatures, error handling, and GitHub API rate limiting
5
5
  Author-email: Ogunniran Siji <ogunniransiji@gmail.com>
6
6
  Maintainer-email: Ogunniran Siji <ogunniransiji@gmail.com>
7
7
  License: MIT
@@ -494,7 +494,6 @@ def _test_setup(config_file: Path) -> bool:
494
494
  @click.option(
495
495
  "--fastapi-local", help="Override FastAPI models path (file or directory)"
496
496
  )
497
- @click.option("--fastapi-directory", help="Override FastAPI models directory path")
498
497
  @click.option("--fastapi-repo", help="Override FastAPI repo (org/repo)")
499
498
  @click.option(
500
499
  "--fastapi-path",
@@ -508,10 +507,9 @@ def validate(
508
507
  output: str,
509
508
  dbt_project: str,
510
509
  fastapi_local: str,
511
- fastapi_directory: str,
512
510
  fastapi_repo: str,
513
511
  fastapi_path: str,
514
- disable_manifest: bool,
512
+ disable_manifest: bool,
515
513
  ):
516
514
  """🔍 Validate data contracts (prevents production breaks)."""
517
515
 
@@ -519,9 +517,34 @@ def validate(
519
517
  config_data = {}
520
518
  config_file = Path(config)
521
519
  if config_file.exists():
522
- with open(config_file) as f:
523
- config_data = yaml.safe_load(f)
524
- click.echo(f"📋 Using config: {config}")
520
+ try:
521
+ with open(config_file) as f:
522
+ config_data = yaml.safe_load(f)
523
+
524
+ # Validate that config is a dictionary
525
+ if not isinstance(config_data, dict):
526
+ click.echo(f"❌ Configuration file {config} is invalid: expected YAML dictionary")
527
+ click.echo("💡 Check the file format - it should contain key-value pairs")
528
+ sys.exit(1)
529
+
530
+ # Check for required sections
531
+ if "source" not in config_data:
532
+ click.echo(f"⚠️ Warning: 'source' section missing in {config}")
533
+ if "target" not in config_data:
534
+ click.echo(f"⚠️ Warning: 'target' section missing in {config}")
535
+
536
+ click.echo(f"📋 Using config: {config}")
537
+ except yaml.YAMLError as e:
538
+ click.echo(f"❌ Configuration file {config} contains invalid YAML:")
539
+ click.echo(f" {e}")
540
+ click.echo("💡 Check for:")
541
+ click.echo(" - Incorrect indentation")
542
+ click.echo(" - Missing colons after keys")
543
+ click.echo(" - Unmatched quotes or brackets")
544
+ sys.exit(1)
545
+ except Exception as e:
546
+ click.echo(f"❌ Error reading configuration file {config}: {e}")
547
+ sys.exit(1)
525
548
  elif not any([dbt_project, fastapi_local, fastapi_repo]):
526
549
  click.echo("❌ No config file found and no command line options provided")
527
550
  click.echo("💡 Run 'contract-validator init' to create a config file")
@@ -534,7 +557,7 @@ def validate(
534
557
  if dry_run:
535
558
  click.echo("🧪 Dry run - testing configuration only")
536
559
  _test_configuration(
537
- config_data, dbt_project, fastapi_local, fastapi_directory, fastapi_repo, disable_manifest
560
+ config_data, dbt_project, fastapi_local, fastapi_repo, disable_manifest
538
561
  )
539
562
  return
540
563
 
@@ -544,14 +567,18 @@ def validate(
544
567
  output,
545
568
  dbt_project,
546
569
  fastapi_local,
547
- fastapi_directory,
548
570
  fastapi_repo,
549
571
  fastapi_path,
572
+ disable_manifest,
550
573
  )
551
574
 
552
575
 
553
576
  def _test_configuration(
554
- config_data: Dict[str, Any], dbt_project: str, fastapi_local: str, fastapi_repo: str
577
+ config_data: Dict[str, Any],
578
+ dbt_project: str,
579
+ fastapi_local: str,
580
+ fastapi_repo: str,
581
+ disable_manifest: bool = False
555
582
  ):
556
583
  """Test configuration without running full validation."""
557
584
 
@@ -565,12 +592,21 @@ def _test_configuration(
565
592
  else:
566
593
  click.echo(" ❌ Path not found")
567
594
 
595
+ if disable_manifest or config_data.get("source", {}).get("dbt", {}).get("disable_manifest", False):
596
+ click.echo(" 📄 Manifest parsing: disabled")
597
+ else:
598
+ click.echo(" 📋 Manifest parsing: enabled")
599
+
568
600
  if fastapi_local:
569
601
  click.echo(f" 🎯 FastAPI models: {fastapi_local}")
570
- if Path(fastapi_local).exists():
571
- click.echo(" ✅ File exists")
602
+ local_path = Path(fastapi_local)
603
+ if local_path.exists():
604
+ if local_path.is_file():
605
+ click.echo(" ✅ File exists")
606
+ elif local_path.is_dir():
607
+ click.echo(" ✅ Directory exists")
572
608
  else:
573
- click.echo(" ❌ File not found")
609
+ click.echo(" ❌ Path not found")
574
610
 
575
611
  if fastapi_repo:
576
612
  click.echo(f" 🐙 FastAPI repo: {fastapi_repo}")
@@ -27,13 +27,19 @@ class DBTExtractor(BaseExtractor):
27
27
  """Extract schemas from DBT project."""
28
28
  print(f"🔍 Extracting DBT schemas from {self.project_path}")
29
29
 
30
- # Try manifest first, fallback to SQL parsing
30
+ # Check if manifest should be disabled
31
+ if self.disable_manifest:
32
+ print(" 📄 Manifest disabled, using SQL file parsing")
33
+ return self._extract_from_sql_files()
34
+
35
+ # Try to use manifest.json if available
31
36
  if self._try_compile_dbt() and self.manifest_path.exists():
32
37
  print(" 📋 Using manifest.json")
33
38
  return self._extract_from_manifest()
34
- else:
35
- print(" 📄 Using SQL file parsing")
36
- return self._extract_from_sql_files()
39
+
40
+ # Fallback to SQL file parsing if manifest is not available
41
+ print(" 📄 Manifest not available, using SQL file parsing")
42
+ return self._extract_from_sql_files()
37
43
 
38
44
  def _try_compile_dbt(self) -> bool:
39
45
  """Try to compile DBT project."""
@@ -45,7 +51,14 @@ class DBTExtractor(BaseExtractor):
45
51
  timeout=60,
46
52
  )
47
53
  return result.returncode == 0
48
- except:
54
+ except subprocess.TimeoutExpired:
55
+ print(" ⚠️ DBT compilation timeout (>60s)")
56
+ return False
57
+ except FileNotFoundError:
58
+ print(" ⚠️ DBT CLI not found (install with: pip install dbt-core)")
59
+ return False
60
+ except Exception as e:
61
+ print(f" ⚠️ DBT compilation error: {e}")
49
62
  return False
50
63
 
51
64
  def _extract_from_manifest(self) -> Dict[str, Schema]:
@@ -223,7 +223,7 @@ class FastAPIExtractor(BaseExtractor):
223
223
 
224
224
  @staticmethod
225
225
  def _fetch_github_file(repo: str, path: str, token: str = None) -> Optional[str]:
226
- """Fetch file content from GitHub API."""
226
+ """Fetch file content from GitHub API with rate limit handling."""
227
227
  url = f"https://api.github.com/repos/{repo}/contents/{path}"
228
228
  headers = {}
229
229
 
@@ -232,11 +232,35 @@ class FastAPIExtractor(BaseExtractor):
232
232
 
233
233
  try:
234
234
  response = requests.get(url, headers=headers)
235
+
236
+ # Check rate limit headers
237
+ if "X-RateLimit-Remaining" in response.headers:
238
+ remaining = int(response.headers["X-RateLimit-Remaining"])
239
+ if remaining < 10:
240
+ print(f" ⚠️ GitHub API rate limit low: {remaining} requests remaining")
241
+ if remaining == 0:
242
+ reset_time = int(response.headers.get("X-RateLimit-Reset", 0))
243
+ import time
244
+ wait_time = max(0, reset_time - int(time.time()))
245
+ print(f" ⏳ Rate limit exceeded. Resets in {wait_time // 60} minutes")
246
+
235
247
  if response.status_code == 200:
236
248
  import base64
237
249
 
238
250
  content = base64.b64decode(response.json()["content"]).decode("utf-8")
239
251
  return content
252
+ elif response.status_code == 403:
253
+ # Check if it's a rate limit error
254
+ error_message = response.json().get("message", "")
255
+ if "rate limit" in error_message.lower():
256
+ print(f" ❌ GitHub API rate limit exceeded")
257
+ print(f" 💡 Try setting GITHUB_TOKEN environment variable for higher limits")
258
+ else:
259
+ print(f" ❌ GitHub API access forbidden: {error_message}")
260
+ return None
261
+ elif response.status_code == 404:
262
+ print(f" ❌ File not found: {path}")
263
+ return None
240
264
  else:
241
265
  print(f" ❌ GitHub API error for {path}: {response.status_code}")
242
266
  return None
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-contract-validator
3
- Version: 1.0.4a0
4
- Summary: Adding pre-commit-fixes
3
+ Version: 1.0.5
4
+ Summary: Critical bug fixes: DBT extractor return paths, function signatures, error handling, and GitHub API rate limiting
5
5
  Author-email: Ogunniran Siji <ogunniransiji@gmail.com>
6
6
  Maintainer-email: Ogunniran Siji <ogunniransiji@gmail.com>
7
7
  License: MIT
@@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "data-contract-validator"
7
- version = "1.0.4a"
8
- description = "Adding pre-commit-fixes"
7
+ version = "1.0.5"
8
+ description = "Critical bug fixes: DBT extractor return paths, function signatures, error handling, and GitHub API rate limiting"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
11
11
  authors = [
@@ -1,38 +0,0 @@
1
- # Changelog
2
-
3
- All notable changes to this project will be documented in this file.
4
-
5
- The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
- and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
-
8
- ## [Unreleased]
9
-
10
- ## [1.0.0] - 2025-01-XX
11
-
12
- ### Added
13
- - Initial release of Data Contract Validator
14
- - DBT schema extraction from SQL files and manifest.json
15
- - FastAPI/Pydantic model extraction from local files and GitHub repos
16
- - Command-line interface with multiple output formats
17
- - GitHub Actions integration
18
- - Contract validation with critical/warning/info severity levels
19
- - Support for multiple repositories and complex validation scenarios
20
-
21
- ### Features
22
- - ✅ DBT model schema extraction
23
- - ✅ FastAPI/Pydantic schema extraction
24
- - ✅ Cross-repository validation
25
- - ✅ GitHub Actions workflows
26
- - ✅ Multiple output formats (terminal, JSON, GitHub Actions)
27
- - ✅ Comprehensive error reporting with suggested fixes
28
- - ✅ Type compatibility checking
29
- - ✅ Missing table/column detection
30
-
31
- ### Known Limitations
32
- - Only supports DBT and FastAPI currently
33
- - Requires manual installation of DBT CLI
34
- - Limited type inference from SQL
35
- - No support for complex nested types
36
-
37
- [Unreleased]: https://github.com/OGsiji/retl_validator/compare/v1.0.0...HEAD
38
- [1.0.0]: https://github.com/OGsiji/retl_validator/releases/tag/v1.0.0