kontra 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. kontra/__init__.py +1871 -0
  2. kontra/api/__init__.py +22 -0
  3. kontra/api/compare.py +340 -0
  4. kontra/api/decorators.py +153 -0
  5. kontra/api/results.py +2121 -0
  6. kontra/api/rules.py +681 -0
  7. kontra/cli/__init__.py +0 -0
  8. kontra/cli/commands/__init__.py +1 -0
  9. kontra/cli/commands/config.py +153 -0
  10. kontra/cli/commands/diff.py +450 -0
  11. kontra/cli/commands/history.py +196 -0
  12. kontra/cli/commands/profile.py +289 -0
  13. kontra/cli/commands/validate.py +468 -0
  14. kontra/cli/constants.py +6 -0
  15. kontra/cli/main.py +48 -0
  16. kontra/cli/renderers.py +304 -0
  17. kontra/cli/utils.py +28 -0
  18. kontra/config/__init__.py +34 -0
  19. kontra/config/loader.py +127 -0
  20. kontra/config/models.py +49 -0
  21. kontra/config/settings.py +797 -0
  22. kontra/connectors/__init__.py +0 -0
  23. kontra/connectors/db_utils.py +251 -0
  24. kontra/connectors/detection.py +323 -0
  25. kontra/connectors/handle.py +368 -0
  26. kontra/connectors/postgres.py +127 -0
  27. kontra/connectors/sqlserver.py +226 -0
  28. kontra/engine/__init__.py +0 -0
  29. kontra/engine/backends/duckdb_session.py +227 -0
  30. kontra/engine/backends/duckdb_utils.py +18 -0
  31. kontra/engine/backends/polars_backend.py +47 -0
  32. kontra/engine/engine.py +1205 -0
  33. kontra/engine/executors/__init__.py +15 -0
  34. kontra/engine/executors/base.py +50 -0
  35. kontra/engine/executors/database_base.py +528 -0
  36. kontra/engine/executors/duckdb_sql.py +607 -0
  37. kontra/engine/executors/postgres_sql.py +162 -0
  38. kontra/engine/executors/registry.py +69 -0
  39. kontra/engine/executors/sqlserver_sql.py +163 -0
  40. kontra/engine/materializers/__init__.py +14 -0
  41. kontra/engine/materializers/base.py +42 -0
  42. kontra/engine/materializers/duckdb.py +110 -0
  43. kontra/engine/materializers/factory.py +22 -0
  44. kontra/engine/materializers/polars_connector.py +131 -0
  45. kontra/engine/materializers/postgres.py +157 -0
  46. kontra/engine/materializers/registry.py +138 -0
  47. kontra/engine/materializers/sqlserver.py +160 -0
  48. kontra/engine/result.py +15 -0
  49. kontra/engine/sql_utils.py +611 -0
  50. kontra/engine/sql_validator.py +609 -0
  51. kontra/engine/stats.py +194 -0
  52. kontra/engine/types.py +138 -0
  53. kontra/errors.py +533 -0
  54. kontra/logging.py +85 -0
  55. kontra/preplan/__init__.py +5 -0
  56. kontra/preplan/planner.py +253 -0
  57. kontra/preplan/postgres.py +179 -0
  58. kontra/preplan/sqlserver.py +191 -0
  59. kontra/preplan/types.py +24 -0
  60. kontra/probes/__init__.py +20 -0
  61. kontra/probes/compare.py +400 -0
  62. kontra/probes/relationship.py +283 -0
  63. kontra/reporters/__init__.py +0 -0
  64. kontra/reporters/json_reporter.py +190 -0
  65. kontra/reporters/rich_reporter.py +11 -0
  66. kontra/rules/__init__.py +35 -0
  67. kontra/rules/base.py +186 -0
  68. kontra/rules/builtin/__init__.py +40 -0
  69. kontra/rules/builtin/allowed_values.py +156 -0
  70. kontra/rules/builtin/compare.py +188 -0
  71. kontra/rules/builtin/conditional_not_null.py +213 -0
  72. kontra/rules/builtin/conditional_range.py +310 -0
  73. kontra/rules/builtin/contains.py +138 -0
  74. kontra/rules/builtin/custom_sql_check.py +182 -0
  75. kontra/rules/builtin/disallowed_values.py +140 -0
  76. kontra/rules/builtin/dtype.py +203 -0
  77. kontra/rules/builtin/ends_with.py +129 -0
  78. kontra/rules/builtin/freshness.py +240 -0
  79. kontra/rules/builtin/length.py +193 -0
  80. kontra/rules/builtin/max_rows.py +35 -0
  81. kontra/rules/builtin/min_rows.py +46 -0
  82. kontra/rules/builtin/not_null.py +121 -0
  83. kontra/rules/builtin/range.py +222 -0
  84. kontra/rules/builtin/regex.py +143 -0
  85. kontra/rules/builtin/starts_with.py +129 -0
  86. kontra/rules/builtin/unique.py +124 -0
  87. kontra/rules/condition_parser.py +203 -0
  88. kontra/rules/execution_plan.py +455 -0
  89. kontra/rules/factory.py +103 -0
  90. kontra/rules/predicates.py +25 -0
  91. kontra/rules/registry.py +24 -0
  92. kontra/rules/static_predicates.py +120 -0
  93. kontra/scout/__init__.py +9 -0
  94. kontra/scout/backends/__init__.py +17 -0
  95. kontra/scout/backends/base.py +111 -0
  96. kontra/scout/backends/duckdb_backend.py +359 -0
  97. kontra/scout/backends/postgres_backend.py +519 -0
  98. kontra/scout/backends/sqlserver_backend.py +577 -0
  99. kontra/scout/dtype_mapping.py +150 -0
  100. kontra/scout/patterns.py +69 -0
  101. kontra/scout/profiler.py +801 -0
  102. kontra/scout/reporters/__init__.py +39 -0
  103. kontra/scout/reporters/json_reporter.py +165 -0
  104. kontra/scout/reporters/markdown_reporter.py +152 -0
  105. kontra/scout/reporters/rich_reporter.py +144 -0
  106. kontra/scout/store.py +208 -0
  107. kontra/scout/suggest.py +200 -0
  108. kontra/scout/types.py +652 -0
  109. kontra/state/__init__.py +29 -0
  110. kontra/state/backends/__init__.py +79 -0
  111. kontra/state/backends/base.py +348 -0
  112. kontra/state/backends/local.py +480 -0
  113. kontra/state/backends/postgres.py +1010 -0
  114. kontra/state/backends/s3.py +543 -0
  115. kontra/state/backends/sqlserver.py +969 -0
  116. kontra/state/fingerprint.py +166 -0
  117. kontra/state/types.py +1061 -0
  118. kontra/version.py +1 -0
  119. kontra-0.5.2.dist-info/METADATA +122 -0
  120. kontra-0.5.2.dist-info/RECORD +124 -0
  121. kontra-0.5.2.dist-info/WHEEL +5 -0
  122. kontra-0.5.2.dist-info/entry_points.txt +2 -0
  123. kontra-0.5.2.dist-info/licenses/LICENSE +17 -0
  124. kontra-0.5.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,304 @@
1
+ """CLI output rendering functions."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import typer
6
+
7
+
8
def print_rich_stats(stats: dict | None) -> None:
    """Pretty-print the optional stats block (concise, high-signal).

    Silently does nothing when *stats* is falsy. All summary lines are
    emitted in blue via typer; per-column profile rows use plain echo.
    """
    if not stats:
        return

    def blue(message: str) -> None:
        # Every summary line shares the same colour.
        typer.secho(message, fg=typer.colors.BLUE)

    dataset = stats.get("dataset", {}) or {}
    meta = stats.get("run_meta", {}) or {}
    projection = stats.get("projection") or {}

    # "engine" wins over "engine_label" when both are present.
    engine = meta.get("engine") or meta.get("engine_label")

    rows = dataset.get("nrows")
    cols = dataset.get("ncols")
    total_ms = meta.get("duration_ms_total")

    if rows is not None and cols is not None:
        if total_ms is not None:
            line = f"\nStats • rows={rows:,} cols={cols} duration={total_ms} ms"
            if engine:
                line += f" engine={engine}"
            blue(line)
        else:
            blue(f"\nStats • rows={rows:,} cols={cols}")

    # Preplan / pushdown timing (if available)
    analyze_ms = (meta.get("preplan_breakdown_ms") or {}).get("analyze")
    if analyze_ms is not None:
        blue(f"Preplan: analyze={analyze_ms} ms")

    pushdown = meta.get("pushdown_breakdown_ms") or {}
    if pushdown:
        timings = [
            f"{phase}={pushdown[phase]} ms"
            for phase in ("compile", "execute", "introspect")
            if pushdown.get(phase) is not None
        ]
        if timings:
            blue("SQL pushdown: " + ", ".join(timings))

    # Row-group pruning summary from the preplan manifest, when provided.
    manifest = stats.get("pushdown_manifest") or {}
    kept = manifest.get("row_groups_kept")
    total = manifest.get("row_groups_total")
    if kept is not None and total is not None:
        blue(f"Preplan manifest: row-groups {kept}/{total} kept")

    def preview(names) -> str:
        # Show at most six names, with an ellipsis when truncated.
        return ", ".join(names[:6]) + ("…" if len(names) > 6 else "")

    validated = stats.get("columns_validated") or []
    if validated:
        blue(f"Columns validated ({len(validated)}): {preview(validated)}")

    loaded = stats.get("columns_loaded") or []
    if loaded:
        blue(f"Columns loaded ({len(loaded)}): {preview(loaded)}")

    # Projection effectiveness (required / loaded / available counts)
    if projection:
        state = "on" if projection.get("enabled", True) else "off"
        required = projection.get("required_count", 0)
        loaded_cnt = projection.get("loaded_count", 0)
        available = projection.get("available_count")
        verdict = "(pruned)" if projection.get("effective") else "(no reduction)"
        if available is not None:
            blue(
                f"Projection [{state}]: "
                f"{required}/{loaded_cnt}/{available} (req/loaded/avail) {verdict}"
            )
        else:
            blue(
                f"Projection [{state}]: "
                f"{required}/{loaded_cnt} (req/loaded) {verdict}"
            )

    # Optional per-column profile (if requested)
    profile = stats.get("profile")
    if profile:
        blue("Profile:")
        for column, col_stats in profile.items():
            details = [
                f"nulls={col_stats.get('nulls', 0)}",
                f"distinct={col_stats.get('distinct', 0)}",
            ]
            # min/max/mean are only shown when all three are present.
            if {"min", "max", "mean"} <= col_stats.keys():
                details.append(f"min={col_stats['min']}")
                details.append(f"max={col_stats['max']}")
                details.append(f"mean={round(col_stats['mean'], 3)}")
            typer.echo(f"  - {column}: " + ", ".join(details))
110
+
111
+
112
def _split_by_severity(rule_diffs):
    """Partition rule-diff records into (blocking, warning, info) lists."""
    blocking = [rd for rd in rule_diffs if rd.severity == "blocking"]
    warning = [rd for rd in rule_diffs if rd.severity == "warning"]
    info = [rd for rd in rule_diffs if rd.severity == "info"]
    return blocking, warning, info


def _append_new_failure_lines(lines, header, rule_diffs):
    """Append a '<header> (N)' section; one line per newly-failing rule.

    Each line shows the rule id, the violation count (omitted when zero),
    and the failure mode (omitted when falsy).
    """
    if not rule_diffs:
        return
    lines.append(f"\n{header} ({len(rule_diffs)})")
    for rd in rule_diffs:
        count_info = f" ({rd.after_count:,} violations)" if rd.after_count > 0 else ""
        mode_info = f" [{rd.failure_mode}]" if rd.failure_mode else ""
        lines.append(f"  - {rd.rule_id}{count_info}{mode_info}")


def _append_regression_lines(lines, header, rule_diffs):
    """Append a '<header> (N)' section; one line per regressed rule.

    Each line shows before → after violation counts and the positive delta.
    """
    if not rule_diffs:
        return
    lines.append(f"\n{header} ({len(rule_diffs)})")
    for rd in rule_diffs:
        mode_info = f" [{rd.failure_mode}]" if rd.failure_mode else ""
        lines.append(
            f"  - {rd.rule_id}: {rd.before_count:,} → {rd.after_count:,} (+{rd.delta:,}){mode_info}"
        )


def render_diff_rich(diff) -> str:
    """Render validation diff in human-readable format.

    Args:
        diff: Validation diff with ``before``/``after`` runs (each exposing
            ``run_at``, ``contract_name`` and a ``summary``), plus
            ``new_failures``, ``regressions``, ``resolved`` and
            ``improvements`` lists of rule-diff records.

    Returns:
        A multi-line string suitable for terminal output.
    """
    lines = []

    # Header
    before_ts = diff.before.run_at.strftime("%Y-%m-%d %H:%M")
    after_ts = diff.after.run_at.strftime("%Y-%m-%d %H:%M")

    lines.append(f"Diff: {diff.after.contract_name}")
    lines.append(f"Comparing: {before_ts} → {after_ts}")
    lines.append("=" * 50)

    # Overall status
    if diff.status_changed:
        before_status = "PASSED" if diff.before.summary.passed else "FAILED"
        after_status = "PASSED" if diff.after.summary.passed else "FAILED"
        lines.append(f"\nOverall: {before_status} → {after_status}")
    else:
        status = "PASSED" if diff.after.summary.passed else "FAILED"
        lines.append(f"\nOverall: {status} (unchanged)")

    # Summary of rule pass counts
    lines.append(
        f"\nRules: {diff.before.summary.passed_rules}/{diff.before.summary.total_rules} → "
        f"{diff.after.summary.passed_rules}/{diff.after.summary.total_rules}"
    )

    # New failures, one section per severity
    blocking, warning, info = _split_by_severity(diff.new_failures)
    _append_new_failure_lines(lines, "❌ New Blocking Failures", blocking)
    _append_new_failure_lines(lines, "⚠️ New Warnings", warning)
    _append_new_failure_lines(lines, "ℹ️ New Info Issues", info)

    # Regressions, one section per severity
    blocking, warning, info = _split_by_severity(diff.regressions)
    _append_regression_lines(lines, "❌ Blocking Regressions", blocking)
    _append_regression_lines(lines, "⚠️ Warning Regressions", warning)
    _append_regression_lines(lines, "ℹ️ Info Regressions", info)

    # Resolved (failing before, passing now)
    if diff.resolved:
        lines.append(f"\n✅ Resolved ({len(diff.resolved)})")
        for rd in diff.resolved:
            lines.append(f"  - {rd.rule_id}")

    # Improvements (still failing, but fewer violations; delta keeps its sign)
    if diff.improvements:
        lines.append(f"\n📈 Improvements ({len(diff.improvements)})")
        for rd in diff.improvements:
            lines.append(
                f"  - {rd.rule_id}: {rd.before_count:,} → {rd.after_count:,} ({rd.delta:,})"
            )

    # No changes
    if (
        not diff.new_failures
        and not diff.regressions
        and not diff.resolved
        and not diff.improvements
    ):
        lines.append("\n✓ No changes detected")

    return "\n".join(lines)
227
+
228
+
229
def render_profile_diff_rich(diff) -> str:
    """Render profile diff in human-readable format.

    Builds a multi-line report: header, row/column counts, schema changes,
    type changes, null-rate movements, and cardinality changes. Each list
    section is capped at 10 entries.
    """
    out = []
    add = out.append

    # Header (timestamps are ISO strings; keep only 'YYYY-MM-DDTHH:MM')
    add(f"Profile Diff: {diff.after.source_uri}")
    add(f"Comparing: {diff.before.profiled_at[:16]} → {diff.after.profiled_at[:16]}")
    add("=" * 50)

    # Row count
    if diff.row_count_delta != 0:
        prefix = "+" if diff.row_count_delta > 0 else ""
        add(
            f"\nRows: {diff.row_count_before:,} → {diff.row_count_after:,} "
            f"({prefix}{diff.row_count_delta:,}, {diff.row_count_pct_change:+.1f}%)"
        )
    else:
        add(f"\nRows: {diff.row_count_after:,} (unchanged)")

    # Column count
    if diff.column_count_before != diff.column_count_after:
        add(f"Columns: {diff.column_count_before} → {diff.column_count_after}")

    # Schema changes
    added_cols = diff.columns_added
    if added_cols:
        add(f"\n➕ Columns Added ({len(added_cols)})")
        for name in added_cols[:10]:
            add(f"  - {name}")
        hidden = len(added_cols) - 10
        if hidden > 0:
            add(f"  ... and {hidden} more")

    removed_cols = diff.columns_removed
    if removed_cols:
        add(f"\n➖ Columns Removed ({len(removed_cols)})")
        for name in removed_cols[:10]:
            add(f"  - {name}")

    # Type changes
    if diff.dtype_changes:
        add(f"\n🔄 Type Changes ({len(diff.dtype_changes)})")
        for change in diff.dtype_changes[:10]:
            add(f"  - {change.column_name}: {change.dtype_before} → {change.dtype_after}")

    # Null rate increases (potential data quality issues)
    if diff.null_rate_increases:
        add(f"\n⚠️ Null Rate Increases ({len(diff.null_rate_increases)})")
        for change in diff.null_rate_increases[:10]:
            add(
                f"  - {change.column_name}: "
                f"{change.null_rate_before:.1%} → {change.null_rate_after:.1%}"
            )

    # Null rate decreases (improvements)
    if diff.null_rate_decreases:
        add(f"\n✅ Null Rate Decreases ({len(diff.null_rate_decreases)})")
        for change in diff.null_rate_decreases[:10]:
            add(
                f"  - {change.column_name}: "
                f"{change.null_rate_before:.1%} → {change.null_rate_after:.1%}"
            )

    # Cardinality changes
    if diff.cardinality_changes:
        add(f"\n📊 Cardinality Changes ({len(diff.cardinality_changes)})")
        for change in diff.cardinality_changes[:10]:
            prefix = "+" if change.distinct_count_delta > 0 else ""
            add(
                f"  - {change.column_name}: {change.distinct_count_before:,} → "
                f"{change.distinct_count_after:,} ({prefix}{change.distinct_count_delta:,})"
            )

    if not diff.has_changes:
        add("\n✓ No significant changes detected")

    return "\n".join(out)
kontra/cli/utils.py ADDED
@@ -0,0 +1,28 @@
1
+ """CLI utility functions."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+
8
def parse_duration(duration_str: str) -> int:
    """Parse a duration string like '7d', '24h', '30m' into seconds.

    Supported suffixes (case-insensitive):
        - d: days
        - h: hours
        - m: minutes
        - s: seconds

    Raises:
        ValueError: if the string is not ``<integer><unit>``.
    """
    seconds_per_unit = {"d": 86400, "h": 3600, "m": 60, "s": 1}

    m = re.match(r"^(\d+)([dhms])$", duration_str.lower())
    if m is None:
        raise ValueError(
            f"Invalid duration format: {duration_str}. Use '7d', '24h', '30m', or '60s'."
        )

    amount, unit = m.groups()
    return int(amount) * seconds_per_unit[unit]
@@ -0,0 +1,34 @@
1
# src/kontra/config/__init__.py
"""
Kontra configuration module - Contract and settings handling.

Public API:
    - Contract, RuleSpec: Data models for contracts
    - ContractLoader: Loads contracts from files or S3
    - KontraConfig, EffectiveConfig: Configuration models
    - load_config_file, find_config_file, resolve_effective_config:
      Locate, load, and resolve project configuration
"""

from kontra.config.models import Contract, RuleSpec
from kontra.config.loader import ContractLoader
from kontra.config.settings import (
    KontraConfig,
    EffectiveConfig,
    load_config_file,
    resolve_effective_config,
    find_config_file,
)

# Names re-exported as this package's public API
# (controls `from kontra.config import *`).
__all__ = [
    # Contract models
    "Contract",
    "RuleSpec",
    # Loader
    "ContractLoader",
    # Config
    "KontraConfig",
    "EffectiveConfig",
    "load_config_file",
    "find_config_file",
    "resolve_effective_config",
]
@@ -0,0 +1,127 @@
1
+ from __future__ import annotations
2
+ from pathlib import Path
3
+ from typing import Any, Dict, List, Union
4
+ import os
5
+ import yaml
6
+
7
+ from kontra.config.models import Contract, RuleSpec
8
+
9
+
10
class ContractLoader:
    """Static helpers to load a Contract from different sources.

    The class is a stateless namespace of static methods. Supported sources:
    local filesystem paths and ``s3://`` URIs (AWS S3 or MinIO-compatible
    endpoints, via fsspec/s3fs).
    """

    @staticmethod
    def from_uri(uri: Union[str, Path]) -> Contract:
        """Dispatch to the S3 or local-path loader based on the URI scheme."""
        uri_str = str(uri)
        if uri_str.lower().startswith("s3://"):
            return ContractLoader.from_s3(uri_str)
        return ContractLoader.from_path(uri_str)

    @staticmethod
    def from_path(path: Union[str, Path]) -> Contract:
        """Load and validate a contract from a local YAML file.

        Raises:
            FileNotFoundError: if the file does not exist.
            ValueError: if the YAML is not a valid contract mapping.
        """
        p = Path(path)
        if not p.exists():
            raise FileNotFoundError(f"Contract file not found: {p}")
        # Explicit encoding: contract files may contain non-ASCII text and
        # the platform default encoding is not reliable across systems.
        with p.open("r", encoding="utf-8") as f:
            raw = yaml.safe_load(f)
        return ContractLoader._parse_and_validate(raw, source=str(p))

    # ---------- S3 loader ----------
    @staticmethod
    def _s3_storage_options() -> Dict[str, Any]:
        """
        Build fsspec/s3fs storage_options from env. Works with AWS S3 and MinIO.

        Recognized environment variables:
            - AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY (used only when both set)
            - AWS_ENDPOINT_URL (custom/MinIO endpoint; implies path-style
              addressing, and SSL only for https endpoints)
            - AWS_REGION (fills client_kwargs.region_name if not already set)
        """
        opts: Dict[str, Any] = {"anon": False}

        key = os.getenv("AWS_ACCESS_KEY_ID")
        secret = os.getenv("AWS_SECRET_ACCESS_KEY")
        if key and secret:
            opts["key"] = key
            opts["secret"] = secret

        endpoint = os.getenv("AWS_ENDPOINT_URL")
        if endpoint:
            # MinIO/custom endpoints
            opts["client_kwargs"] = {"endpoint_url": endpoint}
            # Path-style is typical for MinIO
            opts["config_kwargs"] = {"s3": {"addressing_style": "path"}}
            # Use SSL only if endpoint is https
            opts["use_ssl"] = endpoint.startswith("https")

        region = os.getenv("AWS_REGION")
        if region:
            # Don't clobber an endpoint's client_kwargs; only fill region_name.
            opts.setdefault("client_kwargs", {})
            opts["client_kwargs"].setdefault("region_name", region)

        return opts

    @staticmethod
    def from_s3(uri: str) -> Contract:
        """
        Load contract YAML from S3/MinIO using s3fs via fsspec with storage_options.
        Requires: pip install s3fs

        Raises:
            RuntimeError: if s3fs is missing, or the object cannot be read.
            FileNotFoundError: if the object does not exist.
            ValueError: if the YAML is not a valid contract mapping.
        """
        try:
            import fsspec  # s3fs discovered by fsspec
        except ImportError as e:
            raise RuntimeError(
                "Reading contracts from S3 requires 's3fs'. Install with: pip install s3fs"
            ) from e

        storage_options = ContractLoader._s3_storage_options()

        try:
            fs = fsspec.filesystem("s3", **storage_options)
            with fs.open(uri, mode="r") as f:
                raw = yaml.safe_load(f)
        except FileNotFoundError as e:
            # Re-raise with a clearer message, preserving the original
            # exception as __cause__ for debugging.
            raise FileNotFoundError(f"Contract file not found on S3: {uri}") from e
        except PermissionError as e:
            raise RuntimeError(f"Failed to read contract from S3 '{uri}': Permission denied") from e
        except Exception as e:
            raise RuntimeError(f"Failed to read contract from S3 '{uri}': {e}") from e

        return ContractLoader._parse_and_validate(raw, source=uri)

    # ----------------- parsing -----------------
    @staticmethod
    def _parse_and_validate(raw: Any, source: str) -> Contract:
        """Validate the parsed YAML structure and build a Contract.

        Args:
            raw: The object produced by yaml.safe_load.
            source: Human-readable origin (path or URI) for error messages.

        Raises:
            ValueError: on any structural problem (non-mapping root,
                non-list rules, malformed rule entries).
        """
        if not isinstance(raw, dict):
            raise ValueError(
                f"Invalid or empty contract YAML at {source}. "
                "Expected a mapping with keys like 'datasource' and 'rules'."
            )
        # datasource is optional - defaults to "inline" when data is passed directly
        rules_raw = raw.get("rules", []) or []
        if not isinstance(rules_raw, list):
            raise ValueError("Contract 'rules' must be a list.")

        rules: List[RuleSpec] = []
        for i, r in enumerate(rules_raw):
            if not isinstance(r, dict):
                raise ValueError(f"Rule at index {i} is not a mapping.")
            if "name" not in r:
                raise ValueError(f"Rule at index {i} missing required key: 'name'.")
            params = r.get("params", {}) or {}
            if not isinstance(params, dict):
                raise ValueError(f"Rule at index {i} has non-dict 'params'.")
            context = r.get("context", {}) or {}
            if not isinstance(context, dict):
                raise ValueError(f"Rule at index {i} has non-dict 'context'.")
            rules.append(RuleSpec(
                name=r["name"],
                id=r.get("id"),
                params=params,
                severity=r.get("severity", "blocking"),
                context=context,
            ))

        # Use 'datasource' if present, otherwise fall back to 'dataset' for backwards compat
        # If neither is present, default to "inline" (handled by Contract model)
        datasource_value = raw.get("datasource") or raw.get("dataset") or "inline"
        return Contract(
            name=raw.get("name"),
            datasource=str(datasource_value),
            rules=rules,
        )
@@ -0,0 +1,49 @@
1
+ # src/kontra/config/models.py
2
+ from pydantic import BaseModel, Field, model_validator
3
+ from typing import Dict, Any, List, Literal, Optional
4
+
5
class RuleSpec(BaseModel):
    """
    Declarative specification for a rule from contract.yml

    The `context` field is for consumer-defined metadata that Kontra stores
    but does not use for validation. Consumers/agents can read context for
    routing, explanations, fix hints, etc.
    """
    # 'name' is the only required field; all others have defaults.
    name: str = Field(..., description="The rule name (e.g., not_null, unique).")
    id: Optional[str] = Field(default=None, description="Explicit rule ID (optional, auto-generated if not provided).")
    params: Dict[str, Any] = Field(default_factory=dict, description="Parameters passed to the rule.")
    # Severity is a closed set; anything else fails pydantic validation.
    severity: Literal["blocking", "warning", "info"] = Field(
        default="blocking",
        description="Rule severity: blocking (fails pipeline), warning (warns but continues), info (logs only)."
    )
    context: Dict[str, Any] = Field(
        default_factory=dict,
        description="Consumer-defined context (owner, tags, fix_hint, etc.). Stored but not used by Kontra."
    )
+ )
24
+
25
class Contract(BaseModel):
    """
    Data contract specification.

    The `datasource` field can be:
    - A named datasource from config: "prod_db.users"
    - A file path: "./data/users.parquet"
    - A URI: "s3://bucket/users.parquet", "postgres:///public.users"
    - Omitted when data is passed directly to validate()
    """
    name: Optional[str] = Field(default=None, description="Contract name (optional, used for identification).")
    datasource: str = Field(default="inline", description="Data source: named datasource, path, or URI. Defaults to 'inline' when data is passed directly.")
    rules: List[RuleSpec] = Field(default_factory=list)

    # Backwards compatibility: accept 'dataset' as alias for 'datasource'
    @model_validator(mode="before")
    @classmethod
    def handle_dataset_alias(cls, data: Any) -> Any:
        """Accept 'dataset' as deprecated alias for 'datasource'.

        Runs before field validation. Only rewrites when 'datasource' is
        absent, so an explicit 'datasource' always wins. Note: `pop`
        mutates the caller's dict in place when the alias is used.
        """
        if isinstance(data, dict):
            if "dataset" in data and "datasource" not in data:
                data["datasource"] = data.pop("dataset")
        return data
+ return data
48
+
49
+