kontra 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. kontra/__init__.py +1871 -0
  2. kontra/api/__init__.py +22 -0
  3. kontra/api/compare.py +340 -0
  4. kontra/api/decorators.py +153 -0
  5. kontra/api/results.py +2121 -0
  6. kontra/api/rules.py +681 -0
  7. kontra/cli/__init__.py +0 -0
  8. kontra/cli/commands/__init__.py +1 -0
  9. kontra/cli/commands/config.py +153 -0
  10. kontra/cli/commands/diff.py +450 -0
  11. kontra/cli/commands/history.py +196 -0
  12. kontra/cli/commands/profile.py +289 -0
  13. kontra/cli/commands/validate.py +468 -0
  14. kontra/cli/constants.py +6 -0
  15. kontra/cli/main.py +48 -0
  16. kontra/cli/renderers.py +304 -0
  17. kontra/cli/utils.py +28 -0
  18. kontra/config/__init__.py +34 -0
  19. kontra/config/loader.py +127 -0
  20. kontra/config/models.py +49 -0
  21. kontra/config/settings.py +797 -0
  22. kontra/connectors/__init__.py +0 -0
  23. kontra/connectors/db_utils.py +251 -0
  24. kontra/connectors/detection.py +323 -0
  25. kontra/connectors/handle.py +368 -0
  26. kontra/connectors/postgres.py +127 -0
  27. kontra/connectors/sqlserver.py +226 -0
  28. kontra/engine/__init__.py +0 -0
  29. kontra/engine/backends/duckdb_session.py +227 -0
  30. kontra/engine/backends/duckdb_utils.py +18 -0
  31. kontra/engine/backends/polars_backend.py +47 -0
  32. kontra/engine/engine.py +1205 -0
  33. kontra/engine/executors/__init__.py +15 -0
  34. kontra/engine/executors/base.py +50 -0
  35. kontra/engine/executors/database_base.py +528 -0
  36. kontra/engine/executors/duckdb_sql.py +607 -0
  37. kontra/engine/executors/postgres_sql.py +162 -0
  38. kontra/engine/executors/registry.py +69 -0
  39. kontra/engine/executors/sqlserver_sql.py +163 -0
  40. kontra/engine/materializers/__init__.py +14 -0
  41. kontra/engine/materializers/base.py +42 -0
  42. kontra/engine/materializers/duckdb.py +110 -0
  43. kontra/engine/materializers/factory.py +22 -0
  44. kontra/engine/materializers/polars_connector.py +131 -0
  45. kontra/engine/materializers/postgres.py +157 -0
  46. kontra/engine/materializers/registry.py +138 -0
  47. kontra/engine/materializers/sqlserver.py +160 -0
  48. kontra/engine/result.py +15 -0
  49. kontra/engine/sql_utils.py +611 -0
  50. kontra/engine/sql_validator.py +609 -0
  51. kontra/engine/stats.py +194 -0
  52. kontra/engine/types.py +138 -0
  53. kontra/errors.py +533 -0
  54. kontra/logging.py +85 -0
  55. kontra/preplan/__init__.py +5 -0
  56. kontra/preplan/planner.py +253 -0
  57. kontra/preplan/postgres.py +179 -0
  58. kontra/preplan/sqlserver.py +191 -0
  59. kontra/preplan/types.py +24 -0
  60. kontra/probes/__init__.py +20 -0
  61. kontra/probes/compare.py +400 -0
  62. kontra/probes/relationship.py +283 -0
  63. kontra/reporters/__init__.py +0 -0
  64. kontra/reporters/json_reporter.py +190 -0
  65. kontra/reporters/rich_reporter.py +11 -0
  66. kontra/rules/__init__.py +35 -0
  67. kontra/rules/base.py +186 -0
  68. kontra/rules/builtin/__init__.py +40 -0
  69. kontra/rules/builtin/allowed_values.py +156 -0
  70. kontra/rules/builtin/compare.py +188 -0
  71. kontra/rules/builtin/conditional_not_null.py +213 -0
  72. kontra/rules/builtin/conditional_range.py +310 -0
  73. kontra/rules/builtin/contains.py +138 -0
  74. kontra/rules/builtin/custom_sql_check.py +182 -0
  75. kontra/rules/builtin/disallowed_values.py +140 -0
  76. kontra/rules/builtin/dtype.py +203 -0
  77. kontra/rules/builtin/ends_with.py +129 -0
  78. kontra/rules/builtin/freshness.py +240 -0
  79. kontra/rules/builtin/length.py +193 -0
  80. kontra/rules/builtin/max_rows.py +35 -0
  81. kontra/rules/builtin/min_rows.py +46 -0
  82. kontra/rules/builtin/not_null.py +121 -0
  83. kontra/rules/builtin/range.py +222 -0
  84. kontra/rules/builtin/regex.py +143 -0
  85. kontra/rules/builtin/starts_with.py +129 -0
  86. kontra/rules/builtin/unique.py +124 -0
  87. kontra/rules/condition_parser.py +203 -0
  88. kontra/rules/execution_plan.py +455 -0
  89. kontra/rules/factory.py +103 -0
  90. kontra/rules/predicates.py +25 -0
  91. kontra/rules/registry.py +24 -0
  92. kontra/rules/static_predicates.py +120 -0
  93. kontra/scout/__init__.py +9 -0
  94. kontra/scout/backends/__init__.py +17 -0
  95. kontra/scout/backends/base.py +111 -0
  96. kontra/scout/backends/duckdb_backend.py +359 -0
  97. kontra/scout/backends/postgres_backend.py +519 -0
  98. kontra/scout/backends/sqlserver_backend.py +577 -0
  99. kontra/scout/dtype_mapping.py +150 -0
  100. kontra/scout/patterns.py +69 -0
  101. kontra/scout/profiler.py +801 -0
  102. kontra/scout/reporters/__init__.py +39 -0
  103. kontra/scout/reporters/json_reporter.py +165 -0
  104. kontra/scout/reporters/markdown_reporter.py +152 -0
  105. kontra/scout/reporters/rich_reporter.py +144 -0
  106. kontra/scout/store.py +208 -0
  107. kontra/scout/suggest.py +200 -0
  108. kontra/scout/types.py +652 -0
  109. kontra/state/__init__.py +29 -0
  110. kontra/state/backends/__init__.py +79 -0
  111. kontra/state/backends/base.py +348 -0
  112. kontra/state/backends/local.py +480 -0
  113. kontra/state/backends/postgres.py +1010 -0
  114. kontra/state/backends/s3.py +543 -0
  115. kontra/state/backends/sqlserver.py +969 -0
  116. kontra/state/fingerprint.py +166 -0
  117. kontra/state/types.py +1061 -0
  118. kontra/version.py +1 -0
  119. kontra-0.5.2.dist-info/METADATA +122 -0
  120. kontra-0.5.2.dist-info/RECORD +124 -0
  121. kontra-0.5.2.dist-info/WHEEL +5 -0
  122. kontra-0.5.2.dist-info/entry_points.txt +2 -0
  123. kontra-0.5.2.dist-info/licenses/LICENSE +17 -0
  124. kontra-0.5.2.dist-info/top_level.txt +1 -0
kontra/engine/stats.py ADDED
@@ -0,0 +1,194 @@
1
+ # src/kontra/engine/stats.py
2
+ from __future__ import annotations
3
+
4
+ """
5
+ Stats helpers — minimal, fast, and CLI-friendly.
6
+
7
+ Design goals
8
+ ------------
9
+ - Keep helpers tiny and allocation-light; these run on every validation.
10
+ - Avoid coupling to engine internals or reporters; return plain dicts.
11
+ - Backwards compatible: existing callers keep working as-is.
12
+ """
13
+
14
+ from dataclasses import dataclass
15
+ from typing import Iterable, Dict, Any, List, Optional
16
+ import time
17
+ import polars as pl
18
+
19
+
20
+ # ----------------------------- Timers -----------------------------------------
21
+
22
+
23
@dataclass
class RunTimers:
    """
    Per-phase timing counters for a single run.

    Each field is an integer that defaults to 0; the ``_ms`` suffix
    indicates the values are meant to be milliseconds.
    """

    contract_load_ms: int = 0
    data_load_ms: int = 0
    compile_ms: int = 0
    execute_ms: int = 0
    report_ms: int = 0
    polars_ms: int = 0
    preplan_ms: int = 0
    sql_ms: int = 0

    def total_ms(self) -> int:
        """Return the sum of every phase counter."""
        phases = (
            self.contract_load_ms,
            self.data_load_ms,
            self.compile_ms,
            self.execute_ms,
            self.report_ms,
            self.polars_ms,
            self.preplan_ms,
            self.sql_ms,
        )
        return sum(phases)
46
+
47
+
48
def now_ms() -> int:
    """
    Return the current wall-clock time as whole milliseconds since the epoch.

    NOTE(review): this reads ``time.time()``, which can jump when the system
    clock is adjusted. If callers only ever subtract two readings to get a
    duration, a monotonic clock may be the better source — confirm usage.
    """
    seconds = time.time()
    millis = seconds * 1000
    return int(millis)
50
+
51
+
52
+ # ---------------------------- Summaries ---------------------------------------
53
+
54
+
55
def basic_summary(
    df: Optional[pl.DataFrame],
    *,
    available_cols: Optional[List[str]] = None,
    nrows_override: Optional[int] = None,
) -> Dict[str, int]:
    """
    Build the two-number dataset summary (row count and column count).

    Args
    ----
    df:
        The loaded Polars DataFrame, or None when no frame was materialized.
    available_cols:
        Full list of column names, if known. When given, its length is
        reported as ``ncols`` in preference to the width of ``df``.
    nrows_override:
        Externally supplied row count. When given, it is reported as
        ``nrows`` in preference to ``df.height``.

    Returns
    -------
    {"nrows": int, "ncols": int}
        Without a frame, missing inputs fall back to 0.
    """
    if df is not None:
        # A frame is available: explicit overrides win, the frame fills gaps.
        row_count = df.height if nrows_override is None else nrows_override
        col_count = len(df.columns) if available_cols is None else len(available_cols)
    else:
        # No frame: rely purely on the supplied hints, defaulting to zero.
        row_count = nrows_override or 0
        col_count = len(available_cols or [])

    return {"nrows": int(row_count), "ncols": int(col_count)}
88
+
89
+
90
def columns_touched(rule_specs: Iterable[Dict[str, Any]]) -> List[str]:
    """
    Ordered de-duplicated list of columns referenced by rules.

    Args
    ----
    rule_specs:
        Rule specification dicts. Only ``spec["params"]["column"]`` is
        inspected. Specs with no ``params`` key, or with an explicit
        ``"params": None``, are skipped. ``None`` is treated as "no rules".

    Returns
    -------
    Column names in first-seen order. Only non-empty strings are collected.
    """
    cols: List[str] = []
    seen: set[str] = set()
    for spec in rule_specs or ():
        # `or {}` covers an explicit `"params": None`, which the default
        # argument of dict.get does not.
        params = spec.get("params") or {}
        col = params.get("column")
        if isinstance(col, str) and col and col not in seen:
            seen.add(col)
            cols.append(col)
    return cols
102
+
103
+
104
def build_coverage(
    *,
    total_rules: int,
    sql_results: Dict[str, Dict[str, Any]] | List[Dict[str, Any]],
    polars_results: List[Dict[str, Any]],
    validated_columns: List[str],
) -> Dict[str, Any]:
    """
    Assemble the coverage block: how many rules ran where, and how many failed.

    Args
    ----
    total_rules:
        Total number of rules, reported as-is.
    sql_results:
        Either a mapping whose values are result dicts, or a flat list
        of result dicts. Falsy input counts as no results.
    polars_results:
        List of result dicts. Falsy input counts as no results.
    validated_columns:
        Column names, copied into the output. Falsy input becomes ``[]``.

    Returns
    -------
    {
      "rules_total": int,
      "rules_sql": int,    "rules_failed_sql": int,
      "rules_polars": int, "rules_failed_polars": int,
      "validated_columns": [...],
    }
    A result counts as failed when its "passed" entry is missing or falsy.
    """
    # Normalise sql_results to a plain list regardless of which shape was given.
    if isinstance(sql_results, dict):
        sql_entries = list(sql_results.values())
    else:
        sql_entries = list(sql_results or [])
    polars_entries = polars_results or []

    sql_failures = [entry for entry in sql_entries if not entry.get("passed", False)]
    polars_failures = [entry for entry in polars_entries if not entry.get("passed", False)]

    coverage: Dict[str, Any] = {}
    coverage["rules_total"] = int(total_rules)
    coverage["rules_sql"] = int(len(sql_entries))
    coverage["rules_failed_sql"] = int(len(sql_failures))
    coverage["rules_polars"] = int(len(polars_entries))
    coverage["rules_failed_polars"] = int(len(polars_failures))
    coverage["validated_columns"] = list(validated_columns or [])
    return coverage
143
+
144
+
145
+ # ------------------------------ Profiling -------------------------------------
146
+
147
+
148
def profile_for(df: pl.DataFrame, cols: List[str]) -> Dict[str, Dict[str, Any]]:
    """
    Lightweight column profile for touched columns only, computed in one select.

    Args
    ----
    df:
        Polars DataFrame to profile.
    cols:
        Column names to profile. Duplicates are collapsed (first occurrence
        wins) and names not present in ``df`` are skipped, so a stale
        projection cannot break the profile.

    Returns
    -------
    {column: {"nulls": int, "distinct": int[, "min", "max", "mean"]}}
        ``min``/``max``/``mean`` are attached for numeric columns only.
        ``mean`` is a float, or None when the aggregate is null (e.g. an
        empty or all-null column). Returns ``{}`` when nothing can be profiled.
    """
    if not cols:
        return {}

    present = set(df.columns)
    schema = df.schema
    # dict.fromkeys de-duplicates while preserving order; repeated names would
    # otherwise produce repeated aliases in the select below.
    targets = [c for c in dict.fromkeys(cols) if c in present]
    if not targets:
        return {}

    exprs: List[pl.Expr] = []
    numeric: set[str] = set()
    for c in targets:
        # Stats common to every dtype.
        exprs.append(pl.col(c).is_null().sum().alias(f"__nulls__{c}"))
        exprs.append(pl.col(c).n_unique().alias(f"__distinct__{c}"))

        # Numeric extras. The dtype's own `is_numeric()` is the documented
        # check; if the installed Polars lacks it, the extras are skipped
        # rather than failing the whole profile.
        is_numeric = getattr(schema[c], "is_numeric", None)
        if callable(is_numeric) and is_numeric():
            numeric.add(c)
            exprs.extend(
                [
                    pl.col(c).min().alias(f"__min__{c}"),
                    pl.col(c).max().alias(f"__max__{c}"),
                    pl.col(c).mean().alias(f"__mean__{c}"),
                ]
            )

    out = df.select(exprs)
    if out.height == 0:
        return {}

    # named=True yields the single aggregate row as a dict keyed by alias.
    row = out.row(0, named=True)
    stats: Dict[str, Dict[str, Any]] = {}
    for c in targets:
        d: Dict[str, Any] = {
            "nulls": int(row[f"__nulls__{c}"]),
            "distinct": int(row[f"__distinct__{c}"]),
        }
        if c in numeric:
            mean = row[f"__mean__{c}"]
            d["min"] = row[f"__min__{c}"]
            d["max"] = row[f"__max__{c}"]
            # The aggregate is null when there are no non-null values;
            # float(None) would raise.
            d["mean"] = float(mean) if mean is not None else None
        stats[c] = d
    return stats
kontra/engine/types.py ADDED
@@ -0,0 +1,138 @@
1
+ # src/kontra/engine/types.py
2
+ """
3
+ Type definitions for engine result dictionaries.
4
+
5
+ These TypedDicts provide IDE support and documentation for the
6
+ dict-based results returned by the validation engine.
7
+
8
+ Usage:
9
+ from kontra.engine.types import RuleResultDict, ValidationResultDict
10
+
11
+ def process_result(result: RuleResultDict) -> None:
12
+ print(result["rule_id"]) # IDE knows this is str
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from typing import Any, Dict, List, Optional, TypedDict, Literal
18
+
19
+
20
class RuleResultDict(TypedDict, total=False):
    """
    Result of validating a single rule.

    The class is declared with ``total=False``, so a type checker treats
    every key as optional. The grouping below is the documented convention,
    not something the type enforces.

    Expected on every result:
        rule_id: Unique identifier for the rule
        passed: Whether the rule passed validation
        failed_count: Number of violations found
        message: Human-readable result message

    Optional fields:
        severity: blocking | warning | info
        execution_source: Where rule was executed (polars | sql | metadata)
        failure_mode: Type of failure (null_values, duplicate_values, etc.)
        details: Additional details (unexpected values, suggestions, etc.)
        actions_executed: List of post-validation actions run
    """
    # Expected on every result (by convention; see the docstring)
    rule_id: str
    passed: bool
    failed_count: int
    message: str
    # Optional
    severity: str
    execution_source: str
    failure_mode: str
    details: Dict[str, Any]
    actions_executed: List[str]
48
+
49
+
50
class SummaryDict(TypedDict, total=False):
    """
    Validation summary for a dataset.

    Contains aggregate pass/fail counts and optional severity breakdowns.
    Declared with ``total=False``: every key may be absent.
    """
    # Overall outcome and rule counts
    passed: bool
    total_rules: int
    rules_passed: int
    rules_failed: int
    dataset_name: str
    # Severity breakdown (failure counts per severity level)
    blocking_failures: int
    warning_failures: int
    info_failures: int
65
+
66
+
67
class ValidationResultDict(TypedDict, total=False):
    """
    Complete validation result returned by ValidationEngine.run().

    Contains summary, individual rule results, and optional stats.
    Declared with ``total=False``: every key may be absent.
    """
    dataset: str
    summary: SummaryDict  # aggregate counts, see SummaryDict
    results: List[RuleResultDict]  # one entry per rule, see RuleResultDict
    stats: Dict[str, Any]  # free-form here; NOTE(review): presumably shaped like StatsDict — confirm
    run_meta: Dict[str, Any]
78
+
79
+
80
class PreplanSummaryDict(TypedDict, total=False):
    """
    Preplan (metadata analysis) summary.

    Reports how many rules were resolved via metadata without data scan.
    Declared with ``total=False``: every key may be absent.
    """
    enabled: bool
    effective: bool
    # Rule counts by metadata outcome
    rules_pass_meta: int
    rules_fail_meta: int
    rules_unknown: int
    # Row-group counters; typed Optional, so a present key may still hold None
    row_groups_kept: Optional[int]
    row_groups_total: Optional[int]
    row_groups_pruned: Optional[int]
94
+
95
+
96
class ProjectionDict(TypedDict, total=False):
    """
    Column projection statistics.

    Reports column pruning effectiveness.
    Declared with ``total=False``: every key may be absent.
    """
    enabled: bool
    available_count: int
    # Untyped nested blocks; their inner keys are not declared in this module
    full: Dict[str, Any]
    residual: Dict[str, Any]
106
+
107
+
108
class PushdownDict(TypedDict, total=False):
    """
    SQL pushdown statistics.

    Reports SQL execution details and timing.
    Declared with ``total=False``: every key may be absent.
    """
    enabled: bool
    effective: bool
    executor: str
    rules_pushed: int
    breakdown_ms: Dict[str, int]  # label -> integer value; the ``_ms`` suffix suggests milliseconds
119
+
120
+
121
class StatsDict(TypedDict, total=False):
    """
    Full validation statistics.

    Optional stats block attached to validation results when
    stats_mode is "summary" or "profile".
    Declared with ``total=False``: every key may be absent.
    """
    stats_version: str
    run_meta: Dict[str, Any]
    dataset: Dict[str, Any]
    # Typed sub-blocks defined in this module
    preplan: PreplanSummaryDict
    pushdown: PushdownDict
    projection: ProjectionDict
    residual: Dict[str, Any]
    # Column-name lists
    columns_touched: List[str]
    columns_validated: List[str]
    columns_loaded: List[str]
    profile: Dict[str, Any]  # NOTE(review): presumably the per-column profile, present only in "profile" mode — confirm