kontra 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. kontra/__init__.py +1871 -0
  2. kontra/api/__init__.py +22 -0
  3. kontra/api/compare.py +340 -0
  4. kontra/api/decorators.py +153 -0
  5. kontra/api/results.py +2121 -0
  6. kontra/api/rules.py +681 -0
  7. kontra/cli/__init__.py +0 -0
  8. kontra/cli/commands/__init__.py +1 -0
  9. kontra/cli/commands/config.py +153 -0
  10. kontra/cli/commands/diff.py +450 -0
  11. kontra/cli/commands/history.py +196 -0
  12. kontra/cli/commands/profile.py +289 -0
  13. kontra/cli/commands/validate.py +468 -0
  14. kontra/cli/constants.py +6 -0
  15. kontra/cli/main.py +48 -0
  16. kontra/cli/renderers.py +304 -0
  17. kontra/cli/utils.py +28 -0
  18. kontra/config/__init__.py +34 -0
  19. kontra/config/loader.py +127 -0
  20. kontra/config/models.py +49 -0
  21. kontra/config/settings.py +797 -0
  22. kontra/connectors/__init__.py +0 -0
  23. kontra/connectors/db_utils.py +251 -0
  24. kontra/connectors/detection.py +323 -0
  25. kontra/connectors/handle.py +368 -0
  26. kontra/connectors/postgres.py +127 -0
  27. kontra/connectors/sqlserver.py +226 -0
  28. kontra/engine/__init__.py +0 -0
  29. kontra/engine/backends/duckdb_session.py +227 -0
  30. kontra/engine/backends/duckdb_utils.py +18 -0
  31. kontra/engine/backends/polars_backend.py +47 -0
  32. kontra/engine/engine.py +1205 -0
  33. kontra/engine/executors/__init__.py +15 -0
  34. kontra/engine/executors/base.py +50 -0
  35. kontra/engine/executors/database_base.py +528 -0
  36. kontra/engine/executors/duckdb_sql.py +607 -0
  37. kontra/engine/executors/postgres_sql.py +162 -0
  38. kontra/engine/executors/registry.py +69 -0
  39. kontra/engine/executors/sqlserver_sql.py +163 -0
  40. kontra/engine/materializers/__init__.py +14 -0
  41. kontra/engine/materializers/base.py +42 -0
  42. kontra/engine/materializers/duckdb.py +110 -0
  43. kontra/engine/materializers/factory.py +22 -0
  44. kontra/engine/materializers/polars_connector.py +131 -0
  45. kontra/engine/materializers/postgres.py +157 -0
  46. kontra/engine/materializers/registry.py +138 -0
  47. kontra/engine/materializers/sqlserver.py +160 -0
  48. kontra/engine/result.py +15 -0
  49. kontra/engine/sql_utils.py +611 -0
  50. kontra/engine/sql_validator.py +609 -0
  51. kontra/engine/stats.py +194 -0
  52. kontra/engine/types.py +138 -0
  53. kontra/errors.py +533 -0
  54. kontra/logging.py +85 -0
  55. kontra/preplan/__init__.py +5 -0
  56. kontra/preplan/planner.py +253 -0
  57. kontra/preplan/postgres.py +179 -0
  58. kontra/preplan/sqlserver.py +191 -0
  59. kontra/preplan/types.py +24 -0
  60. kontra/probes/__init__.py +20 -0
  61. kontra/probes/compare.py +400 -0
  62. kontra/probes/relationship.py +283 -0
  63. kontra/reporters/__init__.py +0 -0
  64. kontra/reporters/json_reporter.py +190 -0
  65. kontra/reporters/rich_reporter.py +11 -0
  66. kontra/rules/__init__.py +35 -0
  67. kontra/rules/base.py +186 -0
  68. kontra/rules/builtin/__init__.py +40 -0
  69. kontra/rules/builtin/allowed_values.py +156 -0
  70. kontra/rules/builtin/compare.py +188 -0
  71. kontra/rules/builtin/conditional_not_null.py +213 -0
  72. kontra/rules/builtin/conditional_range.py +310 -0
  73. kontra/rules/builtin/contains.py +138 -0
  74. kontra/rules/builtin/custom_sql_check.py +182 -0
  75. kontra/rules/builtin/disallowed_values.py +140 -0
  76. kontra/rules/builtin/dtype.py +203 -0
  77. kontra/rules/builtin/ends_with.py +129 -0
  78. kontra/rules/builtin/freshness.py +240 -0
  79. kontra/rules/builtin/length.py +193 -0
  80. kontra/rules/builtin/max_rows.py +35 -0
  81. kontra/rules/builtin/min_rows.py +46 -0
  82. kontra/rules/builtin/not_null.py +121 -0
  83. kontra/rules/builtin/range.py +222 -0
  84. kontra/rules/builtin/regex.py +143 -0
  85. kontra/rules/builtin/starts_with.py +129 -0
  86. kontra/rules/builtin/unique.py +124 -0
  87. kontra/rules/condition_parser.py +203 -0
  88. kontra/rules/execution_plan.py +455 -0
  89. kontra/rules/factory.py +103 -0
  90. kontra/rules/predicates.py +25 -0
  91. kontra/rules/registry.py +24 -0
  92. kontra/rules/static_predicates.py +120 -0
  93. kontra/scout/__init__.py +9 -0
  94. kontra/scout/backends/__init__.py +17 -0
  95. kontra/scout/backends/base.py +111 -0
  96. kontra/scout/backends/duckdb_backend.py +359 -0
  97. kontra/scout/backends/postgres_backend.py +519 -0
  98. kontra/scout/backends/sqlserver_backend.py +577 -0
  99. kontra/scout/dtype_mapping.py +150 -0
  100. kontra/scout/patterns.py +69 -0
  101. kontra/scout/profiler.py +801 -0
  102. kontra/scout/reporters/__init__.py +39 -0
  103. kontra/scout/reporters/json_reporter.py +165 -0
  104. kontra/scout/reporters/markdown_reporter.py +152 -0
  105. kontra/scout/reporters/rich_reporter.py +144 -0
  106. kontra/scout/store.py +208 -0
  107. kontra/scout/suggest.py +200 -0
  108. kontra/scout/types.py +652 -0
  109. kontra/state/__init__.py +29 -0
  110. kontra/state/backends/__init__.py +79 -0
  111. kontra/state/backends/base.py +348 -0
  112. kontra/state/backends/local.py +480 -0
  113. kontra/state/backends/postgres.py +1010 -0
  114. kontra/state/backends/s3.py +543 -0
  115. kontra/state/backends/sqlserver.py +969 -0
  116. kontra/state/fingerprint.py +166 -0
  117. kontra/state/types.py +1061 -0
  118. kontra/version.py +1 -0
  119. kontra-0.5.2.dist-info/METADATA +122 -0
  120. kontra-0.5.2.dist-info/RECORD +124 -0
  121. kontra-0.5.2.dist-info/WHEEL +5 -0
  122. kontra-0.5.2.dist-info/entry_points.txt +2 -0
  123. kontra-0.5.2.dist-info/licenses/LICENSE +17 -0
  124. kontra-0.5.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,188 @@
1
+ # src/kontra/rules/builtin/compare.py
2
+ """
3
+ Compare rule - Compares two columns using a comparison operator.
4
+
5
+ Usage:
6
+ - name: compare
7
+ params:
8
+ left: end_date
9
+ right: start_date
10
+ op: ">="
11
+
12
+ Fails when:
13
+ - Either column is NULL (can't compare NULL values)
14
+ - The comparison left op right is FALSE
15
+ """
16
+ from __future__ import annotations
17
+
18
+ from typing import Any, Dict, Optional, Set
19
+
20
+ import polars as pl
21
+
22
+ from kontra.rules.base import BaseRule
23
+ from kontra.rules.registry import register_rule
24
+ from kontra.rules.predicates import Predicate
25
+ from kontra.state.types import FailureMode
26
+
27
+
28
# Operator token -> unbound ``pl.Expr`` comparison dunder.
# Call the looked-up value as ``fn(left_expr, right_expr)`` to build the comparison.
POLARS_OP_MAP = dict(
    [
        (">", pl.Expr.__gt__),
        (">=", pl.Expr.__ge__),
        ("<", pl.Expr.__lt__),
        ("<=", pl.Expr.__le__),
        ("==", pl.Expr.__eq__),
        ("!=", pl.Expr.__ne__),
    ]
)

# Operator token -> English phrase used when composing failure messages.
OP_DESCRIPTIONS = dict(
    [
        (">", "greater than"),
        (">=", "greater than or equal to"),
        ("<", "less than"),
        ("<=", "less than or equal to"),
        ("==", "equal to"),
        ("!=", "not equal to"),
    ]
)

# Every operator the compare rule accepts (exactly the keys of POLARS_OP_MAP).
SUPPORTED_OPS = set(POLARS_OP_MAP)
49
+
50
+
51
@register_rule("compare")
class CompareRule(BaseRule):
    """
    Row-level rule that compares two columns with a binary operator.

    A row is reported as a failure when either column is NULL or when
    ``left <op> right`` evaluates to FALSE.

    params:
      - left: str (required) - Left column name
      - right: str (required) - Right column name
      - op: str (required) - Comparison operator: >, >=, <, <=, ==, !=

    NULL handling:
      Rows where either column is NULL are considered failures, because a
      comparison against NULL has no meaningful truth value.
    """

    def __init__(self, name: str, params: Dict[str, Any]):
        """
        Read and validate the rule parameters at construction time.

        Raises:
            ValueError: if ``op`` is not one of ``SUPPORTED_OPS``.
            (``_get_required_param`` presumably raises for a missing or
            wrongly-typed parameter - confirm against ``BaseRule``.)
        """
        super().__init__(name, params)
        self._left = self._get_required_param("left", str)
        self._right = self._get_required_param("right", str)
        self._op = self._get_required_param("op", str)

        if self._op not in SUPPORTED_OPS:
            raise ValueError(
                f"Rule 'compare' unsupported operator '{self._op}'. "
                f"Supported: {', '.join(sorted(SUPPORTED_OPS))}"
            )

    def required_columns(self) -> Set[str]:
        """Return the two column names this rule reads."""
        return {self._left, self._right}

    def _compare_violation_expr(self) -> pl.Expr:
        """Build the Polars violation mask (True = failing row)."""
        left_col = pl.col(self._left)
        right_col = pl.col(self._right)
        comparison_expr = POLARS_OP_MAP[self._op](left_col, right_col)
        # The explicit null checks come first so a NULL operand always yields
        # True, even though the negated comparison itself would be null.
        return left_col.is_null() | right_col.is_null() | ~comparison_expr

    def _compare_message(self) -> str:
        """Build the human-readable failure message for this rule."""
        return f"{self._left} is not {OP_DESCRIPTIONS[self._op]} {self._right}"

    def validate(self, df: pl.DataFrame) -> Dict[str, Any]:
        """
        Evaluate the rule against ``df`` and return the result dict.

        The dict is whatever ``BaseRule._failures`` produces, extended with
        ``rule_id`` and, when at least one row fails, ``failure_mode`` and
        ``details``.
        """
        # Bail out with the helper's result when a column check fails
        # (presumably a ready-made "missing column" result - see BaseRule).
        col_check = self._check_columns(df, {self._left, self._right})
        if col_check is not None:
            return col_check

        # Materialize the mask as a Series for the shared failure helper.
        mask = df.select(self._compare_violation_expr().alias("_mask"))["_mask"]

        res = super()._failures(df, mask, self._compare_message())
        res["rule_id"] = self.rule_id

        if res["failed_count"] > 0:
            res["failure_mode"] = str(FailureMode.COMPARISON_FAILED)
            res["details"] = self._explain_failure(df, res["failed_count"])

        return res

    def _explain_failure(
        self, df: pl.DataFrame, failed_count: int
    ) -> Dict[str, Any]:
        """
        Build the ``details`` payload attached to a failing result.

        Includes the failure rate (rounded to 4 decimals) and the per-column
        NULL counts so a reader can tell NULL-driven failures from genuine
        comparison failures.
        """
        total_rows = df.height
        # Guard the division for an empty frame.
        failure_rate = failed_count / total_rows if total_rows > 0 else 0

        left_nulls = df[self._left].is_null().sum()
        right_nulls = df[self._right].is_null().sum()

        return {
            "failed_count": failed_count,
            "failure_rate": round(failure_rate, 4),
            "total_rows": total_rows,
            "left_column": self._left,
            "right_column": self._right,
            "operator": self._op,
            "left_null_count": int(left_nulls),
            "right_null_count": int(right_nulls),
        }

    def compile_predicate(self) -> Optional[Predicate]:
        """Return the rule as a ``Predicate`` whose expr is the violation mask."""
        return Predicate(
            rule_id=self.rule_id,
            expr=self._compare_violation_expr(),
            message=self._compare_message(),
            columns={self._left, self._right},
        )

    def to_sql_spec(self) -> Optional[Dict[str, Any]]:
        """Return SQL spec for SQL pushdown executors."""
        return {
            "kind": "compare",
            "rule_id": self.rule_id,
            "left": self._left,
            "right": self._right,
            "op": self._op,
        }

    @staticmethod
    def _quote_sql_ident(name: str) -> str:
        """
        Wrap ``name`` in double quotes, doubling any embedded double quote.

        Without the doubling, a column name containing ``"`` would terminate
        the quoted identifier early and produce broken (or injectable) SQL.
        """
        return '"' + name.replace('"', '""') + '"'

    def to_sql_filter(self, dialect: str = "postgres") -> str | None:
        """
        Return a SQL boolean expression that is TRUE for failing rows.

        ``dialect`` is accepted for interface compatibility; the generated
        text does not vary by dialect.
        """
        left = self._quote_sql_ident(self._left)
        right = self._quote_sql_ident(self._right)

        # Python-style equality operators need translating; the rest are
        # already valid SQL.
        sql_op = {"==": "=", "!=": "<>"}.get(self._op, self._op)

        # Failures: NULL in either column OR comparison is FALSE
        return f"{left} IS NULL OR {right} IS NULL OR NOT ({left} {sql_op} {right})"
@@ -0,0 +1,213 @@
1
+ # src/kontra/rules/builtin/conditional_not_null.py
2
+ """
3
+ Conditional not-null rule - Column must not be NULL when a condition is met.
4
+
5
+ Usage:
6
+ - name: conditional_not_null
7
+ params:
8
+ column: shipping_date
9
+ when: "status == 'shipped'"
10
+
11
+ Fails when:
12
+ - The `when` condition is TRUE AND the `column` is NULL
13
+
14
+ Passes when:
15
+ - The `when` condition is FALSE (regardless of column value)
16
+ - The `when` condition is TRUE AND the `column` is NOT NULL
17
+ """
18
+ from __future__ import annotations
19
+
20
+ from typing import Any, Dict, List, Optional, Set
21
+
22
+ import polars as pl
23
+
24
+ from kontra.rules.base import BaseRule
25
+ from kontra.rules.registry import register_rule
26
+ from kontra.rules.predicates import Predicate
27
+ from kontra.rules.condition_parser import parse_condition, ConditionParseError
28
+ from kontra.state.types import FailureMode
29
+
30
+
31
# Operator token -> unbound ``pl.Expr`` comparison dunder.
# Call the looked-up value as ``fn(column_expr, value)`` to build the comparison.
POLARS_OP_MAP = dict(
    [
        ("==", pl.Expr.__eq__),
        ("!=", pl.Expr.__ne__),
        (">", pl.Expr.__gt__),
        (">=", pl.Expr.__ge__),
        ("<", pl.Expr.__lt__),
        ("<=", pl.Expr.__le__),
    ]
)
40
+
41
+
42
@register_rule("conditional_not_null")
class ConditionalNotNullRule(BaseRule):
    """
    Fails where ``column`` is NULL on rows for which the ``when`` condition is TRUE.

    params:
      - column: str (required) - Column that must not be null
      - when: str (required) - Condition expression (e.g., "status == 'shipped'")

    Condition syntax:
      column_name operator value

    Supported operators: ==, !=, >, >=, <, <=
    Supported values: 'string', 123, 123.45, true, false, null

    Examples:
      - status == 'shipped'
      - amount > 0
      - is_active == true

    Rows where the condition itself is unknown (e.g. the ``when`` column is
    NULL in an ordinary comparison) are not failures: only a TRUE condition
    triggers the not-null requirement.
    """

    # Lower-cased dialect names that have no TRUE/FALSE literal and need 1/0.
    # NOTE(review): the exact dialect strings callers pass are not visible
    # here - confirm "sqlserver" matches the SQL Server executor.
    _BIT_BOOLEAN_DIALECTS = frozenset({"sqlserver", "mssql", "tsql"})

    def __init__(self, name: str, params: Dict[str, Any]):
        """
        Read the parameters and parse the ``when`` expression eagerly.

        Raises:
            ValueError: if the ``when`` expression cannot be parsed.
            (``_get_required_param`` presumably raises for a missing or
            wrongly-typed parameter - confirm against ``BaseRule``.)
        """
        super().__init__(name, params)
        self._column = self._get_required_param("column", str)
        self._when_expr = self._get_required_param("when", str)

        # Parse now so a malformed condition fails at rule construction
        # rather than at validation time.
        try:
            self._when_column, self._when_op, self._when_value = parse_condition(self._when_expr)
        except ConditionParseError as e:
            raise ValueError(f"Rule 'conditional_not_null' invalid 'when' expression: {e}") from e

    def required_columns(self) -> Set[str]:
        """Return the checked column and the column used by the condition."""
        return {self._column, self._when_column}

    def _build_condition_expr(self) -> pl.Expr:
        """Build the Polars expression for the when condition."""
        when_col = pl.col(self._when_column)
        # Looked up before the NULL branch so an unknown operator fails the
        # same way regardless of the condition value.
        compare_fn = POLARS_OP_MAP[self._when_op]

        if self._when_value is None:
            # Equality against NULL is expressed as a null test.
            if self._when_op == "==":
                return when_col.is_null()
            if self._when_op == "!=":
                return when_col.is_not_null()
            # Ordering operators against NULL never match.
            return pl.lit(False)

        return compare_fn(when_col, self._when_value)

    def _conditional_null_expr(self) -> pl.Expr:
        """
        Build the violation mask: condition is TRUE AND column is NULL.

        A comparison against a NULL ``when`` value is itself null, and
        ``null & True`` stays null; ``fill_null(False)`` turns those
        "unknown" rows into explicit non-failures.
        """
        violation = self._build_condition_expr() & pl.col(self._column).is_null()
        return violation.fill_null(False)

    def _conditional_null_message(self) -> str:
        """Build the human-readable failure message for this rule."""
        return f"{self._column} is null when {self._when_expr}"

    def validate(self, df: pl.DataFrame) -> Dict[str, Any]:
        """
        Evaluate the rule against ``df`` and return the result dict.

        The dict is whatever ``BaseRule._failures`` produces, extended with
        ``rule_id`` and, when at least one row fails, ``failure_mode`` and
        ``details``.
        """
        # Bail out with the helper's result when a column check fails
        # (presumably a ready-made "missing column" result - see BaseRule).
        col_check = self._check_columns(df, {self._column, self._when_column})
        if col_check is not None:
            return col_check

        # Materialize the mask as a Series for the shared failure helper.
        mask = df.select(self._conditional_null_expr().alias("_mask"))["_mask"]

        res = super()._failures(df, mask, self._conditional_null_message())
        res["rule_id"] = self.rule_id

        if res["failed_count"] > 0:
            res["failure_mode"] = str(FailureMode.CONDITIONAL_NULL)
            res["details"] = self._explain_failure(df, mask, res["failed_count"])

        return res

    def _explain_failure(
        self, df: pl.DataFrame, mask: pl.Series, failed_count: int
    ) -> Dict[str, Any]:
        """
        Build the ``details`` payload attached to a failing result.

        Reports the failure rate (rounded to 4 decimals), how many rows
        matched the ``when`` condition, and - for at most 1000 failures -
        the positions of the first five failing rows.
        """
        total_rows = df.height
        # Guard the division for an empty frame.
        failure_rate = failed_count / total_rows if total_rows > 0 else 0

        # Summing the boolean condition counts the TRUE rows.
        condition_matches = df.select(self._build_condition_expr().sum())[0, 0]

        details: Dict[str, Any] = {
            "failed_count": failed_count,
            "failure_rate": round(failure_rate, 4),
            "total_rows": total_rows,
            "column": self._column,
            "when_condition": self._when_expr,
            "rows_matching_condition": int(condition_matches) if condition_matches else 0,
        }

        # Sample failing row positions (first 5); skipped for very large
        # failure sets.
        if 0 < failed_count <= 1000:
            positions: List[int] = []
            for idx, flagged in enumerate(mask):
                if flagged:
                    positions.append(idx)
                    if len(positions) >= 5:
                        break
            if positions:
                details["sample_positions"] = positions

        return details

    def compile_predicate(self) -> Optional[Predicate]:
        """Return the rule as a ``Predicate`` whose expr is the violation mask."""
        return Predicate(
            rule_id=self.rule_id,
            expr=self._conditional_null_expr(),
            message=self._conditional_null_message(),
            columns={self._column, self._when_column},
        )

    def to_sql_spec(self) -> Optional[Dict[str, Any]]:
        """Return SQL spec for SQL pushdown executors."""
        return {
            "kind": "conditional_not_null",
            "rule_id": self.rule_id,
            "column": self._column,
            "when_column": self._when_column,
            "when_op": self._when_op,
            "when_value": self._when_value,
        }

    @staticmethod
    def _quote_sql_ident(name: str) -> str:
        """
        Wrap ``name`` in double quotes, doubling any embedded double quote.

        Without the doubling, a column name containing ``"`` would terminate
        the quoted identifier early and produce broken (or injectable) SQL.
        """
        return '"' + name.replace('"', '""') + '"'

    def to_sql_filter(self, dialect: str = "postgres") -> str | None:
        """
        Return a SQL boolean expression that is TRUE for failing rows.

        Returns None when the condition compares against NULL with an
        ordering operator, which cannot be expressed as a SQL filter.
        Boolean condition values are rendered as ``1``/``0`` for dialects in
        ``_BIT_BOOLEAN_DIALECTS`` and as ``TRUE``/``FALSE`` otherwise.
        """
        col = self._quote_sql_ident(self._column)
        when_col = self._quote_sql_ident(self._when_column)

        # Python-style equality operators need translating; the rest are
        # already valid SQL.
        sql_op = {"==": "=", "!=": "<>"}.get(self._when_op, self._when_op)
        value = self._when_value

        if value is None:
            # NULL needs IS [NOT] NULL instead of =/<>.
            if sql_op == "=":
                condition = f"{when_col} IS NULL"
            elif sql_op == "<>":
                condition = f"{when_col} IS NOT NULL"
            else:
                return None  # Can't compare with NULL using < > etc.
        elif isinstance(value, str):
            # Double single quotes so the value stays inside its literal.
            escaped = value.replace("'", "''")
            condition = f"{when_col} {sql_op} '{escaped}'"
        elif isinstance(value, bool):
            # Checked before the numeric fallback because bool is an int subclass.
            if (dialect or "").lower() in self._BIT_BOOLEAN_DIALECTS:
                val = "1" if value else "0"
            else:
                val = "TRUE" if value else "FALSE"
            condition = f"{when_col} {sql_op} {val}"
        else:
            condition = f"{when_col} {sql_op} {value}"

        # Failure = condition is TRUE AND column is NULL
        return f"({condition}) AND {col} IS NULL"