kontra 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kontra/__init__.py +1871 -0
- kontra/api/__init__.py +22 -0
- kontra/api/compare.py +340 -0
- kontra/api/decorators.py +153 -0
- kontra/api/results.py +2121 -0
- kontra/api/rules.py +681 -0
- kontra/cli/__init__.py +0 -0
- kontra/cli/commands/__init__.py +1 -0
- kontra/cli/commands/config.py +153 -0
- kontra/cli/commands/diff.py +450 -0
- kontra/cli/commands/history.py +196 -0
- kontra/cli/commands/profile.py +289 -0
- kontra/cli/commands/validate.py +468 -0
- kontra/cli/constants.py +6 -0
- kontra/cli/main.py +48 -0
- kontra/cli/renderers.py +304 -0
- kontra/cli/utils.py +28 -0
- kontra/config/__init__.py +34 -0
- kontra/config/loader.py +127 -0
- kontra/config/models.py +49 -0
- kontra/config/settings.py +797 -0
- kontra/connectors/__init__.py +0 -0
- kontra/connectors/db_utils.py +251 -0
- kontra/connectors/detection.py +323 -0
- kontra/connectors/handle.py +368 -0
- kontra/connectors/postgres.py +127 -0
- kontra/connectors/sqlserver.py +226 -0
- kontra/engine/__init__.py +0 -0
- kontra/engine/backends/duckdb_session.py +227 -0
- kontra/engine/backends/duckdb_utils.py +18 -0
- kontra/engine/backends/polars_backend.py +47 -0
- kontra/engine/engine.py +1205 -0
- kontra/engine/executors/__init__.py +15 -0
- kontra/engine/executors/base.py +50 -0
- kontra/engine/executors/database_base.py +528 -0
- kontra/engine/executors/duckdb_sql.py +607 -0
- kontra/engine/executors/postgres_sql.py +162 -0
- kontra/engine/executors/registry.py +69 -0
- kontra/engine/executors/sqlserver_sql.py +163 -0
- kontra/engine/materializers/__init__.py +14 -0
- kontra/engine/materializers/base.py +42 -0
- kontra/engine/materializers/duckdb.py +110 -0
- kontra/engine/materializers/factory.py +22 -0
- kontra/engine/materializers/polars_connector.py +131 -0
- kontra/engine/materializers/postgres.py +157 -0
- kontra/engine/materializers/registry.py +138 -0
- kontra/engine/materializers/sqlserver.py +160 -0
- kontra/engine/result.py +15 -0
- kontra/engine/sql_utils.py +611 -0
- kontra/engine/sql_validator.py +609 -0
- kontra/engine/stats.py +194 -0
- kontra/engine/types.py +138 -0
- kontra/errors.py +533 -0
- kontra/logging.py +85 -0
- kontra/preplan/__init__.py +5 -0
- kontra/preplan/planner.py +253 -0
- kontra/preplan/postgres.py +179 -0
- kontra/preplan/sqlserver.py +191 -0
- kontra/preplan/types.py +24 -0
- kontra/probes/__init__.py +20 -0
- kontra/probes/compare.py +400 -0
- kontra/probes/relationship.py +283 -0
- kontra/reporters/__init__.py +0 -0
- kontra/reporters/json_reporter.py +190 -0
- kontra/reporters/rich_reporter.py +11 -0
- kontra/rules/__init__.py +35 -0
- kontra/rules/base.py +186 -0
- kontra/rules/builtin/__init__.py +40 -0
- kontra/rules/builtin/allowed_values.py +156 -0
- kontra/rules/builtin/compare.py +188 -0
- kontra/rules/builtin/conditional_not_null.py +213 -0
- kontra/rules/builtin/conditional_range.py +310 -0
- kontra/rules/builtin/contains.py +138 -0
- kontra/rules/builtin/custom_sql_check.py +182 -0
- kontra/rules/builtin/disallowed_values.py +140 -0
- kontra/rules/builtin/dtype.py +203 -0
- kontra/rules/builtin/ends_with.py +129 -0
- kontra/rules/builtin/freshness.py +240 -0
- kontra/rules/builtin/length.py +193 -0
- kontra/rules/builtin/max_rows.py +35 -0
- kontra/rules/builtin/min_rows.py +46 -0
- kontra/rules/builtin/not_null.py +121 -0
- kontra/rules/builtin/range.py +222 -0
- kontra/rules/builtin/regex.py +143 -0
- kontra/rules/builtin/starts_with.py +129 -0
- kontra/rules/builtin/unique.py +124 -0
- kontra/rules/condition_parser.py +203 -0
- kontra/rules/execution_plan.py +455 -0
- kontra/rules/factory.py +103 -0
- kontra/rules/predicates.py +25 -0
- kontra/rules/registry.py +24 -0
- kontra/rules/static_predicates.py +120 -0
- kontra/scout/__init__.py +9 -0
- kontra/scout/backends/__init__.py +17 -0
- kontra/scout/backends/base.py +111 -0
- kontra/scout/backends/duckdb_backend.py +359 -0
- kontra/scout/backends/postgres_backend.py +519 -0
- kontra/scout/backends/sqlserver_backend.py +577 -0
- kontra/scout/dtype_mapping.py +150 -0
- kontra/scout/patterns.py +69 -0
- kontra/scout/profiler.py +801 -0
- kontra/scout/reporters/__init__.py +39 -0
- kontra/scout/reporters/json_reporter.py +165 -0
- kontra/scout/reporters/markdown_reporter.py +152 -0
- kontra/scout/reporters/rich_reporter.py +144 -0
- kontra/scout/store.py +208 -0
- kontra/scout/suggest.py +200 -0
- kontra/scout/types.py +652 -0
- kontra/state/__init__.py +29 -0
- kontra/state/backends/__init__.py +79 -0
- kontra/state/backends/base.py +348 -0
- kontra/state/backends/local.py +480 -0
- kontra/state/backends/postgres.py +1010 -0
- kontra/state/backends/s3.py +543 -0
- kontra/state/backends/sqlserver.py +969 -0
- kontra/state/fingerprint.py +166 -0
- kontra/state/types.py +1061 -0
- kontra/version.py +1 -0
- kontra-0.5.2.dist-info/METADATA +122 -0
- kontra-0.5.2.dist-info/RECORD +124 -0
- kontra-0.5.2.dist-info/WHEEL +5 -0
- kontra-0.5.2.dist-info/entry_points.txt +2 -0
- kontra-0.5.2.dist-info/licenses/LICENSE +17 -0
- kontra-0.5.2.dist-info/top_level.txt +1 -0
kontra/api/rules.py
ADDED
|
@@ -0,0 +1,681 @@
|
|
|
1
|
+
# src/kontra/api/rules.py
|
|
2
|
+
"""
|
|
3
|
+
Rule helper functions for inline rule definitions.
|
|
4
|
+
|
|
5
|
+
Usage:
|
|
6
|
+
from kontra import rules
|
|
7
|
+
|
|
8
|
+
result = kontra.validate(df, rules=[
|
|
9
|
+
rules.not_null("user_id"),
|
|
10
|
+
rules.unique("email"),
|
|
11
|
+
rules.range("age", min=0, max=150),
|
|
12
|
+
])
|
|
13
|
+
|
|
14
|
+
# Multiple rules on same column with custom IDs:
|
|
15
|
+
result = kontra.validate(df, rules=[
|
|
16
|
+
rules.range("score", min=0, max=100, id="score_full_range"),
|
|
17
|
+
rules.range("score", min=80, max=100, id="score_strict_range"),
|
|
18
|
+
])
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from typing import Any, Dict, List, Optional, Union
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _validate_column(column: Any, rule_name: str) -> str:
    """
    Check that ``column`` is usable as a column name and hand it back.

    Args:
        column: Candidate column name supplied by the caller.
        rule_name: Name of the calling rule helper; used as the prefix of
            every error message.

    Returns:
        The ``column`` value, unchanged (it is not stripped).

    Raises:
        ValueError: If ``column`` is None, is not a string, or is empty /
            whitespace-only.
    """
    if isinstance(column, str):
        # Whitespace-only names are treated the same as an empty name.
        if column.strip():
            return column
        raise ValueError(f"{rule_name}() column name cannot be empty")

    # Not a string: None gets its own, more specific message.
    if column is None:
        raise ValueError(f"{rule_name}() requires a column name, got None")
    raise ValueError(f"{rule_name}() column must be a string, got {type(column).__name__}")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _build_rule(
    name: str,
    params: Dict[str, Any],
    severity: str,
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Assemble the plain-dict representation of a rule.

    Args:
        name: Rule name stored under the "name" key.
        params: Rule parameters stored (by reference) under "params".
        severity: Severity label stored under "severity".
        id: Optional custom rule ID; the "id" key is added only when this
            is not None.
        context: Optional metadata dict; the "context" key is added only
            when this is not None.

    Returns:
        A dict with "name", "params", "severity" and, when supplied,
        "id" and "context" (in that order).
    """
    rule: Dict[str, Any] = dict(name=name, params=params, severity=severity)
    optional_fields = (("id", id), ("context", context))
    # Only None means "not supplied"; empty strings/dicts are kept.
    rule.update((key, value) for key, value in optional_fields if value is not None)
    return rule
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def not_null(
    column: str,
    severity: str = "blocking",
    include_nan: bool = False,
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule requiring that a column contains no null values.

    Args:
        column: Column name to check
        severity: "blocking" | "warning" | "info"
        include_nan: When truthy, NaN is also treated as null
            (default: False)
        id: Custom rule ID (use when applying multiple rules to same column)
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.)

    Note:
        NaN values are NOT considered null by default (Polars behavior).
        Pass include_nan=True to catch both NULL and NaN in float columns.

    Returns:
        Rule dict for use with kontra.validate()

    Raises:
        ValueError: If column is not a non-empty string
    """
    _validate_column(column, "not_null")

    # The flag is emitted only when requested, so the default rule dict
    # carries nothing but the column.
    nan_option: Dict[str, Any] = {"include_nan": True} if include_nan else {}
    return _build_rule("not_null", {"column": column, **nan_option}, severity, id, context)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def unique(
    column: str,
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule requiring that a column holds no duplicate values.

    Args:
        column: Column name to check
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple rules to same column)
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.)

    Returns:
        Rule dict for use with kontra.validate()

    Raises:
        ValueError: If column is not a non-empty string
    """
    _validate_column(column, "unique")
    rule_params: Dict[str, Any] = {"column": column}
    return _build_rule("unique", rule_params, severity, id=id, context=context)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def dtype(
    column: str,
    type: str,
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule requiring that a column has a given data type.

    Args:
        column: Column name to check
        type: Expected type (int64, float64, string, datetime, bool, etc.);
            passed through to the rule without being checked here
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple rules to same column)
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.)

    Returns:
        Rule dict for use with kontra.validate()

    Raises:
        ValueError: If column is not a non-empty string
    """
    _validate_column(column, "dtype")
    rule_params: Dict[str, Any] = {"column": column, "type": type}
    return _build_rule("dtype", rule_params, severity, id=id, context=context)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def range(
    column: str,
    min: Optional[Union[int, float]] = None,
    max: Optional[Union[int, float]] = None,
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule requiring column values to fall inside a numeric range.

    Args:
        column: Column name to check
        min: Minimum allowed value (inclusive)
        max: Maximum allowed value (inclusive)
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple rules to same column)
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.)

    Returns:
        Rule dict for use with kontra.validate(); only the bounds that
        were supplied appear in its params.

    Raises:
        ValueError: If column is invalid, if neither min nor max is
            provided, or if min > max
    """
    _validate_column(column, "range")

    has_min = min is not None
    has_max = max is not None

    # An unbounded range would never fail, so it is rejected up front.
    if not (has_min or has_max):
        raise ValueError("range rule: at least one of 'min' or 'max' must be provided")

    # The ordering check only makes sense when both ends are given.
    if has_min and has_max and min > max:
        raise ValueError(f"range rule: min ({min}) must be <= max ({max})")

    bounds = {key: bound for key, bound in (("min", min), ("max", max)) if bound is not None}
    return _build_rule("range", {"column": column, **bounds}, severity, id, context)
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def allowed_values(
    column: str,
    values: List[Any],
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule requiring every column value to be in an allowed set.

    Args:
        column: Column name to check
        values: List of allowed values (stored as given, not copied)
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple rules to same column)
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.)

    Returns:
        Rule dict for use with kontra.validate()

    Raises:
        ValueError: If column is not a non-empty string
    """
    _validate_column(column, "allowed_values")
    rule_params: Dict[str, Any] = {"column": column, "values": values}
    return _build_rule("allowed_values", rule_params, severity, id=id, context=context)
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def regex(
    column: str,
    pattern: str,
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule requiring column values to match a regular expression.

    Args:
        column: Column name to check
        pattern: Regular expression pattern; passed through to the rule
            without being compiled or checked here
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple rules to same column)
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.)

    Returns:
        Rule dict for use with kontra.validate()

    Raises:
        ValueError: If column is not a non-empty string
    """
    _validate_column(column, "regex")
    rule_params: Dict[str, Any] = {"column": column, "pattern": pattern}
    return _build_rule("regex", rule_params, severity, id=id, context=context)
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def min_rows(
    threshold: int,
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule requiring the dataset to have at least ``threshold`` rows.

    Args:
        threshold: Minimum row count (must be >= 0)
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple rules)
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.)

    Returns:
        Rule dict for use with kontra.validate()

    Raises:
        ValueError: If threshold is negative or not an integer
    """
    # bool is a subclass of int, so True/False must be excluded explicitly.
    is_plain_int = isinstance(threshold, int) and not isinstance(threshold, bool)
    if not is_plain_int:
        raise ValueError(f"min_rows() threshold must be an integer, got {type(threshold).__name__}")

    if threshold < 0:
        raise ValueError(f"min_rows threshold must be non-negative, got {threshold}")

    rule_params: Dict[str, Any] = {"threshold": threshold}
    return _build_rule("min_rows", rule_params, severity, id=id, context=context)
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def max_rows(
    threshold: int,
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule requiring the dataset to have at most ``threshold`` rows.

    Args:
        threshold: Maximum row count (must be a non-negative integer)
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple rules)
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.)

    Returns:
        Rule dict for use with kontra.validate()

    Raises:
        ValueError: If threshold is negative or not an integer
    """
    # bool is a subclass of int, so True/False must be excluded explicitly.
    is_plain_int = isinstance(threshold, int) and not isinstance(threshold, bool)
    if not is_plain_int:
        raise ValueError(f"max_rows() threshold must be an integer, got {type(threshold).__name__}")

    if threshold < 0:
        raise ValueError(f"max_rows threshold must be non-negative, got {threshold}")

    rule_params: Dict[str, Any] = {"threshold": threshold}
    return _build_rule("max_rows", rule_params, severity, id=id, context=context)
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def freshness(
    column: str,
    max_age: str,
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule requiring a timestamp column to be within max_age of now.

    Args:
        column: Datetime column to check
        max_age: Maximum age (e.g., "24h", "7d", "1w")
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple rules to same column)
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.)

    Returns:
        Rule dict for use with kontra.validate(); max_age is stored as
        the original string, not the parsed duration.

    Raises:
        ValueError: If column is invalid, or max_age is missing, not a
            string, or rejected by parse_duration
    """
    _validate_column(column, "freshness")

    if not isinstance(max_age, str):
        # None gets a dedicated "missing parameter" message.
        if max_age is None:
            raise ValueError("freshness() requires max_age parameter")
        raise ValueError(f"freshness() max_age must be a string, got {type(max_age).__name__}")

    # Imported lazily, and only once the cheap checks above have passed.
    from kontra.rules.builtin.freshness import parse_duration

    # Parse purely to validate; the result is discarded.
    try:
        parse_duration(max_age)
    except ValueError as parse_error:
        # "from None" hides the parser's traceback from the caller.
        raise ValueError(f"freshness() invalid max_age: {parse_error}") from None

    rule_params: Dict[str, Any] = {"column": column, "max_age": max_age}
    return _build_rule("freshness", rule_params, severity, id=id, context=context)
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def custom_sql_check(
    sql: str,
    threshold: int = 0,
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Custom SQL check must return at most `threshold` rows.

    Args:
        sql: SQL query that returns rows that violate the rule
        threshold: Maximum allowed violations (default: 0)
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple custom checks)
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.)

    Returns:
        Rule dict for use with kontra.validate()

    Raises:
        ValueError: If sql is None, not a string, or empty/whitespace-only
    """
    # Fail fast like the other helpers: a missing or blank query can never
    # produce a usable rule, so reject it here instead of at validate time.
    if sql is None:
        raise ValueError("custom_sql_check() requires a sql query, got None")
    if not isinstance(sql, str):
        raise ValueError(f"custom_sql_check() sql must be a string, got {type(sql).__name__}")
    if not sql.strip():
        raise ValueError("custom_sql_check() sql cannot be empty")

    return _build_rule("custom_sql_check", {"sql": sql, "threshold": threshold}, severity, id, context)
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
def compare(
    left: str,
    right: str,
    op: str,
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule comparing two columns with a comparison operator.

    Args:
        left: Left column name
        right: Right column name
        op: Comparison operator: ">", ">=", "<", "<=", "==", "!="
            (passed through to the rule without being checked here)
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple compare rules)
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.)

    Note:
        Rows where either column is NULL are counted as failures.
        You cannot meaningfully compare NULL values.

    Returns:
        Rule dict for use with kontra.validate()

    Raises:
        ValueError: If left or right is not a non-empty string

    Example:
        # Ensure end_date >= start_date
        rules.compare("end_date", "start_date", ">=")
    """
    # Left is checked before right, so a bad left operand is reported first.
    for side, column_name in (("left", left), ("right", right)):
        _validate_column(column_name, f"compare ({side})")

    rule_params: Dict[str, Any] = {"left": left, "right": right, "op": op}
    return _build_rule("compare", rule_params, severity, id=id, context=context)
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
def conditional_not_null(
    column: str,
    when: str,
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule requiring a column to be non-NULL whenever a condition holds.

    Args:
        column: Column that must not be null
        when: Condition expression (e.g., "status == 'shipped'"); passed
            through to the rule without being parsed here
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple rules)
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.)

    Condition syntax:
        column_name operator value

        Supported operators: ==, !=, >, >=, <, <=
        Supported values: 'string', 123, 123.45, true, false, null

    Returns:
        Rule dict for use with kontra.validate()

    Raises:
        ValueError: If column is not a non-empty string

    Example:
        # shipping_date must not be null when status is 'shipped'
        rules.conditional_not_null("shipping_date", "status == 'shipped'")
    """
    _validate_column(column, "conditional_not_null")
    rule_params: Dict[str, Any] = {"column": column, "when": when}
    return _build_rule("conditional_not_null", rule_params, severity, id=id, context=context)
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
def conditional_range(
    column: str,
    when: str,
    min: Optional[Union[int, float]] = None,
    max: Optional[Union[int, float]] = None,
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Column must be within range when a condition is met.

    Args:
        column: Column to check range
        when: Condition expression (e.g., "customer_type == 'premium'")
        min: Minimum allowed value (inclusive)
        max: Maximum allowed value (inclusive)
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple rules)
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.)

    At least one of `min` or `max` must be provided.

    Condition syntax:
        column_name operator value

        Supported operators: ==, !=, >, >=, <, <=
        Supported values: 'string', 123, 123.45, true, false, null

    When the condition is TRUE:
        - NULL in column = failure (can't compare NULL)
        - Value outside [min, max] = failure

    Returns:
        Rule dict for use with kontra.validate()

    Raises:
        ValueError: If column is invalid, if neither min nor max is
            provided, or if min > max

    Example:
        # discount_percent must be between 10 and 50 for premium customers
        rules.conditional_range("discount_percent", "customer_type == 'premium'", min=10, max=50)
    """
    _validate_column(column, "conditional_range")

    # Enforce the documented "at least one bound" contract here, the same
    # way range() and length() do, instead of emitting an unusable rule.
    if min is None and max is None:
        raise ValueError("conditional_range rule: at least one of 'min' or 'max' must be provided")

    # An inverted range can never be satisfied; reject it early.
    if min is not None and max is not None and min > max:
        raise ValueError(f"conditional_range rule: min ({min}) must be <= max ({max})")

    # Annotated explicitly: the literal alone would be inferred as
    # Dict[str, str], which the numeric bounds below do not fit.
    params: Dict[str, Any] = {"column": column, "when": when}
    if min is not None:
        params["min"] = min
    if max is not None:
        params["max"] = max

    return _build_rule("conditional_range", params, severity, id, context)
|
|
462
|
+
|
|
463
|
+
|
|
464
|
+
def disallowed_values(
    column: str,
    values: List[Any],
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule requiring that no column value is in a disallowed set.

    This is the inverse of allowed_values: a value that IS in the list
    fails the rule.

    Args:
        column: Column name to check
        values: List of disallowed values (stored as given, not copied)
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple rules to same column)
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.)

    Note:
        NULL values are NOT failures (NULL is not in any list).

    Returns:
        Rule dict for use with kontra.validate()

    Raises:
        ValueError: If column is not a non-empty string

    Example:
        rules.disallowed_values("status", ["deleted", "banned", "spam"])
    """
    _validate_column(column, "disallowed_values")
    rule_params: Dict[str, Any] = {"column": column, "values": values}
    return _build_rule("disallowed_values", rule_params, severity, id=id, context=context)
|
|
494
|
+
|
|
495
|
+
|
|
496
|
+
def length(
    column: str,
    min: Optional[int] = None,
    max: Optional[int] = None,
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule requiring string lengths in a column to fall inside a range.

    Args:
        column: Column name to check
        min: Minimum length (inclusive)
        max: Maximum length (inclusive)
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple rules to same column)
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.)

    At least one of `min` or `max` must be provided.

    Note:
        NULL values are failures (can't measure length of NULL).

    Returns:
        Rule dict for use with kontra.validate(); only the bounds that
        were supplied appear in its params.

    Raises:
        ValueError: If column is invalid, if neither min nor max is
            provided, or if min > max

    Example:
        rules.length("username", min=3, max=50)
    """
    _validate_column(column, "length")

    has_min = min is not None
    has_max = max is not None

    if not (has_min or has_max):
        raise ValueError("length rule: at least one of 'min' or 'max' must be provided")

    # The ordering check only applies when both ends are given.
    if has_min and has_max and min > max:
        raise ValueError(f"length rule: min ({min}) must be <= max ({max})")

    bounds = {key: bound for key, bound in (("min", min), ("max", max)) if bound is not None}
    return _build_rule("length", {"column": column, **bounds}, severity, id, context)
|
|
544
|
+
|
|
545
|
+
|
|
546
|
+
def contains(
    column: str,
    substring: str,
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule requiring column values to contain a literal substring.

    Matching is literal; for regex patterns use the `regex` rule instead.

    Args:
        column: Column name to check
        substring: Substring that must be present
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple rules to same column)
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.)

    Note:
        NULL values are failures (can't search in NULL).

    Returns:
        Rule dict for use with kontra.validate()

    Raises:
        ValueError: If column is invalid or substring is empty

    Example:
        rules.contains("email", "@")
    """
    _validate_column(column, "contains")

    if substring:
        rule_params: Dict[str, Any] = {"column": column, "substring": substring}
        return _build_rule("contains", rule_params, severity, id=id, context=context)

    # Any falsy substring (including None) lands here.
    raise ValueError("contains rule: substring cannot be empty")
|
|
579
|
+
|
|
580
|
+
|
|
581
|
+
def starts_with(
    column: str,
    prefix: str,
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule requiring column values to start with a given prefix.

    Uses LIKE pattern matching for efficiency (faster than regex).

    Args:
        column: Column name to check
        prefix: Prefix that must be present at the start
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple rules to same column)
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.)

    Note:
        NULL values are failures (can't check NULL).

    Returns:
        Rule dict for use with kontra.validate()

    Raises:
        ValueError: If column is invalid or prefix is empty

    Example:
        rules.starts_with("url", "https://")
    """
    _validate_column(column, "starts_with")

    if prefix:
        rule_params: Dict[str, Any] = {"column": column, "prefix": prefix}
        return _build_rule("starts_with", rule_params, severity, id=id, context=context)

    # Any falsy prefix (including None) lands here.
    raise ValueError("starts_with rule: prefix cannot be empty")
|
|
613
|
+
|
|
614
|
+
|
|
615
|
+
def ends_with(
    column: str,
    suffix: str,
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule requiring column values to end with a given suffix.

    Uses LIKE pattern matching for efficiency (faster than regex).

    Args:
        column: Column name to check
        suffix: Suffix that must be present at the end
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple rules to same column)
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.)

    Note:
        NULL values are failures (can't check NULL).

    Returns:
        Rule dict for use with kontra.validate()

    Raises:
        ValueError: If column is invalid or suffix is empty

    Example:
        rules.ends_with("filename", ".csv")
    """
    _validate_column(column, "ends_with")

    if suffix:
        rule_params: Dict[str, Any] = {"column": column, "suffix": suffix}
        return _build_rule("ends_with", rule_params, severity, id=id, context=context)

    # Any falsy suffix (including None) lands here.
    raise ValueError("ends_with rule: suffix cannot be empty")
|
|
647
|
+
|
|
648
|
+
|
|
649
|
+
# Module-level access for `from kontra import rules` then `rules.not_null(...)`
|
|
650
|
+
class _RulesModule:
    """
    Namespace object exposing every rule helper as an attribute.

    An instance of this class is what makes the ``rules.not_null(...)``
    call syntax work. Each helper is wrapped in ``staticmethod`` so that
    accessing it through the instance does not bind ``self``.
    """

    # Dataset-level checks
    min_rows = staticmethod(min_rows)
    max_rows = staticmethod(max_rows)
    custom_sql_check = staticmethod(custom_sql_check)

    # Single-column value checks
    not_null = staticmethod(not_null)
    unique = staticmethod(unique)
    dtype = staticmethod(dtype)
    range = staticmethod(range)
    allowed_values = staticmethod(allowed_values)
    disallowed_values = staticmethod(disallowed_values)
    freshness = staticmethod(freshness)

    # String checks
    regex = staticmethod(regex)
    length = staticmethod(length)
    contains = staticmethod(contains)
    starts_with = staticmethod(starts_with)
    ends_with = staticmethod(ends_with)

    # Cross-column and conditional checks
    compare = staticmethod(compare)
    conditional_not_null = staticmethod(conditional_not_null)
    conditional_range = staticmethod(conditional_range)

    def __repr__(self) -> str:
        """Return a fixed, module-like representation."""
        return "<kontra.rules module>"


# Shared namespace instance exported for `rules.<helper>(...)` usage.
rules = _RulesModule()
|
kontra/cli/__init__.py
ADDED
|
File without changes
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""CLI command modules."""
|