kontra 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. kontra/__init__.py +1871 -0
  2. kontra/api/__init__.py +22 -0
  3. kontra/api/compare.py +340 -0
  4. kontra/api/decorators.py +153 -0
  5. kontra/api/results.py +2121 -0
  6. kontra/api/rules.py +681 -0
  7. kontra/cli/__init__.py +0 -0
  8. kontra/cli/commands/__init__.py +1 -0
  9. kontra/cli/commands/config.py +153 -0
  10. kontra/cli/commands/diff.py +450 -0
  11. kontra/cli/commands/history.py +196 -0
  12. kontra/cli/commands/profile.py +289 -0
  13. kontra/cli/commands/validate.py +468 -0
  14. kontra/cli/constants.py +6 -0
  15. kontra/cli/main.py +48 -0
  16. kontra/cli/renderers.py +304 -0
  17. kontra/cli/utils.py +28 -0
  18. kontra/config/__init__.py +34 -0
  19. kontra/config/loader.py +127 -0
  20. kontra/config/models.py +49 -0
  21. kontra/config/settings.py +797 -0
  22. kontra/connectors/__init__.py +0 -0
  23. kontra/connectors/db_utils.py +251 -0
  24. kontra/connectors/detection.py +323 -0
  25. kontra/connectors/handle.py +368 -0
  26. kontra/connectors/postgres.py +127 -0
  27. kontra/connectors/sqlserver.py +226 -0
  28. kontra/engine/__init__.py +0 -0
  29. kontra/engine/backends/duckdb_session.py +227 -0
  30. kontra/engine/backends/duckdb_utils.py +18 -0
  31. kontra/engine/backends/polars_backend.py +47 -0
  32. kontra/engine/engine.py +1205 -0
  33. kontra/engine/executors/__init__.py +15 -0
  34. kontra/engine/executors/base.py +50 -0
  35. kontra/engine/executors/database_base.py +528 -0
  36. kontra/engine/executors/duckdb_sql.py +607 -0
  37. kontra/engine/executors/postgres_sql.py +162 -0
  38. kontra/engine/executors/registry.py +69 -0
  39. kontra/engine/executors/sqlserver_sql.py +163 -0
  40. kontra/engine/materializers/__init__.py +14 -0
  41. kontra/engine/materializers/base.py +42 -0
  42. kontra/engine/materializers/duckdb.py +110 -0
  43. kontra/engine/materializers/factory.py +22 -0
  44. kontra/engine/materializers/polars_connector.py +131 -0
  45. kontra/engine/materializers/postgres.py +157 -0
  46. kontra/engine/materializers/registry.py +138 -0
  47. kontra/engine/materializers/sqlserver.py +160 -0
  48. kontra/engine/result.py +15 -0
  49. kontra/engine/sql_utils.py +611 -0
  50. kontra/engine/sql_validator.py +609 -0
  51. kontra/engine/stats.py +194 -0
  52. kontra/engine/types.py +138 -0
  53. kontra/errors.py +533 -0
  54. kontra/logging.py +85 -0
  55. kontra/preplan/__init__.py +5 -0
  56. kontra/preplan/planner.py +253 -0
  57. kontra/preplan/postgres.py +179 -0
  58. kontra/preplan/sqlserver.py +191 -0
  59. kontra/preplan/types.py +24 -0
  60. kontra/probes/__init__.py +20 -0
  61. kontra/probes/compare.py +400 -0
  62. kontra/probes/relationship.py +283 -0
  63. kontra/reporters/__init__.py +0 -0
  64. kontra/reporters/json_reporter.py +190 -0
  65. kontra/reporters/rich_reporter.py +11 -0
  66. kontra/rules/__init__.py +35 -0
  67. kontra/rules/base.py +186 -0
  68. kontra/rules/builtin/__init__.py +40 -0
  69. kontra/rules/builtin/allowed_values.py +156 -0
  70. kontra/rules/builtin/compare.py +188 -0
  71. kontra/rules/builtin/conditional_not_null.py +213 -0
  72. kontra/rules/builtin/conditional_range.py +310 -0
  73. kontra/rules/builtin/contains.py +138 -0
  74. kontra/rules/builtin/custom_sql_check.py +182 -0
  75. kontra/rules/builtin/disallowed_values.py +140 -0
  76. kontra/rules/builtin/dtype.py +203 -0
  77. kontra/rules/builtin/ends_with.py +129 -0
  78. kontra/rules/builtin/freshness.py +240 -0
  79. kontra/rules/builtin/length.py +193 -0
  80. kontra/rules/builtin/max_rows.py +35 -0
  81. kontra/rules/builtin/min_rows.py +46 -0
  82. kontra/rules/builtin/not_null.py +121 -0
  83. kontra/rules/builtin/range.py +222 -0
  84. kontra/rules/builtin/regex.py +143 -0
  85. kontra/rules/builtin/starts_with.py +129 -0
  86. kontra/rules/builtin/unique.py +124 -0
  87. kontra/rules/condition_parser.py +203 -0
  88. kontra/rules/execution_plan.py +455 -0
  89. kontra/rules/factory.py +103 -0
  90. kontra/rules/predicates.py +25 -0
  91. kontra/rules/registry.py +24 -0
  92. kontra/rules/static_predicates.py +120 -0
  93. kontra/scout/__init__.py +9 -0
  94. kontra/scout/backends/__init__.py +17 -0
  95. kontra/scout/backends/base.py +111 -0
  96. kontra/scout/backends/duckdb_backend.py +359 -0
  97. kontra/scout/backends/postgres_backend.py +519 -0
  98. kontra/scout/backends/sqlserver_backend.py +577 -0
  99. kontra/scout/dtype_mapping.py +150 -0
  100. kontra/scout/patterns.py +69 -0
  101. kontra/scout/profiler.py +801 -0
  102. kontra/scout/reporters/__init__.py +39 -0
  103. kontra/scout/reporters/json_reporter.py +165 -0
  104. kontra/scout/reporters/markdown_reporter.py +152 -0
  105. kontra/scout/reporters/rich_reporter.py +144 -0
  106. kontra/scout/store.py +208 -0
  107. kontra/scout/suggest.py +200 -0
  108. kontra/scout/types.py +652 -0
  109. kontra/state/__init__.py +29 -0
  110. kontra/state/backends/__init__.py +79 -0
  111. kontra/state/backends/base.py +348 -0
  112. kontra/state/backends/local.py +480 -0
  113. kontra/state/backends/postgres.py +1010 -0
  114. kontra/state/backends/s3.py +543 -0
  115. kontra/state/backends/sqlserver.py +969 -0
  116. kontra/state/fingerprint.py +166 -0
  117. kontra/state/types.py +1061 -0
  118. kontra/version.py +1 -0
  119. kontra-0.5.2.dist-info/METADATA +122 -0
  120. kontra-0.5.2.dist-info/RECORD +124 -0
  121. kontra-0.5.2.dist-info/WHEEL +5 -0
  122. kontra-0.5.2.dist-info/entry_points.txt +2 -0
  123. kontra-0.5.2.dist-info/licenses/LICENSE +17 -0
  124. kontra-0.5.2.dist-info/top_level.txt +1 -0
kontra/api/rules.py ADDED
@@ -0,0 +1,681 @@
1
+ # src/kontra/api/rules.py
2
+ """
3
+ Rule helper functions for inline rule definitions.
4
+
5
+ Usage:
6
+ import kontra
+ from kontra import rules
7
+
8
+ result = kontra.validate(df, rules=[
9
+ rules.not_null("user_id"),
10
+ rules.unique("email"),
11
+ rules.range("age", min=0, max=150),
12
+ ])
13
+
14
+ # Multiple rules on same column with custom IDs:
15
+ result = kontra.validate(df, rules=[
16
+ rules.range("score", min=0, max=100, id="score_full_range"),
17
+ rules.range("score", min=80, max=100, id="score_strict_range"),
18
+ ])
19
+ """
20
+
21
+ from typing import Any, Dict, List, Optional, Union
22
+
23
+
24
+ def _validate_column(column: Any, rule_name: str) -> str:
25
+ """Validate that column is a non-empty string."""
26
+ if column is None:
27
+ raise ValueError(f"{rule_name}() requires a column name, got None")
28
+ if not isinstance(column, str):
29
+ raise ValueError(f"{rule_name}() column must be a string, got {type(column).__name__}")
30
+ if not column.strip():
31
+ raise ValueError(f"{rule_name}() column name cannot be empty")
32
+ return column
33
+
34
+
35
+ def _build_rule(
36
+ name: str,
37
+ params: Dict[str, Any],
38
+ severity: str,
39
+ id: Optional[str] = None,
40
+ context: Optional[Dict[str, Any]] = None,
41
+ ) -> Dict[str, Any]:
42
+ """Build a rule dict, optionally with custom id and context."""
43
+ rule: Dict[str, Any] = {
44
+ "name": name,
45
+ "params": params,
46
+ "severity": severity,
47
+ }
48
+ if id is not None:
49
+ rule["id"] = id
50
+ if context is not None:
51
+ rule["context"] = context
52
+ return rule
53
+
54
+
55
def not_null(
    column: str,
    severity: str = "blocking",
    include_nan: bool = False,
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule requiring that a column holds no null values.

    Args:
        column: Name of the column to check.
        severity: "blocking" | "warning" | "info"
        include_nan: When truthy, ``include_nan=True`` is added to the rule
            params so NaN is treated as null too (default: False).
        id: Custom rule ID (use when applying multiple rules to same column).
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.).

    Note:
        NaN is not counted as null unless ``include_nan`` is set (Polars
        behavior). Enable it to catch both NULL and NaN in float columns.

    Returns:
        Rule dict for use with kontra.validate().

    Raises:
        ValueError: If ``column`` is rejected by ``_validate_column``.
    """
    _validate_column(column, "not_null")

    # The flag is only recorded when requested; it is omitted otherwise.
    params: Dict[str, Any] = (
        {"column": column, "include_nan": True} if include_nan else {"column": column}
    )
    return _build_rule("not_null", params, severity, id=id, context=context)
85
+
86
+
87
def unique(
    column: str,
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule requiring that a column contains no duplicate values.

    Args:
        column: Name of the column to check.
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple rules to same column).
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.).

    Returns:
        Rule dict for use with kontra.validate().

    Raises:
        ValueError: If ``column`` is rejected by ``_validate_column``.
    """
    _validate_column(column, "unique")
    params: Dict[str, Any] = {"column": column}
    return _build_rule("unique", params, severity, id=id, context=context)
107
+
108
+
109
def dtype(
    column: str,
    type: str,
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule requiring that a column has the given data type.

    Args:
        column: Name of the column to check.
        type: Expected type (int64, float64, string, datetime, bool, etc.).
            Passed through to the rule params without validation here.
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple rules to same column).
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.).

    Returns:
        Rule dict for use with kontra.validate().

    Raises:
        ValueError: If ``column`` is rejected by ``_validate_column``.
    """
    _validate_column(column, "dtype")
    params: Dict[str, Any] = {"column": column, "type": type}
    return _build_rule("dtype", params, severity, id=id, context=context)
131
+
132
+
133
def range(
    column: str,
    min: Optional[Union[int, float]] = None,
    max: Optional[Union[int, float]] = None,
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule requiring column values to fall inside a range.

    Args:
        column: Name of the column to check.
        min: Minimum allowed value (inclusive); omitted from the rule when None.
        max: Maximum allowed value (inclusive); omitted from the rule when None.
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple rules to same column).
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.).

    Returns:
        Rule dict for use with kontra.validate().

    Raises:
        ValueError: If the column name is invalid, if neither ``min`` nor
            ``max`` is provided, or if ``min > max``.
    """
    _validate_column(column, "range")

    has_min = min is not None
    has_max = max is not None

    # An unbounded range would be a no-op rule, so reject it up front.
    if not (has_min or has_max):
        raise ValueError("range rule: at least one of 'min' or 'max' must be provided")

    # Bounds are only compared when both were supplied.
    if has_min and has_max and min > max:
        raise ValueError(f"range rule: min ({min}) must be <= max ({max})")

    params: Dict[str, Any] = {"column": column}
    params.update(
        {key: bound for key, bound in (("min", min), ("max", max)) if bound is not None}
    )
    return _build_rule("range", params, severity, id=id, context=context)
175
+
176
+
177
def allowed_values(
    column: str,
    values: List[Any],
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule requiring column values to belong to an allowed set.

    Args:
        column: Name of the column to check.
        values: List of allowed values; stored in the rule params as given.
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple rules to same column).
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.).

    Returns:
        Rule dict for use with kontra.validate().

    Raises:
        ValueError: If ``column`` is rejected by ``_validate_column``.
    """
    _validate_column(column, "allowed_values")
    params: Dict[str, Any] = {"column": column, "values": values}
    return _build_rule("allowed_values", params, severity, id=id, context=context)
199
+
200
+
201
def regex(
    column: str,
    pattern: str,
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule requiring column values to match a regular expression.

    Args:
        column: Name of the column to check.
        pattern: Regular expression pattern; stored as given (it is not
            compiled or validated by this helper).
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple rules to same column).
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.).

    Returns:
        Rule dict for use with kontra.validate().

    Raises:
        ValueError: If ``column`` is rejected by ``_validate_column``.
    """
    _validate_column(column, "regex")
    params: Dict[str, Any] = {"column": column, "pattern": pattern}
    return _build_rule("regex", params, severity, id=id, context=context)
223
+
224
+
225
def min_rows(
    threshold: int,
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule requiring the dataset to have at least ``threshold`` rows.

    Args:
        threshold: Minimum row count (must be >= 0).
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple rules).
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.).

    Returns:
        Rule dict for use with kontra.validate().

    Raises:
        ValueError: If ``threshold`` is negative or not an integer
            (``bool`` is rejected even though it subclasses ``int``).
    """
    is_plain_int = isinstance(threshold, int) and not isinstance(threshold, bool)
    if not is_plain_int:
        raise ValueError(f"min_rows() threshold must be an integer, got {type(threshold).__name__}")
    if threshold < 0:
        raise ValueError(f"min_rows threshold must be non-negative, got {threshold}")

    params: Dict[str, Any] = {"threshold": threshold}
    return _build_rule("min_rows", params, severity, id=id, context=context)
252
+
253
+
254
def max_rows(
    threshold: int,
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule requiring the dataset to have at most ``threshold`` rows.

    Args:
        threshold: Maximum row count (must be a non-negative integer).
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple rules).
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.).

    Returns:
        Rule dict for use with kontra.validate().

    Raises:
        ValueError: If ``threshold`` is negative or not an integer
            (``bool`` is rejected even though it subclasses ``int``).
    """
    is_plain_int = isinstance(threshold, int) and not isinstance(threshold, bool)
    if not is_plain_int:
        raise ValueError(f"max_rows() threshold must be an integer, got {type(threshold).__name__}")
    if threshold < 0:
        raise ValueError(f"max_rows threshold must be non-negative, got {threshold}")

    params: Dict[str, Any] = {"threshold": threshold}
    return _build_rule("max_rows", params, severity, id=id, context=context)
280
+
281
+
282
def freshness(
    column: str,
    max_age: str,
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule requiring a timestamp column to be within ``max_age`` of now.

    Args:
        column: Datetime column to check.
        max_age: Maximum age (e.g., "24h", "7d", "1w"). Must be a string
            accepted by ``kontra.rules.builtin.freshness.parse_duration``.
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple rules to same column).
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.).

    Returns:
        Rule dict for use with kontra.validate(). The original ``max_age``
        string is stored, not the parsed duration.

    Raises:
        ValueError: If the column name is invalid, or if ``max_age`` is
            None, not a string, or cannot be parsed.
    """
    _validate_column(column, "freshness")

    if not isinstance(max_age, str):
        # None is reported as "missing"; anything else as a type problem.
        if max_age is None:
            raise ValueError("freshness() requires max_age parameter")
        raise ValueError(f"freshness() max_age must be a string, got {type(max_age).__name__}")

    # Imported lazily, only once the cheap checks above have passed.
    from kontra.rules.builtin.freshness import parse_duration

    try:
        # Parsed purely for validation; the result is discarded.
        parse_duration(max_age)
    except ValueError as e:
        # `from None` hides the parser's traceback from the caller.
        raise ValueError(f"freshness() invalid max_age: {e}") from None

    params: Dict[str, Any] = {"column": column, "max_age": max_age}
    return _build_rule("freshness", params, severity, id=id, context=context)
321
+
322
+
323
def custom_sql_check(
    sql: str,
    threshold: int = 0,
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule whose custom SQL query may return at most `threshold` rows.

    Args:
        sql: SQL query that returns rows that violate the rule.
        threshold: Maximum allowed violations (default: 0).
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple custom checks).
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.).

    Note:
        Neither ``sql`` nor ``threshold`` is validated by this helper; both
        are stored in the rule params as given.

    Returns:
        Rule dict for use with kontra.validate().
    """
    params: Dict[str, Any] = {"sql": sql, "threshold": threshold}
    return _build_rule("custom_sql_check", params, severity, id=id, context=context)
344
+
345
+
346
def compare(
    left: str,
    right: str,
    op: str,
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule comparing two columns with a comparison operator.

    Args:
        left: Left column name.
        right: Right column name.
        op: Comparison operator: ">", ">=", "<", "<=", "==", "!=".
            Stored as given; this helper does not validate it.
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple compare rules).
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.).

    Note:
        Rows where either column is NULL are counted as failures.
        You cannot meaningfully compare NULL values.

    Returns:
        Rule dict for use with kontra.validate().

    Raises:
        ValueError: If ``left`` or ``right`` is not a valid column name.

    Example:
        # Ensure end_date >= start_date
        rules.compare("end_date", "start_date", ">=")
    """
    # Left is checked first, so its error wins when both names are invalid.
    _validate_column(left, "compare (left)")
    _validate_column(right, "compare (right)")

    params: Dict[str, Any] = {"left": left, "right": right, "op": op}
    return _build_rule("compare", params, severity, id=id, context=context)
379
+
380
+
381
def conditional_not_null(
    column: str,
    when: str,
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule requiring a column to be non-NULL when a condition holds.

    Args:
        column: Column that must not be null.
        when: Condition expression (e.g., "status == 'shipped'"). Stored as
            given; it is not parsed by this helper.
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple rules).
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.).

    Condition syntax:
        column_name operator value

        Supported operators: ==, !=, >, >=, <, <=
        Supported values: 'string', 123, 123.45, true, false, null

    Returns:
        Rule dict for use with kontra.validate().

    Raises:
        ValueError: If ``column`` is rejected by ``_validate_column``.

    Example:
        # shipping_date must not be null when status is 'shipped'
        rules.conditional_not_null("shipping_date", "status == 'shipped'")
    """
    _validate_column(column, "conditional_not_null")
    params: Dict[str, Any] = {"column": column, "when": when}
    return _build_rule("conditional_not_null", params, severity, id=id, context=context)
413
+
414
+
415
def conditional_range(
    column: str,
    when: str,
    min: Optional[Union[int, float]] = None,
    max: Optional[Union[int, float]] = None,
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Column must be within range when a condition is met.

    Args:
        column: Column to check range
        when: Condition expression (e.g., "customer_type == 'premium'")
        min: Minimum allowed value (inclusive)
        max: Maximum allowed value (inclusive)
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple rules)
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.)

    At least one of `min` or `max` must be provided.

    Condition syntax:
        column_name operator value

        Supported operators: ==, !=, >, >=, <, <=
        Supported values: 'string', 123, 123.45, true, false, null

    When the condition is TRUE:
        - NULL in column = failure (can't compare NULL)
        - Value outside [min, max] = failure

    Returns:
        Rule dict for use with kontra.validate()

    Raises:
        ValueError: If the column name is invalid, if neither min nor max
            is provided, or if min > max

    Example:
        # discount_percent must be between 10 and 50 for premium customers
        rules.conditional_range("discount_percent", "customer_type == 'premium'", min=10, max=50)
    """
    _validate_column(column, "conditional_range")

    # Enforce the documented contract here, the same way `range` and
    # `length` do, instead of emitting a rule with no bounds at all.
    if min is None and max is None:
        raise ValueError(
            "conditional_range rule: at least one of 'min' or 'max' must be provided"
        )

    # An inverted interval can never be satisfied; fail fast.
    if min is not None and max is not None and min > max:
        raise ValueError(f"conditional_range rule: min ({min}) must be <= max ({max})")

    # Annotated as Dict[str, Any]: values are a mix of str and numeric bounds.
    params: Dict[str, Any] = {"column": column, "when": when}
    if min is not None:
        params["min"] = min
    if max is not None:
        params["max"] = max
    return _build_rule("conditional_range", params, severity, id, context)
462
+
463
+
464
def disallowed_values(
    column: str,
    values: List[Any],
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule requiring column values to stay out of a disallowed set.

    Inverse of allowed_values: fails if value IS in the list.

    Args:
        column: Name of the column to check.
        values: List of disallowed values; stored in the rule params as given.
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple rules to same column).
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.).

    Note:
        NULL values are NOT failures (NULL is not in any list).

    Returns:
        Rule dict for use with kontra.validate().

    Raises:
        ValueError: If ``column`` is rejected by ``_validate_column``.

    Example:
        rules.disallowed_values("status", ["deleted", "banned", "spam"])
    """
    _validate_column(column, "disallowed_values")
    params: Dict[str, Any] = {"column": column, "values": values}
    return _build_rule("disallowed_values", params, severity, id=id, context=context)
494
+
495
+
496
def length(
    column: str,
    min: Optional[int] = None,
    max: Optional[int] = None,
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule requiring string lengths in a column to fall inside a range.

    Args:
        column: Name of the column to check.
        min: Minimum length (inclusive); omitted from the rule when None.
        max: Maximum length (inclusive); omitted from the rule when None.
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple rules to same column).
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.).

    At least one of `min` or `max` must be provided.

    Note:
        NULL values are failures (can't measure length of NULL).

    Returns:
        Rule dict for use with kontra.validate().

    Raises:
        ValueError: If the column name is invalid, if neither min nor max
            is provided, or if min > max.

    Example:
        rules.length("username", min=3, max=50)
    """
    _validate_column(column, "length")

    has_min = min is not None
    has_max = max is not None

    if not (has_min or has_max):
        raise ValueError("length rule: at least one of 'min' or 'max' must be provided")

    # Bounds are only compared when both were supplied.
    if has_min and has_max and min > max:
        raise ValueError(f"length rule: min ({min}) must be <= max ({max})")

    params: Dict[str, Any] = {"column": column}
    params.update(
        {key: bound for key, bound in (("min", min), ("max", max)) if bound is not None}
    )
    return _build_rule("length", params, severity, id=id, context=context)
544
+
545
+
546
def contains(
    column: str,
    substring: str,
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule requiring column values to contain a substring.

    Uses literal substring matching for efficiency.
    For regex patterns, use the `regex` rule instead.

    Args:
        column: Name of the column to check.
        substring: Substring that must be present; any falsy value
            (such as "" or None) is rejected.
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple rules to same column).
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.).

    Note:
        NULL values are failures (can't search in NULL).

    Returns:
        Rule dict for use with kontra.validate().

    Raises:
        ValueError: If the column name is invalid or ``substring`` is empty.

    Example:
        rules.contains("email", "@")
    """
    _validate_column(column, "contains")
    if not substring:
        raise ValueError("contains rule: substring cannot be empty")

    params: Dict[str, Any] = {"column": column, "substring": substring}
    return _build_rule("contains", params, severity, id=id, context=context)
579
+
580
+
581
def starts_with(
    column: str,
    prefix: str,
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule requiring column values to begin with a prefix.

    Uses LIKE pattern matching for efficiency (faster than regex).

    Args:
        column: Name of the column to check.
        prefix: Prefix that must be present at the start; any falsy value
            (such as "" or None) is rejected.
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple rules to same column).
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.).

    Note:
        NULL values are failures (can't check NULL).

    Returns:
        Rule dict for use with kontra.validate().

    Raises:
        ValueError: If the column name is invalid or ``prefix`` is empty.

    Example:
        rules.starts_with("url", "https://")
    """
    _validate_column(column, "starts_with")
    if not prefix:
        raise ValueError("starts_with rule: prefix cannot be empty")

    params: Dict[str, Any] = {"column": column, "prefix": prefix}
    return _build_rule("starts_with", params, severity, id=id, context=context)
613
+
614
+
615
def ends_with(
    column: str,
    suffix: str,
    severity: str = "blocking",
    id: Optional[str] = None,
    context: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a rule requiring column values to finish with a suffix.

    Uses LIKE pattern matching for efficiency (faster than regex).

    Args:
        column: Name of the column to check.
        suffix: Suffix that must be present at the end; any falsy value
            (such as "" or None) is rejected.
        severity: "blocking" | "warning" | "info"
        id: Custom rule ID (use when applying multiple rules to same column).
        context: Consumer-defined metadata (owner, tags, fix_hint, etc.).

    Note:
        NULL values are failures (can't check NULL).

    Returns:
        Rule dict for use with kontra.validate().

    Raises:
        ValueError: If the column name is invalid or ``suffix`` is empty.

    Example:
        rules.ends_with("filename", ".csv")
    """
    _validate_column(column, "ends_with")
    if not suffix:
        raise ValueError("ends_with rule: suffix cannot be empty")

    params: Dict[str, Any] = {"column": column, "suffix": suffix}
    return _build_rule("ends_with", params, severity, id=id, context=context)
647
+
648
+
649
+ # Module-level access for `from kontra import rules` then `rules.not_null(...)`
650
class _RulesModule:
    """
    Attribute namespace exposing every rule helper defined in this module.

    Each helper is bound as a ``staticmethod`` so that calls made through
    an instance (``rules.not_null(...)``) do not receive the instance as
    their first argument.
    """

    # Column-level value checks
    not_null = staticmethod(not_null)
    unique = staticmethod(unique)
    dtype = staticmethod(dtype)
    range = staticmethod(range)
    allowed_values = staticmethod(allowed_values)
    disallowed_values = staticmethod(disallowed_values)

    # String checks
    regex = staticmethod(regex)
    length = staticmethod(length)
    contains = staticmethod(contains)
    starts_with = staticmethod(starts_with)
    ends_with = staticmethod(ends_with)

    # Dataset-level and freshness checks
    min_rows = staticmethod(min_rows)
    max_rows = staticmethod(max_rows)
    freshness = staticmethod(freshness)

    # Custom SQL, cross-column and conditional checks
    custom_sql_check = staticmethod(custom_sql_check)
    compare = staticmethod(compare)
    conditional_not_null = staticmethod(conditional_not_null)
    conditional_range = staticmethod(conditional_range)

    def __repr__(self) -> str:
        """Return a fixed, module-like representation for the namespace."""
        return "<kontra.rules module>"


# Shared namespace instance exported by this module.
rules = _RulesModule()
kontra/cli/__init__.py ADDED
File without changes
@@ -0,0 +1 @@
1
+ """CLI command modules."""