kontra 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. kontra/__init__.py +1871 -0
  2. kontra/api/__init__.py +22 -0
  3. kontra/api/compare.py +340 -0
  4. kontra/api/decorators.py +153 -0
  5. kontra/api/results.py +2121 -0
  6. kontra/api/rules.py +681 -0
  7. kontra/cli/__init__.py +0 -0
  8. kontra/cli/commands/__init__.py +1 -0
  9. kontra/cli/commands/config.py +153 -0
  10. kontra/cli/commands/diff.py +450 -0
  11. kontra/cli/commands/history.py +196 -0
  12. kontra/cli/commands/profile.py +289 -0
  13. kontra/cli/commands/validate.py +468 -0
  14. kontra/cli/constants.py +6 -0
  15. kontra/cli/main.py +48 -0
  16. kontra/cli/renderers.py +304 -0
  17. kontra/cli/utils.py +28 -0
  18. kontra/config/__init__.py +34 -0
  19. kontra/config/loader.py +127 -0
  20. kontra/config/models.py +49 -0
  21. kontra/config/settings.py +797 -0
  22. kontra/connectors/__init__.py +0 -0
  23. kontra/connectors/db_utils.py +251 -0
  24. kontra/connectors/detection.py +323 -0
  25. kontra/connectors/handle.py +368 -0
  26. kontra/connectors/postgres.py +127 -0
  27. kontra/connectors/sqlserver.py +226 -0
  28. kontra/engine/__init__.py +0 -0
  29. kontra/engine/backends/duckdb_session.py +227 -0
  30. kontra/engine/backends/duckdb_utils.py +18 -0
  31. kontra/engine/backends/polars_backend.py +47 -0
  32. kontra/engine/engine.py +1205 -0
  33. kontra/engine/executors/__init__.py +15 -0
  34. kontra/engine/executors/base.py +50 -0
  35. kontra/engine/executors/database_base.py +528 -0
  36. kontra/engine/executors/duckdb_sql.py +607 -0
  37. kontra/engine/executors/postgres_sql.py +162 -0
  38. kontra/engine/executors/registry.py +69 -0
  39. kontra/engine/executors/sqlserver_sql.py +163 -0
  40. kontra/engine/materializers/__init__.py +14 -0
  41. kontra/engine/materializers/base.py +42 -0
  42. kontra/engine/materializers/duckdb.py +110 -0
  43. kontra/engine/materializers/factory.py +22 -0
  44. kontra/engine/materializers/polars_connector.py +131 -0
  45. kontra/engine/materializers/postgres.py +157 -0
  46. kontra/engine/materializers/registry.py +138 -0
  47. kontra/engine/materializers/sqlserver.py +160 -0
  48. kontra/engine/result.py +15 -0
  49. kontra/engine/sql_utils.py +611 -0
  50. kontra/engine/sql_validator.py +609 -0
  51. kontra/engine/stats.py +194 -0
  52. kontra/engine/types.py +138 -0
  53. kontra/errors.py +533 -0
  54. kontra/logging.py +85 -0
  55. kontra/preplan/__init__.py +5 -0
  56. kontra/preplan/planner.py +253 -0
  57. kontra/preplan/postgres.py +179 -0
  58. kontra/preplan/sqlserver.py +191 -0
  59. kontra/preplan/types.py +24 -0
  60. kontra/probes/__init__.py +20 -0
  61. kontra/probes/compare.py +400 -0
  62. kontra/probes/relationship.py +283 -0
  63. kontra/reporters/__init__.py +0 -0
  64. kontra/reporters/json_reporter.py +190 -0
  65. kontra/reporters/rich_reporter.py +11 -0
  66. kontra/rules/__init__.py +35 -0
  67. kontra/rules/base.py +186 -0
  68. kontra/rules/builtin/__init__.py +40 -0
  69. kontra/rules/builtin/allowed_values.py +156 -0
  70. kontra/rules/builtin/compare.py +188 -0
  71. kontra/rules/builtin/conditional_not_null.py +213 -0
  72. kontra/rules/builtin/conditional_range.py +310 -0
  73. kontra/rules/builtin/contains.py +138 -0
  74. kontra/rules/builtin/custom_sql_check.py +182 -0
  75. kontra/rules/builtin/disallowed_values.py +140 -0
  76. kontra/rules/builtin/dtype.py +203 -0
  77. kontra/rules/builtin/ends_with.py +129 -0
  78. kontra/rules/builtin/freshness.py +240 -0
  79. kontra/rules/builtin/length.py +193 -0
  80. kontra/rules/builtin/max_rows.py +35 -0
  81. kontra/rules/builtin/min_rows.py +46 -0
  82. kontra/rules/builtin/not_null.py +121 -0
  83. kontra/rules/builtin/range.py +222 -0
  84. kontra/rules/builtin/regex.py +143 -0
  85. kontra/rules/builtin/starts_with.py +129 -0
  86. kontra/rules/builtin/unique.py +124 -0
  87. kontra/rules/condition_parser.py +203 -0
  88. kontra/rules/execution_plan.py +455 -0
  89. kontra/rules/factory.py +103 -0
  90. kontra/rules/predicates.py +25 -0
  91. kontra/rules/registry.py +24 -0
  92. kontra/rules/static_predicates.py +120 -0
  93. kontra/scout/__init__.py +9 -0
  94. kontra/scout/backends/__init__.py +17 -0
  95. kontra/scout/backends/base.py +111 -0
  96. kontra/scout/backends/duckdb_backend.py +359 -0
  97. kontra/scout/backends/postgres_backend.py +519 -0
  98. kontra/scout/backends/sqlserver_backend.py +577 -0
  99. kontra/scout/dtype_mapping.py +150 -0
  100. kontra/scout/patterns.py +69 -0
  101. kontra/scout/profiler.py +801 -0
  102. kontra/scout/reporters/__init__.py +39 -0
  103. kontra/scout/reporters/json_reporter.py +165 -0
  104. kontra/scout/reporters/markdown_reporter.py +152 -0
  105. kontra/scout/reporters/rich_reporter.py +144 -0
  106. kontra/scout/store.py +208 -0
  107. kontra/scout/suggest.py +200 -0
  108. kontra/scout/types.py +652 -0
  109. kontra/state/__init__.py +29 -0
  110. kontra/state/backends/__init__.py +79 -0
  111. kontra/state/backends/base.py +348 -0
  112. kontra/state/backends/local.py +480 -0
  113. kontra/state/backends/postgres.py +1010 -0
  114. kontra/state/backends/s3.py +543 -0
  115. kontra/state/backends/sqlserver.py +969 -0
  116. kontra/state/fingerprint.py +166 -0
  117. kontra/state/types.py +1061 -0
  118. kontra/version.py +1 -0
  119. kontra-0.5.2.dist-info/METADATA +122 -0
  120. kontra-0.5.2.dist-info/RECORD +124 -0
  121. kontra-0.5.2.dist-info/WHEEL +5 -0
  122. kontra-0.5.2.dist-info/entry_points.txt +2 -0
  123. kontra-0.5.2.dist-info/licenses/LICENSE +17 -0
  124. kontra-0.5.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,140 @@
1
+ # src/kontra/rules/builtin/disallowed_values.py
2
+ """
3
+ Disallowed values rule - Column must NOT contain any of the specified values.
4
+
5
+ Inverse of allowed_values: fails if value IS in the list.
6
+
7
+ Usage:
8
+ - name: disallowed_values
9
+ params:
10
+ column: status
11
+ values: ["deleted", "banned", "spam"]
12
+
13
+ Fails when:
14
+ - The column value IS in the disallowed values list
15
+
16
+ Passes when:
17
+ - The column value is NOT in the disallowed values list
18
+ - The column value is NULL (NULL is not in any list)
19
+ """
20
+ from __future__ import annotations
21
+
22
+ from typing import Any, Dict, List, Optional, Sequence, Set
23
+
24
+ import polars as pl
25
+
26
+ from kontra.rules.base import BaseRule
27
+ from kontra.rules.registry import register_rule
28
+ from kontra.rules.predicates import Predicate
29
+ from kontra.state.types import FailureMode
30
+
31
+
32
@register_rule("disallowed_values")
class DisallowedValuesRule(BaseRule):
    """
    Flags rows whose column value appears in a forbidden set.

    params:
      - column: str (required) - Column to check
      - values: list (required) - Values that must not appear

    NULL handling:
      - NULL values never fail (NULL is not a member of any list)
    """

    def __init__(self, name: str, params: Dict[str, Any]):
        super().__init__(name, params)
        self._column = self._get_required_param("column", str)
        try:
            self._values: Sequence[Any] = self.params["values"]
        except KeyError:
            raise ValueError(
                f"Rule '{self.name}' requires parameter 'values' but it was not provided"
            ) from None

    def required_columns(self) -> Set[str]:
        return {self._column}

    def validate(self, df: pl.DataFrame) -> Dict[str, Any]:
        """Run the check eagerly against a materialized DataFrame."""
        col_check = self._check_columns(df, {self._column})
        if col_check is not None:
            return col_check

        # A row fails when its value is a member of the forbidden set.
        # is_in yields NULL for NULL inputs; fill_null(False) makes NULL pass.
        hit = df[self._column].is_in(list(self._values)).fill_null(False)

        result = self._failures(df, hit, f"{self._column} contains disallowed values")
        result["rule_id"] = self.rule_id

        if result["failed_count"] > 0:
            result["failure_mode"] = str(FailureMode.NOVEL_CATEGORY)
            result["details"] = self._explain_failure(df, hit)

        return result

    def _explain_failure(self, df: pl.DataFrame, mask: pl.Series) -> Dict[str, Any]:
        """Summarize which forbidden values occurred, with per-value counts (top 10)."""
        counts = (
            df.filter(mask)
            .group_by(self._column)
            .agg(pl.len().alias("count"))
            .sort("count", descending=True)
            .head(10)
        )

        found: List[Dict[str, Any]] = [
            {"value": row[self._column], "count": row["count"]}
            for row in counts.iter_rows(named=True)
        ]

        return {
            "disallowed": [str(v) for v in self._values],
            "found_values": found,
        }

    def compile_predicate(self) -> Optional[Predicate]:
        """Lazy Polars expression form: truthy exactly where a row fails."""
        return Predicate(
            rule_id=self.rule_id,
            expr=pl.col(self._column).is_in(self._values).fill_null(False),
            message=f"{self._column} contains disallowed values",
            columns={self._column},
        )

    def to_sql_spec(self) -> Optional[Dict[str, Any]]:
        """Generate SQL pushdown specification."""
        return {
            "kind": "disallowed_values",
            "rule_id": self.rule_id,
            "column": self._column,
            "values": list(self._values),
        }

    def to_sql_filter(self, dialect: str = "postgres") -> str | None:
        """Generate SQL filter for sampling failing rows."""
        col = f'"{self._column}"'

        # Render each value as a SQL literal; None is skipped because NULL
        # can never match IN (mirrors validate()'s NULL handling).
        literals: List[str] = []
        for value in self._values:
            if value is None:
                continue
            if isinstance(value, str):
                literals.append("'" + value.replace("'", "''") + "'")
            elif isinstance(value, bool):
                literals.append("TRUE" if value else "FALSE")
            else:
                literals.append(str(value))

        if not literals:
            return None  # No values to check

        # Failure = value IS in the disallowed list (not null)
        return f"{col} IN ({', '.join(literals)})"
@@ -0,0 +1,203 @@
1
+ from __future__ import annotations
2
+ from typing import Dict, Any, Optional, Set, Tuple
3
+
4
+ import polars as pl
5
+
6
+ from kontra.rules.base import BaseRule
7
+ from kontra.rules.registry import register_rule
8
+ from kontra.state.types import FailureMode
9
+
10
+
11
@register_rule("dtype")
class DtypeRule(BaseRule):
    """
    Dtype — schema-level type check for a single column.

    Params
    ------
    - column: str  # required
    - type: str    # required
        Accepts either:
          * exact physical types: int8/int16/int32/int64, uint8/uint16/uint32/uint64,
            float32/float64 (or float/double as aliases),
            boolean/bool, utf8/string/str/text, date, datetime, time
          * logical families: int/integer, float, numeric, string/str
    - mode: "strict"  # optional (default). Future: may support relaxed modes.

    Semantics
    ---------
    - Exact types require an exact match (e.g., "int16" passes only if the column is Int16).
    - Family types accept any member of the family (e.g., "int" accepts Int8/16/32/64).
    - Strings: "utf8", "string", "str", "text" are treated as the same family (Utf8 or String).
    - We do NOT cast — we only validate.

    Results
    -------
    - On mismatch or invalid config, `failed_count == nrows` (schema-level violation).
    - Message is deterministic: "<col> expected <expected>, found <ActualDtype>".
    """

    # Valid type names (echoed in configuration error messages)
    _VALID_TYPES = [
        # Exact types
        "int8", "int16", "int32", "int64",
        "uint8", "uint16", "uint32", "uint64",
        "float32", "float64", "float", "double",
        "bool", "boolean",
        "date", "datetime", "time",
        "utf8", "string", "str", "text",
        # Family types
        "int", "integer", "numeric",
    ]

    # ---- Aliases / Maps -----------------------------------------------------

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        from kontra.errors import RuleParameterError

        # Fail fast at construction when the configured type name is unknown.
        expected_type = self.params.get("type")
        if expected_type is None:
            return
        _, allowed = self._normalize_expected(str(expected_type))
        if allowed is None:
            raise RuleParameterError(
                "dtype", "type",
                f"unknown type '{expected_type}'. Valid types: {', '.join(sorted(self._VALID_TYPES))}"
            )

    _STRING_ALIASES = {"utf8", "string", "str", "text"}

    # Exact physical types (single-member sets treated as "exact")
    _EXACT_MAP = {
        # signed ints
        "int8": {pl.Int8}, "int16": {pl.Int16}, "int32": {pl.Int32}, "int64": {pl.Int64},
        # unsigned ints
        "uint8": {pl.UInt8}, "uint16": {pl.UInt16}, "uint32": {pl.UInt32}, "uint64": {pl.UInt64},
        # floats
        "float32": {pl.Float32}, "float64": {pl.Float64},
        "float": {pl.Float64}, "double": {pl.Float64},  # common aliases treated as exact Float64
        # booleans
        "bool": {pl.Boolean}, "boolean": {pl.Boolean},
        # temporal
        "date": {pl.Date}, "datetime": {pl.Datetime}, "time": {pl.Time},
    }

    # Logical families (multi-member sets)
    _FAMILY_MAP = {
        "int": {pl.Int8, pl.Int16, pl.Int32, pl.Int64},
        "integer": {pl.Int8, pl.Int16, pl.Int32, pl.Int64},
        "float": {pl.Float32, pl.Float64},
        "numeric": {pl.Int8, pl.Int16, pl.Int32, pl.Int64, pl.Float32, pl.Float64},
        "string": {pl.Utf8, getattr(pl, "String", pl.Utf8)},  # tolerate both Utf8 and String
        "str": {pl.Utf8, getattr(pl, "String", pl.Utf8)},
        "text": {pl.Utf8, getattr(pl, "String", pl.Utf8)},
        "utf8": {pl.Utf8, getattr(pl, "String", pl.Utf8)},
    }

    # ---- Normalization ------------------------------------------------------

    @staticmethod
    def _dtype_label(dt: pl.DataType) -> str:
        """Stable, user-friendly label for the actual dtype in messages."""
        # Polars dtypes stringify nicely (e.g., "Int64"); normalize the
        # Utf8/String split so messages always read "Utf8".
        if dt == pl.Utf8:
            return "Utf8"
        string_dtype = getattr(pl, "String", None)
        if string_dtype and dt == string_dtype:
            return "Utf8"
        return str(dt)

    def _normalize_expected(self, typ: str) -> Tuple[str, Optional[set]]:
        """
        Returns (label, allowed_set).
          - label: string echoed in error messages ("int16", "int", "date", ...)
          - allowed_set: acceptable Polars dtypes (None if unknown)
        """
        t = (typ or "").strip().lower()
        if not t:
            return "<unspecified>", None

        # Family first (covers "string", "str", "utf8", ...); also tolerate
        # hyphen variants like "utf-8".
        for candidate in (t, t.replace("-", "")):
            if candidate in self._FAMILY_MAP:
                return candidate, self._FAMILY_MAP[candidate]

        # Exact physical types (single-member sets)
        if t in self._EXACT_MAP:
            return t, self._EXACT_MAP[t]

        return t, None

    # ---- Rule contract ------------------------------------------------------

    def _fail_all(self, df: pl.DataFrame, message: str) -> Dict[str, Any]:
        """Build a schema-level failure: every row counts as failed."""
        return {
            "rule_id": self.rule_id,
            "passed": False,
            "failed_count": int(df.height),
            "message": message,
        }

    def validate(self, df: pl.DataFrame) -> Dict[str, Any]:
        column = self.params.get("column")
        expected_type = self.params.get("type")
        mode = (self.params.get("mode") or "strict").lower()

        # Guard clauses — each invalid configuration fails the whole frame
        # with its own deterministic message.
        if mode != "strict":
            return self._fail_all(
                df, f"Unsupported dtype mode '{mode}'; only 'strict' is implemented."
            )
        if not isinstance(column, str) or not column:
            return self._fail_all(df, "Missing required 'column' parameter for dtype rule")
        if column not in df.columns:
            return self._fail_all(df, f"Column '{column}' not found for dtype check")

        label, allowed = self._normalize_expected(
            str(expected_type) if expected_type is not None else ""
        )
        if allowed is None:
            return self._fail_all(df, f"Invalid expected dtype '{expected_type}'")

        actual = df[column].dtype
        # Equality comparison instead of set membership: parametric types like
        # Datetime(time_unit='us') hash differently from pl.Datetime but are
        # equal via __eq__.
        ok = any(actual == candidate for candidate in allowed)

        outcome: Dict[str, Any] = {
            "rule_id": self.rule_id,
            "passed": bool(ok),
            "failed_count": 0 if ok else int(df.height),
            "message": "Passed" if ok else f"{column} expected {label}, found {self._dtype_label(actual)}",
        }

        if not ok:
            outcome["failure_mode"] = str(FailureMode.SCHEMA_DRIFT)
            outcome["details"] = {
                "expected_type": label,
                "actual_type": self._dtype_label(actual),
                "column": column,
            }

        return outcome

    def required_columns(self) -> Set[str]:
        # dtype check inspects the column's dtype; ensure it is loaded (projection).
        col = self.params.get("column")
        return {col} if isinstance(col, str) else set()
@@ -0,0 +1,129 @@
1
+ # src/kontra/rules/builtin/ends_with.py
2
+ """
3
+ Ends with rule - Column must end with the specified suffix.
4
+
5
+ Uses LIKE pattern matching for maximum efficiency (faster than regex).
6
+
7
+ Usage:
8
+ - name: ends_with
9
+ params:
10
+ column: filename
11
+ suffix: ".csv"
12
+
13
+ Fails when:
14
+ - Value does NOT end with the suffix
15
+ - Value is NULL (can't check NULL)
16
+ """
17
+ from __future__ import annotations
18
+
19
+ from typing import Any, Dict, List, Optional, Set
20
+
21
+ import polars as pl
22
+
23
+ from kontra.rules.base import BaseRule
24
+ from kontra.rules.registry import register_rule
25
+ from kontra.rules.predicates import Predicate
26
+ from kontra.state.types import FailureMode
27
+
28
+
29
+ def _escape_like_pattern(value: str, escape_char: str = "\\") -> str:
30
+ """Escape LIKE special characters: %, _, and the escape char."""
31
+ for c in (escape_char, "%", "_"):
32
+ value = value.replace(c, escape_char + c)
33
+ return value
34
+
35
+
36
@register_rule("ends_with")
class EndsWithRule(BaseRule):
    """
    Fails where a value does NOT end with the configured suffix.

    params:
      - column: str (required) - Column to check
      - suffix: str (required) - Suffix that must be present (non-empty)

    NULL handling:
      - NULL values are failures (a missing value cannot end with anything)
    """

    def __init__(self, name: str, params: Dict[str, Any]):
        super().__init__(name, params)
        self._column = self._get_required_param("column", str)
        self._suffix = self._get_required_param("suffix", str)

        if not self._suffix:
            raise ValueError("Rule 'ends_with' suffix cannot be empty")

    def required_columns(self) -> Set[str]:
        return {self._column}

    def validate(self, df: pl.DataFrame) -> Dict[str, Any]:
        """Run the check eagerly against a materialized DataFrame."""
        col_check = self._check_columns(df, {self._column})
        if col_check is not None:
            return col_check

        # str.ends_with yields NULL for NULL input; negating keeps it NULL,
        # so fill_null(True) marks NULLs as failures.
        matched = df[self._column].cast(pl.Utf8).str.ends_with(self._suffix)
        failing = (~matched).fill_null(True)

        res = self._failures(df, failing, f"{self._column} does not end with '{self._suffix}'")
        res["rule_id"] = self.rule_id

        if res["failed_count"] > 0:
            res["failure_mode"] = str(FailureMode.PATTERN_MISMATCH)
            res["details"] = self._explain_failure(df, failing)

        return res

    def _explain_failure(self, df: pl.DataFrame, mask: pl.Series) -> Dict[str, Any]:
        """Generate detailed failure explanation with up to 5 sample values."""
        info: Dict[str, Any] = {
            "column": self._column,
            "expected_suffix": self._suffix,
        }

        samples: List[Any] = list(df.filter(mask).head(5)[self._column])
        if samples:
            info["sample_failures"] = samples

        return info

    def compile_predicate(self) -> Optional[Predicate]:
        """Lazy Polars expression form: truthy exactly where a row fails."""
        matched = pl.col(self._column).cast(pl.Utf8).str.ends_with(self._suffix)
        return Predicate(
            rule_id=self.rule_id,
            expr=(~matched).fill_null(True),
            message=f"{self._column} does not end with '{self._suffix}'",
            columns={self._column},
        )

    def to_sql_spec(self) -> Optional[Dict[str, Any]]:
        """Generate SQL pushdown specification."""
        return {
            "kind": "ends_with",
            "rule_id": self.rule_id,
            "column": self._column,
            "suffix": self._suffix,
        }

    def to_sql_filter(self, dialect: str = "postgres") -> str | None:
        """Generate SQL filter for sampling failing rows."""
        col = f'"{self._column}"'

        # Escape LIKE wildcards in the suffix, then anchor it at the end.
        pattern = "%" + _escape_like_pattern(self._suffix)

        # Failure = does NOT end with OR is NULL
        return f"{col} IS NULL OR {col} NOT LIKE '{pattern}' ESCAPE '\\'"