kontra-0.5.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. kontra/__init__.py +1871 -0
  2. kontra/api/__init__.py +22 -0
  3. kontra/api/compare.py +340 -0
  4. kontra/api/decorators.py +153 -0
  5. kontra/api/results.py +2121 -0
  6. kontra/api/rules.py +681 -0
  7. kontra/cli/__init__.py +0 -0
  8. kontra/cli/commands/__init__.py +1 -0
  9. kontra/cli/commands/config.py +153 -0
  10. kontra/cli/commands/diff.py +450 -0
  11. kontra/cli/commands/history.py +196 -0
  12. kontra/cli/commands/profile.py +289 -0
  13. kontra/cli/commands/validate.py +468 -0
  14. kontra/cli/constants.py +6 -0
  15. kontra/cli/main.py +48 -0
  16. kontra/cli/renderers.py +304 -0
  17. kontra/cli/utils.py +28 -0
  18. kontra/config/__init__.py +34 -0
  19. kontra/config/loader.py +127 -0
  20. kontra/config/models.py +49 -0
  21. kontra/config/settings.py +797 -0
  22. kontra/connectors/__init__.py +0 -0
  23. kontra/connectors/db_utils.py +251 -0
  24. kontra/connectors/detection.py +323 -0
  25. kontra/connectors/handle.py +368 -0
  26. kontra/connectors/postgres.py +127 -0
  27. kontra/connectors/sqlserver.py +226 -0
  28. kontra/engine/__init__.py +0 -0
  29. kontra/engine/backends/duckdb_session.py +227 -0
  30. kontra/engine/backends/duckdb_utils.py +18 -0
  31. kontra/engine/backends/polars_backend.py +47 -0
  32. kontra/engine/engine.py +1205 -0
  33. kontra/engine/executors/__init__.py +15 -0
  34. kontra/engine/executors/base.py +50 -0
  35. kontra/engine/executors/database_base.py +528 -0
  36. kontra/engine/executors/duckdb_sql.py +607 -0
  37. kontra/engine/executors/postgres_sql.py +162 -0
  38. kontra/engine/executors/registry.py +69 -0
  39. kontra/engine/executors/sqlserver_sql.py +163 -0
  40. kontra/engine/materializers/__init__.py +14 -0
  41. kontra/engine/materializers/base.py +42 -0
  42. kontra/engine/materializers/duckdb.py +110 -0
  43. kontra/engine/materializers/factory.py +22 -0
  44. kontra/engine/materializers/polars_connector.py +131 -0
  45. kontra/engine/materializers/postgres.py +157 -0
  46. kontra/engine/materializers/registry.py +138 -0
  47. kontra/engine/materializers/sqlserver.py +160 -0
  48. kontra/engine/result.py +15 -0
  49. kontra/engine/sql_utils.py +611 -0
  50. kontra/engine/sql_validator.py +609 -0
  51. kontra/engine/stats.py +194 -0
  52. kontra/engine/types.py +138 -0
  53. kontra/errors.py +533 -0
  54. kontra/logging.py +85 -0
  55. kontra/preplan/__init__.py +5 -0
  56. kontra/preplan/planner.py +253 -0
  57. kontra/preplan/postgres.py +179 -0
  58. kontra/preplan/sqlserver.py +191 -0
  59. kontra/preplan/types.py +24 -0
  60. kontra/probes/__init__.py +20 -0
  61. kontra/probes/compare.py +400 -0
  62. kontra/probes/relationship.py +283 -0
  63. kontra/reporters/__init__.py +0 -0
  64. kontra/reporters/json_reporter.py +190 -0
  65. kontra/reporters/rich_reporter.py +11 -0
  66. kontra/rules/__init__.py +35 -0
  67. kontra/rules/base.py +186 -0
  68. kontra/rules/builtin/__init__.py +40 -0
  69. kontra/rules/builtin/allowed_values.py +156 -0
  70. kontra/rules/builtin/compare.py +188 -0
  71. kontra/rules/builtin/conditional_not_null.py +213 -0
  72. kontra/rules/builtin/conditional_range.py +310 -0
  73. kontra/rules/builtin/contains.py +138 -0
  74. kontra/rules/builtin/custom_sql_check.py +182 -0
  75. kontra/rules/builtin/disallowed_values.py +140 -0
  76. kontra/rules/builtin/dtype.py +203 -0
  77. kontra/rules/builtin/ends_with.py +129 -0
  78. kontra/rules/builtin/freshness.py +240 -0
  79. kontra/rules/builtin/length.py +193 -0
  80. kontra/rules/builtin/max_rows.py +35 -0
  81. kontra/rules/builtin/min_rows.py +46 -0
  82. kontra/rules/builtin/not_null.py +121 -0
  83. kontra/rules/builtin/range.py +222 -0
  84. kontra/rules/builtin/regex.py +143 -0
  85. kontra/rules/builtin/starts_with.py +129 -0
  86. kontra/rules/builtin/unique.py +124 -0
  87. kontra/rules/condition_parser.py +203 -0
  88. kontra/rules/execution_plan.py +455 -0
  89. kontra/rules/factory.py +103 -0
  90. kontra/rules/predicates.py +25 -0
  91. kontra/rules/registry.py +24 -0
  92. kontra/rules/static_predicates.py +120 -0
  93. kontra/scout/__init__.py +9 -0
  94. kontra/scout/backends/__init__.py +17 -0
  95. kontra/scout/backends/base.py +111 -0
  96. kontra/scout/backends/duckdb_backend.py +359 -0
  97. kontra/scout/backends/postgres_backend.py +519 -0
  98. kontra/scout/backends/sqlserver_backend.py +577 -0
  99. kontra/scout/dtype_mapping.py +150 -0
  100. kontra/scout/patterns.py +69 -0
  101. kontra/scout/profiler.py +801 -0
  102. kontra/scout/reporters/__init__.py +39 -0
  103. kontra/scout/reporters/json_reporter.py +165 -0
  104. kontra/scout/reporters/markdown_reporter.py +152 -0
  105. kontra/scout/reporters/rich_reporter.py +144 -0
  106. kontra/scout/store.py +208 -0
  107. kontra/scout/suggest.py +200 -0
  108. kontra/scout/types.py +652 -0
  109. kontra/state/__init__.py +29 -0
  110. kontra/state/backends/__init__.py +79 -0
  111. kontra/state/backends/base.py +348 -0
  112. kontra/state/backends/local.py +480 -0
  113. kontra/state/backends/postgres.py +1010 -0
  114. kontra/state/backends/s3.py +543 -0
  115. kontra/state/backends/sqlserver.py +969 -0
  116. kontra/state/fingerprint.py +166 -0
  117. kontra/state/types.py +1061 -0
  118. kontra/version.py +1 -0
  119. kontra-0.5.2.dist-info/METADATA +122 -0
  120. kontra-0.5.2.dist-info/RECORD +124 -0
  121. kontra-0.5.2.dist-info/WHEEL +5 -0
  122. kontra-0.5.2.dist-info/entry_points.txt +2 -0
  123. kontra-0.5.2.dist-info/licenses/LICENSE +17 -0
  124. kontra-0.5.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,15 @@
+ # src/kontra/engine/executors/__init__.py
+ from .base import SqlExecutor
+ from .registry import (
+     pick_executor,
+     register_default_executors,
+     register_executor,
+ )
+
+ # Re-export for convenience
+ __all__ = [
+     "SqlExecutor",
+     "pick_executor",
+     "register_executor",
+     "register_default_executors",
+ ]
@@ -0,0 +1,50 @@
+ # src/kontra/engine/executors/base.py
+ from __future__ import annotations
+
+ from typing import Any, Dict, List, Protocol
+
+ from kontra.connectors.handle import DatasetHandle
+
+
+ class SqlExecutor(Protocol):
+     """
+     Protocol for a pluggable, SQL-based rule executor.
+
+     An executor is responsible for:
+     1. Reporting if it can handle a given data source and rule set.
+     2. Compiling a list of Kontra rules into a single SQL query.
+     3. Executing that query and returning results in the Kontra format.
+     4. (Optional) Introspecting the data source for metadata.
+     """
+
+     name: str = "sql_executor"
+
+     def supports(
+         self, handle: DatasetHandle, sql_specs: List[Dict[str, Any]]
+     ) -> bool:
+         """
+         Return True if this executor can run against the given handle
+         and supports at least one of the provided SQL-compatible rules.
+         """
+         ...
+
+     def compile(self, sql_specs: List[Dict[str, Any]]) -> Any:
+         """
+         Compile the list of rule specs into a native, executable query plan
+         (e.g., a SQL string).
+         """
+         ...
+
+     def execute(self, handle: DatasetHandle, compiled_plan: Any) -> Dict[str, Any]:
+         """
+         Execute the compiled plan against the data in the handle.
+         Must return a dict: {"results": [...]}
+         """
+         ...
+
+     def introspect(self, handle: DatasetHandle) -> Dict[str, Any]:
+         """
+         Perform lightweight introspection (e.g., row count, column names).
+         Must return a dict: {"row_count": int, "available_cols": list[str]}
+         """
+         ...
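
Because SqlExecutor is a typing.Protocol, a conforming executor does not have to inherit from it; it only needs to expose these four methods with compatible shapes. As a rough structural sketch (illustrative only, not shipped in the package; the return values are placeholders), an executor could be as small as:

# Illustrative sketch only -- not part of kontra.
from typing import Any, Dict, List


class NoOpExecutor:
    """Structurally satisfies the SqlExecutor protocol without doing real work."""

    name = "noop"

    def supports(self, handle: Any, sql_specs: List[Dict[str, Any]]) -> bool:
        # A real executor inspects handle.scheme and the rule kinds it can compile.
        return bool(sql_specs)

    def compile(self, sql_specs: List[Dict[str, Any]]) -> Any:
        # A real executor returns a native plan, e.g. a SQL string.
        return ""

    def execute(self, handle: Any, compiled_plan: Any) -> Dict[str, Any]:
        # Contract from the protocol: must return {"results": [...]}.
        return {"results": []}

    def introspect(self, handle: Any) -> Dict[str, Any]:
        # Contract: {"row_count": int, "available_cols": list[str]}.
        return {"row_count": 0, "available_cols": []}
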
@@ -0,0 +1,528 @@
+ # src/kontra/engine/executors/database_base.py
+ """
+ Base class for database SQL executors (PostgreSQL, SQL Server).
+
+ This module provides shared implementation for compile() and execute() methods,
+ reducing code duplication between database-specific executors.
+
+ Each subclass must define:
+ - DIALECT: "postgres" or "sqlserver"
+ - SUPPORTED_RULES: Set of rule kinds this executor supports
+ - _get_connection_ctx(): Connection context manager
+ - _get_table_reference(): Fully-qualified table reference
+ """
+
+ from __future__ import annotations
+
+ from abc import ABC, abstractmethod
+ from contextlib import contextmanager
+ from typing import Any, Dict, List, Optional, Set, Tuple
+
+ from kontra.connectors.handle import DatasetHandle
+ from kontra.engine.sql_utils import (
+     esc_ident,
+     agg_unique,
+     agg_min_rows,
+     agg_max_rows,
+     agg_allowed_values,
+     agg_disallowed_values,
+     agg_freshness,
+     agg_range,
+     agg_length,
+     agg_regex,
+     agg_contains,
+     agg_starts_with,
+     agg_ends_with,
+     agg_compare,
+     agg_conditional_not_null,
+     agg_conditional_range,
+     exists_not_null,
+     results_from_row,
+     Dialect,
+ )
+ from kontra.engine.sql_validator import validate_sql, replace_table_placeholder, to_count_query
+ from kontra.logging import get_logger
+
+ from .base import SqlExecutor
+
+ _logger = get_logger(__name__)
+
+
+ class DatabaseSqlExecutor(SqlExecutor, ABC):
+     """
+     Abstract base class for database-backed SQL executors.
+
+     Provides shared implementation for compile() and execute() methods.
+     Subclasses must implement dialect-specific connection and table handling.
+     """
+
+     # Subclasses must define these
+     DIALECT: Dialect
+     SUPPORTED_RULES: Set[str]
+
+     @property
+     @abstractmethod
+     def name(self) -> str:
+         """Executor name for registry."""
+         ...
+
+     @abstractmethod
+     @contextmanager
+     def _get_connection_ctx(self, handle: DatasetHandle):
+         """
+         Get a database connection context manager.
+
+         For BYOC, yields the external connection directly.
+         For URI-based, yields a new owned connection.
+         """
+         ...
+
+     @abstractmethod
+     def _get_table_reference(self, handle: DatasetHandle) -> str:
+         """
+         Get the fully-qualified table reference for the handle.
+
+         Returns: "schema.table" format with proper escaping.
+         """
+         ...
+
+     @abstractmethod
+     def _supports_scheme(self, scheme: str, handle: DatasetHandle) -> bool:
+         """
+         Check if this executor supports the given URI scheme.
+
+         Args:
+             scheme: The URI scheme (lowercase)
+             handle: The dataset handle for additional context (e.g., dialect)
+
+         Returns:
+             True if this executor can handle the scheme
+         """
+         ...
+
+     def _esc(self, name: str) -> str:
+         """Escape an identifier for this dialect."""
+         return esc_ident(name, self.DIALECT)
+
+     def _get_schema_and_table(self, handle: DatasetHandle) -> Tuple[str, str]:
+         """
+         Get schema and table name separately (for custom SQL placeholder replacement).
+
+         Returns:
+             Tuple of (schema, table_name)
+         """
+         # Default implementation - subclasses should override
+         # This extracts from the table reference or connection params
+         raise NotImplementedError("Subclass must implement _get_schema_and_table")
+
+     def _assemble_single_row(self, selects: List[str], table: str) -> str:
+         """Build a single-row aggregate query from multiple SELECT expressions."""
+         if not selects:
+             return "SELECT 0 AS __no_sql_rules__;"
+         return f"SELECT {', '.join(selects)} FROM {table};"
+
+     def _assemble_exists_query(self, exists_exprs: List[str]) -> str:
+         """Build a query with multiple EXISTS checks."""
+         if not exists_exprs:
+             return ""
+         return f"SELECT {', '.join(exists_exprs)};"
+
+     def supports(
+         self, handle: DatasetHandle, sql_specs: List[Dict[str, Any]]
+     ) -> bool:
+         """Check if this executor can handle the given handle and rules."""
+         scheme = (handle.scheme or "").lower()
+
+         if not self._supports_scheme(scheme, handle):
+             return False
+
+         # Must have at least one supported rule
+         return any(
+             s.get("kind") in self.SUPPORTED_RULES
+             for s in (sql_specs or [])
+         )
+
+     def compile(self, sql_specs: List[Dict[str, Any]]) -> Dict[str, Any]:
+         """
+         Compile rule specs into three-phase execution plan.
+
+         Phase 1: EXISTS checks for not_null rules (fast, early-terminate)
+         Phase 2: Aggregate query for most rules (batched into single query)
+         Phase 3: Custom SQL queries (each executed individually)
+
+         Returns:
+             {
+                 "exists_specs": [...],       # Phase 1: not_null rules
+                 "aggregate_selects": [...],  # Phase 2: aggregate expressions
+                 "aggregate_specs": [...],    # Phase 2: specs for aggregates
+                 "custom_sql_specs": [...],   # Phase 3: custom SQL queries
+                 "supported_specs": [...],    # All supported specs
+             }
+         """
+         exists_specs: List[Dict[str, Any]] = []
+         aggregate_selects: List[str] = []
+         aggregate_specs: List[Dict[str, Any]] = []
+         custom_sql_specs: List[Dict[str, Any]] = []
+         supported_specs: List[Dict[str, Any]] = []
+
+         for spec in sql_specs or []:
+             kind = spec.get("kind")
+             rule_id = spec.get("rule_id")
+
+             if not (kind and rule_id):
+                 continue
+
+             # Skip unsupported rules
+             if kind not in self.SUPPORTED_RULES:
+                 continue
+
+             if kind == "custom_sql_check":
+                 # Validate SQL is safe using sqlglot before accepting
+                 user_sql = spec.get("sql")
+                 if user_sql:
+                     # Replace {table} with dummy name for validation
+                     # (sqlglot can't parse {table} as valid SQL)
+                     test_sql = user_sql.replace("{table}", "_validation_table_")
+                     validation = validate_sql(test_sql, dialect=self.DIALECT)
+                     if validation.is_safe:
+                         custom_sql_specs.append(spec)
+                         supported_specs.append(spec)
+                     else:
+                         _logger.warning(
+                             f"custom_sql_check '{rule_id}' rejected for remote execution: "
+                             f"{validation.reason}"
+                         )
+                 continue
+
+             if kind == "not_null":
+                 col = spec.get("column")
+                 if isinstance(col, str) and col:
+                     exists_specs.append(spec)
+                     supported_specs.append(spec)
+
+             elif kind == "unique":
+                 col = spec.get("column")
+                 if isinstance(col, str) and col:
+                     aggregate_selects.append(agg_unique(col, rule_id, self.DIALECT))
+                     aggregate_specs.append(spec)
+                     supported_specs.append(spec)
+
+             elif kind == "min_rows":
+                 threshold = spec.get("threshold", 0)
+                 aggregate_selects.append(agg_min_rows(int(threshold), rule_id, self.DIALECT))
+                 aggregate_specs.append(spec)
+                 supported_specs.append(spec)
+
+             elif kind == "max_rows":
+                 threshold = spec.get("threshold", 0)
+                 aggregate_selects.append(agg_max_rows(int(threshold), rule_id, self.DIALECT))
+                 aggregate_specs.append(spec)
+                 supported_specs.append(spec)
+
+             elif kind == "allowed_values":
+                 col = spec.get("column")
+                 values = spec.get("values", [])
+                 if isinstance(col, str) and col and values:
+                     aggregate_selects.append(agg_allowed_values(col, values, rule_id, self.DIALECT))
+                     aggregate_specs.append(spec)
+                     supported_specs.append(spec)
+
+             elif kind == "disallowed_values":
+                 col = spec.get("column")
+                 values = spec.get("values", [])
+                 if isinstance(col, str) and col and values:
+                     aggregate_selects.append(agg_disallowed_values(col, values, rule_id, self.DIALECT))
+                     aggregate_specs.append(spec)
+                     supported_specs.append(spec)
+
+             elif kind == "freshness":
+                 col = spec.get("column")
+                 max_age_seconds = spec.get("max_age_seconds")
+                 if isinstance(col, str) and col and isinstance(max_age_seconds, int):
+                     aggregate_selects.append(agg_freshness(col, max_age_seconds, rule_id, self.DIALECT))
+                     aggregate_specs.append(spec)
+                     supported_specs.append(spec)
+
+             elif kind == "range":
+                 col = spec.get("column")
+                 min_val = spec.get("min")
+                 max_val = spec.get("max")
+                 if isinstance(col, str) and col and (min_val is not None or max_val is not None):
+                     aggregate_selects.append(agg_range(col, min_val, max_val, rule_id, self.DIALECT))
+                     aggregate_specs.append(spec)
+                     supported_specs.append(spec)
+
+             elif kind == "length":
+                 col = spec.get("column")
+                 min_len = spec.get("min")
+                 max_len = spec.get("max")
+                 if isinstance(col, str) and col and (min_len is not None or max_len is not None):
+                     aggregate_selects.append(agg_length(col, min_len, max_len, rule_id, self.DIALECT))
+                     aggregate_specs.append(spec)
+                     supported_specs.append(spec)
+
+             elif kind == "regex":
+                 col = spec.get("column")
+                 pattern = spec.get("pattern")
+                 if isinstance(col, str) and col and isinstance(pattern, str) and pattern:
+                     aggregate_selects.append(agg_regex(col, pattern, rule_id, self.DIALECT))
+                     aggregate_specs.append(spec)
+                     supported_specs.append(spec)
+
+             elif kind == "contains":
+                 col = spec.get("column")
+                 substring = spec.get("substring")
+                 if isinstance(col, str) and col and isinstance(substring, str) and substring:
+                     aggregate_selects.append(agg_contains(col, substring, rule_id, self.DIALECT))
+                     aggregate_specs.append(spec)
+                     supported_specs.append(spec)
+
+             elif kind == "starts_with":
+                 col = spec.get("column")
+                 prefix = spec.get("prefix")
+                 if isinstance(col, str) and col and isinstance(prefix, str) and prefix:
+                     aggregate_selects.append(agg_starts_with(col, prefix, rule_id, self.DIALECT))
+                     aggregate_specs.append(spec)
+                     supported_specs.append(spec)
+
+             elif kind == "ends_with":
+                 col = spec.get("column")
+                 suffix = spec.get("suffix")
+                 if isinstance(col, str) and col and isinstance(suffix, str) and suffix:
+                     aggregate_selects.append(agg_ends_with(col, suffix, rule_id, self.DIALECT))
+                     aggregate_specs.append(spec)
+                     supported_specs.append(spec)
+
+             elif kind == "compare":
+                 left = spec.get("left")
+                 right = spec.get("right")
+                 op = spec.get("op")
+                 if (isinstance(left, str) and left and
+                         isinstance(right, str) and right and
+                         isinstance(op, str) and op):
+                     aggregate_selects.append(agg_compare(left, right, op, rule_id, self.DIALECT))
+                     aggregate_specs.append(spec)
+                     supported_specs.append(spec)
+
+             elif kind == "conditional_not_null":
+                 col = spec.get("column")
+                 when_column = spec.get("when_column")
+                 when_op = spec.get("when_op")
+                 when_value = spec.get("when_value")
+                 if (isinstance(col, str) and col and
+                         isinstance(when_column, str) and when_column and
+                         isinstance(when_op, str) and when_op):
+                     aggregate_selects.append(
+                         agg_conditional_not_null(col, when_column, when_op, when_value, rule_id, self.DIALECT)
+                     )
+                     aggregate_specs.append(spec)
+                     supported_specs.append(spec)
+
+             elif kind == "conditional_range":
+                 col = spec.get("column")
+                 when_column = spec.get("when_column")
+                 when_op = spec.get("when_op")
+                 when_value = spec.get("when_value")
+                 min_val = spec.get("min")
+                 max_val = spec.get("max")
+                 if (isinstance(col, str) and col and
+                         isinstance(when_column, str) and when_column and
+                         isinstance(when_op, str) and when_op and
+                         (min_val is not None or max_val is not None)):
+                     aggregate_selects.append(
+                         agg_conditional_range(col, when_column, when_op, when_value, min_val, max_val, rule_id, self.DIALECT)
+                     )
+                     aggregate_specs.append(spec)
+                     supported_specs.append(spec)
+
+             elif kind == "custom_agg":
+                 # Custom rule with to_sql_agg() - use the pre-generated SQL
+                 sql_agg = spec.get("sql_agg", {})
+                 # Try exact dialect match first, then fallback for sqlserver/mssql naming
+                 agg_expr = sql_agg.get(self.DIALECT)
+                 if not agg_expr and self.DIALECT == "sqlserver":
+                     agg_expr = sql_agg.get("mssql")  # Fallback: mssql -> sqlserver
+                 if agg_expr:
+                     aggregate_selects.append(f'{agg_expr} AS "{rule_id}"')
+                     aggregate_specs.append(spec)
+                     supported_specs.append(spec)
+
+         return {
+             "exists_specs": exists_specs,
+             "aggregate_selects": aggregate_selects,
+             "aggregate_specs": aggregate_specs,
+             "custom_sql_specs": custom_sql_specs,
+             "supported_specs": supported_specs,
+         }
+
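
To make the plan shape above concrete, the sketch below routes a few hypothetical specs the way compile() would. Only keys that compile() actually reads are used; the SQL text that ends up in "aggregate_selects" is elided because it is produced by the dialect-specific helpers in kontra.engine.sql_utils. This is an editorial illustration, not part of database_base.py.

# Hypothetical rule specs (field names match what compile() reads above).
example_specs = [
    {"kind": "not_null", "rule_id": "orders.id.not_null", "column": "id"},
    {"kind": "unique", "rule_id": "orders.id.unique", "column": "id"},
    {
        "kind": "custom_sql_check",
        "rule_id": "orders.no_negative_totals",
        "sql": "SELECT * FROM {table} WHERE total < 0",
    },
]

# plan = some_executor.compile(example_specs) would, roughly, yield:
#   plan["exists_specs"]      -> [the not_null spec]                     (Phase 1)
#   plan["aggregate_selects"] -> [one dialect-specific aggregate expr]   (Phase 2)
#   plan["aggregate_specs"]   -> [the unique spec]                       (Phase 2)
#   plan["custom_sql_specs"]  -> [the custom_sql_check spec, if
#                                 validate_sql() accepted it]            (Phase 3)
#   plan["supported_specs"]   -> every spec that was accepted
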
+     def execute(
+         self,
+         handle: DatasetHandle,
+         compiled_plan: Dict[str, Any],
+         **kwargs,
+     ) -> Dict[str, Any]:
+         """
+         Execute the compiled plan in three phases.
+
+         Phase 1: EXISTS checks for not_null (fast, can early-terminate)
+         Phase 2: Aggregate query for most rules (batched)
+         Phase 3: Custom SQL queries (each executed individually)
+
+         Returns:
+             {"results": [...], "staging": None}
+         """
+         exists_specs = compiled_plan.get("exists_specs", [])
+         aggregate_selects = compiled_plan.get("aggregate_selects", [])
+         custom_sql_specs = compiled_plan.get("custom_sql_specs", [])
+
+         if not exists_specs and not aggregate_selects and not custom_sql_specs:
+             return {"results": [], "staging": None}
+
+         table = self._get_table_reference(handle)
+         results: List[Dict[str, Any]] = []
+
+         # Build rule_kinds mapping from specs
+         rule_kinds = {}
+         for spec in exists_specs:
+             rule_kinds[spec["rule_id"]] = spec.get("kind")
+         for spec in compiled_plan.get("aggregate_specs", []):
+             rule_kinds[spec["rule_id"]] = spec.get("kind")
+         for spec in custom_sql_specs:
+             rule_kinds[spec["rule_id"]] = spec.get("kind")
+
+         with self._get_connection_ctx(handle) as conn:
+             cursor = self._get_cursor(conn)
+             try:
+                 # Phase 1: EXISTS checks for not_null rules
+                 if exists_specs:
+                     exists_exprs = [
+                         exists_not_null(
+                             spec["column"],
+                             spec["rule_id"],
+                             table,
+                             self.DIALECT
+                         )
+                         for spec in exists_specs
+                     ]
+                     exists_sql = self._assemble_exists_query(exists_exprs)
+                     cursor.execute(exists_sql)
+                     row = cursor.fetchone()
+                     columns = [desc[0] for desc in cursor.description] if cursor.description else []
+
+                     if row and columns:
+                         exists_results = results_from_row(columns, row, is_exists=True, rule_kinds=rule_kinds)
+                         results.extend(exists_results)
+
+                 # Phase 2: Aggregate query for remaining rules
+                 if aggregate_selects:
+                     agg_sql = self._assemble_single_row(aggregate_selects, table)
+                     cursor.execute(agg_sql)
+                     row = cursor.fetchone()
+                     columns = [desc[0] for desc in cursor.description] if cursor.description else []
+
+                     if row and columns:
+                         agg_results = results_from_row(columns, row, is_exists=False, rule_kinds=rule_kinds)
+                         results.extend(agg_results)
+
+                 # Phase 3: Custom SQL queries (executed individually)
+                 if custom_sql_specs:
+                     custom_results = self._execute_custom_sql_queries(
+                         cursor, handle, custom_sql_specs
+                     )
+                     results.extend(custom_results)
+             finally:
+                 self._close_cursor(cursor)
+
+         return {"results": results, "staging": None}
+
+     def _execute_custom_sql_queries(
+         self,
+         cursor,
+         handle: DatasetHandle,
+         custom_sql_specs: List[Dict[str, Any]],
+     ) -> List[Dict[str, Any]]:
+         """
+         Execute custom SQL queries (Phase 3).
+
+         Each custom_sql_check query is transformed to return a COUNT(*) and executed.
+         The user writes a query that selects "violation rows", and we count them.
+
+         Transformation strategy:
+         - Simple SELECT: Rewrite to COUNT(*) directly
+         - DISTINCT/GROUP BY/LIMIT: Wrap in SELECT COUNT(*) FROM (...) AS _v
+         """
+         results: List[Dict[str, Any]] = []
+
+         # Get schema and table for placeholder replacement
+         try:
+             schema, table_name = self._get_schema_and_table(handle)
+         except NotImplementedError:
+             # Fallback: extract from full table reference
+             _logger.warning("_get_schema_and_table not implemented, custom SQL skipped")
+             return results
+
+         for spec in custom_sql_specs:
+             rule_id = spec["rule_id"]
+             user_sql = spec.get("sql", "")
+
+             try:
+                 # Step 1: Replace {table} placeholder with properly formatted table reference
+                 formatted_sql = replace_table_placeholder(
+                     sql=user_sql,
+                     schema=schema,
+                     table=table_name,
+                     dialect=self.DIALECT,
+                 )
+
+                 # Step 2: Transform to COUNT(*) query
+                 success, count_sql = to_count_query(formatted_sql, dialect=self.DIALECT)
+                 if not success:
+                     raise ValueError(f"Failed to transform SQL: {count_sql}")
+
+                 # Step 3: Execute and read the count
+                 cursor.execute(count_sql)
+                 row = cursor.fetchone()
+
+                 if row is None or len(row) < 1:
+                     raise ValueError("Query returned no result")
+
+                 failed_count = int(row[0]) if row[0] is not None else 0
+
+                 passed = failed_count == 0
+                 results.append({
+                     "rule_id": rule_id,
+                     "passed": passed,
+                     "failed_count": failed_count,
+                     "message": "Passed" if passed else f"Custom SQL check failed for {failed_count} rows",
+                     "execution_source": self.DIALECT,
+                 })
+
+             except Exception as e:
+                 _logger.warning(f"Custom SQL execution failed for '{rule_id}': {e}")
+                 results.append({
+                     "rule_id": rule_id,
+                     "passed": False,
+                     "failed_count": 1,  # Unknown, but at least 1 issue
+                     "message": f"Custom SQL execution failed: {e}",
+                     "execution_source": self.DIALECT,
+                 })
+
+         return results
+
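
The transformation strategy described in the docstring of _execute_custom_sql_queries() can be pictured with a simplified stand-in for to_count_query. The package's real implementation lives in kontra.engine.sql_validator and validates/parses the SQL; the naive wrapper below only illustrates the generic "wrap in SELECT COUNT(*) FROM (...) AS _v" fallback and is not part of the package.

def wrap_as_count(violation_sql: str) -> str:
    """Naive illustration of the derived-table counting fallback (not kontra's code)."""
    inner = violation_sql.strip().rstrip(";")
    return f"SELECT COUNT(*) FROM ({inner}) AS _v"


# A custom_sql_check selects "violation rows"; the executor only needs their count.
print(wrap_as_count("SELECT DISTINCT order_id FROM sales.orders WHERE total < 0"))
# -> SELECT COUNT(*) FROM (SELECT DISTINCT order_id FROM sales.orders WHERE total < 0) AS _v
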
+     def _get_cursor(self, conn):
+         """
+         Get a cursor from the connection.
+
+         Default implementation calls conn.cursor().
+         Subclasses can override for different behavior.
+         """
+         return conn.cursor()
+
+     def _close_cursor(self, cursor):
+         """
+         Close a cursor if needed.
+
+         Default implementation does nothing (cursor closed by context manager).
+         Subclasses can override for connections that don't use context managers.
+         """
+         pass
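
Putting the pieces together, a concrete dialect executor only has to supply the members listed in the module docstring (DIALECT, SUPPORTED_RULES, the connection context, and table references) plus the abstract name and _supports_scheme. The sketch below is illustrative rather than the package's own PostgreSQL executor: it assumes psycopg2 as the driver and invents the handle attributes uri, schema_name, and table_name purely for demonstration.

# Illustrative sketch only -- not the executor shipped in kontra.
from contextlib import contextmanager
from typing import Set, Tuple

import psycopg2  # assumption: a DB-API driver for the "postgres" dialect

from kontra.engine.executors.database_base import DatabaseSqlExecutor


class SketchPostgresExecutor(DatabaseSqlExecutor):
    DIALECT = "postgres"
    SUPPORTED_RULES: Set[str] = {"not_null", "unique", "min_rows", "max_rows"}

    @property
    def name(self) -> str:
        return "sketch_postgres_sql"

    def _supports_scheme(self, scheme: str, handle) -> bool:
        return scheme in ("postgres", "postgresql")

    @contextmanager
    def _get_connection_ctx(self, handle):
        # Assumes the handle exposes a DSN-style URI; the real executors also
        # support externally supplied (BYOC) connections.
        conn = psycopg2.connect(handle.uri)
        try:
            yield conn
        finally:
            conn.close()

    def _get_table_reference(self, handle) -> str:
        # schema_name/table_name are hypothetical handle attributes.
        return f"{self._esc(handle.schema_name)}.{self._esc(handle.table_name)}"

    def _get_schema_and_table(self, handle) -> Tuple[str, str]:
        return handle.schema_name, handle.table_name
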