duckguard 2.2.0__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39):
  1. duckguard/__init__.py +1 -1
  2. duckguard/anomaly/__init__.py +28 -0
  3. duckguard/anomaly/baselines.py +294 -0
  4. duckguard/anomaly/methods.py +16 -2
  5. duckguard/anomaly/ml_methods.py +724 -0
  6. duckguard/checks/__init__.py +26 -0
  7. duckguard/checks/conditional.py +796 -0
  8. duckguard/checks/distributional.py +524 -0
  9. duckguard/checks/multicolumn.py +726 -0
  10. duckguard/checks/query_based.py +643 -0
  11. duckguard/cli/main.py +257 -2
  12. duckguard/connectors/factory.py +30 -2
  13. duckguard/connectors/files.py +7 -3
  14. duckguard/core/column.py +851 -1
  15. duckguard/core/dataset.py +1035 -0
  16. duckguard/core/result.py +236 -0
  17. duckguard/freshness/__init__.py +33 -0
  18. duckguard/freshness/monitor.py +429 -0
  19. duckguard/history/schema.py +119 -1
  20. duckguard/notifications/__init__.py +20 -2
  21. duckguard/notifications/email.py +508 -0
  22. duckguard/profiler/distribution_analyzer.py +384 -0
  23. duckguard/profiler/outlier_detector.py +497 -0
  24. duckguard/profiler/pattern_matcher.py +301 -0
  25. duckguard/profiler/quality_scorer.py +445 -0
  26. duckguard/reports/html_reporter.py +1 -2
  27. duckguard/rules/executor.py +642 -0
  28. duckguard/rules/generator.py +4 -1
  29. duckguard/rules/schema.py +54 -0
  30. duckguard/schema_history/__init__.py +40 -0
  31. duckguard/schema_history/analyzer.py +414 -0
  32. duckguard/schema_history/tracker.py +288 -0
  33. duckguard/semantic/detector.py +17 -1
  34. duckguard-3.0.0.dist-info/METADATA +1072 -0
  35. {duckguard-2.2.0.dist-info → duckguard-3.0.0.dist-info}/RECORD +38 -21
  36. duckguard-2.2.0.dist-info/METADATA +0 -351
  37. {duckguard-2.2.0.dist-info → duckguard-3.0.0.dist-info}/WHEEL +0 -0
  38. {duckguard-2.2.0.dist-info → duckguard-3.0.0.dist-info}/entry_points.txt +0 -0
  39. {duckguard-2.2.0.dist-info → duckguard-3.0.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,643 @@
1
+ """
2
+ Query-based checks for DuckGuard 3.0.
3
+
4
+ This module provides custom SQL query validation with comprehensive security controls.
5
+ Users can write arbitrary SELECT queries to validate complex business logic that cannot
6
+ be expressed through standard checks.
7
+
8
+ Security Features:
9
+ - Multi-layer SQL validation
10
+ - READ-ONLY mode enforcement
11
+ - Query timeout (30 seconds)
12
+ - Result set limit (10,000 rows)
13
+ - Forbidden keyword detection
14
+ - SQL injection prevention
15
+
16
+ Example:
17
+ >>> from duckguard import connect
18
+ >>> data = connect("orders.csv")
19
+ >>> # Check for invalid totals
20
+ >>> result = data.expect_query_to_return_no_rows(
21
+ ... query="SELECT * FROM orders WHERE total < subtotal"
22
+ ... )
23
+ >>> assert result.passed
24
+ """
25
+
26
+ import re
27
+ import time
28
+ from dataclasses import dataclass
29
+ from typing import Any
30
+
31
+ from duckguard.core.result import ValidationResult
32
+
33
+
34
+ @dataclass
35
+ class QueryValidationResult:
36
+ """Result of query validation."""
37
+
38
+ is_valid: bool
39
+ error_message: str
40
+ complexity_score: int
41
+ estimated_rows: int | None = None
42
+
43
+
44
class QuerySecurityValidator:
    """
    Validates SQL queries for security before execution.

    The checks are text-based heuristics, applied in this order:
    1. Empty-query rejection
    2. Forbidden keyword detection (INSERT, UPDATE, DELETE, DROP, etc.)
    3. SQL injection pattern detection
    4. Balanced parentheses and quotes
    5. Query complexity analysis
    6. Rejection of statements that start with a non-SELECT keyword

    No SQL parser is involved: keywords and punctuation that appear inside
    string literals are matched like any other text, so a literal such as
    'DELETE' is rejected as a forbidden keyword.
    """

    FORBIDDEN_KEYWORDS = [
        # Data modification
        "INSERT",
        "UPDATE",
        "DELETE",
        "TRUNCATE",
        "MERGE",
        # Schema modification
        "DROP",
        "CREATE",
        "ALTER",
        "RENAME",
        # Security
        "GRANT",
        "REVOKE",
        # Execution
        "EXECUTE",
        "EXEC",
        "CALL",
        # System
        "ATTACH",
        "DETACH",
        "PRAGMA",
    ]

    SQL_INJECTION_PATTERNS = [
        r";\s*DROP",
        r";\s*DELETE",
        r";\s*UPDATE",
        r";\s*INSERT",
        r";\s*SELECT",  # Stacked SELECT queries
        r"--\s*$",  # SQL comment at end
        r"/\*.*\*/",  # Block comment
        r"UNION\s+SELECT",
        r"'\s*OR\s+'?1'?\s*=\s*'?1",
        r"'\s*OR\s+'?true",
        r"\bOR\s+1\s*=\s*1\b",  # OR 1=1 injection (unquoted)
        r"\bAND\s+1\s*=\s*1\b",  # AND 1=1 injection
        r"'\s*OR\s+1\s*=\s*1",
    ]

    MAX_COMPLEXITY_SCORE = 50

    # Constructs scored once per occurrence: (pattern, points per match).
    # Patterns run against the upper-cased query and use word boundaries so
    # that neither letter case, the kind of whitespace around a keyword, nor
    # identifiers that merely contain a keyword (e.g. "overdue") skew the score.
    _COUNTED_CONSTRUCTS = (
        (re.compile(r"\bJOIN\b"), 5),
        (re.compile(r"\(\s*SELECT\b"), 8),  # subqueries
        (re.compile(r"\bAND\b"), 2),
        (re.compile(r"\bOR\b"), 2),
        (re.compile(r"\b(?:COUNT|SUM|AVG|MIN|MAX|STDDEV)\s*\("), 3),
        (re.compile(r"\bOVER\s*\("), 5),  # window functions
    )

    # Constructs scored at most once per query: (pattern, points).
    _FLAGGED_CONSTRUCTS = (
        (re.compile(r"\bGROUP\s+BY\b"), 4),
        (re.compile(r"\bHAVING\b"), 3),
    )

    # Statement types rejected by the final "must be a SELECT" check.
    _NON_SELECT_PREFIXES = (
        "INSERT",
        "UPDATE",
        "DELETE",
        "DROP",
        "CREATE",
        "ALTER",
        "TRUNCATE",
        "GRANT",
        "REVOKE",
        "SHOW",
        "DESCRIBE",
        "EXPLAIN",
    )

    def validate(self, query: str) -> "QueryValidationResult":
        """
        Validate query for security and correctness.

        Args:
            query: SQL query string to validate

        Returns:
            QueryValidationResult with validation status and details. The
            complexity score is 0 for queries rejected before scoring.

        Raises:
            ValueError: If query is None
        """
        # Check 1: Empty query
        if query is None:
            raise ValueError("Query cannot be None")

        if not query.strip():
            return QueryValidationResult(
                is_valid=False, error_message="Query cannot be empty", complexity_score=0
            )

        query_upper = query.upper()

        # Check 2: Forbidden keywords, matched as whole words. The list is
        # walked in declaration order so the reported keyword is stable.
        for keyword in self.FORBIDDEN_KEYWORDS:
            if re.search(r"\b" + keyword + r"\b", query_upper):
                return QueryValidationResult(
                    is_valid=False,
                    error_message=f"Forbidden keyword detected: {keyword}",
                    complexity_score=0,
                )

        # Check 3: SQL injection patterns. IGNORECASE is required because some
        # patterns contain lower-case text; DOTALL lets the block-comment
        # pattern span line breaks.
        for pattern in self.SQL_INJECTION_PATTERNS:
            if re.search(pattern, query_upper, re.IGNORECASE | re.DOTALL):
                return QueryValidationResult(
                    is_valid=False,
                    error_message=f"Potential SQL injection detected: pattern '{pattern}'",
                    complexity_score=0,
                )

        # Check 4: Unbalanced parentheses (plain count, literals included)
        if query.count("(") != query.count(")"):
            return QueryValidationResult(
                is_valid=False,
                error_message="Unbalanced parentheses in query",
                complexity_score=0,
            )

        # Check 5: Unbalanced quotes (backslash-escaped quotes are not counted)
        single_quotes = query.count("'") - query.count("\\'")
        double_quotes = query.count('"') - query.count('\\"')
        if single_quotes % 2 != 0 or double_quotes % 2 != 0:
            return QueryValidationResult(
                is_valid=False,
                error_message="Unbalanced quotes in query",
                complexity_score=0,
            )

        # Check 6: Calculate complexity score
        complexity_score = self._calculate_complexity(query)
        if complexity_score > self.MAX_COMPLEXITY_SCORE:
            return QueryValidationResult(
                is_valid=False,
                error_message=f"Query complexity ({complexity_score}) exceeds limit ({self.MAX_COMPLEXITY_SCORE})",
                complexity_score=complexity_score,
            )

        # Check 7: Only reject statements that clearly start with a different
        # statement type; anything else is passed on so that the database can
        # report its own syntax errors.
        if query_upper.strip().startswith(self._NON_SELECT_PREFIXES):
            return QueryValidationResult(
                is_valid=False,
                error_message="Query must be a SELECT statement",
                complexity_score=complexity_score,
            )

        return QueryValidationResult(
            is_valid=True,
            error_message="",
            complexity_score=complexity_score,
        )

    def _calculate_complexity(self, query: str) -> int:
        """
        Calculate complexity score for a query.

        Points:
        - JOIN: 5 each
        - Subquery ("(SELECT"): 8 each
        - AND / OR: 2 each
        - Aggregate call (COUNT, SUM, AVG, MIN, MAX, STDDEV): 3 each
        - Window function ("OVER ("): 5 each
        - GROUP BY: 4 (once)
        - HAVING: 3 (once)

        Matching is case-insensitive and text-based, so keywords inside
        string literals are counted too.

        Args:
            query: SQL query string

        Returns:
            Complexity score (higher = more complex)
        """
        query_upper = query.upper()

        score = sum(
            len(pattern.findall(query_upper)) * points
            for pattern, points in self._COUNTED_CONSTRUCTS
        )
        score += sum(
            points
            for pattern, points in self._FLAGGED_CONSTRUCTS
            if pattern.search(query_upper)
        )
        return score
232
+
233
+
234
+ class QueryCheckHandler:
235
+ """
236
+ Executes query-based validation checks with security enforcement.
237
+
238
+ This handler enables users to write custom SQL queries for validation while
239
+ maintaining strict security controls.
240
+ """
241
+
242
+ MAX_RESULT_ROWS = 10000
243
+ QUERY_TIMEOUT_SECONDS = 30
244
+
245
+ def __init__(self):
246
+ """Initialize the query check handler."""
247
+ self.validator = QuerySecurityValidator()
248
+
249
+ def execute_query_no_rows(
250
+ self, dataset, query: str, message: str | None = None
251
+ ) -> ValidationResult:
252
+ """
253
+ Validate that a query returns no rows.
254
+
255
+ Use case: Find violations - query should return empty result set.
256
+
257
+ Example:
258
+ query = "SELECT * FROM orders WHERE total < subtotal"
259
+ # Should return no rows (no invalid totals)
260
+
261
+ Args:
262
+ dataset: Dataset to query
263
+ query: SQL SELECT query
264
+ message: Optional custom message
265
+
266
+ Returns:
267
+ ValidationResult (passed if query returns 0 rows)
268
+ """
269
+ # Validate query
270
+ validation = self.validator.validate(query)
271
+ if not validation.is_valid:
272
+ return ValidationResult(
273
+ passed=False,
274
+ actual_value=None,
275
+ expected_value="Valid query",
276
+ message=f"Query validation failed: {validation.error_message}",
277
+ details={"error": validation.error_message, "query": query},
278
+ )
279
+
280
+ # Execute query with security controls
281
+ try:
282
+ result_rows = self._execute_query_safely(dataset, query)
283
+ row_count = len(result_rows)
284
+
285
+ passed = row_count == 0
286
+
287
+ if message is None:
288
+ if passed:
289
+ message = "Query returned no rows as expected"
290
+ else:
291
+ message = f"Query returned {row_count} rows, expected 0"
292
+
293
+ return ValidationResult(
294
+ passed=passed,
295
+ actual_value=row_count,
296
+ expected_value=0,
297
+ message=message,
298
+ details={
299
+ "query": query,
300
+ "row_count": row_count,
301
+ "complexity_score": validation.complexity_score,
302
+ },
303
+ )
304
+
305
+ except Exception:
306
+ # Re-raise the exception instead of returning ValidationResult
307
+ raise
308
+
309
+ def execute_query_returns_rows(
310
+ self, dataset, query: str, message: str | None = None
311
+ ) -> ValidationResult:
312
+ """
313
+ Validate that a query returns at least one row.
314
+
315
+ Use case: Ensure expected data exists.
316
+
317
+ Example:
318
+ query = "SELECT * FROM orders WHERE status = 'completed'"
319
+ # Should return rows (we have completed orders)
320
+
321
+ Args:
322
+ dataset: Dataset to query
323
+ query: SQL SELECT query
324
+ message: Optional custom message
325
+
326
+ Returns:
327
+ ValidationResult (passed if query returns > 0 rows)
328
+ """
329
+ # Validate query
330
+ validation = self.validator.validate(query)
331
+ if not validation.is_valid:
332
+ return ValidationResult(
333
+ passed=False,
334
+ actual_value=None,
335
+ expected_value="> 0 rows",
336
+ message=f"Query validation failed: {validation.error_message}",
337
+ details={"error": validation.error_message, "query": query},
338
+ )
339
+
340
+ # Execute query
341
+ try:
342
+ result_rows = self._execute_query_safely(dataset, query)
343
+ row_count = len(result_rows)
344
+
345
+ passed = row_count > 0
346
+
347
+ if message is None:
348
+ if passed:
349
+ message = f"Query returned {row_count} rows as expected"
350
+ else:
351
+ message = "Query returned 0 rows, expected > 0"
352
+
353
+ return ValidationResult(
354
+ passed=passed,
355
+ actual_value=row_count,
356
+ expected_value="> 0",
357
+ message=message,
358
+ details={
359
+ "query": query,
360
+ "row_count": row_count,
361
+ "complexity_score": validation.complexity_score,
362
+ },
363
+ )
364
+
365
+ except Exception:
366
+ # Re-raise the exception
367
+ raise
368
+
369
+ def execute_query_result_equals(
370
+ self,
371
+ dataset,
372
+ query: str,
373
+ expected: Any,
374
+ tolerance: float | None = None,
375
+ message: str | None = None,
376
+ ) -> ValidationResult:
377
+ """
378
+ Validate that a query returns a specific value.
379
+
380
+ Use case: Aggregate checks (COUNT, SUM, AVG, etc.)
381
+
382
+ Example:
383
+ query = "SELECT COUNT(*) as cnt FROM orders WHERE status = 'pending'"
384
+ expected = 0
385
+ # Should have 0 pending orders
386
+
387
+ Args:
388
+ dataset: Dataset to query
389
+ query: SQL SELECT query (must return single row, single column)
390
+ expected: Expected value
391
+ tolerance: Optional tolerance for numeric comparisons
392
+ message: Optional custom message
393
+
394
+ Returns:
395
+ ValidationResult (passed if query result equals expected)
396
+ """
397
+ # Validate query
398
+ validation = self.validator.validate(query)
399
+ if not validation.is_valid:
400
+ return ValidationResult(
401
+ passed=False,
402
+ actual_value=None,
403
+ expected_value="> 0 rows",
404
+ message=f"Query validation failed: {validation.error_message}",
405
+ details={"error": validation.error_message, "query": query},
406
+ )
407
+
408
+ # Execute query
409
+ try:
410
+ result_rows = self._execute_query_safely(dataset, query)
411
+
412
+ # Must return exactly 1 row with 1 column
413
+ if len(result_rows) == 0:
414
+ return ValidationResult(
415
+ passed=False,
416
+ actual_value=None,
417
+ expected_value=expected,
418
+ message="Query returned no rows, expected 1 row with 1 value",
419
+ details={"query": query},
420
+ )
421
+
422
+ if len(result_rows) > 1:
423
+ return ValidationResult(
424
+ passed=False,
425
+ actual_value=f"{len(result_rows)} rows",
426
+ expected_value=expected,
427
+ message=f"Query returned {len(result_rows)} rows, expected 1 row with 1 value",
428
+ details={"query": query, "row_count": len(result_rows)},
429
+ )
430
+
431
+ # Extract value from first row
432
+ first_row = result_rows[0]
433
+ if len(first_row) != 1:
434
+ return ValidationResult(
435
+ passed=False,
436
+ actual_value=f"{len(first_row)} columns",
437
+ expected_value=expected,
438
+ message=f"Query returned {len(first_row)} columns, expected 1 column",
439
+ details={"query": query},
440
+ )
441
+
442
+ actual = first_row[0]
443
+
444
+ # Compare with tolerance if provided
445
+ if tolerance is not None and isinstance(actual, (int, float)) and isinstance(expected, (int, float)):
446
+ passed = abs(actual - expected) <= tolerance
447
+ else:
448
+ passed = actual == expected
449
+
450
+ if message is None:
451
+ if passed:
452
+ message = f"Query result {actual} equals expected {expected}"
453
+ else:
454
+ message = f"Query result {actual} does not equal expected {expected}"
455
+
456
+ return ValidationResult(
457
+ passed=passed,
458
+ actual_value=actual,
459
+ expected_value=expected,
460
+ message=message,
461
+ details={
462
+ "query": query,
463
+ "actual": actual,
464
+ "expected": expected,
465
+ "tolerance": tolerance,
466
+ "complexity_score": validation.complexity_score,
467
+ },
468
+ )
469
+
470
+ except Exception:
471
+ # Re-raise the exception
472
+ raise
473
+
474
+ def execute_query_result_between(
475
+ self,
476
+ dataset,
477
+ query: str,
478
+ min_value: float,
479
+ max_value: float,
480
+ message: str | None = None,
481
+ ) -> ValidationResult:
482
+ """
483
+ Validate that a query result is within a range.
484
+
485
+ Use case: Metric validation (e.g., average must be between X and Y)
486
+
487
+ Example:
488
+ query = "SELECT AVG(price) FROM products"
489
+ min_value = 10.0
490
+ max_value = 1000.0
491
+ # Average price should be in range
492
+
493
+ Args:
494
+ dataset: Dataset to query
495
+ query: SQL SELECT query (must return single row, single column)
496
+ min_value: Minimum allowed value (inclusive)
497
+ max_value: Maximum allowed value (inclusive)
498
+ message: Optional custom message
499
+
500
+ Returns:
501
+ ValidationResult (passed if min_value <= result <= max_value)
502
+ """
503
+ # Validate query
504
+ validation = self.validator.validate(query)
505
+ if not validation.is_valid:
506
+ return ValidationResult(
507
+ passed=False,
508
+ actual_value=None,
509
+ expected_value="> 0 rows",
510
+ message=f"Query validation failed: {validation.error_message}",
511
+ details={"error": validation.error_message, "query": query},
512
+ )
513
+
514
+ # Execute query
515
+ try:
516
+ result_rows = self._execute_query_safely(dataset, query)
517
+
518
+ # Must return exactly 1 row with 1 column
519
+ if len(result_rows) != 1 or len(result_rows[0]) != 1:
520
+ return ValidationResult(
521
+ passed=False,
522
+ actual_value=None,
523
+ expected_value=f"between {min_value} and {max_value}",
524
+ message="Query must return exactly 1 row with 1 column",
525
+ details={"query": query},
526
+ )
527
+
528
+ actual = result_rows[0][0]
529
+
530
+ # Check if value is numeric
531
+ if not isinstance(actual, (int, float)):
532
+ return ValidationResult(
533
+ passed=False,
534
+ actual_value=actual,
535
+ expected_value=f"between {min_value} and {max_value}",
536
+ message=f"Query result must be numeric, got {type(actual).__name__}",
537
+ details={"query": query, "actual": actual},
538
+ )
539
+
540
+ passed = min_value <= actual <= max_value
541
+
542
+ if message is None:
543
+ if passed:
544
+ message = f"Query result {actual} is within range [{min_value}, {max_value}]"
545
+ else:
546
+ message = f"Query result {actual} is outside range [{min_value}, {max_value}]"
547
+
548
+ return ValidationResult(
549
+ passed=passed,
550
+ actual_value=actual,
551
+ expected_value=f"between {min_value} and {max_value}",
552
+ message=message,
553
+ details={
554
+ "query": query,
555
+ "actual": actual,
556
+ "min_value": min_value,
557
+ "max_value": max_value,
558
+ "complexity_score": validation.complexity_score,
559
+ },
560
+ )
561
+
562
+ except Exception:
563
+ # Re-raise the exception
564
+ raise
565
+
566
+ def _execute_query_safely(self, dataset, query: str) -> list:
567
+ """
568
+ Execute query with security controls.
569
+
570
+ Security measures:
571
+ 1. READ-ONLY mode (enforced by validator - no INSERT/UPDATE/DELETE)
572
+ 2. Result set limit (MAX_RESULT_ROWS)
573
+ 3. Timeout (QUERY_TIMEOUT_SECONDS)
574
+
575
+ Args:
576
+ dataset: Dataset to query
577
+ query: Validated SQL query
578
+
579
+ Returns:
580
+ List of result rows (as tuples)
581
+
582
+ Raises:
583
+ TimeoutError: If query exceeds timeout
584
+ Exception: If query execution fails
585
+ """
586
+ engine = dataset._engine
587
+ table_name = dataset._source
588
+
589
+ # Convert Windows backslashes to forward slashes for DuckDB
590
+ # DuckDB accepts forward slashes on all platforms
591
+ table_name_normalized = table_name.replace('\\', '/')
592
+
593
+ # Replace generic table references with actual table name
594
+ # Users write queries like "SELECT * FROM table WHERE ..."
595
+ # We need to replace 'table' with the actual table name (quoted for DuckDB)
596
+ query_modified = query.replace(" table ", f" '{table_name_normalized}' ")
597
+ query_modified = query_modified.replace(" table,", f" '{table_name_normalized}',")
598
+ query_modified = query_modified.replace("(table ", f"('{table_name_normalized}' ")
599
+ query_modified = query_modified.replace("(table)", f"('{table_name_normalized}')")
600
+
601
+ # If query starts with "FROM table", replace it
602
+ if query_modified.strip().upper().startswith("SELECT"):
603
+ query_modified = re.sub(
604
+ r"\bFROM\s+table\b",
605
+ f"FROM '{table_name_normalized}'",
606
+ query_modified,
607
+ flags=re.IGNORECASE,
608
+ )
609
+
610
+ # Add LIMIT if not present to prevent huge result sets
611
+ if "LIMIT" not in query_modified.upper():
612
+ query_modified += f" LIMIT {self.MAX_RESULT_ROWS}"
613
+
614
+ # Execute with timeout
615
+ start_time = time.time()
616
+
617
+ try:
618
+ # Execute query and get all rows as tuples
619
+ result_rows = engine.fetch_all(query_modified)
620
+ elapsed = time.time() - start_time
621
+
622
+ if elapsed > self.QUERY_TIMEOUT_SECONDS:
623
+ raise TimeoutError(
624
+ f"Query exceeded timeout of {self.QUERY_TIMEOUT_SECONDS} seconds"
625
+ )
626
+
627
+ return result_rows
628
+
629
+ except Exception as e:
630
+ error_msg = str(e).lower()
631
+ # Re-raise syntax errors (parser errors with specific syntax issues)
632
+ if 'syntax' in error_msg or 'parser' in error_msg:
633
+ raise
634
+ # Re-raise binder/catalog errors ONLY for column-related issues
635
+ if 'binder' in error_msg or 'catalog' in error_msg:
636
+ # Re-raise if it's specifically about a missing/unknown column
637
+ if 'column' in error_msg and ('not found' in error_msg or 'does not exist' in error_msg or 'referenced' in error_msg):
638
+ raise
639
+ # Otherwise, it's a different binder/catalog error (e.g., window function in WHERE, missing table)
640
+ # Don't raise - return empty result for ValidationResult to handle
641
+ return []
642
+ # Otherwise, wrap the error
643
+ raise Exception(f"Query execution failed: {str(e)}")