aetherdialect 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1847 @@
1
+ """Schema-level validation for intent fields against the database schema.
2
+
3
+ Validates ``select_cols``, ``order_by_cols``, ``group_by_cols``, ``filters_param``, and ``having_param`` for column existence, table qualification, valid operators, aggregation appropriateness, and scalar function type compatibility. Supports CTE output column tracking for cross-CTE validation. All validators return lists of ``IntentIssue`` objects and are called from both the main query and CTE chain validators.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import re
9
+ from typing import Any
10
+
11
+ from .config import (
12
+ AGGREGATION_ALLOWED_COLUMN_TYPES,
13
+ ARITHMETIC_ROLES,
14
+ DISALLOWED_EXTRACT_UNITS,
15
+ VALID_AGG_FUNCS,
16
+ VALID_AGGREGATION_FUNCTIONS,
17
+ VALID_DATE_DIFF_UNITS,
18
+ VALID_DATE_WINDOW_UNITS,
19
+ VALID_FILTER_OPS,
20
+ VALID_HAVING_OPS,
21
+ VALID_SCALAR_FUNCTIONS,
22
+ VALID_VALUE_TYPES,
23
+ )
24
+ from .contracts_base import (
25
+ ColumnMetadata,
26
+ CteOutputColumnMeta,
27
+ IntentIssue,
28
+ SchemaGraph,
29
+ )
30
+ from .contracts_core import (
31
+ FilterParam,
32
+ HavingParam,
33
+ NormalizedExpr,
34
+ OrderByCol,
35
+ RuntimeCteStep,
36
+ SelectCol,
37
+ )
38
+ from .core_utils import debug
39
+
40
+
41
+ def _strip_distinct_prefix(col: str) -> str:
42
+ """Remove a leading ``DISTINCT`` keyword from a column reference.
43
+
44
+ For example, ``"DISTINCT orders.order_id"`` returns ``"orders.order_id"``.
45
+
46
+ Args:
47
+
48
+ col: A column expression string that may have a ``DISTINCT`` prefix.
49
+
50
+ Returns:
51
+
52
+ The column expression with any leading ``DISTINCT`` keyword removed.
53
+ """
54
+ if col and col.upper().startswith("DISTINCT "):
55
+ return col[9:].strip()
56
+ return col
57
+
58
+
59
def extract_col_from_scalar_wrapper(col_expr: str) -> str:
    """Return the column reference inside an optional scalar call / ``DISTINCT`` prefix.

    When ``col_expr`` has the shape ``NAME(...)`` and ``NAME`` (lowercased) is a
    member of ``VALID_SCALAR_FUNCTIONS``, the text between the outermost
    parentheses is taken as the candidate; otherwise the whole expression is
    the candidate. The candidate is then passed through
    ``_strip_distinct_prefix``. Only one wrapper level is removed.

    Args:

        col_expr: A column expression string, possibly wrapped in a scalar function and/or prefixed with ``DISTINCT``.

    Returns:

        The unwrapped column expression, or ``col_expr`` itself when it is empty.
    """
    if not col_expr:
        return col_expr
    wrapped = re.match(r"^\s*(\w+)\s*\(\s*(.+)\s*\)\s*$", col_expr, re.IGNORECASE)
    is_known_scalar = wrapped is not None and wrapped.group(1).lower() in VALID_SCALAR_FUNCTIONS
    candidate = wrapped.group(2).strip() if is_known_scalar else col_expr
    return _strip_distinct_prefix(candidate)
80
+
81
+
82
+ def _validate_scalar_func_valid(scalar_func: str | None, context: str, location: str) -> list[IntentIssue]:
83
+ """Validate that a scalar function name is in the allowed set.
84
+
85
+ Args:
86
+
87
+ scalar_func: The scalar function name to validate, or ``None``.
88
+ context: A short label for the field being validated (for example ``"select_0"``).
89
+ location: Human-readable location string used in error messages.
90
+
91
+ Returns:
92
+
93
+ List of ``IntentIssue`` objects (empty if valid or if ``scalar_func`` is ``None``).
94
+ """
95
+ issues = []
96
+ if not scalar_func:
97
+ return issues
98
+ func_lower = scalar_func.lower()
99
+ if func_lower not in VALID_SCALAR_FUNCTIONS:
100
+ issues.append(
101
+ IntentIssue(
102
+ issue_id=f"invalid_scalar_func_{context}_{scalar_func}",
103
+ category="scalar_validity",
104
+ severity="error",
105
+ message=f"Invalid scalar function '{scalar_func}' in {location}. Allowed: {', '.join(sorted(VALID_SCALAR_FUNCTIONS))}",
106
+ context={
107
+ "function": scalar_func,
108
+ "location": location,
109
+ "allowed": list(VALID_SCALAR_FUNCTIONS),
110
+ },
111
+ )
112
+ )
113
+ debug(f"[validation_schema.validate_scalar_func_valid] invalid scalar '{scalar_func}' in {location}")
114
+ return issues
115
+
116
+
117
+ def _first_arg_lower(args: list[Any]) -> str:
118
+ """Return the first scalar arg lowercased, or empty string if none."""
119
+ if not args:
120
+ return ""
121
+ return str(args[0]).strip().lower()
122
+
123
+
124
+ def _is_extract_epoch(func: str | None, args: list[Any]) -> bool:
125
+ """Return True if func is extract and first arg is a disallowed unit (e.g. epoch)."""
126
+ if not func or func.lower() != "extract":
127
+ return False
128
+ unit = _first_arg_lower(args)
129
+ return unit in DISALLOWED_EXTRACT_UNITS
130
+
131
+
132
def validate_expr_no_extract_epoch(
    expr: NormalizedExpr,
    context: str,
    location: str,
) -> list[IntentIssue]:
    """Report every ``EXTRACT`` call in ``expr`` that uses a disallowed unit.

    Four positions are inspected, in this order: the expression's own
    ``scalar_func`` and ``inner_scalar_func``, then the same two attributes on
    each entry of ``expr.add_groups + expr.sub_groups``. Each hit yields one
    error-severity issue; the issue ID encodes the position and ``context``.

    Args:

        expr: The expression to inspect.
        context: Label embedded in the issue IDs.
        location: Location string stored in each issue's context dict.

    Returns:

        List of ``IntentIssue`` objects (empty when nothing is flagged).
    """

    def _epoch_issue(issue_id: str) -> IntentIssue:
        # All four positions share category, severity, message and context.
        return IntentIssue(
            issue_id=issue_id,
            category="extract_epoch",
            severity="error",
            message=(
                "EXTRACT(EPOCH FROM ...) is not supported. Use date column "
                "subtraction or other supported date functions."
            ),
            context={"location": location},
        )

    found: list[IntentIssue] = []
    if _is_extract_epoch(expr.scalar_func, expr.scalar_func_args or []):
        found.append(_epoch_issue(f"extract_epoch_{context}"))
    if _is_extract_epoch(expr.inner_scalar_func, expr.inner_scalar_func_args or []):
        found.append(_epoch_issue(f"extract_epoch_inner_{context}"))
    for term_group in expr.add_groups + expr.sub_groups:
        if _is_extract_epoch(term_group.scalar_func, term_group.scalar_func_args or []):
            found.append(_epoch_issue(f"extract_epoch_group_{context}"))
        if _is_extract_epoch(term_group.inner_scalar_func, term_group.inner_scalar_func_args or []):
            found.append(_epoch_issue(f"extract_epoch_inner_group_{context}"))
    return found
197
+
198
+
199
+ def _validate_agg_func_valid(agg_func: str | None, context: str, location: str) -> list[IntentIssue]:
200
+ """Validate that an aggregation function name is in the allowed set.
201
+
202
+ Args:
203
+
204
+ agg_func: The aggregation function name to validate, or ``None``.
205
+ context: A short label for the field being validated.
206
+ location: Human-readable location string used in error messages.
207
+
208
+ Returns:
209
+
210
+ List of ``IntentIssue`` objects (empty if valid or if ``agg_func`` is ``None``).
211
+ """
212
+ issues = []
213
+ if not agg_func:
214
+ return issues
215
+ func_lower = agg_func.lower()
216
+ if func_lower not in VALID_AGGREGATION_FUNCTIONS:
217
+ issues.append(
218
+ IntentIssue(
219
+ issue_id=f"invalid_agg_func_{context}_{agg_func}",
220
+ category="aggregation_validity",
221
+ severity="error",
222
+ message=f"Invalid aggregation function '{agg_func}' in {location}. Allowed: {', '.join(sorted(VALID_AGGREGATION_FUNCTIONS))}",
223
+ context={
224
+ "function": agg_func,
225
+ "location": location,
226
+ "allowed": list(VALID_AGGREGATION_FUNCTIONS),
227
+ },
228
+ )
229
+ )
230
+ debug(f"[validation_schema.validate_agg_func_valid] invalid agg '{agg_func}' in {location}")
231
+ return issues
232
+
233
+
234
def validate_select_cols_schema(
    select_cols: list[SelectCol],
    schema: SchemaGraph,
    allowed_tables: set[str],
    cte_outputs: dict[str, dict[str, CteOutputColumnMeta]] | None = None,
    context: str = "main",
) -> list[IntentIssue]:
    """Check each ``SelectCol`` for qualification, table access and column existence.

    For every entry the primary column is unwrapped with
    ``extract_col_from_scalar_wrapper`` and split on its last ``"."``. A
    reference to a CTE named in ``cte_outputs`` is only checked against that
    CTE's output column names (case-insensitively). Any other reference must
    name a table in ``allowed_tables`` and, when that table is present in
    ``schema.tables``, an existing column. Entries that pass the table check
    also have the aggregation/scalar function names of their primary term and
    any ``EXTRACT`` units validated. An empty ``select_cols`` is itself an error.

    Args:

        select_cols: List of ``SelectCol`` instances to validate.
        schema: The ``SchemaGraph``.
        allowed_tables: Set of table names permitted in this query context.
        cte_outputs: Dict of CTE name to output column metadata for cross-CTE lookup.
        context: Label used in issue IDs and messages (for example ``"main"`` or ``"CTE cte1"``).

    Returns:

        List of ``IntentIssue`` objects.
    """
    found: list[IntentIssue] = []

    def _flag(issue_id: str, message: str, details: dict[str, Any]) -> None:
        # Every issue raised here is an error in the select_validity category.
        found.append(
            IntentIssue(
                issue_id=issue_id,
                category="select_validity",
                severity="error",
                message=message,
                context=details,
            )
        )

    if not select_cols:
        _flag(
            f"select_cols_empty_{context}",
            f"select_cols cannot be empty in {context}",
            {"location": context},
        )
        return found

    known_ctes = cte_outputs or {}
    for idx, sc in enumerate(select_cols):
        raw_col = sc.expr.primary_column
        if not raw_col:
            _flag(
                f"select_col_empty_{context}_{idx}",
                f"SelectCol at index {idx} has empty col in {context}",
                {"index": idx, "location": context},
            )
            continue

        actual_col = extract_col_from_scalar_wrapper(raw_col)
        if "." not in actual_col:
            _flag(
                f"select_unqualified_{context}_{actual_col}",
                f"select_cols must be qualified as table.column, got '{actual_col}' in {context}",
                {"column": actual_col, "location": context},
            )
            continue

        table_name, col_name = actual_col.rsplit(".", 1)

        if table_name in known_ctes:
            # CTE references are resolved purely against the CTE's outputs.
            wanted = col_name.lower()
            if all(output.lower() != wanted for output in known_ctes[table_name]):
                _flag(
                    f"select_cte_col_not_found_{context}_{table_name}_{col_name}",
                    f"Column '{col_name}' not in CTE '{table_name}' outputs for select in {context}",
                    {
                        "table": table_name,
                        "column": col_name,
                        "location": context,
                    },
                )
            continue

        if table_name not in allowed_tables:
            _flag(
                f"select_table_not_allowed_{context}_{table_name}",
                f"Table '{table_name}' not in allowed tables for select in {context}",
                {"table": table_name, "location": context},
            )
            continue

        if table_name in schema.tables:
            columns = schema.tables[table_name].columns
            if not (columns.get(col_name) or columns.get(col_name.lower())):
                _flag(
                    f"select_col_not_found_{context}_{table_name}_{col_name}",
                    f"Column '{col_name}' not in table '{table_name}' for select in {context}",
                    {
                        "table": table_name,
                        "column": col_name,
                        "location": context,
                    },
                )

        label = f"select_{idx}"
        scalar_name, agg_name = extract_functions_from_term(sc.expr.primary_term)
        found.extend(_validate_agg_func_valid(agg_name, label, context))
        found.extend(_validate_scalar_func_valid(scalar_name, label, context))
        found.extend(validate_expr_no_extract_epoch(sc.expr, label, context))

    debug(f"[validation_schema.validate_select_cols_schema] {len(found)} issues in {context}")
    return found
346
+
347
+
348
def validate_order_by_cols_schema(
    order_by_cols: list[OrderByCol],
    schema: SchemaGraph,
    allowed_tables: set[str],
    cte_outputs: dict[str, dict[str, CteOutputColumnMeta]] | None = None,
    context: str = "main",
) -> list[IntentIssue]:
    """Check each ``OrderByCol`` for qualification, table access, column existence and direction.

    The column checks mirror the select validator: the primary column is
    unwrapped, split on its last ``"."``, and resolved either against a CTE in
    ``cte_outputs`` (case-insensitive column match) or against
    ``allowed_tables`` / ``schema.tables``. Entries that pass the table check
    additionally have their ``direction`` compared with ``"ASC"``/``"DESC"`` and
    the function names of their primary term and any ``EXTRACT`` units validated.
    An empty ``order_by_cols`` is valid.

    Args:

        order_by_cols: List of ``OrderByCol`` instances to validate.
        schema: The ``SchemaGraph``.
        allowed_tables: Set of table names permitted in this query context.
        cte_outputs: Dict of CTE name to output column metadata.
        context: Label used in issue IDs and messages.

    Returns:

        List of ``IntentIssue`` objects.
    """
    if not order_by_cols:
        return []

    found: list[IntentIssue] = []

    def _flag(issue_id: str, message: str, details: dict[str, Any]) -> None:
        # Every issue raised here is an error in the order_by_validity category.
        found.append(
            IntentIssue(
                issue_id=issue_id,
                category="order_by_validity",
                severity="error",
                message=message,
                context=details,
            )
        )

    known_ctes = cte_outputs or {}
    for idx, obc in enumerate(order_by_cols):
        raw_col = obc.expr.primary_column
        if not raw_col:
            _flag(
                f"order_by_col_empty_{context}_{idx}",
                f"OrderByCol at index {idx} has empty col in {context}",
                {"index": idx, "location": context},
            )
            continue

        actual_col = extract_col_from_scalar_wrapper(raw_col)
        if "." not in actual_col:
            _flag(
                f"order_by_unqualified_{context}_{actual_col}",
                f"order_by_cols must be qualified as table.column, got '{actual_col}' in {context}",
                {"column": actual_col, "location": context},
            )
            continue

        table_name, col_name = actual_col.rsplit(".", 1)

        if table_name in known_ctes:
            # CTE references are resolved purely against the CTE's outputs.
            wanted = col_name.lower()
            if all(output.lower() != wanted for output in known_ctes[table_name]):
                _flag(
                    f"order_by_cte_col_not_found_{context}_{table_name}_{col_name}",
                    f"Column '{col_name}' not in CTE '{table_name}' outputs for order_by in {context}",
                    {
                        "table": table_name,
                        "column": col_name,
                        "location": context,
                    },
                )
            continue

        if table_name not in allowed_tables:
            _flag(
                f"order_by_table_not_allowed_{context}_{table_name}",
                f"Table '{table_name}' not in allowed tables for order_by in {context}",
                {"table": table_name, "location": context},
            )
            continue

        if table_name in schema.tables:
            columns = schema.tables[table_name].columns
            if not (columns.get(col_name) or columns.get(col_name.lower())):
                _flag(
                    f"order_by_col_not_found_{context}_{table_name}_{col_name}",
                    f"Column '{col_name}' not in table '{table_name}' for order_by in {context}",
                    {
                        "table": table_name,
                        "column": col_name,
                        "location": context,
                    },
                )

        if obc.direction not in ("ASC", "DESC"):
            _flag(
                f"order_by_invalid_direction_{context}_{idx}",
                f"OrderByCol direction must be 'ASC' or 'DESC', got '{obc.direction}' in {context}",
                {"direction": obc.direction, "location": context},
            )

        label = f"order_by_{idx}"
        scalar_name, agg_name = extract_functions_from_term(obc.expr.primary_term)
        found.extend(_validate_agg_func_valid(agg_name, label, context))
        found.extend(_validate_scalar_func_valid(scalar_name, label, context))
        found.extend(validate_expr_no_extract_epoch(obc.expr, label, context))

    debug(f"[validation_schema.validate_order_by_cols_schema] {len(found)} issues in {context}")
    return found
461
+
462
+
463
def validate_group_by_cols_schema(
    group_by_cols: list[NormalizedExpr],
    schema: SchemaGraph,
    allowed_tables: set[str],
    cte_outputs: dict[str, dict[str, CteOutputColumnMeta]] | None = None,
    context: str = "main",
) -> list[IntentIssue]:
    """Validate ``group_by_cols`` against the schema and column groupability.

    Each expression's ``primary_column`` is split on its last ``"."``. A
    reference to a CTE named in ``cte_outputs`` is matched case-insensitively
    against that CTE's output columns; a match whose metadata has a falsy
    ``groupable`` attribute produces a warning. Any other reference must name
    a table in ``allowed_tables`` and, when that table is present in
    ``schema.tables``, an existing column; a column whose ``is_groupable`` is
    falsy produces a warning. A missing (``None``) or empty ``primary_column``
    is reported as an unqualified column instead of raising.

    Args:

        group_by_cols: List of ``NormalizedExpr`` instances to validate.
        schema: The ``SchemaGraph``.
        allowed_tables: Set of table names permitted in this query context.
        cte_outputs: Dict of CTE name to output column metadata.
        context: Label used in issue IDs and messages.

    Returns:

        List of ``IntentIssue`` objects (errors and warnings).
    """
    if not group_by_cols:
        return []

    issues: list[IntentIssue] = []

    def _report(severity: str, issue_id: str, message: str, details: dict[str, Any]) -> None:
        # All issues from this validator share the group_by_validity category.
        issues.append(
            IntentIssue(
                issue_id=issue_id,
                category="group_by_validity",
                severity=severity,
                message=message,
                context=details,
            )
        )

    cte_outputs = cte_outputs or {}
    for g in group_by_cols:
        # Coerce None to "" so the membership test below cannot raise
        # TypeError; an absent column is then reported as unqualified.
        col = g.primary_column or ""
        if "." not in col:
            _report(
                "error",
                f"group_by_unqualified_{context}_{col}",
                f"group_by_cols must be qualified as table.column, got '{col}' in {context}",
                {"column": col, "location": context},
            )
            continue

        table_name, col_name = col.rsplit(".", 1)

        if table_name in cte_outputs:
            cte_cols = cte_outputs[table_name]
            wanted = col_name.lower()
            matched_key = next((c for c in cte_cols if c.lower() == wanted), None)
            if not matched_key:
                _report(
                    "error",
                    f"group_by_cte_col_not_found_{context}_{table_name}_{col_name}",
                    f"Column '{col_name}' not in CTE '{table_name}' outputs for group_by in {context}",
                    {
                        "table": table_name,
                        "column": col_name,
                        "location": context,
                    },
                )
            elif not cte_cols[matched_key].groupable:
                cte_meta = cte_cols[matched_key]
                _report(
                    "warning",
                    f"group_by_cte_col_not_groupable_{context}_{table_name}_{col_name}",
                    f"CTE column '{table_name}.{col_name}' (role={cte_meta.role}) is not recommended for GROUP BY in {context}",
                    {
                        "table": table_name,
                        "column": col_name,
                        "role": cte_meta.role,
                        "location": context,
                    },
                )
            # CTE references are never checked against the base schema.
            continue

        if table_name not in allowed_tables:
            _report(
                "error",
                f"group_by_table_not_allowed_{context}_{table_name}",
                f"Table '{table_name}' not in allowed tables for group_by in {context}",
                {"table": table_name, "location": context},
            )
            continue

        if table_name in schema.tables:
            table_meta = schema.tables[table_name]
            col_meta = table_meta.columns.get(col_name) or table_meta.columns.get(col_name.lower())
            if not col_meta:
                _report(
                    "error",
                    f"group_by_col_not_found_{context}_{table_name}_{col_name}",
                    f"Column '{col_name}' not in table '{table_name}' for group_by in {context}",
                    {
                        "table": table_name,
                        "column": col_name,
                        "location": context,
                    },
                )
            elif not col_meta.is_groupable:
                _report(
                    "warning",
                    f"group_by_col_not_groupable_{context}_{table_name}_{col_name}",
                    f"Column '{col_name}' (role={col_meta.role}) is not recommended for grouping in {context}",
                    {
                        "table": table_name,
                        "column": col_name,
                        "role": col_meta.role,
                        "location": context,
                    },
                )

    debug(f"[validation_schema.validate_group_by_cols_schema] {len(issues)} issues in {context}")
    return issues
580
+
581
+
582
+ def _validate_filter_col(
583
+ col_expr: str,
584
+ schema: SchemaGraph,
585
+ allowed_tables: set[str],
586
+ cte_outputs: dict[str, dict[str, CteOutputColumnMeta]],
587
+ context: str,
588
+ side: str,
589
+ param_key: str,
590
+ ) -> list[IntentIssue]:
591
+ """Validate a single filter column reference (left or right side of a ``FilterParam``).
592
+
593
+ Args:
594
+
595
+ col_expr: The column expression string to validate.
596
+ schema: The ``SchemaGraph``.
597
+ allowed_tables: Set of table names permitted in this context.
598
+ cte_outputs: Dict of CTE name to output column metadata.
599
+ context: Label used in issue IDs and messages.
600
+ side: ``"left_col"`` or ``"right_col"``.
601
+ param_key: The ``param_key`` of the ``FilterParam`` (for issue IDs).
602
+
603
+ Returns:
604
+
605
+ List of ``IntentIssue`` objects.
606
+ """
607
+ issues = []
608
+ if not col_expr:
609
+ return issues
610
+ actual_col = extract_col_from_scalar_wrapper(col_expr)
611
+ if "." not in actual_col:
612
+ issues.append(
613
+ IntentIssue(
614
+ issue_id=f"filter_{side}_unqualified_{context}_{actual_col}",
615
+ category="filter_validity",
616
+ severity="error",
617
+ message=f"Filter {side} must be qualified as table.column, got '{actual_col}' in {context}",
618
+ context={
619
+ "column": actual_col,
620
+ "side": side,
621
+ "param_key": param_key,
622
+ "location": context,
623
+ },
624
+ )
625
+ )
626
+ return issues
627
+ table_name, col_name = actual_col.rsplit(".", 1)
628
+ if table_name in cte_outputs:
629
+ if col_name.lower() not in [c.lower() for c in cte_outputs[table_name]]:
630
+ issues.append(
631
+ IntentIssue(
632
+ issue_id=f"filter_{side}_cte_col_not_found_{context}_{table_name}_{col_name}",
633
+ category="filter_validity",
634
+ severity="error",
635
+ message=f"Column '{col_name}' not in CTE '{table_name}' outputs for filter {side} in {context}",
636
+ context={
637
+ "table": table_name,
638
+ "column": col_name,
639
+ "side": side,
640
+ "param_key": param_key,
641
+ "location": context,
642
+ },
643
+ )
644
+ )
645
+ return issues
646
+ if table_name not in allowed_tables:
647
+ issues.append(
648
+ IntentIssue(
649
+ issue_id=f"filter_{side}_table_not_allowed_{context}_{table_name}",
650
+ category="filter_validity",
651
+ severity="error",
652
+ message=f"Table '{table_name}' not in allowed tables for filter {side} in {context}",
653
+ context={
654
+ "table": table_name,
655
+ "side": side,
656
+ "param_key": param_key,
657
+ "location": context,
658
+ },
659
+ )
660
+ )
661
+ return issues
662
+ if table_name not in schema.tables:
663
+ issues.append(
664
+ IntentIssue(
665
+ issue_id=f"filter_{side}_table_not_in_schema_{context}_{table_name}",
666
+ category="filter_validity",
667
+ severity="error",
668
+ message=f"Table '{table_name}' not in schema for filter {side} in {context}",
669
+ context={
670
+ "table": table_name,
671
+ "side": side,
672
+ "param_key": param_key,
673
+ "location": context,
674
+ },
675
+ )
676
+ )
677
+ return issues
678
+ table_meta = schema.tables[table_name]
679
+ col_meta = table_meta.columns.get(col_name) or table_meta.columns.get(col_name.lower())
680
+ if not col_meta:
681
+ issues.append(
682
+ IntentIssue(
683
+ issue_id=f"filter_{side}_col_not_found_{context}_{table_name}_{col_name}",
684
+ category="filter_validity",
685
+ severity="error",
686
+ message=f"Column '{col_name}' not in table '{table_name}' for filter {side} in {context}",
687
+ context={
688
+ "table": table_name,
689
+ "column": col_name,
690
+ "side": side,
691
+ "param_key": param_key,
692
+ "location": context,
693
+ },
694
+ )
695
+ )
696
+ return issues
697
+
698
+
699
def validate_filters_schema(
    filters_param: list[FilterParam],
    schema: SchemaGraph,
    allowed_tables: set[str],
    cte_outputs: dict[str, dict[str, CteOutputColumnMeta]] | None = None,
    context: str = "main",
) -> list[IntentIssue]:
    """Validate ``FilterParam`` entries against the schema.

    For each filter, issues are collected in this order: left column, right
    column (if a right expression exists), operator membership in
    ``VALID_FILTER_OPS``, value-type membership in ``VALID_VALUE_TYPES`` (only
    for literal comparisons, i.e. no right expression and an operator other
    than ``"is null"``/``"is not null"``), scalar function names on both
    sides, disallowed ``EXTRACT`` units on both sides, and finally ``bool_op``
    being ``"AND"`` or ``"OR"``. An empty ``filters_param`` is valid.

    Args:

        filters_param: List of ``FilterParam`` instances to validate.
        schema: The ``SchemaGraph``.
        allowed_tables: Set of table names permitted in this context.
        cte_outputs: Dict of CTE name to output column metadata.
        context: Label used in issue IDs and messages.

    Returns:

        List of ``IntentIssue`` objects.
    """
    if not filters_param:
        return []

    found: list[IntentIssue] = []

    def _flag(issue_id: str, message: str, details: dict[str, Any]) -> None:
        # Every issue raised directly here is a filter_validity error.
        found.append(
            IntentIssue(
                issue_id=issue_id,
                category="filter_validity",
                severity="error",
                message=message,
                context=details,
            )
        )

    known_ctes = cte_outputs or {}
    for fp in filters_param:
        param_key = fp.param_key or "unknown"
        has_right = bool(fp.right_expr)
        left_label = f"filter_{param_key}_left"
        right_label = f"filter_{param_key}_right"

        found += _validate_filter_col(
            fp.left_expr.primary_column,
            schema,
            allowed_tables,
            known_ctes,
            context,
            "left_col",
            param_key,
        )
        if has_right:
            found += _validate_filter_col(
                fp.right_expr.primary_column,
                schema,
                allowed_tables,
                known_ctes,
                context,
                "right_col",
                param_key,
            )

        if fp.op not in VALID_FILTER_OPS:
            _flag(
                f"filter_invalid_op_{context}_{fp.op}",
                f"Invalid filter operator '{fp.op}' in {context}",
                {
                    "operator": fp.op,
                    "param_key": param_key,
                    "location": context,
                },
            )

        # value_type only matters when the filter compares against a literal.
        compares_literal = not has_right and fp.op not in ("is null", "is not null")
        if compares_literal and fp.value_type not in VALID_VALUE_TYPES:
            _flag(
                f"filter_invalid_value_type_{context}_{fp.value_type}",
                f"Invalid filter value_type '{fp.value_type}' in {context}",
                {
                    "value_type": fp.value_type,
                    "param_key": param_key,
                    "location": context,
                },
            )

        left_scalar, _ = extract_functions_from_term(fp.left_expr.primary_term)
        right_scalar = extract_functions_from_term(fp.right_expr.primary_term)[0] if has_right else None
        found += _validate_scalar_func_valid(left_scalar, left_label, context)
        found += _validate_scalar_func_valid(right_scalar, right_label, context)

        found += validate_expr_no_extract_epoch(fp.left_expr, left_label, context)
        if has_right:
            found += validate_expr_no_extract_epoch(fp.right_expr, right_label, context)

        if fp.bool_op not in ("AND", "OR"):
            _flag(
                f"filter_invalid_bool_op_{context}_{fp.bool_op}",
                f"Invalid filter bool_op '{fp.bool_op}' in {context}. Must be 'AND' or 'OR'.",
                {
                    "bool_op": fp.bool_op,
                    "param_key": param_key,
                    "location": context,
                },
            )

    debug(f"[validation_schema.validate_filters_schema] {len(found)} issues in {context}")
    return found
811
+
812
+
813
def extract_agg_col(agg_expr: str) -> tuple:
    """Split a ``FUNC(...)`` expression into ``(func, target, has_distinct)``.

    ``func`` is the lowercased name before the first parenthesis. The text
    between the outermost parentheses has a leading ``"DISTINCT "`` removed
    (setting ``has_distinct``) and is then passed through
    ``extract_col_from_scalar_wrapper`` to obtain ``target``.

    Args:

        agg_expr: The aggregation expression string.

    Returns:

        Tuple of ``(func, target, has_distinct)``, or ``(None, None, False)``
        when ``agg_expr`` is empty or does not have the ``FUNC(...)`` shape.
    """
    no_match = (None, None, False)
    if not agg_expr:
        return no_match
    parsed = re.match(r"^\s*(\w+)\s*\(\s*(.*)\s*\)\s*$", agg_expr, re.IGNORECASE)
    if parsed is None:
        return no_match
    func_name = parsed.group(1).lower()
    body = parsed.group(2).strip()
    distinct = body.upper().startswith("DISTINCT ")
    if distinct:
        # 9 == len("DISTINCT "): drop the keyword and its trailing space.
        body = body[9:].strip()
    return (func_name, extract_col_from_scalar_wrapper(body), distinct)
840
+
841
+
842
def extract_functions_from_term(term: str) -> tuple[str | None, str | None]:
    """Identify the scalar and aggregation function names used in a term.

    The term is parsed with ``extract_agg_col``. If its outermost function is
    in ``VALID_AGG_FUNCS`` it is reported as the aggregation with no scalar.
    Otherwise the outermost function is reported as the scalar, and its
    argument is parsed once more to look for a directly nested aggregation.

    Args:

        term: The expression term string.

    Returns:

        Tuple of ``(scalar_func, agg_func)``; each element is a lowercase
        function name or ``None``. ``(None, None)`` when the term is not a
        function call.
    """
    parsed = extract_agg_col(term)
    if len(parsed) != 3 or not parsed[0]:
        return None, None

    outer_func, outer_arg = parsed[0], parsed[1]
    if outer_func in VALID_AGG_FUNCS:
        return None, outer_func

    nested = extract_agg_col(outer_arg) if outer_arg else (None, None, False)
    nested_is_agg = len(nested) == 3 and bool(nested[0]) and nested[0] in VALID_AGG_FUNCS
    return outer_func, (nested[0] if nested_is_agg else None)
865
+
866
+
867
+ def _validate_having_agg(
868
+ agg_expr: str,
869
+ schema: SchemaGraph,
870
+ allowed_tables: set[str],
871
+ cte_outputs: dict[str, dict[str, CteOutputColumnMeta]],
872
+ context: str,
873
+ side: str,
874
+ param_key: str,
875
+ ) -> list[IntentIssue]:
876
+ """Validate a single HAVING aggregation expression (left or right side).
877
+
878
+ Args:
879
+
880
+ agg_expr: The aggregation expression string (for example ``"COUNT(table.col)"``).
881
+ schema: The ``SchemaGraph``.
882
+ allowed_tables: Set of table names permitted in this context.
883
+ cte_outputs: Dict of CTE name to output column metadata.
884
+ context: Label used in issue IDs and messages.
885
+ side: ``"left_agg"`` or ``"right_agg"``.
886
+ param_key: The ``param_key`` of the ``HavingParam`` (for issue IDs).
887
+
888
+ Returns:
889
+
890
+ List of ``IntentIssue`` objects.
891
+ """
892
+ issues = []
893
+ if not agg_expr:
894
+ return issues
895
+ cte_col_match = re.match(r"^\s*(\w+)\s*\.\s*(\w+)\s*$", agg_expr.strip())
896
+ if cte_col_match:
897
+ cte_name, col_name = cte_col_match.group(1), cte_col_match.group(2)
898
+ cte_cols = cte_outputs.get(cte_name, {})
899
+ col_meta = next(
900
+ (v for k, v in cte_cols.items() if k.lower() == col_name.lower()),
901
+ None,
902
+ )
903
+ if col_meta and (
904
+ col_meta.source == "aggregation" or (col_meta.agg_func or "").strip()
905
+ ):
906
+ return issues
907
+ result = extract_agg_col(agg_expr)
908
+ if len(result) != 3:
909
+ issues.append(
910
+ IntentIssue(
911
+ issue_id=f"having_{side}_invalid_format_{context}",
912
+ category="having_validity",
913
+ severity="error",
914
+ message=f"Invalid aggregation format in HAVING {side}: '{agg_expr}' in {context}",
915
+ context={
916
+ "aggregation": agg_expr,
917
+ "side": side,
918
+ "param_key": param_key,
919
+ "location": context,
920
+ },
921
+ )
922
+ )
923
+ return issues
924
+ func, actual_target, has_distinct = result
925
+ if not func:
926
+ issues.append(
927
+ IntentIssue(
928
+ issue_id=f"having_{side}_invalid_format_{context}_{agg_expr}",
929
+ category="having_validity",
930
+ severity="error",
931
+ message=f"Invalid aggregation format in HAVING {side}: '{agg_expr}' in {context}",
932
+ context={
933
+ "aggregation": agg_expr,
934
+ "side": side,
935
+ "param_key": param_key,
936
+ "location": context,
937
+ },
938
+ )
939
+ )
940
+ return issues
941
+ if func not in VALID_AGGREGATION_FUNCTIONS:
942
+ issues.append(
943
+ IntentIssue(
944
+ issue_id=f"having_{side}_invalid_func_{context}_{func}",
945
+ category="having_validity",
946
+ severity="error",
947
+ message=f"Invalid aggregation function '{func}' in HAVING {side} for {context}",
948
+ context={
949
+ "function": func,
950
+ "side": side,
951
+ "param_key": param_key,
952
+ "location": context,
953
+ },
954
+ )
955
+ )
956
+ return issues
957
+ if has_distinct and func != "count":
958
+ issues.append(
959
+ IntentIssue(
960
+ issue_id=f"having_{side}_distinct_not_count_{context}_{func}",
961
+ category="having_validity",
962
+ severity="error",
963
+ message=f"DISTINCT only allowed with COUNT in HAVING, not {func.upper()} in {context}",
964
+ context={
965
+ "function": func,
966
+ "aggregation": agg_expr,
967
+ "side": side,
968
+ "param_key": param_key,
969
+ "location": context,
970
+ },
971
+ )
972
+ )
973
+ return issues
974
+ if actual_target == "*":
975
+ if func != "count":
976
+ issues.append(
977
+ IntentIssue(
978
+ issue_id=f"having_{side}_star_not_count_{context}_{func}",
979
+ category="having_validity",
980
+ severity="error",
981
+ message=f"'*' only allowed with COUNT in HAVING, not {func.upper()} in {context}",
982
+ context={
983
+ "function": func,
984
+ "side": side,
985
+ "param_key": param_key,
986
+ "location": context,
987
+ },
988
+ )
989
+ )
990
+ return issues
991
+ if "." not in actual_target:
992
+ issues.append(
993
+ IntentIssue(
994
+ issue_id=f"having_{side}_unqualified_{context}_{actual_target}",
995
+ category="having_validity",
996
+ severity="error",
997
+ message=f"HAVING aggregation target must be qualified as table.column, got '{actual_target}' in {context}",
998
+ context={
999
+ "target": actual_target,
1000
+ "side": side,
1001
+ "param_key": param_key,
1002
+ "location": context,
1003
+ },
1004
+ )
1005
+ )
1006
+ return issues
1007
+ table_name, col_name = actual_target.rsplit(".", 1)
1008
+ if table_name in cte_outputs:
1009
+ if col_name.lower() not in [c.lower() for c in cte_outputs[table_name]]:
1010
+ issues.append(
1011
+ IntentIssue(
1012
+ issue_id=f"having_{side}_cte_col_not_found_{context}_{table_name}_{col_name}",
1013
+ category="having_validity",
1014
+ severity="error",
1015
+ message=f"Column '{col_name}' not in CTE '{table_name}' outputs for HAVING {side} in {context}",
1016
+ context={
1017
+ "table": table_name,
1018
+ "column": col_name,
1019
+ "side": side,
1020
+ "param_key": param_key,
1021
+ "location": context,
1022
+ },
1023
+ )
1024
+ )
1025
+ return issues
1026
+ if table_name not in allowed_tables:
1027
+ issues.append(
1028
+ IntentIssue(
1029
+ issue_id=f"having_{side}_table_not_allowed_{context}_{table_name}",
1030
+ category="having_validity",
1031
+ severity="error",
1032
+ message=f"Table '{table_name}' not in allowed tables for HAVING {side} in {context}",
1033
+ context={
1034
+ "table": table_name,
1035
+ "side": side,
1036
+ "param_key": param_key,
1037
+ "location": context,
1038
+ },
1039
+ )
1040
+ )
1041
+ return issues
1042
+ if table_name in schema.tables:
1043
+ table_meta = schema.tables[table_name]
1044
+ col_meta = table_meta.columns.get(col_name) or table_meta.columns.get(col_name.lower())
1045
+ if not col_meta:
1046
+ issues.append(
1047
+ IntentIssue(
1048
+ issue_id=f"having_{side}_col_not_found_{context}_{table_name}_{col_name}",
1049
+ category="having_validity",
1050
+ severity="error",
1051
+ message=f"Column '{col_name}' not in table '{table_name}' for HAVING {side} in {context}",
1052
+ context={
1053
+ "table": table_name,
1054
+ "column": col_name,
1055
+ "side": side,
1056
+ "param_key": param_key,
1057
+ "location": context,
1058
+ },
1059
+ )
1060
+ )
1061
+ elif func != "count":
1062
+ value_type = col_meta.value_type or "string"
1063
+ allowed_types = AGGREGATION_ALLOWED_COLUMN_TYPES.get(func, [])
1064
+ if value_type not in allowed_types:
1065
+ issues.append(
1066
+ IntentIssue(
1067
+ issue_id=f"having_{side}_type_mismatch_{context}_{func}_{col_name}",
1068
+ category="having_validity",
1069
+ severity="error",
1070
+ message=f"Cannot use {func.upper()} on column '{actual_target}' of type '{col_meta.data_type}' in HAVING {side} for {context}",
1071
+ context={
1072
+ "function": func,
1073
+ "column": actual_target,
1074
+ "column_type": col_meta.data_type,
1075
+ "side": side,
1076
+ "param_key": param_key,
1077
+ "location": context,
1078
+ },
1079
+ )
1080
+ )
1081
+ return issues
1082
+
1083
+
1084
def _reconstruct_agg_expr(expr: NormalizedExpr) -> str:
    """Rebuild the ``FUNC(column)`` text for one side of a ``HavingParam``.

    The aggregation function is read from ``expr.agg_func``; when that is empty and the expression has ``add_groups``, the first group's ``agg_func`` is used instead. The result is the string form that ``_validate_having_agg`` parses.

    Args:

        expr: The ``NormalizedExpr`` from a ``HavingParam`` side.

    Returns:

        ``"<FUNC>(<primary_term>)"`` with the function name upper-cased, or ``expr.primary_term`` unchanged when no aggregation function is found.
    """
    func_name = expr.agg_func
    if not func_name:
        groups = expr.add_groups
        if groups:
            # Fall back to the function recorded on the first additive group.
            func_name = groups[0].agg_func
    term = expr.primary_term
    return f"{func_name.upper()}({term})" if func_name else term
1104
+
1105
+
1106
def validate_having_schema(
    having_param: list[HavingParam],
    schema: SchemaGraph,
    allowed_tables: set[str],
    cte_outputs: dict[str, dict[str, CteOutputColumnMeta]] | None = None,
    context: str = "main",
) -> list[IntentIssue]:
    """Run every schema-level check on a list of ``HavingParam`` entries.

    For each entry the checks run in this order: left aggregation, right aggregation (if present), operator membership in ``VALID_HAVING_OPS``, value type and presence of a comparison value (only when there is no right-hand expression), scalar function validity on both sides, the extract-epoch check on both sides, and finally ``bool_op``.

    Args:

        having_param: List of ``HavingParam`` instances to validate.
        schema: The ``SchemaGraph``.
        allowed_tables: Set of table names permitted in this context.
        cte_outputs: Dict of CTE name to output column metadata; ``None`` is treated as empty.
        context: Label used in issue IDs and messages.

    Returns:

        List of ``IntentIssue`` objects; empty when ``having_param`` is empty.
    """
    if not having_param:
        return []
    known_ctes = cte_outputs or {}
    found = []
    for entry in having_param:
        key = entry.param_key or "unknown"
        left = entry.left_expr
        right = entry.right_expr
        operator = entry.op

        found += _validate_having_agg(
            _reconstruct_agg_expr(left), schema, allowed_tables, known_ctes, context, "left_agg", key
        )
        if right:
            found += _validate_having_agg(
                _reconstruct_agg_expr(right), schema, allowed_tables, known_ctes, context, "right_agg", key
            )

        if operator not in VALID_HAVING_OPS:
            found.append(
                IntentIssue(
                    issue_id=f"having_invalid_op_{context}_{operator}",
                    category="having_validity",
                    severity="error",
                    message=f"Invalid HAVING operator '{operator}' in {context}",
                    context={"operator": operator, "param_key": key, "location": context},
                )
            )

        if not right:
            # A literal comparison: the value type and the value itself must be usable.
            value_type = entry.value_type
            if value_type not in VALID_VALUE_TYPES:
                found.append(
                    IntentIssue(
                        issue_id=f"having_invalid_value_type_{context}_{value_type}",
                        category="having_validity",
                        severity="error",
                        message=f"Invalid HAVING value_type '{value_type}' in {context}",
                        context={"value_type": value_type, "param_key": key, "location": context},
                    )
                )
            if operator not in ("is null", "is not null") and entry.raw_value is None:
                found.append(
                    IntentIssue(
                        issue_id=f"having_missing_value_{context}_{key}",
                        category="having_validity",
                        severity="error",
                        message=(
                            f"HAVING parameter '{key}' has no comparison value in {context}. "
                            "Provide a numeric or string raw_value."
                        ),
                        context={"param_key": key, "op": operator, "location": context},
                    )
                )

        left_scalar, _ = extract_functions_from_term(left.primary_term)
        if right:
            right_scalar, _ = extract_functions_from_term(right.primary_term)
        else:
            right_scalar = None
        left_label = f"having_{key}_left"
        right_label = f"having_{key}_right"
        found += _validate_scalar_func_valid(left_scalar, left_label, context)
        found += _validate_scalar_func_valid(right_scalar, right_label, context)
        found += validate_expr_no_extract_epoch(left, left_label, context)
        if right:
            found += validate_expr_no_extract_epoch(right, right_label, context)

        bool_op = entry.bool_op
        if bool_op not in ("AND", "OR"):
            found.append(
                IntentIssue(
                    issue_id=f"having_invalid_bool_op_{context}_{bool_op}",
                    category="having_validity",
                    severity="error",
                    message=f"Invalid HAVING bool_op '{bool_op}' in {context}. Must be 'AND' or 'OR'.",
                    context={"bool_op": bool_op, "param_key": key, "location": context},
                )
            )
    debug(f"[validation_schema.validate_having_schema] {len(found)} issues in {context}")
    return found
1235
+
1236
+
1237
def validate_filter_ops_per_column(
    filters_param: list[FilterParam],
    schema: SchemaGraph,
    cte_outputs: dict[str, dict[str, CteOutputColumnMeta]] | None = None,
    context: str = "main",
) -> list[IntentIssue]:
    """Check each filter's operator against the operators its column allows.

    Filters whose left expression has no primary column, or whose column is not ``table.column`` qualified, are skipped. A table name found in ``cte_outputs`` is resolved against the CTE's output columns (case-insensitively) and never against the schema. Columns that cannot be resolved produce no issue here.

    Args:

        filters_param: List of ``FilterParam`` instances to validate.
        schema: The ``SchemaGraph``.
        cte_outputs: Dict of CTE name to output column metadata; ``None`` is treated as empty.
        context: Label used in issue IDs and messages.

    Returns:

        List of ``IntentIssue`` objects: errors for disallowed operators, warnings for non-filterable columns used with anything other than a NULL check.
    """
    if not filters_param:
        return []
    cte_lookup = cte_outputs or {}
    null_checks = ("is null", "is not null")
    problems = []
    for flt in filters_param:
        raw_col = flt.left_expr.primary_column
        if not raw_col:
            continue
        actual_col = extract_col_from_scalar_wrapper(raw_col)
        if "." not in actual_col:
            continue
        table_name, col_name = actual_col.rsplit(".", 1)
        operator = flt.op

        if table_name in cte_lookup:
            outputs = cte_lookup[table_name]
            wanted = col_name.lower()
            hit = next((name for name in outputs if name.lower() == wanted), None)
            if hit:
                cte_meta = outputs[hit]
                allowed = cte_meta.get_valid_filter_ops()
                # An empty operator list on a CTE column means "no restriction".
                if allowed and operator not in allowed:
                    problems.append(
                        IntentIssue(
                            issue_id=f"filter_op_invalid_for_cte_{context}_{actual_col}_{operator}",
                            category="filter_validity",
                            severity="error",
                            message=f"Operator '{operator}' not valid for CTE column '{actual_col}' (role={cte_meta.role}, type={cte_meta.data_type}) in {context}. Valid: {allowed}",
                            context={
                                "column": actual_col,
                                "operator": operator,
                                "role": cte_meta.role,
                                "data_type": cte_meta.data_type,
                                "valid_ops": allowed,
                                "location": context,
                            },
                        )
                    )
                if not cte_meta.filterable and operator not in null_checks:
                    problems.append(
                        IntentIssue(
                            issue_id=f"filter_cte_col_not_filterable_{context}_{actual_col}",
                            category="filter_validity",
                            severity="warning",
                            message=f"CTE column '{actual_col}' (role={cte_meta.role}) is not recommended for filtering in {context}",
                            context={
                                "column": actual_col,
                                "role": cte_meta.role,
                                "location": context,
                            },
                        )
                    )
            continue

        if table_name not in schema.tables:
            continue
        columns = schema.tables[table_name].columns
        col_meta = columns.get(col_name) or columns.get(col_name.lower())
        if not col_meta:
            continue
        allowed = col_meta.get_valid_filter_ops()
        if operator not in allowed:
            problems.append(
                IntentIssue(
                    issue_id=f"filter_op_invalid_for_type_{context}_{actual_col}_{operator}",
                    category="filter_validity",
                    severity="error",
                    message=f"Operator '{operator}' not valid for column '{actual_col}' (role={col_meta.role}, type={col_meta.data_type}) in {context}. Valid: {allowed}",
                    context={
                        "column": actual_col,
                        "operator": operator,
                        "role": col_meta.role,
                        "data_type": col_meta.data_type,
                        "valid_ops": allowed,
                        "location": context,
                    },
                )
            )
        if not col_meta.is_filterable and operator not in null_checks:
            problems.append(
                IntentIssue(
                    issue_id=f"filter_col_not_filterable_{context}_{actual_col}",
                    category="filter_validity",
                    severity="warning",
                    message=f"Column '{actual_col}' (role={col_meta.role}) is not recommended for filtering in {context}",
                    context={
                        "column": actual_col,
                        "role": col_meta.role,
                        "location": context,
                    },
                )
            )
    debug(f"[validation_schema.validate_filter_ops_per_column] {len(problems)} issues in {context}")
    return problems
1346
+
1347
+
1348
def validate_having_ops_per_column(
    having_param: list[HavingParam],
    schema: SchemaGraph,
    cte_outputs: dict[str, dict[str, CteOutputColumnMeta]] | None = None,
    context: str = "main",
) -> list[IntentIssue]:
    """Validate that HAVING operators are valid for each column's type and role.

    The primary term of each side is parsed as an aggregation first; if that does not yield a qualified ``table.column`` target, the term is treated as a (possibly scalar-wrapped) column reference. Unqualified or unresolvable columns produce no issue. A column with an empty list of valid HAVING operators is treated as unrestricted.

    Args:

        having_param: List of ``HavingParam`` instances to validate.
        schema: The ``SchemaGraph``.
        cte_outputs: Dict of CTE name to output column metadata; ``None`` is treated as empty.
        context: Label used in issue IDs and messages.

    Returns:

        List of ``IntentIssue`` objects, one per side whose column disallows the entry's operator.
    """
    issues: list[IntentIssue] = []
    if not having_param:
        return []
    cte_outputs = cte_outputs or {}

    def _find_cte_meta(cte_cols: dict, col_name: str) -> Any:
        # Exact and lower-cased lookups first (the original behaviour), then a
        # case-insensitive scan so mixed-case CTE output keys are matched the
        # same way validate_filter_ops_per_column matches them.
        meta = cte_cols.get(col_name) or cte_cols.get(col_name.lower())
        if meta:
            return meta
        wanted = col_name.lower()
        return next((m for key, m in cte_cols.items() if key.lower() == wanted), None)

    def _check_term(term: str, op: str) -> None:
        parsed = extract_agg_col(term)
        if len(parsed) == 3 and parsed[0] and parsed[1] and "." in parsed[1]:
            table_name, col_name = parsed[1].rsplit(".", 1)
        else:
            col = extract_col_from_scalar_wrapper(term)
            if "." not in col:
                return
            table_name, col_name = col.rsplit(".", 1)

        # CTE outputs take precedence over schema tables of the same name.
        if table_name in cte_outputs:
            meta = _find_cte_meta(cte_outputs[table_name], col_name)
            id_tag, noun = "cte", "CTE column"
        elif table_name in schema.tables:
            columns = schema.tables[table_name].columns
            meta = columns.get(col_name) or columns.get(col_name.lower())
            id_tag, noun = "column", "column"
        else:
            return
        if not meta:
            return
        valid_ops = meta.get_valid_having_ops()
        if not valid_ops or op in valid_ops:
            return
        actual_col = f"{table_name}.{col_name}"
        issues.append(
            IntentIssue(
                issue_id=f"having_op_invalid_for_{id_tag}_{context}_{actual_col}_{op}",
                category="having_validity",
                severity="error",
                message=f"Operator '{op}' not valid for {noun} '{actual_col}' (role={meta.role}, type={meta.data_type}) in {context}. Valid: {valid_ops}",
                context={
                    "column": actual_col,
                    "operator": op,
                    "role": meta.role,
                    "data_type": meta.data_type,
                    "valid_ops": valid_ops,
                    "location": context,
                },
            )
        )

    for hp in having_param:
        _check_term(hp.left_expr.primary_term, hp.op)
        if hp.right_expr:
            _check_term(hp.right_expr.primary_term, hp.op)
    if issues:
        debug(f"[validation_schema.validate_having_ops_per_column] {len(issues)} issues in {context}")
    return issues
1424
+
1425
+
1426
def validate_date_window_units(
    filters_param: list[FilterParam],
    cte_steps: list[RuntimeCteStep] | None = None,
    context: str = "main",
) -> list[IntentIssue]:
    """Report ``date_window`` filters whose ``unit`` is not a recognised unit.

    Only filters with ``value_type == "date_window"`` and a dict ``raw_value`` are inspected; a missing ``unit`` key is not reported here. Main-query filters are checked first, then the filters of each CTE step in order.

    Args:

        filters_param: List of ``FilterParam`` instances for the main query.
        cte_steps: Optional list of CTE steps whose filters are also checked.
        context: Label used in issue IDs and messages.

    Returns:

        List of ``IntentIssue`` objects for invalid ``date_window`` units.
    """
    found: list[IntentIssue] = []

    def _labelled_filters():
        # Pair every filter with the human-readable location used in messages.
        for flt in filters_param:
            yield flt, f"{context} filter"
        for step in cte_steps or []:
            for flt in step.filters_param or []:
                yield flt, f"CTE '{step.cte_name}' filter"

    for flt, where in _labelled_filters():
        if flt.value_type != "date_window":
            continue
        payload = flt.raw_value
        if not isinstance(payload, dict):
            continue
        unit = payload.get("unit")
        if unit is None or unit in VALID_DATE_WINDOW_UNITS:
            continue
        col = flt.left_expr.primary_column
        allowed = sorted(VALID_DATE_WINDOW_UNITS)
        found.append(
            IntentIssue(
                issue_id=f"date_window_invalid_unit_{context}_{col}",
                category="filter_validity",
                severity="error",
                message=f"{where}: date_window filter on '{col}' has invalid unit '{unit}'. Valid: {allowed}",
                context={
                    "column": col,
                    "unit": unit,
                    "valid_units": allowed,
                    "location": context,
                },
            )
        )

    if found:
        debug(f"[validation_schema.validate_date_window_units] {len(found)} invalid units")
    return found
1480
+
1481
+
1482
def validate_date_diff_units(
    filters_param: list[FilterParam],
    cte_steps: list[RuntimeCteStep] | None = None,
    context: str = "main",
) -> list[IntentIssue]:
    """Report ``date_diff`` filters with an unknown unit or an unusable amount.

    Only filters with ``value_type == "date_diff"`` and a dict ``raw_value`` are inspected. A ``unit`` that is present must be in ``VALID_DATE_DIFF_UNITS``. An ``amount`` that is present and is not already an ``int``/``float`` must be convertible with ``int()``. Missing keys are not reported here.

    Args:

        filters_param: List of ``FilterParam`` instances for the main query.
        cte_steps: Optional list of CTE steps whose filters are also checked.
        context: Label used in issue IDs and messages.

    Returns:

        List of ``IntentIssue`` objects for invalid ``date_diff`` config.
    """
    found: list[IntentIssue] = []

    def _label(flt: FilterParam) -> str:
        # Best available identifier for the filter, used in issue IDs.
        left = flt.left_expr
        return left.primary_column or left.primary_term or flt.param_key or "expr"

    def _inspect(flt: FilterParam, where: str) -> None:
        if flt.value_type != "date_diff":
            return
        payload = flt.raw_value
        if not isinstance(payload, dict):
            return
        unit = payload.get("unit")
        amount = payload.get("amount")
        if unit is not None and unit not in VALID_DATE_DIFF_UNITS:
            col = _label(flt)
            allowed = sorted(VALID_DATE_DIFF_UNITS)
            found.append(
                IntentIssue(
                    issue_id=f"date_diff_invalid_unit_{context}_{col}",
                    category="date_diff",
                    severity="error",
                    message=f"{where}: date_diff filter has invalid unit '{unit}'. Valid: {allowed}",
                    context={
                        "column": col,
                        "unit": unit,
                        "valid_units": allowed,
                        "location": context,
                    },
                )
            )
        if amount is None or isinstance(amount, (int, float)):
            return
        try:
            int(amount)
        except (TypeError, ValueError):
            col = _label(flt)
            found.append(
                IntentIssue(
                    issue_id=f"date_diff_invalid_amount_{context}_{col}",
                    category="date_diff",
                    severity="error",
                    message=f"{where}: date_diff filter has non-numeric amount '{amount}'",
                    context={"column": col, "amount": amount, "location": context},
                )
            )

    main_label = f"{context} filter"
    for flt in filters_param:
        _inspect(flt, main_label)
    for step in cte_steps or []:
        for flt in step.filters_param or []:
            _inspect(flt, f"CTE '{step.cte_name}' filter")

    if found:
        debug(f"[validation_schema.validate_date_diff_units] {len(found)} invalid configs")
    return found
1549
+
1550
+
1551
def validate_null_filters(
    filters_param: list[FilterParam],
    cte_steps: list[RuntimeCteStep] | None = None,
    context: str = "main",
) -> list[IntentIssue]:
    """Report ``is null`` / ``is not null`` filters that carry a non-null ``value_type``.

    A NULL-check filter is accepted when its ``value_type`` is empty or ``"null"``; any other value type yields an error. Main-query filters are checked first, then the filters of each CTE step in order.

    Args:

        filters_param: List of ``FilterParam`` instances for the main query.
        cte_steps: Optional list of CTE steps whose filters are also checked.
        context: Label used in the location prefix of main-query messages.

    Returns:

        List of ``IntentIssue`` objects for any NULL filter with incorrect ``value_type``.
    """

    def _issue_for(flt: FilterParam, where: str) -> IntentIssue | None:
        if flt.op not in ("is null", "is not null"):
            return None
        if not flt.value_type or flt.value_type == "null":
            return None
        col = flt.left_expr.primary_column
        return IntentIssue(
            issue_id=f"null_filter_wrong_value_type_{col}",
            category="filter_structure",
            severity="error",
            message=f"{where}: IS NULL filter on '{col}' should have value_type='null' or empty, got '{flt.value_type}'",
            context={
                "column": col,
                "op": flt.op,
                "expected_value_type": "null",
                "actual_value_type": flt.value_type,
            },
        )

    def _candidates():
        for flt in filters_param:
            yield _issue_for(flt, f"{context} filter")
        for step in cte_steps or []:
            for flt in step.filters_param or []:
                yield _issue_for(flt, f"CTE '{step.cte_name}' filter")

    found = [issue for issue in _candidates() if issue]

    if found:
        debug(f"[validation_schema.validate_null_filters] FAILED with {len(found)} issues")
    else:
        debug("[validation_schema.validate_null_filters] PASSED")
    return found
1605
+
1606
+
1607
def validate_filter_value_type_alignment(
    filters_param: list[FilterParam],
    schema: SchemaGraph,
    context: str = "main",
    cte_outputs: dict[str, dict[str, CteOutputColumnMeta]] | None = None,
) -> list[IntentIssue]:
    """Warn when a string/enum filter value targets a numeric column.

    Only filters whose ``value_type`` is ``"string"`` or ``"enum"`` and whose ``raw_value`` is not ``None`` are inspected. Two situations are reported as warnings: the column is a schema foreign key with an integer/number value type, or the column is a CTE output with an integer/number value type. Filters without a ``table.column`` qualified primary column are skipped.

    Args:

        filters_param: List of ``FilterParam`` instances to validate.
        schema: The ``SchemaGraph``; columns are resolved through ``schema.get_column``.
        context: Label used in issue IDs and messages.
        cte_outputs: Dict of CTE name to output column metadata; ``None`` is treated as empty.

    Returns:

        List of ``IntentIssue`` warnings in the ``type_alignment`` category.
    """
    issues: list[IntentIssue] = []
    cte_outputs = cte_outputs or {}
    numeric_types = {"integer", "number"}
    for fp in filters_param:
        if fp.value_type not in {"string", "enum"}:
            continue
        if fp.raw_value is None:
            continue
        col = fp.left_expr.primary_column
        # Guard a missing column (None/empty) instead of failing on `"." in None`.
        if not col or "." not in col:
            continue
        # Split on the last dot, like the other column resolvers in this module.
        table_name, col_name = col.rsplit(".", 1)
        col_meta = schema.get_column(table_name, col_name)
        if col_meta:
            if col_meta.is_foreign_key and col_meta.value_type in numeric_types:
                issues.append(
                    IntentIssue(
                        issue_id=f"filter_string_on_fk_int_{table_name}_{col_name}_{context}",
                        category="type_alignment",
                        severity="warning",
                        message=f"Filter on {col} uses string value '{fp.raw_value}' but column is a numeric FK in {context}. Filter should target the FK target table's descriptive column.",
                        context={
                            "column": col,
                            "value": str(fp.raw_value),
                            "value_type": fp.value_type,
                            "column_type": col_meta.value_type,
                            "location": context,
                        },
                    )
                )
                debug(f"[validation_schema.validate_filter_value_type_alignment] string value on FK int column {col}")
            # NOTE(review): a resolved schema column ends processing of this filter;
            # confirm this matches the intended nesting of the original `continue`.
            continue
        if table_name in cte_outputs:
            cte_cols = cte_outputs[table_name]
            cte_meta = cte_cols.get(col_name) or cte_cols.get(col_name.lower())
            if cte_meta and cte_meta.value_type in numeric_types:
                issues.append(
                    IntentIssue(
                        issue_id=f"filter_string_on_cte_numeric_{table_name}_{col_name}_{context}",
                        category="type_alignment",
                        severity="warning",
                        message=f"Filter on {col} uses string value '{fp.raw_value}' but CTE column is numeric ({cte_meta.value_type}) in {context}.",
                        context={
                            "column": col,
                            "value": str(fp.raw_value),
                            "value_type": fp.value_type,
                            "column_type": cte_meta.value_type,
                            "location": context,
                        },
                    )
                )
    return issues
1665
+
1666
+
1667
def validate_no_between_ops(
    filters_param: list[FilterParam],
    having_param: list[HavingParam],
    context: str = "main query",
) -> list[IntentIssue]:
    """Flag any filter or HAVING condition that still uses the BETWEEN operator.

    BETWEEN is expected to have been rewritten into a ``>=`` / ``<=`` pair before this validator runs; every condition whose operator is still ``between`` (compared case-insensitively) is reported as an error. Filters are reported before HAVING conditions.

    Args:

        filters_param: Filter conditions to inspect.
        having_param: Having conditions to inspect.
        context: Description of the query scope for issue messages.

    Returns:

        List of ``IntentIssue`` objects, one per surviving BETWEEN op.
    """
    found: list[IntentIssue] = []
    # (issue-id prefix, message prefix, conditions) for the two clause kinds.
    clause_kinds = (
        ("filter", "Filter", filters_param),
        ("having", "Having", having_param),
    )
    for id_prefix, noun, conditions in clause_kinds:
        for cond in conditions:
            if cond.op.lower() != "between":
                continue
            col = cond.left_expr.primary_column
            found.append(
                IntentIssue(
                    issue_id=f"{id_prefix}_between_not_decomposed_{col}_{context}",
                    category="operator",
                    severity="error",
                    message=(
                        f"{noun} on {col} still uses BETWEEN in {context}. "
                        "Decompose into separate >= and <= conditions."
                    ),
                    context={"column": col, "op": cond.op, "location": context},
                )
            )
    if found:
        debug(f"[validation_schema.validate_no_between_ops] FAILED with {len(found)} issues in {context}")
    return found
1720
+
1721
+
1722
def get_col_type(
    col_expr: str,
    schema: SchemaGraph,
    cte_outputs: dict[str, dict[str, CteOutputColumnMeta]],
) -> str | None:
    """Look up the ``value_type`` of a column in CTE outputs or the schema.

    The expression is first reduced with ``extract_col_from_scalar_wrapper`` and then split on its last dot. A table name present in ``cte_outputs`` is resolved only against that CTE's outputs; otherwise the schema tables are consulted. Column names are tried as given and then lower-cased.

    Args:

        col_expr: Column reference, optionally wrapped in scalar function calls.
        schema: Schema graph containing table and column metadata.
        cte_outputs: Map of CTE name to column output metadata.

    Returns:

        The ``value_type`` string for the resolved column, or ``None`` if the reference is unqualified or cannot be resolved.
    """
    qualified = extract_col_from_scalar_wrapper(col_expr)
    table_name, dot, col_name = qualified.rpartition(".")
    if not dot:
        return None
    if table_name in cte_outputs:
        outputs = cte_outputs[table_name]
        cte_meta = outputs.get(col_name) or outputs.get(col_name.lower())
        return cte_meta.value_type if cte_meta else None
    if table_name not in schema.tables:
        return None
    columns = schema.tables[table_name].columns
    col_meta = columns.get(col_name) or columns.get(col_name.lower())
    return col_meta.value_type if col_meta else None
1753
+
1754
+
1755
def get_col_meta(
    col_expr: str,
    schema: SchemaGraph,
    cte_outputs: dict[str, dict[str, CteOutputColumnMeta]],
) -> Any | None:
    """Resolve a column reference to column metadata.

    The expression is first reduced with ``extract_col_from_scalar_wrapper`` and then split on its last dot. For a CTE output column a new ``ColumnMetadata`` is built from the CTE metadata (an empty ``data_type`` becomes ``"unknown"``); for a schema column the stored metadata object is returned as-is.

    Args:

        col_expr: Column reference, optionally wrapped in scalar function calls.
        schema: Schema graph containing table and column metadata.
        cte_outputs: Map of CTE name to column output metadata.

    Returns:

        A ``ColumnMetadata`` instance (real or synthetic) for the resolved column, or ``None`` if the reference is unqualified or cannot be resolved.
    """
    qualified = extract_col_from_scalar_wrapper(col_expr)
    table_name, dot, col_name = qualified.rpartition(".")
    if not dot:
        return None

    if table_name in cte_outputs:
        outputs = cte_outputs[table_name]
        cte_meta = outputs.get(col_name) or outputs.get(col_name.lower())
        if not cte_meta:
            return None
        # Copy the list-valued fields so the synthetic metadata does not alias the CTE's lists.
        return ColumnMetadata(
            name=col_name,
            data_type=cte_meta.data_type or "unknown",
            role=cte_meta.role,
            is_filterable_override=cte_meta.filterable,
            is_aggregatable_override=cte_meta.aggregatable,
            is_groupable_override=cte_meta.groupable,
            valid_filter_ops=list(cte_meta.valid_filter_ops or []),
            valid_aggregations=list(cte_meta.valid_aggregations or []),
            valid_having_ops=list(cte_meta.valid_having_ops or []),
        )

    if table_name not in schema.tables:
        return None
    columns = schema.tables[table_name].columns
    return columns.get(col_name) or columns.get(col_name.lower())
1795
+
1796
+
1797
def is_col_numeric(
    col_ref: str,
    schema: SchemaGraph,
    cte_outputs: dict[str, dict[str, CteOutputColumnMeta]],
) -> bool | None:
    """Tell whether a column's value type is ``integer`` or ``number``.

    The value type is obtained from ``get_col_type``.

    Args:

        col_ref: Fully-qualified column reference (``table.column``).
        schema: Schema graph containing column type information.
        cte_outputs: Map of CTE name to column output metadata.

    Returns:

        ``True`` if the column type is numeric, ``False`` if it is not, or ``None`` if ``get_col_type`` returns ``None``.
    """
    value_type = get_col_type(col_ref, schema, cte_outputs)
    return None if value_type is None else value_type in ("integer", "number")
1818
+
1819
+
1820
def is_col_arithmetic_role(
    col_ref: str,
    schema: SchemaGraph,
    cte_outputs: dict[str, dict[str, CteOutputColumnMeta]],
) -> bool | None:
    """Tell whether a column's role is one of ``ARITHMETIC_ROLES``.

    The role is taken from ``get_col_meta`` when that yields metadata with a role. Otherwise the raw CTE output metadata for the column is consulted directly.

    Args:

        col_ref: Fully-qualified column reference (``table.column``).
        schema: Schema graph containing column role information.
        cte_outputs: Map of CTE name to column output metadata.

    Returns:

        ``True`` or ``False`` according to membership of the role in ``ARITHMETIC_ROLES``; ``None`` if no role can be found for the column.
    """
    meta = get_col_meta(col_ref, schema, cte_outputs)
    if meta and meta.role:
        return meta.role in ARITHMETIC_ROLES

    # Fallback: read the role straight from the CTE output metadata.
    qualified = extract_col_from_scalar_wrapper(col_ref)
    table_name, dot, col_name = qualified.rpartition(".")
    if not dot or table_name not in cte_outputs:
        return None
    outputs = cte_outputs[table_name]
    cte_meta = outputs.get(col_name) or outputs.get(col_name.lower())
    if cte_meta and cte_meta.role:
        return cte_meta.role in ARITHMETIC_ROLES
    return None