aetherdialect 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aetherdialect-0.1.0.dist-info/METADATA +197 -0
- aetherdialect-0.1.0.dist-info/RECORD +34 -0
- aetherdialect-0.1.0.dist-info/WHEEL +5 -0
- aetherdialect-0.1.0.dist-info/licenses/LICENSE +7 -0
- aetherdialect-0.1.0.dist-info/top_level.txt +1 -0
- text2sql/__init__.py +7 -0
- text2sql/config.py +1063 -0
- text2sql/contracts_base.py +952 -0
- text2sql/contracts_core.py +1890 -0
- text2sql/core_utils.py +834 -0
- text2sql/dialect.py +1134 -0
- text2sql/expansion_ops.py +1218 -0
- text2sql/expansion_rules.py +496 -0
- text2sql/intent_expr.py +1759 -0
- text2sql/intent_process.py +2133 -0
- text2sql/intent_repair.py +1733 -0
- text2sql/intent_resolve.py +1292 -0
- text2sql/live_testing.py +1117 -0
- text2sql/main_execution.py +799 -0
- text2sql/pipeline.py +1662 -0
- text2sql/qsim_ops.py +1286 -0
- text2sql/qsim_sample.py +609 -0
- text2sql/qsim_struct.py +569 -0
- text2sql/schema.py +973 -0
- text2sql/schema_profiling.py +2075 -0
- text2sql/simulator.py +970 -0
- text2sql/sql_gen.py +1537 -0
- text2sql/templates.py +1037 -0
- text2sql/text2sql.py +726 -0
- text2sql/utils.py +973 -0
- text2sql/validation_agg.py +1033 -0
- text2sql/validation_execute.py +1092 -0
- text2sql/validation_schema.py +1847 -0
- text2sql/validation_semantic.py +2122 -0
|
@@ -0,0 +1,1847 @@
|
|
|
1
|
+
"""Schema-level validation for intent fields against the database schema.
|
|
2
|
+
|
|
3
|
+
Validates ``select_cols``, ``order_by_cols``, ``group_by_cols``, ``filters_param``, and ``having_param`` for column existence, table qualification, valid operators, aggregation appropriateness, and scalar function type compatibility. Supports CTE output column tracking for cross-CTE validation. All validators return lists of ``IntentIssue`` objects and are called from both the main query and CTE chain validators.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import re
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from .config import (
|
|
12
|
+
AGGREGATION_ALLOWED_COLUMN_TYPES,
|
|
13
|
+
ARITHMETIC_ROLES,
|
|
14
|
+
DISALLOWED_EXTRACT_UNITS,
|
|
15
|
+
VALID_AGG_FUNCS,
|
|
16
|
+
VALID_AGGREGATION_FUNCTIONS,
|
|
17
|
+
VALID_DATE_DIFF_UNITS,
|
|
18
|
+
VALID_DATE_WINDOW_UNITS,
|
|
19
|
+
VALID_FILTER_OPS,
|
|
20
|
+
VALID_HAVING_OPS,
|
|
21
|
+
VALID_SCALAR_FUNCTIONS,
|
|
22
|
+
VALID_VALUE_TYPES,
|
|
23
|
+
)
|
|
24
|
+
from .contracts_base import (
|
|
25
|
+
ColumnMetadata,
|
|
26
|
+
CteOutputColumnMeta,
|
|
27
|
+
IntentIssue,
|
|
28
|
+
SchemaGraph,
|
|
29
|
+
)
|
|
30
|
+
from .contracts_core import (
|
|
31
|
+
FilterParam,
|
|
32
|
+
HavingParam,
|
|
33
|
+
NormalizedExpr,
|
|
34
|
+
OrderByCol,
|
|
35
|
+
RuntimeCteStep,
|
|
36
|
+
SelectCol,
|
|
37
|
+
)
|
|
38
|
+
from .core_utils import debug
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _strip_distinct_prefix(col: str) -> str:
|
|
42
|
+
"""Remove a leading ``DISTINCT`` keyword from a column reference.
|
|
43
|
+
|
|
44
|
+
For example, ``"DISTINCT orders.order_id"`` returns ``"orders.order_id"``.
|
|
45
|
+
|
|
46
|
+
Args:
|
|
47
|
+
|
|
48
|
+
col: A column expression string that may have a ``DISTINCT`` prefix.
|
|
49
|
+
|
|
50
|
+
Returns:
|
|
51
|
+
|
|
52
|
+
The column expression with any leading ``DISTINCT`` keyword removed.
|
|
53
|
+
"""
|
|
54
|
+
if col and col.upper().startswith("DISTINCT "):
|
|
55
|
+
return col[9:].strip()
|
|
56
|
+
return col
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def extract_col_from_scalar_wrapper(col_expr: str) -> str:
    """Return the column expression found inside a scalar function call.

    When ``col_expr`` has the shape ``NAME(...)`` and ``NAME`` (lowercased)
    is a member of ``VALID_SCALAR_FUNCTIONS``, everything between the
    outermost parentheses is returned. Only that single outer wrapper is
    removed. In every non-empty case a leading ``DISTINCT`` keyword is
    stripped from the result as well.

    For example, ``"ABS(table.col)"`` yields ``"table.col"`` and
    ``"DISTINCT table.col"`` yields ``"table.col"``.

    Args:
        col_expr: A column expression string, possibly wrapped in a scalar
            function and/or prefixed with ``DISTINCT``.

    Returns:
        The inner column expression, or the (``DISTINCT``-stripped) input
        when no recognised scalar wrapper is present. Empty input is
        returned as-is.
    """
    if col_expr:
        wrapped = re.match(r"^\s*(\w+)\s*\(\s*(.+)\s*\)\s*$", col_expr, re.IGNORECASE)
        if wrapped is not None and wrapped.group(1).lower() in VALID_SCALAR_FUNCTIONS:
            # Recognised wrapper: keep only what sits between the parentheses.
            return _strip_distinct_prefix(wrapped.group(2).strip())
        return _strip_distinct_prefix(col_expr)
    return col_expr
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _validate_scalar_func_valid(scalar_func: str | None, context: str, location: str) -> list[IntentIssue]:
|
|
83
|
+
"""Validate that a scalar function name is in the allowed set.
|
|
84
|
+
|
|
85
|
+
Args:
|
|
86
|
+
|
|
87
|
+
scalar_func: The scalar function name to validate, or ``None``.
|
|
88
|
+
context: A short label for the field being validated (for example ``"select_0"``).
|
|
89
|
+
location: Human-readable location string used in error messages.
|
|
90
|
+
|
|
91
|
+
Returns:
|
|
92
|
+
|
|
93
|
+
List of ``IntentIssue`` objects (empty if valid or if ``scalar_func`` is ``None``).
|
|
94
|
+
"""
|
|
95
|
+
issues = []
|
|
96
|
+
if not scalar_func:
|
|
97
|
+
return issues
|
|
98
|
+
func_lower = scalar_func.lower()
|
|
99
|
+
if func_lower not in VALID_SCALAR_FUNCTIONS:
|
|
100
|
+
issues.append(
|
|
101
|
+
IntentIssue(
|
|
102
|
+
issue_id=f"invalid_scalar_func_{context}_{scalar_func}",
|
|
103
|
+
category="scalar_validity",
|
|
104
|
+
severity="error",
|
|
105
|
+
message=f"Invalid scalar function '{scalar_func}' in {location}. Allowed: {', '.join(sorted(VALID_SCALAR_FUNCTIONS))}",
|
|
106
|
+
context={
|
|
107
|
+
"function": scalar_func,
|
|
108
|
+
"location": location,
|
|
109
|
+
"allowed": list(VALID_SCALAR_FUNCTIONS),
|
|
110
|
+
},
|
|
111
|
+
)
|
|
112
|
+
)
|
|
113
|
+
debug(f"[validation_schema.validate_scalar_func_valid] invalid scalar '{scalar_func}' in {location}")
|
|
114
|
+
return issues
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _first_arg_lower(args: list[Any]) -> str:
|
|
118
|
+
"""Return the first scalar arg lowercased, or empty string if none."""
|
|
119
|
+
if not args:
|
|
120
|
+
return ""
|
|
121
|
+
return str(args[0]).strip().lower()
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _is_extract_epoch(func: str | None, args: list[Any]) -> bool:
|
|
125
|
+
"""Return True if func is extract and first arg is a disallowed unit (e.g. epoch)."""
|
|
126
|
+
if not func or func.lower() != "extract":
|
|
127
|
+
return False
|
|
128
|
+
unit = _first_arg_lower(args)
|
|
129
|
+
return unit in DISALLOWED_EXTRACT_UNITS
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def validate_expr_no_extract_epoch(
    expr: NormalizedExpr,
    context: str,
    location: str,
) -> list[IntentIssue]:
    """Flag ``EXTRACT(EPOCH FROM ...)`` in an expression; EPOCH is not supported.

    Four places are inspected, in this order: the expression's own scalar
    function, its inner scalar function, and then the scalar and inner scalar
    function of every entry in ``add_groups`` followed by ``sub_groups``.
    One error is produced per occurrence. Group-level issue IDs carry no
    group index, so several offending groups yield issues with the same ID.

    Args:
        expr: The expression to inspect.
        context: Short label appended to each issue ID.
        location: Location string stored in each issue's context.

    Returns:
        List of ``extract_epoch`` errors (empty when nothing is flagged).
    """
    unsupported_msg = (
        "EXTRACT(EPOCH FROM ...) is not supported. Use date column "
        "subtraction or other supported date functions."
    )

    def flag(id_prefix: str) -> IntentIssue:
        # All four checks share everything except the issue-ID prefix.
        return IntentIssue(
            issue_id=f"{id_prefix}_{context}",
            category="extract_epoch",
            severity="error",
            message=unsupported_msg,
            context={"location": location},
        )

    found: list[IntentIssue] = []

    if _is_extract_epoch(expr.scalar_func, expr.scalar_func_args or []):
        found.append(flag("extract_epoch"))
    if _is_extract_epoch(expr.inner_scalar_func, expr.inner_scalar_func_args or []):
        found.append(flag("extract_epoch_inner"))

    for term in expr.add_groups + expr.sub_groups:
        if _is_extract_epoch(term.scalar_func, term.scalar_func_args or []):
            found.append(flag("extract_epoch_group"))
        if _is_extract_epoch(term.inner_scalar_func, term.inner_scalar_func_args or []):
            found.append(flag("extract_epoch_inner_group"))

    return found
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def _validate_agg_func_valid(agg_func: str | None, context: str, location: str) -> list[IntentIssue]:
|
|
200
|
+
"""Validate that an aggregation function name is in the allowed set.
|
|
201
|
+
|
|
202
|
+
Args:
|
|
203
|
+
|
|
204
|
+
agg_func: The aggregation function name to validate, or ``None``.
|
|
205
|
+
context: A short label for the field being validated.
|
|
206
|
+
location: Human-readable location string used in error messages.
|
|
207
|
+
|
|
208
|
+
Returns:
|
|
209
|
+
|
|
210
|
+
List of ``IntentIssue`` objects (empty if valid or if ``agg_func`` is ``None``).
|
|
211
|
+
"""
|
|
212
|
+
issues = []
|
|
213
|
+
if not agg_func:
|
|
214
|
+
return issues
|
|
215
|
+
func_lower = agg_func.lower()
|
|
216
|
+
if func_lower not in VALID_AGGREGATION_FUNCTIONS:
|
|
217
|
+
issues.append(
|
|
218
|
+
IntentIssue(
|
|
219
|
+
issue_id=f"invalid_agg_func_{context}_{agg_func}",
|
|
220
|
+
category="aggregation_validity",
|
|
221
|
+
severity="error",
|
|
222
|
+
message=f"Invalid aggregation function '{agg_func}' in {location}. Allowed: {', '.join(sorted(VALID_AGGREGATION_FUNCTIONS))}",
|
|
223
|
+
context={
|
|
224
|
+
"function": agg_func,
|
|
225
|
+
"location": location,
|
|
226
|
+
"allowed": list(VALID_AGGREGATION_FUNCTIONS),
|
|
227
|
+
},
|
|
228
|
+
)
|
|
229
|
+
)
|
|
230
|
+
debug(f"[validation_schema.validate_agg_func_valid] invalid agg '{agg_func}' in {location}")
|
|
231
|
+
return issues
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def validate_select_cols_schema(
    select_cols: list[SelectCol],
    schema: SchemaGraph,
    allowed_tables: set[str],
    cte_outputs: dict[str, dict[str, CteOutputColumnMeta]] | None = None,
    context: str = "main",
) -> list[IntentIssue]:
    """Check each ``SelectCol`` for a qualified, resolvable column reference.

    Per entry the checks run in this order, and the first failing one ends
    the checks for that entry: non-empty column, ``table.column``
    qualification (after removing a scalar wrapper / ``DISTINCT`` prefix),
    CTE output membership when the table part names a CTE, table permitted
    by ``allowed_tables``. Columns that reference a CTE are never passed to
    the function checks. For the remaining entries a column missing from a
    table known to ``schema`` is reported, and the aggregation function,
    scalar function and ``EXTRACT(EPOCH ...)`` checks are applied. A table
    that is allowed but absent from ``schema.tables`` produces no issue here.

    Args:
        select_cols: List of ``SelectCol`` instances to validate.
        schema: The ``SchemaGraph``.
        allowed_tables: Set of table names permitted in this query context.
        cte_outputs: Dict of CTE name to output column metadata for
            cross-CTE lookup; column names are compared case-insensitively.
        context: Label used in issue IDs and messages (for example
            ``"main"`` or ``"CTE cte1"``).

    Returns:
        List of ``IntentIssue`` objects; an empty ``select_cols`` yields a
        single error.
    """
    found: list[IntentIssue] = []

    def report(issue_id: str, message: str, details: dict) -> None:
        # Everything raised directly by this validator is a select_validity error.
        found.append(
            IntentIssue(
                issue_id=issue_id,
                category="select_validity",
                severity="error",
                message=message,
                context=details,
            )
        )

    if not select_cols:
        report(
            f"select_cols_empty_{context}",
            f"select_cols cannot be empty in {context}",
            {"location": context},
        )
        return found

    cte_lookup = cte_outputs or {}
    for position, entry in enumerate(select_cols):
        raw_col = entry.expr.primary_column
        if not raw_col:
            report(
                f"select_col_empty_{context}_{position}",
                f"SelectCol at index {position} has empty col in {context}",
                {"index": position, "location": context},
            )
            continue

        bare_col = extract_col_from_scalar_wrapper(raw_col)
        if "." not in bare_col:
            report(
                f"select_unqualified_{context}_{bare_col}",
                f"select_cols must be qualified as table.column, got '{bare_col}' in {context}",
                {"column": bare_col, "location": context},
            )
            continue

        owner, column = bare_col.rsplit(".", 1)

        if owner in cte_lookup:
            # CTE reference: only membership in the CTE's outputs is checked.
            if column.lower() not in {name.lower() for name in cte_lookup[owner]}:
                report(
                    f"select_cte_col_not_found_{context}_{owner}_{column}",
                    f"Column '{column}' not in CTE '{owner}' outputs for select in {context}",
                    {"table": owner, "column": column, "location": context},
                )
            continue

        if owner not in allowed_tables:
            report(
                f"select_table_not_allowed_{context}_{owner}",
                f"Table '{owner}' not in allowed tables for select in {context}",
                {"table": owner, "location": context},
            )
            continue

        if owner in schema.tables:
            table_columns = schema.tables[owner].columns
            # Exact name first, then the lowercased name as a fallback.
            if not (table_columns.get(column) or table_columns.get(column.lower())):
                report(
                    f"select_col_not_found_{context}_{owner}_{column}",
                    f"Column '{column}' not in table '{owner}' for select in {context}",
                    {"table": owner, "column": column, "location": context},
                )

        label = f"select_{position}"
        scalar_name, agg_name = extract_functions_from_term(entry.expr.primary_term)
        found.extend(_validate_agg_func_valid(agg_name, label, context))
        found.extend(_validate_scalar_func_valid(scalar_name, label, context))
        found.extend(validate_expr_no_extract_epoch(entry.expr, label, context))

    debug(f"[validation_schema.validate_select_cols_schema] {len(found)} issues in {context}")
    return found
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
def validate_order_by_cols_schema(
    order_by_cols: list[OrderByCol],
    schema: SchemaGraph,
    allowed_tables: set[str],
    cte_outputs: dict[str, dict[str, CteOutputColumnMeta]] | None = None,
    context: str = "main",
) -> list[IntentIssue]:
    """Check each ``OrderByCol`` for a resolvable column and a valid direction.

    Per entry the checks run in this order, and the first failing one ends
    the checks for that entry: non-empty column, ``table.column``
    qualification (after removing a scalar wrapper / ``DISTINCT`` prefix),
    CTE output membership when the table part names a CTE, table permitted
    by ``allowed_tables``. Entries that reference a CTE skip the direction
    and function checks. For the remaining entries a column missing from a
    table known to ``schema`` is reported, the direction must be exactly
    ``"ASC"`` or ``"DESC"``, and the aggregation function, scalar function
    and ``EXTRACT(EPOCH ...)`` checks are applied.

    Args:
        order_by_cols: List of ``OrderByCol`` instances to validate.
        schema: The ``SchemaGraph``.
        allowed_tables: Set of table names permitted in this query context.
        cte_outputs: Dict of CTE name to output column metadata; column
            names are compared case-insensitively.
        context: Label used in issue IDs and messages.

    Returns:
        List of ``IntentIssue`` objects; empty when ``order_by_cols`` is
        empty.
    """
    if not order_by_cols:
        return []

    found: list[IntentIssue] = []

    def report(issue_id: str, message: str, details: dict) -> None:
        # Everything raised directly by this validator is an order_by_validity error.
        found.append(
            IntentIssue(
                issue_id=issue_id,
                category="order_by_validity",
                severity="error",
                message=message,
                context=details,
            )
        )

    cte_lookup = cte_outputs or {}
    for position, entry in enumerate(order_by_cols):
        raw_col = entry.expr.primary_column
        if not raw_col:
            report(
                f"order_by_col_empty_{context}_{position}",
                f"OrderByCol at index {position} has empty col in {context}",
                {"index": position, "location": context},
            )
            continue

        bare_col = extract_col_from_scalar_wrapper(raw_col)
        if "." not in bare_col:
            report(
                f"order_by_unqualified_{context}_{bare_col}",
                f"order_by_cols must be qualified as table.column, got '{bare_col}' in {context}",
                {"column": bare_col, "location": context},
            )
            continue

        owner, column = bare_col.rsplit(".", 1)

        if owner in cte_lookup:
            # CTE reference: only membership in the CTE's outputs is checked.
            if column.lower() not in {name.lower() for name in cte_lookup[owner]}:
                report(
                    f"order_by_cte_col_not_found_{context}_{owner}_{column}",
                    f"Column '{column}' not in CTE '{owner}' outputs for order_by in {context}",
                    {"table": owner, "column": column, "location": context},
                )
            continue

        if owner not in allowed_tables:
            report(
                f"order_by_table_not_allowed_{context}_{owner}",
                f"Table '{owner}' not in allowed tables for order_by in {context}",
                {"table": owner, "location": context},
            )
            continue

        if owner in schema.tables:
            table_columns = schema.tables[owner].columns
            # Exact name first, then the lowercased name as a fallback.
            if not (table_columns.get(column) or table_columns.get(column.lower())):
                report(
                    f"order_by_col_not_found_{context}_{owner}_{column}",
                    f"Column '{column}' not in table '{owner}' for order_by in {context}",
                    {"table": owner, "column": column, "location": context},
                )

        if entry.direction not in ("ASC", "DESC"):
            report(
                f"order_by_invalid_direction_{context}_{position}",
                f"OrderByCol direction must be 'ASC' or 'DESC', got '{entry.direction}' in {context}",
                {"direction": entry.direction, "location": context},
            )

        label = f"order_by_{position}"
        scalar_name, agg_name = extract_functions_from_term(entry.expr.primary_term)
        found.extend(_validate_agg_func_valid(agg_name, label, context))
        found.extend(_validate_scalar_func_valid(scalar_name, label, context))
        found.extend(validate_expr_no_extract_epoch(entry.expr, label, context))

    debug(f"[validation_schema.validate_order_by_cols_schema] {len(found)} issues in {context}")
    return found
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
def validate_group_by_cols_schema(
    group_by_cols: list[NormalizedExpr],
    schema: SchemaGraph,
    allowed_tables: set[str],
    cte_outputs: dict[str, dict[str, CteOutputColumnMeta]] | None = None,
    context: str = "main",
) -> list[IntentIssue]:
    """Validate ``group_by_cols`` against the schema and column groupability.

    Each expression's ``primary_column`` is used as-is (no scalar wrapper is
    removed). Per entry the first failing check ends the checks for that
    entry: ``table.column`` qualification, then either the CTE branch (column
    must exist among the CTE outputs; a non-groupable CTE column is a
    warning) or the table branch (table must be in ``allowed_tables``; for
    tables known to ``schema`` the column must exist, and a non-groupable
    column is a warning). A missing ``primary_column`` (``None``) is reported
    as an unqualified column rather than raising.

    Args:
        group_by_cols: List of ``NormalizedExpr`` instances to validate.
        schema: The ``SchemaGraph``.
        allowed_tables: Set of table names permitted in this query context.
        cte_outputs: Dict of CTE name to output column metadata; column names
            are compared case-insensitively.
        context: Label used in issue IDs and messages.

    Returns:
        List of ``IntentIssue`` objects; empty when ``group_by_cols`` is
        empty.
    """
    issues: list[IntentIssue] = []
    if not group_by_cols:
        return []
    cte_outputs = cte_outputs or {}
    for g in group_by_cols:
        # Normalise a missing column to "" so the membership test below cannot
        # raise TypeError; it is then reported through the unqualified branch.
        col = g.primary_column or ""
        if "." not in col:
            issues.append(
                IntentIssue(
                    issue_id=f"group_by_unqualified_{context}_{col}",
                    category="group_by_validity",
                    severity="error",
                    message=f"group_by_cols must be qualified as table.column, got '{col}' in {context}",
                    context={"column": col, "location": context},
                )
            )
            continue
        table_name, col_name = col.rsplit(".", 1)
        if table_name in cte_outputs:
            cte_cols = cte_outputs[table_name]
            # Case-insensitive lookup that keeps the original key for metadata access.
            matched_key = next((c for c in cte_cols if c.lower() == col_name.lower()), None)
            if not matched_key:
                issues.append(
                    IntentIssue(
                        issue_id=f"group_by_cte_col_not_found_{context}_{table_name}_{col_name}",
                        category="group_by_validity",
                        severity="error",
                        message=f"Column '{col_name}' not in CTE '{table_name}' outputs for group_by in {context}",
                        context={
                            "table": table_name,
                            "column": col_name,
                            "location": context,
                        },
                    )
                )
            elif not cte_cols[matched_key].groupable:
                issues.append(
                    IntentIssue(
                        issue_id=f"group_by_cte_col_not_groupable_{context}_{table_name}_{col_name}",
                        category="group_by_validity",
                        severity="warning",
                        message=f"CTE column '{table_name}.{col_name}' (role={cte_cols[matched_key].role}) is not recommended for GROUP BY in {context}",
                        context={
                            "table": table_name,
                            "column": col_name,
                            "role": cte_cols[matched_key].role,
                            "location": context,
                        },
                    )
                )
            # CTE references never fall through to the table checks.
            continue
        if table_name not in allowed_tables:
            issues.append(
                IntentIssue(
                    issue_id=f"group_by_table_not_allowed_{context}_{table_name}",
                    category="group_by_validity",
                    severity="error",
                    message=f"Table '{table_name}' not in allowed tables for group_by in {context}",
                    context={"table": table_name, "location": context},
                )
            )
            continue
        if table_name in schema.tables:
            table_meta = schema.tables[table_name]
            # Exact name first, then the lowercased name as a fallback.
            col_meta = table_meta.columns.get(col_name) or table_meta.columns.get(col_name.lower())
            if not col_meta:
                issues.append(
                    IntentIssue(
                        issue_id=f"group_by_col_not_found_{context}_{table_name}_{col_name}",
                        category="group_by_validity",
                        severity="error",
                        message=f"Column '{col_name}' not in table '{table_name}' for group_by in {context}",
                        context={
                            "table": table_name,
                            "column": col_name,
                            "location": context,
                        },
                    )
                )
            elif not col_meta.is_groupable:
                issues.append(
                    IntentIssue(
                        issue_id=f"group_by_col_not_groupable_{context}_{table_name}_{col_name}",
                        category="group_by_validity",
                        severity="warning",
                        message=f"Column '{col_name}' (role={col_meta.role}) is not recommended for grouping in {context}",
                        context={
                            "table": table_name,
                            "column": col_name,
                            "role": col_meta.role,
                            "location": context,
                        },
                    )
                )
    debug(f"[validation_schema.validate_group_by_cols_schema] {len(issues)} issues in {context}")
    return issues
|
|
580
|
+
|
|
581
|
+
|
|
582
|
+
def _validate_filter_col(
|
|
583
|
+
col_expr: str,
|
|
584
|
+
schema: SchemaGraph,
|
|
585
|
+
allowed_tables: set[str],
|
|
586
|
+
cte_outputs: dict[str, dict[str, CteOutputColumnMeta]],
|
|
587
|
+
context: str,
|
|
588
|
+
side: str,
|
|
589
|
+
param_key: str,
|
|
590
|
+
) -> list[IntentIssue]:
|
|
591
|
+
"""Validate a single filter column reference (left or right side of a ``FilterParam``).
|
|
592
|
+
|
|
593
|
+
Args:
|
|
594
|
+
|
|
595
|
+
col_expr: The column expression string to validate.
|
|
596
|
+
schema: The ``SchemaGraph``.
|
|
597
|
+
allowed_tables: Set of table names permitted in this context.
|
|
598
|
+
cte_outputs: Dict of CTE name to output column metadata.
|
|
599
|
+
context: Label used in issue IDs and messages.
|
|
600
|
+
side: ``"left_col"`` or ``"right_col"``.
|
|
601
|
+
param_key: The ``param_key`` of the ``FilterParam`` (for issue IDs).
|
|
602
|
+
|
|
603
|
+
Returns:
|
|
604
|
+
|
|
605
|
+
List of ``IntentIssue`` objects.
|
|
606
|
+
"""
|
|
607
|
+
issues = []
|
|
608
|
+
if not col_expr:
|
|
609
|
+
return issues
|
|
610
|
+
actual_col = extract_col_from_scalar_wrapper(col_expr)
|
|
611
|
+
if "." not in actual_col:
|
|
612
|
+
issues.append(
|
|
613
|
+
IntentIssue(
|
|
614
|
+
issue_id=f"filter_{side}_unqualified_{context}_{actual_col}",
|
|
615
|
+
category="filter_validity",
|
|
616
|
+
severity="error",
|
|
617
|
+
message=f"Filter {side} must be qualified as table.column, got '{actual_col}' in {context}",
|
|
618
|
+
context={
|
|
619
|
+
"column": actual_col,
|
|
620
|
+
"side": side,
|
|
621
|
+
"param_key": param_key,
|
|
622
|
+
"location": context,
|
|
623
|
+
},
|
|
624
|
+
)
|
|
625
|
+
)
|
|
626
|
+
return issues
|
|
627
|
+
table_name, col_name = actual_col.rsplit(".", 1)
|
|
628
|
+
if table_name in cte_outputs:
|
|
629
|
+
if col_name.lower() not in [c.lower() for c in cte_outputs[table_name]]:
|
|
630
|
+
issues.append(
|
|
631
|
+
IntentIssue(
|
|
632
|
+
issue_id=f"filter_{side}_cte_col_not_found_{context}_{table_name}_{col_name}",
|
|
633
|
+
category="filter_validity",
|
|
634
|
+
severity="error",
|
|
635
|
+
message=f"Column '{col_name}' not in CTE '{table_name}' outputs for filter {side} in {context}",
|
|
636
|
+
context={
|
|
637
|
+
"table": table_name,
|
|
638
|
+
"column": col_name,
|
|
639
|
+
"side": side,
|
|
640
|
+
"param_key": param_key,
|
|
641
|
+
"location": context,
|
|
642
|
+
},
|
|
643
|
+
)
|
|
644
|
+
)
|
|
645
|
+
return issues
|
|
646
|
+
if table_name not in allowed_tables:
|
|
647
|
+
issues.append(
|
|
648
|
+
IntentIssue(
|
|
649
|
+
issue_id=f"filter_{side}_table_not_allowed_{context}_{table_name}",
|
|
650
|
+
category="filter_validity",
|
|
651
|
+
severity="error",
|
|
652
|
+
message=f"Table '{table_name}' not in allowed tables for filter {side} in {context}",
|
|
653
|
+
context={
|
|
654
|
+
"table": table_name,
|
|
655
|
+
"side": side,
|
|
656
|
+
"param_key": param_key,
|
|
657
|
+
"location": context,
|
|
658
|
+
},
|
|
659
|
+
)
|
|
660
|
+
)
|
|
661
|
+
return issues
|
|
662
|
+
if table_name not in schema.tables:
|
|
663
|
+
issues.append(
|
|
664
|
+
IntentIssue(
|
|
665
|
+
issue_id=f"filter_{side}_table_not_in_schema_{context}_{table_name}",
|
|
666
|
+
category="filter_validity",
|
|
667
|
+
severity="error",
|
|
668
|
+
message=f"Table '{table_name}' not in schema for filter {side} in {context}",
|
|
669
|
+
context={
|
|
670
|
+
"table": table_name,
|
|
671
|
+
"side": side,
|
|
672
|
+
"param_key": param_key,
|
|
673
|
+
"location": context,
|
|
674
|
+
},
|
|
675
|
+
)
|
|
676
|
+
)
|
|
677
|
+
return issues
|
|
678
|
+
table_meta = schema.tables[table_name]
|
|
679
|
+
col_meta = table_meta.columns.get(col_name) or table_meta.columns.get(col_name.lower())
|
|
680
|
+
if not col_meta:
|
|
681
|
+
issues.append(
|
|
682
|
+
IntentIssue(
|
|
683
|
+
issue_id=f"filter_{side}_col_not_found_{context}_{table_name}_{col_name}",
|
|
684
|
+
category="filter_validity",
|
|
685
|
+
severity="error",
|
|
686
|
+
message=f"Column '{col_name}' not in table '{table_name}' for filter {side} in {context}",
|
|
687
|
+
context={
|
|
688
|
+
"table": table_name,
|
|
689
|
+
"column": col_name,
|
|
690
|
+
"side": side,
|
|
691
|
+
"param_key": param_key,
|
|
692
|
+
"location": context,
|
|
693
|
+
},
|
|
694
|
+
)
|
|
695
|
+
)
|
|
696
|
+
return issues
|
|
697
|
+
|
|
698
|
+
|
|
699
|
+
def validate_filters_schema(
    filters_param: list[FilterParam],
    schema: SchemaGraph,
    allowed_tables: set[str],
    cte_outputs: dict[str, dict[str, CteOutputColumnMeta]] | None = None,
    context: str = "main",
) -> list[IntentIssue]:
    """Validate ``FilterParam`` entries against the schema.

    For each filter, issues are collected in this order: left column
    reference, right column reference (when a right expression exists),
    operator membership in ``VALID_FILTER_OPS``, ``value_type`` membership in
    ``VALID_VALUE_TYPES`` (only when there is no right expression and the
    operator is neither ``"is null"`` nor ``"is not null"``), scalar function
    validity on both sides, ``EXTRACT(EPOCH ...)`` usage on both sides, and
    finally ``bool_op`` being exactly ``"AND"`` or ``"OR"``. A filter without
    a ``param_key`` is labelled ``"unknown"``.

    Args:
        filters_param: List of ``FilterParam`` instances to validate.
        schema: The ``SchemaGraph``.
        allowed_tables: Set of table names permitted in this context.
        cte_outputs: Dict of CTE name to output column metadata.
        context: Label used in issue IDs and messages.

    Returns:
        List of ``IntentIssue`` objects; empty when ``filters_param`` is
        empty.
    """
    if not filters_param:
        return []

    found: list[IntentIssue] = []

    def report(issue_id: str, message: str, details: dict) -> None:
        # Everything raised directly by this validator is a filter_validity error.
        found.append(
            IntentIssue(
                issue_id=issue_id,
                category="filter_validity",
                severity="error",
                message=message,
                context=details,
            )
        )

    cte_lookup = cte_outputs or {}
    for flt in filters_param:
        key = flt.param_key or "unknown"
        left_label = f"filter_{key}_left"
        right_label = f"filter_{key}_right"

        # Column references on both sides.
        found.extend(
            _validate_filter_col(
                flt.left_expr.primary_column, schema, allowed_tables, cte_lookup, context, "left_col", key
            )
        )
        if flt.right_expr:
            found.extend(
                _validate_filter_col(
                    flt.right_expr.primary_column, schema, allowed_tables, cte_lookup, context, "right_col", key
                )
            )

        if flt.op not in VALID_FILTER_OPS:
            report(
                f"filter_invalid_op_{context}_{flt.op}",
                f"Invalid filter operator '{flt.op}' in {context}",
                {"operator": flt.op, "param_key": key, "location": context},
            )

        # value_type only matters for comparisons against a literal value.
        compares_literal = not flt.right_expr and flt.op not in ("is null", "is not null")
        if compares_literal and flt.value_type not in VALID_VALUE_TYPES:
            report(
                f"filter_invalid_value_type_{context}_{flt.value_type}",
                f"Invalid filter value_type '{flt.value_type}' in {context}",
                {"value_type": flt.value_type, "param_key": key, "location": context},
            )

        # Scalar functions: both names are extracted before either is validated.
        left_scalar, _ = extract_functions_from_term(flt.left_expr.primary_term)
        if flt.right_expr:
            right_scalar, _ = extract_functions_from_term(flt.right_expr.primary_term)
        else:
            right_scalar = None
        found.extend(_validate_scalar_func_valid(left_scalar, left_label, context))
        found.extend(_validate_scalar_func_valid(right_scalar, right_label, context))

        found.extend(validate_expr_no_extract_epoch(flt.left_expr, left_label, context))
        if flt.right_expr:
            found.extend(validate_expr_no_extract_epoch(flt.right_expr, right_label, context))

        if flt.bool_op not in ("AND", "OR"):
            report(
                f"filter_invalid_bool_op_{context}_{flt.bool_op}",
                f"Invalid filter bool_op '{flt.bool_op}' in {context}. Must be 'AND' or 'OR'.",
                {"bool_op": flt.bool_op, "param_key": key, "location": context},
            )

    debug(f"[validation_schema.validate_filters_schema] {len(found)} issues in {context}")
    return found
|
|
811
|
+
|
|
812
|
+
|
|
813
|
+
def extract_agg_col(agg_expr: str) -> tuple:
    """Split an aggregation expression into ``(func, target, has_distinct)``.

    The expression must have the overall shape ``FUNC(...)``. The function name is lower-cased, a leading ``DISTINCT`` keyword inside the parentheses is reported through the boolean flag, and the remaining argument is passed through ``extract_col_from_scalar_wrapper`` before being returned. For example, ``"COUNT(DISTINCT table.col)"`` returns ``("count", "table.col", True)``.

    Args:

        agg_expr: The aggregation expression string.

    Returns:

        Tuple of ``(func, target, has_distinct)``, or ``(None, None, False)`` when the input is empty or does not match the ``FUNC(...)`` pattern.
    """
    no_match = (None, None, False)
    if not agg_expr:
        return no_match
    parsed = re.match(r"^\s*(\w+)\s*\(\s*(.*)\s*\)\s*$", agg_expr, re.IGNORECASE)
    if parsed is None:
        return no_match
    func_name = parsed.group(1).lower()
    argument = parsed.group(2).strip()
    distinct_prefix = "DISTINCT "
    has_distinct = argument.upper().startswith(distinct_prefix)
    if has_distinct:
        # Drop the keyword and keep only what is being aggregated.
        argument = argument[len(distinct_prefix):].strip()
    return (func_name, extract_col_from_scalar_wrapper(argument), has_distinct)
|
|
840
|
+
|
|
841
|
+
|
|
842
|
+
def extract_functions_from_term(term: str) -> tuple[str | None, str | None]:
    """Return the outer scalar function and inner aggregation function of a term.

    The term is parsed with ``extract_agg_col``. When the outermost function is itself a member of ``VALID_AGG_FUNCS`` it is reported as the aggregation and no scalar is returned; otherwise the outermost function is reported as the scalar and its argument is parsed once more to look for a wrapped aggregation. Documented examples: ``"ROUND(SUM(table.col))"`` gives ``("round", "sum")`` and ``"SUM(table.col)"`` gives ``(None, "sum")``.

    Args:

        term: The expression term string.

    Returns:

        Tuple of ``(scalar_func, agg_func)`` where each is a lowercase function name string or ``None`` if not present.
    """
    parsed = extract_agg_col(term)
    if len(parsed) != 3:
        return None, None
    outer_func, outer_arg, _ = parsed
    if not outer_func:
        return None, None
    if outer_func in VALID_AGG_FUNCS:
        # A bare aggregation: there is no scalar wrapper around it.
        return None, outer_func
    if not outer_arg:
        return outer_func, None
    nested = extract_agg_col(outer_arg)
    nested_func = nested[0] if len(nested) == 3 else None
    if nested_func and nested_func in VALID_AGG_FUNCS:
        return outer_func, nested_func
    return outer_func, None
|
|
865
|
+
|
|
866
|
+
|
|
867
|
+
def _validate_having_agg(
    agg_expr: str,
    schema: SchemaGraph,
    allowed_tables: set[str],
    cte_outputs: dict[str, dict[str, CteOutputColumnMeta]],
    context: str,
    side: str,
    param_key: str,
) -> list[IntentIssue]:
    """Check one side of a HAVING comparison and report the first problem found.

    A bare ``cte.column`` reference is accepted without further checks when the matching CTE output column has ``source == "aggregation"`` or a non-blank ``agg_func``. Otherwise the expression is parsed with ``extract_agg_col`` and checked in this order: parse result, function name, ``DISTINCT`` usage, ``*`` usage, table qualification, CTE column membership, allowed tables, column existence, and column type versus the aggregation function. At most one issue is produced per call.

    Args:

        agg_expr: The aggregation expression string (for example ``"COUNT(table.col)"``).
        schema: The ``SchemaGraph``.
        allowed_tables: Set of table names permitted in this context.
        cte_outputs: Dict of CTE name to output column metadata.
        context: Label used in issue IDs and messages.
        side: ``"left_agg"`` or ``"right_agg"``.
        param_key: The ``param_key`` of the ``HavingParam`` (for issue IDs).

    Returns:

        List of ``IntentIssue`` objects; empty when ``agg_expr`` is blank or passes every check.
    """
    issues: list[IntentIssue] = []
    if not agg_expr:
        return issues

    def fail(issue_id: str, message: str, **details) -> list[IntentIssue]:
        # All issues share category/severity and end their context with side/param_key/location.
        issues.append(
            IntentIssue(
                issue_id=issue_id,
                category="having_validity",
                severity="error",
                message=message,
                context={**details, "side": side, "param_key": param_key, "location": context},
            )
        )
        return issues

    # A plain "cte.column" reference is fine when that CTE column already holds an aggregate.
    bare_ref = re.match(r"^\s*(\w+)\s*\.\s*(\w+)\s*$", agg_expr.strip())
    if bare_ref:
        ref_cte, ref_col = bare_ref.groups()
        wanted = ref_col.lower()
        for key, meta in cte_outputs.get(ref_cte, {}).items():
            if key.lower() != wanted:
                continue
            if meta and (meta.source == "aggregation" or (meta.agg_func or "").strip()):
                return issues
            break  # only the first case-insensitive match is considered

    parsed = extract_agg_col(agg_expr)
    if len(parsed) != 3:
        return fail(
            f"having_{side}_invalid_format_{context}",
            f"Invalid aggregation format in HAVING {side}: '{agg_expr}' in {context}",
            aggregation=agg_expr,
        )
    func, actual_target, has_distinct = parsed

    if not func:
        return fail(
            f"having_{side}_invalid_format_{context}_{agg_expr}",
            f"Invalid aggregation format in HAVING {side}: '{agg_expr}' in {context}",
            aggregation=agg_expr,
        )
    if func not in VALID_AGGREGATION_FUNCTIONS:
        return fail(
            f"having_{side}_invalid_func_{context}_{func}",
            f"Invalid aggregation function '{func}' in HAVING {side} for {context}",
            function=func,
        )

    is_count = func == "count"
    if has_distinct and not is_count:
        return fail(
            f"having_{side}_distinct_not_count_{context}_{func}",
            f"DISTINCT only allowed with COUNT in HAVING, not {func.upper()} in {context}",
            function=func,
            aggregation=agg_expr,
        )
    if actual_target == "*":
        if is_count:
            return issues
        return fail(
            f"having_{side}_star_not_count_{context}_{func}",
            f"'*' only allowed with COUNT in HAVING, not {func.upper()} in {context}",
            function=func,
        )
    if "." not in actual_target:
        return fail(
            f"having_{side}_unqualified_{context}_{actual_target}",
            f"HAVING aggregation target must be qualified as table.column, got '{actual_target}' in {context}",
            target=actual_target,
        )

    table_name, col_name = actual_target.rsplit(".", 1)

    # CTE-qualified targets are fully handled here; no table or schema checks follow.
    if table_name in cte_outputs:
        col_lower = col_name.lower()
        if any(name.lower() == col_lower for name in cte_outputs[table_name]):
            return issues
        return fail(
            f"having_{side}_cte_col_not_found_{context}_{table_name}_{col_name}",
            f"Column '{col_name}' not in CTE '{table_name}' outputs for HAVING {side} in {context}",
            table=table_name,
            column=col_name,
        )

    if table_name not in allowed_tables:
        return fail(
            f"having_{side}_table_not_allowed_{context}_{table_name}",
            f"Table '{table_name}' not in allowed tables for HAVING {side} in {context}",
            table=table_name,
        )

    # An allowed table that is absent from the schema produces no issue.
    if table_name not in schema.tables:
        return issues

    table_meta = schema.tables[table_name]
    col_meta = table_meta.columns.get(col_name) or table_meta.columns.get(col_name.lower())
    if not col_meta:
        return fail(
            f"having_{side}_col_not_found_{context}_{table_name}_{col_name}",
            f"Column '{col_name}' not in table '{table_name}' for HAVING {side} in {context}",
            table=table_name,
            column=col_name,
        )
    if is_count:
        # COUNT is exempt from the per-function column type table.
        return issues

    value_type = col_meta.value_type or "string"
    if value_type not in AGGREGATION_ALLOWED_COLUMN_TYPES.get(func, []):
        return fail(
            f"having_{side}_type_mismatch_{context}_{func}_{col_name}",
            f"Cannot use {func.upper()} on column '{actual_target}' of type '{col_meta.data_type}' in HAVING {side} for {context}",
            function=func,
            column=actual_target,
            column_type=col_meta.data_type,
        )
    return issues
|
|
1082
|
+
|
|
1083
|
+
|
|
1084
|
+
def _reconstruct_agg_expr(expr: NormalizedExpr) -> str:
    """Rebuild the ``FUNC(column)`` string for one side of a HAVING parameter.

    The aggregation function is taken from ``expr.agg_func`` and, when that is empty, from the first entry of ``expr.add_groups``. The result is the upper-cased function wrapped around ``expr.primary_term``, which is the form ``_validate_having_agg`` parses.

    Args:

        expr: The ``NormalizedExpr`` from a ``HavingParam`` side.

    Returns:

        Reassembled expression such as ``"COUNT(orders.order_id)"``, or the bare ``primary_term`` when no aggregation is present.
    """
    func_name = expr.agg_func
    if not func_name:
        groups = expr.add_groups
        if groups:
            # Fall back to the aggregation recorded on the first additive group.
            func_name = groups[0].agg_func
    term = expr.primary_term
    return f"{func_name.upper()}({term})" if func_name else term
|
|
1104
|
+
|
|
1105
|
+
|
|
1106
|
+
def validate_having_schema(
    having_param: list[HavingParam],
    schema: SchemaGraph,
    allowed_tables: set[str],
    cte_outputs: dict[str, dict[str, CteOutputColumnMeta]] | None = None,
    context: str = "main",
) -> list[IntentIssue]:
    """Run schema-level checks over every ``HavingParam``.

    For each parameter, issues are collected in this order: left aggregation, right aggregation (when a right expression exists), operator, value type and missing comparison value (only when there is no right expression), scalar functions on both sides, the ``validate_expr_no_extract_epoch`` check on both sides, and finally the boolean connector.

    Args:

        having_param: List of ``HavingParam`` instances to validate.
        schema: The ``SchemaGraph``.
        allowed_tables: Set of table names permitted in this context.
        cte_outputs: Dict of CTE name to output column metadata.
        context: Label used in issue IDs and messages.

    Returns:

        List of ``IntentIssue`` objects; empty when ``having_param`` is empty.
    """
    if not having_param:
        return []
    collected: list[IntentIssue] = []
    cte_lookup = cte_outputs or {}

    def having_issue(issue_id: str, message: str, details: dict) -> IntentIssue:
        # Every issue raised directly by this validator is a having_validity error.
        return IntentIssue(
            issue_id=issue_id,
            category="having_validity",
            severity="error",
            message=message,
            context=details,
        )

    def agg_issues(expr: NormalizedExpr, side: str, key: str) -> list[IntentIssue]:
        return _validate_having_agg(
            _reconstruct_agg_expr(expr), schema, allowed_tables, cte_lookup, context, side, key
        )

    for hp in having_param:
        param_key = hp.param_key or "unknown"
        has_right = bool(hp.right_expr)

        collected.extend(agg_issues(hp.left_expr, "left_agg", param_key))
        if has_right:
            collected.extend(agg_issues(hp.right_expr, "right_agg", param_key))

        if hp.op not in VALID_HAVING_OPS:
            collected.append(
                having_issue(
                    f"having_invalid_op_{context}_{hp.op}",
                    f"Invalid HAVING operator '{hp.op}' in {context}",
                    {"operator": hp.op, "param_key": param_key, "location": context},
                )
            )

        # Literal comparisons (no right expression) must carry a valid value type and a value.
        if not has_right:
            if hp.value_type not in VALID_VALUE_TYPES:
                collected.append(
                    having_issue(
                        f"having_invalid_value_type_{context}_{hp.value_type}",
                        f"Invalid HAVING value_type '{hp.value_type}' in {context}",
                        {"value_type": hp.value_type, "param_key": param_key, "location": context},
                    )
                )
            if hp.op not in ("is null", "is not null") and hp.raw_value is None:
                collected.append(
                    having_issue(
                        f"having_missing_value_{context}_{param_key}",
                        (
                            f"HAVING parameter '{param_key}' has no comparison value in {context}. "
                            "Provide a numeric or string raw_value."
                        ),
                        {"param_key": param_key, "op": hp.op, "location": context},
                    )
                )

        left_label = f"having_{param_key}_left"
        right_label = f"having_{param_key}_right"
        left_scalar, _ = extract_functions_from_term(hp.left_expr.primary_term)
        right_scalar, _ = (
            extract_functions_from_term(hp.right_expr.primary_term) if has_right else (None, None)
        )
        collected.extend(_validate_scalar_func_valid(left_scalar, left_label, context))
        collected.extend(_validate_scalar_func_valid(right_scalar, right_label, context))
        collected.extend(validate_expr_no_extract_epoch(hp.left_expr, left_label, context))
        if has_right:
            collected.extend(validate_expr_no_extract_epoch(hp.right_expr, right_label, context))

        if hp.bool_op not in ("AND", "OR"):
            collected.append(
                having_issue(
                    f"having_invalid_bool_op_{context}_{hp.bool_op}",
                    f"Invalid HAVING bool_op '{hp.bool_op}' in {context}. Must be 'AND' or 'OR'.",
                    {"bool_op": hp.bool_op, "param_key": param_key, "location": context},
                )
            )

    debug(f"[validation_schema.validate_having_schema] {len(collected)} issues in {context}")
    return collected
|
|
1235
|
+
|
|
1236
|
+
|
|
1237
|
+
def validate_filter_ops_per_column(
    filters_param: list[FilterParam],
    schema: SchemaGraph,
    cte_outputs: dict[str, dict[str, CteOutputColumnMeta]] | None = None,
    context: str = "main",
) -> list[IntentIssue]:
    """Check each filter's operator against what its target column allows.

    The filter's primary column is unwrapped with ``extract_col_from_scalar_wrapper`` and split into owner and column. CTE outputs are consulted first (column names compared case-insensitively), then schema tables. An operator outside the column's ``get_valid_filter_ops()`` yields an error; a column flagged as not filterable yields a warning unless the operator is a NULL check. Filters whose column is missing, unqualified or unknown are skipped.

    Args:

        filters_param: List of ``FilterParam`` instances to validate.
        schema: The ``SchemaGraph``.
        cte_outputs: Dict of CTE name to output column metadata.
        context: Label used in issue IDs and messages.

    Returns:

        List of ``IntentIssue`` objects; empty when ``filters_param`` is empty.
    """
    if not filters_param:
        return []
    problems: list[IntentIssue] = []
    cte_lookup = cte_outputs or {}
    null_checks = ("is null", "is not null")

    for fp in filters_param:
        raw_col = fp.left_expr.primary_column
        if not raw_col:
            continue
        actual_col = extract_col_from_scalar_wrapper(raw_col)
        if "." not in actual_col:
            continue
        owner, column = actual_col.rsplit(".", 1)

        if owner in cte_lookup:
            outputs = cte_lookup[owner]
            hit = next((name for name in outputs if name.lower() == column.lower()), None)
            if hit:
                cte_meta = outputs[hit]
                permitted = cte_meta.get_valid_filter_ops()
                # An empty op list on a CTE column means "no restriction known".
                if permitted and fp.op not in permitted:
                    problems.append(
                        IntentIssue(
                            issue_id=f"filter_op_invalid_for_cte_{context}_{actual_col}_{fp.op}",
                            category="filter_validity",
                            severity="error",
                            message=f"Operator '{fp.op}' not valid for CTE column '{actual_col}' (role={cte_meta.role}, type={cte_meta.data_type}) in {context}. Valid: {permitted}",
                            context={
                                "column": actual_col,
                                "operator": fp.op,
                                "role": cte_meta.role,
                                "data_type": cte_meta.data_type,
                                "valid_ops": permitted,
                                "location": context,
                            },
                        )
                    )
                if not cte_meta.filterable and fp.op not in null_checks:
                    problems.append(
                        IntentIssue(
                            issue_id=f"filter_cte_col_not_filterable_{context}_{actual_col}",
                            category="filter_validity",
                            severity="warning",
                            message=f"CTE column '{actual_col}' (role={cte_meta.role}) is not recommended for filtering in {context}",
                            context={
                                "column": actual_col,
                                "role": cte_meta.role,
                                "location": context,
                            },
                        )
                    )
            # CTE-owned columns never fall through to the schema lookup.
            continue

        if owner not in schema.tables:
            continue
        table_meta = schema.tables[owner]
        col_meta = table_meta.columns.get(column) or table_meta.columns.get(column.lower())
        if not col_meta:
            continue

        permitted = col_meta.get_valid_filter_ops()
        if fp.op not in permitted:
            problems.append(
                IntentIssue(
                    issue_id=f"filter_op_invalid_for_type_{context}_{actual_col}_{fp.op}",
                    category="filter_validity",
                    severity="error",
                    message=f"Operator '{fp.op}' not valid for column '{actual_col}' (role={col_meta.role}, type={col_meta.data_type}) in {context}. Valid: {permitted}",
                    context={
                        "column": actual_col,
                        "operator": fp.op,
                        "role": col_meta.role,
                        "data_type": col_meta.data_type,
                        "valid_ops": permitted,
                        "location": context,
                    },
                )
            )
        if not col_meta.is_filterable and fp.op not in null_checks:
            problems.append(
                IntentIssue(
                    issue_id=f"filter_col_not_filterable_{context}_{actual_col}",
                    category="filter_validity",
                    severity="warning",
                    message=f"Column '{actual_col}' (role={col_meta.role}) is not recommended for filtering in {context}",
                    context={
                        "column": actual_col,
                        "role": col_meta.role,
                        "location": context,
                    },
                )
            )

    debug(f"[validation_schema.validate_filter_ops_per_column] {len(problems)} issues in {context}")
    return problems
|
|
1346
|
+
|
|
1347
|
+
|
|
1348
|
+
def validate_having_ops_per_column(
    having_param: list[HavingParam],
    schema: SchemaGraph,
    cte_outputs: dict[str, dict[str, CteOutputColumnMeta]] | None = None,
    context: str = "main",
) -> list[IntentIssue]:
    """Validate that HAVING operators are valid for each column's type and role.

    For the left side of every ``HavingParam`` (and the right side when present) the referenced ``table.column`` is extracted, either from inside an aggregation via ``extract_agg_col`` or from a scalar wrapper via ``extract_col_from_scalar_wrapper``. The column is resolved against CTE outputs first and schema tables otherwise, and the parameter's operator is compared with the column's ``get_valid_having_ops()``. Unqualified or unknown columns are skipped.

    CTE output columns are matched by exact name, then lower-cased name, then case-insensitively, so a mixed-case CTE output key is found here just as it is by the other validators in this module.

    Args:

        having_param: List of ``HavingParam`` instances to validate.
        schema: The ``SchemaGraph``.
        cte_outputs: Dict of CTE name to output column metadata.
        context: Label used in issue IDs and messages.

    Returns:

        List of ``IntentIssue`` objects; empty when ``having_param`` is empty.
    """
    issues: list[IntentIssue] = []
    if not having_param:
        return []
    cte_outputs = cte_outputs or {}

    def _find_cte_column(cte_cols: dict, col_name: str):
        # Keep the original exact / lower-case lookups first so existing matches are unchanged.
        meta = cte_cols.get(col_name) or cte_cols.get(col_name.lower())
        if meta:
            return meta
        wanted = col_name.lower()
        return next((v for k, v in cte_cols.items() if k.lower() == wanted), None)

    def _op_issue(kind: str, label: str, actual_col: str, op: str, meta: Any, valid_ops: Any) -> IntentIssue:
        return IntentIssue(
            issue_id=f"having_op_invalid_for_{kind}_{context}_{actual_col}_{op}",
            category="having_validity",
            severity="error",
            message=f"Operator '{op}' not valid for {label} '{actual_col}' (role={meta.role}, type={meta.data_type}) in {context}. Valid: {valid_ops}",
            context={
                "column": actual_col,
                "operator": op,
                "role": meta.role,
                "data_type": meta.data_type,
                "valid_ops": valid_ops,
                "location": context,
            },
        )

    def _check_expr(term: str, op: str) -> None:
        result = extract_agg_col(term)
        if len(result) == 3 and result[0] and result[1] and "." in result[1]:
            table_name, col_name = result[1].rsplit(".", 1)
        else:
            col = extract_col_from_scalar_wrapper(term)
            if "." not in col:
                return
            table_name, col_name = col.rsplit(".", 1)

        if table_name in cte_outputs:
            meta = _find_cte_column(cte_outputs[table_name], col_name)
            kind, label = "cte", "CTE column"
        elif table_name in schema.tables:
            columns = schema.tables[table_name].columns
            meta = columns.get(col_name) or columns.get(col_name.lower())
            kind, label = "column", "column"
        else:
            return
        if not meta:
            return
        valid_ops = meta.get_valid_having_ops()
        # An empty op list means no restriction is known for this column.
        if valid_ops and op not in valid_ops:
            issues.append(_op_issue(kind, label, f"{table_name}.{col_name}", op, meta, valid_ops))

    for hp in having_param:
        _check_expr(hp.left_expr.primary_term, hp.op)
        if hp.right_expr:
            _check_expr(hp.right_expr.primary_term, hp.op)

    if issues:
        debug(f"[validation_schema.validate_having_ops_per_column] {len(issues)} issues in {context}")
    return issues
|
|
1424
|
+
|
|
1425
|
+
|
|
1426
|
+
def validate_date_window_units(
    filters_param: list[FilterParam],
    cte_steps: list[RuntimeCteStep] | None = None,
    context: str = "main",
) -> list[IntentIssue]:
    """Report ``date_window`` filters whose ``unit`` is not an accepted value.

    Main-query filters are examined first, then the filters of each CTE step. Only filters with ``value_type == "date_window"`` and a dict ``raw_value`` are considered; a missing ``unit`` key is not reported here.

    Args:

        filters_param: List of ``FilterParam`` instances for the main query.
        cte_steps: Optional list of CTE steps whose filters are also checked.
        context: Label used in issue IDs and messages.

    Returns:

        List of ``IntentIssue`` objects for invalid ``date_window`` units.
    """
    # Pair every filter with the human-readable location used in its message.
    candidates = [(fp, f"{context} filter") for fp in filters_param]
    for cte in cte_steps or []:
        candidates.extend((fp, f"CTE '{cte.cte_name}' filter") for fp in cte.filters_param or [])

    invalid: list[IntentIssue] = []
    for fp, loc in candidates:
        if fp.value_type != "date_window" or not isinstance(fp.raw_value, dict):
            continue
        unit = fp.raw_value.get("unit")
        if unit is None or unit in VALID_DATE_WINDOW_UNITS:
            continue
        col = fp.left_expr.primary_column
        invalid.append(
            IntentIssue(
                issue_id=f"date_window_invalid_unit_{context}_{col}",
                category="filter_validity",
                severity="error",
                message=f"{loc}: date_window filter on '{col}' has invalid unit '{unit}'. Valid: {sorted(VALID_DATE_WINDOW_UNITS)}",
                context={
                    "column": col,
                    "unit": unit,
                    "valid_units": sorted(VALID_DATE_WINDOW_UNITS),
                    "location": context,
                },
            )
        )

    if invalid:
        debug(f"[validation_schema.validate_date_window_units] {len(invalid)} invalid units")
    return invalid
|
|
1480
|
+
|
|
1481
|
+
|
|
1482
|
+
def validate_date_diff_units(
    filters_param: list[FilterParam],
    cte_steps: list[RuntimeCteStep] | None = None,
    context: str = "main",
) -> list[IntentIssue]:
    """Report ``date_diff`` filters with an unknown ``unit`` or a non-numeric ``amount``.

    Main-query filters are examined first, then the filters of each CTE step. Only filters with ``value_type == "date_diff"`` and a dict ``raw_value`` are considered. An ``amount`` that is not already an ``int`` or ``float`` is accepted when ``int(amount)`` succeeds.

    Args:

        filters_param: List of ``FilterParam`` instances for the main query.
        cte_steps: Optional list of CTE steps whose filters are also checked.
        context: Label used in issue IDs and messages.

    Returns:

        List of ``IntentIssue`` objects for invalid ``date_diff`` config.
    """

    def label_for(fp: FilterParam) -> str:
        # Best available identifier for the filter, used in issue IDs and context.
        left = fp.left_expr
        return left.primary_column or left.primary_term or fp.param_key or "expr"

    candidates = [(fp, f"{context} filter") for fp in filters_param]
    for cte in cte_steps or []:
        candidates.extend((fp, f"CTE '{cte.cte_name}' filter") for fp in cte.filters_param or [])

    invalid: list[IntentIssue] = []
    for fp, loc in candidates:
        if fp.value_type != "date_diff" or not isinstance(fp.raw_value, dict):
            continue
        unit = fp.raw_value.get("unit")
        amount = fp.raw_value.get("amount")

        if unit is not None and unit not in VALID_DATE_DIFF_UNITS:
            col = label_for(fp)
            invalid.append(
                IntentIssue(
                    issue_id=f"date_diff_invalid_unit_{context}_{col}",
                    category="date_diff",
                    severity="error",
                    message=f"{loc}: date_diff filter has invalid unit '{unit}'. Valid: {sorted(VALID_DATE_DIFF_UNITS)}",
                    context={
                        "column": col,
                        "unit": unit,
                        "valid_units": sorted(VALID_DATE_DIFF_UNITS),
                        "location": context,
                    },
                )
            )

        if amount is None or isinstance(amount, (int, float)):
            continue
        try:
            # Non-numeric types are tolerated as long as they convert to an integer.
            int(amount)
        except (TypeError, ValueError):
            col = label_for(fp)
            invalid.append(
                IntentIssue(
                    issue_id=f"date_diff_invalid_amount_{context}_{col}",
                    category="date_diff",
                    severity="error",
                    message=f"{loc}: date_diff filter has non-numeric amount '{amount}'",
                    context={"column": col, "amount": amount, "location": context},
                )
            )

    if invalid:
        debug(f"[validation_schema.validate_date_diff_units] {len(invalid)} invalid configs")
    return invalid
|
|
1549
|
+
|
|
1550
|
+
|
|
1551
|
+
def validate_null_filters(
    filters_param: list[FilterParam],
    cte_steps: list[RuntimeCteStep] | None = None,
    context: str = "main",
) -> list[IntentIssue]:
    """Report ``IS NULL`` / ``IS NOT NULL`` filters that carry a non-null ``value_type``.

    Main-query filters are examined first, then the filters of each CTE step. A NULL-check filter passes when its ``value_type`` is empty or equal to ``"null"``.

    Args:

        filters_param: List of ``FilterParam`` instances for the main query.
        cte_steps: Optional list of CTE steps whose filters are also checked.
        context: Label used in issue IDs and messages.

    Returns:

        List of ``IntentIssue`` objects for any NULL filter with incorrect ``value_type``.
    """

    def wrong_type_issue(fp: FilterParam, loc: str) -> IntentIssue | None:
        if fp.op not in ("is null", "is not null"):
            return None
        if not fp.value_type or fp.value_type == "null":
            return None
        col = fp.left_expr.primary_column
        return IntentIssue(
            issue_id=f"null_filter_wrong_value_type_{col}",
            category="filter_structure",
            severity="error",
            message=f"{loc}: IS NULL filter on '{col}' should have value_type='null' or empty, got '{fp.value_type}'",
            context={
                "column": col,
                "op": fp.op,
                "expected_value_type": "null",
                "actual_value_type": fp.value_type,
            },
        )

    # Pair every filter with the human-readable location used in its message.
    candidates = [(fp, f"{context} filter") for fp in filters_param]
    for cte in cte_steps or []:
        candidates.extend((fp, f"CTE '{cte.cte_name}' filter") for fp in cte.filters_param or [])

    issues = [issue for issue in (wrong_type_issue(fp, loc) for fp, loc in candidates) if issue]

    if issues:
        debug(f"[validation_schema.validate_null_filters] FAILED with {len(issues)} issues")
    else:
        debug("[validation_schema.validate_null_filters] PASSED")
    return issues
|
|
1605
|
+
|
|
1606
|
+
|
|
1607
|
+
def validate_filter_value_type_alignment(
    filters_param: list[FilterParam],
    schema: SchemaGraph,
    context: str = "main",
    cte_outputs: dict[str, dict[str, CteOutputColumnMeta]] | None = None,
) -> list[IntentIssue]:
    """Warn when a string-valued filter targets a numeric column.

    Only filters whose ``value_type`` is ``"string"`` or ``"enum"`` and whose ``raw_value`` is set are inspected. A warning is raised when the filtered column is a numeric foreign key in the schema, or a numeric output column of a CTE.

    Args:

        filters_param: List of ``FilterParam`` instances to inspect.
        schema: Schema graph queried through ``get_column``.
        context: Label used in issue IDs and messages.
        cte_outputs: Optional map of CTE name to column output metadata.

    Returns:

        List of warning-level ``IntentIssue`` objects, one per misaligned filter.
    """
    issues: list[IntentIssue] = []
    cte_outputs = cte_outputs or {}
    numeric_types = {"integer", "number"}
    for fp in filters_param or []:
        if fp.value_type not in {"string", "enum"} or fp.raw_value is None:
            continue
        col = fp.left_expr.primary_column
        # A missing or unqualified column cannot be tied to a table, so there is nothing to compare against.
        if not col or "." not in col:
            continue
        # Split on the last dot, like get_col_type / get_col_meta, so every helper resolves the same table.
        table_name, col_name = col.rsplit(".", 1)
        col_meta = schema.get_column(table_name, col_name)
        if col_meta:
            if col_meta.is_foreign_key and col_meta.value_type in numeric_types:
                issues.append(
                    IntentIssue(
                        issue_id=f"filter_string_on_fk_int_{table_name}_{col_name}_{context}",
                        category="type_alignment",
                        severity="warning",
                        message=f"Filter on {col} uses string value '{fp.raw_value}' but column is a numeric FK in {context}. Filter should target the FK target table's descriptive column.",
                        context={
                            "column": col,
                            "value": str(fp.raw_value),
                            "value_type": fp.value_type,
                            "column_type": col_meta.value_type,
                            "location": context,
                        },
                    )
                )
                debug(f"[validation_schema.validate_filter_value_type_alignment] string value on FK int column {col}")
            # The column resolved against the real schema, so the CTE lookup below does not apply.
            continue
        cte_cols = cte_outputs.get(table_name)
        if not cte_cols:
            continue
        cte_meta = cte_cols.get(col_name) or cte_cols.get(col_name.lower())
        if cte_meta and cte_meta.value_type in numeric_types:
            issues.append(
                IntentIssue(
                    issue_id=f"filter_string_on_cte_numeric_{table_name}_{col_name}_{context}",
                    category="type_alignment",
                    severity="warning",
                    message=f"Filter on {col} uses string value '{fp.raw_value}' but CTE column is numeric ({cte_meta.value_type}) in {context}.",
                    context={
                        "column": col,
                        "value": str(fp.raw_value),
                        "value_type": fp.value_type,
                        "column_type": cte_meta.value_type,
                        "location": context,
                    },
                )
            )
    return issues
|
|
1665
|
+
|
|
1666
|
+
|
|
1667
|
+
def validate_no_between_ops(
    filters_param: list[FilterParam],
    having_param: list[HavingParam],
    context: str = "main query",
) -> list[IntentIssue]:
    """Flag any remaining BETWEEN operators that were not decomposed.

    After ``decompose_between_params`` all BETWEEN ops should be replaced by >= / <= pairs. This validator reports any that survived as errors so the semantic repair loop can instruct the LLM to rewrite them.

    Args:

        filters_param: Filter conditions to inspect; ``None`` is treated as empty.
        having_param: Having conditions to inspect; ``None`` is treated as empty.
        context: Description of the query scope for issue IDs and messages.

    Returns:

        List of ``IntentIssue`` objects, one per surviving BETWEEN op.
    """
    issues: list[IntentIssue] = []
    # Both clauses get the same check; only the issue-id prefix and the message label differ.
    clauses = (
        ("filter", "Filter", filters_param),
        ("having", "Having", having_param),
    )
    for id_prefix, label, params in clauses:
        for param in params or []:
            # Tolerate a missing operator and surrounding whitespace instead of raising or missing the match.
            if (param.op or "").strip().lower() != "between":
                continue
            col = param.left_expr.primary_column
            issues.append(
                IntentIssue(
                    issue_id=f"{id_prefix}_between_not_decomposed_{col}_{context}",
                    category="operator",
                    severity="error",
                    message=(
                        f"{label} on {col} still uses BETWEEN in {context}. "
                        "Decompose into separate >= and <= conditions."
                    ),
                    context={"column": col, "op": param.op, "location": context},
                )
            )
    if issues:
        debug(f"[validation_schema.validate_no_between_ops] FAILED with {len(issues)} issues in {context}")
    return issues
|
|
1720
|
+
|
|
1721
|
+
|
|
1722
|
+
def get_col_type(
    col_expr: str,
    schema: SchemaGraph,
    cte_outputs: dict[str, dict[str, CteOutputColumnMeta]],
) -> str | None:
    """Look up the ``value_type`` of a column in CTE outputs or the schema.

    The expression is first passed through ``extract_col_from_scalar_wrapper``. CTE outputs take precedence over schema tables, and each lookup tries the column name as written before its lower-cased form.

    Args:

        col_expr: Column reference, optionally wrapped in scalar function calls.
        schema: Schema graph containing table and column metadata.
        cte_outputs: Map of CTE name to column output metadata.

    Returns:

        The ``value_type`` string for the resolved column, or ``None`` if it cannot be resolved.
    """
    qualified = extract_col_from_scalar_wrapper(col_expr)
    # rpartition yields an empty separator when the reference has no table qualifier.
    table_name, dot, col_name = qualified.rpartition(".")
    if not dot:
        return None

    if table_name in cte_outputs:
        # A CTE name wins over a schema table: the schema is not consulted once the CTE is known.
        found = cte_outputs[table_name].get(col_name) or cte_outputs[table_name].get(col_name.lower())
        if found:
            return found.value_type
        return None

    if table_name in schema.tables:
        table_meta = schema.tables[table_name]
        found = table_meta.columns.get(col_name) or table_meta.columns.get(col_name.lower())
        if found:
            return found.value_type
    return None
|
|
1753
|
+
|
|
1754
|
+
|
|
1755
|
+
def get_col_meta(
    col_expr: str,
    schema: SchemaGraph,
    cte_outputs: dict[str, dict[str, CteOutputColumnMeta]],
) -> Any | None:
    """Resolve column metadata from CTE outputs or the schema graph.

    For a CTE column, a ``ColumnMetadata`` object is built from the CTE output metadata. For a schema column, the stored metadata object is returned as is. Each lookup tries the column name as written before its lower-cased form.

    Args:

        col_expr: Column reference, optionally wrapped in scalar function calls.
        schema: Schema graph containing table and column metadata.
        cte_outputs: Map of CTE name to column output metadata.

    Returns:

        A ``ColumnMetadata`` instance (real or synthetic) for the resolved column, or ``None`` if the column cannot be resolved.
    """
    qualified = extract_col_from_scalar_wrapper(col_expr)
    # rpartition yields an empty separator when the reference has no table qualifier.
    table_name, dot, col_name = qualified.rpartition(".")
    if not dot:
        return None

    if table_name not in cte_outputs:
        if table_name in schema.tables:
            table_meta = schema.tables[table_name]
            return table_meta.columns.get(col_name) or table_meta.columns.get(col_name.lower())
        return None

    # CTE column: synthesise schema-style metadata from the CTE output description.
    source = cte_outputs[table_name].get(col_name) or cte_outputs[table_name].get(col_name.lower())
    if not source:
        return None
    synthetic_fields = {
        "name": col_name,
        "data_type": source.data_type or "unknown",
        "role": source.role,
        "is_filterable_override": source.filterable,
        "is_aggregatable_override": source.aggregatable,
        "is_groupable_override": source.groupable,
        "valid_filter_ops": list(source.valid_filter_ops or []),
        "valid_aggregations": list(source.valid_aggregations or []),
        "valid_having_ops": list(source.valid_having_ops or []),
    }
    return ColumnMetadata(**synthetic_fields)
|
|
1795
|
+
|
|
1796
|
+
|
|
1797
|
+
def is_col_numeric(
    col_ref: str,
    schema: SchemaGraph,
    cte_outputs: dict[str, dict[str, CteOutputColumnMeta]],
) -> bool | None:
    """Tell whether a column resolves to a numeric value type.

    The type is obtained from ``get_col_type``; ``"integer"`` and ``"number"`` count as numeric.

    Args:

        col_ref: Fully-qualified column reference (``table.column``).
        schema: Schema graph containing column type information.
        cte_outputs: Map of CTE name to column output metadata.

    Returns:

        ``True`` if the column type is numeric, ``False`` if it is not, or ``None`` if the column cannot be resolved.
    """
    resolved_type = get_col_type(col_ref, schema, cte_outputs)
    # An unresolved column is reported as "unknown" (None), not as non-numeric.
    return None if resolved_type is None else resolved_type in ("integer", "number")
|
|
1818
|
+
|
|
1819
|
+
|
|
1820
|
+
def is_col_arithmetic_role(
    col_ref: str,
    schema: SchemaGraph,
    cte_outputs: dict[str, dict[str, CteOutputColumnMeta]],
) -> bool | None:
    """Tell whether a column's role allows it in arithmetic expressions.

    The role is taken from ``get_col_meta`` when that yields metadata with a role. Otherwise the raw CTE output metadata for the column is consulted.

    Args:

        col_ref: Fully-qualified column reference (``table.column``).
        schema: Schema graph containing column role information.
        cte_outputs: Map of CTE name to column output metadata.

    Returns:

        ``True`` if the column role is in ``ARITHMETIC_ROLES``, ``False`` if it has a role outside that set, or ``None`` if no role can be resolved.
    """
    resolved = get_col_meta(col_ref, schema, cte_outputs)
    if resolved and resolved.role:
        return resolved.role in ARITHMETIC_ROLES

    # Fallback: read the role straight from the CTE output metadata.
    unwrapped = extract_col_from_scalar_wrapper(col_ref)
    table_name, dot, col_name = unwrapped.rpartition(".")
    if not dot or table_name not in cte_outputs:
        return None
    cte_meta = cte_outputs[table_name].get(col_name) or cte_outputs[table_name].get(col_name.lower())
    if cte_meta and cte_meta.role:
        return cte_meta.role in ARITHMETIC_ROLES
    return None
|