prismiq 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prismiq/__init__.py +543 -0
- prismiq/api.py +1889 -0
- prismiq/auth.py +108 -0
- prismiq/cache.py +527 -0
- prismiq/calculated_field_processor.py +231 -0
- prismiq/calculated_fields.py +819 -0
- prismiq/dashboard_store.py +1219 -0
- prismiq/dashboards.py +374 -0
- prismiq/dates.py +247 -0
- prismiq/engine.py +1315 -0
- prismiq/executor.py +345 -0
- prismiq/filter_merge.py +397 -0
- prismiq/formatting.py +298 -0
- prismiq/logging.py +489 -0
- prismiq/metrics.py +536 -0
- prismiq/middleware.py +346 -0
- prismiq/permissions.py +87 -0
- prismiq/persistence/__init__.py +45 -0
- prismiq/persistence/models.py +208 -0
- prismiq/persistence/postgres_store.py +1119 -0
- prismiq/persistence/saved_query_store.py +336 -0
- prismiq/persistence/schema.sql +95 -0
- prismiq/persistence/setup.py +222 -0
- prismiq/persistence/tables.py +76 -0
- prismiq/pins.py +72 -0
- prismiq/py.typed +0 -0
- prismiq/query.py +1233 -0
- prismiq/schema.py +333 -0
- prismiq/schema_config.py +354 -0
- prismiq/sql_utils.py +147 -0
- prismiq/sql_validator.py +219 -0
- prismiq/sqlalchemy_builder.py +577 -0
- prismiq/timeseries.py +410 -0
- prismiq/transforms.py +471 -0
- prismiq/trends.py +573 -0
- prismiq/types.py +688 -0
- prismiq-0.1.0.dist-info/METADATA +109 -0
- prismiq-0.1.0.dist-info/RECORD +39 -0
- prismiq-0.1.0.dist-info/WHEEL +4 -0
prismiq/query.py
ADDED
@@ -0,0 +1,1233 @@
"""Query builder for converting QueryDefinition to parameterized SQL.

This module provides the QueryBuilder class that generates safe,
parameterized SQL queries from QueryDefinition objects.
"""

from __future__ import annotations

from datetime import date, datetime
from difflib import get_close_matches
from typing import Any

from pydantic import BaseModel, ConfigDict

from prismiq.calculated_fields import ExpressionParser
from prismiq.types import (
    AggregationType,
    ColumnSelection,
    DatabaseSchema,
    FilterDefinition,
    FilterOperator,
    JoinType,
    QueryDefinition,
)

# ============================================================================
# Validation Models
# ============================================================================


class ValidationError(BaseModel):
    """Detailed validation error."""

    model_config = ConfigDict(strict=True)

    code: str
    """Machine-readable error code."""

    message: str
    """User-friendly error message."""

    field: str | None = None
    """Path to the problematic field (e.g., 'tables[0].name')."""

    suggestion: str | None = None
    """Suggested fix."""


class ValidationResult(BaseModel):
    """Complete validation result."""

    model_config = ConfigDict(strict=True)

    valid: bool
    """Whether the query is valid."""

    errors: list[ValidationError]
    """List of validation errors (empty if valid)."""


# Error codes
ERROR_TABLE_NOT_FOUND = "TABLE_NOT_FOUND"
ERROR_COLUMN_NOT_FOUND = "COLUMN_NOT_FOUND"
ERROR_INVALID_JOIN = "INVALID_JOIN"
ERROR_TYPE_MISMATCH = "TYPE_MISMATCH"
ERROR_INVALID_AGGREGATION = "INVALID_AGGREGATION"
ERROR_EMPTY_QUERY = "EMPTY_QUERY"
ERROR_CIRCULAR_JOIN = "CIRCULAR_JOIN"
ERROR_AMBIGUOUS_COLUMN = "AMBIGUOUS_COLUMN"
ERROR_INVALID_TIME_SERIES = "INVALID_TIME_SERIES"


class QueryBuilder:
    """Builds parameterized SQL queries from QueryDefinition objects.

    Uses the database schema to validate table and column references,
    and generates SQL with proper identifier quoting for safety.

    Example:
        >>> builder = QueryBuilder(schema)
        >>> sql, params = builder.build(query_definition)
        >>> # sql: 'SELECT "users"."email" FROM "users" WHERE "users"."id" = $1'
        >>> # params: [42]

    With schema qualification:
        >>> builder = QueryBuilder(schema, schema_name="org_123")
        >>> sql, params = builder.build(query_definition)
        >>> # sql: 'SELECT "org_123"."users"."email" FROM "org_123"."users" ...'
    """

    def __init__(
        self,
        schema: DatabaseSchema,
        schema_name: str | None = None,
    ) -> None:
        """Initialize the query builder.

        Args:
            schema: Database schema for validation.
            schema_name: PostgreSQL schema name for schema-qualified table references.
                If None, tables are referenced without schema prefix (uses search_path).
        """
        self._schema = schema
        self._schema_name = schema_name

    def validate(self, query: QueryDefinition) -> list[str]:
        """Validate a query definition against the schema.

        Args:
            query: Query definition to validate.

        Returns:
            List of validation error messages (empty if valid).

        Note:
            This method returns simple string errors for backward compatibility.
            Use validate_detailed() for richer error information.
        """
        result = self.validate_detailed(query)
        return [err.message for err in result.errors]

    def validate_detailed(self, query: QueryDefinition) -> ValidationResult:
        """Validate a query definition with detailed error information.

        Args:
            query: Query definition to validate.

        Returns:
            ValidationResult with detailed errors including suggestions.
        """
        errors: list[ValidationError] = []

        # Build table_id -> table_name mapping
        table_map: dict[str, str] = {}
        for qt in query.tables:
            table_map[qt.id] = qt.name

        # Get all available table names for suggestions
        available_tables = self._schema.table_names()

        # Validate tables exist in schema
        for i, qt in enumerate(query.tables):
            if not self._schema.has_table(qt.name):
                suggestion = self._suggest_similar(qt.name, available_tables)
                errors.append(
                    ValidationError(
                        code=ERROR_TABLE_NOT_FOUND,
                        message=f"Table '{qt.name}' not found in schema",
                        field=f"tables[{i}].name",
                        suggestion=suggestion,
                    )
                )

        # Build set of calculated field names for reference checking
        calculated_field_names = {cf.name for cf in query.calculated_fields}

        # Validate columns exist in tables
        for i, col in enumerate(query.columns):
            table_name = table_map.get(col.table_id)
            if table_name:
                table = self._schema.get_table(table_name)
                if table:
                    # Allow "*" for COUNT(*) - this is a valid SQL pattern
                    if col.column == "*" and col.aggregation == AggregationType.COUNT:
                        continue  # Skip further validation for COUNT(*)

                    # Allow references to calculated fields - they're defined in calculated_fields
                    if col.column in calculated_field_names:
                        continue  # Skip further validation for calculated field references

                    if not table.has_column(col.column):
                        available_columns = [c.name for c in table.columns]
                        suggestion = self._suggest_similar(col.column, available_columns)
                        errors.append(
                            ValidationError(
                                code=ERROR_COLUMN_NOT_FOUND,
                                message=f"Column '{col.column}' not found in table '{table_name}'",
                                field=f"columns[{i}].column",
                                suggestion=suggestion,
                            )
                        )
                    else:
                        # Validate aggregation is valid for column type
                        if col.aggregation != AggregationType.NONE:
                            column_schema = table.get_column(col.column)
                            if column_schema:
                                agg_error = self._validate_aggregation(
                                    col.aggregation, column_schema.data_type, col.column
                                )
                                if agg_error:
                                    errors.append(
                                        ValidationError(
                                            code=ERROR_INVALID_AGGREGATION,
                                            message=agg_error,
                                            field=f"columns[{i}].aggregation",
                                            suggestion=self._suggest_aggregation(
                                                column_schema.data_type
                                            ),
                                        )
                                    )

        # Validate join columns
        for i, join in enumerate(query.joins):
            # From column
            from_table_name = table_map.get(join.from_table_id)
            if from_table_name:
                from_table = self._schema.get_table(from_table_name)
                if from_table and not from_table.has_column(join.from_column):
                    available_columns = [c.name for c in from_table.columns]
                    suggestion = self._suggest_similar(join.from_column, available_columns)
                    errors.append(
                        ValidationError(
                            code=ERROR_INVALID_JOIN,
                            message=f"Join column '{join.from_column}' not found in table '{from_table_name}'",
                            field=f"joins[{i}].from_column",
                            suggestion=suggestion,
                        )
                    )

            # To column
            to_table_name = table_map.get(join.to_table_id)
            if to_table_name:
                to_table = self._schema.get_table(to_table_name)
                if to_table and not to_table.has_column(join.to_column):
                    available_columns = [c.name for c in to_table.columns]
                    suggestion = self._suggest_similar(join.to_column, available_columns)
                    errors.append(
                        ValidationError(
                            code=ERROR_INVALID_JOIN,
                            message=f"Join column '{join.to_column}' not found in table '{to_table_name}'",
                            field=f"joins[{i}].to_column",
                            suggestion=suggestion,
                        )
                    )

        # Validate filter columns
        for i, f in enumerate(query.filters):
            table_name = table_map.get(f.table_id)
            if table_name:
                table = self._schema.get_table(table_name)
                if table:
                    # Allow references to calculated fields - they're defined in calculated_fields
                    if f.column in calculated_field_names:
                        continue  # Skip further validation for calculated field references

                    if not table.has_column(f.column):
                        available_columns = [c.name for c in table.columns]
                        suggestion = self._suggest_similar(f.column, available_columns)
                        errors.append(
                            ValidationError(
                                code=ERROR_COLUMN_NOT_FOUND,
                                message=f"Filter column '{f.column}' not found in table '{table_name}'",
                                field=f"filters[{i}].column",
                                suggestion=suggestion,
                            )
                        )
                    else:
                        # Validate filter value type matches column type
                        column_schema = table.get_column(f.column)
                        if column_schema and f.value is not None:
                            type_error = self._validate_filter_type(
                                f.operator, f.value, column_schema.data_type, f.column
                            )
                            if type_error:
                                errors.append(
                                    ValidationError(
                                        code=ERROR_TYPE_MISMATCH,
                                        message=type_error,
                                        field=f"filters[{i}].value",
                                        suggestion=None,
                                    )
                                )

        # Validate order by columns
        for i, o in enumerate(query.order_by):
            # Allow references to calculated fields
            if o.column in calculated_field_names:
                continue

            table_name = table_map.get(o.table_id)
            if table_name:
                table = self._schema.get_table(table_name)
                if table and not table.has_column(o.column):
                    available_columns = [c.name for c in table.columns]
                    suggestion = self._suggest_similar(o.column, available_columns)
                    errors.append(
                        ValidationError(
                            code=ERROR_COLUMN_NOT_FOUND,
                            message=f"Order by column '{o.column}' not found in table '{table_name}'",
                            field=f"order_by[{i}].column",
                            suggestion=suggestion,
                        )
                    )

        # Validate time series configuration
        if query.time_series:
            ts_errors = self._validate_time_series(query, table_map)
            errors.extend(ts_errors)

        # Check for circular joins
        circular_error = self._check_circular_joins(query)
        if circular_error:
            errors.append(circular_error)

        return ValidationResult(valid=len(errors) == 0, errors=errors)
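
    # Illustrative sketch of the result shape (hypothetical schema and names):
    # validating a query that references a misspelled "usres" table would
    # produce something like
    #   ValidationResult(
    #       valid=False,
    #       errors=[
    #           ValidationError(
    #               code=ERROR_TABLE_NOT_FOUND,
    #               message="Table 'usres' not found in schema",
    #               field="tables[0].name",
    #               suggestion="Did you mean 'users'?",
    #           )
    #       ],
    #   )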

    def _validate_time_series(
        self, query: QueryDefinition, table_map: dict[str, str]
    ) -> list[ValidationError]:
        """Validate time series configuration."""
        errors: list[ValidationError] = []

        if not query.time_series:
            return errors

        ts = query.time_series
        table_name = table_map.get(ts.table_id)

        if not table_name:
            errors.append(
                ValidationError(
                    code=ERROR_INVALID_TIME_SERIES,
                    message=f"Time series table_id '{ts.table_id}' not found",
                    field="time_series.table_id",
                    suggestion=None,
                )
            )
            return errors

        table = self._schema.get_table(table_name)
        if not table:
            return errors

        # Validate date column exists
        if not table.has_column(ts.date_column):
            available_columns = [c.name for c in table.columns]
            suggestion = self._suggest_similar(ts.date_column, available_columns)
            errors.append(
                ValidationError(
                    code=ERROR_INVALID_TIME_SERIES,
                    message=f"Date column '{ts.date_column}' not found in table '{table_name}'",
                    field="time_series.date_column",
                    suggestion=suggestion,
                )
            )
        else:
            # Validate column is a date/timestamp type
            column_schema = table.get_column(ts.date_column)
            if column_schema:
                date_types = {
                    "date",
                    "timestamp",
                    "timestamp without time zone",
                    "timestamp with time zone",
                    "timestamptz",
                }
                is_date_type = any(dt in column_schema.data_type.lower() for dt in date_types)
                if not is_date_type:
                    errors.append(
                        ValidationError(
                            code=ERROR_INVALID_TIME_SERIES,
                            message=f"Column '{ts.date_column}' is not a date/timestamp type (found: {column_schema.data_type})",
                            field="time_series.date_column",
                            suggestion="Use a column with date, timestamp, or timestamptz type",
                        )
                    )

        return errors

    def _suggest_similar(
        self, name: str, candidates: list[str], max_suggestions: int = 3
    ) -> str | None:
        """Find similar names for suggestions."""
        matches = get_close_matches(
            name.lower(), [c.lower() for c in candidates], n=max_suggestions, cutoff=0.6
        )
        if matches:
            # Map back to original case
            original_matches = []
            for match in matches:
                for candidate in candidates:
                    if candidate.lower() == match:
                        original_matches.append(candidate)
                        break
            if len(original_matches) == 1:
                return f"Did you mean '{original_matches[0]}'?"
            elif len(original_matches) > 1:
                return f"Did you mean one of: {', '.join(repr(m) for m in original_matches)}?"
        return None
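
    # For example (with hypothetical candidates), _suggest_similar("emial", ["email", "id"])
    # returns "Did you mean 'email'?", while a name with no close match returns None.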

    def _validate_aggregation(
        self, agg: AggregationType, data_type: str, column_name: str
    ) -> str | None:
        """Validate that an aggregation is valid for a data type."""
        # Numeric aggregations
        numeric_aggs = {AggregationType.SUM, AggregationType.AVG}
        numeric_types = {
            "integer",
            "bigint",
            "smallint",
            "numeric",
            "decimal",
            "real",
            "double precision",
        }

        if agg in numeric_aggs:
            # Check if type is numeric-ish
            data_type_lower = data_type.lower()
            is_numeric = any(nt in data_type_lower for nt in numeric_types)
            if not is_numeric:
                return f"Aggregation '{agg.value}' is not valid for column '{column_name}' of type '{data_type}'"

        return None

    def _suggest_aggregation(self, data_type: str) -> str | None:
        """Suggest valid aggregations for a data type."""
        data_type_lower = data_type.lower()
        numeric_types = {
            "integer",
            "bigint",
            "smallint",
            "numeric",
            "decimal",
            "real",
            "double precision",
        }

        is_numeric = any(nt in data_type_lower for nt in numeric_types)
        if is_numeric:
            return "Valid aggregations for this column: sum, avg, min, max, count"
        else:
            return "Valid aggregations for this column: min, max, count"

    def _validate_filter_type(
        self, operator: FilterOperator, value: Any, data_type: str, column_name: str
    ) -> str | None:
        """Validate that a filter value is compatible with the column type."""
        data_type_lower = data_type.lower()

        # Check for list operators - combined condition
        if operator in (FilterOperator.IN, FilterOperator.NOT_IN) and not isinstance(value, list):
            return f"Operator '{operator.value}' requires a list value for column '{column_name}'"

        # Check for between operator - combined condition
        if operator == FilterOperator.BETWEEN and (
            not isinstance(value, list | tuple) or len(value) != 2
        ):
            return f"Operator 'between' requires a list/tuple of exactly 2 values for column '{column_name}'"

        # Basic numeric type checking
        numeric_types = {
            "integer",
            "bigint",
            "smallint",
            "numeric",
            "decimal",
            "real",
            "double precision",
        }
        is_numeric_column = any(nt in data_type_lower for nt in numeric_types)

        if is_numeric_column and operator not in (
            FilterOperator.IS_NULL,
            FilterOperator.IS_NOT_NULL,
        ):
            # For IN/NOT_IN/IN_OR_NULL, check list items (None allowed for IN_OR_NULL)
            if operator in (
                FilterOperator.IN,
                FilterOperator.NOT_IN,
                FilterOperator.IN_OR_NULL,
            ) and isinstance(value, list):
                for v in value:
                    if v is not None and not isinstance(v, int | float):
                        return f"Column '{column_name}' is numeric but received non-numeric value in list"
            elif operator == FilterOperator.BETWEEN and isinstance(value, list | tuple):
                for v in value:
                    if not isinstance(v, int | float):
                        return f"Column '{column_name}' is numeric but received non-numeric value in range"
            elif not isinstance(value, int | float | list | tuple):
                return f"Column '{column_name}' is numeric but received non-numeric value"

        return None
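
    # For example, a BETWEEN filter whose value is not a two-element list/tuple
    # returns the "requires a list/tuple of exactly 2 values" message, and an IN
    # filter against a numeric column with a string in its list returns the
    # "non-numeric value in list" message; a compatible value returns None.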

    def _check_circular_joins(self, query: QueryDefinition) -> ValidationError | None:
        """Check for circular join references."""
        if not query.joins:
            return None

        # Build a simple adjacency list
        # For simplicity, we just check if any table joins to itself
        for i, join in enumerate(query.joins):
            if join.from_table_id == join.to_table_id:
                return ValidationError(
                    code=ERROR_CIRCULAR_JOIN,
                    message="Join references the same table on both sides",
                    field=f"joins[{i}]",
                    suggestion="A join should connect two different tables",
                )

        return None

    def sanitize_filters(self, query: QueryDefinition) -> QueryDefinition:
        """Remove filters that reference non-existent columns.

        Filters referencing columns that don't exist in their target table are
        silently removed instead of causing validation errors.

        Args:
            query: Query definition with potentially invalid filters.

        Returns:
            A new QueryDefinition with invalid filters removed.
        """
        if not query.filters:
            return query

        # Build table_id -> table_name mapping
        table_map: dict[str, str] = {}
        for qt in query.tables:
            table_map[qt.id] = qt.name

        # Build set of calculated field names (these are always valid)
        calculated_field_names = {cf.name for cf in (query.calculated_fields or [])}

        # Filter out invalid filters
        valid_filters = []
        for f in query.filters:
            table_name = table_map.get(f.table_id)
            if not table_name:
                # Unknown table_id - skip this filter
                continue

            table = self._schema.get_table(table_name)
            if not table:
                # Unknown table - skip this filter
                continue

            # Allow references to calculated fields
            if f.column in calculated_field_names:
                valid_filters.append(f)
                continue

            # Check if column exists in table
            if table.has_column(f.column):
                valid_filters.append(f)
            # else: column doesn't exist - skip this filter silently

        # Return new query with sanitized filters
        if len(valid_filters) == len(query.filters):
            return query  # No changes needed

        return query.model_copy(update={"filters": valid_filters})
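
    # For example, a filter referencing a column no longer present in the schema
    # is dropped from the returned copy; if every filter is valid, the original
    # query object is returned unchanged.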

    def build(self, query: QueryDefinition) -> tuple[str, list[Any]]:
        """Build a parameterized SQL query.

        Args:
            query: Query definition to build.

        Returns:
            Tuple of (sql_string, parameters) where parameters use $1, $2 placeholders.
        """
        params: list[Any] = []

        # Build table_id -> table_name mapping for schema lookup
        table_map: dict[str, str] = {}
        for qt in query.tables:
            table_map[qt.id] = qt.name

        # Build table_id -> table reference mapping
        table_refs = self._build_table_refs(query)

        # Build calculated field SQL map (shared across SELECT, WHERE, ORDER BY)
        calc_sql_map = self._build_calc_sql_map(query)

        # SELECT clause - with time series support
        select_clause = self._build_select(query, table_refs, calc_sql_map)

        # FROM clause
        from_clause = self._build_from(query, table_refs)

        # WHERE clause
        where_clause, params = self._build_where(
            query.filters, table_refs, table_map, calc_sql_map, params
        )

        # GROUP BY clause - with time series support
        group_by_clause = self._build_group_by(query, table_refs, calc_sql_map)

        # ORDER BY clause - with time series support
        order_by_clause = self._build_order_by(query, table_refs, calc_sql_map)

        # LIMIT and OFFSET
        limit_clause = ""
        if query.limit is not None:
            params.append(query.limit)
            limit_clause = f" LIMIT ${len(params)}"

        offset_clause = ""
        if query.offset is not None:
            params.append(query.offset)
            offset_clause = f" OFFSET ${len(params)}"

        # Combine all clauses
        sql = f"SELECT {select_clause} FROM {from_clause}"
        if where_clause:
            sql += f" WHERE {where_clause}"
        if group_by_clause:
            sql += f" GROUP BY {group_by_clause}"
        if order_by_clause:
            sql += f" ORDER BY {order_by_clause}"
        sql += limit_clause + offset_clause

        return sql, params
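
    # Usage sketch (mirrors the class docstring; schema and query names are
    # illustrative):
    #   >>> builder = QueryBuilder(schema)
    #   >>> sql, params = builder.build(query_definition)
    #   >>> sql
    #   'SELECT "users"."email" FROM "users" WHERE "users"."id" = $1'
    #   >>> params
    #   [42]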

    def _build_calc_sql_map(self, query: QueryDefinition) -> dict[str, str]:
        """Build mapping from calculated field names to their SQL expressions.

        Uses pre-computed sql_expression if available (recommended for inter-field
        dependency resolution). Otherwise parses the expression on-demand.

        Args:
            query: Query definition containing calculated_fields.

        Returns:
            Dict mapping calculated field name to SQL expression.
        """
        calc_sql_map: dict[str, str] = {}

        # Get base table reference for qualifying unqualified column references.
        # Prefer alias over name since FROM clause uses alias when present.
        # This prevents "ambiguous column" errors in multi-table queries.
        if query.tables:
            base_table = query.tables[0]
            base_table_ref = base_table.alias or base_table.name
        else:
            base_table_ref = None

        for cf in query.calculated_fields:
            # Use pre-computed SQL if available (handles inter-field dependencies).
            # IMPORTANT: sql_expression must be pre-validated and use parameterized
            # values. It should have all column references fully qualified with the
            # correct table alias/name to match the FROM clause.
            if cf.sql_expression:
                if not cf.sql_expression.strip():
                    raise ValueError(f"Calculated field '{cf.name}' has empty sql_expression")
                calc_sql_map[cf.name] = cf.sql_expression
            elif cf.expression:
                # Fall back to parsing on-demand. This is a secondary code path
                # that won't resolve inter-field references correctly. Prefer
                # providing sql_expression from resolve_calculated_fields().
                try:
                    parser = ExpressionParser()
                    ast = parser.parse(cf.expression)
                    calc_sql_map[cf.name] = ast.to_sql({}, default_table_ref=base_table_ref)
                except ValueError as e:
                    # Fail closed: raise a clear error instead of injecting raw text
                    raise ValueError(
                        f"Failed to parse calculated field '{cf.name}': {e}. "
                        f"Expression: {cf.expression!r}"
                    ) from e

        return calc_sql_map

    def _build_table_refs(self, query: QueryDefinition) -> dict[str, str]:
        """Build mapping from table_id to quoted table reference."""
        refs: dict[str, str] = {}
        for qt in query.tables:
            if qt.alias:
                refs[qt.id] = self._quote_identifier(qt.alias)
            else:
                refs[qt.id] = self._quote_identifier(qt.name)
        return refs

    def _build_select(
        self,
        query: QueryDefinition,
        table_refs: dict[str, str],
        calc_sql_map: dict[str, str],
    ) -> str:
        """Build the SELECT clause, including time series bucket if
        configured."""
        parts: list[str] = []

        # Add time series bucket column first if configured
        if query.time_series:
            ts = query.time_series
            table_ref = table_refs[ts.table_id]
            date_col = f"{table_ref}.{self._quote_identifier(ts.date_column)}"
            date_trunc = f"date_trunc('{ts.interval}', {date_col})"

            # Add alias if specified
            alias = ts.alias or f"{ts.date_column}_bucket"
            date_trunc = f"{date_trunc} AS {self._quote_identifier(alias)}"

            parts.append(date_trunc)

        # Add regular columns
        for col in query.columns:
            table_ref = table_refs[col.table_id]

            # Handle COUNT(*) specially - don't quote the asterisk
            if col.column == "*" and col.aggregation == AggregationType.COUNT:
                col_ref = "COUNT(*)"
            # Handle column with inline sql_expression (e.g., calculated field)
            elif col.sql_expression:
                col_ref = f"({col.sql_expression})"

                # Apply aggregation if specified
                if col.aggregation != AggregationType.NONE:
                    col_ref = self._apply_aggregation(col_ref, col.aggregation)
            # Handle calculated field references - expand to SQL expression
            elif col.column in calc_sql_map:
                # Use the converted SQL expression
                col_ref = f"({calc_sql_map[col.column]})"

                # Apply aggregation if specified
                if col.aggregation != AggregationType.NONE:
                    col_ref = self._apply_aggregation(col_ref, col.aggregation)
            else:
                col_ref = f"{table_ref}.{self._quote_identifier(col.column)}"

                # Apply date_trunc if specified (for date columns)
                if col.date_trunc:
                    col_ref = f"date_trunc('{col.date_trunc}', {col_ref})"

                # Apply aggregation if specified
                if col.aggregation != AggregationType.NONE:
                    col_ref = self._apply_aggregation(col_ref, col.aggregation)

            # Apply alias if specified
            if col.alias:
                col_ref = f"{col_ref} AS {self._quote_identifier(col.alias)}"

            parts.append(col_ref)

        return ", ".join(parts)

    def _apply_aggregation(self, col_ref: str, agg: AggregationType) -> str:
        """Apply aggregation function to column reference."""
        agg_map = {
            AggregationType.SUM: "SUM",
            AggregationType.AVG: "AVG",
            AggregationType.COUNT: "COUNT",
            AggregationType.COUNT_DISTINCT: "COUNT_DISTINCT",
            AggregationType.MIN: "MIN",
            AggregationType.MAX: "MAX",
        }

        if agg == AggregationType.COUNT_DISTINCT:
            return f"COUNT(DISTINCT {col_ref})"

        func = agg_map.get(agg, "")
        if func:
            return f"{func}({col_ref})"

        return col_ref
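
    # For example (column reference is illustrative),
    # _apply_aggregation('"users"."id"', AggregationType.COUNT_DISTINCT) returns
    # 'COUNT(DISTINCT "users"."id")', and AggregationType.SUM returns
    # 'SUM("users"."id")'.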

    def _build_from(self, query: QueryDefinition, table_refs: dict[str, str]) -> str:
        """Build the FROM clause including JOINs.

        Uses schema-qualified table names if schema_name is set.
        """
        if not query.tables:
            return ""

        # Track which tables are already in the FROM clause
        tables_in_from: set[str] = set()

        # First table
        first_table = query.tables[0]
        sql = self._quote_table(first_table.name)
        if first_table.alias:
            sql += f" AS {self._quote_identifier(first_table.alias)}"
        tables_in_from.add(first_table.id)

        # Add JOINs
        for join in query.joins:
            # Find the table being joined (to_table)
            to_table = query.get_table_by_id(join.to_table_id)
            if to_table is None:
                continue

            join_type = self._join_type_sql(join.join_type)
            from_ref = table_refs[join.from_table_id]
            to_ref = table_refs[join.to_table_id]

            table_sql = self._quote_table(to_table.name)
            if to_table.alias:
                table_sql += f" AS {self._quote_identifier(to_table.alias)}"

            sql += (
                f" {join_type} JOIN {table_sql} ON "
                f"{from_ref}.{self._quote_identifier(join.from_column)} = "
                f"{to_ref}.{self._quote_identifier(join.to_column)}"
            )
            tables_in_from.add(join.to_table_id)

        # Add any remaining tables that aren't joined (creates implicit cross join)
        # This handles cases where columns are selected from multiple tables without explicit joins
        for qt in query.tables[1:]:
            if qt.id not in tables_in_from:
                table_sql = self._quote_table(qt.name)
                if qt.alias:
                    table_sql += f" AS {self._quote_identifier(qt.alias)}"
                sql += f", {table_sql}"
                tables_in_from.add(qt.id)

        return sql
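
    # For an illustrative two-table query (hypothetical tables "users" and
    # "orders" joined on users.id = orders.user_id with a LEFT join), this
    # produces:
    #   "users" LEFT JOIN "orders" ON "users"."id" = "orders"."user_id"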

    def _join_type_sql(self, join_type: JoinType) -> str:
        """Convert JoinType enum to SQL keyword."""
        return {
            JoinType.INNER: "INNER",
            JoinType.LEFT: "LEFT",
            JoinType.RIGHT: "RIGHT",
            JoinType.FULL: "FULL",
        }.get(join_type, "INNER")

    def _build_where(
        self,
        filters: list[FilterDefinition],
        table_refs: dict[str, str],
        table_map: dict[str, str],
        calc_sql_map: dict[str, str],
        params: list[Any],
    ) -> tuple[str, list[Any]]:
        """Build the WHERE clause."""
        if not filters:
            return "", params

        conditions: list[str] = []
        for f in filters:
            # Handle filter with inline sql_expression (e.g., calculated field)
            if f.sql_expression:
                col_ref = f"({f.sql_expression})"
                # No type coercion for calculated fields (type not known from schema)
                data_type = None
            # Handle calculated field references - expand to SQL expression
            elif f.column in calc_sql_map:
                col_ref = f"({calc_sql_map[f.column]})"
                # No type coercion for calculated fields (type not known from schema)
                data_type = None
            else:
                table_ref = table_refs[f.table_id]
                col_ref = f"{table_ref}.{self._quote_identifier(f.column)}"

                # Get column data type for value coercion
                table_name = table_map.get(f.table_id)
                data_type = None
                if table_name:
                    table = self._schema.get_table(table_name)
                    if table:
                        column = table.get_column(f.column)
                        if column:
                            data_type = column.data_type

            condition, params = self._build_condition(col_ref, f, data_type, params)
            conditions.append(condition)

        return " AND ".join(conditions), params

    def _build_condition(
        self,
        col_ref: str,
        f: FilterDefinition,
        data_type: str | None,
        params: list[Any],
    ) -> tuple[str, list[Any]]:
        """Build a single filter condition."""
        op = f.operator

        # Coerce the filter value to the appropriate Python type
        coerced_value = self._coerce_value(f.value, data_type)

        if op == FilterOperator.EQ:
            if coerced_value is None:
                return f"{col_ref} IS NULL", params
            params.append(coerced_value)
            return f"{col_ref} = ${len(params)}", params

        if op == FilterOperator.NEQ:
            if coerced_value is None:
                return f"{col_ref} IS NOT NULL", params
            params.append(coerced_value)
            return f"{col_ref} <> ${len(params)}", params

        if op == FilterOperator.GT:
            params.append(coerced_value)
            return f"{col_ref} > ${len(params)}", params

        if op == FilterOperator.GTE:
            params.append(coerced_value)
            return f"{col_ref} >= ${len(params)}", params

        if op == FilterOperator.LT:
            params.append(coerced_value)
            return f"{col_ref} < ${len(params)}", params

        if op == FilterOperator.LTE:
            params.append(coerced_value)
            return f"{col_ref} <= ${len(params)}", params

        if op == FilterOperator.IN:
            if isinstance(coerced_value, list):
                if not coerced_value:
                    return "FALSE", params
                placeholders: list[str] = []
                for v in coerced_value:
                    params.append(v)
                    placeholders.append(f"${len(params)}")
                return f"{col_ref} IN ({', '.join(placeholders)})", params
            params.append(coerced_value)
            return f"{col_ref} IN (${len(params)})", params

        if op == FilterOperator.NOT_IN:
            if isinstance(coerced_value, list):
                if not coerced_value:
                    return "TRUE", params
                placeholders = []
                for v in coerced_value:
                    params.append(v)
                    placeholders.append(f"${len(params)}")
                return f"{col_ref} NOT IN ({', '.join(placeholders)})", params
            params.append(coerced_value)
            return f"{col_ref} NOT IN (${len(params)})", params

        if op == FilterOperator.IN_OR_NULL:
            # Handle mixed selection of concrete values AND NULL
            # Generates: (col IN (...) OR col IS NULL)
            if isinstance(coerced_value, list):
                # Filter out None values - they're handled by the IS NULL clause
                concrete_values = [v for v in coerced_value if v is not None]
                if not concrete_values:
                    # No concrete values (empty list or list of only None values)
                    return f"{col_ref} IS NULL", params
                placeholders = []
                for v in concrete_values:
                    params.append(v)
                    placeholders.append(f"${len(params)}")
                return (
                    f"({col_ref} IN ({', '.join(placeholders)}) OR {col_ref} IS NULL)",
                    params,
                )
            # Single non-list value
            if coerced_value is None:
                # Single None value - just IS NULL
                return f"{col_ref} IS NULL", params
            params.append(coerced_value)
            return f"({col_ref} IN (${len(params)}) OR {col_ref} IS NULL)", params

        if op == FilterOperator.LIKE:
            params.append(coerced_value)
            return f"{col_ref} LIKE ${len(params)}", params

        if op == FilterOperator.ILIKE:
            params.append(coerced_value)
            return f"{col_ref} ILIKE ${len(params)}", params

        if op == FilterOperator.NOT_LIKE:
            params.append(coerced_value)
            return f"{col_ref} NOT LIKE ${len(params)}", params

        if op == FilterOperator.NOT_ILIKE:
            params.append(coerced_value)
            return f"{col_ref} NOT ILIKE ${len(params)}", params

        if op == FilterOperator.BETWEEN:
            if isinstance(coerced_value, list | tuple) and len(coerced_value) == 2:
                params.append(coerced_value[0])
                p1 = len(params)
                params.append(coerced_value[1])
                p2 = len(params)
                return f"{col_ref} BETWEEN ${p1} AND ${p2}", params
            # Invalid BETWEEN value - raise error instead of silent fallback
            value_desc = (
                f"{len(coerced_value)} values"
                if isinstance(coerced_value, list | tuple)
                else type(coerced_value).__name__
            )
            raise ValueError(
                f"BETWEEN filter on column '{f.column}' requires exactly 2 values, got {value_desc}"
            )

        if op == FilterOperator.IS_NULL:
            return f"{col_ref} IS NULL", params

        if op == FilterOperator.IS_NOT_NULL:
            return f"{col_ref} IS NOT NULL", params

        if op == FilterOperator.IN_SUBQUERY:
            # For subquery filters (used in RLS filtering).
            # SECURITY: The SQL in value["sql"] is interpolated directly without
            # parameterization. Callers MUST ensure the SQL is safely generated
            # (e.g., from trusted internal code, not user input). This is by design
            # since subqueries cannot be parameterized.
            if not isinstance(f.value, dict):
                raise ValueError(
                    f"IN_SUBQUERY filter on column '{f.column}' requires "
                    f"value={{'sql': '...'}}, got {type(f.value).__name__}"
                )
            if "sql" not in f.value:
                raise ValueError(
                    f"IN_SUBQUERY filter on column '{f.column}' requires "
                    f"value={{'sql': '...'}}, missing 'sql' key"
                )
            subquery_sql = f.value["sql"].strip()
            if not subquery_sql:
                raise ValueError(f"IN_SUBQUERY filter on column '{f.column}' has empty SQL")
            return f"{col_ref} IN ({subquery_sql})", params

        # Unknown operator - raise error instead of silent fallback
        raise ValueError(f"Unknown filter operator: {op}")

    def _build_group_by(
        self,
        query: QueryDefinition,
        table_refs: dict[str, str],
        calc_sql_map: dict[str, str],
    ) -> str:
        """Build the GROUP BY clause, including time series bucket if
        configured."""
        group_by_parts: list[str] = []

        # Add time series bucket to GROUP BY if present
        if query.time_series:
            ts = query.time_series
            table_ref = table_refs[ts.table_id]
            date_col = f"{table_ref}.{self._quote_identifier(ts.date_column)}"
            group_by_parts.append(f"date_trunc('{ts.interval}', {date_col})")

        # Build set of calculated fields that have internal aggregation
        calc_fields_with_agg = {
            cf.name for cf in query.calculated_fields if cf.has_internal_aggregation
        }

        # Build lookup from (table_id, column) to column selection for date_trunc/sql_expression
        column_lookup: dict[tuple[str, str], ColumnSelection] = {
            (col.table_id, col.column): col for col in query.columns
        }

        # Add regular GROUP BY columns
        group_by_cols = query.derive_group_by()
        for g in group_by_cols:
            # Skip calculated fields that have internal aggregation
            # These fields contain SUM, COUNT, etc. and should NOT be in GROUP BY
            if g.column in calc_fields_with_agg:
                continue

            # Look up the column selection to check for date_trunc/sql_expression
            col_sel = column_lookup.get((g.table_id, g.column))

            # Handle column with inline sql_expression (e.g., calculated field)
            if col_sel and col_sel.sql_expression:
                group_by_parts.append(f"({col_sel.sql_expression})")
            # Handle calculated field references - expand to SQL expression
            elif g.column in calc_sql_map:
                group_by_parts.append(f"({calc_sql_map[g.column]})")
            else:
                table_ref = table_refs[g.table_id]
                col_ref = f"{table_ref}.{self._quote_identifier(g.column)}"

                # Apply date_trunc if specified (must match SELECT clause)
                if col_sel and col_sel.date_trunc:
                    col_ref = f"date_trunc('{col_sel.date_trunc}', {col_ref})"

                group_by_parts.append(col_ref)

        # If time series is present and there are aggregations, we need GROUP BY
        if query.time_series and query.has_aggregations() and not group_by_cols:
            # Only have the time series bucket
            pass
        elif not group_by_parts:
            return ""

        return ", ".join(group_by_parts)

    def _build_order_by(
        self,
        query: QueryDefinition,
        table_refs: dict[str, str],
        calc_sql_map: dict[str, str],
    ) -> str:
        """Build the ORDER BY clause, adding time series bucket if
        configured."""
        parts: list[str] = []

        # Build lookup from (table_id, column) to column selection for date_trunc
        # Only include non-aggregated columns since those are the ones with date_trunc
        column_lookup: dict[tuple[str, str], ColumnSelection] = {
            (col.table_id, col.column): col
            for col in query.columns
            if col.aggregation == AggregationType.NONE
        }

        # If time series is present and no explicit order by, order by date bucket
        if query.time_series and not query.order_by:
            ts = query.time_series
            table_ref = table_refs[ts.table_id]
            date_col = f"{table_ref}.{self._quote_identifier(ts.date_column)}"
            parts.append(f"date_trunc('{ts.interval}', {date_col}) ASC")
        else:
            # Use explicit order by
            for o in query.order_by:
                # Handle calculated field references - expand to SQL expression
                if o.column in calc_sql_map:
                    col_ref = f"({calc_sql_map[o.column]})"
                else:
                    table_ref = table_refs[o.table_id]
                    col_ref = f"{table_ref}.{self._quote_identifier(o.column)}"

                # Apply date_trunc if the column has it (must match SELECT/GROUP BY)
                col_sel = column_lookup.get((o.table_id, o.column))
                if col_sel and col_sel.date_trunc:
                    col_ref = f"date_trunc('{col_sel.date_trunc}', {col_ref})"

                parts.append(f"{col_ref} {o.direction.value}")

        return ", ".join(parts)

    def _coerce_value(self, value: Any, data_type: str | None) -> Any:
        """Coerce a filter value to the appropriate Python type for asyncpg.

        asyncpg requires Python date/datetime objects for date/timestamp columns,
        not strings. This method converts string values to appropriate Python types
        based on the column's data type.

        Args:
            value: The filter value (may be a string, list, or other type).
            data_type: The PostgreSQL data type of the column (e.g., 'date', 'timestamp').

        Returns:
            The value coerced to the appropriate Python type.
        """
        if value is None or data_type is None:
            return value

        data_type_lower = data_type.lower()

        # Check if this is a date/timestamp column
        date_types = {"date"}
        timestamp_types = {
            "timestamp",
            "timestamp without time zone",
            "timestamp with time zone",
            "timestamptz",
        }

        is_date = (
            any(dt in data_type_lower for dt in date_types) and "timestamp" not in data_type_lower
        )
        is_timestamp = any(dt in data_type_lower for dt in timestamp_types)

        if not is_date and not is_timestamp:
            return value

        # Handle list values (for IN, NOT_IN, BETWEEN)
        if isinstance(value, list):
            return [self._coerce_single_date_value(v, is_date) for v in value]

        if isinstance(value, tuple):
            return tuple(self._coerce_single_date_value(v, is_date) for v in value)

        return self._coerce_single_date_value(value, is_date)

    def _coerce_single_date_value(self, value: Any, is_date: bool) -> Any:
        """Coerce a single value to date or datetime.

        Args:
            value: The value to coerce.
            is_date: True for date columns, False for timestamp columns.

        Returns:
            Python date or datetime object, or original value if not a string/date type.

        Raises:
            ValueError: If a string value cannot be parsed as a valid date/datetime.
        """
        # Already the correct type
        if isinstance(value, datetime):
            return value.date() if is_date else value
        if isinstance(value, date):
            return value if is_date else datetime.combine(value, datetime.min.time())

        # Try to parse string values
        if isinstance(value, str):
            expected_type = "date" if is_date else "datetime"
            try:
                # Try ISO format with time first (e.g., "2026-01-01T00:00:00")
                if "T" in value or " " in value:
                    # Handle both 'T' separator and space separator
                    dt = datetime.fromisoformat(value.replace(" ", "T"))
                    return dt.date() if is_date else dt
                # Date only format (e.g., "2026-01-01")
                dt = datetime.strptime(value, "%Y-%m-%d")
                return dt.date() if is_date else dt
            except ValueError as e:
                raise ValueError(
                    f"Invalid {expected_type} value: {value!r}. "
                    f"Expected ISO format (YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS)."
                ) from e

        return value
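
    # For example, "2026-01-01" becomes date(2026, 1, 1) when is_date is True,
    # and "2026-01-01 12:30:00" becomes datetime(2026, 1, 1, 12, 30) when
    # is_date is False; non-string, non-date values pass through unchanged.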

    def _quote_identifier(self, identifier: str) -> str:
        """Quote a SQL identifier to prevent injection.

        Args:
            identifier: Column or table name.

        Returns:
            Quoted identifier (e.g., "column_name").
        """
        # Escape any existing double quotes
        escaped = identifier.replace('"', '""')
        return f'"{escaped}"'

    def _quote_table(self, table_name: str) -> str:
        """Quote a table name with optional schema qualification.

        Args:
            table_name: Name of the table.

        Returns:
            Schema-qualified table name if schema_name is set,
            otherwise just the quoted table name.
            E.g., "org_123"."users" or just "users"
        """
        quoted_table = self._quote_identifier(table_name)
        if self._schema_name:
            return f"{self._quote_identifier(self._schema_name)}.{quoted_table}"
        return quoted_table