prismiq 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prismiq/__init__.py +543 -0
- prismiq/api.py +1889 -0
- prismiq/auth.py +108 -0
- prismiq/cache.py +527 -0
- prismiq/calculated_field_processor.py +231 -0
- prismiq/calculated_fields.py +819 -0
- prismiq/dashboard_store.py +1219 -0
- prismiq/dashboards.py +374 -0
- prismiq/dates.py +247 -0
- prismiq/engine.py +1315 -0
- prismiq/executor.py +345 -0
- prismiq/filter_merge.py +397 -0
- prismiq/formatting.py +298 -0
- prismiq/logging.py +489 -0
- prismiq/metrics.py +536 -0
- prismiq/middleware.py +346 -0
- prismiq/permissions.py +87 -0
- prismiq/persistence/__init__.py +45 -0
- prismiq/persistence/models.py +208 -0
- prismiq/persistence/postgres_store.py +1119 -0
- prismiq/persistence/saved_query_store.py +336 -0
- prismiq/persistence/schema.sql +95 -0
- prismiq/persistence/setup.py +222 -0
- prismiq/persistence/tables.py +76 -0
- prismiq/pins.py +72 -0
- prismiq/py.typed +0 -0
- prismiq/query.py +1233 -0
- prismiq/schema.py +333 -0
- prismiq/schema_config.py +354 -0
- prismiq/sql_utils.py +147 -0
- prismiq/sql_validator.py +219 -0
- prismiq/sqlalchemy_builder.py +577 -0
- prismiq/timeseries.py +410 -0
- prismiq/transforms.py +471 -0
- prismiq/trends.py +573 -0
- prismiq/types.py +688 -0
- prismiq-0.1.0.dist-info/METADATA +109 -0
- prismiq-0.1.0.dist-info/RECORD +39 -0
- prismiq-0.1.0.dist-info/WHEEL +4 -0
prismiq/engine.py
ADDED
@@ -0,0 +1,1315 @@
"""Main PrismiqEngine class that ties all components together.

This module provides the central engine class for the Prismiq embedded
analytics platform.
"""

from __future__ import annotations

import logging
import time
from datetime import date, timedelta
from typing import TYPE_CHECKING, Any

import asyncpg  # type: ignore[import-not-found]

from prismiq.cache import CacheBackend, CacheConfig, QueryCache
from prismiq.dashboard_store import DashboardStore, InMemoryDashboardStore
from prismiq.executor import QueryExecutor
from prismiq.metrics import record_cache_hit, record_query_execution, set_active_connections
from prismiq.persistence import PostgresDashboardStore, SavedQueryStore, ensure_tables
from prismiq.query import QueryBuilder, ValidationResult
from prismiq.schema import SchemaIntrospector
from prismiq.schema_config import (
    ColumnConfig,
    EnhancedDatabaseSchema,
    SchemaConfig,
    SchemaConfigManager,
    TableConfig,
)
from prismiq.sql_validator import SQLValidationResult, SQLValidator
from prismiq.timeseries import TimeInterval
from prismiq.transforms import pivot_data
from prismiq.trends import ComparisonPeriod, TrendResult, calculate_trend
from prismiq.types import (
    DatabaseSchema,
    FilterDefinition,
    FilterOperator,
    QueryDefinition,
    QueryResult,
    TableSchema,
    TimeSeriesConfig,
)

if TYPE_CHECKING:
    from asyncpg import Pool

_logger = logging.getLogger(__name__)


class PrismiqEngine:
    """Main engine for embedded analytics.

    Provides a high-level interface for schema introspection,
    query building, and execution.

    Example:
        >>> engine = PrismiqEngine(
        ...     database_url="postgresql://user:pass@localhost/db",
        ...     exposed_tables=["users", "orders"],
        ... )
        >>> await engine.startup()
        >>>
        >>> schema = await engine.get_schema()
        >>> result = await engine.execute_query(query_definition)
        >>>
        >>> await engine.shutdown()

    With FastAPI:
        >>> from fastapi import FastAPI
        >>> from prismiq import PrismiqEngine, create_router
        >>>
        >>> app = FastAPI()
        >>> engine = PrismiqEngine(database_url)
        >>>
        >>> @app.on_event("startup")
        >>> async def startup():
        ...     await engine.startup()
        ...     app.include_router(create_router(engine), prefix="/api/analytics")
        >>>
        >>> @app.on_event("shutdown")
        >>> async def shutdown():
        ...     await engine.shutdown()

    With caching:
        >>> from prismiq import PrismiqEngine, InMemoryCache
        >>>
        >>> cache = InMemoryCache()
        >>> engine = PrismiqEngine(
        ...     database_url="postgresql://...",
        ...     cache=cache,
        ...     query_cache_ttl=86400,  # 24 hours (default)
        ...     schema_cache_ttl=3600,  # 1 hour (default)
        ... )
    """

    def __init__(
        self,
        database_url: str,
        exposed_tables: list[str] | None = None,
        query_timeout: float = 30.0,
        max_rows: int = 10000,
        schema_name: str = "public",
        schema_config: SchemaConfig | None = None,
        cache: CacheBackend | None = None,
        query_cache_ttl: int | None = None,
        schema_cache_ttl: int | None = None,
        enable_metrics: bool = True,
        persist_dashboards: bool = False,
        skip_table_creation: bool = False,
    ) -> None:
        """Initialize the Prismiq engine.

        Args:
            database_url: PostgreSQL connection URL.
            exposed_tables: List of tables to expose. If None, all tables are exposed.
            query_timeout: Maximum query execution time in seconds.
            max_rows: Maximum number of rows to return per query.
            schema_name: PostgreSQL schema to use (default: "public").
            schema_config: Initial schema configuration for display names, hidden items, etc.
            cache: Optional cache backend for query result caching.
            query_cache_ttl: TTL for query result cache in seconds (default: 86400 = 24 hours).
            schema_cache_ttl: TTL for schema cache in seconds (default: 3600 = 1 hour).
            enable_metrics: Whether to record Prometheus metrics (default: True).
            persist_dashboards: Store dashboards in PostgreSQL (default: False uses in-memory).
            skip_table_creation: Skip automatic table creation (default: False).
                Use when tables are managed externally (e.g., via Alembic migrations).
        """
        self._database_url = database_url
        self._exposed_tables = exposed_tables
        self._query_timeout = query_timeout
        self._max_rows = max_rows
        self._schema_name = schema_name
        self._query_cache_ttl = query_cache_ttl
        self._schema_cache_ttl = schema_cache_ttl
        self._enable_metrics = enable_metrics
        self._persist_dashboards = persist_dashboards
        self._skip_table_creation = skip_table_creation

        # Schema config manager
        self._schema_config_manager = SchemaConfigManager(schema_config)

        # Cache backend
        self._cache: CacheBackend | None = cache
        self._query_cache: QueryCache | None = None
        if cache:
            # Build CacheConfig with provided TTLs or use defaults
            config_kwargs: dict[str, int] = {}
            if query_cache_ttl is not None:
                config_kwargs["query_ttl"] = query_cache_ttl
                config_kwargs["default_ttl"] = query_cache_ttl
            if schema_cache_ttl is not None:
                config_kwargs["schema_ttl"] = schema_cache_ttl
            cache_config = CacheConfig(**config_kwargs)
            self._query_cache = QueryCache(cache, config=cache_config)

        # These will be initialized in startup()
        self._pool: Pool | None = None
        self._introspector: SchemaIntrospector | None = None
        self._executor: QueryExecutor | None = None
        self._builder: QueryBuilder | None = None
        self._sql_validator: SQLValidator | None = None
        self._schema: DatabaseSchema | None = None
        self._dashboard_store: DashboardStore | None = None
        self._saved_query_store: SavedQueryStore | None = None

    @property
    def cache(self) -> CacheBackend | None:
        """Get the cache backend."""
        return self._cache

    @property
    def dashboard_store(self) -> DashboardStore:
        """Get the dashboard store.

        Returns:
            The dashboard store (PostgreSQL or in-memory).

        Raises:
            RuntimeError: If engine has not been started.
        """
        if self._dashboard_store is None:
            raise RuntimeError("Engine not started. Call 'await engine.startup()' first.")
        return self._dashboard_store

    @property
    def dashboards(self) -> DashboardStore:
        """Alias for dashboard_store for convenience.

        Returns:
            The dashboard store.
        """
        return self.dashboard_store

    @property
    def saved_query_store(self) -> SavedQueryStore:
        """Get the saved query store.

        Returns:
            The saved query store (PostgreSQL-backed).

        Raises:
            RuntimeError: If engine has not been started.
        """
        if self._saved_query_store is None:
            raise RuntimeError("Engine not started. Call 'await engine.startup()' first.")
        return self._saved_query_store

    async def startup(self) -> None:
        """Initialize the engine.

        Creates the database connection pool and introspects the schema.
        Must be called before using other methods.
        """
        # Create connection pool
        self._pool = await asyncpg.create_pool(
            self._database_url,
            min_size=1,
            max_size=10,
        )

        # Create schema introspector with optional caching
        introspector_kwargs: dict[str, Any] = {
            "exposed_tables": self._exposed_tables,
            "schema_name": self._schema_name,
            "cache": self._cache,
        }
        if self._schema_cache_ttl is not None:
            introspector_kwargs["cache_ttl"] = self._schema_cache_ttl
        self._introspector = SchemaIntrospector(self._pool, **introspector_kwargs)

        # Introspect schema
        self._schema = await self._introspector.get_schema()

        # Create query builder, executor, and SQL validator
        self._builder = QueryBuilder(self._schema)
        self._sql_validator = SQLValidator(self._schema)
        self._executor = QueryExecutor(
            self._pool,
            self._schema,
            query_timeout=self._query_timeout,
            max_rows=self._max_rows,
        )

        # Initialize dashboard store
        if self._persist_dashboards:
            # Create tables if they don't exist (skip if managed externally via Alembic)
            if not self._skip_table_creation:
                await ensure_tables(self._pool)
            self._dashboard_store = PostgresDashboardStore(self._pool)
            self._saved_query_store = SavedQueryStore(self._pool)
        else:
            self._dashboard_store = InMemoryDashboardStore()
            # SavedQueryStore requires PostgreSQL - no in-memory fallback
            self._saved_query_store = None  # type: ignore[assignment]

        # Update metrics
        if self._enable_metrics:
            set_active_connections(self._pool.get_size())

    async def shutdown(self) -> None:
        """Shutdown the engine.

        Closes the database connection pool. Should be called on
        application shutdown.
        """
        if self._pool:
            await self._pool.close()
            self._pool = None

        self._introspector = None
        self._executor = None
        self._builder = None
        self._sql_validator = None
        self._schema = None
        self._dashboard_store = None
        self._saved_query_store = None

        # Update metrics
        if self._enable_metrics:
            set_active_connections(0)
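
    # Minimal lifecycle sketch (illustrative URL; startup() must complete
    # before any query or schema call, and shutdown() releases the pool):
    #
    #     engine = PrismiqEngine("postgresql://user:pass@localhost/db")
    #     await engine.startup()
    #     try:
    #         result = await engine.execute_query(query_definition)
    #     finally:
    #         await engine.shutdown()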

    # ========================================================================
    # Health Check Methods
    # ========================================================================

    async def check_connection(self) -> bool:
        """Check if the database connection is healthy.

        Executes a simple query to verify the database connection.

        Returns:
            True if the connection is healthy.

        Raises:
            RuntimeError: If the engine has not been started.
            Exception: If the database connection fails.
        """
        self._ensure_started()
        assert self._pool is not None

        async with self._pool.acquire() as conn:
            await conn.fetchval("SELECT 1")

        return True
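
    # Health-endpoint sketch (hypothetical FastAPI wiring; the route path and
    # response shape are illustrative):
    #
    #     @app.get("/health")
    #     async def health() -> dict[str, str]:
    #         try:
    #             await engine.check_connection()
    #             return {"status": "ok"}
    #         except Exception:
    #             return {"status": "degraded"}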

    # ========================================================================
    # Schema Methods
    # ========================================================================

    async def _validate_schema_exists(self, schema_name: str) -> bool:
        """Verify that a PostgreSQL schema exists.

        Args:
            schema_name: Name of the schema to check.

        Returns:
            True if the schema exists, False otherwise.
        """
        assert self._pool is not None
        async with self._pool.acquire() as conn:
            result = await conn.fetchval(
                "SELECT EXISTS(SELECT 1 FROM information_schema.schemata WHERE schema_name = $1)",
                schema_name,
            )
        return bool(result)

    async def get_schema(
        self,
        schema_name: str | None = None,
        force_refresh: bool = False,
    ) -> DatabaseSchema:
        """Get the complete database schema (raw, without config applied).

        Args:
            schema_name: PostgreSQL schema to introspect. If None, uses the engine's
                default schema. Used for multi-tenant schema isolation.
            force_refresh: If True, bypass cache and introspect fresh.

        Returns:
            DatabaseSchema containing all exposed tables and relationships.

        Raises:
            RuntimeError: If the engine has not been started.
            ValueError: If the specified schema does not exist.
        """
        self._ensure_started()
        assert self._pool is not None

        effective_schema = schema_name or self._schema_name

        # If using the default schema, use the cached introspector
        if effective_schema == self._schema_name:
            assert self._introspector is not None
            return await self._introspector.get_schema(force_refresh=force_refresh)

        # For non-default schemas, validate existence first
        if not await self._validate_schema_exists(effective_schema):
            raise ValueError(
                f"PostgreSQL schema '{effective_schema}' does not exist. "
                f'Verify the schema name or create it with: CREATE SCHEMA "{effective_schema}"'
            )

        # For non-default schemas, create introspector on-demand
        # Uses existing cache with schema-based keys
        introspector_kwargs: dict[str, Any] = {
            "exposed_tables": self._exposed_tables,
            "schema_name": effective_schema,
            "cache": self._cache,
        }
        if self._schema_cache_ttl is not None:
            introspector_kwargs["cache_ttl"] = self._schema_cache_ttl

        introspector = SchemaIntrospector(self._pool, **introspector_kwargs)
        return await introspector.get_schema(force_refresh=force_refresh)
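
    # Multi-tenant sketch (assumes one PostgreSQL schema per tenant; the
    # schema and table names are illustrative):
    #
    #     db_schema = await engine.get_schema(schema_name="tenant_acme")
    #     users = db_schema.get_table("users")  # TableSchema, or None if absent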

    async def get_enhanced_schema(
        self,
        schema_name: str | None = None,
    ) -> EnhancedDatabaseSchema:
        """Get the database schema with configuration applied.

        Returns schema with display names, descriptions, and hidden
        tables/columns filtered out.

        Args:
            schema_name: PostgreSQL schema to introspect. If None, uses the engine's
                default schema. Used for multi-tenant schema isolation.

        Returns:
            EnhancedDatabaseSchema with configuration applied.

        Raises:
            RuntimeError: If the engine has not been started.
            ValueError: If the specified schema does not exist.
        """
        self._ensure_started()
        schema = await self.get_schema(schema_name=schema_name)
        return self._schema_config_manager.apply_to_schema(schema)

    async def get_table(
        self,
        table_name: str,
        schema_name: str | None = None,
    ) -> TableSchema:
        """Get schema information for a single table.

        Args:
            table_name: Name of the table to retrieve.
            schema_name: PostgreSQL schema to introspect.

        Returns:
            TableSchema for the requested table.

        Raises:
            RuntimeError: If the engine has not been started.
            TableNotFoundError: If the table is not found.
            ValueError: If the specified schema does not exist (for non-default schemas).
        """
        self._ensure_started()
        assert self._pool is not None

        effective_schema = schema_name or self._schema_name

        # If using the default schema, use the cached introspector
        if effective_schema == self._schema_name:
            assert self._introspector is not None
            return await self._introspector.get_table(table_name)

        # For non-default schemas, validate existence first
        if not await self._validate_schema_exists(effective_schema):
            raise ValueError(
                f"PostgreSQL schema '{effective_schema}' does not exist. "
                f'Verify the schema name or create it with: CREATE SCHEMA "{effective_schema}"'
            )

        # For non-default schemas, create introspector on-demand
        introspector_kwargs: dict[str, Any] = {
            "exposed_tables": self._exposed_tables,
            "schema_name": effective_schema,
            "cache": self._cache,
        }
        if self._schema_cache_ttl is not None:
            introspector_kwargs["cache_ttl"] = self._schema_cache_ttl

        introspector = SchemaIntrospector(self._pool, **introspector_kwargs)
        return await introspector.get_table(table_name)

    # ========================================================================
    # Query Methods
    # ========================================================================

    async def execute_query(
        self,
        query: QueryDefinition,
        schema_name: str | None = None,
        use_cache: bool = True,
    ) -> QueryResult:
        """Execute a query and return results.

        Args:
            query: Query definition to execute.
            schema_name: PostgreSQL schema for table resolution. If None, uses the
                engine's default schema. Used for multi-tenant schema isolation.
            use_cache: Whether to use cached results if available.

        Returns:
            QueryResult with columns, rows, and execution metadata.

        Raises:
            RuntimeError: If the engine has not been started.
            QueryValidationError: If the query fails validation.
            QueryTimeoutError: If the query exceeds the timeout.
            QueryExecutionError: If the query execution fails.
        """
        self._ensure_started()
        assert self._pool is not None

        effective_schema = schema_name or self._schema_name
        start = time.perf_counter()

        # Create schema-specific cache for non-default schemas
        # Always create the cache object so we can update it even when bypassing
        query_cache = self._query_cache
        if self._cache and effective_schema != self._schema_name:
            # Build CacheConfig with provided TTLs or use defaults
            config_kwargs: dict[str, int] = {}
            if self._query_cache_ttl is not None:
                config_kwargs["query_ttl"] = self._query_cache_ttl
                config_kwargs["default_ttl"] = self._query_cache_ttl
            cache_config = CacheConfig(**config_kwargs) if config_kwargs else None
            query_cache = QueryCache(
                self._cache,
                config=cache_config,
                schema_name=effective_schema,
            )

        # Check cache first
        if use_cache and query_cache:
            cached = await query_cache.get_result(query)
            if cached:
                if self._enable_metrics:
                    record_cache_hit(True)
                return cached
            if self._enable_metrics:
                record_cache_hit(False)

        # Get schema for the target schema
        db_schema = await self.get_schema(schema_name=effective_schema)

        # Create executor for this schema
        executor = QueryExecutor(
            self._pool,
            db_schema,
            query_timeout=self._query_timeout,
            max_rows=self._max_rows,
            schema_name=effective_schema,
        )

        # Execute query
        try:
            result = await executor.execute(query)

            # Always cache the result when cache is available
            # Even when use_cache=False (bypass), we want to update the cache with fresh data
            if query_cache:
                try:
                    await query_cache.cache_result(query, result)
                except Exception as cache_err:
                    _logger.warning(
                        "Failed to cache query result: %s (%s)",
                        cache_err,
                        type(cache_err).__name__,
                    )

            # Record metrics
            if self._enable_metrics:
                duration = (time.perf_counter() - start) * 1000
                record_query_execution(duration, "success")

            return result

        except Exception:
            if self._enable_metrics:
                duration = (time.perf_counter() - start) * 1000
                record_query_execution(duration, "error")
            raise
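
    # Usage sketch (the QueryDefinition payload is elided; see prismiq.types
    # for the full model). Passing use_cache=False bypasses the read path but
    # still re-primes the cache with the fresh result:
    #
    #     qd = QueryDefinition(tables=[...], columns=[...], filters=[])
    #     fresh = await engine.execute_query(qd, use_cache=False)
    #     print(fresh.columns, len(fresh.rows))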

    async def preview_query(
        self,
        query: QueryDefinition,
        limit: int = 100,
        schema_name: str | None = None,
    ) -> QueryResult:
        """Execute a query with a limited number of rows.

        Args:
            query: Query definition to execute.
            limit: Maximum number of rows to return.
            schema_name: PostgreSQL schema for table resolution.

        Returns:
            QueryResult with limited rows.

        Raises:
            RuntimeError: If the engine has not been started.
            QueryValidationError: If the query fails validation.
        """
        self._ensure_started()
        assert self._pool is not None

        effective_schema = schema_name or self._schema_name

        # Get schema and create executor for this schema
        db_schema = await self.get_schema(schema_name=effective_schema)

        executor = QueryExecutor(
            self._pool,
            db_schema,
            query_timeout=self._query_timeout,
            max_rows=self._max_rows,
            schema_name=effective_schema,
        )
        return await executor.preview(query, limit=limit)

    async def sample_column_values(
        self,
        table_name: str,
        column_name: str,
        limit: int = 5,
        schema_name: str | None = None,
    ) -> list[Any]:
        """Get sample values from a column for data preview.

        Args:
            table_name: Name of the table.
            column_name: Name of the column.
            limit: Maximum number of distinct values to return.
            schema_name: PostgreSQL schema to query.

        Returns:
            List of sample values from the column.

        Raises:
            RuntimeError: If the engine has not been started.
            ValueError: If the table or column doesn't exist.
        """
        self._ensure_started()
        assert self._pool is not None

        effective_schema = schema_name or self._schema_name

        # Get the schema for validation
        db_schema = await self.get_schema(schema_name=effective_schema)

        # Validate table exists
        table = db_schema.get_table(table_name)
        if table is None:
            raise ValueError(f"Table '{table_name}' not found")

        # Validate column exists
        column_exists = any(col.name == column_name for col in table.columns)
        if not column_exists:
            raise ValueError(f"Column '{column_name}' not found in table '{table_name}'")

        # Build schema-qualified table reference
        # Note: table_name and column_name are validated against the schema above,
        # so this is safe from SQL injection despite string interpolation
        escaped_col = column_name.replace('"', '""')
        escaped_table = table_name.replace('"', '""')
        escaped_schema = effective_schema.replace('"', '""')

        table_ref = f'"{escaped_schema}"."{escaped_table}"'

        sql = f"""
            SELECT DISTINCT "{escaped_col}"
            FROM {table_ref}
            WHERE "{escaped_col}" IS NOT NULL
            ORDER BY "{escaped_col}"
            LIMIT {limit}
        """  # noqa: S608

        async with self._pool.acquire() as conn:
            rows = await conn.fetch(sql)

        # Extract values and serialize
        from prismiq.executor import serialize_value

        return [serialize_value(row[0]) for row in rows]
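
    # Example (illustrative table/column names; returns up to `limit`
    # distinct non-null values, serialized via serialize_value):
    #
    #     statuses = await engine.sample_column_values("orders", "status", limit=10)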

    def validate_query(self, query: QueryDefinition) -> list[str]:
        """Validate a query without executing it (uses default schema).

        Args:
            query: Query definition to validate.

        Returns:
            List of validation error messages (empty if valid).

        Raises:
            RuntimeError: If the engine has not been started.

        Note:
            This method validates against the default schema. For multi-tenant
            schema support, use validate_query_async() instead.
        """
        self._ensure_started()
        assert self._builder is not None
        return self._builder.validate(query)

    async def validate_query_async(
        self,
        query: QueryDefinition,
        schema_name: str | None = None,
    ) -> list[str]:
        """Validate a query without executing it (with schema support).

        Args:
            query: Query definition to validate.
            schema_name: PostgreSQL schema to validate against. If None, uses
                the engine's default schema.

        Returns:
            List of validation error messages (empty if valid).

        Raises:
            RuntimeError: If the engine has not been started.
            ValueError: If the specified schema does not exist.
        """
        self._ensure_started()

        effective_schema = schema_name or self._schema_name

        # Use default builder for default schema
        if effective_schema == self._schema_name:
            assert self._builder is not None
            return self._builder.validate(query)

        # For non-default schemas, get schema and create builder
        db_schema = await self.get_schema(schema_name=effective_schema)
        builder = QueryBuilder(db_schema, schema_name=effective_schema)
        return builder.validate(query)
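
    # Validation sketch (the returned value is a plain list of error strings;
    # an empty list means the query is valid; the tenant name is illustrative):
    #
    #     errors = await engine.validate_query_async(qd, schema_name="tenant_acme")
    #     if errors:
    #         raise ValueError("; ".join(errors))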

    def validate_query_detailed(self, query: QueryDefinition) -> ValidationResult:
        """Validate a query with detailed error information (uses default schema).

        Args:
            query: Query definition to validate.

        Returns:
            ValidationResult with detailed errors including suggestions.

        Raises:
            RuntimeError: If the engine has not been started.

        Note:
            This method validates against the default schema. For multi-tenant
            schema support, use validate_query_detailed_async() instead.
        """
        self._ensure_started()
        assert self._builder is not None
        return self._builder.validate_detailed(query)

    async def validate_query_detailed_async(
        self,
        query: QueryDefinition,
        schema_name: str | None = None,
    ) -> ValidationResult:
        """Validate a query with detailed error information (with schema support).

        Args:
            query: Query definition to validate.
            schema_name: PostgreSQL schema to validate against. If None, uses
                the engine's default schema.

        Returns:
            ValidationResult with detailed errors including suggestions.

        Raises:
            RuntimeError: If the engine has not been started.
            ValueError: If the specified schema does not exist.
        """
        self._ensure_started()

        effective_schema = schema_name or self._schema_name

        # Use default builder for default schema
        if effective_schema == self._schema_name:
            assert self._builder is not None
            return self._builder.validate_detailed(query)

        # For non-default schemas, get schema and create builder
        db_schema = await self.get_schema(schema_name=effective_schema)
        builder = QueryBuilder(db_schema, schema_name=effective_schema)
        return builder.validate_detailed(query)

    def generate_sql(self, query: QueryDefinition) -> str:
        """Generate SQL from a query definition without executing (uses default schema).

        Useful for previewing the SQL that will be executed.

        Args:
            query: Query definition to generate SQL for.

        Returns:
            The generated SQL string.

        Raises:
            RuntimeError: If the engine has not been started.
            QueryValidationError: If the query is invalid.

        Note:
            This method uses the default schema. For multi-tenant schema support,
            use generate_sql_async() instead.
        """
        self._ensure_started()
        assert self._builder is not None

        # Validate first
        errors = self._builder.validate(query)
        if errors:
            from .types import QueryValidationError

            raise QueryValidationError("; ".join(errors), errors)

        sql, _ = self._builder.build(query)
        return sql

    async def generate_sql_async(
        self,
        query: QueryDefinition,
        schema_name: str | None = None,
    ) -> str:
        """Generate SQL from a query definition without executing (with schema support).

        Useful for previewing the SQL that will be executed.

        Args:
            query: Query definition to generate SQL for.
            schema_name: PostgreSQL schema for table resolution. If None, uses
                the engine's default schema.

        Returns:
            The generated SQL string.

        Raises:
            RuntimeError: If the engine has not been started.
            QueryValidationError: If the query is invalid.
            ValueError: If the specified schema does not exist.
        """
        self._ensure_started()

        effective_schema = schema_name or self._schema_name

        # Use default builder for default schema
        if effective_schema == self._schema_name:
            assert self._builder is not None
            errors = self._builder.validate(query)
            if errors:
                from .types import QueryValidationError

                raise QueryValidationError("; ".join(errors), errors)
            sql, _ = self._builder.build(query)
            return sql

        # For non-default schemas, get schema and create builder
        db_schema = await self.get_schema(schema_name=effective_schema)
        builder = QueryBuilder(db_schema, schema_name=effective_schema)

        errors = builder.validate(query)
        if errors:
            from .types import QueryValidationError

            raise QueryValidationError("; ".join(errors), errors)

        sql, _ = builder.build(query)
        return sql
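
    # SQL preview sketch (no execution; an invalid definition raises
    # QueryValidationError; the tenant name is illustrative):
    #
    #     sql = await engine.generate_sql_async(qd, schema_name="tenant_acme")
    #     _logger.debug("Would execute: %s", sql)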

    # ========================================================================
    # Custom SQL Methods
    # ========================================================================

    async def validate_sql(
        self,
        sql: str,
        schema_name: str | None = None,
    ) -> SQLValidationResult:
        """Validate a raw SQL query without executing.

        Checks that the SQL is a valid SELECT statement and only
        references tables visible in the schema.

        Args:
            sql: Raw SQL query to validate.
            schema_name: PostgreSQL schema for table validation. If None, uses the
                engine's default schema. Used for multi-tenant schema isolation.

        Returns:
            SQLValidationResult with validation status and details.

        Raises:
            RuntimeError: If the engine has not been started.
        """
        self._ensure_started()

        # For non-default schemas, create a validator with the tenant's schema
        effective_schema = schema_name or self._schema_name
        if effective_schema != self._schema_name:
            tenant_schema = await self.get_schema(schema_name=effective_schema)
            validator = SQLValidator(tenant_schema)
            return validator.validate(sql)

        assert self._sql_validator is not None
        return self._sql_validator.validate(sql)

    async def execute_raw_sql(
        self,
        sql: str,
        params: dict[str, Any] | None = None,
        schema_name: str | None = None,
    ) -> QueryResult:
        """Execute a raw SQL query.

        Only SELECT statements are allowed. Queries are restricted
        to tables visible in the schema.

        Args:
            sql: Raw SQL query (SELECT only).
            params: Optional named parameters for the query.
            schema_name: PostgreSQL schema for table validation. If None, uses the
                engine's default schema. Used for multi-tenant schema isolation.

        Returns:
            QueryResult with columns, rows, and execution metadata.

        Raises:
            RuntimeError: If the engine has not been started.
            SQLValidationError: If the SQL fails validation.
            QueryTimeoutError: If the query exceeds the timeout.
            QueryExecutionError: If the query execution fails.
        """
        self._ensure_started()
        assert self._executor is not None
        assert self._pool is not None

        # For non-default schemas, validate with tenant's schema
        effective_schema = schema_name or self._schema_name
        if effective_schema != self._schema_name:
            validation = await self.validate_sql(sql, schema_name=effective_schema)
            if not validation.valid:
                from .sql_validator import SQLValidationError

                raise SQLValidationError(
                    "SQL validation failed: " + "; ".join(validation.errors),
                    errors=validation.errors,
                )

        start = time.perf_counter()

        try:
            result = await self._executor.execute_raw_sql(sql, params)

            # Record metrics
            if self._enable_metrics:
                duration = (time.perf_counter() - start) * 1000
                record_query_execution(duration, "success")

            return result

        except Exception:
            if self._enable_metrics:
                duration = (time.perf_counter() - start) * 1000
                record_query_execution(duration, "error")
            raise
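
    # Raw SQL sketch (SELECT-only; "orders" is an illustrative table name, and
    # the optional params mapping follows execute_raw_sql's signature):
    #
    #     result = await engine.execute_raw_sql(
    #         "SELECT status, count(*) AS n FROM orders GROUP BY status",
    #     )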

    # ========================================================================
    # Cache Methods
    # ========================================================================

    async def invalidate_cache(self, table_name: str | None = None) -> int:
        """Invalidate cached data.

        Args:
            table_name: If provided, invalidate only queries involving this table.
                If None, invalidate all query cache.

        Returns:
            Number of cache entries invalidated.
        """
        if not self._query_cache or not self._cache:
            return 0

        if table_name:
            return await self._query_cache.invalidate_table(table_name)
        else:
            return await self._cache.clear("query:*")

    async def invalidate_schema_cache(self) -> None:
        """Invalidate the schema cache.

        Forces the next get_schema() call to introspect the database.
        """
        if self._introspector:
            await self._introspector.invalidate_cache()
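
    # Invalidation sketch (e.g., after a bulk load into "orders"; the table
    # name is illustrative):
    #
    #     removed = await engine.invalidate_cache("orders")
    #     await engine.invalidate_schema_cache()  # only needed if DDL changed too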

    # ========================================================================
    # Time Series Methods
    # ========================================================================

    async def execute_timeseries_query(
        self,
        query: QueryDefinition,
        interval: TimeInterval,
        date_column: str,
        fill_missing: bool = True,
        schema_name: str | None = None,
    ) -> QueryResult:
        """Execute a time series query with automatic bucketing.

        Adds date_trunc to the query for time bucketing and optionally
        fills missing time buckets.

        Args:
            query: Query definition to execute.
            interval: Time interval for bucketing.
            date_column: Name of the date/timestamp column to bucket.
            fill_missing: Whether to fill missing time buckets with default values.
            schema_name: PostgreSQL schema for table resolution. If None, uses the
                engine's default schema. Used for multi-tenant schema isolation.

        Returns:
            QueryResult with time-bucketed data.

        Raises:
            RuntimeError: If the engine has not been started.
            QueryValidationError: If the query fails validation.
            ValueError: If the date column is not found.
        """
        self._ensure_started()
        assert self._pool is not None

        effective_schema = schema_name or self._schema_name

        # Get schema for the target schema
        db_schema = await self.get_schema(schema_name=effective_schema)

        # Find the table ID for the date column
        table_id = self._find_table_for_column_in_schema(query, date_column, db_schema)
        if table_id is None:
            raise ValueError(f"Date column '{date_column}' not found in query tables")

        # Create a modified query with time series config
        modified_query = QueryDefinition(
            tables=query.tables,
            joins=query.joins,
            columns=query.columns,
            filters=query.filters,
            group_by=query.group_by,
            order_by=query.order_by,
            limit=query.limit,
            offset=query.offset,
            time_series=TimeSeriesConfig(
                table_id=table_id,
                date_column=date_column,
                interval=interval.value,
                fill_missing=fill_missing,
            ),
        )

        # Create executor for this schema
        executor = QueryExecutor(
            self._pool,
            db_schema,
            query_timeout=self._query_timeout,
            max_rows=self._max_rows,
            schema_name=effective_schema,
        )

        return await executor.execute(modified_query)
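
    # Time-series sketch ("DAY" is an assumed TimeInterval member and
    # "created_at" an illustrative column; see prismiq.timeseries for the
    # actual members):
    #
    #     ts = await engine.execute_timeseries_query(
    #         qd,
    #         interval=TimeInterval.DAY,
    #         date_column="created_at",
    #     )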

    def _find_table_for_column(self, query: QueryDefinition, column_name: str) -> str | None:
        """Find the table ID that contains the specified column."""
        self._ensure_started()
        assert self._schema is not None
        return self._find_table_for_column_in_schema(query, column_name, self._schema)

    def _find_table_for_column_in_schema(
        self,
        query: QueryDefinition,
        column_name: str,
        schema: DatabaseSchema,
    ) -> str | None:
        """Find the table ID that contains the specified column in the given schema."""
        for query_table in query.tables:
            table_schema = schema.get_table(query_table.name)
            if table_schema and table_schema.has_column(column_name):
                return query_table.id

        return None

    # ========================================================================
    # Transform Methods
    # ========================================================================

    def transform_pivot(
        self,
        result: QueryResult,
        row_column: str,
        pivot_column: str,
        value_column: str,
        aggregation: str = "sum",
    ) -> QueryResult:
        """Pivot a query result from long to wide format.

        Args:
            result: Query result to pivot.
            row_column: Column to use as row headers.
            pivot_column: Column to pivot into separate columns.
            value_column: Column containing values to aggregate.
            aggregation: Aggregation function: sum, avg, count, min, max.

        Returns:
            Pivoted QueryResult.
        """
        return pivot_data(
            result=result,
            row_column=row_column,
            pivot_column=pivot_column,
            value_column=value_column,
            aggregation=aggregation,
        )
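
    # Pivot sketch: turn long (month, status, total) rows into one row per
    # month with a column per status (column names are illustrative):
    #
    #     wide = engine.transform_pivot(
    #         result,
    #         row_column="month",
    #         pivot_column="status",
    #         value_column="total",
    #         aggregation="sum",
    #     )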

    # ========================================================================
    # Trend Methods
    # ========================================================================

    def calculate_trend(
        self,
        current: float | None,
        previous: float | None,
        threshold: float = 0.001,
    ) -> TrendResult:
        """Calculate a trend between two values.

        Args:
            current: Current value.
            previous: Previous value for comparison.
            threshold: Changes smaller than this are considered "flat".

        Returns:
            TrendResult with direction and change metrics.
        """
        return calculate_trend(current, previous, threshold)

    async def calculate_metric_trend(
        self,
        query: QueryDefinition,
        comparison: ComparisonPeriod,
        current_start: date,
        current_end: date,
        value_column: str,
        date_column: str,
    ) -> TrendResult:
        """Calculate trend for a metric query.

        Executes the query for both current and comparison periods,
        then calculates the trend between them.

        Args:
            query: Query definition for the metric.
            comparison: Period to compare against.
            current_start: Start date of current period.
            current_end: End date of current period.
            value_column: Column containing the metric value.
            date_column: Column containing the date for filtering.

        Returns:
            TrendResult with current value, previous value, and change metrics.

        Raises:
            RuntimeError: If the engine has not been started.
            ValueError: If the date or value column is not found.
        """
        self._ensure_started()
        assert self._executor is not None

        # Find the table ID for the date column
        table_id = self._find_table_for_column(query, date_column)
        if table_id is None:
            raise ValueError(f"Date column '{date_column}' not found in query tables")

        # Calculate comparison period dates
        previous_start, previous_end = self._get_comparison_dates(
            comparison, current_start, current_end
        )

        # Execute query for current period
        current_query = self._add_date_filter(
            query, table_id, date_column, current_start, current_end
        )
        current_result = await self._executor.execute(current_query)

        # Execute query for previous period
        previous_query = self._add_date_filter(
            query, table_id, date_column, previous_start, previous_end
        )
        previous_result = await self._executor.execute(previous_query)

        # Extract values
        current_value = self._extract_value(current_result, value_column)
        previous_value = self._extract_value(previous_result, value_column)

        return calculate_trend(current_value, previous_value)
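
    # Trend sketch (illustrative column names; ComparisonPeriod.PREVIOUS_PERIOD
    # shifts the window back by its own length):
    #
    #     trend = await engine.calculate_metric_trend(
    #         qd,
    #         comparison=ComparisonPeriod.PREVIOUS_PERIOD,
    #         current_start=date(2024, 6, 1),
    #         current_end=date(2024, 6, 30),
    #         value_column="revenue",
    #         date_column="created_at",
    #     )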

    def _get_comparison_dates(
        self,
        comparison: ComparisonPeriod,
        current_start: date,
        current_end: date,
    ) -> tuple[date, date]:
        """Calculate the comparison period dates."""
        period_days = (current_end - current_start).days + 1

        if comparison == ComparisonPeriod.PREVIOUS_PERIOD:
            previous_end = current_start - timedelta(days=1)
            previous_start = previous_end - timedelta(days=period_days - 1)
        elif comparison == ComparisonPeriod.PREVIOUS_YEAR:
            previous_start = current_start.replace(year=current_start.year - 1)
            previous_end = current_end.replace(year=current_end.year - 1)
        elif comparison == ComparisonPeriod.PREVIOUS_MONTH:
            # Move back one month
            if current_start.month == 1:
                previous_start = current_start.replace(year=current_start.year - 1, month=12)
            else:
                previous_start = current_start.replace(month=current_start.month - 1)

            if current_end.month == 1:
                previous_end = current_end.replace(year=current_end.year - 1, month=12)
            else:
                previous_end = current_end.replace(month=current_end.month - 1)
        elif comparison == ComparisonPeriod.PREVIOUS_WEEK:
            previous_start = current_start - timedelta(days=7)
            previous_end = current_end - timedelta(days=7)
        else:
            raise ValueError(f"Unknown comparison period: {comparison}")

        return previous_start, previous_end

    def _add_date_filter(
        self,
        query: QueryDefinition,
        table_id: str,
        date_column: str,
        start_date: date,
        end_date: date,
    ) -> QueryDefinition:
        """Add date range filters to a query."""
        new_filters = list(query.filters)
        new_filters.extend(
            [
                FilterDefinition(
                    table_id=table_id,
                    column=date_column,
                    operator=FilterOperator.GTE,
                    value=start_date.isoformat(),
                ),
                FilterDefinition(
                    table_id=table_id,
                    column=date_column,
                    operator=FilterOperator.LTE,
                    value=end_date.isoformat(),
                ),
            ]
        )

        return QueryDefinition(
            tables=query.tables,
            joins=query.joins,
            columns=query.columns,
            filters=new_filters,
            group_by=query.group_by,
            order_by=query.order_by,
            limit=query.limit,
            offset=query.offset,
            time_series=query.time_series,
        )

    def _extract_value(self, result: QueryResult, column: str) -> float | None:
        """Extract a single value from a query result."""
        if not result.rows:
            return None

        try:
            col_idx = result.columns.index(column)
        except ValueError:
            # Try finding by alias pattern (e.g., "sum_amount" for aggregated column)
            for i, col_name in enumerate(result.columns):
                if col_name == column or col_name.endswith(f"_{column}"):
                    col_idx = i
                    break
            else:
                raise ValueError(f"Column '{column}' not found in result")

        value = result.rows[0][col_idx]
        if value is None:
            return None

        return float(value)

    # ========================================================================
    # Schema Configuration Methods
    # ========================================================================

    def get_schema_config(self) -> SchemaConfig:
        """Get the current schema configuration.

        Returns:
            Current SchemaConfig with all table and column settings.
        """
        return self._schema_config_manager.get_config()

    def set_schema_config(self, config: SchemaConfig) -> None:
        """Replace the entire schema configuration.

        Args:
            config: New schema configuration.
        """
        self._schema_config_manager = SchemaConfigManager(config)

    def update_table_config(self, table_name: str, config: TableConfig) -> None:
        """Update configuration for a specific table.

        Args:
            table_name: Name of the table.
            config: New configuration for the table.
        """
        self._schema_config_manager.update_table_config(table_name, config)

    def update_column_config(self, table_name: str, column_name: str, config: ColumnConfig) -> None:
        """Update configuration for a specific column.

        Args:
            table_name: Name of the table.
            column_name: Name of the column.
            config: New configuration for the column.
        """
        self._schema_config_manager.update_column_config(table_name, column_name, config)

    # ========================================================================
    # Private Methods
    # ========================================================================

    def _ensure_started(self) -> None:
        """Ensure the engine has been started."""
        if self._pool is None:
            raise RuntimeError("Engine not started. Call 'await engine.startup()' first.")