prismiq 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
prismiq/engine.py ADDED
@@ -0,0 +1,1315 @@
+ """Main PrismiqEngine class that ties all components together.
+
+ This module provides the central engine class for the Prismiq embedded
+ analytics platform.
+ """
+
+ from __future__ import annotations
+
+ import logging
+ import time
+ from datetime import date, timedelta
+ from typing import TYPE_CHECKING, Any
+
+ import asyncpg  # type: ignore[import-not-found]
+
+ from prismiq.cache import CacheBackend, CacheConfig, QueryCache
+ from prismiq.dashboard_store import DashboardStore, InMemoryDashboardStore
+ from prismiq.executor import QueryExecutor
+ from prismiq.metrics import record_cache_hit, record_query_execution, set_active_connections
+ from prismiq.persistence import PostgresDashboardStore, SavedQueryStore, ensure_tables
+ from prismiq.query import QueryBuilder, ValidationResult
+ from prismiq.schema import SchemaIntrospector
+ from prismiq.schema_config import (
+     ColumnConfig,
+     EnhancedDatabaseSchema,
+     SchemaConfig,
+     SchemaConfigManager,
+     TableConfig,
+ )
+ from prismiq.sql_validator import SQLValidationResult, SQLValidator
+ from prismiq.timeseries import TimeInterval
+ from prismiq.transforms import pivot_data
+ from prismiq.trends import ComparisonPeriod, TrendResult, calculate_trend
+ from prismiq.types import (
+     DatabaseSchema,
+     FilterDefinition,
+     FilterOperator,
+     QueryDefinition,
+     QueryResult,
+     TableSchema,
+     TimeSeriesConfig,
+ )
+
+ if TYPE_CHECKING:
+     from asyncpg import Pool
+
+ _logger = logging.getLogger(__name__)
+
+
+ class PrismiqEngine:
+     """Main engine for embedded analytics.
+
+     Provides a high-level interface for schema introspection,
+     query building, and execution.
+
+     Example:
+         >>> engine = PrismiqEngine(
+         ...     database_url="postgresql://user:pass@localhost/db",
+         ...     exposed_tables=["users", "orders"],
+         ... )
+         >>> await engine.startup()
+         >>>
+         >>> schema = await engine.get_schema()
+         >>> result = await engine.execute_query(query_definition)
+         >>>
+         >>> await engine.shutdown()
+
+     With FastAPI:
+         >>> from fastapi import FastAPI
+         >>> from prismiq import PrismiqEngine, create_router
+         >>>
+         >>> app = FastAPI()
+         >>> engine = PrismiqEngine(database_url)
+         >>>
+         >>> @app.on_event("startup")
+         ... async def startup():
+         ...     await engine.startup()
+         ...     app.include_router(create_router(engine), prefix="/api/analytics")
+         >>>
+         >>> @app.on_event("shutdown")
+         ... async def shutdown():
+         ...     await engine.shutdown()
+
+     With caching:
+         >>> from prismiq import PrismiqEngine, InMemoryCache
+         >>>
+         >>> cache = InMemoryCache()
+         >>> engine = PrismiqEngine(
+         ...     database_url="postgresql://...",
+         ...     cache=cache,
+         ...     query_cache_ttl=86400,  # 24 hours (default)
+         ...     schema_cache_ttl=3600,  # 1 hour (default)
+         ... )
+     """
+
+     def __init__(
+         self,
+         database_url: str,
+         exposed_tables: list[str] | None = None,
+         query_timeout: float = 30.0,
+         max_rows: int = 10000,
+         schema_name: str = "public",
+         schema_config: SchemaConfig | None = None,
+         cache: CacheBackend | None = None,
+         query_cache_ttl: int | None = None,
+         schema_cache_ttl: int | None = None,
+         enable_metrics: bool = True,
+         persist_dashboards: bool = False,
+         skip_table_creation: bool = False,
+     ) -> None:
+         """Initialize the Prismiq engine.
+
+         Args:
+             database_url: PostgreSQL connection URL.
+             exposed_tables: List of tables to expose. If None, all tables are exposed.
+             query_timeout: Maximum query execution time in seconds.
+             max_rows: Maximum number of rows to return per query.
+             schema_name: PostgreSQL schema to use (default: "public").
+             schema_config: Initial schema configuration for display names, hidden items, etc.
+             cache: Optional cache backend for query result caching.
+             query_cache_ttl: TTL for query result cache in seconds (default: 86400 = 24 hours).
+             schema_cache_ttl: TTL for schema cache in seconds (default: 3600 = 1 hour).
+             enable_metrics: Whether to record Prometheus metrics (default: True).
+             persist_dashboards: Store dashboards in PostgreSQL (default: False uses in-memory).
+             skip_table_creation: Skip automatic table creation (default: False).
+                 Use when tables are managed externally (e.g., via Alembic migrations).
+         """
+         self._database_url = database_url
+         self._exposed_tables = exposed_tables
+         self._query_timeout = query_timeout
+         self._max_rows = max_rows
+         self._schema_name = schema_name
+         self._query_cache_ttl = query_cache_ttl
+         self._schema_cache_ttl = schema_cache_ttl
+         self._enable_metrics = enable_metrics
+         self._persist_dashboards = persist_dashboards
+         self._skip_table_creation = skip_table_creation
+
+         # Schema config manager
+         self._schema_config_manager = SchemaConfigManager(schema_config)
+
+         # Cache backend
+         self._cache: CacheBackend | None = cache
+         self._query_cache: QueryCache | None = None
+         if cache:
+             # Build CacheConfig with provided TTLs or use defaults
+             config_kwargs: dict[str, int] = {}
+             if query_cache_ttl is not None:
+                 config_kwargs["query_ttl"] = query_cache_ttl
+                 config_kwargs["default_ttl"] = query_cache_ttl
+             if schema_cache_ttl is not None:
+                 config_kwargs["schema_ttl"] = schema_cache_ttl
+             cache_config = CacheConfig(**config_kwargs)
+             self._query_cache = QueryCache(cache, config=cache_config)
+
+         # These will be initialized in startup()
+         self._pool: Pool | None = None
+         self._introspector: SchemaIntrospector | None = None
+         self._executor: QueryExecutor | None = None
+         self._builder: QueryBuilder | None = None
+         self._sql_validator: SQLValidator | None = None
+         self._schema: DatabaseSchema | None = None
+         self._dashboard_store: DashboardStore | None = None
+         self._saved_query_store: SavedQueryStore | None = None
+
+     @property
+     def cache(self) -> CacheBackend | None:
+         """Get the cache backend."""
+         return self._cache
+
+     @property
+     def dashboard_store(self) -> DashboardStore:
+         """Get the dashboard store.
+
+         Returns:
+             The dashboard store (PostgreSQL or in-memory).
+
+         Raises:
+             RuntimeError: If engine has not been started.
+         """
+         if self._dashboard_store is None:
+             raise RuntimeError("Engine not started. Call 'await engine.startup()' first.")
+         return self._dashboard_store
+
+     @property
+     def dashboards(self) -> DashboardStore:
+         """Alias for dashboard_store for convenience.
+
+         Returns:
+             The dashboard store.
+         """
+         return self.dashboard_store
+
+     @property
+     def saved_query_store(self) -> SavedQueryStore:
+         """Get the saved query store.
+
+         Returns:
+             The saved query store (PostgreSQL-backed).
+
+         Raises:
+             RuntimeError: If engine has not been started.
+         """
+         if self._saved_query_store is None:
+             raise RuntimeError("Engine not started. Call 'await engine.startup()' first.")
+         return self._saved_query_store
+
+     async def startup(self) -> None:
+         """Initialize the engine.
+
+         Creates the database connection pool and introspects the schema.
+         Must be called before using other methods.
+         """
+         # Create connection pool
+         self._pool = await asyncpg.create_pool(
+             self._database_url,
+             min_size=1,
+             max_size=10,
+         )
+
+         # Create schema introspector with optional caching
+         introspector_kwargs: dict[str, Any] = {
+             "exposed_tables": self._exposed_tables,
+             "schema_name": self._schema_name,
+             "cache": self._cache,
+         }
+         if self._schema_cache_ttl is not None:
+             introspector_kwargs["cache_ttl"] = self._schema_cache_ttl
+         self._introspector = SchemaIntrospector(self._pool, **introspector_kwargs)
+
+         # Introspect schema
+         self._schema = await self._introspector.get_schema()
+
+         # Create query builder, executor, and SQL validator
+         self._builder = QueryBuilder(self._schema)
+         self._sql_validator = SQLValidator(self._schema)
+         self._executor = QueryExecutor(
+             self._pool,
+             self._schema,
+             query_timeout=self._query_timeout,
+             max_rows=self._max_rows,
+         )
+
+         # Initialize dashboard store
+         if self._persist_dashboards:
+             # Create tables if they don't exist (skip if managed externally via Alembic)
+             if not self._skip_table_creation:
+                 await ensure_tables(self._pool)
+             self._dashboard_store = PostgresDashboardStore(self._pool)
+             self._saved_query_store = SavedQueryStore(self._pool)
+         else:
+             self._dashboard_store = InMemoryDashboardStore()
+             # SavedQueryStore requires PostgreSQL - no in-memory fallback
+             self._saved_query_store = None  # type: ignore[assignment]
+
+         # Update metrics
+         if self._enable_metrics:
+             set_active_connections(self._pool.get_size())
+
+     async def shutdown(self) -> None:
+         """Shutdown the engine.
+
+         Closes the database connection pool. Should be called on
+         application shutdown.
+         """
+         if self._pool:
+             await self._pool.close()
+             self._pool = None
+
+         self._introspector = None
+         self._executor = None
+         self._builder = None
+         self._sql_validator = None
+         self._schema = None
+         self._dashboard_store = None
+         self._saved_query_store = None
+
+         # Update metrics
+         if self._enable_metrics:
+             set_active_connections(0)
+
+     # ========================================================================
+     # Health Check Methods
+     # ========================================================================
+
+     async def check_connection(self) -> bool:
+         """Check if the database connection is healthy.
+
+         Executes a simple query to verify the database connection.
+
+         Returns:
+             True if the connection is healthy.
+
+         Raises:
+             RuntimeError: If the engine has not been started.
+             Exception: If the database connection fails.
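+
+         Example (illustrative sketch; assumes a FastAPI ``app`` and a started
+         ``engine`` are in scope):
+             >>> @app.get("/health")
+             ... async def health() -> dict[str, bool]:
+             ...     return {"database": await engine.check_connection()}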
+         """
+         self._ensure_started()
+         assert self._pool is not None
+
+         async with self._pool.acquire() as conn:
+             await conn.fetchval("SELECT 1")
+
+         return True
+
+     # ========================================================================
+     # Schema Methods
+     # ========================================================================
+
+     async def _validate_schema_exists(self, schema_name: str) -> bool:
+         """Verify that a PostgreSQL schema exists.
+
+         Args:
+             schema_name: Name of the schema to check.
+
+         Returns:
+             True if the schema exists, False otherwise.
+         """
+         assert self._pool is not None
+         async with self._pool.acquire() as conn:
+             result = await conn.fetchval(
+                 "SELECT EXISTS(SELECT 1 FROM information_schema.schemata WHERE schema_name = $1)",
+                 schema_name,
+             )
+         return bool(result)
+
+     async def get_schema(
+         self,
+         schema_name: str | None = None,
+         force_refresh: bool = False,
+     ) -> DatabaseSchema:
+         """Get the complete database schema (raw, without config applied).
+
+         Args:
+             schema_name: PostgreSQL schema to introspect. If None, uses the engine's
+                 default schema. Used for multi-tenant schema isolation.
+             force_refresh: If True, bypass cache and introspect fresh.
+
+         Returns:
+             DatabaseSchema containing all exposed tables and relationships.
+
+         Raises:
+             RuntimeError: If the engine has not been started.
+             ValueError: If the specified schema does not exist.
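+
+         Example (the tenant schema name is illustrative):
+             >>> schema = await engine.get_schema()
+             >>> tenant = await engine.get_schema(schema_name="tenant_a")
+             >>> fresh = await engine.get_schema(force_refresh=True)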
345
+ """
346
+ self._ensure_started()
347
+ assert self._pool is not None
348
+
349
+ effective_schema = schema_name or self._schema_name
350
+
351
+ # If using the default schema, use the cached introspector
352
+ if effective_schema == self._schema_name:
353
+ assert self._introspector is not None
354
+ return await self._introspector.get_schema(force_refresh=force_refresh)
355
+
356
+ # For non-default schemas, validate existence first
357
+ if not await self._validate_schema_exists(effective_schema):
358
+ raise ValueError(
359
+ f"PostgreSQL schema '{effective_schema}' does not exist. "
360
+ f'Verify the schema name or create it with: CREATE SCHEMA "{effective_schema}"'
361
+ )
362
+
363
+ # For non-default schemas, create introspector on-demand
364
+ # Uses existing cache with schema-based keys
365
+ introspector_kwargs: dict[str, Any] = {
366
+ "exposed_tables": self._exposed_tables,
367
+ "schema_name": effective_schema,
368
+ "cache": self._cache,
369
+ }
370
+ if self._schema_cache_ttl is not None:
371
+ introspector_kwargs["cache_ttl"] = self._schema_cache_ttl
372
+
373
+ introspector = SchemaIntrospector(self._pool, **introspector_kwargs)
374
+ return await introspector.get_schema(force_refresh=force_refresh)
375
+
376
+ async def get_enhanced_schema(
377
+ self,
378
+ schema_name: str | None = None,
379
+ ) -> EnhancedDatabaseSchema:
380
+ """Get the database schema with configuration applied.
381
+
382
+ Returns schema with display names, descriptions, and hidden
383
+ tables/columns filtered out.
384
+
385
+ Args:
386
+ schema_name: PostgreSQL schema to introspect. If None, uses the engine's
387
+ default schema. Used for multi-tenant schema isolation.
388
+
389
+ Returns:
390
+ EnhancedDatabaseSchema with configuration applied.
391
+
392
+ Raises:
393
+ RuntimeError: If the engine has not been started.
394
+ ValueError: If the specified schema does not exist.
395
+ """
396
+ self._ensure_started()
397
+ schema = await self.get_schema(schema_name=schema_name)
398
+ return self._schema_config_manager.apply_to_schema(schema)
399
+
400
+ async def get_table(
401
+ self,
402
+ table_name: str,
403
+ schema_name: str | None = None,
404
+ ) -> TableSchema:
405
+ """Get schema information for a single table.
406
+
407
+ Args:
408
+ table_name: Name of the table to retrieve.
409
+ schema_name: PostgreSQL schema to introspect.
410
+
411
+ Returns:
412
+ TableSchema for the requested table.
413
+
414
+ Raises:
415
+ RuntimeError: If the engine has not been started.
416
+ TableNotFoundError: If the table is not found.
417
+ ValueError: If the specified schema does not exist (for non-default schemas).
418
+ """
419
+ self._ensure_started()
420
+ assert self._pool is not None
421
+
422
+ effective_schema = schema_name or self._schema_name
423
+
424
+ # If using the default schema, use the cached introspector
425
+ if effective_schema == self._schema_name:
426
+ assert self._introspector is not None
427
+ return await self._introspector.get_table(table_name)
428
+
429
+ # For non-default schemas, validate existence first
430
+ if not await self._validate_schema_exists(effective_schema):
431
+ raise ValueError(
432
+ f"PostgreSQL schema '{effective_schema}' does not exist. "
433
+ f'Verify the schema name or create it with: CREATE SCHEMA "{effective_schema}"'
434
+ )
435
+
436
+ # For non-default schemas, create introspector on-demand
437
+ introspector_kwargs: dict[str, Any] = {
438
+ "exposed_tables": self._exposed_tables,
439
+ "schema_name": effective_schema,
440
+ "cache": self._cache,
441
+ }
442
+ if self._schema_cache_ttl is not None:
443
+ introspector_kwargs["cache_ttl"] = self._schema_cache_ttl
444
+
445
+ introspector = SchemaIntrospector(self._pool, **introspector_kwargs)
446
+ return await introspector.get_table(table_name)
447
+
448
+ # ========================================================================
449
+ # Query Methods
450
+ # ========================================================================
451
+
452
+ async def execute_query(
453
+ self,
454
+ query: QueryDefinition,
455
+ schema_name: str | None = None,
456
+ use_cache: bool = True,
457
+ ) -> QueryResult:
458
+ """Execute a query and return results.
459
+
460
+ Args:
461
+ query: Query definition to execute.
462
+ schema_name: PostgreSQL schema for table resolution. If None, uses the
463
+ engine's default schema. Used for multi-tenant schema isolation.
464
+ use_cache: Whether to use cached results if available.
465
+
466
+ Returns:
467
+ QueryResult with columns, rows, and execution metadata.
468
+
469
+ Raises:
470
+ RuntimeError: If the engine has not been started.
471
+ QueryValidationError: If the query fails validation.
472
+ QueryTimeoutError: If the query exceeds the timeout.
473
+ QueryExecutionError: If the query execution fails.
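+
+         Example (``query`` is a previously built QueryDefinition):
+             >>> result = await engine.execute_query(query)
+             >>> result.columns, len(result.rows)
+             >>> fresh = await engine.execute_query(query, use_cache=False)  # bypass cache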
+         """
+         self._ensure_started()
+         assert self._pool is not None
+
+         effective_schema = schema_name or self._schema_name
+         start = time.perf_counter()
+
+         # Create a schema-specific cache for non-default schemas.
+         # The cache object is built even when use_cache=False so that
+         # fresh results can still be written back below.
+         query_cache = self._query_cache
+         if self._cache and effective_schema != self._schema_name:
+             # Build CacheConfig with provided TTLs or use defaults
+             config_kwargs: dict[str, int] = {}
+             if self._query_cache_ttl is not None:
+                 config_kwargs["query_ttl"] = self._query_cache_ttl
+                 config_kwargs["default_ttl"] = self._query_cache_ttl
+             cache_config = CacheConfig(**config_kwargs) if config_kwargs else None
+             query_cache = QueryCache(
+                 self._cache,
+                 config=cache_config,
+                 schema_name=effective_schema,
+             )
+
+         # Check cache first
+         if use_cache and query_cache:
+             cached = await query_cache.get_result(query)
+             if cached:
+                 if self._enable_metrics:
+                     record_cache_hit(True)
+                 return cached
+             if self._enable_metrics:
+                 record_cache_hit(False)
+
+         # Get schema for the target schema
+         db_schema = await self.get_schema(schema_name=effective_schema)
+
+         # Create executor for this schema
+         executor = QueryExecutor(
+             self._pool,
+             db_schema,
+             query_timeout=self._query_timeout,
+             max_rows=self._max_rows,
+             schema_name=effective_schema,
+         )
+
+         # Execute query
+         try:
+             result = await executor.execute(query)
+
+             # Always cache the result when cache is available
+             # Even when use_cache=False (bypass), we want to update the cache with fresh data
+             if query_cache:
+                 try:
+                     await query_cache.cache_result(query, result)
+                 except Exception as cache_err:
+                     _logger.warning(
+                         "Failed to cache query result: %s (%s)",
+                         cache_err,
+                         type(cache_err).__name__,
+                     )
+
+             # Record metrics
+             if self._enable_metrics:
+                 duration = (time.perf_counter() - start) * 1000
+                 record_query_execution(duration, "success")
+
+             return result
+
+         except Exception:
+             if self._enable_metrics:
+                 duration = (time.perf_counter() - start) * 1000
+                 record_query_execution(duration, "error")
+             raise
+
+     async def preview_query(
+         self,
+         query: QueryDefinition,
+         limit: int = 100,
+         schema_name: str | None = None,
+     ) -> QueryResult:
+         """Execute a query with a limited number of rows.
+
+         Args:
+             query: Query definition to execute.
+             limit: Maximum number of rows to return.
+             schema_name: PostgreSQL schema for table resolution.
+
+         Returns:
+             QueryResult with limited rows.
+
+         Raises:
+             RuntimeError: If the engine has not been started.
+             QueryValidationError: If the query fails validation.
+         """
+         self._ensure_started()
+         assert self._pool is not None
+
+         effective_schema = schema_name or self._schema_name
+
+         # Get schema and create executor for this schema
+         db_schema = await self.get_schema(schema_name=effective_schema)
+
+         executor = QueryExecutor(
+             self._pool,
+             db_schema,
+             query_timeout=self._query_timeout,
+             max_rows=self._max_rows,
+             schema_name=effective_schema,
+         )
+         return await executor.preview(query, limit=limit)
+
+     async def sample_column_values(
+         self,
+         table_name: str,
+         column_name: str,
+         limit: int = 5,
+         schema_name: str | None = None,
+     ) -> list[Any]:
+         """Get sample values from a column for data preview.
+
+         Args:
+             table_name: Name of the table.
+             column_name: Name of the column.
+             limit: Maximum number of distinct values to return.
+             schema_name: PostgreSQL schema to query.
+
+         Returns:
+             List of sample values from the column.
+
+         Raises:
+             RuntimeError: If the engine has not been started.
+             ValueError: If the table or column doesn't exist.
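+
+         Example (table and column names are illustrative):
+             >>> await engine.sample_column_values("orders", "status", limit=3)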
+         """
+         self._ensure_started()
+         assert self._pool is not None
+
+         effective_schema = schema_name or self._schema_name
+
+         # Get the schema for validation
+         db_schema = await self.get_schema(schema_name=effective_schema)
+
+         # Validate table exists
+         table = db_schema.get_table(table_name)
+         if table is None:
+             raise ValueError(f"Table '{table_name}' not found")
+
+         # Validate column exists
+         column_exists = any(col.name == column_name for col in table.columns)
+         if not column_exists:
+             raise ValueError(f"Column '{column_name}' not found in table '{table_name}'")
+
+         # Build schema-qualified table reference
+         # Note: table_name and column_name are validated against the schema above,
+         # so this is safe from SQL injection despite string interpolation
+         escaped_col = column_name.replace('"', '""')
+         escaped_table = table_name.replace('"', '""')
+         escaped_schema = effective_schema.replace('"', '""')
+
+         table_ref = f'"{escaped_schema}"."{escaped_table}"'
+
+         sql = f"""
+             SELECT DISTINCT "{escaped_col}"
+             FROM {table_ref}
+             WHERE "{escaped_col}" IS NOT NULL
+             ORDER BY "{escaped_col}"
+             LIMIT {limit}
+         """  # noqa: S608
+
+         async with self._pool.acquire() as conn:
+             rows = await conn.fetch(sql)
+
+         # Extract values and serialize
+         from prismiq.executor import serialize_value
+
+         return [serialize_value(row[0]) for row in rows]
+
+     def validate_query(self, query: QueryDefinition) -> list[str]:
+         """Validate a query without executing it (uses default schema).
+
+         Args:
+             query: Query definition to validate.
+
+         Returns:
+             List of validation error messages (empty if valid).
+
+         Raises:
+             RuntimeError: If the engine has not been started.
+
+         Note:
+             This method validates against the default schema. For multi-tenant
+             schema support, use validate_query_async() instead.
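+
+         Example (``query`` assumed built elsewhere):
+             >>> errors = engine.validate_query(query)
+             >>> if errors:
+             ...     print("; ".join(errors))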
+         """
+         self._ensure_started()
+         assert self._builder is not None
+         return self._builder.validate(query)
+
+     async def validate_query_async(
+         self,
+         query: QueryDefinition,
+         schema_name: str | None = None,
+     ) -> list[str]:
+         """Validate a query without executing it (with schema support).
+
+         Args:
+             query: Query definition to validate.
+             schema_name: PostgreSQL schema to validate against. If None, uses
+                 the engine's default schema.
+
+         Returns:
+             List of validation error messages (empty if valid).
+
+         Raises:
+             RuntimeError: If the engine has not been started.
+             ValueError: If the specified schema does not exist.
+         """
+         self._ensure_started()
+
+         effective_schema = schema_name or self._schema_name
+
+         # Use default builder for default schema
+         if effective_schema == self._schema_name:
+             assert self._builder is not None
+             return self._builder.validate(query)
+
+         # For non-default schemas, get schema and create builder
+         db_schema = await self.get_schema(schema_name=effective_schema)
+         builder = QueryBuilder(db_schema, schema_name=effective_schema)
+         return builder.validate(query)
+
+     def validate_query_detailed(self, query: QueryDefinition) -> ValidationResult:
+         """Validate a query with detailed error information (uses default schema).
+
+         Args:
+             query: Query definition to validate.
+
+         Returns:
+             ValidationResult with detailed errors including suggestions.
+
+         Raises:
+             RuntimeError: If the engine has not been started.
+
+         Note:
+             This method validates against the default schema. For multi-tenant
+             schema support, use validate_query_detailed_async() instead.
+         """
+         self._ensure_started()
+         assert self._builder is not None
+         return self._builder.validate_detailed(query)
+
+     async def validate_query_detailed_async(
+         self,
+         query: QueryDefinition,
+         schema_name: str | None = None,
+     ) -> ValidationResult:
+         """Validate a query with detailed error information (with schema support).
+
+         Args:
+             query: Query definition to validate.
+             schema_name: PostgreSQL schema to validate against. If None, uses
+                 the engine's default schema.
+
+         Returns:
+             ValidationResult with detailed errors including suggestions.
+
+         Raises:
+             RuntimeError: If the engine has not been started.
+             ValueError: If the specified schema does not exist.
+         """
+         self._ensure_started()
+
+         effective_schema = schema_name or self._schema_name
+
+         # Use default builder for default schema
+         if effective_schema == self._schema_name:
+             assert self._builder is not None
+             return self._builder.validate_detailed(query)
+
+         # For non-default schemas, get schema and create builder
+         db_schema = await self.get_schema(schema_name=effective_schema)
+         builder = QueryBuilder(db_schema, schema_name=effective_schema)
+         return builder.validate_detailed(query)
+
+     def generate_sql(self, query: QueryDefinition) -> str:
+         """Generate SQL from a query definition without executing (uses default schema).
+
+         Useful for previewing the SQL that will be executed.
+
+         Args:
+             query: Query definition to generate SQL for.
+
+         Returns:
+             The generated SQL string.
+
+         Raises:
+             RuntimeError: If the engine has not been started.
+             QueryValidationError: If the query is invalid.
+
+         Note:
+             This method uses the default schema. For multi-tenant schema support,
+             use generate_sql_async() instead.
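+
+         Example (``query`` assumed valid):
+             >>> print(engine.generate_sql(query))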
+         """
+         self._ensure_started()
+         assert self._builder is not None
+
+         # Validate first
+         errors = self._builder.validate(query)
+         if errors:
+             from .types import QueryValidationError
+
+             raise QueryValidationError("; ".join(errors), errors)
+
+         sql, _ = self._builder.build(query)
+         return sql
+
+     async def generate_sql_async(
+         self,
+         query: QueryDefinition,
+         schema_name: str | None = None,
+     ) -> str:
+         """Generate SQL from a query definition without executing (with schema support).
+
+         Useful for previewing the SQL that will be executed.
+
+         Args:
+             query: Query definition to generate SQL for.
+             schema_name: PostgreSQL schema for table resolution. If None, uses
+                 the engine's default schema.
+
+         Returns:
+             The generated SQL string.
+
+         Raises:
+             RuntimeError: If the engine has not been started.
+             QueryValidationError: If the query is invalid.
+             ValueError: If the specified schema does not exist.
+         """
+         self._ensure_started()
+
+         effective_schema = schema_name or self._schema_name
+
+         # Use default builder for default schema
+         if effective_schema == self._schema_name:
+             assert self._builder is not None
+             errors = self._builder.validate(query)
+             if errors:
+                 from .types import QueryValidationError
+
+                 raise QueryValidationError("; ".join(errors), errors)
+             sql, _ = self._builder.build(query)
+             return sql
+
+         # For non-default schemas, get schema and create builder
+         db_schema = await self.get_schema(schema_name=effective_schema)
+         builder = QueryBuilder(db_schema, schema_name=effective_schema)
+
+         errors = builder.validate(query)
+         if errors:
+             from .types import QueryValidationError
+
+             raise QueryValidationError("; ".join(errors), errors)
+
+         sql, _ = builder.build(query)
+         return sql
+
+     # ========================================================================
+     # Custom SQL Methods
+     # ========================================================================
+
+     async def validate_sql(
+         self,
+         sql: str,
+         schema_name: str | None = None,
+     ) -> SQLValidationResult:
+         """Validate a raw SQL query without executing.
+
+         Checks that the SQL is a valid SELECT statement and only
+         references tables visible in the schema.
+
+         Args:
+             sql: Raw SQL query to validate.
+             schema_name: PostgreSQL schema for table validation. If None, uses the
+                 engine's default schema. Used for multi-tenant schema isolation.
+
+         Returns:
+             SQLValidationResult with validation status and details.
+
+         Raises:
+             RuntimeError: If the engine has not been started.
+         """
+         self._ensure_started()
+
+         # For non-default schemas, create a validator with the tenant's schema
+         effective_schema = schema_name or self._schema_name
+         if effective_schema != self._schema_name:
+             tenant_schema = await self.get_schema(schema_name=effective_schema)
+             validator = SQLValidator(tenant_schema)
+             return validator.validate(sql)
+
+         assert self._sql_validator is not None
+         return self._sql_validator.validate(sql)
+
+     async def execute_raw_sql(
+         self,
+         sql: str,
+         params: dict[str, Any] | None = None,
+         schema_name: str | None = None,
+     ) -> QueryResult:
+         """Execute a raw SQL query.
+
+         Only SELECT statements are allowed. Queries are restricted
+         to tables visible in the schema.
+
+         Args:
+             sql: Raw SQL query (SELECT only).
+             params: Optional named parameters for the query.
+             schema_name: PostgreSQL schema for table validation. If None, uses the
+                 engine's default schema. Used for multi-tenant schema isolation.
+
+         Returns:
+             QueryResult with columns, rows, and execution metadata.
+
+         Raises:
+             RuntimeError: If the engine has not been started.
+             SQLValidationError: If the SQL fails validation.
+             QueryTimeoutError: If the query exceeds the timeout.
+             QueryExecutionError: If the query execution fails.
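+
+         Example (illustrative SQL against an assumed ``orders`` table):
+             >>> result = await engine.execute_raw_sql(
+             ...     "SELECT status, count(*) AS n FROM orders GROUP BY status"
+             ... )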
+         """
+         self._ensure_started()
+         assert self._executor is not None
+         assert self._pool is not None
+
+         # For non-default schemas, validate with tenant's schema
+         effective_schema = schema_name or self._schema_name
+         if effective_schema != self._schema_name:
+             validation = await self.validate_sql(sql, schema_name=effective_schema)
+             if not validation.valid:
+                 from .sql_validator import SQLValidationError
+
+                 raise SQLValidationError(
+                     "SQL validation failed: " + "; ".join(validation.errors),
+                     errors=validation.errors,
+                 )
+
+         start = time.perf_counter()
+
+         try:
+             result = await self._executor.execute_raw_sql(sql, params)
+
+             # Record metrics
+             if self._enable_metrics:
+                 duration = (time.perf_counter() - start) * 1000
+                 record_query_execution(duration, "success")
+
+             return result
+
+         except Exception:
+             if self._enable_metrics:
+                 duration = (time.perf_counter() - start) * 1000
+                 record_query_execution(duration, "error")
+             raise
+
+     # ========================================================================
+     # Cache Methods
+     # ========================================================================
+
+     async def invalidate_cache(self, table_name: str | None = None) -> int:
+         """Invalidate cached data.
+
+         Args:
+             table_name: If provided, invalidate only queries involving this table.
+                 If None, invalidate all query cache.
+
+         Returns:
+             Number of cache entries invalidated.
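+
+         Example ("orders" is an illustrative table name):
+             >>> await engine.invalidate_cache("orders")  # queries touching one table
+             >>> await engine.invalidate_cache()          # the entire query cache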
+         """
+         if not self._query_cache or not self._cache:
+             return 0
+
+         if table_name:
+             return await self._query_cache.invalidate_table(table_name)
+         else:
+             return await self._cache.clear("query:*")
+
+     async def invalidate_schema_cache(self) -> None:
+         """Invalidate the schema cache.
+
+         Forces the next get_schema() call to introspect the database.
+         """
+         if self._introspector:
+             await self._introspector.invalidate_cache()
+
+     # ========================================================================
+     # Time Series Methods
+     # ========================================================================
+
+     async def execute_timeseries_query(
+         self,
+         query: QueryDefinition,
+         interval: TimeInterval,
+         date_column: str,
+         fill_missing: bool = True,
+         schema_name: str | None = None,
+     ) -> QueryResult:
+         """Execute a time series query with automatic bucketing.
+
+         Adds date_trunc to the query for time bucketing and optionally
+         fills missing time buckets.
+
+         Args:
+             query: Query definition to execute.
+             interval: Time interval for bucketing.
+             date_column: Name of the date/timestamp column to bucket.
+             fill_missing: Whether to fill missing time buckets with default values.
+             schema_name: PostgreSQL schema for table resolution. If None, uses the
+                 engine's default schema. Used for multi-tenant schema isolation.
+
+         Returns:
+             QueryResult with time-bucketed data.
+
+         Raises:
+             RuntimeError: If the engine has not been started.
+             QueryValidationError: If the query fails validation.
+             ValueError: If the date column is not found.
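+
+         Example (illustrative; assumes the queried table has a ``created_at``
+         column, and that ``TimeInterval`` exposes a daily member by this name):
+             >>> result = await engine.execute_timeseries_query(
+             ...     query,
+             ...     interval=TimeInterval.DAY,
+             ...     date_column="created_at",
+             ... )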
+         """
+         self._ensure_started()
+         assert self._pool is not None
+
+         effective_schema = schema_name or self._schema_name
+
+         # Get schema for the target schema
+         db_schema = await self.get_schema(schema_name=effective_schema)
+
+         # Find the table ID for the date column
+         table_id = self._find_table_for_column_in_schema(query, date_column, db_schema)
+         if table_id is None:
+             raise ValueError(f"Date column '{date_column}' not found in query tables")
+
+         # Create a modified query with time series config
+         modified_query = QueryDefinition(
+             tables=query.tables,
+             joins=query.joins,
+             columns=query.columns,
+             filters=query.filters,
+             group_by=query.group_by,
+             order_by=query.order_by,
+             limit=query.limit,
+             offset=query.offset,
+             time_series=TimeSeriesConfig(
+                 table_id=table_id,
+                 date_column=date_column,
+                 interval=interval.value,
+                 fill_missing=fill_missing,
+             ),
+         )
+
+         # Create executor for this schema
+         executor = QueryExecutor(
+             self._pool,
+             db_schema,
+             query_timeout=self._query_timeout,
+             max_rows=self._max_rows,
+             schema_name=effective_schema,
+         )
+
+         return await executor.execute(modified_query)
+
+     def _find_table_for_column(self, query: QueryDefinition, column_name: str) -> str | None:
+         """Find the table ID that contains the specified column."""
+         self._ensure_started()
+         assert self._schema is not None
+         return self._find_table_for_column_in_schema(query, column_name, self._schema)
+
+     def _find_table_for_column_in_schema(
+         self,
+         query: QueryDefinition,
+         column_name: str,
+         schema: DatabaseSchema,
+     ) -> str | None:
+         """Find the table ID that contains the specified column in the given schema."""
+         for query_table in query.tables:
+             table_schema = schema.get_table(query_table.name)
+             if table_schema and table_schema.has_column(column_name):
+                 return query_table.id
+
+         return None
+
+     # ========================================================================
+     # Transform Methods
+     # ========================================================================
+
+     def transform_pivot(
+         self,
+         result: QueryResult,
+         row_column: str,
+         pivot_column: str,
+         value_column: str,
+         aggregation: str = "sum",
+     ) -> QueryResult:
+         """Pivot a query result from long to wide format.
+
+         Args:
+             result: Query result to pivot.
+             row_column: Column to use as row headers.
+             pivot_column: Column to pivot into separate columns.
+             value_column: Column containing values to aggregate.
+             aggregation: Aggregation function: sum, avg, count, min, max.
+
+         Returns:
+             Pivoted QueryResult.
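+
+         Example (column names are illustrative):
+             >>> wide = engine.transform_pivot(
+             ...     result,
+             ...     row_column="month",
+             ...     pivot_column="region",
+             ...     value_column="revenue",
+             ... )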
+         """
+         return pivot_data(
+             result=result,
+             row_column=row_column,
+             pivot_column=pivot_column,
+             value_column=value_column,
+             aggregation=aggregation,
+         )
+
+     # ========================================================================
+     # Trend Methods
+     # ========================================================================
+
+     def calculate_trend(
+         self,
+         current: float | None,
+         previous: float | None,
+         threshold: float = 0.001,
+     ) -> TrendResult:
+         """Calculate a trend between two values.
+
+         Args:
+             current: Current value.
+             previous: Previous value for comparison.
+             threshold: Changes smaller than this are considered "flat".
+
+         Returns:
+             TrendResult with direction and change metrics.
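+
+         Example:
+             >>> trend = engine.calculate_trend(current=120.0, previous=100.0)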
+         """
+         return calculate_trend(current, previous, threshold)
+
+     async def calculate_metric_trend(
+         self,
+         query: QueryDefinition,
+         comparison: ComparisonPeriod,
+         current_start: date,
+         current_end: date,
+         value_column: str,
+         date_column: str,
+     ) -> TrendResult:
+         """Calculate trend for a metric query.
+
+         Executes the query for both current and comparison periods,
+         then calculates the trend between them.
+
+         Args:
+             query: Query definition for the metric.
+             comparison: Period to compare against.
+             current_start: Start date of current period.
+             current_end: End date of current period.
+             value_column: Column containing the metric value.
+             date_column: Column containing the date for filtering.
+
+         Returns:
+             TrendResult with current value, previous value, and change metrics.
+
+         Raises:
+             RuntimeError: If the engine has not been started.
+             ValueError: If the date or value column is not found.
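+
+         Example (illustrative; assumes ``query`` aggregates an ``amount`` column
+         and the table has a ``created_at`` date column):
+             >>> from datetime import date
+             >>> trend = await engine.calculate_metric_trend(
+             ...     query,
+             ...     comparison=ComparisonPeriod.PREVIOUS_PERIOD,
+             ...     current_start=date(2024, 1, 1),
+             ...     current_end=date(2024, 1, 31),
+             ...     value_column="amount",
+             ...     date_column="created_at",
+             ... )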
+         """
+         self._ensure_started()
+         assert self._executor is not None
+
+         # Find the table ID for the date column
+         table_id = self._find_table_for_column(query, date_column)
+         if table_id is None:
+             raise ValueError(f"Date column '{date_column}' not found in query tables")
+
+         # Calculate comparison period dates
+         previous_start, previous_end = self._get_comparison_dates(
+             comparison, current_start, current_end
+         )
+
+         # Execute query for current period
+         current_query = self._add_date_filter(
+             query, table_id, date_column, current_start, current_end
+         )
+         current_result = await self._executor.execute(current_query)
+
+         # Execute query for previous period
+         previous_query = self._add_date_filter(
+             query, table_id, date_column, previous_start, previous_end
+         )
+         previous_result = await self._executor.execute(previous_query)
+
+         # Extract values
+         current_value = self._extract_value(current_result, value_column)
+         previous_value = self._extract_value(previous_result, value_column)
+
+         return calculate_trend(current_value, previous_value)
+
+     def _get_comparison_dates(
+         self,
+         comparison: ComparisonPeriod,
+         current_start: date,
+         current_end: date,
+     ) -> tuple[date, date]:
+ """Calculate the comparison period dates."""
1181
+ period_days = (current_end - current_start).days + 1
1182
+
1183
+ if comparison == ComparisonPeriod.PREVIOUS_PERIOD:
1184
+ previous_end = current_start - timedelta(days=1)
1185
+ previous_start = previous_end - timedelta(days=period_days - 1)
1186
+ elif comparison == ComparisonPeriod.PREVIOUS_YEAR:
1187
+ previous_start = current_start.replace(year=current_start.year - 1)
1188
+ previous_end = current_end.replace(year=current_end.year - 1)
1189
+ elif comparison == ComparisonPeriod.PREVIOUS_MONTH:
1190
+ # Move back one month
1191
+ if current_start.month == 1:
1192
+ previous_start = current_start.replace(year=current_start.year - 1, month=12)
1193
+ else:
1194
+ previous_start = current_start.replace(month=current_start.month - 1)
1195
+
1196
+ if current_end.month == 1:
1197
+ previous_end = current_end.replace(year=current_end.year - 1, month=12)
1198
+ else:
1199
+ previous_end = current_end.replace(month=current_end.month - 1)
+         elif comparison == ComparisonPeriod.PREVIOUS_WEEK:
+             previous_start = current_start - timedelta(days=7)
+             previous_end = current_end - timedelta(days=7)
+         else:
+             raise ValueError(f"Unknown comparison period: {comparison}")
+
+         return previous_start, previous_end
+
+     def _add_date_filter(
+         self,
+         query: QueryDefinition,
+         table_id: str,
+         date_column: str,
+         start_date: date,
+         end_date: date,
+     ) -> QueryDefinition:
+         """Add date range filters to a query."""
+         new_filters = list(query.filters)
+         new_filters.extend(
+             [
+                 FilterDefinition(
+                     table_id=table_id,
+                     column=date_column,
+                     operator=FilterOperator.GTE,
+                     value=start_date.isoformat(),
+                 ),
+                 FilterDefinition(
+                     table_id=table_id,
+                     column=date_column,
+                     operator=FilterOperator.LTE,
+                     value=end_date.isoformat(),
+                 ),
+             ]
+         )
+
+         return QueryDefinition(
+             tables=query.tables,
+             joins=query.joins,
+             columns=query.columns,
+             filters=new_filters,
+             group_by=query.group_by,
+             order_by=query.order_by,
+             limit=query.limit,
+             offset=query.offset,
+             time_series=query.time_series,
+         )
+
+     def _extract_value(self, result: QueryResult, column: str) -> float | None:
+         """Extract a single value from a query result."""
+         if not result.rows:
+             return None
+
+         try:
+             col_idx = result.columns.index(column)
+         except ValueError:
+             # Try finding by alias pattern (e.g., "sum_amount" for aggregated column)
+             for i, col_name in enumerate(result.columns):
+                 if col_name == column or col_name.endswith(f"_{column}"):
+                     col_idx = i
+                     break
+             else:
+                 raise ValueError(f"Column '{column}' not found in result")
+
+         value = result.rows[0][col_idx]
+         if value is None:
+             return None
+
+         return float(value)
+
+     # ========================================================================
+     # Schema Configuration Methods
+     # ========================================================================
+
+     def get_schema_config(self) -> SchemaConfig:
+         """Get the current schema configuration.
+
+         Returns:
+             Current SchemaConfig with all table and column settings.
+         """
+         return self._schema_config_manager.get_config()
+
+     def set_schema_config(self, config: SchemaConfig) -> None:
+         """Replace the entire schema configuration.
+
+         Args:
+             config: New schema configuration.
+         """
+         self._schema_config_manager = SchemaConfigManager(config)
+
+     def update_table_config(self, table_name: str, config: TableConfig) -> None:
+         """Update configuration for a specific table.
+
+         Args:
+             table_name: Name of the table.
+             config: New configuration for the table.
+         """
+         self._schema_config_manager.update_table_config(table_name, config)
+
+     def update_column_config(self, table_name: str, column_name: str, config: ColumnConfig) -> None:
+         """Update configuration for a specific column.
+
+         Args:
+             table_name: Name of the table.
+             column_name: Name of the column.
+             config: New configuration for the column.
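+
+         Example (the ``ColumnConfig`` field name here is an assumption):
+             >>> engine.update_column_config(
+             ...     "orders", "amt", ColumnConfig(display_name="Amount")
+             ... )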
+         """
+         self._schema_config_manager.update_column_config(table_name, column_name, config)
+
+     # ========================================================================
+     # Private Methods
+     # ========================================================================
+
+     def _ensure_started(self) -> None:
+         """Ensure the engine has been started."""
+         if self._pool is None:
+             raise RuntimeError("Engine not started. Call 'await engine.startup()' first.")