prismiq 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
prismiq/schema.py ADDED
@@ -0,0 +1,333 @@
1
+ """Schema introspection for PostgreSQL databases.
2
+
3
+ This module provides the SchemaIntrospector class that reads database
4
+ metadata from PostgreSQL's information_schema.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import TYPE_CHECKING
10
+
11
+ from prismiq.types import (
12
+ ColumnSchema,
13
+ DatabaseSchema,
14
+ Relationship,
15
+ TableNotFoundError,
16
+ TableSchema,
17
+ )
18
+
19
+ if TYPE_CHECKING:
20
+ from asyncpg import Pool, Record # type: ignore[import-not-found]
21
+
22
+ from prismiq.cache import CacheBackend
23
+
24
+
25
class SchemaIntrospector:
    """Introspects PostgreSQL database schema.

    Reads table and column metadata from information_schema, detects
    foreign key relationships, and provides a view filtered by the
    ``exposed_tables`` configuration.

    Supports optional caching to reduce database queries.

    Example:
        >>> pool = await asyncpg.create_pool(database_url)
        >>> introspector = SchemaIntrospector(pool, exposed_tables=["users", "orders"])
        >>> schema = await introspector.get_schema()
        >>> print(schema.table_names())
        ['users', 'orders']

    With caching:
        >>> from prismiq.cache import InMemoryCache
        >>> cache = InMemoryCache()
        >>> introspector = SchemaIntrospector(pool, cache=cache, cache_ttl=3600)
        >>> schema = await introspector.get_schema()  # Hits database
        >>> schema = await introspector.get_schema()  # Returns cached result
    """

    def __init__(
        self,
        pool: Pool,
        exposed_tables: list[str] | None = None,
        schema_name: str = "public",
        cache: CacheBackend | None = None,
        cache_ttl: int = 3600,
    ) -> None:
        """Initialize the schema introspector.

        Args:
            pool: asyncpg connection pool to use for queries.
            exposed_tables: List of table names to expose. If None, all tables
                in the schema are exposed. An empty list exposes nothing.
            schema_name: PostgreSQL schema to introspect (default: "public").
            cache: Optional cache backend for caching schema data.
            cache_ttl: TTL for cached schema in seconds (default: 1 hour).
        """
        self._pool = pool
        self._exposed_tables = exposed_tables
        self._schema_name = schema_name
        self._cache = cache
        self._cache_ttl = cache_ttl

    def _cache_key(self, suffix: str) -> str:
        """Generate schema-qualified cache key for tenant isolation.

        Args:
            suffix: Cache key suffix (e.g., "full", "table:users").

        Returns:
            Cache key with schema prefix (e.g., "schema:org_123:full").
        """
        return f"schema:{self._schema_name}:{suffix}"

    async def get_schema(self, force_refresh: bool = False) -> DatabaseSchema:
        """Get the complete database schema.

        Args:
            force_refresh: If True, bypass cache and fetch fresh data.

        Returns:
            DatabaseSchema containing all exposed tables and their relationships.
        """
        cache_key = self._cache_key("full")

        # Compare against None rather than relying on truthiness: a cache
        # backend that defines __len__ is falsy while empty and would
        # otherwise never be read from or written to.
        if self._cache is not None and not force_refresh:
            cached = await self._cache.get(cache_key)
            if cached is not None:
                return DatabaseSchema.model_validate(cached)

        schema = await self._introspect_schema()

        if self._cache is not None:
            await self._cache.set(cache_key, schema.model_dump(), self._cache_ttl)

        return schema

    async def _introspect_schema(self) -> DatabaseSchema:
        """Introspect the schema from the database, bypassing the cache."""
        table_names = await self._get_table_names()
        tables: list[TableSchema] = [
            await self._get_table_schema(table_name) for table_name in table_names
        ]
        relationships = await self.detect_relationships()
        return DatabaseSchema(tables=tables, relationships=relationships)

    async def get_table(self, table_name: str, force_refresh: bool = False) -> TableSchema:
        """Get schema information for a single table.

        Args:
            table_name: Name of the table to retrieve.
            force_refresh: If True, bypass cache and fetch fresh data.

        Returns:
            TableSchema for the requested table.

        Raises:
            TableNotFoundError: If the table doesn't exist or isn't exposed.
        """
        # Reject unexposed tables before touching the cache or the database.
        if self._exposed_tables is not None and table_name not in self._exposed_tables:
            raise TableNotFoundError(table_name)

        cache_key = self._cache_key(f"table:{table_name}")
        if self._cache is not None and not force_refresh:
            cached = await self._cache.get(cache_key)
            if cached is not None:
                return TableSchema.model_validate(cached)

        # Confirm the table actually exists in the introspected schema.
        if table_name not in await self._get_table_names():
            raise TableNotFoundError(table_name)

        table = await self._get_table_schema(table_name)

        if self._cache is not None:
            await self._cache.set(cache_key, table.model_dump(), self._cache_ttl)

        return table

    async def invalidate_cache(self) -> int:
        """Invalidate all cached schema data for this schema.

        Only entries under this schema's key prefix are cleared
        (tenant isolation).

        Returns:
            Number of cache entries cleared, or 0 when no cache is configured.
        """
        if self._cache is None:
            return 0

        # Build the pattern with the same helper that builds the keys so the
        # two can never drift apart.
        return await self._cache.clear(self._cache_key("*"))

    async def detect_relationships(self) -> list[Relationship]:
        """Detect foreign key relationships between exposed tables.

        When ``exposed_tables`` was given (including an empty list), only
        relationships whose both endpoints are exposed are returned.

        Returns:
            List of Relationship objects representing foreign keys.
        """
        # NOTE(review): key_column_usage and constraint_column_usage are
        # joined on constraint name and schema only, so a multi-column
        # foreign key presumably yields every from/to column pairing --
        # confirm whether composite keys need position-based matching.
        query = """
            SELECT
                tc.table_name AS from_table,
                kcu.column_name AS from_column,
                ccu.table_name AS to_table,
                ccu.column_name AS to_column
            FROM information_schema.table_constraints tc
            JOIN information_schema.key_column_usage kcu
                ON tc.constraint_name = kcu.constraint_name
                AND tc.table_schema = kcu.table_schema
            JOIN information_schema.constraint_column_usage ccu
                ON ccu.constraint_name = tc.constraint_name
                AND ccu.table_schema = tc.table_schema
            WHERE tc.constraint_type = 'FOREIGN KEY'
                AND tc.table_schema = $1
        """
        async with self._pool.acquire() as conn:
            rows: list[Record] = await conn.fetch(query, self._schema_name)

        # `is not None` (not truthiness): an empty exposed list means
        # "expose nothing", matching _get_table_names and get_table.
        exposed_set = set(self._exposed_tables) if self._exposed_tables is not None else None

        relationships: list[Relationship] = []
        for row in rows:
            from_table = row["from_table"]
            to_table = row["to_table"]

            # Keep only relationships whose both ends are exposed.
            if exposed_set is not None and (
                from_table not in exposed_set or to_table not in exposed_set
            ):
                continue

            relationships.append(
                Relationship(
                    from_table=from_table,
                    from_column=row["from_column"],
                    to_table=to_table,
                    to_column=row["to_column"],
                )
            )

        return relationships

    async def _get_table_names(self) -> list[str]:
        """Get the exposed table and view names in the schema, sorted by name."""
        query = """
            SELECT table_name
            FROM information_schema.tables
            WHERE table_schema = $1
                AND table_type IN ('BASE TABLE', 'VIEW')
            ORDER BY table_name
        """
        async with self._pool.acquire() as conn:
            rows: list[Record] = await conn.fetch(query, self._schema_name)

        table_names = [row["table_name"] for row in rows]

        # Filter to exposed tables if specified
        if self._exposed_tables is not None:
            exposed_set = set(self._exposed_tables)
            table_names = [name for name in table_names if name in exposed_set]

        return table_names

    async def _get_table_schema(self, table_name: str) -> TableSchema:
        """Build the TableSchema (columns, primary keys, row count) for a table."""
        columns = await self._get_columns(table_name)
        primary_key_set = set(await self._get_primary_keys(table_name))
        row_count = await self._get_row_count(table_name)

        # Rebuild primary-key columns with is_primary_key=True instead of
        # replacing entries (via list.index) in the list being iterated.
        columns = [
            ColumnSchema(**{**col.model_dump(), "is_primary_key": True})
            if col.name in primary_key_set
            else col
            for col in columns
        ]

        return TableSchema(
            name=table_name,
            schema_name=self._schema_name,
            columns=columns,
            row_count=row_count,
        )

    async def _get_columns(self, table_name: str) -> list[ColumnSchema]:
        """Get column information for a table, in ordinal position order."""
        query = """
            SELECT
                column_name,
                data_type,
                is_nullable,
                column_default
            FROM information_schema.columns
            WHERE table_schema = $1
                AND table_name = $2
            ORDER BY ordinal_position
        """
        async with self._pool.acquire() as conn:
            rows: list[Record] = await conn.fetch(query, self._schema_name, table_name)

        return [
            ColumnSchema(
                name=row["column_name"],
                data_type=row["data_type"],
                # information_schema reports nullability as 'YES' / 'NO'.
                is_nullable=row["is_nullable"] == "YES",
                default_value=row["column_default"],
            )
            for row in rows
        ]

    async def _get_primary_keys(self, table_name: str) -> list[str]:
        """Get primary key column names for a table, in key order."""
        query = """
            SELECT kcu.column_name
            FROM information_schema.table_constraints tc
            JOIN information_schema.key_column_usage kcu
                ON tc.constraint_name = kcu.constraint_name
                AND tc.table_schema = kcu.table_schema
            WHERE tc.constraint_type = 'PRIMARY KEY'
                AND tc.table_schema = $1
                AND tc.table_name = $2
            ORDER BY kcu.ordinal_position
        """
        async with self._pool.acquire() as conn:
            rows: list[Record] = await conn.fetch(query, self._schema_name, table_name)

        return [row["column_name"] for row in rows]

    async def _get_row_count(self, table_name: str) -> int | None:
        """Get approximate row count for a table using pg_class.reltuples.

        This is fast but is only an estimate and may be out of date.

        Returns:
            The estimated row count (negative estimates are clamped to 0),
            or None when no relation with relkind 'r' matches the name --
            which is the case for views, since the query is restricted to
            relkind 'r'.
        """
        query = """
            SELECT reltuples::bigint AS row_count
            FROM pg_class c
            JOIN pg_namespace n ON n.oid = c.relnamespace
            WHERE n.nspname = $1
                AND c.relname = $2
                AND c.relkind = 'r'
        """
        async with self._pool.acquire() as conn:
            row = await conn.fetchrow(query, self._schema_name, table_name)

        if row is None:
            return None

        count = row["row_count"]
        if count is None:
            return None

        # reltuples can be -1 if never analyzed, treat as 0
        return max(0, count)
@@ -0,0 +1,354 @@
1
+ """Schema customization for Prismiq analytics.
2
+
3
+ This module provides configuration models and a manager for customizing
4
+ how database schema is presented to users, including friendly names,
5
+ hidden columns, and formatting hints.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ from typing import Any
12
+
13
+ from pydantic import BaseModel, ConfigDict
14
+
15
+ from prismiq.types import DatabaseSchema
16
+
17
+
18
class ColumnConfig(BaseModel):
    """Presentation settings for one database column.

    Every field is optional; an instance built with no arguments carries
    no customization and leaves the column visible.
    """

    model_config = ConfigDict(strict=True)

    # Friendly name for UI display.
    display_name: str | None = None

    # Tooltip/help text for the column.
    description: str | None = None

    # Whether to hide this column from the schema explorer.
    hidden: bool = False

    # Number format: plain, currency, percent, compact.
    format: str | None = None

    # Date format string for date/timestamp columns.
    date_format: str | None = None
37
+
38
+
39
class TableConfig(BaseModel):
    """Presentation settings for one database table.

    Every field is optional; an instance built with no arguments carries
    no customization and leaves the table visible.
    """

    model_config = ConfigDict(strict=True)

    # Friendly name for UI display.
    display_name: str | None = None

    # Tooltip/help text for the table.
    description: str | None = None

    # Whether to hide this table from the schema explorer.
    hidden: bool = False

    # Column-specific configurations, keyed by column name.
    columns: dict[str, ColumnConfig] = {}
55
+
56
+
57
class SchemaConfig(BaseModel):
    """Complete schema customization configuration.

    Maps table names to their TableConfig and offers lookups that fall
    back to default (empty) configuration for anything not configured.
    """

    model_config = ConfigDict(strict=True)

    # Table-specific configurations, keyed by table name.
    tables: dict[str, TableConfig] = {}

    def get_table_config(self, table_name: str) -> TableConfig:
        """Look up the configuration of a table.

        Args:
            table_name: The table name to get configuration for.

        Returns:
            The configured TableConfig, or a default (empty) one when the
            table has no entry.
        """
        if table_name in self.tables:
            return self.tables[table_name]
        return TableConfig()

    def get_column_config(self, table_name: str, column_name: str) -> ColumnConfig:
        """Look up the configuration of a column.

        Args:
            table_name: The table containing the column.
            column_name: The column name to get configuration for.

        Returns:
            The configured ColumnConfig, or a default (empty) one when the
            table or the column has no entry.
        """
        column_configs = self.get_table_config(table_name).columns
        if column_name in column_configs:
            return column_configs[column_name]
        return ColumnConfig()

    def get_display_name(self, table_name: str, column_name: str | None = None) -> str:
        """Resolve the display name of a table or of one of its columns.

        Args:
            table_name: The table name.
            column_name: Optional column name. If None, the table's display
                name is resolved instead.

        Returns:
            The configured display name, or the actual table/column name
            when no (non-empty) display name is configured.
        """
        if column_name is not None:
            configured = self.get_column_config(table_name, column_name).display_name
            return configured or column_name

        configured = self.get_table_config(table_name).display_name
        return configured or table_name

    def is_table_hidden(self, table_name: str) -> bool:
        """Return True when the table is configured as hidden."""
        table_config = self.get_table_config(table_name)
        return table_config.hidden

    def is_column_hidden(self, table_name: str, column_name: str) -> bool:
        """Return True when the column is configured as hidden."""
        column_config = self.get_column_config(table_name, column_name)
        return column_config.hidden
113
+
114
+
115
class EnhancedColumnSchema(BaseModel):
    """A column's database metadata combined with its display configuration."""

    model_config = ConfigDict(strict=True)

    # --- Database metadata -------------------------------------------------

    # Column name in the database.
    name: str

    # PostgreSQL data type.
    data_type: str

    # Whether the column allows NULL values.
    is_nullable: bool

    # Whether this column is part of the primary key.
    is_primary_key: bool = False

    # Default value expression, if any.
    default_value: str | None = None

    # --- Values taken from configuration ------------------------------------

    # Friendly display name from configuration.
    display_name: str | None = None

    # Description from configuration.
    description: str | None = None

    # Number format from configuration.
    format: str | None = None

    # Date format from configuration.
    date_format: str | None = None
146
+
147
+
148
class EnhancedTableSchema(BaseModel):
    """A table's database metadata combined with its display configuration."""

    model_config = ConfigDict(strict=True)

    # Table name in the database.
    name: str

    # Database schema (namespace).
    schema_name: str = "public"

    # Enhanced column schemas.
    columns: list[EnhancedColumnSchema]

    # Friendly display name from configuration.
    display_name: str | None = None

    # Description from configuration.
    description: str | None = None

    def get_column(self, column_name: str) -> EnhancedColumnSchema | None:
        """Return the first column named ``column_name``, or None if absent."""
        matches = (column for column in self.columns if column.name == column_name)
        return next(matches, None)

    def has_column(self, column_name: str) -> bool:
        """Return True when the table has a column with the given name."""
        found = self.get_column(column_name)
        return found is not None
178
+
179
+
180
class EnhancedDatabaseSchema(BaseModel):
    """A database schema whose tables carry configuration-based enhancements."""

    model_config = ConfigDict(strict=True)

    # Enhanced table schemas.
    tables: list[EnhancedTableSchema]

    # Foreign key relationships.
    relationships: list[Any]  # Using Any to avoid circular import

    def get_table(self, table_name: str) -> EnhancedTableSchema | None:
        """Return the first table named ``table_name``, or None if absent."""
        matches = (candidate for candidate in self.tables if candidate.name == table_name)
        return next(matches, None)

    def has_table(self, table_name: str) -> bool:
        """Return True when the schema contains a table with the given name."""
        found = self.get_table(table_name)
        return found is not None

    def table_names(self) -> list[str]:
        """Return the names of all tables, in schema order."""
        names: list[str] = []
        for entry in self.tables:
            names.append(entry.name)
        return names
205
+
206
+
207
class SchemaConfigManager:
    """Manages schema configuration persistence and application.

    Holds a SchemaConfig, replaces it with an updated copy on every
    update, applies it to database schemas, and converts it to and
    from JSON.
    """

    def __init__(self, config: SchemaConfig | None = None) -> None:
        """Initialize the schema config manager.

        Args:
            config: Initial configuration. If None, an empty config is used.
        """
        self._config = config if config is not None else SchemaConfig()

    def get_config(self) -> SchemaConfig:
        """Get current configuration.

        Returns:
            The current SchemaConfig.
        """
        return self._config

    def update_table_config(self, table_name: str, config: TableConfig) -> None:
        """Update configuration for a table.

        The current config object is not mutated; a new SchemaConfig
        containing the updated table replaces it.

        Args:
            table_name: Name of the table to configure.
            config: New configuration for the table.
        """
        new_tables = dict(self._config.tables)
        new_tables[table_name] = config
        self._config = SchemaConfig(tables=new_tables)

    def update_column_config(self, table_name: str, column_name: str, config: ColumnConfig) -> None:
        """Update configuration for a column.

        The table's other settings are carried over unchanged; a new
        TableConfig with the updated column replaces the old one.

        Args:
            table_name: Name of the table containing the column.
            column_name: Name of the column to configure.
            config: New configuration for the column.
        """
        # Existing table config, or a default one if the table is unconfigured.
        table_config = self._config.get_table_config(table_name)
        new_columns = dict(table_config.columns)
        new_columns[column_name] = config

        # Construct through the model (rather than copying) so the new
        # columns mapping goes through TableConfig validation.
        new_table_config = TableConfig(
            display_name=table_config.display_name,
            description=table_config.description,
            hidden=table_config.hidden,
            columns=new_columns,
        )

        self.update_table_config(table_name, new_table_config)

    def apply_to_schema(self, schema: DatabaseSchema) -> EnhancedDatabaseSchema:
        """Apply configuration to a schema.

        Adds display names, descriptions, and formats from configuration.
        Filters out hidden tables and columns, and drops relationships
        that touch a table which is no longer visible.

        Args:
            schema: The database schema to enhance.

        Returns:
            Enhanced schema with configuration applied and hidden items removed.
        """
        # One shared default for every unconfigured column; it is only read.
        default_column_config = ColumnConfig()
        enhanced_tables: list[EnhancedTableSchema] = []

        for table in schema.tables:
            # Resolve the table config once per table instead of once per
            # hidden-check and per column lookup.
            table_config = self._config.get_table_config(table.name)
            if table_config.hidden:
                continue

            column_configs = table_config.columns
            enhanced_columns: list[EnhancedColumnSchema] = []

            for column in table.columns:
                column_config = column_configs.get(column.name, default_column_config)
                if column_config.hidden:
                    continue

                enhanced_columns.append(
                    EnhancedColumnSchema(
                        name=column.name,
                        data_type=column.data_type,
                        is_nullable=column.is_nullable,
                        is_primary_key=column.is_primary_key,
                        default_value=column.default_value,
                        display_name=column_config.display_name,
                        description=column_config.description,
                        format=column_config.format,
                        date_format=column_config.date_format,
                    )
                )

            enhanced_tables.append(
                EnhancedTableSchema(
                    name=table.name,
                    schema_name=table.schema_name,
                    columns=enhanced_columns,
                    display_name=table_config.display_name,
                    description=table_config.description,
                )
            )

        # Keep only relationships whose both endpoints are still visible.
        visible_table_names = {t.name for t in enhanced_tables}
        visible_relationships = [
            rel
            for rel in schema.relationships
            if rel.from_table in visible_table_names and rel.to_table in visible_table_names
        ]

        return EnhancedDatabaseSchema(
            tables=enhanced_tables,
            relationships=visible_relationships,
        )

    def to_json(self) -> str:
        """Serialize configuration to JSON.

        Returns:
            JSON string representation of the configuration (2-space indent).
        """
        return self._config.model_dump_json(indent=2)

    @classmethod
    def from_json(cls, json_str: str) -> SchemaConfigManager:
        """Deserialize configuration from JSON.

        Args:
            json_str: JSON string to parse.

        Returns:
            New SchemaConfigManager with the parsed configuration.
        """
        data = json.loads(json_str)
        config = SchemaConfig.model_validate(data)
        return cls(config)