planar 0.10.0__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- planar/app.py +26 -6
- planar/cli.py +26 -0
- planar/data/__init__.py +1 -0
- planar/data/config.py +12 -1
- planar/data/connection.py +89 -4
- planar/data/dataset.py +13 -7
- planar/data/utils.py +145 -25
- planar/db/alembic/env.py +68 -57
- planar/db/alembic.ini +1 -1
- planar/files/storage/config.py +7 -1
- planar/routers/dataset_router.py +5 -1
- planar/routers/info.py +79 -36
- planar/scaffold_templates/pyproject.toml.j2 +1 -1
- planar/testing/fixtures.py +7 -4
- planar/testing/planar_test_client.py +8 -0
- planar/version.py +27 -0
- planar-0.12.0.dist-info/METADATA +202 -0
- {planar-0.10.0.dist-info → planar-0.12.0.dist-info}/RECORD +20 -71
- planar/ai/test_agent_serialization.py +0 -229
- planar/ai/test_agent_tool_step_display.py +0 -78
- planar/data/test_dataset.py +0 -358
- planar/files/storage/test_azure_blob.py +0 -435
- planar/files/storage/test_local_directory.py +0 -162
- planar/files/storage/test_s3.py +0 -299
- planar/files/test_files.py +0 -282
- planar/human/test_human.py +0 -385
- planar/logging/test_formatter.py +0 -327
- planar/modeling/mixins/test_auditable.py +0 -97
- planar/modeling/mixins/test_timestamp.py +0 -134
- planar/modeling/mixins/test_uuid_primary_key.py +0 -52
- planar/routers/test_agents_router.py +0 -174
- planar/routers/test_dataset_router.py +0 -429
- planar/routers/test_files_router.py +0 -49
- planar/routers/test_object_config_router.py +0 -367
- planar/routers/test_routes_security.py +0 -168
- planar/routers/test_rule_router.py +0 -470
- planar/routers/test_workflow_router.py +0 -564
- planar/rules/test_data/account_dormancy_management.json +0 -223
- planar/rules/test_data/airline_loyalty_points_calculator.json +0 -262
- planar/rules/test_data/applicant_risk_assessment.json +0 -435
- planar/rules/test_data/booking_fraud_detection.json +0 -407
- planar/rules/test_data/cellular_data_rollover_system.json +0 -258
- planar/rules/test_data/clinical_trial_eligibility_screener.json +0 -437
- planar/rules/test_data/customer_lifetime_value.json +0 -143
- planar/rules/test_data/import_duties_calculator.json +0 -289
- planar/rules/test_data/insurance_prior_authorization.json +0 -443
- planar/rules/test_data/online_check_in_eligibility_system.json +0 -254
- planar/rules/test_data/order_consolidation_system.json +0 -375
- planar/rules/test_data/portfolio_risk_monitor.json +0 -471
- planar/rules/test_data/supply_chain_risk.json +0 -253
- planar/rules/test_data/warehouse_cross_docking.json +0 -237
- planar/rules/test_rules.py +0 -1494
- planar/security/tests/test_auth_middleware.py +0 -162
- planar/security/tests/test_authorization_context.py +0 -78
- planar/security/tests/test_cedar_basics.py +0 -41
- planar/security/tests/test_cedar_policies.py +0 -158
- planar/security/tests/test_jwt_principal_context.py +0 -179
- planar/test_app.py +0 -142
- planar/test_cli.py +0 -394
- planar/test_config.py +0 -515
- planar/test_object_config.py +0 -527
- planar/test_object_registry.py +0 -14
- planar/test_sqlalchemy.py +0 -193
- planar/test_utils.py +0 -105
- planar/testing/test_memory_storage.py +0 -143
- planar/workflows/test_concurrency_detection.py +0 -120
- planar/workflows/test_lock_timeout.py +0 -140
- planar/workflows/test_serialization.py +0 -1203
- planar/workflows/test_suspend_deserialization.py +0 -231
- planar/workflows/test_workflow.py +0 -2005
- planar-0.10.0.dist-info/METADATA +0 -323
- {planar-0.10.0.dist-info → planar-0.12.0.dist-info}/WHEEL +0 -0
- {planar-0.10.0.dist-info → planar-0.12.0.dist-info}/entry_points.txt +0 -0
planar/app.py
CHANGED
@@ -28,7 +28,6 @@ from planar.routers import (
     create_workflow_router,
 )
 from planar.routers.agents_router import create_agent_router
-from planar.routers.dataset_router import create_dataset_router
 from planar.routers.entity_router import create_entities_router
 from planar.routers.object_config_router import create_object_config_router
 from planar.routers.rule import create_rule_router
@@ -130,14 +129,27 @@ class PlanarApp:
             create_human_task_routes(),
             prefix="/human-tasks",
         )
-
-
-
-
+
+        if self.config.data:
+            try:
+                from planar.routers.dataset_router import create_dataset_router
+
+                self.router_v1.include_router(
+                    create_dataset_router(),
+                    prefix="/datasets",
+                )
+            except ImportError:
+                logger.error(
+                    "Data dependencies not installed. Ensure you install the `data` optional dependency in your project (planar[data])"
+                )
+                raise
 
         self.router_v1.include_router(
             create_info_router(
-                title=title or "Planar API",
+                title=title or "Planar API",
+                description=description or "Planar API",
+                config=self.config,
+                registry=self._object_registry,
             ),
             prefix="",
         )
@@ -262,6 +274,14 @@ class PlanarApp:
         # Reset the config in the context
         config_var.reset(config_tok)
 
+        if self.config.data:
+            try:
+                from planar.data.connection import reset_connection_cache
+            except ImportError as exc:  # pragma: no cover - optional dependency
+                logger.debug("skipping data connection cleanup", error=str(exc))
+            else:
+                await reset_connection_cache()
+
         await self.db_manager.disconnect()
 
         if self.storage:
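The dataset routes are now mounted only when a `data` section is configured, and the router import is deferred so a missing optional dependency surfaces as a clear error. The sketch below restates that guard outside the `PlanarApp` class; `mount_dataset_routes` and its parameters are illustrative names, not part of planar's API.

```python
import logging

logger = logging.getLogger(__name__)


def mount_dataset_routes(router, data_config) -> None:
    """Illustrative helper: only mount dataset routes when data is configured."""
    if not data_config:
        return
    try:
        from planar.routers.dataset_router import create_dataset_router
    except ImportError:
        logger.error(
            "Data dependencies not installed; install the `data` extra (planar[data])"
        )
        raise
    router.include_router(create_dataset_router(), prefix="/datasets")
```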
planar/cli.py
CHANGED
@@ -2,6 +2,7 @@ import os
 import subprocess
 import sys
 from pathlib import Path
+from typing import Annotated
 
 import typer
 import uvicorn
@@ -9,10 +10,35 @@ from jinja2 import Environment as JinjaEnvironment
 from jinja2 import FileSystemLoader
 
 from planar.config import Environment
+from planar.version import get_version
 
 app = typer.Typer(help="Planar CLI tool")
 
 
+def version_callback(value: bool) -> bool:
+    if value:
+        typer.echo(f"planar {get_version()}")
+        raise typer.Exit()
+    return value
+
+
+@app.callback()
+def root_callback(
+    version: Annotated[
+        bool | None,
+        typer.Option(
+            "--version",
+            "-v",
+            help="Show Planar version and exit.",
+            callback=version_callback,
+            is_flag=True,
+            is_eager=True,
+        ),
+    ] = None,
+) -> None:
+    """Entry point for Planar CLI with shared options."""
+
+
 def find_default_app_path() -> Path:
     """Checks for default app file paths (app.py, then main.py)."""
     for filename in ["app.py", "main.py"]:
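The CLI gains an eager `--version`/`-v` flag handled by a root callback. Below is a minimal, self-contained sketch of the same Typer pattern; the tool name and version string are placeholders rather than planar's actual values.

```python
from typing import Annotated

import typer

app = typer.Typer(help="Example CLI")


def version_callback(value: bool) -> bool:
    if value:
        typer.echo("example 0.1.0")
        raise typer.Exit()
    return value


@app.callback()
def main(
    version: Annotated[
        bool | None,
        typer.Option("--version", callback=version_callback, is_eager=True),
    ] = None,
) -> None:
    """Shared options live on the root callback."""


@app.command()
def hello() -> None:
    typer.echo("hello")


if __name__ == "__main__":
    app()
```

Because the option is eager, Click/Typer evaluates its callback before resolving any subcommand, so the flag works on its own (e.g. `planar --version`).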
planar/data/__init__.py
CHANGED
planar/data/config.py
CHANGED
@@ -2,7 +2,7 @@
 
 from typing import Annotated, Literal
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field
 
 from planar.files.storage.config import StorageConfig
 
@@ -13,6 +13,8 @@ class DuckDBCatalogConfig(BaseModel):
     type: Literal["duckdb"]
     path: str  # Path to .ducklake file
 
+    model_config = ConfigDict(frozen=True)
+
 
 class PostgresCatalogConfig(BaseModel):
     """Configuration for PostgreSQL catalog backend."""
@@ -24,6 +26,8 @@ class PostgresCatalogConfig(BaseModel):
     password: str | None = None
     db: str
 
+    model_config = ConfigDict(frozen=True)
+
 
 class SQLiteCatalogConfig(BaseModel):
     """Configuration for SQLite catalog backend."""
@@ -31,6 +35,8 @@ class SQLiteCatalogConfig(BaseModel):
     type: Literal["sqlite"]
     path: str  # Path to .sqlite file
 
+    model_config = ConfigDict(frozen=True)
+
 
 # Discriminated union for catalog configurations
 CatalogConfig = Annotated[
@@ -47,3 +53,8 @@ class DataConfig(BaseModel):
 
     # Optional settings
     catalog_name: str = "planar_data"  # Default catalog name in Ducklake
+
+    model_config = ConfigDict(frozen=True)
+
+    def is_sqlite_catalog(self) -> bool:
+        return self.catalog.type == "sqlite"
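Freezing these models is what makes them usable as cache keys: in pydantic v2, `model_config = ConfigDict(frozen=True)` makes instances immutable and hashable, and `planar/data/connection.py` relies on that by calling `hash(config.data)`. A small sketch with example values:

```python
from typing import Literal

from pydantic import BaseModel, ConfigDict


class DuckDBCatalogConfig(BaseModel):
    # Mirrors the fields shown above; the path below is just an example value.
    type: Literal["duckdb"]
    path: str

    model_config = ConfigDict(frozen=True)


a = DuckDBCatalogConfig(type="duckdb", path="/tmp/catalog.ducklake")
b = DuckDBCatalogConfig(type="duckdb", path="/tmp/catalog.ducklake")

assert a == b and hash(a) == hash(b)   # frozen models hash by field values
cache = {a: "cached-connection"}       # so they can key a connection cache
assert cache[b] == "cached-connection"
```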
planar/data/connection.py
CHANGED
@@ -1,4 +1,5 @@
 import asyncio
+from dataclasses import dataclass
 
 import ibis
 from ibis.backends.duckdb import Backend as DuckDBBackend
@@ -17,6 +18,43 @@ from planar.session import get_config
 logger = get_logger(__name__)
 
 
+@dataclass
+class _ConnectionPool:
+    connections: list[DuckDBBackend]
+    cursor: int = 0
+
+
+# In production a Planar app typically runs with a single data config, so we only
+# ever have one signature, but we still rotate through a handful of cached
+# backends to reduce the risk of concurrent calls sharing the same DuckDB
+# connection. During testing we create many ephemeral configs (temp dirs, sqlite
+# files, etc.), so the cache also avoids paying the attachment cost on every
+# request. We keep up to `_MAX_CONNECTIONS_PER_SIGNATURE` backends per signature
+# and hand them out in round-robin order; concurrency safety ultimately depends on
+# DuckDB tolerating overlapping use of an individual backend.
+_connection_cache: dict[int, _ConnectionPool] = {}
+_cache_lock: asyncio.Lock | None = None
+
+# Maximum number of cached connections per configuration signature.
+_MAX_CONNECTIONS_PER_SIGNATURE = 10
+
+
+def _config_signature(config: PlanarConfig) -> int:
+    """Create a stable signature for caching connections."""
+
+    assert config.data is not None, "data configuration must be set"
+    return hash(config.data)
+
+
+async def _close_backend(connection: DuckDBBackend) -> None:
+    close_fn = getattr(connection, "close", None)
+    try:
+        if callable(close_fn):
+            await asyncio.to_thread(close_fn)
+    except Exception as exc:
+        logger.warning("failed to close DuckDB connection", error=str(exc))
+
+
 async def _create_connection(config: PlanarConfig) -> DuckDBBackend:
     """Create Ibis DuckDB connection with Ducklake."""
     data_config = config.data
@@ -95,8 +133,21 @@ async def _create_connection(config: PlanarConfig) -> DuckDBBackend:
     return con
 
 
-
-
+def _get_cache_lock() -> asyncio.Lock:
+    # Create a lock on the first call to this function, or re-create it if the
+    # loop has changed (happens on tests).
+    global _cache_lock
+    loop = asyncio.get_running_loop()
+    lock = _cache_lock
+    if lock is None or getattr(lock, "_loop", None) is not loop:
+        lock = asyncio.Lock()
+        _cache_lock = lock
+    return lock
+
+
+async def get_connection() -> DuckDBBackend:
+    """Return a cached DuckDB connection using round-robin selection."""
+
     config = get_config()
 
     if not config.data:
@@ -104,5 +155,39 @@ async def _get_connection() -> DuckDBBackend:
             "Data configuration not found. Please configure 'data' in your planar.yaml"
         )
 
-
-
+    signature = _config_signature(config)
+    lock = _get_cache_lock()
+
+    async with lock:
+        pool = _connection_cache.get(signature)
+
+        if pool is None:
+            connection = await _create_connection(config)
+            _connection_cache[signature] = _ConnectionPool(connections=[connection])
+            return connection
+
+        if len(pool.connections) < _MAX_CONNECTIONS_PER_SIGNATURE:
+            connection = await _create_connection(config)
+            pool.connections.append(connection)
+            return connection
+
+        connection = pool.connections[pool.cursor]
+        pool.cursor = (pool.cursor + 1) % len(pool.connections)
+        return connection
+
+
+async def reset_connection_cache() -> None:
+    """Reset the cached DuckDB connection, closing it if necessary."""
+
+    lock = _get_cache_lock()
+
+    async with lock:
+        pools = list(_connection_cache.values())
+        _connection_cache.clear()
+
+        for pool in pools:
+            for connection in pool.connections:
+                await _close_backend(connection)
+
+        global _cache_lock
+        _cache_lock = None
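`get_connection()` now returns one of up to `_MAX_CONNECTIONS_PER_SIGNATURE` cached DuckDB backends per config signature, handed out round-robin under an asyncio lock, and `reset_connection_cache()` closes them all. A hedged usage sketch, assuming an active Planar app context with a `data` section configured; the helper functions here are illustrative:

```python
import asyncio

from planar.data.connection import get_connection, reset_connection_cache


async def count_rows(table_name: str) -> int:
    # get_connection() hands back one of the cached DuckDB backends.
    con = await get_connection()
    table = await asyncio.to_thread(con.table, table_name)
    return int(await asyncio.to_thread(lambda: table.count().execute()))


async def shutdown() -> None:
    # Close and forget every cached backend, e.g. during app teardown.
    await reset_connection_cache()
```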
planar/data/dataset.py
CHANGED
@@ -6,10 +6,11 @@ from typing import Literal, Self
 import ibis
 import polars as pl
 import pyarrow as pa
+from ibis.backends.duckdb import Backend as DuckDBBackend
 from ibis.common.exceptions import TableNotFound
 from pydantic import BaseModel
 
-from planar.data.connection import _get_connection
+from planar.data.connection import get_connection
 from planar.logging import get_logger
 
 from .exceptions import DataError, DatasetAlreadyExistsError, DatasetNotFoundError
@@ -67,7 +68,11 @@ class PlanarDataset(BaseModel):
 
     async def exists(self) -> bool:
        """Check if the dataset exists in Ducklake."""
-        con = await _get_connection()
+        con = await get_connection()
+        return await self._table_exists(con)
+
+    async def _table_exists(self, con: DuckDBBackend) -> bool:
+        """Check for table existence using the provided connection."""
 
         try:
             # TODO: Query for the table name directly
@@ -88,11 +93,13 @@ class PlanarDataset(BaseModel):
             data: Data to write (Polars DataFrame/LazyFrame, PyArrow Table, or Ibis expression)
             mode: Write mode - "append" or "overwrite"
         """
-        con = await _get_connection()
         overwrite = mode == "overwrite"
 
         try:
-
+            con = await get_connection()
+            table_exists = await self._table_exists(con)
+
+            if not table_exists:
                 await asyncio.to_thread(
                     con.create_table, self.name, data, overwrite=overwrite
                 )
@@ -133,9 +140,8 @@ class PlanarDataset(BaseModel):
         Returns:
             Ibis table expression that can be further filtered using Ibis methods
         """
-        con = await _get_connection()
-
         try:
+            con = await get_connection()
             table = await asyncio.to_thread(con.table, self.name)
 
             if columns:
@@ -162,8 +168,8 @@ class PlanarDataset(BaseModel):
 
     async def delete(self) -> None:
         """Delete the dataset."""
-        con = await _get_connection()
         try:
+            con = await get_connection()
             await asyncio.to_thread(con.drop_table, self.name, force=True)
             logger.info("deleted dataset", dataset_name=self.name)
         except Exception as e:
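`PlanarDataset` methods now acquire the shared connection inside their `try` blocks and reuse `_table_exists` for the existence check. The sketch below exercises only the methods visible in this diff (`write`, `exists`, `delete`); it assumes a running Planar app with the data extra installed and configured, and the dataset name is an example.

```python
import polars as pl

from planar.data.dataset import PlanarDataset


async def example() -> None:
    ds = PlanarDataset(name="orders")           # "orders" is an example name
    df = pl.DataFrame({"id": [1, 2, 3], "total": [9.5, 12.0, 3.25]})

    await ds.write(df, mode="overwrite")        # create or replace the table
    await ds.write(df, mode="append")           # append more rows

    if await ds.exists():
        await ds.delete()
```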
planar/data/utils.py
CHANGED
@@ -1,42 +1,45 @@
 import asyncio
-from typing import TypedDict
+from collections import defaultdict
+from typing import Sequence, TypedDict
 
+import ibis
 import ibis.expr.datatypes as dt
+import pyarrow as pa
+from ibis.backends.duckdb import Backend as DuckDBBackend
 from ibis.common.exceptions import TableNotFound
+from sqlglot import exp
 
-from planar.data.connection import _get_connection
+from planar.data.connection import get_connection
 from planar.data.dataset import PlanarDataset
 from planar.data.exceptions import DatasetNotFoundError
 from planar.logging import get_logger
+from planar.session import get_config
 
 logger = get_logger(__name__)
 
 
-# TODO: consider connection pooling or memoize the connection
-
-
 async def list_datasets(limit: int = 100, offset: int = 0) -> list[PlanarDataset]:
-    conn = await _get_connection()
-    tables = await asyncio.to_thread(conn.list_tables)
+    conn = await get_connection()
+    tables = sorted(await asyncio.to_thread(conn.list_tables))[offset : offset + limit]
     return [PlanarDataset(name=table) for table in tables]
 
 
 async def list_schemas() -> list[str]:
-
-
-
-
-        "pg_catalog",
-    ]
+    config = get_config()
+
+    if config.data is None:
+        return []
 
-
+    METADATA_SCHEMAS = [config.data.catalog_name, "main"]
+
+    conn = await get_connection()
 
     # in ibis, "databases" are schemas in the traditional sense
     # e.g. psql: schema == ibis: database
     # https://ibis-project.org/concepts/backend-table-hierarchy
     schemas = await asyncio.to_thread(conn.list_databases)
 
-    return [schema for schema in schemas if schema
+    return [schema for schema in schemas if schema in METADATA_SCHEMAS]
 
 
 async def get_dataset(dataset_name: str, schema_name: str = "main") -> PlanarDataset:
@@ -51,7 +54,7 @@ async def get_dataset(dataset_name: str, schema_name: str = "main") -> PlanarDataset:
 
 
 async def get_dataset_row_count(dataset_name: str) -> int:
-    conn = await _get_connection()
+    conn = await get_connection()
 
     try:
         value = await asyncio.to_thread(
@@ -72,18 +75,135 @@ class DatasetMetadata(TypedDict):
     row_count: int
 
 
-async def
-
-
-
+async def _fetch_column_schemas(
+    conn: DuckDBBackend,
+    dataset_names: Sequence[str],
+    schema_name: str,
+) -> dict[str, dict[str, dt.DataType]]:
+    columns = conn.table("columns", database="information_schema")
+    schema_literal = ibis.literal(schema_name)
+    dataset_literals = [ibis.literal(name) for name in dataset_names]
+    filtered = columns.filter(
+        (columns.table_schema == schema_literal)
+        & (columns.table_name.isin(dataset_literals))
+    )
+
+    selected = filtered.select(
+        columns.table_name.name("table_name"),
+        columns.column_name.name("column_name"),
+        columns.ordinal_position.name("ordinal_position"),
+        columns.data_type.name("data_type"),
+        columns.is_nullable.name("is_nullable"),
+    )
+
+    arrow_table: pa.Table = await asyncio.to_thread(selected.to_pyarrow)
+    rows = arrow_table.to_pylist()
+
+    schema_fields: dict[str, list[tuple[int, str, dt.DataType]]] = defaultdict(list)
+    type_mapper = conn.compiler.type_mapper
+
+    for row in rows:
+        table_name = row["table_name"]
+        column_name = row["column_name"]
+        ordinal_position = row["ordinal_position"]
+        dtype = type_mapper.from_string(
+            row["data_type"], nullable=row.get("is_nullable") == "YES"
+        )
 
-
-
-
-
+        schema_fields[table_name].append((ordinal_position, column_name, dtype))
+
+    ordered_fields: dict[str, dict[str, dt.DataType]] = {}
+    for table_name, fields in schema_fields.items():
+        ordered_fields[table_name] = {
+            column_name: dtype
+            for _, column_name, dtype in sorted(fields, key=lambda entry: entry[0])
+        }
+
+    return ordered_fields
+
+
+async def _fetch_row_counts(
+    conn: DuckDBBackend,
+    dataset_names: Sequence[str],
+    schema_name: str,
+) -> dict[str, int]:
+    if not dataset_names:
+        return {}
+
+    quoted = conn.compiler.quoted
+    count_queries: list[exp.Select] = []
+
+    for dataset_name in dataset_names:
+        table_expr = exp.Table(
+            this=exp.Identifier(this=dataset_name, quoted=quoted),
+            db=(
+                exp.Identifier(this=schema_name, quoted=quoted) if schema_name else None
+            ),
         )
+        select_expr = (
+            exp.Select()
+            .select(
+                exp.Literal.string(dataset_name).as_("dataset_name"),
+                exp.Count(this=exp.Star()).as_("row_count"),
+            )
+            .from_(table_expr)
+        )
+        count_queries.append(select_expr)
+
+    if not count_queries:
+        return {}
+
+    union_query: exp.Expression = count_queries[0]
+    for query in count_queries[1:]:
+        union_query = exp.Union(this=union_query, expression=query, distinct=False)
+
+    def _execute() -> dict[str, int]:
+        with conn._safe_raw_sql(union_query) as cursor:  # type: ignore[attr-defined]
+            rows = cursor.fetchall()
 
-    return
+            return {str(dataset_name): int(row_count) for dataset_name, row_count in rows}
 
+    return await asyncio.to_thread(_execute)
+
+
+async def get_datasets_metadata(
+    dataset_names: Sequence[str], schema_name: str
+) -> dict[str, DatasetMetadata]:
+    if not dataset_names:
+        return {}
+
+    dataset_list = list(dict.fromkeys(dataset_names))
+    if not dataset_list:
+        return {}
+
+    conn = await get_connection()
+
+    schemas = await _fetch_column_schemas(conn, dataset_list, schema_name)
+    row_counts = await _fetch_row_counts(conn, list(schemas.keys()), schema_name)
+
+    metadata: dict[str, DatasetMetadata] = {}
+
+    for dataset_name in dataset_list:
+        schema = schemas.get(dataset_name)
+        row_count = row_counts.get(dataset_name)
+
+        if not schema or row_count is None:
+            continue
+
+        metadata[dataset_name] = DatasetMetadata(
+            schema=schema,
+            row_count=row_count,
+        )
+
+    return metadata
+
+
+async def get_dataset_metadata(
+    dataset_name: str, schema_name: str
+) -> DatasetMetadata | None:
+    try:
+        metadata = await get_datasets_metadata([dataset_name], schema_name)
     except TableNotFound:
         return None
+
+    return metadata.get(dataset_name)
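The metadata helpers now batch their work: `_fetch_column_schemas` reads `information_schema.columns` once for all requested tables, `_fetch_row_counts` unions per-table `COUNT(*)` queries built with sqlglot, and `get_datasets_metadata` merges the two, with `get_dataset_metadata` as a single-dataset wrapper. A hedged usage sketch, assuming an active data configuration and example tables in the `main` schema:

```python
from planar.data.utils import get_dataset_metadata, get_datasets_metadata


async def show_metadata() -> None:
    # Batch lookup: one information_schema scan plus one unioned COUNT(*) query.
    many = await get_datasets_metadata(["orders", "customers"], schema_name="main")
    for name, meta in many.items():
        print(name, meta["row_count"], list(meta["schema"]))

    # Single-dataset wrapper; returns None if the table is missing.
    one = await get_dataset_metadata("orders", schema_name="main")
    if one is not None:
        print(one["row_count"])
```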
planar/db/alembic/env.py
CHANGED
@@ -1,9 +1,11 @@
+import asyncio
 from functools import wraps
 from logging.config import fileConfig
 
 import alembic.ddl.base as alembic_base
 from alembic import context
-from sqlalchemy import Connection, engine_from_config, pool
+from sqlalchemy import Connection, pool
+from sqlalchemy.ext.asyncio import create_async_engine
 
 from planar.db import PLANAR_FRAMEWORK_METADATA, PLANAR_SCHEMA
 
@@ -72,6 +74,69 @@ alembic_base.format_table_name = schema_translate_wrapper(
 )
 
 
+async def run_migrations_online_async() -> None:
+    """Run migrations in 'online' mode using async engine for development."""
+    # Import models to ensure they're registered with PLANAR_FRAMEWORK_METADATA
+    try:
+        from planar.files.models import PlanarFileMetadata  # noqa: F401, PLC0415
+        from planar.human.models import HumanTask  # noqa: F401, PLC0415
+        from planar.object_config.models import (  # noqa: F401, PLC0415
+            ObjectConfiguration,
+        )
+        from planar.workflows.models import (  # noqa: PLC0415
+            LockedResource,  # noqa: F401
+            Workflow,  # noqa: F401
+            WorkflowEvent,  # noqa: F401
+            WorkflowStep,  # noqa: F401
+        )
+    except ImportError as e:
+        raise RuntimeError(
+            f"Failed to import system models for migration generation: {e}"
+        )
+
+    config_dict = config.get_section(config.config_ini_section, {})
+    url = config_dict["sqlalchemy.url"]
+    is_sqlite = url.startswith("sqlite://")
+
+    # Create async engine
+    connectable = create_async_engine(
+        url,
+        poolclass=pool.NullPool,
+        execution_options={
+            # SQLite doesn't support schemas, so we need to translate the planar schema
+            # name to None in order to ignore it.
+            "schema_translate_map": sqlite_schema_translate_map if is_sqlite else {},
+        },
+    )
+
+    async with connectable.connect() as connection:
+        is_sqlite = connection.dialect.name == "sqlite"
+        if is_sqlite:
+            connection.dialect.default_schema_name = PLANAR_SCHEMA
+
+        def do_run_migrations(sync_conn):
+            context.configure(
+                connection=sync_conn,
+                target_metadata=target_metadata,
+                # For SQLite, don't use schema since it's not supported
+                version_table_schema=None if is_sqlite else PLANAR_SCHEMA,
+                include_schemas=True,
+                include_name=include_name,
+                # SQLite doesn't support alter table, so we need to use render_as_batch
+                # to create the tables in a single transaction. For other databases,
+                # the batch op is no-op.
+                # https://alembic.sqlalchemy.org/en/latest/batch.html#running-batch-migrations-for-sqlite-and-other-databases
+                render_as_batch=True,
+            )
+
+            with context.begin_transaction():
+                context.run_migrations()
+
+        await connection.run_sync(do_run_migrations)
+
+    await connectable.dispose()
+
+
 def run_migrations_online() -> None:
     """Run migrations in 'online' mode.
 
@@ -103,62 +168,8 @@ def run_migrations_online() -> None:
         with context.begin_transaction():
             context.run_migrations()
     else:
-        # Development mode:
-
-        # Import models to ensure they're registered with PLANAR_FRAMEWORK_METADATA
-        try:
-            from planar.files.models import PlanarFileMetadata  # noqa: F401, PLC0415
-            from planar.human.models import HumanTask  # noqa: F401, PLC0415
-            from planar.object_config.models import (  # noqa: F401, PLC0415
-                ObjectConfiguration,
-            )
-            from planar.workflows.models import (  # noqa: PLC0415
-                LockedResource,  # noqa: F401
-                Workflow,  # noqa: F401
-                WorkflowEvent,  # noqa: F401
-                WorkflowStep,  # noqa: F401
-            )
-        except ImportError as e:
-            raise RuntimeError(
-                f"Failed to import system models for migration generation: {e}"
-            )
-
-        config_dict = config.get_section(config.config_ini_section, {})
-        url = config_dict["sqlalchemy.url"]
-        is_sqlite = url.startswith("sqlite://")
-        translate_map = sqlite_schema_translate_map if is_sqlite else {}
-        connectable = engine_from_config(
-            config_dict,
-            prefix="sqlalchemy.",
-            poolclass=pool.NullPool,
-            execution_options={
-                # SQLite doesn't support schemas, so we need to translate the planar schema
-                # name to None in order to ignore it.
-                "schema_translate_map": translate_map,
-            },
-        )
-
-        with connectable.connect() as connection:
-            is_sqlite = connection.dialect.name == "sqlite"
-            if is_sqlite:
-                connection.dialect.default_schema_name = PLANAR_SCHEMA
-
-            context.configure(
-                connection=connection,
-                target_metadata=target_metadata,
-                # For SQLite, don't use schema since it's not supported
-                version_table_schema=None if is_sqlite else PLANAR_SCHEMA,
-                include_schemas=True,
-                include_name=include_name,
-                # SQLite doesn't support alter table, so we need to use render_as_batch
-                # to create the tables in a single transaction. For other databases,
-                # the batch op is no-op.
-                # https://alembic.sqlalchemy.org/en/latest/batch.html#running-batch-migrations-for-sqlite-and-other-databases
-                render_as_batch=True,
-            )
-
-            with context.begin_transaction():
-                context.run_migrations()
+        # Development mode: run migrations asynchronously
+        asyncio.run(run_migrations_online_async())
 
 
 if context.is_offline_mode():