planar 0.9.3__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to their public registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the registry.
- planar/ai/agent.py +2 -1
- planar/ai/agent_base.py +24 -5
- planar/ai/state.py +17 -0
- planar/app.py +18 -1
- planar/data/connection.py +108 -0
- planar/data/dataset.py +11 -104
- planar/data/utils.py +89 -0
- planar/db/alembic/env.py +25 -1
- planar/files/storage/azure_blob.py +1 -1
- planar/registry_items.py +2 -0
- planar/routers/dataset_router.py +213 -0
- planar/routers/info.py +79 -36
- planar/routers/models.py +1 -0
- planar/routers/workflow.py +2 -0
- planar/scaffold_templates/pyproject.toml.j2 +1 -1
- planar/security/authorization.py +31 -3
- planar/security/default_policies.cedar +25 -0
- planar/testing/fixtures.py +34 -1
- planar/testing/planar_test_client.py +1 -1
- planar/workflows/decorators.py +2 -1
- planar/workflows/wrappers.py +1 -0
- {planar-0.9.3.dist-info → planar-0.11.0.dist-info}/METADATA +9 -1
- {planar-0.9.3.dist-info → planar-0.11.0.dist-info}/RECORD +25 -72
- {planar-0.9.3.dist-info → planar-0.11.0.dist-info}/WHEEL +1 -1
- planar/ai/test_agent_serialization.py +0 -229
- planar/ai/test_agent_tool_step_display.py +0 -78
- planar/data/test_dataset.py +0 -354
- planar/files/storage/test_azure_blob.py +0 -435
- planar/files/storage/test_local_directory.py +0 -162
- planar/files/storage/test_s3.py +0 -299
- planar/files/test_files.py +0 -282
- planar/human/test_human.py +0 -385
- planar/logging/test_formatter.py +0 -327
- planar/modeling/mixins/test_auditable.py +0 -97
- planar/modeling/mixins/test_timestamp.py +0 -134
- planar/modeling/mixins/test_uuid_primary_key.py +0 -52
- planar/routers/test_agents_router.py +0 -174
- planar/routers/test_files_router.py +0 -49
- planar/routers/test_object_config_router.py +0 -367
- planar/routers/test_routes_security.py +0 -168
- planar/routers/test_rule_router.py +0 -470
- planar/routers/test_workflow_router.py +0 -539
- planar/rules/test_data/account_dormancy_management.json +0 -223
- planar/rules/test_data/airline_loyalty_points_calculator.json +0 -262
- planar/rules/test_data/applicant_risk_assessment.json +0 -435
- planar/rules/test_data/booking_fraud_detection.json +0 -407
- planar/rules/test_data/cellular_data_rollover_system.json +0 -258
- planar/rules/test_data/clinical_trial_eligibility_screener.json +0 -437
- planar/rules/test_data/customer_lifetime_value.json +0 -143
- planar/rules/test_data/import_duties_calculator.json +0 -289
- planar/rules/test_data/insurance_prior_authorization.json +0 -443
- planar/rules/test_data/online_check_in_eligibility_system.json +0 -254
- planar/rules/test_data/order_consolidation_system.json +0 -375
- planar/rules/test_data/portfolio_risk_monitor.json +0 -471
- planar/rules/test_data/supply_chain_risk.json +0 -253
- planar/rules/test_data/warehouse_cross_docking.json +0 -237
- planar/rules/test_rules.py +0 -1494
- planar/security/tests/test_auth_middleware.py +0 -162
- planar/security/tests/test_authorization_context.py +0 -78
- planar/security/tests/test_cedar_basics.py +0 -41
- planar/security/tests/test_cedar_policies.py +0 -158
- planar/security/tests/test_jwt_principal_context.py +0 -179
- planar/test_app.py +0 -142
- planar/test_cli.py +0 -394
- planar/test_config.py +0 -515
- planar/test_object_config.py +0 -527
- planar/test_object_registry.py +0 -14
- planar/test_sqlalchemy.py +0 -193
- planar/test_utils.py +0 -105
- planar/testing/test_memory_storage.py +0 -143
- planar/workflows/test_concurrency_detection.py +0 -120
- planar/workflows/test_lock_timeout.py +0 -140
- planar/workflows/test_serialization.py +0 -1203
- planar/workflows/test_suspend_deserialization.py +0 -231
- planar/workflows/test_workflow.py +0 -2005
- {planar-0.9.3.dist-info → planar-0.11.0.dist-info}/entry_points.txt +0 -0
planar/ai/agent.py
CHANGED
@@ -50,7 +50,8 @@ class AgentWorkflowNotifier(AgentEventEmitter):
 class Agent[
     TInput: BaseModel | str,
     TOutput: BaseModel | str,
-](AgentBase[TInput, TOutput]):
+    TDeps,
+](AgentBase[TInput, TOutput, TDeps]):
     model: models.KnownModelName | models.Model = "openai:gpt-4o"

     async def run_step(
planar/ai/agent_base.py
CHANGED
@@ -15,6 +15,7 @@ from pydantic import BaseModel
 from pydantic_ai.settings import ModelSettings

 from planar.ai.models import AgentConfig, AgentEventEmitter, AgentRunResult
+from planar.ai.state import delete_state, set_state
 from planar.logging import get_logger
 from planar.modeling.field_helpers import JsonSchema
 from planar.utils import P, R, T, U
@@ -29,6 +30,7 @@ class AgentBase[
     # TODO: add `= str` default when we upgrade to 3.13
     TInput: BaseModel | str,
     TOutput: BaseModel | str,
+    TState,
 ](abc.ABC):
     """An LLM-powered agent that can be called directly within workflows."""

@@ -45,6 +47,7 @@ class AgentBase[
     )
     event_emitter: AgentEventEmitter | None = None
     durable: bool = True
+    state_type: Type[TState] | None = None

     # TODO: move here to serialize to frontend
     #
@@ -91,14 +94,16 @@ class AgentBase[

     @overload
     async def __call__(
-        self: "AgentBase[TInput, str]",
+        self: "AgentBase[TInput, str, TState]",
         input_value: TInput,
+        state: TState | None = None,
     ) -> AgentRunResult[str]: ...

     @overload
     async def __call__(
-        self: "AgentBase[TInput, TOutput]",
+        self: "AgentBase[TInput, TOutput, TState]",
         input_value: TInput,
+        state: TState | None = None,
     ) -> AgentRunResult[TOutput]: ...

     def as_step_if_durable(
@@ -120,6 +125,7 @@ class AgentBase[
     async def __call__(
         self,
         input_value: TInput,
+        state: TState | None = None,
     ) -> AgentRunResult[Any]:
         if self.input_type is not None and not isinstance(input_value, self.input_type):
             raise ValueError(
@@ -147,9 +153,22 @@ class AgentBase[
             return_type=AgentRunResult[self.output_type],
         )

-        result = await run_step(input_value=input_value)
-        # Cast the result to ensure type compatibility
-        return cast(AgentRunResult[TOutput], result)
+        if state is not None:
+            if self.state_type is None:
+                raise ValueError("state cannot be provided when state_type is not set")
+            if not isinstance(state, self.state_type):
+                raise ValueError(
+                    f"state must be of type {self.state_type}, but got {type(state)}"
+                )
+            set_state(cast(TState, state))
+
+        try:
+            result = await run_step(input_value=input_value)
+            # Cast the result to ensure type compatibility
+            return cast(AgentRunResult[TOutput], result)
+        finally:
+            if state is not None:
+                delete_state()

     @abc.abstractmethod
     async def run_step(
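Taken together with the agent.py change above, AgentBase gains a third type parameter and an optional per-call state argument: the value is validated against state_type, stored in task-local storage via set_state() before run_step(), and cleared in the finally block. A minimal sketch of the new calling convention follows; the Agent constructor arguments and how the result payload is read are assumptions for illustration, not taken from this diff:

```python
from pydantic import BaseModel

from planar.ai.agent import Agent


class TicketInput(BaseModel):
    subject: str


class SupportContext(BaseModel):
    customer_id: str
    tier: str


# Hypothetical agent definition: only the generic parameters and the
# state_type field are shown by this diff; the other details are illustrative.
triage_agent = Agent[TicketInput, str, SupportContext](
    state_type=SupportContext,
)


async def triage(ticket: TicketInput) -> None:
    # Passing state= requires state_type to be set; the value is
    # isinstance-checked, made available to the run via set_state(),
    # and cleared again once the call finishes.
    result = await triage_agent(
        ticket,
        state=SupportContext(customer_id="c-123", tier="gold"),
    )
    print(result)  # AgentRunResult[str]; its payload shape is not part of this diff
```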
planar/ai/state.py
ADDED
@@ -0,0 +1,17 @@
+from typing import Any, Type, cast
+
+from planar.task_local import TaskLocal
+
+data: TaskLocal[Any] = TaskLocal()
+
+
+def set_state(ctx: Any):
+    return data.set(ctx)
+
+
+def get_state[T](_: Type[T]) -> T:
+    return cast(T, data.get())
+
+
+def delete_state():
+    return data.clear()
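The new module is a thin task-local holder: set_state/delete_state bracket an agent call (as wired up in agent_base.py above), and get_state lets code running inside that call read the value back with a typed cast. A small, self-contained sketch of the API; the SupportContext class and the manual bracketing are illustrative:

```python
from planar.ai.state import delete_state, get_state, set_state


class SupportContext:  # stand-in for whatever state_type an agent declares
    def __init__(self, customer_id: str) -> None:
        self.customer_id = customer_id


def lookup_orders() -> str:
    # get_state takes the expected type only to drive the cast; it does not
    # validate the stored value at runtime.
    ctx = get_state(SupportContext)
    return f"orders for {ctx.customer_id}"


# Inside an agent call this bracketing is done by AgentBase.__call__;
# it is done manually here only to show the lifecycle.
set_state(SupportContext(customer_id="c-123"))
try:
    print(lookup_orders())
finally:
    delete_state()
```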
planar/app.py
CHANGED
@@ -130,9 +130,26 @@ class PlanarApp:
             prefix="/human-tasks",
         )

+        if self.config.data:
+            try:
+                from planar.routers.dataset_router import create_dataset_router
+
+                self.router_v1.include_router(
+                    create_dataset_router(),
+                    prefix="/datasets",
+                )
+            except ImportError:
+                logger.error(
+                    "Data dependencies not installed. Ensure you install the `data` optional dependency in your project (planar[data])"
+                )
+                raise
+
         self.router_v1.include_router(
             create_info_router(
-                title=title or "Planar API",
+                title=title or "Planar API",
+                description=description or "Planar API",
+                config=self.config,
+                registry=self._object_registry,
             ),
             prefix="",
         )
planar/data/connection.py
ADDED
@@ -0,0 +1,108 @@
+import asyncio
+
+import ibis
+from ibis.backends.duckdb import Backend as DuckDBBackend
+
+from planar.config import PlanarConfig
+from planar.data.config import (
+    DuckDBCatalogConfig,
+    PostgresCatalogConfig,
+    SQLiteCatalogConfig,
+)
+from planar.data.exceptions import DataError
+from planar.files.storage.config import LocalDirectoryConfig, S3Config
+from planar.logging import get_logger
+from planar.session import get_config
+
+logger = get_logger(__name__)
+
+
+async def _create_connection(config: PlanarConfig) -> DuckDBBackend:
+    """Create Ibis DuckDB connection with Ducklake."""
+    data_config = config.data
+    if not data_config:
+        raise DataError("Data configuration not found")
+
+    # Connect to DuckDB with Ducklake extension
+    con = await asyncio.to_thread(ibis.duckdb.connect, extensions=["ducklake"])
+
+    # Build Ducklake connection string based on catalog type
+    catalog_config = data_config.catalog
+
+    match catalog_config:
+        case DuckDBCatalogConfig():
+            metadata_path = catalog_config.path
+        case PostgresCatalogConfig():
+            # Use connection components to build postgres connection string
+            metadata_path = f"postgres:dbname={catalog_config.db}"
+            if catalog_config.host:
+                metadata_path += f" host={catalog_config.host}"
+            if catalog_config.port:
+                metadata_path += f" port={catalog_config.port}"
+            if catalog_config.user:
+                metadata_path += f" user={catalog_config.user}"
+            if catalog_config.password:
+                metadata_path += f" password={catalog_config.password}"
+        case SQLiteCatalogConfig():
+            metadata_path = f"sqlite:{catalog_config.path}"
+        case _:
+            raise ValueError(f"Unsupported catalog type: {catalog_config.type}")
+
+    try:
+        await asyncio.to_thread(con.raw_sql, "INSTALL ducklake")
+        match catalog_config.type:
+            case "sqlite":
+                await asyncio.to_thread(con.raw_sql, "INSTALL sqlite;")
+            case "postgres":
+                await asyncio.to_thread(con.raw_sql, "INSTALL postgres;")
+        logger.debug("installed Ducklake extensions", catalog_type=catalog_config.type)
+    except Exception as e:
+        raise DataError(f"Failed to install Ducklake extensions: {e}") from e
+
+    # Build ATTACH statement
+    attach_sql = f"ATTACH 'ducklake:{metadata_path}' AS planar_ducklake"
+
+    # Add data path from storage config
+    storage = data_config.storage
+    if isinstance(storage, LocalDirectoryConfig):
+        data_path = storage.directory
+    elif isinstance(storage, S3Config):
+        data_path = f"s3://{storage.bucket_name}/"
+    else:
+        # Generic fallback
+        data_path = getattr(storage, "path", None) or getattr(storage, "directory", ".")
+
+    ducklake_catalog = data_config.catalog_name
+    attach_sql += f" (DATA_PATH '{data_path}'"
+    if catalog_config.type != "sqlite":
+        attach_sql += f", METADATA_SCHEMA '{ducklake_catalog}'"
+    attach_sql += ");"
+
+    # Attach to Ducklake
+    try:
+        await asyncio.to_thread(con.raw_sql, attach_sql)
+    except Exception as e:
+        raise DataError(f"Failed to attach to Ducklake: {e}") from e
+
+    await asyncio.to_thread(con.raw_sql, "USE planar_ducklake;")
+    logger.debug(
+        "connection created",
+        catalog=ducklake_catalog,
+        catalog_type=catalog_config.type,
+        attach_sql=attach_sql,
+    )
+
+    return con
+
+
+async def _get_connection() -> DuckDBBackend:
+    """Get Ibis connection to Ducklake."""
+    config = get_config()
+
+    if not config.data:
+        raise DataError(
+            "Data configuration not found. Please configure 'data' in your planar.yaml"
+        )
+
+    # TODO: Add cached connection pooling or memoize the connection
+    return await _create_connection(config)
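For orientation, the helper reduces to an INSTALL/ATTACH/USE sequence against an in-process DuckDB connection. With invented configuration values (a Postgres catalog named 'main' and an S3 storage bucket), the statements produced by _create_connection would look roughly like this:

```python
# Hypothetical values, traced through _create_connection above.
metadata_path = "postgres:dbname=planar host=db.internal port=5432 user=planar"

statements = [
    "INSTALL ducklake",
    "INSTALL postgres;",  # "INSTALL sqlite;" for a sqlite catalog; nothing extra for duckdb
    f"ATTACH 'ducklake:{metadata_path}' AS planar_ducklake "
    "(DATA_PATH 's3://my-bucket/', METADATA_SCHEMA 'main');",
    "USE planar_ducklake;",
]

for sql in statements:
    print(sql)
```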
planar/data/dataset.py
CHANGED
@@ -6,14 +6,11 @@ from typing import Literal, Self
 import ibis
 import polars as pl
 import pyarrow as pa
-from ibis.backends.duckdb import Backend as DuckDBBackend
 from ibis.common.exceptions import TableNotFound
 from pydantic import BaseModel

-from planar.config import PlanarConfig
-from planar.files.storage.config import LocalDirectoryConfig, S3Config
+from planar.data.connection import _get_connection
 from planar.logging import get_logger
-from planar.session import get_config

 from .exceptions import DataError, DatasetAlreadyExistsError, DatasetNotFoundError

@@ -32,6 +29,8 @@ class PlanarDataset(BaseModel):
     # TODO: Add snapshot version: no version = latest, otherwise time travel on read operations
     # TODO: Add partition support? A Dataset representation could be a table with a partition column

+    is_planar_dataset: bool = True
+
     model_config = {"arbitrary_types_allowed": True}
     # TODO: Add serialization metadata to make clear this is a dataset reference
     # like EntityField.
@@ -68,7 +67,8 @@ class PlanarDataset(BaseModel):

     async def exists(self) -> bool:
         """Check if the dataset exists in Ducklake."""
-        con = await self._get_connection()
+        con = await _get_connection()
+
         try:
             # TODO: Query for the table name directly
             tables = await asyncio.to_thread(con.list_tables)
@@ -79,16 +79,16 @@ class PlanarDataset(BaseModel):

     async def write(
         self,
-        data: pl.DataFrame | ibis.Table | list | dict,
+        data: pl.DataFrame | pl.LazyFrame | ibis.Table | list | dict,
         mode: Literal["overwrite", "append"] = "append",
     ) -> None:
         """Write data to the dataset.

         Args:
-            data: Data to write (Polars DataFrame, PyArrow Table, or Ibis expression)
+            data: Data to write (Polars DataFrame/LazyFrame, PyArrow Table, or Ibis expression)
             mode: Write mode - "append" or "overwrite"
         """
-        con = await self._get_connection()
+        con = await _get_connection()
         overwrite = mode == "overwrite"

         try:
@@ -99,7 +99,7 @@ class PlanarDataset(BaseModel):
         else:
             # TODO: Explore if workflow context can be used to set metadata
             # on the snapshot version for lineage
-            if isinstance(data, pl.DataFrame):
+            if isinstance(data, (pl.DataFrame, pl.LazyFrame)):
                 await asyncio.to_thread(
                     con.insert,
                     self.name,
@@ -133,7 +133,7 @@ class PlanarDataset(BaseModel):
         Returns:
             Ibis table expression that can be further filtered using Ibis methods
         """
-        con = await self._get_connection()
+        con = await _get_connection()

         try:
             table = await asyncio.to_thread(con.table, self.name)
@@ -162,102 +162,9 @@ class PlanarDataset(BaseModel):

     async def delete(self) -> None:
         """Delete the dataset."""
-        con = await self._get_connection()
+        con = await _get_connection()
         try:
             await asyncio.to_thread(con.drop_table, self.name, force=True)
             logger.info("deleted dataset", dataset_name=self.name)
         except Exception as e:
             raise DataError(f"Failed to delete dataset: {e}") from e
-
-    async def _get_connection(self) -> DuckDBBackend:
-        """Get Ibis connection to Ducklake."""
-        config = get_config()
-
-        if not config.data:
-            raise DataError(
-                "Data configuration not found. Please configure 'data' in your planar.yaml"
-            )
-
-        # TODO: Add cached connection pooling or memoize the connection
-        return await self._create_connection(config)
-
-    async def _create_connection(self, config: PlanarConfig) -> DuckDBBackend:
-        """Create Ibis DuckDB connection with Ducklake."""
-        data_config = config.data
-        if not data_config:
-            raise DataError("Data configuration not found")
-
-        # Connect to DuckDB with Ducklake extension
-        con = await asyncio.to_thread(ibis.duckdb.connect, extensions=["ducklake"])
-
-        # Build Ducklake connection string based on catalog type
-        catalog_config = data_config.catalog
-
-        if catalog_config.type == "duckdb":
-            metadata_path = catalog_config.path
-        elif catalog_config.type == "postgres":
-            # Use connection components to build postgres connection string
-            pg = catalog_config
-            metadata_path = f"postgres:dbname={pg.db}"
-            if pg.host:
-                metadata_path += f" host={pg.host}"
-            if pg.port:
-                metadata_path += f" port={pg.port}"
-            if pg.user:
-                metadata_path += f" user={pg.user}"
-            if pg.password:
-                metadata_path += f" password={pg.password}"
-        elif catalog_config.type == "sqlite":
-            metadata_path = f"sqlite:{catalog_config.path}"
-        else:
-            raise ValueError(f"Unsupported catalog type: {catalog_config.type}")
-
-        try:
-            await asyncio.to_thread(con.raw_sql, "INSTALL ducklake")
-            match catalog_config.type:
-                case "sqlite":
-                    await asyncio.to_thread(con.raw_sql, "INSTALL sqlite;")
-                case "postgres":
-                    await asyncio.to_thread(con.raw_sql, "INSTALL postgres;")
-            logger.debug(
-                "installed Ducklake extensions", catalog_type=catalog_config.type
-            )
-        except Exception as e:
-            raise DataError(f"Failed to install Ducklake extensions: {e}") from e
-
-        # Build ATTACH statement
-        attach_sql = f"ATTACH 'ducklake:{metadata_path}' AS planar_ducklake"
-
-        # Add data path from storage config
-        storage = data_config.storage
-        if isinstance(storage, LocalDirectoryConfig):
-            data_path = storage.directory
-        elif isinstance(storage, S3Config):
-            data_path = f"s3://{storage.bucket_name}/"
-        else:
-            # Generic fallback
-            data_path = getattr(storage, "path", None) or getattr(
-                storage, "directory", "."
-            )
-
-        ducklake_catalog = data_config.catalog_name
-        attach_sql += f" (DATA_PATH '{data_path}'"
-        if catalog_config.type != "sqlite":
-            attach_sql += f", METADATA_SCHEMA '{ducklake_catalog}'"
-        attach_sql += ");"
-
-        # Attach to Ducklake
-        try:
-            await asyncio.to_thread(con.raw_sql, attach_sql)
-        except Exception as e:
-            raise DataError(f"Failed to attach to Ducklake: {e}") from e
-
-        await asyncio.to_thread(con.raw_sql, "USE planar_ducklake;")
-        logger.debug(
-            "connection created",
-            catalog=ducklake_catalog,
-            catalog_type=catalog_config.type,
-            attach_sql=attach_sql,
-        )
-
-        return con
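In practical terms, connection handling moves out of PlanarDataset into planar.data.connection, and write() now also accepts a Polars LazyFrame. A short sketch of the call, with an invented dataset and file name:

```python
import polars as pl

from planar.data.dataset import PlanarDataset


async def load_events() -> None:
    ds = PlanarDataset(name="events")  # hypothetical dataset name

    lazy = pl.scan_parquet("events.parquet")  # LazyFrame, now accepted by write()
    await ds.write(lazy, mode="append")

    if await ds.exists():
        print("events dataset is present")
```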
planar/data/utils.py
ADDED
@@ -0,0 +1,89 @@
+import asyncio
+from typing import TypedDict
+
+import ibis.expr.datatypes as dt
+from ibis.common.exceptions import TableNotFound
+
+from planar.data.connection import _get_connection
+from planar.data.dataset import PlanarDataset
+from planar.data.exceptions import DatasetNotFoundError
+from planar.logging import get_logger
+
+logger = get_logger(__name__)
+
+
+# TODO: consider connection pooling or memoize the connection
+
+
+async def list_datasets(limit: int = 100, offset: int = 0) -> list[PlanarDataset]:
+    conn = await _get_connection()
+    tables = await asyncio.to_thread(conn.list_tables)
+    return [PlanarDataset(name=table) for table in tables]
+
+
+async def list_schemas() -> list[str]:
+    METADATA_SCHEMAS = [
+        "information_schema",
+        # FIXME: why is list_databases returning pg_catalog
+        # if the ducklake catalog is sqlite?
+        "pg_catalog",
+    ]
+
+    conn = await _get_connection()
+
+    # in ibis, "databases" are schemas in the traditional sense
+    # e.g. psql: schema == ibis: database
+    # https://ibis-project.org/concepts/backend-table-hierarchy
+    schemas = await asyncio.to_thread(conn.list_databases)
+
+    return [schema for schema in schemas if schema not in METADATA_SCHEMAS]
+
+
+async def get_dataset(dataset_name: str, schema_name: str = "main") -> PlanarDataset:
+    # TODO: add schema_name as a parameter
+
+    dataset = PlanarDataset(name=dataset_name)
+
+    if not await dataset.exists():
+        raise DatasetNotFoundError(f"Dataset {dataset_name} not found")
+
+    return dataset
+
+
+async def get_dataset_row_count(dataset_name: str) -> int:
+    conn = await _get_connection()
+
+    try:
+        value = await asyncio.to_thread(
+            lambda conn, dataset_name: conn.table(dataset_name).count().to_polars(),
+            conn,
+            dataset_name,
+        )
+
+        assert isinstance(value, int), "Scalar must be an integer"
+
+        return value
+    except TableNotFound:
+        raise  # re-raise the exception and allow the caller to handle it
+
+
+class DatasetMetadata(TypedDict):
+    schema: dict[str, dt.DataType]
+    row_count: int
+
+
+async def get_dataset_metadata(
+    dataset_name: str, schema_name: str
+) -> DatasetMetadata | None:
+    conn = await _get_connection()
+
+    try:
+        schema, row_count = await asyncio.gather(
+            asyncio.to_thread(conn.get_schema, dataset_name, database=schema_name),
+            get_dataset_row_count(dataset_name),
+        )
+
+        return DatasetMetadata(schema=schema.fields, row_count=row_count)
+
+    except TableNotFound:
+        return None
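These helpers back the new dataset router but can also be called directly. A short usage sketch with an invented dataset name:

```python
from planar.data.utils import (
    get_dataset,
    get_dataset_metadata,
    list_datasets,
    list_schemas,
)


async def describe_datasets() -> None:
    print(await list_schemas())  # user schemas; metadata schemas are filtered out

    for ds in await list_datasets():
        print(ds.name)

    dataset = await get_dataset("events")  # raises DatasetNotFoundError if missing
    meta = await get_dataset_metadata("events", schema_name="main")
    if meta is not None:
        print(meta["row_count"], list(meta["schema"].keys()))
```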
planar/db/alembic/env.py
CHANGED
@@ -1,5 +1,7 @@
+from functools import wraps
 from logging.config import fileConfig

+import alembic.ddl.base as alembic_base
 from alembic import context
 from sqlalchemy import Connection, engine_from_config, pool

@@ -48,6 +50,28 @@ def include_name(name, type_, _):
     return True


+sqlite_schema_translate_map = {PLANAR_SCHEMA: None}
+
+
+def schema_translate_wrapper(f):
+    @wraps(f)
+    def format_table_name_with_schema(compiler, name, schema):
+        # when on sqlite, we need to translate the schema to None
+        is_sqlite = compiler.dialect.name == "sqlite"
+        if is_sqlite:
+            translated_schema = sqlite_schema_translate_map.get(schema, schema)
+        else:
+            translated_schema = schema
+        return f(compiler, name, translated_schema)
+
+    return format_table_name_with_schema
+
+
+alembic_base.format_table_name = schema_translate_wrapper(
+    alembic_base.format_table_name
+)
+
+
 def run_migrations_online() -> None:
     """Run migrations in 'online' mode.

@@ -102,7 +126,7 @@
     config_dict = config.get_section(config.config_ini_section, {})
     url = config_dict["sqlalchemy.url"]
     is_sqlite = url.startswith("sqlite://")
-    translate_map =
+    translate_map = sqlite_schema_translate_map if is_sqlite else {}
     connectable = engine_from_config(
         config_dict,
         prefix="sqlalchemy.",
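The wrapper monkey-patches Alembic's format_table_name so that on SQLite, which has no schema support, the Planar schema qualifier is dropped from generated DDL while other dialects keep it. A self-contained illustration of the translation step; the PLANAR_SCHEMA value and the stand-in formatter are assumptions:

```python
from functools import wraps
from types import SimpleNamespace

PLANAR_SCHEMA = "planar"  # assumed value of the schema constant used by env.py
sqlite_schema_translate_map = {PLANAR_SCHEMA: None}


def schema_translate_wrapper(f):
    @wraps(f)
    def format_table_name_with_schema(compiler, name, schema):
        # on sqlite, map the planar schema to None so names come out unqualified
        if compiler.dialect.name == "sqlite":
            schema = sqlite_schema_translate_map.get(schema, schema)
        return f(compiler, name, schema)

    return format_table_name_with_schema


def fake_format_table_name(compiler, name, schema):
    # stand-in for alembic.ddl.base.format_table_name
    return f"{schema}.{name}" if schema else name


wrapped = schema_translate_wrapper(fake_format_table_name)

sqlite_compiler = SimpleNamespace(dialect=SimpleNamespace(name="sqlite"))
postgres_compiler = SimpleNamespace(dialect=SimpleNamespace(name="postgresql"))

print(wrapped(sqlite_compiler, "workflow", PLANAR_SCHEMA))    # -> "workflow"
print(wrapped(postgres_compiler, "workflow", PLANAR_SCHEMA))  # -> "planar.workflow"
```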
planar/files/storage/azure_blob.py
CHANGED
@@ -278,7 +278,7 @@ class AzureBlobStorage(Storage):

         elif self.auth_method.name == "AZURE_AD":
             # Generate a User Delegation SAS signed with a user delegation key
-            start_time = datetime.
+            start_time = datetime.now(UTC)
             user_delegation_key = await self.client.get_user_delegation_key(
                 key_start_time=start_time, key_expiry_time=expiry_time
             )
planar/registry_items.py
CHANGED
@@ -47,6 +47,7 @@ class RegisteredWorkflow:
     input_schema: dict[str, Any]
     output_schema: dict[str, Any]
     pydantic_model: Type[BaseModel]
+    is_interactive: bool

     @staticmethod
     def from_workflow(workflow: "WorkflowWrapper") -> "RegisteredWorkflow":
@@ -63,4 +64,5 @@
                 workflow.original_fn
             ),
            pydantic_model=create_pydantic_model_for_workflow(workflow),
+            is_interactive=workflow.is_interactive,
        )