planar-0.9.3-py3-none-any.whl → planar-0.10.0-py3-none-any.whl
- planar/ai/agent.py +2 -1
- planar/ai/agent_base.py +24 -5
- planar/ai/state.py +17 -0
- planar/ai/test_agent_tool_step_display.py +1 -1
- planar/app.py +5 -0
- planar/data/connection.py +108 -0
- planar/data/dataset.py +11 -104
- planar/data/test_dataset.py +45 -41
- planar/data/utils.py +89 -0
- planar/db/alembic/env.py +25 -1
- planar/files/storage/azure_blob.py +1 -1
- planar/registry_items.py +2 -0
- planar/routers/dataset_router.py +213 -0
- planar/routers/models.py +1 -0
- planar/routers/test_dataset_router.py +429 -0
- planar/routers/test_workflow_router.py +26 -1
- planar/routers/workflow.py +2 -0
- planar/security/authorization.py +31 -3
- planar/security/default_policies.cedar +25 -0
- planar/testing/fixtures.py +30 -0
- planar/testing/planar_test_client.py +1 -1
- planar/workflows/decorators.py +2 -1
- planar/workflows/wrappers.py +1 -0
- {planar-0.9.3.dist-info → planar-0.10.0.dist-info}/METADATA +1 -1
- {planar-0.9.3.dist-info → planar-0.10.0.dist-info}/RECORD +27 -22
- {planar-0.9.3.dist-info → planar-0.10.0.dist-info}/WHEEL +1 -1
- {planar-0.9.3.dist-info → planar-0.10.0.dist-info}/entry_points.txt +0 -0
planar/ai/agent.py
CHANGED
@@ -50,7 +50,8 @@ class AgentWorkflowNotifier(AgentEventEmitter):
 class Agent[
     TInput: BaseModel | str,
     TOutput: BaseModel | str,
-](AgentBase[TInput, TOutput]):
+    TDeps,
+](AgentBase[TInput, TOutput, TDeps]):
     model: models.KnownModelName | models.Model = "openai:gpt-4o"

     async def run_step(
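`Agent` now takes a third type parameter, so existing `Agent[Input, Output]` annotations need an extra argument. A minimal, hypothetical sketch of the new parameterization (prompts left empty as in the updated tests; `None` when the agent carries no per-run state or dependencies):

```python
# Hypothetical usage sketch based on the updated test code, not part of this diff.
from planar.ai.agent import Agent

# Third type parameter added in 0.10.0; use None when there is nothing to thread through.
echo_agent = Agent[str, str, None](
    name="echo_agent",  # illustrative name
    system_prompt="",
    user_prompt="",
)
```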
planar/ai/agent_base.py
CHANGED
@@ -15,6 +15,7 @@ from pydantic import BaseModel
 from pydantic_ai.settings import ModelSettings

 from planar.ai.models import AgentConfig, AgentEventEmitter, AgentRunResult
+from planar.ai.state import delete_state, set_state
 from planar.logging import get_logger
 from planar.modeling.field_helpers import JsonSchema
 from planar.utils import P, R, T, U
@@ -29,6 +30,7 @@ class AgentBase[
     # TODO: add `= str` default when we upgrade to 3.13
     TInput: BaseModel | str,
     TOutput: BaseModel | str,
+    TState,
 ](abc.ABC):
     """An LLM-powered agent that can be called directly within workflows."""

@@ -45,6 +47,7 @@ class AgentBase[
     )
     event_emitter: AgentEventEmitter | None = None
     durable: bool = True
+    state_type: Type[TState] | None = None

     # TODO: move here to serialize to frontend
     #
@@ -91,14 +94,16 @@ class AgentBase[

     @overload
     async def __call__(
-        self: "AgentBase[TInput, str]",
+        self: "AgentBase[TInput, str, TState]",
         input_value: TInput,
+        state: TState | None = None,
     ) -> AgentRunResult[str]: ...

     @overload
     async def __call__(
-        self: "AgentBase[TInput, TOutput]",
+        self: "AgentBase[TInput, TOutput, TState]",
         input_value: TInput,
+        state: TState | None = None,
     ) -> AgentRunResult[TOutput]: ...

     def as_step_if_durable(
@@ -120,6 +125,7 @@ class AgentBase[
     async def __call__(
         self,
         input_value: TInput,
+        state: TState | None = None,
     ) -> AgentRunResult[Any]:
         if self.input_type is not None and not isinstance(input_value, self.input_type):
             raise ValueError(
@@ -147,9 +153,22 @@ class AgentBase[
             return_type=AgentRunResult[self.output_type],
         )

-        result = await run_step(input_value=input_value)
-        # Cast the result to ensure type compatibility
-        return cast(AgentRunResult[TOutput], result)
+        if state is not None:
+            if self.state_type is None:
+                raise ValueError("state cannot be provided when state_type is not set")
+            if not isinstance(state, self.state_type):
+                raise ValueError(
+                    f"state must be of type {self.state_type}, but got {type(state)}"
+                )
+            set_state(cast(TState, state))
+
+        try:
+            result = await run_step(input_value=input_value)
+            # Cast the result to ensure type compatibility
+            return cast(AgentRunResult[TOutput], result)
+        finally:
+            if state is not None:
+                delete_state()

     @abc.abstractmethod
     async def run_step(
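Combined with `state_type`, the new optional `state` argument to `__call__` is validated, placed in task-local storage via `set_state` before `run_step` executes, and removed in the `finally` block. A hedged sketch of a call site, assuming `state_type` can be passed like the other `AgentBase` fields; `ReviewState` and its field are illustrative, not from the package:

```python
from pydantic import BaseModel

from planar.ai.agent import Agent


class ReviewState(BaseModel):  # illustrative state model, not part of planar
    ticket_id: str


reviewer = Agent[str, str, ReviewState](
    name="reviewer",         # illustrative name
    system_prompt="",
    user_prompt="",
    state_type=ReviewState,  # must be set before a state value may be passed at call time
)


async def review(text: str):
    # The state is only visible to this run: set before run_step, deleted afterwards.
    return await reviewer(text, state=ReviewState(ticket_id="T-42"))
```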
planar/ai/state.py
ADDED
@@ -0,0 +1,17 @@
+from typing import Any, Type, cast
+
+from planar.task_local import TaskLocal
+
+data: TaskLocal[Any] = TaskLocal()
+
+
+def set_state(ctx: Any):
+    return data.set(ctx)
+
+
+def get_state[T](_: Type[T]) -> T:
+    return cast(T, data.get())
+
+
+def delete_state():
+    return data.clear()
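`state.py` is a thin wrapper over `TaskLocal`, so code running inside an agent's `run_step` (for example a tool) can read the per-run value back in a typed way. A small illustrative sketch, using the same hypothetical `ReviewState` as above:

```python
from pydantic import BaseModel

from planar.ai.state import get_state


class ReviewState(BaseModel):  # illustrative, not part of planar
    ticket_id: str


async def lookup_ticket() -> str:
    # get_state() returns whatever set_state() stored for the current task;
    # the type argument only drives the cast for type checkers.
    state = get_state(ReviewState)
    return state.ticket_id
```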
planar/ai/test_agent_tool_step_display.py
CHANGED
@@ -49,7 +49,7 @@ async def test_agent_tool_step_has_display_name(session):
         patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}, clear=False),
         patch("planar.ai.agent.model_run", side_effect=fake_model_run),
     ):
-        agent = Agent[str, str](
+        agent = Agent[str, str, None](
            name="test_agent",
            system_prompt="",
            user_prompt="",
planar/app.py
CHANGED
@@ -28,6 +28,7 @@ from planar.routers import (
     create_workflow_router,
 )
 from planar.routers.agents_router import create_agent_router
+from planar.routers.dataset_router import create_dataset_router
 from planar.routers.entity_router import create_entities_router
 from planar.routers.object_config_router import create_object_config_router
 from planar.routers.rule import create_rule_router
@@ -129,6 +130,10 @@ class PlanarApp:
             create_human_task_routes(),
             prefix="/human-tasks",
         )
+        self.router_v1.include_router(
+            create_dataset_router(),
+            prefix="/datasets",
+        )

         self.router_v1.include_router(
             create_info_router(
planar/data/connection.py
ADDED
@@ -0,0 +1,108 @@
+import asyncio
+
+import ibis
+from ibis.backends.duckdb import Backend as DuckDBBackend
+
+from planar.config import PlanarConfig
+from planar.data.config import (
+    DuckDBCatalogConfig,
+    PostgresCatalogConfig,
+    SQLiteCatalogConfig,
+)
+from planar.data.exceptions import DataError
+from planar.files.storage.config import LocalDirectoryConfig, S3Config
+from planar.logging import get_logger
+from planar.session import get_config
+
+logger = get_logger(__name__)
+
+
+async def _create_connection(config: PlanarConfig) -> DuckDBBackend:
+    """Create Ibis DuckDB connection with Ducklake."""
+    data_config = config.data
+    if not data_config:
+        raise DataError("Data configuration not found")
+
+    # Connect to DuckDB with Ducklake extension
+    con = await asyncio.to_thread(ibis.duckdb.connect, extensions=["ducklake"])
+
+    # Build Ducklake connection string based on catalog type
+    catalog_config = data_config.catalog
+
+    match catalog_config:
+        case DuckDBCatalogConfig():
+            metadata_path = catalog_config.path
+        case PostgresCatalogConfig():
+            # Use connection components to build postgres connection string
+            metadata_path = f"postgres:dbname={catalog_config.db}"
+            if catalog_config.host:
+                metadata_path += f" host={catalog_config.host}"
+            if catalog_config.port:
+                metadata_path += f" port={catalog_config.port}"
+            if catalog_config.user:
+                metadata_path += f" user={catalog_config.user}"
+            if catalog_config.password:
+                metadata_path += f" password={catalog_config.password}"
+        case SQLiteCatalogConfig():
+            metadata_path = f"sqlite:{catalog_config.path}"
+        case _:
+            raise ValueError(f"Unsupported catalog type: {catalog_config.type}")
+
+    try:
+        await asyncio.to_thread(con.raw_sql, "INSTALL ducklake")
+        match catalog_config.type:
+            case "sqlite":
+                await asyncio.to_thread(con.raw_sql, "INSTALL sqlite;")
+            case "postgres":
+                await asyncio.to_thread(con.raw_sql, "INSTALL postgres;")
+        logger.debug("installed Ducklake extensions", catalog_type=catalog_config.type)
+    except Exception as e:
+        raise DataError(f"Failed to install Ducklake extensions: {e}") from e
+
+    # Build ATTACH statement
+    attach_sql = f"ATTACH 'ducklake:{metadata_path}' AS planar_ducklake"
+
+    # Add data path from storage config
+    storage = data_config.storage
+    if isinstance(storage, LocalDirectoryConfig):
+        data_path = storage.directory
+    elif isinstance(storage, S3Config):
+        data_path = f"s3://{storage.bucket_name}/"
+    else:
+        # Generic fallback
+        data_path = getattr(storage, "path", None) or getattr(storage, "directory", ".")
+
+    ducklake_catalog = data_config.catalog_name
+    attach_sql += f" (DATA_PATH '{data_path}'"
+    if catalog_config.type != "sqlite":
+        attach_sql += f", METADATA_SCHEMA '{ducklake_catalog}'"
+    attach_sql += ");"
+
+    # Attach to Ducklake
+    try:
+        await asyncio.to_thread(con.raw_sql, attach_sql)
+    except Exception as e:
+        raise DataError(f"Failed to attach to Ducklake: {e}") from e
+
+    await asyncio.to_thread(con.raw_sql, "USE planar_ducklake;")
+    logger.debug(
+        "connection created",
+        catalog=ducklake_catalog,
+        catalog_type=catalog_config.type,
+        attach_sql=attach_sql,
+    )
+
+    return con
+
+
+async def _get_connection() -> DuckDBBackend:
+    """Get Ibis connection to Ducklake."""
+    config = get_config()
+
+    if not config.data:
+        raise DataError(
+            "Data configuration not found. Please configure 'data' in your planar.yaml"
+        )
+
+    # TODO: Add cached connection pooling or memoize the connection
+    return await _create_connection(config)
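For context, `_create_connection` consumes the same config objects the (now removed) inline test fixture used to build. A hedged example of the SQLite-catalog plus local-directory combination; the paths are placeholders and would normally come from `planar.yaml`:

```python
# Placeholder paths for illustration; real values come from the app's data config.
from planar.data.config import DataConfig, SQLiteCatalogConfig
from planar.files.storage.config import LocalDirectoryConfig

data_config = DataConfig(
    catalog=SQLiteCatalogConfig(type="sqlite", path="/tmp/planar/catalog.sqlite"),
    storage=LocalDirectoryConfig(
        backend="localdir", directory="/tmp/planar/ducklake_files"
    ),
)
```

With that config, the helper would issue roughly `ATTACH 'ducklake:sqlite:/tmp/planar/catalog.sqlite' AS planar_ducklake (DATA_PATH '/tmp/planar/ducklake_files');`, skipping `METADATA_SCHEMA` since that clause is only added for non-SQLite catalogs.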
planar/data/dataset.py
CHANGED
@@ -6,14 +6,11 @@ from typing import Literal, Self
 import ibis
 import polars as pl
 import pyarrow as pa
-from ibis.backends.duckdb import Backend as DuckDBBackend
 from ibis.common.exceptions import TableNotFound
 from pydantic import BaseModel

-from planar.config import PlanarConfig
-from planar.files.storage.config import LocalDirectoryConfig, S3Config
+from planar.data.connection import _get_connection
 from planar.logging import get_logger
-from planar.session import get_config

 from .exceptions import DataError, DatasetAlreadyExistsError, DatasetNotFoundError

@@ -32,6 +29,8 @@ class PlanarDataset(BaseModel):
     # TODO: Add snapshot version: no version = latest, otherwise time travel on read operations
     # TODO: Add partition support? A Dataset representation could be a table with a partition column

+    is_planar_dataset: bool = True
+
     model_config = {"arbitrary_types_allowed": True}
     # TODO: Add serialization metadata to make clear this is a dataset reference
     # like EntityField.
@@ -68,7 +67,8 @@ class PlanarDataset(BaseModel):

     async def exists(self) -> bool:
         """Check if the dataset exists in Ducklake."""
-        con = await self._get_connection()
+        con = await _get_connection()
+
         try:
             # TODO: Query for the table name directly
             tables = await asyncio.to_thread(con.list_tables)
@@ -79,16 +79,16 @@ class PlanarDataset(BaseModel):

     async def write(
         self,
-        data: pl.DataFrame | ibis.Table | list | dict,
+        data: pl.DataFrame | pl.LazyFrame | ibis.Table | list | dict,
         mode: Literal["overwrite", "append"] = "append",
     ) -> None:
         """Write data to the dataset.

         Args:
-            data: Data to write (Polars DataFrame, PyArrow Table, or Ibis expression)
+            data: Data to write (Polars DataFrame/LazyFrame, PyArrow Table, or Ibis expression)
             mode: Write mode - "append" or "overwrite"
         """
-        con = await self._get_connection()
+        con = await _get_connection()
         overwrite = mode == "overwrite"

         try:
@@ -99,7 +99,7 @@ class PlanarDataset(BaseModel):
         else:
             # TODO: Explore if workflow context can be used to set metadata
             # on the snapshot version for lineage
-            if isinstance(data, pl.DataFrame):
+            if isinstance(data, (pl.DataFrame, pl.LazyFrame)):
                 await asyncio.to_thread(
                     con.insert,
                     self.name,
@@ -133,7 +133,7 @@ class PlanarDataset(BaseModel):
         Returns:
             Ibis table expression that can be further filtered using Ibis methods
         """
-        con = await self._get_connection()
+        con = await _get_connection()

         try:
             table = await asyncio.to_thread(con.table, self.name)
@@ -162,102 +162,9 @@ class PlanarDataset(BaseModel):

     async def delete(self) -> None:
         """Delete the dataset."""
-        con = await self._get_connection()
+        con = await _get_connection()
         try:
             await asyncio.to_thread(con.drop_table, self.name, force=True)
             logger.info("deleted dataset", dataset_name=self.name)
         except Exception as e:
             raise DataError(f"Failed to delete dataset: {e}") from e
-
-    async def _get_connection(self) -> DuckDBBackend:
-        """Get Ibis connection to Ducklake."""
-        config = get_config()
-
-        if not config.data:
-            raise DataError(
-                "Data configuration not found. Please configure 'data' in your planar.yaml"
-            )
-
-        # TODO: Add cached connection pooling or memoize the connection
-        return await self._create_connection(config)
-
-    async def _create_connection(self, config: PlanarConfig) -> DuckDBBackend:
-        """Create Ibis DuckDB connection with Ducklake."""
-        data_config = config.data
-        if not data_config:
-            raise DataError("Data configuration not found")
-
-        # Connect to DuckDB with Ducklake extension
-        con = await asyncio.to_thread(ibis.duckdb.connect, extensions=["ducklake"])
-
-        # Build Ducklake connection string based on catalog type
-        catalog_config = data_config.catalog
-
-        if catalog_config.type == "duckdb":
-            metadata_path = catalog_config.path
-        elif catalog_config.type == "postgres":
-            # Use connection components to build postgres connection string
-            pg = catalog_config
-            metadata_path = f"postgres:dbname={pg.db}"
-            if pg.host:
-                metadata_path += f" host={pg.host}"
-            if pg.port:
-                metadata_path += f" port={pg.port}"
-            if pg.user:
-                metadata_path += f" user={pg.user}"
-            if pg.password:
-                metadata_path += f" password={pg.password}"
-        elif catalog_config.type == "sqlite":
-            metadata_path = f"sqlite:{catalog_config.path}"
-        else:
-            raise ValueError(f"Unsupported catalog type: {catalog_config.type}")
-
-        try:
-            await asyncio.to_thread(con.raw_sql, "INSTALL ducklake")
-            match catalog_config.type:
-                case "sqlite":
-                    await asyncio.to_thread(con.raw_sql, "INSTALL sqlite;")
-                case "postgres":
-                    await asyncio.to_thread(con.raw_sql, "INSTALL postgres;")
-            logger.debug(
-                "installed Ducklake extensions", catalog_type=catalog_config.type
-            )
-        except Exception as e:
-            raise DataError(f"Failed to install Ducklake extensions: {e}") from e
-
-        # Build ATTACH statement
-        attach_sql = f"ATTACH 'ducklake:{metadata_path}' AS planar_ducklake"
-
-        # Add data path from storage config
-        storage = data_config.storage
-        if isinstance(storage, LocalDirectoryConfig):
-            data_path = storage.directory
-        elif isinstance(storage, S3Config):
-            data_path = f"s3://{storage.bucket_name}/"
-        else:
-            # Generic fallback
-            data_path = getattr(storage, "path", None) or getattr(
-                storage, "directory", "."
-            )
-
-        ducklake_catalog = data_config.catalog_name
-        attach_sql += f" (DATA_PATH '{data_path}'"
-        if catalog_config.type != "sqlite":
-            attach_sql += f", METADATA_SCHEMA '{ducklake_catalog}'"
-        attach_sql += ");"
-
-        # Attach to Ducklake
-        try:
-            await asyncio.to_thread(con.raw_sql, attach_sql)
-        except Exception as e:
-            raise DataError(f"Failed to attach to Ducklake: {e}") from e
-
-        await asyncio.to_thread(con.raw_sql, "USE planar_ducklake;")
-        logger.debug(
-            "connection created",
-            catalog=ducklake_catalog,
-            catalog_type=catalog_config.type,
-            attach_sql=attach_sql,
-        )
-
-        return con
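With the connection helpers extracted, `PlanarDataset` is purely the read/write surface, and `write()` now also accepts `pl.LazyFrame` inputs via the same `con.insert` branch as eager frames. A short usage sketch, assuming a configured Planar app context (mirrors the new LazyFrame test; the dataset name is illustrative):

```python
import polars as pl

from planar.data import PlanarDataset


async def build_report() -> pl.DataFrame:
    dataset = await PlanarDataset.create("report_rows")  # illustrative name

    lf = pl.LazyFrame({"id": [1, 2, 3], "value": [10.5, 20.3, 30.1]}).with_columns(
        pl.format("user_{}", pl.col("id")).alias("username")
    )

    # LazyFrames are now accepted directly by write()
    await dataset.write(lf, mode="overwrite")
    return await dataset.to_polars()
```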
planar/data/test_dataset.py
CHANGED
@@ -5,44 +5,21 @@ import pyarrow as pa
 import pytest
 from ibis import literal

-from planar import PlanarApp
 from planar.data import PlanarDataset
-from planar.data.config import DataConfig, SQLiteCatalogConfig
 from planar.data.exceptions import (
     DataError,
     DatasetAlreadyExistsError,
     DatasetNotFoundError,
 )
-from planar.files.storage.config import LocalDirectoryConfig
 from planar.workflows import step


-@pytest.fixture
-def data_config(tmp_path):
-    """Create a test data configuration."""
-    data_dir = tmp_path / "data"
-    data_dir.mkdir(exist_ok=True)
-
-    catalog_path = data_dir / "test.sqlite"
-    storage_path = data_dir / "ducklake_files"
-    storage_path.mkdir(exist_ok=True)
-
-    return DataConfig(
-        catalog=SQLiteCatalogConfig(type="sqlite", path=str(catalog_path)),
-        storage=LocalDirectoryConfig(backend="localdir", directory=str(storage_path)),
-    )
-
-
 @pytest.fixture(name="app")
-def app_fixture(
-    """
-
-    # Add data config to the app's config
-    app.config.data = data_config
-    return app
+def app_fixture(app_with_data):
+    """Use the shared app_with_data fixture as 'app' for this test module."""
+    return app_with_data


-@pytest.mark.asyncio
 async def test_dataset_create(client):
     """Test creating a dataset reference."""
     dataset = await PlanarDataset.create("test_table")
@@ -62,7 +39,6 @@ async def test_dataset_create(client):
     await dataset.delete()


-@pytest.mark.asyncio
 async def test_dataset_create_if_not_exists(client):
     """Test creating a dataset with if_not_exists behavior."""
     # Create dataset and write data to make it exist
@@ -82,7 +58,6 @@ async def test_dataset_create_if_not_exists(client):
     await dataset1.delete()


-@pytest.mark.asyncio
 async def test_dataset_write_and_read_polars(client):
     """Test writing and reading data with Polars."""
     dataset = await PlanarDataset.create("test_polars")
@@ -112,7 +87,6 @@ async def test_dataset_write_and_read_polars(client):
     await dataset.delete()


-@pytest.mark.asyncio
 async def test_dataset_write_and_read_pyarrow(client):
     """Test writing and reading data with PyArrow."""
     dataset = await PlanarDataset.create("test_pyarrow")
@@ -140,7 +114,47 @@ async def test_dataset_write_and_read_pyarrow(client):
     await dataset.delete()


-@pytest.mark.asyncio
+async def test_dataset_write_and_read_lazyframe(client):
+    """Test writing and reading data with Polars LazyFrame."""
+    dataset = await PlanarDataset.create("test_lazyframe")
+
+    # Create test data as LazyFrame with computed columns
+    lf = pl.LazyFrame(
+        {
+            "id": range(5),
+            "name": ["Alice", "Bob", "Charlie", "David", "Eve"],
+            "value": [10.5, 20.3, 30.1, 40.7, 50.9],
+        }
+    ).with_columns(
+        # Use native polars expressions for efficiency
+        pl.format("user_{}", pl.col("id")).alias("username"),
+        pl.col("value").round(1).alias("rounded_value"),
+    )
+
+    # Write LazyFrame data
+    await dataset.write(lf, mode="overwrite")
+
+    # Read data back
+    result = await dataset.to_polars()
+
+    # Verify shape and columns
+    assert result.shape == (5, 5)
+    assert set(result.columns) == {"id", "name", "value", "username", "rounded_value"}
+
+    # Verify the computed columns work correctly
+    assert result["username"].to_list() == [
+        "user_0",
+        "user_1",
+        "user_2",
+        "user_3",
+        "user_4",
+    ]
+    assert result["rounded_value"].to_list() == [10.5, 20.3, 30.1, 40.7, 50.9]
+
+    # Cleanup
+    await dataset.delete()
+
+
 async def test_dataset_append_mode(client):
     """Test appending data to a dataset."""
     dataset = await PlanarDataset.create("test_append")
@@ -164,7 +178,6 @@ async def test_dataset_append_mode(client):
     await dataset.delete()


-@pytest.mark.asyncio
 async def test_dataset_overwrite_replaces_existing(client):
     """Overwrite should replace existing rows completely."""
     dataset = await PlanarDataset.create("test_overwrite")
@@ -184,7 +197,6 @@ async def test_dataset_overwrite_replaces_existing(client):
     await dataset.delete()


-@pytest.mark.asyncio
 async def test_dataset_read_with_filter(client):
     """Test reading data with Ibis filtering."""
     dataset = await PlanarDataset.create("test_filter")
@@ -204,7 +216,6 @@ async def test_dataset_read_with_filter(client):
     await dataset.delete()


-@pytest.mark.asyncio
 async def test_dataset_read_with_columns_and_limit(client):
     """Test reading specific columns with limit."""
     dataset = await PlanarDataset.create("test_select")
@@ -232,7 +243,6 @@ async def test_dataset_read_with_columns_and_limit(client):
     await dataset.delete()


-@pytest.mark.asyncio
 async def test_dataset_not_found(client):
     """Test reading from non-existent dataset."""
     dataset = PlanarDataset(name="nonexistent")
@@ -245,7 +255,6 @@ async def test_dataset_not_found(client):
         await dataset.read()


-@pytest.mark.asyncio
 async def test_dataset_delete(client):
     """Test deleting a dataset."""
     dataset = await PlanarDataset.create("test_delete")
@@ -264,7 +273,6 @@ async def test_dataset_delete(client):
     assert not await dataset.exists()


-@pytest.mark.asyncio
 async def test_dataset_write_list_of_dicts(client):
     """Write list-of-dicts input and read back with Polars."""
     dataset = await PlanarDataset.create("test_list_of_dicts")
@@ -279,7 +287,6 @@ async def test_dataset_write_list_of_dicts(client):
     await dataset.delete()


-@pytest.mark.asyncio
 async def test_dataset_write_dict_of_lists(client):
     """Write dict-of-lists input and read back with Polars."""
     dataset = await PlanarDataset.create("test_dict_of_lists")
@@ -294,7 +301,6 @@ async def test_dataset_write_dict_of_lists(client):
     await dataset.delete()


-@pytest.mark.asyncio
 async def test_dataset_workflow_serialization(client):
     """Test that PlanarDataset can be used as workflow input/output."""

@@ -327,7 +333,6 @@ async def test_dataset_workflow_serialization(client):
     await dataset.delete()


-@pytest.mark.asyncio
 async def test_no_data_config_error(client):
     """Test error when data config is not set."""
     # Remove data config
@@ -336,10 +341,9 @@ async def test_no_data_config_error(client):
     dataset = PlanarDataset(name="test")

     with pytest.raises(DataError, match="Data configuration not found"):
-        await dataset.
+        await dataset.exists()


-@pytest.mark.asyncio
 async def test_write_with_invalid_input_raises(client):
     """Unknown input types to write() should raise a DataError."""