planar-0.9.3-py3-none-any.whl → planar-0.10.0-py3-none-any.whl
- planar/ai/agent.py +2 -1
- planar/ai/agent_base.py +24 -5
- planar/ai/state.py +17 -0
- planar/ai/test_agent_tool_step_display.py +1 -1
- planar/app.py +5 -0
- planar/data/connection.py +108 -0
- planar/data/dataset.py +11 -104
- planar/data/test_dataset.py +45 -41
- planar/data/utils.py +89 -0
- planar/db/alembic/env.py +25 -1
- planar/files/storage/azure_blob.py +1 -1
- planar/registry_items.py +2 -0
- planar/routers/dataset_router.py +213 -0
- planar/routers/models.py +1 -0
- planar/routers/test_dataset_router.py +429 -0
- planar/routers/test_workflow_router.py +26 -1
- planar/routers/workflow.py +2 -0
- planar/security/authorization.py +31 -3
- planar/security/default_policies.cedar +25 -0
- planar/testing/fixtures.py +30 -0
- planar/testing/planar_test_client.py +1 -1
- planar/workflows/decorators.py +2 -1
- planar/workflows/wrappers.py +1 -0
- {planar-0.9.3.dist-info → planar-0.10.0.dist-info}/METADATA +1 -1
- {planar-0.9.3.dist-info → planar-0.10.0.dist-info}/RECORD +27 -22
- {planar-0.9.3.dist-info → planar-0.10.0.dist-info}/WHEEL +1 -1
- {planar-0.9.3.dist-info → planar-0.10.0.dist-info}/entry_points.txt +0 -0
planar/ai/agent.py
CHANGED
@@ -50,7 +50,8 @@ class AgentWorkflowNotifier(AgentEventEmitter):
 class Agent[
     TInput: BaseModel | str,
     TOutput: BaseModel | str,
-](AgentBase[TInput, TOutput]):
+    TDeps,
+](AgentBase[TInput, TOutput, TDeps]):
     model: models.KnownModelName | models.Model = "openai:gpt-4o"

     async def run_step(
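`Agent` now takes a third type parameter, so existing `Agent[Input, Output]` annotations need an extra argument. A minimal, hypothetical sketch of the new parameterization (prompts left empty as in the updated tests; `None` when the agent carries no per-run state or dependencies):

```python
# Hypothetical usage sketch based on the updated test code, not part of this diff.
from planar.ai.agent import Agent

# Third type parameter added in 0.10.0; use None when there is nothing to thread through.
echo_agent = Agent[str, str, None](
    name="echo_agent",  # illustrative name
    system_prompt="",
    user_prompt="",
)
```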
planar/ai/agent_base.py
CHANGED
@@ -15,6 +15,7 @@ from pydantic import BaseModel
 from pydantic_ai.settings import ModelSettings

 from planar.ai.models import AgentConfig, AgentEventEmitter, AgentRunResult
+from planar.ai.state import delete_state, set_state
 from planar.logging import get_logger
 from planar.modeling.field_helpers import JsonSchema
 from planar.utils import P, R, T, U
@@ -29,6 +30,7 @@ class AgentBase[
     # TODO: add `= str` default when we upgrade to 3.13
     TInput: BaseModel | str,
     TOutput: BaseModel | str,
+    TState,
 ](abc.ABC):
     """An LLM-powered agent that can be called directly within workflows."""

@@ -45,6 +47,7 @@ class AgentBase[
     )
     event_emitter: AgentEventEmitter | None = None
     durable: bool = True
+    state_type: Type[TState] | None = None

     # TODO: move here to serialize to frontend
     #
@@ -91,14 +94,16 @@ class AgentBase[

     @overload
     async def __call__(
-        self: "AgentBase[TInput, str]",
+        self: "AgentBase[TInput, str, TState]",
         input_value: TInput,
+        state: TState | None = None,
     ) -> AgentRunResult[str]: ...

     @overload
     async def __call__(
-        self: "AgentBase[TInput, TOutput]",
+        self: "AgentBase[TInput, TOutput, TState]",
         input_value: TInput,
+        state: TState | None = None,
     ) -> AgentRunResult[TOutput]: ...

     def as_step_if_durable(
@@ -120,6 +125,7 @@ class AgentBase[
     async def __call__(
         self,
         input_value: TInput,
+        state: TState | None = None,
     ) -> AgentRunResult[Any]:
         if self.input_type is not None and not isinstance(input_value, self.input_type):
             raise ValueError(
@@ -147,9 +153,22 @@ class AgentBase[
             return_type=AgentRunResult[self.output_type],
         )

-        result = await run_step(input_value=input_value)
-        # Cast the result to ensure type compatibility
-        return cast(AgentRunResult[TOutput], result)
+        if state is not None:
+            if self.state_type is None:
+                raise ValueError("state cannot be provided when state_type is not set")
+            if not isinstance(state, self.state_type):
+                raise ValueError(
+                    f"state must be of type {self.state_type}, but got {type(state)}"
+                )
+            set_state(cast(TState, state))
+
+        try:
+            result = await run_step(input_value=input_value)
+            # Cast the result to ensure type compatibility
+            return cast(AgentRunResult[TOutput], result)
+        finally:
+            if state is not None:
+                delete_state()

     @abc.abstractmethod
     async def run_step(
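Combined with `state_type`, the new optional `state` argument to `__call__` is validated, placed in task-local storage via `set_state` before `run_step` executes, and removed in the `finally` block. A hedged sketch of a call site, assuming `state_type` can be passed like the other `AgentBase` fields; `ReviewState` and its field are illustrative, not from the package:

```python
from pydantic import BaseModel

from planar.ai.agent import Agent


class ReviewState(BaseModel):  # illustrative state model, not part of planar
    ticket_id: str


reviewer = Agent[str, str, ReviewState](
    name="reviewer",         # illustrative name
    system_prompt="",
    user_prompt="",
    state_type=ReviewState,  # must be set before a state value may be passed at call time
)


async def review(text: str):
    # The state is only visible to this run: set before run_step, deleted afterwards.
    return await reviewer(text, state=ReviewState(ticket_id="T-42"))
```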
planar/ai/state.py
ADDED
@@ -0,0 +1,17 @@
+from typing import Any, Type, cast
+
+from planar.task_local import TaskLocal
+
+data: TaskLocal[Any] = TaskLocal()
+
+
+def set_state(ctx: Any):
+    return data.set(ctx)
+
+
+def get_state[T](_: Type[T]) -> T:
+    return cast(T, data.get())
+
+
+def delete_state():
+    return data.clear()
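`state.py` is a thin wrapper over `TaskLocal`, so code running inside an agent's `run_step` (for example a tool) can read the per-run value back in a typed way. A small illustrative sketch, using the same hypothetical `ReviewState` as above:

```python
from pydantic import BaseModel

from planar.ai.state import get_state


class ReviewState(BaseModel):  # illustrative, not part of planar
    ticket_id: str


async def lookup_ticket() -> str:
    # get_state() returns whatever set_state() stored for the current task;
    # the type argument only drives the cast for type checkers.
    state = get_state(ReviewState)
    return state.ticket_id
```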
planar/ai/test_agent_tool_step_display.py
CHANGED
@@ -49,7 +49,7 @@ async def test_agent_tool_step_has_display_name(session):
         patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}, clear=False),
         patch("planar.ai.agent.model_run", side_effect=fake_model_run),
     ):
-        agent = Agent[str, str](
+        agent = Agent[str, str, None](
            name="test_agent",
            system_prompt="",
            user_prompt="",
planar/app.py
CHANGED
@@ -28,6 +28,7 @@ from planar.routers import (
     create_workflow_router,
 )
 from planar.routers.agents_router import create_agent_router
+from planar.routers.dataset_router import create_dataset_router
 from planar.routers.entity_router import create_entities_router
 from planar.routers.object_config_router import create_object_config_router
 from planar.routers.rule import create_rule_router
@@ -129,6 +130,10 @@ class PlanarApp:
             create_human_task_routes(),
             prefix="/human-tasks",
         )
+        self.router_v1.include_router(
+            create_dataset_router(),
+            prefix="/datasets",
+        )

         self.router_v1.include_router(
             create_info_router(
planar/data/connection.py
ADDED
@@ -0,0 +1,108 @@
+import asyncio
+
+import ibis
+from ibis.backends.duckdb import Backend as DuckDBBackend
+
+from planar.config import PlanarConfig
+from planar.data.config import (
+    DuckDBCatalogConfig,
+    PostgresCatalogConfig,
+    SQLiteCatalogConfig,
+)
+from planar.data.exceptions import DataError
+from planar.files.storage.config import LocalDirectoryConfig, S3Config
+from planar.logging import get_logger
+from planar.session import get_config
+
+logger = get_logger(__name__)
+
+
+async def _create_connection(config: PlanarConfig) -> DuckDBBackend:
+    """Create Ibis DuckDB connection with Ducklake."""
+    data_config = config.data
+    if not data_config:
+        raise DataError("Data configuration not found")
+
+    # Connect to DuckDB with Ducklake extension
+    con = await asyncio.to_thread(ibis.duckdb.connect, extensions=["ducklake"])
+
+    # Build Ducklake connection string based on catalog type
+    catalog_config = data_config.catalog
+
+    match catalog_config:
+        case DuckDBCatalogConfig():
+            metadata_path = catalog_config.path
+        case PostgresCatalogConfig():
+            # Use connection components to build postgres connection string
+            metadata_path = f"postgres:dbname={catalog_config.db}"
+            if catalog_config.host:
+                metadata_path += f" host={catalog_config.host}"
+            if catalog_config.port:
+                metadata_path += f" port={catalog_config.port}"
+            if catalog_config.user:
+                metadata_path += f" user={catalog_config.user}"
+            if catalog_config.password:
+                metadata_path += f" password={catalog_config.password}"
+        case SQLiteCatalogConfig():
+            metadata_path = f"sqlite:{catalog_config.path}"
+        case _:
+            raise ValueError(f"Unsupported catalog type: {catalog_config.type}")
+
+    try:
+        await asyncio.to_thread(con.raw_sql, "INSTALL ducklake")
+        match catalog_config.type:
+            case "sqlite":
+                await asyncio.to_thread(con.raw_sql, "INSTALL sqlite;")
+            case "postgres":
+                await asyncio.to_thread(con.raw_sql, "INSTALL postgres;")
+        logger.debug("installed Ducklake extensions", catalog_type=catalog_config.type)
+    except Exception as e:
+        raise DataError(f"Failed to install Ducklake extensions: {e}") from e
+
+    # Build ATTACH statement
+    attach_sql = f"ATTACH 'ducklake:{metadata_path}' AS planar_ducklake"
+
+    # Add data path from storage config
+    storage = data_config.storage
+    if isinstance(storage, LocalDirectoryConfig):
+        data_path = storage.directory
+    elif isinstance(storage, S3Config):
+        data_path = f"s3://{storage.bucket_name}/"
+    else:
+        # Generic fallback
+        data_path = getattr(storage, "path", None) or getattr(storage, "directory", ".")
+
+    ducklake_catalog = data_config.catalog_name
+    attach_sql += f" (DATA_PATH '{data_path}'"
+    if catalog_config.type != "sqlite":
+        attach_sql += f", METADATA_SCHEMA '{ducklake_catalog}'"
+    attach_sql += ");"
+
+    # Attach to Ducklake
+    try:
+        await asyncio.to_thread(con.raw_sql, attach_sql)
+    except Exception as e:
+        raise DataError(f"Failed to attach to Ducklake: {e}") from e
+
+    await asyncio.to_thread(con.raw_sql, "USE planar_ducklake;")
+    logger.debug(
+        "connection created",
+        catalog=ducklake_catalog,
+        catalog_type=catalog_config.type,
+        attach_sql=attach_sql,
+    )
+
+    return con
+
+
+async def _get_connection() -> DuckDBBackend:
+    """Get Ibis connection to Ducklake."""
+    config = get_config()
+
+    if not config.data:
+        raise DataError(
+            "Data configuration not found. Please configure 'data' in your planar.yaml"
+        )
+
+    # TODO: Add cached connection pooling or memoize the connection
+    return await _create_connection(config)
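For context, `_create_connection` consumes the same config objects the (now removed) inline test fixture used to build. A hedged example of the SQLite-catalog plus local-directory combination; the paths are placeholders and would normally come from `planar.yaml`:

```python
# Placeholder paths for illustration; real values come from the app's data config.
from planar.data.config import DataConfig, SQLiteCatalogConfig
from planar.files.storage.config import LocalDirectoryConfig

data_config = DataConfig(
    catalog=SQLiteCatalogConfig(type="sqlite", path="/tmp/planar/catalog.sqlite"),
    storage=LocalDirectoryConfig(
        backend="localdir", directory="/tmp/planar/ducklake_files"
    ),
)
```

With that config, the helper would issue roughly `ATTACH 'ducklake:sqlite:/tmp/planar/catalog.sqlite' AS planar_ducklake (DATA_PATH '/tmp/planar/ducklake_files');`, skipping `METADATA_SCHEMA` since that clause is only added for non-SQLite catalogs.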
planar/data/dataset.py
CHANGED
@@ -6,14 +6,11 @@ from typing import Literal, Self
 import ibis
 import polars as pl
 import pyarrow as pa
-from ibis.backends.duckdb import Backend as DuckDBBackend
 from ibis.common.exceptions import TableNotFound
 from pydantic import BaseModel

-from planar.config import PlanarConfig
-from planar.files.storage.config import LocalDirectoryConfig, S3Config
+from planar.data.connection import _get_connection
 from planar.logging import get_logger
-from planar.session import get_config

 from .exceptions import DataError, DatasetAlreadyExistsError, DatasetNotFoundError

@@ -32,6 +29,8 @@ class PlanarDataset(BaseModel):
     # TODO: Add snapshot version: no version = latest, otherwise time travel on read operations
     # TODO: Add partition support? A Dataset representation could be a table with a partition column

+    is_planar_dataset: bool = True
+
     model_config = {"arbitrary_types_allowed": True}
     # TODO: Add serialization metadata to make clear this is a dataset reference
     # like EntityField.
@@ -68,7 +67,8 @@ class PlanarDataset(BaseModel):

     async def exists(self) -> bool:
         """Check if the dataset exists in Ducklake."""
-        con = await self._get_connection()
+        con = await _get_connection()
+
         try:
             # TODO: Query for the table name directly
             tables = await asyncio.to_thread(con.list_tables)
@@ -79,16 +79,16 @@ class PlanarDataset(BaseModel):

     async def write(
         self,
-        data: pl.DataFrame | ibis.Table | list | dict,
+        data: pl.DataFrame | pl.LazyFrame | ibis.Table | list | dict,
         mode: Literal["overwrite", "append"] = "append",
     ) -> None:
         """Write data to the dataset.

         Args:
-            data: Data to write (Polars DataFrame, PyArrow Table, or Ibis expression)
+            data: Data to write (Polars DataFrame/LazyFrame, PyArrow Table, or Ibis expression)
             mode: Write mode - "append" or "overwrite"
         """
-        con = await self._get_connection()
+        con = await _get_connection()
         overwrite = mode == "overwrite"

         try:
@@ -99,7 +99,7 @@ class PlanarDataset(BaseModel):
         else:
             # TODO: Explore if workflow context can be used to set metadata
             # on the snapshot version for lineage
-            if isinstance(data, pl.DataFrame):
+            if isinstance(data, (pl.DataFrame, pl.LazyFrame)):
                 await asyncio.to_thread(
                     con.insert,
                     self.name,
@@ -133,7 +133,7 @@ class PlanarDataset(BaseModel):
         Returns:
             Ibis table expression that can be further filtered using Ibis methods
         """
-        con = await self._get_connection()
+        con = await _get_connection()

         try:
             table = await asyncio.to_thread(con.table, self.name)
@@ -162,102 +162,9 @@ class PlanarDataset(BaseModel):

     async def delete(self) -> None:
         """Delete the dataset."""
-        con = await self._get_connection()
+        con = await _get_connection()
         try:
             await asyncio.to_thread(con.drop_table, self.name, force=True)
             logger.info("deleted dataset", dataset_name=self.name)
         except Exception as e:
             raise DataError(f"Failed to delete dataset: {e}") from e
-
-    async def _get_connection(self) -> DuckDBBackend:
-        """Get Ibis connection to Ducklake."""
-        config = get_config()
-
-        if not config.data:
-            raise DataError(
-                "Data configuration not found. Please configure 'data' in your planar.yaml"
-            )
-
-        # TODO: Add cached connection pooling or memoize the connection
-        return await self._create_connection(config)
-
-    async def _create_connection(self, config: PlanarConfig) -> DuckDBBackend:
-        """Create Ibis DuckDB connection with Ducklake."""
-        data_config = config.data
-        if not data_config:
-            raise DataError("Data configuration not found")
-
-        # Connect to DuckDB with Ducklake extension
-        con = await asyncio.to_thread(ibis.duckdb.connect, extensions=["ducklake"])
-
-        # Build Ducklake connection string based on catalog type
-        catalog_config = data_config.catalog
-
-        if catalog_config.type == "duckdb":
-            metadata_path = catalog_config.path
-        elif catalog_config.type == "postgres":
-            # Use connection components to build postgres connection string
-            pg = catalog_config
-            metadata_path = f"postgres:dbname={pg.db}"
-            if pg.host:
-                metadata_path += f" host={pg.host}"
-            if pg.port:
-                metadata_path += f" port={pg.port}"
-            if pg.user:
-                metadata_path += f" user={pg.user}"
-            if pg.password:
-                metadata_path += f" password={pg.password}"
-        elif catalog_config.type == "sqlite":
-            metadata_path = f"sqlite:{catalog_config.path}"
-        else:
-            raise ValueError(f"Unsupported catalog type: {catalog_config.type}")
-
-        try:
-            await asyncio.to_thread(con.raw_sql, "INSTALL ducklake")
-            match catalog_config.type:
-                case "sqlite":
-                    await asyncio.to_thread(con.raw_sql, "INSTALL sqlite;")
-                case "postgres":
-                    await asyncio.to_thread(con.raw_sql, "INSTALL postgres;")
-            logger.debug(
-                "installed Ducklake extensions", catalog_type=catalog_config.type
-            )
-        except Exception as e:
-            raise DataError(f"Failed to install Ducklake extensions: {e}") from e
-
-        # Build ATTACH statement
-        attach_sql = f"ATTACH 'ducklake:{metadata_path}' AS planar_ducklake"
-
-        # Add data path from storage config
-        storage = data_config.storage
-        if isinstance(storage, LocalDirectoryConfig):
-            data_path = storage.directory
-        elif isinstance(storage, S3Config):
-            data_path = f"s3://{storage.bucket_name}/"
-        else:
-            # Generic fallback
-            data_path = getattr(storage, "path", None) or getattr(
-                storage, "directory", "."
-            )
-
-        ducklake_catalog = data_config.catalog_name
-        attach_sql += f" (DATA_PATH '{data_path}'"
-        if catalog_config.type != "sqlite":
-            attach_sql += f", METADATA_SCHEMA '{ducklake_catalog}'"
-        attach_sql += ");"
-
-        # Attach to Ducklake
-        try:
-            await asyncio.to_thread(con.raw_sql, attach_sql)
-        except Exception as e:
-            raise DataError(f"Failed to attach to Ducklake: {e}") from e
-
-        await asyncio.to_thread(con.raw_sql, "USE planar_ducklake;")
-        logger.debug(
-            "connection created",
-            catalog=ducklake_catalog,
-            catalog_type=catalog_config.type,
-            attach_sql=attach_sql,
-        )
-
-        return con
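With the connection helpers extracted, `PlanarDataset` is purely the read/write surface, and `write()` now also accepts `pl.LazyFrame` inputs via the same `con.insert` branch as eager frames. A short usage sketch, assuming a configured Planar app context (mirrors the new LazyFrame test; the dataset name is illustrative):

```python
import polars as pl

from planar.data import PlanarDataset


async def build_report() -> pl.DataFrame:
    dataset = await PlanarDataset.create("report_rows")  # illustrative name

    lf = pl.LazyFrame({"id": [1, 2, 3], "value": [10.5, 20.3, 30.1]}).with_columns(
        pl.format("user_{}", pl.col("id")).alias("username")
    )

    # LazyFrames are now accepted directly by write()
    await dataset.write(lf, mode="overwrite")
    return await dataset.to_polars()
```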
planar/data/test_dataset.py
CHANGED
@@ -5,44 +5,21 @@ import pyarrow as pa
 import pytest
 from ibis import literal

-from planar import PlanarApp
 from planar.data import PlanarDataset
-from planar.data.config import DataConfig, SQLiteCatalogConfig
 from planar.data.exceptions import (
     DataError,
     DatasetAlreadyExistsError,
     DatasetNotFoundError,
 )
-from planar.files.storage.config import LocalDirectoryConfig
 from planar.workflows import step


-@pytest.fixture
-def data_config(tmp_path):
-    """Create a test data configuration."""
-    data_dir = tmp_path / "data"
-    data_dir.mkdir(exist_ok=True)
-
-    catalog_path = data_dir / "test.sqlite"
-    storage_path = data_dir / "ducklake_files"
-    storage_path.mkdir(exist_ok=True)
-
-    return DataConfig(
-        catalog=SQLiteCatalogConfig(type="sqlite", path=str(catalog_path)),
-        storage=LocalDirectoryConfig(backend="localdir", directory=str(storage_path)),
-    )
-
-
 @pytest.fixture(name="app")
-def app_fixture(
-    """
-
-    # Add data config to the app's config
-    app.config.data = data_config
-    return app
+def app_fixture(app_with_data):
+    """Use the shared app_with_data fixture as 'app' for this test module."""
+    return app_with_data


-@pytest.mark.asyncio
 async def test_dataset_create(client):
     """Test creating a dataset reference."""
     dataset = await PlanarDataset.create("test_table")
@@ -62,7 +39,6 @@ async def test_dataset_create(client):
     await dataset.delete()


-@pytest.mark.asyncio
 async def test_dataset_create_if_not_exists(client):
     """Test creating a dataset with if_not_exists behavior."""
     # Create dataset and write data to make it exist
@@ -82,7 +58,6 @@ async def test_dataset_create_if_not_exists(client):
     await dataset1.delete()


-@pytest.mark.asyncio
 async def test_dataset_write_and_read_polars(client):
     """Test writing and reading data with Polars."""
     dataset = await PlanarDataset.create("test_polars")
@@ -112,7 +87,6 @@ async def test_dataset_write_and_read_polars(client):
     await dataset.delete()


-@pytest.mark.asyncio
 async def test_dataset_write_and_read_pyarrow(client):
     """Test writing and reading data with PyArrow."""
     dataset = await PlanarDataset.create("test_pyarrow")
@@ -140,7 +114,47 @@ async def test_dataset_write_and_read_pyarrow(client):
     await dataset.delete()


-@pytest.mark.asyncio
+async def test_dataset_write_and_read_lazyframe(client):
+    """Test writing and reading data with Polars LazyFrame."""
+    dataset = await PlanarDataset.create("test_lazyframe")
+
+    # Create test data as LazyFrame with computed columns
+    lf = pl.LazyFrame(
+        {
+            "id": range(5),
+            "name": ["Alice", "Bob", "Charlie", "David", "Eve"],
+            "value": [10.5, 20.3, 30.1, 40.7, 50.9],
+        }
+    ).with_columns(
+        # Use native polars expressions for efficiency
+        pl.format("user_{}", pl.col("id")).alias("username"),
+        pl.col("value").round(1).alias("rounded_value"),
+    )
+
+    # Write LazyFrame data
+    await dataset.write(lf, mode="overwrite")
+
+    # Read data back
+    result = await dataset.to_polars()
+
+    # Verify shape and columns
+    assert result.shape == (5, 5)
+    assert set(result.columns) == {"id", "name", "value", "username", "rounded_value"}
+
+    # Verify the computed columns work correctly
+    assert result["username"].to_list() == [
+        "user_0",
+        "user_1",
+        "user_2",
+        "user_3",
+        "user_4",
+    ]
+    assert result["rounded_value"].to_list() == [10.5, 20.3, 30.1, 40.7, 50.9]
+
+    # Cleanup
+    await dataset.delete()
+
+
 async def test_dataset_append_mode(client):
     """Test appending data to a dataset."""
     dataset = await PlanarDataset.create("test_append")
@@ -164,7 +178,6 @@ async def test_dataset_append_mode(client):
     await dataset.delete()


-@pytest.mark.asyncio
 async def test_dataset_overwrite_replaces_existing(client):
     """Overwrite should replace existing rows completely."""
     dataset = await PlanarDataset.create("test_overwrite")
@@ -184,7 +197,6 @@ async def test_dataset_overwrite_replaces_existing(client):
     await dataset.delete()


-@pytest.mark.asyncio
 async def test_dataset_read_with_filter(client):
     """Test reading data with Ibis filtering."""
     dataset = await PlanarDataset.create("test_filter")
@@ -204,7 +216,6 @@ async def test_dataset_read_with_filter(client):
     await dataset.delete()


-@pytest.mark.asyncio
 async def test_dataset_read_with_columns_and_limit(client):
     """Test reading specific columns with limit."""
     dataset = await PlanarDataset.create("test_select")
@@ -232,7 +243,6 @@ async def test_dataset_read_with_columns_and_limit(client):
     await dataset.delete()


-@pytest.mark.asyncio
 async def test_dataset_not_found(client):
     """Test reading from non-existent dataset."""
     dataset = PlanarDataset(name="nonexistent")
@@ -245,7 +255,6 @@ async def test_dataset_not_found(client):
         await dataset.read()


-@pytest.mark.asyncio
 async def test_dataset_delete(client):
     """Test deleting a dataset."""
     dataset = await PlanarDataset.create("test_delete")
@@ -264,7 +273,6 @@ async def test_dataset_delete(client):
     assert not await dataset.exists()


-@pytest.mark.asyncio
 async def test_dataset_write_list_of_dicts(client):
     """Write list-of-dicts input and read back with Polars."""
     dataset = await PlanarDataset.create("test_list_of_dicts")
@@ -279,7 +287,6 @@ async def test_dataset_write_list_of_dicts(client):
     await dataset.delete()


-@pytest.mark.asyncio
 async def test_dataset_write_dict_of_lists(client):
     """Write dict-of-lists input and read back with Polars."""
     dataset = await PlanarDataset.create("test_dict_of_lists")
@@ -294,7 +301,6 @@ async def test_dataset_write_dict_of_lists(client):
     await dataset.delete()


-@pytest.mark.asyncio
 async def test_dataset_workflow_serialization(client):
     """Test that PlanarDataset can be used as workflow input/output."""

@@ -327,7 +333,6 @@ async def test_dataset_workflow_serialization(client):
     await dataset.delete()


-@pytest.mark.asyncio
 async def test_no_data_config_error(client):
     """Test error when data config is not set."""
     # Remove data config
@@ -336,10 +341,9 @@ async def test_no_data_config_error(client):
     dataset = PlanarDataset(name="test")

     with pytest.raises(DataError, match="Data configuration not found"):
-        await dataset.
+        await dataset.exists()


-@pytest.mark.asyncio
 async def test_write_with_invalid_input_raises(client):
     """Unknown input types to write() should raise a DataError."""