planar 0.9.2__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- planar/ai/agent.py +3 -1
- planar/ai/agent_base.py +24 -5
- planar/ai/state.py +17 -0
- planar/ai/test_agent_tool_step_display.py +78 -0
- planar/app.py +5 -0
- planar/cli.py +9 -1
- planar/data/connection.py +108 -0
- planar/data/dataset.py +11 -104
- planar/data/test_dataset.py +45 -41
- planar/data/utils.py +89 -0
- planar/db/alembic/env.py +25 -1
- planar/db/alembic/versions/8855a78a408f_message_step_type.py +30 -0
- planar/db/alembic.ini +5 -5
- planar/files/__init__.py +3 -0
- planar/files/storage/azure_blob.py +1 -1
- planar/registry_items.py +2 -0
- planar/routers/dataset_router.py +213 -0
- planar/routers/models.py +1 -0
- planar/routers/test_dataset_router.py +429 -0
- planar/routers/test_workflow_router.py +26 -1
- planar/routers/workflow.py +2 -0
- planar/scaffold_templates/main.py.j2 +6 -1
- planar/security/authorization.py +31 -3
- planar/security/default_policies.cedar +25 -0
- planar/testing/fixtures.py +32 -2
- planar/testing/planar_test_client.py +1 -1
- planar/utils.py +17 -0
- planar/workflows/contrib.py +8 -0
- planar/workflows/decorators.py +2 -1
- planar/workflows/models.py +1 -0
- planar/workflows/test_workflow.py +39 -1
- planar/workflows/wrappers.py +1 -0
- {planar-0.9.2.dist-info → planar-0.10.0.dist-info}/METADATA +16 -17
- {planar-0.9.2.dist-info → planar-0.10.0.dist-info}/RECORD +36 -29
- {planar-0.9.2.dist-info → planar-0.10.0.dist-info}/WHEEL +1 -1
- {planar-0.9.2.dist-info → planar-0.10.0.dist-info}/entry_points.txt +0 -0
planar/data/test_dataset.py
CHANGED
@@ -5,44 +5,21 @@ import pyarrow as pa
 import pytest
 from ibis import literal
 
-from planar import PlanarApp
 from planar.data import PlanarDataset
-from planar.data.config import DataConfig, SQLiteCatalogConfig
 from planar.data.exceptions import (
     DataError,
     DatasetAlreadyExistsError,
     DatasetNotFoundError,
 )
-from planar.files.storage.config import LocalDirectoryConfig
 from planar.workflows import step
 
 
-@pytest.fixture
-def data_config(tmp_path):
-    """Create a test data configuration."""
-    data_dir = tmp_path / "data"
-    data_dir.mkdir(exist_ok=True)
-
-    catalog_path = data_dir / "test.sqlite"
-    storage_path = data_dir / "ducklake_files"
-    storage_path.mkdir(exist_ok=True)
-
-    return DataConfig(
-        catalog=SQLiteCatalogConfig(type="sqlite", path=str(catalog_path)),
-        storage=LocalDirectoryConfig(backend="localdir", directory=str(storage_path)),
-    )
-
-
 @pytest.fixture(name="app")
-def app_fixture(
-    """
-
-    # Add data config to the app's config
-    app.config.data = data_config
-    return app
+def app_fixture(app_with_data):
+    """Use the shared app_with_data fixture as 'app' for this test module."""
+    return app_with_data
 
 
-@pytest.mark.asyncio
 async def test_dataset_create(client):
     """Test creating a dataset reference."""
     dataset = await PlanarDataset.create("test_table")
@@ -62,7 +39,6 @@ async def test_dataset_create(client):
     await dataset.delete()
 
 
-@pytest.mark.asyncio
 async def test_dataset_create_if_not_exists(client):
     """Test creating a dataset with if_not_exists behavior."""
     # Create dataset and write data to make it exist
@@ -82,7 +58,6 @@ async def test_dataset_create_if_not_exists(client):
     await dataset1.delete()
 
 
-@pytest.mark.asyncio
 async def test_dataset_write_and_read_polars(client):
     """Test writing and reading data with Polars."""
     dataset = await PlanarDataset.create("test_polars")
@@ -112,7 +87,6 @@ async def test_dataset_write_and_read_polars(client):
     await dataset.delete()
 
 
-@pytest.mark.asyncio
 async def test_dataset_write_and_read_pyarrow(client):
     """Test writing and reading data with PyArrow."""
     dataset = await PlanarDataset.create("test_pyarrow")
@@ -140,7 +114,47 @@ async def test_dataset_write_and_read_pyarrow(client):
     await dataset.delete()
 
 
-
+async def test_dataset_write_and_read_lazyframe(client):
+    """Test writing and reading data with Polars LazyFrame."""
+    dataset = await PlanarDataset.create("test_lazyframe")
+
+    # Create test data as LazyFrame with computed columns
+    lf = pl.LazyFrame(
+        {
+            "id": range(5),
+            "name": ["Alice", "Bob", "Charlie", "David", "Eve"],
+            "value": [10.5, 20.3, 30.1, 40.7, 50.9],
+        }
+    ).with_columns(
+        # Use native polars expressions for efficiency
+        pl.format("user_{}", pl.col("id")).alias("username"),
+        pl.col("value").round(1).alias("rounded_value"),
+    )
+
+    # Write LazyFrame data
+    await dataset.write(lf, mode="overwrite")
+
+    # Read data back
+    result = await dataset.to_polars()
+
+    # Verify shape and columns
+    assert result.shape == (5, 5)
+    assert set(result.columns) == {"id", "name", "value", "username", "rounded_value"}
+
+    # Verify the computed columns work correctly
+    assert result["username"].to_list() == [
+        "user_0",
+        "user_1",
+        "user_2",
+        "user_3",
+        "user_4",
+    ]
+    assert result["rounded_value"].to_list() == [10.5, 20.3, 30.1, 40.7, 50.9]
+
+    # Cleanup
+    await dataset.delete()
+
+
 async def test_dataset_append_mode(client):
     """Test appending data to a dataset."""
     dataset = await PlanarDataset.create("test_append")
@@ -164,7 +178,6 @@ async def test_dataset_append_mode(client):
     await dataset.delete()
 
 
-@pytest.mark.asyncio
 async def test_dataset_overwrite_replaces_existing(client):
     """Overwrite should replace existing rows completely."""
     dataset = await PlanarDataset.create("test_overwrite")
@@ -184,7 +197,6 @@ async def test_dataset_overwrite_replaces_existing(client):
     await dataset.delete()
 
 
-@pytest.mark.asyncio
 async def test_dataset_read_with_filter(client):
     """Test reading data with Ibis filtering."""
     dataset = await PlanarDataset.create("test_filter")
@@ -204,7 +216,6 @@ async def test_dataset_read_with_filter(client):
     await dataset.delete()
 
 
-@pytest.mark.asyncio
 async def test_dataset_read_with_columns_and_limit(client):
     """Test reading specific columns with limit."""
     dataset = await PlanarDataset.create("test_select")
@@ -232,7 +243,6 @@ async def test_dataset_read_with_columns_and_limit(client):
     await dataset.delete()
 
 
-@pytest.mark.asyncio
 async def test_dataset_not_found(client):
     """Test reading from non-existent dataset."""
     dataset = PlanarDataset(name="nonexistent")
@@ -245,7 +255,6 @@ async def test_dataset_not_found(client):
         await dataset.read()
 
 
-@pytest.mark.asyncio
 async def test_dataset_delete(client):
     """Test deleting a dataset."""
     dataset = await PlanarDataset.create("test_delete")
@@ -264,7 +273,6 @@ async def test_dataset_delete(client):
     assert not await dataset.exists()
 
 
-@pytest.mark.asyncio
 async def test_dataset_write_list_of_dicts(client):
     """Write list-of-dicts input and read back with Polars."""
     dataset = await PlanarDataset.create("test_list_of_dicts")
@@ -279,7 +287,6 @@ async def test_dataset_write_list_of_dicts(client):
     await dataset.delete()
 
 
-@pytest.mark.asyncio
 async def test_dataset_write_dict_of_lists(client):
     """Write dict-of-lists input and read back with Polars."""
     dataset = await PlanarDataset.create("test_dict_of_lists")
@@ -294,7 +301,6 @@ async def test_dataset_write_dict_of_lists(client):
     await dataset.delete()
 
 
-@pytest.mark.asyncio
 async def test_dataset_workflow_serialization(client):
     """Test that PlanarDataset can be used as workflow input/output."""
 
@@ -327,7 +333,6 @@ async def test_dataset_workflow_serialization(client):
     await dataset.delete()
 
 
-@pytest.mark.asyncio
 async def test_no_data_config_error(client):
     """Test error when data config is not set."""
     # Remove data config
@@ -336,10 +341,9 @@ async def test_no_data_config_error(client):
     dataset = PlanarDataset(name="test")
 
     with pytest.raises(DataError, match="Data configuration not found"):
-        await dataset.
+        await dataset.exists()
 
 
-@pytest.mark.asyncio
 async def test_write_with_invalid_input_raises(client):
     """Unknown input types to write() should raise a DataError."""
 
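
The removed per-test @pytest.mark.asyncio markers and the local data_config/app fixtures are replaced by a shared app_with_data fixture, presumably provided by planar/testing/fixtures.py (also touched in this release) together with pytest-asyncio running in auto mode. As a rough illustration only, a shared fixture along these lines could reproduce what the deleted local fixtures did; the app fixture name and the exact wiring are assumptions, not code from this package:

    import pytest

    from planar.data.config import DataConfig, SQLiteCatalogConfig
    from planar.files.storage.config import LocalDirectoryConfig


    @pytest.fixture
    def app_with_data(app, tmp_path):
        # Hypothetical sketch: mirrors the removed data_config fixture
        # (SQLite catalog + local DuckLake file storage) and attaches it
        # to an existing app fixture, as the old app_fixture did.
        data_dir = tmp_path / "data"
        data_dir.mkdir(exist_ok=True)
        storage_path = data_dir / "ducklake_files"
        storage_path.mkdir(exist_ok=True)

        app.config.data = DataConfig(
            catalog=SQLiteCatalogConfig(type="sqlite", path=str(data_dir / "test.sqlite")),
            storage=LocalDirectoryConfig(backend="localdir", directory=str(storage_path)),
        )
        return app
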
planar/data/utils.py
ADDED
@@ -0,0 +1,89 @@
+import asyncio
+from typing import TypedDict
+
+import ibis.expr.datatypes as dt
+from ibis.common.exceptions import TableNotFound
+
+from planar.data.connection import _get_connection
+from planar.data.dataset import PlanarDataset
+from planar.data.exceptions import DatasetNotFoundError
+from planar.logging import get_logger
+
+logger = get_logger(__name__)
+
+
+# TODO: consider connection pooling or memoize the connection
+
+
+async def list_datasets(limit: int = 100, offset: int = 0) -> list[PlanarDataset]:
+    conn = await _get_connection()
+    tables = await asyncio.to_thread(conn.list_tables)
+    return [PlanarDataset(name=table) for table in tables]
+
+
+async def list_schemas() -> list[str]:
+    METADATA_SCHEMAS = [
+        "information_schema",
+        # FIXME: why is list_databases returning pg_catalog
+        # if the ducklake catalog is sqlite?
+        "pg_catalog",
+    ]
+
+    conn = await _get_connection()
+
+    # in ibis, "databases" are schemas in the traditional sense
+    # e.g. psql: schema == ibis: database
+    # https://ibis-project.org/concepts/backend-table-hierarchy
+    schemas = await asyncio.to_thread(conn.list_databases)
+
+    return [schema for schema in schemas if schema not in METADATA_SCHEMAS]
+
+
+async def get_dataset(dataset_name: str, schema_name: str = "main") -> PlanarDataset:
+    # TODO: add schema_name as a parameter
+
+    dataset = PlanarDataset(name=dataset_name)
+
+    if not await dataset.exists():
+        raise DatasetNotFoundError(f"Dataset {dataset_name} not found")
+
+    return dataset
+
+
+async def get_dataset_row_count(dataset_name: str) -> int:
+    conn = await _get_connection()
+
+    try:
+        value = await asyncio.to_thread(
+            lambda conn, dataset_name: conn.table(dataset_name).count().to_polars(),
+            conn,
+            dataset_name,
+        )
+
+        assert isinstance(value, int), "Scalar must be an integer"
+
+        return value
+    except TableNotFound:
+        raise  # re-raise the exception and allow the caller to handle it
+
+
+class DatasetMetadata(TypedDict):
+    schema: dict[str, dt.DataType]
+    row_count: int
+
+
+async def get_dataset_metadata(
+    dataset_name: str, schema_name: str
+) -> DatasetMetadata | None:
+    conn = await _get_connection()
+
+    try:
+        schema, row_count = await asyncio.gather(
+            asyncio.to_thread(conn.get_schema, dataset_name, database=schema_name),
+            get_dataset_row_count(dataset_name),
+        )
+
+        return DatasetMetadata(schema=schema.fields, row_count=row_count)
+
+    except TableNotFound:
+        return None
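
Taken together, these helpers give the new dataset router a thin async facade over the DuckLake/ibis connection. A minimal usage sketch, assuming a Planar app with data configured is already active in the current context:

    import asyncio

    from planar.data.utils import get_dataset_metadata, list_datasets


    async def describe_datasets() -> None:
        for dataset in await list_datasets():
            metadata = await get_dataset_metadata(dataset.name, "main")
            if metadata is None:
                continue  # table vanished between listing and inspection
            print(dataset.name, metadata["row_count"], dict(metadata["schema"]))


    # asyncio.run(describe_datasets())  # requires Planar's data context to be set up
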
planar/db/alembic/env.py
CHANGED
@@ -1,5 +1,7 @@
+from functools import wraps
 from logging.config import fileConfig
 
+import alembic.ddl.base as alembic_base
 from alembic import context
 from sqlalchemy import Connection, engine_from_config, pool
 
@@ -48,6 +50,28 @@ def include_name(name, type_, _):
     return True
 
 
+sqlite_schema_translate_map = {PLANAR_SCHEMA: None}
+
+
+def schema_translate_wrapper(f):
+    @wraps(f)
+    def format_table_name_with_schema(compiler, name, schema):
+        # when on sqlite, we need to translate the schema to None
+        is_sqlite = compiler.dialect.name == "sqlite"
+        if is_sqlite:
+            translated_schema = sqlite_schema_translate_map.get(schema, schema)
+        else:
+            translated_schema = schema
+        return f(compiler, name, translated_schema)
+
+    return format_table_name_with_schema
+
+
+alembic_base.format_table_name = schema_translate_wrapper(
+    alembic_base.format_table_name
+)
+
+
 def run_migrations_online() -> None:
     """Run migrations in 'online' mode.
 
@@ -102,7 +126,7 @@ def run_migrations_online() -> None:
     config_dict = config.get_section(config.config_ini_section, {})
     url = config_dict["sqlalchemy.url"]
     is_sqlite = url.startswith("sqlite://")
-    translate_map =
+    translate_map = sqlite_schema_translate_map if is_sqlite else {}
     connectable = engine_from_config(
         config_dict,
         prefix="sqlalchemy.",
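
The wrapper above complements SQLAlchemy's schema_translate_map, which Planar already uses to remap the planar schema to None on SQLite (SQLite has no schema support) but which Alembic does not fully honor for its own DDL formatting, per the alembic.ini comments later in this diff. A minimal sketch of the underlying SQLAlchemy feature, using a hypothetical table name rather than Planar's real models:

    from sqlalchemy import Column, Integer, MetaData, Table, create_engine

    metadata = MetaData()
    # Hypothetical table for illustration; Planar's real tables live in the "planar" schema.
    workflow = Table("workflow", metadata, Column("id", Integer, primary_key=True), schema="planar")

    # On SQLite, every reference to the "planar" schema is rewritten to None at execution time.
    engine = create_engine("sqlite:///:memory:").execution_options(
        schema_translate_map={"planar": None}
    )

    with engine.begin() as conn:
        metadata.create_all(conn)  # compiles to CREATE TABLE workflow, not planar.workflow
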
planar/db/alembic/versions/8855a78a408f_message_step_type.py
ADDED
@@ -0,0 +1,30 @@
+"""Add MESSAGE to step_type enum
+
+Revision ID: 8855a78a408f
+Revises: 3476068c153c
+Create Date: 2025-09-16 16:19:25.917861
+
+"""
+
+from typing import Sequence, Union
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "8855a78a408f"
+down_revision: Union[str, None] = "3476068c153c"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    if op.get_context().dialect.name != "sqlite":
+        op.execute("ALTER TYPE steptype ADD VALUE 'MESSAGE'")
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic - please adjust! ###
+    # Rolling this back would require updating any MESSAGE `WorkflowStep` rows to a different
+    # step type or deleting them before running a migration to drop the MESSAGE value.
+    pass
+    # ### end Alembic commands ###
planar/db/alembic.ini
CHANGED
@@ -62,12 +62,12 @@ version_path_separator = os
 # output_encoding = utf-8
 
 # Development database for generating system migrations
-# It's safer for us to use a local postgres database for generating and testing migrations rather than sqlite,
+# It's safer for us to use a local postgres database for generating and testing migrations rather than sqlite,
 # to be sure they'll work in production deployments.
-# Using postgres as the dev database for autogenerating revisions also is better because
-# we don't have the weird schema issues of Sqlite. Alembic doesn't fully support `schema_translate_map`
-# feature in SA that we use to remap `planar`->None in SQLite (due to it not supporting schemas),
-# so it sometimes incorrectly thinks it needs to re-generate things (like indices) that already
+# Using postgres as the dev database for autogenerating revisions also is better because
+# we don't have the weird schema issues of Sqlite. Alembic doesn't fully support `schema_translate_map`
+# feature in SA that we use to remap `planar`->None in SQLite (due to it not supporting schemas),
+# so it sometimes incorrectly thinks it needs to re-generate things (like indices) that already
 # exist in the database from a prior migration. Using postgres obviates that issue.
 # https://github.com/sqlalchemy/alembic/issues/555
 sqlalchemy.url = postgresql+psycopg2://postgres:postgres@localhost:5432/postgres
planar/files/storage/azure_blob.py
CHANGED
@@ -278,7 +278,7 @@ class AzureBlobStorage(Storage):
 
         elif self.auth_method.name == "AZURE_AD":
             # Generate a User Delegation SAS signed with a user delegation key
-            start_time = datetime.
+            start_time = datetime.now(UTC)
             user_delegation_key = await self.client.get_user_delegation_key(
                 key_start_time=start_time, key_expiry_time=expiry_time
             )
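
The removed line is cut off in this view, so the exact previous expression is not visible; the new code builds a timezone-aware UTC timestamp for the SAS start time. For reference only, a standard-library illustration of naive versus aware UTC timestamps (not code from this package):

    from datetime import UTC, datetime

    naive = datetime.utcnow()   # tzinfo is None; deprecated since Python 3.12
    aware = datetime.now(UTC)   # tzinfo is UTC; carries an explicit offset when serialized
    print(naive.tzinfo, aware.tzinfo)
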
planar/registry_items.py
CHANGED
@@ -47,6 +47,7 @@ class RegisteredWorkflow:
     input_schema: dict[str, Any]
     output_schema: dict[str, Any]
     pydantic_model: Type[BaseModel]
+    is_interactive: bool
 
     @staticmethod
     def from_workflow(workflow: "WorkflowWrapper") -> "RegisteredWorkflow":
@@ -63,4 +64,5 @@ class RegisteredWorkflow:
                 workflow.original_fn
             ),
             pydantic_model=create_pydantic_model_for_workflow(workflow),
+            is_interactive=workflow.is_interactive,
         )
planar/routers/dataset_router.py
ADDED
@@ -0,0 +1,213 @@
+import io
+from typing import AsyncGenerator
+
+import pyarrow as pa
+import pyarrow.parquet as pq
+from fastapi import APIRouter, HTTPException, Query
+from fastapi.responses import StreamingResponse
+from ibis.common.exceptions import TableNotFound
+from pydantic import BaseModel
+
+from planar.data.exceptions import DatasetNotFoundError
+from planar.data.utils import (
+    get_dataset,
+    get_dataset_metadata,
+    list_datasets,
+    list_schemas,
+)
+from planar.logging import get_logger
+from planar.security.authorization import (
+    DatasetAction,
+    DatasetResource,
+    validate_authorization_for,
+)
+
+logger = get_logger(__name__)
+
+
+class DatasetMetadata(BaseModel):
+    name: str
+    table_schema: dict
+    row_count: int
+
+
+def create_dataset_router() -> APIRouter:
+    router = APIRouter(tags=["Planar Datasets"])
+
+    @router.get("/schemas", response_model=list[str])
+    async def get_schemas():
+        validate_authorization_for(
+            DatasetResource(), DatasetAction.DATASET_LIST_SCHEMAS
+        )
+        schemas = await list_schemas()
+        return schemas
+
+    @router.get("/metadata", response_model=list[DatasetMetadata])
+    async def list_planar_datasets(
+        limit: int = Query(100, ge=1, le=1000),
+        offset: int = Query(0, ge=0),
+        schema_name: str = Query("main"),
+    ):
+        validate_authorization_for(DatasetResource(), DatasetAction.DATASET_LIST)
+        datasets = await list_datasets(limit, offset)
+
+        response = []
+        for dataset in datasets:
+            metadata = await get_dataset_metadata(dataset.name, schema_name)
+
+            if not metadata:
+                continue
+
+            schema = metadata["schema"]
+            row_count = metadata["row_count"]
+
+            response.append(
+                DatasetMetadata(
+                    name=dataset.name,
+                    row_count=row_count,
+                    table_schema={
+                        field_name: str(field_type)
+                        for field_name, field_type in schema.items()
+                    },
+                )
+            )
+
+        return response
+
+    @router.get("/metadata/{dataset_name}", response_model=DatasetMetadata)
+    async def get_planar_dataset(dataset_name: str, schema_name: str = "main"):
+        validate_authorization_for(
+            DatasetResource(dataset_name=dataset_name),
+            DatasetAction.DATASET_VIEW_DETAILS,
+        )
+        try:
+            metadata = await get_dataset_metadata(dataset_name, schema_name)
+
+            if not metadata:
+                raise HTTPException(
+                    status_code=404, detail=f"Dataset {dataset_name} not found"
+                )
+
+            schema = metadata["schema"]
+            row_count = metadata["row_count"]
+
+            return DatasetMetadata(
+                name=dataset_name,
+                row_count=row_count,
+                table_schema={
+                    field_name: str(field_type)
+                    for field_name, field_type in schema.items()
+                },
+            )
+        except (DatasetNotFoundError, TableNotFound):
+            raise HTTPException(
+                status_code=404, detail=f"Dataset {dataset_name} not found"
+            )
+
+    @router.get(
+        "/content/{dataset_name}/arrow-stream", response_class=StreamingResponse
+    )
+    async def stream_dataset_content(
+        dataset_name: str,
+        batch_size: int = Query(100, ge=1, le=1000),
+        limit: int | None = Query(None, ge=1),
+    ):
+        validate_authorization_for(
+            DatasetResource(dataset_name=dataset_name),
+            DatasetAction.DATASET_STREAM_CONTENT,
+        )
+        try:
+            dataset = await get_dataset(dataset_name)
+
+            # Apply limit parameter if specified
+            table = await dataset.read(limit=limit)
+
+            schema = table.schema().to_pyarrow()
+
+            async def stream_content() -> AsyncGenerator[bytes, None]:
+                sink = io.BytesIO()
+
+                try:
+                    with pa.ipc.new_stream(sink, schema) as writer:
+                        yield sink.getvalue()  # yield the schema
+
+                        batch_count = 0
+                        for batch in table.to_pyarrow_batches(chunk_size=batch_size):
+                            # reset the sink to only stream
+                            # the current batch
+                            # we don't want to stream the schema or previous
+                            # batches again
+                            sink.seek(0)
+                            sink.truncate(0)
+
+                            writer.write_batch(batch)
+                            yield sink.getvalue()
+                            batch_count += 1
+
+                        # For empty datasets, ensure we have a complete stream
+                        if batch_count == 0:
+                            # Write an empty batch to ensure valid Arrow stream format
+                            empty_batch = pa.RecordBatch.from_arrays(
+                                [pa.array([], type=field.type) for field in schema],
+                                schema=schema,
+                            )
+                            sink.seek(0)
+                            sink.truncate(0)
+                            writer.write_batch(empty_batch)
+                            yield sink.getvalue()
+                finally:
+                    # Explicit BytesIO cleanup for memory safety
+                    sink.close()
+
+            return StreamingResponse(
+                stream_content(),
+                media_type="application/vnd.apache.arrow.stream",
+                headers={
+                    "Content-Disposition": f"attachment; filename={dataset_name}.arrow",
+                    "X-Batch-Size": str(batch_size),
+                    "X-Row-Limit": str(limit) if limit else "unlimited",
+                },
+            )
+        except (DatasetNotFoundError, TableNotFound):
+            raise HTTPException(
+                status_code=404, detail=f"Dataset {dataset_name} not found"
+            )
+
+    @router.get("/content/{dataset_name}/download")
+    async def download_dataset(dataset_name: str, schema_name: str = "main"):
+        validate_authorization_for(
+            DatasetResource(dataset_name=dataset_name),
+            DatasetAction.DATASET_DOWNLOAD,
+        )
+        try:
+            arrow_buffer = pa.BufferOutputStream()
+            dataset = await get_dataset(dataset_name, schema_name)
+
+            pyarrow_table = await dataset.to_pyarrow()
+
+            pq.write_table(pyarrow_table, arrow_buffer)
+
+            if arrow_buffer.tell() == 0:
+                logger.warning(
+                    "Dataset is empty",
+                    dataset_name=dataset_name,
+                    schema_name=schema_name,
+                )
+
+            buffer = arrow_buffer.getvalue()
+            parquet_bytes = buffer.to_pybytes()
+            bytes_io = io.BytesIO(parquet_bytes)
+
+            return StreamingResponse(
+                bytes_io,
+                media_type="application/x-parquet",
+                headers={
+                    "Content-Disposition": f"attachment; filename={dataset_name}.parquet"
+                },
+            )
+        except (DatasetNotFoundError, TableNotFound):
+            raise HTTPException(
+                status_code=404, detail=f"Dataset {dataset_name} not found"
+            )
+
+    return router
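
The arrow-stream endpoint emits an Arrow IPC stream (schema message followed by record batches), while the download endpoint returns a Parquet file. A client-side sketch for the streaming route; the base URL, route prefix, dataset name, and the use of requests are assumptions about a particular deployment, not part of this package:

    import pyarrow as pa
    import requests

    resp = requests.get(
        "http://localhost:8000/planar/v1/datasets/content/my_dataset/arrow-stream",  # assumed prefix
        params={"batch_size": 500},
    )
    resp.raise_for_status()

    # pa.ipc.open_stream understands the application/vnd.apache.arrow.stream payload.
    reader = pa.ipc.open_stream(resp.content)
    table = reader.read_all()
    print(table.schema)
    print(table.num_rows)
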