planar 0.9.3__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,429 @@
1
+ import math
2
+
3
+ import polars as pl
4
+ import pyarrow as pa
5
+ import pytest
6
+
7
+ from planar.data.dataset import PlanarDataset
8
+ from planar.testing.planar_test_client import PlanarTestClient
9
+
10
+
11
@pytest.fixture(name="app")
def app_fixture(app_with_data):
    """Use the shared app_with_data fixture as 'app' for this test module.

    Returns the ``app_with_data`` fixture value unchanged, so anything in this
    module that requests ``app`` receives that same object.
    """
    return app_with_data
15
+
16
+
17
async def test_stream_arrow_chunks(
    client: PlanarTestClient,
):
    """Stream a 10,000-row dataset as Arrow IPC and verify batching and content.

    Writes ``dataset_size`` rows, requests the ``arrow-stream`` endpoint with a
    fixed ``batch_size``, then checks the response headers, the number of
    record batches, the total row count, and that every ``id`` arrives exactly
    once.
    """
    dataset_name = "test_streaming"
    dataset_size = 10_000
    batch_size = 1000

    dataset = await PlanarDataset.create(dataset_name)

    df = pl.DataFrame({"id": range(dataset_size)}).with_columns(
        pl.format("value_{}", pl.col("id")).alias("value")
    )

    await dataset.write(df)

    response = await client.get(
        f"/planar/v1/datasets/content/{dataset_name}/arrow-stream",
        params={"batch_size": batch_size, "limit": dataset_size},
    )

    assert response.status_code == 200
    assert response.headers["content-type"] == "application/vnd.apache.arrow.stream"
    # Derive the expected filename from dataset_name so this check cannot drift
    # from the dataset created above.
    assert f"{dataset_name}.arrow" in response.headers.get("content-disposition", "")
    assert response.headers.get("x-batch-size") == str(batch_size)

    content = await response.aread()
    reader = pa.ipc.open_stream(pa.py_buffer(content))

    # The stream reader is iterable and stops at end-of-stream, so no manual
    # read_next_batch()/StopIteration loop is required.
    batch_row_counts = []
    all_ids = []
    for arrow_batch in reader:
        batch_row_counts.append(arrow_batch.num_rows)
        all_ids.extend(arrow_batch.column("id").to_pylist())

    expected_batches = math.ceil(dataset_size / batch_size)

    assert len(batch_row_counts) == expected_batches
    assert sum(batch_row_counts) == dataset_size

    # Verify data integrity - check that we received all expected IDs
    assert len(all_ids) == dataset_size
    assert set(all_ids) == set(range(dataset_size))
    assert sum(all_ids) == sum(range(dataset_size))
76
+
77
+
78
async def test_stream_arrow_with_limit(
    client: PlanarTestClient,
):
    """Check that ``limit`` caps how many rows the arrow-stream endpoint returns."""
    dataset_name = "test_streaming_limit"
    dataset_size = 1000
    batch_size = 100
    row_limit = 250  # expected to arrive as 3 batches (100 + 100 + 50)

    dataset = await PlanarDataset.create(dataset_name)

    # Populate the dataset with more rows than the limit allows through.
    source_frame = pl.DataFrame({"id": range(dataset_size)}).with_columns(
        pl.format("value_{}", pl.col("id")).alias("value")
    )
    await dataset.write(source_frame)

    query = {"batch_size": batch_size, "limit": row_limit}
    response = await client.get(
        f"/planar/v1/datasets/content/{dataset_name}/arrow-stream",
        params=query,
    )

    assert response.status_code == 200
    assert response.headers["x-row-limit"] == str(row_limit)

    payload = await response.aread()
    stream_reader = pa.ipc.open_stream(pa.py_buffer(payload))

    # Iterating the reader yields record batches until the stream ends.
    row_counts = [record_batch.num_rows for record_batch in stream_reader]

    # Exactly the limited number of rows must arrive ...
    assert sum(row_counts) == row_limit
    # ... split into ceil(limit / batch_size) batches (3: 100, 100, 50).
    assert len(row_counts) == math.ceil(row_limit / batch_size)
124
+
125
+
126
async def test_stream_arrow_empty_dataset(
    client: PlanarTestClient,
):
    """Stream a dataset with zero rows and expect a single empty record batch."""
    dataset_name = "test_empty_stream"
    batch_size = 100

    dataset = await PlanarDataset.create(dataset_name)

    # A typed frame without rows: the dataset gets a schema but no data.
    empty_frame = pl.DataFrame(
        {"id": [], "value": []}, schema={"id": pl.Int64, "value": pl.Utf8}
    )
    await dataset.write(empty_frame)

    response = await client.get(
        f"/planar/v1/datasets/content/{dataset_name}/arrow-stream",
        params={"batch_size": batch_size},
    )
    assert response.status_code == 200

    payload = await response.aread()
    stream_reader = pa.ipc.open_stream(pa.py_buffer(payload))

    # The schema must be readable and the stream must still carry a batch.
    batches = list(stream_reader)

    # Exactly one empty batch is expected (the fallback for empty datasets).
    assert len(batches) == 1
    assert sum(record_batch.num_rows for record_batch in batches) == 0
167
+
168
+
169
async def test_stream_arrow_single_batch(
    client: PlanarTestClient,
):
    """Stream a dataset smaller than ``batch_size`` and expect one batch."""
    dataset_name = "test_single_batch"
    dataset_size = 50
    batch_size = 100

    dataset = await PlanarDataset.create(dataset_name)

    source_frame = pl.DataFrame({"id": range(dataset_size)}).with_columns(
        pl.format("value_{}", pl.col("id")).alias("value")
    )
    await dataset.write(source_frame)

    response = await client.get(
        f"/planar/v1/datasets/content/{dataset_name}/arrow-stream",
        params={"batch_size": batch_size},
    )
    assert response.status_code == 200

    payload = await response.aread()
    stream_reader = pa.ipc.open_stream(pa.py_buffer(payload))

    # Collect the row count of every batch the stream delivers.
    row_counts = [record_batch.num_rows for record_batch in stream_reader]

    assert len(row_counts) == 1
    assert sum(row_counts) == dataset_size
209
+
210
+
211
async def test_get_schemas_endpoint(
    client: PlanarTestClient,
):
    """GET /schemas returns a list of schema names that includes ``main``."""
    response = await client.get("/planar/v1/datasets/schemas")
    assert response.status_code == 200

    schema_names = response.json()
    assert isinstance(schema_names, list)
    # The default schema should exist.
    assert "main" in schema_names
221
+
222
+
223
async def test_list_datasets_metadata_endpoint(
    client: PlanarTestClient,
):
    """GET /metadata lists datasets, including one created by this test."""
    # Seed a dataset so the listing has a known entry to look for.
    dataset_name = "test_list_datasets"
    dataset = await PlanarDataset.create(dataset_name)
    await dataset.write(pl.DataFrame({"id": [1, 2, 3], "name": ["a", "b", "c"]}))

    response = await client.get("/planar/v1/datasets/metadata")
    assert response.status_code == 200

    listing = response.json()
    assert isinstance(listing, list)

    # Locate the first entry describing the dataset created above.
    found = None
    for entry in listing:
        if entry["name"] == dataset_name:
            found = entry
            break

    assert found is not None
    assert found["row_count"] == 3
    table_schema = found["table_schema"]
    assert "id" in table_schema
    assert "name" in table_schema
246
+
247
+
248
async def test_list_datasets_metadata_with_pagination(
    client: PlanarTestClient,
):
    """GET /metadata accepts pagination parameters and honours ``limit``."""
    pagination = {"limit": 5, "offset": 0, "schema_name": "main"}
    response = await client.get("/planar/v1/datasets/metadata", params=pagination)
    assert response.status_code == 200

    page = response.json()
    assert isinstance(page, list)
    # The page must not contain more entries than the requested limit.
    assert len(page) <= 5
261
+
262
+
263
async def test_get_dataset_metadata_endpoint(
    client: PlanarTestClient,
):
    """GET /metadata/{dataset_name} reports the name, row count and columns."""
    dataset_name = "test_single_metadata"
    ids = [1, 2, 3, 4, 5]
    values = ["apple", "banana", "cherry", "date", "elderberry"]

    dataset = await PlanarDataset.create(dataset_name)
    await dataset.write(pl.DataFrame({"id": ids, "value": values}))

    response = await client.get(f"/planar/v1/datasets/metadata/{dataset_name}")
    assert response.status_code == 200

    metadata = response.json()
    assert metadata["name"] == dataset_name
    assert metadata["row_count"] == 5
    # Both columns written above must show up in the reported schema.
    table_schema = metadata["table_schema"]
    assert "id" in table_schema
    assert "value" in table_schema
286
+
287
+
288
async def test_get_dataset_metadata_not_found(
    client: PlanarTestClient,
):
    """GET /metadata/{dataset_name} answers 404 for an unknown dataset."""
    response = await client.get("/planar/v1/datasets/metadata/nonexistent_dataset")
    assert response.status_code == 404

    # The error payload should say the dataset was not found (case-insensitive).
    detail = response.json()["detail"]
    assert "not found" in detail.lower()
297
+
298
+
299
async def test_download_dataset_endpoint(
    client: PlanarTestClient,
):
    """GET /content/{dataset_name}/download returns the dataset as parquet."""
    import pyarrow.parquet as pq

    dataset_name = "test_download"
    dataset = await PlanarDataset.create(dataset_name)
    await dataset.write(pl.DataFrame({"id": [1, 2, 3], "value": ["x", "y", "z"]}))

    response = await client.get(f"/planar/v1/datasets/content/{dataset_name}/download")

    assert response.status_code == 200
    assert response.headers["content-type"] == "application/x-parquet"
    disposition = response.headers.get("content-disposition", "")
    assert f"{dataset_name}.parquet" in disposition

    # The body must be non-empty ...
    payload = await response.aread()
    assert len(payload) > 0

    # ... and must parse back as parquet with the rows and columns written above.
    table = pq.read_table(pa.py_buffer(payload))
    assert table.num_rows == 3
    assert table.num_columns == 2
326
+
327
+
328
async def test_download_dataset_not_found(
    client: PlanarTestClient,
):
    """GET /content/{dataset_name}/download answers 404 for an unknown dataset."""
    url = "/planar/v1/datasets/content/nonexistent_dataset/download"
    response = await client.get(url)
    assert response.status_code == 404

    # The error payload should say the dataset was not found (case-insensitive).
    detail = response.json()["detail"]
    assert "not found" in detail.lower()
339
+
340
+
341
async def test_stream_arrow_dataset_not_found(
    client: PlanarTestClient,
):
    """GET /content/{dataset_name}/arrow-stream answers 404 for an unknown dataset."""
    url = "/planar/v1/datasets/content/nonexistent_dataset/arrow-stream"
    response = await client.get(url)
    assert response.status_code == 404

    # The error payload should say the dataset was not found (case-insensitive).
    detail = response.json()["detail"]
    assert "not found" in detail.lower()
352
+
353
+
354
async def test_get_dataset_metadata_empty_dataset(
    client: PlanarTestClient,
):
    """GET /metadata/{dataset_name} works for a dataset that has no rows."""
    dataset_name = "test_empty_metadata"
    dataset = await PlanarDataset.create(dataset_name)

    # A typed frame without rows: the dataset gets a schema but no data.
    empty_frame = pl.DataFrame(
        {"id": [], "value": []}, schema={"id": pl.Int64, "value": pl.Utf8}
    )
    await dataset.write(empty_frame)

    response = await client.get(f"/planar/v1/datasets/metadata/{dataset_name}")
    assert response.status_code == 200

    metadata = response.json()
    assert metadata["name"] == dataset_name
    assert metadata["row_count"] == 0
    # The schema must still list both columns even though no rows exist.
    table_schema = metadata["table_schema"]
    assert "id" in table_schema
    assert "value" in table_schema
375
+
376
+
377
async def test_list_datasets_metadata_empty_dataset(
    client: PlanarTestClient,
):
    """GET /metadata includes an empty dataset and reports zero rows for it."""
    dataset_name = "test_empty_in_list"
    dataset = await PlanarDataset.create(dataset_name)

    # A typed frame without rows: the dataset gets a schema but no data.
    empty_frame = pl.DataFrame(
        {"id": [], "value": []}, schema={"id": pl.Int64, "value": pl.Utf8}
    )
    await dataset.write(empty_frame)

    response = await client.get("/planar/v1/datasets/metadata")
    assert response.status_code == 200

    # Locate the first listing entry describing the dataset created above.
    found = None
    for entry in response.json():
        if entry["name"] == dataset_name:
            found = entry
            break

    assert found is not None
    assert found["row_count"] == 0
397
+
398
+
399
async def test_download_empty_dataset(
    client: PlanarTestClient,
):
    """Download a zero-row dataset and confirm the parquet file keeps its schema."""
    import pyarrow.parquet as pq

    dataset_name = "test_empty_download"
    dataset = await PlanarDataset.create(dataset_name)

    # A typed frame without rows: the dataset gets a schema but no data.
    empty_frame = pl.DataFrame(
        {"id": [], "value": []}, schema={"id": pl.Int64, "value": pl.Utf8}
    )
    await dataset.write(empty_frame)

    response = await client.get(f"/planar/v1/datasets/content/{dataset_name}/download")
    assert response.status_code == 200
    assert response.headers["content-type"] == "application/x-parquet"
    disposition = response.headers.get("content-disposition", "")
    assert f"{dataset_name}.parquet" in disposition

    # Even without rows the body should contain parquet metadata.
    payload = await response.aread()
    assert len(payload) > 0

    # Parse the bytes back to prove they are valid parquet with the right schema.
    table = pq.read_table(pa.py_buffer(payload))
    assert table.num_rows == 0
    assert table.num_columns == 2  # id and value columns
    assert table.schema.field("id").type == pa.int64()
    assert table.schema.field("value").type == pa.string()
@@ -102,7 +102,7 @@ class FileProcessingResult(BaseModel):
102
102
  file_id: UUID = Field(description="ID of the processed file")
103
103
 
104
104
 
105
- @workflow(name="test_file_processing_workflow")
105
+ @workflow(name="test_file_processing_workflow", is_interactive=False)
106
106
  async def file_processing_workflow(file: PlanarFile):
107
107
  """
108
108
  Workflow that processes a text file and returns basic information about it.
@@ -284,6 +284,9 @@ async def test_list_workflows(client: PlanarTestClient):
284
284
  # Verify that the file workflow input schema includes file parameter
285
285
  assert "file" in file_workflow["input_schema"]["properties"]
286
286
 
287
+ # Verify that we propagated the `is_interactive` flag
288
+ assert file_workflow["is_interactive"] is False
289
+
287
290
  # Verify run statistics are present
288
291
  assert "total_runs" in expense_workflow
289
292
  assert "run_statuses" in expense_workflow
@@ -537,3 +540,25 @@ async def test_get_compute_step(
537
540
  data = resp.json()
538
541
  assert "meta" in data
539
542
  assert data["meta"] is None
543
+
544
+
545
async def test_list_interactive_workflow(app: PlanarApp, client: PlanarTestClient):
    """
    We propagate interactive workflows all the way to the `/workflows` endpoint.

    Registers a workflow declared with ``is_interactive=True`` and checks that
    its entry in the workflow listing carries ``is_interactive`` as ``True``.
    """

    # This is here rather than at the top because it's not registered as part
    # of the `app` fixture.
    @workflow(name="interactive_workflow", is_interactive=True)
    async def interactive_workflow():
        pass

    app.register_workflow(interactive_workflow)

    response = await client.get("/planar/v1/workflows/")
    assert response.status_code == 200

    data = response.json()
    # Pass a default to next(): a bare StopIteration escaping a coroutine is
    # turned into an opaque RuntimeError, whereas the assertion below reports
    # exactly what is missing.
    listed_workflow = next(
        (item for item in data["items"] if item["name"] == "interactive_workflow"),
        None,
    )
    assert listed_workflow is not None, (
        "interactive_workflow is missing from the /workflows listing"
    )
    assert listed_workflow["is_interactive"] is True
@@ -217,6 +217,7 @@ def create_workflow_router(
217
217
  }
218
218
  ),
219
219
  durations=duration_stats,
220
+ is_interactive=workflow.is_interactive,
220
221
  )
221
222
  )
222
223
 
@@ -257,6 +258,7 @@ def create_workflow_router(
257
258
  **{status.value: count for status, count in run_statuses.items()}
258
259
  ),
259
260
  durations=duration_stats,
261
+ is_interactive=wf.is_interactive,
260
262
  )
261
263
 
262
264
  @router.get("/{workflow_name}/runs", response_model=WorkflowRunList)
@@ -87,12 +87,23 @@ class RuleAction(str, Enum):
87
87
  RULE_SIMULATE = "Rule::Simulate"
88
88
 
89
89
 
90
class DatasetAction(str, Enum):
    """Actions that can be performed on datasets.

    Each member's value is the action name used in authorization policies,
    where it appears wrapped as ``Action::"<value>"``.
    """

    DATASET_LIST_SCHEMAS = "Dataset::ListSchemas"
    DATASET_LIST = "Dataset::List"
    DATASET_VIEW_DETAILS = "Dataset::ViewDetails"
    DATASET_STREAM_CONTENT = "Dataset::StreamContent"
    DATASET_DOWNLOAD = "Dataset::Download"
98
+
99
+
90
100
class ResourceType(str, Enum):
    """Kinds of entities that can take part in an authorization check."""

    PRINCIPAL = "Principal"
    WORKFLOW = "Workflow"
    ENTITY = "Entity"
    AGENT = "Agent"
    # NOTE(review): mixed-case member name, unlike its upper-case siblings;
    # left as-is because code outside this view may reference it.
    Rule = "Rule"
    DATASET = "Dataset"
96
107
 
97
108
 
98
109
  class EntityIdentifier(TypedDict):
@@ -129,7 +140,12 @@ class RuleResource:
129
140
  rule_name: str | None = None
130
141
 
131
142
 
132
- ResourceDescriptor = AgentResource | WorkflowResource | RuleResource
143
@dataclass(frozen=True, slots=True)
class DatasetResource:
    """Immutable resource descriptor naming a dataset for authorization checks."""

    # Name of the dataset being accessed.
    # NOTE(review): presumably None is used for actions not tied to a single
    # dataset (e.g. listing) — confirm against callers.
    dataset_name: str | None = None


# Union of every descriptor type accepted by validate_authorization_for.
ResourceDescriptor = AgentResource | WorkflowResource | RuleResource | DatasetResource
133
149
 
134
150
 
135
151
  class CedarEntity(BaseModel):
@@ -209,6 +225,15 @@ class CedarEntity(BaseModel):
209
225
  resource_attributes={"rule_name": rule_name},
210
226
  )
211
227
 
228
@staticmethod
def from_dataset(dataset_name: str | None) -> "CedarEntity":
    """Build the CedarEntity that represents a dataset resource.

    The entity has resource type ``ResourceType.DATASET``, is keyed by
    ``dataset_name``, and carries the given name as its only attribute.
    """
    attributes = {"dataset_name": dataset_name}
    return CedarEntity(
        resource_type=ResourceType.DATASET,
        resource_key="dataset_name",
        resource_attributes=attributes,
    )
236
+
212
237
 
213
238
  class PolicyService:
214
239
  """Service for managing and evaluating Authorization policies."""
@@ -272,7 +297,7 @@ class PolicyService:
272
297
  def is_allowed(
273
298
  self,
274
299
  principal: CedarEntity,
275
- action: str | WorkflowAction | AgentAction | RuleAction,
300
+ action: str | WorkflowAction | AgentAction | RuleAction | DatasetAction,
276
301
  resource: CedarEntity,
277
302
  ) -> bool:
278
303
  """Check if the principal is permitted to perform the action on the resource.
@@ -294,6 +319,7 @@ class PolicyService:
294
319
  isinstance(action, WorkflowAction)
295
320
  or isinstance(action, AgentAction)
296
321
  or isinstance(action, RuleAction)
322
+ or isinstance(action, DatasetAction)
297
323
  ):
298
324
  action = f'Action::"{action.value}"'
299
325
  else:
@@ -346,7 +372,7 @@ class PolicyService:
346
372
 
347
373
  def validate_authorization_for(
348
374
  resource_descriptor: ResourceDescriptor,
349
- action: WorkflowAction | AgentAction | RuleAction,
375
+ action: WorkflowAction | AgentAction | RuleAction | DatasetAction,
350
376
  ):
351
377
  authz_service = get_policy_service()
352
378
 
@@ -363,6 +389,8 @@ def validate_authorization_for(
363
389
  entity = CedarEntity.from_agent(resource_descriptor.id)
364
390
  case RuleAction() if isinstance(resource_descriptor, RuleResource):
365
391
  entity = CedarEntity.from_rule(resource_descriptor.rule_name)
392
+ case DatasetAction() if isinstance(resource_descriptor, DatasetResource):
393
+ entity = CedarEntity.from_dataset(resource_descriptor.dataset_name)
366
394
  case _:
367
395
  raise ValueError(
368
396
  f"Invalid resource descriptor {type(resource_descriptor).__name__} for action {action}"
@@ -74,4 +74,29 @@ permit (
74
74
  principal,
75
75
  action == Action::"Rule::Simulate",
76
76
  resource
77
+ );
78
// Dataset actions. Each policy below leaves the principal and resource
// scopes unconstrained, so it permits the named action for every principal
// on every resource.
permit (
    principal,
    action == Action::"Dataset::ListSchemas",
    resource
);
permit (
    principal,
    action == Action::"Dataset::List",
    resource
);
permit (
    principal,
    action == Action::"Dataset::ViewDetails",
    resource
);
permit (
    principal,
    action == Action::"Dataset::StreamContent",
    resource
);
permit (
    principal,
    action == Action::"Dataset::Download",
    resource
);
@@ -16,6 +16,8 @@ Usage in external projects:
16
16
 
17
17
  Available fixtures:
18
18
  - storage: In-memory file storage for tests
19
+ - data_config: Test data configuration with SQLite catalog and local storage
20
+ - app_with_data: PlanarApp instance with data configuration
19
21
  - tmp_db_url: Parametrized database URL (SQLite/PostgreSQL)
20
22
  - session: Database session
21
23
  - client: Planar test client
@@ -33,8 +35,11 @@ from pathlib import Path
33
35
 
34
36
  import pytest
35
37
 
38
+ from planar.app import PlanarApp
36
39
  from planar.config import load_config
40
+ from planar.data.config import DataConfig, SQLiteCatalogConfig
37
41
  from planar.db import DatabaseManager, new_session
42
+ from planar.files.storage.config import LocalDirectoryConfig
38
43
  from planar.files.storage.context import set_storage
39
44
  from planar.logging import set_context_metadata
40
45
  from planar.object_registry import ObjectRegistry
@@ -114,6 +119,31 @@ async def storage():
114
119
  yield storage
115
120
 
116
121
 
122
@pytest.fixture()
def data_config(tmp_path):
    """Build a DataConfig rooted in pytest's per-test temporary directory.

    The SQLite catalog file path and the local storage directory both live
    under ``<tmp_path>/data``.
    """
    data_root = tmp_path / "data"
    data_root.mkdir(exist_ok=True)

    # Directory handed to the local-directory storage backend.
    files_dir = data_root / "ducklake_files"
    files_dir.mkdir(exist_ok=True)

    catalog = SQLiteCatalogConfig(type="sqlite", path=str(data_root / "test.sqlite"))
    storage = LocalDirectoryConfig(backend="localdir", directory=str(files_dir))
    return DataConfig(catalog=catalog, storage=storage)
136
+
137
+
138
@pytest.fixture(name="app_with_data")
def app_with_data_fixture(data_config):
    """Provide a PlanarApp whose configuration includes ``data_config``."""
    planar_app = PlanarApp()
    # Attach the test data configuration to the freshly created app.
    planar_app.config.data = data_config
    return planar_app
145
+
146
+
117
147
  @pytest.fixture()
118
148
  def tmp_sqlite_url(tmp_db_path: str):
119
149
  return f"sqlite+aiosqlite:///{tmp_db_path}"
@@ -53,5 +53,5 @@ async def wait_all_event_loop_tasks():
53
53
  break
54
54
  try:
55
55
  await asyncio.gather(*other_tasks)
56
- except asyncio.CancelledError:
56
+ except (asyncio.CancelledError, Exception):
57
57
  pass
@@ -78,7 +78,7 @@ def step(
78
78
  return decorator
79
79
 
80
80
 
81
- def workflow(*, name: str | None = None):
81
+ def workflow(*, name: str | None = None, is_interactive: bool = False):
82
82
  """
83
83
  Decorator to define a workflow.
84
84
 
@@ -177,6 +177,7 @@ def workflow(*, name: str | None = None):
177
177
  start_step=start_workflow_step,
178
178
  wait_for_completion=wait_for_completion,
179
179
  wrapped_fn=run_workflow,
180
+ is_interactive=is_interactive,
180
181
  )
181
182
 
182
183
  return wf_wrapper
@@ -33,6 +33,7 @@ class WorkflowWrapper(Wrapper[P, T, U, R]):
33
33
  start: Callable[P, Coroutine[T, U, Workflow]]
34
34
  start_step: Callable[P, Coroutine[T, U, UUID]]
35
35
  wait_for_completion: Callable[[UUID], Coroutine[T, U, R]]
36
+ is_interactive: bool
36
37
 
37
38
 
38
39
  @dataclass(kw_only=True)