planar 0.9.3__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- planar/ai/agent.py +2 -1
- planar/ai/agent_base.py +24 -5
- planar/ai/state.py +17 -0
- planar/ai/test_agent_tool_step_display.py +1 -1
- planar/app.py +5 -0
- planar/data/connection.py +108 -0
- planar/data/dataset.py +11 -104
- planar/data/test_dataset.py +45 -41
- planar/data/utils.py +89 -0
- planar/db/alembic/env.py +25 -1
- planar/files/storage/azure_blob.py +1 -1
- planar/registry_items.py +2 -0
- planar/routers/dataset_router.py +213 -0
- planar/routers/models.py +1 -0
- planar/routers/test_dataset_router.py +429 -0
- planar/routers/test_workflow_router.py +26 -1
- planar/routers/workflow.py +2 -0
- planar/security/authorization.py +31 -3
- planar/security/default_policies.cedar +25 -0
- planar/testing/fixtures.py +30 -0
- planar/testing/planar_test_client.py +1 -1
- planar/workflows/decorators.py +2 -1
- planar/workflows/wrappers.py +1 -0
- {planar-0.9.3.dist-info → planar-0.10.0.dist-info}/METADATA +1 -1
- {planar-0.9.3.dist-info → planar-0.10.0.dist-info}/RECORD +27 -22
- {planar-0.9.3.dist-info → planar-0.10.0.dist-info}/WHEEL +1 -1
- {planar-0.9.3.dist-info → planar-0.10.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,429 @@
|
|
1
|
+
import math
|
2
|
+
|
3
|
+
import polars as pl
|
4
|
+
import pyarrow as pa
|
5
|
+
import pytest
|
6
|
+
|
7
|
+
from planar.data.dataset import PlanarDataset
|
8
|
+
from planar.testing.planar_test_client import PlanarTestClient
|
9
|
+
|
10
|
+
|
11
|
+
@pytest.fixture(name="app")
def app_fixture(app_with_data):
    """Expose the shared ``app_with_data`` fixture under the name ``app``.

    The fixture value is returned unchanged, so tests in this module that
    request ``app`` receive the app built by ``app_with_data``.
    """
    data_enabled_app = app_with_data
    return data_enabled_app
|
15
|
+
|
16
|
+
|
17
|
+
async def test_stream_arrow_chunks(
    client: PlanarTestClient,
):
    """Stream a 10,000-row dataset as Arrow IPC and verify batching and content.

    Checks the response headers, that the stream is split into
    ``ceil(dataset_size / batch_size)`` record batches, and that every ``id``
    written to the dataset comes back exactly once.
    """
    dataset_name = "test_streaming"
    dataset_size = 10_000
    batch_size = 1000

    dataset = await PlanarDataset.create(dataset_name)

    df = pl.DataFrame({"id": range(dataset_size)}).with_columns(
        pl.format("value_{}", pl.col("id")).alias("value")
    )

    await dataset.write(df)

    response = await client.get(
        f"/planar/v1/datasets/content/{dataset_name}/arrow-stream",
        params={"batch_size": batch_size, "limit": dataset_size},
    )

    assert response.status_code == 200
    assert response.headers["content-type"] == "application/vnd.apache.arrow.stream"
    assert "test_streaming.arrow" in response.headers.get("content-disposition", "")
    assert response.headers.get("x-batch-size") == str(batch_size)

    content = await response.aread()
    reader = pa.ipc.open_stream(pa.py_buffer(content))

    # The stream reader is an iterator of record batches, so it can be
    # drained directly instead of looping on read_next_batch() until
    # StopIteration.
    batches = list(reader)
    total_rows_received = sum(batch.num_rows for batch in batches)
    all_ids = [
        row_id for batch in batches for row_id in batch.column("id").to_pylist()
    ]

    expected_batches = math.ceil(dataset_size / batch_size)

    assert len(batches) == expected_batches
    assert total_rows_received == dataset_size

    # Verify data integrity - check that we received all expected IDs
    assert len(all_ids) == dataset_size
    assert set(all_ids) == set(range(dataset_size))
    assert sum(all_ids) == sum(range(dataset_size))
|
76
|
+
|
77
|
+
|
78
|
+
async def test_stream_arrow_with_limit(
    client: PlanarTestClient,
):
    """Test that the limit parameter properly restricts the number of rows streamed.

    Writes 1000 rows, requests at most 250 in batches of 100, and expects
    exactly 250 rows spread over ``ceil(250 / 100)`` batches.
    """
    dataset_name = "test_streaming_limit"
    dataset_size = 1000
    batch_size = 100
    row_limit = 250  # Should get 3 batches (100 + 100 + 50)

    dataset = await PlanarDataset.create(dataset_name)

    # Create test data
    df = pl.DataFrame({"id": range(dataset_size)}).with_columns(
        pl.format("value_{}", pl.col("id")).alias("value")
    )

    await dataset.write(df)

    response = await client.get(
        f"/planar/v1/datasets/content/{dataset_name}/arrow-stream",
        params={"batch_size": batch_size, "limit": row_limit},
    )

    assert response.status_code == 200
    assert response.headers["x-row-limit"] == str(row_limit)

    content = await response.aread()
    reader = pa.ipc.open_stream(pa.py_buffer(content))

    # Drain the reader by iteration rather than a manual
    # read_next_batch()/StopIteration loop.
    row_counts = [batch.num_rows for batch in reader]

    # Should receive exactly the limited number of rows
    assert sum(row_counts) == row_limit
    # Should receive expected number of batches (3: 100, 100, 50)
    expected_batches = math.ceil(row_limit / batch_size)
    assert len(row_counts) == expected_batches
|
124
|
+
|
125
|
+
|
126
|
+
async def test_stream_arrow_empty_dataset(
    client: PlanarTestClient,
):
    """Test streaming behavior with an empty dataset.

    An empty dataset is expected to stream as a single record batch that
    contains zero rows.
    """
    dataset_name = "test_empty_stream"
    batch_size = 100

    dataset = await PlanarDataset.create(dataset_name)

    # Create empty dataset
    df = pl.DataFrame(
        {"id": [], "value": []}, schema={"id": pl.Int64, "value": pl.Utf8}
    )
    await dataset.write(df)

    response = await client.get(
        f"/planar/v1/datasets/content/{dataset_name}/arrow-stream",
        params={"batch_size": batch_size},
    )

    assert response.status_code == 200

    content = await response.aread()
    reader = pa.ipc.open_stream(pa.py_buffer(content))

    # Should be able to read the schema and get one empty batch. Iterating
    # the reader replaces the manual read_next_batch()/StopIteration loop.
    row_counts = [batch.num_rows for batch in reader]

    # Should have exactly 1 empty batch (our fallback for empty datasets)
    assert len(row_counts) == 1
    assert sum(row_counts) == 0
|
167
|
+
|
168
|
+
|
169
|
+
async def test_stream_arrow_single_batch(
    client: PlanarTestClient,
):
    """Test streaming when dataset size is smaller than batch size.

    With 50 rows and a batch size of 100, all rows must arrive in one batch.
    """
    dataset_name = "test_single_batch"
    dataset_size = 50
    batch_size = 100

    dataset = await PlanarDataset.create(dataset_name)

    df = pl.DataFrame({"id": range(dataset_size)}).with_columns(
        pl.format("value_{}", pl.col("id")).alias("value")
    )

    await dataset.write(df)

    response = await client.get(
        f"/planar/v1/datasets/content/{dataset_name}/arrow-stream",
        params={"batch_size": batch_size},
    )

    assert response.status_code == 200

    content = await response.aread()
    reader = pa.ipc.open_stream(pa.py_buffer(content))

    # Drain the reader by iteration rather than a manual
    # read_next_batch()/StopIteration loop.
    row_counts = [batch.num_rows for batch in reader]

    assert len(row_counts) == 1
    assert sum(row_counts) == dataset_size
|
209
|
+
|
210
|
+
|
211
|
+
async def test_get_schemas_endpoint(
    client: PlanarTestClient,
):
    """GET /schemas returns a JSON list that includes the ``main`` schema."""
    resp = await client.get("/planar/v1/datasets/schemas")
    assert resp.status_code == 200

    schema_names = resp.json()
    assert isinstance(schema_names, list)
    # The default schema is expected to be present.
    assert "main" in schema_names
|
221
|
+
|
222
|
+
|
223
|
+
async def test_list_datasets_metadata_endpoint(
    client: PlanarTestClient,
):
    """GET /metadata lists a freshly written dataset with its row count and columns."""
    dataset_name = "test_list_datasets"

    # Seed a three-row dataset so there is a known entry to look for.
    dataset = await PlanarDataset.create(dataset_name)
    await dataset.write(pl.DataFrame({"id": [1, 2, 3], "name": ["a", "b", "c"]}))

    resp = await client.get("/planar/v1/datasets/metadata")
    assert resp.status_code == 200

    listing = resp.json()
    assert isinstance(listing, list)

    # Locate the entry for the dataset created above.
    matches = [entry for entry in listing if entry["name"] == dataset_name]
    entry = matches[0] if matches else None
    assert entry is not None
    assert entry["row_count"] == 3
    for column in ("id", "name"):
        assert column in entry["table_schema"]
|
246
|
+
|
247
|
+
|
248
|
+
async def test_list_datasets_metadata_with_pagination(
    client: PlanarTestClient,
):
    """GET /metadata accepts pagination parameters and honours ``limit``."""
    page_params = {"limit": 5, "offset": 0, "schema_name": "main"}
    resp = await client.get("/planar/v1/datasets/metadata", params=page_params)
    assert resp.status_code == 200

    listing = resp.json()
    assert isinstance(listing, list)
    # No more entries than the requested limit may come back.
    assert len(listing) <= 5
|
261
|
+
|
262
|
+
|
263
|
+
async def test_get_dataset_metadata_endpoint(
    client: PlanarTestClient,
):
    """GET /metadata/{dataset_name} reports name, row count and columns."""
    dataset_name = "test_single_metadata"
    rows = {
        "id": [1, 2, 3, 4, 5],
        "value": ["apple", "banana", "cherry", "date", "elderberry"],
    }

    dataset = await PlanarDataset.create(dataset_name)
    await dataset.write(pl.DataFrame(rows))

    resp = await client.get(f"/planar/v1/datasets/metadata/{dataset_name}")
    assert resp.status_code == 200

    info = resp.json()
    assert info["name"] == dataset_name
    assert info["row_count"] == 5
    for column in ("id", "value"):
        assert column in info["table_schema"]
|
286
|
+
|
287
|
+
|
288
|
+
async def test_get_dataset_metadata_not_found(
    client: PlanarTestClient,
):
    """GET /metadata/{dataset_name} answers 404 for a dataset that does not exist."""
    resp = await client.get("/planar/v1/datasets/metadata/nonexistent_dataset")
    assert resp.status_code == 404

    detail = resp.json()["detail"]
    assert "not found" in detail.lower()
|
297
|
+
|
298
|
+
|
299
|
+
async def test_download_dataset_endpoint(
    client: PlanarTestClient,
):
    """GET /content/{dataset_name}/download returns the dataset as readable Parquet."""
    dataset_name = "test_download"

    dataset = await PlanarDataset.create(dataset_name)
    await dataset.write(pl.DataFrame({"id": [1, 2, 3], "value": ["x", "y", "z"]}))

    resp = await client.get(f"/planar/v1/datasets/content/{dataset_name}/download")
    assert resp.status_code == 200
    assert resp.headers["content-type"] == "application/x-parquet"
    disposition = resp.headers.get("content-disposition", "")
    assert f"{dataset_name}.parquet" in disposition

    # The body must be non-empty ...
    payload = await resp.aread()
    assert len(payload) > 0

    # ... and must parse back into the table that was written.
    import pyarrow.parquet as pq

    table = pq.read_table(pa.py_buffer(payload))
    assert table.num_rows == 3
    assert table.num_columns == 2
|
326
|
+
|
327
|
+
|
328
|
+
async def test_download_dataset_not_found(
    client: PlanarTestClient,
):
    """GET /content/{dataset_name}/download answers 404 for a missing dataset."""
    missing_url = "/planar/v1/datasets/content/nonexistent_dataset/download"
    resp = await client.get(missing_url)
    assert resp.status_code == 404

    detail = resp.json()["detail"]
    assert "not found" in detail.lower()
|
339
|
+
|
340
|
+
|
341
|
+
async def test_stream_arrow_dataset_not_found(
    client: PlanarTestClient,
):
    """GET /content/{dataset_name}/arrow-stream answers 404 for a missing dataset."""
    missing_url = "/planar/v1/datasets/content/nonexistent_dataset/arrow-stream"
    resp = await client.get(missing_url)
    assert resp.status_code == 404

    detail = resp.json()["detail"]
    assert "not found" in detail.lower()
|
352
|
+
|
353
|
+
|
354
|
+
async def test_get_dataset_metadata_empty_dataset(
    client: PlanarTestClient,
):
    """GET /metadata/{dataset_name} reports zero rows but keeps the column schema."""
    dataset_name = "test_empty_metadata"
    dataset = await PlanarDataset.create(dataset_name)

    # Write a frame that has the two typed columns and no rows.
    empty_frame = pl.DataFrame(
        {"id": [], "value": []}, schema={"id": pl.Int64, "value": pl.Utf8}
    )
    await dataset.write(empty_frame)

    resp = await client.get(f"/planar/v1/datasets/metadata/{dataset_name}")
    assert resp.status_code == 200

    info = resp.json()
    assert info["name"] == dataset_name
    assert info["row_count"] == 0
    for column in ("id", "value"):
        assert column in info["table_schema"]
|
375
|
+
|
376
|
+
|
377
|
+
async def test_list_datasets_metadata_empty_dataset(
    client: PlanarTestClient,
):
    """GET /metadata includes an empty dataset and reports its row count as 0."""
    dataset_name = "test_empty_in_list"
    dataset = await PlanarDataset.create(dataset_name)

    # Write a frame that has the two typed columns and no rows.
    empty_frame = pl.DataFrame(
        {"id": [], "value": []}, schema={"id": pl.Int64, "value": pl.Utf8}
    )
    await dataset.write(empty_frame)

    resp = await client.get("/planar/v1/datasets/metadata")
    assert resp.status_code == 200

    matches = [entry for entry in resp.json() if entry["name"] == dataset_name]
    entry = matches[0] if matches else None
    assert entry is not None
    assert entry["row_count"] == 0
|
397
|
+
|
398
|
+
|
399
|
+
async def test_download_empty_dataset(
    client: PlanarTestClient,
):
    """GET /content/{dataset_name}/download yields valid Parquet for an empty dataset."""
    dataset_name = "test_empty_download"
    dataset = await PlanarDataset.create(dataset_name)

    # Write a frame that has the two typed columns and no rows.
    empty_frame = pl.DataFrame(
        {"id": [], "value": []}, schema={"id": pl.Int64, "value": pl.Utf8}
    )
    await dataset.write(empty_frame)

    resp = await client.get(f"/planar/v1/datasets/content/{dataset_name}/download")
    assert resp.status_code == 200
    assert resp.headers["content-type"] == "application/x-parquet"
    disposition = resp.headers.get("content-disposition", "")
    assert f"{dataset_name}.parquet" in disposition

    # Even with no rows the file must still carry Parquet metadata.
    payload = await resp.aread()
    assert len(payload) > 0

    # Read the payload back to prove it is valid Parquet with the typed schema.
    import pyarrow.parquet as pq

    table = pq.read_table(pa.py_buffer(payload))
    assert table.num_rows == 0
    assert table.num_columns == 2  # id and value columns
    schema = table.schema
    assert schema.field("id").type == pa.int64()
    assert schema.field("value").type == pa.string()
|
@@ -102,7 +102,7 @@ class FileProcessingResult(BaseModel):
|
|
102
102
|
file_id: UUID = Field(description="ID of the processed file")
|
103
103
|
|
104
104
|
|
105
|
-
@workflow(name="test_file_processing_workflow")
|
105
|
+
@workflow(name="test_file_processing_workflow", is_interactive=False)
|
106
106
|
async def file_processing_workflow(file: PlanarFile):
|
107
107
|
"""
|
108
108
|
Workflow that processes a text file and returns basic information about it.
|
@@ -284,6 +284,9 @@ async def test_list_workflows(client: PlanarTestClient):
|
|
284
284
|
# Verify that the file workflow input schema includes file parameter
|
285
285
|
assert "file" in file_workflow["input_schema"]["properties"]
|
286
286
|
|
287
|
+
# Verify that we propagated the `is_interactive` flag
|
288
|
+
assert file_workflow["is_interactive"] is False
|
289
|
+
|
287
290
|
# Verify run statistics are present
|
288
291
|
assert "total_runs" in expense_workflow
|
289
292
|
assert "run_statuses" in expense_workflow
|
@@ -537,3 +540,25 @@ async def test_get_compute_step(
|
|
537
540
|
data = resp.json()
|
538
541
|
assert "meta" in data
|
539
542
|
assert data["meta"] is None
|
543
|
+
|
544
|
+
|
545
|
+
async def test_list_interactive_workflow(app: PlanarApp, client: PlanarTestClient):
    """
    We propagate interactive workflows all the way to the `/workflows` endpoint.
    """

    # Defined here rather than at module level because it is not registered as
    # part of the `app` fixture.
    @workflow(name="interactive_workflow", is_interactive=True)
    async def interactive_workflow():
        pass

    app.register_workflow(interactive_workflow)

    response = await client.get("/planar/v1/workflows/")
    assert response.status_code == 200

    data = response.json()
    # Use a default so a missing entry fails with a clear assertion; a bare
    # next() raising StopIteration inside a coroutine is turned into a
    # RuntimeError, which hides the real cause.
    listed_workflow = next(
        (item for item in data["items"] if item["name"] == "interactive_workflow"),
        None,
    )
    assert listed_workflow is not None, "interactive_workflow missing from /workflows"
    assert listed_workflow["is_interactive"] is True
|
planar/routers/workflow.py
CHANGED
@@ -217,6 +217,7 @@ def create_workflow_router(
|
|
217
217
|
}
|
218
218
|
),
|
219
219
|
durations=duration_stats,
|
220
|
+
is_interactive=workflow.is_interactive,
|
220
221
|
)
|
221
222
|
)
|
222
223
|
|
@@ -257,6 +258,7 @@ def create_workflow_router(
|
|
257
258
|
**{status.value: count for status, count in run_statuses.items()}
|
258
259
|
),
|
259
260
|
durations=duration_stats,
|
261
|
+
is_interactive=wf.is_interactive,
|
260
262
|
)
|
261
263
|
|
262
264
|
@router.get("/{workflow_name}/runs", response_model=WorkflowRunList)
|
planar/security/authorization.py
CHANGED
@@ -87,12 +87,23 @@ class RuleAction(str, Enum):
|
|
87
87
|
RULE_SIMULATE = "Rule::Simulate"
|
88
88
|
|
89
89
|
|
90
|
+
class DatasetAction(str, Enum):
    """Authorization actions for dataset resources.

    Each value is the action name that the Cedar policies match on, in the
    form ``Action::"<value>"``.
    """

    # Schema- and dataset-level listing.
    DATASET_LIST_SCHEMAS = "Dataset::ListSchemas"
    DATASET_LIST = "Dataset::List"

    # Access to a single dataset.
    DATASET_VIEW_DETAILS = "Dataset::ViewDetails"
    DATASET_STREAM_CONTENT = "Dataset::StreamContent"
    DATASET_DOWNLOAD = "Dataset::Download"
|
98
|
+
|
99
|
+
|
90
100
|
class ResourceType(str, Enum):
    """Resource type tags used when building authorization entities."""

    PRINCIPAL = "Principal"
    WORKFLOW = "Workflow"
    ENTITY = "Entity"
    AGENT = "Agent"
    # Kept as the canonical member so existing `ResourceType.Rule` references
    # and the member's `.name` stay unchanged.
    Rule = "Rule"
    # Alias that matches the UPPER_SNAKE naming of the other members; because
    # it repeats the value above, Enum treats it as the same member.
    RULE = "Rule"
    DATASET = "Dataset"
|
96
107
|
|
97
108
|
|
98
109
|
class EntityIdentifier(TypedDict):
|
@@ -129,7 +140,12 @@ class RuleResource:
|
|
129
140
|
rule_name: str | None = None
|
130
141
|
|
131
142
|
|
132
|
-
|
143
|
+
@dataclass(frozen=True, slots=True)
|
144
|
+
class DatasetResource:
|
145
|
+
dataset_name: str | None = None
|
146
|
+
|
147
|
+
|
148
|
+
ResourceDescriptor = AgentResource | WorkflowResource | RuleResource | DatasetResource
|
133
149
|
|
134
150
|
|
135
151
|
class CedarEntity(BaseModel):
|
@@ -209,6 +225,15 @@ class CedarEntity(BaseModel):
|
|
209
225
|
resource_attributes={"rule_name": rule_name},
|
210
226
|
)
|
211
227
|
|
228
|
+
@staticmethod
def from_dataset(dataset_name: str | None) -> "CedarEntity":
    """Build the CedarEntity that represents a dataset resource.

    The entity is typed as ``ResourceType.DATASET`` and keyed on its
    ``dataset_name`` attribute, which is set to the given name (or ``None``).
    """
    attributes = {"dataset_name": dataset_name}
    return CedarEntity(
        resource_type=ResourceType.DATASET,
        resource_key="dataset_name",
        resource_attributes=attributes,
    )
|
236
|
+
|
212
237
|
|
213
238
|
class PolicyService:
|
214
239
|
"""Service for managing and evaluating Authorization policies."""
|
@@ -272,7 +297,7 @@ class PolicyService:
|
|
272
297
|
def is_allowed(
|
273
298
|
self,
|
274
299
|
principal: CedarEntity,
|
275
|
-
action: str | WorkflowAction | AgentAction | RuleAction,
|
300
|
+
action: str | WorkflowAction | AgentAction | RuleAction | DatasetAction,
|
276
301
|
resource: CedarEntity,
|
277
302
|
) -> bool:
|
278
303
|
"""Check if the principal is permitted to perform the action on the resource.
|
@@ -294,6 +319,7 @@ class PolicyService:
|
|
294
319
|
isinstance(action, WorkflowAction)
|
295
320
|
or isinstance(action, AgentAction)
|
296
321
|
or isinstance(action, RuleAction)
|
322
|
+
or isinstance(action, DatasetAction)
|
297
323
|
):
|
298
324
|
action = f'Action::"{action.value}"'
|
299
325
|
else:
|
@@ -346,7 +372,7 @@ class PolicyService:
|
|
346
372
|
|
347
373
|
def validate_authorization_for(
|
348
374
|
resource_descriptor: ResourceDescriptor,
|
349
|
-
action: WorkflowAction | AgentAction | RuleAction,
|
375
|
+
action: WorkflowAction | AgentAction | RuleAction | DatasetAction,
|
350
376
|
):
|
351
377
|
authz_service = get_policy_service()
|
352
378
|
|
@@ -363,6 +389,8 @@ def validate_authorization_for(
|
|
363
389
|
entity = CedarEntity.from_agent(resource_descriptor.id)
|
364
390
|
case RuleAction() if isinstance(resource_descriptor, RuleResource):
|
365
391
|
entity = CedarEntity.from_rule(resource_descriptor.rule_name)
|
392
|
+
case DatasetAction() if isinstance(resource_descriptor, DatasetResource):
|
393
|
+
entity = CedarEntity.from_dataset(resource_descriptor.dataset_name)
|
366
394
|
case _:
|
367
395
|
raise ValueError(
|
368
396
|
f"Invalid resource descriptor {type(resource_descriptor).__name__} for action {action}"
|
@@ -74,4 +74,29 @@ permit (
|
|
74
74
|
principal,
|
75
75
|
action == Action::"Rule::Simulate",
|
76
76
|
resource
|
77
|
+
);
|
78
|
+
permit (
|
79
|
+
principal,
|
80
|
+
action == Action::"Dataset::ListSchemas",
|
81
|
+
resource
|
82
|
+
);
|
83
|
+
permit (
|
84
|
+
principal,
|
85
|
+
action == Action::"Dataset::List",
|
86
|
+
resource
|
87
|
+
);
|
88
|
+
permit (
|
89
|
+
principal,
|
90
|
+
action == Action::"Dataset::ViewDetails",
|
91
|
+
resource
|
92
|
+
);
|
93
|
+
permit (
|
94
|
+
principal,
|
95
|
+
action == Action::"Dataset::StreamContent",
|
96
|
+
resource
|
97
|
+
);
|
98
|
+
permit (
|
99
|
+
principal,
|
100
|
+
action == Action::"Dataset::Download",
|
101
|
+
resource
|
77
102
|
);
|
planar/testing/fixtures.py
CHANGED
@@ -16,6 +16,8 @@ Usage in external projects:
|
|
16
16
|
|
17
17
|
Available fixtures:
|
18
18
|
- storage: In-memory file storage for tests
|
19
|
+
- data_config: Test data configuration with SQLite catalog and local storage
|
20
|
+
- app_with_data: PlanarApp instance with data configuration
|
19
21
|
- tmp_db_url: Parametrized database URL (SQLite/PostgreSQL)
|
20
22
|
- session: Database session
|
21
23
|
- client: Planar test client
|
@@ -33,8 +35,11 @@ from pathlib import Path
|
|
33
35
|
|
34
36
|
import pytest
|
35
37
|
|
38
|
+
from planar.app import PlanarApp
|
36
39
|
from planar.config import load_config
|
40
|
+
from planar.data.config import DataConfig, SQLiteCatalogConfig
|
37
41
|
from planar.db import DatabaseManager, new_session
|
42
|
+
from planar.files.storage.config import LocalDirectoryConfig
|
38
43
|
from planar.files.storage.context import set_storage
|
39
44
|
from planar.logging import set_context_metadata
|
40
45
|
from planar.object_registry import ObjectRegistry
|
@@ -114,6 +119,31 @@ async def storage():
|
|
114
119
|
yield storage
|
115
120
|
|
116
121
|
|
122
|
+
@pytest.fixture()
def data_config(tmp_path):
    """Build a ``DataConfig`` rooted in pytest's ``tmp_path``.

    Creates ``<tmp_path>/data`` and ``<tmp_path>/data/ducklake_files``, points
    the SQLite catalog at ``<tmp_path>/data/test.sqlite`` and uses the
    ``ducklake_files`` directory as local storage.
    """
    root = tmp_path / "data"
    files_dir = root / "ducklake_files"

    # Parent first: mkdir is called without parents=True.
    for directory in (root, files_dir):
        directory.mkdir(exist_ok=True)

    catalog = SQLiteCatalogConfig(type="sqlite", path=str(root / "test.sqlite"))
    storage = LocalDirectoryConfig(backend="localdir", directory=str(files_dir))
    return DataConfig(catalog=catalog, storage=storage)
|
136
|
+
|
137
|
+
|
138
|
+
@pytest.fixture(name="app_with_data")
def app_with_data_fixture(data_config):
    """Return a new ``PlanarApp`` whose config carries the ``data_config`` fixture."""
    planar_app = PlanarApp()
    # Attach the data configuration to the app's config object.
    planar_app.config.data = data_config
    return planar_app
|
145
|
+
|
146
|
+
|
117
147
|
@pytest.fixture()
|
118
148
|
def tmp_sqlite_url(tmp_db_path: str):
|
119
149
|
return f"sqlite+aiosqlite:///{tmp_db_path}"
|
planar/workflows/decorators.py
CHANGED
@@ -78,7 +78,7 @@ def step(
|
|
78
78
|
return decorator
|
79
79
|
|
80
80
|
|
81
|
-
def workflow(*, name: str | None = None):
|
81
|
+
def workflow(*, name: str | None = None, is_interactive: bool = False):
|
82
82
|
"""
|
83
83
|
Decorator to define a workflow.
|
84
84
|
|
@@ -177,6 +177,7 @@ def workflow(*, name: str | None = None):
|
|
177
177
|
start_step=start_workflow_step,
|
178
178
|
wait_for_completion=wait_for_completion,
|
179
179
|
wrapped_fn=run_workflow,
|
180
|
+
is_interactive=is_interactive,
|
180
181
|
)
|
181
182
|
|
182
183
|
return wf_wrapper
|
planar/workflows/wrappers.py
CHANGED