docworkspace 0.2.0__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docworkspace-0.2.0 → docworkspace-0.2.2}/.gitignore +0 -1
- {docworkspace-0.2.0 → docworkspace-0.2.2}/PKG-INFO +78 -59
- {docworkspace-0.2.0 → docworkspace-0.2.2}/README.md +76 -53
- {docworkspace-0.2.0 → docworkspace-0.2.2}/pyproject.toml +3 -15
- {docworkspace-0.2.0 → docworkspace-0.2.2}/src/docworkspace/node/core.py +4 -4
- {docworkspace-0.2.0 → docworkspace-0.2.2}/src/docworkspace/node/io.py +1 -1
- {docworkspace-0.2.0 → docworkspace-0.2.2}/tests/test_fastapi_integration.py +13 -6
- {docworkspace-0.2.0 → docworkspace-0.2.2}/tests/test_node.py +30 -6
- {docworkspace-0.2.0 → docworkspace-0.2.2}/tests/test_node_io.py +11 -4
- {docworkspace-0.2.0 → docworkspace-0.2.2}/tests/test_workspace.py +18 -11
- {docworkspace-0.2.0 → docworkspace-0.2.2}/tests/test_workspace_serialization_types.py +15 -9
- docworkspace-0.2.2/uv.lock +125 -0
- {docworkspace-0.2.0 → docworkspace-0.2.2}/.github/workflows/ci.yml +0 -0
- {docworkspace-0.2.0 → docworkspace-0.2.2}/.github/workflows/release.yml +0 -0
- {docworkspace-0.2.0 → docworkspace-0.2.2}/PUBLISH.md +0 -0
- {docworkspace-0.2.0 → docworkspace-0.2.2}/pytest.ini +0 -0
- {docworkspace-0.2.0 → docworkspace-0.2.2}/src/docworkspace/__init__.py +0 -0
- {docworkspace-0.2.0 → docworkspace-0.2.2}/src/docworkspace/node/__init__.py +0 -0
- {docworkspace-0.2.0 → docworkspace-0.2.2}/src/docworkspace/workspace/__init__.py +0 -0
- {docworkspace-0.2.0 → docworkspace-0.2.2}/src/docworkspace/workspace/analysis.py +0 -0
- {docworkspace-0.2.0 → docworkspace-0.2.2}/src/docworkspace/workspace/core.py +0 -0
- {docworkspace-0.2.0 → docworkspace-0.2.2}/src/docworkspace/workspace/io.py +0 -0
- {docworkspace-0.2.0 → docworkspace-0.2.2}/tests/conftest.py +0 -0
- {docworkspace-0.2.0 → docworkspace-0.2.2}/tests/test_simple_operations.py +0 -0
- {docworkspace-0.2.0 → docworkspace-0.2.2}/tests/test_workspace_shim.py +0 -0
|
@@ -1,18 +1,14 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docworkspace
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: A workspace library for managing Polars dataframes with parent-child relationships and lazy evaluation
|
|
5
5
|
Requires-Python: >=3.14
|
|
6
|
-
Requires-Dist: polars
|
|
7
|
-
Requires-Dist: polars-text>=0.1.0
|
|
8
|
-
Provides-Extra: cpu
|
|
9
|
-
Requires-Dist: polars; extra == 'cpu'
|
|
10
|
-
Requires-Dist: polars-text>=0.1.0; extra == 'cpu'
|
|
6
|
+
Requires-Dist: polars-text>=0.1.2
|
|
11
7
|
Description-Content-Type: text/markdown
|
|
12
8
|
|
|
13
9
|
# DocWorkspace
|
|
14
10
|
|
|
15
|
-
A powerful Python library for managing Polars
|
|
11
|
+
A powerful Python library for managing Polars LazyFrames with parent-child relationships, lazy evaluation, and FastAPI integration. Part of the LDaCA (Language Data Commons of Australia) ecosystem.
|
|
16
12
|
|
|
17
13
|
## Overview
|
|
18
14
|
|
|
@@ -20,21 +16,31 @@ DocWorkspace provides a workspace-based approach to data analysis, where data tr
|
|
|
20
16
|
|
|
21
17
|
- **Relationship Tracking**: Understand data lineage and transformation history
|
|
22
18
|
- **Lazy Evaluation**: Optimize performance with Polars LazyFrames
|
|
23
|
-
- **
|
|
19
|
+
- **LazyFrame-Native Graphs**: Node data is stored as Polars LazyFrames
|
|
24
20
|
- **FastAPI Integration**: Ready-to-use models and utilities for web APIs
|
|
25
21
|
- **Serialization**: Save and restore entire workspaces with their relationships
|
|
26
22
|
|
|
27
23
|
## Installation
|
|
28
24
|
|
|
29
25
|
```bash
|
|
30
|
-
pip install docworkspace
|
|
26
|
+
pip install "docworkspace>=0.2.0"
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
`docworkspace` is published on PyPI as a pure-Python package.
|
|
30
|
+
|
|
31
|
+
### Install From Source
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
git clone https://github.com/Australian-Text-Analytics-Platform/docworkspace.git
|
|
35
|
+
cd docworkspace
|
|
36
|
+
uv sync --group dev
|
|
31
37
|
```
|
|
32
38
|
|
|
33
39
|
### Dependencies
|
|
34
40
|
|
|
35
41
|
- Python ≥ 3.14
|
|
36
42
|
- polars
|
|
37
|
-
- polars-text
|
|
43
|
+
- polars-text >= 0.1.0
|
|
38
44
|
|
|
39
45
|
For FastAPI integration:
|
|
40
46
|
|
|
@@ -57,8 +63,8 @@ df = pl.DataFrame({
|
|
|
57
63
|
"score": [0.8, 0.9, 0.95]
|
|
58
64
|
})
|
|
59
65
|
|
|
60
|
-
#
|
|
61
|
-
data_node = workspace.add_node(Node(df, name="raw_data"))
|
|
66
|
+
# Convert to LazyFrame before adding data to the workspace
|
|
67
|
+
data_node = workspace.add_node(Node(df.lazy(), name="raw_data"))
|
|
62
68
|
|
|
63
69
|
# Apply transformations (creates new nodes automatically)
|
|
64
70
|
filtered = data_node.filter(pl.col("score") > 0.85)
|
|
@@ -77,16 +83,17 @@ print(workspace.visualize_graph())
|
|
|
77
83
|
|
|
78
84
|
### Node
|
|
79
85
|
|
|
80
|
-
A `Node` wraps
|
|
86
|
+
A `Node` wraps a Polars `LazyFrame` and tracks relationships with other nodes. Nodes support:
|
|
81
87
|
|
|
82
88
|
- **Transparent Data Access**: All DataFrame methods work directly on nodes
|
|
89
|
+
- **Transparent Data Access**: All LazyFrame methods work directly on nodes
|
|
83
90
|
- **Automatic Relationship Tracking**: Operations create child nodes
|
|
84
91
|
- **Lazy Evaluation**: Maintains laziness for performance
|
|
85
92
|
- **Metadata**: Store operation descriptions and custom metadata
|
|
86
93
|
|
|
87
94
|
```python
|
|
88
|
-
#
|
|
89
|
-
node = Node(df, name="my_data")
|
|
95
|
+
# Convert eager data before creating a node when needed
|
|
96
|
+
node = Node(df.lazy(), name="my_data")
|
|
90
97
|
|
|
91
98
|
# All DataFrame operations work directly
|
|
92
99
|
filtered_node = node.filter(pl.col("value") > 10)
|
|
@@ -123,28 +130,23 @@ leaves = workspace.get_leaf_nodes()
|
|
|
123
130
|
|
|
124
131
|
## Supported Data Types
|
|
125
132
|
|
|
126
|
-
DocWorkspace
|
|
127
|
-
|
|
128
|
-
### Polars Types
|
|
129
|
-
|
|
130
|
-
- **`pl.DataFrame`**: Materialized, in-memory data
|
|
131
|
-
- **`pl.LazyFrame`**: Lazy evaluation for performance optimization
|
|
133
|
+
DocWorkspace stores node data as Polars `LazyFrame` objects.
|
|
132
134
|
|
|
133
|
-
###
|
|
135
|
+
### Creating LazyFrame Nodes
|
|
134
136
|
|
|
135
137
|
```python
|
|
136
138
|
import polars as pl
|
|
137
139
|
|
|
138
|
-
# Polars DataFrame
|
|
140
|
+
# Convert an eager Polars DataFrame when needed
|
|
139
141
|
df = pl.DataFrame({"text": ["hello", "world"], "id": [1, 2]})
|
|
140
|
-
node1 = Node(df, "eager_data")
|
|
142
|
+
node1 = Node(df.lazy(), "eager_data")
|
|
141
143
|
|
|
142
144
|
# Polars LazyFrame (lazy)
|
|
143
145
|
lazy_df = pl.LazyFrame({"text": ["foo", "bar"], "id": [3, 4]})
|
|
144
146
|
node2 = Node(lazy_df, "lazy_data")
|
|
145
147
|
|
|
146
|
-
#
|
|
147
|
-
workspace = Workspace("
|
|
148
|
+
# Both nodes remain LazyFrames inside the workspace
|
|
149
|
+
workspace = Workspace("lazyframe_nodes")
|
|
148
150
|
for node in [node1, node2]:
|
|
149
151
|
workspace.add_node(node)
|
|
150
152
|
```
|
|
@@ -175,7 +177,7 @@ Understand your data lineage:
|
|
|
175
177
|
|
|
176
178
|
```python
|
|
177
179
|
# Create a processing pipeline
|
|
178
|
-
raw_data = Node(df, "raw")
|
|
180
|
+
raw_data = Node(df.lazy(), "raw")
|
|
179
181
|
cleaned = raw_data.filter(pl.col("value").is_not_null())
|
|
180
182
|
normalized = cleaned.with_columns(pl.col("value") / pl.col("value").max())
|
|
181
183
|
final = normalized.select(["id", "normalized_value"])
|
|
@@ -230,7 +232,7 @@ def custom_transform(node: Node, operation_name: str) -> Node:
|
|
|
230
232
|
"""Apply custom transformation and track the operation."""
|
|
231
233
|
# Your custom logic here
|
|
232
234
|
result_data = node.data.with_columns(pl.col("value") * 2)
|
|
233
|
-
|
|
235
|
+
|
|
234
236
|
# Create new node with relationship tracking
|
|
235
237
|
return Node(
|
|
236
238
|
data=result_data,
|
|
@@ -274,7 +276,7 @@ df = pl.DataFrame({
|
|
|
274
276
|
"metadata": ["type1", "type2", "type1"]
|
|
275
277
|
})
|
|
276
278
|
|
|
277
|
-
node = Node(df, "corpus")
|
|
279
|
+
node = Node(df.lazy(), "corpus")
|
|
278
280
|
node.document = "text"
|
|
279
281
|
|
|
280
282
|
# Document metadata is preserved across operations
|
|
@@ -295,7 +297,7 @@ Node(data, name=None, workspace=None, parents=None, operation=None)
|
|
|
295
297
|
#### Properties
|
|
296
298
|
|
|
297
299
|
- `document: Optional[str]` - Document column tracked in node metadata
|
|
298
|
-
- `data:
|
|
300
|
+
- `data: pl.LazyFrame` - Underlying frame-like object
|
|
299
301
|
|
|
300
302
|
#### Methods
|
|
301
303
|
|
|
@@ -304,9 +306,9 @@ Node(data, name=None, workspace=None, parents=None, operation=None)
|
|
|
304
306
|
- `info(json=False) -> Dict` - Get node information
|
|
305
307
|
- `json_schema() -> Dict[str, str]` - Get JSON-compatible schema
|
|
306
308
|
|
|
307
|
-
####
|
|
309
|
+
#### Data Operations
|
|
308
310
|
|
|
309
|
-
All Polars
|
|
311
|
+
All Polars LazyFrame operations are available directly:
|
|
310
312
|
|
|
311
313
|
- `filter(condition) -> Node`
|
|
312
314
|
- `select(columns) -> Node`
|
|
@@ -402,7 +404,7 @@ df = pl.DataFrame({
|
|
|
402
404
|
workspace = Workspace("text_analysis")
|
|
403
405
|
|
|
404
406
|
# Track the document column for text analysis
|
|
405
|
-
corpus = workspace.add_node(Node(df, "full_corpus"))
|
|
407
|
+
corpus = workspace.add_node(Node(df.lazy(), "full_corpus"))
|
|
406
408
|
corpus.document = "text"
|
|
407
409
|
|
|
408
410
|
# Filter by category
|
|
@@ -514,44 +516,64 @@ for node in workspace.get_leaf_nodes():
|
|
|
514
516
|
|
|
515
517
|
```bash
|
|
516
518
|
# Install development dependencies
|
|
517
|
-
|
|
519
|
+
uv sync --group dev
|
|
518
520
|
|
|
519
521
|
# Run all tests
|
|
520
|
-
pytest
|
|
522
|
+
uv run pytest
|
|
521
523
|
|
|
522
524
|
# Run with coverage
|
|
523
|
-
pytest --cov=docworkspace
|
|
525
|
+
uv run pytest --cov=docworkspace
|
|
524
526
|
|
|
525
527
|
# Run specific test file
|
|
526
|
-
pytest tests/test_workspace.py -v
|
|
528
|
+
uv run pytest tests/test_workspace.py -v
|
|
529
|
+
```
|
|
530
|
+
|
|
531
|
+
### Building Distributions
|
|
532
|
+
|
|
533
|
+
```bash
|
|
534
|
+
uv build
|
|
527
535
|
```
|
|
528
536
|
|
|
537
|
+
This produces a universal wheel and source distribution suitable for PyPI.
|
|
538
|
+
|
|
529
539
|
### Contributing
|
|
530
540
|
|
|
531
541
|
1. Fork the repository
|
|
532
542
|
2. Create a feature branch: `git checkout -b feature-name`
|
|
533
543
|
3. Make your changes and add tests
|
|
534
|
-
4. Run the test suite: `pytest`
|
|
544
|
+
4. Run the test suite: `uv run pytest`
|
|
535
545
|
5. Submit a pull request
|
|
536
546
|
|
|
537
547
|
### Project Structure
|
|
538
548
|
|
|
539
549
|
```text
|
|
540
550
|
docworkspace/
|
|
541
|
-
├──
|
|
542
|
-
│
|
|
543
|
-
|
|
544
|
-
│
|
|
545
|
-
│
|
|
546
|
-
│
|
|
551
|
+
├── .github/
|
|
552
|
+
│ └── workflows/ # CI and release automation
|
|
553
|
+
├── src/
|
|
554
|
+
│ └── docworkspace/
|
|
555
|
+
│ ├── __init__.py # Public package exports
|
|
556
|
+
│ ├── node/
|
|
557
|
+
│ │ ├── __init__.py
|
|
558
|
+
│ │ ├── core.py # Node implementation
|
|
559
|
+
│ │ └── io.py # Node serialization helpers
|
|
560
|
+
│ └── workspace/
|
|
561
|
+
│ ├── __init__.py
|
|
562
|
+
│ ├── core.py # Workspace implementation
|
|
563
|
+
│ ├── io.py # Workspace serialization helpers
|
|
564
|
+
│ └── analysis.py
|
|
547
565
|
├── tests/ # Test suite
|
|
548
|
-
│ ├──
|
|
549
|
-
│ ├──
|
|
550
|
-
│ ├──
|
|
551
|
-
│
|
|
552
|
-
├──
|
|
553
|
-
├──
|
|
554
|
-
|
|
566
|
+
│ ├── conftest.py
|
|
567
|
+
│ ├── test_fastapi_integration.py
|
|
568
|
+
│ ├── test_node.py
|
|
569
|
+
│ ├── test_node_io.py
|
|
570
|
+
│ ├── test_simple_operations.py
|
|
571
|
+
│ ├── test_workspace.py
|
|
572
|
+
│ ├── test_workspace_serialization_types.py
|
|
573
|
+
│ └── test_workspace_shim.py
|
|
574
|
+
├── PUBLISH.md # Release runbook
|
|
575
|
+
├── README.md # This file
|
|
576
|
+
└── pyproject.toml # Project configuration
|
|
555
577
|
```
|
|
556
578
|
|
|
557
579
|
## License
|
|
@@ -560,15 +582,12 @@ Part of the LDaCA (Language Data Commons of Australia) ecosystem.
|
|
|
560
582
|
|
|
561
583
|
## Changelog
|
|
562
584
|
|
|
563
|
-
### Version 0.
|
|
585
|
+
### Version 0.2.0
|
|
564
586
|
|
|
565
|
-
-
|
|
566
|
-
-
|
|
567
|
-
-
|
|
568
|
-
-
|
|
569
|
-
- FastAPI integration
|
|
570
|
-
- Serialization capabilities
|
|
571
|
-
- Comprehensive test suite
|
|
587
|
+
- Published on PyPI as `docworkspace`
|
|
588
|
+
- PyPI consumers can install the package directly instead of relying on a local workspace checkout
|
|
589
|
+
- Added release automation and publishing runbook for future releases
|
|
590
|
+
- Continued support for Polars data types, lazy evaluation, FastAPI integration, and serialization
|
|
572
591
|
|
|
573
592
|
## Related Projects
|
|
574
593
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# DocWorkspace
|
|
2
2
|
|
|
3
|
-
A powerful Python library for managing Polars
|
|
3
|
+
A powerful Python library for managing Polars LazyFrames with parent-child relationships, lazy evaluation, and FastAPI integration. Part of the LDaCA (Language Data Commons of Australia) ecosystem.
|
|
4
4
|
|
|
5
5
|
## Overview
|
|
6
6
|
|
|
@@ -8,21 +8,31 @@ DocWorkspace provides a workspace-based approach to data analysis, where data tr
|
|
|
8
8
|
|
|
9
9
|
- **Relationship Tracking**: Understand data lineage and transformation history
|
|
10
10
|
- **Lazy Evaluation**: Optimize performance with Polars LazyFrames
|
|
11
|
-
- **
|
|
11
|
+
- **LazyFrame-Native Graphs**: Node data is stored as Polars LazyFrames
|
|
12
12
|
- **FastAPI Integration**: Ready-to-use models and utilities for web APIs
|
|
13
13
|
- **Serialization**: Save and restore entire workspaces with their relationships
|
|
14
14
|
|
|
15
15
|
## Installation
|
|
16
16
|
|
|
17
17
|
```bash
|
|
18
|
-
pip install docworkspace
|
|
18
|
+
pip install "docworkspace>=0.2.0"
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
`docworkspace` is published on PyPI as a pure-Python package.
|
|
22
|
+
|
|
23
|
+
### Install From Source
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
git clone https://github.com/Australian-Text-Analytics-Platform/docworkspace.git
|
|
27
|
+
cd docworkspace
|
|
28
|
+
uv sync --group dev
|
|
19
29
|
```
|
|
20
30
|
|
|
21
31
|
### Dependencies
|
|
22
32
|
|
|
23
33
|
- Python ≥ 3.14
|
|
24
34
|
- polars
|
|
25
|
-
- polars-text
|
|
35
|
+
- polars-text >= 0.1.0
|
|
26
36
|
|
|
27
37
|
For FastAPI integration:
|
|
28
38
|
|
|
@@ -45,8 +55,8 @@ df = pl.DataFrame({
|
|
|
45
55
|
"score": [0.8, 0.9, 0.95]
|
|
46
56
|
})
|
|
47
57
|
|
|
48
|
-
#
|
|
49
|
-
data_node = workspace.add_node(Node(df, name="raw_data"))
|
|
58
|
+
# Convert to LazyFrame before adding data to the workspace
|
|
59
|
+
data_node = workspace.add_node(Node(df.lazy(), name="raw_data"))
|
|
50
60
|
|
|
51
61
|
# Apply transformations (creates new nodes automatically)
|
|
52
62
|
filtered = data_node.filter(pl.col("score") > 0.85)
|
|
@@ -65,16 +75,17 @@ print(workspace.visualize_graph())
|
|
|
65
75
|
|
|
66
76
|
### Node
|
|
67
77
|
|
|
68
|
-
A `Node` wraps
|
|
78
|
+
A `Node` wraps a Polars `LazyFrame` and tracks relationships with other nodes. Nodes support:
|
|
69
79
|
|
|
70
80
|
- **Transparent Data Access**: All DataFrame methods work directly on nodes
|
|
81
|
+
- **Transparent Data Access**: All LazyFrame methods work directly on nodes
|
|
71
82
|
- **Automatic Relationship Tracking**: Operations create child nodes
|
|
72
83
|
- **Lazy Evaluation**: Maintains laziness for performance
|
|
73
84
|
- **Metadata**: Store operation descriptions and custom metadata
|
|
74
85
|
|
|
75
86
|
```python
|
|
76
|
-
#
|
|
77
|
-
node = Node(df, name="my_data")
|
|
87
|
+
# Convert eager data before creating a node when needed
|
|
88
|
+
node = Node(df.lazy(), name="my_data")
|
|
78
89
|
|
|
79
90
|
# All DataFrame operations work directly
|
|
80
91
|
filtered_node = node.filter(pl.col("value") > 10)
|
|
@@ -111,28 +122,23 @@ leaves = workspace.get_leaf_nodes()
|
|
|
111
122
|
|
|
112
123
|
## Supported Data Types
|
|
113
124
|
|
|
114
|
-
DocWorkspace
|
|
115
|
-
|
|
116
|
-
### Polars Types
|
|
117
|
-
|
|
118
|
-
- **`pl.DataFrame`**: Materialized, in-memory data
|
|
119
|
-
- **`pl.LazyFrame`**: Lazy evaluation for performance optimization
|
|
125
|
+
DocWorkspace stores node data as Polars `LazyFrame` objects.
|
|
120
126
|
|
|
121
|
-
###
|
|
127
|
+
### Creating LazyFrame Nodes
|
|
122
128
|
|
|
123
129
|
```python
|
|
124
130
|
import polars as pl
|
|
125
131
|
|
|
126
|
-
# Polars DataFrame
|
|
132
|
+
# Convert an eager Polars DataFrame when needed
|
|
127
133
|
df = pl.DataFrame({"text": ["hello", "world"], "id": [1, 2]})
|
|
128
|
-
node1 = Node(df, "eager_data")
|
|
134
|
+
node1 = Node(df.lazy(), "eager_data")
|
|
129
135
|
|
|
130
136
|
# Polars LazyFrame (lazy)
|
|
131
137
|
lazy_df = pl.LazyFrame({"text": ["foo", "bar"], "id": [3, 4]})
|
|
132
138
|
node2 = Node(lazy_df, "lazy_data")
|
|
133
139
|
|
|
134
|
-
#
|
|
135
|
-
workspace = Workspace("
|
|
140
|
+
# Both nodes remain LazyFrames inside the workspace
|
|
141
|
+
workspace = Workspace("lazyframe_nodes")
|
|
136
142
|
for node in [node1, node2]:
|
|
137
143
|
workspace.add_node(node)
|
|
138
144
|
```
|
|
@@ -163,7 +169,7 @@ Understand your data lineage:
|
|
|
163
169
|
|
|
164
170
|
```python
|
|
165
171
|
# Create a processing pipeline
|
|
166
|
-
raw_data = Node(df, "raw")
|
|
172
|
+
raw_data = Node(df.lazy(), "raw")
|
|
167
173
|
cleaned = raw_data.filter(pl.col("value").is_not_null())
|
|
168
174
|
normalized = cleaned.with_columns(pl.col("value") / pl.col("value").max())
|
|
169
175
|
final = normalized.select(["id", "normalized_value"])
|
|
@@ -218,7 +224,7 @@ def custom_transform(node: Node, operation_name: str) -> Node:
|
|
|
218
224
|
"""Apply custom transformation and track the operation."""
|
|
219
225
|
# Your custom logic here
|
|
220
226
|
result_data = node.data.with_columns(pl.col("value") * 2)
|
|
221
|
-
|
|
227
|
+
|
|
222
228
|
# Create new node with relationship tracking
|
|
223
229
|
return Node(
|
|
224
230
|
data=result_data,
|
|
@@ -262,7 +268,7 @@ df = pl.DataFrame({
|
|
|
262
268
|
"metadata": ["type1", "type2", "type1"]
|
|
263
269
|
})
|
|
264
270
|
|
|
265
|
-
node = Node(df, "corpus")
|
|
271
|
+
node = Node(df.lazy(), "corpus")
|
|
266
272
|
node.document = "text"
|
|
267
273
|
|
|
268
274
|
# Document metadata is preserved across operations
|
|
@@ -283,7 +289,7 @@ Node(data, name=None, workspace=None, parents=None, operation=None)
|
|
|
283
289
|
#### Properties
|
|
284
290
|
|
|
285
291
|
- `document: Optional[str]` - Document column tracked in node metadata
|
|
286
|
-
- `data:
|
|
292
|
+
- `data: pl.LazyFrame` - Underlying frame-like object
|
|
287
293
|
|
|
288
294
|
#### Methods
|
|
289
295
|
|
|
@@ -292,9 +298,9 @@ Node(data, name=None, workspace=None, parents=None, operation=None)
|
|
|
292
298
|
- `info(json=False) -> Dict` - Get node information
|
|
293
299
|
- `json_schema() -> Dict[str, str]` - Get JSON-compatible schema
|
|
294
300
|
|
|
295
|
-
####
|
|
301
|
+
#### Data Operations
|
|
296
302
|
|
|
297
|
-
All Polars
|
|
303
|
+
All Polars LazyFrame operations are available directly:
|
|
298
304
|
|
|
299
305
|
- `filter(condition) -> Node`
|
|
300
306
|
- `select(columns) -> Node`
|
|
@@ -390,7 +396,7 @@ df = pl.DataFrame({
|
|
|
390
396
|
workspace = Workspace("text_analysis")
|
|
391
397
|
|
|
392
398
|
# Track the document column for text analysis
|
|
393
|
-
corpus = workspace.add_node(Node(df, "full_corpus"))
|
|
399
|
+
corpus = workspace.add_node(Node(df.lazy(), "full_corpus"))
|
|
394
400
|
corpus.document = "text"
|
|
395
401
|
|
|
396
402
|
# Filter by category
|
|
@@ -502,44 +508,64 @@ for node in workspace.get_leaf_nodes():
|
|
|
502
508
|
|
|
503
509
|
```bash
|
|
504
510
|
# Install development dependencies
|
|
505
|
-
|
|
511
|
+
uv sync --group dev
|
|
506
512
|
|
|
507
513
|
# Run all tests
|
|
508
|
-
pytest
|
|
514
|
+
uv run pytest
|
|
509
515
|
|
|
510
516
|
# Run with coverage
|
|
511
|
-
pytest --cov=docworkspace
|
|
517
|
+
uv run pytest --cov=docworkspace
|
|
512
518
|
|
|
513
519
|
# Run specific test file
|
|
514
|
-
pytest tests/test_workspace.py -v
|
|
520
|
+
uv run pytest tests/test_workspace.py -v
|
|
521
|
+
```
|
|
522
|
+
|
|
523
|
+
### Building Distributions
|
|
524
|
+
|
|
525
|
+
```bash
|
|
526
|
+
uv build
|
|
515
527
|
```
|
|
516
528
|
|
|
529
|
+
This produces a universal wheel and source distribution suitable for PyPI.
|
|
530
|
+
|
|
517
531
|
### Contributing
|
|
518
532
|
|
|
519
533
|
1. Fork the repository
|
|
520
534
|
2. Create a feature branch: `git checkout -b feature-name`
|
|
521
535
|
3. Make your changes and add tests
|
|
522
|
-
4. Run the test suite: `pytest`
|
|
536
|
+
4. Run the test suite: `uv run pytest`
|
|
523
537
|
5. Submit a pull request
|
|
524
538
|
|
|
525
539
|
### Project Structure
|
|
526
540
|
|
|
527
541
|
```text
|
|
528
542
|
docworkspace/
|
|
529
|
-
├──
|
|
530
|
-
│
|
|
531
|
-
|
|
532
|
-
│
|
|
533
|
-
│
|
|
534
|
-
│
|
|
543
|
+
├── .github/
|
|
544
|
+
│ └── workflows/ # CI and release automation
|
|
545
|
+
├── src/
|
|
546
|
+
│ └── docworkspace/
|
|
547
|
+
│ ├── __init__.py # Public package exports
|
|
548
|
+
│ ├── node/
|
|
549
|
+
│ │ ├── __init__.py
|
|
550
|
+
│ │ ├── core.py # Node implementation
|
|
551
|
+
│ │ └── io.py # Node serialization helpers
|
|
552
|
+
│ └── workspace/
|
|
553
|
+
│ ├── __init__.py
|
|
554
|
+
│ ├── core.py # Workspace implementation
|
|
555
|
+
│ ├── io.py # Workspace serialization helpers
|
|
556
|
+
│ └── analysis.py
|
|
535
557
|
├── tests/ # Test suite
|
|
536
|
-
│ ├──
|
|
537
|
-
│ ├──
|
|
538
|
-
│ ├──
|
|
539
|
-
│
|
|
540
|
-
├──
|
|
541
|
-
├──
|
|
542
|
-
|
|
558
|
+
│ ├── conftest.py
|
|
559
|
+
│ ├── test_fastapi_integration.py
|
|
560
|
+
│ ├── test_node.py
|
|
561
|
+
│ ├── test_node_io.py
|
|
562
|
+
│ ├── test_simple_operations.py
|
|
563
|
+
│ ├── test_workspace.py
|
|
564
|
+
│ ├── test_workspace_serialization_types.py
|
|
565
|
+
│ └── test_workspace_shim.py
|
|
566
|
+
├── PUBLISH.md # Release runbook
|
|
567
|
+
├── README.md # This file
|
|
568
|
+
└── pyproject.toml # Project configuration
|
|
543
569
|
```
|
|
544
570
|
|
|
545
571
|
## License
|
|
@@ -548,15 +574,12 @@ Part of the LDaCA (Language Data Commons of Australia) ecosystem.
|
|
|
548
574
|
|
|
549
575
|
## Changelog
|
|
550
576
|
|
|
551
|
-
### Version 0.
|
|
577
|
+
### Version 0.2.0
|
|
552
578
|
|
|
553
|
-
-
|
|
554
|
-
-
|
|
555
|
-
-
|
|
556
|
-
-
|
|
557
|
-
- FastAPI integration
|
|
558
|
-
- Serialization capabilities
|
|
559
|
-
- Comprehensive test suite
|
|
579
|
+
- Published on PyPI as `docworkspace`
|
|
580
|
+
- PyPI consumers can install the package directly instead of relying on a local workspace checkout
|
|
581
|
+
- Added release automation and publishing runbook for future releases
|
|
582
|
+
- Continued support for Polars data types, lazy evaluation, FastAPI integration, and serialization
|
|
560
583
|
|
|
561
584
|
## Related Projects
|
|
562
585
|
|
|
@@ -1,26 +1,14 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "docworkspace"
|
|
3
|
-
version = "0.2.0"
|
|
3
|
+
version = "0.2.2"
|
|
4
4
|
description = "A workspace library for managing Polars dataframes with parent-child relationships and lazy evaluation"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.14"
|
|
7
|
-
dependencies = [
|
|
8
|
-
"polars",
|
|
9
|
-
"polars-text>=0.1.0",
|
|
10
|
-
]
|
|
11
|
-
|
|
12
|
-
[project.optional-dependencies]
|
|
13
|
-
cpu = [
|
|
14
|
-
"polars",
|
|
15
|
-
"polars-text>=0.1.0",
|
|
16
|
-
]
|
|
7
|
+
dependencies = ["polars-text>=0.1.2"]
|
|
17
8
|
|
|
18
9
|
[build-system]
|
|
19
10
|
requires = ["hatchling"]
|
|
20
11
|
build-backend = "hatchling.build"
|
|
21
12
|
|
|
22
13
|
[dependency-groups]
|
|
23
|
-
dev = [
|
|
24
|
-
"pytest>=8.0.0",
|
|
25
|
-
]
|
|
26
|
-
|
|
14
|
+
dev = ["pytest>=8.0.0"]
|
|
@@ -8,7 +8,7 @@ from __future__ import annotations
|
|
|
8
8
|
|
|
9
9
|
import uuid
|
|
10
10
|
from pathlib import Path
|
|
11
|
-
from typing import TYPE_CHECKING, Any, Dict, Literal, Optional, cast
|
|
11
|
+
from typing import TYPE_CHECKING, Any, Dict, Literal, Optional, Sequence, cast
|
|
12
12
|
|
|
13
13
|
import polars as pl
|
|
14
14
|
|
|
@@ -27,9 +27,9 @@ class Node:
|
|
|
27
27
|
def __init__(
|
|
28
28
|
self,
|
|
29
29
|
data: pl.LazyFrame,
|
|
30
|
-
name: str
|
|
30
|
+
name: str,
|
|
31
31
|
workspace: Optional["Workspace"] = None,
|
|
32
|
-
parents:
|
|
32
|
+
parents: Sequence["Node | str"] = (),
|
|
33
33
|
operation: str | None = None,
|
|
34
34
|
id: str | None = None,
|
|
35
35
|
document: str | None = None,
|
|
@@ -46,7 +46,7 @@ class Node:
|
|
|
46
46
|
self._redo_stack: list[pl.LazyFrame] = []
|
|
47
47
|
self._data: pl.LazyFrame = data
|
|
48
48
|
self._document_column: Optional[str] = document
|
|
49
|
-
self.parents: list[Node | str] = list(parents
|
|
49
|
+
self.parents: list[Node | str] = list(parents)
|
|
50
50
|
self.workspace: Optional[Workspace] = workspace
|
|
51
51
|
self.operation = operation
|
|
52
52
|
|
|
@@ -79,7 +79,7 @@ def from_dict(
|
|
|
79
79
|
if workspace is None:
|
|
80
80
|
return Node(data=lf, workspace=None, parents=list(parent_ids), **node_metadata)
|
|
81
81
|
|
|
82
|
-
parents = [
|
|
82
|
+
parents: list[Node | str] = [
|
|
83
83
|
workspace.nodes[parent_id]
|
|
84
84
|
for parent_id in parent_ids
|
|
85
85
|
if parent_id in workspace.nodes
|
|
@@ -4,7 +4,11 @@ This module tests that the core docworkspace library is properly separated
|
|
|
4
4
|
from API-specific functionality and can operate independently.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
+
from typing import cast
|
|
8
|
+
|
|
9
|
+
import polars as pl
|
|
7
10
|
import pytest
|
|
11
|
+
|
|
8
12
|
from docworkspace import Node, Workspace
|
|
9
13
|
|
|
10
14
|
|
|
@@ -25,10 +29,12 @@ class TestCoreLibraryIndependence:
|
|
|
25
29
|
|
|
26
30
|
def test_no_api_dependencies(self):
|
|
27
31
|
"""Test that core classes don't have API-specific methods."""
|
|
28
|
-
df = pytest.importorskip("polars").DataFrame(
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
+
df = pytest.importorskip("polars").DataFrame(
|
|
33
|
+
{
|
|
34
|
+
"id": [1, 2, 3],
|
|
35
|
+
"text": ["a", "b", "c"],
|
|
36
|
+
}
|
|
37
|
+
)
|
|
32
38
|
|
|
33
39
|
workspace = Workspace("test")
|
|
34
40
|
node = workspace.add_node(Node(df.lazy(), "test_node"))
|
|
@@ -64,14 +70,14 @@ class TestCoreLibraryIndependence:
|
|
|
64
70
|
# Test core functionality
|
|
65
71
|
assert len(workspace.nodes) == 1
|
|
66
72
|
assert node.name == "test_data"
|
|
67
|
-
assert node.data.collect().height == 3
|
|
73
|
+
assert cast(pl.DataFrame, node.data.collect()).height == 3
|
|
68
74
|
assert node.data.collect_schema().len() == 2
|
|
69
75
|
assert isinstance(node.data, pl.LazyFrame)
|
|
70
76
|
|
|
71
77
|
# Test node operations (polars delegation)
|
|
72
78
|
filtered = node.filter(pl.col("x") > 1)
|
|
73
79
|
assert isinstance(filtered, Node)
|
|
74
|
-
assert filtered.data.collect().height == 2
|
|
80
|
+
assert cast(pl.DataFrame, filtered.data.collect()).height == 2
|
|
75
81
|
|
|
76
82
|
def test_lazy_frame_support(self):
|
|
77
83
|
"""Test that lazy frame support works without API dependencies."""
|
|
@@ -89,3 +95,4 @@ class TestCoreLibraryIndependence:
|
|
|
89
95
|
filtered_lazy = lazy_node.filter(pl.col("a") > 1)
|
|
90
96
|
assert isinstance(filtered_lazy, Node)
|
|
91
97
|
assert isinstance(filtered_lazy.data, pl.LazyFrame)
|
|
98
|
+
assert isinstance(filtered_lazy.data, pl.LazyFrame)
|
|
@@ -1,7 +1,11 @@
|
|
|
1
1
|
"""Tests for the Node class."""
|
|
2
2
|
|
|
3
|
+
from inspect import signature
|
|
4
|
+
from typing import Optional, Sequence, cast, get_type_hints
|
|
5
|
+
|
|
3
6
|
import polars as pl
|
|
4
7
|
import pytest
|
|
8
|
+
|
|
5
9
|
from docworkspace import Node, Workspace
|
|
6
10
|
|
|
7
11
|
|
|
@@ -50,6 +54,23 @@ class TestNode:
|
|
|
50
54
|
assert node.parents == ["parent-123"]
|
|
51
55
|
assert node.children == []
|
|
52
56
|
|
|
57
|
+
def test_node_init_contract_uses_non_optional_parents(self):
|
|
58
|
+
"""Constructor type contract should not advertise optional parents."""
|
|
59
|
+
hints = get_type_hints(
|
|
60
|
+
Node.__init__,
|
|
61
|
+
globalns={
|
|
62
|
+
"Node": Node,
|
|
63
|
+
"Workspace": Workspace,
|
|
64
|
+
"Sequence": Sequence,
|
|
65
|
+
"Optional": Optional,
|
|
66
|
+
"pl": pl,
|
|
67
|
+
},
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
assert hints["name"] is str
|
|
71
|
+
assert hints["parents"] == Sequence[Node | str]
|
|
72
|
+
assert signature(Node.__init__).parameters["parents"].default == ()
|
|
73
|
+
|
|
53
74
|
def test_node_lazy_status_polars_dataframe(self, sample_df):
|
|
54
75
|
"""Test lazy status for polars DataFrame."""
|
|
55
76
|
node = Node(sample_df.lazy(), "test_node")
|
|
@@ -82,7 +103,7 @@ class TestNode:
|
|
|
82
103
|
|
|
83
104
|
assert len(sliced.parents) == 1
|
|
84
105
|
assert sliced.parents[0] == node
|
|
85
|
-
assert sliced.data.collect().height == 2
|
|
106
|
+
assert cast(pl.DataFrame, sliced.data.collect()).height == 2
|
|
86
107
|
|
|
87
108
|
def test_node_drop_creates_child_and_drops_column(self, sample_df):
|
|
88
109
|
"""Dropping columns returns a child node with updated schema."""
|
|
@@ -303,11 +324,13 @@ class TestNodeRelationships:
|
|
|
303
324
|
@pytest.fixture
|
|
304
325
|
def sample_df(self):
|
|
305
326
|
"""Create a sample DataFrame."""
|
|
306
|
-
return pl.DataFrame(
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
327
|
+
return pl.DataFrame(
|
|
328
|
+
{
|
|
329
|
+
"id": [1, 2, 3, 4, 5],
|
|
330
|
+
"category": ["A", "B", "A", "B", "C"],
|
|
331
|
+
"value": [10, 20, 30, 40, 50],
|
|
332
|
+
}
|
|
333
|
+
)
|
|
311
334
|
|
|
312
335
|
def test_filter_creates_parent_child_relationship(self, workspace, sample_df):
|
|
313
336
|
"""Test that filter operation creates proper parent-child relationship."""
|
|
@@ -376,3 +399,4 @@ class TestNodeRelationships:
|
|
|
376
399
|
assert parent2 in merged.parents
|
|
377
400
|
assert merged in parent1.children
|
|
378
401
|
assert merged in parent2.children
|
|
402
|
+
assert merged in parent2.children
|
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import os
|
|
3
3
|
from pathlib import Path
|
|
4
|
+
from typing import cast
|
|
4
5
|
|
|
5
6
|
import polars as pl
|
|
7
|
+
|
|
6
8
|
from docworkspace import Node, Workspace
|
|
7
9
|
from docworkspace.node.io import dumps, from_dict, loads, to_dict
|
|
8
10
|
|
|
@@ -36,7 +38,7 @@ def test_node_to_dict_persists_lazyframe_payload(tmp_path: Path):
|
|
|
36
38
|
data_file = tmp_path / payload["data_path"]
|
|
37
39
|
assert data_file.exists()
|
|
38
40
|
restored = pl.LazyFrame.deserialize(data_file.open("rb"), format="binary")
|
|
39
|
-
assert restored.collect().to_dict(as_series=False) == {
|
|
41
|
+
assert cast(pl.DataFrame, restored.collect()).to_dict(as_series=False) == {
|
|
40
42
|
"text": ["a", "b"],
|
|
41
43
|
"value": [1, 2],
|
|
42
44
|
}
|
|
@@ -62,7 +64,9 @@ def test_node_exposes_instance_and_class_serialization_helpers(tmp_path: Path):
|
|
|
62
64
|
|
|
63
65
|
assert payload["node_metadata"]["id"] == node.id
|
|
64
66
|
assert restored.id == node.id
|
|
65
|
-
assert restored.data.collect().to_dict(as_series=False) == {
|
|
67
|
+
assert cast(pl.DataFrame, restored.data.collect()).to_dict(as_series=False) == {
|
|
68
|
+
"value": [1, 2, 3]
|
|
69
|
+
}
|
|
66
70
|
|
|
67
71
|
|
|
68
72
|
def test_node_dumps_returns_json_payload_and_persists_data_file(tmp_path: Path):
|
|
@@ -110,7 +114,7 @@ def test_node_from_dict_restores_node_state(tmp_path: Path):
|
|
|
110
114
|
assert restored.children == []
|
|
111
115
|
assert restored.can_undo is False
|
|
112
116
|
assert restored.can_redo is False
|
|
113
|
-
assert restored.data.collect().to_dict(as_series=False) == {
|
|
117
|
+
assert cast(pl.DataFrame, restored.data.collect()).to_dict(as_series=False) == {
|
|
114
118
|
"text": ["x", "y"],
|
|
115
119
|
"value": [10, 20],
|
|
116
120
|
}
|
|
@@ -136,7 +140,9 @@ def test_node_loads_round_trip_from_json_string(tmp_path: Path):
|
|
|
136
140
|
|
|
137
141
|
assert restored.id == node.id
|
|
138
142
|
assert restored.name == "round_trip"
|
|
139
|
-
assert restored.data.collect().to_dict(as_series=False) == {
|
|
143
|
+
assert cast(pl.DataFrame, restored.data.collect()).to_dict(as_series=False) == {
|
|
144
|
+
"value": [3, 4]
|
|
145
|
+
}
|
|
140
146
|
|
|
141
147
|
|
|
142
148
|
def test_node_from_dict_uses_constructor_defaults_for_runtime_state(tmp_path: Path):
|
|
@@ -226,3 +232,4 @@ def test_node_from_dict_ignores_missing_parent_ids(tmp_path: Path):
|
|
|
226
232
|
)
|
|
227
233
|
|
|
228
234
|
assert restored.parents == []
|
|
235
|
+
assert restored.parents == []
|
|
@@ -3,9 +3,11 @@
|
|
|
3
3
|
import os
|
|
4
4
|
import tempfile
|
|
5
5
|
from pathlib import Path
|
|
6
|
+
from typing import Any, cast
|
|
6
7
|
|
|
7
8
|
import polars as pl
|
|
8
9
|
import pytest
|
|
10
|
+
|
|
9
11
|
from docworkspace import Node, Workspace
|
|
10
12
|
|
|
11
13
|
|
|
@@ -203,11 +205,13 @@ class TestWorkspaceSerialization:
|
|
|
203
205
|
workspace.modified_at = "2024-01-01T12:00:00Z"
|
|
204
206
|
|
|
205
207
|
# Create nodes
|
|
206
|
-
df1 = pl.DataFrame(
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
208
|
+
df1 = pl.DataFrame(
|
|
209
|
+
{
|
|
210
|
+
"id": [1, 2, 3],
|
|
211
|
+
"category": ["A", "B", "A"],
|
|
212
|
+
"value": [10, 20, 30],
|
|
213
|
+
}
|
|
214
|
+
)
|
|
211
215
|
|
|
212
216
|
df2 = pl.DataFrame({"id": [1, 2, 3], "extra": ["x", "y", "z"]})
|
|
213
217
|
|
|
@@ -323,7 +327,7 @@ class TestWorkspaceSerialization:
|
|
|
323
327
|
def test_load_from_dict_rejected(self):
|
|
324
328
|
"""Workspace.load should accept path-like values only."""
|
|
325
329
|
with pytest.raises(TypeError):
|
|
326
|
-
Workspace.load({"workspace_metadata": {}, "nodes": []})
|
|
330
|
+
cast(Any, Workspace.load)({"workspace_metadata": {}, "nodes": []})
|
|
327
331
|
|
|
328
332
|
def test_workspace_serialized_file_structure(self, populated_workspace):
|
|
329
333
|
"""Validate on-disk JSON structure contains expected envelope keys."""
|
|
@@ -449,11 +453,13 @@ class TestWorkspaceGraphOperations:
|
|
|
449
453
|
def test_workspace_csv_loading(self):
|
|
450
454
|
"""Test explicit CSV loading workflow for workspaces."""
|
|
451
455
|
# Create a temporary CSV file
|
|
452
|
-
df = pl.DataFrame(
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
456
|
+
df = pl.DataFrame(
|
|
457
|
+
{
|
|
458
|
+
"name": ["Alice", "Bob", "Charlie"],
|
|
459
|
+
"age": [25, 30, 35],
|
|
460
|
+
"city": ["NYC", "LA", "Chicago"],
|
|
461
|
+
}
|
|
462
|
+
)
|
|
457
463
|
|
|
458
464
|
with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f:
|
|
459
465
|
df.write_csv(f.name)
|
|
@@ -603,3 +609,4 @@ class TestWorkspaceGraphOperations:
|
|
|
603
609
|
assert node_c in node_d.parents
|
|
604
610
|
assert node_a not in node_d.parents
|
|
605
611
|
assert node_a not in workspace.nodes.values()
|
|
612
|
+
assert node_a not in workspace.nodes.values()
|
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
import json
|
|
2
|
+
from typing import Any, cast
|
|
2
3
|
|
|
3
4
|
import polars as pl
|
|
4
5
|
import pytest
|
|
5
|
-
|
|
6
|
-
from docworkspace.
|
|
6
|
+
|
|
7
|
+
from docworkspace.node import Node
|
|
8
|
+
from docworkspace.workspace import Workspace
|
|
7
9
|
|
|
8
10
|
|
|
9
11
|
def build_sample_objects():
|
|
@@ -34,7 +36,9 @@ def test_workspace_save_load_preserves_types(tmp_path):
|
|
|
34
36
|
# Round-trip data content sanity
|
|
35
37
|
df_node = next(n for n in ws2.nodes.values() if n.name == "df")
|
|
36
38
|
assert isinstance(df_node.data, pl.LazyFrame)
|
|
37
|
-
assert
|
|
39
|
+
assert cast(
|
|
40
|
+
pl.DataFrame, df_node.data.select(pl.col("a")).collect()
|
|
41
|
+
).to_series().to_list() == [1, 2, 3]
|
|
38
42
|
|
|
39
43
|
|
|
40
44
|
def test_workspace_save_load_no_format_argument(tmp_path):
|
|
@@ -43,14 +47,16 @@ def test_workspace_save_load_no_format_argument(tmp_path):
|
|
|
43
47
|
|
|
44
48
|
# API no longer accepts a format argument.
|
|
45
49
|
with pytest.raises(TypeError):
|
|
46
|
-
ws.save(tmp_path / "ws.bin", format="binary")
|
|
50
|
+
cast(Any, ws.save)(tmp_path / "ws.bin", format="binary")
|
|
47
51
|
|
|
48
52
|
dummy = tmp_path / "ws.json"
|
|
49
53
|
dummy.write_text(
|
|
50
|
-
json.dumps(
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
+
json.dumps(
|
|
55
|
+
{
|
|
56
|
+
"workspace_metadata": {"id": "x", "name": "n"},
|
|
57
|
+
"nodes": [],
|
|
58
|
+
}
|
|
59
|
+
)
|
|
54
60
|
)
|
|
55
61
|
with pytest.raises(TypeError):
|
|
56
|
-
Workspace.load(dummy, format="binary")
|
|
62
|
+
cast(Any, Workspace.load)(dummy, format="binary")
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
version = 1
|
|
2
|
+
revision = 3
|
|
3
|
+
requires-python = ">=3.14"
|
|
4
|
+
|
|
5
|
+
[[package]]
|
|
6
|
+
name = "colorama"
|
|
7
|
+
version = "0.4.6"
|
|
8
|
+
source = { registry = "https://pypi.org/simple" }
|
|
9
|
+
sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" }
|
|
10
|
+
wheels = [
|
|
11
|
+
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
[[package]]
|
|
15
|
+
name = "docworkspace"
|
|
16
|
+
version = "0.2.2"
|
|
17
|
+
source = { editable = "." }
|
|
18
|
+
dependencies = [
|
|
19
|
+
{ name = "polars-text" },
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
[package.dev-dependencies]
|
|
23
|
+
dev = [
|
|
24
|
+
{ name = "pytest" },
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
[package.metadata]
|
|
28
|
+
requires-dist = [{ name = "polars-text", specifier = ">=0.1.2" }]
|
|
29
|
+
|
|
30
|
+
[package.metadata.requires-dev]
|
|
31
|
+
dev = [{ name = "pytest", specifier = ">=8.0.0" }]
|
|
32
|
+
|
|
33
|
+
[[package]]
|
|
34
|
+
name = "iniconfig"
|
|
35
|
+
version = "2.3.0"
|
|
36
|
+
source = { registry = "https://pypi.org/simple" }
|
|
37
|
+
sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" }
|
|
38
|
+
wheels = [
|
|
39
|
+
{ url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" },
|
|
40
|
+
]
|
|
41
|
+
|
|
42
|
+
[[package]]
|
|
43
|
+
name = "packaging"
|
|
44
|
+
version = "26.0"
|
|
45
|
+
source = { registry = "https://pypi.org/simple" }
|
|
46
|
+
sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416, upload-time = "2026-01-21T20:50:39.064Z" }
|
|
47
|
+
wheels = [
|
|
48
|
+
{ url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" },
|
|
49
|
+
]
|
|
50
|
+
|
|
51
|
+
[[package]]
|
|
52
|
+
name = "pluggy"
|
|
53
|
+
version = "1.6.0"
|
|
54
|
+
source = { registry = "https://pypi.org/simple" }
|
|
55
|
+
sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" }
|
|
56
|
+
wheels = [
|
|
57
|
+
{ url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
|
|
58
|
+
]
|
|
59
|
+
|
|
60
|
+
[[package]]
|
|
61
|
+
name = "polars"
|
|
62
|
+
version = "1.39.3"
|
|
63
|
+
source = { registry = "https://pypi.org/simple" }
|
|
64
|
+
dependencies = [
|
|
65
|
+
{ name = "polars-runtime-32" },
|
|
66
|
+
]
|
|
67
|
+
sdist = { url = "https://files.pythonhosted.org/packages/93/ab/f19e592fce9e000da49c96bf35e77cef67f9cb4b040bfa538a2764c0263e/polars-1.39.3.tar.gz", hash = "sha256:2e016c7f3e8d14fa777ef86fe0477cec6c67023a20ba4c94d6e8431eefe4a63c", size = 728987, upload-time = "2026-03-20T11:16:24.836Z" }
|
|
68
|
+
wheels = [
|
|
69
|
+
{ url = "https://files.pythonhosted.org/packages/b4/db/08f4ca10c5018813e7e0b59e4472302328b3d2ab1512f5a2157a814540e0/polars-1.39.3-py3-none-any.whl", hash = "sha256:c2b955ccc0a08a2bc9259785decf3d5c007b489b523bf2390cf21cec2bb82a56", size = 823985, upload-time = "2026-03-20T11:14:23.619Z" },
|
|
70
|
+
]
|
|
71
|
+
|
|
72
|
+
[[package]]
|
|
73
|
+
name = "polars-runtime-32"
|
|
74
|
+
version = "1.39.3"
|
|
75
|
+
source = { registry = "https://pypi.org/simple" }
|
|
76
|
+
sdist = { url = "https://files.pythonhosted.org/packages/17/39/c8688696bc22b6c501e3b82ef3be10e543c07a785af5660f30997cd22dd2/polars_runtime_32-1.39.3.tar.gz", hash = "sha256:c728e4f469cafab501947585f36311b8fb222d3e934c6209e83791e0df20b29d", size = 2872335, upload-time = "2026-03-20T11:16:26.581Z" }
|
|
77
|
+
wheels = [
|
|
78
|
+
{ url = "https://files.pythonhosted.org/packages/3b/74/1b41205f7368c9375ab1dea91178eaa20435fe3eff036390a53a7660b416/polars_runtime_32-1.39.3-cp310-abi3-macosx_10_12_x86_64.whl", hash = "sha256:425c0b220b573fa097b4042edff73114cc6d23432a21dfd2dc41adf329d7d2e9", size = 45273243, upload-time = "2026-03-20T11:14:26.691Z" },
|
|
79
|
+
{ url = "https://files.pythonhosted.org/packages/90/bf/297716b3095fe719be20fcf7af1d2b6ab069c38199bbace2469608a69b3a/polars_runtime_32-1.39.3-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:ef5884711e3c617d7dc93519a7d038e242f5741cfe5fe9afd32d58845d86c562", size = 40842924, upload-time = "2026-03-20T11:14:31.154Z" },
|
|
80
|
+
{ url = "https://files.pythonhosted.org/packages/3d/3e/e65236d9d0d9babfa0ecba593413c06530fca60a8feb8f66243aa5dba92e/polars_runtime_32-1.39.3-cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06b47f535eb1f97a9a1e5b0053ef50db3a4276e241178e37bbb1a38b1fa53b14", size = 43220650, upload-time = "2026-03-20T11:14:35.458Z" },
|
|
81
|
+
{ url = "https://files.pythonhosted.org/packages/b0/15/fc3e43f3fdf3f20b7dfb5abe871ab6162cf8fb4aeabf4cfad822d5dc4c79/polars_runtime_32-1.39.3-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8bc9e13dc1d2e828331f2fe8ccbc9757554dc4933a8d3e85e906b988178f95ed", size = 46877498, upload-time = "2026-03-20T11:14:40.14Z" },
|
|
82
|
+
{ url = "https://files.pythonhosted.org/packages/3c/81/bd5f895919e32c6ab0a7786cd0c0ca961cb03152c47c3645808b54383f31/polars_runtime_32-1.39.3-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:363d49e3a3e638fc943e2b9887940300a7d06789930855a178a4727949259dc2", size = 43380176, upload-time = "2026-03-20T11:14:45.566Z" },
|
|
83
|
+
{ url = "https://files.pythonhosted.org/packages/7a/3e/c86433c3b5ec0315bdfc7640d0c15d41f1216c0103a0eab9a9b5147d6c4c/polars_runtime_32-1.39.3-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:7c206bdcc7bc62ea038d6adea8e44b02f0e675e0191a54c810703b4895208ea4", size = 46485933, upload-time = "2026-03-20T11:14:51.155Z" },
|
|
84
|
+
{ url = "https://files.pythonhosted.org/packages/54/ce/200b310cf91f98e652eb6ea09fdb3a9718aa0293ebf113dce325797c8572/polars_runtime_32-1.39.3-cp310-abi3-win_amd64.whl", hash = "sha256:d66ca522517554a883446957539c40dc7b75eb0c2220357fb28bc8940d305339", size = 46995458, upload-time = "2026-03-20T11:14:56.074Z" },
|
|
85
|
+
{ url = "https://files.pythonhosted.org/packages/da/76/2d48927e0aa2abbdde08cbf4a2536883b73277d47fbeca95e952de86df34/polars_runtime_32-1.39.3-cp310-abi3-win_arm64.whl", hash = "sha256:f49f51461de63f13e5dd4eb080421c8f23f856945f3f8bd5b2b1f59da52c2860", size = 41857648, upload-time = "2026-03-20T11:15:01.142Z" },
|
|
86
|
+
]
|
|
87
|
+
|
|
88
|
+
[[package]]
|
|
89
|
+
name = "polars-text"
|
|
90
|
+
version = "0.1.2"
|
|
91
|
+
source = { registry = "https://pypi.org/simple" }
|
|
92
|
+
dependencies = [
|
|
93
|
+
{ name = "polars" },
|
|
94
|
+
]
|
|
95
|
+
sdist = { url = "https://files.pythonhosted.org/packages/42/b9/1653c8ac742e3b84ebe328d213b5eb09ee96ce6241c27364911a2e29e2aa/polars_text-0.1.2.tar.gz", hash = "sha256:2a080c8e7e0ecef10b3b431fb14dbb90de6faf552e24ffa30f52f0876c5fe5e8", size = 53365, upload-time = "2026-04-07T23:41:59.225Z" }
|
|
96
|
+
wheels = [
|
|
97
|
+
{ url = "https://files.pythonhosted.org/packages/b4/60/829e2048d45aa63ec9c2634f63bf750b9569aded969920db3240737ea9e9/polars_text-0.1.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:f7dbd56bc3c90d87defc9e682e816ff7cdf264f19ee458df190322ba92c10c20", size = 7659172, upload-time = "2026-04-07T23:41:53.444Z" },
|
|
98
|
+
{ url = "https://files.pythonhosted.org/packages/a5/60/6417a5e73915e0dbc4c1f0643b41c715428a5350f4b2a97903655b526342/polars_text-0.1.2-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:0283306eb5c185cac89db3abf1ecf877b47c4e18a2afc4ad8892a5ab12116b8f", size = 10981911, upload-time = "2026-04-07T23:41:55.388Z" },
|
|
99
|
+
{ url = "https://files.pythonhosted.org/packages/da/a6/ff1dd58b29c7ac03e7344311e2b82f211d043321d8f9705a9e8e664db900/polars_text-0.1.2-cp314-cp314-win_amd64.whl", hash = "sha256:5b80da5e9902ecaefc4db227a38d20fc6c328ee4254678319329b38248c2ea4e", size = 7771979, upload-time = "2026-04-07T23:41:57.646Z" },
|
|
100
|
+
]
|
|
101
|
+
|
|
102
|
+
[[package]]
|
|
103
|
+
name = "pygments"
|
|
104
|
+
version = "2.20.0"
|
|
105
|
+
source = { registry = "https://pypi.org/simple" }
|
|
106
|
+
sdist = { url = "https://files.pythonhosted.org/packages/c3/b2/bc9c9196916376152d655522fdcebac55e66de6603a76a02bca1b6414f6c/pygments-2.20.0.tar.gz", hash = "sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f", size = 4955991, upload-time = "2026-03-29T13:29:33.898Z" }
|
|
107
|
+
wheels = [
|
|
108
|
+
{ url = "https://files.pythonhosted.org/packages/f4/7e/a72dd26f3b0f4f2bf1dd8923c85f7ceb43172af56d63c7383eb62b332364/pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176", size = 1231151, upload-time = "2026-03-29T13:29:30.038Z" },
|
|
109
|
+
]
|
|
110
|
+
|
|
111
|
+
[[package]]
|
|
112
|
+
name = "pytest"
|
|
113
|
+
version = "9.0.3"
|
|
114
|
+
source = { registry = "https://pypi.org/simple" }
|
|
115
|
+
dependencies = [
|
|
116
|
+
{ name = "colorama", marker = "sys_platform == 'win32'" },
|
|
117
|
+
{ name = "iniconfig" },
|
|
118
|
+
{ name = "packaging" },
|
|
119
|
+
{ name = "pluggy" },
|
|
120
|
+
{ name = "pygments" },
|
|
121
|
+
]
|
|
122
|
+
sdist = { url = "https://files.pythonhosted.org/packages/7d/0d/549bd94f1a0a402dc8cf64563a117c0f3765662e2e668477624baeec44d5/pytest-9.0.3.tar.gz", hash = "sha256:b86ada508af81d19edeb213c681b1d48246c1a91d304c6c81a427674c17eb91c", size = 1572165, upload-time = "2026-04-07T17:16:18.027Z" }
|
|
123
|
+
wheels = [
|
|
124
|
+
{ url = "https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" },
|
|
125
|
+
]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|