lazyscribe-arrow 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lazyscribe-arrow might be problematic. Click here for more details.
- lazyscribe_arrow/__init__.py +2 -1
- lazyscribe_arrow/_meta.py +1 -1
- lazyscribe_arrow/csv.py +5 -1
- lazyscribe_arrow/parquet.py +107 -0
- {lazyscribe_arrow-0.1.0.dist-info → lazyscribe_arrow-0.2.0.dist-info}/METADATA +45 -1
- lazyscribe_arrow-0.2.0.dist-info/RECORD +10 -0
- {lazyscribe_arrow-0.1.0.dist-info → lazyscribe_arrow-0.2.0.dist-info}/WHEEL +1 -1
- {lazyscribe_arrow-0.1.0.dist-info → lazyscribe_arrow-0.2.0.dist-info}/entry_points.txt +1 -0
- lazyscribe_arrow-0.2.0.dist-info/licenses/LICENSE +21 -0
- lazyscribe_arrow-0.1.0.dist-info/RECORD +0 -8
- {lazyscribe_arrow-0.1.0.dist-info → lazyscribe_arrow-0.2.0.dist-info}/top_level.txt +0 -0
lazyscribe_arrow/__init__.py
CHANGED
lazyscribe_arrow/_meta.py
CHANGED
lazyscribe_arrow/csv.py
CHANGED
|
@@ -9,6 +9,7 @@ from attrs import define
|
|
|
9
9
|
from lazyscribe._utils import utcnow
|
|
10
10
|
from lazyscribe.artifacts.base import Artifact
|
|
11
11
|
from pyarrow import csv
|
|
12
|
+
from pyarrow.interchange import from_dataframe
|
|
12
13
|
from slugify import slugify
|
|
13
14
|
|
|
14
15
|
LOG = logging.getLogger(__name__)
|
|
@@ -92,10 +93,13 @@ class CSVArtifact(Artifact):
|
|
|
92
93
|
LOG.debug("Provided object is already a PyArrow table.")
|
|
93
94
|
elif hasattr(obj, "__arrow_c_array__") or hasattr(obj, "__arrow_c_stream__"):
|
|
94
95
|
obj = pa.table(obj)
|
|
96
|
+
elif hasattr(obj, "__dataframe__"):
|
|
97
|
+
obj = from_dataframe(obj)
|
|
95
98
|
else:
|
|
96
99
|
raise ValueError(
|
|
97
100
|
f"Object of type `{type(obj)}` cannot be easily coerced into a PyArrow Table. "
|
|
98
|
-
"Please provide an object that implements the Arrow PyCapsule Interface."
|
|
101
|
+
"Please provide an object that implements the Arrow PyCapsule Interface or the "
|
|
102
|
+
"Dataframe Interchange Protocol."
|
|
99
103
|
)
|
|
100
104
|
|
|
101
105
|
csv.write_csv(obj, buf, **kwargs)
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
"""Custom artifact handlers for Parquet files."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import Any, ClassVar
|
|
6
|
+
|
|
7
|
+
import pyarrow as pa
|
|
8
|
+
import pyarrow.parquet as pq
|
|
9
|
+
from attrs import define
|
|
10
|
+
from lazyscribe._utils import utcnow
|
|
11
|
+
from lazyscribe.artifacts.base import Artifact
|
|
12
|
+
from pyarrow.interchange import from_dataframe
|
|
13
|
+
from slugify import slugify
|
|
14
|
+
|
|
15
|
+
LOG = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@define(auto_attribs=True)
|
|
19
|
+
class ParquetArtifact(Artifact):
|
|
20
|
+
"""Arrow-powered Parquet handler."""
|
|
21
|
+
|
|
22
|
+
alias: ClassVar[str] = "parquet"
|
|
23
|
+
suffix: ClassVar[str] = "parquet"
|
|
24
|
+
binary: ClassVar[bool] = True
|
|
25
|
+
output_only: ClassVar[bool] = False
|
|
26
|
+
|
|
27
|
+
@classmethod
|
|
28
|
+
def construct(
|
|
29
|
+
cls,
|
|
30
|
+
name: str,
|
|
31
|
+
value: Any | None = None,
|
|
32
|
+
fname: str | None = None,
|
|
33
|
+
created_at: datetime | None = None,
|
|
34
|
+
writer_kwargs: dict | None = None,
|
|
35
|
+
version: int = 0,
|
|
36
|
+
dirty: bool = True,
|
|
37
|
+
**kwargs,
|
|
38
|
+
):
|
|
39
|
+
"""Construct the handler class."""
|
|
40
|
+
created_at = created_at or utcnow()
|
|
41
|
+
|
|
42
|
+
return cls( # type: ignore[call-arg]
|
|
43
|
+
name=name,
|
|
44
|
+
value=value,
|
|
45
|
+
fname=fname
|
|
46
|
+
or f"{slugify(name)}-{slugify(created_at.strftime('%Y%m%d%H%M%S'))}.{cls.suffix}",
|
|
47
|
+
writer_kwargs=writer_kwargs or {},
|
|
48
|
+
version=version,
|
|
49
|
+
created_at=created_at,
|
|
50
|
+
dirty=dirty,
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
@classmethod
|
|
54
|
+
def read(cls, buf, **kwargs) -> pa.Table:
|
|
55
|
+
"""Read in the parquet file.
|
|
56
|
+
|
|
57
|
+
Parameters
|
|
58
|
+
----------
|
|
59
|
+
buf : file-like object
|
|
60
|
+
The buffer from a ``fsspec`` filesystem.
|
|
61
|
+
**kwargs
|
|
62
|
+
Keyword arguments for the read method.
|
|
63
|
+
|
|
64
|
+
Returns
|
|
65
|
+
-------
|
|
66
|
+
pyarrow.lib.Table
|
|
67
|
+
A ``pyarrow`` table with the data.
|
|
68
|
+
"""
|
|
69
|
+
return pq.read_table(buf, **kwargs)
|
|
70
|
+
|
|
71
|
+
@classmethod
|
|
72
|
+
def write(cls, obj, buf, **kwargs):
|
|
73
|
+
"""Write the parquet file using pyarrow.
|
|
74
|
+
|
|
75
|
+
Parameters
|
|
76
|
+
----------
|
|
77
|
+
obj : object
|
|
78
|
+
The object to write.
|
|
79
|
+
buf : file-like object
|
|
80
|
+
The buffer from a ``fsspec`` filesystem.
|
|
81
|
+
**kwargs
|
|
82
|
+
Keyword arguments for :py:meth:`pyarrow.parquet.write_table`.
|
|
83
|
+
|
|
84
|
+
Raises
|
|
85
|
+
------
|
|
86
|
+
ValueError
|
|
87
|
+
Raised if the supplied object does not have ``__arrow_c_array__``
|
|
88
|
+
or ``__arrow_c_stream__`` attribute
|
|
89
|
+
or if the object does not
|
|
90
|
+
implement the dataframe interchange protocol. These attributes allow us to
|
|
91
|
+
perform a zero-copy transformation from the native object to a PyArrow
|
|
92
|
+
Table.
|
|
93
|
+
"""
|
|
94
|
+
if isinstance(obj, pa.Table):
|
|
95
|
+
LOG.debug("Provided object is already a PyArrow table.")
|
|
96
|
+
elif hasattr(obj, "__arrow_c_array__") or hasattr(obj, "__arrow_c_stream__"):
|
|
97
|
+
obj = pa.table(obj)
|
|
98
|
+
elif hasattr(obj, "__dataframe__"):
|
|
99
|
+
obj = from_dataframe(obj)
|
|
100
|
+
else:
|
|
101
|
+
raise ValueError(
|
|
102
|
+
f"Object of type `{type(obj)}` cannot be easily coerced into a PyArrow Table. "
|
|
103
|
+
"Please provide an object that implements the Arrow PyCapsule Interface or the "
|
|
104
|
+
"Dataframe Interchange Protocol."
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
pq.write_table(obj, buf, **kwargs)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lazyscribe-arrow
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Arrow-based artifact handlers for Lazyscribe
|
|
5
5
|
Author-email: Akshay Gupta <akgcodes@gmail.com>
|
|
6
6
|
License: MIT license
|
|
@@ -17,6 +17,7 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
17
17
|
Classifier: Programming Language :: Python :: 3.13
|
|
18
18
|
Requires-Python: >=3.10.0
|
|
19
19
|
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
20
21
|
Requires-Dist: attrs<=25.1.0,>=21.2.0
|
|
21
22
|
Requires-Dist: lazyscribe<=1.1.0,>=1.0.0
|
|
22
23
|
Requires-Dist: pyarrow<=19.0.1,>=14.0.1
|
|
@@ -42,3 +43,46 @@ Provides-Extra: dev
|
|
|
42
43
|
Requires-Dist: lazyscribe-arrow[build]; extra == "dev"
|
|
43
44
|
Requires-Dist: lazyscribe-arrow[qa]; extra == "dev"
|
|
44
45
|
Requires-Dist: lazyscribe-arrow[tests]; extra == "dev"
|
|
46
|
+
Dynamic: license-file
|
|
47
|
+
|
|
48
|
+
[](LICENSE) [](https://pypi.org/project/lazyscribe-arrow/) [](https://pypi.org/project/lazyscribe-arrow/) [](https://codecov.io/gh/lazyscribe/lazyscribe-arrow)
|
|
49
|
+
|
|
50
|
+
# Arrow-based artifact handling for lazyscribe
|
|
51
|
+
|
|
52
|
+
`lazyscribe-arrow` is a lightweight package that adds the following artifact handlers for `lazyscribe`:
|
|
53
|
+
|
|
54
|
+
* `csv`
|
|
55
|
+
|
|
56
|
+
Any data structure that implements the [Arrow PyCapsule Interface](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html)
|
|
57
|
+
will be compatible with the handlers in this library. Popular compatible open source data structures include
|
|
58
|
+
|
|
59
|
+
* `pandas.DataFrame`
|
|
60
|
+
* `polars.DataFrame`
|
|
61
|
+
* `polars.LazyFrame`
|
|
62
|
+
|
|
63
|
+
# Installation
|
|
64
|
+
|
|
65
|
+
Python 3.10 or above is required. Use `pip` to install:
|
|
66
|
+
|
|
67
|
+
```console
|
|
68
|
+
$ python -m pip install lazyscribe-arrow
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
# Usage
|
|
72
|
+
|
|
73
|
+
To use this library, simply log an artifact to a `lazyscribe` experiment or repository with
|
|
74
|
+
|
|
75
|
+
* `handler="csv"` for a CSV output
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
import pyarrow as pa
|
|
80
|
+
from lazyscribe import Project
|
|
81
|
+
|
|
82
|
+
project = Project("project.json", mode="w")
|
|
83
|
+
with project.log("My experiment") as exp:
|
|
84
|
+
data = pa.Table.from_arrays([[0, 1, 2]], names=["a"])
|
|
85
|
+
exp.log_artifact(name="data", value=data, handler="csv")
|
|
86
|
+
|
|
87
|
+
project.save()
|
|
88
|
+
```
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
lazyscribe_arrow/__init__.py,sha256=YwnXVqIllCJKZakHtTtDsWh_raw0HGqG8lgsUfCH9FQ,199
|
|
2
|
+
lazyscribe_arrow/_meta.py,sha256=tkM0F11odKeRd4ewZf6_GEIFshm_W68xXAjlwEu59xQ,38
|
|
3
|
+
lazyscribe_arrow/csv.py,sha256=cyMcFC9S0tR2U7C5EWTsvQZETgU54fronafmn2LQbSA,3158
|
|
4
|
+
lazyscribe_arrow/parquet.py,sha256=J3AqMvV77wSDW8FQYJE8xjidC5v8Zg4tvgCYh11_1LM,3293
|
|
5
|
+
lazyscribe_arrow-0.2.0.dist-info/licenses/LICENSE,sha256=CbVwVKAq7TtkHhzCxwB5N86wxbkcn2VMP5UTxj3NgZo,1067
|
|
6
|
+
lazyscribe_arrow-0.2.0.dist-info/METADATA,sha256=TqG43wfVq499R4vLnaNUjs5wnEhga6NOcl5BLoN33a0,3347
|
|
7
|
+
lazyscribe_arrow-0.2.0.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
|
|
8
|
+
lazyscribe_arrow-0.2.0.dist-info/entry_points.txt,sha256=GBZx4whc5e9t2mDuCxaBlYX0yHJKmnNwqczLob2TFfI,105
|
|
9
|
+
lazyscribe_arrow-0.2.0.dist-info/top_level.txt,sha256=C_ElBcqIKkSjUmMQPDECDhhP54M7muep1KLnEiFJ61I,17
|
|
10
|
+
lazyscribe_arrow-0.2.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 lazyscribe
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
lazyscribe_arrow/__init__.py,sha256=nZR8SxoxZjsGSVwdMFTI9l582H0M-77iec3NXzCeziU,127
|
|
2
|
-
lazyscribe_arrow/_meta.py,sha256=FT4J4aKqWfsY7SJy-yg7R1ebCnARJzh_OZjkyUpF0NQ,38
|
|
3
|
-
lazyscribe_arrow/csv.py,sha256=mFgYkntabK65KDguS5wuWe3vd7IG0dznKi01EEGf5QY,2972
|
|
4
|
-
lazyscribe_arrow-0.1.0.dist-info/METADATA,sha256=7IcYyuWzrkgeRsxJVH-Erz0fYb-Kaxy7lQZbCf7T6rQ,1813
|
|
5
|
-
lazyscribe_arrow-0.1.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
6
|
-
lazyscribe_arrow-0.1.0.dist-info/entry_points.txt,sha256=4ymVC3yTTvMs2iOibuw9pt7goZ0zWWhgdjJ6NzMS_cw,62
|
|
7
|
-
lazyscribe_arrow-0.1.0.dist-info/top_level.txt,sha256=C_ElBcqIKkSjUmMQPDECDhhP54M7muep1KLnEiFJ61I,17
|
|
8
|
-
lazyscribe_arrow-0.1.0.dist-info/RECORD,,
|
|
File without changes
|