lazyscribe-arrow 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lazyscribe-arrow might be problematic. Click here for more details.

@@ -1,5 +1,6 @@
1
1
  """Import the custom artifact handlers."""
2
2
 
3
3
  from lazyscribe_arrow.csv import CSVArtifact
4
+ from lazyscribe_arrow.parquet import ParquetArtifact
4
5
 
5
- __all__: list[str] = ["CSVArtifact"]
6
+ __all__: list[str] = ["CSVArtifact", "ParquetArtifact"]
lazyscribe_arrow/_meta.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """Version."""
2
2
 
3
- __version__ = "0.1.0"
3
+ __version__ = "0.2.0"
lazyscribe_arrow/csv.py CHANGED
@@ -9,6 +9,7 @@ from attrs import define
9
9
  from lazyscribe._utils import utcnow
10
10
  from lazyscribe.artifacts.base import Artifact
11
11
  from pyarrow import csv
12
+ from pyarrow.interchange import from_dataframe
12
13
  from slugify import slugify
13
14
 
14
15
  LOG = logging.getLogger(__name__)
@@ -92,10 +93,13 @@ class CSVArtifact(Artifact):
92
93
  LOG.debug("Provided object is already a PyArrow table.")
93
94
  elif hasattr(obj, "__arrow_c_array__") or hasattr(obj, "__arrow_c_stream__"):
94
95
  obj = pa.table(obj)
96
+ elif hasattr(obj, "__dataframe__"):
97
+ obj = from_dataframe(obj)
95
98
  else:
96
99
  raise ValueError(
97
100
  f"Object of type `{type(obj)}` cannot be easily coerced into a PyArrow Table. "
98
- "Please provide an object that implements the Arrow PyCapsule Interface."
101
+ "Please provide an object that implements the Arrow PyCapsule Interface or the "
102
+ "Dataframe Interchange Protocol."
99
103
  )
100
104
 
101
105
  csv.write_csv(obj, buf, **kwargs)
@@ -0,0 +1,107 @@
1
+ """Custom artifact handlers for parquets."""
2
+
3
+ import logging
4
+ from datetime import datetime
5
+ from typing import Any, ClassVar
6
+
7
+ import pyarrow as pa
8
+ import pyarrow.parquet as pq
9
+ from attrs import define
10
+ from lazyscribe._utils import utcnow
11
+ from lazyscribe.artifacts.base import Artifact
12
+ from pyarrow.interchange import from_dataframe
13
+ from slugify import slugify
14
+
15
+ LOG = logging.getLogger(__name__)
16
+
17
+
18
+ @define(auto_attribs=True)
19
+ class ParquetArtifact(Artifact):
20
+ """Arrow-powered Parquet handler."""
21
+
22
+ alias: ClassVar[str] = "parquet"
23
+ suffix: ClassVar[str] = "parquet"
24
+ binary: ClassVar[bool] = True
25
+ output_only: ClassVar[bool] = False
26
+
27
+ @classmethod
28
+ def construct(
29
+ cls,
30
+ name: str,
31
+ value: Any | None = None,
32
+ fname: str | None = None,
33
+ created_at: datetime | None = None,
34
+ writer_kwargs: dict | None = None,
35
+ version: int = 0,
36
+ dirty: bool = True,
37
+ **kwargs,
38
+ ):
39
+ """Construct the handler class."""
40
+ created_at = created_at or utcnow()
41
+
42
+ return cls( # type: ignore[call-arg]
43
+ name=name,
44
+ value=value,
45
+ fname=fname
46
+ or f"{slugify(name)}-{slugify(created_at.strftime('%Y%m%d%H%M%S'))}.{cls.suffix}",
47
+ writer_kwargs=writer_kwargs or {},
48
+ version=version,
49
+ created_at=created_at,
50
+ dirty=dirty,
51
+ )
52
+
53
+ @classmethod
54
+ def read(cls, buf, **kwargs) -> pa.Table:
55
+ """Read in the parquet file.
56
+
57
+ Parameters
58
+ ----------
59
+ buf : file-like object
60
+ The buffer from a ``fsspec`` filesystem.
61
+ **kwargs
62
+ Keyword arguments for the read method.
63
+
64
+ Returns
65
+ -------
66
+ pyarrow.lib.Table
67
+ A ``pyarrow`` table with the data.
68
+ """
69
+ return pq.read_table(buf, **kwargs)
70
+
71
+ @classmethod
72
+ def write(cls, obj, buf, **kwargs):
73
+ """Write the parquet file using pyarrow.
74
+
75
+ Parameters
76
+ ----------
77
+ obj : object
78
+ The object to write.
79
+ buf : file-like object
80
+ The buffer from a ``fsspec`` filesystem.
81
+ **kwargs
82
+ Keyword arguments for :py:meth:`pyarrow.parquet.write_table`.
83
+
84
+ Raises
85
+ ------
86
+ ValueError
87
+ Raised if the supplied object does not have ``__arrow_c_array__``
88
+ or ``__arrow_c_stream__`` attribute
89
+ and if the object does not
90
+ implement the dataframe interchange protocol. These attributes allow us to
91
+ perform a zero-copy transformation from the native object to a PyArrow
92
+ Table.
93
+ """
94
+ if isinstance(obj, pa.Table):
95
+ LOG.debug("Provided object is already a PyArrow table.")
96
+ elif hasattr(obj, "__arrow_c_array__") or hasattr(obj, "__arrow_c_stream__"):
97
+ obj = pa.table(obj)
98
+ elif hasattr(obj, "__dataframe__"):
99
+ obj = from_dataframe(obj)
100
+ else:
101
+ raise ValueError(
102
+ f"Object of type `{type(obj)}` cannot be easily coerced into a PyArrow Table. "
103
+ "Please provide an object that implements the Arrow PyCapsule Interface or the "
104
+ "Dataframe Interchange Protocol."
105
+ )
106
+
107
+ pq.write_table(obj, buf, **kwargs)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lazyscribe-arrow
3
- Version: 0.1.0
3
+ Version: 0.2.0
4
4
  Summary: Arrow-based artifact handlers for Lazyscribe
5
5
  Author-email: Akshay Gupta <akgcodes@gmail.com>
6
6
  License: MIT license
@@ -17,6 +17,7 @@ Classifier: Programming Language :: Python :: 3.12
17
17
  Classifier: Programming Language :: Python :: 3.13
18
18
  Requires-Python: >=3.10.0
19
19
  Description-Content-Type: text/markdown
20
+ License-File: LICENSE
20
21
  Requires-Dist: attrs<=25.1.0,>=21.2.0
21
22
  Requires-Dist: lazyscribe<=1.1.0,>=1.0.0
22
23
  Requires-Dist: pyarrow<=19.0.1,>=14.0.1
@@ -42,3 +43,46 @@ Provides-Extra: dev
42
43
  Requires-Dist: lazyscribe-arrow[build]; extra == "dev"
43
44
  Requires-Dist: lazyscribe-arrow[qa]; extra == "dev"
44
45
  Requires-Dist: lazyscribe-arrow[tests]; extra == "dev"
46
+ Dynamic: license-file
47
+
48
+ [![License](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE) [![PyPI](https://img.shields.io/pypi/v/lazyscribe-arrow)](https://pypi.org/project/lazyscribe-arrow/) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/lazyscribe-arrow)](https://pypi.org/project/lazyscribe-arrow/) [![codecov](https://codecov.io/gh/lazyscribe/lazyscribe-arrow/graph/badge.svg?token=W5TPK7GX7G)](https://codecov.io/gh/lazyscribe/lazyscribe-arrow)
49
+
50
+ # Arrow-based artifact handling for lazyscribe
51
+
52
+ `lazyscribe-arrow` is a lightweight package that adds the following artifact handlers for `lazyscribe`:
53
+
54
+ * `csv`
55
+
56
+ Any data structure that implements the [Arrow PyCapsule Interface](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html)
57
+ will be compatible with the handlers in this library. Popular compatible open source data structures include
58
+
59
+ * `pandas.DataFrame`
60
+ * `polars.DataFrame`
61
+ * `polars.LazyFrame`
62
+
63
+ # Installation
64
+
65
+ Python 3.10 and above is required. Use `pip` to install:
66
+
67
+ ```console
68
+ $ python -m pip install lazyscribe-arrow
69
+ ```
70
+
71
+ # Usage
72
+
73
+ To use this library, simply log an artifact to a `lazyscribe` experiment or repository with
74
+
75
+ * `handler="csv"` for a CSV output
76
+
77
+
78
+ ```python
79
+ import pyarrow as pa
80
+ from lazyscribe import Project
81
+
82
+ project = Project("project.json", mode="w")
83
+ with project.log("My experiment") as exp:
84
+ data = pa.Table.from_arrays([[0, 1, 2]], names=["a"])
85
+ exp.log_artifact(name="data", value=data, handler="csv")
86
+
87
+ project.save()
88
+ ```
@@ -0,0 +1,10 @@
1
+ lazyscribe_arrow/__init__.py,sha256=YwnXVqIllCJKZakHtTtDsWh_raw0HGqG8lgsUfCH9FQ,199
2
+ lazyscribe_arrow/_meta.py,sha256=tkM0F11odKeRd4ewZf6_GEIFshm_W68xXAjlwEu59xQ,38
3
+ lazyscribe_arrow/csv.py,sha256=cyMcFC9S0tR2U7C5EWTsvQZETgU54fronafmn2LQbSA,3158
4
+ lazyscribe_arrow/parquet.py,sha256=J3AqMvV77wSDW8FQYJE8xjidC5v8Zg4tvgCYh11_1LM,3293
5
+ lazyscribe_arrow-0.2.0.dist-info/licenses/LICENSE,sha256=CbVwVKAq7TtkHhzCxwB5N86wxbkcn2VMP5UTxj3NgZo,1067
6
+ lazyscribe_arrow-0.2.0.dist-info/METADATA,sha256=TqG43wfVq499R4vLnaNUjs5wnEhga6NOcl5BLoN33a0,3347
7
+ lazyscribe_arrow-0.2.0.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
8
+ lazyscribe_arrow-0.2.0.dist-info/entry_points.txt,sha256=GBZx4whc5e9t2mDuCxaBlYX0yHJKmnNwqczLob2TFfI,105
9
+ lazyscribe_arrow-0.2.0.dist-info/top_level.txt,sha256=C_ElBcqIKkSjUmMQPDECDhhP54M7muep1KLnEiFJ61I,17
10
+ lazyscribe_arrow-0.2.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (78.1.0)
2
+ Generator: setuptools (80.3.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,2 +1,3 @@
1
1
  [lazyscribe.artifact_type]
2
2
  csv = lazyscribe_arrow:CSVArtifact
3
+ parquet = lazyscribe_arrow:ParquetArtifact
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 lazyscribe
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -1,8 +0,0 @@
1
- lazyscribe_arrow/__init__.py,sha256=nZR8SxoxZjsGSVwdMFTI9l582H0M-77iec3NXzCeziU,127
2
- lazyscribe_arrow/_meta.py,sha256=FT4J4aKqWfsY7SJy-yg7R1ebCnARJzh_OZjkyUpF0NQ,38
3
- lazyscribe_arrow/csv.py,sha256=mFgYkntabK65KDguS5wuWe3vd7IG0dznKi01EEGf5QY,2972
4
- lazyscribe_arrow-0.1.0.dist-info/METADATA,sha256=7IcYyuWzrkgeRsxJVH-Erz0fYb-Kaxy7lQZbCf7T6rQ,1813
5
- lazyscribe_arrow-0.1.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
6
- lazyscribe_arrow-0.1.0.dist-info/entry_points.txt,sha256=4ymVC3yTTvMs2iOibuw9pt7goZ0zWWhgdjJ6NzMS_cw,62
7
- lazyscribe_arrow-0.1.0.dist-info/top_level.txt,sha256=C_ElBcqIKkSjUmMQPDECDhhP54M7muep1KLnEiFJ61I,17
8
- lazyscribe_arrow-0.1.0.dist-info/RECORD,,