lazyscribe-arrow 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lazyscribe-arrow might be problematic. Click here for more details.

@@ -1,5 +1,6 @@
1
1
  """Import the custom artifact handlers."""
2
2
 
3
3
  from lazyscribe_arrow.csv import CSVArtifact
4
+ from lazyscribe_arrow.parquet import ParquetArtifact
4
5
 
5
- __all__: list[str] = ["CSVArtifact"]
6
+ __all__: list[str] = ["CSVArtifact", "ParquetArtifact"]
lazyscribe_arrow/_meta.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """Version."""
2
2
 
3
- __version__ = "0.1.0"
3
+ __version__ = "0.2.1"
lazyscribe_arrow/csv.py CHANGED
@@ -9,8 +9,15 @@ from attrs import define
9
9
  from lazyscribe._utils import utcnow
10
10
  from lazyscribe.artifacts.base import Artifact
11
11
  from pyarrow import csv
12
+ from pyarrow.interchange import from_dataframe
12
13
  from slugify import slugify
13
14
 
15
+ from lazyscribe_arrow.protocols import (
16
+ ArrowArrayExportable,
17
+ ArrowStreamExportable,
18
+ SupportsInterchange,
19
+ )
20
+
14
21
  LOG = logging.getLogger(__name__)
15
22
 
16
23
 
@@ -90,12 +97,15 @@ class CSVArtifact(Artifact):
90
97
  """
91
98
  if isinstance(obj, pa.Table):
92
99
  LOG.debug("Provided object is already a PyArrow table.")
93
- elif hasattr(obj, "__arrow_c_array__") or hasattr(obj, "__arrow_c_stream__"):
100
+ elif isinstance(obj, (ArrowArrayExportable, ArrowStreamExportable)):
94
101
  obj = pa.table(obj)
102
+ elif isinstance(obj, SupportsInterchange):
103
+ obj = from_dataframe(obj)
95
104
  else:
96
105
  raise ValueError(
97
106
  f"Object of type `{type(obj)}` cannot be easily coerced into a PyArrow Table. "
98
- "Please provide an object that implements the Arrow PyCapsule Interface."
107
+ "Please provide an object that implements the Arrow PyCapsule Interface or the "
108
+ "Dataframe Interchange Protocol."
99
109
  )
100
110
 
101
111
  csv.write_csv(obj, buf, **kwargs)
@@ -0,0 +1,113 @@
1
+ """Custom artifact handlers for parquets."""
2
+
3
+ import logging
4
+ from datetime import datetime
5
+ from typing import Any, ClassVar
6
+
7
+ import pyarrow as pa
8
+ import pyarrow.parquet as pq
9
+ from attrs import define
10
+ from lazyscribe._utils import utcnow
11
+ from lazyscribe.artifacts.base import Artifact
12
+ from pyarrow.interchange import from_dataframe
13
+ from slugify import slugify
14
+
15
+ from lazyscribe_arrow.protocols import (
16
+ ArrowArrayExportable,
17
+ ArrowStreamExportable,
18
+ SupportsInterchange,
19
+ )
20
+
21
+ LOG = logging.getLogger(__name__)
22
+
23
+
24
+ @define(auto_attribs=True)
25
+ class ParquetArtifact(Artifact):
26
+ """Arrow-powered Parquet handler."""
27
+
28
+ alias: ClassVar[str] = "parquet"
29
+ suffix: ClassVar[str] = "parquet"
30
+ binary: ClassVar[bool] = True
31
+ output_only: ClassVar[bool] = False
32
+
33
+ @classmethod
34
+ def construct(
35
+ cls,
36
+ name: str,
37
+ value: Any | None = None,
38
+ fname: str | None = None,
39
+ created_at: datetime | None = None,
40
+ writer_kwargs: dict | None = None,
41
+ version: int = 0,
42
+ dirty: bool = True,
43
+ **kwargs,
44
+ ):
45
+ """Construct the handler class."""
46
+ created_at = created_at or utcnow()
47
+
48
+ return cls( # type: ignore[call-arg]
49
+ name=name,
50
+ value=value,
51
+ fname=fname
52
+ or f"{slugify(name)}-{slugify(created_at.strftime('%Y%m%d%H%M%S'))}.{cls.suffix}",
53
+ writer_kwargs=writer_kwargs or {},
54
+ version=version,
55
+ created_at=created_at,
56
+ dirty=dirty,
57
+ )
58
+
59
+ @classmethod
60
+ def read(cls, buf, **kwargs) -> pa.Table:
61
+ """Read in the parquet file.
62
+
63
+ Parameters
64
+ ----------
65
+ buf : file-like object
66
+ The buffer from a ``fsspec`` filesystem.
67
+ **kwargs
68
+ Keyword arguments for the read method.
69
+
70
+ Returns
71
+ -------
72
+ pyarrow.lib.Table
73
+ A ``pyarrow`` table with the data.
74
+ """
75
+ return pq.read_table(buf, **kwargs)
76
+
77
+ @classmethod
78
+ def write(cls, obj, buf, **kwargs):
79
+ """Write the parquet file using pyarrow.
80
+
81
+ Parameters
82
+ ----------
83
+ obj : object
84
+ The object to write.
85
+ buf : file-like object
86
+ The buffer from a ``fsspec`` filesystem.
87
+ **kwargs
88
+ Keyword arguments for :py:meth:`pyarrow.parquet.write_table`.
89
+
90
+ Raises
91
+ ------
92
+ ValueError
93
+ Raised if the supplied object does not have ``__arrow_c_array__``
94
+ or ``__arrow_c_stream__`` attribute
95
+ and does not
96
+ implement the dataframe interchange protocol. These attributes allow us to
97
+ perform a zero-copy transformation from the native object to a PyArrow
98
+ Table.
99
+ """
100
+ if isinstance(obj, pa.Table):
101
+ LOG.debug("Provided object is already a PyArrow table.")
102
+ elif isinstance(obj, (ArrowArrayExportable, ArrowStreamExportable)):
103
+ obj = pa.table(obj)
104
+ elif isinstance(obj, SupportsInterchange):
105
+ obj = from_dataframe(obj)
106
+ else:
107
+ raise ValueError(
108
+ f"Object of type `{type(obj)}` cannot be easily coerced into a PyArrow Table. "
109
+ "Please provide an object that implements the Arrow PyCapsule Interface or the "
110
+ "Dataframe Interchange Protocol."
111
+ )
112
+
113
+ pq.write_table(obj, buf, **kwargs)
@@ -0,0 +1,38 @@
1
+ """Arrow exportable protocols."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Protocol, runtime_checkable
6
+
7
+
8
+ @runtime_checkable
9
+ class ArrowArrayExportable(Protocol):
10
+ """Type protocol for Arrow C Data Interface via Arrow PyCapsule Interface."""
11
+
12
+ def __arrow_c_array__(
13
+ self, requested_schema: object | None = None
14
+ ) -> tuple[object, object]:
15
+ """Export the object as a pair of ArrowSchema and ArrowArray structures."""
16
+ ...
17
+
18
+
19
+ @runtime_checkable
20
+ class ArrowStreamExportable(Protocol):
21
+ """Type protocol for Arrow C Stream Interface via Arrow PyCapsule Interface."""
22
+
23
+ def __arrow_c_stream__(self, requested_schema: object | None = None) -> object:
24
+ """Export the object as an ArrowArrayStream."""
25
+ ...
26
+
27
+
28
+ @runtime_checkable
29
+ class SupportsInterchange(Protocol):
30
+ """Dataframe that supports conversion into an interchange dataframe object."""
31
+
32
+ def __dataframe__(
33
+ self,
34
+ nan_as_null: bool = False,
35
+ allow_copy: bool = True,
36
+ ) -> SupportsInterchange:
37
+ """Convert to a dataframe object implementing the dataframe interchange protocol."""
38
+ ...
@@ -0,0 +1,85 @@
1
+ Metadata-Version: 2.3
2
+ Name: lazyscribe-arrow
3
+ Version: 0.2.1
4
+ Summary: Arrow-based artifact handlers for Lazyscribe
5
+ Author: Akshay Gupta
6
+ Author-email: Akshay Gupta <akgcodes@gmail.com>
7
+ License: MIT license
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Natural Language :: English
11
+ Classifier: Programming Language :: Python :: 3 :: Only
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Requires-Dist: attrs>=21.2,<=25.3
17
+ Requires-Dist: lazyscribe>=1,<=1.2
18
+ Requires-Dist: pyarrow>=14.0.1,<=21
19
+ Requires-Dist: python-slugify>=5,<=8.0.4
20
+ Requires-Dist: commitizen ; extra == 'build'
21
+ Requires-Dist: uv ; extra == 'build'
22
+ Requires-Dist: lazyscribe-arrow[build] ; extra == 'dev'
23
+ Requires-Dist: lazyscribe-arrow[qa] ; extra == 'dev'
24
+ Requires-Dist: lazyscribe-arrow[tests] ; extra == 'dev'
25
+ Requires-Dist: edgetest ; extra == 'qa'
26
+ Requires-Dist: mypy ; extra == 'qa'
27
+ Requires-Dist: pre-commit ; extra == 'qa'
28
+ Requires-Dist: pyproject-fmt ; extra == 'qa'
29
+ Requires-Dist: ruff ; extra == 'qa'
30
+ Requires-Dist: types-python-slugify ; extra == 'qa'
31
+ Requires-Dist: uv ; extra == 'qa'
32
+ Requires-Dist: pandas ; extra == 'tests'
33
+ Requires-Dist: pytest ; extra == 'tests'
34
+ Requires-Dist: pytest-cov ; extra == 'tests'
35
+ Requires-Dist: time-machine ; extra == 'tests'
36
+ Requires-Python: >=3.10.0
37
+ Project-URL: Documentation, https://github.com/lazyscribe/lazyscribe-arrow
38
+ Project-URL: Repository, https://github.com/lazyscribe/lazyscribe-arrow
39
+ Provides-Extra: build
40
+ Provides-Extra: dev
41
+ Provides-Extra: qa
42
+ Provides-Extra: tests
43
+ Description-Content-Type: text/markdown
44
+
45
+ [![License](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE) [![PyPI](https://img.shields.io/pypi/v/lazyscribe-arrow)](https://pypi.org/project/lazyscribe-arrow/) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/lazyscribe-arrow)](https://pypi.org/project/lazyscribe-arrow/) [![codecov](https://codecov.io/gh/lazyscribe/lazyscribe-arrow/graph/badge.svg?token=W5TPK7GX7G)](https://codecov.io/gh/lazyscribe/lazyscribe-arrow)
46
+
47
+ # Arrow-based artifact handling for lazyscribe
48
+
49
+ `lazyscribe-arrow` is a lightweight package that adds the following artifact handlers for `lazyscribe`:
50
+
51
+ * `csv`
52
+
53
+ Any data structure that implements the [Arrow PyCapsule Interface](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html)
54
+ will be compatible with the handlers in this library. Popular compatible open source data structures include
55
+
56
+ * `pandas.DataFrame`
57
+ * `polars.DataFrame`
58
+ * `polars.LazyFrame`
59
+
60
+ # Installation
61
+
62
+ Python 3.10 or above is required. Use `pip` to install:
63
+
64
+ ```console
65
+ $ python -m pip install lazyscribe-arrow
66
+ ```
67
+
68
+ # Usage
69
+
70
+ To use this library, simply log an artifact to a `lazyscribe` experiment or repository with
71
+
72
+ * `handler="csv"` for a CSV output
73
+
74
+
75
+ ```python
76
+ import pyarrow as pa
77
+ from lazyscribe import Project
78
+
79
+ project = Project("project.json", mode="w")
80
+ with project.log("My experiment") as exp:
81
+ data = pa.Table.from_arrays([[0, 1, 2]], names=["a"])
82
+ exp.log_artifact(name="data", value=data, handler="csv")
83
+
84
+ project.save()
85
+ ```
@@ -0,0 +1,9 @@
1
+ lazyscribe_arrow/__init__.py,sha256=YwnXVqIllCJKZakHtTtDsWh_raw0HGqG8lgsUfCH9FQ,199
2
+ lazyscribe_arrow/_meta.py,sha256=5ldinxF7m6V5dgpN2M5yw54r8bPkiGNJFgWiMlNj7Ag,38
3
+ lazyscribe_arrow/csv.py,sha256=55GIciGtui9sd2l0GdhSXpGaLTenMwxtUbJjboqMDi4,3278
4
+ lazyscribe_arrow/parquet.py,sha256=C_MofzAUqG5UlOG8NW4odlK_rTm0a4HjlhOZed94Row,3413
5
+ lazyscribe_arrow/protocols.py,sha256=VsG6t1em4qsTRwrDvph1aQdTKjFXCjjAcuvwFuay-8Y,1147
6
+ lazyscribe_arrow-0.2.1.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
7
+ lazyscribe_arrow-0.2.1.dist-info/entry_points.txt,sha256=OZeI9uVR1xkCYghXAwtsGStl5ItRE3UZ-BgabZufeyc,106
8
+ lazyscribe_arrow-0.2.1.dist-info/METADATA,sha256=6-F4cGUJ6-QqlJSRcZiQ13hIWW-IUHoCa8veRMUwTqs,3237
9
+ lazyscribe_arrow-0.2.1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: uv 0.8.24
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -1,2 +1,4 @@
1
1
  [lazyscribe.artifact_type]
2
2
  csv = lazyscribe_arrow:CSVArtifact
3
+ parquet = lazyscribe_arrow:ParquetArtifact
4
+
@@ -1,44 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: lazyscribe-arrow
3
- Version: 0.1.0
4
- Summary: Arrow-based artifact handlers for Lazyscribe
5
- Author-email: Akshay Gupta <akgcodes@gmail.com>
6
- License: MIT license
7
- Project-URL: Documentation, https://github.com/lazyscribe/lazyscribe-arrow
8
- Project-URL: Repository, https://github.com/lazyscribe/lazyscribe-arrow
9
- Classifier: Development Status :: 3 - Alpha
10
- Classifier: License :: OSI Approved :: MIT License
11
- Classifier: Natural Language :: English
12
- Classifier: Programming Language :: Python :: 3
13
- Classifier: Programming Language :: Python :: 3 :: Only
14
- Classifier: Programming Language :: Python :: 3.10
15
- Classifier: Programming Language :: Python :: 3.11
16
- Classifier: Programming Language :: Python :: 3.12
17
- Classifier: Programming Language :: Python :: 3.13
18
- Requires-Python: >=3.10.0
19
- Description-Content-Type: text/markdown
20
- Requires-Dist: attrs<=25.1.0,>=21.2.0
21
- Requires-Dist: lazyscribe<=1.1.0,>=1.0.0
22
- Requires-Dist: pyarrow<=19.0.1,>=14.0.1
23
- Requires-Dist: python-slugify<=8.0.4,>=5.0.0
24
- Provides-Extra: build
25
- Requires-Dist: build; extra == "build"
26
- Requires-Dist: commitizen; extra == "build"
27
- Requires-Dist: twine; extra == "build"
28
- Requires-Dist: wheel; extra == "build"
29
- Provides-Extra: qa
30
- Requires-Dist: edgetest; extra == "qa"
31
- Requires-Dist: mypy; extra == "qa"
32
- Requires-Dist: pre-commit; extra == "qa"
33
- Requires-Dist: ruff; extra == "qa"
34
- Requires-Dist: types-python-slugify; extra == "qa"
35
- Requires-Dist: uv; extra == "qa"
36
- Provides-Extra: tests
37
- Requires-Dist: pandas; extra == "tests"
38
- Requires-Dist: pytest; extra == "tests"
39
- Requires-Dist: pytest-cov; extra == "tests"
40
- Requires-Dist: time-machine; extra == "tests"
41
- Provides-Extra: dev
42
- Requires-Dist: lazyscribe-arrow[build]; extra == "dev"
43
- Requires-Dist: lazyscribe-arrow[qa]; extra == "dev"
44
- Requires-Dist: lazyscribe-arrow[tests]; extra == "dev"
@@ -1,8 +0,0 @@
1
- lazyscribe_arrow/__init__.py,sha256=nZR8SxoxZjsGSVwdMFTI9l582H0M-77iec3NXzCeziU,127
2
- lazyscribe_arrow/_meta.py,sha256=FT4J4aKqWfsY7SJy-yg7R1ebCnARJzh_OZjkyUpF0NQ,38
3
- lazyscribe_arrow/csv.py,sha256=mFgYkntabK65KDguS5wuWe3vd7IG0dznKi01EEGf5QY,2972
4
- lazyscribe_arrow-0.1.0.dist-info/METADATA,sha256=7IcYyuWzrkgeRsxJVH-Erz0fYb-Kaxy7lQZbCf7T6rQ,1813
5
- lazyscribe_arrow-0.1.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
6
- lazyscribe_arrow-0.1.0.dist-info/entry_points.txt,sha256=4ymVC3yTTvMs2iOibuw9pt7goZ0zWWhgdjJ6NzMS_cw,62
7
- lazyscribe_arrow-0.1.0.dist-info/top_level.txt,sha256=C_ElBcqIKkSjUmMQPDECDhhP54M7muep1KLnEiFJ61I,17
8
- lazyscribe_arrow-0.1.0.dist-info/RECORD,,
@@ -1,5 +0,0 @@
1
- Wheel-Version: 1.0
2
- Generator: setuptools (78.1.0)
3
- Root-Is-Purelib: true
4
- Tag: py3-none-any
5
-
@@ -1 +0,0 @@
1
- lazyscribe_arrow