lazyscribe-arrow 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lazyscribe-arrow might be problematic. Click here for more details.
- lazyscribe_arrow/__init__.py +5 -0
- lazyscribe_arrow/_meta.py +3 -0
- lazyscribe_arrow/csv.py +101 -0
- lazyscribe_arrow-0.1.0.dist-info/METADATA +44 -0
- lazyscribe_arrow-0.1.0.dist-info/RECORD +8 -0
- lazyscribe_arrow-0.1.0.dist-info/WHEEL +5 -0
- lazyscribe_arrow-0.1.0.dist-info/entry_points.txt +2 -0
- lazyscribe_arrow-0.1.0.dist-info/top_level.txt +1 -0
lazyscribe_arrow/csv.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
"""Custom artifact handlers for CSVs."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import Any, ClassVar
|
|
6
|
+
|
|
7
|
+
import pyarrow as pa
|
|
8
|
+
from attrs import define
|
|
9
|
+
from lazyscribe._utils import utcnow
|
|
10
|
+
from lazyscribe.artifacts.base import Artifact
|
|
11
|
+
from pyarrow import csv
|
|
12
|
+
from slugify import slugify
|
|
13
|
+
|
|
14
|
+
LOG = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@define(auto_attribs=True)
class CSVArtifact(Artifact):
    """Artifact handler that reads and writes CSV files through PyArrow."""

    # Handler metadata consumed by the ``Artifact`` machinery.
    # NOTE(review): exact semantics of these flags are defined by lazyscribe's
    # ``Artifact`` base class, which is not visible here -- confirm there.
    alias: ClassVar[str] = "csv"
    suffix: ClassVar[str] = "csv"
    binary: ClassVar[bool] = True
    output_only: ClassVar[bool] = False

    @classmethod
    def construct(
        cls,
        name: str,
        value: Any | None = None,
        fname: str | None = None,
        created_at: datetime | None = None,
        writer_kwargs: dict | None = None,
        version: int = 0,
        dirty: bool = True,
        **kwargs,
    ):
        """Build a handler instance, filling in defaults for missing fields.

        Parameters
        ----------
        name : str
            Name of the artifact. Also used (slugified) to derive the default
            filename.
        value : Any, optional (default None)
            The artifact value; passed through unchanged.
        fname : str, optional (default None)
            Filename for the artifact. When not supplied, a name of the form
            ``<slugified name>-<YYYYMMDDHHMMSS>.<suffix>`` is generated.
        created_at : datetime, optional (default None)
            Creation timestamp. When not supplied, ``utcnow()`` is used.
        writer_kwargs : dict, optional (default None)
            Keyword arguments for the writer. When not supplied, an empty
            dictionary is used.
        version : int, optional (default 0)
            Artifact version; passed through unchanged.
        dirty : bool, optional (default True)
            Dirty flag; passed through unchanged.
        **kwargs
            Accepted for signature compatibility; not used by this method.

        Returns
        -------
        CSVArtifact
            The constructed handler.
        """
        if not created_at:
            created_at = utcnow()

        if not fname:
            # Derive a filename from the artifact name and creation time.
            name_slug = slugify(name)
            stamp_slug = slugify(created_at.strftime("%Y%m%d%H%M%S"))
            fname = f"{name_slug}-{stamp_slug}.{cls.suffix}"

        return cls(  # type: ignore[call-arg]
            name=name,
            value=value,
            fname=fname,
            writer_kwargs=writer_kwargs or {},
            version=version,
            created_at=created_at,
            dirty=dirty,
        )

    @classmethod
    def read(cls, buf, **kwargs) -> pa.Table:
        """Load a CSV file into a PyArrow table.

        Parameters
        ----------
        buf : file-like object
            The buffer from a ``fsspec`` filesystem.
        **kwargs
            Keyword arguments forwarded to :py:meth:`pyarrow.csv.read_csv`.

        Returns
        -------
        pyarrow.lib.Table
            A ``pyarrow`` table with the data.
        """
        table = csv.read_csv(buf, **kwargs)
        return table

    @classmethod
    def write(cls, obj, buf, **kwargs):
        """Write an Arrow-compatible object to a CSV file using PyArrow.

        Parameters
        ----------
        obj : object
            The object to write. Either a :py:class:`pyarrow.Table` or an
            object exposing ``__arrow_c_array__`` or ``__arrow_c_stream__``.
        buf : file-like object
            The buffer from a ``fsspec`` filesystem.
        **kwargs
            Keyword arguments for :py:meth:`pyarrow.csv.write_csv`.

        Raises
        ------
        ValueError
            Raised if the supplied object is not a PyArrow table and does not
            have ``__arrow_c_array__`` or ``__arrow_c_stream__`` attributes.
            These attributes allow us to perform a zero-copy transformation
            from the native object to a PyArrow Table.
        """
        capsule_hooks = ("__arrow_c_array__", "__arrow_c_stream__")

        if isinstance(obj, pa.Table):
            LOG.debug("Provided object is already a PyArrow table.")
            table = obj
        elif any(hasattr(obj, hook) for hook in capsule_hooks):
            # Let PyArrow consume the object through its Arrow export hooks.
            table = pa.table(obj)
        else:
            raise ValueError(
                f"Object of type `{type(obj)}` cannot be easily coerced into a PyArrow Table. "
                "Please provide an object that implements the Arrow PyCapsule Interface."
            )

        csv.write_csv(table, buf, **kwargs)
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: lazyscribe-arrow
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Arrow-based artifact handlers for Lazyscribe
|
|
5
|
+
Author-email: Akshay Gupta <akgcodes@gmail.com>
|
|
6
|
+
License: MIT license
|
|
7
|
+
Project-URL: Documentation, https://github.com/lazyscribe/lazyscribe-arrow
|
|
8
|
+
Project-URL: Repository, https://github.com/lazyscribe/lazyscribe-arrow
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Natural Language :: English
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Requires-Python: >=3.10.0
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
Requires-Dist: attrs<=25.1.0,>=21.2.0
|
|
21
|
+
Requires-Dist: lazyscribe<=1.1.0,>=1.0.0
|
|
22
|
+
Requires-Dist: pyarrow<=19.0.1,>=14.0.1
|
|
23
|
+
Requires-Dist: python-slugify<=8.0.4,>=5.0.0
|
|
24
|
+
Provides-Extra: build
|
|
25
|
+
Requires-Dist: build; extra == "build"
|
|
26
|
+
Requires-Dist: commitizen; extra == "build"
|
|
27
|
+
Requires-Dist: twine; extra == "build"
|
|
28
|
+
Requires-Dist: wheel; extra == "build"
|
|
29
|
+
Provides-Extra: qa
|
|
30
|
+
Requires-Dist: edgetest; extra == "qa"
|
|
31
|
+
Requires-Dist: mypy; extra == "qa"
|
|
32
|
+
Requires-Dist: pre-commit; extra == "qa"
|
|
33
|
+
Requires-Dist: ruff; extra == "qa"
|
|
34
|
+
Requires-Dist: types-python-slugify; extra == "qa"
|
|
35
|
+
Requires-Dist: uv; extra == "qa"
|
|
36
|
+
Provides-Extra: tests
|
|
37
|
+
Requires-Dist: pandas; extra == "tests"
|
|
38
|
+
Requires-Dist: pytest; extra == "tests"
|
|
39
|
+
Requires-Dist: pytest-cov; extra == "tests"
|
|
40
|
+
Requires-Dist: time-machine; extra == "tests"
|
|
41
|
+
Provides-Extra: dev
|
|
42
|
+
Requires-Dist: lazyscribe-arrow[build]; extra == "dev"
|
|
43
|
+
Requires-Dist: lazyscribe-arrow[qa]; extra == "dev"
|
|
44
|
+
Requires-Dist: lazyscribe-arrow[tests]; extra == "dev"
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
lazyscribe_arrow/__init__.py,sha256=nZR8SxoxZjsGSVwdMFTI9l582H0M-77iec3NXzCeziU,127
|
|
2
|
+
lazyscribe_arrow/_meta.py,sha256=FT4J4aKqWfsY7SJy-yg7R1ebCnARJzh_OZjkyUpF0NQ,38
|
|
3
|
+
lazyscribe_arrow/csv.py,sha256=mFgYkntabK65KDguS5wuWe3vd7IG0dznKi01EEGf5QY,2972
|
|
4
|
+
lazyscribe_arrow-0.1.0.dist-info/METADATA,sha256=7IcYyuWzrkgeRsxJVH-Erz0fYb-Kaxy7lQZbCf7T6rQ,1813
|
|
5
|
+
lazyscribe_arrow-0.1.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
6
|
+
lazyscribe_arrow-0.1.0.dist-info/entry_points.txt,sha256=4ymVC3yTTvMs2iOibuw9pt7goZ0zWWhgdjJ6NzMS_cw,62
|
|
7
|
+
lazyscribe_arrow-0.1.0.dist-info/top_level.txt,sha256=C_ElBcqIKkSjUmMQPDECDhhP54M7muep1KLnEiFJ61I,17
|
|
8
|
+
lazyscribe_arrow-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
lazyscribe_arrow
|