datazip 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datazip/__init__.py +17 -0
- datazip/_optional.py +142 -0
- datazip/_test_classes.py +186 -0
- datazip/_utils.py +87 -0
- datazip/_version.py +24 -0
- datazip/core.py +876 -0
- datazip/mixin.py +117 -0
- datazip-0.2.0.dist-info/METADATA +100 -0
- datazip-0.2.0.dist-info/RECORD +11 -0
- datazip-0.2.0.dist-info/WHEEL +4 -0
- datazip-0.2.0.dist-info/licenses/LICENSE +21 -0
datazip/__init__.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Datazip."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
# Create the package-level logger (named "datazip") for use anywhere within the package.
|
|
6
|
+
logger = logging.getLogger("datazip")
|
|
7
|
+
|
|
8
|
+
try:
|
|
9
|
+
from datazip._version import version as __version__
|
|
10
|
+
except ImportError:
|
|
11
|
+
logger.warning("Version unknown because package is not installed.")
|
|
12
|
+
__version__ = "unknown"
|
|
13
|
+
|
|
14
|
+
from datazip.core import DataZip # noqa: E402
|
|
15
|
+
from datazip.mixin import IOMixin # noqa: E402
|
|
16
|
+
|
|
17
|
+
__all__ = ["DataZip", "IOMixin", "__version__"]
|
datazip/_optional.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""Dummy modules and classes when optional packages are not installed.
|
|
2
|
+
|
|
3
|
+
Could do something more like this:
|
|
4
|
+
https://github.com/pola-rs/polars/blob/master/py-polars/polars/dependencies.py
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
try:
    import sqlalchemy
except ImportError:  # also covers ModuleNotFoundError, which subclasses it
    # Stand-in namespace so attribute access such as
    # ``sqlalchemy.engine.Engine`` still resolves without the real package.
    class sqlalchemy:  # noqa: N801
        """Placeholder for :mod:`sqlalchemy` when it cannot be imported."""

        class engine:  # noqa: N801
            """Placeholder for :mod:`sqlalchemy.engine`."""

            class Engine:
                """Placeholder for :class:`sqlalchemy.engine.Engine`."""

        @staticmethod
        def create_engine(*args, **kwargs):
            """Placeholder for :func:`sqlalchemy.create_engine`.

            Accepts any arguments, does nothing and returns ``None``.
            """
            return None
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
try:
    import plotly
except ImportError:  # also covers ModuleNotFoundError, which subclasses it
    # Stand-in namespace so ``plotly.graph_objects.Figure`` still resolves
    # without the real package.
    class plotly:  # noqa: N801
        """Placeholder for :mod:`plotly` when it cannot be imported."""

        class graph_objects:  # noqa: N801
            """Placeholder for :mod:`plotly.graph_objects`."""

            class Figure:
                """Placeholder for :class:`plotly.graph_objects.Figure`."""
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
try:
    import numpy
except ImportError:  # also covers ModuleNotFoundError, which subclasses it
    # Stand-in namespace so the names below still resolve without numpy.
    class numpy:  # noqa: N801
        """Placeholder for :mod:`numpy` when it cannot be imported."""

        class ndarray:  # noqa: N801
            """Placeholder for :class:`numpy.ndarray`."""

        class int64:  # noqa: N801
            """Placeholder for :class:`numpy.int64`."""

        class float64:  # noqa: N801
            """Placeholder for :class:`numpy.float64`."""

        def load(*args, **kwargs):
            """Placeholder for :func:`numpy.load`; does nothing, returns ``None``."""
            return None

        def save(*args, **kwargs):
            """Placeholder for :func:`numpy.save`; does nothing, returns ``None``."""
            return None
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
try:
    import pandas

    # Compare the numeric major version rather than the raw version strings:
    # a lexicographic comparison would treat "10.0.0" as older than "2.0.0".
    # A non-numeric major component is treated as unsupported.
    _pd_major = pandas.__version__.partition(".")[0]
    if not _pd_major.isdigit() or int(_pd_major) < 2:
        raise ImportError("pandas < 2.0.0")

except ImportError:  # also covers ModuleNotFoundError, which subclasses it
    # Used both when pandas is missing and when the installed version is
    # older than 2.0.0 (see the explicit raise above).
    class pandas:  # noqa: N801
        """Dummy for when pandas is not installed or is older than 2.0.0."""

        class DataFrame:
            """Dummy for :class:`pandas.DataFrame`."""

            def to_parquet(*args, **kwargs):
                """Dummy for ``DataFrame.to_parquet``; does nothing."""
                pass

        class Series:
            """Dummy for :class:`pandas.Series`."""

            def to_frame(*args, **kwargs):
                """Dummy for ``Series.to_frame``; does nothing."""
                pass

        class Timestamp:
            """Dummy for :class:`pandas.Timestamp`."""

            pass

        def read_parquet(*args, **kwargs):
            """Dummy for :func:`pandas.read_parquet`; does nothing."""
            pass
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
try:
    import polars
except ImportError:  # also covers ModuleNotFoundError, which subclasses it
    # Stand-in namespace so the names below still resolve without polars.
    class polars:  # noqa: N801
        """Placeholder used when polars cannot be imported."""

        class DataFrame:
            """Placeholder for ``polars.DataFrame``."""

            def write_parquet(*args, **kwargs):
                """Placeholder for ``DataFrame.write_parquet``; returns ``None``."""
                return None

        class LazyFrame:
            """Placeholder for ``polars.LazyFrame``."""

            def collect(*args, **kwargs):
                """Placeholder for ``LazyFrame.collect``; returns ``None``."""
                return None

        class Series:
            """Placeholder for ``polars.Series``."""

            def to_frame(*args, **kwargs):
                """Placeholder for ``Series.to_frame``; returns ``None``."""
                return None

        def read_parquet(*args, **kwargs):
            """Placeholder for ``polars.read_parquet``; returns ``None``."""
            return None
|
datazip/_test_classes.py
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
"""Mocks and other objects for testing [`DataZip`][datazip.core.DataZip]."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import NamedTuple
|
|
6
|
+
|
|
7
|
+
import pandas as pd
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ObjMeta(NamedTuple):
    """NamedTuple for testing.

    Three string fields; ``module`` and ``qualname`` are required and
    ``constructor`` defaults to ``None``.
    """

    # NOTE(review): presumably the dotted module path of a described object -- confirm.
    module: str
    # NOTE(review): presumably the ``__qualname__`` of a described object -- confirm.
    qualname: str
    # Optional; ``None`` when not supplied.
    constructor: str | None = None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _eq_func(self, other):
|
|
19
|
+
if not isinstance(other, self.__class__):
|
|
20
|
+
return False
|
|
21
|
+
|
|
22
|
+
def _comp(v0, v1):
|
|
23
|
+
if isinstance(v0, dict):
|
|
24
|
+
for v01, v11 in zip(v0.values(), v1.values()): # noqa: B905
|
|
25
|
+
return _comp(v01, v11)
|
|
26
|
+
if isinstance(v0, list | tuple):
|
|
27
|
+
for v01, v11 in zip(v0, v1): # noqa: B905
|
|
28
|
+
return _comp(v01, v11)
|
|
29
|
+
if isinstance(v0, pd.DataFrame | pd.Series):
|
|
30
|
+
return v0.compare(v1).empty
|
|
31
|
+
return bool(v0 == v1)
|
|
32
|
+
|
|
33
|
+
r = []
|
|
34
|
+
|
|
35
|
+
if hasattr(self, "__dict__"):
|
|
36
|
+
for v0, v1 in zip( # noqa: B905
|
|
37
|
+
self.__dict__.values(), other.__dict__.values()
|
|
38
|
+
):
|
|
39
|
+
r.append(_comp(v0, v1))
|
|
40
|
+
|
|
41
|
+
if hasattr(self, "__slots__"):
|
|
42
|
+
for k in self.__slots__:
|
|
43
|
+
if hasattr(self, k) and hasattr(other, k):
|
|
44
|
+
v0, v1 = getattr(self, k), getattr(other, k)
|
|
45
|
+
r.append(_comp(v0, v1))
|
|
46
|
+
if hasattr(self, k) and not hasattr(other, k):
|
|
47
|
+
r.append(False)
|
|
48
|
+
|
|
49
|
+
return all(r)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class _TestKlassSlotsCore:
    """Slotted test class that defines no ``__getstate__``/``__setstate__``."""

    __slots__ = ("_dfs", "foo", "lis", "tup")

    def __init__(self, **kwargs):
        """Assign every keyword argument to the attribute of the same name."""
        for name in kwargs:
            setattr(self, name, kwargs[name])

    def __repr__(self):
        # Slots that were never assigned are left out of the output.
        populated = [
            f"{slot}={getattr(self, slot)}"
            for slot in self.__slots__
            if hasattr(self, slot)
        ]
        return f"{self.__class__.__qualname__}({', '.join(populated)})"

    def __eq__(self, other):
        return _eq_func(self, other)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class _KlassSlots(_TestKlassSlotsCore):
    """Slotted class that defines its own ``__getstate__``/``__setstate__``."""

    __slots__ = ("_dfs", "exclude", "foo", "lis", "tup")

    def __init__(self, **kwargs):
        """Set attributes from ``kwargs``; ``exclude`` defaults to ``()``."""
        super().__init__(**kwargs)
        if "exclude" in kwargs:
            return
        self.exclude = ()

    def __setstate__(self, state):
        # ``state`` is a 2-tuple; only its second item (slot values) is used.
        _unused, slot_values = state
        for name, value in slot_values.items():
            if name not in self.__slots__:
                continue
            setattr(self, name, value)

    def __getstate__(self):
        # Reads every declared slot, so an unset slot raises AttributeError.
        slot_values = {}
        for name in self.__slots__:
            slot_values[name] = getattr(self, name)
        return None, slot_values
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class _KlassSlotsDzstate(_KlassSlots):
    """Slotted class that adds ``_dzgetstate_``/``_dzsetstate_`` hooks."""

    __slots__ = ("_dfs", "exclude", "foo", "lis", "tup")

    def _dzsetstate_(self, state):
        # ``state`` is a 2-tuple; only its second item (slot values) is used.
        _unused, slot_values = state
        # Names listed under "exclude" are set to the placeholder value 5;
        # this mutates the dict that was passed in.
        slot_values.update(dict.fromkeys(slot_values["exclude"], 5))
        for name, value in slot_values.items():
            if name not in self.__slots__:
                continue
            setattr(self, name, value)

    def _dzgetstate_(self):
        # Slots named in ``self.exclude`` are left out of the returned state.
        kept = {}
        for name in self.__slots__:
            if name in self.exclude:
                continue
            kept[name] = getattr(self, name)
        return None, kept
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
class _TestKlassSlotsDict:
    """Test class with both slots and a ``__dict__``, no get/set state hooks."""

    __slots__ = ("__dict__", "foo")

    def __init__(self, **kwargs):
        """Assign every keyword argument to the attribute of the same name."""
        for name in kwargs:
            setattr(self, name, kwargs[name])

    def add_to_dict(self, k, v):
        """Set attribute ``k`` to ``v`` and return ``self`` for chaining."""
        setattr(self, k, v)
        return self

    def __eq__(self, other):
        return _eq_func(self, other)

    def __repr__(self):
        # ``__dict__`` is itself listed in ``__slots__``, so it is shown as
        # one of the entries.
        populated = [
            f"{slot}={getattr(self, slot)}"
            for slot in self.__slots__
            if hasattr(self, slot)
        ]
        return f"{self.__class__.__qualname__}({', '.join(populated)})"
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
class _TestKlassCore:
    """Plain (non-slotted) test class without get/set state hooks."""

    def __init__(self, **kwargs):
        """Assign every keyword argument to the attribute of the same name."""
        for name in kwargs:
            setattr(self, name, kwargs[name])

    def __repr__(self):
        pairs = [f"{name}={value}" for name, value in self.__dict__.items()]
        return f"{self.__class__.__qualname__}({', '.join(pairs)})"

    def __eq__(self, other):
        return _eq_func(self, other)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
class _TestKlass(_TestKlassCore):
    """Plain (non-slotted) class that defines ``__getstate__``/``__setstate__``."""

    def __init__(self, **kwargs):
        """Set attributes from ``kwargs`` via the parent initialiser."""
        super().__init__(**kwargs)

    def __setstate__(self, state):
        # The given mapping becomes the instance dict itself (no copy).
        self.__dict__ = state

    def __getstate__(self):
        # Returns the live instance dict, not a copy.
        return vars(self)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
class _TestKlassDzstate(_TestKlass):
    """Plain class that adds ``_dzgetstate_``/``_dzsetstate_`` hooks."""

    def __init__(self, **kwargs):
        """Set attributes from ``kwargs``; ``exclude`` defaults to ``()``."""
        super().__init__(**kwargs)
        if "exclude" in kwargs:
            return
        self.exclude = ()

    def _dzsetstate_(self, state):
        # Names listed under "exclude" are set to the placeholder value 5;
        # this mutates the dict that was passed in, which then becomes the
        # instance dict.
        state.update(dict.fromkeys(state["exclude"], 5))
        self.__dict__ = state

    def _dzgetstate_(self):
        # Attributes named in ``self.exclude`` are left out of the state.
        kept = {}
        for name, value in self.__dict__.items():
            if name in self.exclude:
                continue
            kept[name] = value
        return kept
|
datazip/_utils.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import getpass
|
|
4
|
+
import logging
|
|
5
|
+
from contextlib import suppress
|
|
6
|
+
from importlib import import_module
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
LOGGER = logging.getLogger("datazip")
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _quote_strip(string: str) -> str:
    """Return ``string`` with all single and double quote characters removed."""
    return string.translate(str.maketrans("", "", "'\""))
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _get_version(obj: Any) -> str:
    """Look up the version of the top-level package that defines ``obj``'s class.

    The first component of ``obj.__class__.__module__`` is imported and
    checked, in order, for ``__version__``, ``version`` and ``release``.

    Returns:
        The first of those attributes that exists, returned as-is, or
        ``"unknown"`` when none of them is present.
    """
    top_level = obj.__class__.__module__.split(".", 1)[0]
    package = import_module(top_level)
    missing = object()
    for candidate in ("__version__", "version", "release"):
        found = getattr(package, candidate, missing)
        if found is not missing:
            return found
    return "unknown"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _get_username():
    """Return the current user's login name, or ``"unknown"`` if unavailable.

    ``getpass.getuser()`` is tried first, then ``os.getlogin()``. If both
    fail, the two exceptions are logged and ``"unknown"`` is returned, so
    this function does not raise.
    """
    try:
        return getpass.getuser()
    # Before Python 3.13 ``getuser`` could raise exceptions other than
    # OSError, e.g. KeyError when the uid has no password-database entry.
    except (ModuleNotFoundError, OSError, KeyError) as exc0:
        import os

        try:
            return os.getlogin()
        except Exception as exc1:
            # Best effort only: report both failures and use a placeholder.
            LOGGER.error("No username %r from %r", exc1, exc0)
            return "unknown"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _objinfo(obj: Any) -> str:
    """Return ``"<module>|<qualname>"`` for the class of ``obj``."""
    klass = obj.__class__
    return "|".join((klass.__module__, klass.__qualname__))
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _get_klass(mod_klass: str | list | tuple):
    """Import and return the object named by a module / qualified-name pair.

    Args:
        mod_klass: either a ``"module|qualname"`` string or a sequence whose
            first two items are the module name and the qualified name;
            further items are ignored.

    Returns:
        The object found by importing the module and resolving the qualified
        name on it. Dotted names such as ``"Outer.Inner"`` are resolved one
        attribute at a time, so nested classes are supported.

    Raises:
        ImportError: if the module cannot be found or any part of the
            qualified name is missing.
    """
    if isinstance(mod_klass, str):
        mod_klass = mod_klass.split("|")
    mod, qname, *_ = mod_klass
    try:
        klass = import_module(mod)
        # ``__qualname__`` of a nested class is dotted, which a single
        # ``getattr`` cannot resolve; walk it one component at a time.
        for part in qname.split("."):
            klass = getattr(klass, part)
    except (AttributeError, ModuleNotFoundError) as exc:
        raise ImportError(f"Unable to import {qname} from {mod}.") from exc
    else:
        return klass
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def default_setstate(obj, state):
    """Restore ``state`` onto ``obj`` when it has no ``__setstate__``.

    Args:
        obj: the object to populate.
        state: ``None`` (nothing to do), a ``dict`` that becomes
            ``obj.__dict__``, or a 2-tuple ``(dict_state, slot_state)``.
            ``dict_state`` replaces ``obj.__dict__`` unless it is ``None``,
            and each item of ``slot_state`` is applied with ``setattr``.
            Any other type is ignored.
    """
    if isinstance(state, dict):
        obj.__dict__ = state
        return
    # ``None`` and unsupported state types leave ``obj`` untouched.
    if state is None or not isinstance(state, tuple):
        return
    dict_part, slot_part = state
    if dict_part is not None:
        obj.__dict__ = dict_part
    for name, value in slot_part.items():
        setattr(obj, name, value)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def default_getstate(obj):
|
|
69
|
+
"""Called if no `__getstate__` implementation."""
|
|
70
|
+
|
|
71
|
+
def slots_dict(_slots):
|
|
72
|
+
sout = {}
|
|
73
|
+
for k in _slots:
|
|
74
|
+
if k != "__dict__":
|
|
75
|
+
with suppress(AttributeError):
|
|
76
|
+
sout.update({k: getattr(obj, k)})
|
|
77
|
+
return sout
|
|
78
|
+
|
|
79
|
+
match obj:
|
|
80
|
+
case object(__dict__=d_state, __slots__=slots):
|
|
81
|
+
return d_state.copy(), slots_dict(slots)
|
|
82
|
+
case object(__dict__=d_state):
|
|
83
|
+
return d_state.copy()
|
|
84
|
+
case object(__slots__=slots):
|
|
85
|
+
return None, slots_dict(slots)
|
|
86
|
+
case _:
|
|
87
|
+
return None
|
datazip/_version.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# file generated by vcs-versioning
|
|
2
|
+
# don't change, don't track in version control
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
__all__ = [
|
|
6
|
+
"__version__",
|
|
7
|
+
"__version_tuple__",
|
|
8
|
+
"version",
|
|
9
|
+
"version_tuple",
|
|
10
|
+
"__commit_id__",
|
|
11
|
+
"commit_id",
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
version: str
|
|
15
|
+
__version__: str
|
|
16
|
+
__version_tuple__: tuple[int | str, ...]
|
|
17
|
+
version_tuple: tuple[int | str, ...]
|
|
18
|
+
commit_id: str | None
|
|
19
|
+
__commit_id__: str | None
|
|
20
|
+
|
|
21
|
+
__version__ = version = '0.2.0'
|
|
22
|
+
__version_tuple__ = version_tuple = (0, 2, 0)
|
|
23
|
+
|
|
24
|
+
__commit_id__ = commit_id = None
|