hopeit.dataframes 0.25.0b7__tar.gz → 0.25.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hopeit_dataframes-0.25.0b7 → hopeit_dataframes-0.25.1}/PKG-INFO +6 -8
- hopeit_dataframes-0.25.1/pyproject.toml +51 -0
- hopeit_dataframes-0.25.1/setup.py +25 -0
- {hopeit_dataframes-0.25.0b7 → hopeit_dataframes-0.25.1}/src/hopeit/dataframes/dataframe.py +76 -25
- {hopeit_dataframes-0.25.0b7 → hopeit_dataframes-0.25.1}/src/hopeit/dataframes/serialization/dataset.py +17 -5
- {hopeit_dataframes-0.25.0b7 → hopeit_dataframes-0.25.1}/src/hopeit/dataframes/serialization/files.py +4 -5
- {hopeit_dataframes-0.25.0b7 → hopeit_dataframes-0.25.1}/src/hopeit/dataframes/serialization/settings.py +1 -2
- hopeit_dataframes-0.25.1/src/hopeit/dataframes/setup/__init__.py +0 -0
- hopeit_dataframes-0.25.1/src/hopeit/dataframes/setup/py.typed +0 -0
- {hopeit_dataframes-0.25.0b7 → hopeit_dataframes-0.25.1}/src/hopeit.dataframes.egg-info/PKG-INFO +6 -8
- {hopeit_dataframes-0.25.0b7 → hopeit_dataframes-0.25.1}/src/hopeit.dataframes.egg-info/SOURCES.txt +4 -1
- hopeit_dataframes-0.25.1/src/hopeit.dataframes.egg-info/requires.txt +6 -0
- hopeit_dataframes-0.25.0b7/setup.py +0 -59
- hopeit_dataframes-0.25.0b7/src/hopeit.dataframes.egg-info/requires.txt +0 -6
- {hopeit_dataframes-0.25.0b7 → hopeit_dataframes-0.25.1}/README.md +0 -0
- {hopeit_dataframes-0.25.0b7 → hopeit_dataframes-0.25.1}/setup.cfg +0 -0
- {hopeit_dataframes-0.25.0b7 → hopeit_dataframes-0.25.1}/src/hopeit/dataframes/__init__.py +0 -0
- {hopeit_dataframes-0.25.0b7 → hopeit_dataframes-0.25.1}/src/hopeit/dataframes/py.typed +0 -0
- {hopeit_dataframes-0.25.0b7 → hopeit_dataframes-0.25.1}/src/hopeit/dataframes/serialization/__init__.py +0 -0
- /hopeit_dataframes-0.25.0b7/src/hopeit/dataframes/setup/__init__.py → /hopeit_dataframes-0.25.1/src/hopeit/dataframes/serialization/py.typed +0 -0
- {hopeit_dataframes-0.25.0b7 → hopeit_dataframes-0.25.1}/src/hopeit/dataframes/setup/dataframes.py +0 -0
- {hopeit_dataframes-0.25.0b7 → hopeit_dataframes-0.25.1}/src/hopeit.dataframes.egg-info/dependency_links.txt +0 -0
- {hopeit_dataframes-0.25.0b7 → hopeit_dataframes-0.25.1}/src/hopeit.dataframes.egg-info/top_level.txt +0 -0
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: hopeit.dataframes
|
|
3
|
-
Version: 0.25.0b7
|
|
3
|
+
Version: 0.25.1
|
|
4
4
|
Summary: Hopeit Engine Dataframes Toolkit
|
|
5
|
-
|
|
6
|
-
Author: Leo Smerling and Pablo Canto
|
|
7
|
-
Author-email: contact@hopeit.com.ar
|
|
5
|
+
Author-email: Leo Smerling <contact@hopeit.com.ar>, Pablo Canto <contact@hopeit.com.ar>
|
|
8
6
|
License: Apache 2
|
|
7
|
+
Project-URL: Homepage, https://github.com/hopeit-git/hopeit.engine
|
|
9
8
|
Project-URL: CI: GitHub Actions, https://github.com/hopeit-git/hopeit.engine/actions?query=workflow
|
|
10
9
|
Project-URL: Docs: RTD, https://hopeitengine.readthedocs.io/en/latest/
|
|
11
10
|
Project-URL: GitHub: issues, https://github.com/hopeit-git/hopeit.engine/issues
|
|
@@ -13,7 +12,6 @@ Project-URL: GitHub: repo, https://github.com/hopeit-git/hopeit.engine
|
|
|
13
12
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
14
13
|
Classifier: Intended Audience :: Developers
|
|
15
14
|
Classifier: Programming Language :: Python
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
17
15
|
Classifier: Programming Language :: Python :: 3.9
|
|
18
16
|
Classifier: Programming Language :: Python :: 3.10
|
|
19
17
|
Classifier: Programming Language :: Python :: 3.11
|
|
@@ -24,9 +22,9 @@ Classifier: Operating System :: Microsoft :: Windows
|
|
|
24
22
|
Classifier: Topic :: Internet :: WWW/HTTP
|
|
25
23
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
26
24
|
Classifier: Framework :: AsyncIO
|
|
27
|
-
Requires-Python: >=3.8
|
|
28
|
-
Description-Content-Type: text/markdown
|
|
29
|
-
Requires-Dist: hopeit.engine[fs-storage]==0.25.0b7
|
|
25
|
+
Requires-Python: >=3.9
|
|
26
|
+
Description-Content-Type: text/plain
|
|
27
|
+
Requires-Dist: hopeit.engine[fs-storage]==0.25.1
|
|
30
28
|
Requires-Dist: pandas
|
|
31
29
|
Requires-Dist: numpy
|
|
32
30
|
Provides-Extra: pyarrow
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools >= 64", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "hopeit.dataframes"
|
|
7
|
+
description = "Hopeit Engine Dataframes Toolkit"
|
|
8
|
+
dynamic = ["version", "readme", "dependencies", "optional-dependencies"]
|
|
9
|
+
license = { text = "Apache 2" }
|
|
10
|
+
authors = [
|
|
11
|
+
{ name = "Leo Smerling", email = "contact@hopeit.com.ar" },
|
|
12
|
+
{ name = "Pablo Canto", email = "contact@hopeit.com.ar" },
|
|
13
|
+
]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"License :: OSI Approved :: Apache Software License",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"Programming Language :: Python",
|
|
18
|
+
"Programming Language :: Python :: 3.9",
|
|
19
|
+
"Programming Language :: Python :: 3.10",
|
|
20
|
+
"Programming Language :: Python :: 3.11",
|
|
21
|
+
"Development Status :: 4 - Beta",
|
|
22
|
+
"Operating System :: POSIX :: Linux",
|
|
23
|
+
"Operating System :: MacOS :: MacOS X",
|
|
24
|
+
"Operating System :: Microsoft :: Windows",
|
|
25
|
+
"Topic :: Internet :: WWW/HTTP",
|
|
26
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
27
|
+
"Framework :: AsyncIO",
|
|
28
|
+
]
|
|
29
|
+
requires-python = ">=3.9"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
[project.urls]
|
|
33
|
+
"Homepage" = "https://github.com/hopeit-git/hopeit.engine"
|
|
34
|
+
"CI: GitHub Actions" = "https://github.com/hopeit-git/hopeit.engine/actions?query=workflow"
|
|
35
|
+
"Docs: RTD" = "https://hopeitengine.readthedocs.io/en/latest/"
|
|
36
|
+
"GitHub: issues" = "https://github.com/hopeit-git/hopeit.engine/issues"
|
|
37
|
+
"GitHub: repo" = "https://github.com/hopeit-git/hopeit.engine"
|
|
38
|
+
|
|
39
|
+
[tool.setuptools]
|
|
40
|
+
include-package-data = true
|
|
41
|
+
|
|
42
|
+
[tool.setuptools.packages.find]
|
|
43
|
+
where = ["src"]
|
|
44
|
+
|
|
45
|
+
[tool.setuptools.package-data]
|
|
46
|
+
"hopeit.dataframes" = ["py.typed"]
|
|
47
|
+
"hopeit.dataframes.serialization" = ["py.typed"]
|
|
48
|
+
"hopeit.dataframes.setup" = ["py.typed"]
|
|
49
|
+
|
|
50
|
+
[tool.setuptools.dynamic]
|
|
51
|
+
readme = { file = ["README.md"], content-type = "text/plain" }
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from setuptools import setup
|
|
2
|
+
from os import environ
|
|
3
|
+
|
|
4
|
+
version = {}
|
|
5
|
+
try:
|
|
6
|
+
with open("../../../engine/src/hopeit/server/version.py") as fp:
|
|
7
|
+
exec(fp.read(), version)
|
|
8
|
+
ENGINE_VERSION = version["ENGINE_VERSION"]
|
|
9
|
+
except FileNotFoundError:
|
|
10
|
+
ENGINE_VERSION = environ.get("ENGINE_VERSION")
|
|
11
|
+
|
|
12
|
+
if not ENGINE_VERSION:
|
|
13
|
+
raise RuntimeError("ENGINE_VERSION is not specified.")
|
|
14
|
+
|
|
15
|
+
setup(
|
|
16
|
+
version=ENGINE_VERSION,
|
|
17
|
+
install_requires=[
|
|
18
|
+
f"hopeit.engine[fs-storage]=={ENGINE_VERSION}",
|
|
19
|
+
"pandas",
|
|
20
|
+
"numpy",
|
|
21
|
+
],
|
|
22
|
+
extras_require={
|
|
23
|
+
"pyarrow": ["pyarrow"],
|
|
24
|
+
},
|
|
25
|
+
)
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
"""
|
|
2
2
|
DataFrames type abstractions.
|
|
3
3
|
"""
|
|
4
|
+
|
|
4
5
|
import dataclasses
|
|
5
6
|
from datetime import date, datetime, timezone
|
|
6
|
-
from
|
|
7
|
-
from typing import Any, Callable, Dict, Generic, Iterator, List, Type, TypeVar
|
|
7
|
+
from typing import Any, Callable, Dict, Generic, Iterator, List, Type, TypeVar, Union
|
|
8
8
|
|
|
9
9
|
import numpy as np
|
|
10
10
|
import pandas as pd
|
|
@@ -19,30 +19,67 @@ from hopeit.dataobjects import (
|
|
|
19
19
|
fields,
|
|
20
20
|
)
|
|
21
21
|
from hopeit.dataobjects.payload import Payload
|
|
22
|
+
from pydantic_core import PydanticUndefined
|
|
22
23
|
|
|
23
24
|
DataFrameT = TypeVar("DataFrameT")
|
|
24
25
|
|
|
25
26
|
|
|
26
27
|
@dataclasses.dataclass
|
|
27
|
-
class DataFrameMetadata
|
|
28
|
+
class DataFrameMetadata:
|
|
28
29
|
columns: List[str]
|
|
29
30
|
fields: Dict[str, FieldInfo]
|
|
30
31
|
|
|
31
32
|
|
|
32
33
|
# Functions to do type coercion
|
|
33
|
-
def _series_to_int(x: pd.Series) -> pd.Series:
|
|
34
|
+
def _series_to_int(field_name: str, x: pd.Series) -> pd.Series:
|
|
35
|
+
if x.isnull().values.any(): # type: ignore[union-attr]
|
|
36
|
+
raise ValueError(f"Field `{field_name}` is not nullable")
|
|
34
37
|
return x.astype(np.int64)
|
|
35
38
|
|
|
36
39
|
|
|
37
|
-
def _series_to_float(x: pd.Series) -> pd.Series:
|
|
40
|
+
def _series_to_float(field_name: str, x: pd.Series) -> pd.Series:
|
|
41
|
+
if x.isnull().values.any(): # type: ignore[union-attr]
|
|
42
|
+
raise ValueError(f"Field `{field_name}` is not nullable")
|
|
38
43
|
return x.astype(np.float64)
|
|
39
44
|
|
|
40
45
|
|
|
41
|
-
def _series_to_str(x: pd.Series) -> pd.Series:
|
|
46
|
+
def _series_to_str(field_name: str, x: pd.Series) -> pd.Series:
|
|
47
|
+
if x.isnull().values.any(): # type: ignore[union-attr]
|
|
48
|
+
raise ValueError(f"Field `{field_name}` is not nullable")
|
|
42
49
|
return x.astype(str)
|
|
43
50
|
|
|
44
51
|
|
|
45
|
-
|
|
52
|
+
# Functions to do type coercion
|
|
53
|
+
def _series_to_int_nullable(_field_name: str, x: pd.Series) -> pd.Series:
|
|
54
|
+
return x[x.notna()].astype(np.int64)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _series_to_float_nullable(_field_name: str, x: pd.Series) -> pd.Series:
|
|
58
|
+
return x[x.notna()].astype(np.float64)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _series_to_str_nullable(_field_name: str, x: pd.Series) -> pd.Series:
|
|
62
|
+
return x[x.notna()].astype(str)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _series_to_datetime(field_name: str, x: pd.Series) -> pd.Series:
|
|
66
|
+
if x.isnull().values.any(): # type: ignore[union-attr]
|
|
67
|
+
raise ValueError(f"Field `{field_name}` is not nullable")
|
|
68
|
+
return pd.to_datetime(x)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _series_to_utc_datetime(field_name: str, x: pd.Series) -> pd.Series:
|
|
72
|
+
if x.isnull().values.any(): # type: ignore[union-attr]
|
|
73
|
+
raise ValueError(f"Field `{field_name}` is not nullable")
|
|
74
|
+
return pd.to_datetime(x, utc=True)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _series_to_datetime_nullable(_field_name: str, x: pd.Series) -> pd.Series:
|
|
78
|
+
return pd.to_datetime(x)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _series_to_utc_datetime_nullable(_field_name: str, x: pd.Series) -> pd.Series:
|
|
82
|
+
return pd.to_datetime(x, utc=True)
|
|
46
83
|
|
|
47
84
|
|
|
48
85
|
class DataFrameMixin(Generic[DataFrameT, DataObject]):
|
|
@@ -56,11 +93,16 @@ class DataFrameMixin(Generic[DataFrameT, DataObject]):
|
|
|
56
93
|
int: _series_to_int,
|
|
57
94
|
float: _series_to_float,
|
|
58
95
|
str: _series_to_str,
|
|
59
|
-
date:
|
|
96
|
+
date: _series_to_datetime,
|
|
60
97
|
datetime: _series_to_utc_datetime,
|
|
98
|
+
Union[int, None]: _series_to_int_nullable,
|
|
99
|
+
Union[float, None]: _series_to_float_nullable,
|
|
100
|
+
Union[str, None]: _series_to_str_nullable,
|
|
101
|
+
Union[date, None]: _series_to_datetime_nullable,
|
|
102
|
+
Union[datetime, None]: _series_to_utc_datetime_nullable,
|
|
61
103
|
}
|
|
62
104
|
|
|
63
|
-
def __init__(self) -> None:
|
|
105
|
+
def __init__(self, **series: pd.Series) -> None:
|
|
64
106
|
# Fields added here only to allow mypy to provide correct type hints
|
|
65
107
|
self.__data_object__: Dict[str, Any] = {}
|
|
66
108
|
self.__dataframe__: DataFrameMetadata = None # type: ignore
|
|
@@ -68,9 +110,7 @@ class DataFrameMixin(Generic[DataFrameT, DataObject]):
|
|
|
68
110
|
raise NotImplementedError # must use @dataframe decorator # pragma: no cover
|
|
69
111
|
|
|
70
112
|
@staticmethod
|
|
71
|
-
def __init_from_series__(
|
|
72
|
-
self, **series: pd.Series
|
|
73
|
-
): # pylint: disable=bad-staticmethod-argument
|
|
113
|
+
def __init_from_series__(self, **series: pd.Series): # pylint: disable=bad-staticmethod-argument
|
|
74
114
|
df = pd.DataFrame(series)
|
|
75
115
|
df.index.name = None # Removes index name to avoid colisions with series name
|
|
76
116
|
if self.__data_object__["validate"]:
|
|
@@ -106,10 +146,7 @@ class DataFrameMixin(Generic[DataFrameT, DataObject]):
|
|
|
106
146
|
return self._from_df(self.__df[key])
|
|
107
147
|
|
|
108
148
|
def _to_dataobjects(self) -> List[DataObject]:
|
|
109
|
-
return [
|
|
110
|
-
self.DataObject(**fields)
|
|
111
|
-
for fields in self.__df.to_dict(orient="records")
|
|
112
|
-
]
|
|
149
|
+
return [self.DataObject(**fields) for fields in self.__df.to_dict(orient="records")]
|
|
113
150
|
|
|
114
151
|
def event_id(self, *args, **kwargs) -> str:
|
|
115
152
|
return ""
|
|
@@ -132,9 +169,28 @@ class DataFrameMixin(Generic[DataFrameT, DataObject]):
|
|
|
132
169
|
else:
|
|
133
170
|
object.__setattr__(self, name, value)
|
|
134
171
|
|
|
135
|
-
def
|
|
172
|
+
def _get_series(
|
|
173
|
+
self,
|
|
174
|
+
df: pd.DataFrame,
|
|
175
|
+
field_name: str,
|
|
176
|
+
field_info: FieldInfo,
|
|
177
|
+
) -> pd.Series:
|
|
178
|
+
try:
|
|
179
|
+
return df[field_name]
|
|
180
|
+
except KeyError:
|
|
181
|
+
default_value = field_info.get_default()
|
|
182
|
+
if default_value is not PydanticUndefined:
|
|
183
|
+
return pd.Series([default_value] * len(df))
|
|
184
|
+
raise
|
|
185
|
+
|
|
186
|
+
def _coerce_datatypes(
|
|
187
|
+
self,
|
|
188
|
+
df: pd.DataFrame,
|
|
189
|
+
) -> Dict[str, pd.Series]:
|
|
136
190
|
return {
|
|
137
|
-
name: self.DATATYPE_MAPPING[field.annotation](
|
|
191
|
+
name: self.DATATYPE_MAPPING[field.annotation]( # type: ignore[index, operator]
|
|
192
|
+
name, self._get_series(df, name, field)
|
|
193
|
+
)
|
|
138
194
|
for name, field in self.__dataframe__.fields.items()
|
|
139
195
|
}
|
|
140
196
|
|
|
@@ -153,7 +209,7 @@ def dataframe(
|
|
|
153
209
|
if hasattr(cls, "__annotations__") and hasattr(cls, "__dataclass_fields__"):
|
|
154
210
|
amended_class = type(
|
|
155
211
|
cls.__name__,
|
|
156
|
-
(DataFrameMixin,
|
|
212
|
+
(DataFrameMixin,) + cls.__mro__,
|
|
157
213
|
dict(cls.__dict__),
|
|
158
214
|
)
|
|
159
215
|
setattr(amended_class, "__init__", DataFrameMixin.__init_from_series__)
|
|
@@ -162,7 +218,7 @@ def dataframe(
|
|
|
162
218
|
|
|
163
219
|
def add_dataframe_metadata(cls):
|
|
164
220
|
serialized_fields = {k: (v.annotation, v) for k, v in fields(cls).items()}
|
|
165
|
-
dataobject_type = create_model(cls.__name__+"DataObject", **serialized_fields)
|
|
221
|
+
dataobject_type = create_model(cls.__name__ + "DataObject", **serialized_fields)
|
|
166
222
|
dataobject_type = dataobject(dataobject_type, unsafe=True)
|
|
167
223
|
|
|
168
224
|
setattr(cls, "DataObject", dataobject_type)
|
|
@@ -185,17 +241,12 @@ def dataframe(
|
|
|
185
241
|
setattr(cls, "event_id", StreamEventMixin.event_id)
|
|
186
242
|
setattr(cls, "event_ts", StreamEventMixin.event_ts)
|
|
187
243
|
|
|
188
|
-
def set_fields_optional(cls):
|
|
189
|
-
for _, field in fields(cls).items():
|
|
190
|
-
field.default = None
|
|
191
|
-
|
|
192
244
|
def wrap(cls) -> Type[DataFrameMixin]:
|
|
193
245
|
if hasattr(cls, "__dataframe__"):
|
|
194
246
|
return cls
|
|
195
247
|
add_dataframe_metadata(cls)
|
|
196
248
|
amended_class = add_dataframe_mixin(cls)
|
|
197
249
|
add_dataobject_annotations(amended_class, unsafe, validate, schema)
|
|
198
|
-
set_fields_optional(amended_class)
|
|
199
250
|
return amended_class
|
|
200
251
|
|
|
201
252
|
if decorated_class is None:
|
|
@@ -1,25 +1,37 @@
|
|
|
1
|
-
"""Dataset objects definition, used as a result of serialized dataframes
|
|
2
|
-
"""
|
|
1
|
+
"""Dataset objects definition, used as a result of serialized dataframes"""
|
|
3
2
|
|
|
4
3
|
from importlib import import_module
|
|
5
|
-
from typing import Generic, Type, TypeVar
|
|
4
|
+
from typing import Any, Dict, Generic, Type, TypeVar
|
|
6
5
|
|
|
7
|
-
from hopeit.dataobjects import dataclass, dataobject
|
|
6
|
+
from hopeit.dataobjects import dataclass, dataobject, field
|
|
8
7
|
|
|
9
8
|
DataFrameT = TypeVar("DataFrameT")
|
|
10
9
|
|
|
11
10
|
|
|
11
|
+
class DatasetLoadError(Exception):
|
|
12
|
+
pass
|
|
13
|
+
|
|
14
|
+
|
|
12
15
|
@dataobject
|
|
13
16
|
@dataclass
|
|
14
17
|
class Dataset(Generic[DataFrameT]):
|
|
15
18
|
"""Persisted representation of a @dataframe object"""
|
|
19
|
+
|
|
16
20
|
protocol: str
|
|
17
21
|
partition_key: str
|
|
18
22
|
key: str
|
|
19
23
|
datatype: str
|
|
24
|
+
schema: Dict[str, Any] = field(default_factory=dict)
|
|
20
25
|
|
|
21
26
|
async def load(self) -> DataFrameT:
|
|
22
|
-
|
|
27
|
+
try:
|
|
28
|
+
dataframe = await self.__storage.load(self) # type: ignore[attr-defined]
|
|
29
|
+
return dataframe
|
|
30
|
+
except (RuntimeError, IOError, KeyError) as e:
|
|
31
|
+
raise DatasetLoadError(
|
|
32
|
+
f"Error {type(e).__name__}: {e} loading dataset of type {self.datatype} "
|
|
33
|
+
f"at location {self.partition_key}/{self.key}"
|
|
34
|
+
) from e
|
|
23
35
|
|
|
24
36
|
@classmethod
|
|
25
37
|
async def save(cls, dataframe: DataFrameT) -> "Dataset[DataFrameT]":
|
{hopeit_dataframes-0.25.0b7 → hopeit_dataframes-0.25.1}/src/hopeit/dataframes/serialization/files.py
RENAMED
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
"""Support for `@dataframes` serialization to files
|
|
2
|
-
"""
|
|
1
|
+
"""Support for `@dataframes` serialization to files"""
|
|
3
2
|
|
|
4
3
|
import io
|
|
5
4
|
from importlib import import_module
|
|
@@ -7,6 +6,7 @@ from typing import Generic, Optional, Type, TypeVar
|
|
|
7
6
|
from uuid import uuid4
|
|
8
7
|
|
|
9
8
|
import pandas as pd
|
|
9
|
+
from pydantic import TypeAdapter
|
|
10
10
|
|
|
11
11
|
try:
|
|
12
12
|
import pyarrow # type: ignore # noqa # pylint: disable=unused-import
|
|
@@ -54,14 +54,13 @@ class DatasetFileStorage(Generic[DataFrameT]):
|
|
|
54
54
|
partition_key=partition_key,
|
|
55
55
|
key=key,
|
|
56
56
|
datatype=f"{datatype.__module__}.{datatype.__qualname__}",
|
|
57
|
+
schema=TypeAdapter(datatype).json_schema(),
|
|
57
58
|
)
|
|
58
59
|
|
|
59
60
|
async def load(self, dataset: Dataset) -> EventPayloadType:
|
|
60
61
|
"""Loads @dataframe annotated object using Dataset metadata"""
|
|
61
62
|
datatype: Type[DataFrameT] = find_dataframe_type(dataset.datatype)
|
|
62
|
-
data = await self.storage.get_file(
|
|
63
|
-
dataset.key, partition_key=dataset.partition_key
|
|
64
|
-
)
|
|
63
|
+
data = await self.storage.get_file(dataset.key, partition_key=dataset.partition_key)
|
|
65
64
|
if data is None:
|
|
66
65
|
raise FileNotFoundError(dataset.key)
|
|
67
66
|
df = pd.read_parquet(io.BytesIO(data), engine="pyarrow")
|
|
File without changes
|
|
File without changes
|
{hopeit_dataframes-0.25.0b7 → hopeit_dataframes-0.25.1}/src/hopeit.dataframes.egg-info/PKG-INFO
RENAMED
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: hopeit.dataframes
|
|
3
|
-
Version: 0.25.0b7
|
|
3
|
+
Version: 0.25.1
|
|
4
4
|
Summary: Hopeit Engine Dataframes Toolkit
|
|
5
|
-
|
|
6
|
-
Author: Leo Smerling and Pablo Canto
|
|
7
|
-
Author-email: contact@hopeit.com.ar
|
|
5
|
+
Author-email: Leo Smerling <contact@hopeit.com.ar>, Pablo Canto <contact@hopeit.com.ar>
|
|
8
6
|
License: Apache 2
|
|
7
|
+
Project-URL: Homepage, https://github.com/hopeit-git/hopeit.engine
|
|
9
8
|
Project-URL: CI: GitHub Actions, https://github.com/hopeit-git/hopeit.engine/actions?query=workflow
|
|
10
9
|
Project-URL: Docs: RTD, https://hopeitengine.readthedocs.io/en/latest/
|
|
11
10
|
Project-URL: GitHub: issues, https://github.com/hopeit-git/hopeit.engine/issues
|
|
@@ -13,7 +12,6 @@ Project-URL: GitHub: repo, https://github.com/hopeit-git/hopeit.engine
|
|
|
13
12
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
14
13
|
Classifier: Intended Audience :: Developers
|
|
15
14
|
Classifier: Programming Language :: Python
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
17
15
|
Classifier: Programming Language :: Python :: 3.9
|
|
18
16
|
Classifier: Programming Language :: Python :: 3.10
|
|
19
17
|
Classifier: Programming Language :: Python :: 3.11
|
|
@@ -24,9 +22,9 @@ Classifier: Operating System :: Microsoft :: Windows
|
|
|
24
22
|
Classifier: Topic :: Internet :: WWW/HTTP
|
|
25
23
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
26
24
|
Classifier: Framework :: AsyncIO
|
|
27
|
-
Requires-Python: >=3.8
|
|
28
|
-
Description-Content-Type: text/markdown
|
|
29
|
-
Requires-Dist: hopeit.engine[fs-storage]==0.25.0b7
|
|
25
|
+
Requires-Python: >=3.9
|
|
26
|
+
Description-Content-Type: text/plain
|
|
27
|
+
Requires-Dist: hopeit.engine[fs-storage]==0.25.1
|
|
30
28
|
Requires-Dist: pandas
|
|
31
29
|
Requires-Dist: numpy
|
|
32
30
|
Provides-Extra: pyarrow
|
{hopeit_dataframes-0.25.0b7 → hopeit_dataframes-0.25.1}/src/hopeit.dataframes.egg-info/SOURCES.txt
RENAMED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
README.md
|
|
2
|
+
pyproject.toml
|
|
2
3
|
setup.py
|
|
3
4
|
src/hopeit.dataframes.egg-info/PKG-INFO
|
|
4
5
|
src/hopeit.dataframes.egg-info/SOURCES.txt
|
|
@@ -11,6 +12,8 @@ src/hopeit/dataframes/py.typed
|
|
|
11
12
|
src/hopeit/dataframes/serialization/__init__.py
|
|
12
13
|
src/hopeit/dataframes/serialization/dataset.py
|
|
13
14
|
src/hopeit/dataframes/serialization/files.py
|
|
15
|
+
src/hopeit/dataframes/serialization/py.typed
|
|
14
16
|
src/hopeit/dataframes/serialization/settings.py
|
|
15
17
|
src/hopeit/dataframes/setup/__init__.py
|
|
16
|
-
src/hopeit/dataframes/setup/dataframes.py
|
|
18
|
+
src/hopeit/dataframes/setup/dataframes.py
|
|
19
|
+
src/hopeit/dataframes/setup/py.typed
|
|
@@ -1,59 +0,0 @@
|
|
|
1
|
-
import setuptools
|
|
2
|
-
|
|
3
|
-
version = {}
|
|
4
|
-
with open("../../../engine/src/hopeit/server/version.py") as fp:
|
|
5
|
-
exec(fp.read(), version)
|
|
6
|
-
|
|
7
|
-
setuptools.setup(
|
|
8
|
-
name="hopeit.dataframes",
|
|
9
|
-
version=version["ENGINE_VERSION"],
|
|
10
|
-
description="Hopeit Engine Dataframes Toolkit",
|
|
11
|
-
license="Apache 2",
|
|
12
|
-
long_description=open("README.md").read(),
|
|
13
|
-
long_description_content_type="text/markdown",
|
|
14
|
-
author="Leo Smerling and Pablo Canto",
|
|
15
|
-
author_email="contact@hopeit.com.ar",
|
|
16
|
-
url="https://github.com/hopeit-git/hopeit.engine",
|
|
17
|
-
classifiers=[
|
|
18
|
-
"License :: OSI Approved :: Apache Software License",
|
|
19
|
-
"Intended Audience :: Developers",
|
|
20
|
-
"Programming Language :: Python",
|
|
21
|
-
"Programming Language :: Python :: 3.8",
|
|
22
|
-
"Programming Language :: Python :: 3.9",
|
|
23
|
-
"Programming Language :: Python :: 3.10",
|
|
24
|
-
"Programming Language :: Python :: 3.11",
|
|
25
|
-
"Development Status :: 4 - Beta",
|
|
26
|
-
"Operating System :: POSIX :: Linux",
|
|
27
|
-
"Operating System :: MacOS :: MacOS X",
|
|
28
|
-
"Operating System :: Microsoft :: Windows",
|
|
29
|
-
"Topic :: Internet :: WWW/HTTP",
|
|
30
|
-
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
31
|
-
"Framework :: AsyncIO",
|
|
32
|
-
],
|
|
33
|
-
project_urls={
|
|
34
|
-
"CI: GitHub Actions": "https://github.com/hopeit-git/hopeit.engine/actions?query=workflow", # noqa
|
|
35
|
-
"Docs: RTD": "https://hopeitengine.readthedocs.io/en/latest/",
|
|
36
|
-
"GitHub: issues": "https://github.com/hopeit-git/hopeit.engine/issues",
|
|
37
|
-
"GitHub: repo": "https://github.com/hopeit-git/hopeit.engine",
|
|
38
|
-
},
|
|
39
|
-
package_dir={"": "src"},
|
|
40
|
-
packages=[
|
|
41
|
-
"hopeit.dataframes",
|
|
42
|
-
"hopeit.dataframes.serialization",
|
|
43
|
-
"hopeit.dataframes.setup",
|
|
44
|
-
],
|
|
45
|
-
include_package_data=True,
|
|
46
|
-
package_data={
|
|
47
|
-
"hopeit.dataframes": ["py.typed"],
|
|
48
|
-
},
|
|
49
|
-
python_requires=">=3.8",
|
|
50
|
-
install_requires=[
|
|
51
|
-
f"hopeit.engine[fs-storage]=={version['ENGINE_VERSION']}",
|
|
52
|
-
"pandas",
|
|
53
|
-
"numpy",
|
|
54
|
-
],
|
|
55
|
-
extras_require={
|
|
56
|
-
"pyarrow": ["pyarrow"],
|
|
57
|
-
},
|
|
58
|
-
entry_points={},
|
|
59
|
-
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{hopeit_dataframes-0.25.0b7 → hopeit_dataframes-0.25.1}/src/hopeit/dataframes/setup/dataframes.py
RENAMED
|
File without changes
|
|
File without changes
|
{hopeit_dataframes-0.25.0b7 → hopeit_dataframes-0.25.1}/src/hopeit.dataframes.egg-info/top_level.txt
RENAMED
|
File without changes
|