hopeit.dataframes 0.25.0b1__tar.gz → 0.25.0b2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b2}/PKG-INFO +2 -2
- {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b2}/src/hopeit/dataframes/__init__.py +55 -31
- {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b2}/src/hopeit/dataframes/dataframe.py +6 -66
- {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b2}/src/hopeit/dataframes/serialization/dataset.py +10 -2
- {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b2}/src/hopeit/dataframes/serialization/files.py +1 -35
- hopeit_dataframes-0.25.0b2/src/hopeit/dataframes/setup/dataframes.py +36 -0
- {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b2}/src/hopeit.dataframes.egg-info/PKG-INFO +2 -2
- {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b2}/src/hopeit.dataframes.egg-info/SOURCES.txt +0 -1
- hopeit_dataframes-0.25.0b2/src/hopeit.dataframes.egg-info/requires.txt +6 -0
- hopeit_dataframes-0.25.0b1/src/hopeit/dataframes/dataframeobject.py +0 -187
- hopeit_dataframes-0.25.0b1/src/hopeit/dataframes/setup/dataframes.py +0 -52
- hopeit_dataframes-0.25.0b1/src/hopeit.dataframes.egg-info/requires.txt +0 -6
- {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b2}/README.md +0 -0
- {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b2}/setup.cfg +0 -0
- {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b2}/setup.py +0 -0
- {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b2}/src/hopeit/dataframes/py.typed +0 -0
- {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b2}/src/hopeit/dataframes/serialization/__init__.py +0 -0
- {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b2}/src/hopeit/dataframes/serialization/settings.py +0 -0
- {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b2}/src/hopeit/dataframes/setup/__init__.py +0 -0
- {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b2}/src/hopeit.dataframes.egg-info/dependency_links.txt +0 -0
- {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b2}/src/hopeit.dataframes.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: hopeit.dataframes
|
|
3
|
-
Version: 0.25.0b1
|
|
3
|
+
Version: 0.25.0b2
|
|
4
4
|
Summary: Hopeit Engine Dataframes Toolkit
|
|
5
5
|
Home-page: https://github.com/hopeit-git/hopeit.engine
|
|
6
6
|
Author: Leo Smerling and Pablo Canto
|
|
@@ -26,7 +26,7 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
|
26
26
|
Classifier: Framework :: AsyncIO
|
|
27
27
|
Requires-Python: >=3.8
|
|
28
28
|
Description-Content-Type: text/markdown
|
|
29
|
-
Requires-Dist: hopeit.engine[fs-storage]==0.25.0b1
|
|
29
|
+
Requires-Dist: hopeit.engine[fs-storage]==0.25.0b2
|
|
30
30
|
Requires-Dist: pandas
|
|
31
31
|
Requires-Dist: numpy
|
|
32
32
|
Provides-Extra: pyarrow
|
|
@@ -1,37 +1,74 @@
|
|
|
1
1
|
"""
|
|
2
2
|
hopeit.engine dataframes plugin entry point
|
|
3
3
|
|
|
4
|
-
This module exposes the
|
|
4
|
+
This module exposes the 2 main constructions to be used inside apps,
|
|
5
|
+
to extend @dataobject functionality supporting working with `pandas DataFrames`
|
|
5
6
|
`@dataframe` dataclass annotation
|
|
6
|
-
`@dataframeobject` dataclass annotation
|
|
7
7
|
`DataFrames` class to handle manipulation of dataframe/dataframeobjects
|
|
8
8
|
|
|
9
9
|
Usage:
|
|
10
10
|
```
|
|
11
|
-
from
|
|
11
|
+
from typing import List
|
|
12
|
+
|
|
13
|
+
import pandas as pd
|
|
14
|
+
|
|
15
|
+
from hopeit.dataframes.serialization.settings import DatasetSerialization
|
|
16
|
+
from hopeit.dataframes import DataFrames, Dataset, dataframe
|
|
17
|
+
from hopeit.dataobjects import dataobject, dataclass
|
|
18
|
+
from hopeit.dataobjects.payload import Payload
|
|
12
19
|
|
|
13
20
|
@dataframe
|
|
14
21
|
@dataclass
|
|
15
|
-
class
|
|
22
|
+
class MyData:
|
|
16
23
|
field1: int
|
|
17
24
|
field2: str
|
|
18
25
|
...
|
|
19
26
|
|
|
20
|
-
@
|
|
27
|
+
@dataobject
|
|
21
28
|
@dataclass
|
|
22
29
|
class MyDataset:
|
|
23
30
|
dataset_name: str
|
|
24
|
-
example_data:
|
|
31
|
+
example_data: Dataset[MyData]
|
|
32
|
+
|
|
33
|
+
@dataobject
|
|
34
|
+
@dataclass
|
|
35
|
+
class MyWebResponse:
|
|
36
|
+
dataset_name: str
|
|
37
|
+
example_data: List[MyData.DataObject]
|
|
38
|
+
|
|
39
|
+
# This step is not needed if SETUP event is configured in app
|
|
40
|
+
DataFrames.setup(DatasetSerialization(
|
|
41
|
+
protocol="hopeit.dataframes.serialization.files.DatasetFileStorage",
|
|
42
|
+
location="/tmp/data",
|
|
43
|
+
partition_dateformat="%Y/%m/%d/%H/",
|
|
44
|
+
))
|
|
45
|
+
|
|
46
|
+
df = pd.DataFrame([ # Create or load a pandas DataFrame
|
|
47
|
+
{"field1": 1, "field2": "text1"},
|
|
48
|
+
{"field1": 2, "field2": "text2"},
|
|
49
|
+
])
|
|
50
|
+
|
|
51
|
+
my_data: MyData = DataFrames.from_df(MyData, df)
|
|
25
52
|
|
|
53
|
+
# return dataset after saving data to disk
|
|
54
|
+
my_dataset = MyDataset(
|
|
55
|
+
dataset_name="example",
|
|
56
|
+
example_data=await Dataset.save(my_data)
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
print(Payload.to_json(my_dataset))
|
|
26
60
|
|
|
27
|
-
|
|
61
|
+
my_data_again: MyData = await my_dataset.example_data.load()
|
|
28
62
|
|
|
29
|
-
|
|
63
|
+
print(DataFrames.df(my_data_again))
|
|
30
64
|
|
|
31
|
-
return
|
|
65
|
+
# return dataframe converted to list of dataobjects that can be directly converted to json
|
|
66
|
+
my_json_response = MyWebResponse(
|
|
32
67
|
dataset_name="example",
|
|
33
|
-
example_data=my_data
|
|
68
|
+
example_data=DataFrames.to_dataobjects(my_data)
|
|
34
69
|
)
|
|
70
|
+
|
|
71
|
+
print(Payload.to_json(my_json_response))
|
|
35
72
|
```
|
|
36
73
|
"""
|
|
37
74
|
|
|
@@ -40,35 +77,22 @@ from typing import Dict, Generic, Iterator, List, Type
|
|
|
40
77
|
import numpy as np
|
|
41
78
|
import pandas as pd
|
|
42
79
|
from hopeit.dataframes.dataframe import DataFrameT, dataframe
|
|
43
|
-
from hopeit.dataframes.
|
|
80
|
+
from hopeit.dataframes.serialization.dataset import Dataset
|
|
81
|
+
from hopeit.dataframes.serialization.settings import DatasetSerialization
|
|
82
|
+
from hopeit.dataframes.setup.dataframes import register_serialization
|
|
44
83
|
from hopeit.dataobjects import DataObject
|
|
45
84
|
|
|
46
|
-
__all__ = ["DataFrames", "
|
|
85
|
+
__all__ = ["DataFrames", "Dataset", "dataframe"]
|
|
47
86
|
|
|
48
87
|
|
|
49
|
-
class DataFrames(Generic[DataFrameT, DataFrameObjectT, DataObject]):
|
|
88
|
+
class DataFrames(Generic[DataFrameT, DataObject]):
|
|
50
89
|
"""
|
|
51
90
|
Dataframes manipulation utilities methods
|
|
52
91
|
"""
|
|
53
92
|
|
|
54
93
|
@staticmethod
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
and converts to a `DataObject` json-compatible with pointers to saved
|
|
58
|
-
locations.
|
|
59
|
-
|
|
60
|
-
This method can be used to i.e. return `@dataframeobject`s as a JSON response
|
|
61
|
-
"""
|
|
62
|
-
return await obj._serialize() # type: ignore # pylint: disable=protected-access
|
|
63
|
-
|
|
64
|
-
@staticmethod
|
|
65
|
-
async def deserialize(
|
|
66
|
-
datatype: Type[DataFrameObjectT], dataobject: DataObject
|
|
67
|
-
) -> DataFrameObjectT:
|
|
68
|
-
"""Deserialize/load contents of serialized dataobject fields of a `@dataframeobject`
|
|
69
|
-
loading saved Dataset information for @dataframe fields
|
|
70
|
-
"""
|
|
71
|
-
return await datatype._deserialize(dataobject) # type: ignore # pylint: disable=protected-access
|
|
94
|
+
def setup(settings: DatasetSerialization):
|
|
95
|
+
register_serialization(settings)
|
|
72
96
|
|
|
73
97
|
@staticmethod
|
|
74
98
|
def from_df(
|
|
@@ -88,7 +112,7 @@ class DataFrames(Generic[DataFrameT, DataFrameObjectT, DataObject]):
|
|
|
88
112
|
|
|
89
113
|
@staticmethod
|
|
90
114
|
def from_dataobjects(
|
|
91
|
-
datatype: Type[DataFrameT], dataobjects: Iterator[
|
|
115
|
+
datatype: Type[DataFrameT], dataobjects: Iterator[DataObject]
|
|
92
116
|
) -> DataFrameT:
|
|
93
117
|
"""Converts standard json serializable `@dataobject`s to a single `@dataframe`"""
|
|
94
118
|
return datatype._from_dataobjects(dataobjects) # type: ignore # pylint: disable=protected-access
|
{hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b2}/src/hopeit/dataframes/dataframe.py
RENAMED
|
@@ -1,20 +1,9 @@
|
|
|
1
1
|
"""
|
|
2
2
|
DataFrames type abstractions.
|
|
3
|
-
|
|
4
|
-
Example:
|
|
5
|
-
|
|
6
|
-
from hopeit.dataobjects import dataclass # equivalent to `dataclasses.dataclass`
|
|
7
|
-
from hopeit.dataframes import dataframe
|
|
8
|
-
|
|
9
|
-
@dataframe
|
|
10
|
-
@dataclass
|
|
11
|
-
class MyObject:
|
|
12
|
-
name: str
|
|
13
|
-
number: int
|
|
14
3
|
"""
|
|
15
4
|
import dataclasses
|
|
16
5
|
from datetime import date, datetime, timezone
|
|
17
|
-
from typing import Any, Callable, Dict, Generic, Iterator, List,
|
|
6
|
+
from typing import Any, Callable, Dict, Generic, Iterator, List, Type, TypeVar
|
|
18
7
|
|
|
19
8
|
import numpy as np
|
|
20
9
|
import pandas as pd
|
|
@@ -34,28 +23,9 @@ DataFrameT = TypeVar("DataFrameT")
|
|
|
34
23
|
|
|
35
24
|
|
|
36
25
|
@dataclasses.dataclass
|
|
37
|
-
class DataFrameMetadata(
|
|
26
|
+
class DataFrameMetadata():
|
|
38
27
|
columns: List[str]
|
|
39
28
|
fields: Dict[str, FieldInfo]
|
|
40
|
-
serialized_type: Type[DataObject]
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
@dataclasses.dataclass
|
|
44
|
-
class DataFrameParams:
|
|
45
|
-
"""
|
|
46
|
-
Helper class used to access attributes in @dataframe
|
|
47
|
-
decorated objects, based on dot notation expressions
|
|
48
|
-
"""
|
|
49
|
-
|
|
50
|
-
datatypes: Optional[str]
|
|
51
|
-
|
|
52
|
-
@staticmethod
|
|
53
|
-
def extract_attr(obj, expr):
|
|
54
|
-
value = obj
|
|
55
|
-
for attr_name in expr.split("."):
|
|
56
|
-
if value:
|
|
57
|
-
value = getattr(value, attr_name)
|
|
58
|
-
return value
|
|
59
29
|
|
|
60
30
|
|
|
61
31
|
class DataFrameMixin(Generic[DataFrameT, DataObject]):
|
|
@@ -120,40 +90,10 @@ class DataFrameMixin(Generic[DataFrameT, DataObject]):
|
|
|
120
90
|
|
|
121
91
|
def _to_dataobjects(self) -> List[DataObject]:
|
|
122
92
|
return [
|
|
123
|
-
self.
|
|
93
|
+
self.DataObject(**fields)
|
|
124
94
|
for fields in self.__df.to_dict(orient="records")
|
|
125
95
|
]
|
|
126
96
|
|
|
127
|
-
# def to_json(self, *args, **kwargs) -> str:
|
|
128
|
-
# raise NotImplementedError(
|
|
129
|
-
# "Dataframe must be used inside `@dataobject(unsafe=True)` to be used as an output"
|
|
130
|
-
# )
|
|
131
|
-
|
|
132
|
-
# def to_dict(self, *args, **kwargs) -> Dict[str, Any]:
|
|
133
|
-
# raise NotImplementedError(
|
|
134
|
-
# "Dataframe must be used inside `@dataobject(unsafe=True)` to be used as an output"
|
|
135
|
-
# )
|
|
136
|
-
|
|
137
|
-
# @classmethod
|
|
138
|
-
# def from_json(cls, *args, **kwargs) -> DataObject:
|
|
139
|
-
# return cls.__dataframe__.serialized_type.from_dict(*args, **kwargs)
|
|
140
|
-
|
|
141
|
-
# @classmethod
|
|
142
|
-
# def from_dict(
|
|
143
|
-
# cls,
|
|
144
|
-
# *args,
|
|
145
|
-
# **kwargs,
|
|
146
|
-
# ) -> DataObject:
|
|
147
|
-
# return cls.__dataframe__.serialized_type.from_dict(*args, **kwargs)
|
|
148
|
-
|
|
149
|
-
# @classmethod
|
|
150
|
-
# def json_schema(cls, *args, **kwargs) -> Dict[str, Any]:
|
|
151
|
-
# if cls.__data_object__["schema"]:
|
|
152
|
-
# schema = cls.__dataframe__.serialized_type.json_schema(*args, **kwargs)
|
|
153
|
-
# schema[cls.__name__] = schema[cls.__dataframe__.serialized_type.__name__]
|
|
154
|
-
# return schema
|
|
155
|
-
# return {}
|
|
156
|
-
|
|
157
97
|
def event_id(self, *args, **kwargs) -> str:
|
|
158
98
|
return ""
|
|
159
99
|
|
|
@@ -205,16 +145,16 @@ def dataframe(
|
|
|
205
145
|
|
|
206
146
|
def add_dataframe_metadata(cls):
|
|
207
147
|
serialized_fields = {k: (v.annotation, v) for k, v in fields(cls).items()}
|
|
208
|
-
|
|
209
|
-
|
|
148
|
+
dataobject_type = create_model(cls.__name__+"DataObject", **serialized_fields)
|
|
149
|
+
dataobject_type = dataobject(dataobject_type, unsafe=True)
|
|
210
150
|
|
|
151
|
+
setattr(cls, "DataObject", dataobject_type)
|
|
211
152
|
setattr(
|
|
212
153
|
cls,
|
|
213
154
|
"__dataframe__",
|
|
214
155
|
DataFrameMetadata(
|
|
215
156
|
columns=list(fields(cls).keys()),
|
|
216
157
|
fields=dict(fields(cls).items()),
|
|
217
|
-
serialized_type=serialized_type,
|
|
218
158
|
),
|
|
219
159
|
)
|
|
220
160
|
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"""
|
|
3
3
|
|
|
4
4
|
from importlib import import_module
|
|
5
|
-
from typing import Type, TypeVar
|
|
5
|
+
from typing import Generic, Type, TypeVar
|
|
6
6
|
|
|
7
7
|
from hopeit.dataobjects import dataclass, dataobject
|
|
8
8
|
|
|
@@ -11,12 +11,20 @@ DataFrameT = TypeVar("DataFrameT")
|
|
|
11
11
|
|
|
12
12
|
@dataobject
|
|
13
13
|
@dataclass
|
|
14
|
-
class Dataset:
|
|
14
|
+
class Dataset(Generic[DataFrameT]):
|
|
15
|
+
"""Persisted representation of a @dataframe object"""
|
|
15
16
|
protocol: str
|
|
16
17
|
partition_key: str
|
|
17
18
|
key: str
|
|
18
19
|
datatype: str
|
|
19
20
|
|
|
21
|
+
async def load(self) -> DataFrameT:
|
|
22
|
+
return await self.__storage.load(self) # type: ignore[attr-defined]
|
|
23
|
+
|
|
24
|
+
@classmethod
|
|
25
|
+
async def save(cls, dataframe: DataFrameT) -> "Dataset[DataFrameT]":
|
|
26
|
+
return await cls.__storage.save(dataframe) # type: ignore[attr-defined]
|
|
27
|
+
|
|
20
28
|
|
|
21
29
|
def find_protocol_impl(qual_type_name: str) -> Type:
|
|
22
30
|
mod_name, type_name = (
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
|
|
4
4
|
import io
|
|
5
5
|
from importlib import import_module
|
|
6
|
-
from typing import
|
|
6
|
+
from typing import Generic, Optional, Type, TypeVar
|
|
7
7
|
from uuid import uuid4
|
|
8
8
|
|
|
9
9
|
import pandas as pd
|
|
@@ -67,40 +67,6 @@ class DatasetFileStorage(Generic[DataFrameT]):
|
|
|
67
67
|
df = pd.read_parquet(io.BytesIO(data), engine="pyarrow")
|
|
68
68
|
return datatype._from_df(df) # pylint: disable=protected-access
|
|
69
69
|
|
|
70
|
-
async def ser_wrapper(
|
|
71
|
-
self,
|
|
72
|
-
base_serialization: Callable,
|
|
73
|
-
data: Union[EventPayloadType, DataFrameT],
|
|
74
|
-
level: int,
|
|
75
|
-
) -> bytes:
|
|
76
|
-
"""Serialization wrapper that plugins-in into hopeit.engine
|
|
77
|
-
serialization when dataframes plugin is initialized
|
|
78
|
-
"""
|
|
79
|
-
if hasattr(data, "__dataframeobject__"):
|
|
80
|
-
data = await data._serialize() # type: ignore # pylint: disable=protected-access
|
|
81
|
-
if hasattr(data, "__dataframe__"):
|
|
82
|
-
data = await self.save(data) # type: ignore
|
|
83
|
-
return await base_serialization(data, level)
|
|
84
|
-
|
|
85
|
-
async def deser_wrapper(
|
|
86
|
-
self,
|
|
87
|
-
base_deserialization: Callable,
|
|
88
|
-
data: bytes,
|
|
89
|
-
datatype: Union[Type[EventPayloadType], Type[DataFrameT]],
|
|
90
|
-
) -> Union[EventPayloadType, DataFrameT]:
|
|
91
|
-
"""Deerialization wrapper that plugins-in into hopeit.engine
|
|
92
|
-
deserialization when dataframes plugin is initialized
|
|
93
|
-
"""
|
|
94
|
-
if hasattr(datatype, "__dataframeobject__"):
|
|
95
|
-
dataset = await base_deserialization(
|
|
96
|
-
data, datatype.__dataframeobject__.serialized_type # type: ignore
|
|
97
|
-
)
|
|
98
|
-
return await datatype._deserialize(dataset) # type: ignore # pylint: disable=protected-access
|
|
99
|
-
if hasattr(datatype, "__dataframe__"):
|
|
100
|
-
dataset = await base_deserialization(data, Dataset)
|
|
101
|
-
return await self.load(dataset)
|
|
102
|
-
return await base_deserialization(data, datatype)
|
|
103
|
-
|
|
104
70
|
|
|
105
71
|
def find_dataframe_type(qual_type_name: str) -> Type[DataFrameT]:
|
|
106
72
|
"""Returns dataframe class based on type name used during serialization"""
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""hopeit.engine dataframes plugin SETUP event.
|
|
2
|
+
|
|
3
|
+
This event executes when engine starts with dataframes plugin configuration file loaded,
|
|
4
|
+
and ensures that the engine will support serialization of `@dataframe` and `@dataframeobject`
|
|
5
|
+
types
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from hopeit.app.context import EventContext
|
|
9
|
+
from hopeit.app.logger import app_logger
|
|
10
|
+
from hopeit.dataframes.serialization.dataset import Dataset, find_protocol_impl
|
|
11
|
+
from hopeit.dataframes.serialization.settings import DatasetSerialization
|
|
12
|
+
|
|
13
|
+
logger = app_logger()
|
|
14
|
+
|
|
15
|
+
__steps__ = ["setup"]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def setup(payload: None, context: EventContext) -> None:
|
|
19
|
+
"""Sets up Dataset serialization in hopeit.engine based on
|
|
20
|
+
`DatasetSerialization` settings configured in plugin configuration file
|
|
21
|
+
"""
|
|
22
|
+
logger.info(context, "Configuring Dataset serialization...")
|
|
23
|
+
settings: DatasetSerialization = context.settings(
|
|
24
|
+
key="dataset_serialization", datatype=DatasetSerialization
|
|
25
|
+
)
|
|
26
|
+
register_serialization(settings)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def register_serialization(settings: DatasetSerialization):
|
|
30
|
+
impl = find_protocol_impl(settings.protocol)
|
|
31
|
+
storage = impl(
|
|
32
|
+
protocol=settings.protocol,
|
|
33
|
+
location=settings.location,
|
|
34
|
+
partition_dateformat=settings.partition_dateformat,
|
|
35
|
+
)
|
|
36
|
+
setattr(Dataset, "_Dataset__storage", storage)
|
{hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b2}/src/hopeit.dataframes.egg-info/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: hopeit.dataframes
|
|
3
|
-
Version: 0.25.0b1
|
|
3
|
+
Version: 0.25.0b2
|
|
4
4
|
Summary: Hopeit Engine Dataframes Toolkit
|
|
5
5
|
Home-page: https://github.com/hopeit-git/hopeit.engine
|
|
6
6
|
Author: Leo Smerling and Pablo Canto
|
|
@@ -26,7 +26,7 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
|
26
26
|
Classifier: Framework :: AsyncIO
|
|
27
27
|
Requires-Python: >=3.8
|
|
28
28
|
Description-Content-Type: text/markdown
|
|
29
|
-
Requires-Dist: hopeit.engine[fs-storage]==0.25.0b1
|
|
29
|
+
Requires-Dist: hopeit.engine[fs-storage]==0.25.0b2
|
|
30
30
|
Requires-Dist: pandas
|
|
31
31
|
Requires-Dist: numpy
|
|
32
32
|
Provides-Extra: pyarrow
|
{hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b2}/src/hopeit.dataframes.egg-info/SOURCES.txt
RENAMED
|
@@ -7,7 +7,6 @@ src/hopeit.dataframes.egg-info/requires.txt
|
|
|
7
7
|
src/hopeit.dataframes.egg-info/top_level.txt
|
|
8
8
|
src/hopeit/dataframes/__init__.py
|
|
9
9
|
src/hopeit/dataframes/dataframe.py
|
|
10
|
-
src/hopeit/dataframes/dataframeobject.py
|
|
11
10
|
src/hopeit/dataframes/py.typed
|
|
12
11
|
src/hopeit/dataframes/serialization/__init__.py
|
|
13
12
|
src/hopeit/dataframes/serialization/dataset.py
|
|
@@ -1,187 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
`@dataframeobject` annonation mixin to serialize a group of `@dataframe`s.
|
|
3
|
-
|
|
4
|
-
Datasets behaves as DataObject so they can be used as payload
|
|
5
|
-
for endpoints and streams.
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
import dataclasses
|
|
9
|
-
from typing import (
|
|
10
|
-
Any,
|
|
11
|
-
Callable,
|
|
12
|
-
ClassVar,
|
|
13
|
-
Dict,
|
|
14
|
-
Generic,
|
|
15
|
-
Optional,
|
|
16
|
-
Type,
|
|
17
|
-
TypeVar,
|
|
18
|
-
Union,
|
|
19
|
-
get_args,
|
|
20
|
-
get_origin,
|
|
21
|
-
)
|
|
22
|
-
|
|
23
|
-
from pydantic import TypeAdapter, create_model
|
|
24
|
-
from pydantic.fields import FieldInfo
|
|
25
|
-
|
|
26
|
-
from hopeit.dataframes.serialization.dataset import Dataset
|
|
27
|
-
from hopeit.dataobjects import (
|
|
28
|
-
DataObject,
|
|
29
|
-
StreamEventMixin,
|
|
30
|
-
StreamEventParams,
|
|
31
|
-
dataobject,
|
|
32
|
-
fields,
|
|
33
|
-
)
|
|
34
|
-
|
|
35
|
-
DataFrameObjectT = TypeVar("DataFrameObjectT")
|
|
36
|
-
NoneType = type(None)
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
@dataclasses.dataclass
|
|
40
|
-
class DataFrameObjectMetadata(Generic[DataObject]):
|
|
41
|
-
serialized_type: Type[DataObject]
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
class DataFrameObjectMixin(Generic[DataFrameObjectT]):
|
|
45
|
-
"""
|
|
46
|
-
MixIn class to add functionality for `@dataframeobject`s
|
|
47
|
-
|
|
48
|
-
Do not use this class directly, instead use `@dataframeobject` class decorator.
|
|
49
|
-
"""
|
|
50
|
-
|
|
51
|
-
__storage: ClassVar[Any] = None # pylint: disable=invalid-name
|
|
52
|
-
|
|
53
|
-
def __init__(self) -> None:
|
|
54
|
-
self.__dataframeobject__: DataFrameObjectMetadata = None # type: ignore
|
|
55
|
-
raise NotImplementedError(
|
|
56
|
-
"DataFrameObjectMixin() should not be called directly. Use `@dataframeobject` annotation"
|
|
57
|
-
)
|
|
58
|
-
|
|
59
|
-
async def _serialize(self) -> Optional[DataObject]:
|
|
60
|
-
"""Saves internal `@dataframe`s using configured serialization protocol
|
|
61
|
-
and returns json-serialiable dataobject
|
|
62
|
-
"""
|
|
63
|
-
datasets = {}
|
|
64
|
-
for field_name, field in fields(self).items(): # type: ignore[arg-type]
|
|
65
|
-
if Dataset in {field.annotation, *get_args(field.annotation)}:
|
|
66
|
-
dataframe = getattr(self, field_name)
|
|
67
|
-
dataset = (
|
|
68
|
-
None if dataframe is None else await self.__storage.save(dataframe)
|
|
69
|
-
)
|
|
70
|
-
datasets[field_name] = dataset
|
|
71
|
-
else:
|
|
72
|
-
datasets[field_name] = getattr(self, field_name)
|
|
73
|
-
return self.__dataframeobject__.serialized_type(**datasets)
|
|
74
|
-
|
|
75
|
-
@classmethod
|
|
76
|
-
async def _deserialize(
|
|
77
|
-
cls, serialized: DataObject
|
|
78
|
-
) -> "DataFrameObjectMixin[DataFrameObjectT]":
|
|
79
|
-
"""From a serialized datframeobject, load inner `@dataframe` objects
|
|
80
|
-
and returns a `@dataframeobject` instance"""
|
|
81
|
-
dataframes = {}
|
|
82
|
-
for field_name, field in fields(cls).items(): # type: ignore[type-var]
|
|
83
|
-
if Dataset in {field.annotation, *get_args(field.annotation)}:
|
|
84
|
-
dataset = getattr(serialized, field_name)
|
|
85
|
-
dataframe = (
|
|
86
|
-
None if dataset is None else await cls.__storage.load(dataset)
|
|
87
|
-
)
|
|
88
|
-
dataframes[field_name] = dataframe
|
|
89
|
-
else:
|
|
90
|
-
dataframes[field_name] = getattr(serialized, field_name)
|
|
91
|
-
return cls(**dataframes)
|
|
92
|
-
|
|
93
|
-
@classmethod
|
|
94
|
-
def json_schema(cls, *args, **kwargs) -> Dict[str, Any]:
|
|
95
|
-
schema = TypeAdapter(cls.__dataframeobject__.serialized_type).json_schema(*args, **kwargs)
|
|
96
|
-
return schema
|
|
97
|
-
|
|
98
|
-
# def to_json(self, *args, **kwargs) -> Dict[str, Any]:
|
|
99
|
-
# raise RuntimeError(
|
|
100
|
-
# f"`{type(self).__name__}` `@dataframeobject` cannot be converted to json directly. "
|
|
101
|
-
# "i.e. use `return await DataFrames.serialize(obj)` to return it as a response."
|
|
102
|
-
# )
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
def _is_dataframe_field(field: FieldInfo) -> bool:
|
|
106
|
-
return any(
|
|
107
|
-
hasattr(field_type, "__dataframe__")
|
|
108
|
-
for field_type in [field.annotation, *get_args(field.annotation)]
|
|
109
|
-
)
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
def _serialized_field_type(field_name: str, field: FieldInfo) -> Optional[Type[Any]]:
|
|
113
|
-
"""Computes the `@dataobject` datatype used as a result
|
|
114
|
-
of serialized `@dataframeobject`
|
|
115
|
-
"""
|
|
116
|
-
if hasattr(field.annotation, "__dataframe__"):
|
|
117
|
-
return Dataset
|
|
118
|
-
if get_origin(field.annotation) is Union:
|
|
119
|
-
args = get_args(field.annotation)
|
|
120
|
-
if (
|
|
121
|
-
len(args) == 2
|
|
122
|
-
and any(hasattr(field_type, "__dataframe__") for field_type in args)
|
|
123
|
-
and any(field_type is NoneType for field_type in args)
|
|
124
|
-
):
|
|
125
|
-
return Optional[Dataset] # type: ignore
|
|
126
|
-
if _is_dataframe_field(field):
|
|
127
|
-
raise TypeError(
|
|
128
|
-
f"field {field_name}: only `DataFrameT` or `Optional[DataFrameT]` are supported"
|
|
129
|
-
)
|
|
130
|
-
return field.annotation
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
def dataframeobject(
|
|
134
|
-
decorated_class=None,
|
|
135
|
-
) -> Callable[[Type], Type[DataFrameObjectMixin]]:
|
|
136
|
-
"""
|
|
137
|
-
Decorator for dataclasses intended to be used as dataframes.
|
|
138
|
-
"""
|
|
139
|
-
|
|
140
|
-
def add_dataframe_mixin(cls) -> Type[DataFrameObjectMixin]:
|
|
141
|
-
if hasattr(cls, "__annotations__") and hasattr(cls, "__dataclass_fields__"):
|
|
142
|
-
amended_class = type(
|
|
143
|
-
cls.__name__,
|
|
144
|
-
(DataFrameObjectMixin,) + cls.__mro__,
|
|
145
|
-
dict(cls.__dict__),
|
|
146
|
-
)
|
|
147
|
-
return amended_class
|
|
148
|
-
return cls
|
|
149
|
-
|
|
150
|
-
def add_dataframeobject_metadata(cls):
|
|
151
|
-
serialized_fields = {
|
|
152
|
-
field_name: (_serialized_field_type(field_name, field_info), field_info)
|
|
153
|
-
for field_name, field_info in fields(cls).items()
|
|
154
|
-
}
|
|
155
|
-
serialized_type = create_model(cls.__name__+"_", **serialized_fields)
|
|
156
|
-
serialized_type = dataobject(serialized_type, unsafe=True)
|
|
157
|
-
setattr(
|
|
158
|
-
cls,
|
|
159
|
-
"__dataframeobject__",
|
|
160
|
-
DataFrameObjectMetadata(
|
|
161
|
-
serialized_type=serialized_type,
|
|
162
|
-
),
|
|
163
|
-
)
|
|
164
|
-
|
|
165
|
-
def add_dataobject_annotations(cls, unsafe: bool, schema: bool):
|
|
166
|
-
setattr(
|
|
167
|
-
cls,
|
|
168
|
-
"__data_object__",
|
|
169
|
-
{"unsafe": unsafe, "schema": schema},
|
|
170
|
-
)
|
|
171
|
-
setattr(cls, "__stream_event__", StreamEventParams(None, None))
|
|
172
|
-
setattr(cls, "event_id", StreamEventMixin.event_id)
|
|
173
|
-
setattr(cls, "event_ts", StreamEventMixin.event_ts)
|
|
174
|
-
|
|
175
|
-
def wrap(cls) -> Type[DataFrameObjectMixin]:
|
|
176
|
-
if hasattr(cls, "__dataframeobject__"):
|
|
177
|
-
return cls
|
|
178
|
-
add_dataframeobject_metadata(cls)
|
|
179
|
-
amended_class = add_dataframe_mixin(cls)
|
|
180
|
-
add_dataobject_annotations(
|
|
181
|
-
amended_class, unsafe=False, schema=True
|
|
182
|
-
)
|
|
183
|
-
return amended_class
|
|
184
|
-
|
|
185
|
-
if decorated_class is None:
|
|
186
|
-
return wrap
|
|
187
|
-
return wrap(decorated_class) # type: ignore
|
|
@@ -1,52 +0,0 @@
|
|
|
1
|
-
"""hopeit.engine dataframes plugin SETUP event.
|
|
2
|
-
|
|
3
|
-
This event executes when engine starts with dataframes plugin configuration file loaded,
|
|
4
|
-
and ensures that the engine will support serialization of `@dataframe` and `@dataframeobject`
|
|
5
|
-
types
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
from functools import partial
|
|
9
|
-
|
|
10
|
-
from hopeit.app.context import EventContext
|
|
11
|
-
from hopeit.app.logger import app_logger
|
|
12
|
-
from hopeit.dataframes.dataframeobject import DataFrameObjectMixin
|
|
13
|
-
from hopeit.dataframes.serialization.dataset import find_protocol_impl
|
|
14
|
-
from hopeit.dataframes.serialization.settings import DatasetSerialization
|
|
15
|
-
from hopeit.server import serialization
|
|
16
|
-
|
|
17
|
-
logger = app_logger()
|
|
18
|
-
|
|
19
|
-
__steps__ = ["register_serialization"]
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
def register_serialization(payload: None, context: EventContext) -> None:
|
|
23
|
-
"""Setups serizaltion wrappers in hopeit.engine based on
|
|
24
|
-
`DataSerialization` settings configured in plugin configuration file
|
|
25
|
-
"""
|
|
26
|
-
logger.info(context, "Registering serialization methods...")
|
|
27
|
-
|
|
28
|
-
settings: DatasetSerialization = context.settings(
|
|
29
|
-
key="dataset_serialization", datatype=DatasetSerialization
|
|
30
|
-
)
|
|
31
|
-
impl = find_protocol_impl(settings.protocol)
|
|
32
|
-
|
|
33
|
-
storage = impl(
|
|
34
|
-
protocol=settings.protocol,
|
|
35
|
-
location=settings.location,
|
|
36
|
-
partition_dateformat=settings.partition_dateformat,
|
|
37
|
-
)
|
|
38
|
-
setattr(DataFrameObjectMixin, "_DataFrameObjectMixin__storage", storage)
|
|
39
|
-
|
|
40
|
-
serdeser_wrappers = {}
|
|
41
|
-
for (
|
|
42
|
-
serdeser,
|
|
43
|
-
methods,
|
|
44
|
-
) in serialization._SERDESER.items(): # pylint: disable=protected-access
|
|
45
|
-
serdeser_wrappers[serdeser] = (
|
|
46
|
-
partial(storage.ser_wrapper, methods[0]),
|
|
47
|
-
methods[1],
|
|
48
|
-
partial(storage.deser_wrapper, methods[2]),
|
|
49
|
-
)
|
|
50
|
-
|
|
51
|
-
for serdeser, methods in serdeser_wrappers.items():
|
|
52
|
-
serialization._SERDESER[serdeser] = methods # pylint: disable=protected-access
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b2}/src/hopeit/dataframes/setup/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|