hopeit.dataframes 0.25.0b1__tar.gz → 0.25.0b3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (21) hide show
  1. {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b3}/PKG-INFO +2 -2
  2. {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b3}/src/hopeit/dataframes/__init__.py +55 -31
  3. {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b3}/src/hopeit/dataframes/dataframe.py +6 -66
  4. {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b3}/src/hopeit/dataframes/serialization/dataset.py +10 -2
  5. {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b3}/src/hopeit/dataframes/serialization/files.py +1 -35
  6. hopeit_dataframes-0.25.0b3/src/hopeit/dataframes/setup/dataframes.py +36 -0
  7. {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b3}/src/hopeit.dataframes.egg-info/PKG-INFO +2 -2
  8. {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b3}/src/hopeit.dataframes.egg-info/SOURCES.txt +0 -1
  9. hopeit_dataframes-0.25.0b3/src/hopeit.dataframes.egg-info/requires.txt +6 -0
  10. hopeit_dataframes-0.25.0b1/src/hopeit/dataframes/dataframeobject.py +0 -187
  11. hopeit_dataframes-0.25.0b1/src/hopeit/dataframes/setup/dataframes.py +0 -52
  12. hopeit_dataframes-0.25.0b1/src/hopeit.dataframes.egg-info/requires.txt +0 -6
  13. {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b3}/README.md +0 -0
  14. {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b3}/setup.cfg +0 -0
  15. {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b3}/setup.py +0 -0
  16. {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b3}/src/hopeit/dataframes/py.typed +0 -0
  17. {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b3}/src/hopeit/dataframes/serialization/__init__.py +0 -0
  18. {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b3}/src/hopeit/dataframes/serialization/settings.py +0 -0
  19. {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b3}/src/hopeit/dataframes/setup/__init__.py +0 -0
  20. {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b3}/src/hopeit.dataframes.egg-info/dependency_links.txt +0 -0
  21. {hopeit_dataframes-0.25.0b1 → hopeit_dataframes-0.25.0b3}/src/hopeit.dataframes.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: hopeit.dataframes
3
- Version: 0.25.0b1
3
+ Version: 0.25.0b3
4
4
  Summary: Hopeit Engine Dataframes Toolkit
5
5
  Home-page: https://github.com/hopeit-git/hopeit.engine
6
6
  Author: Leo Smerling and Pablo Canto
@@ -26,7 +26,7 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
26
26
  Classifier: Framework :: AsyncIO
27
27
  Requires-Python: >=3.8
28
28
  Description-Content-Type: text/markdown
29
- Requires-Dist: hopeit.engine[fs-storage]==0.25.0b1
29
+ Requires-Dist: hopeit.engine[fs-storage]==0.25.0b3
30
30
  Requires-Dist: pandas
31
31
  Requires-Dist: numpy
32
32
  Provides-Extra: pyarrow
@@ -1,37 +1,74 @@
1
1
  """
2
2
  hopeit.engine dataframes plugin entry point
3
3
 
4
- This module exposes the 3 main constructions to be used inside apps:
4
+ This module exposes the 2 main constructs to be used inside apps,
5
+ to extend @dataobject functionality to support working with `pandas DataFrames`
5
6
  `@dataframe` dataclass annotation
6
- `@dataframeobject` dataclass annotation
7
7
  `DataFrames` class to handle manipulation of dataframe/dataframeobjects
8
8
 
9
9
  Usage:
10
10
  ```
11
- from hopeit.dataframes import DataFrames, dataframe, dataframeobject
11
+ from typing import List
12
+
13
+ import pandas as pd
14
+
15
+ from hopeit.dataframes.serialization.settings import DatasetSerialization
16
+ from hopeit.dataframes import DataFrames, Dataset, dataframe
17
+ from hopeit.dataobjects import dataobject, dataclass
18
+ from hopeit.dataobjects.payload import Payload
12
19
 
13
20
  @dataframe
14
21
  @dataclass
15
- class MyDataFrame:
22
+ class MyData:
16
23
  field1: int
17
24
  field2: str
18
25
  ...
19
26
 
20
- @dataframeobject
27
+ @dataobject
21
28
  @dataclass
22
29
  class MyDataset:
23
30
  dataset_name: str
24
- example_data: MyDataFrame
31
+ example_data: Dataset[MyData]
32
+
33
+ @dataobject
34
+ @dataclass
35
+ class MyWebResponse:
36
+ dataset_name: str
37
+ example_data: List[MyData.DataObject]
38
+
39
+ # This step is not needed if SETUP event is configured in app
40
+ DataFrames.setup(DatasetSerialization(
41
+ protocol="hopeit.dataframes.serialization.files.DatasetFileStorage",
42
+ location="/tmp/data",
43
+ partition_dateformat="%Y/%m/%d/%H/",
44
+ ))
45
+
46
+ df = pd.DataFrame([ # Create or load a pandas DataFrame
47
+ {"field1": 1, "field2": "text1"},
48
+ {"field1": 2, "field2": "text2"},
49
+ ])
50
+
51
+ my_data: MyData = DataFrames.from_df(MyData, df)
25
52
 
53
+ # return dataset after saving data to disk
54
+ my_dataset = MyDataset(
55
+ dataset_name="example",
56
+ example_data=await Dataset.save(my_data)
57
+ )
58
+
59
+ print(Payload.to_json(my_dataset))
26
60
 
27
- df = pd.DataFrame(...) # create or load your pandas dataframe
61
+ my_data_again: MyData = await my_dataset.example_data.load()
28
62
 
29
- my_data = DataFrames.from_df(pd.DataFrame(..))
63
+ print(DataFrames.df(my_data_again))
30
64
 
31
- return MyDataSet(
65
+ # return dataframe converted to list of dataobjects that can be directly converted to json
66
+ my_json_response = MyWebResponse(
32
67
  dataset_name="example",
33
- example_data=my_data
68
+ example_data=DataFrames.to_dataobjects(my_data)
34
69
  )
70
+
71
+ print(Payload.to_json(my_json_response))
35
72
  ```
36
73
  """
37
74
 
@@ -40,35 +77,22 @@ from typing import Dict, Generic, Iterator, List, Type
40
77
  import numpy as np
41
78
  import pandas as pd
42
79
  from hopeit.dataframes.dataframe import DataFrameT, dataframe
43
- from hopeit.dataframes.dataframeobject import DataFrameObjectT, dataframeobject
80
+ from hopeit.dataframes.serialization.dataset import Dataset
81
+ from hopeit.dataframes.serialization.settings import DatasetSerialization
82
+ from hopeit.dataframes.setup.dataframes import register_serialization
44
83
  from hopeit.dataobjects import DataObject
45
84
 
46
- __all__ = ["DataFrames", "dataframe", "dataframeobject"]
85
+ __all__ = ["DataFrames", "Dataset", "dataframe"]
47
86
 
48
87
 
49
- class DataFrames(Generic[DataFrameT, DataFrameObjectT, DataObject]):
88
+ class DataFrames(Generic[DataFrameT, DataObject]):
50
89
  """
51
90
  Dataframes manipulation utilities methods
52
91
  """
53
92
 
54
93
  @staticmethod
55
- async def serialize(obj: DataFrameObjectT) -> DataObject:
56
- """Serialize/saves contents of dataframe fields of a `@dataframeobject`
57
- and converts to a `DataObject` json-compatible with pointers to saved
58
- locations.
59
-
60
- This method can be used to i.e. return `@dataframeobject`s as a JSON response
61
- """
62
- return await obj._serialize() # type: ignore # pylint: disable=protected-access
63
-
64
- @staticmethod
65
- async def deserialize(
66
- datatype: Type[DataFrameObjectT], dataobject: DataObject
67
- ) -> DataFrameObjectT:
68
- """Deserialize/load contents of serialized dataobject fields of a `@dataframeobject`
69
- loading saved Dataset information for @dataframe fields
70
- """
71
- return await datatype._deserialize(dataobject) # type: ignore # pylint: disable=protected-access
94
+ def setup(settings: DatasetSerialization):
95
+ register_serialization(settings)
72
96
 
73
97
  @staticmethod
74
98
  def from_df(
@@ -88,7 +112,7 @@ class DataFrames(Generic[DataFrameT, DataFrameObjectT, DataObject]):
88
112
 
89
113
  @staticmethod
90
114
  def from_dataobjects(
91
- datatype: Type[DataFrameT], dataobjects: Iterator[DataFrameObjectT]
115
+ datatype: Type[DataFrameT], dataobjects: Iterator[DataObject]
92
116
  ) -> DataFrameT:
93
117
  """Converts standard json serializable `@dataobject`s to a single `@dataframe`"""
94
118
  return datatype._from_dataobjects(dataobjects) # type: ignore # pylint: disable=protected-access
@@ -1,20 +1,9 @@
1
1
  """
2
2
  DataFrames type abstractions.
3
-
4
- Example:
5
-
6
- from hopeit.dataobjects import dataclass # equivalent to `dataclasses.dataclass`
7
- from hopeit.dataframes import dataframe
8
-
9
- @dataframe
10
- @dataclass
11
- class MyObject:
12
- name: str
13
- number: int
14
3
  """
15
4
  import dataclasses
16
5
  from datetime import date, datetime, timezone
17
- from typing import Any, Callable, Dict, Generic, Iterator, List, Optional, Type, TypeVar
6
+ from typing import Any, Callable, Dict, Generic, Iterator, List, Type, TypeVar
18
7
 
19
8
  import numpy as np
20
9
  import pandas as pd
@@ -34,28 +23,9 @@ DataFrameT = TypeVar("DataFrameT")
34
23
 
35
24
 
36
25
  @dataclasses.dataclass
37
- class DataFrameMetadata(Generic[DataObject]):
26
+ class DataFrameMetadata():
38
27
  columns: List[str]
39
28
  fields: Dict[str, FieldInfo]
40
- serialized_type: Type[DataObject]
41
-
42
-
43
- @dataclasses.dataclass
44
- class DataFrameParams:
45
- """
46
- Helper class used to access attributes in @dataframe
47
- decorated objects, based on dot notation expressions
48
- """
49
-
50
- datatypes: Optional[str]
51
-
52
- @staticmethod
53
- def extract_attr(obj, expr):
54
- value = obj
55
- for attr_name in expr.split("."):
56
- if value:
57
- value = getattr(value, attr_name)
58
- return value
59
29
 
60
30
 
61
31
  class DataFrameMixin(Generic[DataFrameT, DataObject]):
@@ -120,40 +90,10 @@ class DataFrameMixin(Generic[DataFrameT, DataObject]):
120
90
 
121
91
  def _to_dataobjects(self) -> List[DataObject]:
122
92
  return [
123
- self.__dataframe__.serialized_type(**fields)
93
+ self.DataObject(**fields)
124
94
  for fields in self.__df.to_dict(orient="records")
125
95
  ]
126
96
 
127
- # def to_json(self, *args, **kwargs) -> str:
128
- # raise NotImplementedError(
129
- # "Dataframe must be used inside `@dataobject(unsafe=True)` to be used as an output"
130
- # )
131
-
132
- # def to_dict(self, *args, **kwargs) -> Dict[str, Any]:
133
- # raise NotImplementedError(
134
- # "Dataframe must be used inside `@dataobject(unsafe=True)` to be used as an output"
135
- # )
136
-
137
- # @classmethod
138
- # def from_json(cls, *args, **kwargs) -> DataObject:
139
- # return cls.__dataframe__.serialized_type.from_dict(*args, **kwargs)
140
-
141
- # @classmethod
142
- # def from_dict(
143
- # cls,
144
- # *args,
145
- # **kwargs,
146
- # ) -> DataObject:
147
- # return cls.__dataframe__.serialized_type.from_dict(*args, **kwargs)
148
-
149
- # @classmethod
150
- # def json_schema(cls, *args, **kwargs) -> Dict[str, Any]:
151
- # if cls.__data_object__["schema"]:
152
- # schema = cls.__dataframe__.serialized_type.json_schema(*args, **kwargs)
153
- # schema[cls.__name__] = schema[cls.__dataframe__.serialized_type.__name__]
154
- # return schema
155
- # return {}
156
-
157
97
  def event_id(self, *args, **kwargs) -> str:
158
98
  return ""
159
99
 
@@ -205,16 +145,16 @@ def dataframe(
205
145
 
206
146
  def add_dataframe_metadata(cls):
207
147
  serialized_fields = {k: (v.annotation, v) for k, v in fields(cls).items()}
208
- serialized_type = create_model(cls.__name__+"_", **serialized_fields)
209
- serialized_type = dataobject(serialized_type, unsafe=True)
148
+ dataobject_type = create_model(cls.__name__+"DataObject", **serialized_fields)
149
+ dataobject_type = dataobject(dataobject_type, unsafe=True)
210
150
 
151
+ setattr(cls, "DataObject", dataobject_type)
211
152
  setattr(
212
153
  cls,
213
154
  "__dataframe__",
214
155
  DataFrameMetadata(
215
156
  columns=list(fields(cls).keys()),
216
157
  fields=dict(fields(cls).items()),
217
- serialized_type=serialized_type,
218
158
  ),
219
159
  )
220
160
 
@@ -2,7 +2,7 @@
2
2
  """
3
3
 
4
4
  from importlib import import_module
5
- from typing import Type, TypeVar
5
+ from typing import Generic, Type, TypeVar
6
6
 
7
7
  from hopeit.dataobjects import dataclass, dataobject
8
8
 
@@ -11,12 +11,20 @@ DataFrameT = TypeVar("DataFrameT")
11
11
 
12
12
  @dataobject
13
13
  @dataclass
14
- class Dataset:
14
+ class Dataset(Generic[DataFrameT]):
15
+ """Persisted representation of a @dataframe object"""
15
16
  protocol: str
16
17
  partition_key: str
17
18
  key: str
18
19
  datatype: str
19
20
 
21
+ async def load(self) -> DataFrameT:
22
+ return await self.__storage.load(self) # type: ignore[attr-defined]
23
+
24
+ @classmethod
25
+ async def save(cls, dataframe: DataFrameT) -> "Dataset[DataFrameT]":
26
+ return await cls.__storage.save(dataframe) # type: ignore[attr-defined]
27
+
20
28
 
21
29
  def find_protocol_impl(qual_type_name: str) -> Type:
22
30
  mod_name, type_name = (
@@ -3,7 +3,7 @@
3
3
 
4
4
  import io
5
5
  from importlib import import_module
6
- from typing import Callable, Generic, Optional, Type, TypeVar, Union
6
+ from typing import Generic, Optional, Type, TypeVar
7
7
  from uuid import uuid4
8
8
 
9
9
  import pandas as pd
@@ -67,40 +67,6 @@ class DatasetFileStorage(Generic[DataFrameT]):
67
67
  df = pd.read_parquet(io.BytesIO(data), engine="pyarrow")
68
68
  return datatype._from_df(df) # pylint: disable=protected-access
69
69
 
70
- async def ser_wrapper(
71
- self,
72
- base_serialization: Callable,
73
- data: Union[EventPayloadType, DataFrameT],
74
- level: int,
75
- ) -> bytes:
76
- """Serialization wrapper that plugins-in into hopeit.engine
77
- serialization when dataframes plugin is initialized
78
- """
79
- if hasattr(data, "__dataframeobject__"):
80
- data = await data._serialize() # type: ignore # pylint: disable=protected-access
81
- if hasattr(data, "__dataframe__"):
82
- data = await self.save(data) # type: ignore
83
- return await base_serialization(data, level)
84
-
85
- async def deser_wrapper(
86
- self,
87
- base_deserialization: Callable,
88
- data: bytes,
89
- datatype: Union[Type[EventPayloadType], Type[DataFrameT]],
90
- ) -> Union[EventPayloadType, DataFrameT]:
91
- """Deerialization wrapper that plugins-in into hopeit.engine
92
- deserialization when dataframes plugin is initialized
93
- """
94
- if hasattr(datatype, "__dataframeobject__"):
95
- dataset = await base_deserialization(
96
- data, datatype.__dataframeobject__.serialized_type # type: ignore
97
- )
98
- return await datatype._deserialize(dataset) # type: ignore # pylint: disable=protected-access
99
- if hasattr(datatype, "__dataframe__"):
100
- dataset = await base_deserialization(data, Dataset)
101
- return await self.load(dataset)
102
- return await base_deserialization(data, datatype)
103
-
104
70
 
105
71
  def find_dataframe_type(qual_type_name: str) -> Type[DataFrameT]:
106
72
  """Returns dataframe class based on type name used during serialization"""
@@ -0,0 +1,36 @@
1
+ """hopeit.engine dataframes plugin SETUP event.
2
+
3
+ This event executes when engine starts with dataframes plugin configuration file loaded,
4
+ and ensures that the engine will support serialization of `@dataframe` and `@dataframeobject`
5
+ types
6
+ """
7
+
8
+ from hopeit.app.context import EventContext
9
+ from hopeit.app.logger import app_logger
10
+ from hopeit.dataframes.serialization.dataset import Dataset, find_protocol_impl
11
+ from hopeit.dataframes.serialization.settings import DatasetSerialization
12
+
13
+ logger = app_logger()
14
+
15
+ __steps__ = ["setup"]
16
+
17
+
18
+ def setup(payload: None, context: EventContext) -> None:
19
+ """Sets up serialization wrappers in hopeit.engine based on
20
+ `DatasetSerialization` settings configured in plugin configuration file
21
+ """
22
+ logger.info(context, "Configuring Dataset serialization...")
23
+ settings: DatasetSerialization = context.settings(
24
+ key="dataset_serialization", datatype=DatasetSerialization
25
+ )
26
+ register_serialization(settings)
27
+
28
+
29
+ def register_serialization(settings: DatasetSerialization):
30
+ impl = find_protocol_impl(settings.protocol)
31
+ storage = impl(
32
+ protocol=settings.protocol,
33
+ location=settings.location,
34
+ partition_dateformat=settings.partition_dateformat,
35
+ )
36
+ setattr(Dataset, "_Dataset__storage", storage)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: hopeit.dataframes
3
- Version: 0.25.0b1
3
+ Version: 0.25.0b3
4
4
  Summary: Hopeit Engine Dataframes Toolkit
5
5
  Home-page: https://github.com/hopeit-git/hopeit.engine
6
6
  Author: Leo Smerling and Pablo Canto
@@ -26,7 +26,7 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
26
26
  Classifier: Framework :: AsyncIO
27
27
  Requires-Python: >=3.8
28
28
  Description-Content-Type: text/markdown
29
- Requires-Dist: hopeit.engine[fs-storage]==0.25.0b1
29
+ Requires-Dist: hopeit.engine[fs-storage]==0.25.0b3
30
30
  Requires-Dist: pandas
31
31
  Requires-Dist: numpy
32
32
  Provides-Extra: pyarrow
@@ -7,7 +7,6 @@ src/hopeit.dataframes.egg-info/requires.txt
7
7
  src/hopeit.dataframes.egg-info/top_level.txt
8
8
  src/hopeit/dataframes/__init__.py
9
9
  src/hopeit/dataframes/dataframe.py
10
- src/hopeit/dataframes/dataframeobject.py
11
10
  src/hopeit/dataframes/py.typed
12
11
  src/hopeit/dataframes/serialization/__init__.py
13
12
  src/hopeit/dataframes/serialization/dataset.py
@@ -0,0 +1,6 @@
1
+ hopeit.engine[fs-storage]==0.25.0b3
2
+ pandas
3
+ numpy
4
+
5
+ [pyarrow]
6
+ pyarrow
@@ -1,187 +0,0 @@
1
- """
2
- `@dataframeobject` annonation mixin to serialize a group of `@dataframe`s.
3
-
4
- Datasets behaves as DataObject so they can be used as payload
5
- for endpoints and streams.
6
- """
7
-
8
- import dataclasses
9
- from typing import (
10
- Any,
11
- Callable,
12
- ClassVar,
13
- Dict,
14
- Generic,
15
- Optional,
16
- Type,
17
- TypeVar,
18
- Union,
19
- get_args,
20
- get_origin,
21
- )
22
-
23
- from pydantic import TypeAdapter, create_model
24
- from pydantic.fields import FieldInfo
25
-
26
- from hopeit.dataframes.serialization.dataset import Dataset
27
- from hopeit.dataobjects import (
28
- DataObject,
29
- StreamEventMixin,
30
- StreamEventParams,
31
- dataobject,
32
- fields,
33
- )
34
-
35
- DataFrameObjectT = TypeVar("DataFrameObjectT")
36
- NoneType = type(None)
37
-
38
-
39
- @dataclasses.dataclass
40
- class DataFrameObjectMetadata(Generic[DataObject]):
41
- serialized_type: Type[DataObject]
42
-
43
-
44
- class DataFrameObjectMixin(Generic[DataFrameObjectT]):
45
- """
46
- MixIn class to add functionality for `@dataframeobject`s
47
-
48
- Do not use this class directly, instead use `@dataframeobject` class decorator.
49
- """
50
-
51
- __storage: ClassVar[Any] = None # pylint: disable=invalid-name
52
-
53
- def __init__(self) -> None:
54
- self.__dataframeobject__: DataFrameObjectMetadata = None # type: ignore
55
- raise NotImplementedError(
56
- "DataFrameObjectMixin() should not be called directly. Use `@dataframeobject` annotation"
57
- )
58
-
59
- async def _serialize(self) -> Optional[DataObject]:
60
- """Saves internal `@dataframe`s using configured serialization protocol
61
- and returns json-serialiable dataobject
62
- """
63
- datasets = {}
64
- for field_name, field in fields(self).items(): # type: ignore[arg-type]
65
- if Dataset in {field.annotation, *get_args(field.annotation)}:
66
- dataframe = getattr(self, field_name)
67
- dataset = (
68
- None if dataframe is None else await self.__storage.save(dataframe)
69
- )
70
- datasets[field_name] = dataset
71
- else:
72
- datasets[field_name] = getattr(self, field_name)
73
- return self.__dataframeobject__.serialized_type(**datasets)
74
-
75
- @classmethod
76
- async def _deserialize(
77
- cls, serialized: DataObject
78
- ) -> "DataFrameObjectMixin[DataFrameObjectT]":
79
- """From a serialized datframeobject, load inner `@dataframe` objects
80
- and returns a `@dataframeobject` instance"""
81
- dataframes = {}
82
- for field_name, field in fields(cls).items(): # type: ignore[type-var]
83
- if Dataset in {field.annotation, *get_args(field.annotation)}:
84
- dataset = getattr(serialized, field_name)
85
- dataframe = (
86
- None if dataset is None else await cls.__storage.load(dataset)
87
- )
88
- dataframes[field_name] = dataframe
89
- else:
90
- dataframes[field_name] = getattr(serialized, field_name)
91
- return cls(**dataframes)
92
-
93
- @classmethod
94
- def json_schema(cls, *args, **kwargs) -> Dict[str, Any]:
95
- schema = TypeAdapter(cls.__dataframeobject__.serialized_type).json_schema(*args, **kwargs)
96
- return schema
97
-
98
- # def to_json(self, *args, **kwargs) -> Dict[str, Any]:
99
- # raise RuntimeError(
100
- # f"`{type(self).__name__}` `@dataframeobject` cannot be converted to json directly. "
101
- # "i.e. use `return await DataFrames.serialize(obj)` to return it as a response."
102
- # )
103
-
104
-
105
- def _is_dataframe_field(field: FieldInfo) -> bool:
106
- return any(
107
- hasattr(field_type, "__dataframe__")
108
- for field_type in [field.annotation, *get_args(field.annotation)]
109
- )
110
-
111
-
112
- def _serialized_field_type(field_name: str, field: FieldInfo) -> Optional[Type[Any]]:
113
- """Computes the `@dataobject` datatype used as a result
114
- of serialized `@dataframeobject`
115
- """
116
- if hasattr(field.annotation, "__dataframe__"):
117
- return Dataset
118
- if get_origin(field.annotation) is Union:
119
- args = get_args(field.annotation)
120
- if (
121
- len(args) == 2
122
- and any(hasattr(field_type, "__dataframe__") for field_type in args)
123
- and any(field_type is NoneType for field_type in args)
124
- ):
125
- return Optional[Dataset] # type: ignore
126
- if _is_dataframe_field(field):
127
- raise TypeError(
128
- f"field {field_name}: only `DataFrameT` or `Optional[DataFrameT]` are supported"
129
- )
130
- return field.annotation
131
-
132
-
133
- def dataframeobject(
134
- decorated_class=None,
135
- ) -> Callable[[Type], Type[DataFrameObjectMixin]]:
136
- """
137
- Decorator for dataclasses intended to be used as dataframes.
138
- """
139
-
140
- def add_dataframe_mixin(cls) -> Type[DataFrameObjectMixin]:
141
- if hasattr(cls, "__annotations__") and hasattr(cls, "__dataclass_fields__"):
142
- amended_class = type(
143
- cls.__name__,
144
- (DataFrameObjectMixin,) + cls.__mro__,
145
- dict(cls.__dict__),
146
- )
147
- return amended_class
148
- return cls
149
-
150
- def add_dataframeobject_metadata(cls):
151
- serialized_fields = {
152
- field_name: (_serialized_field_type(field_name, field_info), field_info)
153
- for field_name, field_info in fields(cls).items()
154
- }
155
- serialized_type = create_model(cls.__name__+"_", **serialized_fields)
156
- serialized_type = dataobject(serialized_type, unsafe=True)
157
- setattr(
158
- cls,
159
- "__dataframeobject__",
160
- DataFrameObjectMetadata(
161
- serialized_type=serialized_type,
162
- ),
163
- )
164
-
165
- def add_dataobject_annotations(cls, unsafe: bool, schema: bool):
166
- setattr(
167
- cls,
168
- "__data_object__",
169
- {"unsafe": unsafe, "schema": schema},
170
- )
171
- setattr(cls, "__stream_event__", StreamEventParams(None, None))
172
- setattr(cls, "event_id", StreamEventMixin.event_id)
173
- setattr(cls, "event_ts", StreamEventMixin.event_ts)
174
-
175
- def wrap(cls) -> Type[DataFrameObjectMixin]:
176
- if hasattr(cls, "__dataframeobject__"):
177
- return cls
178
- add_dataframeobject_metadata(cls)
179
- amended_class = add_dataframe_mixin(cls)
180
- add_dataobject_annotations(
181
- amended_class, unsafe=False, schema=True
182
- )
183
- return amended_class
184
-
185
- if decorated_class is None:
186
- return wrap
187
- return wrap(decorated_class) # type: ignore
@@ -1,52 +0,0 @@
1
- """hopeit.engine dataframes plugin SETUP event.
2
-
3
- This event executes when engine starts with dataframes plugin configuration file loaded,
4
- and ensures that the engine will support serialization of `@dataframe` and `@dataframeobject`
5
- types
6
- """
7
-
8
- from functools import partial
9
-
10
- from hopeit.app.context import EventContext
11
- from hopeit.app.logger import app_logger
12
- from hopeit.dataframes.dataframeobject import DataFrameObjectMixin
13
- from hopeit.dataframes.serialization.dataset import find_protocol_impl
14
- from hopeit.dataframes.serialization.settings import DatasetSerialization
15
- from hopeit.server import serialization
16
-
17
- logger = app_logger()
18
-
19
- __steps__ = ["register_serialization"]
20
-
21
-
22
- def register_serialization(payload: None, context: EventContext) -> None:
23
- """Setups serizaltion wrappers in hopeit.engine based on
24
- `DataSerialization` settings configured in plugin configuration file
25
- """
26
- logger.info(context, "Registering serialization methods...")
27
-
28
- settings: DatasetSerialization = context.settings(
29
- key="dataset_serialization", datatype=DatasetSerialization
30
- )
31
- impl = find_protocol_impl(settings.protocol)
32
-
33
- storage = impl(
34
- protocol=settings.protocol,
35
- location=settings.location,
36
- partition_dateformat=settings.partition_dateformat,
37
- )
38
- setattr(DataFrameObjectMixin, "_DataFrameObjectMixin__storage", storage)
39
-
40
- serdeser_wrappers = {}
41
- for (
42
- serdeser,
43
- methods,
44
- ) in serialization._SERDESER.items(): # pylint: disable=protected-access
45
- serdeser_wrappers[serdeser] = (
46
- partial(storage.ser_wrapper, methods[0]),
47
- methods[1],
48
- partial(storage.deser_wrapper, methods[2]),
49
- )
50
-
51
- for serdeser, methods in serdeser_wrappers.items():
52
- serialization._SERDESER[serdeser] = methods # pylint: disable=protected-access
@@ -1,6 +0,0 @@
1
- hopeit.engine[fs-storage]==0.25.0b1
2
- pandas
3
- numpy
4
-
5
- [pyarrow]
6
- pyarrow