hopeit.dataframes 0.24.2__tar.gz → 0.25.0b1__tar.gz

This diff compares the contents of two publicly available package versions, each released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (20) hide show
  1. {hopeit.dataframes-0.24.2 → hopeit_dataframes-0.25.0b1}/PKG-INFO +2 -2
  2. {hopeit.dataframes-0.24.2 → hopeit_dataframes-0.25.0b1}/src/hopeit/dataframes/dataframe.py +47 -44
  3. {hopeit.dataframes-0.24.2 → hopeit_dataframes-0.25.0b1}/src/hopeit/dataframes/dataframeobject.py +39 -36
  4. {hopeit.dataframes-0.24.2 → hopeit_dataframes-0.25.0b1}/src/hopeit.dataframes.egg-info/PKG-INFO +2 -2
  5. hopeit_dataframes-0.25.0b1/src/hopeit.dataframes.egg-info/requires.txt +6 -0
  6. hopeit.dataframes-0.24.2/src/hopeit.dataframes.egg-info/requires.txt +0 -6
  7. {hopeit.dataframes-0.24.2 → hopeit_dataframes-0.25.0b1}/README.md +0 -0
  8. {hopeit.dataframes-0.24.2 → hopeit_dataframes-0.25.0b1}/setup.cfg +0 -0
  9. {hopeit.dataframes-0.24.2 → hopeit_dataframes-0.25.0b1}/setup.py +0 -0
  10. {hopeit.dataframes-0.24.2 → hopeit_dataframes-0.25.0b1}/src/hopeit/dataframes/__init__.py +0 -0
  11. {hopeit.dataframes-0.24.2 → hopeit_dataframes-0.25.0b1}/src/hopeit/dataframes/py.typed +0 -0
  12. {hopeit.dataframes-0.24.2 → hopeit_dataframes-0.25.0b1}/src/hopeit/dataframes/serialization/__init__.py +0 -0
  13. {hopeit.dataframes-0.24.2 → hopeit_dataframes-0.25.0b1}/src/hopeit/dataframes/serialization/dataset.py +0 -0
  14. {hopeit.dataframes-0.24.2 → hopeit_dataframes-0.25.0b1}/src/hopeit/dataframes/serialization/files.py +0 -0
  15. {hopeit.dataframes-0.24.2 → hopeit_dataframes-0.25.0b1}/src/hopeit/dataframes/serialization/settings.py +0 -0
  16. {hopeit.dataframes-0.24.2 → hopeit_dataframes-0.25.0b1}/src/hopeit/dataframes/setup/__init__.py +0 -0
  17. {hopeit.dataframes-0.24.2 → hopeit_dataframes-0.25.0b1}/src/hopeit/dataframes/setup/dataframes.py +0 -0
  18. {hopeit.dataframes-0.24.2 → hopeit_dataframes-0.25.0b1}/src/hopeit.dataframes.egg-info/SOURCES.txt +0 -0
  19. {hopeit.dataframes-0.24.2 → hopeit_dataframes-0.25.0b1}/src/hopeit.dataframes.egg-info/dependency_links.txt +0 -0
  20. {hopeit.dataframes-0.24.2 → hopeit_dataframes-0.25.0b1}/src/hopeit.dataframes.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: hopeit.dataframes
3
- Version: 0.24.2
3
+ Version: 0.25.0b1
4
4
  Summary: Hopeit Engine Dataframes Toolkit
5
5
  Home-page: https://github.com/hopeit-git/hopeit.engine
6
6
  Author: Leo Smerling and Pablo Canto
@@ -26,7 +26,7 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
26
26
  Classifier: Framework :: AsyncIO
27
27
  Requires-Python: >=3.8
28
28
  Description-Content-Type: text/markdown
29
- Requires-Dist: hopeit.engine[fs-storage]==0.24.2
29
+ Requires-Dist: hopeit.engine[fs-storage]==0.25.0b1
30
30
  Requires-Dist: pandas
31
31
  Requires-Dist: numpy
32
32
  Provides-Extra: pyarrow
@@ -12,32 +12,35 @@ Example:
12
12
  name: str
13
13
  number: int
14
14
  """
15
-
16
- from dataclasses import Field, asdict, dataclass, fields, make_dataclass
15
+ import dataclasses
17
16
  from datetime import date, datetime, timezone
18
17
  from typing import Any, Callable, Dict, Generic, Iterator, List, Optional, Type, TypeVar
19
18
 
20
19
  import numpy as np
21
20
  import pandas as pd
22
- from dataclasses_jsonschema import JsonSchemaMixin
21
+ from pydantic import create_model
22
+ from pydantic.fields import FieldInfo
23
+
23
24
  from hopeit.dataobjects import (
24
25
  DataObject,
25
26
  StreamEventMixin,
26
27
  StreamEventParams,
27
28
  dataobject,
29
+ fields,
28
30
  )
31
+ from hopeit.dataobjects.payload import Payload
29
32
 
30
33
  DataFrameT = TypeVar("DataFrameT")
31
34
 
32
35
 
33
- @dataclass
36
+ @dataclasses.dataclass
34
37
  class DataFrameMetadata(Generic[DataObject]):
35
38
  columns: List[str]
36
- fields: Dict[str, Field]
39
+ fields: Dict[str, FieldInfo]
37
40
  serialized_type: Type[DataObject]
38
41
 
39
42
 
40
- @dataclass
43
+ @dataclasses.dataclass
41
44
  class DataFrameParams:
42
45
  """
43
46
  Helper class used to access attributes in @dataframe
@@ -99,7 +102,7 @@ class DataFrameMixin(Generic[DataFrameT, DataObject]):
99
102
 
100
103
  @classmethod
101
104
  def _from_dataobjects(cls, items: Iterator[DataObject]) -> DataFrameT:
102
- return cls._from_df(pd.DataFrame(asdict(item) for item in items)) # type: ignore
105
+ return cls._from_df(pd.DataFrame(Payload.to_obj(item) for item in items)) # type: ignore[misc]
103
106
 
104
107
  @classmethod
105
108
  def _from_df_unsafe(cls, df: pd.DataFrame, **series: pd.Series) -> DataFrameT:
@@ -121,35 +124,35 @@ class DataFrameMixin(Generic[DataFrameT, DataObject]):
121
124
  for fields in self.__df.to_dict(orient="records")
122
125
  ]
123
126
 
124
- def to_json(self, *args, **kwargs) -> str:
125
- raise NotImplementedError(
126
- "Dataframe must be used inside `@dataobject(unsafe=True)` to be used as an output"
127
- )
128
-
129
- def to_dict(self, *args, **kwargs) -> Dict[str, Any]:
130
- raise NotImplementedError(
131
- "Dataframe must be used inside `@dataobject(unsafe=True)` to be used as an output"
132
- )
133
-
134
- @classmethod
135
- def from_json(cls, *args, **kwargs) -> DataObject:
136
- return cls.__dataframe__.serialized_type.from_dict(*args, **kwargs)
137
-
138
- @classmethod
139
- def from_dict(
140
- cls,
141
- *args,
142
- **kwargs,
143
- ) -> DataObject:
144
- return cls.__dataframe__.serialized_type.from_dict(*args, **kwargs)
145
-
146
- @classmethod
147
- def json_schema(cls, *args, **kwargs) -> Dict[str, Any]:
148
- if cls.__data_object__["schema"]:
149
- schema = cls.__dataframe__.serialized_type.json_schema(*args, **kwargs)
150
- schema[cls.__name__] = schema[cls.__dataframe__.serialized_type.__name__]
151
- return schema
152
- return {}
127
+ # def to_json(self, *args, **kwargs) -> str:
128
+ # raise NotImplementedError(
129
+ # "Dataframe must be used inside `@dataobject(unsafe=True)` to be used as an output"
130
+ # )
131
+
132
+ # def to_dict(self, *args, **kwargs) -> Dict[str, Any]:
133
+ # raise NotImplementedError(
134
+ # "Dataframe must be used inside `@dataobject(unsafe=True)` to be used as an output"
135
+ # )
136
+
137
+ # @classmethod
138
+ # def from_json(cls, *args, **kwargs) -> DataObject:
139
+ # return cls.__dataframe__.serialized_type.from_dict(*args, **kwargs)
140
+
141
+ # @classmethod
142
+ # def from_dict(
143
+ # cls,
144
+ # *args,
145
+ # **kwargs,
146
+ # ) -> DataObject:
147
+ # return cls.__dataframe__.serialized_type.from_dict(*args, **kwargs)
148
+
149
+ # @classmethod
150
+ # def json_schema(cls, *args, **kwargs) -> Dict[str, Any]:
151
+ # if cls.__data_object__["schema"]:
152
+ # schema = cls.__dataframe__.serialized_type.json_schema(*args, **kwargs)
153
+ # schema[cls.__name__] = schema[cls.__dataframe__.serialized_type.__name__]
154
+ # return schema
155
+ # return {}
153
156
 
154
157
  def event_id(self, *args, **kwargs) -> str:
155
158
  return ""
@@ -174,7 +177,7 @@ class DataFrameMixin(Generic[DataFrameT, DataObject]):
174
177
 
175
178
  def _coerce_datatypes(self, df: pd.DataFrame) -> Dict[str, pd.Series]:
176
179
  return {
177
- name: self.DATATYPE_MAPPING[field.type](df[name]) # type: ignore
180
+ name: self.DATATYPE_MAPPING[field.annotation](df[name]) # type: ignore
178
181
  for name, field in self.__dataframe__.fields.items()
179
182
  }
180
183
 
@@ -193,7 +196,7 @@ def dataframe(
193
196
  if hasattr(cls, "__annotations__") and hasattr(cls, "__dataclass_fields__"):
194
197
  amended_class = type(
195
198
  cls.__name__,
196
- (DataFrameMixin, JsonSchemaMixin) + cls.__mro__,
199
+ (DataFrameMixin, ) + cls.__mro__,
197
200
  dict(cls.__dict__),
198
201
  )
199
202
  setattr(amended_class, "__init__", DataFrameMixin.__init_from_series__)
@@ -201,16 +204,16 @@ def dataframe(
201
204
  return cls
202
205
 
203
206
  def add_dataframe_metadata(cls):
204
- serialized_fiels = [(field.name, field.type) for field in fields(cls)]
205
- serialized_type = make_dataclass(cls.__name__ + "_", serialized_fiels)
207
+ serialized_fields = {k: (v.annotation, v) for k, v in fields(cls).items()}
208
+ serialized_type = create_model(cls.__name__+"_", **serialized_fields)
206
209
  serialized_type = dataobject(serialized_type, unsafe=True)
207
210
 
208
211
  setattr(
209
212
  cls,
210
213
  "__dataframe__",
211
214
  DataFrameMetadata(
212
- columns=[field.name for field in fields(cls)],
213
- fields={field.name: field for field in fields(cls)},
215
+ columns=list(fields(cls).keys()),
216
+ fields=dict(fields(cls).items()),
214
217
  serialized_type=serialized_type,
215
218
  ),
216
219
  )
@@ -226,14 +229,14 @@ def dataframe(
226
229
  setattr(cls, "event_ts", StreamEventMixin.event_ts)
227
230
 
228
231
  def set_fields_optional(cls):
229
- for field in fields(cls):
232
+ for _, field in fields(cls).items():
230
233
  field.default = None
231
234
 
232
235
  def wrap(cls) -> Type[DataFrameMixin]:
233
236
  if hasattr(cls, "__dataframe__"):
234
237
  return cls
238
+ add_dataframe_metadata(cls)
235
239
  amended_class = add_dataframe_mixin(cls)
236
- add_dataframe_metadata(amended_class)
237
240
  add_dataobject_annotations(amended_class, unsafe, validate, schema)
238
241
  set_fields_optional(amended_class)
239
242
  return amended_class
@@ -5,7 +5,7 @@ Datasets behaves as DataObject so they can be used as payload
5
5
  for endpoints and streams.
6
6
  """
7
7
 
8
- from dataclasses import Field, dataclass, fields, make_dataclass
8
+ import dataclasses
9
9
  from typing import (
10
10
  Any,
11
11
  Callable,
@@ -20,19 +20,23 @@ from typing import (
20
20
  get_origin,
21
21
  )
22
22
 
23
+ from pydantic import TypeAdapter, create_model
24
+ from pydantic.fields import FieldInfo
25
+
23
26
  from hopeit.dataframes.serialization.dataset import Dataset
24
27
  from hopeit.dataobjects import (
25
28
  DataObject,
26
29
  StreamEventMixin,
27
30
  StreamEventParams,
28
31
  dataobject,
32
+ fields,
29
33
  )
30
34
 
31
35
  DataFrameObjectT = TypeVar("DataFrameObjectT")
32
36
  NoneType = type(None)
33
37
 
34
38
 
35
- @dataclass
39
+ @dataclasses.dataclass
36
40
  class DataFrameObjectMetadata(Generic[DataObject]):
37
41
  serialized_type: Type[DataObject]
38
42
 
@@ -57,15 +61,15 @@ class DataFrameObjectMixin(Generic[DataFrameObjectT]):
57
61
  and returns json-serializable dataobject
58
62
  """
59
63
  datasets = {}
60
- for field in fields(self): # type: ignore
61
- if _is_dataframe_field(field):
62
- dataframe = getattr(self, field.name)
64
+ for field_name, field in fields(self).items(): # type: ignore[arg-type]
65
+ if Dataset in {field.annotation, *get_args(field.annotation)}:
66
+ dataframe = getattr(self, field_name)
63
67
  dataset = (
64
68
  None if dataframe is None else await self.__storage.save(dataframe)
65
69
  )
66
- datasets[field.name] = dataset
70
+ datasets[field_name] = dataset
67
71
  else:
68
- datasets[field.name] = getattr(self, field.name)
72
+ datasets[field_name] = getattr(self, field_name)
69
73
  return self.__dataframeobject__.serialized_type(**datasets)
70
74
 
71
75
  @classmethod
@@ -75,45 +79,44 @@ class DataFrameObjectMixin(Generic[DataFrameObjectT]):
75
79
  """From a serialized dataframeobject, load inner `@dataframe` objects
76
80
  and returns a `@dataframeobject` instance"""
77
81
  dataframes = {}
78
- for field in fields(cls): # type: ignore
79
- if _is_dataframe_field(field):
80
- dataset = getattr(serialized, field.name)
82
+ for field_name, field in fields(cls).items(): # type: ignore[type-var]
83
+ if Dataset in {field.annotation, *get_args(field.annotation)}:
84
+ dataset = getattr(serialized, field_name)
81
85
  dataframe = (
82
86
  None if dataset is None else await cls.__storage.load(dataset)
83
87
  )
84
- dataframes[field.name] = dataframe
88
+ dataframes[field_name] = dataframe
85
89
  else:
86
- dataframes[field.name] = getattr(serialized, field.name)
90
+ dataframes[field_name] = getattr(serialized, field_name)
87
91
  return cls(**dataframes)
88
92
 
89
93
  @classmethod
90
94
  def json_schema(cls, *args, **kwargs) -> Dict[str, Any]:
91
- schema = cls.__dataframeobject__.serialized_type.json_schema(*args, **kwargs)
92
- schema[cls.__name__] = schema[cls.__dataframeobject__.serialized_type.__name__]
95
+ schema = TypeAdapter(cls.__dataframeobject__.serialized_type).json_schema(*args, **kwargs)
93
96
  return schema
94
97
 
95
- def to_json(self, *args, **kwargs) -> Dict[str, Any]:
96
- raise RuntimeError(
97
- f"`{type(self).__name__}` `@dataframeobject` cannot be converted to json directly. "
98
- "i.e. use `return await DataFrames.serialize(obj)` to return it as a reponse."
99
- )
98
+ # def to_json(self, *args, **kwargs) -> Dict[str, Any]:
99
+ # raise RuntimeError(
100
+ # f"`{type(self).__name__}` `@dataframeobject` cannot be converted to json directly. "
101
+ # "i.e. use `return await DataFrames.serialize(obj)` to return it as a response."
102
+ # )
100
103
 
101
104
 
102
- def _is_dataframe_field(field: Field) -> bool:
105
+ def _is_dataframe_field(field: FieldInfo) -> bool:
103
106
  return any(
104
107
  hasattr(field_type, "__dataframe__")
105
- for field_type in [field.type, *get_args(field.type)]
108
+ for field_type in [field.annotation, *get_args(field.annotation)]
106
109
  )
107
110
 
108
111
 
109
- def _serialized_field_type(field: Field) -> Type[Any]:
112
+ def _serialized_field_type(field_name: str, field: FieldInfo) -> Optional[Type[Any]]:
110
113
  """Computes the `@dataobject` datatype used as a result
111
114
  of serialized `@dataframeobject`
112
115
  """
113
- if hasattr(field.type, "__dataframe__"):
116
+ if hasattr(field.annotation, "__dataframe__"):
114
117
  return Dataset
115
- if get_origin(field.type) is Union:
116
- args = get_args(field.type)
118
+ if get_origin(field.annotation) is Union:
119
+ args = get_args(field.annotation)
117
120
  if (
118
121
  len(args) == 2
119
122
  and any(hasattr(field_type, "__dataframe__") for field_type in args)
@@ -122,9 +125,9 @@ def _serialized_field_type(field: Field) -> Type[Any]:
122
125
  return Optional[Dataset] # type: ignore
123
126
  if _is_dataframe_field(field):
124
127
  raise TypeError(
125
- f"field {field.name}: only `DataFrameT` or `Optional[DataFrameT]` are supported"
128
+ f"field {field_name}: only `DataFrameT` or `Optional[DataFrameT]` are supported"
126
129
  )
127
- return field.type
130
+ return field.annotation
128
131
 
129
132
 
130
133
  def dataframeobject(
@@ -145,12 +148,12 @@ def dataframeobject(
145
148
  return cls
146
149
 
147
150
  def add_dataframeobject_metadata(cls):
148
- serialized_fiels = [
149
- (field.name, _serialized_field_type(field)) for field in fields(cls)
150
- ]
151
- serialized_type = make_dataclass(cls.__name__ + "_", serialized_fiels)
151
+ serialized_fields = {
152
+ field_name: (_serialized_field_type(field_name, field_info), field_info)
153
+ for field_name, field_info in fields(cls).items()
154
+ }
155
+ serialized_type = create_model(cls.__name__+"_", **serialized_fields)
152
156
  serialized_type = dataobject(serialized_type, unsafe=True)
153
-
154
157
  setattr(
155
158
  cls,
156
159
  "__dataframeobject__",
@@ -159,11 +162,11 @@ def dataframeobject(
159
162
  ),
160
163
  )
161
164
 
162
- def add_dataobject_annotations(cls, unsafe: bool, validate: bool, schema: bool):
165
+ def add_dataobject_annotations(cls, unsafe: bool, schema: bool):
163
166
  setattr(
164
167
  cls,
165
168
  "__data_object__",
166
- {"unsafe": unsafe, "validate": validate, "schema": schema},
169
+ {"unsafe": unsafe, "schema": schema},
167
170
  )
168
171
  setattr(cls, "__stream_event__", StreamEventParams(None, None))
169
172
  setattr(cls, "event_id", StreamEventMixin.event_id)
@@ -172,10 +175,10 @@ def dataframeobject(
172
175
  def wrap(cls) -> Type[DataFrameObjectMixin]:
173
176
  if hasattr(cls, "__dataframeobject__"):
174
177
  return cls
178
+ add_dataframeobject_metadata(cls)
175
179
  amended_class = add_dataframe_mixin(cls)
176
- add_dataframeobject_metadata(amended_class)
177
180
  add_dataobject_annotations(
178
- amended_class, unsafe=False, validate=True, schema=True
181
+ amended_class, unsafe=False, schema=True
179
182
  )
180
183
  return amended_class
181
184
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: hopeit.dataframes
3
- Version: 0.24.2
3
+ Version: 0.25.0b1
4
4
  Summary: Hopeit Engine Dataframes Toolkit
5
5
  Home-page: https://github.com/hopeit-git/hopeit.engine
6
6
  Author: Leo Smerling and Pablo Canto
@@ -26,7 +26,7 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
26
26
  Classifier: Framework :: AsyncIO
27
27
  Requires-Python: >=3.8
28
28
  Description-Content-Type: text/markdown
29
- Requires-Dist: hopeit.engine[fs-storage]==0.24.2
29
+ Requires-Dist: hopeit.engine[fs-storage]==0.25.0b1
30
30
  Requires-Dist: pandas
31
31
  Requires-Dist: numpy
32
32
  Provides-Extra: pyarrow
@@ -0,0 +1,6 @@
1
+ hopeit.engine[fs-storage]==0.25.0b1
2
+ pandas
3
+ numpy
4
+
5
+ [pyarrow]
6
+ pyarrow
@@ -1,6 +0,0 @@
1
- hopeit.engine[fs-storage]==0.24.2
2
- pandas
3
- numpy
4
-
5
- [pyarrow]
6
- pyarrow