hopeit.dataframes 0.25.1__tar.gz → 0.25.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22) hide show
  1. {hopeit_dataframes-0.25.1 → hopeit_dataframes-0.25.2}/PKG-INFO +2 -2
  2. {hopeit_dataframes-0.25.1 → hopeit_dataframes-0.25.2}/src/hopeit/dataframes/__init__.py +2 -2
  3. {hopeit_dataframes-0.25.1 → hopeit_dataframes-0.25.2}/src/hopeit/dataframes/dataframe.py +30 -6
  4. {hopeit_dataframes-0.25.1 → hopeit_dataframes-0.25.2}/src/hopeit.dataframes.egg-info/PKG-INFO +2 -2
  5. hopeit_dataframes-0.25.2/src/hopeit.dataframes.egg-info/requires.txt +6 -0
  6. hopeit_dataframes-0.25.1/src/hopeit.dataframes.egg-info/requires.txt +0 -6
  7. {hopeit_dataframes-0.25.1 → hopeit_dataframes-0.25.2}/README.md +0 -0
  8. {hopeit_dataframes-0.25.1 → hopeit_dataframes-0.25.2}/pyproject.toml +0 -0
  9. {hopeit_dataframes-0.25.1 → hopeit_dataframes-0.25.2}/setup.cfg +0 -0
  10. {hopeit_dataframes-0.25.1 → hopeit_dataframes-0.25.2}/setup.py +0 -0
  11. {hopeit_dataframes-0.25.1 → hopeit_dataframes-0.25.2}/src/hopeit/dataframes/py.typed +0 -0
  12. {hopeit_dataframes-0.25.1 → hopeit_dataframes-0.25.2}/src/hopeit/dataframes/serialization/__init__.py +0 -0
  13. {hopeit_dataframes-0.25.1 → hopeit_dataframes-0.25.2}/src/hopeit/dataframes/serialization/dataset.py +0 -0
  14. {hopeit_dataframes-0.25.1 → hopeit_dataframes-0.25.2}/src/hopeit/dataframes/serialization/files.py +0 -0
  15. {hopeit_dataframes-0.25.1 → hopeit_dataframes-0.25.2}/src/hopeit/dataframes/serialization/py.typed +0 -0
  16. {hopeit_dataframes-0.25.1 → hopeit_dataframes-0.25.2}/src/hopeit/dataframes/serialization/settings.py +0 -0
  17. {hopeit_dataframes-0.25.1 → hopeit_dataframes-0.25.2}/src/hopeit/dataframes/setup/__init__.py +0 -0
  18. {hopeit_dataframes-0.25.1 → hopeit_dataframes-0.25.2}/src/hopeit/dataframes/setup/dataframes.py +0 -0
  19. {hopeit_dataframes-0.25.1 → hopeit_dataframes-0.25.2}/src/hopeit/dataframes/setup/py.typed +0 -0
  20. {hopeit_dataframes-0.25.1 → hopeit_dataframes-0.25.2}/src/hopeit.dataframes.egg-info/SOURCES.txt +0 -0
  21. {hopeit_dataframes-0.25.1 → hopeit_dataframes-0.25.2}/src/hopeit.dataframes.egg-info/dependency_links.txt +0 -0
  22. {hopeit_dataframes-0.25.1 → hopeit_dataframes-0.25.2}/src/hopeit.dataframes.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: hopeit.dataframes
3
- Version: 0.25.1
3
+ Version: 0.25.2
4
4
  Summary: Hopeit Engine Dataframes Toolkit
5
5
  Author-email: Leo Smerling <contact@hopeit.com.ar>, Pablo Canto <contact@hopeit.com.ar>
6
6
  License: Apache 2
@@ -24,7 +24,7 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
24
24
  Classifier: Framework :: AsyncIO
25
25
  Requires-Python: >=3.9
26
26
  Description-Content-Type: text/plain
27
- Requires-Dist: hopeit.engine[fs-storage]==0.25.1
27
+ Requires-Dist: hopeit.engine[fs-storage]==0.25.2
28
28
  Requires-Dist: pandas
29
29
  Requires-Dist: numpy
30
30
  Provides-Extra: pyarrow
@@ -118,9 +118,9 @@ class DataFrames(Generic[DataFrameT, DataObject]):
118
118
  return datatype._from_dataobjects(dataobjects) # type: ignore # pylint: disable=protected-access
119
119
 
120
120
  @staticmethod
121
- def to_dataobjects(obj: DataFrameT) -> List[DataObject]:
121
+ def to_dataobjects(obj: DataFrameT, *, normalize_null_values: bool = False) -> List[DataObject]:
122
122
  """Converts `@dataframe` object to a list of standard `@dataobject`s"""
123
- return obj._to_dataobjects() # type: ignore # pylint: disable=protected-access
123
+ return obj._to_dataobjects(normalize_null_values) # type: ignore # pylint: disable=protected-access
124
124
 
125
125
  @staticmethod
126
126
  def from_array(datatype: Type[DataFrameT], array: np.ndarray) -> DataFrameT:
@@ -37,6 +37,12 @@ def _series_to_int(field_name: str, x: pd.Series) -> pd.Series:
37
37
  return x.astype(np.int64)
38
38
 
39
39
 
40
+ def _series_to_bool(field_name: str, x: pd.Series) -> pd.Series:
41
+ if x.isnull().values.any(): # type: ignore[union-attr]
42
+ raise ValueError(f"Field `{field_name}` is not nullable")
43
+ return x.astype(bool)
44
+
45
+
40
46
  def _series_to_float(field_name: str, x: pd.Series) -> pd.Series:
41
47
  if x.isnull().values.any(): # type: ignore[union-attr]
42
48
  raise ValueError(f"Field `{field_name}` is not nullable")
@@ -51,15 +57,19 @@ def _series_to_str(field_name: str, x: pd.Series) -> pd.Series:
51
57
 
52
58
  # Functions to do type coercion
53
59
  def _series_to_int_nullable(_field_name: str, x: pd.Series) -> pd.Series:
54
- return x[x.notna()].astype(np.int64)
60
+ return x.dropna().astype(np.int64)
61
+
62
+
63
+ def _series_to_bool_nullable(_field_name: str, x: pd.Series) -> pd.Series:
64
+ return x.dropna().astype(bool)
55
65
 
56
66
 
57
67
  def _series_to_float_nullable(_field_name: str, x: pd.Series) -> pd.Series:
58
- return x[x.notna()].astype(np.float64)
68
+ return x.dropna().astype(np.float64)
59
69
 
60
70
 
61
71
  def _series_to_str_nullable(_field_name: str, x: pd.Series) -> pd.Series:
62
- return x[x.notna()].astype(str)
72
+ return x.dropna().astype(str)
63
73
 
64
74
 
65
75
  def _series_to_datetime(field_name: str, x: pd.Series) -> pd.Series:
@@ -75,11 +85,11 @@ def _series_to_utc_datetime(field_name: str, x: pd.Series) -> pd.Series:
75
85
 
76
86
 
77
87
  def _series_to_datetime_nullable(_field_name: str, x: pd.Series) -> pd.Series:
78
- return pd.to_datetime(x)
88
+ return pd.to_datetime(x.dropna())
79
89
 
80
90
 
81
91
  def _series_to_utc_datetime_nullable(_field_name: str, x: pd.Series) -> pd.Series:
82
- return pd.to_datetime(x, utc=True)
92
+ return pd.to_datetime(x.dropna(), utc=True)
83
93
 
84
94
 
85
95
  class DataFrameMixin(Generic[DataFrameT, DataObject]):
@@ -89,13 +99,17 @@ class DataFrameMixin(Generic[DataFrameT, DataObject]):
89
99
  Do not use this class directly, instead use `@dataframe` class decorator.
90
100
  """
91
101
 
102
+ DataFrameValueType = Union[int, bool, float, str, date, datetime, None]
103
+
92
104
  DATATYPE_MAPPING = {
93
105
  int: _series_to_int,
106
+ bool: _series_to_bool,
94
107
  float: _series_to_float,
95
108
  str: _series_to_str,
96
109
  date: _series_to_datetime,
97
110
  datetime: _series_to_utc_datetime,
98
111
  Union[int, None]: _series_to_int_nullable,
112
+ Union[bool, None]: _series_to_bool_nullable,
99
113
  Union[float, None]: _series_to_float_nullable,
100
114
  Union[str, None]: _series_to_str_nullable,
101
115
  Union[date, None]: _series_to_datetime_nullable,
@@ -145,7 +159,17 @@ class DataFrameMixin(Generic[DataFrameT, DataObject]):
145
159
  def __getitem__(self, key) -> "DataFrameT":
146
160
  return self._from_df(self.__df[key])
147
161
 
148
- def _to_dataobjects(self) -> List[DataObject]:
162
+ def _normalize_null_values(
163
+ self, value: Union[DataFrameValueType, pd.Timestamp]
164
+ ) -> DataFrameValueType:
165
+ return None if pd.isnull(value) else value
166
+
167
+ def _to_dataobjects(self, normalize_null_values: bool) -> List[DataObject]:
168
+ if normalize_null_values:
169
+ return [
170
+ self.DataObject(**{k: self._normalize_null_values(v) for k, v in fields.items()})
171
+ for fields in self.__df.to_dict(orient="records")
172
+ ]
149
173
  return [self.DataObject(**fields) for fields in self.__df.to_dict(orient="records")]
150
174
 
151
175
  def event_id(self, *args, **kwargs) -> str:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: hopeit.dataframes
3
- Version: 0.25.1
3
+ Version: 0.25.2
4
4
  Summary: Hopeit Engine Dataframes Toolkit
5
5
  Author-email: Leo Smerling <contact@hopeit.com.ar>, Pablo Canto <contact@hopeit.com.ar>
6
6
  License: Apache 2
@@ -24,7 +24,7 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
24
24
  Classifier: Framework :: AsyncIO
25
25
  Requires-Python: >=3.9
26
26
  Description-Content-Type: text/plain
27
- Requires-Dist: hopeit.engine[fs-storage]==0.25.1
27
+ Requires-Dist: hopeit.engine[fs-storage]==0.25.2
28
28
  Requires-Dist: pandas
29
29
  Requires-Dist: numpy
30
30
  Provides-Extra: pyarrow
@@ -0,0 +1,6 @@
1
+ hopeit.engine[fs-storage]==0.25.2
2
+ pandas
3
+ numpy
4
+
5
+ [pyarrow]
6
+ pyarrow
@@ -1,6 +0,0 @@
1
- hopeit.engine[fs-storage]==0.25.1
2
- pandas
3
- numpy
4
-
5
- [pyarrow]
6
- pyarrow