hopeit.dataframes 0.26.5__tar.gz → 0.27.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/PKG-INFO +8 -10
  2. {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/README.md +2 -2
  3. {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/pyproject.toml +6 -8
  4. {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/src/hopeit/dataframes/__init__.py +41 -21
  5. hopeit_dataframes-0.27.0/src/hopeit/dataframes/datablocks.py +598 -0
  6. hopeit_dataframes-0.27.0/src/hopeit/dataframes/dataframe.py +288 -0
  7. hopeit_dataframes-0.27.0/src/hopeit/dataframes/polars/__init__.py +50 -0
  8. {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/src/hopeit/dataframes/serialization/dataset.py +10 -7
  9. {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/src/hopeit/dataframes/serialization/files.py +48 -30
  10. {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/src/hopeit.dataframes.egg-info/PKG-INFO +8 -10
  11. {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/src/hopeit.dataframes.egg-info/SOURCES.txt +2 -4
  12. hopeit_dataframes-0.27.0/src/hopeit.dataframes.egg-info/requires.txt +5 -0
  13. hopeit_dataframes-0.26.5/src/hopeit/dataframes/datablocks.py +0 -304
  14. hopeit_dataframes-0.26.5/src/hopeit/dataframes/dataframe.py +0 -274
  15. hopeit_dataframes-0.26.5/src/hopeit/dataframes/pandas/numpy_mock.py +0 -3
  16. hopeit_dataframes-0.26.5/src/hopeit/dataframes/pandas/pandas_mock.py +0 -13
  17. hopeit_dataframes-0.26.5/src/hopeit/dataframes/setup/__init__.py +0 -0
  18. hopeit_dataframes-0.26.5/src/hopeit.dataframes.egg-info/requires.txt +0 -7
  19. {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/setup.cfg +0 -0
  20. {hopeit_dataframes-0.26.5/src/hopeit/dataframes/pandas → hopeit_dataframes-0.27.0/src/hopeit/dataframes/polars}/py.typed +0 -0
  21. {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/src/hopeit/dataframes/py.typed +0 -0
  22. {hopeit_dataframes-0.26.5/src/hopeit/dataframes/pandas → hopeit_dataframes-0.27.0/src/hopeit/dataframes/serialization}/__init__.py +0 -0
  23. {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/src/hopeit/dataframes/serialization/protocol.py +0 -0
  24. {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/src/hopeit/dataframes/serialization/py.typed +0 -0
  25. {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/src/hopeit/dataframes/serialization/settings.py +0 -0
  26. {hopeit_dataframes-0.26.5/src/hopeit/dataframes/serialization → hopeit_dataframes-0.27.0/src/hopeit/dataframes/setup}/__init__.py +0 -0
  27. {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/src/hopeit/dataframes/setup/dataframes.py +0 -0
  28. {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/src/hopeit/dataframes/setup/py.typed +0 -0
  29. {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/src/hopeit/dataframes/setup/register_database.py +0 -0
  30. {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/src/hopeit/dataframes/setup/registry.py +0 -0
  31. {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/src/hopeit.dataframes.egg-info/dependency_links.txt +0 -0
  32. {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/src/hopeit.dataframes.egg-info/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hopeit.dataframes
3
- Version: 0.26.5
4
- Summary: Hopeit Engine Dataframes for Pandas
3
+ Version: 0.27.0
4
+ Summary: Hopeit Engine Dataframes for Polars
5
5
  Author-email: Leo Smerling & Pablo Canto <contact@hopeit.com.ar>, Leo Smerling <contact@hopeit.com.ar>, Pablo Canto <contact@hopeit.com.ar>
6
6
  License: Apache 2
7
7
  Project-URL: Homepage, https://github.com/hopeit-git/hopeit.engine
@@ -24,12 +24,10 @@ Classifier: Topic :: Internet :: WWW/HTTP
24
24
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
25
25
  Classifier: Framework :: AsyncIO
26
26
  Description-Content-Type: text/markdown
27
- Requires-Dist: hopeit.engine>=0.26.5
28
- Requires-Dist: hopeit.fs-storage>=0.26.5
29
- Provides-Extra: pandas
30
- Requires-Dist: pandas>=2.2.3; extra == "pandas"
31
- Requires-Dist: pyarrow>=19.0.1; extra == "pandas"
32
- Requires-Dist: numpy>=1.26.4; extra == "pandas"
27
+ Requires-Dist: hopeit.engine>=0.27.0
28
+ Requires-Dist: hopeit.fs-storage>=0.27.0
29
+ Provides-Extra: polars
30
+ Requires-Dist: polars>=1.32.0; extra == "polars"
33
31
 
34
32
  # hopeit.engine dataframes plugin
35
33
 
@@ -47,12 +45,12 @@ pip install hopeit.engine[dataframes]
47
45
 
48
46
  ### hopeit.dataframes
49
47
 
50
- This plugin introduces dataclasses annotations to work with `pandas` dataframes
48
+ This plugin introduces dataclasses annotations to work with `polars` dataframes
51
49
  as other dataobjects:
52
50
 
53
51
  `@dataframe` annotation allows a dataclass to become the schema and container for a dataframe
54
52
  `@dataframeobject` annotation, acts as @dataobject with support to have dataframe annotated fields
55
- `DataFrames` class, provides an api to create, serialize, and access pandas dataframe
53
+ `DataFrames` class, provides an api to create, serialize, and access polars dataframe
56
54
 
57
55
  Features:
58
56
  -Type coercion for @dataframe fields
@@ -14,12 +14,12 @@ pip install hopeit.engine[dataframes]
14
14
 
15
15
  ### hopeit.dataframes
16
16
 
17
- This plugin introduces dataclasses annotations to work with `pandas` dataframes
17
+ This plugin introduces dataclasses annotations to work with `polars` dataframes
18
18
  as other dataobjects:
19
19
 
20
20
  `@dataframe` annotation allows a dataclass to become the schema and container for a dataframe
21
21
  `@dataframeobject` annotation, acts as @dataobject with support to have dataframe annotated fields
22
- `DataFrames` class, provides an api to create, serialize, and access pandas dataframe
22
+ `DataFrames` class, provides an api to create, serialize, and access polars dataframe
23
23
 
24
24
  Features:
25
25
  -Type coercion for @dataframe fields
@@ -4,9 +4,9 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "hopeit.dataframes"
7
- version = "0.26.5"
7
+ version = "0.27.0"
8
8
 
9
- description = "Hopeit Engine Dataframes for Pandas"
9
+ description = "Hopeit Engine Dataframes for Polars"
10
10
  dynamic = ["readme"]
11
11
 
12
12
  license = { text = "Apache 2" }
@@ -33,15 +33,13 @@ classifiers = [
33
33
  ]
34
34
 
35
35
  dependencies = [
36
- "hopeit.engine>=0.26.5",
37
- "hopeit.fs-storage>=0.26.5"
36
+ "hopeit.engine>=0.27.0",
37
+ "hopeit.fs-storage>=0.27.0",
38
38
  ]
39
39
 
40
40
  [project.optional-dependencies]
41
- pandas = [
42
- "pandas>=2.2.3",
43
- "pyarrow>=19.0.1",
44
- "numpy>=1.26.4"
41
+ polars = [
42
+ "polars>=1.32.0",
45
43
  ]
46
44
 
47
45
  [project.urls]
@@ -2,7 +2,7 @@
2
2
  hopeit.engine dataframes plugin entry point
3
3
 
4
4
  This module exposes the 2 main constructions to be used inside apps,
5
- to extend @dataobject functionallity supporting working with `pandas DataFrames`
5
+ to extend @dataobject functionality supporting working with `polars DataFrames`
6
6
  `@dataframe` dataclass annotation
7
7
  `DataFrames` class to handle manipulation of dataframe/dataframeobjects
8
8
 
@@ -10,7 +10,7 @@ Usage:
10
10
  ```
11
11
  from typing import List
12
12
 
13
- import pandas as pd
13
+ import polars as pl
14
14
 
15
15
  from hopeit.dataframes.serialization.settings import DatasetSerialization
16
16
  from hopeit.dataframes import DataFrames, Dataset, dataframe
@@ -41,7 +41,7 @@ settings = DataframesSettings(...) # settings example in `plugin-config.json`
41
41
  await registry.init_registry(settings)
42
42
 
43
43
  # Usage
44
- df = pd.DataFrame([ # Create or load a pandas DataFrame
44
+ df = pl.DataFrame([ # Create or load a polars DataFrame
45
45
  {"field1": 1, "field2": "text1"},
46
46
  {"field1": 2, "field2": "text2"},
47
47
  ])
@@ -70,16 +70,12 @@ print(Payload.to_json(my_json_response))
70
70
  ```
71
71
  """
72
72
 
73
- from typing import Dict, Generic, Iterator, List, Type
73
+ from typing import Any, Dict, Generic, Iterator, List, Type
74
74
 
75
75
  try:
76
- import numpy as np
77
- import pandas as pd
76
+ import polars as pl
78
77
  except ImportError:
79
- # Supports using `@dataframe` annotation for dataobjects definitions
80
- # without installing pandas and numpy. Useful for API-only projects.
81
- import hopeit.dataframes.pandas.numpy_mock as np # type: ignore[no-redef]
82
- import hopeit.dataframes.pandas.pandas_mock as pd # type: ignore[no-redef]
78
+ import hopeit.dataframes.polars as pl # type: ignore # Polars is optional; set to a mock if not installed
83
79
 
84
80
  from hopeit.dataframes.dataframe import DataFrameT, dataframe
85
81
  from hopeit.dataframes.serialization.dataset import Dataset
@@ -96,16 +92,31 @@ class DataFrames(Generic[DataFrameT, DataObject]):
96
92
 
97
93
  @staticmethod
98
94
  def from_df(
99
- datatype: Type[DataFrameT], df: pd.DataFrame, **series: Dict[str, pd.Series]
95
+ datatype: Type[DataFrameT], df: pl.DataFrame, **series: Dict[str, pl.Series]
100
96
  ) -> DataFrameT:
101
- """Create a `@dataframe` instance of a particular `datatype` from a pandas DataFrame.
97
+ """Create a `@dataframe` instance of a particular `datatype` from a polars DataFrame.
102
98
  Optionally, add or override series.
103
99
  """
104
100
  return datatype._from_df(df, **series) # type: ignore # pylint: disable=protected-access
105
101
 
102
+ @staticmethod
103
+ def from_pandas(
104
+ datatype: Type[DataFrameT], pandas_df: Any, **series: Dict[str, pl.Series]
105
+ ) -> DataFrameT:
106
+ """Create a `@dataframe` instance of a particular `datatype` from a pandas DataFrame.
107
+ Optionally, add or override series.
108
+ """
109
+ return datatype._from_df( # type: ignore[attr-defined]
110
+ pl.from_pandas(
111
+ pandas_df,
112
+ schema_overrides=datatype.__dataframe__.schema, # type: ignore[attr-defined]
113
+ ),
114
+ **series,
115
+ ) # type: ignore # pylint: disable=protected-access
116
+
106
117
  @staticmethod
107
118
  def from_dataframe(
108
- datatype: Type[DataFrameT], obj: DataFrameT, **series: Dict[str, pd.Series]
119
+ datatype: Type[DataFrameT], obj: DataFrameT, **series: Dict[str, pl.Series]
109
120
  ) -> DataFrameT:
110
121
  """Creates a new `@dataframe` object extracting fields from another `@dataframe`"""
111
122
  return datatype._from_df(obj._df, **series) # type: ignore # pylint: disable=protected-access
@@ -118,16 +129,25 @@ class DataFrames(Generic[DataFrameT, DataObject]):
118
129
  return datatype._from_dataobjects(dataobjects) # type: ignore # pylint: disable=protected-access
119
130
 
120
131
  @staticmethod
121
- def to_dataobjects(obj: DataFrameT, *, normalize_null_values: bool = False) -> List[DataObject]:
132
+ def to_dataobjects(obj: DataFrameT) -> List[DataObject]:
122
133
  """Converts `@dataframe` object to a list of standard `@dataobject`s"""
123
- return obj._to_dataobjects(normalize_null_values) # type: ignore # pylint: disable=protected-access
134
+ return obj._to_dataobjects() # type: ignore # pylint: disable=protected-access
124
135
 
125
136
  @staticmethod
126
- def from_array(datatype: Type[DataFrameT], array: np.ndarray) -> DataFrameT:
127
- """Creates `@dataframe` object from a numpy array"""
128
- return datatype._from_array(array) # type: ignore # pylint: disable=protected-access
137
+ def df(obj: DataFrameT) -> pl.DataFrame:
138
+ """Provides access to the internal `polars` dataframe of a `@dataframe` object"""
139
+ return obj._df # type: ignore # pylint: disable=protected-access
129
140
 
130
141
  @staticmethod
131
- def df(obj: DataFrameT) -> pd.DataFrame:
132
- """Provides acces to the internal pandas dataframe of a `@dataframe` object"""
133
- return obj._df # type: ignore # pylint: disable=protected-access
142
+ def to_pandas(obj: DataFrameT) -> Any:
143
+ """Returns internal dataframe converted to pandas"""
144
+ return obj._df.to_pandas() # type: ignore # pylint: disable=protected-access
145
+
146
+ @staticmethod
147
+ def schema(datatype: Type[DataFrameT]) -> pl.Schema:
148
+ if pl is None:
149
+ raise RuntimeError(
150
+ "`polars` needs to be installed to access dataframe schema. "
151
+ "Run `pip install hopeit.dataframes[polars]`"
152
+ )
153
+ return datatype.__dataframe__.schema # type: ignore # pylint: disable=protected-access