hopeit.dataframes 0.26.5__tar.gz → 0.27.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/PKG-INFO +8 -10
- {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/README.md +2 -2
- {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/pyproject.toml +6 -8
- {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/src/hopeit/dataframes/__init__.py +41 -21
- hopeit_dataframes-0.27.0/src/hopeit/dataframes/datablocks.py +598 -0
- hopeit_dataframes-0.27.0/src/hopeit/dataframes/dataframe.py +288 -0
- hopeit_dataframes-0.27.0/src/hopeit/dataframes/polars/__init__.py +50 -0
- {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/src/hopeit/dataframes/serialization/dataset.py +10 -7
- {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/src/hopeit/dataframes/serialization/files.py +48 -30
- {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/src/hopeit.dataframes.egg-info/PKG-INFO +8 -10
- {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/src/hopeit.dataframes.egg-info/SOURCES.txt +2 -4
- hopeit_dataframes-0.27.0/src/hopeit.dataframes.egg-info/requires.txt +5 -0
- hopeit_dataframes-0.26.5/src/hopeit/dataframes/datablocks.py +0 -304
- hopeit_dataframes-0.26.5/src/hopeit/dataframes/dataframe.py +0 -274
- hopeit_dataframes-0.26.5/src/hopeit/dataframes/pandas/numpy_mock.py +0 -3
- hopeit_dataframes-0.26.5/src/hopeit/dataframes/pandas/pandas_mock.py +0 -13
- hopeit_dataframes-0.26.5/src/hopeit/dataframes/setup/__init__.py +0 -0
- hopeit_dataframes-0.26.5/src/hopeit.dataframes.egg-info/requires.txt +0 -7
- {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/setup.cfg +0 -0
- {hopeit_dataframes-0.26.5/src/hopeit/dataframes/pandas → hopeit_dataframes-0.27.0/src/hopeit/dataframes/polars}/py.typed +0 -0
- {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/src/hopeit/dataframes/py.typed +0 -0
- {hopeit_dataframes-0.26.5/src/hopeit/dataframes/pandas → hopeit_dataframes-0.27.0/src/hopeit/dataframes/serialization}/__init__.py +0 -0
- {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/src/hopeit/dataframes/serialization/protocol.py +0 -0
- {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/src/hopeit/dataframes/serialization/py.typed +0 -0
- {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/src/hopeit/dataframes/serialization/settings.py +0 -0
- {hopeit_dataframes-0.26.5/src/hopeit/dataframes/serialization → hopeit_dataframes-0.27.0/src/hopeit/dataframes/setup}/__init__.py +0 -0
- {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/src/hopeit/dataframes/setup/dataframes.py +0 -0
- {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/src/hopeit/dataframes/setup/py.typed +0 -0
- {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/src/hopeit/dataframes/setup/register_database.py +0 -0
- {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/src/hopeit/dataframes/setup/registry.py +0 -0
- {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/src/hopeit.dataframes.egg-info/dependency_links.txt +0 -0
- {hopeit_dataframes-0.26.5 → hopeit_dataframes-0.27.0}/src/hopeit.dataframes.egg-info/top_level.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hopeit.dataframes
|
|
3
|
-
Version: 0.26.5
|
|
4
|
-
Summary: Hopeit Engine Dataframes for
|
|
3
|
+
Version: 0.27.0
|
|
4
|
+
Summary: Hopeit Engine Dataframes for Polars
|
|
5
5
|
Author-email: Leo Smerling & Pablo Canto <contact@hopeit.com.ar>, Leo Smerling <contact@hopeit.com.ar>, Pablo Canto <contact@hopeit.com.ar>
|
|
6
6
|
License: Apache 2
|
|
7
7
|
Project-URL: Homepage, https://github.com/hopeit-git/hopeit.engine
|
|
@@ -24,12 +24,10 @@ Classifier: Topic :: Internet :: WWW/HTTP
|
|
|
24
24
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
25
25
|
Classifier: Framework :: AsyncIO
|
|
26
26
|
Description-Content-Type: text/markdown
|
|
27
|
-
Requires-Dist: hopeit.engine>=0.
|
|
28
|
-
Requires-Dist: hopeit.fs-storage>=0.
|
|
29
|
-
Provides-Extra:
|
|
30
|
-
Requires-Dist:
|
|
31
|
-
Requires-Dist: pyarrow>=19.0.1; extra == "pandas"
|
|
32
|
-
Requires-Dist: numpy>=1.26.4; extra == "pandas"
|
|
27
|
+
Requires-Dist: hopeit.engine>=0.27.0
|
|
28
|
+
Requires-Dist: hopeit.fs-storage>=0.27.0
|
|
29
|
+
Provides-Extra: polars
|
|
30
|
+
Requires-Dist: polars>=1.32.0; extra == "polars"
|
|
33
31
|
|
|
34
32
|
# hopeit.engine dataframes plugin
|
|
35
33
|
|
|
@@ -47,12 +45,12 @@ pip install hopeit.engine[dataframes]
|
|
|
47
45
|
|
|
48
46
|
### hopeit.dataframes
|
|
49
47
|
|
|
50
|
-
This plugin introduces dataclasses annotations to work with `
|
|
48
|
+
This plugin introduces dataclasses annotations to work with `polars` dataframes
|
|
51
49
|
as other dataobjects:
|
|
52
50
|
|
|
53
51
|
`@dataframe` annotation allows a dataclass to become the schema and container for a dataframe
|
|
54
52
|
`@dataframeobject` annotation, acts as @dataobject with support to have dataframe annotated fields
|
|
55
|
-
`DataFrames` class, provides an api to create, serialize, and access
|
|
53
|
+
`DataFrames` class, provides an api to create, serialize, and access polars dataframes
|
|
56
54
|
|
|
57
55
|
Features:
|
|
58
56
|
-Type coercion for @dataframe fields
|
|
@@ -14,12 +14,12 @@ pip install hopeit.engine[dataframes]
|
|
|
14
14
|
|
|
15
15
|
### hopeit.dataframes
|
|
16
16
|
|
|
17
|
-
This plugin introduces dataclasses annotations to work with `
|
|
17
|
+
This plugin introduces dataclasses annotations to work with `polars` dataframes
|
|
18
18
|
as other dataobjects:
|
|
19
19
|
|
|
20
20
|
`@dataframe` annotation allows a dataclass to become the schema and container for a dataframe
|
|
21
21
|
`@dataframeobject` annotation, acts as @dataobject with support to have dataframe annotated fields
|
|
22
|
-
`DataFrames` class, provides an api to create, serialize, and access
|
|
22
|
+
`DataFrames` class, provides an api to create, serialize, and access polars dataframes
|
|
23
23
|
|
|
24
24
|
Features:
|
|
25
25
|
-Type coercion for @dataframe fields
|
|
@@ -4,9 +4,9 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "hopeit.dataframes"
|
|
7
|
-
version = "0.26.5"
|
|
7
|
+
version = "0.27.0"
|
|
8
8
|
|
|
9
|
-
description = "Hopeit Engine Dataframes for
|
|
9
|
+
description = "Hopeit Engine Dataframes for Polars"
|
|
10
10
|
dynamic = ["readme"]
|
|
11
11
|
|
|
12
12
|
license = { text = "Apache 2" }
|
|
@@ -33,15 +33,13 @@ classifiers = [
|
|
|
33
33
|
]
|
|
34
34
|
|
|
35
35
|
dependencies = [
|
|
36
|
-
"hopeit.engine>=0.
|
|
37
|
-
"hopeit.fs-storage>=0.
|
|
36
|
+
"hopeit.engine>=0.27.0",
|
|
37
|
+
"hopeit.fs-storage>=0.27.0",
|
|
38
38
|
]
|
|
39
39
|
|
|
40
40
|
[project.optional-dependencies]
|
|
41
|
-
|
|
42
|
-
"
|
|
43
|
-
"pyarrow>=19.0.1",
|
|
44
|
-
"numpy>=1.26.4"
|
|
41
|
+
polars = [
|
|
42
|
+
"polars>=1.32.0",
|
|
45
43
|
]
|
|
46
44
|
|
|
47
45
|
[project.urls]
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
hopeit.engine dataframes plugin entry point
|
|
3
3
|
|
|
4
4
|
This module exposes the 2 main constructions to be used inside apps,
|
|
5
|
-
to extend @dataobject functionallity supporting working with `
|
|
5
|
+
to extend @dataobject functionality supporting working with `polars DataFrames`
|
|
6
6
|
`@dataframe` dataclass annotation
|
|
7
7
|
`DataFrames` class to handle manipulation of dataframe/dataframeobjects
|
|
8
8
|
|
|
@@ -10,7 +10,7 @@ Usage:
|
|
|
10
10
|
```
|
|
11
11
|
from typing import List
|
|
12
12
|
|
|
13
|
-
import
|
|
13
|
+
import polars as pl
|
|
14
14
|
|
|
15
15
|
from hopeit.dataframes.serialization.settings import DatasetSerialization
|
|
16
16
|
from hopeit.dataframes import DataFrames, Dataset, dataframe
|
|
@@ -41,7 +41,7 @@ settings = DataframesSettings(...) # settings example in `plugin-config.json`
|
|
|
41
41
|
await registry.init_registry(settings)
|
|
42
42
|
|
|
43
43
|
# Usage
|
|
44
|
-
df = pd.DataFrame([ # Create or load a
|
|
44
|
+
df = pl.DataFrame([ # Create or load a polars DataFrame
|
|
45
45
|
{"field1": 1, "field2": "text1"},
|
|
46
46
|
{"field1": 2, "field2": "text2"},
|
|
47
47
|
])
|
|
@@ -70,16 +70,12 @@ print(Payload.to_json(my_json_response))
|
|
|
70
70
|
```
|
|
71
71
|
"""
|
|
72
72
|
|
|
73
|
-
from typing import Dict, Generic, Iterator, List, Type
|
|
73
|
+
from typing import Any, Dict, Generic, Iterator, List, Type
|
|
74
74
|
|
|
75
75
|
try:
|
|
76
|
-
import
|
|
77
|
-
import pandas as pd
|
|
76
|
+
import polars as pl
|
|
78
77
|
except ImportError:
|
|
79
|
-
#
|
|
80
|
-
# without installing pandas and numpy. Useful for API-only projects.
|
|
81
|
-
import hopeit.dataframes.pandas.numpy_mock as np # type: ignore[no-redef]
|
|
82
|
-
import hopeit.dataframes.pandas.pandas_mock as pd # type: ignore[no-redef]
|
|
78
|
+
import hopeit.dataframes.polars as pl # type: ignore # Polars is optional; set to a mock if not installed
|
|
83
79
|
|
|
84
80
|
from hopeit.dataframes.dataframe import DataFrameT, dataframe
|
|
85
81
|
from hopeit.dataframes.serialization.dataset import Dataset
|
|
@@ -96,16 +92,31 @@ class DataFrames(Generic[DataFrameT, DataObject]):
|
|
|
96
92
|
|
|
97
93
|
@staticmethod
|
|
98
94
|
def from_df(
|
|
99
|
-
datatype: Type[DataFrameT], df:
|
|
95
|
+
datatype: Type[DataFrameT], df: pl.DataFrame, **series: Dict[str, pl.Series]
|
|
100
96
|
) -> DataFrameT:
|
|
101
|
-
"""Create a `@dataframe` instance of a particular `datatype` from a
|
|
97
|
+
"""Create a `@dataframe` instance of a particular `datatype` from a polars DataFrame.
|
|
102
98
|
Optionally, add or override series.
|
|
103
99
|
"""
|
|
104
100
|
return datatype._from_df(df, **series) # type: ignore # pylint: disable=protected-access
|
|
105
101
|
|
|
102
|
+
@staticmethod
|
|
103
|
+
def from_pandas(
|
|
104
|
+
datatype: Type[DataFrameT], pandas_df: Any, **series: Dict[str, pl.Series]
|
|
105
|
+
) -> DataFrameT:
|
|
106
|
+
"""Create a `@dataframe` instance of a particular `datatype` from a pandas DataFrame.
|
|
107
|
+
Optionally, add or override series.
|
|
108
|
+
"""
|
|
109
|
+
return datatype._from_df( # type: ignore[attr-defined]
|
|
110
|
+
pl.from_pandas(
|
|
111
|
+
pandas_df,
|
|
112
|
+
schema_overrides=datatype.__dataframe__.schema, # type: ignore[attr-defined]
|
|
113
|
+
),
|
|
114
|
+
**series,
|
|
115
|
+
) # type: ignore # pylint: disable=protected-access
|
|
116
|
+
|
|
106
117
|
@staticmethod
|
|
107
118
|
def from_dataframe(
|
|
108
|
-
datatype: Type[DataFrameT], obj: DataFrameT, **series: Dict[str,
|
|
119
|
+
datatype: Type[DataFrameT], obj: DataFrameT, **series: Dict[str, pl.Series]
|
|
109
120
|
) -> DataFrameT:
|
|
110
121
|
"""Creates a new `@dataframe` object extracting fields from another `@dataframe`"""
|
|
111
122
|
return datatype._from_df(obj._df, **series) # type: ignore # pylint: disable=protected-access
|
|
@@ -118,16 +129,25 @@ class DataFrames(Generic[DataFrameT, DataObject]):
|
|
|
118
129
|
return datatype._from_dataobjects(dataobjects) # type: ignore # pylint: disable=protected-access
|
|
119
130
|
|
|
120
131
|
@staticmethod
|
|
121
|
-
def to_dataobjects(obj: DataFrameT
|
|
132
|
+
def to_dataobjects(obj: DataFrameT) -> List[DataObject]:
|
|
122
133
|
"""Converts `@dataframe` object to a list of standard `@dataobject`s"""
|
|
123
|
-
return obj._to_dataobjects(
|
|
134
|
+
return obj._to_dataobjects() # type: ignore # pylint: disable=protected-access
|
|
124
135
|
|
|
125
136
|
@staticmethod
|
|
126
|
-
def
|
|
127
|
-
"""
|
|
128
|
-
return
|
|
137
|
+
def df(obj: DataFrameT) -> pl.DataFrame:
|
|
138
|
+
"""Provides access to the internal `polars` dataframe of a `@dataframe` object"""
|
|
139
|
+
return obj._df # type: ignore # pylint: disable=protected-access
|
|
129
140
|
|
|
130
141
|
@staticmethod
|
|
131
|
-
def
|
|
132
|
-
"""
|
|
133
|
-
return obj._df # type: ignore # pylint: disable=protected-access
|
|
142
|
+
def to_pandas(obj: DataFrameT) -> Any:
|
|
143
|
+
"""Returns internal dataframe converted to pandas"""
|
|
144
|
+
return obj._df.to_pandas() # type: ignore # pylint: disable=protected-access
|
|
145
|
+
|
|
146
|
+
@staticmethod
|
|
147
|
+
def schema(datatype: Type[DataFrameT]) -> pl.Schema:
|
|
148
|
+
if pl is None:
|
|
149
|
+
raise RuntimeError(
|
|
150
|
+
"`polars` needs to be installed to access dataframe schema. "
|
|
151
|
+
"Run `pip install hopeit.dataframes[polars]`"
|
|
152
|
+
)
|
|
153
|
+
return datatype.__dataframe__.schema # type: ignore # pylint: disable=protected-access
|