hopeit.dataframes 0.25.4__tar.gz → 0.26.0rc1__tar.gz

This diff shows the differences between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (23)
  1. {hopeit_dataframes-0.25.4 → hopeit_dataframes-0.26.0rc1}/PKG-INFO +12 -13
  2. {hopeit_dataframes-0.25.4 → hopeit_dataframes-0.26.0rc1}/pyproject.toml +17 -12
  3. {hopeit_dataframes-0.25.4 → hopeit_dataframes-0.26.0rc1}/src/hopeit/dataframes/__init__.py +1 -1
  4. {hopeit_dataframes-0.25.4 → hopeit_dataframes-0.26.0rc1}/src/hopeit/dataframes/datablocks.py +2 -2
  5. {hopeit_dataframes-0.25.4 → hopeit_dataframes-0.26.0rc1}/src/hopeit/dataframes/dataframe.py +5 -4
  6. {hopeit_dataframes-0.25.4 → hopeit_dataframes-0.26.0rc1}/src/hopeit/dataframes/serialization/dataset.py +3 -3
  7. {hopeit_dataframes-0.25.4 → hopeit_dataframes-0.26.0rc1}/src/hopeit/dataframes/serialization/files.py +26 -4
  8. {hopeit_dataframes-0.25.4 → hopeit_dataframes-0.26.0rc1}/src/hopeit/dataframes/serialization/settings.py +2 -1
  9. {hopeit_dataframes-0.25.4 → hopeit_dataframes-0.26.0rc1}/src/hopeit/dataframes/setup/dataframes.py +2 -1
  10. {hopeit_dataframes-0.25.4 → hopeit_dataframes-0.26.0rc1}/src/hopeit.dataframes.egg-info/PKG-INFO +12 -13
  11. {hopeit_dataframes-0.25.4 → hopeit_dataframes-0.26.0rc1}/src/hopeit.dataframes.egg-info/SOURCES.txt +0 -1
  12. hopeit_dataframes-0.26.0rc1/src/hopeit.dataframes.egg-info/requires.txt +4 -0
  13. hopeit_dataframes-0.25.4/setup.py +0 -25
  14. hopeit_dataframes-0.25.4/src/hopeit.dataframes.egg-info/requires.txt +0 -6
  15. {hopeit_dataframes-0.25.4 → hopeit_dataframes-0.26.0rc1}/README.md +0 -0
  16. {hopeit_dataframes-0.25.4 → hopeit_dataframes-0.26.0rc1}/setup.cfg +0 -0
  17. {hopeit_dataframes-0.25.4 → hopeit_dataframes-0.26.0rc1}/src/hopeit/dataframes/py.typed +0 -0
  18. {hopeit_dataframes-0.25.4 → hopeit_dataframes-0.26.0rc1}/src/hopeit/dataframes/serialization/__init__.py +0 -0
  19. {hopeit_dataframes-0.25.4 → hopeit_dataframes-0.26.0rc1}/src/hopeit/dataframes/serialization/py.typed +0 -0
  20. {hopeit_dataframes-0.25.4 → hopeit_dataframes-0.26.0rc1}/src/hopeit/dataframes/setup/__init__.py +0 -0
  21. {hopeit_dataframes-0.25.4 → hopeit_dataframes-0.26.0rc1}/src/hopeit/dataframes/setup/py.typed +0 -0
  22. {hopeit_dataframes-0.25.4 → hopeit_dataframes-0.26.0rc1}/src/hopeit.dataframes.egg-info/dependency_links.txt +0 -0
  23. {hopeit_dataframes-0.25.4 → hopeit_dataframes-0.26.0rc1}/src/hopeit.dataframes.egg-info/top_level.txt +0 -0
@@ -1,8 +1,8 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: hopeit.dataframes
3
- Version: 0.25.4
4
- Summary: Hopeit Engine Dataframes Toolkit
5
- Author-email: Leo Smerling <contact@hopeit.com.ar>, Pablo Canto <contact@hopeit.com.ar>
3
+ Version: 0.26.0rc1
4
+ Summary: Hopeit Engine Dataframes for Pandas
5
+ Author-email: Leo Smerling & Pablo Canto <contact@hopeit.com.ar>, Leo Smerling <contact@hopeit.com.ar>, Pablo Canto <contact@hopeit.com.ar>
6
6
  License: Apache 2
7
7
  Project-URL: Homepage, https://github.com/hopeit-git/hopeit.engine
8
8
  Project-URL: CI: GitHub Actions, https://github.com/hopeit-git/hopeit.engine/actions?query=workflow
@@ -12,23 +12,22 @@ Project-URL: GitHub: repo, https://github.com/hopeit-git/hopeit.engine
12
12
  Classifier: License :: OSI Approved :: Apache Software License
13
13
  Classifier: Intended Audience :: Developers
14
14
  Classifier: Programming Language :: Python
15
- Classifier: Programming Language :: Python :: 3.9
16
15
  Classifier: Programming Language :: Python :: 3.10
17
16
  Classifier: Programming Language :: Python :: 3.11
18
- Classifier: Development Status :: 4 - Beta
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Development Status :: 5 - Production/Stable
19
20
  Classifier: Operating System :: POSIX :: Linux
20
21
  Classifier: Operating System :: MacOS :: MacOS X
21
22
  Classifier: Operating System :: Microsoft :: Windows
22
23
  Classifier: Topic :: Internet :: WWW/HTTP
23
24
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
24
25
  Classifier: Framework :: AsyncIO
25
- Requires-Python: >=3.9
26
- Description-Content-Type: text/plain
27
- Requires-Dist: hopeit.engine[fs-storage]==0.25.4
28
- Requires-Dist: pandas
29
- Requires-Dist: numpy
30
- Provides-Extra: pyarrow
31
- Requires-Dist: pyarrow; extra == "pyarrow"
26
+ Description-Content-Type: text/markdown
27
+ Requires-Dist: hopeit.engine>=0.26.0rc1
28
+ Requires-Dist: hopeit.fs-storage>=0.26.0rc1
29
+ Requires-Dist: pandas>=2.2.3
30
+ Requires-Dist: numpy>=1.26.4
32
31
 
33
32
  # hopeit.engine dataframes plugin
34
33
 
@@ -4,10 +4,21 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "hopeit.dataframes"
7
- description = "Hopeit Engine Dataframes Toolkit"
8
- dynamic = ["version", "readme", "dependencies", "optional-dependencies"]
7
+ version = "0.26.0rc1"
8
+
9
+ description = "Hopeit Engine Dataframes for Pandas"
10
+ dynamic = ["readme"]
11
+
12
+ dependencies = [
13
+ "hopeit.engine>=0.26.0rc1",
14
+ "hopeit.fs-storage>=0.26.0rc1",
15
+ "pandas>=2.2.3",
16
+ "numpy>=1.26.4"
17
+ ]
18
+
9
19
  license = { text = "Apache 2" }
10
20
  authors = [
21
+ { name = "Leo Smerling & Pablo Canto", email = "contact@hopeit.com.ar" },
11
22
  { name = "Leo Smerling", email = "contact@hopeit.com.ar" },
12
23
  { name = "Pablo Canto", email = "contact@hopeit.com.ar" },
13
24
  ]
@@ -15,10 +26,11 @@ classifiers = [
15
26
  "License :: OSI Approved :: Apache Software License",
16
27
  "Intended Audience :: Developers",
17
28
  "Programming Language :: Python",
18
- "Programming Language :: Python :: 3.9",
19
29
  "Programming Language :: Python :: 3.10",
20
30
  "Programming Language :: Python :: 3.11",
21
- "Development Status :: 4 - Beta",
31
+ "Programming Language :: Python :: 3.12",
32
+ "Programming Language :: Python :: 3.13",
33
+ "Development Status :: 5 - Production/Stable",
22
34
  "Operating System :: POSIX :: Linux",
23
35
  "Operating System :: MacOS :: MacOS X",
24
36
  "Operating System :: Microsoft :: Windows",
@@ -26,8 +38,6 @@ classifiers = [
26
38
  "Topic :: Software Development :: Libraries :: Python Modules",
27
39
  "Framework :: AsyncIO",
28
40
  ]
29
- requires-python = ">=3.9"
30
-
31
41
 
32
42
  [project.urls]
33
43
  "Homepage" = "https://github.com/hopeit-git/hopeit.engine"
@@ -42,10 +52,5 @@ include-package-data = true
42
52
  [tool.setuptools.packages.find]
43
53
  where = ["src"]
44
54
 
45
- [tool.setuptools.package-data]
46
- "hopeit.dataframes" = ["py.typed"]
47
- "hopeit.dataframes.serialization" = ["py.typed"]
48
- "hopeit.dataframes.setup" = ["py.typed"]
49
-
50
55
  [tool.setuptools.dynamic]
51
- readme = { file = ["README.md"], content-type = "text/plain" }
56
+ readme = { file = ["README.md"], content-type = "text/markdown" }
@@ -92,7 +92,7 @@ class DataFrames(Generic[DataFrameT, DataObject]):
92
92
  """
93
93
 
94
94
  @staticmethod
95
- def setup(settings: DatasetSerialization):
95
+ def setup(settings: DatasetSerialization) -> None:
96
96
  register_serialization(settings)
97
97
 
98
98
  @staticmethod
@@ -11,7 +11,7 @@ DataFrameType = TypeVar("DataFrameType")
11
11
 
12
12
 
13
13
  class TempDataBlock(Generic[DataBlockType, DataBlockItemType]):
14
- def __init__(self, datatype: Type[DataBlockType], df: pd.DataFrame):
14
+ def __init__(self, datatype: Type[DataBlockType], df: pd.DataFrame) -> None:
15
15
  self.datatype = datatype
16
16
  self.df = df
17
17
 
@@ -132,7 +132,7 @@ class DataBlocks(Generic[DataBlockType, DataFrameType]):
132
132
  ) from e
133
133
 
134
134
  @classmethod
135
- def _adapt_to_schema(cls, datablock: DataBlockType, keys: list[str], df: pd.DataFrame):
135
+ def _adapt_to_schema(cls, datablock: DataBlockType, keys: list[str], df: pd.DataFrame) -> None:
136
136
  for key in keys:
137
137
  datatype = find_dataframe_type(getattr(datablock, key).datatype) # type: ignore[var-annotated]
138
138
  valid_df = datatype._from_df(df)._df
@@ -124,7 +124,7 @@ class DataFrameMixin(Generic[DataFrameT, DataObject]):
124
124
  raise NotImplementedError # must use @dataframe decorator # pragma: no cover
125
125
 
126
126
  @staticmethod
127
- def __init_from_series__(self, **series: pd.Series): # pylint: disable=bad-staticmethod-argument
127
+ def __init_from_series__(self, **series: pd.Series) -> None: # pylint: disable=bad-staticmethod-argument
128
128
  df = pd.DataFrame(series)
129
129
  df.index.name = None # Removes index name to avoid colisions with series name
130
130
  if self.__data_object__["validate"]:
@@ -240,9 +240,10 @@ def dataframe(
240
240
  return amended_class
241
241
  return cls
242
242
 
243
- def add_dataframe_metadata(cls):
243
+ def add_dataframe_metadata(cls) -> None:
244
244
  serialized_fields = {k: (v.annotation, v) for k, v in fields(cls).items()}
245
- dataobject_type = create_model(cls.__name__ + "DataObject", **serialized_fields)
245
+ dataobject_name = str(cls.__name__) + "DataObject"
246
+ dataobject_type = create_model(dataobject_name, **serialized_fields) # type: ignore[call-overload]
246
247
  dataobject_type = dataobject(dataobject_type, unsafe=True)
247
248
 
248
249
  setattr(cls, "DataObject", dataobject_type)
@@ -255,7 +256,7 @@ def dataframe(
255
256
  ),
256
257
  )
257
258
 
258
- def add_dataobject_annotations(cls, unsafe: bool, validate: bool, schema: bool):
259
+ def add_dataobject_annotations(cls, unsafe: bool, validate: bool, schema: bool) -> None:
259
260
  setattr(
260
261
  cls,
261
262
  "__data_object__",
@@ -87,7 +87,7 @@ def find_dataframe_type(qual_type_name: str) -> Type[DataFrameT]:
87
87
  )
88
88
  module = import_module(mod_name)
89
89
  datatype = getattr(module, type_name)
90
- assert hasattr(
91
- datatype, "__dataframe__"
92
- ), f"Type {qual_type_name} must be annotated with `@dataframe`."
90
+ assert hasattr(datatype, "__dataframe__"), (
91
+ f"Type {qual_type_name} must be annotated with `@dataframe`."
92
+ )
93
93
  return datatype
@@ -1,7 +1,7 @@
1
1
  """Support for `@dataframes` serialization to files"""
2
2
 
3
3
  import io
4
- from typing import Generic, Optional, Type, TypeVar
4
+ from typing import Generic, Literal, Optional, Type, TypeVar
5
5
  from uuid import uuid4
6
6
 
7
7
  import pandas as pd
@@ -17,22 +17,42 @@ except ImportError as e:
17
17
 
18
18
  from hopeit.dataframes.dataframe import DataFrameMixin
19
19
  from hopeit.dataframes.serialization.dataset import Dataset
20
- from hopeit.dataobjects import DataObject
20
+ from hopeit.dataobjects import DataObject, dataclass, dataobject
21
+ from hopeit.dataobjects.payload import Payload
21
22
  from hopeit.fs_storage import FileStorage
22
23
 
23
24
  DataFrameT = TypeVar("DataFrameT", bound=DataFrameMixin)
24
25
 
25
26
 
27
+ @dataobject
28
+ @dataclass
29
+ class DatasetFileStorageEngineSettings:
30
+ """Pyarrow settings for parquet file storage"""
31
+
32
+ compression: Literal["snappy", "gzip", "brotli", "lz4", "zstd"] | None = None
33
+ compression_level: int | str | None = None
34
+
35
+
26
36
  class DatasetFileStorage(Generic[DataFrameT]):
27
37
  """Support to store dataframes as files,
28
38
  using pandas parquet format support in combination
29
39
  with `hopeit.engine` file storage plugins
30
40
  """
31
41
 
32
- def __init__(self, *, location: str, partition_dateformat: Optional[str], **kwargs):
42
+ def __init__(
43
+ self,
44
+ *,
45
+ location: str,
46
+ partition_dateformat: Optional[str],
47
+ storage_settings: dict[str, str | int],
48
+ **kwargs,
49
+ ):
33
50
  self.storage: FileStorage = FileStorage(
34
51
  path=location, partition_dateformat=partition_dateformat
35
52
  )
53
+ self.storage_settings: DatasetFileStorageEngineSettings = Payload.from_obj(
54
+ storage_settings, DatasetFileStorageEngineSettings
55
+ )
36
56
 
37
57
  async def save(self, dataframe: DataFrameT) -> Dataset:
38
58
  """Saves @dataframe annotated object as parquet to file system
@@ -42,7 +62,9 @@ class DatasetFileStorage(Generic[DataFrameT]):
42
62
  key = f"{datatype.__qualname__.lower()}_{uuid4()}.parquet"
43
63
  data = io.BytesIO(
44
64
  dataframe._df.to_parquet( # pylint: disable=protected-access
45
- engine="pyarrow"
65
+ engine="pyarrow",
66
+ compression=self.storage_settings.compression,
67
+ compression_level=self.storage_settings.compression_level,
46
68
  )
47
69
  )
48
70
  location = await self.storage.store_file(file_name=key, value=data)
@@ -2,7 +2,7 @@
2
2
 
3
3
  from typing import Optional
4
4
 
5
- from hopeit.dataobjects import dataclass, dataobject
5
+ from hopeit.dataobjects import dataclass, dataobject, field
6
6
 
7
7
 
8
8
  @dataobject
@@ -11,3 +11,4 @@ class DatasetSerialization:
11
11
  protocol: str
12
12
  location: str
13
13
  partition_dateformat: Optional[str] = None
14
+ storage_settings: dict[str, str | int] = field(default_factory=dict)
@@ -26,11 +26,12 @@ def setup(payload: None, context: EventContext) -> None:
26
26
  register_serialization(settings)
27
27
 
28
28
 
29
- def register_serialization(settings: DatasetSerialization):
29
+ def register_serialization(settings: DatasetSerialization) -> None:
30
30
  impl = find_protocol_impl(settings.protocol)
31
31
  storage = impl(
32
32
  protocol=settings.protocol,
33
33
  location=settings.location,
34
34
  partition_dateformat=settings.partition_dateformat,
35
+ storage_settings=settings.storage_settings,
35
36
  )
36
37
  setattr(Dataset, "_Dataset__storage", storage)
@@ -1,8 +1,8 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: hopeit.dataframes
3
- Version: 0.25.4
4
- Summary: Hopeit Engine Dataframes Toolkit
5
- Author-email: Leo Smerling <contact@hopeit.com.ar>, Pablo Canto <contact@hopeit.com.ar>
3
+ Version: 0.26.0rc1
4
+ Summary: Hopeit Engine Dataframes for Pandas
5
+ Author-email: Leo Smerling & Pablo Canto <contact@hopeit.com.ar>, Leo Smerling <contact@hopeit.com.ar>, Pablo Canto <contact@hopeit.com.ar>
6
6
  License: Apache 2
7
7
  Project-URL: Homepage, https://github.com/hopeit-git/hopeit.engine
8
8
  Project-URL: CI: GitHub Actions, https://github.com/hopeit-git/hopeit.engine/actions?query=workflow
@@ -12,23 +12,22 @@ Project-URL: GitHub: repo, https://github.com/hopeit-git/hopeit.engine
12
12
  Classifier: License :: OSI Approved :: Apache Software License
13
13
  Classifier: Intended Audience :: Developers
14
14
  Classifier: Programming Language :: Python
15
- Classifier: Programming Language :: Python :: 3.9
16
15
  Classifier: Programming Language :: Python :: 3.10
17
16
  Classifier: Programming Language :: Python :: 3.11
18
- Classifier: Development Status :: 4 - Beta
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Development Status :: 5 - Production/Stable
19
20
  Classifier: Operating System :: POSIX :: Linux
20
21
  Classifier: Operating System :: MacOS :: MacOS X
21
22
  Classifier: Operating System :: Microsoft :: Windows
22
23
  Classifier: Topic :: Internet :: WWW/HTTP
23
24
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
24
25
  Classifier: Framework :: AsyncIO
25
- Requires-Python: >=3.9
26
- Description-Content-Type: text/plain
27
- Requires-Dist: hopeit.engine[fs-storage]==0.25.4
28
- Requires-Dist: pandas
29
- Requires-Dist: numpy
30
- Provides-Extra: pyarrow
31
- Requires-Dist: pyarrow; extra == "pyarrow"
26
+ Description-Content-Type: text/markdown
27
+ Requires-Dist: hopeit.engine>=0.26.0rc1
28
+ Requires-Dist: hopeit.fs-storage>=0.26.0rc1
29
+ Requires-Dist: pandas>=2.2.3
30
+ Requires-Dist: numpy>=1.26.4
32
31
 
33
32
  # hopeit.engine dataframes plugin
34
33
 
@@ -1,6 +1,5 @@
1
1
  README.md
2
2
  pyproject.toml
3
- setup.py
4
3
  src/hopeit.dataframes.egg-info/PKG-INFO
5
4
  src/hopeit.dataframes.egg-info/SOURCES.txt
6
5
  src/hopeit.dataframes.egg-info/dependency_links.txt
@@ -0,0 +1,4 @@
1
+ hopeit.engine>=0.26.0rc1
2
+ hopeit.fs-storage>=0.26.0rc1
3
+ pandas>=2.2.3
4
+ numpy>=1.26.4
@@ -1,25 +0,0 @@
1
- from setuptools import setup
2
- from os import environ
3
-
4
- version = {}
5
- try:
6
- with open("../../../engine/src/hopeit/server/version.py") as fp:
7
- exec(fp.read(), version)
8
- ENGINE_VERSION = version["ENGINE_VERSION"]
9
- except FileNotFoundError:
10
- ENGINE_VERSION = environ.get("ENGINE_VERSION")
11
-
12
- if not ENGINE_VERSION:
13
- raise RuntimeError("ENGINE_VERSION is not specified.")
14
-
15
- setup(
16
- version=ENGINE_VERSION,
17
- install_requires=[
18
- f"hopeit.engine[fs-storage]=={ENGINE_VERSION}",
19
- "pandas",
20
- "numpy",
21
- ],
22
- extras_require={
23
- "pyarrow": ["pyarrow"],
24
- },
25
- )
@@ -1,6 +0,0 @@
1
- hopeit.engine[fs-storage]==0.25.4
2
- pandas
3
- numpy
4
-
5
- [pyarrow]
6
- pyarrow