pydiverse-common 0.3.8__tar.gz → 0.3.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/PKG-INFO +1 -1
  2. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/docs/source/changelog.md +3 -0
  3. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/pyproject.toml +1 -1
  4. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/src/pydiverse/common/dtypes.py +27 -3
  5. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/tests/dtypes/test_dtype_pyarrow.py +14 -0
  6. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/.gitattributes +0 -0
  7. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/.github/CODEOWNERS +0 -0
  8. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  9. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/.github/dependabot.yml +0 -0
  10. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/.github/scripts/check_deps.sh +0 -0
  11. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/.github/workflows/release.yml +0 -0
  12. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/.github/workflows/tests.yml +0 -0
  13. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/.github/workflows/update-lockfiles.yml +0 -0
  14. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/.gitignore +0 -0
  15. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/.pre-commit-config.yaml +0 -0
  16. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/.readthedocs.yaml +0 -0
  17. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/LICENSE +0 -0
  18. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/README.md +0 -0
  19. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/docs/Makefile +0 -0
  20. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/docs/make.bat +0 -0
  21. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/docs/package/README.md +0 -0
  22. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/docs/source/conf.py +0 -0
  23. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/docs/source/index.md +0 -0
  24. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/docs/source/license.md +0 -0
  25. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/docs/source/reference/api.rst +0 -0
  26. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/pixi.lock +0 -0
  27. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/pixi.toml +0 -0
  28. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/pytest.ini +0 -0
  29. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/src/pydiverse/common/__init__.py +0 -0
  30. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/src/pydiverse/common/errors/__init__.py +0 -0
  31. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/src/pydiverse/common/testing.py +0 -0
  32. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/src/pydiverse/common/util/__init__.py +0 -0
  33. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/src/pydiverse/common/util/computation_tracing.py +0 -0
  34. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/src/pydiverse/common/util/deep_map.py +0 -0
  35. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/src/pydiverse/common/util/deep_merge.py +0 -0
  36. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/src/pydiverse/common/util/disposable.py +0 -0
  37. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/src/pydiverse/common/util/hashing.py +0 -0
  38. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/src/pydiverse/common/util/import_.py +0 -0
  39. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/src/pydiverse/common/util/structlog.py +0 -0
  40. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/src/pydiverse/common/version.py +0 -0
  41. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/tests/conftest.py +0 -0
  42. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/tests/dtypes/test_dtype_pandas.py +0 -0
  43. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/tests/dtypes/test_dtype_polars.py +0 -0
  44. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/tests/dtypes/test_dtype_sqlalchemy.py +0 -0
  45. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/tests/test_util.py +0 -0
  46. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/tests/test_version.py +0 -0
  47. {pydiverse_common-0.3.8 → pydiverse_common-0.3.9}/typos.toml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pydiverse-common
3
- Version: 0.3.8
3
+ Version: 0.3.9
4
4
  Summary: Common functionality shared between pydiverse libraries
5
5
  Author: QuantCo, Inc.
6
6
  Author-email: Martin Trautmann <windiana@users.sf.net>, Finn Rudolph <finn.rudolph@t-online.de>
@@ -1,5 +1,8 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.3.9 (2025-08-21)
4
+ - fix enum pyarrow dtype
5
+
3
6
  ## 0.3.8 (2025-08-19)
4
7
  - fixed util.hashing.hash_polars_dataframe for simple dataframe
5
8
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "pydiverse-common"
3
- version = "0.3.8"
3
+ version = "0.3.9"
4
4
  description = "Common functionality shared between pydiverse libraries"
5
5
  authors = [
6
6
  { name = "QuantCo, Inc." },
@@ -201,6 +201,11 @@ class Dtype:
201
201
  return NullType()
202
202
  if pa.types.is_list(arrow_type):
203
203
  return List(Dtype.from_arrow(arrow_type.value_type))
204
+ if pa.types.is_dictionary(arrow_type):
205
+ raise RuntimeError(
206
+ "Most likely this is an Enum type. But metadata about categories is "
207
+ "only in the pyarrow field and not in the pyarrow dtype"
208
+ )
204
209
  raise TypeError
205
210
 
206
211
  @staticmethod
@@ -268,7 +273,7 @@ class Dtype:
268
273
  if backend == PandasBackend.NUMPY:
269
274
  return self.to_pandas_nullable(backend)
270
275
  if backend == PandasBackend.ARROW:
271
- if self == String():
276
+ if self == String() or isinstance(self, Enum):
272
277
  return pd.StringDtype(storage="pyarrow")
273
278
  return pd.ArrowDtype(self.to_arrow())
274
279
 
@@ -355,6 +360,12 @@ class Dtype:
355
360
  NullType(): pa.null(),
356
361
  }[self]
357
362
 
363
+ def to_arrow_field(self, name: str, nullable: bool = True):
364
+ """Convert this Dtype to a PyArrow Field."""
365
+ import pyarrow as pa
366
+
367
+ return pa.field(name, self.to_arrow(), nullable=nullable)
368
+
358
369
  def to_polars(self: "Dtype"):
359
370
  """Convert this Dtype to a Polars type."""
360
371
  import polars as pl
@@ -506,6 +517,19 @@ class Enum(String):
506
517
  def to_arrow(self):
507
518
  import pyarrow as pa
508
519
 
509
- # There is also pa.dictionary(), which seems to be kind of similar to an enum.
510
- # Maybe it is better to convert to this.
520
+ # enum categories can only be maintained in pyarrow field (see to_arrow_field)
511
521
  return pa.string()
522
+
523
+ def to_arrow_field(self, name: str, nullable: bool = True):
524
+ """Convert this Dtype to a PyArrow Field."""
525
+ import pyarrow as pa
526
+
527
+ # try to mimic what polars does
528
+ return pa.field(
529
+ name,
530
+ pa.dictionary(pa.uint32(), pa.large_string()),
531
+ nullable=nullable,
532
+ metadata={
533
+ "_PL_ENUM_VALUES": "".join([f"{len(c)};{c}" for c in self.categories])
534
+ },
535
+ )
@@ -95,6 +95,20 @@ def test_dtype_to_pyarrow():
95
95
  assert_conversion(Datetime(), pa.timestamp("us"))
96
96
 
97
97
 
98
+ @pytest.mark.skipif(pa is None, reason="requires pyarrow")
99
+ def test_dtype_to_pyarrow_enum():
100
+ import polars as pl
101
+
102
+ def assert_conversion(type_: Dtype, expected_dtype):
103
+ df = pl.DataFrame(dict(x=["a"]), schema=dict(x=expected_dtype))
104
+ expected = df.to_arrow().schema
105
+ actual = pa.schema([type_.to_arrow_field("x", nullable=True)])
106
+ assert actual == expected
107
+ assert actual.field(0).metadata == expected.field(0).metadata
108
+
109
+ assert_conversion(Enum("a", "b;c"), pl.Enum(["a", "b;c"]))
110
+
111
+
98
112
  @pytest.mark.skipif(pa is None, reason="requires pyarrow")
99
113
  @pytest.mark.parametrize(
100
114
  "type_",