pydiverse-common 0.3.4__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,7 @@ from .dtypes import (
7
7
  Decimal,
8
8
  Dtype,
9
9
  Duration,
10
+ Enum,
10
11
  Float,
11
12
  Float32,
12
13
  Float64,
@@ -51,5 +52,6 @@ __all__ = [
51
52
  "UInt32",
52
53
  "UInt64",
53
54
  "List",
55
+ "Enum",
54
56
  "PandasBackend",
55
57
  ]
@@ -1,9 +1,9 @@
1
1
  # Copyright (c) QuantCo and pydiverse contributors 2025-2025
2
2
  # SPDX-License-Identifier: BSD-3-Clause
3
- from enum import Enum
3
+ import enum
4
4
 
5
5
 
6
- class PandasBackend(str, Enum):
6
+ class PandasBackend(str, enum.Enum):
7
7
  NUMPY = "numpy"
8
8
  ARROW = "arrow"
9
9
 
@@ -141,6 +141,9 @@ class Dtype:
141
141
  # we don't know any decimal/time/null dtypes in pandas if column is not
142
142
  # arrow backed
143
143
 
144
+ if pandas_type.name == "category":
145
+ return Enum(*pandas_type.categories.to_list())
146
+
144
147
  raise TypeError
145
148
 
146
149
  @staticmethod
@@ -204,6 +207,8 @@ class Dtype:
204
207
 
205
208
  if isinstance(polars_type, pl.List):
206
209
  return List(Dtype.from_polars(polars_type.inner))
210
+ if isinstance(polars_type, pl.Enum):
211
+ return Enum(*polars_type.categories)
207
212
 
208
213
  return {
209
214
  pl.Int64: Int64(),
@@ -224,7 +229,6 @@ class Dtype:
224
229
  pl.Date: Date(),
225
230
  pl.Null: NullType(),
226
231
  pl.Duration: Duration(),
227
- pl.Enum: String(),
228
232
  }[polars_type.base_type()]
229
233
 
230
234
  def to_sql(self):
@@ -293,6 +297,9 @@ class Dtype:
293
297
  if isinstance(self, List):
294
298
  raise TypeError("pandas doesn't have a native list dtype")
295
299
 
300
+ if isinstance(self, Enum):
301
+ return pd.CategoricalDtype(self.categories)
302
+
296
303
  return {
297
304
  Int(): pd.Int64Dtype(), # we default to 64 bit
298
305
  Int8(): pd.Int8Dtype(),
@@ -319,6 +326,9 @@ class Dtype:
319
326
  """Convert this Dtype to a PyArrow type."""
320
327
  import pyarrow as pa
321
328
 
329
+ if isinstance(self, Enum):
330
+ return pa.string()
331
+
322
332
  return {
323
333
  Int(): pa.int64(), # we default to 64 bit
324
334
  Int8(): pa.int8(),
@@ -463,3 +473,36 @@ class List(Dtype):
463
473
  import pyarrow as pa
464
474
 
465
475
  return pa.list_(self.inner.to_arrow())
476
+
477
+
478
+ class Enum(String):
479
+ def __init__(self, *categories: str):
480
+ if not all(isinstance(c, str) for c in categories):
481
+ raise TypeError("arguments for `Enum` must have type `str`")
482
+ self.categories = list(categories)
483
+
484
+ def __eq__(self, rhs):
485
+ return isinstance(rhs, Enum) and self.categories == rhs.categories
486
+
487
+ def __repr__(self) -> str:
488
+ return f"Enum[{', '.join(repr(c) for c in self.categories)}]"
489
+
490
+ def __hash__(self):
491
+ return hash(tuple(self.categories))
492
+
493
+ def to_polars(self):
494
+ import polars as pl
495
+
496
+ return pl.Enum(self.categories)
497
+
498
+ def to_sql(self):
499
+ import sqlalchemy as sqa
500
+
501
+ return sqa.String()
502
+
503
+ def to_arrow(self):
504
+ import pyarrow as pa
505
+
506
+ # There is also pa.dictionary(), which seems to be kind of similar to an enum.
507
+ # Maybe it is better to convert to this.
508
+ return pa.string()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pydiverse-common
3
- Version: 0.3.4
3
+ Version: 0.3.5
4
4
  Summary: Common functionality shared between pydiverse libraries
5
5
  Author: QuantCo, Inc.
6
6
  Author-email: Martin Trautmann <windiana@users.sf.net>, Finn Rudolph <finn.rudolph@t-online.de>
@@ -48,7 +48,7 @@ Classifier: Topic :: Database
48
48
  Classifier: Topic :: Scientific/Engineering
49
49
  Classifier: Topic :: Software Development
50
50
  Requires-Python: <3.14,>=3.10
51
- Requires-Dist: python-box<8.0.0,>=7.3.2
51
+ Requires-Dist: python-box<8,>=7.3.2
52
52
  Description-Content-Type: text/markdown
53
53
 
54
54
  # pydiverse.common
@@ -1,5 +1,5 @@
1
- pydiverse/common/__init__.py,sha256=eK3Wji6868cvXeq6e9YPRxi20vU6i9Lql4IzG6iB4B4,784
2
- pydiverse/common/dtypes.py,sha256=1ADpaWJbdNIFEE2DFjdIy2RRsp5Wg3UVnSM1ZN8XVdw,14555
1
+ pydiverse/common/__init__.py,sha256=J7b4iStFyaEMYre_jdlZ4l_8dLyrMWCIpQdsMQcB8aI,806
2
+ pydiverse/common/dtypes.py,sha256=Twna-Esfy0-4wzIxX37C1CzPTJz8MnesV_LNegndKzE,15787
3
3
  pydiverse/common/testing.py,sha256=FcivI5wn0X3gzJhwnysKvCOgjSTTXaN6FtSFJ72jfSg,341
4
4
  pydiverse/common/version.py,sha256=1IU_m4r76_Qq0u-Tyo2_bERZFOkh0ZFueVzDqcCfLO0,336
5
5
  pydiverse/common/errors/__init__.py,sha256=FNeEfVbUa23b9sHkFsmxHYhY6sRgjaZysPQmlovpJrI,262
@@ -11,7 +11,7 @@ pydiverse/common/util/disposable.py,sha256=4XoGz70YRWA9TAqnUBvRCTAdsOGBviFN0gzxU
11
11
  pydiverse/common/util/hashing.py,sha256=6x77BKg-w61u59fuTe9di0BtU-kEKH6UTRcKsRoYJ84,1196
12
12
  pydiverse/common/util/import_.py,sha256=K7dSgz4YyrqEvqhoOzbwgD7D8HScMoO5XoSWtjbaoUs,4056
13
13
  pydiverse/common/util/structlog.py,sha256=g0d8yaXBzAxmGNGZYMnMP9dsSQ__jN44GAY8Mb0ABeI,3487
14
- pydiverse_common-0.3.4.dist-info/METADATA,sha256=iV-aHd6IDEbgTaIsbB5lgAstdXaAAdotwZCh05laHA4,3403
15
- pydiverse_common-0.3.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
16
- pydiverse_common-0.3.4.dist-info/licenses/LICENSE,sha256=AcE6SDVuAq6v9ZLE_8eOCe_NvSE0rAPR3NR7lSowYh4,1517
17
- pydiverse_common-0.3.4.dist-info/RECORD,,
14
+ pydiverse_common-0.3.5.dist-info/METADATA,sha256=E4Z7zT2kwRtIlGetzPlxFf5Oj5-b037lAkLikUbNgKk,3399
15
+ pydiverse_common-0.3.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
16
+ pydiverse_common-0.3.5.dist-info/licenses/LICENSE,sha256=AcE6SDVuAq6v9ZLE_8eOCe_NvSE0rAPR3NR7lSowYh4,1517
17
+ pydiverse_common-0.3.5.dist-info/RECORD,,