pydiverse-common 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,7 @@ from .dtypes import (
7
7
  Decimal,
8
8
  Dtype,
9
9
  Duration,
10
+ Enum,
10
11
  Float,
11
12
  Float32,
12
13
  Float64,
@@ -51,5 +52,6 @@ __all__ = [
51
52
  "UInt32",
52
53
  "UInt64",
53
54
  "List",
55
+ "Enum",
54
56
  "PandasBackend",
55
57
  ]
@@ -1,9 +1,9 @@
1
1
  # Copyright (c) QuantCo and pydiverse contributors 2025-2025
2
2
  # SPDX-License-Identifier: BSD-3-Clause
3
- from enum import Enum
3
+ import enum
4
4
 
5
5
 
6
- class PandasBackend(str, Enum):
6
+ class PandasBackend(str, enum.Enum):
7
7
  NUMPY = "numpy"
8
8
  ARROW = "arrow"
9
9
 
@@ -103,7 +103,10 @@ class Dtype:
103
103
  type_, pd.core.dtypes.common.classes(np_dtype)
104
104
  )
105
105
 
106
- if pd.api.types.is_signed_integer_dtype(pandas_type):
106
+ workaround = (
107
+ pandas_type is not np.floating
108
+ ) # see https://github.com/pandas-dev/pandas/issues/62018
109
+ if workaround and pd.api.types.is_signed_integer_dtype(pandas_type):
107
110
  if is_np_dtype(pandas_type, np.int64):
108
111
  return Int64()
109
112
  elif is_np_dtype(pandas_type, np.int32):
@@ -113,7 +116,7 @@ class Dtype:
113
116
  elif is_np_dtype(pandas_type, np.int8):
114
117
  return Int8()
115
118
  raise TypeError
116
- if pd.api.types.is_unsigned_integer_dtype(pandas_type):
119
+ if workaround and pd.api.types.is_unsigned_integer_dtype(pandas_type):
117
120
  if is_np_dtype(pandas_type, np.uint64):
118
121
  return UInt64()
119
122
  elif is_np_dtype(pandas_type, np.uint32):
@@ -123,8 +126,8 @@ class Dtype:
123
126
  elif is_np_dtype(pandas_type, np.uint8):
124
127
  return UInt8()
125
128
  raise TypeError
126
- if pd.api.types.is_float_dtype(pandas_type):
127
- if is_np_dtype(pandas_type, np.float64):
129
+ if not workaround or pd.api.types.is_float_dtype(pandas_type):
130
+ if not workaround or is_np_dtype(pandas_type, np.float64):
128
131
  return Float64()
129
132
  elif is_np_dtype(pandas_type, np.float32):
130
133
  return Float32()
@@ -141,6 +144,9 @@ class Dtype:
141
144
  # we don't know any decimal/time/null dtypes in pandas if column is not
142
145
  # arrow backed
143
146
 
147
+ if pandas_type.name == "category":
148
+ return Enum(*pandas_type.categories.to_list())
149
+
144
150
  raise TypeError
145
151
 
146
152
  @staticmethod
@@ -204,6 +210,8 @@ class Dtype:
204
210
 
205
211
  if isinstance(polars_type, pl.List):
206
212
  return List(Dtype.from_polars(polars_type.inner))
213
+ if isinstance(polars_type, pl.Enum):
214
+ return Enum(*polars_type.categories)
207
215
 
208
216
  return {
209
217
  pl.Int64: Int64(),
@@ -224,7 +232,6 @@ class Dtype:
224
232
  pl.Date: Date(),
225
233
  pl.Null: NullType(),
226
234
  pl.Duration: Duration(),
227
- pl.Enum: String(),
228
235
  }[polars_type.base_type()]
229
236
 
230
237
  def to_sql(self):
@@ -293,6 +300,9 @@ class Dtype:
293
300
  if isinstance(self, List):
294
301
  raise TypeError("pandas doesn't have a native list dtype")
295
302
 
303
+ if isinstance(self, Enum):
304
+ return pd.CategoricalDtype(self.categories)
305
+
296
306
  return {
297
307
  Int(): pd.Int64Dtype(), # we default to 64 bit
298
308
  Int8(): pd.Int8Dtype(),
@@ -319,6 +329,9 @@ class Dtype:
319
329
  """Convert this Dtype to a PyArrow type."""
320
330
  import pyarrow as pa
321
331
 
332
+ if isinstance(self, Enum):
333
+ return pa.string()
334
+
322
335
  return {
323
336
  Int(): pa.int64(), # we default to 64 bit
324
337
  Int8(): pa.int8(),
@@ -463,3 +476,36 @@ class List(Dtype):
463
476
  import pyarrow as pa
464
477
 
465
478
  return pa.list_(self.inner.to_arrow())
479
+
480
+
481
class Enum(String):
    """String-derived dtype that carries a fixed list of category names.

    Two ``Enum`` instances compare equal only when their category lists are
    equal, element by element and in the same order.
    """

    def __init__(self, *categories: str):
        """Store the given category names.

        :param categories: category names; every one must be a ``str``.
        :raises TypeError: if any argument is not a ``str``.
        """
        if any(not isinstance(name, str) for name in categories):
            raise TypeError("arguments for `Enum` must have type `str`")
        self.categories = list(categories)

    def __eq__(self, rhs):
        """Return True only for another ``Enum`` with the same category list."""
        if not isinstance(rhs, Enum):
            return False
        return self.categories == rhs.categories

    def __hash__(self):
        """Hash the categories so that equal instances hash equally."""
        # Lists are unhashable, so hash a tuple snapshot of the categories.
        return hash(tuple(self.categories))

    def __repr__(self) -> str:
        """Render as ``Enum['a', 'b', ...]`` using the repr of each category."""
        listing = ", ".join(map(repr, self.categories))
        return "Enum[" + listing + "]"

    def to_polars(self):
        """Convert to a polars ``Enum`` built from the stored categories."""
        import polars as pl

        return pl.Enum(self.categories)

    def to_sql(self):
        """Convert to a SQLAlchemy ``String`` type."""
        import sqlalchemy as sqa

        return sqa.String()

    def to_arrow(self):
        """Convert to a PyArrow string type."""
        import pyarrow as pa

        # pa.dictionary() looks like a closer analogue of an enum and may be
        # the better conversion target; for now a plain string type is used.
        return pa.string()
@@ -80,7 +80,7 @@ class PydiverseConsoleRenderer(structlog.dev.ConsoleRenderer):
80
80
 
81
81
  def setup_logging(
82
82
  log_level=logging.INFO,
83
- log_stream=sys.stderr,
83
+ log_stream=None,
84
84
  timestamp_format="%Y-%m-%d %H:%M:%S.%f",
85
85
  ):
86
86
  """Configures structlog and logging with sane defaults."""
@@ -92,6 +92,15 @@ def setup_logging(
92
92
  level=log_level,
93
93
  handlers=[StructlogHandler()],
94
94
  )
95
+ if log_stream is None:
96
+ try:
97
+ # hack to avoid dask pickling problems with pytest capture
98
+ import structlog._output
99
+
100
+ structlog._output.stderr = sys.stderr
101
+ finally:
102
+ pass
103
+ log_stream = sys.stderr
95
104
  # Configure structlog
96
105
  structlog.configure(
97
106
  processors=[
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pydiverse-common
3
- Version: 0.3.4
3
+ Version: 0.3.6
4
4
  Summary: Common functionality shared between pydiverse libraries
5
5
  Author: QuantCo, Inc.
6
6
  Author-email: Martin Trautmann <windiana@users.sf.net>, Finn Rudolph <finn.rudolph@t-online.de>
@@ -48,7 +48,7 @@ Classifier: Topic :: Database
48
48
  Classifier: Topic :: Scientific/Engineering
49
49
  Classifier: Topic :: Software Development
50
50
  Requires-Python: <3.14,>=3.10
51
- Requires-Dist: python-box<8.0.0,>=7.3.2
51
+ Requires-Dist: python-box<8,>=7.3.2
52
52
  Description-Content-Type: text/markdown
53
53
 
54
54
  # pydiverse.common
@@ -1,5 +1,5 @@
1
- pydiverse/common/__init__.py,sha256=eK3Wji6868cvXeq6e9YPRxi20vU6i9Lql4IzG6iB4B4,784
2
- pydiverse/common/dtypes.py,sha256=1ADpaWJbdNIFEE2DFjdIy2RRsp5Wg3UVnSM1ZN8XVdw,14555
1
+ pydiverse/common/__init__.py,sha256=J7b4iStFyaEMYre_jdlZ4l_8dLyrMWCIpQdsMQcB8aI,806
2
+ pydiverse/common/dtypes.py,sha256=LYZKaKYq_4uI4kUhoaCTTo5j1SRurswIOfN11Bkz25A,15986
3
3
  pydiverse/common/testing.py,sha256=FcivI5wn0X3gzJhwnysKvCOgjSTTXaN6FtSFJ72jfSg,341
4
4
  pydiverse/common/version.py,sha256=1IU_m4r76_Qq0u-Tyo2_bERZFOkh0ZFueVzDqcCfLO0,336
5
5
  pydiverse/common/errors/__init__.py,sha256=FNeEfVbUa23b9sHkFsmxHYhY6sRgjaZysPQmlovpJrI,262
@@ -10,8 +10,8 @@ pydiverse/common/util/deep_merge.py,sha256=bV5p5_lsC-9nFah28EiEyG2h6U3Z5AuTqSoox
10
10
  pydiverse/common/util/disposable.py,sha256=4XoGz70YRWA9TAqnUBvRCTAdsOGBviFN0gzxU7veY9o,993
11
11
  pydiverse/common/util/hashing.py,sha256=6x77BKg-w61u59fuTe9di0BtU-kEKH6UTRcKsRoYJ84,1196
12
12
  pydiverse/common/util/import_.py,sha256=K7dSgz4YyrqEvqhoOzbwgD7D8HScMoO5XoSWtjbaoUs,4056
13
- pydiverse/common/util/structlog.py,sha256=g0d8yaXBzAxmGNGZYMnMP9dsSQ__jN44GAY8Mb0ABeI,3487
14
- pydiverse_common-0.3.4.dist-info/METADATA,sha256=iV-aHd6IDEbgTaIsbB5lgAstdXaAAdotwZCh05laHA4,3403
15
- pydiverse_common-0.3.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
16
- pydiverse_common-0.3.4.dist-info/licenses/LICENSE,sha256=AcE6SDVuAq6v9ZLE_8eOCe_NvSE0rAPR3NR7lSowYh4,1517
17
- pydiverse_common-0.3.4.dist-info/RECORD,,
13
+ pydiverse/common/util/structlog.py,sha256=xxhauxMuyxcKXTVg1MiPTkuvPBj8Zcr4o_v8Bq59Nig,3778
14
+ pydiverse_common-0.3.6.dist-info/METADATA,sha256=TVaCnX9IArQwB64MGJ4wfVv1EE7KUDI51u9d0QnFXeY,3399
15
+ pydiverse_common-0.3.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
16
+ pydiverse_common-0.3.6.dist-info/licenses/LICENSE,sha256=AcE6SDVuAq6v9ZLE_8eOCe_NvSE0rAPR3NR7lSowYh4,1517
17
+ pydiverse_common-0.3.6.dist-info/RECORD,,