pydiverse-common 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pydiverse/common/__init__.py +2 -0
- pydiverse/common/dtypes.py +53 -7
- pydiverse/common/util/structlog.py +10 -1
- {pydiverse_common-0.3.4.dist-info → pydiverse_common-0.3.6.dist-info}/METADATA +2 -2
- {pydiverse_common-0.3.4.dist-info → pydiverse_common-0.3.6.dist-info}/RECORD +7 -7
- {pydiverse_common-0.3.4.dist-info → pydiverse_common-0.3.6.dist-info}/WHEEL +0 -0
- {pydiverse_common-0.3.4.dist-info → pydiverse_common-0.3.6.dist-info}/licenses/LICENSE +0 -0
pydiverse/common/__init__.py
CHANGED
pydiverse/common/dtypes.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
# Copyright (c) QuantCo and pydiverse contributors 2025-2025
|
2
2
|
# SPDX-License-Identifier: BSD-3-Clause
|
3
|
-
|
3
|
+
import enum
|
4
4
|
|
5
5
|
|
6
|
-
class PandasBackend(str, Enum):
|
6
|
+
class PandasBackend(str, enum.Enum):
|
7
7
|
NUMPY = "numpy"
|
8
8
|
ARROW = "arrow"
|
9
9
|
|
@@ -103,7 +103,10 @@ class Dtype:
|
|
103
103
|
type_, pd.core.dtypes.common.classes(np_dtype)
|
104
104
|
)
|
105
105
|
|
106
|
-
|
106
|
+
workaround = (
|
107
|
+
pandas_type is not np.floating
|
108
|
+
) # see https://github.com/pandas-dev/pandas/issues/62018
|
109
|
+
if workaround and pd.api.types.is_signed_integer_dtype(pandas_type):
|
107
110
|
if is_np_dtype(pandas_type, np.int64):
|
108
111
|
return Int64()
|
109
112
|
elif is_np_dtype(pandas_type, np.int32):
|
@@ -113,7 +116,7 @@ class Dtype:
|
|
113
116
|
elif is_np_dtype(pandas_type, np.int8):
|
114
117
|
return Int8()
|
115
118
|
raise TypeError
|
116
|
-
if pd.api.types.is_unsigned_integer_dtype(pandas_type):
|
119
|
+
if workaround and pd.api.types.is_unsigned_integer_dtype(pandas_type):
|
117
120
|
if is_np_dtype(pandas_type, np.uint64):
|
118
121
|
return UInt64()
|
119
122
|
elif is_np_dtype(pandas_type, np.uint32):
|
@@ -123,8 +126,8 @@ class Dtype:
|
|
123
126
|
elif is_np_dtype(pandas_type, np.uint8):
|
124
127
|
return UInt8()
|
125
128
|
raise TypeError
|
126
|
-
if pd.api.types.is_float_dtype(pandas_type):
|
127
|
-
if is_np_dtype(pandas_type, np.float64):
|
129
|
+
if not workaround or pd.api.types.is_float_dtype(pandas_type):
|
130
|
+
if not workaround or is_np_dtype(pandas_type, np.float64):
|
128
131
|
return Float64()
|
129
132
|
elif is_np_dtype(pandas_type, np.float32):
|
130
133
|
return Float32()
|
@@ -141,6 +144,9 @@ class Dtype:
|
|
141
144
|
# we don't know any decimal/time/null dtypes in pandas if column is not
|
142
145
|
# arrow backed
|
143
146
|
|
147
|
+
if pandas_type.name == "category":
|
148
|
+
return Enum(*pandas_type.categories.to_list())
|
149
|
+
|
144
150
|
raise TypeError
|
145
151
|
|
146
152
|
@staticmethod
|
@@ -204,6 +210,8 @@ class Dtype:
|
|
204
210
|
|
205
211
|
if isinstance(polars_type, pl.List):
|
206
212
|
return List(Dtype.from_polars(polars_type.inner))
|
213
|
+
if isinstance(polars_type, pl.Enum):
|
214
|
+
return Enum(*polars_type.categories)
|
207
215
|
|
208
216
|
return {
|
209
217
|
pl.Int64: Int64(),
|
@@ -224,7 +232,6 @@ class Dtype:
|
|
224
232
|
pl.Date: Date(),
|
225
233
|
pl.Null: NullType(),
|
226
234
|
pl.Duration: Duration(),
|
227
|
-
pl.Enum: String(),
|
228
235
|
}[polars_type.base_type()]
|
229
236
|
|
230
237
|
def to_sql(self):
|
@@ -293,6 +300,9 @@ class Dtype:
|
|
293
300
|
if isinstance(self, List):
|
294
301
|
raise TypeError("pandas doesn't have a native list dtype")
|
295
302
|
|
303
|
+
if isinstance(self, Enum):
|
304
|
+
return pd.CategoricalDtype(self.categories)
|
305
|
+
|
296
306
|
return {
|
297
307
|
Int(): pd.Int64Dtype(), # we default to 64 bit
|
298
308
|
Int8(): pd.Int8Dtype(),
|
@@ -319,6 +329,9 @@ class Dtype:
|
|
319
329
|
"""Convert this Dtype to a PyArrow type."""
|
320
330
|
import pyarrow as pa
|
321
331
|
|
332
|
+
if isinstance(self, Enum):
|
333
|
+
return pa.string()
|
334
|
+
|
322
335
|
return {
|
323
336
|
Int(): pa.int64(), # we default to 64 bit
|
324
337
|
Int8(): pa.int8(),
|
@@ -463,3 +476,36 @@ class List(Dtype):
|
|
463
476
|
import pyarrow as pa
|
464
477
|
|
465
478
|
return pa.list_(self.inner.to_arrow())
|
479
|
+
|
480
|
+
|
481
|
+
class Enum(String):
|
482
|
+
def __init__(self, *categories: str):
|
483
|
+
if not all(isinstance(c, str) for c in categories):
|
484
|
+
raise TypeError("arguments for `Enum` must have type `str`")
|
485
|
+
self.categories = list(categories)
|
486
|
+
|
487
|
+
def __eq__(self, rhs):
|
488
|
+
return isinstance(rhs, Enum) and self.categories == rhs.categories
|
489
|
+
|
490
|
+
def __repr__(self) -> str:
|
491
|
+
return f"Enum[{', '.join(repr(c) for c in self.categories)}]"
|
492
|
+
|
493
|
+
def __hash__(self):
|
494
|
+
return hash(tuple(self.categories))
|
495
|
+
|
496
|
+
def to_polars(self):
|
497
|
+
import polars as pl
|
498
|
+
|
499
|
+
return pl.Enum(self.categories)
|
500
|
+
|
501
|
+
def to_sql(self):
|
502
|
+
import sqlalchemy as sqa
|
503
|
+
|
504
|
+
return sqa.String()
|
505
|
+
|
506
|
+
def to_arrow(self):
|
507
|
+
import pyarrow as pa
|
508
|
+
|
509
|
+
# There is also pa.dictionary(), which seems to be kind of similar to an enum.
|
510
|
+
# Maybe it is better to convert to this.
|
511
|
+
return pa.string()
|
@@ -80,7 +80,7 @@ class PydiverseConsoleRenderer(structlog.dev.ConsoleRenderer):
|
|
80
80
|
|
81
81
|
def setup_logging(
|
82
82
|
log_level=logging.INFO,
|
83
|
-
log_stream=
|
83
|
+
log_stream=None,
|
84
84
|
timestamp_format="%Y-%m-%d %H:%M:%S.%f",
|
85
85
|
):
|
86
86
|
"""Configures structlog and logging with sane defaults."""
|
@@ -92,6 +92,15 @@ def setup_logging(
|
|
92
92
|
level=log_level,
|
93
93
|
handlers=[StructlogHandler()],
|
94
94
|
)
|
95
|
+
if log_stream is None:
|
96
|
+
try:
|
97
|
+
# hack to avoid dask pickling problems with pytest capture
|
98
|
+
import structlog._output
|
99
|
+
|
100
|
+
structlog._output.stderr = sys.stderr
|
101
|
+
finally:
|
102
|
+
pass
|
103
|
+
log_stream = sys.stderr
|
95
104
|
# Configure structlog
|
96
105
|
structlog.configure(
|
97
106
|
processors=[
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: pydiverse-common
|
3
|
-
Version: 0.3.4
|
3
|
+
Version: 0.3.6
|
4
4
|
Summary: Common functionality shared between pydiverse libraries
|
5
5
|
Author: QuantCo, Inc.
|
6
6
|
Author-email: Martin Trautmann <windiana@users.sf.net>, Finn Rudolph <finn.rudolph@t-online.de>
|
@@ -48,7 +48,7 @@ Classifier: Topic :: Database
|
|
48
48
|
Classifier: Topic :: Scientific/Engineering
|
49
49
|
Classifier: Topic :: Software Development
|
50
50
|
Requires-Python: <3.14,>=3.10
|
51
|
-
Requires-Dist: python-box<8
|
51
|
+
Requires-Dist: python-box<8,>=7.3.2
|
52
52
|
Description-Content-Type: text/markdown
|
53
53
|
|
54
54
|
# pydiverse.common
|
@@ -1,5 +1,5 @@
|
|
1
|
-
pydiverse/common/__init__.py,sha256=
|
2
|
-
pydiverse/common/dtypes.py,sha256=
|
1
|
+
pydiverse/common/__init__.py,sha256=J7b4iStFyaEMYre_jdlZ4l_8dLyrMWCIpQdsMQcB8aI,806
|
2
|
+
pydiverse/common/dtypes.py,sha256=LYZKaKYq_4uI4kUhoaCTTo5j1SRurswIOfN11Bkz25A,15986
|
3
3
|
pydiverse/common/testing.py,sha256=FcivI5wn0X3gzJhwnysKvCOgjSTTXaN6FtSFJ72jfSg,341
|
4
4
|
pydiverse/common/version.py,sha256=1IU_m4r76_Qq0u-Tyo2_bERZFOkh0ZFueVzDqcCfLO0,336
|
5
5
|
pydiverse/common/errors/__init__.py,sha256=FNeEfVbUa23b9sHkFsmxHYhY6sRgjaZysPQmlovpJrI,262
|
@@ -10,8 +10,8 @@ pydiverse/common/util/deep_merge.py,sha256=bV5p5_lsC-9nFah28EiEyG2h6U3Z5AuTqSoox
|
|
10
10
|
pydiverse/common/util/disposable.py,sha256=4XoGz70YRWA9TAqnUBvRCTAdsOGBviFN0gzxU7veY9o,993
|
11
11
|
pydiverse/common/util/hashing.py,sha256=6x77BKg-w61u59fuTe9di0BtU-kEKH6UTRcKsRoYJ84,1196
|
12
12
|
pydiverse/common/util/import_.py,sha256=K7dSgz4YyrqEvqhoOzbwgD7D8HScMoO5XoSWtjbaoUs,4056
|
13
|
-
pydiverse/common/util/structlog.py,sha256=
|
14
|
-
pydiverse_common-0.3.
|
15
|
-
pydiverse_common-0.3.
|
16
|
-
pydiverse_common-0.3.
|
17
|
-
pydiverse_common-0.3.
|
13
|
+
pydiverse/common/util/structlog.py,sha256=xxhauxMuyxcKXTVg1MiPTkuvPBj8Zcr4o_v8Bq59Nig,3778
|
14
|
+
pydiverse_common-0.3.6.dist-info/METADATA,sha256=TVaCnX9IArQwB64MGJ4wfVv1EE7KUDI51u9d0QnFXeY,3399
|
15
|
+
pydiverse_common-0.3.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
16
|
+
pydiverse_common-0.3.6.dist-info/licenses/LICENSE,sha256=AcE6SDVuAq6v9ZLE_8eOCe_NvSE0rAPR3NR7lSowYh4,1517
|
17
|
+
pydiverse_common-0.3.6.dist-info/RECORD,,
|
File without changes
|
File without changes
|