pydiverse-common 0.3.1__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pydiverse/common/__init__.py +4 -0
- pydiverse/common/dtypes.py +99 -9
- pydiverse/common/testing.py +14 -0
- pydiverse/common/version.py +10 -0
- {pydiverse_common-0.3.1.dist-info → pydiverse_common-0.3.5.dist-info}/METADATA +3 -2
- {pydiverse_common-0.3.1.dist-info → pydiverse_common-0.3.5.dist-info}/RECORD +8 -6
- {pydiverse_common-0.3.1.dist-info → pydiverse_common-0.3.5.dist-info}/WHEEL +0 -0
- {pydiverse_common-0.3.1.dist-info → pydiverse_common-0.3.5.dist-info}/licenses/LICENSE +0 -0
pydiverse/common/__init__.py
CHANGED
@@ -7,6 +7,7 @@ from .dtypes import (
|
|
7
7
|
Decimal,
|
8
8
|
Dtype,
|
9
9
|
Duration,
|
10
|
+
Enum,
|
10
11
|
Float,
|
11
12
|
Float32,
|
12
13
|
Float64,
|
@@ -25,8 +26,10 @@ from .dtypes import (
|
|
25
26
|
UInt32,
|
26
27
|
UInt64,
|
27
28
|
)
|
29
|
+
from .version import __version__
|
28
30
|
|
29
31
|
__all__ = [
|
32
|
+
"__version__",
|
30
33
|
"Dtype",
|
31
34
|
"Bool",
|
32
35
|
"Date",
|
@@ -49,5 +52,6 @@ __all__ = [
|
|
49
52
|
"UInt32",
|
50
53
|
"UInt64",
|
51
54
|
"List",
|
55
|
+
"Enum",
|
52
56
|
"PandasBackend",
|
53
57
|
]
|
pydiverse/common/dtypes.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
# Copyright (c) QuantCo and pydiverse contributors 2025-2025
|
2
2
|
# SPDX-License-Identifier: BSD-3-Clause
|
3
|
-
|
3
|
+
import enum
|
4
4
|
|
5
5
|
|
6
|
-
class PandasBackend(str, Enum):
|
6
|
+
class PandasBackend(str, enum.Enum):
|
7
7
|
NUMPY = "numpy"
|
8
8
|
ARROW = "arrow"
|
9
9
|
|
@@ -12,24 +12,33 @@ class Dtype:
|
|
12
12
|
"""Base class for all data types."""
|
13
13
|
|
14
14
|
def __eq__(self, rhs):
|
15
|
+
"""Return ``True`` if this dtype is equal to `rhs`."""
|
15
16
|
return isinstance(rhs, Dtype) and type(self) is type(rhs)
|
16
17
|
|
17
18
|
def __hash__(self):
|
19
|
+
"""Return a hash for this dtype."""
|
18
20
|
return hash(type(self))
|
19
21
|
|
20
22
|
def __repr__(self):
|
23
|
+
"""Return a string representation of this dtype."""
|
21
24
|
return self.__class__.__name__
|
22
25
|
|
23
26
|
@classmethod
|
24
27
|
def is_int(cls):
|
28
|
+
"""Return ``True`` if this dtype is an integer type."""
|
25
29
|
return False
|
26
30
|
|
27
31
|
@classmethod
|
28
32
|
def is_float(cls):
|
33
|
+
"""Return ``True`` if this dtype is a float type."""
|
29
34
|
return False
|
30
35
|
|
31
36
|
@classmethod
|
32
37
|
def is_subtype(cls, rhs):
|
38
|
+
"""Return ``True`` if this dtype is a subtype of `rhs`.
|
39
|
+
|
40
|
+
For example, ``Int8.is_subtype(Int())`` is ``True``.
|
41
|
+
"""
|
33
42
|
rhs_cls = type(rhs)
|
34
43
|
return (
|
35
44
|
(cls is rhs_cls)
|
@@ -39,6 +48,7 @@ class Dtype:
|
|
39
48
|
|
40
49
|
@staticmethod
|
41
50
|
def from_sql(sql_type) -> "Dtype":
|
51
|
+
"""Convert a SQL type to a Dtype."""
|
42
52
|
import sqlalchemy as sqa
|
43
53
|
|
44
54
|
if isinstance(sql_type, sqa.SmallInteger):
|
@@ -73,14 +83,15 @@ class Dtype:
|
|
73
83
|
if isinstance(sql_type, sqa.Interval):
|
74
84
|
return Duration()
|
75
85
|
if isinstance(sql_type, sqa.ARRAY):
|
76
|
-
return List(Dtype.from_sql(sql_type.item_type
|
77
|
-
if isinstance(sql_type, sqa.
|
86
|
+
return List(Dtype.from_sql(sql_type.item_type))
|
87
|
+
if isinstance(sql_type, sqa.types.NullType):
|
78
88
|
return NullType()
|
79
89
|
|
80
90
|
raise TypeError
|
81
91
|
|
82
92
|
@staticmethod
|
83
93
|
def from_pandas(pandas_type) -> "Dtype":
|
94
|
+
"""Convert a pandas type to a Dtype."""
|
84
95
|
import numpy as np
|
85
96
|
import pandas as pd
|
86
97
|
|
@@ -127,13 +138,17 @@ class Dtype:
|
|
127
138
|
return Datetime()
|
128
139
|
if pd.api.types.is_timedelta64_dtype(pandas_type):
|
129
140
|
return Duration()
|
130
|
-
# we don't know any decimal/time dtypes in pandas if column is not
|
141
|
+
# we don't know any decimal/time/null dtypes in pandas if column is not
|
131
142
|
# arrow backed
|
132
143
|
|
144
|
+
if pandas_type.name == "category":
|
145
|
+
return Enum(*pandas_type.categories.to_list())
|
146
|
+
|
133
147
|
raise TypeError
|
134
148
|
|
135
149
|
@staticmethod
|
136
150
|
def from_arrow(arrow_type) -> "Dtype":
|
151
|
+
"""Convert a PyArrow type to a Dtype."""
|
137
152
|
import pyarrow as pa
|
138
153
|
|
139
154
|
if pa.types.is_signed_integer(arrow_type):
|
@@ -179,14 +194,21 @@ class Dtype:
|
|
179
194
|
return Time()
|
180
195
|
if pa.types.is_duration(arrow_type):
|
181
196
|
return Duration()
|
197
|
+
if pa.types.is_null(arrow_type):
|
198
|
+
return NullType()
|
199
|
+
if pa.types.is_list(arrow_type):
|
200
|
+
return List(Dtype.from_arrow(arrow_type.value_type))
|
182
201
|
raise TypeError
|
183
202
|
|
184
203
|
@staticmethod
|
185
204
|
def from_polars(polars_type) -> "Dtype":
|
205
|
+
"""Convert a Polars type to a Dtype."""
|
186
206
|
import polars as pl
|
187
207
|
|
188
208
|
if isinstance(polars_type, pl.List):
|
189
209
|
return List(Dtype.from_polars(polars_type.inner))
|
210
|
+
if isinstance(polars_type, pl.Enum):
|
211
|
+
return Enum(*polars_type.categories)
|
190
212
|
|
191
213
|
return {
|
192
214
|
pl.Int64: Int64(),
|
@@ -207,10 +229,10 @@ class Dtype:
|
|
207
229
|
pl.Date: Date(),
|
208
230
|
pl.Null: NullType(),
|
209
231
|
pl.Duration: Duration(),
|
210
|
-
pl.Enum: String(),
|
211
232
|
}[polars_type.base_type()]
|
212
233
|
|
213
234
|
def to_sql(self):
|
235
|
+
"""Convert this Dtype to a SQL type."""
|
214
236
|
import sqlalchemy as sqa
|
215
237
|
|
216
238
|
return {
|
@@ -237,6 +259,7 @@ class Dtype:
|
|
237
259
|
}[self]
|
238
260
|
|
239
261
|
def to_pandas(self, backend: PandasBackend = PandasBackend.ARROW):
|
262
|
+
"""Convert this Dtype to a pandas type."""
|
240
263
|
import pandas as pd
|
241
264
|
|
242
265
|
if backend == PandasBackend.NUMPY:
|
@@ -247,12 +270,35 @@ class Dtype:
|
|
247
270
|
return pd.ArrowDtype(self.to_arrow())
|
248
271
|
|
249
272
|
def to_pandas_nullable(self, backend: PandasBackend = PandasBackend.ARROW):
|
273
|
+
"""Convert this Dtype to a pandas nullable type.
|
274
|
+
|
275
|
+
Nullable can be either pandas extension types like StringDtype or ArrowDtype.
|
276
|
+
|
277
|
+
Parameters
|
278
|
+
----------
|
279
|
+
backend : PandasBackend, optional
|
280
|
+
The pandas backend to use. Defaults to ``PandasBackend.ARROW``.
|
281
|
+
If ``PandasBackend.NUMPY`` is selected, this method will attempt
|
282
|
+
to return a NumPy-backed nullable pandas dtype. Note that
|
283
|
+
Time, NullType, and List will raise a TypeError for the
|
284
|
+
NUMPY backend as pandas doesn't have corresponding native
|
285
|
+
nullable dtypes for these.
|
286
|
+
"""
|
250
287
|
import pandas as pd
|
251
288
|
|
252
|
-
if
|
253
|
-
|
254
|
-
|
289
|
+
if backend == PandasBackend.ARROW:
|
290
|
+
return pd.ArrowDtype(self.to_arrow())
|
291
|
+
|
292
|
+
# we don't want to produce object columns
|
293
|
+
if isinstance(self, Time):
|
255
294
|
raise TypeError("pandas doesn't have a native time dtype")
|
295
|
+
if isinstance(self, NullType):
|
296
|
+
raise TypeError("pandas doesn't have a native null dtype")
|
297
|
+
if isinstance(self, List):
|
298
|
+
raise TypeError("pandas doesn't have a native list dtype")
|
299
|
+
|
300
|
+
if isinstance(self, Enum):
|
301
|
+
return pd.CategoricalDtype(self.categories)
|
256
302
|
|
257
303
|
return {
|
258
304
|
Int(): pd.Int64Dtype(), # we default to 64 bit
|
@@ -277,8 +323,12 @@ class Dtype:
|
|
277
323
|
}[self]
|
278
324
|
|
279
325
|
def to_arrow(self):
|
326
|
+
"""Convert this Dtype to a PyArrow type."""
|
280
327
|
import pyarrow as pa
|
281
328
|
|
329
|
+
if isinstance(self, Enum):
|
330
|
+
return pa.string()
|
331
|
+
|
282
332
|
return {
|
283
333
|
Int(): pa.int64(), # we default to 64 bit
|
284
334
|
Int8(): pa.int8(),
|
@@ -299,9 +349,11 @@ class Dtype:
|
|
299
349
|
Time(): pa.time64("us"),
|
300
350
|
Datetime(): pa.timestamp("us"),
|
301
351
|
Duration(): pa.duration("us"),
|
352
|
+
NullType(): pa.null(),
|
302
353
|
}[self]
|
303
354
|
|
304
355
|
def to_polars(self: "Dtype"):
|
356
|
+
"""Convert this Dtype to a Polars type."""
|
305
357
|
import polars as pl
|
306
358
|
|
307
359
|
return {
|
@@ -416,3 +468,41 @@ class List(Dtype):
|
|
416
468
|
import polars as pl
|
417
469
|
|
418
470
|
return pl.List(self.inner.to_polars())
|
471
|
+
|
472
|
+
def to_arrow(self):
|
473
|
+
import pyarrow as pa
|
474
|
+
|
475
|
+
return pa.list_(self.inner.to_arrow())
|
476
|
+
|
477
|
+
|
478
|
+
class Enum(String):
|
479
|
+
def __init__(self, *categories: str):
|
480
|
+
if not all(isinstance(c, str) for c in categories):
|
481
|
+
raise TypeError("arguments for `Enum` must have type `str`")
|
482
|
+
self.categories = list(categories)
|
483
|
+
|
484
|
+
def __eq__(self, rhs):
|
485
|
+
return isinstance(rhs, Enum) and self.categories == rhs.categories
|
486
|
+
|
487
|
+
def __repr__(self) -> str:
|
488
|
+
return f"Enum[{', '.join(repr(c) for c in self.categories)}]"
|
489
|
+
|
490
|
+
def __hash__(self):
|
491
|
+
return hash(tuple(self.categories))
|
492
|
+
|
493
|
+
def to_polars(self):
|
494
|
+
import polars as pl
|
495
|
+
|
496
|
+
return pl.Enum(self.categories)
|
497
|
+
|
498
|
+
def to_sql(self):
|
499
|
+
import sqlalchemy as sqa
|
500
|
+
|
501
|
+
return sqa.String()
|
502
|
+
|
503
|
+
def to_arrow(self):
|
504
|
+
import pyarrow as pa
|
505
|
+
|
506
|
+
# There is also pa.dictionary(), which seems to be kind of similar to an enum.
|
507
|
+
# Maybe it is better to convert to this.
|
508
|
+
return pa.string()
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# Copyright (c) QuantCo and pydiverse contributors 2025-2025
|
2
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
3
|
+
|
4
|
+
import inspect
|
5
|
+
|
6
|
+
import pydiverse.common.dtypes as dtypes
|
7
|
+
|
8
|
+
ALL_TYPES = [
|
9
|
+
getattr(dtypes, c)
|
10
|
+
for c in dir(dtypes)
|
11
|
+
if inspect.isclass(getattr(dtypes, c))
|
12
|
+
and issubclass(getattr(dtypes, c), dtypes.Dtype)
|
13
|
+
and c != "Dtype"
|
14
|
+
]
|
@@ -0,0 +1,10 @@
|
|
1
|
+
# Copyright (c) QuantCo and pydiverse contributors 2025-2025
|
2
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
3
|
+
|
4
|
+
from importlib.metadata import PackageNotFoundError, version
|
5
|
+
|
6
|
+
try:
|
7
|
+
__version__ = version(__package__ or __name__)
|
8
|
+
except PackageNotFoundError:
|
9
|
+
# Running from a Git checkout or an editable install
|
10
|
+
__version__ = "0.0.0+dev"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: pydiverse-common
|
3
|
-
Version: 0.3.1
|
3
|
+
Version: 0.3.5
|
4
4
|
Summary: Common functionality shared between pydiverse libraries
|
5
5
|
Author: QuantCo, Inc.
|
6
6
|
Author-email: Martin Trautmann <windiana@users.sf.net>, Finn Rudolph <finn.rudolph@t-online.de>
|
@@ -47,7 +47,8 @@ Classifier: Programming Language :: SQL
|
|
47
47
|
Classifier: Topic :: Database
|
48
48
|
Classifier: Topic :: Scientific/Engineering
|
49
49
|
Classifier: Topic :: Software Development
|
50
|
-
Requires-Python:
|
50
|
+
Requires-Python: <3.14,>=3.10
|
51
|
+
Requires-Dist: python-box<8,>=7.3.2
|
51
52
|
Description-Content-Type: text/markdown
|
52
53
|
|
53
54
|
# pydiverse.common
|
@@ -1,5 +1,7 @@
|
|
1
|
-
pydiverse/common/__init__.py,sha256=
|
2
|
-
pydiverse/common/dtypes.py,sha256=
|
1
|
+
pydiverse/common/__init__.py,sha256=J7b4iStFyaEMYre_jdlZ4l_8dLyrMWCIpQdsMQcB8aI,806
|
2
|
+
pydiverse/common/dtypes.py,sha256=Twna-Esfy0-4wzIxX37C1CzPTJz8MnesV_LNegndKzE,15787
|
3
|
+
pydiverse/common/testing.py,sha256=FcivI5wn0X3gzJhwnysKvCOgjSTTXaN6FtSFJ72jfSg,341
|
4
|
+
pydiverse/common/version.py,sha256=1IU_m4r76_Qq0u-Tyo2_bERZFOkh0ZFueVzDqcCfLO0,336
|
3
5
|
pydiverse/common/errors/__init__.py,sha256=FNeEfVbUa23b9sHkFsmxHYhY6sRgjaZysPQmlovpJrI,262
|
4
6
|
pydiverse/common/util/__init__.py,sha256=fGdKZtLaTVBW7NfCpX7rZhKHwUzmBnsuY2akDOnAnjc,315
|
5
7
|
pydiverse/common/util/computation_tracing.py,sha256=HeXRHRUI8vxpzQ27Xcpa0StndSTP63EMT9vj4trPJUY,9697
|
@@ -9,7 +11,7 @@ pydiverse/common/util/disposable.py,sha256=4XoGz70YRWA9TAqnUBvRCTAdsOGBviFN0gzxU
|
|
9
11
|
pydiverse/common/util/hashing.py,sha256=6x77BKg-w61u59fuTe9di0BtU-kEKH6UTRcKsRoYJ84,1196
|
10
12
|
pydiverse/common/util/import_.py,sha256=K7dSgz4YyrqEvqhoOzbwgD7D8HScMoO5XoSWtjbaoUs,4056
|
11
13
|
pydiverse/common/util/structlog.py,sha256=g0d8yaXBzAxmGNGZYMnMP9dsSQ__jN44GAY8Mb0ABeI,3487
|
12
|
-
pydiverse_common-0.3.
|
13
|
-
pydiverse_common-0.3.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
14
|
-
pydiverse_common-0.3.1.dist-info/licenses/LICENSE,sha256=AcE6SDVuAq6v9ZLE_8eOCe_NvSE0rAPR3NR7lSowYh4,1517
|
15
|
-
pydiverse_common-0.3.1.dist-info/RECORD,,
|
14
|
+
pydiverse_common-0.3.5.dist-info/METADATA,sha256=E4Z7zT2kwRtIlGetzPlxFf5Oj5-b037lAkLikUbNgKk,3399
|
15
|
+
pydiverse_common-0.3.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
16
|
+
pydiverse_common-0.3.5.dist-info/licenses/LICENSE,sha256=AcE6SDVuAq6v9ZLE_8eOCe_NvSE0rAPR3NR7lSowYh4,1517
|
17
|
+
pydiverse_common-0.3.5.dist-info/RECORD,,
|
File without changes
|
File without changes
|