pydiverse-common 0.3.1__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,7 @@ from .dtypes import (
7
7
  Decimal,
8
8
  Dtype,
9
9
  Duration,
10
+ Enum,
10
11
  Float,
11
12
  Float32,
12
13
  Float64,
@@ -25,8 +26,10 @@ from .dtypes import (
25
26
  UInt32,
26
27
  UInt64,
27
28
  )
29
+ from .version import __version__
28
30
 
29
31
  __all__ = [
32
+ "__version__",
30
33
  "Dtype",
31
34
  "Bool",
32
35
  "Date",
@@ -49,5 +52,6 @@ __all__ = [
49
52
  "UInt32",
50
53
  "UInt64",
51
54
  "List",
55
+ "Enum",
52
56
  "PandasBackend",
53
57
  ]
@@ -1,9 +1,9 @@
1
1
  # Copyright (c) QuantCo and pydiverse contributors 2025-2025
2
2
  # SPDX-License-Identifier: BSD-3-Clause
3
- from enum import Enum
3
+ import enum
4
4
 
5
5
 
6
- class PandasBackend(str, Enum):
6
+ class PandasBackend(str, enum.Enum):
7
7
  NUMPY = "numpy"
8
8
  ARROW = "arrow"
9
9
 
@@ -12,24 +12,33 @@ class Dtype:
12
12
  """Base class for all data types."""
13
13
 
14
14
  def __eq__(self, rhs):
15
+ """Return ``True`` if this dtype is equal to `rhs`."""
15
16
  return isinstance(rhs, Dtype) and type(self) is type(rhs)
16
17
 
17
18
  def __hash__(self):
19
+ """Return a hash for this dtype."""
18
20
  return hash(type(self))
19
21
 
20
22
  def __repr__(self):
23
+ """Return a string representation of this dtype."""
21
24
  return self.__class__.__name__
22
25
 
23
26
  @classmethod
24
27
  def is_int(cls):
28
+ """Return ``True`` if this dtype is an integer type."""
25
29
  return False
26
30
 
27
31
  @classmethod
28
32
  def is_float(cls):
33
+ """Return ``True`` if this dtype is a float type."""
29
34
  return False
30
35
 
31
36
  @classmethod
32
37
  def is_subtype(cls, rhs):
38
+ """Return ``True`` if this dtype is a subtype of `rhs`.
39
+
40
+ For example, ``Int8.is_subtype(Int())`` is ``True``.
41
+ """
33
42
  rhs_cls = type(rhs)
34
43
  return (
35
44
  (cls is rhs_cls)
@@ -39,6 +48,7 @@ class Dtype:
39
48
 
40
49
  @staticmethod
41
50
  def from_sql(sql_type) -> "Dtype":
51
+ """Convert a SQL type to a Dtype."""
42
52
  import sqlalchemy as sqa
43
53
 
44
54
  if isinstance(sql_type, sqa.SmallInteger):
@@ -73,14 +83,15 @@ class Dtype:
73
83
  if isinstance(sql_type, sqa.Interval):
74
84
  return Duration()
75
85
  if isinstance(sql_type, sqa.ARRAY):
76
- return List(Dtype.from_sql(sql_type.item_type.from_sql))
77
- if isinstance(sql_type, sqa.Null):
86
+ return List(Dtype.from_sql(sql_type.item_type))
87
+ if isinstance(sql_type, sqa.types.NullType):
78
88
  return NullType()
79
89
 
80
90
  raise TypeError
81
91
 
82
92
  @staticmethod
83
93
  def from_pandas(pandas_type) -> "Dtype":
94
+ """Convert a pandas type to a Dtype."""
84
95
  import numpy as np
85
96
  import pandas as pd
86
97
 
@@ -127,13 +138,17 @@ class Dtype:
127
138
  return Datetime()
128
139
  if pd.api.types.is_timedelta64_dtype(pandas_type):
129
140
  return Duration()
130
- # we don't know any decimal/time dtypes in pandas if column is not
141
+ # we don't know any decimal/time/null dtypes in pandas if column is not
131
142
  # arrow backed
132
143
 
144
+ if pandas_type.name == "category":
145
+ return Enum(*pandas_type.categories.to_list())
146
+
133
147
  raise TypeError
134
148
 
135
149
  @staticmethod
136
150
  def from_arrow(arrow_type) -> "Dtype":
151
+ """Convert a PyArrow type to a Dtype."""
137
152
  import pyarrow as pa
138
153
 
139
154
  if pa.types.is_signed_integer(arrow_type):
@@ -179,14 +194,21 @@ class Dtype:
179
194
  return Time()
180
195
  if pa.types.is_duration(arrow_type):
181
196
  return Duration()
197
+ if pa.types.is_null(arrow_type):
198
+ return NullType()
199
+ if pa.types.is_list(arrow_type):
200
+ return List(Dtype.from_arrow(arrow_type.value_type))
182
201
  raise TypeError
183
202
 
184
203
  @staticmethod
185
204
  def from_polars(polars_type) -> "Dtype":
205
+ """Convert a Polars type to a Dtype."""
186
206
  import polars as pl
187
207
 
188
208
  if isinstance(polars_type, pl.List):
189
209
  return List(Dtype.from_polars(polars_type.inner))
210
+ if isinstance(polars_type, pl.Enum):
211
+ return Enum(*polars_type.categories)
190
212
 
191
213
  return {
192
214
  pl.Int64: Int64(),
@@ -207,10 +229,10 @@ class Dtype:
207
229
  pl.Date: Date(),
208
230
  pl.Null: NullType(),
209
231
  pl.Duration: Duration(),
210
- pl.Enum: String(),
211
232
  }[polars_type.base_type()]
212
233
 
213
234
  def to_sql(self):
235
+ """Convert this Dtype to a SQL type."""
214
236
  import sqlalchemy as sqa
215
237
 
216
238
  return {
@@ -237,6 +259,7 @@ class Dtype:
237
259
  }[self]
238
260
 
239
261
  def to_pandas(self, backend: PandasBackend = PandasBackend.ARROW):
262
+ """Convert this Dtype to a pandas type."""
240
263
  import pandas as pd
241
264
 
242
265
  if backend == PandasBackend.NUMPY:
@@ -247,12 +270,35 @@ class Dtype:
247
270
  return pd.ArrowDtype(self.to_arrow())
248
271
 
249
272
  def to_pandas_nullable(self, backend: PandasBackend = PandasBackend.ARROW):
273
+ """Convert this Dtype to a pandas nullable type.
274
+
275
+ Nullable can be either pandas extension types like StringDtype or ArrowDtype.
276
+
277
+ Parameters
278
+ ----------
279
+ backend : PandasBackend, optional
280
+ The pandas backend to use. Defaults to ``PandasBackend.ARROW``.
281
+ If ``PandasBackend.NUMPY`` is selected, this method will attempt
282
+ to return a NumPy-backed nullable pandas dtype. Note that
283
+ Time, NullType, and List will raise a TypeError for the
284
+ NUMPY backend as pandas doesn't have corresponding native
285
+ nullable dtypes for these.
286
+ """
250
287
  import pandas as pd
251
288
 
252
- if self == Time():
253
- if backend == PandasBackend.ARROW:
254
- return pd.ArrowDtype(self.to_arrow())
289
+ if backend == PandasBackend.ARROW:
290
+ return pd.ArrowDtype(self.to_arrow())
291
+
292
+ # we don't want to produce object columns
293
+ if isinstance(self, Time):
255
294
  raise TypeError("pandas doesn't have a native time dtype")
295
+ if isinstance(self, NullType):
296
+ raise TypeError("pandas doesn't have a native null dtype")
297
+ if isinstance(self, List):
298
+ raise TypeError("pandas doesn't have a native list dtype")
299
+
300
+ if isinstance(self, Enum):
301
+ return pd.CategoricalDtype(self.categories)
256
302
 
257
303
  return {
258
304
  Int(): pd.Int64Dtype(), # we default to 64 bit
@@ -277,8 +323,12 @@ class Dtype:
277
323
  }[self]
278
324
 
279
325
  def to_arrow(self):
326
+ """Convert this Dtype to a PyArrow type."""
280
327
  import pyarrow as pa
281
328
 
329
+ if isinstance(self, Enum):
330
+ return pa.string()
331
+
282
332
  return {
283
333
  Int(): pa.int64(), # we default to 64 bit
284
334
  Int8(): pa.int8(),
@@ -299,9 +349,11 @@ class Dtype:
299
349
  Time(): pa.time64("us"),
300
350
  Datetime(): pa.timestamp("us"),
301
351
  Duration(): pa.duration("us"),
352
+ NullType(): pa.null(),
302
353
  }[self]
303
354
 
304
355
  def to_polars(self: "Dtype"):
356
+ """Convert this Dtype to a Polars type."""
305
357
  import polars as pl
306
358
 
307
359
  return {
@@ -416,3 +468,41 @@ class List(Dtype):
416
468
  import polars as pl
417
469
 
418
470
  return pl.List(self.inner.to_polars())
471
+
472
+ def to_arrow(self):
473
+ import pyarrow as pa
474
+
475
+ return pa.list_(self.inner.to_arrow())
476
+
477
+
478
+ class Enum(String):
479
+ def __init__(self, *categories: str):
480
+ if not all(isinstance(c, str) for c in categories):
481
+ raise TypeError("arguments for `Enum` must have type `str`")
482
+ self.categories = list(categories)
483
+
484
+ def __eq__(self, rhs):
485
+ return isinstance(rhs, Enum) and self.categories == rhs.categories
486
+
487
+ def __repr__(self) -> str:
488
+ return f"Enum[{', '.join(repr(c) for c in self.categories)}]"
489
+
490
+ def __hash__(self):
491
+ return hash(tuple(self.categories))
492
+
493
+ def to_polars(self):
494
+ import polars as pl
495
+
496
+ return pl.Enum(self.categories)
497
+
498
+ def to_sql(self):
499
+ import sqlalchemy as sqa
500
+
501
+ return sqa.String()
502
+
503
+ def to_arrow(self):
504
+ import pyarrow as pa
505
+
506
+ # There is also pa.dictionary(), which seems to be kind of similar to an enum.
507
+ # Maybe it is better to convert to this.
508
+ return pa.string()
@@ -0,0 +1,14 @@
1
+ # Copyright (c) QuantCo and pydiverse contributors 2025-2025
2
+ # SPDX-License-Identifier: BSD-3-Clause
3
+
4
+ import inspect
5
+
6
+ import pydiverse.common.dtypes as dtypes
7
+
8
+ ALL_TYPES = [
9
+ getattr(dtypes, c)
10
+ for c in dir(dtypes)
11
+ if inspect.isclass(getattr(dtypes, c))
12
+ and issubclass(getattr(dtypes, c), dtypes.Dtype)
13
+ and c != "Dtype"
14
+ ]
@@ -0,0 +1,10 @@
1
+ # Copyright (c) QuantCo and pydiverse contributors 2025-2025
2
+ # SPDX-License-Identifier: BSD-3-Clause
3
+
4
+ from importlib.metadata import PackageNotFoundError, version
5
+
6
+ try:
7
+ __version__ = version(__package__ or __name__)
8
+ except PackageNotFoundError:
9
+ # Running from a Git checkout or an editable install
10
+ __version__ = "0.0.0+dev"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pydiverse-common
3
- Version: 0.3.1
3
+ Version: 0.3.5
4
4
  Summary: Common functionality shared between pydiverse libraries
5
5
  Author: QuantCo, Inc.
6
6
  Author-email: Martin Trautmann <windiana@users.sf.net>, Finn Rudolph <finn.rudolph@t-online.de>
@@ -47,7 +47,8 @@ Classifier: Programming Language :: SQL
47
47
  Classifier: Topic :: Database
48
48
  Classifier: Topic :: Scientific/Engineering
49
49
  Classifier: Topic :: Software Development
50
- Requires-Python: >=3.10
50
+ Requires-Python: <3.14,>=3.10
51
+ Requires-Dist: python-box<8,>=7.3.2
51
52
  Description-Content-Type: text/markdown
52
53
 
53
54
  # pydiverse.common
@@ -1,5 +1,7 @@
1
- pydiverse/common/__init__.py,sha256=2t3wnZGY_KITo5lysb-GAgXizeyd_iZvPjEHKnMsYpA,732
2
- pydiverse/common/dtypes.py,sha256=OtxOLnA5HIX9OandrrSgKS8bzERnntLRvQF-hSzvlbU,12509
1
+ pydiverse/common/__init__.py,sha256=J7b4iStFyaEMYre_jdlZ4l_8dLyrMWCIpQdsMQcB8aI,806
2
+ pydiverse/common/dtypes.py,sha256=Twna-Esfy0-4wzIxX37C1CzPTJz8MnesV_LNegndKzE,15787
3
+ pydiverse/common/testing.py,sha256=FcivI5wn0X3gzJhwnysKvCOgjSTTXaN6FtSFJ72jfSg,341
4
+ pydiverse/common/version.py,sha256=1IU_m4r76_Qq0u-Tyo2_bERZFOkh0ZFueVzDqcCfLO0,336
3
5
  pydiverse/common/errors/__init__.py,sha256=FNeEfVbUa23b9sHkFsmxHYhY6sRgjaZysPQmlovpJrI,262
4
6
  pydiverse/common/util/__init__.py,sha256=fGdKZtLaTVBW7NfCpX7rZhKHwUzmBnsuY2akDOnAnjc,315
5
7
  pydiverse/common/util/computation_tracing.py,sha256=HeXRHRUI8vxpzQ27Xcpa0StndSTP63EMT9vj4trPJUY,9697
@@ -9,7 +11,7 @@ pydiverse/common/util/disposable.py,sha256=4XoGz70YRWA9TAqnUBvRCTAdsOGBviFN0gzxU
9
11
  pydiverse/common/util/hashing.py,sha256=6x77BKg-w61u59fuTe9di0BtU-kEKH6UTRcKsRoYJ84,1196
10
12
  pydiverse/common/util/import_.py,sha256=K7dSgz4YyrqEvqhoOzbwgD7D8HScMoO5XoSWtjbaoUs,4056
11
13
  pydiverse/common/util/structlog.py,sha256=g0d8yaXBzAxmGNGZYMnMP9dsSQ__jN44GAY8Mb0ABeI,3487
12
- pydiverse_common-0.3.1.dist-info/METADATA,sha256=Ul8A6lt1G9b05z5khak8nhekPdjTcQsaZxmoReueJXU,3357
13
- pydiverse_common-0.3.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
14
- pydiverse_common-0.3.1.dist-info/licenses/LICENSE,sha256=AcE6SDVuAq6v9ZLE_8eOCe_NvSE0rAPR3NR7lSowYh4,1517
15
- pydiverse_common-0.3.1.dist-info/RECORD,,
14
+ pydiverse_common-0.3.5.dist-info/METADATA,sha256=E4Z7zT2kwRtIlGetzPlxFf5Oj5-b037lAkLikUbNgKk,3399
15
+ pydiverse_common-0.3.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
16
+ pydiverse_common-0.3.5.dist-info/licenses/LICENSE,sha256=AcE6SDVuAq6v9ZLE_8eOCe_NvSE0rAPR3NR7lSowYh4,1517
17
+ pydiverse_common-0.3.5.dist-info/RECORD,,