pydiverse-common 0.3.2__tar.gz → 0.3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/PKG-INFO +1 -1
  2. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/docs/source/changelog.md +4 -0
  3. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/docs/source/reference/api.rst +23 -0
  4. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/pyproject.toml +3 -4
  5. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/src/pydiverse/common/dtypes.py +53 -6
  6. pydiverse_common-0.3.3/src/pydiverse/common/testing.py +14 -0
  7. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/tests/dtypes/test_dtype_pandas.py +48 -0
  8. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/tests/dtypes/test_dtype_polars.py +22 -4
  9. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/tests/dtypes/test_dtype_pyarrow.py +18 -0
  10. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/tests/dtypes/test_dtype_sqlalchemy.py +29 -0
  11. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/.gitattributes +0 -0
  12. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/.github/CODEOWNERS +0 -0
  13. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  14. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/.github/dependabot.yml +0 -0
  15. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/.github/workflows/release.yml +0 -0
  16. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/.github/workflows/tests.yml +0 -0
  17. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/.github/workflows/update-lockfiles.yml +0 -0
  18. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/.gitignore +0 -0
  19. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/.pre-commit-config.yaml +0 -0
  20. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/.readthedocs.yaml +0 -0
  21. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/LICENSE +0 -0
  22. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/README.md +0 -0
  23. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/docs/Makefile +0 -0
  24. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/docs/make.bat +0 -0
  25. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/docs/package/README.md +0 -0
  26. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/docs/source/conf.py +0 -0
  27. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/docs/source/index.md +0 -0
  28. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/docs/source/license.md +0 -0
  29. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/pixi.lock +0 -0
  30. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/pixi.toml +0 -0
  31. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/pytest.ini +0 -0
  32. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/src/pydiverse/common/__init__.py +0 -0
  33. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/src/pydiverse/common/errors/__init__.py +0 -0
  34. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/src/pydiverse/common/util/__init__.py +0 -0
  35. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/src/pydiverse/common/util/computation_tracing.py +0 -0
  36. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/src/pydiverse/common/util/deep_map.py +0 -0
  37. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/src/pydiverse/common/util/deep_merge.py +0 -0
  38. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/src/pydiverse/common/util/disposable.py +0 -0
  39. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/src/pydiverse/common/util/hashing.py +0 -0
  40. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/src/pydiverse/common/util/import_.py +0 -0
  41. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/src/pydiverse/common/util/structlog.py +0 -0
  42. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/src/pydiverse/common/version.py +0 -0
  43. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/tests/conftest.py +0 -0
  44. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/tests/test_util.py +0 -0
  45. {pydiverse_common-0.3.2 → pydiverse_common-0.3.3}/tests/test_version.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pydiverse-common
3
- Version: 0.3.2
3
+ Version: 0.3.3
4
4
  Summary: Common functionality shared between pydiverse libraries
5
5
  Author: QuantCo, Inc.
6
6
  Author-email: Martin Trautmann <windiana@users.sf.net>, Finn Rudolph <finn.rudolph@t-online.de>
@@ -1,5 +1,9 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.3.3 (2025-06-08)
4
+ - improved support for None and List types
5
+ - bug fixes in type conversion functions
6
+
3
7
  ## 0.3.2 (2025-06-08)
4
8
  - pydiverse.common.__version__ (implemented via importlib.metadata)
5
9
 
@@ -8,25 +8,48 @@ Public
8
8
  .. py:module:: pydiverse.common
9
9
 
10
10
  .. autoclass:: Dtype
11
+ :members:
11
12
  .. autoclass:: Bool
13
+ :members:
12
14
  .. autoclass:: Date
15
+ :members:
13
16
  .. autoclass:: Datetime
17
+ :members:
14
18
  .. autoclass:: Decimal
19
+ :members:
15
20
  .. autoclass:: Duration
21
+ :members:
16
22
  .. autoclass:: Float
23
+ :members:
17
24
  .. autoclass:: Float32
25
+ :members:
18
26
  .. autoclass:: Float64
27
+ :members:
19
28
  .. autoclass:: Int
29
+ :members:
20
30
  .. autoclass:: Int8
31
+ :members:
21
32
  .. autoclass:: Int16
33
+ :members:
22
34
  .. autoclass:: Int32
35
+ :members:
23
36
  .. autoclass:: Int64
37
+ :members:
24
38
  .. autoclass:: NullType
39
+ :members:
25
40
  .. autoclass:: String
41
+ :members:
26
42
  .. autoclass:: Time
43
+ :members:
27
44
  .. autoclass:: UInt8
45
+ :members:
28
46
  .. autoclass:: UInt16
47
+ :members:
29
48
  .. autoclass:: UInt32
49
+ :members:
30
50
  .. autoclass:: UInt64
51
+ :members:
31
52
  .. autoclass:: List
53
+ :members:
32
54
  .. autoclass:: PandasBackend
55
+ :members:
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "pydiverse-common"
3
- version = "0.3.2"
3
+ version = "0.3.3"
4
4
  description = "Common functionality shared between pydiverse libraries"
5
5
  authors = [
6
6
  { name = "QuantCo, Inc." },
@@ -29,14 +29,13 @@ classifiers = [
29
29
  packages = ["src/pydiverse"]
30
30
 
31
31
  [tool.ruff]
32
- fix = true
33
32
  target-version = "py310"
34
33
  extend-exclude = ["docs/*"]
34
+ fix = true
35
35
 
36
36
  [tool.ruff.lint]
37
37
  select = ["F", "E", "UP", "W", "I001", "I002", "B", "A"]
38
- ignore = ["B028"]
39
- # 'ignore-init-module-imports' is deprecated and removed per warning
38
+ #ignore = ["B028"]
40
39
 
41
40
  [tool.ruff.lint.per-file-ignores]
42
41
  #"__init__.py" = ["F401", "F403"]
@@ -12,24 +12,33 @@ class Dtype:
12
12
  """Base class for all data types."""
13
13
 
14
14
  def __eq__(self, rhs):
15
+ """Return ``True`` if this dtype is equal to `rhs`."""
15
16
  return isinstance(rhs, Dtype) and type(self) is type(rhs)
16
17
 
17
18
  def __hash__(self):
19
+ """Return a hash for this dtype."""
18
20
  return hash(type(self))
19
21
 
20
22
  def __repr__(self):
23
+ """Return a string representation of this dtype."""
21
24
  return self.__class__.__name__
22
25
 
23
26
  @classmethod
24
27
  def is_int(cls):
28
+ """Return ``True`` if this dtype is an integer type."""
25
29
  return False
26
30
 
27
31
  @classmethod
28
32
  def is_float(cls):
33
+ """Return ``True`` if this dtype is a float type."""
29
34
  return False
30
35
 
31
36
  @classmethod
32
37
  def is_subtype(cls, rhs):
38
+ """Return ``True`` if this dtype is a subtype of `rhs`.
39
+
40
+ For example, ``Int8.is_subtype(Int())`` is ``True``.
41
+ """
33
42
  rhs_cls = type(rhs)
34
43
  return (
35
44
  (cls is rhs_cls)
@@ -39,6 +48,7 @@ class Dtype:
39
48
 
40
49
  @staticmethod
41
50
  def from_sql(sql_type) -> "Dtype":
51
+ """Convert a SQL type to a Dtype."""
42
52
  import sqlalchemy as sqa
43
53
 
44
54
  if isinstance(sql_type, sqa.SmallInteger):
@@ -73,14 +83,15 @@ class Dtype:
73
83
  if isinstance(sql_type, sqa.Interval):
74
84
  return Duration()
75
85
  if isinstance(sql_type, sqa.ARRAY):
76
- return List(Dtype.from_sql(sql_type.item_type.from_sql))
77
- if isinstance(sql_type, sqa.Null):
86
+ return List(Dtype.from_sql(sql_type.item_type))
87
+ if isinstance(sql_type, sqa.types.NullType):
78
88
  return NullType()
79
89
 
80
90
  raise TypeError
81
91
 
82
92
  @staticmethod
83
93
  def from_pandas(pandas_type) -> "Dtype":
94
+ """Convert a pandas type to a Dtype."""
84
95
  import numpy as np
85
96
  import pandas as pd
86
97
 
@@ -127,13 +138,14 @@ class Dtype:
127
138
  return Datetime()
128
139
  if pd.api.types.is_timedelta64_dtype(pandas_type):
129
140
  return Duration()
130
- # we don't know any decimal/time dtypes in pandas if column is not
141
+ # we don't know any decimal/time/null dtypes in pandas if column is not
131
142
  # arrow backed
132
143
 
133
144
  raise TypeError
134
145
 
135
146
  @staticmethod
136
147
  def from_arrow(arrow_type) -> "Dtype":
148
+ """Convert a PyArrow type to a Dtype."""
137
149
  import pyarrow as pa
138
150
 
139
151
  if pa.types.is_signed_integer(arrow_type):
@@ -179,10 +191,15 @@ class Dtype:
179
191
  return Time()
180
192
  if pa.types.is_duration(arrow_type):
181
193
  return Duration()
194
+ if pa.types.is_null(arrow_type):
195
+ return NullType()
196
+ if pa.types.is_list(arrow_type):
197
+ return List(Dtype.from_arrow(arrow_type.value_type))
182
198
  raise TypeError
183
199
 
184
200
  @staticmethod
185
201
  def from_polars(polars_type) -> "Dtype":
202
+ """Convert a Polars type to a Dtype."""
186
203
  import polars as pl
187
204
 
188
205
  if isinstance(polars_type, pl.List):
@@ -211,6 +228,7 @@ class Dtype:
211
228
  }[polars_type.base_type()]
212
229
 
213
230
  def to_sql(self):
231
+ """Convert this Dtype to a SQL type."""
214
232
  import sqlalchemy as sqa
215
233
 
216
234
  return {
@@ -237,6 +255,7 @@ class Dtype:
237
255
  }[self]
238
256
 
239
257
  def to_pandas(self, backend: PandasBackend = PandasBackend.ARROW):
258
+ """Convert this Dtype to a pandas type."""
240
259
  import pandas as pd
241
260
 
242
261
  if backend == PandasBackend.NUMPY:
@@ -247,12 +266,32 @@ class Dtype:
247
266
  return pd.ArrowDtype(self.to_arrow())
248
267
 
249
268
  def to_pandas_nullable(self, backend: PandasBackend = PandasBackend.ARROW):
269
+ """Convert this Dtype to a pandas nullable type.
270
+
271
+ The result is either a pandas extension dtype such as StringDtype or an ArrowDtype.
272
+
273
+ Parameters
274
+ ----------
275
+ backend : PandasBackend, optional
276
+ The pandas backend to use. Defaults to ``PandasBackend.ARROW``.
277
+ If ``PandasBackend.NUMPY`` is selected, this method will attempt
278
+ to return a NumPy-backed nullable pandas dtype. Note that
279
+ Time, NullType, and List will raise a TypeError for the
280
+ NUMPY backend as pandas doesn't have corresponding native
281
+ nullable dtypes for these.
282
+ """
250
283
  import pandas as pd
251
284
 
252
- if self == Time():
253
- if backend == PandasBackend.ARROW:
254
- return pd.ArrowDtype(self.to_arrow())
285
+ if backend == PandasBackend.ARROW:
286
+ return pd.ArrowDtype(self.to_arrow())
287
+
288
+ # we don't want to produce object columns
289
+ if isinstance(self, Time):
255
290
  raise TypeError("pandas doesn't have a native time dtype")
291
+ if isinstance(self, NullType):
292
+ raise TypeError("pandas doesn't have a native null dtype")
293
+ if isinstance(self, List):
294
+ raise TypeError("pandas doesn't have a native list dtype")
256
295
 
257
296
  return {
258
297
  Int(): pd.Int64Dtype(), # we default to 64 bit
@@ -277,6 +316,7 @@ class Dtype:
277
316
  }[self]
278
317
 
279
318
  def to_arrow(self):
319
+ """Convert this Dtype to a PyArrow type."""
280
320
  import pyarrow as pa
281
321
 
282
322
  return {
@@ -299,9 +339,11 @@ class Dtype:
299
339
  Time(): pa.time64("us"),
300
340
  Datetime(): pa.timestamp("us"),
301
341
  Duration(): pa.duration("us"),
342
+ NullType(): pa.null(),
302
343
  }[self]
303
344
 
304
345
  def to_polars(self: "Dtype"):
346
+ """Convert this Dtype to a Polars type."""
305
347
  import polars as pl
306
348
 
307
349
  return {
@@ -416,3 +458,8 @@ class List(Dtype):
416
458
  import polars as pl
417
459
 
418
460
  return pl.List(self.inner.to_polars())
461
+
462
+ def to_arrow(self):
463
+ import pyarrow as pa
464
+
465
+ return pa.list_(self.inner.to_arrow())
@@ -0,0 +1,14 @@
1
+ # Copyright (c) QuantCo and pydiverse contributors 2025-2025
2
+ # SPDX-License-Identifier: BSD-3-Clause
3
+
4
+ import inspect
5
+
6
+ import pydiverse.common.dtypes as dtypes
7
+
8
+ ALL_TYPES = [
9
+ getattr(dtypes, c)
10
+ for c in dir(dtypes)
11
+ if inspect.isclass(getattr(dtypes, c))
12
+ and issubclass(getattr(dtypes, c), dtypes.Dtype)
13
+ and c != "Dtype"
14
+ ]
@@ -5,6 +5,9 @@ import types
5
5
 
6
6
  import pytest
7
7
 
8
+ import pydiverse.common as pdc
9
+ from pydiverse.common.testing import ALL_TYPES
10
+
8
11
  try:
9
12
  import numpy as np
10
13
  import pandas as pd
@@ -141,3 +144,48 @@ def test_dtype_to_pandas_pyarrow():
141
144
  assert_conversion(Date(), pa.date32())
142
145
  assert_conversion(Time(), pa.time64("us"))
143
146
  assert_conversion(Datetime(), pa.timestamp("us"))
147
+
148
+
149
+ @pytest.mark.skipif(np is None, reason="requires pandas, numpy, and pyarrow")
150
+ @pytest.mark.parametrize(
151
+ "type_",
152
+ ALL_TYPES,
153
+ )
154
+ def test_all_types_numpy(type_):
155
+ if type_ is pdc.List:
156
+ type_obj = type_(pdc.Int64())
157
+ else:
158
+ type_obj = type_()
159
+ if type_ is pdc.NullType:
160
+ with pytest.raises(TypeError, match="pandas doesn't have a native null dtype"):
161
+ type_obj.to_pandas(PandasBackend.NUMPY)
162
+ elif type_ is pdc.Time:
163
+ with pytest.raises(TypeError, match="pandas doesn't have a native time dtype"):
164
+ type_obj.to_pandas(PandasBackend.NUMPY)
165
+ elif type_ is pdc.List:
166
+ with pytest.raises(TypeError, match="pandas doesn't have a native list dtype"):
167
+ type_obj.to_pandas(PandasBackend.NUMPY)
168
+ else:
169
+ dst_type = type_obj.to_pandas(PandasBackend.NUMPY)
170
+ back_type = Dtype.from_pandas(dst_type)
171
+ if type_ is pdc.Decimal:
172
+ assert isinstance(back_type, pdc.Float64)
173
+ elif type_ is pdc.Date:
174
+ assert isinstance(back_type, pdc.Datetime)
175
+ else:
176
+ assert isinstance(back_type, type_)
177
+
178
+
179
+ @pytest.mark.skipif(np is None, reason="requires pandas, numpy, and pyarrow")
180
+ @pytest.mark.parametrize(
181
+ "type_",
182
+ ALL_TYPES,
183
+ )
184
+ def test_all_types_arrow(type_):
185
+ if type_ is pdc.List:
186
+ type_obj = type_(pdc.Int64())
187
+ else:
188
+ type_obj = type_()
189
+ dst_type = type_obj.to_pandas(PandasBackend.ARROW)
190
+ back_type = Dtype.from_pandas(dst_type)
191
+ assert isinstance(back_type, type_)
@@ -1,9 +1,9 @@
1
1
  # Copyright (c) QuantCo and pydiverse contributors 2025-2025
2
2
  # SPDX-License-Identifier: BSD-3-Clause
3
- from typing import TYPE_CHECKING
4
3
 
5
4
  import pytest
6
5
 
6
+ import pydiverse.common as pdc
7
7
  from pydiverse.common import (
8
8
  Bool,
9
9
  Date,
@@ -22,13 +22,15 @@ from pydiverse.common import (
22
22
  UInt32,
23
23
  UInt64,
24
24
  )
25
+ from pydiverse.common.testing import ALL_TYPES
25
26
 
26
- pl = pytest.importorskip("polars")
27
-
28
- if TYPE_CHECKING:
27
+ try:
29
28
  import polars as pl
29
+ except ImportError:
30
+ pl = None
30
31
 
31
32
 
33
+ @pytest.mark.skipif(pl is None, reason="requires polars")
32
34
  def test_dtype_from_polars():
33
35
  def assert_conversion(type_, expected):
34
36
  assert Dtype.from_polars(type_) == expected
@@ -57,6 +59,7 @@ def test_dtype_from_polars():
57
59
  assert_conversion(pl.Datetime("ns"), Datetime())
58
60
 
59
61
 
62
+ @pytest.mark.skipif(pl is None, reason="requires polars")
60
63
  def test_dtype_to_polars():
61
64
  def assert_conversion(type_: Dtype, expected):
62
65
  assert type_.to_polars() == expected
@@ -80,3 +83,18 @@ def test_dtype_to_polars():
80
83
  assert_conversion(Date(), pl.Date)
81
84
  assert_conversion(Time(), pl.Time)
82
85
  assert_conversion(Datetime(), pl.Datetime("us"))
86
+
87
+
88
+ @pytest.mark.skipif(pl is None, reason="requires polars")
89
+ @pytest.mark.parametrize(
90
+ "type_",
91
+ ALL_TYPES,
92
+ )
93
+ def test_all_types(type_):
94
+ if type_ is pdc.List:
95
+ type_obj = type_(pdc.Int64())
96
+ else:
97
+ type_obj = type_()
98
+ dst_type = type_obj.to_polars()
99
+ back_type = Dtype.from_polars(dst_type)
100
+ assert isinstance(back_type, type_)
@@ -2,6 +2,9 @@
2
2
  # SPDX-License-Identifier: BSD-3-Clause
3
3
  import pytest
4
4
 
5
+ import pydiverse.common as pdc
6
+ from pydiverse.common.testing import ALL_TYPES
7
+
5
8
  try:
6
9
  import pyarrow as pa
7
10
  except ImportError:
@@ -87,3 +90,18 @@ def test_dtype_to_pyarrow():
87
90
  assert_conversion(Date(), pa.date32())
88
91
  assert_conversion(Time(), pa.time64("us"))
89
92
  assert_conversion(Datetime(), pa.timestamp("us"))
93
+
94
+
95
+ @pytest.mark.skipif(pa is None, reason="requires pandas, numpy, and pyarrow")
96
+ @pytest.mark.parametrize(
97
+ "type_",
98
+ ALL_TYPES,
99
+ )
100
+ def test_all_types(type_):
101
+ if type_ is pdc.List:
102
+ type_obj = type_(pdc.Int64())
103
+ else:
104
+ type_obj = type_()
105
+ dst_type = type_obj.to_arrow()
106
+ back_type = Dtype.from_arrow(dst_type)
107
+ assert isinstance(back_type, type_)
@@ -2,10 +2,12 @@
2
2
  # SPDX-License-Identifier: BSD-3-Clause
3
3
  import pytest
4
4
 
5
+ import pydiverse.common as pdc
5
6
  from pydiverse.common import (
6
7
  Bool,
7
8
  Date,
8
9
  Datetime,
10
+ Decimal,
9
11
  Dtype,
10
12
  Float32,
11
13
  Float64,
@@ -20,6 +22,7 @@ from pydiverse.common import (
20
22
  UInt32,
21
23
  UInt64,
22
24
  )
25
+ from pydiverse.common.testing import ALL_TYPES
23
26
 
24
27
  try:
25
28
  import sqlalchemy as sa
@@ -77,3 +80,29 @@ def test_dtype_to_sqlalchemy():
77
80
  assert_conversion(Date(), sa.Date)
78
81
  assert_conversion(Time(), sa.Time)
79
82
  assert_conversion(Datetime(), sa.DateTime)
83
+
84
+
85
+ @pytest.mark.skipif(sa is None, reason="requires polars")
86
+ @pytest.mark.parametrize(
87
+ "type_",
88
+ ALL_TYPES,
89
+ )
90
+ def test_all_types(type_):
91
+ if type_ is pdc.List:
92
+ type_obj = type_(pdc.Int64())
93
+ else:
94
+ type_obj = type_()
95
+ dst_type = type_obj.to_sql()
96
+ back_type = Dtype.from_sql(dst_type)
97
+ acceptance_map = {
98
+ # SQL is a bit less strict about integer precisions
99
+ Int8: Int16,
100
+ UInt8: Int16,
101
+ UInt16: Int32,
102
+ UInt32: Int64,
103
+ UInt64: Int64,
104
+ # we intentionally fetch Decimal as Float since Decimal is more of a relational
105
+ # database thing
106
+ Decimal: Float64,
107
+ }
108
+ assert isinstance(back_type, acceptance_map.get(type_, type_))