pydiverse-common 0.3.7__tar.gz → 0.3.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/PKG-INFO +1 -1
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/docs/source/changelog.md +6 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/pyproject.toml +1 -1
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/src/pydiverse/common/dtypes.py +27 -3
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/src/pydiverse/common/util/hashing.py +16 -13
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/tests/dtypes/test_dtype_pyarrow.py +14 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/tests/test_util.py +11 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/.gitattributes +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/.github/CODEOWNERS +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/.github/dependabot.yml +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/.github/scripts/check_deps.sh +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/.github/workflows/release.yml +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/.github/workflows/tests.yml +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/.github/workflows/update-lockfiles.yml +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/.gitignore +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/.pre-commit-config.yaml +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/.readthedocs.yaml +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/LICENSE +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/README.md +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/docs/Makefile +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/docs/make.bat +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/docs/package/README.md +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/docs/source/conf.py +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/docs/source/index.md +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/docs/source/license.md +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/docs/source/reference/api.rst +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/pixi.lock +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/pixi.toml +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/pytest.ini +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/src/pydiverse/common/__init__.py +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/src/pydiverse/common/errors/__init__.py +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/src/pydiverse/common/testing.py +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/src/pydiverse/common/util/__init__.py +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/src/pydiverse/common/util/computation_tracing.py +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/src/pydiverse/common/util/deep_map.py +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/src/pydiverse/common/util/deep_merge.py +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/src/pydiverse/common/util/disposable.py +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/src/pydiverse/common/util/import_.py +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/src/pydiverse/common/util/structlog.py +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/src/pydiverse/common/version.py +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/tests/conftest.py +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/tests/dtypes/test_dtype_pandas.py +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/tests/dtypes/test_dtype_polars.py +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/tests/dtypes/test_dtype_sqlalchemy.py +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/tests/test_version.py +0 -0
- {pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/typos.toml +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: pydiverse-common
|
3
|
-
Version: 0.3.7
|
3
|
+
Version: 0.3.9
|
4
4
|
Summary: Common functionality shared between pydiverse libraries
|
5
5
|
Author: QuantCo, Inc.
|
6
6
|
Author-email: Martin Trautmann <windiana@users.sf.net>, Finn Rudolph <finn.rudolph@t-online.de>
|
@@ -201,6 +201,11 @@ class Dtype:
|
|
201
201
|
return NullType()
|
202
202
|
if pa.types.is_list(arrow_type):
|
203
203
|
return List(Dtype.from_arrow(arrow_type.value_type))
|
204
|
+
if pa.types.is_dictionary(arrow_type):
|
205
|
+
raise RuntimeError(
|
206
|
+
"Most likely this is an Enum type. But metadata about categories is "
|
207
|
+
"only in the pyarrow field and not in the pyarrow dtype"
|
208
|
+
)
|
204
209
|
raise TypeError
|
205
210
|
|
206
211
|
@staticmethod
|
@@ -268,7 +273,7 @@ class Dtype:
|
|
268
273
|
if backend == PandasBackend.NUMPY:
|
269
274
|
return self.to_pandas_nullable(backend)
|
270
275
|
if backend == PandasBackend.ARROW:
|
271
|
-
if self == String():
|
276
|
+
if self == String() or isinstance(self, Enum):
|
272
277
|
return pd.StringDtype(storage="pyarrow")
|
273
278
|
return pd.ArrowDtype(self.to_arrow())
|
274
279
|
|
@@ -355,6 +360,12 @@ class Dtype:
|
|
355
360
|
NullType(): pa.null(),
|
356
361
|
}[self]
|
357
362
|
|
363
|
+
def to_arrow_field(self, name: str, nullable: bool = True):
|
364
|
+
"""Convert this Dtype to a PyArrow Field."""
|
365
|
+
import pyarrow as pa
|
366
|
+
|
367
|
+
return pa.field(name, self.to_arrow(), nullable=nullable)
|
368
|
+
|
358
369
|
def to_polars(self: "Dtype"):
|
359
370
|
"""Convert this Dtype to a Polars type."""
|
360
371
|
import polars as pl
|
@@ -506,6 +517,19 @@ class Enum(String):
|
|
506
517
|
def to_arrow(self):
|
507
518
|
import pyarrow as pa
|
508
519
|
|
509
|
-
#
|
510
|
-
# Maybe it is better to convert to this.
|
520
|
+
# enum categories can only be maintained in pyarrow field (see to_arrow_field)
|
511
521
|
return pa.string()
|
522
|
+
|
523
|
+
def to_arrow_field(self, name: str, nullable: bool = True):
|
524
|
+
"""Convert this Dtype to a PyArrow Field."""
|
525
|
+
import pyarrow as pa
|
526
|
+
|
527
|
+
# try to mimic what polars does
|
528
|
+
return pa.field(
|
529
|
+
name,
|
530
|
+
pa.dictionary(pa.uint32(), pa.large_string()),
|
531
|
+
nullable=nullable,
|
532
|
+
metadata={
|
533
|
+
"_PL_ENUM_VALUES": "".join([f"{len(c)};{c}" for c in self.categories])
|
534
|
+
},
|
535
|
+
)
|
@@ -73,24 +73,27 @@ def hash_polars_dataframe(df: pl.DataFrame, use_init_repr=False) -> str:
|
|
73
73
|
list_columns = [
|
74
74
|
col for col, dtype in df.schema.items() if dtype == pl.List
|
75
75
|
]
|
76
|
-
|
77
|
-
|
78
|
-
.with_columns(pl.col(array_columns).reshape([-1]).implode())
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
)
|
76
|
+
lf = df.lazy()
|
77
|
+
if array_columns:
|
78
|
+
lf = lf.with_columns(pl.col(array_columns).reshape([-1]).implode())
|
79
|
+
lf = lf.with_columns(
|
80
|
+
# Necessary because hash() does not work on lists of strings.
|
81
|
+
# This can be removed when
|
82
|
+
# https://github.com/pola-rs/polars/issues/21523 is resolved
|
83
|
+
# in all supported versions of polars.
|
84
|
+
pl.selectors.by_dtype(pl.List(pl.String)).list.eval(
|
85
|
+
pl.element().hash()
|
87
86
|
)
|
87
|
+
)
|
88
|
+
if list_columns or array_columns:
|
88
89
|
# Necessary because hash_rows() does not work on lists.
|
89
90
|
# This can be removed when
|
90
91
|
# https://github.com/pola-rs/polars/issues/24121 is resolved
|
91
92
|
# in all supported versions of polars.
|
92
|
-
.with_columns(pl.col(*list_columns, *array_columns).hash())
|
93
|
-
|
93
|
+
lf = lf.with_columns(pl.col(*list_columns, *array_columns).hash())
|
94
|
+
|
95
|
+
content_hash = str(
|
96
|
+
lf.collect()
|
94
97
|
.hash_rows() # We get a Series of hashes, one for each row
|
95
98
|
# Since polars only hashes rows, we need to implode the Series into
|
96
99
|
# a single row to get a single hash
|
@@ -95,6 +95,20 @@ def test_dtype_to_pyarrow():
|
|
95
95
|
assert_conversion(Datetime(), pa.timestamp("us"))
|
96
96
|
|
97
97
|
|
98
|
+
@pytest.mark.skipif(pa is None, reason="requires pyarrow")
|
99
|
+
def test_dtype_to_pyarrow_enum():
|
100
|
+
import polars as pl
|
101
|
+
|
102
|
+
def assert_conversion(type_: Dtype, expected_dtype):
|
103
|
+
df = pl.DataFrame(dict(x=["a"]), schema=dict(x=expected_dtype))
|
104
|
+
expected = df.to_arrow().schema
|
105
|
+
actual = pa.schema([type_.to_arrow_field("x", nullable=True)])
|
106
|
+
assert actual == expected
|
107
|
+
assert actual.field(0).metadata == expected.field(0).metadata
|
108
|
+
|
109
|
+
assert_conversion(Enum("a", "b;c"), pl.Enum(["a", "b;c"]))
|
110
|
+
|
111
|
+
|
98
112
|
@pytest.mark.skipif(pa is None, reason="requires pyarrow")
|
99
113
|
@pytest.mark.parametrize(
|
100
114
|
"type_",
|
@@ -142,6 +142,17 @@ def check_df_hashes(df1_a: pl.DataFrame, other_dfs: list[pl.DataFrame]) -> None:
|
|
142
142
|
) == hash_polars_dataframe(df1_other, use_init_repr=True)
|
143
143
|
|
144
144
|
|
145
|
+
@pytest.mark.skipif(pl.DataFrame is None, reason="requires polars")
|
146
|
+
def test_hashing_basic():
|
147
|
+
df1_a = pl.DataFrame(dict(x=[1]))
|
148
|
+
df1_b = pl.DataFrame(dict(y=[1]))
|
149
|
+
df1_c = pl.DataFrame(dict(x=[2]))
|
150
|
+
df1_d = pl.DataFrame(dict(x=[1.0]))
|
151
|
+
df1_e = pl.DataFrame(dict(x=[]))
|
152
|
+
|
153
|
+
check_df_hashes(df1_a, [df1_b, df1_c, df1_d, df1_e])
|
154
|
+
|
155
|
+
|
145
156
|
@pytest.mark.skipif(pl.DataFrame is None, reason="requires polars")
|
146
157
|
def test_hashing():
|
147
158
|
df1_a = pl.DataFrame(
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{pydiverse_common-0.3.7 → pydiverse_common-0.3.9}/src/pydiverse/common/util/computation_tracing.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|