pydiverse-common 0.3.7__py3-none-any.whl → 0.3.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pydiverse/common/util/hashing.py +16 -13
- {pydiverse_common-0.3.7.dist-info → pydiverse_common-0.3.8.dist-info}/METADATA +1 -1
- {pydiverse_common-0.3.7.dist-info → pydiverse_common-0.3.8.dist-info}/RECORD +5 -5
- {pydiverse_common-0.3.7.dist-info → pydiverse_common-0.3.8.dist-info}/WHEEL +0 -0
- {pydiverse_common-0.3.7.dist-info → pydiverse_common-0.3.8.dist-info}/licenses/LICENSE +0 -0
pydiverse/common/util/hashing.py
CHANGED
@@ -73,24 +73,27 @@ def hash_polars_dataframe(df: pl.DataFrame, use_init_repr=False) -> str:
|
|
73
73
|
list_columns = [
|
74
74
|
col for col, dtype in df.schema.items() if dtype == pl.List
|
75
75
|
]
|
76
|
-
|
77
|
-
|
78
|
-
.with_columns(pl.col(array_columns).reshape([-1]).implode())
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
)
|
76
|
+
lf = df.lazy()
|
77
|
+
if array_columns:
|
78
|
+
lf = lf.with_columns(pl.col(array_columns).reshape([-1]).implode())
|
79
|
+
lf = lf.with_columns(
|
80
|
+
# Necessary because hash() does not work on lists of strings.
|
81
|
+
# This can be removed when
|
82
|
+
# https://github.com/pola-rs/polars/issues/21523 is resolved
|
83
|
+
# in all supported versions of polars.
|
84
|
+
pl.selectors.by_dtype(pl.List(pl.String)).list.eval(
|
85
|
+
pl.element().hash()
|
87
86
|
)
|
87
|
+
)
|
88
|
+
if list_columns or array_columns:
|
88
89
|
# Necessary because hash_rows() does not work on lists.
|
89
90
|
# This can be removed when
|
90
91
|
# https://github.com/pola-rs/polars/issues/24121 is resolved
|
91
92
|
# in all supported versions of polars.
|
92
|
-
.with_columns(pl.col(*list_columns, *array_columns).hash())
|
93
|
-
|
93
|
+
lf = lf.with_columns(pl.col(*list_columns, *array_columns).hash())
|
94
|
+
|
95
|
+
content_hash = str(
|
96
|
+
lf.collect()
|
94
97
|
.hash_rows() # We get a Series of hashes, one for each row
|
95
98
|
# Since polars only hashes rows, we need to implode the Series into
|
96
99
|
# a single row to get a single hash
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: pydiverse-common
|
3
|
-
Version: 0.3.7
|
3
|
+
Version: 0.3.8
|
4
4
|
Summary: Common functionality shared between pydiverse libraries
|
5
5
|
Author: QuantCo, Inc.
|
6
6
|
Author-email: Martin Trautmann <windiana@users.sf.net>, Finn Rudolph <finn.rudolph@t-online.de>
|
@@ -8,10 +8,10 @@ pydiverse/common/util/computation_tracing.py,sha256=HeXRHRUI8vxpzQ27Xcpa0StndSTP
|
|
8
8
|
pydiverse/common/util/deep_map.py,sha256=JtY5ViWMMelOiLzPF7ZjzruCfB-bETISGxCk37qETxg,2540
|
9
9
|
pydiverse/common/util/deep_merge.py,sha256=bV5p5_lsC-9nFah28EiEyG2h6U3Z5AuTqSooxOgCHN0,1929
|
10
10
|
pydiverse/common/util/disposable.py,sha256=4XoGz70YRWA9TAqnUBvRCTAdsOGBviFN0gzxU7veY9o,993
|
11
|
-
pydiverse/common/util/hashing.py,sha256=
|
11
|
+
pydiverse/common/util/hashing.py,sha256=8Z1NybJ_zd3ONpn5annHGjowwArWkd2ZkCtlb3dtz_Q,4576
|
12
12
|
pydiverse/common/util/import_.py,sha256=K7dSgz4YyrqEvqhoOzbwgD7D8HScMoO5XoSWtjbaoUs,4056
|
13
13
|
pydiverse/common/util/structlog.py,sha256=xxhauxMuyxcKXTVg1MiPTkuvPBj8Zcr4o_v8Bq59Nig,3778
|
14
|
-
pydiverse_common-0.3.
|
15
|
-
pydiverse_common-0.3.7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
16
|
-
pydiverse_common-0.3.7.dist-info/licenses/LICENSE,sha256=AcE6SDVuAq6v9ZLE_8eOCe_NvSE0rAPR3NR7lSowYh4,1517
|
17
|
-
pydiverse_common-0.3.7.dist-info/RECORD,,
|
14
|
+
pydiverse_common-0.3.8.dist-info/METADATA,sha256=ptAGp299BY9NSaM-XEaojLzhL_KVc0SEY-MFqqqAwL0,3399
|
15
|
+
pydiverse_common-0.3.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
16
|
+
pydiverse_common-0.3.8.dist-info/licenses/LICENSE,sha256=AcE6SDVuAq6v9ZLE_8eOCe_NvSE0rAPR3NR7lSowYh4,1517
|
17
|
+
pydiverse_common-0.3.8.dist-info/RECORD,,
|
File without changes
|
File without changes
|