cudf-polars-cu13 25.10.0__py3-none-any.whl → 26.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cudf_polars/GIT_COMMIT +1 -1
- cudf_polars/VERSION +1 -1
- cudf_polars/callback.py +60 -15
- cudf_polars/containers/column.py +137 -77
- cudf_polars/containers/dataframe.py +123 -34
- cudf_polars/containers/datatype.py +134 -13
- cudf_polars/dsl/expr.py +0 -2
- cudf_polars/dsl/expressions/aggregation.py +80 -28
- cudf_polars/dsl/expressions/binaryop.py +34 -14
- cudf_polars/dsl/expressions/boolean.py +110 -37
- cudf_polars/dsl/expressions/datetime.py +59 -30
- cudf_polars/dsl/expressions/literal.py +11 -5
- cudf_polars/dsl/expressions/rolling.py +460 -119
- cudf_polars/dsl/expressions/selection.py +9 -8
- cudf_polars/dsl/expressions/slicing.py +1 -1
- cudf_polars/dsl/expressions/string.py +256 -114
- cudf_polars/dsl/expressions/struct.py +19 -7
- cudf_polars/dsl/expressions/ternary.py +33 -3
- cudf_polars/dsl/expressions/unary.py +126 -64
- cudf_polars/dsl/ir.py +1053 -350
- cudf_polars/dsl/to_ast.py +30 -13
- cudf_polars/dsl/tracing.py +194 -0
- cudf_polars/dsl/translate.py +307 -107
- cudf_polars/dsl/utils/aggregations.py +43 -30
- cudf_polars/dsl/utils/reshape.py +14 -2
- cudf_polars/dsl/utils/rolling.py +12 -8
- cudf_polars/dsl/utils/windows.py +35 -20
- cudf_polars/experimental/base.py +55 -2
- cudf_polars/experimental/benchmarks/pdsds.py +12 -126
- cudf_polars/experimental/benchmarks/pdsh.py +792 -2
- cudf_polars/experimental/benchmarks/utils.py +596 -39
- cudf_polars/experimental/dask_registers.py +47 -20
- cudf_polars/experimental/dispatch.py +9 -3
- cudf_polars/experimental/distinct.py +2 -0
- cudf_polars/experimental/explain.py +15 -2
- cudf_polars/experimental/expressions.py +30 -15
- cudf_polars/experimental/groupby.py +25 -4
- cudf_polars/experimental/io.py +156 -124
- cudf_polars/experimental/join.py +53 -23
- cudf_polars/experimental/parallel.py +68 -19
- cudf_polars/experimental/rapidsmpf/__init__.py +8 -0
- cudf_polars/experimental/rapidsmpf/collectives/__init__.py +9 -0
- cudf_polars/experimental/rapidsmpf/collectives/allgather.py +90 -0
- cudf_polars/experimental/rapidsmpf/collectives/common.py +96 -0
- cudf_polars/experimental/rapidsmpf/collectives/shuffle.py +253 -0
- cudf_polars/experimental/rapidsmpf/core.py +488 -0
- cudf_polars/experimental/rapidsmpf/dask.py +172 -0
- cudf_polars/experimental/rapidsmpf/dispatch.py +153 -0
- cudf_polars/experimental/rapidsmpf/io.py +696 -0
- cudf_polars/experimental/rapidsmpf/join.py +322 -0
- cudf_polars/experimental/rapidsmpf/lower.py +74 -0
- cudf_polars/experimental/rapidsmpf/nodes.py +735 -0
- cudf_polars/experimental/rapidsmpf/repartition.py +216 -0
- cudf_polars/experimental/rapidsmpf/union.py +115 -0
- cudf_polars/experimental/rapidsmpf/utils.py +374 -0
- cudf_polars/experimental/repartition.py +9 -2
- cudf_polars/experimental/select.py +177 -14
- cudf_polars/experimental/shuffle.py +46 -12
- cudf_polars/experimental/sort.py +100 -26
- cudf_polars/experimental/spilling.py +1 -1
- cudf_polars/experimental/statistics.py +24 -5
- cudf_polars/experimental/utils.py +25 -7
- cudf_polars/testing/asserts.py +13 -8
- cudf_polars/testing/io.py +2 -1
- cudf_polars/testing/plugin.py +93 -17
- cudf_polars/typing/__init__.py +86 -32
- cudf_polars/utils/config.py +473 -58
- cudf_polars/utils/cuda_stream.py +70 -0
- cudf_polars/utils/versions.py +5 -4
- cudf_polars_cu13-26.2.0.dist-info/METADATA +181 -0
- cudf_polars_cu13-26.2.0.dist-info/RECORD +108 -0
- {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/WHEEL +1 -1
- cudf_polars_cu13-25.10.0.dist-info/METADATA +0 -136
- cudf_polars_cu13-25.10.0.dist-info/RECORD +0 -92
- {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/licenses/LICENSE +0 -0
- {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/top_level.txt +0 -0
cudf_polars/testing/plugin.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024-
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
"""Plugin for running polars test suite setting GPU engine as default."""
|
|
@@ -12,8 +12,12 @@ import pytest
|
|
|
12
12
|
|
|
13
13
|
import polars
|
|
14
14
|
|
|
15
|
+
from cudf_polars.utils.config import StreamingFallbackMode
|
|
16
|
+
from cudf_polars.utils.versions import POLARS_VERSION_LT_135
|
|
17
|
+
|
|
15
18
|
if TYPE_CHECKING:
|
|
16
19
|
from collections.abc import Mapping
|
|
20
|
+
from typing import Any
|
|
17
21
|
|
|
18
22
|
|
|
19
23
|
def pytest_addoption(parser: pytest.Parser) -> None:
|
|
@@ -26,17 +30,50 @@ def pytest_addoption(parser: pytest.Parser) -> None:
|
|
|
26
30
|
action="store_true",
|
|
27
31
|
help="Turn off fallback to CPU when running tests (default use fallback)",
|
|
28
32
|
)
|
|
33
|
+
group.addoption(
|
|
34
|
+
"--executor",
|
|
35
|
+
action="store",
|
|
36
|
+
default="in-memory",
|
|
37
|
+
choices=("in-memory", "streaming"),
|
|
38
|
+
help="Executor to use for GPUEngine.",
|
|
39
|
+
)
|
|
40
|
+
group.addoption(
|
|
41
|
+
"--blocksize-mode",
|
|
42
|
+
action="store",
|
|
43
|
+
default="default",
|
|
44
|
+
choices=("small", "default"),
|
|
45
|
+
help=(
|
|
46
|
+
"Blocksize to use for 'streaming' executor. Set to 'small' "
|
|
47
|
+
"to run most tests with multiple partitions."
|
|
48
|
+
),
|
|
49
|
+
)
|
|
29
50
|
|
|
30
51
|
|
|
31
52
|
def pytest_configure(config: pytest.Config) -> None:
|
|
32
53
|
"""Enable use of this module as a pytest plugin to enable GPU collection."""
|
|
33
54
|
no_fallback = config.getoption("--cudf-polars-no-fallback")
|
|
55
|
+
executor = config.getoption("--executor")
|
|
56
|
+
blocksize_mode = config.getoption("--blocksize-mode")
|
|
34
57
|
if no_fallback:
|
|
35
58
|
collect = polars.LazyFrame.collect
|
|
36
59
|
engine = polars.GPUEngine(raise_on_fail=no_fallback)
|
|
37
60
|
# https://github.com/python/mypy/issues/2427
|
|
38
|
-
polars.LazyFrame.collect = partialmethod(collect, engine=engine) # type: ignore[method-assign,assignment]
|
|
61
|
+
polars.LazyFrame.collect = partialmethod(collect, engine=engine) # type: ignore[method-assign, assignment]
|
|
62
|
+
elif executor == "in-memory":
|
|
63
|
+
collect = polars.LazyFrame.collect
|
|
64
|
+
engine = polars.GPUEngine(executor=executor)
|
|
65
|
+
polars.LazyFrame.collect = partialmethod(collect, engine=engine) # type: ignore[method-assign, assignment]
|
|
66
|
+
elif executor == "streaming" and blocksize_mode == "small":
|
|
67
|
+
executor_options: dict[str, Any] = {}
|
|
68
|
+
executor_options["max_rows_per_partition"] = 4
|
|
69
|
+
executor_options["target_partition_size"] = 10
|
|
70
|
+
# We expect many tests to fall back, so silence the warnings
|
|
71
|
+
executor_options["fallback_mode"] = StreamingFallbackMode.SILENT
|
|
72
|
+
collect = polars.LazyFrame.collect
|
|
73
|
+
engine = polars.GPUEngine(executor=executor, executor_options=executor_options)
|
|
74
|
+
polars.LazyFrame.collect = partialmethod(collect, engine=engine) # type: ignore[method-assign, assignment]
|
|
39
75
|
else:
|
|
76
|
+
# run with streaming executor and default blocksize
|
|
40
77
|
polars.Config.set_engine_affinity("gpu")
|
|
41
78
|
config.addinivalue_line(
|
|
42
79
|
"filterwarnings",
|
|
@@ -57,10 +94,13 @@ EXPECTED_FAILURES: Mapping[str, str | tuple[str, bool]] = {
|
|
|
57
94
|
"tests/unit/io/test_delta.py::test_scan_delta_schema_evolution_nested_struct_field_19915": "Need to expose hive partitioning",
|
|
58
95
|
"tests/unit/io/test_delta.py::test_scan_delta_nanosecond_timestamp": "polars generates the wrong schema: https://github.com/pola-rs/polars/issues/23949",
|
|
59
96
|
"tests/unit/io/test_delta.py::test_scan_delta_nanosecond_timestamp_nested": "polars generates the wrong schema: https://github.com/pola-rs/polars/issues/23949",
|
|
97
|
+
"tests/unit/io/test_delta.py::test_scan_delta_loads_aws_profile_endpoint_url": (
|
|
98
|
+
"See https://github.com/rapidsai/cudf/pull/20791#issuecomment-3750528419",
|
|
99
|
+
not POLARS_VERSION_LT_135,
|
|
100
|
+
),
|
|
60
101
|
"tests/unit/io/test_lazy_count_star.py::test_count_compressed_csv_18057": "Need to determine if file is compressed",
|
|
61
102
|
"tests/unit/io/test_lazy_count_star.py::test_count_parquet[small.parquet-4]": "Debug output on stderr doesn't match",
|
|
62
103
|
"tests/unit/io/test_lazy_count_star.py::test_count_parquet[foods*.parquet-54]": "Debug output on stderr doesn't match",
|
|
63
|
-
"tests/unit/io/test_lazy_csv.py::test_scan_csv_slice_offset_zero": "Integer overflow in sliced read",
|
|
64
104
|
"tests/unit/io/test_lazy_parquet.py::test_parquet_is_in_statistics": "Debug output on stderr doesn't match",
|
|
65
105
|
"tests/unit/io/test_lazy_parquet.py::test_parquet_statistics": "Debug output on stderr doesn't match",
|
|
66
106
|
"tests/unit/io/test_partition.py::test_partition_to_memory[io_type0]": "partition sinks not yet supported in standard engine.",
|
|
@@ -112,17 +152,12 @@ EXPECTED_FAILURES: Mapping[str, str | tuple[str, bool]] = {
|
|
|
112
152
|
"tests/unit/io/test_parquet.py::test_allow_missing_columns[projection0-True-columns]": "Mismatching column read cudf#16394",
|
|
113
153
|
"tests/unit/io/test_parquet.py::test_allow_missing_columns[projection1-True-columns]": "Mismatching column read cudf#16394",
|
|
114
154
|
"tests/unit/io/test_parquet.py::test_scan_parquet_filter_statistics_load_missing_column_21391": "Mismatching column read cudf#16394",
|
|
115
|
-
"tests/unit/io/test_parquet.py::
|
|
116
|
-
"tests/unit/io/test_parquet_field_overwrites.py::test_required_flat": "cannot serialize in-memory sink target.",
|
|
117
|
-
"tests/unit/io/test_parquet_field_overwrites.py::test_required_list[dtype0]": "cannot serialize in-memory sink target.",
|
|
118
|
-
"tests/unit/io/test_parquet_field_overwrites.py::test_required_list[dtype1]": "cannot serialize in-memory sink target.",
|
|
119
|
-
"tests/unit/io/test_parquet_field_overwrites.py::test_required_struct": "cannot serialize in-memory sink target.",
|
|
155
|
+
"tests/unit/io/test_parquet.py::test_binary_offset_roundtrip": "binary offset type unsupported",
|
|
120
156
|
"tests/unit/lazyframe/test_engine_selection.py::test_engine_import_error_raises[gpu]": "Expect this to pass because cudf-polars is installed",
|
|
121
157
|
"tests/unit/lazyframe/test_engine_selection.py::test_engine_import_error_raises[engine1]": "Expect this to pass because cudf-polars is installed",
|
|
122
158
|
"tests/unit/lazyframe/test_lazyframe.py::test_round[dtype1-123.55-1-123.6]": "Rounding midpoints is handled incorrectly",
|
|
123
159
|
"tests/unit/lazyframe/test_lazyframe.py::test_cast_frame": "Casting that raises not supported on GPU",
|
|
124
160
|
"tests/unit/lazyframe/test_lazyframe.py::test_lazy_cache_hit": "Debug output on stderr doesn't match",
|
|
125
|
-
"tests/unit/lazyframe/test_collect_schema.py::test_collect_schema_parametric": "polars returns decimal column with precision=None",
|
|
126
161
|
"tests/unit/operations/aggregation/test_aggregations.py::test_binary_op_agg_context_no_simplify_expr_12423": "groupby-agg of just literals should not produce collect_list",
|
|
127
162
|
"tests/unit/operations/aggregation/test_aggregations.py::test_nan_inf_aggregation": "treatment of nans and nulls together is different in libcudf and polars in groupby-agg context",
|
|
128
163
|
"tests/unit/operations/test_abs.py::test_abs_duration": "Need to raise for unsupported uops on timelike values",
|
|
@@ -143,24 +178,26 @@ EXPECTED_FAILURES: Mapping[str, str | tuple[str, bool]] = {
|
|
|
143
178
|
"tests/unit/operations/test_group_by.py::test_group_by_series_lit_22103[False]": "Incorrect broadcasting of literals in groupby-agg",
|
|
144
179
|
"tests/unit/operations/test_group_by.py::test_group_by_series_lit_22103[True]": "Incorrect broadcasting of literals in groupby-agg",
|
|
145
180
|
"tests/unit/operations/test_join.py::test_cross_join_slice_pushdown": "Need to implement slice pushdown for cross joins",
|
|
146
|
-
|
|
181
|
+
# We match the behavior of the polars[cpu] streaming engine (it doesn't make any ordering guarantees either when maintain_order is none).
|
|
182
|
+
# But this test does expect a preserved order, because it is run with the polars[cpu] in-memory engine, which still preserves the order of the left dataframe
|
|
183
|
+
# when maintain_order is none.
|
|
184
|
+
"tests/unit/operations/test_join.py::test_join_preserve_order_left": "polars[gpu] makes no ordering guarantees when maintain_order is none",
|
|
185
|
+
# TODO: As of polars 1.34, the column names for left and right came in unaligned, which causes the dtypes to mismatch when calling plc.replace.replace_nulls
|
|
186
|
+
# Need to investigate what changed in polars
|
|
187
|
+
"tests/unit/operations/test_join.py::test_join_coalesce_column_order_23177": "Misaligned left/right column names left and right tables in join op",
|
|
147
188
|
"tests/unit/operations/namespaces/string/test_pad.py::test_str_zfill_unicode_not_respected": "polars doesn't add zeros for unicode characters.",
|
|
148
189
|
"tests/unit/sql/test_cast.py::test_cast_errors[values0-values::uint8-conversion from `f64` to `u64` failed]": "Casting that raises not supported on GPU",
|
|
149
190
|
"tests/unit/sql/test_cast.py::test_cast_errors[values1-values::uint4-conversion from `i64` to `u32` failed]": "Casting that raises not supported on GPU",
|
|
150
191
|
"tests/unit/sql/test_cast.py::test_cast_errors[values2-values::int1-conversion from `i64` to `i8` failed]": "Casting that raises not supported on GPU",
|
|
151
192
|
"tests/unit/sql/test_cast.py::test_cast_errors[values5-values::int4-conversion from `str` to `i32` failed]": "Cast raises, but error user receives is wrong",
|
|
152
193
|
"tests/unit/sql/test_miscellaneous.py::test_read_csv": "Incorrect handling of missing_is_null in read_csv",
|
|
153
|
-
"tests/unit/sql/test_wildcard_opts.py::test_select_wildcard_errors": "Raises correctly but with different exception",
|
|
154
194
|
"tests/unit/test_cse.py::test_cse_predicate_self_join": "Debug output on stderr doesn't match",
|
|
155
195
|
"tests/unit/test_cse.py::test_nested_cache_no_panic_16553": "Needs https://github.com/rapidsai/cudf/issues/18630",
|
|
156
196
|
"tests/unit/test_errors.py::test_error_on_empty_group_by": "Incorrect exception raised",
|
|
157
197
|
"tests/unit/test_predicates.py::test_predicate_pushdown_split_pushable": "Casting that raises not supported on GPU",
|
|
158
198
|
"tests/unit/io/test_scan_row_deletion.py::test_scan_row_deletion_skips_file_with_all_rows_deleted": "The test intentionally corrupts the parquet file, so we cannot read the row count from the header.",
|
|
159
199
|
"tests/unit/io/test_multiscan.py::test_multiscan_row_index[scan_csv-write_csv-csv]": "Debug output on stderr doesn't match",
|
|
160
|
-
"tests/unit/
|
|
161
|
-
"tests/unit/sql/test_temporal.py::test_implicit_temporal_strings[dt IN ('1960-01-07','2077-01-01','2222-02-22')-expected15]": "Needs https://github.com/pola-rs/polars/issues/23020",
|
|
162
|
-
"tests/unit/sql/test_operators.py::test_in_not_in[dt NOT IN ('1950-12-24', '1997-07-05')]": "Needs https://github.com/pola-rs/polars/issues/23020",
|
|
163
|
-
"tests/unit/sql/test_operators.py::test_in_not_in[dt IN ('2020-10-10', '2077-03-18')]": "Needs https://github.com/pola-rs/polars/issues/23020",
|
|
200
|
+
"tests/unit/datatypes/test_decimal.py::test_decimal_aggregations": "https://github.com/rapidsai/cudf/issues/20508",
|
|
164
201
|
"tests/unit/datatypes/test_struct.py::test_struct_agg_all": "Needs nested list[struct] support",
|
|
165
202
|
"tests/unit/constructors/test_structs.py::test_constructor_non_strict_schema_17956": "Needs nested list[struct] support",
|
|
166
203
|
"tests/unit/io/test_delta.py::test_read_delta_arrow_map_type": "Needs nested list[struct] support",
|
|
@@ -174,8 +211,31 @@ EXPECTED_FAILURES: Mapping[str, str | tuple[str, bool]] = {
|
|
|
174
211
|
"tests/unit/io/test_lazy_parquet.py::test_parquet_schema_arg[False-row_groups]": "allow_missing_columns argument in read_parquet not translated in IR",
|
|
175
212
|
"tests/unit/io/test_lazy_parquet.py::test_parquet_schema_arg[False-prefiltered]": "allow_missing_columns argument in read_parquet not translated in IR",
|
|
176
213
|
"tests/unit/io/test_lazy_parquet.py::test_parquet_schema_arg[False-none]": "allow_missing_columns argument in read_parquet not translated in IR",
|
|
177
|
-
"tests/unit/
|
|
178
|
-
"tests/unit/
|
|
214
|
+
"tests/unit/test_cse.py::test_cse_predicate_self_join[False]": "polars removed the refcount in the logical plan",
|
|
215
|
+
"tests/unit/io/test_multiscan.py::test_multiscan_row_index[scan_csv-write_csv]": "CSV multiscan with row_index and no row limit is not yet supported.",
|
|
216
|
+
"tests/unit/io/test_scan.py::test_scan_empty_paths_friendly_error[scan_parquet-failed to retrieve first file schema (parquet)-'parquet scan']": "Debug output on stderr doesn't match",
|
|
217
|
+
"tests/unit/io/test_scan.py::test_scan_empty_paths_friendly_error[scan_ipc-failed to retrieve first file schema (ipc)-'ipc scan']": "Debug output on stderr doesn't match",
|
|
218
|
+
"tests/unit/io/test_scan.py::test_scan_empty_paths_friendly_error[scan_csv-failed to retrieve file schemas (csv)-'csv scan']": "Debug output on stderr doesn't match",
|
|
219
|
+
"tests/unit/io/test_scan.py::test_scan_empty_paths_friendly_error[scan_ndjson-failed to retrieve first file schema (ndjson)-'ndjson scan']": "Debug output on stderr doesn't match",
|
|
220
|
+
"tests/unit/operations/test_slice.py::test_schema_gather_get_on_literal_24101[lit1-idx2-False]": "Aggregating a list literal: cudf#19610",
|
|
221
|
+
"tests/unit/operations/test_slice.py::test_schema_gather_get_on_literal_24101[lit2-idx2-False]": "Aggregating a list literal: cudf#19610",
|
|
222
|
+
"tests/unit/operations/test_slice.py::test_schema_gather_get_on_literal_24101[lit1-0-False]": "Aggregating a list literal: cudf#19610",
|
|
223
|
+
"tests/unit/operations/test_slice.py::test_schema_gather_get_on_literal_24101[lit1-idx1-False]": "Aggregating a list literal: cudf#19610",
|
|
224
|
+
"tests/unit/operations/test_slice.py::test_schema_gather_get_on_literal_24101[lit2-0-False]": "Aggregating a list literal: cudf#19610",
|
|
225
|
+
"tests/unit/operations/test_slice.py::test_schema_gather_get_on_literal_24101[lit2-idx1-False]": "Aggregating a list literal: cudf#19610",
|
|
226
|
+
"tests/unit/operations/test_slice.py::test_schema_head_tail_on_literal_24102[lit1-1-False]": "Aggregating a list literal: cudf#19610",
|
|
227
|
+
"tests/unit/operations/test_slice.py::test_schema_head_tail_on_literal_24102[lit1-len1-False]": "Aggregating a list literal: cudf#19610",
|
|
228
|
+
"tests/unit/operations/test_slice.py::test_schema_head_tail_on_literal_24102[lit2-1-False]": "Aggregating a list literal: cudf#19610",
|
|
229
|
+
"tests/unit/operations/test_slice.py::test_schema_head_tail_on_literal_24102[lit2-len1-False]": "Aggregating a list literal: cudf#19610",
|
|
230
|
+
"tests/unit/operations/test_slice.py::test_schema_slice_on_literal_23999[lit2-offset1-0-False]": "Aggregating a list literal: cudf#19610",
|
|
231
|
+
"tests/unit/operations/test_slice.py::test_schema_slice_on_literal_23999[lit2-offset1-len1-False]": "Aggregating a list literal: cudf#19610",
|
|
232
|
+
"tests/unit/operations/test_slice.py::test_schema_slice_on_literal_23999[lit1-0-len1-False]": "Aggregating a list literal: cudf#19610",
|
|
233
|
+
"tests/unit/operations/test_slice.py::test_schema_slice_on_literal_23999[lit1-offset1-0-False]": "Aggregating a list literal: cudf#19610",
|
|
234
|
+
"tests/unit/operations/test_slice.py::test_schema_slice_on_literal_23999[lit1-offset1-len1-False]": "Aggregating a list literal: cudf#19610",
|
|
235
|
+
"tests/unit/operations/test_slice.py::test_schema_slice_on_literal_23999[lit2-0-0-False]": "Aggregating a list literal: cudf#19610",
|
|
236
|
+
"tests/unit/operations/test_slice.py::test_schema_slice_on_literal_23999[lit2-0-len1-False]": "Aggregating a list literal: cudf#19610",
|
|
237
|
+
"tests/unit/operations/test_slice.py::test_schema_slice_on_literal_23999[lit1-0-0-False]": "Aggregating a list literal: cudf#19610",
|
|
238
|
+
"tests/unit/operations/namespaces/test_binary.py::test_binary_compounded_literal_aggstate_24460": "Aggregating a list literal: cudf#19610",
|
|
179
239
|
}
|
|
180
240
|
|
|
181
241
|
|
|
@@ -209,6 +269,16 @@ TESTS_TO_SKIP: Mapping[str, str] = {
|
|
|
209
269
|
"tests/unit/streaming/test_streaming.py::test_streaming_apply": "https://github.com/pola-rs/polars/issues/22558",
|
|
210
270
|
# New iceberg release causes this test to fail. We can remove in the next polars version bump: https://github.com/rapidsai/cudf/pull/19912
|
|
211
271
|
"tests/unit/io/test_iceberg.py::test_fill_missing_fields_with_identity_partition_values[False]": "https://github.com/pola-rs/polars/pull/24456",
|
|
272
|
+
"tests/unit/operations/test_rolling.py::test_rolling_agg_bad_input_types[str]": "https://github.com/rapidsai/cudf/issues/20551",
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
STREAMING_ONLY_EXPECTED_FAILURES: Mapping[str, str] = {
|
|
277
|
+
"tests/unit/io/test_parquet.py::test_field_overwrites_metadata": "cannot serialize in-memory sink target.",
|
|
278
|
+
"tests/unit/io/test_parquet_field_overwrites.py::test_required_flat": "cannot serialize in-memory sink target.",
|
|
279
|
+
"tests/unit/io/test_parquet_field_overwrites.py::test_required_list[dtype0]": "cannot serialize in-memory sink target.",
|
|
280
|
+
"tests/unit/io/test_parquet_field_overwrites.py::test_required_list[dtype1]": "cannot serialize in-memory sink target.",
|
|
281
|
+
"tests/unit/io/test_parquet_field_overwrites.py::test_required_struct": "cannot serialize in-memory sink target.",
|
|
212
282
|
}
|
|
213
283
|
|
|
214
284
|
|
|
@@ -222,6 +292,12 @@ def pytest_collection_modifyitems(
|
|
|
222
292
|
for item in items:
|
|
223
293
|
if (reason := TESTS_TO_SKIP.get(item.nodeid, None)) is not None:
|
|
224
294
|
item.add_marker(pytest.mark.skip(reason=reason))
|
|
295
|
+
elif (
|
|
296
|
+
config.getoption("--executor") == "streaming"
|
|
297
|
+
and (s_reason := STREAMING_ONLY_EXPECTED_FAILURES.get(item.nodeid, None))
|
|
298
|
+
is not None
|
|
299
|
+
):
|
|
300
|
+
item.add_marker(pytest.mark.xfail(reason=s_reason))
|
|
225
301
|
elif (entry := EXPECTED_FAILURES.get(item.nodeid, None)) is not None:
|
|
226
302
|
if isinstance(entry, tuple):
|
|
227
303
|
# the second entry in the tuple is the condition to xfail on
|
cudf_polars/typing/__init__.py
CHANGED
|
@@ -19,7 +19,7 @@ from typing import (
|
|
|
19
19
|
|
|
20
20
|
import polars as pl
|
|
21
21
|
import polars.datatypes
|
|
22
|
-
from polars
|
|
22
|
+
from polars import polars as plrs # type: ignore[attr-defined]
|
|
23
23
|
|
|
24
24
|
if TYPE_CHECKING:
|
|
25
25
|
from collections.abc import Callable
|
|
@@ -48,44 +48,45 @@ __all__: list[str] = [
|
|
|
48
48
|
"OptimizationArgs",
|
|
49
49
|
"PolarsExpr",
|
|
50
50
|
"PolarsIR",
|
|
51
|
+
"RankMethod",
|
|
51
52
|
"Schema",
|
|
52
53
|
"Slice",
|
|
53
54
|
]
|
|
54
55
|
|
|
55
56
|
PolarsIR: TypeAlias = Union[
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
57
|
+
plrs._ir_nodes.PythonScan,
|
|
58
|
+
plrs._ir_nodes.Scan,
|
|
59
|
+
plrs._ir_nodes.Cache,
|
|
60
|
+
plrs._ir_nodes.DataFrameScan,
|
|
61
|
+
plrs._ir_nodes.Select,
|
|
62
|
+
plrs._ir_nodes.GroupBy,
|
|
63
|
+
plrs._ir_nodes.Join,
|
|
64
|
+
plrs._ir_nodes.HStack,
|
|
65
|
+
plrs._ir_nodes.Distinct,
|
|
66
|
+
plrs._ir_nodes.Sort,
|
|
67
|
+
plrs._ir_nodes.Slice,
|
|
68
|
+
plrs._ir_nodes.Filter,
|
|
69
|
+
plrs._ir_nodes.SimpleProjection,
|
|
70
|
+
plrs._ir_nodes.MapFunction,
|
|
71
|
+
plrs._ir_nodes.Union,
|
|
72
|
+
plrs._ir_nodes.HConcat,
|
|
73
|
+
plrs._ir_nodes.ExtContext,
|
|
73
74
|
]
|
|
74
75
|
|
|
75
76
|
PolarsExpr: TypeAlias = Union[
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
77
|
+
plrs._expr_nodes.Function,
|
|
78
|
+
plrs._expr_nodes.Window,
|
|
79
|
+
plrs._expr_nodes.Literal,
|
|
80
|
+
plrs._expr_nodes.Sort,
|
|
81
|
+
plrs._expr_nodes.SortBy,
|
|
82
|
+
plrs._expr_nodes.Gather,
|
|
83
|
+
plrs._expr_nodes.Filter,
|
|
84
|
+
plrs._expr_nodes.Cast,
|
|
85
|
+
plrs._expr_nodes.Column,
|
|
86
|
+
plrs._expr_nodes.Agg,
|
|
87
|
+
plrs._expr_nodes.BinaryExpr,
|
|
88
|
+
plrs._expr_nodes.Len,
|
|
89
|
+
plrs._expr_nodes.PyExprIR,
|
|
89
90
|
]
|
|
90
91
|
|
|
91
92
|
PolarsSchema: TypeAlias = dict[str, pl.DataType]
|
|
@@ -173,6 +174,53 @@ class GenericTransformer(Protocol[U_contra, V_co, StateT_co]):
|
|
|
173
174
|
...
|
|
174
175
|
|
|
175
176
|
|
|
177
|
+
class _ScalarDataTypeHeader(TypedDict):
|
|
178
|
+
kind: Literal["scalar"]
|
|
179
|
+
name: str
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
class _DecimalDataTypeHeader(TypedDict):
|
|
183
|
+
kind: Literal["decimal"]
|
|
184
|
+
precision: int
|
|
185
|
+
scale: int
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
class _DatetimeDataTypeHeader(TypedDict):
|
|
189
|
+
kind: Literal["datetime"]
|
|
190
|
+
time_unit: str
|
|
191
|
+
time_zone: str | None
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
class _DurationDataTypeHeader(TypedDict):
|
|
195
|
+
kind: Literal["duration"]
|
|
196
|
+
time_unit: str
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
class _ListDataTypeHeader(TypedDict):
|
|
200
|
+
kind: Literal["list"]
|
|
201
|
+
inner: DataTypeHeader
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
class _StructFieldHeader(TypedDict):
|
|
205
|
+
name: str
|
|
206
|
+
dtype: DataTypeHeader
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
class _StructDataTypeHeader(TypedDict):
|
|
210
|
+
kind: Literal["struct"]
|
|
211
|
+
fields: list[_StructFieldHeader]
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
DataTypeHeader = (
|
|
215
|
+
_ScalarDataTypeHeader
|
|
216
|
+
| _DecimalDataTypeHeader
|
|
217
|
+
| _DatetimeDataTypeHeader
|
|
218
|
+
| _DurationDataTypeHeader
|
|
219
|
+
| _ListDataTypeHeader
|
|
220
|
+
| _StructDataTypeHeader
|
|
221
|
+
)
|
|
222
|
+
|
|
223
|
+
|
|
176
224
|
class ColumnOptions(TypedDict):
|
|
177
225
|
"""
|
|
178
226
|
Column constructor options.
|
|
@@ -186,7 +234,7 @@ class ColumnOptions(TypedDict):
|
|
|
186
234
|
order: plc.types.Order
|
|
187
235
|
null_order: plc.types.NullOrder
|
|
188
236
|
name: str | None
|
|
189
|
-
dtype:
|
|
237
|
+
dtype: DataTypeHeader
|
|
190
238
|
|
|
191
239
|
|
|
192
240
|
class DeserializedColumnOptions(TypedDict):
|
|
@@ -217,3 +265,9 @@ class DataFrameHeader(TypedDict):
|
|
|
217
265
|
|
|
218
266
|
columns_kwargs: list[ColumnOptions]
|
|
219
267
|
frame_count: int
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
# Not public in polars yet
|
|
271
|
+
RankMethod = Literal["ordinal", "dense", "min", "max", "average"]
|
|
272
|
+
|
|
273
|
+
RoundMethod = Literal["half_away_from_zero", "half_to_even"]
|