cudf-polars-cu13 25.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions released to one of the supported registries; it is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. cudf_polars/GIT_COMMIT +1 -0
  2. cudf_polars/VERSION +1 -0
  3. cudf_polars/__init__.py +28 -0
  4. cudf_polars/_version.py +21 -0
  5. cudf_polars/callback.py +318 -0
  6. cudf_polars/containers/__init__.py +13 -0
  7. cudf_polars/containers/column.py +495 -0
  8. cudf_polars/containers/dataframe.py +361 -0
  9. cudf_polars/containers/datatype.py +137 -0
  10. cudf_polars/dsl/__init__.py +8 -0
  11. cudf_polars/dsl/expr.py +66 -0
  12. cudf_polars/dsl/expressions/__init__.py +8 -0
  13. cudf_polars/dsl/expressions/aggregation.py +226 -0
  14. cudf_polars/dsl/expressions/base.py +272 -0
  15. cudf_polars/dsl/expressions/binaryop.py +120 -0
  16. cudf_polars/dsl/expressions/boolean.py +326 -0
  17. cudf_polars/dsl/expressions/datetime.py +271 -0
  18. cudf_polars/dsl/expressions/literal.py +97 -0
  19. cudf_polars/dsl/expressions/rolling.py +643 -0
  20. cudf_polars/dsl/expressions/selection.py +74 -0
  21. cudf_polars/dsl/expressions/slicing.py +46 -0
  22. cudf_polars/dsl/expressions/sorting.py +85 -0
  23. cudf_polars/dsl/expressions/string.py +1002 -0
  24. cudf_polars/dsl/expressions/struct.py +137 -0
  25. cudf_polars/dsl/expressions/ternary.py +49 -0
  26. cudf_polars/dsl/expressions/unary.py +517 -0
  27. cudf_polars/dsl/ir.py +2607 -0
  28. cudf_polars/dsl/nodebase.py +164 -0
  29. cudf_polars/dsl/to_ast.py +359 -0
  30. cudf_polars/dsl/tracing.py +16 -0
  31. cudf_polars/dsl/translate.py +939 -0
  32. cudf_polars/dsl/traversal.py +224 -0
  33. cudf_polars/dsl/utils/__init__.py +8 -0
  34. cudf_polars/dsl/utils/aggregations.py +481 -0
  35. cudf_polars/dsl/utils/groupby.py +98 -0
  36. cudf_polars/dsl/utils/naming.py +34 -0
  37. cudf_polars/dsl/utils/replace.py +61 -0
  38. cudf_polars/dsl/utils/reshape.py +74 -0
  39. cudf_polars/dsl/utils/rolling.py +121 -0
  40. cudf_polars/dsl/utils/windows.py +192 -0
  41. cudf_polars/experimental/__init__.py +8 -0
  42. cudf_polars/experimental/base.py +386 -0
  43. cudf_polars/experimental/benchmarks/__init__.py +4 -0
  44. cudf_polars/experimental/benchmarks/pdsds.py +220 -0
  45. cudf_polars/experimental/benchmarks/pdsds_queries/__init__.py +4 -0
  46. cudf_polars/experimental/benchmarks/pdsds_queries/q1.py +88 -0
  47. cudf_polars/experimental/benchmarks/pdsds_queries/q10.py +225 -0
  48. cudf_polars/experimental/benchmarks/pdsds_queries/q2.py +244 -0
  49. cudf_polars/experimental/benchmarks/pdsds_queries/q3.py +65 -0
  50. cudf_polars/experimental/benchmarks/pdsds_queries/q4.py +359 -0
  51. cudf_polars/experimental/benchmarks/pdsds_queries/q5.py +462 -0
  52. cudf_polars/experimental/benchmarks/pdsds_queries/q6.py +92 -0
  53. cudf_polars/experimental/benchmarks/pdsds_queries/q7.py +79 -0
  54. cudf_polars/experimental/benchmarks/pdsds_queries/q8.py +524 -0
  55. cudf_polars/experimental/benchmarks/pdsds_queries/q9.py +137 -0
  56. cudf_polars/experimental/benchmarks/pdsh.py +814 -0
  57. cudf_polars/experimental/benchmarks/utils.py +832 -0
  58. cudf_polars/experimental/dask_registers.py +200 -0
  59. cudf_polars/experimental/dispatch.py +156 -0
  60. cudf_polars/experimental/distinct.py +197 -0
  61. cudf_polars/experimental/explain.py +157 -0
  62. cudf_polars/experimental/expressions.py +590 -0
  63. cudf_polars/experimental/groupby.py +327 -0
  64. cudf_polars/experimental/io.py +943 -0
  65. cudf_polars/experimental/join.py +391 -0
  66. cudf_polars/experimental/parallel.py +423 -0
  67. cudf_polars/experimental/repartition.py +69 -0
  68. cudf_polars/experimental/scheduler.py +155 -0
  69. cudf_polars/experimental/select.py +188 -0
  70. cudf_polars/experimental/shuffle.py +354 -0
  71. cudf_polars/experimental/sort.py +609 -0
  72. cudf_polars/experimental/spilling.py +151 -0
  73. cudf_polars/experimental/statistics.py +795 -0
  74. cudf_polars/experimental/utils.py +169 -0
  75. cudf_polars/py.typed +0 -0
  76. cudf_polars/testing/__init__.py +8 -0
  77. cudf_polars/testing/asserts.py +448 -0
  78. cudf_polars/testing/io.py +122 -0
  79. cudf_polars/testing/plugin.py +236 -0
  80. cudf_polars/typing/__init__.py +219 -0
  81. cudf_polars/utils/__init__.py +8 -0
  82. cudf_polars/utils/config.py +741 -0
  83. cudf_polars/utils/conversion.py +40 -0
  84. cudf_polars/utils/dtypes.py +118 -0
  85. cudf_polars/utils/sorting.py +53 -0
  86. cudf_polars/utils/timer.py +39 -0
  87. cudf_polars/utils/versions.py +27 -0
  88. cudf_polars_cu13-25.10.0.dist-info/METADATA +136 -0
  89. cudf_polars_cu13-25.10.0.dist-info/RECORD +92 -0
  90. cudf_polars_cu13-25.10.0.dist-info/WHEEL +5 -0
  91. cudf_polars_cu13-25.10.0.dist-info/licenses/LICENSE +201 -0
  92. cudf_polars_cu13-25.10.0.dist-info/top_level.txt +1 -0
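
This wheel ships the GPU execution engine for Polars; user code does not import cudf_polars directly. A minimal usage sketch (an illustration assuming a CUDA 13 environment with polars and this wheel installed, not code from the package):

import polars as pl

# Build a lazy query as usual; the GPU engine is selected at collect time.
q = pl.LazyFrame({"a": [1, 2, 3]}).with_columns(b=pl.col("a") * 2)

# Runs on the GPU where supported; by default Polars falls back to the
# CPU engine (with a warning) for unsupported queries.
print(q.collect(engine="gpu"))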
cudf_polars/experimental/benchmarks/pdsds_queries/q1​0.py
@@ -0,0 +1,225 @@
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
+ # SPDX-License-Identifier: Apache-2.0
+
+ """Query 10."""
+
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING
+
+ import polars as pl
+
+ from cudf_polars.experimental.benchmarks.utils import get_data
+
+ if TYPE_CHECKING:
+     from cudf_polars.experimental.benchmarks.utils import RunConfig
+
+
+ def duckdb_impl(run_config: RunConfig) -> str:
+     """Query 10."""
+     return """
+     SELECT cd_gender,
+            cd_marital_status,
+            cd_education_status,
+            Count(*) cnt1,
+            cd_purchase_estimate,
+            Count(*) cnt2,
+            cd_credit_rating,
+            Count(*) cnt3,
+            cd_dep_count,
+            Count(*) cnt4,
+            cd_dep_employed_count,
+            Count(*) cnt5,
+            cd_dep_college_count,
+            Count(*) cnt6
+     FROM customer c,
+          customer_address ca,
+          customer_demographics
+     WHERE c.c_current_addr_sk = ca.ca_address_sk
+           AND ca_county IN ( 'Lycoming County', 'Sheridan County',
+                              'Kandiyohi County',
+                              'Pike County',
+                              'Greene County' )
+           AND cd_demo_sk = c.c_current_cdemo_sk
+           AND EXISTS (SELECT *
+                       FROM store_sales,
+                            date_dim
+                       WHERE c.c_customer_sk = ss_customer_sk
+                             AND ss_sold_date_sk = d_date_sk
+                             AND d_year = 2002
+                             AND d_moy BETWEEN 4 AND 4 + 3)
+           AND ( EXISTS (SELECT *
+                         FROM web_sales,
+                              date_dim
+                         WHERE c.c_customer_sk = ws_bill_customer_sk
+                               AND ws_sold_date_sk = d_date_sk
+                               AND d_year = 2002
+                               AND d_moy BETWEEN 4 AND 4 + 3)
+                 OR EXISTS (SELECT *
+                            FROM catalog_sales,
+                                 date_dim
+                            WHERE c.c_customer_sk = cs_ship_customer_sk
+                                  AND cs_sold_date_sk = d_date_sk
+                                  AND d_year = 2002
+                                  AND d_moy BETWEEN 4 AND 4 + 3) )
+     GROUP BY cd_gender,
+              cd_marital_status,
+              cd_education_status,
+              cd_purchase_estimate,
+              cd_credit_rating,
+              cd_dep_count,
+              cd_dep_employed_count,
+              cd_dep_college_count
+     ORDER BY cd_gender,
+              cd_marital_status,
+              cd_education_status,
+              cd_purchase_estimate,
+              cd_credit_rating,
+              cd_dep_count,
+              cd_dep_employed_count,
+              cd_dep_college_count
+     LIMIT 100;
+     """
+
+
+ def polars_impl(run_config: RunConfig) -> pl.LazyFrame:
+     """Query 10."""
+     # Load required tables
+     customer = get_data(run_config.dataset_path, "customer", run_config.suffix)
+     customer_address = get_data(
+         run_config.dataset_path, "customer_address", run_config.suffix
+     )
+     customer_demographics = get_data(
+         run_config.dataset_path, "customer_demographics", run_config.suffix
+     )
+     store_sales = get_data(run_config.dataset_path, "store_sales", run_config.suffix)
+     web_sales = get_data(run_config.dataset_path, "web_sales", run_config.suffix)
+     catalog_sales = get_data(
+         run_config.dataset_path, "catalog_sales", run_config.suffix
+     )
+     date_dim = get_data(run_config.dataset_path, "date_dim", run_config.suffix)
+
+     # Target counties and date range
+     target_counties = [
+         "Lycoming County",
+         "Sheridan County",
+         "Kandiyohi County",
+         "Pike County",
+         "Greene County",
+     ]
+
+     # Get customers with store sales in the target period (EXISTS condition 1)
+     store_customers = (
+         store_sales.join(date_dim, left_on="ss_sold_date_sk", right_on="d_date_sk")
+         .filter(
+             (pl.col("d_year") == 2002)
+             & (pl.col("d_moy").is_between(4, 7, closed="both"))
+         )
+         .select("ss_customer_sk")
+         .unique()
+     )
+
+     # Get customers with web sales in the target period (EXISTS condition 2a)
+     web_customers = (
+         web_sales.join(date_dim, left_on="ws_sold_date_sk", right_on="d_date_sk")
+         .filter(
+             (pl.col("d_year") == 2002)
+             & (pl.col("d_moy").is_between(4, 7, closed="both"))
+         )
+         .select(pl.col("ws_bill_customer_sk").alias("customer_sk"))
+         .unique()
+     )
+
+     # Get customers with catalog sales in the target period (EXISTS condition 2b)
+     catalog_customers = (
+         catalog_sales.join(date_dim, left_on="cs_sold_date_sk", right_on="d_date_sk")
+         .filter(
+             (pl.col("d_year") == 2002)
+             & (pl.col("d_moy").is_between(4, 7, closed="both"))
+         )
+         .select(pl.col("cs_ship_customer_sk").alias("customer_sk"))
+         .unique()
+     )
+
+     # Combine web and catalog customers (OR condition)
+     web_or_catalog_customers = pl.concat([web_customers, catalog_customers]).unique()
+
+     # Main query: join customer tables and apply filters
+     return (
+         customer.join(
+             customer_address, left_on="c_current_addr_sk", right_on="ca_address_sk"
+         )
+         .join(
+             customer_demographics, left_on="c_current_cdemo_sk", right_on="cd_demo_sk"
+         )
+         .filter(pl.col("ca_county").is_in(target_counties))
+         # Apply EXISTS conditions through joins
+         .join(
+             store_customers,
+             left_on="c_customer_sk",
+             right_on="ss_customer_sk",
+             how="inner",
+         )
+         .join(
+             web_or_catalog_customers,
+             left_on="c_customer_sk",
+             right_on="customer_sk",
+             how="inner",
+         )
+         .group_by(
+             [
+                 "cd_gender",
+                 "cd_marital_status",
+                 "cd_education_status",
+                 "cd_purchase_estimate",
+                 "cd_credit_rating",
+                 "cd_dep_count",
+                 "cd_dep_employed_count",
+                 "cd_dep_college_count",
+             ]
+         )
+         .agg(
+             [
+                 # Cast -> Int64 to match DuckDB
+                 # TODO: We should plan to make these optional
+                 pl.len().alias("cnt1").cast(pl.Int64),
+                 pl.len().alias("cnt2").cast(pl.Int64),
+                 pl.len().alias("cnt3").cast(pl.Int64),
+                 pl.len().alias("cnt4").cast(pl.Int64),
+                 pl.len().alias("cnt5").cast(pl.Int64),
+                 pl.len().alias("cnt6").cast(pl.Int64),
+             ]
+         )
+         .sort(
+             [
+                 "cd_gender",
+                 "cd_marital_status",
+                 "cd_education_status",
+                 "cd_purchase_estimate",
+                 "cd_credit_rating",
+                 "cd_dep_count",
+                 "cd_dep_employed_count",
+                 "cd_dep_college_count",
+             ],
+             nulls_last=True,
+         )
+         .limit(100)
+         .select(
+             [
+                 "cd_gender",
+                 "cd_marital_status",
+                 "cd_education_status",
+                 "cnt1",
+                 "cd_purchase_estimate",
+                 "cnt2",
+                 "cd_credit_rating",
+                 "cnt3",
+                 "cd_dep_count",
+                 "cnt4",
+                 "cd_dep_employed_count",
+                 "cnt5",
+                 "cd_dep_college_count",
+                 "cnt6",
+             ]
+         )
+     )
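
The q10 translation above emulates SQL EXISTS by projecting distinct customer keys and inner-joining on them. A Polars semi-join expresses the same predicate directly; a sketch with made-up toy data (not the shipped code):

import polars as pl

# Toy stand-ins for customer and the date-filtered store_sales keys.
customers = pl.LazyFrame({"c_customer_sk": [1, 2, 3]})
store_hits = pl.LazyFrame({"ss_customer_sk": [2, 3, 3]})

# A semi join keeps each left row at most once when any match exists,
# mirroring EXISTS without a separate .unique() step.
exists_store = customers.join(
    store_hits, left_on="c_customer_sk", right_on="ss_customer_sk", how="semi"
)
print(exists_store.collect())  # rows for customers 2 and 3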
cudf_polars/experimental/benchmarks/pdsds_queries/q2.py
@@ -0,0 +1,244 @@
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
+ # SPDX-License-Identifier: Apache-2.0
+
+ """Query 2."""
+
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING
+
+ import polars as pl
+
+ from cudf_polars.experimental.benchmarks.utils import get_data
+
+ if TYPE_CHECKING:
+     from cudf_polars.experimental.benchmarks.utils import RunConfig
+
+
+ def duckdb_impl(run_config: RunConfig) -> str:
+     """Query 2."""
+     return """
+     WITH wscs
+          AS (SELECT sold_date_sk,
+                     sales_price
+              FROM (SELECT ws_sold_date_sk sold_date_sk,
+                           ws_ext_sales_price sales_price
+                    FROM web_sales)
+              UNION ALL
+              (SELECT cs_sold_date_sk sold_date_sk,
+                      cs_ext_sales_price sales_price
+               FROM catalog_sales)),
+          wswscs
+          AS (SELECT d_week_seq,
+                     Sum(CASE
+                           WHEN ( d_day_name = 'Sunday' ) THEN sales_price
+                           ELSE NULL
+                         END) sun_sales,
+                     Sum(CASE
+                           WHEN ( d_day_name = 'Monday' ) THEN sales_price
+                           ELSE NULL
+                         END) mon_sales,
+                     Sum(CASE
+                           WHEN ( d_day_name = 'Tuesday' ) THEN sales_price
+                           ELSE NULL
+                         END) tue_sales,
+                     Sum(CASE
+                           WHEN ( d_day_name = 'Wednesday' ) THEN sales_price
+                           ELSE NULL
+                         END) wed_sales,
+                     Sum(CASE
+                           WHEN ( d_day_name = 'Thursday' ) THEN sales_price
+                           ELSE NULL
+                         END) thu_sales,
+                     Sum(CASE
+                           WHEN ( d_day_name = 'Friday' ) THEN sales_price
+                           ELSE NULL
+                         END) fri_sales,
+                     Sum(CASE
+                           WHEN ( d_day_name = 'Saturday' ) THEN sales_price
+                           ELSE NULL
+                         END) sat_sales
+              FROM wscs,
+                   date_dim
+              WHERE d_date_sk = sold_date_sk
+              GROUP BY d_week_seq)
+     SELECT d_week_seq1,
+            Round(sun_sales1 / sun_sales2, 2),
+            Round(mon_sales1 / mon_sales2, 2),
+            Round(tue_sales1 / tue_sales2, 2),
+            Round(wed_sales1 / wed_sales2, 2),
+            Round(thu_sales1 / thu_sales2, 2),
+            Round(fri_sales1 / fri_sales2, 2),
+            Round(sat_sales1 / sat_sales2, 2)
+     FROM (SELECT wswscs.d_week_seq d_week_seq1,
+                  sun_sales sun_sales1,
+                  mon_sales mon_sales1,
+                  tue_sales tue_sales1,
+                  wed_sales wed_sales1,
+                  thu_sales thu_sales1,
+                  fri_sales fri_sales1,
+                  sat_sales sat_sales1
+           FROM wswscs,
+                date_dim
+           WHERE date_dim.d_week_seq = wswscs.d_week_seq
+                 AND d_year = 1998) y,
+          (SELECT wswscs.d_week_seq d_week_seq2,
+                  sun_sales sun_sales2,
+                  mon_sales mon_sales2,
+                  tue_sales tue_sales2,
+                  wed_sales wed_sales2,
+                  thu_sales thu_sales2,
+                  fri_sales fri_sales2,
+                  sat_sales sat_sales2
+           FROM wswscs,
+                date_dim
+           WHERE date_dim.d_week_seq = wswscs.d_week_seq
+                 AND d_year = 1998 + 1) z
+     WHERE d_week_seq1 = d_week_seq2 - 53
+     ORDER BY d_week_seq1;
+     """
+
+
+ def polars_impl(run_config: RunConfig) -> pl.LazyFrame:
+     """Query 2."""
+     # Load required tables
+     web_sales = get_data(run_config.dataset_path, "web_sales", run_config.suffix)
+     catalog_sales = get_data(
+         run_config.dataset_path, "catalog_sales", run_config.suffix
+     )
+     date_dim = get_data(run_config.dataset_path, "date_dim", run_config.suffix)
+     # Step 1: Create wscs CTE equivalent (union of web and catalog sales)
+     wscs = pl.concat(
+         [
+             web_sales.select(
+                 [
+                     pl.col("ws_sold_date_sk").alias("sold_date_sk"),
+                     pl.col("ws_ext_sales_price").alias("sales_price"),
+                 ]
+             ),
+             catalog_sales.select(
+                 [
+                     pl.col("cs_sold_date_sk").alias("sold_date_sk"),
+                     pl.col("cs_ext_sales_price").alias("sales_price"),
+                 ]
+             ),
+         ]
+     )
+     # Step 2: Create wswscs CTE equivalent (aggregate by week and day of week)
+     # First join with date_dim to get day names
+     wscs_with_dates = wscs.join(date_dim, left_on="sold_date_sk", right_on="d_date_sk")
+     # Create separate aggregations for each day to better control null handling
+     days = (
+         "Sunday",
+         "Monday",
+         "Tuesday",
+         "Wednesday",
+         "Thursday",
+         "Friday",
+         "Saturday",
+     )
+     day_cols = (
+         "sun_sales",
+         "mon_sales",
+         "tue_sales",
+         "wed_sales",
+         "thu_sales",
+         "fri_sales",
+         "sat_sales",
+     )
+     # Start with all week sequences
+     all_weeks = wscs_with_dates.select("d_week_seq").unique()
+     wswscs = all_weeks
+
+     wswscs = (
+         wscs_with_dates.with_columns(
+             [
+                 pl.when(pl.col("d_day_name") == day)
+                 .then(pl.col("sales_price"))
+                 .otherwise(None)
+                 .alias(name)
+                 for day, name in zip(days, day_cols, strict=True)
+             ]
+         )
+         .group_by("d_week_seq")
+         .agg(
+             *(pl.col(name).sum().alias(name) for name in day_cols),
+             *(pl.col(name).count().alias(f"{name}_count") for name in day_cols),
+         )
+         .with_columns(
+             [
+                 pl.when(pl.col(f"{name}_count") > 0)
+                 .then(pl.col(name))
+                 .otherwise(None)
+                 .alias(name)
+                 for name in day_cols
+             ]
+         )
+         .select(["d_week_seq", *day_cols])
+     )
+
+     # Step 3: Create year 1998 data (y subquery equivalent)
+     y_1998 = (
+         wswscs.join(date_dim, left_on="d_week_seq", right_on="d_week_seq")
+         .filter(pl.col("d_year") == 1998)
+         .select(
+             [
+                 pl.col("d_week_seq").alias("d_week_seq1"),
+                 pl.col("sun_sales").alias("sun_sales1"),
+                 pl.col("mon_sales").alias("mon_sales1"),
+                 pl.col("tue_sales").alias("tue_sales1"),
+                 pl.col("wed_sales").alias("wed_sales1"),
+                 pl.col("thu_sales").alias("thu_sales1"),
+                 pl.col("fri_sales").alias("fri_sales1"),
+                 pl.col("sat_sales").alias("sat_sales1"),
+             ]
+         )
+     )
+     # Step 4: Create year 1999 data (z subquery equivalent)
+     z_1999 = (
+         wswscs.join(date_dim, left_on="d_week_seq", right_on="d_week_seq")
+         .filter(pl.col("d_year") == 1999)
+         .select(
+             [
+                 pl.col("d_week_seq").alias("d_week_seq2"),
+                 pl.col("sun_sales").alias("sun_sales2"),
+                 pl.col("mon_sales").alias("mon_sales2"),
+                 pl.col("tue_sales").alias("tue_sales2"),
+                 pl.col("wed_sales").alias("wed_sales2"),
+                 pl.col("thu_sales").alias("thu_sales2"),
+                 pl.col("fri_sales").alias("fri_sales2"),
+                 pl.col("sat_sales").alias("sat_sales2"),
+             ]
+         )
+     )
+     # Step 5: Join the two years and calculate ratios
+     return (
+         y_1998.join(z_1999, left_on="d_week_seq1", right_on=pl.col("d_week_seq2") - 53)
+         .select(
+             [
+                 pl.col("d_week_seq1"),
+                 (pl.col("sun_sales1") / pl.col("sun_sales2"))
+                 .round(2)
+                 .alias("round((sun_sales1 / sun_sales2), 2)"),
+                 (pl.col("mon_sales1") / pl.col("mon_sales2"))
+                 .round(2)
+                 .alias("round((mon_sales1 / mon_sales2), 2)"),
+                 (pl.col("tue_sales1") / pl.col("tue_sales2"))
+                 .round(2)
+                 .alias("round((tue_sales1 / tue_sales2), 2)"),
+                 (pl.col("wed_sales1") / pl.col("wed_sales2"))
+                 .round(2)
+                 .alias("round((wed_sales1 / wed_sales2), 2)"),
+                 (pl.col("thu_sales1") / pl.col("thu_sales2"))
+                 .round(2)
+                 .alias("round((thu_sales1 / thu_sales2), 2)"),
+                 (pl.col("fri_sales1") / pl.col("fri_sales2"))
+                 .round(2)
+                 .alias("round((fri_sales1 / fri_sales2), 2)"),
+                 (pl.col("sat_sales1") / pl.col("sat_sales2"))
+                 .round(2)
+                 .alias("round((sat_sales1 / sat_sales2), 2)"),
+             ]
+         )
+         .sort("d_week_seq1")
+     )
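
q2's final join pairs each 1998 week with the week 53 sequence numbers later in 1999 by passing an expression as the join key (right_on=pl.col("d_week_seq2") - 53). A toy sketch of that expression-keyed join, with hypothetical data:

import polars as pl

y = pl.DataFrame({"d_week_seq1": [1, 2], "sun_sales1": [10.0, 20.0]})
z = pl.DataFrame({"d_week_seq2": [54, 55], "sun_sales2": [5.0, 8.0]})

# right_on accepts an expression, so the offset becomes part of the key.
out = y.join(z, left_on="d_week_seq1", right_on=pl.col("d_week_seq2") - 53)
print(out.with_columns((pl.col("sun_sales1") / pl.col("sun_sales2")).round(2)))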
cudf_polars/experimental/benchmarks/pdsds_queries/q3.py
@@ -0,0 +1,65 @@
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
+ # SPDX-License-Identifier: Apache-2.0
+
+ """Query 3."""
+
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING
+
+ import polars as pl
+
+ from cudf_polars.experimental.benchmarks.utils import get_data
+
+ if TYPE_CHECKING:
+     from cudf_polars.experimental.benchmarks.utils import RunConfig
+
+
+ def duckdb_impl(run_config: RunConfig) -> str:
+     """Query 3."""
+     return """
+     SELECT dt.d_year,
+            item.i_brand_id brand_id,
+            item.i_brand brand,
+            Sum(ss_ext_discount_amt) sum_agg
+     FROM date_dim dt,
+          store_sales,
+          item
+     WHERE dt.d_date_sk = store_sales.ss_sold_date_sk
+           AND store_sales.ss_item_sk = item.i_item_sk
+           AND item.i_manufact_id = 427
+           AND dt.d_moy = 11
+     GROUP BY dt.d_year,
+              item.i_brand,
+              item.i_brand_id
+     ORDER BY dt.d_year,
+              sum_agg DESC,
+              brand_id
+     LIMIT 100;
+     """
+
+
+ def polars_impl(run_config: RunConfig) -> pl.LazyFrame:
+     """Query 3."""
+     # Load required tables
+     date_dim = get_data(run_config.dataset_path, "date_dim", run_config.suffix)
+     store_sales = get_data(run_config.dataset_path, "store_sales", run_config.suffix)
+     item = get_data(run_config.dataset_path, "item", run_config.suffix)
+     # Execute the query following the SQL logic
+     return (
+         date_dim.join(store_sales, left_on="d_date_sk", right_on="ss_sold_date_sk")
+         .join(item, left_on="ss_item_sk", right_on="i_item_sk")
+         .filter((pl.col("i_manufact_id") == 427) & (pl.col("d_moy") == 11))
+         .group_by(["d_year", "i_brand", "i_brand_id"])
+         .agg([pl.col("ss_ext_discount_amt").sum().alias("sum_agg")])
+         .select(
+             [
+                 pl.col("d_year"),
+                 pl.col("i_brand_id").alias("brand_id"),
+                 pl.col("i_brand").alias("brand"),
+                 pl.col("sum_agg"),
+             ]
+         )
+         .sort(["d_year", "sum_agg", "brand_id"], descending=[False, True, False])
+         .limit(100)
+     )
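
Each query module pairs a DuckDB reference (duckdb_impl) with a Polars translation (polars_impl). The benchmark runner in experimental/benchmarks/utils.py is not shown in this diff, so the following cross-check harness is an assumption, sketched only to show how the two entry points line up:

import duckdb
from polars.testing import assert_frame_equal

def check(run_config, module) -> None:
    # Result of the Polars translation (engine="gpu" also works here).
    got = module.polars_impl(run_config).collect()
    # DuckDB reference; assumes the TPC-DS tables named in the SQL are
    # already registered in the default DuckDB session.
    expected = duckdb.sql(module.duckdb_impl(run_config)).pl()
    assert_frame_equal(got, expected)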