cudf-polars-cu13 25.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cudf_polars/GIT_COMMIT +1 -0
- cudf_polars/VERSION +1 -0
- cudf_polars/__init__.py +28 -0
- cudf_polars/_version.py +21 -0
- cudf_polars/callback.py +318 -0
- cudf_polars/containers/__init__.py +13 -0
- cudf_polars/containers/column.py +495 -0
- cudf_polars/containers/dataframe.py +361 -0
- cudf_polars/containers/datatype.py +137 -0
- cudf_polars/dsl/__init__.py +8 -0
- cudf_polars/dsl/expr.py +66 -0
- cudf_polars/dsl/expressions/__init__.py +8 -0
- cudf_polars/dsl/expressions/aggregation.py +226 -0
- cudf_polars/dsl/expressions/base.py +272 -0
- cudf_polars/dsl/expressions/binaryop.py +120 -0
- cudf_polars/dsl/expressions/boolean.py +326 -0
- cudf_polars/dsl/expressions/datetime.py +271 -0
- cudf_polars/dsl/expressions/literal.py +97 -0
- cudf_polars/dsl/expressions/rolling.py +643 -0
- cudf_polars/dsl/expressions/selection.py +74 -0
- cudf_polars/dsl/expressions/slicing.py +46 -0
- cudf_polars/dsl/expressions/sorting.py +85 -0
- cudf_polars/dsl/expressions/string.py +1002 -0
- cudf_polars/dsl/expressions/struct.py +137 -0
- cudf_polars/dsl/expressions/ternary.py +49 -0
- cudf_polars/dsl/expressions/unary.py +517 -0
- cudf_polars/dsl/ir.py +2607 -0
- cudf_polars/dsl/nodebase.py +164 -0
- cudf_polars/dsl/to_ast.py +359 -0
- cudf_polars/dsl/tracing.py +16 -0
- cudf_polars/dsl/translate.py +939 -0
- cudf_polars/dsl/traversal.py +224 -0
- cudf_polars/dsl/utils/__init__.py +8 -0
- cudf_polars/dsl/utils/aggregations.py +481 -0
- cudf_polars/dsl/utils/groupby.py +98 -0
- cudf_polars/dsl/utils/naming.py +34 -0
- cudf_polars/dsl/utils/replace.py +61 -0
- cudf_polars/dsl/utils/reshape.py +74 -0
- cudf_polars/dsl/utils/rolling.py +121 -0
- cudf_polars/dsl/utils/windows.py +192 -0
- cudf_polars/experimental/__init__.py +8 -0
- cudf_polars/experimental/base.py +386 -0
- cudf_polars/experimental/benchmarks/__init__.py +4 -0
- cudf_polars/experimental/benchmarks/pdsds.py +220 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/__init__.py +4 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q1.py +88 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q10.py +225 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q2.py +244 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q3.py +65 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q4.py +359 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q5.py +462 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q6.py +92 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q7.py +79 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q8.py +524 -0
- cudf_polars/experimental/benchmarks/pdsds_queries/q9.py +137 -0
- cudf_polars/experimental/benchmarks/pdsh.py +814 -0
- cudf_polars/experimental/benchmarks/utils.py +832 -0
- cudf_polars/experimental/dask_registers.py +200 -0
- cudf_polars/experimental/dispatch.py +156 -0
- cudf_polars/experimental/distinct.py +197 -0
- cudf_polars/experimental/explain.py +157 -0
- cudf_polars/experimental/expressions.py +590 -0
- cudf_polars/experimental/groupby.py +327 -0
- cudf_polars/experimental/io.py +943 -0
- cudf_polars/experimental/join.py +391 -0
- cudf_polars/experimental/parallel.py +423 -0
- cudf_polars/experimental/repartition.py +69 -0
- cudf_polars/experimental/scheduler.py +155 -0
- cudf_polars/experimental/select.py +188 -0
- cudf_polars/experimental/shuffle.py +354 -0
- cudf_polars/experimental/sort.py +609 -0
- cudf_polars/experimental/spilling.py +151 -0
- cudf_polars/experimental/statistics.py +795 -0
- cudf_polars/experimental/utils.py +169 -0
- cudf_polars/py.typed +0 -0
- cudf_polars/testing/__init__.py +8 -0
- cudf_polars/testing/asserts.py +448 -0
- cudf_polars/testing/io.py +122 -0
- cudf_polars/testing/plugin.py +236 -0
- cudf_polars/typing/__init__.py +219 -0
- cudf_polars/utils/__init__.py +8 -0
- cudf_polars/utils/config.py +741 -0
- cudf_polars/utils/conversion.py +40 -0
- cudf_polars/utils/dtypes.py +118 -0
- cudf_polars/utils/sorting.py +53 -0
- cudf_polars/utils/timer.py +39 -0
- cudf_polars/utils/versions.py +27 -0
- cudf_polars_cu13-25.10.0.dist-info/METADATA +136 -0
- cudf_polars_cu13-25.10.0.dist-info/RECORD +92 -0
- cudf_polars_cu13-25.10.0.dist-info/WHEEL +5 -0
- cudf_polars_cu13-25.10.0.dist-info/licenses/LICENSE +201 -0
- cudf_polars_cu13-25.10.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,524 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
"""Query 8."""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
9
|
+
|
|
10
|
+
import polars as pl
|
|
11
|
+
|
|
12
|
+
from cudf_polars.experimental.benchmarks.utils import get_data
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from cudf_polars.experimental.benchmarks.utils import RunConfig
|
|
16
|
+
|
|
17
|
+
"""
|
|
18
|
+
Warning: one filter was removed to prevent zero-row results.
|
|
19
|
+
|
|
20
|
+
Note: alternate zip codes (with their preferred-customer counts):
|
|
21
|
+
'70069', # 93 preferred customers
|
|
22
|
+
'60069', # 87 preferred customers
|
|
23
|
+
'78877', # 87 preferred customers
|
|
24
|
+
'60169', # 87 preferred customers
|
|
25
|
+
'68252', # 86 preferred customers
|
|
26
|
+
'71087', # 84 preferred customers
|
|
27
|
+
'71711', # 84 preferred customers
|
|
28
|
+
'68877', # 84 preferred customers
|
|
29
|
+
'55709', # 82 preferred customers
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
# Year compared against the ``d_year`` column by both query implementations.
TARGET_YEAR = 1996
# Quarter-of-year compared against the ``d_qoy`` column by both implementations.
TARGET_QUARTER = 2
|
|
34
|
+
TARGET_ZIPS = [
|
|
35
|
+
"67436",
|
|
36
|
+
"26121",
|
|
37
|
+
"38443",
|
|
38
|
+
"63157",
|
|
39
|
+
"68856",
|
|
40
|
+
"19485",
|
|
41
|
+
"86425",
|
|
42
|
+
"26741",
|
|
43
|
+
"70991",
|
|
44
|
+
"60899",
|
|
45
|
+
"63573",
|
|
46
|
+
"47556",
|
|
47
|
+
"56193",
|
|
48
|
+
"93314",
|
|
49
|
+
"87827",
|
|
50
|
+
"62017",
|
|
51
|
+
"85067",
|
|
52
|
+
"95390",
|
|
53
|
+
"48091",
|
|
54
|
+
"10261",
|
|
55
|
+
"81845",
|
|
56
|
+
"41790",
|
|
57
|
+
"42853",
|
|
58
|
+
"24675",
|
|
59
|
+
"12840",
|
|
60
|
+
"60065",
|
|
61
|
+
"84430",
|
|
62
|
+
"57451",
|
|
63
|
+
"24021",
|
|
64
|
+
"91735",
|
|
65
|
+
"75335",
|
|
66
|
+
"71935",
|
|
67
|
+
"34482",
|
|
68
|
+
"56943",
|
|
69
|
+
"70695",
|
|
70
|
+
"52147",
|
|
71
|
+
"56251",
|
|
72
|
+
"28411",
|
|
73
|
+
"86653",
|
|
74
|
+
"23005",
|
|
75
|
+
"22478",
|
|
76
|
+
"29031",
|
|
77
|
+
"34398",
|
|
78
|
+
"15365",
|
|
79
|
+
"42460",
|
|
80
|
+
"33337",
|
|
81
|
+
"59433",
|
|
82
|
+
"73943",
|
|
83
|
+
"72477",
|
|
84
|
+
"74081",
|
|
85
|
+
"74430",
|
|
86
|
+
"64605",
|
|
87
|
+
"39006",
|
|
88
|
+
"11226",
|
|
89
|
+
"49057",
|
|
90
|
+
"97308",
|
|
91
|
+
"42663",
|
|
92
|
+
"18187",
|
|
93
|
+
"19768",
|
|
94
|
+
"43454",
|
|
95
|
+
"32147",
|
|
96
|
+
"76637",
|
|
97
|
+
"51975",
|
|
98
|
+
"11181",
|
|
99
|
+
"45630",
|
|
100
|
+
"33129",
|
|
101
|
+
"45995",
|
|
102
|
+
"64386",
|
|
103
|
+
"55522",
|
|
104
|
+
"26697",
|
|
105
|
+
"20963",
|
|
106
|
+
"35154",
|
|
107
|
+
"64587",
|
|
108
|
+
"49752",
|
|
109
|
+
"66386",
|
|
110
|
+
"30586",
|
|
111
|
+
"59286",
|
|
112
|
+
"13177",
|
|
113
|
+
"66646",
|
|
114
|
+
"84195",
|
|
115
|
+
"74316",
|
|
116
|
+
"36853",
|
|
117
|
+
"32927",
|
|
118
|
+
"12469",
|
|
119
|
+
"11904",
|
|
120
|
+
"36269",
|
|
121
|
+
"17724",
|
|
122
|
+
"55346",
|
|
123
|
+
"12595",
|
|
124
|
+
"53988",
|
|
125
|
+
"65439",
|
|
126
|
+
"28015",
|
|
127
|
+
"63268",
|
|
128
|
+
"73590",
|
|
129
|
+
"29216",
|
|
130
|
+
"82575",
|
|
131
|
+
"69267",
|
|
132
|
+
"13805",
|
|
133
|
+
"91678",
|
|
134
|
+
"79460",
|
|
135
|
+
"94152",
|
|
136
|
+
"14961",
|
|
137
|
+
"15419",
|
|
138
|
+
"48277",
|
|
139
|
+
"62588",
|
|
140
|
+
"55493",
|
|
141
|
+
"28360",
|
|
142
|
+
"14152",
|
|
143
|
+
"55225",
|
|
144
|
+
"18007",
|
|
145
|
+
"53705",
|
|
146
|
+
"56573",
|
|
147
|
+
"80245",
|
|
148
|
+
"71769",
|
|
149
|
+
"57348",
|
|
150
|
+
"36845",
|
|
151
|
+
"13039",
|
|
152
|
+
"17270",
|
|
153
|
+
"22363",
|
|
154
|
+
"83474",
|
|
155
|
+
"25294",
|
|
156
|
+
"43269",
|
|
157
|
+
"77666",
|
|
158
|
+
"15488",
|
|
159
|
+
"99146",
|
|
160
|
+
"64441",
|
|
161
|
+
"43338",
|
|
162
|
+
"38736",
|
|
163
|
+
"62754",
|
|
164
|
+
"48556",
|
|
165
|
+
"86057",
|
|
166
|
+
"23090",
|
|
167
|
+
"38114",
|
|
168
|
+
"66061",
|
|
169
|
+
"18910",
|
|
170
|
+
"84385",
|
|
171
|
+
"23600",
|
|
172
|
+
"19975",
|
|
173
|
+
"27883",
|
|
174
|
+
"65719",
|
|
175
|
+
"19933",
|
|
176
|
+
"32085",
|
|
177
|
+
"49731",
|
|
178
|
+
"40473",
|
|
179
|
+
"27190",
|
|
180
|
+
"46192",
|
|
181
|
+
"23949",
|
|
182
|
+
"44738",
|
|
183
|
+
"12436",
|
|
184
|
+
"64794",
|
|
185
|
+
"68741",
|
|
186
|
+
"15333",
|
|
187
|
+
"24282",
|
|
188
|
+
"49085",
|
|
189
|
+
"31844",
|
|
190
|
+
"71156",
|
|
191
|
+
"48441",
|
|
192
|
+
"17100",
|
|
193
|
+
"98207",
|
|
194
|
+
"44982",
|
|
195
|
+
"20277",
|
|
196
|
+
"71496",
|
|
197
|
+
"96299",
|
|
198
|
+
"37583",
|
|
199
|
+
"22206",
|
|
200
|
+
"89174",
|
|
201
|
+
"30589",
|
|
202
|
+
"61924",
|
|
203
|
+
"53079",
|
|
204
|
+
"10976",
|
|
205
|
+
"13104",
|
|
206
|
+
"42794",
|
|
207
|
+
"54772",
|
|
208
|
+
"15809",
|
|
209
|
+
"56434",
|
|
210
|
+
"39975",
|
|
211
|
+
"13874",
|
|
212
|
+
"30753",
|
|
213
|
+
"77598",
|
|
214
|
+
"78229",
|
|
215
|
+
"59478",
|
|
216
|
+
"12345",
|
|
217
|
+
"55547",
|
|
218
|
+
"57422",
|
|
219
|
+
"42600",
|
|
220
|
+
"79444",
|
|
221
|
+
"29074",
|
|
222
|
+
"29752",
|
|
223
|
+
"21676",
|
|
224
|
+
"32096",
|
|
225
|
+
"43044",
|
|
226
|
+
"39383",
|
|
227
|
+
"37296",
|
|
228
|
+
"36295",
|
|
229
|
+
"63077",
|
|
230
|
+
"16572",
|
|
231
|
+
"31275",
|
|
232
|
+
"18701",
|
|
233
|
+
"40197",
|
|
234
|
+
"48242",
|
|
235
|
+
"27219",
|
|
236
|
+
"49865",
|
|
237
|
+
"84175",
|
|
238
|
+
"30446",
|
|
239
|
+
"25165",
|
|
240
|
+
"13807",
|
|
241
|
+
"72142",
|
|
242
|
+
"70499",
|
|
243
|
+
"70464",
|
|
244
|
+
"71429",
|
|
245
|
+
"18111",
|
|
246
|
+
"70857",
|
|
247
|
+
"29545",
|
|
248
|
+
"36425",
|
|
249
|
+
"52706",
|
|
250
|
+
"36194",
|
|
251
|
+
"42963",
|
|
252
|
+
"75068",
|
|
253
|
+
"47921",
|
|
254
|
+
"74763",
|
|
255
|
+
"90990",
|
|
256
|
+
"89456",
|
|
257
|
+
"62073",
|
|
258
|
+
"88397",
|
|
259
|
+
"73963",
|
|
260
|
+
"75885",
|
|
261
|
+
"62657",
|
|
262
|
+
"12530",
|
|
263
|
+
"81146",
|
|
264
|
+
"57434",
|
|
265
|
+
"25099",
|
|
266
|
+
"41429",
|
|
267
|
+
"98441",
|
|
268
|
+
"48713",
|
|
269
|
+
"52552",
|
|
270
|
+
"31667",
|
|
271
|
+
"14072",
|
|
272
|
+
"13903",
|
|
273
|
+
"44709",
|
|
274
|
+
"85429",
|
|
275
|
+
"58017",
|
|
276
|
+
"38295",
|
|
277
|
+
"44875",
|
|
278
|
+
"73541",
|
|
279
|
+
"30091",
|
|
280
|
+
"12707",
|
|
281
|
+
"23762",
|
|
282
|
+
"62258",
|
|
283
|
+
"33247",
|
|
284
|
+
"78722",
|
|
285
|
+
"77431",
|
|
286
|
+
"14510",
|
|
287
|
+
"35656",
|
|
288
|
+
"72428",
|
|
289
|
+
"92082",
|
|
290
|
+
"35267",
|
|
291
|
+
"43759",
|
|
292
|
+
"24354",
|
|
293
|
+
"90952",
|
|
294
|
+
"11512",
|
|
295
|
+
"21242",
|
|
296
|
+
"22579",
|
|
297
|
+
"56114",
|
|
298
|
+
"32339",
|
|
299
|
+
"52282",
|
|
300
|
+
"41791",
|
|
301
|
+
"24484",
|
|
302
|
+
"95020",
|
|
303
|
+
"28408",
|
|
304
|
+
"99710",
|
|
305
|
+
"11899",
|
|
306
|
+
"43344",
|
|
307
|
+
"72915",
|
|
308
|
+
"27644",
|
|
309
|
+
"62708",
|
|
310
|
+
"74479",
|
|
311
|
+
"17177",
|
|
312
|
+
"32619",
|
|
313
|
+
"12351",
|
|
314
|
+
"91339",
|
|
315
|
+
"31169",
|
|
316
|
+
"57081",
|
|
317
|
+
"53522",
|
|
318
|
+
"16712",
|
|
319
|
+
"34419",
|
|
320
|
+
"71779",
|
|
321
|
+
"44187",
|
|
322
|
+
"46206",
|
|
323
|
+
"96099",
|
|
324
|
+
"61910",
|
|
325
|
+
"53664",
|
|
326
|
+
"12295",
|
|
327
|
+
"31837",
|
|
328
|
+
"33096",
|
|
329
|
+
"10813",
|
|
330
|
+
"63048",
|
|
331
|
+
"31732",
|
|
332
|
+
"79118",
|
|
333
|
+
"73084",
|
|
334
|
+
"72783",
|
|
335
|
+
"84952",
|
|
336
|
+
"46965",
|
|
337
|
+
"77956",
|
|
338
|
+
"39815",
|
|
339
|
+
"32311",
|
|
340
|
+
"75329",
|
|
341
|
+
"48156",
|
|
342
|
+
"30826",
|
|
343
|
+
"49661",
|
|
344
|
+
"13736",
|
|
345
|
+
"92076",
|
|
346
|
+
"74865",
|
|
347
|
+
"88149",
|
|
348
|
+
"92397",
|
|
349
|
+
"52777",
|
|
350
|
+
"68453",
|
|
351
|
+
"32012",
|
|
352
|
+
"21222",
|
|
353
|
+
"52721",
|
|
354
|
+
"24626",
|
|
355
|
+
"18210",
|
|
356
|
+
"42177",
|
|
357
|
+
"91791",
|
|
358
|
+
"75251",
|
|
359
|
+
"82075",
|
|
360
|
+
"44372",
|
|
361
|
+
"45542",
|
|
362
|
+
"20609",
|
|
363
|
+
"60115",
|
|
364
|
+
"17362",
|
|
365
|
+
"22750",
|
|
366
|
+
"90434",
|
|
367
|
+
"31852",
|
|
368
|
+
"54071",
|
|
369
|
+
"33762",
|
|
370
|
+
"14705",
|
|
371
|
+
"40718",
|
|
372
|
+
"56433",
|
|
373
|
+
"30996",
|
|
374
|
+
"40657",
|
|
375
|
+
"49056",
|
|
376
|
+
"23585",
|
|
377
|
+
"66455",
|
|
378
|
+
"41021",
|
|
379
|
+
"74736",
|
|
380
|
+
"72151",
|
|
381
|
+
"37007",
|
|
382
|
+
"21729",
|
|
383
|
+
"60177",
|
|
384
|
+
"84558",
|
|
385
|
+
"59027",
|
|
386
|
+
"93855",
|
|
387
|
+
"60022",
|
|
388
|
+
"86443",
|
|
389
|
+
"19541",
|
|
390
|
+
"86886",
|
|
391
|
+
"30532",
|
|
392
|
+
"39062",
|
|
393
|
+
"48532",
|
|
394
|
+
"34713",
|
|
395
|
+
"52077",
|
|
396
|
+
"22564",
|
|
397
|
+
"64638",
|
|
398
|
+
"15273",
|
|
399
|
+
"31677",
|
|
400
|
+
"36138",
|
|
401
|
+
"62367",
|
|
402
|
+
"60261",
|
|
403
|
+
"80213",
|
|
404
|
+
"42818",
|
|
405
|
+
"25113",
|
|
406
|
+
"72378",
|
|
407
|
+
"69802",
|
|
408
|
+
"69096",
|
|
409
|
+
"55443",
|
|
410
|
+
"28820",
|
|
411
|
+
"13848",
|
|
412
|
+
"78258",
|
|
413
|
+
"37490",
|
|
414
|
+
"30556",
|
|
415
|
+
"77380",
|
|
416
|
+
"28447",
|
|
417
|
+
"44550",
|
|
418
|
+
"26791",
|
|
419
|
+
"70609",
|
|
420
|
+
"82182",
|
|
421
|
+
"33306",
|
|
422
|
+
"43224",
|
|
423
|
+
"22322",
|
|
424
|
+
"86959",
|
|
425
|
+
"68519",
|
|
426
|
+
"14308",
|
|
427
|
+
"46501",
|
|
428
|
+
"81131",
|
|
429
|
+
"34056",
|
|
430
|
+
"61991",
|
|
431
|
+
"19896",
|
|
432
|
+
"87804",
|
|
433
|
+
"65774",
|
|
434
|
+
"92564",
|
|
435
|
+
]
|
|
436
|
+
|
|
437
|
+
|
|
438
|
+
def duckdb_impl(run_config: RunConfig) -> str:
    """
    Build the DuckDB SQL text for query 8.

    The statement is parameterised by the module-level ``TARGET_ZIPS``,
    ``TARGET_QUARTER`` and ``TARGET_YEAR`` constants.

    Parameters
    ----------
    run_config
        Benchmark run configuration. It is not read by this function.

    Returns
    -------
    str
        The SQL statement for query 8.
    """
    # Render the zip codes as a quoted, comma-separated SQL IN-list. The loop
    # variable is deliberately not called ``zip`` so the builtin is not shadowed.
    zip_list = ", ".join(f"'{zip_code}'" for zip_code in TARGET_ZIPS)
    return f"""
    -- start query 8 in stream 0 using template query8.tpl
    SELECT s_store_name,
           Sum(ss_net_profit)
    FROM store_sales,
         date_dim,
         store,
         (SELECT ca_zip
          FROM (SELECT Substr(ca_zip, 1, 5) ca_zip
                FROM customer_address
                WHERE Substr(ca_zip, 1, 5) IN ({zip_list})
                INTERSECT
                SELECT ca_zip
                FROM (SELECT Substr(ca_zip, 1, 5) ca_zip,
                             Count(*) cnt
                      FROM customer_address,
                           customer
                      WHERE ca_address_sk = c_current_addr_sk
                            AND c_preferred_cust_flag = 'Y'
                      GROUP BY ca_zip
                      HAVING Count(*) > 10)A1)A2) V1
    WHERE ss_store_sk = s_store_sk
          AND ss_sold_date_sk = d_date_sk
          AND d_qoy = {TARGET_QUARTER}
          AND d_year = {TARGET_YEAR}
          AND ( Substr(s_zip, 1, 2) = Substr(V1.ca_zip, 1, 2) )
    GROUP BY s_store_name
    ORDER BY s_store_name
    LIMIT 100;

    """
|
|
471
|
+
|
|
472
|
+
|
|
473
|
+
def polars_impl(run_config: RunConfig) -> pl.LazyFrame:
    """
    Build the Polars lazy query for query 8.

    Parameters
    ----------
    run_config
        Benchmark run configuration; its ``dataset_path`` and ``suffix``
        attributes are passed to ``get_data`` to load each table.

    Returns
    -------
    pl.LazyFrame
        Lazy query with columns ``s_store_name`` and ``sum(ss_net_profit)``,
        sorted by store name (nulls last) and limited to 100 rows.
    """
    path, suffix = run_config.dataset_path, run_config.suffix
    store_sales = get_data(path, "store_sales", suffix)
    date_dim = get_data(path, "date_dim", suffix)
    store = get_data(path, "store", suffix)
    customer_address = get_data(path, "customer_address", suffix)
    customer = get_data(path, "customer", suffix)

    # First five characters of the address zip, kept under the original name.
    zip5 = pl.col("ca_zip").str.slice(0, 5).alias("ca_zip")

    # Distinct 5-character address zips that appear in the target list.
    listed_zips = (
        customer_address.select(zip5)
        .filter(pl.col("ca_zip").is_in(TARGET_ZIPS))
        .unique()
    )

    # 5-character zips that have more than 10 preferred customers.
    addresses_with_customers = customer_address.join(
        customer, left_on="ca_address_sk", right_on="c_current_addr_sk"
    )
    preferred_zips = (
        addresses_with_customers.filter(pl.col("c_preferred_cust_flag") == "Y")
        .group_by(zip5)
        .agg(pl.len().alias("cnt"))
        .filter(pl.col("cnt") > 10)
        .select("ca_zip")
    )

    # Both sides hold distinct zips, so the inner join plays the role of the
    # SQL INTERSECT.
    qualifying_zips = listed_zips.join(
        preferred_zips, on="ca_zip", how="inner"
    ).select("ca_zip")

    # Attach the date and store dimensions, then match stores to qualifying
    # zips on the first two characters of the zip code.
    sales = store_sales.join(
        date_dim, left_on="ss_sold_date_sk", right_on="d_date_sk"
    )
    sales = sales.join(store, left_on="ss_store_sk", right_on="s_store_sk")
    sales = sales.join(
        qualifying_zips,
        left_on=pl.col("s_zip").str.slice(0, 2),
        right_on=pl.col("ca_zip").str.slice(0, 2),
    )

    in_period = sales.filter(pl.col("d_qoy") == TARGET_QUARTER).filter(
        pl.col("d_year") == TARGET_YEAR
    )
    profit_by_store = in_period.group_by("s_store_name").agg(
        pl.col("ss_net_profit").sum().alias("sum")
    )

    # The final projection renames the aggregate to the name used for the
    # ``Sum(ss_net_profit)`` output column.
    return (
        profit_by_store.sort("s_store_name", nulls_last=True)
        .limit(100)
        .select([pl.col("s_store_name"), pl.col("sum").alias("sum(ss_net_profit)")])
    )
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
"""Query 9."""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
9
|
+
|
|
10
|
+
import polars as pl
|
|
11
|
+
|
|
12
|
+
from cudf_polars.experimental.benchmarks.utils import get_data
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from cudf_polars.experimental.benchmarks.utils import RunConfig
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def duckdb_impl(run_config: RunConfig) -> str:
    """
    Return the DuckDB SQL text for query 9.

    The statement is a fixed string: five ``CASE`` expressions, one per
    ``ss_quantity`` range, each choosing between the average
    ``ss_ext_list_price`` and the average ``ss_net_profit`` depending on a
    row-count threshold.

    Parameters
    ----------
    run_config
        Benchmark run configuration. It is not read by this function.

    Returns
    -------
    str
        The SQL statement for query 9.
    """
    return """
    -- start query 9 in stream 0 using template query9.tpl
    SELECT CASE
             WHEN (SELECT Count(*)
                   FROM store_sales
                   WHERE ss_quantity BETWEEN 1 AND 20) > 3672 THEN
             (SELECT Avg(ss_ext_list_price)
              FROM store_sales
              WHERE
             ss_quantity BETWEEN 1 AND 20)
             ELSE (SELECT Avg(ss_net_profit)
                   FROM store_sales
                   WHERE ss_quantity BETWEEN 1 AND 20)
           END bucket1,
           CASE
             WHEN (SELECT Count(*)
                   FROM store_sales
                   WHERE ss_quantity BETWEEN 21 AND 40) > 3392 THEN
             (SELECT Avg(ss_ext_list_price)
              FROM store_sales
              WHERE
             ss_quantity BETWEEN 21 AND 40)
             ELSE (SELECT Avg(ss_net_profit)
                   FROM store_sales
                   WHERE ss_quantity BETWEEN 21 AND 40)
           END bucket2,
           CASE
             WHEN (SELECT Count(*)
                   FROM store_sales
                   WHERE ss_quantity BETWEEN 41 AND 60) > 32784 THEN
             (SELECT Avg(ss_ext_list_price)
              FROM store_sales
              WHERE
             ss_quantity BETWEEN 41 AND 60)
             ELSE (SELECT Avg(ss_net_profit)
                   FROM store_sales
                   WHERE ss_quantity BETWEEN 41 AND 60)
           END bucket3,
           CASE
             WHEN (SELECT Count(*)
                   FROM store_sales
                   WHERE ss_quantity BETWEEN 61 AND 80) > 26032 THEN
             (SELECT Avg(ss_ext_list_price)
              FROM store_sales
              WHERE
             ss_quantity BETWEEN 61 AND 80)
             ELSE (SELECT Avg(ss_net_profit)
                   FROM store_sales
                   WHERE ss_quantity BETWEEN 61 AND 80)
           END bucket4,
           CASE
             WHEN (SELECT Count(*)
                   FROM store_sales
                   WHERE ss_quantity BETWEEN 81 AND 100) > 23982 THEN
             (SELECT Avg(ss_ext_list_price)
              FROM store_sales
              WHERE
             ss_quantity BETWEEN 81 AND 100)
             ELSE (SELECT Avg(ss_net_profit)
                   FROM store_sales
                   WHERE ss_quantity BETWEEN 81 AND 100)
           END bucket5
    FROM reason
    WHERE r_reason_sk = 1;
    """
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def polars_impl(run_config: RunConfig) -> pl.LazyFrame:
    """
    Build the Polars lazy query for query 9.

    Parameters
    ----------
    run_config
        Benchmark run configuration; its ``dataset_path`` and ``suffix``
        attributes are passed to ``get_data`` to load each table.

    Returns
    -------
    pl.LazyFrame
        Lazy query with columns ``bucket1`` .. ``bucket5``, limited to one row.
    """
    # Load required tables
    store_sales = get_data(run_config.dataset_path, "store_sales", run_config.suffix)
    reason = get_data(run_config.dataset_path, "reason", run_config.suffix)

    # Bucket configurations: (min_qty, max_qty, count_threshold)
    buckets = [
        (1, 20, 3672),
        (21, 40, 3392),
        (41, 60, 32784),
        (61, 80, 26032),
        (81, 100, 23982),
    ]

    # One summary frame per bucket: row count, avg(ss_ext_list_price) and
    # avg(ss_net_profit) over the inclusive quantity range. Column names carry
    # the 1-based bucket index so the frames can be placed side by side.
    bucket_stats = [
        store_sales.filter(
            pl.col("ss_quantity").is_between(min_qty, max_qty, closed="both")
        ).select(
            [
                pl.len().alias(f"count_{i}"),
                pl.col("ss_ext_list_price").mean().alias(f"avg_price_{i}"),
                pl.col("ss_net_profit").mean().alias(f"avg_profit_{i}"),
            ]
        )
        for i, (min_qty, max_qty, _) in enumerate(buckets, 1)
    ]

    # Combine all bucket summaries column-wise
    combined_stats = pl.concat(bucket_stats, how="horizontal")

    # Per bucket: the average list price when the row count exceeds the
    # threshold, otherwise the average net profit.
    bucket_values = [
        pl.when(pl.col(f"count_{i}") > threshold)
        .then(pl.col(f"avg_price_{i}"))
        .otherwise(pl.col(f"avg_profit_{i}"))
        .alias(f"bucket{i}")
        for i, (_, _, threshold) in enumerate(buckets, 1)
    ]

    # Mirror the SQL, which selects from ``reason`` where r_reason_sk = 1:
    # cross-join that filtered table with the combined summaries.
    return (
        reason.filter(pl.col("r_reason_sk") == 1)
        .join(combined_stats, how="cross")
        .select(bucket_values)
        .limit(1)
    )
|