cudf-polars-cu13 25.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. cudf_polars/GIT_COMMIT +1 -0
  2. cudf_polars/VERSION +1 -0
  3. cudf_polars/__init__.py +28 -0
  4. cudf_polars/_version.py +21 -0
  5. cudf_polars/callback.py +318 -0
  6. cudf_polars/containers/__init__.py +13 -0
  7. cudf_polars/containers/column.py +495 -0
  8. cudf_polars/containers/dataframe.py +361 -0
  9. cudf_polars/containers/datatype.py +137 -0
  10. cudf_polars/dsl/__init__.py +8 -0
  11. cudf_polars/dsl/expr.py +66 -0
  12. cudf_polars/dsl/expressions/__init__.py +8 -0
  13. cudf_polars/dsl/expressions/aggregation.py +226 -0
  14. cudf_polars/dsl/expressions/base.py +272 -0
  15. cudf_polars/dsl/expressions/binaryop.py +120 -0
  16. cudf_polars/dsl/expressions/boolean.py +326 -0
  17. cudf_polars/dsl/expressions/datetime.py +271 -0
  18. cudf_polars/dsl/expressions/literal.py +97 -0
  19. cudf_polars/dsl/expressions/rolling.py +643 -0
  20. cudf_polars/dsl/expressions/selection.py +74 -0
  21. cudf_polars/dsl/expressions/slicing.py +46 -0
  22. cudf_polars/dsl/expressions/sorting.py +85 -0
  23. cudf_polars/dsl/expressions/string.py +1002 -0
  24. cudf_polars/dsl/expressions/struct.py +137 -0
  25. cudf_polars/dsl/expressions/ternary.py +49 -0
  26. cudf_polars/dsl/expressions/unary.py +517 -0
  27. cudf_polars/dsl/ir.py +2607 -0
  28. cudf_polars/dsl/nodebase.py +164 -0
  29. cudf_polars/dsl/to_ast.py +359 -0
  30. cudf_polars/dsl/tracing.py +16 -0
  31. cudf_polars/dsl/translate.py +939 -0
  32. cudf_polars/dsl/traversal.py +224 -0
  33. cudf_polars/dsl/utils/__init__.py +8 -0
  34. cudf_polars/dsl/utils/aggregations.py +481 -0
  35. cudf_polars/dsl/utils/groupby.py +98 -0
  36. cudf_polars/dsl/utils/naming.py +34 -0
  37. cudf_polars/dsl/utils/replace.py +61 -0
  38. cudf_polars/dsl/utils/reshape.py +74 -0
  39. cudf_polars/dsl/utils/rolling.py +121 -0
  40. cudf_polars/dsl/utils/windows.py +192 -0
  41. cudf_polars/experimental/__init__.py +8 -0
  42. cudf_polars/experimental/base.py +386 -0
  43. cudf_polars/experimental/benchmarks/__init__.py +4 -0
  44. cudf_polars/experimental/benchmarks/pdsds.py +220 -0
  45. cudf_polars/experimental/benchmarks/pdsds_queries/__init__.py +4 -0
  46. cudf_polars/experimental/benchmarks/pdsds_queries/q1.py +88 -0
  47. cudf_polars/experimental/benchmarks/pdsds_queries/q10.py +225 -0
  48. cudf_polars/experimental/benchmarks/pdsds_queries/q2.py +244 -0
  49. cudf_polars/experimental/benchmarks/pdsds_queries/q3.py +65 -0
  50. cudf_polars/experimental/benchmarks/pdsds_queries/q4.py +359 -0
  51. cudf_polars/experimental/benchmarks/pdsds_queries/q5.py +462 -0
  52. cudf_polars/experimental/benchmarks/pdsds_queries/q6.py +92 -0
  53. cudf_polars/experimental/benchmarks/pdsds_queries/q7.py +79 -0
  54. cudf_polars/experimental/benchmarks/pdsds_queries/q8.py +524 -0
  55. cudf_polars/experimental/benchmarks/pdsds_queries/q9.py +137 -0
  56. cudf_polars/experimental/benchmarks/pdsh.py +814 -0
  57. cudf_polars/experimental/benchmarks/utils.py +832 -0
  58. cudf_polars/experimental/dask_registers.py +200 -0
  59. cudf_polars/experimental/dispatch.py +156 -0
  60. cudf_polars/experimental/distinct.py +197 -0
  61. cudf_polars/experimental/explain.py +157 -0
  62. cudf_polars/experimental/expressions.py +590 -0
  63. cudf_polars/experimental/groupby.py +327 -0
  64. cudf_polars/experimental/io.py +943 -0
  65. cudf_polars/experimental/join.py +391 -0
  66. cudf_polars/experimental/parallel.py +423 -0
  67. cudf_polars/experimental/repartition.py +69 -0
  68. cudf_polars/experimental/scheduler.py +155 -0
  69. cudf_polars/experimental/select.py +188 -0
  70. cudf_polars/experimental/shuffle.py +354 -0
  71. cudf_polars/experimental/sort.py +609 -0
  72. cudf_polars/experimental/spilling.py +151 -0
  73. cudf_polars/experimental/statistics.py +795 -0
  74. cudf_polars/experimental/utils.py +169 -0
  75. cudf_polars/py.typed +0 -0
  76. cudf_polars/testing/__init__.py +8 -0
  77. cudf_polars/testing/asserts.py +448 -0
  78. cudf_polars/testing/io.py +122 -0
  79. cudf_polars/testing/plugin.py +236 -0
  80. cudf_polars/typing/__init__.py +219 -0
  81. cudf_polars/utils/__init__.py +8 -0
  82. cudf_polars/utils/config.py +741 -0
  83. cudf_polars/utils/conversion.py +40 -0
  84. cudf_polars/utils/dtypes.py +118 -0
  85. cudf_polars/utils/sorting.py +53 -0
  86. cudf_polars/utils/timer.py +39 -0
  87. cudf_polars/utils/versions.py +27 -0
  88. cudf_polars_cu13-25.10.0.dist-info/METADATA +136 -0
  89. cudf_polars_cu13-25.10.0.dist-info/RECORD +92 -0
  90. cudf_polars_cu13-25.10.0.dist-info/WHEEL +5 -0
  91. cudf_polars_cu13-25.10.0.dist-info/licenses/LICENSE +201 -0
  92. cudf_polars_cu13-25.10.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,524 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """Query 8."""
5
+
6
+ from __future__ import annotations
7
+
8
+ from typing import TYPE_CHECKING
9
+
10
+ import polars as pl
11
+
12
+ from cudf_polars.experimental.benchmarks.utils import get_data
13
+
14
+ if TYPE_CHECKING:
15
+ from cudf_polars.experimental.benchmarks.utils import RunConfig
16
+
17
+ """
18
+ Warning: one filter was removed to prevent zero-row results.
19
+
20
+ Note: alternate zip codes (with their preferred-customer counts):
21
+ '70069', # 93 preferred customers
22
+ '60069', # 87 preferred customers
23
+ '78877', # 87 preferred customers
24
+ '60169', # 87 preferred customers
25
+ '68252', # 86 preferred customers
26
+ '71087', # 84 preferred customers
27
+ '71711', # 84 preferred customers
28
+ '68877', # 84 preferred customers
29
+ '55709', # 82 preferred customers
30
+ """
31
+
32
+ TARGET_YEAR = 1996  # value compared against d_year in both query 8 implementations
33
+ TARGET_QUARTER = 2  # value compared against d_qoy in both query 8 implementations
34
+ TARGET_ZIPS = [
35
+ "67436",
36
+ "26121",
37
+ "38443",
38
+ "63157",
39
+ "68856",
40
+ "19485",
41
+ "86425",
42
+ "26741",
43
+ "70991",
44
+ "60899",
45
+ "63573",
46
+ "47556",
47
+ "56193",
48
+ "93314",
49
+ "87827",
50
+ "62017",
51
+ "85067",
52
+ "95390",
53
+ "48091",
54
+ "10261",
55
+ "81845",
56
+ "41790",
57
+ "42853",
58
+ "24675",
59
+ "12840",
60
+ "60065",
61
+ "84430",
62
+ "57451",
63
+ "24021",
64
+ "91735",
65
+ "75335",
66
+ "71935",
67
+ "34482",
68
+ "56943",
69
+ "70695",
70
+ "52147",
71
+ "56251",
72
+ "28411",
73
+ "86653",
74
+ "23005",
75
+ "22478",
76
+ "29031",
77
+ "34398",
78
+ "15365",
79
+ "42460",
80
+ "33337",
81
+ "59433",
82
+ "73943",
83
+ "72477",
84
+ "74081",
85
+ "74430",
86
+ "64605",
87
+ "39006",
88
+ "11226",
89
+ "49057",
90
+ "97308",
91
+ "42663",
92
+ "18187",
93
+ "19768",
94
+ "43454",
95
+ "32147",
96
+ "76637",
97
+ "51975",
98
+ "11181",
99
+ "45630",
100
+ "33129",
101
+ "45995",
102
+ "64386",
103
+ "55522",
104
+ "26697",
105
+ "20963",
106
+ "35154",
107
+ "64587",
108
+ "49752",
109
+ "66386",
110
+ "30586",
111
+ "59286",
112
+ "13177",
113
+ "66646",
114
+ "84195",
115
+ "74316",
116
+ "36853",
117
+ "32927",
118
+ "12469",
119
+ "11904",
120
+ "36269",
121
+ "17724",
122
+ "55346",
123
+ "12595",
124
+ "53988",
125
+ "65439",
126
+ "28015",
127
+ "63268",
128
+ "73590",
129
+ "29216",
130
+ "82575",
131
+ "69267",
132
+ "13805",
133
+ "91678",
134
+ "79460",
135
+ "94152",
136
+ "14961",
137
+ "15419",
138
+ "48277",
139
+ "62588",
140
+ "55493",
141
+ "28360",
142
+ "14152",
143
+ "55225",
144
+ "18007",
145
+ "53705",
146
+ "56573",
147
+ "80245",
148
+ "71769",
149
+ "57348",
150
+ "36845",
151
+ "13039",
152
+ "17270",
153
+ "22363",
154
+ "83474",
155
+ "25294",
156
+ "43269",
157
+ "77666",
158
+ "15488",
159
+ "99146",
160
+ "64441",
161
+ "43338",
162
+ "38736",
163
+ "62754",
164
+ "48556",
165
+ "86057",
166
+ "23090",
167
+ "38114",
168
+ "66061",
169
+ "18910",
170
+ "84385",
171
+ "23600",
172
+ "19975",
173
+ "27883",
174
+ "65719",
175
+ "19933",
176
+ "32085",
177
+ "49731",
178
+ "40473",
179
+ "27190",
180
+ "46192",
181
+ "23949",
182
+ "44738",
183
+ "12436",
184
+ "64794",
185
+ "68741",
186
+ "15333",
187
+ "24282",
188
+ "49085",
189
+ "31844",
190
+ "71156",
191
+ "48441",
192
+ "17100",
193
+ "98207",
194
+ "44982",
195
+ "20277",
196
+ "71496",
197
+ "96299",
198
+ "37583",
199
+ "22206",
200
+ "89174",
201
+ "30589",
202
+ "61924",
203
+ "53079",
204
+ "10976",
205
+ "13104",
206
+ "42794",
207
+ "54772",
208
+ "15809",
209
+ "56434",
210
+ "39975",
211
+ "13874",
212
+ "30753",
213
+ "77598",
214
+ "78229",
215
+ "59478",
216
+ "12345",
217
+ "55547",
218
+ "57422",
219
+ "42600",
220
+ "79444",
221
+ "29074",
222
+ "29752",
223
+ "21676",
224
+ "32096",
225
+ "43044",
226
+ "39383",
227
+ "37296",
228
+ "36295",
229
+ "63077",
230
+ "16572",
231
+ "31275",
232
+ "18701",
233
+ "40197",
234
+ "48242",
235
+ "27219",
236
+ "49865",
237
+ "84175",
238
+ "30446",
239
+ "25165",
240
+ "13807",
241
+ "72142",
242
+ "70499",
243
+ "70464",
244
+ "71429",
245
+ "18111",
246
+ "70857",
247
+ "29545",
248
+ "36425",
249
+ "52706",
250
+ "36194",
251
+ "42963",
252
+ "75068",
253
+ "47921",
254
+ "74763",
255
+ "90990",
256
+ "89456",
257
+ "62073",
258
+ "88397",
259
+ "73963",
260
+ "75885",
261
+ "62657",
262
+ "12530",
263
+ "81146",
264
+ "57434",
265
+ "25099",
266
+ "41429",
267
+ "98441",
268
+ "48713",
269
+ "52552",
270
+ "31667",
271
+ "14072",
272
+ "13903",
273
+ "44709",
274
+ "85429",
275
+ "58017",
276
+ "38295",
277
+ "44875",
278
+ "73541",
279
+ "30091",
280
+ "12707",
281
+ "23762",
282
+ "62258",
283
+ "33247",
284
+ "78722",
285
+ "77431",
286
+ "14510",
287
+ "35656",
288
+ "72428",
289
+ "92082",
290
+ "35267",
291
+ "43759",
292
+ "24354",
293
+ "90952",
294
+ "11512",
295
+ "21242",
296
+ "22579",
297
+ "56114",
298
+ "32339",
299
+ "52282",
300
+ "41791",
301
+ "24484",
302
+ "95020",
303
+ "28408",
304
+ "99710",
305
+ "11899",
306
+ "43344",
307
+ "72915",
308
+ "27644",
309
+ "62708",
310
+ "74479",
311
+ "17177",
312
+ "32619",
313
+ "12351",
314
+ "91339",
315
+ "31169",
316
+ "57081",
317
+ "53522",
318
+ "16712",
319
+ "34419",
320
+ "71779",
321
+ "44187",
322
+ "46206",
323
+ "96099",
324
+ "61910",
325
+ "53664",
326
+ "12295",
327
+ "31837",
328
+ "33096",
329
+ "10813",
330
+ "63048",
331
+ "31732",
332
+ "79118",
333
+ "73084",
334
+ "72783",
335
+ "84952",
336
+ "46965",
337
+ "77956",
338
+ "39815",
339
+ "32311",
340
+ "75329",
341
+ "48156",
342
+ "30826",
343
+ "49661",
344
+ "13736",
345
+ "92076",
346
+ "74865",
347
+ "88149",
348
+ "92397",
349
+ "52777",
350
+ "68453",
351
+ "32012",
352
+ "21222",
353
+ "52721",
354
+ "24626",
355
+ "18210",
356
+ "42177",
357
+ "91791",
358
+ "75251",
359
+ "82075",
360
+ "44372",
361
+ "45542",
362
+ "20609",
363
+ "60115",
364
+ "17362",
365
+ "22750",
366
+ "90434",
367
+ "31852",
368
+ "54071",
369
+ "33762",
370
+ "14705",
371
+ "40718",
372
+ "56433",
373
+ "30996",
374
+ "40657",
375
+ "49056",
376
+ "23585",
377
+ "66455",
378
+ "41021",
379
+ "74736",
380
+ "72151",
381
+ "37007",
382
+ "21729",
383
+ "60177",
384
+ "84558",
385
+ "59027",
386
+ "93855",
387
+ "60022",
388
+ "86443",
389
+ "19541",
390
+ "86886",
391
+ "30532",
392
+ "39062",
393
+ "48532",
394
+ "34713",
395
+ "52077",
396
+ "22564",
397
+ "64638",
398
+ "15273",
399
+ "31677",
400
+ "36138",
401
+ "62367",
402
+ "60261",
403
+ "80213",
404
+ "42818",
405
+ "25113",
406
+ "72378",
407
+ "69802",
408
+ "69096",
409
+ "55443",
410
+ "28820",
411
+ "13848",
412
+ "78258",
413
+ "37490",
414
+ "30556",
415
+ "77380",
416
+ "28447",
417
+ "44550",
418
+ "26791",
419
+ "70609",
420
+ "82182",
421
+ "33306",
422
+ "43224",
423
+ "22322",
424
+ "86959",
425
+ "68519",
426
+ "14308",
427
+ "46501",
428
+ "81131",
429
+ "34056",
430
+ "61991",
431
+ "19896",
432
+ "87804",
433
+ "65774",
434
+ "92564",
435
+ ]
436
+
437
+
438
def duckdb_impl(run_config: RunConfig) -> str:
    """
    Build the DuckDB SQL text for query 8.

    The statement is parameterised by the module-level ``TARGET_ZIPS``
    (rendered as a quoted, comma-separated ``IN`` list), ``TARGET_QUARTER``
    and ``TARGET_YEAR``.

    Parameters
    ----------
    run_config
        Benchmark run configuration. Not read by this function.

    Returns
    -------
    The SQL query as a string.
    """
    # Render the zip codes once, as 'a', 'b', ... for the IN (...) clause.
    quoted_zips = ", ".join([f"'{code}'" for code in TARGET_ZIPS])
    return f"""
    -- start query 8 in stream 0 using template query8.tpl
    SELECT s_store_name,
    Sum(ss_net_profit)
    FROM store_sales,
    date_dim,
    store,
    (SELECT ca_zip
    FROM (SELECT Substr(ca_zip, 1, 5) ca_zip
    FROM customer_address
    WHERE Substr(ca_zip, 1, 5) IN ({quoted_zips})
    INTERSECT
    SELECT ca_zip
    FROM (SELECT Substr(ca_zip, 1, 5) ca_zip,
    Count(*) cnt
    FROM customer_address,
    customer
    WHERE ca_address_sk = c_current_addr_sk
    AND c_preferred_cust_flag = 'Y'
    GROUP BY ca_zip
    HAVING Count(*) > 10)A1)A2) V1
    WHERE ss_store_sk = s_store_sk
    AND ss_sold_date_sk = d_date_sk
    AND d_qoy = {TARGET_QUARTER}
    AND d_year = {TARGET_YEAR}
    AND ( Substr(s_zip, 1, 2) = Substr(V1.ca_zip, 1, 2) )
    GROUP BY s_store_name
    ORDER BY s_store_name
    LIMIT 100;

    """
471
+
472
+
473
def polars_impl(run_config: RunConfig) -> pl.LazyFrame:
    """
    Build the Polars query for query 8.

    Sums ``ss_net_profit`` per ``s_store_name`` for sales in
    ``TARGET_YEAR``/``TARGET_QUARTER`` whose store zip shares its first two
    characters with a qualifying customer zip. A customer zip qualifies when
    its first five characters are in ``TARGET_ZIPS`` and more than 10
    customers flagged ``c_preferred_cust_flag == "Y"`` live there.

    Parameters
    ----------
    run_config
        Benchmark run configuration; ``dataset_path`` and ``suffix`` are
        forwarded to ``get_data`` for every table.

    Returns
    -------
    Query with columns ``s_store_name`` and ``sum(ss_net_profit)``, sorted
    by store name (nulls last) and limited to 100 rows.
    """

    def load(table_name):
        # Every table is read from the same dataset location and suffix.
        return get_data(run_config.dataset_path, table_name, run_config.suffix)

    store_sales = load("store_sales")
    date_dim = load("date_dim")
    store = load("store")
    customer_address = load("customer_address")
    customer = load("customer")

    # Five-character zip prefix, kept under the original column name.
    zip5 = pl.col("ca_zip").str.slice(0, 5).alias("ca_zip")

    # Branch 1 of the INTERSECT: distinct address zips found in TARGET_ZIPS.
    listed_zips = customer_address.select(zip5)
    listed_zips = listed_zips.filter(pl.col("ca_zip").is_in(TARGET_ZIPS))
    listed_zips = listed_zips.unique()

    # Branch 2 of the INTERSECT: zips with more than 10 preferred customers.
    preferred = customer_address.join(
        customer, left_on="ca_address_sk", right_on="c_current_addr_sk"
    )
    preferred = preferred.filter(pl.col("c_preferred_cust_flag") == "Y")
    preferred_counts = preferred.group_by(zip5).agg(pl.len().alias("cnt"))
    popular_zips = preferred_counts.filter(pl.col("cnt") > 10).select("ca_zip")

    # INTERSECT expressed as an inner join on the zip column.
    qualifying_zips = listed_zips.join(
        popular_zips, on="ca_zip", how="inner"
    ).select("ca_zip")

    # Fact table joined to its dimensions, then matched on 2-character zip prefix.
    sales = store_sales.join(
        date_dim, left_on="ss_sold_date_sk", right_on="d_date_sk"
    )
    sales = sales.join(store, left_on="ss_store_sk", right_on="s_store_sk")
    sales = sales.join(
        qualifying_zips,
        left_on=pl.col("s_zip").str.slice(0, 2),
        right_on=pl.col("ca_zip").str.slice(0, 2),
    )
    sales = sales.filter(pl.col("d_qoy") == TARGET_QUARTER)
    sales = sales.filter(pl.col("d_year") == TARGET_YEAR)

    profit_by_store = sales.group_by("s_store_name").agg(
        pl.col("ss_net_profit").sum().alias("sum")
    )
    top_stores = profit_by_store.sort("s_store_name", nulls_last=True).limit(100)
    return top_stores.select(
        [pl.col("s_store_name"), pl.col("sum").alias("sum(ss_net_profit)")]
    )
@@ -0,0 +1,137 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """Query 9."""
5
+
6
+ from __future__ import annotations
7
+
8
+ from typing import TYPE_CHECKING
9
+
10
+ import polars as pl
11
+
12
+ from cudf_polars.experimental.benchmarks.utils import get_data
13
+
14
+ if TYPE_CHECKING:
15
+ from cudf_polars.experimental.benchmarks.utils import RunConfig
16
+
17
+
18
def duckdb_impl(run_config: RunConfig) -> str:
    """
    Return the DuckDB SQL text for query 9.

    The statement is a fixed literal: five ``CASE`` expressions
    (``bucket1`` .. ``bucket5``), one per ``ss_quantity`` range, each picking
    ``Avg(ss_ext_list_price)`` when the range's row count exceeds a
    hard-coded threshold and ``Avg(ss_net_profit)`` otherwise.

    Parameters
    ----------
    run_config
        Benchmark run configuration. Not read by this function.

    Returns
    -------
    The SQL query as a string.
    """
    query = """
    -- start query 9 in stream 0 using template query9.tpl
    SELECT CASE
    WHEN (SELECT Count(*)
    FROM store_sales
    WHERE ss_quantity BETWEEN 1 AND 20) > 3672 THEN
    (SELECT Avg(ss_ext_list_price)
    FROM store_sales
    WHERE
    ss_quantity BETWEEN 1 AND 20)
    ELSE (SELECT Avg(ss_net_profit)
    FROM store_sales
    WHERE ss_quantity BETWEEN 1 AND 20)
    END bucket1,
    CASE
    WHEN (SELECT Count(*)
    FROM store_sales
    WHERE ss_quantity BETWEEN 21 AND 40) > 3392 THEN
    (SELECT Avg(ss_ext_list_price)
    FROM store_sales
    WHERE
    ss_quantity BETWEEN 21 AND 40)
    ELSE (SELECT Avg(ss_net_profit)
    FROM store_sales
    WHERE ss_quantity BETWEEN 21 AND 40)
    END bucket2,
    CASE
    WHEN (SELECT Count(*)
    FROM store_sales
    WHERE ss_quantity BETWEEN 41 AND 60) > 32784 THEN
    (SELECT Avg(ss_ext_list_price)
    FROM store_sales
    WHERE
    ss_quantity BETWEEN 41 AND 60)
    ELSE (SELECT Avg(ss_net_profit)
    FROM store_sales
    WHERE ss_quantity BETWEEN 41 AND 60)
    END bucket3,
    CASE
    WHEN (SELECT Count(*)
    FROM store_sales
    WHERE ss_quantity BETWEEN 61 AND 80) > 26032 THEN
    (SELECT Avg(ss_ext_list_price)
    FROM store_sales
    WHERE
    ss_quantity BETWEEN 61 AND 80)
    ELSE (SELECT Avg(ss_net_profit)
    FROM store_sales
    WHERE ss_quantity BETWEEN 61 AND 80)
    END bucket4,
    CASE
    WHEN (SELECT Count(*)
    FROM store_sales
    WHERE ss_quantity BETWEEN 81 AND 100) > 23982 THEN
    (SELECT Avg(ss_ext_list_price)
    FROM store_sales
    WHERE
    ss_quantity BETWEEN 81 AND 100)
    ELSE (SELECT Avg(ss_net_profit)
    FROM store_sales
    WHERE ss_quantity BETWEEN 81 AND 100)
    END bucket5
    FROM reason
    WHERE r_reason_sk = 1;
    """
    return query
85
+
86
+
87
def polars_impl(run_config: RunConfig) -> pl.LazyFrame:
    """
    Build the Polars query for query 9.

    For each of five ``ss_quantity`` ranges the query computes the row count
    and the means of ``ss_ext_list_price`` and ``ss_net_profit``. Each output
    column ``bucket1`` .. ``bucket5`` is the mean list price when that
    range's count exceeds its threshold, and the mean net profit otherwise.

    Parameters
    ----------
    run_config
        Benchmark run configuration; ``dataset_path`` and ``suffix`` are
        forwarded to ``get_data`` for both tables.

    Returns
    -------
    Query producing the five bucket columns, limited to one row.
    """
    store_sales = get_data(run_config.dataset_path, "store_sales", run_config.suffix)
    reason = get_data(run_config.dataset_path, "reason", run_config.suffix)

    # (lowest quantity, highest quantity, count threshold) per bucket.
    quantity_buckets = [
        (1, 20, 3672),
        (21, 40, 3392),
        (41, 60, 32784),
        (61, 80, 26032),
        (81, 100, 23982),
    ]

    # One single-row summary per bucket, with column names suffixed by the
    # 1-based bucket number so they can sit side by side.
    per_bucket_summaries = [
        store_sales.filter(
            pl.col("ss_quantity").is_between(low, high, closed="both")
        ).select(
            [
                pl.len().alias(f"count_{number}"),
                pl.col("ss_ext_list_price").mean().alias(f"avg_price_{number}"),
                pl.col("ss_net_profit").mean().alias(f"avg_profit_{number}"),
            ]
        )
        for number, (low, high, _) in enumerate(quantity_buckets, 1)
    ]
    summary_row = pl.concat(per_bucket_summaries, how="horizontal")

    # Per bucket: list-price mean above the threshold, net-profit mean otherwise.
    chosen_values = [
        pl.when(pl.col(f"count_{number}") > limit)
        .then(pl.col(f"avg_price_{number}"))
        .otherwise(pl.col(f"avg_profit_{number}"))
        .alias(f"bucket{number}")
        for number, (_, _, limit) in enumerate(quantity_buckets, 1)
    ]

    # The SQL selects FROM reason WHERE r_reason_sk = 1; mirror that by cross
    # joining the matching reason rows with the summary row.
    matching_reason = reason.filter(pl.col("r_reason_sk") == 1)
    joined = matching_reason.join(summary_row, how="cross")
    return joined.select(chosen_values).limit(1)