cudf-polars-cu13 25.10.0__py3-none-any.whl → 26.2.0__py3-none-any.whl

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (76)
  1. cudf_polars/GIT_COMMIT +1 -1
  2. cudf_polars/VERSION +1 -1
  3. cudf_polars/callback.py +60 -15
  4. cudf_polars/containers/column.py +137 -77
  5. cudf_polars/containers/dataframe.py +123 -34
  6. cudf_polars/containers/datatype.py +134 -13
  7. cudf_polars/dsl/expr.py +0 -2
  8. cudf_polars/dsl/expressions/aggregation.py +80 -28
  9. cudf_polars/dsl/expressions/binaryop.py +34 -14
  10. cudf_polars/dsl/expressions/boolean.py +110 -37
  11. cudf_polars/dsl/expressions/datetime.py +59 -30
  12. cudf_polars/dsl/expressions/literal.py +11 -5
  13. cudf_polars/dsl/expressions/rolling.py +460 -119
  14. cudf_polars/dsl/expressions/selection.py +9 -8
  15. cudf_polars/dsl/expressions/slicing.py +1 -1
  16. cudf_polars/dsl/expressions/string.py +256 -114
  17. cudf_polars/dsl/expressions/struct.py +19 -7
  18. cudf_polars/dsl/expressions/ternary.py +33 -3
  19. cudf_polars/dsl/expressions/unary.py +126 -64
  20. cudf_polars/dsl/ir.py +1053 -350
  21. cudf_polars/dsl/to_ast.py +30 -13
  22. cudf_polars/dsl/tracing.py +194 -0
  23. cudf_polars/dsl/translate.py +307 -107
  24. cudf_polars/dsl/utils/aggregations.py +43 -30
  25. cudf_polars/dsl/utils/reshape.py +14 -2
  26. cudf_polars/dsl/utils/rolling.py +12 -8
  27. cudf_polars/dsl/utils/windows.py +35 -20
  28. cudf_polars/experimental/base.py +55 -2
  29. cudf_polars/experimental/benchmarks/pdsds.py +12 -126
  30. cudf_polars/experimental/benchmarks/pdsh.py +792 -2
  31. cudf_polars/experimental/benchmarks/utils.py +596 -39
  32. cudf_polars/experimental/dask_registers.py +47 -20
  33. cudf_polars/experimental/dispatch.py +9 -3
  34. cudf_polars/experimental/distinct.py +2 -0
  35. cudf_polars/experimental/explain.py +15 -2
  36. cudf_polars/experimental/expressions.py +30 -15
  37. cudf_polars/experimental/groupby.py +25 -4
  38. cudf_polars/experimental/io.py +156 -124
  39. cudf_polars/experimental/join.py +53 -23
  40. cudf_polars/experimental/parallel.py +68 -19
  41. cudf_polars/experimental/rapidsmpf/__init__.py +8 -0
  42. cudf_polars/experimental/rapidsmpf/collectives/__init__.py +9 -0
  43. cudf_polars/experimental/rapidsmpf/collectives/allgather.py +90 -0
  44. cudf_polars/experimental/rapidsmpf/collectives/common.py +96 -0
  45. cudf_polars/experimental/rapidsmpf/collectives/shuffle.py +253 -0
  46. cudf_polars/experimental/rapidsmpf/core.py +488 -0
  47. cudf_polars/experimental/rapidsmpf/dask.py +172 -0
  48. cudf_polars/experimental/rapidsmpf/dispatch.py +153 -0
  49. cudf_polars/experimental/rapidsmpf/io.py +696 -0
  50. cudf_polars/experimental/rapidsmpf/join.py +322 -0
  51. cudf_polars/experimental/rapidsmpf/lower.py +74 -0
  52. cudf_polars/experimental/rapidsmpf/nodes.py +735 -0
  53. cudf_polars/experimental/rapidsmpf/repartition.py +216 -0
  54. cudf_polars/experimental/rapidsmpf/union.py +115 -0
  55. cudf_polars/experimental/rapidsmpf/utils.py +374 -0
  56. cudf_polars/experimental/repartition.py +9 -2
  57. cudf_polars/experimental/select.py +177 -14
  58. cudf_polars/experimental/shuffle.py +46 -12
  59. cudf_polars/experimental/sort.py +100 -26
  60. cudf_polars/experimental/spilling.py +1 -1
  61. cudf_polars/experimental/statistics.py +24 -5
  62. cudf_polars/experimental/utils.py +25 -7
  63. cudf_polars/testing/asserts.py +13 -8
  64. cudf_polars/testing/io.py +2 -1
  65. cudf_polars/testing/plugin.py +93 -17
  66. cudf_polars/typing/__init__.py +86 -32
  67. cudf_polars/utils/config.py +473 -58
  68. cudf_polars/utils/cuda_stream.py +70 -0
  69. cudf_polars/utils/versions.py +5 -4
  70. cudf_polars_cu13-26.2.0.dist-info/METADATA +181 -0
  71. cudf_polars_cu13-26.2.0.dist-info/RECORD +108 -0
  72. {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/WHEEL +1 -1
  73. cudf_polars_cu13-25.10.0.dist-info/METADATA +0 -136
  74. cudf_polars_cu13-25.10.0.dist-info/RECORD +0 -92
  75. {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/licenses/LICENSE +0 -0
  76. {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/top_level.txt +0 -0
@@ -22,8 +22,11 @@ import polars as pl
22
22
 
23
23
  with contextlib.suppress(ImportError):
24
24
  from cudf_polars.experimental.benchmarks.utils import (
25
+ RunConfig,
25
26
  get_data,
27
+ run_duckdb,
26
28
  run_polars,
29
+ run_validate,
27
30
  )
28
31
 
29
32
 
@@ -607,7 +610,7 @@ class PDSHQueries:
607
610
  q1 = (
608
611
  part.filter(pl.col("p_brand") == var1)
609
612
  .filter(pl.col("p_container") == var2)
610
- .join(lineitem, how="left", left_on="p_partkey", right_on="l_partkey")
613
+ .join(lineitem, how="inner", left_on="p_partkey", right_on="l_partkey")
611
614
  )
612
615
 
613
616
  return (
@@ -810,5 +813,792 @@ class PDSHQueries:
810
813
  )
811
814
 
812
815
 
816
+ class PDSHDuckDBQueries:
817
+ """PDS-H DuckDB query definitions."""
818
+
819
+ name: str = "pdsh"
820
+
821
+ @staticmethod
822
+ def q1(run_config: RunConfig) -> str:
823
+ """Query 1."""
824
+ return """
825
+ select
826
+ l_returnflag,
827
+ l_linestatus,
828
+ sum(l_quantity) as sum_qty,
829
+ sum(l_extendedprice) as sum_base_price,
830
+ sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
831
+ sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
832
+ avg(l_quantity) as avg_qty,
833
+ avg(l_extendedprice) as avg_price,
834
+ avg(l_discount) as avg_disc,
835
+ count(*) as count_order
836
+ from
837
+ lineitem
838
+ where
839
+ l_shipdate <= DATE '1998-09-02'
840
+ group by
841
+ l_returnflag,
842
+ l_linestatus
843
+ order by
844
+ l_returnflag,
845
+ l_linestatus
846
+ """
847
+
848
+ @staticmethod
849
+ def q2(run_config: RunConfig) -> str:
850
+ """Query 2."""
851
+ return """
852
+ select
853
+ s_acctbal,
854
+ s_name,
855
+ n_name,
856
+ p_partkey,
857
+ p_mfgr,
858
+ s_address,
859
+ s_phone,
860
+ s_comment
861
+ from
862
+ part,
863
+ supplier,
864
+ partsupp,
865
+ nation,
866
+ region
867
+ where
868
+ p_partkey = ps_partkey
869
+ and s_suppkey = ps_suppkey
870
+ and p_size = 15
871
+ and p_type like '%BRASS'
872
+ and s_nationkey = n_nationkey
873
+ and n_regionkey = r_regionkey
874
+ and r_name = 'EUROPE'
875
+ and ps_supplycost = (
876
+ select
877
+ min(ps_supplycost)
878
+ from
879
+ partsupp,
880
+ supplier,
881
+ nation,
882
+ region
883
+ where
884
+ p_partkey = ps_partkey
885
+ and s_suppkey = ps_suppkey
886
+ and s_nationkey = n_nationkey
887
+ and n_regionkey = r_regionkey
888
+ and r_name = 'EUROPE'
889
+ )
890
+ order by
891
+ s_acctbal desc,
892
+ n_name,
893
+ s_name,
894
+ p_partkey
895
+ limit 100
896
+ """
897
+
898
+ @staticmethod
899
+ def q3(run_config: RunConfig) -> str:
900
+ """Query 3."""
901
+ return """
902
+ select
903
+ l_orderkey,
904
+ sum(l_extendedprice * (1 - l_discount)) as revenue,
905
+ o_orderdate,
906
+ o_shippriority
907
+ from
908
+ customer,
909
+ orders,
910
+ lineitem
911
+ where
912
+ c_mktsegment = 'BUILDING'
913
+ and c_custkey = o_custkey
914
+ and l_orderkey = o_orderkey
915
+ and o_orderdate < '1995-03-15'
916
+ and l_shipdate > '1995-03-15'
917
+ group by
918
+ l_orderkey,
919
+ o_orderdate,
920
+ o_shippriority
921
+ order by
922
+ revenue desc,
923
+ o_orderdate
924
+ limit 10
925
+ """
926
+
927
+ @staticmethod
928
+ def q4(run_config: RunConfig) -> str:
929
+ """Query 4."""
930
+ return """
931
+ select
932
+ o_orderpriority,
933
+ count(*) as order_count
934
+ from
935
+ orders
936
+ where
937
+ o_orderdate >= timestamp '1993-07-01'
938
+ and o_orderdate < timestamp '1993-07-01' + interval '3' month
939
+ and exists (
940
+ select
941
+ *
942
+ from
943
+ lineitem
944
+ where
945
+ l_orderkey = o_orderkey
946
+ and l_commitdate < l_receiptdate
947
+ )
948
+ group by
949
+ o_orderpriority
950
+ order by
951
+ o_orderpriority
952
+ """
953
+
954
+ @staticmethod
955
+ def q5(run_config: RunConfig) -> str:
956
+ """Query 5."""
957
+ return """
958
+ select
959
+ n_name,
960
+ sum(l_extendedprice * (1 - l_discount)) as revenue
961
+ from
962
+ customer,
963
+ orders,
964
+ lineitem,
965
+ supplier,
966
+ nation,
967
+ region
968
+ where
969
+ c_custkey = o_custkey
970
+ and l_orderkey = o_orderkey
971
+ and l_suppkey = s_suppkey
972
+ and c_nationkey = s_nationkey
973
+ and s_nationkey = n_nationkey
974
+ and n_regionkey = r_regionkey
975
+ and r_name = 'ASIA'
976
+ and o_orderdate >= timestamp '1994-01-01'
977
+ and o_orderdate < timestamp '1994-01-01' + interval '1' year
978
+ group by
979
+ n_name
980
+ order by
981
+ revenue desc
982
+ """
983
+
984
+ @staticmethod
985
+ def q6(run_config: RunConfig) -> str:
986
+ """Query 6."""
987
+ return """
988
+ select
989
+ sum(l_extendedprice * l_discount) as revenue
990
+ from
991
+ lineitem
992
+ where
993
+ l_shipdate >= timestamp '1994-01-01'
994
+ and l_shipdate < timestamp '1994-01-01' + interval '1' year
995
+ and l_discount between .06 - 0.01 and .06 + 0.01
996
+ and l_quantity < 24
997
+ """
998
+
999
+ @staticmethod
1000
+ def q7(run_config: RunConfig) -> str:
1001
+ """Query 7."""
1002
+ return """
1003
+ select
1004
+ supp_nation,
1005
+ cust_nation,
1006
+ l_year,
1007
+ sum(volume) as revenue
1008
+ from
1009
+ (
1010
+ select
1011
+ n1.n_name as supp_nation,
1012
+ n2.n_name as cust_nation,
1013
+ year(l_shipdate) as l_year,
1014
+ l_extendedprice * (1 - l_discount) as volume
1015
+ from
1016
+ supplier,
1017
+ lineitem,
1018
+ orders,
1019
+ customer,
1020
+ nation n1,
1021
+ nation n2
1022
+ where
1023
+ s_suppkey = l_suppkey
1024
+ and o_orderkey = l_orderkey
1025
+ and c_custkey = o_custkey
1026
+ and s_nationkey = n1.n_nationkey
1027
+ and c_nationkey = n2.n_nationkey
1028
+ and (
1029
+ (n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY')
1030
+ or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE')
1031
+ )
1032
+ and l_shipdate between timestamp '1995-01-01' and timestamp '1996-12-31'
1033
+ ) as shipping
1034
+ group by
1035
+ supp_nation,
1036
+ cust_nation,
1037
+ l_year
1038
+ order by
1039
+ supp_nation,
1040
+ cust_nation,
1041
+ l_year
1042
+ """
1043
+
1044
+ @staticmethod
1045
+ def q8(run_config: RunConfig) -> str:
1046
+ """Query 8."""
1047
+ return """
1048
+ select
1049
+ o_year,
1050
+ round(
1051
+ sum(case
1052
+ when nation = 'BRAZIL' then volume
1053
+ else 0
1054
+ end) / sum(volume)
1055
+ , 2) as mkt_share
1056
+ from
1057
+ (
1058
+ select
1059
+ extract(year from o_orderdate) as o_year,
1060
+ l_extendedprice * (1 - l_discount) as volume,
1061
+ n2.n_name as nation
1062
+ from
1063
+ part,
1064
+ supplier,
1065
+ lineitem,
1066
+ orders,
1067
+ customer,
1068
+ nation n1,
1069
+ nation n2,
1070
+ region
1071
+ where
1072
+ p_partkey = l_partkey
1073
+ and s_suppkey = l_suppkey
1074
+ and l_orderkey = o_orderkey
1075
+ and o_custkey = c_custkey
1076
+ and c_nationkey = n1.n_nationkey
1077
+ and n1.n_regionkey = r_regionkey
1078
+ and r_name = 'AMERICA'
1079
+ and s_nationkey = n2.n_nationkey
1080
+ and o_orderdate between timestamp '1995-01-01' and timestamp '1996-12-31'
1081
+ and p_type = 'ECONOMY ANODIZED STEEL'
1082
+ ) as all_nations
1083
+ group by
1084
+ o_year
1085
+ order by
1086
+ o_year
1087
+ """
1088
+
1089
+ @staticmethod
1090
+ def q9(run_config: RunConfig) -> str:
1091
+ """Query 9."""
1092
+ return """
1093
+ select
1094
+ nation,
1095
+ o_year,
1096
+ round(sum(amount), 2) as sum_profit
1097
+ from
1098
+ (
1099
+ select
1100
+ n_name as nation,
1101
+ year(o_orderdate) as o_year,
1102
+ l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount
1103
+ from
1104
+ part,
1105
+ supplier,
1106
+ lineitem,
1107
+ partsupp,
1108
+ orders,
1109
+ nation
1110
+ where
1111
+ s_suppkey = l_suppkey
1112
+ and ps_suppkey = l_suppkey
1113
+ and ps_partkey = l_partkey
1114
+ and p_partkey = l_partkey
1115
+ and o_orderkey = l_orderkey
1116
+ and s_nationkey = n_nationkey
1117
+ and p_name like '%green%'
1118
+ ) as profit
1119
+ group by
1120
+ nation,
1121
+ o_year
1122
+ order by
1123
+ nation,
1124
+ o_year desc
1125
+ """
1126
+
1127
+ @staticmethod
1128
+ def q10(run_config: RunConfig) -> str:
1129
+ """Query 10."""
1130
+ return """
1131
+ select
1132
+ c_custkey,
1133
+ c_name,
1134
+ round(sum(l_extendedprice * (1 - l_discount)), 2) as revenue,
1135
+ c_acctbal,
1136
+ n_name,
1137
+ c_address,
1138
+ c_phone,
1139
+ c_comment
1140
+ from
1141
+ customer,
1142
+ orders,
1143
+ lineitem,
1144
+ nation
1145
+ where
1146
+ c_custkey = o_custkey
1147
+ and l_orderkey = o_orderkey
1148
+ and o_orderdate >= date '1993-10-01'
1149
+ and o_orderdate < date '1993-10-01' + interval '3' month
1150
+ and l_returnflag = 'R'
1151
+ and c_nationkey = n_nationkey
1152
+ group by
1153
+ c_custkey,
1154
+ c_name,
1155
+ c_acctbal,
1156
+ c_phone,
1157
+ n_name,
1158
+ c_address,
1159
+ c_comment
1160
+ order by
1161
+ revenue desc
1162
+ limit 20
1163
+ """
1164
+
1165
+ @staticmethod
1166
+ def q11(run_config: RunConfig) -> str:
1167
+ """Query 11."""
1168
+ return f"""
1169
+ select
1170
+ ps_partkey,
1171
+ round(sum(ps_supplycost * ps_availqty), 2) as value
1172
+ from
1173
+ partsupp, supplier, nation
1174
+ where
1175
+ ps_suppkey = s_suppkey
1176
+ and s_nationkey = n_nationkey
1177
+ and n_name = 'GERMANY'
1178
+ group by
1179
+ ps_partkey
1180
+ having
1181
+ sum(ps_supplycost * ps_availqty) > (
1182
+ select
1183
+ sum(ps_supplycost * ps_availqty) * {0.0001 / run_config.scale_factor}
1184
+ from
1185
+ partsupp, supplier, nation
1186
+ where
1187
+ ps_suppkey = s_suppkey
1188
+ and s_nationkey = n_nationkey
1189
+ and n_name = 'GERMANY'
1190
+ )
1191
+ order by
1192
+ value desc
1193
+ """
1194
+
1195
+ @staticmethod
1196
+ def q12(run_config: RunConfig) -> str:
1197
+ """Query 12."""
1198
+ return """
1199
+ select
1200
+ l_shipmode,
1201
+ sum(case
1202
+ when o_orderpriority = '1-URGENT'
1203
+ or o_orderpriority = '2-HIGH'
1204
+ then 1
1205
+ else 0
1206
+ end) as high_line_count,
1207
+ sum(case
1208
+ when o_orderpriority <> '1-URGENT'
1209
+ and o_orderpriority <> '2-HIGH'
1210
+ then 1
1211
+ else 0
1212
+ end) as low_line_count
1213
+ from
1214
+ orders,
1215
+ lineitem
1216
+ where
1217
+ o_orderkey = l_orderkey
1218
+ and l_shipmode in ('MAIL', 'SHIP')
1219
+ and l_commitdate < l_receiptdate
1220
+ and l_shipdate < l_commitdate
1221
+ and l_receiptdate >= date '1994-01-01'
1222
+ and l_receiptdate < date '1994-01-01' + interval '1' year
1223
+ group by
1224
+ l_shipmode
1225
+ order by
1226
+ l_shipmode
1227
+ """
1228
+
1229
+ @staticmethod
1230
+ def q13(run_config: RunConfig) -> str:
1231
+ """Query 13."""
1232
+ return """
1233
+ select
1234
+ c_count, count(*) as custdist
1235
+ from (
1236
+ select
1237
+ c_custkey,
1238
+ count(o_orderkey)
1239
+ from
1240
+ customer left outer join orders on
1241
+ c_custkey = o_custkey
1242
+ and o_comment not like '%special%requests%'
1243
+ group by
1244
+ c_custkey
1245
+ )as c_orders (c_custkey, c_count)
1246
+ group by
1247
+ c_count
1248
+ order by
1249
+ custdist desc,
1250
+ c_count desc
1251
+ """
1252
+
1253
+ @staticmethod
1254
+ def q14(run_config: RunConfig) -> str:
1255
+ """Query 14."""
1256
+ return """
1257
+ select
1258
+ round(100.00 * sum(case
1259
+ when p_type like 'PROMO%'
1260
+ then l_extendedprice * (1 - l_discount)
1261
+ else 0
1262
+ end) / sum(l_extendedprice * (1 - l_discount)), 2) as promo_revenue
1263
+ from
1264
+ lineitem,
1265
+ part
1266
+ where
1267
+ l_partkey = p_partkey
1268
+ and l_shipdate >= date '1995-09-01'
1269
+ and l_shipdate < date '1995-09-01' + interval '1' month
1270
+ """
1271
+
1272
+ @staticmethod
1273
+ def q15(run_config: RunConfig) -> str:
1274
+ """Query 15."""
1275
+ return """
1276
+ with revenue (supplier_no, total_revenue) as (
1277
+ select
1278
+ l_suppkey,
1279
+ sum(l_extendedprice * (1 - l_discount))
1280
+ from
1281
+ lineitem
1282
+ where
1283
+ l_shipdate >= date '1996-01-01'
1284
+ and l_shipdate < date '1996-01-01' + interval '3' month
1285
+ group by
1286
+ l_suppkey
1287
+ )
1288
+ select
1289
+ s_suppkey,
1290
+ s_name,
1291
+ s_address,
1292
+ s_phone,
1293
+ total_revenue
1294
+ from
1295
+ supplier,
1296
+ revenue
1297
+ where
1298
+ s_suppkey = supplier_no
1299
+ and total_revenue = (
1300
+ select
1301
+ max(total_revenue)
1302
+ from
1303
+ revenue
1304
+ )
1305
+ order by
1306
+ s_suppkey
1307
+ """
1308
+
1309
+ @staticmethod
1310
+ def q16(run_config: RunConfig) -> str:
1311
+ """Query 16."""
1312
+ return """
1313
+ select
1314
+ p_brand,
1315
+ p_type,
1316
+ p_size,
1317
+ count(distinct ps_suppkey) as supplier_cnt
1318
+ from
1319
+ partsupp,
1320
+ part
1321
+ where
1322
+ p_partkey = ps_partkey
1323
+ and p_brand <> 'Brand#45'
1324
+ and p_type not like 'MEDIUM POLISHED%'
1325
+ and p_size in (49, 14, 23, 45, 19, 3, 36, 9)
1326
+ and ps_suppkey not in (
1327
+ select
1328
+ s_suppkey
1329
+ from
1330
+ supplier
1331
+ where
1332
+ s_comment like '%Customer%Complaints%'
1333
+ )
1334
+ group by
1335
+ p_brand,
1336
+ p_type,
1337
+ p_size
1338
+ order by
1339
+ supplier_cnt desc,
1340
+ p_brand,
1341
+ p_type,
1342
+ p_size
1343
+ """
1344
+
1345
+ @staticmethod
1346
+ def q17(run_config: RunConfig) -> str:
1347
+ """Query 17."""
1348
+ return """
1349
+ select
1350
+ round(sum(l_extendedprice) / 7.0, 2) as avg_yearly
1351
+ from
1352
+ lineitem,
1353
+ part
1354
+ where
1355
+ p_partkey = l_partkey
1356
+ and p_brand = 'Brand#23'
1357
+ and p_container = 'MED BOX'
1358
+ and l_quantity < (
1359
+ select
1360
+ 0.2 * avg(l_quantity)
1361
+ from
1362
+ lineitem
1363
+ where
1364
+ l_partkey = p_partkey
1365
+ )
1366
+ """
1367
+
1368
+ @staticmethod
1369
+ def q18(run_config: RunConfig) -> str:
1370
+ """Query 18."""
1371
+ return """
1372
+ select
1373
+ c_name,
1374
+ c_custkey,
1375
+ o_orderkey,
1376
+ o_orderdate as o_orderdat,
1377
+ o_totalprice,
1378
+ sum(l_quantity) as col6
1379
+ from
1380
+ customer,
1381
+ orders,
1382
+ lineitem
1383
+ where
1384
+ o_orderkey in (
1385
+ select
1386
+ l_orderkey
1387
+ from
1388
+ lineitem
1389
+ group by
1390
+ l_orderkey having
1391
+ sum(l_quantity) > 300
1392
+ )
1393
+ and c_custkey = o_custkey
1394
+ and o_orderkey = l_orderkey
1395
+ group by
1396
+ c_name,
1397
+ c_custkey,
1398
+ o_orderkey,
1399
+ o_orderdate,
1400
+ o_totalprice
1401
+ order by
1402
+ o_totalprice desc,
1403
+ o_orderdate
1404
+ limit 100
1405
+ """
1406
+
1407
+ @staticmethod
1408
+ def q19(run_config: RunConfig) -> str:
1409
+ """Query 19."""
1410
+ return """
1411
+ select
1412
+ round(sum(l_extendedprice* (1 - l_discount)), 2) as revenue
1413
+ from
1414
+ lineitem,
1415
+ part
1416
+ where
1417
+ (
1418
+ p_partkey = l_partkey
1419
+ and p_brand = 'Brand#12'
1420
+ and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
1421
+ and l_quantity >= 1 and l_quantity <= 1 + 10
1422
+ and p_size between 1 and 5
1423
+ and l_shipmode in ('AIR', 'AIR REG')
1424
+ and l_shipinstruct = 'DELIVER IN PERSON'
1425
+ )
1426
+ or
1427
+ (
1428
+ p_partkey = l_partkey
1429
+ and p_brand = 'Brand#23'
1430
+ and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
1431
+ and l_quantity >= 10 and l_quantity <= 20
1432
+ and p_size between 1 and 10
1433
+ and l_shipmode in ('AIR', 'AIR REG')
1434
+ and l_shipinstruct = 'DELIVER IN PERSON'
1435
+ )
1436
+ or
1437
+ (
1438
+ p_partkey = l_partkey
1439
+ and p_brand = 'Brand#34'
1440
+ and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
1441
+ and l_quantity >= 20 and l_quantity <= 30
1442
+ and p_size between 1 and 15
1443
+ and l_shipmode in ('AIR', 'AIR REG')
1444
+ and l_shipinstruct = 'DELIVER IN PERSON'
1445
+ )
1446
+ """
1447
+
1448
+ @staticmethod
1449
+ def q20(run_config: RunConfig) -> str:
1450
+ """Query 20."""
1451
+ return """
1452
+ select
1453
+ s_name,
1454
+ s_address
1455
+ from
1456
+ supplier,
1457
+ nation
1458
+ where
1459
+ s_suppkey in (
1460
+ select
1461
+ ps_suppkey
1462
+ from
1463
+ partsupp
1464
+ where
1465
+ ps_partkey in (
1466
+ select
1467
+ p_partkey
1468
+ from
1469
+ part
1470
+ where
1471
+ p_name like 'forest%'
1472
+ )
1473
+ and ps_availqty > (
1474
+ select
1475
+ 0.5 * sum(l_quantity)
1476
+ from
1477
+ lineitem
1478
+ where
1479
+ l_partkey = ps_partkey
1480
+ and l_suppkey = ps_suppkey
1481
+ and l_shipdate >= date '1994-01-01'
1482
+ and l_shipdate < date '1994-01-01' + interval '1' year
1483
+ )
1484
+ )
1485
+ and s_nationkey = n_nationkey
1486
+ and n_name = 'CANADA'
1487
+ order by
1488
+ s_name
1489
+ """
1490
+
1491
+ @staticmethod
1492
+ def q21(run_config: RunConfig) -> str:
1493
+ """Query 21."""
1494
+ return """
1495
+ select
1496
+ s_name,
1497
+ count(*) as numwait
1498
+ from
1499
+ supplier,
1500
+ lineitem l1,
1501
+ orders,
1502
+ nation
1503
+ where
1504
+ s_suppkey = l1.l_suppkey
1505
+ and o_orderkey = l1.l_orderkey
1506
+ and o_orderstatus = 'F'
1507
+ and l1.l_receiptdate > l1.l_commitdate
1508
+ and exists (
1509
+ select
1510
+ *
1511
+ from
1512
+ lineitem l2
1513
+ where
1514
+ l2.l_orderkey = l1.l_orderkey
1515
+ and l2.l_suppkey <> l1.l_suppkey
1516
+ )
1517
+ and not exists (
1518
+ select
1519
+ *
1520
+ from
1521
+ lineitem l3
1522
+ where
1523
+ l3.l_orderkey = l1.l_orderkey
1524
+ and l3.l_suppkey <> l1.l_suppkey
1525
+ and l3.l_receiptdate > l3.l_commitdate
1526
+ )
1527
+ and s_nationkey = n_nationkey
1528
+ and n_name = 'SAUDI ARABIA'
1529
+ group by
1530
+ s_name
1531
+ order by
1532
+ numwait desc,
1533
+ s_name
1534
+ limit 100
1535
+ """
1536
+
1537
+ @staticmethod
1538
+ def q22(run_config: RunConfig) -> str:
1539
+ """Query 22."""
1540
+ return """
1541
+ select
1542
+ cntrycode,
1543
+ count(*) as numcust,
1544
+ sum(c_acctbal) as totacctbal
1545
+ from (
1546
+ select
1547
+ substring(c_phone from 1 for 2) as cntrycode,
1548
+ c_acctbal
1549
+ from
1550
+ customer
1551
+ where
1552
+ substring(c_phone from 1 for 2) in
1553
+ (13, 31, 23, 29, 30, 18, 17)
1554
+ and c_acctbal > (
1555
+ select
1556
+ avg(c_acctbal)
1557
+ from
1558
+ customer
1559
+ where
1560
+ c_acctbal > 0.00
1561
+ and substring (c_phone from 1 for 2) in
1562
+ (13, 31, 23, 29, 30, 18, 17)
1563
+ )
1564
+ and not exists (
1565
+ select
1566
+ *
1567
+ from
1568
+ orders
1569
+ where
1570
+ o_custkey = c_custkey
1571
+ )
1572
+ ) as custsale
1573
+ group by
1574
+ cntrycode
1575
+ order by
1576
+ cntrycode
1577
+ """
1578
+
1579
+
813
1580
  if __name__ == "__main__":
814
- run_polars(PDSHQueries)
1581
+ import argparse
1582
+
1583
+ parser = argparse.ArgumentParser(description="Run PDS-H benchmarks.")
1584
+ parser.add_argument(
1585
+ "--engine",
1586
+ choices=["polars", "duckdb", "validate"],
1587
+ default="polars",
1588
+ help="Which engine to use for executing the benchmarks or to validate results.",
1589
+ )
1590
+ args, extra_args = parser.parse_known_args()
1591
+
1592
+ if args.engine == "polars":
1593
+ run_polars(PDSHQueries, extra_args, num_queries=22)
1594
+ elif args.engine == "duckdb":
1595
+ run_duckdb(PDSHDuckDBQueries, extra_args, num_queries=22)
1596
+ elif args.engine == "validate":
1597
+ run_validate(
1598
+ PDSHQueries,
1599
+ PDSHDuckDBQueries,
1600
+ extra_args,
1601
+ num_queries=22,
1602
+ check_dtypes=True,
1603
+ check_column_order=True,
1604
+ )