cudf-polars-cu13 25.10.0__py3-none-any.whl → 26.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cudf_polars/GIT_COMMIT +1 -1
- cudf_polars/VERSION +1 -1
- cudf_polars/callback.py +60 -15
- cudf_polars/containers/column.py +137 -77
- cudf_polars/containers/dataframe.py +123 -34
- cudf_polars/containers/datatype.py +134 -13
- cudf_polars/dsl/expr.py +0 -2
- cudf_polars/dsl/expressions/aggregation.py +80 -28
- cudf_polars/dsl/expressions/binaryop.py +34 -14
- cudf_polars/dsl/expressions/boolean.py +110 -37
- cudf_polars/dsl/expressions/datetime.py +59 -30
- cudf_polars/dsl/expressions/literal.py +11 -5
- cudf_polars/dsl/expressions/rolling.py +460 -119
- cudf_polars/dsl/expressions/selection.py +9 -8
- cudf_polars/dsl/expressions/slicing.py +1 -1
- cudf_polars/dsl/expressions/string.py +256 -114
- cudf_polars/dsl/expressions/struct.py +19 -7
- cudf_polars/dsl/expressions/ternary.py +33 -3
- cudf_polars/dsl/expressions/unary.py +126 -64
- cudf_polars/dsl/ir.py +1053 -350
- cudf_polars/dsl/to_ast.py +30 -13
- cudf_polars/dsl/tracing.py +194 -0
- cudf_polars/dsl/translate.py +307 -107
- cudf_polars/dsl/utils/aggregations.py +43 -30
- cudf_polars/dsl/utils/reshape.py +14 -2
- cudf_polars/dsl/utils/rolling.py +12 -8
- cudf_polars/dsl/utils/windows.py +35 -20
- cudf_polars/experimental/base.py +55 -2
- cudf_polars/experimental/benchmarks/pdsds.py +12 -126
- cudf_polars/experimental/benchmarks/pdsh.py +792 -2
- cudf_polars/experimental/benchmarks/utils.py +596 -39
- cudf_polars/experimental/dask_registers.py +47 -20
- cudf_polars/experimental/dispatch.py +9 -3
- cudf_polars/experimental/distinct.py +2 -0
- cudf_polars/experimental/explain.py +15 -2
- cudf_polars/experimental/expressions.py +30 -15
- cudf_polars/experimental/groupby.py +25 -4
- cudf_polars/experimental/io.py +156 -124
- cudf_polars/experimental/join.py +53 -23
- cudf_polars/experimental/parallel.py +68 -19
- cudf_polars/experimental/rapidsmpf/__init__.py +8 -0
- cudf_polars/experimental/rapidsmpf/collectives/__init__.py +9 -0
- cudf_polars/experimental/rapidsmpf/collectives/allgather.py +90 -0
- cudf_polars/experimental/rapidsmpf/collectives/common.py +96 -0
- cudf_polars/experimental/rapidsmpf/collectives/shuffle.py +253 -0
- cudf_polars/experimental/rapidsmpf/core.py +488 -0
- cudf_polars/experimental/rapidsmpf/dask.py +172 -0
- cudf_polars/experimental/rapidsmpf/dispatch.py +153 -0
- cudf_polars/experimental/rapidsmpf/io.py +696 -0
- cudf_polars/experimental/rapidsmpf/join.py +322 -0
- cudf_polars/experimental/rapidsmpf/lower.py +74 -0
- cudf_polars/experimental/rapidsmpf/nodes.py +735 -0
- cudf_polars/experimental/rapidsmpf/repartition.py +216 -0
- cudf_polars/experimental/rapidsmpf/union.py +115 -0
- cudf_polars/experimental/rapidsmpf/utils.py +374 -0
- cudf_polars/experimental/repartition.py +9 -2
- cudf_polars/experimental/select.py +177 -14
- cudf_polars/experimental/shuffle.py +46 -12
- cudf_polars/experimental/sort.py +100 -26
- cudf_polars/experimental/spilling.py +1 -1
- cudf_polars/experimental/statistics.py +24 -5
- cudf_polars/experimental/utils.py +25 -7
- cudf_polars/testing/asserts.py +13 -8
- cudf_polars/testing/io.py +2 -1
- cudf_polars/testing/plugin.py +93 -17
- cudf_polars/typing/__init__.py +86 -32
- cudf_polars/utils/config.py +473 -58
- cudf_polars/utils/cuda_stream.py +70 -0
- cudf_polars/utils/versions.py +5 -4
- cudf_polars_cu13-26.2.0.dist-info/METADATA +181 -0
- cudf_polars_cu13-26.2.0.dist-info/RECORD +108 -0
- {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/WHEEL +1 -1
- cudf_polars_cu13-25.10.0.dist-info/METADATA +0 -136
- cudf_polars_cu13-25.10.0.dist-info/RECORD +0 -92
- {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/licenses/LICENSE +0 -0
- {cudf_polars_cu13-25.10.0.dist-info → cudf_polars_cu13-26.2.0.dist-info}/top_level.txt +0 -0
|
@@ -22,8 +22,11 @@ import polars as pl
|
|
|
22
22
|
|
|
23
23
|
with contextlib.suppress(ImportError):
|
|
24
24
|
from cudf_polars.experimental.benchmarks.utils import (
|
|
25
|
+
RunConfig,
|
|
25
26
|
get_data,
|
|
27
|
+
run_duckdb,
|
|
26
28
|
run_polars,
|
|
29
|
+
run_validate,
|
|
27
30
|
)
|
|
28
31
|
|
|
29
32
|
|
|
@@ -607,7 +610,7 @@ class PDSHQueries:
|
|
|
607
610
|
q1 = (
|
|
608
611
|
part.filter(pl.col("p_brand") == var1)
|
|
609
612
|
.filter(pl.col("p_container") == var2)
|
|
610
|
-
.join(lineitem, how="left", left_on="p_partkey", right_on="l_partkey")
|
|
613
|
+
.join(lineitem, how="inner", left_on="p_partkey", right_on="l_partkey")
|
|
611
614
|
)
|
|
612
615
|
|
|
613
616
|
return (
|
|
@@ -810,5 +813,792 @@ class PDSHQueries:
|
|
|
810
813
|
)
|
|
811
814
|
|
|
812
815
|
|
|
816
|
+
class PDSHDuckDBQueries:
|
|
817
|
+
"""PDS-H DuckDB query definitions."""
|
|
818
|
+
|
|
819
|
+
name: str = "pdsh"
|
|
820
|
+
|
|
821
|
+
@staticmethod
|
|
822
|
+
def q1(run_config: RunConfig) -> str:
|
|
823
|
+
"""Query 1."""
|
|
824
|
+
return """
|
|
825
|
+
select
|
|
826
|
+
l_returnflag,
|
|
827
|
+
l_linestatus,
|
|
828
|
+
sum(l_quantity) as sum_qty,
|
|
829
|
+
sum(l_extendedprice) as sum_base_price,
|
|
830
|
+
sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
|
|
831
|
+
sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
|
|
832
|
+
avg(l_quantity) as avg_qty,
|
|
833
|
+
avg(l_extendedprice) as avg_price,
|
|
834
|
+
avg(l_discount) as avg_disc,
|
|
835
|
+
count(*) as count_order
|
|
836
|
+
from
|
|
837
|
+
lineitem
|
|
838
|
+
where
|
|
839
|
+
l_shipdate <= DATE '1998-09-02'
|
|
840
|
+
group by
|
|
841
|
+
l_returnflag,
|
|
842
|
+
l_linestatus
|
|
843
|
+
order by
|
|
844
|
+
l_returnflag,
|
|
845
|
+
l_linestatus
|
|
846
|
+
"""
|
|
847
|
+
|
|
848
|
+
@staticmethod
|
|
849
|
+
def q2(run_config: RunConfig) -> str:
|
|
850
|
+
"""Query 2."""
|
|
851
|
+
return """
|
|
852
|
+
select
|
|
853
|
+
s_acctbal,
|
|
854
|
+
s_name,
|
|
855
|
+
n_name,
|
|
856
|
+
p_partkey,
|
|
857
|
+
p_mfgr,
|
|
858
|
+
s_address,
|
|
859
|
+
s_phone,
|
|
860
|
+
s_comment
|
|
861
|
+
from
|
|
862
|
+
part,
|
|
863
|
+
supplier,
|
|
864
|
+
partsupp,
|
|
865
|
+
nation,
|
|
866
|
+
region
|
|
867
|
+
where
|
|
868
|
+
p_partkey = ps_partkey
|
|
869
|
+
and s_suppkey = ps_suppkey
|
|
870
|
+
and p_size = 15
|
|
871
|
+
and p_type like '%BRASS'
|
|
872
|
+
and s_nationkey = n_nationkey
|
|
873
|
+
and n_regionkey = r_regionkey
|
|
874
|
+
and r_name = 'EUROPE'
|
|
875
|
+
and ps_supplycost = (
|
|
876
|
+
select
|
|
877
|
+
min(ps_supplycost)
|
|
878
|
+
from
|
|
879
|
+
partsupp,
|
|
880
|
+
supplier,
|
|
881
|
+
nation,
|
|
882
|
+
region
|
|
883
|
+
where
|
|
884
|
+
p_partkey = ps_partkey
|
|
885
|
+
and s_suppkey = ps_suppkey
|
|
886
|
+
and s_nationkey = n_nationkey
|
|
887
|
+
and n_regionkey = r_regionkey
|
|
888
|
+
and r_name = 'EUROPE'
|
|
889
|
+
)
|
|
890
|
+
order by
|
|
891
|
+
s_acctbal desc,
|
|
892
|
+
n_name,
|
|
893
|
+
s_name,
|
|
894
|
+
p_partkey
|
|
895
|
+
limit 100
|
|
896
|
+
"""
|
|
897
|
+
|
|
898
|
+
@staticmethod
|
|
899
|
+
def q3(run_config: RunConfig) -> str:
|
|
900
|
+
"""Query 3."""
|
|
901
|
+
return """
|
|
902
|
+
select
|
|
903
|
+
l_orderkey,
|
|
904
|
+
sum(l_extendedprice * (1 - l_discount)) as revenue,
|
|
905
|
+
o_orderdate,
|
|
906
|
+
o_shippriority
|
|
907
|
+
from
|
|
908
|
+
customer,
|
|
909
|
+
orders,
|
|
910
|
+
lineitem
|
|
911
|
+
where
|
|
912
|
+
c_mktsegment = 'BUILDING'
|
|
913
|
+
and c_custkey = o_custkey
|
|
914
|
+
and l_orderkey = o_orderkey
|
|
915
|
+
and o_orderdate < '1995-03-15'
|
|
916
|
+
and l_shipdate > '1995-03-15'
|
|
917
|
+
group by
|
|
918
|
+
l_orderkey,
|
|
919
|
+
o_orderdate,
|
|
920
|
+
o_shippriority
|
|
921
|
+
order by
|
|
922
|
+
revenue desc,
|
|
923
|
+
o_orderdate
|
|
924
|
+
limit 10
|
|
925
|
+
"""
|
|
926
|
+
|
|
927
|
+
@staticmethod
|
|
928
|
+
def q4(run_config: RunConfig) -> str:
|
|
929
|
+
"""Query 4."""
|
|
930
|
+
return """
|
|
931
|
+
select
|
|
932
|
+
o_orderpriority,
|
|
933
|
+
count(*) as order_count
|
|
934
|
+
from
|
|
935
|
+
orders
|
|
936
|
+
where
|
|
937
|
+
o_orderdate >= timestamp '1993-07-01'
|
|
938
|
+
and o_orderdate < timestamp '1993-07-01' + interval '3' month
|
|
939
|
+
and exists (
|
|
940
|
+
select
|
|
941
|
+
*
|
|
942
|
+
from
|
|
943
|
+
lineitem
|
|
944
|
+
where
|
|
945
|
+
l_orderkey = o_orderkey
|
|
946
|
+
and l_commitdate < l_receiptdate
|
|
947
|
+
)
|
|
948
|
+
group by
|
|
949
|
+
o_orderpriority
|
|
950
|
+
order by
|
|
951
|
+
o_orderpriority
|
|
952
|
+
"""
|
|
953
|
+
|
|
954
|
+
@staticmethod
|
|
955
|
+
def q5(run_config: RunConfig) -> str:
|
|
956
|
+
"""Query 5."""
|
|
957
|
+
return """
|
|
958
|
+
select
|
|
959
|
+
n_name,
|
|
960
|
+
sum(l_extendedprice * (1 - l_discount)) as revenue
|
|
961
|
+
from
|
|
962
|
+
customer,
|
|
963
|
+
orders,
|
|
964
|
+
lineitem,
|
|
965
|
+
supplier,
|
|
966
|
+
nation,
|
|
967
|
+
region
|
|
968
|
+
where
|
|
969
|
+
c_custkey = o_custkey
|
|
970
|
+
and l_orderkey = o_orderkey
|
|
971
|
+
and l_suppkey = s_suppkey
|
|
972
|
+
and c_nationkey = s_nationkey
|
|
973
|
+
and s_nationkey = n_nationkey
|
|
974
|
+
and n_regionkey = r_regionkey
|
|
975
|
+
and r_name = 'ASIA'
|
|
976
|
+
and o_orderdate >= timestamp '1994-01-01'
|
|
977
|
+
and o_orderdate < timestamp '1994-01-01' + interval '1' year
|
|
978
|
+
group by
|
|
979
|
+
n_name
|
|
980
|
+
order by
|
|
981
|
+
revenue desc
|
|
982
|
+
"""
|
|
983
|
+
|
|
984
|
+
@staticmethod
|
|
985
|
+
def q6(run_config: RunConfig) -> str:
|
|
986
|
+
"""Query 6."""
|
|
987
|
+
return """
|
|
988
|
+
select
|
|
989
|
+
sum(l_extendedprice * l_discount) as revenue
|
|
990
|
+
from
|
|
991
|
+
lineitem
|
|
992
|
+
where
|
|
993
|
+
l_shipdate >= timestamp '1994-01-01'
|
|
994
|
+
and l_shipdate < timestamp '1994-01-01' + interval '1' year
|
|
995
|
+
and l_discount between .06 - 0.01 and .06 + 0.01
|
|
996
|
+
and l_quantity < 24
|
|
997
|
+
"""
|
|
998
|
+
|
|
999
|
+
@staticmethod
|
|
1000
|
+
def q7(run_config: RunConfig) -> str:
|
|
1001
|
+
"""Query 7."""
|
|
1002
|
+
return """
|
|
1003
|
+
select
|
|
1004
|
+
supp_nation,
|
|
1005
|
+
cust_nation,
|
|
1006
|
+
l_year,
|
|
1007
|
+
sum(volume) as revenue
|
|
1008
|
+
from
|
|
1009
|
+
(
|
|
1010
|
+
select
|
|
1011
|
+
n1.n_name as supp_nation,
|
|
1012
|
+
n2.n_name as cust_nation,
|
|
1013
|
+
year(l_shipdate) as l_year,
|
|
1014
|
+
l_extendedprice * (1 - l_discount) as volume
|
|
1015
|
+
from
|
|
1016
|
+
supplier,
|
|
1017
|
+
lineitem,
|
|
1018
|
+
orders,
|
|
1019
|
+
customer,
|
|
1020
|
+
nation n1,
|
|
1021
|
+
nation n2
|
|
1022
|
+
where
|
|
1023
|
+
s_suppkey = l_suppkey
|
|
1024
|
+
and o_orderkey = l_orderkey
|
|
1025
|
+
and c_custkey = o_custkey
|
|
1026
|
+
and s_nationkey = n1.n_nationkey
|
|
1027
|
+
and c_nationkey = n2.n_nationkey
|
|
1028
|
+
and (
|
|
1029
|
+
(n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY')
|
|
1030
|
+
or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE')
|
|
1031
|
+
)
|
|
1032
|
+
and l_shipdate between timestamp '1995-01-01' and timestamp '1996-12-31'
|
|
1033
|
+
) as shipping
|
|
1034
|
+
group by
|
|
1035
|
+
supp_nation,
|
|
1036
|
+
cust_nation,
|
|
1037
|
+
l_year
|
|
1038
|
+
order by
|
|
1039
|
+
supp_nation,
|
|
1040
|
+
cust_nation,
|
|
1041
|
+
l_year
|
|
1042
|
+
"""
|
|
1043
|
+
|
|
1044
|
+
@staticmethod
|
|
1045
|
+
def q8(run_config: RunConfig) -> str:
|
|
1046
|
+
"""Query 8."""
|
|
1047
|
+
return """
|
|
1048
|
+
select
|
|
1049
|
+
o_year,
|
|
1050
|
+
round(
|
|
1051
|
+
sum(case
|
|
1052
|
+
when nation = 'BRAZIL' then volume
|
|
1053
|
+
else 0
|
|
1054
|
+
end) / sum(volume)
|
|
1055
|
+
, 2) as mkt_share
|
|
1056
|
+
from
|
|
1057
|
+
(
|
|
1058
|
+
select
|
|
1059
|
+
extract(year from o_orderdate) as o_year,
|
|
1060
|
+
l_extendedprice * (1 - l_discount) as volume,
|
|
1061
|
+
n2.n_name as nation
|
|
1062
|
+
from
|
|
1063
|
+
part,
|
|
1064
|
+
supplier,
|
|
1065
|
+
lineitem,
|
|
1066
|
+
orders,
|
|
1067
|
+
customer,
|
|
1068
|
+
nation n1,
|
|
1069
|
+
nation n2,
|
|
1070
|
+
region
|
|
1071
|
+
where
|
|
1072
|
+
p_partkey = l_partkey
|
|
1073
|
+
and s_suppkey = l_suppkey
|
|
1074
|
+
and l_orderkey = o_orderkey
|
|
1075
|
+
and o_custkey = c_custkey
|
|
1076
|
+
and c_nationkey = n1.n_nationkey
|
|
1077
|
+
and n1.n_regionkey = r_regionkey
|
|
1078
|
+
and r_name = 'AMERICA'
|
|
1079
|
+
and s_nationkey = n2.n_nationkey
|
|
1080
|
+
and o_orderdate between timestamp '1995-01-01' and timestamp '1996-12-31'
|
|
1081
|
+
and p_type = 'ECONOMY ANODIZED STEEL'
|
|
1082
|
+
) as all_nations
|
|
1083
|
+
group by
|
|
1084
|
+
o_year
|
|
1085
|
+
order by
|
|
1086
|
+
o_year
|
|
1087
|
+
"""
|
|
1088
|
+
|
|
1089
|
+
@staticmethod
|
|
1090
|
+
def q9(run_config: RunConfig) -> str:
|
|
1091
|
+
"""Query 9."""
|
|
1092
|
+
return """
|
|
1093
|
+
select
|
|
1094
|
+
nation,
|
|
1095
|
+
o_year,
|
|
1096
|
+
round(sum(amount), 2) as sum_profit
|
|
1097
|
+
from
|
|
1098
|
+
(
|
|
1099
|
+
select
|
|
1100
|
+
n_name as nation,
|
|
1101
|
+
year(o_orderdate) as o_year,
|
|
1102
|
+
l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount
|
|
1103
|
+
from
|
|
1104
|
+
part,
|
|
1105
|
+
supplier,
|
|
1106
|
+
lineitem,
|
|
1107
|
+
partsupp,
|
|
1108
|
+
orders,
|
|
1109
|
+
nation
|
|
1110
|
+
where
|
|
1111
|
+
s_suppkey = l_suppkey
|
|
1112
|
+
and ps_suppkey = l_suppkey
|
|
1113
|
+
and ps_partkey = l_partkey
|
|
1114
|
+
and p_partkey = l_partkey
|
|
1115
|
+
and o_orderkey = l_orderkey
|
|
1116
|
+
and s_nationkey = n_nationkey
|
|
1117
|
+
and p_name like '%green%'
|
|
1118
|
+
) as profit
|
|
1119
|
+
group by
|
|
1120
|
+
nation,
|
|
1121
|
+
o_year
|
|
1122
|
+
order by
|
|
1123
|
+
nation,
|
|
1124
|
+
o_year desc
|
|
1125
|
+
"""
|
|
1126
|
+
|
|
1127
|
+
@staticmethod
|
|
1128
|
+
def q10(run_config: RunConfig) -> str:
|
|
1129
|
+
"""Query 10."""
|
|
1130
|
+
return """
|
|
1131
|
+
select
|
|
1132
|
+
c_custkey,
|
|
1133
|
+
c_name,
|
|
1134
|
+
round(sum(l_extendedprice * (1 - l_discount)), 2) as revenue,
|
|
1135
|
+
c_acctbal,
|
|
1136
|
+
n_name,
|
|
1137
|
+
c_address,
|
|
1138
|
+
c_phone,
|
|
1139
|
+
c_comment
|
|
1140
|
+
from
|
|
1141
|
+
customer,
|
|
1142
|
+
orders,
|
|
1143
|
+
lineitem,
|
|
1144
|
+
nation
|
|
1145
|
+
where
|
|
1146
|
+
c_custkey = o_custkey
|
|
1147
|
+
and l_orderkey = o_orderkey
|
|
1148
|
+
and o_orderdate >= date '1993-10-01'
|
|
1149
|
+
and o_orderdate < date '1993-10-01' + interval '3' month
|
|
1150
|
+
and l_returnflag = 'R'
|
|
1151
|
+
and c_nationkey = n_nationkey
|
|
1152
|
+
group by
|
|
1153
|
+
c_custkey,
|
|
1154
|
+
c_name,
|
|
1155
|
+
c_acctbal,
|
|
1156
|
+
c_phone,
|
|
1157
|
+
n_name,
|
|
1158
|
+
c_address,
|
|
1159
|
+
c_comment
|
|
1160
|
+
order by
|
|
1161
|
+
revenue desc
|
|
1162
|
+
limit 20
|
|
1163
|
+
"""
|
|
1164
|
+
|
|
1165
|
+
@staticmethod
|
|
1166
|
+
def q11(run_config: RunConfig) -> str:
|
|
1167
|
+
"""Query 11."""
|
|
1168
|
+
return f"""
|
|
1169
|
+
select
|
|
1170
|
+
ps_partkey,
|
|
1171
|
+
round(sum(ps_supplycost * ps_availqty), 2) as value
|
|
1172
|
+
from
|
|
1173
|
+
partsupp, supplier, nation
|
|
1174
|
+
where
|
|
1175
|
+
ps_suppkey = s_suppkey
|
|
1176
|
+
and s_nationkey = n_nationkey
|
|
1177
|
+
and n_name = 'GERMANY'
|
|
1178
|
+
group by
|
|
1179
|
+
ps_partkey
|
|
1180
|
+
having
|
|
1181
|
+
sum(ps_supplycost * ps_availqty) > (
|
|
1182
|
+
select
|
|
1183
|
+
sum(ps_supplycost * ps_availqty) * {0.0001 / run_config.scale_factor}
|
|
1184
|
+
from
|
|
1185
|
+
partsupp, supplier, nation
|
|
1186
|
+
where
|
|
1187
|
+
ps_suppkey = s_suppkey
|
|
1188
|
+
and s_nationkey = n_nationkey
|
|
1189
|
+
and n_name = 'GERMANY'
|
|
1190
|
+
)
|
|
1191
|
+
order by
|
|
1192
|
+
value desc
|
|
1193
|
+
"""
|
|
1194
|
+
|
|
1195
|
+
@staticmethod
|
|
1196
|
+
def q12(run_config: RunConfig) -> str:
|
|
1197
|
+
"""Query 12."""
|
|
1198
|
+
return """
|
|
1199
|
+
select
|
|
1200
|
+
l_shipmode,
|
|
1201
|
+
sum(case
|
|
1202
|
+
when o_orderpriority = '1-URGENT'
|
|
1203
|
+
or o_orderpriority = '2-HIGH'
|
|
1204
|
+
then 1
|
|
1205
|
+
else 0
|
|
1206
|
+
end) as high_line_count,
|
|
1207
|
+
sum(case
|
|
1208
|
+
when o_orderpriority <> '1-URGENT'
|
|
1209
|
+
and o_orderpriority <> '2-HIGH'
|
|
1210
|
+
then 1
|
|
1211
|
+
else 0
|
|
1212
|
+
end) as low_line_count
|
|
1213
|
+
from
|
|
1214
|
+
orders,
|
|
1215
|
+
lineitem
|
|
1216
|
+
where
|
|
1217
|
+
o_orderkey = l_orderkey
|
|
1218
|
+
and l_shipmode in ('MAIL', 'SHIP')
|
|
1219
|
+
and l_commitdate < l_receiptdate
|
|
1220
|
+
and l_shipdate < l_commitdate
|
|
1221
|
+
and l_receiptdate >= date '1994-01-01'
|
|
1222
|
+
and l_receiptdate < date '1994-01-01' + interval '1' year
|
|
1223
|
+
group by
|
|
1224
|
+
l_shipmode
|
|
1225
|
+
order by
|
|
1226
|
+
l_shipmode
|
|
1227
|
+
"""
|
|
1228
|
+
|
|
1229
|
+
@staticmethod
|
|
1230
|
+
def q13(run_config: RunConfig) -> str:
|
|
1231
|
+
"""Query 13."""
|
|
1232
|
+
return """
|
|
1233
|
+
select
|
|
1234
|
+
c_count, count(*) as custdist
|
|
1235
|
+
from (
|
|
1236
|
+
select
|
|
1237
|
+
c_custkey,
|
|
1238
|
+
count(o_orderkey)
|
|
1239
|
+
from
|
|
1240
|
+
customer left outer join orders on
|
|
1241
|
+
c_custkey = o_custkey
|
|
1242
|
+
and o_comment not like '%special%requests%'
|
|
1243
|
+
group by
|
|
1244
|
+
c_custkey
|
|
1245
|
+
)as c_orders (c_custkey, c_count)
|
|
1246
|
+
group by
|
|
1247
|
+
c_count
|
|
1248
|
+
order by
|
|
1249
|
+
custdist desc,
|
|
1250
|
+
c_count desc
|
|
1251
|
+
"""
|
|
1252
|
+
|
|
1253
|
+
@staticmethod
|
|
1254
|
+
def q14(run_config: RunConfig) -> str:
|
|
1255
|
+
"""Query 14."""
|
|
1256
|
+
return """
|
|
1257
|
+
select
|
|
1258
|
+
round(100.00 * sum(case
|
|
1259
|
+
when p_type like 'PROMO%'
|
|
1260
|
+
then l_extendedprice * (1 - l_discount)
|
|
1261
|
+
else 0
|
|
1262
|
+
end) / sum(l_extendedprice * (1 - l_discount)), 2) as promo_revenue
|
|
1263
|
+
from
|
|
1264
|
+
lineitem,
|
|
1265
|
+
part
|
|
1266
|
+
where
|
|
1267
|
+
l_partkey = p_partkey
|
|
1268
|
+
and l_shipdate >= date '1995-09-01'
|
|
1269
|
+
and l_shipdate < date '1995-09-01' + interval '1' month
|
|
1270
|
+
"""
|
|
1271
|
+
|
|
1272
|
+
@staticmethod
|
|
1273
|
+
def q15(run_config: RunConfig) -> str:
|
|
1274
|
+
"""Query 15."""
|
|
1275
|
+
return """
|
|
1276
|
+
with revenue (supplier_no, total_revenue) as (
|
|
1277
|
+
select
|
|
1278
|
+
l_suppkey,
|
|
1279
|
+
sum(l_extendedprice * (1 - l_discount))
|
|
1280
|
+
from
|
|
1281
|
+
lineitem
|
|
1282
|
+
where
|
|
1283
|
+
l_shipdate >= date '1996-01-01'
|
|
1284
|
+
and l_shipdate < date '1996-01-01' + interval '3' month
|
|
1285
|
+
group by
|
|
1286
|
+
l_suppkey
|
|
1287
|
+
)
|
|
1288
|
+
select
|
|
1289
|
+
s_suppkey,
|
|
1290
|
+
s_name,
|
|
1291
|
+
s_address,
|
|
1292
|
+
s_phone,
|
|
1293
|
+
total_revenue
|
|
1294
|
+
from
|
|
1295
|
+
supplier,
|
|
1296
|
+
revenue
|
|
1297
|
+
where
|
|
1298
|
+
s_suppkey = supplier_no
|
|
1299
|
+
and total_revenue = (
|
|
1300
|
+
select
|
|
1301
|
+
max(total_revenue)
|
|
1302
|
+
from
|
|
1303
|
+
revenue
|
|
1304
|
+
)
|
|
1305
|
+
order by
|
|
1306
|
+
s_suppkey
|
|
1307
|
+
"""
|
|
1308
|
+
|
|
1309
|
+
@staticmethod
|
|
1310
|
+
def q16(run_config: RunConfig) -> str:
|
|
1311
|
+
"""Query 16."""
|
|
1312
|
+
return """
|
|
1313
|
+
select
|
|
1314
|
+
p_brand,
|
|
1315
|
+
p_type,
|
|
1316
|
+
p_size,
|
|
1317
|
+
count(distinct ps_suppkey) as supplier_cnt
|
|
1318
|
+
from
|
|
1319
|
+
partsupp,
|
|
1320
|
+
part
|
|
1321
|
+
where
|
|
1322
|
+
p_partkey = ps_partkey
|
|
1323
|
+
and p_brand <> 'Brand#45'
|
|
1324
|
+
and p_type not like 'MEDIUM POLISHED%'
|
|
1325
|
+
and p_size in (49, 14, 23, 45, 19, 3, 36, 9)
|
|
1326
|
+
and ps_suppkey not in (
|
|
1327
|
+
select
|
|
1328
|
+
s_suppkey
|
|
1329
|
+
from
|
|
1330
|
+
supplier
|
|
1331
|
+
where
|
|
1332
|
+
s_comment like '%Customer%Complaints%'
|
|
1333
|
+
)
|
|
1334
|
+
group by
|
|
1335
|
+
p_brand,
|
|
1336
|
+
p_type,
|
|
1337
|
+
p_size
|
|
1338
|
+
order by
|
|
1339
|
+
supplier_cnt desc,
|
|
1340
|
+
p_brand,
|
|
1341
|
+
p_type,
|
|
1342
|
+
p_size
|
|
1343
|
+
"""
|
|
1344
|
+
|
|
1345
|
+
@staticmethod
|
|
1346
|
+
def q17(run_config: RunConfig) -> str:
|
|
1347
|
+
"""Query 17."""
|
|
1348
|
+
return """
|
|
1349
|
+
select
|
|
1350
|
+
round(sum(l_extendedprice) / 7.0, 2) as avg_yearly
|
|
1351
|
+
from
|
|
1352
|
+
lineitem,
|
|
1353
|
+
part
|
|
1354
|
+
where
|
|
1355
|
+
p_partkey = l_partkey
|
|
1356
|
+
and p_brand = 'Brand#23'
|
|
1357
|
+
and p_container = 'MED BOX'
|
|
1358
|
+
and l_quantity < (
|
|
1359
|
+
select
|
|
1360
|
+
0.2 * avg(l_quantity)
|
|
1361
|
+
from
|
|
1362
|
+
lineitem
|
|
1363
|
+
where
|
|
1364
|
+
l_partkey = p_partkey
|
|
1365
|
+
)
|
|
1366
|
+
"""
|
|
1367
|
+
|
|
1368
|
+
@staticmethod
|
|
1369
|
+
def q18(run_config: RunConfig) -> str:
|
|
1370
|
+
"""Query 18."""
|
|
1371
|
+
return """
|
|
1372
|
+
select
|
|
1373
|
+
c_name,
|
|
1374
|
+
c_custkey,
|
|
1375
|
+
o_orderkey,
|
|
1376
|
+
o_orderdate as o_orderdat,
|
|
1377
|
+
o_totalprice,
|
|
1378
|
+
sum(l_quantity) as col6
|
|
1379
|
+
from
|
|
1380
|
+
customer,
|
|
1381
|
+
orders,
|
|
1382
|
+
lineitem
|
|
1383
|
+
where
|
|
1384
|
+
o_orderkey in (
|
|
1385
|
+
select
|
|
1386
|
+
l_orderkey
|
|
1387
|
+
from
|
|
1388
|
+
lineitem
|
|
1389
|
+
group by
|
|
1390
|
+
l_orderkey having
|
|
1391
|
+
sum(l_quantity) > 300
|
|
1392
|
+
)
|
|
1393
|
+
and c_custkey = o_custkey
|
|
1394
|
+
and o_orderkey = l_orderkey
|
|
1395
|
+
group by
|
|
1396
|
+
c_name,
|
|
1397
|
+
c_custkey,
|
|
1398
|
+
o_orderkey,
|
|
1399
|
+
o_orderdate,
|
|
1400
|
+
o_totalprice
|
|
1401
|
+
order by
|
|
1402
|
+
o_totalprice desc,
|
|
1403
|
+
o_orderdate
|
|
1404
|
+
limit 100
|
|
1405
|
+
"""
|
|
1406
|
+
|
|
1407
|
+
@staticmethod
|
|
1408
|
+
def q19(run_config: RunConfig) -> str:
|
|
1409
|
+
"""Query 19."""
|
|
1410
|
+
return """
|
|
1411
|
+
select
|
|
1412
|
+
round(sum(l_extendedprice* (1 - l_discount)), 2) as revenue
|
|
1413
|
+
from
|
|
1414
|
+
lineitem,
|
|
1415
|
+
part
|
|
1416
|
+
where
|
|
1417
|
+
(
|
|
1418
|
+
p_partkey = l_partkey
|
|
1419
|
+
and p_brand = 'Brand#12'
|
|
1420
|
+
and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
|
|
1421
|
+
and l_quantity >= 1 and l_quantity <= 1 + 10
|
|
1422
|
+
and p_size between 1 and 5
|
|
1423
|
+
and l_shipmode in ('AIR', 'AIR REG')
|
|
1424
|
+
and l_shipinstruct = 'DELIVER IN PERSON'
|
|
1425
|
+
)
|
|
1426
|
+
or
|
|
1427
|
+
(
|
|
1428
|
+
p_partkey = l_partkey
|
|
1429
|
+
and p_brand = 'Brand#23'
|
|
1430
|
+
and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
|
|
1431
|
+
and l_quantity >= 10 and l_quantity <= 20
|
|
1432
|
+
and p_size between 1 and 10
|
|
1433
|
+
and l_shipmode in ('AIR', 'AIR REG')
|
|
1434
|
+
and l_shipinstruct = 'DELIVER IN PERSON'
|
|
1435
|
+
)
|
|
1436
|
+
or
|
|
1437
|
+
(
|
|
1438
|
+
p_partkey = l_partkey
|
|
1439
|
+
and p_brand = 'Brand#34'
|
|
1440
|
+
and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
|
|
1441
|
+
and l_quantity >= 20 and l_quantity <= 30
|
|
1442
|
+
and p_size between 1 and 15
|
|
1443
|
+
and l_shipmode in ('AIR', 'AIR REG')
|
|
1444
|
+
and l_shipinstruct = 'DELIVER IN PERSON'
|
|
1445
|
+
)
|
|
1446
|
+
"""
|
|
1447
|
+
|
|
1448
|
+
@staticmethod
|
|
1449
|
+
def q20(run_config: RunConfig) -> str:
|
|
1450
|
+
"""Query 20."""
|
|
1451
|
+
return """
|
|
1452
|
+
select
|
|
1453
|
+
s_name,
|
|
1454
|
+
s_address
|
|
1455
|
+
from
|
|
1456
|
+
supplier,
|
|
1457
|
+
nation
|
|
1458
|
+
where
|
|
1459
|
+
s_suppkey in (
|
|
1460
|
+
select
|
|
1461
|
+
ps_suppkey
|
|
1462
|
+
from
|
|
1463
|
+
partsupp
|
|
1464
|
+
where
|
|
1465
|
+
ps_partkey in (
|
|
1466
|
+
select
|
|
1467
|
+
p_partkey
|
|
1468
|
+
from
|
|
1469
|
+
part
|
|
1470
|
+
where
|
|
1471
|
+
p_name like 'forest%'
|
|
1472
|
+
)
|
|
1473
|
+
and ps_availqty > (
|
|
1474
|
+
select
|
|
1475
|
+
0.5 * sum(l_quantity)
|
|
1476
|
+
from
|
|
1477
|
+
lineitem
|
|
1478
|
+
where
|
|
1479
|
+
l_partkey = ps_partkey
|
|
1480
|
+
and l_suppkey = ps_suppkey
|
|
1481
|
+
and l_shipdate >= date '1994-01-01'
|
|
1482
|
+
and l_shipdate < date '1994-01-01' + interval '1' year
|
|
1483
|
+
)
|
|
1484
|
+
)
|
|
1485
|
+
and s_nationkey = n_nationkey
|
|
1486
|
+
and n_name = 'CANADA'
|
|
1487
|
+
order by
|
|
1488
|
+
s_name
|
|
1489
|
+
"""
|
|
1490
|
+
|
|
1491
|
+
@staticmethod
|
|
1492
|
+
def q21(run_config: RunConfig) -> str:
|
|
1493
|
+
"""Query 21."""
|
|
1494
|
+
return """
|
|
1495
|
+
select
|
|
1496
|
+
s_name,
|
|
1497
|
+
count(*) as numwait
|
|
1498
|
+
from
|
|
1499
|
+
supplier,
|
|
1500
|
+
lineitem l1,
|
|
1501
|
+
orders,
|
|
1502
|
+
nation
|
|
1503
|
+
where
|
|
1504
|
+
s_suppkey = l1.l_suppkey
|
|
1505
|
+
and o_orderkey = l1.l_orderkey
|
|
1506
|
+
and o_orderstatus = 'F'
|
|
1507
|
+
and l1.l_receiptdate > l1.l_commitdate
|
|
1508
|
+
and exists (
|
|
1509
|
+
select
|
|
1510
|
+
*
|
|
1511
|
+
from
|
|
1512
|
+
lineitem l2
|
|
1513
|
+
where
|
|
1514
|
+
l2.l_orderkey = l1.l_orderkey
|
|
1515
|
+
and l2.l_suppkey <> l1.l_suppkey
|
|
1516
|
+
)
|
|
1517
|
+
and not exists (
|
|
1518
|
+
select
|
|
1519
|
+
*
|
|
1520
|
+
from
|
|
1521
|
+
lineitem l3
|
|
1522
|
+
where
|
|
1523
|
+
l3.l_orderkey = l1.l_orderkey
|
|
1524
|
+
and l3.l_suppkey <> l1.l_suppkey
|
|
1525
|
+
and l3.l_receiptdate > l3.l_commitdate
|
|
1526
|
+
)
|
|
1527
|
+
and s_nationkey = n_nationkey
|
|
1528
|
+
and n_name = 'SAUDI ARABIA'
|
|
1529
|
+
group by
|
|
1530
|
+
s_name
|
|
1531
|
+
order by
|
|
1532
|
+
numwait desc,
|
|
1533
|
+
s_name
|
|
1534
|
+
limit 100
|
|
1535
|
+
"""
|
|
1536
|
+
|
|
1537
|
+
@staticmethod
|
|
1538
|
+
def q22(run_config: RunConfig) -> str:
|
|
1539
|
+
"""Query 22."""
|
|
1540
|
+
return """
|
|
1541
|
+
select
|
|
1542
|
+
cntrycode,
|
|
1543
|
+
count(*) as numcust,
|
|
1544
|
+
sum(c_acctbal) as totacctbal
|
|
1545
|
+
from (
|
|
1546
|
+
select
|
|
1547
|
+
substring(c_phone from 1 for 2) as cntrycode,
|
|
1548
|
+
c_acctbal
|
|
1549
|
+
from
|
|
1550
|
+
customer
|
|
1551
|
+
where
|
|
1552
|
+
substring(c_phone from 1 for 2) in
|
|
1553
|
+
(13, 31, 23, 29, 30, 18, 17)
|
|
1554
|
+
and c_acctbal > (
|
|
1555
|
+
select
|
|
1556
|
+
avg(c_acctbal)
|
|
1557
|
+
from
|
|
1558
|
+
customer
|
|
1559
|
+
where
|
|
1560
|
+
c_acctbal > 0.00
|
|
1561
|
+
and substring (c_phone from 1 for 2) in
|
|
1562
|
+
(13, 31, 23, 29, 30, 18, 17)
|
|
1563
|
+
)
|
|
1564
|
+
and not exists (
|
|
1565
|
+
select
|
|
1566
|
+
*
|
|
1567
|
+
from
|
|
1568
|
+
orders
|
|
1569
|
+
where
|
|
1570
|
+
o_custkey = c_custkey
|
|
1571
|
+
)
|
|
1572
|
+
) as custsale
|
|
1573
|
+
group by
|
|
1574
|
+
cntrycode
|
|
1575
|
+
order by
|
|
1576
|
+
cntrycode
|
|
1577
|
+
"""
|
|
1578
|
+
|
|
1579
|
+
|
|
813
1580
|
if __name__ == "__main__":
|
|
814
|
-
|
|
1581
|
+
import argparse
|
|
1582
|
+
|
|
1583
|
+
parser = argparse.ArgumentParser(description="Run PDS-H benchmarks.")
|
|
1584
|
+
parser.add_argument(
|
|
1585
|
+
"--engine",
|
|
1586
|
+
choices=["polars", "duckdb", "validate"],
|
|
1587
|
+
default="polars",
|
|
1588
|
+
help="Which engine to use for executing the benchmarks or to validate results.",
|
|
1589
|
+
)
|
|
1590
|
+
args, extra_args = parser.parse_known_args()
|
|
1591
|
+
|
|
1592
|
+
if args.engine == "polars":
|
|
1593
|
+
run_polars(PDSHQueries, extra_args, num_queries=22)
|
|
1594
|
+
elif args.engine == "duckdb":
|
|
1595
|
+
run_duckdb(PDSHDuckDBQueries, extra_args, num_queries=22)
|
|
1596
|
+
elif args.engine == "validate":
|
|
1597
|
+
run_validate(
|
|
1598
|
+
PDSHQueries,
|
|
1599
|
+
PDSHDuckDBQueries,
|
|
1600
|
+
extra_args,
|
|
1601
|
+
num_queries=22,
|
|
1602
|
+
check_dtypes=True,
|
|
1603
|
+
check_column_order=True,
|
|
1604
|
+
)
|