duckdb 0.7.2-dev402.0 → 0.7.2-dev457.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/include/parquet_timestamp.hpp +0 -1
- package/src/duckdb/extension/parquet/parquet_timestamp.cpp +8 -6
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +9 -1
- package/src/duckdb/src/execution/physical_plan/plan_distinct.cpp +5 -8
- package/src/duckdb/src/function/aggregate/distributive/bool.cpp +2 -0
- package/src/duckdb/src/function/aggregate/distributive/count.cpp +1 -0
- package/src/duckdb/src/function/aggregate/distributive/minmax.cpp +2 -0
- package/src/duckdb/src/function/aggregate/distributive/sum.cpp +8 -0
- package/src/duckdb/src/function/aggregate/holistic/quantile.cpp +15 -0
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +42 -11
- package/src/duckdb/src/function/function_binder.cpp +1 -8
- package/src/duckdb/src/function/scalar/date/current.cpp +0 -2
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +0 -3
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +6 -3
- package/src/duckdb/src/include/duckdb/function/function_binder.hpp +3 -6
- package/src/duckdb/src/include/duckdb/optimizer/rule/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/optimizer/rule/ordered_aggregate_optimizer.hpp +24 -0
- package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +4 -3
- package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_aggregate_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/order_binder.hpp +4 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_distinct.hpp +3 -0
- package/src/duckdb/src/optimizer/optimizer.cpp +1 -0
- package/src/duckdb/src/optimizer/rule/ordered_aggregate_optimizer.cpp +30 -0
- package/src/duckdb/src/parser/expression/star_expression.cpp +6 -6
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -1
- package/src/duckdb/src/parser/transform/expression/transform_columnref.cpp +17 -2
- package/src/duckdb/src/parser/transform/expression/transform_function.cpp +45 -40
- package/src/duckdb/src/parser/transform/helpers/transform_groupby.cpp +7 -0
- package/src/duckdb/src/parser/transform/helpers/transform_orderby.cpp +0 -7
- package/src/duckdb/src/planner/bind_context.cpp +2 -25
- package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +6 -4
- package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +3 -2
- package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +176 -0
- package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +57 -82
- package/src/duckdb/src/planner/binder/query_node/plan_query_node.cpp +11 -0
- package/src/duckdb/src/planner/binder/statement/bind_delete.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +2 -2
- package/src/duckdb/src/planner/binder/statement/bind_update.cpp +1 -1
- package/src/duckdb/src/planner/binder.cpp +12 -23
- package/src/duckdb/src/planner/bound_result_modifier.cpp +26 -0
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +9 -2
- package/src/duckdb/src/planner/expression_iterator.cpp +5 -0
- package/src/duckdb/src/planner/logical_operator_visitor.cpp +5 -0
- package/src/duckdb/src/planner/operator/logical_distinct.cpp +3 -0
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +8141 -8313
- package/src/duckdb/ub_src_optimizer_rule.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_expression.cpp +2 -0
package/package.json
CHANGED
@@ -16,7 +16,6 @@ struct Int96 {
|
|
16
16
|
uint32_t value[3];
|
17
17
|
};
|
18
18
|
|
19
|
-
int64_t ImpalaTimestampToNanoseconds(const Int96 &impala_timestamp);
|
20
19
|
timestamp_t ImpalaTimestampToTimestamp(const Int96 &raw_ts);
|
21
20
|
Int96 TimestampToImpalaTimestamp(timestamp_t &ts);
|
22
21
|
timestamp_t ParquetTimestampMicrosToTimestamp(const int64_t &raw_ts);
|
@@ -12,24 +12,26 @@ namespace duckdb {
|
|
12
12
|
// surely they are joking
|
13
13
|
static constexpr int64_t JULIAN_TO_UNIX_EPOCH_DAYS = 2440588LL;
|
14
14
|
static constexpr int64_t MILLISECONDS_PER_DAY = 86400000LL;
|
15
|
-
static constexpr int64_t
|
15
|
+
static constexpr int64_t MICROSECONDS_PER_DAY = MILLISECONDS_PER_DAY * 1000LL;
|
16
|
+
static constexpr int64_t NANOSECONDS_PER_MICRO = 1000LL;
|
16
17
|
|
17
|
-
int64_t
|
18
|
+
static int64_t ImpalaTimestampToMicroseconds(const Int96 &impala_timestamp) {
|
18
19
|
int64_t days_since_epoch = impala_timestamp.value[2] - JULIAN_TO_UNIX_EPOCH_DAYS;
|
19
20
|
auto nanoseconds = Load<int64_t>((data_ptr_t)impala_timestamp.value);
|
20
|
-
|
21
|
+
auto microseconds = nanoseconds / NANOSECONDS_PER_MICRO;
|
22
|
+
return days_since_epoch * MICROSECONDS_PER_DAY + microseconds;
|
21
23
|
}
|
22
24
|
|
23
25
|
timestamp_t ImpalaTimestampToTimestamp(const Int96 &raw_ts) {
|
24
|
-
auto
|
25
|
-
return Timestamp::
|
26
|
+
auto impala_us = ImpalaTimestampToMicroseconds(raw_ts);
|
27
|
+
return Timestamp::FromEpochMicroSeconds(impala_us);
|
26
28
|
}
|
27
29
|
|
28
30
|
Int96 TimestampToImpalaTimestamp(timestamp_t &ts) {
|
29
31
|
int32_t hour, min, sec, msec;
|
30
32
|
Time::Convert(Timestamp::GetTime(ts), hour, min, sec, msec);
|
31
33
|
uint64_t ms_since_midnight = hour * 60 * 60 * 1000 + min * 60 * 1000 + sec * 1000 + msec;
|
32
|
-
auto days_since_epoch = Date::Epoch(Timestamp::GetDate(ts)) / (24 * 60 * 60);
|
34
|
+
auto days_since_epoch = Date::Epoch(Timestamp::GetDate(ts)) / int64_t(24 * 60 * 60);
|
33
35
|
// first two uint32 in Int96 are nanoseconds since midnights
|
34
36
|
// last uint32 is number of days since year 4713 BC ("Julian date")
|
35
37
|
Int96 impala_ts;
|
@@ -9,6 +9,7 @@
|
|
9
9
|
#include "duckdb/parser/expression/comparison_expression.hpp"
|
10
10
|
#include "duckdb/planner/expression/bound_aggregate_expression.hpp"
|
11
11
|
#include "duckdb/planner/operator/logical_aggregate.hpp"
|
12
|
+
#include "duckdb/function/function_binder.hpp"
|
12
13
|
|
13
14
|
namespace duckdb {
|
14
15
|
|
@@ -169,13 +170,20 @@ PhysicalPlanGenerator::ExtractAggregateExpressions(unique_ptr<PhysicalOperator>
|
|
169
170
|
vector<unique_ptr<Expression>> expressions;
|
170
171
|
vector<LogicalType> types;
|
171
172
|
|
173
|
+
// bind sorted aggregates
|
174
|
+
for (auto &aggr : aggregates) {
|
175
|
+
auto &bound_aggr = (BoundAggregateExpression &)*aggr;
|
176
|
+
if (bound_aggr.order_bys) {
|
177
|
+
// sorted aggregate!
|
178
|
+
FunctionBinder::BindSortedAggregate(context, bound_aggr, groups);
|
179
|
+
}
|
180
|
+
}
|
172
181
|
for (auto &group : groups) {
|
173
182
|
auto ref = make_unique<BoundReferenceExpression>(group->return_type, expressions.size());
|
174
183
|
types.push_back(group->return_type);
|
175
184
|
expressions.push_back(std::move(group));
|
176
185
|
group = std::move(ref);
|
177
186
|
}
|
178
|
-
|
179
187
|
for (auto &aggr : aggregates) {
|
180
188
|
auto &bound_aggr = (BoundAggregateExpression &)*aggr;
|
181
189
|
for (auto &child : bound_aggr.children) {
|
@@ -9,8 +9,10 @@
|
|
9
9
|
|
10
10
|
namespace duckdb {
|
11
11
|
|
12
|
-
unique_ptr<PhysicalOperator> PhysicalPlanGenerator::
|
13
|
-
|
12
|
+
unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalDistinct &op) {
|
13
|
+
D_ASSERT(op.children.size() == 1);
|
14
|
+
auto child = CreatePlan(*op.children[0]);
|
15
|
+
auto &distinct_targets = op.distinct_targets;
|
14
16
|
D_ASSERT(child);
|
15
17
|
D_ASSERT(!distinct_targets.empty());
|
16
18
|
|
@@ -55,6 +57,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreateDistinctOn(unique_ptr<
|
|
55
57
|
FunctionBinder function_binder(context);
|
56
58
|
auto first_aggregate = function_binder.BindAggregateFunction(
|
57
59
|
FirstFun::GetFunction(logical_type), std::move(first_children), nullptr, AggregateType::NON_DISTINCT);
|
60
|
+
first_aggregate->order_bys = op.order_by ? op.order_by->Copy() : nullptr;
|
58
61
|
// add the projection
|
59
62
|
projections.push_back(make_unique<BoundReferenceExpression>(logical_type, group_count + aggregates.size()));
|
60
63
|
// push it to the list of aggregates
|
@@ -81,10 +84,4 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreateDistinctOn(unique_ptr<
|
|
81
84
|
return std::move(aggr_projection);
|
82
85
|
}
|
83
86
|
|
84
|
-
unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalDistinct &op) {
|
85
|
-
D_ASSERT(op.children.size() == 1);
|
86
|
-
auto plan = CreatePlan(*op.children[0]);
|
87
|
-
return CreateDistinctOn(std::move(plan), std::move(op.distinct_targets));
|
88
|
-
}
|
89
|
-
|
90
87
|
} // namespace duckdb
|
@@ -95,6 +95,7 @@ AggregateFunction BoolOrFun::GetFunction() {
|
|
95
95
|
auto fun = AggregateFunction::UnaryAggregate<BoolState, bool, bool, BoolOrFunFunction>(
|
96
96
|
LogicalType(LogicalTypeId::BOOLEAN), LogicalType::BOOLEAN);
|
97
97
|
fun.name = "bool_or";
|
98
|
+
fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
|
98
99
|
return fun;
|
99
100
|
}
|
100
101
|
|
@@ -102,6 +103,7 @@ AggregateFunction BoolAndFun::GetFunction() {
|
|
102
103
|
auto fun = AggregateFunction::UnaryAggregate<BoolState, bool, bool, BoolAndFunFunction>(
|
103
104
|
LogicalType(LogicalTypeId::BOOLEAN), LogicalType::BOOLEAN);
|
104
105
|
fun.name = "bool_and";
|
106
|
+
fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
|
105
107
|
return fun;
|
106
108
|
}
|
107
109
|
|
@@ -76,6 +76,7 @@ AggregateFunction CountFun::GetFunction() {
|
|
76
76
|
LogicalType(LogicalTypeId::ANY), LogicalType::BIGINT);
|
77
77
|
fun.name = "count";
|
78
78
|
fun.null_handling = FunctionNullHandling::SPECIAL_HANDLING;
|
79
|
+
fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
|
79
80
|
return fun;
|
80
81
|
}
|
81
82
|
|
@@ -514,6 +514,7 @@ unique_ptr<FunctionData> BindDecimalMinMax(ClientContext &context, AggregateFunc
|
|
514
514
|
function.name = std::move(name);
|
515
515
|
function.arguments[0] = decimal_type;
|
516
516
|
function.return_type = decimal_type;
|
517
|
+
function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
|
517
518
|
return nullptr;
|
518
519
|
}
|
519
520
|
|
@@ -545,6 +546,7 @@ unique_ptr<FunctionData> BindMinMax(ClientContext &context, AggregateFunction &f
|
|
545
546
|
auto name = std::move(function.name);
|
546
547
|
function = GetMinMaxOperator<OP, OP_STRING, OP_VECTOR>(input_type);
|
547
548
|
function.name = std::move(name);
|
549
|
+
function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
|
548
550
|
if (function.bind) {
|
549
551
|
return function.bind(context, function, arguments);
|
550
552
|
} else {
|
@@ -110,6 +110,7 @@ AggregateFunction SumFun::GetSumAggregate(PhysicalType type) {
|
|
110
110
|
case PhysicalType::INT16: {
|
111
111
|
auto function = AggregateFunction::UnaryAggregate<SumState<int64_t>, int16_t, hugeint_t, IntegerSumOperation>(
|
112
112
|
LogicalType::SMALLINT, LogicalType::HUGEINT);
|
113
|
+
function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
|
113
114
|
return function;
|
114
115
|
}
|
115
116
|
|
@@ -118,6 +119,7 @@ AggregateFunction SumFun::GetSumAggregate(PhysicalType type) {
|
|
118
119
|
AggregateFunction::UnaryAggregate<SumState<hugeint_t>, int32_t, hugeint_t, SumToHugeintOperation>(
|
119
120
|
LogicalType::INTEGER, LogicalType::HUGEINT);
|
120
121
|
function.statistics = SumPropagateStats;
|
122
|
+
function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
|
121
123
|
return function;
|
122
124
|
}
|
123
125
|
case PhysicalType::INT64: {
|
@@ -125,12 +127,14 @@ AggregateFunction SumFun::GetSumAggregate(PhysicalType type) {
|
|
125
127
|
AggregateFunction::UnaryAggregate<SumState<hugeint_t>, int64_t, hugeint_t, SumToHugeintOperation>(
|
126
128
|
LogicalType::BIGINT, LogicalType::HUGEINT);
|
127
129
|
function.statistics = SumPropagateStats;
|
130
|
+
function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
|
128
131
|
return function;
|
129
132
|
}
|
130
133
|
case PhysicalType::INT128: {
|
131
134
|
auto function =
|
132
135
|
AggregateFunction::UnaryAggregate<SumState<hugeint_t>, hugeint_t, hugeint_t, HugeintSumOperation>(
|
133
136
|
LogicalType::HUGEINT, LogicalType::HUGEINT);
|
137
|
+
function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
|
134
138
|
return function;
|
135
139
|
}
|
136
140
|
default:
|
@@ -144,12 +148,14 @@ AggregateFunction SumFun::GetSumAggregateNoOverflow(PhysicalType type) {
|
|
144
148
|
auto function = AggregateFunction::UnaryAggregate<SumState<int64_t>, int32_t, hugeint_t, IntegerSumOperation>(
|
145
149
|
LogicalType::INTEGER, LogicalType::HUGEINT);
|
146
150
|
function.name = "sum_no_overflow";
|
151
|
+
function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
|
147
152
|
return function;
|
148
153
|
}
|
149
154
|
case PhysicalType::INT64: {
|
150
155
|
auto function = AggregateFunction::UnaryAggregate<SumState<int64_t>, int64_t, hugeint_t, IntegerSumOperation>(
|
151
156
|
LogicalType::BIGINT, LogicalType::HUGEINT);
|
152
157
|
function.name = "sum_no_overflow";
|
158
|
+
function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
|
153
159
|
return function;
|
154
160
|
}
|
155
161
|
default:
|
@@ -164,6 +170,7 @@ unique_ptr<FunctionData> BindDecimalSum(ClientContext &context, AggregateFunctio
|
|
164
170
|
function.name = "sum";
|
165
171
|
function.arguments[0] = decimal_type;
|
166
172
|
function.return_type = LogicalType::DECIMAL(Decimal::MAX_WIDTH_DECIMAL, DecimalType::GetScale(decimal_type));
|
173
|
+
function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
|
167
174
|
return nullptr;
|
168
175
|
}
|
169
176
|
|
@@ -174,6 +181,7 @@ unique_ptr<FunctionData> BindDecimalSumNoOverflow(ClientContext &context, Aggreg
|
|
174
181
|
function.name = "sum_no_overflow";
|
175
182
|
function.arguments[0] = decimal_type;
|
176
183
|
function.return_type = LogicalType::DECIMAL(Decimal::MAX_WIDTH_DECIMAL, DecimalType::GetScale(decimal_type));
|
184
|
+
function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
|
177
185
|
return nullptr;
|
178
186
|
}
|
179
187
|
|
@@ -795,6 +795,7 @@ AggregateFunction GetTypedDiscreteQuantileListAggregateFunction(const LogicalTyp
|
|
795
795
|
using STATE = QuantileState<SAVE_TYPE>;
|
796
796
|
using OP = QuantileListOperation<INPUT_TYPE, true>;
|
797
797
|
auto fun = QuantileListAggregate<STATE, INPUT_TYPE, list_entry_t, OP>(type, type);
|
798
|
+
fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
|
798
799
|
fun.window = AggregateFunction::UnaryWindow<STATE, INPUT_TYPE, list_entry_t, OP>;
|
799
800
|
return fun;
|
800
801
|
}
|
@@ -851,6 +852,7 @@ AggregateFunction GetTypedContinuousQuantileAggregateFunction(const LogicalType
|
|
851
852
|
using STATE = QuantileState<INPUT_TYPE>;
|
852
853
|
using OP = QuantileScalarOperation<false>;
|
853
854
|
auto fun = AggregateFunction::UnaryAggregateDestructor<STATE, INPUT_TYPE, TARGET_TYPE, OP>(input_type, target_type);
|
855
|
+
fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
|
854
856
|
fun.window = AggregateFunction::UnaryWindow<STATE, INPUT_TYPE, TARGET_TYPE, OP>;
|
855
857
|
return fun;
|
856
858
|
}
|
@@ -904,6 +906,7 @@ AggregateFunction GetTypedContinuousQuantileListAggregateFunction(const LogicalT
|
|
904
906
|
using STATE = QuantileState<INPUT_TYPE>;
|
905
907
|
using OP = QuantileListOperation<CHILD_TYPE, false>;
|
906
908
|
auto fun = QuantileListAggregate<STATE, INPUT_TYPE, list_entry_t, OP>(input_type, result_type);
|
909
|
+
fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
|
907
910
|
fun.window = AggregateFunction::UnaryWindow<STATE, INPUT_TYPE, list_entry_t, OP>;
|
908
911
|
return fun;
|
909
912
|
}
|
@@ -1129,6 +1132,7 @@ AggregateFunction GetTypedMedianAbsoluteDeviationAggregateFunction(const Logical
|
|
1129
1132
|
using STATE = QuantileState<INPUT_TYPE>;
|
1130
1133
|
using OP = MedianAbsoluteDeviationOperation<MEDIAN_TYPE>;
|
1131
1134
|
auto fun = AggregateFunction::UnaryAggregateDestructor<STATE, INPUT_TYPE, TARGET_TYPE, OP>(input_type, target_type);
|
1135
|
+
fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
|
1132
1136
|
fun.window = AggregateFunction::UnaryWindow<STATE, INPUT_TYPE, TARGET_TYPE, OP>;
|
1133
1137
|
return fun;
|
1134
1138
|
}
|
@@ -1198,6 +1202,7 @@ unique_ptr<FunctionData> BindMedianDecimal(ClientContext &context, AggregateFunc
|
|
1198
1202
|
function.name = "median";
|
1199
1203
|
function.serialize = QuantileSerialize;
|
1200
1204
|
function.deserialize = QuantileDeserialize;
|
1205
|
+
function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
|
1201
1206
|
return bind_data;
|
1202
1207
|
}
|
1203
1208
|
|
@@ -1205,6 +1210,7 @@ unique_ptr<FunctionData> BindMedianAbsoluteDeviationDecimal(ClientContext &conte
|
|
1205
1210
|
vector<unique_ptr<Expression>> &arguments) {
|
1206
1211
|
function = GetMedianAbsoluteDeviationAggregateFunction(arguments[0]->return_type);
|
1207
1212
|
function.name = "mad";
|
1213
|
+
function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
|
1208
1214
|
return nullptr;
|
1209
1215
|
}
|
1210
1216
|
|
@@ -1257,6 +1263,7 @@ unique_ptr<FunctionData> BindDiscreteQuantileDecimal(ClientContext &context, Agg
|
|
1257
1263
|
function.name = "quantile_disc";
|
1258
1264
|
function.serialize = QuantileDecimalSerialize;
|
1259
1265
|
function.deserialize = QuantileDeserialize;
|
1266
|
+
function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
|
1260
1267
|
return bind_data;
|
1261
1268
|
}
|
1262
1269
|
|
@@ -1267,6 +1274,7 @@ unique_ptr<FunctionData> BindDiscreteQuantileDecimalList(ClientContext &context,
|
|
1267
1274
|
function.name = "quantile_disc";
|
1268
1275
|
function.serialize = QuantileDecimalSerialize;
|
1269
1276
|
function.deserialize = QuantileDeserialize;
|
1277
|
+
function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
|
1270
1278
|
return bind_data;
|
1271
1279
|
}
|
1272
1280
|
|
@@ -1277,6 +1285,7 @@ unique_ptr<FunctionData> BindContinuousQuantileDecimal(ClientContext &context, A
|
|
1277
1285
|
function.name = "quantile_cont";
|
1278
1286
|
function.serialize = QuantileDecimalSerialize;
|
1279
1287
|
function.deserialize = QuantileDeserialize;
|
1288
|
+
function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
|
1280
1289
|
return bind_data;
|
1281
1290
|
}
|
1282
1291
|
|
@@ -1287,6 +1296,7 @@ unique_ptr<FunctionData> BindContinuousQuantileDecimalList(ClientContext &contex
|
|
1287
1296
|
function.name = "quantile_cont";
|
1288
1297
|
function.serialize = QuantileDecimalSerialize;
|
1289
1298
|
function.deserialize = QuantileDeserialize;
|
1299
|
+
function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
|
1290
1300
|
return bind_data;
|
1291
1301
|
}
|
1292
1302
|
|
@@ -1316,6 +1326,7 @@ AggregateFunction GetDiscreteQuantileAggregate(const LogicalType &type) {
|
|
1316
1326
|
fun.deserialize = QuantileDeserialize;
|
1317
1327
|
// temporarily push an argument so we can bind the actual quantile
|
1318
1328
|
fun.arguments.emplace_back(LogicalType::DOUBLE);
|
1329
|
+
fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
|
1319
1330
|
return fun;
|
1320
1331
|
}
|
1321
1332
|
|
@@ -1327,6 +1338,7 @@ AggregateFunction GetDiscreteQuantileListAggregate(const LogicalType &type) {
|
|
1327
1338
|
// temporarily push an argument so we can bind the actual quantile
|
1328
1339
|
auto list_of_double = LogicalType::LIST(LogicalType::DOUBLE);
|
1329
1340
|
fun.arguments.push_back(list_of_double);
|
1341
|
+
fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
|
1330
1342
|
return fun;
|
1331
1343
|
}
|
1332
1344
|
|
@@ -1337,6 +1349,7 @@ AggregateFunction GetContinuousQuantileAggregate(const LogicalType &type) {
|
|
1337
1349
|
fun.deserialize = QuantileDeserialize;
|
1338
1350
|
// temporarily push an argument so we can bind the actual quantile
|
1339
1351
|
fun.arguments.emplace_back(LogicalType::DOUBLE);
|
1352
|
+
fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
|
1340
1353
|
return fun;
|
1341
1354
|
}
|
1342
1355
|
|
@@ -1348,6 +1361,7 @@ AggregateFunction GetContinuousQuantileListAggregate(const LogicalType &type) {
|
|
1348
1361
|
// temporarily push an argument so we can bind the actual quantile
|
1349
1362
|
auto list_of_double = LogicalType::LIST(LogicalType::DOUBLE);
|
1350
1363
|
fun.arguments.push_back(list_of_double);
|
1364
|
+
fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
|
1351
1365
|
return fun;
|
1352
1366
|
}
|
1353
1367
|
|
@@ -1357,6 +1371,7 @@ AggregateFunction GetQuantileDecimalAggregate(const vector<LogicalType> &argumen
|
|
1357
1371
|
fun.bind = bind;
|
1358
1372
|
fun.serialize = QuantileSerialize;
|
1359
1373
|
fun.deserialize = QuantileDeserialize;
|
1374
|
+
fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
|
1360
1375
|
return fun;
|
1361
1376
|
}
|
1362
1377
|
|
@@ -3,6 +3,9 @@
|
|
3
3
|
#include "duckdb/common/types/column_data_collection.hpp"
|
4
4
|
#include "duckdb/function/function_binder.hpp"
|
5
5
|
#include "duckdb/storage/buffer_manager.hpp"
|
6
|
+
#include "duckdb/planner/expression/bound_aggregate_expression.hpp"
|
7
|
+
#include "duckdb/parser/expression_map.hpp"
|
8
|
+
#include "duckdb/function/aggregate/distributive_functions.hpp"
|
6
9
|
|
7
10
|
namespace duckdb {
|
8
11
|
|
@@ -363,16 +366,44 @@ struct SortedAggregateFunction {
|
|
363
366
|
}
|
364
367
|
};
|
365
368
|
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
369
|
+
void FunctionBinder::BindSortedAggregate(ClientContext &context, BoundAggregateExpression &expr,
|
370
|
+
const vector<unique_ptr<Expression>> &groups) {
|
371
|
+
if (!expr.order_bys || expr.order_bys->orders.empty() || expr.children.empty()) {
|
372
|
+
// not a sorted aggregate: return
|
373
|
+
return;
|
374
|
+
}
|
375
|
+
if (context.config.enable_optimizer) {
|
376
|
+
// for each ORDER BY - check if it is actually necessary
|
377
|
+
// expressions that are in the groups do not need to be ORDERED BY
|
378
|
+
// `ORDER BY` on a group has no effect, because for each aggregate, the group is unique
|
379
|
+
// similarly, we only need to ORDER BY each aggregate once
|
380
|
+
expression_set_t seen_expressions;
|
381
|
+
for (auto &target : groups) {
|
382
|
+
seen_expressions.insert(target.get());
|
383
|
+
}
|
384
|
+
vector<BoundOrderByNode> new_order_nodes;
|
385
|
+
for (auto &order_node : expr.order_bys->orders) {
|
386
|
+
if (seen_expressions.find(order_node.expression.get()) != seen_expressions.end()) {
|
387
|
+
// we do not need to order by this node
|
388
|
+
continue;
|
389
|
+
}
|
390
|
+
seen_expressions.insert(order_node.expression.get());
|
391
|
+
new_order_nodes.push_back(std::move(order_node));
|
392
|
+
}
|
393
|
+
if (new_order_nodes.empty()) {
|
394
|
+
expr.order_bys.reset();
|
395
|
+
return;
|
396
|
+
}
|
397
|
+
expr.order_bys->orders = std::move(new_order_nodes);
|
398
|
+
}
|
399
|
+
auto &bound_function = expr.function;
|
400
|
+
auto &children = expr.children;
|
401
|
+
auto &order_bys = *expr.order_bys;
|
402
|
+
auto sorted_bind = make_unique<SortedAggregateBindData>(context, bound_function, expr.children,
|
403
|
+
std::move(expr.bind_info), order_bys);
|
373
404
|
|
374
405
|
// The arguments are the children plus the sort columns.
|
375
|
-
for (auto &order : order_bys
|
406
|
+
for (auto &order : order_bys.orders) {
|
376
407
|
children.emplace_back(std::move(order.expression));
|
377
408
|
}
|
378
409
|
|
@@ -392,9 +423,9 @@ unique_ptr<FunctionData> FunctionBinder::BindSortedAggregate(AggregateFunction &
|
|
392
423
|
AggregateFunction::StateDestroy<SortedAggregateState, SortedAggregateFunction>, nullptr,
|
393
424
|
SortedAggregateFunction::Window, SortedAggregateFunction::Serialize, SortedAggregateFunction::Deserialize);
|
394
425
|
|
395
|
-
|
396
|
-
|
397
|
-
|
426
|
+
expr.function = std::move(ordered_aggregate);
|
427
|
+
expr.bind_info = std::move(sorted_bind);
|
428
|
+
expr.order_bys.reset();
|
398
429
|
}
|
399
430
|
|
400
431
|
} // namespace duckdb
|
@@ -294,8 +294,7 @@ unique_ptr<BoundFunctionExpression> FunctionBinder::BindScalarFunction(ScalarFun
|
|
294
294
|
unique_ptr<BoundAggregateExpression> FunctionBinder::BindAggregateFunction(AggregateFunction bound_function,
|
295
295
|
vector<unique_ptr<Expression>> children,
|
296
296
|
unique_ptr<Expression> filter,
|
297
|
-
AggregateType aggr_type
|
298
|
-
unique_ptr<BoundOrderModifier> order_bys) {
|
297
|
+
AggregateType aggr_type) {
|
299
298
|
unique_ptr<FunctionData> bind_info;
|
300
299
|
if (bound_function.bind) {
|
301
300
|
bind_info = bound_function.bind(context, bound_function, children);
|
@@ -306,12 +305,6 @@ unique_ptr<BoundAggregateExpression> FunctionBinder::BindAggregateFunction(Aggre
|
|
306
305
|
// check if we need to add casts to the children
|
307
306
|
CastToFunctionArguments(bound_function, children);
|
308
307
|
|
309
|
-
// Special case: for ORDER BY aggregates, we wrap the aggregate function in a SortedAggregateFunction
|
310
|
-
// The children are the sort clauses and the binding contains the ordering data.
|
311
|
-
if (order_bys && !order_bys->orders.empty()) {
|
312
|
-
bind_info = BindSortedAggregate(bound_function, children, std::move(bind_info), std::move(order_bys));
|
313
|
-
}
|
314
|
-
|
315
308
|
return make_unique<BoundAggregateExpression>(std::move(bound_function), std::move(children), std::move(filter),
|
316
309
|
std::move(bind_info), aggr_type);
|
317
310
|
}
|
@@ -35,14 +35,12 @@ static void CurrentTimestampFunction(DataChunk &input, ExpressionState &state, V
|
|
35
35
|
|
36
36
|
void CurrentTimeFun::RegisterFunction(BuiltinFunctions &set) {
|
37
37
|
ScalarFunction current_time("get_current_time", {}, LogicalType::TIME, CurrentTimeFunction);
|
38
|
-
;
|
39
38
|
current_time.side_effects = FunctionSideEffects::HAS_SIDE_EFFECTS;
|
40
39
|
set.AddFunction(current_time);
|
41
40
|
}
|
42
41
|
|
43
42
|
void CurrentDateFun::RegisterFunction(BuiltinFunctions &set) {
|
44
43
|
ScalarFunction current_date({}, LogicalType::DATE, CurrentDateFunction);
|
45
|
-
;
|
46
44
|
current_date.side_effects = FunctionSideEffects::HAS_SIDE_EFFECTS;
|
47
45
|
set.AddFunction({"today", "current_date"}, current_date);
|
48
46
|
}
|
@@ -1,8 +1,8 @@
|
|
1
1
|
#ifndef DUCKDB_VERSION
|
2
|
-
#define DUCKDB_VERSION "0.7.2-
|
2
|
+
#define DUCKDB_VERSION "0.7.2-dev457"
|
3
3
|
#endif
|
4
4
|
#ifndef DUCKDB_SOURCE_ID
|
5
|
-
#define DUCKDB_SOURCE_ID "
|
5
|
+
#define DUCKDB_SOURCE_ID "403d0ca315"
|
6
6
|
#endif
|
7
7
|
#include "duckdb/function/table/system_functions.hpp"
|
8
8
|
#include "duckdb/main/database.hpp"
|
@@ -88,9 +88,6 @@ protected:
|
|
88
88
|
unique_ptr<PhysicalOperator> CreatePlan(LogicalRecursiveCTE &op);
|
89
89
|
unique_ptr<PhysicalOperator> CreatePlan(LogicalCTERef &op);
|
90
90
|
|
91
|
-
unique_ptr<PhysicalOperator> CreateDistinctOn(unique_ptr<PhysicalOperator> child,
|
92
|
-
vector<unique_ptr<Expression>> distinct_targets);
|
93
|
-
|
94
91
|
unique_ptr<PhysicalOperator> ExtractAggregateExpressions(unique_ptr<PhysicalOperator> child,
|
95
92
|
vector<unique_ptr<Expression>> &expressions,
|
96
93
|
vector<unique_ptr<Expression>> &groups);
|
@@ -17,8 +17,9 @@
|
|
17
17
|
|
18
18
|
namespace duckdb {
|
19
19
|
|
20
|
-
//! This allows us to use the & operator to check if the type is contained in the set
|
21
20
|
enum class AggregateType : uint8_t { NON_DISTINCT = 1, DISTINCT = 2 };
|
21
|
+
//! Whether or not the input order influences the result of the aggregate
|
22
|
+
enum class AggregateOrderDependent : uint8_t { ORDER_DEPENDENT = 1, NOT_ORDER_DEPENDENT = 2 };
|
22
23
|
|
23
24
|
class BoundAggregateExpression;
|
24
25
|
|
@@ -92,7 +93,7 @@ public:
|
|
92
93
|
LogicalType(LogicalTypeId::INVALID), null_handling),
|
93
94
|
state_size(state_size), initialize(initialize), update(update), combine(combine), finalize(finalize),
|
94
95
|
simple_update(simple_update), window(window), bind(bind), destructor(destructor), statistics(statistics),
|
95
|
-
serialize(serialize), deserialize(deserialize) {
|
96
|
+
serialize(serialize), deserialize(deserialize), order_dependent(AggregateOrderDependent::ORDER_DEPENDENT) {
|
96
97
|
}
|
97
98
|
|
98
99
|
DUCKDB_API
|
@@ -107,7 +108,7 @@ public:
|
|
107
108
|
LogicalType(LogicalTypeId::INVALID)),
|
108
109
|
state_size(state_size), initialize(initialize), update(update), combine(combine), finalize(finalize),
|
109
110
|
simple_update(simple_update), window(window), bind(bind), destructor(destructor), statistics(statistics),
|
110
|
-
serialize(serialize), deserialize(deserialize) {
|
111
|
+
serialize(serialize), deserialize(deserialize), order_dependent(AggregateOrderDependent::ORDER_DEPENDENT) {
|
111
112
|
}
|
112
113
|
|
113
114
|
DUCKDB_API AggregateFunction(const vector<LogicalType> &arguments, const LogicalType &return_type,
|
@@ -160,6 +161,8 @@ public:
|
|
160
161
|
|
161
162
|
aggregate_serialize_t serialize;
|
162
163
|
aggregate_deserialize_t deserialize;
|
164
|
+
//! Whether or not the aggregate is order dependent
|
165
|
+
AggregateOrderDependent order_dependent;
|
163
166
|
|
164
167
|
DUCKDB_API bool operator==(const AggregateFunction &rhs) const {
|
165
168
|
return state_size == rhs.state_size && initialize == rhs.initialize && update == rhs.update &&
|
@@ -59,13 +59,10 @@ public:
|
|
59
59
|
DUCKDB_API unique_ptr<BoundAggregateExpression>
|
60
60
|
BindAggregateFunction(AggregateFunction bound_function, vector<unique_ptr<Expression>> children,
|
61
61
|
unique_ptr<Expression> filter = nullptr,
|
62
|
-
AggregateType aggr_type = AggregateType::NON_DISTINCT
|
63
|
-
unique_ptr<BoundOrderModifier> order_bys = nullptr);
|
62
|
+
AggregateType aggr_type = AggregateType::NON_DISTINCT);
|
64
63
|
|
65
|
-
DUCKDB_API
|
66
|
-
|
67
|
-
unique_ptr<FunctionData> bind_info,
|
68
|
-
unique_ptr<BoundOrderModifier> order_bys);
|
64
|
+
DUCKDB_API static void BindSortedAggregate(ClientContext &context, BoundAggregateExpression &expr,
|
65
|
+
const vector<unique_ptr<Expression>> &groups);
|
69
66
|
|
70
67
|
private:
|
71
68
|
//! Cast a set of expressions to the arguments of this function
|
@@ -0,0 +1,24 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/optimizer/rule/ordered_aggregate_optimizer.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/optimizer/rule.hpp"
|
12
|
+
#include "duckdb/parser/expression_map.hpp"
|
13
|
+
|
14
|
+
namespace duckdb {
|
15
|
+
|
16
|
+
class OrderedAggregateOptimizer : public Rule {
|
17
|
+
public:
|
18
|
+
explicit OrderedAggregateOptimizer(ExpressionRewriter &rewriter);
|
19
|
+
|
20
|
+
unique_ptr<Expression> Apply(LogicalOperator &op, vector<Expression *> &bindings, bool &changes_made,
|
21
|
+
bool is_root) override;
|
22
|
+
};
|
23
|
+
|
24
|
+
} // namespace duckdb
|
@@ -59,9 +59,9 @@ public:
|
|
59
59
|
void Verify() const override;
|
60
60
|
|
61
61
|
public:
|
62
|
-
template <class T, class BASE>
|
62
|
+
template <class T, class BASE, class ORDER_MODIFIER = OrderModifier>
|
63
63
|
static string ToString(const T &entry, const string &schema, const string &function_name, bool is_operator = false,
|
64
|
-
bool distinct = false, BASE *filter = nullptr,
|
64
|
+
bool distinct = false, BASE *filter = nullptr, ORDER_MODIFIER *order_bys = nullptr,
|
65
65
|
bool export_state = false, bool add_alias = false) {
|
66
66
|
if (is_operator) {
|
67
67
|
// built-in operator
|
@@ -24,8 +24,8 @@ public:
|
|
24
24
|
case_insensitive_set_t exclude_list;
|
25
25
|
//! List of columns to replace with another expression
|
26
26
|
case_insensitive_map_t<unique_ptr<ParsedExpression>> replace_list;
|
27
|
-
//!
|
28
|
-
|
27
|
+
//! The expression to select the columns (regular expression or list)
|
28
|
+
unique_ptr<ParsedExpression> expr;
|
29
29
|
//! Whether or not this is a COLUMNS expression
|
30
30
|
bool columns = false;
|
31
31
|
|
@@ -335,6 +335,8 @@ private:
|
|
335
335
|
void TransformWindowFrame(duckdb_libpgquery::PGWindowDef *window_spec, WindowExpression *expr);
|
336
336
|
|
337
337
|
unique_ptr<SampleOptions> TransformSampleOptions(duckdb_libpgquery::PGNode *options);
|
338
|
+
//! Returns true if an expression is only a star (i.e. "*", without any other decorators)
|
339
|
+
bool ExpressionIsEmptyStar(ParsedExpression &expr);
|
338
340
|
|
339
341
|
private:
|
340
342
|
//! Current stack depth
|
@@ -253,8 +253,8 @@ private:
|
|
253
253
|
BoundStatement Bind(DetachStatement &stmt);
|
254
254
|
|
255
255
|
BoundStatement BindReturning(vector<unique_ptr<ParsedExpression>> returning_list, TableCatalogEntry *table,
|
256
|
-
|
257
|
-
BoundStatement result);
|
256
|
+
const string &alias, idx_t update_table_index,
|
257
|
+
unique_ptr<LogicalOperator> child_operator, BoundStatement result);
|
258
258
|
|
259
259
|
unique_ptr<QueryNode> BindTableMacro(FunctionExpression &function, TableMacroCatalogEntry *macro_func, idx_t depth);
|
260
260
|
|
@@ -339,8 +339,9 @@ private:
|
|
339
339
|
void ExpandStarExpressions(vector<unique_ptr<ParsedExpression>> &select_list,
|
340
340
|
vector<unique_ptr<ParsedExpression>> &new_select_list);
|
341
341
|
void ExpandStarExpression(unique_ptr<ParsedExpression> expr, vector<unique_ptr<ParsedExpression>> &new_select_list);
|
342
|
-
bool FindStarExpression(ParsedExpression &expr, StarExpression **star);
|
342
|
+
bool FindStarExpression(unique_ptr<ParsedExpression> &expr, StarExpression **star, bool is_root, bool in_columns);
|
343
343
|
void ReplaceStarExpression(unique_ptr<ParsedExpression> &expr, unique_ptr<ParsedExpression> &replacement);
|
344
|
+
void BindWhereStarExpression(unique_ptr<ParsedExpression> &expr);
|
344
345
|
|
345
346
|
//! If only a schema name is provided (e.g. "a.b") then figure out if "a" is a schema or a catalog name
|
346
347
|
void BindSchemaOrCatalog(string &catalog_name, string &schema_name);
|
@@ -65,6 +65,9 @@ public:
|
|
65
65
|
|
66
66
|
//! List of order nodes
|
67
67
|
vector<BoundOrderByNode> orders;
|
68
|
+
|
69
|
+
unique_ptr<BoundOrderModifier> Copy() const;
|
70
|
+
static bool Equals(const BoundOrderModifier *left, const BoundOrderModifier *right);
|
68
71
|
};
|
69
72
|
|
70
73
|
class BoundDistinctModifier : public BoundResultModifier {
|
@@ -25,10 +25,13 @@ public:
|
|
25
25
|
vector<unique_ptr<Expression>> children;
|
26
26
|
//! The bound function data (if any)
|
27
27
|
unique_ptr<FunctionData> bind_info;
|
28
|
+
//! The aggregate type (distinct or non-distinct)
|
28
29
|
AggregateType aggr_type;
|
29
30
|
|
30
31
|
//! Filter for this aggregate
|
31
32
|
unique_ptr<Expression> filter;
|
33
|
+
//! The order by expression for this aggregate - if any
|
34
|
+
unique_ptr<BoundOrderModifier> order_bys;
|
32
35
|
|
33
36
|
public:
|
34
37
|
bool IsDistinct() const {
|