duckdb 0.7.2-dev402.0 → 0.7.2-dev457.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/parquet/include/parquet_timestamp.hpp +0 -1
  3. package/src/duckdb/extension/parquet/parquet_timestamp.cpp +8 -6
  4. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +9 -1
  5. package/src/duckdb/src/execution/physical_plan/plan_distinct.cpp +5 -8
  6. package/src/duckdb/src/function/aggregate/distributive/bool.cpp +2 -0
  7. package/src/duckdb/src/function/aggregate/distributive/count.cpp +1 -0
  8. package/src/duckdb/src/function/aggregate/distributive/minmax.cpp +2 -0
  9. package/src/duckdb/src/function/aggregate/distributive/sum.cpp +8 -0
  10. package/src/duckdb/src/function/aggregate/holistic/quantile.cpp +15 -0
  11. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +42 -11
  12. package/src/duckdb/src/function/function_binder.cpp +1 -8
  13. package/src/duckdb/src/function/scalar/date/current.cpp +0 -2
  14. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  15. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +0 -3
  16. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +6 -3
  17. package/src/duckdb/src/include/duckdb/function/function_binder.hpp +3 -6
  18. package/src/duckdb/src/include/duckdb/optimizer/rule/list.hpp +1 -0
  19. package/src/duckdb/src/include/duckdb/optimizer/rule/ordered_aggregate_optimizer.hpp +24 -0
  20. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +2 -2
  21. package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +2 -2
  22. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -0
  23. package/src/duckdb/src/include/duckdb/planner/binder.hpp +4 -3
  24. package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +3 -0
  25. package/src/duckdb/src/include/duckdb/planner/expression/bound_aggregate_expression.hpp +3 -0
  26. package/src/duckdb/src/include/duckdb/planner/expression_binder/order_binder.hpp +4 -1
  27. package/src/duckdb/src/include/duckdb/planner/operator/logical_distinct.hpp +3 -0
  28. package/src/duckdb/src/optimizer/optimizer.cpp +1 -0
  29. package/src/duckdb/src/optimizer/rule/ordered_aggregate_optimizer.cpp +30 -0
  30. package/src/duckdb/src/parser/expression/star_expression.cpp +6 -6
  31. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -1
  32. package/src/duckdb/src/parser/transform/expression/transform_columnref.cpp +17 -2
  33. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +45 -40
  34. package/src/duckdb/src/parser/transform/helpers/transform_groupby.cpp +7 -0
  35. package/src/duckdb/src/parser/transform/helpers/transform_orderby.cpp +0 -7
  36. package/src/duckdb/src/planner/bind_context.cpp +2 -25
  37. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +6 -4
  38. package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +3 -2
  39. package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +176 -0
  40. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +57 -82
  41. package/src/duckdb/src/planner/binder/query_node/plan_query_node.cpp +11 -0
  42. package/src/duckdb/src/planner/binder/statement/bind_delete.cpp +1 -1
  43. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +2 -2
  44. package/src/duckdb/src/planner/binder/statement/bind_update.cpp +1 -1
  45. package/src/duckdb/src/planner/binder.cpp +12 -23
  46. package/src/duckdb/src/planner/bound_result_modifier.cpp +26 -0
  47. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +9 -2
  48. package/src/duckdb/src/planner/expression_iterator.cpp +5 -0
  49. package/src/duckdb/src/planner/logical_operator_visitor.cpp +5 -0
  50. package/src/duckdb/src/planner/operator/logical_distinct.cpp +3 -0
  51. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  52. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +1 -1
  53. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +8141 -8313
  54. package/src/duckdb/ub_src_optimizer_rule.cpp +2 -0
  55. package/src/duckdb/ub_src_planner_binder_expression.cpp +2 -0
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.7.2-dev402.0",
5
+ "version": "0.7.2-dev457.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -16,7 +16,6 @@ struct Int96 {
16
16
  uint32_t value[3];
17
17
  };
18
18
 
19
- int64_t ImpalaTimestampToNanoseconds(const Int96 &impala_timestamp);
20
19
  timestamp_t ImpalaTimestampToTimestamp(const Int96 &raw_ts);
21
20
  Int96 TimestampToImpalaTimestamp(timestamp_t &ts);
22
21
  timestamp_t ParquetTimestampMicrosToTimestamp(const int64_t &raw_ts);
@@ -12,24 +12,26 @@ namespace duckdb {
12
12
  // surely they are joking
13
13
  static constexpr int64_t JULIAN_TO_UNIX_EPOCH_DAYS = 2440588LL;
14
14
  static constexpr int64_t MILLISECONDS_PER_DAY = 86400000LL;
15
- static constexpr int64_t NANOSECONDS_PER_DAY = MILLISECONDS_PER_DAY * 1000LL * 1000LL;
15
+ static constexpr int64_t MICROSECONDS_PER_DAY = MILLISECONDS_PER_DAY * 1000LL;
16
+ static constexpr int64_t NANOSECONDS_PER_MICRO = 1000LL;
16
17
 
17
- int64_t ImpalaTimestampToNanoseconds(const Int96 &impala_timestamp) {
18
+ static int64_t ImpalaTimestampToMicroseconds(const Int96 &impala_timestamp) {
18
19
  int64_t days_since_epoch = impala_timestamp.value[2] - JULIAN_TO_UNIX_EPOCH_DAYS;
19
20
  auto nanoseconds = Load<int64_t>((data_ptr_t)impala_timestamp.value);
20
- return days_since_epoch * NANOSECONDS_PER_DAY + nanoseconds;
21
+ auto microseconds = nanoseconds / NANOSECONDS_PER_MICRO;
22
+ return days_since_epoch * MICROSECONDS_PER_DAY + microseconds;
21
23
  }
22
24
 
23
25
  timestamp_t ImpalaTimestampToTimestamp(const Int96 &raw_ts) {
24
- auto impala_ns = ImpalaTimestampToNanoseconds(raw_ts);
25
- return Timestamp::FromEpochNanoSeconds(impala_ns);
26
+ auto impala_us = ImpalaTimestampToMicroseconds(raw_ts);
27
+ return Timestamp::FromEpochMicroSeconds(impala_us);
26
28
  }
27
29
 
28
30
  Int96 TimestampToImpalaTimestamp(timestamp_t &ts) {
29
31
  int32_t hour, min, sec, msec;
30
32
  Time::Convert(Timestamp::GetTime(ts), hour, min, sec, msec);
31
33
  uint64_t ms_since_midnight = hour * 60 * 60 * 1000 + min * 60 * 1000 + sec * 1000 + msec;
32
- auto days_since_epoch = Date::Epoch(Timestamp::GetDate(ts)) / (24 * 60 * 60);
34
+ auto days_since_epoch = Date::Epoch(Timestamp::GetDate(ts)) / int64_t(24 * 60 * 60);
33
35
  // first two uint32 in Int96 are nanoseconds since midnights
34
36
  // last uint32 is number of days since year 4713 BC ("Julian date")
35
37
  Int96 impala_ts;
@@ -9,6 +9,7 @@
9
9
  #include "duckdb/parser/expression/comparison_expression.hpp"
10
10
  #include "duckdb/planner/expression/bound_aggregate_expression.hpp"
11
11
  #include "duckdb/planner/operator/logical_aggregate.hpp"
12
+ #include "duckdb/function/function_binder.hpp"
12
13
 
13
14
  namespace duckdb {
14
15
 
@@ -169,13 +170,20 @@ PhysicalPlanGenerator::ExtractAggregateExpressions(unique_ptr<PhysicalOperator>
169
170
  vector<unique_ptr<Expression>> expressions;
170
171
  vector<LogicalType> types;
171
172
 
173
+ // bind sorted aggregates
174
+ for (auto &aggr : aggregates) {
175
+ auto &bound_aggr = (BoundAggregateExpression &)*aggr;
176
+ if (bound_aggr.order_bys) {
177
+ // sorted aggregate!
178
+ FunctionBinder::BindSortedAggregate(context, bound_aggr, groups);
179
+ }
180
+ }
172
181
  for (auto &group : groups) {
173
182
  auto ref = make_unique<BoundReferenceExpression>(group->return_type, expressions.size());
174
183
  types.push_back(group->return_type);
175
184
  expressions.push_back(std::move(group));
176
185
  group = std::move(ref);
177
186
  }
178
-
179
187
  for (auto &aggr : aggregates) {
180
188
  auto &bound_aggr = (BoundAggregateExpression &)*aggr;
181
189
  for (auto &child : bound_aggr.children) {
@@ -9,8 +9,10 @@
9
9
 
10
10
  namespace duckdb {
11
11
 
12
- unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreateDistinctOn(unique_ptr<PhysicalOperator> child,
13
- vector<unique_ptr<Expression>> distinct_targets) {
12
+ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalDistinct &op) {
13
+ D_ASSERT(op.children.size() == 1);
14
+ auto child = CreatePlan(*op.children[0]);
15
+ auto &distinct_targets = op.distinct_targets;
14
16
  D_ASSERT(child);
15
17
  D_ASSERT(!distinct_targets.empty());
16
18
 
@@ -55,6 +57,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreateDistinctOn(unique_ptr<
55
57
  FunctionBinder function_binder(context);
56
58
  auto first_aggregate = function_binder.BindAggregateFunction(
57
59
  FirstFun::GetFunction(logical_type), std::move(first_children), nullptr, AggregateType::NON_DISTINCT);
60
+ first_aggregate->order_bys = op.order_by ? op.order_by->Copy() : nullptr;
58
61
  // add the projection
59
62
  projections.push_back(make_unique<BoundReferenceExpression>(logical_type, group_count + aggregates.size()));
60
63
  // push it to the list of aggregates
@@ -81,10 +84,4 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreateDistinctOn(unique_ptr<
81
84
  return std::move(aggr_projection);
82
85
  }
83
86
 
84
- unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalDistinct &op) {
85
- D_ASSERT(op.children.size() == 1);
86
- auto plan = CreatePlan(*op.children[0]);
87
- return CreateDistinctOn(std::move(plan), std::move(op.distinct_targets));
88
- }
89
-
90
87
  } // namespace duckdb
@@ -95,6 +95,7 @@ AggregateFunction BoolOrFun::GetFunction() {
95
95
  auto fun = AggregateFunction::UnaryAggregate<BoolState, bool, bool, BoolOrFunFunction>(
96
96
  LogicalType(LogicalTypeId::BOOLEAN), LogicalType::BOOLEAN);
97
97
  fun.name = "bool_or";
98
+ fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
98
99
  return fun;
99
100
  }
100
101
 
@@ -102,6 +103,7 @@ AggregateFunction BoolAndFun::GetFunction() {
102
103
  auto fun = AggregateFunction::UnaryAggregate<BoolState, bool, bool, BoolAndFunFunction>(
103
104
  LogicalType(LogicalTypeId::BOOLEAN), LogicalType::BOOLEAN);
104
105
  fun.name = "bool_and";
106
+ fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
105
107
  return fun;
106
108
  }
107
109
 
@@ -76,6 +76,7 @@ AggregateFunction CountFun::GetFunction() {
76
76
  LogicalType(LogicalTypeId::ANY), LogicalType::BIGINT);
77
77
  fun.name = "count";
78
78
  fun.null_handling = FunctionNullHandling::SPECIAL_HANDLING;
79
+ fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
79
80
  return fun;
80
81
  }
81
82
 
@@ -514,6 +514,7 @@ unique_ptr<FunctionData> BindDecimalMinMax(ClientContext &context, AggregateFunc
514
514
  function.name = std::move(name);
515
515
  function.arguments[0] = decimal_type;
516
516
  function.return_type = decimal_type;
517
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
517
518
  return nullptr;
518
519
  }
519
520
 
@@ -545,6 +546,7 @@ unique_ptr<FunctionData> BindMinMax(ClientContext &context, AggregateFunction &f
545
546
  auto name = std::move(function.name);
546
547
  function = GetMinMaxOperator<OP, OP_STRING, OP_VECTOR>(input_type);
547
548
  function.name = std::move(name);
549
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
548
550
  if (function.bind) {
549
551
  return function.bind(context, function, arguments);
550
552
  } else {
@@ -110,6 +110,7 @@ AggregateFunction SumFun::GetSumAggregate(PhysicalType type) {
110
110
  case PhysicalType::INT16: {
111
111
  auto function = AggregateFunction::UnaryAggregate<SumState<int64_t>, int16_t, hugeint_t, IntegerSumOperation>(
112
112
  LogicalType::SMALLINT, LogicalType::HUGEINT);
113
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
113
114
  return function;
114
115
  }
115
116
 
@@ -118,6 +119,7 @@ AggregateFunction SumFun::GetSumAggregate(PhysicalType type) {
118
119
  AggregateFunction::UnaryAggregate<SumState<hugeint_t>, int32_t, hugeint_t, SumToHugeintOperation>(
119
120
  LogicalType::INTEGER, LogicalType::HUGEINT);
120
121
  function.statistics = SumPropagateStats;
122
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
121
123
  return function;
122
124
  }
123
125
  case PhysicalType::INT64: {
@@ -125,12 +127,14 @@ AggregateFunction SumFun::GetSumAggregate(PhysicalType type) {
125
127
  AggregateFunction::UnaryAggregate<SumState<hugeint_t>, int64_t, hugeint_t, SumToHugeintOperation>(
126
128
  LogicalType::BIGINT, LogicalType::HUGEINT);
127
129
  function.statistics = SumPropagateStats;
130
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
128
131
  return function;
129
132
  }
130
133
  case PhysicalType::INT128: {
131
134
  auto function =
132
135
  AggregateFunction::UnaryAggregate<SumState<hugeint_t>, hugeint_t, hugeint_t, HugeintSumOperation>(
133
136
  LogicalType::HUGEINT, LogicalType::HUGEINT);
137
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
134
138
  return function;
135
139
  }
136
140
  default:
@@ -144,12 +148,14 @@ AggregateFunction SumFun::GetSumAggregateNoOverflow(PhysicalType type) {
144
148
  auto function = AggregateFunction::UnaryAggregate<SumState<int64_t>, int32_t, hugeint_t, IntegerSumOperation>(
145
149
  LogicalType::INTEGER, LogicalType::HUGEINT);
146
150
  function.name = "sum_no_overflow";
151
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
147
152
  return function;
148
153
  }
149
154
  case PhysicalType::INT64: {
150
155
  auto function = AggregateFunction::UnaryAggregate<SumState<int64_t>, int64_t, hugeint_t, IntegerSumOperation>(
151
156
  LogicalType::BIGINT, LogicalType::HUGEINT);
152
157
  function.name = "sum_no_overflow";
158
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
153
159
  return function;
154
160
  }
155
161
  default:
@@ -164,6 +170,7 @@ unique_ptr<FunctionData> BindDecimalSum(ClientContext &context, AggregateFunctio
164
170
  function.name = "sum";
165
171
  function.arguments[0] = decimal_type;
166
172
  function.return_type = LogicalType::DECIMAL(Decimal::MAX_WIDTH_DECIMAL, DecimalType::GetScale(decimal_type));
173
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
167
174
  return nullptr;
168
175
  }
169
176
 
@@ -174,6 +181,7 @@ unique_ptr<FunctionData> BindDecimalSumNoOverflow(ClientContext &context, Aggreg
174
181
  function.name = "sum_no_overflow";
175
182
  function.arguments[0] = decimal_type;
176
183
  function.return_type = LogicalType::DECIMAL(Decimal::MAX_WIDTH_DECIMAL, DecimalType::GetScale(decimal_type));
184
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
177
185
  return nullptr;
178
186
  }
179
187
 
@@ -795,6 +795,7 @@ AggregateFunction GetTypedDiscreteQuantileListAggregateFunction(const LogicalTyp
795
795
  using STATE = QuantileState<SAVE_TYPE>;
796
796
  using OP = QuantileListOperation<INPUT_TYPE, true>;
797
797
  auto fun = QuantileListAggregate<STATE, INPUT_TYPE, list_entry_t, OP>(type, type);
798
+ fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
798
799
  fun.window = AggregateFunction::UnaryWindow<STATE, INPUT_TYPE, list_entry_t, OP>;
799
800
  return fun;
800
801
  }
@@ -851,6 +852,7 @@ AggregateFunction GetTypedContinuousQuantileAggregateFunction(const LogicalType
851
852
  using STATE = QuantileState<INPUT_TYPE>;
852
853
  using OP = QuantileScalarOperation<false>;
853
854
  auto fun = AggregateFunction::UnaryAggregateDestructor<STATE, INPUT_TYPE, TARGET_TYPE, OP>(input_type, target_type);
855
+ fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
854
856
  fun.window = AggregateFunction::UnaryWindow<STATE, INPUT_TYPE, TARGET_TYPE, OP>;
855
857
  return fun;
856
858
  }
@@ -904,6 +906,7 @@ AggregateFunction GetTypedContinuousQuantileListAggregateFunction(const LogicalT
904
906
  using STATE = QuantileState<INPUT_TYPE>;
905
907
  using OP = QuantileListOperation<CHILD_TYPE, false>;
906
908
  auto fun = QuantileListAggregate<STATE, INPUT_TYPE, list_entry_t, OP>(input_type, result_type);
909
+ fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
907
910
  fun.window = AggregateFunction::UnaryWindow<STATE, INPUT_TYPE, list_entry_t, OP>;
908
911
  return fun;
909
912
  }
@@ -1129,6 +1132,7 @@ AggregateFunction GetTypedMedianAbsoluteDeviationAggregateFunction(const Logical
1129
1132
  using STATE = QuantileState<INPUT_TYPE>;
1130
1133
  using OP = MedianAbsoluteDeviationOperation<MEDIAN_TYPE>;
1131
1134
  auto fun = AggregateFunction::UnaryAggregateDestructor<STATE, INPUT_TYPE, TARGET_TYPE, OP>(input_type, target_type);
1135
+ fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
1132
1136
  fun.window = AggregateFunction::UnaryWindow<STATE, INPUT_TYPE, TARGET_TYPE, OP>;
1133
1137
  return fun;
1134
1138
  }
@@ -1198,6 +1202,7 @@ unique_ptr<FunctionData> BindMedianDecimal(ClientContext &context, AggregateFunc
1198
1202
  function.name = "median";
1199
1203
  function.serialize = QuantileSerialize;
1200
1204
  function.deserialize = QuantileDeserialize;
1205
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
1201
1206
  return bind_data;
1202
1207
  }
1203
1208
 
@@ -1205,6 +1210,7 @@ unique_ptr<FunctionData> BindMedianAbsoluteDeviationDecimal(ClientContext &conte
1205
1210
  vector<unique_ptr<Expression>> &arguments) {
1206
1211
  function = GetMedianAbsoluteDeviationAggregateFunction(arguments[0]->return_type);
1207
1212
  function.name = "mad";
1213
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
1208
1214
  return nullptr;
1209
1215
  }
1210
1216
 
@@ -1257,6 +1263,7 @@ unique_ptr<FunctionData> BindDiscreteQuantileDecimal(ClientContext &context, Agg
1257
1263
  function.name = "quantile_disc";
1258
1264
  function.serialize = QuantileDecimalSerialize;
1259
1265
  function.deserialize = QuantileDeserialize;
1266
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
1260
1267
  return bind_data;
1261
1268
  }
1262
1269
 
@@ -1267,6 +1274,7 @@ unique_ptr<FunctionData> BindDiscreteQuantileDecimalList(ClientContext &context,
1267
1274
  function.name = "quantile_disc";
1268
1275
  function.serialize = QuantileDecimalSerialize;
1269
1276
  function.deserialize = QuantileDeserialize;
1277
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
1270
1278
  return bind_data;
1271
1279
  }
1272
1280
 
@@ -1277,6 +1285,7 @@ unique_ptr<FunctionData> BindContinuousQuantileDecimal(ClientContext &context, A
1277
1285
  function.name = "quantile_cont";
1278
1286
  function.serialize = QuantileDecimalSerialize;
1279
1287
  function.deserialize = QuantileDeserialize;
1288
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
1280
1289
  return bind_data;
1281
1290
  }
1282
1291
 
@@ -1287,6 +1296,7 @@ unique_ptr<FunctionData> BindContinuousQuantileDecimalList(ClientContext &contex
1287
1296
  function.name = "quantile_cont";
1288
1297
  function.serialize = QuantileDecimalSerialize;
1289
1298
  function.deserialize = QuantileDeserialize;
1299
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
1290
1300
  return bind_data;
1291
1301
  }
1292
1302
 
@@ -1316,6 +1326,7 @@ AggregateFunction GetDiscreteQuantileAggregate(const LogicalType &type) {
1316
1326
  fun.deserialize = QuantileDeserialize;
1317
1327
  // temporarily push an argument so we can bind the actual quantile
1318
1328
  fun.arguments.emplace_back(LogicalType::DOUBLE);
1329
+ fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
1319
1330
  return fun;
1320
1331
  }
1321
1332
 
@@ -1327,6 +1338,7 @@ AggregateFunction GetDiscreteQuantileListAggregate(const LogicalType &type) {
1327
1338
  // temporarily push an argument so we can bind the actual quantile
1328
1339
  auto list_of_double = LogicalType::LIST(LogicalType::DOUBLE);
1329
1340
  fun.arguments.push_back(list_of_double);
1341
+ fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
1330
1342
  return fun;
1331
1343
  }
1332
1344
 
@@ -1337,6 +1349,7 @@ AggregateFunction GetContinuousQuantileAggregate(const LogicalType &type) {
1337
1349
  fun.deserialize = QuantileDeserialize;
1338
1350
  // temporarily push an argument so we can bind the actual quantile
1339
1351
  fun.arguments.emplace_back(LogicalType::DOUBLE);
1352
+ fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
1340
1353
  return fun;
1341
1354
  }
1342
1355
 
@@ -1348,6 +1361,7 @@ AggregateFunction GetContinuousQuantileListAggregate(const LogicalType &type) {
1348
1361
  // temporarily push an argument so we can bind the actual quantile
1349
1362
  auto list_of_double = LogicalType::LIST(LogicalType::DOUBLE);
1350
1363
  fun.arguments.push_back(list_of_double);
1364
+ fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
1351
1365
  return fun;
1352
1366
  }
1353
1367
 
@@ -1357,6 +1371,7 @@ AggregateFunction GetQuantileDecimalAggregate(const vector<LogicalType> &argumen
1357
1371
  fun.bind = bind;
1358
1372
  fun.serialize = QuantileSerialize;
1359
1373
  fun.deserialize = QuantileDeserialize;
1374
+ fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
1360
1375
  return fun;
1361
1376
  }
1362
1377
 
@@ -3,6 +3,9 @@
3
3
  #include "duckdb/common/types/column_data_collection.hpp"
4
4
  #include "duckdb/function/function_binder.hpp"
5
5
  #include "duckdb/storage/buffer_manager.hpp"
6
+ #include "duckdb/planner/expression/bound_aggregate_expression.hpp"
7
+ #include "duckdb/parser/expression_map.hpp"
8
+ #include "duckdb/function/aggregate/distributive_functions.hpp"
6
9
 
7
10
  namespace duckdb {
8
11
 
@@ -363,16 +366,44 @@ struct SortedAggregateFunction {
363
366
  }
364
367
  };
365
368
 
366
- unique_ptr<FunctionData> FunctionBinder::BindSortedAggregate(AggregateFunction &bound_function,
367
- vector<unique_ptr<Expression>> &children,
368
- unique_ptr<FunctionData> bind_info,
369
- unique_ptr<BoundOrderModifier> order_bys) {
370
-
371
- auto sorted_bind =
372
- make_unique<SortedAggregateBindData>(context, bound_function, children, std::move(bind_info), *order_bys);
369
+ void FunctionBinder::BindSortedAggregate(ClientContext &context, BoundAggregateExpression &expr,
370
+ const vector<unique_ptr<Expression>> &groups) {
371
+ if (!expr.order_bys || expr.order_bys->orders.empty() || expr.children.empty()) {
372
+ // not a sorted aggregate: return
373
+ return;
374
+ }
375
+ if (context.config.enable_optimizer) {
376
+ // for each ORDER BY - check if it is actually necessary
377
+ // expressions that are in the groups do not need to be ORDERED BY
378
+ // `ORDER BY` on a group has no effect, because for each aggregate, the group is unique
379
+ // similarly, we only need to ORDER BY each aggregate once
380
+ expression_set_t seen_expressions;
381
+ for (auto &target : groups) {
382
+ seen_expressions.insert(target.get());
383
+ }
384
+ vector<BoundOrderByNode> new_order_nodes;
385
+ for (auto &order_node : expr.order_bys->orders) {
386
+ if (seen_expressions.find(order_node.expression.get()) != seen_expressions.end()) {
387
+ // we do not need to order by this node
388
+ continue;
389
+ }
390
+ seen_expressions.insert(order_node.expression.get());
391
+ new_order_nodes.push_back(std::move(order_node));
392
+ }
393
+ if (new_order_nodes.empty()) {
394
+ expr.order_bys.reset();
395
+ return;
396
+ }
397
+ expr.order_bys->orders = std::move(new_order_nodes);
398
+ }
399
+ auto &bound_function = expr.function;
400
+ auto &children = expr.children;
401
+ auto &order_bys = *expr.order_bys;
402
+ auto sorted_bind = make_unique<SortedAggregateBindData>(context, bound_function, expr.children,
403
+ std::move(expr.bind_info), order_bys);
373
404
 
374
405
  // The arguments are the children plus the sort columns.
375
- for (auto &order : order_bys->orders) {
406
+ for (auto &order : order_bys.orders) {
376
407
  children.emplace_back(std::move(order.expression));
377
408
  }
378
409
 
@@ -392,9 +423,9 @@ unique_ptr<FunctionData> FunctionBinder::BindSortedAggregate(AggregateFunction &
392
423
  AggregateFunction::StateDestroy<SortedAggregateState, SortedAggregateFunction>, nullptr,
393
424
  SortedAggregateFunction::Window, SortedAggregateFunction::Serialize, SortedAggregateFunction::Deserialize);
394
425
 
395
- bound_function = std::move(ordered_aggregate);
396
-
397
- return std::move(sorted_bind);
426
+ expr.function = std::move(ordered_aggregate);
427
+ expr.bind_info = std::move(sorted_bind);
428
+ expr.order_bys.reset();
398
429
  }
399
430
 
400
431
  } // namespace duckdb
@@ -294,8 +294,7 @@ unique_ptr<BoundFunctionExpression> FunctionBinder::BindScalarFunction(ScalarFun
294
294
  unique_ptr<BoundAggregateExpression> FunctionBinder::BindAggregateFunction(AggregateFunction bound_function,
295
295
  vector<unique_ptr<Expression>> children,
296
296
  unique_ptr<Expression> filter,
297
- AggregateType aggr_type,
298
- unique_ptr<BoundOrderModifier> order_bys) {
297
+ AggregateType aggr_type) {
299
298
  unique_ptr<FunctionData> bind_info;
300
299
  if (bound_function.bind) {
301
300
  bind_info = bound_function.bind(context, bound_function, children);
@@ -306,12 +305,6 @@ unique_ptr<BoundAggregateExpression> FunctionBinder::BindAggregateFunction(Aggre
306
305
  // check if we need to add casts to the children
307
306
  CastToFunctionArguments(bound_function, children);
308
307
 
309
- // Special case: for ORDER BY aggregates, we wrap the aggregate function in a SortedAggregateFunction
310
- // The children are the sort clauses and the binding contains the ordering data.
311
- if (order_bys && !order_bys->orders.empty()) {
312
- bind_info = BindSortedAggregate(bound_function, children, std::move(bind_info), std::move(order_bys));
313
- }
314
-
315
308
  return make_unique<BoundAggregateExpression>(std::move(bound_function), std::move(children), std::move(filter),
316
309
  std::move(bind_info), aggr_type);
317
310
  }
@@ -35,14 +35,12 @@ static void CurrentTimestampFunction(DataChunk &input, ExpressionState &state, V
35
35
 
36
36
  void CurrentTimeFun::RegisterFunction(BuiltinFunctions &set) {
37
37
  ScalarFunction current_time("get_current_time", {}, LogicalType::TIME, CurrentTimeFunction);
38
- ;
39
38
  current_time.side_effects = FunctionSideEffects::HAS_SIDE_EFFECTS;
40
39
  set.AddFunction(current_time);
41
40
  }
42
41
 
43
42
  void CurrentDateFun::RegisterFunction(BuiltinFunctions &set) {
44
43
  ScalarFunction current_date({}, LogicalType::DATE, CurrentDateFunction);
45
- ;
46
44
  current_date.side_effects = FunctionSideEffects::HAS_SIDE_EFFECTS;
47
45
  set.AddFunction({"today", "current_date"}, current_date);
48
46
  }
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.7.2-dev402"
2
+ #define DUCKDB_VERSION "0.7.2-dev457"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "ab9736bed0"
5
+ #define DUCKDB_SOURCE_ID "403d0ca315"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -88,9 +88,6 @@ protected:
88
88
  unique_ptr<PhysicalOperator> CreatePlan(LogicalRecursiveCTE &op);
89
89
  unique_ptr<PhysicalOperator> CreatePlan(LogicalCTERef &op);
90
90
 
91
- unique_ptr<PhysicalOperator> CreateDistinctOn(unique_ptr<PhysicalOperator> child,
92
- vector<unique_ptr<Expression>> distinct_targets);
93
-
94
91
  unique_ptr<PhysicalOperator> ExtractAggregateExpressions(unique_ptr<PhysicalOperator> child,
95
92
  vector<unique_ptr<Expression>> &expressions,
96
93
  vector<unique_ptr<Expression>> &groups);
@@ -17,8 +17,9 @@
17
17
 
18
18
  namespace duckdb {
19
19
 
20
- //! This allows us to use the & operator to check if the type is contained in the set
21
20
  enum class AggregateType : uint8_t { NON_DISTINCT = 1, DISTINCT = 2 };
21
+ //! Whether or not the input order influences the result of the aggregate
22
+ enum class AggregateOrderDependent : uint8_t { ORDER_DEPENDENT = 1, NOT_ORDER_DEPENDENT = 2 };
22
23
 
23
24
  class BoundAggregateExpression;
24
25
 
@@ -92,7 +93,7 @@ public:
92
93
  LogicalType(LogicalTypeId::INVALID), null_handling),
93
94
  state_size(state_size), initialize(initialize), update(update), combine(combine), finalize(finalize),
94
95
  simple_update(simple_update), window(window), bind(bind), destructor(destructor), statistics(statistics),
95
- serialize(serialize), deserialize(deserialize) {
96
+ serialize(serialize), deserialize(deserialize), order_dependent(AggregateOrderDependent::ORDER_DEPENDENT) {
96
97
  }
97
98
 
98
99
  DUCKDB_API
@@ -107,7 +108,7 @@ public:
107
108
  LogicalType(LogicalTypeId::INVALID)),
108
109
  state_size(state_size), initialize(initialize), update(update), combine(combine), finalize(finalize),
109
110
  simple_update(simple_update), window(window), bind(bind), destructor(destructor), statistics(statistics),
110
- serialize(serialize), deserialize(deserialize) {
111
+ serialize(serialize), deserialize(deserialize), order_dependent(AggregateOrderDependent::ORDER_DEPENDENT) {
111
112
  }
112
113
 
113
114
  DUCKDB_API AggregateFunction(const vector<LogicalType> &arguments, const LogicalType &return_type,
@@ -160,6 +161,8 @@ public:
160
161
 
161
162
  aggregate_serialize_t serialize;
162
163
  aggregate_deserialize_t deserialize;
164
+ //! Whether or not the aggregate is order dependent
165
+ AggregateOrderDependent order_dependent;
163
166
 
164
167
  DUCKDB_API bool operator==(const AggregateFunction &rhs) const {
165
168
  return state_size == rhs.state_size && initialize == rhs.initialize && update == rhs.update &&
@@ -59,13 +59,10 @@ public:
59
59
  DUCKDB_API unique_ptr<BoundAggregateExpression>
60
60
  BindAggregateFunction(AggregateFunction bound_function, vector<unique_ptr<Expression>> children,
61
61
  unique_ptr<Expression> filter = nullptr,
62
- AggregateType aggr_type = AggregateType::NON_DISTINCT,
63
- unique_ptr<BoundOrderModifier> order_bys = nullptr);
62
+ AggregateType aggr_type = AggregateType::NON_DISTINCT);
64
63
 
65
- DUCKDB_API unique_ptr<FunctionData> BindSortedAggregate(AggregateFunction &bound_function,
66
- vector<unique_ptr<Expression>> &children,
67
- unique_ptr<FunctionData> bind_info,
68
- unique_ptr<BoundOrderModifier> order_bys);
64
+ DUCKDB_API static void BindSortedAggregate(ClientContext &context, BoundAggregateExpression &expr,
65
+ const vector<unique_ptr<Expression>> &groups);
69
66
 
70
67
  private:
71
68
  //! Cast a set of expressions to the arguments of this function
@@ -10,3 +10,4 @@
10
10
  #include "duckdb/optimizer/rule/move_constants.hpp"
11
11
  #include "duckdb/optimizer/rule/enum_comparison.hpp"
12
12
  #include "duckdb/optimizer/rule/regex_optimizations.hpp"
13
+ #include "duckdb/optimizer/rule/ordered_aggregate_optimizer.hpp"
@@ -0,0 +1,24 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/optimizer/rule/ordered_aggregate_optimizer.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/optimizer/rule.hpp"
12
+ #include "duckdb/parser/expression_map.hpp"
13
+
14
+ namespace duckdb {
15
+
16
+ class OrderedAggregateOptimizer : public Rule {
17
+ public:
18
+ explicit OrderedAggregateOptimizer(ExpressionRewriter &rewriter);
19
+
20
+ unique_ptr<Expression> Apply(LogicalOperator &op, vector<Expression *> &bindings, bool &changes_made,
21
+ bool is_root) override;
22
+ };
23
+
24
+ } // namespace duckdb
@@ -59,9 +59,9 @@ public:
59
59
  void Verify() const override;
60
60
 
61
61
  public:
62
- template <class T, class BASE>
62
+ template <class T, class BASE, class ORDER_MODIFIER = OrderModifier>
63
63
  static string ToString(const T &entry, const string &schema, const string &function_name, bool is_operator = false,
64
- bool distinct = false, BASE *filter = nullptr, OrderModifier *order_bys = nullptr,
64
+ bool distinct = false, BASE *filter = nullptr, ORDER_MODIFIER *order_bys = nullptr,
65
65
  bool export_state = false, bool add_alias = false) {
66
66
  if (is_operator) {
67
67
  // built-in operator
@@ -24,8 +24,8 @@ public:
24
24
  case_insensitive_set_t exclude_list;
25
25
  //! List of columns to replace with another expression
26
26
  case_insensitive_map_t<unique_ptr<ParsedExpression>> replace_list;
27
- //! Regular expression to select columns (if any)
28
- string regex;
27
+ //! The expression to select the columns (regular expression or list)
28
+ unique_ptr<ParsedExpression> expr;
29
29
  //! Whether or not this is a COLUMNS expression
30
30
  bool columns = false;
31
31
 
@@ -335,6 +335,8 @@ private:
335
335
  void TransformWindowFrame(duckdb_libpgquery::PGWindowDef *window_spec, WindowExpression *expr);
336
336
 
337
337
  unique_ptr<SampleOptions> TransformSampleOptions(duckdb_libpgquery::PGNode *options);
338
+ //! Returns true if an expression is only a star (i.e. "*", without any other decorators)
339
+ bool ExpressionIsEmptyStar(ParsedExpression &expr);
338
340
 
339
341
  private:
340
342
  //! Current stack depth
@@ -253,8 +253,8 @@ private:
253
253
  BoundStatement Bind(DetachStatement &stmt);
254
254
 
255
255
  BoundStatement BindReturning(vector<unique_ptr<ParsedExpression>> returning_list, TableCatalogEntry *table,
256
- idx_t update_table_index, unique_ptr<LogicalOperator> child_operator,
257
- BoundStatement result);
256
+ const string &alias, idx_t update_table_index,
257
+ unique_ptr<LogicalOperator> child_operator, BoundStatement result);
258
258
 
259
259
  unique_ptr<QueryNode> BindTableMacro(FunctionExpression &function, TableMacroCatalogEntry *macro_func, idx_t depth);
260
260
 
@@ -339,8 +339,9 @@ private:
339
339
  void ExpandStarExpressions(vector<unique_ptr<ParsedExpression>> &select_list,
340
340
  vector<unique_ptr<ParsedExpression>> &new_select_list);
341
341
  void ExpandStarExpression(unique_ptr<ParsedExpression> expr, vector<unique_ptr<ParsedExpression>> &new_select_list);
342
- bool FindStarExpression(ParsedExpression &expr, StarExpression **star);
342
+ bool FindStarExpression(unique_ptr<ParsedExpression> &expr, StarExpression **star, bool is_root, bool in_columns);
343
343
  void ReplaceStarExpression(unique_ptr<ParsedExpression> &expr, unique_ptr<ParsedExpression> &replacement);
344
+ void BindWhereStarExpression(unique_ptr<ParsedExpression> &expr);
344
345
 
345
346
  //! If only a schema name is provided (e.g. "a.b") then figure out if "a" is a schema or a catalog name
346
347
  void BindSchemaOrCatalog(string &catalog_name, string &schema_name);
@@ -65,6 +65,9 @@ public:
65
65
 
66
66
  //! List of order nodes
67
67
  vector<BoundOrderByNode> orders;
68
+
69
+ unique_ptr<BoundOrderModifier> Copy() const;
70
+ static bool Equals(const BoundOrderModifier *left, const BoundOrderModifier *right);
68
71
  };
69
72
 
70
73
  class BoundDistinctModifier : public BoundResultModifier {
@@ -25,10 +25,13 @@ public:
25
25
  vector<unique_ptr<Expression>> children;
26
26
  //! The bound function data (if any)
27
27
  unique_ptr<FunctionData> bind_info;
28
+ //! The aggregate type (distinct or non-distinct)
28
29
  AggregateType aggr_type;
29
30
 
30
31
  //! Filter for this aggregate
31
32
  unique_ptr<Expression> filter;
33
+ //! The order by expression for this aggregate - if any
34
+ unique_ptr<BoundOrderModifier> order_bys;
32
35
 
33
36
  public:
34
37
  bool IsDistinct() const {