duckdb 0.7.2-dev1803.0 → 0.7.2-dev1898.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (158)
  1. package/package.json +1 -1
  2. package/src/duckdb/src/catalog/catalog.cpp +27 -27
  3. package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +6 -6
  4. package/src/duckdb/src/catalog/catalog_set.cpp +27 -25
  5. package/src/duckdb/src/catalog/default/default_functions.cpp +6 -6
  6. package/src/duckdb/src/catalog/default/default_types.cpp +4 -4
  7. package/src/duckdb/src/catalog/default/default_views.cpp +4 -4
  8. package/src/duckdb/src/catalog/dependency_list.cpp +7 -6
  9. package/src/duckdb/src/catalog/dependency_manager.cpp +44 -38
  10. package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +11 -6
  11. package/src/duckdb/src/common/sort/sorted_block.cpp +9 -4
  12. package/src/duckdb/src/common/types/batched_data_collection.cpp +2 -1
  13. package/src/duckdb/src/common/types/column_data_allocator.cpp +1 -0
  14. package/src/duckdb/src/common/types/vector.cpp +2 -2
  15. package/src/duckdb/src/common/vector_operations/vector_copy.cpp +14 -11
  16. package/src/duckdb/src/execution/operator/aggregate/distinct_aggregate_data.cpp +1 -1
  17. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +51 -50
  18. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +4 -0
  19. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +14 -13
  20. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +20 -20
  21. package/src/duckdb/src/execution/operator/schema/physical_create_table.cpp +2 -2
  22. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +1 -1
  23. package/src/duckdb/src/execution/physical_plan/plan_create_table.cpp +3 -3
  24. package/src/duckdb/src/execution/physical_plan/plan_delete.cpp +1 -1
  25. package/src/duckdb/src/execution/physical_plan/plan_insert.cpp +1 -1
  26. package/src/duckdb/src/execution/physical_plan/plan_update.cpp +1 -1
  27. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +172 -63
  28. package/src/duckdb/src/function/cast/cast_function_set.cpp +2 -1
  29. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +15 -9
  30. package/src/duckdb/src/function/scalar/sequence/nextval.cpp +29 -29
  31. package/src/duckdb/src/function/scalar/string/damerau_levenshtein.cpp +106 -0
  32. package/src/duckdb/src/function/scalar/string/regexp.cpp +145 -28
  33. package/src/duckdb/src/function/scalar/string_functions.cpp +1 -0
  34. package/src/duckdb/src/function/table/checkpoint.cpp +4 -4
  35. package/src/duckdb/src/function/table/system/duckdb_columns.cpp +24 -24
  36. package/src/duckdb/src/function/table/system/duckdb_constraints.cpp +7 -6
  37. package/src/duckdb/src/function/table/system/duckdb_databases.cpp +1 -1
  38. package/src/duckdb/src/function/table/system/duckdb_dependencies.cpp +11 -11
  39. package/src/duckdb/src/function/table/system/pragma_database_size.cpp +1 -1
  40. package/src/duckdb/src/function/table/system/pragma_table_info.cpp +17 -18
  41. package/src/duckdb/src/function/table/table_scan.cpp +8 -11
  42. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  43. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +9 -9
  44. package/src/duckdb/src/include/duckdb/catalog/catalog_entry_map.hpp +38 -0
  45. package/src/duckdb/src/include/duckdb/catalog/catalog_transaction.hpp +4 -3
  46. package/src/duckdb/src/include/duckdb/catalog/default/default_functions.hpp +2 -2
  47. package/src/duckdb/src/include/duckdb/catalog/default/default_types.hpp +2 -2
  48. package/src/duckdb/src/include/duckdb/catalog/default/default_views.hpp +2 -2
  49. package/src/duckdb/src/include/duckdb/catalog/dependency.hpp +4 -5
  50. package/src/duckdb/src/include/duckdb/catalog/dependency_list.hpp +4 -5
  51. package/src/duckdb/src/include/duckdb/catalog/dependency_manager.hpp +10 -9
  52. package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -1
  53. package/src/duckdb/src/include/duckdb/common/field_writer.hpp +1 -1
  54. package/src/duckdb/src/include/duckdb/common/helper.hpp +9 -0
  55. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +29 -6
  56. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +6 -5
  57. package/src/duckdb/src/include/duckdb/common/serializer.hpp +1 -1
  58. package/src/duckdb/src/include/duckdb/common/types/row_data_collection.hpp +1 -0
  59. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +2 -2
  60. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +5 -5
  61. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_table.hpp +2 -2
  62. package/src/duckdb/src/include/duckdb/function/cast/default_casts.hpp +3 -2
  63. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +4 -0
  64. package/src/duckdb/src/include/duckdb/main/client_config.hpp +5 -0
  65. package/src/duckdb/src/include/duckdb/main/database_manager.hpp +4 -3
  66. package/src/duckdb/src/include/duckdb/main/query_result.hpp +3 -2
  67. package/src/duckdb/src/include/duckdb/main/settings.hpp +19 -0
  68. package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +7 -7
  69. package/src/duckdb/src/include/duckdb/optimizer/matcher/expression_matcher.hpp +11 -11
  70. package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +8 -8
  71. package/src/duckdb/src/include/duckdb/optimizer/rule/arithmetic_simplification.hpp +1 -1
  72. package/src/duckdb/src/include/duckdb/optimizer/rule/case_simplification.hpp +1 -1
  73. package/src/duckdb/src/include/duckdb/optimizer/rule/comparison_simplification.hpp +1 -1
  74. package/src/duckdb/src/include/duckdb/optimizer/rule/conjunction_simplification.hpp +2 -2
  75. package/src/duckdb/src/include/duckdb/optimizer/rule/constant_folding.hpp +1 -1
  76. package/src/duckdb/src/include/duckdb/optimizer/rule/date_part_simplification.hpp +1 -1
  77. package/src/duckdb/src/include/duckdb/optimizer/rule/distributivity.hpp +1 -1
  78. package/src/duckdb/src/include/duckdb/optimizer/rule/empty_needle_removal.hpp +1 -1
  79. package/src/duckdb/src/include/duckdb/optimizer/rule/enum_comparison.hpp +1 -1
  80. package/src/duckdb/src/include/duckdb/optimizer/rule/equal_or_null_simplification.hpp +1 -1
  81. package/src/duckdb/src/include/duckdb/optimizer/rule/in_clause_simplification.hpp +1 -1
  82. package/src/duckdb/src/include/duckdb/optimizer/rule/like_optimizations.hpp +1 -1
  83. package/src/duckdb/src/include/duckdb/optimizer/rule/move_constants.hpp +1 -1
  84. package/src/duckdb/src/include/duckdb/optimizer/rule/ordered_aggregate_optimizer.hpp +1 -1
  85. package/src/duckdb/src/include/duckdb/optimizer/rule/regex_optimizations.hpp +1 -1
  86. package/src/duckdb/src/include/duckdb/optimizer/rule.hpp +2 -2
  87. package/src/duckdb/src/include/duckdb/parser/base_expression.hpp +1 -1
  88. package/src/duckdb/src/include/duckdb/parser/expression_map.hpp +19 -6
  89. package/src/duckdb/src/include/duckdb/parser/expression_util.hpp +1 -1
  90. package/src/duckdb/src/include/duckdb/parser/parser.hpp +1 -7
  91. package/src/duckdb/src/include/duckdb/parser/parser_options.hpp +23 -0
  92. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +5 -3
  93. package/src/duckdb/src/include/duckdb/planner/expression.hpp +5 -2
  94. package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +1 -1
  95. package/src/duckdb/src/include/duckdb/planner/expression_binder/order_binder.hpp +3 -3
  96. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +10 -2
  97. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +1 -0
  98. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +49 -126
  99. package/src/duckdb/src/include/duckdb/storage/meta_block_reader.hpp +5 -5
  100. package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +159 -0
  101. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +1 -0
  102. package/src/duckdb/src/include/duckdb/transaction/meta_transaction.hpp +6 -5
  103. package/src/duckdb/src/main/client_context.cpp +5 -3
  104. package/src/duckdb/src/main/config.cpp +2 -0
  105. package/src/duckdb/src/main/database.cpp +2 -1
  106. package/src/duckdb/src/main/database_manager.cpp +4 -4
  107. package/src/duckdb/src/main/settings/settings.cpp +36 -0
  108. package/src/duckdb/src/optimizer/common_aggregate_optimizer.cpp +2 -2
  109. package/src/duckdb/src/optimizer/cse_optimizer.cpp +4 -4
  110. package/src/duckdb/src/optimizer/deliminator.cpp +13 -11
  111. package/src/duckdb/src/optimizer/expression_rewriter.cpp +2 -2
  112. package/src/duckdb/src/optimizer/filter_combiner.cpp +67 -65
  113. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +1 -0
  114. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +26 -25
  115. package/src/duckdb/src/optimizer/matcher/expression_matcher.cpp +23 -21
  116. package/src/duckdb/src/optimizer/rule/arithmetic_simplification.cpp +7 -6
  117. package/src/duckdb/src/optimizer/rule/case_simplification.cpp +2 -2
  118. package/src/duckdb/src/optimizer/rule/comparison_simplification.cpp +6 -7
  119. package/src/duckdb/src/optimizer/rule/conjunction_simplification.cpp +9 -8
  120. package/src/duckdb/src/optimizer/rule/constant_folding.cpp +7 -7
  121. package/src/duckdb/src/optimizer/rule/date_part_simplification.cpp +3 -3
  122. package/src/duckdb/src/optimizer/rule/distributivity.cpp +5 -5
  123. package/src/duckdb/src/optimizer/rule/empty_needle_removal.cpp +6 -6
  124. package/src/duckdb/src/optimizer/rule/enum_comparison.cpp +4 -4
  125. package/src/duckdb/src/optimizer/rule/equal_or_null_simplification.cpp +23 -26
  126. package/src/duckdb/src/optimizer/rule/in_clause_simplification_rule.cpp +2 -3
  127. package/src/duckdb/src/optimizer/rule/like_optimizations.cpp +3 -3
  128. package/src/duckdb/src/optimizer/rule/move_constants.cpp +6 -6
  129. package/src/duckdb/src/optimizer/rule/ordered_aggregate_optimizer.cpp +2 -2
  130. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +3 -3
  131. package/src/duckdb/src/parser/expression_util.cpp +6 -6
  132. package/src/duckdb/src/parser/parser.cpp +1 -1
  133. package/src/duckdb/src/parser/transform/expression/transform_operator.cpp +7 -3
  134. package/src/duckdb/src/parser/transform/helpers/transform_groupby.cpp +3 -3
  135. package/src/duckdb/src/parser/transformer.cpp +6 -5
  136. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +2 -2
  137. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +3 -3
  138. package/src/duckdb/src/planner/binder/query_node/bind_setop_node.cpp +5 -5
  139. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +2 -2
  140. package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +4 -4
  141. package/src/duckdb/src/planner/expression_binder/order_binder.cpp +3 -3
  142. package/src/duckdb/src/storage/buffer/block_handle.cpp +3 -2
  143. package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -1
  144. package/src/duckdb/src/storage/buffer/buffer_handle.cpp +1 -0
  145. package/src/duckdb/src/storage/buffer/buffer_pool_reservation.cpp +3 -0
  146. package/src/duckdb/src/storage/buffer_manager.cpp +35 -726
  147. package/src/duckdb/src/storage/checkpoint_manager.cpp +2 -2
  148. package/src/duckdb/src/storage/meta_block_reader.cpp +6 -5
  149. package/src/duckdb/src/storage/standard_buffer_manager.cpp +801 -0
  150. package/src/duckdb/src/storage/wal_replay.cpp +2 -2
  151. package/src/duckdb/src/transaction/meta_transaction.cpp +13 -13
  152. package/src/duckdb/src/transaction/transaction.cpp +1 -1
  153. package/src/duckdb/src/transaction/transaction_context.cpp +1 -1
  154. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +949 -947
  155. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +16431 -16385
  156. package/src/duckdb/third_party/libpg_query/src_backend_parser_scan.cpp +503 -493
  157. package/src/duckdb/ub_src_function_scalar_string.cpp +2 -0
  158. package/src/duckdb/ub_src_storage.cpp +2 -0
@@ -4,6 +4,7 @@
4
4
  #include "duckdb/function/function_binder.hpp"
5
5
  #include "duckdb/storage/buffer_manager.hpp"
6
6
  #include "duckdb/planner/expression/bound_aggregate_expression.hpp"
7
+ #include "duckdb/planner/expression/bound_constant_expression.hpp"
7
8
  #include "duckdb/parser/expression_map.hpp"
8
9
  #include "duckdb/function/aggregate/distributive_functions.hpp"
9
10
 
@@ -12,7 +13,8 @@ namespace duckdb {
12
13
  struct SortedAggregateBindData : public FunctionData {
13
14
  SortedAggregateBindData(ClientContext &context, BoundAggregateExpression &expr)
14
15
  : buffer_manager(BufferManager::GetBufferManager(context)), function(expr.function),
15
- bind_info(std::move(expr.bind_info)) {
16
+ bind_info(std::move(expr.bind_info)), threshold(ClientConfig::GetConfig(context).ordered_aggregate_threshold),
17
+ external(ClientConfig::GetConfig(context).force_external) {
16
18
  auto &children = expr.children;
17
19
  arg_types.reserve(children.size());
18
20
  for (const auto &child : children) {
@@ -32,7 +34,8 @@ struct SortedAggregateBindData : public FunctionData {
32
34
 
33
35
  SortedAggregateBindData(const SortedAggregateBindData &other)
34
36
  : buffer_manager(other.buffer_manager), function(other.function), arg_types(other.arg_types),
35
- sort_types(other.sort_types), sorted_on_args(other.sorted_on_args) {
37
+ sort_types(other.sort_types), sorted_on_args(other.sorted_on_args), threshold(other.threshold),
38
+ external(other.external) {
36
39
  if (other.bind_info) {
37
40
  bind_info = other.bind_info->Copy();
38
41
  }
@@ -76,13 +79,17 @@ struct SortedAggregateBindData : public FunctionData {
76
79
  vector<BoundOrderByNode> orders;
77
80
  vector<LogicalType> sort_types;
78
81
  bool sorted_on_args;
82
+
83
+ //! The sort flush threshold
84
+ const idx_t threshold;
85
+ const bool external;
79
86
  };
80
87
 
81
88
  struct SortedAggregateState {
82
89
  //! Default buffer size, optimised for small group to avoid blowing out memory.
83
90
  static const idx_t BUFFER_CAPACITY = 16;
84
91
 
85
- SortedAggregateState() : nsel(0), offset(0) {
92
+ SortedAggregateState() : count(0), nsel(0), offset(0) {
86
93
  }
87
94
 
88
95
  static inline void InitializeBuffer(DataChunk &chunk, const vector<LogicalType> &types) {
@@ -98,7 +105,7 @@ struct SortedAggregateState {
98
105
  chunk.Initialize(Allocator::DefaultAllocator(), types);
99
106
  }
100
107
 
101
- void Flush(SortedAggregateBindData &order_bind) {
108
+ void Flush(const SortedAggregateBindData &order_bind) {
102
109
  if (ordering) {
103
110
  return;
104
111
  }
@@ -116,7 +123,9 @@ struct SortedAggregateState {
116
123
  }
117
124
  }
118
125
 
119
- void Update(SortedAggregateBindData &order_bind, DataChunk &sort_chunk, DataChunk &arg_chunk) {
126
+ void Update(const SortedAggregateBindData &order_bind, DataChunk &sort_chunk, DataChunk &arg_chunk) {
127
+ count += sort_chunk.size();
128
+
120
129
  // Lazy instantiation of the buffer chunks
121
130
  InitializeBuffer(sort_buffer, order_bind.sort_types);
122
131
  if (!order_bind.sorted_on_args) {
@@ -139,7 +148,9 @@ struct SortedAggregateState {
139
148
  }
140
149
  }
141
150
 
142
- void UpdateSlice(SortedAggregateBindData &order_bind, DataChunk &sort_inputs, DataChunk &arg_inputs) {
151
+ void UpdateSlice(const SortedAggregateBindData &order_bind, DataChunk &sort_inputs, DataChunk &arg_inputs) {
152
+ count += nsel;
153
+
143
154
  // Lazy instantiation of the buffer chunks
144
155
  InitializeBuffer(sort_buffer, order_bind.sort_types);
145
156
  if (!order_bind.sorted_on_args) {
@@ -178,25 +189,35 @@ struct SortedAggregateState {
178
189
  Flush(order_bind);
179
190
  ordering->Combine(*other.ordering);
180
191
  arguments->Combine(*other.arguments);
192
+ count += other.count;
181
193
  } else if (other.ordering) {
182
194
  // Force CDC if the other has it
183
195
  Flush(order_bind);
184
196
  ordering->Combine(*other.ordering);
197
+ count += other.count;
185
198
  } else if (other.sort_buffer.size()) {
186
199
  Update(order_bind, other.sort_buffer, other.arg_buffer);
187
200
  }
188
201
  }
189
202
 
190
- void Finalize(SortedAggregateBindData &order_bind, LocalSortState &local_sort) {
203
+ void PrefixSortBuffer(DataChunk &prefixed) {
204
+ for (column_t col_idx = 0; col_idx < sort_buffer.ColumnCount(); ++col_idx) {
205
+ prefixed.data[col_idx + 1].Reference(sort_buffer.data[col_idx]);
206
+ }
207
+ prefixed.SetCardinality(sort_buffer);
208
+ }
209
+
210
+ void Finalize(const SortedAggregateBindData &order_bind, DataChunk &prefixed, LocalSortState &local_sort) {
191
211
  if (arguments) {
192
212
  ColumnDataScanState sort_state;
193
213
  ordering->InitializeScan(sort_state);
194
214
  ColumnDataScanState arg_state;
195
215
  arguments->InitializeScan(arg_state);
196
216
  for (sort_buffer.Reset(); ordering->Scan(sort_state, sort_buffer); sort_buffer.Reset()) {
217
+ PrefixSortBuffer(prefixed);
197
218
  arg_buffer.Reset();
198
219
  arguments->Scan(arg_state, arg_buffer);
199
- local_sort.SinkChunk(sort_buffer, arg_buffer);
220
+ local_sort.SinkChunk(prefixed, arg_buffer);
200
221
  }
201
222
  ordering->Reset();
202
223
  arguments->Reset();
@@ -204,16 +225,20 @@ struct SortedAggregateState {
204
225
  ColumnDataScanState sort_state;
205
226
  ordering->InitializeScan(sort_state);
206
227
  for (sort_buffer.Reset(); ordering->Scan(sort_state, sort_buffer); sort_buffer.Reset()) {
207
- local_sort.SinkChunk(sort_buffer, sort_buffer);
228
+ PrefixSortBuffer(prefixed);
229
+ local_sort.SinkChunk(prefixed, sort_buffer);
208
230
  }
209
231
  ordering->Reset();
210
232
  } else if (order_bind.sorted_on_args) {
211
- local_sort.SinkChunk(sort_buffer, sort_buffer);
233
+ PrefixSortBuffer(prefixed);
234
+ local_sort.SinkChunk(prefixed, sort_buffer);
212
235
  } else {
213
- local_sort.SinkChunk(sort_buffer, arg_buffer);
236
+ PrefixSortBuffer(prefixed);
237
+ local_sort.SinkChunk(prefixed, arg_buffer);
214
238
  }
215
239
  }
216
240
 
241
+ idx_t count;
217
242
  unique_ptr<ColumnDataCollection> arguments;
218
243
  unique_ptr<ColumnDataCollection> ordering;
219
244
 
@@ -237,19 +262,19 @@ struct SortedAggregateFunction {
237
262
  state->~STATE();
238
263
  }
239
264
 
240
- static void ProjectInputs(Vector inputs[], SortedAggregateBindData *order_bind, idx_t input_count, idx_t count,
241
- DataChunk &arg_chunk, DataChunk &sort_chunk) {
265
+ static void ProjectInputs(Vector inputs[], const SortedAggregateBindData &order_bind, idx_t input_count,
266
+ idx_t count, DataChunk &arg_chunk, DataChunk &sort_chunk) {
242
267
  idx_t col = 0;
243
268
 
244
- if (!order_bind->sorted_on_args) {
245
- arg_chunk.InitializeEmpty(order_bind->arg_types);
269
+ if (!order_bind.sorted_on_args) {
270
+ arg_chunk.InitializeEmpty(order_bind.arg_types);
246
271
  for (auto &dst : arg_chunk.data) {
247
272
  dst.Reference(inputs[col++]);
248
273
  }
249
274
  arg_chunk.SetCardinality(count);
250
275
  }
251
276
 
252
- sort_chunk.InitializeEmpty(order_bind->sort_types);
277
+ sort_chunk.InitializeEmpty(order_bind.sort_types);
253
278
  for (auto &dst : sort_chunk.data) {
254
279
  dst.Reference(inputs[col++]);
255
280
  }
@@ -258,13 +283,13 @@ struct SortedAggregateFunction {
258
283
 
259
284
  static void SimpleUpdate(Vector inputs[], AggregateInputData &aggr_input_data, idx_t input_count, data_ptr_t state,
260
285
  idx_t count) {
261
- const auto order_bind = (SortedAggregateBindData *)aggr_input_data.bind_data;
286
+ const auto order_bind = aggr_input_data.bind_data->Cast<SortedAggregateBindData>();
262
287
  DataChunk arg_chunk;
263
288
  DataChunk sort_chunk;
264
289
  ProjectInputs(inputs, order_bind, input_count, count, arg_chunk, sort_chunk);
265
290
 
266
291
  const auto order_state = (SortedAggregateState *)state;
267
- order_state->Update(*order_bind, sort_chunk, arg_chunk);
292
+ order_state->Update(order_bind, sort_chunk, arg_chunk);
268
293
  }
269
294
 
270
295
  static void ScatterUpdate(Vector inputs[], AggregateInputData &aggr_input_data, idx_t input_count, Vector &states,
@@ -274,7 +299,7 @@ struct SortedAggregateFunction {
274
299
  }
275
300
 
276
301
  // Append the arguments to the two sub-collections
277
- const auto order_bind = (SortedAggregateBindData *)aggr_input_data.bind_data;
302
+ const auto &order_bind = aggr_input_data.bind_data->Cast<SortedAggregateBindData>();
278
303
  DataChunk arg_inputs;
279
304
  DataChunk sort_inputs;
280
305
  ProjectInputs(inputs, order_bind, input_count, count, arg_inputs, sort_inputs);
@@ -315,7 +340,7 @@ struct SortedAggregateFunction {
315
340
  continue;
316
341
  }
317
342
 
318
- order_state->UpdateSlice(*order_bind, sort_inputs, arg_inputs);
343
+ order_state->UpdateSlice(order_bind, sort_inputs, arg_inputs);
319
344
  }
320
345
  }
321
346
 
@@ -333,78 +358,162 @@ struct SortedAggregateFunction {
333
358
  }
334
359
 
335
360
  static void Finalize(Vector &states, AggregateInputData &aggr_input_data, Vector &result, idx_t count,
336
- idx_t offset) {
337
- const auto order_bind = (SortedAggregateBindData *)aggr_input_data.bind_data;
338
- auto &buffer_manager = order_bind->buffer_manager;
339
- auto &orders = order_bind->orders;
361
+ const idx_t offset) {
362
+ const auto &order_bind = aggr_input_data.bind_data->Cast<SortedAggregateBindData>();
363
+ auto &buffer_manager = order_bind.buffer_manager;
340
364
  RowLayout payload_layout;
341
- payload_layout.Initialize(order_bind->arg_types);
365
+ payload_layout.Initialize(order_bind.arg_types);
342
366
  DataChunk chunk;
343
- chunk.Initialize(Allocator::DefaultAllocator(), order_bind->arg_types);
367
+ chunk.Initialize(Allocator::DefaultAllocator(), order_bind.arg_types);
368
+ DataChunk sliced;
369
+ sliced.Initialize(Allocator::DefaultAllocator(), order_bind.arg_types);
344
370
 
345
371
  // Reusable inner state
346
- vector<data_t> agg_state(order_bind->function.state_size());
372
+ vector<data_t> agg_state(order_bind.function.state_size());
347
373
  Vector agg_state_vec(Value::POINTER((idx_t)agg_state.data()));
348
374
 
349
375
  // State variables
350
- const auto input_count = order_bind->function.arguments.size();
351
- auto bind_info = order_bind->bind_info.get();
376
+ auto bind_info = order_bind.bind_info.get();
352
377
  AggregateInputData aggr_bind_info(bind_info, Allocator::DefaultAllocator());
353
378
 
354
379
  // Inner aggregate APIs
355
- auto initialize = order_bind->function.initialize;
356
- auto destructor = order_bind->function.destructor;
357
- auto simple_update = order_bind->function.simple_update;
358
- auto update = order_bind->function.update;
359
- auto finalize = order_bind->function.finalize;
380
+ auto initialize = order_bind.function.initialize;
381
+ auto destructor = order_bind.function.destructor;
382
+ auto simple_update = order_bind.function.simple_update;
383
+ auto update = order_bind.function.update;
384
+ auto finalize = order_bind.function.finalize;
360
385
 
361
386
  auto sdata = FlatVector::GetData<SortedAggregateState *>(states);
387
+
388
+ vector<idx_t> state_unprocessed(count, 0);
362
389
  for (idx_t i = 0; i < count; ++i) {
363
- initialize(agg_state.data());
364
- auto state = sdata[i];
365
-
366
- // Apply the sort before delegating the chunks
367
- auto global_sort = make_uniq<GlobalSortState>(buffer_manager, orders, payload_layout);
368
- LocalSortState local_sort;
369
- local_sort.Initialize(*global_sort, global_sort->buffer_manager);
370
- state->Finalize(*order_bind, local_sort);
371
- global_sort->AddLocalState(local_sort);
372
-
373
- if (!global_sort->sorted_blocks.empty()) {
374
- global_sort->PrepareMergePhase();
375
- while (global_sort->sorted_blocks.size() > 1) {
376
- global_sort->InitializeMergeRound();
377
- MergeSorter merge_sorter(*global_sort, global_sort->buffer_manager);
378
- merge_sorter.PerformInMergeRound();
379
- global_sort->CompleteMergeRound(false);
390
+ state_unprocessed[i] = sdata[i]->count;
391
+ }
392
+
393
+ // Sort the input payloads on (state_idx ASC, orders)
394
+ vector<BoundOrderByNode> orders;
395
+ orders.emplace_back(BoundOrderByNode(OrderType::ASCENDING, OrderByNullType::NULLS_FIRST,
396
+ make_uniq<BoundConstantExpression>(Value::USMALLINT(0))));
397
+ for (const auto &order : order_bind.orders) {
398
+ orders.emplace_back(order.Copy());
399
+ }
400
+
401
+ auto global_sort = make_uniq<GlobalSortState>(buffer_manager, orders, payload_layout);
402
+ global_sort->external = order_bind.external;
403
+ auto local_sort = make_uniq<LocalSortState>();
404
+ local_sort->Initialize(*global_sort, global_sort->buffer_manager);
405
+
406
+ DataChunk prefixed;
407
+ prefixed.Initialize(Allocator::DefaultAllocator(), global_sort->sort_layout.logical_types);
408
+
409
+ // Go through the states accumulating values to sort until we hit the sort threshold
410
+ idx_t unsorted_count = 0;
411
+ idx_t sorted = 0;
412
+ for (idx_t finalized = 0; finalized < count;) {
413
+ if (unsorted_count < order_bind.threshold) {
414
+ auto state = sdata[finalized];
415
+ prefixed.Reset();
416
+ prefixed.data[0].Reference(Value::USMALLINT(finalized));
417
+ state->Finalize(order_bind, prefixed, *local_sort);
418
+ unsorted_count += state_unprocessed[finalized];
419
+
420
+ // Go to the next aggregate unless this is the last one
421
+ if (++finalized < count) {
422
+ continue;
380
423
  }
424
+ }
425
+
426
+ // If they were all empty (filtering) flush them
427
+ // (This can only happen on the last range)
428
+ if (!unsorted_count) {
429
+ break;
430
+ }
431
+
432
+ // Sort all the data
433
+ global_sort->AddLocalState(*local_sort);
434
+ global_sort->PrepareMergePhase();
435
+ while (global_sort->sorted_blocks.size() > 1) {
436
+ global_sort->InitializeMergeRound();
437
+ MergeSorter merge_sorter(*global_sort, global_sort->buffer_manager);
438
+ merge_sorter.PerformInMergeRound();
439
+ global_sort->CompleteMergeRound(false);
440
+ }
381
441
 
382
- PayloadScanner scanner(*global_sort);
383
- for (;;) {
384
- chunk.Reset();
385
- scanner.Scan(chunk);
386
- if (chunk.size() == 0) {
387
- break;
442
+ auto scanner = make_uniq<PayloadScanner>(*global_sort);
443
+ initialize(agg_state.data());
444
+ while (scanner->Remaining()) {
445
+ chunk.Reset();
446
+ scanner->Scan(chunk);
447
+ idx_t consumed = 0;
448
+
449
+ // Distribute the scanned chunk to the aggregates
450
+ while (consumed < chunk.size()) {
451
+ // Find the next aggregate that needs data
452
+ for (; !state_unprocessed[sorted]; ++sorted) {
453
+ // Finalize a single value at the next offset
454
+ agg_state_vec.SetVectorType(states.GetVectorType());
455
+ finalize(agg_state_vec, aggr_bind_info, result, 1, sorted + offset);
456
+ if (destructor) {
457
+ destructor(agg_state_vec, aggr_bind_info, 1);
458
+ }
459
+
460
+ initialize(agg_state.data());
461
+ }
462
+ const auto input_count = MinValue(state_unprocessed[sorted], chunk.size() - consumed);
463
+ for (column_t col_idx = 0; col_idx < chunk.ColumnCount(); ++col_idx) {
464
+ sliced.data[col_idx].Slice(chunk.data[col_idx], consumed, consumed + input_count);
388
465
  }
466
+ sliced.SetCardinality(input_count);
467
+
389
468
  // These are all simple updates, so use it if available
390
469
  if (simple_update) {
391
- simple_update(chunk.data.data(), aggr_bind_info, input_count, agg_state.data(), chunk.size());
470
+ simple_update(sliced.data.data(), aggr_bind_info, 1, agg_state.data(), sliced.size());
392
471
  } else {
393
472
  // We are only updating a constant state
394
473
  agg_state_vec.SetVectorType(VectorType::CONSTANT_VECTOR);
395
- update(chunk.data.data(), aggr_bind_info, input_count, agg_state_vec, chunk.size());
474
+ update(sliced.data.data(), aggr_bind_info, 1, agg_state_vec, sliced.size());
396
475
  }
476
+
477
+ consumed += input_count;
478
+ state_unprocessed[sorted] -= input_count;
397
479
  }
398
480
  }
399
481
 
482
+ // Finalize the last state for this sort
483
+ agg_state_vec.SetVectorType(states.GetVectorType());
484
+ finalize(agg_state_vec, aggr_bind_info, result, 1, sorted + offset);
485
+ if (destructor) {
486
+ destructor(agg_state_vec, aggr_bind_info, 1);
487
+ }
488
+ ++sorted;
489
+
490
+ // Stop if we are done
491
+ if (finalized >= count) {
492
+ break;
493
+ }
494
+
495
+ // Create a new sort
496
+ scanner.reset();
497
+ global_sort = make_uniq<GlobalSortState>(buffer_manager, orders, payload_layout);
498
+ global_sort->external = order_bind.external;
499
+ local_sort = make_uniq<LocalSortState>();
500
+ local_sort->Initialize(*global_sort, global_sort->buffer_manager);
501
+ unsorted_count = 0;
502
+ }
503
+
504
+ for (; sorted < count; ++sorted) {
505
+ initialize(agg_state.data());
506
+
400
507
  // Finalize a single value at the next offset
401
508
  agg_state_vec.SetVectorType(states.GetVectorType());
402
- finalize(agg_state_vec, aggr_bind_info, result, 1, i + offset);
509
+ finalize(agg_state_vec, aggr_bind_info, result, 1, sorted + offset);
403
510
 
404
511
  if (destructor) {
405
512
  destructor(agg_state_vec, aggr_bind_info, 1);
406
513
  }
407
514
  }
515
+
516
+ result.Verify(count);
408
517
  }
409
518
 
410
519
  static void Serialize(FieldWriter &writer, const FunctionData *bind_data, const AggregateFunction &function) {
@@ -429,15 +538,15 @@ void FunctionBinder::BindSortedAggregate(ClientContext &context, BoundAggregateE
429
538
  // similarly, we only need to ORDER BY each aggregate once
430
539
  expression_set_t seen_expressions;
431
540
  for (auto &target : groups) {
432
- seen_expressions.insert(target.get());
541
+ seen_expressions.insert(*target);
433
542
  }
434
543
  vector<BoundOrderByNode> new_order_nodes;
435
544
  for (auto &order_node : expr.order_bys->orders) {
436
- if (seen_expressions.find(order_node.expression.get()) != seen_expressions.end()) {
545
+ if (seen_expressions.find(*order_node.expression) != seen_expressions.end()) {
437
546
  // we do not need to order by this node
438
547
  continue;
439
548
  }
440
- seen_expressions.insert(order_node.expression.get());
549
+ seen_expressions.insert(*order_node.expression);
441
550
  new_order_nodes.push_back(std::move(order_node));
442
551
  }
443
552
  if (new_order_nodes.empty()) {
@@ -6,7 +6,8 @@
6
6
 
7
7
  namespace duckdb {
8
8
 
9
- BindCastInput::BindCastInput(CastFunctionSet &function_set, BindCastInfo *info, optional_ptr<ClientContext> context)
9
+ BindCastInput::BindCastInput(CastFunctionSet &function_set, optional_ptr<BindCastInfo> info,
10
+ optional_ptr<ClientContext> context)
10
11
  : function_set(function_set), info(info), context(context) {
11
12
  }
12
13
 
@@ -907,23 +907,29 @@ static scalar_function_t GetBinaryFunctionIgnoreZero(const LogicalType &type) {
907
907
  }
908
908
 
909
909
  void DivideFun::RegisterFunction(BuiltinFunctions &set) {
910
- ScalarFunctionSet functions("/");
910
+ ScalarFunctionSet fp_divide("/");
911
+ fp_divide.AddFunction(ScalarFunction({LogicalType::FLOAT, LogicalType::FLOAT}, LogicalType::FLOAT,
912
+ GetBinaryFunctionIgnoreZero<DivideOperator>(LogicalType::FLOAT)));
913
+ fp_divide.AddFunction(ScalarFunction({LogicalType::DOUBLE, LogicalType::DOUBLE}, LogicalType::DOUBLE,
914
+ GetBinaryFunctionIgnoreZero<DivideOperator>(LogicalType::DOUBLE)));
915
+ fp_divide.AddFunction(
916
+ ScalarFunction({LogicalType::INTERVAL, LogicalType::BIGINT}, LogicalType::INTERVAL,
917
+ BinaryScalarFunctionIgnoreZero<interval_t, int64_t, interval_t, DivideOperator>));
918
+ set.AddFunction(fp_divide);
919
+
920
+ ScalarFunctionSet full_divide("//");
911
921
  for (auto &type : LogicalType::Numeric()) {
912
922
  if (type.id() == LogicalTypeId::DECIMAL) {
913
923
  continue;
914
924
  } else {
915
- functions.AddFunction(
925
+ full_divide.AddFunction(
916
926
  ScalarFunction({type, type}, type, GetBinaryFunctionIgnoreZero<DivideOperator>(type)));
917
927
  }
918
928
  }
919
- functions.AddFunction(
920
- ScalarFunction({LogicalType::INTERVAL, LogicalType::BIGINT}, LogicalType::INTERVAL,
921
- BinaryScalarFunctionIgnoreZero<interval_t, int64_t, interval_t, DivideOperator>));
929
+ set.AddFunction(full_divide);
922
930
 
923
- set.AddFunction(functions);
924
-
925
- functions.name = "divide";
926
- set.AddFunction(functions);
931
+ full_divide.name = "divide";
932
+ set.AddFunction(full_divide);
927
933
  }
928
934
 
929
935
  //===--------------------------------------------------------------------===//
@@ -15,11 +15,11 @@
15
15
  namespace duckdb {
16
16
 
17
17
  struct NextvalBindData : public FunctionData {
18
- explicit NextvalBindData(SequenceCatalogEntry *sequence) : sequence(sequence) {
18
+ explicit NextvalBindData(optional_ptr<SequenceCatalogEntry> sequence) : sequence(sequence) {
19
19
  }
20
20
 
21
21
  //! The sequence to use for the nextval computation; only if the sequence is a constant
22
- SequenceCatalogEntry *sequence;
22
+ optional_ptr<SequenceCatalogEntry> sequence;
23
23
 
24
24
  unique_ptr<FunctionData> Copy() const override {
25
25
  return make_uniq<NextvalBindData>(sequence);
@@ -32,45 +32,45 @@ struct NextvalBindData : public FunctionData {
32
32
  };
33
33
 
34
34
  struct CurrentSequenceValueOperator {
35
- static int64_t Operation(DuckTransaction &transaction, SequenceCatalogEntry *seq) {
36
- lock_guard<mutex> seqlock(seq->lock);
35
+ static int64_t Operation(DuckTransaction &transaction, SequenceCatalogEntry &seq) {
36
+ lock_guard<mutex> seqlock(seq.lock);
37
37
  int64_t result;
38
- if (seq->usage_count == 0u) {
38
+ if (seq.usage_count == 0u) {
39
39
  throw SequenceException("currval: sequence is not yet defined in this session");
40
40
  }
41
- result = seq->last_value;
41
+ result = seq.last_value;
42
42
  return result;
43
43
  }
44
44
  };
45
45
 
46
46
  struct NextSequenceValueOperator {
47
- static int64_t Operation(DuckTransaction &transaction, SequenceCatalogEntry *seq) {
48
- lock_guard<mutex> seqlock(seq->lock);
47
+ static int64_t Operation(DuckTransaction &transaction, SequenceCatalogEntry &seq) {
48
+ lock_guard<mutex> seqlock(seq.lock);
49
49
  int64_t result;
50
- result = seq->counter;
51
- bool overflow = !TryAddOperator::Operation(seq->counter, seq->increment, seq->counter);
52
- if (seq->cycle) {
50
+ result = seq.counter;
51
+ bool overflow = !TryAddOperator::Operation(seq.counter, seq.increment, seq.counter);
52
+ if (seq.cycle) {
53
53
  if (overflow) {
54
- seq->counter = seq->increment < 0 ? seq->max_value : seq->min_value;
55
- } else if (seq->counter < seq->min_value) {
56
- seq->counter = seq->max_value;
57
- } else if (seq->counter > seq->max_value) {
58
- seq->counter = seq->min_value;
54
+ seq.counter = seq.increment < 0 ? seq.max_value : seq.min_value;
55
+ } else if (seq.counter < seq.min_value) {
56
+ seq.counter = seq.max_value;
57
+ } else if (seq.counter > seq.max_value) {
58
+ seq.counter = seq.min_value;
59
59
  }
60
60
  } else {
61
- if (result < seq->min_value || (overflow && seq->increment < 0)) {
62
- throw SequenceException("nextval: reached minimum value of sequence \"%s\" (%lld)", seq->name,
63
- seq->min_value);
61
+ if (result < seq.min_value || (overflow && seq.increment < 0)) {
62
+ throw SequenceException("nextval: reached minimum value of sequence \"%s\" (%lld)", seq.name,
63
+ seq.min_value);
64
64
  }
65
- if (result > seq->max_value || overflow) {
66
- throw SequenceException("nextval: reached maximum value of sequence \"%s\" (%lld)", seq->name,
67
- seq->max_value);
65
+ if (result > seq.max_value || overflow) {
66
+ throw SequenceException("nextval: reached maximum value of sequence \"%s\" (%lld)", seq.name,
67
+ seq.max_value);
68
68
  }
69
69
  }
70
- seq->last_value = result;
71
- seq->usage_count++;
72
- if (!seq->temporary) {
73
- transaction.sequence_usage[seq] = SequenceValue(seq->usage_count, seq->counter);
70
+ seq.last_value = result;
71
+ seq.usage_count++;
72
+ if (!seq.temporary) {
73
+ transaction.sequence_usage[&seq] = SequenceValue(seq.usage_count, seq.counter);
74
74
  }
75
75
  return result;
76
76
  }
@@ -98,7 +98,7 @@ static void NextValFunction(DataChunk &args, ExpressionState &state, Vector &res
98
98
  auto result_data = FlatVector::GetData<int64_t>(result);
99
99
  for (idx_t i = 0; i < args.size(); i++) {
100
100
  // get the next value from the sequence
101
- result_data[i] = OP::Operation(transaction, info.sequence);
101
+ result_data[i] = OP::Operation(transaction, *info.sequence);
102
102
  }
103
103
  } else {
104
104
  // sequence to use comes from the input
@@ -107,7 +107,7 @@ static void NextValFunction(DataChunk &args, ExpressionState &state, Vector &res
107
107
  auto sequence = BindSequence(context, value.GetString());
108
108
  // finally get the next value from the sequence
109
109
  auto &transaction = DuckTransaction::Get(context, *sequence->catalog);
110
- return OP::Operation(transaction, sequence);
110
+ return OP::Operation(transaction, *sequence);
111
111
  });
112
112
  }
113
113
  }
@@ -129,7 +129,7 @@ static unique_ptr<FunctionData> NextValBind(ClientContext &context, ScalarFuncti
129
129
  static void NextValDependency(BoundFunctionExpression &expr, DependencyList &dependencies) {
130
130
  auto &info = expr.bind_info->Cast<NextvalBindData>();
131
131
  if (info.sequence) {
132
- dependencies.AddDependency(info.sequence);
132
+ dependencies.AddDependency(*info.sequence);
133
133
  }
134
134
  }
135
135