duckdb 0.7.2-dev1803.0 → 0.7.2-dev1898.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/src/catalog/catalog.cpp +27 -27
- package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +6 -6
- package/src/duckdb/src/catalog/catalog_set.cpp +27 -25
- package/src/duckdb/src/catalog/default/default_functions.cpp +6 -6
- package/src/duckdb/src/catalog/default/default_types.cpp +4 -4
- package/src/duckdb/src/catalog/default/default_views.cpp +4 -4
- package/src/duckdb/src/catalog/dependency_list.cpp +7 -6
- package/src/duckdb/src/catalog/dependency_manager.cpp +44 -38
- package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +11 -6
- package/src/duckdb/src/common/sort/sorted_block.cpp +9 -4
- package/src/duckdb/src/common/types/batched_data_collection.cpp +2 -1
- package/src/duckdb/src/common/types/column_data_allocator.cpp +1 -0
- package/src/duckdb/src/common/types/vector.cpp +2 -2
- package/src/duckdb/src/common/vector_operations/vector_copy.cpp +14 -11
- package/src/duckdb/src/execution/operator/aggregate/distinct_aggregate_data.cpp +1 -1
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +51 -50
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +4 -0
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +14 -13
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +20 -20
- package/src/duckdb/src/execution/operator/schema/physical_create_table.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_create_table.cpp +3 -3
- package/src/duckdb/src/execution/physical_plan/plan_delete.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_insert.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_update.cpp +1 -1
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +172 -63
- package/src/duckdb/src/function/cast/cast_function_set.cpp +2 -1
- package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +15 -9
- package/src/duckdb/src/function/scalar/sequence/nextval.cpp +29 -29
- package/src/duckdb/src/function/scalar/string/damerau_levenshtein.cpp +106 -0
- package/src/duckdb/src/function/scalar/string/regexp.cpp +145 -28
- package/src/duckdb/src/function/scalar/string_functions.cpp +1 -0
- package/src/duckdb/src/function/table/checkpoint.cpp +4 -4
- package/src/duckdb/src/function/table/system/duckdb_columns.cpp +24 -24
- package/src/duckdb/src/function/table/system/duckdb_constraints.cpp +7 -6
- package/src/duckdb/src/function/table/system/duckdb_databases.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_dependencies.cpp +11 -11
- package/src/duckdb/src/function/table/system/pragma_database_size.cpp +1 -1
- package/src/duckdb/src/function/table/system/pragma_table_info.cpp +17 -18
- package/src/duckdb/src/function/table/table_scan.cpp +8 -11
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +9 -9
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry_map.hpp +38 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_transaction.hpp +4 -3
- package/src/duckdb/src/include/duckdb/catalog/default/default_functions.hpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/default/default_types.hpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/default/default_views.hpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/dependency.hpp +4 -5
- package/src/duckdb/src/include/duckdb/catalog/dependency_list.hpp +4 -5
- package/src/duckdb/src/include/duckdb/catalog/dependency_manager.hpp +10 -9
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/field_writer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/helper.hpp +9 -0
- package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +29 -6
- package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +6 -5
- package/src/duckdb/src/include/duckdb/common/serializer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/row_data_collection.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +5 -5
- package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_table.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/cast/default_casts.hpp +3 -2
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/database_manager.hpp +4 -3
- package/src/duckdb/src/include/duckdb/main/query_result.hpp +3 -2
- package/src/duckdb/src/include/duckdb/main/settings.hpp +19 -0
- package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +7 -7
- package/src/duckdb/src/include/duckdb/optimizer/matcher/expression_matcher.hpp +11 -11
- package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +8 -8
- package/src/duckdb/src/include/duckdb/optimizer/rule/arithmetic_simplification.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/rule/case_simplification.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/rule/comparison_simplification.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/rule/conjunction_simplification.hpp +2 -2
- package/src/duckdb/src/include/duckdb/optimizer/rule/constant_folding.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/rule/date_part_simplification.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/rule/distributivity.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/rule/empty_needle_removal.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/rule/enum_comparison.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/rule/equal_or_null_simplification.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/rule/in_clause_simplification.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/rule/like_optimizations.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/rule/move_constants.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/rule/ordered_aggregate_optimizer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/rule/regex_optimizations.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/rule.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parser/base_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/expression_map.hpp +19 -6
- package/src/duckdb/src/include/duckdb/parser/expression_util.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/parser.hpp +1 -7
- package/src/duckdb/src/include/duckdb/parser/parser_options.hpp +23 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +5 -3
- package/src/duckdb/src/include/duckdb/planner/expression.hpp +5 -2
- package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/order_binder.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +10 -2
- package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +49 -126
- package/src/duckdb/src/include/duckdb/storage/meta_block_reader.hpp +5 -5
- package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +159 -0
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +1 -0
- package/src/duckdb/src/include/duckdb/transaction/meta_transaction.hpp +6 -5
- package/src/duckdb/src/main/client_context.cpp +5 -3
- package/src/duckdb/src/main/config.cpp +2 -0
- package/src/duckdb/src/main/database.cpp +2 -1
- package/src/duckdb/src/main/database_manager.cpp +4 -4
- package/src/duckdb/src/main/settings/settings.cpp +36 -0
- package/src/duckdb/src/optimizer/common_aggregate_optimizer.cpp +2 -2
- package/src/duckdb/src/optimizer/cse_optimizer.cpp +4 -4
- package/src/duckdb/src/optimizer/deliminator.cpp +13 -11
- package/src/duckdb/src/optimizer/expression_rewriter.cpp +2 -2
- package/src/duckdb/src/optimizer/filter_combiner.cpp +67 -65
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +1 -0
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +26 -25
- package/src/duckdb/src/optimizer/matcher/expression_matcher.cpp +23 -21
- package/src/duckdb/src/optimizer/rule/arithmetic_simplification.cpp +7 -6
- package/src/duckdb/src/optimizer/rule/case_simplification.cpp +2 -2
- package/src/duckdb/src/optimizer/rule/comparison_simplification.cpp +6 -7
- package/src/duckdb/src/optimizer/rule/conjunction_simplification.cpp +9 -8
- package/src/duckdb/src/optimizer/rule/constant_folding.cpp +7 -7
- package/src/duckdb/src/optimizer/rule/date_part_simplification.cpp +3 -3
- package/src/duckdb/src/optimizer/rule/distributivity.cpp +5 -5
- package/src/duckdb/src/optimizer/rule/empty_needle_removal.cpp +6 -6
- package/src/duckdb/src/optimizer/rule/enum_comparison.cpp +4 -4
- package/src/duckdb/src/optimizer/rule/equal_or_null_simplification.cpp +23 -26
- package/src/duckdb/src/optimizer/rule/in_clause_simplification_rule.cpp +2 -3
- package/src/duckdb/src/optimizer/rule/like_optimizations.cpp +3 -3
- package/src/duckdb/src/optimizer/rule/move_constants.cpp +6 -6
- package/src/duckdb/src/optimizer/rule/ordered_aggregate_optimizer.cpp +2 -2
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +3 -3
- package/src/duckdb/src/parser/expression_util.cpp +6 -6
- package/src/duckdb/src/parser/parser.cpp +1 -1
- package/src/duckdb/src/parser/transform/expression/transform_operator.cpp +7 -3
- package/src/duckdb/src/parser/transform/helpers/transform_groupby.cpp +3 -3
- package/src/duckdb/src/parser/transformer.cpp +6 -5
- package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +2 -2
- package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +3 -3
- package/src/duckdb/src/planner/binder/query_node/bind_setop_node.cpp +5 -5
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +4 -4
- package/src/duckdb/src/planner/expression_binder/order_binder.cpp +3 -3
- package/src/duckdb/src/storage/buffer/block_handle.cpp +3 -2
- package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -1
- package/src/duckdb/src/storage/buffer/buffer_handle.cpp +1 -0
- package/src/duckdb/src/storage/buffer/buffer_pool_reservation.cpp +3 -0
- package/src/duckdb/src/storage/buffer_manager.cpp +35 -726
- package/src/duckdb/src/storage/checkpoint_manager.cpp +2 -2
- package/src/duckdb/src/storage/meta_block_reader.cpp +6 -5
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +801 -0
- package/src/duckdb/src/storage/wal_replay.cpp +2 -2
- package/src/duckdb/src/transaction/meta_transaction.cpp +13 -13
- package/src/duckdb/src/transaction/transaction.cpp +1 -1
- package/src/duckdb/src/transaction/transaction_context.cpp +1 -1
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +949 -947
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +16431 -16385
- package/src/duckdb/third_party/libpg_query/src_backend_parser_scan.cpp +503 -493
- package/src/duckdb/ub_src_function_scalar_string.cpp +2 -0
- package/src/duckdb/ub_src_storage.cpp +2 -0
@@ -4,6 +4,7 @@
|
|
4
4
|
#include "duckdb/function/function_binder.hpp"
|
5
5
|
#include "duckdb/storage/buffer_manager.hpp"
|
6
6
|
#include "duckdb/planner/expression/bound_aggregate_expression.hpp"
|
7
|
+
#include "duckdb/planner/expression/bound_constant_expression.hpp"
|
7
8
|
#include "duckdb/parser/expression_map.hpp"
|
8
9
|
#include "duckdb/function/aggregate/distributive_functions.hpp"
|
9
10
|
|
@@ -12,7 +13,8 @@ namespace duckdb {
|
|
12
13
|
struct SortedAggregateBindData : public FunctionData {
|
13
14
|
SortedAggregateBindData(ClientContext &context, BoundAggregateExpression &expr)
|
14
15
|
: buffer_manager(BufferManager::GetBufferManager(context)), function(expr.function),
|
15
|
-
bind_info(std::move(expr.bind_info))
|
16
|
+
bind_info(std::move(expr.bind_info)), threshold(ClientConfig::GetConfig(context).ordered_aggregate_threshold),
|
17
|
+
external(ClientConfig::GetConfig(context).force_external) {
|
16
18
|
auto &children = expr.children;
|
17
19
|
arg_types.reserve(children.size());
|
18
20
|
for (const auto &child : children) {
|
@@ -32,7 +34,8 @@ struct SortedAggregateBindData : public FunctionData {
|
|
32
34
|
|
33
35
|
SortedAggregateBindData(const SortedAggregateBindData &other)
|
34
36
|
: buffer_manager(other.buffer_manager), function(other.function), arg_types(other.arg_types),
|
35
|
-
sort_types(other.sort_types), sorted_on_args(other.sorted_on_args)
|
37
|
+
sort_types(other.sort_types), sorted_on_args(other.sorted_on_args), threshold(other.threshold),
|
38
|
+
external(other.external) {
|
36
39
|
if (other.bind_info) {
|
37
40
|
bind_info = other.bind_info->Copy();
|
38
41
|
}
|
@@ -76,13 +79,17 @@ struct SortedAggregateBindData : public FunctionData {
|
|
76
79
|
vector<BoundOrderByNode> orders;
|
77
80
|
vector<LogicalType> sort_types;
|
78
81
|
bool sorted_on_args;
|
82
|
+
|
83
|
+
//! The sort flush threshold
|
84
|
+
const idx_t threshold;
|
85
|
+
const bool external;
|
79
86
|
};
|
80
87
|
|
81
88
|
struct SortedAggregateState {
|
82
89
|
//! Default buffer size, optimised for small group to avoid blowing out memory.
|
83
90
|
static const idx_t BUFFER_CAPACITY = 16;
|
84
91
|
|
85
|
-
SortedAggregateState() : nsel(0), offset(0) {
|
92
|
+
SortedAggregateState() : count(0), nsel(0), offset(0) {
|
86
93
|
}
|
87
94
|
|
88
95
|
static inline void InitializeBuffer(DataChunk &chunk, const vector<LogicalType> &types) {
|
@@ -98,7 +105,7 @@ struct SortedAggregateState {
|
|
98
105
|
chunk.Initialize(Allocator::DefaultAllocator(), types);
|
99
106
|
}
|
100
107
|
|
101
|
-
void Flush(SortedAggregateBindData &order_bind) {
|
108
|
+
void Flush(const SortedAggregateBindData &order_bind) {
|
102
109
|
if (ordering) {
|
103
110
|
return;
|
104
111
|
}
|
@@ -116,7 +123,9 @@ struct SortedAggregateState {
|
|
116
123
|
}
|
117
124
|
}
|
118
125
|
|
119
|
-
void Update(SortedAggregateBindData &order_bind, DataChunk &sort_chunk, DataChunk &arg_chunk) {
|
126
|
+
void Update(const SortedAggregateBindData &order_bind, DataChunk &sort_chunk, DataChunk &arg_chunk) {
|
127
|
+
count += sort_chunk.size();
|
128
|
+
|
120
129
|
// Lazy instantiation of the buffer chunks
|
121
130
|
InitializeBuffer(sort_buffer, order_bind.sort_types);
|
122
131
|
if (!order_bind.sorted_on_args) {
|
@@ -139,7 +148,9 @@ struct SortedAggregateState {
|
|
139
148
|
}
|
140
149
|
}
|
141
150
|
|
142
|
-
void UpdateSlice(SortedAggregateBindData &order_bind, DataChunk &sort_inputs, DataChunk &arg_inputs) {
|
151
|
+
void UpdateSlice(const SortedAggregateBindData &order_bind, DataChunk &sort_inputs, DataChunk &arg_inputs) {
|
152
|
+
count += nsel;
|
153
|
+
|
143
154
|
// Lazy instantiation of the buffer chunks
|
144
155
|
InitializeBuffer(sort_buffer, order_bind.sort_types);
|
145
156
|
if (!order_bind.sorted_on_args) {
|
@@ -178,25 +189,35 @@ struct SortedAggregateState {
|
|
178
189
|
Flush(order_bind);
|
179
190
|
ordering->Combine(*other.ordering);
|
180
191
|
arguments->Combine(*other.arguments);
|
192
|
+
count += other.count;
|
181
193
|
} else if (other.ordering) {
|
182
194
|
// Force CDC if the other has it
|
183
195
|
Flush(order_bind);
|
184
196
|
ordering->Combine(*other.ordering);
|
197
|
+
count += other.count;
|
185
198
|
} else if (other.sort_buffer.size()) {
|
186
199
|
Update(order_bind, other.sort_buffer, other.arg_buffer);
|
187
200
|
}
|
188
201
|
}
|
189
202
|
|
190
|
-
void
|
203
|
+
void PrefixSortBuffer(DataChunk &prefixed) {
|
204
|
+
for (column_t col_idx = 0; col_idx < sort_buffer.ColumnCount(); ++col_idx) {
|
205
|
+
prefixed.data[col_idx + 1].Reference(sort_buffer.data[col_idx]);
|
206
|
+
}
|
207
|
+
prefixed.SetCardinality(sort_buffer);
|
208
|
+
}
|
209
|
+
|
210
|
+
void Finalize(const SortedAggregateBindData &order_bind, DataChunk &prefixed, LocalSortState &local_sort) {
|
191
211
|
if (arguments) {
|
192
212
|
ColumnDataScanState sort_state;
|
193
213
|
ordering->InitializeScan(sort_state);
|
194
214
|
ColumnDataScanState arg_state;
|
195
215
|
arguments->InitializeScan(arg_state);
|
196
216
|
for (sort_buffer.Reset(); ordering->Scan(sort_state, sort_buffer); sort_buffer.Reset()) {
|
217
|
+
PrefixSortBuffer(prefixed);
|
197
218
|
arg_buffer.Reset();
|
198
219
|
arguments->Scan(arg_state, arg_buffer);
|
199
|
-
local_sort.SinkChunk(
|
220
|
+
local_sort.SinkChunk(prefixed, arg_buffer);
|
200
221
|
}
|
201
222
|
ordering->Reset();
|
202
223
|
arguments->Reset();
|
@@ -204,16 +225,20 @@ struct SortedAggregateState {
|
|
204
225
|
ColumnDataScanState sort_state;
|
205
226
|
ordering->InitializeScan(sort_state);
|
206
227
|
for (sort_buffer.Reset(); ordering->Scan(sort_state, sort_buffer); sort_buffer.Reset()) {
|
207
|
-
|
228
|
+
PrefixSortBuffer(prefixed);
|
229
|
+
local_sort.SinkChunk(prefixed, sort_buffer);
|
208
230
|
}
|
209
231
|
ordering->Reset();
|
210
232
|
} else if (order_bind.sorted_on_args) {
|
211
|
-
|
233
|
+
PrefixSortBuffer(prefixed);
|
234
|
+
local_sort.SinkChunk(prefixed, sort_buffer);
|
212
235
|
} else {
|
213
|
-
|
236
|
+
PrefixSortBuffer(prefixed);
|
237
|
+
local_sort.SinkChunk(prefixed, arg_buffer);
|
214
238
|
}
|
215
239
|
}
|
216
240
|
|
241
|
+
idx_t count;
|
217
242
|
unique_ptr<ColumnDataCollection> arguments;
|
218
243
|
unique_ptr<ColumnDataCollection> ordering;
|
219
244
|
|
@@ -237,19 +262,19 @@ struct SortedAggregateFunction {
|
|
237
262
|
state->~STATE();
|
238
263
|
}
|
239
264
|
|
240
|
-
static void ProjectInputs(Vector inputs[], SortedAggregateBindData
|
241
|
-
DataChunk &arg_chunk, DataChunk &sort_chunk) {
|
265
|
+
static void ProjectInputs(Vector inputs[], const SortedAggregateBindData &order_bind, idx_t input_count,
|
266
|
+
idx_t count, DataChunk &arg_chunk, DataChunk &sort_chunk) {
|
242
267
|
idx_t col = 0;
|
243
268
|
|
244
|
-
if (!order_bind
|
245
|
-
arg_chunk.InitializeEmpty(order_bind
|
269
|
+
if (!order_bind.sorted_on_args) {
|
270
|
+
arg_chunk.InitializeEmpty(order_bind.arg_types);
|
246
271
|
for (auto &dst : arg_chunk.data) {
|
247
272
|
dst.Reference(inputs[col++]);
|
248
273
|
}
|
249
274
|
arg_chunk.SetCardinality(count);
|
250
275
|
}
|
251
276
|
|
252
|
-
sort_chunk.InitializeEmpty(order_bind
|
277
|
+
sort_chunk.InitializeEmpty(order_bind.sort_types);
|
253
278
|
for (auto &dst : sort_chunk.data) {
|
254
279
|
dst.Reference(inputs[col++]);
|
255
280
|
}
|
@@ -258,13 +283,13 @@ struct SortedAggregateFunction {
|
|
258
283
|
|
259
284
|
static void SimpleUpdate(Vector inputs[], AggregateInputData &aggr_input_data, idx_t input_count, data_ptr_t state,
|
260
285
|
idx_t count) {
|
261
|
-
const auto order_bind =
|
286
|
+
const auto order_bind = aggr_input_data.bind_data->Cast<SortedAggregateBindData>();
|
262
287
|
DataChunk arg_chunk;
|
263
288
|
DataChunk sort_chunk;
|
264
289
|
ProjectInputs(inputs, order_bind, input_count, count, arg_chunk, sort_chunk);
|
265
290
|
|
266
291
|
const auto order_state = (SortedAggregateState *)state;
|
267
|
-
order_state->Update(
|
292
|
+
order_state->Update(order_bind, sort_chunk, arg_chunk);
|
268
293
|
}
|
269
294
|
|
270
295
|
static void ScatterUpdate(Vector inputs[], AggregateInputData &aggr_input_data, idx_t input_count, Vector &states,
|
@@ -274,7 +299,7 @@ struct SortedAggregateFunction {
|
|
274
299
|
}
|
275
300
|
|
276
301
|
// Append the arguments to the two sub-collections
|
277
|
-
const auto order_bind =
|
302
|
+
const auto &order_bind = aggr_input_data.bind_data->Cast<SortedAggregateBindData>();
|
278
303
|
DataChunk arg_inputs;
|
279
304
|
DataChunk sort_inputs;
|
280
305
|
ProjectInputs(inputs, order_bind, input_count, count, arg_inputs, sort_inputs);
|
@@ -315,7 +340,7 @@ struct SortedAggregateFunction {
|
|
315
340
|
continue;
|
316
341
|
}
|
317
342
|
|
318
|
-
order_state->UpdateSlice(
|
343
|
+
order_state->UpdateSlice(order_bind, sort_inputs, arg_inputs);
|
319
344
|
}
|
320
345
|
}
|
321
346
|
|
@@ -333,78 +358,162 @@ struct SortedAggregateFunction {
|
|
333
358
|
}
|
334
359
|
|
335
360
|
static void Finalize(Vector &states, AggregateInputData &aggr_input_data, Vector &result, idx_t count,
|
336
|
-
idx_t offset) {
|
337
|
-
const auto order_bind =
|
338
|
-
auto &buffer_manager = order_bind
|
339
|
-
auto &orders = order_bind->orders;
|
361
|
+
const idx_t offset) {
|
362
|
+
const auto &order_bind = aggr_input_data.bind_data->Cast<SortedAggregateBindData>();
|
363
|
+
auto &buffer_manager = order_bind.buffer_manager;
|
340
364
|
RowLayout payload_layout;
|
341
|
-
payload_layout.Initialize(order_bind
|
365
|
+
payload_layout.Initialize(order_bind.arg_types);
|
342
366
|
DataChunk chunk;
|
343
|
-
chunk.Initialize(Allocator::DefaultAllocator(), order_bind
|
367
|
+
chunk.Initialize(Allocator::DefaultAllocator(), order_bind.arg_types);
|
368
|
+
DataChunk sliced;
|
369
|
+
sliced.Initialize(Allocator::DefaultAllocator(), order_bind.arg_types);
|
344
370
|
|
345
371
|
// Reusable inner state
|
346
|
-
vector<data_t> agg_state(order_bind
|
372
|
+
vector<data_t> agg_state(order_bind.function.state_size());
|
347
373
|
Vector agg_state_vec(Value::POINTER((idx_t)agg_state.data()));
|
348
374
|
|
349
375
|
// State variables
|
350
|
-
|
351
|
-
auto bind_info = order_bind->bind_info.get();
|
376
|
+
auto bind_info = order_bind.bind_info.get();
|
352
377
|
AggregateInputData aggr_bind_info(bind_info, Allocator::DefaultAllocator());
|
353
378
|
|
354
379
|
// Inner aggregate APIs
|
355
|
-
auto initialize = order_bind
|
356
|
-
auto destructor = order_bind
|
357
|
-
auto simple_update = order_bind
|
358
|
-
auto update = order_bind
|
359
|
-
auto finalize = order_bind
|
380
|
+
auto initialize = order_bind.function.initialize;
|
381
|
+
auto destructor = order_bind.function.destructor;
|
382
|
+
auto simple_update = order_bind.function.simple_update;
|
383
|
+
auto update = order_bind.function.update;
|
384
|
+
auto finalize = order_bind.function.finalize;
|
360
385
|
|
361
386
|
auto sdata = FlatVector::GetData<SortedAggregateState *>(states);
|
387
|
+
|
388
|
+
vector<idx_t> state_unprocessed(count, 0);
|
362
389
|
for (idx_t i = 0; i < count; ++i) {
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
390
|
+
state_unprocessed[i] = sdata[i]->count;
|
391
|
+
}
|
392
|
+
|
393
|
+
// Sort the input payloads on (state_idx ASC, orders)
|
394
|
+
vector<BoundOrderByNode> orders;
|
395
|
+
orders.emplace_back(BoundOrderByNode(OrderType::ASCENDING, OrderByNullType::NULLS_FIRST,
|
396
|
+
make_uniq<BoundConstantExpression>(Value::USMALLINT(0))));
|
397
|
+
for (const auto &order : order_bind.orders) {
|
398
|
+
orders.emplace_back(order.Copy());
|
399
|
+
}
|
400
|
+
|
401
|
+
auto global_sort = make_uniq<GlobalSortState>(buffer_manager, orders, payload_layout);
|
402
|
+
global_sort->external = order_bind.external;
|
403
|
+
auto local_sort = make_uniq<LocalSortState>();
|
404
|
+
local_sort->Initialize(*global_sort, global_sort->buffer_manager);
|
405
|
+
|
406
|
+
DataChunk prefixed;
|
407
|
+
prefixed.Initialize(Allocator::DefaultAllocator(), global_sort->sort_layout.logical_types);
|
408
|
+
|
409
|
+
// Go through the states accumulating values to sort until we hit the sort threshold
|
410
|
+
idx_t unsorted_count = 0;
|
411
|
+
idx_t sorted = 0;
|
412
|
+
for (idx_t finalized = 0; finalized < count;) {
|
413
|
+
if (unsorted_count < order_bind.threshold) {
|
414
|
+
auto state = sdata[finalized];
|
415
|
+
prefixed.Reset();
|
416
|
+
prefixed.data[0].Reference(Value::USMALLINT(finalized));
|
417
|
+
state->Finalize(order_bind, prefixed, *local_sort);
|
418
|
+
unsorted_count += state_unprocessed[finalized];
|
419
|
+
|
420
|
+
// Go to the next aggregate unless this is the last one
|
421
|
+
if (++finalized < count) {
|
422
|
+
continue;
|
380
423
|
}
|
424
|
+
}
|
425
|
+
|
426
|
+
// If they were all empty (filtering) flush them
|
427
|
+
// (This can only happen on the last range)
|
428
|
+
if (!unsorted_count) {
|
429
|
+
break;
|
430
|
+
}
|
431
|
+
|
432
|
+
// Sort all the data
|
433
|
+
global_sort->AddLocalState(*local_sort);
|
434
|
+
global_sort->PrepareMergePhase();
|
435
|
+
while (global_sort->sorted_blocks.size() > 1) {
|
436
|
+
global_sort->InitializeMergeRound();
|
437
|
+
MergeSorter merge_sorter(*global_sort, global_sort->buffer_manager);
|
438
|
+
merge_sorter.PerformInMergeRound();
|
439
|
+
global_sort->CompleteMergeRound(false);
|
440
|
+
}
|
381
441
|
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
442
|
+
auto scanner = make_uniq<PayloadScanner>(*global_sort);
|
443
|
+
initialize(agg_state.data());
|
444
|
+
while (scanner->Remaining()) {
|
445
|
+
chunk.Reset();
|
446
|
+
scanner->Scan(chunk);
|
447
|
+
idx_t consumed = 0;
|
448
|
+
|
449
|
+
// Distribute the scanned chunk to the aggregates
|
450
|
+
while (consumed < chunk.size()) {
|
451
|
+
// Find the next aggregate that needs data
|
452
|
+
for (; !state_unprocessed[sorted]; ++sorted) {
|
453
|
+
// Finalize a single value at the next offset
|
454
|
+
agg_state_vec.SetVectorType(states.GetVectorType());
|
455
|
+
finalize(agg_state_vec, aggr_bind_info, result, 1, sorted + offset);
|
456
|
+
if (destructor) {
|
457
|
+
destructor(agg_state_vec, aggr_bind_info, 1);
|
458
|
+
}
|
459
|
+
|
460
|
+
initialize(agg_state.data());
|
461
|
+
}
|
462
|
+
const auto input_count = MinValue(state_unprocessed[sorted], chunk.size() - consumed);
|
463
|
+
for (column_t col_idx = 0; col_idx < chunk.ColumnCount(); ++col_idx) {
|
464
|
+
sliced.data[col_idx].Slice(chunk.data[col_idx], consumed, consumed + input_count);
|
388
465
|
}
|
466
|
+
sliced.SetCardinality(input_count);
|
467
|
+
|
389
468
|
// These are all simple updates, so use it if available
|
390
469
|
if (simple_update) {
|
391
|
-
simple_update(
|
470
|
+
simple_update(sliced.data.data(), aggr_bind_info, 1, agg_state.data(), sliced.size());
|
392
471
|
} else {
|
393
472
|
// We are only updating a constant state
|
394
473
|
agg_state_vec.SetVectorType(VectorType::CONSTANT_VECTOR);
|
395
|
-
update(
|
474
|
+
update(sliced.data.data(), aggr_bind_info, 1, agg_state_vec, sliced.size());
|
396
475
|
}
|
476
|
+
|
477
|
+
consumed += input_count;
|
478
|
+
state_unprocessed[sorted] -= input_count;
|
397
479
|
}
|
398
480
|
}
|
399
481
|
|
482
|
+
// Finalize the last state for this sort
|
483
|
+
agg_state_vec.SetVectorType(states.GetVectorType());
|
484
|
+
finalize(agg_state_vec, aggr_bind_info, result, 1, sorted + offset);
|
485
|
+
if (destructor) {
|
486
|
+
destructor(agg_state_vec, aggr_bind_info, 1);
|
487
|
+
}
|
488
|
+
++sorted;
|
489
|
+
|
490
|
+
// Stop if we are done
|
491
|
+
if (finalized >= count) {
|
492
|
+
break;
|
493
|
+
}
|
494
|
+
|
495
|
+
// Create a new sort
|
496
|
+
scanner.reset();
|
497
|
+
global_sort = make_uniq<GlobalSortState>(buffer_manager, orders, payload_layout);
|
498
|
+
global_sort->external = order_bind.external;
|
499
|
+
local_sort = make_uniq<LocalSortState>();
|
500
|
+
local_sort->Initialize(*global_sort, global_sort->buffer_manager);
|
501
|
+
unsorted_count = 0;
|
502
|
+
}
|
503
|
+
|
504
|
+
for (; sorted < count; ++sorted) {
|
505
|
+
initialize(agg_state.data());
|
506
|
+
|
400
507
|
// Finalize a single value at the next offset
|
401
508
|
agg_state_vec.SetVectorType(states.GetVectorType());
|
402
|
-
finalize(agg_state_vec, aggr_bind_info, result, 1,
|
509
|
+
finalize(agg_state_vec, aggr_bind_info, result, 1, sorted + offset);
|
403
510
|
|
404
511
|
if (destructor) {
|
405
512
|
destructor(agg_state_vec, aggr_bind_info, 1);
|
406
513
|
}
|
407
514
|
}
|
515
|
+
|
516
|
+
result.Verify(count);
|
408
517
|
}
|
409
518
|
|
410
519
|
static void Serialize(FieldWriter &writer, const FunctionData *bind_data, const AggregateFunction &function) {
|
@@ -429,15 +538,15 @@ void FunctionBinder::BindSortedAggregate(ClientContext &context, BoundAggregateE
|
|
429
538
|
// similarly, we only need to ORDER BY each aggregate once
|
430
539
|
expression_set_t seen_expressions;
|
431
540
|
for (auto &target : groups) {
|
432
|
-
seen_expressions.insert(target
|
541
|
+
seen_expressions.insert(*target);
|
433
542
|
}
|
434
543
|
vector<BoundOrderByNode> new_order_nodes;
|
435
544
|
for (auto &order_node : expr.order_bys->orders) {
|
436
|
-
if (seen_expressions.find(order_node.expression
|
545
|
+
if (seen_expressions.find(*order_node.expression) != seen_expressions.end()) {
|
437
546
|
// we do not need to order by this node
|
438
547
|
continue;
|
439
548
|
}
|
440
|
-
seen_expressions.insert(order_node.expression
|
549
|
+
seen_expressions.insert(*order_node.expression);
|
441
550
|
new_order_nodes.push_back(std::move(order_node));
|
442
551
|
}
|
443
552
|
if (new_order_nodes.empty()) {
|
@@ -6,7 +6,8 @@
|
|
6
6
|
|
7
7
|
namespace duckdb {
|
8
8
|
|
9
|
-
BindCastInput::BindCastInput(CastFunctionSet &function_set, BindCastInfo
|
9
|
+
BindCastInput::BindCastInput(CastFunctionSet &function_set, optional_ptr<BindCastInfo> info,
|
10
|
+
optional_ptr<ClientContext> context)
|
10
11
|
: function_set(function_set), info(info), context(context) {
|
11
12
|
}
|
12
13
|
|
@@ -907,23 +907,29 @@ static scalar_function_t GetBinaryFunctionIgnoreZero(const LogicalType &type) {
|
|
907
907
|
}
|
908
908
|
|
909
909
|
void DivideFun::RegisterFunction(BuiltinFunctions &set) {
|
910
|
-
ScalarFunctionSet
|
910
|
+
ScalarFunctionSet fp_divide("/");
|
911
|
+
fp_divide.AddFunction(ScalarFunction({LogicalType::FLOAT, LogicalType::FLOAT}, LogicalType::FLOAT,
|
912
|
+
GetBinaryFunctionIgnoreZero<DivideOperator>(LogicalType::FLOAT)));
|
913
|
+
fp_divide.AddFunction(ScalarFunction({LogicalType::DOUBLE, LogicalType::DOUBLE}, LogicalType::DOUBLE,
|
914
|
+
GetBinaryFunctionIgnoreZero<DivideOperator>(LogicalType::DOUBLE)));
|
915
|
+
fp_divide.AddFunction(
|
916
|
+
ScalarFunction({LogicalType::INTERVAL, LogicalType::BIGINT}, LogicalType::INTERVAL,
|
917
|
+
BinaryScalarFunctionIgnoreZero<interval_t, int64_t, interval_t, DivideOperator>));
|
918
|
+
set.AddFunction(fp_divide);
|
919
|
+
|
920
|
+
ScalarFunctionSet full_divide("//");
|
911
921
|
for (auto &type : LogicalType::Numeric()) {
|
912
922
|
if (type.id() == LogicalTypeId::DECIMAL) {
|
913
923
|
continue;
|
914
924
|
} else {
|
915
|
-
|
925
|
+
full_divide.AddFunction(
|
916
926
|
ScalarFunction({type, type}, type, GetBinaryFunctionIgnoreZero<DivideOperator>(type)));
|
917
927
|
}
|
918
928
|
}
|
919
|
-
|
920
|
-
ScalarFunction({LogicalType::INTERVAL, LogicalType::BIGINT}, LogicalType::INTERVAL,
|
921
|
-
BinaryScalarFunctionIgnoreZero<interval_t, int64_t, interval_t, DivideOperator>));
|
929
|
+
set.AddFunction(full_divide);
|
922
930
|
|
923
|
-
|
924
|
-
|
925
|
-
functions.name = "divide";
|
926
|
-
set.AddFunction(functions);
|
931
|
+
full_divide.name = "divide";
|
932
|
+
set.AddFunction(full_divide);
|
927
933
|
}
|
928
934
|
|
929
935
|
//===--------------------------------------------------------------------===//
|
@@ -15,11 +15,11 @@
|
|
15
15
|
namespace duckdb {
|
16
16
|
|
17
17
|
struct NextvalBindData : public FunctionData {
|
18
|
-
explicit NextvalBindData(SequenceCatalogEntry
|
18
|
+
explicit NextvalBindData(optional_ptr<SequenceCatalogEntry> sequence) : sequence(sequence) {
|
19
19
|
}
|
20
20
|
|
21
21
|
//! The sequence to use for the nextval computation; only if the sequence is a constant
|
22
|
-
SequenceCatalogEntry
|
22
|
+
optional_ptr<SequenceCatalogEntry> sequence;
|
23
23
|
|
24
24
|
unique_ptr<FunctionData> Copy() const override {
|
25
25
|
return make_uniq<NextvalBindData>(sequence);
|
@@ -32,45 +32,45 @@ struct NextvalBindData : public FunctionData {
|
|
32
32
|
};
|
33
33
|
|
34
34
|
struct CurrentSequenceValueOperator {
|
35
|
-
static int64_t Operation(DuckTransaction &transaction, SequenceCatalogEntry
|
36
|
-
lock_guard<mutex> seqlock(seq
|
35
|
+
static int64_t Operation(DuckTransaction &transaction, SequenceCatalogEntry &seq) {
|
36
|
+
lock_guard<mutex> seqlock(seq.lock);
|
37
37
|
int64_t result;
|
38
|
-
if (seq
|
38
|
+
if (seq.usage_count == 0u) {
|
39
39
|
throw SequenceException("currval: sequence is not yet defined in this session");
|
40
40
|
}
|
41
|
-
result = seq
|
41
|
+
result = seq.last_value;
|
42
42
|
return result;
|
43
43
|
}
|
44
44
|
};
|
45
45
|
|
46
46
|
struct NextSequenceValueOperator {
|
47
|
-
static int64_t Operation(DuckTransaction &transaction, SequenceCatalogEntry
|
48
|
-
lock_guard<mutex> seqlock(seq
|
47
|
+
static int64_t Operation(DuckTransaction &transaction, SequenceCatalogEntry &seq) {
|
48
|
+
lock_guard<mutex> seqlock(seq.lock);
|
49
49
|
int64_t result;
|
50
|
-
result = seq
|
51
|
-
bool overflow = !TryAddOperator::Operation(seq
|
52
|
-
if (seq
|
50
|
+
result = seq.counter;
|
51
|
+
bool overflow = !TryAddOperator::Operation(seq.counter, seq.increment, seq.counter);
|
52
|
+
if (seq.cycle) {
|
53
53
|
if (overflow) {
|
54
|
-
seq
|
55
|
-
} else if (seq
|
56
|
-
seq
|
57
|
-
} else if (seq
|
58
|
-
seq
|
54
|
+
seq.counter = seq.increment < 0 ? seq.max_value : seq.min_value;
|
55
|
+
} else if (seq.counter < seq.min_value) {
|
56
|
+
seq.counter = seq.max_value;
|
57
|
+
} else if (seq.counter > seq.max_value) {
|
58
|
+
seq.counter = seq.min_value;
|
59
59
|
}
|
60
60
|
} else {
|
61
|
-
if (result < seq
|
62
|
-
throw SequenceException("nextval: reached minimum value of sequence \"%s\" (%lld)", seq
|
63
|
-
seq
|
61
|
+
if (result < seq.min_value || (overflow && seq.increment < 0)) {
|
62
|
+
throw SequenceException("nextval: reached minimum value of sequence \"%s\" (%lld)", seq.name,
|
63
|
+
seq.min_value);
|
64
64
|
}
|
65
|
-
if (result > seq
|
66
|
-
throw SequenceException("nextval: reached maximum value of sequence \"%s\" (%lld)", seq
|
67
|
-
seq
|
65
|
+
if (result > seq.max_value || overflow) {
|
66
|
+
throw SequenceException("nextval: reached maximum value of sequence \"%s\" (%lld)", seq.name,
|
67
|
+
seq.max_value);
|
68
68
|
}
|
69
69
|
}
|
70
|
-
seq
|
71
|
-
seq
|
72
|
-
if (!seq
|
73
|
-
transaction.sequence_usage[seq] = SequenceValue(seq
|
70
|
+
seq.last_value = result;
|
71
|
+
seq.usage_count++;
|
72
|
+
if (!seq.temporary) {
|
73
|
+
transaction.sequence_usage[&seq] = SequenceValue(seq.usage_count, seq.counter);
|
74
74
|
}
|
75
75
|
return result;
|
76
76
|
}
|
@@ -98,7 +98,7 @@ static void NextValFunction(DataChunk &args, ExpressionState &state, Vector &res
|
|
98
98
|
auto result_data = FlatVector::GetData<int64_t>(result);
|
99
99
|
for (idx_t i = 0; i < args.size(); i++) {
|
100
100
|
// get the next value from the sequence
|
101
|
-
result_data[i] = OP::Operation(transaction, info.sequence);
|
101
|
+
result_data[i] = OP::Operation(transaction, *info.sequence);
|
102
102
|
}
|
103
103
|
} else {
|
104
104
|
// sequence to use comes from the input
|
@@ -107,7 +107,7 @@ static void NextValFunction(DataChunk &args, ExpressionState &state, Vector &res
|
|
107
107
|
auto sequence = BindSequence(context, value.GetString());
|
108
108
|
// finally get the next value from the sequence
|
109
109
|
auto &transaction = DuckTransaction::Get(context, *sequence->catalog);
|
110
|
-
return OP::Operation(transaction, sequence);
|
110
|
+
return OP::Operation(transaction, *sequence);
|
111
111
|
});
|
112
112
|
}
|
113
113
|
}
|
@@ -129,7 +129,7 @@ static unique_ptr<FunctionData> NextValBind(ClientContext &context, ScalarFuncti
|
|
129
129
|
static void NextValDependency(BoundFunctionExpression &expr, DependencyList &dependencies) {
|
130
130
|
auto &info = expr.bind_info->Cast<NextvalBindData>();
|
131
131
|
if (info.sequence) {
|
132
|
-
dependencies.AddDependency(info.sequence);
|
132
|
+
dependencies.AddDependency(*info.sequence);
|
133
133
|
}
|
134
134
|
}
|
135
135
|
|