duckdb 0.7.2-dev2320.0 → 0.7.2-dev2410.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/data_chunk.cpp +1 -1
- package/src/duckdb/extension/icu/icu-extension.cpp +2 -2
- package/src/duckdb/extension/icu/icu-makedate.cpp +52 -0
- package/src/duckdb/extension/icu/icu-strptime.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/calendar.cpp +4 -0
- package/src/duckdb/extension/icu/third_party/icu/i18n/dangical.cpp +28 -28
- package/src/duckdb/extension/icu/third_party/icu/i18n/dangical.h +4 -4
- package/src/duckdb/extension/json/include/json_common.hpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_create.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +1 -1
- package/src/duckdb/extension/json/json_functions.cpp +2 -2
- package/src/duckdb/extension/json/json_serializer.cpp +1 -1
- package/src/duckdb/extension/parquet/column_reader.cpp +1 -1
- package/src/duckdb/extension/parquet/column_writer.cpp +3 -3
- package/src/duckdb/src/catalog/catalog_entry/scalar_macro_catalog_entry.cpp +2 -2
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +2 -2
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
- package/src/duckdb/src/common/file_buffer.cpp +8 -0
- package/src/duckdb/src/common/operator/cast_operators.cpp +24 -25
- package/src/duckdb/src/common/radix_partitioning.cpp +34 -0
- package/src/duckdb/src/common/row_operations/row_heap_scatter.cpp +2 -2
- package/src/duckdb/src/common/row_operations/row_scatter.cpp +1 -1
- package/src/duckdb/src/common/sort/partition_state.cpp +44 -124
- package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
- package/src/duckdb/src/common/types/bit.cpp +18 -18
- package/src/duckdb/src/common/types/blob.cpp +7 -7
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +1 -1
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +1 -1
- package/src/duckdb/src/common/types/hash.cpp +1 -1
- package/src/duckdb/src/common/types/hyperloglog.cpp +1 -1
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
- package/src/duckdb/src/common/types/string_heap.cpp +2 -2
- package/src/duckdb/src/common/types/string_type.cpp +2 -2
- package/src/duckdb/src/common/types/timestamp.cpp +1 -1
- package/src/duckdb/src/common/types/vector.cpp +7 -7
- package/src/duckdb/src/execution/index/art/art_key.cpp +2 -2
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +144 -31
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +698 -0
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +1 -1
- package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +7 -1
- package/src/duckdb/src/function/aggregate/distributive/arg_min_max.cpp +2 -2
- package/src/duckdb/src/function/aggregate/distributive/bitagg.cpp +2 -2
- package/src/duckdb/src/function/aggregate/distributive/bitstring_agg.cpp +2 -2
- package/src/duckdb/src/function/aggregate/distributive/first.cpp +2 -2
- package/src/duckdb/src/function/aggregate/distributive/kurtosis.cpp +3 -2
- package/src/duckdb/src/function/aggregate/distributive/minmax.cpp +2 -2
- package/src/duckdb/src/function/aggregate/distributive/skew.cpp +5 -1
- package/src/duckdb/src/function/aggregate/distributive/string_agg.cpp +1 -1
- package/src/duckdb/src/function/cast/list_casts.cpp +1 -1
- package/src/duckdb/src/function/cast/struct_cast.cpp +1 -1
- package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +3 -3
- package/src/duckdb/src/function/scalar/bit/bitstring.cpp +1 -1
- package/src/duckdb/src/function/scalar/blob/encode.cpp +1 -1
- package/src/duckdb/src/function/scalar/date/strftime.cpp +3 -3
- package/src/duckdb/src/function/scalar/generic/current_setting.cpp +1 -1
- package/src/duckdb/src/function/scalar/list/list_sort.cpp +30 -56
- package/src/duckdb/src/function/scalar/string/ascii.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/caseconvert.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/concat.cpp +6 -6
- package/src/duckdb/src/function/scalar/string/contains.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/damerau_levenshtein.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/hex.cpp +4 -4
- package/src/duckdb/src/function/scalar/string/instr.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/jaccard.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/jaro_winkler.cpp +5 -5
- package/src/duckdb/src/function/scalar/string/length.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/levenshtein.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/like.cpp +10 -11
- package/src/duckdb/src/function/scalar/string/mismatches.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/nfc_normalize.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/pad.cpp +3 -3
- package/src/duckdb/src/function/scalar/string/prefix.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/printf.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/regexp/regexp_extract_all.cpp +4 -4
- package/src/duckdb/src/function/scalar/string/repeat.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/replace.cpp +3 -3
- package/src/duckdb/src/function/scalar/string/reverse.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/starts_with.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/string_split.cpp +3 -3
- package/src/duckdb/src/function/scalar/string/strip_accents.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/substring.cpp +3 -3
- package/src/duckdb/src/function/scalar/string/suffix.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/translate.cpp +3 -3
- package/src/duckdb/src/function/scalar/string/trim.cpp +3 -3
- package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +1 -1
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -7
- package/src/duckdb/src/function/scalar/union/union_extract.cpp +1 -1
- package/src/duckdb/src/function/table/copy_csv.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_functions.cpp +2 -2
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/crypto/md5.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/enums/debug_initialize.hpp +17 -0
- package/src/duckdb/src/include/duckdb/common/enums/order_type.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/file_buffer.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/radix.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +11 -60
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +8 -6
- package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +6 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +93 -0
- package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/macro_function.hpp +17 -0
- package/src/duckdb/src/include/duckdb/function/scalar/regexp.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/scalar_macro_function.hpp +3 -0
- package/src/duckdb/src/include/duckdb/function/table_macro_function.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/capi/cast/utils.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +7 -2
- package/src/duckdb/src/include/duckdb/main/settings.hpp +13 -3
- package/src/duckdb/src/include/duckdb/optimizer/cse_optimizer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +4 -2
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/aggregate_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/alter_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +4 -3
- package/src/duckdb/src/include/duckdb/planner/expression_binder/check_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/constant_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/group_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/having_binder.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/expression_binder/index_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/insert_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/expression_binder/qualify_binder.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/expression_binder/relation_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/returning_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/table_function_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/update_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/where_binder.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +12 -9
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +11 -5
- package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +1 -1
- package/src/duckdb/src/main/capi/cast/from_decimal-c.cpp +1 -1
- package/src/duckdb/src/main/capi/result-c.cpp +2 -2
- package/src/duckdb/src/main/config.cpp +26 -0
- package/src/duckdb/src/main/settings/settings.cpp +31 -8
- package/src/duckdb/src/optimizer/cse_optimizer.cpp +9 -8
- package/src/duckdb/src/parser/expression/subquery_expression.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +2 -0
- package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +33 -29
- package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +8 -10
- package/src/duckdb/src/planner/binder/expression/bind_cast_expression.cpp +1 -1
- package/src/duckdb/src/planner/binder/expression/bind_collate_expression.cpp +2 -2
- package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +1 -1
- package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +8 -7
- package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +2 -2
- package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +6 -6
- package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +2 -2
- package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +1 -1
- package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +6 -14
- package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +2 -5
- package/src/duckdb/src/planner/binder/query_node/bind_table_macro_node.cpp +1 -1
- package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +8 -8
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +5 -5
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +2 -2
- package/src/duckdb/src/planner/binder/statement/bind_delete.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_update.cpp +2 -2
- package/src/duckdb/src/planner/binder/tableref/plan_expressionlistref.cpp +1 -1
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +4 -4
- package/src/duckdb/src/planner/expression.cpp +2 -1
- package/src/duckdb/src/planner/expression_binder/aggregate_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/alter_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +4 -4
- package/src/duckdb/src/planner/expression_binder/check_binder.cpp +4 -4
- package/src/duckdb/src/planner/expression_binder/column_alias_binder.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/constant_binder.cpp +3 -3
- package/src/duckdb/src/planner/expression_binder/group_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/having_binder.cpp +4 -4
- package/src/duckdb/src/planner/expression_binder/index_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/insert_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +3 -3
- package/src/duckdb/src/planner/expression_binder/qualify_binder.cpp +4 -4
- package/src/duckdb/src/planner/expression_binder/relation_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/returning_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/table_function_binder.cpp +3 -3
- package/src/duckdb/src/planner/expression_binder/update_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/where_binder.cpp +4 -4
- package/src/duckdb/src/planner/expression_binder.cpp +12 -12
- package/src/duckdb/src/storage/buffer/block_manager.cpp +1 -2
- package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +2 -2
- package/src/duckdb/src/storage/compression/dictionary_compression.cpp +1 -1
- package/src/duckdb/src/storage/compression/fsst.cpp +3 -3
- package/src/duckdb/src/storage/compression/string_uncompressed.cpp +1 -1
- package/src/duckdb/src/storage/meta_block_writer.cpp +4 -0
- package/src/duckdb/src/storage/partial_block_manager.cpp +11 -4
- package/src/duckdb/src/storage/single_file_block_manager.cpp +16 -9
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +5 -2
- package/src/duckdb/src/storage/statistics/string_stats.cpp +2 -2
- package/src/duckdb/src/storage/storage_manager.cpp +7 -2
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +21 -1
- package/src/duckdb/ub_src_execution_operator_join.cpp +2 -0
- package/src/statement.cpp +3 -3
@@ -1093,14 +1093,26 @@ SinkFinalizeType PhysicalWindow::Finalize(Pipeline &pipeline, Event &event, Clie
|
|
1093
1093
|
//===--------------------------------------------------------------------===//
|
1094
1094
|
class WindowGlobalSourceState : public GlobalSourceState {
|
1095
1095
|
public:
|
1096
|
-
explicit WindowGlobalSourceState(WindowGlobalSinkState &gsink) :
|
1096
|
+
explicit WindowGlobalSourceState(WindowGlobalSinkState &gsink) : gsink(*gsink.global_partition), next_bin(0) {
|
1097
1097
|
}
|
1098
1098
|
|
1099
|
-
|
1099
|
+
PartitionGlobalSinkState &gsink;
|
1100
|
+
//! The output read position.
|
1101
|
+
atomic<idx_t> next_bin;
|
1100
1102
|
|
1101
1103
|
public:
|
1102
1104
|
idx_t MaxThreads() override {
|
1103
|
-
|
1105
|
+
// If there is only one partition, we have to process it on one thread.
|
1106
|
+
if (!gsink.grouping_data) {
|
1107
|
+
return 1;
|
1108
|
+
}
|
1109
|
+
|
1110
|
+
// If there is not a lot of data, process serially.
|
1111
|
+
if (gsink.count < STANDARD_ROW_GROUPS_SIZE) {
|
1112
|
+
return 1;
|
1113
|
+
}
|
1114
|
+
|
1115
|
+
return gsink.hash_groups.size();
|
1104
1116
|
}
|
1105
1117
|
};
|
1106
1118
|
|
@@ -1112,7 +1124,7 @@ public:
|
|
1112
1124
|
using WindowExecutors = vector<WindowExecutorPtr>;
|
1113
1125
|
|
1114
1126
|
WindowLocalSourceState(const PhysicalWindow &op_p, ExecutionContext &context, WindowGlobalSourceState &gsource)
|
1115
|
-
:
|
1127
|
+
: context(context.client), op(op_p), gsink(gsource.gsink) {
|
1116
1128
|
|
1117
1129
|
vector<LogicalType> output_types;
|
1118
1130
|
for (idx_t expr_idx = 0; expr_idx < op.select_list.size(); ++expr_idx) {
|
@@ -1121,29 +1133,134 @@ public:
|
|
1121
1133
|
output_types.emplace_back(wexpr.return_type);
|
1122
1134
|
}
|
1123
1135
|
output_chunk.Initialize(Allocator::Get(context.client), output_types);
|
1136
|
+
|
1137
|
+
const auto &input_types = gsink.payload_types;
|
1138
|
+
layout.Initialize(input_types);
|
1139
|
+
input_chunk.Initialize(gsink.allocator, input_types);
|
1124
1140
|
}
|
1125
1141
|
|
1142
|
+
void MaterializeSortedData();
|
1126
1143
|
void GeneratePartition(WindowGlobalSinkState &gstate, const idx_t hash_bin);
|
1127
1144
|
void Scan(DataChunk &chunk);
|
1128
1145
|
|
1129
|
-
|
1146
|
+
HashGroupPtr hash_group;
|
1130
1147
|
ClientContext &context;
|
1131
1148
|
const PhysicalWindow &op;
|
1132
1149
|
|
1150
|
+
PartitionGlobalSinkState &gsink;
|
1151
|
+
|
1152
|
+
//! The generated input chunks
|
1153
|
+
unique_ptr<RowDataCollection> rows;
|
1154
|
+
unique_ptr<RowDataCollection> heap;
|
1155
|
+
RowLayout layout;
|
1156
|
+
//! The partition boundary mask
|
1157
|
+
vector<validity_t> partition_bits;
|
1158
|
+
ValidityMask partition_mask;
|
1159
|
+
//! The order boundary mask
|
1160
|
+
vector<validity_t> order_bits;
|
1161
|
+
ValidityMask order_mask;
|
1133
1162
|
//! The current execution functions
|
1134
1163
|
WindowExecutors window_execs;
|
1164
|
+
|
1165
|
+
//! The read partition
|
1166
|
+
idx_t hash_bin;
|
1167
|
+
//! The read cursor
|
1168
|
+
unique_ptr<RowDataCollectionScanner> scanner;
|
1169
|
+
//! Buffer for the inputs
|
1170
|
+
DataChunk input_chunk;
|
1135
1171
|
//! Buffer for window results
|
1136
1172
|
DataChunk output_chunk;
|
1137
1173
|
};
|
1138
1174
|
|
1175
|
+
void WindowLocalSourceState::MaterializeSortedData() {
|
1176
|
+
auto &global_sort_state = *hash_group->global_sort;
|
1177
|
+
if (global_sort_state.sorted_blocks.empty()) {
|
1178
|
+
return;
|
1179
|
+
}
|
1180
|
+
|
1181
|
+
// scan the sorted row data
|
1182
|
+
D_ASSERT(global_sort_state.sorted_blocks.size() == 1);
|
1183
|
+
auto &sb = *global_sort_state.sorted_blocks[0];
|
1184
|
+
|
1185
|
+
// Free up some memory before allocating more
|
1186
|
+
sb.radix_sorting_data.clear();
|
1187
|
+
sb.blob_sorting_data = nullptr;
|
1188
|
+
|
1189
|
+
// Move the sorting row blocks into our RDCs
|
1190
|
+
auto &buffer_manager = global_sort_state.buffer_manager;
|
1191
|
+
auto &sd = *sb.payload_data;
|
1192
|
+
|
1193
|
+
// Data blocks are required
|
1194
|
+
D_ASSERT(!sd.data_blocks.empty());
|
1195
|
+
auto &block = sd.data_blocks[0];
|
1196
|
+
rows = make_uniq<RowDataCollection>(buffer_manager, block->capacity, block->entry_size);
|
1197
|
+
rows->blocks = std::move(sd.data_blocks);
|
1198
|
+
rows->count = std::accumulate(rows->blocks.begin(), rows->blocks.end(), idx_t(0),
|
1199
|
+
[&](idx_t c, const unique_ptr<RowDataBlock> &b) { return c + b->count; });
|
1200
|
+
|
1201
|
+
// Heap blocks are optional, but we want both for iteration.
|
1202
|
+
if (!sd.heap_blocks.empty()) {
|
1203
|
+
auto &block = sd.heap_blocks[0];
|
1204
|
+
heap = make_uniq<RowDataCollection>(buffer_manager, block->capacity, block->entry_size);
|
1205
|
+
heap->blocks = std::move(sd.heap_blocks);
|
1206
|
+
hash_group.reset();
|
1207
|
+
} else {
|
1208
|
+
heap = make_uniq<RowDataCollection>(buffer_manager, (idx_t)Storage::BLOCK_SIZE, 1, true);
|
1209
|
+
}
|
1210
|
+
heap->count = std::accumulate(heap->blocks.begin(), heap->blocks.end(), idx_t(0),
|
1211
|
+
[&](idx_t c, const unique_ptr<RowDataBlock> &b) { return c + b->count; });
|
1212
|
+
}
|
1213
|
+
|
1139
1214
|
void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, const idx_t hash_bin_p) {
|
1140
|
-
|
1141
|
-
|
1215
|
+
// Get rid of any stale data
|
1216
|
+
hash_bin = hash_bin_p;
|
1217
|
+
|
1218
|
+
// There are three types of partitions:
|
1219
|
+
// 1. No partition (no sorting)
|
1220
|
+
// 2. One partition (sorting, but no hashing)
|
1221
|
+
// 3. Multiple partitions (sorting and hashing)
|
1222
|
+
|
1223
|
+
// How big is the partition?
|
1224
|
+
idx_t count = 0;
|
1225
|
+
if (hash_bin < gsink.hash_groups.size() && gsink.hash_groups[hash_bin]) {
|
1226
|
+
count = gsink.hash_groups[hash_bin]->count;
|
1227
|
+
} else if (gsink.rows && !hash_bin) {
|
1228
|
+
count = gsink.count;
|
1229
|
+
} else {
|
1230
|
+
return;
|
1231
|
+
}
|
1232
|
+
|
1233
|
+
// Initialise masks to false
|
1234
|
+
const auto bit_count = ValidityMask::ValidityMaskSize(count);
|
1235
|
+
partition_bits.clear();
|
1236
|
+
partition_bits.resize(bit_count, 0);
|
1237
|
+
partition_mask.Initialize(partition_bits.data());
|
1238
|
+
|
1239
|
+
order_bits.clear();
|
1240
|
+
order_bits.resize(bit_count, 0);
|
1241
|
+
order_mask.Initialize(order_bits.data());
|
1242
|
+
|
1243
|
+
// Scan the sorted data into new Collections
|
1244
|
+
auto external = gsink.external;
|
1245
|
+
if (gsink.rows && !hash_bin) {
|
1246
|
+
// Simple mask
|
1247
|
+
partition_mask.SetValidUnsafe(0);
|
1248
|
+
order_mask.SetValidUnsafe(0);
|
1249
|
+
// No partition - align the heap blocks with the row blocks
|
1250
|
+
rows = gsink.rows->CloneEmpty(gsink.rows->keep_pinned);
|
1251
|
+
heap = gsink.strings->CloneEmpty(gsink.strings->keep_pinned);
|
1252
|
+
RowDataCollectionScanner::AlignHeapBlocks(*rows, *heap, *gsink.rows, *gsink.strings, layout);
|
1253
|
+
external = true;
|
1254
|
+
} else if (hash_bin < gsink.hash_groups.size() && gsink.hash_groups[hash_bin]) {
|
1255
|
+
// Overwrite the collections with the sorted data
|
1256
|
+
hash_group = std::move(gsink.hash_groups[hash_bin]);
|
1257
|
+
hash_group->ComputeMasks(partition_mask, order_mask);
|
1258
|
+
MaterializeSortedData();
|
1259
|
+
} else {
|
1142
1260
|
return;
|
1143
1261
|
}
|
1144
1262
|
|
1145
1263
|
// Create the executors for each function
|
1146
|
-
auto &partition_mask = partition_source.partition_mask;
|
1147
1264
|
window_execs.clear();
|
1148
1265
|
for (idx_t expr_idx = 0; expr_idx < op.select_list.size(); ++expr_idx) {
|
1149
1266
|
D_ASSERT(op.select_list[expr_idx]->GetExpressionClass() == ExpressionClass::BOUND_WINDOW);
|
@@ -1154,19 +1271,20 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
|
|
1154
1271
|
|
1155
1272
|
// First pass over the input without flushing
|
1156
1273
|
// TODO: Factor out the constructor data as global state
|
1274
|
+
scanner = make_uniq<RowDataCollectionScanner>(*rows, *heap, layout, external, false);
|
1157
1275
|
idx_t input_idx = 0;
|
1158
1276
|
while (true) {
|
1159
|
-
|
1160
|
-
|
1161
|
-
if (
|
1277
|
+
input_chunk.Reset();
|
1278
|
+
scanner->Scan(input_chunk);
|
1279
|
+
if (input_chunk.size() == 0) {
|
1162
1280
|
break;
|
1163
1281
|
}
|
1164
1282
|
|
1165
1283
|
// TODO: Parallelization opportunity
|
1166
1284
|
for (auto &wexec : window_execs) {
|
1167
|
-
wexec->Sink(
|
1285
|
+
wexec->Sink(input_chunk, input_idx, scanner->Count());
|
1168
1286
|
}
|
1169
|
-
input_idx +=
|
1287
|
+
input_idx += input_chunk.size();
|
1170
1288
|
}
|
1171
1289
|
|
1172
1290
|
// TODO: Parallelization opportunity
|
@@ -1175,25 +1293,22 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
|
|
1175
1293
|
}
|
1176
1294
|
|
1177
1295
|
// External scanning assumes all blocks are swizzled.
|
1178
|
-
|
1296
|
+
scanner->ReSwizzle();
|
1179
1297
|
|
1180
1298
|
// Second pass can flush
|
1181
|
-
|
1299
|
+
scanner->Reset(true);
|
1182
1300
|
}
|
1183
1301
|
|
1184
1302
|
void WindowLocalSourceState::Scan(DataChunk &result) {
|
1185
|
-
D_ASSERT(
|
1186
|
-
if (!
|
1303
|
+
D_ASSERT(scanner);
|
1304
|
+
if (!scanner->Remaining()) {
|
1187
1305
|
return;
|
1188
1306
|
}
|
1189
1307
|
|
1190
|
-
const auto position =
|
1191
|
-
auto &input_chunk = partition_source.input_chunk;
|
1308
|
+
const auto position = scanner->Scanned();
|
1192
1309
|
input_chunk.Reset();
|
1193
|
-
|
1310
|
+
scanner->Scan(input_chunk);
|
1194
1311
|
|
1195
|
-
auto &partition_mask = partition_source.partition_mask;
|
1196
|
-
auto &order_mask = partition_source.order_mask;
|
1197
1312
|
output_chunk.Reset();
|
1198
1313
|
for (idx_t expr_idx = 0; expr_idx < window_execs.size(); ++expr_idx) {
|
1199
1314
|
auto &executor = *window_execs[expr_idx];
|
@@ -1227,9 +1342,7 @@ unique_ptr<GlobalSourceState> PhysicalWindow::GetGlobalSourceState(ClientContext
|
|
1227
1342
|
void PhysicalWindow::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate_p,
|
1228
1343
|
LocalSourceState &lstate_p) const {
|
1229
1344
|
auto &lsource = lstate_p.Cast<WindowLocalSourceState>();
|
1230
|
-
auto &lpsource = lsource.partition_source;
|
1231
1345
|
auto &gsource = gstate_p.Cast<WindowGlobalSourceState>();
|
1232
|
-
auto &gpsource = gsource.partition_source;
|
1233
1346
|
auto &gsink = sink_state->Cast<WindowGlobalSinkState>();
|
1234
1347
|
|
1235
1348
|
auto &hash_groups = gsink.global_partition->hash_groups;
|
@@ -1237,17 +1350,17 @@ void PhysicalWindow::GetData(ExecutionContext &context, DataChunk &chunk, Global
|
|
1237
1350
|
|
1238
1351
|
while (chunk.size() == 0) {
|
1239
1352
|
// Move to the next bin if we are done.
|
1240
|
-
while (!
|
1241
|
-
|
1242
|
-
|
1243
|
-
|
1244
|
-
|
1245
|
-
auto hash_bin =
|
1353
|
+
while (!lsource.scanner || !lsource.scanner->Remaining()) {
|
1354
|
+
lsource.scanner.reset();
|
1355
|
+
lsource.rows.reset();
|
1356
|
+
lsource.heap.reset();
|
1357
|
+
lsource.hash_group.reset();
|
1358
|
+
auto hash_bin = gsource.next_bin++;
|
1246
1359
|
if (hash_bin >= bin_count) {
|
1247
1360
|
return;
|
1248
1361
|
}
|
1249
1362
|
|
1250
|
-
for (; hash_bin < hash_groups.size(); hash_bin =
|
1363
|
+
for (; hash_bin < hash_groups.size(); hash_bin = gsource.next_bin++) {
|
1251
1364
|
if (hash_groups[hash_bin]) {
|
1252
1365
|
break;
|
1253
1366
|
}
|