duckdb 0.7.2-dev2320.0 → 0.7.2-dev2410.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200)
  1. package/package.json +1 -1
  2. package/src/data_chunk.cpp +1 -1
  3. package/src/duckdb/extension/icu/icu-extension.cpp +2 -2
  4. package/src/duckdb/extension/icu/icu-makedate.cpp +52 -0
  5. package/src/duckdb/extension/icu/icu-strptime.cpp +1 -1
  6. package/src/duckdb/extension/icu/third_party/icu/i18n/calendar.cpp +4 -0
  7. package/src/duckdb/extension/icu/third_party/icu/i18n/dangical.cpp +28 -28
  8. package/src/duckdb/extension/icu/third_party/icu/i18n/dangical.h +4 -4
  9. package/src/duckdb/extension/json/include/json_common.hpp +1 -1
  10. package/src/duckdb/extension/json/json_functions/json_create.cpp +1 -1
  11. package/src/duckdb/extension/json/json_functions/json_transform.cpp +1 -1
  12. package/src/duckdb/extension/json/json_functions.cpp +2 -2
  13. package/src/duckdb/extension/json/json_serializer.cpp +1 -1
  14. package/src/duckdb/extension/parquet/column_reader.cpp +1 -1
  15. package/src/duckdb/extension/parquet/column_writer.cpp +3 -3
  16. package/src/duckdb/src/catalog/catalog_entry/scalar_macro_catalog_entry.cpp +2 -2
  17. package/src/duckdb/src/common/arrow/arrow_appender.cpp +2 -2
  18. package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
  19. package/src/duckdb/src/common/file_buffer.cpp +8 -0
  20. package/src/duckdb/src/common/operator/cast_operators.cpp +24 -25
  21. package/src/duckdb/src/common/radix_partitioning.cpp +34 -0
  22. package/src/duckdb/src/common/row_operations/row_heap_scatter.cpp +2 -2
  23. package/src/duckdb/src/common/row_operations/row_scatter.cpp +1 -1
  24. package/src/duckdb/src/common/sort/partition_state.cpp +44 -124
  25. package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
  26. package/src/duckdb/src/common/types/bit.cpp +18 -18
  27. package/src/duckdb/src/common/types/blob.cpp +7 -7
  28. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +1 -1
  29. package/src/duckdb/src/common/types/column/column_data_collection.cpp +1 -1
  30. package/src/duckdb/src/common/types/hash.cpp +1 -1
  31. package/src/duckdb/src/common/types/hyperloglog.cpp +1 -1
  32. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
  33. package/src/duckdb/src/common/types/string_heap.cpp +2 -2
  34. package/src/duckdb/src/common/types/string_type.cpp +2 -2
  35. package/src/duckdb/src/common/types/timestamp.cpp +1 -1
  36. package/src/duckdb/src/common/types/vector.cpp +7 -7
  37. package/src/duckdb/src/execution/index/art/art_key.cpp +2 -2
  38. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +144 -31
  39. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +698 -0
  40. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +1 -1
  41. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +1 -1
  42. package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +7 -1
  43. package/src/duckdb/src/function/aggregate/distributive/arg_min_max.cpp +2 -2
  44. package/src/duckdb/src/function/aggregate/distributive/bitagg.cpp +2 -2
  45. package/src/duckdb/src/function/aggregate/distributive/bitstring_agg.cpp +2 -2
  46. package/src/duckdb/src/function/aggregate/distributive/first.cpp +2 -2
  47. package/src/duckdb/src/function/aggregate/distributive/kurtosis.cpp +3 -2
  48. package/src/duckdb/src/function/aggregate/distributive/minmax.cpp +2 -2
  49. package/src/duckdb/src/function/aggregate/distributive/skew.cpp +5 -1
  50. package/src/duckdb/src/function/aggregate/distributive/string_agg.cpp +1 -1
  51. package/src/duckdb/src/function/cast/list_casts.cpp +1 -1
  52. package/src/duckdb/src/function/cast/struct_cast.cpp +1 -1
  53. package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +3 -3
  54. package/src/duckdb/src/function/scalar/bit/bitstring.cpp +1 -1
  55. package/src/duckdb/src/function/scalar/blob/encode.cpp +1 -1
  56. package/src/duckdb/src/function/scalar/date/strftime.cpp +3 -3
  57. package/src/duckdb/src/function/scalar/generic/current_setting.cpp +1 -1
  58. package/src/duckdb/src/function/scalar/list/list_sort.cpp +30 -56
  59. package/src/duckdb/src/function/scalar/string/ascii.cpp +1 -1
  60. package/src/duckdb/src/function/scalar/string/caseconvert.cpp +2 -2
  61. package/src/duckdb/src/function/scalar/string/concat.cpp +6 -6
  62. package/src/duckdb/src/function/scalar/string/contains.cpp +2 -2
  63. package/src/duckdb/src/function/scalar/string/damerau_levenshtein.cpp +2 -2
  64. package/src/duckdb/src/function/scalar/string/hex.cpp +4 -4
  65. package/src/duckdb/src/function/scalar/string/instr.cpp +1 -1
  66. package/src/duckdb/src/function/scalar/string/jaccard.cpp +1 -1
  67. package/src/duckdb/src/function/scalar/string/jaro_winkler.cpp +5 -5
  68. package/src/duckdb/src/function/scalar/string/length.cpp +1 -1
  69. package/src/duckdb/src/function/scalar/string/levenshtein.cpp +2 -2
  70. package/src/duckdb/src/function/scalar/string/like.cpp +10 -11
  71. package/src/duckdb/src/function/scalar/string/mismatches.cpp +2 -2
  72. package/src/duckdb/src/function/scalar/string/nfc_normalize.cpp +1 -1
  73. package/src/duckdb/src/function/scalar/string/pad.cpp +3 -3
  74. package/src/duckdb/src/function/scalar/string/prefix.cpp +2 -2
  75. package/src/duckdb/src/function/scalar/string/printf.cpp +1 -1
  76. package/src/duckdb/src/function/scalar/string/regexp/regexp_extract_all.cpp +4 -4
  77. package/src/duckdb/src/function/scalar/string/repeat.cpp +1 -1
  78. package/src/duckdb/src/function/scalar/string/replace.cpp +3 -3
  79. package/src/duckdb/src/function/scalar/string/reverse.cpp +1 -1
  80. package/src/duckdb/src/function/scalar/string/starts_with.cpp +2 -2
  81. package/src/duckdb/src/function/scalar/string/string_split.cpp +3 -3
  82. package/src/duckdb/src/function/scalar/string/strip_accents.cpp +2 -2
  83. package/src/duckdb/src/function/scalar/string/substring.cpp +3 -3
  84. package/src/duckdb/src/function/scalar/string/suffix.cpp +2 -2
  85. package/src/duckdb/src/function/scalar/string/translate.cpp +3 -3
  86. package/src/duckdb/src/function/scalar/string/trim.cpp +3 -3
  87. package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +1 -1
  88. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -7
  89. package/src/duckdb/src/function/scalar/union/union_extract.cpp +1 -1
  90. package/src/duckdb/src/function/table/copy_csv.cpp +1 -1
  91. package/src/duckdb/src/function/table/system/duckdb_functions.cpp +2 -2
  92. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  93. package/src/duckdb/src/include/duckdb/common/crypto/md5.hpp +1 -1
  94. package/src/duckdb/src/include/duckdb/common/enums/debug_initialize.hpp +17 -0
  95. package/src/duckdb/src/include/duckdb/common/enums/order_type.hpp +8 -0
  96. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
  97. package/src/duckdb/src/include/duckdb/common/file_buffer.hpp +3 -0
  98. package/src/duckdb/src/include/duckdb/common/radix.hpp +1 -1
  99. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +3 -0
  100. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +11 -60
  101. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +8 -6
  102. package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +1 -1
  103. package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +6 -1
  104. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +93 -0
  105. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +1 -1
  106. package/src/duckdb/src/include/duckdb/function/macro_function.hpp +17 -0
  107. package/src/duckdb/src/include/duckdb/function/scalar/regexp.hpp +1 -1
  108. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -2
  109. package/src/duckdb/src/include/duckdb/function/scalar_macro_function.hpp +3 -0
  110. package/src/duckdb/src/include/duckdb/function/table_macro_function.hpp +3 -0
  111. package/src/duckdb/src/include/duckdb/main/capi/cast/utils.hpp +1 -1
  112. package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
  113. package/src/duckdb/src/include/duckdb/main/config.hpp +7 -2
  114. package/src/duckdb/src/include/duckdb/main/settings.hpp +13 -3
  115. package/src/duckdb/src/include/duckdb/optimizer/cse_optimizer.hpp +1 -1
  116. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +4 -2
  117. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +1 -0
  118. package/src/duckdb/src/include/duckdb/planner/binder.hpp +1 -1
  119. package/src/duckdb/src/include/duckdb/planner/expression_binder/aggregate_binder.hpp +1 -1
  120. package/src/duckdb/src/include/duckdb/planner/expression_binder/alter_binder.hpp +1 -1
  121. package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +4 -3
  122. package/src/duckdb/src/include/duckdb/planner/expression_binder/check_binder.hpp +1 -1
  123. package/src/duckdb/src/include/duckdb/planner/expression_binder/constant_binder.hpp +1 -1
  124. package/src/duckdb/src/include/duckdb/planner/expression_binder/group_binder.hpp +1 -1
  125. package/src/duckdb/src/include/duckdb/planner/expression_binder/having_binder.hpp +2 -2
  126. package/src/duckdb/src/include/duckdb/planner/expression_binder/index_binder.hpp +1 -1
  127. package/src/duckdb/src/include/duckdb/planner/expression_binder/insert_binder.hpp +1 -1
  128. package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +2 -2
  129. package/src/duckdb/src/include/duckdb/planner/expression_binder/qualify_binder.hpp +2 -2
  130. package/src/duckdb/src/include/duckdb/planner/expression_binder/relation_binder.hpp +1 -1
  131. package/src/duckdb/src/include/duckdb/planner/expression_binder/returning_binder.hpp +1 -1
  132. package/src/duckdb/src/include/duckdb/planner/expression_binder/table_function_binder.hpp +1 -1
  133. package/src/duckdb/src/include/duckdb/planner/expression_binder/update_binder.hpp +1 -1
  134. package/src/duckdb/src/include/duckdb/planner/expression_binder/where_binder.hpp +2 -2
  135. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +12 -9
  136. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +1 -0
  137. package/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp +3 -0
  138. package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +2 -1
  139. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +11 -5
  140. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +1 -1
  141. package/src/duckdb/src/main/capi/cast/from_decimal-c.cpp +1 -1
  142. package/src/duckdb/src/main/capi/result-c.cpp +2 -2
  143. package/src/duckdb/src/main/config.cpp +26 -0
  144. package/src/duckdb/src/main/settings/settings.cpp +31 -8
  145. package/src/duckdb/src/optimizer/cse_optimizer.cpp +9 -8
  146. package/src/duckdb/src/parser/expression/subquery_expression.cpp +1 -1
  147. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +2 -0
  148. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +33 -29
  149. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +8 -10
  150. package/src/duckdb/src/planner/binder/expression/bind_cast_expression.cpp +1 -1
  151. package/src/duckdb/src/planner/binder/expression/bind_collate_expression.cpp +2 -2
  152. package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +1 -1
  153. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +8 -7
  154. package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +2 -2
  155. package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +6 -6
  156. package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +2 -2
  157. package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +1 -1
  158. package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +6 -14
  159. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +2 -5
  160. package/src/duckdb/src/planner/binder/query_node/bind_table_macro_node.cpp +1 -1
  161. package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +8 -8
  162. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +5 -5
  163. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +2 -2
  164. package/src/duckdb/src/planner/binder/statement/bind_delete.cpp +1 -1
  165. package/src/duckdb/src/planner/binder/statement/bind_update.cpp +2 -2
  166. package/src/duckdb/src/planner/binder/tableref/plan_expressionlistref.cpp +1 -1
  167. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +4 -4
  168. package/src/duckdb/src/planner/expression.cpp +2 -1
  169. package/src/duckdb/src/planner/expression_binder/aggregate_binder.cpp +2 -2
  170. package/src/duckdb/src/planner/expression_binder/alter_binder.cpp +2 -2
  171. package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +4 -4
  172. package/src/duckdb/src/planner/expression_binder/check_binder.cpp +4 -4
  173. package/src/duckdb/src/planner/expression_binder/column_alias_binder.cpp +1 -1
  174. package/src/duckdb/src/planner/expression_binder/constant_binder.cpp +3 -3
  175. package/src/duckdb/src/planner/expression_binder/group_binder.cpp +2 -2
  176. package/src/duckdb/src/planner/expression_binder/having_binder.cpp +4 -4
  177. package/src/duckdb/src/planner/expression_binder/index_binder.cpp +2 -2
  178. package/src/duckdb/src/planner/expression_binder/insert_binder.cpp +2 -2
  179. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +3 -3
  180. package/src/duckdb/src/planner/expression_binder/qualify_binder.cpp +4 -4
  181. package/src/duckdb/src/planner/expression_binder/relation_binder.cpp +2 -2
  182. package/src/duckdb/src/planner/expression_binder/returning_binder.cpp +2 -2
  183. package/src/duckdb/src/planner/expression_binder/table_function_binder.cpp +3 -3
  184. package/src/duckdb/src/planner/expression_binder/update_binder.cpp +2 -2
  185. package/src/duckdb/src/planner/expression_binder/where_binder.cpp +4 -4
  186. package/src/duckdb/src/planner/expression_binder.cpp +12 -12
  187. package/src/duckdb/src/storage/buffer/block_manager.cpp +1 -2
  188. package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +2 -2
  189. package/src/duckdb/src/storage/compression/dictionary_compression.cpp +1 -1
  190. package/src/duckdb/src/storage/compression/fsst.cpp +3 -3
  191. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +1 -1
  192. package/src/duckdb/src/storage/meta_block_writer.cpp +4 -0
  193. package/src/duckdb/src/storage/partial_block_manager.cpp +11 -4
  194. package/src/duckdb/src/storage/single_file_block_manager.cpp +16 -9
  195. package/src/duckdb/src/storage/standard_buffer_manager.cpp +5 -2
  196. package/src/duckdb/src/storage/statistics/string_stats.cpp +2 -2
  197. package/src/duckdb/src/storage/storage_manager.cpp +7 -2
  198. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +21 -1
  199. package/src/duckdb/ub_src_execution_operator_join.cpp +2 -0
  200. package/src/statement.cpp +3 -3
@@ -1093,14 +1093,26 @@ SinkFinalizeType PhysicalWindow::Finalize(Pipeline &pipeline, Event &event, Clie
1093
1093
  //===--------------------------------------------------------------------===//
1094
1094
  class WindowGlobalSourceState : public GlobalSourceState {
1095
1095
  public:
1096
- explicit WindowGlobalSourceState(WindowGlobalSinkState &gsink) : partition_source(*gsink.global_partition) {
1096
+ explicit WindowGlobalSourceState(WindowGlobalSinkState &gsink) : gsink(*gsink.global_partition), next_bin(0) {
1097
1097
  }
1098
1098
 
1099
- PartitionGlobalSourceState partition_source;
1099
+ PartitionGlobalSinkState &gsink;
1100
+ //! The output read position.
1101
+ atomic<idx_t> next_bin;
1100
1102
 
1101
1103
  public:
1102
1104
  idx_t MaxThreads() override {
1103
- return partition_source.MaxThreads();
1105
+ // If there is only one partition, we have to process it on one thread.
1106
+ if (!gsink.grouping_data) {
1107
+ return 1;
1108
+ }
1109
+
1110
+ // If there is not a lot of data, process serially.
1111
+ if (gsink.count < STANDARD_ROW_GROUPS_SIZE) {
1112
+ return 1;
1113
+ }
1114
+
1115
+ return gsink.hash_groups.size();
1104
1116
  }
1105
1117
  };
1106
1118
 
@@ -1112,7 +1124,7 @@ public:
1112
1124
  using WindowExecutors = vector<WindowExecutorPtr>;
1113
1125
 
1114
1126
  WindowLocalSourceState(const PhysicalWindow &op_p, ExecutionContext &context, WindowGlobalSourceState &gsource)
1115
- : partition_source(gsource.partition_source.gsink), context(context.client), op(op_p) {
1127
+ : context(context.client), op(op_p), gsink(gsource.gsink) {
1116
1128
 
1117
1129
  vector<LogicalType> output_types;
1118
1130
  for (idx_t expr_idx = 0; expr_idx < op.select_list.size(); ++expr_idx) {
@@ -1121,29 +1133,134 @@ public:
1121
1133
  output_types.emplace_back(wexpr.return_type);
1122
1134
  }
1123
1135
  output_chunk.Initialize(Allocator::Get(context.client), output_types);
1136
+
1137
+ const auto &input_types = gsink.payload_types;
1138
+ layout.Initialize(input_types);
1139
+ input_chunk.Initialize(gsink.allocator, input_types);
1124
1140
  }
1125
1141
 
1142
+ void MaterializeSortedData();
1126
1143
  void GeneratePartition(WindowGlobalSinkState &gstate, const idx_t hash_bin);
1127
1144
  void Scan(DataChunk &chunk);
1128
1145
 
1129
- PartitionLocalSourceState partition_source;
1146
+ HashGroupPtr hash_group;
1130
1147
  ClientContext &context;
1131
1148
  const PhysicalWindow &op;
1132
1149
 
1150
+ PartitionGlobalSinkState &gsink;
1151
+
1152
+ //! The generated input chunks
1153
+ unique_ptr<RowDataCollection> rows;
1154
+ unique_ptr<RowDataCollection> heap;
1155
+ RowLayout layout;
1156
+ //! The partition boundary mask
1157
+ vector<validity_t> partition_bits;
1158
+ ValidityMask partition_mask;
1159
+ //! The order boundary mask
1160
+ vector<validity_t> order_bits;
1161
+ ValidityMask order_mask;
1133
1162
  //! The current execution functions
1134
1163
  WindowExecutors window_execs;
1164
+
1165
+ //! The read partition
1166
+ idx_t hash_bin;
1167
+ //! The read cursor
1168
+ unique_ptr<RowDataCollectionScanner> scanner;
1169
+ //! Buffer for the inputs
1170
+ DataChunk input_chunk;
1135
1171
  //! Buffer for window results
1136
1172
  DataChunk output_chunk;
1137
1173
  };
1138
1174
 
1175
+ void WindowLocalSourceState::MaterializeSortedData() {
1176
+ auto &global_sort_state = *hash_group->global_sort;
1177
+ if (global_sort_state.sorted_blocks.empty()) {
1178
+ return;
1179
+ }
1180
+
1181
+ // scan the sorted row data
1182
+ D_ASSERT(global_sort_state.sorted_blocks.size() == 1);
1183
+ auto &sb = *global_sort_state.sorted_blocks[0];
1184
+
1185
+ // Free up some memory before allocating more
1186
+ sb.radix_sorting_data.clear();
1187
+ sb.blob_sorting_data = nullptr;
1188
+
1189
+ // Move the sorting row blocks into our RDCs
1190
+ auto &buffer_manager = global_sort_state.buffer_manager;
1191
+ auto &sd = *sb.payload_data;
1192
+
1193
+ // Data blocks are required
1194
+ D_ASSERT(!sd.data_blocks.empty());
1195
+ auto &block = sd.data_blocks[0];
1196
+ rows = make_uniq<RowDataCollection>(buffer_manager, block->capacity, block->entry_size);
1197
+ rows->blocks = std::move(sd.data_blocks);
1198
+ rows->count = std::accumulate(rows->blocks.begin(), rows->blocks.end(), idx_t(0),
1199
+ [&](idx_t c, const unique_ptr<RowDataBlock> &b) { return c + b->count; });
1200
+
1201
+ // Heap blocks are optional, but we want both for iteration.
1202
+ if (!sd.heap_blocks.empty()) {
1203
+ auto &block = sd.heap_blocks[0];
1204
+ heap = make_uniq<RowDataCollection>(buffer_manager, block->capacity, block->entry_size);
1205
+ heap->blocks = std::move(sd.heap_blocks);
1206
+ hash_group.reset();
1207
+ } else {
1208
+ heap = make_uniq<RowDataCollection>(buffer_manager, (idx_t)Storage::BLOCK_SIZE, 1, true);
1209
+ }
1210
+ heap->count = std::accumulate(heap->blocks.begin(), heap->blocks.end(), idx_t(0),
1211
+ [&](idx_t c, const unique_ptr<RowDataBlock> &b) { return c + b->count; });
1212
+ }
1213
+
1139
1214
  void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, const idx_t hash_bin_p) {
1140
- const auto count = partition_source.GeneratePartition(hash_bin_p);
1141
- if (!count) {
1215
+ // Get rid of any stale data
1216
+ hash_bin = hash_bin_p;
1217
+
1218
+ // There are three types of partitions:
1219
+ // 1. No partition (no sorting)
1220
+ // 2. One partition (sorting, but no hashing)
1221
+ // 3. Multiple partitions (sorting and hashing)
1222
+
1223
+ // How big is the partition?
1224
+ idx_t count = 0;
1225
+ if (hash_bin < gsink.hash_groups.size() && gsink.hash_groups[hash_bin]) {
1226
+ count = gsink.hash_groups[hash_bin]->count;
1227
+ } else if (gsink.rows && !hash_bin) {
1228
+ count = gsink.count;
1229
+ } else {
1230
+ return;
1231
+ }
1232
+
1233
+ // Initialise masks to false
1234
+ const auto bit_count = ValidityMask::ValidityMaskSize(count);
1235
+ partition_bits.clear();
1236
+ partition_bits.resize(bit_count, 0);
1237
+ partition_mask.Initialize(partition_bits.data());
1238
+
1239
+ order_bits.clear();
1240
+ order_bits.resize(bit_count, 0);
1241
+ order_mask.Initialize(order_bits.data());
1242
+
1243
+ // Scan the sorted data into new Collections
1244
+ auto external = gsink.external;
1245
+ if (gsink.rows && !hash_bin) {
1246
+ // Simple mask
1247
+ partition_mask.SetValidUnsafe(0);
1248
+ order_mask.SetValidUnsafe(0);
1249
+ // No partition - align the heap blocks with the row blocks
1250
+ rows = gsink.rows->CloneEmpty(gsink.rows->keep_pinned);
1251
+ heap = gsink.strings->CloneEmpty(gsink.strings->keep_pinned);
1252
+ RowDataCollectionScanner::AlignHeapBlocks(*rows, *heap, *gsink.rows, *gsink.strings, layout);
1253
+ external = true;
1254
+ } else if (hash_bin < gsink.hash_groups.size() && gsink.hash_groups[hash_bin]) {
1255
+ // Overwrite the collections with the sorted data
1256
+ hash_group = std::move(gsink.hash_groups[hash_bin]);
1257
+ hash_group->ComputeMasks(partition_mask, order_mask);
1258
+ MaterializeSortedData();
1259
+ } else {
1142
1260
  return;
1143
1261
  }
1144
1262
 
1145
1263
  // Create the executors for each function
1146
- auto &partition_mask = partition_source.partition_mask;
1147
1264
  window_execs.clear();
1148
1265
  for (idx_t expr_idx = 0; expr_idx < op.select_list.size(); ++expr_idx) {
1149
1266
  D_ASSERT(op.select_list[expr_idx]->GetExpressionClass() == ExpressionClass::BOUND_WINDOW);
@@ -1154,19 +1271,20 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
1154
1271
 
1155
1272
  // First pass over the input without flushing
1156
1273
  // TODO: Factor out the constructor data as global state
1274
+ scanner = make_uniq<RowDataCollectionScanner>(*rows, *heap, layout, external, false);
1157
1275
  idx_t input_idx = 0;
1158
1276
  while (true) {
1159
- partition_source.input_chunk.Reset();
1160
- partition_source.scanner->Scan(partition_source.input_chunk);
1161
- if (partition_source.input_chunk.size() == 0) {
1277
+ input_chunk.Reset();
1278
+ scanner->Scan(input_chunk);
1279
+ if (input_chunk.size() == 0) {
1162
1280
  break;
1163
1281
  }
1164
1282
 
1165
1283
  // TODO: Parallelization opportunity
1166
1284
  for (auto &wexec : window_execs) {
1167
- wexec->Sink(partition_source.input_chunk, input_idx, partition_source.scanner->Count());
1285
+ wexec->Sink(input_chunk, input_idx, scanner->Count());
1168
1286
  }
1169
- input_idx += partition_source.input_chunk.size();
1287
+ input_idx += input_chunk.size();
1170
1288
  }
1171
1289
 
1172
1290
  // TODO: Parallelization opportunity
@@ -1175,25 +1293,22 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
1175
1293
  }
1176
1294
 
1177
1295
  // External scanning assumes all blocks are swizzled.
1178
- partition_source.scanner->ReSwizzle();
1296
+ scanner->ReSwizzle();
1179
1297
 
1180
1298
  // Second pass can flush
1181
- partition_source.scanner->Reset(true);
1299
+ scanner->Reset(true);
1182
1300
  }
1183
1301
 
1184
1302
  void WindowLocalSourceState::Scan(DataChunk &result) {
1185
- D_ASSERT(partition_source.scanner);
1186
- if (!partition_source.scanner->Remaining()) {
1303
+ D_ASSERT(scanner);
1304
+ if (!scanner->Remaining()) {
1187
1305
  return;
1188
1306
  }
1189
1307
 
1190
- const auto position = partition_source.scanner->Scanned();
1191
- auto &input_chunk = partition_source.input_chunk;
1308
+ const auto position = scanner->Scanned();
1192
1309
  input_chunk.Reset();
1193
- partition_source.scanner->Scan(input_chunk);
1310
+ scanner->Scan(input_chunk);
1194
1311
 
1195
- auto &partition_mask = partition_source.partition_mask;
1196
- auto &order_mask = partition_source.order_mask;
1197
1312
  output_chunk.Reset();
1198
1313
  for (idx_t expr_idx = 0; expr_idx < window_execs.size(); ++expr_idx) {
1199
1314
  auto &executor = *window_execs[expr_idx];
@@ -1227,9 +1342,7 @@ unique_ptr<GlobalSourceState> PhysicalWindow::GetGlobalSourceState(ClientContext
1227
1342
  void PhysicalWindow::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate_p,
1228
1343
  LocalSourceState &lstate_p) const {
1229
1344
  auto &lsource = lstate_p.Cast<WindowLocalSourceState>();
1230
- auto &lpsource = lsource.partition_source;
1231
1345
  auto &gsource = gstate_p.Cast<WindowGlobalSourceState>();
1232
- auto &gpsource = gsource.partition_source;
1233
1346
  auto &gsink = sink_state->Cast<WindowGlobalSinkState>();
1234
1347
 
1235
1348
  auto &hash_groups = gsink.global_partition->hash_groups;
@@ -1237,17 +1350,17 @@ void PhysicalWindow::GetData(ExecutionContext &context, DataChunk &chunk, Global
1237
1350
 
1238
1351
  while (chunk.size() == 0) {
1239
1352
  // Move to the next bin if we are done.
1240
- while (!lpsource.scanner || !lpsource.scanner->Remaining()) {
1241
- lpsource.scanner.reset();
1242
- lpsource.rows.reset();
1243
- lpsource.heap.reset();
1244
- lpsource.hash_group.reset();
1245
- auto hash_bin = gpsource.next_bin++;
1353
+ while (!lsource.scanner || !lsource.scanner->Remaining()) {
1354
+ lsource.scanner.reset();
1355
+ lsource.rows.reset();
1356
+ lsource.heap.reset();
1357
+ lsource.hash_group.reset();
1358
+ auto hash_bin = gsource.next_bin++;
1246
1359
  if (hash_bin >= bin_count) {
1247
1360
  return;
1248
1361
  }
1249
1362
 
1250
- for (; hash_bin < hash_groups.size(); hash_bin = gpsource.next_bin++) {
1363
+ for (; hash_bin < hash_groups.size(); hash_bin = gsource.next_bin++) {
1251
1364
  if (hash_groups[hash_bin]) {
1252
1365
  break;
1253
1366
  }