duckdb 0.7.2-dev1901.0 → 0.7.2-dev2233.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198)
  1. package/binding.gyp +2 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/parquet/column_reader.cpp +3 -0
  4. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +1 -1
  5. package/src/duckdb/extension/parquet/parquet_metadata.cpp +4 -2
  6. package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +1 -1
  7. package/src/duckdb/src/common/arrow/arrow_appender.cpp +69 -44
  8. package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
  9. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +20 -2
  10. package/src/duckdb/src/common/box_renderer.cpp +4 -2
  11. package/src/duckdb/src/common/constants.cpp +10 -1
  12. package/src/duckdb/src/common/filename_pattern.cpp +41 -0
  13. package/src/duckdb/src/common/hive_partitioning.cpp +144 -15
  14. package/src/duckdb/src/common/radix_partitioning.cpp +101 -369
  15. package/src/duckdb/src/common/row_operations/row_aggregate.cpp +8 -9
  16. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  17. package/src/duckdb/src/common/row_operations/row_gather.cpp +5 -3
  18. package/src/duckdb/src/common/row_operations/row_match.cpp +117 -22
  19. package/src/duckdb/src/common/row_operations/row_scatter.cpp +2 -2
  20. package/src/duckdb/src/common/sort/partition_state.cpp +1 -1
  21. package/src/duckdb/src/common/sort/sort_state.cpp +2 -1
  22. package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
  23. package/src/duckdb/src/common/types/{column_data_allocator.cpp → column/column_data_allocator.cpp} +2 -2
  24. package/src/duckdb/src/common/types/{column_data_collection.cpp → column/column_data_collection.cpp} +29 -6
  25. package/src/duckdb/src/common/types/{column_data_collection_segment.cpp → column/column_data_collection_segment.cpp} +2 -1
  26. package/src/duckdb/src/common/types/{column_data_consumer.cpp → column/column_data_consumer.cpp} +1 -1
  27. package/src/duckdb/src/common/types/{partitioned_column_data.cpp → column/partitioned_column_data.cpp} +11 -9
  28. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +316 -0
  29. package/src/duckdb/src/common/types/{row_data_collection.cpp → row/row_data_collection.cpp} +1 -1
  30. package/src/duckdb/src/common/types/{row_data_collection_scanner.cpp → row/row_data_collection_scanner.cpp} +2 -2
  31. package/src/duckdb/src/common/types/{row_layout.cpp → row/row_layout.cpp} +1 -1
  32. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +465 -0
  33. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +511 -0
  34. package/src/duckdb/src/common/types/row/tuple_data_iterator.cpp +96 -0
  35. package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +119 -0
  36. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +1200 -0
  37. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +170 -0
  38. package/src/duckdb/src/common/types/vector.cpp +1 -1
  39. package/src/duckdb/src/execution/aggregate_hashtable.cpp +252 -290
  40. package/src/duckdb/src/execution/join_hashtable.cpp +192 -328
  41. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +4 -4
  42. package/src/duckdb/src/execution/operator/helper/physical_execute.cpp +3 -3
  43. package/src/duckdb/src/execution/operator/helper/physical_limit_percent.cpp +2 -3
  44. package/src/duckdb/src/execution/operator/helper/physical_result_collector.cpp +2 -3
  45. package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +36 -21
  46. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -2
  47. package/src/duckdb/src/execution/operator/join/physical_cross_product.cpp +1 -1
  48. package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +2 -2
  49. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +166 -144
  50. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +5 -5
  51. package/src/duckdb/src/execution/operator/join/physical_join.cpp +2 -10
  52. package/src/duckdb/src/execution/operator/join/physical_positional_join.cpp +0 -1
  53. package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +2 -2
  54. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +3 -0
  55. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +71 -22
  56. package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +17 -13
  57. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +0 -7
  58. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +124 -29
  59. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +13 -11
  60. package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +3 -2
  61. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +25 -24
  62. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  63. package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +4 -3
  64. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
  65. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +1 -1
  66. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +3 -3
  67. package/src/duckdb/src/execution/partitionable_hashtable.cpp +9 -37
  68. package/src/duckdb/src/execution/physical_operator.cpp +1 -1
  69. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +19 -18
  70. package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +2 -1
  71. package/src/duckdb/src/execution/physical_plan/plan_execute.cpp +2 -2
  72. package/src/duckdb/src/execution/physical_plan/plan_explain.cpp +5 -6
  73. package/src/duckdb/src/execution/physical_plan/plan_expression_get.cpp +2 -2
  74. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +3 -3
  75. package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -1
  76. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +39 -17
  77. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -2
  78. package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +5 -5
  79. package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +2 -2
  80. package/src/duckdb/src/function/table/read_csv.cpp +124 -58
  81. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  82. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
  83. package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +1 -1
  84. package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
  85. package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -0
  86. package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +528 -0
  87. package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +34 -0
  88. package/src/duckdb/src/include/duckdb/common/helper.hpp +10 -0
  89. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +13 -3
  90. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +8 -0
  91. package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +34 -0
  92. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +80 -27
  93. package/src/duckdb/src/include/duckdb/common/reference_map.hpp +38 -0
  94. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +7 -6
  95. package/src/duckdb/src/include/duckdb/common/sort/comparators.hpp +1 -1
  96. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +1 -1
  97. package/src/duckdb/src/include/duckdb/common/sort/sort.hpp +1 -1
  98. package/src/duckdb/src/include/duckdb/common/sort/sorted_block.hpp +2 -2
  99. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +1 -1
  100. package/src/duckdb/src/include/duckdb/common/types/{column_data_allocator.hpp → column/column_data_allocator.hpp} +4 -4
  101. package/src/duckdb/src/include/duckdb/common/types/{column_data_collection.hpp → column/column_data_collection.hpp} +4 -4
  102. package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_iterators.hpp → column/column_data_collection_iterators.hpp} +2 -2
  103. package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_segment.hpp → column/column_data_collection_segment.hpp} +3 -3
  104. package/src/duckdb/src/include/duckdb/common/types/{column_data_consumer.hpp → column/column_data_consumer.hpp} +8 -4
  105. package/src/duckdb/src/include/duckdb/common/types/{column_data_scan_states.hpp → column/column_data_scan_states.hpp} +1 -1
  106. package/src/duckdb/src/include/duckdb/common/types/{partitioned_column_data.hpp → column/partitioned_column_data.hpp} +15 -7
  107. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +140 -0
  108. package/src/duckdb/src/include/duckdb/common/types/{row_data_collection.hpp → row/row_data_collection.hpp} +1 -1
  109. package/src/duckdb/src/include/duckdb/common/types/{row_data_collection_scanner.hpp → row/row_data_collection_scanner.hpp} +2 -2
  110. package/src/duckdb/src/include/duckdb/common/types/{row_layout.hpp → row/row_layout.hpp} +3 -1
  111. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +116 -0
  112. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +239 -0
  113. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_iterator.hpp +64 -0
  114. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +113 -0
  115. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +124 -0
  116. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +74 -0
  117. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
  118. package/src/duckdb/src/include/duckdb/common/types/value.hpp +4 -12
  119. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +34 -31
  120. package/src/duckdb/src/include/duckdb/execution/base_aggregate_hashtable.hpp +2 -2
  121. package/src/duckdb/src/include/duckdb/execution/execution_context.hpp +3 -2
  122. package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +1 -1
  123. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +41 -67
  124. package/src/duckdb/src/include/duckdb/execution/nested_loop_join.hpp +1 -1
  125. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_execute.hpp +2 -2
  126. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +1 -1
  127. package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +2 -2
  128. package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
  129. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +1 -1
  130. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +0 -2
  131. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +2 -2
  132. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_positional_join.hpp +1 -1
  133. package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +4 -1
  134. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +8 -3
  135. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +5 -7
  136. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +5 -1
  137. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +4 -1
  138. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +1 -1
  139. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +1 -1
  140. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +2 -2
  141. package/src/duckdb/src/include/duckdb/function/function.hpp +2 -0
  142. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +25 -0
  143. package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -0
  144. package/src/duckdb/src/include/duckdb/main/config.hpp +0 -2
  145. package/src/duckdb/src/include/duckdb/main/materialized_query_result.hpp +1 -1
  146. package/src/duckdb/src/include/duckdb/main/query_result.hpp +14 -1
  147. package/src/duckdb/src/include/duckdb/optimizer/expression_rewriter.hpp +3 -3
  148. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +16 -16
  149. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +8 -8
  150. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +23 -15
  151. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +9 -10
  152. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +18 -11
  153. package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +1 -1
  154. package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +5 -1
  155. package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +3 -2
  156. package/src/duckdb/src/include/duckdb/parser/query_error_context.hpp +4 -2
  157. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +9 -35
  158. package/src/duckdb/src/include/duckdb/planner/binder.hpp +24 -23
  159. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +3 -3
  160. package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +1 -1
  161. package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +3 -1
  162. package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
  163. package/src/duckdb/src/main/appender.cpp +6 -6
  164. package/src/duckdb/src/main/client_context.cpp +1 -1
  165. package/src/duckdb/src/main/connection.cpp +2 -2
  166. package/src/duckdb/src/main/query_result.cpp +13 -0
  167. package/src/duckdb/src/main/settings/settings.cpp +3 -4
  168. package/src/duckdb/src/optimizer/expression_rewriter.cpp +4 -4
  169. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +91 -105
  170. package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -8
  171. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +163 -160
  172. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +30 -30
  173. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +37 -38
  174. package/src/duckdb/src/parallel/executor.cpp +1 -1
  175. package/src/duckdb/src/parallel/meta_pipeline.cpp +2 -2
  176. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +1 -1
  177. package/src/duckdb/src/parser/transform/tableref/transform_subquery.cpp +1 -1
  178. package/src/duckdb/src/parser/transformer.cpp +50 -9
  179. package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +13 -0
  180. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +15 -5
  181. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +19 -17
  182. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +4 -4
  183. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +20 -21
  184. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +24 -22
  185. package/src/duckdb/src/planner/binder/tableref/bind_subqueryref.cpp +2 -2
  186. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +9 -0
  187. package/src/duckdb/src/planner/binder.cpp +16 -19
  188. package/src/duckdb/src/planner/expression_binder.cpp +8 -8
  189. package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +3 -3
  190. package/src/duckdb/src/storage/checkpoint_manager.cpp +23 -23
  191. package/src/duckdb/src/storage/standard_buffer_manager.cpp +1 -1
  192. package/src/duckdb/src/storage/table_index_list.cpp +3 -3
  193. package/src/duckdb/src/verification/statement_verifier.cpp +1 -1
  194. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +5552 -5598
  195. package/src/duckdb/ub_src_common.cpp +2 -0
  196. package/src/duckdb/ub_src_common_types.cpp +0 -16
  197. package/src/duckdb/ub_src_common_types_column.cpp +10 -0
  198. package/src/duckdb/ub_src_common_types_row.cpp +20 -0
@@ -0,0 +1,316 @@
1
+ #include "duckdb/common/types/row/partitioned_tuple_data.hpp"
2
+
3
+ #include "duckdb/common/radix_partitioning.hpp"
4
+ #include "duckdb/common/types/row/tuple_data_iterator.hpp"
5
+ #include "duckdb/storage/buffer_manager.hpp"
6
+
7
+ namespace duckdb {
8
+
9
+ PartitionedTupleData::PartitionedTupleData(PartitionedTupleDataType type_p, BufferManager &buffer_manager_p,
10
+ const TupleDataLayout &layout_p)
11
+ : type(type_p), buffer_manager(buffer_manager_p), layout(layout_p.Copy()),
12
+ allocators(make_shared<PartitionTupleDataAllocators>()) {
13
+ }
14
+
15
+ PartitionedTupleData::PartitionedTupleData(const PartitionedTupleData &other)
16
+ : type(other.type), buffer_manager(other.buffer_manager), layout(other.layout.Copy()) {
17
+ }
18
+
19
+ unique_ptr<PartitionedTupleData> PartitionedTupleData::CreateShared() {
20
+ switch (type) {
21
+ case PartitionedTupleDataType::RADIX:
22
+ return make_uniq<RadixPartitionedTupleData>((RadixPartitionedTupleData &)*this);
23
+ default:
24
+ throw NotImplementedException("CreateShared for this type of PartitionedTupleData");
25
+ }
26
+ }
27
+
28
+ PartitionedTupleData::~PartitionedTupleData() {
29
+ }
30
+
31
+ PartitionedTupleDataType PartitionedTupleData::GetType() const {
32
+ return type;
33
+ }
34
+
35
+ void PartitionedTupleData::InitializeAppendState(PartitionedTupleDataAppendState &state,
36
+ TupleDataPinProperties properties) const {
37
+ state.partition_sel.Initialize();
38
+
39
+ vector<column_t> column_ids;
40
+ column_ids.reserve(layout.ColumnCount());
41
+ for (idx_t col_idx = 0; col_idx < layout.ColumnCount(); col_idx++) {
42
+ column_ids.emplace_back(col_idx);
43
+ }
44
+
45
+ InitializeAppendStateInternal(state, properties);
46
+ }
47
+
48
+ void PartitionedTupleData::Append(PartitionedTupleDataAppendState &state, DataChunk &input) {
49
+ // Compute partition indices and store them in state.partition_indices
50
+ ComputePartitionIndices(state, input);
51
+
52
+ // Build the selection vector for the partitions
53
+ BuildPartitionSel(state, input.size());
54
+
55
+ // Early out: check if everything belongs to a single partition
56
+ const auto &partition_entries = state.partition_entries;
57
+ if (partition_entries.size() == 1) {
58
+ const auto &partition_index = partition_entries.begin()->first;
59
+ auto &partition = *partitions[partition_index];
60
+ auto &partition_pin_state = *state.partition_pin_states[partition_index];
61
+ partition.Append(partition_pin_state, state.chunk_state, input);
62
+ return;
63
+ }
64
+
65
+ TupleDataCollection::ToUnifiedFormat(state.chunk_state, input);
66
+
67
+ // Compute the heap sizes for the whole chunk
68
+ if (!layout.AllConstant()) {
69
+ TupleDataCollection::ComputeHeapSizes(state.chunk_state, input, state.partition_sel, input.size());
70
+ }
71
+
72
+ // Build the buffer space
73
+ BuildBufferSpace(state);
74
+
75
+ // Now scatter everything in one go
76
+ partitions[0]->Scatter(state.chunk_state, input, state.partition_sel, input.size());
77
+ }
78
+
79
+ void PartitionedTupleData::Append(PartitionedTupleDataAppendState &state, TupleDataChunkState &input, idx_t count) {
80
+ // Compute partition indices and store them in state.partition_indices
81
+ ComputePartitionIndices(input.row_locations, count, state.partition_indices);
82
+
83
+ // Build the selection vector for the partitions
84
+ BuildPartitionSel(state, count);
85
+
86
+ // Early out: check if everything belongs to a single partition
87
+ auto &partition_entries = state.partition_entries;
88
+ if (partition_entries.size() == 1) {
89
+ const auto &partition_index = partition_entries.begin()->first;
90
+ auto &partition = *partitions[partition_index];
91
+ auto &partition_pin_state = *state.partition_pin_states[partition_index];
92
+
93
+ state.chunk_state.heap_sizes.Reference(input.heap_sizes);
94
+ partition.Build(partition_pin_state, state.chunk_state, 0, count);
95
+ partition.CopyRows(state.chunk_state, input, *FlatVector::IncrementalSelectionVector(), count);
96
+ return;
97
+ }
98
+
99
+ // Build the buffer space
100
+ state.chunk_state.heap_sizes.Slice(input.heap_sizes, state.partition_sel, count);
101
+ state.chunk_state.heap_sizes.Flatten(count);
102
+ BuildBufferSpace(state);
103
+
104
+ // Copy the rows
105
+ partitions[0]->CopyRows(state.chunk_state, input, state.partition_sel, count);
106
+ }
107
+
108
+ void PartitionedTupleData::BuildPartitionSel(PartitionedTupleDataAppendState &state, idx_t count) {
109
+ const auto partition_indices = FlatVector::GetData<idx_t>(state.partition_indices);
110
+ auto &partition_entries = state.partition_entries;
111
+ auto &partition_entries_arr = state.partition_entries_arr;
112
+ partition_entries.clear();
113
+
114
+ const auto max_partition_index = MaxPartitionIndex();
115
+ const auto use_arr = max_partition_index < PartitionedTupleDataAppendState::MAP_THRESHOLD;
116
+
117
+ switch (state.partition_indices.GetVectorType()) {
118
+ case VectorType::FLAT_VECTOR:
119
+ if (use_arr) {
120
+ std::fill_n(partition_entries_arr, max_partition_index + 1, list_entry_t(0, 0));
121
+ for (idx_t i = 0; i < count; i++) {
122
+ const auto &partition_index = partition_indices[i];
123
+ partition_entries_arr[partition_index].length++;
124
+ }
125
+ } else {
126
+ for (idx_t i = 0; i < count; i++) {
127
+ const auto &partition_index = partition_indices[i];
128
+ auto partition_entry = partition_entries.find(partition_index);
129
+ if (partition_entry == partition_entries.end()) {
130
+ partition_entries.emplace(partition_index, list_entry_t(0, 1));
131
+ } else {
132
+ partition_entry->second.length++;
133
+ }
134
+ }
135
+ }
136
+ break;
137
+ case VectorType::CONSTANT_VECTOR:
138
+ partition_entries[partition_indices[0]] = list_entry_t(0, count);
139
+ break;
140
+ default:
141
+ throw InternalException("Unexpected VectorType in PartitionedTupleData::Append");
142
+ }
143
+
144
+ // Early out: check if everything belongs to a single partition
145
+ if (partition_entries.size() == 1) {
146
+ return;
147
+ }
148
+
149
+ // Compute offsets from the counts
150
+ idx_t offset = 0;
151
+ if (use_arr) {
152
+ for (idx_t partition_index = 0; partition_index <= max_partition_index; partition_index++) {
153
+ auto &partition_entry = partition_entries_arr[partition_index];
154
+ partition_entry.offset = offset;
155
+ offset += partition_entry.length;
156
+ }
157
+ } else {
158
+ for (auto &pc : partition_entries) {
159
+ auto &partition_entry = pc.second;
160
+ partition_entry.offset = offset;
161
+ offset += partition_entry.length;
162
+ }
163
+ }
164
+
165
+ // Now initialize a single selection vector that acts as a selection vector for every partition
166
+ auto &all_partitions_sel = state.partition_sel;
167
+ if (use_arr) {
168
+ for (idx_t i = 0; i < count; i++) {
169
+ const auto &partition_index = partition_indices[i];
170
+ auto &partition_offset = partition_entries_arr[partition_index].offset;
171
+ all_partitions_sel[partition_offset++] = i;
172
+ }
173
+ // Now just add it to the map anyway so the rest of the functionality is shared
174
+ for (idx_t partition_index = 0; partition_index <= max_partition_index; partition_index++) {
175
+ const auto &partition_entry = partition_entries_arr[partition_index];
176
+ if (partition_entry.length != 0) {
177
+ partition_entries.emplace(partition_index, partition_entry);
178
+ }
179
+ }
180
+ } else {
181
+ for (idx_t i = 0; i < count; i++) {
182
+ const auto &partition_index = partition_indices[i];
183
+ auto &partition_offset = partition_entries[partition_index].offset;
184
+ all_partitions_sel[partition_offset++] = i;
185
+ }
186
+ }
187
+ }
188
+
189
+ void PartitionedTupleData::BuildBufferSpace(PartitionedTupleDataAppendState &state) {
190
+ for (auto &pc : state.partition_entries) {
191
+ const auto &partition_index = pc.first;
192
+
193
+ // Partition, pin state for this partition index
194
+ auto &partition = *partitions[partition_index];
195
+ auto &partition_pin_state = *state.partition_pin_states[partition_index];
196
+
197
+ // Length and offset for this partition
198
+ const auto &partition_entry = pc.second;
199
+ const auto &partition_length = partition_entry.length;
200
+ const auto partition_offset = partition_entry.offset - partition_length;
201
+
202
+ // Build out the buffer space for this partition
203
+ partition.Build(partition_pin_state, state.chunk_state, partition_offset, partition_length);
204
+ }
205
+ }
206
+
207
+ void PartitionedTupleData::FlushAppendState(PartitionedTupleDataAppendState &state) {
208
+ for (idx_t partition_index = 0; partition_index < partitions.size(); partition_index++) {
209
+ auto &partition = *partitions[partition_index];
210
+ auto &partition_pin_state = *state.partition_pin_states[partition_index];
211
+ partition.FinalizePinState(partition_pin_state);
212
+ }
213
+ }
214
+
215
+ void PartitionedTupleData::Combine(PartitionedTupleData &other) {
216
+ if (other.Count() == 0) {
217
+ return;
218
+ }
219
+
220
+ // Now combine the state's partitions into this
221
+ lock_guard<mutex> guard(lock);
222
+
223
+ if (partitions.empty()) {
224
+ // This is the first merge, we just copy them over
225
+ partitions = std::move(other.partitions);
226
+ } else {
227
+ D_ASSERT(partitions.size() == other.partitions.size());
228
+ // Combine the append state's partitions into this PartitionedTupleData
229
+ for (idx_t i = 0; i < other.partitions.size(); i++) {
230
+ partitions[i]->Combine(*other.partitions[i]);
231
+ }
232
+ }
233
+ }
234
+
235
+ void PartitionedTupleData::Partition(TupleDataCollection &source, TupleDataPinProperties properties) {
236
+ #ifdef DEBUG
237
+ const auto count_before = source.Count();
238
+ #endif
239
+
240
+ PartitionedTupleDataAppendState append_state;
241
+ InitializeAppendState(append_state, properties);
242
+
243
+ TupleDataChunkIterator iterator(source, TupleDataPinProperties::DESTROY_AFTER_DONE, true);
244
+ auto &chunk_state = iterator.GetChunkState();
245
+ do {
246
+ Append(append_state, chunk_state, iterator.GetCurrentChunkCount());
247
+ } while (iterator.Next());
248
+
249
+ FlushAppendState(append_state);
250
+ source.Reset();
251
+
252
+ #ifdef DEBUG
253
+ idx_t count_after = 0;
254
+ for (const auto &partition : partitions) {
255
+ count_after += partition->Count();
256
+ }
257
+ D_ASSERT(count_before == count_after);
258
+ #endif
259
+ }
260
+
261
+ void PartitionedTupleData::Repartition(PartitionedTupleData &new_partitioned_data) {
262
+ D_ASSERT(layout.GetTypes() == new_partitioned_data.layout.GetTypes());
263
+
264
+ PartitionedTupleDataAppendState append_state;
265
+ new_partitioned_data.InitializeAppendState(append_state);
266
+
267
+ const auto reverse = RepartitionReverseOrder();
268
+ const idx_t start_idx = reverse ? partitions.size() : 0;
269
+ const idx_t end_idx = reverse ? 0 : partitions.size();
270
+ const int64_t update = reverse ? -1 : 1;
271
+ const int64_t adjustment = reverse ? -1 : 0;
272
+
273
+ for (idx_t partition_idx = start_idx; partition_idx != end_idx; partition_idx += update) {
274
+ auto actual_partition_idx = partition_idx + adjustment;
275
+ auto &partition = *partitions[actual_partition_idx];
276
+
277
+ if (partition.Count() > 0) {
278
+ TupleDataChunkIterator iterator(partition, TupleDataPinProperties::DESTROY_AFTER_DONE, true);
279
+ auto &chunk_state = iterator.GetChunkState();
280
+ do {
281
+ new_partitioned_data.Append(append_state, chunk_state, iterator.GetCurrentChunkCount());
282
+ } while (iterator.Next());
283
+
284
+ RepartitionFinalizeStates(*this, new_partitioned_data, append_state, actual_partition_idx);
285
+ }
286
+ partitions[actual_partition_idx]->Reset();
287
+ }
288
+
289
+ new_partitioned_data.FlushAppendState(append_state);
290
+ }
291
+
292
+ vector<unique_ptr<TupleDataCollection>> &PartitionedTupleData::GetPartitions() {
293
+ return partitions;
294
+ }
295
+
296
+ idx_t PartitionedTupleData::Count() const {
297
+ idx_t total_count = 0;
298
+ for (auto &partition : partitions) {
299
+ total_count += partition->Count();
300
+ }
301
+ return total_count;
302
+ }
303
+
304
+ idx_t PartitionedTupleData::SizeInBytes() const {
305
+ idx_t total_size = 0;
306
+ for (auto &partition : partitions) {
307
+ total_size += partition->SizeInBytes();
308
+ }
309
+ return total_size;
310
+ }
311
+
312
+ void PartitionedTupleData::CreateAllocator() {
313
+ allocators->allocators.emplace_back(make_shared<TupleDataAllocator>(buffer_manager, layout));
314
+ }
315
+
316
+ } // namespace duckdb
@@ -1,4 +1,4 @@
1
- #include "duckdb/common/types/row_data_collection.hpp"
1
+ #include "duckdb/common/types/row/row_data_collection.hpp"
2
2
 
3
3
  namespace duckdb {
4
4
 
@@ -1,7 +1,7 @@
1
- #include "duckdb/common/types/row_data_collection_scanner.hpp"
1
+ #include "duckdb/common/types/row/row_data_collection_scanner.hpp"
2
2
 
3
3
  #include "duckdb/common/row_operations/row_operations.hpp"
4
- #include "duckdb/common/types/row_data_collection.hpp"
4
+ #include "duckdb/common/types/row/row_data_collection.hpp"
5
5
  #include "duckdb/storage/buffer_manager.hpp"
6
6
 
7
7
  namespace duckdb {
@@ -6,7 +6,7 @@
6
6
  //
7
7
  //===----------------------------------------------------------------------===//
8
8
 
9
- #include "duckdb/common/types/row_layout.hpp"
9
+ #include "duckdb/common/types/row/row_layout.hpp"
10
10
 
11
11
  #include "duckdb/planner/expression/bound_aggregate_expression.hpp"
12
12