duckdb 0.7.2-dev1901.0 → 0.7.2-dev2233.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198) hide show
  1. package/binding.gyp +2 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/parquet/column_reader.cpp +3 -0
  4. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +1 -1
  5. package/src/duckdb/extension/parquet/parquet_metadata.cpp +4 -2
  6. package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +1 -1
  7. package/src/duckdb/src/common/arrow/arrow_appender.cpp +69 -44
  8. package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
  9. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +20 -2
  10. package/src/duckdb/src/common/box_renderer.cpp +4 -2
  11. package/src/duckdb/src/common/constants.cpp +10 -1
  12. package/src/duckdb/src/common/filename_pattern.cpp +41 -0
  13. package/src/duckdb/src/common/hive_partitioning.cpp +144 -15
  14. package/src/duckdb/src/common/radix_partitioning.cpp +101 -369
  15. package/src/duckdb/src/common/row_operations/row_aggregate.cpp +8 -9
  16. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  17. package/src/duckdb/src/common/row_operations/row_gather.cpp +5 -3
  18. package/src/duckdb/src/common/row_operations/row_match.cpp +117 -22
  19. package/src/duckdb/src/common/row_operations/row_scatter.cpp +2 -2
  20. package/src/duckdb/src/common/sort/partition_state.cpp +1 -1
  21. package/src/duckdb/src/common/sort/sort_state.cpp +2 -1
  22. package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
  23. package/src/duckdb/src/common/types/{column_data_allocator.cpp → column/column_data_allocator.cpp} +2 -2
  24. package/src/duckdb/src/common/types/{column_data_collection.cpp → column/column_data_collection.cpp} +29 -6
  25. package/src/duckdb/src/common/types/{column_data_collection_segment.cpp → column/column_data_collection_segment.cpp} +2 -1
  26. package/src/duckdb/src/common/types/{column_data_consumer.cpp → column/column_data_consumer.cpp} +1 -1
  27. package/src/duckdb/src/common/types/{partitioned_column_data.cpp → column/partitioned_column_data.cpp} +11 -9
  28. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +316 -0
  29. package/src/duckdb/src/common/types/{row_data_collection.cpp → row/row_data_collection.cpp} +1 -1
  30. package/src/duckdb/src/common/types/{row_data_collection_scanner.cpp → row/row_data_collection_scanner.cpp} +2 -2
  31. package/src/duckdb/src/common/types/{row_layout.cpp → row/row_layout.cpp} +1 -1
  32. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +465 -0
  33. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +511 -0
  34. package/src/duckdb/src/common/types/row/tuple_data_iterator.cpp +96 -0
  35. package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +119 -0
  36. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +1200 -0
  37. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +170 -0
  38. package/src/duckdb/src/common/types/vector.cpp +1 -1
  39. package/src/duckdb/src/execution/aggregate_hashtable.cpp +252 -290
  40. package/src/duckdb/src/execution/join_hashtable.cpp +192 -328
  41. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +4 -4
  42. package/src/duckdb/src/execution/operator/helper/physical_execute.cpp +3 -3
  43. package/src/duckdb/src/execution/operator/helper/physical_limit_percent.cpp +2 -3
  44. package/src/duckdb/src/execution/operator/helper/physical_result_collector.cpp +2 -3
  45. package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +36 -21
  46. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -2
  47. package/src/duckdb/src/execution/operator/join/physical_cross_product.cpp +1 -1
  48. package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +2 -2
  49. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +166 -144
  50. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +5 -5
  51. package/src/duckdb/src/execution/operator/join/physical_join.cpp +2 -10
  52. package/src/duckdb/src/execution/operator/join/physical_positional_join.cpp +0 -1
  53. package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +2 -2
  54. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +3 -0
  55. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +71 -22
  56. package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +17 -13
  57. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +0 -7
  58. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +124 -29
  59. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +13 -11
  60. package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +3 -2
  61. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +25 -24
  62. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  63. package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +4 -3
  64. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
  65. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +1 -1
  66. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +3 -3
  67. package/src/duckdb/src/execution/partitionable_hashtable.cpp +9 -37
  68. package/src/duckdb/src/execution/physical_operator.cpp +1 -1
  69. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +19 -18
  70. package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +2 -1
  71. package/src/duckdb/src/execution/physical_plan/plan_execute.cpp +2 -2
  72. package/src/duckdb/src/execution/physical_plan/plan_explain.cpp +5 -6
  73. package/src/duckdb/src/execution/physical_plan/plan_expression_get.cpp +2 -2
  74. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +3 -3
  75. package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -1
  76. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +39 -17
  77. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -2
  78. package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +5 -5
  79. package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +2 -2
  80. package/src/duckdb/src/function/table/read_csv.cpp +124 -58
  81. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  82. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
  83. package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +1 -1
  84. package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
  85. package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -0
  86. package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +528 -0
  87. package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +34 -0
  88. package/src/duckdb/src/include/duckdb/common/helper.hpp +10 -0
  89. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +13 -3
  90. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +8 -0
  91. package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +34 -0
  92. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +80 -27
  93. package/src/duckdb/src/include/duckdb/common/reference_map.hpp +38 -0
  94. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +7 -6
  95. package/src/duckdb/src/include/duckdb/common/sort/comparators.hpp +1 -1
  96. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +1 -1
  97. package/src/duckdb/src/include/duckdb/common/sort/sort.hpp +1 -1
  98. package/src/duckdb/src/include/duckdb/common/sort/sorted_block.hpp +2 -2
  99. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +1 -1
  100. package/src/duckdb/src/include/duckdb/common/types/{column_data_allocator.hpp → column/column_data_allocator.hpp} +4 -4
  101. package/src/duckdb/src/include/duckdb/common/types/{column_data_collection.hpp → column/column_data_collection.hpp} +4 -4
  102. package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_iterators.hpp → column/column_data_collection_iterators.hpp} +2 -2
  103. package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_segment.hpp → column/column_data_collection_segment.hpp} +3 -3
  104. package/src/duckdb/src/include/duckdb/common/types/{column_data_consumer.hpp → column/column_data_consumer.hpp} +8 -4
  105. package/src/duckdb/src/include/duckdb/common/types/{column_data_scan_states.hpp → column/column_data_scan_states.hpp} +1 -1
  106. package/src/duckdb/src/include/duckdb/common/types/{partitioned_column_data.hpp → column/partitioned_column_data.hpp} +15 -7
  107. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +140 -0
  108. package/src/duckdb/src/include/duckdb/common/types/{row_data_collection.hpp → row/row_data_collection.hpp} +1 -1
  109. package/src/duckdb/src/include/duckdb/common/types/{row_data_collection_scanner.hpp → row/row_data_collection_scanner.hpp} +2 -2
  110. package/src/duckdb/src/include/duckdb/common/types/{row_layout.hpp → row/row_layout.hpp} +3 -1
  111. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +116 -0
  112. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +239 -0
  113. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_iterator.hpp +64 -0
  114. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +113 -0
  115. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +124 -0
  116. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +74 -0
  117. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
  118. package/src/duckdb/src/include/duckdb/common/types/value.hpp +4 -12
  119. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +34 -31
  120. package/src/duckdb/src/include/duckdb/execution/base_aggregate_hashtable.hpp +2 -2
  121. package/src/duckdb/src/include/duckdb/execution/execution_context.hpp +3 -2
  122. package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +1 -1
  123. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +41 -67
  124. package/src/duckdb/src/include/duckdb/execution/nested_loop_join.hpp +1 -1
  125. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_execute.hpp +2 -2
  126. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +1 -1
  127. package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +2 -2
  128. package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
  129. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +1 -1
  130. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +0 -2
  131. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +2 -2
  132. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_positional_join.hpp +1 -1
  133. package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +4 -1
  134. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +8 -3
  135. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +5 -7
  136. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +5 -1
  137. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +4 -1
  138. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +1 -1
  139. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +1 -1
  140. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +2 -2
  141. package/src/duckdb/src/include/duckdb/function/function.hpp +2 -0
  142. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +25 -0
  143. package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -0
  144. package/src/duckdb/src/include/duckdb/main/config.hpp +0 -2
  145. package/src/duckdb/src/include/duckdb/main/materialized_query_result.hpp +1 -1
  146. package/src/duckdb/src/include/duckdb/main/query_result.hpp +14 -1
  147. package/src/duckdb/src/include/duckdb/optimizer/expression_rewriter.hpp +3 -3
  148. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +16 -16
  149. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +8 -8
  150. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +23 -15
  151. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +9 -10
  152. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +18 -11
  153. package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +1 -1
  154. package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +5 -1
  155. package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +3 -2
  156. package/src/duckdb/src/include/duckdb/parser/query_error_context.hpp +4 -2
  157. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +9 -35
  158. package/src/duckdb/src/include/duckdb/planner/binder.hpp +24 -23
  159. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +3 -3
  160. package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +1 -1
  161. package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +3 -1
  162. package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
  163. package/src/duckdb/src/main/appender.cpp +6 -6
  164. package/src/duckdb/src/main/client_context.cpp +1 -1
  165. package/src/duckdb/src/main/connection.cpp +2 -2
  166. package/src/duckdb/src/main/query_result.cpp +13 -0
  167. package/src/duckdb/src/main/settings/settings.cpp +3 -4
  168. package/src/duckdb/src/optimizer/expression_rewriter.cpp +4 -4
  169. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +91 -105
  170. package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -8
  171. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +163 -160
  172. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +30 -30
  173. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +37 -38
  174. package/src/duckdb/src/parallel/executor.cpp +1 -1
  175. package/src/duckdb/src/parallel/meta_pipeline.cpp +2 -2
  176. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +1 -1
  177. package/src/duckdb/src/parser/transform/tableref/transform_subquery.cpp +1 -1
  178. package/src/duckdb/src/parser/transformer.cpp +50 -9
  179. package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +13 -0
  180. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +15 -5
  181. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +19 -17
  182. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +4 -4
  183. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +20 -21
  184. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +24 -22
  185. package/src/duckdb/src/planner/binder/tableref/bind_subqueryref.cpp +2 -2
  186. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +9 -0
  187. package/src/duckdb/src/planner/binder.cpp +16 -19
  188. package/src/duckdb/src/planner/expression_binder.cpp +8 -8
  189. package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +3 -3
  190. package/src/duckdb/src/storage/checkpoint_manager.cpp +23 -23
  191. package/src/duckdb/src/storage/standard_buffer_manager.cpp +1 -1
  192. package/src/duckdb/src/storage/table_index_list.cpp +3 -3
  193. package/src/duckdb/src/verification/statement_verifier.cpp +1 -1
  194. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +5552 -5598
  195. package/src/duckdb/ub_src_common.cpp +2 -0
  196. package/src/duckdb/ub_src_common_types.cpp +0 -16
  197. package/src/duckdb/ub_src_common_types_column.cpp +10 -0
  198. package/src/duckdb/ub_src_common_types_row.cpp +20 -0
@@ -0,0 +1,511 @@
1
+ #include "duckdb/common/types/row/tuple_data_collection.hpp"
2
+
3
+ #include "duckdb/common/fast_mem.hpp"
4
+ #include "duckdb/common/printer.hpp"
5
+ #include "duckdb/common/row_operations/row_operations.hpp"
6
+ #include "duckdb/common/types/row/tuple_data_allocator.hpp"
7
+
8
+ #include <algorithm>
9
+
10
+ namespace duckdb {
11
+
12
+ using ValidityBytes = TupleDataLayout::ValidityBytes;
13
+
14
+ TupleDataCollection::TupleDataCollection(BufferManager &buffer_manager, const TupleDataLayout &layout_p)
15
+ : layout(layout_p.Copy()), allocator(make_shared<TupleDataAllocator>(buffer_manager, layout)) {
16
+ Initialize();
17
+ }
18
+
19
+ TupleDataCollection::TupleDataCollection(shared_ptr<TupleDataAllocator> allocator)
20
+ : layout(allocator->GetLayout().Copy()), allocator(std::move(allocator)) {
21
+ Initialize();
22
+ }
23
+
24
+ TupleDataCollection::~TupleDataCollection() {
25
+ }
26
+
27
+ void TupleDataCollection::Initialize() {
28
+ D_ASSERT(!layout.GetTypes().empty());
29
+ this->count = 0;
30
+ scatter_functions.reserve(layout.ColumnCount());
31
+ gather_functions.reserve(layout.ColumnCount());
32
+ for (idx_t col_idx = 0; col_idx < layout.ColumnCount(); col_idx++) {
33
+ auto &type = layout.GetTypes()[col_idx];
34
+ scatter_functions.emplace_back(GetScatterFunction(type));
35
+ gather_functions.emplace_back(GetGatherFunction(type));
36
+ }
37
+ }
38
+
39
+ void TupleDataCollection::GetAllColumnIDs(vector<column_t> &column_ids) {
40
+ column_ids.reserve(layout.ColumnCount());
41
+ for (idx_t col_idx = 0; col_idx < layout.ColumnCount(); col_idx++) {
42
+ column_ids.emplace_back(col_idx);
43
+ }
44
+ }
45
+
46
+ const TupleDataLayout &TupleDataCollection::GetLayout() const {
47
+ return layout;
48
+ }
49
+
50
+ const idx_t &TupleDataCollection::Count() const {
51
+ return count;
52
+ }
53
+
54
+ idx_t TupleDataCollection::ChunkCount() const {
55
+ idx_t total_chunk_count = 0;
56
+ for (const auto &segment : segments) {
57
+ total_chunk_count += segment.ChunkCount();
58
+ }
59
+ return total_chunk_count;
60
+ }
61
+
62
+ idx_t TupleDataCollection::SizeInBytes() const {
63
+ idx_t total_size = 0;
64
+ for (const auto &segment : segments) {
65
+ total_size += segment.SizeInBytes();
66
+ }
67
+ return total_size;
68
+ }
69
+
70
+ void TupleDataCollection::GetBlockPointers(vector<data_ptr_t> &block_pointers) const {
71
+ D_ASSERT(segments.size() == 1);
72
+ const auto &segment = segments[0];
73
+ const auto block_count = segment.allocator->RowBlockCount();
74
+ D_ASSERT(segment.pinned_row_handles.size() == block_count);
75
+ block_pointers.resize(block_count);
76
+ for (idx_t block_idx = 0; block_idx < block_count; block_idx++) {
77
+ block_pointers[block_idx] = segment.pinned_row_handles[block_idx].Ptr();
78
+ }
79
+ }
80
+
81
+ void TupleDataCollection::Unpin() {
82
+ for (auto &segment : segments) {
83
+ segment.Unpin();
84
+ }
85
+ }
86
+
87
+ void VerifyAppendColumns(const TupleDataLayout &layout, const vector<column_t> &column_ids) {
88
+ #ifdef DEBUG
89
+ for (idx_t col_idx = 0; col_idx < layout.ColumnCount(); col_idx++) {
90
+ if (std::find(column_ids.begin(), column_ids.end(), col_idx) != column_ids.end()) {
91
+ continue;
92
+ }
93
+ // This column will not be appended in the first go - verify that it is fixed-size - we cannot resize heap after
94
+ const auto physical_type = layout.GetTypes()[col_idx].InternalType();
95
+ D_ASSERT(physical_type != PhysicalType::VARCHAR && physical_type != PhysicalType::LIST);
96
+ if (physical_type == PhysicalType::STRUCT) {
97
+ const auto &struct_layout = layout.GetStructLayout(col_idx);
98
+ vector<column_t> struct_column_ids;
99
+ struct_column_ids.reserve(struct_layout.ColumnCount());
100
+ for (idx_t struct_col_idx = 0; struct_col_idx < struct_layout.ColumnCount(); struct_col_idx++) {
101
+ struct_column_ids.emplace_back(struct_col_idx);
102
+ }
103
+ VerifyAppendColumns(struct_layout, struct_column_ids);
104
+ }
105
+ }
106
+ #endif
107
+ }
108
+
109
+ void TupleDataCollection::InitializeAppend(TupleDataAppendState &append_state, TupleDataPinProperties properties) {
110
+ vector<column_t> column_ids;
111
+ GetAllColumnIDs(column_ids);
112
+ InitializeAppend(append_state, std::move(column_ids), properties);
113
+ }
114
+
115
+ void TupleDataCollection::InitializeAppend(TupleDataAppendState &append_state, vector<column_t> column_ids,
116
+ TupleDataPinProperties properties) {
117
+ VerifyAppendColumns(layout, column_ids);
118
+ InitializeAppend(append_state.pin_state, properties);
119
+ InitializeAppend(append_state.chunk_state, std::move(column_ids));
120
+ }
121
+
122
+ void TupleDataCollection::InitializeAppend(TupleDataPinState &pin_state, TupleDataPinProperties properties) {
123
+ pin_state.properties = properties;
124
+ if (segments.empty()) {
125
+ segments.emplace_back(allocator);
126
+ }
127
+ }
128
+
129
+ static void InitializeVectorFormat(vector<TupleDataVectorFormat> &vector_data, const vector<LogicalType> &types) {
130
+ vector_data.resize(types.size());
131
+ for (idx_t col_idx = 0; col_idx < types.size(); col_idx++) {
132
+ const auto &type = types[col_idx];
133
+ switch (type.InternalType()) {
134
+ case PhysicalType::STRUCT: {
135
+ const auto &child_list = StructType::GetChildTypes(type);
136
+ vector<LogicalType> child_types;
137
+ child_types.reserve(child_list.size());
138
+ for (const auto &child_entry : child_list) {
139
+ child_types.emplace_back(child_entry.second);
140
+ }
141
+ InitializeVectorFormat(vector_data[col_idx].child_formats, child_types);
142
+ break;
143
+ }
144
+ case PhysicalType::LIST:
145
+ InitializeVectorFormat(vector_data[col_idx].child_formats, {ListType::GetChildType(type)});
146
+ break;
147
+ default:
148
+ break;
149
+ }
150
+ }
151
+ }
152
+
153
+ void TupleDataCollection::InitializeAppend(TupleDataChunkState &chunk_state, vector<column_t> column_ids) {
154
+ if (column_ids.empty()) {
155
+ GetAllColumnIDs(column_ids);
156
+ }
157
+ InitializeVectorFormat(chunk_state.vector_data, layout.GetTypes());
158
+ chunk_state.column_ids = std::move(column_ids);
159
+ }
160
+
161
+ void TupleDataCollection::Append(DataChunk &new_chunk, const SelectionVector &append_sel, idx_t append_count) {
162
+ TupleDataAppendState append_state;
163
+ InitializeAppend(append_state);
164
+ Append(append_state, new_chunk, append_sel, append_count);
165
+ }
166
+
167
+ void TupleDataCollection::Append(DataChunk &new_chunk, vector<column_t> column_ids, const SelectionVector &append_sel,
168
+ const idx_t append_count) {
169
+ TupleDataAppendState append_state;
170
+ InitializeAppend(append_state, std::move(column_ids));
171
+ Append(append_state, new_chunk, append_sel, append_count);
172
+ }
173
+
174
+ void TupleDataCollection::Append(TupleDataAppendState &append_state, DataChunk &new_chunk,
175
+ const SelectionVector &append_sel, const idx_t append_count) {
176
+ Append(append_state.pin_state, append_state.chunk_state, new_chunk, append_sel, append_count);
177
+ }
178
+
179
+ void TupleDataCollection::Append(TupleDataPinState &pin_state, TupleDataChunkState &chunk_state, DataChunk &new_chunk,
180
+ const SelectionVector &append_sel, const idx_t append_count) {
181
+ TupleDataCollection::ToUnifiedFormat(chunk_state, new_chunk);
182
+ AppendUnified(pin_state, chunk_state, new_chunk, append_sel, append_count);
183
+ }
184
+
185
+ void TupleDataCollection::AppendUnified(TupleDataPinState &pin_state, TupleDataChunkState &chunk_state,
186
+ DataChunk &new_chunk, const SelectionVector &append_sel,
187
+ const idx_t append_count) {
188
+ const idx_t actual_append_count = append_count == DConstants::INVALID_INDEX ? new_chunk.size() : append_count;
189
+ if (actual_append_count == 0) {
190
+ return;
191
+ }
192
+
193
+ if (!layout.AllConstant()) {
194
+ TupleDataCollection::ComputeHeapSizes(chunk_state, new_chunk, append_sel, actual_append_count);
195
+ }
196
+
197
+ Build(pin_state, chunk_state, 0, actual_append_count);
198
+
199
+ #ifdef DEBUG
200
+ Vector heap_locations_copy(LogicalType::POINTER);
201
+ if (!layout.AllConstant()) {
202
+ VectorOperations::Copy(chunk_state.heap_locations, heap_locations_copy, actual_append_count, 0, 0);
203
+ }
204
+ #endif
205
+
206
+ Scatter(chunk_state, new_chunk, append_sel, actual_append_count);
207
+
208
+ #ifdef DEBUG
209
+ // Verify that the size of the data written to the heap is the same as the size we computed it would be
210
+ if (!layout.AllConstant()) {
211
+ const auto original_heap_locations = FlatVector::GetData<data_ptr_t>(heap_locations_copy);
212
+ const auto heap_sizes = FlatVector::GetData<idx_t>(chunk_state.heap_sizes);
213
+ const auto offset_heap_locations = FlatVector::GetData<data_ptr_t>(chunk_state.heap_locations);
214
+ for (idx_t i = 0; i < actual_append_count; i++) {
215
+ D_ASSERT(offset_heap_locations[i] == original_heap_locations[i] + heap_sizes[i]);
216
+ }
217
+ }
218
+ #endif
219
+ }
220
+
221
+ static inline void ToUnifiedFormatInternal(TupleDataVectorFormat &format, Vector &vector, const idx_t count) {
222
+ vector.ToUnifiedFormat(count, format.data);
223
+ switch (vector.GetType().InternalType()) {
224
+ case PhysicalType::STRUCT: {
225
+ auto &entries = StructVector::GetEntries(vector);
226
+ D_ASSERT(format.child_formats.size() == entries.size());
227
+ for (idx_t struct_col_idx = 0; struct_col_idx < entries.size(); struct_col_idx++) {
228
+ ToUnifiedFormatInternal(format.child_formats[struct_col_idx], *entries[struct_col_idx], count);
229
+ }
230
+ break;
231
+ }
232
+ case PhysicalType::LIST:
233
+ D_ASSERT(format.child_formats.size() == 1);
234
+ ToUnifiedFormatInternal(format.child_formats[0], ListVector::GetEntry(vector), ListVector::GetListSize(vector));
235
+ break;
236
+ default:
237
+ break;
238
+ }
239
+ }
240
+
241
+ void TupleDataCollection::ToUnifiedFormat(TupleDataChunkState &chunk_state, DataChunk &new_chunk) {
242
+ D_ASSERT(chunk_state.vector_data.size() >= chunk_state.column_ids.size()); // Needs InitializeAppend
243
+ for (const auto &col_idx : chunk_state.column_ids) {
244
+ ToUnifiedFormatInternal(chunk_state.vector_data[col_idx], new_chunk.data[col_idx], new_chunk.size());
245
+ }
246
+ }
247
+
248
+ void TupleDataCollection::GetVectorData(const TupleDataChunkState &chunk_state, UnifiedVectorFormat result[]) {
249
+ const auto &vector_data = chunk_state.vector_data;
250
+ for (idx_t i = 0; i < vector_data.size(); i++) {
251
+ const auto &source = vector_data[i].data;
252
+ auto &target = result[i];
253
+ target.sel = source.sel;
254
+ target.data = source.data;
255
+ target.validity = source.validity;
256
+ }
257
+ }
258
+
259
+ void TupleDataCollection::Build(TupleDataPinState &pin_state, TupleDataChunkState &chunk_state,
260
+ const idx_t append_offset, const idx_t append_count) {
261
+ segments.back().allocator->Build(segments.back(), pin_state, chunk_state, append_offset, append_count);
262
+ count += append_count;
263
+ Verify();
264
+ }
265
+
266
+ void VerifyHeapSizes(const data_ptr_t source_locations[], const idx_t heap_sizes[], const SelectionVector &append_sel,
267
+ const idx_t append_count, const idx_t heap_size_offset) {
268
+ #ifdef DEBUG
269
+ for (idx_t i = 0; i < append_count; i++) {
270
+ auto idx = append_sel.get_index(i);
271
+ const auto stored_heap_size = Load<uint32_t>(source_locations[idx] + heap_size_offset);
272
+ D_ASSERT(stored_heap_size == heap_sizes[idx]);
273
+ }
274
+ #endif
275
+ }
276
+
277
+ void TupleDataCollection::CopyRows(TupleDataChunkState &chunk_state, TupleDataChunkState &input,
278
+ const SelectionVector &append_sel, const idx_t append_count) const {
279
+ const auto source_locations = FlatVector::GetData<data_ptr_t>(input.row_locations);
280
+ const auto target_locations = FlatVector::GetData<data_ptr_t>(chunk_state.row_locations);
281
+
282
+ // Copy rows
283
+ const auto row_width = layout.GetRowWidth();
284
+ for (idx_t i = 0; i < append_count; i++) {
285
+ auto idx = append_sel.get_index(i);
286
+ FastMemcpy(target_locations[i], source_locations[idx], row_width);
287
+ }
288
+
289
+ // Copy heap if we need to
290
+ if (!layout.AllConstant()) {
291
+ const auto source_heap_locations = FlatVector::GetData<data_ptr_t>(input.heap_locations);
292
+ const auto target_heap_locations = FlatVector::GetData<data_ptr_t>(chunk_state.heap_locations);
293
+ const auto heap_sizes = FlatVector::GetData<idx_t>(input.heap_sizes);
294
+ VerifyHeapSizes(source_locations, heap_sizes, append_sel, append_count, layout.GetHeapSizeOffset());
295
+
296
+ // Check if we need to copy anything at all
297
+ idx_t total_heap_size = 0;
298
+ for (idx_t i = 0; i < append_count; i++) {
299
+ auto idx = append_sel.get_index(i);
300
+ total_heap_size += heap_sizes[idx];
301
+ }
302
+ if (total_heap_size == 0) {
303
+ return;
304
+ }
305
+
306
+ // Copy heap
307
+ for (idx_t i = 0; i < append_count; i++) {
308
+ auto idx = append_sel.get_index(i);
309
+ FastMemcpy(target_heap_locations[i], source_heap_locations[idx], heap_sizes[idx]);
310
+ }
311
+
312
+ // Recompute pointers after copying the data
313
+ TupleDataAllocator::RecomputeHeapPointers(input.heap_locations, append_sel, target_locations,
314
+ chunk_state.heap_locations, 0, append_count, layout, 0);
315
+ }
316
+ }
317
+
318
+ void TupleDataCollection::Combine(TupleDataCollection &other) {
319
+ if (other.count == 0) {
320
+ return;
321
+ }
322
+ if (this->layout.GetTypes() != other.GetLayout().GetTypes()) {
323
+ throw InternalException("Attempting to combine TupleDataCollection with mismatching types");
324
+ }
325
+ this->count += other.count;
326
+ this->segments.reserve(this->segments.size() + other.segments.size());
327
+ for (auto &other_seg : other.segments) {
328
+ this->segments.emplace_back(std::move(other_seg));
329
+ }
330
+ other.Reset();
331
+ Verify();
332
+ }
333
+
334
+ void TupleDataCollection::Combine(unique_ptr<TupleDataCollection> other) {
335
+ Combine(*other);
336
+ }
337
+
338
+ void TupleDataCollection::Reset() {
339
+ count = 0;
340
+ segments.clear();
341
+
342
+ // Refreshes the TupleDataAllocator to prevent holding on to allocated data unnecessarily
343
+ allocator = make_shared<TupleDataAllocator>(*allocator);
344
+ }
345
+
346
+ void TupleDataCollection::InitializeChunk(DataChunk &chunk) const {
347
+ chunk.Initialize(allocator->GetAllocator(), layout.GetTypes());
348
+ }
349
+
350
+ void TupleDataCollection::InitializeScanChunk(TupleDataScanState &state, DataChunk &chunk) const {
351
+ auto &column_ids = state.chunk_state.column_ids;
352
+ D_ASSERT(!column_ids.empty());
353
+ vector<LogicalType> chunk_types;
354
+ chunk_types.reserve(column_ids.size());
355
+ for (idx_t i = 0; i < column_ids.size(); i++) {
356
+ auto column_idx = column_ids[i];
357
+ D_ASSERT(column_idx < layout.ColumnCount());
358
+ chunk_types.push_back(layout.GetTypes()[column_idx]);
359
+ }
360
+ chunk.Initialize(allocator->GetAllocator(), chunk_types);
361
+ }
362
+
363
+ void TupleDataCollection::InitializeScan(TupleDataScanState &state, TupleDataPinProperties properties) const {
364
+ vector<column_t> column_ids;
365
+ column_ids.reserve(layout.ColumnCount());
366
+ for (idx_t i = 0; i < layout.ColumnCount(); i++) {
367
+ column_ids.push_back(i);
368
+ }
369
+ InitializeScan(state, std::move(column_ids), properties);
370
+ }
371
+
372
+ void TupleDataCollection::InitializeScan(TupleDataScanState &state, vector<column_t> column_ids,
373
+ TupleDataPinProperties properties) const {
374
+ state.pin_state.row_handles.clear();
375
+ state.pin_state.heap_handles.clear();
376
+ state.pin_state.properties = properties;
377
+ state.segment_index = 0;
378
+ state.chunk_index = 0;
379
+ state.chunk_state.column_ids = std::move(column_ids);
380
+ }
381
+
382
+ void TupleDataCollection::InitializeScan(TupleDataParallelScanState &gstate, TupleDataPinProperties properties) const {
383
+ InitializeScan(gstate.scan_state, properties);
384
+ }
385
+
386
+ void TupleDataCollection::InitializeScan(TupleDataParallelScanState &state, vector<column_t> column_ids,
387
+ TupleDataPinProperties properties) const {
388
+ InitializeScan(state.scan_state, std::move(column_ids), properties);
389
+ }
390
+
391
+ bool TupleDataCollection::Scan(TupleDataScanState &state, DataChunk &result) {
392
+ const auto segment_index_before = state.segment_index;
393
+ idx_t segment_index;
394
+ idx_t chunk_index;
395
+ if (!NextScanIndex(state, segment_index, chunk_index)) {
396
+ return false;
397
+ }
398
+ if (segment_index_before != DConstants::INVALID_INDEX && segment_index != segment_index_before) {
399
+ FinalizePinState(state.pin_state, segments[segment_index_before]);
400
+ }
401
+ ScanAtIndex(state.pin_state, state.chunk_state, state.chunk_state.column_ids, segment_index, chunk_index, result);
402
+ return true;
403
+ }
404
+
405
+ bool TupleDataCollection::Scan(TupleDataParallelScanState &gstate, TupleDataLocalScanState &lstate, DataChunk &result) {
406
+ lstate.pin_state.properties = gstate.scan_state.pin_state.properties;
407
+
408
+ const auto segment_index_before = lstate.segment_index;
409
+ idx_t segment_index;
410
+ idx_t chunk_index;
411
+ {
412
+ lock_guard<mutex> guard(gstate.lock);
413
+ if (!NextScanIndex(gstate.scan_state, segment_index, chunk_index)) {
414
+ return false;
415
+ }
416
+ }
417
+ if (segment_index_before != DConstants::INVALID_INDEX && segment_index_before != segment_index) {
418
+ FinalizePinState(lstate.pin_state, segments[lstate.segment_index]);
419
+ lstate.segment_index = segment_index;
420
+ }
421
+ ScanAtIndex(lstate.pin_state, lstate.chunk_state, gstate.scan_state.chunk_state.column_ids, segment_index,
422
+ chunk_index, result);
423
+ return true;
424
+ }
425
+
426
+ void TupleDataCollection::FinalizePinState(TupleDataPinState &pin_state, TupleDataSegment &segment) {
427
+ segment.allocator->ReleaseOrStoreHandles(pin_state, segment);
428
+ }
429
+
430
+ void TupleDataCollection::FinalizePinState(TupleDataPinState &pin_state) {
431
+ D_ASSERT(segments.size() == 1);
432
+ allocator->ReleaseOrStoreHandles(pin_state, segments.back());
433
+ }
434
+
435
+ bool TupleDataCollection::NextScanIndex(TupleDataScanState &state, idx_t &segment_index, idx_t &chunk_index) {
436
+ // Check if we still have segments to scan
437
+ if (state.segment_index >= segments.size()) {
438
+ // No more data left in the scan
439
+ return false;
440
+ }
441
+ // Check within the current segment if we still have chunks to scan
442
+ while (state.chunk_index >= segments[state.segment_index].ChunkCount()) {
443
+ // Exhausted all chunks for this segment: Move to the next one
444
+ state.segment_index++;
445
+ state.chunk_index = 0;
446
+ if (state.segment_index >= segments.size()) {
447
+ return false;
448
+ }
449
+ }
450
+ segment_index = state.segment_index;
451
+ chunk_index = state.chunk_index++;
452
+ return true;
453
+ }
454
+
455
+ void TupleDataCollection::ScanAtIndex(TupleDataPinState &pin_state, TupleDataChunkState &chunk_state,
456
+ const vector<column_t> &column_ids, idx_t segment_index, idx_t chunk_index,
457
+ DataChunk &result) {
458
+ auto &segment = segments[segment_index];
459
+ auto &chunk = segment.chunks[chunk_index];
460
+ segment.allocator->InitializeChunkState(segment, pin_state, chunk_state, chunk_index, false);
461
+ result.Reset();
462
+ Gather(chunk_state.row_locations, *FlatVector::IncrementalSelectionVector(), chunk.count, column_ids, result,
463
+ *FlatVector::IncrementalSelectionVector());
464
+ result.SetCardinality(chunk.count);
465
+ }
466
+
467
+ string TupleDataCollection::ToString() {
468
+ DataChunk chunk;
469
+ InitializeChunk(chunk);
470
+
471
+ TupleDataScanState scan_state;
472
+ InitializeScan(scan_state);
473
+
474
+ string result = StringUtil::Format("TupleDataCollection - [%llu Chunks, %llu Rows]\n", ChunkCount(), Count());
475
+ idx_t chunk_idx = 0;
476
+ idx_t row_count = 0;
477
+ while (Scan(scan_state, chunk)) {
478
+ result +=
479
+ StringUtil::Format("Chunk %llu - [Rows %llu - %llu]\n", chunk_idx, row_count, row_count + chunk.size()) +
480
+ chunk.ToString();
481
+ chunk_idx++;
482
+ row_count += chunk.size();
483
+ }
484
+
485
+ return result;
486
+ }
487
+
488
+ void TupleDataCollection::Print() {
489
+ Printer::Print(ToString());
490
+ }
491
+
492
+ void TupleDataCollection::Verify() const {
493
+ #ifdef DEBUG
494
+ idx_t total_segment_count = 0;
495
+ for (const auto &segment : segments) {
496
+ segment.Verify();
497
+ total_segment_count += segment.count;
498
+ }
499
+ D_ASSERT(total_segment_count == this->count);
500
+ #endif
501
+ }
502
+
503
+ void TupleDataCollection::VerifyEverythingPinned() const {
504
+ #ifdef DEBUG
505
+ for (const auto &segment : segments) {
506
+ segment.VerifyEverythingPinned();
507
+ }
508
+ #endif
509
+ }
510
+
511
+ } // namespace duckdb
@@ -0,0 +1,96 @@
1
+ #include "duckdb/common/types/row/tuple_data_iterator.hpp"
2
+
3
+ #include "duckdb/common/types/row/tuple_data_allocator.hpp"
4
+
5
+ namespace duckdb {
6
+
7
+ TupleDataChunkIterator::TupleDataChunkIterator(TupleDataCollection &collection_p, TupleDataPinProperties properties_p,
8
+ bool init_heap)
9
+ : TupleDataChunkIterator(collection_p, properties_p, 0, collection_p.ChunkCount(), init_heap) {
10
+ }
11
+
12
+ TupleDataChunkIterator::TupleDataChunkIterator(TupleDataCollection &collection_p, TupleDataPinProperties properties,
13
+ idx_t chunk_idx_from, idx_t chunk_idx_to, bool init_heap_p)
14
+ : collection(collection_p), init_heap(init_heap_p) {
15
+ state.pin_state.properties = properties;
16
+ D_ASSERT(chunk_idx_from < chunk_idx_to);
17
+ D_ASSERT(chunk_idx_to <= collection.ChunkCount());
18
+ idx_t overall_chunk_index = 0;
19
+ for (idx_t segment_idx = 0; segment_idx < collection.segments.size(); segment_idx++) {
20
+ const auto &segment = collection.segments[segment_idx];
21
+ if (chunk_idx_from >= overall_chunk_index && chunk_idx_from <= overall_chunk_index + segment.ChunkCount()) {
22
+ // We start in this segment
23
+ start_segment_idx = segment_idx;
24
+ start_chunk_idx = chunk_idx_from - overall_chunk_index;
25
+ }
26
+ if (chunk_idx_to >= overall_chunk_index && chunk_idx_to <= overall_chunk_index + segment.ChunkCount()) {
27
+ // We end in this segment
28
+ end_segment_idx = segment_idx;
29
+ end_chunk_idx = chunk_idx_to - overall_chunk_index;
30
+ }
31
+ overall_chunk_index += segment.ChunkCount();
32
+ }
33
+
34
+ Reset();
35
+ }
36
+
37
+ void TupleDataChunkIterator::InitializeCurrentChunk() {
38
+ auto &segment = collection.segments[current_segment_idx];
39
+ segment.allocator->InitializeChunkState(segment, state.pin_state, state.chunk_state, current_chunk_idx, init_heap);
40
+ }
41
+
42
+ bool TupleDataChunkIterator::Done() const {
43
+ return current_segment_idx == end_segment_idx && current_chunk_idx == end_chunk_idx;
44
+ }
45
+
46
+ bool TupleDataChunkIterator::Next() {
47
+ D_ASSERT(!Done()); // Check if called after already done
48
+
49
+ // Set the next indices and checks if we're at the end of the collection
50
+ // NextScanIndex can go past this iterators 'end', so we have to check the indices again
51
+ const auto segment_idx_before = current_segment_idx;
52
+ if (!collection.NextScanIndex(state, current_segment_idx, current_chunk_idx) || Done()) {
53
+ // Drop pins / stores them if TupleDataPinProperties::KEEP_EVERYTHING_PINNED
54
+ collection.FinalizePinState(state.pin_state, collection.segments[segment_idx_before]);
55
+ current_segment_idx = end_segment_idx;
56
+ current_chunk_idx = end_chunk_idx;
57
+ return false;
58
+ }
59
+
60
+ // Finalize pin state when moving from one segment to the next
61
+ if (current_segment_idx != segment_idx_before) {
62
+ collection.FinalizePinState(state.pin_state, collection.segments[segment_idx_before]);
63
+ }
64
+
65
+ InitializeCurrentChunk();
66
+ return true;
67
+ }
68
+
69
+ void TupleDataChunkIterator::Reset() {
70
+ state.segment_index = start_segment_idx;
71
+ state.chunk_index = start_chunk_idx;
72
+ collection.NextScanIndex(state, current_segment_idx, current_chunk_idx);
73
+ InitializeCurrentChunk();
74
+ }
75
+
76
+ idx_t TupleDataChunkIterator::GetCurrentChunkCount() const {
77
+ return collection.segments[current_segment_idx].chunks[current_chunk_idx].count;
78
+ }
79
+
80
+ TupleDataChunkState &TupleDataChunkIterator::GetChunkState() {
81
+ return state.chunk_state;
82
+ }
83
+
84
+ data_ptr_t *TupleDataChunkIterator::GetRowLocations() {
85
+ return FlatVector::GetData<data_ptr_t>(state.chunk_state.row_locations);
86
+ }
87
+
88
+ data_ptr_t *TupleDataChunkIterator::GetHeapLocations() {
89
+ return FlatVector::GetData<data_ptr_t>(state.chunk_state.heap_locations);
90
+ }
91
+
92
+ idx_t *TupleDataChunkIterator::GetHeapSizes() {
93
+ return FlatVector::GetData<idx_t>(state.chunk_state.heap_sizes);
94
+ }
95
+
96
+ } // namespace duckdb