duckdb 0.7.2-dev1901.0 → 0.7.2-dev2233.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198)
  1. package/binding.gyp +2 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/parquet/column_reader.cpp +3 -0
  4. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +1 -1
  5. package/src/duckdb/extension/parquet/parquet_metadata.cpp +4 -2
  6. package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +1 -1
  7. package/src/duckdb/src/common/arrow/arrow_appender.cpp +69 -44
  8. package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
  9. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +20 -2
  10. package/src/duckdb/src/common/box_renderer.cpp +4 -2
  11. package/src/duckdb/src/common/constants.cpp +10 -1
  12. package/src/duckdb/src/common/filename_pattern.cpp +41 -0
  13. package/src/duckdb/src/common/hive_partitioning.cpp +144 -15
  14. package/src/duckdb/src/common/radix_partitioning.cpp +101 -369
  15. package/src/duckdb/src/common/row_operations/row_aggregate.cpp +8 -9
  16. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  17. package/src/duckdb/src/common/row_operations/row_gather.cpp +5 -3
  18. package/src/duckdb/src/common/row_operations/row_match.cpp +117 -22
  19. package/src/duckdb/src/common/row_operations/row_scatter.cpp +2 -2
  20. package/src/duckdb/src/common/sort/partition_state.cpp +1 -1
  21. package/src/duckdb/src/common/sort/sort_state.cpp +2 -1
  22. package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
  23. package/src/duckdb/src/common/types/{column_data_allocator.cpp → column/column_data_allocator.cpp} +2 -2
  24. package/src/duckdb/src/common/types/{column_data_collection.cpp → column/column_data_collection.cpp} +29 -6
  25. package/src/duckdb/src/common/types/{column_data_collection_segment.cpp → column/column_data_collection_segment.cpp} +2 -1
  26. package/src/duckdb/src/common/types/{column_data_consumer.cpp → column/column_data_consumer.cpp} +1 -1
  27. package/src/duckdb/src/common/types/{partitioned_column_data.cpp → column/partitioned_column_data.cpp} +11 -9
  28. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +316 -0
  29. package/src/duckdb/src/common/types/{row_data_collection.cpp → row/row_data_collection.cpp} +1 -1
  30. package/src/duckdb/src/common/types/{row_data_collection_scanner.cpp → row/row_data_collection_scanner.cpp} +2 -2
  31. package/src/duckdb/src/common/types/{row_layout.cpp → row/row_layout.cpp} +1 -1
  32. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +465 -0
  33. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +511 -0
  34. package/src/duckdb/src/common/types/row/tuple_data_iterator.cpp +96 -0
  35. package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +119 -0
  36. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +1200 -0
  37. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +170 -0
  38. package/src/duckdb/src/common/types/vector.cpp +1 -1
  39. package/src/duckdb/src/execution/aggregate_hashtable.cpp +252 -290
  40. package/src/duckdb/src/execution/join_hashtable.cpp +192 -328
  41. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +4 -4
  42. package/src/duckdb/src/execution/operator/helper/physical_execute.cpp +3 -3
  43. package/src/duckdb/src/execution/operator/helper/physical_limit_percent.cpp +2 -3
  44. package/src/duckdb/src/execution/operator/helper/physical_result_collector.cpp +2 -3
  45. package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +36 -21
  46. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -2
  47. package/src/duckdb/src/execution/operator/join/physical_cross_product.cpp +1 -1
  48. package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +2 -2
  49. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +166 -144
  50. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +5 -5
  51. package/src/duckdb/src/execution/operator/join/physical_join.cpp +2 -10
  52. package/src/duckdb/src/execution/operator/join/physical_positional_join.cpp +0 -1
  53. package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +2 -2
  54. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +3 -0
  55. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +71 -22
  56. package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +17 -13
  57. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +0 -7
  58. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +124 -29
  59. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +13 -11
  60. package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +3 -2
  61. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +25 -24
  62. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  63. package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +4 -3
  64. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
  65. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +1 -1
  66. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +3 -3
  67. package/src/duckdb/src/execution/partitionable_hashtable.cpp +9 -37
  68. package/src/duckdb/src/execution/physical_operator.cpp +1 -1
  69. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +19 -18
  70. package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +2 -1
  71. package/src/duckdb/src/execution/physical_plan/plan_execute.cpp +2 -2
  72. package/src/duckdb/src/execution/physical_plan/plan_explain.cpp +5 -6
  73. package/src/duckdb/src/execution/physical_plan/plan_expression_get.cpp +2 -2
  74. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +3 -3
  75. package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -1
  76. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +39 -17
  77. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -2
  78. package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +5 -5
  79. package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +2 -2
  80. package/src/duckdb/src/function/table/read_csv.cpp +124 -58
  81. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  82. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
  83. package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +1 -1
  84. package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
  85. package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -0
  86. package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +528 -0
  87. package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +34 -0
  88. package/src/duckdb/src/include/duckdb/common/helper.hpp +10 -0
  89. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +13 -3
  90. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +8 -0
  91. package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +34 -0
  92. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +80 -27
  93. package/src/duckdb/src/include/duckdb/common/reference_map.hpp +38 -0
  94. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +7 -6
  95. package/src/duckdb/src/include/duckdb/common/sort/comparators.hpp +1 -1
  96. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +1 -1
  97. package/src/duckdb/src/include/duckdb/common/sort/sort.hpp +1 -1
  98. package/src/duckdb/src/include/duckdb/common/sort/sorted_block.hpp +2 -2
  99. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +1 -1
  100. package/src/duckdb/src/include/duckdb/common/types/{column_data_allocator.hpp → column/column_data_allocator.hpp} +4 -4
  101. package/src/duckdb/src/include/duckdb/common/types/{column_data_collection.hpp → column/column_data_collection.hpp} +4 -4
  102. package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_iterators.hpp → column/column_data_collection_iterators.hpp} +2 -2
  103. package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_segment.hpp → column/column_data_collection_segment.hpp} +3 -3
  104. package/src/duckdb/src/include/duckdb/common/types/{column_data_consumer.hpp → column/column_data_consumer.hpp} +8 -4
  105. package/src/duckdb/src/include/duckdb/common/types/{column_data_scan_states.hpp → column/column_data_scan_states.hpp} +1 -1
  106. package/src/duckdb/src/include/duckdb/common/types/{partitioned_column_data.hpp → column/partitioned_column_data.hpp} +15 -7
  107. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +140 -0
  108. package/src/duckdb/src/include/duckdb/common/types/{row_data_collection.hpp → row/row_data_collection.hpp} +1 -1
  109. package/src/duckdb/src/include/duckdb/common/types/{row_data_collection_scanner.hpp → row/row_data_collection_scanner.hpp} +2 -2
  110. package/src/duckdb/src/include/duckdb/common/types/{row_layout.hpp → row/row_layout.hpp} +3 -1
  111. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +116 -0
  112. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +239 -0
  113. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_iterator.hpp +64 -0
  114. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +113 -0
  115. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +124 -0
  116. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +74 -0
  117. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
  118. package/src/duckdb/src/include/duckdb/common/types/value.hpp +4 -12
  119. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +34 -31
  120. package/src/duckdb/src/include/duckdb/execution/base_aggregate_hashtable.hpp +2 -2
  121. package/src/duckdb/src/include/duckdb/execution/execution_context.hpp +3 -2
  122. package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +1 -1
  123. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +41 -67
  124. package/src/duckdb/src/include/duckdb/execution/nested_loop_join.hpp +1 -1
  125. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_execute.hpp +2 -2
  126. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +1 -1
  127. package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +2 -2
  128. package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
  129. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +1 -1
  130. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +0 -2
  131. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +2 -2
  132. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_positional_join.hpp +1 -1
  133. package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +4 -1
  134. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +8 -3
  135. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +5 -7
  136. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +5 -1
  137. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +4 -1
  138. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +1 -1
  139. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +1 -1
  140. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +2 -2
  141. package/src/duckdb/src/include/duckdb/function/function.hpp +2 -0
  142. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +25 -0
  143. package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -0
  144. package/src/duckdb/src/include/duckdb/main/config.hpp +0 -2
  145. package/src/duckdb/src/include/duckdb/main/materialized_query_result.hpp +1 -1
  146. package/src/duckdb/src/include/duckdb/main/query_result.hpp +14 -1
  147. package/src/duckdb/src/include/duckdb/optimizer/expression_rewriter.hpp +3 -3
  148. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +16 -16
  149. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +8 -8
  150. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +23 -15
  151. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +9 -10
  152. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +18 -11
  153. package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +1 -1
  154. package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +5 -1
  155. package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +3 -2
  156. package/src/duckdb/src/include/duckdb/parser/query_error_context.hpp +4 -2
  157. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +9 -35
  158. package/src/duckdb/src/include/duckdb/planner/binder.hpp +24 -23
  159. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +3 -3
  160. package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +1 -1
  161. package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +3 -1
  162. package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
  163. package/src/duckdb/src/main/appender.cpp +6 -6
  164. package/src/duckdb/src/main/client_context.cpp +1 -1
  165. package/src/duckdb/src/main/connection.cpp +2 -2
  166. package/src/duckdb/src/main/query_result.cpp +13 -0
  167. package/src/duckdb/src/main/settings/settings.cpp +3 -4
  168. package/src/duckdb/src/optimizer/expression_rewriter.cpp +4 -4
  169. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +91 -105
  170. package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -8
  171. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +163 -160
  172. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +30 -30
  173. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +37 -38
  174. package/src/duckdb/src/parallel/executor.cpp +1 -1
  175. package/src/duckdb/src/parallel/meta_pipeline.cpp +2 -2
  176. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +1 -1
  177. package/src/duckdb/src/parser/transform/tableref/transform_subquery.cpp +1 -1
  178. package/src/duckdb/src/parser/transformer.cpp +50 -9
  179. package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +13 -0
  180. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +15 -5
  181. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +19 -17
  182. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +4 -4
  183. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +20 -21
  184. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +24 -22
  185. package/src/duckdb/src/planner/binder/tableref/bind_subqueryref.cpp +2 -2
  186. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +9 -0
  187. package/src/duckdb/src/planner/binder.cpp +16 -19
  188. package/src/duckdb/src/planner/expression_binder.cpp +8 -8
  189. package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +3 -3
  190. package/src/duckdb/src/storage/checkpoint_manager.cpp +23 -23
  191. package/src/duckdb/src/storage/standard_buffer_manager.cpp +1 -1
  192. package/src/duckdb/src/storage/table_index_list.cpp +3 -3
  193. package/src/duckdb/src/verification/statement_verifier.cpp +1 -1
  194. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +5552 -5598
  195. package/src/duckdb/ub_src_common.cpp +2 -0
  196. package/src/duckdb/ub_src_common_types.cpp +0 -16
  197. package/src/duckdb/ub_src_common_types_column.cpp +10 -0
  198. package/src/duckdb/ub_src_common_types_row.cpp +20 -0
@@ -0,0 +1,1200 @@
1
+ #include "duckdb/common/fast_mem.hpp"
2
+ #include "duckdb/common/types/null_value.hpp"
3
+ #include "duckdb/common/types/row/tuple_data_collection.hpp"
4
+
5
+ namespace duckdb {
6
+
7
+ using ValidityBytes = TupleDataLayout::ValidityBytes;
8
+
9
+ template <class T>
10
+ static constexpr idx_t TupleDataWithinListFixedSize() {
11
+ return sizeof(T);
12
+ }
13
+
14
+ template <>
15
+ constexpr idx_t TupleDataWithinListFixedSize<string_t>() {
16
+ return sizeof(uint32_t);
17
+ }
18
+
19
+ template <class T>
20
+ static inline void TupleDataValueStore(const T &source, const data_ptr_t &row_location, const idx_t offset_in_row,
21
+ data_ptr_t &heap_location) {
22
+ Store<T>(source, row_location + offset_in_row);
23
+ }
24
+
25
+ template <>
26
+ inline void TupleDataValueStore(const string_t &source, const data_ptr_t &row_location, const idx_t offset_in_row,
27
+ data_ptr_t &heap_location) {
28
+ if (source.IsInlined()) {
29
+ Store<string_t>(source, row_location + offset_in_row);
30
+ } else {
31
+ memcpy(heap_location, source.GetDataUnsafe(), source.GetSize());
32
+ Store<string_t>(string_t((const char *)heap_location, source.GetSize()), row_location + offset_in_row);
33
+ heap_location += source.GetSize();
34
+ }
35
+ }
36
+
37
+ template <class T>
38
+ static inline void TupleDataWithinListValueStore(const T &source, const data_ptr_t &location,
39
+ data_ptr_t &heap_location) {
40
+ Store<T>(source, location);
41
+ }
42
+
43
+ template <>
44
+ inline void TupleDataWithinListValueStore(const string_t &source, const data_ptr_t &location,
45
+ data_ptr_t &heap_location) {
46
+ Store<uint32_t>(source.GetSize(), location);
47
+ memcpy(heap_location, source.GetDataUnsafe(), source.GetSize());
48
+ heap_location += source.GetSize();
49
+ }
50
+
51
+ template <class T>
52
+ static inline T TupleDataWithinListValueLoad(const data_ptr_t &location, data_ptr_t &heap_location) {
53
+ return Load<T>(location);
54
+ }
55
+
56
+ template <>
57
+ inline string_t TupleDataWithinListValueLoad(const data_ptr_t &location, data_ptr_t &heap_location) {
58
+ const auto size = Load<uint32_t>(location);
59
+ string_t result((const char *)heap_location, size);
60
+ heap_location += size;
61
+ return result;
62
+ }
63
+
64
+ void TupleDataCollection::ComputeHeapSizes(TupleDataChunkState &chunk_state, const DataChunk &new_chunk,
65
+ const SelectionVector &append_sel, const idx_t append_count) {
66
+ auto heap_sizes = FlatVector::GetData<idx_t>(chunk_state.heap_sizes);
67
+ std::fill_n(heap_sizes, new_chunk.size(), 0);
68
+
69
+ for (idx_t col_idx = 0; col_idx < new_chunk.ColumnCount(); col_idx++) {
70
+ auto &source_v = new_chunk.data[col_idx];
71
+ auto &source_format = chunk_state.vector_data[col_idx];
72
+ TupleDataCollection::ComputeHeapSizes(chunk_state.heap_sizes, source_v, source_format, append_sel,
73
+ append_count);
74
+ }
75
+ }
76
+
77
+ static inline idx_t StringHeapSize(const string_t &val) {
78
+ return val.IsInlined() ? 0 : val.GetSize();
79
+ }
80
+
81
+ void TupleDataCollection::ComputeHeapSizes(Vector &heap_sizes_v, const Vector &source_v,
82
+ TupleDataVectorFormat &source_format, const SelectionVector &append_sel,
83
+ const idx_t append_count) {
84
+ const auto type = source_v.GetType().InternalType();
85
+ if (type != PhysicalType::VARCHAR && type != PhysicalType::STRUCT && type != PhysicalType::LIST) {
86
+ return;
87
+ }
88
+
89
+ auto heap_sizes = FlatVector::GetData<idx_t>(heap_sizes_v);
90
+
91
+ const auto &source_vector_data = source_format.data;
92
+ const auto &source_sel = *source_vector_data.sel;
93
+ const auto &source_validity = source_vector_data.validity;
94
+
95
+ switch (type) {
96
+ case PhysicalType::VARCHAR: {
97
+ // Only non-inlined strings are stored in the heap
98
+ const auto source_data = (string_t *)source_vector_data.data;
99
+ for (idx_t i = 0; i < append_count; i++) {
100
+ const auto source_idx = source_sel.get_index(append_sel.get_index(i));
101
+ if (source_validity.RowIsValid(source_idx)) {
102
+ heap_sizes[i] += StringHeapSize(source_data[source_idx]);
103
+ } else {
104
+ heap_sizes[i] += StringHeapSize(NullValue<string_t>());
105
+ }
106
+ }
107
+ break;
108
+ }
109
+ case PhysicalType::STRUCT: {
110
+ // Recurse through the struct children
111
+ auto &struct_sources = StructVector::GetEntries(source_v);
112
+ for (idx_t struct_col_idx = 0; struct_col_idx < struct_sources.size(); struct_col_idx++) {
113
+ const auto &struct_source = struct_sources[struct_col_idx];
114
+ auto &struct_format = source_format.child_formats[struct_col_idx];
115
+ TupleDataCollection::ComputeHeapSizes(heap_sizes_v, *struct_source, struct_format, append_sel,
116
+ append_count);
117
+ }
118
+ break;
119
+ }
120
+ case PhysicalType::LIST: {
121
+ // Lists are stored entirely in the heap
122
+ for (idx_t i = 0; i < append_count; i++) {
123
+ auto source_idx = source_sel.get_index(append_sel.get_index(i));
124
+ if (source_validity.RowIsValid(source_idx)) {
125
+ heap_sizes[i] += sizeof(uint64_t); // Size of the list
126
+ }
127
+ }
128
+
129
+ // Recurse
130
+ D_ASSERT(source_format.child_formats.size() == 1);
131
+ auto &child_source_v = ListVector::GetEntry(source_v);
132
+ auto &child_format = source_format.child_formats[0];
133
+ TupleDataCollection::WithinListHeapComputeSizes(heap_sizes_v, child_source_v, child_format, append_sel,
134
+ append_count, source_vector_data);
135
+ break;
136
+ }
137
+ default:
138
+ throw NotImplementedException("ComputeHeapSizes for %s", LogicalTypeIdToString(source_v.GetType().id()));
139
+ }
140
+ }
141
+
142
+ void TupleDataCollection::WithinListHeapComputeSizes(Vector &heap_sizes_v, const Vector &source_v,
143
+ TupleDataVectorFormat &source_format,
144
+ const SelectionVector &append_sel, const idx_t append_count,
145
+ const UnifiedVectorFormat &list_data) {
146
+ auto type = source_v.GetType().InternalType();
147
+ if (TypeIsConstantSize(type)) {
148
+ TupleDataCollection::ComputeFixedWithinListHeapSizes(heap_sizes_v, source_v, source_format, append_sel,
149
+ append_count, list_data);
150
+ return;
151
+ }
152
+
153
+ switch (type) {
154
+ case PhysicalType::VARCHAR:
155
+ TupleDataCollection::StringWithinListComputeHeapSizes(heap_sizes_v, source_v, source_format, append_sel,
156
+ append_count, list_data);
157
+ break;
158
+ case PhysicalType::STRUCT:
159
+ TupleDataCollection::StructWithinListComputeHeapSizes(heap_sizes_v, source_v, source_format, append_sel,
160
+ append_count, list_data);
161
+ break;
162
+ case PhysicalType::LIST:
163
+ TupleDataCollection::ListWithinListComputeHeapSizes(heap_sizes_v, source_v, source_format, append_sel,
164
+ append_count, list_data);
165
+ break;
166
+ default:
167
+ throw NotImplementedException("WithinListHeapComputeSizes for %s",
168
+ LogicalTypeIdToString(source_v.GetType().id()));
169
+ }
170
+ }
171
+
172
+ void TupleDataCollection::ComputeFixedWithinListHeapSizes(Vector &heap_sizes_v, const Vector &source_v,
173
+ TupleDataVectorFormat &source_format,
174
+ const SelectionVector &append_sel, const idx_t append_count,
175
+ const UnifiedVectorFormat &list_data) {
176
+ // List data
177
+ const auto list_sel = *list_data.sel;
178
+ const auto list_entries = (list_entry_t *)list_data.data;
179
+ const auto &list_validity = list_data.validity;
180
+
181
+ // Target
182
+ auto heap_sizes = FlatVector::GetData<idx_t>(heap_sizes_v);
183
+
184
+ D_ASSERT(TypeIsConstantSize(source_v.GetType().InternalType()));
185
+ const auto type_size = GetTypeIdSize(source_v.GetType().InternalType());
186
+ for (idx_t i = 0; i < append_count; i++) {
187
+ const auto list_idx = list_sel.get_index(append_sel.get_index(i));
188
+ if (!list_validity.RowIsValid(list_idx)) {
189
+ continue; // Original list entry is invalid - no need to serialize the child
190
+ }
191
+
192
+ // Get the current list length
193
+ const auto &list_length = list_entries[list_idx].length;
194
+
195
+ // Size is validity mask and all values
196
+ auto &heap_size = heap_sizes[i];
197
+ heap_size += ValidityBytes::SizeInBytes(list_length);
198
+ heap_size += list_length * type_size;
199
+ }
200
+ }
201
+
202
+ void TupleDataCollection::StringWithinListComputeHeapSizes(Vector &heap_sizes_v, const Vector &source_v,
203
+ TupleDataVectorFormat &source_format,
204
+ const SelectionVector &append_sel, const idx_t append_count,
205
+ const UnifiedVectorFormat &list_data) {
206
+ // Source
207
+ const auto &source_data = source_format.data;
208
+ const auto source_sel = *source_data.sel;
209
+ const auto data = (string_t *)source_data.data;
210
+ const auto &source_validity = source_data.validity;
211
+
212
+ // List data
213
+ const auto list_sel = *list_data.sel;
214
+ const auto list_entries = (list_entry_t *)list_data.data;
215
+ const auto &list_validity = list_data.validity;
216
+
217
+ // Target
218
+ auto heap_sizes = FlatVector::GetData<idx_t>(heap_sizes_v);
219
+
220
+ for (idx_t i = 0; i < append_count; i++) {
221
+ const auto list_idx = list_sel.get_index(append_sel.get_index(i));
222
+ if (!list_validity.RowIsValid(list_idx)) {
223
+ continue; // Original list entry is invalid - no need to serialize the child
224
+ }
225
+
226
+ // Get the current list entry
227
+ const auto &list_entry = list_entries[list_idx];
228
+ const auto &list_offset = list_entry.offset;
229
+ const auto &list_length = list_entry.length;
230
+
231
+ // Size is validity mask and all string sizes
232
+ auto &heap_size = heap_sizes[i];
233
+ heap_size += ValidityBytes::SizeInBytes(list_length);
234
+ heap_size += list_length * TupleDataWithinListFixedSize<string_t>();
235
+
236
+ // Plus all the actual strings
237
+ for (idx_t child_i = 0; child_i < list_length; child_i++) {
238
+ const auto child_source_idx = source_sel.get_index(list_offset + child_i);
239
+ if (source_validity.RowIsValid(child_source_idx)) {
240
+ heap_size += data[child_source_idx].GetSize();
241
+ }
242
+ }
243
+ }
244
+ }
245
+
246
+ void TupleDataCollection::StructWithinListComputeHeapSizes(Vector &heap_sizes_v, const Vector &source_v,
247
+ TupleDataVectorFormat &source_format,
248
+ const SelectionVector &append_sel, const idx_t append_count,
249
+ const UnifiedVectorFormat &list_data) {
250
+ // List data
251
+ const auto list_sel = *list_data.sel;
252
+ const auto list_entries = (list_entry_t *)list_data.data;
253
+ const auto &list_validity = list_data.validity;
254
+
255
+ // Target
256
+ auto heap_sizes = FlatVector::GetData<idx_t>(heap_sizes_v);
257
+
258
+ for (idx_t i = 0; i < append_count; i++) {
259
+ const auto list_idx = list_sel.get_index(append_sel.get_index(i));
260
+ if (!list_validity.RowIsValid(list_idx)) {
261
+ continue; // Original list entry is invalid - no need to serialize the child
262
+ }
263
+
264
+ // Get the current list length
265
+ const auto &list_length = list_entries[list_idx].length;
266
+
267
+ // Size is just the validity mask
268
+ heap_sizes[i] += ValidityBytes::SizeInBytes(list_length);
269
+ }
270
+
271
+ // Recurse
272
+ auto &struct_sources = StructVector::GetEntries(source_v);
273
+ for (idx_t struct_col_idx = 0; struct_col_idx < struct_sources.size(); struct_col_idx++) {
274
+ auto &struct_source = *struct_sources[struct_col_idx];
275
+ auto &struct_format = source_format.child_formats[struct_col_idx];
276
+ TupleDataCollection::WithinListHeapComputeSizes(heap_sizes_v, struct_source, struct_format, append_sel,
277
+ append_count, list_data);
278
+ }
279
+ }
280
+
281
+ static void ApplySliceRecursive(const Vector &source_v, TupleDataVectorFormat &source_format,
282
+ const SelectionVector &combined_sel, const idx_t count) {
283
+ D_ASSERT(source_format.combined_list_data);
284
+ auto &combined_list_data = *source_format.combined_list_data;
285
+
286
+ combined_list_data.selection_data = source_format.data.sel->Slice(combined_sel, count);
287
+ source_format.data.owned_sel.Initialize(combined_list_data.selection_data);
288
+ source_format.data.sel = &source_format.data.owned_sel;
289
+
290
+ if (source_v.GetType().InternalType() == PhysicalType::STRUCT) {
291
+ // We have to apply it to the child vectors too
292
+ auto &struct_sources = StructVector::GetEntries(source_v);
293
+ for (idx_t struct_col_idx = 0; struct_col_idx < struct_sources.size(); struct_col_idx++) {
294
+ auto &struct_source = *struct_sources[struct_col_idx];
295
+ auto &struct_format = source_format.child_formats[struct_col_idx];
296
+ struct_format.combined_list_data = make_uniq<CombinedListData>();
297
+ ApplySliceRecursive(struct_source, struct_format, *source_format.data.sel, count);
298
+ }
299
+ }
300
+ }
301
+
302
+ void TupleDataCollection::ListWithinListComputeHeapSizes(Vector &heap_sizes_v, const Vector &source_v,
303
+ TupleDataVectorFormat &source_format,
304
+ const SelectionVector &append_sel, const idx_t append_count,
305
+ const UnifiedVectorFormat &list_data) {
306
+ // List data (of the list Vector that "source_v" is in)
307
+ const auto list_sel = *list_data.sel;
308
+ const auto list_entries = (list_entry_t *)list_data.data;
309
+ const auto &list_validity = list_data.validity;
310
+
311
+ // Child list
312
+ const auto &child_list_data = source_format.data;
313
+ const auto child_list_sel = *child_list_data.sel;
314
+ const auto child_list_entries = (list_entry_t *)child_list_data.data;
315
+ const auto &child_list_validity = child_list_data.validity;
316
+
317
+ // Target
318
+ auto heap_sizes = FlatVector::GetData<idx_t>(heap_sizes_v);
319
+
320
+ // Construct combined list entries and a selection vector for the child list child
321
+ auto &child_format = source_format.child_formats[0];
322
+ child_format.combined_list_data = make_uniq<CombinedListData>();
323
+ auto &combined_list_data = *child_format.combined_list_data;
324
+ auto &combined_list_entries = combined_list_data.combined_list_entries;
325
+ const auto child_list_child_count = ListVector::GetListSize(source_v);
326
+ SelectionVector combined_sel(child_list_child_count);
327
+ for (idx_t i = 0; i < child_list_child_count; i++) {
328
+ combined_sel.set_index(i, 0);
329
+ }
330
+ idx_t combined_list_offset = 0;
331
+
332
+ for (idx_t i = 0; i < append_count; i++) {
333
+ const auto list_idx = list_sel.get_index(append_sel.get_index(i));
334
+ if (!list_validity.RowIsValid(list_idx)) {
335
+ continue; // Original list entry is invalid - no need to serialize the child list
336
+ }
337
+
338
+ // Get the current list entry
339
+ const auto &list_entry = list_entries[list_idx];
340
+ const auto &list_offset = list_entry.offset;
341
+ const auto &list_length = list_entry.length;
342
+
343
+ // Size is the validity mask and the list sizes
344
+ auto &heap_size = heap_sizes[i];
345
+ heap_size += ValidityBytes::SizeInBytes(list_length);
346
+ heap_size += list_length * sizeof(uint64_t);
347
+
348
+ idx_t child_list_size = 0;
349
+ for (idx_t child_i = 0; child_i < list_length; child_i++) {
350
+ const auto child_list_idx = child_list_sel.get_index(list_offset + child_i);
351
+ const auto &child_list_entry = child_list_entries[child_list_idx];
352
+ if (child_list_validity.RowIsValid(child_list_idx)) {
353
+ const auto &child_list_offset = child_list_entry.offset;
354
+ const auto &child_list_length = child_list_entry.length;
355
+
356
+ // Add this child's list entry's to the combined selection vector
357
+ for (idx_t child_value_i = 0; child_value_i < child_list_length; child_value_i++) {
358
+ auto idx = combined_list_offset + child_list_size + child_value_i;
359
+ auto loc = child_list_offset + child_value_i;
360
+ combined_sel.set_index(idx, loc);
361
+ }
362
+
363
+ child_list_size += child_list_length;
364
+ }
365
+ }
366
+
367
+ // Combine the child list entries into one
368
+ combined_list_entries[list_idx] = {combined_list_offset, child_list_size};
369
+ combined_list_offset += child_list_size;
370
+ }
371
+
372
+ // Create a combined child_list_data to be used as list_data in the recursion
373
+ auto &combined_child_list_data = combined_list_data.combined_data;
374
+ combined_child_list_data.sel = list_data.sel;
375
+ combined_child_list_data.data = (data_ptr_t)combined_list_entries;
376
+ combined_child_list_data.validity = list_data.validity;
377
+
378
+ // Combine the selection vectors
379
+ D_ASSERT(source_format.child_formats.size() == 1);
380
+ auto &child_source = ListVector::GetEntry(source_v);
381
+ ApplySliceRecursive(child_source, child_format, combined_sel, child_list_child_count);
382
+
383
+ // Recurse
384
+ TupleDataCollection::WithinListHeapComputeSizes(heap_sizes_v, child_source, child_format, append_sel, append_count,
385
+ combined_child_list_data);
386
+ }
387
+
388
+ void TupleDataCollection::Scatter(TupleDataChunkState &chunk_state, const DataChunk &new_chunk,
389
+ const SelectionVector &append_sel, const idx_t append_count) const {
390
+ const auto row_locations = FlatVector::GetData<data_ptr_t>(chunk_state.row_locations);
391
+
392
+ // Set the validity mask for each row before inserting data
393
+ const auto validity_bytes = ValidityBytes::SizeInBytes(layout.ColumnCount());
394
+ for (idx_t i = 0; i < append_count; i++) {
395
+ FastMemset(row_locations[i], ~0, validity_bytes);
396
+ }
397
+
398
+ if (!layout.AllConstant()) {
399
+ // Set the heap size for each row
400
+ const auto heap_size_offset = layout.GetHeapSizeOffset();
401
+ const auto heap_sizes = FlatVector::GetData<idx_t>(chunk_state.heap_sizes);
402
+ for (idx_t i = 0; i < append_count; i++) {
403
+ Store<uint32_t>(heap_sizes[i], row_locations[i] + heap_size_offset);
404
+ }
405
+ }
406
+
407
+ // Write the data
408
+ for (const auto &col_idx : chunk_state.column_ids) {
409
+ Scatter(chunk_state, new_chunk.data[col_idx], col_idx, append_sel, append_count);
410
+ }
411
+ }
412
+
413
+ void TupleDataCollection::Scatter(TupleDataChunkState &chunk_state, const Vector &source, const column_t column_id,
414
+ const SelectionVector &append_sel, const idx_t append_count) const {
415
+ const auto &scatter_function = scatter_functions[column_id];
416
+ scatter_function.function(source, chunk_state.vector_data[column_id], append_sel, append_count, layout,
417
+ chunk_state.row_locations, chunk_state.heap_locations, column_id,
418
+ chunk_state.vector_data[column_id].data, scatter_function.child_functions);
419
+ }
420
+
421
+ template <class T>
422
+ static void TupleDataTemplatedScatter(const Vector &source, const TupleDataVectorFormat &source_format,
423
+ const SelectionVector &append_sel, const idx_t append_count,
424
+ const TupleDataLayout &layout, Vector &row_locations, Vector &heap_locations,
425
+ const idx_t col_idx, const UnifiedVectorFormat &dummy_arg,
426
+ const vector<TupleDataScatterFunction> &child_functions) {
427
+ // Source
428
+ const auto &source_data = source_format.data;
429
+ const auto source_sel = *source_data.sel;
430
+ const auto data = (T *)source_data.data;
431
+ const auto &validity = source_data.validity;
432
+
433
+ // Target
434
+ auto target_locations = FlatVector::GetData<data_ptr_t>(row_locations);
435
+ auto target_heap_locations = FlatVector::GetData<data_ptr_t>(heap_locations);
436
+
437
+ // Precompute mask indexes
438
+ idx_t entry_idx;
439
+ idx_t idx_in_entry;
440
+ ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry);
441
+
442
+ const auto offset_in_row = layout.GetOffsets()[col_idx];
443
+ if (validity.AllValid()) {
444
+ for (idx_t i = 0; i < append_count; i++) {
445
+ const auto source_idx = source_sel.get_index(append_sel.get_index(i));
446
+ TupleDataValueStore<T>(data[source_idx], target_locations[i], offset_in_row, target_heap_locations[i]);
447
+ }
448
+ } else {
449
+ for (idx_t i = 0; i < append_count; i++) {
450
+ const auto source_idx = source_sel.get_index(append_sel.get_index(i));
451
+ if (validity.RowIsValid(source_idx)) {
452
+ TupleDataValueStore<T>(data[source_idx], target_locations[i], offset_in_row, target_heap_locations[i]);
453
+ } else {
454
+ TupleDataValueStore<T>(NullValue<T>(), target_locations[i], offset_in_row, target_heap_locations[i]);
455
+ ValidityBytes(target_locations[i]).SetInvalidUnsafe(entry_idx, idx_in_entry);
456
+ }
457
+ }
458
+ }
459
+ }
460
+
461
//! Scatters a STRUCT column: sets the struct's validity bit in the parent row,
//! initializes a nested validity mask at the struct's offset within the row,
//! then recursively scatters each struct child into that nested layout.
static void TupleDataStructScatter(const Vector &source, const TupleDataVectorFormat &source_format,
                                   const SelectionVector &append_sel, const idx_t append_count,
                                   const TupleDataLayout &layout, Vector &row_locations, Vector &heap_locations,
                                   const idx_t col_idx, const UnifiedVectorFormat &dummy_arg,
                                   const vector<TupleDataScatterFunction> &child_functions) {
	// Source (unified format of the struct vector itself; children have their own formats)
	const auto &source_data = source_format.data;
	const auto source_sel = *source_data.sel;
	const auto &validity = source_data.validity;

	// Target row pointers
	auto target_locations = FlatVector::GetData<data_ptr_t>(row_locations);

	// Precompute mask indexes for this column's bit in the parent row's validity mask
	idx_t entry_idx;
	idx_t idx_in_entry;
	ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry);

	// Set validity of the STRUCT in this layout
	// (rows were pre-initialized all-valid, so only invalid rows need a write)
	if (!validity.AllValid()) {
		for (idx_t i = 0; i < append_count; i++) {
			const auto source_idx = source_sel.get_index(append_sel.get_index(i));
			if (!validity.RowIsValid(source_idx)) {
				ValidityBytes(target_locations[i]).SetInvalidUnsafe(entry_idx, idx_in_entry);
			}
		}
	}

	// Create a Vector of pointers to the TupleDataLayout of the STRUCT
	// (the struct's fields are stored inline in the row at offset_in_row)
	Vector struct_row_locations(LogicalType::POINTER, append_count);
	auto struct_target_locations = FlatVector::GetData<data_ptr_t>(struct_row_locations);
	const auto offset_in_row = layout.GetOffsets()[col_idx];
	for (idx_t i = 0; i < append_count; i++) {
		struct_target_locations[i] = target_locations[i] + offset_in_row;
	}

	const auto &struct_layout = layout.GetStructLayout(col_idx);
	auto &struct_sources = StructVector::GetEntries(source);
	D_ASSERT(struct_layout.ColumnCount() == struct_sources.size());

	// Set the validity of the entries within the STRUCTs
	// (all-valid init, same as the top-level rows; must happen before children clear bits)
	const auto validity_bytes = ValidityBytes::SizeInBytes(struct_layout.ColumnCount());
	for (idx_t i = 0; i < append_count; i++) {
		memset(struct_target_locations[i], ~0, validity_bytes);
	}

	// Recurse through the struct children, treating the nested layout as the row layout
	for (idx_t struct_col_idx = 0; struct_col_idx < struct_layout.ColumnCount(); struct_col_idx++) {
		auto &struct_source = *struct_sources[struct_col_idx];
		const auto &struct_source_format = source_format.child_formats[struct_col_idx];
		const auto &struct_scatter_function = child_functions[struct_col_idx];
		struct_scatter_function.function(struct_source, struct_source_format, append_sel, append_count, struct_layout,
		                                 struct_row_locations, heap_locations, struct_col_idx, dummy_arg,
		                                 struct_scatter_function.child_functions);
	}
}
517
+
518
//! Scatters a top-level LIST column: stores a pointer to the list's heap data in the
//! row, writes the list length at the start of that heap data, then recurses into the
//! child vector with a "within list" scatter function (the original format, containing
//! the list entries, is forwarded as the extra list_data argument).
static void TupleDataListScatter(const Vector &source, const TupleDataVectorFormat &source_format,
                                 const SelectionVector &append_sel, const idx_t append_count,
                                 const TupleDataLayout &layout, Vector &row_locations, Vector &heap_locations,
                                 const idx_t col_idx, const UnifiedVectorFormat &dummy_arg,
                                 const vector<TupleDataScatterFunction> &child_functions) {
	// Source
	const auto &source_data = source_format.data;
	const auto source_sel = *source_data.sel;
	const auto data = (list_entry_t *)source_data.data;
	const auto &validity = source_data.validity;

	// Target
	auto target_locations = FlatVector::GetData<data_ptr_t>(row_locations);
	auto target_heap_locations = FlatVector::GetData<data_ptr_t>(heap_locations);

	// Precompute mask indexes
	idx_t entry_idx;
	idx_t idx_in_entry;
	ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry);

	// Set validity of the LIST in this layout, and store pointer to where it's stored
	const auto offset_in_row = layout.GetOffsets()[col_idx];
	for (idx_t i = 0; i < append_count; i++) {
		const auto source_idx = source_sel.get_index(append_sel.get_index(i));
		if (validity.RowIsValid(source_idx)) {
			// The row holds only a pointer; the list payload lives in the heap
			auto &target_heap_location = target_heap_locations[i];
			Store<data_ptr_t>(target_heap_location, target_locations[i] + offset_in_row);

			// Store list length and skip over it
			// (the heap cursor is advanced in place so the recursion writes right after it)
			Store<uint64_t>(data[source_idx].length, target_heap_location);
			target_heap_location += sizeof(uint64_t);
		} else {
			ValidityBytes(target_locations[i]).SetInvalidUnsafe(entry_idx, idx_in_entry);
		}
	}

	// Recurse
	D_ASSERT(child_functions.size() == 1);
	auto &child_source = ListVector::GetEntry(source);
	auto &child_format = source_format.child_formats[0];
	const auto &child_function = child_functions[0];
	child_function.function(child_source, child_format, append_sel, append_count, layout, row_locations, heap_locations,
	                        col_idx, source_format.data, child_function.child_functions);
}
562
+
563
//! Scatters the children of a list into the heap. Per (valid) list entry, the heap layout
//! written here is: [child validity bytes][list_length fixed-size slots]; the store helper
//! presumably appends any variable-size payload (e.g. string data) after the fixed-size
//! area via the heap cursor it receives — TODO confirm against TupleDataWithinListValueStore.
template <class T>
static void TupleDataTemplatedWithinListScatter(const Vector &source, const TupleDataVectorFormat &source_format,
                                                const SelectionVector &append_sel, const idx_t append_count,
                                                const TupleDataLayout &layout, Vector &row_locations,
                                                Vector &heap_locations, const idx_t col_idx,
                                                const UnifiedVectorFormat &list_data,
                                                const vector<TupleDataScatterFunction> &child_functions) {
	// Source (the list's child vector)
	const auto &source_data = source_format.data;
	const auto source_sel = *source_data.sel;
	const auto data = (T *)source_data.data;
	const auto &source_validity = source_data.validity;

	// List data (entries/validity of the parent list vector)
	const auto list_sel = *list_data.sel;
	const auto list_entries = (list_entry_t *)list_data.data;
	const auto &list_validity = list_data.validity;

	// Target (heap cursors, already positioned past the list length by the LIST scatter)
	auto target_heap_locations = FlatVector::GetData<data_ptr_t>(heap_locations);

	for (idx_t i = 0; i < append_count; i++) {
		const auto list_idx = list_sel.get_index(append_sel.get_index(i));
		if (!list_validity.RowIsValid(list_idx)) {
			continue; // Original list entry is invalid - no need to serialize the child
		}

		// Get the current list entry
		const auto &list_entry = list_entries[list_idx];
		const auto &list_offset = list_entry.offset;
		const auto &list_length = list_entry.length;

		// Initialize validity mask and skip heap pointer over it
		auto &target_heap_location = target_heap_locations[i];
		ValidityBytes child_mask(target_heap_location);
		child_mask.SetAllValid(list_length);
		target_heap_location += ValidityBytes::SizeInBytes(list_length);

		// Get the start to the fixed-size data and skip the heap pointer over it
		const auto child_data_location = target_heap_location;
		target_heap_location += list_length * TupleDataWithinListFixedSize<T>();

		// Store the data and validity belonging to this list entry
		for (idx_t child_i = 0; child_i < list_length; child_i++) {
			const auto child_source_idx = source_sel.get_index(list_offset + child_i);
			if (source_validity.RowIsValid(child_source_idx)) {
				TupleDataWithinListValueStore<T>(data[child_source_idx],
				                                 child_data_location + child_i * TupleDataWithinListFixedSize<T>(),
				                                 target_heap_location);
			} else {
				// NULL child: fixed-size slot is left uninitialized, only the bit is cleared
				child_mask.SetInvalidUnsafe(child_i);
			}
		}
	}
}
618
+
619
//! Scatters STRUCT children of a list into the heap. Writes only the per-entry child
//! validity mask for the struct itself (structs have no fixed-size slot here), then
//! recurses into each struct member with the same list_data so members serialize their
//! own values per list entry.
static void TupleDataStructWithinListScatter(const Vector &source, const TupleDataVectorFormat &source_format,
                                             const SelectionVector &append_sel, const idx_t append_count,
                                             const TupleDataLayout &layout, Vector &row_locations,
                                             Vector &heap_locations, const idx_t col_idx,
                                             const UnifiedVectorFormat &list_data,
                                             const vector<TupleDataScatterFunction> &child_functions) {
	// Source (the struct vector that is the list's child)
	const auto &source_data = source_format.data;
	const auto source_sel = *source_data.sel;
	const auto &source_validity = source_data.validity;

	// List data (entries/validity of the parent list vector)
	const auto list_sel = *list_data.sel;
	const auto list_entries = (list_entry_t *)list_data.data;
	const auto &list_validity = list_data.validity;

	// Target (heap cursors)
	auto target_heap_locations = FlatVector::GetData<data_ptr_t>(heap_locations);

	// Initialize the validity of the STRUCTs
	for (idx_t i = 0; i < append_count; i++) {
		const auto list_idx = list_sel.get_index(append_sel.get_index(i));
		if (!list_validity.RowIsValid(list_idx)) {
			continue; // Original list entry is invalid - no need to serialize the child
		}

		// Get the current list entry
		const auto &list_entry = list_entries[list_idx];
		const auto &list_offset = list_entry.offset;
		const auto &list_length = list_entry.length;

		// Initialize validity mask and skip the heap pointer over it
		// (advancing the cursor here positions the member recursion right after the mask)
		auto &target_heap_location = target_heap_locations[i];
		ValidityBytes child_mask(target_heap_location);
		child_mask.SetAllValid(list_length);
		target_heap_location += ValidityBytes::SizeInBytes(list_length);

		// Store the validity belonging to this list entry
		for (idx_t child_i = 0; child_i < list_length; child_i++) {
			const auto child_source_idx = source_sel.get_index(list_offset + child_i);
			if (!source_validity.RowIsValid(child_source_idx)) {
				child_mask.SetInvalidUnsafe(child_i);
			}
		}
	}

	// Recurse through the children
	auto &struct_sources = StructVector::GetEntries(source);
	for (idx_t struct_col_idx = 0; struct_col_idx < struct_sources.size(); struct_col_idx++) {
		auto &struct_source = *struct_sources[struct_col_idx];
		auto &struct_format = source_format.child_formats[struct_col_idx];
		const auto &struct_scatter_function = child_functions[struct_col_idx];
		struct_scatter_function.function(struct_source, struct_format, append_sel, append_count, layout, row_locations,
		                                 heap_locations, struct_col_idx, list_data,
		                                 struct_scatter_function.child_functions);
	}
}
676
+
677
//! Scatters LIST-of-LIST children into the heap. Per (valid) outer list entry, writes
//! [child validity bytes][list_length uint64 lengths of the inner lists], then recurses
//! into the grandchild vector using the pre-combined list data. NOTE(review): this relies
//! on child_format.combined_list_data having been populated by the heap-size computation
//! phase for the same append — confirm the compute-sizes pass always runs first.
static void TupleDataListWithinListScatter(const Vector &child_list, const TupleDataVectorFormat &child_list_format,
                                           const SelectionVector &append_sel, const idx_t append_count,
                                           const TupleDataLayout &layout, Vector &row_locations, Vector &heap_locations,
                                           const idx_t col_idx, const UnifiedVectorFormat &list_data,
                                           const vector<TupleDataScatterFunction> &child_functions) {
	// List data (of the list Vector that "child_list" is in)
	const auto list_sel = *list_data.sel;
	const auto list_entries = (list_entry_t *)list_data.data;
	const auto &list_validity = list_data.validity;

	// Child list
	const auto &child_list_data = child_list_format.data;
	const auto child_list_sel = *child_list_data.sel;
	const auto child_list_entries = (list_entry_t *)child_list_data.data;
	const auto &child_list_validity = child_list_data.validity;

	// Target (heap cursors)
	auto target_heap_locations = FlatVector::GetData<data_ptr_t>(heap_locations);

	for (idx_t i = 0; i < append_count; i++) {
		const auto list_idx = list_sel.get_index(append_sel.get_index(i));
		if (!list_validity.RowIsValid(list_idx)) {
			continue; // Original list entry is invalid - no need to serialize the child list
		}

		// Get the current list entry
		const auto &list_entry = list_entries[list_idx];
		const auto &list_offset = list_entry.offset;
		const auto &list_length = list_entry.length;

		// Initialize validity mask and skip heap pointer over it
		auto &target_heap_location = target_heap_locations[i];
		ValidityBytes child_mask(target_heap_location);
		child_mask.SetAllValid(list_length);
		target_heap_location += ValidityBytes::SizeInBytes(list_length);

		// Get the start to the fixed-size data and skip the heap pointer over it
		// (inner lists serialize as uint64 lengths; their contents follow in the recursion)
		const auto child_data_location = target_heap_location;
		target_heap_location += list_length * sizeof(uint64_t);

		for (idx_t child_i = 0; child_i < list_length; child_i++) {
			const auto child_list_idx = child_list_sel.get_index(list_offset + child_i);
			if (child_list_validity.RowIsValid(child_list_idx)) {
				const auto &child_list_length = child_list_entries[child_list_idx].length;
				Store<uint64_t>(child_list_length, child_data_location + child_i * sizeof(uint64_t));
			} else {
				child_mask.SetInvalidUnsafe(child_i);
			}
		}
	}

	// Recurse
	D_ASSERT(child_functions.size() == 1);
	auto &child_vec = ListVector::GetEntry(child_list);
	auto &child_format = child_list_format.child_formats[0];
	auto &combined_child_list_data = child_format.combined_list_data->combined_data;
	const auto &child_function = child_functions[0];
	child_function.function(child_vec, child_format, append_sel, append_count, layout, row_locations, heap_locations,
	                        col_idx, combined_child_list_data, child_function.child_functions);
}
737
+
738
+ template <class T>
739
+ tuple_data_scatter_function_t TupleDataGetScatterFunction(bool within_list) {
740
+ return within_list ? TupleDataTemplatedWithinListScatter<T> : TupleDataTemplatedScatter<T>;
741
+ }
742
+
743
+ TupleDataScatterFunction TupleDataCollection::GetScatterFunction(const LogicalType &type, bool within_list) {
744
+ TupleDataScatterFunction result;
745
+ switch (type.InternalType()) {
746
+ case PhysicalType::BOOL:
747
+ result.function = TupleDataGetScatterFunction<bool>(within_list);
748
+ break;
749
+ case PhysicalType::INT8:
750
+ result.function = TupleDataGetScatterFunction<int8_t>(within_list);
751
+ break;
752
+ case PhysicalType::INT16:
753
+ result.function = TupleDataGetScatterFunction<int16_t>(within_list);
754
+ break;
755
+ case PhysicalType::INT32:
756
+ result.function = TupleDataGetScatterFunction<int32_t>(within_list);
757
+ break;
758
+ case PhysicalType::INT64:
759
+ result.function = TupleDataGetScatterFunction<int64_t>(within_list);
760
+ break;
761
+ case PhysicalType::INT128:
762
+ result.function = TupleDataGetScatterFunction<hugeint_t>(within_list);
763
+ break;
764
+ case PhysicalType::UINT8:
765
+ result.function = TupleDataGetScatterFunction<uint8_t>(within_list);
766
+ break;
767
+ case PhysicalType::UINT16:
768
+ result.function = TupleDataGetScatterFunction<uint16_t>(within_list);
769
+ break;
770
+ case PhysicalType::UINT32:
771
+ result.function = TupleDataGetScatterFunction<uint32_t>(within_list);
772
+ break;
773
+ case PhysicalType::UINT64:
774
+ result.function = TupleDataGetScatterFunction<uint64_t>(within_list);
775
+ break;
776
+ case PhysicalType::FLOAT:
777
+ result.function = TupleDataGetScatterFunction<float>(within_list);
778
+ break;
779
+ case PhysicalType::DOUBLE:
780
+ result.function = TupleDataGetScatterFunction<double>(within_list);
781
+ break;
782
+ case PhysicalType::INTERVAL:
783
+ result.function = TupleDataGetScatterFunction<interval_t>(within_list);
784
+ break;
785
+ case PhysicalType::VARCHAR:
786
+ result.function = TupleDataGetScatterFunction<string_t>(within_list);
787
+ break;
788
+ case PhysicalType::STRUCT: {
789
+ result.function = within_list ? TupleDataStructWithinListScatter : TupleDataStructScatter;
790
+ for (const auto &child_type : StructType::GetChildTypes(type)) {
791
+ result.child_functions.push_back(GetScatterFunction(child_type.second, within_list));
792
+ }
793
+ break;
794
+ }
795
+ case PhysicalType::LIST:
796
+ result.function = within_list ? TupleDataListWithinListScatter : TupleDataListScatter;
797
+ result.child_functions.emplace_back(GetScatterFunction(ListType::GetChildType(type), true));
798
+ break;
799
+ default:
800
+ throw InternalException("Unsupported type for TupleDataCollection::GetScatterFunction");
801
+ }
802
+ return result;
803
+ }
804
+
805
+ void TupleDataCollection::Gather(Vector &row_locations, const SelectionVector &scan_sel, const idx_t scan_count,
806
+ DataChunk &result, const SelectionVector &target_sel) const {
807
+ D_ASSERT(result.ColumnCount() == layout.ColumnCount());
808
+ vector<column_t> column_ids;
809
+ column_ids.reserve(layout.ColumnCount());
810
+ for (idx_t col_idx = 0; col_idx < layout.ColumnCount(); col_idx++) {
811
+ column_ids.emplace_back(col_idx);
812
+ }
813
+ Gather(row_locations, scan_sel, scan_count, column_ids, result, target_sel);
814
+ }
815
+
816
+ void TupleDataCollection::Gather(Vector &row_locations, const SelectionVector &scan_sel, const idx_t scan_count,
817
+ const vector<column_t> &column_ids, DataChunk &result,
818
+ const SelectionVector &target_sel) const {
819
+ for (idx_t col_idx = 0; col_idx < column_ids.size(); col_idx++) {
820
+ Gather(row_locations, scan_sel, scan_count, column_ids[col_idx], result.data[col_idx], target_sel);
821
+ }
822
+ }
823
+
824
+ void TupleDataCollection::Gather(Vector &row_locations, const SelectionVector &scan_sel, const idx_t scan_count,
825
+ const column_t column_id, Vector &result, const SelectionVector &target_sel) const {
826
+ const auto &gather_function = gather_functions[column_id];
827
+ gather_function.function(layout, row_locations, column_id, scan_sel, scan_count, result, target_sel, result,
828
+ gather_function.child_functions);
829
+ }
830
+
831
+ template <class T>
832
+ static void TupleDataTemplatedGather(const TupleDataLayout &layout, Vector &row_locations, const idx_t col_idx,
833
+ const SelectionVector &scan_sel, const idx_t scan_count, Vector &target,
834
+ const SelectionVector &target_sel, Vector &dummy_vector,
835
+ const vector<TupleDataGatherFunction> &child_functions) {
836
+ // Source
837
+ auto source_locations = FlatVector::GetData<data_ptr_t>(row_locations);
838
+
839
+ // Target
840
+ auto target_data = FlatVector::GetData<T>(target);
841
+ auto &target_validity = FlatVector::Validity(target);
842
+
843
+ // Precompute mask indexes
844
+ idx_t entry_idx;
845
+ idx_t idx_in_entry;
846
+ ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry);
847
+
848
+ const auto offset_in_row = layout.GetOffsets()[col_idx];
849
+ for (idx_t i = 0; i < scan_count; i++) {
850
+ const auto &source_row = source_locations[scan_sel.get_index(i)];
851
+ const auto target_idx = target_sel.get_index(i);
852
+ ValidityBytes row_mask(source_row);
853
+ if (row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry)) {
854
+ target_data[target_idx] = Load<T>(source_row + offset_in_row);
855
+ } else {
856
+ target_validity.SetInvalid(target_idx);
857
+ }
858
+ }
859
+ }
860
+
861
//! Gathers a STRUCT column: propagates the struct's validity bit to the target,
//! builds pointers to the nested struct layout inside each row, then recursively
//! gathers each struct member from that nested layout.
static void TupleDataStructGather(const TupleDataLayout &layout, Vector &row_locations, const idx_t col_idx,
                                  const SelectionVector &scan_sel, const idx_t scan_count, Vector &target,
                                  const SelectionVector &target_sel, Vector &dummy_vector,
                                  const vector<TupleDataGatherFunction> &child_functions) {
	// Source row pointers
	auto source_locations = FlatVector::GetData<data_ptr_t>(row_locations);

	// Target validity
	auto &target_validity = FlatVector::Validity(target);

	// Precompute mask indexes for this column's bit in the row's validity mask
	idx_t entry_idx;
	idx_t idx_in_entry;
	ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry);

	// Get validity of the struct and create a Vector of pointers to the start of the TupleDataLayout of the STRUCT
	Vector struct_row_locations(LogicalType::POINTER);
	auto struct_source_locations = FlatVector::GetData<data_ptr_t>(struct_row_locations);
	const auto offset_in_row = layout.GetOffsets()[col_idx];
	for (idx_t i = 0; i < scan_count; i++) {
		const auto source_idx = scan_sel.get_index(i);
		const auto &source_row = source_locations[source_idx];

		// Set the validity
		ValidityBytes row_mask(source_row);
		if (!row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry)) {
			const auto target_idx = target_sel.get_index(i);
			target_validity.SetInvalid(target_idx);
		}

		// Set the pointer
		// (indexed by source_idx, not i, so the recursion can reuse the same scan_sel)
		struct_source_locations[source_idx] = source_row + offset_in_row;
	}

	// Get the struct layout and struct entries
	const auto &struct_layout = layout.GetStructLayout(col_idx);
	auto &struct_targets = StructVector::GetEntries(target);
	D_ASSERT(struct_layout.ColumnCount() == struct_targets.size());

	// Recurse through the struct children, treating the nested layout as the row layout
	for (idx_t struct_col_idx = 0; struct_col_idx < struct_layout.ColumnCount(); struct_col_idx++) {
		auto &struct_target = *struct_targets[struct_col_idx];
		const auto &struct_gather_function = child_functions[struct_col_idx];
		struct_gather_function.function(struct_layout, struct_row_locations, struct_col_idx, scan_sel, scan_count,
		                                struct_target, target_sel, dummy_vector,
		                                struct_gather_function.child_functions);
	}
}
909
+
910
//! Gathers a top-level LIST column: loads the heap pointer from each row, reads the
//! serialized list length, builds the target's list entries (appending after the
//! target's current size), reserves child space, and recurses with the heap cursors.
static void TupleDataListGather(const TupleDataLayout &layout, Vector &row_locations, const idx_t col_idx,
                                const SelectionVector &scan_sel, const idx_t scan_count, Vector &target,
                                const SelectionVector &target_sel, Vector &dummy_vector,
                                const vector<TupleDataGatherFunction> &child_functions) {
	// Source row pointers
	auto source_locations = FlatVector::GetData<data_ptr_t>(row_locations);

	// Target list entries and validity
	auto target_list_entries = FlatVector::GetData<list_entry_t>(target);
	auto &target_validity = FlatVector::Validity(target);

	// Precompute mask indexes
	idx_t entry_idx;
	idx_t idx_in_entry;
	ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry);

	// Load pointers to the data from the row
	// (this vector doubles as the "row_locations" for the within-list recursion;
	//  its validity marks which heap cursors are usable)
	Vector heap_locations(LogicalType::POINTER);
	auto source_heap_locations = FlatVector::GetData<data_ptr_t>(heap_locations);
	auto &source_heap_validity = FlatVector::Validity(heap_locations);

	const auto offset_in_row = layout.GetOffsets()[col_idx];
	uint64_t target_list_offset = 0;
	for (idx_t i = 0; i < scan_count; i++) {
		const auto source_idx = scan_sel.get_index(i);
		const auto target_idx = target_sel.get_index(i);

		const auto &source_row = source_locations[source_idx];
		ValidityBytes row_mask(source_row);
		if (row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry)) {
			auto &source_heap_location = source_heap_locations[source_idx];
			source_heap_location = Load<data_ptr_t>(source_row + offset_in_row);

			// Load list size and skip over
			const auto list_length = Load<uint64_t>(source_heap_location);
			source_heap_location += sizeof(uint64_t);

			// Initialize list entry, and increment offset
			target_list_entries[target_idx] = {target_list_offset, list_length};
			target_list_offset += list_length;
		} else {
			source_heap_validity.SetInvalid(source_idx);
			target_validity.SetInvalid(target_idx);
		}
	}
	// Make room in the target's child vector; gathered entries append after existing data
	auto list_size_before = ListVector::GetListSize(target);
	ListVector::Reserve(target, list_size_before + target_list_offset);
	ListVector::SetListSize(target, list_size_before + target_list_offset);

	// Recurse
	// (note: "list_size_before" is passed in the slot the child uses as its base offset,
	//  and "target" is passed so the child can read the list entries built above)
	D_ASSERT(child_functions.size() == 1);
	const auto &child_function = child_functions[0];
	child_function.function(layout, heap_locations, list_size_before, scan_sel, scan_count,
	                        ListVector::GetEntry(target), target_sel, target, child_function.child_functions);
}
965
+
966
//! Gathers list children from the heap, mirroring TupleDataTemplatedWithinListScatter's
//! layout: per valid list entry, [child validity bytes][list_length fixed-size slots],
//! with variable-size payload read via the advancing heap cursor.
template <class T>
static void TupleDataTemplatedWithinListGather(const TupleDataLayout &layout, Vector &heap_locations,
                                               const idx_t list_size_before, const SelectionVector &scan_sel,
                                               const idx_t scan_count, Vector &target,
                                               const SelectionVector &target_sel, Vector &list_vector,
                                               const vector<TupleDataGatherFunction> &child_functions) {
	// Source heap cursors (validity marks rows whose list was NULL - nothing to read)
	auto source_heap_locations = FlatVector::GetData<data_ptr_t>(heap_locations);
	auto &source_heap_validity = FlatVector::Validity(heap_locations);

	// Target child data and validity
	auto target_data = FlatVector::GetData<T>(target);
	auto &target_validity = FlatVector::Validity(target);

	// List parent (entries were built by TupleDataListGather before recursing here)
	const auto list_entries = FlatVector::GetData<list_entry_t>(list_vector);

	uint64_t target_offset = list_size_before;
	for (idx_t i = 0; i < scan_count; i++) {
		const auto source_idx = scan_sel.get_index(i);
		if (!source_heap_validity.RowIsValid(source_idx)) {
			continue;
		}

		const auto &list_length = list_entries[target_sel.get_index(i)].length;

		// Initialize validity mask
		auto &source_heap_location = source_heap_locations[source_idx];
		ValidityBytes source_mask(source_heap_location);
		source_heap_location += ValidityBytes::SizeInBytes(list_length);

		// Get the start to the fixed-size data and skip the heap pointer over it
		const auto source_data_location = source_heap_location;
		source_heap_location += list_length * TupleDataWithinListFixedSize<T>();

		// Load the child validity and data belonging to this list entry
		for (idx_t child_i = 0; child_i < list_length; child_i++) {
			if (source_mask.RowIsValidUnsafe(child_i)) {
				target_data[target_offset + child_i] = TupleDataWithinListValueLoad<T>(
				    source_data_location + child_i * TupleDataWithinListFixedSize<T>(), source_heap_location);
			} else {
				target_validity.SetInvalid(target_offset + child_i);
			}
		}
		target_offset += list_length;
	}
}
1013
+
1014
//! Gathers STRUCT children of a list from the heap: reads the per-entry struct validity
//! mask (written by TupleDataStructWithinListScatter), then recurses into each struct
//! member, which reads its own serialized values from the same heap cursors.
static void TupleDataStructWithinListGather(const TupleDataLayout &layout, Vector &heap_locations,
                                            const idx_t list_size_before, const SelectionVector &scan_sel,
                                            const idx_t scan_count, Vector &target, const SelectionVector &target_sel,
                                            Vector &list_vector,
                                            const vector<TupleDataGatherFunction> &child_functions) {
	// Source heap cursors (validity marks rows whose list was NULL)
	auto source_heap_locations = FlatVector::GetData<data_ptr_t>(heap_locations);
	auto &source_heap_validity = FlatVector::Validity(heap_locations);

	// Target struct validity
	auto &target_validity = FlatVector::Validity(target);

	// List parent (entries were built by TupleDataListGather before recursing here)
	const auto list_entries = FlatVector::GetData<list_entry_t>(list_vector);

	uint64_t target_offset = list_size_before;
	for (idx_t i = 0; i < scan_count; i++) {
		const auto source_idx = scan_sel.get_index(i);
		if (!source_heap_validity.RowIsValid(source_idx)) {
			continue;
		}

		const auto &list_length = list_entries[target_sel.get_index(i)].length;

		// Initialize validity mask and skip over it
		// (advancing the cursor positions the member recursion right after the mask)
		auto &source_heap_location = source_heap_locations[source_idx];
		ValidityBytes source_mask(source_heap_location);
		source_heap_location += ValidityBytes::SizeInBytes(list_length);

		// Load the child validity belonging to this list entry
		for (idx_t child_i = 0; child_i < list_length; child_i++) {
			if (!source_mask.RowIsValidUnsafe(child_i)) {
				target_validity.SetInvalid(target_offset + child_i);
			}
		}
		target_offset += list_length;
	}

	// Recurse through the struct members
	auto &struct_targets = StructVector::GetEntries(target);
	for (idx_t struct_col_idx = 0; struct_col_idx < struct_targets.size(); struct_col_idx++) {
		auto &struct_target = *struct_targets[struct_col_idx];
		const auto &struct_gather_function = child_functions[struct_col_idx];
		struct_gather_function.function(layout, heap_locations, list_size_before, scan_sel, scan_count, struct_target,
		                                target_sel, list_vector, struct_gather_function.child_functions);
	}
}
1061
+
1062
//! Gathers LIST values that are themselves stored inside a parent LIST: for each scanned row,
//! reads the per-child validity mask and the child list lengths from the heap, appends the child
//! list entries to `target`, and builds a "combined" list vector describing the concatenated
//! grandchildren so the recursive call can gather them.
static void TupleDataListWithinListGather(const TupleDataLayout &layout, Vector &heap_locations,
                                          const idx_t list_size_before, const SelectionVector &scan_sel,
                                          const idx_t scan_count, Vector &target, const SelectionVector &target_sel,
                                          Vector &list_vector, const vector<TupleDataGatherFunction> &child_functions) {
	// Source: heap pointers per scanned row, and which rows are valid at all
	auto source_heap_locations = FlatVector::GetData<data_ptr_t>(heap_locations);
	auto &source_heap_validity = FlatVector::Validity(heap_locations);

	// Target: list entries/validity being filled, and how many child values it already holds
	auto target_list_entries = FlatVector::GetData<list_entry_t>(target);
	auto &target_validity = FlatVector::Validity(target);
	const auto child_list_size_before = ListVector::GetListSize(target);

	// List parent: entries give the length of each gathered list
	const auto list_entries = FlatVector::GetData<list_entry_t>(list_vector);

	// We need to create a vector that has the combined list sizes (hugeint_t has same size as list_entry_t,
	// so the HUGEINT storage is reinterpreted as list_entry_t below)
	Vector combined_list_vector(LogicalType::HUGEINT);
	auto combined_list_entries = FlatVector::GetData<list_entry_t>(combined_list_vector);

	uint64_t target_offset = list_size_before;              // where the next child list entry is written
	uint64_t target_child_offset = child_list_size_before;  // where the next grandchild value will go
	for (idx_t i = 0; i < scan_count; i++) {
		const auto source_idx = scan_sel.get_index(i);
		if (!source_heap_validity.RowIsValid(source_idx)) {
			continue;
		}

		const auto &list_length = list_entries[target_sel.get_index(i)].length;

		// Initialize validity mask and skip over it
		// (advancing the referenced heap pointer also positions it for the recursive gather)
		auto &source_heap_location = source_heap_locations[source_idx];
		ValidityBytes source_mask(source_heap_location);
		source_heap_location += ValidityBytes::SizeInBytes(list_length);

		// Get the start to the fixed-size data and skip the heap pointer over it
		// (each child stores its own list length as a uint64_t)
		const auto source_data_location = source_heap_location;
		source_heap_location += list_length * sizeof(uint64_t);

		// Set the offset of the combined list entry
		auto &combined_list_entry = combined_list_entries[target_sel.get_index(i)];
		combined_list_entry.offset = target_child_offset;

		// Load the child validity and data belonging to this list entry
		for (idx_t child_i = 0; child_i < list_length; child_i++) {
			if (source_mask.RowIsValidUnsafe(child_i)) {
				auto &target_list_entry = target_list_entries[target_offset + child_i];
				target_list_entry.offset = target_child_offset;
				target_list_entry.length = Load<uint64_t>(source_data_location + child_i * sizeof(uint64_t));
				target_child_offset += target_list_entry.length;
			} else {
				target_validity.SetInvalid(target_offset + child_i);
			}
		}

		// Set the length of the combined list entry
		combined_list_entry.length = target_child_offset - combined_list_entry.offset;

		target_offset += list_length;
	}
	// Make room in the target's child vector for all gathered grandchildren
	ListVector::Reserve(target, target_child_offset);
	ListVector::SetListSize(target, target_child_offset);

	// Recurse: a LIST has exactly one child gather function; the combined list vector acts as
	// the "list parent" for the grandchildren
	D_ASSERT(child_functions.size() == 1);
	const auto &child_function = child_functions[0];
	child_function.function(layout, heap_locations, child_list_size_before, scan_sel, scan_count,
	                        ListVector::GetEntry(target), target_sel, combined_list_vector,
	                        child_function.child_functions);
}
1132
+
1133
+ template <class T>
1134
+ tuple_data_gather_function_t TupleDataGetGatherFunction(bool within_list) {
1135
+ return within_list ? TupleDataTemplatedWithinListGather<T> : TupleDataTemplatedGather<T>;
1136
+ }
1137
+
1138
+ TupleDataGatherFunction TupleDataCollection::GetGatherFunction(const LogicalType &type, bool within_list) {
1139
+ TupleDataGatherFunction result;
1140
+ switch (type.InternalType()) {
1141
+ case PhysicalType::BOOL:
1142
+ result.function = TupleDataGetGatherFunction<bool>(within_list);
1143
+ break;
1144
+ case PhysicalType::INT8:
1145
+ result.function = TupleDataGetGatherFunction<int8_t>(within_list);
1146
+ break;
1147
+ case PhysicalType::INT16:
1148
+ result.function = TupleDataGetGatherFunction<int16_t>(within_list);
1149
+ break;
1150
+ case PhysicalType::INT32:
1151
+ result.function = TupleDataGetGatherFunction<int32_t>(within_list);
1152
+ break;
1153
+ case PhysicalType::INT64:
1154
+ result.function = TupleDataGetGatherFunction<int64_t>(within_list);
1155
+ break;
1156
+ case PhysicalType::INT128:
1157
+ result.function = TupleDataGetGatherFunction<hugeint_t>(within_list);
1158
+ break;
1159
+ case PhysicalType::UINT8:
1160
+ result.function = TupleDataGetGatherFunction<uint8_t>(within_list);
1161
+ break;
1162
+ case PhysicalType::UINT16:
1163
+ result.function = TupleDataGetGatherFunction<uint16_t>(within_list);
1164
+ break;
1165
+ case PhysicalType::UINT32:
1166
+ result.function = TupleDataGetGatherFunction<uint32_t>(within_list);
1167
+ break;
1168
+ case PhysicalType::UINT64:
1169
+ result.function = TupleDataGetGatherFunction<uint64_t>(within_list);
1170
+ break;
1171
+ case PhysicalType::FLOAT:
1172
+ result.function = TupleDataGetGatherFunction<float>(within_list);
1173
+ break;
1174
+ case PhysicalType::DOUBLE:
1175
+ result.function = TupleDataGetGatherFunction<double>(within_list);
1176
+ break;
1177
+ case PhysicalType::INTERVAL:
1178
+ result.function = TupleDataGetGatherFunction<interval_t>(within_list);
1179
+ break;
1180
+ case PhysicalType::VARCHAR:
1181
+ result.function = TupleDataGetGatherFunction<string_t>(within_list);
1182
+ break;
1183
+ case PhysicalType::STRUCT: {
1184
+ result.function = within_list ? TupleDataStructWithinListGather : TupleDataStructGather;
1185
+ for (const auto &child_type : StructType::GetChildTypes(type)) {
1186
+ result.child_functions.push_back(GetGatherFunction(child_type.second, within_list));
1187
+ }
1188
+ break;
1189
+ }
1190
+ case PhysicalType::LIST:
1191
+ result.function = within_list ? TupleDataListWithinListGather : TupleDataListGather;
1192
+ result.child_functions.push_back(GetGatherFunction(ListType::GetChildType(type), true));
1193
+ break;
1194
+ default:
1195
+ throw InternalException("Unsupported type for TupleDataCollection::GetGatherFunction");
1196
+ }
1197
+ return result;
1198
+ }
1199
+
1200
+ } // namespace duckdb