duckdb 0.7.2-dev1898.0 → 0.7.2-dev2144.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181)
  1. package/binding.gyp +2 -0
  2. package/package.json +1 -1
  3. package/src/data_chunk.cpp +13 -1
  4. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +1 -1
  5. package/src/duckdb/extension/parquet/parquet_metadata.cpp +4 -2
  6. package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +1 -1
  7. package/src/duckdb/src/common/arrow/arrow_appender.cpp +69 -44
  8. package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
  9. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +20 -2
  10. package/src/duckdb/src/common/box_renderer.cpp +4 -2
  11. package/src/duckdb/src/common/constants.cpp +10 -1
  12. package/src/duckdb/src/common/filename_pattern.cpp +41 -0
  13. package/src/duckdb/src/common/hive_partitioning.cpp +144 -15
  14. package/src/duckdb/src/common/radix_partitioning.cpp +101 -369
  15. package/src/duckdb/src/common/row_operations/row_aggregate.cpp +8 -9
  16. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  17. package/src/duckdb/src/common/row_operations/row_gather.cpp +5 -3
  18. package/src/duckdb/src/common/row_operations/row_match.cpp +117 -22
  19. package/src/duckdb/src/common/row_operations/row_scatter.cpp +2 -2
  20. package/src/duckdb/src/common/sort/partition_state.cpp +1 -1
  21. package/src/duckdb/src/common/sort/sort_state.cpp +2 -1
  22. package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
  23. package/src/duckdb/src/common/types/{column_data_allocator.cpp → column/column_data_allocator.cpp} +2 -2
  24. package/src/duckdb/src/common/types/{column_data_collection.cpp → column/column_data_collection.cpp} +22 -4
  25. package/src/duckdb/src/common/types/{column_data_collection_segment.cpp → column/column_data_collection_segment.cpp} +2 -1
  26. package/src/duckdb/src/common/types/{column_data_consumer.cpp → column/column_data_consumer.cpp} +1 -1
  27. package/src/duckdb/src/common/types/{partitioned_column_data.cpp → column/partitioned_column_data.cpp} +11 -9
  28. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +316 -0
  29. package/src/duckdb/src/common/types/{row_data_collection.cpp → row/row_data_collection.cpp} +1 -1
  30. package/src/duckdb/src/common/types/{row_data_collection_scanner.cpp → row/row_data_collection_scanner.cpp} +2 -2
  31. package/src/duckdb/src/common/types/{row_layout.cpp → row/row_layout.cpp} +1 -1
  32. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +465 -0
  33. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +511 -0
  34. package/src/duckdb/src/common/types/row/tuple_data_iterator.cpp +96 -0
  35. package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +119 -0
  36. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +1200 -0
  37. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +170 -0
  38. package/src/duckdb/src/common/types/vector.cpp +1 -1
  39. package/src/duckdb/src/execution/aggregate_hashtable.cpp +252 -290
  40. package/src/duckdb/src/execution/join_hashtable.cpp +192 -328
  41. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +4 -4
  42. package/src/duckdb/src/execution/operator/helper/physical_execute.cpp +3 -3
  43. package/src/duckdb/src/execution/operator/helper/physical_limit_percent.cpp +2 -3
  44. package/src/duckdb/src/execution/operator/helper/physical_result_collector.cpp +2 -3
  45. package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +36 -21
  46. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -2
  47. package/src/duckdb/src/execution/operator/join/physical_cross_product.cpp +1 -1
  48. package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +2 -2
  49. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +166 -144
  50. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +5 -5
  51. package/src/duckdb/src/execution/operator/join/physical_join.cpp +2 -10
  52. package/src/duckdb/src/execution/operator/join/physical_positional_join.cpp +0 -1
  53. package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +2 -2
  54. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +13 -11
  55. package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +3 -2
  56. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +25 -24
  57. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  58. package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +4 -3
  59. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +1 -1
  60. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +3 -3
  61. package/src/duckdb/src/execution/partitionable_hashtable.cpp +9 -37
  62. package/src/duckdb/src/execution/physical_operator.cpp +1 -1
  63. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +19 -18
  64. package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +2 -1
  65. package/src/duckdb/src/execution/physical_plan/plan_execute.cpp +2 -2
  66. package/src/duckdb/src/execution/physical_plan/plan_explain.cpp +5 -6
  67. package/src/duckdb/src/execution/physical_plan/plan_expression_get.cpp +2 -2
  68. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +3 -3
  69. package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -1
  70. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +39 -17
  71. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -2
  72. package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +5 -5
  73. package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +2 -2
  74. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  75. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
  76. package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +1 -1
  77. package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
  78. package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -0
  79. package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +528 -0
  80. package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +34 -0
  81. package/src/duckdb/src/include/duckdb/common/helper.hpp +10 -0
  82. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +13 -3
  83. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +8 -0
  84. package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +34 -0
  85. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +80 -27
  86. package/src/duckdb/src/include/duckdb/common/reference_map.hpp +38 -0
  87. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +7 -6
  88. package/src/duckdb/src/include/duckdb/common/sort/comparators.hpp +1 -1
  89. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +1 -1
  90. package/src/duckdb/src/include/duckdb/common/sort/sort.hpp +1 -1
  91. package/src/duckdb/src/include/duckdb/common/sort/sorted_block.hpp +2 -2
  92. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +1 -1
  93. package/src/duckdb/src/include/duckdb/common/types/{column_data_allocator.hpp → column/column_data_allocator.hpp} +4 -4
  94. package/src/duckdb/src/include/duckdb/common/types/{column_data_collection.hpp → column/column_data_collection.hpp} +2 -2
  95. package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_iterators.hpp → column/column_data_collection_iterators.hpp} +2 -2
  96. package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_segment.hpp → column/column_data_collection_segment.hpp} +3 -3
  97. package/src/duckdb/src/include/duckdb/common/types/{column_data_consumer.hpp → column/column_data_consumer.hpp} +8 -4
  98. package/src/duckdb/src/include/duckdb/common/types/{column_data_scan_states.hpp → column/column_data_scan_states.hpp} +1 -1
  99. package/src/duckdb/src/include/duckdb/common/types/{partitioned_column_data.hpp → column/partitioned_column_data.hpp} +15 -7
  100. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +140 -0
  101. package/src/duckdb/src/include/duckdb/common/types/{row_data_collection.hpp → row/row_data_collection.hpp} +1 -1
  102. package/src/duckdb/src/include/duckdb/common/types/{row_data_collection_scanner.hpp → row/row_data_collection_scanner.hpp} +2 -2
  103. package/src/duckdb/src/include/duckdb/common/types/{row_layout.hpp → row/row_layout.hpp} +3 -1
  104. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +116 -0
  105. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +239 -0
  106. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_iterator.hpp +64 -0
  107. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +113 -0
  108. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +124 -0
  109. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +74 -0
  110. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
  111. package/src/duckdb/src/include/duckdb/common/types/value.hpp +4 -12
  112. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +34 -31
  113. package/src/duckdb/src/include/duckdb/execution/base_aggregate_hashtable.hpp +2 -2
  114. package/src/duckdb/src/include/duckdb/execution/execution_context.hpp +3 -2
  115. package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +1 -1
  116. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +41 -67
  117. package/src/duckdb/src/include/duckdb/execution/nested_loop_join.hpp +1 -1
  118. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_execute.hpp +2 -2
  119. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +1 -1
  120. package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +2 -2
  121. package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
  122. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +1 -1
  123. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +0 -2
  124. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +2 -2
  125. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_positional_join.hpp +1 -1
  126. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +4 -1
  127. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +1 -1
  128. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +1 -1
  129. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +2 -2
  130. package/src/duckdb/src/include/duckdb/main/materialized_query_result.hpp +1 -1
  131. package/src/duckdb/src/include/duckdb/main/query_result.hpp +14 -1
  132. package/src/duckdb/src/include/duckdb/optimizer/expression_rewriter.hpp +3 -3
  133. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +16 -16
  134. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +8 -8
  135. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +23 -15
  136. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +9 -10
  137. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +18 -11
  138. package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +1 -1
  139. package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +5 -1
  140. package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +3 -2
  141. package/src/duckdb/src/include/duckdb/parser/query_error_context.hpp +4 -2
  142. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +9 -35
  143. package/src/duckdb/src/include/duckdb/planner/binder.hpp +24 -23
  144. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +3 -3
  145. package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +1 -1
  146. package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +3 -1
  147. package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
  148. package/src/duckdb/src/main/appender.cpp +6 -6
  149. package/src/duckdb/src/main/client_context.cpp +1 -1
  150. package/src/duckdb/src/main/connection.cpp +2 -2
  151. package/src/duckdb/src/main/query_result.cpp +13 -0
  152. package/src/duckdb/src/optimizer/expression_rewriter.cpp +4 -4
  153. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +91 -105
  154. package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -8
  155. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +163 -160
  156. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +30 -30
  157. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +37 -38
  158. package/src/duckdb/src/parallel/executor.cpp +1 -1
  159. package/src/duckdb/src/parallel/meta_pipeline.cpp +2 -2
  160. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +1 -1
  161. package/src/duckdb/src/parser/transform/tableref/transform_subquery.cpp +1 -1
  162. package/src/duckdb/src/parser/transformer.cpp +50 -9
  163. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +15 -5
  164. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +19 -17
  165. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +4 -4
  166. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +20 -21
  167. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +24 -22
  168. package/src/duckdb/src/planner/binder/tableref/bind_subqueryref.cpp +2 -2
  169. package/src/duckdb/src/planner/binder.cpp +16 -19
  170. package/src/duckdb/src/planner/expression_binder.cpp +8 -8
  171. package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +3 -3
  172. package/src/duckdb/src/storage/checkpoint_manager.cpp +23 -23
  173. package/src/duckdb/src/storage/standard_buffer_manager.cpp +1 -1
  174. package/src/duckdb/src/storage/table_index_list.cpp +3 -3
  175. package/src/duckdb/src/verification/statement_verifier.cpp +1 -1
  176. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +5552 -5598
  177. package/src/duckdb/ub_src_common.cpp +2 -0
  178. package/src/duckdb/ub_src_common_types.cpp +0 -16
  179. package/src/duckdb/ub_src_common_types_column.cpp +10 -0
  180. package/src/duckdb/ub_src_common_types_row.cpp +20 -0
  181. package/test/udf.test.ts +9 -0
@@ -0,0 +1,465 @@
1
+ #include "duckdb/common/types/row/tuple_data_allocator.hpp"
2
+
3
+ #include "duckdb/common/types/row/tuple_data_segment.hpp"
4
+ #include "duckdb/common/types/row/tuple_data_states.hpp"
5
+ #include "duckdb/storage/buffer_manager.hpp"
6
+
7
+ namespace duckdb {
8
+
9
// Shorthand for the validity-mask type declared inside TupleDataLayout; the functions in this file
// construct it over a row pointer to test whether a column value in that row is valid.
+ using ValidityBytes = TupleDataLayout::ValidityBytes;
10
+
11
+ TupleDataBlock::TupleDataBlock(BufferManager &buffer_manager, idx_t capacity_p) : capacity(capacity_p), size(0) {
12
+ buffer_manager.Allocate(capacity, false, &handle);
13
+ }
14
+
15
+ TupleDataBlock::TupleDataBlock(TupleDataBlock &&other) noexcept {
16
+ std::swap(handle, other.handle);
17
+ std::swap(capacity, other.capacity);
18
+ std::swap(size, other.size);
19
+ }
20
+
21
+ TupleDataBlock &TupleDataBlock::operator=(TupleDataBlock &&other) noexcept {
22
+ std::swap(handle, other.handle);
23
+ std::swap(capacity, other.capacity);
24
+ std::swap(size, other.size);
25
+ return *this;
26
+ }
27
+
28
+ TupleDataAllocator::TupleDataAllocator(BufferManager &buffer_manager, const TupleDataLayout &layout)
29
+ : buffer_manager(buffer_manager), layout(layout.Copy()) {
30
+ }
31
+
32
+ TupleDataAllocator::TupleDataAllocator(TupleDataAllocator &allocator)
33
+ : buffer_manager(allocator.buffer_manager), layout(allocator.layout.Copy()) {
34
+ }
35
+
36
+ Allocator &TupleDataAllocator::GetAllocator() {
37
+ return buffer_manager.GetBufferAllocator();
38
+ }
39
+
40
+ const TupleDataLayout &TupleDataAllocator::GetLayout() const {
41
+ return layout;
42
+ }
43
+
44
+ idx_t TupleDataAllocator::RowBlockCount() const {
45
+ return row_blocks.size();
46
+ }
47
+
48
+ idx_t TupleDataAllocator::HeapBlockCount() const {
49
+ return heap_blocks.size();
50
+ }
51
+
52
+ void TupleDataAllocator::Build(TupleDataSegment &segment, TupleDataPinState &pin_state,
53
+ TupleDataChunkState &chunk_state, const idx_t append_offset, const idx_t append_count) {
54
+ D_ASSERT(this == segment.allocator.get());
55
+ auto &chunks = segment.chunks;
56
+ if (!chunks.empty()) {
57
+ ReleaseOrStoreHandles(pin_state, segment, chunks.back(), true);
58
+ }
59
+
60
+ // Build the chunk parts for the incoming data
61
+ vector<pair<idx_t, idx_t>> chunk_part_indices;
62
+ idx_t offset = 0;
63
+ while (offset != append_count) {
64
+ if (chunks.empty() || chunks.back().count == STANDARD_VECTOR_SIZE) {
65
+ chunks.emplace_back();
66
+ }
67
+ auto &chunk = chunks.back();
68
+
69
+ // Build the next part
70
+ auto next = MinValue<idx_t>(append_count - offset, STANDARD_VECTOR_SIZE - chunk.count);
71
+ chunk.AddPart(BuildChunkPart(pin_state, chunk_state, append_offset + offset, next), layout);
72
+ chunk_part_indices.emplace_back(chunks.size() - 1, chunk.parts.size() - 1);
73
+
74
+ auto &chunk_part = chunk.parts.back();
75
+ next = chunk_part.count;
76
+ segment.count += next;
77
+
78
+ offset += next;
79
+ }
80
+
81
+ // Now initialize the pointers to write the data to
82
+ vector<TupleDataChunkPart *> parts;
83
+ parts.reserve(chunk_part_indices.size());
84
+ for (auto &indices : chunk_part_indices) {
85
+ parts.emplace_back(&segment.chunks[indices.first].parts[indices.second]);
86
+ }
87
+ InitializeChunkStateInternal(pin_state, chunk_state, append_offset, false, true, false, parts);
88
+
89
+ // To reduce metadata, we try to merge chunk parts where possible
90
+ // Due to the way chunk parts are constructed, only the last part of the first chunk is eligible for merging
91
+ segment.chunks[chunk_part_indices[0].first].MergeLastChunkPart(layout);
92
+
93
+ segment.Verify();
94
+ }
95
+
96
+ TupleDataChunkPart TupleDataAllocator::BuildChunkPart(TupleDataPinState &pin_state, TupleDataChunkState &chunk_state,
97
+ const idx_t append_offset, const idx_t append_count) {
98
+ D_ASSERT(append_count != 0);
99
+ TupleDataChunkPart result;
100
+
101
+ // Allocate row block (if needed)
102
+ if (row_blocks.empty() || row_blocks.back().RemainingCapacity() < layout.GetRowWidth()) {
103
+ row_blocks.emplace_back(buffer_manager, (idx_t)Storage::BLOCK_SIZE);
104
+ }
105
+ result.row_block_index = row_blocks.size() - 1;
106
+ auto &row_block = row_blocks[result.row_block_index];
107
+ result.row_block_offset = row_block.size;
108
+
109
+ // Set count (might be reduced later when checking heap space)
110
+ result.count = MinValue<idx_t>(row_block.RemainingCapacity(layout.GetRowWidth()), append_count);
111
+ if (!layout.AllConstant()) {
112
+ const auto heap_sizes = FlatVector::GetData<idx_t>(chunk_state.heap_sizes);
113
+
114
+ // Compute total heap size first
115
+ idx_t total_heap_size = 0;
116
+ for (idx_t i = 0; i < result.count; i++) {
117
+ const auto &heap_size = heap_sizes[append_offset + i];
118
+ total_heap_size += heap_size;
119
+ }
120
+
121
+ if (total_heap_size == 0) {
122
+ // We don't need a heap at all
123
+ result.heap_block_index = TupleDataChunkPart::INVALID_INDEX;
124
+ result.heap_block_offset = TupleDataChunkPart::INVALID_INDEX;
125
+ result.total_heap_size = 0;
126
+ result.base_heap_ptr = nullptr;
127
+ } else {
128
+ // Allocate heap block (if needed)
129
+ if (heap_blocks.empty() || heap_blocks.back().RemainingCapacity() < heap_sizes[append_offset]) {
130
+ const auto size = MaxValue<idx_t>((idx_t)Storage::BLOCK_SIZE, heap_sizes[append_offset]);
131
+ heap_blocks.emplace_back(buffer_manager, size);
132
+ }
133
+ result.heap_block_index = heap_blocks.size() - 1;
134
+ auto &heap_block = heap_blocks[result.heap_block_index];
135
+ result.heap_block_offset = heap_block.size;
136
+
137
+ const auto heap_remaining = heap_block.RemainingCapacity();
138
+ if (total_heap_size <= heap_remaining) {
139
+ // Everything fits
140
+ result.total_heap_size = total_heap_size;
141
+ } else {
142
+ // Not everything fits - determine how many we can read next
143
+ result.total_heap_size = 0;
144
+ for (idx_t i = 0; i < result.count; i++) {
145
+ const auto &heap_size = heap_sizes[append_offset + i];
146
+ if (result.total_heap_size + heap_size > heap_remaining) {
147
+ result.count = i;
148
+ break;
149
+ }
150
+ result.total_heap_size += heap_size;
151
+ }
152
+ }
153
+
154
+ // Mark this portion of the heap block as filled and set the pointer
155
+ heap_block.size += result.total_heap_size;
156
+ result.base_heap_ptr = GetBaseHeapPointer(pin_state, result);
157
+ }
158
+ }
159
+ D_ASSERT(result.count != 0 && result.count <= STANDARD_VECTOR_SIZE);
160
+
161
+ // Mark this portion of the row block as filled
162
+ row_block.size += result.count * layout.GetRowWidth();
163
+
164
+ return result;
165
+ }
166
+
167
+ void TupleDataAllocator::InitializeChunkState(TupleDataSegment &segment, TupleDataPinState &pin_state,
168
+ TupleDataChunkState &chunk_state, idx_t chunk_idx, bool init_heap) {
169
+ D_ASSERT(this == segment.allocator.get());
170
+ D_ASSERT(chunk_idx < segment.ChunkCount());
171
+ auto &chunk = segment.chunks[chunk_idx];
172
+
173
+ // Release or store any handles that are no longer required:
174
+ // We can't release the heap here if the current chunk's heap_block_ids is empty, because if we are iterating with
175
+ // PinProperties::DESTROY_AFTER_DONE, we might destroy a heap block that is needed by a later chunk, e.g.,
176
+ // when chunk 0 needs heap block 0, chunk 1 does not need any heap blocks, and chunk 2 needs heap block 0 again
177
+ ReleaseOrStoreHandles(pin_state, segment, chunk, !chunk.heap_block_ids.empty());
178
+
179
+ vector<TupleDataChunkPart *> parts;
180
+ parts.reserve(chunk.parts.size());
181
+ for (auto &part : chunk.parts) {
182
+ parts.emplace_back(&part);
183
+ }
184
+
185
+ InitializeChunkStateInternal(pin_state, chunk_state, 0, true, init_heap, init_heap, parts);
186
+ }
187
+
188
+ static inline void InitializeHeapSizes(const data_ptr_t row_locations[], idx_t heap_sizes[], const idx_t offset,
189
+ const idx_t next, const TupleDataChunkPart &part, const idx_t heap_size_offset) {
190
+ // Read the heap sizes from the rows
191
+ for (idx_t i = 0; i < next; i++) {
192
+ auto idx = offset + i;
193
+ heap_sizes[idx] = Load<uint32_t>(row_locations[idx] + heap_size_offset);
194
+ }
195
+
196
+ // Verify total size
197
+ #ifdef DEBUG
198
+ idx_t total_heap_size = 0;
199
+ for (idx_t i = 0; i < next; i++) {
200
+ auto idx = offset + i;
201
+ total_heap_size += heap_sizes[idx];
202
+ }
203
+ D_ASSERT(total_heap_size == part.total_heap_size);
204
+ #endif
205
+ }
206
+
207
+ void TupleDataAllocator::InitializeChunkStateInternal(TupleDataPinState &pin_state, TupleDataChunkState &chunk_state,
208
+ idx_t offset, bool recompute, bool init_heap_pointers,
209
+ bool init_heap_sizes, vector<TupleDataChunkPart *> &parts) {
210
+ auto row_locations = FlatVector::GetData<data_ptr_t>(chunk_state.row_locations);
211
+ auto heap_sizes = FlatVector::GetData<idx_t>(chunk_state.heap_sizes);
212
+ auto heap_locations = FlatVector::GetData<data_ptr_t>(chunk_state.heap_locations);
213
+
214
+ for (auto &part : parts) {
215
+ const auto next = part->count;
216
+
217
+ // Set up row locations for the scan
218
+ const auto row_width = layout.GetRowWidth();
219
+ const auto base_row_ptr = GetRowPointer(pin_state, *part);
220
+ for (idx_t i = 0; i < next; i++) {
221
+ row_locations[offset + i] = base_row_ptr + i * row_width;
222
+ }
223
+
224
+ if (layout.AllConstant()) { // Can't have a heap
225
+ offset += next;
226
+ continue;
227
+ }
228
+
229
+ if (part->total_heap_size == 0) {
230
+ if (init_heap_sizes) { // No heap, but we need the heap sizes
231
+ InitializeHeapSizes(row_locations, heap_sizes, offset, next, *part, layout.GetHeapSizeOffset());
232
+ }
233
+ offset += next;
234
+ continue;
235
+ }
236
+
237
+ // Check if heap block has changed - re-compute the pointers within each row if so
238
+ if (recompute && pin_state.properties != TupleDataPinProperties::ALREADY_PINNED) {
239
+ const auto new_base_heap_ptr = GetBaseHeapPointer(pin_state, *part);
240
+ if (part->base_heap_ptr != new_base_heap_ptr) {
241
+ lock_guard<mutex> guard(part->lock);
242
+ const auto old_base_heap_ptr = part->base_heap_ptr;
243
+ if (old_base_heap_ptr != new_base_heap_ptr) {
244
+ Vector old_heap_ptrs(Value::POINTER((uintptr_t)old_base_heap_ptr + part->heap_block_offset));
245
+ Vector new_heap_ptrs(Value::POINTER((uintptr_t)new_base_heap_ptr + part->heap_block_offset));
246
+ RecomputeHeapPointers(old_heap_ptrs, *ConstantVector::ZeroSelectionVector(), row_locations,
247
+ new_heap_ptrs, offset, next, layout, 0);
248
+ part->base_heap_ptr = new_base_heap_ptr;
249
+ }
250
+ }
251
+ }
252
+
253
+ if (init_heap_sizes) {
254
+ InitializeHeapSizes(row_locations, heap_sizes, offset, next, *part, layout.GetHeapSizeOffset());
255
+ }
256
+
257
+ if (init_heap_pointers) {
258
+ // Set the pointers where the heap data will be written (if needed)
259
+ heap_locations[offset] = part->base_heap_ptr + part->heap_block_offset;
260
+ for (idx_t i = 1; i < next; i++) {
261
+ auto idx = offset + i;
262
+ heap_locations[idx] = heap_locations[idx - 1] + heap_sizes[idx - 1];
263
+ }
264
+ }
265
+
266
+ offset += next;
267
+ }
268
+ D_ASSERT(offset <= STANDARD_VECTOR_SIZE);
269
+ }
270
+
271
+ static inline void VerifyStrings(const LogicalTypeId type_id, const data_ptr_t row_locations[], const idx_t col_idx,
272
+ const idx_t base_col_offset, const idx_t col_offset, const idx_t offset,
273
+ const idx_t count) {
274
+ #ifdef DEBUG
275
+ if (type_id != LogicalTypeId::VARCHAR) {
276
+ // Make sure we don't verify BLOB / AGGREGATE_STATE
277
+ return;
278
+ }
279
+ idx_t entry_idx;
280
+ idx_t idx_in_entry;
281
+ ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry);
282
+ for (idx_t i = 0; i < count; i++) {
283
+ const auto &row_location = row_locations[offset + i] + base_col_offset;
284
+ ValidityBytes row_mask(row_location);
285
+ if (row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry)) {
286
+ auto recomputed_string = Load<string_t>(row_location + col_offset);
287
+ recomputed_string.Verify();
288
+ }
289
+ }
290
+ #endif
291
+ }
292
+
293
+ void TupleDataAllocator::RecomputeHeapPointers(Vector &old_heap_ptrs, const SelectionVector &old_heap_sel,
294
+ const data_ptr_t row_locations[], Vector &new_heap_ptrs,
295
+ const idx_t offset, const idx_t count, const TupleDataLayout &layout,
296
+ const idx_t base_col_offset) {
297
+ const auto old_heap_locations = FlatVector::GetData<data_ptr_t>(old_heap_ptrs);
298
+
299
+ UnifiedVectorFormat new_heap_data;
300
+ new_heap_ptrs.ToUnifiedFormat(offset + count, new_heap_data);
301
+ const auto new_heap_locations = (data_ptr_t *)new_heap_data.data;
302
+ const auto new_heap_sel = *new_heap_data.sel;
303
+
304
+ for (idx_t col_idx = 0; col_idx < layout.ColumnCount(); col_idx++) {
305
+ const auto &col_offset = layout.GetOffsets()[col_idx];
306
+
307
+ // Precompute mask indexes
308
+ idx_t entry_idx;
309
+ idx_t idx_in_entry;
310
+ ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry);
311
+
312
+ const auto &type = layout.GetTypes()[col_idx];
313
+ switch (type.InternalType()) {
314
+ case PhysicalType::VARCHAR: {
315
+ for (idx_t i = 0; i < count; i++) {
316
+ const auto idx = offset + i;
317
+ const auto &row_location = row_locations[idx] + base_col_offset;
318
+ ValidityBytes row_mask(row_location);
319
+ if (!row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry)) {
320
+ continue;
321
+ }
322
+
323
+ const auto &old_heap_ptr = old_heap_locations[old_heap_sel.get_index(idx)];
324
+ const auto &new_heap_ptr = new_heap_locations[new_heap_sel.get_index(idx)];
325
+
326
+ const auto string_location = row_location + col_offset;
327
+ if (Load<uint32_t>(string_location) > string_t::INLINE_LENGTH) {
328
+ const auto string_ptr_location = string_location + string_t::HEADER_SIZE;
329
+ const auto string_ptr = Load<data_ptr_t>(string_ptr_location);
330
+ const auto diff = string_ptr - old_heap_ptr;
331
+ D_ASSERT(diff >= 0);
332
+ Store<data_ptr_t>(new_heap_ptr + diff, string_ptr_location);
333
+ }
334
+ }
335
+ VerifyStrings(type.id(), row_locations, col_idx, base_col_offset, col_offset, offset, count);
336
+ break;
337
+ }
338
+ case PhysicalType::LIST: {
339
+ for (idx_t i = 0; i < count; i++) {
340
+ const auto idx = offset + i;
341
+ const auto &row_location = row_locations[idx] + base_col_offset;
342
+ ValidityBytes row_mask(row_location);
343
+ if (!row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry)) {
344
+ continue;
345
+ }
346
+
347
+ const auto &old_heap_ptr = old_heap_locations[old_heap_sel.get_index(idx)];
348
+ const auto &new_heap_ptr = new_heap_locations[new_heap_sel.get_index(idx)];
349
+
350
+ const auto &list_ptr_location = row_location + col_offset;
351
+ const auto list_ptr = Load<data_ptr_t>(list_ptr_location);
352
+ const auto diff = list_ptr - old_heap_ptr;
353
+ D_ASSERT(diff >= 0);
354
+ Store<data_ptr_t>(new_heap_ptr + diff, list_ptr_location);
355
+ }
356
+ break;
357
+ }
358
+ case PhysicalType::STRUCT: {
359
+ const auto &struct_layout = layout.GetStructLayout(col_idx);
360
+ if (!struct_layout.AllConstant()) {
361
+ RecomputeHeapPointers(old_heap_ptrs, old_heap_sel, row_locations, new_heap_ptrs, offset, count,
362
+ struct_layout, base_col_offset + col_offset);
363
+ }
364
+ break;
365
+ }
366
+ default:
367
+ continue;
368
+ }
369
+ }
370
+ }
371
+
372
+ void TupleDataAllocator::ReleaseOrStoreHandles(TupleDataPinState &pin_state, TupleDataSegment &segment,
373
+ TupleDataChunk &chunk, bool release_heap) {
374
+ D_ASSERT(this == segment.allocator.get());
375
+ ReleaseOrStoreHandlesInternal(segment, segment.pinned_row_handles, pin_state.row_handles, chunk.row_block_ids,
376
+ row_blocks, pin_state.properties);
377
+ if (!layout.AllConstant() && release_heap) {
378
+ ReleaseOrStoreHandlesInternal(segment, segment.pinned_heap_handles, pin_state.heap_handles,
379
+ chunk.heap_block_ids, heap_blocks, pin_state.properties);
380
+ }
381
+ }
382
+
383
+ void TupleDataAllocator::ReleaseOrStoreHandles(TupleDataPinState &pin_state, TupleDataSegment &segment) {
384
+ static TupleDataChunk DUMMY_CHUNK;
385
+ ReleaseOrStoreHandles(pin_state, segment, DUMMY_CHUNK, true);
386
+ }
387
+
388
+ void TupleDataAllocator::ReleaseOrStoreHandlesInternal(TupleDataSegment &segment, vector<BufferHandle> &pinned_handles,
389
+ unordered_map<uint32_t, BufferHandle> &handles,
390
+ const unordered_set<uint32_t> &block_ids,
391
+ vector<TupleDataBlock> &blocks,
392
+ TupleDataPinProperties properties) {
393
+ bool found_handle;
394
+ do {
395
+ found_handle = false;
396
+ for (auto it = handles.begin(); it != handles.end(); it++) {
397
+ const auto block_id = it->first;
398
+ if (block_ids.find(block_id) != block_ids.end()) {
399
+ // still required: do not release
400
+ continue;
401
+ }
402
+ switch (properties) {
403
+ case TupleDataPinProperties::KEEP_EVERYTHING_PINNED: {
404
+ lock_guard<mutex> guard(segment.pinned_handles_lock);
405
+ const auto block_count = block_id + 1;
406
+ if (block_count > pinned_handles.size()) {
407
+ pinned_handles.resize(block_count);
408
+ }
409
+ pinned_handles[block_id] = std::move(it->second);
410
+ break;
411
+ }
412
+ case TupleDataPinProperties::UNPIN_AFTER_DONE:
413
+ case TupleDataPinProperties::ALREADY_PINNED:
414
+ break;
415
+ case TupleDataPinProperties::DESTROY_AFTER_DONE:
416
+ blocks[block_id].handle = nullptr;
417
+ break;
418
+ default:
419
+ D_ASSERT(properties == TupleDataPinProperties::INVALID);
420
+ throw InternalException("Encountered TupleDataPinProperties::INVALID");
421
+ }
422
+ handles.erase(it);
423
+ found_handle = true;
424
+ break;
425
+ }
426
+ } while (found_handle);
427
+ }
428
+
429
+ BufferHandle &TupleDataAllocator::PinRowBlock(TupleDataPinState &pin_state, const TupleDataChunkPart &part) {
430
+ const auto &row_block_index = part.row_block_index;
431
+ auto it = pin_state.row_handles.find(row_block_index);
432
+ if (it == pin_state.row_handles.end()) {
433
+ D_ASSERT(row_block_index < row_blocks.size());
434
+ auto &row_block = row_blocks[row_block_index];
435
+ D_ASSERT(row_block.handle);
436
+ D_ASSERT(part.row_block_offset < row_block.size);
437
+ D_ASSERT(part.row_block_offset + part.count * layout.GetRowWidth() <= row_block.size);
438
+ it = pin_state.row_handles.emplace(row_block_index, buffer_manager.Pin(row_block.handle)).first;
439
+ }
440
+ return it->second;
441
+ }
442
+
443
+ BufferHandle &TupleDataAllocator::PinHeapBlock(TupleDataPinState &pin_state, const TupleDataChunkPart &part) {
444
+ const auto &heap_block_index = part.heap_block_index;
445
+ auto it = pin_state.heap_handles.find(heap_block_index);
446
+ if (it == pin_state.heap_handles.end()) {
447
+ D_ASSERT(heap_block_index < heap_blocks.size());
448
+ auto &heap_block = heap_blocks[heap_block_index];
449
+ D_ASSERT(heap_block.handle);
450
+ D_ASSERT(part.heap_block_offset < heap_block.size);
451
+ D_ASSERT(part.heap_block_offset + part.total_heap_size <= heap_block.size);
452
+ it = pin_state.heap_handles.emplace(heap_block_index, buffer_manager.Pin(heap_block.handle)).first;
453
+ }
454
+ return it->second;
455
+ }
456
+
457
+ data_ptr_t TupleDataAllocator::GetRowPointer(TupleDataPinState &pin_state, const TupleDataChunkPart &part) {
458
+ return PinRowBlock(pin_state, part).Ptr() + part.row_block_offset;
459
+ }
460
+
461
+ data_ptr_t TupleDataAllocator::GetBaseHeapPointer(TupleDataPinState &pin_state, const TupleDataChunkPart &part) {
462
+ return PinHeapBlock(pin_state, part).Ptr();
463
+ }
464
+
465
+ } // namespace duckdb