duckdb 0.7.2-dev1898.0 → 0.7.2-dev2144.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +2 -0
- package/package.json +1 -1
- package/src/data_chunk.cpp +13 -1
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +1 -1
- package/src/duckdb/extension/parquet/parquet_metadata.cpp +4 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +69 -44
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +20 -2
- package/src/duckdb/src/common/box_renderer.cpp +4 -2
- package/src/duckdb/src/common/constants.cpp +10 -1
- package/src/duckdb/src/common/filename_pattern.cpp +41 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +144 -15
- package/src/duckdb/src/common/radix_partitioning.cpp +101 -369
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +8 -9
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_gather.cpp +5 -3
- package/src/duckdb/src/common/row_operations/row_match.cpp +117 -22
- package/src/duckdb/src/common/row_operations/row_scatter.cpp +2 -2
- package/src/duckdb/src/common/sort/partition_state.cpp +1 -1
- package/src/duckdb/src/common/sort/sort_state.cpp +2 -1
- package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
- package/src/duckdb/src/common/types/{column_data_allocator.cpp → column/column_data_allocator.cpp} +2 -2
- package/src/duckdb/src/common/types/{column_data_collection.cpp → column/column_data_collection.cpp} +22 -4
- package/src/duckdb/src/common/types/{column_data_collection_segment.cpp → column/column_data_collection_segment.cpp} +2 -1
- package/src/duckdb/src/common/types/{column_data_consumer.cpp → column/column_data_consumer.cpp} +1 -1
- package/src/duckdb/src/common/types/{partitioned_column_data.cpp → column/partitioned_column_data.cpp} +11 -9
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +316 -0
- package/src/duckdb/src/common/types/{row_data_collection.cpp → row/row_data_collection.cpp} +1 -1
- package/src/duckdb/src/common/types/{row_data_collection_scanner.cpp → row/row_data_collection_scanner.cpp} +2 -2
- package/src/duckdb/src/common/types/{row_layout.cpp → row/row_layout.cpp} +1 -1
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +465 -0
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +511 -0
- package/src/duckdb/src/common/types/row/tuple_data_iterator.cpp +96 -0
- package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +119 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +1200 -0
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +170 -0
- package/src/duckdb/src/common/types/vector.cpp +1 -1
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +252 -290
- package/src/duckdb/src/execution/join_hashtable.cpp +192 -328
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +4 -4
- package/src/duckdb/src/execution/operator/helper/physical_execute.cpp +3 -3
- package/src/duckdb/src/execution/operator/helper/physical_limit_percent.cpp +2 -3
- package/src/duckdb/src/execution/operator/helper/physical_result_collector.cpp +2 -3
- package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +36 -21
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_cross_product.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +166 -144
- package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +2 -10
- package/src/duckdb/src/execution/operator/join/physical_positional_join.cpp +0 -1
- package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +2 -2
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +13 -11
- package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +3 -2
- package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +25 -24
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +4 -3
- package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +1 -1
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +3 -3
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +9 -37
- package/src/duckdb/src/execution/physical_operator.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +19 -18
- package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +2 -1
- package/src/duckdb/src/execution/physical_plan/plan_execute.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_explain.cpp +5 -6
- package/src/duckdb/src/execution/physical_plan/plan_expression_get.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +3 -3
- package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -1
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +39 -17
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -2
- package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +5 -5
- package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +2 -2
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +528 -0
- package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +34 -0
- package/src/duckdb/src/include/duckdb/common/helper.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +13 -3
- package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +34 -0
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +80 -27
- package/src/duckdb/src/include/duckdb/common/reference_map.hpp +38 -0
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +7 -6
- package/src/duckdb/src/include/duckdb/common/sort/comparators.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/sort.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/sorted_block.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{column_data_allocator.hpp → column/column_data_allocator.hpp} +4 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection.hpp → column/column_data_collection.hpp} +2 -2
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_iterators.hpp → column/column_data_collection_iterators.hpp} +2 -2
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_segment.hpp → column/column_data_collection_segment.hpp} +3 -3
- package/src/duckdb/src/include/duckdb/common/types/{column_data_consumer.hpp → column/column_data_consumer.hpp} +8 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_scan_states.hpp → column/column_data_scan_states.hpp} +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{partitioned_column_data.hpp → column/partitioned_column_data.hpp} +15 -7
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +140 -0
- package/src/duckdb/src/include/duckdb/common/types/{row_data_collection.hpp → row/row_data_collection.hpp} +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{row_data_collection_scanner.hpp → row/row_data_collection_scanner.hpp} +2 -2
- package/src/duckdb/src/include/duckdb/common/types/{row_layout.hpp → row/row_layout.hpp} +3 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +116 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +239 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_iterator.hpp +64 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +113 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +124 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +74 -0
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +4 -12
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +34 -31
- package/src/duckdb/src/include/duckdb/execution/base_aggregate_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/execution_context.hpp +3 -2
- package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +41 -67
- package/src/duckdb/src/include/duckdb/execution/nested_loop_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_execute.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +0 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_positional_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +4 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/materialized_query_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/query_result.hpp +14 -1
- package/src/duckdb/src/include/duckdb/optimizer/expression_rewriter.hpp +3 -3
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +16 -16
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +8 -8
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +23 -15
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +9 -10
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +18 -11
- package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +5 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +3 -2
- package/src/duckdb/src/include/duckdb/parser/query_error_context.hpp +4 -2
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +9 -35
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +24 -23
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +3 -1
- package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
- package/src/duckdb/src/main/appender.cpp +6 -6
- package/src/duckdb/src/main/client_context.cpp +1 -1
- package/src/duckdb/src/main/connection.cpp +2 -2
- package/src/duckdb/src/main/query_result.cpp +13 -0
- package/src/duckdb/src/optimizer/expression_rewriter.cpp +4 -4
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +91 -105
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -8
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +163 -160
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +30 -30
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +37 -38
- package/src/duckdb/src/parallel/executor.cpp +1 -1
- package/src/duckdb/src/parallel/meta_pipeline.cpp +2 -2
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +1 -1
- package/src/duckdb/src/parser/transform/tableref/transform_subquery.cpp +1 -1
- package/src/duckdb/src/parser/transformer.cpp +50 -9
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +15 -5
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +19 -17
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +4 -4
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +20 -21
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +24 -22
- package/src/duckdb/src/planner/binder/tableref/bind_subqueryref.cpp +2 -2
- package/src/duckdb/src/planner/binder.cpp +16 -19
- package/src/duckdb/src/planner/expression_binder.cpp +8 -8
- package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +3 -3
- package/src/duckdb/src/storage/checkpoint_manager.cpp +23 -23
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +1 -1
- package/src/duckdb/src/storage/table_index_list.cpp +3 -3
- package/src/duckdb/src/verification/statement_verifier.cpp +1 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +5552 -5598
- package/src/duckdb/ub_src_common.cpp +2 -0
- package/src/duckdb/ub_src_common_types.cpp +0 -16
- package/src/duckdb/ub_src_common_types_column.cpp +10 -0
- package/src/duckdb/ub_src_common_types_row.cpp +20 -0
- package/test/udf.test.ts +9 -0
@@ -0,0 +1,465 @@
|
|
1
|
+
#include "duckdb/common/types/row/tuple_data_allocator.hpp"
|
2
|
+
|
3
|
+
#include "duckdb/common/types/row/tuple_data_segment.hpp"
|
4
|
+
#include "duckdb/common/types/row/tuple_data_states.hpp"
|
5
|
+
#include "duckdb/storage/buffer_manager.hpp"
|
6
|
+
|
7
|
+
namespace duckdb {
|
8
|
+
|
9
|
+
using ValidityBytes = TupleDataLayout::ValidityBytes;
|
10
|
+
|
11
|
+
//! Creates a block of the given capacity backed by the buffer manager.
//! "size" tracks the number of bytes currently in use (starts empty).
TupleDataBlock::TupleDataBlock(BufferManager &buffer_manager, idx_t capacity_p) : capacity(capacity_p), size(0) {
	// Allocate an unpinned-eligible buffer and store the resulting block handle
	buffer_manager.Allocate(capacity, false, &handle);
}
//! Move constructor: takes over the other block's handle and bookkeeping by swapping members.
//! NOTE(review): this swaps rather than copies, which assumes the in-class member
//! initializers (declared in the header, not visible here) give this object defined
//! values before the swap -- confirm against tuple_data_segment.hpp.
TupleDataBlock::TupleDataBlock(TupleDataBlock &&other) noexcept {
	std::swap(size, other.size);
	std::swap(capacity, other.capacity);
	std::swap(handle, other.handle);
}
//! Move assignment: exchanges state with "other" (swap-based, so self-assignment is harmless).
TupleDataBlock &TupleDataBlock::operator=(TupleDataBlock &&other) noexcept {
	std::swap(size, other.size);
	std::swap(capacity, other.capacity);
	std::swap(handle, other.handle);
	return *this;
}
//! Creates an allocator for rows of the given layout; the layout is copied so the
//! allocator owns its own instance.
TupleDataAllocator::TupleDataAllocator(BufferManager &buffer_manager, const TupleDataLayout &layout)
    : buffer_manager(buffer_manager), layout(layout.Copy()) {
}
//! Creates a new allocator sharing the buffer manager (and a copy of the layout) of an
//! existing allocator; blocks/handles are NOT shared.
TupleDataAllocator::TupleDataAllocator(TupleDataAllocator &allocator)
    : buffer_manager(allocator.buffer_manager), layout(allocator.layout.Copy()) {
}
//! Returns the (non-buffer-managed) allocator of the underlying buffer manager.
Allocator &TupleDataAllocator::GetAllocator() {
	return buffer_manager.GetBufferAllocator();
}
//! Returns the row layout this allocator was created with.
const TupleDataLayout &TupleDataAllocator::GetLayout() const {
	return layout;
}
//! Number of fixed-size row blocks allocated so far.
idx_t TupleDataAllocator::RowBlockCount() const {
	return row_blocks.size();
}
//! Number of variable-size (heap) blocks allocated so far.
idx_t TupleDataAllocator::HeapBlockCount() const {
	return heap_blocks.size();
}
//! Appends "append_count" rows (starting at "append_offset" in the chunk state) to the
//! segment, carving them into chunk parts and initializing the row/heap write pointers.
void TupleDataAllocator::Build(TupleDataSegment &segment, TupleDataPinState &pin_state,
                               TupleDataChunkState &chunk_state, const idx_t append_offset, const idx_t append_count) {
	D_ASSERT(this == segment.allocator.get());
	auto &chunks = segment.chunks;
	if (!chunks.empty()) {
		// Handles for anything before the chunk currently being filled are no longer needed
		ReleaseOrStoreHandles(pin_state, segment, chunks.back(), true);
	}

	// Carve the incoming rows into chunk parts, remembering (chunk index, part index) of each
	vector<pair<idx_t, idx_t>> part_positions;
	idx_t appended = 0;
	while (appended != append_count) {
		if (chunks.empty() || chunks.back().count == STANDARD_VECTOR_SIZE) {
			chunks.emplace_back();
		}
		auto &chunk = chunks.back();

		// Try to fit as many of the remaining rows as possible into the current chunk
		auto part_count = MinValue<idx_t>(append_count - appended, STANDARD_VECTOR_SIZE - chunk.count);
		chunk.AddPart(BuildChunkPart(pin_state, chunk_state, append_offset + appended, part_count), layout);
		part_positions.emplace_back(chunks.size() - 1, chunk.parts.size() - 1);

		// BuildChunkPart may have reduced the row count (e.g., when heap space ran out)
		auto &chunk_part = chunk.parts.back();
		part_count = chunk_part.count;
		segment.count += part_count;

		appended += part_count;
	}

	// Collect pointers to the parts we just created and set up the write locations
	vector<TupleDataChunkPart *> part_ptrs;
	part_ptrs.reserve(part_positions.size());
	for (auto &position : part_positions) {
		part_ptrs.emplace_back(&segment.chunks[position.first].parts[position.second]);
	}
	InitializeChunkStateInternal(pin_state, chunk_state, append_offset, false, true, false, part_ptrs);

	// To reduce metadata, we try to merge chunk parts where possible.
	// Due to the way parts are constructed, only the last part of the first chunk is eligible.
	segment.chunks[part_positions[0].first].MergeLastChunkPart(layout);

	segment.Verify();
}
//! Builds a single chunk part for up to "append_count" rows, allocating row/heap blocks
//! as needed. The returned part's count may be smaller than requested when the current
//! row or heap block cannot hold all rows.
TupleDataChunkPart TupleDataAllocator::BuildChunkPart(TupleDataPinState &pin_state, TupleDataChunkState &chunk_state,
                                                      const idx_t append_offset, const idx_t append_count) {
	D_ASSERT(append_count != 0);
	TupleDataChunkPart result;

	// Make sure there is a row block with room for at least one row
	if (row_blocks.empty() || row_blocks.back().RemainingCapacity() < layout.GetRowWidth()) {
		row_blocks.emplace_back(buffer_manager, (idx_t)Storage::BLOCK_SIZE);
	}
	result.row_block_index = row_blocks.size() - 1;
	auto &row_block = row_blocks[result.row_block_index];
	result.row_block_offset = row_block.size;

	// Tentative row count; may shrink below if the heap cannot hold everything
	result.count = MinValue<idx_t>(row_block.RemainingCapacity(layout.GetRowWidth()), append_count);
	if (!layout.AllConstant()) {
		const auto heap_sizes = FlatVector::GetData<idx_t>(chunk_state.heap_sizes);

		// Sum the heap space these rows require
		idx_t required_heap_size = 0;
		for (idx_t i = 0; i < result.count; i++) {
			const auto &heap_size = heap_sizes[append_offset + i];
			required_heap_size += heap_size;
		}

		if (required_heap_size == 0) {
			// No row has variable-size data: no heap needed at all
			result.heap_block_index = TupleDataChunkPart::INVALID_INDEX;
			result.heap_block_offset = TupleDataChunkPart::INVALID_INDEX;
			result.total_heap_size = 0;
			result.base_heap_ptr = nullptr;
		} else {
			// Make sure there is a heap block that can hold at least the first row's heap data
			if (heap_blocks.empty() || heap_blocks.back().RemainingCapacity() < heap_sizes[append_offset]) {
				const auto size = MaxValue<idx_t>((idx_t)Storage::BLOCK_SIZE, heap_sizes[append_offset]);
				heap_blocks.emplace_back(buffer_manager, size);
			}
			result.heap_block_index = heap_blocks.size() - 1;
			auto &heap_block = heap_blocks[result.heap_block_index];
			result.heap_block_offset = heap_block.size;

			const auto heap_remaining = heap_block.RemainingCapacity();
			if (required_heap_size <= heap_remaining) {
				// Everything fits
				result.total_heap_size = required_heap_size;
			} else {
				// Not everything fits: walk forward until the heap block is full,
				// reducing result.count to the rows that do fit
				result.total_heap_size = 0;
				for (idx_t i = 0; i < result.count; i++) {
					const auto &heap_size = heap_sizes[append_offset + i];
					if (result.total_heap_size + heap_size > heap_remaining) {
						result.count = i;
						break;
					}
					result.total_heap_size += heap_size;
				}
			}

			// Claim the heap space and resolve the base pointer (pins the heap block)
			heap_block.size += result.total_heap_size;
			result.base_heap_ptr = GetBaseHeapPointer(pin_state, result);
		}
	}
	D_ASSERT(result.count != 0 && result.count <= STANDARD_VECTOR_SIZE);

	// Claim the row space
	row_block.size += result.count * layout.GetRowWidth();

	return result;
}
//! Prepares the chunk state for reading chunk "chunk_idx" of the segment: pins the
//! required blocks and fills in the row (and optionally heap) locations.
void TupleDataAllocator::InitializeChunkState(TupleDataSegment &segment, TupleDataPinState &pin_state,
                                              TupleDataChunkState &chunk_state, idx_t chunk_idx, bool init_heap) {
	D_ASSERT(this == segment.allocator.get());
	D_ASSERT(chunk_idx < segment.ChunkCount());
	auto &chunk = segment.chunks[chunk_idx];

	// Release or store any handles that are no longer required.
	// We can't release the heap when this chunk's heap_block_ids is empty: under
	// PinProperties::DESTROY_AFTER_DONE we might destroy a heap block a later chunk needs,
	// e.g., chunk 0 needs heap block 0, chunk 1 needs none, chunk 2 needs heap block 0 again.
	ReleaseOrStoreHandles(pin_state, segment, chunk, !chunk.heap_block_ids.empty());

	vector<TupleDataChunkPart *> part_ptrs;
	part_ptrs.reserve(chunk.parts.size());
	for (auto &part : chunk.parts) {
		part_ptrs.emplace_back(&part);
	}

	InitializeChunkStateInternal(pin_state, chunk_state, 0, true, init_heap, init_heap, part_ptrs);
}
static inline void InitializeHeapSizes(const data_ptr_t row_locations[], idx_t heap_sizes[], const idx_t offset,
|
189
|
+
const idx_t next, const TupleDataChunkPart &part, const idx_t heap_size_offset) {
|
190
|
+
// Read the heap sizes from the rows
|
191
|
+
for (idx_t i = 0; i < next; i++) {
|
192
|
+
auto idx = offset + i;
|
193
|
+
heap_sizes[idx] = Load<uint32_t>(row_locations[idx] + heap_size_offset);
|
194
|
+
}
|
195
|
+
|
196
|
+
// Verify total size
|
197
|
+
#ifdef DEBUG
|
198
|
+
idx_t total_heap_size = 0;
|
199
|
+
for (idx_t i = 0; i < next; i++) {
|
200
|
+
auto idx = offset + i;
|
201
|
+
total_heap_size += heap_sizes[idx];
|
202
|
+
}
|
203
|
+
D_ASSERT(total_heap_size == part.total_heap_size);
|
204
|
+
#endif
|
205
|
+
}
|
206
|
+
|
207
|
+
//! Fills the chunk state's row locations (and, depending on the flags, heap sizes and
//! heap locations) for the given parts, starting at "offset" rows into the state.
//! When "recompute" is set, heap pointers embedded in the rows are fixed up if the heap
//! block was re-pinned at a different address.
void TupleDataAllocator::InitializeChunkStateInternal(TupleDataPinState &pin_state, TupleDataChunkState &chunk_state,
                                                      idx_t offset, bool recompute, bool init_heap_pointers,
                                                      bool init_heap_sizes, vector<TupleDataChunkPart *> &parts) {
	auto row_locations = FlatVector::GetData<data_ptr_t>(chunk_state.row_locations);
	auto heap_sizes = FlatVector::GetData<idx_t>(chunk_state.heap_sizes);
	auto heap_locations = FlatVector::GetData<data_ptr_t>(chunk_state.heap_locations);

	for (auto &part : parts) {
		const auto part_count = part->count;

		// Lay out the row pointers: rows are contiguous within a part
		const auto row_width = layout.GetRowWidth();
		const auto row_base = GetRowPointer(pin_state, *part);
		for (idx_t i = 0; i < part_count; i++) {
			row_locations[offset + i] = row_base + i * row_width;
		}

		if (layout.AllConstant()) { // Fixed-size layouts never have a heap
			offset += part_count;
			continue;
		}

		if (part->total_heap_size == 0) {
			if (init_heap_sizes) { // No heap data, but the caller wants the per-row heap sizes
				InitializeHeapSizes(row_locations, heap_sizes, offset, part_count, *part, layout.GetHeapSizeOffset());
			}
			offset += part_count;
			continue;
		}

		// The heap block may have been re-pinned at a different address: if so, the
		// pointers embedded inside each row must be recomputed
		if (recompute && pin_state.properties != TupleDataPinProperties::ALREADY_PINNED) {
			const auto new_base_heap_ptr = GetBaseHeapPointer(pin_state, *part);
			if (part->base_heap_ptr != new_base_heap_ptr) {
				lock_guard<mutex> guard(part->lock);
				// Re-check under the lock: another thread may have recomputed already
				const auto old_base_heap_ptr = part->base_heap_ptr;
				if (old_base_heap_ptr != new_base_heap_ptr) {
					Vector old_heap_ptrs(Value::POINTER((uintptr_t)old_base_heap_ptr + part->heap_block_offset));
					Vector new_heap_ptrs(Value::POINTER((uintptr_t)new_base_heap_ptr + part->heap_block_offset));
					RecomputeHeapPointers(old_heap_ptrs, *ConstantVector::ZeroSelectionVector(), row_locations,
					                      new_heap_ptrs, offset, part_count, layout, 0);
					part->base_heap_ptr = new_base_heap_ptr;
				}
			}
		}

		if (init_heap_sizes) {
			InitializeHeapSizes(row_locations, heap_sizes, offset, part_count, *part, layout.GetHeapSizeOffset());
		}

		if (init_heap_pointers) {
			// Heap data for a part is contiguous: the first row starts at the block offset,
			// each following row starts right after the previous row's heap data
			heap_locations[offset] = part->base_heap_ptr + part->heap_block_offset;
			for (idx_t i = 1; i < part_count; i++) {
				auto idx = offset + i;
				heap_locations[idx] = heap_locations[idx - 1] + heap_sizes[idx - 1];
			}
		}

		offset += part_count;
	}
	D_ASSERT(offset <= STANDARD_VECTOR_SIZE);
}
//! DEBUG-only sanity check: verifies every valid VARCHAR value in the given rows after
//! its heap pointer was recomputed. No-op in release builds and for non-VARCHAR types.
static inline void VerifyStrings(const LogicalTypeId type_id, const data_ptr_t row_locations[], const idx_t col_idx,
                                 const idx_t base_col_offset, const idx_t col_offset, const idx_t offset,
                                 const idx_t count) {
#ifdef DEBUG
	if (type_id != LogicalTypeId::VARCHAR) {
		// Make sure we don't verify BLOB / AGGREGATE_STATE (same physical type, different contents)
		return;
	}
	idx_t entry_idx;
	idx_t idx_in_entry;
	ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry);
	for (idx_t i = 0; i < count; i++) {
		const auto &row_location = row_locations[offset + i] + base_col_offset;
		ValidityBytes row_mask(row_location);
		// Only verify values whose validity bit is set; NULL slots hold garbage
		if (row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry)) {
			auto recomputed_string = Load<string_t>(row_location + col_offset);
			recomputed_string.Verify();
		}
	}
#endif
}
//! Rewrites heap pointers embedded in the rows after a heap block moved: each stored
//! pointer keeps its offset relative to the old base and is rebased onto the new base.
//! Recurses into non-constant STRUCT columns via their nested layouts.
void TupleDataAllocator::RecomputeHeapPointers(Vector &old_heap_ptrs, const SelectionVector &old_heap_sel,
                                               const data_ptr_t row_locations[], Vector &new_heap_ptrs,
                                               const idx_t offset, const idx_t count, const TupleDataLayout &layout,
                                               const idx_t base_col_offset) {
	const auto old_heap_locations = FlatVector::GetData<data_ptr_t>(old_heap_ptrs);

	UnifiedVectorFormat new_heap_data;
	new_heap_ptrs.ToUnifiedFormat(offset + count, new_heap_data);
	const auto new_heap_locations = (data_ptr_t *)new_heap_data.data;
	const auto new_heap_sel = *new_heap_data.sel;

	for (idx_t col_idx = 0; col_idx < layout.ColumnCount(); col_idx++) {
		const auto &col_offset = layout.GetOffsets()[col_idx];

		// Precompute where this column's validity bit lives
		idx_t entry_idx;
		idx_t idx_in_entry;
		ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry);

		const auto &type = layout.GetTypes()[col_idx];
		switch (type.InternalType()) {
		case PhysicalType::VARCHAR: {
			for (idx_t i = 0; i < count; i++) {
				const auto idx = offset + i;
				const auto &row_location = row_locations[idx] + base_col_offset;
				ValidityBytes row_mask(row_location);
				if (!row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry)) {
					continue; // NULL: nothing to rebase
				}

				const auto &old_heap_ptr = old_heap_locations[old_heap_sel.get_index(idx)];
				const auto &new_heap_ptr = new_heap_locations[new_heap_sel.get_index(idx)];

				// Only non-inlined strings carry a pointer into the heap
				const auto string_location = row_location + col_offset;
				if (Load<uint32_t>(string_location) > string_t::INLINE_LENGTH) {
					const auto string_ptr_location = string_location + string_t::HEADER_SIZE;
					const auto string_ptr = Load<data_ptr_t>(string_ptr_location);
					const auto diff = string_ptr - old_heap_ptr;
					D_ASSERT(diff >= 0);
					Store<data_ptr_t>(new_heap_ptr + diff, string_ptr_location);
				}
			}
			VerifyStrings(type.id(), row_locations, col_idx, base_col_offset, col_offset, offset, count);
			break;
		}
		case PhysicalType::LIST: {
			for (idx_t i = 0; i < count; i++) {
				const auto idx = offset + i;
				const auto &row_location = row_locations[idx] + base_col_offset;
				ValidityBytes row_mask(row_location);
				if (!row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry)) {
					continue; // NULL: nothing to rebase
				}

				const auto &old_heap_ptr = old_heap_locations[old_heap_sel.get_index(idx)];
				const auto &new_heap_ptr = new_heap_locations[new_heap_sel.get_index(idx)];

				// LIST columns always store a pointer to their heap data
				const auto &list_ptr_location = row_location + col_offset;
				const auto list_ptr = Load<data_ptr_t>(list_ptr_location);
				const auto diff = list_ptr - old_heap_ptr;
				D_ASSERT(diff >= 0);
				Store<data_ptr_t>(new_heap_ptr + diff, list_ptr_location);
			}
			break;
		}
		case PhysicalType::STRUCT: {
			// Nested columns live inline at base_col_offset + col_offset; recurse with the nested layout
			const auto &struct_layout = layout.GetStructLayout(col_idx);
			if (!struct_layout.AllConstant()) {
				RecomputeHeapPointers(old_heap_ptrs, old_heap_sel, row_locations, new_heap_ptrs, offset, count,
				                      struct_layout, base_col_offset + col_offset);
			}
			break;
		}
		default:
			// Fixed-size types hold no heap pointers
			continue;
		}
	}
}
//! Releases or stores (per the pin properties) all pinned handles that the given chunk
//! does not reference. Heap handles are only touched when "release_heap" is set.
void TupleDataAllocator::ReleaseOrStoreHandles(TupleDataPinState &pin_state, TupleDataSegment &segment,
                                               TupleDataChunk &chunk, bool release_heap) {
	D_ASSERT(this == segment.allocator.get());
	ReleaseOrStoreHandlesInternal(segment, segment.pinned_row_handles, pin_state.row_handles, chunk.row_block_ids,
	                              row_blocks, pin_state.properties);
	if (!layout.AllConstant() && release_heap) {
		ReleaseOrStoreHandlesInternal(segment, segment.pinned_heap_handles, pin_state.heap_handles,
		                              chunk.heap_block_ids, heap_blocks, pin_state.properties);
	}
}
//! Releases or stores ALL handles in the pin state by passing a chunk that references
//! no blocks. The dummy is a function-local static so an empty TupleDataChunk need not
//! be constructed per call; it is never written to here, only read.
void TupleDataAllocator::ReleaseOrStoreHandles(TupleDataPinState &pin_state, TupleDataSegment &segment) {
	static TupleDataChunk DUMMY_CHUNK;
	ReleaseOrStoreHandles(pin_state, segment, DUMMY_CHUNK, true);
}
//! Walks the pin state's handle map and, for every handle whose block is NOT in
//! "block_ids" (i.e., no longer needed by the chunk being processed), applies the pin
//! properties: move it into the segment's pinned handles (KEEP_EVERYTHING_PINNED), drop
//! it (UNPIN_AFTER_DONE / ALREADY_PINNED), or destroy the underlying block entirely
//! (DESTROY_AFTER_DONE). INVALID properties throw.
//!
//! Fix: the original erased one entry per full scan and restarted from handles.begin(),
//! which is accidentally O(n^2). unordered_map::erase(it) returns the next valid
//! iterator (since C++11), so a single pass suffices; entries we keep are skipped with
//! ++it. Behavior is otherwise identical.
void TupleDataAllocator::ReleaseOrStoreHandlesInternal(TupleDataSegment &segment, vector<BufferHandle> &pinned_handles,
                                                       unordered_map<uint32_t, BufferHandle> &handles,
                                                       const unordered_set<uint32_t> &block_ids,
                                                       vector<TupleDataBlock> &blocks,
                                                       TupleDataPinProperties properties) {
	for (auto it = handles.begin(); it != handles.end();) {
		const auto block_id = it->first;
		if (block_ids.find(block_id) != block_ids.end()) {
			// Still required by the chunk: keep the handle pinned
			++it;
			continue;
		}
		switch (properties) {
		case TupleDataPinProperties::KEEP_EVERYTHING_PINNED: {
			// Transfer the handle into the segment so the block stays pinned for its lifetime
			lock_guard<mutex> guard(segment.pinned_handles_lock);
			const auto block_count = block_id + 1;
			if (block_count > pinned_handles.size()) {
				pinned_handles.resize(block_count);
			}
			pinned_handles[block_id] = std::move(it->second);
			break;
		}
		case TupleDataPinProperties::UNPIN_AFTER_DONE:
		case TupleDataPinProperties::ALREADY_PINNED:
			break;
		case TupleDataPinProperties::DESTROY_AFTER_DONE:
			// Drop the underlying block entirely; its data can never be read again
			blocks[block_id].handle = nullptr;
			break;
		default:
			D_ASSERT(properties == TupleDataPinProperties::INVALID);
			throw InternalException("Encountered TupleDataPinProperties::INVALID");
		}
		it = handles.erase(it);
	}
}
//! Returns a pinned handle for the part's row block, pinning it on first use and
//! caching the handle in the pin state thereafter.
BufferHandle &TupleDataAllocator::PinRowBlock(TupleDataPinState &pin_state, const TupleDataChunkPart &part) {
	const auto &block_index = part.row_block_index;
	auto entry = pin_state.row_handles.find(block_index);
	if (entry != pin_state.row_handles.end()) {
		return entry->second; // Already pinned in this state
	}
	D_ASSERT(block_index < row_blocks.size());
	auto &row_block = row_blocks[block_index];
	D_ASSERT(row_block.handle);
	D_ASSERT(part.row_block_offset < row_block.size);
	D_ASSERT(part.row_block_offset + part.count * layout.GetRowWidth() <= row_block.size);
	return pin_state.row_handles.emplace(block_index, buffer_manager.Pin(row_block.handle)).first->second;
}
//! Returns a pinned handle for the part's heap block, pinning it on first use and
//! caching the handle in the pin state thereafter.
BufferHandle &TupleDataAllocator::PinHeapBlock(TupleDataPinState &pin_state, const TupleDataChunkPart &part) {
	const auto &block_index = part.heap_block_index;
	auto entry = pin_state.heap_handles.find(block_index);
	if (entry != pin_state.heap_handles.end()) {
		return entry->second; // Already pinned in this state
	}
	D_ASSERT(block_index < heap_blocks.size());
	auto &heap_block = heap_blocks[block_index];
	D_ASSERT(heap_block.handle);
	D_ASSERT(part.heap_block_offset < heap_block.size);
	D_ASSERT(part.heap_block_offset + part.total_heap_size <= heap_block.size);
	return pin_state.heap_handles.emplace(block_index, buffer_manager.Pin(heap_block.handle)).first->second;
}
//! Returns a pointer to the first row of the part (pins the row block if necessary).
data_ptr_t TupleDataAllocator::GetRowPointer(TupleDataPinState &pin_state, const TupleDataChunkPart &part) {
	return PinRowBlock(pin_state, part).Ptr() + part.row_block_offset;
}
//! Returns the base pointer of the part's heap block (pins the heap block if necessary).
//! Note: unlike GetRowPointer, the part's heap_block_offset is NOT added here.
data_ptr_t TupleDataAllocator::GetBaseHeapPointer(TupleDataPinState &pin_state, const TupleDataChunkPart &part) {
	return PinHeapBlock(pin_state, part).Ptr();
}
} // namespace duckdb
|