duckdb 0.7.2-dev1901.0 → 0.7.2-dev2233.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +2 -0
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/column_reader.cpp +3 -0
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +1 -1
- package/src/duckdb/extension/parquet/parquet_metadata.cpp +4 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +69 -44
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +20 -2
- package/src/duckdb/src/common/box_renderer.cpp +4 -2
- package/src/duckdb/src/common/constants.cpp +10 -1
- package/src/duckdb/src/common/filename_pattern.cpp +41 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +144 -15
- package/src/duckdb/src/common/radix_partitioning.cpp +101 -369
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +8 -9
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_gather.cpp +5 -3
- package/src/duckdb/src/common/row_operations/row_match.cpp +117 -22
- package/src/duckdb/src/common/row_operations/row_scatter.cpp +2 -2
- package/src/duckdb/src/common/sort/partition_state.cpp +1 -1
- package/src/duckdb/src/common/sort/sort_state.cpp +2 -1
- package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
- package/src/duckdb/src/common/types/{column_data_allocator.cpp → column/column_data_allocator.cpp} +2 -2
- package/src/duckdb/src/common/types/{column_data_collection.cpp → column/column_data_collection.cpp} +29 -6
- package/src/duckdb/src/common/types/{column_data_collection_segment.cpp → column/column_data_collection_segment.cpp} +2 -1
- package/src/duckdb/src/common/types/{column_data_consumer.cpp → column/column_data_consumer.cpp} +1 -1
- package/src/duckdb/src/common/types/{partitioned_column_data.cpp → column/partitioned_column_data.cpp} +11 -9
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +316 -0
- package/src/duckdb/src/common/types/{row_data_collection.cpp → row/row_data_collection.cpp} +1 -1
- package/src/duckdb/src/common/types/{row_data_collection_scanner.cpp → row/row_data_collection_scanner.cpp} +2 -2
- package/src/duckdb/src/common/types/{row_layout.cpp → row/row_layout.cpp} +1 -1
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +465 -0
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +511 -0
- package/src/duckdb/src/common/types/row/tuple_data_iterator.cpp +96 -0
- package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +119 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +1200 -0
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +170 -0
- package/src/duckdb/src/common/types/vector.cpp +1 -1
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +252 -290
- package/src/duckdb/src/execution/join_hashtable.cpp +192 -328
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +4 -4
- package/src/duckdb/src/execution/operator/helper/physical_execute.cpp +3 -3
- package/src/duckdb/src/execution/operator/helper/physical_limit_percent.cpp +2 -3
- package/src/duckdb/src/execution/operator/helper/physical_result_collector.cpp +2 -3
- package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +36 -21
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_cross_product.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +166 -144
- package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +2 -10
- package/src/duckdb/src/execution/operator/join/physical_positional_join.cpp +0 -1
- package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +2 -2
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +3 -0
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +71 -22
- package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +17 -13
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +0 -7
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +124 -29
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +13 -11
- package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +3 -2
- package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +25 -24
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +4 -3
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
- package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +1 -1
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +3 -3
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +9 -37
- package/src/duckdb/src/execution/physical_operator.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +19 -18
- package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +2 -1
- package/src/duckdb/src/execution/physical_plan/plan_execute.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_explain.cpp +5 -6
- package/src/duckdb/src/execution/physical_plan/plan_expression_get.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +3 -3
- package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -1
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +39 -17
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -2
- package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +5 -5
- package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +2 -2
- package/src/duckdb/src/function/table/read_csv.cpp +124 -58
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +528 -0
- package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +34 -0
- package/src/duckdb/src/include/duckdb/common/helper.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +13 -3
- package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +34 -0
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +80 -27
- package/src/duckdb/src/include/duckdb/common/reference_map.hpp +38 -0
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +7 -6
- package/src/duckdb/src/include/duckdb/common/sort/comparators.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/sort.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/sorted_block.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{column_data_allocator.hpp → column/column_data_allocator.hpp} +4 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection.hpp → column/column_data_collection.hpp} +4 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_iterators.hpp → column/column_data_collection_iterators.hpp} +2 -2
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_segment.hpp → column/column_data_collection_segment.hpp} +3 -3
- package/src/duckdb/src/include/duckdb/common/types/{column_data_consumer.hpp → column/column_data_consumer.hpp} +8 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_scan_states.hpp → column/column_data_scan_states.hpp} +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{partitioned_column_data.hpp → column/partitioned_column_data.hpp} +15 -7
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +140 -0
- package/src/duckdb/src/include/duckdb/common/types/{row_data_collection.hpp → row/row_data_collection.hpp} +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{row_data_collection_scanner.hpp → row/row_data_collection_scanner.hpp} +2 -2
- package/src/duckdb/src/include/duckdb/common/types/{row_layout.hpp → row/row_layout.hpp} +3 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +116 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +239 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_iterator.hpp +64 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +113 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +124 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +74 -0
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +4 -12
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +34 -31
- package/src/duckdb/src/include/duckdb/execution/base_aggregate_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/execution_context.hpp +3 -2
- package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +41 -67
- package/src/duckdb/src/include/duckdb/execution/nested_loop_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_execute.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +0 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_positional_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +4 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +8 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +5 -7
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +4 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/function.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +25 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +0 -2
- package/src/duckdb/src/include/duckdb/main/materialized_query_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/query_result.hpp +14 -1
- package/src/duckdb/src/include/duckdb/optimizer/expression_rewriter.hpp +3 -3
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +16 -16
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +8 -8
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +23 -15
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +9 -10
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +18 -11
- package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +5 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +3 -2
- package/src/duckdb/src/include/duckdb/parser/query_error_context.hpp +4 -2
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +9 -35
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +24 -23
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +3 -1
- package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
- package/src/duckdb/src/main/appender.cpp +6 -6
- package/src/duckdb/src/main/client_context.cpp +1 -1
- package/src/duckdb/src/main/connection.cpp +2 -2
- package/src/duckdb/src/main/query_result.cpp +13 -0
- package/src/duckdb/src/main/settings/settings.cpp +3 -4
- package/src/duckdb/src/optimizer/expression_rewriter.cpp +4 -4
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +91 -105
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -8
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +163 -160
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +30 -30
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +37 -38
- package/src/duckdb/src/parallel/executor.cpp +1 -1
- package/src/duckdb/src/parallel/meta_pipeline.cpp +2 -2
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +1 -1
- package/src/duckdb/src/parser/transform/tableref/transform_subquery.cpp +1 -1
- package/src/duckdb/src/parser/transformer.cpp +50 -9
- package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +13 -0
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +15 -5
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +19 -17
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +4 -4
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +20 -21
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +24 -22
- package/src/duckdb/src/planner/binder/tableref/bind_subqueryref.cpp +2 -2
- package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +9 -0
- package/src/duckdb/src/planner/binder.cpp +16 -19
- package/src/duckdb/src/planner/expression_binder.cpp +8 -8
- package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +3 -3
- package/src/duckdb/src/storage/checkpoint_manager.cpp +23 -23
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +1 -1
- package/src/duckdb/src/storage/table_index_list.cpp +3 -3
- package/src/duckdb/src/verification/statement_verifier.cpp +1 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +5552 -5598
- package/src/duckdb/ub_src_common.cpp +2 -0
- package/src/duckdb/ub_src_common_types.cpp +0 -16
- package/src/duckdb/ub_src_common_types_column.cpp +10 -0
- package/src/duckdb/ub_src_common_types_row.cpp +20 -0
@@ -0,0 +1,511 @@
|
|
1
|
+
#include "duckdb/common/types/row/tuple_data_collection.hpp"
|
2
|
+
|
3
|
+
#include "duckdb/common/fast_mem.hpp"
|
4
|
+
#include "duckdb/common/printer.hpp"
|
5
|
+
#include "duckdb/common/row_operations/row_operations.hpp"
|
6
|
+
#include "duckdb/common/types/row/tuple_data_allocator.hpp"
|
7
|
+
|
8
|
+
#include <algorithm>
|
9
|
+
|
10
|
+
namespace duckdb {
|
11
|
+
|
12
|
+
using ValidityBytes = TupleDataLayout::ValidityBytes;
|
13
|
+
|
14
|
+
// Builds an empty collection: stores a copy of layout_p and creates a new TupleDataAllocator
// from the buffer manager and the stored copy (not from layout_p itself).
TupleDataCollection::TupleDataCollection(BufferManager &buffer_manager, const TupleDataLayout &layout_p)
    : layout(layout_p.Copy()), allocator(make_shared<TupleDataAllocator>(buffer_manager, layout)) {
	// Sets count to zero and looks up the per-column scatter/gather functions
	Initialize();
}
|
18
|
+
|
19
|
+
// Builds an empty collection that shares an existing allocator; the layout is copied from it.
// NOTE(review): the layout initializer reads the parameter before it is moved from, which
// assumes `layout` is declared before `allocator` in the class - confirm against the header.
TupleDataCollection::TupleDataCollection(shared_ptr<TupleDataAllocator> allocator)
    : layout(allocator->GetLayout().Copy()), allocator(std::move(allocator)) {
	// Sets count to zero and looks up the per-column scatter/gather functions
	Initialize();
}
|
23
|
+
|
24
|
+
// Nothing to release by hand: the destructor body is empty and members clean up after themselves.
TupleDataCollection::~TupleDataCollection() = default;
|
26
|
+
|
27
|
+
void TupleDataCollection::Initialize() {
|
28
|
+
D_ASSERT(!layout.GetTypes().empty());
|
29
|
+
this->count = 0;
|
30
|
+
scatter_functions.reserve(layout.ColumnCount());
|
31
|
+
gather_functions.reserve(layout.ColumnCount());
|
32
|
+
for (idx_t col_idx = 0; col_idx < layout.ColumnCount(); col_idx++) {
|
33
|
+
auto &type = layout.GetTypes()[col_idx];
|
34
|
+
scatter_functions.emplace_back(GetScatterFunction(type));
|
35
|
+
gather_functions.emplace_back(GetGatherFunction(type));
|
36
|
+
}
|
37
|
+
}
|
38
|
+
|
39
|
+
// Appends the indices 0 .. ColumnCount()-1 to column_ids (existing entries are kept).
void TupleDataCollection::GetAllColumnIDs(vector<column_t> &column_ids) {
	const idx_t column_count = layout.ColumnCount();
	column_ids.reserve(column_count);
	idx_t next_id = 0;
	while (next_id < column_count) {
		column_ids.emplace_back(next_id);
		next_id++;
	}
}
|
45
|
+
|
46
|
+
// Read-only access to the layout stored in this collection.
const TupleDataLayout &TupleDataCollection::GetLayout() const {
	return this->layout;
}
|
49
|
+
|
50
|
+
// Returns a reference to the collection's row counter (not a copy).
const idx_t &TupleDataCollection::Count() const {
	return this->count;
}
|
53
|
+
|
54
|
+
// Sums ChunkCount() over all segments.
idx_t TupleDataCollection::ChunkCount() const {
	idx_t chunk_total = 0;
	for (auto it = segments.begin(); it != segments.end(); ++it) {
		chunk_total += it->ChunkCount();
	}
	return chunk_total;
}
|
61
|
+
|
62
|
+
// Sums SizeInBytes() over all segments.
idx_t TupleDataCollection::SizeInBytes() const {
	idx_t byte_total = 0;
	for (auto it = segments.begin(); it != segments.end(); ++it) {
		byte_total += it->SizeInBytes();
	}
	return byte_total;
}
|
69
|
+
|
70
|
+
// Writes one pointer per row block into block_pointers, taken from the pinned row handles
// of the collection's only segment. Asserts that there is exactly one segment and that it
// holds as many pinned row handles as the allocator has row blocks.
void TupleDataCollection::GetBlockPointers(vector<data_ptr_t> &block_pointers) const {
	D_ASSERT(segments.size() == 1);
	const auto &only_segment = segments[0];
	const auto &row_handles = only_segment.pinned_row_handles;
	const auto row_block_count = only_segment.allocator->RowBlockCount();
	D_ASSERT(row_handles.size() == row_block_count);

	block_pointers.resize(row_block_count);
	for (idx_t i = 0; i < row_block_count; i++) {
		block_pointers[i] = row_handles[i].Ptr();
	}
}
|
80
|
+
|
81
|
+
void TupleDataCollection::Unpin() {
|
82
|
+
for (auto &segment : segments) {
|
83
|
+
segment.Unpin();
|
84
|
+
}
|
85
|
+
}
|
86
|
+
|
87
|
+
// Debug-only sanity check (compiled to a no-op unless DEBUG is defined).
// Every layout column that is NOT listed in column_ids will not be appended in the first go,
// so it must be fixed-size (the heap cannot be resized afterwards): it may not be a VARCHAR
// or LIST, and if it is a STRUCT the same must hold for all of its children, recursively.
//   layout     - layout whose columns are checked
//   column_ids - columns that are appended right away and are therefore exempt from the check
void VerifyAppendColumns(const TupleDataLayout &layout, const vector<column_t> &column_ids) {
#ifdef DEBUG
	const auto &types = layout.GetTypes();
	for (idx_t col_idx = 0; col_idx < layout.ColumnCount(); col_idx++) {
		if (std::find(column_ids.begin(), column_ids.end(), col_idx) != column_ids.end()) {
			continue;
		}
		// This column will not be appended in the first go - verify that it is fixed-size - we cannot resize heap after
		const auto physical_type = types[col_idx].InternalType();
		D_ASSERT(physical_type != PhysicalType::VARCHAR && physical_type != PhysicalType::LIST);
		if (physical_type == PhysicalType::STRUCT) {
			// None of the struct's children are appended either, so recurse with an EMPTY id list.
			// Passing the full list of child ids (as was done before) makes the recursive call
			// skip every child, so nothing would be verified.
			const vector<column_t> no_appended_children;
			VerifyAppendColumns(layout.GetStructLayout(col_idx), no_appended_children);
		}
	}
#endif
}
|
108
|
+
|
109
|
+
// Prepares append_state for appending all columns of the layout.
void TupleDataCollection::InitializeAppend(TupleDataAppendState &append_state, TupleDataPinProperties properties) {
	vector<column_t> all_column_ids;
	GetAllColumnIDs(all_column_ids);
	// Hand over to the overload that takes an explicit column list
	InitializeAppend(append_state, std::move(all_column_ids), properties);
}
|
114
|
+
|
115
|
+
// Prepares append_state for appending the given columns: runs the (debug-only) column check,
// then initializes the pin state and the chunk state in turn.
void TupleDataCollection::InitializeAppend(TupleDataAppendState &append_state, vector<column_t> column_ids,
                                           TupleDataPinProperties properties) {
	VerifyAppendColumns(layout, column_ids);

	auto &pin_state = append_state.pin_state;
	InitializeAppend(pin_state, properties);

	auto &chunk_state = append_state.chunk_state;
	InitializeAppend(chunk_state, std::move(column_ids));
}
|
121
|
+
|
122
|
+
// Records the pin properties in pin_state and makes sure there is a segment to append to.
void TupleDataCollection::InitializeAppend(TupleDataPinState &pin_state, TupleDataPinProperties properties) {
	pin_state.properties = properties;
	if (!segments.empty()) {
		return;
	}
	// First append into this collection: create the initial segment from the allocator
	segments.emplace_back(allocator);
}
|
128
|
+
|
129
|
+
// Sizes vector_data to one entry per type and recursively sizes the child_formats of nested
// types: a STRUCT gets one child format per struct child, a LIST gets a single child format
// for its element type. Other types are left without child formats.
static void InitializeVectorFormat(vector<TupleDataVectorFormat> &vector_data, const vector<LogicalType> &types) {
	const idx_t type_count = types.size();
	vector_data.resize(type_count);
	for (idx_t i = 0; i < type_count; i++) {
		const auto &type = types[i];
		const auto internal_type = type.InternalType();
		if (internal_type == PhysicalType::STRUCT) {
			// Collect the types of the struct children (the second element of each child entry)
			const auto &struct_children = StructType::GetChildTypes(type);
			vector<LogicalType> struct_child_types;
			struct_child_types.reserve(struct_children.size());
			for (const auto &struct_child : struct_children) {
				struct_child_types.emplace_back(struct_child.second);
			}
			InitializeVectorFormat(vector_data[i].child_formats, struct_child_types);
		} else if (internal_type == PhysicalType::LIST) {
			InitializeVectorFormat(vector_data[i].child_formats, {ListType::GetChildType(type)});
		}
	}
}
|
152
|
+
|
153
|
+
// Prepares chunk_state for appends: sizes its vector formats for the layout's types and stores
// the column ids to append. An empty column_ids means "all columns".
void TupleDataCollection::InitializeAppend(TupleDataChunkState &chunk_state, vector<column_t> column_ids) {
	const bool use_all_columns = column_ids.empty();
	if (use_all_columns) {
		GetAllColumnIDs(column_ids);
	}
	InitializeVectorFormat(chunk_state.vector_data, layout.GetTypes());
	chunk_state.column_ids = std::move(column_ids);
}
|
160
|
+
|
161
|
+
// One-shot append of all columns: sets up a temporary append state and appends through it.
void TupleDataCollection::Append(DataChunk &new_chunk, const SelectionVector &append_sel, idx_t append_count) {
	TupleDataAppendState local_state;
	InitializeAppend(local_state);
	Append(local_state, new_chunk, append_sel, append_count);
}
|
166
|
+
|
167
|
+
// One-shot append of the given columns: sets up a temporary append state for column_ids and
// appends through it.
void TupleDataCollection::Append(DataChunk &new_chunk, vector<column_t> column_ids, const SelectionVector &append_sel,
                                 const idx_t append_count) {
	TupleDataAppendState local_state;
	InitializeAppend(local_state, std::move(column_ids));
	Append(local_state, new_chunk, append_sel, append_count);
}
|
173
|
+
|
174
|
+
// Appends using an already initialized append state by forwarding its pin and chunk state.
void TupleDataCollection::Append(TupleDataAppendState &append_state, DataChunk &new_chunk,
                                 const SelectionVector &append_sel, const idx_t append_count) {
	auto &pin_state = append_state.pin_state;
	auto &chunk_state = append_state.chunk_state;
	Append(pin_state, chunk_state, new_chunk, append_sel, append_count);
}
|
178
|
+
|
179
|
+
// Appends new_chunk in two steps: convert its vectors to unified format (stored in
// chunk_state), then append the converted data.
void TupleDataCollection::Append(TupleDataPinState &pin_state, TupleDataChunkState &chunk_state, DataChunk &new_chunk,
                                 const SelectionVector &append_sel, const idx_t append_count) {
	// Step 1: unified format of the chunk's vectors
	TupleDataCollection::ToUnifiedFormat(chunk_state, new_chunk);
	// Step 2: the actual append
	AppendUnified(pin_state, chunk_state, new_chunk, append_sel, append_count);
}
|
184
|
+
|
185
|
+
// Appends rows of new_chunk; Append() calls ToUnifiedFormat on chunk_state before calling this.
// An append_count of DConstants::INVALID_INDEX means "append new_chunk.size() rows".
// Steps: compute heap sizes (only if the layout is not all-constant), Build, then Scatter.
void TupleDataCollection::AppendUnified(TupleDataPinState &pin_state, TupleDataChunkState &chunk_state,
                                        DataChunk &new_chunk, const SelectionVector &append_sel,
                                        const idx_t append_count) {
	idx_t row_count = append_count;
	if (row_count == DConstants::INVALID_INDEX) {
		row_count = new_chunk.size();
	}
	if (row_count == 0) {
		return;
	}

	const bool has_heap = !layout.AllConstant();
	if (has_heap) {
		TupleDataCollection::ComputeHeapSizes(chunk_state, new_chunk, append_sel, row_count);
	}

	Build(pin_state, chunk_state, 0, row_count);

#ifdef DEBUG
	// Remember the heap locations before scattering so the written size can be checked below
	Vector heap_locations_before(LogicalType::POINTER);
	if (has_heap) {
		VectorOperations::Copy(chunk_state.heap_locations, heap_locations_before, row_count, 0, 0);
	}
#endif

	Scatter(chunk_state, new_chunk, append_sel, row_count);

#ifdef DEBUG
	// Verify that the size of the data written to the heap is the same as the size we computed it would be
	if (has_heap) {
		const auto locations_before = FlatVector::GetData<data_ptr_t>(heap_locations_before);
		const auto locations_after = FlatVector::GetData<data_ptr_t>(chunk_state.heap_locations);
		const auto computed_sizes = FlatVector::GetData<idx_t>(chunk_state.heap_sizes);
		for (idx_t row_idx = 0; row_idx < row_count; row_idx++) {
			D_ASSERT(locations_after[row_idx] == locations_before[row_idx] + computed_sizes[row_idx]);
		}
	}
#endif
}
|
220
|
+
|
221
|
+
// Converts `vector` to unified format into format.data and recurses into nested types:
// every struct entry is converted with the same count, the list child entry is converted
// with the list's size. Expects format.child_formats to be sized already (asserted).
static inline void ToUnifiedFormatInternal(TupleDataVectorFormat &format, Vector &vector, const idx_t count) {
	vector.ToUnifiedFormat(count, format.data);

	const auto internal_type = vector.GetType().InternalType();
	if (internal_type == PhysicalType::STRUCT) {
		auto &struct_entries = StructVector::GetEntries(vector);
		const idx_t entry_count = struct_entries.size();
		D_ASSERT(format.child_formats.size() == entry_count);
		for (idx_t entry_idx = 0; entry_idx < entry_count; entry_idx++) {
			ToUnifiedFormatInternal(format.child_formats[entry_idx], *struct_entries[entry_idx], count);
		}
	} else if (internal_type == PhysicalType::LIST) {
		D_ASSERT(format.child_formats.size() == 1);
		ToUnifiedFormatInternal(format.child_formats[0], ListVector::GetEntry(vector), ListVector::GetListSize(vector));
	}
}
|
240
|
+
|
241
|
+
// Converts the columns of new_chunk listed in chunk_state.column_ids to unified format,
// storing the result in chunk_state.vector_data at the same column index.
void TupleDataCollection::ToUnifiedFormat(TupleDataChunkState &chunk_state, DataChunk &new_chunk) {
	const auto &column_ids = chunk_state.column_ids;
	D_ASSERT(chunk_state.vector_data.size() >= column_ids.size()); // Needs InitializeAppend
	const auto chunk_size = new_chunk.size();
	for (auto it = column_ids.begin(); it != column_ids.end(); ++it) {
		const auto &column_id = *it;
		ToUnifiedFormatInternal(chunk_state.vector_data[column_id], new_chunk.data[column_id], chunk_size);
	}
}
|
247
|
+
|
248
|
+
void TupleDataCollection::GetVectorData(const TupleDataChunkState &chunk_state, UnifiedVectorFormat result[]) {
|
249
|
+
const auto &vector_data = chunk_state.vector_data;
|
250
|
+
for (idx_t i = 0; i < vector_data.size(); i++) {
|
251
|
+
const auto &source = vector_data[i].data;
|
252
|
+
auto &target = result[i];
|
253
|
+
target.sel = source.sel;
|
254
|
+
target.data = source.data;
|
255
|
+
target.validity = source.validity;
|
256
|
+
}
|
257
|
+
}
|
258
|
+
|
259
|
+
// Lets the last segment's allocator build space for append_count rows in that segment, then
// adds append_count to the row count and runs Verify().
void TupleDataCollection::Build(TupleDataPinState &pin_state, TupleDataChunkState &chunk_state,
                                const idx_t append_offset, const idx_t append_count) {
	auto &target_segment = segments.back();
	target_segment.allocator->Build(target_segment, pin_state, chunk_state, append_offset, append_count);
	this->count += append_count;
	Verify();
}
|
265
|
+
|
266
|
+
// Debug-only check (no-op unless DEBUG is defined): for each selected row, the uint32_t stored
// at heap_size_offset inside the row must equal the corresponding entry of heap_sizes.
void VerifyHeapSizes(const data_ptr_t source_locations[], const idx_t heap_sizes[], const SelectionVector &append_sel,
                     const idx_t append_count, const idx_t heap_size_offset) {
#ifdef DEBUG
	for (idx_t sel_pos = 0; sel_pos < append_count; sel_pos++) {
		const auto row_idx = append_sel.get_index(sel_pos);
		const auto size_in_row = Load<uint32_t>(source_locations[row_idx] + heap_size_offset);
		D_ASSERT(size_in_row == heap_sizes[row_idx]);
	}
#endif
}
|
276
|
+
|
277
|
+
// Copies the rows selected by append_sel from `input` to the row locations in chunk_state.
// If the layout is not all-constant, the selected rows' heap data is copied as well and the
// heap pointers are recomputed afterwards. Targets are indexed densely (0..append_count-1),
// sources through append_sel.
void TupleDataCollection::CopyRows(TupleDataChunkState &chunk_state, TupleDataChunkState &input,
                                   const SelectionVector &append_sel, const idx_t append_count) const {
	const auto src_rows = FlatVector::GetData<data_ptr_t>(input.row_locations);
	const auto dst_rows = FlatVector::GetData<data_ptr_t>(chunk_state.row_locations);

	// Copy rows
	const auto row_width = layout.GetRowWidth();
	for (idx_t i = 0; i < append_count; i++) {
		FastMemcpy(dst_rows[i], src_rows[append_sel.get_index(i)], row_width);
	}

	// Copy heap if we need to
	if (layout.AllConstant()) {
		return;
	}

	const auto src_heap = FlatVector::GetData<data_ptr_t>(input.heap_locations);
	const auto dst_heap = FlatVector::GetData<data_ptr_t>(chunk_state.heap_locations);
	const auto heap_sizes = FlatVector::GetData<idx_t>(input.heap_sizes);
	VerifyHeapSizes(src_rows, heap_sizes, append_sel, append_count, layout.GetHeapSizeOffset());

	// Check if we need to copy anything at all
	idx_t total_heap_size = 0;
	for (idx_t i = 0; i < append_count; i++) {
		total_heap_size += heap_sizes[append_sel.get_index(i)];
	}
	if (total_heap_size == 0) {
		return;
	}

	// Copy heap
	for (idx_t i = 0; i < append_count; i++) {
		const auto src_idx = append_sel.get_index(i);
		FastMemcpy(dst_heap[i], src_heap[src_idx], heap_sizes[src_idx]);
	}

	// Recompute pointers after copying the data
	TupleDataAllocator::RecomputeHeapPointers(input.heap_locations, append_sel, dst_rows, chunk_state.heap_locations,
	                                          0, append_count, layout, 0);
}
|
317
|
+
|
318
|
+
// Moves all segments of `other` to the end of this collection and resets `other`.
// Does nothing if `other` holds no rows. Throws InternalException if the layouts' types differ.
void TupleDataCollection::Combine(TupleDataCollection &other) {
	if (other.count == 0) {
		return;
	}
	const auto &other_types = other.GetLayout().GetTypes();
	if (this->layout.GetTypes() != other_types) {
		throw InternalException("Attempting to combine TupleDataCollection with mismatching types");
	}

	this->count += other.count;
	auto &incoming = other.segments;
	this->segments.reserve(this->segments.size() + incoming.size());
	for (auto it = incoming.begin(); it != incoming.end(); ++it) {
		this->segments.emplace_back(std::move(*it));
	}
	// The moved-from segments are dropped here, together with other's count and allocator
	other.Reset();
	Verify();
}
|
333
|
+
|
334
|
+
// Same as Combine(TupleDataCollection &), for a collection handed over by unique_ptr;
// the emptied collection is destroyed when `other` goes out of scope.
void TupleDataCollection::Combine(unique_ptr<TupleDataCollection> other) {
	TupleDataCollection &other_collection = *other;
	Combine(other_collection);
}
|
337
|
+
|
338
|
+
void TupleDataCollection::Reset() {
|
339
|
+
count = 0;
|
340
|
+
segments.clear();
|
341
|
+
|
342
|
+
// Refreshes the TupleDataAllocator to prevent holding on to allocated data unnecessarily
|
343
|
+
allocator = make_shared<TupleDataAllocator>(*allocator);
|
344
|
+
}
|
345
|
+
|
346
|
+
// Initializes `chunk` with all of the layout's types, using the allocator obtained from
// this collection's TupleDataAllocator.
void TupleDataCollection::InitializeChunk(DataChunk &chunk) const {
	const auto &all_types = layout.GetTypes();
	chunk.Initialize(allocator->GetAllocator(), all_types);
}
|
349
|
+
|
350
|
+
// Initializes `chunk` with the types of the columns selected in the scan state, in the order
// of state.chunk_state.column_ids. The scan state must list at least one column (asserted).
void TupleDataCollection::InitializeScanChunk(TupleDataScanState &state, DataChunk &chunk) const {
	auto &scan_column_ids = state.chunk_state.column_ids;
	D_ASSERT(!scan_column_ids.empty());

	const auto &all_types = layout.GetTypes();
	vector<LogicalType> scan_types;
	scan_types.reserve(scan_column_ids.size());
	for (const auto scan_column_id : scan_column_ids) {
		D_ASSERT(scan_column_id < layout.ColumnCount());
		scan_types.push_back(all_types[scan_column_id]);
	}
	chunk.Initialize(allocator->GetAllocator(), scan_types);
}
|
362
|
+
|
363
|
+
// Initializes a scan over every column of the layout (column ids 0 .. ColumnCount() - 1).
void TupleDataCollection::InitializeScan(TupleDataScanState &state, TupleDataPinProperties properties) const {
	const auto column_count = layout.ColumnCount();
	vector<column_t> all_columns;
	all_columns.reserve(column_count);
	for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
		all_columns.push_back(col_idx);
	}
	// Delegate to the overload that takes an explicit column list
	InitializeScan(state, std::move(all_columns), properties);
}
|
371
|
+
|
372
|
+
// Prepares "state" for a scan of the given columns: clears the held row/heap handles, stores the pin properties,
// rewinds the scan position to segment 0 / chunk 0 and takes ownership of "column_ids".
void TupleDataCollection::InitializeScan(TupleDataScanState &state, vector<column_t> column_ids,
                                         TupleDataPinProperties properties) const {
	// Pin state
	auto &pins = state.pin_state;
	pins.row_handles.clear();
	pins.heap_handles.clear();
	pins.properties = properties;

	// Scan position
	state.segment_index = 0;
	state.chunk_index = 0;

	// Columns to scan
	state.chunk_state.column_ids = std::move(column_ids);
}
|
381
|
+
|
382
|
+
// Parallel variant: initializes the scan state embedded in "gstate" for a scan over all columns.
void TupleDataCollection::InitializeScan(TupleDataParallelScanState &gstate, TupleDataPinProperties properties) const {
	auto &shared_scan_state = gstate.scan_state;
	InitializeScan(shared_scan_state, properties);
}
|
385
|
+
|
386
|
+
// Parallel variant: initializes the scan state embedded in "state" for a scan over the given columns.
void TupleDataCollection::InitializeScan(TupleDataParallelScanState &state, vector<column_t> column_ids,
                                         TupleDataPinProperties properties) const {
	auto &shared_scan_state = state.scan_state;
	InitializeScan(shared_scan_state, std::move(column_ids), properties);
}
|
390
|
+
|
391
|
+
// Scans the next chunk of the collection into "result". Returns false if there is nothing left to scan.
bool TupleDataCollection::Scan(TupleDataScanState &state, DataChunk &result) {
	// Remember which segment the scan was positioned on before advancing
	const auto previous_segment_index = state.segment_index;

	idx_t next_segment_index;
	idx_t next_chunk_index;
	const bool has_next = NextScanIndex(state, next_segment_index, next_chunk_index);
	if (!has_next) {
		return false;
	}

	// When the scan crosses into another segment, finalize the pins of the segment we came from
	const bool had_previous_segment = previous_segment_index != DConstants::INVALID_INDEX;
	if (had_previous_segment && next_segment_index != previous_segment_index) {
		FinalizePinState(state.pin_state, segments[previous_segment_index]);
	}

	auto &chunk_state = state.chunk_state;
	ScanAtIndex(state.pin_state, chunk_state, chunk_state.column_ids, next_segment_index, next_chunk_index, result);
	return true;
}
|
404
|
+
|
405
|
+
// Parallel scan: obtains the next (segment, chunk) position from the shared scan state in "gstate" and scans that
// chunk into "result" using the pin/chunk state in "lstate". Returns false once the shared scan has no chunks left.
bool TupleDataCollection::Scan(TupleDataParallelScanState &gstate, TupleDataLocalScanState &lstate, DataChunk &result) {
	lstate.pin_state.properties = gstate.scan_state.pin_state.properties;

	const auto segment_index_before = lstate.segment_index;
	idx_t segment_index;
	idx_t chunk_index;
	{
		// Only advancing the shared scan position happens under the global lock
		lock_guard<mutex> guard(gstate.lock);
		if (!NextScanIndex(gstate.scan_state, segment_index, chunk_index)) {
			return false;
		}
	}
	if (segment_index_before != DConstants::INVALID_INDEX && segment_index_before != segment_index) {
		// We moved on to a different segment: finalize the pins of the segment scanned before
		FinalizePinState(lstate.pin_state, segments[segment_index_before]);
	}
	// Always record the segment we are scanning now. Previously this assignment lived inside the branch above,
	// which already requires a valid recorded index - so a local state starting out at INVALID_INDEX never recorded
	// anything and the finalization above could never trigger.
	lstate.segment_index = segment_index;

	ScanAtIndex(lstate.pin_state, lstate.chunk_state, gstate.scan_state.chunk_state.column_ids, segment_index,
	            chunk_index, result);
	return true;
}
|
425
|
+
|
426
|
+
// Hands the pin state over to the allocator of "segment", which releases or stores the handles.
void TupleDataCollection::FinalizePinState(TupleDataPinState &pin_state, TupleDataSegment &segment) {
	auto &segment_allocator = *segment.allocator;
	segment_allocator.ReleaseOrStoreHandles(pin_state, segment);
}
|
429
|
+
|
430
|
+
// Variant for a collection with exactly one segment (asserted): releases or stores the handles for that segment
// through the collection's own allocator.
void TupleDataCollection::FinalizePinState(TupleDataPinState &pin_state) {
	D_ASSERT(segments.size() == 1);
	auto &only_segment = segments.back();
	allocator->ReleaseOrStoreHandles(pin_state, only_segment);
}
|
434
|
+
|
435
|
+
// Advances the scan position stored in "state". On success, writes the position to scan to "segment_index" and
// "chunk_index", moves state.chunk_index one past it and returns true. Returns false (leaving the outputs untouched)
// when no chunks remain.
bool TupleDataCollection::NextScanIndex(TupleDataScanState &state, idx_t &segment_index, idx_t &chunk_index) {
	const auto segment_count = segments.size();
	auto &segment_cursor = state.segment_index;
	auto &chunk_cursor = state.chunk_index;

	// Walk forward over the segments until one still has an unscanned chunk; every time a segment turns out to be
	// exhausted we step to the next one and restart at its first chunk
	for (; segment_cursor < segment_count; segment_cursor++, chunk_cursor = 0) {
		if (chunk_cursor < segments[segment_cursor].ChunkCount()) {
			segment_index = segment_cursor;
			chunk_index = chunk_cursor++;
			return true;
		}
	}
	// No more data left in the scan
	return false;
}
|
454
|
+
|
455
|
+
// Gathers the columns "column_ids" of the chunk at (segment_index, chunk_index) into "result" and sets the
// cardinality of "result" to the row count of that chunk.
void TupleDataCollection::ScanAtIndex(TupleDataPinState &pin_state, TupleDataChunkState &chunk_state,
                                      const vector<column_t> &column_ids, idx_t segment_index, idx_t chunk_index,
                                      DataChunk &result) {
	auto &scan_segment = segments[segment_index];
	auto &scan_chunk = scan_segment.chunks[chunk_index];

	// Set up the chunk state for this chunk (the last argument is passed as false, exactly as before)
	scan_segment.allocator->InitializeChunkState(scan_segment, pin_state, chunk_state, chunk_index, false);

	result.Reset();
	// The incremental selection vector is used both for the scan and for the target
	auto &incremental_sel = *FlatVector::IncrementalSelectionVector();
	Gather(chunk_state.row_locations, incremental_sel, scan_chunk.count, column_ids, result, incremental_sel);
	result.SetCardinality(scan_chunk.count);
}
|
466
|
+
|
467
|
+
// Renders the collection as text: a header line with the chunk and row counts, followed by one header line plus the
// chunk contents for every scanned chunk.
string TupleDataCollection::ToString() {
	DataChunk scan_chunk;
	InitializeChunk(scan_chunk);

	TupleDataScanState scan_state;
	InitializeScan(scan_state);

	string rendered = StringUtil::Format("TupleDataCollection - [%llu Chunks, %llu Rows]\n", ChunkCount(), Count());
	idx_t rows_seen = 0;
	for (idx_t chunk_no = 0; Scan(scan_state, scan_chunk); chunk_no++) {
		idx_t rows_after = rows_seen + scan_chunk.size();
		rendered += StringUtil::Format("Chunk %llu - [Rows %llu - %llu]\n", chunk_no, rows_seen, rows_after);
		rendered += scan_chunk.ToString();
		rows_seen = rows_after;
	}

	return rendered;
}
|
487
|
+
|
488
|
+
// Prints the textual representation produced by ToString().
void TupleDataCollection::Print() {
	const auto rendered = ToString();
	Printer::Print(rendered);
}
|
491
|
+
|
492
|
+
// Debug-only check: verifies every segment and asserts that the segment row counts add up to the collection's count.
// Compiles to an empty function unless DEBUG is defined.
void TupleDataCollection::Verify() const {
#ifdef DEBUG
	idx_t rows_in_segments = 0;
	for (idx_t seg_idx = 0; seg_idx < segments.size(); seg_idx++) {
		const auto &current_segment = segments[seg_idx];
		current_segment.Verify();
		rows_in_segments += current_segment.count;
	}
	D_ASSERT(rows_in_segments == count);
#endif
}
|
502
|
+
|
503
|
+
// Debug-only check: calls VerifyEverythingPinned() on every segment.
// Compiles to an empty function unless DEBUG is defined.
void TupleDataCollection::VerifyEverythingPinned() const {
#ifdef DEBUG
	for (idx_t seg_idx = 0; seg_idx < segments.size(); seg_idx++) {
		const auto &current_segment = segments[seg_idx];
		current_segment.VerifyEverythingPinned();
	}
#endif
}
|
510
|
+
|
511
|
+
} // namespace duckdb
|
@@ -0,0 +1,96 @@
|
|
1
|
+
#include "duckdb/common/types/row/tuple_data_iterator.hpp"
|
2
|
+
|
3
|
+
#include "duckdb/common/types/row/tuple_data_allocator.hpp"
|
4
|
+
|
5
|
+
namespace duckdb {
|
6
|
+
|
7
|
+
// Iterates over all chunks of the collection: delegates to the range constructor with the chunk range
// [0, collection_p.ChunkCount()).
TupleDataChunkIterator::TupleDataChunkIterator(TupleDataCollection &collection_p, TupleDataPinProperties properties_p,
                                               bool init_heap)
    : TupleDataChunkIterator(collection_p, properties_p, 0, collection_p.ChunkCount(), init_heap) {
	// All set-up happens in the delegated-to constructor
}
|
11
|
+
|
12
|
+
// Iterates over the chunks in the range given by chunk_idx_from / chunk_idx_to, which are chunk indices counted over
// the whole collection. The constructor translates both bounds into (segment index, chunk index within segment)
// pairs and then positions the iterator through Reset().
TupleDataChunkIterator::TupleDataChunkIterator(TupleDataCollection &collection_p, TupleDataPinProperties properties,
                                               idx_t chunk_idx_from, idx_t chunk_idx_to, bool init_heap_p)
    : collection(collection_p), init_heap(init_heap_p) {
	state.pin_state.properties = properties;
	D_ASSERT(chunk_idx_from < chunk_idx_to);
	D_ASSERT(chunk_idx_to <= collection.ChunkCount());

	// [segment_begin, segment_end] are the overall chunk indices covered by the segment under inspection
	auto &all_segments = collection.segments;
	idx_t segment_begin = 0;
	for (idx_t seg_idx = 0; seg_idx < all_segments.size(); seg_idx++) {
		const idx_t segment_end = segment_begin + all_segments[seg_idx].ChunkCount();
		if (segment_begin <= chunk_idx_from && chunk_idx_from <= segment_end) {
			// The iteration starts in this segment
			start_segment_idx = seg_idx;
			start_chunk_idx = chunk_idx_from - segment_begin;
		}
		if (segment_begin <= chunk_idx_to && chunk_idx_to <= segment_end) {
			// The iteration ends in this segment
			end_segment_idx = seg_idx;
			end_chunk_idx = chunk_idx_to - segment_begin;
		}
		segment_begin = segment_end;
	}

	Reset();
}
|
36
|
+
|
37
|
+
void TupleDataChunkIterator::InitializeCurrentChunk() {
|
38
|
+
auto &segment = collection.segments[current_segment_idx];
|
39
|
+
segment.allocator->InitializeChunkState(segment, state.pin_state, state.chunk_state, current_chunk_idx, init_heap);
|
40
|
+
}
|
41
|
+
|
42
|
+
bool TupleDataChunkIterator::Done() const {
|
43
|
+
return current_segment_idx == end_segment_idx && current_chunk_idx == end_chunk_idx;
|
44
|
+
}
|
45
|
+
|
46
|
+
bool TupleDataChunkIterator::Next() {
|
47
|
+
D_ASSERT(!Done()); // Check if called after already done
|
48
|
+
|
49
|
+
// Set the next indices and checks if we're at the end of the collection
|
50
|
+
// NextScanIndex can go past this iterators 'end', so we have to check the indices again
|
51
|
+
const auto segment_idx_before = current_segment_idx;
|
52
|
+
if (!collection.NextScanIndex(state, current_segment_idx, current_chunk_idx) || Done()) {
|
53
|
+
// Drop pins / stores them if TupleDataPinProperties::KEEP_EVERYTHING_PINNED
|
54
|
+
collection.FinalizePinState(state.pin_state, collection.segments[segment_idx_before]);
|
55
|
+
current_segment_idx = end_segment_idx;
|
56
|
+
current_chunk_idx = end_chunk_idx;
|
57
|
+
return false;
|
58
|
+
}
|
59
|
+
|
60
|
+
// Finalize pin state when moving from one segment to the next
|
61
|
+
if (current_segment_idx != segment_idx_before) {
|
62
|
+
collection.FinalizePinState(state.pin_state, collection.segments[segment_idx_before]);
|
63
|
+
}
|
64
|
+
|
65
|
+
InitializeCurrentChunk();
|
66
|
+
return true;
|
67
|
+
}
|
68
|
+
|
69
|
+
// Rewinds the iterator to its start position and initializes the chunk state for the first chunk.
void TupleDataChunkIterator::Reset() {
	// Put the scan state on the start position ...
	state.chunk_index = start_chunk_idx;
	state.segment_index = start_segment_idx;
	// ... and fetch it as the current position (the return value is not checked here)
	collection.NextScanIndex(state, current_segment_idx, current_chunk_idx);
	InitializeCurrentChunk();
}
|
75
|
+
|
76
|
+
// Returns the "count" field of the chunk at the current position.
idx_t TupleDataChunkIterator::GetCurrentChunkCount() const {
	const auto &current_segment = collection.segments[current_segment_idx];
	const auto &current_chunk = current_segment.chunks[current_chunk_idx];
	return current_chunk.count;
}
|
79
|
+
|
80
|
+
// Gives access to the chunk state held by this iterator.
TupleDataChunkState &TupleDataChunkIterator::GetChunkState() {
	auto &current_chunk_state = state.chunk_state;
	return current_chunk_state;
}
|
83
|
+
|
84
|
+
// Returns the data of the chunk state's row_locations vector as an array of data_ptr_t.
data_ptr_t *TupleDataChunkIterator::GetRowLocations() {
	auto &row_location_vector = state.chunk_state.row_locations;
	return FlatVector::GetData<data_ptr_t>(row_location_vector);
}
|
87
|
+
|
88
|
+
// Returns the data of the chunk state's heap_locations vector as an array of data_ptr_t.
data_ptr_t *TupleDataChunkIterator::GetHeapLocations() {
	auto &heap_location_vector = state.chunk_state.heap_locations;
	return FlatVector::GetData<data_ptr_t>(heap_location_vector);
}
|
91
|
+
|
92
|
+
// Returns the data of the chunk state's heap_sizes vector as an array of idx_t.
idx_t *TupleDataChunkIterator::GetHeapSizes() {
	auto &heap_size_vector = state.chunk_state.heap_sizes;
	return FlatVector::GetData<idx_t>(heap_size_vector);
}
|
95
|
+
|
96
|
+
} // namespace duckdb
|