duckdb 0.7.2-dev1898.0 → 0.7.2-dev2144.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +2 -0
- package/package.json +1 -1
- package/src/data_chunk.cpp +13 -1
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +1 -1
- package/src/duckdb/extension/parquet/parquet_metadata.cpp +4 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +69 -44
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +20 -2
- package/src/duckdb/src/common/box_renderer.cpp +4 -2
- package/src/duckdb/src/common/constants.cpp +10 -1
- package/src/duckdb/src/common/filename_pattern.cpp +41 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +144 -15
- package/src/duckdb/src/common/radix_partitioning.cpp +101 -369
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +8 -9
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_gather.cpp +5 -3
- package/src/duckdb/src/common/row_operations/row_match.cpp +117 -22
- package/src/duckdb/src/common/row_operations/row_scatter.cpp +2 -2
- package/src/duckdb/src/common/sort/partition_state.cpp +1 -1
- package/src/duckdb/src/common/sort/sort_state.cpp +2 -1
- package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
- package/src/duckdb/src/common/types/{column_data_allocator.cpp → column/column_data_allocator.cpp} +2 -2
- package/src/duckdb/src/common/types/{column_data_collection.cpp → column/column_data_collection.cpp} +22 -4
- package/src/duckdb/src/common/types/{column_data_collection_segment.cpp → column/column_data_collection_segment.cpp} +2 -1
- package/src/duckdb/src/common/types/{column_data_consumer.cpp → column/column_data_consumer.cpp} +1 -1
- package/src/duckdb/src/common/types/{partitioned_column_data.cpp → column/partitioned_column_data.cpp} +11 -9
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +316 -0
- package/src/duckdb/src/common/types/{row_data_collection.cpp → row/row_data_collection.cpp} +1 -1
- package/src/duckdb/src/common/types/{row_data_collection_scanner.cpp → row/row_data_collection_scanner.cpp} +2 -2
- package/src/duckdb/src/common/types/{row_layout.cpp → row/row_layout.cpp} +1 -1
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +465 -0
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +511 -0
- package/src/duckdb/src/common/types/row/tuple_data_iterator.cpp +96 -0
- package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +119 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +1200 -0
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +170 -0
- package/src/duckdb/src/common/types/vector.cpp +1 -1
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +252 -290
- package/src/duckdb/src/execution/join_hashtable.cpp +192 -328
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +4 -4
- package/src/duckdb/src/execution/operator/helper/physical_execute.cpp +3 -3
- package/src/duckdb/src/execution/operator/helper/physical_limit_percent.cpp +2 -3
- package/src/duckdb/src/execution/operator/helper/physical_result_collector.cpp +2 -3
- package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +36 -21
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_cross_product.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +166 -144
- package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +2 -10
- package/src/duckdb/src/execution/operator/join/physical_positional_join.cpp +0 -1
- package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +2 -2
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +13 -11
- package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +3 -2
- package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +25 -24
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +4 -3
- package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +1 -1
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +3 -3
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +9 -37
- package/src/duckdb/src/execution/physical_operator.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +19 -18
- package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +2 -1
- package/src/duckdb/src/execution/physical_plan/plan_execute.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_explain.cpp +5 -6
- package/src/duckdb/src/execution/physical_plan/plan_expression_get.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +3 -3
- package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -1
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +39 -17
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -2
- package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +5 -5
- package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +2 -2
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +528 -0
- package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +34 -0
- package/src/duckdb/src/include/duckdb/common/helper.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +13 -3
- package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +34 -0
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +80 -27
- package/src/duckdb/src/include/duckdb/common/reference_map.hpp +38 -0
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +7 -6
- package/src/duckdb/src/include/duckdb/common/sort/comparators.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/sort.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/sorted_block.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{column_data_allocator.hpp → column/column_data_allocator.hpp} +4 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection.hpp → column/column_data_collection.hpp} +2 -2
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_iterators.hpp → column/column_data_collection_iterators.hpp} +2 -2
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_segment.hpp → column/column_data_collection_segment.hpp} +3 -3
- package/src/duckdb/src/include/duckdb/common/types/{column_data_consumer.hpp → column/column_data_consumer.hpp} +8 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_scan_states.hpp → column/column_data_scan_states.hpp} +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{partitioned_column_data.hpp → column/partitioned_column_data.hpp} +15 -7
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +140 -0
- package/src/duckdb/src/include/duckdb/common/types/{row_data_collection.hpp → row/row_data_collection.hpp} +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{row_data_collection_scanner.hpp → row/row_data_collection_scanner.hpp} +2 -2
- package/src/duckdb/src/include/duckdb/common/types/{row_layout.hpp → row/row_layout.hpp} +3 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +116 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +239 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_iterator.hpp +64 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +113 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +124 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +74 -0
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +4 -12
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +34 -31
- package/src/duckdb/src/include/duckdb/execution/base_aggregate_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/execution_context.hpp +3 -2
- package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +41 -67
- package/src/duckdb/src/include/duckdb/execution/nested_loop_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_execute.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +0 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_positional_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +4 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/materialized_query_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/query_result.hpp +14 -1
- package/src/duckdb/src/include/duckdb/optimizer/expression_rewriter.hpp +3 -3
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +16 -16
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +8 -8
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +23 -15
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +9 -10
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +18 -11
- package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +5 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +3 -2
- package/src/duckdb/src/include/duckdb/parser/query_error_context.hpp +4 -2
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +9 -35
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +24 -23
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +3 -1
- package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
- package/src/duckdb/src/main/appender.cpp +6 -6
- package/src/duckdb/src/main/client_context.cpp +1 -1
- package/src/duckdb/src/main/connection.cpp +2 -2
- package/src/duckdb/src/main/query_result.cpp +13 -0
- package/src/duckdb/src/optimizer/expression_rewriter.cpp +4 -4
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +91 -105
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -8
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +163 -160
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +30 -30
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +37 -38
- package/src/duckdb/src/parallel/executor.cpp +1 -1
- package/src/duckdb/src/parallel/meta_pipeline.cpp +2 -2
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +1 -1
- package/src/duckdb/src/parser/transform/tableref/transform_subquery.cpp +1 -1
- package/src/duckdb/src/parser/transformer.cpp +50 -9
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +15 -5
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +19 -17
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +4 -4
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +20 -21
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +24 -22
- package/src/duckdb/src/planner/binder/tableref/bind_subqueryref.cpp +2 -2
- package/src/duckdb/src/planner/binder.cpp +16 -19
- package/src/duckdb/src/planner/expression_binder.cpp +8 -8
- package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +3 -3
- package/src/duckdb/src/storage/checkpoint_manager.cpp +23 -23
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +1 -1
- package/src/duckdb/src/storage/table_index_list.cpp +3 -3
- package/src/duckdb/src/verification/statement_verifier.cpp +1 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +5552 -5598
- package/src/duckdb/ub_src_common.cpp +2 -0
- package/src/duckdb/ub_src_common_types.cpp +0 -16
- package/src/duckdb/ub_src_common_types_column.cpp +10 -0
- package/src/duckdb/ub_src_common_types_row.cpp +20 -0
- package/test/udf.test.ts +9 -0
@@ -0,0 +1,1200 @@
|
|
1
|
+
#include "duckdb/common/fast_mem.hpp"
|
2
|
+
#include "duckdb/common/types/null_value.hpp"
|
3
|
+
#include "duckdb/common/types/row/tuple_data_collection.hpp"
|
4
|
+
|
5
|
+
namespace duckdb {
|
6
|
+
|
7
|
+
using ValidityBytes = TupleDataLayout::ValidityBytes;
|
8
|
+
|
9
|
+
// Number of bytes a value of type T occupies in the fixed-size portion of a
// serialized list entry: plain types are stored inline at their full width.
template <class T>
static constexpr idx_t TupleDataWithinListFixedSize() {
	return sizeof(T);
}

// Strings within a list store only a uint32_t length in the fixed-size
// portion; the character data itself goes to the heap (see
// TupleDataWithinListValueStore below).
template <>
constexpr idx_t TupleDataWithinListFixedSize<string_t>() {
	return sizeof(uint32_t);
}
|
18
|
+
|
19
|
+
// Stores "source" into a row at the given offset. The generic version writes
// the value inline; "heap_location" is unused here but kept in the signature
// so all specializations can be called uniformly.
template <class T>
static inline void TupleDataValueStore(const T &source, const data_ptr_t &row_location, const idx_t offset_in_row,
                                       data_ptr_t &heap_location) {
	Store<T>(source, row_location + offset_in_row);
}

// string_t specialization: inlined (short) strings fit in the row itself;
// longer strings copy their character data to "heap_location" and store a
// string_t that points at the heap copy, advancing "heap_location" past the
// copied bytes so consecutive stores pack the heap tightly.
template <>
inline void TupleDataValueStore(const string_t &source, const data_ptr_t &row_location, const idx_t offset_in_row,
                                data_ptr_t &heap_location) {
	if (source.IsInlined()) {
		Store<string_t>(source, row_location + offset_in_row);
	} else {
		memcpy(heap_location, source.GetDataUnsafe(), source.GetSize());
		Store<string_t>(string_t((const char *)heap_location, source.GetSize()), row_location + offset_in_row);
		heap_location += source.GetSize();
	}
}
|
36
|
+
|
37
|
+
// Stores a value that lives inside a serialized list. The generic version
// writes the value at "location" (the fixed-size slot); "heap_location" is
// untouched.
template <class T>
static inline void TupleDataWithinListValueStore(const T &source, const data_ptr_t &location,
                                                 data_ptr_t &heap_location) {
	Store<T>(source, location);
}

// string_t specialization: writes the string length (uint32_t) at "location"
// and the character data at "heap_location" - always to the heap, even for
// strings short enough to be inlined elsewhere - advancing "heap_location"
// past the copied bytes.
template <>
inline void TupleDataWithinListValueStore(const string_t &source, const data_ptr_t &location,
                                          data_ptr_t &heap_location) {
	Store<uint32_t>(source.GetSize(), location);
	memcpy(heap_location, source.GetDataUnsafe(), source.GetSize());
	heap_location += source.GetSize();
}
|
50
|
+
|
51
|
+
// Loads a value that was serialized inside a list (inverse of
// TupleDataWithinListValueStore). The generic version reads the value from
// "location"; "heap_location" is untouched.
template <class T>
static inline T TupleDataWithinListValueLoad(const data_ptr_t &location, data_ptr_t &heap_location) {
	return Load<T>(location);
}

// string_t specialization: reads the uint32_t length from "location" and
// points the resulting string_t at the character data in the heap, advancing
// "heap_location" past the consumed bytes. NOTE: the returned string_t
// references the heap buffer - it does not copy.
template <>
inline string_t TupleDataWithinListValueLoad(const data_ptr_t &location, data_ptr_t &heap_location) {
	const auto size = Load<uint32_t>(location);
	string_t result((const char *)heap_location, size);
	heap_location += size;
	return result;
}
|
63
|
+
|
64
|
+
// Computes the total per-row heap size required to append "new_chunk",
// accumulating the contribution of every column into chunk_state.heap_sizes.
// Rows are first zeroed, then each column adds its variable-size bytes.
void TupleDataCollection::ComputeHeapSizes(TupleDataChunkState &chunk_state, const DataChunk &new_chunk,
                                           const SelectionVector &append_sel, const idx_t append_count) {
	// Reset all heap sizes to zero before accumulating
	auto heap_sizes = FlatVector::GetData<idx_t>(chunk_state.heap_sizes);
	std::fill_n(heap_sizes, new_chunk.size(), 0);

	// Each column adds its per-row contribution on top
	const auto column_count = new_chunk.ColumnCount();
	for (idx_t c = 0; c < column_count; c++) {
		TupleDataCollection::ComputeHeapSizes(chunk_state.heap_sizes, new_chunk.data[c], chunk_state.vector_data[c],
		                                      append_sel, append_count);
	}
}
|
76
|
+
|
77
|
+
// Heap bytes needed to materialize a string value: inlined strings live
// entirely inside the row and therefore need no heap space.
static inline idx_t StringHeapSize(const string_t &val) {
	if (val.IsInlined()) {
		return 0;
	}
	return val.GetSize();
}
|
80
|
+
|
81
|
+
// Adds the heap size contribution of a single source vector to "heap_sizes_v".
// Only VARCHAR, STRUCT and LIST columns can require heap space; all other
// types are fully fixed-size and return immediately.
void TupleDataCollection::ComputeHeapSizes(Vector &heap_sizes_v, const Vector &source_v,
                                           TupleDataVectorFormat &source_format, const SelectionVector &append_sel,
                                           const idx_t append_count) {
	const auto type = source_v.GetType().InternalType();
	if (type != PhysicalType::VARCHAR && type != PhysicalType::STRUCT && type != PhysicalType::LIST) {
		return; // Fixed-size type - no heap contribution
	}

	auto heap_sizes = FlatVector::GetData<idx_t>(heap_sizes_v);

	// Unified (flattened) view of the source vector
	const auto &source_vector_data = source_format.data;
	const auto &source_sel = *source_vector_data.sel;
	const auto &source_validity = source_vector_data.validity;

	switch (type) {
	case PhysicalType::VARCHAR: {
		// Only non-inlined strings are stored in the heap
		const auto source_data = (string_t *)source_vector_data.data;
		for (idx_t i = 0; i < append_count; i++) {
			const auto source_idx = source_sel.get_index(append_sel.get_index(i));
			if (source_validity.RowIsValid(source_idx)) {
				heap_sizes[i] += StringHeapSize(source_data[source_idx]);
			} else {
				// NULL strings still occupy the size of the NULL sentinel (0 if inlined)
				heap_sizes[i] += StringHeapSize(NullValue<string_t>());
			}
		}
		break;
	}
	case PhysicalType::STRUCT: {
		// Recurse through the struct children
		auto &struct_sources = StructVector::GetEntries(source_v);
		for (idx_t struct_col_idx = 0; struct_col_idx < struct_sources.size(); struct_col_idx++) {
			const auto &struct_source = struct_sources[struct_col_idx];
			auto &struct_format = source_format.child_formats[struct_col_idx];
			TupleDataCollection::ComputeHeapSizes(heap_sizes_v, *struct_source, struct_format, append_sel,
			                                      append_count);
		}
		break;
	}
	case PhysicalType::LIST: {
		// Lists are stored entirely in the heap; each valid list costs a
		// uint64_t for its length up front
		for (idx_t i = 0; i < append_count; i++) {
			auto source_idx = source_sel.get_index(append_sel.get_index(i));
			if (source_validity.RowIsValid(source_idx)) {
				heap_sizes[i] += sizeof(uint64_t); // Size of the list
			}
		}

		// Recurse into the child vector using the within-list code path
		D_ASSERT(source_format.child_formats.size() == 1);
		auto &child_source_v = ListVector::GetEntry(source_v);
		auto &child_format = source_format.child_formats[0];
		TupleDataCollection::WithinListHeapComputeSizes(heap_sizes_v, child_source_v, child_format, append_sel,
		                                                append_count, source_vector_data);
		break;
	}
	default:
		throw NotImplementedException("ComputeHeapSizes for %s", LogicalTypeIdToString(source_v.GetType().id()));
	}
}
|
141
|
+
|
142
|
+
// Dispatches heap-size computation for a vector that is nested within a list,
// based on the child's physical type.
void TupleDataCollection::WithinListHeapComputeSizes(Vector &heap_sizes_v, const Vector &source_v,
                                                     TupleDataVectorFormat &source_format,
                                                     const SelectionVector &append_sel, const idx_t append_count,
                                                     const UnifiedVectorFormat &list_data) {
	const auto physical_type = source_v.GetType().InternalType();

	// All fixed-size child types share a single implementation
	if (TypeIsConstantSize(physical_type)) {
		TupleDataCollection::ComputeFixedWithinListHeapSizes(heap_sizes_v, source_v, source_format, append_sel,
		                                                     append_count, list_data);
		return;
	}

	// Variable-size child types each get a dedicated implementation
	switch (physical_type) {
	case PhysicalType::VARCHAR:
		TupleDataCollection::StringWithinListComputeHeapSizes(heap_sizes_v, source_v, source_format, append_sel,
		                                                      append_count, list_data);
		return;
	case PhysicalType::STRUCT:
		TupleDataCollection::StructWithinListComputeHeapSizes(heap_sizes_v, source_v, source_format, append_sel,
		                                                      append_count, list_data);
		return;
	case PhysicalType::LIST:
		TupleDataCollection::ListWithinListComputeHeapSizes(heap_sizes_v, source_v, source_format, append_sel,
		                                                    append_count, list_data);
		return;
	default:
		throw NotImplementedException("WithinListHeapComputeSizes for %s",
		                              LogicalTypeIdToString(source_v.GetType().id()));
	}
}
|
171
|
+
|
172
|
+
// Computes the heap size needed for fixed-size values nested within a list:
// for each valid parent list entry, a child validity mask plus one fixed-size
// slot per child value.
void TupleDataCollection::ComputeFixedWithinListHeapSizes(Vector &heap_sizes_v, const Vector &source_v,
                                                          TupleDataVectorFormat &source_format,
                                                          const SelectionVector &append_sel, const idx_t append_count,
                                                          const UnifiedVectorFormat &list_data) {
	// List data - bind the selection by reference instead of copying the
	// SelectionVector (read-only access; the copy also duplicated its shared
	// buffer handle)
	const auto &list_sel = *list_data.sel;
	const auto list_entries = (list_entry_t *)list_data.data;
	const auto &list_validity = list_data.validity;

	// Target
	auto heap_sizes = FlatVector::GetData<idx_t>(heap_sizes_v);

	D_ASSERT(TypeIsConstantSize(source_v.GetType().InternalType()));
	const auto type_size = GetTypeIdSize(source_v.GetType().InternalType());
	for (idx_t i = 0; i < append_count; i++) {
		const auto list_idx = list_sel.get_index(append_sel.get_index(i));
		if (!list_validity.RowIsValid(list_idx)) {
			continue; // Original list entry is invalid - no need to serialize the child
		}

		// Get the current list length
		const auto &list_length = list_entries[list_idx].length;

		// Size is validity mask and all values
		auto &heap_size = heap_sizes[i];
		heap_size += ValidityBytes::SizeInBytes(list_length);
		heap_size += list_length * type_size;
	}
}
|
201
|
+
|
202
|
+
// Computes the heap size needed for strings nested within a list: per valid
// parent entry, a child validity mask, a fixed uint32_t length slot per child
// string, plus the actual character data of every valid child string.
void TupleDataCollection::StringWithinListComputeHeapSizes(Vector &heap_sizes_v, const Vector &source_v,
                                                           TupleDataVectorFormat &source_format,
                                                           const SelectionVector &append_sel, const idx_t append_count,
                                                           const UnifiedVectorFormat &list_data) {
	// Source - bind selections by reference instead of copying the
	// SelectionVector (read-only access; the copy also duplicated its shared
	// buffer handle)
	const auto &source_data = source_format.data;
	const auto &source_sel = *source_data.sel;
	const auto data = (string_t *)source_data.data;
	const auto &source_validity = source_data.validity;

	// List data
	const auto &list_sel = *list_data.sel;
	const auto list_entries = (list_entry_t *)list_data.data;
	const auto &list_validity = list_data.validity;

	// Target
	auto heap_sizes = FlatVector::GetData<idx_t>(heap_sizes_v);

	for (idx_t i = 0; i < append_count; i++) {
		const auto list_idx = list_sel.get_index(append_sel.get_index(i));
		if (!list_validity.RowIsValid(list_idx)) {
			continue; // Original list entry is invalid - no need to serialize the child
		}

		// Get the current list entry
		const auto &list_entry = list_entries[list_idx];
		const auto &list_offset = list_entry.offset;
		const auto &list_length = list_entry.length;

		// Size is validity mask and all string sizes
		auto &heap_size = heap_sizes[i];
		heap_size += ValidityBytes::SizeInBytes(list_length);
		heap_size += list_length * TupleDataWithinListFixedSize<string_t>();

		// Plus all the actual strings
		for (idx_t child_i = 0; child_i < list_length; child_i++) {
			const auto child_source_idx = source_sel.get_index(list_offset + child_i);
			if (source_validity.RowIsValid(child_source_idx)) {
				heap_size += data[child_source_idx].GetSize();
			}
		}
	}
}
|
245
|
+
|
246
|
+
// Computes the heap size needed for structs nested within a list: per valid
// parent entry only a child validity mask is needed at this level; each struct
// member then adds its own contribution via recursion.
void TupleDataCollection::StructWithinListComputeHeapSizes(Vector &heap_sizes_v, const Vector &source_v,
                                                           TupleDataVectorFormat &source_format,
                                                           const SelectionVector &append_sel, const idx_t append_count,
                                                           const UnifiedVectorFormat &list_data) {
	// List data - bind the selection by reference instead of copying the
	// SelectionVector (read-only access; the copy also duplicated its shared
	// buffer handle)
	const auto &list_sel = *list_data.sel;
	const auto list_entries = (list_entry_t *)list_data.data;
	const auto &list_validity = list_data.validity;

	// Target
	auto heap_sizes = FlatVector::GetData<idx_t>(heap_sizes_v);

	for (idx_t i = 0; i < append_count; i++) {
		const auto list_idx = list_sel.get_index(append_sel.get_index(i));
		if (!list_validity.RowIsValid(list_idx)) {
			continue; // Original list entry is invalid - no need to serialize the child
		}

		// Get the current list length
		const auto &list_length = list_entries[list_idx].length;

		// Size is just the validity mask
		heap_sizes[i] += ValidityBytes::SizeInBytes(list_length);
	}

	// Recurse into every struct member with the same parent list data
	auto &struct_sources = StructVector::GetEntries(source_v);
	for (idx_t struct_col_idx = 0; struct_col_idx < struct_sources.size(); struct_col_idx++) {
		auto &struct_source = *struct_sources[struct_col_idx];
		auto &struct_format = source_format.child_formats[struct_col_idx];
		TupleDataCollection::WithinListHeapComputeSizes(heap_sizes_v, struct_source, struct_format, append_sel,
		                                                append_count, list_data);
	}
}
|
280
|
+
|
281
|
+
// Slices the unified format of "source_v" with "combined_sel", storing the
// sliced selection data inside the format's CombinedListData so it stays
// alive. NOTE: this mutates source_format.data.sel to point at the owned
// slice BEFORE recursing, so struct children are sliced with the
// already-combined selection - the statement order here is load-bearing.
static void ApplySliceRecursive(const Vector &source_v, TupleDataVectorFormat &source_format,
                                const SelectionVector &combined_sel, const idx_t count) {
	D_ASSERT(source_format.combined_list_data);
	auto &combined_list_data = *source_format.combined_list_data;

	// Keep the sliced selection data alive, then re-point this format's
	// selection vector at the owned copy
	combined_list_data.selection_data = source_format.data.sel->Slice(combined_sel, count);
	source_format.data.owned_sel.Initialize(combined_list_data.selection_data);
	source_format.data.sel = &source_format.data.owned_sel;

	if (source_v.GetType().InternalType() == PhysicalType::STRUCT) {
		// We have to apply it to the child vectors too
		auto &struct_sources = StructVector::GetEntries(source_v);
		for (idx_t struct_col_idx = 0; struct_col_idx < struct_sources.size(); struct_col_idx++) {
			auto &struct_source = *struct_sources[struct_col_idx];
			auto &struct_format = source_format.child_formats[struct_col_idx];
			struct_format.combined_list_data = make_uniq<CombinedListData>();
			ApplySliceRecursive(struct_source, struct_format, *source_format.data.sel, count);
		}
	}
}
|
301
|
+
|
302
|
+
// Computes the heap size for a list nested within another list. Because the
// child list's entries are scattered, this builds a "combined" view: one
// merged list entry per parent row plus a selection vector that makes the
// grandchild data contiguous, then recurses with that combined view as the
// new list_data.
void TupleDataCollection::ListWithinListComputeHeapSizes(Vector &heap_sizes_v, const Vector &source_v,
                                                         TupleDataVectorFormat &source_format,
                                                         const SelectionVector &append_sel, const idx_t append_count,
                                                         const UnifiedVectorFormat &list_data) {
	// List data (of the list Vector that "source_v" is in)
	const auto list_sel = *list_data.sel;
	const auto list_entries = (list_entry_t *)list_data.data;
	const auto &list_validity = list_data.validity;

	// Child list
	const auto &child_list_data = source_format.data;
	const auto child_list_sel = *child_list_data.sel;
	const auto child_list_entries = (list_entry_t *)child_list_data.data;
	const auto &child_list_validity = child_list_data.validity;

	// Target
	auto heap_sizes = FlatVector::GetData<idx_t>(heap_sizes_v);

	// Construct combined list entries and a selection vector for the child list child
	auto &child_format = source_format.child_formats[0];
	child_format.combined_list_data = make_uniq<CombinedListData>();
	auto &combined_list_data = *child_format.combined_list_data;
	auto &combined_list_entries = combined_list_data.combined_list_entries;
	const auto child_list_child_count = ListVector::GetListSize(source_v);
	SelectionVector combined_sel(child_list_child_count);
	// Initialize every slot so untouched positions still hold a valid index
	for (idx_t i = 0; i < child_list_child_count; i++) {
		combined_sel.set_index(i, 0);
	}
	idx_t combined_list_offset = 0;

	for (idx_t i = 0; i < append_count; i++) {
		const auto list_idx = list_sel.get_index(append_sel.get_index(i));
		if (!list_validity.RowIsValid(list_idx)) {
			continue; // Original list entry is invalid - no need to serialize the child list
		}

		// Get the current list entry
		const auto &list_entry = list_entries[list_idx];
		const auto &list_offset = list_entry.offset;
		const auto &list_length = list_entry.length;

		// Size is the validity mask and the list sizes
		auto &heap_size = heap_sizes[i];
		heap_size += ValidityBytes::SizeInBytes(list_length);
		heap_size += list_length * sizeof(uint64_t);

		// Flatten this parent row's child lists: invalid child lists
		// contribute no grandchild values
		idx_t child_list_size = 0;
		for (idx_t child_i = 0; child_i < list_length; child_i++) {
			const auto child_list_idx = child_list_sel.get_index(list_offset + child_i);
			const auto &child_list_entry = child_list_entries[child_list_idx];
			if (child_list_validity.RowIsValid(child_list_idx)) {
				const auto &child_list_offset = child_list_entry.offset;
				const auto &child_list_length = child_list_entry.length;

				// Add this child's list entry's to the combined selection vector
				for (idx_t child_value_i = 0; child_value_i < child_list_length; child_value_i++) {
					auto idx = combined_list_offset + child_list_size + child_value_i;
					auto loc = child_list_offset + child_value_i;
					combined_sel.set_index(idx, loc);
				}

				child_list_size += child_list_length;
			}
		}

		// Combine the child list entries into one
		combined_list_entries[list_idx] = {combined_list_offset, child_list_size};
		combined_list_offset += child_list_size;
	}

	// Create a combined child_list_data to be used as list_data in the recursion.
	// It reuses the parent's selection and validity, but points at the merged entries.
	auto &combined_child_list_data = combined_list_data.combined_data;
	combined_child_list_data.sel = list_data.sel;
	combined_child_list_data.data = (data_ptr_t)combined_list_entries;
	combined_child_list_data.validity = list_data.validity;

	// Combine the selection vectors
	D_ASSERT(source_format.child_formats.size() == 1);
	auto &child_source = ListVector::GetEntry(source_v);
	ApplySliceRecursive(child_source, child_format, combined_sel, child_list_child_count);

	// Recurse
	TupleDataCollection::WithinListHeapComputeSizes(heap_sizes_v, child_source, child_format, append_sel, append_count,
	                                                combined_child_list_data);
}
|
387
|
+
|
388
|
+
void TupleDataCollection::Scatter(TupleDataChunkState &chunk_state, const DataChunk &new_chunk,
                                  const SelectionVector &append_sel, const idx_t append_count) const {
	//! Scatters "new_chunk" into the rows pointed to by "chunk_state.row_locations".
	const auto row_locations = FlatVector::GetData<data_ptr_t>(chunk_state.row_locations);

	// Initialize every column's validity bit to "valid" before any data is written
	const auto validity_bytes = ValidityBytes::SizeInBytes(layout.ColumnCount());
	for (idx_t row_idx = 0; row_idx < append_count; row_idx++) {
		FastMemset(row_locations[row_idx], ~0, validity_bytes);
	}

	// Variable-size layouts also store a per-row heap size in the row itself
	if (!layout.AllConstant()) {
		const auto heap_size_offset = layout.GetHeapSizeOffset();
		const auto heap_sizes = FlatVector::GetData<idx_t>(chunk_state.heap_sizes);
		for (idx_t row_idx = 0; row_idx < append_count; row_idx++) {
			Store<uint32_t>(heap_sizes[row_idx], row_locations[row_idx] + heap_size_offset);
		}
	}

	// Scatter the actual column data, one column at a time
	for (const auto &col_idx : chunk_state.column_ids) {
		Scatter(chunk_state, new_chunk.data[col_idx], col_idx, append_sel, append_count);
	}
}
|
412
|
+
|
413
|
+
void TupleDataCollection::Scatter(TupleDataChunkState &chunk_state, const Vector &source, const column_t column_id,
                                  const SelectionVector &append_sel, const idx_t append_count) const {
	//! Scatters a single column "source" into the rows of "chunk_state" via the per-type scatter function.
	const auto &scatter_function = scatter_functions[column_id];
	auto &source_format = chunk_state.vector_data[column_id];
	scatter_function.function(source, source_format, append_sel, append_count, layout, chunk_state.row_locations,
	                          chunk_state.heap_locations, column_id, source_format.data,
	                          scatter_function.child_functions);
}
|
420
|
+
|
421
|
+
template <class T>
|
422
|
+
static void TupleDataTemplatedScatter(const Vector &source, const TupleDataVectorFormat &source_format,
|
423
|
+
const SelectionVector &append_sel, const idx_t append_count,
|
424
|
+
const TupleDataLayout &layout, Vector &row_locations, Vector &heap_locations,
|
425
|
+
const idx_t col_idx, const UnifiedVectorFormat &dummy_arg,
|
426
|
+
const vector<TupleDataScatterFunction> &child_functions) {
|
427
|
+
// Source
|
428
|
+
const auto &source_data = source_format.data;
|
429
|
+
const auto source_sel = *source_data.sel;
|
430
|
+
const auto data = (T *)source_data.data;
|
431
|
+
const auto &validity = source_data.validity;
|
432
|
+
|
433
|
+
// Target
|
434
|
+
auto target_locations = FlatVector::GetData<data_ptr_t>(row_locations);
|
435
|
+
auto target_heap_locations = FlatVector::GetData<data_ptr_t>(heap_locations);
|
436
|
+
|
437
|
+
// Precompute mask indexes
|
438
|
+
idx_t entry_idx;
|
439
|
+
idx_t idx_in_entry;
|
440
|
+
ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry);
|
441
|
+
|
442
|
+
const auto offset_in_row = layout.GetOffsets()[col_idx];
|
443
|
+
if (validity.AllValid()) {
|
444
|
+
for (idx_t i = 0; i < append_count; i++) {
|
445
|
+
const auto source_idx = source_sel.get_index(append_sel.get_index(i));
|
446
|
+
TupleDataValueStore<T>(data[source_idx], target_locations[i], offset_in_row, target_heap_locations[i]);
|
447
|
+
}
|
448
|
+
} else {
|
449
|
+
for (idx_t i = 0; i < append_count; i++) {
|
450
|
+
const auto source_idx = source_sel.get_index(append_sel.get_index(i));
|
451
|
+
if (validity.RowIsValid(source_idx)) {
|
452
|
+
TupleDataValueStore<T>(data[source_idx], target_locations[i], offset_in_row, target_heap_locations[i]);
|
453
|
+
} else {
|
454
|
+
TupleDataValueStore<T>(NullValue<T>(), target_locations[i], offset_in_row, target_heap_locations[i]);
|
455
|
+
ValidityBytes(target_locations[i]).SetInvalidUnsafe(entry_idx, idx_in_entry);
|
456
|
+
}
|
457
|
+
}
|
458
|
+
}
|
459
|
+
}
|
460
|
+
|
461
|
+
static void TupleDataStructScatter(const Vector &source, const TupleDataVectorFormat &source_format,
|
462
|
+
const SelectionVector &append_sel, const idx_t append_count,
|
463
|
+
const TupleDataLayout &layout, Vector &row_locations, Vector &heap_locations,
|
464
|
+
const idx_t col_idx, const UnifiedVectorFormat &dummy_arg,
|
465
|
+
const vector<TupleDataScatterFunction> &child_functions) {
|
466
|
+
// Source
|
467
|
+
const auto &source_data = source_format.data;
|
468
|
+
const auto source_sel = *source_data.sel;
|
469
|
+
const auto &validity = source_data.validity;
|
470
|
+
|
471
|
+
// Target
|
472
|
+
auto target_locations = FlatVector::GetData<data_ptr_t>(row_locations);
|
473
|
+
|
474
|
+
// Precompute mask indexes
|
475
|
+
idx_t entry_idx;
|
476
|
+
idx_t idx_in_entry;
|
477
|
+
ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry);
|
478
|
+
|
479
|
+
// Set validity of the STRUCT in this layout
|
480
|
+
if (!validity.AllValid()) {
|
481
|
+
for (idx_t i = 0; i < append_count; i++) {
|
482
|
+
const auto source_idx = source_sel.get_index(append_sel.get_index(i));
|
483
|
+
if (!validity.RowIsValid(source_idx)) {
|
484
|
+
ValidityBytes(target_locations[i]).SetInvalidUnsafe(entry_idx, idx_in_entry);
|
485
|
+
}
|
486
|
+
}
|
487
|
+
}
|
488
|
+
|
489
|
+
// Create a Vector of pointers to the TupleDataLayout of the STRUCT
|
490
|
+
Vector struct_row_locations(LogicalType::POINTER, append_count);
|
491
|
+
auto struct_target_locations = FlatVector::GetData<data_ptr_t>(struct_row_locations);
|
492
|
+
const auto offset_in_row = layout.GetOffsets()[col_idx];
|
493
|
+
for (idx_t i = 0; i < append_count; i++) {
|
494
|
+
struct_target_locations[i] = target_locations[i] + offset_in_row;
|
495
|
+
}
|
496
|
+
|
497
|
+
const auto &struct_layout = layout.GetStructLayout(col_idx);
|
498
|
+
auto &struct_sources = StructVector::GetEntries(source);
|
499
|
+
D_ASSERT(struct_layout.ColumnCount() == struct_sources.size());
|
500
|
+
|
501
|
+
// Set the validity of the entries within the STRUCTs
|
502
|
+
const auto validity_bytes = ValidityBytes::SizeInBytes(struct_layout.ColumnCount());
|
503
|
+
for (idx_t i = 0; i < append_count; i++) {
|
504
|
+
memset(struct_target_locations[i], ~0, validity_bytes);
|
505
|
+
}
|
506
|
+
|
507
|
+
// Recurse through the struct children
|
508
|
+
for (idx_t struct_col_idx = 0; struct_col_idx < struct_layout.ColumnCount(); struct_col_idx++) {
|
509
|
+
auto &struct_source = *struct_sources[struct_col_idx];
|
510
|
+
const auto &struct_source_format = source_format.child_formats[struct_col_idx];
|
511
|
+
const auto &struct_scatter_function = child_functions[struct_col_idx];
|
512
|
+
struct_scatter_function.function(struct_source, struct_source_format, append_sel, append_count, struct_layout,
|
513
|
+
struct_row_locations, heap_locations, struct_col_idx, dummy_arg,
|
514
|
+
struct_scatter_function.child_functions);
|
515
|
+
}
|
516
|
+
}
|
517
|
+
|
518
|
+
static void TupleDataListScatter(const Vector &source, const TupleDataVectorFormat &source_format,
|
519
|
+
const SelectionVector &append_sel, const idx_t append_count,
|
520
|
+
const TupleDataLayout &layout, Vector &row_locations, Vector &heap_locations,
|
521
|
+
const idx_t col_idx, const UnifiedVectorFormat &dummy_arg,
|
522
|
+
const vector<TupleDataScatterFunction> &child_functions) {
|
523
|
+
// Source
|
524
|
+
const auto &source_data = source_format.data;
|
525
|
+
const auto source_sel = *source_data.sel;
|
526
|
+
const auto data = (list_entry_t *)source_data.data;
|
527
|
+
const auto &validity = source_data.validity;
|
528
|
+
|
529
|
+
// Target
|
530
|
+
auto target_locations = FlatVector::GetData<data_ptr_t>(row_locations);
|
531
|
+
auto target_heap_locations = FlatVector::GetData<data_ptr_t>(heap_locations);
|
532
|
+
|
533
|
+
// Precompute mask indexes
|
534
|
+
idx_t entry_idx;
|
535
|
+
idx_t idx_in_entry;
|
536
|
+
ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry);
|
537
|
+
|
538
|
+
// Set validity of the LIST in this layout, and store pointer to where it's stored
|
539
|
+
const auto offset_in_row = layout.GetOffsets()[col_idx];
|
540
|
+
for (idx_t i = 0; i < append_count; i++) {
|
541
|
+
const auto source_idx = source_sel.get_index(append_sel.get_index(i));
|
542
|
+
if (validity.RowIsValid(source_idx)) {
|
543
|
+
auto &target_heap_location = target_heap_locations[i];
|
544
|
+
Store<data_ptr_t>(target_heap_location, target_locations[i] + offset_in_row);
|
545
|
+
|
546
|
+
// Store list length and skip over it
|
547
|
+
Store<uint64_t>(data[source_idx].length, target_heap_location);
|
548
|
+
target_heap_location += sizeof(uint64_t);
|
549
|
+
} else {
|
550
|
+
ValidityBytes(target_locations[i]).SetInvalidUnsafe(entry_idx, idx_in_entry);
|
551
|
+
}
|
552
|
+
}
|
553
|
+
|
554
|
+
// Recurse
|
555
|
+
D_ASSERT(child_functions.size() == 1);
|
556
|
+
auto &child_source = ListVector::GetEntry(source);
|
557
|
+
auto &child_format = source_format.child_formats[0];
|
558
|
+
const auto &child_function = child_functions[0];
|
559
|
+
child_function.function(child_source, child_format, append_sel, append_count, layout, row_locations, heap_locations,
|
560
|
+
col_idx, source_format.data, child_function.child_functions);
|
561
|
+
}
|
562
|
+
|
563
|
+
template <class T>
|
564
|
+
static void TupleDataTemplatedWithinListScatter(const Vector &source, const TupleDataVectorFormat &source_format,
|
565
|
+
const SelectionVector &append_sel, const idx_t append_count,
|
566
|
+
const TupleDataLayout &layout, Vector &row_locations,
|
567
|
+
Vector &heap_locations, const idx_t col_idx,
|
568
|
+
const UnifiedVectorFormat &list_data,
|
569
|
+
const vector<TupleDataScatterFunction> &child_functions) {
|
570
|
+
// Source
|
571
|
+
const auto &source_data = source_format.data;
|
572
|
+
const auto source_sel = *source_data.sel;
|
573
|
+
const auto data = (T *)source_data.data;
|
574
|
+
const auto &source_validity = source_data.validity;
|
575
|
+
|
576
|
+
// List data
|
577
|
+
const auto list_sel = *list_data.sel;
|
578
|
+
const auto list_entries = (list_entry_t *)list_data.data;
|
579
|
+
const auto &list_validity = list_data.validity;
|
580
|
+
|
581
|
+
// Target
|
582
|
+
auto target_heap_locations = FlatVector::GetData<data_ptr_t>(heap_locations);
|
583
|
+
|
584
|
+
for (idx_t i = 0; i < append_count; i++) {
|
585
|
+
const auto list_idx = list_sel.get_index(append_sel.get_index(i));
|
586
|
+
if (!list_validity.RowIsValid(list_idx)) {
|
587
|
+
continue; // Original list entry is invalid - no need to serialize the child
|
588
|
+
}
|
589
|
+
|
590
|
+
// Get the current list entry
|
591
|
+
const auto &list_entry = list_entries[list_idx];
|
592
|
+
const auto &list_offset = list_entry.offset;
|
593
|
+
const auto &list_length = list_entry.length;
|
594
|
+
|
595
|
+
// Initialize validity mask and skip heap pointer over it
|
596
|
+
auto &target_heap_location = target_heap_locations[i];
|
597
|
+
ValidityBytes child_mask(target_heap_location);
|
598
|
+
child_mask.SetAllValid(list_length);
|
599
|
+
target_heap_location += ValidityBytes::SizeInBytes(list_length);
|
600
|
+
|
601
|
+
// Get the start to the fixed-size data and skip the heap pointer over it
|
602
|
+
const auto child_data_location = target_heap_location;
|
603
|
+
target_heap_location += list_length * TupleDataWithinListFixedSize<T>();
|
604
|
+
|
605
|
+
// Store the data and validity belonging to this list entry
|
606
|
+
for (idx_t child_i = 0; child_i < list_length; child_i++) {
|
607
|
+
const auto child_source_idx = source_sel.get_index(list_offset + child_i);
|
608
|
+
if (source_validity.RowIsValid(child_source_idx)) {
|
609
|
+
TupleDataWithinListValueStore<T>(data[child_source_idx],
|
610
|
+
child_data_location + child_i * TupleDataWithinListFixedSize<T>(),
|
611
|
+
target_heap_location);
|
612
|
+
} else {
|
613
|
+
child_mask.SetInvalidUnsafe(child_i);
|
614
|
+
}
|
615
|
+
}
|
616
|
+
}
|
617
|
+
}
|
618
|
+
|
619
|
+
static void TupleDataStructWithinListScatter(const Vector &source, const TupleDataVectorFormat &source_format,
|
620
|
+
const SelectionVector &append_sel, const idx_t append_count,
|
621
|
+
const TupleDataLayout &layout, Vector &row_locations,
|
622
|
+
Vector &heap_locations, const idx_t col_idx,
|
623
|
+
const UnifiedVectorFormat &list_data,
|
624
|
+
const vector<TupleDataScatterFunction> &child_functions) {
|
625
|
+
// Source
|
626
|
+
const auto &source_data = source_format.data;
|
627
|
+
const auto source_sel = *source_data.sel;
|
628
|
+
const auto &source_validity = source_data.validity;
|
629
|
+
|
630
|
+
// List data
|
631
|
+
const auto list_sel = *list_data.sel;
|
632
|
+
const auto list_entries = (list_entry_t *)list_data.data;
|
633
|
+
const auto &list_validity = list_data.validity;
|
634
|
+
|
635
|
+
// Target
|
636
|
+
auto target_heap_locations = FlatVector::GetData<data_ptr_t>(heap_locations);
|
637
|
+
|
638
|
+
// Initialize the validity of the STRUCTs
|
639
|
+
for (idx_t i = 0; i < append_count; i++) {
|
640
|
+
const auto list_idx = list_sel.get_index(append_sel.get_index(i));
|
641
|
+
if (!list_validity.RowIsValid(list_idx)) {
|
642
|
+
continue; // Original list entry is invalid - no need to serialize the child
|
643
|
+
}
|
644
|
+
|
645
|
+
// Get the current list entry
|
646
|
+
const auto &list_entry = list_entries[list_idx];
|
647
|
+
const auto &list_offset = list_entry.offset;
|
648
|
+
const auto &list_length = list_entry.length;
|
649
|
+
|
650
|
+
// Initialize validity mask and skip the heap pointer over it
|
651
|
+
auto &target_heap_location = target_heap_locations[i];
|
652
|
+
ValidityBytes child_mask(target_heap_location);
|
653
|
+
child_mask.SetAllValid(list_length);
|
654
|
+
target_heap_location += ValidityBytes::SizeInBytes(list_length);
|
655
|
+
|
656
|
+
// Store the validity belonging to this list entry
|
657
|
+
for (idx_t child_i = 0; child_i < list_length; child_i++) {
|
658
|
+
const auto child_source_idx = source_sel.get_index(list_offset + child_i);
|
659
|
+
if (!source_validity.RowIsValid(child_source_idx)) {
|
660
|
+
child_mask.SetInvalidUnsafe(child_i);
|
661
|
+
}
|
662
|
+
}
|
663
|
+
}
|
664
|
+
|
665
|
+
// Recurse through the children
|
666
|
+
auto &struct_sources = StructVector::GetEntries(source);
|
667
|
+
for (idx_t struct_col_idx = 0; struct_col_idx < struct_sources.size(); struct_col_idx++) {
|
668
|
+
auto &struct_source = *struct_sources[struct_col_idx];
|
669
|
+
auto &struct_format = source_format.child_formats[struct_col_idx];
|
670
|
+
const auto &struct_scatter_function = child_functions[struct_col_idx];
|
671
|
+
struct_scatter_function.function(struct_source, struct_format, append_sel, append_count, layout, row_locations,
|
672
|
+
heap_locations, struct_col_idx, list_data,
|
673
|
+
struct_scatter_function.child_functions);
|
674
|
+
}
|
675
|
+
}
|
676
|
+
|
677
|
+
static void TupleDataListWithinListScatter(const Vector &child_list, const TupleDataVectorFormat &child_list_format,
|
678
|
+
const SelectionVector &append_sel, const idx_t append_count,
|
679
|
+
const TupleDataLayout &layout, Vector &row_locations, Vector &heap_locations,
|
680
|
+
const idx_t col_idx, const UnifiedVectorFormat &list_data,
|
681
|
+
const vector<TupleDataScatterFunction> &child_functions) {
|
682
|
+
// List data (of the list Vector that "child_list" is in)
|
683
|
+
const auto list_sel = *list_data.sel;
|
684
|
+
const auto list_entries = (list_entry_t *)list_data.data;
|
685
|
+
const auto &list_validity = list_data.validity;
|
686
|
+
|
687
|
+
// Child list
|
688
|
+
const auto &child_list_data = child_list_format.data;
|
689
|
+
const auto child_list_sel = *child_list_data.sel;
|
690
|
+
const auto child_list_entries = (list_entry_t *)child_list_data.data;
|
691
|
+
const auto &child_list_validity = child_list_data.validity;
|
692
|
+
|
693
|
+
// Target
|
694
|
+
auto target_heap_locations = FlatVector::GetData<data_ptr_t>(heap_locations);
|
695
|
+
|
696
|
+
for (idx_t i = 0; i < append_count; i++) {
|
697
|
+
const auto list_idx = list_sel.get_index(append_sel.get_index(i));
|
698
|
+
if (!list_validity.RowIsValid(list_idx)) {
|
699
|
+
continue; // Original list entry is invalid - no need to serialize the child list
|
700
|
+
}
|
701
|
+
|
702
|
+
// Get the current list entry
|
703
|
+
const auto &list_entry = list_entries[list_idx];
|
704
|
+
const auto &list_offset = list_entry.offset;
|
705
|
+
const auto &list_length = list_entry.length;
|
706
|
+
|
707
|
+
// Initialize validity mask and skip heap pointer over it
|
708
|
+
auto &target_heap_location = target_heap_locations[i];
|
709
|
+
ValidityBytes child_mask(target_heap_location);
|
710
|
+
child_mask.SetAllValid(list_length);
|
711
|
+
target_heap_location += ValidityBytes::SizeInBytes(list_length);
|
712
|
+
|
713
|
+
// Get the start to the fixed-size data and skip the heap pointer over it
|
714
|
+
const auto child_data_location = target_heap_location;
|
715
|
+
target_heap_location += list_length * sizeof(uint64_t);
|
716
|
+
|
717
|
+
for (idx_t child_i = 0; child_i < list_length; child_i++) {
|
718
|
+
const auto child_list_idx = child_list_sel.get_index(list_offset + child_i);
|
719
|
+
if (child_list_validity.RowIsValid(child_list_idx)) {
|
720
|
+
const auto &child_list_length = child_list_entries[child_list_idx].length;
|
721
|
+
Store<uint64_t>(child_list_length, child_data_location + child_i * sizeof(uint64_t));
|
722
|
+
} else {
|
723
|
+
child_mask.SetInvalidUnsafe(child_i);
|
724
|
+
}
|
725
|
+
}
|
726
|
+
}
|
727
|
+
|
728
|
+
// Recurse
|
729
|
+
D_ASSERT(child_functions.size() == 1);
|
730
|
+
auto &child_vec = ListVector::GetEntry(child_list);
|
731
|
+
auto &child_format = child_list_format.child_formats[0];
|
732
|
+
auto &combined_child_list_data = child_format.combined_list_data->combined_data;
|
733
|
+
const auto &child_function = child_functions[0];
|
734
|
+
child_function.function(child_vec, child_format, append_sel, append_count, layout, row_locations, heap_locations,
|
735
|
+
col_idx, combined_child_list_data, child_function.child_functions);
|
736
|
+
}
|
737
|
+
|
738
|
+
template <class T>
|
739
|
+
tuple_data_scatter_function_t TupleDataGetScatterFunction(bool within_list) {
|
740
|
+
return within_list ? TupleDataTemplatedWithinListScatter<T> : TupleDataTemplatedScatter<T>;
|
741
|
+
}
|
742
|
+
|
743
|
+
//! Builds the scatter function (and child functions, for nested types) for the given type.
//! "within_list" selects the heap-serializing variants used for values nested inside a LIST.
TupleDataScatterFunction TupleDataCollection::GetScatterFunction(const LogicalType &type, bool within_list) {
	TupleDataScatterFunction result;
	switch (type.InternalType()) {
	case PhysicalType::BOOL:
		result.function = TupleDataGetScatterFunction<bool>(within_list);
		break;
	case PhysicalType::INT8:
		result.function = TupleDataGetScatterFunction<int8_t>(within_list);
		break;
	case PhysicalType::INT16:
		result.function = TupleDataGetScatterFunction<int16_t>(within_list);
		break;
	case PhysicalType::INT32:
		result.function = TupleDataGetScatterFunction<int32_t>(within_list);
		break;
	case PhysicalType::INT64:
		result.function = TupleDataGetScatterFunction<int64_t>(within_list);
		break;
	case PhysicalType::INT128:
		result.function = TupleDataGetScatterFunction<hugeint_t>(within_list);
		break;
	case PhysicalType::UINT8:
		result.function = TupleDataGetScatterFunction<uint8_t>(within_list);
		break;
	case PhysicalType::UINT16:
		result.function = TupleDataGetScatterFunction<uint16_t>(within_list);
		break;
	case PhysicalType::UINT32:
		result.function = TupleDataGetScatterFunction<uint32_t>(within_list);
		break;
	case PhysicalType::UINT64:
		result.function = TupleDataGetScatterFunction<uint64_t>(within_list);
		break;
	case PhysicalType::FLOAT:
		result.function = TupleDataGetScatterFunction<float>(within_list);
		break;
	case PhysicalType::DOUBLE:
		result.function = TupleDataGetScatterFunction<double>(within_list);
		break;
	case PhysicalType::INTERVAL:
		result.function = TupleDataGetScatterFunction<interval_t>(within_list);
		break;
	case PhysicalType::VARCHAR:
		result.function = TupleDataGetScatterFunction<string_t>(within_list);
		break;
	case PhysicalType::STRUCT: {
		// Struct children inherit the current mode (within_list is propagated as-is)
		result.function = within_list ? TupleDataStructWithinListScatter : TupleDataStructScatter;
		for (const auto &child_type : StructType::GetChildTypes(type)) {
			result.child_functions.push_back(GetScatterFunction(child_type.second, within_list));
		}
		break;
	}
	case PhysicalType::LIST:
		// The child of a LIST is always scattered in within-list mode
		result.function = within_list ? TupleDataListWithinListScatter : TupleDataListScatter;
		result.child_functions.emplace_back(GetScatterFunction(ListType::GetChildType(type), true));
		break;
	default:
		throw InternalException("Unsupported type for TupleDataCollection::GetScatterFunction");
	}
	return result;
}
|
804
|
+
|
805
|
+
void TupleDataCollection::Gather(Vector &row_locations, const SelectionVector &scan_sel, const idx_t scan_count,
                                 DataChunk &result, const SelectionVector &target_sel) const {
	//! Gathers every column of the layout into "result" by delegating to the column-list overload.
	D_ASSERT(result.ColumnCount() == layout.ColumnCount());
	const auto column_count = layout.ColumnCount();
	vector<column_t> column_ids;
	column_ids.reserve(column_count);
	for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
		column_ids.push_back(col_idx);
	}
	Gather(row_locations, scan_sel, scan_count, column_ids, result, target_sel);
}
|
815
|
+
|
816
|
+
void TupleDataCollection::Gather(Vector &row_locations, const SelectionVector &scan_sel, const idx_t scan_count,
                                 const vector<column_t> &column_ids, DataChunk &result,
                                 const SelectionVector &target_sel) const {
	//! Gathers the given columns into the corresponding vectors of "result" (positional match).
	const auto column_count = column_ids.size();
	for (idx_t i = 0; i < column_count; i++) {
		Gather(row_locations, scan_sel, scan_count, column_ids[i], result.data[i], target_sel);
	}
}
|
823
|
+
|
824
|
+
//! Gathers a single column into "result" using the per-type gather function.
void TupleDataCollection::Gather(Vector &row_locations, const SelectionVector &scan_sel, const idx_t scan_count,
                                 const column_t column_id, Vector &result, const SelectionVector &target_sel) const {
	const auto &gather_function = gather_functions[column_id];
	// "result" is passed twice: once as the gather target and once as the trailing "list vector" argument,
	// which the templated gather receives as a dummy (it is only read by the within-list gathers)
	gather_function.function(layout, row_locations, column_id, scan_sel, scan_count, result, target_sel, result,
	                         gather_function.child_functions);
}
|
830
|
+
|
831
|
+
template <class T>
//! Gathers a fixed-size (or string) column from the rows into a flat "target" Vector.
//! Rows whose validity bit for this column is cleared produce NULLs in the target.
static void TupleDataTemplatedGather(const TupleDataLayout &layout, Vector &row_locations, const idx_t col_idx,
                                     const SelectionVector &scan_sel, const idx_t scan_count, Vector &target,
                                     const SelectionVector &target_sel, Vector &dummy_vector,
                                     const vector<TupleDataGatherFunction> &child_functions) {
	// Source
	auto source_locations = FlatVector::GetData<data_ptr_t>(row_locations);

	// Target
	auto target_data = FlatVector::GetData<T>(target);
	auto &target_validity = FlatVector::Validity(target);

	// Precompute mask indexes for this column's validity bit
	idx_t entry_idx;
	idx_t idx_in_entry;
	ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry);

	const auto offset_in_row = layout.GetOffsets()[col_idx];
	for (idx_t i = 0; i < scan_count; i++) {
		const auto &source_row = source_locations[scan_sel.get_index(i)];
		const auto target_idx = target_sel.get_index(i);
		ValidityBytes row_mask(source_row);
		if (row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry)) {
			// Value is valid: load it from the row at this column's fixed offset
			target_data[target_idx] = Load<T>(source_row + offset_in_row);
		} else {
			target_validity.SetInvalid(target_idx);
		}
	}
}
|
860
|
+
|
861
|
+
//! Gathers a STRUCT column: propagates the struct's validity bit to the target, computes pointers
//! into the struct's embedded sub-layout, and recursively gathers each struct child.
static void TupleDataStructGather(const TupleDataLayout &layout, Vector &row_locations, const idx_t col_idx,
                                  const SelectionVector &scan_sel, const idx_t scan_count, Vector &target,
                                  const SelectionVector &target_sel, Vector &dummy_vector,
                                  const vector<TupleDataGatherFunction> &child_functions) {
	// Source
	auto source_locations = FlatVector::GetData<data_ptr_t>(row_locations);

	// Target
	auto &target_validity = FlatVector::Validity(target);

	// Precompute mask indexes for this column's validity bit
	idx_t entry_idx;
	idx_t idx_in_entry;
	ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry);

	// Get validity of the struct and create a Vector of pointers to the start of the TupleDataLayout of the STRUCT
	Vector struct_row_locations(LogicalType::POINTER);
	auto struct_source_locations = FlatVector::GetData<data_ptr_t>(struct_row_locations);
	const auto offset_in_row = layout.GetOffsets()[col_idx];
	for (idx_t i = 0; i < scan_count; i++) {
		const auto source_idx = scan_sel.get_index(i);
		const auto &source_row = source_locations[source_idx];

		// Set the validity
		ValidityBytes row_mask(source_row);
		if (!row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry)) {
			const auto target_idx = target_sel.get_index(i);
			target_validity.SetInvalid(target_idx);
		}

		// Set the pointer: indexed by source_idx (not i), because the recursive child gathers
		// index this Vector through the same scan_sel
		struct_source_locations[source_idx] = source_row + offset_in_row;
	}

	// Get the struct layout and struct entries
	const auto &struct_layout = layout.GetStructLayout(col_idx);
	auto &struct_targets = StructVector::GetEntries(target);
	D_ASSERT(struct_layout.ColumnCount() == struct_targets.size());

	// Recurse through the struct children, gathering from the struct's sub-layout
	for (idx_t struct_col_idx = 0; struct_col_idx < struct_layout.ColumnCount(); struct_col_idx++) {
		auto &struct_target = *struct_targets[struct_col_idx];
		const auto &struct_gather_function = child_functions[struct_col_idx];
		struct_gather_function.function(struct_layout, struct_row_locations, struct_col_idx, scan_sel, scan_count,
		                                struct_target, target_sel, dummy_vector,
		                                struct_gather_function.child_functions);
	}
}
|
909
|
+
|
910
|
+
//! Gathers a top-level LIST column: loads per-row heap pointers and list lengths, appends list
//! entries to the target, grows the target's child vector, then recursively gathers the child data.
static void TupleDataListGather(const TupleDataLayout &layout, Vector &row_locations, const idx_t col_idx,
                                const SelectionVector &scan_sel, const idx_t scan_count, Vector &target,
                                const SelectionVector &target_sel, Vector &dummy_vector,
                                const vector<TupleDataGatherFunction> &child_functions) {
	// Source
	auto source_locations = FlatVector::GetData<data_ptr_t>(row_locations);

	// Target
	auto target_list_entries = FlatVector::GetData<list_entry_t>(target);
	auto &target_validity = FlatVector::Validity(target);

	// Precompute mask indexes for this column's validity bit
	idx_t entry_idx;
	idx_t idx_in_entry;
	ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry);

	// Load pointers to the data from the row; the validity of this Vector marks which rows have a list
	Vector heap_locations(LogicalType::POINTER);
	auto source_heap_locations = FlatVector::GetData<data_ptr_t>(heap_locations);
	auto &source_heap_validity = FlatVector::Validity(heap_locations);

	const auto offset_in_row = layout.GetOffsets()[col_idx];
	uint64_t target_list_offset = 0;
	for (idx_t i = 0; i < scan_count; i++) {
		const auto source_idx = scan_sel.get_index(i);
		const auto target_idx = target_sel.get_index(i);

		const auto &source_row = source_locations[source_idx];
		ValidityBytes row_mask(source_row);
		if (row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry)) {
			// Follow the heap pointer stored in the row
			auto &source_heap_location = source_heap_locations[source_idx];
			source_heap_location = Load<data_ptr_t>(source_row + offset_in_row);

			// Load list size and skip over
			const auto list_length = Load<uint64_t>(source_heap_location);
			source_heap_location += sizeof(uint64_t);

			// Initialize list entry, and increment offset
			target_list_entries[target_idx] = {target_list_offset, list_length};
			target_list_offset += list_length;
		} else {
			source_heap_validity.SetInvalid(source_idx);
			target_validity.SetInvalid(target_idx);
		}
	}
	// Grow the target's child vector to make room for the gathered child values (appended after existing ones)
	auto list_size_before = ListVector::GetListSize(target);
	ListVector::Reserve(target, list_size_before + target_list_offset);
	ListVector::SetListSize(target, list_size_before + target_list_offset);

	// Recurse: the child gather reads from "heap_locations" and appends starting at list_size_before,
	// with "target" passed as the list-vector context
	D_ASSERT(child_functions.size() == 1);
	const auto &child_function = child_functions[0];
	child_function.function(layout, heap_locations, list_size_before, scan_sel, scan_count,
	                        ListVector::GetEntry(target), target_sel, target, child_function.child_functions);
}
|
965
|
+
|
966
|
+
template <class T>
//! Gathers fixed-size values that were serialized inside a LIST: for each row with a valid list,
//! reads the child validity mask and values from the heap and appends them to the flat "target".
//! Note: "heap_locations" here carries per-row heap pointers, and "list_size_before" arrives via
//! the parameter otherwise used for col_idx (see the call in TupleDataListGather).
static void TupleDataTemplatedWithinListGather(const TupleDataLayout &layout, Vector &heap_locations,
                                               const idx_t list_size_before, const SelectionVector &scan_sel,
                                               const idx_t scan_count, Vector &target,
                                               const SelectionVector &target_sel, Vector &list_vector,
                                               const vector<TupleDataGatherFunction> &child_functions) {
	// Source
	auto source_heap_locations = FlatVector::GetData<data_ptr_t>(heap_locations);
	auto &source_heap_validity = FlatVector::Validity(heap_locations);

	// Target
	auto target_data = FlatVector::GetData<T>(target);
	auto &target_validity = FlatVector::Validity(target);

	// List parent: provides the length of each gathered list entry
	const auto list_entries = FlatVector::GetData<list_entry_t>(list_vector);

	uint64_t target_offset = list_size_before;
	for (idx_t i = 0; i < scan_count; i++) {
		const auto source_idx = scan_sel.get_index(i);
		if (!source_heap_validity.RowIsValid(source_idx)) {
			continue; // This row's list was NULL - nothing was serialized for it
		}

		const auto &list_length = list_entries[target_sel.get_index(i)].length;

		// Initialize validity mask and skip the heap pointer over it
		auto &source_heap_location = source_heap_locations[source_idx];
		ValidityBytes source_mask(source_heap_location);
		source_heap_location += ValidityBytes::SizeInBytes(list_length);

		// Get the start to the fixed-size data and skip the heap pointer over it
		const auto source_data_location = source_heap_location;
		source_heap_location += list_length * TupleDataWithinListFixedSize<T>();

		// Load the child validity and data belonging to this list entry
		for (idx_t child_i = 0; child_i < list_length; child_i++) {
			if (source_mask.RowIsValidUnsafe(child_i)) {
				target_data[target_offset + child_i] = TupleDataWithinListValueLoad<T>(
				    source_data_location + child_i * TupleDataWithinListFixedSize<T>(), source_heap_location);
			} else {
				target_validity.SetInvalid(target_offset + child_i);
			}
		}
		target_offset += list_length;
	}
}
|
1013
|
+
|
1014
|
+
//! Gathers STRUCT list-child entries: first restores the struct-level validity for every child entry,
//! then recurses into each struct member with the matching child gather function.
//! The per-row heap pointers are advanced past this struct's validity bytes before recursing, so the
//! child gathers pick up exactly where the struct mask ended in the serialized heap data.
static void TupleDataStructWithinListGather(const TupleDataLayout &layout, Vector &heap_locations,
                                            const idx_t list_size_before, const SelectionVector &scan_sel,
                                            const idx_t scan_count, Vector &target, const SelectionVector &target_sel,
                                            Vector &list_vector,
                                            const vector<TupleDataGatherFunction> &child_functions) {
	// Source: per-row heap pointers (mutated in place; the recursion below relies on this)
	auto source_heap_locations = FlatVector::GetData<data_ptr_t>(heap_locations);
	auto &source_heap_validity = FlatVector::Validity(heap_locations);

	// Target
	auto &target_validity = FlatVector::Validity(target);

	// List parent: provides the per-row child counts
	const auto list_entries = FlatVector::GetData<list_entry_t>(list_vector);

	uint64_t target_offset = list_size_before;
	for (idx_t i = 0; i < scan_count; i++) {
		const auto source_idx = scan_sel.get_index(i);
		if (!source_heap_validity.RowIsValid(source_idx)) {
			// No heap data for this row - nothing to gather
			continue;
		}

		const auto &list_length = list_entries[target_sel.get_index(i)].length;

		// Initialize validity mask and skip over it
		auto &source_heap_location = source_heap_locations[source_idx];
		ValidityBytes source_mask(source_heap_location);
		source_heap_location += ValidityBytes::SizeInBytes(list_length);

		// Load the child validity belonging to this list entry
		for (idx_t child_i = 0; child_i < list_length; child_i++) {
			if (!source_mask.RowIsValidUnsafe(child_i)) {
				target_validity.SetInvalid(target_offset + child_i);
			}
		}
		target_offset += list_length;
	}

	// Recurse: one child gather function per struct member, in declaration order
	auto &struct_targets = StructVector::GetEntries(target);
	for (idx_t struct_col_idx = 0; struct_col_idx < struct_targets.size(); struct_col_idx++) {
		auto &struct_target = *struct_targets[struct_col_idx];
		const auto &struct_gather_function = child_functions[struct_col_idx];
		struct_gather_function.function(layout, heap_locations, list_size_before, scan_sel, scan_count, struct_target,
		                                target_sel, list_vector, struct_gather_function.child_functions);
	}
}
|
1061
|
+
|
1062
|
+
//! Gathers LIST-of-LIST data: the serialized heap stores only the inner list LENGTHS (one uint64_t each),
//! so this function reconstructs the inner list_entry_t offsets by accumulating lengths into
//! "target_child_offset". It also builds "combined_list_vector", whose entries cover, per outer row, the
//! total span of grand-child values - that vector is then passed as the "list parent" to the recursive
//! child gather so it knows how many values to consume per row.
static void TupleDataListWithinListGather(const TupleDataLayout &layout, Vector &heap_locations,
                                          const idx_t list_size_before, const SelectionVector &scan_sel,
                                          const idx_t scan_count, Vector &target, const SelectionVector &target_sel,
                                          Vector &list_vector, const vector<TupleDataGatherFunction> &child_functions) {
	// Source: per-row heap pointers (mutated in place; the recursion below relies on this)
	auto source_heap_locations = FlatVector::GetData<data_ptr_t>(heap_locations);
	auto &source_heap_validity = FlatVector::Validity(heap_locations);

	// Target
	auto target_list_entries = FlatVector::GetData<list_entry_t>(target);
	auto &target_validity = FlatVector::Validity(target);
	const auto child_list_size_before = ListVector::GetListSize(target);

	// List parent: provides the per-row child counts
	const auto list_entries = FlatVector::GetData<list_entry_t>(list_vector);

	// We need to create a vector that has the combined list sizes (hugeint_t has same size as list_entry_t)
	Vector combined_list_vector(LogicalType::HUGEINT);
	auto combined_list_entries = FlatVector::GetData<list_entry_t>(combined_list_vector);

	uint64_t target_offset = list_size_before;
	uint64_t target_child_offset = child_list_size_before;
	for (idx_t i = 0; i < scan_count; i++) {
		const auto source_idx = scan_sel.get_index(i);
		if (!source_heap_validity.RowIsValid(source_idx)) {
			// No heap data for this row - nothing to gather
			continue;
		}

		const auto &list_length = list_entries[target_sel.get_index(i)].length;

		// Initialize validity mask and skip over it
		auto &source_heap_location = source_heap_locations[source_idx];
		ValidityBytes source_mask(source_heap_location);
		source_heap_location += ValidityBytes::SizeInBytes(list_length);

		// Get the start to the fixed-size data and skip the heap pointer over it
		// (fixed-size data here is the serialized inner-list lengths, one uint64_t per inner list)
		const auto source_data_location = source_heap_location;
		source_heap_location += list_length * sizeof(uint64_t);

		// Set the offset of the combined list entry
		auto &combined_list_entry = combined_list_entries[target_sel.get_index(i)];
		combined_list_entry.offset = target_child_offset;

		// Load the child validity and data belonging to this list entry
		for (idx_t child_i = 0; child_i < list_length; child_i++) {
			if (source_mask.RowIsValidUnsafe(child_i)) {
				// Reconstruct the inner list entry: offset is the running total, length comes from the heap
				auto &target_list_entry = target_list_entries[target_offset + child_i];
				target_list_entry.offset = target_child_offset;
				target_list_entry.length = Load<uint64_t>(source_data_location + child_i * sizeof(uint64_t));
				target_child_offset += target_list_entry.length;
			} else {
				target_validity.SetInvalid(target_offset + child_i);
			}
		}

		// Set the length of the combined list entry
		combined_list_entry.length = target_child_offset - combined_list_entry.offset;

		target_offset += list_length;
	}
	// Make room for (and declare) all grand-child values before recursing into them
	ListVector::Reserve(target, target_child_offset);
	ListVector::SetListSize(target, target_child_offset);

	// Recurse: a list has exactly one child gather function (for its child type)
	D_ASSERT(child_functions.size() == 1);
	const auto &child_function = child_functions[0];
	child_function.function(layout, heap_locations, child_list_size_before, scan_sel, scan_count,
	                        ListVector::GetEntry(target), target_sel, combined_list_vector,
	                        child_function.child_functions);
}
|
1132
|
+
|
1133
|
+
template <class T>
|
1134
|
+
tuple_data_gather_function_t TupleDataGetGatherFunction(bool within_list) {
|
1135
|
+
return within_list ? TupleDataTemplatedWithinListGather<T> : TupleDataTemplatedGather<T>;
|
1136
|
+
}
|
1137
|
+
|
1138
|
+
//! Resolves the gather function for "type", recursively filling in child gather functions for
//! nested types (STRUCT members, LIST child). When "within_list" is set, the variants that read
//! list-child data from the row heap are selected instead of the regular row-wise variants.
//! Throws InternalException for physical types that have no gather implementation.
TupleDataGatherFunction TupleDataCollection::GetGatherFunction(const LogicalType &type, bool within_list) {
	TupleDataGatherFunction gather_function;
	const auto physical_type = type.InternalType();
	switch (physical_type) {
	case PhysicalType::BOOL:
		gather_function.function = TupleDataGetGatherFunction<bool>(within_list);
		break;
	case PhysicalType::INT8:
		gather_function.function = TupleDataGetGatherFunction<int8_t>(within_list);
		break;
	case PhysicalType::INT16:
		gather_function.function = TupleDataGetGatherFunction<int16_t>(within_list);
		break;
	case PhysicalType::INT32:
		gather_function.function = TupleDataGetGatherFunction<int32_t>(within_list);
		break;
	case PhysicalType::INT64:
		gather_function.function = TupleDataGetGatherFunction<int64_t>(within_list);
		break;
	case PhysicalType::INT128:
		gather_function.function = TupleDataGetGatherFunction<hugeint_t>(within_list);
		break;
	case PhysicalType::UINT8:
		gather_function.function = TupleDataGetGatherFunction<uint8_t>(within_list);
		break;
	case PhysicalType::UINT16:
		gather_function.function = TupleDataGetGatherFunction<uint16_t>(within_list);
		break;
	case PhysicalType::UINT32:
		gather_function.function = TupleDataGetGatherFunction<uint32_t>(within_list);
		break;
	case PhysicalType::UINT64:
		gather_function.function = TupleDataGetGatherFunction<uint64_t>(within_list);
		break;
	case PhysicalType::FLOAT:
		gather_function.function = TupleDataGetGatherFunction<float>(within_list);
		break;
	case PhysicalType::DOUBLE:
		gather_function.function = TupleDataGetGatherFunction<double>(within_list);
		break;
	case PhysicalType::INTERVAL:
		gather_function.function = TupleDataGetGatherFunction<interval_t>(within_list);
		break;
	case PhysicalType::VARCHAR:
		gather_function.function = TupleDataGetGatherFunction<string_t>(within_list);
		break;
	case PhysicalType::STRUCT: {
		if (within_list) {
			gather_function.function = TupleDataStructWithinListGather;
		} else {
			gather_function.function = TupleDataStructGather;
		}
		// One child gather function per struct member, in declaration order
		for (const auto &struct_child : StructType::GetChildTypes(type)) {
			gather_function.child_functions.push_back(GetGatherFunction(struct_child.second, within_list));
		}
		break;
	}
	case PhysicalType::LIST: {
		if (within_list) {
			gather_function.function = TupleDataListWithinListGather;
		} else {
			gather_function.function = TupleDataListGather;
		}
		// The list child is always gathered with a within-list variant
		gather_function.child_functions.push_back(GetGatherFunction(ListType::GetChildType(type), true));
		break;
	}
	default:
		throw InternalException("Unsupported type for TupleDataCollection::GetGatherFunction");
	}
	return gather_function;
}
|
1199
|
+
|
1200
|
+
} // namespace duckdb
|