duckdb 0.8.2-dev3458.0 → 0.8.2-dev3949.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +2 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu_extension.cpp +5 -5
- package/src/duckdb/extension/json/include/json_deserializer.hpp +7 -16
- package/src/duckdb/extension/json/include/json_serializer.hpp +9 -15
- package/src/duckdb/extension/json/json_deserializer.cpp +29 -67
- package/src/duckdb/extension/json/json_scan.cpp +1 -1
- package/src/duckdb/extension/json/json_serializer.cpp +26 -69
- package/src/duckdb/src/common/enum_util.cpp +119 -7
- package/src/duckdb/src/common/extra_type_info.cpp +7 -3
- package/src/duckdb/src/common/radix_partitioning.cpp +8 -31
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +18 -3
- package/src/duckdb/src/common/serializer/binary_deserializer.cpp +62 -77
- package/src/duckdb/src/common/serializer/binary_serializer.cpp +84 -84
- package/src/duckdb/src/common/serializer/format_serializer.cpp +1 -1
- package/src/duckdb/src/common/sort/partition_state.cpp +41 -33
- package/src/duckdb/src/common/types/data_chunk.cpp +44 -8
- package/src/duckdb/src/common/types/hyperloglog.cpp +21 -0
- package/src/duckdb/src/common/types/interval.cpp +3 -0
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +252 -126
- package/src/duckdb/src/common/types/row/row_layout.cpp +3 -31
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +40 -32
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +39 -26
- package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +11 -1
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +21 -16
- package/src/duckdb/src/common/types/value.cpp +63 -42
- package/src/duckdb/src/common/types/vector.cpp +33 -67
- package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +3 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +222 -364
- package/src/duckdb/src/execution/join_hashtable.cpp +5 -6
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +240 -310
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +202 -173
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +36 -2
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/base_csv_reader.cpp +58 -162
- package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +434 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +80 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +90 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +95 -0
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/csv_reader_options.cpp +47 -28
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +35 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +107 -0
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/parallel_csv_reader.cpp +44 -44
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +52 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +336 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +165 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +398 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +175 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +39 -0
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +1 -2
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +614 -574
- package/src/duckdb/src/execution/window_executor.cpp +6 -5
- package/src/duckdb/src/function/cast/cast_function_set.cpp +1 -0
- package/src/duckdb/src/function/scalar/strftime_format.cpp +4 -4
- package/src/duckdb/src/function/table/copy_csv.cpp +94 -96
- package/src/duckdb/src/function/table/read_csv.cpp +150 -136
- package/src/duckdb/src/function/table/table_scan.cpp +0 -2
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +24 -0
- package/src/duckdb/src/include/duckdb/common/file_opener.hpp +9 -0
- package/src/duckdb/src/include/duckdb/common/fixed_size_map.hpp +208 -0
- package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/printer.hpp +11 -0
- package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +43 -30
- package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +36 -35
- package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/serializer/encoding_util.hpp +132 -0
- package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +125 -150
- package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +119 -107
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/shared_ptr.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -7
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/types/hyperloglog.hpp +7 -1
- package/src/duckdb/src/include/duckdb/common/types/interval.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +41 -9
- package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/types/row/row_layout.hpp +1 -23
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +14 -8
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +6 -3
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +13 -8
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/vector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +125 -146
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +5 -4
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +4 -3
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/base_csv_reader.hpp +17 -17
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +72 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +110 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +103 -0
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_file_handle.hpp +8 -15
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_line_info.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_reader_options.hpp +52 -28
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +127 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +75 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +51 -0
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/parallel_csv_reader.hpp +21 -27
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +21 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +18 -27
- package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +5 -6
- package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +4 -4
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +17 -12
- package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -1
- package/src/duckdb/src/include/duckdb/main/config.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/connection.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +6 -6
- package/src/duckdb/src/include/duckdb/parallel/event.hpp +12 -1
- package/src/duckdb/src/include/duckdb/storage/block.hpp +6 -0
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +7 -3
- package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +15 -3
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
- package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +6 -0
- package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +1 -0
- package/src/duckdb/src/include/duckdb.h +12 -0
- package/src/duckdb/src/main/capi/logical_types-c.cpp +22 -0
- package/src/duckdb/src/main/client_context_file_opener.cpp +17 -0
- package/src/duckdb/src/main/client_verify.cpp +1 -0
- package/src/duckdb/src/main/config.cpp +2 -2
- package/src/duckdb/src/main/connection.cpp +3 -3
- package/src/duckdb/src/main/relation/read_csv_relation.cpp +19 -13
- package/src/duckdb/src/parallel/pipeline_finish_event.cpp +1 -1
- package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -16
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +41 -25
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +4 -4
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +10 -10
- package/src/duckdb/src/planner/logical_operator.cpp +1 -1
- package/src/duckdb/src/planner/planner.cpp +1 -1
- package/src/duckdb/src/storage/checkpoint_manager.cpp +4 -3
- package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +5 -5
- package/src/duckdb/src/storage/serialization/serialize_expression.cpp +10 -10
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +20 -20
- package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +2 -2
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +118 -89
- package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +27 -27
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +16 -16
- package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +8 -8
- package/src/duckdb/src/storage/serialization/serialize_statement.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_storage.cpp +39 -0
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +9 -9
- package/src/duckdb/src/storage/statistics/base_statistics.cpp +67 -4
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +16 -0
- package/src/duckdb/src/storage/statistics/list_stats.cpp +21 -0
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +126 -1
- package/src/duckdb/src/storage/statistics/string_stats.cpp +23 -0
- package/src/duckdb/src/storage/statistics/struct_stats.cpp +27 -0
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/chunk_info.cpp +82 -3
- package/src/duckdb/src/storage/table/row_group.cpp +68 -1
- package/src/duckdb/src/storage/table/table_statistics.cpp +21 -0
- package/src/duckdb/src/storage/wal_replay.cpp +2 -2
- package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +15 -1
- package/src/duckdb/src/verification/statement_verifier.cpp +2 -0
- package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +8 -0
- package/src/duckdb/ub_src_execution.cpp +0 -2
- package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +18 -0
- package/src/duckdb/ub_src_execution_operator_csv_scanner_sniffer.cpp +12 -0
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +0 -12
- package/src/duckdb/ub_src_storage_serialization.cpp +2 -0
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
- package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -207
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +0 -133
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +0 -74
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +0 -73
@@ -8,7 +8,7 @@ namespace duckdb {
|
|
8
8
|
|
9
9
|
PartitionedTupleData::PartitionedTupleData(PartitionedTupleDataType type_p, BufferManager &buffer_manager_p,
|
10
10
|
const TupleDataLayout &layout_p)
|
11
|
-
: type(type_p), buffer_manager(buffer_manager_p), layout(layout_p.Copy()),
|
11
|
+
: type(type_p), buffer_manager(buffer_manager_p), layout(layout_p.Copy()), count(0), data_size(0),
|
12
12
|
allocators(make_shared<PartitionTupleDataAllocators>()) {
|
13
13
|
}
|
14
14
|
|
@@ -19,6 +19,10 @@ PartitionedTupleData::PartitionedTupleData(const PartitionedTupleData &other)
|
|
19
19
|
PartitionedTupleData::~PartitionedTupleData() {
|
20
20
|
}
|
21
21
|
|
22
|
+
const TupleDataLayout &PartitionedTupleData::GetLayout() const {
|
23
|
+
return layout;
|
24
|
+
}
|
25
|
+
|
22
26
|
PartitionedTupleDataType PartitionedTupleData::GetType() const {
|
23
27
|
return type;
|
24
28
|
}
|
@@ -26,6 +30,7 @@ PartitionedTupleDataType PartitionedTupleData::GetType() const {
|
|
26
30
|
void PartitionedTupleData::InitializeAppendState(PartitionedTupleDataAppendState &state,
|
27
31
|
TupleDataPinProperties properties) const {
|
28
32
|
state.partition_sel.Initialize();
|
33
|
+
state.reverse_partition_sel.Initialize();
|
29
34
|
|
30
35
|
vector<column_t> column_ids;
|
31
36
|
column_ids.reserve(layout.ColumnCount());
|
@@ -36,97 +41,178 @@ void PartitionedTupleData::InitializeAppendState(PartitionedTupleDataAppendState
|
|
36
41
|
InitializeAppendStateInternal(state, properties);
|
37
42
|
}
|
38
43
|
|
39
|
-
void PartitionedTupleData::Append(PartitionedTupleDataAppendState &state, DataChunk &input
|
44
|
+
void PartitionedTupleData::Append(PartitionedTupleDataAppendState &state, DataChunk &input,
|
45
|
+
const SelectionVector &append_sel, const idx_t append_count) {
|
46
|
+
TupleDataCollection::ToUnifiedFormat(state.chunk_state, input);
|
47
|
+
AppendUnified(state, input, append_sel, append_count);
|
48
|
+
}
|
49
|
+
|
50
|
+
bool PartitionedTupleData::UseFixedSizeMap() const {
|
51
|
+
return MaxPartitionIndex() < PartitionedTupleDataAppendState::MAP_THRESHOLD;
|
52
|
+
}
|
53
|
+
|
54
|
+
void PartitionedTupleData::AppendUnified(PartitionedTupleDataAppendState &state, DataChunk &input,
|
55
|
+
const SelectionVector &append_sel, const idx_t append_count) {
|
56
|
+
const idx_t actual_append_count = append_count == DConstants::INVALID_INDEX ? input.size() : append_count;
|
57
|
+
|
40
58
|
// Compute partition indices and store them in state.partition_indices
|
41
59
|
ComputePartitionIndices(state, input);
|
42
60
|
|
43
61
|
// Build the selection vector for the partitions
|
44
|
-
BuildPartitionSel(state,
|
62
|
+
BuildPartitionSel(state, append_sel, actual_append_count);
|
45
63
|
|
46
64
|
// Early out: check if everything belongs to a single partition
|
47
|
-
|
48
|
-
if (
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
65
|
+
optional_idx partition_index;
|
66
|
+
if (UseFixedSizeMap()) {
|
67
|
+
if (state.fixed_partition_entries.size() == 1) {
|
68
|
+
partition_index = state.fixed_partition_entries.begin().GetKey();
|
69
|
+
}
|
70
|
+
} else {
|
71
|
+
if (state.partition_entries.size() == 1) {
|
72
|
+
partition_index = state.partition_entries.begin()->first;
|
73
|
+
}
|
54
74
|
}
|
75
|
+
if (partition_index.IsValid()) {
|
76
|
+
auto &partition = *partitions[partition_index.GetIndex()];
|
77
|
+
auto &partition_pin_state = *state.partition_pin_states[partition_index.GetIndex()];
|
55
78
|
|
56
|
-
|
79
|
+
const auto size_before = partition.SizeInBytes();
|
80
|
+
partition.AppendUnified(partition_pin_state, state.chunk_state, input, append_sel, actual_append_count);
|
81
|
+
data_size += partition.SizeInBytes() - size_before;
|
82
|
+
} else {
|
83
|
+
// Compute the heap sizes for the whole chunk
|
84
|
+
if (!layout.AllConstant()) {
|
85
|
+
TupleDataCollection::ComputeHeapSizes(state.chunk_state, input, state.partition_sel, actual_append_count);
|
86
|
+
}
|
57
87
|
|
58
|
-
|
59
|
-
|
60
|
-
TupleDataCollection::ComputeHeapSizes(state.chunk_state, input, state.partition_sel, input.size());
|
61
|
-
}
|
88
|
+
// Build the buffer space
|
89
|
+
BuildBufferSpace(state);
|
62
90
|
|
63
|
-
|
64
|
-
|
91
|
+
// Now scatter everything in one go
|
92
|
+
partitions[0]->Scatter(state.chunk_state, input, state.partition_sel, actual_append_count);
|
93
|
+
}
|
65
94
|
|
66
|
-
|
67
|
-
|
95
|
+
count += actual_append_count;
|
96
|
+
Verify();
|
68
97
|
}
|
69
98
|
|
70
|
-
void PartitionedTupleData::Append(PartitionedTupleDataAppendState &state, TupleDataChunkState &input,
|
99
|
+
void PartitionedTupleData::Append(PartitionedTupleDataAppendState &state, TupleDataChunkState &input,
|
100
|
+
const idx_t append_count) {
|
71
101
|
// Compute partition indices and store them in state.partition_indices
|
72
|
-
ComputePartitionIndices(input.row_locations,
|
102
|
+
ComputePartitionIndices(input.row_locations, append_count, state.partition_indices);
|
73
103
|
|
74
104
|
// Build the selection vector for the partitions
|
75
|
-
BuildPartitionSel(state,
|
105
|
+
BuildPartitionSel(state, *FlatVector::IncrementalSelectionVector(), append_count);
|
76
106
|
|
77
107
|
// Early out: check if everything belongs to a single partition
|
78
|
-
|
79
|
-
if (
|
80
|
-
|
81
|
-
|
82
|
-
|
108
|
+
optional_idx partition_index;
|
109
|
+
if (UseFixedSizeMap()) {
|
110
|
+
if (state.fixed_partition_entries.size() == 1) {
|
111
|
+
partition_index = state.fixed_partition_entries.begin().GetKey();
|
112
|
+
}
|
113
|
+
} else {
|
114
|
+
if (state.partition_entries.size() == 1) {
|
115
|
+
partition_index = state.partition_entries.begin()->first;
|
116
|
+
}
|
117
|
+
}
|
118
|
+
|
119
|
+
if (partition_index.IsValid()) {
|
120
|
+
auto &partition = *partitions[partition_index.GetIndex()];
|
121
|
+
auto &partition_pin_state = *state.partition_pin_states[partition_index.GetIndex()];
|
83
122
|
|
84
123
|
state.chunk_state.heap_sizes.Reference(input.heap_sizes);
|
85
|
-
|
86
|
-
partition.
|
87
|
-
|
124
|
+
|
125
|
+
const auto size_before = partition.SizeInBytes();
|
126
|
+
partition.Build(partition_pin_state, state.chunk_state, 0, append_count);
|
127
|
+
data_size += partition.SizeInBytes() - size_before;
|
128
|
+
|
129
|
+
partition.CopyRows(state.chunk_state, input, *FlatVector::IncrementalSelectionVector(), append_count);
|
130
|
+
} else {
|
131
|
+
// Build the buffer space
|
132
|
+
state.chunk_state.heap_sizes.Slice(input.heap_sizes, state.partition_sel, append_count);
|
133
|
+
state.chunk_state.heap_sizes.Flatten(append_count);
|
134
|
+
BuildBufferSpace(state);
|
135
|
+
|
136
|
+
// Copy the rows
|
137
|
+
partitions[0]->CopyRows(state.chunk_state, input, state.partition_sel, append_count);
|
138
|
+
}
|
139
|
+
|
140
|
+
count += append_count;
|
141
|
+
Verify();
|
142
|
+
}
|
143
|
+
|
144
|
+
// LCOV_EXCL_START
|
145
|
+
template <class MAP_TYPE>
|
146
|
+
struct UnorderedMapGetter {
|
147
|
+
static inline const typename MAP_TYPE::key_type &GetKey(typename MAP_TYPE::iterator &iterator) {
|
148
|
+
return iterator->first;
|
88
149
|
}
|
89
150
|
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
151
|
+
static inline const typename MAP_TYPE::key_type &GetKey(const typename MAP_TYPE::const_iterator &iterator) {
|
152
|
+
return iterator->first;
|
153
|
+
}
|
154
|
+
|
155
|
+
static inline typename MAP_TYPE::mapped_type &GetValue(typename MAP_TYPE::iterator &iterator) {
|
156
|
+
return iterator->second;
|
157
|
+
}
|
158
|
+
|
159
|
+
static inline const typename MAP_TYPE::mapped_type &GetValue(const typename MAP_TYPE::const_iterator &iterator) {
|
160
|
+
return iterator->second;
|
161
|
+
}
|
162
|
+
};
|
163
|
+
|
164
|
+
template <class T>
|
165
|
+
struct FixedSizeMapGetter {
|
166
|
+
static inline const idx_t &GetKey(fixed_size_map_iterator_t<T> &iterator) {
|
167
|
+
return iterator.GetKey();
|
168
|
+
}
|
94
169
|
|
95
|
-
|
96
|
-
|
170
|
+
static inline const idx_t &GetKey(const const_fixed_size_map_iterator_t<T> &iterator) {
|
171
|
+
return iterator.GetKey();
|
172
|
+
}
|
173
|
+
|
174
|
+
static inline T &GetValue(fixed_size_map_iterator_t<T> &iterator) {
|
175
|
+
return iterator.GetValue();
|
176
|
+
}
|
177
|
+
|
178
|
+
static inline const T &GetValue(const const_fixed_size_map_iterator_t<T> &iterator) {
|
179
|
+
return iterator.GetValue();
|
180
|
+
}
|
181
|
+
};
|
182
|
+
// LCOV_EXCL_STOP
|
183
|
+
|
184
|
+
void PartitionedTupleData::BuildPartitionSel(PartitionedTupleDataAppendState &state, const SelectionVector &append_sel,
|
185
|
+
const idx_t append_count) {
|
186
|
+
if (UseFixedSizeMap()) {
|
187
|
+
BuildPartitionSel<fixed_size_map_t<list_entry_t>, FixedSizeMapGetter<list_entry_t>>(
|
188
|
+
state, state.fixed_partition_entries, append_sel, append_count);
|
189
|
+
} else {
|
190
|
+
BuildPartitionSel<perfect_map_t<list_entry_t>, UnorderedMapGetter<perfect_map_t<list_entry_t>>>(
|
191
|
+
state, state.partition_entries, append_sel, append_count);
|
192
|
+
}
|
97
193
|
}
|
98
194
|
|
99
|
-
|
195
|
+
template <class MAP_TYPE, class GETTER>
|
196
|
+
void PartitionedTupleData::BuildPartitionSel(PartitionedTupleDataAppendState &state, MAP_TYPE &partition_entries,
|
197
|
+
const SelectionVector &append_sel, const idx_t append_count) {
|
100
198
|
const auto partition_indices = FlatVector::GetData<idx_t>(state.partition_indices);
|
101
|
-
auto &partition_entries = state.partition_entries;
|
102
|
-
auto &partition_entries_arr = state.partition_entries_arr;
|
103
199
|
partition_entries.clear();
|
104
200
|
|
105
|
-
const auto max_partition_index = MaxPartitionIndex();
|
106
|
-
const auto use_arr = max_partition_index < PartitionedTupleDataAppendState::MAP_THRESHOLD;
|
107
|
-
|
108
201
|
switch (state.partition_indices.GetVectorType()) {
|
109
202
|
case VectorType::FLAT_VECTOR:
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
const auto &partition_index = partition_indices[i];
|
119
|
-
auto partition_entry = partition_entries.find(partition_index);
|
120
|
-
if (partition_entry == partition_entries.end()) {
|
121
|
-
partition_entries.emplace(partition_index, list_entry_t(0, 1));
|
122
|
-
} else {
|
123
|
-
partition_entry->second.length++;
|
124
|
-
}
|
203
|
+
for (idx_t i = 0; i < append_count; i++) {
|
204
|
+
const auto index = append_sel.get_index(i);
|
205
|
+
const auto &partition_index = partition_indices[index];
|
206
|
+
auto partition_entry = partition_entries.find(partition_index);
|
207
|
+
if (partition_entry == partition_entries.end()) {
|
208
|
+
partition_entries[partition_index] = list_entry_t(0, 1);
|
209
|
+
} else {
|
210
|
+
GETTER::GetValue(partition_entry).length++;
|
125
211
|
}
|
126
212
|
}
|
127
213
|
break;
|
128
214
|
case VectorType::CONSTANT_VECTOR:
|
129
|
-
partition_entries[partition_indices[0]] = list_entry_t(0,
|
215
|
+
partition_entries[partition_indices[0]] = list_entry_t(0, append_count);
|
130
216
|
break;
|
131
217
|
default:
|
132
218
|
throw InternalException("Unexpected VectorType in PartitionedTupleData::Append");
|
@@ -134,64 +220,62 @@ void PartitionedTupleData::BuildPartitionSel(PartitionedTupleDataAppendState &st
|
|
134
220
|
|
135
221
|
// Early out: check if everything belongs to a single partition
|
136
222
|
if (partition_entries.size() == 1) {
|
223
|
+
// This needs to be initialized, even if we go the short path here
|
224
|
+
for (idx_t i = 0; i < append_count; i++) {
|
225
|
+
const auto index = append_sel.get_index(i);
|
226
|
+
state.reverse_partition_sel[index] = i;
|
227
|
+
}
|
137
228
|
return;
|
138
229
|
}
|
139
230
|
|
140
231
|
// Compute offsets from the counts
|
141
232
|
idx_t offset = 0;
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
offset += partition_entry.length;
|
147
|
-
}
|
148
|
-
} else {
|
149
|
-
for (auto &pc : partition_entries) {
|
150
|
-
auto &partition_entry = pc.second;
|
151
|
-
partition_entry.offset = offset;
|
152
|
-
offset += partition_entry.length;
|
153
|
-
}
|
233
|
+
for (auto it = partition_entries.begin(); it != partition_entries.end(); ++it) {
|
234
|
+
auto &partition_entry = GETTER::GetValue(it);
|
235
|
+
partition_entry.offset = offset;
|
236
|
+
offset += partition_entry.length;
|
154
237
|
}
|
155
238
|
|
156
239
|
// Now initialize a single selection vector that acts as a selection vector for every partition
|
157
|
-
auto &
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
for (idx_t partition_index = 0; partition_index <= max_partition_index; partition_index++) {
|
166
|
-
const auto &partition_entry = partition_entries_arr[partition_index];
|
167
|
-
if (partition_entry.length != 0) {
|
168
|
-
partition_entries.emplace(partition_index, partition_entry);
|
169
|
-
}
|
170
|
-
}
|
171
|
-
} else {
|
172
|
-
for (idx_t i = 0; i < count; i++) {
|
173
|
-
const auto &partition_index = partition_indices[i];
|
174
|
-
auto &partition_offset = partition_entries[partition_index].offset;
|
175
|
-
all_partitions_sel[partition_offset++] = i;
|
176
|
-
}
|
240
|
+
auto &partition_sel = state.partition_sel;
|
241
|
+
auto &reverse_partition_sel = state.reverse_partition_sel;
|
242
|
+
for (idx_t i = 0; i < append_count; i++) {
|
243
|
+
const auto index = append_sel.get_index(i);
|
244
|
+
const auto &partition_index = partition_indices[index];
|
245
|
+
auto &partition_offset = partition_entries[partition_index].offset;
|
246
|
+
reverse_partition_sel[index] = partition_offset;
|
247
|
+
partition_sel[partition_offset++] = index;
|
177
248
|
}
|
178
249
|
}
|
179
250
|
|
180
251
|
void PartitionedTupleData::BuildBufferSpace(PartitionedTupleDataAppendState &state) {
|
181
|
-
|
182
|
-
|
252
|
+
if (UseFixedSizeMap()) {
|
253
|
+
BuildBufferSpace<fixed_size_map_t<list_entry_t>, FixedSizeMapGetter<list_entry_t>>(
|
254
|
+
state, state.fixed_partition_entries);
|
255
|
+
} else {
|
256
|
+
BuildBufferSpace<perfect_map_t<list_entry_t>, UnorderedMapGetter<perfect_map_t<list_entry_t>>>(
|
257
|
+
state, state.partition_entries);
|
258
|
+
}
|
259
|
+
}
|
260
|
+
|
261
|
+
template <class MAP_TYPE, class GETTER>
|
262
|
+
void PartitionedTupleData::BuildBufferSpace(PartitionedTupleDataAppendState &state, const MAP_TYPE &partition_entries) {
|
263
|
+
for (auto it = partition_entries.begin(); it != partition_entries.end(); ++it) {
|
264
|
+
const auto &partition_index = GETTER::GetKey(it);
|
183
265
|
|
184
266
|
// Partition, pin state for this partition index
|
185
267
|
auto &partition = *partitions[partition_index];
|
186
268
|
auto &partition_pin_state = *state.partition_pin_states[partition_index];
|
187
269
|
|
188
270
|
// Length and offset for this partition
|
189
|
-
const auto &partition_entry =
|
271
|
+
const auto &partition_entry = GETTER::GetValue(it);
|
190
272
|
const auto &partition_length = partition_entry.length;
|
191
273
|
const auto partition_offset = partition_entry.offset - partition_length;
|
192
274
|
|
193
275
|
// Build out the buffer space for this partition
|
276
|
+
const auto size_before = partition.SizeInBytes();
|
194
277
|
partition.Build(partition_pin_state, state.chunk_state, partition_offset, partition_length);
|
278
|
+
data_size += partition.SizeInBytes() - size_before;
|
195
279
|
}
|
196
280
|
}
|
197
281
|
|
@@ -210,7 +294,6 @@ void PartitionedTupleData::Combine(PartitionedTupleData &other) {
|
|
210
294
|
|
211
295
|
// Now combine the state's partitions into this
|
212
296
|
lock_guard<mutex> guard(lock);
|
213
|
-
|
214
297
|
if (partitions.empty()) {
|
215
298
|
// This is the first merge, we just copy them over
|
216
299
|
partitions = std::move(other.partitions);
|
@@ -221,40 +304,28 @@ void PartitionedTupleData::Combine(PartitionedTupleData &other) {
|
|
221
304
|
partitions[i]->Combine(*other.partitions[i]);
|
222
305
|
}
|
223
306
|
}
|
307
|
+
this->count += other.count;
|
308
|
+
this->data_size += other.data_size;
|
309
|
+
Verify();
|
224
310
|
}
|
225
311
|
|
226
|
-
void PartitionedTupleData::
|
227
|
-
|
228
|
-
|
229
|
-
}
|
230
|
-
#ifdef DEBUG
|
231
|
-
const auto count_before = source.Count();
|
232
|
-
#endif
|
233
|
-
|
234
|
-
PartitionedTupleDataAppendState append_state;
|
235
|
-
InitializeAppendState(append_state, properties);
|
236
|
-
|
237
|
-
TupleDataChunkIterator iterator(source, TupleDataPinProperties::DESTROY_AFTER_DONE, true);
|
238
|
-
auto &chunk_state = iterator.GetChunkState();
|
239
|
-
do {
|
240
|
-
Append(append_state, chunk_state, iterator.GetCurrentChunkCount());
|
241
|
-
} while (iterator.Next());
|
242
|
-
|
243
|
-
FlushAppendState(append_state);
|
244
|
-
source.Reset();
|
245
|
-
|
246
|
-
#ifdef DEBUG
|
247
|
-
idx_t count_after = 0;
|
248
|
-
for (const auto &partition : partitions) {
|
249
|
-
count_after += partition->Count();
|
312
|
+
void PartitionedTupleData::Reset() {
|
313
|
+
for (auto &partition : partitions) {
|
314
|
+
partition->Reset();
|
250
315
|
}
|
251
|
-
|
252
|
-
|
316
|
+
this->count = 0;
|
317
|
+
this->data_size = 0;
|
318
|
+
Verify();
|
253
319
|
}
|
254
320
|
|
255
321
|
void PartitionedTupleData::Repartition(PartitionedTupleData &new_partitioned_data) {
|
256
322
|
D_ASSERT(layout.GetTypes() == new_partitioned_data.layout.GetTypes());
|
257
323
|
|
324
|
+
if (partitions.size() == new_partitioned_data.partitions.size()) {
|
325
|
+
new_partitioned_data.Combine(*this);
|
326
|
+
return;
|
327
|
+
}
|
328
|
+
|
258
329
|
PartitionedTupleDataAppendState append_state;
|
259
330
|
new_partitioned_data.InitializeAppendState(append_state);
|
260
331
|
|
@@ -279,20 +350,42 @@ void PartitionedTupleData::Repartition(PartitionedTupleData &new_partitioned_dat
|
|
279
350
|
}
|
280
351
|
partitions[actual_partition_idx]->Reset();
|
281
352
|
}
|
282
|
-
|
283
353
|
new_partitioned_data.FlushAppendState(append_state);
|
354
|
+
|
355
|
+
count = 0;
|
356
|
+
data_size = 0;
|
357
|
+
|
358
|
+
Verify();
|
359
|
+
}
|
360
|
+
|
361
|
+
void PartitionedTupleData::Unpin() {
|
362
|
+
for (auto &partition : partitions) {
|
363
|
+
partition->Unpin();
|
364
|
+
}
|
284
365
|
}
|
285
366
|
|
286
367
|
vector<unique_ptr<TupleDataCollection>> &PartitionedTupleData::GetPartitions() {
|
287
368
|
return partitions;
|
288
369
|
}
|
289
370
|
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
371
|
+
unique_ptr<TupleDataCollection> PartitionedTupleData::GetUnpartitioned() {
|
372
|
+
auto data_collection = std::move(partitions[0]);
|
373
|
+
partitions[0] = make_uniq<TupleDataCollection>(buffer_manager, layout);
|
374
|
+
|
375
|
+
for (idx_t i = 1; i < partitions.size(); i++) {
|
376
|
+
data_collection->Combine(*partitions[i]);
|
294
377
|
}
|
295
|
-
|
378
|
+
count = 0;
|
379
|
+
data_size = 0;
|
380
|
+
|
381
|
+
data_collection->Verify();
|
382
|
+
Verify();
|
383
|
+
|
384
|
+
return data_collection;
|
385
|
+
}
|
386
|
+
|
387
|
+
idx_t PartitionedTupleData::Count() const {
|
388
|
+
return count;
|
296
389
|
}
|
297
390
|
|
298
391
|
idx_t PartitionedTupleData::SizeInBytes() const {
|
@@ -303,6 +396,39 @@ idx_t PartitionedTupleData::SizeInBytes() const {
|
|
303
396
|
return total_size;
|
304
397
|
}
|
305
398
|
|
399
|
+
idx_t PartitionedTupleData::PartitionCount() const {
|
400
|
+
return partitions.size();
|
401
|
+
}
|
402
|
+
|
403
|
+
void PartitionedTupleData::Verify() const {
|
404
|
+
#ifdef DEBUG
|
405
|
+
idx_t total_count = 0;
|
406
|
+
idx_t total_size = 0;
|
407
|
+
for (auto &partition : partitions) {
|
408
|
+
partition->Verify();
|
409
|
+
total_count += partition->Count();
|
410
|
+
total_size += partition->SizeInBytes();
|
411
|
+
}
|
412
|
+
D_ASSERT(total_count == this->count);
|
413
|
+
D_ASSERT(total_size == this->data_size);
|
414
|
+
#endif
|
415
|
+
}
|
416
|
+
|
417
|
+
// LCOV_EXCL_START
|
418
|
+
string PartitionedTupleData::ToString() {
|
419
|
+
string result =
|
420
|
+
StringUtil::Format("PartitionedTupleData - [%llu Partitions, %llu Rows]\n", partitions.size(), Count());
|
421
|
+
for (idx_t partition_idx = 0; partition_idx < partitions.size(); partition_idx++) {
|
422
|
+
result += StringUtil::Format("Partition %llu: ", partition_idx) + partitions[partition_idx]->ToString();
|
423
|
+
}
|
424
|
+
return result;
|
425
|
+
}
|
426
|
+
|
427
|
+
void PartitionedTupleData::Print() {
|
428
|
+
Printer::Print(ToString());
|
429
|
+
}
|
430
|
+
// LCOV_EXCL_STOP
|
431
|
+
|
306
432
|
void PartitionedTupleData::CreateAllocator() {
|
307
433
|
allocators->allocators.emplace_back(make_shared<TupleDataAllocator>(buffer_manager, layout));
|
308
434
|
}
|
@@ -12,11 +12,10 @@
|
|
12
12
|
|
13
13
|
namespace duckdb {
|
14
14
|
|
15
|
-
RowLayout::RowLayout()
|
16
|
-
: flag_width(0), data_width(0), aggr_width(0), row_width(0), all_constant(true), heap_pointer_offset(0) {
|
15
|
+
RowLayout::RowLayout() : flag_width(0), data_width(0), row_width(0), all_constant(true), heap_pointer_offset(0) {
|
17
16
|
}
|
18
17
|
|
19
|
-
void RowLayout::Initialize(vector<LogicalType> types_p,
|
18
|
+
void RowLayout::Initialize(vector<LogicalType> types_p, bool align) {
|
20
19
|
offsets.clear();
|
21
20
|
types = std::move(types_p);
|
22
21
|
|
@@ -31,7 +30,7 @@ void RowLayout::Initialize(vector<LogicalType> types_p, Aggregates aggregates_p,
|
|
31
30
|
|
32
31
|
// This enables pointer swizzling for out-of-core computation.
|
33
32
|
if (!all_constant) {
|
34
|
-
// When unswizzled the pointer lives here.
|
33
|
+
// When unswizzled, the pointer lives here.
|
35
34
|
// When swizzled, the pointer is replaced by an offset.
|
36
35
|
heap_pointer_offset = row_width;
|
37
36
|
// The 8 byte pointer will be replaced with an 8 byte idx_t when swizzled.
|
@@ -52,39 +51,12 @@ void RowLayout::Initialize(vector<LogicalType> types_p, Aggregates aggregates_p,
|
|
52
51
|
}
|
53
52
|
}
|
54
53
|
|
55
|
-
// Alignment padding for aggregates
|
56
|
-
#ifndef DUCKDB_ALLOW_UNDEFINED
|
57
|
-
if (align) {
|
58
|
-
row_width = AlignValue(row_width);
|
59
|
-
}
|
60
|
-
#endif
|
61
54
|
data_width = row_width - flag_width;
|
62
55
|
|
63
|
-
// Aggregate fields.
|
64
|
-
aggregates = std::move(aggregates_p);
|
65
|
-
for (auto &aggregate : aggregates) {
|
66
|
-
offsets.push_back(row_width);
|
67
|
-
row_width += aggregate.payload_size;
|
68
|
-
#ifndef DUCKDB_ALLOW_UNDEFINED
|
69
|
-
D_ASSERT(aggregate.payload_size == AlignValue(aggregate.payload_size));
|
70
|
-
#endif
|
71
|
-
}
|
72
|
-
aggr_width = row_width - data_width - flag_width;
|
73
|
-
|
74
56
|
// Alignment padding for the next row
|
75
|
-
#ifndef DUCKDB_ALLOW_UNDEFINED
|
76
57
|
if (align) {
|
77
58
|
row_width = AlignValue(row_width);
|
78
59
|
}
|
79
|
-
#endif
|
80
|
-
}
|
81
|
-
|
82
|
-
void RowLayout::Initialize(vector<LogicalType> types_p, bool align) {
|
83
|
-
Initialize(std::move(types_p), Aggregates(), align);
|
84
|
-
}
|
85
|
-
|
86
|
-
void RowLayout::Initialize(Aggregates aggregates_p, bool align) {
|
87
|
-
Initialize(vector<LogicalType>(), std::move(aggregates_p), align);
|
88
60
|
}
|
89
61
|
|
90
62
|
} // namespace duckdb
|