duckdb 0.8.2-dev3458.0 → 0.8.2-dev3949.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +2 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu_extension.cpp +5 -5
- package/src/duckdb/extension/json/include/json_deserializer.hpp +7 -16
- package/src/duckdb/extension/json/include/json_serializer.hpp +9 -15
- package/src/duckdb/extension/json/json_deserializer.cpp +29 -67
- package/src/duckdb/extension/json/json_scan.cpp +1 -1
- package/src/duckdb/extension/json/json_serializer.cpp +26 -69
- package/src/duckdb/src/common/enum_util.cpp +119 -7
- package/src/duckdb/src/common/extra_type_info.cpp +7 -3
- package/src/duckdb/src/common/radix_partitioning.cpp +8 -31
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +18 -3
- package/src/duckdb/src/common/serializer/binary_deserializer.cpp +62 -77
- package/src/duckdb/src/common/serializer/binary_serializer.cpp +84 -84
- package/src/duckdb/src/common/serializer/format_serializer.cpp +1 -1
- package/src/duckdb/src/common/sort/partition_state.cpp +41 -33
- package/src/duckdb/src/common/types/data_chunk.cpp +44 -8
- package/src/duckdb/src/common/types/hyperloglog.cpp +21 -0
- package/src/duckdb/src/common/types/interval.cpp +3 -0
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +252 -126
- package/src/duckdb/src/common/types/row/row_layout.cpp +3 -31
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +40 -32
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +39 -26
- package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +11 -1
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +21 -16
- package/src/duckdb/src/common/types/value.cpp +63 -42
- package/src/duckdb/src/common/types/vector.cpp +33 -67
- package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +3 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +222 -364
- package/src/duckdb/src/execution/join_hashtable.cpp +5 -6
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +240 -310
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +202 -173
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +36 -2
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/base_csv_reader.cpp +58 -162
- package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +434 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +80 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +90 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +95 -0
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/csv_reader_options.cpp +47 -28
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +35 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +107 -0
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/parallel_csv_reader.cpp +44 -44
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +52 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +336 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +165 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +398 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +175 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +39 -0
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +1 -2
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +614 -574
- package/src/duckdb/src/execution/window_executor.cpp +6 -5
- package/src/duckdb/src/function/cast/cast_function_set.cpp +1 -0
- package/src/duckdb/src/function/scalar/strftime_format.cpp +4 -4
- package/src/duckdb/src/function/table/copy_csv.cpp +94 -96
- package/src/duckdb/src/function/table/read_csv.cpp +150 -136
- package/src/duckdb/src/function/table/table_scan.cpp +0 -2
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +24 -0
- package/src/duckdb/src/include/duckdb/common/file_opener.hpp +9 -0
- package/src/duckdb/src/include/duckdb/common/fixed_size_map.hpp +208 -0
- package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/printer.hpp +11 -0
- package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +43 -30
- package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +36 -35
- package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/serializer/encoding_util.hpp +132 -0
- package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +125 -150
- package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +119 -107
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/shared_ptr.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -7
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/types/hyperloglog.hpp +7 -1
- package/src/duckdb/src/include/duckdb/common/types/interval.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +41 -9
- package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/types/row/row_layout.hpp +1 -23
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +14 -8
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +6 -3
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +13 -8
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/vector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +125 -146
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +5 -4
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +4 -3
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/base_csv_reader.hpp +17 -17
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +72 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +110 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +103 -0
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_file_handle.hpp +8 -15
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_line_info.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_reader_options.hpp +52 -28
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +127 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +75 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +51 -0
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/parallel_csv_reader.hpp +21 -27
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +21 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +18 -27
- package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +5 -6
- package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +4 -4
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +17 -12
- package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -1
- package/src/duckdb/src/include/duckdb/main/config.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/connection.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +6 -6
- package/src/duckdb/src/include/duckdb/parallel/event.hpp +12 -1
- package/src/duckdb/src/include/duckdb/storage/block.hpp +6 -0
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +7 -3
- package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +15 -3
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
- package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +6 -0
- package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +1 -0
- package/src/duckdb/src/include/duckdb.h +12 -0
- package/src/duckdb/src/main/capi/logical_types-c.cpp +22 -0
- package/src/duckdb/src/main/client_context_file_opener.cpp +17 -0
- package/src/duckdb/src/main/client_verify.cpp +1 -0
- package/src/duckdb/src/main/config.cpp +2 -2
- package/src/duckdb/src/main/connection.cpp +3 -3
- package/src/duckdb/src/main/relation/read_csv_relation.cpp +19 -13
- package/src/duckdb/src/parallel/pipeline_finish_event.cpp +1 -1
- package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -16
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +41 -25
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +4 -4
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +10 -10
- package/src/duckdb/src/planner/logical_operator.cpp +1 -1
- package/src/duckdb/src/planner/planner.cpp +1 -1
- package/src/duckdb/src/storage/checkpoint_manager.cpp +4 -3
- package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +5 -5
- package/src/duckdb/src/storage/serialization/serialize_expression.cpp +10 -10
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +20 -20
- package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +2 -2
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +118 -89
- package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +27 -27
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +16 -16
- package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +8 -8
- package/src/duckdb/src/storage/serialization/serialize_statement.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_storage.cpp +39 -0
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +9 -9
- package/src/duckdb/src/storage/statistics/base_statistics.cpp +67 -4
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +16 -0
- package/src/duckdb/src/storage/statistics/list_stats.cpp +21 -0
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +126 -1
- package/src/duckdb/src/storage/statistics/string_stats.cpp +23 -0
- package/src/duckdb/src/storage/statistics/struct_stats.cpp +27 -0
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/chunk_info.cpp +82 -3
- package/src/duckdb/src/storage/table/row_group.cpp +68 -1
- package/src/duckdb/src/storage/table/table_statistics.cpp +21 -0
- package/src/duckdb/src/storage/wal_replay.cpp +2 -2
- package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +15 -1
- package/src/duckdb/src/verification/statement_verifier.cpp +2 -0
- package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +8 -0
- package/src/duckdb/ub_src_execution.cpp +0 -2
- package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +18 -0
- package/src/duckdb/ub_src_execution_operator_csv_scanner_sniffer.cpp +12 -0
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +0 -12
- package/src/duckdb/ub_src_storage_serialization.cpp +2 -0
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
- package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -207
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +0 -133
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +0 -74
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +0 -73
@@ -10,28 +10,22 @@
|
|
10
10
|
|
11
11
|
#include "duckdb/common/common.hpp"
|
12
12
|
#include "duckdb/common/types/validity_mask.hpp"
|
13
|
-
#include "duckdb/planner/expression.hpp"
|
14
13
|
#include "duckdb/execution/operator/aggregate/aggregate_object.hpp"
|
14
|
+
#include "duckdb/planner/expression.hpp"
|
15
15
|
|
16
16
|
namespace duckdb {
|
17
17
|
|
18
18
|
class RowLayout {
|
19
19
|
public:
|
20
20
|
friend class TupleDataLayout;
|
21
|
-
|
22
|
-
using Aggregates = vector<AggregateObject>;
|
23
21
|
using ValidityBytes = TemplatedValidityMask<uint8_t>;
|
24
22
|
|
25
23
|
//! Creates an empty RowLayout
|
26
24
|
RowLayout();
|
27
25
|
|
28
26
|
public:
|
29
|
-
//! Initializes the RowLayout with the specified types and aggregates to an empty RowLayout
|
30
|
-
void Initialize(vector<LogicalType> types_p, Aggregates aggregates_p, bool align = true);
|
31
27
|
//! Initializes the RowLayout with the specified types to an empty RowLayout
|
32
28
|
void Initialize(vector<LogicalType> types, bool align = true);
|
33
|
-
//! Initializes the RowLayout with the specified aggregates to an empty RowLayout
|
34
|
-
void Initialize(Aggregates aggregates_p, bool align = true);
|
35
29
|
//! Returns the number of data columns
|
36
30
|
inline idx_t ColumnCount() const {
|
37
31
|
return types.size();
|
@@ -40,14 +34,6 @@ public:
|
|
40
34
|
inline const vector<LogicalType> &GetTypes() const {
|
41
35
|
return types;
|
42
36
|
}
|
43
|
-
//! Returns the number of aggregates
|
44
|
-
inline idx_t AggregateCount() const {
|
45
|
-
return aggregates.size();
|
46
|
-
}
|
47
|
-
//! Returns a list of the aggregates for this data chunk
|
48
|
-
inline Aggregates &GetAggregates() {
|
49
|
-
return aggregates;
|
50
|
-
}
|
51
37
|
//! Returns the total width required for each row, including padding
|
52
38
|
inline idx_t GetRowWidth() const {
|
53
39
|
return row_width;
|
@@ -64,10 +50,6 @@ public:
|
|
64
50
|
inline idx_t GetAggrOffset() const {
|
65
51
|
return flag_width + data_width;
|
66
52
|
}
|
67
|
-
//! Returns the total width required for the aggregates, including padding
|
68
|
-
inline idx_t GetAggrWidth() const {
|
69
|
-
return aggr_width;
|
70
|
-
}
|
71
53
|
//! Returns the column offsets into each row
|
72
54
|
inline const vector<idx_t> &GetOffsets() const {
|
73
55
|
return offsets;
|
@@ -83,14 +65,10 @@ public:
|
|
83
65
|
private:
|
84
66
|
//! The types of the data columns
|
85
67
|
vector<LogicalType> types;
|
86
|
-
//! The aggregate functions
|
87
|
-
Aggregates aggregates;
|
88
68
|
//! The width of the validity header
|
89
69
|
idx_t flag_width;
|
90
70
|
//! The width of the data portion
|
91
71
|
idx_t data_width;
|
92
|
-
//! The width of the aggregate state portion
|
93
|
-
idx_t aggr_width;
|
94
72
|
//! The width of the entire row
|
95
73
|
idx_t row_width;
|
96
74
|
//! The offsets to the columns and aggregate data in each row
|
@@ -55,6 +55,8 @@ public:
|
|
55
55
|
TupleDataAllocator(BufferManager &buffer_manager, const TupleDataLayout &layout);
|
56
56
|
TupleDataAllocator(TupleDataAllocator &allocator);
|
57
57
|
|
58
|
+
//! Get the buffer manager
|
59
|
+
BufferManager &GetBufferManager();
|
58
60
|
//! Get the buffer allocator
|
59
61
|
Allocator &GetAllocator();
|
60
62
|
//! Get the layout
|
@@ -83,16 +85,16 @@ public:
|
|
83
85
|
private:
|
84
86
|
//! Builds out a single part (grabs the lock)
|
85
87
|
TupleDataChunkPart BuildChunkPart(TupleDataPinState &pin_state, TupleDataChunkState &chunk_state,
|
86
|
-
const idx_t append_offset, const idx_t append_count);
|
88
|
+
const idx_t append_offset, const idx_t append_count, TupleDataChunk &chunk);
|
87
89
|
//! Internal function for InitializeChunkState
|
88
90
|
void InitializeChunkStateInternal(TupleDataPinState &pin_state, TupleDataChunkState &chunk_state, idx_t offset,
|
89
91
|
bool recompute, bool init_heap_pointers, bool init_heap_sizes,
|
90
|
-
|
92
|
+
unsafe_vector<reference<TupleDataChunkPart>> &parts);
|
91
93
|
//! Internal function for ReleaseOrStoreHandles
|
92
|
-
static void ReleaseOrStoreHandlesInternal(TupleDataSegment &segment,
|
93
|
-
|
94
|
-
|
95
|
-
TupleDataPinProperties properties);
|
94
|
+
static void ReleaseOrStoreHandlesInternal(TupleDataSegment &segment,
|
95
|
+
unsafe_vector<BufferHandle> &pinned_row_handles,
|
96
|
+
perfect_map_t<BufferHandle> &handles, const perfect_set_t &block_ids,
|
97
|
+
unsafe_vector<TupleDataBlock> &blocks, TupleDataPinProperties properties);
|
96
98
|
//! Pins the given row block
|
97
99
|
BufferHandle &PinRowBlock(TupleDataPinState &state, const TupleDataChunkPart &part);
|
98
100
|
//! Pins the given heap block
|
@@ -108,9 +110,13 @@ private:
|
|
108
110
|
//! The layout of the data
|
109
111
|
const TupleDataLayout layout;
|
110
112
|
//! Blocks storing the fixed-size rows
|
111
|
-
|
113
|
+
unsafe_vector<TupleDataBlock> row_blocks;
|
112
114
|
//! Blocks storing the variable-size data of the fixed-size rows (e.g., string, list)
|
113
|
-
|
115
|
+
unsafe_vector<TupleDataBlock> heap_blocks;
|
116
|
+
|
117
|
+
//! Re-usable arrays used while building buffer space
|
118
|
+
unsafe_vector<reference<TupleDataChunkPart>> chunk_parts;
|
119
|
+
unsafe_vector<pair<idx_t, idx_t>> chunk_part_indices;
|
114
120
|
};
|
115
121
|
|
116
122
|
} // namespace duckdb
|
@@ -45,6 +45,7 @@ struct TupleDataGatherFunction {
|
|
45
45
|
//! FIXME: rename to RowDataCollection after we phase it out
|
46
46
|
class TupleDataCollection {
|
47
47
|
friend class TupleDataChunkIterator;
|
48
|
+
friend class PartitionedTupleData;
|
48
49
|
|
49
50
|
public:
|
50
51
|
//! Constructs a TupleDataCollection with the specified layout
|
@@ -63,8 +64,6 @@ public:
|
|
63
64
|
idx_t ChunkCount() const;
|
64
65
|
//! The size (in bytes) of the blocks held by this tuple data collection
|
65
66
|
idx_t SizeInBytes() const;
|
66
|
-
//! Get pointers to the pinned blocks
|
67
|
-
void GetBlockPointers(vector<data_ptr_t> &block_pointers) const;
|
68
67
|
//! Unpins all held pins
|
69
68
|
void Unpin();
|
70
69
|
|
@@ -186,6 +185,8 @@ private:
|
|
186
185
|
void Initialize();
|
187
186
|
//! Gets all column ids
|
188
187
|
void GetAllColumnIDs(vector<column_t> &column_ids);
|
188
|
+
//! Adds a segment to this TupleDataCollection
|
189
|
+
void AddSegment(TupleDataSegment &&segment);
|
189
190
|
|
190
191
|
//! Computes the heap sizes for the specific Vector that will be appended
|
191
192
|
static void ComputeHeapSizes(Vector &heap_sizes_v, const Vector &source_v, TupleDataVectorFormat &source,
|
@@ -219,7 +220,7 @@ private:
|
|
219
220
|
void ScanAtIndex(TupleDataPinState &pin_state, TupleDataChunkState &chunk_state, const vector<column_t> &column_ids,
|
220
221
|
idx_t segment_index, idx_t chunk_index, DataChunk &result);
|
221
222
|
|
222
|
-
//! Verify
|
223
|
+
//! Verify count/data size of this collection
|
223
224
|
void Verify() const;
|
224
225
|
|
225
226
|
private:
|
@@ -229,6 +230,8 @@ private:
|
|
229
230
|
shared_ptr<TupleDataAllocator> allocator;
|
230
231
|
//! The number of entries stored in the TupleDataCollection
|
231
232
|
idx_t count;
|
233
|
+
//! The size (in bytes) of this TupleDataCollection
|
234
|
+
idx_t data_size;
|
232
235
|
//! The data segments of the TupleDataCollection
|
233
236
|
unsafe_vector<TupleDataSegment> segments;
|
234
237
|
//! The set of scatter functions
|
@@ -83,9 +83,14 @@ public:
|
|
83
83
|
inline bool AllConstant() const {
|
84
84
|
return all_constant;
|
85
85
|
}
|
86
|
+
//! Gets offset to where heap size is stored
|
86
87
|
inline idx_t GetHeapSizeOffset() const {
|
87
88
|
return heap_size_offset;
|
88
89
|
}
|
90
|
+
//! Returns whether any of the aggregates have a destructor
|
91
|
+
inline bool HasDestructor() const {
|
92
|
+
return has_destructor;
|
93
|
+
}
|
89
94
|
|
90
95
|
private:
|
91
96
|
//! The types of the data columns
|
@@ -108,6 +113,8 @@ private:
|
|
108
113
|
bool all_constant;
|
109
114
|
//! Offset to the heap size of every row
|
110
115
|
idx_t heap_size_offset;
|
116
|
+
//! Whether any of the aggregates have a destructor
|
117
|
+
bool has_destructor;
|
111
118
|
};
|
112
119
|
|
113
120
|
} // namespace duckdb
|
@@ -10,6 +10,7 @@
|
|
10
10
|
|
11
11
|
#include "duckdb/common/common.hpp"
|
12
12
|
#include "duckdb/common/mutex.hpp"
|
13
|
+
#include "duckdb/common/perfect_map_set.hpp"
|
13
14
|
#include "duckdb/common/unordered_set.hpp"
|
14
15
|
#include "duckdb/common/vector.hpp"
|
15
16
|
#include "duckdb/storage/buffer_manager.hpp"
|
@@ -21,7 +22,7 @@ class TupleDataLayout;
|
|
21
22
|
|
22
23
|
struct TupleDataChunkPart {
|
23
24
|
public:
|
24
|
-
TupleDataChunkPart();
|
25
|
+
TupleDataChunkPart(mutex &lock);
|
25
26
|
|
26
27
|
//! Disable copy constructors
|
27
28
|
TupleDataChunkPart(const TupleDataChunkPart &other) = delete;
|
@@ -45,8 +46,8 @@ public:
|
|
45
46
|
uint32_t total_heap_size;
|
46
47
|
//! Tuple count for this chunk part
|
47
48
|
uint32_t count;
|
48
|
-
//! Lock for recomputing heap pointers
|
49
|
-
mutex lock;
|
49
|
+
//! Lock for recomputing heap pointers (owned by TupleDataChunk)
|
50
|
+
reference<mutex> lock;
|
50
51
|
};
|
51
52
|
|
52
53
|
struct TupleDataChunk {
|
@@ -70,13 +71,15 @@ public:
|
|
70
71
|
|
71
72
|
public:
|
72
73
|
//! The parts of this chunk
|
73
|
-
|
74
|
+
unsafe_vector<TupleDataChunkPart> parts;
|
74
75
|
//! The row block ids referenced by the chunk
|
75
|
-
|
76
|
+
perfect_set_t row_block_ids;
|
76
77
|
//! The heap block ids referenced by the chunk
|
77
|
-
|
78
|
+
perfect_set_t heap_block_ids;
|
78
79
|
//! Tuple count for this chunk
|
79
80
|
idx_t count;
|
81
|
+
//! Lock for recomputing heap pointers
|
82
|
+
unsafe_unique_ptr<mutex> lock;
|
80
83
|
};
|
81
84
|
|
82
85
|
struct TupleDataSegment {
|
@@ -112,13 +115,15 @@ public:
|
|
112
115
|
unsafe_vector<TupleDataChunk> chunks;
|
113
116
|
//! The tuple count of this segment
|
114
117
|
idx_t count;
|
118
|
+
//! The data size of this segment
|
119
|
+
idx_t data_size;
|
115
120
|
|
116
121
|
//! Lock for modifying pinned_handles
|
117
122
|
mutex pinned_handles_lock;
|
118
123
|
//! Where handles to row blocks will be stored with TupleDataPinProperties::KEEP_EVERYTHING_PINNED
|
119
|
-
|
124
|
+
unsafe_vector<BufferHandle> pinned_row_handles;
|
120
125
|
//! Where handles to heap blocks will be stored with TupleDataPinProperties::KEEP_EVERYTHING_PINNED
|
121
|
-
|
126
|
+
unsafe_vector<BufferHandle> pinned_heap_handles;
|
122
127
|
};
|
123
128
|
|
124
129
|
} // namespace duckdb
|
@@ -9,6 +9,7 @@
|
|
9
9
|
#pragma once
|
10
10
|
|
11
11
|
#include "duckdb/common/mutex.hpp"
|
12
|
+
#include "duckdb/common/perfect_map_set.hpp"
|
12
13
|
#include "duckdb/common/types.hpp"
|
13
14
|
|
14
15
|
namespace duckdb {
|
@@ -26,8 +27,8 @@ enum class TupleDataPinProperties : uint8_t {
|
|
26
27
|
};
|
27
28
|
|
28
29
|
struct TupleDataPinState {
|
29
|
-
|
30
|
-
|
30
|
+
perfect_map_t<BufferHandle> row_handles;
|
31
|
+
perfect_map_t<BufferHandle> heap_handles;
|
31
32
|
TupleDataPinProperties properties = TupleDataPinProperties::INVALID;
|
32
33
|
};
|
33
34
|
|
@@ -101,14 +101,14 @@ public:
|
|
101
101
|
DUCKDB_API void Reference(const Value &value);
|
102
102
|
//! Causes this vector to reference the data held by the other vector.
|
103
103
|
//! The type of the "other" vector should match the type of this vector
|
104
|
-
DUCKDB_API void Reference(Vector &other);
|
104
|
+
DUCKDB_API void Reference(const Vector &other);
|
105
105
|
//! Reinterpret the data of the other vector as the type of this vector
|
106
106
|
//! Note that this takes the data of the other vector as-is and places it in this vector
|
107
107
|
//! Without changing the type of this vector
|
108
|
-
DUCKDB_API void Reinterpret(Vector &other);
|
108
|
+
DUCKDB_API void Reinterpret(const Vector &other);
|
109
109
|
|
110
110
|
//! Causes this vector to reference the data held by the other vector, changes the type if required.
|
111
|
-
DUCKDB_API void ReferenceAndSetType(Vector &other);
|
111
|
+
DUCKDB_API void ReferenceAndSetType(const Vector &other);
|
112
112
|
|
113
113
|
//! Resets a vector from a vector cache.
|
114
114
|
//! This turns the vector back into an empty FlatVector with STANDARD_VECTOR_SIZE entries.
|
@@ -88,14 +88,14 @@ public:
|
|
88
88
|
}
|
89
89
|
|
90
90
|
typename original::reference back() {
|
91
|
-
if (original::empty()) {
|
91
|
+
if (MemorySafety<SAFE>::enabled && original::empty()) {
|
92
92
|
throw InternalException("'back' called on an empty vector!");
|
93
93
|
}
|
94
94
|
return get<SAFE>(original::size() - 1);
|
95
95
|
}
|
96
96
|
|
97
97
|
typename original::const_reference back() const {
|
98
|
-
if (original::empty()) {
|
98
|
+
if (MemorySafety<SAFE>::enabled && original::empty()) {
|
99
99
|
throw InternalException("'back' called on an empty vector!");
|
100
100
|
}
|
101
101
|
return get<SAFE>(original::size() - 1);
|
@@ -8,12 +8,13 @@
|
|
8
8
|
|
9
9
|
#pragma once
|
10
10
|
|
11
|
-
#include "duckdb/common/types/row/
|
11
|
+
#include "duckdb/common/types/row/partitioned_tuple_data.hpp"
|
12
12
|
#include "duckdb/execution/base_aggregate_hashtable.hpp"
|
13
13
|
#include "duckdb/storage/arena_allocator.hpp"
|
14
14
|
#include "duckdb/storage/buffer/buffer_handle.hpp"
|
15
15
|
|
16
16
|
namespace duckdb {
|
17
|
+
|
17
18
|
class BlockHandle;
|
18
19
|
class BufferHandle;
|
19
20
|
|
@@ -27,91 +28,87 @@ struct FlushMoveState;
|
|
27
28
|
stores them in the HT. It uses linear probing for collision resolution.
|
28
29
|
*/
|
29
30
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
// [SALT] are the high bits of the hash value, e.g. 16 for 64 bit hashes
|
35
|
-
// [PAGE_NR] is the buffer managed payload page index
|
36
|
-
// [PAGE_OFFSET] is the logical entry offset into said payload page
|
37
|
-
|
38
|
-
// NOTE: PAGE_NR and PAGE_OFFSET are reversed for 64 bit HTs because struct packing
|
39
|
-
|
40
|
-
// payload layout
|
41
|
-
// [VALIDITY][GROUPS][HASH][PADDING][PAYLOAD]
|
42
|
-
// [VALIDITY] is the validity bits of the data columns (including the HASH)
|
43
|
-
// [GROUPS] is the group data, could be multiple values, fixed size, strings are elsewhere
|
44
|
-
// [HASH] is the hash data of the groups
|
45
|
-
// [PADDING] is gunk data to align payload properly
|
46
|
-
// [PAYLOAD] is the payload (i.e. the aggregate states)
|
47
|
-
struct aggr_ht_entry_64 {
|
48
|
-
uint16_t salt;
|
49
|
-
uint16_t page_offset;
|
50
|
-
uint32_t page_nr; // this has to come last because alignment
|
51
|
-
};
|
31
|
+
struct aggr_ht_entry_t {
|
32
|
+
public:
|
33
|
+
explicit aggr_ht_entry_t(hash_t value_p) : value(value_p) {
|
34
|
+
}
|
52
35
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
36
|
+
inline bool IsOccupied() const {
|
37
|
+
return value != 0;
|
38
|
+
}
|
39
|
+
|
40
|
+
inline data_ptr_t GetPointer() const {
|
41
|
+
D_ASSERT(IsOccupied());
|
42
|
+
return reinterpret_cast<data_ptr_t>(value & POINTER_MASK);
|
43
|
+
}
|
44
|
+
inline void SetPointer(const data_ptr_t &pointer) {
|
45
|
+
// Pointer shouldn't use upper bits
|
46
|
+
D_ASSERT((reinterpret_cast<uint64_t>(pointer) & SALT_MASK) == 0);
|
47
|
+
// Value should have all 1's in the pointer area
|
48
|
+
D_ASSERT((value & POINTER_MASK) == POINTER_MASK);
|
49
|
+
// Set upper bits to 1 in pointer so the salt stays intact
|
50
|
+
value &= reinterpret_cast<uint64_t>(pointer) | SALT_MASK;
|
51
|
+
}
|
58
52
|
|
59
|
-
|
53
|
+
static inline hash_t ExtractSalt(const hash_t &hash) {
|
54
|
+
// Leaves upper bits intact, sets lower bits to all 1's
|
55
|
+
return hash | POINTER_MASK;
|
56
|
+
}
|
57
|
+
inline hash_t GetSalt() const {
|
58
|
+
return ExtractSalt(value);
|
59
|
+
}
|
60
|
+
inline void SetSalt(const hash_t &salt) {
|
61
|
+
// Shouldn't be occupied when we set this
|
62
|
+
D_ASSERT(!IsOccupied());
|
63
|
+
// Salt should have all 1's in the pointer field
|
64
|
+
D_ASSERT((salt & POINTER_MASK) == POINTER_MASK);
|
65
|
+
// No need to mask, just put the whole thing there
|
66
|
+
value = salt;
|
67
|
+
}
|
60
68
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
69
|
+
private:
|
70
|
+
//! Upper 16 bits are salt
|
71
|
+
static constexpr const hash_t SALT_MASK = 0xFFFF000000000000;
|
72
|
+
//! Lower 48 bits are the pointer
|
73
|
+
static constexpr const hash_t POINTER_MASK = 0x0000FFFFFFFFFFFF;
|
65
74
|
|
66
|
-
|
67
|
-
AggregateHTAppendState();
|
68
|
-
|
69
|
-
Vector ht_offsets;
|
70
|
-
Vector hash_salts;
|
71
|
-
SelectionVector group_compare_vector;
|
72
|
-
SelectionVector no_match_vector;
|
73
|
-
SelectionVector empty_vector;
|
74
|
-
SelectionVector new_groups;
|
75
|
-
Vector addresses;
|
76
|
-
unsafe_unique_array<UnifiedVectorFormat> group_data;
|
77
|
-
DataChunk group_chunk;
|
78
|
-
|
79
|
-
TupleDataChunkState chunk_state;
|
80
|
-
bool chunk_state_initialized;
|
75
|
+
hash_t value;
|
81
76
|
};
|
82
77
|
|
83
78
|
class GroupedAggregateHashTable : public BaseAggregateHashTable {
|
84
|
-
public:
|
85
|
-
//! The hash table load factor, when a resize is triggered
|
86
|
-
constexpr static float LOAD_FACTOR = 1.5;
|
87
|
-
constexpr static uint8_t HASH_WIDTH = sizeof(hash_t);
|
88
|
-
|
89
79
|
public:
|
90
80
|
GroupedAggregateHashTable(ClientContext &context, Allocator &allocator, vector<LogicalType> group_types,
|
91
81
|
vector<LogicalType> payload_types, const vector<BoundAggregateExpression *> &aggregates,
|
92
|
-
|
93
|
-
idx_t initial_capacity = InitialCapacity());
|
82
|
+
idx_t initial_capacity = InitialCapacity(), idx_t radix_bits = 0);
|
94
83
|
GroupedAggregateHashTable(ClientContext &context, Allocator &allocator, vector<LogicalType> group_types,
|
95
84
|
vector<LogicalType> payload_types, vector<AggregateObject> aggregates,
|
96
|
-
|
97
|
-
idx_t initial_capacity = InitialCapacity());
|
85
|
+
idx_t initial_capacity = InitialCapacity(), idx_t radix_bits = 0);
|
98
86
|
GroupedAggregateHashTable(ClientContext &context, Allocator &allocator, vector<LogicalType> group_types);
|
99
87
|
~GroupedAggregateHashTable() override;
|
100
88
|
|
101
89
|
public:
|
90
|
+
//! The hash table load factor, when a resize is triggered
|
91
|
+
constexpr static float LOAD_FACTOR = 1.5;
|
92
|
+
|
93
|
+
//! Get the layout of this HT
|
94
|
+
const TupleDataLayout &GetLayout() const;
|
95
|
+
//! Number of groups in the HT
|
96
|
+
idx_t Count() const;
|
97
|
+
//! Initial capacity of the HT
|
98
|
+
static idx_t InitialCapacity();
|
99
|
+
//! Capacity that can hold 'count' entries without resizing
|
100
|
+
static idx_t GetCapacityForCount(idx_t count);
|
101
|
+
//! Current capacity of the HT
|
102
|
+
idx_t Capacity() const;
|
103
|
+
//! Threshold at which to resize the HT
|
104
|
+
idx_t ResizeThreshold() const;
|
105
|
+
|
102
106
|
//! Add the given data to the HT, computing the aggregates grouped by the
|
103
107
|
//! data in the group chunk. When resize = true, aggregates will not be
|
104
108
|
//! computed but instead just assigned.
|
105
|
-
idx_t AddChunk(
|
106
|
-
|
107
|
-
idx_t AddChunk(
|
108
|
-
const unsafe_vector<idx_t> &filter);
|
109
|
-
idx_t AddChunk(AggregateHTAppendState &state, DataChunk &groups, DataChunk &payload, AggregateType filter);
|
110
|
-
|
111
|
-
//! Scan the HT starting from the scan_position until the result and group
|
112
|
-
//! chunks are filled. scan_position will be updated by this function.
|
113
|
-
//! Returns the amount of elements found.
|
114
|
-
idx_t Scan(TupleDataParallelScanState &gstate, TupleDataLocalScanState &lstate, DataChunk &result);
|
109
|
+
idx_t AddChunk(DataChunk &groups, DataChunk &payload, const unsafe_vector<idx_t> &filter);
|
110
|
+
idx_t AddChunk(DataChunk &groups, Vector &group_hashes, DataChunk &payload, const unsafe_vector<idx_t> &filter);
|
111
|
+
idx_t AddChunk(DataChunk &groups, DataChunk &payload, AggregateType filter);
|
115
112
|
|
116
113
|
//! Fetch the aggregates for specific groups from the HT and place them in the result
|
117
114
|
void FetchAggregates(DataChunk &groups, DataChunk &result);
|
@@ -119,108 +116,90 @@ public:
|
|
119
116
|
//! Finds or creates groups in the hashtable using the specified group keys. The addresses vector will be filled
|
120
117
|
//! with pointers to the groups in the hash table, and the new_groups selection vector will point to the newly
|
121
118
|
//! created groups. The return value is the amount of newly created groups.
|
122
|
-
idx_t FindOrCreateGroups(
|
123
|
-
Vector &addresses_out, SelectionVector &new_groups_out);
|
124
|
-
idx_t FindOrCreateGroups(AggregateHTAppendState &state, DataChunk &groups, Vector &addresses_out,
|
119
|
+
idx_t FindOrCreateGroups(DataChunk &groups, Vector &group_hashes, Vector &addresses_out,
|
125
120
|
SelectionVector &new_groups_out);
|
126
|
-
|
127
|
-
|
128
|
-
//! Executes the filter(if any) and update the aggregates
|
129
|
-
void Combine(GroupedAggregateHashTable &other);
|
130
|
-
|
131
|
-
//! Appends the data in the other HT to this one
|
132
|
-
void Append(GroupedAggregateHashTable &other);
|
133
|
-
|
134
|
-
TupleDataCollection &GetDataCollection() {
|
135
|
-
return *data_collection;
|
136
|
-
}
|
137
|
-
|
138
|
-
idx_t Count() const {
|
139
|
-
return data_collection->Count();
|
140
|
-
}
|
141
|
-
|
142
|
-
idx_t DataSize() const {
|
143
|
-
return data_collection->SizeInBytes();
|
144
|
-
}
|
145
|
-
|
146
|
-
static idx_t InitialCapacity();
|
147
|
-
idx_t Capacity() {
|
148
|
-
return capacity;
|
149
|
-
}
|
150
|
-
|
151
|
-
static idx_t FirstPartSize(idx_t count, HtEntryType entry_type) {
|
152
|
-
idx_t entry_size = entry_type == HT_WIDTH_32 ? sizeof(aggr_ht_entry_32) : sizeof(aggr_ht_entry_64);
|
153
|
-
return NextPowerOfTwo(count * 2L) * entry_size;
|
154
|
-
}
|
121
|
+
idx_t FindOrCreateGroups(DataChunk &groups, Vector &addresses_out, SelectionVector &new_groups_out);
|
122
|
+
void FindOrCreateGroups(DataChunk &groups, Vector &addresses_out);
|
155
123
|
|
156
|
-
|
157
|
-
|
158
|
-
}
|
124
|
+
unique_ptr<PartitionedTupleData> &GetPartitionedData();
|
125
|
+
shared_ptr<ArenaAllocator> GetAggregateAllocator();
|
159
126
|
|
160
|
-
|
161
|
-
idx_t
|
162
|
-
|
127
|
+
//! Resize the HT to the specified size. Must be larger than the current size.
|
128
|
+
void Resize(idx_t size);
|
129
|
+
//! Resets the pointer table of the HT to all 0's
|
130
|
+
void ClearPointerTable();
|
131
|
+
//! Resets the group count to 0
|
132
|
+
void ResetCount();
|
133
|
+
//! Set the radix bits for this HT
|
134
|
+
void SetRadixBits(idx_t radix_bits);
|
135
|
+
//! Initializes the PartitionedTupleData
|
136
|
+
void InitializePartitionedData();
|
163
137
|
|
164
|
-
|
165
|
-
void
|
138
|
+
//! Executes the filter(if any) and update the aggregates
|
139
|
+
void Combine(GroupedAggregateHashTable &other);
|
140
|
+
void Combine(TupleDataCollection &other_data);
|
166
141
|
|
167
|
-
|
142
|
+
//! Unpins the data blocks
|
143
|
+
void UnpinData();
|
168
144
|
|
169
145
|
private:
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
146
|
+
//! Append state
|
147
|
+
struct AggregateHTAppendState {
|
148
|
+
AggregateHTAppendState();
|
149
|
+
|
150
|
+
PartitionedTupleDataAppendState append_state;
|
151
|
+
|
152
|
+
Vector ht_offsets;
|
153
|
+
Vector hash_salts;
|
154
|
+
SelectionVector group_compare_vector;
|
155
|
+
SelectionVector no_match_vector;
|
156
|
+
SelectionVector empty_vector;
|
157
|
+
SelectionVector new_groups;
|
158
|
+
Vector addresses;
|
159
|
+
unsafe_unique_array<UnifiedVectorFormat> group_data;
|
160
|
+
DataChunk group_chunk;
|
161
|
+
} state;
|
162
|
+
|
163
|
+
//! The number of radix bits to partition by
|
164
|
+
idx_t radix_bits;
|
178
165
|
//! The data of the HT
|
179
|
-
unique_ptr<
|
180
|
-
TupleDataPinState td_pin_state;
|
181
|
-
vector<data_ptr_t> payload_hds_ptrs;
|
166
|
+
unique_ptr<PartitionedTupleData> partitioned_data;
|
182
167
|
|
183
|
-
//!
|
184
|
-
|
185
|
-
data_ptr_t hashes_hdl_ptr;
|
186
|
-
idx_t hash_offset; // Offset into the layout of the hash column
|
187
|
-
|
188
|
-
hash_t hash_prefix_shift;
|
168
|
+
//! Predicates for matching groups (always ExpressionType::COMPARE_EQUAL)
|
169
|
+
vector<ExpressionType> predicates;
|
189
170
|
|
171
|
+
//! The number of groups in the HT
|
172
|
+
idx_t count;
|
173
|
+
//! The capacity of the HT. This can be increased using GroupedAggregateHashTable::Resize
|
174
|
+
idx_t capacity;
|
175
|
+
//! The hash map (pointer table) of the HT: allocated data and pointer into it
|
176
|
+
AllocatedData hash_map;
|
177
|
+
aggr_ht_entry_t *entries;
|
178
|
+
//! Offset of the hash column in the rows
|
179
|
+
idx_t hash_offset;
|
190
180
|
//! Bitmask for getting relevant bits from the hashes to determine the position
|
191
181
|
hash_t bitmask;
|
192
182
|
|
193
|
-
bool is_finalized;
|
194
|
-
|
195
|
-
vector<ExpressionType> predicates;
|
196
|
-
|
197
183
|
//! The active arena allocator used by the aggregates for their internal state
|
198
184
|
shared_ptr<ArenaAllocator> aggregate_allocator;
|
199
185
|
//! Owning arena allocators that this HT has data from
|
200
186
|
vector<shared_ptr<ArenaAllocator>> stored_allocators;
|
201
187
|
|
202
188
|
private:
|
189
|
+
//! Disabled the copy constructor
|
203
190
|
GroupedAggregateHashTable(const GroupedAggregateHashTable &) = delete;
|
204
|
-
|
191
|
+
//! Destroy the HT
|
205
192
|
void Destroy();
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
template <class ENTRY>
|
211
|
-
void Resize(idx_t size);
|
212
|
-
//! Initializes the first part of the HT
|
213
|
-
template <class ENTRY>
|
214
|
-
void InitializeHashes();
|
193
|
+
|
194
|
+
//! Apply bitmask to get the entry in the HT
|
195
|
+
inline idx_t ApplyBitMask(hash_t hash) const;
|
196
|
+
|
215
197
|
//! Does the actual group matching / creation
|
216
|
-
|
217
|
-
idx_t FindOrCreateGroupsInternal(DataChunk &groups, Vector &group_hashes_v, Vector &addresses_v,
|
198
|
+
idx_t FindOrCreateGroupsInternal(DataChunk &groups, Vector &group_hashes, Vector &addresses,
|
218
199
|
SelectionVector &new_groups);
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
idx_t FindOrCreateGroupsInternal(AggregateHTAppendState &state, DataChunk &groups, Vector &group_hashes,
|
223
|
-
Vector &addresses, SelectionVector &new_groups);
|
200
|
+
|
201
|
+
//! Verify the pointer table of the HT
|
202
|
+
void Verify();
|
224
203
|
};
|
225
204
|
|
226
205
|
} // namespace duckdb
|
package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp
CHANGED
@@ -8,17 +8,18 @@
|
|
8
8
|
|
9
9
|
#pragma once
|
10
10
|
|
11
|
+
#include "duckdb/execution/operator/aggregate/distinct_aggregate_data.hpp"
|
12
|
+
#include "duckdb/execution/operator/aggregate/grouped_aggregate_data.hpp"
|
11
13
|
#include "duckdb/execution/physical_operator.hpp"
|
12
|
-
#include "duckdb/storage/data_table.hpp"
|
13
|
-
#include "duckdb/parser/group_by_node.hpp"
|
14
14
|
#include "duckdb/execution/radix_partitioned_hashtable.hpp"
|
15
|
-
#include "duckdb/
|
16
|
-
#include "duckdb/
|
15
|
+
#include "duckdb/parser/group_by_node.hpp"
|
16
|
+
#include "duckdb/storage/data_table.hpp"
|
17
17
|
|
18
18
|
namespace duckdb {
|
19
19
|
|
20
20
|
class ClientContext;
|
21
21
|
class BufferManager;
|
22
|
+
class PhysicalHashAggregate;
|
22
23
|
|
23
24
|
struct HashAggregateGroupingData {
|
24
25
|
public:
|