duckdb 0.7.2-dev1901.0 → 0.7.2-dev2144.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +2 -0
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +1 -1
- package/src/duckdb/extension/parquet/parquet_metadata.cpp +4 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +69 -44
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +20 -2
- package/src/duckdb/src/common/box_renderer.cpp +4 -2
- package/src/duckdb/src/common/constants.cpp +10 -1
- package/src/duckdb/src/common/filename_pattern.cpp +41 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +144 -15
- package/src/duckdb/src/common/radix_partitioning.cpp +101 -369
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +8 -9
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_gather.cpp +5 -3
- package/src/duckdb/src/common/row_operations/row_match.cpp +117 -22
- package/src/duckdb/src/common/row_operations/row_scatter.cpp +2 -2
- package/src/duckdb/src/common/sort/partition_state.cpp +1 -1
- package/src/duckdb/src/common/sort/sort_state.cpp +2 -1
- package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
- package/src/duckdb/src/common/types/{column_data_allocator.cpp → column/column_data_allocator.cpp} +2 -2
- package/src/duckdb/src/common/types/{column_data_collection.cpp → column/column_data_collection.cpp} +22 -4
- package/src/duckdb/src/common/types/{column_data_collection_segment.cpp → column/column_data_collection_segment.cpp} +2 -1
- package/src/duckdb/src/common/types/{column_data_consumer.cpp → column/column_data_consumer.cpp} +1 -1
- package/src/duckdb/src/common/types/{partitioned_column_data.cpp → column/partitioned_column_data.cpp} +11 -9
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +316 -0
- package/src/duckdb/src/common/types/{row_data_collection.cpp → row/row_data_collection.cpp} +1 -1
- package/src/duckdb/src/common/types/{row_data_collection_scanner.cpp → row/row_data_collection_scanner.cpp} +2 -2
- package/src/duckdb/src/common/types/{row_layout.cpp → row/row_layout.cpp} +1 -1
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +465 -0
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +511 -0
- package/src/duckdb/src/common/types/row/tuple_data_iterator.cpp +96 -0
- package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +119 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +1200 -0
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +170 -0
- package/src/duckdb/src/common/types/vector.cpp +1 -1
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +252 -290
- package/src/duckdb/src/execution/join_hashtable.cpp +192 -328
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +4 -4
- package/src/duckdb/src/execution/operator/helper/physical_execute.cpp +3 -3
- package/src/duckdb/src/execution/operator/helper/physical_limit_percent.cpp +2 -3
- package/src/duckdb/src/execution/operator/helper/physical_result_collector.cpp +2 -3
- package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +36 -21
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_cross_product.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +166 -144
- package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +2 -10
- package/src/duckdb/src/execution/operator/join/physical_positional_join.cpp +0 -1
- package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +2 -2
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +13 -11
- package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +3 -2
- package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +25 -24
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +4 -3
- package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +1 -1
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +3 -3
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +9 -37
- package/src/duckdb/src/execution/physical_operator.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +19 -18
- package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +2 -1
- package/src/duckdb/src/execution/physical_plan/plan_execute.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_explain.cpp +5 -6
- package/src/duckdb/src/execution/physical_plan/plan_expression_get.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +3 -3
- package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -1
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +39 -17
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -2
- package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +5 -5
- package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +2 -2
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +528 -0
- package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +34 -0
- package/src/duckdb/src/include/duckdb/common/helper.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +13 -3
- package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +34 -0
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +80 -27
- package/src/duckdb/src/include/duckdb/common/reference_map.hpp +38 -0
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +7 -6
- package/src/duckdb/src/include/duckdb/common/sort/comparators.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/sort.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/sorted_block.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{column_data_allocator.hpp → column/column_data_allocator.hpp} +4 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection.hpp → column/column_data_collection.hpp} +2 -2
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_iterators.hpp → column/column_data_collection_iterators.hpp} +2 -2
- package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_segment.hpp → column/column_data_collection_segment.hpp} +3 -3
- package/src/duckdb/src/include/duckdb/common/types/{column_data_consumer.hpp → column/column_data_consumer.hpp} +8 -4
- package/src/duckdb/src/include/duckdb/common/types/{column_data_scan_states.hpp → column/column_data_scan_states.hpp} +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{partitioned_column_data.hpp → column/partitioned_column_data.hpp} +15 -7
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +140 -0
- package/src/duckdb/src/include/duckdb/common/types/{row_data_collection.hpp → row/row_data_collection.hpp} +1 -1
- package/src/duckdb/src/include/duckdb/common/types/{row_data_collection_scanner.hpp → row/row_data_collection_scanner.hpp} +2 -2
- package/src/duckdb/src/include/duckdb/common/types/{row_layout.hpp → row/row_layout.hpp} +3 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +116 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +239 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_iterator.hpp +64 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +113 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +124 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +74 -0
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +4 -12
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +34 -31
- package/src/duckdb/src/include/duckdb/execution/base_aggregate_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/execution_context.hpp +3 -2
- package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +41 -67
- package/src/duckdb/src/include/duckdb/execution/nested_loop_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_execute.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +0 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_positional_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +4 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/materialized_query_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/query_result.hpp +14 -1
- package/src/duckdb/src/include/duckdb/optimizer/expression_rewriter.hpp +3 -3
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +16 -16
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +8 -8
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +23 -15
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +9 -10
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +18 -11
- package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +5 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +3 -2
- package/src/duckdb/src/include/duckdb/parser/query_error_context.hpp +4 -2
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +9 -35
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +24 -23
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +3 -1
- package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
- package/src/duckdb/src/main/appender.cpp +6 -6
- package/src/duckdb/src/main/client_context.cpp +1 -1
- package/src/duckdb/src/main/connection.cpp +2 -2
- package/src/duckdb/src/main/query_result.cpp +13 -0
- package/src/duckdb/src/optimizer/expression_rewriter.cpp +4 -4
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +91 -105
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -8
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +163 -160
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +30 -30
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +37 -38
- package/src/duckdb/src/parallel/executor.cpp +1 -1
- package/src/duckdb/src/parallel/meta_pipeline.cpp +2 -2
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +1 -1
- package/src/duckdb/src/parser/transform/tableref/transform_subquery.cpp +1 -1
- package/src/duckdb/src/parser/transformer.cpp +50 -9
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +15 -5
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +19 -17
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +4 -4
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +20 -21
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +24 -22
- package/src/duckdb/src/planner/binder/tableref/bind_subqueryref.cpp +2 -2
- package/src/duckdb/src/planner/binder.cpp +16 -19
- package/src/duckdb/src/planner/expression_binder.cpp +8 -8
- package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +3 -3
- package/src/duckdb/src/storage/checkpoint_manager.cpp +23 -23
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +1 -1
- package/src/duckdb/src/storage/table_index_list.cpp +3 -3
- package/src/duckdb/src/verification/statement_verifier.cpp +1 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +5552 -5598
- package/src/duckdb/ub_src_common.cpp +2 -0
- package/src/duckdb/ub_src_common_types.cpp +0 -16
- package/src/duckdb/ub_src_common_types_column.cpp +10 -0
- package/src/duckdb/ub_src_common_types_row.cpp +20 -0
@@ -0,0 +1,74 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/common/types/row/tuple_data_states.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/common/mutex.hpp"
|
12
|
+
#include "duckdb/common/types.hpp"
|
13
|
+
|
14
|
+
namespace duckdb {
|
15
|
+
|
16
|
+
enum class TupleDataPinProperties : uint8_t {
|
17
|
+
INVALID,
|
18
|
+
//! Keeps all passed blocks pinned while scanning/iterating over the chunks (for both reading/writing)
|
19
|
+
KEEP_EVERYTHING_PINNED,
|
20
|
+
//! Unpins blocks after they are done (for both reading/writing)
|
21
|
+
UNPIN_AFTER_DONE,
|
22
|
+
//! Destroys blocks after they are done (for reading only)
|
23
|
+
DESTROY_AFTER_DONE,
|
24
|
+
//! Assumes all blocks are already pinned (for reading only)
|
25
|
+
ALREADY_PINNED
|
26
|
+
};
|
27
|
+
|
28
|
+
struct TupleDataPinState {
|
29
|
+
unordered_map<uint32_t, BufferHandle> row_handles;
|
30
|
+
unordered_map<uint32_t, BufferHandle> heap_handles;
|
31
|
+
TupleDataPinProperties properties = TupleDataPinProperties::INVALID;
|
32
|
+
};
|
33
|
+
|
34
|
+
struct CombinedListData {
|
35
|
+
UnifiedVectorFormat combined_data;
|
36
|
+
list_entry_t combined_list_entries[STANDARD_VECTOR_SIZE];
|
37
|
+
buffer_ptr<SelectionData> selection_data;
|
38
|
+
};
|
39
|
+
|
40
|
+
struct TupleDataVectorFormat {
|
41
|
+
UnifiedVectorFormat data;
|
42
|
+
vector<TupleDataVectorFormat> child_formats;
|
43
|
+
unique_ptr<CombinedListData> combined_list_data;
|
44
|
+
};
|
45
|
+
|
46
|
+
struct TupleDataChunkState {
|
47
|
+
vector<TupleDataVectorFormat> vector_data;
|
48
|
+
vector<column_t> column_ids;
|
49
|
+
|
50
|
+
Vector row_locations = Vector(LogicalType::POINTER);
|
51
|
+
Vector heap_locations = Vector(LogicalType::POINTER);
|
52
|
+
Vector heap_sizes = Vector(LogicalType::UBIGINT);
|
53
|
+
};
|
54
|
+
|
55
|
+
struct TupleDataAppendState {
|
56
|
+
TupleDataPinState pin_state;
|
57
|
+
TupleDataChunkState chunk_state;
|
58
|
+
};
|
59
|
+
|
60
|
+
struct TupleDataScanState {
|
61
|
+
TupleDataPinState pin_state;
|
62
|
+
TupleDataChunkState chunk_state;
|
63
|
+
idx_t segment_index = DConstants::INVALID_INDEX;
|
64
|
+
idx_t chunk_index = DConstants::INVALID_INDEX;
|
65
|
+
};
|
66
|
+
|
67
|
+
struct TupleDataParallelScanState {
|
68
|
+
TupleDataScanState scan_state;
|
69
|
+
mutex lock;
|
70
|
+
};
|
71
|
+
|
72
|
+
using TupleDataLocalScanState = TupleDataScanState;
|
73
|
+
|
74
|
+
} // namespace duckdb
|
@@ -170,6 +170,9 @@ public:
|
|
170
170
|
}
|
171
171
|
return ValidityBuffer::MAX_ENTRY >> (BITS_PER_VALUE - n);
|
172
172
|
}
|
173
|
+
static inline idx_t SizeInBytes(idx_t n) {
|
174
|
+
return (n + BITS_PER_VALUE - 1) / BITS_PER_VALUE;
|
175
|
+
}
|
173
176
|
|
174
177
|
//! RowIsValidUnsafe should only be used if AllValid() is false: it achieves the same as RowIsValid but skips a
|
175
178
|
//! not-null check
|
@@ -175,24 +175,16 @@ public:
|
|
175
175
|
DUCKDB_API static Value BIT(const string &data);
|
176
176
|
|
177
177
|
template <class T>
|
178
|
-
T GetValue() const
|
179
|
-
throw InternalException("Unimplemented template type for Value::GetValue");
|
180
|
-
}
|
178
|
+
T GetValue() const;
|
181
179
|
template <class T>
|
182
|
-
static Value CreateValue(T value)
|
183
|
-
throw InternalException("Unimplemented template type for Value::CreateValue");
|
184
|
-
}
|
180
|
+
static Value CreateValue(T value);
|
185
181
|
// Returns the internal value. Unlike GetValue(), this method does not perform casting, and assumes T matches the
|
186
182
|
// type of the value. Only use this if you know what you are doing.
|
187
183
|
template <class T>
|
188
|
-
T GetValueUnsafe() const
|
189
|
-
throw InternalException("Unimplemented template type for Value::GetValueUnsafe");
|
190
|
-
}
|
184
|
+
T GetValueUnsafe() const;
|
191
185
|
//! Returns a reference to the internal value. This can only be used for primitive types.
|
192
186
|
template <class T>
|
193
|
-
T &GetReferenceUnsafe()
|
194
|
-
throw InternalException("Unimplemented template type for Value::GetReferenceUnsafe");
|
195
|
-
}
|
187
|
+
T &GetReferenceUnsafe();
|
196
188
|
|
197
189
|
//! Return a copy of this value
|
198
190
|
Value Copy() const {
|
@@ -8,14 +8,14 @@
|
|
8
8
|
|
9
9
|
#pragma once
|
10
10
|
|
11
|
+
#include "duckdb/common/types/row/tuple_data_collection.hpp"
|
11
12
|
#include "duckdb/execution/base_aggregate_hashtable.hpp"
|
12
|
-
#include "duckdb/storage/buffer/buffer_handle.hpp"
|
13
13
|
#include "duckdb/storage/arena_allocator.hpp"
|
14
|
+
#include "duckdb/storage/buffer/buffer_handle.hpp"
|
14
15
|
|
15
16
|
namespace duckdb {
|
16
17
|
class BlockHandle;
|
17
18
|
class BufferHandle;
|
18
|
-
class RowDataCollection;
|
19
19
|
|
20
20
|
struct FlushMoveState;
|
21
21
|
|
@@ -60,7 +60,7 @@ enum HtEntryType { HT_WIDTH_32, HT_WIDTH_64 };
|
|
60
60
|
|
61
61
|
struct AggregateHTScanState {
|
62
62
|
mutex lock;
|
63
|
-
|
63
|
+
TupleDataScanState scan_state;
|
64
64
|
};
|
65
65
|
|
66
66
|
struct AggregateHTAppendState {
|
@@ -75,6 +75,9 @@ struct AggregateHTAppendState {
|
|
75
75
|
Vector addresses;
|
76
76
|
unique_ptr<UnifiedVectorFormat[]> group_data;
|
77
77
|
DataChunk group_chunk;
|
78
|
+
|
79
|
+
TupleDataChunkState chunk_state;
|
80
|
+
bool chunk_state_initialized;
|
78
81
|
};
|
79
82
|
|
80
83
|
class GroupedAggregateHashTable : public BaseAggregateHashTable {
|
@@ -95,9 +98,6 @@ public:
|
|
95
98
|
GroupedAggregateHashTable(ClientContext &context, Allocator &allocator, vector<LogicalType> group_types);
|
96
99
|
~GroupedAggregateHashTable() override;
|
97
100
|
|
98
|
-
//! The stringheap of the AggregateHashTable
|
99
|
-
unique_ptr<RowDataCollection> string_heap;
|
100
|
-
|
101
101
|
public:
|
102
102
|
//! Add the given data to the HT, computing the aggregates grouped by the
|
103
103
|
//! data in the group chunk. When resize = true, aggregates will not be
|
@@ -110,7 +110,7 @@ public:
|
|
110
110
|
//! Scan the HT starting from the scan_position until the result and group
|
111
111
|
//! chunks are filled. scan_position will be updated by this function.
|
112
112
|
//! Returns the amount of elements found.
|
113
|
-
idx_t Scan(
|
113
|
+
idx_t Scan(TupleDataParallelScanState &gstate, TupleDataLocalScanState &lstate, DataChunk &result);
|
114
114
|
|
115
115
|
//! Fetch the aggregates for specific groups from the HT and place them in the result
|
116
116
|
void FetchAggregates(DataChunk &groups, DataChunk &result);
|
@@ -127,10 +127,15 @@ public:
|
|
127
127
|
//! Executes the filter(if any) and update the aggregates
|
128
128
|
void Combine(GroupedAggregateHashTable &other);
|
129
129
|
|
130
|
-
|
131
|
-
|
132
|
-
return entries;
|
130
|
+
TupleDataCollection &GetDataCollection() {
|
131
|
+
return *data_collection;
|
133
132
|
}
|
133
|
+
|
134
|
+
idx_t Count() const {
|
135
|
+
return data_collection->Count();
|
136
|
+
}
|
137
|
+
|
138
|
+
static idx_t InitialCapacity();
|
134
139
|
idx_t Capacity() {
|
135
140
|
return capacity;
|
136
141
|
}
|
@@ -139,24 +144,23 @@ public:
|
|
139
144
|
idx_t MaxCapacity();
|
140
145
|
static idx_t GetMaxCapacity(HtEntryType entry_type, idx_t tuple_size);
|
141
146
|
|
142
|
-
void Partition(vector<GroupedAggregateHashTable *> &partition_hts,
|
147
|
+
void Partition(vector<GroupedAggregateHashTable *> &partition_hts, idx_t radix_bits);
|
148
|
+
void InitializeFirstPart();
|
143
149
|
|
144
150
|
void Finalize();
|
145
151
|
|
146
152
|
private:
|
147
153
|
HtEntryType entry_type;
|
148
154
|
|
149
|
-
//! The
|
155
|
+
//! The capacity of the HT. This can be increased using GroupedAggregateHashTable::Resize
|
156
|
+
idx_t capacity;
|
157
|
+
//! Tuple width
|
150
158
|
idx_t tuple_size;
|
151
|
-
//!
|
159
|
+
//! Tuples per block
|
152
160
|
idx_t tuples_per_block;
|
153
|
-
//! The capacity of the HT. This can be increased using
|
154
|
-
//! GroupedAggregateHashTable::Resize
|
155
|
-
idx_t capacity;
|
156
|
-
//! The amount of entries stored in the HT currently
|
157
|
-
idx_t entries;
|
158
161
|
//! The data of the HT
|
159
|
-
|
162
|
+
unique_ptr<TupleDataCollection> data_collection;
|
163
|
+
TupleDataPinState td_pin_state;
|
160
164
|
vector<data_ptr_t> payload_hds_ptrs;
|
161
165
|
|
162
166
|
//! The hashes of the HT
|
@@ -165,7 +169,6 @@ private:
|
|
165
169
|
idx_t hash_offset; // Offset into the layout of the hash column
|
166
170
|
|
167
171
|
hash_t hash_prefix_shift;
|
168
|
-
idx_t payload_page_offset;
|
169
172
|
|
170
173
|
//! Bitmask for getting relevant bits from the hashes to determine the position
|
171
174
|
hash_t bitmask;
|
@@ -175,30 +178,30 @@ private:
|
|
175
178
|
vector<ExpressionType> predicates;
|
176
179
|
|
177
180
|
//! The arena allocator used by the aggregates for their internal state
|
178
|
-
ArenaAllocator aggregate_allocator;
|
181
|
+
shared_ptr<ArenaAllocator> aggregate_allocator;
|
179
182
|
|
180
183
|
private:
|
181
184
|
GroupedAggregateHashTable(const GroupedAggregateHashTable &) = delete;
|
182
185
|
|
183
|
-
//! Resize the HT to the specified size. Must be larger than the current
|
184
|
-
//! size.
|
185
186
|
void Destroy();
|
186
|
-
|
187
187
|
void Verify();
|
188
|
-
|
189
|
-
void FlushMove(FlushMoveState &state, Vector &source_addresses, Vector &source_hashes, idx_t count);
|
190
|
-
void NewBlock();
|
191
|
-
|
192
188
|
template <class ENTRY>
|
193
189
|
void VerifyInternal();
|
190
|
+
//! Resize the HT to the specified size. Must be larger than the current size.
|
194
191
|
template <class ENTRY>
|
195
192
|
void Resize(idx_t size);
|
193
|
+
//! Initializes the first part of the HT
|
194
|
+
template <class ENTRY>
|
195
|
+
void InitializeHashes();
|
196
|
+
//! Does the actual group matching / creation
|
197
|
+
template <class ENTRY>
|
198
|
+
idx_t FindOrCreateGroupsInternal(DataChunk &groups, Vector &group_hashes_v, Vector &addresses_v,
|
199
|
+
SelectionVector &new_groups);
|
200
|
+
//! Updates payload_hds_ptrs with the new pointers (after appending to data_collection)
|
201
|
+
void UpdateBlockPointers();
|
196
202
|
template <class ENTRY>
|
197
203
|
idx_t FindOrCreateGroupsInternal(AggregateHTAppendState &state, DataChunk &groups, Vector &group_hashes,
|
198
204
|
Vector &addresses, SelectionVector &new_groups);
|
199
|
-
|
200
|
-
template <class FUNC = std::function<void(idx_t, idx_t, data_ptr_t)>>
|
201
|
-
void PayloadApply(FUNC fun);
|
202
205
|
};
|
203
206
|
|
204
207
|
} // namespace duckdb
|
@@ -9,7 +9,7 @@
|
|
9
9
|
#pragma once
|
10
10
|
|
11
11
|
#include "duckdb/common/common.hpp"
|
12
|
-
#include "duckdb/common/types/
|
12
|
+
#include "duckdb/common/types/row/tuple_data_layout.hpp"
|
13
13
|
#include "duckdb/common/types/vector.hpp"
|
14
14
|
#include "duckdb/execution/operator/aggregate/aggregate_object.hpp"
|
15
15
|
|
@@ -27,7 +27,7 @@ protected:
|
|
27
27
|
Allocator &allocator;
|
28
28
|
BufferManager &buffer_manager;
|
29
29
|
//! A helper for managing offsets into the data buffers
|
30
|
-
|
30
|
+
TupleDataLayout layout;
|
31
31
|
//! The types of the payload columns stored in the hashtable
|
32
32
|
vector<LogicalType> payload_types;
|
33
33
|
//! Intermediate structures and data for aggregate filters
|
@@ -9,6 +9,7 @@
|
|
9
9
|
#pragma once
|
10
10
|
|
11
11
|
#include "duckdb/common/common.hpp"
|
12
|
+
#include "duckdb/common/optional_ptr.hpp"
|
12
13
|
|
13
14
|
namespace duckdb {
|
14
15
|
class ClientContext;
|
@@ -17,7 +18,7 @@ class Pipeline;
|
|
17
18
|
|
18
19
|
class ExecutionContext {
|
19
20
|
public:
|
20
|
-
ExecutionContext(ClientContext &client_p, ThreadContext &thread_p, Pipeline
|
21
|
+
ExecutionContext(ClientContext &client_p, ThreadContext &thread_p, optional_ptr<Pipeline> pipeline_p)
|
21
22
|
: client(client_p), thread(thread_p), pipeline(pipeline_p) {
|
22
23
|
}
|
23
24
|
|
@@ -26,7 +27,7 @@ public:
|
|
26
27
|
//! The thread-local context for this execution
|
27
28
|
ThreadContext &thread;
|
28
29
|
//! Reference to the pipeline for this execution, can be used for example by operators determine caching strategy
|
29
|
-
Pipeline
|
30
|
+
optional_ptr<Pipeline> pipeline;
|
30
31
|
};
|
31
32
|
|
32
33
|
} // namespace duckdb
|
@@ -151,7 +151,7 @@ protected:
|
|
151
151
|
|
152
152
|
private:
|
153
153
|
//! Client context
|
154
|
-
ClientContext
|
154
|
+
optional_ptr<ClientContext> context;
|
155
155
|
//! The states of the expression executor; this holds any intermediates and temporary states of expressions
|
156
156
|
vector<unique_ptr<ExpressionExecutorState>> states;
|
157
157
|
|
@@ -10,11 +10,11 @@
|
|
10
10
|
|
11
11
|
#include "duckdb/common/common.hpp"
|
12
12
|
#include "duckdb/common/radix_partitioning.hpp"
|
13
|
-
#include "duckdb/common/types/column_data_consumer.hpp"
|
13
|
+
#include "duckdb/common/types/column/column_data_consumer.hpp"
|
14
14
|
#include "duckdb/common/types/data_chunk.hpp"
|
15
15
|
#include "duckdb/common/types/null_value.hpp"
|
16
|
-
#include "duckdb/common/types/
|
17
|
-
#include "duckdb/common/types/
|
16
|
+
#include "duckdb/common/types/row/tuple_data_iterator.hpp"
|
17
|
+
#include "duckdb/common/types/row/tuple_data_layout.hpp"
|
18
18
|
#include "duckdb/common/types/vector.hpp"
|
19
19
|
#include "duckdb/execution/aggregate_hashtable.hpp"
|
20
20
|
#include "duckdb/planner/operator/logical_comparison_join.hpp"
|
@@ -30,25 +30,13 @@ struct ClientConfig;
|
|
30
30
|
|
31
31
|
struct JoinHTScanState {
|
32
32
|
public:
|
33
|
-
JoinHTScanState(
|
33
|
+
JoinHTScanState(TupleDataCollection &collection, idx_t chunk_idx_from, idx_t chunk_idx_to,
|
34
|
+
TupleDataPinProperties properties = TupleDataPinProperties::ALREADY_PINNED)
|
35
|
+
: iterator(collection, properties, chunk_idx_from, chunk_idx_to, false), offset_in_chunk(0) {
|
34
36
|
}
|
35
37
|
|
36
|
-
|
37
|
-
idx_t
|
38
|
-
|
39
|
-
//! Used for synchronization of parallel external join
|
40
|
-
idx_t total;
|
41
|
-
idx_t scan_index;
|
42
|
-
idx_t scanned;
|
43
|
-
|
44
|
-
public:
|
45
|
-
void Reset() {
|
46
|
-
position = 0;
|
47
|
-
block_position = 0;
|
48
|
-
total = 0;
|
49
|
-
scan_index = 0;
|
50
|
-
scanned = 0;
|
51
|
-
}
|
38
|
+
TupleDataChunkIterator iterator;
|
39
|
+
idx_t offset_in_chunk;
|
52
40
|
|
53
41
|
private:
|
54
42
|
//! Implicit copying is not allowed
|
@@ -130,33 +118,38 @@ public:
|
|
130
118
|
~JoinHashTable();
|
131
119
|
|
132
120
|
//! Add the given data to the HT
|
133
|
-
void Build(DataChunk &keys, DataChunk &input);
|
121
|
+
void Build(PartitionedTupleDataAppendState &append_state, DataChunk &keys, DataChunk &input);
|
134
122
|
//! Merge another HT into this one
|
135
123
|
void Merge(JoinHashTable &other);
|
124
|
+
//! Combines the partitions in sink_collection into data_collection, as if it were not partitioned
|
125
|
+
void Unpartition();
|
136
126
|
//! Initialize the pointer table for the probe
|
137
127
|
void InitializePointerTable();
|
138
128
|
//! Finalize the build of the HT, constructing the actual hash table and making the HT ready for probing.
|
139
129
|
//! Finalize must be called before any call to Probe, and after Finalize is called Build should no longer be
|
140
130
|
//! ever called.
|
141
|
-
void Finalize(idx_t
|
131
|
+
void Finalize(idx_t chunk_idx_from, idx_t chunk_idx_to, bool parallel);
|
142
132
|
//! Probe the HT with the given input chunk, resulting in the given result
|
143
133
|
unique_ptr<ScanStructure> Probe(DataChunk &keys, Vector *precomputed_hashes = nullptr);
|
144
|
-
//! Scan the HT to
|
145
|
-
|
146
|
-
//! Construct the full outer join result given the addresses and number of found entries
|
147
|
-
void GatherFullOuter(DataChunk &result, Vector &addresses, idx_t found_entries);
|
134
|
+
//! Scan the HT to construct the full outer join result
|
135
|
+
void ScanFullOuter(JoinHTScanState &state, Vector &addresses, DataChunk &result);
|
148
136
|
|
149
137
|
//! Fill the pointer with all the addresses from the hashtable for full scan
|
150
|
-
idx_t FillWithHTOffsets(
|
151
|
-
//! Pins all fixed-size blocks
|
152
|
-
void PinAllBlocks();
|
138
|
+
idx_t FillWithHTOffsets(JoinHTScanState &state, Vector &addresses);
|
153
139
|
|
154
140
|
idx_t Count() const {
|
155
|
-
return
|
141
|
+
return data_collection->Count();
|
142
|
+
}
|
143
|
+
idx_t SizeInBytes() const {
|
144
|
+
return data_collection->SizeInBytes();
|
145
|
+
}
|
146
|
+
|
147
|
+
PartitionedTupleData &GetSinkCollection() {
|
148
|
+
return *sink_collection;
|
156
149
|
}
|
157
150
|
|
158
|
-
|
159
|
-
return *
|
151
|
+
TupleDataCollection &GetDataCollection() {
|
152
|
+
return *data_collection;
|
160
153
|
}
|
161
154
|
|
162
155
|
//! BufferManager
|
@@ -172,7 +165,7 @@ public:
|
|
172
165
|
//! The comparison predicates
|
173
166
|
vector<ExpressionType> predicates;
|
174
167
|
//! Data column layout
|
175
|
-
|
168
|
+
TupleDataLayout layout;
|
176
169
|
//! The size of an entry as stored in the HashTable
|
177
170
|
idx_t entry_size;
|
178
171
|
//! The total tuple size
|
@@ -222,13 +215,12 @@ private:
|
|
222
215
|
idx_t PrepareKeys(DataChunk &keys, unique_ptr<UnifiedVectorFormat[]> &key_data, const SelectionVector *&current_sel,
|
223
216
|
SelectionVector &sel, bool build_side);
|
224
217
|
|
225
|
-
//!
|
226
|
-
|
227
|
-
//!
|
228
|
-
unique_ptr<
|
229
|
-
//!
|
230
|
-
|
231
|
-
vector<BufferHandle> pinned_handles;
|
218
|
+
//! Lock for combining data_collection when merging HTs
|
219
|
+
mutex data_lock;
|
220
|
+
//! Partitioned data collection that the data is sunk into when building
|
221
|
+
unique_ptr<PartitionedTupleData> sink_collection;
|
222
|
+
//! The DataCollection holding the main data of the hash table
|
223
|
+
unique_ptr<TupleDataCollection> data_collection;
|
232
224
|
//! The hash map of the HT, created after finalization
|
233
225
|
AllocatedData hash_map;
|
234
226
|
//! Whether or not NULL values are considered equal in each of the comparisons
|
@@ -297,34 +289,25 @@ public:
|
|
297
289
|
bool external;
|
298
290
|
//! The current number of radix bits used to partition
|
299
291
|
idx_t radix_bits;
|
292
|
+
//! The max size of the HT
|
293
|
+
idx_t max_ht_size;
|
300
294
|
//! Total count
|
301
295
|
idx_t total_count;
|
302
|
-
//! Number of tuples for the build-side HT per partitioned round
|
303
|
-
idx_t tuples_per_round;
|
304
296
|
|
305
|
-
//! The number of tuples that are swizzled
|
306
|
-
idx_t SwizzledCount() const {
|
307
|
-
return swizzled_block_collection->count;
|
308
|
-
}
|
309
|
-
//! Size of the in-memory data
|
310
|
-
idx_t SizeInBytes() const {
|
311
|
-
return block_collection->SizeInBytes() + string_heap->SizeInBytes();
|
312
|
-
}
|
313
|
-
//! Size of the swizzled data
|
314
|
-
idx_t SwizzledSize() const {
|
315
|
-
return swizzled_block_collection->SizeInBytes() + swizzled_string_heap->SizeInBytes();
|
316
|
-
}
|
317
297
|
//! Capacity of the pointer table given the ht count
|
318
298
|
//! (minimum of 1024 to prevent collision chance for small HT's)
|
319
299
|
static idx_t PointerTableCapacity(idx_t count) {
|
320
300
|
return MaxValue<idx_t>(NextPowerOfTwo(count * 2), 1 << 10);
|
321
301
|
}
|
302
|
+
//! Size of the pointer table (in bytes)
|
303
|
+
static idx_t PointerTableSize(idx_t count) {
|
304
|
+
return PointerTableCapacity(count) * sizeof(data_ptr_t);
|
305
|
+
}
|
322
306
|
|
323
|
-
//!
|
324
|
-
|
325
|
-
|
307
|
+
//! Whether we need to do an external join
|
308
|
+
bool RequiresExternalJoin(ClientConfig &config, vector<unique_ptr<JoinHashTable>> &local_hts);
|
326
309
|
//! Computes partition sizes and number of radix bits (called before scheduling partition tasks)
|
327
|
-
|
310
|
+
bool RequiresPartitioning(ClientConfig &config, vector<unique_ptr<JoinHashTable>> &local_hts);
|
328
311
|
//! Partition this HT
|
329
312
|
void Partition(JoinHashTable &global_ht);
|
330
313
|
|
@@ -340,15 +323,6 @@ private:
|
|
340
323
|
//! First and last partition of the current probe round
|
341
324
|
idx_t partition_start;
|
342
325
|
idx_t partition_end;
|
343
|
-
|
344
|
-
//! Swizzled row data
|
345
|
-
unique_ptr<RowDataCollection> swizzled_block_collection;
|
346
|
-
unique_ptr<RowDataCollection> swizzled_string_heap;
|
347
|
-
|
348
|
-
//! Partitioned data
|
349
|
-
mutex partitioned_data_lock;
|
350
|
-
vector<unique_ptr<RowDataCollection>> partition_block_collections;
|
351
|
-
vector<unique_ptr<RowDataCollection>> partition_string_heaps;
|
352
326
|
};
|
353
327
|
|
354
328
|
} // namespace duckdb
|
@@ -9,7 +9,7 @@
|
|
9
9
|
#pragma once
|
10
10
|
|
11
11
|
#include "duckdb/common/common.hpp"
|
12
|
-
#include "duckdb/common/types/column_data_collection.hpp"
|
12
|
+
#include "duckdb/common/types/column/column_data_collection.hpp"
|
13
13
|
#include "duckdb/common/types/vector.hpp"
|
14
14
|
#include "duckdb/planner/operator/logical_comparison_join.hpp"
|
15
15
|
|
@@ -18,9 +18,9 @@ public:
|
|
18
18
|
static constexpr const PhysicalOperatorType TYPE = PhysicalOperatorType::EXECUTE;
|
19
19
|
|
20
20
|
public:
|
21
|
-
explicit PhysicalExecute(PhysicalOperator
|
21
|
+
explicit PhysicalExecute(PhysicalOperator &plan);
|
22
22
|
|
23
|
-
PhysicalOperator
|
23
|
+
PhysicalOperator &plan;
|
24
24
|
unique_ptr<PhysicalOperator> owned_plan;
|
25
25
|
shared_ptr<PreparedStatementData> prepared;
|
26
26
|
|
@@ -9,9 +9,9 @@
|
|
9
9
|
#pragma once
|
10
10
|
|
11
11
|
#include "duckdb/common/mutex.hpp"
|
12
|
-
#include "duckdb/
|
12
|
+
#include "duckdb/common/types/column/column_data_collection.hpp"
|
13
13
|
#include "duckdb/execution/operator/join/physical_comparison_join.hpp"
|
14
|
-
#include "duckdb/
|
14
|
+
#include "duckdb/execution/physical_operator.hpp"
|
15
15
|
|
16
16
|
namespace duckdb {
|
17
17
|
|
package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp
CHANGED
@@ -58,7 +58,7 @@ private:
|
|
58
58
|
template <typename T>
|
59
59
|
bool TemplatedFillSelectionVectorBuild(Vector &source, SelectionVector &sel_vec, SelectionVector &seq_sel_vec,
|
60
60
|
idx_t count);
|
61
|
-
bool FullScanHashTable(
|
61
|
+
bool FullScanHashTable(LogicalType &key_type);
|
62
62
|
|
63
63
|
private:
|
64
64
|
const PhysicalHashJoin &join;
|
@@ -24,7 +24,7 @@ public:
|
|
24
24
|
public:
|
25
25
|
PhysicalIndexJoin(LogicalOperator &op, unique_ptr<PhysicalOperator> left, unique_ptr<PhysicalOperator> right,
|
26
26
|
vector<JoinCondition> cond, JoinType join_type, const vector<idx_t> &left_projection_map,
|
27
|
-
vector<idx_t> right_projection_map, vector<column_t> column_ids, Index
|
27
|
+
vector<idx_t> right_projection_map, vector<column_t> column_ids, Index &index, bool lhs_first,
|
28
28
|
idx_t estimated_cardinality);
|
29
29
|
|
30
30
|
//! Columns from RHS used in the query
|
@@ -44,7 +44,7 @@ public:
|
|
44
44
|
//! The types of all conditions
|
45
45
|
vector<LogicalType> build_types;
|
46
46
|
//! Index used for join
|
47
|
-
Index
|
47
|
+
Index &index;
|
48
48
|
|
49
49
|
vector<JoinCondition> conditions;
|
50
50
|
|
package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp
CHANGED
@@ -11,6 +11,8 @@
|
|
11
11
|
#include "duckdb/execution/physical_operator.hpp"
|
12
12
|
#include "duckdb/parser/parsed_data/copy_info.hpp"
|
13
13
|
#include "duckdb/function/copy_function.hpp"
|
14
|
+
#include "duckdb/common/file_system.hpp"
|
15
|
+
#include "duckdb/common/filename_pattern.hpp"
|
14
16
|
|
15
17
|
namespace duckdb {
|
16
18
|
|
@@ -27,7 +29,8 @@ public:
|
|
27
29
|
unique_ptr<FunctionData> bind_data;
|
28
30
|
string file_path;
|
29
31
|
bool use_tmp_file;
|
30
|
-
|
32
|
+
FilenamePattern filename_pattern;
|
33
|
+
bool overwrite_or_ignore;
|
31
34
|
bool parallel;
|
32
35
|
bool per_thread_output;
|
33
36
|
|