duckdb 0.7.2-dev1034.0 → 0.7.2-dev1146.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp +1 -1
- package/src/duckdb/extension/parquet/parquet-extension.cpp +2 -1
- package/src/duckdb/src/common/hive_partitioning.cpp +3 -1
- package/src/duckdb/src/common/progress_bar/progress_bar.cpp +7 -0
- package/src/duckdb/src/common/serializer/enum_serializer.cpp +6 -6
- package/src/duckdb/src/common/sort/comparators.cpp +14 -5
- package/src/duckdb/src/common/types/column_data_collection_segment.cpp +1 -4
- package/src/duckdb/src/common/types/interval.cpp +0 -41
- package/src/duckdb/src/common/types/list_segment.cpp +658 -0
- package/src/duckdb/src/common/types/string_heap.cpp +1 -1
- package/src/duckdb/src/common/types/string_type.cpp +1 -1
- package/src/duckdb/src/common/types/validity_mask.cpp +24 -7
- package/src/duckdb/src/common/types/vector.cpp +3 -7
- package/src/duckdb/src/common/value_operations/comparison_operations.cpp +14 -22
- package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +10 -10
- package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +11 -10
- package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +2 -2
- package/src/duckdb/src/execution/index/art/art.cpp +13 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +2 -0
- package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +1 -0
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +0 -3
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +5 -1
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +18 -5
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +3 -0
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +2 -1
- package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +1 -3
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -0
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +0 -4
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +1 -0
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +2 -1
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +18 -10
- package/src/duckdb/src/function/aggregate/nested/list.cpp +6 -712
- package/src/duckdb/src/function/cast_rules.cpp +9 -4
- package/src/duckdb/src/function/scalar/list/list_sort.cpp +25 -18
- package/src/duckdb/src/function/table/read_csv.cpp +5 -0
- package/src/duckdb/src/function/table/table_scan.cpp +8 -11
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/helper.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +45 -149
- package/src/duckdb/src/include/duckdb/common/progress_bar/progress_bar.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/types/interval.hpp +39 -3
- package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +70 -0
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +73 -3
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +4 -1
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +1 -12
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +0 -2
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/index.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +18 -7
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +0 -3
- package/src/duckdb/src/include/duckdb/storage/table/column_segment_tree.hpp +18 -0
- package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +0 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +35 -43
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +18 -5
- package/src/duckdb/src/include/duckdb/storage/table/row_group_segment_tree.hpp +2 -4
- package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +12 -29
- package/src/duckdb/src/include/duckdb/storage/table/segment_base.hpp +2 -3
- package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +11 -1
- package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +0 -4
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +4 -1
- package/src/duckdb/src/include/duckdb.h +21 -0
- package/src/duckdb/src/main/capi/table_function-c.cpp +23 -0
- package/src/duckdb/src/main/settings/settings.cpp +20 -8
- package/src/duckdb/src/optimizer/filter_combiner.cpp +2 -5
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +2 -0
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +1 -0
- package/src/duckdb/src/parallel/meta_pipeline.cpp +0 -3
- package/src/duckdb/src/parser/transform/expression/transform_function.cpp +22 -0
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +1 -0
- package/src/duckdb/src/storage/compression/bitpacking.cpp +1 -1
- package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +2 -1
- package/src/duckdb/src/storage/compression/numeric_constant.cpp +1 -1
- package/src/duckdb/src/storage/compression/rle.cpp +1 -0
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +1 -1
- package/src/duckdb/src/storage/data_table.cpp +3 -3
- package/src/duckdb/src/storage/local_storage.cpp +7 -0
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/column_data.cpp +75 -18
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +3 -1
- package/src/duckdb/src/storage/table/column_segment.cpp +17 -31
- package/src/duckdb/src/storage/table/list_column_data.cpp +9 -12
- package/src/duckdb/src/storage/table/row_group.cpp +200 -136
- package/src/duckdb/src/storage/table/row_group_collection.cpp +75 -45
- package/src/duckdb/src/storage/table/scan_state.cpp +31 -38
- package/src/duckdb/src/storage/table/standard_column_data.cpp +4 -6
- package/src/duckdb/src/storage/table/struct_column_data.cpp +11 -18
- package/src/duckdb/src/storage/table/update_segment.cpp +3 -0
- package/src/duckdb/ub_src_common_types.cpp +2 -0
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#include "duckdb/execution/radix_partitioned_hashtable.hpp"
|
|
2
2
|
#include "duckdb/parallel/task_scheduler.hpp"
|
|
3
3
|
#include "duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp"
|
|
4
|
+
#include "duckdb/planner/expression/bound_reference_expression.hpp"
|
|
4
5
|
#include "duckdb/parallel/event.hpp"
|
|
5
6
|
|
|
6
7
|
namespace duckdb {
|
|
@@ -52,10 +53,13 @@ RadixPartitionedHashTable::RadixPartitionedHashTable(GroupingSet &grouping_set_p
|
|
|
52
53
|
// Sink
|
|
53
54
|
//===--------------------------------------------------------------------===//
|
|
54
55
|
class RadixHTGlobalState : public GlobalSinkState {
|
|
56
|
+
constexpr const static idx_t MAX_RADIX_PARTITIONS = 32;
|
|
57
|
+
|
|
55
58
|
public:
|
|
56
59
|
explicit RadixHTGlobalState(ClientContext &context)
|
|
57
|
-
: is_empty(true), multi_scan(true),
|
|
58
|
-
partition_info(
|
|
60
|
+
: is_empty(true), multi_scan(true), partitioned(false),
|
|
61
|
+
partition_info(
|
|
62
|
+
MinValue<idx_t>(MAX_RADIX_PARTITIONS, TaskScheduler::GetScheduler(context).NumberOfThreads())) {
|
|
59
63
|
}
|
|
60
64
|
|
|
61
65
|
vector<unique_ptr<PartitionableHashTable>> intermediate_hts;
|
|
@@ -67,8 +71,8 @@ public:
|
|
|
67
71
|
bool multi_scan;
|
|
68
72
|
//! The lock for updating the global aggregate state
|
|
69
73
|
mutex lock;
|
|
70
|
-
//!
|
|
71
|
-
atomic<
|
|
74
|
+
//! Whether or not any thread has crossed the partitioning threshold
|
|
75
|
+
atomic<bool> partitioned;
|
|
72
76
|
|
|
73
77
|
bool is_finalized = false;
|
|
74
78
|
bool is_partitioned = false;
|
|
@@ -78,7 +82,7 @@ public:
|
|
|
78
82
|
|
|
79
83
|
class RadixHTLocalState : public LocalSinkState {
|
|
80
84
|
public:
|
|
81
|
-
explicit RadixHTLocalState(const RadixPartitionedHashTable &ht) : is_empty(true) {
|
|
85
|
+
explicit RadixHTLocalState(const RadixPartitionedHashTable &ht) : total_groups(0), is_empty(true) {
|
|
82
86
|
// if there are no groups we create a fake group so everything has the same group
|
|
83
87
|
group_chunk.InitializeEmpty(ht.group_types);
|
|
84
88
|
if (ht.grouping_set.empty()) {
|
|
@@ -89,6 +93,8 @@ public:
|
|
|
89
93
|
DataChunk group_chunk;
|
|
90
94
|
//! The aggregate HT
|
|
91
95
|
unique_ptr<PartitionableHashTable> ht;
|
|
96
|
+
//! The total number of groups found by this thread
|
|
97
|
+
idx_t total_groups;
|
|
92
98
|
|
|
93
99
|
//! Whether or not any tuples were added to the HT
|
|
94
100
|
bool is_empty;
|
|
@@ -145,7 +151,7 @@ void RadixPartitionedHashTable::Sink(ExecutionContext &context, GlobalSinkState
|
|
|
145
151
|
}
|
|
146
152
|
D_ASSERT(gstate.finalized_hts.size() == 1);
|
|
147
153
|
D_ASSERT(gstate.finalized_hts[0]);
|
|
148
|
-
|
|
154
|
+
llstate.total_groups += gstate.finalized_hts[0]->AddChunk(group_chunk, payload_input, filter);
|
|
149
155
|
return;
|
|
150
156
|
}
|
|
151
157
|
|
|
@@ -159,9 +165,11 @@ void RadixPartitionedHashTable::Sink(ExecutionContext &context, GlobalSinkState
|
|
|
159
165
|
group_types, op.payload_types, op.bindings);
|
|
160
166
|
}
|
|
161
167
|
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
168
|
+
llstate.total_groups += llstate.ht->AddChunk(group_chunk, payload_input,
|
|
169
|
+
gstate.partitioned && gstate.partition_info.n_partitions > 1, filter);
|
|
170
|
+
if (llstate.total_groups >= radix_limit) {
|
|
171
|
+
gstate.partitioned = true;
|
|
172
|
+
}
|
|
165
173
|
}
|
|
166
174
|
|
|
167
175
|
void RadixPartitionedHashTable::Combine(ExecutionContext &context, GlobalSinkState &state,
|
|
@@ -182,7 +190,7 @@ void RadixPartitionedHashTable::Combine(ExecutionContext &context, GlobalSinkSta
|
|
|
182
190
|
return; // no data
|
|
183
191
|
}
|
|
184
192
|
|
|
185
|
-
if (!llstate.ht->IsPartitioned() && gstate.partition_info.n_partitions > 1 && gstate.
|
|
193
|
+
if (!llstate.ht->IsPartitioned() && gstate.partition_info.n_partitions > 1 && gstate.partitioned) {
|
|
186
194
|
llstate.ht->Partition();
|
|
187
195
|
}
|
|
188
196
|
|