duckdb 0.7.2-dev1138.0 → 0.7.2-dev1146.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/parquet-extension.cpp +2 -1
- package/src/duckdb/src/common/types/column_data_collection_segment.cpp +1 -4
- package/src/duckdb/src/common/types/validity_mask.cpp +24 -7
- package/src/duckdb/src/common/types/vector.cpp +2 -6
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +17 -10
- package/src/duckdb/src/function/cast_rules.cpp +9 -4
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +4 -1
package/package.json
CHANGED
package/src/duckdb/extension/parquet/parquet-extension.cpp
CHANGED
|
@@ -122,7 +122,8 @@ struct ParquetWriteGlobalState : public GlobalFunctionData {
|
|
|
122
122
|
};
|
|
123
123
|
|
|
124
124
|
struct ParquetWriteLocalState : public LocalFunctionData {
|
|
125
|
-
explicit ParquetWriteLocalState(ClientContext &context, const vector<LogicalType> &types)
|
|
125
|
+
explicit ParquetWriteLocalState(ClientContext &context, const vector<LogicalType> &types)
|
|
126
|
+
: buffer(Allocator::Get(context), types) {
|
|
126
127
|
}
|
|
127
128
|
|
|
128
129
|
ColumnDataCollection buffer;
|
|
package/src/duckdb/src/common/types/column_data_collection_segment.cpp
CHANGED
@@ -169,11 +169,8 @@ idx_t ColumnDataCollectionSegment::ReadVectorInternal(ChunkManagementState &stat
|
|
|
169
169
|
if (type_size > 0) {
|
|
170
170
|
memcpy(target_data + current_offset * type_size, base_ptr, current_vdata.count * type_size);
|
|
171
171
|
}
|
|
172
|
-
// FIXME: use bitwise operations here
|
|
173
172
|
ValidityMask current_validity(validity_data);
|
|
174
|
-
|
|
175
|
-
target_validity.Set(current_offset + k, current_validity.RowIsValid(k));
|
|
176
|
-
}
|
|
173
|
+
target_validity.SliceInPlace(current_validity, current_offset, 0, current_vdata.count);
|
|
177
174
|
current_offset += current_vdata.count;
|
|
178
175
|
next_index = current_vdata.next_data;
|
|
179
176
|
}
|
|
package/src/duckdb/src/common/types/validity_mask.cpp
CHANGED
@@ -68,24 +68,41 @@ void ValidityMask::Resize(idx_t old_size, idx_t new_size) {
|
|
|
68
68
|
}
|
|
69
69
|
}
|
|
70
70
|
|
|
71
|
-
void ValidityMask::Slice(const ValidityMask &other, idx_t
|
|
71
|
+
void ValidityMask::Slice(const ValidityMask &other, idx_t source_offset, idx_t count) {
|
|
72
72
|
if (other.AllValid()) {
|
|
73
73
|
validity_mask = nullptr;
|
|
74
74
|
validity_data.reset();
|
|
75
75
|
return;
|
|
76
76
|
}
|
|
77
|
-
if (
|
|
77
|
+
if (source_offset == 0) {
|
|
78
78
|
Initialize(other);
|
|
79
79
|
return;
|
|
80
80
|
}
|
|
81
|
-
ValidityMask new_mask(
|
|
81
|
+
ValidityMask new_mask(count);
|
|
82
|
+
new_mask.SliceInPlace(other, 0, source_offset, count);
|
|
83
|
+
Initialize(new_mask);
|
|
84
|
+
}
|
|
82
85
|
|
|
83
|
-
|
|
86
|
+
bool ValidityMask::IsAligned(idx_t count) {
|
|
87
|
+
return count % BITS_PER_VALUE == 0;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
void ValidityMask::SliceInPlace(const ValidityMask &other, idx_t target_offset, idx_t source_offset, idx_t count) {
|
|
91
|
+
if (IsAligned(source_offset) && IsAligned(target_offset)) {
|
|
92
|
+
auto target_validity = GetData();
|
|
93
|
+
auto source_validity = other.GetData();
|
|
94
|
+
auto source_offset_entries = EntryCount(source_offset);
|
|
95
|
+
auto target_offset_entries = EntryCount(target_offset);
|
|
96
|
+
memcpy(target_validity + target_offset_entries, source_validity + source_offset_entries,
|
|
97
|
+
sizeof(validity_t) * EntryCount(count));
|
|
98
|
+
return;
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
// FIXME: use bitwise operations here
|
|
84
102
|
#if 1
|
|
85
|
-
for (idx_t i =
|
|
86
|
-
|
|
103
|
+
for (idx_t i = 0; i < count; i++) {
|
|
104
|
+
Set(target_offset + i, other.RowIsValid(source_offset + i));
|
|
87
105
|
}
|
|
88
|
-
Initialize(new_mask);
|
|
89
106
|
#else
|
|
90
107
|
// first shift the "whole" units
|
|
91
108
|
idx_t entire_units = offset / BITS_PER_VALUE;
|
|
package/src/duckdb/src/common/types/vector.cpp
CHANGED
@@ -136,17 +136,13 @@ void Vector::Slice(Vector &other, idx_t offset, idx_t end) {
|
|
|
136
136
|
for (idx_t i = 0; i < entries.size(); i++) {
|
|
137
137
|
entries[i]->Slice(*other_entries[i], offset, end);
|
|
138
138
|
}
|
|
139
|
-
|
|
140
|
-
new_vector.validity.Slice(other.validity, offset, end);
|
|
141
|
-
} else {
|
|
142
|
-
new_vector.validity = other.validity;
|
|
143
|
-
}
|
|
139
|
+
new_vector.validity.Slice(other.validity, offset, end - offset);
|
|
144
140
|
Reference(new_vector);
|
|
145
141
|
} else {
|
|
146
142
|
Reference(other);
|
|
147
143
|
if (offset > 0) {
|
|
148
144
|
data = data + GetTypeIdSize(internal_type) * offset;
|
|
149
|
-
validity.Slice(other.validity, offset, end);
|
|
145
|
+
validity.Slice(other.validity, offset, end - offset);
|
|
150
146
|
}
|
|
151
147
|
}
|
|
152
148
|
}
|
|
package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp
CHANGED
@@ -53,10 +53,13 @@ RadixPartitionedHashTable::RadixPartitionedHashTable(GroupingSet &grouping_set_p
|
|
|
53
53
|
// Sink
|
|
54
54
|
//===--------------------------------------------------------------------===//
|
|
55
55
|
class RadixHTGlobalState : public GlobalSinkState {
|
|
56
|
+
constexpr const static idx_t MAX_RADIX_PARTITIONS = 32;
|
|
57
|
+
|
|
56
58
|
public:
|
|
57
59
|
explicit RadixHTGlobalState(ClientContext &context)
|
|
58
|
-
: is_empty(true), multi_scan(true),
|
|
59
|
-
partition_info(
|
|
60
|
+
: is_empty(true), multi_scan(true), partitioned(false),
|
|
61
|
+
partition_info(
|
|
62
|
+
MinValue<idx_t>(MAX_RADIX_PARTITIONS, TaskScheduler::GetScheduler(context).NumberOfThreads())) {
|
|
60
63
|
}
|
|
61
64
|
|
|
62
65
|
vector<unique_ptr<PartitionableHashTable>> intermediate_hts;
|
|
@@ -68,8 +71,8 @@ public:
|
|
|
68
71
|
bool multi_scan;
|
|
69
72
|
//! The lock for updating the global aggregate state
|
|
70
73
|
mutex lock;
|
|
71
|
-
//!
|
|
72
|
-
atomic<
|
|
74
|
+
//! Whether or not any thread has crossed the partitioning threshold
|
|
75
|
+
atomic<bool> partitioned;
|
|
73
76
|
|
|
74
77
|
bool is_finalized = false;
|
|
75
78
|
bool is_partitioned = false;
|
|
@@ -79,7 +82,7 @@ public:
|
|
|
79
82
|
|
|
80
83
|
class RadixHTLocalState : public LocalSinkState {
|
|
81
84
|
public:
|
|
82
|
-
explicit RadixHTLocalState(const RadixPartitionedHashTable &ht) : is_empty(true) {
|
|
85
|
+
explicit RadixHTLocalState(const RadixPartitionedHashTable &ht) : total_groups(0), is_empty(true) {
|
|
83
86
|
// if there are no groups we create a fake group so everything has the same group
|
|
84
87
|
group_chunk.InitializeEmpty(ht.group_types);
|
|
85
88
|
if (ht.grouping_set.empty()) {
|
|
@@ -90,6 +93,8 @@ public:
|
|
|
90
93
|
DataChunk group_chunk;
|
|
91
94
|
//! The aggregate HT
|
|
92
95
|
unique_ptr<PartitionableHashTable> ht;
|
|
96
|
+
//! The total number of groups found by this thread
|
|
97
|
+
idx_t total_groups;
|
|
93
98
|
|
|
94
99
|
//! Whether or not any tuples were added to the HT
|
|
95
100
|
bool is_empty;
|
|
@@ -146,7 +151,7 @@ void RadixPartitionedHashTable::Sink(ExecutionContext &context, GlobalSinkState
|
|
|
146
151
|
}
|
|
147
152
|
D_ASSERT(gstate.finalized_hts.size() == 1);
|
|
148
153
|
D_ASSERT(gstate.finalized_hts[0]);
|
|
149
|
-
|
|
154
|
+
llstate.total_groups += gstate.finalized_hts[0]->AddChunk(group_chunk, payload_input, filter);
|
|
150
155
|
return;
|
|
151
156
|
}
|
|
152
157
|
|
|
@@ -160,9 +165,11 @@ void RadixPartitionedHashTable::Sink(ExecutionContext &context, GlobalSinkState
|
|
|
160
165
|
group_types, op.payload_types, op.bindings);
|
|
161
166
|
}
|
|
162
167
|
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
168
|
+
llstate.total_groups += llstate.ht->AddChunk(group_chunk, payload_input,
|
|
169
|
+
gstate.partitioned && gstate.partition_info.n_partitions > 1, filter);
|
|
170
|
+
if (llstate.total_groups >= radix_limit) {
|
|
171
|
+
gstate.partitioned = true;
|
|
172
|
+
}
|
|
166
173
|
}
|
|
167
174
|
|
|
168
175
|
void RadixPartitionedHashTable::Combine(ExecutionContext &context, GlobalSinkState &state,
|
|
@@ -183,7 +190,7 @@ void RadixPartitionedHashTable::Combine(ExecutionContext &context, GlobalSinkSta
|
|
|
183
190
|
return; // no data
|
|
184
191
|
}
|
|
185
192
|
|
|
186
|
-
if (!llstate.ht->IsPartitioned() && gstate.partition_info.n_partitions > 1 && gstate.
|
|
193
|
+
if (!llstate.ht->IsPartitioned() && gstate.partition_info.n_partitions > 1 && gstate.partitioned) {
|
|
187
194
|
llstate.ht->Partition();
|
|
188
195
|
}
|
|
189
196
|
|
|
package/src/duckdb/src/function/cast_rules.cpp
CHANGED
@@ -207,6 +207,15 @@ int64_t CastRules::ImplicitCast(const LogicalType &from, const LogicalType &to)
|
|
|
207
207
|
// if aliases are different, an implicit cast is not possible
|
|
208
208
|
return -1;
|
|
209
209
|
}
|
|
210
|
+
if (from.id() == LogicalTypeId::LIST && to.id() == LogicalTypeId::LIST) {
|
|
211
|
+
// Lists can be cast if their child types can be cast
|
|
212
|
+
auto child_cost = ImplicitCast(ListType::GetChildType(from), ListType::GetChildType(to));
|
|
213
|
+
if (child_cost >= 100) {
|
|
214
|
+
// subtract one from the cost because we prefer LIST[X] -> LIST[VARCHAR] over LIST[X] -> VARCHAR
|
|
215
|
+
child_cost--;
|
|
216
|
+
}
|
|
217
|
+
return child_cost;
|
|
218
|
+
}
|
|
210
219
|
if (from.id() == to.id()) {
|
|
211
220
|
// arguments match: do nothing
|
|
212
221
|
return 0;
|
|
@@ -219,10 +228,6 @@ int64_t CastRules::ImplicitCast(const LogicalType &from, const LogicalType &to)
|
|
|
219
228
|
// everything can be cast to VARCHAR, but this cast has a high cost
|
|
220
229
|
return TargetTypeCost(to);
|
|
221
230
|
}
|
|
222
|
-
if (from.id() == LogicalTypeId::LIST && to.id() == LogicalTypeId::LIST) {
|
|
223
|
-
// Lists can be cast if their child types can be cast
|
|
224
|
-
return ImplicitCast(ListType::GetChildType(from), ListType::GetChildType(to));
|
|
225
|
-
}
|
|
226
231
|
|
|
227
232
|
if (from.id() == LogicalTypeId::UNION && to.id() == LogicalTypeId::UNION) {
|
|
228
233
|
// Unions can be cast if the source tags are a subset of the target tags
|
|
package/src/duckdb/src/function/table/version/pragma_version.cpp
CHANGED
@@ -1,8 +1,8 @@
|
|
|
1
1
|
#ifndef DUCKDB_VERSION
|
|
2
|
-
#define DUCKDB_VERSION "0.7.2-dev1138"
|
|
2
|
+
#define DUCKDB_VERSION "0.7.2-dev1146"
|
|
3
3
|
#endif
|
|
4
4
|
#ifndef DUCKDB_SOURCE_ID
|
|
5
|
-
#define DUCKDB_SOURCE_ID "
|
|
5
|
+
#define DUCKDB_SOURCE_ID "b8cf6a98e2"
|
|
6
6
|
#endif
|
|
7
7
|
#include "duckdb/function/table/system_functions.hpp"
|
|
8
8
|
#include "duckdb/main/database.hpp"
|
|
package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp
CHANGED
@@ -323,9 +323,12 @@ public:
|
|
|
323
323
|
public:
|
|
324
324
|
DUCKDB_API void Resize(idx_t old_size, idx_t new_size);
|
|
325
325
|
|
|
326
|
-
DUCKDB_API void
|
|
326
|
+
DUCKDB_API void SliceInPlace(const ValidityMask &other, idx_t target_offset, idx_t source_offset, idx_t count);
|
|
327
|
+
DUCKDB_API void Slice(const ValidityMask &other, idx_t source_offset, idx_t count);
|
|
327
328
|
DUCKDB_API void Combine(const ValidityMask &other, idx_t count);
|
|
328
329
|
DUCKDB_API string ToString(idx_t count) const;
|
|
330
|
+
|
|
331
|
+
DUCKDB_API static bool IsAligned(idx_t count);
|
|
329
332
|
};
|
|
330
333
|
|
|
331
334
|
} // namespace duckdb
|