duckdb 0.8.2-dev4203.0 → 0.8.2-dev4376.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/parquet_extension.cpp +1 -1
- package/src/duckdb/src/common/enum_util.cpp +5 -0
- package/src/duckdb/src/common/file_buffer.cpp +1 -1
- package/src/duckdb/src/common/sort/partition_state.cpp +107 -29
- package/src/duckdb/src/common/types/validity_mask.cpp +56 -0
- package/src/duckdb/src/execution/index/art/art.cpp +5 -1
- package/src/duckdb/src/execution/index/art/leaf.cpp +13 -10
- package/src/duckdb/src/execution/index/art/node48.cpp +0 -2
- package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +38 -73
- package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +245 -27
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +2 -3
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +35 -20
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/function/table/arrow_conversion.cpp +9 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/constants.hpp +0 -15
- package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +14 -4
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +1 -7
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +38 -8
- package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/relation.hpp +9 -2
- package/src/duckdb/src/include/duckdb/storage/block.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/compression/bitpacking.hpp +1 -8
- package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +6 -2
- package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +35 -19
- package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +19 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +19 -13
- package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +4 -19
- package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +15 -15
- package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +59 -0
- package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +1 -1
- package/src/duckdb/src/include/duckdb/transaction/commit_state.hpp +1 -6
- package/src/duckdb/src/include/duckdb/transaction/delete_info.hpp +3 -2
- package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +4 -2
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +1 -1
- package/src/duckdb/src/include/duckdb/transaction/undo_buffer.hpp +0 -1
- package/src/duckdb/src/main/relation.cpp +15 -2
- package/src/duckdb/src/main/settings/settings.cpp +5 -10
- package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -0
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +0 -1
- package/src/duckdb/src/storage/compression/bitpacking.cpp +55 -48
- package/src/duckdb/src/storage/data_table.cpp +1 -1
- package/src/duckdb/src/storage/local_storage.cpp +1 -1
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +41 -2
- package/src/duckdb/src/storage/metadata/metadata_reader.cpp +12 -3
- package/src/duckdb/src/storage/metadata/metadata_writer.cpp +8 -2
- package/src/duckdb/src/storage/partial_block_manager.cpp +42 -15
- package/src/duckdb/src/storage/single_file_block_manager.cpp +1 -2
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/chunk_info.cpp +39 -33
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +26 -32
- package/src/duckdb/src/storage/table/column_data.cpp +14 -9
- package/src/duckdb/src/storage/table/list_column_data.cpp +2 -2
- package/src/duckdb/src/storage/table/row_group.cpp +102 -192
- package/src/duckdb/src/storage/table/row_group_collection.cpp +2 -2
- package/src/duckdb/src/storage/table/row_version_manager.cpp +228 -0
- package/src/duckdb/src/storage/table/update_segment.cpp +2 -2
- package/src/duckdb/src/transaction/cleanup_state.cpp +2 -1
- package/src/duckdb/src/transaction/commit_state.cpp +5 -4
- package/src/duckdb/src/transaction/duck_transaction.cpp +4 -2
- package/src/duckdb/src/transaction/rollback_state.cpp +2 -1
- package/src/duckdb/src/transaction/undo_buffer.cpp +3 -5
- package/src/duckdb/ub_src_storage_table.cpp +2 -0
package/src/duckdb/src/execution/index/fixed_size_buffer.cpp
@@ -5,70 +5,288 @@
 
 namespace duckdb {
 
+//===--------------------------------------------------------------------===//
+// PartialBlockForIndex
+//===--------------------------------------------------------------------===//
+
+PartialBlockForIndex::PartialBlockForIndex(PartialBlockState state, BlockManager &block_manager,
+                                           const shared_ptr<BlockHandle> &block_handle)
+    : PartialBlock(state, block_manager, block_handle) {
+}
+
+void PartialBlockForIndex::Flush(const idx_t free_space_left) {
+    FlushInternal(free_space_left);
+    block_handle = block_manager.ConvertToPersistent(state.block_id, std::move(block_handle));
+    Clear();
+}
+
+void PartialBlockForIndex::Merge(PartialBlock &other, idx_t offset, idx_t other_size) {
+    throw InternalException("no merge for PartialBlockForIndex");
+}
+
+void PartialBlockForIndex::Clear() {
+    block_handle.reset();
+}
+
+//===--------------------------------------------------------------------===//
+// FixedSizeBuffer
+//===--------------------------------------------------------------------===//
+
+constexpr idx_t FixedSizeBuffer::BASE[];
+constexpr uint8_t FixedSizeBuffer::SHIFT[];
+
 FixedSizeBuffer::FixedSizeBuffer(BlockManager &block_manager)
-    : block_manager(block_manager), segment_count(0), dirty(false), vacuum(false),
+    : block_manager(block_manager), segment_count(0), allocation_size(0), dirty(false), vacuum(false), block_pointer(),
+      block_handle(nullptr) {
 
     auto &buffer_manager = block_manager.buffer_manager;
     buffer_handle = buffer_manager.Allocate(Storage::BLOCK_SIZE, false, &block_handle);
 }
 
-FixedSizeBuffer::FixedSizeBuffer(BlockManager &block_manager, const idx_t segment_count, const
-
+FixedSizeBuffer::FixedSizeBuffer(BlockManager &block_manager, const idx_t segment_count, const idx_t allocation_size,
+                                 const BlockPointer &block_pointer)
+    : block_manager(block_manager), segment_count(segment_count), allocation_size(allocation_size), dirty(false),
+      vacuum(false), block_pointer(block_pointer) {
 
-    D_ASSERT(
-    block_handle = block_manager.RegisterBlock(block_id);
-    D_ASSERT(BlockId() < MAXIMUM_BLOCK);
+    D_ASSERT(block_pointer.IsValid());
+    block_handle = block_manager.RegisterBlock(block_pointer.block_id);
+    D_ASSERT(block_handle->BlockId() < MAXIMUM_BLOCK);
 }
 
 void FixedSizeBuffer::Destroy() {
     if (InMemory()) {
+        // we can have multiple readers on a pinned block, and unpinning the buffer handle
+        // decrements the reader count on the underlying block handle (Destroy() unpins)
        buffer_handle.Destroy();
     }
     if (OnDisk()) {
-
+        // marking a block as modified decreases the reference count of multi-use blocks
+        block_manager.MarkBlockAsModified(block_pointer.block_id);
     }
 }
 
-void FixedSizeBuffer::Serialize(
+void FixedSizeBuffer::Serialize(PartialBlockManager &partial_block_manager, const idx_t available_segments,
+                                const idx_t segment_size, const idx_t bitmask_offset) {
 
+    // we do not serialize a block that is already on disk and not in memory
     if (!InMemory()) {
         if (!OnDisk() || dirty) {
-            throw InternalException("invalid
+            throw InternalException("invalid or missing buffer in FixedSizeAllocator");
         }
         return;
     }
+
+    // we do not serialize a block that is already on disk and not dirty
     if (!dirty && OnDisk()) {
        return;
     }
 
-
-
-
-
+    if (dirty) {
+        // the allocation possibly changed
+        auto max_offset = GetMaxOffset(available_segments);
+        allocation_size = max_offset * segment_size + bitmask_offset;
+    }
 
-    //
-
+    // the buffer is in memory, so we copied it onto a new buffer when pinning
+    D_ASSERT(InMemory() && !OnDisk());
 
-
-
-
-
-
-
+    // now we write the changes, first get a partial block allocation
+    PartialBlockAllocation allocation = partial_block_manager.GetBlockAllocation(allocation_size);
+    block_pointer.block_id = allocation.state.block_id;
+    block_pointer.offset = allocation.state.offset;
+
+    auto &buffer_manager = block_manager.buffer_manager;
+
+    if (allocation.partial_block) {
+        // copy to an existing partial block
+        D_ASSERT(block_pointer.offset > 0);
+        auto &p_block_for_index = allocation.partial_block->Cast<PartialBlockForIndex>();
+        auto dst_handle = buffer_manager.Pin(p_block_for_index.block_handle);
+        memcpy(dst_handle.Ptr() + block_pointer.offset, buffer_handle.Ptr(), allocation_size);
+        SetUninitializedRegions(p_block_for_index, segment_size, block_pointer.offset, bitmask_offset);
 
     } else {
-        //
-
-        D_ASSERT(
-
+        // create a new block that can potentially be used as a partial block
+        D_ASSERT(block_handle);
+        D_ASSERT(!block_pointer.offset);
+        auto p_block_for_index = make_uniq<PartialBlockForIndex>(allocation.state, block_manager, block_handle);
+        SetUninitializedRegions(*p_block_for_index, segment_size, block_pointer.offset, bitmask_offset);
+        allocation.partial_block = std::move(p_block_for_index);
     }
+
+    partial_block_manager.RegisterPartialBlock(std::move(allocation));
+
+    // resetting this buffer
+    buffer_handle.Destroy();
+    block_handle = block_manager.RegisterBlock(block_pointer.block_id);
+    D_ASSERT(block_handle->BlockId() < MAXIMUM_BLOCK);
+
+    // we persist any changes, so the buffer is no longer dirty
+    dirty = false;
 }
 
 void FixedSizeBuffer::Pin() {
 
     auto &buffer_manager = block_manager.buffer_manager;
-    D_ASSERT(
-
+    D_ASSERT(block_pointer.IsValid());
+    D_ASSERT(block_handle && block_handle->BlockId() < MAXIMUM_BLOCK);
+    D_ASSERT(!dirty);
+
+    buffer_handle = buffer_manager.Pin(block_handle);
+
+    // we need to copy the (partial) data into a new (not yet disk-backed) buffer handle
+    shared_ptr<BlockHandle> new_block_handle;
+    auto new_buffer_handle = buffer_manager.Allocate(Storage::BLOCK_SIZE, false, &new_block_handle);
+
+    memcpy(new_buffer_handle.Ptr(), buffer_handle.Ptr() + block_pointer.offset, allocation_size);
+
+    Destroy();
+    buffer_handle = std::move(new_buffer_handle);
+    block_handle = new_block_handle;
+    block_pointer = BlockPointer();
+}
+
+uint32_t FixedSizeBuffer::GetOffset(const idx_t bitmask_count) {
+
+    // this function calls Get() on the buffer, so the buffer must already be in memory
+    D_ASSERT(InMemory());
+
+    // get the bitmask data
+    auto bitmask_ptr = reinterpret_cast<validity_t *>(Get());
+    ValidityMask mask(bitmask_ptr);
+    auto data = mask.GetData();
+
+    // fills up a buffer sequentially before searching for free bits
+    if (mask.RowIsValid(segment_count)) {
+        mask.SetInvalid(segment_count);
+        return segment_count;
+    }
+
+    for (idx_t entry_idx = 0; entry_idx < bitmask_count; entry_idx++) {
+        // get an entry with free bits
+        if (data[entry_idx] == 0) {
+            continue;
+        }
+
+        // find the position of the free bit
+        auto entry = data[entry_idx];
+        idx_t first_valid_bit = 0;
+
+        // this loop finds the position of the rightmost set bit in entry and stores it
+        // in first_valid_bit
+        for (idx_t i = 0; i < 6; i++) {
+            // set the left half of the bits of this level to zero and test if the entry is still not zero
+            if (entry & BASE[i]) {
+                // first valid bit is in the rightmost s[i] bits
+                // permanently set the left half of the bits to zero
+                entry &= BASE[i];
+            } else {
+                // first valid bit is in the leftmost s[i] bits
+                // shift by s[i] for the next iteration and add s[i] to the position of the rightmost set bit
+                entry >>= SHIFT[i];
+                first_valid_bit += SHIFT[i];
+            }
+        }
+        D_ASSERT(entry);
+
+        auto prev_bits = entry_idx * sizeof(validity_t) * 8;
+        D_ASSERT(mask.RowIsValid(prev_bits + first_valid_bit));
+        mask.SetInvalid(prev_bits + first_valid_bit);
+        return (prev_bits + first_valid_bit);
+    }
+
+    throw InternalException("Invalid bitmask for FixedSizeAllocator");
+}
+
+uint32_t FixedSizeBuffer::GetMaxOffset(const idx_t available_segments) {
+
+    // this function calls Get() on the buffer, so the buffer must already be in memory
+    D_ASSERT(InMemory());
+
+    // finds the maximum zero bit in a bitmask, and adds one to it,
+    // so that max_offset * segment_size = allocated_size of this bitmask's buffer
+    idx_t entry_size = sizeof(validity_t) * 8;
+    idx_t bitmask_count = available_segments / entry_size;
+    if (available_segments % entry_size != 0) {
+        bitmask_count++;
+    }
+    uint32_t max_offset = bitmask_count * sizeof(validity_t) * 8;
+    auto bits_in_last_entry = available_segments % (sizeof(validity_t) * 8);
+
+    // get the bitmask data
+    auto bitmask_ptr = reinterpret_cast<validity_t *>(Get());
+    const ValidityMask mask(bitmask_ptr);
+    const auto data = mask.GetData();
+
+    D_ASSERT(bitmask_count > 0);
+    for (idx_t i = bitmask_count; i > 0; i--) {
+
+        auto entry = data[i - 1];
+
+        // set all bits after bits_in_last_entry
+        if (i == bitmask_count) {
+            entry |= ~idx_t(0) << bits_in_last_entry;
+        }
+
+        if (entry == ~idx_t(0)) {
+            max_offset -= sizeof(validity_t) * 8;
+            continue;
+        }
+
+        // invert data[entry_idx]
+        auto entry_inv = ~entry;
+        idx_t first_valid_bit = 0;
+
+        // then find the position of the LEFTMOST set bit
+        for (idx_t level = 0; level < 6; level++) {
+
+            // set the right half of the bits of this level to zero and test if the entry is still not zero
+            if (entry_inv & ~BASE[level]) {
+                // first valid bit is in the leftmost s[level] bits
+                // shift by s[level] for the next iteration and add s[level] to the position of the leftmost set bit
+                entry_inv >>= SHIFT[level];
+                first_valid_bit += SHIFT[level];
+            } else {
+                // first valid bit is in the rightmost s[level] bits
+                // permanently set the left half of the bits to zero
+                entry_inv &= BASE[level];
+            }
+        }
+        D_ASSERT(entry_inv);
+        max_offset -= sizeof(validity_t) * 8 - first_valid_bit;
+        D_ASSERT(!mask.RowIsValid(max_offset));
+        return max_offset + 1;
+    }
+
+    // there are no allocations in this buffer
+    // FIXME: put this line back in and then fix the missing vacuum bug in
+    // FIXME: test_index_large_aborted_append.test with force_restart
+    // FIXME: test if we still have non-dirty buffer to serialize after fixing this
+    // throw InternalException("tried to serialize empty buffer");
+    return 0;
+}
+
+void FixedSizeBuffer::SetUninitializedRegions(PartialBlockForIndex &p_block_for_index, const idx_t segment_size,
+                                              const idx_t offset, const idx_t bitmask_offset) {
+
+    // this function calls Get() on the buffer, so the buffer must already be in memory
+    D_ASSERT(InMemory());
+
+    auto bitmask_ptr = reinterpret_cast<validity_t *>(Get());
+    ValidityMask mask(bitmask_ptr);
+
+    idx_t i = 0;
+    idx_t max_offset = offset + allocation_size;
+    idx_t current_offset = offset + bitmask_offset;
+    while (current_offset < max_offset) {
+
+        if (mask.RowIsValid(i)) {
+            D_ASSERT(current_offset + segment_size <= max_offset);
+            p_block_for_index.AddUninitializedRegion(current_offset, current_offset + segment_size);
+        }
+        current_offset += segment_size;
+        i++;
+    }
 }
 
 } // namespace duckdb
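The loop in GetOffset above finds the rightmost set bit of a 64-bit validity entry (a free segment) by halving the search window with the BASE masks and SHIFT widths that this same release removes from fixed_size_allocator.hpp. A minimal standalone sketch of that search, with the constants copied from the diff and the function name FirstSetBit being ours rather than DuckDB's:

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>

// Constants as declared in the diff (FixedSizeBuffer::BASE / FixedSizeBuffer::SHIFT).
static constexpr uint64_t BASE[] = {0x00000000FFFFFFFF, 0x0000FFFF, 0x00FF, 0x0F, 0x3, 0x1};
static constexpr uint8_t SHIFT[] = {32, 16, 8, 4, 2, 1};

// Position of the rightmost set bit in a non-zero entry, i.e. the first free segment.
static uint64_t FirstSetBit(uint64_t entry) {
    assert(entry != 0);
    uint64_t first_valid_bit = 0;
    for (int i = 0; i < 6; i++) {
        if (entry & BASE[i]) {
            entry &= BASE[i]; // the bit is in the lower half: mask away the upper half
        } else {
            entry >>= SHIFT[i];          // the bit is in the upper half: shift it down...
            first_valid_bit += SHIFT[i]; // ...and remember how far we shifted
        }
    }
    return first_valid_bit;
}

int main() {
    assert(FirstSetBit(1ULL) == 0);
    assert(FirstSetBit(1ULL << 17) == 17);
    assert(FirstSetBit(1ULL << 63) == 63);
    // For non-zero entries this matches the count-trailing-zeros builtin (GCC/Clang).
    assert(FirstSetBit(0xF0ULL) == static_cast<uint64_t>(__builtin_ctzll(0xF0ULL)));
    printf("first free segment in 0xF0: %llu\n", static_cast<unsigned long long>(FirstSetBit(0xF0ULL)));
    return 0;
}
```

GetMaxOffset applies the same six-level scheme to the inverted entry, walking toward the leftmost set bit instead, to find the highest segment still in use.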
package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp
@@ -156,8 +156,7 @@ SinkFinalizeType PhysicalWindow::Finalize(Pipeline &pipeline, Event &event, Clie
     }
 
     // Find the first group to sort
-
-    if (groups.empty()) {
+    if (!state.global_partition->HasMergeTasks()) {
         // Empty input!
         return SinkFinalizeType::NO_OUTPUT_POSSIBLE;
     }
@@ -538,7 +537,7 @@ WindowGlobalSourceState::Task WindowGlobalSourceState::NextTask(idx_t hash_bin)
     if (hash_bin < bin_count) {
         // Find a non-empty hash group.
         for (; hash_bin < hash_groups.size(); hash_bin = next_build++) {
-            if (hash_groups[hash_bin]) {
+            if (hash_groups[hash_bin] && hash_groups[hash_bin]->count) {
                 auto result = CreateTask(hash_bin);
                 if (result.second) {
                     return result;
package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp
@@ -163,8 +163,7 @@ SinkFinalizeType PhysicalAsOfJoin::Finalize(Pipeline &pipeline, Event &event, Cl
     gstate.lhs_sink->SyncPartitioning(gstate.rhs_sink);
 
     // Find the first group to sort
-
-    if (groups.empty() && EmptyResultIfRHSIsEmpty()) {
+    if (!gstate.rhs_sink.HasMergeTasks() && EmptyResultIfRHSIsEmpty()) {
         // Empty input!
         return SinkFinalizeType::NO_OUTPUT_POSSIBLE;
     }
@@ -433,6 +432,9 @@ void AsOfProbeBuffer::BeginLeftScan(hash_t scan_bin) {
 
     left_hash = lhs_sink.hash_groups[left_group].get();
     auto &left_sort = *(left_hash->global_sort);
+    if (left_sort.sorted_blocks.empty()) {
+        return;
+    }
     lhs_scanner = make_uniq<PayloadScanner>(left_sort, false);
     left_itr = make_uniq<SBIterator>(left_sort, iterator_comp);
 
@@ -681,14 +683,16 @@ class AsOfLocalSourceState : public LocalSourceState {
 public:
     using HashGroupPtr = unique_ptr<PartitionGlobalHashGroup>;
 
-    AsOfLocalSourceState(AsOfGlobalSourceState &gsource, const PhysicalAsOfJoin &op);
+    AsOfLocalSourceState(AsOfGlobalSourceState &gsource, const PhysicalAsOfJoin &op, ClientContext &client_p);
 
-
-
+    // Return true if we were not interrupted (another thread died)
+    bool CombineLeftPartitions();
+    bool MergeLeftPartitions();
 
     idx_t BeginRightScan(const idx_t hash_bin);
 
     AsOfGlobalSourceState &gsource;
+    ClientContext &client;
 
     //! The left side partition being probed
     AsOfProbeBuffer probe_buffer;
@@ -702,38 +706,45 @@ public:
     const bool *found_match = {};
 };
 
-AsOfLocalSourceState::AsOfLocalSourceState(AsOfGlobalSourceState &gsource, const PhysicalAsOfJoin &op
-
+AsOfLocalSourceState::AsOfLocalSourceState(AsOfGlobalSourceState &gsource, const PhysicalAsOfJoin &op,
+                                           ClientContext &client_p)
+    : gsource(gsource), client(client_p), probe_buffer(gsource.gsink.lhs_sink->context, op) {
     gsource.mergers++;
 }
 
-
+bool AsOfLocalSourceState::CombineLeftPartitions() {
     const auto buffer_count = gsource.gsink.lhs_buffers.size();
-    while (gsource.combined < buffer_count) {
+    while (gsource.combined < buffer_count && !client.interrupted) {
         const auto next_combine = gsource.next_combine++;
         if (next_combine < buffer_count) {
             gsource.gsink.lhs_buffers[next_combine]->Combine();
             ++gsource.combined;
         } else {
-
+            TaskScheduler::GetScheduler(client).YieldThread();
         }
     }
+
+    return !client.interrupted;
 }
 
-
+bool AsOfLocalSourceState::MergeLeftPartitions() {
     PartitionGlobalMergeStates::Callback local_callback;
     PartitionLocalMergeState local_merge(*gsource.gsink.lhs_sink);
     gsource.GetMergeStates().ExecuteTask(local_merge, local_callback);
     gsource.merged++;
-    while (gsource.merged < gsource.mergers) {
-
+    while (gsource.merged < gsource.mergers && !client.interrupted) {
+        TaskScheduler::GetScheduler(client).YieldThread();
     }
+    return !client.interrupted;
 }
 
 idx_t AsOfLocalSourceState::BeginRightScan(const idx_t hash_bin_p) {
     hash_bin = hash_bin_p;
 
     hash_group = std::move(gsource.gsink.rhs_sink.hash_groups[hash_bin]);
+    if (hash_group->global_sort->sorted_blocks.empty()) {
+        return 0;
+    }
     scanner = make_uniq<PayloadScanner>(*hash_group->global_sort);
     found_match = gsource.gsink.right_outers[hash_bin].GetMatches();
 
@@ -743,7 +754,7 @@ idx_t AsOfLocalSourceState::BeginRightScan(const idx_t hash_bin_p) {
 unique_ptr<LocalSourceState> PhysicalAsOfJoin::GetLocalSourceState(ExecutionContext &context,
                                                                    GlobalSourceState &gstate) const {
     auto &gsource = gstate.Cast<AsOfGlobalSourceState>();
-    return make_uniq<AsOfLocalSourceState>(gsource, *this);
+    return make_uniq<AsOfLocalSourceState>(gsource, *this, context.client);
 }
 
 SourceResultType PhysicalAsOfJoin::GetData(ExecutionContext &context, DataChunk &chunk,
@@ -751,17 +762,21 @@ SourceResultType PhysicalAsOfJoin::GetData(ExecutionContext &context, DataChunk
     auto &gsource = input.global_state.Cast<AsOfGlobalSourceState>();
     auto &lsource = input.local_state.Cast<AsOfLocalSourceState>();
     auto &rhs_sink = gsource.gsink.rhs_sink;
+    auto &client = context.client;
 
     // Step 1: Combine the partitions
-    lsource.CombineLeftPartitions()
+    if (!lsource.CombineLeftPartitions()) {
+        return SourceResultType::FINISHED;
+    }
 
     // Step 2: Sort on all threads
-    lsource.MergeLeftPartitions()
+    if (!lsource.MergeLeftPartitions()) {
+        return SourceResultType::FINISHED;
+    }
 
     // Step 3: Join the partitions
     auto &lhs_sink = *gsource.gsink.lhs_sink;
-    auto
-    const auto left_bins = partitions.size();
+    const auto left_bins = lhs_sink.grouping_data ? lhs_sink.grouping_data->GetPartitions().size() : 1;
     while (gsource.flushed < left_bins) {
         // Make sure we have something to flush
         if (!lsource.probe_buffer.Scanning()) {
@@ -769,13 +784,13 @@ SourceResultType PhysicalAsOfJoin::GetData(ExecutionContext &context, DataChunk
             if (left_bin < left_bins) {
                 // More to flush
                 lsource.probe_buffer.BeginLeftScan(left_bin);
-            } else if (!IsRightOuterJoin(join_type)) {
+            } else if (!IsRightOuterJoin(join_type) || client.interrupted) {
                 return SourceResultType::FINISHED;
             } else {
                 // Wait for all threads to finish
                 // TODO: How to implement a spin wait correctly?
                 // Returning BLOCKED seems to hang the system.
-
+                TaskScheduler::GetScheduler(client).YieldThread();
                 continue;
             }
         }
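Several of the hunks above replace bare spin loops with waits that also watch ClientContext::interrupted and yield through the TaskScheduler, so that when one thread fails the remaining threads stop spinning and return FINISHED instead of hanging. A rough sketch of the pattern using standard-library stand-ins (FakeClient and WaitOrInterrupt are illustrative names, not DuckDB API):

```cpp
#include <atomic>
#include <thread>

struct FakeClient {
    // stand-in for ClientContext::interrupted
    std::atomic<bool> interrupted{false};
};

// Wait until `done` reaches `target`, yielding while waiting. Returns true if the
// wait completed and false if the client was interrupted, mirroring the new
// CombineLeftPartitions/MergeLeftPartitions return values.
static bool WaitOrInterrupt(FakeClient &client, std::atomic<int> &done, int target) {
    while (done.load() < target && !client.interrupted.load()) {
        // DuckDB yields via TaskScheduler::GetScheduler(client).YieldThread()
        std::this_thread::yield();
    }
    return !client.interrupted.load();
}
```

Callers then treat a false return the way GetData does above: stop producing and return SourceResultType::FINISHED.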
package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp
@@ -194,7 +194,7 @@ public:
         }
         auto new_count = current_collection->GetTotalRows();
         auto batch_type =
-            new_count <
+            new_count < Storage::ROW_GROUP_SIZE ? RowGroupBatchType::NOT_FLUSHED : RowGroupBatchType::FLUSHED;
         if (batch_type == RowGroupBatchType::FLUSHED && writer) {
             writer->WriteLastRowGroup(*current_collection);
         }
package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp
@@ -482,7 +482,7 @@ SinkCombineResultType PhysicalInsert::Combine(ExecutionContext &context, Operato
 
     lock_guard<mutex> lock(gstate.lock);
     gstate.insert_count += append_count;
-    if (append_count <
+    if (append_count < Storage::ROW_GROUP_SIZE) {
         // we have few rows - append to the local storage directly
         auto &table = gstate.table;
         auto &storage = table.GetStorage();
package/src/duckdb/src/function/table/arrow_conversion.cpp
@@ -837,7 +837,15 @@ void ArrowTableFunction::ArrowToDuckDB(ArrowScanLocalState &scan_state, const ar
             throw InvalidInputException("arrow_scan: array length mismatch");
         }
         // Make sure this Vector keeps the Arrow chunk alive in case we can zero-copy the data
-
+        if (scan_state.arrow_owned_data.find(idx) == scan_state.arrow_owned_data.end()) {
+            auto arrow_data = make_shared<ArrowArrayWrapper>();
+            arrow_data->arrow_array = scan_state.chunk->arrow_array;
+            scan_state.chunk->arrow_array.release = nullptr;
+            scan_state.arrow_owned_data[idx] = arrow_data;
+        }
+
+        output.data[idx].GetBuffer()->SetAuxiliaryData(make_uniq<ArrowAuxiliaryData>(scan_state.arrow_owned_data[idx]));
+
         D_ASSERT(arrow_convert_data.find(col_idx) != arrow_convert_data.end());
         auto &arrow_type = *arrow_convert_data.at(col_idx);
         if (array.dictionary) {
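The arrow_conversion.cpp hunk above caches one shared ArrowArrayWrapper per output column, clears the chunk's release callback so the producer no longer frees the buffers, and attaches the shared wrapper to the vector as auxiliary data; the zero-copied Arrow memory then lives exactly as long as some vector still references it. A simplified sketch of that ownership hand-off against the Arrow C data interface (OwnedArrowArray and TakeOwnership are illustrative names, not DuckDB's types):

```cpp
#include <memory>

#include "arrow/c/abi.h" // struct ArrowArray from the Arrow C data interface

struct OwnedArrowArray {
    ArrowArray array {};
    ~OwnedArrowArray() {
        // release the Arrow buffers once the last shared owner goes away
        if (array.release) {
            array.release(&array);
        }
    }
};

// Move the array out of `source`: afterwards source.release is null, so the
// producer-side chunk no longer owns the buffers; the shared_ptr does instead.
std::shared_ptr<OwnedArrowArray> TakeOwnership(ArrowArray &source) {
    auto owned = std::make_shared<OwnedArrowArray>();
    owned->array = source;
    source.release = nullptr;
    return owned;
}
```

Each consumer that zero-copies out of the array keeps a copy of the shared_ptr, which is what SetAuxiliaryData does for the vector's buffer in the diff.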
package/src/duckdb/src/function/table/version/pragma_version.cpp
@@ -1,8 +1,8 @@
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "0.8.2-
+#define DUCKDB_VERSION "0.8.2-dev4376"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "
+#define DUCKDB_SOURCE_ID "312b995450"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"
package/src/duckdb/src/include/duckdb/common/constants.hpp
@@ -58,21 +58,6 @@ struct DConstants {
     static constexpr const idx_t INVALID_INDEX = idx_t(-1);
 };
 
-struct Storage {
-    //! The size of a hard disk sector, only really needed for Direct IO
-    constexpr static int SECTOR_SIZE = 4096;
-    //! Block header size for blocks written to the storage
-    constexpr static int BLOCK_HEADER_SIZE = sizeof(uint64_t);
-    // Size of a memory slot managed by the StorageManager. This is the quantum of allocation for Blocks on DuckDB. We
-    // default to 256KB. (1 << 18)
-    constexpr static int BLOCK_ALLOC_SIZE = 262144;
-    //! The actual memory space that is available within the blocks
-    constexpr static int BLOCK_SIZE = BLOCK_ALLOC_SIZE - BLOCK_HEADER_SIZE;
-    //! The size of the headers. This should be small and written more or less atomically by the hard disk. We default
-    //! to the page size, which is 4KB. (1 << 12)
-    constexpr static int FILE_HEADER_SIZE = 4096;
-};
-
 struct LogicalIndex {
     explicit LogicalIndex(idx_t index) : index(index) {
     }
package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp
@@ -1,7 +1,7 @@
 //===----------------------------------------------------------------------===//
 // DuckDB
 //
-// duckdb/common/serializer/
+// duckdb/common/serializer/memory_stream.hpp
 //
 //
 //===----------------------------------------------------------------------===//
package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp
@@ -18,7 +18,6 @@ namespace duckdb {
 class PartitionGlobalHashGroup {
 public:
     using GlobalSortStatePtr = unique_ptr<GlobalSortState>;
-    using LocalSortStatePtr = unique_ptr<LocalSortState>;
     using Orders = vector<BoundOrderByNode>;
     using Types = vector<LogicalType>;
 
@@ -54,6 +53,8 @@
                               const vector<BoundOrderByNode> &order_bys, const Types &payload_types,
                               const vector<unique_ptr<BaseStatistics>> &partitions_stats, idx_t estimated_cardinality);
 
+    bool HasMergeTasks() const;
+
     unique_ptr<RadixPartitionedTupleData> CreatePartition(idx_t new_bits) const;
     void SyncPartitioning(const PartitionGlobalSinkState &other);
 
@@ -97,21 +98,26 @@ private:
 
 class PartitionLocalSinkState {
 public:
+    using LocalSortStatePtr = unique_ptr<LocalSortState>;
+
     PartitionLocalSinkState(ClientContext &context, PartitionGlobalSinkState &gstate_p);
 
     // Global state
     PartitionGlobalSinkState &gstate;
     Allocator &allocator;
 
-    //
+    // Shared expression evaluation
     ExpressionExecutor executor;
     DataChunk group_chunk;
     DataChunk payload_chunk;
+    size_t sort_cols;
+
+    // OVER(PARTITION BY...) (hash grouping)
     unique_ptr<PartitionedTupleData> local_partition;
     unique_ptr<PartitionedTupleDataAppendState> local_append;
 
-    // OVER(...) (sorting)
-
+    // OVER(ORDER BY...) (only sorting)
+    LocalSortStatePtr local_sort;
 
     // OVER() (no sorting)
     RowLayout payload_layout;
@@ -134,8 +140,12 @@ class PartitionGlobalMergeState {
 public:
     using GroupDataPtr = unique_ptr<TupleDataCollection>;
 
+    // OVER(PARTITION BY...)
    PartitionGlobalMergeState(PartitionGlobalSinkState &sink, GroupDataPtr group_data, hash_t hash_bin);
 
+    // OVER(ORDER BY...)
+    explicit PartitionGlobalMergeState(PartitionGlobalSinkState &sink);
+
     bool IsSorted() const {
         lock_guard<mutex> guard(lock);
         return stage == PartitionSortStage::SORTED;
package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp
@@ -332,6 +332,9 @@ public:
     DUCKDB_API string ToString(idx_t count) const;
 
     DUCKDB_API static bool IsAligned(idx_t count);
+
+    void Write(WriteStream &writer, idx_t count);
+    void Read(ReadStream &reader, idx_t count);
 };
 
 } // namespace duckdb
package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp
@@ -45,6 +45,8 @@ public:
     //! Get a new chain of leaf nodes, might cause new buffer allocations,
     //! with the node parameter holding the tail of the chain
     static void New(ART &art, reference<Node> &node, const row_t *row_ids, idx_t count);
+    //! Get a new leaf node without any data
+    static Leaf &New(ART &art, Node &node);
     //! Free the leaf (chain)
     static void Free(ART &art, Node &node);
 
package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp
@@ -29,10 +29,6 @@ public:
     //! We can vacuum 10% or more of the total in-memory footprint
     static constexpr uint8_t VACUUM_THRESHOLD = 10;
 
-    //! Constants for fast offset calculations in the bitmask
-    static constexpr idx_t BASE[] = {0x00000000FFFFFFFF, 0x0000FFFF, 0x00FF, 0x0F, 0x3, 0x1};
-    static constexpr uint8_t SHIFT[] = {32, 16, 8, 4, 2, 1};
-
 public:
     FixedSizeAllocator(const idx_t segment_size, BlockManager &block_manager);
 
@@ -80,7 +76,7 @@ public:
     IndexPointer VacuumPointer(const IndexPointer ptr);
 
     //! Serializes all in-memory buffers and the metadata
-    BlockPointer Serialize(MetadataWriter &writer);
+    BlockPointer Serialize(PartialBlockManager &partial_block_manager, MetadataWriter &writer);
     //! Deserializes all metadata
     void Deserialize(const BlockPointer &block_pointer);
 
@@ -117,8 +113,6 @@ private:
         auto buffer_ptr = buffer.Get(dirty);
         return buffer_ptr + ptr.GetOffset() * segment_size + bitmask_offset;
     }
-    //! Returns the first free offset in a bitmask
-    uint32_t GetOffset(ValidityMask &mask, const idx_t segment_count);
     //! Returns an available buffer id
     idx_t GetAvailableBufferId() const;
 };
|