duckdb 0.8.2-dev4203.0 → 0.8.2-dev4376.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/parquet/parquet_extension.cpp +1 -1
  3. package/src/duckdb/src/common/enum_util.cpp +5 -0
  4. package/src/duckdb/src/common/file_buffer.cpp +1 -1
  5. package/src/duckdb/src/common/sort/partition_state.cpp +107 -29
  6. package/src/duckdb/src/common/types/validity_mask.cpp +56 -0
  7. package/src/duckdb/src/execution/index/art/art.cpp +5 -1
  8. package/src/duckdb/src/execution/index/art/leaf.cpp +13 -10
  9. package/src/duckdb/src/execution/index/art/node48.cpp +0 -2
  10. package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +38 -73
  11. package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +245 -27
  12. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +2 -3
  13. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +35 -20
  14. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +1 -1
  15. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  16. package/src/duckdb/src/function/table/arrow_conversion.cpp +9 -1
  17. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  18. package/src/duckdb/src/include/duckdb/common/constants.hpp +0 -15
  19. package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp +1 -1
  20. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +14 -4
  21. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
  22. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +2 -0
  23. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +1 -7
  24. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +38 -8
  25. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +3 -0
  26. package/src/duckdb/src/include/duckdb/main/relation.hpp +9 -2
  27. package/src/duckdb/src/include/duckdb/storage/block.hpp +3 -3
  28. package/src/duckdb/src/include/duckdb/storage/compression/bitpacking.hpp +1 -8
  29. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +2 -2
  30. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +2 -0
  31. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp +2 -0
  32. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +6 -2
  33. package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +35 -19
  34. package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +19 -0
  35. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +19 -13
  36. package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +4 -19
  37. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +1 -1
  38. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +15 -15
  39. package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +59 -0
  40. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +1 -1
  41. package/src/duckdb/src/include/duckdb/transaction/commit_state.hpp +1 -6
  42. package/src/duckdb/src/include/duckdb/transaction/delete_info.hpp +3 -2
  43. package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +4 -2
  44. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +1 -1
  45. package/src/duckdb/src/include/duckdb/transaction/undo_buffer.hpp +0 -1
  46. package/src/duckdb/src/main/relation.cpp +15 -2
  47. package/src/duckdb/src/main/settings/settings.cpp +5 -10
  48. package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -0
  49. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +0 -1
  50. package/src/duckdb/src/storage/compression/bitpacking.cpp +55 -48
  51. package/src/duckdb/src/storage/data_table.cpp +1 -1
  52. package/src/duckdb/src/storage/local_storage.cpp +1 -1
  53. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +41 -2
  54. package/src/duckdb/src/storage/metadata/metadata_reader.cpp +12 -3
  55. package/src/duckdb/src/storage/metadata/metadata_writer.cpp +8 -2
  56. package/src/duckdb/src/storage/partial_block_manager.cpp +42 -15
  57. package/src/duckdb/src/storage/single_file_block_manager.cpp +1 -2
  58. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  59. package/src/duckdb/src/storage/table/chunk_info.cpp +39 -33
  60. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +26 -32
  61. package/src/duckdb/src/storage/table/column_data.cpp +14 -9
  62. package/src/duckdb/src/storage/table/list_column_data.cpp +2 -2
  63. package/src/duckdb/src/storage/table/row_group.cpp +102 -192
  64. package/src/duckdb/src/storage/table/row_group_collection.cpp +2 -2
  65. package/src/duckdb/src/storage/table/row_version_manager.cpp +228 -0
  66. package/src/duckdb/src/storage/table/update_segment.cpp +2 -2
  67. package/src/duckdb/src/transaction/cleanup_state.cpp +2 -1
  68. package/src/duckdb/src/transaction/commit_state.cpp +5 -4
  69. package/src/duckdb/src/transaction/duck_transaction.cpp +4 -2
  70. package/src/duckdb/src/transaction/rollback_state.cpp +2 -1
  71. package/src/duckdb/src/transaction/undo_buffer.cpp +3 -5
  72. package/src/duckdb/ub_src_storage_table.cpp +2 -0
@@ -5,70 +5,288 @@
 
  namespace duckdb {
 
+ //===--------------------------------------------------------------------===//
+ // PartialBlockForIndex
+ //===--------------------------------------------------------------------===//
+
+ PartialBlockForIndex::PartialBlockForIndex(PartialBlockState state, BlockManager &block_manager,
+                                            const shared_ptr<BlockHandle> &block_handle)
+     : PartialBlock(state, block_manager, block_handle) {
+ }
+
+ void PartialBlockForIndex::Flush(const idx_t free_space_left) {
+ 	FlushInternal(free_space_left);
+ 	block_handle = block_manager.ConvertToPersistent(state.block_id, std::move(block_handle));
+ 	Clear();
+ }
+
+ void PartialBlockForIndex::Merge(PartialBlock &other, idx_t offset, idx_t other_size) {
+ 	throw InternalException("no merge for PartialBlockForIndex");
+ }
+
+ void PartialBlockForIndex::Clear() {
+ 	block_handle.reset();
+ }
+
+ //===--------------------------------------------------------------------===//
+ // FixedSizeBuffer
+ //===--------------------------------------------------------------------===//
+
+ constexpr idx_t FixedSizeBuffer::BASE[];
+ constexpr uint8_t FixedSizeBuffer::SHIFT[];
+
  FixedSizeBuffer::FixedSizeBuffer(BlockManager &block_manager)
-     : block_manager(block_manager), segment_count(0), dirty(false), vacuum(false), block_handle(nullptr) {
+     : block_manager(block_manager), segment_count(0), allocation_size(0), dirty(false), vacuum(false), block_pointer(),
+       block_handle(nullptr) {
 
  	auto &buffer_manager = block_manager.buffer_manager;
  	buffer_handle = buffer_manager.Allocate(Storage::BLOCK_SIZE, false, &block_handle);
  }
 
- FixedSizeBuffer::FixedSizeBuffer(BlockManager &block_manager, const idx_t segment_count, const block_id_t &block_id)
-     : block_manager(block_manager), segment_count(segment_count), dirty(false), vacuum(false) {
+ FixedSizeBuffer::FixedSizeBuffer(BlockManager &block_manager, const idx_t segment_count, const idx_t allocation_size,
+                                  const BlockPointer &block_pointer)
+     : block_manager(block_manager), segment_count(segment_count), allocation_size(allocation_size), dirty(false),
+       vacuum(false), block_pointer(block_pointer) {
 
- 	D_ASSERT(block_id < MAXIMUM_BLOCK);
- 	block_handle = block_manager.RegisterBlock(block_id);
- 	D_ASSERT(BlockId() < MAXIMUM_BLOCK);
+ 	D_ASSERT(block_pointer.IsValid());
+ 	block_handle = block_manager.RegisterBlock(block_pointer.block_id);
+ 	D_ASSERT(block_handle->BlockId() < MAXIMUM_BLOCK);
  }
 
  void FixedSizeBuffer::Destroy() {
  	if (InMemory()) {
+ 		// we can have multiple readers on a pinned block, and unpinning the buffer handle
+ 		// decrements the reader count on the underlying block handle (Destroy() unpins)
  		buffer_handle.Destroy();
  	}
  	if (OnDisk()) {
- 		block_manager.MarkBlockAsFree(BlockId());
+ 		// marking a block as modified decreases the reference count of multi-use blocks
+ 		block_manager.MarkBlockAsModified(block_pointer.block_id);
  	}
  }
 
- void FixedSizeBuffer::Serialize() {
+ void FixedSizeBuffer::Serialize(PartialBlockManager &partial_block_manager, const idx_t available_segments,
+                                 const idx_t segment_size, const idx_t bitmask_offset) {
 
+ 	// we do not serialize a block that is already on disk and not in memory
  	if (!InMemory()) {
  		if (!OnDisk() || dirty) {
- 			throw InternalException("invalid/missing buffer in FixedSizeAllocator");
+ 			throw InternalException("invalid or missing buffer in FixedSizeAllocator");
  		}
  		return;
  	}
+
+ 	// we do not serialize a block that is already on disk and not dirty
  	if (!dirty && OnDisk()) {
  		return;
  	}
 
- 	// the buffer is in memory
- 	D_ASSERT(InMemory());
- 	// the buffer never was on disk, or there were changes to it after loading it from disk
- 	D_ASSERT(!OnDisk() || dirty);
+ 	if (dirty) {
+ 		// the allocation possibly changed
+ 		auto max_offset = GetMaxOffset(available_segments);
+ 		allocation_size = max_offset * segment_size + bitmask_offset;
+ 	}
 
- 	// we persist any changes, so the buffer is no longer dirty
- 	dirty = false;
+ 	// the buffer is in memory, so we copied it onto a new buffer when pinning
+ 	D_ASSERT(InMemory() && !OnDisk());
 
- 	if (!OnDisk()) {
- 		// temporary block - convert to persistent
- 		auto block_id = block_manager.GetFreeBlockId();
- 		D_ASSERT(block_id < MAXIMUM_BLOCK);
- 		block_handle = block_manager.ConvertToPersistent(block_id, std::move(block_handle));
- 		buffer_handle.Destroy();
+ 	// now we write the changes, first get a partial block allocation
+ 	PartialBlockAllocation allocation = partial_block_manager.GetBlockAllocation(allocation_size);
+ 	block_pointer.block_id = allocation.state.block_id;
+ 	block_pointer.offset = allocation.state.offset;
+
+ 	auto &buffer_manager = block_manager.buffer_manager;
+
+ 	if (allocation.partial_block) {
+ 		// copy to an existing partial block
+ 		D_ASSERT(block_pointer.offset > 0);
+ 		auto &p_block_for_index = allocation.partial_block->Cast<PartialBlockForIndex>();
+ 		auto dst_handle = buffer_manager.Pin(p_block_for_index.block_handle);
+ 		memcpy(dst_handle.Ptr() + block_pointer.offset, buffer_handle.Ptr(), allocation_size);
+ 		SetUninitializedRegions(p_block_for_index, segment_size, block_pointer.offset, bitmask_offset);
 
  	} else {
- 		// already a persistent block - only need to write it
- 		auto block_id = block_handle->BlockId();
- 		D_ASSERT(block_id < MAXIMUM_BLOCK);
- 		block_manager.Write(buffer_handle.GetFileBuffer(), block_id);
+ 		// create a new block that can potentially be used as a partial block
+ 		D_ASSERT(block_handle);
+ 		D_ASSERT(!block_pointer.offset);
+ 		auto p_block_for_index = make_uniq<PartialBlockForIndex>(allocation.state, block_manager, block_handle);
+ 		SetUninitializedRegions(*p_block_for_index, segment_size, block_pointer.offset, bitmask_offset);
+ 		allocation.partial_block = std::move(p_block_for_index);
  	}
+
+ 	partial_block_manager.RegisterPartialBlock(std::move(allocation));
+
+ 	// resetting this buffer
+ 	buffer_handle.Destroy();
+ 	block_handle = block_manager.RegisterBlock(block_pointer.block_id);
+ 	D_ASSERT(block_handle->BlockId() < MAXIMUM_BLOCK);
+
+ 	// we persist any changes, so the buffer is no longer dirty
+ 	dirty = false;
  }
 
  void FixedSizeBuffer::Pin() {
 
  	auto &buffer_manager = block_manager.buffer_manager;
- 	D_ASSERT(BlockId() < MAXIMUM_BLOCK);
- 	buffer_handle = BufferHandle(buffer_manager.Pin(block_handle));
+ 	D_ASSERT(block_pointer.IsValid());
+ 	D_ASSERT(block_handle && block_handle->BlockId() < MAXIMUM_BLOCK);
+ 	D_ASSERT(!dirty);
+
+ 	buffer_handle = buffer_manager.Pin(block_handle);
+
+ 	// we need to copy the (partial) data into a new (not yet disk-backed) buffer handle
+ 	shared_ptr<BlockHandle> new_block_handle;
+ 	auto new_buffer_handle = buffer_manager.Allocate(Storage::BLOCK_SIZE, false, &new_block_handle);
+
+ 	memcpy(new_buffer_handle.Ptr(), buffer_handle.Ptr() + block_pointer.offset, allocation_size);
+
+ 	Destroy();
+ 	buffer_handle = std::move(new_buffer_handle);
+ 	block_handle = new_block_handle;
+ 	block_pointer = BlockPointer();
+ }
+
+ uint32_t FixedSizeBuffer::GetOffset(const idx_t bitmask_count) {
+
+ 	// this function calls Get() on the buffer, so the buffer must already be in memory
+ 	D_ASSERT(InMemory());
+
+ 	// get the bitmask data
+ 	auto bitmask_ptr = reinterpret_cast<validity_t *>(Get());
+ 	ValidityMask mask(bitmask_ptr);
+ 	auto data = mask.GetData();
+
+ 	// fills up a buffer sequentially before searching for free bits
+ 	if (mask.RowIsValid(segment_count)) {
+ 		mask.SetInvalid(segment_count);
+ 		return segment_count;
+ 	}
+
+ 	for (idx_t entry_idx = 0; entry_idx < bitmask_count; entry_idx++) {
+ 		// get an entry with free bits
+ 		if (data[entry_idx] == 0) {
+ 			continue;
+ 		}
+
+ 		// find the position of the free bit
+ 		auto entry = data[entry_idx];
+ 		idx_t first_valid_bit = 0;
+
+ 		// this loop finds the position of the rightmost set bit in entry and stores it
+ 		// in first_valid_bit
+ 		for (idx_t i = 0; i < 6; i++) {
+ 			// set the left half of the bits of this level to zero and test if the entry is still not zero
+ 			if (entry & BASE[i]) {
+ 				// first valid bit is in the rightmost s[i] bits
+ 				// permanently set the left half of the bits to zero
+ 				entry &= BASE[i];
+ 			} else {
+ 				// first valid bit is in the leftmost s[i] bits
+ 				// shift by s[i] for the next iteration and add s[i] to the position of the rightmost set bit
+ 				entry >>= SHIFT[i];
+ 				first_valid_bit += SHIFT[i];
+ 			}
+ 		}
+ 		D_ASSERT(entry);
+
+ 		auto prev_bits = entry_idx * sizeof(validity_t) * 8;
+ 		D_ASSERT(mask.RowIsValid(prev_bits + first_valid_bit));
+ 		mask.SetInvalid(prev_bits + first_valid_bit);
+ 		return (prev_bits + first_valid_bit);
+ 	}
+
+ 	throw InternalException("Invalid bitmask for FixedSizeAllocator");
+ }
+
+ uint32_t FixedSizeBuffer::GetMaxOffset(const idx_t available_segments) {
+
+ 	// this function calls Get() on the buffer, so the buffer must already be in memory
+ 	D_ASSERT(InMemory());
+
+ 	// finds the maximum zero bit in a bitmask, and adds one to it,
+ 	// so that max_offset * segment_size = allocated_size of this bitmask's buffer
+ 	idx_t entry_size = sizeof(validity_t) * 8;
+ 	idx_t bitmask_count = available_segments / entry_size;
+ 	if (available_segments % entry_size != 0) {
+ 		bitmask_count++;
+ 	}
+ 	uint32_t max_offset = bitmask_count * sizeof(validity_t) * 8;
+ 	auto bits_in_last_entry = available_segments % (sizeof(validity_t) * 8);
+
+ 	// get the bitmask data
+ 	auto bitmask_ptr = reinterpret_cast<validity_t *>(Get());
+ 	const ValidityMask mask(bitmask_ptr);
+ 	const auto data = mask.GetData();
+
+ 	D_ASSERT(bitmask_count > 0);
+ 	for (idx_t i = bitmask_count; i > 0; i--) {
+
+ 		auto entry = data[i - 1];
+
+ 		// set all bits after bits_in_last_entry
+ 		if (i == bitmask_count) {
+ 			entry |= ~idx_t(0) << bits_in_last_entry;
+ 		}
+
+ 		if (entry == ~idx_t(0)) {
+ 			max_offset -= sizeof(validity_t) * 8;
+ 			continue;
+ 		}
+
+ 		// invert data[entry_idx]
+ 		auto entry_inv = ~entry;
+ 		idx_t first_valid_bit = 0;
+
+ 		// then find the position of the LEFTMOST set bit
+ 		for (idx_t level = 0; level < 6; level++) {
+
+ 			// set the right half of the bits of this level to zero and test if the entry is still not zero
+ 			if (entry_inv & ~BASE[level]) {
+ 				// first valid bit is in the leftmost s[level] bits
+ 				// shift by s[level] for the next iteration and add s[level] to the position of the leftmost set bit
+ 				entry_inv >>= SHIFT[level];
+ 				first_valid_bit += SHIFT[level];
+ 			} else {
+ 				// first valid bit is in the rightmost s[level] bits
+ 				// permanently set the left half of the bits to zero
+ 				entry_inv &= BASE[level];
+ 			}
+ 		}
+ 		D_ASSERT(entry_inv);
+ 		max_offset -= sizeof(validity_t) * 8 - first_valid_bit;
+ 		D_ASSERT(!mask.RowIsValid(max_offset));
+ 		return max_offset + 1;
+ 	}
+
+ 	// there are no allocations in this buffer
+ 	// FIXME: put this line back in and then fix the missing vacuum bug in
+ 	// FIXME: test_index_large_aborted_append.test with force_restart
+ 	// FIXME: test if we still have non-dirty buffer to serialize after fixing this
+ 	// throw InternalException("tried to serialize empty buffer");
+ 	return 0;
+ }
+
+ void FixedSizeBuffer::SetUninitializedRegions(PartialBlockForIndex &p_block_for_index, const idx_t segment_size,
+                                               const idx_t offset, const idx_t bitmask_offset) {
+
+ 	// this function calls Get() on the buffer, so the buffer must already be in memory
+ 	D_ASSERT(InMemory());
+
+ 	auto bitmask_ptr = reinterpret_cast<validity_t *>(Get());
+ 	ValidityMask mask(bitmask_ptr);
+
+ 	idx_t i = 0;
+ 	idx_t max_offset = offset + allocation_size;
+ 	idx_t current_offset = offset + bitmask_offset;
+ 	while (current_offset < max_offset) {
+
+ 		if (mask.RowIsValid(i)) {
+ 			D_ASSERT(current_offset + segment_size <= max_offset);
+ 			p_block_for_index.AddUninitializedRegion(current_offset, current_offset + segment_size);
+ 		}
+ 		current_offset += segment_size;
+ 		i++;
+ 	}
  }
 
  } // namespace duckdb
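A note for readers following the new GetOffset/GetMaxOffset code above: both loops are a six-level binary search over one 64-bit bitmask entry, driven by the BASE/SHIFT constants now defined on FixedSizeBuffer, locating the lowest set bit (GetOffset, a set bit marks a free segment) or the highest set bit of the inverted entry (GetMaxOffset). The following is a minimal standalone sketch of that search, assuming plain uint64_t words and hypothetical helper names LowestSetBit/HighestSetBit that are not part of the diff:

#include <cassert>
#include <cstdint>
#include <iostream>

// Illustration only: the BASE/SHIFT binary search used by FixedSizeBuffer::GetOffset
// (lowest set bit) and, on the inverted entry, by FixedSizeBuffer::GetMaxOffset (highest set bit).
static constexpr uint64_t BASE[] = {0x00000000FFFFFFFF, 0x0000FFFF, 0x00FF, 0x0F, 0x3, 0x1};
static constexpr uint8_t SHIFT[] = {32, 16, 8, 4, 2, 1};

// index of the lowest 1-bit (in the allocator bitmask, a 1-bit marks a free segment)
static uint64_t LowestSetBit(uint64_t entry) {
	assert(entry != 0);
	uint64_t position = 0;
	for (int level = 0; level < 6; level++) {
		if (entry & BASE[level]) {
			// a set bit exists in the lower half: restrict the search to it
			entry &= BASE[level];
		} else {
			// the set bit is in the upper half: shift it down and record the offset
			entry >>= SHIFT[level];
			position += SHIFT[level];
		}
	}
	return position;
}

// index of the highest 1-bit, mirroring the inner loop of GetMaxOffset
static uint64_t HighestSetBit(uint64_t entry) {
	assert(entry != 0);
	uint64_t position = 0;
	for (int level = 0; level < 6; level++) {
		if (entry & ~BASE[level]) {
			// a set bit exists in the upper half: drop the lower half and record the offset
			entry >>= SHIFT[level];
			position += SHIFT[level];
		} else {
			// otherwise keep searching the lower half
			entry &= BASE[level];
		}
	}
	return position;
}

int main() {
	std::cout << LowestSetBit(0b101000) << '\n';   // 3
	std::cout << HighestSetBit(0b101000) << '\n';  // 5
	std::cout << LowestSetBit(1ULL << 63) << '\n'; // 63
}

Hardware intrinsics such as __builtin_ctzll would compute the same index in one instruction; the portable loop keeps the search explicit and works on the bitmask entries exactly as stored.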
@@ -156,8 +156,7 @@ SinkFinalizeType PhysicalWindow::Finalize(Pipeline &pipeline, Event &event, Clie
  	}
 
  	// Find the first group to sort
- 	auto &groups = state.global_partition->grouping_data->GetPartitions();
- 	if (groups.empty()) {
+ 	if (!state.global_partition->HasMergeTasks()) {
  		// Empty input!
  		return SinkFinalizeType::NO_OUTPUT_POSSIBLE;
  	}
@@ -538,7 +537,7 @@ WindowGlobalSourceState::Task WindowGlobalSourceState::NextTask(idx_t hash_bin)
  	if (hash_bin < bin_count) {
  		// Find a non-empty hash group.
  		for (; hash_bin < hash_groups.size(); hash_bin = next_build++) {
- 			if (hash_groups[hash_bin]) {
+ 			if (hash_groups[hash_bin] && hash_groups[hash_bin]->count) {
  				auto result = CreateTask(hash_bin);
  				if (result.second) {
  					return result;
@@ -163,8 +163,7 @@ SinkFinalizeType PhysicalAsOfJoin::Finalize(Pipeline &pipeline, Event &event, Cl
  	gstate.lhs_sink->SyncPartitioning(gstate.rhs_sink);
 
  	// Find the first group to sort
- 	auto &groups = gstate.rhs_sink.grouping_data->GetPartitions();
- 	if (groups.empty() && EmptyResultIfRHSIsEmpty()) {
+ 	if (!gstate.rhs_sink.HasMergeTasks() && EmptyResultIfRHSIsEmpty()) {
  		// Empty input!
  		return SinkFinalizeType::NO_OUTPUT_POSSIBLE;
  	}
@@ -433,6 +432,9 @@ void AsOfProbeBuffer::BeginLeftScan(hash_t scan_bin) {
 
  	left_hash = lhs_sink.hash_groups[left_group].get();
  	auto &left_sort = *(left_hash->global_sort);
+ 	if (left_sort.sorted_blocks.empty()) {
+ 		return;
+ 	}
  	lhs_scanner = make_uniq<PayloadScanner>(left_sort, false);
  	left_itr = make_uniq<SBIterator>(left_sort, iterator_comp);
 
@@ -681,14 +683,16 @@ class AsOfLocalSourceState : public LocalSourceState {
  public:
  	using HashGroupPtr = unique_ptr<PartitionGlobalHashGroup>;
 
- 	AsOfLocalSourceState(AsOfGlobalSourceState &gsource, const PhysicalAsOfJoin &op);
+ 	AsOfLocalSourceState(AsOfGlobalSourceState &gsource, const PhysicalAsOfJoin &op, ClientContext &client_p);
 
- 	void CombineLeftPartitions();
- 	void MergeLeftPartitions();
+ 	// Return true if we were not interrupted (another thread died)
+ 	bool CombineLeftPartitions();
+ 	bool MergeLeftPartitions();
 
  	idx_t BeginRightScan(const idx_t hash_bin);
 
  	AsOfGlobalSourceState &gsource;
+ 	ClientContext &client;
 
  	//! The left side partition being probed
  	AsOfProbeBuffer probe_buffer;
@@ -702,38 +706,45 @@ public:
  	const bool *found_match = {};
  };
 
- AsOfLocalSourceState::AsOfLocalSourceState(AsOfGlobalSourceState &gsource, const PhysicalAsOfJoin &op)
-     : gsource(gsource), probe_buffer(gsource.gsink.lhs_sink->context, op) {
+ AsOfLocalSourceState::AsOfLocalSourceState(AsOfGlobalSourceState &gsource, const PhysicalAsOfJoin &op,
+                                            ClientContext &client_p)
+     : gsource(gsource), client(client_p), probe_buffer(gsource.gsink.lhs_sink->context, op) {
  	gsource.mergers++;
  }
 
- void AsOfLocalSourceState::CombineLeftPartitions() {
+ bool AsOfLocalSourceState::CombineLeftPartitions() {
  	const auto buffer_count = gsource.gsink.lhs_buffers.size();
- 	while (gsource.combined < buffer_count) {
+ 	while (gsource.combined < buffer_count && !client.interrupted) {
  		const auto next_combine = gsource.next_combine++;
  		if (next_combine < buffer_count) {
  			gsource.gsink.lhs_buffers[next_combine]->Combine();
  			++gsource.combined;
  		} else {
- 			std::this_thread::yield();
+ 			TaskScheduler::GetScheduler(client).YieldThread();
  		}
  	}
+
+ 	return !client.interrupted;
  }
 
- void AsOfLocalSourceState::MergeLeftPartitions() {
+ bool AsOfLocalSourceState::MergeLeftPartitions() {
  	PartitionGlobalMergeStates::Callback local_callback;
  	PartitionLocalMergeState local_merge(*gsource.gsink.lhs_sink);
  	gsource.GetMergeStates().ExecuteTask(local_merge, local_callback);
  	gsource.merged++;
- 	while (gsource.merged < gsource.mergers) {
- 		std::this_thread::yield();
+ 	while (gsource.merged < gsource.mergers && !client.interrupted) {
+ 		TaskScheduler::GetScheduler(client).YieldThread();
  	}
+ 	return !client.interrupted;
  }
 
  idx_t AsOfLocalSourceState::BeginRightScan(const idx_t hash_bin_p) {
  	hash_bin = hash_bin_p;
 
  	hash_group = std::move(gsource.gsink.rhs_sink.hash_groups[hash_bin]);
+ 	if (hash_group->global_sort->sorted_blocks.empty()) {
+ 		return 0;
+ 	}
  	scanner = make_uniq<PayloadScanner>(*hash_group->global_sort);
  	found_match = gsource.gsink.right_outers[hash_bin].GetMatches();
 
@@ -743,7 +754,7 @@ idx_t AsOfLocalSourceState::BeginRightScan(const idx_t hash_bin_p) {
  unique_ptr<LocalSourceState> PhysicalAsOfJoin::GetLocalSourceState(ExecutionContext &context,
                                                                     GlobalSourceState &gstate) const {
  	auto &gsource = gstate.Cast<AsOfGlobalSourceState>();
- 	return make_uniq<AsOfLocalSourceState>(gsource, *this);
+ 	return make_uniq<AsOfLocalSourceState>(gsource, *this, context.client);
  }
 
  SourceResultType PhysicalAsOfJoin::GetData(ExecutionContext &context, DataChunk &chunk,
@@ -751,17 +762,21 @@ SourceResultType PhysicalAsOfJoin::GetData(ExecutionContext &context, DataChunk
  	auto &gsource = input.global_state.Cast<AsOfGlobalSourceState>();
  	auto &lsource = input.local_state.Cast<AsOfLocalSourceState>();
  	auto &rhs_sink = gsource.gsink.rhs_sink;
+ 	auto &client = context.client;
 
  	// Step 1: Combine the partitions
- 	lsource.CombineLeftPartitions();
+ 	if (!lsource.CombineLeftPartitions()) {
+ 		return SourceResultType::FINISHED;
+ 	}
 
  	// Step 2: Sort on all threads
- 	lsource.MergeLeftPartitions();
+ 	if (!lsource.MergeLeftPartitions()) {
+ 		return SourceResultType::FINISHED;
+ 	}
 
  	// Step 3: Join the partitions
  	auto &lhs_sink = *gsource.gsink.lhs_sink;
- 	auto &partitions = lhs_sink.grouping_data->GetPartitions();
- 	const auto left_bins = partitions.size();
+ 	const auto left_bins = lhs_sink.grouping_data ? lhs_sink.grouping_data->GetPartitions().size() : 1;
  	while (gsource.flushed < left_bins) {
  		// Make sure we have something to flush
  		if (!lsource.probe_buffer.Scanning()) {
@@ -769,13 +784,13 @@ SourceResultType PhysicalAsOfJoin::GetData(ExecutionContext &context, DataChunk
  			if (left_bin < left_bins) {
  				// More to flush
  				lsource.probe_buffer.BeginLeftScan(left_bin);
- 			} else if (!IsRightOuterJoin(join_type)) {
+ 			} else if (!IsRightOuterJoin(join_type) || client.interrupted) {
  				return SourceResultType::FINISHED;
  			} else {
  				// Wait for all threads to finish
  				// TODO: How to implement a spin wait correctly?
  				// Returning BLOCKED seems to hang the system.
- 				std::this_thread::yield();
+ 				TaskScheduler::GetScheduler(client).YieldThread();
  				continue;
  			}
  		}
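The AsOf join changes above replace the unconditional std::this_thread::yield() spin waits with interrupt-aware waits: each waiting thread re-checks client.interrupted so that a query that fails or is cancelled on one thread cannot leave the other threads spinning forever, and CombineLeftPartitions/MergeLeftPartitions now report whether the wait actually completed. A generic sketch of that pattern with standard threads and an atomic flag (illustration only; DuckDB routes the yield through its TaskScheduler and uses the ClientContext flag instead of a local atomic):

#include <atomic>
#include <iostream>
#include <thread>
#include <vector>

int main() {
	std::atomic<bool> interrupted{false};
	std::atomic<int> merged{0};
	const int mergers = 4;

	// each worker finishes its own merge step, then waits for the others,
	// but also bails out if any thread has signalled an interrupt
	auto worker = [&](int id) {
		if (id == 3) {
			// simulate a thread that errors out before it can merge
			interrupted = true;
			return;
		}
		merged++;
		while (merged < mergers && !interrupted) {
			std::this_thread::yield();
		}
	};

	std::vector<std::thread> threads;
	for (int i = 0; i < mergers; i++) {
		threads.emplace_back(worker, i);
	}
	for (auto &t : threads) {
		t.join();
	}
	std::cout << (interrupted ? "wait interrupted" : "all merges finished") << '\n';
}

Without the interrupted check, the three surviving workers would spin on merged < mergers forever, which is exactly the hang this part of the diff avoids.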
@@ -194,7 +194,7 @@ public:
  	}
  	auto new_count = current_collection->GetTotalRows();
  	auto batch_type =
- 	    new_count < RowGroup::ROW_GROUP_SIZE ? RowGroupBatchType::NOT_FLUSHED : RowGroupBatchType::FLUSHED;
+ 	    new_count < Storage::ROW_GROUP_SIZE ? RowGroupBatchType::NOT_FLUSHED : RowGroupBatchType::FLUSHED;
  	if (batch_type == RowGroupBatchType::FLUSHED && writer) {
  		writer->WriteLastRowGroup(*current_collection);
  	}
@@ -482,7 +482,7 @@ SinkCombineResultType PhysicalInsert::Combine(ExecutionContext &context, Operato
 
  	lock_guard<mutex> lock(gstate.lock);
  	gstate.insert_count += append_count;
- 	if (append_count < RowGroup::ROW_GROUP_SIZE) {
+ 	if (append_count < Storage::ROW_GROUP_SIZE) {
  		// we have few rows - append to the local storage directly
  		auto &table = gstate.table;
  		auto &storage = table.GetStorage();
@@ -837,7 +837,15 @@ void ArrowTableFunction::ArrowToDuckDB(ArrowScanLocalState &scan_state, const ar
  			throw InvalidInputException("arrow_scan: array length mismatch");
  		}
  		// Make sure this Vector keeps the Arrow chunk alive in case we can zero-copy the data
- 		output.data[idx].GetBuffer()->SetAuxiliaryData(make_uniq<ArrowAuxiliaryData>(scan_state.chunk));
+ 		if (scan_state.arrow_owned_data.find(idx) == scan_state.arrow_owned_data.end()) {
+ 			auto arrow_data = make_shared<ArrowArrayWrapper>();
+ 			arrow_data->arrow_array = scan_state.chunk->arrow_array;
+ 			scan_state.chunk->arrow_array.release = nullptr;
+ 			scan_state.arrow_owned_data[idx] = arrow_data;
+ 		}
+
+ 		output.data[idx].GetBuffer()->SetAuxiliaryData(make_uniq<ArrowAuxiliaryData>(scan_state.arrow_owned_data[idx]));
+
  		D_ASSERT(arrow_convert_data.find(col_idx) != arrow_convert_data.end());
  		auto &arrow_type = *arrow_convert_data.at(col_idx);
  		if (array.dictionary) {
@@ -1,8 +1,8 @@
  #ifndef DUCKDB_VERSION
- #define DUCKDB_VERSION "0.8.2-dev4203"
+ #define DUCKDB_VERSION "0.8.2-dev4376"
  #endif
  #ifndef DUCKDB_SOURCE_ID
- #define DUCKDB_SOURCE_ID "7208022cac"
+ #define DUCKDB_SOURCE_ID "312b995450"
  #endif
  #include "duckdb/function/table/system_functions.hpp"
  #include "duckdb/main/database.hpp"
@@ -58,21 +58,6 @@ struct DConstants {
  	static constexpr const idx_t INVALID_INDEX = idx_t(-1);
  };
 
- struct Storage {
- 	//! The size of a hard disk sector, only really needed for Direct IO
- 	constexpr static int SECTOR_SIZE = 4096;
- 	//! Block header size for blocks written to the storage
- 	constexpr static int BLOCK_HEADER_SIZE = sizeof(uint64_t);
- 	// Size of a memory slot managed by the StorageManager. This is the quantum of allocation for Blocks on DuckDB. We
- 	// default to 256KB. (1 << 18)
- 	constexpr static int BLOCK_ALLOC_SIZE = 262144;
- 	//! The actual memory space that is available within the blocks
- 	constexpr static int BLOCK_SIZE = BLOCK_ALLOC_SIZE - BLOCK_HEADER_SIZE;
- 	//! The size of the headers. This should be small and written more or less atomically by the hard disk. We default
- 	//! to the page size, which is 4KB. (1 << 12)
- 	constexpr static int FILE_HEADER_SIZE = 4096;
- };
-
  struct LogicalIndex {
  	explicit LogicalIndex(idx_t index) : index(index) {
  	}
@@ -1,7 +1,7 @@
  //===----------------------------------------------------------------------===//
  // DuckDB
  //
- // duckdb/common/serializer/buffer_stream.hpp
+ // duckdb/common/serializer/memory_stream.hpp
  //
  //
  //===----------------------------------------------------------------------===//
@@ -18,7 +18,6 @@ namespace duckdb {
  class PartitionGlobalHashGroup {
  public:
  	using GlobalSortStatePtr = unique_ptr<GlobalSortState>;
- 	using LocalSortStatePtr = unique_ptr<LocalSortState>;
  	using Orders = vector<BoundOrderByNode>;
  	using Types = vector<LogicalType>;
 
@@ -54,6 +53,8 @@ public:
  	                         const vector<BoundOrderByNode> &order_bys, const Types &payload_types,
  	                         const vector<unique_ptr<BaseStatistics>> &partitions_stats, idx_t estimated_cardinality);
 
+ 	bool HasMergeTasks() const;
+
  	unique_ptr<RadixPartitionedTupleData> CreatePartition(idx_t new_bits) const;
  	void SyncPartitioning(const PartitionGlobalSinkState &other);
 
@@ -97,21 +98,26 @@ private:
 
  class PartitionLocalSinkState {
  public:
+ 	using LocalSortStatePtr = unique_ptr<LocalSortState>;
+
  	PartitionLocalSinkState(ClientContext &context, PartitionGlobalSinkState &gstate_p);
 
  	// Global state
  	PartitionGlobalSinkState &gstate;
  	Allocator &allocator;
 
- 	// OVER(PARTITION BY...) (hash grouping)
+ 	// Shared expression evaluation
  	ExpressionExecutor executor;
  	DataChunk group_chunk;
  	DataChunk payload_chunk;
+ 	size_t sort_cols;
+
+ 	// OVER(PARTITION BY...) (hash grouping)
  	unique_ptr<PartitionedTupleData> local_partition;
  	unique_ptr<PartitionedTupleDataAppendState> local_append;
 
- 	// OVER(...) (sorting)
- 	size_t sort_cols;
+ 	// OVER(ORDER BY...) (only sorting)
+ 	LocalSortStatePtr local_sort;
 
  	// OVER() (no sorting)
  	RowLayout payload_layout;
@@ -134,8 +140,12 @@ class PartitionGlobalMergeState {
  public:
  	using GroupDataPtr = unique_ptr<TupleDataCollection>;
 
+ 	// OVER(PARTITION BY...)
  	PartitionGlobalMergeState(PartitionGlobalSinkState &sink, GroupDataPtr group_data, hash_t hash_bin);
 
+ 	// OVER(ORDER BY...)
+ 	explicit PartitionGlobalMergeState(PartitionGlobalSinkState &sink);
+
  	bool IsSorted() const {
  		lock_guard<mutex> guard(lock);
  		return stage == PartitionSortStage::SORTED;
@@ -332,6 +332,9 @@ public:
  	DUCKDB_API string ToString(idx_t count) const;
 
  	DUCKDB_API static bool IsAligned(idx_t count);
+
+ 	void Write(WriteStream &writer, idx_t count);
+ 	void Read(ReadStream &reader, idx_t count);
  };
 
  } // namespace duckdb
@@ -45,6 +45,8 @@ public:
  	//! Get a new chain of leaf nodes, might cause new buffer allocations,
  	//! with the node parameter holding the tail of the chain
  	static void New(ART &art, reference<Node> &node, const row_t *row_ids, idx_t count);
+ 	//! Get a new leaf node without any data
+ 	static Leaf &New(ART &art, Node &node);
  	//! Free the leaf (chain)
  	static void Free(ART &art, Node &node);
 
@@ -29,10 +29,6 @@ public:
  	//! We can vacuum 10% or more of the total in-memory footprint
  	static constexpr uint8_t VACUUM_THRESHOLD = 10;
 
- 	//! Constants for fast offset calculations in the bitmask
- 	static constexpr idx_t BASE[] = {0x00000000FFFFFFFF, 0x0000FFFF, 0x00FF, 0x0F, 0x3, 0x1};
- 	static constexpr uint8_t SHIFT[] = {32, 16, 8, 4, 2, 1};
-
  public:
  	FixedSizeAllocator(const idx_t segment_size, BlockManager &block_manager);
 
@@ -80,7 +76,7 @@ public:
  	IndexPointer VacuumPointer(const IndexPointer ptr);
 
  	//! Serializes all in-memory buffers and the metadata
- 	BlockPointer Serialize(MetadataWriter &writer);
+ 	BlockPointer Serialize(PartialBlockManager &partial_block_manager, MetadataWriter &writer);
  	//! Deserializes all metadata
  	void Deserialize(const BlockPointer &block_pointer);
 
@@ -117,8 +113,6 @@ private:
  		auto buffer_ptr = buffer.Get(dirty);
  		return buffer_ptr + ptr.GetOffset() * segment_size + bitmask_offset;
  	}
- 	//! Returns the first free offset in a bitmask
- 	uint32_t GetOffset(ValidityMask &mask, const idx_t segment_count);
  	//! Returns an available buffer id
  	idx_t GetAvailableBufferId() const;
  };