duckdb 0.8.2-dev4203.0 → 0.8.2-dev4376.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/parquet/parquet_extension.cpp +1 -1
  3. package/src/duckdb/src/common/enum_util.cpp +5 -0
  4. package/src/duckdb/src/common/file_buffer.cpp +1 -1
  5. package/src/duckdb/src/common/sort/partition_state.cpp +107 -29
  6. package/src/duckdb/src/common/types/validity_mask.cpp +56 -0
  7. package/src/duckdb/src/execution/index/art/art.cpp +5 -1
  8. package/src/duckdb/src/execution/index/art/leaf.cpp +13 -10
  9. package/src/duckdb/src/execution/index/art/node48.cpp +0 -2
  10. package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +38 -73
  11. package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +245 -27
  12. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +2 -3
  13. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +35 -20
  14. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +1 -1
  15. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  16. package/src/duckdb/src/function/table/arrow_conversion.cpp +9 -1
  17. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  18. package/src/duckdb/src/include/duckdb/common/constants.hpp +0 -15
  19. package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp +1 -1
  20. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +14 -4
  21. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
  22. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +2 -0
  23. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +1 -7
  24. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +38 -8
  25. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +3 -0
  26. package/src/duckdb/src/include/duckdb/main/relation.hpp +9 -2
  27. package/src/duckdb/src/include/duckdb/storage/block.hpp +3 -3
  28. package/src/duckdb/src/include/duckdb/storage/compression/bitpacking.hpp +1 -8
  29. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +2 -2
  30. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +2 -0
  31. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp +2 -0
  32. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +6 -2
  33. package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +35 -19
  34. package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +19 -0
  35. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +19 -13
  36. package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +4 -19
  37. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +1 -1
  38. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +15 -15
  39. package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +59 -0
  40. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +1 -1
  41. package/src/duckdb/src/include/duckdb/transaction/commit_state.hpp +1 -6
  42. package/src/duckdb/src/include/duckdb/transaction/delete_info.hpp +3 -2
  43. package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +4 -2
  44. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +1 -1
  45. package/src/duckdb/src/include/duckdb/transaction/undo_buffer.hpp +0 -1
  46. package/src/duckdb/src/main/relation.cpp +15 -2
  47. package/src/duckdb/src/main/settings/settings.cpp +5 -10
  48. package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -0
  49. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +0 -1
  50. package/src/duckdb/src/storage/compression/bitpacking.cpp +55 -48
  51. package/src/duckdb/src/storage/data_table.cpp +1 -1
  52. package/src/duckdb/src/storage/local_storage.cpp +1 -1
  53. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +41 -2
  54. package/src/duckdb/src/storage/metadata/metadata_reader.cpp +12 -3
  55. package/src/duckdb/src/storage/metadata/metadata_writer.cpp +8 -2
  56. package/src/duckdb/src/storage/partial_block_manager.cpp +42 -15
  57. package/src/duckdb/src/storage/single_file_block_manager.cpp +1 -2
  58. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  59. package/src/duckdb/src/storage/table/chunk_info.cpp +39 -33
  60. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +26 -32
  61. package/src/duckdb/src/storage/table/column_data.cpp +14 -9
  62. package/src/duckdb/src/storage/table/list_column_data.cpp +2 -2
  63. package/src/duckdb/src/storage/table/row_group.cpp +102 -192
  64. package/src/duckdb/src/storage/table/row_group_collection.cpp +2 -2
  65. package/src/duckdb/src/storage/table/row_version_manager.cpp +228 -0
  66. package/src/duckdb/src/storage/table/update_segment.cpp +2 -2
  67. package/src/duckdb/src/transaction/cleanup_state.cpp +2 -1
  68. package/src/duckdb/src/transaction/commit_state.cpp +5 -4
  69. package/src/duckdb/src/transaction/duck_transaction.cpp +4 -2
  70. package/src/duckdb/src/transaction/rollback_state.cpp +2 -1
  71. package/src/duckdb/src/transaction/undo_buffer.cpp +3 -5
  72. package/src/duckdb/ub_src_storage_table.cpp +2 -0
package/package.json CHANGED
@@ -2,7 +2,7 @@
  "name": "duckdb",
  "main": "./lib/duckdb.js",
  "types": "./lib/duckdb.d.ts",
- "version": "0.8.2-dev4203.0",
+ "version": "0.8.2-dev4376.0",
  "description": "DuckDB node.js API",
  "gypfile": true,
  "dependencies": {

package/src/duckdb/extension/parquet/parquet_extension.cpp CHANGED
@@ -118,7 +118,7 @@ struct ParquetWriteBindData : public TableFunctionData {
  vector<LogicalType> sql_types;
  vector<string> column_names;
  duckdb_parquet::format::CompressionCodec::type codec = duckdb_parquet::format::CompressionCodec::SNAPPY;
- idx_t row_group_size = RowGroup::ROW_GROUP_SIZE;
+ idx_t row_group_size = Storage::ROW_GROUP_SIZE;

  //! If row_group_size_bytes is not set, we default to row_group_size * BYTES_PER_ROW
  static constexpr const idx_t BYTES_PER_ROW = 1024;

package/src/duckdb/src/common/enum_util.cpp CHANGED
@@ -551,6 +551,8 @@ BindingMode EnumUtil::FromString<BindingMode>(const char *value) {
  template<>
  const char* EnumUtil::ToChars<BitpackingMode>(BitpackingMode value) {
  switch(value) {
+ case BitpackingMode::INVALID:
+ return "INVALID";
  case BitpackingMode::AUTO:
  return "AUTO";
  case BitpackingMode::CONSTANT:
@@ -568,6 +570,9 @@ const char* EnumUtil::ToChars<BitpackingMode>(BitpackingMode value) {

  template<>
  BitpackingMode EnumUtil::FromString<BitpackingMode>(const char *value) {
+ if (StringUtil::Equals(value, "INVALID")) {
+ return BitpackingMode::INVALID;
+ }
  if (StringUtil::Equals(value, "AUTO")) {
  return BitpackingMode::AUTO;
  }
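
These two hunks keep ToChars and FromString symmetric: the newly listed BitpackingMode::INVALID can now round-trip through its string form. A minimal standalone sketch of that round-trip property (plain C++ with hypothetical free functions, not DuckDB's actual EnumUtil templates):

#include <cassert>
#include <cstdint>
#include <cstring>
#include <stdexcept>

enum class BitpackingMode : uint8_t { INVALID, AUTO, CONSTANT };

const char *ToChars(BitpackingMode value) {
	switch (value) {
	case BitpackingMode::INVALID:
		return "INVALID";
	case BitpackingMode::AUTO:
		return "AUTO";
	case BitpackingMode::CONSTANT:
		return "CONSTANT";
	}
	throw std::invalid_argument("unrecognized BitpackingMode");
}

BitpackingMode FromString(const char *value) {
	if (std::strcmp(value, "INVALID") == 0) {
		return BitpackingMode::INVALID;
	}
	if (std::strcmp(value, "AUTO") == 0) {
		return BitpackingMode::AUTO;
	}
	if (std::strcmp(value, "CONSTANT") == 0) {
		return BitpackingMode::CONSTANT;
	}
	throw std::invalid_argument("unrecognized BitpackingMode string");
}

int main() {
	// every printable member parses back to itself, including the new INVALID entry
	for (auto mode : {BitpackingMode::INVALID, BitpackingMode::AUTO, BitpackingMode::CONSTANT}) {
		assert(FromString(ToChars(mode)) == mode);
	}
	return 0;
}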

package/src/duckdb/src/common/file_buffer.cpp CHANGED
@@ -5,7 +5,7 @@
  #include "duckdb/common/exception.hpp"
  #include "duckdb/common/file_system.hpp"
  #include "duckdb/common/helper.hpp"
-
+ #include "duckdb/storage/storage_info.hpp"
  #include <cstring>

  namespace duckdb {

package/src/duckdb/src/common/sort/partition_state.cpp CHANGED
@@ -100,11 +100,30 @@ PartitionGlobalSinkState::PartitionGlobalSinkState(ClientContext &context,
  }

  if (!orders.empty()) {
- auto types = payload_types;
- types.push_back(LogicalType::HASH);
- grouping_types.Initialize(types);
+ if (partitions.empty()) {
+ // Sort early into a dedicated hash group if we only sort.
+ grouping_types.Initialize(payload_types);
+ auto new_group =
+ make_uniq<PartitionGlobalHashGroup>(buffer_manager, partitions, orders, payload_types, external);
+ hash_groups.emplace_back(std::move(new_group));
+ } else {
+ auto types = payload_types;
+ types.push_back(LogicalType::HASH);
+ grouping_types.Initialize(types);
+ ResizeGroupingData(estimated_cardinality);
+ }
+ }
+ }

- ResizeGroupingData(estimated_cardinality);
+ bool PartitionGlobalSinkState::HasMergeTasks() const {
+ if (grouping_data) {
+ auto &groups = grouping_data->GetPartitions();
+ return !groups.empty();
+ } else if (!hash_groups.empty()) {
+ D_ASSERT(hash_groups.size() == 1);
+ return hash_groups[0]->count > 0;
+ } else {
+ return false;
  }
  }

@@ -206,6 +225,12 @@ PartitionLocalMergeState::PartitionLocalMergeState(PartitionGlobalSinkState &gst
  }

  void PartitionLocalMergeState::Scan() {
+ if (!merge_state->group_data) {
+ // OVER(ORDER BY...)
+ // Already sorted
+ return;
+ }
+
  auto &group_data = *merge_state->group_data;
  auto &hash_group = *merge_state->hash_group;
  auto &chunk_state = merge_state->chunk_state;
@@ -243,13 +268,26 @@ PartitionLocalSinkState::PartitionLocalSinkState(ClientContext &context, Partiti
  sort_cols = gstate.orders.size() + group_types.size();

  if (sort_cols) {
+ auto payload_types = gstate.payload_types;
  if (!group_types.empty()) {
  // OVER(PARTITION BY...)
  group_chunk.Initialize(allocator, group_types);
+ payload_types.emplace_back(LogicalType::HASH);
+ } else {
+ // OVER(ORDER BY...)
+ for (idx_t ord_idx = 0; ord_idx < gstate.orders.size(); ord_idx++) {
+ auto &pexpr = *gstate.orders[ord_idx].expression.get();
+ group_types.push_back(pexpr.return_type);
+ executor.AddExpression(pexpr);
+ }
+ group_chunk.Initialize(allocator, group_types);
+
+ // Single partition
+ auto &global_sort = *gstate.hash_groups[0]->global_sort;
+ local_sort = make_uniq<LocalSortState>();
+ local_sort->Initialize(global_sort, global_sort.buffer_manager);
  }
  // OVER(...)
- auto payload_types = gstate.payload_types;
- payload_types.emplace_back(LogicalType::HASH);
  payload_chunk.Initialize(allocator, payload_types);
  } else {
  // OVER()
@@ -259,20 +297,14 @@

  void PartitionLocalSinkState::Hash(DataChunk &input_chunk, Vector &hash_vector) {
  const auto count = input_chunk.size();
- if (group_chunk.ColumnCount() > 0) {
- // OVER(PARTITION BY...) (hash grouping)
- group_chunk.Reset();
- executor.Execute(input_chunk, group_chunk);
- VectorOperations::Hash(group_chunk.data[0], hash_vector, count);
- for (idx_t prt_idx = 1; prt_idx < group_chunk.ColumnCount(); ++prt_idx) {
- VectorOperations::CombineHash(hash_vector, group_chunk.data[prt_idx], count);
- }
- } else {
- // OVER(...) (sorting)
- // Single partition => single hash value
- hash_vector.SetVectorType(VectorType::CONSTANT_VECTOR);
- auto hashes = ConstantVector::GetData<hash_t>(hash_vector);
- hashes[0] = 0;
+ D_ASSERT(group_chunk.ColumnCount() > 0);
+
+ // OVER(PARTITION BY...) (hash grouping)
+ group_chunk.Reset();
+ executor.Execute(input_chunk, group_chunk);
+ VectorOperations::Hash(group_chunk.data[0], hash_vector, count);
+ for (idx_t prt_idx = 1; prt_idx < group_chunk.ColumnCount(); ++prt_idx) {
+ VectorOperations::CombineHash(hash_vector, group_chunk.data[prt_idx], count);
  }
  }

@@ -306,6 +338,22 @@ void PartitionLocalSinkState::Sink(DataChunk &input_chunk) {
  return;
  }

+ if (local_sort) {
+ // OVER(ORDER BY...)
+ group_chunk.Reset();
+ executor.Execute(input_chunk, group_chunk);
+ local_sort->SinkChunk(group_chunk, input_chunk);
+
+ auto &hash_group = *gstate.hash_groups[0];
+ hash_group.count += input_chunk.size();
+
+ if (local_sort->SizeInBytes() > gstate.memory_per_thread) {
+ auto &global_sort = *hash_group.global_sort;
+ local_sort->Sort(global_sort, true);
+ }
+ return;
+ }
+
  // OVER(...)
  payload_chunk.Reset();
  auto &hash_vector = payload_chunk.data.back();
@@ -338,6 +386,15 @@ void PartitionLocalSinkState::Combine() {
  return;
  }

+ if (local_sort) {
+ // OVER(ORDER BY...)
+ auto &hash_group = *gstate.hash_groups[0];
+ auto &global_sort = *hash_group.global_sort;
+ global_sort.AddLocalState(*local_sort);
+ local_sort.reset();
+ return;
+ }
+
  // OVER(...)
  gstate.CombineLocalPartition(local_partition, local_append);
  }
@@ -365,6 +422,19 @@ PartitionGlobalMergeState::PartitionGlobalMergeState(PartitionGlobalSinkState &s
  group_data->InitializeScan(chunk_state, column_ids);
  }

+ PartitionGlobalMergeState::PartitionGlobalMergeState(PartitionGlobalSinkState &sink)
+ : sink(sink), memory_per_thread(sink.memory_per_thread),
+ num_threads(TaskScheduler::GetScheduler(sink.context).NumberOfThreads()), stage(PartitionSortStage::INIT),
+ total_tasks(0), tasks_assigned(0), tasks_completed(0) {
+
+ const hash_t hash_bin = 0;
+ const size_t group_idx = 0;
+ hash_group = sink.hash_groups[group_idx].get();
+ global_sort = sink.hash_groups[group_idx]->global_sort.get();
+
+ sink.bin_groups[hash_bin] = group_idx;
+ }
+
  void PartitionLocalMergeState::Prepare() {
  merge_state->group_data.reset();

@@ -390,7 +460,7 @@ void PartitionLocalMergeState::ExecuteTask() {
  Merge();
  break;
  default:
- throw InternalException("Unexpected PartitionGlobalMergeState in ExecuteTask!");
+ throw InternalException("Unexpected PartitionSortStage in ExecuteTask!");
  }

  merge_state->CompleteTask();
@@ -471,15 +541,23 @@ bool PartitionGlobalMergeState::TryPrepareNextStage() {

  PartitionGlobalMergeStates::PartitionGlobalMergeStates(PartitionGlobalSinkState &sink) {
  // Schedule all the sorts for maximum thread utilisation
- auto &partitions = sink.grouping_data->GetPartitions();
- sink.bin_groups.resize(partitions.size(), partitions.size());
- for (hash_t hash_bin = 0; hash_bin < partitions.size(); ++hash_bin) {
- auto &group_data = partitions[hash_bin];
- // Prepare for merge sort phase
- if (group_data->Count()) {
- auto state = make_uniq<PartitionGlobalMergeState>(sink, std::move(group_data), hash_bin);
- states.emplace_back(std::move(state));
+ if (sink.grouping_data) {
+ auto &partitions = sink.grouping_data->GetPartitions();
+ sink.bin_groups.resize(partitions.size(), partitions.size());
+ for (hash_t hash_bin = 0; hash_bin < partitions.size(); ++hash_bin) {
+ auto &group_data = partitions[hash_bin];
+ // Prepare for merge sort phase
+ if (group_data->Count()) {
+ auto state = make_uniq<PartitionGlobalMergeState>(sink, std::move(group_data), hash_bin);
+ states.emplace_back(std::move(state));
+ }
  }
+ } else {
+ // OVER(ORDER BY...)
+ // Already sunk into the single global sort, so set up single merge with no data
+ sink.bin_groups.resize(1, 1);
+ auto state = make_uniq<PartitionGlobalMergeState>(sink);
+ states.emplace_back(std::move(state));
  }
  }

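Taken together, these hunks add a fast path for OVER(ORDER BY ...) without PARTITION BY: instead of hashing every row to the constant bucket 0 and partitioning, the sink feeds rows directly into a single pre-created hash group's global sort, and the merge phase is set up with one synthetic merge state. A simplified, self-contained sketch of the resulting dispatch (hypothetical names, not DuckDB's actual classes):

#include <cassert>

enum class SinkPath { UNORDERED_BUFFER, SINGLE_GLOBAL_SORT, HASH_PARTITIONED_SORT };

// has_partitions: OVER(PARTITION BY ...) present; has_orders: ORDER BY present
SinkPath ChoosePath(bool has_partitions, bool has_orders) {
	if (!has_partitions && !has_orders) {
		return SinkPath::UNORDERED_BUFFER; // OVER(): keep rows unsorted
	}
	if (!has_partitions) {
		// OVER(ORDER BY ...): everything is one partition, so skip hashing entirely
		// and sink straight into the single global sort (the new fast path)
		return SinkPath::SINGLE_GLOBAL_SORT;
	}
	// OVER(PARTITION BY ...): hash rows into groups, then sort each group
	return SinkPath::HASH_PARTITIONED_SORT;
}

int main() {
	assert(ChoosePath(false, false) == SinkPath::UNORDERED_BUFFER);
	assert(ChoosePath(false, true) == SinkPath::SINGLE_GLOBAL_SORT);
	assert(ChoosePath(true, true) == SinkPath::HASH_PARTITIONED_SORT);
	return 0;
}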

package/src/duckdb/src/common/types/validity_mask.cpp CHANGED
@@ -1,4 +1,7 @@
  #include "duckdb/common/types/validity_mask.hpp"
+ #include "duckdb/common/limits.hpp"
+ #include "duckdb/common/serializer/write_stream.hpp"
+ #include "duckdb/common/serializer/read_stream.hpp"

  namespace duckdb {

@@ -173,4 +176,57 @@ void ValidityMask::SliceInPlace(const ValidityMask &other, idx_t target_offset,
  #endif
  }

+ enum class ValiditySerialization : uint8_t { BITMASK = 0, VALID_VALUES = 1, INVALID_VALUES = 2 };
+
+ void ValidityMask::Write(WriteStream &writer, idx_t count) {
+ auto valid_values = CountValid(count);
+ auto invalid_values = count - valid_values;
+ auto bitmask_bytes = ValidityMask::ValidityMaskSize(count);
+ auto need_u32 = count >= NumericLimits<uint16_t>::Maximum();
+ auto bytes_per_value = need_u32 ? sizeof(uint32_t) : sizeof(uint16_t);
+ auto valid_value_size = bytes_per_value * valid_values + sizeof(uint32_t);
+ auto invalid_value_size = bytes_per_value * invalid_values + sizeof(uint32_t);
+ if (valid_value_size < bitmask_bytes || invalid_value_size < bitmask_bytes) {
+ auto serialize_valid = valid_value_size < invalid_value_size;
+ // serialize (in)valid value indexes as [COUNT][V0][V1][...][VN]
+ auto flag = serialize_valid ? ValiditySerialization::VALID_VALUES : ValiditySerialization::INVALID_VALUES;
+ writer.Write(flag);
+ writer.Write<uint32_t>(MinValue<uint32_t>(valid_values, invalid_values));
+ for (idx_t i = 0; i < count; i++) {
+ if (RowIsValid(i) == serialize_valid) {
+ if (need_u32) {
+ writer.Write<uint32_t>(i);
+ } else {
+ writer.Write<uint16_t>(i);
+ }
+ }
+ }
+ } else {
+ // serialize the entire bitmask
+ writer.Write(ValiditySerialization::BITMASK);
+ writer.WriteData(const_data_ptr_cast(GetData()), bitmask_bytes);
+ }
+ }
+
+ void ValidityMask::Read(ReadStream &reader, idx_t count) {
+ Initialize(count);
+ // deserialize the storage type
+ auto flag = reader.Read<ValiditySerialization>();
+ if (flag == ValiditySerialization::BITMASK) {
+ // deserialize the bitmask
+ reader.ReadData(data_ptr_cast(GetData()), ValidityMask::ValidityMaskSize(count));
+ return;
+ }
+ auto is_u32 = count >= NumericLimits<uint16_t>::Maximum();
+ auto is_valid = flag == ValiditySerialization::VALID_VALUES;
+ auto serialize_count = reader.Read<uint32_t>();
+ if (is_valid) {
+ SetAllInvalid(count);
+ }
+ for (idx_t i = 0; i < serialize_count; i++) {
+ idx_t index = is_u32 ? reader.Read<uint32_t>() : reader.Read<uint16_t>();
+ Set(index, is_valid);
+ }
+ }
+
  } // namespace duckdb
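The writer picks the cheaper of three encodings: a raw bitmask costs one bit per row (rounded up to whole 64-bit validity entries), while an index list costs a 4-byte count plus 2 or 4 bytes per listed row, so mostly-valid or mostly-invalid masks shrink dramatically. A standalone sketch of the same cost comparison (constants mirror the hunk above; BitmaskBytes is an illustrative stand-in for ValidityMask::ValidityMaskSize):

#include <cstdint>
#include <cstdio>

enum class ValiditySerialization : uint8_t { BITMASK = 0, VALID_VALUES = 1, INVALID_VALUES = 2 };

// one bit per row, rounded up to whole 64-bit validity entries
uint64_t BitmaskBytes(uint64_t count) {
	return (count + 63) / 64 * 8;
}

ValiditySerialization ChooseFormat(uint64_t count, uint64_t valid_values) {
	uint64_t invalid_values = count - valid_values;
	uint64_t bitmask_bytes = BitmaskBytes(count);
	// indexes are 16-bit unless the row count needs 32 bits
	uint64_t bytes_per_value = count >= 65535 ? 4 : 2;
	uint64_t valid_size = bytes_per_value * valid_values + 4;   // [COUNT][V0]...[VN]
	uint64_t invalid_size = bytes_per_value * invalid_values + 4;
	if (valid_size < bitmask_bytes || invalid_size < bitmask_bytes) {
		return valid_size < invalid_size ? ValiditySerialization::VALID_VALUES
		                                 : ValiditySerialization::INVALID_VALUES;
	}
	return ValiditySerialization::BITMASK;
}

int main() {
	// 120,000 rows with 3 NULLs: 3 * 4 + 4 = 16 bytes as INVALID_VALUES vs 15,000 bytes of bitmask
	std::printf("%d\n", static_cast<int>(ChooseFormat(120000, 119997)));
	// half valid, half invalid: the bitmask wins
	std::printf("%d\n", static_cast<int>(ChooseFormat(120000, 60000)));
	return 0;
}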

package/src/duckdb/src/execution/index/art/art.cpp CHANGED
@@ -985,10 +985,14 @@ BlockPointer ART::Serialize(MetadataWriter &writer) {
  }

  lock_guard<mutex> l(lock);
+ auto &block_manager = table_io_manager.GetIndexBlockManager();
+ PartialBlockManager partial_block_manager(block_manager, CheckpointType::FULL_CHECKPOINT);
+
  vector<BlockPointer> allocator_pointers;
  for (auto &allocator : *allocators) {
- allocator_pointers.push_back(allocator->Serialize(writer));
+ allocator_pointers.push_back(allocator->Serialize(partial_block_manager, writer));
  }
+ partial_block_manager.FlushPartialBlocks();

  root_block_pointer = writer.GetBlockPointer();
  writer.Write(tree);
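Routing the allocator buffers through a PartialBlockManager lets several small index buffers share storage blocks during serialization instead of each claiming a whole block, with one flush of the final partially filled block at the end. A toy sketch of that packing idea (BlockPacker and its members are illustrative, not DuckDB's actual PartialBlockManager API):

#include <cstdint>
#include <cstdio>
#include <utility>

struct BlockPacker {
	static constexpr uint64_t BLOCK_SIZE = 262144; // DuckDB's default block size (256 KiB)
	uint64_t block_id = 0;
	uint64_t offset = 0;
	uint64_t blocks_written = 0;

	// reserve space for a payload, opening a fresh block only when the current one is full
	std::pair<uint64_t, uint64_t> Append(uint64_t bytes) {
		if (offset + bytes > BLOCK_SIZE) {
			Flush();
		}
		std::pair<uint64_t, uint64_t> position(block_id, offset);
		offset += bytes;
		return position;
	}

	// write out the current block if it holds any data, like FlushPartialBlocks()
	void Flush() {
		if (offset > 0) {
			blocks_written++;
			block_id++;
			offset = 0;
		}
	}
};

int main() {
	BlockPacker packer;
	// eight 40 KiB allocator buffers share two blocks instead of occupying eight
	for (int i = 0; i < 8; i++) {
		packer.Append(40 * 1024);
	}
	packer.Flush();
	std::printf("blocks written: %llu\n", static_cast<unsigned long long>(packer.blocks_written));
	return 0;
}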

package/src/duckdb/src/execution/index/art/leaf.cpp CHANGED
@@ -26,6 +26,7 @@ void Leaf::New(ART &art, reference<Node> &node, const row_t *row_ids, idx_t coun
  auto &leaf = Node::RefMutable<Leaf>(art, node, NType::LEAF);

  leaf.count = MinValue((idx_t)Node::LEAF_SIZE, count);
+
  for (idx_t i = 0; i < leaf.count; i++) {
  leaf.row_ids[i] = row_ids[copy_count + i];
  }
@@ -38,6 +39,16 @@ void Leaf::New(ART &art, reference<Node> &node, const row_t *row_ids, idx_t coun
  }
  }

+ Leaf &Leaf::New(ART &art, Node &node) {
+ node = Node::GetAllocator(art, NType::LEAF).New();
+ node.SetMetadata(static_cast<uint8_t>(NType::LEAF));
+ auto &leaf = Node::RefMutable<Leaf>(art, node, NType::LEAF);
+
+ leaf.count = 0;
+ leaf.ptr.Clear();
+ return leaf;
+ }
+
  void Leaf::Free(ART &art, Node &node) {

  Node current_node = node;
@@ -313,13 +324,10 @@ void Leaf::MoveInlinedToLeaf(ART &art, Node &node) {

  D_ASSERT(node.GetType() == NType::LEAF_INLINED);
  auto row_id = node.GetRowId();
- node = Node::GetAllocator(art, NType::LEAF).New();
- node.SetMetadata(static_cast<uint8_t>(NType::LEAF));
+ auto &leaf = New(art, node);

- auto &leaf = Node::RefMutable<Leaf>(art, node, NType::LEAF);
  leaf.count = 1;
  leaf.row_ids[0] = row_id;
- leaf.ptr.Clear();
  }

  Leaf &Leaf::Append(ART &art, const row_t row_id) {
@@ -328,12 +336,7 @@ Leaf &Leaf::Append(ART &art, const row_t row_id) {

  // we need a new leaf node
  if (leaf.get().count == Node::LEAF_SIZE) {
- leaf.get().ptr = Node::GetAllocator(art, NType::LEAF).New();
- leaf.get().ptr.SetMetadata(static_cast<uint8_t>(NType::LEAF));
-
- leaf = Node::RefMutable<Leaf>(art, leaf.get().ptr, NType::LEAF);
- leaf.get().count = 0;
- leaf.get().ptr.Clear();
+ leaf = New(art, leaf.get().ptr);
  }

  leaf.get().row_ids[leaf.get().count] = row_id;

package/src/duckdb/src/execution/index/art/node48.cpp CHANGED
@@ -15,8 +15,6 @@ Node48 &Node48::New(ART &art, Node &node) {
  for (idx_t i = 0; i < Node::NODE_256_CAPACITY; i++) {
  n48.child_index[i] = Node::EMPTY_MARKER;
  }
-
- // necessary for faster child insertion/deletion
  for (idx_t i = 0; i < Node::NODE_48_CAPACITY; i++) {
  n48.children[i].Clear();
  }

package/src/duckdb/src/execution/index/fixed_size_allocator.cpp CHANGED
@@ -4,9 +4,6 @@

  namespace duckdb {

- constexpr idx_t FixedSizeAllocator::BASE[];
- constexpr uint8_t FixedSizeAllocator::SHIFT[];
-
  FixedSizeAllocator::FixedSizeAllocator(const idx_t segment_size, BlockManager &block_manager)
  : block_manager(block_manager), buffer_manager(block_manager.buffer_manager),
  metadata_manager(block_manager.GetMetadataManager()), segment_size(segment_size), total_segment_count(0) {
@@ -61,25 +58,36 @@ IndexPointer FixedSizeAllocator::New() {
  D_ASSERT(buffers.find(buffer_id) != buffers.end());
  auto &buffer = buffers.find(buffer_id)->second;
  ValidityMask mask(reinterpret_cast<validity_t *>(buffer.Get()));
+
+ // zero-initialize the bitmask to avoid leaking memory to disk
+ auto data = mask.GetData();
+ for (idx_t i = 0; i < bitmask_count; i++) {
+ data[i] = 0;
+ }
+
+ // initializing the bitmask of the new buffer
  mask.SetAllValid(available_segments_per_buffer);
  }

- // return a pointer
+ // return a pointer to a free segment
  D_ASSERT(!buffers_with_free_space.empty());
  auto buffer_id = uint32_t(*buffers_with_free_space.begin());

  D_ASSERT(buffers.find(buffer_id) != buffers.end());
  auto &buffer = buffers.find(buffer_id)->second;
- auto bitmask_ptr = reinterpret_cast<validity_t *>(buffer.Get());
- ValidityMask mask(bitmask_ptr);
- auto offset = GetOffset(mask, buffer.segment_count);
+ auto offset = buffer.GetOffset(bitmask_count);

- buffer.segment_count++;
  total_segment_count++;
+ buffer.segment_count++;
  if (buffer.segment_count == available_segments_per_buffer) {
  buffers_with_free_space.erase(buffer_id);
  }

+ // zero-initialize that segment
+ auto buffer_ptr = buffer.Get();
+ auto offset_in_buffer = buffer_ptr + offset * segment_size + bitmask_offset;
+ memset(offset_in_buffer, 0, segment_size);
+
  return IndexPointer(buffer_id, offset);
  }

@@ -95,13 +103,14 @@ void FixedSizeAllocator::Free(const IndexPointer ptr) {
  ValidityMask mask(bitmask_ptr);
  D_ASSERT(!mask.RowIsValid(offset));
  mask.SetValid(offset);
- buffers_with_free_space.insert(buffer_id);

  D_ASSERT(total_segment_count > 0);
  D_ASSERT(buffer.segment_count > 0);

- buffer.segment_count--;
+ // adjust the allocator fields
+ buffers_with_free_space.insert(buffer_id);
  total_segment_count--;
+ buffer.segment_count--;
  }

  void FixedSizeAllocator::Reset() {
@@ -123,6 +132,16 @@ idx_t FixedSizeAllocator::GetMemoryUsage() const {
  return memory_usage;
  }

+ idx_t FixedSizeAllocator::GetUpperBoundBufferId() const {
+ idx_t upper_bound_id = 0;
+ for (auto &buffer : buffers) {
+ if (buffer.first >= upper_bound_id) {
+ upper_bound_id = buffer.first + 1;
+ }
+ }
+ return upper_bound_id;
+ }
+
  void FixedSizeAllocator::Merge(FixedSizeAllocator &other) {

  D_ASSERT(segment_size == other.segment_size);
@@ -232,10 +251,10 @@ IndexPointer FixedSizeAllocator::VacuumPointer(const IndexPointer ptr) {
  return new_ptr;
  }

- BlockPointer FixedSizeAllocator::Serialize(MetadataWriter &writer) {
+ BlockPointer FixedSizeAllocator::Serialize(PartialBlockManager &partial_block_manager, MetadataWriter &writer) {

  for (auto &buffer : buffers) {
- buffer.second.Serialize();
+ buffer.second.Serialize(partial_block_manager, available_segments_per_buffer, segment_size, bitmask_offset);
  }

  auto block_pointer = writer.GetBlockPointer();
@@ -245,8 +264,9 @@ BlockPointer FixedSizeAllocator::Serialize(MetadataWriter &writer) {

  for (auto &buffer : buffers) {
  writer.Write(buffer.first);
- writer.Write(buffer.second.BlockId());
+ writer.Write(buffer.second.block_pointer);
  writer.Write(buffer.second.segment_count);
+ writer.Write(buffer.second.allocation_size);
  }
  for (auto &buffer_id : buffers_with_free_space) {
  writer.Write(buffer_id);
@@ -266,63 +286,18 @@ void FixedSizeAllocator::Deserialize(const BlockPointer &block_pointer) {

  for (idx_t i = 0; i < buffer_count; i++) {
  auto buffer_id = reader.Read<idx_t>();
- auto block_id = reader.Read<block_id_t>();
- auto buffer_segment_count = reader.Read<idx_t>();
- FixedSizeBuffer new_buffer(block_manager, buffer_segment_count, block_id);
+ auto buffer_block_pointer = reader.Read<BlockPointer>();
+ auto segment_count = reader.Read<idx_t>();
+ auto allocation_size = reader.Read<idx_t>();
+ FixedSizeBuffer new_buffer(block_manager, segment_count, allocation_size, buffer_block_pointer);
  buffers.insert(make_pair(buffer_id, std::move(new_buffer)));
- total_segment_count += buffer_segment_count;
+ total_segment_count += segment_count;
  }
  for (idx_t i = 0; i < buffers_with_free_space_count; i++) {
  buffers_with_free_space.insert(reader.Read<idx_t>());
  }
  }

- uint32_t FixedSizeAllocator::GetOffset(ValidityMask &mask, const idx_t segment_count) {
-
- auto data = mask.GetData();
-
- // fills up a buffer sequentially before searching for free bits
- if (mask.RowIsValid(segment_count)) {
- mask.SetInvalid(segment_count);
- return segment_count;
- }
-
- for (idx_t entry_idx = 0; entry_idx < bitmask_count; entry_idx++) {
- // get an entry with free bits
- if (data[entry_idx] == 0) {
- continue;
- }
-
- // find the position of the free bit
- auto entry = data[entry_idx];
- idx_t first_valid_bit = 0;
-
- // this loop finds the position of the rightmost set bit in entry and stores it
- // in first_valid_bit
- for (idx_t i = 0; i < 6; i++) {
- // set the left half of the bits of this level to zero and test if the entry is still not zero
- if (entry & BASE[i]) {
- // first valid bit is in the rightmost s[i] bits
- // permanently set the left half of the bits to zero
- entry &= BASE[i];
- } else {
- // first valid bit is in the leftmost s[i] bits
- // shift by s[i] for the next iteration and add s[i] to the position of the rightmost set bit
- entry >>= SHIFT[i];
- first_valid_bit += SHIFT[i];
- }
- }
- D_ASSERT(entry);
-
- auto prev_bits = entry_idx * sizeof(validity_t) * 8;
- D_ASSERT(mask.RowIsValid(prev_bits + first_valid_bit));
- mask.SetInvalid(prev_bits + first_valid_bit);
- return (prev_bits + first_valid_bit);
- }
-
- throw InternalException("Invalid bitmask for FixedSizeAllocator");
- }
-
  idx_t FixedSizeAllocator::GetAvailableBufferId() const {
  idx_t buffer_id = buffers.size();
  while (buffers.find(buffer_id) != buffers.end()) {
@@ -332,14 +307,4 @@ idx_t FixedSizeAllocator::GetAvailableBufferId() const {
  return buffer_id;
  }

- idx_t FixedSizeAllocator::GetUpperBoundBufferId() const {
- idx_t upper_bound_id = 0;
- for (auto &buffer : buffers) {
- if (buffer.first >= upper_bound_id) {
- upper_bound_id = buffer.first + 1;
- }
- }
- return upper_bound_id;
- }
-
  } // namespace duckdb
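The removed GetOffset is not gone: per the file list, fixed_size_buffer.cpp grows by ~245 lines and New() now calls buffer.GetOffset(bitmask_count), so the free-segment search moved into FixedSizeBuffer. Its core trick is locating the rightmost set bit of a 64-bit validity entry by binary search over halves. A standalone sketch of that technique (the BASE/SHIFT values are reconstructed from the loop's structure and are illustrative):

#include <cassert>
#include <cstdint>

uint64_t RightmostSetBit(uint64_t entry) {
	assert(entry != 0);
	// masks selecting the low 32, 16, 8, 4, 2, 1 bits, and the matching shifts
	static const uint64_t BASE[] = {0x00000000FFFFFFFF, 0x0000FFFF, 0x00FF, 0x0F, 0x3, 0x1};
	static const uint8_t SHIFT[] = {32, 16, 8, 4, 2, 1};
	uint64_t position = 0;
	for (int i = 0; i < 6; i++) {
		if (entry & BASE[i]) {
			// the rightmost set bit is in the low half; discard the high half
			entry &= BASE[i];
		} else {
			// the low half is empty; skip it and record the shift
			entry >>= SHIFT[i];
			position += SHIFT[i];
		}
	}
	return position;
}

int main() {
	assert(RightmostSetBit(1) == 0);
	assert(RightmostSetBit(0b101000) == 3);
	assert(RightmostSetBit(uint64_t(1) << 63) == 63);
	return 0;
}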