duckdb 0.8.1-dev287.0 → 0.8.1-dev327.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/parquet/parquet-extension.cpp +24 -0
  3. package/src/duckdb/src/common/types/timestamp.cpp +37 -1
  4. package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +18 -12
  5. package/src/duckdb/src/execution/index/art/art.cpp +80 -7
  6. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +20 -1
  7. package/src/duckdb/src/execution/index/art/leaf.cpp +10 -11
  8. package/src/duckdb/src/execution/index/art/leaf_segment.cpp +10 -0
  9. package/src/duckdb/src/execution/index/art/node.cpp +47 -35
  10. package/src/duckdb/src/execution/index/art/node16.cpp +3 -0
  11. package/src/duckdb/src/execution/index/art/node256.cpp +1 -0
  12. package/src/duckdb/src/execution/index/art/node4.cpp +3 -0
  13. package/src/duckdb/src/execution/index/art/node48.cpp +2 -0
  14. package/src/duckdb/src/execution/index/art/prefix.cpp +2 -0
  15. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +29 -3
  16. package/src/duckdb/src/function/table/read_csv.cpp +2 -0
  17. package/src/duckdb/src/function/table/repeat.cpp +3 -0
  18. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  19. package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +4 -14
  20. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +10 -4
  21. package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +3 -0
  22. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +1 -1
  23. package/src/duckdb/src/include/duckdb/execution/index/art/leaf_segment.hpp +2 -0
  24. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +13 -3
  25. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +1 -0
  26. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +0 -2
  27. package/src/duckdb/src/include/duckdb/storage/compression/chimp/algorithm/byte_reader.hpp +4 -0
  28. package/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp +13 -13
  29. package/src/duckdb/src/include/duckdb/storage/index.hpp +4 -2
  30. package/src/duckdb/src/include/duckdb/storage/storage_extension.hpp +0 -6
  31. package/src/duckdb/src/parser/parsed_data/create_info.cpp +0 -3
  32. package/src/duckdb/src/parser/transform/helpers/nodetype_to_string.cpp +0 -2
  33. package/src/duckdb/src/parser/transform/statement/transform_drop.cpp +0 -3
  34. package/src/duckdb/src/parser/transformer.cpp +0 -2
  35. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +0 -27
  36. package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +0 -25
  37. package/src/duckdb/src/planner/operator/logical_pivot.cpp +14 -2
  38. package/src/duckdb/src/storage/index.cpp +13 -0
  39. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +0 -14
  40. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12828 -12956
  41. package/src/duckdb/third_party/zstd/compress/zstd_compress.cpp +3 -0
  42. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_cwksp.h +4 -0
  43. package/src/duckdb/ub_src_parser_transform_statement.cpp +0 -2
  44. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_database_info.hpp +0 -46
  45. package/src/duckdb/src/parser/transform/statement/transform_create_database.cpp +0 -27
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.8.1-dev287.0",
5
+ "version": "0.8.1-dev327.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -665,6 +665,28 @@ unique_ptr<LocalFunctionData> ParquetWriteInitializeLocal(ExecutionContext &cont
665
665
  return make_uniq<ParquetWriteLocalState>(context.client, bind_data.sql_types);
666
666
  }
667
667
 
668
+ // LCOV_EXCL_START
669
+ static void ParquetCopySerialize(FieldWriter &writer, const FunctionData &bind_data_p, const CopyFunction &function) {
670
+ auto &bind_data = bind_data_p.Cast<ParquetWriteBindData>();
671
+ writer.WriteRegularSerializableList<LogicalType>(bind_data.sql_types);
672
+ writer.WriteList<string>(bind_data.column_names);
673
+ writer.WriteField<duckdb_parquet::format::CompressionCodec::type>(bind_data.codec);
674
+ writer.WriteField<idx_t>(bind_data.row_group_size);
675
+ }
676
+
677
+ static unique_ptr<FunctionData> ParquetCopyDeserialize(ClientContext &context, FieldReader &reader,
678
+ CopyFunction &function) {
679
+ unique_ptr<ParquetWriteBindData> data = make_uniq<ParquetWriteBindData>();
680
+
681
+ data->sql_types = reader.ReadRequiredSerializableList<LogicalType, LogicalType>();
682
+ data->column_names = reader.ReadRequiredList<string>();
683
+ data->codec = reader.ReadRequired<duckdb_parquet::format::CompressionCodec::type>();
684
+ data->row_group_size = reader.ReadRequired<idx_t>();
685
+
686
+ return std::move(data);
687
+ }
688
+ // LCOV_EXCL_STOP
689
+
668
690
  //===--------------------------------------------------------------------===//
669
691
  // Execution Mode
670
692
  //===--------------------------------------------------------------------===//
@@ -764,6 +786,8 @@ void ParquetExtension::Load(DuckDB &db) {
764
786
  function.prepare_batch = ParquetWritePrepareBatch;
765
787
  function.flush_batch = ParquetWriteFlushBatch;
766
788
  function.desired_batch_size = ParquetWriteDesiredBatchSize;
789
+ function.serialize = ParquetCopySerialize;
790
+ function.deserialize = ParquetCopyDeserialize;
767
791
 
768
792
  function.extension = "parquet";
769
793
  ExtensionUtil::RegisterFunction(db_instance, function);
@@ -8,6 +8,7 @@
8
8
  #include "duckdb/common/chrono.hpp"
9
9
  #include "duckdb/common/operator/add.hpp"
10
10
  #include "duckdb/common/operator/multiply.hpp"
11
+ #include "duckdb/common/operator/subtract.hpp"
11
12
  #include "duckdb/common/limits.hpp"
12
13
  #include <ctime>
13
14
 
@@ -21,6 +22,38 @@ static_assert(sizeof(timestamp_t) == sizeof(int64_t), "timestamp_t was padded");
21
22
  // Z is optional
22
23
  // ISO 8601
23
24
 
25
+ // arithmetic operators
26
+ timestamp_t timestamp_t::operator+(const double &value) const {
27
+ timestamp_t result;
28
+ if (!TryAddOperator::Operation(this->value, int64_t(value), result.value)) {
29
+ throw OutOfRangeException("Overflow in timestamp addition");
30
+ }
31
+ return result;
32
+ }
33
+
34
+ int64_t timestamp_t::operator-(const timestamp_t &other) const {
35
+ int64_t result;
36
+ if (!TrySubtractOperator::Operation(value, int64_t(other.value), result)) {
37
+ throw OutOfRangeException("Overflow in timestamp subtraction");
38
+ }
39
+ return result;
40
+ }
41
+
42
+ // in-place operators
43
+ timestamp_t &timestamp_t::operator+=(const int64_t &delta) {
44
+ if (!TryAddOperator::Operation(value, delta, value)) {
45
+ throw OutOfRangeException("Overflow in timestamp increment");
46
+ }
47
+ return *this;
48
+ }
49
+
50
+ timestamp_t &timestamp_t::operator-=(const int64_t &delta) {
51
+ if (!TrySubtractOperator::Operation(value, delta, value)) {
52
+ throw OutOfRangeException("Overflow in timestamp decrement");
53
+ }
54
+ return *this;
55
+ }
56
+
24
57
  bool Timestamp::TryConvertTimestampTZ(const char *str, idx_t len, timestamp_t &result, bool &has_offset, string_t &tz) {
25
58
  idx_t pos;
26
59
  date_t date;
@@ -59,7 +92,10 @@ bool Timestamp::TryConvertTimestampTZ(const char *str, idx_t len, timestamp_t &r
59
92
  pos++;
60
93
  has_offset = true;
61
94
  } else if (Timestamp::TryParseUTCOffset(str, pos, len, hour_offset, minute_offset)) {
62
- result -= hour_offset * Interval::MICROS_PER_HOUR + minute_offset * Interval::MICROS_PER_MINUTE;
95
+ const int64_t delta = hour_offset * Interval::MICROS_PER_HOUR + minute_offset * Interval::MICROS_PER_MINUTE;
96
+ if (!TrySubtractOperator::Operation(result.value, delta, result.value)) {
97
+ return false;
98
+ }
63
99
  has_offset = true;
64
100
  } else {
65
101
  // Parse a time zone: / [A-Za-z0-9/_]+/
@@ -1153,16 +1153,27 @@ AggregateFunction GetMedianAbsoluteDeviationAggregateFunction(const LogicalType
1153
1153
 
1154
1154
  static void QuantileSerialize(FieldWriter &writer, const FunctionData *bind_data_p, const AggregateFunction &function) {
1155
1155
  D_ASSERT(bind_data_p);
1156
- throw NotImplementedException("FIXME: serializing quantiles is not supported right now");
1157
- //
1158
- // auto bind_data = (QuantileBindData *)bind_data_p;
1159
- // writer.WriteList<Value>(bind_data->quantiles);
1156
+ auto bind_data = dynamic_cast<const QuantileBindData *>(bind_data_p);
1157
+ D_ASSERT(bind_data);
1158
+ writer.WriteRegularSerializableList<Value>(bind_data->quantiles);
1159
+ writer.WriteList<idx_t>(bind_data->order);
1160
+ writer.WriteField<bool>(bind_data->desc);
1160
1161
  }
1161
1162
 
1162
1163
  unique_ptr<FunctionData> QuantileDeserialize(ClientContext &context, FieldReader &reader,
1163
1164
  AggregateFunction &bound_function) {
1164
- auto quantiles = reader.ReadRequiredList<Value>();
1165
- return make_uniq<QuantileBindData>(std::move(quantiles));
1165
+ auto quantiles = reader.ReadRequiredSerializableList<Value, Value>();
1166
+ auto bind_data = make_uniq<QuantileBindData>(quantiles);
1167
+ bind_data->quantiles = quantiles;
1168
+ bind_data->order = reader.ReadRequiredList<idx_t>();
1169
+ bind_data->desc = reader.ReadRequired<bool>();
1170
+
1171
+ return std::move(bind_data);
1172
+ }
1173
+
1174
+ static void QuantileDecimalSerialize(FieldWriter &writer, const FunctionData *bind_data_p,
1175
+ const AggregateFunction &function) {
1176
+ throw NotImplementedException("FIXME: serializing quantiles with decimals is not supported right now");
1166
1177
  }
1167
1178
 
1168
1179
  unique_ptr<FunctionData> BindMedian(ClientContext &context, AggregateFunction &function,
@@ -1176,7 +1187,7 @@ unique_ptr<FunctionData> BindMedianDecimal(ClientContext &context, AggregateFunc
1176
1187
 
1177
1188
  function = GetDiscreteQuantileAggregateFunction(arguments[0]->return_type);
1178
1189
  function.name = "median";
1179
- function.serialize = QuantileSerialize;
1190
+ function.serialize = QuantileDecimalSerialize;
1180
1191
  function.deserialize = QuantileDeserialize;
1181
1192
  function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
1182
1193
  return bind_data;
@@ -1227,11 +1238,6 @@ unique_ptr<FunctionData> BindQuantile(ClientContext &context, AggregateFunction
1227
1238
  return make_uniq<QuantileBindData>(quantiles);
1228
1239
  }
1229
1240
 
1230
- static void QuantileDecimalSerialize(FieldWriter &writer, const FunctionData *bind_data_p,
1231
- const AggregateFunction &function) {
1232
- throw NotImplementedException("FIXME: serializing quantiles with decimals is not supported right now");
1233
- }
1234
-
1235
1241
  unique_ptr<FunctionData> BindDiscreteQuantileDecimal(ClientContext &context, AggregateFunction &function,
1236
1242
  vector<unique_ptr<Expression>> &arguments) {
1237
1243
  auto bind_data = BindQuantile(context, function, arguments);
@@ -18,7 +18,6 @@
18
18
  #include "duckdb/storage/table/scan_state.hpp"
19
19
 
20
20
  #include <algorithm>
21
- #include <cstring>
22
21
 
23
22
  namespace duckdb {
24
23
 
@@ -347,7 +346,31 @@ bool ART::ConstructFromSorted(idx_t count, vector<ARTKey> &keys, Vector &row_ide
347
346
 
348
347
  auto key_section = KeySection(0, count - 1, 0, 0);
349
348
  auto has_constraint = IsUnique();
350
- return Construct(*this, keys, row_ids, *this->tree, key_section, has_constraint);
349
+ if (!Construct(*this, keys, row_ids, *this->tree, key_section, has_constraint)) {
350
+ return false;
351
+ }
352
+
353
+ #ifdef DEBUG
354
+ D_ASSERT(!VerifyAndToStringInternal(true).empty());
355
+ for (idx_t i = 0; i < count; i++) {
356
+ D_ASSERT(!keys[i].Empty());
357
+ auto leaf_node = Lookup(*tree, keys[i], 0);
358
+ D_ASSERT(leaf_node.IsSet());
359
+ auto &leaf = Leaf::Get(*this, leaf_node);
360
+
361
+ if (leaf.IsInlined()) {
362
+ D_ASSERT(row_ids[i] == leaf.row_ids.inlined);
363
+ continue;
364
+ }
365
+
366
+ D_ASSERT(leaf.row_ids.ptr.IsSet());
367
+ Node leaf_segment = leaf.row_ids.ptr;
368
+ auto position = leaf.FindRowId(*this, leaf_segment, row_ids[i]);
369
+ D_ASSERT(position != (uint32_t)DConstants::INVALID_INDEX);
370
+ }
371
+ #endif
372
+
373
+ return true;
351
374
  }
352
375
 
353
376
  //===--------------------------------------------------------------------===//
@@ -397,6 +420,29 @@ PreservedError ART::Insert(IndexLock &lock, DataChunk &input, Vector &row_ids) {
397
420
  return PreservedError(ConstraintException("PRIMARY KEY or UNIQUE constraint violated: duplicate key \"%s\"",
398
421
  AppendRowError(input, failed_index)));
399
422
  }
423
+
424
+ #ifdef DEBUG
425
+ for (idx_t i = 0; i < input.size(); i++) {
426
+ if (keys[i].Empty()) {
427
+ continue;
428
+ }
429
+
430
+ auto leaf_node = Lookup(*tree, keys[i], 0);
431
+ D_ASSERT(leaf_node.IsSet());
432
+ auto &leaf = Leaf::Get(*this, leaf_node);
433
+
434
+ if (leaf.IsInlined()) {
435
+ D_ASSERT(row_identifiers[i] == leaf.row_ids.inlined);
436
+ continue;
437
+ }
438
+
439
+ D_ASSERT(leaf.row_ids.ptr.IsSet());
440
+ Node leaf_segment = leaf.row_ids.ptr;
441
+ auto position = leaf.FindRowId(*this, leaf_segment, row_identifiers[i]);
442
+ D_ASSERT(position != (uint32_t)DConstants::INVALID_INDEX);
443
+ }
444
+ #endif
445
+
400
446
  return PreservedError();
401
447
  }
402
448
 
@@ -535,16 +581,31 @@ void ART::Delete(IndexLock &state, DataChunk &input, Vector &row_ids) {
535
581
  continue;
536
582
  }
537
583
  Erase(*tree, keys[i], 0, row_identifiers[i]);
584
+ }
585
+
538
586
  #ifdef DEBUG
587
+ // verify that we removed all row IDs
588
+ for (idx_t i = 0; i < input.size(); i++) {
589
+ if (keys[i].Empty()) {
590
+ continue;
591
+ }
592
+
539
593
  auto node = Lookup(*tree, keys[i], 0);
540
594
  if (node.IsSet()) {
541
595
  auto &leaf = Leaf::Get(*this, node);
542
- for (idx_t k = 0; k < leaf.count; k++) {
543
- D_ASSERT(leaf.GetRowId(*this, k) != row_identifiers[i]);
596
+
597
+ if (leaf.IsInlined()) {
598
+ D_ASSERT(row_identifiers[i] != leaf.row_ids.inlined);
599
+ continue;
544
600
  }
601
+
602
+ D_ASSERT(leaf.row_ids.ptr.IsSet());
603
+ Node leaf_segment = leaf.row_ids.ptr;
604
+ auto position = leaf.FindRowId(*this, leaf_segment, row_identifiers[i]);
605
+ D_ASSERT(position == (uint32_t)DConstants::INVALID_INDEX);
545
606
  }
546
- #endif
547
607
  }
608
+ #endif
548
609
  }
549
610
 
550
611
  void ART::Erase(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id) {
@@ -1022,6 +1083,10 @@ void ART::Vacuum(IndexLock &state) {
1022
1083
 
1023
1084
  // finalize the vacuum operation
1024
1085
  FinalizeVacuum(flags);
1086
+
1087
+ for (auto &allocator : allocators) {
1088
+ allocator->Verify();
1089
+ }
1025
1090
  }
1026
1091
 
1027
1092
  //===--------------------------------------------------------------------===//
@@ -1059,6 +1124,10 @@ bool ART::MergeIndexes(IndexLock &state, Index &other_index) {
1059
1124
  if (!tree->Merge(*this, *other_art.tree)) {
1060
1125
  return false;
1061
1126
  }
1127
+
1128
+ for (auto &allocator : allocators) {
1129
+ allocator->Verify();
1130
+ }
1062
1131
  return true;
1063
1132
  }
1064
1133
 
@@ -1066,9 +1135,13 @@ bool ART::MergeIndexes(IndexLock &state, Index &other_index) {
1066
1135
  // Utility
1067
1136
  //===--------------------------------------------------------------------===//
1068
1137
 
1069
- string ART::ToString() {
1138
+ string ART::VerifyAndToString(IndexLock &state, const bool only_verify) {
1139
+ return VerifyAndToStringInternal(only_verify);
1140
+ }
1141
+
1142
+ string ART::VerifyAndToStringInternal(const bool only_verify) {
1070
1143
  if (tree->IsSet()) {
1071
- return tree->ToString(*this);
1144
+ return "ART: " + tree->VerifyAndToString(*this, only_verify);
1072
1145
  }
1073
1146
  return "[empty]";
1074
1147
  }
@@ -126,13 +126,20 @@ void FixedSizeAllocator::Merge(FixedSizeAllocator &other) {
126
126
 
127
127
  bool FixedSizeAllocator::InitializeVacuum() {
128
128
 
129
+ if (total_allocations == 0) {
130
+ Reset();
131
+ return false;
132
+ }
133
+
129
134
  auto total_available_allocations = allocations_per_buffer * buffers.size();
135
+ D_ASSERT(total_available_allocations >= total_allocations);
130
136
  auto total_free_positions = total_available_allocations - total_allocations;
131
137
 
132
138
  // vacuum_count buffers can be freed
133
- auto vacuum_count = total_free_positions / allocations_per_buffer / 2;
139
+ auto vacuum_count = total_free_positions / allocations_per_buffer;
134
140
 
135
141
  // calculate the vacuum threshold adaptively
142
+ D_ASSERT(vacuum_count < buffers.size());
136
143
  idx_t memory_usage = GetMemoryUsage();
137
144
  idx_t excess_memory_usage = vacuum_count * BUFFER_ALLOC_SIZE;
138
145
  auto excess_percentage = (double)excess_memory_usage / (double)memory_usage;
@@ -171,10 +178,22 @@ SwizzleablePointer FixedSizeAllocator::VacuumPointer(const SwizzleablePointer pt
171
178
  // buffer after the vacuum operation
172
179
 
173
180
  auto new_ptr = New();
181
+
182
+ // new increases the allocation count
183
+ total_allocations--;
184
+
174
185
  memcpy(Get(new_ptr), Get(ptr), allocation_size);
175
186
  return new_ptr;
176
187
  }
177
188
 
189
+ void FixedSizeAllocator::Verify() const {
190
+ #ifdef DEBUG
191
+ auto total_available_allocations = allocations_per_buffer * buffers.size();
192
+ D_ASSERT(total_available_allocations >= total_allocations);
193
+ D_ASSERT(buffers.size() >= buffers_with_free_space.size());
194
+ #endif
195
+ }
196
+
178
197
  uint32_t FixedSizeAllocator::GetOffset(ValidityMask &mask, const idx_t allocation_count) {
179
198
 
180
199
  auto data = mask.GetData();
@@ -60,16 +60,10 @@ void Leaf::Free(ART &art, Node &node) {
60
60
  D_ASSERT(node.IsSet());
61
61
  D_ASSERT(!node.IsSwizzled());
62
62
 
63
+ // free leaf segments
63
64
  auto &leaf = Leaf::Get(art, node);
64
-
65
- // delete all leaf segments
66
65
  if (!leaf.IsInlined()) {
67
- auto ptr = leaf.row_ids.ptr;
68
- while (ptr.IsSet()) {
69
- auto next_ptr = LeafSegment::Get(art, ptr).next;
70
- Node::Free(art, ptr);
71
- ptr = next_ptr;
72
- }
66
+ Node::Free(art, leaf.row_ids.ptr);
73
67
  }
74
68
  }
75
69
 
@@ -275,10 +269,10 @@ uint32_t Leaf::FindRowId(const ART &art, Node &ptr, const row_t row_id) const {
275
269
  return (uint32_t)DConstants::INVALID_INDEX;
276
270
  }
277
271
 
278
- string Leaf::ToString(const ART &art) const {
272
+ string Leaf::VerifyAndToString(const ART &art, const bool only_verify) const {
279
273
 
280
274
  if (IsInlined()) {
281
- return "Leaf (" + to_string(count) + "): [" + to_string(row_ids.inlined) + "]";
275
+ return only_verify ? "" : "Leaf [count: 1, row ID: " + to_string(row_ids.inlined) + "]";
282
276
  }
283
277
 
284
278
  auto ptr = row_ids.ptr;
@@ -296,7 +290,10 @@ string Leaf::ToString(const ART &art) const {
296
290
  remaining -= to_string_count;
297
291
  ptr = segment.next;
298
292
  }
299
- return "Leaf (" + to_string(this_count) + ", " + to_string(count) + "): [" + str + "] \n";
293
+
294
+ D_ASSERT(remaining == 0);
295
+ D_ASSERT(this_count == count);
296
+ return only_verify ? "" : "Leaf [count: " + to_string(count) + ", row IDs: " + str + "] \n";
300
297
  }
301
298
 
302
299
  BlockPointer Leaf::Serialize(const ART &art, MetaBlockWriter &writer) const {
@@ -366,6 +363,7 @@ void Leaf::Vacuum(ART &art) {
366
363
  auto &allocator = Node::GetAllocator(art, NType::LEAF_SEGMENT);
367
364
  if (allocator.NeedsVacuum(row_ids.ptr)) {
368
365
  row_ids.ptr.SetPtr(allocator.VacuumPointer(row_ids.ptr));
366
+ row_ids.ptr.type = (uint8_t)NType::LEAF_SEGMENT;
369
367
  }
370
368
 
371
369
  auto ptr = row_ids.ptr;
@@ -374,6 +372,7 @@ void Leaf::Vacuum(ART &art) {
374
372
  ptr = segment.next;
375
373
  if (ptr.IsSet() && allocator.NeedsVacuum(ptr)) {
376
374
  segment.next.SetPtr(allocator.VacuumPointer(ptr));
375
+ segment.next.type = (uint8_t)NType::LEAF_SEGMENT;
377
376
  ptr = segment.next;
378
377
  }
379
378
  }
@@ -15,6 +15,16 @@ LeafSegment &LeafSegment::New(ART &art, Node &node) {
15
15
  return segment;
16
16
  }
17
17
 
18
+ void LeafSegment::Free(ART &art, Node &node) {
19
+
20
+ D_ASSERT(node.IsSet());
21
+ D_ASSERT(!node.IsSwizzled());
22
+
23
+ // free next segment
24
+ auto next_segment = LeafSegment::Get(art, node).next;
25
+ Node::Free(art, next_segment);
26
+ }
27
+
18
28
  LeafSegment &LeafSegment::Append(ART &art, uint32_t &count, const row_t row_id) {
19
29
 
20
30
  reference<LeafSegment> segment(*this);
@@ -69,6 +69,9 @@ void Node::Free(ART &art, Node &node) {
69
69
 
70
70
  // free the prefixes and children of the nodes
71
71
  switch (type) {
72
+ case NType::LEAF_SEGMENT:
73
+ LeafSegment::Free(art, node);
74
+ break;
72
75
  case NType::LEAF:
73
76
  Leaf::Free(art, node);
74
77
  break;
@@ -159,65 +162,57 @@ void Node::DeleteChild(ART &art, Node &node, const uint8_t byte) {
159
162
 
160
163
  optional_ptr<Node> Node::GetChild(ART &art, const uint8_t byte) const {
161
164
 
162
- D_ASSERT(!IsSwizzled());
165
+ D_ASSERT(IsSet() && !IsSwizzled());
163
166
 
164
167
  optional_ptr<Node> child;
165
168
  switch (DecodeARTNodeType()) {
166
- case NType::NODE_4: {
169
+ case NType::NODE_4:
167
170
  child = Node4::Get(art, *this).GetChild(byte);
168
171
  break;
169
- }
170
- case NType::NODE_16: {
172
+ case NType::NODE_16:
171
173
  child = Node16::Get(art, *this).GetChild(byte);
172
174
  break;
173
- }
174
- case NType::NODE_48: {
175
+ case NType::NODE_48:
175
176
  child = Node48::Get(art, *this).GetChild(byte);
176
177
  break;
177
- }
178
- case NType::NODE_256: {
178
+ case NType::NODE_256:
179
179
  child = Node256::Get(art, *this).GetChild(byte);
180
180
  break;
181
- }
182
181
  default:
183
182
  throw InternalException("Invalid node type for GetChild.");
184
183
  }
185
184
 
186
- // unswizzle the ART node before returning it
185
+ // deserialize the ART node before returning it
187
186
  if (child && child->IsSwizzled()) {
188
187
  child->Deserialize(art);
189
188
  }
190
189
  return child;
191
190
  }
192
191
 
193
- optional_ptr<Node> Node::GetNextChild(ART &art, uint8_t &byte) const {
192
+ optional_ptr<Node> Node::GetNextChild(ART &art, uint8_t &byte, const bool deserialize) const {
194
193
 
195
- D_ASSERT(!IsSwizzled());
194
+ D_ASSERT(IsSet() && !IsSwizzled());
196
195
 
197
196
  optional_ptr<Node> child;
198
197
  switch (DecodeARTNodeType()) {
199
- case NType::NODE_4: {
198
+ case NType::NODE_4:
200
199
  child = Node4::Get(art, *this).GetNextChild(byte);
201
200
  break;
202
- }
203
- case NType::NODE_16: {
201
+ case NType::NODE_16:
204
202
  child = Node16::Get(art, *this).GetNextChild(byte);
205
203
  break;
206
- }
207
- case NType::NODE_48: {
204
+ case NType::NODE_48:
208
205
  child = Node48::Get(art, *this).GetNextChild(byte);
209
206
  break;
210
- }
211
- case NType::NODE_256: {
207
+ case NType::NODE_256:
212
208
  child = Node256::Get(art, *this).GetNextChild(byte);
213
209
  break;
214
- }
215
210
  default:
216
211
  throw InternalException("Invalid node type for GetNextChild.");
217
212
  }
218
213
 
219
- // unswizzle the ART node before returning it
220
- if (child && child->IsSwizzled()) {
214
+ // deserialize the ART node before returning it
215
+ if (child && deserialize && child->IsSwizzled()) {
221
216
  child->Deserialize(art);
222
217
  }
223
218
  return child;
@@ -260,10 +255,11 @@ void Node::Deserialize(ART &art) {
260
255
  type = reader.Read<uint8_t>();
261
256
  swizzle_flag = 0;
262
257
 
263
- auto type = DecodeARTNodeType();
264
- SetPtr(Node::GetAllocator(art, type).New());
258
+ auto decoded_type = DecodeARTNodeType();
259
+ SetPtr(Node::GetAllocator(art, decoded_type).New());
260
+ type = (uint8_t)decoded_type;
265
261
 
266
- switch (type) {
262
+ switch (decoded_type) {
267
263
  case NType::LEAF:
268
264
  return Leaf::Get(art, *this).Deserialize(art, reader);
269
265
  case NType::NODE_4:
@@ -283,28 +279,43 @@ void Node::Deserialize(ART &art) {
283
279
  // Utility
284
280
  //===--------------------------------------------------------------------===//
285
281
 
286
- string Node::ToString(ART &art) const {
282
+ string Node::VerifyAndToString(ART &art, const bool only_verify) {
287
283
 
288
- D_ASSERT(!IsSwizzled());
284
+ D_ASSERT(IsSet());
285
+ if (IsSwizzled()) {
286
+ return only_verify ? "" : "swizzled";
287
+ }
289
288
 
290
- if (DecodeARTNodeType() == NType::LEAF) {
291
- return Leaf::Get(art, *this).ToString(art);
289
+ auto type = DecodeARTNodeType();
290
+ if (type == NType::LEAF) {
291
+ auto str = Leaf::Get(art, *this).VerifyAndToString(art, only_verify);
292
+ return only_verify ? "" : "\n" + str;
292
293
  }
293
294
 
294
295
  string str = "Node" + to_string(GetCapacity()) + ": [";
295
296
 
297
+ idx_t child_count = 0;
296
298
  uint8_t byte = 0;
297
- auto child = GetNextChild(art, byte);
299
+ auto child = GetNextChild(art, byte, false);
298
300
  while (child) {
299
- str += "(" + to_string(byte) + ", " + child->ToString(art) + ")";
300
- if (byte == NumericLimits<uint8_t>::Maximum()) {
301
- break;
301
+ child_count++;
302
+ if (child->IsSwizzled()) {
303
+ if (!only_verify) {
304
+ str += "(swizzled)";
305
+ }
306
+ } else {
307
+ str += "(" + to_string(byte) + ", " + child->VerifyAndToString(art, only_verify) + ")";
308
+ if (byte == NumericLimits<uint8_t>::Maximum()) {
309
+ break;
310
+ }
302
311
  }
303
312
  byte++;
304
- child = GetNextChild(art, byte);
313
+ child = GetNextChild(art, byte, false);
305
314
  }
306
315
 
307
- return str + "]";
316
+ // ensure that the child count is at least two
317
+ D_ASSERT(child_count > 1);
318
+ return only_verify ? "" : "\n" + str + "]";
308
319
  }
309
320
 
310
321
  idx_t Node::GetCapacity() const {
@@ -567,6 +578,7 @@ void Node::Vacuum(ART &art, Node &node, const ARTFlags &flags) {
567
578
  needs_vacuum = flags.vacuum_flags[node.type - 1] && allocator.NeedsVacuum(node);
568
579
  if (needs_vacuum) {
569
580
  node.SetPtr(allocator.VacuumPointer(node));
581
+ node.type = (uint8_t)type;
570
582
  }
571
583
 
572
584
  switch (type) {
@@ -60,6 +60,7 @@ Node16 &Node16::ShrinkNode48(ART &art, Node &node16, Node &node48) {
60
60
  n16.prefix.Move(n48.prefix);
61
61
 
62
62
  for (idx_t i = 0; i < Node::NODE_256_CAPACITY; i++) {
63
+ D_ASSERT(n16.count <= Node::NODE_16_CAPACITY);
63
64
  if (n48.child_index[i] != Node::EMPTY_MARKER) {
64
65
  n16.key[n16.count] = i;
65
66
  n16.children[n16.count] = n48.children[n48.child_index[i]];
@@ -160,6 +161,7 @@ optional_ptr<Node> Node16::GetChild(const uint8_t byte) {
160
161
 
161
162
  for (idx_t i = 0; i < count; i++) {
162
163
  if (key[i] == byte) {
164
+ D_ASSERT(children[i].IsSet());
163
165
  return &children[i];
164
166
  }
165
167
  }
@@ -171,6 +173,7 @@ optional_ptr<Node> Node16::GetNextChild(uint8_t &byte) {
171
173
  for (idx_t i = 0; i < count; i++) {
172
174
  if (key[i] >= byte) {
173
175
  byte = key[i];
176
+ D_ASSERT(children[i].IsSet());
174
177
  return &children[i];
175
178
  }
176
179
  }
@@ -83,6 +83,7 @@ void Node256::InsertChild(ART &art, Node &node, const uint8_t byte, const Node c
83
83
  D_ASSERT(!n256.children[byte].IsSet());
84
84
 
85
85
  n256.count++;
86
+ D_ASSERT(n256.count <= Node::NODE_256_CAPACITY);
86
87
  n256.children[byte] = child;
87
88
  }
88
89
 
@@ -37,6 +37,7 @@ Node4 &Node4::ShrinkNode16(ART &art, Node &node4, Node &node16) {
37
37
  auto &n4 = Node4::New(art, node4);
38
38
  auto &n16 = Node16::Get(art, node16);
39
39
 
40
+ D_ASSERT(n16.count <= Node::NODE_4_CAPACITY);
40
41
  n4.count = n16.count;
41
42
  n4.prefix.Move(n16.prefix);
42
43
 
@@ -145,6 +146,7 @@ optional_ptr<Node> Node4::GetChild(const uint8_t byte) {
145
146
 
146
147
  for (idx_t i = 0; i < count; i++) {
147
148
  if (key[i] == byte) {
149
+ D_ASSERT(children[i].IsSet());
148
150
  return &children[i];
149
151
  }
150
152
  }
@@ -156,6 +158,7 @@ optional_ptr<Node> Node4::GetNextChild(uint8_t &byte) {
156
158
  for (idx_t i = 0; i < count; i++) {
157
159
  if (key[i] >= byte) {
158
160
  byte = key[i];
161
+ D_ASSERT(children[i].IsSet());
159
162
  return &children[i];
160
163
  }
161
164
  }
@@ -85,6 +85,7 @@ Node48 &Node48::ShrinkNode256(ART &art, Node &node48, Node &node256) {
85
85
  n48.prefix.Move(n256.prefix);
86
86
 
87
87
  for (idx_t i = 0; i < Node::NODE_256_CAPACITY; i++) {
88
+ D_ASSERT(n48.count <= Node::NODE_48_CAPACITY);
88
89
  if (n256.children[i].IsSet()) {
89
90
  n48.child_index[i] = n48.count;
90
91
  n48.children[n48.count] = n256.children[i];
@@ -168,6 +169,7 @@ optional_ptr<Node> Node48::GetNextChild(uint8_t &byte) {
168
169
  for (idx_t i = byte; i < Node::NODE_256_CAPACITY; i++) {
169
170
  if (child_index[i] != Node::EMPTY_MARKER) {
170
171
  byte = i;
172
+ D_ASSERT(children[child_index[i]].IsSet());
171
173
  return &children[child_index[i]];
172
174
  }
173
175
  }
@@ -427,6 +427,7 @@ void Prefix::Vacuum(ART &art) {
427
427
  auto &allocator = Node::GetAllocator(art, NType::PREFIX_SEGMENT);
428
428
  if (allocator.NeedsVacuum(data.ptr)) {
429
429
  data.ptr.SetPtr(allocator.VacuumPointer(data.ptr));
430
+ data.ptr.type = (uint8_t)NType::PREFIX_SEGMENT;
430
431
  }
431
432
 
432
433
  auto ptr = data.ptr;
@@ -435,6 +436,7 @@ void Prefix::Vacuum(ART &art) {
435
436
  ptr = segment.next;
436
437
  if (ptr.IsSet() && allocator.NeedsVacuum(ptr)) {
437
438
  segment.next.SetPtr(allocator.VacuumPointer(ptr));
439
+ segment.next.type = (uint8_t)NType::PREFIX_SEGMENT;
438
440
  ptr = segment.next;
439
441
  }
440
442
  }