duckdb 0.8.2-dev1764.0 → 0.8.2-dev1859.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. package/README.md +7 -0
  2. package/binding.gyp +1 -0
  3. package/package.json +1 -1
  4. package/src/duckdb/extension/parquet/include/parquet_reader.hpp +1 -0
  5. package/src/duckdb/extension/parquet/parquet_extension.cpp +38 -22
  6. package/src/duckdb/extension/parquet/parquet_reader.cpp +1 -4
  7. package/src/duckdb/src/common/constants.cpp +2 -1
  8. package/src/duckdb/src/common/enum_util.cpp +5 -5
  9. package/src/duckdb/src/common/sort/sort_state.cpp +1 -1
  10. package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
  11. package/src/duckdb/src/common/types/column/column_data_collection.cpp +8 -0
  12. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +5 -0
  13. package/src/duckdb/src/common/types/string_heap.cpp +4 -0
  14. package/src/duckdb/src/core_functions/function_list.cpp +2 -0
  15. package/src/duckdb/src/core_functions/scalar/debug/vector_type.cpp +23 -0
  16. package/src/duckdb/src/execution/index/art/art.cpp +49 -108
  17. package/src/duckdb/src/execution/index/art/art_key.cpp +0 -11
  18. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +10 -14
  19. package/src/duckdb/src/execution/index/art/iterator.cpp +13 -19
  20. package/src/duckdb/src/execution/index/art/leaf.cpp +290 -241
  21. package/src/duckdb/src/execution/index/art/node.cpp +104 -95
  22. package/src/duckdb/src/execution/index/art/node16.cpp +6 -6
  23. package/src/duckdb/src/execution/index/art/node256.cpp +6 -6
  24. package/src/duckdb/src/execution/index/art/node4.cpp +6 -6
  25. package/src/duckdb/src/execution/index/art/node48.cpp +6 -6
  26. package/src/duckdb/src/execution/index/art/prefix.cpp +49 -39
  27. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +34 -1175
  28. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +4 -14
  29. package/src/duckdb/src/execution/window_executor.cpp +1280 -0
  30. package/src/duckdb/src/execution/window_segment_tree.cpp +224 -117
  31. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  32. package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
  33. package/src/duckdb/src/include/duckdb/common/type_util.hpp +8 -0
  34. package/src/duckdb/src/include/duckdb/common/typedefs.hpp +8 -0
  35. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +10 -0
  36. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +3 -0
  37. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +2 -0
  38. package/src/duckdb/src/include/duckdb/common/types/string_heap.hpp +3 -0
  39. package/src/duckdb/src/include/duckdb/core_functions/scalar/debug_functions.hpp +27 -0
  40. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +1 -1
  41. package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +0 -1
  42. package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +22 -24
  43. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +2 -2
  44. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +43 -40
  45. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +119 -40
  46. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +1 -0
  47. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +1 -0
  48. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +1 -0
  49. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +1 -0
  50. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +4 -2
  51. package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +313 -0
  52. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +60 -53
  53. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -0
  54. package/src/duckdb/src/parser/parser.cpp +43 -38
  55. package/src/duckdb/src/storage/arena_allocator.cpp +12 -0
  56. package/src/duckdb/src/storage/compression/rle.cpp +52 -12
  57. package/src/duckdb/ub_src_core_functions_scalar_debug.cpp +2 -0
  58. package/src/duckdb/ub_src_execution.cpp +2 -0
  59. package/src/duckdb/ub_src_execution_index_art.cpp +0 -4
  60. package/src/duckdb/src/execution/index/art/leaf_segment.cpp +0 -52
  61. package/src/duckdb/src/execution/index/art/swizzleable_pointer.cpp +0 -22
  62. package/src/duckdb/src/include/duckdb/execution/index/art/leaf_segment.hpp +0 -38
  63. package/src/duckdb/src/include/duckdb/execution/index/art/swizzleable_pointer.hpp +0 -58
package/README.md CHANGED
@@ -100,6 +100,13 @@ var stmt = con.prepare('select ?::INTEGER as fortytwo', function(err, stmt) {
  });
  ```

+ ## Supported Node versions
+ We actively support only LTS and in-support Node versions; as of July 2023, these are Node 16, Node 18 and Node 20.
+ The release schedule for Node.js can be checked here: https://github.com/nodejs/release#release-schedule.
+
+ We currently also bundle and test DuckDB for Node 10, 12, 14, 17 and 19, and plan to keep doing so as long as the tooling supports it.
+ As of July 2023, Node 15 has been removed from the supported versions.
+
  ## Development

  ### First install:
package/binding.gyp CHANGED
@@ -45,6 +45,7 @@
  "src/duckdb/ub_src_core_functions_scalar_bit.cpp",
  "src/duckdb/ub_src_core_functions_scalar_blob.cpp",
  "src/duckdb/ub_src_core_functions_scalar_date.cpp",
+ "src/duckdb/ub_src_core_functions_scalar_debug.cpp",
  "src/duckdb/ub_src_core_functions_scalar_enum.cpp",
  "src/duckdb/ub_src_core_functions_scalar_generic.cpp",
  "src/duckdb/ub_src_core_functions_scalar_list.cpp",
package/package.json CHANGED
@@ -2,7 +2,7 @@
  "name": "duckdb",
  "main": "./lib/duckdb.js",
  "types": "./lib/duckdb.d.ts",
- "version": "0.8.2-dev1764.0",
+ "version": "0.8.2-dev1859.0",
  "description": "DuckDB node.js API",
  "gypfile": true,
  "dependencies": {
package/src/duckdb/extension/parquet/include/parquet_reader.hpp CHANGED
@@ -93,6 +93,7 @@ public:
  shared_ptr<ParquetFileMetadataCache> metadata;
  ParquetOptions parquet_options;
  MultiFileReaderData reader_data;
+ unique_ptr<ColumnReader> root_reader;

  public:
  void InitializeScan(ParquetReaderScanState &state, vector<idx_t> groups_to_read);
package/src/duckdb/extension/parquet/parquet_extension.cpp CHANGED
@@ -116,6 +116,11 @@ struct ParquetWriteBindData : public TableFunctionData {
  vector<string> column_names;
  duckdb_parquet::format::CompressionCodec::type codec = duckdb_parquet::format::CompressionCodec::SNAPPY;
  idx_t row_group_size = RowGroup::ROW_GROUP_SIZE;
+
+ //! If row_group_size_bytes is not set, we default to row_group_size * BYTES_PER_ROW
+ static constexpr const idx_t BYTES_PER_ROW = 1024;
+ idx_t row_group_size_bytes;
+
  ChildFieldIDs field_ids;
  };

@@ -741,33 +746,39 @@ static void GetFieldIDs(const Value &field_ids_value, ChildFieldIDs &field_ids,
  unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyInfo &info, vector<string> &names,
  vector<LogicalType> &sql_types) {
  D_ASSERT(names.size() == sql_types.size());
+ bool row_group_size_bytes_set = false;
  auto bind_data = make_uniq<ParquetWriteBindData>();
  for (auto &option : info.options) {
- auto loption = StringUtil::Lower(option.first);
+ const auto loption = StringUtil::Lower(option.first);
+ if (option.second.size() != 1) {
+ // All parquet write options require exactly one argument
+ throw BinderException("%s requires exactly one argument", StringUtil::Upper(loption));
+ }
  if (loption == "row_group_size" || loption == "chunk_size") {
  bind_data->row_group_size = option.second[0].GetValue<uint64_t>();
+ } else if (loption == "row_group_size_bytes") {
+ auto roption = option.second[0];
+ if (roption.GetTypeMutable().id() == LogicalTypeId::VARCHAR) {
+ bind_data->row_group_size_bytes = DBConfig::ParseMemoryLimit(roption.ToString());
+ } else {
+ bind_data->row_group_size_bytes = option.second[0].GetValue<uint64_t>();
+ }
+ row_group_size_bytes_set = true;
  } else if (loption == "compression" || loption == "codec") {
- if (!option.second.empty()) {
- auto roption = StringUtil::Lower(option.second[0].ToString());
- if (roption == "uncompressed") {
- bind_data->codec = duckdb_parquet::format::CompressionCodec::UNCOMPRESSED;
- continue;
- } else if (roption == "snappy") {
- bind_data->codec = duckdb_parquet::format::CompressionCodec::SNAPPY;
- continue;
- } else if (roption == "gzip") {
- bind_data->codec = duckdb_parquet::format::CompressionCodec::GZIP;
- continue;
- } else if (roption == "zstd") {
- bind_data->codec = duckdb_parquet::format::CompressionCodec::ZSTD;
- continue;
- }
+ const auto roption = StringUtil::Lower(option.second[0].ToString());
+ if (roption == "uncompressed") {
+ bind_data->codec = duckdb_parquet::format::CompressionCodec::UNCOMPRESSED;
+ } else if (roption == "snappy") {
+ bind_data->codec = duckdb_parquet::format::CompressionCodec::SNAPPY;
+ } else if (roption == "gzip") {
+ bind_data->codec = duckdb_parquet::format::CompressionCodec::GZIP;
+ } else if (roption == "zstd") {
+ bind_data->codec = duckdb_parquet::format::CompressionCodec::ZSTD;
+ } else {
+ throw BinderException("Expected %s argument to be either [uncompressed, snappy, gzip or zstd]",
+ loption);
  }
- throw BinderException("Expected %s argument to be either [uncompressed, snappy, gzip or zstd]", loption);
  } else if (loption == "field_ids") {
- if (option.second.size() != 1) {
- throw BinderException("FIELD_IDS requires exactly one argument");
- }
  if (option.second[0].type().id() == LogicalTypeId::VARCHAR &&
  StringUtil::Lower(StringValue::Get(option.second[0])) == "auto") {
  idx_t field_id = 0;
@@ -788,6 +799,9 @@ unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyInfo &info
  throw NotImplementedException("Unrecognized option for PARQUET: %s", option.first.c_str());
  }
  }
+ if (!row_group_size_bytes_set) {
+ bind_data->row_group_size_bytes = bind_data->row_group_size * ParquetWriteBindData::BYTES_PER_ROW;
+ }
  bind_data->sql_types = sql_types;
  bind_data->column_names = names;
  return std::move(bind_data);
@@ -812,8 +826,10 @@ void ParquetWriteSink(ExecutionContext &context, FunctionData &bind_data_p, Glob

  // append data to the local (buffered) chunk collection
  local_state.buffer.Append(local_state.append_state, input);
- if (local_state.buffer.Count() > bind_data.row_group_size) {
- // if the chunk collection exceeds a certain size we flush it to the parquet file
+
+ if (local_state.buffer.Count() > bind_data.row_group_size ||
+ local_state.buffer.SizeInBytes() > bind_data.row_group_size_bytes) {
+ // if the chunk collection exceeds a certain size (rows/bytes) we flush it to the parquet file
  local_state.append_state.current_chunk_state.handles.clear();
  global_state.writer->Flush(local_state.buffer);
  local_state.buffer.InitializeAppend(local_state.append_state);
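The hunks above add a byte-based flush threshold to the Parquet writer: a new ROW_GROUP_SIZE_BYTES copy option (either an integer byte count or a memory-size string, since VARCHAR values go through DBConfig::ParseMemoryLimit), a default of row_group_size * 1024 bytes when the option is not given, and a sink that flushes once either the row or the byte threshold is exceeded. A minimal usage sketch through DuckDB's embedded C++ API follows; the table name and output path are made up for illustration:

```cpp
// Hypothetical usage sketch of the new ROW_GROUP_SIZE_BYTES copy option.
#include "duckdb.hpp"

int main() {
	duckdb::DuckDB db(nullptr); // in-memory database
	duckdb::Connection con(db);

	con.Query("CREATE TABLE tbl AS SELECT range AS i FROM range(1000000)");

	// Flush a row group once the buffered data exceeds ~100 MB, in addition to
	// the existing row-count threshold.
	con.Query("COPY tbl TO 'tbl.parquet' (FORMAT PARQUET, ROW_GROUP_SIZE_BYTES '100MB')");
	return 0;
}
```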
package/src/duckdb/extension/parquet/parquet_reader.cpp CHANGED
@@ -399,8 +399,7 @@ void ParquetReader::InitializeSchema() {
  if (file_meta_data->schema.size() < 2) {
  throw FormatException("Need at least one non-root column in the file");
  }
- auto root_reader = CreateReader();
-
+ root_reader = CreateReader();
  auto &root_type = root_reader->Type();
  auto &child_types = StructType::GetChildTypes(root_type);
  D_ASSERT(root_type.id() == LogicalTypeId::STRUCT);
@@ -450,7 +449,6 @@ ParquetReader::ParquetReader(ClientContext &context_p, string file_name_p, Parqu
  ObjectCache::GetObjectCache(context_p).Put(file_name, metadata);
  }
  }
-
  InitializeSchema();
  }

@@ -483,7 +481,6 @@ unique_ptr<BaseStatistics> ParquetReader::ReadStatistics(const string &name) {

  unique_ptr<BaseStatistics> column_stats;
  auto file_meta_data = GetFileMetadata();
- auto root_reader = CreateReader();
  auto column_reader = root_reader->Cast<StructColumnReader>().GetChildReader(file_col_idx);

  for (idx_t row_group_idx = 0; row_group_idx < file_meta_data->row_groups.size(); row_group_idx++) {
package/src/duckdb/src/common/constants.cpp CHANGED
@@ -7,7 +7,8 @@
  namespace duckdb {

  constexpr const idx_t DConstants::INVALID_INDEX;
- const row_t MAX_ROW_ID = 4611686018427388000ULL; // 2^62
+ const row_t MAX_ROW_ID = 36028797018960000ULL; // 2^55
+ const row_t MAX_ROW_ID_LOCAL = 72057594037920000ULL; // 2^56
  const column_t COLUMN_IDENTIFIER_ROW_ID = (column_t)-1;
  const sel_t ZERO_VECTOR[STANDARD_VECTOR_SIZE] = {0};
  const double PI = 3.141592653589793;
package/src/duckdb/src/common/enum_util.cpp CHANGED
@@ -3112,8 +3112,6 @@ const char* EnumUtil::ToChars<NType>(NType value) {
  switch(value) {
  case NType::PREFIX:
  return "PREFIX";
- case NType::LEAF_SEGMENT:
- return "LEAF_SEGMENT";
  case NType::LEAF:
  return "LEAF";
  case NType::NODE_4:
@@ -3124,6 +3122,8 @@ const char* EnumUtil::ToChars<NType>(NType value) {
  return "NODE_48";
  case NType::NODE_256:
  return "NODE_256";
+ case NType::LEAF_INLINED:
+ return "LEAF_INLINED";
  default:
  throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
  }
@@ -3134,9 +3134,6 @@ NType EnumUtil::FromString<NType>(const char *value) {
  if (StringUtil::Equals(value, "PREFIX")) {
  return NType::PREFIX;
  }
- if (StringUtil::Equals(value, "LEAF_SEGMENT")) {
- return NType::LEAF_SEGMENT;
- }
  if (StringUtil::Equals(value, "LEAF")) {
  return NType::LEAF;
  }
@@ -3152,6 +3149,9 @@ NType EnumUtil::FromString<NType>(const char *value) {
  if (StringUtil::Equals(value, "NODE_256")) {
  return NType::NODE_256;
  }
+ if (StringUtil::Equals(value, "LEAF_INLINED")) {
+ return NType::LEAF_INLINED;
+ }
  throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
  }

package/src/duckdb/src/common/sort/sort_state.cpp CHANGED
@@ -315,7 +315,7 @@ void LocalSortState::ReOrder(SortedData &sd, data_ptr_t sorting_ptr, RowDataColl
  sd.data_blocks.back()->block->SetSwizzling(nullptr);
  // Create a single heap block to store the ordered heap
  idx_t total_byte_offset =
- std::accumulate(heap.blocks.begin(), heap.blocks.end(), 0,
+ std::accumulate(heap.blocks.begin(), heap.blocks.end(), (idx_t)0,
  [](idx_t a, const unique_ptr<RowDataBlock> &b) { return a + b->byte_offset; });
  idx_t heap_block_size = MaxValue(total_byte_offset, (idx_t)Storage::BLOCK_SIZE);
  auto ordered_heap_block = make_uniq<RowDataBlock>(*buffer_manager, heap_block_size, 1);
package/src/duckdb/src/common/sort/sorted_block.cpp CHANGED
@@ -85,7 +85,7 @@ SortedBlock::SortedBlock(BufferManager &buffer_manager, GlobalSortState &state)
  }

  idx_t SortedBlock::Count() const {
- idx_t count = std::accumulate(radix_sorting_data.begin(), radix_sorting_data.end(), 0,
+ idx_t count = std::accumulate(radix_sorting_data.begin(), radix_sorting_data.end(), (idx_t)0,
  [](idx_t a, const unique_ptr<RowDataBlock> &b) { return a + b->count; });
  if (!sort_layout.all_constant) {
  D_ASSERT(count == blob_sorting_data->Count());
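Both accumulate changes above fix the same pitfall: std::accumulate deduces its accumulator type from the initial value, so a plain 0 literal accumulates in int and silently narrows large 64-bit sums. A self-contained illustration, assuming idx_t is a 64-bit unsigned integer as in DuckDB:

```cpp
// Illustration of the std::accumulate init-value pitfall fixed above.
#include <cstdint>
#include <iostream>
#include <numeric>
#include <vector>

using idx_t = uint64_t; // assumption: matches DuckDB's typedef

int main() {
	std::vector<idx_t> sizes{3000000000ULL, 3000000000ULL}; // ~6 GB in total

	// The accumulator type is deduced from the init value: with a plain 0 it is
	// int, so every partial sum is narrowed back to int and the result is wrong.
	auto truncated = std::accumulate(sizes.begin(), sizes.end(), 0);

	// Casting the init value to idx_t keeps the whole accumulation in 64 bits.
	auto correct = std::accumulate(sizes.begin(), sizes.end(), static_cast<idx_t>(0));

	std::cout << truncated << " vs " << correct << std::endl;
	return 0;
}
```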
package/src/duckdb/src/common/types/column/column_data_collection.cpp CHANGED
@@ -100,6 +100,14 @@ Allocator &ColumnDataCollection::GetAllocator() const {
  return allocator->GetAllocator();
  }

+ idx_t ColumnDataCollection::SizeInBytes() const {
+ idx_t total_size = 0;
+ for (const auto &segment : segments) {
+ total_size += segment->SizeInBytes();
+ }
+ return total_size;
+ }
+
  //===--------------------------------------------------------------------===//
  // ColumnDataRow
  //===--------------------------------------------------------------------===//
package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp CHANGED
@@ -243,6 +243,11 @@ idx_t ColumnDataCollectionSegment::ChunkCount() const {
  return chunk_data.size();
  }

+ idx_t ColumnDataCollectionSegment::SizeInBytes() const {
+ D_ASSERT(!allocator->IsShared());
+ return allocator->SizeInBytes() + heap->SizeInBytes();
+ }
+
  void ColumnDataCollectionSegment::FetchChunk(idx_t chunk_idx, DataChunk &result) {
  vector<column_t> column_ids;
  column_ids.reserve(types.size());
package/src/duckdb/src/common/types/string_heap.cpp CHANGED
@@ -55,4 +55,8 @@ string_t StringHeap::EmptyString(idx_t len) {
  return string_t(insert_pos, len);
  }

+ idx_t StringHeap::SizeInBytes() const {
+ return allocator.SizeInBytes();
+ }
+
  } // namespace duckdb
package/src/duckdb/src/core_functions/function_list.cpp CHANGED
@@ -17,6 +17,7 @@
  #include "duckdb/core_functions/scalar/string_functions.hpp"
  #include "duckdb/core_functions/scalar/struct_functions.hpp"
  #include "duckdb/core_functions/scalar/union_functions.hpp"
+ #include "duckdb/core_functions/scalar/debug_functions.hpp"

  namespace duckdb {

@@ -339,6 +340,7 @@ static StaticFunctionDefinition internal_functions[] = {
  DUCKDB_AGGREGATE_FUNCTION(VarPopFun),
  DUCKDB_AGGREGATE_FUNCTION(VarSampFun),
  DUCKDB_AGGREGATE_FUNCTION_ALIAS(VarianceFun),
+ DUCKDB_SCALAR_FUNCTION(VectorTypeFun),
  DUCKDB_SCALAR_FUNCTION(VersionFun),
  DUCKDB_SCALAR_FUNCTION_SET(WeekFun),
  DUCKDB_SCALAR_FUNCTION_SET(WeekDayFun),
package/src/duckdb/src/core_functions/scalar/debug/vector_type.cpp CHANGED
@@ -0,0 +1,23 @@
+ #include "duckdb/core_functions/scalar/debug_functions.hpp"
+
+ #include "duckdb/common/exception.hpp"
+ #include "duckdb/common/vector_operations/vector_operations.hpp"
+ #include "duckdb/planner/expression/bound_function_expression.hpp"
+ #include "duckdb/common/enum_util.hpp"
+
+ namespace duckdb {
+
+ static void VectorTypeFunction(DataChunk &input, ExpressionState &state, Vector &result) {
+ result.SetVectorType(VectorType::CONSTANT_VECTOR);
+ auto data = ConstantVector::GetData<string_t>(result);
+ data[0] = StringVector::AddString(result, EnumUtil::ToString(input.data[0].GetVectorType()));
+ }
+
+ ScalarFunction VectorTypeFun::GetFunction() {
+ return ScalarFunction("vector_type", // name of the function
+ {LogicalType::ANY}, // argument list
+ LogicalType::VARCHAR, // return type
+ VectorTypeFunction);
+ }
+
+ } // namespace duckdb
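The new vector_type() scalar is a debugging helper that reports the VectorType of its first argument as a string. A hypothetical call through the embedded C++ API; the reported value depends on how the input vector happens to be laid out:

```cpp
// Hypothetical usage sketch of the new vector_type() debug function.
#include "duckdb.hpp"
#include <iostream>

int main() {
	duckdb::DuckDB db(nullptr);
	duckdb::Connection con(db);

	// Prints something like FLAT_VECTOR or CONSTANT_VECTOR, depending on the input.
	auto result = con.Query("SELECT vector_type(range) FROM range(10)");
	std::cout << result->ToString() << std::endl;
	return 0;
}
```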
package/src/duckdb/src/execution/index/art/art.cpp CHANGED
@@ -5,7 +5,6 @@
  #include "duckdb/execution/expression_executor.hpp"
  #include "duckdb/storage/arena_allocator.hpp"
  #include "duckdb/execution/index/art/art_key.hpp"
- #include "duckdb/execution/index/art/leaf_segment.hpp"
  #include "duckdb/execution/index/art/prefix.hpp"
  #include "duckdb/execution/index/art/leaf.hpp"
  #include "duckdb/execution/index/art/node4.hpp"
@@ -44,7 +43,6 @@ ART::ART(const vector<column_t> &column_ids, TableIOManager &table_io_manager,

  // initialize all allocators
  allocators.emplace_back(make_uniq<FixedSizeAllocator>(sizeof(Prefix), buffer_manager.GetBufferAllocator()));
- allocators.emplace_back(make_uniq<FixedSizeAllocator>(sizeof(LeafSegment), buffer_manager.GetBufferAllocator()));
  allocators.emplace_back(make_uniq<FixedSizeAllocator>(sizeof(Leaf), buffer_manager.GetBufferAllocator()));
  allocators.emplace_back(make_uniq<FixedSizeAllocator>(sizeof(Node4), buffer_manager.GetBufferAllocator()));
  allocators.emplace_back(make_uniq<FixedSizeAllocator>(sizeof(Node16), buffer_manager.GetBufferAllocator()));
@@ -54,8 +52,8 @@ ART::ART(const vector<column_t> &column_ids, TableIOManager &table_io_manager,
  // set the root node of the tree
  tree = make_uniq<Node>();
  if (block_id != DConstants::INVALID_INDEX) {
- tree->buffer_id = block_id;
- tree->offset = block_offset;
+ tree->SetSerialized();
+ tree->SetPtr(block_id, block_offset);
  tree->Deserialize(*this);
  }
  serialized_data_pointer = BlockPointer(block_id, block_offset);
@@ -308,7 +306,7 @@ bool Construct(ART &art, vector<ARTKey> &keys, row_t *row_ids, Node &node, KeySe
  reference<Node> ref_node(node);
  Prefix::New(art, ref_node, start_key, prefix_start, start_key.len - prefix_start);
  if (single_row_id) {
- Leaf::New(art, ref_node, row_ids[key_section.start]);
+ Leaf::New(ref_node, row_ids[key_section.start]);
  } else {
  Leaf::New(art, ref_node, row_ids + key_section.start, num_row_ids);
  }
@@ -358,19 +356,9 @@ bool ART::ConstructFromSorted(idx_t count, vector<ARTKey> &keys, Vector &row_ide
  D_ASSERT(!VerifyAndToStringInternal(true).empty());
  for (idx_t i = 0; i < count; i++) {
  D_ASSERT(!keys[i].Empty());
- auto leaf_node = Lookup(*tree, keys[i], 0);
- D_ASSERT(leaf_node.IsSet());
- auto &leaf = Leaf::Get(*this, leaf_node);
-
- if (leaf.IsInlined()) {
- D_ASSERT(row_ids[i] == leaf.row_ids.inlined);
- continue;
- }
-
- D_ASSERT(leaf.row_ids.ptr.IsSet());
- Node leaf_segment = leaf.row_ids.ptr;
- auto position = leaf.FindRowId(*this, leaf_segment, row_ids[i]);
- D_ASSERT(position != (uint32_t)DConstants::INVALID_INDEX);
+ auto leaf = Lookup(*tree, keys[i], 0);
+ D_ASSERT(leaf.IsSet());
+ D_ASSERT(Leaf::ContainsRowId(*this, leaf, row_ids[i]));
  }
  #endif

@@ -431,19 +419,9 @@ PreservedError ART::Insert(IndexLock &lock, DataChunk &input, Vector &row_ids) {
  continue;
  }

- auto leaf_node = Lookup(*tree, keys[i], 0);
- D_ASSERT(leaf_node.IsSet());
- auto &leaf = Leaf::Get(*this, leaf_node);
-
- if (leaf.IsInlined()) {
- D_ASSERT(row_identifiers[i] == leaf.row_ids.inlined);
- continue;
- }
-
- D_ASSERT(leaf.row_ids.ptr.IsSet());
- Node leaf_segment = leaf.row_ids.ptr;
- auto position = leaf.FindRowId(*this, leaf_segment, row_identifiers[i]);
- D_ASSERT(position != (uint32_t)DConstants::INVALID_INDEX);
+ auto leaf = Lookup(*tree, keys[i], 0);
+ D_ASSERT(leaf.IsSet());
+ D_ASSERT(Leaf::ContainsRowId(*this, leaf, row_identifiers[i]));
  }
  #endif

@@ -471,19 +449,13 @@ void ART::VerifyAppend(DataChunk &chunk, ConflictManager &conflict_manager) {
  CheckConstraintsForChunk(chunk, conflict_manager);
  }

- bool ART::InsertToLeaf(Node &leaf_node, const row_t &row_id) {
+ bool ART::InsertToLeaf(Node &leaf, const row_t &row_id) {

- auto &leaf = Leaf::Get(*this, leaf_node);
-
- #ifdef DEBUG
- for (idx_t k = 0; k < leaf.count; k++) {
- D_ASSERT(leaf.GetRowId(*this, k) != row_id);
- }
- #endif
- if (IsUnique() && leaf.count != 0) {
+ if (IsUnique()) {
  return false;
  }
- leaf.Insert(*this, row_id);
+
+ Leaf::Insert(*this, leaf, row_id);
  return true;
  }

@@ -494,14 +466,14 @@ bool ART::Insert(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id
  D_ASSERT(depth <= key.len);
  reference<Node> ref_node(node);
  Prefix::New(*this, ref_node, key, depth, key.len - depth);
- Leaf::New(*this, ref_node, row_id);
+ Leaf::New(ref_node, row_id);
  return true;
  }

- auto node_type = node.DecodeARTNodeType();
+ auto node_type = node.GetType();

  // insert the row ID into this leaf
- if (node_type == NType::LEAF) {
+ if (node_type == NType::LEAF || node_type == NType::LEAF_INLINED) {
  return InsertToLeaf(node, row_id);
  }

@@ -518,13 +490,11 @@ bool ART::Insert(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id

  // insert a new leaf node at key[depth]
  Node leaf_node;
+ reference<Node> ref_node(leaf_node);
  if (depth + 1 < key.len) {
- reference<Node> ref_node(leaf_node);
  Prefix::New(*this, ref_node, key, depth + 1, key.len - depth - 1);
- Leaf::New(*this, ref_node, row_id);
- } else {
- Leaf::New(*this, leaf_node, row_id);
  }
+ Leaf::New(ref_node, row_id);
  Node::InsertChild(*this, node, key[depth], leaf_node);
  return true;
  }
@@ -534,7 +504,7 @@ bool ART::Insert(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id
  auto mismatch_position = Prefix::Traverse(*this, next_node, key, depth);

  // prefix matches key
- if (next_node.get().DecodeARTNodeType() != NType::PREFIX) {
+ if (next_node.get().GetType() != NType::PREFIX) {
  return Insert(next_node, key, depth, row_id);
  }

@@ -550,13 +520,11 @@ bool ART::Insert(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id

  // insert new leaf
  Node leaf_node;
+ reference<Node> ref_node(leaf_node);
  if (depth + 1 < key.len) {
- reference<Node> ref_node(leaf_node);
  Prefix::New(*this, ref_node, key, depth + 1, key.len - depth - 1);
- Leaf::New(*this, ref_node, row_id);
- } else {
- Leaf::New(*this, leaf_node, row_id);
  }
+ Leaf::New(ref_node, row_id);
  Node4::InsertChild(*this, next_node, key[depth], leaf_node);
  return true;
  }
@@ -596,19 +564,9 @@ void ART::Delete(IndexLock &state, DataChunk &input, Vector &row_ids) {
  continue;
  }

- auto node = Lookup(*tree, keys[i], 0);
- if (node.IsSet()) {
- auto &leaf = Leaf::Get(*this, node);
-
- if (leaf.IsInlined()) {
- D_ASSERT(row_identifiers[i] != leaf.row_ids.inlined);
- continue;
- }
-
- D_ASSERT(leaf.row_ids.ptr.IsSet());
- Node leaf_segment = leaf.row_ids.ptr;
- auto position = leaf.FindRowId(*this, leaf_segment, row_identifiers[i]);
- D_ASSERT(position == (uint32_t)DConstants::INVALID_INDEX);
+ auto leaf = Lookup(*tree, keys[i], 0);
+ if (leaf.IsSet()) {
+ D_ASSERT(!Leaf::ContainsRowId(*this, leaf, row_identifiers[i]));
  }
  }
  #endif
@@ -622,21 +580,17 @@ void ART::Erase(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id)

  // handle prefix
  reference<Node> next_node(node);
- if (next_node.get().DecodeARTNodeType() == NType::PREFIX) {
+ if (next_node.get().GetType() == NType::PREFIX) {
  Prefix::Traverse(*this, next_node, key, depth);
- if (next_node.get().DecodeARTNodeType() == NType::PREFIX) {
+ if (next_node.get().GetType() == NType::PREFIX) {
  return;
  }
  }

  // delete a row ID from a leaf (root is leaf with possible prefix nodes)
- if (next_node.get().DecodeARTNodeType() == NType::LEAF) {
- auto &leaf = Leaf::Get(*this, next_node.get());
- leaf.Remove(*this, row_id);
-
- if (leaf.count == 0) {
+ if (next_node.get().GetType() == NType::LEAF || next_node.get().GetType() == NType::LEAF_INLINED) {
+ if (Leaf::Remove(*this, next_node, row_id)) {
  Node::Free(*this, node);
- node.Reset();
  }
  return;
  }
@@ -648,20 +602,16 @@ void ART::Erase(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id)

  auto temp_depth = depth + 1;
  reference<Node> child_node(*child);
- if (child_node.get().DecodeARTNodeType() == NType::PREFIX) {
+ if (child_node.get().GetType() == NType::PREFIX) {
  Prefix::Traverse(*this, child_node, key, temp_depth);
- if (child_node.get().DecodeARTNodeType() == NType::PREFIX) {
+ if (child_node.get().GetType() == NType::PREFIX) {
  return;
  }
  }

- if (child_node.get().DecodeARTNodeType() == NType::LEAF) {
+ if (child_node.get().GetType() == NType::LEAF || child_node.get().GetType() == NType::LEAF_INLINED) {
  // leaf found, remove entry
- auto &leaf = Leaf::Get(*this, child_node.get());
- leaf.Remove(*this, row_id);
-
- if (leaf.count == 0) {
- // leaf is empty, delete leaf, decrement node counter and maybe shrink node
+ if (Leaf::Remove(*this, child_node, row_id)) {
  Node::DeleteChild(*this, next_node, node, key[depth]);
  }
  return;
@@ -713,20 +663,11 @@ static ARTKey CreateKey(ArenaAllocator &allocator, PhysicalType type, Value &val

  bool ART::SearchEqual(ARTKey &key, idx_t max_count, vector<row_t> &result_ids) {

- auto leaf_node = Lookup(*tree, key, 0);
- if (!leaf_node.IsSet()) {
+ auto leaf = Lookup(*tree, key, 0);
+ if (!leaf.IsSet()) {
  return true;
  }
-
- auto &leaf = Leaf::Get(*this, leaf_node);
- if (leaf.count > max_count) {
- return false;
- }
- for (idx_t i = 0; i < leaf.count; i++) {
- row_t row_id = leaf.GetRowId(*this, i);
- result_ids.push_back(row_id);
- }
- return true;
+ return Leaf::GetRowIds(*this, leaf, result_ids, max_count);
  }

  void ART::SearchEqualJoinNoFetch(ARTKey &key, idx_t &result_size) {
@@ -738,8 +679,10 @@ void ART::SearchEqualJoinNoFetch(ARTKey &key, idx_t &result_size) {
  return;
  }

- auto &leaf = Leaf::Get(*this, leaf_node);
- result_size = leaf.count;
+ // we only perform index joins on PK/FK columns
+ D_ASSERT(leaf_node.GetType() == NType::LEAF_INLINED);
+ result_size = 1;
+ return;
  }

  //===--------------------------------------------------------------------===//
@@ -752,14 +695,14 @@ Node ART::Lookup(Node node, const ARTKey &key, idx_t depth) {

  // traverse prefix, if exists
  reference<Node> next_node(node);
- if (next_node.get().DecodeARTNodeType() == NType::PREFIX) {
+ if (next_node.get().GetType() == NType::PREFIX) {
  Prefix::Traverse(*this, next_node, key, depth);
- if (next_node.get().DecodeARTNodeType() == NType::PREFIX) {
+ if (next_node.get().GetType() == NType::PREFIX) {
  return Node();
  }
  }

- if (next_node.get().DecodeARTNodeType() == NType::LEAF) {
+ if (next_node.get().GetType() == NType::LEAF || next_node.get().GetType() == NType::LEAF_INLINED) {
  return next_node.get();
  }

@@ -994,20 +937,18 @@ void ART::CheckConstraintsForChunk(DataChunk &input, ConflictManager &conflict_m
  continue;
  }

- auto leaf_node = Lookup(*tree, keys[i], 0);
- if (!leaf_node.IsSet()) {
+ auto leaf = Lookup(*tree, keys[i], 0);
+ if (!leaf.IsSet()) {
  if (conflict_manager.AddMiss(i)) {
  found_conflict = i;
  }
  continue;
  }

- // When we find a node, we need to update the 'matches' and 'row_ids'
- // NOTE: Leafs can have more than one row_id, but for UNIQUE/PRIMARY KEY they will only have one
- Leaf &leaf = Leaf::Get(*this, leaf_node);
- D_ASSERT(leaf.count == 1);
- auto row_id = leaf.GetRowId(*this, 0);
- if (conflict_manager.AddHit(i, row_id)) {
+ // when we find a node, we need to update the 'matches' and 'row_ids'
+ // NOTE: leaves can have more than one row_id, but for UNIQUE/PRIMARY KEY they will only have one
+ D_ASSERT(leaf.GetType() == NType::LEAF_INLINED);
+ if (conflict_manager.AddHit(i, leaf.GetRowId())) {
  found_conflict = i;
  }
  }
@@ -1086,7 +1027,7 @@ void ART::Vacuum(IndexLock &state) {
  }

  // traverse the allocated memory of the tree to perform a vacuum
- Node::Vacuum(*this, *tree, flags);
+ tree->Vacuum(*this, flags);

  // finalize the vacuum operation
  FinalizeVacuum(flags);
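Most of the art.cpp churn above comes from the reworked ART leaves: the separately allocated LeafSegment lists are gone, a leaf holding exactly one row ID is now tagged NType::LEAF_INLINED and stores that row ID directly in the node, and leaf operations moved to static helpers such as Leaf::Insert, Leaf::Remove and Leaf::ContainsRowId. The following is only a rough sketch of the inlining idea with made-up types, not DuckDB's actual Node layout:

```cpp
// Rough sketch (made-up types, not DuckDB's Node layout) of inlining a single
// row ID into the node instead of pointing to a separately allocated list.
#include <cstdint>
#include <vector>

using row_t = int64_t;

enum class NType : uint8_t { LEAF, LEAF_INLINED };

struct LeafNode {
	NType type;
	row_t inlined;               // valid when type == LEAF_INLINED
	std::vector<row_t> *row_ids; // valid when type == LEAF

	// In the spirit of Leaf::New(ref_node, row_id): one row ID needs no allocation.
	static LeafNode NewInlined(row_t row_id) {
		return LeafNode{NType::LEAF_INLINED, row_id, nullptr};
	}

	// In the spirit of Leaf::ContainsRowId(art, leaf, row_id).
	bool ContainsRowId(row_t row_id) const {
		if (type == NType::LEAF_INLINED) {
			return inlined == row_id;
		}
		for (auto id : *row_ids) {
			if (id == row_id) {
				return true;
			}
		}
		return false;
	}
};
```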
package/src/duckdb/src/execution/index/art/art_key.cpp CHANGED
@@ -72,17 +72,6 @@ bool ARTKey::operator>(const ARTKey &k) const {
  return len > k.len;
  }

- bool ARTKey::operator<(const ARTKey &k) const {
- for (uint32_t i = 0; i < MinValue<uint32_t>(len, k.len); i++) {
- if (data[i] < k.data[i]) {
- return true;
- } else if (data[i] > k.data[i]) {
- return false;
- }
- }
- return len < k.len;
- }
-
  bool ARTKey::operator>=(const ARTKey &k) const {
  for (uint32_t i = 0; i < MinValue<uint32_t>(len, k.len); i++) {
  if (data[i] > k.data[i]) {