duckdb 0.7.2-dev2552.0 → 0.7.2-dev2699.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/binding.gyp +7 -7
  2. package/package.json +2 -2
  3. package/src/duckdb/extension/parquet/parquet_statistics.cpp +3 -0
  4. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +2 -2
  5. package/src/duckdb/src/common/adbc/adbc.cpp +5 -2
  6. package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
  7. package/src/duckdb/src/execution/index/art/art.cpp +286 -269
  8. package/src/duckdb/src/execution/index/art/art_key.cpp +22 -32
  9. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +224 -0
  10. package/src/duckdb/src/execution/index/art/iterator.cpp +142 -123
  11. package/src/duckdb/src/execution/index/art/leaf.cpp +319 -170
  12. package/src/duckdb/src/execution/index/art/leaf_segment.cpp +42 -0
  13. package/src/duckdb/src/execution/index/art/node.cpp +444 -379
  14. package/src/duckdb/src/execution/index/art/node16.cpp +178 -114
  15. package/src/duckdb/src/execution/index/art/node256.cpp +117 -79
  16. package/src/duckdb/src/execution/index/art/node4.cpp +169 -114
  17. package/src/duckdb/src/execution/index/art/node48.cpp +175 -105
  18. package/src/duckdb/src/execution/index/art/prefix.cpp +405 -127
  19. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +42 -0
  20. package/src/duckdb/src/execution/index/art/swizzleable_pointer.cpp +10 -85
  21. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +2 -1
  22. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +2 -2
  23. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +2 -0
  24. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +4 -0
  25. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +11 -12
  26. package/src/duckdb/src/function/table/read_csv.cpp +5 -1
  27. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  28. package/src/duckdb/src/include/duckdb/common/queue.hpp +1 -1
  29. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +53 -45
  30. package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +29 -24
  31. package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +114 -0
  32. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +26 -20
  33. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +63 -39
  34. package/src/duckdb/src/include/duckdb/execution/index/art/leaf_segment.hpp +36 -0
  35. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +98 -116
  36. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +48 -36
  37. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +52 -35
  38. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +46 -36
  39. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +57 -35
  40. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +57 -50
  41. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +40 -0
  42. package/src/duckdb/src/include/duckdb/execution/index/art/swizzleable_pointer.hpp +38 -31
  43. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_file_handle.hpp +2 -1
  44. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +2 -0
  45. package/src/duckdb/src/include/duckdb/main/query_result.hpp +1 -1
  46. package/src/duckdb/src/include/duckdb/parser/statement/insert_statement.hpp +4 -1
  47. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -1
  48. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +0 -5
  49. package/src/duckdb/src/include/duckdb/storage/index.hpp +13 -28
  50. package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +0 -2
  51. package/src/duckdb/src/include/duckdb/transaction/cleanup_state.hpp +5 -0
  52. package/src/duckdb/src/include/duckdb.h +26 -0
  53. package/src/duckdb/src/main/capi/helper-c.cpp +7 -0
  54. package/src/duckdb/src/main/client_context.cpp +1 -1
  55. package/src/duckdb/src/main/query_result.cpp +1 -1
  56. package/src/duckdb/src/parser/statement/insert_statement.cpp +15 -6
  57. package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +1 -1
  58. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +18 -5
  59. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +5 -7
  60. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +20 -7
  61. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +14 -9
  62. package/src/duckdb/src/storage/checkpoint_manager.cpp +11 -9
  63. package/src/duckdb/src/storage/data_table.cpp +6 -3
  64. package/src/duckdb/src/storage/index.cpp +18 -6
  65. package/src/duckdb/src/storage/local_storage.cpp +8 -2
  66. package/src/duckdb/src/storage/standard_buffer_manager.cpp +0 -9
  67. package/src/duckdb/src/storage/wal_replay.cpp +1 -1
  68. package/src/duckdb/src/transaction/cleanup_state.cpp +6 -0
  69. package/src/duckdb/src/transaction/undo_buffer.cpp +8 -0
  70. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
  71. package/src/duckdb/ub_src_execution_index_art.cpp +7 -1
@@ -1,97 +1,22 @@
1
1
  #include "duckdb/execution/index/art/swizzleable_pointer.hpp"
2
2
 
3
- #include "duckdb/execution/index/art/art.hpp"
3
+ #include "duckdb/storage/meta_block_reader.hpp"
4
4
 
5
5
  namespace duckdb {
6
- SwizzleablePointer::~SwizzleablePointer() {
7
- if (pointer) {
8
- if (!IsSwizzled()) {
9
- Node::Delete((Node *)pointer);
10
- }
11
- }
12
- }
13
6
 
14
- SwizzleablePointer::SwizzleablePointer(duckdb::MetaBlockReader &reader) {
15
- idx_t block_id = reader.Read<block_id_t>();
16
- uint32_t offset = reader.Read<uint32_t>();
17
- if (block_id == DConstants::INVALID_INDEX || offset == (uint32_t)DConstants::INVALID_INDEX) {
18
- pointer = 0;
19
- return;
20
- }
21
- idx_t pointer_size = sizeof(pointer) * 8;
22
- pointer = block_id;
23
- // This assumes high 32 bits of pointer are zero.
24
- pointer = pointer << (pointer_size / 2);
25
- D_ASSERT((pointer >> (pointer_size / 2)) == block_id);
26
- pointer += offset;
27
- // Set the left most bit to indicate this is a swizzled pointer and send it back to the mother-ship
28
- uint64_t mask = 1;
29
- mask = mask << (pointer_size - 1);
30
- // This assumes the 33rd most significant bit of the block_id is zero.
31
- pointer |= mask;
32
- }
33
-
34
- SwizzleablePointer &SwizzleablePointer::operator=(const Node *ptr) {
35
- // If the object already has a non-swizzled pointer, this will leak memory.
36
- //
37
- // TODO: If enabled, this assert will fire, indicating a possible leak. If an exception
38
- // is thrown here, it will cause a double-free. There is some work to do to make all this safer.
39
- // D_ASSERT(empty() || IsSwizzled());
40
- if (sizeof(ptr) == 4) {
41
- pointer = (uint32_t)(size_t)ptr;
42
- } else {
43
- pointer = (uint64_t)ptr;
44
- }
45
- return *this;
46
- }
47
-
48
- bool operator!=(const SwizzleablePointer &s_ptr, const uint64_t &ptr) {
49
- return (s_ptr.pointer != ptr);
50
- }
7
+ SwizzleablePointer::SwizzleablePointer(MetaBlockReader &reader) {
51
8
 
52
- BlockPointer SwizzleablePointer::GetSwizzledBlockInfo() {
53
- D_ASSERT(IsSwizzled());
54
- idx_t pointer_size = sizeof(pointer) * 8;
55
- // This is destructive. Pointer will be invalid after this operation.
56
- // That's okay because this is only ever called from Unswizzle.
57
- pointer = pointer & ~(1ULL << (pointer_size - 1));
58
- uint32_t block_id = pointer >> (pointer_size / 2);
59
- uint32_t offset = pointer & 0xffffffff;
60
- return {block_id, offset};
61
- }
62
-
63
- bool SwizzleablePointer::IsSwizzled() {
64
- idx_t pointer_size = sizeof(pointer) * 8;
65
- return (pointer >> (pointer_size - 1)) & 1;
66
- }
67
-
68
- void SwizzleablePointer::Reset() {
69
- if (pointer) {
70
- if (!IsSwizzled()) {
71
- Node::Delete((Node *)pointer);
72
- }
73
- }
74
- *this = nullptr;
75
- }
9
+ idx_t block_id = reader.Read<block_id_t>();
10
+ offset = reader.Read<uint32_t>();
11
+ type = 0;
76
12
 
77
- Node *SwizzleablePointer::Unswizzle(ART &art) {
78
- if (IsSwizzled()) {
79
- // This means our pointer is not yet in memory, gotta deserialize this
80
- // first we unset the bae
81
- auto block_info = GetSwizzledBlockInfo();
82
- *this = Node::Deserialize(art, block_info.block_id, block_info.offset);
83
- art.Verify();
13
+ if (block_id == DConstants::INVALID_INDEX) {
14
+ swizzle_flag = 0;
15
+ return;
84
16
  }
85
- return (Node *)pointer;
86
- }
87
17
 
88
- BlockPointer SwizzleablePointer::Serialize(ART &art, duckdb::MetaBlockWriter &writer) {
89
- if (pointer) {
90
- Unswizzle(art);
91
- return ((Node *)pointer)->Serialize(art, writer);
92
- } else {
93
- return {(block_id_t)DConstants::INVALID_INDEX, (uint32_t)DConstants::INVALID_INDEX};
94
- }
18
+ buffer_id = (uint32_t)block_id;
19
+ swizzle_flag = 1;
95
20
  }
96
21
 
97
22
  } // namespace duckdb
@@ -13,6 +13,7 @@
13
13
  #include "duckdb/transaction/duck_transaction.hpp"
14
14
  #include "duckdb/catalog/catalog_entry/duck_table_entry.hpp"
15
15
  #include "duckdb/storage/table/scan_state.hpp"
16
+ #include "duckdb/execution/index/art/art_key.hpp"
16
17
 
17
18
  namespace duckdb {
18
19
 
@@ -48,7 +49,7 @@ public:
48
49
  ExpressionExecutor probe_executor;
49
50
 
50
51
  ArenaAllocator arena_allocator;
51
- vector<Key> keys;
52
+ vector<ARTKey> keys;
52
53
  unique_ptr<ColumnFetchState> fetch_state;
53
54
 
54
55
  public:
@@ -268,8 +268,8 @@ void BaseCSVReader::AddValue(string_t str_val, idx_t &column, vector<idx_t> &esc
268
268
  idx_t row_entry = parse_chunk.size();
269
269
 
270
270
  // test against null string, but only if the value was not quoted
271
- if ((!has_quotes || return_types[column].id() != LogicalTypeId::VARCHAR) && !options.force_not_null[column] &&
272
- Equals::Operation(str_val, string_t(options.null_str))) {
271
+ if ((!(has_quotes && !options.allow_quoted_nulls) || return_types[column].id() != LogicalTypeId::VARCHAR) &&
272
+ !options.force_not_null[column] && Equals::Operation(str_val, string_t(options.null_str))) {
273
273
  FlatVector::SetNull(parse_chunk.data[column], row_entry, true);
274
274
  } else {
275
275
  auto &v = parse_chunk.data[column];
@@ -175,6 +175,8 @@ void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value
175
175
  }
176
176
  } else if (loption == "null_padding") {
177
177
  null_padding = ParseBoolean(value, loption);
178
+ } else if (loption == "allow_quoted_nulls") {
179
+ allow_quoted_nulls = ParseBoolean(value, loption);
178
180
  } else {
179
181
  throw BinderException("Unrecognized option for CSV reader \"%s\"", loption);
180
182
  }
@@ -124,6 +124,8 @@ bool ParallelCSVReader::SetPosition(DataChunk &insert_chunk) {
124
124
  while (!successfully_read_first_line) {
125
125
  DataChunk first_line_chunk;
126
126
  first_line_chunk.Initialize(allocator, return_types);
127
+ // Ensure that parse_chunk has no gunk when trying to figure new line
128
+ parse_chunk.Reset();
127
129
  for (; position_buffer < end_buffer; position_buffer++) {
128
130
  if (StringUtil::CharacterIsNewline((*buffer)[position_buffer])) {
129
131
  bool carriage_return = (*buffer)[position_buffer] == '\r';
@@ -183,6 +185,8 @@ bool ParallelCSVReader::SetPosition(DataChunk &insert_chunk) {
183
185
  if (verification_positions.beginning_of_first_line == 0) {
184
186
  verification_positions.beginning_of_first_line = position_buffer;
185
187
  }
188
+ // Ensure that parse_chunk has no gunk when trying to figure new line
189
+ parse_chunk.Reset();
186
190
 
187
191
  verification_positions.end_of_last_line = position_buffer;
188
192
  finished = false;
@@ -6,6 +6,7 @@
6
6
  #include "duckdb/main/client_context.hpp"
7
7
  #include "duckdb/storage/storage_manager.hpp"
8
8
  #include "duckdb/main/database_manager.hpp"
9
+ #include "duckdb/execution/index/art/art_key.hpp"
9
10
 
10
11
  namespace duckdb {
11
12
 
@@ -39,7 +40,7 @@ public:
39
40
 
40
41
  unique_ptr<Index> local_index;
41
42
  ArenaAllocator arena_allocator;
42
- vector<Key> keys;
43
+ vector<ARTKey> keys;
43
44
  DataChunk key_chunk;
44
45
  vector<column_t> key_column_ids;
45
46
  };
@@ -52,7 +53,7 @@ unique_ptr<GlobalSinkState> PhysicalCreateIndex::GetGlobalSinkState(ClientContex
52
53
  case IndexType::ART: {
53
54
  auto &storage = table.GetStorage();
54
55
  state->global_index = make_uniq<ART>(storage_ids, TableIOManager::Get(storage), unbound_expressions,
55
- info->constraint_type, storage.db, true);
56
+ info->constraint_type, storage.db);
56
57
  break;
57
58
  }
58
59
  default:
@@ -69,13 +70,13 @@ unique_ptr<LocalSinkState> PhysicalCreateIndex::GetLocalSinkState(ExecutionConte
69
70
  case IndexType::ART: {
70
71
  auto &storage = table.GetStorage();
71
72
  state->local_index = make_uniq<ART>(storage_ids, TableIOManager::Get(storage), unbound_expressions,
72
- info->constraint_type, storage.db, false);
73
+ info->constraint_type, storage.db);
73
74
  break;
74
75
  }
75
76
  default:
76
77
  throw InternalException("Unimplemented index type");
77
78
  }
78
- state->keys = vector<Key>(STANDARD_VECTOR_SIZE);
79
+ state->keys = vector<ARTKey>(STANDARD_VECTOR_SIZE);
79
80
  state->key_chunk.Initialize(Allocator::Get(context.client), state->local_index->logical_types);
80
81
 
81
82
  for (idx_t i = 0; i < state->key_chunk.ColumnCount(); i++) {
@@ -97,9 +98,8 @@ SinkResultType PhysicalCreateIndex::Sink(ExecutionContext &context, GlobalSinkSt
97
98
  ART::GenerateKeys(lstate.arena_allocator, lstate.key_chunk, lstate.keys);
98
99
 
99
100
  auto &storage = table.GetStorage();
100
- auto art =
101
- make_uniq<ART>(lstate.local_index->column_ids, lstate.local_index->table_io_manager,
102
- lstate.local_index->unbound_expressions, lstate.local_index->constraint_type, storage.db, false);
101
+ auto art = make_uniq<ART>(lstate.local_index->column_ids, lstate.local_index->table_io_manager,
102
+ lstate.local_index->unbound_expressions, lstate.local_index->constraint_type, storage.db);
103
103
  if (!art->ConstructFromSorted(lstate.key_chunk.size(), lstate.keys, row_identifiers)) {
104
104
  throw ConstraintException("Data contains duplicates on indexed column(s)");
105
105
  }
@@ -134,11 +134,6 @@ SinkFinalizeType PhysicalCreateIndex::Finalize(Pipeline &pipeline, Event &event,
134
134
  throw TransactionException("Transaction conflict: cannot add an index to a table that has been altered!");
135
135
  }
136
136
 
137
- state.global_index->Verify();
138
- if (state.global_index->track_memory) {
139
- state.global_index->buffer_manager.IncreaseUsedMemory(state.global_index->memory_size);
140
- }
141
-
142
137
  auto &schema = table.schema;
143
138
  auto index_entry = schema.CreateIndex(context, *info, table).get();
144
139
  if (!index_entry) {
@@ -153,6 +148,10 @@ SinkFinalizeType PhysicalCreateIndex::Finalize(Pipeline &pipeline, Event &event,
153
148
  index.parsed_expressions.push_back(parsed_expr->Copy());
154
149
  }
155
150
 
151
+ // vacuum excess memory
152
+ state.global_index->Vacuum();
153
+
154
+ // add index to storage
156
155
  storage.info->indexes.AddIndex(std::move(state.global_index));
157
156
  return SinkFinalizeType::READY;
158
157
  }
@@ -27,7 +27,7 @@ unique_ptr<CSVFileHandle> ReadCSV::OpenCSV(const string &file_path, FileCompress
27
27
  if (file_handle->CanSeek()) {
28
28
  file_handle->Reset();
29
29
  }
30
- return make_uniq<CSVFileHandle>(std::move(file_handle));
30
+ return make_uniq<CSVFileHandle>(std::move(file_handle), false);
31
31
  }
32
32
 
33
33
  void ReadCSVData::FinalizeRead(ClientContext &context) {
@@ -259,6 +259,7 @@ public:
259
259
  idx_t rows_to_skip, bool force_parallelism_p, vector<column_t> column_ids_p)
260
260
  : file_handle(std::move(file_handle_p)), system_threads(system_threads_p), buffer_size(buffer_size_p),
261
261
  force_parallelism(force_parallelism_p), column_ids(std::move(column_ids_p)) {
262
+ file_handle->DisableReset();
262
263
  current_file_path = files_path_p[0];
263
264
  estimated_linenr = rows_to_skip;
264
265
  file_size = file_handle->FileSize();
@@ -861,6 +862,7 @@ static void ReadCSVAddNamedParameters(TableFunction &table_function) {
861
862
  table_function.named_parameters["decimal_separator"] = LogicalType::VARCHAR;
862
863
  table_function.named_parameters["parallel"] = LogicalType::BOOLEAN;
863
864
  table_function.named_parameters["null_padding"] = LogicalType::BOOLEAN;
865
+ table_function.named_parameters["allow_quoted_nulls"] = LogicalType::BOOLEAN;
864
866
  table_function.named_parameters["column_types"] = LogicalType::ANY;
865
867
  table_function.named_parameters["dtypes"] = LogicalType::ANY;
866
868
  table_function.named_parameters["types"] = LogicalType::ANY;
@@ -920,6 +922,7 @@ void BufferedCSVReaderOptions::Serialize(FieldWriter &writer) const {
920
922
  writer.WriteString(null_str);
921
923
  writer.WriteField<FileCompressionType>(compression);
922
924
  writer.WriteField<NewLineIdentifier>(new_line);
925
+ writer.WriteField<bool>(allow_quoted_nulls);
923
926
  // read options
924
927
  writer.WriteField<idx_t>(skip_rows);
925
928
  writer.WriteField<bool>(skip_rows_set);
@@ -954,6 +957,7 @@ void BufferedCSVReaderOptions::Deserialize(FieldReader &reader) {
954
957
  null_str = reader.ReadRequired<string>();
955
958
  compression = reader.ReadRequired<FileCompressionType>();
956
959
  new_line = reader.ReadRequired<NewLineIdentifier>();
960
+ allow_quoted_nulls = reader.ReadRequired<bool>();
957
961
  // read options
958
962
  skip_rows = reader.ReadRequired<idx_t>();
959
963
  skip_rows_set = reader.ReadRequired<bool>();
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.7.2-dev2552"
2
+ #define DUCKDB_VERSION "0.7.2-dev2699"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "98590facb8"
5
+ #define DUCKDB_SOURCE_ID "199c0211c7"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -12,4 +12,4 @@
12
12
 
13
13
  namespace duckdb {
14
14
  using std::queue;
15
- }
15
+ } // namespace duckdb
@@ -8,56 +8,54 @@
8
8
 
9
9
  #pragma once
10
10
 
11
- #include "duckdb/common/common.hpp"
12
- #include "duckdb/common/types/data_chunk.hpp"
13
- #include "duckdb/common/types/vector.hpp"
14
- #include "duckdb/execution/index/art/art_key.hpp"
15
- #include "duckdb/execution/index/art/iterator.hpp"
16
- #include "duckdb/execution/index/art/leaf.hpp"
17
- #include "duckdb/execution/index/art/node.hpp"
18
- #include "duckdb/execution/index/art/node16.hpp"
19
- #include "duckdb/execution/index/art/node256.hpp"
20
- #include "duckdb/execution/index/art/node4.hpp"
21
- #include "duckdb/execution/index/art/node48.hpp"
22
- #include "duckdb/parser/parsed_expression.hpp"
23
- #include "duckdb/storage/data_table.hpp"
24
11
  #include "duckdb/storage/index.hpp"
25
- #include "duckdb/storage/meta_block_writer.hpp"
26
12
 
27
13
  namespace duckdb {
28
14
 
29
- class ConflictManager;
30
- struct ARTIndexScanState;
31
-
15
+ // classes
32
16
  enum class VerifyExistenceType : uint8_t {
33
17
  APPEND = 0, // appends to a table
34
18
  APPEND_FK = 1, // appends to a table that has a foreign key
35
19
  DELETE_FK = 2 // delete from a table that has a foreign key
36
20
  };
21
+ class ConflictManager;
22
+ class Node;
23
+ class ARTKey;
24
+ class FixedSizeAllocator;
25
+
26
+ // structs
27
+ struct ARTIndexScanState;
28
+ struct ARTFlags {
29
+ vector<bool> vacuum_flags;
30
+ vector<idx_t> merge_buffer_counts;
31
+ };
37
32
 
38
33
  class ART : public Index {
39
34
  public:
40
- //! Constructs an ART containing the bound expressions, which are resolved during index construction
35
+ //! Constructs an ART
41
36
  ART(const vector<column_t> &column_ids, TableIOManager &table_io_manager,
42
- const vector<unique_ptr<Expression>> &unbound_expressions, IndexConstraintType constraint_type,
43
- AttachedDatabase &db, bool track_memory, idx_t block_id = DConstants::INVALID_INDEX,
44
- idx_t block_offset = DConstants::INVALID_INDEX);
37
+ const vector<unique_ptr<Expression>> &unbound_expressions, const IndexConstraintType constraint_type,
38
+ AttachedDatabase &db, const idx_t block_id = DConstants::INVALID_INDEX,
39
+ const idx_t block_offset = DConstants::INVALID_INDEX);
45
40
  ~ART() override;
46
41
 
47
42
  //! Root of the tree
48
- Node *tree;
43
+ unique_ptr<Node> tree;
44
+ //! Fixed-size allocators holding the ART nodes
45
+ vector<unique_ptr<FixedSizeAllocator>> allocators;
49
46
 
50
47
  public:
51
48
  //! Initialize a single predicate scan on the index with the given expression and column IDs
52
49
  unique_ptr<IndexScanState> InitializeScanSinglePredicate(const Transaction &transaction, const Value &value,
53
- ExpressionType expression_type) override;
50
+ const ExpressionType expression_type) override;
54
51
  //! Initialize a two predicate scan on the index with the given expression and column IDs
55
- unique_ptr<IndexScanState> InitializeScanTwoPredicates(Transaction &transaction, const Value &low_value,
56
- ExpressionType low_expression_type, const Value &high_value,
57
- ExpressionType high_expression_type) override;
52
+ unique_ptr<IndexScanState> InitializeScanTwoPredicates(const Transaction &transaction, const Value &low_value,
53
+ const ExpressionType low_expression_type,
54
+ const Value &high_value,
55
+ const ExpressionType high_expression_type) override;
58
56
  //! Performs a lookup on the index, fetching up to max_count result IDs. Returns true if all row IDs were fetched,
59
57
  //! and false otherwise
60
- bool Scan(Transaction &transaction, DataTable &table, IndexScanState &state, idx_t max_count,
58
+ bool Scan(const Transaction &transaction, const DataTable &table, IndexScanState &state, const idx_t max_count,
61
59
  vector<row_t> &result_ids) override;
62
60
 
63
61
  //! Called when data is appended to the index. The lock obtained from InitializeLock must be held
@@ -72,22 +70,25 @@ public:
72
70
  PreservedError Insert(IndexLock &lock, DataChunk &data, Vector &row_ids) override;
73
71
 
74
72
  //! Construct an ART from a vector of sorted keys
75
- bool ConstructFromSorted(idx_t count, vector<Key> &keys, Vector &row_identifiers);
73
+ bool ConstructFromSorted(idx_t count, vector<ARTKey> &keys, Vector &row_identifiers);
76
74
 
77
75
  //! Search equal values and fetches the row IDs
78
- bool SearchEqual(Key &key, idx_t max_count, vector<row_t> &result_ids);
76
+ bool SearchEqual(ARTKey &key, idx_t max_count, vector<row_t> &result_ids);
79
77
  //! Search equal values used for joins that do not need to fetch data
80
- void SearchEqualJoinNoFetch(Key &key, idx_t &result_size);
78
+ void SearchEqualJoinNoFetch(ARTKey &key, idx_t &result_size);
81
79
 
82
80
  //! Serializes the index and returns the pair of block_id offset positions
83
- BlockPointer Serialize(duckdb::MetaBlockWriter &writer) override;
81
+ BlockPointer Serialize(MetaBlockWriter &writer) override;
84
82
 
85
83
  //! Merge another index into this index. The lock obtained from InitializeLock must be held, and the other
86
84
  //! index must also be locked during the merge
87
85
  bool MergeIndexes(IndexLock &state, Index &other_index) override;
88
86
 
87
+ //! Traverses an ART and vacuums the qualifying nodes. The lock obtained from InitializeLock must be held
88
+ void Vacuum(IndexLock &state) override;
89
+
89
90
  //! Generate ART keys for an input chunk
90
- static void GenerateKeys(ArenaAllocator &allocator, DataChunk &input, vector<Key> &keys);
91
+ static void GenerateKeys(ArenaAllocator &allocator, DataChunk &input, vector<ARTKey> &keys);
91
92
 
92
93
  //! Generate a string containing all the expressions and their respective values that violate a constraint
93
94
  string GenerateErrorKeyName(DataChunk &input, idx_t row);
@@ -96,31 +97,38 @@ public:
96
97
  //! Performs constraint checking for a chunk of input data
97
98
  void CheckConstraintsForChunk(DataChunk &input, ConflictManager &conflict_manager) override;
98
99
 
99
- //! Returns the string representation of an ART
100
+ //! Returns the string representation of the ART
100
101
  string ToString() override;
101
- //! Verifies that the in-memory size value of the index matches its actual size
102
- void Verify() override;
103
- //! Increases the memory size by the difference between the old size and the current size
104
- //! and performs verifications
105
- void IncreaseAndVerifyMemorySize(idx_t old_memory_size) override;
106
102
 
107
103
  private:
108
104
  //! Insert a row ID into a leaf
109
- bool InsertToLeaf(Leaf &leaf, row_t row_id);
105
+ bool InsertToLeaf(Node &leaf_node, const row_t &row_id);
110
106
  //! Insert a key into the tree
111
- bool Insert(Node *&node, Key &key, idx_t depth, row_t row_id);
107
+ bool Insert(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id);
112
108
  //! Erase a key from the tree (if a leaf has more than one value) or erase the leaf itself
113
- void Erase(Node *&node, Key &key, idx_t depth, row_t row_id);
109
+ void Erase(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id);
114
110
  //! Find the node with a matching key, or return nullptr if not found
115
- Leaf *Lookup(Node *node, Key &key, idx_t depth);
111
+ Node Lookup(Node node, const ARTKey &key, idx_t depth);
116
112
  //! Returns all row IDs belonging to a key greater (or equal) than the search key
117
- bool SearchGreater(ARTIndexScanState *state, Key &key, bool inclusive, idx_t max_count, vector<row_t> &result_ids);
113
+ bool SearchGreater(ARTIndexScanState *state, ARTKey &key, bool inclusive, idx_t max_count,
114
+ vector<row_t> &result_ids);
118
115
  //! Returns all row IDs belonging to a key less (or equal) than the upper_bound
119
- bool SearchLess(ARTIndexScanState *state, Key &upper_bound, bool inclusive, idx_t max_count,
116
+ bool SearchLess(ARTIndexScanState *state, ARTKey &upper_bound, bool inclusive, idx_t max_count,
120
117
  vector<row_t> &result_ids);
121
118
  //! Returns all row IDs belonging to a key within the range of lower_bound and upper_bound
122
- bool SearchCloseRange(ARTIndexScanState *state, Key &lower_bound, Key &upper_bound, bool left_inclusive,
119
+ bool SearchCloseRange(ARTIndexScanState *state, ARTKey &lower_bound, ARTKey &upper_bound, bool left_inclusive,
123
120
  bool right_inclusive, idx_t max_count, vector<row_t> &result_ids);
121
+
122
+ //! Initializes a merge operation by returning a set containing the buffer count of each fixed-size allocator
123
+ void InitializeMerge(ARTFlags &flags);
124
+
125
+ //! Initializes a vacuum operation by calling the initialize operation of the respective
126
+ //! node allocator, and returns a vector containing either true, if the allocator at
127
+ //! the respective position qualifies, or false, if not
128
+ void InitializeVacuum(ARTFlags &flags);
129
+ //! Finalizes a vacuum operation by calling the finalize operation of all qualifying
130
+ //! fixed size allocators
131
+ void FinalizeVacuum(const ARTFlags &flags);
124
132
  };
125
133
 
126
134
  } // namespace duckdb
@@ -17,36 +17,37 @@
17
17
 
18
18
  namespace duckdb {
19
19
 
20
- class Key {
20
+ class ARTKey {
21
21
  public:
22
- Key();
23
- Key(data_ptr_t data, idx_t len);
24
- Key(ArenaAllocator &allocator, idx_t len);
22
+ ARTKey();
23
+ ARTKey(const data_ptr_t &data, const uint32_t &len);
24
+ ARTKey(ArenaAllocator &allocator, const uint32_t &len);
25
25
 
26
- idx_t len;
26
+ uint32_t len;
27
27
  data_ptr_t data;
28
28
 
29
29
  public:
30
30
  template <class T>
31
- static inline Key CreateKey(ArenaAllocator &allocator, const LogicalType &type, T element) {
32
- auto data = Key::CreateData<T>(allocator, element);
33
- return Key(data, sizeof(element));
31
+ static inline ARTKey CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, T element) {
32
+ auto data = ARTKey::CreateData<T>(allocator, element);
33
+ return ARTKey(data, sizeof(element));
34
34
  }
35
35
 
36
36
  template <class T>
37
- static inline Key CreateKey(ArenaAllocator &allocator, const LogicalType &type, const Value &element) {
38
- return CreateKey(allocator, type, element.GetValueUnsafe<T>());
37
+ static inline ARTKey CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, const Value &element) {
38
+ return CreateARTKey(allocator, type, element.GetValueUnsafe<T>());
39
39
  }
40
40
 
41
41
  template <class T>
42
- static inline void CreateKey(ArenaAllocator &allocator, const LogicalType &type, Key &key, T element) {
43
- key.data = Key::CreateData<T>(allocator, element);
42
+ static inline void CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, ARTKey &key, T element) {
43
+ key.data = ARTKey::CreateData<T>(allocator, element);
44
44
  key.len = sizeof(element);
45
45
  }
46
46
 
47
47
  template <class T>
48
- static inline void CreateKey(ArenaAllocator &allocator, const LogicalType &type, Key &key, const Value element) {
49
- key.data = Key::CreateData<T>(allocator, element.GetValueUnsafe<T>());
48
+ static inline void CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, ARTKey &key,
49
+ const Value element) {
50
+ key.data = ARTKey::CreateData<T>(allocator, element.GetValueUnsafe<T>());
50
51
  key.len = sizeof(element);
51
52
  }
52
53
 
@@ -57,14 +58,18 @@ public:
57
58
  const data_t &operator[](size_t i) const {
58
59
  return data[i];
59
60
  }
60
- bool operator>(const Key &k) const;
61
- bool operator<(const Key &k) const;
62
- bool operator>=(const Key &k) const;
63
- bool operator==(const Key &k) const;
61
+ bool operator>(const ARTKey &k) const;
62
+ bool operator<(const ARTKey &k) const;
63
+ bool operator>=(const ARTKey &k) const;
64
+ bool operator==(const ARTKey &k) const;
64
65
 
65
- bool ByteMatches(Key &other, idx_t &depth);
66
- bool Empty();
67
- void ConcatenateKey(ArenaAllocator &allocator, Key &concat_key);
66
+ inline bool ByteMatches(const ARTKey &other, const uint32_t &depth) const {
67
+ return data[depth] == other[depth];
68
+ }
69
+ inline bool Empty() const {
70
+ return len == 0;
71
+ }
72
+ void ConcatenateARTKey(ArenaAllocator &allocator, ARTKey &concat_key);
68
73
 
69
74
  private:
70
75
  template <class T>
@@ -76,9 +81,9 @@ private:
76
81
  };
77
82
 
78
83
  template <>
79
- Key Key::CreateKey(ArenaAllocator &allocator, const LogicalType &type, string_t value);
84
+ ARTKey ARTKey::CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, string_t value);
80
85
  template <>
81
- Key Key::CreateKey(ArenaAllocator &allocator, const LogicalType &type, const char *value);
86
+ ARTKey ARTKey::CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, const char *value);
82
87
  template <>
83
- void Key::CreateKey(ArenaAllocator &allocator, const LogicalType &type, Key &key, string_t value);
88
+ void ARTKey::CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, ARTKey &key, string_t value);
84
89
  } // namespace duckdb