duckdb 0.7.2-dev2552.0 → 0.7.2-dev2675.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/binding.gyp +7 -7
  2. package/package.json +2 -2
  3. package/src/duckdb/extension/parquet/parquet_statistics.cpp +3 -0
  4. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +2 -2
  5. package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
  6. package/src/duckdb/src/execution/index/art/art.cpp +286 -269
  7. package/src/duckdb/src/execution/index/art/art_key.cpp +22 -32
  8. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +224 -0
  9. package/src/duckdb/src/execution/index/art/iterator.cpp +142 -123
  10. package/src/duckdb/src/execution/index/art/leaf.cpp +319 -170
  11. package/src/duckdb/src/execution/index/art/leaf_segment.cpp +42 -0
  12. package/src/duckdb/src/execution/index/art/node.cpp +444 -379
  13. package/src/duckdb/src/execution/index/art/node16.cpp +178 -114
  14. package/src/duckdb/src/execution/index/art/node256.cpp +117 -79
  15. package/src/duckdb/src/execution/index/art/node4.cpp +169 -114
  16. package/src/duckdb/src/execution/index/art/node48.cpp +175 -105
  17. package/src/duckdb/src/execution/index/art/prefix.cpp +405 -127
  18. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +42 -0
  19. package/src/duckdb/src/execution/index/art/swizzleable_pointer.cpp +10 -85
  20. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +2 -1
  21. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +2 -2
  22. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +2 -0
  23. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +11 -12
  24. package/src/duckdb/src/function/table/read_csv.cpp +5 -1
  25. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  26. package/src/duckdb/src/include/duckdb/common/queue.hpp +1 -1
  27. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +53 -45
  28. package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +29 -24
  29. package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +114 -0
  30. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +26 -20
  31. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +63 -39
  32. package/src/duckdb/src/include/duckdb/execution/index/art/leaf_segment.hpp +36 -0
  33. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +98 -116
  34. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +48 -36
  35. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +52 -35
  36. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +46 -36
  37. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +57 -35
  38. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +57 -50
  39. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +40 -0
  40. package/src/duckdb/src/include/duckdb/execution/index/art/swizzleable_pointer.hpp +38 -31
  41. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_file_handle.hpp +2 -1
  42. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +2 -0
  43. package/src/duckdb/src/include/duckdb/parser/statement/insert_statement.hpp +4 -1
  44. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -1
  45. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +0 -5
  46. package/src/duckdb/src/include/duckdb/storage/index.hpp +13 -28
  47. package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +0 -2
  48. package/src/duckdb/src/include/duckdb/transaction/cleanup_state.hpp +5 -0
  49. package/src/duckdb/src/include/duckdb.h +26 -0
  50. package/src/duckdb/src/main/capi/helper-c.cpp +7 -0
  51. package/src/duckdb/src/parser/statement/insert_statement.cpp +15 -6
  52. package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +1 -1
  53. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +18 -5
  54. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +5 -7
  55. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +20 -7
  56. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +14 -9
  57. package/src/duckdb/src/storage/checkpoint_manager.cpp +11 -9
  58. package/src/duckdb/src/storage/data_table.cpp +6 -3
  59. package/src/duckdb/src/storage/index.cpp +18 -6
  60. package/src/duckdb/src/storage/local_storage.cpp +8 -2
  61. package/src/duckdb/src/storage/standard_buffer_manager.cpp +0 -9
  62. package/src/duckdb/src/storage/wal_replay.cpp +1 -1
  63. package/src/duckdb/src/transaction/cleanup_state.cpp +6 -0
  64. package/src/duckdb/src/transaction/undo_buffer.cpp +8 -0
  65. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
  66. package/src/duckdb/ub_src_execution_index_art.cpp +7 -1
@@ -1,97 +1,22 @@
1
1
  #include "duckdb/execution/index/art/swizzleable_pointer.hpp"
2
2
 
3
- #include "duckdb/execution/index/art/art.hpp"
3
+ #include "duckdb/storage/meta_block_reader.hpp"
4
4
 
5
5
  namespace duckdb {
6
- SwizzleablePointer::~SwizzleablePointer() {
7
- if (pointer) {
8
- if (!IsSwizzled()) {
9
- Node::Delete((Node *)pointer);
10
- }
11
- }
12
- }
13
6
 
14
- SwizzleablePointer::SwizzleablePointer(duckdb::MetaBlockReader &reader) {
15
- idx_t block_id = reader.Read<block_id_t>();
16
- uint32_t offset = reader.Read<uint32_t>();
17
- if (block_id == DConstants::INVALID_INDEX || offset == (uint32_t)DConstants::INVALID_INDEX) {
18
- pointer = 0;
19
- return;
20
- }
21
- idx_t pointer_size = sizeof(pointer) * 8;
22
- pointer = block_id;
23
- // This assumes high 32 bits of pointer are zero.
24
- pointer = pointer << (pointer_size / 2);
25
- D_ASSERT((pointer >> (pointer_size / 2)) == block_id);
26
- pointer += offset;
27
- // Set the left most bit to indicate this is a swizzled pointer and send it back to the mother-ship
28
- uint64_t mask = 1;
29
- mask = mask << (pointer_size - 1);
30
- // This assumes the 33rd most significant bit of the block_id is zero.
31
- pointer |= mask;
32
- }
33
-
34
- SwizzleablePointer &SwizzleablePointer::operator=(const Node *ptr) {
35
- // If the object already has a non-swizzled pointer, this will leak memory.
36
- //
37
- // TODO: If enabled, this assert will fire, indicating a possible leak. If an exception
38
- // is thrown here, it will cause a double-free. There is some work to do to make all this safer.
39
- // D_ASSERT(empty() || IsSwizzled());
40
- if (sizeof(ptr) == 4) {
41
- pointer = (uint32_t)(size_t)ptr;
42
- } else {
43
- pointer = (uint64_t)ptr;
44
- }
45
- return *this;
46
- }
47
-
48
- bool operator!=(const SwizzleablePointer &s_ptr, const uint64_t &ptr) {
49
- return (s_ptr.pointer != ptr);
50
- }
7
+ SwizzleablePointer::SwizzleablePointer(MetaBlockReader &reader) {
51
8
 
52
- BlockPointer SwizzleablePointer::GetSwizzledBlockInfo() {
53
- D_ASSERT(IsSwizzled());
54
- idx_t pointer_size = sizeof(pointer) * 8;
55
- // This is destructive. Pointer will be invalid after this operation.
56
- // That's okay because this is only ever called from Unswizzle.
57
- pointer = pointer & ~(1ULL << (pointer_size - 1));
58
- uint32_t block_id = pointer >> (pointer_size / 2);
59
- uint32_t offset = pointer & 0xffffffff;
60
- return {block_id, offset};
61
- }
62
-
63
- bool SwizzleablePointer::IsSwizzled() {
64
- idx_t pointer_size = sizeof(pointer) * 8;
65
- return (pointer >> (pointer_size - 1)) & 1;
66
- }
67
-
68
- void SwizzleablePointer::Reset() {
69
- if (pointer) {
70
- if (!IsSwizzled()) {
71
- Node::Delete((Node *)pointer);
72
- }
73
- }
74
- *this = nullptr;
75
- }
9
+ idx_t block_id = reader.Read<block_id_t>();
10
+ offset = reader.Read<uint32_t>();
11
+ type = 0;
76
12
 
77
- Node *SwizzleablePointer::Unswizzle(ART &art) {
78
- if (IsSwizzled()) {
79
- // This means our pointer is not yet in memory, gotta deserialize this
80
- // first we unset the bae
81
- auto block_info = GetSwizzledBlockInfo();
82
- *this = Node::Deserialize(art, block_info.block_id, block_info.offset);
83
- art.Verify();
13
+ if (block_id == DConstants::INVALID_INDEX) {
14
+ swizzle_flag = 0;
15
+ return;
84
16
  }
85
- return (Node *)pointer;
86
- }
87
17
 
88
- BlockPointer SwizzleablePointer::Serialize(ART &art, duckdb::MetaBlockWriter &writer) {
89
- if (pointer) {
90
- Unswizzle(art);
91
- return ((Node *)pointer)->Serialize(art, writer);
92
- } else {
93
- return {(block_id_t)DConstants::INVALID_INDEX, (uint32_t)DConstants::INVALID_INDEX};
94
- }
18
+ buffer_id = (uint32_t)block_id;
19
+ swizzle_flag = 1;
95
20
  }
96
21
 
97
22
  } // namespace duckdb
@@ -13,6 +13,7 @@
13
13
  #include "duckdb/transaction/duck_transaction.hpp"
14
14
  #include "duckdb/catalog/catalog_entry/duck_table_entry.hpp"
15
15
  #include "duckdb/storage/table/scan_state.hpp"
16
+ #include "duckdb/execution/index/art/art_key.hpp"
16
17
 
17
18
  namespace duckdb {
18
19
 
@@ -48,7 +49,7 @@ public:
48
49
  ExpressionExecutor probe_executor;
49
50
 
50
51
  ArenaAllocator arena_allocator;
51
- vector<Key> keys;
52
+ vector<ARTKey> keys;
52
53
  unique_ptr<ColumnFetchState> fetch_state;
53
54
 
54
55
  public:
@@ -268,8 +268,8 @@ void BaseCSVReader::AddValue(string_t str_val, idx_t &column, vector<idx_t> &esc
268
268
  idx_t row_entry = parse_chunk.size();
269
269
 
270
270
  // test against null string, but only if the value was not quoted
271
- if ((!has_quotes || return_types[column].id() != LogicalTypeId::VARCHAR) && !options.force_not_null[column] &&
272
- Equals::Operation(str_val, string_t(options.null_str))) {
271
+ if ((!(has_quotes && !options.allow_quoted_nulls) || return_types[column].id() != LogicalTypeId::VARCHAR) &&
272
+ !options.force_not_null[column] && Equals::Operation(str_val, string_t(options.null_str))) {
273
273
  FlatVector::SetNull(parse_chunk.data[column], row_entry, true);
274
274
  } else {
275
275
  auto &v = parse_chunk.data[column];
@@ -175,6 +175,8 @@ void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value
175
175
  }
176
176
  } else if (loption == "null_padding") {
177
177
  null_padding = ParseBoolean(value, loption);
178
+ } else if (loption == "allow_quoted_nulls") {
179
+ allow_quoted_nulls = ParseBoolean(value, loption);
178
180
  } else {
179
181
  throw BinderException("Unrecognized option for CSV reader \"%s\"", loption);
180
182
  }
@@ -6,6 +6,7 @@
6
6
  #include "duckdb/main/client_context.hpp"
7
7
  #include "duckdb/storage/storage_manager.hpp"
8
8
  #include "duckdb/main/database_manager.hpp"
9
+ #include "duckdb/execution/index/art/art_key.hpp"
9
10
 
10
11
  namespace duckdb {
11
12
 
@@ -39,7 +40,7 @@ public:
39
40
 
40
41
  unique_ptr<Index> local_index;
41
42
  ArenaAllocator arena_allocator;
42
- vector<Key> keys;
43
+ vector<ARTKey> keys;
43
44
  DataChunk key_chunk;
44
45
  vector<column_t> key_column_ids;
45
46
  };
@@ -52,7 +53,7 @@ unique_ptr<GlobalSinkState> PhysicalCreateIndex::GetGlobalSinkState(ClientContex
52
53
  case IndexType::ART: {
53
54
  auto &storage = table.GetStorage();
54
55
  state->global_index = make_uniq<ART>(storage_ids, TableIOManager::Get(storage), unbound_expressions,
55
- info->constraint_type, storage.db, true);
56
+ info->constraint_type, storage.db);
56
57
  break;
57
58
  }
58
59
  default:
@@ -69,13 +70,13 @@ unique_ptr<LocalSinkState> PhysicalCreateIndex::GetLocalSinkState(ExecutionConte
69
70
  case IndexType::ART: {
70
71
  auto &storage = table.GetStorage();
71
72
  state->local_index = make_uniq<ART>(storage_ids, TableIOManager::Get(storage), unbound_expressions,
72
- info->constraint_type, storage.db, false);
73
+ info->constraint_type, storage.db);
73
74
  break;
74
75
  }
75
76
  default:
76
77
  throw InternalException("Unimplemented index type");
77
78
  }
78
- state->keys = vector<Key>(STANDARD_VECTOR_SIZE);
79
+ state->keys = vector<ARTKey>(STANDARD_VECTOR_SIZE);
79
80
  state->key_chunk.Initialize(Allocator::Get(context.client), state->local_index->logical_types);
80
81
 
81
82
  for (idx_t i = 0; i < state->key_chunk.ColumnCount(); i++) {
@@ -97,9 +98,8 @@ SinkResultType PhysicalCreateIndex::Sink(ExecutionContext &context, GlobalSinkSt
97
98
  ART::GenerateKeys(lstate.arena_allocator, lstate.key_chunk, lstate.keys);
98
99
 
99
100
  auto &storage = table.GetStorage();
100
- auto art =
101
- make_uniq<ART>(lstate.local_index->column_ids, lstate.local_index->table_io_manager,
102
- lstate.local_index->unbound_expressions, lstate.local_index->constraint_type, storage.db, false);
101
+ auto art = make_uniq<ART>(lstate.local_index->column_ids, lstate.local_index->table_io_manager,
102
+ lstate.local_index->unbound_expressions, lstate.local_index->constraint_type, storage.db);
103
103
  if (!art->ConstructFromSorted(lstate.key_chunk.size(), lstate.keys, row_identifiers)) {
104
104
  throw ConstraintException("Data contains duplicates on indexed column(s)");
105
105
  }
@@ -134,11 +134,6 @@ SinkFinalizeType PhysicalCreateIndex::Finalize(Pipeline &pipeline, Event &event,
134
134
  throw TransactionException("Transaction conflict: cannot add an index to a table that has been altered!");
135
135
  }
136
136
 
137
- state.global_index->Verify();
138
- if (state.global_index->track_memory) {
139
- state.global_index->buffer_manager.IncreaseUsedMemory(state.global_index->memory_size);
140
- }
141
-
142
137
  auto &schema = table.schema;
143
138
  auto index_entry = schema.CreateIndex(context, *info, table).get();
144
139
  if (!index_entry) {
@@ -153,6 +148,10 @@ SinkFinalizeType PhysicalCreateIndex::Finalize(Pipeline &pipeline, Event &event,
153
148
  index.parsed_expressions.push_back(parsed_expr->Copy());
154
149
  }
155
150
 
151
+ // vacuum excess memory
152
+ state.global_index->Vacuum();
153
+
154
+ // add index to storage
156
155
  storage.info->indexes.AddIndex(std::move(state.global_index));
157
156
  return SinkFinalizeType::READY;
158
157
  }
@@ -27,7 +27,7 @@ unique_ptr<CSVFileHandle> ReadCSV::OpenCSV(const string &file_path, FileCompress
27
27
  if (file_handle->CanSeek()) {
28
28
  file_handle->Reset();
29
29
  }
30
- return make_uniq<CSVFileHandle>(std::move(file_handle));
30
+ return make_uniq<CSVFileHandle>(std::move(file_handle), false);
31
31
  }
32
32
 
33
33
  void ReadCSVData::FinalizeRead(ClientContext &context) {
@@ -259,6 +259,7 @@ public:
259
259
  idx_t rows_to_skip, bool force_parallelism_p, vector<column_t> column_ids_p)
260
260
  : file_handle(std::move(file_handle_p)), system_threads(system_threads_p), buffer_size(buffer_size_p),
261
261
  force_parallelism(force_parallelism_p), column_ids(std::move(column_ids_p)) {
262
+ file_handle->DisableReset();
262
263
  current_file_path = files_path_p[0];
263
264
  estimated_linenr = rows_to_skip;
264
265
  file_size = file_handle->FileSize();
@@ -861,6 +862,7 @@ static void ReadCSVAddNamedParameters(TableFunction &table_function) {
861
862
  table_function.named_parameters["decimal_separator"] = LogicalType::VARCHAR;
862
863
  table_function.named_parameters["parallel"] = LogicalType::BOOLEAN;
863
864
  table_function.named_parameters["null_padding"] = LogicalType::BOOLEAN;
865
+ table_function.named_parameters["allow_quoted_nulls"] = LogicalType::BOOLEAN;
864
866
  table_function.named_parameters["column_types"] = LogicalType::ANY;
865
867
  table_function.named_parameters["dtypes"] = LogicalType::ANY;
866
868
  table_function.named_parameters["types"] = LogicalType::ANY;
@@ -920,6 +922,7 @@ void BufferedCSVReaderOptions::Serialize(FieldWriter &writer) const {
920
922
  writer.WriteString(null_str);
921
923
  writer.WriteField<FileCompressionType>(compression);
922
924
  writer.WriteField<NewLineIdentifier>(new_line);
925
+ writer.WriteField<bool>(allow_quoted_nulls);
923
926
  // read options
924
927
  writer.WriteField<idx_t>(skip_rows);
925
928
  writer.WriteField<bool>(skip_rows_set);
@@ -954,6 +957,7 @@ void BufferedCSVReaderOptions::Deserialize(FieldReader &reader) {
954
957
  null_str = reader.ReadRequired<string>();
955
958
  compression = reader.ReadRequired<FileCompressionType>();
956
959
  new_line = reader.ReadRequired<NewLineIdentifier>();
960
+ allow_quoted_nulls = reader.ReadRequired<bool>();
957
961
  // read options
958
962
  skip_rows = reader.ReadRequired<idx_t>();
959
963
  skip_rows_set = reader.ReadRequired<bool>();
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.7.2-dev2552"
2
+ #define DUCKDB_VERSION "0.7.2-dev2675"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "98590facb8"
5
+ #define DUCKDB_SOURCE_ID "688b2f1f8c"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -12,4 +12,4 @@
12
12
 
13
13
  namespace duckdb {
14
14
  using std::queue;
15
- }
15
+ } // namespace duckdb
@@ -8,56 +8,54 @@
8
8
 
9
9
  #pragma once
10
10
 
11
- #include "duckdb/common/common.hpp"
12
- #include "duckdb/common/types/data_chunk.hpp"
13
- #include "duckdb/common/types/vector.hpp"
14
- #include "duckdb/execution/index/art/art_key.hpp"
15
- #include "duckdb/execution/index/art/iterator.hpp"
16
- #include "duckdb/execution/index/art/leaf.hpp"
17
- #include "duckdb/execution/index/art/node.hpp"
18
- #include "duckdb/execution/index/art/node16.hpp"
19
- #include "duckdb/execution/index/art/node256.hpp"
20
- #include "duckdb/execution/index/art/node4.hpp"
21
- #include "duckdb/execution/index/art/node48.hpp"
22
- #include "duckdb/parser/parsed_expression.hpp"
23
- #include "duckdb/storage/data_table.hpp"
24
11
  #include "duckdb/storage/index.hpp"
25
- #include "duckdb/storage/meta_block_writer.hpp"
26
12
 
27
13
  namespace duckdb {
28
14
 
29
- class ConflictManager;
30
- struct ARTIndexScanState;
31
-
15
+ // classes
32
16
  enum class VerifyExistenceType : uint8_t {
33
17
  APPEND = 0, // appends to a table
34
18
  APPEND_FK = 1, // appends to a table that has a foreign key
35
19
  DELETE_FK = 2 // delete from a table that has a foreign key
36
20
  };
21
+ class ConflictManager;
22
+ class Node;
23
+ class ARTKey;
24
+ class FixedSizeAllocator;
25
+
26
+ // structs
27
+ struct ARTIndexScanState;
28
+ struct ARTFlags {
29
+ vector<bool> vacuum_flags;
30
+ vector<idx_t> merge_buffer_counts;
31
+ };
37
32
 
38
33
  class ART : public Index {
39
34
  public:
40
- //! Constructs an ART containing the bound expressions, which are resolved during index construction
35
+ //! Constructs an ART
41
36
  ART(const vector<column_t> &column_ids, TableIOManager &table_io_manager,
42
- const vector<unique_ptr<Expression>> &unbound_expressions, IndexConstraintType constraint_type,
43
- AttachedDatabase &db, bool track_memory, idx_t block_id = DConstants::INVALID_INDEX,
44
- idx_t block_offset = DConstants::INVALID_INDEX);
37
+ const vector<unique_ptr<Expression>> &unbound_expressions, const IndexConstraintType constraint_type,
38
+ AttachedDatabase &db, const idx_t block_id = DConstants::INVALID_INDEX,
39
+ const idx_t block_offset = DConstants::INVALID_INDEX);
45
40
  ~ART() override;
46
41
 
47
42
  //! Root of the tree
48
- Node *tree;
43
+ unique_ptr<Node> tree;
44
+ //! Fixed-size allocators holding the ART nodes
45
+ vector<unique_ptr<FixedSizeAllocator>> allocators;
49
46
 
50
47
  public:
51
48
  //! Initialize a single predicate scan on the index with the given expression and column IDs
52
49
  unique_ptr<IndexScanState> InitializeScanSinglePredicate(const Transaction &transaction, const Value &value,
53
- ExpressionType expression_type) override;
50
+ const ExpressionType expression_type) override;
54
51
  //! Initialize a two predicate scan on the index with the given expression and column IDs
55
- unique_ptr<IndexScanState> InitializeScanTwoPredicates(Transaction &transaction, const Value &low_value,
56
- ExpressionType low_expression_type, const Value &high_value,
57
- ExpressionType high_expression_type) override;
52
+ unique_ptr<IndexScanState> InitializeScanTwoPredicates(const Transaction &transaction, const Value &low_value,
53
+ const ExpressionType low_expression_type,
54
+ const Value &high_value,
55
+ const ExpressionType high_expression_type) override;
58
56
  //! Performs a lookup on the index, fetching up to max_count result IDs. Returns true if all row IDs were fetched,
59
57
  //! and false otherwise
60
- bool Scan(Transaction &transaction, DataTable &table, IndexScanState &state, idx_t max_count,
58
+ bool Scan(const Transaction &transaction, const DataTable &table, IndexScanState &state, const idx_t max_count,
61
59
  vector<row_t> &result_ids) override;
62
60
 
63
61
  //! Called when data is appended to the index. The lock obtained from InitializeLock must be held
@@ -72,22 +70,25 @@ public:
72
70
  PreservedError Insert(IndexLock &lock, DataChunk &data, Vector &row_ids) override;
73
71
 
74
72
  //! Construct an ART from a vector of sorted keys
75
- bool ConstructFromSorted(idx_t count, vector<Key> &keys, Vector &row_identifiers);
73
+ bool ConstructFromSorted(idx_t count, vector<ARTKey> &keys, Vector &row_identifiers);
76
74
 
77
75
  //! Search equal values and fetches the row IDs
78
- bool SearchEqual(Key &key, idx_t max_count, vector<row_t> &result_ids);
76
+ bool SearchEqual(ARTKey &key, idx_t max_count, vector<row_t> &result_ids);
79
77
  //! Search equal values used for joins that do not need to fetch data
80
- void SearchEqualJoinNoFetch(Key &key, idx_t &result_size);
78
+ void SearchEqualJoinNoFetch(ARTKey &key, idx_t &result_size);
81
79
 
82
80
  //! Serializes the index and returns the pair of block_id offset positions
83
- BlockPointer Serialize(duckdb::MetaBlockWriter &writer) override;
81
+ BlockPointer Serialize(MetaBlockWriter &writer) override;
84
82
 
85
83
  //! Merge another index into this index. The lock obtained from InitializeLock must be held, and the other
86
84
  //! index must also be locked during the merge
87
85
  bool MergeIndexes(IndexLock &state, Index &other_index) override;
88
86
 
87
+ //! Traverses an ART and vacuums the qualifying nodes. The lock obtained from InitializeLock must be held
88
+ void Vacuum(IndexLock &state) override;
89
+
89
90
  //! Generate ART keys for an input chunk
90
- static void GenerateKeys(ArenaAllocator &allocator, DataChunk &input, vector<Key> &keys);
91
+ static void GenerateKeys(ArenaAllocator &allocator, DataChunk &input, vector<ARTKey> &keys);
91
92
 
92
93
  //! Generate a string containing all the expressions and their respective values that violate a constraint
93
94
  string GenerateErrorKeyName(DataChunk &input, idx_t row);
@@ -96,31 +97,38 @@ public:
96
97
  //! Performs constraint checking for a chunk of input data
97
98
  void CheckConstraintsForChunk(DataChunk &input, ConflictManager &conflict_manager) override;
98
99
 
99
- //! Returns the string representation of an ART
100
+ //! Returns the string representation of the ART
100
101
  string ToString() override;
101
- //! Verifies that the in-memory size value of the index matches its actual size
102
- void Verify() override;
103
- //! Increases the memory size by the difference between the old size and the current size
104
- //! and performs verifications
105
- void IncreaseAndVerifyMemorySize(idx_t old_memory_size) override;
106
102
 
107
103
  private:
108
104
  //! Insert a row ID into a leaf
109
- bool InsertToLeaf(Leaf &leaf, row_t row_id);
105
+ bool InsertToLeaf(Node &leaf_node, const row_t &row_id);
110
106
  //! Insert a key into the tree
111
- bool Insert(Node *&node, Key &key, idx_t depth, row_t row_id);
107
+ bool Insert(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id);
112
108
  //! Erase a key from the tree (if a leaf has more than one value) or erase the leaf itself
113
- void Erase(Node *&node, Key &key, idx_t depth, row_t row_id);
109
+ void Erase(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id);
114
110
  //! Find the node with a matching key, or return nullptr if not found
115
- Leaf *Lookup(Node *node, Key &key, idx_t depth);
111
+ Node Lookup(Node node, const ARTKey &key, idx_t depth);
116
112
  //! Returns all row IDs belonging to a key greater (or equal) than the search key
117
- bool SearchGreater(ARTIndexScanState *state, Key &key, bool inclusive, idx_t max_count, vector<row_t> &result_ids);
113
+ bool SearchGreater(ARTIndexScanState *state, ARTKey &key, bool inclusive, idx_t max_count,
114
+ vector<row_t> &result_ids);
118
115
  //! Returns all row IDs belonging to a key less (or equal) than the upper_bound
119
- bool SearchLess(ARTIndexScanState *state, Key &upper_bound, bool inclusive, idx_t max_count,
116
+ bool SearchLess(ARTIndexScanState *state, ARTKey &upper_bound, bool inclusive, idx_t max_count,
120
117
  vector<row_t> &result_ids);
121
118
  //! Returns all row IDs belonging to a key within the range of lower_bound and upper_bound
122
- bool SearchCloseRange(ARTIndexScanState *state, Key &lower_bound, Key &upper_bound, bool left_inclusive,
119
+ bool SearchCloseRange(ARTIndexScanState *state, ARTKey &lower_bound, ARTKey &upper_bound, bool left_inclusive,
123
120
  bool right_inclusive, idx_t max_count, vector<row_t> &result_ids);
121
+
122
+ //! Initializes a merge operation by returning a set containing the buffer count of each fixed-size allocator
123
+ void InitializeMerge(ARTFlags &flags);
124
+
125
+ //! Initializes a vacuum operation by calling the initialize operation of the respective
126
+ //! node allocator, and returns a vector containing either true, if the allocator at
127
+ //! the respective position qualifies, or false, if not
128
+ void InitializeVacuum(ARTFlags &flags);
129
+ //! Finalizes a vacuum operation by calling the finalize operation of all qualifying
130
+ //! fixed size allocators
131
+ void FinalizeVacuum(const ARTFlags &flags);
124
132
  };
125
133
 
126
134
  } // namespace duckdb
@@ -17,36 +17,37 @@
17
17
 
18
18
  namespace duckdb {
19
19
 
20
- class Key {
20
+ class ARTKey {
21
21
  public:
22
- Key();
23
- Key(data_ptr_t data, idx_t len);
24
- Key(ArenaAllocator &allocator, idx_t len);
22
+ ARTKey();
23
+ ARTKey(const data_ptr_t &data, const uint32_t &len);
24
+ ARTKey(ArenaAllocator &allocator, const uint32_t &len);
25
25
 
26
- idx_t len;
26
+ uint32_t len;
27
27
  data_ptr_t data;
28
28
 
29
29
  public:
30
30
  template <class T>
31
- static inline Key CreateKey(ArenaAllocator &allocator, const LogicalType &type, T element) {
32
- auto data = Key::CreateData<T>(allocator, element);
33
- return Key(data, sizeof(element));
31
+ static inline ARTKey CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, T element) {
32
+ auto data = ARTKey::CreateData<T>(allocator, element);
33
+ return ARTKey(data, sizeof(element));
34
34
  }
35
35
 
36
36
  template <class T>
37
- static inline Key CreateKey(ArenaAllocator &allocator, const LogicalType &type, const Value &element) {
38
- return CreateKey(allocator, type, element.GetValueUnsafe<T>());
37
+ static inline ARTKey CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, const Value &element) {
38
+ return CreateARTKey(allocator, type, element.GetValueUnsafe<T>());
39
39
  }
40
40
 
41
41
  template <class T>
42
- static inline void CreateKey(ArenaAllocator &allocator, const LogicalType &type, Key &key, T element) {
43
- key.data = Key::CreateData<T>(allocator, element);
42
+ static inline void CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, ARTKey &key, T element) {
43
+ key.data = ARTKey::CreateData<T>(allocator, element);
44
44
  key.len = sizeof(element);
45
45
  }
46
46
 
47
47
  template <class T>
48
- static inline void CreateKey(ArenaAllocator &allocator, const LogicalType &type, Key &key, const Value element) {
49
- key.data = Key::CreateData<T>(allocator, element.GetValueUnsafe<T>());
48
+ static inline void CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, ARTKey &key,
49
+ const Value element) {
50
+ key.data = ARTKey::CreateData<T>(allocator, element.GetValueUnsafe<T>());
50
51
  key.len = sizeof(element);
51
52
  }
52
53
 
@@ -57,14 +58,18 @@ public:
57
58
  const data_t &operator[](size_t i) const {
58
59
  return data[i];
59
60
  }
60
- bool operator>(const Key &k) const;
61
- bool operator<(const Key &k) const;
62
- bool operator>=(const Key &k) const;
63
- bool operator==(const Key &k) const;
61
+ bool operator>(const ARTKey &k) const;
62
+ bool operator<(const ARTKey &k) const;
63
+ bool operator>=(const ARTKey &k) const;
64
+ bool operator==(const ARTKey &k) const;
64
65
 
65
- bool ByteMatches(Key &other, idx_t &depth);
66
- bool Empty();
67
- void ConcatenateKey(ArenaAllocator &allocator, Key &concat_key);
66
+ inline bool ByteMatches(const ARTKey &other, const uint32_t &depth) const {
67
+ return data[depth] == other[depth];
68
+ }
69
+ inline bool Empty() const {
70
+ return len == 0;
71
+ }
72
+ void ConcatenateARTKey(ArenaAllocator &allocator, ARTKey &concat_key);
68
73
 
69
74
  private:
70
75
  template <class T>
@@ -76,9 +81,9 @@ private:
76
81
  };
77
82
 
78
83
  template <>
79
- Key Key::CreateKey(ArenaAllocator &allocator, const LogicalType &type, string_t value);
84
+ ARTKey ARTKey::CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, string_t value);
80
85
  template <>
81
- Key Key::CreateKey(ArenaAllocator &allocator, const LogicalType &type, const char *value);
86
+ ARTKey ARTKey::CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, const char *value);
82
87
  template <>
83
- void Key::CreateKey(ArenaAllocator &allocator, const LogicalType &type, Key &key, string_t value);
88
+ void ARTKey::CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, ARTKey &key, string_t value);
84
89
  } // namespace duckdb
@@ -0,0 +1,114 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/execution/index/art/fixed_size_allocator.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/common/unordered_set.hpp"
12
+ #include "duckdb/common/constants.hpp"
13
+ #include "duckdb/common/vector.hpp"
14
+ #include "duckdb/common/assert.hpp"
15
+ #include "duckdb/common/types/validity_mask.hpp"
16
+ #include "duckdb/storage/buffer_manager.hpp"
17
+ #include "duckdb/execution/index/art/swizzleable_pointer.hpp"
18
+
19
+ namespace duckdb {
20
+
21
+ struct BufferEntry {
22
+ BufferEntry(const data_ptr_t &ptr, const idx_t &allocation_count) : ptr(ptr), allocation_count(allocation_count) {
23
+ }
24
+ data_ptr_t ptr;
25
+ idx_t allocation_count;
26
+ };
27
+
28
+ //! The FixedSizeAllocator provides pointers to fixed-size sections of pre-allocated memory buffers.
29
+ //! The pointers are SwizzleablePointers, and the leftmost byte (swizzle flag and type) must always be zero.
30
+ class FixedSizeAllocator {
31
+ public:
32
+ //! Fixed size of the buffers
33
+ static constexpr idx_t BUFFER_ALLOC_SIZE = Storage::BLOCK_ALLOC_SIZE;
34
+ //! We can vacuum 10% or more of the total memory usage of the allocator
35
+ static constexpr uint8_t VACUUM_THRESHOLD = 10;
36
+
37
+ //! Constants for fast offset calculations in the bitmask
38
+ static constexpr idx_t BASE[] = {0x00000000FFFFFFFF, 0x0000FFFF, 0x00FF, 0x0F, 0x3, 0x1};
39
+ static constexpr uint8_t SHIFT[] = {32, 16, 8, 4, 2, 1};
40
+
41
+ public:
42
+ explicit FixedSizeAllocator(const idx_t allocation_size, Allocator &allocator);
43
+ ~FixedSizeAllocator();
44
+
45
+ //! Allocation size of one element in a buffer
46
+ idx_t allocation_size;
47
+ //! Total number of allocations
48
+ idx_t total_allocations;
49
+ //! Number of validity_t values in the bitmask
50
+ idx_t bitmask_count;
51
+ //! First starting byte of the payload
52
+ idx_t allocation_offset;
53
+ //! Number of possible allocations per buffer
54
+ idx_t allocations_per_buffer;
55
+
56
+ //! Buffers containing the data
57
+ vector<BufferEntry> buffers;
58
+ //! Buffers with free space
59
+ unordered_set<idx_t> buffers_with_free_space;
60
+
61
+ //! Minimum buffer ID of buffers that can be vacuumed
62
+ idx_t min_vacuum_buffer_id;
63
+
64
+ //! Buffer manager of the database instance
65
+ Allocator &allocator;
66
+
67
+ public:
68
+ //! Get a new pointer to data, might cause a new buffer allocation
69
+ SwizzleablePointer New();
70
+ //! Free the data of the pointer
71
+ void Free(const SwizzleablePointer ptr);
72
+ //! Get the data of the pointer
73
+ template <class T>
74
+ inline T *Get(const SwizzleablePointer ptr) const {
75
+ return (T *)Get(ptr);
76
+ }
77
+
78
+ //! Resets the allocator, which e.g. becomes necessary during DELETE FROM table
79
+ void Reset();
80
+
81
+ //! Returns the allocated memory size in bytes
82
+ inline idx_t GetMemoryUsage() const {
83
+ return buffers.size() * BUFFER_ALLOC_SIZE;
84
+ }
85
+
86
+ //! Merge another FixedSizeAllocator with this allocator. Both must have the same allocation size
87
+ void Merge(FixedSizeAllocator &other);
88
+
89
+ //! Initialize a vacuum operation, and return true, if the allocator needs a vacuum
90
+ bool InitializeVacuum();
91
+ //! Finalize a vacuum operation by freeing all buffers exceeding the min_vacuum_buffer_id
92
+ void FinalizeVacuum();
93
+ //! Returns true, if a pointer qualifies for a vacuum operation, and false otherwise
94
+ inline bool NeedsVacuum(const SwizzleablePointer ptr) const {
95
+ if (ptr.buffer_id >= min_vacuum_buffer_id) {
96
+ return true;
97
+ }
98
+ return false;
99
+ }
100
+ //! Vacuums a pointer
101
+ SwizzleablePointer VacuumPointer(const SwizzleablePointer ptr);
102
+
103
+ private:
104
+ //! Returns the data_ptr_t of a pointer
105
+ inline data_ptr_t Get(const SwizzleablePointer ptr) const {
106
+ D_ASSERT(ptr.buffer_id < buffers.size());
107
+ D_ASSERT(ptr.offset < allocations_per_buffer);
108
+ return buffers[ptr.buffer_id].ptr + ptr.offset * allocation_size + allocation_offset;
109
+ }
110
+ //! Returns the first free offset in a bitmask
111
+ uint32_t GetOffset(ValidityMask &mask, const idx_t allocation_count);
112
+ };
113
+
114
+ } // namespace duckdb