duckdb 0.7.2-dev2552.0 → 0.7.2-dev2699.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/binding.gyp +7 -7
  2. package/package.json +2 -2
  3. package/src/duckdb/extension/parquet/parquet_statistics.cpp +3 -0
  4. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +2 -2
  5. package/src/duckdb/src/common/adbc/adbc.cpp +5 -2
  6. package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
  7. package/src/duckdb/src/execution/index/art/art.cpp +286 -269
  8. package/src/duckdb/src/execution/index/art/art_key.cpp +22 -32
  9. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +224 -0
  10. package/src/duckdb/src/execution/index/art/iterator.cpp +142 -123
  11. package/src/duckdb/src/execution/index/art/leaf.cpp +319 -170
  12. package/src/duckdb/src/execution/index/art/leaf_segment.cpp +42 -0
  13. package/src/duckdb/src/execution/index/art/node.cpp +444 -379
  14. package/src/duckdb/src/execution/index/art/node16.cpp +178 -114
  15. package/src/duckdb/src/execution/index/art/node256.cpp +117 -79
  16. package/src/duckdb/src/execution/index/art/node4.cpp +169 -114
  17. package/src/duckdb/src/execution/index/art/node48.cpp +175 -105
  18. package/src/duckdb/src/execution/index/art/prefix.cpp +405 -127
  19. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +42 -0
  20. package/src/duckdb/src/execution/index/art/swizzleable_pointer.cpp +10 -85
  21. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +2 -1
  22. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +2 -2
  23. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +2 -0
  24. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +4 -0
  25. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +11 -12
  26. package/src/duckdb/src/function/table/read_csv.cpp +5 -1
  27. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  28. package/src/duckdb/src/include/duckdb/common/queue.hpp +1 -1
  29. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +53 -45
  30. package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +29 -24
  31. package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +114 -0
  32. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +26 -20
  33. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +63 -39
  34. package/src/duckdb/src/include/duckdb/execution/index/art/leaf_segment.hpp +36 -0
  35. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +98 -116
  36. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +48 -36
  37. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +52 -35
  38. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +46 -36
  39. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +57 -35
  40. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +57 -50
  41. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +40 -0
  42. package/src/duckdb/src/include/duckdb/execution/index/art/swizzleable_pointer.hpp +38 -31
  43. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_file_handle.hpp +2 -1
  44. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +2 -0
  45. package/src/duckdb/src/include/duckdb/main/query_result.hpp +1 -1
  46. package/src/duckdb/src/include/duckdb/parser/statement/insert_statement.hpp +4 -1
  47. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -1
  48. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +0 -5
  49. package/src/duckdb/src/include/duckdb/storage/index.hpp +13 -28
  50. package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +0 -2
  51. package/src/duckdb/src/include/duckdb/transaction/cleanup_state.hpp +5 -0
  52. package/src/duckdb/src/include/duckdb.h +26 -0
  53. package/src/duckdb/src/main/capi/helper-c.cpp +7 -0
  54. package/src/duckdb/src/main/client_context.cpp +1 -1
  55. package/src/duckdb/src/main/query_result.cpp +1 -1
  56. package/src/duckdb/src/parser/statement/insert_statement.cpp +15 -6
  57. package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +1 -1
  58. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +18 -5
  59. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +5 -7
  60. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +20 -7
  61. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +14 -9
  62. package/src/duckdb/src/storage/checkpoint_manager.cpp +11 -9
  63. package/src/duckdb/src/storage/data_table.cpp +6 -3
  64. package/src/duckdb/src/storage/index.cpp +18 -6
  65. package/src/duckdb/src/storage/local_storage.cpp +8 -2
  66. package/src/duckdb/src/storage/standard_buffer_manager.cpp +0 -9
  67. package/src/duckdb/src/storage/wal_replay.cpp +1 -1
  68. package/src/duckdb/src/transaction/cleanup_state.cpp +6 -0
  69. package/src/duckdb/src/transaction/undo_buffer.cpp +8 -0
  70. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
  71. package/src/duckdb/ub_src_execution_index_art.cpp +7 -1
package/src/duckdb/src/execution/index/art/art_key.cpp
@@ -1,29 +1,27 @@
  #include "duckdb/execution/index/art/art_key.hpp"
 
- #include "duckdb/execution/index/art/art.hpp"
-
  namespace duckdb {
 
- Key::Key() : len(0) {
+ ARTKey::ARTKey() : len(0) {
  }
 
- Key::Key(data_ptr_t data, idx_t len) : len(len), data(data) {
+ ARTKey::ARTKey(const data_ptr_t &data, const uint32_t &len) : len(len), data(data) {
  }
 
- Key::Key(ArenaAllocator &allocator, idx_t len) : len(len) {
+ ARTKey::ARTKey(ArenaAllocator &allocator, const uint32_t &len) : len(len) {
      data = allocator.Allocate(len);
  }
 
  template <>
- Key Key::CreateKey(ArenaAllocator &allocator, const LogicalType &type, string_t value) {
-     idx_t len = value.GetSize() + 1;
+ ARTKey ARTKey::CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, string_t value) {
+     uint32_t len = value.GetSize() + 1;
      auto data = allocator.Allocate(len);
      memcpy(data, value.GetData(), len - 1);
 
      // FIXME: rethink this
      if (type == LogicalType::BLOB || type == LogicalType::VARCHAR) {
          // indexes cannot contain BLOBs (or BLOBs cast to VARCHARs) that contain null-terminated bytes
-         for (idx_t i = 0; i < len - 1; i++) {
+         for (uint32_t i = 0; i < len - 1; i++) {
              if (data[i] == '\0') {
                  throw NotImplementedException("Indexes cannot contain BLOBs that contain null-terminated bytes.");
              }
@@ -31,16 +29,16 @@ Key Key::CreateKey(ArenaAllocator &allocator, const LogicalType &type, string_t
      }
 
      data[len - 1] = '\0';
-     return Key(data, len);
+     return ARTKey(data, len);
  }
 
  template <>
- Key Key::CreateKey(ArenaAllocator &allocator, const LogicalType &type, const char *value) {
-     return Key::CreateKey(allocator, type, string_t(value, strlen(value)));
+ ARTKey ARTKey::CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, const char *value) {
+     return ARTKey::CreateARTKey(allocator, type, string_t(value, strlen(value)));
  }
 
  template <>
- void Key::CreateKey(ArenaAllocator &allocator, const LogicalType &type, Key &key, string_t value) {
+ void ARTKey::CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, ARTKey &key, string_t value) {
      key.len = value.GetSize() + 1;
      key.data = allocator.Allocate(key.len);
      memcpy(key.data, value.GetData(), key.len - 1);
@@ -48,7 +46,7 @@ void Key::CreateKey(ArenaAllocator &allocator, const LogicalType &type, Key &key
      // FIXME: rethink this
      if (type == LogicalType::BLOB || type == LogicalType::VARCHAR) {
          // indexes cannot contain BLOBs (or BLOBs cast to VARCHARs) that contain null-terminated bytes
-         for (idx_t i = 0; i < key.len - 1; i++) {
+         for (uint32_t i = 0; i < key.len - 1; i++) {
              if (key.data[i] == '\0') {
                  throw NotImplementedException("Indexes cannot contain BLOBs that contain null-terminated bytes.");
              }
@@ -59,12 +57,12 @@ void Key::CreateKey(ArenaAllocator &allocator, const LogicalType &type, Key &key
  }
 
  template <>
- void Key::CreateKey(ArenaAllocator &allocator, const LogicalType &type, Key &key, const char *value) {
-     Key::CreateKey(allocator, type, key, string_t(value, strlen(value)));
+ void ARTKey::CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, ARTKey &key, const char *value) {
+     ARTKey::CreateARTKey(allocator, type, key, string_t(value, strlen(value)));
  }
 
- bool Key::operator>(const Key &k) const {
-     for (idx_t i = 0; i < MinValue<idx_t>(len, k.len); i++) {
+ bool ARTKey::operator>(const ARTKey &k) const {
+     for (uint32_t i = 0; i < MinValue<uint32_t>(len, k.len); i++) {
          if (data[i] > k.data[i]) {
              return true;
          } else if (data[i] < k.data[i]) {
@@ -74,8 +72,8 @@ bool Key::operator>(const Key &k) const {
      return len > k.len;
  }
 
- bool Key::operator<(const Key &k) const {
-     for (idx_t i = 0; i < MinValue<idx_t>(len, k.len); i++) {
+ bool ARTKey::operator<(const ARTKey &k) const {
+     for (uint32_t i = 0; i < MinValue<uint32_t>(len, k.len); i++) {
          if (data[i] < k.data[i]) {
              return true;
          } else if (data[i] > k.data[i]) {
@@ -85,8 +83,8 @@ bool Key::operator<(const Key &k) const {
      return len < k.len;
  }
 
- bool Key::operator>=(const Key &k) const {
-     for (idx_t i = 0; i < MinValue<idx_t>(len, k.len); i++) {
+ bool ARTKey::operator>=(const ARTKey &k) const {
+     for (uint32_t i = 0; i < MinValue<uint32_t>(len, k.len); i++) {
          if (data[i] > k.data[i]) {
              return true;
          } else if (data[i] < k.data[i]) {
@@ -96,11 +94,11 @@ bool Key::operator>=(const Key &k) const {
      return len >= k.len;
  }
 
- bool Key::operator==(const Key &k) const {
+ bool ARTKey::operator==(const ARTKey &k) const {
      if (len != k.len) {
          return false;
      }
-     for (idx_t i = 0; i < len; i++) {
+     for (uint32_t i = 0; i < len; i++) {
          if (data[i] != k.data[i]) {
              return false;
          }
@@ -108,15 +106,7 @@ bool Key::operator==(const Key &k) const {
      return true;
  }
 
- bool Key::ByteMatches(Key &other, idx_t &depth) {
-     return data[depth] == other[depth];
- }
-
- bool Key::Empty() {
-     return len == 0;
- }
-
- void Key::ConcatenateKey(ArenaAllocator &allocator, Key &other_key) {
+ void ARTKey::ConcatenateARTKey(ArenaAllocator &allocator, ARTKey &other_key) {
 
      auto compound_data = allocator.Allocate(len + other_key.len);
      memcpy(compound_data, data, len);
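
For reference, the hunks above rename Key to ARTKey and narrow the key length from idx_t to uint32_t. The following is a minimal usage sketch of the renamed API, not a DuckDB call site: the includes, the DefaultAllocator()-backed arena, and the sample values are illustrative assumptions; only the ARTKey members themselves come from the diff.

// Hypothetical illustration only: builds two ARTKeys from VARCHAR values and compares them.
// The arena setup below is an assumption for this sketch, not code from the package.
#include "duckdb/common/arena_allocator.hpp"
#include "duckdb/execution/index/art/art_key.hpp"

using namespace duckdb;

static void ARTKeyExample() {
    // arena-backed key memory, released when the arena goes out of scope
    ArenaAllocator arena(Allocator::DefaultAllocator());

    // CreateARTKey copies the payload and appends a '\0' terminator,
    // so the stored length is GetSize() + 1 (now a uint32_t rather than an idx_t)
    ARTKey left = ARTKey::CreateARTKey<const char *>(arena, LogicalType::VARCHAR, "apple");
    ARTKey right = ARTKey::CreateARTKey<const char *>(arena, LogicalType::VARCHAR, "banana");

    // byte-wise comparison; a key that is a strict prefix of another compares as smaller
    bool ordered = left < right; // true for these inputs
    (void)ordered;
}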
package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp
@@ -0,0 +1,224 @@
+ #include "duckdb/execution/index/art/fixed_size_allocator.hpp"
+
+ #include "duckdb/common/allocator.hpp"
+ #include "duckdb/common/exception.hpp"
+ #include "duckdb/common/helper.hpp"
+
+ namespace duckdb {
+
+ constexpr idx_t FixedSizeAllocator::BASE[];
+ constexpr uint8_t FixedSizeAllocator::SHIFT[];
+
+ FixedSizeAllocator::FixedSizeAllocator(const idx_t allocation_size, Allocator &allocator)
+     : allocation_size(allocation_size), total_allocations(0), allocator(allocator) {
+
+     // calculate how many allocations fit into one buffer
+
+     idx_t bits_per_value = sizeof(validity_t) * 8;
+     idx_t curr_alloc_size = 0;
+
+     bitmask_count = 0;
+     allocations_per_buffer = 0;
+
+     while (curr_alloc_size < BUFFER_ALLOC_SIZE) {
+         if (!bitmask_count || (bitmask_count * bits_per_value) % allocations_per_buffer == 0) {
+             bitmask_count++;
+             curr_alloc_size += sizeof(validity_t);
+         }
+
+         auto remaining_alloc_size = BUFFER_ALLOC_SIZE - curr_alloc_size;
+         auto remaining_allocations = MinValue(remaining_alloc_size / allocation_size, bits_per_value);
+
+         if (remaining_allocations == 0) {
+             break;
+         }
+
+         allocations_per_buffer += remaining_allocations;
+         curr_alloc_size += remaining_allocations * allocation_size;
+     }
+
+     allocation_offset = bitmask_count * sizeof(validity_t);
+ }
+
+ FixedSizeAllocator::~FixedSizeAllocator() {
+     for (auto &buffer : buffers) {
+         allocator.FreeData(buffer.ptr, BUFFER_ALLOC_SIZE);
+     }
+ }
+
+ SwizzleablePointer FixedSizeAllocator::New() {
+
+     // no more free pointers
+     if (buffers_with_free_space.empty()) {
+
+         // add a new buffer
+         idx_t buffer_id = buffers.size();
+         D_ASSERT(buffer_id <= (uint32_t)DConstants::INVALID_INDEX);
+         auto buffer = allocator.AllocateData(BUFFER_ALLOC_SIZE);
+         buffers.emplace_back(buffer, 0);
+         buffers_with_free_space.insert(buffer_id);
+
+         // set the bitmask
+         ValidityMask mask((validity_t *)buffer);
+         mask.SetAllValid(allocations_per_buffer);
+     }
+
+     // return a pointer
+     D_ASSERT(!buffers_with_free_space.empty());
+     auto buffer_id = (uint32_t)*buffers_with_free_space.begin();
+
+     auto bitmask_ptr = (validity_t *)buffers[buffer_id].ptr;
+     ValidityMask mask(bitmask_ptr);
+     auto offset = GetOffset(mask, buffers[buffer_id].allocation_count);
+
+     buffers[buffer_id].allocation_count++;
+     total_allocations++;
+     if (buffers[buffer_id].allocation_count == allocations_per_buffer) {
+         buffers_with_free_space.erase(buffer_id);
+     }
+
+     return SwizzleablePointer(offset, buffer_id);
+ }
+
+ void FixedSizeAllocator::Free(const SwizzleablePointer ptr) {
+
+     auto bitmask_ptr = (validity_t *)buffers[ptr.buffer_id].ptr;
+     ValidityMask mask(bitmask_ptr);
+     D_ASSERT(!mask.RowIsValid(ptr.offset));
+     mask.SetValid(ptr.offset);
+     buffers_with_free_space.insert(ptr.buffer_id);
+
+     D_ASSERT(total_allocations > 0);
+     D_ASSERT(buffers[ptr.buffer_id].allocation_count > 0);
+     buffers[ptr.buffer_id].allocation_count--;
+     total_allocations--;
+ }
+
+ void FixedSizeAllocator::Reset() {
+
+     for (auto &buffer : buffers) {
+         allocator.FreeData(buffer.ptr, BUFFER_ALLOC_SIZE);
+     }
+     buffers.clear();
+     buffers_with_free_space.clear();
+     total_allocations = 0;
+ }
+
+ void FixedSizeAllocator::Merge(FixedSizeAllocator &other) {
+
+     D_ASSERT(allocation_size == other.allocation_size);
+
+     // remember the buffer count and merge the buffers
+     idx_t buffer_count = buffers.size();
+     for (auto &buffer : other.buffers) {
+         buffers.push_back(buffer);
+     }
+     other.buffers.clear();
+
+     // merge the vectors containing all buffers with free space
+     for (auto &buffer_id : other.buffers_with_free_space) {
+         buffers_with_free_space.insert(buffer_id + buffer_count);
+     }
+     other.buffers_with_free_space.clear();
+
+     // add the total allocations
+     total_allocations += other.total_allocations;
+ }
+
+ bool FixedSizeAllocator::InitializeVacuum() {
+
+     auto total_available_allocations = allocations_per_buffer * buffers.size();
+     auto total_free_positions = total_available_allocations - total_allocations;
+
+     // vacuum_count buffers can be freed
+     auto vacuum_count = total_free_positions / allocations_per_buffer / 2;
+
+     // calculate the vacuum threshold adaptively
+     idx_t memory_usage = GetMemoryUsage();
+     idx_t excess_memory_usage = vacuum_count * BUFFER_ALLOC_SIZE;
+     auto excess_percentage = (double)excess_memory_usage / (double)memory_usage;
+     auto threshold = (double)VACUUM_THRESHOLD / 100.0;
+     if (excess_percentage < threshold) {
+         return false;
+     }
+
+     min_vacuum_buffer_id = buffers.size() - vacuum_count;
+
+     // remove all invalid buffers from the available buffer list to ensure that we do not reuse them
+     auto it = buffers_with_free_space.begin();
+     while (it != buffers_with_free_space.end()) {
+         if (*it >= min_vacuum_buffer_id) {
+             it = buffers_with_free_space.erase(it);
+         } else {
+             it++;
+         }
+     }
+
+     return true;
+ }
+
+ void FixedSizeAllocator::FinalizeVacuum() {
+
+     // free all (now unused) buffers
+     while (min_vacuum_buffer_id < buffers.size()) {
+         allocator.FreeData(buffers.back().ptr, BUFFER_ALLOC_SIZE);
+         buffers.pop_back();
+     }
+ }
+
+ SwizzleablePointer FixedSizeAllocator::VacuumPointer(const SwizzleablePointer ptr) {
+
+     // we do not need to adjust the bitmask of the old buffer, because we will free the entire
+     // buffer after the vacuum operation
+
+     auto new_ptr = New();
+     memcpy(Get(new_ptr), Get(ptr), allocation_size);
+     return new_ptr;
+ }
+
+ uint32_t FixedSizeAllocator::GetOffset(ValidityMask &mask, const idx_t allocation_count) {
+
+     auto data = mask.GetData();
+
+     // fills up a buffer sequentially before searching for free bits
+     if (mask.RowIsValid(allocation_count)) {
+         mask.SetInvalid(allocation_count);
+         return allocation_count;
+     }
+
+     // get an entry with free bits
+     for (idx_t entry_idx = 0; entry_idx < bitmask_count; entry_idx++) {
+         if (data[entry_idx] != 0) {
+
+             // find the position of the free bit
+             auto entry = data[entry_idx];
+             idx_t first_valid_bit = 0;
+
+             // this loop finds the position of the rightmost set bit in entry and stores it
+             // in first_valid_bit
+             for (idx_t i = 0; i < 6; i++) {
+                 // set the left half of the bits of this level to zero and test if the entry is still not zero
+                 if (entry & BASE[i]) {
+                     // first valid bit is in the rightmost s[i] bits
+                     // permanently set the left half of the bits to zero
+                     entry &= BASE[i];
+                 } else {
+                     // first valid bit is in the leftmost s[i] bits
+                     // shift by s[i] for the next iteration and add s[i] to the position of the rightmost set bit
+                     entry >>= SHIFT[i];
+                     first_valid_bit += SHIFT[i];
+                 }
+             }
+             D_ASSERT(entry);
+
+             auto prev_bits = entry_idx * sizeof(validity_t) * 8;
+             D_ASSERT(mask.RowIsValid(prev_bits + first_valid_bit));
+             mask.SetInvalid(prev_bits + first_valid_bit);
+             return (prev_bits + first_valid_bit);
+         }
+     }
+
+     throw InternalException("Invalid bitmask of FixedSizeAllocator");
+ }
+
+ } // namespace duckdb
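
The most intricate piece of the new FixedSizeAllocator is GetOffset: each buffer carries a validity bitmask in which a set bit marks a free slot, and the inner loop performs a six-step binary search for the rightmost set bit of a 64-bit entry. Below is a self-contained sketch of that search; the halving masks and shift amounts are illustrative stand-ins, since the actual BASE and SHIFT arrays are declared in fixed_size_allocator.hpp, which is not part of the hunk shown here.

// Standalone sketch of the rightmost-set-bit search used by FixedSizeAllocator::GetOffset.
// The mask and shift tables below are illustrative, not the package's BASE/SHIFT constants.
#include <cassert>
#include <cstdint>

static uint64_t FindRightmostSetBit(uint64_t entry) {
    assert(entry != 0);
    // halving masks: keep the lower 32/16/8/4/2/1 bits at each level
    const uint64_t base[] = {0x00000000FFFFFFFFULL, 0xFFFFULL, 0xFFULL, 0xFULL, 0x3ULL, 0x1ULL};
    const uint8_t shift[] = {32, 16, 8, 4, 2, 1};

    uint64_t position = 0;
    for (int i = 0; i < 6; i++) {
        if (entry & base[i]) {
            // the rightmost set bit sits in the lower half; discard the upper half
            entry &= base[i];
        } else {
            // the rightmost set bit sits in the upper half; shift it down and
            // remember how many bit positions were skipped
            entry >>= shift[i];
            position += shift[i];
        }
    }
    return position; // 0-based index of the rightmost set bit
}

// e.g. FindRightmostSetBit(0b10100) == 2; GetOffset then clears that bit via
// SetInvalid to claim the corresponding slot.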