duckdb 0.8.2-dev4025.0 → 0.8.2-dev4142.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/binding.gyp +1 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/json/buffered_json_reader.cpp +76 -74
  4. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +35 -32
  5. package/src/duckdb/extension/json/include/json_scan.hpp +9 -6
  6. package/src/duckdb/extension/json/json_scan.cpp +124 -121
  7. package/src/duckdb/src/catalog/catalog.cpp +20 -0
  8. package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +5 -0
  9. package/src/duckdb/src/common/arrow/arrow_converter.cpp +3 -0
  10. package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
  11. package/src/duckdb/src/common/sort/partition_state.cpp +5 -1
  12. package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +1 -1
  13. package/src/duckdb/src/core_functions/function_list.cpp +7 -0
  14. package/src/duckdb/src/core_functions/scalar/list/list_cosine_similarity.cpp +78 -0
  15. package/src/duckdb/src/core_functions/scalar/list/list_distance.cpp +72 -0
  16. package/src/duckdb/src/core_functions/scalar/list/list_inner_product.cpp +70 -0
  17. package/src/duckdb/src/execution/index/art/art.cpp +111 -92
  18. package/src/duckdb/src/execution/index/art/iterator.cpp +21 -27
  19. package/src/duckdb/src/execution/index/art/leaf.cpp +72 -153
  20. package/src/duckdb/src/execution/index/art/node.cpp +109 -203
  21. package/src/duckdb/src/execution/index/art/node16.cpp +32 -64
  22. package/src/duckdb/src/execution/index/art/node256.cpp +38 -53
  23. package/src/duckdb/src/execution/index/art/node4.cpp +31 -62
  24. package/src/duckdb/src/execution/index/art/node48.cpp +43 -65
  25. package/src/duckdb/src/execution/index/art/prefix.cpp +70 -141
  26. package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +345 -0
  27. package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +74 -0
  28. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
  29. package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +1 -1
  30. package/src/duckdb/src/function/table/system/duckdb_columns.cpp +3 -1
  31. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  32. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +1 -0
  33. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +2 -0
  34. package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +1 -1
  35. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +51 -0
  36. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +17 -7
  37. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +5 -5
  38. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +10 -16
  39. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +38 -116
  40. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +17 -18
  41. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +17 -23
  42. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +17 -18
  43. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +17 -24
  44. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +16 -22
  45. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +126 -0
  46. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +79 -0
  47. package/src/duckdb/src/include/duckdb/execution/index/index_pointer.hpp +96 -0
  48. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +1 -0
  49. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +1 -0
  50. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +1 -1
  51. package/src/duckdb/src/include/duckdb/storage/block.hpp +1 -1
  52. package/src/duckdb/src/include/duckdb/storage/index.hpp +10 -8
  53. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +3 -0
  54. package/src/duckdb/src/main/extension/extension_helper.cpp +17 -0
  55. package/src/duckdb/src/main/extension/extension_install.cpp +5 -3
  56. package/src/duckdb/src/main/extension/extension_load.cpp +3 -3
  57. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +14 -5
  58. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +2 -3
  59. package/src/duckdb/src/storage/checkpoint_manager.cpp +16 -21
  60. package/src/duckdb/src/storage/data_table.cpp +3 -3
  61. package/src/duckdb/src/storage/index.cpp +7 -1
  62. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +21 -21
  63. package/src/duckdb/src/storage/standard_buffer_manager.cpp +0 -8
  64. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  65. package/src/duckdb/src/storage/table_index_list.cpp +1 -1
  66. package/src/duckdb/src/transaction/commit_state.cpp +5 -1
  67. package/src/duckdb/ub_src_core_functions_scalar_list.cpp +6 -0
  68. package/src/duckdb/ub_src_execution_index.cpp +4 -0
  69. package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
  70. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +0 -238
  71. package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +0 -115
@@ -0,0 +1,345 @@
1
+ #include "duckdb/execution/index/fixed_size_allocator.hpp"
2
+
3
+ #include "duckdb/storage/metadata/metadata_reader.hpp"
4
+
5
+ namespace duckdb {
6
+
7
+ constexpr idx_t FixedSizeAllocator::BASE[];
8
+ constexpr uint8_t FixedSizeAllocator::SHIFT[];
9
+
10
+ FixedSizeAllocator::FixedSizeAllocator(const idx_t segment_size, BlockManager &block_manager)
11
+ : block_manager(block_manager), buffer_manager(block_manager.buffer_manager),
12
+ metadata_manager(block_manager.GetMetadataManager()), segment_size(segment_size), total_segment_count(0) {
13
+
14
+ if (segment_size > Storage::BLOCK_SIZE - sizeof(validity_t)) {
15
+ throw InternalException("The maximum segment size of fixed-size allocators is " +
16
+ to_string(Storage::BLOCK_SIZE - sizeof(validity_t)));
17
+ }
18
+
19
+ // calculate how many segments fit into one buffer (available_segments_per_buffer)
20
+
21
+ idx_t bits_per_value = sizeof(validity_t) * 8;
22
+ idx_t byte_count = 0;
23
+
24
+ bitmask_count = 0;
25
+ available_segments_per_buffer = 0;
26
+
27
+ while (byte_count < Storage::BLOCK_SIZE) {
28
+ if (!bitmask_count || (bitmask_count * bits_per_value) % available_segments_per_buffer == 0) {
29
+ // we need to add another validity_t value to the bitmask, to allow storing another
30
+ // bits_per_value segments on a buffer
31
+ bitmask_count++;
32
+ byte_count += sizeof(validity_t);
33
+ }
34
+
35
+ auto remaining_bytes = Storage::BLOCK_SIZE - byte_count;
36
+ auto remaining_segments = MinValue(remaining_bytes / segment_size, bits_per_value);
37
+
38
+ if (remaining_segments == 0) {
39
+ break;
40
+ }
41
+
42
+ available_segments_per_buffer += remaining_segments;
43
+ byte_count += remaining_segments * segment_size;
44
+ }
45
+
46
+ bitmask_offset = bitmask_count * sizeof(validity_t);
47
+ }
48
+
49
+ IndexPointer FixedSizeAllocator::New() {
50
+
51
+ // no more segments available
52
+ if (buffers_with_free_space.empty()) {
53
+
54
+ // add a new buffer
55
+ auto buffer_id = GetAvailableBufferId();
56
+ FixedSizeBuffer new_buffer(block_manager);
57
+ buffers.insert(make_pair(buffer_id, std::move(new_buffer)));
58
+ buffers_with_free_space.insert(buffer_id);
59
+
60
+ // set the bitmask
61
+ D_ASSERT(buffers.find(buffer_id) != buffers.end());
62
+ auto &buffer = buffers.find(buffer_id)->second;
63
+ ValidityMask mask(reinterpret_cast<validity_t *>(buffer.Get()));
64
+ mask.SetAllValid(available_segments_per_buffer);
65
+ }
66
+
67
+ // return a pointer
68
+ D_ASSERT(!buffers_with_free_space.empty());
69
+ auto buffer_id = uint32_t(*buffers_with_free_space.begin());
70
+
71
+ D_ASSERT(buffers.find(buffer_id) != buffers.end());
72
+ auto &buffer = buffers.find(buffer_id)->second;
73
+ auto bitmask_ptr = reinterpret_cast<validity_t *>(buffer.Get());
74
+ ValidityMask mask(bitmask_ptr);
75
+ auto offset = GetOffset(mask, buffer.segment_count);
76
+
77
+ buffer.segment_count++;
78
+ total_segment_count++;
79
+ if (buffer.segment_count == available_segments_per_buffer) {
80
+ buffers_with_free_space.erase(buffer_id);
81
+ }
82
+
83
+ return IndexPointer(buffer_id, offset);
84
+ }
85
+
86
+ void FixedSizeAllocator::Free(const IndexPointer ptr) {
87
+
88
+ auto buffer_id = ptr.GetBufferId();
89
+ auto offset = ptr.GetOffset();
90
+
91
+ D_ASSERT(buffers.find(buffer_id) != buffers.end());
92
+ auto &buffer = buffers.find(buffer_id)->second;
93
+
94
+ auto bitmask_ptr = reinterpret_cast<validity_t *>(buffer.Get());
95
+ ValidityMask mask(bitmask_ptr);
96
+ D_ASSERT(!mask.RowIsValid(offset));
97
+ mask.SetValid(offset);
98
+ buffers_with_free_space.insert(buffer_id);
99
+
100
+ D_ASSERT(total_segment_count > 0);
101
+ D_ASSERT(buffer.segment_count > 0);
102
+
103
+ buffer.segment_count--;
104
+ total_segment_count--;
105
+ }
106
+
107
+ void FixedSizeAllocator::Reset() {
108
+ for (auto &buffer : buffers) {
109
+ buffer.second.Destroy();
110
+ }
111
+ buffers.clear();
112
+ buffers_with_free_space.clear();
113
+ total_segment_count = 0;
114
+ }
115
+
116
+ idx_t FixedSizeAllocator::GetMemoryUsage() const {
117
+ idx_t memory_usage = 0;
118
+ for (auto &buffer : buffers) {
119
+ if (buffer.second.InMemory()) {
120
+ memory_usage += Storage::BLOCK_SIZE;
121
+ }
122
+ }
123
+ return memory_usage;
124
+ }
125
+
126
+ void FixedSizeAllocator::Merge(FixedSizeAllocator &other) {
127
+
128
+ D_ASSERT(segment_size == other.segment_size);
129
+
130
+ // remember the buffer count and merge the buffers
131
+ idx_t upper_bound_id = GetUpperBoundBufferId();
132
+ for (auto &buffer : other.buffers) {
133
+ buffers.insert(make_pair(buffer.first + upper_bound_id, std::move(buffer.second)));
134
+ }
135
+ other.buffers.clear();
136
+
137
+ // merge the buffers with free spaces
138
+ for (auto &buffer_id : other.buffers_with_free_space) {
139
+ buffers_with_free_space.insert(buffer_id + upper_bound_id);
140
+ }
141
+ other.buffers_with_free_space.clear();
142
+
143
+ // add the total allocations
144
+ total_segment_count += other.total_segment_count;
145
+ }
146
+
147
+ bool FixedSizeAllocator::InitializeVacuum() {
148
+
149
+ // NOTE: we do not vacuum buffers that are not in memory. We might consider changing this
150
+ // in the future, although buffers on disk should almost never be eligible for a vacuum
151
+
152
+ if (total_segment_count == 0) {
153
+ Reset();
154
+ return false;
155
+ }
156
+
157
+ multimap<idx_t, idx_t> temporary_vacuum_buffers;
158
+ D_ASSERT(vacuum_buffers.empty());
159
+ idx_t available_segments_in_memory = 0;
160
+
161
+ for (auto &buffer : buffers) {
162
+ buffer.second.vacuum = false;
163
+ if (buffer.second.InMemory()) {
164
+ auto available_segments_in_buffer = available_segments_per_buffer - buffer.second.segment_count;
165
+ available_segments_in_memory += available_segments_in_buffer;
166
+ temporary_vacuum_buffers.emplace(available_segments_in_buffer, buffer.first);
167
+ }
168
+ }
169
+
170
+ // no buffers in memory
171
+ if (temporary_vacuum_buffers.empty()) {
172
+ return false;
173
+ }
174
+
175
+ auto excess_buffer_count = available_segments_in_memory / available_segments_per_buffer;
176
+
177
+ // calculate the vacuum threshold adaptively
178
+ D_ASSERT(excess_buffer_count < temporary_vacuum_buffers.size());
179
+ idx_t memory_usage = GetMemoryUsage();
180
+ idx_t excess_memory_usage = excess_buffer_count * Storage::BLOCK_SIZE;
181
+ auto excess_percentage = double(excess_memory_usage) / double(memory_usage);
182
+ auto threshold = double(VACUUM_THRESHOLD) / 100.0;
183
+ if (excess_percentage < threshold) {
184
+ return false;
185
+ }
186
+
187
+ D_ASSERT(excess_buffer_count <= temporary_vacuum_buffers.size());
188
+ D_ASSERT(temporary_vacuum_buffers.size() <= buffers.size());
189
+
190
+ // erasing from a multimap, we vacuum the buffers with the most free spaces (least full)
191
+ while (temporary_vacuum_buffers.size() != excess_buffer_count) {
192
+ temporary_vacuum_buffers.erase(temporary_vacuum_buffers.begin());
193
+ }
194
+
195
+ // adjust the buffers, and erase all to-be-vacuumed buffers from the available buffer list
196
+ for (auto &vacuum_buffer : temporary_vacuum_buffers) {
197
+ auto buffer_id = vacuum_buffer.second;
198
+ D_ASSERT(buffers.find(buffer_id) != buffers.end());
199
+ buffers.find(buffer_id)->second.vacuum = true;
200
+ buffers_with_free_space.erase(buffer_id);
201
+ }
202
+
203
+ for (auto &vacuum_buffer : temporary_vacuum_buffers) {
204
+ vacuum_buffers.insert(vacuum_buffer.second);
205
+ }
206
+
207
+ return true;
208
+ }
209
+
210
+ void FixedSizeAllocator::FinalizeVacuum() {
211
+
212
+ for (auto &buffer_id : vacuum_buffers) {
213
+ D_ASSERT(buffers.find(buffer_id) != buffers.end());
214
+ auto &buffer = buffers.find(buffer_id)->second;
215
+ D_ASSERT(buffer.InMemory());
216
+ buffer.Destroy();
217
+ buffers.erase(buffer_id);
218
+ }
219
+ vacuum_buffers.clear();
220
+ }
221
+
222
+ IndexPointer FixedSizeAllocator::VacuumPointer(const IndexPointer ptr) {
223
+
224
+ // we do not need to adjust the bitmask of the old buffer, because we will free the entire
225
+ // buffer after the vacuum operation
226
+
227
+ auto new_ptr = New();
228
+ // new increases the allocation count, we need to counter that here
229
+ total_segment_count--;
230
+
231
+ memcpy(Get(new_ptr), Get(ptr), segment_size);
232
+ return new_ptr;
233
+ }
234
+
235
+ BlockPointer FixedSizeAllocator::Serialize(MetadataWriter &writer) {
236
+
237
+ for (auto &buffer : buffers) {
238
+ buffer.second.Serialize();
239
+ }
240
+
241
+ auto block_pointer = writer.GetBlockPointer();
242
+ writer.Write(segment_size);
243
+ writer.Write(static_cast<idx_t>(buffers.size()));
244
+ writer.Write(static_cast<idx_t>(buffers_with_free_space.size()));
245
+
246
+ for (auto &buffer : buffers) {
247
+ writer.Write(buffer.first);
248
+ writer.Write(buffer.second.BlockId());
249
+ writer.Write(buffer.second.segment_count);
250
+ }
251
+ for (auto &buffer_id : buffers_with_free_space) {
252
+ writer.Write(buffer_id);
253
+ }
254
+
255
+ return block_pointer;
256
+ }
257
+
258
+ void FixedSizeAllocator::Deserialize(const BlockPointer &block_pointer) {
259
+
260
+ MetadataReader reader(metadata_manager, block_pointer);
261
+ segment_size = reader.Read<idx_t>();
262
+ auto buffer_count = reader.Read<idx_t>();
263
+ auto buffers_with_free_space_count = reader.Read<idx_t>();
264
+
265
+ total_segment_count = 0;
266
+
267
+ for (idx_t i = 0; i < buffer_count; i++) {
268
+ auto buffer_id = reader.Read<idx_t>();
269
+ auto block_id = reader.Read<block_id_t>();
270
+ auto buffer_segment_count = reader.Read<idx_t>();
271
+ FixedSizeBuffer new_buffer(block_manager, buffer_segment_count, block_id);
272
+ buffers.insert(make_pair(buffer_id, std::move(new_buffer)));
273
+ total_segment_count += buffer_segment_count;
274
+ }
275
+ for (idx_t i = 0; i < buffers_with_free_space_count; i++) {
276
+ buffers_with_free_space.insert(reader.Read<idx_t>());
277
+ }
278
+ }
279
+
280
+ uint32_t FixedSizeAllocator::GetOffset(ValidityMask &mask, const idx_t segment_count) {
281
+
282
+ auto data = mask.GetData();
283
+
284
+ // fills up a buffer sequentially before searching for free bits
285
+ if (mask.RowIsValid(segment_count)) {
286
+ mask.SetInvalid(segment_count);
287
+ return segment_count;
288
+ }
289
+
290
+ for (idx_t entry_idx = 0; entry_idx < bitmask_count; entry_idx++) {
291
+ // get an entry with free bits
292
+ if (data[entry_idx] == 0) {
293
+ continue;
294
+ }
295
+
296
+ // find the position of the free bit
297
+ auto entry = data[entry_idx];
298
+ idx_t first_valid_bit = 0;
299
+
300
+ // this loop finds the position of the rightmost set bit in entry and stores it
301
+ // in first_valid_bit
302
+ for (idx_t i = 0; i < 6; i++) {
303
+ // set the left half of the bits of this level to zero and test if the entry is still not zero
304
+ if (entry & BASE[i]) {
305
+ // first valid bit is in the rightmost s[i] bits
306
+ // permanently set the left half of the bits to zero
307
+ entry &= BASE[i];
308
+ } else {
309
+ // first valid bit is in the leftmost s[i] bits
310
+ // shift by s[i] for the next iteration and add s[i] to the position of the rightmost set bit
311
+ entry >>= SHIFT[i];
312
+ first_valid_bit += SHIFT[i];
313
+ }
314
+ }
315
+ D_ASSERT(entry);
316
+
317
+ auto prev_bits = entry_idx * sizeof(validity_t) * 8;
318
+ D_ASSERT(mask.RowIsValid(prev_bits + first_valid_bit));
319
+ mask.SetInvalid(prev_bits + first_valid_bit);
320
+ return (prev_bits + first_valid_bit);
321
+ }
322
+
323
+ throw InternalException("Invalid bitmask for FixedSizeAllocator");
324
+ }
325
+
326
+ idx_t FixedSizeAllocator::GetAvailableBufferId() const {
327
+ idx_t buffer_id = buffers.size();
328
+ while (buffers.find(buffer_id) != buffers.end()) {
329
+ D_ASSERT(buffer_id > 0);
330
+ buffer_id--;
331
+ }
332
+ return buffer_id;
333
+ }
334
+
335
+ idx_t FixedSizeAllocator::GetUpperBoundBufferId() const {
336
+ idx_t upper_bound_id = 0;
337
+ for (auto &buffer : buffers) {
338
+ if (buffer.first >= upper_bound_id) {
339
+ upper_bound_id = buffer.first + 1;
340
+ }
341
+ }
342
+ return upper_bound_id;
343
+ }
344
+
345
+ } // namespace duckdb
@@ -0,0 +1,74 @@
1
+ #include "duckdb/execution/index/fixed_size_buffer.hpp"
2
+
3
+ #include "duckdb/storage/block_manager.hpp"
4
+ #include "duckdb/storage/buffer_manager.hpp"
5
+
6
+ namespace duckdb {
7
+
8
+ FixedSizeBuffer::FixedSizeBuffer(BlockManager &block_manager)
9
+ : block_manager(block_manager), segment_count(0), dirty(false), vacuum(false), block_handle(nullptr) {
10
+
11
+ auto &buffer_manager = block_manager.buffer_manager;
12
+ buffer_handle = buffer_manager.Allocate(Storage::BLOCK_SIZE, false, &block_handle);
13
+ }
14
+
15
+ FixedSizeBuffer::FixedSizeBuffer(BlockManager &block_manager, const idx_t segment_count, const block_id_t &block_id)
16
+ : block_manager(block_manager), segment_count(segment_count), dirty(false), vacuum(false) {
17
+
18
+ D_ASSERT(block_id < MAXIMUM_BLOCK);
19
+ block_handle = block_manager.RegisterBlock(block_id);
20
+ D_ASSERT(BlockId() < MAXIMUM_BLOCK);
21
+ }
22
+
23
+ void FixedSizeBuffer::Destroy() {
24
+ if (InMemory()) {
25
+ buffer_handle.Destroy();
26
+ }
27
+ if (OnDisk()) {
28
+ block_manager.MarkBlockAsFree(BlockId());
29
+ }
30
+ }
31
+
32
+ void FixedSizeBuffer::Serialize() {
33
+
34
+ if (!InMemory()) {
35
+ if (!OnDisk() || dirty) {
36
+ throw InternalException("invalid/missing buffer in FixedSizeAllocator");
37
+ }
38
+ return;
39
+ }
40
+ if (!dirty && OnDisk()) {
41
+ return;
42
+ }
43
+
44
+ // the buffer is in memory
45
+ D_ASSERT(InMemory());
46
+ // the buffer never was on disk, or there were changes to it after loading it from disk
47
+ D_ASSERT(!OnDisk() || dirty);
48
+
49
+ // we persist any changes, so the buffer is no longer dirty
50
+ dirty = false;
51
+
52
+ if (!OnDisk()) {
53
+ // temporary block - convert to persistent
54
+ auto block_id = block_manager.GetFreeBlockId();
55
+ D_ASSERT(block_id < MAXIMUM_BLOCK);
56
+ block_handle = block_manager.ConvertToPersistent(block_id, std::move(block_handle));
57
+ buffer_handle.Destroy();
58
+
59
+ } else {
60
+ // already a persistent block - only need to write it
61
+ auto block_id = block_handle->BlockId();
62
+ D_ASSERT(block_id < MAXIMUM_BLOCK);
63
+ block_manager.Write(buffer_handle.GetFileBuffer(), block_id);
64
+ }
65
+ }
66
+
67
+ void FixedSizeBuffer::Pin() {
68
+
69
+ auto &buffer_manager = block_manager.buffer_manager;
70
+ D_ASSERT(BlockId() < MAXIMUM_BLOCK);
71
+ buffer_handle = BufferHandle(buffer_manager.Pin(block_handle));
72
+ }
73
+
74
+ } // namespace duckdb
@@ -564,7 +564,7 @@ WindowGlobalSourceState::Task WindowGlobalSourceState::NextTask(idx_t hash_bin)
564
564
 
565
565
  // If there is nothing to steal but there are unfinished partitions,
566
566
  // yield until any pending builds are done.
567
- TaskScheduler::GetScheduler(context).YieldThread();
567
+ TaskScheduler::YieldThread();
568
568
  }
569
569
 
570
570
  return Task();
@@ -89,7 +89,7 @@ SinkResultType PhysicalCreateARTIndex::SinkUnsorted(Vector &row_identifiers, Ope
89
89
  // insert the row IDs
90
90
  auto &art = l_state.local_index->Cast<ART>();
91
91
  for (idx_t i = 0; i < count; i++) {
92
- if (!art.Insert(*art.tree, l_state.keys[i], 0, row_ids[i])) {
92
+ if (!art.Insert(art.tree, l_state.keys[i], 0, row_ids[i])) {
93
93
  throw ConstraintException("Data contains duplicates on indexed column(s)");
94
94
  }
95
95
  }
@@ -131,7 +131,9 @@ public:
131
131
  }
132
132
  const Value ColumnDefault(idx_t col) override {
133
133
  auto &column = entry.GetColumn(LogicalIndex(col));
134
- if (column.DefaultValue()) {
134
+ if (column.Generated()) {
135
+ return Value(column.GeneratedExpression().ToString());
136
+ } else if (column.DefaultValue()) {
135
137
  return Value(column.DefaultValue()->ToString());
136
138
  }
137
139
  return Value();
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.8.2-dev4025"
2
+ #define DUCKDB_VERSION "0.8.2-dev4142"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "9698e9e6a8"
5
+ #define DUCKDB_SOURCE_ID "d5c4422f72"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -305,6 +305,7 @@ public:
305
305
  static void AutoloadExtensionByConfigName(ClientContext &context, const string &configuration_name);
306
306
  //! Autoload the extension required for `function_name` or throw a CatalogException
307
307
  static bool AutoLoadExtensionByCatalogEntry(ClientContext &context, CatalogType type, const string &entry_name);
308
+ DUCKDB_API static bool TryAutoLoad(ClientContext &context, const string &extension_name) noexcept;
308
309
 
309
310
  protected:
310
311
  //! Reference to the database
@@ -24,6 +24,8 @@ public:
24
24
  public:
25
25
  string GetSchemaName() const override;
26
26
  string GetTableName() const override;
27
+ //! This drops in-memory index data and marks all blocks on disk as free blocks, allowing them to be reclaimed
28
+ void CommitDrop();
27
29
  };
28
30
 
29
31
  } // namespace duckdb
@@ -34,7 +34,7 @@ public:
34
34
  void Invalidate() {
35
35
  index = INVALID_INDEX;
36
36
  }
37
- idx_t GetIndex() {
37
+ idx_t GetIndex() const {
38
38
  if (index == INVALID_INDEX) {
39
39
  throw InternalException("Attempting to get the index of an optional_idx that is not set");
40
40
  }
@@ -219,4 +219,55 @@ struct ListRangeFun {
219
219
  static ScalarFunctionSet GetFunctions();
220
220
  };
221
221
 
222
+ struct ListCosineSimilarityFun { //! Name, parameter list, description and example of list_cosine_similarity
223
+ static constexpr const char *Name = "list_cosine_similarity";
224
+ static constexpr const char *Parameters = "list1,list2";
225
+ static constexpr const char *Description = "Compute the cosine similarity between two lists.";
226
+ static constexpr const char *Example = "list_cosine_similarity([1, 2, 3], [1, 2, 3])";
227
+
228
+ static ScalarFunctionSet GetFunctions(); //! Returns a ScalarFunctionSet; defined outside this header
229
+ };
230
+
231
+ struct ListCosineSimilarityFunAlias { //! Associates the name "<=>" with ListCosineSimilarityFun via ALIAS
232
+ using ALIAS = ListCosineSimilarityFun;
233
+
234
+ static constexpr const char *Name = "<=>";
235
+ };
236
+
237
+ struct ListDistanceFun { //! Name, parameter list, description and example of list_distance
238
+ static constexpr const char *Name = "list_distance";
239
+ static constexpr const char *Parameters = "list1,list2";
240
+ static constexpr const char *Description = "Compute the distance between two lists.";
241
+ static constexpr const char *Example = "list_distance([1, 2, 3], [1, 2, 3])";
242
+
243
+ static ScalarFunctionSet GetFunctions(); //! Returns a ScalarFunctionSet; defined outside this header
244
+ };
245
+
246
+ struct ListDistanceFunAlias { //! Associates the name "<->" with ListDistanceFun via ALIAS
247
+ using ALIAS = ListDistanceFun;
248
+
249
+ static constexpr const char *Name = "<->";
250
+ };
251
+
252
+ struct ListInnerProductFun { //! Name, parameter list, description and example of list_inner_product
253
+ static constexpr const char *Name = "list_inner_product";
254
+ static constexpr const char *Parameters = "list1,list2";
255
+ static constexpr const char *Description = "Compute the inner product between two lists.";
256
+ static constexpr const char *Example = "list_inner_product([1, 2, 3], [1, 2, 3])";
257
+
258
+ static ScalarFunctionSet GetFunctions(); //! Returns a ScalarFunctionSet; defined outside this header
259
+ };
260
+
261
+ struct ListDotProductFun { //! Associates the name "list_dot_product" with ListInnerProductFun via ALIAS
262
+ using ALIAS = ListInnerProductFun;
263
+
264
+ static constexpr const char *Name = "list_dot_product";
265
+ };
266
+
267
+ struct ListInnerProductFunAlias { //! Associates the name "<#>" with ListInnerProductFun via ALIAS
268
+ using ALIAS = ListInnerProductFun;
269
+
270
+ static constexpr const char *Name = "<#>";
271
+ };
272
+
222
273
  } // namespace duckdb
@@ -9,6 +9,8 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/storage/index.hpp"
12
+ #include "duckdb/execution/index/art/node.hpp"
13
+ #include "duckdb/common/array.hpp"
12
14
 
13
15
  namespace duckdb {
14
16
 
@@ -19,7 +21,6 @@ enum class VerifyExistenceType : uint8_t {
19
21
  DELETE_FK = 2 // delete from a table that has a foreign key
20
22
  };
21
23
  class ConflictManager;
22
- class Node;
23
24
  class ARTKey;
24
25
  class FixedSizeAllocator;
25
26
 
@@ -31,18 +32,22 @@ struct ARTFlags {
31
32
  };
32
33
 
33
34
  class ART : public Index {
35
+ public:
36
+ //! FixedSizeAllocator count of the ART
37
+ static constexpr uint8_t ALLOCATOR_COUNT = 6;
38
+
34
39
  public:
35
40
  //! Constructs an ART
36
41
  ART(const vector<column_t> &column_ids, TableIOManager &table_io_manager,
37
42
  const vector<unique_ptr<Expression>> &unbound_expressions, const IndexConstraintType constraint_type,
38
- AttachedDatabase &db, const shared_ptr<vector<FixedSizeAllocator>> &allocators_ptr = nullptr,
39
- BlockPointer block = BlockPointer());
40
- ~ART() override;
43
+ AttachedDatabase &db,
44
+ const shared_ptr<array<unique_ptr<FixedSizeAllocator>, ALLOCATOR_COUNT>> &allocators_ptr = nullptr,
45
+ const BlockPointer &block = BlockPointer());
41
46
 
42
47
  //! Root of the tree
43
- unique_ptr<Node> tree;
48
+ Node tree = Node();
44
49
  //! Fixed-size allocators holding the ART nodes
45
- shared_ptr<vector<FixedSizeAllocator>> allocators;
50
+ shared_ptr<array<unique_ptr<FixedSizeAllocator>, ALLOCATOR_COUNT>> allocators;
46
51
  //! True, if the ART owns its data
47
52
  bool owns_data;
48
53
 
@@ -66,6 +71,8 @@ public:
66
71
  void VerifyAppend(DataChunk &chunk) override;
67
72
  //! Verify that data can be appended to the index without a constraint violation using the conflict manager
68
73
  void VerifyAppend(DataChunk &chunk, ConflictManager &conflict_manager) override;
74
+ //! Deletes all data from the index. The lock obtained from InitializeLock must be held
75
+ void CommitDrop(IndexLock &index_lock) override;
69
76
  //! Delete a chunk of entries from the index. The lock obtained from InitializeLock must be held
70
77
  void Delete(IndexLock &lock, DataChunk &entries, Vector &row_identifiers) override;
71
78
  //! Insert a chunk of entries into the index
@@ -103,7 +110,7 @@ public:
103
110
  string VerifyAndToString(IndexLock &state, const bool only_verify) override;
104
111
 
105
112
  //! Find the node with a matching key, or return nullptr if not found
106
- Node Lookup(Node node, const ARTKey &key, idx_t depth);
113
+ optional_ptr<const Node> Lookup(const Node &node, const ARTKey &key, idx_t depth);
107
114
  //! Insert a key into the tree
108
115
  bool Insert(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id);
109
116
 
@@ -136,6 +143,9 @@ private:
136
143
  //! Internal function to return the string representation of the ART,
137
144
  //! or only traverses and verifies the index
138
145
  string VerifyAndToStringInternal(const bool only_verify);
146
+
147
+ //! Deserialize the allocators of the ART
148
+ void Deserialize(const BlockPointer &pointer);
139
149
  };
140
150
 
141
151
  } // namespace duckdb
@@ -41,11 +41,11 @@ public:
41
41
  return key_bytes[idx];
42
42
  }
43
43
  //! Greater than operator
44
- bool operator>(const ARTKey &k) const;
44
+ bool operator>(const ARTKey &key) const;
45
45
  //! Greater than or equal to operator
46
- bool operator>=(const ARTKey &k) const;
46
+ bool operator>=(const ARTKey &key) const;
47
47
  //! Equal to operator
48
- bool operator==(const ARTKey &k) const;
48
+ bool operator==(const ARTKey &key) const;
49
49
 
50
50
  private:
51
51
  vector<uint8_t> key_bytes;
@@ -62,10 +62,10 @@ public:
62
62
  //! If upper_bound is the empty ARTKey, then there is no upper bound
63
63
  bool Scan(const ARTKey &upper_bound, const idx_t max_count, vector<row_t> &result_ids, const bool equal);
64
64
  //! Finds the minimum (leaf) of the current subtree
65
- void FindMinimum(Node &node);
65
+ void FindMinimum(const Node &node);
66
66
  //! Finds the lower bound of the ART and adds the nodes to the stack. Returns false, if the lower
67
67
  //! bound exceeds the maximum value of the ART
68
- bool LowerBound(Node &node, const ARTKey &key, const bool equal, idx_t depth);
68
+ bool LowerBound(const Node &node, const ARTKey &key, const bool equal, idx_t depth);
69
69
 
70
70
  private:
71
71
  //! Stack of nodes from the root to the currently active node