duckdb 0.8.2-dev4025.0 → 0.8.2-dev4142.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +1 -0
- package/package.json +1 -1
- package/src/duckdb/extension/json/buffered_json_reader.cpp +76 -74
- package/src/duckdb/extension/json/include/buffered_json_reader.hpp +35 -32
- package/src/duckdb/extension/json/include/json_scan.hpp +9 -6
- package/src/duckdb/extension/json/json_scan.cpp +124 -121
- package/src/duckdb/src/catalog/catalog.cpp +20 -0
- package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +5 -0
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +3 -0
- package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
- package/src/duckdb/src/common/sort/partition_state.cpp +5 -1
- package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +1 -1
- package/src/duckdb/src/core_functions/function_list.cpp +7 -0
- package/src/duckdb/src/core_functions/scalar/list/list_cosine_similarity.cpp +78 -0
- package/src/duckdb/src/core_functions/scalar/list/list_distance.cpp +72 -0
- package/src/duckdb/src/core_functions/scalar/list/list_inner_product.cpp +70 -0
- package/src/duckdb/src/execution/index/art/art.cpp +111 -92
- package/src/duckdb/src/execution/index/art/iterator.cpp +21 -27
- package/src/duckdb/src/execution/index/art/leaf.cpp +72 -153
- package/src/duckdb/src/execution/index/art/node.cpp +109 -203
- package/src/duckdb/src/execution/index/art/node16.cpp +32 -64
- package/src/duckdb/src/execution/index/art/node256.cpp +38 -53
- package/src/duckdb/src/execution/index/art/node4.cpp +31 -62
- package/src/duckdb/src/execution/index/art/node48.cpp +43 -65
- package/src/duckdb/src/execution/index/art/prefix.cpp +70 -141
- package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +345 -0
- package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +74 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
- package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_columns.cpp +3 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +1 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +1 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +51 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +17 -7
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +5 -5
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +10 -16
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +38 -116
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +17 -18
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +17 -23
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +17 -18
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +17 -24
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +16 -22
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +126 -0
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +79 -0
- package/src/duckdb/src/include/duckdb/execution/index/index_pointer.hpp +96 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/block.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/index.hpp +10 -8
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +3 -0
- package/src/duckdb/src/main/extension/extension_helper.cpp +17 -0
- package/src/duckdb/src/main/extension/extension_install.cpp +5 -3
- package/src/duckdb/src/main/extension/extension_load.cpp +3 -3
- package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +14 -5
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +2 -3
- package/src/duckdb/src/storage/checkpoint_manager.cpp +16 -21
- package/src/duckdb/src/storage/data_table.cpp +3 -3
- package/src/duckdb/src/storage/index.cpp +7 -1
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +21 -21
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +0 -8
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table_index_list.cpp +1 -1
- package/src/duckdb/src/transaction/commit_state.cpp +5 -1
- package/src/duckdb/ub_src_core_functions_scalar_list.cpp +6 -0
- package/src/duckdb/ub_src_execution_index.cpp +4 -0
- package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +0 -238
- package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +0 -115
@@ -0,0 +1,345 @@
|
|
1
|
+
#include "duckdb/execution/index/fixed_size_allocator.hpp"
|
2
|
+
|
3
|
+
#include "duckdb/storage/metadata/metadata_reader.hpp"
|
4
|
+
|
5
|
+
namespace duckdb {
|
6
|
+
|
7
|
+
constexpr idx_t FixedSizeAllocator::BASE[];
|
8
|
+
constexpr uint8_t FixedSizeAllocator::SHIFT[];
|
9
|
+
|
10
|
+
FixedSizeAllocator::FixedSizeAllocator(const idx_t segment_size, BlockManager &block_manager)
|
11
|
+
: block_manager(block_manager), buffer_manager(block_manager.buffer_manager),
|
12
|
+
metadata_manager(block_manager.GetMetadataManager()), segment_size(segment_size), total_segment_count(0) {
|
13
|
+
|
14
|
+
if (segment_size > Storage::BLOCK_SIZE - sizeof(validity_t)) {
|
15
|
+
throw InternalException("The maximum segment size of fixed-size allocators is " +
|
16
|
+
to_string(Storage::BLOCK_SIZE - sizeof(validity_t)));
|
17
|
+
}
|
18
|
+
|
19
|
+
// calculate how many segments fit into one buffer (available_segments_per_buffer)
|
20
|
+
|
21
|
+
idx_t bits_per_value = sizeof(validity_t) * 8;
|
22
|
+
idx_t byte_count = 0;
|
23
|
+
|
24
|
+
bitmask_count = 0;
|
25
|
+
available_segments_per_buffer = 0;
|
26
|
+
|
27
|
+
while (byte_count < Storage::BLOCK_SIZE) {
|
28
|
+
if (!bitmask_count || (bitmask_count * bits_per_value) % available_segments_per_buffer == 0) {
|
29
|
+
// we need to add another validity_t value to the bitmask, to allow storing another
|
30
|
+
// bits_per_value segments on a buffer
|
31
|
+
bitmask_count++;
|
32
|
+
byte_count += sizeof(validity_t);
|
33
|
+
}
|
34
|
+
|
35
|
+
auto remaining_bytes = Storage::BLOCK_SIZE - byte_count;
|
36
|
+
auto remaining_segments = MinValue(remaining_bytes / segment_size, bits_per_value);
|
37
|
+
|
38
|
+
if (remaining_segments == 0) {
|
39
|
+
break;
|
40
|
+
}
|
41
|
+
|
42
|
+
available_segments_per_buffer += remaining_segments;
|
43
|
+
byte_count += remaining_segments * segment_size;
|
44
|
+
}
|
45
|
+
|
46
|
+
bitmask_offset = bitmask_count * sizeof(validity_t);
|
47
|
+
}
|
48
|
+
|
49
|
+
IndexPointer FixedSizeAllocator::New() {
|
50
|
+
|
51
|
+
// no more segments available
|
52
|
+
if (buffers_with_free_space.empty()) {
|
53
|
+
|
54
|
+
// add a new buffer
|
55
|
+
auto buffer_id = GetAvailableBufferId();
|
56
|
+
FixedSizeBuffer new_buffer(block_manager);
|
57
|
+
buffers.insert(make_pair(buffer_id, std::move(new_buffer)));
|
58
|
+
buffers_with_free_space.insert(buffer_id);
|
59
|
+
|
60
|
+
// set the bitmask
|
61
|
+
D_ASSERT(buffers.find(buffer_id) != buffers.end());
|
62
|
+
auto &buffer = buffers.find(buffer_id)->second;
|
63
|
+
ValidityMask mask(reinterpret_cast<validity_t *>(buffer.Get()));
|
64
|
+
mask.SetAllValid(available_segments_per_buffer);
|
65
|
+
}
|
66
|
+
|
67
|
+
// return a pointer
|
68
|
+
D_ASSERT(!buffers_with_free_space.empty());
|
69
|
+
auto buffer_id = uint32_t(*buffers_with_free_space.begin());
|
70
|
+
|
71
|
+
D_ASSERT(buffers.find(buffer_id) != buffers.end());
|
72
|
+
auto &buffer = buffers.find(buffer_id)->second;
|
73
|
+
auto bitmask_ptr = reinterpret_cast<validity_t *>(buffer.Get());
|
74
|
+
ValidityMask mask(bitmask_ptr);
|
75
|
+
auto offset = GetOffset(mask, buffer.segment_count);
|
76
|
+
|
77
|
+
buffer.segment_count++;
|
78
|
+
total_segment_count++;
|
79
|
+
if (buffer.segment_count == available_segments_per_buffer) {
|
80
|
+
buffers_with_free_space.erase(buffer_id);
|
81
|
+
}
|
82
|
+
|
83
|
+
return IndexPointer(buffer_id, offset);
|
84
|
+
}
|
85
|
+
|
86
|
+
void FixedSizeAllocator::Free(const IndexPointer ptr) {
|
87
|
+
|
88
|
+
auto buffer_id = ptr.GetBufferId();
|
89
|
+
auto offset = ptr.GetOffset();
|
90
|
+
|
91
|
+
D_ASSERT(buffers.find(buffer_id) != buffers.end());
|
92
|
+
auto &buffer = buffers.find(buffer_id)->second;
|
93
|
+
|
94
|
+
auto bitmask_ptr = reinterpret_cast<validity_t *>(buffer.Get());
|
95
|
+
ValidityMask mask(bitmask_ptr);
|
96
|
+
D_ASSERT(!mask.RowIsValid(offset));
|
97
|
+
mask.SetValid(offset);
|
98
|
+
buffers_with_free_space.insert(buffer_id);
|
99
|
+
|
100
|
+
D_ASSERT(total_segment_count > 0);
|
101
|
+
D_ASSERT(buffer.segment_count > 0);
|
102
|
+
|
103
|
+
buffer.segment_count--;
|
104
|
+
total_segment_count--;
|
105
|
+
}
|
106
|
+
|
107
|
+
void FixedSizeAllocator::Reset() {
|
108
|
+
for (auto &buffer : buffers) {
|
109
|
+
buffer.second.Destroy();
|
110
|
+
}
|
111
|
+
buffers.clear();
|
112
|
+
buffers_with_free_space.clear();
|
113
|
+
total_segment_count = 0;
|
114
|
+
}
|
115
|
+
|
116
|
+
idx_t FixedSizeAllocator::GetMemoryUsage() const {
|
117
|
+
idx_t memory_usage = 0;
|
118
|
+
for (auto &buffer : buffers) {
|
119
|
+
if (buffer.second.InMemory()) {
|
120
|
+
memory_usage += Storage::BLOCK_SIZE;
|
121
|
+
}
|
122
|
+
}
|
123
|
+
return memory_usage;
|
124
|
+
}
|
125
|
+
|
126
|
+
void FixedSizeAllocator::Merge(FixedSizeAllocator &other) {
|
127
|
+
|
128
|
+
D_ASSERT(segment_size == other.segment_size);
|
129
|
+
|
130
|
+
// remember the buffer count and merge the buffers
|
131
|
+
idx_t upper_bound_id = GetUpperBoundBufferId();
|
132
|
+
for (auto &buffer : other.buffers) {
|
133
|
+
buffers.insert(make_pair(buffer.first + upper_bound_id, std::move(buffer.second)));
|
134
|
+
}
|
135
|
+
other.buffers.clear();
|
136
|
+
|
137
|
+
// merge the buffers with free spaces
|
138
|
+
for (auto &buffer_id : other.buffers_with_free_space) {
|
139
|
+
buffers_with_free_space.insert(buffer_id + upper_bound_id);
|
140
|
+
}
|
141
|
+
other.buffers_with_free_space.clear();
|
142
|
+
|
143
|
+
// add the total allocations
|
144
|
+
total_segment_count += other.total_segment_count;
|
145
|
+
}
|
146
|
+
|
147
|
+
bool FixedSizeAllocator::InitializeVacuum() {
|
148
|
+
|
149
|
+
// NOTE: we do not vacuum buffers that are not in memory. We might consider changing this
|
150
|
+
// in the future, although buffers on disk should almost never be eligible for a vacuum
|
151
|
+
|
152
|
+
if (total_segment_count == 0) {
|
153
|
+
Reset();
|
154
|
+
return false;
|
155
|
+
}
|
156
|
+
|
157
|
+
multimap<idx_t, idx_t> temporary_vacuum_buffers;
|
158
|
+
D_ASSERT(vacuum_buffers.empty());
|
159
|
+
idx_t available_segments_in_memory = 0;
|
160
|
+
|
161
|
+
for (auto &buffer : buffers) {
|
162
|
+
buffer.second.vacuum = false;
|
163
|
+
if (buffer.second.InMemory()) {
|
164
|
+
auto available_segments_in_buffer = available_segments_per_buffer - buffer.second.segment_count;
|
165
|
+
available_segments_in_memory += available_segments_in_buffer;
|
166
|
+
temporary_vacuum_buffers.emplace(available_segments_in_buffer, buffer.first);
|
167
|
+
}
|
168
|
+
}
|
169
|
+
|
170
|
+
// no buffers in memory
|
171
|
+
if (temporary_vacuum_buffers.empty()) {
|
172
|
+
return false;
|
173
|
+
}
|
174
|
+
|
175
|
+
auto excess_buffer_count = available_segments_in_memory / available_segments_per_buffer;
|
176
|
+
|
177
|
+
// calculate the vacuum threshold adaptively
|
178
|
+
D_ASSERT(excess_buffer_count < temporary_vacuum_buffers.size());
|
179
|
+
idx_t memory_usage = GetMemoryUsage();
|
180
|
+
idx_t excess_memory_usage = excess_buffer_count * Storage::BLOCK_SIZE;
|
181
|
+
auto excess_percentage = double(excess_memory_usage) / double(memory_usage);
|
182
|
+
auto threshold = double(VACUUM_THRESHOLD) / 100.0;
|
183
|
+
if (excess_percentage < threshold) {
|
184
|
+
return false;
|
185
|
+
}
|
186
|
+
|
187
|
+
D_ASSERT(excess_buffer_count <= temporary_vacuum_buffers.size());
|
188
|
+
D_ASSERT(temporary_vacuum_buffers.size() <= buffers.size());
|
189
|
+
|
190
|
+
// erasing from a multimap, we vacuum the buffers with the most free spaces (least full)
|
191
|
+
while (temporary_vacuum_buffers.size() != excess_buffer_count) {
|
192
|
+
temporary_vacuum_buffers.erase(temporary_vacuum_buffers.begin());
|
193
|
+
}
|
194
|
+
|
195
|
+
// adjust the buffers, and erase all to-be-vacuumed buffers from the available buffer list
|
196
|
+
for (auto &vacuum_buffer : temporary_vacuum_buffers) {
|
197
|
+
auto buffer_id = vacuum_buffer.second;
|
198
|
+
D_ASSERT(buffers.find(buffer_id) != buffers.end());
|
199
|
+
buffers.find(buffer_id)->second.vacuum = true;
|
200
|
+
buffers_with_free_space.erase(buffer_id);
|
201
|
+
}
|
202
|
+
|
203
|
+
for (auto &vacuum_buffer : temporary_vacuum_buffers) {
|
204
|
+
vacuum_buffers.insert(vacuum_buffer.second);
|
205
|
+
}
|
206
|
+
|
207
|
+
return true;
|
208
|
+
}
|
209
|
+
|
210
|
+
void FixedSizeAllocator::FinalizeVacuum() {
|
211
|
+
|
212
|
+
for (auto &buffer_id : vacuum_buffers) {
|
213
|
+
D_ASSERT(buffers.find(buffer_id) != buffers.end());
|
214
|
+
auto &buffer = buffers.find(buffer_id)->second;
|
215
|
+
D_ASSERT(buffer.InMemory());
|
216
|
+
buffer.Destroy();
|
217
|
+
buffers.erase(buffer_id);
|
218
|
+
}
|
219
|
+
vacuum_buffers.clear();
|
220
|
+
}
|
221
|
+
|
222
|
+
IndexPointer FixedSizeAllocator::VacuumPointer(const IndexPointer ptr) {
|
223
|
+
|
224
|
+
// we do not need to adjust the bitmask of the old buffer, because we will free the entire
|
225
|
+
// buffer after the vacuum operation
|
226
|
+
|
227
|
+
auto new_ptr = New();
|
228
|
+
// new increases the allocation count, we need to counter that here
|
229
|
+
total_segment_count--;
|
230
|
+
|
231
|
+
memcpy(Get(new_ptr), Get(ptr), segment_size);
|
232
|
+
return new_ptr;
|
233
|
+
}
|
234
|
+
|
235
|
+
BlockPointer FixedSizeAllocator::Serialize(MetadataWriter &writer) {
|
236
|
+
|
237
|
+
for (auto &buffer : buffers) {
|
238
|
+
buffer.second.Serialize();
|
239
|
+
}
|
240
|
+
|
241
|
+
auto block_pointer = writer.GetBlockPointer();
|
242
|
+
writer.Write(segment_size);
|
243
|
+
writer.Write(static_cast<idx_t>(buffers.size()));
|
244
|
+
writer.Write(static_cast<idx_t>(buffers_with_free_space.size()));
|
245
|
+
|
246
|
+
for (auto &buffer : buffers) {
|
247
|
+
writer.Write(buffer.first);
|
248
|
+
writer.Write(buffer.second.BlockId());
|
249
|
+
writer.Write(buffer.second.segment_count);
|
250
|
+
}
|
251
|
+
for (auto &buffer_id : buffers_with_free_space) {
|
252
|
+
writer.Write(buffer_id);
|
253
|
+
}
|
254
|
+
|
255
|
+
return block_pointer;
|
256
|
+
}
|
257
|
+
|
258
|
+
void FixedSizeAllocator::Deserialize(const BlockPointer &block_pointer) {
|
259
|
+
|
260
|
+
MetadataReader reader(metadata_manager, block_pointer);
|
261
|
+
segment_size = reader.Read<idx_t>();
|
262
|
+
auto buffer_count = reader.Read<idx_t>();
|
263
|
+
auto buffers_with_free_space_count = reader.Read<idx_t>();
|
264
|
+
|
265
|
+
total_segment_count = 0;
|
266
|
+
|
267
|
+
for (idx_t i = 0; i < buffer_count; i++) {
|
268
|
+
auto buffer_id = reader.Read<idx_t>();
|
269
|
+
auto block_id = reader.Read<block_id_t>();
|
270
|
+
auto buffer_segment_count = reader.Read<idx_t>();
|
271
|
+
FixedSizeBuffer new_buffer(block_manager, buffer_segment_count, block_id);
|
272
|
+
buffers.insert(make_pair(buffer_id, std::move(new_buffer)));
|
273
|
+
total_segment_count += buffer_segment_count;
|
274
|
+
}
|
275
|
+
for (idx_t i = 0; i < buffers_with_free_space_count; i++) {
|
276
|
+
buffers_with_free_space.insert(reader.Read<idx_t>());
|
277
|
+
}
|
278
|
+
}
|
279
|
+
|
280
|
+
uint32_t FixedSizeAllocator::GetOffset(ValidityMask &mask, const idx_t segment_count) {
|
281
|
+
|
282
|
+
auto data = mask.GetData();
|
283
|
+
|
284
|
+
// fills up a buffer sequentially before searching for free bits
|
285
|
+
if (mask.RowIsValid(segment_count)) {
|
286
|
+
mask.SetInvalid(segment_count);
|
287
|
+
return segment_count;
|
288
|
+
}
|
289
|
+
|
290
|
+
for (idx_t entry_idx = 0; entry_idx < bitmask_count; entry_idx++) {
|
291
|
+
// get an entry with free bits
|
292
|
+
if (data[entry_idx] == 0) {
|
293
|
+
continue;
|
294
|
+
}
|
295
|
+
|
296
|
+
// find the position of the free bit
|
297
|
+
auto entry = data[entry_idx];
|
298
|
+
idx_t first_valid_bit = 0;
|
299
|
+
|
300
|
+
// this loop finds the position of the rightmost set bit in entry and stores it
|
301
|
+
// in first_valid_bit
|
302
|
+
for (idx_t i = 0; i < 6; i++) {
|
303
|
+
// set the left half of the bits of this level to zero and test if the entry is still not zero
|
304
|
+
if (entry & BASE[i]) {
|
305
|
+
// first valid bit is in the rightmost s[i] bits
|
306
|
+
// permanently set the left half of the bits to zero
|
307
|
+
entry &= BASE[i];
|
308
|
+
} else {
|
309
|
+
// first valid bit is in the leftmost s[i] bits
|
310
|
+
// shift by s[i] for the next iteration and add s[i] to the position of the rightmost set bit
|
311
|
+
entry >>= SHIFT[i];
|
312
|
+
first_valid_bit += SHIFT[i];
|
313
|
+
}
|
314
|
+
}
|
315
|
+
D_ASSERT(entry);
|
316
|
+
|
317
|
+
auto prev_bits = entry_idx * sizeof(validity_t) * 8;
|
318
|
+
D_ASSERT(mask.RowIsValid(prev_bits + first_valid_bit));
|
319
|
+
mask.SetInvalid(prev_bits + first_valid_bit);
|
320
|
+
return (prev_bits + first_valid_bit);
|
321
|
+
}
|
322
|
+
|
323
|
+
throw InternalException("Invalid bitmask for FixedSizeAllocator");
|
324
|
+
}
|
325
|
+
|
326
|
+
idx_t FixedSizeAllocator::GetAvailableBufferId() const {
|
327
|
+
idx_t buffer_id = buffers.size();
|
328
|
+
while (buffers.find(buffer_id) != buffers.end()) {
|
329
|
+
D_ASSERT(buffer_id > 0);
|
330
|
+
buffer_id--;
|
331
|
+
}
|
332
|
+
return buffer_id;
|
333
|
+
}
|
334
|
+
|
335
|
+
idx_t FixedSizeAllocator::GetUpperBoundBufferId() const {
|
336
|
+
idx_t upper_bound_id = 0;
|
337
|
+
for (auto &buffer : buffers) {
|
338
|
+
if (buffer.first >= upper_bound_id) {
|
339
|
+
upper_bound_id = buffer.first + 1;
|
340
|
+
}
|
341
|
+
}
|
342
|
+
return upper_bound_id;
|
343
|
+
}
|
344
|
+
|
345
|
+
} // namespace duckdb
|
@@ -0,0 +1,74 @@
|
|
1
|
+
#include "duckdb/execution/index/fixed_size_buffer.hpp"
|
2
|
+
|
3
|
+
#include "duckdb/storage/block_manager.hpp"
|
4
|
+
#include "duckdb/storage/buffer_manager.hpp"
|
5
|
+
|
6
|
+
namespace duckdb {
|
7
|
+
|
8
|
+
FixedSizeBuffer::FixedSizeBuffer(BlockManager &block_manager)
|
9
|
+
: block_manager(block_manager), segment_count(0), dirty(false), vacuum(false), block_handle(nullptr) {
|
10
|
+
|
11
|
+
auto &buffer_manager = block_manager.buffer_manager;
|
12
|
+
buffer_handle = buffer_manager.Allocate(Storage::BLOCK_SIZE, false, &block_handle);
|
13
|
+
}
|
14
|
+
|
15
|
+
FixedSizeBuffer::FixedSizeBuffer(BlockManager &block_manager, const idx_t segment_count, const block_id_t &block_id)
|
16
|
+
: block_manager(block_manager), segment_count(segment_count), dirty(false), vacuum(false) {
|
17
|
+
|
18
|
+
D_ASSERT(block_id < MAXIMUM_BLOCK);
|
19
|
+
block_handle = block_manager.RegisterBlock(block_id);
|
20
|
+
D_ASSERT(BlockId() < MAXIMUM_BLOCK);
|
21
|
+
}
|
22
|
+
|
23
|
+
void FixedSizeBuffer::Destroy() {
|
24
|
+
if (InMemory()) {
|
25
|
+
buffer_handle.Destroy();
|
26
|
+
}
|
27
|
+
if (OnDisk()) {
|
28
|
+
block_manager.MarkBlockAsFree(BlockId());
|
29
|
+
}
|
30
|
+
}
|
31
|
+
|
32
|
+
void FixedSizeBuffer::Serialize() {
|
33
|
+
|
34
|
+
if (!InMemory()) {
|
35
|
+
if (!OnDisk() || dirty) {
|
36
|
+
throw InternalException("invalid/missing buffer in FixedSizeAllocator");
|
37
|
+
}
|
38
|
+
return;
|
39
|
+
}
|
40
|
+
if (!dirty && OnDisk()) {
|
41
|
+
return;
|
42
|
+
}
|
43
|
+
|
44
|
+
// the buffer is in memory
|
45
|
+
D_ASSERT(InMemory());
|
46
|
+
// the buffer never was on disk, or there were changes to it after loading it from disk
|
47
|
+
D_ASSERT(!OnDisk() || dirty);
|
48
|
+
|
49
|
+
// we persist any changes, so the buffer is no longer dirty
|
50
|
+
dirty = false;
|
51
|
+
|
52
|
+
if (!OnDisk()) {
|
53
|
+
// temporary block - convert to persistent
|
54
|
+
auto block_id = block_manager.GetFreeBlockId();
|
55
|
+
D_ASSERT(block_id < MAXIMUM_BLOCK);
|
56
|
+
block_handle = block_manager.ConvertToPersistent(block_id, std::move(block_handle));
|
57
|
+
buffer_handle.Destroy();
|
58
|
+
|
59
|
+
} else {
|
60
|
+
// already a persistent block - only need to write it
|
61
|
+
auto block_id = block_handle->BlockId();
|
62
|
+
D_ASSERT(block_id < MAXIMUM_BLOCK);
|
63
|
+
block_manager.Write(buffer_handle.GetFileBuffer(), block_id);
|
64
|
+
}
|
65
|
+
}
|
66
|
+
|
67
|
+
void FixedSizeBuffer::Pin() {
|
68
|
+
|
69
|
+
auto &buffer_manager = block_manager.buffer_manager;
|
70
|
+
D_ASSERT(BlockId() < MAXIMUM_BLOCK);
|
71
|
+
buffer_handle = BufferHandle(buffer_manager.Pin(block_handle));
|
72
|
+
}
|
73
|
+
|
74
|
+
} // namespace duckdb
|
@@ -564,7 +564,7 @@ WindowGlobalSourceState::Task WindowGlobalSourceState::NextTask(idx_t hash_bin)
|
|
564
564
|
|
565
565
|
// If there is nothing to steal but there are unfinished partitions,
|
566
566
|
// yield until any pending builds are done.
|
567
|
-
TaskScheduler::
|
567
|
+
TaskScheduler::YieldThread();
|
568
568
|
}
|
569
569
|
|
570
570
|
return Task();
|
@@ -89,7 +89,7 @@ SinkResultType PhysicalCreateARTIndex::SinkUnsorted(Vector &row_identifiers, Ope
|
|
89
89
|
// insert the row IDs
|
90
90
|
auto &art = l_state.local_index->Cast<ART>();
|
91
91
|
for (idx_t i = 0; i < count; i++) {
|
92
|
-
if (!art.Insert(
|
92
|
+
if (!art.Insert(art.tree, l_state.keys[i], 0, row_ids[i])) {
|
93
93
|
throw ConstraintException("Data contains duplicates on indexed column(s)");
|
94
94
|
}
|
95
95
|
}
|
@@ -131,7 +131,9 @@ public:
|
|
131
131
|
}
|
132
132
|
const Value ColumnDefault(idx_t col) override {
|
133
133
|
auto &column = entry.GetColumn(LogicalIndex(col));
|
134
|
-
if (column.
|
134
|
+
if (column.Generated()) {
|
135
|
+
return Value(column.GeneratedExpression().ToString());
|
136
|
+
} else if (column.DefaultValue()) {
|
135
137
|
return Value(column.DefaultValue()->ToString());
|
136
138
|
}
|
137
139
|
return Value();
|
@@ -1,8 +1,8 @@
|
|
1
1
|
#ifndef DUCKDB_VERSION
|
2
|
-
#define DUCKDB_VERSION "0.8.2-
|
2
|
+
#define DUCKDB_VERSION "0.8.2-dev4142"
|
3
3
|
#endif
|
4
4
|
#ifndef DUCKDB_SOURCE_ID
|
5
|
-
#define DUCKDB_SOURCE_ID "
|
5
|
+
#define DUCKDB_SOURCE_ID "d5c4422f72"
|
6
6
|
#endif
|
7
7
|
#include "duckdb/function/table/system_functions.hpp"
|
8
8
|
#include "duckdb/main/database.hpp"
|
@@ -305,6 +305,7 @@ public:
|
|
305
305
|
static void AutoloadExtensionByConfigName(ClientContext &context, const string &configuration_name);
|
306
306
|
//! Autoload the extension required for `function_name` or throw a CatalogException
|
307
307
|
static bool AutoLoadExtensionByCatalogEntry(ClientContext &context, CatalogType type, const string &entry_name);
|
308
|
+
DUCKDB_API static bool TryAutoLoad(ClientContext &context, const string &extension_name) noexcept;
|
308
309
|
|
309
310
|
protected:
|
310
311
|
//! Reference to the database
|
@@ -24,6 +24,8 @@ public:
|
|
24
24
|
public:
|
25
25
|
string GetSchemaName() const override;
|
26
26
|
string GetTableName() const override;
|
27
|
+
//! This drops in-memory index data and marks all blocks on disk as free blocks, allowing them to be reclaimed
|
28
|
+
void CommitDrop();
|
27
29
|
};
|
28
30
|
|
29
31
|
} // namespace duckdb
|
@@ -219,4 +219,55 @@ struct ListRangeFun {
|
|
219
219
|
static ScalarFunctionSet GetFunctions();
|
220
220
|
};
|
221
221
|
|
222
|
+
struct ListCosineSimilarityFun {
|
223
|
+
static constexpr const char *Name = "list_cosine_similarity";
|
224
|
+
static constexpr const char *Parameters = "list1,list2";
|
225
|
+
static constexpr const char *Description = "Compute the cosine similarity between two lists.";
|
226
|
+
static constexpr const char *Example = "list_cosine_similarity([1, 2, 3], [1, 2, 3])";
|
227
|
+
|
228
|
+
static ScalarFunctionSet GetFunctions();
|
229
|
+
};
|
230
|
+
|
231
|
+
struct ListCosineSimilarityFunAlias {
|
232
|
+
using ALIAS = ListCosineSimilarityFun;
|
233
|
+
|
234
|
+
static constexpr const char *Name = "<=>";
|
235
|
+
};
|
236
|
+
|
237
|
+
struct ListDistanceFun {
|
238
|
+
static constexpr const char *Name = "list_distance";
|
239
|
+
static constexpr const char *Parameters = "list1,list2";
|
240
|
+
static constexpr const char *Description = "Compute the distance between two lists.";
|
241
|
+
static constexpr const char *Example = "list_distance([1, 2, 3], [1, 2, 3])";
|
242
|
+
|
243
|
+
static ScalarFunctionSet GetFunctions();
|
244
|
+
};
|
245
|
+
|
246
|
+
struct ListDistanceFunAlias {
|
247
|
+
using ALIAS = ListDistanceFun;
|
248
|
+
|
249
|
+
static constexpr const char *Name = "<->";
|
250
|
+
};
|
251
|
+
|
252
|
+
struct ListInnerProductFun {
|
253
|
+
static constexpr const char *Name = "list_inner_product";
|
254
|
+
static constexpr const char *Parameters = "list1,list2";
|
255
|
+
static constexpr const char *Description = "Compute the inner product between two lists.";
|
256
|
+
static constexpr const char *Example = "list_inner_product([1, 2, 3], [1, 2, 3])";
|
257
|
+
|
258
|
+
static ScalarFunctionSet GetFunctions();
|
259
|
+
};
|
260
|
+
|
261
|
+
struct ListDotProductFun {
|
262
|
+
using ALIAS = ListInnerProductFun;
|
263
|
+
|
264
|
+
static constexpr const char *Name = "list_dot_product";
|
265
|
+
};
|
266
|
+
|
267
|
+
struct ListInnerProductFunAlias {
|
268
|
+
using ALIAS = ListInnerProductFun;
|
269
|
+
|
270
|
+
static constexpr const char *Name = "<#>";
|
271
|
+
};
|
272
|
+
|
222
273
|
} // namespace duckdb
|
@@ -9,6 +9,8 @@
|
|
9
9
|
#pragma once
|
10
10
|
|
11
11
|
#include "duckdb/storage/index.hpp"
|
12
|
+
#include "duckdb/execution/index/art/node.hpp"
|
13
|
+
#include "duckdb/common/array.hpp"
|
12
14
|
|
13
15
|
namespace duckdb {
|
14
16
|
|
@@ -19,7 +21,6 @@ enum class VerifyExistenceType : uint8_t {
|
|
19
21
|
DELETE_FK = 2 // delete from a table that has a foreign key
|
20
22
|
};
|
21
23
|
class ConflictManager;
|
22
|
-
class Node;
|
23
24
|
class ARTKey;
|
24
25
|
class FixedSizeAllocator;
|
25
26
|
|
@@ -31,18 +32,22 @@ struct ARTFlags {
|
|
31
32
|
};
|
32
33
|
|
33
34
|
class ART : public Index {
|
35
|
+
public:
|
36
|
+
//! FixedSizeAllocator count of the ART
|
37
|
+
static constexpr uint8_t ALLOCATOR_COUNT = 6;
|
38
|
+
|
34
39
|
public:
|
35
40
|
//! Constructs an ART
|
36
41
|
ART(const vector<column_t> &column_ids, TableIOManager &table_io_manager,
|
37
42
|
const vector<unique_ptr<Expression>> &unbound_expressions, const IndexConstraintType constraint_type,
|
38
|
-
AttachedDatabase &db,
|
39
|
-
|
40
|
-
|
43
|
+
AttachedDatabase &db,
|
44
|
+
const shared_ptr<array<unique_ptr<FixedSizeAllocator>, ALLOCATOR_COUNT>> &allocators_ptr = nullptr,
|
45
|
+
const BlockPointer &block = BlockPointer());
|
41
46
|
|
42
47
|
//! Root of the tree
|
43
|
-
|
48
|
+
Node tree = Node();
|
44
49
|
//! Fixed-size allocators holding the ART nodes
|
45
|
-
shared_ptr<
|
50
|
+
shared_ptr<array<unique_ptr<FixedSizeAllocator>, ALLOCATOR_COUNT>> allocators;
|
46
51
|
//! True, if the ART owns its data
|
47
52
|
bool owns_data;
|
48
53
|
|
@@ -66,6 +71,8 @@ public:
|
|
66
71
|
void VerifyAppend(DataChunk &chunk) override;
|
67
72
|
//! Verify that data can be appended to the index without a constraint violation using the conflict manager
|
68
73
|
void VerifyAppend(DataChunk &chunk, ConflictManager &conflict_manager) override;
|
74
|
+
//! Deletes all data from the index. The lock obtained from InitializeLock must be held
|
75
|
+
void CommitDrop(IndexLock &index_lock) override;
|
69
76
|
//! Delete a chunk of entries from the index. The lock obtained from InitializeLock must be held
|
70
77
|
void Delete(IndexLock &lock, DataChunk &entries, Vector &row_identifiers) override;
|
71
78
|
//! Insert a chunk of entries into the index
|
@@ -103,7 +110,7 @@ public:
|
|
103
110
|
string VerifyAndToString(IndexLock &state, const bool only_verify) override;
|
104
111
|
|
105
112
|
//! Find the node with a matching key, or return nullptr if not found
|
106
|
-
Node Lookup(Node node, const ARTKey &key, idx_t depth);
|
113
|
+
optional_ptr<const Node> Lookup(const Node &node, const ARTKey &key, idx_t depth);
|
107
114
|
//! Insert a key into the tree
|
108
115
|
bool Insert(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id);
|
109
116
|
|
@@ -136,6 +143,9 @@ private:
|
|
136
143
|
//! Internal function to return the string representation of the ART,
|
137
144
|
//! or only traverses and verifies the index
|
138
145
|
string VerifyAndToStringInternal(const bool only_verify);
|
146
|
+
|
147
|
+
//! Deserialize the allocators of the ART
|
148
|
+
void Deserialize(const BlockPointer &pointer);
|
139
149
|
};
|
140
150
|
|
141
151
|
} // namespace duckdb
|
@@ -41,11 +41,11 @@ public:
|
|
41
41
|
return key_bytes[idx];
|
42
42
|
}
|
43
43
|
//! Greater than operator
|
44
|
-
bool operator>(const ARTKey &
|
44
|
+
bool operator>(const ARTKey &key) const;
|
45
45
|
//! Greater than or equal to operator
|
46
|
-
bool operator>=(const ARTKey &
|
46
|
+
bool operator>=(const ARTKey &key) const;
|
47
47
|
//! Equal to operator
|
48
|
-
bool operator==(const ARTKey &
|
48
|
+
bool operator==(const ARTKey &key) const;
|
49
49
|
|
50
50
|
private:
|
51
51
|
vector<uint8_t> key_bytes;
|
@@ -62,10 +62,10 @@ public:
|
|
62
62
|
//! If upper_bound is the empty ARTKey, then there is no upper bound
|
63
63
|
bool Scan(const ARTKey &upper_bound, const idx_t max_count, vector<row_t> &result_ids, const bool equal);
|
64
64
|
//! Finds the minimum (leaf) of the current subtree
|
65
|
-
void FindMinimum(Node &node);
|
65
|
+
void FindMinimum(const Node &node);
|
66
66
|
//! Finds the lower bound of the ART and adds the nodes to the stack. Returns false, if the lower
|
67
67
|
//! bound exceeds the maximum value of the ART
|
68
|
-
bool LowerBound(Node &node, const ARTKey &key, const bool equal, idx_t depth);
|
68
|
+
bool LowerBound(const Node &node, const ARTKey &key, const bool equal, idx_t depth);
|
69
69
|
|
70
70
|
private:
|
71
71
|
//! Stack of nodes from the root to the currently active node
|