duckdb 0.8.2-dev3989.0 → 0.8.2-dev4126.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +8 -7
- package/package.json +1 -1
- package/src/duckdb/extension/json/buffered_json_reader.cpp +76 -74
- package/src/duckdb/extension/json/include/buffered_json_reader.hpp +35 -32
- package/src/duckdb/extension/json/include/json_scan.hpp +9 -6
- package/src/duckdb/extension/json/json_scan.cpp +124 -121
- package/src/duckdb/extension/parquet/parquet_extension.cpp +23 -13
- package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +5 -0
- package/src/duckdb/src/common/crypto/md5.cpp +2 -12
- package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
- package/src/duckdb/src/common/sort/partition_state.cpp +5 -1
- package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +1 -1
- package/src/duckdb/src/core_functions/function_list.cpp +8 -0
- package/src/duckdb/src/core_functions/scalar/list/list_cosine_similarity.cpp +78 -0
- package/src/duckdb/src/core_functions/scalar/list/list_distance.cpp +72 -0
- package/src/duckdb/src/core_functions/scalar/list/list_inner_product.cpp +70 -0
- package/src/duckdb/src/core_functions/scalar/string/sha256.cpp +32 -0
- package/src/duckdb/src/execution/index/art/art.cpp +111 -92
- package/src/duckdb/src/execution/index/art/iterator.cpp +21 -27
- package/src/duckdb/src/execution/index/art/leaf.cpp +72 -153
- package/src/duckdb/src/execution/index/art/node.cpp +109 -203
- package/src/duckdb/src/execution/index/art/node16.cpp +32 -64
- package/src/duckdb/src/execution/index/art/node256.cpp +38 -53
- package/src/duckdb/src/execution/index/art/node4.cpp +31 -62
- package/src/duckdb/src/execution/index/art/node48.cpp +43 -65
- package/src/duckdb/src/execution/index/art/prefix.cpp +70 -141
- package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +345 -0
- package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +74 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
- package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/suffix.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_columns.cpp +3 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +1 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +51 -0
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +9 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +17 -7
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +5 -5
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +10 -16
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +38 -116
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +17 -18
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +17 -23
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +17 -18
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +17 -24
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +16 -22
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +126 -0
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +79 -0
- package/src/duckdb/src/include/duckdb/execution/index/index_pointer.hpp +96 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/block.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/index.hpp +10 -8
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +3 -0
- package/src/duckdb/src/main/extension/extension_helper.cpp +15 -1
- package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +14 -5
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +2 -3
- package/src/duckdb/src/storage/checkpoint_manager.cpp +16 -21
- package/src/duckdb/src/storage/data_table.cpp +3 -3
- package/src/duckdb/src/storage/index.cpp +7 -1
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +21 -21
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +10 -16
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table_index_list.cpp +1 -1
- package/src/duckdb/src/transaction/commit_state.cpp +5 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls_wrapper.hpp +4 -1
- package/src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp +24 -2
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +5 -5
- package/src/duckdb/ub_src_core_functions_scalar_list.cpp +6 -0
- package/src/duckdb/ub_src_core_functions_scalar_string.cpp +2 -0
- package/src/duckdb/ub_src_execution_index.cpp +4 -0
- package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +0 -238
- package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +0 -115
@@ -262,7 +262,8 @@ void CheckpointWriter::WriteSchema(SchemaCatalogEntry &schema) {
|
|
262
262
|
void CheckpointReader::ReadSchema(ClientContext &context, MetadataReader &reader) {
|
263
263
|
// read the schema and create it in the catalog
|
264
264
|
auto info = CatalogEntry::Deserialize(reader);
|
265
|
-
|
265
|
+
|
266
|
+
// we set create conflict to IGNORE_ON_CONFLICT, so that we can ignore a failure when recreating the main schema
|
266
267
|
info->on_conflict = OnCreateConflict::IGNORE_ON_CONFLICT;
|
267
268
|
catalog.CreateSchema(context, info->Cast<CreateSchemaInfo>());
|
268
269
|
|
@@ -336,14 +337,12 @@ void CheckpointReader::ReadSequence(ClientContext &context, MetadataReader &read
|
|
336
337
|
// Indexes
|
337
338
|
//===--------------------------------------------------------------------===//
|
338
339
|
void CheckpointWriter::WriteIndex(IndexCatalogEntry &index_catalog) {
|
339
|
-
//
|
340
|
-
//
|
341
|
-
auto
|
340
|
+
// we write the index data in WriteTableData
|
341
|
+
// here, we only write the root pointer
|
342
|
+
const auto root_block_pointer = index_catalog.index->GetRootBlockPointer();
|
342
343
|
auto &metadata_writer = GetMetadataWriter();
|
343
344
|
index_catalog.Serialize(metadata_writer);
|
344
|
-
|
345
|
-
metadata_writer.Write(root_offset.block_id);
|
346
|
-
metadata_writer.Write(root_offset.offset);
|
345
|
+
metadata_writer.Write(root_block_pointer);
|
347
346
|
}
|
348
347
|
|
349
348
|
void CheckpointReader::ReadIndex(ClientContext &context, MetadataReader &reader) {
|
@@ -358,10 +357,8 @@ void CheckpointReader::ReadIndex(ClientContext &context, MetadataReader &reader)
|
|
358
357
|
auto &index_catalog = schema_catalog.CreateIndex(context, index_info, table_catalog)->Cast<DuckIndexEntry>();
|
359
358
|
index_catalog.info = table_catalog.GetStorage().info;
|
360
359
|
|
361
|
-
// we deserialize the index lazily, i.e., we
|
362
|
-
|
363
|
-
auto root_block_id = reader.Read<block_id_t>();
|
364
|
-
auto root_offset = reader.Read<uint32_t>();
|
360
|
+
// we deserialize the index lazily, i.e., we only load the root block pointer
|
361
|
+
const auto index_block_pointer = reader.Read<BlockPointer>();
|
365
362
|
|
366
363
|
// obtain the expressions of the ART from the index metadata
|
367
364
|
vector<unique_ptr<Expression>> unbound_expressions;
|
@@ -401,9 +398,8 @@ void CheckpointReader::ReadIndex(ClientContext &context, MetadataReader &reader)
|
|
401
398
|
switch (index_info.index_type) {
|
402
399
|
case IndexType::ART: {
|
403
400
|
auto &storage = table_catalog.GetStorage();
|
404
|
-
auto art =
|
405
|
-
|
406
|
-
index_info.constraint_type, storage.db, nullptr, BlockPointer(root_block_id, root_offset));
|
401
|
+
auto art = make_uniq<ART>(index_info.column_ids, TableIOManager::Get(storage), std::move(unbound_expressions),
|
402
|
+
index_info.constraint_type, storage.db, nullptr, index_block_pointer);
|
407
403
|
index_catalog.index = art.get();
|
408
404
|
storage.info->indexes.AddIndex(std::move(art));
|
409
405
|
break;
|
@@ -450,7 +446,7 @@ void CheckpointReader::ReadTableMacro(ClientContext &context, MetadataReader &re
|
|
450
446
|
// Table Metadata
|
451
447
|
//===--------------------------------------------------------------------===//
|
452
448
|
void CheckpointWriter::WriteTable(TableCatalogEntry &table) {
|
453
|
-
// write the table
|
449
|
+
// write the table metadata
|
454
450
|
table.Serialize(GetMetadataWriter());
|
455
451
|
// now we need to write the table data.
|
456
452
|
if (auto writer = GetTableDataWriter(table)) {
|
@@ -483,12 +479,11 @@ void CheckpointReader::ReadTableData(ClientContext &context, MetadataReader &rea
|
|
483
479
|
data_reader.ReadTableData();
|
484
480
|
bound_info.data->total_rows = reader.Read<idx_t>();
|
485
481
|
|
486
|
-
//
|
487
|
-
idx_t
|
488
|
-
for (idx_t i = 0; i <
|
489
|
-
auto
|
490
|
-
|
491
|
-
bound_info.indexes.emplace_back(idx_block_id, idx_offset);
|
482
|
+
// get the root block pointers of each index
|
483
|
+
idx_t index_count = reader.Read<idx_t>();
|
484
|
+
for (idx_t i = 0; i < index_count; i++) {
|
485
|
+
const auto index_pointer = reader.Read<BlockPointer>();
|
486
|
+
bound_info.indexes.emplace_back(index_pointer);
|
492
487
|
}
|
493
488
|
}
|
494
489
|
|
@@ -429,13 +429,13 @@ void DataTable::VerifyForeignKeyConstraint(const BoundForeignKeyConstraint &bfk,
|
|
429
429
|
data_table.info->indexes.VerifyForeignKey(*dst_keys_ptr, dst_chunk, regular_conflicts);
|
430
430
|
regular_conflicts.Finalize();
|
431
431
|
auto &regular_matches = regular_conflicts.Conflicts();
|
432
|
-
// check whether or not the chunk can be inserted or deleted into the referenced table' transaction local storage
|
433
|
-
auto &local_storage = LocalStorage::Get(context, db);
|
434
432
|
|
433
|
+
// check if we can insert the chunk into the reference table's local storage
|
434
|
+
auto &local_storage = LocalStorage::Get(context, db);
|
435
435
|
bool error = IsForeignKeyConstraintError(is_append, count, regular_matches);
|
436
436
|
bool transaction_error = false;
|
437
|
-
|
438
437
|
bool transaction_check = local_storage.Find(data_table);
|
438
|
+
|
439
439
|
if (transaction_check) {
|
440
440
|
auto &transact_index = local_storage.GetIndexes(data_table);
|
441
441
|
transact_index.VerifyForeignKey(*dst_keys_ptr, dst_chunk, transaction_conflicts);
|
@@ -12,7 +12,7 @@ Index::Index(AttachedDatabase &db, IndexType type, TableIOManager &table_io_mana
|
|
12
12
|
IndexConstraintType constraint_type_p)
|
13
13
|
|
14
14
|
: type(type), table_io_manager(table_io_manager), column_ids(column_ids_p), constraint_type(constraint_type_p),
|
15
|
-
db(db)
|
15
|
+
db(db) {
|
16
16
|
|
17
17
|
for (auto &expr : unbound_expressions) {
|
18
18
|
types.push_back(expr->return_type.InternalType());
|
@@ -39,6 +39,12 @@ PreservedError Index::Append(DataChunk &entries, Vector &row_identifiers) {
|
|
39
39
|
return Append(state, entries, row_identifiers);
|
40
40
|
}
|
41
41
|
|
42
|
+
void Index::CommitDrop() {
|
43
|
+
IndexLock index_lock;
|
44
|
+
InitializeLock(index_lock);
|
45
|
+
CommitDrop(index_lock);
|
46
|
+
}
|
47
|
+
|
42
48
|
void Index::Delete(DataChunk &entries, Vector &row_identifiers) {
|
43
49
|
IndexLock state;
|
44
50
|
InitializeLock(state);
|
@@ -13,7 +13,7 @@ MetadataManager::~MetadataManager() {
|
|
13
13
|
|
14
14
|
MetadataHandle MetadataManager::AllocateHandle() {
|
15
15
|
// check if there is any free space left in an existing block
|
16
|
-
// if not allocate a new
|
16
|
+
// if not allocate a new block
|
17
17
|
block_id_t free_block = INVALID_BLOCK;
|
18
18
|
for (auto &kv : blocks) {
|
19
19
|
auto &block = kv.second;
|
@@ -230,27 +230,27 @@ void MetadataBlock::FreeBlocksFromInteger(idx_t free_list) {
|
|
230
230
|
}
|
231
231
|
|
232
232
|
void MetadataManager::MarkBlocksAsModified() {
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
}
|
233
|
+
|
234
|
+
// for any blocks that were modified in the last checkpoint - set them to free blocks currently
|
235
|
+
for (auto &kv : modified_blocks) {
|
236
|
+
auto block_id = kv.first;
|
237
|
+
idx_t modified_list = kv.second;
|
238
|
+
auto entry = blocks.find(block_id);
|
239
|
+
D_ASSERT(entry != blocks.end());
|
240
|
+
auto &block = entry->second;
|
241
|
+
idx_t current_free_blocks = block.FreeBlocksToInteger();
|
242
|
+
// merge the current set of free blocks with the modified blocks
|
243
|
+
idx_t new_free_blocks = current_free_blocks | modified_list;
|
244
|
+
// if (new_free_blocks == NumericLimits<idx_t>::Maximum()) {
|
245
|
+
// // if new free_blocks is all blocks - mark entire block as modified
|
246
|
+
// blocks.erase(entry);
|
247
|
+
// block_manager.MarkBlockAsModified(block_id);
|
248
|
+
// } else {
|
249
|
+
// set the new set of free blocks
|
250
|
+
block.FreeBlocksFromInteger(new_free_blocks);
|
251
|
+
// }
|
253
252
|
}
|
253
|
+
|
254
254
|
modified_blocks.clear();
|
255
255
|
for (auto &kv : blocks) {
|
256
256
|
auto &block = kv.second;
|
@@ -3,11 +3,11 @@
|
|
3
3
|
#include "duckdb/common/allocator.hpp"
|
4
4
|
#include "duckdb/common/exception.hpp"
|
5
5
|
#include "duckdb/common/set.hpp"
|
6
|
-
#include "duckdb/storage/in_memory_block_manager.hpp"
|
7
|
-
#include "duckdb/storage/storage_manager.hpp"
|
8
6
|
#include "duckdb/main/attached_database.hpp"
|
9
7
|
#include "duckdb/main/database.hpp"
|
10
8
|
#include "duckdb/storage/buffer/buffer_pool.hpp"
|
9
|
+
#include "duckdb/storage/in_memory_block_manager.hpp"
|
10
|
+
#include "duckdb/storage/storage_manager.hpp"
|
11
11
|
|
12
12
|
namespace duckdb {
|
13
13
|
|
@@ -77,14 +77,6 @@ idx_t StandardBufferManager::GetMaxMemory() const {
|
|
77
77
|
return buffer_pool.GetMaxMemory();
|
78
78
|
}
|
79
79
|
|
80
|
-
// POTENTIALLY PROBLEMATIC
|
81
|
-
// void StandardBufferManager::IncreaseUsedMemory(idx_t size, bool unsafe) {
|
82
|
-
// if (!unsafe && buffer_pool.GetUsedMemory() + size > buffer_pool.GetMaxMemory()) {
|
83
|
-
// throw OutOfMemoryException("Failed to allocate data of size %lld%s", size, InMemoryWarning());
|
84
|
-
// }
|
85
|
-
// buffer_pool.IncreaseUsedMemory(size);
|
86
|
-
//}
|
87
|
-
|
88
80
|
template <typename... ARGS>
|
89
81
|
TempBufferPoolReservation StandardBufferManager::EvictBlocksOrThrow(idx_t memory_delta, unique_ptr<FileBuffer> *buffer,
|
90
82
|
ARGS... args) {
|
@@ -329,12 +321,13 @@ private:
|
|
329
321
|
};
|
330
322
|
|
331
323
|
class TemporaryFileHandle {
|
332
|
-
constexpr static idx_t
|
324
|
+
constexpr static idx_t MAX_ALLOWED_INDEX_BASE = 4000;
|
333
325
|
|
334
326
|
public:
|
335
|
-
TemporaryFileHandle(DatabaseInstance &db, const string &temp_directory, idx_t index)
|
336
|
-
: db(db), file_index(index),
|
337
|
-
|
327
|
+
TemporaryFileHandle(idx_t temp_file_count, DatabaseInstance &db, const string &temp_directory, idx_t index)
|
328
|
+
: max_allowed_index((1 << temp_file_count) * MAX_ALLOWED_INDEX_BASE), db(db), file_index(index),
|
329
|
+
path(FileSystem::GetFileSystem(db).JoinPath(temp_directory,
|
330
|
+
"duckdb_temp_storage-" + to_string(index) + ".tmp")) {
|
338
331
|
}
|
339
332
|
|
340
333
|
public:
|
@@ -348,7 +341,7 @@ public:
|
|
348
341
|
public:
|
349
342
|
TemporaryFileIndex TryGetBlockIndex() {
|
350
343
|
TemporaryFileLock lock(file_lock);
|
351
|
-
if (index_manager.GetMaxIndex() >=
|
344
|
+
if (index_manager.GetMaxIndex() >= max_allowed_index && index_manager.HasFreeBlocks()) {
|
352
345
|
// file is at capacity
|
353
346
|
return TemporaryFileIndex();
|
354
347
|
}
|
@@ -426,6 +419,7 @@ private:
|
|
426
419
|
}
|
427
420
|
|
428
421
|
private:
|
422
|
+
const idx_t max_allowed_index;
|
429
423
|
DatabaseInstance &db;
|
430
424
|
unique_ptr<FileHandle> handle;
|
431
425
|
idx_t file_index;
|
@@ -467,7 +461,7 @@ public:
|
|
467
461
|
if (!handle) {
|
468
462
|
// no existing handle to write to; we need to create & open a new file
|
469
463
|
auto new_file_index = index_manager.GetNewBlockIndex();
|
470
|
-
auto new_file = make_uniq<TemporaryFileHandle>(db, temp_directory, new_file_index);
|
464
|
+
auto new_file = make_uniq<TemporaryFileHandle>(files.size(), db, temp_directory, new_file_index);
|
471
465
|
handle = new_file.get();
|
472
466
|
files[new_file_index] = std::move(new_file);
|
473
467
|
|
@@ -54,7 +54,7 @@ void TableIndexList::VerifyForeignKey(const vector<PhysicalIndex> &fk_keys, Data
|
|
54
54
|
? ForeignKeyType::FK_TYPE_PRIMARY_KEY_TABLE
|
55
55
|
: ForeignKeyType::FK_TYPE_FOREIGN_KEY_TABLE;
|
56
56
|
|
57
|
-
// check whether
|
57
|
+
// check whether the chunk can be inserted or deleted into the referenced table storage
|
58
58
|
auto index = FindForeignKeyIndex(fk_keys, fk_type);
|
59
59
|
if (!index) {
|
60
60
|
throw InternalException("Internal Foreign Key error: could not find index to verify...");
|
@@ -1,5 +1,6 @@
|
|
1
1
|
#include "duckdb/transaction/commit_state.hpp"
|
2
2
|
|
3
|
+
#include "duckdb/catalog/catalog_entry/duck_index_entry.hpp"
|
3
4
|
#include "duckdb/catalog/catalog_entry/duck_table_entry.hpp"
|
4
5
|
#include "duckdb/catalog/catalog_entry/type_catalog_entry.hpp"
|
5
6
|
#include "duckdb/catalog/catalog_set.hpp"
|
@@ -124,9 +125,12 @@ void CommitState::WriteCatalogEntry(CatalogEntry &entry, data_ptr_t dataptr) {
|
|
124
125
|
case CatalogType::TYPE_ENTRY:
|
125
126
|
log->WriteDropType(entry.Cast<TypeCatalogEntry>());
|
126
127
|
break;
|
127
|
-
case CatalogType::INDEX_ENTRY:
|
128
|
+
case CatalogType::INDEX_ENTRY: {
|
129
|
+
auto &index_entry = entry.Cast<DuckIndexEntry>();
|
130
|
+
index_entry.CommitDrop();
|
128
131
|
log->WriteDropIndex(entry.Cast<IndexCatalogEntry>());
|
129
132
|
break;
|
133
|
+
}
|
130
134
|
case CatalogType::PREPARED_STATEMENT:
|
131
135
|
case CatalogType::SCALAR_FUNCTION_ENTRY:
|
132
136
|
// do nothing, indexes/prepared statements/functions aren't persisted to disk
|
@@ -17,8 +17,10 @@ public:
|
|
17
17
|
static std::string ComputeSha256Hash(const std::string& file_content);
|
18
18
|
static bool IsValidSha256Signature(const std::string& pubkey, const std::string& signature, const std::string& sha256_hash);
|
19
19
|
static void Hmac256(const char* key, size_t key_len, const char* message, size_t message_len, char* out);
|
20
|
+
static void ToBase16(char *in, char *out, size_t len);
|
20
21
|
|
21
|
-
static constexpr size_t
|
22
|
+
static constexpr size_t SHA256_HASH_LENGTH_BYTES = 32;
|
23
|
+
static constexpr size_t SHA256_HASH_LENGTH_TEXT = 64;
|
22
24
|
|
23
25
|
class SHA256State {
|
24
26
|
public:
|
@@ -26,6 +28,7 @@ public:
|
|
26
28
|
~SHA256State();
|
27
29
|
void AddString(const std::string & str);
|
28
30
|
std::string Finalize();
|
31
|
+
void FinishHex(char *out);
|
29
32
|
private:
|
30
33
|
void *sha_context;
|
31
34
|
};
|
@@ -38,7 +38,7 @@ void MbedTlsWrapper::ComputeSha256Hash(const char* in, size_t in_len, char* out)
|
|
38
38
|
|
39
39
|
string MbedTlsWrapper::ComputeSha256Hash(const string& file_content) {
|
40
40
|
string hash;
|
41
|
-
hash.resize(MbedTlsWrapper::
|
41
|
+
hash.resize(MbedTlsWrapper::SHA256_HASH_LENGTH_BYTES);
|
42
42
|
ComputeSha256Hash(file_content.data(), file_content.size(), (char*)hash.data());
|
43
43
|
return hash;
|
44
44
|
}
|
@@ -83,6 +83,17 @@ void MbedTlsWrapper::Hmac256(const char* key, size_t key_len, const char* messag
|
|
83
83
|
mbedtls_md_free(&hmac_ctx);
|
84
84
|
}
|
85
85
|
|
86
|
+
void MbedTlsWrapper::ToBase16(char *in, char *out, size_t len) {
|
87
|
+
static char const HEX_CODES[] = "0123456789abcdef";
|
88
|
+
size_t i, j;
|
89
|
+
|
90
|
+
for (j = i = 0; i < len; i++) {
|
91
|
+
int a = in[i];
|
92
|
+
out[j++] = HEX_CODES[(a >> 4) & 0xf];
|
93
|
+
out[j++] = HEX_CODES[a & 0xf];
|
94
|
+
}
|
95
|
+
}
|
96
|
+
|
86
97
|
MbedTlsWrapper::SHA256State::SHA256State() : sha_context(new mbedtls_sha256_context()) {
|
87
98
|
mbedtls_sha256_init((mbedtls_sha256_context*)sha_context);
|
88
99
|
|
@@ -104,7 +115,7 @@ void MbedTlsWrapper::SHA256State::AddString(const std::string & str) {
|
|
104
115
|
|
105
116
|
std::string MbedTlsWrapper::SHA256State::Finalize() {
|
106
117
|
string hash;
|
107
|
-
hash.resize(MbedTlsWrapper::
|
118
|
+
hash.resize(MbedTlsWrapper::SHA256_HASH_LENGTH_BYTES);
|
108
119
|
|
109
120
|
if (mbedtls_sha256_finish((mbedtls_sha256_context*)sha_context, (unsigned char*)hash.data())) {
|
110
121
|
throw std::runtime_error("SHA256 Error");
|
@@ -112,3 +123,14 @@ std::string MbedTlsWrapper::SHA256State::Finalize() {
|
|
112
123
|
|
113
124
|
return hash;
|
114
125
|
}
|
126
|
+
|
127
|
+
void MbedTlsWrapper::SHA256State::FinishHex(char *out) {
|
128
|
+
string hash;
|
129
|
+
hash.resize(MbedTlsWrapper::SHA256_HASH_LENGTH_BYTES);
|
130
|
+
|
131
|
+
if (mbedtls_sha256_finish((mbedtls_sha256_context *)sha_context, (unsigned char *)hash.data())) {
|
132
|
+
throw std::runtime_error("SHA256 Error");
|
133
|
+
}
|
134
|
+
|
135
|
+
MbedTlsWrapper::ToBase16(const_cast<char *>(hash.c_str()), out, MbedTlsWrapper::SHA256_HASH_LENGTH_BYTES);
|
136
|
+
}
|
@@ -348,9 +348,11 @@
|
|
348
348
|
|
349
349
|
#include "extension/icu/third_party/icu/i18n/wintzimpl.cpp"
|
350
350
|
|
351
|
-
#include "extension/icu/third_party/icu/i18n/double-conversion-
|
351
|
+
#include "extension/icu/third_party/icu/i18n/double-conversion-strtod.cpp"
|
352
352
|
|
353
|
-
#include "extension/icu/third_party/icu/i18n/double-conversion-
|
353
|
+
#include "extension/icu/third_party/icu/i18n/double-conversion-double-to-string.cpp"
|
354
|
+
|
355
|
+
#include "extension/icu/third_party/icu/i18n/double-conversion-fast-dtoa.cpp"
|
354
356
|
|
355
357
|
#include "extension/icu/third_party/icu/i18n/double-conversion-string-to-double.cpp"
|
356
358
|
|
@@ -358,7 +360,5 @@
|
|
358
360
|
|
359
361
|
#include "extension/icu/third_party/icu/i18n/double-conversion-bignum.cpp"
|
360
362
|
|
361
|
-
#include "extension/icu/third_party/icu/i18n/double-conversion-
|
362
|
-
|
363
|
-
#include "extension/icu/third_party/icu/i18n/double-conversion-double-to-string.cpp"
|
363
|
+
#include "extension/icu/third_party/icu/i18n/double-conversion-cached-powers.cpp"
|
364
364
|
|
@@ -10,5 +10,11 @@
|
|
10
10
|
|
11
11
|
#include "src/core_functions/scalar/list/list_sort.cpp"
|
12
12
|
|
13
|
+
#include "src/core_functions/scalar/list/list_distance.cpp"
|
14
|
+
|
15
|
+
#include "src/core_functions/scalar/list/list_cosine_similarity.cpp"
|
16
|
+
|
17
|
+
#include "src/core_functions/scalar/list/list_inner_product.cpp"
|
18
|
+
|
13
19
|
#include "src/core_functions/scalar/list/range.cpp"
|
14
20
|
|
@@ -1,238 +0,0 @@
|
|
1
|
-
#include "duckdb/execution/index/art/fixed_size_allocator.hpp"
|
2
|
-
|
3
|
-
namespace duckdb {
|
4
|
-
|
5
|
-
constexpr idx_t FixedSizeAllocator::BASE[];
|
6
|
-
constexpr uint8_t FixedSizeAllocator::SHIFT[];
|
7
|
-
|
8
|
-
FixedSizeAllocator::FixedSizeAllocator(const idx_t allocation_size, Allocator &allocator)
|
9
|
-
: allocation_size(allocation_size), total_allocations(0), allocator(allocator) {
|
10
|
-
|
11
|
-
// calculate how many allocations fit into one buffer
|
12
|
-
|
13
|
-
idx_t bits_per_value = sizeof(validity_t) * 8;
|
14
|
-
idx_t curr_alloc_size = 0;
|
15
|
-
|
16
|
-
bitmask_count = 0;
|
17
|
-
allocations_per_buffer = 0;
|
18
|
-
|
19
|
-
while (curr_alloc_size < BUFFER_ALLOC_SIZE) {
|
20
|
-
if (!bitmask_count || (bitmask_count * bits_per_value) % allocations_per_buffer == 0) {
|
21
|
-
bitmask_count++;
|
22
|
-
curr_alloc_size += sizeof(validity_t);
|
23
|
-
}
|
24
|
-
|
25
|
-
auto remaining_alloc_size = BUFFER_ALLOC_SIZE - curr_alloc_size;
|
26
|
-
auto remaining_allocations = MinValue(remaining_alloc_size / allocation_size, bits_per_value);
|
27
|
-
|
28
|
-
if (remaining_allocations == 0) {
|
29
|
-
break;
|
30
|
-
}
|
31
|
-
|
32
|
-
allocations_per_buffer += remaining_allocations;
|
33
|
-
curr_alloc_size += remaining_allocations * allocation_size;
|
34
|
-
}
|
35
|
-
|
36
|
-
allocation_offset = bitmask_count * sizeof(validity_t);
|
37
|
-
}
|
38
|
-
|
39
|
-
FixedSizeAllocator::~FixedSizeAllocator() {
|
40
|
-
for (auto &buffer : buffers) {
|
41
|
-
allocator.FreeData(buffer.ptr, BUFFER_ALLOC_SIZE);
|
42
|
-
}
|
43
|
-
}
|
44
|
-
|
45
|
-
Node FixedSizeAllocator::New() {
|
46
|
-
|
47
|
-
// no more free pointers
|
48
|
-
if (buffers_with_free_space.empty()) {
|
49
|
-
|
50
|
-
// add a new buffer
|
51
|
-
idx_t buffer_id = buffers.size();
|
52
|
-
D_ASSERT(buffer_id <= (uint32_t)DConstants::INVALID_INDEX);
|
53
|
-
auto buffer = allocator.AllocateData(BUFFER_ALLOC_SIZE);
|
54
|
-
buffers.emplace_back(buffer, 0);
|
55
|
-
buffers_with_free_space.insert(buffer_id);
|
56
|
-
|
57
|
-
// set the bitmask
|
58
|
-
ValidityMask mask(reinterpret_cast<validity_t *>(buffer));
|
59
|
-
mask.SetAllValid(allocations_per_buffer);
|
60
|
-
}
|
61
|
-
|
62
|
-
// return a pointer
|
63
|
-
D_ASSERT(!buffers_with_free_space.empty());
|
64
|
-
auto buffer_id = (uint32_t)*buffers_with_free_space.begin();
|
65
|
-
|
66
|
-
auto bitmask_ptr = reinterpret_cast<validity_t *>(buffers[buffer_id].ptr);
|
67
|
-
ValidityMask mask(bitmask_ptr);
|
68
|
-
auto offset = GetOffset(mask, buffers[buffer_id].allocation_count);
|
69
|
-
|
70
|
-
buffers[buffer_id].allocation_count++;
|
71
|
-
total_allocations++;
|
72
|
-
if (buffers[buffer_id].allocation_count == allocations_per_buffer) {
|
73
|
-
buffers_with_free_space.erase(buffer_id);
|
74
|
-
}
|
75
|
-
|
76
|
-
return Node(buffer_id, offset);
|
77
|
-
}
|
78
|
-
|
79
|
-
void FixedSizeAllocator::Free(const Node ptr) {
|
80
|
-
auto bitmask_ptr = reinterpret_cast<validity_t *>(buffers[ptr.GetBufferId()].ptr);
|
81
|
-
ValidityMask mask(bitmask_ptr);
|
82
|
-
D_ASSERT(!mask.RowIsValid(ptr.GetOffset()));
|
83
|
-
mask.SetValid(ptr.GetOffset());
|
84
|
-
buffers_with_free_space.insert(ptr.GetBufferId());
|
85
|
-
|
86
|
-
D_ASSERT(total_allocations > 0);
|
87
|
-
D_ASSERT(buffers[ptr.GetBufferId()].allocation_count > 0);
|
88
|
-
buffers[ptr.GetBufferId()].allocation_count--;
|
89
|
-
total_allocations--;
|
90
|
-
}
|
91
|
-
|
92
|
-
void FixedSizeAllocator::Reset() {
|
93
|
-
|
94
|
-
for (auto &buffer : buffers) {
|
95
|
-
allocator.FreeData(buffer.ptr, BUFFER_ALLOC_SIZE);
|
96
|
-
}
|
97
|
-
buffers.clear();
|
98
|
-
buffers_with_free_space.clear();
|
99
|
-
total_allocations = 0;
|
100
|
-
}
|
101
|
-
|
102
|
-
void FixedSizeAllocator::Merge(FixedSizeAllocator &other) {
|
103
|
-
|
104
|
-
D_ASSERT(allocation_size == other.allocation_size);
|
105
|
-
|
106
|
-
// remember the buffer count and merge the buffers
|
107
|
-
idx_t buffer_count = buffers.size();
|
108
|
-
for (auto &buffer : other.buffers) {
|
109
|
-
buffers.push_back(buffer);
|
110
|
-
}
|
111
|
-
other.buffers.clear();
|
112
|
-
|
113
|
-
// merge the buffers with free spaces
|
114
|
-
for (auto &buffer_id : other.buffers_with_free_space) {
|
115
|
-
buffers_with_free_space.insert(buffer_id + buffer_count);
|
116
|
-
}
|
117
|
-
other.buffers_with_free_space.clear();
|
118
|
-
|
119
|
-
// add the total allocations
|
120
|
-
total_allocations += other.total_allocations;
|
121
|
-
}
|
122
|
-
|
123
|
-
bool FixedSizeAllocator::InitializeVacuum() {
|
124
|
-
|
125
|
-
if (total_allocations == 0) {
|
126
|
-
Reset();
|
127
|
-
return false;
|
128
|
-
}
|
129
|
-
|
130
|
-
auto total_available_allocations = allocations_per_buffer * buffers.size();
|
131
|
-
D_ASSERT(total_available_allocations >= total_allocations);
|
132
|
-
auto total_free_positions = total_available_allocations - total_allocations;
|
133
|
-
|
134
|
-
// vacuum_count buffers can be freed
|
135
|
-
auto vacuum_count = total_free_positions / allocations_per_buffer;
|
136
|
-
|
137
|
-
// calculate the vacuum threshold adaptively
|
138
|
-
D_ASSERT(vacuum_count < buffers.size());
|
139
|
-
idx_t memory_usage = GetMemoryUsage();
|
140
|
-
idx_t excess_memory_usage = vacuum_count * BUFFER_ALLOC_SIZE;
|
141
|
-
auto excess_percentage = (double)excess_memory_usage / (double)memory_usage;
|
142
|
-
auto threshold = (double)VACUUM_THRESHOLD / 100.0;
|
143
|
-
if (excess_percentage < threshold) {
|
144
|
-
return false;
|
145
|
-
}
|
146
|
-
|
147
|
-
min_vacuum_buffer_id = buffers.size() - vacuum_count;
|
148
|
-
|
149
|
-
// remove all invalid buffers from the available buffer list to ensure that we do not reuse them
|
150
|
-
auto it = buffers_with_free_space.begin();
|
151
|
-
while (it != buffers_with_free_space.end()) {
|
152
|
-
if (*it >= min_vacuum_buffer_id) {
|
153
|
-
it = buffers_with_free_space.erase(it);
|
154
|
-
} else {
|
155
|
-
it++;
|
156
|
-
}
|
157
|
-
}
|
158
|
-
|
159
|
-
return true;
|
160
|
-
}
|
161
|
-
|
162
|
-
void FixedSizeAllocator::FinalizeVacuum() {
|
163
|
-
|
164
|
-
// free all (now unused) buffers
|
165
|
-
while (min_vacuum_buffer_id < buffers.size()) {
|
166
|
-
allocator.FreeData(buffers.back().ptr, BUFFER_ALLOC_SIZE);
|
167
|
-
buffers.pop_back();
|
168
|
-
}
|
169
|
-
}
|
170
|
-
|
171
|
-
Node FixedSizeAllocator::VacuumPointer(const Node ptr) {
|
172
|
-
|
173
|
-
// we do not need to adjust the bitmask of the old buffer, because we will free the entire
|
174
|
-
// buffer after the vacuum operation
|
175
|
-
|
176
|
-
auto new_ptr = New();
|
177
|
-
|
178
|
-
// new increases the allocation count
|
179
|
-
total_allocations--;
|
180
|
-
|
181
|
-
memcpy(Get(new_ptr), Get(ptr), allocation_size);
|
182
|
-
return new_ptr;
|
183
|
-
}
|
184
|
-
|
185
|
-
void FixedSizeAllocator::Verify() const {
|
186
|
-
#ifdef DEBUG
|
187
|
-
auto total_available_allocations = allocations_per_buffer * buffers.size();
|
188
|
-
D_ASSERT(total_available_allocations >= total_allocations);
|
189
|
-
D_ASSERT(buffers.size() >= buffers_with_free_space.size());
|
190
|
-
#endif
|
191
|
-
}
|
192
|
-
|
193
|
-
uint32_t FixedSizeAllocator::GetOffset(ValidityMask &mask, const idx_t allocation_count) {
|
194
|
-
|
195
|
-
auto data = mask.GetData();
|
196
|
-
|
197
|
-
// fills up a buffer sequentially before searching for free bits
|
198
|
-
if (mask.RowIsValid(allocation_count)) {
|
199
|
-
mask.SetInvalid(allocation_count);
|
200
|
-
return allocation_count;
|
201
|
-
}
|
202
|
-
|
203
|
-
// get an entry with free bits
|
204
|
-
for (idx_t entry_idx = 0; entry_idx < bitmask_count; entry_idx++) {
|
205
|
-
if (data[entry_idx] != 0) {
|
206
|
-
|
207
|
-
// find the position of the free bit
|
208
|
-
auto entry = data[entry_idx];
|
209
|
-
idx_t first_valid_bit = 0;
|
210
|
-
|
211
|
-
// this loop finds the position of the rightmost set bit in entry and stores it
|
212
|
-
// in first_valid_bit
|
213
|
-
for (idx_t i = 0; i < 6; i++) {
|
214
|
-
// set the left half of the bits of this level to zero and test if the entry is still not zero
|
215
|
-
if (entry & BASE[i]) {
|
216
|
-
// first valid bit is in the rightmost s[i] bits
|
217
|
-
// permanently set the left half of the bits to zero
|
218
|
-
entry &= BASE[i];
|
219
|
-
} else {
|
220
|
-
// first valid bit is in the leftmost s[i] bits
|
221
|
-
// shift by s[i] for the next iteration and add s[i] to the position of the rightmost set bit
|
222
|
-
entry >>= SHIFT[i];
|
223
|
-
first_valid_bit += SHIFT[i];
|
224
|
-
}
|
225
|
-
}
|
226
|
-
D_ASSERT(entry);
|
227
|
-
|
228
|
-
auto prev_bits = entry_idx * sizeof(validity_t) * 8;
|
229
|
-
D_ASSERT(mask.RowIsValid(prev_bits + first_valid_bit));
|
230
|
-
mask.SetInvalid(prev_bits + first_valid_bit);
|
231
|
-
return (prev_bits + first_valid_bit);
|
232
|
-
}
|
233
|
-
}
|
234
|
-
|
235
|
-
throw InternalException("Invalid bitmask of FixedSizeAllocator");
|
236
|
-
}
|
237
|
-
|
238
|
-
} // namespace duckdb
|