duckdb 0.8.2-dev3989.0 → 0.8.2-dev4126.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. package/binding.gyp +8 -7
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/json/buffered_json_reader.cpp +76 -74
  4. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +35 -32
  5. package/src/duckdb/extension/json/include/json_scan.hpp +9 -6
  6. package/src/duckdb/extension/json/json_scan.cpp +124 -121
  7. package/src/duckdb/extension/parquet/parquet_extension.cpp +23 -13
  8. package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +5 -0
  9. package/src/duckdb/src/common/crypto/md5.cpp +2 -12
  10. package/src/duckdb/src/common/radix_partitioning.cpp +1 -1
  11. package/src/duckdb/src/common/sort/partition_state.cpp +5 -1
  12. package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +1 -1
  13. package/src/duckdb/src/core_functions/function_list.cpp +8 -0
  14. package/src/duckdb/src/core_functions/scalar/list/list_cosine_similarity.cpp +78 -0
  15. package/src/duckdb/src/core_functions/scalar/list/list_distance.cpp +72 -0
  16. package/src/duckdb/src/core_functions/scalar/list/list_inner_product.cpp +70 -0
  17. package/src/duckdb/src/core_functions/scalar/string/sha256.cpp +32 -0
  18. package/src/duckdb/src/execution/index/art/art.cpp +111 -92
  19. package/src/duckdb/src/execution/index/art/iterator.cpp +21 -27
  20. package/src/duckdb/src/execution/index/art/leaf.cpp +72 -153
  21. package/src/duckdb/src/execution/index/art/node.cpp +109 -203
  22. package/src/duckdb/src/execution/index/art/node16.cpp +32 -64
  23. package/src/duckdb/src/execution/index/art/node256.cpp +38 -53
  24. package/src/duckdb/src/execution/index/art/node4.cpp +31 -62
  25. package/src/duckdb/src/execution/index/art/node48.cpp +43 -65
  26. package/src/duckdb/src/execution/index/art/prefix.cpp +70 -141
  27. package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +345 -0
  28. package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +74 -0
  29. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
  30. package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +1 -1
  31. package/src/duckdb/src/function/scalar/string/suffix.cpp +1 -1
  32. package/src/duckdb/src/function/table/system/duckdb_columns.cpp +3 -1
  33. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  34. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +2 -0
  35. package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +1 -1
  36. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +51 -0
  37. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +9 -0
  38. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +17 -7
  39. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +5 -5
  40. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +10 -16
  41. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +38 -116
  42. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +17 -18
  43. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +17 -23
  44. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +17 -18
  45. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +17 -24
  46. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +16 -22
  47. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +126 -0
  48. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +79 -0
  49. package/src/duckdb/src/include/duckdb/execution/index/index_pointer.hpp +96 -0
  50. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +1 -1
  51. package/src/duckdb/src/include/duckdb/planner/operator/logical_join.hpp +1 -1
  52. package/src/duckdb/src/include/duckdb/storage/block.hpp +1 -1
  53. package/src/duckdb/src/include/duckdb/storage/index.hpp +10 -8
  54. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +3 -0
  55. package/src/duckdb/src/main/extension/extension_helper.cpp +15 -1
  56. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +14 -5
  57. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +2 -3
  58. package/src/duckdb/src/storage/checkpoint_manager.cpp +16 -21
  59. package/src/duckdb/src/storage/data_table.cpp +3 -3
  60. package/src/duckdb/src/storage/index.cpp +7 -1
  61. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +21 -21
  62. package/src/duckdb/src/storage/standard_buffer_manager.cpp +10 -16
  63. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  64. package/src/duckdb/src/storage/table_index_list.cpp +1 -1
  65. package/src/duckdb/src/transaction/commit_state.cpp +5 -1
  66. package/src/duckdb/third_party/mbedtls/include/mbedtls_wrapper.hpp +4 -1
  67. package/src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp +24 -2
  68. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +5 -5
  69. package/src/duckdb/ub_src_core_functions_scalar_list.cpp +6 -0
  70. package/src/duckdb/ub_src_core_functions_scalar_string.cpp +2 -0
  71. package/src/duckdb/ub_src_execution_index.cpp +4 -0
  72. package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
  73. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +0 -238
  74. package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +0 -115
@@ -262,7 +262,8 @@ void CheckpointWriter::WriteSchema(SchemaCatalogEntry &schema) {
262
262
  void CheckpointReader::ReadSchema(ClientContext &context, MetadataReader &reader) {
263
263
  // read the schema and create it in the catalog
264
264
  auto info = CatalogEntry::Deserialize(reader);
265
- // we set create conflict to ignore to ignore the failure of recreating the main schema
265
+
266
+ // we set create conflict to IGNORE_ON_CONFLICT, so that we can ignore a failure when recreating the main schema
266
267
  info->on_conflict = OnCreateConflict::IGNORE_ON_CONFLICT;
267
268
  catalog.CreateSchema(context, info->Cast<CreateSchemaInfo>());
268
269
 
@@ -336,14 +337,12 @@ void CheckpointReader::ReadSequence(ClientContext &context, MetadataReader &read
336
337
  // Indexes
337
338
  //===--------------------------------------------------------------------===//
338
339
  void CheckpointWriter::WriteIndex(IndexCatalogEntry &index_catalog) {
339
- // The index data should already have been written as part of WriteTableData.
340
- // Here, we need only serialize the pointer to that data.
341
- auto root_offset = index_catalog.index->GetSerializedDataPointer();
340
+ // we write the index data in WriteTableData
341
+ // here, we only write the root pointer
342
+ const auto root_block_pointer = index_catalog.index->GetRootBlockPointer();
342
343
  auto &metadata_writer = GetMetadataWriter();
343
344
  index_catalog.Serialize(metadata_writer);
344
- // Serialize the Block id and offset of root node
345
- metadata_writer.Write(root_offset.block_id);
346
- metadata_writer.Write(root_offset.offset);
345
+ metadata_writer.Write(root_block_pointer);
347
346
  }
348
347
 
349
348
  void CheckpointReader::ReadIndex(ClientContext &context, MetadataReader &reader) {
@@ -358,10 +357,8 @@ void CheckpointReader::ReadIndex(ClientContext &context, MetadataReader &reader)
358
357
  auto &index_catalog = schema_catalog.CreateIndex(context, index_info, table_catalog)->Cast<DuckIndexEntry>();
359
358
  index_catalog.info = table_catalog.GetStorage().info;
360
359
 
361
- // we deserialize the index lazily, i.e., we do not need to load any node information
362
- // except the root block id and offset
363
- auto root_block_id = reader.Read<block_id_t>();
364
- auto root_offset = reader.Read<uint32_t>();
360
+ // we deserialize the index lazily, i.e., we only load the root block pointer
361
+ const auto index_block_pointer = reader.Read<BlockPointer>();
365
362
 
366
363
  // obtain the expressions of the ART from the index metadata
367
364
  vector<unique_ptr<Expression>> unbound_expressions;
@@ -401,9 +398,8 @@ void CheckpointReader::ReadIndex(ClientContext &context, MetadataReader &reader)
401
398
  switch (index_info.index_type) {
402
399
  case IndexType::ART: {
403
400
  auto &storage = table_catalog.GetStorage();
404
- auto art =
405
- make_uniq<ART>(index_info.column_ids, TableIOManager::Get(storage), std::move(unbound_expressions),
406
- index_info.constraint_type, storage.db, nullptr, BlockPointer(root_block_id, root_offset));
401
+ auto art = make_uniq<ART>(index_info.column_ids, TableIOManager::Get(storage), std::move(unbound_expressions),
402
+ index_info.constraint_type, storage.db, nullptr, index_block_pointer);
407
403
  index_catalog.index = art.get();
408
404
  storage.info->indexes.AddIndex(std::move(art));
409
405
  break;
@@ -450,7 +446,7 @@ void CheckpointReader::ReadTableMacro(ClientContext &context, MetadataReader &re
450
446
  // Table Metadata
451
447
  //===--------------------------------------------------------------------===//
452
448
  void CheckpointWriter::WriteTable(TableCatalogEntry &table) {
453
- // write the table meta data
449
+ // write the table metadata
454
450
  table.Serialize(GetMetadataWriter());
455
451
  // now we need to write the table data.
456
452
  if (auto writer = GetTableDataWriter(table)) {
@@ -483,12 +479,11 @@ void CheckpointReader::ReadTableData(ClientContext &context, MetadataReader &rea
483
479
  data_reader.ReadTableData();
484
480
  bound_info.data->total_rows = reader.Read<idx_t>();
485
481
 
486
- // Get any indexes block info
487
- idx_t num_indexes = reader.Read<idx_t>();
488
- for (idx_t i = 0; i < num_indexes; i++) {
489
- auto idx_block_id = reader.Read<block_id_t>();
490
- auto idx_offset = reader.Read<uint32_t>();
491
- bound_info.indexes.emplace_back(idx_block_id, idx_offset);
482
+ // get the root block pointers of each index
483
+ idx_t index_count = reader.Read<idx_t>();
484
+ for (idx_t i = 0; i < index_count; i++) {
485
+ const auto index_pointer = reader.Read<BlockPointer>();
486
+ bound_info.indexes.emplace_back(index_pointer);
492
487
  }
493
488
  }
494
489
 
@@ -429,13 +429,13 @@ void DataTable::VerifyForeignKeyConstraint(const BoundForeignKeyConstraint &bfk,
429
429
  data_table.info->indexes.VerifyForeignKey(*dst_keys_ptr, dst_chunk, regular_conflicts);
430
430
  regular_conflicts.Finalize();
431
431
  auto &regular_matches = regular_conflicts.Conflicts();
432
- // check whether or not the chunk can be inserted or deleted into the referenced table' transaction local storage
433
- auto &local_storage = LocalStorage::Get(context, db);
434
432
 
433
+ // check if we can insert the chunk into the reference table's local storage
434
+ auto &local_storage = LocalStorage::Get(context, db);
435
435
  bool error = IsForeignKeyConstraintError(is_append, count, regular_matches);
436
436
  bool transaction_error = false;
437
-
438
437
  bool transaction_check = local_storage.Find(data_table);
438
+
439
439
  if (transaction_check) {
440
440
  auto &transact_index = local_storage.GetIndexes(data_table);
441
441
  transact_index.VerifyForeignKey(*dst_keys_ptr, dst_chunk, transaction_conflicts);
@@ -12,7 +12,7 @@ Index::Index(AttachedDatabase &db, IndexType type, TableIOManager &table_io_mana
12
12
  IndexConstraintType constraint_type_p)
13
13
 
14
14
  : type(type), table_io_manager(table_io_manager), column_ids(column_ids_p), constraint_type(constraint_type_p),
15
- db(db), buffer_manager(BufferManager::GetBufferManager(db)) {
15
+ db(db) {
16
16
 
17
17
  for (auto &expr : unbound_expressions) {
18
18
  types.push_back(expr->return_type.InternalType());
@@ -39,6 +39,12 @@ PreservedError Index::Append(DataChunk &entries, Vector &row_identifiers) {
39
39
  return Append(state, entries, row_identifiers);
40
40
  }
41
41
 
42
+ void Index::CommitDrop() {
43
+ IndexLock index_lock;
44
+ InitializeLock(index_lock);
45
+ CommitDrop(index_lock);
46
+ }
47
+
42
48
  void Index::Delete(DataChunk &entries, Vector &row_identifiers) {
43
49
  IndexLock state;
44
50
  InitializeLock(state);
@@ -13,7 +13,7 @@ MetadataManager::~MetadataManager() {
13
13
 
14
14
  MetadataHandle MetadataManager::AllocateHandle() {
15
15
  // check if there is any free space left in an existing block
16
- // if not allocate a new bloc
16
+ // if not allocate a new block
17
17
  block_id_t free_block = INVALID_BLOCK;
18
18
  for (auto &kv : blocks) {
19
19
  auto &block = kv.second;
@@ -230,27 +230,27 @@ void MetadataBlock::FreeBlocksFromInteger(idx_t free_list) {
230
230
  }
231
231
 
232
232
  void MetadataManager::MarkBlocksAsModified() {
233
- if (!modified_blocks.empty()) {
234
- // for any blocks that were modified in the last checkpoint - set them to free blocks currently
235
- for (auto &kv : modified_blocks) {
236
- auto block_id = kv.first;
237
- idx_t modified_list = kv.second;
238
- auto entry = blocks.find(block_id);
239
- D_ASSERT(entry != blocks.end());
240
- auto &block = entry->second;
241
- idx_t current_free_blocks = block.FreeBlocksToInteger();
242
- // merge the current set of free blocks with the modified blocks
243
- idx_t new_free_blocks = current_free_blocks | modified_list;
244
- // if (new_free_blocks == NumericLimits<idx_t>::Maximum()) {
245
- // // if new free_blocks is all blocks - mark entire block as modified
246
- // blocks.erase(entry);
247
- // block_manager.MarkBlockAsModified(block_id);
248
- // } else {
249
- // set the new set of free blocks
250
- block.FreeBlocksFromInteger(new_free_blocks);
251
- // }
252
- }
233
+
234
+ // for any blocks that were modified in the last checkpoint - set them to free blocks currently
235
+ for (auto &kv : modified_blocks) {
236
+ auto block_id = kv.first;
237
+ idx_t modified_list = kv.second;
238
+ auto entry = blocks.find(block_id);
239
+ D_ASSERT(entry != blocks.end());
240
+ auto &block = entry->second;
241
+ idx_t current_free_blocks = block.FreeBlocksToInteger();
242
+ // merge the current set of free blocks with the modified blocks
243
+ idx_t new_free_blocks = current_free_blocks | modified_list;
244
+ // if (new_free_blocks == NumericLimits<idx_t>::Maximum()) {
245
+ // // if new free_blocks is all blocks - mark entire block as modified
246
+ // blocks.erase(entry);
247
+ // block_manager.MarkBlockAsModified(block_id);
248
+ // } else {
249
+ // set the new set of free blocks
250
+ block.FreeBlocksFromInteger(new_free_blocks);
251
+ // }
253
252
  }
253
+
254
254
  modified_blocks.clear();
255
255
  for (auto &kv : blocks) {
256
256
  auto &block = kv.second;
@@ -3,11 +3,11 @@
3
3
  #include "duckdb/common/allocator.hpp"
4
4
  #include "duckdb/common/exception.hpp"
5
5
  #include "duckdb/common/set.hpp"
6
- #include "duckdb/storage/in_memory_block_manager.hpp"
7
- #include "duckdb/storage/storage_manager.hpp"
8
6
  #include "duckdb/main/attached_database.hpp"
9
7
  #include "duckdb/main/database.hpp"
10
8
  #include "duckdb/storage/buffer/buffer_pool.hpp"
9
+ #include "duckdb/storage/in_memory_block_manager.hpp"
10
+ #include "duckdb/storage/storage_manager.hpp"
11
11
 
12
12
  namespace duckdb {
13
13
 
@@ -77,14 +77,6 @@ idx_t StandardBufferManager::GetMaxMemory() const {
77
77
  return buffer_pool.GetMaxMemory();
78
78
  }
79
79
 
80
- // POTENTIALLY PROBLEMATIC
81
- // void StandardBufferManager::IncreaseUsedMemory(idx_t size, bool unsafe) {
82
- // if (!unsafe && buffer_pool.GetUsedMemory() + size > buffer_pool.GetMaxMemory()) {
83
- // throw OutOfMemoryException("Failed to allocate data of size %lld%s", size, InMemoryWarning());
84
- // }
85
- // buffer_pool.IncreaseUsedMemory(size);
86
- //}
87
-
88
80
  template <typename... ARGS>
89
81
  TempBufferPoolReservation StandardBufferManager::EvictBlocksOrThrow(idx_t memory_delta, unique_ptr<FileBuffer> *buffer,
90
82
  ARGS... args) {
@@ -329,12 +321,13 @@ private:
329
321
  };
330
322
 
331
323
  class TemporaryFileHandle {
332
- constexpr static idx_t MAX_ALLOWED_INDEX = 4000;
324
+ constexpr static idx_t MAX_ALLOWED_INDEX_BASE = 4000;
333
325
 
334
326
  public:
335
- TemporaryFileHandle(DatabaseInstance &db, const string &temp_directory, idx_t index)
336
- : db(db), file_index(index), path(FileSystem::GetFileSystem(db).JoinPath(
337
- temp_directory, "duckdb_temp_storage-" + to_string(index) + ".tmp")) {
327
+ TemporaryFileHandle(idx_t temp_file_count, DatabaseInstance &db, const string &temp_directory, idx_t index)
328
+ : max_allowed_index((1 << temp_file_count) * MAX_ALLOWED_INDEX_BASE), db(db), file_index(index),
329
+ path(FileSystem::GetFileSystem(db).JoinPath(temp_directory,
330
+ "duckdb_temp_storage-" + to_string(index) + ".tmp")) {
338
331
  }
339
332
 
340
333
  public:
@@ -348,7 +341,7 @@ public:
348
341
  public:
349
342
  TemporaryFileIndex TryGetBlockIndex() {
350
343
  TemporaryFileLock lock(file_lock);
351
- if (index_manager.GetMaxIndex() >= MAX_ALLOWED_INDEX && index_manager.HasFreeBlocks()) {
344
+ if (index_manager.GetMaxIndex() >= max_allowed_index && index_manager.HasFreeBlocks()) {
352
345
  // file is at capacity
353
346
  return TemporaryFileIndex();
354
347
  }
@@ -426,6 +419,7 @@ private:
426
419
  }
427
420
 
428
421
  private:
422
+ const idx_t max_allowed_index;
429
423
  DatabaseInstance &db;
430
424
  unique_ptr<FileHandle> handle;
431
425
  idx_t file_index;
@@ -467,7 +461,7 @@ public:
467
461
  if (!handle) {
468
462
  // no existing handle to write to; we need to create & open a new file
469
463
  auto new_file_index = index_manager.GetNewBlockIndex();
470
- auto new_file = make_uniq<TemporaryFileHandle>(db, temp_directory, new_file_index);
464
+ auto new_file = make_uniq<TemporaryFileHandle>(files.size(), db, temp_directory, new_file_index);
471
465
  handle = new_file.get();
472
466
  files[new_file_index] = std::move(new_file);
473
467
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  namespace duckdb {
4
4
 
5
- const uint64_t VERSION_NUMBER = 57;
5
+ const uint64_t VERSION_NUMBER = 58;
6
6
 
7
7
  struct StorageVersionInfo {
8
8
  const char *version_name;
@@ -54,7 +54,7 @@ void TableIndexList::VerifyForeignKey(const vector<PhysicalIndex> &fk_keys, Data
54
54
  ? ForeignKeyType::FK_TYPE_PRIMARY_KEY_TABLE
55
55
  : ForeignKeyType::FK_TYPE_FOREIGN_KEY_TABLE;
56
56
 
57
- // check whether or not the chunk can be inserted or deleted into the referenced table' storage
57
+ // check whether the chunk can be inserted or deleted into the referenced table storage
58
58
  auto index = FindForeignKeyIndex(fk_keys, fk_type);
59
59
  if (!index) {
60
60
  throw InternalException("Internal Foreign Key error: could not find index to verify...");
@@ -1,5 +1,6 @@
1
1
  #include "duckdb/transaction/commit_state.hpp"
2
2
 
3
+ #include "duckdb/catalog/catalog_entry/duck_index_entry.hpp"
3
4
  #include "duckdb/catalog/catalog_entry/duck_table_entry.hpp"
4
5
  #include "duckdb/catalog/catalog_entry/type_catalog_entry.hpp"
5
6
  #include "duckdb/catalog/catalog_set.hpp"
@@ -124,9 +125,12 @@ void CommitState::WriteCatalogEntry(CatalogEntry &entry, data_ptr_t dataptr) {
124
125
  case CatalogType::TYPE_ENTRY:
125
126
  log->WriteDropType(entry.Cast<TypeCatalogEntry>());
126
127
  break;
127
- case CatalogType::INDEX_ENTRY:
128
+ case CatalogType::INDEX_ENTRY: {
129
+ auto &index_entry = entry.Cast<DuckIndexEntry>();
130
+ index_entry.CommitDrop();
128
131
  log->WriteDropIndex(entry.Cast<IndexCatalogEntry>());
129
132
  break;
133
+ }
130
134
  case CatalogType::PREPARED_STATEMENT:
131
135
  case CatalogType::SCALAR_FUNCTION_ENTRY:
132
136
  // do nothing, indexes/prepared statements/functions aren't persisted to disk
@@ -17,8 +17,10 @@ public:
17
17
  static std::string ComputeSha256Hash(const std::string& file_content);
18
18
  static bool IsValidSha256Signature(const std::string& pubkey, const std::string& signature, const std::string& sha256_hash);
19
19
  static void Hmac256(const char* key, size_t key_len, const char* message, size_t message_len, char* out);
20
+ static void ToBase16(char *in, char *out, size_t len);
20
21
 
21
- static constexpr size_t SHA256_HASH_BYTES = 32;
22
+ static constexpr size_t SHA256_HASH_LENGTH_BYTES = 32;
23
+ static constexpr size_t SHA256_HASH_LENGTH_TEXT = 64;
22
24
 
23
25
  class SHA256State {
24
26
  public:
@@ -26,6 +28,7 @@ public:
26
28
  ~SHA256State();
27
29
  void AddString(const std::string & str);
28
30
  std::string Finalize();
31
+ void FinishHex(char *out);
29
32
  private:
30
33
  void *sha_context;
31
34
  };
@@ -38,7 +38,7 @@ void MbedTlsWrapper::ComputeSha256Hash(const char* in, size_t in_len, char* out)
38
38
 
39
39
  string MbedTlsWrapper::ComputeSha256Hash(const string& file_content) {
40
40
  string hash;
41
- hash.resize(MbedTlsWrapper::SHA256_HASH_BYTES);
41
+ hash.resize(MbedTlsWrapper::SHA256_HASH_LENGTH_BYTES);
42
42
  ComputeSha256Hash(file_content.data(), file_content.size(), (char*)hash.data());
43
43
  return hash;
44
44
  }
@@ -83,6 +83,17 @@ void MbedTlsWrapper::Hmac256(const char* key, size_t key_len, const char* messag
83
83
  mbedtls_md_free(&hmac_ctx);
84
84
  }
85
85
 
86
+ void MbedTlsWrapper::ToBase16(char *in, char *out, size_t len) {
87
+ static char const HEX_CODES[] = "0123456789abcdef";
88
+ size_t i, j;
89
+
90
+ for (j = i = 0; i < len; i++) {
91
+ int a = in[i];
92
+ out[j++] = HEX_CODES[(a >> 4) & 0xf];
93
+ out[j++] = HEX_CODES[a & 0xf];
94
+ }
95
+ }
96
+
86
97
  MbedTlsWrapper::SHA256State::SHA256State() : sha_context(new mbedtls_sha256_context()) {
87
98
  mbedtls_sha256_init((mbedtls_sha256_context*)sha_context);
88
99
 
@@ -104,7 +115,7 @@ void MbedTlsWrapper::SHA256State::AddString(const std::string & str) {
104
115
 
105
116
  std::string MbedTlsWrapper::SHA256State::Finalize() {
106
117
  string hash;
107
- hash.resize(MbedTlsWrapper::SHA256_HASH_BYTES);
118
+ hash.resize(MbedTlsWrapper::SHA256_HASH_LENGTH_BYTES);
108
119
 
109
120
  if (mbedtls_sha256_finish((mbedtls_sha256_context*)sha_context, (unsigned char*)hash.data())) {
110
121
  throw std::runtime_error("SHA256 Error");
@@ -112,3 +123,14 @@ std::string MbedTlsWrapper::SHA256State::Finalize() {
112
123
 
113
124
  return hash;
114
125
  }
126
+
127
+ void MbedTlsWrapper::SHA256State::FinishHex(char *out) {
128
+ string hash;
129
+ hash.resize(MbedTlsWrapper::SHA256_HASH_LENGTH_BYTES);
130
+
131
+ if (mbedtls_sha256_finish((mbedtls_sha256_context *)sha_context, (unsigned char *)hash.data())) {
132
+ throw std::runtime_error("SHA256 Error");
133
+ }
134
+
135
+ MbedTlsWrapper::ToBase16(const_cast<char *>(hash.c_str()), out, MbedTlsWrapper::SHA256_HASH_LENGTH_BYTES);
136
+ }
@@ -348,9 +348,11 @@
348
348
 
349
349
  #include "extension/icu/third_party/icu/i18n/wintzimpl.cpp"
350
350
 
351
- #include "extension/icu/third_party/icu/i18n/double-conversion-fast-dtoa.cpp"
351
+ #include "extension/icu/third_party/icu/i18n/double-conversion-strtod.cpp"
352
352
 
353
- #include "extension/icu/third_party/icu/i18n/double-conversion-cached-powers.cpp"
353
+ #include "extension/icu/third_party/icu/i18n/double-conversion-double-to-string.cpp"
354
+
355
+ #include "extension/icu/third_party/icu/i18n/double-conversion-fast-dtoa.cpp"
354
356
 
355
357
  #include "extension/icu/third_party/icu/i18n/double-conversion-string-to-double.cpp"
356
358
 
@@ -358,7 +360,5 @@
358
360
 
359
361
  #include "extension/icu/third_party/icu/i18n/double-conversion-bignum.cpp"
360
362
 
361
- #include "extension/icu/third_party/icu/i18n/double-conversion-strtod.cpp"
362
-
363
- #include "extension/icu/third_party/icu/i18n/double-conversion-double-to-string.cpp"
363
+ #include "extension/icu/third_party/icu/i18n/double-conversion-cached-powers.cpp"
364
364
 
@@ -10,5 +10,11 @@
10
10
 
11
11
  #include "src/core_functions/scalar/list/list_sort.cpp"
12
12
 
13
+ #include "src/core_functions/scalar/list/list_distance.cpp"
14
+
15
+ #include "src/core_functions/scalar/list/list_cosine_similarity.cpp"
16
+
17
+ #include "src/core_functions/scalar/list/list_inner_product.cpp"
18
+
13
19
  #include "src/core_functions/scalar/list/range.cpp"
14
20
 
@@ -34,6 +34,8 @@
34
34
 
35
35
  #include "src/core_functions/scalar/string/reverse.cpp"
36
36
 
37
+ #include "src/core_functions/scalar/string/sha256.cpp"
38
+
37
39
  #include "src/core_functions/scalar/string/starts_with.cpp"
38
40
 
39
41
  #include "src/core_functions/scalar/string/string_split.cpp"
@@ -0,0 +1,4 @@
1
+ #include "src/execution/index/fixed_size_allocator.cpp"
2
+
3
+ #include "src/execution/index/fixed_size_buffer.cpp"
4
+
@@ -2,8 +2,6 @@
2
2
 
3
3
  #include "src/execution/index/art/node.cpp"
4
4
 
5
- #include "src/execution/index/art/fixed_size_allocator.cpp"
6
-
7
5
  #include "src/execution/index/art/iterator.cpp"
8
6
 
9
7
  #include "src/execution/index/art/leaf.cpp"
@@ -1,238 +0,0 @@
1
- #include "duckdb/execution/index/art/fixed_size_allocator.hpp"
2
-
3
- namespace duckdb {
4
-
5
- constexpr idx_t FixedSizeAllocator::BASE[];
6
- constexpr uint8_t FixedSizeAllocator::SHIFT[];
7
-
8
- FixedSizeAllocator::FixedSizeAllocator(const idx_t allocation_size, Allocator &allocator)
9
- : allocation_size(allocation_size), total_allocations(0), allocator(allocator) {
10
-
11
- // calculate how many allocations fit into one buffer
12
-
13
- idx_t bits_per_value = sizeof(validity_t) * 8;
14
- idx_t curr_alloc_size = 0;
15
-
16
- bitmask_count = 0;
17
- allocations_per_buffer = 0;
18
-
19
- while (curr_alloc_size < BUFFER_ALLOC_SIZE) {
20
- if (!bitmask_count || (bitmask_count * bits_per_value) % allocations_per_buffer == 0) {
21
- bitmask_count++;
22
- curr_alloc_size += sizeof(validity_t);
23
- }
24
-
25
- auto remaining_alloc_size = BUFFER_ALLOC_SIZE - curr_alloc_size;
26
- auto remaining_allocations = MinValue(remaining_alloc_size / allocation_size, bits_per_value);
27
-
28
- if (remaining_allocations == 0) {
29
- break;
30
- }
31
-
32
- allocations_per_buffer += remaining_allocations;
33
- curr_alloc_size += remaining_allocations * allocation_size;
34
- }
35
-
36
- allocation_offset = bitmask_count * sizeof(validity_t);
37
- }
38
-
39
- FixedSizeAllocator::~FixedSizeAllocator() {
40
- for (auto &buffer : buffers) {
41
- allocator.FreeData(buffer.ptr, BUFFER_ALLOC_SIZE);
42
- }
43
- }
44
-
45
- Node FixedSizeAllocator::New() {
46
-
47
- // no more free pointers
48
- if (buffers_with_free_space.empty()) {
49
-
50
- // add a new buffer
51
- idx_t buffer_id = buffers.size();
52
- D_ASSERT(buffer_id <= (uint32_t)DConstants::INVALID_INDEX);
53
- auto buffer = allocator.AllocateData(BUFFER_ALLOC_SIZE);
54
- buffers.emplace_back(buffer, 0);
55
- buffers_with_free_space.insert(buffer_id);
56
-
57
- // set the bitmask
58
- ValidityMask mask(reinterpret_cast<validity_t *>(buffer));
59
- mask.SetAllValid(allocations_per_buffer);
60
- }
61
-
62
- // return a pointer
63
- D_ASSERT(!buffers_with_free_space.empty());
64
- auto buffer_id = (uint32_t)*buffers_with_free_space.begin();
65
-
66
- auto bitmask_ptr = reinterpret_cast<validity_t *>(buffers[buffer_id].ptr);
67
- ValidityMask mask(bitmask_ptr);
68
- auto offset = GetOffset(mask, buffers[buffer_id].allocation_count);
69
-
70
- buffers[buffer_id].allocation_count++;
71
- total_allocations++;
72
- if (buffers[buffer_id].allocation_count == allocations_per_buffer) {
73
- buffers_with_free_space.erase(buffer_id);
74
- }
75
-
76
- return Node(buffer_id, offset);
77
- }
78
-
79
- void FixedSizeAllocator::Free(const Node ptr) {
80
- auto bitmask_ptr = reinterpret_cast<validity_t *>(buffers[ptr.GetBufferId()].ptr);
81
- ValidityMask mask(bitmask_ptr);
82
- D_ASSERT(!mask.RowIsValid(ptr.GetOffset()));
83
- mask.SetValid(ptr.GetOffset());
84
- buffers_with_free_space.insert(ptr.GetBufferId());
85
-
86
- D_ASSERT(total_allocations > 0);
87
- D_ASSERT(buffers[ptr.GetBufferId()].allocation_count > 0);
88
- buffers[ptr.GetBufferId()].allocation_count--;
89
- total_allocations--;
90
- }
91
-
92
- void FixedSizeAllocator::Reset() {
93
-
94
- for (auto &buffer : buffers) {
95
- allocator.FreeData(buffer.ptr, BUFFER_ALLOC_SIZE);
96
- }
97
- buffers.clear();
98
- buffers_with_free_space.clear();
99
- total_allocations = 0;
100
- }
101
-
102
- void FixedSizeAllocator::Merge(FixedSizeAllocator &other) {
103
-
104
- D_ASSERT(allocation_size == other.allocation_size);
105
-
106
- // remember the buffer count and merge the buffers
107
- idx_t buffer_count = buffers.size();
108
- for (auto &buffer : other.buffers) {
109
- buffers.push_back(buffer);
110
- }
111
- other.buffers.clear();
112
-
113
- // merge the buffers with free spaces
114
- for (auto &buffer_id : other.buffers_with_free_space) {
115
- buffers_with_free_space.insert(buffer_id + buffer_count);
116
- }
117
- other.buffers_with_free_space.clear();
118
-
119
- // add the total allocations
120
- total_allocations += other.total_allocations;
121
- }
122
-
123
- bool FixedSizeAllocator::InitializeVacuum() {
124
-
125
- if (total_allocations == 0) {
126
- Reset();
127
- return false;
128
- }
129
-
130
- auto total_available_allocations = allocations_per_buffer * buffers.size();
131
- D_ASSERT(total_available_allocations >= total_allocations);
132
- auto total_free_positions = total_available_allocations - total_allocations;
133
-
134
- // vacuum_count buffers can be freed
135
- auto vacuum_count = total_free_positions / allocations_per_buffer;
136
-
137
- // calculate the vacuum threshold adaptively
138
- D_ASSERT(vacuum_count < buffers.size());
139
- idx_t memory_usage = GetMemoryUsage();
140
- idx_t excess_memory_usage = vacuum_count * BUFFER_ALLOC_SIZE;
141
- auto excess_percentage = (double)excess_memory_usage / (double)memory_usage;
142
- auto threshold = (double)VACUUM_THRESHOLD / 100.0;
143
- if (excess_percentage < threshold) {
144
- return false;
145
- }
146
-
147
- min_vacuum_buffer_id = buffers.size() - vacuum_count;
148
-
149
- // remove all invalid buffers from the available buffer list to ensure that we do not reuse them
150
- auto it = buffers_with_free_space.begin();
151
- while (it != buffers_with_free_space.end()) {
152
- if (*it >= min_vacuum_buffer_id) {
153
- it = buffers_with_free_space.erase(it);
154
- } else {
155
- it++;
156
- }
157
- }
158
-
159
- return true;
160
- }
161
-
162
- void FixedSizeAllocator::FinalizeVacuum() {
163
-
164
- // free all (now unused) buffers
165
- while (min_vacuum_buffer_id < buffers.size()) {
166
- allocator.FreeData(buffers.back().ptr, BUFFER_ALLOC_SIZE);
167
- buffers.pop_back();
168
- }
169
- }
170
-
171
- Node FixedSizeAllocator::VacuumPointer(const Node ptr) {
172
-
173
- // we do not need to adjust the bitmask of the old buffer, because we will free the entire
174
- // buffer after the vacuum operation
175
-
176
- auto new_ptr = New();
177
-
178
- // new increases the allocation count
179
- total_allocations--;
180
-
181
- memcpy(Get(new_ptr), Get(ptr), allocation_size);
182
- return new_ptr;
183
- }
184
-
185
- void FixedSizeAllocator::Verify() const {
186
- #ifdef DEBUG
187
- auto total_available_allocations = allocations_per_buffer * buffers.size();
188
- D_ASSERT(total_available_allocations >= total_allocations);
189
- D_ASSERT(buffers.size() >= buffers_with_free_space.size());
190
- #endif
191
- }
192
-
193
- uint32_t FixedSizeAllocator::GetOffset(ValidityMask &mask, const idx_t allocation_count) {
194
-
195
- auto data = mask.GetData();
196
-
197
- // fills up a buffer sequentially before searching for free bits
198
- if (mask.RowIsValid(allocation_count)) {
199
- mask.SetInvalid(allocation_count);
200
- return allocation_count;
201
- }
202
-
203
- // get an entry with free bits
204
- for (idx_t entry_idx = 0; entry_idx < bitmask_count; entry_idx++) {
205
- if (data[entry_idx] != 0) {
206
-
207
- // find the position of the free bit
208
- auto entry = data[entry_idx];
209
- idx_t first_valid_bit = 0;
210
-
211
- // this loop finds the position of the rightmost set bit in entry and stores it
212
- // in first_valid_bit
213
- for (idx_t i = 0; i < 6; i++) {
214
- // set the left half of the bits of this level to zero and test if the entry is still not zero
215
- if (entry & BASE[i]) {
216
- // first valid bit is in the rightmost s[i] bits
217
- // permanently set the left half of the bits to zero
218
- entry &= BASE[i];
219
- } else {
220
- // first valid bit is in the leftmost s[i] bits
221
- // shift by s[i] for the next iteration and add s[i] to the position of the rightmost set bit
222
- entry >>= SHIFT[i];
223
- first_valid_bit += SHIFT[i];
224
- }
225
- }
226
- D_ASSERT(entry);
227
-
228
- auto prev_bits = entry_idx * sizeof(validity_t) * 8;
229
- D_ASSERT(mask.RowIsValid(prev_bits + first_valid_bit));
230
- mask.SetInvalid(prev_bits + first_valid_bit);
231
- return (prev_bits + first_valid_bit);
232
- }
233
- }
234
-
235
- throw InternalException("Invalid bitmask of FixedSizeAllocator");
236
- }
237
-
238
- } // namespace duckdb