duckdb 1.4.1 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/NodeJS.yml +9 -6
- package/package.json +2 -2
- package/scripts/node_version.sh +1 -1
- package/src/duckdb/extension/icu/icu_extension.cpp +67 -6
- package/src/duckdb/extension/icu/third_party/icu/common/putil.cpp +9 -3
- package/src/duckdb/extension/json/include/json_serializer.hpp +12 -0
- package/src/duckdb/extension/json/json_functions/json_create.cpp +10 -10
- package/src/duckdb/extension/parquet/decoder/delta_length_byte_array_decoder.cpp +19 -5
- package/src/duckdb/extension/parquet/include/decoder/delta_length_byte_array_decoder.hpp +1 -1
- package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +11 -2
- package/src/duckdb/extension/parquet/include/reader/string_column_reader.hpp +2 -1
- package/src/duckdb/extension/parquet/parquet_reader.cpp +3 -1
- package/src/duckdb/extension/parquet/parquet_writer.cpp +16 -1
- package/src/duckdb/extension/parquet/reader/string_column_reader.cpp +1 -1
- package/src/duckdb/extension/parquet/writer/primitive_column_writer.cpp +1 -1
- package/src/duckdb/src/catalog/default/default_table_functions.cpp +1 -1
- package/src/duckdb/src/common/adbc/adbc.cpp +8 -6
- package/src/duckdb/src/common/csv_writer.cpp +1 -13
- package/src/duckdb/src/common/encryption_key_manager.cpp +10 -9
- package/src/duckdb/src/common/enum_util.cpp +19 -0
- package/src/duckdb/src/common/enums/compression_type.cpp +51 -16
- package/src/duckdb/src/common/exception/binder_exception.cpp +7 -2
- package/src/duckdb/src/common/progress_bar/unscented_kalman_filter.cpp +2 -2
- package/src/duckdb/src/common/random_engine.cpp +10 -0
- package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +13 -2
- package/src/duckdb/src/execution/index/art/art.cpp +6 -3
- package/src/duckdb/src/execution/index/bound_index.cpp +32 -21
- package/src/duckdb/src/execution/index/unbound_index.cpp +20 -9
- package/src/duckdb/src/execution/join_hashtable.cpp +9 -3
- package/src/duckdb/src/execution/operator/helper/physical_buffered_batch_collector.cpp +1 -1
- package/src/duckdb/src/execution/operator/helper/physical_buffered_collector.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +5 -0
- package/src/duckdb/src/function/cast/cast_function_set.cpp +3 -1
- package/src/duckdb/src/function/macro_function.cpp +1 -1
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +1 -1
- package/src/duckdb/src/function/scalar/create_sort_key.cpp +5 -3
- package/src/duckdb/src/function/scalar/operator/arithmetic.cpp +1 -1
- package/src/duckdb/src/function/scalar/system/parse_log_message.cpp +4 -2
- package/src/duckdb/src/function/table/copy_csv.cpp +28 -4
- package/src/duckdb/src/function/table/direct_file_reader.cpp +10 -0
- package/src/duckdb/src/function/table/read_file.cpp +65 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
- package/src/duckdb/src/include/duckdb/common/csv_writer.hpp +0 -3
- package/src/duckdb/src/include/duckdb/common/encryption_key_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/encryption_state.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/enums/compression_type.hpp +42 -2
- package/src/duckdb/src/include/duckdb/common/http_util.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/hugeint.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +0 -11
- package/src/duckdb/src/include/duckdb/common/random_engine.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/types/hugeint.hpp +6 -6
- package/src/duckdb/src/include/duckdb/common/types/row/block_iterator.hpp +115 -97
- package/src/duckdb/src/include/duckdb/execution/index/art/art_operator.hpp +54 -0
- package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +21 -2
- package/src/duckdb/src/include/duckdb/execution/index/unbound_index.hpp +26 -8
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/read_file.hpp +0 -49
- package/src/duckdb/src/include/duckdb/logging/log_manager.hpp +1 -1
- package/src/duckdb/src/include/duckdb/logging/log_type.hpp +14 -0
- package/src/duckdb/src/include/duckdb/main/attached_database.hpp +2 -1
- package/src/duckdb/src/include/duckdb/main/buffered_data/batched_buffered_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/buffered_data/buffered_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/buffered_data/simple_buffered_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/database.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/database_file_path_manager.hpp +10 -6
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/profiling_info.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/relation/create_table_relation.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/relation/insert_relation.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/relation/table_relation.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/relation.hpp +10 -2
- package/src/duckdb/src/include/duckdb/main/settings.hpp +9 -0
- package/src/duckdb/src/include/duckdb/optimizer/filter_pullup.hpp +10 -14
- package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +5 -1
- package/src/duckdb/src/include/duckdb/parser/query_node.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/bound_statement.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/block.hpp +9 -0
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +9 -2
- package/src/duckdb/src/include/duckdb/storage/index.hpp +8 -2
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/storage_options.hpp +0 -7
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +6 -2
- package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier.hpp +6 -0
- package/src/duckdb/src/logging/log_manager.cpp +2 -1
- package/src/duckdb/src/logging/log_types.cpp +30 -1
- package/src/duckdb/src/main/attached_database.cpp +4 -7
- package/src/duckdb/src/main/buffered_data/batched_buffered_data.cpp +2 -3
- package/src/duckdb/src/main/buffered_data/buffered_data.cpp +2 -3
- package/src/duckdb/src/main/buffered_data/simple_buffered_data.cpp +1 -2
- package/src/duckdb/src/main/capi/prepared-c.cpp +9 -2
- package/src/duckdb/src/main/config.cpp +6 -5
- package/src/duckdb/src/main/database.cpp +9 -3
- package/src/duckdb/src/main/database_file_path_manager.cpp +43 -14
- package/src/duckdb/src/main/database_manager.cpp +1 -1
- package/src/duckdb/src/main/http/http_util.cpp +19 -1
- package/src/duckdb/src/main/profiling_info.cpp +11 -0
- package/src/duckdb/src/main/query_profiler.cpp +16 -0
- package/src/duckdb/src/main/relation/create_table_relation.cpp +9 -0
- package/src/duckdb/src/main/relation/insert_relation.cpp +7 -0
- package/src/duckdb/src/main/relation/table_relation.cpp +14 -0
- package/src/duckdb/src/main/relation.cpp +28 -12
- package/src/duckdb/src/main/settings/custom_settings.cpp +9 -3
- package/src/duckdb/src/optimizer/filter_pullup.cpp +14 -0
- package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +29 -10
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +7 -0
- package/src/duckdb/src/parallel/task_executor.cpp +4 -2
- package/src/duckdb/src/parser/query_node/cte_node.cpp +79 -0
- package/src/duckdb/src/parser/transform/expression/transform_cast.cpp +3 -1
- package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +1 -0
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +12 -4
- package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +16 -12
- package/src/duckdb/src/planner/binder/statement/bind_merge_into.cpp +42 -5
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +0 -24
- package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +1 -1
- package/src/duckdb/src/planner/binder.cpp +0 -1
- package/src/duckdb/src/planner/expression_binder/having_binder.cpp +1 -2
- package/src/duckdb/src/storage/buffer/block_manager.cpp +20 -6
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +8 -6
- package/src/duckdb/src/storage/checkpoint_manager.cpp +24 -22
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +7 -0
- package/src/duckdb/src/storage/compression/zstd.cpp +34 -12
- package/src/duckdb/src/storage/data_table.cpp +1 -1
- package/src/duckdb/src/storage/local_storage.cpp +15 -2
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +29 -6
- package/src/duckdb/src/storage/metadata/metadata_reader.cpp +11 -15
- package/src/duckdb/src/storage/metadata/metadata_writer.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +1 -19
- package/src/duckdb/src/storage/single_file_block_manager.cpp +33 -3
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +3 -1
- package/src/duckdb/src/storage/storage_info.cpp +4 -0
- package/src/duckdb/src/storage/storage_manager.cpp +8 -0
- package/src/duckdb/src/storage/table/array_column_data.cpp +1 -1
- package/src/duckdb/src/storage/table/column_data.cpp +3 -2
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +3 -2
- package/src/duckdb/src/storage/table/row_group.cpp +41 -24
- package/src/duckdb/src/storage/table/row_group_collection.cpp +114 -11
- package/src/duckdb/src/storage/table_index_list.cpp +18 -5
- package/src/duckdb/src/transaction/cleanup_state.cpp +7 -2
- package/src/duckdb/third_party/mbedtls/include/mbedtls_wrapper.hpp +5 -0
- package/src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp +8 -21
- package/src/duckdb/third_party/parquet/parquet_types.cpp +57 -35
- package/src/duckdb/third_party/parquet/parquet_types.h +9 -2
- package/src/duckdb/ub_src_common_types_row.cpp +0 -2
|
@@ -26,14 +26,14 @@
|
|
|
26
26
|
namespace duckdb {
|
|
27
27
|
|
|
28
28
|
RowGroup::RowGroup(RowGroupCollection &collection_p, idx_t start, idx_t count)
|
|
29
|
-
: SegmentBase<RowGroup>(start, count), collection(collection_p), version_info(nullptr),
|
|
30
|
-
row_id_is_loaded(false), has_changes(false) {
|
|
29
|
+
: SegmentBase<RowGroup>(start, count), collection(collection_p), version_info(nullptr), deletes_is_loaded(false),
|
|
30
|
+
allocation_size(0), row_id_is_loaded(false), has_changes(false) {
|
|
31
31
|
Verify();
|
|
32
32
|
}
|
|
33
33
|
|
|
34
34
|
RowGroup::RowGroup(RowGroupCollection &collection_p, RowGroupPointer pointer)
|
|
35
35
|
: SegmentBase<RowGroup>(pointer.row_start, pointer.tuple_count), collection(collection_p), version_info(nullptr),
|
|
36
|
-
allocation_size(0), row_id_is_loaded(false), has_changes(false) {
|
|
36
|
+
deletes_is_loaded(false), allocation_size(0), row_id_is_loaded(false), has_changes(false) {
|
|
37
37
|
// deserialize the columns
|
|
38
38
|
if (pointer.data_pointers.size() != collection_p.GetTypes().size()) {
|
|
39
39
|
throw IOException("Row group column count is unaligned with table column count. Corrupt file?");
|
|
@@ -45,7 +45,6 @@ RowGroup::RowGroup(RowGroupCollection &collection_p, RowGroupPointer pointer)
|
|
|
45
45
|
this->is_loaded[c] = false;
|
|
46
46
|
}
|
|
47
47
|
this->deletes_pointers = std::move(pointer.deletes_pointers);
|
|
48
|
-
this->deletes_is_loaded = false;
|
|
49
48
|
this->has_metadata_blocks = pointer.has_metadata_blocks;
|
|
50
49
|
this->extra_metadata_blocks = std::move(pointer.extra_metadata_blocks);
|
|
51
50
|
|
|
@@ -54,7 +53,7 @@ RowGroup::RowGroup(RowGroupCollection &collection_p, RowGroupPointer pointer)
|
|
|
54
53
|
|
|
55
54
|
RowGroup::RowGroup(RowGroupCollection &collection_p, PersistentRowGroupData &data)
|
|
56
55
|
: SegmentBase<RowGroup>(data.start, data.count), collection(collection_p), version_info(nullptr),
|
|
57
|
-
allocation_size(0), row_id_is_loaded(false), has_changes(false) {
|
|
56
|
+
deletes_is_loaded(false), allocation_size(0), row_id_is_loaded(false), has_changes(false) {
|
|
58
57
|
auto &block_manager = GetBlockManager();
|
|
59
58
|
auto &info = GetTableInfo();
|
|
60
59
|
auto &types = collection.get().GetTypes();
|
|
@@ -974,21 +973,15 @@ bool RowGroup::HasUnloadedDeletes() const {
|
|
|
974
973
|
return !deletes_is_loaded;
|
|
975
974
|
}
|
|
976
975
|
|
|
977
|
-
vector<
|
|
978
|
-
if (has_metadata_blocks) {
|
|
979
|
-
|
|
980
|
-
// read if from "column_pointers" and "extra_metadata_blocks"
|
|
981
|
-
auto result = column_pointers;
|
|
982
|
-
for (auto &block_pointer : extra_metadata_blocks) {
|
|
983
|
-
result.emplace_back(block_pointer, 0);
|
|
984
|
-
}
|
|
985
|
-
return result;
|
|
976
|
+
vector<idx_t> RowGroup::GetOrComputeExtraMetadataBlocks(bool force_compute) {
|
|
977
|
+
if (has_metadata_blocks && !force_compute) {
|
|
978
|
+
return extra_metadata_blocks;
|
|
986
979
|
}
|
|
987
|
-
vector<MetaBlockPointer> result;
|
|
988
980
|
if (column_pointers.empty()) {
|
|
989
981
|
// no pointers
|
|
990
|
-
return
|
|
982
|
+
return {};
|
|
991
983
|
}
|
|
984
|
+
vector<MetaBlockPointer> read_pointers;
|
|
992
985
|
// column_pointers stores the beginning of each column
|
|
993
986
|
// if columns are big - they may span multiple metadata blocks
|
|
994
987
|
// we need to figure out all blocks that this row group points to
|
|
@@ -999,13 +992,25 @@ vector<MetaBlockPointer> RowGroup::GetColumnPointers() {
|
|
|
999
992
|
// for all but the last column pointer - we can just follow the linked list until we reach the last column
|
|
1000
993
|
MetadataReader reader(metadata_manager, column_pointers[0]);
|
|
1001
994
|
auto last_pointer = column_pointers[last_idx];
|
|
1002
|
-
|
|
995
|
+
read_pointers = reader.GetRemainingBlocks(last_pointer);
|
|
1003
996
|
}
|
|
1004
997
|
// for the last column we need to deserialize the column - because we don't know where it stops
|
|
1005
998
|
auto &types = GetCollection().GetTypes();
|
|
1006
|
-
MetadataReader reader(metadata_manager, column_pointers[last_idx], &
|
|
999
|
+
MetadataReader reader(metadata_manager, column_pointers[last_idx], &read_pointers);
|
|
1007
1000
|
ColumnData::Deserialize(GetBlockManager(), GetTableInfo(), last_idx, start, reader, types[last_idx]);
|
|
1008
|
-
|
|
1001
|
+
|
|
1002
|
+
unordered_set<idx_t> result_as_set;
|
|
1003
|
+
for (auto &ptr : read_pointers) {
|
|
1004
|
+
result_as_set.emplace(ptr.block_pointer);
|
|
1005
|
+
}
|
|
1006
|
+
for (auto &ptr : column_pointers) {
|
|
1007
|
+
result_as_set.erase(ptr.block_pointer);
|
|
1008
|
+
}
|
|
1009
|
+
return {result_as_set.begin(), result_as_set.end()};
|
|
1010
|
+
}
|
|
1011
|
+
|
|
1012
|
+
const vector<MetaBlockPointer> &RowGroup::GetColumnStartPointers() const {
|
|
1013
|
+
return column_pointers;
|
|
1009
1014
|
}
|
|
1010
1015
|
|
|
1011
1016
|
RowGroupWriteData RowGroup::WriteToDisk(RowGroupWriter &writer) {
|
|
@@ -1014,7 +1019,8 @@ RowGroupWriteData RowGroup::WriteToDisk(RowGroupWriter &writer) {
|
|
|
1014
1019
|
// we have existing metadata and the row group has not been changed
|
|
1015
1020
|
// re-use previous metadata
|
|
1016
1021
|
RowGroupWriteData result;
|
|
1017
|
-
result.
|
|
1022
|
+
result.reuse_existing_metadata_blocks = true;
|
|
1023
|
+
result.existing_extra_metadata_blocks = GetOrComputeExtraMetadataBlocks();
|
|
1018
1024
|
return result;
|
|
1019
1025
|
}
|
|
1020
1026
|
auto &compression_types = writer.GetCompressionTypes();
|
|
@@ -1042,14 +1048,23 @@ RowGroupPointer RowGroup::Checkpoint(RowGroupWriteData write_data, RowGroupWrite
|
|
|
1042
1048
|
// construct the row group pointer and write the column meta data to disk
|
|
1043
1049
|
row_group_pointer.row_start = start;
|
|
1044
1050
|
row_group_pointer.tuple_count = count;
|
|
1045
|
-
if (
|
|
1051
|
+
if (write_data.reuse_existing_metadata_blocks) {
|
|
1046
1052
|
// we are re-using the previous metadata
|
|
1047
1053
|
row_group_pointer.data_pointers = column_pointers;
|
|
1048
|
-
row_group_pointer.has_metadata_blocks =
|
|
1049
|
-
row_group_pointer.extra_metadata_blocks =
|
|
1054
|
+
row_group_pointer.has_metadata_blocks = true;
|
|
1055
|
+
row_group_pointer.extra_metadata_blocks = write_data.existing_extra_metadata_blocks;
|
|
1050
1056
|
row_group_pointer.deletes_pointers = deletes_pointers;
|
|
1051
|
-
|
|
1057
|
+
vector<MetaBlockPointer> extra_metadata_block_pointers;
|
|
1058
|
+
extra_metadata_block_pointers.reserve(write_data.existing_extra_metadata_blocks.size());
|
|
1059
|
+
for (auto &block_pointer : write_data.existing_extra_metadata_blocks) {
|
|
1060
|
+
extra_metadata_block_pointers.emplace_back(block_pointer, 0);
|
|
1061
|
+
}
|
|
1062
|
+
metadata_manager->ClearModifiedBlocks(column_pointers);
|
|
1063
|
+
metadata_manager->ClearModifiedBlocks(extra_metadata_block_pointers);
|
|
1052
1064
|
metadata_manager->ClearModifiedBlocks(deletes_pointers);
|
|
1065
|
+
// remember metadata_blocks to avoid loading them on future checkpoints
|
|
1066
|
+
has_metadata_blocks = true;
|
|
1067
|
+
extra_metadata_blocks = row_group_pointer.extra_metadata_blocks;
|
|
1053
1068
|
return row_group_pointer;
|
|
1054
1069
|
}
|
|
1055
1070
|
D_ASSERT(write_data.states.size() == columns.size());
|
|
@@ -1092,6 +1107,7 @@ RowGroupPointer RowGroup::Checkpoint(RowGroupWriteData write_data, RowGroupWrite
|
|
|
1092
1107
|
}
|
|
1093
1108
|
// this metadata block is not stored - add it to the extra metadata blocks
|
|
1094
1109
|
row_group_pointer.extra_metadata_blocks.push_back(column_pointer.block_pointer);
|
|
1110
|
+
metadata_blocks.insert(column_pointer.block_pointer);
|
|
1095
1111
|
}
|
|
1096
1112
|
// set up the pointers correctly within this row group for future operations
|
|
1097
1113
|
column_pointers = row_group_pointer.data_pointers;
|
|
@@ -1113,6 +1129,7 @@ bool RowGroup::HasChanges() const {
|
|
|
1113
1129
|
// we have deletes
|
|
1114
1130
|
return true;
|
|
1115
1131
|
}
|
|
1132
|
+
D_ASSERT(!deletes_is_loaded.load());
|
|
1116
1133
|
// check if any of the columns have changes
|
|
1117
1134
|
// avoid loading unloaded columns - unloaded columns can never have changes
|
|
1118
1135
|
for (idx_t c = 0; c < columns.size(); c++) {
|
|
@@ -665,14 +665,16 @@ void RowGroupCollection::Update(TransactionData transaction, DataTable &data_tab
|
|
|
665
665
|
void RowGroupCollection::RemoveFromIndexes(TableIndexList &indexes, Vector &row_identifiers, idx_t count) {
|
|
666
666
|
auto row_ids = FlatVector::GetData<row_t>(row_identifiers);
|
|
667
667
|
|
|
668
|
-
// Collect all
|
|
668
|
+
// Collect all Indexed columns on the table.
|
|
669
669
|
unordered_set<column_t> indexed_column_id_set;
|
|
670
670
|
indexes.Scan([&](Index &index) {
|
|
671
|
-
D_ASSERT(index.IsBound());
|
|
672
671
|
auto &set = index.GetColumnIdSet();
|
|
673
672
|
indexed_column_id_set.insert(set.begin(), set.end());
|
|
674
673
|
return false;
|
|
675
674
|
});
|
|
675
|
+
|
|
676
|
+
// If we are in WAL replay, delete data will be buffered, and so we sort the column_ids
|
|
677
|
+
// since the sorted form will be the mapping used to get back physical IDs from the buffered index chunk.
|
|
676
678
|
vector<StorageIndex> column_ids;
|
|
677
679
|
for (auto &col : indexed_column_id_set) {
|
|
678
680
|
column_ids.emplace_back(col);
|
|
@@ -686,10 +688,10 @@ void RowGroupCollection::RemoveFromIndexes(TableIndexList &indexes, Vector &row_
|
|
|
686
688
|
|
|
687
689
|
// Initialize the fetch state. Only use indexed columns.
|
|
688
690
|
TableScanState state;
|
|
689
|
-
|
|
691
|
+
auto column_ids_copy = column_ids;
|
|
692
|
+
state.Initialize(std::move(column_ids_copy));
|
|
690
693
|
state.table_state.max_row = row_start + total_rows;
|
|
691
694
|
|
|
692
|
-
// Used for scanning data. Only contains the indexed columns.
|
|
693
695
|
DataChunk fetch_chunk;
|
|
694
696
|
fetch_chunk.Initialize(GetAllocator(), column_types);
|
|
695
697
|
|
|
@@ -749,17 +751,24 @@ void RowGroupCollection::RemoveFromIndexes(TableIndexList &indexes, Vector &row_
|
|
|
749
751
|
result_chunk.SetCardinality(fetch_chunk);
|
|
750
752
|
|
|
751
753
|
// Slice the vector with all rows that are present in this vector.
|
|
752
|
-
//
|
|
754
|
+
// If the index is bound, delete the data. If unbound, buffer into unbound_index.
|
|
753
755
|
result_chunk.Slice(sel, sel_count);
|
|
754
756
|
indexes.Scan([&](Index &index) {
|
|
755
757
|
if (index.IsBound()) {
|
|
756
758
|
index.Cast<BoundIndex>().Delete(result_chunk, row_identifiers);
|
|
757
759
|
return false;
|
|
758
760
|
}
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
761
|
+
// Buffering takes only the indexed columns in ordering of the column_ids mapping.
|
|
762
|
+
DataChunk index_column_chunk;
|
|
763
|
+
index_column_chunk.InitializeEmpty(column_types);
|
|
764
|
+
for (idx_t i = 0; i < column_types.size(); i++) {
|
|
765
|
+
auto col_id = column_ids[i].GetPrimaryIndex();
|
|
766
|
+
index_column_chunk.data[i].Reference(result_chunk.data[col_id]);
|
|
767
|
+
}
|
|
768
|
+
index_column_chunk.SetCardinality(result_chunk.size());
|
|
769
|
+
auto &unbound_index = index.Cast<UnboundIndex>();
|
|
770
|
+
unbound_index.BufferChunk(index_column_chunk, row_identifiers, column_ids, BufferedIndexReplay::DEL_ENTRY);
|
|
771
|
+
return false;
|
|
763
772
|
});
|
|
764
773
|
}
|
|
765
774
|
}
|
|
@@ -1136,7 +1145,7 @@ void RowGroupCollection::Checkpoint(TableDataWriter &writer, TableStatistics &gl
|
|
|
1136
1145
|
break;
|
|
1137
1146
|
}
|
|
1138
1147
|
auto &write_state = checkpoint_state.write_data[segment_idx];
|
|
1139
|
-
if (write_state.
|
|
1148
|
+
if (!write_state.reuse_existing_metadata_blocks) {
|
|
1140
1149
|
table_has_changes = true;
|
|
1141
1150
|
break;
|
|
1142
1151
|
}
|
|
@@ -1150,7 +1159,14 @@ void RowGroupCollection::Checkpoint(TableDataWriter &writer, TableStatistics &gl
|
|
|
1150
1159
|
auto &entry = segments[segment_idx];
|
|
1151
1160
|
auto &row_group = *entry.node;
|
|
1152
1161
|
auto &write_state = checkpoint_state.write_data[segment_idx];
|
|
1153
|
-
metadata_manager.ClearModifiedBlocks(
|
|
1162
|
+
metadata_manager.ClearModifiedBlocks(row_group.GetColumnStartPointers());
|
|
1163
|
+
D_ASSERT(write_state.reuse_existing_metadata_blocks);
|
|
1164
|
+
vector<MetaBlockPointer> extra_metadata_block_pointers;
|
|
1165
|
+
extra_metadata_block_pointers.reserve(write_state.existing_extra_metadata_blocks.size());
|
|
1166
|
+
for (auto &block_pointer : write_state.existing_extra_metadata_blocks) {
|
|
1167
|
+
extra_metadata_block_pointers.emplace_back(block_pointer, 0);
|
|
1168
|
+
}
|
|
1169
|
+
metadata_manager.ClearModifiedBlocks(extra_metadata_block_pointers);
|
|
1154
1170
|
metadata_manager.ClearModifiedBlocks(row_group.GetDeletesPointers());
|
|
1155
1171
|
row_groups->AppendSegment(l, std::move(entry.node));
|
|
1156
1172
|
}
|
|
@@ -1178,11 +1194,98 @@ void RowGroupCollection::Checkpoint(TableDataWriter &writer, TableStatistics &gl
|
|
|
1178
1194
|
if (!row_group_writer) {
|
|
1179
1195
|
throw InternalException("Missing row group writer for index %llu", segment_idx);
|
|
1180
1196
|
}
|
|
1197
|
+
bool metadata_reuse = checkpoint_state.write_data[segment_idx].reuse_existing_metadata_blocks;
|
|
1181
1198
|
auto pointer =
|
|
1182
1199
|
row_group.Checkpoint(std::move(checkpoint_state.write_data[segment_idx]), *row_group_writer, global_stats);
|
|
1200
|
+
|
|
1201
|
+
auto debug_verify_blocks = DBConfig::GetSetting<DebugVerifyBlocksSetting>(GetAttached().GetDatabase()) &&
|
|
1202
|
+
dynamic_cast<SingleFileTableDataWriter *>(&checkpoint_state.writer) != nullptr;
|
|
1203
|
+
RowGroupPointer pointer_copy;
|
|
1204
|
+
if (debug_verify_blocks) {
|
|
1205
|
+
pointer_copy = pointer;
|
|
1206
|
+
}
|
|
1183
1207
|
writer.AddRowGroup(std::move(pointer), std::move(row_group_writer));
|
|
1184
1208
|
row_groups->AppendSegment(l, std::move(entry.node));
|
|
1185
1209
|
new_total_rows += row_group.count;
|
|
1210
|
+
|
|
1211
|
+
if (debug_verify_blocks) {
|
|
1212
|
+
if (!pointer_copy.has_metadata_blocks) {
|
|
1213
|
+
throw InternalException("Checkpointing should always remember metadata blocks");
|
|
1214
|
+
}
|
|
1215
|
+
if (metadata_reuse && pointer_copy.data_pointers != row_group.GetColumnStartPointers()) {
|
|
1216
|
+
throw InternalException("Colum start pointers changed during metadata reuse");
|
|
1217
|
+
}
|
|
1218
|
+
|
|
1219
|
+
// Capture blocks that have been written
|
|
1220
|
+
vector<MetaBlockPointer> all_written_blocks = pointer_copy.data_pointers;
|
|
1221
|
+
vector<MetaBlockPointer> all_metadata_blocks;
|
|
1222
|
+
for (auto &block : pointer_copy.extra_metadata_blocks) {
|
|
1223
|
+
all_written_blocks.emplace_back(block, 0);
|
|
1224
|
+
all_metadata_blocks.emplace_back(block, 0);
|
|
1225
|
+
}
|
|
1226
|
+
|
|
1227
|
+
// Verify that we can load the metadata correctly again
|
|
1228
|
+
vector<MetaBlockPointer> all_quick_read_blocks;
|
|
1229
|
+
for (auto &ptr : row_group.GetColumnStartPointers()) {
|
|
1230
|
+
all_quick_read_blocks.emplace_back(ptr);
|
|
1231
|
+
if (metadata_reuse && !block_manager.GetMetadataManager().BlockHasBeenCleared(ptr)) {
|
|
1232
|
+
throw InternalException("Found column start block that was not cleared");
|
|
1233
|
+
}
|
|
1234
|
+
}
|
|
1235
|
+
auto extra_metadata_blocks = row_group.GetOrComputeExtraMetadataBlocks(/* force_compute: */ true);
|
|
1236
|
+
for (auto &ptr : extra_metadata_blocks) {
|
|
1237
|
+
auto block_pointer = MetaBlockPointer(ptr, 0);
|
|
1238
|
+
all_quick_read_blocks.emplace_back(block_pointer);
|
|
1239
|
+
if (metadata_reuse && !block_manager.GetMetadataManager().BlockHasBeenCleared(block_pointer)) {
|
|
1240
|
+
throw InternalException("Found extra metadata block that was not cleared");
|
|
1241
|
+
}
|
|
1242
|
+
}
|
|
1243
|
+
|
|
1244
|
+
// Deserialize all columns to check if the quick read via GetOrComputeExtraMetadataBlocks was correct
|
|
1245
|
+
vector<MetaBlockPointer> all_full_read_blocks;
|
|
1246
|
+
auto column_start_pointers = row_group.GetColumnStartPointers();
|
|
1247
|
+
auto &types = row_group.GetCollection().GetTypes();
|
|
1248
|
+
auto &metadata_manager = row_group.GetCollection().GetMetadataManager();
|
|
1249
|
+
for (idx_t i = 0; i < column_start_pointers.size(); i++) {
|
|
1250
|
+
MetadataReader reader(metadata_manager, column_start_pointers[i], &all_full_read_blocks);
|
|
1251
|
+
ColumnData::Deserialize(GetBlockManager(), GetTableInfo(), i, row_group.start, reader, types[i]);
|
|
1252
|
+
}
|
|
1253
|
+
|
|
1254
|
+
// Derive sets of blocks to compare
|
|
1255
|
+
set<idx_t> all_written_block_ids;
|
|
1256
|
+
for (auto &ptr : all_written_blocks) {
|
|
1257
|
+
all_written_block_ids.insert(ptr.block_pointer);
|
|
1258
|
+
}
|
|
1259
|
+
set<idx_t> all_quick_read_block_ids;
|
|
1260
|
+
for (auto &ptr : all_quick_read_blocks) {
|
|
1261
|
+
all_quick_read_block_ids.insert(ptr.block_pointer);
|
|
1262
|
+
}
|
|
1263
|
+
set<idx_t> all_full_read_block_ids;
|
|
1264
|
+
for (auto &ptr : all_full_read_blocks) {
|
|
1265
|
+
all_full_read_block_ids.insert(ptr.block_pointer);
|
|
1266
|
+
}
|
|
1267
|
+
if (all_written_block_ids != all_quick_read_block_ids ||
|
|
1268
|
+
all_quick_read_block_ids != all_full_read_block_ids) {
|
|
1269
|
+
std::stringstream oss;
|
|
1270
|
+
oss << "Written: ";
|
|
1271
|
+
for (auto &block : all_written_blocks) {
|
|
1272
|
+
oss << block << ", ";
|
|
1273
|
+
}
|
|
1274
|
+
oss << "\n";
|
|
1275
|
+
oss << "Quick read: ";
|
|
1276
|
+
for (auto &block : all_quick_read_blocks) {
|
|
1277
|
+
oss << block << ", ";
|
|
1278
|
+
}
|
|
1279
|
+
oss << "\n";
|
|
1280
|
+
oss << "Full read: ";
|
|
1281
|
+
for (auto &block : all_full_read_blocks) {
|
|
1282
|
+
oss << block << ", ";
|
|
1283
|
+
}
|
|
1284
|
+
oss << "\n";
|
|
1285
|
+
|
|
1286
|
+
throw InternalException("Reloading blocks just written does not yield same blocks: " + oss.str());
|
|
1287
|
+
}
|
|
1288
|
+
}
|
|
1186
1289
|
}
|
|
1187
1290
|
total_rows = new_total_rows;
|
|
1188
1291
|
l.Release();
|
|
@@ -147,11 +147,17 @@ void TableIndexList::Bind(ClientContext &context, DataTableInfo &table_info, con
|
|
|
147
147
|
// Create an IndexBinder to bind the index
|
|
148
148
|
IndexBinder idx_binder(*binder, context);
|
|
149
149
|
|
|
150
|
-
// Apply any outstanding
|
|
150
|
+
// Apply any outstanding buffered replays and replace the unbound index with a bound index.
|
|
151
151
|
auto &unbound_index = index_entry->index->Cast<UnboundIndex>();
|
|
152
152
|
auto bound_idx = idx_binder.BindIndex(unbound_index);
|
|
153
|
-
if (unbound_index.
|
|
154
|
-
|
|
153
|
+
if (unbound_index.HasBufferedReplays()) {
|
|
154
|
+
// For replaying buffered index operations, we only want the physical column types (skip over
|
|
155
|
+
// generated column types).
|
|
156
|
+
vector<LogicalType> physical_column_types;
|
|
157
|
+
for (auto &col : table.GetColumns().Physical()) {
|
|
158
|
+
physical_column_types.push_back(col.Type());
|
|
159
|
+
}
|
|
160
|
+
bound_idx->ApplyBufferedReplays(physical_column_types, unbound_index.GetBufferedReplays(),
|
|
155
161
|
unbound_index.GetMappedColumnIds());
|
|
156
162
|
}
|
|
157
163
|
|
|
@@ -255,11 +261,18 @@ void TableIndexList::InitializeIndexChunk(DataChunk &index_chunk, const vector<L
|
|
|
255
261
|
auto &index_list = data_table_info.GetIndexes();
|
|
256
262
|
auto indexed_columns = index_list.GetRequiredColumns();
|
|
257
263
|
|
|
258
|
-
|
|
264
|
+
// Store the mapped_column_ids and index_types in sorted canonical form, needed for
|
|
265
|
+
// buffering WAL index operations during replay (see notes in unbound_index.hpp).
|
|
266
|
+
// First sort mapped_column_ids, then populate index_types according to the sorted order.
|
|
259
267
|
for (auto &col : indexed_columns) {
|
|
260
|
-
index_types.push_back(table_types[col]);
|
|
261
268
|
mapped_column_ids.emplace_back(col);
|
|
262
269
|
}
|
|
270
|
+
std::sort(mapped_column_ids.begin(), mapped_column_ids.end());
|
|
271
|
+
|
|
272
|
+
vector<LogicalType> index_types;
|
|
273
|
+
for (auto &col : mapped_column_ids) {
|
|
274
|
+
index_types.push_back(table_types[col.GetPrimaryIndex()]);
|
|
275
|
+
}
|
|
263
276
|
|
|
264
277
|
index_chunk.InitializeEmpty(index_types);
|
|
265
278
|
}
|
|
@@ -95,10 +95,15 @@ void CleanupState::Flush() {
|
|
|
95
95
|
// set up the row identifiers vector
|
|
96
96
|
Vector row_identifiers(LogicalType::ROW_TYPE, data_ptr_cast(row_numbers));
|
|
97
97
|
|
|
98
|
-
// delete the tuples from all the indexes
|
|
98
|
+
// delete the tuples from all the indexes.
|
|
99
|
+
// If there is any issue with removal, a FatalException must be thrown since there may be a corruption of
|
|
100
|
+
// data, hence the transaction cannot be guaranteed.
|
|
99
101
|
try {
|
|
100
102
|
current_table->RemoveFromIndexes(row_identifiers, count);
|
|
101
|
-
} catch (
|
|
103
|
+
} catch (std::exception &ex) {
|
|
104
|
+
throw FatalException(ErrorData(ex).Message());
|
|
105
|
+
} catch (...) {
|
|
106
|
+
throw FatalException("unknown failure in CleanupState::Flush");
|
|
102
107
|
}
|
|
103
108
|
|
|
104
109
|
count = 0;
|
|
@@ -81,6 +81,7 @@ class AESStateMBEDTLS : public duckdb::EncryptionState {
|
|
|
81
81
|
DUCKDB_API void GenerateRandomData(duckdb::data_ptr_t data, duckdb::idx_t len) override;
|
|
82
82
|
DUCKDB_API void FinalizeGCM(duckdb::data_ptr_t tag, duckdb::idx_t tag_len);
|
|
83
83
|
DUCKDB_API const mbedtls_cipher_info_t *GetCipher(size_t key_len);
|
|
84
|
+
DUCKDB_API static void SecureClearData(duckdb::data_ptr_t data, duckdb::idx_t len);
|
|
84
85
|
|
|
85
86
|
private:
|
|
86
87
|
DUCKDB_API void InitializeInternal(duckdb::const_data_ptr_t iv, duckdb::idx_t iv_len, duckdb::const_data_ptr_t aad, duckdb::idx_t aad_len);
|
|
@@ -98,6 +99,10 @@ class AESStateMBEDTLS : public duckdb::EncryptionState {
|
|
|
98
99
|
}
|
|
99
100
|
|
|
100
101
|
~AESStateMBEDTLSFactory() override {} //
|
|
102
|
+
|
|
103
|
+
DUCKDB_API bool SupportsEncryption() override {
|
|
104
|
+
return false;
|
|
105
|
+
}
|
|
101
106
|
};
|
|
102
107
|
};
|
|
103
108
|
|
|
@@ -271,6 +271,10 @@ const mbedtls_cipher_info_t *MbedTlsWrapper::AESStateMBEDTLS::GetCipher(size_t k
|
|
|
271
271
|
}
|
|
272
272
|
}
|
|
273
273
|
|
|
274
|
+
void MbedTlsWrapper::AESStateMBEDTLS::SecureClearData(duckdb::data_ptr_t data, duckdb::idx_t len) {
|
|
275
|
+
mbedtls_platform_zeroize(data, len);
|
|
276
|
+
}
|
|
277
|
+
|
|
274
278
|
MbedTlsWrapper::AESStateMBEDTLS::AESStateMBEDTLS(duckdb::EncryptionTypes::CipherType cipher_p, duckdb::idx_t key_len) : EncryptionState(cipher_p, key_len), context(duckdb::make_uniq<mbedtls_cipher_context_t>()) {
|
|
275
279
|
mbedtls_cipher_init(context.get());
|
|
276
280
|
|
|
@@ -296,20 +300,12 @@ MbedTlsWrapper::AESStateMBEDTLS::~AESStateMBEDTLS() {
|
|
|
296
300
|
}
|
|
297
301
|
}
|
|
298
302
|
|
|
299
|
-
void
|
|
300
|
-
duckdb::
|
|
301
|
-
|
|
302
|
-
while (len) {
|
|
303
|
-
const auto random_integer = random_engine.NextRandomInteger();
|
|
304
|
-
const auto next = duckdb::MinValue<duckdb::idx_t>(len, sizeof(random_integer));
|
|
305
|
-
memcpy(data, duckdb::const_data_ptr_cast(&random_integer), next);
|
|
306
|
-
data += next;
|
|
307
|
-
len -= next;
|
|
308
|
-
}
|
|
303
|
+
static void ThrowInsecureRNG() {
|
|
304
|
+
throw duckdb::InvalidConfigurationException("DuckDB requires a secure random engine to be loaded to enable secure crypto. Normally, this will be handled automatically by DuckDB by autoloading the `httpfs` Extension, but that seems to have failed. Please ensure the httpfs extension is loaded manually using `LOAD httpfs`.");
|
|
309
305
|
}
|
|
310
306
|
|
|
311
307
|
void MbedTlsWrapper::AESStateMBEDTLS::GenerateRandomData(duckdb::data_ptr_t data, duckdb::idx_t len) {
|
|
312
|
-
|
|
308
|
+
ThrowInsecureRNG();
|
|
313
309
|
}
|
|
314
310
|
|
|
315
311
|
void MbedTlsWrapper::AESStateMBEDTLS::InitializeInternal(duckdb::const_data_ptr_t iv, duckdb::idx_t iv_len, duckdb::const_data_ptr_t aad, duckdb::idx_t aad_len){
|
|
@@ -325,16 +321,7 @@ void MbedTlsWrapper::AESStateMBEDTLS::InitializeInternal(duckdb::const_data_ptr_
|
|
|
325
321
|
}
|
|
326
322
|
|
|
327
323
|
void MbedTlsWrapper::AESStateMBEDTLS::InitializeEncryption(duckdb::const_data_ptr_t iv, duckdb::idx_t iv_len, duckdb::const_data_ptr_t key, duckdb::idx_t key_len_p, duckdb::const_data_ptr_t aad, duckdb::idx_t aad_len) {
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
if (key_len_p != key_len) {
|
|
331
|
-
throw duckdb::InternalException("Invalid encryption key length, expected %llu, got %llu", key_len, key_len_p);
|
|
332
|
-
}
|
|
333
|
-
if (mbedtls_cipher_setkey(context.get(), key, key_len * 8, MBEDTLS_ENCRYPT)) {
|
|
334
|
-
throw runtime_error("Failed to set AES key for encryption");
|
|
335
|
-
}
|
|
336
|
-
|
|
337
|
-
InitializeInternal(iv, iv_len, aad, aad_len);
|
|
324
|
+
ThrowInsecureRNG();
|
|
338
325
|
}
|
|
339
326
|
|
|
340
327
|
void MbedTlsWrapper::AESStateMBEDTLS::InitializeDecryption(duckdb::const_data_ptr_t iv, duckdb::idx_t iv_len, duckdb::const_data_ptr_t key, duckdb::idx_t key_len_p, duckdb::const_data_ptr_t aad, duckdb::idx_t aad_len) {
|