duckdb 1.4.2-dev4.0 → 1.4.3-dev0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/duckdb/extension/icu/icu_extension.cpp +67 -6
- package/src/duckdb/extension/icu/third_party/icu/common/putil.cpp +9 -3
- package/src/duckdb/extension/json/include/json_serializer.hpp +12 -0
- package/src/duckdb/extension/json/json_functions/json_create.cpp +10 -10
- package/src/duckdb/extension/parquet/decoder/delta_length_byte_array_decoder.cpp +19 -5
- package/src/duckdb/extension/parquet/include/decoder/delta_length_byte_array_decoder.hpp +1 -1
- package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +11 -2
- package/src/duckdb/extension/parquet/include/reader/string_column_reader.hpp +2 -1
- package/src/duckdb/extension/parquet/parquet_reader.cpp +3 -1
- package/src/duckdb/extension/parquet/parquet_writer.cpp +16 -1
- package/src/duckdb/extension/parquet/reader/string_column_reader.cpp +1 -1
- package/src/duckdb/extension/parquet/writer/primitive_column_writer.cpp +1 -1
- package/src/duckdb/src/catalog/default/default_table_functions.cpp +1 -1
- package/src/duckdb/src/common/adbc/adbc.cpp +8 -6
- package/src/duckdb/src/common/csv_writer.cpp +1 -13
- package/src/duckdb/src/common/encryption_key_manager.cpp +10 -9
- package/src/duckdb/src/common/enum_util.cpp +19 -0
- package/src/duckdb/src/common/enums/compression_type.cpp +51 -16
- package/src/duckdb/src/common/exception/binder_exception.cpp +7 -2
- package/src/duckdb/src/common/progress_bar/unscented_kalman_filter.cpp +2 -2
- package/src/duckdb/src/common/random_engine.cpp +10 -0
- package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +13 -2
- package/src/duckdb/src/execution/index/art/art.cpp +6 -3
- package/src/duckdb/src/execution/index/bound_index.cpp +32 -21
- package/src/duckdb/src/execution/index/unbound_index.cpp +20 -9
- package/src/duckdb/src/execution/join_hashtable.cpp +9 -3
- package/src/duckdb/src/execution/operator/helper/physical_buffered_batch_collector.cpp +1 -1
- package/src/duckdb/src/execution/operator/helper/physical_buffered_collector.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +5 -0
- package/src/duckdb/src/function/cast/cast_function_set.cpp +3 -1
- package/src/duckdb/src/function/macro_function.cpp +1 -1
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +1 -1
- package/src/duckdb/src/function/scalar/create_sort_key.cpp +5 -3
- package/src/duckdb/src/function/scalar/operator/arithmetic.cpp +1 -1
- package/src/duckdb/src/function/scalar/system/parse_log_message.cpp +4 -2
- package/src/duckdb/src/function/table/copy_csv.cpp +28 -4
- package/src/duckdb/src/function/table/direct_file_reader.cpp +10 -0
- package/src/duckdb/src/function/table/read_file.cpp +65 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
- package/src/duckdb/src/include/duckdb/common/csv_writer.hpp +0 -3
- package/src/duckdb/src/include/duckdb/common/encryption_key_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/encryption_state.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/enums/compression_type.hpp +42 -2
- package/src/duckdb/src/include/duckdb/common/http_util.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/hugeint.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +0 -11
- package/src/duckdb/src/include/duckdb/common/random_engine.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/types/hugeint.hpp +6 -6
- package/src/duckdb/src/include/duckdb/common/types/row/block_iterator.hpp +115 -97
- package/src/duckdb/src/include/duckdb/execution/index/art/art_operator.hpp +54 -0
- package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +21 -2
- package/src/duckdb/src/include/duckdb/execution/index/unbound_index.hpp +26 -8
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/read_file.hpp +0 -49
- package/src/duckdb/src/include/duckdb/logging/log_manager.hpp +1 -1
- package/src/duckdb/src/include/duckdb/logging/log_type.hpp +14 -0
- package/src/duckdb/src/include/duckdb/main/attached_database.hpp +2 -1
- package/src/duckdb/src/include/duckdb/main/buffered_data/batched_buffered_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/buffered_data/buffered_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/buffered_data/simple_buffered_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/database.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/database_file_path_manager.hpp +10 -6
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/profiling_info.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/relation/create_table_relation.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/relation/insert_relation.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/relation/table_relation.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/relation.hpp +10 -2
- package/src/duckdb/src/include/duckdb/main/settings.hpp +9 -0
- package/src/duckdb/src/include/duckdb/optimizer/filter_pullup.hpp +10 -14
- package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +5 -1
- package/src/duckdb/src/include/duckdb/parser/query_node.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/bound_statement.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/block.hpp +9 -0
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +9 -2
- package/src/duckdb/src/include/duckdb/storage/index.hpp +8 -2
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/storage_options.hpp +0 -7
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +6 -2
- package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier.hpp +6 -0
- package/src/duckdb/src/logging/log_manager.cpp +2 -1
- package/src/duckdb/src/logging/log_types.cpp +30 -1
- package/src/duckdb/src/main/attached_database.cpp +4 -7
- package/src/duckdb/src/main/buffered_data/batched_buffered_data.cpp +2 -3
- package/src/duckdb/src/main/buffered_data/buffered_data.cpp +2 -3
- package/src/duckdb/src/main/buffered_data/simple_buffered_data.cpp +1 -2
- package/src/duckdb/src/main/capi/prepared-c.cpp +9 -2
- package/src/duckdb/src/main/config.cpp +6 -5
- package/src/duckdb/src/main/database.cpp +9 -3
- package/src/duckdb/src/main/database_file_path_manager.cpp +43 -14
- package/src/duckdb/src/main/database_manager.cpp +1 -1
- package/src/duckdb/src/main/http/http_util.cpp +19 -1
- package/src/duckdb/src/main/profiling_info.cpp +11 -0
- package/src/duckdb/src/main/query_profiler.cpp +16 -0
- package/src/duckdb/src/main/relation/create_table_relation.cpp +9 -0
- package/src/duckdb/src/main/relation/insert_relation.cpp +7 -0
- package/src/duckdb/src/main/relation/table_relation.cpp +14 -0
- package/src/duckdb/src/main/relation.cpp +28 -12
- package/src/duckdb/src/main/settings/custom_settings.cpp +9 -3
- package/src/duckdb/src/optimizer/filter_pullup.cpp +14 -0
- package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +29 -10
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +7 -0
- package/src/duckdb/src/parallel/task_executor.cpp +4 -2
- package/src/duckdb/src/parser/query_node/cte_node.cpp +79 -0
- package/src/duckdb/src/parser/transform/expression/transform_cast.cpp +3 -1
- package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +1 -0
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +12 -4
- package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +16 -12
- package/src/duckdb/src/planner/binder/statement/bind_merge_into.cpp +42 -5
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +0 -24
- package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +1 -1
- package/src/duckdb/src/planner/binder.cpp +0 -1
- package/src/duckdb/src/planner/expression_binder/having_binder.cpp +1 -2
- package/src/duckdb/src/storage/buffer/block_manager.cpp +20 -6
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +8 -6
- package/src/duckdb/src/storage/checkpoint_manager.cpp +24 -22
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +7 -0
- package/src/duckdb/src/storage/compression/zstd.cpp +34 -12
- package/src/duckdb/src/storage/data_table.cpp +1 -1
- package/src/duckdb/src/storage/local_storage.cpp +15 -2
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +29 -6
- package/src/duckdb/src/storage/metadata/metadata_reader.cpp +11 -15
- package/src/duckdb/src/storage/metadata/metadata_writer.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +1 -19
- package/src/duckdb/src/storage/single_file_block_manager.cpp +33 -3
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +3 -1
- package/src/duckdb/src/storage/storage_info.cpp +4 -0
- package/src/duckdb/src/storage/storage_manager.cpp +8 -0
- package/src/duckdb/src/storage/table/array_column_data.cpp +1 -1
- package/src/duckdb/src/storage/table/column_data.cpp +3 -2
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +3 -2
- package/src/duckdb/src/storage/table/row_group.cpp +41 -24
- package/src/duckdb/src/storage/table/row_group_collection.cpp +114 -11
- package/src/duckdb/src/storage/table_index_list.cpp +18 -5
- package/src/duckdb/src/transaction/cleanup_state.cpp +7 -2
- package/src/duckdb/third_party/mbedtls/include/mbedtls_wrapper.hpp +5 -0
- package/src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp +8 -21
- package/src/duckdb/third_party/parquet/parquet_types.cpp +57 -35
- package/src/duckdb/third_party/parquet/parquet_types.h +9 -2
- package/src/duckdb/ub_src_common_types_row.cpp +0 -2
|
@@ -142,6 +142,11 @@ public:
|
|
|
142
142
|
unique_ptr<AnalyzeState> ZSTDStorage::StringInitAnalyze(ColumnData &col_data, PhysicalType type) {
|
|
143
143
|
// check if the storage version we are writing to supports sztd
|
|
144
144
|
auto &storage = col_data.GetStorageManager();
|
|
145
|
+
auto &block_manager = col_data.GetBlockManager();
|
|
146
|
+
if (block_manager.InMemory()) {
|
|
147
|
+
//! Can't use ZSTD in in-memory environment
|
|
148
|
+
return nullptr;
|
|
149
|
+
}
|
|
145
150
|
if (storage.GetStorageVersion() < 4) {
|
|
146
151
|
// compatibility mode with old versions - disable zstd
|
|
147
152
|
return nullptr;
|
|
@@ -249,6 +254,7 @@ public:
|
|
|
249
254
|
|
|
250
255
|
public:
|
|
251
256
|
void ResetOutBuffer() {
|
|
257
|
+
D_ASSERT(GetCurrentOffset() <= GetWritableSpace(info));
|
|
252
258
|
out_buffer.dst = current_buffer_ptr;
|
|
253
259
|
out_buffer.pos = 0;
|
|
254
260
|
|
|
@@ -347,6 +353,7 @@ public:
|
|
|
347
353
|
void InitializeVector() {
|
|
348
354
|
D_ASSERT(!in_vector);
|
|
349
355
|
if (vector_count + 1 >= total_vector_count) {
|
|
356
|
+
//! Last vector
|
|
350
357
|
vector_size = analyze_state->count - (ZSTD_VECTOR_SIZE * vector_count);
|
|
351
358
|
} else {
|
|
352
359
|
vector_size = ZSTD_VECTOR_SIZE;
|
|
@@ -355,6 +362,7 @@ public:
|
|
|
355
362
|
current_offset = UnsafeNumericCast<page_offset_t>(
|
|
356
363
|
AlignValue<idx_t, sizeof(string_length_t)>(UnsafeNumericCast<idx_t>(current_offset)));
|
|
357
364
|
current_buffer_ptr = current_buffer->Ptr() + current_offset;
|
|
365
|
+
D_ASSERT(GetCurrentOffset() <= GetWritableSpace(info));
|
|
358
366
|
compressed_size = 0;
|
|
359
367
|
uncompressed_size = 0;
|
|
360
368
|
|
|
@@ -413,15 +421,11 @@ public:
|
|
|
413
421
|
throw InvalidInputException("ZSTD Compression failed: %s",
|
|
414
422
|
duckdb_zstd::ZSTD_getErrorName(compress_result));
|
|
415
423
|
}
|
|
424
|
+
D_ASSERT(GetCurrentOffset() <= GetWritableSpace(info));
|
|
416
425
|
if (compress_result == 0) {
|
|
417
426
|
// Finished
|
|
418
427
|
break;
|
|
419
428
|
}
|
|
420
|
-
if (out_buffer.pos != out_buffer.size) {
|
|
421
|
-
throw InternalException("Expected ZSTD_compressStream2 to fully utilize the current buffer, but pos is "
|
|
422
|
-
"%d, while size is %d",
|
|
423
|
-
out_buffer.pos, out_buffer.size);
|
|
424
|
-
}
|
|
425
429
|
NewPage();
|
|
426
430
|
}
|
|
427
431
|
}
|
|
@@ -691,7 +695,7 @@ public:
|
|
|
691
695
|
explicit ZSTDScanState(ColumnSegment &segment)
|
|
692
696
|
: state(segment.GetSegmentState()->Cast<UncompressedStringSegmentState>()),
|
|
693
697
|
block_manager(segment.GetBlockManager()), buffer_manager(BufferManager::GetBufferManager(segment.db)),
|
|
694
|
-
segment_block_offset(segment.GetBlockOffset()) {
|
|
698
|
+
segment_block_offset(segment.GetBlockOffset()), segment(segment) {
|
|
695
699
|
decompression_context = duckdb_zstd::ZSTD_createDCtx();
|
|
696
700
|
segment_handle = buffer_manager.Pin(segment.block);
|
|
697
701
|
|
|
@@ -791,14 +795,23 @@ public:
|
|
|
791
795
|
|
|
792
796
|
auto vector_size = metadata.count;
|
|
793
797
|
|
|
798
|
+
auto string_lengths_size = (sizeof(string_length_t) * vector_size);
|
|
794
799
|
scan_state.string_lengths = reinterpret_cast<string_length_t *>(scan_state.current_buffer_ptr);
|
|
795
|
-
scan_state.current_buffer_ptr +=
|
|
800
|
+
scan_state.current_buffer_ptr += string_lengths_size;
|
|
796
801
|
|
|
797
802
|
// Update the in_buffer to point to the start of the compressed data frame
|
|
798
803
|
idx_t current_offset = UnsafeNumericCast<idx_t>(scan_state.current_buffer_ptr - handle_start);
|
|
799
804
|
scan_state.in_buffer.src = scan_state.current_buffer_ptr;
|
|
800
805
|
scan_state.in_buffer.pos = 0;
|
|
801
|
-
scan_state.
|
|
806
|
+
if (scan_state.metadata.block_offset + string_lengths_size + scan_state.metadata.compressed_size >
|
|
807
|
+
(segment.SegmentSize() - sizeof(block_id_t))) {
|
|
808
|
+
//! We know that the compressed size is too big to fit on the current page
|
|
809
|
+
scan_state.in_buffer.size =
|
|
810
|
+
MinValue(metadata.compressed_size, block_manager.GetBlockSize() - sizeof(block_id_t) - current_offset);
|
|
811
|
+
} else {
|
|
812
|
+
scan_state.in_buffer.size =
|
|
813
|
+
MinValue(metadata.compressed_size, block_manager.GetBlockSize() - current_offset);
|
|
814
|
+
}
|
|
802
815
|
|
|
803
816
|
// Initialize the context for streaming decompression
|
|
804
817
|
duckdb_zstd::ZSTD_DCtx_reset(decompression_context, duckdb_zstd::ZSTD_reset_session_only);
|
|
@@ -832,7 +845,7 @@ public:
|
|
|
832
845
|
scan_state.in_buffer.src = ptr;
|
|
833
846
|
scan_state.in_buffer.pos = 0;
|
|
834
847
|
|
|
835
|
-
idx_t page_size =
|
|
848
|
+
idx_t page_size = segment.SegmentSize() - sizeof(block_id_t);
|
|
836
849
|
idx_t remaining_compressed_data = scan_state.metadata.compressed_size - scan_state.compressed_scan_count;
|
|
837
850
|
scan_state.in_buffer.size = MinValue<idx_t>(page_size, remaining_compressed_data);
|
|
838
851
|
}
|
|
@@ -842,6 +855,7 @@ public:
|
|
|
842
855
|
return;
|
|
843
856
|
}
|
|
844
857
|
|
|
858
|
+
auto &in_buffer = scan_state.in_buffer;
|
|
845
859
|
duckdb_zstd::ZSTD_outBuffer out_buffer;
|
|
846
860
|
|
|
847
861
|
out_buffer.dst = destination;
|
|
@@ -849,18 +863,25 @@ public:
|
|
|
849
863
|
out_buffer.size = uncompressed_length;
|
|
850
864
|
|
|
851
865
|
while (true) {
|
|
852
|
-
idx_t old_pos =
|
|
866
|
+
idx_t old_pos = in_buffer.pos;
|
|
853
867
|
size_t res = duckdb_zstd::ZSTD_decompressStream(
|
|
854
868
|
/* zds = */ decompression_context,
|
|
855
869
|
/* output =*/&out_buffer,
|
|
856
|
-
/* input =*/&
|
|
857
|
-
scan_state.compressed_scan_count +=
|
|
870
|
+
/* input =*/&in_buffer);
|
|
871
|
+
scan_state.compressed_scan_count += in_buffer.pos - old_pos;
|
|
858
872
|
if (duckdb_zstd::ZSTD_isError(res)) {
|
|
859
873
|
throw InvalidInputException("ZSTD Decompression failed: %s", duckdb_zstd::ZSTD_getErrorName(res));
|
|
860
874
|
}
|
|
861
875
|
if (out_buffer.pos == out_buffer.size) {
|
|
876
|
+
//! Done decompressing the relevant portion
|
|
877
|
+
break;
|
|
878
|
+
}
|
|
879
|
+
if (!res) {
|
|
880
|
+
D_ASSERT(out_buffer.pos == out_buffer.size);
|
|
881
|
+
D_ASSERT(in_buffer.pos == in_buffer.size);
|
|
862
882
|
break;
|
|
863
883
|
}
|
|
884
|
+
D_ASSERT(in_buffer.pos == in_buffer.size);
|
|
864
885
|
// Did not fully decompress, it needs a new page to read from
|
|
865
886
|
LoadNextPageForVector(scan_state);
|
|
866
887
|
}
|
|
@@ -956,6 +977,7 @@ public:
|
|
|
956
977
|
idx_t segment_count;
|
|
957
978
|
//! The amount of tuples consumed
|
|
958
979
|
idx_t scanned_count = 0;
|
|
980
|
+
ColumnSegment &segment;
|
|
959
981
|
|
|
960
982
|
//! Buffer for skipping data
|
|
961
983
|
AllocatedData skip_buffer;
|
|
@@ -1195,7 +1195,7 @@ ErrorData DataTable::AppendToIndexes(TableIndexList &indexes, optional_ptr<Table
|
|
|
1195
1195
|
if (!index.IsBound()) {
|
|
1196
1196
|
// Buffer only the key columns, and store their mapping.
|
|
1197
1197
|
auto &unbound_index = index.Cast<UnboundIndex>();
|
|
1198
|
-
unbound_index.BufferChunk(index_chunk, row_ids, mapped_column_ids);
|
|
1198
|
+
unbound_index.BufferChunk(index_chunk, row_ids, mapped_column_ids, BufferedIndexReplay::INSERT_ENTRY);
|
|
1199
1199
|
return false;
|
|
1200
1200
|
}
|
|
1201
1201
|
|
|
@@ -154,12 +154,25 @@ void LocalTableStorage::FlushBlocks() {
|
|
|
154
154
|
ErrorData LocalTableStorage::AppendToIndexes(DuckTransaction &transaction, RowGroupCollection &source,
|
|
155
155
|
TableIndexList &index_list, const vector<LogicalType> &table_types,
|
|
156
156
|
row_t &start_row) {
|
|
157
|
-
//
|
|
157
|
+
// mapped_column_ids contains the physical column indices of each Indexed column in the table.
|
|
158
|
+
// This mapping is used to retrieve the physical column index for the corresponding vector of an index chunk scan.
|
|
159
|
+
// For example, if we are processing data for index_chunk.data[i], we can retrieve the physical column index
|
|
160
|
+
// by getting the value at mapped_column_ids[i].
|
|
161
|
+
// An important note is that the index_chunk orderings are created in accordance with this mapping, not the other
|
|
162
|
+
// way around. (Check the scan code below, where the mapped_column_ids is passed as a parameter to the scan.
|
|
163
|
+
// The index_chunk inside of that lambda is ordered according to the mapping that is a parameter to the scan).
|
|
164
|
+
|
|
165
|
+
// mapped_column_ids is used in two places:
|
|
166
|
+
// 1) To create the physical table chunk in this function.
|
|
167
|
+
// 2) If we are in an unbound state (i.e., WAL replay is happening right now), this mapping and the index_chunk
|
|
168
|
+
// are buffered in unbound_index. However, there can also be buffered deletes happening, so it is important
|
|
169
|
+
// to maintain a canonical representation of the mapping, which is just sorting.
|
|
158
170
|
auto indexed_columns = index_list.GetRequiredColumns();
|
|
159
171
|
vector<StorageIndex> mapped_column_ids;
|
|
160
172
|
for (auto &col : indexed_columns) {
|
|
161
173
|
mapped_column_ids.emplace_back(col);
|
|
162
174
|
}
|
|
175
|
+
std::sort(mapped_column_ids.begin(), mapped_column_ids.end());
|
|
163
176
|
|
|
164
177
|
// However, because the bound expressions of the indexes (and their bound
|
|
165
178
|
// column references) are in relation to ALL table columns, we create an
|
|
@@ -168,6 +181,7 @@ ErrorData LocalTableStorage::AppendToIndexes(DuckTransaction &transaction, RowGr
|
|
|
168
181
|
DataChunk table_chunk;
|
|
169
182
|
table_chunk.InitializeEmpty(table_types);
|
|
170
183
|
|
|
184
|
+
// index_chunk scans are created here in the mapped_column_ids ordering (see note above).
|
|
171
185
|
ErrorData error;
|
|
172
186
|
source.Scan(transaction, mapped_column_ids, [&](DataChunk &index_chunk) -> bool {
|
|
173
187
|
D_ASSERT(index_chunk.ColumnCount() == mapped_column_ids.size());
|
|
@@ -195,7 +209,6 @@ void LocalTableStorage::AppendToIndexes(DuckTransaction &transaction, TableAppen
|
|
|
195
209
|
bool append_to_table) {
|
|
196
210
|
// In this function, we might scan all table columns,
|
|
197
211
|
// as we might also append to the table itself (append_to_table).
|
|
198
|
-
|
|
199
212
|
auto &table = table_ref.get();
|
|
200
213
|
if (append_to_table) {
|
|
201
214
|
table.InitializeAppend(transaction, append_state);
|
|
@@ -104,7 +104,11 @@ MetadataHandle MetadataManager::Pin(QueryContext context, const MetadataPointer
|
|
|
104
104
|
shared_ptr<BlockHandle> block_handle;
|
|
105
105
|
{
|
|
106
106
|
lock_guard<mutex> guard(block_lock);
|
|
107
|
-
auto
|
|
107
|
+
auto entry = blocks.find(UnsafeNumericCast<int64_t>(pointer.block_index));
|
|
108
|
+
if (entry == blocks.end()) {
|
|
109
|
+
throw InternalException("Trying to pin block %llu - but the block did not exist", pointer.block_index);
|
|
110
|
+
}
|
|
111
|
+
auto &block = entry->second;
|
|
108
112
|
#ifdef DEBUG
|
|
109
113
|
for (auto &free_block : block.free_blocks) {
|
|
110
114
|
if (free_block == pointer.index) {
|
|
@@ -272,15 +276,18 @@ void MetadataManager::Flush() {
|
|
|
272
276
|
}
|
|
273
277
|
continue;
|
|
274
278
|
}
|
|
275
|
-
auto
|
|
279
|
+
auto block_handle = block.block;
|
|
280
|
+
auto handle = buffer_manager.Pin(block_handle);
|
|
276
281
|
// zero-initialize the few leftover bytes
|
|
277
282
|
memset(handle.Ptr() + total_metadata_size, 0, block_manager.GetBlockSize() - total_metadata_size);
|
|
278
283
|
D_ASSERT(kv.first == block.block_id);
|
|
279
|
-
if (
|
|
280
|
-
auto new_block =
|
|
281
|
-
block_manager.ConvertToPersistent(QueryContext(), kv.first, block.block, std::move(handle));
|
|
282
|
-
|
|
284
|
+
if (block_handle->BlockId() >= MAXIMUM_BLOCK) {
|
|
283
285
|
// Convert the temporary block to a persistent block.
|
|
286
|
+
// we cannot use ConvertToPersistent as another thread might still be reading the block
|
|
287
|
+
// so we use the safe version of ConvertToPersistent
|
|
288
|
+
auto new_block = block_manager.ConvertToPersistent(QueryContext(), kv.first, std::move(block_handle),
|
|
289
|
+
std::move(handle), ConvertToPersistentMode::THREAD_SAFE);
|
|
290
|
+
|
|
284
291
|
guard.lock();
|
|
285
292
|
block.block = std::move(new_block);
|
|
286
293
|
guard.unlock();
|
|
@@ -366,6 +373,7 @@ void MetadataBlock::FreeBlocksFromInteger(idx_t free_list) {
|
|
|
366
373
|
}
|
|
367
374
|
|
|
368
375
|
void MetadataManager::MarkBlocksAsModified() {
|
|
376
|
+
unique_lock<mutex> guard(block_lock);
|
|
369
377
|
// for any blocks that were modified in the last checkpoint - set them to free blocks currently
|
|
370
378
|
for (auto &kv : modified_blocks) {
|
|
371
379
|
auto block_id = kv.first;
|
|
@@ -379,7 +387,10 @@ void MetadataManager::MarkBlocksAsModified() {
|
|
|
379
387
|
if (new_free_blocks == NumericLimits<idx_t>::Maximum()) {
|
|
380
388
|
// if new free_blocks is all blocks - mark entire block as modified
|
|
381
389
|
blocks.erase(entry);
|
|
390
|
+
|
|
391
|
+
guard.unlock();
|
|
382
392
|
block_manager.MarkBlockAsModified(block_id);
|
|
393
|
+
guard.lock();
|
|
383
394
|
} else {
|
|
384
395
|
// set the new set of free blocks
|
|
385
396
|
block.FreeBlocksFromInteger(new_free_blocks);
|
|
@@ -414,6 +425,18 @@ void MetadataManager::ClearModifiedBlocks(const vector<MetaBlockPointer> &pointe
|
|
|
414
425
|
}
|
|
415
426
|
}
|
|
416
427
|
|
|
428
|
+
bool MetadataManager::BlockHasBeenCleared(const MetaBlockPointer &pointer) {
|
|
429
|
+
unique_lock<mutex> guard(block_lock);
|
|
430
|
+
auto block_id = pointer.GetBlockId();
|
|
431
|
+
auto block_index = pointer.GetBlockIndex();
|
|
432
|
+
auto entry = modified_blocks.find(block_id);
|
|
433
|
+
if (entry == modified_blocks.end()) {
|
|
434
|
+
throw InternalException("BlockHasBeenCleared - Block id %llu not found in modified_blocks", block_id);
|
|
435
|
+
}
|
|
436
|
+
auto &modified_list = entry->second;
|
|
437
|
+
return (modified_list & (1ULL << block_index)) == 0ULL;
|
|
438
|
+
}
|
|
439
|
+
|
|
417
440
|
vector<MetadataBlockInfo> MetadataManager::GetMetadataInfo() const {
|
|
418
441
|
vector<MetadataBlockInfo> result;
|
|
419
442
|
unique_lock<mutex> guard(block_lock);
|
|
@@ -4,11 +4,8 @@ namespace duckdb {
|
|
|
4
4
|
|
|
5
5
|
MetadataReader::MetadataReader(MetadataManager &manager, MetaBlockPointer pointer,
|
|
6
6
|
optional_ptr<vector<MetaBlockPointer>> read_pointers_p, BlockReaderType type)
|
|
7
|
-
: manager(manager), type(type), next_pointer(
|
|
8
|
-
|
|
9
|
-
if (read_pointers) {
|
|
10
|
-
read_pointers->push_back(pointer);
|
|
11
|
-
}
|
|
7
|
+
: manager(manager), type(type), next_pointer(pointer), has_next_block(true), read_pointers(read_pointers_p),
|
|
8
|
+
index(0), offset(0), next_offset(pointer.offset), capacity(0) {
|
|
12
9
|
}
|
|
13
10
|
|
|
14
11
|
MetadataReader::MetadataReader(MetadataManager &manager, BlockPointer pointer)
|
|
@@ -59,11 +56,10 @@ MetaBlockPointer MetadataReader::GetMetaBlockPointer() {
|
|
|
59
56
|
vector<MetaBlockPointer> MetadataReader::GetRemainingBlocks(MetaBlockPointer last_block) {
|
|
60
57
|
vector<MetaBlockPointer> result;
|
|
61
58
|
while (has_next_block) {
|
|
62
|
-
|
|
63
|
-
if (last_block.IsValid() && next_block_pointer.block_pointer == last_block.block_pointer) {
|
|
59
|
+
if (last_block.IsValid() && next_pointer.block_pointer == last_block.block_pointer) {
|
|
64
60
|
break;
|
|
65
61
|
}
|
|
66
|
-
result.push_back(
|
|
62
|
+
result.push_back(next_pointer);
|
|
67
63
|
ReadNextBlock();
|
|
68
64
|
}
|
|
69
65
|
return result;
|
|
@@ -77,18 +73,18 @@ void MetadataReader::ReadNextBlock(QueryContext context) {
|
|
|
77
73
|
if (!has_next_block) {
|
|
78
74
|
throw IOException("No more data remaining in MetadataReader");
|
|
79
75
|
}
|
|
80
|
-
|
|
81
|
-
|
|
76
|
+
if (read_pointers) {
|
|
77
|
+
read_pointers->push_back(next_pointer);
|
|
78
|
+
}
|
|
79
|
+
auto next_disk_pointer = FromDiskPointer(next_pointer);
|
|
80
|
+
block = manager.Pin(context, next_disk_pointer);
|
|
81
|
+
index = next_disk_pointer.index;
|
|
82
82
|
|
|
83
83
|
idx_t next_block = Load<idx_t>(BasePtr());
|
|
84
84
|
if (next_block == idx_t(-1)) {
|
|
85
85
|
has_next_block = false;
|
|
86
86
|
} else {
|
|
87
|
-
next_pointer =
|
|
88
|
-
MetaBlockPointer next_block_pointer(next_block, 0);
|
|
89
|
-
if (read_pointers) {
|
|
90
|
-
read_pointers->push_back(next_block_pointer);
|
|
91
|
-
}
|
|
87
|
+
next_pointer = MetaBlockPointer(next_block, 0);
|
|
92
88
|
}
|
|
93
89
|
if (next_offset < sizeof(block_id_t)) {
|
|
94
90
|
next_offset = sizeof(block_id_t);
|
|
@@ -32,7 +32,7 @@ MetaBlockPointer MetadataWriter::GetMetaBlockPointer() {
|
|
|
32
32
|
|
|
33
33
|
void MetadataWriter::SetWrittenPointers(optional_ptr<vector<MetaBlockPointer>> written_pointers_p) {
|
|
34
34
|
written_pointers = written_pointers_p;
|
|
35
|
-
if (written_pointers && capacity > 0) {
|
|
35
|
+
if (written_pointers && capacity > 0 && offset < capacity) {
|
|
36
36
|
written_pointers->push_back(manager.GetDiskPointer(current_pointer));
|
|
37
37
|
}
|
|
38
38
|
}
|
|
@@ -38,28 +38,10 @@ unique_ptr<QueryNode> QueryNode::Deserialize(Deserializer &deserializer) {
|
|
|
38
38
|
}
|
|
39
39
|
result->modifiers = std::move(modifiers);
|
|
40
40
|
result->cte_map = std::move(cte_map);
|
|
41
|
+
ExtractCTENodes(result);
|
|
41
42
|
return result;
|
|
42
43
|
}
|
|
43
44
|
|
|
44
|
-
void CTENode::Serialize(Serializer &serializer) const {
|
|
45
|
-
QueryNode::Serialize(serializer);
|
|
46
|
-
serializer.WritePropertyWithDefault<string>(200, "cte_name", ctename);
|
|
47
|
-
serializer.WritePropertyWithDefault<unique_ptr<QueryNode>>(201, "query", query);
|
|
48
|
-
serializer.WritePropertyWithDefault<unique_ptr<QueryNode>>(202, "child", child);
|
|
49
|
-
serializer.WritePropertyWithDefault<vector<string>>(203, "aliases", aliases);
|
|
50
|
-
serializer.WritePropertyWithDefault<CTEMaterialize>(204, "materialized", materialized, CTEMaterialize::CTE_MATERIALIZE_DEFAULT);
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
unique_ptr<QueryNode> CTENode::Deserialize(Deserializer &deserializer) {
|
|
54
|
-
auto result = duckdb::unique_ptr<CTENode>(new CTENode());
|
|
55
|
-
deserializer.ReadPropertyWithDefault<string>(200, "cte_name", result->ctename);
|
|
56
|
-
deserializer.ReadPropertyWithDefault<unique_ptr<QueryNode>>(201, "query", result->query);
|
|
57
|
-
deserializer.ReadPropertyWithDefault<unique_ptr<QueryNode>>(202, "child", result->child);
|
|
58
|
-
deserializer.ReadPropertyWithDefault<vector<string>>(203, "aliases", result->aliases);
|
|
59
|
-
deserializer.ReadPropertyWithExplicitDefault<CTEMaterialize>(204, "materialized", result->materialized, CTEMaterialize::CTE_MATERIALIZE_DEFAULT);
|
|
60
|
-
return std::move(result);
|
|
61
|
-
}
|
|
62
|
-
|
|
63
45
|
void RecursiveCTENode::Serialize(Serializer &serializer) const {
|
|
64
46
|
QueryNode::Serialize(serializer);
|
|
65
47
|
serializer.WritePropertyWithDefault<string>(200, "cte_name", ctename);
|
|
@@ -66,8 +66,8 @@ void DeserializeEncryptionData(ReadStream &stream, data_t *dest, idx_t size) {
|
|
|
66
66
|
|
|
67
67
|
void GenerateDBIdentifier(uint8_t *db_identifier) {
|
|
68
68
|
memset(db_identifier, 0, MainHeader::DB_IDENTIFIER_LEN);
|
|
69
|
-
|
|
70
|
-
|
|
69
|
+
RandomEngine engine;
|
|
70
|
+
engine.RandomData(db_identifier, MainHeader::DB_IDENTIFIER_LEN);
|
|
71
71
|
}
|
|
72
72
|
|
|
73
73
|
void EncryptCanary(MainHeader &main_header, const shared_ptr<EncryptionState> &encryption_state,
|
|
@@ -362,6 +362,15 @@ void SingleFileBlockManager::CheckAndAddEncryptionKey(MainHeader &main_header) {
|
|
|
362
362
|
void SingleFileBlockManager::CreateNewDatabase(QueryContext context) {
|
|
363
363
|
auto flags = GetFileFlags(true);
|
|
364
364
|
|
|
365
|
+
auto encryption_enabled = options.encryption_options.encryption_enabled;
|
|
366
|
+
if (encryption_enabled) {
|
|
367
|
+
if (!db.GetDatabase().GetEncryptionUtil()->SupportsEncryption() && !options.read_only) {
|
|
368
|
+
throw InvalidConfigurationException(
|
|
369
|
+
"The database was opened with encryption enabled, but DuckDB currently has a read-only crypto module "
|
|
370
|
+
"loaded. Please re-open using READONLY, or ensure httpfs is loaded using `LOAD httpfs`.");
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
|
|
365
374
|
// open the RDBMS handle
|
|
366
375
|
auto &fs = FileSystem::Get(db);
|
|
367
376
|
handle = fs.OpenFile(path, flags);
|
|
@@ -376,7 +385,6 @@ void SingleFileBlockManager::CreateNewDatabase(QueryContext context) {
|
|
|
376
385
|
// Derive the encryption key and add it to the cache.
|
|
377
386
|
// Not used for plain databases.
|
|
378
387
|
data_t derived_key[MainHeader::DEFAULT_ENCRYPTION_KEY_LENGTH];
|
|
379
|
-
auto encryption_enabled = options.encryption_options.encryption_enabled;
|
|
380
388
|
|
|
381
389
|
// We need the unique database identifier, if the storage version is new enough.
|
|
382
390
|
// If encryption is enabled, we also use it as the salt.
|
|
@@ -487,6 +495,15 @@ void SingleFileBlockManager::LoadExistingDatabase(QueryContext context) {
|
|
|
487
495
|
if (main_header.IsEncrypted()) {
|
|
488
496
|
if (options.encryption_options.encryption_enabled) {
|
|
489
497
|
//! Encryption is set
|
|
498
|
+
|
|
499
|
+
//! Check if our encryption module can write, if not, we should throw here
|
|
500
|
+
if (!db.GetDatabase().GetEncryptionUtil()->SupportsEncryption() && !options.read_only) {
|
|
501
|
+
throw InvalidConfigurationException(
|
|
502
|
+
"The database is encrypted, but DuckDB currently has a read-only crypto module loaded. Either "
|
|
503
|
+
"re-open the database using `ATTACH '..' (READONLY)`, or ensure httpfs is loaded using `LOAD "
|
|
504
|
+
"httpfs`.");
|
|
505
|
+
}
|
|
506
|
+
|
|
490
507
|
//! Check if the given key upon attach is correct
|
|
491
508
|
// Derive the encryption key and add it to cache
|
|
492
509
|
CheckAndAddEncryptionKey(main_header);
|
|
@@ -506,6 +523,19 @@ void SingleFileBlockManager::LoadExistingDatabase(QueryContext context) {
|
|
|
506
523
|
path, EncryptionTypes::CipherToString(config_cipher),
|
|
507
524
|
EncryptionTypes::CipherToString(stored_cipher));
|
|
508
525
|
}
|
|
526
|
+
|
|
527
|
+
This prevents the cipher from being downgraded by an attacker. FIXME: we likely want to have a proper validation
|
|
528
|
+
// of the cipher used instead of this trick to avoid downgrades
|
|
529
|
+
if (stored_cipher != EncryptionTypes::GCM) {
|
|
530
|
+
if (config_cipher == EncryptionTypes::INVALID) {
|
|
531
|
+
throw CatalogException(
|
|
532
|
+
"Cannot open encrypted database \"%s\" without explicitly specifying the "
|
|
533
|
+
"encryption cipher for security reasons. Please make sure you understand the security implications "
|
|
534
|
+
"and re-attach the database specifying the desired cipher.",
|
|
535
|
+
path);
|
|
536
|
+
}
|
|
537
|
+
}
|
|
538
|
+
|
|
509
539
|
// this is ugly, but the storage manager does not know the cipher type before
|
|
510
540
|
db.GetStorageManager().SetCipher(stored_cipher);
|
|
511
541
|
}
|
|
@@ -542,8 +542,10 @@ unique_ptr<FileBuffer> StandardBufferManager::ReadTemporaryBuffer(QueryContext c
|
|
|
542
542
|
BlockHandle &block,
|
|
543
543
|
unique_ptr<FileBuffer> reusable_buffer) {
|
|
544
544
|
D_ASSERT(!temporary_directory.path.empty());
|
|
545
|
-
D_ASSERT(temporary_directory.handle.get());
|
|
546
545
|
auto id = block.BlockId();
|
|
546
|
+
if (!temporary_directory.handle) {
|
|
547
|
+
throw InternalException("ReadTemporaryBuffer called but temporary directory has not been instantiated yet");
|
|
548
|
+
}
|
|
547
549
|
if (temporary_directory.handle->GetTempFile().HasTemporaryBuffer(id)) {
|
|
548
550
|
// This is a block that was offloaded to a regular .tmp file, the file contains blocks of a fixed size
|
|
549
551
|
return temporary_directory.handle->GetTempFile().ReadTemporaryBuffer(context, id, std::move(reusable_buffer));
|
|
@@ -83,6 +83,8 @@ static const StorageVersionInfo storage_version_info[] = {
|
|
|
83
83
|
{"v1.3.1", 66},
|
|
84
84
|
{"v1.3.2", 66},
|
|
85
85
|
{"v1.4.0", 67},
|
|
86
|
+
{"v1.4.1", 67},
|
|
87
|
+
{"v1.4.2", 67},
|
|
86
88
|
{nullptr, 0}
|
|
87
89
|
};
|
|
88
90
|
// END OF STORAGE VERSION INFO
|
|
@@ -108,6 +110,8 @@ static const SerializationVersionInfo serialization_version_info[] = {
|
|
|
108
110
|
{"v1.3.1", 5},
|
|
109
111
|
{"v1.3.2", 5},
|
|
110
112
|
{"v1.4.0", 6},
|
|
113
|
+
{"v1.4.1", 6},
|
|
114
|
+
{"v1.4.2", 6},
|
|
111
115
|
{"latest", 6},
|
|
112
116
|
{nullptr, 0}
|
|
113
117
|
};
|
|
@@ -142,6 +142,14 @@ bool StorageManager::InMemory() const {
|
|
|
142
142
|
return path == IN_MEMORY_PATH;
|
|
143
143
|
}
|
|
144
144
|
|
|
145
|
+
inline void ClearUserKey(shared_ptr<string> const &encryption_key) {
|
|
146
|
+
if (encryption_key && !encryption_key->empty()) {
|
|
147
|
+
duckdb_mbedtls::MbedTlsWrapper::AESStateMBEDTLS::SecureClearData(data_ptr_cast(&(*encryption_key)[0]),
|
|
148
|
+
encryption_key->size());
|
|
149
|
+
encryption_key->clear();
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
|
|
145
153
|
void StorageManager::Initialize(QueryContext context) {
|
|
146
154
|
bool in_memory = InMemory();
|
|
147
155
|
if (in_memory && read_only) {
|
|
@@ -495,10 +495,10 @@ void ColumnData::InitializeAppend(ColumnAppendState &state) {
|
|
|
495
495
|
void ColumnData::AppendData(BaseStatistics &append_stats, ColumnAppendState &state, UnifiedVectorFormat &vdata,
|
|
496
496
|
idx_t append_count) {
|
|
497
497
|
idx_t offset = 0;
|
|
498
|
-
this->count += append_count;
|
|
499
498
|
while (true) {
|
|
500
499
|
// append the data from the vector
|
|
501
500
|
idx_t copied_elements = state.current->Append(state, vdata, offset, append_count);
|
|
501
|
+
this->count += copied_elements;
|
|
502
502
|
append_stats.Merge(state.current->stats.statistics);
|
|
503
503
|
if (copied_elements == append_count) {
|
|
504
504
|
// finished copying everything
|
|
@@ -868,7 +868,8 @@ bool PersistentCollectionData::HasUpdates() const {
|
|
|
868
868
|
}
|
|
869
869
|
|
|
870
870
|
PersistentColumnData ColumnData::Serialize() {
|
|
871
|
-
|
|
871
|
+
auto result = count ? PersistentColumnData(type.InternalType(), GetDataPointers())
|
|
872
|
+
: PersistentColumnData(type.InternalType());
|
|
872
873
|
result.has_updates = HasUpdates();
|
|
873
874
|
return result;
|
|
874
875
|
}
|
|
@@ -109,9 +109,10 @@ CompressionType ForceCompression(StorageManager &storage_manager,
|
|
|
109
109
|
CompressionType compression_type) {
|
|
110
110
|
// One of the force_compression flags has been set
|
|
111
111
|
// check if this compression method is available
|
|
112
|
-
//
|
|
112
|
+
// auto compression_availability_result = CompressionTypeIsAvailable(compression_type, storage_manager);
|
|
113
|
+
// if (!compression_availability_result.IsAvailable()) {
|
|
113
114
|
// throw InvalidInputException("The forced compression method (%s) is not available in the current storage
|
|
114
|
-
// version",
|
|
115
|
+
// version", CompressionTypeToString(compression_type));
|
|
115
116
|
//}
|
|
116
117
|
|
|
117
118
|
bool found = false;
|