duckdb 1.1.4-dev13.0 → 1.1.4-dev14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +1 -0
- package/package.json +1 -1
- package/src/duckdb/extension/core_functions/function_list.cpp +1 -0
- package/src/duckdb/extension/core_functions/include/core_functions/scalar/map_functions.hpp +9 -0
- package/src/duckdb/extension/core_functions/scalar/date/current.cpp +1 -0
- package/src/duckdb/extension/core_functions/scalar/generic/can_implicitly_cast.cpp +2 -2
- package/src/duckdb/extension/core_functions/scalar/generic/typeof.cpp +1 -1
- package/src/duckdb/extension/core_functions/scalar/list/flatten.cpp +91 -61
- package/src/duckdb/extension/core_functions/scalar/map/map_extract.cpp +89 -8
- package/src/duckdb/extension/icu/icu-current.cpp +63 -0
- package/src/duckdb/extension/icu/icu-makedate.cpp +43 -39
- package/src/duckdb/extension/icu/icu-timezone.cpp +63 -63
- package/src/duckdb/extension/icu/icu_extension.cpp +2 -0
- package/src/duckdb/extension/icu/include/icu-casts.hpp +39 -0
- package/src/duckdb/extension/icu/include/icu-current.hpp +17 -0
- package/src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +3 -1
- package/src/duckdb/extension/parquet/column_writer.cpp +26 -18
- package/src/duckdb/extension/parquet/include/parquet_reader.hpp +0 -6
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +15 -1
- package/src/duckdb/extension/parquet/include/resizable_buffer.hpp +1 -0
- package/src/duckdb/extension/parquet/parquet_extension.cpp +67 -15
- package/src/duckdb/extension/parquet/parquet_reader.cpp +5 -3
- package/src/duckdb/extension/parquet/parquet_writer.cpp +5 -6
- package/src/duckdb/src/catalog/catalog.cpp +21 -8
- package/src/duckdb/src/catalog/catalog_search_path.cpp +17 -1
- package/src/duckdb/src/catalog/catalog_set.cpp +1 -1
- package/src/duckdb/src/catalog/default/default_functions.cpp +0 -3
- package/src/duckdb/src/catalog/dependency_list.cpp +7 -0
- package/src/duckdb/src/common/adbc/adbc.cpp +1 -56
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +3 -2
- package/src/duckdb/src/common/arrow/arrow_type_extension.cpp +58 -28
- package/src/duckdb/src/common/arrow/schema_metadata.cpp +1 -1
- package/src/duckdb/src/common/compressed_file_system.cpp +6 -2
- package/src/duckdb/src/common/enum_util.cpp +26 -22
- package/src/duckdb/src/common/error_data.cpp +3 -2
- package/src/duckdb/src/common/gzip_file_system.cpp +8 -8
- package/src/duckdb/src/common/local_file_system.cpp +2 -2
- package/src/duckdb/src/common/multi_file_reader.cpp +1 -1
- package/src/duckdb/src/common/random_engine.cpp +4 -1
- package/src/duckdb/src/common/serializer/memory_stream.cpp +23 -19
- package/src/duckdb/src/common/serializer/serializer.cpp +1 -1
- package/src/duckdb/src/common/types/bit.cpp +1 -1
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +0 -5
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +4 -1
- package/src/duckdb/src/common/types/data_chunk.cpp +2 -1
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +0 -4
- package/src/duckdb/src/common/types.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +52 -42
- package/src/duckdb/src/execution/index/art/leaf.cpp +4 -9
- package/src/duckdb/src/execution/index/art/node.cpp +13 -13
- package/src/duckdb/src/execution/index/art/prefix.cpp +21 -16
- package/src/duckdb/src/execution/index/bound_index.cpp +6 -8
- package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +39 -34
- package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +2 -1
- package/src/duckdb/src/execution/index/unbound_index.cpp +10 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +62 -44
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +26 -0
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +69 -40
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +3 -7
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +11 -5
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +4 -0
- package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +8 -8
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +36 -12
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +12 -9
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +0 -1
- package/src/duckdb/src/execution/operator/persistent/physical_copy_database.cpp +29 -1
- package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +58 -10
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +58 -35
- package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +2 -1
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +9 -4
- package/src/duckdb/src/execution/sample/reservoir_sample.cpp +7 -6
- package/src/duckdb/src/function/compression_config.cpp +4 -0
- package/src/duckdb/src/function/function_binder.cpp +1 -1
- package/src/duckdb/src/function/scalar/system/write_log.cpp +2 -2
- package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +15 -2
- package/src/duckdb/src/function/table/arrow_conversion.cpp +10 -10
- package/src/duckdb/src/function/table/copy_csv.cpp +8 -5
- package/src/duckdb/src/function/table/read_csv.cpp +21 -4
- package/src/duckdb/src/function/table/sniff_csv.cpp +7 -0
- package/src/duckdb/src/function/table/system/duckdb_extensions.cpp +4 -0
- package/src/duckdb/src/function/table/system/duckdb_secret_types.cpp +71 -0
- package/src/duckdb/src/function/table/system_functions.cpp +1 -0
- package/src/duckdb/src/function/table/table_scan.cpp +120 -36
- package/src/duckdb/src/function/table/version/pragma_version.cpp +4 -4
- package/src/duckdb/src/function/window/window_aggregate_function.cpp +6 -1
- package/src/duckdb/src/function/window/window_boundaries_state.cpp +135 -11
- package/src/duckdb/src/function/window/window_segment_tree.cpp +50 -22
- package/src/duckdb/src/function/window/window_token_tree.cpp +4 -3
- package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +4 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +2 -0
- package/src/duckdb/src/include/duckdb/catalog/dependency_list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_type_extension.hpp +4 -2
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -8
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +0 -2
- package/src/duckdb/src/include/duckdb/common/serializer/deserializer.hpp +8 -3
- package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp +6 -1
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_data.hpp +25 -0
- package/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp +9 -3
- package/src/duckdb/src/include/duckdb/common/types/selection_vector.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +11 -14
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +5 -4
- package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +21 -10
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +6 -5
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +37 -32
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +36 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine_options.hpp +5 -5
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +5 -30
- package/src/duckdb/src/include/duckdb/execution/reservoir_sample.hpp +7 -1
- package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +3 -3
- package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/function/window/window_boundaries_state.hpp +2 -2
- package/src/duckdb/src/include/duckdb/logging/logger.hpp +40 -119
- package/src/duckdb/src/include/duckdb/logging/logging.hpp +0 -2
- package/src/duckdb/src/include/duckdb/main/config.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/connection.hpp +0 -8
- package/src/duckdb/src/include/duckdb/main/connection_manager.hpp +2 -1
- package/src/duckdb/src/include/duckdb/main/extension.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +11 -7
- package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/secret/secret.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/secret/secret_manager.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/settings.hpp +10 -0
- package/src/duckdb/src/include/duckdb/parser/constraint.hpp +9 -0
- package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +36 -9
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_view_info.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/query_node/set_operation_node.hpp +8 -2
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_data.hpp +9 -1
- package/src/duckdb/src/include/duckdb/planner/filter/constant_filter.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/filter/in_filter.hpp +0 -2
- package/src/duckdb/src/include/duckdb/planner/filter/optional_filter.hpp +4 -4
- package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +14 -10
- package/src/duckdb/src/include/duckdb/storage/index_storage_info.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +6 -1
- package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +7 -2
- package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +9 -0
- package/src/duckdb/src/include/duckdb/storage/storage_options.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +4 -3
- package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +6 -4
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +2 -0
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -0
- package/src/duckdb/src/include/duckdb/transaction/meta_transaction.hpp +1 -1
- package/src/duckdb/src/logging/logger.cpp +8 -66
- package/src/duckdb/src/main/attached_database.cpp +3 -1
- package/src/duckdb/src/main/client_context.cpp +4 -2
- package/src/duckdb/src/main/config.cpp +20 -2
- package/src/duckdb/src/main/connection.cpp +2 -29
- package/src/duckdb/src/main/connection_manager.cpp +5 -3
- package/src/duckdb/src/main/database.cpp +2 -2
- package/src/duckdb/src/main/extension/extension_helper.cpp +4 -5
- package/src/duckdb/src/main/extension/extension_install.cpp +23 -10
- package/src/duckdb/src/main/extension/extension_load.cpp +6 -7
- package/src/duckdb/src/main/extension.cpp +27 -9
- package/src/duckdb/src/main/secret/secret_manager.cpp +11 -0
- package/src/duckdb/src/main/settings/custom_settings.cpp +44 -0
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +6 -0
- package/src/duckdb/src/optimizer/filter_combiner.cpp +13 -3
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +33 -6
- package/src/duckdb/src/optimizer/late_materialization.cpp +14 -3
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +0 -3
- package/src/duckdb/src/parser/parsed_data/attach_info.cpp +5 -1
- package/src/duckdb/src/parser/parsed_data/create_view_info.cpp +6 -3
- package/src/duckdb/src/parser/query_node/set_operation_node.cpp +49 -0
- package/src/duckdb/src/parser/transform/expression/transform_columnref.cpp +1 -0
- package/src/duckdb/src/parser/transform/expression/transform_function.cpp +50 -12
- package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +7 -5
- package/src/duckdb/src/planner/binder/expression/bind_comparison_expression.cpp +1 -0
- package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +2 -2
- package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +12 -2
- package/src/duckdb/src/planner/binder/statement/bind_copy_database.cpp +0 -1
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +55 -39
- package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +2 -1
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +15 -7
- package/src/duckdb/src/planner/binder/tableref/bind_showref.cpp +13 -8
- package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +8 -3
- package/src/duckdb/src/planner/expression/bound_function_expression.cpp +17 -1
- package/src/duckdb/src/planner/expression_binder/index_binder.cpp +1 -0
- package/src/duckdb/src/planner/filter/conjunction_filter.cpp +1 -0
- package/src/duckdb/src/planner/filter/constant_filter.cpp +21 -0
- package/src/duckdb/src/planner/filter/in_filter.cpp +4 -7
- package/src/duckdb/src/planner/logical_operator.cpp +5 -3
- package/src/duckdb/src/planner/planner.cpp +1 -1
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +2 -0
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +3 -4
- package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -5
- package/src/duckdb/src/storage/compression/dictionary/decompression.cpp +4 -4
- package/src/duckdb/src/storage/compression/fsst.cpp +2 -2
- package/src/duckdb/src/storage/compression/roaring/common.cpp +10 -1
- package/src/duckdb/src/storage/compression/string_uncompressed.cpp +11 -6
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +4 -0
- package/src/duckdb/src/storage/compression/zstd.cpp +6 -0
- package/src/duckdb/src/storage/data_table.cpp +104 -109
- package/src/duckdb/src/storage/local_storage.cpp +8 -6
- package/src/duckdb/src/storage/magic_bytes.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_dependency.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +7 -5
- package/src/duckdb/src/storage/single_file_block_manager.cpp +95 -28
- package/src/duckdb/src/storage/storage_info.cpp +38 -0
- package/src/duckdb/src/storage/storage_manager.cpp +11 -0
- package/src/duckdb/src/storage/table/column_data.cpp +4 -0
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +3 -3
- package/src/duckdb/src/storage/table/row_group_collection.cpp +67 -68
- package/src/duckdb/src/storage/table/table_statistics.cpp +4 -4
- package/src/duckdb/src/storage/table_index_list.cpp +41 -15
- package/src/duckdb/src/storage/wal_replay.cpp +3 -1
- package/src/duckdb/src/storage/write_ahead_log.cpp +11 -4
- package/src/duckdb/src/transaction/meta_transaction.cpp +1 -1
- package/src/duckdb/src/verification/deserialized_statement_verifier.cpp +2 -1
- package/src/duckdb/third_party/httplib/httplib.hpp +0 -1
- package/src/duckdb/third_party/re2/util/logging.h +10 -10
- package/src/duckdb/ub_src_function_table_system.cpp +2 -0
@@ -662,7 +662,9 @@ static double CalculateTypeSimilarity(const LogicalType &merged, const LogicalTy
|
|
662
662
|
}
|
663
663
|
|
664
664
|
// Only maps and structs can be merged into a map
|
665
|
-
|
665
|
+
if (type.id() != LogicalTypeId::STRUCT) {
|
666
|
+
return -1;
|
667
|
+
}
|
666
668
|
return CalculateMapAndStructSimilarity(merged, type, false, max_depth, depth);
|
667
669
|
}
|
668
670
|
case LogicalTypeId::LIST: {
|
@@ -498,6 +498,7 @@ void BasicColumnWriter::BeginWrite(ColumnWriterState &state_p) {
|
|
498
498
|
hdr.data_page_header.repetition_level_encoding = Encoding::RLE;
|
499
499
|
|
500
500
|
write_info.temp_writer = make_uniq<MemoryStream>(
|
501
|
+
Allocator::Get(writer.GetContext()),
|
501
502
|
MaxValue<idx_t>(NextPowerOfTwo(page_info.estimated_page_size), MemoryStream::DEFAULT_INITIAL_CAPACITY));
|
502
503
|
write_info.write_count = page_info.empty_count;
|
503
504
|
write_info.max_write_count = page_info.row_count;
|
@@ -717,6 +718,7 @@ void BasicColumnWriter::FinalizeWrite(ColumnWriterState &state_p) {
|
|
717
718
|
column_chunk.meta_data.total_compressed_size =
|
718
719
|
UnsafeNumericCast<int64_t>(column_writer.GetTotalWritten() - start_offset);
|
719
720
|
column_chunk.meta_data.total_uncompressed_size = UnsafeNumericCast<int64_t>(total_uncompressed_size);
|
721
|
+
state.row_group.total_byte_size += column_chunk.meta_data.total_uncompressed_size;
|
720
722
|
|
721
723
|
if (state.bloom_filter) {
|
722
724
|
writer.BufferBloomFilter(state.col_idx, std::move(state.bloom_filter));
|
@@ -1173,7 +1175,7 @@ void WriteValue(DlbaEncoder &encoder, WriteStream &writer, const string_t &value
|
|
1173
1175
|
|
1174
1176
|
// helpers to get size from strings
|
1175
1177
|
template <class SRC>
|
1176
|
-
static
|
1178
|
+
static idx_t GetDlbaStringSize(const SRC &src_value) {
|
1177
1179
|
return 0;
|
1178
1180
|
}
|
1179
1181
|
|
@@ -1311,21 +1313,26 @@ public:
|
|
1311
1313
|
|
1312
1314
|
auto &state = state_p.Cast<StandardColumnWriterState<SRC>>();
|
1313
1315
|
if (state.dictionary.size() == 0 || state.dictionary.size() > writer.DictionarySizeLimit()) {
|
1314
|
-
|
1315
|
-
|
1316
|
-
case Type::type::INT32:
|
1317
|
-
case Type::type::INT64:
|
1318
|
-
state.encoding = Encoding::DELTA_BINARY_PACKED;
|
1319
|
-
break;
|
1320
|
-
case Type::type::BYTE_ARRAY:
|
1321
|
-
state.encoding = Encoding::DELTA_LENGTH_BYTE_ARRAY;
|
1322
|
-
break;
|
1323
|
-
case Type::type::FLOAT:
|
1324
|
-
case Type::type::DOUBLE:
|
1325
|
-
state.encoding = Encoding::BYTE_STREAM_SPLIT;
|
1326
|
-
break;
|
1327
|
-
default:
|
1316
|
+
if (writer.GetParquetVersion() == ParquetVersion::V1) {
|
1317
|
+
// Can't do the cool stuff for V1
|
1328
1318
|
state.encoding = Encoding::PLAIN;
|
1319
|
+
} else {
|
1320
|
+
// If we aren't doing dictionary encoding, these encodings are virtually always better than PLAIN
|
1321
|
+
switch (type) {
|
1322
|
+
case Type::type::INT32:
|
1323
|
+
case Type::type::INT64:
|
1324
|
+
state.encoding = Encoding::DELTA_BINARY_PACKED;
|
1325
|
+
break;
|
1326
|
+
case Type::type::BYTE_ARRAY:
|
1327
|
+
state.encoding = Encoding::DELTA_LENGTH_BYTE_ARRAY;
|
1328
|
+
break;
|
1329
|
+
case Type::type::FLOAT:
|
1330
|
+
case Type::type::DOUBLE:
|
1331
|
+
state.encoding = Encoding::BYTE_STREAM_SPLIT;
|
1332
|
+
break;
|
1333
|
+
default:
|
1334
|
+
state.encoding = Encoding::PLAIN;
|
1335
|
+
}
|
1329
1336
|
}
|
1330
1337
|
state.dictionary.clear();
|
1331
1338
|
}
|
@@ -1463,8 +1470,9 @@ public:
|
|
1463
1470
|
make_uniq<ParquetBloomFilter>(state.dictionary.size(), writer.BloomFilterFalsePositiveRatio());
|
1464
1471
|
|
1465
1472
|
// first write the contents of the dictionary page to a temporary buffer
|
1466
|
-
auto temp_writer = make_uniq<MemoryStream>(
|
1467
|
-
NextPowerOfTwo(state.dictionary.size() * sizeof(TGT)),
|
1473
|
+
auto temp_writer = make_uniq<MemoryStream>(
|
1474
|
+
Allocator::Get(writer.GetContext()), MaxValue<idx_t>(NextPowerOfTwo(state.dictionary.size() * sizeof(TGT)),
|
1475
|
+
MemoryStream::DEFAULT_INITIAL_CAPACITY));
|
1468
1476
|
for (idx_t r = 0; r < values.size(); r++) {
|
1469
1477
|
const TGT target_value = OP::template Operation<SRC, TGT>(values[r]);
|
1470
1478
|
// update the statistics
|
@@ -1838,7 +1846,7 @@ public:
|
|
1838
1846
|
auto enum_count = EnumType::GetSize(enum_type);
|
1839
1847
|
auto string_values = FlatVector::GetData<string_t>(enum_values);
|
1840
1848
|
// first write the contents of the dictionary page to a temporary buffer
|
1841
|
-
auto temp_writer = make_uniq<MemoryStream>();
|
1849
|
+
auto temp_writer = make_uniq<MemoryStream>(Allocator::Get(writer.GetContext()));
|
1842
1850
|
for (idx_t r = 0; r < enum_count; r++) {
|
1843
1851
|
D_ASSERT(!FlatVector::IsNull(enum_values, r));
|
1844
1852
|
// update the statistics
|
@@ -216,12 +216,6 @@ private:
|
|
216
216
|
void PrepareRowGroupBuffer(ParquetReaderScanState &state, idx_t out_col_idx);
|
217
217
|
LogicalType DeriveLogicalType(const SchemaElement &s_ele);
|
218
218
|
|
219
|
-
template <typename... Args>
|
220
|
-
std::runtime_error FormatException(const string fmt_str, Args... params) {
|
221
|
-
return std::runtime_error("Failed to read Parquet file \"" + file_name +
|
222
|
-
"\": " + StringUtil::Format(fmt_str, params...));
|
223
|
-
}
|
224
|
-
|
225
219
|
private:
|
226
220
|
unique_ptr<FileHandle> file_handle;
|
227
221
|
};
|
@@ -68,13 +68,19 @@ struct ParquetBloomFilterEntry {
|
|
68
68
|
idx_t column_idx;
|
69
69
|
};
|
70
70
|
|
71
|
+
enum class ParquetVersion : uint8_t {
|
72
|
+
V1 = 1, //! Excludes DELTA_BINARY_PACKED, DELTA_LENGTH_BYTE_ARRAY, BYTE_STREAM_SPLIT
|
73
|
+
V2 = 2, //! Includes the encodings above
|
74
|
+
};
|
75
|
+
|
71
76
|
class ParquetWriter {
|
72
77
|
public:
|
73
78
|
ParquetWriter(ClientContext &context, FileSystem &fs, string file_name, vector<LogicalType> types,
|
74
79
|
vector<string> names, duckdb_parquet::CompressionCodec::type codec, ChildFieldIDs field_ids,
|
75
80
|
const vector<pair<string, string>> &kv_metadata,
|
76
81
|
shared_ptr<ParquetEncryptionConfig> encryption_config, idx_t dictionary_size_limit,
|
77
|
-
double bloom_filter_false_positive_ratio, int64_t compression_level, bool debug_use_openssl
|
82
|
+
double bloom_filter_false_positive_ratio, int64_t compression_level, bool debug_use_openssl,
|
83
|
+
ParquetVersion parquet_version);
|
78
84
|
|
79
85
|
public:
|
80
86
|
void PrepareRowGroup(ColumnDataCollection &buffer, PreparedRowGroup &result);
|
@@ -85,6 +91,9 @@ public:
|
|
85
91
|
static duckdb_parquet::Type::type DuckDBTypeToParquetType(const LogicalType &duckdb_type);
|
86
92
|
static void SetSchemaProperties(const LogicalType &duckdb_type, duckdb_parquet::SchemaElement &schema_ele);
|
87
93
|
|
94
|
+
ClientContext &GetContext() {
|
95
|
+
return context;
|
96
|
+
}
|
88
97
|
duckdb_apache::thrift::protocol::TProtocol *GetProtocol() {
|
89
98
|
return protocol.get();
|
90
99
|
}
|
@@ -117,6 +126,9 @@ public:
|
|
117
126
|
lock_guard<mutex> glock(lock);
|
118
127
|
return file_meta_data.row_groups.size();
|
119
128
|
}
|
129
|
+
ParquetVersion GetParquetVersion() const {
|
130
|
+
return parquet_version;
|
131
|
+
}
|
120
132
|
|
121
133
|
uint32_t Write(const duckdb_apache::thrift::TBase &object);
|
122
134
|
uint32_t WriteData(const const_data_ptr_t buffer, const uint32_t buffer_size);
|
@@ -129,6 +141,7 @@ public:
|
|
129
141
|
void BufferBloomFilter(idx_t col_idx, unique_ptr<ParquetBloomFilter> bloom_filter);
|
130
142
|
|
131
143
|
private:
|
144
|
+
ClientContext &context;
|
132
145
|
string file_name;
|
133
146
|
vector<LogicalType> sql_types;
|
134
147
|
vector<string> column_names;
|
@@ -140,6 +153,7 @@ private:
|
|
140
153
|
int64_t compression_level;
|
141
154
|
bool debug_use_openssl;
|
142
155
|
shared_ptr<EncryptionUtil> encryption_util;
|
156
|
+
ParquetVersion parquet_version;
|
143
157
|
|
144
158
|
unique_ptr<BufferedFileWriter> writer;
|
145
159
|
std::shared_ptr<duckdb_apache::thrift::protocol::TProtocol> protocol;
|
@@ -98,6 +98,7 @@ public:
|
|
98
98
|
}
|
99
99
|
if (new_size > alloc_len) {
|
100
100
|
alloc_len = NextPowerOfTwo(new_size);
|
101
|
+
allocated_data.Reset(); // Have to reset before allocating new buffer (otherwise we use ~2x the memory)
|
101
102
|
allocated_data = allocator.Allocate(alloc_len);
|
102
103
|
ptr = allocated_data.get();
|
103
104
|
}
|
@@ -203,6 +203,9 @@ struct ParquetWriteBindData : public TableFunctionData {
|
|
203
203
|
ChildFieldIDs field_ids;
|
204
204
|
//! The compression level, higher value is more
|
205
205
|
int64_t compression_level = ZStdFileSystem::DefaultCompressionLevel();
|
206
|
+
|
207
|
+
//! Which encodings to include when writing
|
208
|
+
ParquetVersion parquet_version = ParquetVersion::V1;
|
206
209
|
};
|
207
210
|
|
208
211
|
struct ParquetWriteGlobalState : public GlobalFunctionData {
|
@@ -371,6 +374,7 @@ public:
|
|
371
374
|
table_function.named_parameters["explicit_cardinality"] = LogicalType::UBIGINT;
|
372
375
|
table_function.named_parameters["schema"] = LogicalTypeId::ANY;
|
373
376
|
table_function.named_parameters["encryption_config"] = LogicalTypeId::ANY;
|
377
|
+
table_function.named_parameters["parquet_version"] = LogicalType::VARCHAR;
|
374
378
|
table_function.get_partition_data = ParquetScanGetPartitionData;
|
375
379
|
table_function.serialize = ParquetScanSerialize;
|
376
380
|
table_function.deserialize = ParquetScanDeserialize;
|
@@ -1289,6 +1293,15 @@ unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyFunctionBi
|
|
1289
1293
|
}
|
1290
1294
|
bind_data->compression_level = val;
|
1291
1295
|
compression_level_set = true;
|
1296
|
+
} else if (loption == "parquet_version") {
|
1297
|
+
const auto roption = StringUtil::Upper(option.second[0].ToString());
|
1298
|
+
if (roption == "V1") {
|
1299
|
+
bind_data->parquet_version = ParquetVersion::V1;
|
1300
|
+
} else if (roption == "V2") {
|
1301
|
+
bind_data->parquet_version = ParquetVersion::V2;
|
1302
|
+
} else {
|
1303
|
+
throw BinderException("Expected parquet_version 'V1' or 'V2'");
|
1304
|
+
}
|
1292
1305
|
} else {
|
1293
1306
|
throw NotImplementedException("Unrecognized option for PARQUET: %s", option.first.c_str());
|
1294
1307
|
}
|
@@ -1319,7 +1332,7 @@ unique_ptr<GlobalFunctionData> ParquetWriteInitializeGlobal(ClientContext &conte
|
|
1319
1332
|
context, fs, file_path, parquet_bind.sql_types, parquet_bind.column_names, parquet_bind.codec,
|
1320
1333
|
parquet_bind.field_ids.Copy(), parquet_bind.kv_metadata, parquet_bind.encryption_config,
|
1321
1334
|
parquet_bind.dictionary_size_limit, parquet_bind.bloom_filter_false_positive_ratio,
|
1322
|
-
parquet_bind.compression_level, parquet_bind.debug_use_openssl);
|
1335
|
+
parquet_bind.compression_level, parquet_bind.debug_use_openssl, parquet_bind.parquet_version);
|
1323
1336
|
return std::move(global_state);
|
1324
1337
|
}
|
1325
1338
|
|
@@ -1424,6 +1437,29 @@ duckdb_parquet::CompressionCodec::type EnumUtil::FromString<duckdb_parquet::Comp
|
|
1424
1437
|
throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
|
1425
1438
|
}
|
1426
1439
|
|
1440
|
+
template <>
|
1441
|
+
const char *EnumUtil::ToChars<ParquetVersion>(ParquetVersion value) {
|
1442
|
+
switch (value) {
|
1443
|
+
case ParquetVersion::V1:
|
1444
|
+
return "V1";
|
1445
|
+
case ParquetVersion::V2:
|
1446
|
+
return "V2";
|
1447
|
+
default:
|
1448
|
+
throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
|
1449
|
+
}
|
1450
|
+
}
|
1451
|
+
|
1452
|
+
template <>
|
1453
|
+
ParquetVersion EnumUtil::FromString<ParquetVersion>(const char *value) {
|
1454
|
+
if (StringUtil::Equals(value, "V1")) {
|
1455
|
+
return ParquetVersion::V1;
|
1456
|
+
}
|
1457
|
+
if (StringUtil::Equals(value, "V2")) {
|
1458
|
+
return ParquetVersion::V2;
|
1459
|
+
}
|
1460
|
+
throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
|
1461
|
+
}
|
1462
|
+
|
1427
1463
|
static optional_idx SerializeCompressionLevel(const int64_t compression_level) {
|
1428
1464
|
return compression_level < 0 ? NumericLimits<idx_t>::Maximum() - NumericCast<idx_t>(AbsValue(compression_level))
|
1429
1465
|
: NumericCast<idx_t>(compression_level);
|
@@ -1455,13 +1491,25 @@ static void ParquetCopySerialize(Serializer &serializer, const FunctionData &bin
|
|
1455
1491
|
bind_data.encryption_config, nullptr);
|
1456
1492
|
|
1457
1493
|
// 108 was dictionary_compression_ratio_threshold, but was deleted
|
1494
|
+
|
1495
|
+
// To avoid doubly defining the default values in both ParquetWriteBindData and here,
|
1496
|
+
// and possibly making a mistake, we just get the values from ParquetWriteBindData.
|
1497
|
+
// We have to std::move them, otherwise MSVC will complain that it's not a "const T &&"
|
1458
1498
|
const auto compression_level = SerializeCompressionLevel(bind_data.compression_level);
|
1459
1499
|
D_ASSERT(DeserializeCompressionLevel(compression_level) == bind_data.compression_level);
|
1460
|
-
|
1461
|
-
serializer.
|
1462
|
-
serializer.
|
1463
|
-
|
1464
|
-
serializer.
|
1500
|
+
ParquetWriteBindData default_value;
|
1501
|
+
serializer.WritePropertyWithDefault(109, "compression_level", compression_level);
|
1502
|
+
serializer.WritePropertyWithDefault(110, "row_groups_per_file", bind_data.row_groups_per_file,
|
1503
|
+
default_value.row_groups_per_file);
|
1504
|
+
serializer.WritePropertyWithDefault(111, "debug_use_openssl", bind_data.debug_use_openssl,
|
1505
|
+
default_value.debug_use_openssl);
|
1506
|
+
serializer.WritePropertyWithDefault(112, "dictionary_size_limit", bind_data.dictionary_size_limit,
|
1507
|
+
default_value.dictionary_size_limit);
|
1508
|
+
serializer.WritePropertyWithDefault(113, "bloom_filter_false_positive_ratio",
|
1509
|
+
bind_data.bloom_filter_false_positive_ratio,
|
1510
|
+
default_value.bloom_filter_false_positive_ratio);
|
1511
|
+
serializer.WritePropertyWithDefault(114, "parquet_version", bind_data.parquet_version,
|
1512
|
+
default_value.parquet_version);
|
1465
1513
|
}
|
1466
1514
|
|
1467
1515
|
static unique_ptr<FunctionData> ParquetCopyDeserialize(Deserializer &deserializer, CopyFunction &function) {
|
@@ -1473,21 +1521,25 @@ static unique_ptr<FunctionData> ParquetCopyDeserialize(Deserializer &deserialize
|
|
1473
1521
|
data->row_group_size_bytes = deserializer.ReadProperty<idx_t>(104, "row_group_size_bytes");
|
1474
1522
|
data->kv_metadata = deserializer.ReadProperty<vector<pair<string, string>>>(105, "kv_metadata");
|
1475
1523
|
data->field_ids = deserializer.ReadProperty<ChildFieldIDs>(106, "field_ids");
|
1476
|
-
deserializer.ReadPropertyWithExplicitDefault<shared_ptr<ParquetEncryptionConfig>>(
|
1477
|
-
|
1524
|
+
deserializer.ReadPropertyWithExplicitDefault<shared_ptr<ParquetEncryptionConfig>>(
|
1525
|
+
107, "encryption_config", data->encryption_config, std::move(ParquetWriteBindData().encryption_config));
|
1478
1526
|
deserializer.ReadDeletedProperty<double>(108, "dictionary_compression_ratio_threshold");
|
1479
1527
|
|
1480
1528
|
optional_idx compression_level;
|
1481
1529
|
deserializer.ReadPropertyWithDefault<optional_idx>(109, "compression_level", compression_level);
|
1482
1530
|
data->compression_level = DeserializeCompressionLevel(compression_level);
|
1483
1531
|
D_ASSERT(SerializeCompressionLevel(data->compression_level) == compression_level);
|
1484
|
-
|
1485
|
-
|
1486
|
-
|
1487
|
-
data->
|
1488
|
-
deserializer.ReadPropertyWithExplicitDefault<
|
1489
|
-
data->
|
1490
|
-
|
1532
|
+
ParquetWriteBindData default_value;
|
1533
|
+
data->row_groups_per_file = deserializer.ReadPropertyWithExplicitDefault<optional_idx>(
|
1534
|
+
110, "row_groups_per_file", default_value.row_groups_per_file);
|
1535
|
+
data->debug_use_openssl =
|
1536
|
+
deserializer.ReadPropertyWithExplicitDefault<bool>(111, "debug_use_openssl", default_value.debug_use_openssl);
|
1537
|
+
data->dictionary_size_limit = deserializer.ReadPropertyWithExplicitDefault<idx_t>(
|
1538
|
+
112, "dictionary_size_limit", default_value.dictionary_size_limit);
|
1539
|
+
data->bloom_filter_false_positive_ratio = deserializer.ReadPropertyWithExplicitDefault<double>(
|
1540
|
+
113, "bloom_filter_false_positive_ratio", default_value.bloom_filter_false_positive_ratio);
|
1541
|
+
data->parquet_version =
|
1542
|
+
deserializer.ReadPropertyWithExplicitDefault(114, "parquet_version", default_value.parquet_version);
|
1491
1543
|
|
1492
1544
|
return std::move(data);
|
1493
1545
|
}
|
@@ -492,7 +492,8 @@ void ParquetReader::InitializeSchema(ClientContext &context) {
|
|
492
492
|
}
|
493
493
|
// check if we like this schema
|
494
494
|
if (file_meta_data->schema.size() < 2) {
|
495
|
-
throw
|
495
|
+
throw InvalidInputException("Failed to read Parquet file '%s': Need at least one non-root column in the file",
|
496
|
+
file_name);
|
496
497
|
}
|
497
498
|
root_reader = CreateReader(context);
|
498
499
|
auto &root_type = root_reader->Type();
|
@@ -778,12 +779,13 @@ void ParquetReader::PrepareRowGroupBuffer(ParquetReaderScanState &state, idx_t c
|
|
778
779
|
FilterPropagateResult prune_result;
|
779
780
|
// TODO we might not have stats but STILL a bloom filter so move this up
|
780
781
|
// check the bloom filter if present
|
781
|
-
|
782
|
+
bool is_generated_column = column_reader.FileIdx() >= group.columns.size();
|
783
|
+
if (!column_reader.Type().IsNested() && !is_generated_column &&
|
782
784
|
ParquetStatisticsUtils::BloomFilterSupported(column_reader.Type().id()) &&
|
783
785
|
ParquetStatisticsUtils::BloomFilterExcludes(filter, group.columns[column_reader.FileIdx()].meta_data,
|
784
786
|
*state.thrift_file_proto, allocator)) {
|
785
787
|
prune_result = FilterPropagateResult::FILTER_ALWAYS_FALSE;
|
786
|
-
} else if (column_reader.Type().id() == LogicalTypeId::VARCHAR &&
|
788
|
+
} else if (column_reader.Type().id() == LogicalTypeId::VARCHAR && !is_generated_column &&
|
787
789
|
group.columns[column_reader.FileIdx()].meta_data.statistics.__isset.min_value &&
|
788
790
|
group.columns[column_reader.FileIdx()].meta_data.statistics.__isset.max_value) {
|
789
791
|
|
@@ -321,12 +321,12 @@ ParquetWriter::ParquetWriter(ClientContext &context, FileSystem &fs, string file
|
|
321
321
|
const vector<pair<string, string>> &kv_metadata,
|
322
322
|
shared_ptr<ParquetEncryptionConfig> encryption_config_p, idx_t dictionary_size_limit_p,
|
323
323
|
double bloom_filter_false_positive_ratio_p, int64_t compression_level_p,
|
324
|
-
bool debug_use_openssl_p)
|
325
|
-
: file_name(std::move(file_name_p)), sql_types(std::move(types_p)),
|
326
|
-
|
327
|
-
dictionary_size_limit(dictionary_size_limit_p),
|
324
|
+
bool debug_use_openssl_p, ParquetVersion parquet_version)
|
325
|
+
: context(context), file_name(std::move(file_name_p)), sql_types(std::move(types_p)),
|
326
|
+
column_names(std::move(names_p)), codec(codec), field_ids(std::move(field_ids_p)),
|
327
|
+
encryption_config(std::move(encryption_config_p)), dictionary_size_limit(dictionary_size_limit_p),
|
328
328
|
bloom_filter_false_positive_ratio(bloom_filter_false_positive_ratio_p), compression_level(compression_level_p),
|
329
|
-
debug_use_openssl(debug_use_openssl_p) {
|
329
|
+
debug_use_openssl(debug_use_openssl_p), parquet_version(parquet_version) {
|
330
330
|
|
331
331
|
// initialize the file writer
|
332
332
|
writer = make_uniq<BufferedFileWriter>(fs, file_name.c_str(),
|
@@ -395,7 +395,6 @@ void ParquetWriter::PrepareRowGroup(ColumnDataCollection &buffer, PreparedRowGro
|
|
395
395
|
// set up a new row group for this chunk collection
|
396
396
|
auto &row_group = result.row_group;
|
397
397
|
row_group.num_rows = NumericCast<int64_t>(buffer.Count());
|
398
|
-
row_group.total_byte_size = NumericCast<int64_t>(buffer.SizeInBytes());
|
399
398
|
row_group.__isset.file_offset = true;
|
400
399
|
|
401
400
|
auto &states = result.states;
|
@@ -426,7 +426,12 @@ vector<CatalogSearchEntry> GetCatalogEntries(CatalogEntryRetriever &retriever, c
|
|
426
426
|
entries.emplace_back(catalog, schema_name);
|
427
427
|
}
|
428
428
|
if (entries.empty()) {
|
429
|
-
|
429
|
+
auto catalog_entry = Catalog::GetCatalogEntry(context, catalog);
|
430
|
+
if (catalog_entry) {
|
431
|
+
entries.emplace_back(catalog, catalog_entry->GetDefaultSchema());
|
432
|
+
} else {
|
433
|
+
entries.emplace_back(catalog, DEFAULT_SCHEMA);
|
434
|
+
}
|
430
435
|
}
|
431
436
|
} else {
|
432
437
|
// specific catalog and schema provided
|
@@ -687,7 +692,7 @@ CatalogException Catalog::CreateMissingEntryException(CatalogEntryRetriever &ret
|
|
687
692
|
// however, if there is an exact match in another schema, we will always show it
|
688
693
|
static constexpr const double UNSEEN_PENALTY = 0.2;
|
689
694
|
auto unseen_entries = SimilarEntriesInSchemas(context, entry_name, type, unseen_schemas);
|
690
|
-
|
695
|
+
set<string> suggestions;
|
691
696
|
if (!unseen_entries.empty() && (unseen_entries[0].score == 1.0 || unseen_entries[0].score - UNSEEN_PENALTY >
|
692
697
|
(entries.empty() ? 0.0 : entries[0].score))) {
|
693
698
|
// the closest matching entry requires qualification as it is not in the default search path
|
@@ -698,19 +703,19 @@ CatalogException Catalog::CreateMissingEntryException(CatalogEntryRetriever &ret
|
|
698
703
|
bool qualify_database;
|
699
704
|
bool qualify_schema;
|
700
705
|
FindMinimalQualification(retriever, catalog_name, schema_name, qualify_database, qualify_schema);
|
701
|
-
|
706
|
+
auto qualified_name = unseen_entry.GetQualifiedName(qualify_database, qualify_schema);
|
707
|
+
suggestions.insert(qualified_name);
|
702
708
|
}
|
703
709
|
} else if (!entries.empty()) {
|
704
710
|
for (auto &entry : entries) {
|
705
|
-
suggestions.
|
711
|
+
suggestions.insert(entry.name);
|
706
712
|
}
|
707
713
|
}
|
708
714
|
|
709
715
|
string did_you_mean;
|
710
|
-
std::sort(suggestions.begin(), suggestions.end());
|
711
716
|
if (suggestions.size() > 2) {
|
712
|
-
|
713
|
-
suggestions.
|
717
|
+
string last = *suggestions.rbegin();
|
718
|
+
suggestions.erase(last);
|
714
719
|
did_you_mean = StringUtil::Join(suggestions, ", ") + ", or " + last;
|
715
720
|
} else {
|
716
721
|
did_you_mean = StringUtil::Join(suggestions, " or ");
|
@@ -968,12 +973,16 @@ optional_ptr<SchemaCatalogEntry> Catalog::GetSchema(CatalogEntryRetriever &retri
|
|
968
973
|
// skip if it is not an attached database
|
969
974
|
continue;
|
970
975
|
}
|
971
|
-
auto on_not_found = i + 1 == entries.size() ? if_not_found : OnEntryNotFound::RETURN_NULL;
|
976
|
+
const auto on_not_found = i + 1 == entries.size() ? if_not_found : OnEntryNotFound::RETURN_NULL;
|
972
977
|
auto result = catalog->GetSchema(retriever.GetContext(), schema_name, on_not_found, error_context);
|
973
978
|
if (result) {
|
974
979
|
return result;
|
975
980
|
}
|
976
981
|
}
|
982
|
+
// Catalog has not been found.
|
983
|
+
if (if_not_found == OnEntryNotFound::THROW_EXCEPTION) {
|
984
|
+
throw CatalogException(error_context, "Catalog with name %s does not exist!", catalog_name);
|
985
|
+
}
|
977
986
|
return nullptr;
|
978
987
|
}
|
979
988
|
|
@@ -1073,6 +1082,10 @@ optional_ptr<DependencyManager> Catalog::GetDependencyManager() {
|
|
1073
1082
|
return nullptr;
|
1074
1083
|
}
|
1075
1084
|
|
1085
|
+
string Catalog::GetDefaultSchema() const {
|
1086
|
+
return DEFAULT_SCHEMA;
|
1087
|
+
}
|
1088
|
+
|
1076
1089
|
//! Whether this catalog has a default table. Catalogs with a default table can be queries by their catalog name
|
1077
1090
|
bool Catalog::HasDefaultTable() const {
|
1078
1091
|
return !default_table.empty();
|
@@ -165,7 +165,7 @@ void CatalogSearchPath::Set(vector<CatalogSearchEntry> new_paths, CatalogSetPath
|
|
165
165
|
if (path.catalog.empty()) {
|
166
166
|
auto catalog = Catalog::GetCatalogEntry(context, path.schema);
|
167
167
|
if (catalog) {
|
168
|
-
auto schema = catalog->GetSchema(context,
|
168
|
+
auto schema = catalog->GetSchema(context, catalog->GetDefaultSchema(), OnEntryNotFound::RETURN_NULL);
|
169
169
|
if (schema) {
|
170
170
|
path.catalog = std::move(path.schema);
|
171
171
|
path.schema = schema->name;
|
@@ -205,6 +205,22 @@ string CatalogSearchPath::GetDefaultSchema(const string &catalog) {
|
|
205
205
|
return DEFAULT_SCHEMA;
|
206
206
|
}
|
207
207
|
|
208
|
+
string CatalogSearchPath::GetDefaultSchema(ClientContext &context, const string &catalog) {
|
209
|
+
for (auto &path : paths) {
|
210
|
+
if (path.catalog == TEMP_CATALOG) {
|
211
|
+
continue;
|
212
|
+
}
|
213
|
+
if (StringUtil::CIEquals(path.catalog, catalog)) {
|
214
|
+
return path.schema;
|
215
|
+
}
|
216
|
+
}
|
217
|
+
auto catalog_entry = Catalog::GetCatalogEntry(context, catalog);
|
218
|
+
if (catalog_entry) {
|
219
|
+
return catalog_entry->GetDefaultSchema();
|
220
|
+
}
|
221
|
+
return DEFAULT_SCHEMA;
|
222
|
+
}
|
223
|
+
|
208
224
|
string CatalogSearchPath::GetDefaultCatalog(const string &schema) {
|
209
225
|
if (DefaultSchemaGenerator::IsDefaultSchema(schema)) {
|
210
226
|
return SYSTEM_CATALOG;
|
@@ -353,7 +353,7 @@ bool CatalogSet::AlterEntry(CatalogTransaction transaction, const string &name,
|
|
353
353
|
// push the old entry in the undo buffer for this transaction
|
354
354
|
if (transaction.transaction) {
|
355
355
|
// serialize the AlterInfo into a temporary buffer
|
356
|
-
MemoryStream stream;
|
356
|
+
MemoryStream stream(Allocator::Get(*transaction.db));
|
357
357
|
BinarySerializer serializer(stream);
|
358
358
|
serializer.Begin();
|
359
359
|
serializer.WriteProperty(100, "column_name", alter_info.GetColumnName());
|
@@ -163,9 +163,6 @@ static const DefaultMacro internal_macros[] = {
|
|
163
163
|
|
164
164
|
// date functions
|
165
165
|
{DEFAULT_SCHEMA, "date_add", {"date", "interval", nullptr}, {{nullptr, nullptr}}, "date + interval"},
|
166
|
-
{DEFAULT_SCHEMA, "current_date", {nullptr}, {{nullptr, nullptr}}, "current_timestamp::DATE"},
|
167
|
-
{DEFAULT_SCHEMA, "today", {nullptr}, {{nullptr, nullptr}}, "current_timestamp::DATE"},
|
168
|
-
{DEFAULT_SCHEMA, "get_current_time", {nullptr}, {{nullptr, nullptr}}, "current_timestamp::TIMETZ"},
|
169
166
|
|
170
167
|
// regexp functions
|
171
168
|
{DEFAULT_SCHEMA, "regexp_split_to_table", {"text", "pattern", nullptr}, {{nullptr, nullptr}}, "unnest(string_split_regex(text, pattern))"},
|
@@ -61,6 +61,13 @@ LogicalDependency::LogicalDependency(CatalogEntry &entry) {
|
|
61
61
|
}
|
62
62
|
}
|
63
63
|
|
64
|
+
LogicalDependency::LogicalDependency(optional_ptr<Catalog> catalog_p, CatalogEntryInfo entry_p, string catalog_str)
|
65
|
+
: entry(std::move(entry_p)), catalog(std::move(catalog_str)) {
|
66
|
+
if (catalog_p) {
|
67
|
+
catalog = catalog_p->GetName();
|
68
|
+
}
|
69
|
+
}
|
70
|
+
|
64
71
|
bool LogicalDependency::operator==(const LogicalDependency &other) const {
|
65
72
|
return other.entry.name == entry.name && other.entry.schema == entry.schema && other.entry.type == entry.type;
|
66
73
|
}
|
@@ -53,7 +53,6 @@ AdbcStatusCode duckdb_adbc_init(int version, void *driver, struct AdbcError *err
|
|
53
53
|
adbc_driver->ConnectionGetInfo = duckdb_adbc::ConnectionGetInfo;
|
54
54
|
adbc_driver->StatementGetParameterSchema = duckdb_adbc::StatementGetParameterSchema;
|
55
55
|
adbc_driver->ConnectionGetTableSchema = duckdb_adbc::ConnectionGetTableSchema;
|
56
|
-
adbc_driver->StatementSetSubstraitPlan = duckdb_adbc::StatementSetSubstraitPlan;
|
57
56
|
return ADBC_STATUS_OK;
|
58
57
|
}
|
59
58
|
|
@@ -70,7 +69,6 @@ struct DuckDBAdbcStatementWrapper {
|
|
70
69
|
ArrowArrayStream ingestion_stream;
|
71
70
|
IngestionMode ingestion_mode = IngestionMode::CREATE;
|
72
71
|
bool temporary_table = false;
|
73
|
-
uint8_t *substrait_plan;
|
74
72
|
uint64_t plan_length;
|
75
73
|
};
|
76
74
|
|
@@ -157,36 +155,6 @@ AdbcStatusCode DatabaseNew(struct AdbcDatabase *database, struct AdbcError *erro
|
|
157
155
|
return CheckResult(res, error, "Failed to allocate");
|
158
156
|
}
|
159
157
|
|
160
|
-
AdbcStatusCode StatementSetSubstraitPlan(struct AdbcStatement *statement, const uint8_t *plan, size_t length,
|
161
|
-
struct AdbcError *error) {
|
162
|
-
if (!statement) {
|
163
|
-
SetError(error, "Statement is not set");
|
164
|
-
return ADBC_STATUS_INVALID_ARGUMENT;
|
165
|
-
}
|
166
|
-
if (!plan) {
|
167
|
-
SetError(error, "Substrait Plan is not set");
|
168
|
-
return ADBC_STATUS_INVALID_ARGUMENT;
|
169
|
-
}
|
170
|
-
if (length == 0) {
|
171
|
-
SetError(error, "Can't execute plan with size = 0");
|
172
|
-
return ADBC_STATUS_INVALID_ARGUMENT;
|
173
|
-
}
|
174
|
-
auto wrapper = static_cast<DuckDBAdbcStatementWrapper *>(statement->private_data);
|
175
|
-
if (wrapper->ingestion_stream.release) {
|
176
|
-
// Release any resources currently held by the ingestion stream before we overwrite it
|
177
|
-
wrapper->ingestion_stream.release(&wrapper->ingestion_stream);
|
178
|
-
wrapper->ingestion_stream.release = nullptr;
|
179
|
-
}
|
180
|
-
if (wrapper->statement) {
|
181
|
-
duckdb_destroy_prepare(&wrapper->statement);
|
182
|
-
wrapper->statement = nullptr;
|
183
|
-
}
|
184
|
-
wrapper->substrait_plan = static_cast<uint8_t *>(malloc(sizeof(uint8_t) * length));
|
185
|
-
wrapper->plan_length = length;
|
186
|
-
memcpy(wrapper->substrait_plan, plan, length);
|
187
|
-
return ADBC_STATUS_OK;
|
188
|
-
}
|
189
|
-
|
190
158
|
AdbcStatusCode DatabaseSetOption(struct AdbcDatabase *database, const char *key, const char *value,
|
191
159
|
struct AdbcError *error) {
|
192
160
|
if (!database) {
|
@@ -677,7 +645,6 @@ AdbcStatusCode StatementNew(struct AdbcConnection *connection, struct AdbcStatem
|
|
677
645
|
statement_wrapper->ingestion_stream.release = nullptr;
|
678
646
|
statement_wrapper->ingestion_table_name = nullptr;
|
679
647
|
statement_wrapper->db_schema = nullptr;
|
680
|
-
statement_wrapper->substrait_plan = nullptr;
|
681
648
|
statement_wrapper->temporary_table = false;
|
682
649
|
|
683
650
|
statement_wrapper->ingestion_mode = IngestionMode::CREATE;
|
@@ -709,10 +676,6 @@ AdbcStatusCode StatementRelease(struct AdbcStatement *statement, struct AdbcErro
|
|
709
676
|
free(wrapper->db_schema);
|
710
677
|
wrapper->db_schema = nullptr;
|
711
678
|
}
|
712
|
-
if (wrapper->substrait_plan) {
|
713
|
-
free(wrapper->substrait_plan);
|
714
|
-
wrapper->substrait_plan = nullptr;
|
715
|
-
}
|
716
679
|
free(statement->private_data);
|
717
680
|
statement->private_data = nullptr;
|
718
681
|
return ADBC_STATUS_OK;
|
@@ -805,25 +768,7 @@ AdbcStatusCode StatementExecuteQuery(struct AdbcStatement *statement, struct Arr
|
|
805
768
|
if (has_stream && to_table) {
|
806
769
|
return IngestToTableFromBoundStream(wrapper, error);
|
807
770
|
}
|
808
|
-
if (
|
809
|
-
auto plan_str = std::string(reinterpret_cast<const char *>(wrapper->substrait_plan), wrapper->plan_length);
|
810
|
-
duckdb::vector<duckdb::Value> params;
|
811
|
-
params.emplace_back(duckdb::Value::BLOB_RAW(plan_str));
|
812
|
-
duckdb::unique_ptr<duckdb::QueryResult> query_result;
|
813
|
-
try {
|
814
|
-
query_result = reinterpret_cast<duckdb::Connection *>(wrapper->connection)
|
815
|
-
->TableFunction("from_substrait", params)
|
816
|
-
->Execute();
|
817
|
-
} catch (duckdb::Exception &e) {
|
818
|
-
std::string error_msg = "It was not possible to execute substrait query. " + std::string(e.what());
|
819
|
-
SetError(error, error_msg);
|
820
|
-
return ADBC_STATUS_INVALID_ARGUMENT;
|
821
|
-
}
|
822
|
-
auto arrow_wrapper = new duckdb::ArrowResultWrapper();
|
823
|
-
arrow_wrapper->result =
|
824
|
-
duckdb::unique_ptr_cast<duckdb::QueryResult, duckdb::MaterializedQueryResult>(std::move(query_result));
|
825
|
-
wrapper->result = reinterpret_cast<duckdb_arrow>(arrow_wrapper);
|
826
|
-
} else if (has_stream) {
|
771
|
+
if (has_stream) {
|
827
772
|
// A stream was bound to the statement, use that to bind parameters
|
828
773
|
duckdb::unique_ptr<duckdb::QueryResult> result;
|
829
774
|
ArrowArrayStream stream = wrapper->ingestion_stream;
|
@@ -73,14 +73,15 @@ void SetArrowMapFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child,
|
|
73
73
|
InitializeChild(root_holder.nested_children.back()[0], root_holder);
|
74
74
|
child.children = &root_holder.nested_children_ptr.back()[0];
|
75
75
|
child.children[0]->name = "entries";
|
76
|
+
child.children[0]->flags = 0; // Set the 'entries' field to non-nullable
|
76
77
|
SetArrowFormat(root_holder, **child.children, ListType::GetChildType(type), options, context);
|
77
78
|
}
|
78
79
|
|
79
80
|
bool SetArrowExtension(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, const LogicalType &type,
|
80
81
|
ClientContext &context) {
|
81
82
|
auto &config = DBConfig::GetConfig(context);
|
82
|
-
if (config.HasArrowExtension(type
|
83
|
-
auto arrow_extension = config.GetArrowExtension(type
|
83
|
+
if (config.HasArrowExtension(type)) {
|
84
|
+
auto arrow_extension = config.GetArrowExtension(type);
|
84
85
|
arrow_extension.PopulateArrowSchema(root_holder, child, type, context, arrow_extension);
|
85
86
|
return true;
|
86
87
|
}
|