duckdb 1.1.4-dev13.0 → 1.1.4-dev14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +1 -0
- package/package.json +1 -1
- package/src/duckdb/extension/core_functions/function_list.cpp +1 -0
- package/src/duckdb/extension/core_functions/include/core_functions/scalar/map_functions.hpp +9 -0
- package/src/duckdb/extension/core_functions/scalar/date/current.cpp +1 -0
- package/src/duckdb/extension/core_functions/scalar/generic/can_implicitly_cast.cpp +2 -2
- package/src/duckdb/extension/core_functions/scalar/generic/typeof.cpp +1 -1
- package/src/duckdb/extension/core_functions/scalar/list/flatten.cpp +91 -61
- package/src/duckdb/extension/core_functions/scalar/map/map_extract.cpp +89 -8
- package/src/duckdb/extension/icu/icu-current.cpp +63 -0
- package/src/duckdb/extension/icu/icu-makedate.cpp +43 -39
- package/src/duckdb/extension/icu/icu-timezone.cpp +63 -63
- package/src/duckdb/extension/icu/icu_extension.cpp +2 -0
- package/src/duckdb/extension/icu/include/icu-casts.hpp +39 -0
- package/src/duckdb/extension/icu/include/icu-current.hpp +17 -0
- package/src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +3 -1
- package/src/duckdb/extension/parquet/column_writer.cpp +26 -18
- package/src/duckdb/extension/parquet/include/parquet_reader.hpp +0 -6
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +15 -1
- package/src/duckdb/extension/parquet/include/resizable_buffer.hpp +1 -0
- package/src/duckdb/extension/parquet/parquet_extension.cpp +67 -15
- package/src/duckdb/extension/parquet/parquet_reader.cpp +5 -3
- package/src/duckdb/extension/parquet/parquet_writer.cpp +5 -6
- package/src/duckdb/src/catalog/catalog.cpp +21 -8
- package/src/duckdb/src/catalog/catalog_search_path.cpp +17 -1
- package/src/duckdb/src/catalog/catalog_set.cpp +1 -1
- package/src/duckdb/src/catalog/default/default_functions.cpp +0 -3
- package/src/duckdb/src/catalog/dependency_list.cpp +7 -0
- package/src/duckdb/src/common/adbc/adbc.cpp +1 -56
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +3 -2
- package/src/duckdb/src/common/arrow/arrow_type_extension.cpp +58 -28
- package/src/duckdb/src/common/arrow/schema_metadata.cpp +1 -1
- package/src/duckdb/src/common/compressed_file_system.cpp +6 -2
- package/src/duckdb/src/common/enum_util.cpp +26 -22
- package/src/duckdb/src/common/error_data.cpp +3 -2
- package/src/duckdb/src/common/gzip_file_system.cpp +8 -8
- package/src/duckdb/src/common/local_file_system.cpp +2 -2
- package/src/duckdb/src/common/multi_file_reader.cpp +1 -1
- package/src/duckdb/src/common/random_engine.cpp +4 -1
- package/src/duckdb/src/common/serializer/memory_stream.cpp +23 -19
- package/src/duckdb/src/common/serializer/serializer.cpp +1 -1
- package/src/duckdb/src/common/types/bit.cpp +1 -1
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +0 -5
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +4 -1
- package/src/duckdb/src/common/types/data_chunk.cpp +2 -1
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +0 -4
- package/src/duckdb/src/common/types.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +52 -42
- package/src/duckdb/src/execution/index/art/leaf.cpp +4 -9
- package/src/duckdb/src/execution/index/art/node.cpp +13 -13
- package/src/duckdb/src/execution/index/art/prefix.cpp +21 -16
- package/src/duckdb/src/execution/index/bound_index.cpp +6 -8
- package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +39 -34
- package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +2 -1
- package/src/duckdb/src/execution/index/unbound_index.cpp +10 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +62 -44
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +26 -0
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +69 -40
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +3 -7
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +11 -5
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +4 -0
- package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +8 -8
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +36 -12
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +12 -9
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +0 -1
- package/src/duckdb/src/execution/operator/persistent/physical_copy_database.cpp +29 -1
- package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +58 -10
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +58 -35
- package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +2 -1
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +9 -4
- package/src/duckdb/src/execution/sample/reservoir_sample.cpp +7 -6
- package/src/duckdb/src/function/compression_config.cpp +4 -0
- package/src/duckdb/src/function/function_binder.cpp +1 -1
- package/src/duckdb/src/function/scalar/system/write_log.cpp +2 -2
- package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +15 -2
- package/src/duckdb/src/function/table/arrow_conversion.cpp +10 -10
- package/src/duckdb/src/function/table/copy_csv.cpp +8 -5
- package/src/duckdb/src/function/table/read_csv.cpp +21 -4
- package/src/duckdb/src/function/table/sniff_csv.cpp +7 -0
- package/src/duckdb/src/function/table/system/duckdb_extensions.cpp +4 -0
- package/src/duckdb/src/function/table/system/duckdb_secret_types.cpp +71 -0
- package/src/duckdb/src/function/table/system_functions.cpp +1 -0
- package/src/duckdb/src/function/table/table_scan.cpp +120 -36
- package/src/duckdb/src/function/table/version/pragma_version.cpp +4 -4
- package/src/duckdb/src/function/window/window_aggregate_function.cpp +6 -1
- package/src/duckdb/src/function/window/window_boundaries_state.cpp +135 -11
- package/src/duckdb/src/function/window/window_segment_tree.cpp +50 -22
- package/src/duckdb/src/function/window/window_token_tree.cpp +4 -3
- package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +4 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +2 -0
- package/src/duckdb/src/include/duckdb/catalog/dependency_list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_type_extension.hpp +4 -2
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -8
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +0 -2
- package/src/duckdb/src/include/duckdb/common/serializer/deserializer.hpp +8 -3
- package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp +6 -1
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_data.hpp +25 -0
- package/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp +9 -3
- package/src/duckdb/src/include/duckdb/common/types/selection_vector.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +11 -14
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +5 -4
- package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +21 -10
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +6 -5
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +37 -32
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +36 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine_options.hpp +5 -5
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +5 -30
- package/src/duckdb/src/include/duckdb/execution/reservoir_sample.hpp +7 -1
- package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +3 -3
- package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/function/window/window_boundaries_state.hpp +2 -2
- package/src/duckdb/src/include/duckdb/logging/logger.hpp +40 -119
- package/src/duckdb/src/include/duckdb/logging/logging.hpp +0 -2
- package/src/duckdb/src/include/duckdb/main/config.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/connection.hpp +0 -8
- package/src/duckdb/src/include/duckdb/main/connection_manager.hpp +2 -1
- package/src/duckdb/src/include/duckdb/main/extension.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +11 -7
- package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/secret/secret.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/secret/secret_manager.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/settings.hpp +10 -0
- package/src/duckdb/src/include/duckdb/parser/constraint.hpp +9 -0
- package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +36 -9
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_view_info.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/query_node/set_operation_node.hpp +8 -2
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_data.hpp +9 -1
- package/src/duckdb/src/include/duckdb/planner/filter/constant_filter.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/filter/in_filter.hpp +0 -2
- package/src/duckdb/src/include/duckdb/planner/filter/optional_filter.hpp +4 -4
- package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +14 -10
- package/src/duckdb/src/include/duckdb/storage/index_storage_info.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +6 -1
- package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +7 -2
- package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +9 -0
- package/src/duckdb/src/include/duckdb/storage/storage_options.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +4 -3
- package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +6 -4
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +2 -0
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -0
- package/src/duckdb/src/include/duckdb/transaction/meta_transaction.hpp +1 -1
- package/src/duckdb/src/logging/logger.cpp +8 -66
- package/src/duckdb/src/main/attached_database.cpp +3 -1
- package/src/duckdb/src/main/client_context.cpp +4 -2
- package/src/duckdb/src/main/config.cpp +20 -2
- package/src/duckdb/src/main/connection.cpp +2 -29
- package/src/duckdb/src/main/connection_manager.cpp +5 -3
- package/src/duckdb/src/main/database.cpp +2 -2
- package/src/duckdb/src/main/extension/extension_helper.cpp +4 -5
- package/src/duckdb/src/main/extension/extension_install.cpp +23 -10
- package/src/duckdb/src/main/extension/extension_load.cpp +6 -7
- package/src/duckdb/src/main/extension.cpp +27 -9
- package/src/duckdb/src/main/secret/secret_manager.cpp +11 -0
- package/src/duckdb/src/main/settings/custom_settings.cpp +44 -0
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +6 -0
- package/src/duckdb/src/optimizer/filter_combiner.cpp +13 -3
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +33 -6
- package/src/duckdb/src/optimizer/late_materialization.cpp +14 -3
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +0 -3
- package/src/duckdb/src/parser/parsed_data/attach_info.cpp +5 -1
- package/src/duckdb/src/parser/parsed_data/create_view_info.cpp +6 -3
- package/src/duckdb/src/parser/query_node/set_operation_node.cpp +49 -0
- package/src/duckdb/src/parser/transform/expression/transform_columnref.cpp +1 -0
- package/src/duckdb/src/parser/transform/expression/transform_function.cpp +50 -12
- package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +7 -5
- package/src/duckdb/src/planner/binder/expression/bind_comparison_expression.cpp +1 -0
- package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +2 -2
- package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +12 -2
- package/src/duckdb/src/planner/binder/statement/bind_copy_database.cpp +0 -1
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +55 -39
- package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +2 -1
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +15 -7
- package/src/duckdb/src/planner/binder/tableref/bind_showref.cpp +13 -8
- package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +8 -3
- package/src/duckdb/src/planner/expression/bound_function_expression.cpp +17 -1
- package/src/duckdb/src/planner/expression_binder/index_binder.cpp +1 -0
- package/src/duckdb/src/planner/filter/conjunction_filter.cpp +1 -0
- package/src/duckdb/src/planner/filter/constant_filter.cpp +21 -0
- package/src/duckdb/src/planner/filter/in_filter.cpp +4 -7
- package/src/duckdb/src/planner/logical_operator.cpp +5 -3
- package/src/duckdb/src/planner/planner.cpp +1 -1
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +2 -0
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +3 -4
- package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -5
- package/src/duckdb/src/storage/compression/dictionary/decompression.cpp +4 -4
- package/src/duckdb/src/storage/compression/fsst.cpp +2 -2
- package/src/duckdb/src/storage/compression/roaring/common.cpp +10 -1
- package/src/duckdb/src/storage/compression/string_uncompressed.cpp +11 -6
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +4 -0
- package/src/duckdb/src/storage/compression/zstd.cpp +6 -0
- package/src/duckdb/src/storage/data_table.cpp +104 -109
- package/src/duckdb/src/storage/local_storage.cpp +8 -6
- package/src/duckdb/src/storage/magic_bytes.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_dependency.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +7 -5
- package/src/duckdb/src/storage/single_file_block_manager.cpp +95 -28
- package/src/duckdb/src/storage/storage_info.cpp +38 -0
- package/src/duckdb/src/storage/storage_manager.cpp +11 -0
- package/src/duckdb/src/storage/table/column_data.cpp +4 -0
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +3 -3
- package/src/duckdb/src/storage/table/row_group_collection.cpp +67 -68
- package/src/duckdb/src/storage/table/table_statistics.cpp +4 -4
- package/src/duckdb/src/storage/table_index_list.cpp +41 -15
- package/src/duckdb/src/storage/wal_replay.cpp +3 -1
- package/src/duckdb/src/storage/write_ahead_log.cpp +11 -4
- package/src/duckdb/src/transaction/meta_transaction.cpp +1 -1
- package/src/duckdb/src/verification/deserialized_statement_verifier.cpp +2 -1
- package/src/duckdb/third_party/httplib/httplib.hpp +0 -1
- package/src/duckdb/third_party/re2/util/logging.h +10 -10
- package/src/duckdb/ub_src_function_table_system.cpp +2 -0
@@ -47,7 +47,10 @@ class DeleteLocalState : public LocalSinkState {
|
|
47
47
|
public:
|
48
48
|
DeleteLocalState(ClientContext &context, TableCatalogEntry &table,
|
49
49
|
const vector<unique_ptr<BoundConstraint>> &bound_constraints) {
|
50
|
-
|
50
|
+
const auto &types = table.GetTypes();
|
51
|
+
auto initialize = vector<bool>(types.size(), false);
|
52
|
+
delete_chunk.Initialize(Allocator::Get(context), types, initialize);
|
53
|
+
|
51
54
|
auto &storage = table.GetStorage();
|
52
55
|
delete_state = storage.InitializeDelete(table, context, bound_constraints);
|
53
56
|
}
|
@@ -64,34 +67,79 @@ SinkResultType PhysicalDelete::Sink(ExecutionContext &context, DataChunk &chunk,
|
|
64
67
|
auto &transaction = DuckTransaction::Get(context.client, table.db);
|
65
68
|
auto &row_ids = chunk.data[row_id_index];
|
66
69
|
|
67
|
-
vector<StorageIndex> column_ids;
|
68
|
-
for (idx_t i = 0; i < table.ColumnCount(); i++) {
|
69
|
-
column_ids.emplace_back(i);
|
70
|
-
};
|
71
|
-
auto fetch_state = ColumnFetchState();
|
72
|
-
|
73
70
|
lock_guard<mutex> delete_guard(g_state.delete_lock);
|
74
71
|
if (!return_chunk && !g_state.has_unique_indexes) {
|
75
72
|
g_state.deleted_count += table.Delete(*l_state.delete_state, context.client, row_ids, chunk.size());
|
76
73
|
return SinkResultType::NEED_MORE_INPUT;
|
77
74
|
}
|
78
75
|
|
79
|
-
|
76
|
+
auto types = table.GetTypes();
|
77
|
+
auto to_be_fetched = vector<bool>(types.size(), return_chunk);
|
78
|
+
vector<StorageIndex> column_ids;
|
79
|
+
vector<LogicalType> column_types;
|
80
|
+
if (return_chunk) {
|
81
|
+
// Fetch all columns.
|
82
|
+
column_types = types;
|
83
|
+
for (idx_t i = 0; i < table.ColumnCount(); i++) {
|
84
|
+
column_ids.emplace_back(i);
|
85
|
+
}
|
86
|
+
|
87
|
+
} else {
|
88
|
+
// Fetch only the required columns for updating the delete indexes.
|
89
|
+
auto &local_storage = LocalStorage::Get(context.client, table.db);
|
90
|
+
auto storage = local_storage.GetStorage(table);
|
91
|
+
unordered_set<column_t> indexed_column_id_set;
|
92
|
+
storage->delete_indexes.Scan([&](Index &index) {
|
93
|
+
if (!index.IsBound() || !index.IsUnique()) {
|
94
|
+
return false;
|
95
|
+
}
|
96
|
+
auto &set = index.GetColumnIdSet();
|
97
|
+
indexed_column_id_set.insert(set.begin(), set.end());
|
98
|
+
return false;
|
99
|
+
});
|
100
|
+
for (auto &col : indexed_column_id_set) {
|
101
|
+
column_ids.emplace_back(col);
|
102
|
+
}
|
103
|
+
sort(column_ids.begin(), column_ids.end());
|
104
|
+
for (auto &col : column_ids) {
|
105
|
+
auto i = col.GetPrimaryIndex();
|
106
|
+
to_be_fetched[i] = true;
|
107
|
+
column_types.push_back(types[i]);
|
108
|
+
}
|
109
|
+
}
|
110
|
+
|
80
111
|
l_state.delete_chunk.Reset();
|
81
112
|
row_ids.Flatten(chunk.size());
|
82
|
-
|
113
|
+
|
114
|
+
// Fetch the to-be-deleted chunk.
|
115
|
+
DataChunk fetch_chunk;
|
116
|
+
fetch_chunk.Initialize(Allocator::Get(context.client), column_types, chunk.size());
|
117
|
+
auto fetch_state = ColumnFetchState();
|
118
|
+
table.Fetch(transaction, fetch_chunk, column_ids, row_ids, chunk.size(), fetch_state);
|
119
|
+
|
120
|
+
// Reference the necessary columns of the fetch_chunk.
|
121
|
+
idx_t fetch_idx = 0;
|
122
|
+
for (idx_t i = 0; i < table.ColumnCount(); i++) {
|
123
|
+
if (to_be_fetched[i]) {
|
124
|
+
l_state.delete_chunk.data[i].Reference(fetch_chunk.data[fetch_idx++]);
|
125
|
+
continue;
|
126
|
+
}
|
127
|
+
l_state.delete_chunk.data[i].Reference(Value(types[i]));
|
128
|
+
}
|
129
|
+
l_state.delete_chunk.SetCardinality(fetch_chunk);
|
83
130
|
|
84
131
|
// Append the deleted row IDs to the delete indexes.
|
85
132
|
// If we only delete local row IDs, then the delete_chunk is empty.
|
86
133
|
if (g_state.has_unique_indexes && l_state.delete_chunk.size() != 0) {
|
87
134
|
auto &local_storage = LocalStorage::Get(context.client, table.db);
|
88
135
|
auto storage = local_storage.GetStorage(table);
|
136
|
+
IndexAppendInfo index_append_info(IndexAppendMode::IGNORE_DUPLICATES, nullptr);
|
89
137
|
storage->delete_indexes.Scan([&](Index &index) {
|
90
138
|
if (!index.IsBound() || !index.IsUnique()) {
|
91
139
|
return false;
|
92
140
|
}
|
93
141
|
auto &bound_index = index.Cast<BoundIndex>();
|
94
|
-
auto error = bound_index.Append(l_state.delete_chunk, row_ids);
|
142
|
+
auto error = bound_index.Append(l_state.delete_chunk, row_ids, index_append_info);
|
95
143
|
if (error.HasError()) {
|
96
144
|
throw InternalException("failed to update delete ART in physical delete: ", error.Message());
|
97
145
|
}
|
@@ -229,6 +229,7 @@ static void CreateUpdateChunk(ExecutionContext &context, DataChunk &chunk, Table
|
|
229
229
|
auto &do_update_condition = op.do_update_condition;
|
230
230
|
auto &set_types = op.set_types;
|
231
231
|
auto &set_expressions = op.set_expressions;
|
232
|
+
|
232
233
|
// Check the optional condition for the DO UPDATE clause, to filter which rows will be updated
|
233
234
|
if (do_update_condition) {
|
234
235
|
DataChunk do_update_filter_result;
|
@@ -252,19 +253,28 @@ static void CreateUpdateChunk(ExecutionContext &context, DataChunk &chunk, Table
|
|
252
253
|
chunk.SetCardinality(selection.Count());
|
253
254
|
// Also apply this Slice to the to-update row_ids
|
254
255
|
row_ids.Slice(selection.Selection(), selection.Count());
|
256
|
+
row_ids.Flatten(selection.Count());
|
255
257
|
}
|
256
258
|
}
|
257
259
|
|
258
|
-
|
259
|
-
|
260
|
+
if (chunk.size() == 0) {
|
261
|
+
auto initialize = vector<bool>(set_types.size(), false);
|
262
|
+
update_chunk.Initialize(context.client, set_types, initialize, chunk.size());
|
263
|
+
update_chunk.SetCardinality(chunk);
|
264
|
+
return;
|
265
|
+
}
|
266
|
+
|
267
|
+
// Execute the SET expressions.
|
268
|
+
update_chunk.Initialize(context.client, set_types, chunk.size());
|
260
269
|
ExpressionExecutor executor(context.client, set_expressions);
|
261
270
|
executor.Execute(chunk, update_chunk);
|
262
271
|
update_chunk.SetCardinality(chunk);
|
263
272
|
}
|
264
273
|
|
265
274
|
template <bool GLOBAL>
|
266
|
-
static idx_t PerformOnConflictAction(InsertLocalState &lstate,
|
267
|
-
TableCatalogEntry &table, Vector &row_ids,
|
275
|
+
static idx_t PerformOnConflictAction(InsertLocalState &lstate, InsertGlobalState &gstate, ExecutionContext &context,
|
276
|
+
DataChunk &chunk, TableCatalogEntry &table, Vector &row_ids,
|
277
|
+
const PhysicalInsert &op) {
|
268
278
|
// Early-out, if we do nothing on conflicting rows.
|
269
279
|
if (op.action_type == OnConflictAction::NOTHING) {
|
270
280
|
return 0;
|
@@ -275,15 +285,8 @@ static idx_t PerformOnConflictAction(InsertLocalState &lstate, ExecutionContext
|
|
275
285
|
CreateUpdateChunk(context, chunk, table, row_ids, update_chunk, op);
|
276
286
|
auto &data_table = table.GetStorage();
|
277
287
|
|
278
|
-
|
279
|
-
|
280
|
-
if (GLOBAL) {
|
281
|
-
auto update_state = data_table.InitializeUpdate(table, context.client, op.bound_constraints);
|
282
|
-
data_table.Update(*update_state, context.client, row_ids, set_columns, update_chunk);
|
283
|
-
return update_chunk.size();
|
284
|
-
}
|
285
|
-
auto &local_storage = LocalStorage::Get(context.client, data_table.db);
|
286
|
-
local_storage.Update(data_table, row_ids, set_columns, update_chunk);
|
288
|
+
if (update_chunk.size() == 0) {
|
289
|
+
// Nothing to do
|
287
290
|
return update_chunk.size();
|
288
291
|
}
|
289
292
|
|
@@ -297,6 +300,27 @@ static idx_t PerformOnConflictAction(InsertLocalState &lstate, ExecutionContext
|
|
297
300
|
append_chunk.data[set_columns[i].index].Reference(update_chunk.data[i]);
|
298
301
|
}
|
299
302
|
|
303
|
+
// Perform the UPDATE on the (global) storage.
|
304
|
+
if (!op.update_is_del_and_insert) {
|
305
|
+
if (!op.parallel && op.return_chunk) {
|
306
|
+
gstate.return_collection.Append(append_chunk);
|
307
|
+
}
|
308
|
+
|
309
|
+
if (GLOBAL) {
|
310
|
+
auto update_state = data_table.InitializeUpdate(table, context.client, op.bound_constraints);
|
311
|
+
data_table.Update(*update_state, context.client, row_ids, set_columns, update_chunk);
|
312
|
+
return update_chunk.size();
|
313
|
+
}
|
314
|
+
auto &local_storage = LocalStorage::Get(context.client, data_table.db);
|
315
|
+
if (gstate.initialized) {
|
316
|
+
// Flush the data first, it might be referenced by the Update
|
317
|
+
data_table.FinalizeLocalAppend(gstate.append_state);
|
318
|
+
gstate.initialized = false;
|
319
|
+
}
|
320
|
+
local_storage.Update(data_table, row_ids, set_columns, update_chunk);
|
321
|
+
return update_chunk.size();
|
322
|
+
}
|
323
|
+
|
300
324
|
if (GLOBAL) {
|
301
325
|
auto &delete_state = lstate.GetDeleteState(data_table, table, context.client);
|
302
326
|
data_table.Delete(delete_state, context.client, row_ids, update_chunk.size());
|
@@ -305,6 +329,9 @@ static idx_t PerformOnConflictAction(InsertLocalState &lstate, ExecutionContext
|
|
305
329
|
local_storage.Delete(data_table, row_ids, update_chunk.size());
|
306
330
|
}
|
307
331
|
|
332
|
+
if (!op.parallel && op.return_chunk) {
|
333
|
+
gstate.return_collection.Append(append_chunk);
|
334
|
+
}
|
308
335
|
data_table.LocalAppend(table, context.client, append_chunk, op.bound_constraints, row_ids, append_chunk);
|
309
336
|
return update_chunk.size();
|
310
337
|
}
|
@@ -357,8 +384,8 @@ static void CheckDistinctnessInternal(ValidityMask &valid, vector<reference<Vect
|
|
357
384
|
}
|
358
385
|
}
|
359
386
|
|
360
|
-
void PrepareSortKeys(DataChunk &input, unordered_map<column_t, unique_ptr<Vector>> &sort_keys,
|
361
|
-
|
387
|
+
static void PrepareSortKeys(DataChunk &input, unordered_map<column_t, unique_ptr<Vector>> &sort_keys,
|
388
|
+
const unordered_set<column_t> &column_ids) {
|
362
389
|
OrderModifiers order_modifiers(OrderType::ASCENDING, OrderByNullType::NULLS_LAST);
|
363
390
|
for (auto &it : column_ids) {
|
364
391
|
auto &sort_key = sort_keys[it];
|
@@ -440,7 +467,7 @@ static void VerifyOnConflictCondition(ExecutionContext &context, DataChunk &comb
|
|
440
467
|
|
441
468
|
template <bool GLOBAL>
|
442
469
|
static idx_t HandleInsertConflicts(TableCatalogEntry &table, ExecutionContext &context, InsertLocalState &lstate,
|
443
|
-
DataChunk &tuples, const PhysicalInsert &op) {
|
470
|
+
InsertGlobalState &gstate, DataChunk &tuples, const PhysicalInsert &op) {
|
444
471
|
auto &types_to_fetch = op.types_to_fetch;
|
445
472
|
auto &on_conflict_condition = op.on_conflict_condition;
|
446
473
|
auto &conflict_target = op.conflict_target;
|
@@ -510,7 +537,7 @@ static idx_t HandleInsertConflicts(TableCatalogEntry &table, ExecutionContext &c
|
|
510
537
|
RegisterUpdatedRows(lstate, row_ids, combined_chunk.size());
|
511
538
|
}
|
512
539
|
|
513
|
-
affected_tuples += PerformOnConflictAction<GLOBAL>(lstate, context, combined_chunk, table, row_ids, op);
|
540
|
+
affected_tuples += PerformOnConflictAction<GLOBAL>(lstate, gstate, context, combined_chunk, table, row_ids, op);
|
514
541
|
|
515
542
|
// Remove the conflicting tuples from the insert chunk
|
516
543
|
SelectionVector sel_vec(tuples.size());
|
@@ -590,6 +617,11 @@ idx_t PhysicalInsert::OnConflictHandling(TableCatalogEntry &table, ExecutionCont
|
|
590
617
|
}
|
591
618
|
}
|
592
619
|
if (action_type == OnConflictAction::UPDATE) {
|
620
|
+
if (do_update_condition) {
|
621
|
+
//! See https://github.com/duckdblabs/duckdb-internal/issues/4090 for context
|
622
|
+
throw NotImplementedException("Inner conflicts detected with a conditional DO UPDATE on-conflict "
|
623
|
+
"action, not fully implemented yet");
|
624
|
+
}
|
593
625
|
ManagedSelection last_occurrences(last_occurrences_of_conflict.size());
|
594
626
|
for (auto &idx : last_occurrences_of_conflict) {
|
595
627
|
last_occurrences.Append(idx);
|
@@ -607,9 +639,9 @@ idx_t PhysicalInsert::OnConflictHandling(TableCatalogEntry &table, ExecutionCont
|
|
607
639
|
// Check whether any conflicts arise, and if they all meet the conflict_target + condition
|
608
640
|
// If that's not the case - We throw the first error
|
609
641
|
idx_t updated_tuples = 0;
|
610
|
-
updated_tuples += HandleInsertConflicts<true>(table, context, lstate, lstate.insert_chunk, *this);
|
642
|
+
updated_tuples += HandleInsertConflicts<true>(table, context, lstate, gstate, lstate.insert_chunk, *this);
|
611
643
|
// Also check the transaction-local storage+ART so we can detect conflicts within this transaction
|
612
|
-
updated_tuples += HandleInsertConflicts<false>(table, context, lstate, lstate.insert_chunk, *this);
|
644
|
+
updated_tuples += HandleInsertConflicts<false>(table, context, lstate, gstate, lstate.insert_chunk, *this);
|
613
645
|
|
614
646
|
return updated_tuples;
|
615
647
|
}
|
@@ -628,31 +660,22 @@ SinkResultType PhysicalInsert::Sink(ExecutionContext &context, DataChunk &chunk,
|
|
628
660
|
gstate.initialized = true;
|
629
661
|
}
|
630
662
|
|
631
|
-
if (action_type != OnConflictAction::NOTHING && return_chunk) {
|
632
|
-
// If the action is UPDATE or REPLACE, we will always create either an APPEND or an INSERT
|
633
|
-
// for NOTHING we don't create either an APPEND or an INSERT for the tuple
|
634
|
-
// so it should not be added to the RETURNING chunk
|
635
|
-
gstate.return_collection.Append(lstate.insert_chunk);
|
636
|
-
}
|
637
663
|
idx_t updated_tuples = OnConflictHandling(table, context, gstate, lstate);
|
638
|
-
|
639
|
-
// Because we didn't add to the RETURNING chunk yet
|
640
|
-
// we add the tuples that did not get filtered out now
|
641
|
-
gstate.return_collection.Append(lstate.insert_chunk);
|
642
|
-
}
|
664
|
+
|
643
665
|
gstate.insert_count += lstate.insert_chunk.size();
|
644
666
|
gstate.insert_count += updated_tuples;
|
667
|
+
if (!parallel && return_chunk) {
|
668
|
+
gstate.return_collection.Append(lstate.insert_chunk);
|
669
|
+
}
|
645
670
|
storage.LocalAppend(gstate.append_state, context.client, lstate.insert_chunk, true);
|
646
671
|
if (action_type == OnConflictAction::UPDATE && lstate.update_chunk.size() != 0) {
|
647
|
-
|
648
|
-
|
649
|
-
gstate.initialized = false;
|
650
|
-
(void)HandleInsertConflicts<true>(table, context, lstate, lstate.update_chunk, *this);
|
651
|
-
(void)HandleInsertConflicts<false>(table, context, lstate, lstate.update_chunk, *this);
|
672
|
+
(void)HandleInsertConflicts<true>(table, context, lstate, gstate, lstate.update_chunk, *this);
|
673
|
+
(void)HandleInsertConflicts<false>(table, context, lstate, gstate, lstate.update_chunk, *this);
|
652
674
|
// All of the tuples should have been turned into an update, leaving the chunk empty afterwards
|
653
675
|
D_ASSERT(lstate.update_chunk.size() == 0);
|
654
676
|
}
|
655
677
|
} else {
|
678
|
+
//! FIXME: can't we enable this by using a BatchedDataCollection ?
|
656
679
|
D_ASSERT(!return_chunk);
|
657
680
|
// parallel append
|
658
681
|
if (!lstate.local_collection) {
|
@@ -88,7 +88,8 @@ SinkResultType PhysicalCreateARTIndex::SinkUnsorted(OperatorSinkInput &input) co
|
|
88
88
|
// Insert each key and its corresponding row ID.
|
89
89
|
for (idx_t i = 0; i < row_count; i++) {
|
90
90
|
auto status = art.tree.GetGateStatus();
|
91
|
-
auto conflict_type =
|
91
|
+
auto conflict_type =
|
92
|
+
art.Insert(art.tree, l_state.keys[i], 0, l_state.row_ids[i], status, nullptr, IndexAppendMode::DEFAULT);
|
92
93
|
D_ASSERT(conflict_type != ARTConflictType::TRANSACTION);
|
93
94
|
if (conflict_type == ARTConflictType::CONSTRAINT) {
|
94
95
|
throw ConstraintException("Data contains duplicates on indexed column(s)");
|
@@ -97,6 +97,7 @@ public:
|
|
97
97
|
void SetRadixBits(const idx_t &radix_bits_p);
|
98
98
|
bool SetRadixBitsToExternal();
|
99
99
|
idx_t GetRadixBits() const;
|
100
|
+
idx_t GetExternalRadixBits() const;
|
100
101
|
|
101
102
|
private:
|
102
103
|
void SetRadixBitsInternal(idx_t radix_bits_p, bool external);
|
@@ -210,13 +211,13 @@ RadixHTGlobalSinkState::RadixHTGlobalSinkState(ClientContext &context_p, const R
|
|
210
211
|
auto tuples_per_block = block_alloc_size / radix_ht.GetLayout().GetRowWidth();
|
211
212
|
idx_t ht_count =
|
212
213
|
LossyNumericCast<idx_t>(static_cast<double>(config.sink_capacity) / GroupedAggregateHashTable::LOAD_FACTOR);
|
213
|
-
auto num_partitions = RadixPartitioning::NumberOfPartitions(config.
|
214
|
+
auto num_partitions = RadixPartitioning::NumberOfPartitions(config.GetExternalRadixBits());
|
214
215
|
auto count_per_partition = ht_count / num_partitions;
|
215
216
|
auto blocks_per_partition = (count_per_partition + tuples_per_block) / tuples_per_block + 1;
|
216
217
|
if (!radix_ht.GetLayout().AllConstant()) {
|
217
218
|
blocks_per_partition += 2;
|
218
219
|
}
|
219
|
-
auto ht_size = blocks_per_partition * block_alloc_size + config.sink_capacity * sizeof(ht_entry_t);
|
220
|
+
auto ht_size = num_partitions * blocks_per_partition * block_alloc_size + config.sink_capacity * sizeof(ht_entry_t);
|
220
221
|
|
221
222
|
// This really is the minimum reservation that we can do
|
222
223
|
auto num_threads = NumericCast<idx_t>(TaskScheduler::GetScheduler(context).NumberOfThreads());
|
@@ -280,13 +281,17 @@ idx_t RadixHTConfig::GetRadixBits() const {
|
|
280
281
|
return sink_radix_bits;
|
281
282
|
}
|
282
283
|
|
284
|
+
idx_t RadixHTConfig::GetExternalRadixBits() const {
|
285
|
+
return MAXIMUM_FINAL_SINK_RADIX_BITS;
|
286
|
+
}
|
287
|
+
|
283
288
|
void RadixHTConfig::SetRadixBitsInternal(const idx_t radix_bits_p, bool external) {
|
284
|
-
if (sink_radix_bits
|
289
|
+
if (sink_radix_bits > radix_bits_p || sink.any_combined) {
|
285
290
|
return;
|
286
291
|
}
|
287
292
|
|
288
293
|
auto guard = sink.Lock();
|
289
|
-
if (sink_radix_bits
|
294
|
+
if (sink_radix_bits > radix_bits_p || sink.any_combined) {
|
290
295
|
return;
|
291
296
|
}
|
292
297
|
|
@@ -50,7 +50,10 @@ ReservoirSample::ReservoirSample(idx_t sample_count, unique_ptr<ReservoirChunk>
|
|
50
50
|
if (reservoir_chunk) {
|
51
51
|
this->reservoir_chunk = std::move(reservoir_chunk);
|
52
52
|
sel_size = this->reservoir_chunk->chunk.size();
|
53
|
-
sel = SelectionVector(
|
53
|
+
sel = SelectionVector(FIXED_SAMPLE_SIZE);
|
54
|
+
for (idx_t i = 0; i < sel_size; i++) {
|
55
|
+
sel.set_index(i, i);
|
56
|
+
}
|
54
57
|
ExpandSerializedSample();
|
55
58
|
}
|
56
59
|
stats_sample = true;
|
@@ -225,10 +228,6 @@ vector<uint32_t> ReservoirSample::GetRandomizedVector(uint32_t range, uint32_t s
|
|
225
228
|
for (uint32_t i = 0; i < range; i++) {
|
226
229
|
ret.push_back(i);
|
227
230
|
}
|
228
|
-
if (size == FIXED_SAMPLE_SIZE) {
|
229
|
-
std::shuffle(ret.begin(), ret.end(), base_reservoir_sample->random);
|
230
|
-
return ret;
|
231
|
-
}
|
232
231
|
for (uint32_t i = 0; i < size; i++) {
|
233
232
|
uint32_t random_shuffle = base_reservoir_sample->random.NextRandomInteger32(i, range);
|
234
233
|
if (random_shuffle == i) {
|
@@ -305,6 +304,7 @@ void ReservoirSample::SimpleMerge(ReservoirSample &other) {
|
|
305
304
|
auto offset = reservoir_chunk->chunk.size();
|
306
305
|
for (idx_t i = keep_from_this; i < size_after_merge; i++) {
|
307
306
|
if (i >= GetActiveSampleCount()) {
|
307
|
+
D_ASSERT(sel_size >= GetActiveSampleCount());
|
308
308
|
sel.set_index(GetActiveSampleCount(), offset);
|
309
309
|
sel_size += 1;
|
310
310
|
} else {
|
@@ -551,7 +551,7 @@ void ReservoirSample::ExpandSerializedSample() {
|
|
551
551
|
}
|
552
552
|
|
553
553
|
idx_t ReservoirSample::GetReservoirChunkCapacity() const {
|
554
|
-
return sample_count + (FIXED_SAMPLE_SIZE_MULTIPLIER * FIXED_SAMPLE_SIZE);
|
554
|
+
return sample_count + (FIXED_SAMPLE_SIZE_MULTIPLIER * MinValue<idx_t>(sample_count, FIXED_SAMPLE_SIZE));
|
555
555
|
}
|
556
556
|
|
557
557
|
idx_t ReservoirSample::FillReservoir(DataChunk &chunk) {
|
@@ -749,6 +749,7 @@ void ReservoirSample::AddToReservoir(DataChunk &chunk) {
|
|
749
749
|
|
750
750
|
if (chunk_sel.size == 0) {
|
751
751
|
// not adding any samples
|
752
|
+
base_reservoir_sample->num_entries_seen_total += chunk.size();
|
752
753
|
return;
|
753
754
|
}
|
754
755
|
idx_t size = chunk_sel.size;
|
@@ -65,6 +65,10 @@ static optional_ptr<CompressionFunction> LoadCompressionFunction(CompressionFunc
|
|
65
65
|
|
66
66
|
static void TryLoadCompression(DBConfig &config, vector<reference<CompressionFunction>> &result, CompressionType type,
|
67
67
|
const PhysicalType physical_type) {
|
68
|
+
if (config.options.disabled_compression_methods.find(type) != config.options.disabled_compression_methods.end()) {
|
69
|
+
// explicitly disabled
|
70
|
+
return;
|
71
|
+
}
|
68
72
|
auto function = config.GetCompressionFunction(type, physical_type);
|
69
73
|
if (!function) {
|
70
74
|
return;
|
@@ -457,7 +457,7 @@ unique_ptr<Expression> FunctionBinder::BindScalarFunction(ScalarFunction bound_f
|
|
457
457
|
std::move(children), std::move(bind_info), is_operator);
|
458
458
|
if (result_func->function.bind_expression) {
|
459
459
|
// if a bind_expression callback is registered - call it and emit the resulting expression
|
460
|
-
FunctionBindExpressionInput input(context, result_func->bind_info.get(),
|
460
|
+
FunctionBindExpressionInput input(context, result_func->bind_info.get(), result_func->children);
|
461
461
|
result = result_func->function.bind_expression(input);
|
462
462
|
}
|
463
463
|
if (!result) {
|
@@ -114,11 +114,11 @@ static void WriteLogValues(T &LogSource, LogLevel level, const string_t *data, c
|
|
114
114
|
const string &type) {
|
115
115
|
if (!type.empty()) {
|
116
116
|
for (idx_t i = 0; i < size; i++) {
|
117
|
-
|
117
|
+
DUCKDB_LOG(LogSource, type.c_str(), level, data[sel->get_index(i)]);
|
118
118
|
}
|
119
119
|
} else {
|
120
120
|
for (idx_t i = 0; i < size; i++) {
|
121
|
-
|
121
|
+
DUCKDB_LOG(LogSource, type.c_str(), level, data[sel->get_index(i)]);
|
122
122
|
}
|
123
123
|
}
|
124
124
|
}
|
@@ -56,7 +56,7 @@ void ArrowType::ThrowIfInvalid() const {
|
|
56
56
|
}
|
57
57
|
}
|
58
58
|
|
59
|
-
unique_ptr<ArrowType> ArrowType::GetTypeFromFormat(
|
59
|
+
unique_ptr<ArrowType> ArrowType::GetTypeFromFormat(string &format) {
|
60
60
|
if (format == "n") {
|
61
61
|
return make_uniq<ArrowType>(LogicalType::SQLNULL);
|
62
62
|
} else if (format == "b") {
|
@@ -179,6 +179,14 @@ unique_ptr<ArrowType> ArrowType::GetTypeFromFormat(DBConfig &config, ArrowSchema
|
|
179
179
|
}
|
180
180
|
return make_uniq<ArrowType>(LogicalType::TIMESTAMP_TZ, std::move(type_info));
|
181
181
|
}
|
182
|
+
return nullptr;
|
183
|
+
}
|
184
|
+
|
185
|
+
unique_ptr<ArrowType> ArrowType::GetTypeFromFormat(DBConfig &config, ArrowSchema &schema, string &format) {
|
186
|
+
auto type = GetTypeFromFormat(format);
|
187
|
+
if (type) {
|
188
|
+
return type;
|
189
|
+
}
|
182
190
|
if (format == "+l") {
|
183
191
|
return CreateListType(config, *schema.children[0], ArrowVariableSizeType::NORMAL, false);
|
184
192
|
} else if (format == "+L") {
|
@@ -361,8 +369,13 @@ unique_ptr<ArrowType> ArrowType::GetTypeFromSchema(DBConfig &config, ArrowSchema
|
|
361
369
|
auto arrow_type = GetTypeFromFormat(config, schema, format);
|
362
370
|
if (schema_metadata.HasExtension()) {
|
363
371
|
auto extension_info = schema_metadata.GetExtensionInfo(string(format));
|
364
|
-
|
372
|
+
if (config.HasArrowExtension(extension_info)) {
|
373
|
+
auto extension = config.GetArrowExtension(extension_info);
|
374
|
+
arrow_type = extension.GetType(schema, schema_metadata);
|
375
|
+
arrow_type->extension_data = extension.GetTypeExtension();
|
376
|
+
}
|
365
377
|
}
|
378
|
+
|
366
379
|
return arrow_type;
|
367
380
|
}
|
368
381
|
|
@@ -118,7 +118,8 @@ static void ColumnArrowToDuckDBRunEndEncoded(Vector &vector, const ArrowArray &a
|
|
118
118
|
|
119
119
|
static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowArrayScanState &array_state, idx_t size,
|
120
120
|
const ArrowType &arrow_type, int64_t nested_offset = -1,
|
121
|
-
ValidityMask *parent_mask = nullptr, uint64_t parent_offset = 0
|
121
|
+
ValidityMask *parent_mask = nullptr, uint64_t parent_offset = 0,
|
122
|
+
bool ignore_extensions = false);
|
122
123
|
|
123
124
|
static void ColumnArrowToDuckDBDictionary(Vector &vector, ArrowArray &array, ArrowArrayScanState &array_state,
|
124
125
|
idx_t size, const ArrowType &arrow_type, int64_t nested_offset = -1,
|
@@ -765,17 +766,15 @@ static void ColumnArrowToDuckDBRunEndEncoded(Vector &vector, const ArrowArray &a
|
|
765
766
|
|
766
767
|
static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowArrayScanState &array_state, idx_t size,
|
767
768
|
const ArrowType &arrow_type, int64_t nested_offset, ValidityMask *parent_mask,
|
768
|
-
uint64_t parent_offset) {
|
769
|
+
uint64_t parent_offset, bool ignore_extensions) {
|
769
770
|
auto &scan_state = array_state.state;
|
770
771
|
D_ASSERT(!array.dictionary);
|
771
|
-
if (arrow_type.HasExtension()) {
|
772
|
+
if (!ignore_extensions && arrow_type.HasExtension()) {
|
772
773
|
if (arrow_type.extension_data->arrow_to_duckdb) {
|
773
|
-
//
|
774
|
+
// Convert the storage and then call the cast function
|
774
775
|
Vector input_data(arrow_type.extension_data->GetInternalType());
|
775
|
-
|
776
|
-
|
777
|
-
ColumnArrowToDuckDB(input_data, array, array_state, size, input_arrow_type, nested_offset, parent_mask,
|
778
|
-
parent_offset);
|
776
|
+
ColumnArrowToDuckDB(input_data, array, array_state, size, arrow_type, nested_offset, parent_mask,
|
777
|
+
parent_offset, /*ignore_extensions*/ true);
|
779
778
|
arrow_type.extension_data->arrow_to_duckdb(array_state.context, input_data, vector, size);
|
780
779
|
return;
|
781
780
|
}
|
@@ -1105,7 +1104,7 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowArraySca
|
|
1105
1104
|
break;
|
1106
1105
|
case ArrowArrayPhysicalType::DEFAULT:
|
1107
1106
|
ColumnArrowToDuckDB(child_entry, child_array, child_state, size, child_type, nested_offset,
|
1108
|
-
&struct_validity_mask, NumericCast<uint64_t>(array.offset));
|
1107
|
+
&struct_validity_mask, NumericCast<uint64_t>(array.offset), false);
|
1109
1108
|
break;
|
1110
1109
|
default:
|
1111
1110
|
throw NotImplementedException("ArrowArrayPhysicalType not recognized");
|
@@ -1138,7 +1137,8 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowArraySca
|
|
1138
1137
|
ColumnArrowToDuckDBRunEndEncoded(child, child_array, child_state, size, child_type);
|
1139
1138
|
break;
|
1140
1139
|
case ArrowArrayPhysicalType::DEFAULT:
|
1141
|
-
ColumnArrowToDuckDB(child, child_array, child_state, size, child_type, nested_offset, &validity_mask
|
1140
|
+
ColumnArrowToDuckDB(child, child_array, child_state, size, child_type, nested_offset, &validity_mask,
|
1141
|
+
false);
|
1142
1142
|
break;
|
1143
1143
|
default:
|
1144
1144
|
throw NotImplementedException("ArrowArrayPhysicalType not recognized");
|
@@ -97,7 +97,7 @@ void BaseCSVData::Finalize() {
|
|
97
97
|
const char escape = options.dialect_options.state_machine_options.escape.GetValue();
|
98
98
|
// Allow nullstr to be escape character + some non-special character, e.g., "\N" (MySQL default).
|
99
99
|
// In this case, only unquoted occurrences of the nullstr will be recognized as null values.
|
100
|
-
if (options.dialect_options.state_machine_options.
|
100
|
+
if (options.dialect_options.state_machine_options.strict_mode == false && null_str.size() == 2 &&
|
101
101
|
null_str[0] == escape && null_str[1] != '\0') {
|
102
102
|
continue;
|
103
103
|
}
|
@@ -371,7 +371,7 @@ static void WriteQuotedString(WriteStream &writer, WriteCSVData &csv_data, const
|
|
371
371
|
struct LocalWriteCSVData : public LocalFunctionData {
|
372
372
|
public:
|
373
373
|
LocalWriteCSVData(ClientContext &context, vector<unique_ptr<Expression>> &expressions)
|
374
|
-
: executor(context, expressions) {
|
374
|
+
: executor(context, expressions), stream(Allocator::Get(context)) {
|
375
375
|
}
|
376
376
|
|
377
377
|
public:
|
@@ -451,7 +451,7 @@ static unique_ptr<GlobalFunctionData> WriteCSVInitializeGlobal(ClientContext &co
|
|
451
451
|
}
|
452
452
|
|
453
453
|
if (!(options.dialect_options.header.IsSetByUser() && !options.dialect_options.header.GetValue())) {
|
454
|
-
MemoryStream stream;
|
454
|
+
MemoryStream stream(Allocator::Get(context));
|
455
455
|
// write the header line to the file
|
456
456
|
for (idx_t i = 0; i < csv_data.options.name_list.size(); i++) {
|
457
457
|
if (i != 0) {
|
@@ -554,7 +554,7 @@ void WriteCSVFinalize(ClientContext &context, FunctionData &bind_data, GlobalFun
|
|
554
554
|
auto &csv_data = bind_data.Cast<WriteCSVData>();
|
555
555
|
auto &options = csv_data.options;
|
556
556
|
|
557
|
-
MemoryStream stream;
|
557
|
+
MemoryStream stream(Allocator::Get(context));
|
558
558
|
if (!options.suffix.empty()) {
|
559
559
|
stream.WriteData(const_data_ptr_cast(options.suffix.c_str()), options.suffix.size());
|
560
560
|
} else if (global_state.written_anything) {
|
@@ -582,6 +582,9 @@ CopyFunctionExecutionMode WriteCSVExecutionMode(bool preserve_insertion_order, b
|
|
582
582
|
// Prepare Batch
|
583
583
|
//===--------------------------------------------------------------------===//
|
584
584
|
struct WriteCSVBatchData : public PreparedBatchData {
|
585
|
+
explicit WriteCSVBatchData(Allocator &allocator) : stream(allocator) {
|
586
|
+
}
|
587
|
+
|
585
588
|
//! The thread-local buffer to write data into
|
586
589
|
MemoryStream stream;
|
587
590
|
};
|
@@ -603,7 +606,7 @@ unique_ptr<PreparedBatchData> WriteCSVPrepareBatch(ClientContext &context, Funct
|
|
603
606
|
|
604
607
|
// write CSV chunks to the batch data
|
605
608
|
bool written_anything = false;
|
606
|
-
auto batch = make_uniq<WriteCSVBatchData>();
|
609
|
+
auto batch = make_uniq<WriteCSVBatchData>(Allocator::Get(context));
|
607
610
|
for (auto &chunk : collection->Chunks()) {
|
608
611
|
WriteCSVChunkInternal(context, bind_data, cast_chunk, batch->stream, chunk, written_anything, executor);
|
609
612
|
}
|
@@ -62,6 +62,8 @@ void SchemaDiscovery(ClientContext &context, ReadCSVData &result, CSVReaderOptio
|
|
62
62
|
options.file_path = file_paths[current_file];
|
63
63
|
|
64
64
|
result.buffer_manager = make_shared_ptr<CSVBufferManager>(context, options, options.file_path, 0, false);
|
65
|
+
idx_t only_header_or_empty_files = 0;
|
66
|
+
|
65
67
|
{
|
66
68
|
CSVSniffer sniffer(options, result.buffer_manager, CSVStateMachineCache::Get(context));
|
67
69
|
auto sniffer_result = sniffer.SniffCSV();
|
@@ -71,14 +73,17 @@ void SchemaDiscovery(ClientContext &context, ReadCSVData &result, CSVReaderOptio
|
|
71
73
|
schemas.emplace_back(sniffer_result.names, sniffer_result.return_types, file_paths[0], rows_read,
|
72
74
|
result.buffer_manager->GetBuffer(0)->actual_size == 0);
|
73
75
|
total_number_of_rows += sniffer.LinesSniffed();
|
76
|
+
current_file++;
|
77
|
+
if (sniffer.EmptyOrOnlyHeader()) {
|
78
|
+
only_header_or_empty_files++;
|
79
|
+
}
|
74
80
|
}
|
75
81
|
|
76
82
|
// We do a copy of the options to not pollute the options of the first file.
|
77
83
|
constexpr idx_t max_files_to_sniff = 10;
|
78
84
|
idx_t files_to_sniff = file_paths.size() > max_files_to_sniff ? max_files_to_sniff : file_paths.size();
|
79
|
-
while (total_number_of_rows < required_number_of_lines && current_file
|
85
|
+
while (total_number_of_rows < required_number_of_lines && current_file < files_to_sniff) {
|
80
86
|
auto option_copy = option_og;
|
81
|
-
current_file++;
|
82
87
|
option_copy.file_path = file_paths[current_file];
|
83
88
|
auto buffer_manager =
|
84
89
|
make_shared_ptr<CSVBufferManager>(context, option_copy, option_copy.file_path, current_file, false);
|
@@ -94,6 +99,10 @@ void SchemaDiscovery(ClientContext &context, ReadCSVData &result, CSVReaderOptio
|
|
94
99
|
schemas.emplace_back(sniffer_result.names, sniffer_result.return_types, option_copy.file_path, rows_read);
|
95
100
|
}
|
96
101
|
total_number_of_rows += sniffer.LinesSniffed();
|
102
|
+
if (sniffer.EmptyOrOnlyHeader()) {
|
103
|
+
only_header_or_empty_files++;
|
104
|
+
}
|
105
|
+
current_file++;
|
97
106
|
}
|
98
107
|
|
99
108
|
// We might now have multiple schemas, we need to go through them to define the one true schema
|
@@ -115,6 +124,13 @@ void SchemaDiscovery(ClientContext &context, ReadCSVData &result, CSVReaderOptio
|
|
115
124
|
names = best_schema.GetNames();
|
116
125
|
return_types = best_schema.GetTypes();
|
117
126
|
}
|
127
|
+
if (only_header_or_empty_files == current_file && !options.columns_set) {
|
128
|
+
for (auto &type : return_types) {
|
129
|
+
D_ASSERT(type.id() == LogicalTypeId::BOOLEAN);
|
130
|
+
// we default to varchar if all files are empty or only have a header after all the sniffing
|
131
|
+
type = LogicalType::VARCHAR;
|
132
|
+
}
|
133
|
+
}
|
118
134
|
result.csv_types = return_types;
|
119
135
|
result.csv_names = names;
|
120
136
|
}
|
@@ -334,7 +350,7 @@ void ReadCSVTableFunction::ReadCSVAddNamedParameters(TableFunction &table_functi
|
|
334
350
|
table_function.named_parameters["column_names"] = LogicalType::LIST(LogicalType::VARCHAR);
|
335
351
|
table_function.named_parameters["comment"] = LogicalType::VARCHAR;
|
336
352
|
table_function.named_parameters["encoding"] = LogicalType::VARCHAR;
|
337
|
-
table_function.named_parameters["
|
353
|
+
table_function.named_parameters["strict_mode"] = LogicalType::BOOLEAN;
|
338
354
|
|
339
355
|
MultiFileReader::AddParameters(table_function);
|
340
356
|
}
|
@@ -358,7 +374,8 @@ void CSVComplexFilterPushdown(ClientContext &context, LogicalGet &get, FunctionD
|
|
358
374
|
MultiFileReader().ComplexFilterPushdown(context, file_list, data.options.file_options, info, filters);
|
359
375
|
if (filtered_list) {
|
360
376
|
data.files = filtered_list->GetAllFiles();
|
361
|
-
|
377
|
+
SimpleMultiFileList simple_filtered_list(data.files);
|
378
|
+
MultiFileReader::PruneReaders(data, simple_filtered_list);
|
362
379
|
} else {
|
363
380
|
data.files = file_list.GetAllFiles();
|
364
381
|
}
|
@@ -152,6 +152,13 @@ static void CSVSniffFunction(ClientContext &context, TableFunctionInput &data_p,
|
|
152
152
|
}
|
153
153
|
CSVSniffer sniffer(sniffer_options, buffer_manager, CSVStateMachineCache::Get(context));
|
154
154
|
auto sniffer_result = sniffer.SniffCSV(data.force_match);
|
155
|
+
if (sniffer.EmptyOrOnlyHeader()) {
|
156
|
+
for (auto &type : sniffer_result.return_types) {
|
157
|
+
D_ASSERT(type.id() == LogicalTypeId::BOOLEAN);
|
158
|
+
// we default to varchar if all files are empty or only have a header after all the sniffing
|
159
|
+
type = LogicalType::VARCHAR;
|
160
|
+
}
|
161
|
+
}
|
155
162
|
string str_opt;
|
156
163
|
string separator = ", ";
|
157
164
|
// Set output
|