duckdb 1.3.1-dev6.0 → 1.3.2-dev0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/core_functions/aggregate/distributive/arg_min_max.cpp +27 -39
- package/src/duckdb/extension/core_functions/aggregate/holistic/quantile.cpp +2 -3
- package/src/duckdb/extension/core_functions/include/core_functions/aggregate/quantile_sort_tree.hpp +1 -1
- package/src/duckdb/extension/core_functions/lambda_functions.cpp +16 -14
- package/src/duckdb/extension/core_functions/scalar/list/list_filter.cpp +3 -2
- package/src/duckdb/extension/core_functions/scalar/list/list_reduce.cpp +46 -10
- package/src/duckdb/extension/core_functions/scalar/list/list_transform.cpp +3 -2
- package/src/duckdb/extension/core_functions/scalar/random/random.cpp +3 -1
- package/src/duckdb/extension/icu/icu-datefunc.cpp +5 -3
- package/src/duckdb/extension/icu/icu-strptime.cpp +6 -1
- package/src/duckdb/extension/icu/icu-timezone.cpp +4 -0
- package/src/duckdb/extension/icu/icu_extension.cpp +7 -2
- package/src/duckdb/extension/icu/include/icu-datefunc.hpp +1 -1
- package/src/duckdb/extension/icu/include/icu-helpers.hpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/uloc.cpp +5 -5
- package/src/duckdb/extension/json/include/json_common.hpp +19 -0
- package/src/duckdb/extension/json/include/json_deserializer.hpp +1 -4
- package/src/duckdb/extension/json/include/json_functions.hpp +4 -4
- package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +38 -17
- package/src/duckdb/extension/json/json_functions/json_table_in_out.cpp +11 -7
- package/src/duckdb/extension/json/json_functions.cpp +4 -4
- package/src/duckdb/extension/json/json_reader.cpp +1 -1
- package/src/duckdb/extension/parquet/column_reader.cpp +7 -1
- package/src/duckdb/extension/parquet/include/parquet_bss_decoder.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_dbp_encoder.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_reader.hpp +2 -1
- package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +1 -1
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +3 -0
- package/src/duckdb/extension/parquet/include/writer/parquet_write_operators.hpp +3 -1
- package/src/duckdb/extension/parquet/include/writer/templated_column_writer.hpp +1 -1
- package/src/duckdb/extension/parquet/parquet_crypto.cpp +9 -5
- package/src/duckdb/extension/parquet/parquet_extension.cpp +26 -0
- package/src/duckdb/extension/parquet/parquet_float16.cpp +4 -2
- package/src/duckdb/extension/parquet/parquet_metadata.cpp +3 -3
- package/src/duckdb/extension/parquet/parquet_multi_file_info.cpp +12 -0
- package/src/duckdb/extension/parquet/parquet_reader.cpp +5 -4
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +13 -3
- package/src/duckdb/extension/parquet/parquet_writer.cpp +1 -1
- package/src/duckdb/extension/parquet/reader/decimal_column_reader.cpp +1 -1
- package/src/duckdb/extension/parquet/reader/string_column_reader.cpp +1 -1
- package/src/duckdb/extension/parquet/reader/struct_column_reader.cpp +13 -4
- package/src/duckdb/extension/parquet/serialize_parquet.cpp +2 -0
- package/src/duckdb/src/catalog/catalog.cpp +10 -4
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +4 -10
- package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +1 -2
- package/src/duckdb/src/catalog/catalog_entry/sequence_catalog_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +2 -2
- package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_search_path.cpp +7 -1
- package/src/duckdb/src/catalog/catalog_set.cpp +21 -1
- package/src/duckdb/src/common/adbc/adbc.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +17 -5
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +23 -15
- package/src/duckdb/src/common/box_renderer.cpp +1 -2
- package/src/duckdb/src/common/enum_util.cpp +4 -3
- package/src/duckdb/src/common/local_file_system.cpp +13 -12
- package/src/duckdb/src/common/multi_file/multi_file_column_mapper.cpp +35 -12
- package/src/duckdb/src/common/multi_file/multi_file_reader.cpp +13 -3
- package/src/duckdb/src/common/string_util.cpp +7 -5
- package/src/duckdb/src/common/tree_renderer/graphviz_tree_renderer.cpp +4 -4
- package/src/duckdb/src/common/tree_renderer/html_tree_renderer.cpp +4 -4
- package/src/duckdb/src/common/tree_renderer/json_tree_renderer.cpp +4 -4
- package/src/duckdb/src/common/tree_renderer/text_tree_renderer.cpp +4 -4
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +1 -1
- package/src/duckdb/src/common/types/uuid.cpp +5 -1
- package/src/duckdb/src/common/types.cpp +28 -0
- package/src/duckdb/src/common/virtual_file_system.cpp +5 -0
- package/src/duckdb/src/execution/column_binding_resolver.cpp +49 -30
- package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +4 -0
- package/src/duckdb/src/execution/join_hashtable.cpp +10 -7
- package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +3 -3
- package/src/duckdb/src/execution/operator/csv_scanner/encode/csv_encoder.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +2 -1
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/skip_scanner.cpp +1 -4
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +53 -1
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +58 -59
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +10 -5
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +4 -0
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +18 -8
- package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +1 -1
- package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +1 -0
- package/src/duckdb/src/execution/physical_plan_generator.cpp +5 -5
- package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +2 -1
- package/src/duckdb/src/function/function.cpp +4 -0
- package/src/duckdb/src/function/scalar/operator/arithmetic.cpp +6 -0
- package/src/duckdb/src/function/scalar/struct/remap_struct.cpp +10 -1
- package/src/duckdb/src/function/table/copy_csv.cpp +1 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
- package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +1 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_table_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_set.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/file_buffer.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/helper.hpp +9 -9
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/multi_file/multi_file_column_mapper.hpp +3 -5
- package/src/duckdb/src/include/duckdb/common/multi_file/multi_file_reader.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/multi_file/multi_file_states.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/shadow_forbidden_functions.hpp +40 -0
- package/src/duckdb/src/include/duckdb/common/string.hpp +25 -2
- package/src/duckdb/src/include/duckdb/common/types/hugeint.hpp +20 -24
- package/src/duckdb/src/include/duckdb/common/types/uhugeint.hpp +20 -24
- package/src/duckdb/src/include/duckdb/common/types.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/unique_ptr.hpp +34 -8
- package/src/duckdb/src/include/duckdb/execution/column_binding_resolver.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +3 -2
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/encode/csv_encoder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +15 -3
- package/src/duckdb/src/include/duckdb/function/cast/vector_cast_helpers.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/copy_function.hpp +7 -3
- package/src/duckdb/src/include/duckdb/function/function.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/function_binder.hpp +2 -1
- package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +20 -12
- package/src/duckdb/src/include/duckdb/function/lambda_functions.hpp +4 -3
- package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +3 -1
- package/src/duckdb/src/include/duckdb/logging/log_type.hpp +17 -0
- package/src/duckdb/src/include/duckdb/main/attached_database.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/client_properties.hpp +22 -6
- package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/database_manager.hpp +4 -1
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +27 -13
- package/src/duckdb/src/include/duckdb/main/secret/secret_manager.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/settings.hpp +11 -0
- package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +7 -1
- package/src/duckdb/src/include/duckdb/original/std/locale.hpp +10 -0
- package/src/duckdb/src/include/duckdb/original/std/memory.hpp +12 -0
- package/src/duckdb/src/include/duckdb/original/std/sstream.hpp +11 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +5 -3
- package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +4 -2
- package/src/duckdb/src/logging/log_manager.cpp +1 -0
- package/src/duckdb/src/logging/log_types.cpp +40 -0
- package/src/duckdb/src/main/attached_database.cpp +4 -0
- package/src/duckdb/src/main/client_context.cpp +1 -0
- package/src/duckdb/src/main/config.cpp +1 -0
- package/src/duckdb/src/main/database.cpp +1 -0
- package/src/duckdb/src/main/database_manager.cpp +19 -2
- package/src/duckdb/src/main/extension/extension_helper.cpp +4 -3
- package/src/duckdb/src/main/query_profiler.cpp +2 -2
- package/src/duckdb/src/main/query_result.cpp +1 -1
- package/src/duckdb/src/main/secret/secret_manager.cpp +2 -0
- package/src/duckdb/src/main/settings/autogenerated_settings.cpp +7 -0
- package/src/duckdb/src/main/settings/custom_settings.cpp +106 -34
- package/src/duckdb/src/optimizer/optimizer.cpp +1 -1
- package/src/duckdb/src/optimizer/topn_optimizer.cpp +18 -8
- package/src/duckdb/src/parallel/executor.cpp +5 -0
- package/src/duckdb/src/parser/parsed_data/create_sequence_info.cpp +1 -1
- package/src/duckdb/src/parser/transform/expression/transform_interval.cpp +5 -1
- package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +21 -24
- package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +10 -8
- package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +3 -2
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +0 -4
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +3 -0
- package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +3 -0
- package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +3 -0
- package/src/duckdb/src/planner/expression/bound_columnref_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_function_expression.cpp +0 -1
- package/src/duckdb/src/planner/expression/bound_reference_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder.cpp +4 -2
- package/src/duckdb/src/planner/logical_operator.cpp +2 -1
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +4 -1
- package/src/duckdb/src/storage/buffer/block_handle.cpp +8 -0
- package/src/duckdb/src/storage/buffer/buffer_pool.cpp +44 -18
- package/src/duckdb/src/storage/caching_file_system.cpp +7 -7
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +4 -3
- package/src/duckdb/src/storage/storage_info.cpp +2 -0
- package/src/duckdb/src/storage/wal_replay.cpp +9 -4
- package/src/duckdb/third_party/fmt/include/fmt/format.h +8 -1
- package/src/duckdb/third_party/fsst/libfsst.cpp +4 -3
- package/src/duckdb/third_party/httplib/httplib.hpp +25 -22
- package/src/duckdb/third_party/hyperloglog/sds.cpp +7 -3
- package/src/duckdb/third_party/libpg_query/src_common_keywords.cpp +8 -1
- package/src/duckdb/third_party/re2/re2/filtered_re2.h +8 -2
- package/src/duckdb/third_party/re2/re2/pod_array.h +7 -1
- package/src/duckdb/third_party/re2/re2/re2.cc +6 -2
- package/src/duckdb/third_party/re2/re2/set.cc +1 -1
- package/src/duckdb/third_party/re2/re2/set.h +7 -1
- package/src/duckdb/ub_src_logging.cpp +4 -4
@@ -173,13 +173,13 @@ void CSVSniffer::GenerateStateMachineSearchSpace(vector<unique_ptr<ColumnCountSc
|
|
173
173
|
}
|
174
174
|
|
175
175
|
// Returns true if a comment is acceptable
|
176
|
-
bool AreCommentsAcceptable(const ColumnCountResult &result, idx_t num_cols,
|
177
|
-
if (
|
176
|
+
bool AreCommentsAcceptable(const ColumnCountResult &result, idx_t num_cols, const CSVReaderOptions &options) {
|
177
|
+
if (options.dialect_options.state_machine_options.comment.IsSetByUser()) {
|
178
178
|
return true;
|
179
179
|
}
|
180
180
|
// For a comment to be acceptable, we want 3/5th's the majority of unmatched in the columns
|
181
181
|
constexpr double min_majority = 0.6;
|
182
|
-
// detected comments
|
182
|
+
// detected comments are all lines that started with a comment character.
|
183
183
|
double detected_comments = 0;
|
184
184
|
// If at least one comment is a full line comment
|
185
185
|
bool has_full_line_comment = false;
|
@@ -192,7 +192,9 @@ bool AreCommentsAcceptable(const ColumnCountResult &result, idx_t num_cols, bool
|
|
192
192
|
has_full_line_comment = true;
|
193
193
|
valid_comments++;
|
194
194
|
}
|
195
|
-
if (result.column_counts[i].number_of_columns == num_cols
|
195
|
+
if ((result.column_counts[i].number_of_columns == num_cols ||
|
196
|
+
(result.column_counts[i].number_of_columns <= num_cols && options.null_padding)) &&
|
197
|
+
result.column_counts[i].is_mid_comment) {
|
196
198
|
valid_comments++;
|
197
199
|
}
|
198
200
|
}
|
@@ -212,13 +214,13 @@ bool AreCommentsAcceptable(const ColumnCountResult &result, idx_t num_cols, bool
|
|
212
214
|
return valid_comments / detected_comments >= min_majority;
|
213
215
|
}
|
214
216
|
|
215
|
-
void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
|
216
|
-
|
217
|
-
idx_t &min_ignored_rows) {
|
217
|
+
void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner, CandidateStats &stats,
|
218
|
+
vector<unique_ptr<ColumnCountScanner>> &successful_candidates) {
|
218
219
|
// The sniffed_column_counts variable keeps track of the number of columns found for each row
|
219
220
|
auto &sniffed_column_counts = scanner->ParseChunk();
|
220
221
|
idx_t dirty_notes = 0;
|
221
222
|
idx_t dirty_notes_minus_comments = 0;
|
223
|
+
idx_t empty_lines = 0;
|
222
224
|
if (sniffed_column_counts.error) {
|
223
225
|
if (!scanner->error_handler->HasError(MAXIMUM_LINE_SIZE)) {
|
224
226
|
all_fail_max_line_size = false;
|
@@ -232,7 +234,7 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
|
|
232
234
|
idx_t consistent_rows = 0;
|
233
235
|
idx_t num_cols = sniffed_column_counts.result_position == 0 ? 1 : sniffed_column_counts[0].number_of_columns;
|
234
236
|
const bool ignore_errors = options.ignore_errors.GetValue();
|
235
|
-
// If we are ignoring errors and not null_padding
|
237
|
+
// If we are ignoring errors and not null_padding, we pick the most frequent number of columns as the right one
|
236
238
|
const bool use_most_frequent_columns = ignore_errors && !options.null_padding;
|
237
239
|
if (use_most_frequent_columns) {
|
238
240
|
num_cols = sniffed_column_counts.GetMostFrequentColumnCount();
|
@@ -242,8 +244,8 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
|
|
242
244
|
idx_t ignored_rows = 0;
|
243
245
|
const bool allow_padding = options.null_padding;
|
244
246
|
bool first_valid = false;
|
245
|
-
if (sniffed_column_counts.result_position > rows_read) {
|
246
|
-
rows_read = sniffed_column_counts.result_position;
|
247
|
+
if (sniffed_column_counts.result_position > stats.rows_read) {
|
248
|
+
stats.rows_read = sniffed_column_counts.result_position;
|
247
249
|
}
|
248
250
|
if (set_columns.IsCandidateUnacceptable(num_cols, options.null_padding, ignore_errors,
|
249
251
|
sniffed_column_counts[0].last_value_always_empty)) {
|
@@ -279,9 +281,10 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
|
|
279
281
|
sniffed_column_counts.state_machine.dialect_options.rows_until_header = row;
|
280
282
|
}
|
281
283
|
padding_count = 0;
|
282
|
-
// we use the maximum
|
284
|
+
// we use the maximum number of num_cols that we find
|
283
285
|
num_cols = sniffed_column_counts[row].number_of_columns;
|
284
|
-
dirty_notes = row;
|
286
|
+
dirty_notes = row + sniffed_column_counts[row].empty_lines;
|
287
|
+
empty_lines = sniffed_column_counts[row].empty_lines;
|
285
288
|
dirty_notes_minus_comments = dirty_notes - comment_rows;
|
286
289
|
header_idx = row;
|
287
290
|
consistent_rows = 1;
|
@@ -289,7 +292,8 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
|
|
289
292
|
if (!first_valid) {
|
290
293
|
first_valid = true;
|
291
294
|
sniffed_column_counts.state_machine.dialect_options.rows_until_header = row;
|
292
|
-
dirty_notes = row;
|
295
|
+
dirty_notes = row + sniffed_column_counts[row].empty_lines;
|
296
|
+
empty_lines = sniffed_column_counts[row].empty_lines;
|
293
297
|
dirty_notes_minus_comments = dirty_notes - comment_rows;
|
294
298
|
num_cols = sniffed_column_counts[row].number_of_columns;
|
295
299
|
}
|
@@ -311,24 +315,26 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
|
|
311
315
|
consistent_rows += padding_count;
|
312
316
|
|
313
317
|
// Whether there are more values (rows) available that are consistent, exceeding the current best.
|
314
|
-
const bool more_values = consistent_rows > best_consistent_rows && num_cols >= max_columns_found;
|
318
|
+
const bool more_values = consistent_rows > stats.best_consistent_rows && num_cols >= max_columns_found;
|
315
319
|
|
316
|
-
const bool more_columns = consistent_rows == best_consistent_rows && num_cols > max_columns_found;
|
320
|
+
const bool more_columns = consistent_rows == stats.best_consistent_rows && num_cols > max_columns_found;
|
317
321
|
|
318
322
|
// If additional padding is required when compared to the previous padding count.
|
319
|
-
const bool require_more_padding = padding_count > prev_padding_count;
|
323
|
+
const bool require_more_padding = padding_count > stats.prev_padding_count;
|
320
324
|
|
321
325
|
// If less padding is now required when compared to the previous padding count.
|
322
|
-
const bool require_less_padding = padding_count < prev_padding_count;
|
326
|
+
const bool require_less_padding = padding_count < stats.prev_padding_count;
|
323
327
|
|
324
328
|
// If there was only a single column before, and the new number of columns exceeds that.
|
325
|
-
const bool single_column_before =
|
329
|
+
const bool single_column_before =
|
330
|
+
max_columns_found < 2 && num_cols > max_columns_found * successful_candidates.size();
|
326
331
|
|
327
332
|
// If the number of rows is consistent with the calculated value after accounting for skipped rows and the
|
328
333
|
// start row.
|
329
|
-
const bool rows_consistent =
|
330
|
-
|
331
|
-
|
334
|
+
const bool rows_consistent = consistent_rows +
|
335
|
+
(dirty_notes_minus_comments - options.dialect_options.skip_rows.GetValue()) +
|
336
|
+
comment_rows - empty_lines ==
|
337
|
+
sniffed_column_counts.result_position - options.dialect_options.skip_rows.GetValue();
|
332
338
|
// If there are more than one consistent row.
|
333
339
|
const bool more_than_one_row = consistent_rows > 1;
|
334
340
|
|
@@ -336,14 +342,14 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
|
|
336
342
|
const bool more_than_one_column = num_cols > 1;
|
337
343
|
|
338
344
|
// If the start position is valid.
|
339
|
-
const bool start_good =
|
340
|
-
|
345
|
+
const bool start_good =
|
346
|
+
!successful_candidates.empty() &&
|
347
|
+
dirty_notes <= successful_candidates.front()->GetStateMachine().dialect_options.skip_rows.GetValue();
|
341
348
|
|
342
349
|
// If padding happened but it is not allowed.
|
343
350
|
const bool invalid_padding = !allow_padding && padding_count > 0;
|
344
351
|
|
345
|
-
const bool comments_are_acceptable = AreCommentsAcceptable(
|
346
|
-
sniffed_column_counts, num_cols, options.dialect_options.state_machine_options.comment.IsSetByUser());
|
352
|
+
const bool comments_are_acceptable = AreCommentsAcceptable(sniffed_column_counts, num_cols, options);
|
347
353
|
|
348
354
|
const bool quoted =
|
349
355
|
scanner->ever_quoted &&
|
@@ -360,44 +366,44 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
|
|
360
366
|
// If rows are consistent and no invalid padding happens, this is the best suitable candidate if one of the
|
361
367
|
// following is valid:
|
362
368
|
// - There's a single column before.
|
363
|
-
// - There are more values and no additional padding is required.
|
369
|
+
// - There are more values, and no additional padding is required.
|
364
370
|
// - There's more than one column and less padding is required.
|
365
371
|
if (columns_match_set && (rows_consistent || (set_columns.IsSet() && ignore_errors)) &&
|
366
372
|
(single_column_before || ((more_values || more_columns) && !require_more_padding) ||
|
367
|
-
(more_than_one_column && require_less_padding) || quoted) &&
|
373
|
+
(more_than_one_column && require_less_padding) || (quoted && comment_rows == 0)) &&
|
368
374
|
!invalid_padding && comments_are_acceptable) {
|
369
|
-
if (!
|
370
|
-
consistent_rows <= best_consistent_rows) {
|
375
|
+
if (!successful_candidates.empty() && set_columns.IsSet() && max_columns_found == set_columns.Size() &&
|
376
|
+
consistent_rows <= stats.best_consistent_rows) {
|
371
377
|
// We have a candidate that fits our requirements better
|
372
|
-
if (
|
378
|
+
if (successful_candidates.front()->ever_quoted || !scanner->ever_quoted) {
|
373
379
|
return;
|
374
380
|
}
|
375
381
|
}
|
376
382
|
auto &sniffing_state_machine = scanner->GetStateMachine();
|
377
383
|
|
378
|
-
if (!
|
384
|
+
if (!successful_candidates.empty() && successful_candidates.front()->ever_quoted) {
|
379
385
|
// Give preference to quoted boys.
|
380
386
|
if (!scanner->ever_quoted) {
|
381
387
|
return;
|
382
388
|
} else {
|
383
389
|
// Give preference to one that got escaped
|
384
|
-
if (!scanner->ever_escaped &&
|
390
|
+
if (!scanner->ever_escaped && successful_candidates.front()->ever_escaped &&
|
385
391
|
sniffing_state_machine.dialect_options.state_machine_options.strict_mode.GetValue()) {
|
386
392
|
return;
|
387
393
|
}
|
388
|
-
if (best_consistent_rows == consistent_rows && num_cols >= max_columns_found) {
|
394
|
+
if (stats.best_consistent_rows == consistent_rows && num_cols >= max_columns_found) {
|
389
395
|
// If both have not been escaped, this might get solved later on.
|
390
396
|
sniffing_state_machine.dialect_options.num_cols = num_cols;
|
391
|
-
|
397
|
+
successful_candidates.emplace_back(std::move(scanner));
|
392
398
|
max_columns_found = num_cols;
|
393
399
|
return;
|
394
400
|
}
|
395
401
|
}
|
396
402
|
}
|
397
|
-
if (max_columns_found == num_cols && (ignored_rows > min_ignored_rows)) {
|
403
|
+
if (max_columns_found == num_cols && (ignored_rows > stats.min_ignored_rows)) {
|
398
404
|
return;
|
399
405
|
}
|
400
|
-
if (max_columns_found > 1 && num_cols > max_columns_found && consistent_rows < best_consistent_rows / 2 &&
|
406
|
+
if (max_columns_found > 1 && num_cols > max_columns_found && consistent_rows < stats.best_consistent_rows / 2 &&
|
401
407
|
(options.null_padding || ignore_errors)) {
|
402
408
|
// When null_padding is true, we only give preference to a max number of columns if null padding is at least
|
403
409
|
// 50% as consistent as the best case scenario
|
@@ -406,39 +412,40 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
|
|
406
412
|
if (quoted && num_cols < max_columns_found) {
|
407
413
|
if (scanner->ever_escaped &&
|
408
414
|
sniffing_state_machine.dialect_options.state_machine_options.strict_mode.GetValue()) {
|
409
|
-
for (auto &candidate :
|
415
|
+
for (auto &candidate : successful_candidates) {
|
410
416
|
if (candidate->ever_quoted && candidate->ever_escaped) {
|
411
417
|
return;
|
412
418
|
}
|
413
419
|
}
|
414
420
|
|
415
421
|
} else {
|
416
|
-
for (auto &candidate :
|
422
|
+
for (auto &candidate : successful_candidates) {
|
417
423
|
if (candidate->ever_quoted) {
|
418
424
|
return;
|
419
425
|
}
|
420
426
|
}
|
421
427
|
}
|
422
428
|
}
|
423
|
-
best_consistent_rows = consistent_rows;
|
429
|
+
stats.best_consistent_rows = consistent_rows;
|
424
430
|
max_columns_found = num_cols;
|
425
|
-
prev_padding_count = padding_count;
|
426
|
-
min_ignored_rows = ignored_rows;
|
431
|
+
stats.prev_padding_count = padding_count;
|
432
|
+
stats.min_ignored_rows = ignored_rows;
|
427
433
|
|
428
434
|
if (options.dialect_options.skip_rows.IsSetByUser()) {
|
429
|
-
// If skip rows
|
435
|
+
// If skip rows are set by the user, and we found dirty notes, we only accept it if either null_padding or
|
430
436
|
// ignore_errors is set we have comments
|
431
|
-
if (dirty_notes != 0 && !options.null_padding && !options.ignore_errors.GetValue() &&
|
437
|
+
if (dirty_notes - empty_lines != 0 && !options.null_padding && !options.ignore_errors.GetValue() &&
|
438
|
+
comment_rows == 0) {
|
432
439
|
return;
|
433
440
|
}
|
434
441
|
sniffing_state_machine.dialect_options.skip_rows = options.dialect_options.skip_rows.GetValue();
|
435
442
|
} else if (!options.null_padding) {
|
436
443
|
sniffing_state_machine.dialect_options.skip_rows = dirty_notes_minus_comments;
|
437
444
|
}
|
438
|
-
|
445
|
+
successful_candidates.clear();
|
439
446
|
sniffing_state_machine.dialect_options.num_cols = num_cols;
|
440
447
|
lines_sniffed = sniffed_column_counts.result_position;
|
441
|
-
|
448
|
+
successful_candidates.emplace_back(std::move(scanner));
|
442
449
|
return;
|
443
450
|
}
|
444
451
|
// If there's more than one row and column, the start is good, rows are consistent,
|
@@ -449,7 +456,7 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
|
|
449
456
|
auto &sniffing_state_machine = scanner->GetStateMachine();
|
450
457
|
|
451
458
|
if (options.dialect_options.skip_rows.IsSetByUser()) {
|
452
|
-
// If skip rows
|
459
|
+
// If skip rows are set by the user, and we found dirty notes, we only accept it if either null_padding or
|
453
460
|
// ignore_errors is set
|
454
461
|
if (dirty_notes != 0 && !options.null_padding && !options.ignore_errors.GetValue()) {
|
455
462
|
return;
|
@@ -460,7 +467,7 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
|
|
460
467
|
}
|
461
468
|
sniffing_state_machine.dialect_options.num_cols = num_cols;
|
462
469
|
lines_sniffed = sniffed_column_counts.result_position;
|
463
|
-
|
470
|
+
successful_candidates.emplace_back(std::move(scanner));
|
464
471
|
}
|
465
472
|
}
|
466
473
|
|
@@ -481,8 +488,8 @@ bool CSVSniffer::RefineCandidateNextChunk(ColumnCountScanner &candidate) const {
|
|
481
488
|
}
|
482
489
|
|
483
490
|
void CSVSniffer::RefineCandidates() {
|
484
|
-
// It's very frequent that more than one dialect can parse a csv file
|
485
|
-
// fully on the whole sample dataset, when/if it fails we go to the next one.
|
491
|
+
// It's very frequent that more than one dialect can parse a csv file; hence here we run one state machine
|
492
|
+
// fully on the whole sample dataset, when/if it fails, we go to the next one.
|
486
493
|
if (candidates.empty()) {
|
487
494
|
// No candidates to refine
|
488
495
|
return;
|
@@ -587,22 +594,14 @@ NewLineIdentifier CSVSniffer::DetectNewLineDelimiter(CSVBufferManager &buffer_ma
|
|
587
594
|
void CSVSniffer::DetectDialect() {
|
588
595
|
// Variables for Dialect Detection
|
589
596
|
DialectCandidates dialect_candidates(options.dialect_options.state_machine_options);
|
590
|
-
|
591
|
-
idx_t rows_read = 0;
|
592
|
-
// Best Number of consistent rows (i.e., presenting all columns)
|
593
|
-
idx_t best_consistent_rows = 0;
|
594
|
-
// If padding was necessary (i.e., rows are missing some columns, how many)
|
595
|
-
idx_t prev_padding_count = 0;
|
596
|
-
// Min number of ignores rows
|
597
|
-
idx_t best_ignored_rows = 0;
|
597
|
+
CandidateStats stats;
|
598
598
|
// Vector of CSV State Machines
|
599
599
|
vector<unique_ptr<ColumnCountScanner>> csv_state_machines;
|
600
600
|
// Step 1: Generate state machines
|
601
601
|
GenerateStateMachineSearchSpace(csv_state_machines, dialect_candidates);
|
602
602
|
// Step 2: Analyze all candidates on the first chunk
|
603
603
|
for (auto &state_machine : csv_state_machines) {
|
604
|
-
AnalyzeDialectCandidate(std::move(state_machine),
|
605
|
-
best_ignored_rows);
|
604
|
+
AnalyzeDialectCandidate(std::move(state_machine), stats, candidates);
|
606
605
|
}
|
607
606
|
// Step 3: Loop over candidates and find if they can still produce good results for the remaining chunks
|
608
607
|
RefineCandidates();
|
@@ -17,14 +17,11 @@
|
|
17
17
|
#include "duckdb/parallel/thread_context.hpp"
|
18
18
|
#include "duckdb/planner/expression/bound_aggregate_expression.hpp"
|
19
19
|
#include "duckdb/planner/expression/bound_reference_expression.hpp"
|
20
|
-
#include "duckdb/planner/filter/conjunction_filter.hpp"
|
21
20
|
#include "duckdb/planner/filter/constant_filter.hpp"
|
22
21
|
#include "duckdb/planner/filter/in_filter.hpp"
|
23
|
-
#include "duckdb/planner/filter/null_filter.hpp"
|
24
22
|
#include "duckdb/planner/filter/optional_filter.hpp"
|
25
23
|
#include "duckdb/planner/table_filter.hpp"
|
26
24
|
#include "duckdb/storage/buffer_manager.hpp"
|
27
|
-
#include "duckdb/storage/storage_manager.hpp"
|
28
25
|
#include "duckdb/storage/temporary_memory_manager.hpp"
|
29
26
|
|
30
27
|
namespace duckdb {
|
@@ -249,7 +246,7 @@ public:
|
|
249
246
|
};
|
250
247
|
|
251
248
|
unique_ptr<JoinHashTable> PhysicalHashJoin::InitializeHashTable(ClientContext &context) const {
|
252
|
-
auto result = make_uniq<JoinHashTable>(context, conditions, payload_columns.col_types, join_type,
|
249
|
+
auto result = make_uniq<JoinHashTable>(context, *this, conditions, payload_columns.col_types, join_type,
|
253
250
|
rhs_output_columns.col_idxs);
|
254
251
|
if (!delim_types.empty() && join_type == JoinType::MARK) {
|
255
252
|
// correlated MARK join
|
@@ -779,7 +776,9 @@ unique_ptr<DataChunk> JoinFilterPushdownInfo::Finalize(ClientContext &context, o
|
|
779
776
|
continue;
|
780
777
|
}
|
781
778
|
// if the HT is small we can generate a complete "OR" filter
|
782
|
-
|
779
|
+
// but only if the join condition is equality.
|
780
|
+
if (ht && ht->Count() > 1 && ht->Count() <= dynamic_or_filter_threshold &&
|
781
|
+
cmp == ExpressionType::COMPARE_EQUAL) {
|
783
782
|
PushInFilter(info, *ht, op, filter_idx, filter_col_idx);
|
784
783
|
}
|
785
784
|
|
@@ -852,6 +851,8 @@ SinkFinalizeType PhysicalHashJoin::Finalize(Pipeline &pipeline, Event &event, Cl
|
|
852
851
|
sink.external = false;
|
853
852
|
}
|
854
853
|
}
|
854
|
+
DUCKDB_LOG(context, PhysicalOperatorLogType, *this, "PhysicalHashJoin", "Finalize",
|
855
|
+
{{"external", to_string(sink.external)}});
|
855
856
|
if (sink.external) {
|
856
857
|
// External Hash Join
|
857
858
|
sink.perfect_join_executor.reset();
|
@@ -862,8 +863,12 @@ SinkFinalizeType PhysicalHashJoin::Finalize(Pipeline &pipeline, Event &event, Cl
|
|
862
863
|
if (!very_very_skewed &&
|
863
864
|
(max_partition_ht_size + sink.probe_side_requirement) > sink.temporary_memory_state->GetReservation()) {
|
864
865
|
// We have to repartition
|
866
|
+
const auto radix_bits_before = ht.GetRadixBits();
|
865
867
|
ht.SetRepartitionRadixBits(sink.temporary_memory_state->GetReservation(), sink.max_partition_size,
|
866
868
|
sink.max_partition_count);
|
869
|
+
DUCKDB_LOG(context, PhysicalOperatorLogType, *this, "PhysicalHashJoin", "Repartition",
|
870
|
+
{{"partitions_before", to_string(RadixPartitioning::NumberOfPartitions(radix_bits_before))},
|
871
|
+
{"partitions_after", to_string(RadixPartitioning::NumberOfPartitions(ht.GetRadixBits()))}});
|
867
872
|
auto new_event = make_shared_ptr<HashJoinRepartitionEvent>(pipeline, *this, sink, sink.local_hash_tables);
|
868
873
|
event.InsertEvent(std::move(new_event));
|
869
874
|
} else {
|
@@ -112,12 +112,16 @@ public:
|
|
112
112
|
}
|
113
113
|
// initialize writing to the file
|
114
114
|
global_state = op.function.copy_to_initialize_global(context, *op.bind_data, op.file_path);
|
115
|
+
if (op.function.initialize_operator) {
|
116
|
+
op.function.initialize_operator(*global_state, op);
|
117
|
+
}
|
115
118
|
if (op.return_type == CopyFunctionReturnType::WRITTEN_FILE_STATISTICS) {
|
116
119
|
written_file_info = make_uniq<CopyToFileInfo>(op.file_path);
|
117
120
|
written_file_info->file_stats = make_uniq<CopyFunctionFileStatistics>();
|
118
121
|
op.function.copy_to_get_written_statistics(context, *op.bind_data, *global_state,
|
119
122
|
*written_file_info->file_stats);
|
120
123
|
}
|
124
|
+
initialized = true;
|
121
125
|
}
|
122
126
|
|
123
127
|
void AddBatchData(idx_t batch_index, unique_ptr<PreparedBatchData> new_batch, idx_t memory_usage) {
|
@@ -52,6 +52,7 @@ public:
|
|
52
52
|
file_write_lock_if_rotating(make_uniq<StorageLock>()) {
|
53
53
|
max_open_files = ClientConfig::GetConfig(context).partitioned_write_max_open_files;
|
54
54
|
}
|
55
|
+
|
55
56
|
StorageLock lock;
|
56
57
|
atomic<bool> initialized;
|
57
58
|
atomic<idx_t> rows_copied;
|
@@ -78,6 +79,9 @@ public:
|
|
78
79
|
}
|
79
80
|
// initialize writing to the file
|
80
81
|
global_state = op.function.copy_to_initialize_global(context, *op.bind_data, op.file_path);
|
82
|
+
if (op.function.initialize_operator) {
|
83
|
+
op.function.initialize_operator(*global_state, op);
|
84
|
+
}
|
81
85
|
auto written_file_info = AddFile(*write_lock, op.file_path, op.return_type);
|
82
86
|
if (written_file_info) {
|
83
87
|
op.function.copy_to_get_written_statistics(context, *op.bind_data, *global_state,
|
@@ -217,6 +221,9 @@ public:
|
|
217
221
|
written_file_info->partition_keys = Value::MAP(LogicalType::VARCHAR, LogicalType::VARCHAR,
|
218
222
|
std::move(partition_keys), std::move(partition_values));
|
219
223
|
}
|
224
|
+
if (op.function.initialize_operator) {
|
225
|
+
op.function.initialize_operator(*info->global_state, op);
|
226
|
+
}
|
220
227
|
auto &result = *info;
|
221
228
|
info->active_writes = 1;
|
222
229
|
// store in active write map
|
@@ -353,6 +360,9 @@ unique_ptr<GlobalFunctionData> PhysicalCopyToFile::CreateFileState(ClientContext
|
|
353
360
|
if (written_file_info) {
|
354
361
|
function.copy_to_get_written_statistics(context, *bind_data, *result, *written_file_info->file_stats);
|
355
362
|
}
|
363
|
+
if (function.initialize_operator) {
|
364
|
+
function.initialize_operator(*result, *this);
|
365
|
+
}
|
356
366
|
return result;
|
357
367
|
}
|
358
368
|
|
@@ -408,12 +418,9 @@ void CheckDirectory(FileSystem &fs, const string &file_path, CopyOverwriteMode o
|
|
408
418
|
unique_ptr<GlobalSinkState> PhysicalCopyToFile::GetGlobalSinkState(ClientContext &context) const {
|
409
419
|
if (partition_output || per_thread_output || rotate) {
|
410
420
|
auto &fs = FileSystem::GetFileSystem(context);
|
411
|
-
if (fs.
|
412
|
-
|
413
|
-
|
414
|
-
// for remote files we cannot do anything - as we cannot delete the file
|
415
|
-
throw IOException("Cannot write to \"%s\" - it exists and is a file, not a directory!", file_path);
|
416
|
-
} else {
|
421
|
+
if (!fs.IsRemoteFile(file_path)) {
|
422
|
+
if (fs.FileExists(file_path)) {
|
423
|
+
// the target file exists AND is a file (not a directory)
|
417
424
|
// for local files we can remove the file if OVERWRITE_OR_IGNORE is enabled
|
418
425
|
if (overwrite_mode == CopyOverwriteMode::COPY_OVERWRITE) {
|
419
426
|
fs.RemoveFile(file_path);
|
@@ -432,7 +439,7 @@ unique_ptr<GlobalSinkState> PhysicalCopyToFile::GetGlobalSinkState(ClientContext
|
|
432
439
|
}
|
433
440
|
|
434
441
|
auto state = make_uniq<CopyToFunctionGlobalState>(context);
|
435
|
-
if (!per_thread_output && rotate) {
|
442
|
+
if (!per_thread_output && rotate && write_empty_file) {
|
436
443
|
auto global_lock = state->lock.GetExclusiveLock();
|
437
444
|
state->global_state = CreateFileState(context, *state, *global_lock);
|
438
445
|
}
|
@@ -490,6 +497,9 @@ void PhysicalCopyToFile::WriteRotateInternal(ExecutionContext &context, GlobalSi
|
|
490
497
|
while (true) {
|
491
498
|
// Grab global lock and dereference the current file state (and corresponding lock)
|
492
499
|
auto global_guard = g.lock.GetExclusiveLock();
|
500
|
+
if (!g.global_state) {
|
501
|
+
g.global_state = CreateFileState(context.client, *sink_state, *global_guard);
|
502
|
+
}
|
493
503
|
auto &file_state = *g.global_state;
|
494
504
|
auto &file_lock = *g.file_write_lock_if_rotating;
|
495
505
|
if (rotate && function.rotate_next_file(file_state, *bind_data, file_size_bytes)) {
|
@@ -523,7 +533,7 @@ SinkResultType PhysicalCopyToFile::Sink(ExecutionContext &context, DataChunk &ch
|
|
523
533
|
auto &g = input.global_state.Cast<CopyToFunctionGlobalState>();
|
524
534
|
auto &l = input.local_state.Cast<CopyToFunctionLocalState>();
|
525
535
|
|
526
|
-
if (!write_empty_file) {
|
536
|
+
if (!write_empty_file && !rotate) {
|
527
537
|
// if we are only writing the file when there are rows to write we need to initialize here
|
528
538
|
g.Initialize(context.client, *this);
|
529
539
|
}
|
@@ -19,7 +19,7 @@ namespace duckdb {
|
|
19
19
|
|
20
20
|
void ReorderTableEntries(catalog_entry_vector_t &tables);
|
21
21
|
|
22
|
-
using
|
22
|
+
using duckdb::stringstream;
|
23
23
|
|
24
24
|
PhysicalExport::PhysicalExport(vector<LogicalType> types, CopyFunction function, unique_ptr<CopyInfo> info,
|
25
25
|
idx_t estimated_cardinality, unique_ptr<BoundExportData> exported_tables)
|
@@ -72,6 +72,7 @@ SourceResultType PhysicalAttach::GetData(ExecutionContext &context, DataChunk &c
|
|
72
72
|
if (!options.default_table.name.empty()) {
|
73
73
|
attached_db->GetCatalog().SetDefaultTable(options.default_table.schema, options.default_table.name);
|
74
74
|
}
|
75
|
+
attached_db->FinalizeLoad(context.client);
|
75
76
|
return SourceResultType::FINISHED;
|
76
77
|
}
|
77
78
|
|
@@ -28,17 +28,17 @@ unique_ptr<PhysicalPlan> PhysicalPlanGenerator::Plan(unique_ptr<LogicalOperator>
|
|
28
28
|
PhysicalOperator &PhysicalPlanGenerator::ResolveAndPlan(unique_ptr<LogicalOperator> op) {
|
29
29
|
auto &profiler = QueryProfiler::Get(context);
|
30
30
|
|
31
|
+
// Resolve the types of each operator.
|
32
|
+
profiler.StartPhase(MetricsType::PHYSICAL_PLANNER_RESOLVE_TYPES);
|
33
|
+
op->ResolveOperatorTypes();
|
34
|
+
profiler.EndPhase();
|
35
|
+
|
31
36
|
// Resolve the column references.
|
32
37
|
profiler.StartPhase(MetricsType::PHYSICAL_PLANNER_COLUMN_BINDING);
|
33
38
|
ColumnBindingResolver resolver;
|
34
39
|
resolver.VisitOperator(*op);
|
35
40
|
profiler.EndPhase();
|
36
41
|
|
37
|
-
// Resolve the types of each operator.
|
38
|
-
profiler.StartPhase(MetricsType::PHYSICAL_PLANNER_RESOLVE_TYPES);
|
39
|
-
op->ResolveOperatorTypes();
|
40
|
-
profiler.EndPhase();
|
41
|
-
|
42
42
|
// Create the main physical plan.
|
43
43
|
profiler.StartPhase(MetricsType::PHYSICAL_PLANNER_CREATE_PLAN);
|
44
44
|
physical_plan = PlanInternal(*op);
|
@@ -124,7 +124,8 @@ static string_t HandleString(Vector &vec, const char *buf, idx_t start, idx_t en
|
|
124
124
|
bool escaped = false;
|
125
125
|
|
126
126
|
bool quoted = false;
|
127
|
-
|
127
|
+
// Satisfy GCC warning about uninitialized variable
|
128
|
+
char quote_char = '\0';
|
128
129
|
stack<char> scopes;
|
129
130
|
for (idx_t i = 0; i < length; i++) {
|
130
131
|
auto current_char = buf[start + i];
|
@@ -36,6 +36,10 @@ bool TableFunctionData::Equals(const FunctionData &other) const {
|
|
36
36
|
return false;
|
37
37
|
}
|
38
38
|
|
39
|
+
bool FunctionData::SupportStatementCache() const {
|
40
|
+
return true;
|
41
|
+
}
|
42
|
+
|
39
43
|
Function::Function(string name_p) : name(std::move(name_p)) {
|
40
44
|
}
|
41
45
|
Function::~Function() {
|
@@ -882,9 +882,15 @@ ScalarFunctionSet OperatorMultiplyFun::GetFunctions() {
|
|
882
882
|
multiply.AddFunction(
|
883
883
|
ScalarFunction({LogicalType::INTERVAL, LogicalType::DOUBLE}, LogicalType::INTERVAL,
|
884
884
|
ScalarFunction::BinaryFunction<interval_t, double, interval_t, MultiplyOperator>));
|
885
|
+
multiply.AddFunction(
|
886
|
+
ScalarFunction({LogicalType::DOUBLE, LogicalType::INTERVAL}, LogicalType::INTERVAL,
|
887
|
+
ScalarFunction::BinaryFunction<double, interval_t, interval_t, MultiplyOperator>));
|
885
888
|
multiply.AddFunction(
|
886
889
|
ScalarFunction({LogicalType::BIGINT, LogicalType::INTERVAL}, LogicalType::INTERVAL,
|
887
890
|
ScalarFunction::BinaryFunction<int64_t, interval_t, interval_t, MultiplyOperator>));
|
891
|
+
multiply.AddFunction(
|
892
|
+
ScalarFunction({LogicalType::INTERVAL, LogicalType::BIGINT}, LogicalType::INTERVAL,
|
893
|
+
ScalarFunction::BinaryFunction<interval_t, int64_t, interval_t, MultiplyOperator>));
|
888
894
|
for (auto &func : multiply.functions) {
|
889
895
|
ScalarFunction::SetReturnsError(func);
|
890
896
|
}
|
@@ -84,6 +84,7 @@ static void RemapMap(Vector &input, Vector &default_vector, Vector &result, idx_
|
|
84
84
|
auto &result_key_vector = MapVector::GetKeys(result);
|
85
85
|
auto &result_value_vector = MapVector::GetValues(result);
|
86
86
|
auto list_size = ListVector::GetListSize(input);
|
87
|
+
ListVector::Reserve(result, list_size);
|
87
88
|
ListVector::SetListSize(result, list_size);
|
88
89
|
|
89
90
|
bool has_top_level_null = false;
|
@@ -136,6 +137,7 @@ static void RemapList(Vector &input, Vector &default_vector, Vector &result, idx
|
|
136
137
|
auto &input_vector = ListVector::GetEntry(input);
|
137
138
|
auto &result_vector = ListVector::GetEntry(result);
|
138
139
|
auto list_size = ListVector::GetListSize(input);
|
140
|
+
ListVector::Reserve(result, list_size);
|
139
141
|
ListVector::SetListSize(result, list_size);
|
140
142
|
|
141
143
|
bool has_top_level_null = false;
|
@@ -401,6 +403,9 @@ struct RemapEntry {
|
|
401
403
|
auto &child_types = StructType::GetChildTypes(default_type);
|
402
404
|
for (idx_t child_idx = 0; child_idx < child_types.size(); child_idx++) {
|
403
405
|
auto &child_default = child_types[child_idx];
|
406
|
+
if (!result_entry->second.child_remaps || !entry->second.child_map) {
|
407
|
+
throw BinderException("No child remaps found");
|
408
|
+
}
|
404
409
|
HandleDefault(child_idx, child_default.first, child_default.second, *entry->second.child_map,
|
405
410
|
*result_entry->second.child_remaps);
|
406
411
|
}
|
@@ -542,6 +547,10 @@ static unique_ptr<FunctionData> RemapStructBind(ClientContext &context, ScalarFu
|
|
542
547
|
if (arg->return_type.id() == LogicalTypeId::UNKNOWN) {
|
543
548
|
throw ParameterNotResolvedException();
|
544
549
|
}
|
550
|
+
if (arg->return_type.id() == LogicalTypeId::SQLNULL && arg_idx == 2) {
|
551
|
+
// remap target can be NULL
|
552
|
+
continue;
|
553
|
+
}
|
545
554
|
if (!arg->return_type.IsNested()) {
|
546
555
|
throw BinderException("Struct remap can only remap nested types, not '%s'", arg->return_type.ToString());
|
547
556
|
} else if (arg->return_type.id() == LogicalTypeId::STRUCT && StructType::IsUnnamed(arg->return_type)) {
|
@@ -571,11 +580,11 @@ static unique_ptr<FunctionData> RemapStructBind(ClientContext &context, ScalarFu
|
|
571
580
|
auto target_map = RemapIndex::GetMap(to_type);
|
572
581
|
|
573
582
|
Value remap_val = ExpressionExecutor::EvaluateScalar(context, *arguments[2]);
|
574
|
-
auto &remap_types = StructType::GetChildTypes(arguments[2]->return_type);
|
575
583
|
|
576
584
|
// (recursively) generate the remap entries
|
577
585
|
case_insensitive_map_t<RemapEntry> remap_map;
|
578
586
|
if (!remap_val.IsNull()) {
|
587
|
+
auto &remap_types = StructType::GetChildTypes(arguments[2]->return_type);
|
579
588
|
auto &remap_values = StructValue::GetChildren(remap_val);
|
580
589
|
for (idx_t remap_idx = 0; remap_idx < remap_values.size(); remap_idx++) {
|
581
590
|
auto &remap_val = remap_values[remap_idx];
|
@@ -221,6 +221,7 @@ static unique_ptr<FunctionData> WriteCSVBind(ClientContext &context, CopyFunctio
|
|
221
221
|
memset(bind_data->requires_quotes.get(), 0, sizeof(bool) * 256);
|
222
222
|
bind_data->requires_quotes['\n'] = true;
|
223
223
|
bind_data->requires_quotes['\r'] = true;
|
224
|
+
bind_data->requires_quotes['#'] = true;
|
224
225
|
bind_data->requires_quotes[NumericCast<idx_t>(
|
225
226
|
bind_data->options.dialect_options.state_machine_options.delimiter.GetValue()[0])] = true;
|
226
227
|
bind_data->requires_quotes[NumericCast<idx_t>(
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#ifndef DUCKDB_PATCH_VERSION
|
2
|
-
#define DUCKDB_PATCH_VERSION "
|
2
|
+
#define DUCKDB_PATCH_VERSION "1"
|
3
3
|
#endif
|
4
4
|
#ifndef DUCKDB_MINOR_VERSION
|
5
5
|
#define DUCKDB_MINOR_VERSION 3
|
@@ -8,10 +8,10 @@
|
|
8
8
|
#define DUCKDB_MAJOR_VERSION 1
|
9
9
|
#endif
|
10
10
|
#ifndef DUCKDB_VERSION
|
11
|
-
#define DUCKDB_VERSION "v1.3.
|
11
|
+
#define DUCKDB_VERSION "v1.3.1"
|
12
12
|
#endif
|
13
13
|
#ifndef DUCKDB_SOURCE_ID
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
14
|
+
#define DUCKDB_SOURCE_ID "2063dda3e6"
|
15
15
|
#endif
|
16
16
|
#include "duckdb/function/table/system_functions.hpp"
|
17
17
|
#include "duckdb/main/database.hpp"
|
@@ -109,6 +109,7 @@ public:
|
|
109
109
|
}
|
110
110
|
virtual void Initialize(bool load_builtin) = 0;
|
111
111
|
virtual void Initialize(optional_ptr<ClientContext> context, bool load_builtin);
|
112
|
+
virtual void FinalizeLoad(optional_ptr<ClientContext> context);
|
112
113
|
|
113
114
|
bool IsSystemCatalog() const;
|
114
115
|
bool IsTemporaryCatalog() const;
|
@@ -66,7 +66,7 @@ private:
|
|
66
66
|
unique_ptr<CatalogEntry> ChangeColumnType(ClientContext &context, ChangeColumnTypeInfo &info);
|
67
67
|
unique_ptr<CatalogEntry> SetNotNull(ClientContext &context, SetNotNullInfo &info);
|
68
68
|
unique_ptr<CatalogEntry> DropNotNull(ClientContext &context, DropNotNullInfo &info);
|
69
|
-
unique_ptr<CatalogEntry> AddForeignKeyConstraint(
|
69
|
+
unique_ptr<CatalogEntry> AddForeignKeyConstraint(AlterForeignKeyInfo &info);
|
70
70
|
unique_ptr<CatalogEntry> DropForeignKeyConstraint(ClientContext &context, AlterForeignKeyInfo &info);
|
71
71
|
unique_ptr<CatalogEntry> SetColumnComment(ClientContext &context, SetColumnCommentInfo &info);
|
72
72
|
unique_ptr<CatalogEntry> AddConstraint(ClientContext &context, AddConstraintInfo &info);
|
@@ -35,7 +35,7 @@ private:
|
|
35
35
|
static string WriteOptionallyQuoted(const string &input);
|
36
36
|
};
|
37
37
|
|
38
|
-
enum class CatalogSetPathType { SET_SCHEMA, SET_SCHEMAS };
|
38
|
+
enum class CatalogSetPathType { SET_SCHEMA, SET_SCHEMAS, SET_DIRECTLY };
|
39
39
|
|
40
40
|
//! The schema search path, in order by which entries are searched if no schema entry is provided
|
41
41
|
class CatalogSearchPath {
|