duckdb 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +2 -1
- package/package.json +1 -1
- package/src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp +1 -1
- package/src/duckdb/extension/json/include/json_common.hpp +14 -4
- package/src/duckdb/extension/json/include/json_executors.hpp +11 -3
- package/src/duckdb/extension/json/json_extension.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_extract.cpp +11 -3
- package/src/duckdb/extension/json/json_functions/json_value.cpp +4 -3
- package/src/duckdb/extension/json/json_functions.cpp +16 -7
- package/src/duckdb/extension/parquet/column_reader.cpp +3 -0
- package/src/duckdb/extension/parquet/column_writer.cpp +54 -43
- package/src/duckdb/extension/parquet/geo_parquet.cpp +19 -0
- package/src/duckdb/extension/parquet/include/geo_parquet.hpp +10 -6
- package/src/duckdb/extension/parquet/include/templated_column_reader.hpp +3 -3
- package/src/duckdb/extension/parquet/parquet_writer.cpp +2 -1
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_merge_event.cpp +1 -0
- package/src/duckdb/src/common/arrow/arrow_util.cpp +60 -0
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +1 -53
- package/src/duckdb/src/common/cgroups.cpp +15 -24
- package/src/duckdb/src/common/constants.cpp +8 -0
- package/src/duckdb/src/common/enum_util.cpp +331 -326
- package/src/duckdb/src/common/http_util.cpp +5 -1
- package/src/duckdb/src/common/operator/cast_operators.cpp +6 -60
- package/src/duckdb/src/common/types/bit.cpp +1 -1
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +18 -1
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +2 -1
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +5 -0
- package/src/duckdb/src/core_functions/aggregate/distributive/arg_min_max.cpp +1 -1
- package/src/duckdb/src/core_functions/aggregate/distributive/minmax.cpp +2 -1
- package/src/duckdb/src/execution/index/art/iterator.cpp +17 -15
- package/src/duckdb/src/execution/index/art/prefix.cpp +9 -34
- package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +4 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +1 -0
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +2 -1
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +2 -2
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +23 -1
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +33 -4
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +23 -13
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +23 -19
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +12 -11
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +20 -14
- package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +4 -4
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +3 -1
- package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +6 -1
- package/src/duckdb/src/function/cast/decimal_cast.cpp +33 -3
- package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +9 -0
- package/src/duckdb/src/function/table/arrow.cpp +34 -22
- package/src/duckdb/src/function/table/sniff_csv.cpp +4 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_util.hpp +31 -0
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_wrapper.hpp +2 -16
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +60 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/types/hugeint.hpp +0 -1
- package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection.hpp +2 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/minmax_n_helpers.hpp +9 -5
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +5 -2
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp +5 -5
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +11 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/extension.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +14 -5
- package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/settings.hpp +4 -2
- package/src/duckdb/src/include/duckdb/parser/keyword_helper.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/parser.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/simplified_token.hpp +7 -1
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/select_binder.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +3 -1
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -1
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +7 -4
- package/src/duckdb/src/include/duckdb/storage/buffer/buffer_handle.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +4 -4
- package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +3 -4
- package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +4 -2
- package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +1 -0
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +1 -0
- package/src/duckdb/src/include/duckdb/transaction/transaction_manager.hpp +1 -1
- package/src/duckdb/src/include/duckdb.h +8 -8
- package/src/duckdb/src/main/appender.cpp +1 -1
- package/src/duckdb/src/main/capi/duckdb_value-c.cpp +3 -3
- package/src/duckdb/src/main/capi/helper-c.cpp +4 -0
- package/src/duckdb/src/main/config.cpp +24 -11
- package/src/duckdb/src/main/database.cpp +6 -5
- package/src/duckdb/src/main/extension/extension_install.cpp +13 -8
- package/src/duckdb/src/main/extension/extension_load.cpp +10 -4
- package/src/duckdb/src/main/extension.cpp +1 -1
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +10 -1
- package/src/duckdb/src/optimizer/join_filter_pushdown_optimizer.cpp +9 -5
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +14 -8
- package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +2 -0
- package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +15 -0
- package/src/duckdb/src/optimizer/optimizer.cpp +4 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +1 -11
- package/src/duckdb/src/optimizer/pushdown/pushdown_inner_join.cpp +1 -7
- package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +3 -0
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +1 -0
- package/src/duckdb/src/parser/keyword_helper.cpp +4 -0
- package/src/duckdb/src/parser/parser.cpp +20 -18
- package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +8 -3
- package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +3 -0
- package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +7 -1
- package/src/duckdb/src/planner/binder/expression/bind_unnest_expression.cpp +13 -0
- package/src/duckdb/src/planner/binder/statement/bind_copy_database.cpp +7 -11
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +27 -10
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +24 -9
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +1 -3
- package/src/duckdb/src/planner/binder.cpp +5 -6
- package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +1 -0
- package/src/duckdb/src/planner/expression_binder/select_binder.cpp +9 -0
- package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +2 -2
- package/src/duckdb/src/planner/operator/logical_positional_join.cpp +1 -0
- package/src/duckdb/src/storage/buffer/block_handle.cpp +18 -21
- package/src/duckdb/src/storage/buffer/block_manager.cpp +12 -4
- package/src/duckdb/src/storage/buffer/buffer_handle.cpp +2 -2
- package/src/duckdb/src/storage/buffer/buffer_pool.cpp +12 -2
- package/src/duckdb/src/storage/buffer_manager.cpp +3 -2
- package/src/duckdb/src/storage/compression/rle.cpp +5 -2
- package/src/duckdb/src/storage/compression/string_uncompressed.cpp +2 -1
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +8 -7
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +19 -20
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +1 -2
- package/src/duckdb/src/storage/table/column_data.cpp +5 -2
- package/src/duckdb/src/storage/table/column_segment.cpp +2 -2
- package/src/duckdb/src/storage/table/row_group_collection.cpp +18 -14
- package/src/duckdb/src/storage/table/standard_column_data.cpp +3 -3
- package/src/duckdb/src/storage/wal_replay.cpp +2 -3
- package/src/duckdb/third_party/libpg_query/include/common/keywords.hpp +1 -0
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +1 -0
- package/src/duckdb/third_party/libpg_query/include/parser/parser.hpp +1 -2
- package/src/duckdb/third_party/libpg_query/include/pg_simplified_token.hpp +6 -4
- package/src/duckdb/third_party/libpg_query/include/postgres_parser.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/postgres_parser.cpp +1 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +801 -799
- package/src/duckdb/third_party/libpg_query/src_backend_parser_parser.cpp +6 -2
- package/src/duckdb/third_party/libpg_query/src_common_keywords.cpp +0 -1
- package/src/duckdb/ub_src_common_arrow.cpp +2 -0
- package/vendor.py +1 -2
package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp
CHANGED
@@ -46,7 +46,7 @@ string DialectCandidates::Print() {
     }
     search_space << "\n";
     search_space << "Quote/Escape Candidates: ";
-    for (uint8_t i = 0; i < static_cast<uint8_t>(
+    for (uint8_t i = 0; i < static_cast<uint8_t>(quote_rule_candidates.size()); i++) {
        auto quote_candidate = quote_candidates_map[i];
        auto escape_candidate = escape_candidates_map[i];
        for (idx_t j = 0; j < quote_candidate.size(); j++) {
@@ -60,7 +60,7 @@ string DialectCandidates::Print() {
            search_space << ",";
          }
        }
-       if (i <
+       if (i < quote_rule_candidates.size() - 1) {
          search_space << ",";
        }
     }
@@ -111,7 +111,7 @@ DialectCandidates::DialectCandidates(const CSVStateMachineOptions &options) {
        for (auto &quote_rule : default_quote_rule) {
            quote_candidates_map[static_cast<uint8_t>(quote_rule)] = {options.quote.GetValue()};
        }
-       // also add it as
+       // also add it as an escape rule
        if (!IsQuoteDefault(options.quote.GetValue())) {
            escape_candidates_map[static_cast<uint8_t>(QuoteRule::QUOTES_RFC)].emplace_back(options.quote.GetValue());
        }
@@ -124,14 +124,14 @@ DialectCandidates::DialectCandidates(const CSVStateMachineOptions &options) {
     if (options.escape.IsSetByUser()) {
        // user provided escape: use that escape rule
        if (options.escape == '\0') {
-
+           quote_rule_candidates = {QuoteRule::QUOTES_RFC};
        } else {
-
+           quote_rule_candidates = {QuoteRule::QUOTES_OTHER};
        }
-       escape_candidates_map[static_cast<uint8_t>(
+       escape_candidates_map[static_cast<uint8_t>(quote_rule_candidates[0])] = {options.escape.GetValue()};
     } else {
        // no escape provided: try standard/common escapes
-
+       quote_rule_candidates = default_quote_rule;
     }
 }

@@ -146,12 +146,12 @@ void CSVSniffer::GenerateStateMachineSearchSpace(vector<unique_ptr<ColumnCountSc
     }
     CSVIterator first_iterator;
     bool iterator_set = false;
-    for (const auto
-       const auto &quote_candidates = dialect_candidates.quote_candidates_map.at(static_cast<uint8_t>(
+    for (const auto quote_rule : dialect_candidates.quote_rule_candidates) {
+       const auto &quote_candidates = dialect_candidates.quote_candidates_map.at(static_cast<uint8_t>(quote_rule));
        for (const auto &quote : quote_candidates) {
            for (const auto &delimiter : dialect_candidates.delim_candidates) {
                const auto &escape_candidates =
-                   dialect_candidates.escape_candidates_map.at(static_cast<uint8_t>(
+                   dialect_candidates.escape_candidates_map.at(static_cast<uint8_t>(quote_rule));
                for (const auto &escape : escape_candidates) {
                    for (const auto &comment : dialect_candidates.comment_candidates) {
                        D_ASSERT(buffer_manager);
@@ -181,7 +181,7 @@ void CSVSniffer::GenerateStateMachineSearchSpace(vector<unique_ptr<ColumnCountSc

 // Returns true if a comment is acceptable
 bool AreCommentsAcceptable(const ColumnCountResult &result, idx_t num_cols, bool comment_set_by_user) {
-    // For a comment to be acceptable, we want 3/5th's majority of
+    // For a comment to be acceptable, we want 3/5th's the majority of unmatched in the columns
     constexpr double min_majority = 0.6;
     // detected comments, are all lines that started with a comment character.
     double detected_comments = 0;
@@ -226,6 +226,12 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
     }
     idx_t consistent_rows = 0;
     idx_t num_cols = sniffed_column_counts.result_position == 0 ? 1 : sniffed_column_counts[0].number_of_columns;
+    const bool ignore_errors = options.ignore_errors.GetValue();
+    // If we are ignoring errors and not null_padding , we pick the most frequent number of columns as the right one
+    bool use_most_frequent_columns = ignore_errors && !options.null_padding;
+    if (use_most_frequent_columns) {
+       num_cols = sniffed_column_counts.GetMostFrequentColumnCount();
+    }
     idx_t padding_count = 0;
     idx_t comment_rows = 0;
     idx_t ignored_rows = 0;
@@ -234,7 +240,7 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
     if (sniffed_column_counts.result_position > rows_read) {
        rows_read = sniffed_column_counts.result_position;
     }
-    if (set_columns.IsCandidateUnacceptable(num_cols, options.null_padding,
+    if (set_columns.IsCandidateUnacceptable(num_cols, options.null_padding, ignore_errors,
                                             sniffed_column_counts[0].last_value_always_empty)) {
        // Not acceptable
        return;
@@ -242,8 +248,7 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
     idx_t header_idx = 0;
     for (idx_t row = 0; row < sniffed_column_counts.result_position; row++) {
        if (set_columns.IsCandidateUnacceptable(sniffed_column_counts[row].number_of_columns, options.null_padding,
-
-                                               sniffed_column_counts[row].last_value_always_empty)) {
+                                               ignore_errors, sniffed_column_counts[row].last_value_always_empty)) {
            // Not acceptable
            return;
        }
@@ -258,7 +263,7 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
            consistent_rows++;
        } else if (num_cols < sniffed_column_counts[row].number_of_columns &&
                   (!options.dialect_options.skip_rows.IsSetByUser() || comment_rows > 0) &&
-                  (!set_columns.IsSet() || options.null_padding)) {
+                  (!set_columns.IsSet() || options.null_padding) && (!first_valid || (!use_most_frequent_columns))) {
            // all rows up to this point will need padding
            if (!first_valid) {
                first_valid = true;
@@ -268,15 +273,14 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
            // we use the maximum amount of num_cols that we find
            num_cols = sniffed_column_counts[row].number_of_columns;
            dirty_notes = row;
-           // sniffed_column_counts.state_machine.dialect_options.rows_until_header = dirty_notes;
            dirty_notes_minus_comments = dirty_notes - comment_rows;
            header_idx = row;
            consistent_rows = 1;
-       } else if (sniffed_column_counts[row].number_of_columns == num_cols ||
-                  (options.ignore_errors.GetValue() && !options.null_padding)) {
+       } else if (sniffed_column_counts[row].number_of_columns == num_cols || (use_most_frequent_columns)) {
            if (!first_valid) {
                first_valid = true;
                sniffed_column_counts.state_machine.dialect_options.rows_until_header = row;
+               dirty_notes = row;
            }
            if (sniffed_column_counts[row].number_of_columns != num_cols) {
                ignored_rows++;
@@ -404,7 +408,7 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner> scanner,
     }
 }

-bool CSVSniffer::RefineCandidateNextChunk(ColumnCountScanner &candidate) {
+bool CSVSniffer::RefineCandidateNextChunk(ColumnCountScanner &candidate) const {
     auto &sniffed_column_counts = candidate.ParseChunk();
     for (idx_t i = 0; i < sniffed_column_counts.result_position; i++) {
        if (set_columns.IsSet()) {
package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp
CHANGED
@@ -1,7 +1,6 @@
 #include "duckdb/common/types/cast_helpers.hpp"
 #include "duckdb/execution/operator/csv_scanner/csv_sniffer.hpp"
 #include "duckdb/execution/operator/csv_scanner/csv_reader_options.hpp"
-#include "duckdb/common/types/value.hpp"

 #include "utf8proc.hpp"

@@ -32,8 +31,7 @@ static string TrimWhitespace(const string &col_name) {
     }

     // Find the last character that is not right trimmed
-    idx_t end;
-    end = begin;
+    idx_t end = begin;
     for (auto next = begin; next < col_name.size();) {
        auto bytes = utf8proc_iterate(str + next, NumericCast<utf8proc_ssize_t>(size - next), &codepoint);
        D_ASSERT(bytes > 0);
@@ -90,7 +88,9 @@ static string NormalizeColumnName(const string &col_name) {
     }

     // prepend _ if name starts with a digit or is a reserved keyword
-
+    auto keyword = KeywordHelper::KeywordCategoryType(col_name_cleaned);
+    if (keyword == KeywordCategory::KEYWORD_TYPE_FUNC || keyword == KeywordCategory::KEYWORD_RESERVED ||
+        (col_name_cleaned[0] >= '0' && col_name_cleaned[0] <= '9')) {
        col_name_cleaned = "_" + col_name_cleaned;
     }
     return col_name_cleaned;
@@ -98,10 +98,9 @@ static string NormalizeColumnName(const string &col_name) {

 // If our columns were set by the user, we verify if their names match with the first row
 bool CSVSniffer::DetectHeaderWithSetColumn(ClientContext &context, vector<HeaderValue> &best_header_row,
-                                           SetColumns &set_columns, CSVReaderOptions &options) {
+                                           const SetColumns &set_columns, CSVReaderOptions &options) {
     bool has_header = true;
-
-    bool first_row_consistent = true;
+
     std::ostringstream error;
     // User set the names, we must check if they match the first row
     // We do a +1 to check for situations where the csv file has an extra all null column
@@ -125,6 +124,8 @@ bool CSVSniffer::DetectHeaderWithSetColumn(ClientContext &context, vector<Header
     }

     if (!has_header) {
+       bool all_varchar = true;
+       bool first_row_consistent = true;
        // We verify if the types are consistent
        for (idx_t col = 0; col < set_columns.Size(); col++) {
            // try cast to sql_type of column
@@ -168,7 +169,7 @@ bool EmptyHeader(const string &col_name, bool is_null, bool normalize) {

 vector<string>
 CSVSniffer::DetectHeaderInternal(ClientContext &context, vector<HeaderValue> &best_header_row,
-                                 CSVStateMachine &state_machine, SetColumns &set_columns,
+                                 CSVStateMachine &state_machine, const SetColumns &set_columns,
                                  unordered_map<idx_t, vector<LogicalType>> &best_sql_types_candidates_per_column_idx,
                                  CSVReaderOptions &options, CSVErrorHandler &error_handler) {
     vector<string> detected_names;
@@ -187,9 +188,7 @@ CSVSniffer::DetectHeaderInternal(ClientContext &context, vector<HeaderValue> &be
        return detected_names;
     }
     // information for header detection
-    bool first_row_consistent = true;
     // check if header row is all null and/or consistent with detected column data types
-    bool first_row_nulls = true;
     // If null-padding is not allowed and there is a mismatch between our header candidate and the number of columns
     // We can't detect the dialect/type options properly
     if (!options.null_padding && best_sql_types_candidates_per_column_idx.size() != best_header_row.size()) {
@@ -198,12 +197,14 @@ CSVSniffer::DetectHeaderInternal(ClientContext &context, vector<HeaderValue> &be
                                              state_machine.dialect_options.state_machine_options.delimiter.GetValue());
        error_handler.Error(error);
     }
-    bool all_varchar = true;
     bool has_header;

     if (set_columns.IsSet()) {
        has_header = DetectHeaderWithSetColumn(context, best_header_row, set_columns, options);
     } else {
+       bool first_row_consistent = true;
+       bool all_varchar = true;
+       bool first_row_nulls = true;
        for (idx_t col = 0; col < best_header_row.size(); col++) {
            if (!best_header_row[col].IsNull()) {
                first_row_nulls = false;
package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp
CHANGED
@@ -103,6 +103,10 @@ bool CSVSniffer::CanYouCastIt(ClientContext &context, const string_t value, cons
     auto value_ptr = value.GetData();
     auto value_size = value.GetSize();
     switch (type.id()) {
+    case LogicalTypeId::BOOLEAN: {
+       bool dummy_value;
+       return TryCastStringBool(value_ptr, value_size, dummy_value, true);
+    }
     case LogicalTypeId::TINYINT: {
        int8_t dummy_value;
        return TrySimpleIntegerCast(value_ptr, value_size, dummy_value, false);
@@ -251,19 +255,20 @@ void CSVSniffer::InitializeDateAndTimeStampDetection(CSVStateMachine &candidate,
     auto user_format = options.dialect_options.date_format.find(sql_type.id());
     if (user_format->second.IsSetByUser()) {
        format_candidate.format.emplace_back(user_format->second.GetValue().format_specifier);
-    }
-
-
-
-
-
-
-
-
-
+    } else {
+       auto entry = format_template_candidates.find(sql_type.id());
+       if (entry != format_template_candidates.end()) {
+           const auto &format_template_list = entry->second;
+           for (const auto &t : format_template_list) {
+               const auto format_string = GenerateDateFormat(separator, t);
+               // don't parse ISO 8601
+               if (format_string.find("%Y-%m-%d") == string::npos) {
+                   format_candidate.format.emplace_back(format_string);
+               }
            }
        }
     }
+    // order by preference
     original_format_candidates = format_candidates;
 }
 // initialise the first candidate
@@ -290,7 +295,8 @@ void CSVSniffer::DetectDateAndTimeStampFormats(CSVStateMachine &candidate, const
     bool had_format_candidates = !save_format_candidates.empty();
     bool initial_format_candidates =
         save_format_candidates.size() == original_format_candidates.at(sql_type.id()).format.size();
-
+    bool is_set_by_user = options.dialect_options.date_format.find(sql_type.id())->second.IsSetByUser();
+    while (!type_format_candidates.empty() && !is_set_by_user) {
        // avoid using exceptions for flow control...
        auto &current_format = candidate.dialect_options.date_format[sql_type.id()].GetValue();
        if (current_format.Parse(dummy_val, result, true)) {
@@ -341,7 +347,7 @@ void CSVSniffer::SniffTypes(DataChunk &data_chunk, CSVStateMachine &state_machin
        // try cast from string to sql_type
        while (col_type_candidates.size() > 1) {
            const auto &sql_type = col_type_candidates.back();
-           // try formatting for date types if the user did not specify one and it starts with numeric
+           // try formatting for date types if the user did not specify one, and it starts with numeric
            // values.
            string separator;
            // If Value is not Null, Has a numeric date format, and the current investigated candidate is
@@ -382,7 +388,7 @@ void CSVSniffer::SniffTypes(DataChunk &data_chunk, CSVStateMachine &state_machin
 }

 // If we have a predefined date/timestamp format we set it
-void CSVSniffer::SetUserDefinedDateTimeFormat(CSVStateMachine &candidate) {
+void CSVSniffer::SetUserDefinedDateTimeFormat(CSVStateMachine &candidate) const {
     const vector<LogicalTypeId> data_time_formats {LogicalTypeId::DATE, LogicalTypeId::TIMESTAMP};
     for (auto &date_time_format : data_time_formats) {
        auto &user_option = options.dialect_options.date_format.at(date_time_format);
@@ -423,7 +429,7 @@ void CSVSniffer::DetectTypes() {
            }
        }
     }
-    if (break_loop) {
+    if (break_loop && !candidate->state_machine->options.ignore_errors.GetValue()) {
        continue;
     }
 }
package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp
CHANGED
@@ -22,7 +22,7 @@ void CSVStateMachineCache::Insert(const CSVStateMachineOptions &state_machine_op
     auto &transition_array = state_machine_cache[state_machine_options];

     for (uint32_t i = 0; i < StateMachine::NUM_STATES; i++) {
-       CSVState cur_state = CSVState(i);
+       CSVState cur_state = static_cast<CSVState>(i);
        switch (cur_state) {
        case CSVState::QUOTED:
        case CSVState::QUOTED_NEW_LINE:
@@ -234,11 +234,11 @@ CSVStateMachineCache::CSVStateMachineCache() {
     auto default_delimiter = DialectCandidates::GetDefaultDelimiter();
     auto default_comment = DialectCandidates::GetDefaultComment();

-    for (auto
-       const auto &quote_candidates = default_quote[static_cast<uint8_t>(
+    for (auto quote_rule : default_quote_rule) {
+       const auto &quote_candidates = default_quote[static_cast<uint8_t>(quote_rule)];
        for (const auto &quote : quote_candidates) {
            for (const auto &delimiter : default_delimiter) {
-               const auto &escape_candidates = default_escape[static_cast<uint8_t>(
+               const auto &escape_candidates = default_escape[static_cast<uint8_t>(quote_rule)];
                for (const auto &escape : escape_candidates) {
                    for (const auto &comment : default_comment) {
                        Insert({delimiter, quote, escape, comment, NewLineIdentifier::SINGLE_N});
package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp
CHANGED
@@ -302,7 +302,9 @@ CSVError CSVError::UnterminatedQuotesError(const CSVReaderOptions &options, idx_
     std::ostringstream error;
     error << "Value with unterminated quote found." << '\n';
     std::ostringstream how_to_fix_it;
-    how_to_fix_it << "Possible
+    how_to_fix_it << "Possible fixes:" << '\n';
+    how_to_fix_it << "* Enable ignore errors (ignore_errors=true) to skip this row" << '\n';
+    how_to_fix_it << "* Set quote do empty or to a different value (e.g., quote=\'\')" << '\n';
     return CSVError(error.str(), UNTERMINATED_QUOTES, current_column, csv_row, error_info, row_byte_position,
                     byte_position, options, how_to_fix_it.str(), current_path);
 }
package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp
CHANGED
@@ -618,7 +618,12 @@ OperatorResultType PhysicalPiecewiseMergeJoin::ResolveComplexJoin(ExecutionConte

        if (tail_count < result_count) {
            result_count = tail_count;
-
+           if (result_count == 0) {
+               // Need to reset here otherwise we may use the non-flat chunk when constructing LEFT/OUTER
+               chunk.Reset();
+           } else {
+               chunk.Slice(*sel, result_count);
+           }
        }
     }

package/src/duckdb/src/function/cast/decimal_cast.cpp
CHANGED
@@ -114,11 +114,42 @@ struct DecimalScaleDownOperator {
     }
 };

+// This function detects if we can scale a decimal down to another.
+template <class INPUT_TYPE>
+bool CanScaleDownDecimal(INPUT_TYPE input, DecimalScaleInput<INPUT_TYPE> &data) {
+    int64_t divisor = UnsafeNumericCast<int64_t>(NumericHelper::POWERS_OF_TEN[data.source_scale]);
+    auto value = input % divisor;
+    auto rounded_input = input;
+    if (rounded_input < 0) {
+       rounded_input *= -1;
+       value *= -1;
+    }
+    if (value >= divisor / 2) {
+       rounded_input += divisor;
+    }
+    return rounded_input < data.limit && rounded_input > -data.limit;
+}
+
+template <>
+bool CanScaleDownDecimal<hugeint_t>(hugeint_t input, DecimalScaleInput<hugeint_t> &data) {
+    auto divisor = UnsafeNumericCast<hugeint_t>(Hugeint::POWERS_OF_TEN[data.source_scale]);
+    hugeint_t value = input % divisor;
+    hugeint_t rounded_input = input;
+    if (rounded_input < 0) {
+       rounded_input *= -1;
+       value *= -1;
+    }
+    if (value >= divisor / 2) {
+       rounded_input += divisor;
+    }
+    return rounded_input < data.limit && rounded_input > -data.limit;
+}
+
 struct DecimalScaleDownCheckOperator {
     template <class INPUT_TYPE, class RESULT_TYPE>
     static RESULT_TYPE Operation(INPUT_TYPE input, ValidityMask &mask, idx_t idx, void *dataptr) {
-       auto data =
-       if (input
+       auto data = static_cast<DecimalScaleInput<INPUT_TYPE> *>(dataptr);
+       if (!CanScaleDownDecimal(input, *data)) {
            auto error = StringUtil::Format("Casting value \"%s\" to type %s failed: value is out of range!",
                                            Decimal::ToString(input, data->source_width, data->source_scale),
                                            data->result.GetType().ToString());
@@ -145,7 +176,6 @@ bool TemplatedDecimalScaleDown(Vector &source, Vector &result, idx_t count, Cast
        return true;
     } else {
        // type might not fit: check limit
-
        auto limit = UnsafeNumericCast<SOURCE>(POWERS_SOURCE::POWERS_OF_TEN[target_width]);
        DecimalScaleInput<SOURCE> input(result, limit, divide_factor, parameters, source_width, source_scale);
        UnaryExecutor::GenericExecute<SOURCE, DEST, DecimalScaleDownCheckOperator>(source, result, count, &input,
package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp
CHANGED
@@ -43,6 +43,15 @@ bool ArrowType::RunEndEncoded() const {
     return run_end_encoded;
 }

+void ArrowType::ThrowIfInvalid() const {
+    if (type.id() == LogicalTypeId::INVALID) {
+       if (not_implemented) {
+           throw NotImplementedException(error_message);
+       }
+       throw InvalidInputException(error_message);
+    }
+}
+
 LogicalType ArrowType::GetDuckType(bool use_dictionary) const {
     if (use_dictionary && dictionary_type) {
        return dictionary_type->GetDuckType();
package/src/duckdb/src/function/table/arrow.cpp
CHANGED
@@ -35,9 +35,11 @@ static unique_ptr<ArrowType> GetArrowExtensionType(const ArrowSchemaMetadata &ex
     // Check for arrow canonical extensions
     if (arrow_extension == "arrow.uuid") {
        if (format != "w:16") {
-
-
-
+           std::ostringstream error;
+           error
+               << "arrow.uuid must be a fixed-size binary of 16 bytes (i.e., \'w:16\'). It is incorrectly defined as:"
+               << format;
+           return make_uniq<ArrowType>(error.str());
        }
        return make_uniq<ArrowType>(LogicalType::UUID);
     } else if (arrow_extension == "arrow.json") {
@@ -49,40 +51,47 @@ static unique_ptr<ArrowType> GetArrowExtensionType(const ArrowSchemaMetadata &ex
        } else if (format == "vu") {
            return make_uniq<ArrowType>(LogicalType::JSON(), make_uniq<ArrowStringInfo>(ArrowVariableSizeType::VIEW));
        } else {
-
-
-
+           std::ostringstream error;
+           error
+               << "arrow.json must be of a varchar format (i.e., \'u\',\'U\' or \'vu\'). It is incorrectly defined as:"
+               << format;
+           return make_uniq<ArrowType>(error.str());
        }
     }
     // Check for DuckDB canonical extensions
     else if (arrow_extension == "duckdb.hugeint") {
        if (format != "w:16") {
-
-
-
+           std::ostringstream error;
+           error << "duckdb.hugeint must be a fixed-size binary of 16 bytes (i.e., \'w:16\'). It is incorrectly "
+                    "defined as:"
+                 << format;
+           return make_uniq<ArrowType>(error.str());
        }
        return make_uniq<ArrowType>(LogicalType::HUGEINT);
-
     } else if (arrow_extension == "duckdb.uhugeint") {
        if (format != "w:16") {
-
-
-
+           std::ostringstream error;
+           error << "duckdb.uhugeint must be a fixed-size binary of 16 bytes (i.e., \'w:16\'). It is incorrectly "
+                    "defined as:"
+                 << format;
+           return make_uniq<ArrowType>(error.str());
        }
        return make_uniq<ArrowType>(LogicalType::UHUGEINT);
     } else if (arrow_extension == "duckdb.time_tz") {
        if (format != "w:8") {
-
-
-
+           std::ostringstream error;
+           error << "duckdb.time_tz must be a fixed-size binary of 8 bytes (i.e., \'w:8\'). It is incorrectly defined "
+                    "as:"
+                 << format;
+           return make_uniq<ArrowType>(error.str());
        }
        return make_uniq<ArrowType>(LogicalType::TIME_TZ,
                                    make_uniq<ArrowDateTimeInfo>(ArrowDateTimeType::MICROSECONDS));
     } else if (arrow_extension == "duckdb.bit") {
        if (format != "z" && format != "Z") {
-
-
-
+           std::ostringstream error;
+           error << "duckdb.bit must be a blob (i.e., \'z\' or \'Z\'). It is incorrectly defined as:" << format;
+           return make_uniq<ArrowType>(error.str());
        } else if (format == "z") {
            auto type_info = make_uniq<ArrowStringInfo>(ArrowVariableSizeType::NORMAL);
            return make_uniq<ArrowType>(LogicalType::BIT, std::move(type_info));
@@ -91,9 +100,10 @@ static unique_ptr<ArrowType> GetArrowExtensionType(const ArrowSchemaMetadata &ex
            return make_uniq<ArrowType>(LogicalType::BIT, std::move(type_info));

     } else {
-
-
-
+       std::ostringstream error;
+       error << "Arrow Type with extension name: " << arrow_extension << " and format: " << format
+             << ", is not currently supported in DuckDB.";
+       return make_uniq<ArrowType>(error.str(), true);
     }
 }
 static unique_ptr<ArrowType> GetArrowLogicalTypeNoDictionary(ArrowSchema &schema) {
@@ -384,10 +394,12 @@ unique_ptr<ArrowArrayStreamWrapper> ProduceArrowScan(const ArrowScanFunctionData
     //! Generate Projection Pushdown Vector
     ArrowStreamParameters parameters;
     D_ASSERT(!column_ids.empty());
+    auto &arrow_types = function.arrow_table.GetColumns();
     for (idx_t idx = 0; idx < column_ids.size(); idx++) {
        auto col_idx = column_ids[idx];
        if (col_idx != COLUMN_IDENTIFIER_ROW_ID) {
            auto &schema = *function.schema_root.arrow_schema.children[col_idx];
+           arrow_types.at(col_idx)->ThrowIfInvalid();
            parameters.projected_columns.projection_map[idx] = schema.name;
            parameters.projected_columns.columns.emplace_back(schema.name);
            parameters.projected_columns.filter_to_col[idx] = col_idx;
package/src/duckdb/src/function/table/sniff_csv.cpp
CHANGED
@@ -96,6 +96,9 @@ string FormatOptions(char opt) {
     if (opt == '\'') {
        return "''";
     }
+    if (opt == '\0') {
+       return "";
+    }
     string result;
     result += opt;
     return result;
@@ -214,7 +217,7 @@ static void CSVSniffFunction(ClientContext &context, TableFunctionInput &data_p,
              << "'" << separator;
     }
     // 11.2. Quote
-    if (!sniffer_options.dialect_options.
+    if (!sniffer_options.dialect_options.state_machine_options.quote.IsSetByUser()) {
        csv_read << "quote="
                 << "'" << FormatOptions(sniffer_options.dialect_options.state_machine_options.quote.GetValue()) << "'"
                 << separator;
package/src/duckdb/src/function/table/version/pragma_version.cpp
CHANGED
@@ -1,5 +1,5 @@
 #ifndef DUCKDB_PATCH_VERSION
-#define DUCKDB_PATCH_VERSION "
+#define DUCKDB_PATCH_VERSION "1"
 #endif
 #ifndef DUCKDB_MINOR_VERSION
 #define DUCKDB_MINOR_VERSION 1
@@ -8,10 +8,10 @@
 #define DUCKDB_MAJOR_VERSION 1
 #endif
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "v1.1.
+#define DUCKDB_VERSION "v1.1.1"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "
+#define DUCKDB_SOURCE_ID "af39bd0dcf"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"
package/src/duckdb/src/include/duckdb/common/arrow/arrow_util.hpp
@@ -0,0 +1,31 @@
+//===----------------------------------------------------------------------===//
+//                         DuckDB
+//
+// duckdb/common/arrow/arrow_util.hpp
+//
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+#include "duckdb/common/arrow/arrow.hpp"
+#include "duckdb/main/chunk_scan_state.hpp"
+#include "duckdb/main/client_properties.hpp"
+#include "duckdb/common/helper.hpp"
+#include "duckdb/common/error_data.hpp"
+
+namespace duckdb {
+
+class QueryResult;
+class DataChunk;
+
+class ArrowUtil {
+public:
+    static bool TryFetchChunk(ChunkScanState &scan_state, ClientProperties options, idx_t chunk_size, ArrowArray *out,
+                              idx_t &result_count, ErrorData &error);
+    static idx_t FetchChunk(ChunkScanState &scan_state, ClientProperties options, idx_t chunk_size, ArrowArray *out);
+
+private:
+    static bool TryFetchNext(QueryResult &result, unique_ptr<DataChunk> &out, ErrorData &error);
+};
+
+} // namespace duckdb
package/src/duckdb/src/include/duckdb/common/arrow/arrow_wrapper.hpp
CHANGED
@@ -9,14 +9,9 @@
 #pragma once
 #include "duckdb/common/arrow/arrow.hpp"
 #include "duckdb/common/helper.hpp"
-#include "duckdb/common/error_data.hpp"
-#include "duckdb/main/chunk_scan_state.hpp"
-#include "duckdb/main/client_properties.hpp"

 //! Here we have the internal duckdb classes that interact with Arrow's Internal Header (i.e., duckdb/commons/arrow.hpp)
 namespace duckdb {
-class QueryResult;
-class DataChunk;

 class ArrowSchemaWrapper {
 public:
@@ -49,23 +44,14 @@ public:
 public:
     void GetSchema(ArrowSchemaWrapper &schema);

-    shared_ptr<ArrowArrayWrapper> GetNextChunk();
+    virtual shared_ptr<ArrowArrayWrapper> GetNextChunk();

     const char *GetError();

-    ~ArrowArrayStreamWrapper();
+    virtual ~ArrowArrayStreamWrapper();
     ArrowArrayStreamWrapper() {
        arrow_array_stream.release = nullptr;
     }
 };

-class ArrowUtil {
-public:
-    static bool TryFetchChunk(ChunkScanState &scan_state, ClientProperties options, idx_t chunk_size, ArrowArray *out,
-                              idx_t &result_count, ErrorData &error);
-    static idx_t FetchChunk(ChunkScanState &scan_state, ClientProperties options, idx_t chunk_size, ArrowArray *out);
-
-private:
-    static bool TryFetchNext(QueryResult &result, unique_ptr<DataChunk> &out, ErrorData &error);
-};
 } // namespace duckdb