duckdb 0.8.2-dev3458.0 → 0.8.2-dev3949.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +2 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu_extension.cpp +5 -5
- package/src/duckdb/extension/json/include/json_deserializer.hpp +7 -16
- package/src/duckdb/extension/json/include/json_serializer.hpp +9 -15
- package/src/duckdb/extension/json/json_deserializer.cpp +29 -67
- package/src/duckdb/extension/json/json_scan.cpp +1 -1
- package/src/duckdb/extension/json/json_serializer.cpp +26 -69
- package/src/duckdb/src/common/enum_util.cpp +119 -7
- package/src/duckdb/src/common/extra_type_info.cpp +7 -3
- package/src/duckdb/src/common/radix_partitioning.cpp +8 -31
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +18 -3
- package/src/duckdb/src/common/serializer/binary_deserializer.cpp +62 -77
- package/src/duckdb/src/common/serializer/binary_serializer.cpp +84 -84
- package/src/duckdb/src/common/serializer/format_serializer.cpp +1 -1
- package/src/duckdb/src/common/sort/partition_state.cpp +41 -33
- package/src/duckdb/src/common/types/data_chunk.cpp +44 -8
- package/src/duckdb/src/common/types/hyperloglog.cpp +21 -0
- package/src/duckdb/src/common/types/interval.cpp +3 -0
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +252 -126
- package/src/duckdb/src/common/types/row/row_layout.cpp +3 -31
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +40 -32
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +39 -26
- package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +11 -1
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +21 -16
- package/src/duckdb/src/common/types/value.cpp +63 -42
- package/src/duckdb/src/common/types/vector.cpp +33 -67
- package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +3 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +222 -364
- package/src/duckdb/src/execution/join_hashtable.cpp +5 -6
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +240 -310
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +202 -173
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +36 -2
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/base_csv_reader.cpp +58 -162
- package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +434 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +80 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +90 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +95 -0
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/csv_reader_options.cpp +47 -28
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +35 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +107 -0
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/parallel_csv_reader.cpp +44 -44
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +52 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +336 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +165 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +398 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +175 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +39 -0
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +1 -2
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +614 -574
- package/src/duckdb/src/execution/window_executor.cpp +6 -5
- package/src/duckdb/src/function/cast/cast_function_set.cpp +1 -0
- package/src/duckdb/src/function/scalar/strftime_format.cpp +4 -4
- package/src/duckdb/src/function/table/copy_csv.cpp +94 -96
- package/src/duckdb/src/function/table/read_csv.cpp +150 -136
- package/src/duckdb/src/function/table/table_scan.cpp +0 -2
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +24 -0
- package/src/duckdb/src/include/duckdb/common/file_opener.hpp +9 -0
- package/src/duckdb/src/include/duckdb/common/fixed_size_map.hpp +208 -0
- package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/printer.hpp +11 -0
- package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +43 -30
- package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +36 -35
- package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/serializer/encoding_util.hpp +132 -0
- package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +125 -150
- package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +119 -107
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/shared_ptr.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -7
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/types/hyperloglog.hpp +7 -1
- package/src/duckdb/src/include/duckdb/common/types/interval.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +41 -9
- package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/types/row/row_layout.hpp +1 -23
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +14 -8
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +6 -3
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +13 -8
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/vector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +125 -146
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +5 -4
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +4 -3
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/base_csv_reader.hpp +17 -17
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +72 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +110 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +103 -0
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_file_handle.hpp +8 -15
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_line_info.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_reader_options.hpp +52 -28
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +127 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +75 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +51 -0
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/parallel_csv_reader.hpp +21 -27
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +21 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +18 -27
- package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +5 -6
- package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +4 -4
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +17 -12
- package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -1
- package/src/duckdb/src/include/duckdb/main/config.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/connection.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +6 -6
- package/src/duckdb/src/include/duckdb/parallel/event.hpp +12 -1
- package/src/duckdb/src/include/duckdb/storage/block.hpp +6 -0
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +7 -3
- package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +15 -3
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
- package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +6 -0
- package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +1 -0
- package/src/duckdb/src/include/duckdb.h +12 -0
- package/src/duckdb/src/main/capi/logical_types-c.cpp +22 -0
- package/src/duckdb/src/main/client_context_file_opener.cpp +17 -0
- package/src/duckdb/src/main/client_verify.cpp +1 -0
- package/src/duckdb/src/main/config.cpp +2 -2
- package/src/duckdb/src/main/connection.cpp +3 -3
- package/src/duckdb/src/main/relation/read_csv_relation.cpp +19 -13
- package/src/duckdb/src/parallel/pipeline_finish_event.cpp +1 -1
- package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -16
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +41 -25
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +4 -4
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +10 -10
- package/src/duckdb/src/planner/logical_operator.cpp +1 -1
- package/src/duckdb/src/planner/planner.cpp +1 -1
- package/src/duckdb/src/storage/checkpoint_manager.cpp +4 -3
- package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +5 -5
- package/src/duckdb/src/storage/serialization/serialize_expression.cpp +10 -10
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +20 -20
- package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +2 -2
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +118 -89
- package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +27 -27
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +16 -16
- package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +8 -8
- package/src/duckdb/src/storage/serialization/serialize_statement.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_storage.cpp +39 -0
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +9 -9
- package/src/duckdb/src/storage/statistics/base_statistics.cpp +67 -4
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +16 -0
- package/src/duckdb/src/storage/statistics/list_stats.cpp +21 -0
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +126 -1
- package/src/duckdb/src/storage/statistics/string_stats.cpp +23 -0
- package/src/duckdb/src/storage/statistics/struct_stats.cpp +27 -0
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/chunk_info.cpp +82 -3
- package/src/duckdb/src/storage/table/row_group.cpp +68 -1
- package/src/duckdb/src/storage/table/table_statistics.cpp +21 -0
- package/src/duckdb/src/storage/wal_replay.cpp +2 -2
- package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +15 -1
- package/src/duckdb/src/verification/statement_verifier.cpp +2 -0
- package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +8 -0
- package/src/duckdb/ub_src_execution.cpp +0 -2
- package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +18 -0
- package/src/duckdb/ub_src_execution_operator_csv_scanner_sniffer.cpp +12 -0
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +0 -12
- package/src/duckdb/ub_src_storage_serialization.cpp +2 -0
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
- package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -207
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +0 -133
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +0 -74
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +0 -73
@@ -1,20 +1,21 @@
|
|
1
1
|
#include "duckdb/function/table/read_csv.hpp"
|
2
|
-
#include "duckdb/function/function_set.hpp"
|
3
|
-
#include "duckdb/main/client_context.hpp"
|
4
|
-
#include "duckdb/main/database.hpp"
|
5
|
-
#include "duckdb/common/string_util.hpp"
|
6
2
|
#include "duckdb/common/enum_util.hpp"
|
3
|
+
#include "duckdb/common/multi_file_reader.hpp"
|
4
|
+
#include "duckdb/common/string_util.hpp"
|
7
5
|
#include "duckdb/common/union_by_name.hpp"
|
6
|
+
#include "duckdb/execution/operator/persistent/csv_rejects_table.hpp"
|
7
|
+
#include "duckdb/execution/operator/scan/csv/csv_line_info.hpp"
|
8
|
+
#include "duckdb/execution/operator/scan/csv/csv_sniffer.hpp"
|
9
|
+
#include "duckdb/function/function_set.hpp"
|
10
|
+
#include "duckdb/main/client_context.hpp"
|
11
|
+
#include "duckdb/main/client_data.hpp"
|
8
12
|
#include "duckdb/main/config.hpp"
|
13
|
+
#include "duckdb/main/database.hpp"
|
9
14
|
#include "duckdb/parser/expression/constant_expression.hpp"
|
10
15
|
#include "duckdb/parser/expression/function_expression.hpp"
|
11
16
|
#include "duckdb/parser/tableref/table_function_ref.hpp"
|
12
17
|
#include "duckdb/planner/operator/logical_get.hpp"
|
13
18
|
#include "duckdb/main/extension_helper.hpp"
|
14
|
-
#include "duckdb/common/multi_file_reader.hpp"
|
15
|
-
#include "duckdb/main/client_data.hpp"
|
16
|
-
#include "duckdb/execution/operator/persistent/csv_line_info.hpp"
|
17
|
-
#include "duckdb/execution/operator/persistent/csv_rejects_table.hpp"
|
18
19
|
#include "duckdb/common/serializer/format_serializer.hpp"
|
19
20
|
#include "duckdb/common/serializer/format_deserializer.hpp"
|
20
21
|
|
@@ -26,23 +27,22 @@ unique_ptr<CSVFileHandle> ReadCSV::OpenCSV(const string &file_path, FileCompress
|
|
26
27
|
ClientContext &context) {
|
27
28
|
auto &fs = FileSystem::GetFileSystem(context);
|
28
29
|
auto &allocator = BufferAllocator::Get(context);
|
29
|
-
return CSVFileHandle::OpenFile(fs, allocator, file_path, compression
|
30
|
+
return CSVFileHandle::OpenFile(fs, allocator, file_path, compression);
|
30
31
|
}
|
31
32
|
|
32
33
|
void ReadCSVData::FinalizeRead(ClientContext &context) {
|
33
34
|
BaseCSVData::Finalize();
|
34
35
|
// Here we identify if we can run this CSV file on parallel or not.
|
35
|
-
bool null_or_empty = options.delimiter.empty() || options.escape.empty() || options.quote.empty() ||
|
36
|
-
options.delimiter[0] == '\0' || options.escape[0] == '\0' || options.quote[0] == '\0';
|
37
|
-
bool complex_options = options.delimiter.size() > 1 || options.escape.size() > 1 || options.quote.size() > 1;
|
38
36
|
bool not_supported_options = options.null_padding;
|
39
37
|
|
40
38
|
auto number_of_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
|
41
|
-
|
39
|
+
//! If we have many csv files, we run single-threaded on each file and parallelize on the number of files
|
40
|
+
bool many_csv_files = files.size() > 1 && int64_t(files.size() * 2) >= number_of_threads;
|
41
|
+
if (options.parallel_mode != ParallelMode::PARALLEL && many_csv_files) {
|
42
42
|
single_threaded = true;
|
43
43
|
}
|
44
|
-
if (options.parallel_mode == ParallelMode::SINGLE_THREADED ||
|
45
|
-
|
44
|
+
if (options.parallel_mode == ParallelMode::SINGLE_THREADED || not_supported_options ||
|
45
|
+
options.dialect_options.new_line == NewLineIdentifier::MIX) {
|
46
46
|
// not supported for parallel CSV reading
|
47
47
|
single_threaded = true;
|
48
48
|
}
|
@@ -231,10 +231,14 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
|
|
231
231
|
}
|
232
232
|
if (options.auto_detect) {
|
233
233
|
options.file_path = result->files[0];
|
234
|
-
|
235
|
-
|
234
|
+
// Initialize Buffer Manager and Sniffer
|
235
|
+
auto file_handle = BaseCSVReader::OpenCSV(context, options);
|
236
|
+
result->buffer_manager = make_shared<CSVBufferManager>(context, std::move(file_handle), options);
|
237
|
+
CSVSniffer sniffer(options, result->buffer_manager, result->state_machine_cache);
|
238
|
+
auto sniffer_result = sniffer.SniffCSV();
|
239
|
+
return_types = sniffer_result.return_types;
|
236
240
|
if (names.empty()) {
|
237
|
-
names.
|
241
|
+
names = sniffer_result.names;
|
238
242
|
} else {
|
239
243
|
if (explicitly_set_columns) {
|
240
244
|
// The user has influenced the names, can't assume they are valid anymore
|
@@ -246,10 +250,8 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
|
|
246
250
|
} else {
|
247
251
|
D_ASSERT(return_types.size() == names.size());
|
248
252
|
}
|
249
|
-
initial_reader->names = names;
|
250
253
|
}
|
251
|
-
|
252
|
-
result->initial_reader = std::move(initial_reader);
|
254
|
+
|
253
255
|
} else {
|
254
256
|
D_ASSERT(return_types.size() == names.size());
|
255
257
|
}
|
@@ -275,15 +277,10 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
|
|
275
277
|
} else {
|
276
278
|
result->reader_bind = MultiFileReader::BindOptions(options.file_options, result->files, return_types, names);
|
277
279
|
}
|
278
|
-
|
279
280
|
result->return_types = return_types;
|
280
281
|
result->return_names = names;
|
281
282
|
result->FinalizeRead(context);
|
282
283
|
|
283
|
-
if (options.auto_detect) {
|
284
|
-
result->initial_reader->options = options;
|
285
|
-
}
|
286
|
-
|
287
284
|
return std::move(result);
|
288
285
|
}
|
289
286
|
|
@@ -299,17 +296,25 @@ static unique_ptr<FunctionData> ReadCSVAutoBind(ClientContext &context, TableFun
|
|
299
296
|
|
300
297
|
struct ParallelCSVGlobalState : public GlobalTableFunctionState {
|
301
298
|
public:
|
302
|
-
ParallelCSVGlobalState(ClientContext &context,
|
303
|
-
const
|
304
|
-
|
305
|
-
:
|
306
|
-
force_parallelism(force_parallelism_p), column_ids(std::move(column_ids_p)),
|
299
|
+
ParallelCSVGlobalState(ClientContext &context, shared_ptr<CSVBufferManager> buffer_manager_p,
|
300
|
+
const CSVReaderOptions &options, idx_t system_threads_p, const vector<string> &files_path_p,
|
301
|
+
bool force_parallelism_p, vector<column_t> column_ids_p)
|
302
|
+
: buffer_manager(std::move(buffer_manager_p)), system_threads(system_threads_p),
|
303
|
+
buffer_size(options.buffer_size), force_parallelism(force_parallelism_p), column_ids(std::move(column_ids_p)),
|
307
304
|
line_info(main_mutex, batch_to_tuple_end, tuple_start, tuple_end) {
|
308
|
-
file_handle->DisableReset();
|
309
305
|
current_file_path = files_path_p[0];
|
310
|
-
|
306
|
+
CSVFileHandle *file_handle_ptr;
|
307
|
+
|
308
|
+
if (!buffer_manager) {
|
309
|
+
file_handle = ReadCSV::OpenCSV(current_file_path, options.compression, context);
|
310
|
+
file_handle_ptr = file_handle.get();
|
311
|
+
} else {
|
312
|
+
file_handle_ptr = buffer_manager->file_handle.get();
|
313
|
+
}
|
314
|
+
|
315
|
+
file_size = file_handle_ptr->FileSize();
|
311
316
|
first_file_size = file_size;
|
312
|
-
on_disk_file =
|
317
|
+
on_disk_file = file_handle_ptr->OnDiskFile();
|
313
318
|
bytes_read = 0;
|
314
319
|
if (buffer_size < file_size || file_size == 0) {
|
315
320
|
bytes_per_local_state = buffer_size / ParallelCSVGlobalState::MaxThreads();
|
@@ -321,10 +326,6 @@ public:
|
|
321
326
|
// this boy needs to be at least one.
|
322
327
|
bytes_per_local_state = 1;
|
323
328
|
}
|
324
|
-
for (idx_t i = 0; i < rows_to_skip; i++) {
|
325
|
-
file_handle->ReadLine();
|
326
|
-
}
|
327
|
-
first_position = current_csv_position;
|
328
329
|
running_threads = MaxThreads();
|
329
330
|
|
330
331
|
// Initialize all the book-keeping variables
|
@@ -337,10 +338,11 @@ public:
|
|
337
338
|
batch_to_tuple_end.resize(file_count);
|
338
339
|
|
339
340
|
// Initialize the lines read
|
340
|
-
line_info.lines_read[0][0] =
|
341
|
-
if (has_header) {
|
341
|
+
line_info.lines_read[0][0] = options.dialect_options.skip_rows;
|
342
|
+
if (options.has_header && options.dialect_options.header) {
|
342
343
|
line_info.lines_read[0][0]++;
|
343
344
|
}
|
345
|
+
first_position = options.dialect_options.true_start;
|
344
346
|
}
|
345
347
|
explicit ParallelCSVGlobalState(idx_t system_threads_p)
|
346
348
|
: system_threads(system_threads_p), line_info(main_mutex, batch_to_tuple_end, tuple_start, tuple_end) {
|
@@ -390,9 +392,7 @@ public:
|
|
390
392
|
|
391
393
|
private:
|
392
394
|
//! File Handle for current file
|
393
|
-
|
394
|
-
shared_ptr<CSVBuffer> current_buffer;
|
395
|
-
shared_ptr<CSVBuffer> next_buffer;
|
395
|
+
shared_ptr<CSVBufferManager> buffer_manager;
|
396
396
|
|
397
397
|
//! The index of the next file to read (i.e. current file + 1)
|
398
398
|
idx_t file_index = 1;
|
@@ -418,12 +418,9 @@ private:
|
|
418
418
|
|
419
419
|
//! Forces parallelism for small CSV Files, should only be used for testing.
|
420
420
|
bool force_parallelism = false;
|
421
|
-
//! Current (Global) position of CSV
|
422
|
-
idx_t current_csv_position = 0;
|
423
421
|
//! First Position of First Buffer
|
424
422
|
idx_t first_position = 0;
|
425
423
|
//! Current File Number
|
426
|
-
idx_t file_number = 0;
|
427
424
|
idx_t max_tuple_end = 0;
|
428
425
|
//! The vector stores positions where threads ended the last line they read in the CSV File, and the set stores
|
429
426
|
//! Positions where they started reading the first line.
|
@@ -438,8 +435,10 @@ private:
|
|
438
435
|
vector<column_t> column_ids;
|
439
436
|
//! Line Info used in error messages
|
440
437
|
LineInfo line_info;
|
441
|
-
//!
|
442
|
-
|
438
|
+
//! Current Buffer index
|
439
|
+
idx_t cur_buffer_idx = 0;
|
440
|
+
//! Only used if we don't run auto_detection first
|
441
|
+
unique_ptr<CSVFileHandle> file_handle;
|
443
442
|
};
|
444
443
|
|
445
444
|
idx_t ParallelCSVGlobalState::MaxThreads() const {
|
@@ -538,31 +537,33 @@ void LineInfo::Verify(idx_t file_idx, idx_t batch_idx, idx_t cur_first_pos) {
|
|
538
537
|
problematic_line);
|
539
538
|
}
|
540
539
|
}
|
541
|
-
|
542
540
|
bool ParallelCSVGlobalState::Next(ClientContext &context, const ReadCSVData &bind_data,
|
543
541
|
unique_ptr<ParallelCSVReader> &reader) {
|
544
542
|
lock_guard<mutex> parallel_lock(main_mutex);
|
545
|
-
if (!
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
543
|
+
if (!buffer_manager && file_handle) {
|
544
|
+
buffer_manager = make_shared<CSVBufferManager>(context, std::move(file_handle), bind_data.options);
|
545
|
+
}
|
546
|
+
if (!buffer_manager) {
|
547
|
+
return false;
|
550
548
|
}
|
549
|
+
auto current_buffer = buffer_manager->GetBuffer(cur_buffer_idx);
|
550
|
+
auto next_buffer = buffer_manager->GetBuffer(cur_buffer_idx + 1);
|
551
|
+
|
551
552
|
if (!current_buffer) {
|
552
553
|
// This means we are done with the current file, we need to go to the next one (if exists).
|
553
554
|
if (file_index < bind_data.files.size()) {
|
554
|
-
current_file_path = bind_data.files[file_index
|
555
|
+
current_file_path = bind_data.files[file_index];
|
555
556
|
file_handle = ReadCSV::OpenCSV(current_file_path, bind_data.options.compression, context);
|
556
|
-
|
557
|
-
|
557
|
+
buffer_manager =
|
558
|
+
make_shared<CSVBufferManager>(context, std::move(file_handle), bind_data.options, file_index);
|
559
|
+
cur_buffer_idx = 0;
|
560
|
+
first_position = 0;
|
558
561
|
local_batch_index = 0;
|
559
562
|
|
560
|
-
line_info.lines_read[
|
563
|
+
line_info.lines_read[file_index++][local_batch_index] = (bind_data.options.has_header ? 1 : 0);
|
561
564
|
|
562
|
-
current_buffer =
|
563
|
-
|
564
|
-
next_buffer = shared_ptr<CSVBuffer>(
|
565
|
-
current_buffer->Next(*file_handle, buffer_size, current_csv_position, file_number).release());
|
565
|
+
current_buffer = buffer_manager->GetBuffer(cur_buffer_idx);
|
566
|
+
next_buffer = buffer_manager->GetBuffer(cur_buffer_idx + 1);
|
566
567
|
} else {
|
567
568
|
// We are done scanning.
|
568
569
|
reader.reset();
|
@@ -570,20 +571,21 @@ bool ParallelCSVGlobalState::Next(ClientContext &context, const ReadCSVData &bin
|
|
570
571
|
}
|
571
572
|
}
|
572
573
|
// set up the current buffer
|
573
|
-
line_info.current_batches[
|
574
|
-
auto result = make_uniq<CSVBufferRead>(
|
575
|
-
|
574
|
+
line_info.current_batches[file_index - 1].insert(local_batch_index);
|
575
|
+
auto result = make_uniq<CSVBufferRead>(
|
576
|
+
buffer_manager->GetBuffer(cur_buffer_idx), buffer_manager->GetBuffer(cur_buffer_idx + 1), next_byte,
|
577
|
+
next_byte + bytes_per_local_state, batch_index++, local_batch_index++, &line_info);
|
576
578
|
// move the byte index of the CSV reader to the next buffer
|
577
579
|
next_byte += bytes_per_local_state;
|
578
|
-
if (next_byte >= current_buffer->
|
580
|
+
if (next_byte >= current_buffer->actual_size) {
|
579
581
|
// We replace the current buffer with the next buffer
|
580
582
|
next_byte = 0;
|
581
|
-
bytes_read += current_buffer->
|
582
|
-
current_buffer = next_buffer;
|
583
|
-
|
583
|
+
bytes_read += current_buffer->actual_size;
|
584
|
+
current_buffer = std::move(next_buffer);
|
585
|
+
cur_buffer_idx++;
|
586
|
+
if (current_buffer) {
|
584
587
|
// Next buffer gets the next-next buffer
|
585
|
-
next_buffer =
|
586
|
-
next_buffer->Next(*file_handle, buffer_size, current_csv_position, file_number).release());
|
588
|
+
next_buffer = buffer_manager->GetBuffer(cur_buffer_idx + 1);
|
587
589
|
}
|
588
590
|
}
|
589
591
|
if (!reader || reader->options.file_path != current_file_path) {
|
@@ -602,6 +604,9 @@ bool ParallelCSVGlobalState::Next(ClientContext &context, const ReadCSVData &bin
|
|
602
604
|
reader->names = bind_data.column_info[file_index - 1].names;
|
603
605
|
} else {
|
604
606
|
// regular file - use the standard options
|
607
|
+
if (!result) {
|
608
|
+
return false;
|
609
|
+
}
|
605
610
|
reader = make_uniq<ParallelCSVReader>(context, bind_data.options, std::move(result), first_position,
|
606
611
|
bind_data.csv_types, file_index - 1);
|
607
612
|
reader->names = bind_data.csv_names;
|
@@ -701,22 +706,11 @@ static unique_ptr<GlobalTableFunctionState> ParallelCSVInitGlobal(ClientContext
|
|
701
706
|
// This can happen when a filename based filter pushdown has eliminated all possible files for this scan.
|
702
707
|
return make_uniq<ParallelCSVGlobalState>(context.db->NumberOfThreads());
|
703
708
|
}
|
704
|
-
unique_ptr<CSVFileHandle> file_handle;
|
705
|
-
|
706
709
|
bind_data.options.file_path = bind_data.files[0];
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
file_handle->DisableReset();
|
712
|
-
bind_data.initial_reader.reset();
|
713
|
-
} else {
|
714
|
-
file_handle = ReadCSV::OpenCSV(bind_data.options.file_path, bind_data.options.compression, context);
|
715
|
-
}
|
716
|
-
return make_uniq<ParallelCSVGlobalState>(
|
717
|
-
context, std::move(file_handle), bind_data.files, context.db->NumberOfThreads(), bind_data.options.buffer_size,
|
718
|
-
bind_data.options.skip_rows, ClientConfig::GetConfig(context).verify_parallelism, input.column_ids,
|
719
|
-
bind_data.options.header && bind_data.options.has_header);
|
710
|
+
auto buffer_manager = bind_data.buffer_manager;
|
711
|
+
return make_uniq<ParallelCSVGlobalState>(context, buffer_manager, bind_data.options, context.db->NumberOfThreads(),
|
712
|
+
bind_data.files, ClientConfig::GetConfig(context).verify_parallelism,
|
713
|
+
input.column_ids);
|
720
714
|
}
|
721
715
|
|
722
716
|
//===--------------------------------------------------------------------===//
|
@@ -764,7 +758,7 @@ static void ParallelReadCSVFunction(ClientContext &context, TableFunctionInput &
|
|
764
758
|
if (csv_local_state.csv_reader->finished) {
|
765
759
|
auto verification_updates = csv_local_state.csv_reader->GetVerificationPositions();
|
766
760
|
csv_global_state.UpdateVerification(verification_updates,
|
767
|
-
csv_local_state.csv_reader->buffer->buffer->
|
761
|
+
csv_local_state.csv_reader->buffer->buffer->file_idx,
|
768
762
|
csv_local_state.csv_reader->buffer->local_batch_index);
|
769
763
|
csv_global_state.UpdateLinesRead(*csv_local_state.csv_reader->buffer, csv_local_state.csv_reader->file_idx);
|
770
764
|
auto has_next = csv_global_state.Next(context, bind_data, csv_local_state.csv_reader);
|
@@ -819,17 +813,13 @@ struct SingleThreadedCSVState : public GlobalTableFunctionState {
|
|
819
813
|
|
820
814
|
unique_ptr<BufferedCSVReader> GetCSVReader(ClientContext &context, ReadCSVData &bind_data, idx_t &file_index,
|
821
815
|
idx_t &total_size) {
|
822
|
-
|
823
|
-
if (reader) {
|
824
|
-
reader->file_handle->DisableReset();
|
825
|
-
}
|
826
|
-
return reader;
|
816
|
+
return GetCSVReaderInternal(context, bind_data, file_index, total_size);
|
827
817
|
}
|
828
818
|
|
829
819
|
private:
|
830
820
|
unique_ptr<BufferedCSVReader> GetCSVReaderInternal(ClientContext &context, ReadCSVData &bind_data,
|
831
821
|
idx_t &file_index, idx_t &total_size) {
|
832
|
-
|
822
|
+
CSVReaderOptions options;
|
833
823
|
{
|
834
824
|
lock_guard<mutex> l(csv_lock);
|
835
825
|
if (initial_reader) {
|
@@ -889,13 +879,7 @@ static unique_ptr<GlobalTableFunctionState> SingleThreadedCSVInit(ClientContext
|
|
889
879
|
return std::move(result);
|
890
880
|
} else {
|
891
881
|
bind_data.options.file_path = bind_data.files[0];
|
892
|
-
|
893
|
-
// If this is a pipe and an initial reader already exists due to read_csv_auto
|
894
|
-
// We must re-use it, since we can't restart the reader due for it being a pipe.
|
895
|
-
result->initial_reader = std::move(bind_data.initial_reader);
|
896
|
-
} else {
|
897
|
-
result->initial_reader = make_uniq<BufferedCSVReader>(context, bind_data.options, bind_data.csv_types);
|
898
|
-
}
|
882
|
+
result->initial_reader = make_uniq<BufferedCSVReader>(context, bind_data.options, bind_data.csv_types);
|
899
883
|
if (!bind_data.options.file_options.union_by_name) {
|
900
884
|
result->initial_reader->names = bind_data.csv_names;
|
901
885
|
}
|
@@ -1095,35 +1079,79 @@ void CSVComplexFilterPushdown(ClientContext &context, LogicalGet &get, FunctionD
|
|
1095
1079
|
unique_ptr<NodeStatistics> CSVReaderCardinality(ClientContext &context, const FunctionData *bind_data_p) {
|
1096
1080
|
auto &bind_data = bind_data_p->Cast<ReadCSVData>();
|
1097
1081
|
idx_t per_file_cardinality = 0;
|
1098
|
-
if (bind_data.
|
1082
|
+
if (bind_data.buffer_manager && bind_data.buffer_manager->file_handle) {
|
1099
1083
|
auto estimated_row_width = (bind_data.csv_types.size() * 5);
|
1100
|
-
per_file_cardinality = bind_data.
|
1084
|
+
per_file_cardinality = bind_data.buffer_manager->file_handle->FileSize() / estimated_row_width;
|
1101
1085
|
} else {
|
1102
1086
|
// determined through the scientific method as the average amount of rows in a CSV file
|
1103
1087
|
per_file_cardinality = 42;
|
1104
1088
|
}
|
1105
1089
|
return make_uniq<NodeStatistics>(bind_data.files.size() * per_file_cardinality);
|
1106
1090
|
}
|
1091
|
+
void CSVStateMachineOptions::Serialize(FieldWriter &writer) const {
|
1092
|
+
writer.WriteField<char>(delimiter);
|
1093
|
+
writer.WriteField<char>(quote);
|
1094
|
+
writer.WriteField<char>(escape);
|
1095
|
+
}
|
1107
1096
|
|
1108
|
-
void
|
1097
|
+
void DialectOptions::Serialize(FieldWriter &writer) const {
|
1098
|
+
state_machine_options.Serialize(writer);
|
1099
|
+
writer.WriteField<bool>(header);
|
1100
|
+
writer.WriteField<idx_t>(num_cols);
|
1101
|
+
writer.WriteField<NewLineIdentifier>(new_line);
|
1102
|
+
writer.WriteField<idx_t>(skip_rows);
|
1103
|
+
vector<string> csv_formats;
|
1104
|
+
for (auto &format : date_format) {
|
1105
|
+
writer.WriteField(has_format.find(format.first)->second);
|
1106
|
+
csv_formats.push_back(format.second.format_specifier);
|
1107
|
+
}
|
1108
|
+
writer.WriteList<string>(csv_formats);
|
1109
|
+
}
|
1110
|
+
void CSVStateMachineOptions::Deserialize(FieldReader &reader) {
|
1111
|
+
delimiter = reader.ReadRequired<char>();
|
1112
|
+
quote = reader.ReadRequired<char>();
|
1113
|
+
escape = reader.ReadRequired<char>();
|
1114
|
+
}
|
1115
|
+
void DialectOptions::Deserialize(FieldReader &reader) {
|
1116
|
+
state_machine_options.Deserialize(reader);
|
1117
|
+
header = reader.ReadRequired<bool>();
|
1118
|
+
num_cols = reader.ReadRequired<idx_t>();
|
1119
|
+
new_line = reader.ReadRequired<NewLineIdentifier>();
|
1120
|
+
skip_rows = reader.ReadRequired<idx_t>();
|
1121
|
+
|
1122
|
+
bool has_date = reader.ReadRequired<bool>();
|
1123
|
+
bool has_timestamp = reader.ReadRequired<bool>();
|
1124
|
+
auto formats = reader.ReadRequiredList<string>();
|
1125
|
+
|
1126
|
+
vector<LogicalTypeId> format_types {LogicalTypeId::DATE, LogicalTypeId::TIMESTAMP};
|
1127
|
+
if (has_date) {
|
1128
|
+
has_format[LogicalTypeId::DATE] = true;
|
1129
|
+
}
|
1130
|
+
if (has_timestamp) {
|
1131
|
+
has_format[LogicalTypeId::TIMESTAMP] = true;
|
1132
|
+
}
|
1133
|
+
for (idx_t f_idx = 0; f_idx < formats.size(); f_idx++) {
|
1134
|
+
auto &format = formats[f_idx];
|
1135
|
+
auto &type = format_types[f_idx];
|
1136
|
+
if (format.empty()) {
|
1137
|
+
continue;
|
1138
|
+
}
|
1139
|
+
StrTimeFormat::ParseFormatSpecifier(format, date_format[type]);
|
1140
|
+
}
|
1141
|
+
}
|
1142
|
+
|
1143
|
+
void CSVReaderOptions::Serialize(FieldWriter &writer) const {
|
1109
1144
|
// common options
|
1110
1145
|
writer.WriteField<bool>(has_delimiter);
|
1111
|
-
writer.WriteString(delimiter);
|
1112
1146
|
writer.WriteField<bool>(has_quote);
|
1113
|
-
writer.WriteString(quote);
|
1114
1147
|
writer.WriteField<bool>(has_escape);
|
1115
|
-
writer.WriteString(escape);
|
1116
1148
|
writer.WriteField<bool>(has_header);
|
1117
|
-
writer.WriteField<bool>(header);
|
1118
1149
|
writer.WriteField<bool>(ignore_errors);
|
1119
|
-
writer.WriteField<idx_t>(num_cols);
|
1120
1150
|
writer.WriteField<idx_t>(buffer_sample_size);
|
1121
1151
|
writer.WriteString(null_str);
|
1122
1152
|
writer.WriteField<FileCompressionType>(compression);
|
1123
|
-
writer.WriteField<NewLineIdentifier>(new_line);
|
1124
1153
|
writer.WriteField<bool>(allow_quoted_nulls);
|
1125
1154
|
// read options
|
1126
|
-
writer.WriteField<idx_t>(skip_rows);
|
1127
1155
|
writer.WriteField<bool>(skip_rows_set);
|
1128
1156
|
writer.WriteField<idx_t>(maximum_line_size);
|
1129
1157
|
writer.WriteField<bool>(normalize_names);
|
@@ -1139,37 +1167,29 @@ void BufferedCSVReaderOptions::Serialize(FieldWriter &writer) const {
|
|
1139
1167
|
writer.WriteSerializable(file_options);
|
1140
1168
|
// write options
|
1141
1169
|
writer.WriteListNoReference<bool>(force_quote);
|
1142
|
-
|
1143
|
-
|
1144
|
-
for (auto &format : date_format) {
|
1145
|
-
csv_formats.push_back(format.second.format_specifier);
|
1146
|
-
}
|
1147
|
-
writer.WriteList<string>(csv_formats);
|
1170
|
+
|
1171
|
+
// reject options
|
1148
1172
|
writer.WriteString(rejects_table_name);
|
1149
1173
|
writer.WriteField<idx_t>(rejects_limit);
|
1150
1174
|
writer.WriteList<string>(rejects_recovery_columns);
|
1151
1175
|
writer.WriteList<idx_t>(rejects_recovery_column_ids);
|
1176
|
+
|
1177
|
+
// Serialize Dialect Options
|
1178
|
+
dialect_options.Serialize(writer);
|
1152
1179
|
}
|
1153
1180
|
|
1154
|
-
void
|
1181
|
+
void CSVReaderOptions::Deserialize(FieldReader &reader) {
|
1155
1182
|
// common options
|
1156
1183
|
has_delimiter = reader.ReadRequired<bool>();
|
1157
|
-
delimiter = reader.ReadRequired<string>();
|
1158
1184
|
has_quote = reader.ReadRequired<bool>();
|
1159
|
-
quote = reader.ReadRequired<string>();
|
1160
1185
|
has_escape = reader.ReadRequired<bool>();
|
1161
|
-
escape = reader.ReadRequired<string>();
|
1162
1186
|
has_header = reader.ReadRequired<bool>();
|
1163
|
-
header = reader.ReadRequired<bool>();
|
1164
1187
|
ignore_errors = reader.ReadRequired<bool>();
|
1165
|
-
num_cols = reader.ReadRequired<idx_t>();
|
1166
1188
|
buffer_sample_size = reader.ReadRequired<idx_t>();
|
1167
1189
|
null_str = reader.ReadRequired<string>();
|
1168
1190
|
compression = reader.ReadRequired<FileCompressionType>();
|
1169
|
-
new_line = reader.ReadRequired<NewLineIdentifier>();
|
1170
1191
|
allow_quoted_nulls = reader.ReadRequired<bool>();
|
1171
1192
|
// read options
|
1172
|
-
skip_rows = reader.ReadRequired<idx_t>();
|
1173
1193
|
skip_rows_set = reader.ReadRequired<bool>();
|
1174
1194
|
maximum_line_size = reader.ReadRequired<idx_t>();
|
1175
1195
|
normalize_names = reader.ReadRequired<bool>();
|
@@ -1185,21 +1205,15 @@ void BufferedCSVReaderOptions::Deserialize(FieldReader &reader) {
|
|
1185
1205
|
file_options = reader.ReadRequiredSerializable<MultiFileReaderOptions, MultiFileReaderOptions>();
|
1186
1206
|
// write options
|
1187
1207
|
force_quote = reader.ReadRequiredList<bool>();
|
1188
|
-
|
1189
|
-
|
1190
|
-
for (idx_t f_idx = 0; f_idx < formats.size(); f_idx++) {
|
1191
|
-
auto &format = formats[f_idx];
|
1192
|
-
auto &type = format_types[f_idx];
|
1193
|
-
if (format.empty()) {
|
1194
|
-
continue;
|
1195
|
-
}
|
1196
|
-
has_format[type] = true;
|
1197
|
-
StrTimeFormat::ParseFormatSpecifier(format, date_format[type]);
|
1198
|
-
}
|
1208
|
+
|
1209
|
+
// rejects options
|
1199
1210
|
rejects_table_name = reader.ReadRequired<string>();
|
1200
1211
|
rejects_limit = reader.ReadRequired<idx_t>();
|
1201
1212
|
rejects_recovery_columns = reader.ReadRequiredList<string>();
|
1202
1213
|
rejects_recovery_column_ids = reader.ReadRequiredList<idx_t>();
|
1214
|
+
|
1215
|
+
// dialect options
|
1216
|
+
dialect_options.Deserialize(reader);
|
1203
1217
|
}
|
1204
1218
|
|
1205
1219
|
static void CSVReaderSerialize(FieldWriter &writer, const FunctionData *bind_data_p, const TableFunction &function) {
|
@@ -1246,7 +1260,7 @@ static void CSVReaderFormatSerialize(FormatSerializer &serializer, const optiona
|
|
1246
1260
|
const TableFunction &function) {
|
1247
1261
|
auto &bind_data = bind_data_p->Cast<ReadCSVData>();
|
1248
1262
|
serializer.WriteProperty(100, "extra_info", function.extra_info);
|
1249
|
-
serializer.WriteProperty(101, "csv_data", bind_data);
|
1263
|
+
serializer.WriteProperty(101, "csv_data", &bind_data);
|
1250
1264
|
}
|
1251
1265
|
|
1252
1266
|
static unique_ptr<FunctionData> CSVReaderFormatDeserialize(FormatDeserializer &deserializer, TableFunction &function) {
|
@@ -458,7 +458,6 @@ static void TableScanFormatSerialize(FormatSerializer &serializer, const optiona
|
|
458
458
|
serializer.WriteProperty(103, "is_index_scan", bind_data.is_index_scan);
|
459
459
|
serializer.WriteProperty(104, "is_create_index", bind_data.is_create_index);
|
460
460
|
serializer.WriteProperty(105, "result_ids", bind_data.result_ids);
|
461
|
-
serializer.WriteProperty(106, "result_ids", bind_data.result_ids);
|
462
461
|
}
|
463
462
|
|
464
463
|
static unique_ptr<FunctionData> TableScanFormatDeserialize(FormatDeserializer &deserializer, TableFunction &function) {
|
@@ -474,7 +473,6 @@ static unique_ptr<FunctionData> TableScanFormatDeserialize(FormatDeserializer &d
|
|
474
473
|
deserializer.ReadProperty(103, "is_index_scan", result->is_index_scan);
|
475
474
|
deserializer.ReadProperty(104, "is_create_index", result->is_create_index);
|
476
475
|
deserializer.ReadProperty(105, "result_ids", result->result_ids);
|
477
|
-
deserializer.ReadProperty(106, "result_ids", result->result_ids);
|
478
476
|
return std::move(result);
|
479
477
|
}
|
480
478
|
|
@@ -1,8 +1,8 @@
|
|
1
1
|
#ifndef DUCKDB_VERSION
|
2
|
-
#define DUCKDB_VERSION "0.8.2-
|
2
|
+
#define DUCKDB_VERSION "0.8.2-dev3949"
|
3
3
|
#endif
|
4
4
|
#ifndef DUCKDB_SOURCE_ID
|
5
|
-
#define DUCKDB_SOURCE_ID "
|
5
|
+
#define DUCKDB_SOURCE_ID "c21a9cb87c"
|
6
6
|
#endif
|
7
7
|
#include "duckdb/function/table/system_functions.hpp"
|
8
8
|
#include "duckdb/main/database.hpp"
|
@@ -66,6 +66,8 @@ enum class BlockState : uint8_t;
|
|
66
66
|
|
67
67
|
enum class CAPIResultSetType : uint8_t;
|
68
68
|
|
69
|
+
enum class CSVState : uint8_t;
|
70
|
+
|
69
71
|
enum class CTEMaterialize : uint8_t;
|
70
72
|
|
71
73
|
enum class CatalogType : uint8_t;
|
@@ -130,6 +132,8 @@ enum class FunctionNullHandling : uint8_t;
|
|
130
132
|
|
131
133
|
enum class FunctionSideEffects : uint8_t;
|
132
134
|
|
135
|
+
enum class HLLStorageType : uint8_t;
|
136
|
+
|
133
137
|
enum class IndexConstraintType : uint8_t;
|
134
138
|
|
135
139
|
enum class IndexType : uint8_t;
|
@@ -208,6 +212,8 @@ enum class QueryNodeType : uint8_t;
|
|
208
212
|
|
209
213
|
enum class QueryResultType : uint8_t;
|
210
214
|
|
215
|
+
enum class QuoteRule : uint8_t;
|
216
|
+
|
211
217
|
enum class RelationType : uint8_t;
|
212
218
|
|
213
219
|
enum class ResultModifierType : uint8_t;
|
@@ -334,6 +340,9 @@ const char* EnumUtil::ToChars<BlockState>(BlockState value);
|
|
334
340
|
template<>
|
335
341
|
const char* EnumUtil::ToChars<CAPIResultSetType>(CAPIResultSetType value);
|
336
342
|
|
343
|
+
template<>
|
344
|
+
const char* EnumUtil::ToChars<CSVState>(CSVState value);
|
345
|
+
|
337
346
|
template<>
|
338
347
|
const char* EnumUtil::ToChars<CTEMaterialize>(CTEMaterialize value);
|
339
348
|
|
@@ -430,6 +439,9 @@ const char* EnumUtil::ToChars<FunctionNullHandling>(FunctionNullHandling value);
|
|
430
439
|
template<>
|
431
440
|
const char* EnumUtil::ToChars<FunctionSideEffects>(FunctionSideEffects value);
|
432
441
|
|
442
|
+
template<>
|
443
|
+
const char* EnumUtil::ToChars<HLLStorageType>(HLLStorageType value);
|
444
|
+
|
433
445
|
template<>
|
434
446
|
const char* EnumUtil::ToChars<IndexConstraintType>(IndexConstraintType value);
|
435
447
|
|
@@ -547,6 +559,9 @@ const char* EnumUtil::ToChars<QueryNodeType>(QueryNodeType value);
|
|
547
559
|
template<>
|
548
560
|
const char* EnumUtil::ToChars<QueryResultType>(QueryResultType value);
|
549
561
|
|
562
|
+
template<>
|
563
|
+
const char* EnumUtil::ToChars<QuoteRule>(QuoteRule value);
|
564
|
+
|
550
565
|
template<>
|
551
566
|
const char* EnumUtil::ToChars<RelationType>(RelationType value);
|
552
567
|
|
@@ -710,6 +725,9 @@ BlockState EnumUtil::FromString<BlockState>(const char *value);
|
|
710
725
|
template<>
|
711
726
|
CAPIResultSetType EnumUtil::FromString<CAPIResultSetType>(const char *value);
|
712
727
|
|
728
|
+
template<>
|
729
|
+
CSVState EnumUtil::FromString<CSVState>(const char *value);
|
730
|
+
|
713
731
|
template<>
|
714
732
|
CTEMaterialize EnumUtil::FromString<CTEMaterialize>(const char *value);
|
715
733
|
|
@@ -806,6 +824,9 @@ FunctionNullHandling EnumUtil::FromString<FunctionNullHandling>(const char *valu
|
|
806
824
|
template<>
|
807
825
|
FunctionSideEffects EnumUtil::FromString<FunctionSideEffects>(const char *value);
|
808
826
|
|
827
|
+
template<>
|
828
|
+
HLLStorageType EnumUtil::FromString<HLLStorageType>(const char *value);
|
829
|
+
|
809
830
|
template<>
|
810
831
|
IndexConstraintType EnumUtil::FromString<IndexConstraintType>(const char *value);
|
811
832
|
|
@@ -923,6 +944,9 @@ QueryNodeType EnumUtil::FromString<QueryNodeType>(const char *value);
|
|
923
944
|
template<>
|
924
945
|
QueryResultType EnumUtil::FromString<QueryResultType>(const char *value);
|
925
946
|
|
947
|
+
template<>
|
948
|
+
QuoteRule EnumUtil::FromString<QuoteRule>(const char *value);
|
949
|
+
|
926
950
|
template<>
|
927
951
|
RelationType EnumUtil::FromString<RelationType>(const char *value);
|
928
952
|
|
@@ -16,16 +16,25 @@ namespace duckdb {
|
|
16
16
|
class ClientContext;
|
17
17
|
class Value;
|
18
18
|
|
19
|
+
struct FileOpenerInfo {
|
20
|
+
string file_path;
|
21
|
+
};
|
22
|
+
|
19
23
|
//! Abstract type that provide client-specific context to FileSystem.
|
20
24
|
class FileOpener {
|
21
25
|
public:
|
26
|
+
FileOpener() {
|
27
|
+
}
|
22
28
|
virtual ~FileOpener() {};
|
23
29
|
|
30
|
+
virtual bool TryGetCurrentSetting(const string &key, Value &result, FileOpenerInfo &info);
|
24
31
|
virtual bool TryGetCurrentSetting(const string &key, Value &result) = 0;
|
25
32
|
virtual ClientContext *TryGetClientContext() = 0;
|
26
33
|
|
27
34
|
DUCKDB_API static ClientContext *TryGetClientContext(FileOpener *opener);
|
28
35
|
DUCKDB_API static bool TryGetCurrentSetting(FileOpener *opener, const string &key, Value &result);
|
36
|
+
DUCKDB_API static bool TryGetCurrentSetting(FileOpener *opener, const string &key, Value &result,
|
37
|
+
FileOpenerInfo &info);
|
29
38
|
};
|
30
39
|
|
31
40
|
} // namespace duckdb
|