duckdb 0.8.2-dev3458.0 → 0.8.2-dev3949.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180)
  1. package/binding.gyp +2 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/icu/icu_extension.cpp +5 -5
  4. package/src/duckdb/extension/json/include/json_deserializer.hpp +7 -16
  5. package/src/duckdb/extension/json/include/json_serializer.hpp +9 -15
  6. package/src/duckdb/extension/json/json_deserializer.cpp +29 -67
  7. package/src/duckdb/extension/json/json_scan.cpp +1 -1
  8. package/src/duckdb/extension/json/json_serializer.cpp +26 -69
  9. package/src/duckdb/src/common/enum_util.cpp +119 -7
  10. package/src/duckdb/src/common/extra_type_info.cpp +7 -3
  11. package/src/duckdb/src/common/radix_partitioning.cpp +8 -31
  12. package/src/duckdb/src/common/row_operations/row_aggregate.cpp +18 -3
  13. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +62 -77
  14. package/src/duckdb/src/common/serializer/binary_serializer.cpp +84 -84
  15. package/src/duckdb/src/common/serializer/format_serializer.cpp +1 -1
  16. package/src/duckdb/src/common/sort/partition_state.cpp +41 -33
  17. package/src/duckdb/src/common/types/data_chunk.cpp +44 -8
  18. package/src/duckdb/src/common/types/hyperloglog.cpp +21 -0
  19. package/src/duckdb/src/common/types/interval.cpp +3 -0
  20. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +252 -126
  21. package/src/duckdb/src/common/types/row/row_layout.cpp +3 -31
  22. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +40 -32
  23. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +39 -26
  24. package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +11 -1
  25. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +21 -16
  26. package/src/duckdb/src/common/types/value.cpp +63 -42
  27. package/src/duckdb/src/common/types/vector.cpp +33 -67
  28. package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +3 -2
  29. package/src/duckdb/src/execution/aggregate_hashtable.cpp +222 -364
  30. package/src/duckdb/src/execution/join_hashtable.cpp +5 -6
  31. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +240 -310
  32. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +202 -173
  33. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +36 -2
  34. package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/base_csv_reader.cpp +58 -162
  35. package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +434 -0
  36. package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +80 -0
  37. package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +90 -0
  38. package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +95 -0
  39. package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/csv_reader_options.cpp +47 -28
  40. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +35 -0
  41. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +107 -0
  42. package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/parallel_csv_reader.cpp +44 -44
  43. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +52 -0
  44. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +336 -0
  45. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +165 -0
  46. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +398 -0
  47. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +175 -0
  48. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +39 -0
  49. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +1 -1
  50. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +1 -2
  51. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +614 -574
  52. package/src/duckdb/src/execution/window_executor.cpp +6 -5
  53. package/src/duckdb/src/function/cast/cast_function_set.cpp +1 -0
  54. package/src/duckdb/src/function/scalar/strftime_format.cpp +4 -4
  55. package/src/duckdb/src/function/table/copy_csv.cpp +94 -96
  56. package/src/duckdb/src/function/table/read_csv.cpp +150 -136
  57. package/src/duckdb/src/function/table/table_scan.cpp +0 -2
  58. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  59. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +24 -0
  60. package/src/duckdb/src/include/duckdb/common/file_opener.hpp +9 -0
  61. package/src/duckdb/src/include/duckdb/common/fixed_size_map.hpp +208 -0
  62. package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +3 -0
  63. package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +2 -1
  64. package/src/duckdb/src/include/duckdb/common/printer.hpp +11 -0
  65. package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +43 -30
  66. package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +36 -35
  67. package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +18 -0
  68. package/src/duckdb/src/include/duckdb/common/serializer/encoding_util.hpp +132 -0
  69. package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +125 -150
  70. package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +119 -107
  71. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +2 -1
  72. package/src/duckdb/src/include/duckdb/common/shared_ptr.hpp +8 -0
  73. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -7
  74. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +5 -0
  75. package/src/duckdb/src/include/duckdb/common/types/hyperloglog.hpp +7 -1
  76. package/src/duckdb/src/include/duckdb/common/types/interval.hpp +7 -0
  77. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +41 -9
  78. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -0
  79. package/src/duckdb/src/include/duckdb/common/types/row/row_layout.hpp +1 -23
  80. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +14 -8
  81. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +6 -3
  82. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +7 -0
  83. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +13 -8
  84. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -2
  85. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +3 -3
  86. package/src/duckdb/src/include/duckdb/common/vector.hpp +2 -2
  87. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +125 -146
  88. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +5 -4
  89. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +4 -3
  90. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/base_csv_reader.hpp +17 -17
  91. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +72 -0
  92. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +110 -0
  93. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +103 -0
  94. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_file_handle.hpp +8 -15
  95. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_line_info.hpp +1 -1
  96. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_reader_options.hpp +52 -28
  97. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +127 -0
  98. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +75 -0
  99. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +51 -0
  100. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/parallel_csv_reader.hpp +21 -27
  101. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +21 -0
  102. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +18 -27
  103. package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +5 -6
  104. package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +4 -4
  105. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +17 -12
  106. package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
  107. package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -1
  108. package/src/duckdb/src/include/duckdb/main/config.hpp +1 -0
  109. package/src/duckdb/src/include/duckdb/main/connection.hpp +2 -2
  110. package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +6 -6
  111. package/src/duckdb/src/include/duckdb/parallel/event.hpp +12 -1
  112. package/src/duckdb/src/include/duckdb/storage/block.hpp +6 -0
  113. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +3 -0
  114. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +7 -3
  115. package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +4 -0
  116. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -0
  117. package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +3 -0
  118. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +3 -0
  119. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +3 -0
  120. package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +3 -0
  121. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +15 -3
  122. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -0
  123. package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
  124. package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +6 -0
  125. package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +1 -0
  126. package/src/duckdb/src/include/duckdb.h +12 -0
  127. package/src/duckdb/src/main/capi/logical_types-c.cpp +22 -0
  128. package/src/duckdb/src/main/client_context_file_opener.cpp +17 -0
  129. package/src/duckdb/src/main/client_verify.cpp +1 -0
  130. package/src/duckdb/src/main/config.cpp +2 -2
  131. package/src/duckdb/src/main/connection.cpp +3 -3
  132. package/src/duckdb/src/main/relation/read_csv_relation.cpp +19 -13
  133. package/src/duckdb/src/parallel/pipeline_finish_event.cpp +1 -1
  134. package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -16
  135. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +1 -1
  136. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +41 -25
  137. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +4 -4
  138. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +10 -10
  139. package/src/duckdb/src/planner/logical_operator.cpp +1 -1
  140. package/src/duckdb/src/planner/planner.cpp +1 -1
  141. package/src/duckdb/src/storage/checkpoint_manager.cpp +4 -3
  142. package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +1 -1
  143. package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +5 -5
  144. package/src/duckdb/src/storage/serialization/serialize_expression.cpp +10 -10
  145. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +20 -20
  146. package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +2 -2
  147. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +118 -89
  148. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +3 -3
  149. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +27 -27
  150. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +16 -16
  151. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +8 -8
  152. package/src/duckdb/src/storage/serialization/serialize_statement.cpp +1 -1
  153. package/src/duckdb/src/storage/serialization/serialize_storage.cpp +39 -0
  154. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +9 -9
  155. package/src/duckdb/src/storage/statistics/base_statistics.cpp +67 -4
  156. package/src/duckdb/src/storage/statistics/column_statistics.cpp +16 -0
  157. package/src/duckdb/src/storage/statistics/list_stats.cpp +21 -0
  158. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +126 -1
  159. package/src/duckdb/src/storage/statistics/string_stats.cpp +23 -0
  160. package/src/duckdb/src/storage/statistics/struct_stats.cpp +27 -0
  161. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  162. package/src/duckdb/src/storage/table/chunk_info.cpp +82 -3
  163. package/src/duckdb/src/storage/table/row_group.cpp +68 -1
  164. package/src/duckdb/src/storage/table/table_statistics.cpp +21 -0
  165. package/src/duckdb/src/storage/wal_replay.cpp +2 -2
  166. package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +15 -1
  167. package/src/duckdb/src/verification/statement_verifier.cpp +2 -0
  168. package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +8 -0
  169. package/src/duckdb/ub_src_execution.cpp +0 -2
  170. package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +18 -0
  171. package/src/duckdb/ub_src_execution_operator_csv_scanner_sniffer.cpp +12 -0
  172. package/src/duckdb/ub_src_execution_operator_persistent.cpp +0 -12
  173. package/src/duckdb/ub_src_storage_serialization.cpp +2 -0
  174. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
  175. package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
  176. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
  177. package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -207
  178. package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +0 -133
  179. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +0 -74
  180. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +0 -73
@@ -1,20 +1,21 @@
1
1
  #include "duckdb/function/table/read_csv.hpp"
2
- #include "duckdb/function/function_set.hpp"
3
- #include "duckdb/main/client_context.hpp"
4
- #include "duckdb/main/database.hpp"
5
- #include "duckdb/common/string_util.hpp"
6
2
  #include "duckdb/common/enum_util.hpp"
3
+ #include "duckdb/common/multi_file_reader.hpp"
4
+ #include "duckdb/common/string_util.hpp"
7
5
  #include "duckdb/common/union_by_name.hpp"
6
+ #include "duckdb/execution/operator/persistent/csv_rejects_table.hpp"
7
+ #include "duckdb/execution/operator/scan/csv/csv_line_info.hpp"
8
+ #include "duckdb/execution/operator/scan/csv/csv_sniffer.hpp"
9
+ #include "duckdb/function/function_set.hpp"
10
+ #include "duckdb/main/client_context.hpp"
11
+ #include "duckdb/main/client_data.hpp"
8
12
  #include "duckdb/main/config.hpp"
13
+ #include "duckdb/main/database.hpp"
9
14
  #include "duckdb/parser/expression/constant_expression.hpp"
10
15
  #include "duckdb/parser/expression/function_expression.hpp"
11
16
  #include "duckdb/parser/tableref/table_function_ref.hpp"
12
17
  #include "duckdb/planner/operator/logical_get.hpp"
13
18
  #include "duckdb/main/extension_helper.hpp"
14
- #include "duckdb/common/multi_file_reader.hpp"
15
- #include "duckdb/main/client_data.hpp"
16
- #include "duckdb/execution/operator/persistent/csv_line_info.hpp"
17
- #include "duckdb/execution/operator/persistent/csv_rejects_table.hpp"
18
19
  #include "duckdb/common/serializer/format_serializer.hpp"
19
20
  #include "duckdb/common/serializer/format_deserializer.hpp"
20
21
 
@@ -26,23 +27,22 @@ unique_ptr<CSVFileHandle> ReadCSV::OpenCSV(const string &file_path, FileCompress
26
27
  ClientContext &context) {
27
28
  auto &fs = FileSystem::GetFileSystem(context);
28
29
  auto &allocator = BufferAllocator::Get(context);
29
- return CSVFileHandle::OpenFile(fs, allocator, file_path, compression, false);
30
+ return CSVFileHandle::OpenFile(fs, allocator, file_path, compression);
30
31
  }
31
32
 
32
33
  void ReadCSVData::FinalizeRead(ClientContext &context) {
33
34
  BaseCSVData::Finalize();
34
35
  // Here we identify if we can run this CSV file on parallel or not.
35
- bool null_or_empty = options.delimiter.empty() || options.escape.empty() || options.quote.empty() ||
36
- options.delimiter[0] == '\0' || options.escape[0] == '\0' || options.quote[0] == '\0';
37
- bool complex_options = options.delimiter.size() > 1 || options.escape.size() > 1 || options.quote.size() > 1;
38
36
  bool not_supported_options = options.null_padding;
39
37
 
40
38
  auto number_of_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
41
- if (options.parallel_mode != ParallelMode::PARALLEL && int64_t(files.size() * 2) >= number_of_threads) {
39
+ //! If we have many csv files, we run single-threaded on each file and parallelize on the number of files
40
+ bool many_csv_files = files.size() > 1 && int64_t(files.size() * 2) >= number_of_threads;
41
+ if (options.parallel_mode != ParallelMode::PARALLEL && many_csv_files) {
42
42
  single_threaded = true;
43
43
  }
44
- if (options.parallel_mode == ParallelMode::SINGLE_THREADED || null_or_empty || not_supported_options ||
45
- complex_options || options.new_line == NewLineIdentifier::MIX) {
44
+ if (options.parallel_mode == ParallelMode::SINGLE_THREADED || not_supported_options ||
45
+ options.dialect_options.new_line == NewLineIdentifier::MIX) {
46
46
  // not supported for parallel CSV reading
47
47
  single_threaded = true;
48
48
  }
@@ -231,10 +231,14 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
231
231
  }
232
232
  if (options.auto_detect) {
233
233
  options.file_path = result->files[0];
234
- auto initial_reader = make_uniq<BufferedCSVReader>(context, options);
235
- return_types.assign(initial_reader->return_types.begin(), initial_reader->return_types.end());
234
+ // Initialize Buffer Manager and Sniffer
235
+ auto file_handle = BaseCSVReader::OpenCSV(context, options);
236
+ result->buffer_manager = make_shared<CSVBufferManager>(context, std::move(file_handle), options);
237
+ CSVSniffer sniffer(options, result->buffer_manager, result->state_machine_cache);
238
+ auto sniffer_result = sniffer.SniffCSV();
239
+ return_types = sniffer_result.return_types;
236
240
  if (names.empty()) {
237
- names.assign(initial_reader->names.begin(), initial_reader->names.end());
241
+ names = sniffer_result.names;
238
242
  } else {
239
243
  if (explicitly_set_columns) {
240
244
  // The user has influenced the names, can't assume they are valid anymore
@@ -246,10 +250,8 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
246
250
  } else {
247
251
  D_ASSERT(return_types.size() == names.size());
248
252
  }
249
- initial_reader->names = names;
250
253
  }
251
- options = initial_reader->options;
252
- result->initial_reader = std::move(initial_reader);
254
+
253
255
  } else {
254
256
  D_ASSERT(return_types.size() == names.size());
255
257
  }
@@ -275,15 +277,10 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
275
277
  } else {
276
278
  result->reader_bind = MultiFileReader::BindOptions(options.file_options, result->files, return_types, names);
277
279
  }
278
-
279
280
  result->return_types = return_types;
280
281
  result->return_names = names;
281
282
  result->FinalizeRead(context);
282
283
 
283
- if (options.auto_detect) {
284
- result->initial_reader->options = options;
285
- }
286
-
287
284
  return std::move(result);
288
285
  }
289
286
 
@@ -299,17 +296,25 @@ static unique_ptr<FunctionData> ReadCSVAutoBind(ClientContext &context, TableFun
299
296
 
300
297
  struct ParallelCSVGlobalState : public GlobalTableFunctionState {
301
298
  public:
302
- ParallelCSVGlobalState(ClientContext &context, unique_ptr<CSVFileHandle> file_handle_p,
303
- const vector<string> &files_path_p, idx_t system_threads_p, idx_t buffer_size_p,
304
- idx_t rows_to_skip, bool force_parallelism_p, vector<column_t> column_ids_p, bool has_header)
305
- : file_handle(std::move(file_handle_p)), system_threads(system_threads_p), buffer_size(buffer_size_p),
306
- force_parallelism(force_parallelism_p), column_ids(std::move(column_ids_p)),
299
+ ParallelCSVGlobalState(ClientContext &context, shared_ptr<CSVBufferManager> buffer_manager_p,
300
+ const CSVReaderOptions &options, idx_t system_threads_p, const vector<string> &files_path_p,
301
+ bool force_parallelism_p, vector<column_t> column_ids_p)
302
+ : buffer_manager(std::move(buffer_manager_p)), system_threads(system_threads_p),
303
+ buffer_size(options.buffer_size), force_parallelism(force_parallelism_p), column_ids(std::move(column_ids_p)),
307
304
  line_info(main_mutex, batch_to_tuple_end, tuple_start, tuple_end) {
308
- file_handle->DisableReset();
309
305
  current_file_path = files_path_p[0];
310
- file_size = file_handle->FileSize();
306
+ CSVFileHandle *file_handle_ptr;
307
+
308
+ if (!buffer_manager) {
309
+ file_handle = ReadCSV::OpenCSV(current_file_path, options.compression, context);
310
+ file_handle_ptr = file_handle.get();
311
+ } else {
312
+ file_handle_ptr = buffer_manager->file_handle.get();
313
+ }
314
+
315
+ file_size = file_handle_ptr->FileSize();
311
316
  first_file_size = file_size;
312
- on_disk_file = file_handle->OnDiskFile();
317
+ on_disk_file = file_handle_ptr->OnDiskFile();
313
318
  bytes_read = 0;
314
319
  if (buffer_size < file_size || file_size == 0) {
315
320
  bytes_per_local_state = buffer_size / ParallelCSVGlobalState::MaxThreads();
@@ -321,10 +326,6 @@ public:
321
326
  // this boy needs to be at least one.
322
327
  bytes_per_local_state = 1;
323
328
  }
324
- for (idx_t i = 0; i < rows_to_skip; i++) {
325
- file_handle->ReadLine();
326
- }
327
- first_position = current_csv_position;
328
329
  running_threads = MaxThreads();
329
330
 
330
331
  // Initialize all the book-keeping variables
@@ -337,10 +338,11 @@ public:
337
338
  batch_to_tuple_end.resize(file_count);
338
339
 
339
340
  // Initialize the lines read
340
- line_info.lines_read[0][0] = rows_to_skip;
341
- if (has_header) {
341
+ line_info.lines_read[0][0] = options.dialect_options.skip_rows;
342
+ if (options.has_header && options.dialect_options.header) {
342
343
  line_info.lines_read[0][0]++;
343
344
  }
345
+ first_position = options.dialect_options.true_start;
344
346
  }
345
347
  explicit ParallelCSVGlobalState(idx_t system_threads_p)
346
348
  : system_threads(system_threads_p), line_info(main_mutex, batch_to_tuple_end, tuple_start, tuple_end) {
@@ -390,9 +392,7 @@ public:
390
392
 
391
393
  private:
392
394
  //! File Handle for current file
393
- unique_ptr<CSVFileHandle> file_handle;
394
- shared_ptr<CSVBuffer> current_buffer;
395
- shared_ptr<CSVBuffer> next_buffer;
395
+ shared_ptr<CSVBufferManager> buffer_manager;
396
396
 
397
397
  //! The index of the next file to read (i.e. current file + 1)
398
398
  idx_t file_index = 1;
@@ -418,12 +418,9 @@ private:
418
418
 
419
419
  //! Forces parallelism for small CSV Files, should only be used for testing.
420
420
  bool force_parallelism = false;
421
- //! Current (Global) position of CSV
422
- idx_t current_csv_position = 0;
423
421
  //! First Position of First Buffer
424
422
  idx_t first_position = 0;
425
423
  //! Current File Number
426
- idx_t file_number = 0;
427
424
  idx_t max_tuple_end = 0;
428
425
  //! The vector stores positions where threads ended the last line they read in the CSV File, and the set stores
429
426
  //! Positions where they started reading the first line.
@@ -438,8 +435,10 @@ private:
438
435
  vector<column_t> column_ids;
439
436
  //! Line Info used in error messages
440
437
  LineInfo line_info;
441
- //! Have we initialized our reading
442
- bool initialized = false;
438
+ //! Current Buffer index
439
+ idx_t cur_buffer_idx = 0;
440
+ //! Only used if we don't run auto_detection first
441
+ unique_ptr<CSVFileHandle> file_handle;
443
442
  };
444
443
 
445
444
  idx_t ParallelCSVGlobalState::MaxThreads() const {
@@ -538,31 +537,33 @@ void LineInfo::Verify(idx_t file_idx, idx_t batch_idx, idx_t cur_first_pos) {
538
537
  problematic_line);
539
538
  }
540
539
  }
541
-
542
540
  bool ParallelCSVGlobalState::Next(ClientContext &context, const ReadCSVData &bind_data,
543
541
  unique_ptr<ParallelCSVReader> &reader) {
544
542
  lock_guard<mutex> parallel_lock(main_mutex);
545
- if (!initialized && file_handle) {
546
- current_buffer = make_shared<CSVBuffer>(context, buffer_size, *file_handle, current_csv_position, file_number);
547
- next_buffer = shared_ptr<CSVBuffer>(
548
- current_buffer->Next(*file_handle, buffer_size, current_csv_position, file_number).release());
549
- initialized = true;
543
+ if (!buffer_manager && file_handle) {
544
+ buffer_manager = make_shared<CSVBufferManager>(context, std::move(file_handle), bind_data.options);
545
+ }
546
+ if (!buffer_manager) {
547
+ return false;
550
548
  }
549
+ auto current_buffer = buffer_manager->GetBuffer(cur_buffer_idx);
550
+ auto next_buffer = buffer_manager->GetBuffer(cur_buffer_idx + 1);
551
+
551
552
  if (!current_buffer) {
552
553
  // This means we are done with the current file, we need to go to the next one (if exists).
553
554
  if (file_index < bind_data.files.size()) {
554
- current_file_path = bind_data.files[file_index++];
555
+ current_file_path = bind_data.files[file_index];
555
556
  file_handle = ReadCSV::OpenCSV(current_file_path, bind_data.options.compression, context);
556
- current_csv_position = 0;
557
- file_number++;
557
+ buffer_manager =
558
+ make_shared<CSVBufferManager>(context, std::move(file_handle), bind_data.options, file_index);
559
+ cur_buffer_idx = 0;
560
+ first_position = 0;
558
561
  local_batch_index = 0;
559
562
 
560
- line_info.lines_read[file_number][local_batch_index] = (bind_data.options.has_header ? 1 : 0);
563
+ line_info.lines_read[file_index++][local_batch_index] = (bind_data.options.has_header ? 1 : 0);
561
564
 
562
- current_buffer =
563
- make_shared<CSVBuffer>(context, buffer_size, *file_handle, current_csv_position, file_number);
564
- next_buffer = shared_ptr<CSVBuffer>(
565
- current_buffer->Next(*file_handle, buffer_size, current_csv_position, file_number).release());
565
+ current_buffer = buffer_manager->GetBuffer(cur_buffer_idx);
566
+ next_buffer = buffer_manager->GetBuffer(cur_buffer_idx + 1);
566
567
  } else {
567
568
  // We are done scanning.
568
569
  reader.reset();
@@ -570,20 +571,21 @@ bool ParallelCSVGlobalState::Next(ClientContext &context, const ReadCSVData &bin
570
571
  }
571
572
  }
572
573
  // set up the current buffer
573
- line_info.current_batches[file_number].insert(local_batch_index);
574
- auto result = make_uniq<CSVBufferRead>(current_buffer, next_buffer, next_byte, next_byte + bytes_per_local_state,
575
- batch_index++, local_batch_index++, &line_info);
574
+ line_info.current_batches[file_index - 1].insert(local_batch_index);
575
+ auto result = make_uniq<CSVBufferRead>(
576
+ buffer_manager->GetBuffer(cur_buffer_idx), buffer_manager->GetBuffer(cur_buffer_idx + 1), next_byte,
577
+ next_byte + bytes_per_local_state, batch_index++, local_batch_index++, &line_info);
576
578
  // move the byte index of the CSV reader to the next buffer
577
579
  next_byte += bytes_per_local_state;
578
- if (next_byte >= current_buffer->GetBufferSize()) {
580
+ if (next_byte >= current_buffer->actual_size) {
579
581
  // We replace the current buffer with the next buffer
580
582
  next_byte = 0;
581
- bytes_read += current_buffer->GetBufferSize();
582
- current_buffer = next_buffer;
583
- if (next_buffer) {
583
+ bytes_read += current_buffer->actual_size;
584
+ current_buffer = std::move(next_buffer);
585
+ cur_buffer_idx++;
586
+ if (current_buffer) {
584
587
  // Next buffer gets the next-next buffer
585
- next_buffer = shared_ptr<CSVBuffer>(
586
- next_buffer->Next(*file_handle, buffer_size, current_csv_position, file_number).release());
588
+ next_buffer = buffer_manager->GetBuffer(cur_buffer_idx + 1);
587
589
  }
588
590
  }
589
591
  if (!reader || reader->options.file_path != current_file_path) {
@@ -602,6 +604,9 @@ bool ParallelCSVGlobalState::Next(ClientContext &context, const ReadCSVData &bin
602
604
  reader->names = bind_data.column_info[file_index - 1].names;
603
605
  } else {
604
606
  // regular file - use the standard options
607
+ if (!result) {
608
+ return false;
609
+ }
605
610
  reader = make_uniq<ParallelCSVReader>(context, bind_data.options, std::move(result), first_position,
606
611
  bind_data.csv_types, file_index - 1);
607
612
  reader->names = bind_data.csv_names;
@@ -701,22 +706,11 @@ static unique_ptr<GlobalTableFunctionState> ParallelCSVInitGlobal(ClientContext
701
706
  // This can happen when a filename based filter pushdown has eliminated all possible files for this scan.
702
707
  return make_uniq<ParallelCSVGlobalState>(context.db->NumberOfThreads());
703
708
  }
704
- unique_ptr<CSVFileHandle> file_handle;
705
-
706
709
  bind_data.options.file_path = bind_data.files[0];
707
-
708
- if (bind_data.initial_reader) {
709
- file_handle = std::move(bind_data.initial_reader->file_handle);
710
- file_handle->Reset();
711
- file_handle->DisableReset();
712
- bind_data.initial_reader.reset();
713
- } else {
714
- file_handle = ReadCSV::OpenCSV(bind_data.options.file_path, bind_data.options.compression, context);
715
- }
716
- return make_uniq<ParallelCSVGlobalState>(
717
- context, std::move(file_handle), bind_data.files, context.db->NumberOfThreads(), bind_data.options.buffer_size,
718
- bind_data.options.skip_rows, ClientConfig::GetConfig(context).verify_parallelism, input.column_ids,
719
- bind_data.options.header && bind_data.options.has_header);
710
+ auto buffer_manager = bind_data.buffer_manager;
711
+ return make_uniq<ParallelCSVGlobalState>(context, buffer_manager, bind_data.options, context.db->NumberOfThreads(),
712
+ bind_data.files, ClientConfig::GetConfig(context).verify_parallelism,
713
+ input.column_ids);
720
714
  }
721
715
 
722
716
  //===--------------------------------------------------------------------===//
@@ -764,7 +758,7 @@ static void ParallelReadCSVFunction(ClientContext &context, TableFunctionInput &
764
758
  if (csv_local_state.csv_reader->finished) {
765
759
  auto verification_updates = csv_local_state.csv_reader->GetVerificationPositions();
766
760
  csv_global_state.UpdateVerification(verification_updates,
767
- csv_local_state.csv_reader->buffer->buffer->GetFileNumber(),
761
+ csv_local_state.csv_reader->buffer->buffer->file_idx,
768
762
  csv_local_state.csv_reader->buffer->local_batch_index);
769
763
  csv_global_state.UpdateLinesRead(*csv_local_state.csv_reader->buffer, csv_local_state.csv_reader->file_idx);
770
764
  auto has_next = csv_global_state.Next(context, bind_data, csv_local_state.csv_reader);
@@ -819,17 +813,13 @@ struct SingleThreadedCSVState : public GlobalTableFunctionState {
819
813
 
820
814
  //! Returns the reader produced by GetCSVReaderInternal for the given bind data.
  //! file_index and total_size are passed through by reference to GetCSVReaderInternal.
  //! After this change the result is forwarded unchanged; the removed (-) rows show that the
  //! previous version additionally called DisableReset() on the reader's file handle whenever
  //! a non-null reader was returned.
  unique_ptr<BufferedCSVReader> GetCSVReader(ClientContext &context, ReadCSVData &bind_data, idx_t &file_index,
821
815
  idx_t &total_size) {
822
- auto reader = GetCSVReaderInternal(context, bind_data, file_index, total_size);
823
- if (reader) {
824
- reader->file_handle->DisableReset();
825
- }
826
- return reader;
816
+ return GetCSVReaderInternal(context, bind_data, file_index, total_size);
827
817
  }
828
818
 
829
819
  private:
830
820
  unique_ptr<BufferedCSVReader> GetCSVReaderInternal(ClientContext &context, ReadCSVData &bind_data,
831
821
  idx_t &file_index, idx_t &total_size) {
832
- BufferedCSVReaderOptions options;
822
+ CSVReaderOptions options;
833
823
  {
834
824
  lock_guard<mutex> l(csv_lock);
835
825
  if (initial_reader) {
@@ -889,13 +879,7 @@ static unique_ptr<GlobalTableFunctionState> SingleThreadedCSVInit(ClientContext
889
879
  return std::move(result);
890
880
  } else {
891
881
  bind_data.options.file_path = bind_data.files[0];
892
- if (bind_data.initial_reader) {
893
- // If this is a pipe and an initial reader already exists due to read_csv_auto
894
- // We must re-use it, since we can't restart the reader due for it being a pipe.
895
- result->initial_reader = std::move(bind_data.initial_reader);
896
- } else {
897
- result->initial_reader = make_uniq<BufferedCSVReader>(context, bind_data.options, bind_data.csv_types);
898
- }
882
+ result->initial_reader = make_uniq<BufferedCSVReader>(context, bind_data.options, bind_data.csv_types);
899
883
  if (!bind_data.options.file_options.union_by_name) {
900
884
  result->initial_reader->names = bind_data.csv_names;
901
885
  }
@@ -1095,35 +1079,79 @@ void CSVComplexFilterPushdown(ClientContext &context, LogicalGet &get, FunctionD
1095
1079
  //! Estimates the number of rows produced by the CSV scan as
  //! (number of files) * (per-file estimate).
  //! The per-file estimate is FileSize() / (column count * 5) when a file handle is reachable
  //! through the bind data, and the constant 42 otherwise. The same per-file estimate is applied
  //! to every entry of bind_data.files.
  unique_ptr<NodeStatistics> CSVReaderCardinality(ClientContext &context, const FunctionData *bind_data_p) {
1096
1080
  auto &bind_data = bind_data_p->Cast<ReadCSVData>();
1097
1081
  idx_t per_file_cardinality = 0;
1098
- if (bind_data.initial_reader && bind_data.initial_reader->file_handle) {
1082
  // the file handle is now taken from the buffer manager instead of the initial reader
+ if (bind_data.buffer_manager && bind_data.buffer_manager->file_handle) {
1099
1083
  // heuristic: each column is assumed to contribute 5 units of file size (presumably bytes) per row
  // NOTE(review): estimated_row_width is 0 when csv_types is empty, which would make the
  // division below divide by zero - presumably binding always yields at least one column; confirm
  auto estimated_row_width = (bind_data.csv_types.size() * 5);
1100
- per_file_cardinality = bind_data.initial_reader->file_handle->FileSize() / estimated_row_width;
1084
+ per_file_cardinality = bind_data.buffer_manager->file_handle->FileSize() / estimated_row_width;
1101
1085
  } else {
1102
1086
  // determined through the scientific method as the average amount of rows in a CSV file
1103
1087
  per_file_cardinality = 42;
1104
1088
  }
1105
1089
  return make_uniq<NodeStatistics>(bind_data.files.size() * per_file_cardinality);
1106
1090
  }
1091
//! Writes the three state-machine characters as char fields, in the order
//! delimiter, quote, escape. CSVStateMachineOptions::Deserialize reads them back in the same order.
+ void CSVStateMachineOptions::Serialize(FieldWriter &writer) const {
1092
+ writer.WriteField<char>(delimiter);
1093
+ writer.WriteField<char>(quote);
1094
+ writer.WriteField<char>(escape);
1095
+ }
1107
1096
 
1108
- void BufferedCSVReaderOptions::Serialize(FieldWriter &writer) const {
1097
//! Serializes the dialect options. Field order: the state-machine options, then header,
//! num_cols, new_line and skip_rows, then one has_format flag per entry of date_format,
//! and finally the list of format specifier strings collected from date_format.
//! DialectOptions::Deserialize must read the fields in exactly this order.
+ void DialectOptions::Serialize(FieldWriter &writer) const {
1098
+ state_machine_options.Serialize(writer);
1099
+ writer.WriteField<bool>(header);
1100
+ writer.WriteField<idx_t>(num_cols);
1101
+ writer.WriteField<NewLineIdentifier>(new_line);
1102
+ writer.WriteField<idx_t>(skip_rows);
1103
+ vector<string> csv_formats;
1104
// NOTE(review): one flag is written per date_format entry, while DialectOptions::Deserialize
// reads exactly two flags (has_date, then has_timestamp) - presumably date_format always holds
// exactly the DATE and TIMESTAMP entries and iterates in that order; confirm
+ for (auto &format : date_format) {
1105
// NOTE(review): the find() result is dereferenced without comparing against end() - this
// assumes has_format contains every key present in date_format; confirm
+ writer.WriteField(has_format.find(format.first)->second);
1106
+ csv_formats.push_back(format.second.format_specifier);
1107
+ }
1108
+ writer.WriteList<string>(csv_formats);
1109
+ }
1110
//! Reads the three state-machine characters as required char fields, in the order
//! delimiter, quote, escape - the same order CSVStateMachineOptions::Serialize writes them.
+ void CSVStateMachineOptions::Deserialize(FieldReader &reader) {
1111
+ delimiter = reader.ReadRequired<char>();
1112
+ quote = reader.ReadRequired<char>();
1113
+ escape = reader.ReadRequired<char>();
1114
+ }
1115
//! Restores the dialect options in the field order used by DialectOptions::Serialize:
//! state-machine options, header, num_cols, new_line, skip_rows, two has_format flags
//! (date, timestamp) and the list of format specifier strings. Non-empty specifiers are
//! parsed into date_format for the matching type.
+ void DialectOptions::Deserialize(FieldReader &reader) {
1116
+ state_machine_options.Deserialize(reader);
1117
+ header = reader.ReadRequired<bool>();
1118
+ num_cols = reader.ReadRequired<idx_t>();
1119
+ new_line = reader.ReadRequired<NewLineIdentifier>();
1120
+ skip_rows = reader.ReadRequired<idx_t>();
1121
+
1122
// exactly two flags are read here, whereas Serialize writes one flag per date_format entry
+ bool has_date = reader.ReadRequired<bool>();
1123
+ bool has_timestamp = reader.ReadRequired<bool>();
1124
+ auto formats = reader.ReadRequiredList<string>();
1125
+
1126
// position i of the serialized format list is interpreted as the type at format_types[i]
+ vector<LogicalTypeId> format_types {LogicalTypeId::DATE, LogicalTypeId::TIMESTAMP};
1127
// the flags are only ever set to true; a false flag leaves the existing has_format entry untouched
+ if (has_date) {
1128
+ has_format[LogicalTypeId::DATE] = true;
1129
+ }
1130
+ if (has_timestamp) {
1131
+ has_format[LogicalTypeId::TIMESTAMP] = true;
1132
+ }
1133
// NOTE(review): the loop is bounded by formats.size() but indexes the two-element format_types;
// a serialized list with more than two entries would index past its end - presumably the writer
// never emits more than two; confirm
+ for (idx_t f_idx = 0; f_idx < formats.size(); f_idx++) {
1134
+ auto &format = formats[f_idx];
1135
+ auto &type = format_types[f_idx];
1136
// an empty specifier is skipped, so nothing is parsed for that type
+ if (format.empty()) {
1137
+ continue;
1138
+ }
1139
+ StrTimeFormat::ParseFormatSpecifier(format, date_format[type]);
1140
+ }
1141
+ }
1142
+
1143
+ void CSVReaderOptions::Serialize(FieldWriter &writer) const {
1109
1144
  // common options
1110
1145
  writer.WriteField<bool>(has_delimiter);
1111
- writer.WriteString(delimiter);
1112
1146
  writer.WriteField<bool>(has_quote);
1113
- writer.WriteString(quote);
1114
1147
  writer.WriteField<bool>(has_escape);
1115
- writer.WriteString(escape);
1116
1148
  writer.WriteField<bool>(has_header);
1117
- writer.WriteField<bool>(header);
1118
1149
  writer.WriteField<bool>(ignore_errors);
1119
- writer.WriteField<idx_t>(num_cols);
1120
1150
  writer.WriteField<idx_t>(buffer_sample_size);
1121
1151
  writer.WriteString(null_str);
1122
1152
  writer.WriteField<FileCompressionType>(compression);
1123
- writer.WriteField<NewLineIdentifier>(new_line);
1124
1153
  writer.WriteField<bool>(allow_quoted_nulls);
1125
1154
  // read options
1126
- writer.WriteField<idx_t>(skip_rows);
1127
1155
  writer.WriteField<bool>(skip_rows_set);
1128
1156
  writer.WriteField<idx_t>(maximum_line_size);
1129
1157
  writer.WriteField<bool>(normalize_names);
@@ -1139,37 +1167,29 @@ void BufferedCSVReaderOptions::Serialize(FieldWriter &writer) const {
1139
1167
  writer.WriteSerializable(file_options);
1140
1168
  // write options
1141
1169
  writer.WriteListNoReference<bool>(force_quote);
1142
- // FIXME: serialize date_format / has_format
1143
- vector<string> csv_formats;
1144
- for (auto &format : date_format) {
1145
- csv_formats.push_back(format.second.format_specifier);
1146
- }
1147
- writer.WriteList<string>(csv_formats);
1170
+
1171
+ // reject options
1148
1172
  writer.WriteString(rejects_table_name);
1149
1173
  writer.WriteField<idx_t>(rejects_limit);
1150
1174
  writer.WriteList<string>(rejects_recovery_columns);
1151
1175
  writer.WriteList<idx_t>(rejects_recovery_column_ids);
1176
+
1177
+ // Serialize Dialect Options
1178
+ dialect_options.Serialize(writer);
1152
1179
  }
1153
1180
 
1154
- void BufferedCSVReaderOptions::Deserialize(FieldReader &reader) {
1181
+ void CSVReaderOptions::Deserialize(FieldReader &reader) {
1155
1182
  // common options
1156
1183
  has_delimiter = reader.ReadRequired<bool>();
1157
- delimiter = reader.ReadRequired<string>();
1158
1184
  has_quote = reader.ReadRequired<bool>();
1159
- quote = reader.ReadRequired<string>();
1160
1185
  has_escape = reader.ReadRequired<bool>();
1161
- escape = reader.ReadRequired<string>();
1162
1186
  has_header = reader.ReadRequired<bool>();
1163
- header = reader.ReadRequired<bool>();
1164
1187
  ignore_errors = reader.ReadRequired<bool>();
1165
- num_cols = reader.ReadRequired<idx_t>();
1166
1188
  buffer_sample_size = reader.ReadRequired<idx_t>();
1167
1189
  null_str = reader.ReadRequired<string>();
1168
1190
  compression = reader.ReadRequired<FileCompressionType>();
1169
- new_line = reader.ReadRequired<NewLineIdentifier>();
1170
1191
  allow_quoted_nulls = reader.ReadRequired<bool>();
1171
1192
  // read options
1172
- skip_rows = reader.ReadRequired<idx_t>();
1173
1193
  skip_rows_set = reader.ReadRequired<bool>();
1174
1194
  maximum_line_size = reader.ReadRequired<idx_t>();
1175
1195
  normalize_names = reader.ReadRequired<bool>();
@@ -1185,21 +1205,15 @@ void BufferedCSVReaderOptions::Deserialize(FieldReader &reader) {
1185
1205
  file_options = reader.ReadRequiredSerializable<MultiFileReaderOptions, MultiFileReaderOptions>();
1186
1206
  // write options
1187
1207
  force_quote = reader.ReadRequiredList<bool>();
1188
- auto formats = reader.ReadRequiredList<string>();
1189
- vector<LogicalTypeId> format_types {LogicalTypeId::DATE, LogicalTypeId::TIMESTAMP};
1190
- for (idx_t f_idx = 0; f_idx < formats.size(); f_idx++) {
1191
- auto &format = formats[f_idx];
1192
- auto &type = format_types[f_idx];
1193
- if (format.empty()) {
1194
- continue;
1195
- }
1196
- has_format[type] = true;
1197
- StrTimeFormat::ParseFormatSpecifier(format, date_format[type]);
1198
- }
1208
+
1209
+ // rejects options
1199
1210
  rejects_table_name = reader.ReadRequired<string>();
1200
1211
  rejects_limit = reader.ReadRequired<idx_t>();
1201
1212
  rejects_recovery_columns = reader.ReadRequiredList<string>();
1202
1213
  rejects_recovery_column_ids = reader.ReadRequiredList<idx_t>();
1214
+
1215
+ // dialect options
1216
+ dialect_options.Deserialize(reader);
1203
1217
  }
1204
1218
 
1205
1219
  static void CSVReaderSerialize(FieldWriter &writer, const FunctionData *bind_data_p, const TableFunction &function) {
@@ -1246,7 +1260,7 @@ static void CSVReaderFormatSerialize(FormatSerializer &serializer, const optiona
1246
1260
  const TableFunction &function) {
1247
1261
  auto &bind_data = bind_data_p->Cast<ReadCSVData>();
1248
1262
  serializer.WriteProperty(100, "extra_info", function.extra_info);
1249
- serializer.WriteProperty(101, "csv_data", bind_data);
1263
+ serializer.WriteProperty(101, "csv_data", &bind_data);
1250
1264
  }
1251
1265
 
1252
1266
  static unique_ptr<FunctionData> CSVReaderFormatDeserialize(FormatDeserializer &deserializer, TableFunction &function) {
@@ -458,7 +458,6 @@ static void TableScanFormatSerialize(FormatSerializer &serializer, const optiona
458
458
  serializer.WriteProperty(103, "is_index_scan", bind_data.is_index_scan);
459
459
  serializer.WriteProperty(104, "is_create_index", bind_data.is_create_index);
460
460
  serializer.WriteProperty(105, "result_ids", bind_data.result_ids);
461
- serializer.WriteProperty(106, "result_ids", bind_data.result_ids);
462
461
  }
463
462
 
464
463
  static unique_ptr<FunctionData> TableScanFormatDeserialize(FormatDeserializer &deserializer, TableFunction &function) {
@@ -474,7 +473,6 @@ static unique_ptr<FunctionData> TableScanFormatDeserialize(FormatDeserializer &d
474
473
  deserializer.ReadProperty(103, "is_index_scan", result->is_index_scan);
475
474
  deserializer.ReadProperty(104, "is_create_index", result->is_create_index);
476
475
  deserializer.ReadProperty(105, "result_ids", result->result_ids);
477
- deserializer.ReadProperty(106, "result_ids", result->result_ids);
478
476
  return std::move(result);
479
477
  }
480
478
 
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.8.2-dev3458"
2
+ #define DUCKDB_VERSION "0.8.2-dev3949"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "3d880e1053"
5
+ #define DUCKDB_SOURCE_ID "c21a9cb87c"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -66,6 +66,8 @@ enum class BlockState : uint8_t;
66
66
 
67
67
  enum class CAPIResultSetType : uint8_t;
68
68
 
69
+ enum class CSVState : uint8_t;
70
+
69
71
  enum class CTEMaterialize : uint8_t;
70
72
 
71
73
  enum class CatalogType : uint8_t;
@@ -130,6 +132,8 @@ enum class FunctionNullHandling : uint8_t;
130
132
 
131
133
  enum class FunctionSideEffects : uint8_t;
132
134
 
135
+ enum class HLLStorageType : uint8_t;
136
+
133
137
  enum class IndexConstraintType : uint8_t;
134
138
 
135
139
  enum class IndexType : uint8_t;
@@ -208,6 +212,8 @@ enum class QueryNodeType : uint8_t;
208
212
 
209
213
  enum class QueryResultType : uint8_t;
210
214
 
215
+ enum class QuoteRule : uint8_t;
216
+
211
217
  enum class RelationType : uint8_t;
212
218
 
213
219
  enum class ResultModifierType : uint8_t;
@@ -334,6 +340,9 @@ const char* EnumUtil::ToChars<BlockState>(BlockState value);
334
340
  template<>
335
341
  const char* EnumUtil::ToChars<CAPIResultSetType>(CAPIResultSetType value);
336
342
 
343
+ template<>
344
+ const char* EnumUtil::ToChars<CSVState>(CSVState value);
345
+
337
346
  template<>
338
347
  const char* EnumUtil::ToChars<CTEMaterialize>(CTEMaterialize value);
339
348
 
@@ -430,6 +439,9 @@ const char* EnumUtil::ToChars<FunctionNullHandling>(FunctionNullHandling value);
430
439
  template<>
431
440
  const char* EnumUtil::ToChars<FunctionSideEffects>(FunctionSideEffects value);
432
441
 
442
+ template<>
443
+ const char* EnumUtil::ToChars<HLLStorageType>(HLLStorageType value);
444
+
433
445
  template<>
434
446
  const char* EnumUtil::ToChars<IndexConstraintType>(IndexConstraintType value);
435
447
 
@@ -547,6 +559,9 @@ const char* EnumUtil::ToChars<QueryNodeType>(QueryNodeType value);
547
559
  template<>
548
560
  const char* EnumUtil::ToChars<QueryResultType>(QueryResultType value);
549
561
 
562
+ template<>
563
+ const char* EnumUtil::ToChars<QuoteRule>(QuoteRule value);
564
+
550
565
  template<>
551
566
  const char* EnumUtil::ToChars<RelationType>(RelationType value);
552
567
 
@@ -710,6 +725,9 @@ BlockState EnumUtil::FromString<BlockState>(const char *value);
710
725
  template<>
711
726
  CAPIResultSetType EnumUtil::FromString<CAPIResultSetType>(const char *value);
712
727
 
728
+ template<>
729
+ CSVState EnumUtil::FromString<CSVState>(const char *value);
730
+
713
731
  template<>
714
732
  CTEMaterialize EnumUtil::FromString<CTEMaterialize>(const char *value);
715
733
 
@@ -806,6 +824,9 @@ FunctionNullHandling EnumUtil::FromString<FunctionNullHandling>(const char *valu
806
824
  template<>
807
825
  FunctionSideEffects EnumUtil::FromString<FunctionSideEffects>(const char *value);
808
826
 
827
+ template<>
828
+ HLLStorageType EnumUtil::FromString<HLLStorageType>(const char *value);
829
+
809
830
  template<>
810
831
  IndexConstraintType EnumUtil::FromString<IndexConstraintType>(const char *value);
811
832
 
@@ -923,6 +944,9 @@ QueryNodeType EnumUtil::FromString<QueryNodeType>(const char *value);
923
944
  template<>
924
945
  QueryResultType EnumUtil::FromString<QueryResultType>(const char *value);
925
946
 
947
+ template<>
948
+ QuoteRule EnumUtil::FromString<QuoteRule>(const char *value);
949
+
926
950
  template<>
927
951
  RelationType EnumUtil::FromString<RelationType>(const char *value);
928
952
 
@@ -16,16 +16,25 @@ namespace duckdb {
16
16
  class ClientContext;
17
17
  class Value;
18
18
 
19
//! Extra information passed to the FileOpenerInfo-taking overloads of
//! FileOpener::TryGetCurrentSetting. Currently carries a single string, file_path
//! (presumably the path of the file being opened - confirm against the callers).
+ struct FileOpenerInfo {
20
+ string file_path;
21
+ };
22
+
19
23
  //! Abstract type that provide client-specific context to FileSystem.
20
24
  // Interface overview: two pure virtual members (the two-argument TryGetCurrentSetting and
  // TryGetClientContext), one non-pure virtual TryGetCurrentSetting overload taking a
  // FileOpenerInfo, and static helpers that receive the opener as a pointer.
  class FileOpener {
21
25
  public:
26
  // empty, explicitly declared default constructor
+ FileOpener() {
27
+ }
22
28
  virtual ~FileOpener() {};
23
29
 
30
  // Overload that additionally receives a FileOpenerInfo. Unlike the two-argument overload
  // below it is not pure virtual, so its definition must live outside this declaration.
+ virtual bool TryGetCurrentSetting(const string &key, Value &result, FileOpenerInfo &info);
24
31
  // pure virtual: concrete openers must implement the next two members
  virtual bool TryGetCurrentSetting(const string &key, Value &result) = 0;
25
32
  virtual ClientContext *TryGetClientContext() = 0;
26
33
 
27
34
  // Static helpers that take the opener as a pointer.
  // NOTE(review): presumably these tolerate a null opener - confirm against the definitions
  DUCKDB_API static ClientContext *TryGetClientContext(FileOpener *opener);
28
35
  DUCKDB_API static bool TryGetCurrentSetting(FileOpener *opener, const string &key, Value &result);
36
+ DUCKDB_API static bool TryGetCurrentSetting(FileOpener *opener, const string &key, Value &result,
37
+ FileOpenerInfo &info);
29
38
  };
30
39
 
31
40
  } // namespace duckdb