duckdb 0.10.1-dev9.0 → 0.10.2-dev0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (702) hide show
  1. package/.github/workflows/NodeJS.yml +9 -16
  2. package/binding.gyp +4 -1
  3. package/package.json +1 -1
  4. package/src/database.cpp +1 -0
  5. package/src/duckdb/extension/icu/icu-strptime.cpp +46 -4
  6. package/src/duckdb/extension/icu/icu-table-range.cpp +5 -0
  7. package/src/duckdb/extension/icu/icu-timezone.cpp +28 -4
  8. package/src/duckdb/extension/json/buffered_json_reader.cpp +6 -5
  9. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +3 -3
  10. package/src/duckdb/extension/json/include/json_transform.hpp +2 -0
  11. package/src/duckdb/extension/json/json_functions/json_create.cpp +6 -0
  12. package/src/duckdb/extension/json/json_functions/json_transform.cpp +8 -9
  13. package/src/duckdb/extension/json/json_functions.cpp +1 -2
  14. package/src/duckdb/extension/json/json_scan.cpp +1 -0
  15. package/src/duckdb/extension/parquet/column_reader.cpp +17 -1
  16. package/src/duckdb/extension/parquet/column_writer.cpp +151 -20
  17. package/src/duckdb/extension/parquet/include/column_writer.hpp +3 -4
  18. package/src/duckdb/extension/parquet/parquet_extension.cpp +14 -9
  19. package/src/duckdb/extension/parquet/parquet_timestamp.cpp +10 -0
  20. package/src/duckdb/extension/parquet/parquet_writer.cpp +4 -0
  21. package/src/duckdb/src/catalog/catalog.cpp +94 -10
  22. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +9 -5
  23. package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +1 -0
  24. package/src/duckdb/src/catalog/catalog_entry/ub_duckdb_catalog_entries.cpp +16 -0
  25. package/src/duckdb/src/catalog/catalog_entry/view_catalog_entry.cpp +28 -0
  26. package/src/duckdb/src/catalog/default/default_functions.cpp +3 -0
  27. package/src/duckdb/src/catalog/default/default_views.cpp +3 -3
  28. package/src/duckdb/src/catalog/default/ub_duckdb_catalog_default_entries.cpp +5 -0
  29. package/src/duckdb/src/catalog/dependency_catalog_set.cpp +1 -3
  30. package/src/duckdb/src/catalog/dependency_manager.cpp +2 -5
  31. package/src/duckdb/src/catalog/ub_duckdb_catalog.cpp +10 -0
  32. package/src/duckdb/src/common/adbc/adbc.cpp +1 -1
  33. package/src/duckdb/src/common/adbc/driver_manager.cpp +2 -1
  34. package/src/duckdb/src/common/adbc/nanoarrow/ub_duckdb_adbc_nanoarrow.cpp +5 -0
  35. package/src/duckdb/src/common/adbc/ub_duckdb_adbc.cpp +3 -0
  36. package/src/duckdb/src/common/allocator.cpp +34 -8
  37. package/src/duckdb/src/common/arrow/appender/fixed_size_list_data.cpp +39 -0
  38. package/src/duckdb/src/common/arrow/appender/ub_duckdb_common_arrow_appender.cpp +6 -0
  39. package/src/duckdb/src/common/arrow/appender/union_data.cpp +2 -1
  40. package/src/duckdb/src/common/arrow/arrow_appender.cpp +5 -5
  41. package/src/duckdb/src/common/arrow/arrow_converter.cpp +17 -0
  42. package/src/duckdb/src/common/arrow/ub_duckdb_common_arrow.cpp +4 -0
  43. package/src/duckdb/src/common/compressed_file_system.cpp +1 -0
  44. package/src/duckdb/src/common/crypto/ub_duckdb_common_crypto.cpp +2 -0
  45. package/src/duckdb/src/common/enum_util.cpp +153 -13
  46. package/src/duckdb/src/common/enums/logical_operator_type.cpp +0 -2
  47. package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -2
  48. package/src/duckdb/src/common/enums/ub_duckdb_common_enums.cpp +12 -0
  49. package/src/duckdb/src/common/exception/conversion_exception.cpp +23 -0
  50. package/src/duckdb/src/common/exception.cpp +7 -14
  51. package/src/duckdb/src/common/extra_type_info.cpp +6 -5
  52. package/src/duckdb/src/common/gzip_file_system.cpp +5 -4
  53. package/src/duckdb/src/common/local_file_system.cpp +85 -10
  54. package/src/duckdb/src/common/operator/cast_operators.cpp +413 -305
  55. package/src/duckdb/src/common/operator/ub_duckdb_common_operators.cpp +4 -0
  56. package/src/duckdb/src/common/progress_bar/ub_duckdb_progress_bar.cpp +3 -0
  57. package/src/duckdb/src/common/re2_regex.cpp +2 -1
  58. package/src/duckdb/src/common/row_operations/row_heap_scatter.cpp +2 -2
  59. package/src/duckdb/src/common/row_operations/row_matcher.cpp +1 -0
  60. package/src/duckdb/src/common/row_operations/row_scatter.cpp +2 -2
  61. package/src/duckdb/src/common/row_operations/ub_duckdb_row_operations.cpp +9 -0
  62. package/src/duckdb/src/common/serializer/binary_serializer.cpp +3 -3
  63. package/src/duckdb/src/common/serializer/ub_duckdb_common_serializer.cpp +7 -0
  64. package/src/duckdb/src/common/sort/partition_state.cpp +2 -3
  65. package/src/duckdb/src/common/sort/ub_duckdb_sort.cpp +7 -0
  66. package/src/duckdb/src/common/string_util.cpp +3 -3
  67. package/src/duckdb/src/common/types/bit.cpp +7 -6
  68. package/src/duckdb/src/common/types/blob.cpp +20 -9
  69. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +8 -6
  70. package/src/duckdb/src/common/types/column/column_data_collection.cpp +11 -1
  71. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +5 -0
  72. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +1 -1
  73. package/src/duckdb/src/common/types/column/ub_duckdb_common_types_column.cpp +6 -0
  74. package/src/duckdb/src/common/types/data_chunk.cpp +1 -1
  75. package/src/duckdb/src/common/types/date.cpp +1 -1
  76. package/src/duckdb/src/common/types/hugeint.cpp +3 -2
  77. package/src/duckdb/src/common/types/interval.cpp +1 -1
  78. package/src/duckdb/src/common/types/list_segment.cpp +2 -1
  79. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
  80. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +7 -7
  81. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +10 -27
  82. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +168 -88
  83. package/src/duckdb/src/common/types/row/ub_duckdb_common_types_row.cpp +11 -0
  84. package/src/duckdb/src/common/types/selection_vector.cpp +1 -1
  85. package/src/duckdb/src/common/types/string_heap.cpp +5 -1
  86. package/src/duckdb/src/common/types/string_type.cpp +18 -4
  87. package/src/duckdb/src/common/types/time.cpp +4 -2
  88. package/src/duckdb/src/common/types/timestamp.cpp +32 -6
  89. package/src/duckdb/src/common/types/ub_duckdb_common_types.cpp +28 -0
  90. package/src/duckdb/src/common/types/uhugeint.cpp +3 -2
  91. package/src/duckdb/src/common/types/uuid.cpp +11 -0
  92. package/src/duckdb/src/common/types/validity_mask.cpp +4 -3
  93. package/src/duckdb/src/common/types/value.cpp +17 -6
  94. package/src/duckdb/src/common/types/vector.cpp +243 -68
  95. package/src/duckdb/src/common/types.cpp +7 -5
  96. package/src/duckdb/src/common/ub_duckdb_common.cpp +34 -0
  97. package/src/duckdb/src/common/value_operations/ub_duckdb_value_operations.cpp +2 -0
  98. package/src/duckdb/src/common/vector_operations/generators.cpp +2 -1
  99. package/src/duckdb/src/common/vector_operations/vector_cast.cpp +4 -4
  100. package/src/duckdb/src/common/vector_operations/vector_copy.cpp +2 -2
  101. package/src/duckdb/src/common/vector_operations/vector_hash.cpp +17 -6
  102. package/src/duckdb/src/core_functions/aggregate/algebraic/ub_duckdb_aggr_algebraic.cpp +5 -0
  103. package/src/duckdb/src/core_functions/aggregate/distributive/arg_min_max.cpp +30 -6
  104. package/src/duckdb/src/core_functions/aggregate/distributive/bitagg.cpp +1 -1
  105. package/src/duckdb/src/core_functions/aggregate/distributive/bitstring_agg.cpp +12 -5
  106. package/src/duckdb/src/core_functions/aggregate/distributive/minmax.cpp +2 -2
  107. package/src/duckdb/src/core_functions/aggregate/distributive/string_agg.cpp +2 -1
  108. package/src/duckdb/src/core_functions/aggregate/distributive/ub_duckdb_aggr_distributive.cpp +13 -0
  109. package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +2 -1
  110. package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +2 -9
  111. package/src/duckdb/src/core_functions/aggregate/holistic/ub_duckdb_aggr_holistic.cpp +5 -0
  112. package/src/duckdb/src/core_functions/aggregate/nested/ub_duckdb_aggr_nested.cpp +3 -0
  113. package/src/duckdb/src/core_functions/aggregate/regression/ub_duckdb_aggr_regr.cpp +8 -0
  114. package/src/duckdb/src/core_functions/function_list.cpp +1 -0
  115. package/src/duckdb/src/core_functions/scalar/bit/bitstring.cpp +2 -2
  116. package/src/duckdb/src/core_functions/scalar/bit/ub_duckdb_func_bit.cpp +2 -0
  117. package/src/duckdb/src/core_functions/scalar/blob/ub_duckdb_func_blob.cpp +3 -0
  118. package/src/duckdb/src/core_functions/scalar/date/date_diff.cpp +8 -0
  119. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +22 -3
  120. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +29 -7
  121. package/src/duckdb/src/core_functions/scalar/date/to_interval.cpp +3 -1
  122. package/src/duckdb/src/core_functions/scalar/date/ub_duckdb_func_date.cpp +12 -0
  123. package/src/duckdb/src/core_functions/scalar/debug/ub_duckdb_func_debug.cpp +2 -0
  124. package/src/duckdb/src/core_functions/scalar/enum/ub_duckdb_func_enum.cpp +2 -0
  125. package/src/duckdb/src/core_functions/scalar/generic/ub_duckdb_func_generic.cpp +9 -0
  126. package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +12 -8
  127. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +2 -1
  128. package/src/duckdb/src/core_functions/scalar/list/list_reduce.cpp +6 -5
  129. package/src/duckdb/src/core_functions/scalar/list/list_sort.cpp +5 -3
  130. package/src/duckdb/src/core_functions/scalar/list/list_value.cpp +28 -12
  131. package/src/duckdb/src/core_functions/scalar/list/ub_duckdb_func_list.cpp +11 -0
  132. package/src/duckdb/src/core_functions/scalar/map/map.cpp +129 -160
  133. package/src/duckdb/src/core_functions/scalar/map/ub_duckdb_func_map_nested.cpp +8 -0
  134. package/src/duckdb/src/core_functions/scalar/math/numeric.cpp +19 -16
  135. package/src/duckdb/src/core_functions/scalar/math/ub_duckdb_func_math.cpp +1 -0
  136. package/src/duckdb/src/core_functions/scalar/operators/bitwise.cpp +4 -4
  137. package/src/duckdb/src/core_functions/scalar/operators/ub_duckdb_func_ops.cpp +1 -0
  138. package/src/duckdb/src/core_functions/scalar/random/ub_duckdb_func_random.cpp +3 -0
  139. package/src/duckdb/src/core_functions/scalar/string/hex.cpp +2 -1
  140. package/src/duckdb/src/core_functions/scalar/string/pad.cpp +2 -2
  141. package/src/duckdb/src/core_functions/scalar/string/repeat.cpp +1 -1
  142. package/src/duckdb/src/core_functions/scalar/string/replace.cpp +1 -1
  143. package/src/duckdb/src/core_functions/scalar/string/string_split.cpp +2 -1
  144. package/src/duckdb/src/core_functions/scalar/string/translate.cpp +1 -1
  145. package/src/duckdb/src/core_functions/scalar/string/ub_duckdb_func_string.cpp +26 -0
  146. package/src/duckdb/src/core_functions/scalar/struct/ub_duckdb_func_struct.cpp +3 -0
  147. package/src/duckdb/src/core_functions/scalar/union/ub_duckdb_func_union.cpp +4 -0
  148. package/src/duckdb/src/core_functions/ub_duckdb_core_functions.cpp +3 -0
  149. package/src/duckdb/src/execution/adaptive_filter.cpp +1 -1
  150. package/src/duckdb/src/execution/aggregate_hashtable.cpp +1 -1
  151. package/src/duckdb/src/execution/column_binding_resolver.cpp +7 -1
  152. package/src/duckdb/src/execution/expression_executor/execute_case.cpp +2 -2
  153. package/src/duckdb/src/execution/expression_executor/execute_cast.cpp +2 -0
  154. package/src/duckdb/src/execution/expression_executor/execute_function.cpp +2 -2
  155. package/src/duckdb/src/execution/expression_executor/execute_operator.cpp +1 -1
  156. package/src/duckdb/src/execution/expression_executor/ub_duckdb_expression_executor.cpp +11 -0
  157. package/src/duckdb/src/execution/expression_executor.cpp +5 -2
  158. package/src/duckdb/src/execution/index/art/art.cpp +12 -6
  159. package/src/duckdb/src/execution/index/art/art_key.cpp +3 -3
  160. package/src/duckdb/src/execution/index/art/leaf.cpp +2 -2
  161. package/src/duckdb/src/execution/index/art/node16.cpp +2 -2
  162. package/src/duckdb/src/execution/index/art/node256.cpp +3 -3
  163. package/src/duckdb/src/execution/index/art/node48.cpp +5 -5
  164. package/src/duckdb/src/execution/index/art/prefix.cpp +1 -1
  165. package/src/duckdb/src/execution/index/art/ub_duckdb_art_index_execution.cpp +12 -0
  166. package/src/duckdb/src/execution/index/art/ub_duckdb_execution_index_art.cpp +11 -0
  167. package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +6 -5
  168. package/src/duckdb/src/execution/index/ub_duckdb_execution_index.cpp +3 -0
  169. package/src/duckdb/src/execution/nested_loop_join/ub_duckdb_nested_loop_join.cpp +3 -0
  170. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -1
  171. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +3 -4
  172. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +0 -1
  173. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +3 -4
  174. package/src/duckdb/src/execution/operator/aggregate/ub_duckdb_operator_aggregate.cpp +9 -0
  175. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +1 -1
  176. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp +8 -3
  177. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp +1 -1
  178. package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +5 -5
  179. package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +2 -3
  180. package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +5 -1
  181. package/src/duckdb/src/execution/operator/csv_scanner/scanner/skip_scanner.cpp +2 -2
  182. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +151 -79
  183. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +11 -6
  184. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +27 -6
  185. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +71 -18
  186. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +22 -11
  187. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +6 -4
  188. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +5 -3
  189. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/ub_duckdb_operator_csv_sniffer.cpp +7 -0
  190. package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine.cpp +3 -3
  191. package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +30 -5
  192. package/src/duckdb/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +6 -2
  193. package/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +47 -46
  194. package/src/duckdb/src/execution/operator/csv_scanner/ub_duckdb_operator_csv_scanner.cpp +10 -0
  195. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +102 -54
  196. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +8 -1
  197. package/src/duckdb/src/execution/operator/filter/ub_duckdb_operator_filter.cpp +2 -0
  198. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +54 -36
  199. package/src/duckdb/src/execution/operator/helper/physical_limit_percent.cpp +56 -32
  200. package/src/duckdb/src/execution/operator/helper/physical_streaming_limit.cpp +9 -13
  201. package/src/duckdb/src/execution/operator/helper/physical_transaction.cpp +12 -0
  202. package/src/duckdb/src/execution/operator/helper/physical_verify_vector.cpp +221 -0
  203. package/src/duckdb/src/execution/operator/helper/ub_duckdb_operator_helper.cpp +18 -0
  204. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +1 -0
  205. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +23 -8
  206. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +8 -3
  207. package/src/duckdb/src/execution/operator/join/ub_duckdb_operator_join.cpp +16 -0
  208. package/src/duckdb/src/execution/operator/order/physical_order.cpp +2 -3
  209. package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +2 -7
  210. package/src/duckdb/src/execution/operator/order/ub_duckdb_operator_order.cpp +3 -0
  211. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +451 -55
  212. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +312 -150
  213. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +200 -75
  214. package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +1 -0
  215. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +6 -5
  216. package/src/duckdb/src/execution/operator/persistent/ub_duckdb_operator_persistent.cpp +10 -0
  217. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +3 -2
  218. package/src/duckdb/src/execution/operator/projection/ub_duckdb_operator_projection.cpp +5 -0
  219. package/src/duckdb/src/execution/operator/scan/ub_duckdb_operator_scan.cpp +7 -0
  220. package/src/duckdb/src/execution/operator/schema/ub_duckdb_operator_schema.cpp +12 -0
  221. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +25 -0
  222. package/src/duckdb/src/execution/operator/set/ub_duckdb_operator_set.cpp +4 -0
  223. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +4 -4
  224. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +2 -2
  225. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +2 -2
  226. package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +7 -17
  227. package/src/duckdb/src/execution/physical_plan/plan_distinct.cpp +10 -0
  228. package/src/duckdb/src/execution/physical_plan/plan_insert.cpp +7 -0
  229. package/src/duckdb/src/execution/physical_plan/plan_limit.cpp +45 -13
  230. package/src/duckdb/src/execution/physical_plan/ub_duckdb_physical_plan.cpp +44 -0
  231. package/src/duckdb/src/execution/physical_plan_generator.cpp +5 -3
  232. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +13 -12
  233. package/src/duckdb/src/execution/ub_duckdb_execution.cpp +15 -0
  234. package/src/duckdb/src/execution/window_executor.cpp +71 -61
  235. package/src/duckdb/src/execution/window_segment_tree.cpp +6 -6
  236. package/src/duckdb/src/extension_forward_decl/icu.cpp +59 -0
  237. package/src/duckdb/src/function/aggregate/algebraic/ub_duckdb_aggr_algebraic.cpp +5 -0
  238. package/src/duckdb/src/function/aggregate/distributive/first.cpp +2 -2
  239. package/src/duckdb/src/function/aggregate/distributive/ub_duckdb_aggr_distr.cpp +3 -0
  240. package/src/duckdb/src/function/aggregate/holistic/ub_duckdb_aggr_holistic.cpp +5 -0
  241. package/src/duckdb/src/function/aggregate/nested/ub_duckdb_aggr_nested.cpp +3 -0
  242. package/src/duckdb/src/function/aggregate/regression/ub_duckdb_aggr_regr.cpp +8 -0
  243. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +3 -2
  244. package/src/duckdb/src/function/aggregate/ub_duckdb_func_aggr.cpp +3 -0
  245. package/src/duckdb/src/function/cast/array_casts.cpp +2 -4
  246. package/src/duckdb/src/function/cast/bit_cast.cpp +13 -13
  247. package/src/duckdb/src/function/cast/cast_function_set.cpp +2 -0
  248. package/src/duckdb/src/function/cast/decimal_cast.cpp +38 -44
  249. package/src/duckdb/src/function/cast/default_casts.cpp +5 -2
  250. package/src/duckdb/src/function/cast/enum_casts.cpp +5 -5
  251. package/src/duckdb/src/function/cast/list_casts.cpp +24 -14
  252. package/src/duckdb/src/function/cast/string_cast.cpp +48 -30
  253. package/src/duckdb/src/function/cast/struct_cast.cpp +2 -2
  254. package/src/duckdb/src/function/cast/time_casts.cpp +12 -0
  255. package/src/duckdb/src/function/cast/ub_duckdb_func_cast.cpp +17 -0
  256. package/src/duckdb/src/function/cast/union/ub_duckdb_union_cast.cpp +2 -0
  257. package/src/duckdb/src/function/cast/union_casts.cpp +13 -15
  258. package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +1 -1
  259. package/src/duckdb/src/function/cast_rules.cpp +2 -1
  260. package/src/duckdb/src/function/pragma/ub_duckdb_func_pragma.cpp +3 -0
  261. package/src/duckdb/src/function/scalar/bit/ub_duckdb_func_bit.cpp +2 -0
  262. package/src/duckdb/src/function/scalar/blob/ub_duckdb_func_blob.cpp +3 -0
  263. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +7 -6
  264. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +3 -3
  265. package/src/duckdb/src/function/scalar/compressed_materialization/ub_duckdb_func_compressed_materialization.cpp +3 -0
  266. package/src/duckdb/src/function/scalar/date/ub_duckdb_func_date.cpp +12 -0
  267. package/src/duckdb/src/function/scalar/enum/ub_duckdb_func_enum.cpp +2 -0
  268. package/src/duckdb/src/function/scalar/generic/ub_duckdb_func_generic.cpp +8 -0
  269. package/src/duckdb/src/function/scalar/generic/ub_duckdb_func_generic_main.cpp +2 -0
  270. package/src/duckdb/src/function/scalar/list/list_resize.cpp +3 -1
  271. package/src/duckdb/src/function/scalar/list/list_zip.cpp +3 -4
  272. package/src/duckdb/src/function/scalar/list/ub_duckdb_func_list.cpp +11 -0
  273. package/src/duckdb/src/function/scalar/list/ub_duckdb_func_list_nested.cpp +5 -0
  274. package/src/duckdb/src/function/scalar/map/ub_duckdb_func_map_nested.cpp +7 -0
  275. package/src/duckdb/src/function/scalar/math/ub_duckdb_func_math.cpp +4 -0
  276. package/src/duckdb/src/function/scalar/nested_functions.cpp +7 -3
  277. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +1 -1
  278. package/src/duckdb/src/function/scalar/operators/multiply.cpp +4 -2
  279. package/src/duckdb/src/function/scalar/operators/subtract.cpp +9 -3
  280. package/src/duckdb/src/function/scalar/operators/ub_duckdb_func_ops.cpp +6 -0
  281. package/src/duckdb/src/function/scalar/operators/ub_duckdb_func_ops_main.cpp +5 -0
  282. package/src/duckdb/src/function/scalar/sequence/ub_duckdb_func_seq.cpp +2 -0
  283. package/src/duckdb/src/function/scalar/strftime_format.cpp +21 -20
  284. package/src/duckdb/src/function/scalar/string/like.cpp +14 -3
  285. package/src/duckdb/src/function/scalar/string/regexp/regexp_extract_all.cpp +4 -2
  286. package/src/duckdb/src/function/scalar/string/regexp/ub_duckdb_func_string_regexp.cpp +3 -0
  287. package/src/duckdb/src/function/scalar/string/regexp.cpp +6 -4
  288. package/src/duckdb/src/function/scalar/string/suffix.cpp +1 -1
  289. package/src/duckdb/src/function/scalar/string/ub_duckdb_func_string.cpp +31 -0
  290. package/src/duckdb/src/function/scalar/string/ub_duckdb_func_string_main.cpp +12 -0
  291. package/src/duckdb/src/function/scalar/struct/ub_duckdb_func_struct.cpp +4 -0
  292. package/src/duckdb/src/function/scalar/struct/ub_duckdb_func_struct_main.cpp +2 -0
  293. package/src/duckdb/src/function/scalar/system/ub_duckdb_func_system.cpp +2 -0
  294. package/src/duckdb/src/function/scalar/ub_duckdb_func_scalar.cpp +9 -0
  295. package/src/duckdb/src/function/scalar/union/ub_duckdb_func_union.cpp +4 -0
  296. package/src/duckdb/src/function/table/arrow/arrow_array_scan_state.cpp +28 -2
  297. package/src/duckdb/src/function/table/arrow/ub_duckdb_arrow_conversion.cpp +2 -0
  298. package/src/duckdb/src/function/table/arrow.cpp +23 -6
  299. package/src/duckdb/src/function/table/arrow_conversion.cpp +75 -33
  300. package/src/duckdb/src/function/table/copy_csv.cpp +8 -3
  301. package/src/duckdb/src/function/table/range.cpp +5 -0
  302. package/src/duckdb/src/function/table/read_csv.cpp +9 -8
  303. package/src/duckdb/src/function/table/read_file.cpp +1 -1
  304. package/src/duckdb/src/function/table/sniff_csv.cpp +5 -5
  305. package/src/duckdb/src/function/table/system/duckdb_columns.cpp +6 -2
  306. package/src/duckdb/src/function/table/system/duckdb_secrets.cpp +5 -1
  307. package/src/duckdb/src/function/table/system/duckdb_sequences.cpp +3 -1
  308. package/src/duckdb/src/function/table/system/duckdb_settings.cpp +13 -1
  309. package/src/duckdb/src/function/table/system/test_all_types.cpp +64 -0
  310. package/src/duckdb/src/function/table/system/ub_duckdb_table_func_system.cpp +23 -0
  311. package/src/duckdb/src/function/table/ub_duckdb_func_table.cpp +16 -0
  312. package/src/duckdb/src/function/table/version/pragma_version.cpp +11 -2
  313. package/src/duckdb/src/function/table/version/ub_duckdb_func_table_version.cpp +2 -0
  314. package/src/duckdb/src/function/ub_duckdb_function.cpp +14 -0
  315. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +11 -1
  316. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/view_catalog_entry.hpp +2 -0
  317. package/src/duckdb/src/include/duckdb/catalog/catalog_entry.hpp +1 -1
  318. package/src/duckdb/src/include/duckdb/common/allocator.hpp +8 -11
  319. package/src/duckdb/src/include/duckdb/common/arrow/appender/enum_data.hpp +2 -2
  320. package/src/duckdb/src/include/duckdb/common/arrow/appender/fixed_size_list_data.hpp +14 -0
  321. package/src/duckdb/src/include/duckdb/common/arrow/appender/list.hpp +1 -0
  322. package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +1 -1
  323. package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +2 -2
  324. package/src/duckdb/src/include/duckdb/common/bitpacking.hpp +2 -2
  325. package/src/duckdb/src/include/duckdb/common/bswap.hpp +6 -2
  326. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +32 -0
  327. package/src/duckdb/src/include/duckdb/common/enums/catalog_lookup_behavior.hpp +21 -0
  328. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +0 -1
  329. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -1
  330. package/src/duckdb/src/include/duckdb/common/enums/prepared_statement_mode.hpp +20 -0
  331. package/src/duckdb/src/include/duckdb/common/enums/statement_type.hpp +2 -0
  332. package/src/duckdb/src/include/duckdb/common/exception/conversion_exception.hpp +6 -0
  333. package/src/duckdb/src/include/duckdb/common/exception/parser_exception.hpp +4 -0
  334. package/src/duckdb/src/include/duckdb/common/exception.hpp +2 -0
  335. package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +3 -3
  336. package/src/duckdb/src/include/duckdb/common/file_opener.hpp +6 -5
  337. package/src/duckdb/src/include/duckdb/common/file_system.hpp +4 -2
  338. package/src/duckdb/src/include/duckdb/common/helper.hpp +7 -0
  339. package/src/duckdb/src/include/duckdb/common/limits.hpp +2 -2
  340. package/src/duckdb/src/include/duckdb/common/local_file_system.hpp +3 -0
  341. package/src/duckdb/src/include/duckdb/common/numeric_utils.hpp +64 -0
  342. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +34 -33
  343. package/src/duckdb/src/include/duckdb/common/operator/decimal_cast_operators.hpp +150 -124
  344. package/src/duckdb/src/include/duckdb/common/operator/integer_cast_operator.hpp +5 -5
  345. package/src/duckdb/src/include/duckdb/common/radix.hpp +1 -1
  346. package/src/duckdb/src/include/duckdb/common/serializer/deserializer.hpp +1 -1
  347. package/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp +4 -4
  348. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +1 -1
  349. package/src/duckdb/src/include/duckdb/common/types/blob.hpp +4 -1
  350. package/src/duckdb/src/include/duckdb/common/types/cast_helpers.hpp +15 -13
  351. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +5 -0
  352. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +2 -0
  353. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +3 -0
  354. package/src/duckdb/src/include/duckdb/common/types/column/partitioned_column_data.hpp +1 -1
  355. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +1 -1
  356. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +4 -1
  357. package/src/duckdb/src/include/duckdb/common/types/selection_vector.hpp +2 -1
  358. package/src/duckdb/src/include/duckdb/common/types/string_heap.hpp +2 -0
  359. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +11 -4
  360. package/src/duckdb/src/include/duckdb/common/types/time.hpp +1 -1
  361. package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +7 -2
  362. package/src/duckdb/src/include/duckdb/common/types/uuid.hpp +3 -0
  363. package/src/duckdb/src/include/duckdb/common/types/value.hpp +7 -0
  364. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +40 -7
  365. package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +1 -1
  366. package/src/duckdb/src/include/duckdb/common/types.hpp +1 -1
  367. package/src/duckdb/src/include/duckdb/common/vector_operations/general_cast.hpp +12 -4
  368. package/src/duckdb/src/include/duckdb/common/vector_operations/generic_executor.hpp +6 -6
  369. package/src/duckdb/src/include/duckdb/common/vector_operations/vector_operations.hpp +3 -2
  370. package/src/duckdb/src/include/duckdb/common/windows_undefs.hpp +4 -0
  371. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -1
  372. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +9 -0
  373. package/src/duckdb/src/include/duckdb/execution/column_binding_resolver.hpp +2 -1
  374. package/src/duckdb/src/include/duckdb/execution/executor.hpp +10 -0
  375. package/src/duckdb/src/include/duckdb/execution/expression_executor_state.hpp +1 -1
  376. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +262 -0
  377. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp +70 -0
  378. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp +103 -0
  379. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp +74 -0
  380. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_casting.hpp +154 -0
  381. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp +130 -0
  382. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp +60 -0
  383. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp +70 -0
  384. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +155 -0
  385. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp +166 -0
  386. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp +191 -0
  387. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state.hpp +30 -0
  388. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine.hpp +99 -0
  389. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine_cache.hpp +91 -0
  390. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/global_csv_state.hpp +80 -0
  391. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/quote_rules.hpp +21 -0
  392. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner_boundary.hpp +93 -0
  393. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/skip_scanner.hpp +60 -0
  394. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine_options.hpp +35 -0
  395. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +200 -0
  396. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit.hpp +13 -10
  397. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit_percent.hpp +5 -11
  398. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_streaming_limit.hpp +4 -6
  399. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_verify_vector.hpp +51 -0
  400. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +3 -0
  401. package/src/duckdb/src/include/duckdb/execution/operator/persistent/batch_memory_manager.hpp +165 -0
  402. package/src/duckdb/src/include/duckdb/execution/operator/persistent/batch_task_manager.hpp +48 -0
  403. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +10 -17
  404. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +4 -0
  405. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +2 -0
  406. package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +10 -6
  407. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +0 -2
  408. package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +2 -1
  409. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +1 -1
  410. package/src/duckdb/src/include/duckdb/function/cast/cast_function_set.hpp +1 -0
  411. package/src/duckdb/src/include/duckdb/function/cast/default_casts.hpp +14 -5
  412. package/src/duckdb/src/include/duckdb/function/cast/vector_cast_helpers.hpp +27 -43
  413. package/src/duckdb/src/include/duckdb/function/compression_function.hpp +4 -4
  414. package/src/duckdb/src/include/duckdb/function/copy_function.hpp +3 -3
  415. package/src/duckdb/src/include/duckdb/function/function.hpp +1 -1
  416. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -0
  417. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +1 -1
  418. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +1 -1
  419. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +10 -4
  420. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +8 -6
  421. package/src/duckdb/src/include/duckdb/function/table_function.hpp +3 -3
  422. package/src/duckdb/src/include/duckdb/main/attached_database.hpp +3 -0
  423. package/src/duckdb/src/include/duckdb/main/capi/cast/from_decimal.hpp +6 -4
  424. package/src/duckdb/src/include/duckdb/main/capi/cast/to_decimal.hpp +17 -10
  425. package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
  426. package/src/duckdb/src/include/duckdb/main/client_context.hpp +17 -3
  427. package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +2 -2
  428. package/src/duckdb/src/include/duckdb/main/client_context_state.hpp +20 -0
  429. package/src/duckdb/src/include/duckdb/main/client_data.hpp +1 -1
  430. package/src/duckdb/src/include/duckdb/main/database.hpp +3 -2
  431. package/src/duckdb/src/include/duckdb/main/database_manager.hpp +2 -1
  432. package/src/duckdb/src/include/duckdb/main/extension/generated_extension_loader.hpp +1 -1
  433. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +230 -199
  434. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +18 -0
  435. package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +1 -1
  436. package/src/duckdb/src/include/duckdb/main/relation/table_function_relation.hpp +2 -0
  437. package/src/duckdb/src/include/duckdb/main/relation.hpp +1 -1
  438. package/src/duckdb/src/include/duckdb/main/secret/secret_manager.hpp +4 -0
  439. package/src/duckdb/src/include/duckdb/main/settings.hpp +54 -10
  440. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +2 -1
  441. package/src/duckdb/src/include/duckdb/optimizer/rule/ordered_aggregate_optimizer.hpp +2 -0
  442. package/src/duckdb/src/include/duckdb/parallel/concurrentqueue.hpp +21 -5
  443. package/src/duckdb/src/include/duckdb/parallel/event.hpp +1 -1
  444. package/src/duckdb/src/include/duckdb/parallel/executor_task.hpp +37 -0
  445. package/src/duckdb/src/include/duckdb/parallel/interrupt.hpp +1 -1
  446. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +1 -1
  447. package/src/duckdb/src/include/duckdb/parallel/task.hpp +0 -20
  448. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +4 -2
  449. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +10 -0
  450. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +2 -1
  451. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +0 -24
  452. package/src/duckdb/src/include/duckdb/parser/parsed_data/comment_on_column_info.hpp +46 -0
  453. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_view_info.hpp +2 -0
  454. package/src/duckdb/src/include/duckdb/parser/parsed_data/extra_drop_info.hpp +1 -1
  455. package/src/duckdb/src/include/duckdb/parser/parsed_data/parse_info.hpp +3 -2
  456. package/src/duckdb/src/include/duckdb/parser/tableref/pivotref.hpp +3 -3
  457. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +3 -3
  458. package/src/duckdb/src/include/duckdb/planner/binder.hpp +11 -4
  459. package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +59 -23
  460. package/src/duckdb/src/include/duckdb/planner/expression.hpp +1 -0
  461. package/src/duckdb/src/include/duckdb/planner/expression_binder/column_alias_binder.hpp +2 -2
  462. package/src/duckdb/src/include/duckdb/planner/expression_iterator.hpp +11 -3
  463. package/src/duckdb/src/include/duckdb/planner/extension_callback.hpp +6 -0
  464. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +0 -1
  465. package/src/duckdb/src/include/duckdb/planner/operator/logical_insert.hpp +1 -1
  466. package/src/duckdb/src/include/duckdb/planner/operator/logical_limit.hpp +5 -9
  467. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +0 -15
  468. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +5 -0
  469. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +40 -5
  470. package/src/duckdb/src/include/duckdb/storage/compression/alp/algorithm/alp.hpp +1 -1
  471. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_analyze.hpp +5 -1
  472. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_compress.hpp +4 -4
  473. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_utils.hpp +2 -1
  474. package/src/duckdb/src/include/duckdb/storage/compression/alprd/algorithm/alprd.hpp +10 -8
  475. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_analyze.hpp +5 -1
  476. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_compress.hpp +3 -3
  477. package/src/duckdb/src/include/duckdb/storage/compression/chimp/algorithm/bit_reader.hpp +11 -10
  478. package/src/duckdb/src/include/duckdb/storage/compression/chimp/algorithm/bit_utils.hpp +3 -1
  479. package/src/duckdb/src/include/duckdb/storage/compression/chimp/algorithm/chimp128.hpp +1 -1
  480. package/src/duckdb/src/include/duckdb/storage/compression/chimp/algorithm/leading_zero_buffer.hpp +5 -3
  481. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +2 -1
  482. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +1 -1
  483. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +3 -1
  484. package/src/duckdb/src/include/duckdb/storage/index.hpp +1 -1
  485. package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +1 -1
  486. package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +3 -0
  487. package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +1 -1
  488. package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp +0 -2
  489. package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +1 -1
  490. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +5 -3
  491. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +2 -2
  492. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +5 -0
  493. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +9 -2
  494. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +2 -6
  495. package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +3 -3
  496. package/src/duckdb/src/include/duckdb/storage/temporary_file_manager.hpp +169 -0
  497. package/src/duckdb/src/include/duckdb/transaction/duck_transaction_manager.hpp +11 -1
  498. package/src/duckdb/src/include/duckdb/transaction/meta_transaction.hpp +6 -1
  499. package/src/duckdb/src/include/duckdb/transaction/transaction.hpp +1 -1
  500. package/src/duckdb/src/include/duckdb.h +119 -67
  501. package/src/duckdb/src/main/appender.cpp +2 -1
  502. package/src/duckdb/src/main/attached_database.cpp +49 -27
  503. package/src/duckdb/src/main/capi/appender-c.cpp +1 -1
  504. package/src/duckdb/src/main/capi/cast/ub_duckdb_main_capi_cast.cpp +3 -0
  505. package/src/duckdb/src/main/capi/cast/utils-c.cpp +1 -1
  506. package/src/duckdb/src/main/capi/data_chunk-c.cpp +9 -1
  507. package/src/duckdb/src/main/capi/datetime-c.cpp +14 -8
  508. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +29 -2
  509. package/src/duckdb/src/main/capi/helper-c.cpp +2 -0
  510. package/src/duckdb/src/main/capi/hugeint-c.cpp +2 -1
  511. package/src/duckdb/src/main/capi/logical_types-c.cpp +35 -1
  512. package/src/duckdb/src/main/capi/result-c.cpp +9 -0
  513. package/src/duckdb/src/main/capi/ub_duckdb_main_capi.cpp +19 -0
  514. package/src/duckdb/src/main/chunk_scan_state/ub_duckdb_main_chunk_scan_state.cpp +2 -0
  515. package/src/duckdb/src/main/client_context.cpp +133 -33
  516. package/src/duckdb/src/main/client_context_file_opener.cpp +8 -7
  517. package/src/duckdb/src/main/config.cpp +2 -0
  518. package/src/duckdb/src/main/connection_manager.cpp +8 -0
  519. package/src/duckdb/src/main/database.cpp +13 -4
  520. package/src/duckdb/src/main/database_manager.cpp +10 -1
  521. package/src/duckdb/src/main/extension/extension_helper.cpp +8 -5
  522. package/src/duckdb/src/main/extension/extension_install.cpp +1 -1
  523. package/src/duckdb/src/main/extension/ub_duckdb_main_extension.cpp +6 -0
  524. package/src/duckdb/src/main/prepared_statement_data.cpp +23 -6
  525. package/src/duckdb/src/main/query_profiler.cpp +9 -7
  526. package/src/duckdb/src/main/relation/read_csv_relation.cpp +17 -12
  527. package/src/duckdb/src/main/relation/ub_duckdb_main_relation.cpp +26 -0
  528. package/src/duckdb/src/main/relation/value_relation.cpp +2 -0
  529. package/src/duckdb/src/main/secret/secret.cpp +1 -1
  530. package/src/duckdb/src/main/secret/secret_manager.cpp +41 -8
  531. package/src/duckdb/src/main/secret/secret_storage.cpp +8 -2
  532. package/src/duckdb/src/main/settings/settings.cpp +42 -2
  533. package/src/duckdb/src/main/settings/ub_duckdb_main_settings.cpp +2 -0
  534. package/src/duckdb/src/main/ub_duckdb_main.cpp +25 -0
  535. package/src/duckdb/src/optimizer/compressed_materialization/ub_duckdb_optimizer_compressed_materialization.cpp +4 -0
  536. package/src/duckdb/src/optimizer/filter_combiner.cpp +20 -14
  537. package/src/duckdb/src/optimizer/in_clause_rewriter.cpp +5 -1
  538. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +23 -11
  539. package/src/duckdb/src/optimizer/join_order/ub_duckdb_optimizer_join_order.cpp +12 -0
  540. package/src/duckdb/src/optimizer/matcher/ub_duckdb_optimizer_matcher.cpp +2 -0
  541. package/src/duckdb/src/optimizer/pullup/ub_duckdb_optimizer_pullup.cpp +6 -0
  542. package/src/duckdb/src/optimizer/pushdown/pushdown_limit.cpp +1 -1
  543. package/src/duckdb/src/optimizer/pushdown/ub_duckdb_optimizer_pushdown.cpp +12 -0
  544. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +7 -6
  545. package/src/duckdb/src/optimizer/rule/ordered_aggregate_optimizer.cpp +8 -6
  546. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +1 -1
  547. package/src/duckdb/src/optimizer/rule/ub_duckdb_optimizer_rules.cpp +16 -0
  548. package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +71 -1
  549. package/src/duckdb/src/optimizer/statistics/expression/ub_duckdb_optimizer_statistics_expr.cpp +11 -0
  550. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +1 -1
  551. package/src/duckdb/src/optimizer/statistics/operator/propagate_limit.cpp +5 -1
  552. package/src/duckdb/src/optimizer/statistics/operator/ub_duckdb_optimizer_statistics_op.cpp +11 -0
  553. package/src/duckdb/src/optimizer/topn_optimizer.cpp +12 -11
  554. package/src/duckdb/src/optimizer/ub_duckdb_optimizer.cpp +20 -0
  555. package/src/duckdb/src/parallel/executor.cpp +8 -19
  556. package/src/duckdb/src/parallel/executor_task.cpp +6 -2
  557. package/src/duckdb/src/parallel/pipeline.cpp +12 -6
  558. package/src/duckdb/src/parallel/pipeline_executor.cpp +1 -1
  559. package/src/duckdb/src/parallel/pipeline_finish_event.cpp +2 -2
  560. package/src/duckdb/src/parallel/pipeline_initialize_event.cpp +1 -2
  561. package/src/duckdb/src/parallel/task_scheduler.cpp +15 -8
  562. package/src/duckdb/src/parallel/ub_duckdb_parallel.cpp +15 -0
  563. package/src/duckdb/src/parser/constraints/ub_duckdb_constraints.cpp +5 -0
  564. package/src/duckdb/src/parser/expression/ub_duckdb_expression.cpp +18 -0
  565. package/src/duckdb/src/parser/parsed_data/alter_table_info.cpp +0 -18
  566. package/src/duckdb/src/parser/parsed_data/comment_on_column_info.cpp +44 -0
  567. package/src/duckdb/src/parser/parsed_data/create_view_info.cpp +1 -0
  568. package/src/duckdb/src/parser/parsed_data/extra_drop_info.cpp +2 -0
  569. package/src/duckdb/src/parser/parsed_data/ub_duckdb_parsed_data.cpp +24 -0
  570. package/src/duckdb/src/parser/parser.cpp +1 -1
  571. package/src/duckdb/src/parser/query_error_context.cpp +15 -1
  572. package/src/duckdb/src/parser/query_node/ub_duckdb_query_node.cpp +5 -0
  573. package/src/duckdb/src/parser/statement/export_statement.cpp +2 -1
  574. package/src/duckdb/src/parser/statement/relation_statement.cpp +2 -2
  575. package/src/duckdb/src/parser/statement/ub_duckdb_statement.cpp +25 -0
  576. package/src/duckdb/src/parser/tableref/pivotref.cpp +3 -3
  577. package/src/duckdb/src/parser/tableref/showref.cpp +2 -0
  578. package/src/duckdb/src/parser/tableref/ub_duckdb_parser_tableref.cpp +8 -0
  579. package/src/duckdb/src/parser/transform/constraint/ub_duckdb_transformer_constraint.cpp +2 -0
  580. package/src/duckdb/src/parser/transform/expression/transform_boolean_test.cpp +1 -1
  581. package/src/duckdb/src/parser/transform/expression/transform_cast.cpp +10 -2
  582. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +7 -7
  583. package/src/duckdb/src/parser/transform/expression/transform_interval.cpp +2 -1
  584. package/src/duckdb/src/parser/transform/expression/ub_duckdb_transformer_expression.cpp +20 -0
  585. package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +2 -2
  586. package/src/duckdb/src/parser/transform/helpers/ub_duckdb_transformer_helpers.cpp +8 -0
  587. package/src/duckdb/src/parser/transform/statement/transform_comment_on.cpp +3 -8
  588. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +3 -1
  589. package/src/duckdb/src/parser/transform/statement/transform_show_select.cpp +2 -3
  590. package/src/duckdb/src/parser/transform/statement/ub_duckdb_transformer_statement.cpp +37 -0
  591. package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +16 -0
  592. package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +49 -32
  593. package/src/duckdb/src/parser/transform/tableref/ub_duckdb_transformer_tableref.cpp +8 -0
  594. package/src/duckdb/src/parser/ub_duckdb_parser.cpp +15 -0
  595. package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +11 -4
  596. package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +4 -1
  597. package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +71 -5
  598. package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +35 -1
  599. package/src/duckdb/src/planner/binder/expression/ub_duckdb_bind_expression.cpp +20 -0
  600. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +55 -51
  601. package/src/duckdb/src/planner/binder/query_node/plan_query_node.cpp +1 -11
  602. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +2 -2
  603. package/src/duckdb/src/planner/binder/query_node/ub_duckdb_bind_query_node.cpp +12 -0
  604. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +12 -6
  605. package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +1 -1
  606. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +8 -2
  607. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +6 -3
  608. package/src/duckdb/src/planner/binder/statement/bind_simple.cpp +13 -2
  609. package/src/duckdb/src/planner/binder/statement/ub_duckdb_bind_statement.cpp +26 -0
  610. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +15 -4
  611. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +9 -2
  612. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +93 -45
  613. package/src/duckdb/src/planner/binder/tableref/bind_subqueryref.cpp +6 -1
  614. package/src/duckdb/src/planner/binder/tableref/ub_duckdb_bind_tableref.cpp +17 -0
  615. package/src/duckdb/src/planner/binder.cpp +22 -23
  616. package/src/duckdb/src/planner/bound_result_modifier.cpp +67 -4
  617. package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +5 -1
  618. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +3 -0
  619. package/src/duckdb/src/planner/expression/ub_duckdb_planner_expression.cpp +19 -0
  620. package/src/duckdb/src/planner/expression_binder/column_alias_binder.cpp +14 -9
  621. package/src/duckdb/src/planner/expression_binder/having_binder.cpp +17 -9
  622. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +61 -37
  623. package/src/duckdb/src/planner/expression_binder/qualify_binder.cpp +8 -4
  624. package/src/duckdb/src/planner/expression_binder/ub_duckdb_expression_binders.cpp +20 -0
  625. package/src/duckdb/src/planner/expression_binder/where_binder.cpp +3 -2
  626. package/src/duckdb/src/planner/expression_iterator.cpp +73 -52
  627. package/src/duckdb/src/planner/filter/ub_duckdb_planner_filter.cpp +4 -0
  628. package/src/duckdb/src/planner/logical_operator_visitor.cpp +4 -14
  629. package/src/duckdb/src/planner/operator/logical_limit.cpp +14 -6
  630. package/src/duckdb/src/planner/operator/ub_duckdb_planner_operator.cpp +43 -0
  631. package/src/duckdb/src/planner/parsed_data/ub_duckdb_planner_parsed_data.cpp +2 -0
  632. package/src/duckdb/src/planner/planner.cpp +3 -0
  633. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +46 -18
  634. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +39 -33
  635. package/src/duckdb/src/planner/subquery/ub_duckdb_planner_subquery.cpp +4 -0
  636. package/src/duckdb/src/planner/ub_duckdb_planner.cpp +15 -0
  637. package/src/duckdb/src/storage/arena_allocator.cpp +9 -0
  638. package/src/duckdb/src/storage/buffer/block_handle.cpp +7 -2
  639. package/src/duckdb/src/storage/buffer/block_manager.cpp +7 -3
  640. package/src/duckdb/src/storage/buffer/buffer_pool.cpp +121 -24
  641. package/src/duckdb/src/storage/buffer/ub_duckdb_storage_buffer.cpp +6 -0
  642. package/src/duckdb/src/storage/checkpoint/ub_duckdb_storage_checkpoint.cpp +5 -0
  643. package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +4 -4
  644. package/src/duckdb/src/storage/compression/chimp/ub_duckdb_storage_compression_chimp.cpp +6 -0
  645. package/src/duckdb/src/storage/compression/dictionary_compression.cpp +6 -5
  646. package/src/duckdb/src/storage/compression/fsst.cpp +3 -2
  647. package/src/duckdb/src/storage/compression/rle.cpp +1 -1
  648. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +4 -4
  649. package/src/duckdb/src/storage/compression/ub_duckdb_storage_compression.cpp +12 -0
  650. package/src/duckdb/src/storage/data_table.cpp +6 -3
  651. package/src/duckdb/src/storage/local_storage.cpp +5 -2
  652. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +4 -4
  653. package/src/duckdb/src/storage/metadata/metadata_reader.cpp +1 -1
  654. package/src/duckdb/src/storage/metadata/metadata_writer.cpp +1 -1
  655. package/src/duckdb/src/storage/metadata/ub_duckdb_storage_metadata.cpp +4 -0
  656. package/src/duckdb/src/storage/partial_block_manager.cpp +1 -1
  657. package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +2 -0
  658. package/src/duckdb/src/storage/serialization/serialize_expression.cpp +3 -0
  659. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +5 -29
  660. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +22 -4
  661. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +12 -32
  662. package/src/duckdb/src/storage/serialization/ub_duckdb_storage_serialization.cpp +16 -0
  663. package/src/duckdb/src/storage/standard_buffer_manager.cpp +29 -397
  664. package/src/duckdb/src/storage/statistics/string_stats.cpp +1 -1
  665. package/src/duckdb/src/storage/statistics/ub_duckdb_storage_statistics.cpp +10 -0
  666. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  667. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -3
  668. package/src/duckdb/src/storage/table/column_data.cpp +7 -16
  669. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +4 -1
  670. package/src/duckdb/src/storage/table/column_segment.cpp +68 -78
  671. package/src/duckdb/src/storage/table/list_column_data.cpp +1 -0
  672. package/src/duckdb/src/storage/table/row_group.cpp +17 -7
  673. package/src/duckdb/src/storage/table/row_group_collection.cpp +28 -27
  674. package/src/duckdb/src/storage/table/ub_duckdb_storage_table.cpp +17 -0
  675. package/src/duckdb/src/storage/table/update_segment.cpp +7 -7
  676. package/src/duckdb/src/storage/temporary_file_manager.cpp +334 -0
  677. package/src/duckdb/src/storage/ub_duckdb_storage.cpp +20 -0
  678. package/src/duckdb/src/storage/write_ahead_log.cpp +3 -2
  679. package/src/duckdb/src/transaction/commit_state.cpp +4 -2
  680. package/src/duckdb/src/transaction/duck_transaction_manager.cpp +32 -17
  681. package/src/duckdb/src/transaction/meta_transaction.cpp +24 -0
  682. package/src/duckdb/src/transaction/transaction_context.cpp +0 -9
  683. package/src/duckdb/src/transaction/ub_duckdb_transaction.cpp +11 -0
  684. package/src/duckdb/src/transaction/undo_buffer.cpp +1 -1
  685. package/src/duckdb/third_party/jaro_winkler/details/common.hpp +1 -1
  686. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +2 -0
  687. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +1032 -551
  688. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +24122 -24304
  689. package/src/duckdb/third_party/mbedtls/include/mbedtls/mbedtls_config.h +0 -3
  690. package/src/duckdb/third_party/mbedtls/library/entropy_poll.cpp +32 -4
  691. package/src/duckdb/third_party/miniz/miniz_wrapper.hpp +4 -4
  692. package/src/duckdb/third_party/tdigest/t_digest.hpp +3 -3
  693. package/src/duckdb/ub_src_common_arrow_appender.cpp +2 -0
  694. package/src/duckdb/ub_src_common_exception.cpp +2 -0
  695. package/src/duckdb/ub_src_execution_operator_helper.cpp +2 -0
  696. package/src/duckdb/ub_src_execution_operator_persistent.cpp +0 -2
  697. package/src/duckdb/ub_src_execution_physical_plan.cpp +0 -2
  698. package/src/duckdb/ub_src_parser_parsed_data.cpp +1 -1
  699. package/src/duckdb/ub_src_planner_operator.cpp +0 -2
  700. package/src/duckdb/ub_src_storage.cpp +2 -0
  701. package/test/columns.test.ts +1 -1
  702. package/test/test_all_types.test.ts +1 -1
@@ -0,0 +1,166 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/execution/operator/csv_scanner/csv_reader_options.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/execution/operator/csv_scanner/csv_buffer.hpp"
12
+ #include "duckdb/execution/operator/csv_scanner/csv_option.hpp"
13
+ #include "duckdb/execution/operator/csv_scanner/state_machine_options.hpp"
14
+ #include "duckdb/common/map.hpp"
15
+ #include "duckdb/function/scalar/strftime_format.hpp"
16
+ #include "duckdb/common/types/value.hpp"
17
+ #include "duckdb/common/case_insensitive_map.hpp"
18
+ #include "duckdb/common/types.hpp"
19
+ #include "duckdb/common/multi_file_reader_options.hpp"
20
+
21
+ namespace duckdb {
22
+
23
+ struct DialectOptions {
24
+ CSVStateMachineOptions state_machine_options;
25
+ //! Expected number of columns
26
+ idx_t num_cols = 0;
27
+ //! Whether or not the file has a header line
28
+ CSVOption<bool> header = false;
29
+ //! The date format to use (if any is specified)
30
+ map<LogicalTypeId, CSVOption<StrpTimeFormat>> date_format = {{LogicalTypeId::DATE, {}},
31
+ {LogicalTypeId::TIMESTAMP, {}}};
32
+ //! How many leading rows to skip
33
+ CSVOption<idx_t> skip_rows = 0;
34
+ };
35
+
36
+ struct CSVReaderOptions {
37
+ //===--------------------------------------------------------------------===//
38
+ // CommonCSVOptions
39
+ //===--------------------------------------------------------------------===//
40
+ //! See struct above.
41
+ DialectOptions dialect_options;
42
+ //! Whether or not we should ignore InvalidInput errors
43
+ bool ignore_errors = false;
44
+ //! Rejects table name
45
+ string rejects_table_name;
46
+ //! Rejects table entry limit (0 = no limit)
47
+ idx_t rejects_limit = 0;
48
+ //! Columns to use as recovery key for rejected rows when reading with ignore_errors = true
49
+ vector<string> rejects_recovery_columns;
50
+ //! Index of the recovery columns
51
+ vector<idx_t> rejects_recovery_column_ids;
52
+ //! Number of samples to buffer
53
+ idx_t buffer_sample_size = (idx_t)STANDARD_VECTOR_SIZE * 50;
54
+ //! Specifies the string that represents a null value
55
+ string null_str;
56
+ //! Whether file is compressed or not, and if so which compression type
57
+ //! AUTO_DETECT (default; infer from file extension)
58
+ FileCompressionType compression = FileCompressionType::AUTO_DETECT;
59
+ //! Option to convert quoted values to NULL values
60
+ bool allow_quoted_nulls = true;
61
+
62
+ //===--------------------------------------------------------------------===//
63
+ // CSVAutoOptions
64
+ //===--------------------------------------------------------------------===//
65
+ //! SQL Type list mapping of name to SQL type index in sql_type_list
66
+ case_insensitive_map_t<idx_t> sql_types_per_column;
67
+ //! User-defined SQL type list
68
+ vector<LogicalType> sql_type_list;
69
+ //! User-defined name list
70
+ vector<string> name_list;
71
+ //! Types considered as candidates for auto detection ordered by descending specificity (~ from high to low)
72
+ vector<LogicalType> auto_type_candidates = {LogicalType::VARCHAR, LogicalType::TIMESTAMP, LogicalType::DATE,
73
+ LogicalType::TIME, LogicalType::DOUBLE, LogicalType::BIGINT,
74
+ LogicalType::BOOLEAN, LogicalType::SQLNULL};
75
+ //! In case the sniffer found a mismatch error from user defined types or dialect
76
+ string sniffer_user_mismatch_error;
77
+ //! In case the sniffer found a mismatch error from user defined types or dialect
78
+ vector<bool> was_type_manually_set;
79
+ //===--------------------------------------------------------------------===//
80
+ // ReadCSVOptions
81
+ //===--------------------------------------------------------------------===//
82
+ //! Maximum CSV line size: specified because if we reach this amount, we likely have wrong delimiters (default: 2MB)
83
+ //! note that this is the guaranteed line length that will succeed, longer lines may be accepted if slightly above
84
+ idx_t maximum_line_size = 2097152;
85
+ //! Whether or not header names shall be normalized
86
+ bool normalize_names = false;
87
+ //! True, if column with that index must skip null check
88
+ vector<bool> force_not_null;
89
+ //! Number of sample chunks used in auto-detection
90
+ idx_t sample_size_chunks = 20480 / STANDARD_VECTOR_SIZE;
91
+ //! Consider all columns to be of type varchar
92
+ bool all_varchar = false;
93
+ //! Whether or not to automatically detect dialect and datatypes
94
+ bool auto_detect = true;
95
+ //! The file path of the CSV file to read
96
+ string file_path;
97
+ //! Multi-file reader options
98
+ MultiFileReaderOptions file_options;
99
+ //! Buffer Size (Parallel Scan)
100
+ idx_t buffer_size = CSVBuffer::CSV_BUFFER_SIZE;
101
+ //! Decimal separator when reading as numeric
102
+ string decimal_separator = ".";
103
+ //! Whether or not to pad rows that do not have enough columns with NULL values
104
+ bool null_padding = false;
105
+ //! If we should attempt to run parallel scanning over one file
106
+ bool parallel = true;
107
+
108
+ //! User defined parameters for the csv function concatenated on a string
109
+ string user_defined_parameters;
110
+ //===--------------------------------------------------------------------===//
111
+ // WriteCSVOptions
112
+ //===--------------------------------------------------------------------===//
113
+ //! True, if column with that index must be quoted
114
+ vector<bool> force_quote;
115
+ //! Prefix/suffix/custom newline the entire file once (enables writing of files as JSON arrays)
116
+ string prefix;
117
+ string suffix;
118
+ string write_newline;
119
+
120
+ //! The date format to use (if any is specified)
121
+ map<LogicalTypeId, StrpTimeFormat> date_format = {{LogicalTypeId::DATE, {}}, {LogicalTypeId::TIMESTAMP, {}}};
122
+ //! The date format to use for writing (if any is specified)
123
+ map<LogicalTypeId, StrfTimeFormat> write_date_format = {{LogicalTypeId::DATE, {}}, {LogicalTypeId::TIMESTAMP, {}}};
124
+ //! Whether or not a type format is specified
125
+ map<LogicalTypeId, bool> has_format = {{LogicalTypeId::DATE, false}, {LogicalTypeId::TIMESTAMP, false}};
126
+
127
+ void Serialize(Serializer &serializer) const;
128
+ static CSVReaderOptions Deserialize(Deserializer &deserializer);
129
+
130
+ void SetCompression(const string &compression);
131
+
132
+ bool GetHeader() const;
133
+ void SetHeader(bool has_header);
134
+
135
+ string GetEscape() const;
136
+ void SetEscape(const string &escape);
137
+
138
+ int64_t GetSkipRows() const;
139
+ void SetSkipRows(int64_t rows);
140
+
141
+ string GetQuote() const;
142
+ void SetQuote(const string &quote);
143
+ void SetDelimiter(const string &delimiter);
144
+ string GetDelimiter() const;
145
+
146
+ NewLineIdentifier GetNewline() const;
147
+ void SetNewline(const string &input);
148
+ //! Set an option that is supported by both reading and writing functions, called by
149
+ //! the SetReadOption and SetWriteOption methods
150
+ bool SetBaseOption(const string &loption, const Value &value);
151
+
152
+ //! loption - lowercase string
153
+ //! set - argument(s) to the option
154
+ //! expected_names - names expected if the option is "columns"
155
+ void SetReadOption(const string &loption, const Value &value, vector<string> &expected_names);
156
+ void SetWriteOption(const string &loption, const Value &value);
157
+ void SetDateFormat(LogicalTypeId type, const string &format, bool read_format);
158
+ void ToNamedParameters(named_parameter_map_t &out);
159
+ void FromNamedParameters(named_parameter_map_t &in, ClientContext &context, vector<LogicalType> &return_types,
160
+ vector<string> &names);
161
+
162
+ string ToString() const;
163
+ //! If the type for column with idx i was manually set
164
+ bool WasTypeManuallySet(idx_t i) const;
165
+ };
166
+ } // namespace duckdb
@@ -0,0 +1,191 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/execution/operator/csv_scanner/csv_sniffer.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/execution/operator/csv_scanner/csv_state_machine.hpp"
12
+ #include "duckdb/common/vector.hpp"
13
+ #include "duckdb/execution/operator/csv_scanner/quote_rules.hpp"
14
+ #include "duckdb/execution/operator/csv_scanner/column_count_scanner.hpp"
15
+
16
+ namespace duckdb {
17
+ struct DateTimestampSniffing {
18
+ bool initialized = false;
19
+ vector<string> format;
20
+ };
21
+ //! Struct to store the result of the Sniffer
22
+ struct SnifferResult {
23
+ SnifferResult(vector<LogicalType> return_types_p, vector<string> names_p)
24
+ : return_types(std::move(return_types_p)), names(std::move(names_p)) {
25
+ }
26
+ //! Return Types that were detected
27
+ vector<LogicalType> return_types;
28
+ //! Column Names that were detected
29
+ vector<string> names;
30
+ };
31
+
32
+ //! This represents the data related to columns that have been set by the user
33
+ //! e.g., from a copy command
34
+ struct SetColumns {
35
+ SetColumns(const vector<LogicalType> *types_p, const vector<string> *names_p) : types(types_p), names(names_p) {
36
+ if (!types) {
37
+ D_ASSERT(!types && !names);
38
+ } else {
39
+ D_ASSERT(types->size() == names->size());
40
+ }
41
+ }
42
+ SetColumns() {};
43
+ //! Return Types that were detected
44
+ const vector<LogicalType> *types = nullptr;
45
+ //! Column Names that were detected
46
+ const vector<string> *names = nullptr;
47
+ //! If columns are set
48
+ bool IsSet();
49
+ //! How many columns
50
+ idx_t Size();
51
+ //! Helper function that checks if candidate is acceptable based on the number of columns it produces
52
+ inline bool IsCandidateUnacceptable(idx_t num_cols, bool null_padding, bool ignore_errors,
53
+ bool last_value_always_empty) {
54
+ if (!IsSet() || ignore_errors) {
55
+ // We can't say its unacceptable if it's not set or if we ignore errors
56
+ return false;
57
+ }
58
+ idx_t size = Size();
59
+ // If the columns are set and there is a mismatch with the expected number of columns, with null_padding and
60
+ // ignore_errors not set, we don't have a suitable candidate.
61
+ // Note that we compare with max_columns_found + 1, because some broken files have the behaviour where two
62
+ // columns are represented as: | col 1 | col_2 |
63
+ if (num_cols == size || num_cols == size + last_value_always_empty) {
64
+ // Good Candidate
65
+ return false;
66
+ }
67
+ // if we detected more columns than we have set, it's all good because we can null-pad them
68
+ if (null_padding && num_cols > size) {
69
+ return false;
70
+ }
71
+
72
+ // Unacceptable
73
+ return true;
74
+ }
75
+ };
76
+
77
+ //! Sniffer that detects Header, Dialect and Types of CSV Files
78
+ class CSVSniffer {
79
+ public:
80
+ explicit CSVSniffer(CSVReaderOptions &options_p, shared_ptr<CSVBufferManager> buffer_manager_p,
81
+ CSVStateMachineCache &state_machine_cache, SetColumns set_columns = {});
82
+
83
+ //! Main method that sniffs the CSV file, returns the types, names and options as a result
84
+ //! CSV Sniffing consists of five steps:
85
+ //! 1. Dialect Detection: Generate the CSV Options (delimiter, quote, escape, etc.)
86
+ //! 2. Type Detection: Figures out the types of the columns (For one chunk)
87
+ //! 3. Type Refinement: Refines the types of the columns for the remaining chunks
88
+ //! 4. Header Detection: Figures out if the CSV file has a header and produces the names of the columns
89
+ //! 5. Type Replacement: Replaces the types of the columns if the user specified them
90
+ SnifferResult SniffCSV(bool force_match = false);
91
+
92
+ static NewLineIdentifier DetectNewLineDelimiter(CSVBufferManager &buffer_manager);
93
+
94
+ private:
95
+ //! CSV State Machine Cache
96
+ CSVStateMachineCache &state_machine_cache;
97
+ //! Highest number of columns found
98
+ idx_t max_columns_found = 0;
99
+ //! Current Candidates being considered
100
+ vector<unique_ptr<ColumnCountScanner>> candidates;
101
+ //! Reference to original CSV Options, it will be modified as a result of the sniffer.
102
+ CSVReaderOptions &options;
103
+ //! Buffer being used on sniffer
104
+ shared_ptr<CSVBufferManager> buffer_manager;
105
+ //! Information regarding columns that were set by user/query
106
+ SetColumns set_columns;
107
+ shared_ptr<CSVErrorHandler> error_handler;
108
+ shared_ptr<CSVErrorHandler> detection_error_handler;
109
+ //! Sets the result options
110
+ void SetResultOptions();
111
+
112
+ //! ------------------------------------------------------//
113
+ //! ----------------- Dialect Detection ----------------- //
114
+ //! ------------------------------------------------------//
115
+ //! First phase of auto detection: detect CSV dialect (i.e. delimiter, quote rules, etc)
116
+ void DetectDialect();
117
+ //! Functions called in the main DetectDialect(); function
118
+ //! 1. Generates the search space candidates for the dialect
119
+ void GenerateCandidateDetectionSearchSpace(vector<char> &delim_candidates, vector<QuoteRule> &quoterule_candidates,
120
+ unordered_map<uint8_t, vector<char>> &quote_candidates_map,
121
+ unordered_map<uint8_t, vector<char>> &escape_candidates_map);
122
+ //! 2. Generates the search space candidates for the state machines
123
+ void GenerateStateMachineSearchSpace(vector<unique_ptr<ColumnCountScanner>> &column_count_scanners,
124
+ const vector<char> &delimiter_candidates,
125
+ const vector<QuoteRule> &quoterule_candidates,
126
+ const unordered_map<uint8_t, vector<char>> &quote_candidates_map,
127
+ const unordered_map<uint8_t, vector<char>> &escape_candidates_map);
128
+ //! 3. Analyzes if dialect candidate is a good candidate to be considered, if so, it adds it to the candidates
129
+ void AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner>, idx_t &rows_read, idx_t &best_consistent_rows,
130
+ idx_t &prev_padding_count);
131
+ //! 4. Refine Candidates over remaining chunks
132
+ void RefineCandidates();
133
+
134
+ //! Checks if candidate still produces good values for the next chunk
135
+ bool RefineCandidateNextChunk(ColumnCountScanner &candidate);
136
+
137
+ //! ------------------------------------------------------//
138
+ //! ------------------- Type Detection ------------------ //
139
+ //! ------------------------------------------------------//
140
+ //! Second phase of auto detection: detect types, format template candidates
141
+ //! ordered by descending specificity (~ from high to low)
142
+ void DetectTypes();
143
+ //! Change the date format for the type to the string
144
+ //! Try to cast a string value to the specified sql type
145
+ bool TryCastValue(const DialectOptions &dialect_options, const string &decimal_separator, const Value &value,
146
+ const LogicalType &sql_type);
147
+ void SetDateFormat(CSVStateMachine &candidate, const string &format_specifier, const LogicalTypeId &sql_type);
148
+
149
+ //! Function that initialized the necessary variables used for date and timestamp detection
150
+ void InitializeDateAndTimeStampDetection(CSVStateMachine &candidate, const string &separator,
151
+ const LogicalType &sql_type);
152
+ //! Functions that performs detection for date and timestamp formats
153
+ void DetectDateAndTimeStampFormats(CSVStateMachine &candidate, const LogicalType &sql_type, const string &separator,
154
+ Value &dummy_val);
155
+
156
+ //! Variables for Type Detection
157
+ //! Format Candidates for Date and Timestamp Types
158
+ const map<LogicalTypeId, vector<const char *>> format_template_candidates = {
159
+ {LogicalTypeId::DATE, {"%m-%d-%Y", "%m-%d-%y", "%d-%m-%Y", "%d-%m-%y", "%Y-%m-%d", "%y-%m-%d"}},
160
+ {LogicalTypeId::TIMESTAMP,
161
+ {"%Y-%m-%d %H:%M:%S.%f", "%m-%d-%Y %I:%M:%S %p", "%m-%d-%y %I:%M:%S %p", "%d-%m-%Y %H:%M:%S",
162
+ "%d-%m-%y %H:%M:%S", "%Y-%m-%d %H:%M:%S", "%y-%m-%d %H:%M:%S"}},
163
+ };
164
+ unordered_map<idx_t, vector<LogicalType>> best_sql_types_candidates_per_column_idx;
165
+ map<LogicalTypeId, vector<string>> best_format_candidates;
166
+ unique_ptr<StringValueScanner> best_candidate;
167
+ vector<Value> best_header_row;
168
+ //! Variable used for sniffing date and timestamp
169
+ map<LogicalTypeId, DateTimestampSniffing> format_candidates;
170
+
171
+ //! ------------------------------------------------------//
172
+ //! ------------------ Type Refinement ------------------ //
173
+ //! ------------------------------------------------------//
174
+ void RefineTypes();
175
+ bool TryCastVector(Vector &parse_chunk_col, idx_t size, const LogicalType &sql_type);
176
+ vector<LogicalType> detected_types;
177
+ //! ------------------------------------------------------//
178
+ //! ------------------ Header Detection ----------------- //
179
+ //! ------------------------------------------------------//
180
+ void DetectHeader();
181
+ bool DetectHeaderWithSetColumn();
182
+ vector<string> names;
183
+
184
+ //! ------------------------------------------------------//
185
+ //! ------------------ Type Replacement ----------------- //
186
+ //! ------------------------------------------------------//
187
+ void ReplaceTypes();
188
+ vector<bool> manually_set;
189
+ };
190
+
191
+ } // namespace duckdb
@@ -0,0 +1,30 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/execution/operator/csv_scanner/csv_state.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include <cstdint>
12
+
13
+ namespace duckdb {
14
+
15
+ //! All States of CSV Parsing
16
+ enum class CSVState : uint8_t {
17
+ STANDARD = 0, //! Regular unquoted field state
18
+ DELIMITER = 1, //! State after encountering a field separator (e.g., ;)
19
+ RECORD_SEPARATOR = 2, //! State after encountering a record separator (i.e., \n)
20
+ CARRIAGE_RETURN = 3, //! State after encountering a carriage return(i.e., \r)
21
+ QUOTED = 4, //! State when inside a quoted field
22
+ UNQUOTED = 5, //! State when leaving a quoted field
23
+ ESCAPE = 6, //! State when encountering an escape character (e.g., \)
24
+ INVALID = 7, //! Got to an Invalid State, this should error.
25
+ NOT_SET = 8, //! If the state is not set, usually the first state before getting the first character
26
+ QUOTED_NEW_LINE = 9, //! If we have a quoted newline
27
+ EMPTY_SPACE = 10 //! If we have empty spaces in the beginning and end of value
28
+ };
29
+
30
+ } // namespace duckdb
@@ -0,0 +1,99 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/execution/operator/csv_scanner/csv_state_machine.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/execution/operator/csv_scanner/csv_reader_options.hpp"
12
+ #include "duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp"
13
+ #include "duckdb/execution/operator/csv_scanner/csv_state_machine_cache.hpp"
14
+
15
+ namespace duckdb {
16
+
17
+ //! State of necessary CSV States to parse file
18
+ //! Current, previous, and state before the previous
19
+ struct CSVStates {
20
+ void Initialize() {
21
+ states[0] = CSVState::NOT_SET;
22
+ states[1] = CSVState::NOT_SET;
23
+ }
24
+ inline bool NewValue() {
25
+ return states[1] == CSVState::DELIMITER;
26
+ }
27
+
28
+ inline bool NewRow() {
29
+ // It is a new row, if the previous state is not a record separator, and the current one is
30
+ return states[0] != CSVState::RECORD_SEPARATOR && states[0] != CSVState::CARRIAGE_RETURN &&
31
+ (states[1] == CSVState::RECORD_SEPARATOR || states[1] == CSVState::CARRIAGE_RETURN);
32
+ }
33
+
34
+ inline bool EmptyLastValue() {
35
+ // It is a new row, if the previous state is not a record separator, and the current one is
36
+ return states[0] == CSVState::DELIMITER &&
37
+ (states[1] == CSVState::RECORD_SEPARATOR || states[1] == CSVState::CARRIAGE_RETURN);
38
+ }
39
+
40
+ inline bool EmptyLine() {
41
+ return (states[1] == CSVState::CARRIAGE_RETURN || states[1] == CSVState::RECORD_SEPARATOR) &&
42
+ (states[0] == CSVState::RECORD_SEPARATOR || states[0] == CSVState::NOT_SET);
43
+ }
44
+
45
+ inline bool IsNotSet() {
46
+ return states[1] == CSVState::NOT_SET;
47
+ }
48
+
49
+ inline bool IsCurrentNewRow() {
50
+ return states[1] == CSVState::RECORD_SEPARATOR || states[1] == CSVState::CARRIAGE_RETURN;
51
+ }
52
+
53
+ inline bool IsCarriageReturn() {
54
+ return states[1] == CSVState::CARRIAGE_RETURN;
55
+ }
56
+
57
+ inline bool IsQuoted() {
58
+ return states[0] == CSVState::QUOTED;
59
+ }
60
+ inline bool IsEscaped() {
61
+ return states[1] == CSVState::ESCAPE || (states[0] == CSVState::UNQUOTED && states[1] == CSVState::QUOTED);
62
+ }
63
+ inline bool IsQuotedCurrent() {
64
+ return states[1] == CSVState::QUOTED || states[1] == CSVState::QUOTED_NEW_LINE;
65
+ }
66
+ CSVState states[2];
67
+ };
68
+
69
+ //! The CSV State Machine comprises a state transition array (STA).
70
+ //! The STA indicates the current state of parsing based on both the current and preceding characters.
71
+ //! This reveals whether we are dealing with a Field, a New Line, a Delimiter, and so forth.
72
+ //! The STA's creation depends on the provided quote, character, and delimiter options for that state machine.
73
+ //! The motivation behind implementing an STA is to remove branching in regular CSV Parsing by predicting and detecting
74
+ //! the states. Note: The State Machine is currently utilized solely in the CSV Sniffer.
75
+ class CSVStateMachine {
76
+ public:
77
+ explicit CSVStateMachine(CSVReaderOptions &options_p, const CSVStateMachineOptions &state_machine_options,
78
+ CSVStateMachineCache &csv_state_machine_cache_p);
79
+
80
+ explicit CSVStateMachine(const StateMachine &transition_array, const CSVReaderOptions &options);
81
+
82
+ //! Transition all states to next state, that depends on the current char
83
+ inline void Transition(CSVStates &states, char current_char) const {
84
+ states.states[0] = states.states[1];
85
+ states.states[1] = transition_array[static_cast<uint8_t>(current_char)][static_cast<uint8_t>(states.states[1])];
86
+ }
87
+
88
+ //! The Transition Array is a Finite State Machine
89
+ //! It holds the transitions of all states, on all 256 possible different characters
90
+ const StateMachine &transition_array;
91
+ //! Options of this state machine
92
+ const CSVStateMachineOptions state_machine_options;
93
+ //! CSV Reader Options
94
+ const CSVReaderOptions &options;
95
+ //! Dialect options resulting from sniffing
96
+ DialectOptions dialect_options;
97
+ };
98
+
99
+ } // namespace duckdb
@@ -0,0 +1,91 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/execution/operator/csv_scanner/csv_state_machine_cache.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/storage/object_cache.hpp"
12
+ #include "duckdb/common/types/hash.hpp"
13
+ #include "duckdb/execution/operator/csv_scanner/state_machine_options.hpp"
14
+ #include "duckdb/execution/operator/csv_scanner/quote_rules.hpp"
15
+ #include "duckdb/execution/operator/csv_scanner/csv_state.hpp"
16
+
17
+ namespace duckdb {
18
+
19
+ //! Class to wrap the state machine matrix
20
+ class StateMachine {
21
+ public:
22
+ static constexpr uint32_t NUM_STATES = 11;
23
+ static constexpr uint32_t NUM_TRANSITIONS = 256;
24
+ CSVState state_machine[NUM_TRANSITIONS][NUM_STATES];
25
+ //! Transitions where we might skip processing
26
+ //! For the Standard State
27
+ bool skip_standard[256];
28
+ //! For the Quoted State
29
+ bool skip_quoted[256];
30
+
31
+ uint64_t delimiter = 0;
32
+ uint64_t new_line = 0;
33
+ uint64_t carriage_return = 0;
34
+ uint64_t quote = 0;
35
+ uint64_t escape = 0;
36
+ const CSVState *operator[](idx_t i) const {
37
+ return state_machine[i];
38
+ }
39
+
40
+ CSVState *operator[](idx_t i) {
41
+ return state_machine[i];
42
+ }
43
+ };
44
+
45
+ //! Hash function used in out state machine cache, it hashes and combines all options used to generate a state machine
46
+ struct HashCSVStateMachineConfig {
47
+ size_t operator()(CSVStateMachineOptions const &config) const noexcept {
48
+ auto h_delimiter = Hash(config.delimiter.GetValue());
49
+ auto h_quote = Hash(config.quote.GetValue());
50
+ auto h_escape = Hash(config.escape.GetValue());
51
+ auto h_newline = Hash((uint8_t)config.new_line.GetValue());
52
+ return CombineHash(h_delimiter, CombineHash(h_quote, CombineHash(h_escape, h_newline)));
53
+ }
54
+ };
55
+
56
+ //! The CSVStateMachineCache caches state machines, although small ~2kb, the actual creation of multiple State Machines
57
+ //! can become a bottleneck on sniffing, when reading very small csv files.
58
+ //! Hence the cache stores State Machines based on their different delimiter|quote|escape options.
59
+ class CSVStateMachineCache : public ObjectCacheEntry {
60
+ public:
61
+ CSVStateMachineCache();
62
+ ~CSVStateMachineCache() override = default;
63
+ //! Gets a state machine from the cache, if it's not from one the default options
64
+ //! It first caches it, then returns it.
65
+ static CSVStateMachineCache &Get(ClientContext &context);
66
+
67
+ //! Gets a state machine from the cache, if it's not from one the default options
68
+ //! It first caches it, then returns it.
69
+ const StateMachine &Get(const CSVStateMachineOptions &state_machine_options);
70
+
71
+ static string ObjectType() {
72
+ return "CSV_STATE_MACHINE_CACHE";
73
+ }
74
+
75
+ string GetObjectType() override {
76
+ return ObjectType();
77
+ }
78
+
79
+ private:
80
+ void Insert(const CSVStateMachineOptions &state_machine_options);
81
+ //! Cache on delimiter|quote|escape|newline
82
+ unordered_map<CSVStateMachineOptions, StateMachine, HashCSVStateMachineConfig> state_machine_cache;
83
+ //! Default value for options used to intialize CSV State Machine Cache
84
+ const vector<char> default_delimiter = {',', '|', ';', '\t'};
85
+ const vector<vector<char>> default_quote = {{'\"'}, {'\"', '\''}, {'\0'}};
86
+ const vector<QuoteRule> default_quote_rule = {QuoteRule::QUOTES_RFC, QuoteRule::QUOTES_OTHER, QuoteRule::NO_QUOTES};
87
+ const vector<vector<char>> default_escape = {{'\0', '\"', '\''}, {'\\'}, {'\0'}};
88
+ //! Because the state machine cache can be accessed in Parallel we need a mutex.
89
+ mutex main_mutex;
90
+ };
91
+ } // namespace duckdb
@@ -0,0 +1,80 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/execution/operator/csv_scanner/global_csv_state.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp"
12
+ #include "duckdb/execution/operator/csv_scanner/scanner_boundary.hpp"
13
+ #include "duckdb/execution/operator/csv_scanner/csv_state_machine.hpp"
14
+ #include "duckdb/execution/operator/csv_scanner/csv_error.hpp"
15
+ #include "duckdb/function/table/read_csv.hpp"
16
+ #include "duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp"
17
+ #include "duckdb/execution/operator/csv_scanner/string_value_scanner.hpp"
18
+
19
+ namespace duckdb {
20
+
21
+ //! CSV Global State is used in the CSV Reader Table Function, it controls what each thread
22
+ struct CSVGlobalState : public GlobalTableFunctionState {
23
+ public:
24
+ CSVGlobalState(ClientContext &context, const shared_ptr<CSVBufferManager> &buffer_manager_p,
25
+ const CSVReaderOptions &options, idx_t system_threads_p, const vector<string> &files,
26
+ vector<column_t> column_ids_p, const ReadCSVData &bind_data);
27
+
28
+ ~CSVGlobalState() override {
29
+ }
30
+
31
+ //! Generates a CSV Scanner, with information regarding the piece of buffer it should be read.
32
+ //! In case it returns a nullptr it means we are done reading these files.
33
+ unique_ptr<StringValueScanner> Next();
34
+
35
+ void FillRejectsTable();
36
+
37
+ void DecrementThread();
38
+
39
+ //! Returns Current Progress of this CSV Read
40
+ double GetProgress(const ReadCSVData &bind_data) const;
41
+
42
+ //! Calculates the Max Threads that will be used by this CSV Reader
43
+ idx_t MaxThreads() const override;
44
+ //! We hold information on the current scanner boundary
45
+ CSVIterator current_boundary;
46
+
47
+ private:
48
+ //! Reference to the client context that created this scan
49
+ ClientContext &context;
50
+
51
+ vector<shared_ptr<CSVFileScan>> file_scans;
52
+
53
+ //! Mutex to lock when getting next batch of bytes (Parallel Only)
54
+ mutable mutex main_mutex;
55
+
56
+ //! Basically max number of threads in DuckDB
57
+ idx_t system_threads;
58
+
59
+ //! Number of threads being used in this scanner
60
+ idx_t running_threads = 1;
61
+ //! The column ids to read
62
+ vector<column_t> column_ids;
63
+
64
+ string sniffer_mismatch_error;
65
+
66
+ bool finished = false;
67
+
68
+ const ReadCSVData &bind_data;
69
+
70
+ vector<LogicalType> file_schema;
71
+
72
+ bool single_threaded = false;
73
+
74
+ atomic<idx_t> scanner_idx;
75
+
76
+ atomic<idx_t> last_file_idx;
77
+ shared_ptr<CSVBufferUsage> current_buffer_in_use;
78
+ };
79
+
80
+ } // namespace duckdb