duckdb 0.10.1-dev9.0 → 0.10.2-dev0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (702) hide show
  1. package/.github/workflows/NodeJS.yml +9 -16
  2. package/binding.gyp +4 -1
  3. package/package.json +1 -1
  4. package/src/database.cpp +1 -0
  5. package/src/duckdb/extension/icu/icu-strptime.cpp +46 -4
  6. package/src/duckdb/extension/icu/icu-table-range.cpp +5 -0
  7. package/src/duckdb/extension/icu/icu-timezone.cpp +28 -4
  8. package/src/duckdb/extension/json/buffered_json_reader.cpp +6 -5
  9. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +3 -3
  10. package/src/duckdb/extension/json/include/json_transform.hpp +2 -0
  11. package/src/duckdb/extension/json/json_functions/json_create.cpp +6 -0
  12. package/src/duckdb/extension/json/json_functions/json_transform.cpp +8 -9
  13. package/src/duckdb/extension/json/json_functions.cpp +1 -2
  14. package/src/duckdb/extension/json/json_scan.cpp +1 -0
  15. package/src/duckdb/extension/parquet/column_reader.cpp +17 -1
  16. package/src/duckdb/extension/parquet/column_writer.cpp +151 -20
  17. package/src/duckdb/extension/parquet/include/column_writer.hpp +3 -4
  18. package/src/duckdb/extension/parquet/parquet_extension.cpp +14 -9
  19. package/src/duckdb/extension/parquet/parquet_timestamp.cpp +10 -0
  20. package/src/duckdb/extension/parquet/parquet_writer.cpp +4 -0
  21. package/src/duckdb/src/catalog/catalog.cpp +94 -10
  22. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +9 -5
  23. package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +1 -0
  24. package/src/duckdb/src/catalog/catalog_entry/ub_duckdb_catalog_entries.cpp +16 -0
  25. package/src/duckdb/src/catalog/catalog_entry/view_catalog_entry.cpp +28 -0
  26. package/src/duckdb/src/catalog/default/default_functions.cpp +3 -0
  27. package/src/duckdb/src/catalog/default/default_views.cpp +3 -3
  28. package/src/duckdb/src/catalog/default/ub_duckdb_catalog_default_entries.cpp +5 -0
  29. package/src/duckdb/src/catalog/dependency_catalog_set.cpp +1 -3
  30. package/src/duckdb/src/catalog/dependency_manager.cpp +2 -5
  31. package/src/duckdb/src/catalog/ub_duckdb_catalog.cpp +10 -0
  32. package/src/duckdb/src/common/adbc/adbc.cpp +1 -1
  33. package/src/duckdb/src/common/adbc/driver_manager.cpp +2 -1
  34. package/src/duckdb/src/common/adbc/nanoarrow/ub_duckdb_adbc_nanoarrow.cpp +5 -0
  35. package/src/duckdb/src/common/adbc/ub_duckdb_adbc.cpp +3 -0
  36. package/src/duckdb/src/common/allocator.cpp +34 -8
  37. package/src/duckdb/src/common/arrow/appender/fixed_size_list_data.cpp +39 -0
  38. package/src/duckdb/src/common/arrow/appender/ub_duckdb_common_arrow_appender.cpp +6 -0
  39. package/src/duckdb/src/common/arrow/appender/union_data.cpp +2 -1
  40. package/src/duckdb/src/common/arrow/arrow_appender.cpp +5 -5
  41. package/src/duckdb/src/common/arrow/arrow_converter.cpp +17 -0
  42. package/src/duckdb/src/common/arrow/ub_duckdb_common_arrow.cpp +4 -0
  43. package/src/duckdb/src/common/compressed_file_system.cpp +1 -0
  44. package/src/duckdb/src/common/crypto/ub_duckdb_common_crypto.cpp +2 -0
  45. package/src/duckdb/src/common/enum_util.cpp +153 -13
  46. package/src/duckdb/src/common/enums/logical_operator_type.cpp +0 -2
  47. package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -2
  48. package/src/duckdb/src/common/enums/ub_duckdb_common_enums.cpp +12 -0
  49. package/src/duckdb/src/common/exception/conversion_exception.cpp +23 -0
  50. package/src/duckdb/src/common/exception.cpp +7 -14
  51. package/src/duckdb/src/common/extra_type_info.cpp +6 -5
  52. package/src/duckdb/src/common/gzip_file_system.cpp +5 -4
  53. package/src/duckdb/src/common/local_file_system.cpp +85 -10
  54. package/src/duckdb/src/common/operator/cast_operators.cpp +413 -305
  55. package/src/duckdb/src/common/operator/ub_duckdb_common_operators.cpp +4 -0
  56. package/src/duckdb/src/common/progress_bar/ub_duckdb_progress_bar.cpp +3 -0
  57. package/src/duckdb/src/common/re2_regex.cpp +2 -1
  58. package/src/duckdb/src/common/row_operations/row_heap_scatter.cpp +2 -2
  59. package/src/duckdb/src/common/row_operations/row_matcher.cpp +1 -0
  60. package/src/duckdb/src/common/row_operations/row_scatter.cpp +2 -2
  61. package/src/duckdb/src/common/row_operations/ub_duckdb_row_operations.cpp +9 -0
  62. package/src/duckdb/src/common/serializer/binary_serializer.cpp +3 -3
  63. package/src/duckdb/src/common/serializer/ub_duckdb_common_serializer.cpp +7 -0
  64. package/src/duckdb/src/common/sort/partition_state.cpp +2 -3
  65. package/src/duckdb/src/common/sort/ub_duckdb_sort.cpp +7 -0
  66. package/src/duckdb/src/common/string_util.cpp +3 -3
  67. package/src/duckdb/src/common/types/bit.cpp +7 -6
  68. package/src/duckdb/src/common/types/blob.cpp +20 -9
  69. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +8 -6
  70. package/src/duckdb/src/common/types/column/column_data_collection.cpp +11 -1
  71. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +5 -0
  72. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +1 -1
  73. package/src/duckdb/src/common/types/column/ub_duckdb_common_types_column.cpp +6 -0
  74. package/src/duckdb/src/common/types/data_chunk.cpp +1 -1
  75. package/src/duckdb/src/common/types/date.cpp +1 -1
  76. package/src/duckdb/src/common/types/hugeint.cpp +3 -2
  77. package/src/duckdb/src/common/types/interval.cpp +1 -1
  78. package/src/duckdb/src/common/types/list_segment.cpp +2 -1
  79. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
  80. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +7 -7
  81. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +10 -27
  82. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +168 -88
  83. package/src/duckdb/src/common/types/row/ub_duckdb_common_types_row.cpp +11 -0
  84. package/src/duckdb/src/common/types/selection_vector.cpp +1 -1
  85. package/src/duckdb/src/common/types/string_heap.cpp +5 -1
  86. package/src/duckdb/src/common/types/string_type.cpp +18 -4
  87. package/src/duckdb/src/common/types/time.cpp +4 -2
  88. package/src/duckdb/src/common/types/timestamp.cpp +32 -6
  89. package/src/duckdb/src/common/types/ub_duckdb_common_types.cpp +28 -0
  90. package/src/duckdb/src/common/types/uhugeint.cpp +3 -2
  91. package/src/duckdb/src/common/types/uuid.cpp +11 -0
  92. package/src/duckdb/src/common/types/validity_mask.cpp +4 -3
  93. package/src/duckdb/src/common/types/value.cpp +17 -6
  94. package/src/duckdb/src/common/types/vector.cpp +243 -68
  95. package/src/duckdb/src/common/types.cpp +7 -5
  96. package/src/duckdb/src/common/ub_duckdb_common.cpp +34 -0
  97. package/src/duckdb/src/common/value_operations/ub_duckdb_value_operations.cpp +2 -0
  98. package/src/duckdb/src/common/vector_operations/generators.cpp +2 -1
  99. package/src/duckdb/src/common/vector_operations/vector_cast.cpp +4 -4
  100. package/src/duckdb/src/common/vector_operations/vector_copy.cpp +2 -2
  101. package/src/duckdb/src/common/vector_operations/vector_hash.cpp +17 -6
  102. package/src/duckdb/src/core_functions/aggregate/algebraic/ub_duckdb_aggr_algebraic.cpp +5 -0
  103. package/src/duckdb/src/core_functions/aggregate/distributive/arg_min_max.cpp +30 -6
  104. package/src/duckdb/src/core_functions/aggregate/distributive/bitagg.cpp +1 -1
  105. package/src/duckdb/src/core_functions/aggregate/distributive/bitstring_agg.cpp +12 -5
  106. package/src/duckdb/src/core_functions/aggregate/distributive/minmax.cpp +2 -2
  107. package/src/duckdb/src/core_functions/aggregate/distributive/string_agg.cpp +2 -1
  108. package/src/duckdb/src/core_functions/aggregate/distributive/ub_duckdb_aggr_distributive.cpp +13 -0
  109. package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +2 -1
  110. package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +2 -9
  111. package/src/duckdb/src/core_functions/aggregate/holistic/ub_duckdb_aggr_holistic.cpp +5 -0
  112. package/src/duckdb/src/core_functions/aggregate/nested/ub_duckdb_aggr_nested.cpp +3 -0
  113. package/src/duckdb/src/core_functions/aggregate/regression/ub_duckdb_aggr_regr.cpp +8 -0
  114. package/src/duckdb/src/core_functions/function_list.cpp +1 -0
  115. package/src/duckdb/src/core_functions/scalar/bit/bitstring.cpp +2 -2
  116. package/src/duckdb/src/core_functions/scalar/bit/ub_duckdb_func_bit.cpp +2 -0
  117. package/src/duckdb/src/core_functions/scalar/blob/ub_duckdb_func_blob.cpp +3 -0
  118. package/src/duckdb/src/core_functions/scalar/date/date_diff.cpp +8 -0
  119. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +22 -3
  120. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +29 -7
  121. package/src/duckdb/src/core_functions/scalar/date/to_interval.cpp +3 -1
  122. package/src/duckdb/src/core_functions/scalar/date/ub_duckdb_func_date.cpp +12 -0
  123. package/src/duckdb/src/core_functions/scalar/debug/ub_duckdb_func_debug.cpp +2 -0
  124. package/src/duckdb/src/core_functions/scalar/enum/ub_duckdb_func_enum.cpp +2 -0
  125. package/src/duckdb/src/core_functions/scalar/generic/ub_duckdb_func_generic.cpp +9 -0
  126. package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +12 -8
  127. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +2 -1
  128. package/src/duckdb/src/core_functions/scalar/list/list_reduce.cpp +6 -5
  129. package/src/duckdb/src/core_functions/scalar/list/list_sort.cpp +5 -3
  130. package/src/duckdb/src/core_functions/scalar/list/list_value.cpp +28 -12
  131. package/src/duckdb/src/core_functions/scalar/list/ub_duckdb_func_list.cpp +11 -0
  132. package/src/duckdb/src/core_functions/scalar/map/map.cpp +129 -160
  133. package/src/duckdb/src/core_functions/scalar/map/ub_duckdb_func_map_nested.cpp +8 -0
  134. package/src/duckdb/src/core_functions/scalar/math/numeric.cpp +19 -16
  135. package/src/duckdb/src/core_functions/scalar/math/ub_duckdb_func_math.cpp +1 -0
  136. package/src/duckdb/src/core_functions/scalar/operators/bitwise.cpp +4 -4
  137. package/src/duckdb/src/core_functions/scalar/operators/ub_duckdb_func_ops.cpp +1 -0
  138. package/src/duckdb/src/core_functions/scalar/random/ub_duckdb_func_random.cpp +3 -0
  139. package/src/duckdb/src/core_functions/scalar/string/hex.cpp +2 -1
  140. package/src/duckdb/src/core_functions/scalar/string/pad.cpp +2 -2
  141. package/src/duckdb/src/core_functions/scalar/string/repeat.cpp +1 -1
  142. package/src/duckdb/src/core_functions/scalar/string/replace.cpp +1 -1
  143. package/src/duckdb/src/core_functions/scalar/string/string_split.cpp +2 -1
  144. package/src/duckdb/src/core_functions/scalar/string/translate.cpp +1 -1
  145. package/src/duckdb/src/core_functions/scalar/string/ub_duckdb_func_string.cpp +26 -0
  146. package/src/duckdb/src/core_functions/scalar/struct/ub_duckdb_func_struct.cpp +3 -0
  147. package/src/duckdb/src/core_functions/scalar/union/ub_duckdb_func_union.cpp +4 -0
  148. package/src/duckdb/src/core_functions/ub_duckdb_core_functions.cpp +3 -0
  149. package/src/duckdb/src/execution/adaptive_filter.cpp +1 -1
  150. package/src/duckdb/src/execution/aggregate_hashtable.cpp +1 -1
  151. package/src/duckdb/src/execution/column_binding_resolver.cpp +7 -1
  152. package/src/duckdb/src/execution/expression_executor/execute_case.cpp +2 -2
  153. package/src/duckdb/src/execution/expression_executor/execute_cast.cpp +2 -0
  154. package/src/duckdb/src/execution/expression_executor/execute_function.cpp +2 -2
  155. package/src/duckdb/src/execution/expression_executor/execute_operator.cpp +1 -1
  156. package/src/duckdb/src/execution/expression_executor/ub_duckdb_expression_executor.cpp +11 -0
  157. package/src/duckdb/src/execution/expression_executor.cpp +5 -2
  158. package/src/duckdb/src/execution/index/art/art.cpp +12 -6
  159. package/src/duckdb/src/execution/index/art/art_key.cpp +3 -3
  160. package/src/duckdb/src/execution/index/art/leaf.cpp +2 -2
  161. package/src/duckdb/src/execution/index/art/node16.cpp +2 -2
  162. package/src/duckdb/src/execution/index/art/node256.cpp +3 -3
  163. package/src/duckdb/src/execution/index/art/node48.cpp +5 -5
  164. package/src/duckdb/src/execution/index/art/prefix.cpp +1 -1
  165. package/src/duckdb/src/execution/index/art/ub_duckdb_art_index_execution.cpp +12 -0
  166. package/src/duckdb/src/execution/index/art/ub_duckdb_execution_index_art.cpp +11 -0
  167. package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +6 -5
  168. package/src/duckdb/src/execution/index/ub_duckdb_execution_index.cpp +3 -0
  169. package/src/duckdb/src/execution/nested_loop_join/ub_duckdb_nested_loop_join.cpp +3 -0
  170. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -1
  171. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +3 -4
  172. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +0 -1
  173. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +3 -4
  174. package/src/duckdb/src/execution/operator/aggregate/ub_duckdb_operator_aggregate.cpp +9 -0
  175. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +1 -1
  176. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp +8 -3
  177. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp +1 -1
  178. package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +5 -5
  179. package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +2 -3
  180. package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +5 -1
  181. package/src/duckdb/src/execution/operator/csv_scanner/scanner/skip_scanner.cpp +2 -2
  182. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +151 -79
  183. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +11 -6
  184. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +27 -6
  185. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +71 -18
  186. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +22 -11
  187. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +6 -4
  188. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +5 -3
  189. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/ub_duckdb_operator_csv_sniffer.cpp +7 -0
  190. package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine.cpp +3 -3
  191. package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +30 -5
  192. package/src/duckdb/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +6 -2
  193. package/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +47 -46
  194. package/src/duckdb/src/execution/operator/csv_scanner/ub_duckdb_operator_csv_scanner.cpp +10 -0
  195. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +102 -54
  196. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +8 -1
  197. package/src/duckdb/src/execution/operator/filter/ub_duckdb_operator_filter.cpp +2 -0
  198. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +54 -36
  199. package/src/duckdb/src/execution/operator/helper/physical_limit_percent.cpp +56 -32
  200. package/src/duckdb/src/execution/operator/helper/physical_streaming_limit.cpp +9 -13
  201. package/src/duckdb/src/execution/operator/helper/physical_transaction.cpp +12 -0
  202. package/src/duckdb/src/execution/operator/helper/physical_verify_vector.cpp +221 -0
  203. package/src/duckdb/src/execution/operator/helper/ub_duckdb_operator_helper.cpp +18 -0
  204. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +1 -0
  205. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +23 -8
  206. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +8 -3
  207. package/src/duckdb/src/execution/operator/join/ub_duckdb_operator_join.cpp +16 -0
  208. package/src/duckdb/src/execution/operator/order/physical_order.cpp +2 -3
  209. package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +2 -7
  210. package/src/duckdb/src/execution/operator/order/ub_duckdb_operator_order.cpp +3 -0
  211. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +451 -55
  212. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +312 -150
  213. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +200 -75
  214. package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +1 -0
  215. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +6 -5
  216. package/src/duckdb/src/execution/operator/persistent/ub_duckdb_operator_persistent.cpp +10 -0
  217. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +3 -2
  218. package/src/duckdb/src/execution/operator/projection/ub_duckdb_operator_projection.cpp +5 -0
  219. package/src/duckdb/src/execution/operator/scan/ub_duckdb_operator_scan.cpp +7 -0
  220. package/src/duckdb/src/execution/operator/schema/ub_duckdb_operator_schema.cpp +12 -0
  221. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +25 -0
  222. package/src/duckdb/src/execution/operator/set/ub_duckdb_operator_set.cpp +4 -0
  223. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +4 -4
  224. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +2 -2
  225. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +2 -2
  226. package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +7 -17
  227. package/src/duckdb/src/execution/physical_plan/plan_distinct.cpp +10 -0
  228. package/src/duckdb/src/execution/physical_plan/plan_insert.cpp +7 -0
  229. package/src/duckdb/src/execution/physical_plan/plan_limit.cpp +45 -13
  230. package/src/duckdb/src/execution/physical_plan/ub_duckdb_physical_plan.cpp +44 -0
  231. package/src/duckdb/src/execution/physical_plan_generator.cpp +5 -3
  232. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +13 -12
  233. package/src/duckdb/src/execution/ub_duckdb_execution.cpp +15 -0
  234. package/src/duckdb/src/execution/window_executor.cpp +71 -61
  235. package/src/duckdb/src/execution/window_segment_tree.cpp +6 -6
  236. package/src/duckdb/src/extension_forward_decl/icu.cpp +59 -0
  237. package/src/duckdb/src/function/aggregate/algebraic/ub_duckdb_aggr_algebraic.cpp +5 -0
  238. package/src/duckdb/src/function/aggregate/distributive/first.cpp +2 -2
  239. package/src/duckdb/src/function/aggregate/distributive/ub_duckdb_aggr_distr.cpp +3 -0
  240. package/src/duckdb/src/function/aggregate/holistic/ub_duckdb_aggr_holistic.cpp +5 -0
  241. package/src/duckdb/src/function/aggregate/nested/ub_duckdb_aggr_nested.cpp +3 -0
  242. package/src/duckdb/src/function/aggregate/regression/ub_duckdb_aggr_regr.cpp +8 -0
  243. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +3 -2
  244. package/src/duckdb/src/function/aggregate/ub_duckdb_func_aggr.cpp +3 -0
  245. package/src/duckdb/src/function/cast/array_casts.cpp +2 -4
  246. package/src/duckdb/src/function/cast/bit_cast.cpp +13 -13
  247. package/src/duckdb/src/function/cast/cast_function_set.cpp +2 -0
  248. package/src/duckdb/src/function/cast/decimal_cast.cpp +38 -44
  249. package/src/duckdb/src/function/cast/default_casts.cpp +5 -2
  250. package/src/duckdb/src/function/cast/enum_casts.cpp +5 -5
  251. package/src/duckdb/src/function/cast/list_casts.cpp +24 -14
  252. package/src/duckdb/src/function/cast/string_cast.cpp +48 -30
  253. package/src/duckdb/src/function/cast/struct_cast.cpp +2 -2
  254. package/src/duckdb/src/function/cast/time_casts.cpp +12 -0
  255. package/src/duckdb/src/function/cast/ub_duckdb_func_cast.cpp +17 -0
  256. package/src/duckdb/src/function/cast/union/ub_duckdb_union_cast.cpp +2 -0
  257. package/src/duckdb/src/function/cast/union_casts.cpp +13 -15
  258. package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +1 -1
  259. package/src/duckdb/src/function/cast_rules.cpp +2 -1
  260. package/src/duckdb/src/function/pragma/ub_duckdb_func_pragma.cpp +3 -0
  261. package/src/duckdb/src/function/scalar/bit/ub_duckdb_func_bit.cpp +2 -0
  262. package/src/duckdb/src/function/scalar/blob/ub_duckdb_func_blob.cpp +3 -0
  263. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +7 -6
  264. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +3 -3
  265. package/src/duckdb/src/function/scalar/compressed_materialization/ub_duckdb_func_compressed_materialization.cpp +3 -0
  266. package/src/duckdb/src/function/scalar/date/ub_duckdb_func_date.cpp +12 -0
  267. package/src/duckdb/src/function/scalar/enum/ub_duckdb_func_enum.cpp +2 -0
  268. package/src/duckdb/src/function/scalar/generic/ub_duckdb_func_generic.cpp +8 -0
  269. package/src/duckdb/src/function/scalar/generic/ub_duckdb_func_generic_main.cpp +2 -0
  270. package/src/duckdb/src/function/scalar/list/list_resize.cpp +3 -1
  271. package/src/duckdb/src/function/scalar/list/list_zip.cpp +3 -4
  272. package/src/duckdb/src/function/scalar/list/ub_duckdb_func_list.cpp +11 -0
  273. package/src/duckdb/src/function/scalar/list/ub_duckdb_func_list_nested.cpp +5 -0
  274. package/src/duckdb/src/function/scalar/map/ub_duckdb_func_map_nested.cpp +7 -0
  275. package/src/duckdb/src/function/scalar/math/ub_duckdb_func_math.cpp +4 -0
  276. package/src/duckdb/src/function/scalar/nested_functions.cpp +7 -3
  277. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +1 -1
  278. package/src/duckdb/src/function/scalar/operators/multiply.cpp +4 -2
  279. package/src/duckdb/src/function/scalar/operators/subtract.cpp +9 -3
  280. package/src/duckdb/src/function/scalar/operators/ub_duckdb_func_ops.cpp +6 -0
  281. package/src/duckdb/src/function/scalar/operators/ub_duckdb_func_ops_main.cpp +5 -0
  282. package/src/duckdb/src/function/scalar/sequence/ub_duckdb_func_seq.cpp +2 -0
  283. package/src/duckdb/src/function/scalar/strftime_format.cpp +21 -20
  284. package/src/duckdb/src/function/scalar/string/like.cpp +14 -3
  285. package/src/duckdb/src/function/scalar/string/regexp/regexp_extract_all.cpp +4 -2
  286. package/src/duckdb/src/function/scalar/string/regexp/ub_duckdb_func_string_regexp.cpp +3 -0
  287. package/src/duckdb/src/function/scalar/string/regexp.cpp +6 -4
  288. package/src/duckdb/src/function/scalar/string/suffix.cpp +1 -1
  289. package/src/duckdb/src/function/scalar/string/ub_duckdb_func_string.cpp +31 -0
  290. package/src/duckdb/src/function/scalar/string/ub_duckdb_func_string_main.cpp +12 -0
  291. package/src/duckdb/src/function/scalar/struct/ub_duckdb_func_struct.cpp +4 -0
  292. package/src/duckdb/src/function/scalar/struct/ub_duckdb_func_struct_main.cpp +2 -0
  293. package/src/duckdb/src/function/scalar/system/ub_duckdb_func_system.cpp +2 -0
  294. package/src/duckdb/src/function/scalar/ub_duckdb_func_scalar.cpp +9 -0
  295. package/src/duckdb/src/function/scalar/union/ub_duckdb_func_union.cpp +4 -0
  296. package/src/duckdb/src/function/table/arrow/arrow_array_scan_state.cpp +28 -2
  297. package/src/duckdb/src/function/table/arrow/ub_duckdb_arrow_conversion.cpp +2 -0
  298. package/src/duckdb/src/function/table/arrow.cpp +23 -6
  299. package/src/duckdb/src/function/table/arrow_conversion.cpp +75 -33
  300. package/src/duckdb/src/function/table/copy_csv.cpp +8 -3
  301. package/src/duckdb/src/function/table/range.cpp +5 -0
  302. package/src/duckdb/src/function/table/read_csv.cpp +9 -8
  303. package/src/duckdb/src/function/table/read_file.cpp +1 -1
  304. package/src/duckdb/src/function/table/sniff_csv.cpp +5 -5
  305. package/src/duckdb/src/function/table/system/duckdb_columns.cpp +6 -2
  306. package/src/duckdb/src/function/table/system/duckdb_secrets.cpp +5 -1
  307. package/src/duckdb/src/function/table/system/duckdb_sequences.cpp +3 -1
  308. package/src/duckdb/src/function/table/system/duckdb_settings.cpp +13 -1
  309. package/src/duckdb/src/function/table/system/test_all_types.cpp +64 -0
  310. package/src/duckdb/src/function/table/system/ub_duckdb_table_func_system.cpp +23 -0
  311. package/src/duckdb/src/function/table/ub_duckdb_func_table.cpp +16 -0
  312. package/src/duckdb/src/function/table/version/pragma_version.cpp +11 -2
  313. package/src/duckdb/src/function/table/version/ub_duckdb_func_table_version.cpp +2 -0
  314. package/src/duckdb/src/function/ub_duckdb_function.cpp +14 -0
  315. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +11 -1
  316. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/view_catalog_entry.hpp +2 -0
  317. package/src/duckdb/src/include/duckdb/catalog/catalog_entry.hpp +1 -1
  318. package/src/duckdb/src/include/duckdb/common/allocator.hpp +8 -11
  319. package/src/duckdb/src/include/duckdb/common/arrow/appender/enum_data.hpp +2 -2
  320. package/src/duckdb/src/include/duckdb/common/arrow/appender/fixed_size_list_data.hpp +14 -0
  321. package/src/duckdb/src/include/duckdb/common/arrow/appender/list.hpp +1 -0
  322. package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +1 -1
  323. package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +2 -2
  324. package/src/duckdb/src/include/duckdb/common/bitpacking.hpp +2 -2
  325. package/src/duckdb/src/include/duckdb/common/bswap.hpp +6 -2
  326. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +32 -0
  327. package/src/duckdb/src/include/duckdb/common/enums/catalog_lookup_behavior.hpp +21 -0
  328. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +0 -1
  329. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -1
  330. package/src/duckdb/src/include/duckdb/common/enums/prepared_statement_mode.hpp +20 -0
  331. package/src/duckdb/src/include/duckdb/common/enums/statement_type.hpp +2 -0
  332. package/src/duckdb/src/include/duckdb/common/exception/conversion_exception.hpp +6 -0
  333. package/src/duckdb/src/include/duckdb/common/exception/parser_exception.hpp +4 -0
  334. package/src/duckdb/src/include/duckdb/common/exception.hpp +2 -0
  335. package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +3 -3
  336. package/src/duckdb/src/include/duckdb/common/file_opener.hpp +6 -5
  337. package/src/duckdb/src/include/duckdb/common/file_system.hpp +4 -2
  338. package/src/duckdb/src/include/duckdb/common/helper.hpp +7 -0
  339. package/src/duckdb/src/include/duckdb/common/limits.hpp +2 -2
  340. package/src/duckdb/src/include/duckdb/common/local_file_system.hpp +3 -0
  341. package/src/duckdb/src/include/duckdb/common/numeric_utils.hpp +64 -0
  342. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +34 -33
  343. package/src/duckdb/src/include/duckdb/common/operator/decimal_cast_operators.hpp +150 -124
  344. package/src/duckdb/src/include/duckdb/common/operator/integer_cast_operator.hpp +5 -5
  345. package/src/duckdb/src/include/duckdb/common/radix.hpp +1 -1
  346. package/src/duckdb/src/include/duckdb/common/serializer/deserializer.hpp +1 -1
  347. package/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp +4 -4
  348. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +1 -1
  349. package/src/duckdb/src/include/duckdb/common/types/blob.hpp +4 -1
  350. package/src/duckdb/src/include/duckdb/common/types/cast_helpers.hpp +15 -13
  351. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +5 -0
  352. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +2 -0
  353. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +3 -0
  354. package/src/duckdb/src/include/duckdb/common/types/column/partitioned_column_data.hpp +1 -1
  355. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +1 -1
  356. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +4 -1
  357. package/src/duckdb/src/include/duckdb/common/types/selection_vector.hpp +2 -1
  358. package/src/duckdb/src/include/duckdb/common/types/string_heap.hpp +2 -0
  359. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +11 -4
  360. package/src/duckdb/src/include/duckdb/common/types/time.hpp +1 -1
  361. package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +7 -2
  362. package/src/duckdb/src/include/duckdb/common/types/uuid.hpp +3 -0
  363. package/src/duckdb/src/include/duckdb/common/types/value.hpp +7 -0
  364. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +40 -7
  365. package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +1 -1
  366. package/src/duckdb/src/include/duckdb/common/types.hpp +1 -1
  367. package/src/duckdb/src/include/duckdb/common/vector_operations/general_cast.hpp +12 -4
  368. package/src/duckdb/src/include/duckdb/common/vector_operations/generic_executor.hpp +6 -6
  369. package/src/duckdb/src/include/duckdb/common/vector_operations/vector_operations.hpp +3 -2
  370. package/src/duckdb/src/include/duckdb/common/windows_undefs.hpp +4 -0
  371. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -1
  372. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +9 -0
  373. package/src/duckdb/src/include/duckdb/execution/column_binding_resolver.hpp +2 -1
  374. package/src/duckdb/src/include/duckdb/execution/executor.hpp +10 -0
  375. package/src/duckdb/src/include/duckdb/execution/expression_executor_state.hpp +1 -1
  376. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +262 -0
  377. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp +70 -0
  378. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp +103 -0
  379. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp +74 -0
  380. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_casting.hpp +154 -0
  381. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp +130 -0
  382. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp +60 -0
  383. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp +70 -0
  384. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +155 -0
  385. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp +166 -0
  386. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp +191 -0
  387. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state.hpp +30 -0
  388. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine.hpp +99 -0
  389. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine_cache.hpp +91 -0
  390. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/global_csv_state.hpp +80 -0
  391. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/quote_rules.hpp +21 -0
  392. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner_boundary.hpp +93 -0
  393. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/skip_scanner.hpp +60 -0
  394. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine_options.hpp +35 -0
  395. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +200 -0
  396. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit.hpp +13 -10
  397. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit_percent.hpp +5 -11
  398. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_streaming_limit.hpp +4 -6
  399. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_verify_vector.hpp +51 -0
  400. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +3 -0
  401. package/src/duckdb/src/include/duckdb/execution/operator/persistent/batch_memory_manager.hpp +165 -0
  402. package/src/duckdb/src/include/duckdb/execution/operator/persistent/batch_task_manager.hpp +48 -0
  403. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +10 -17
  404. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +4 -0
  405. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +2 -0
  406. package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +10 -6
  407. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +0 -2
  408. package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +2 -1
  409. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +1 -1
  410. package/src/duckdb/src/include/duckdb/function/cast/cast_function_set.hpp +1 -0
  411. package/src/duckdb/src/include/duckdb/function/cast/default_casts.hpp +14 -5
  412. package/src/duckdb/src/include/duckdb/function/cast/vector_cast_helpers.hpp +27 -43
  413. package/src/duckdb/src/include/duckdb/function/compression_function.hpp +4 -4
  414. package/src/duckdb/src/include/duckdb/function/copy_function.hpp +3 -3
  415. package/src/duckdb/src/include/duckdb/function/function.hpp +1 -1
  416. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -0
  417. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +1 -1
  418. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +1 -1
  419. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +10 -4
  420. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +8 -6
  421. package/src/duckdb/src/include/duckdb/function/table_function.hpp +3 -3
  422. package/src/duckdb/src/include/duckdb/main/attached_database.hpp +3 -0
  423. package/src/duckdb/src/include/duckdb/main/capi/cast/from_decimal.hpp +6 -4
  424. package/src/duckdb/src/include/duckdb/main/capi/cast/to_decimal.hpp +17 -10
  425. package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
  426. package/src/duckdb/src/include/duckdb/main/client_context.hpp +17 -3
  427. package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +2 -2
  428. package/src/duckdb/src/include/duckdb/main/client_context_state.hpp +20 -0
  429. package/src/duckdb/src/include/duckdb/main/client_data.hpp +1 -1
  430. package/src/duckdb/src/include/duckdb/main/database.hpp +3 -2
  431. package/src/duckdb/src/include/duckdb/main/database_manager.hpp +2 -1
  432. package/src/duckdb/src/include/duckdb/main/extension/generated_extension_loader.hpp +1 -1
  433. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +230 -199
  434. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +18 -0
  435. package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +1 -1
  436. package/src/duckdb/src/include/duckdb/main/relation/table_function_relation.hpp +2 -0
  437. package/src/duckdb/src/include/duckdb/main/relation.hpp +1 -1
  438. package/src/duckdb/src/include/duckdb/main/secret/secret_manager.hpp +4 -0
  439. package/src/duckdb/src/include/duckdb/main/settings.hpp +54 -10
  440. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +2 -1
  441. package/src/duckdb/src/include/duckdb/optimizer/rule/ordered_aggregate_optimizer.hpp +2 -0
  442. package/src/duckdb/src/include/duckdb/parallel/concurrentqueue.hpp +21 -5
  443. package/src/duckdb/src/include/duckdb/parallel/event.hpp +1 -1
  444. package/src/duckdb/src/include/duckdb/parallel/executor_task.hpp +37 -0
  445. package/src/duckdb/src/include/duckdb/parallel/interrupt.hpp +1 -1
  446. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +1 -1
  447. package/src/duckdb/src/include/duckdb/parallel/task.hpp +0 -20
  448. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +4 -2
  449. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +10 -0
  450. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +2 -1
  451. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +0 -24
  452. package/src/duckdb/src/include/duckdb/parser/parsed_data/comment_on_column_info.hpp +46 -0
  453. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_view_info.hpp +2 -0
  454. package/src/duckdb/src/include/duckdb/parser/parsed_data/extra_drop_info.hpp +1 -1
  455. package/src/duckdb/src/include/duckdb/parser/parsed_data/parse_info.hpp +3 -2
  456. package/src/duckdb/src/include/duckdb/parser/tableref/pivotref.hpp +3 -3
  457. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +3 -3
  458. package/src/duckdb/src/include/duckdb/planner/binder.hpp +11 -4
  459. package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +59 -23
  460. package/src/duckdb/src/include/duckdb/planner/expression.hpp +1 -0
  461. package/src/duckdb/src/include/duckdb/planner/expression_binder/column_alias_binder.hpp +2 -2
  462. package/src/duckdb/src/include/duckdb/planner/expression_iterator.hpp +11 -3
  463. package/src/duckdb/src/include/duckdb/planner/extension_callback.hpp +6 -0
  464. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +0 -1
  465. package/src/duckdb/src/include/duckdb/planner/operator/logical_insert.hpp +1 -1
  466. package/src/duckdb/src/include/duckdb/planner/operator/logical_limit.hpp +5 -9
  467. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +0 -15
  468. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +5 -0
  469. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +40 -5
  470. package/src/duckdb/src/include/duckdb/storage/compression/alp/algorithm/alp.hpp +1 -1
  471. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_analyze.hpp +5 -1
  472. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_compress.hpp +4 -4
  473. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_utils.hpp +2 -1
  474. package/src/duckdb/src/include/duckdb/storage/compression/alprd/algorithm/alprd.hpp +10 -8
  475. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_analyze.hpp +5 -1
  476. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_compress.hpp +3 -3
  477. package/src/duckdb/src/include/duckdb/storage/compression/chimp/algorithm/bit_reader.hpp +11 -10
  478. package/src/duckdb/src/include/duckdb/storage/compression/chimp/algorithm/bit_utils.hpp +3 -1
  479. package/src/duckdb/src/include/duckdb/storage/compression/chimp/algorithm/chimp128.hpp +1 -1
  480. package/src/duckdb/src/include/duckdb/storage/compression/chimp/algorithm/leading_zero_buffer.hpp +5 -3
  481. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +2 -1
  482. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +1 -1
  483. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +3 -1
  484. package/src/duckdb/src/include/duckdb/storage/index.hpp +1 -1
  485. package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +1 -1
  486. package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +3 -0
  487. package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +1 -1
  488. package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp +0 -2
  489. package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +1 -1
  490. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +5 -3
  491. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +2 -2
  492. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +5 -0
  493. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +9 -2
  494. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +2 -6
  495. package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +3 -3
  496. package/src/duckdb/src/include/duckdb/storage/temporary_file_manager.hpp +169 -0
  497. package/src/duckdb/src/include/duckdb/transaction/duck_transaction_manager.hpp +11 -1
  498. package/src/duckdb/src/include/duckdb/transaction/meta_transaction.hpp +6 -1
  499. package/src/duckdb/src/include/duckdb/transaction/transaction.hpp +1 -1
  500. package/src/duckdb/src/include/duckdb.h +119 -67
  501. package/src/duckdb/src/main/appender.cpp +2 -1
  502. package/src/duckdb/src/main/attached_database.cpp +49 -27
  503. package/src/duckdb/src/main/capi/appender-c.cpp +1 -1
  504. package/src/duckdb/src/main/capi/cast/ub_duckdb_main_capi_cast.cpp +3 -0
  505. package/src/duckdb/src/main/capi/cast/utils-c.cpp +1 -1
  506. package/src/duckdb/src/main/capi/data_chunk-c.cpp +9 -1
  507. package/src/duckdb/src/main/capi/datetime-c.cpp +14 -8
  508. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +29 -2
  509. package/src/duckdb/src/main/capi/helper-c.cpp +2 -0
  510. package/src/duckdb/src/main/capi/hugeint-c.cpp +2 -1
  511. package/src/duckdb/src/main/capi/logical_types-c.cpp +35 -1
  512. package/src/duckdb/src/main/capi/result-c.cpp +9 -0
  513. package/src/duckdb/src/main/capi/ub_duckdb_main_capi.cpp +19 -0
  514. package/src/duckdb/src/main/chunk_scan_state/ub_duckdb_main_chunk_scan_state.cpp +2 -0
  515. package/src/duckdb/src/main/client_context.cpp +133 -33
  516. package/src/duckdb/src/main/client_context_file_opener.cpp +8 -7
  517. package/src/duckdb/src/main/config.cpp +2 -0
  518. package/src/duckdb/src/main/connection_manager.cpp +8 -0
  519. package/src/duckdb/src/main/database.cpp +13 -4
  520. package/src/duckdb/src/main/database_manager.cpp +10 -1
  521. package/src/duckdb/src/main/extension/extension_helper.cpp +8 -5
  522. package/src/duckdb/src/main/extension/extension_install.cpp +1 -1
  523. package/src/duckdb/src/main/extension/ub_duckdb_main_extension.cpp +6 -0
  524. package/src/duckdb/src/main/prepared_statement_data.cpp +23 -6
  525. package/src/duckdb/src/main/query_profiler.cpp +9 -7
  526. package/src/duckdb/src/main/relation/read_csv_relation.cpp +17 -12
  527. package/src/duckdb/src/main/relation/ub_duckdb_main_relation.cpp +26 -0
  528. package/src/duckdb/src/main/relation/value_relation.cpp +2 -0
  529. package/src/duckdb/src/main/secret/secret.cpp +1 -1
  530. package/src/duckdb/src/main/secret/secret_manager.cpp +41 -8
  531. package/src/duckdb/src/main/secret/secret_storage.cpp +8 -2
  532. package/src/duckdb/src/main/settings/settings.cpp +42 -2
  533. package/src/duckdb/src/main/settings/ub_duckdb_main_settings.cpp +2 -0
  534. package/src/duckdb/src/main/ub_duckdb_main.cpp +25 -0
  535. package/src/duckdb/src/optimizer/compressed_materialization/ub_duckdb_optimizer_compressed_materialization.cpp +4 -0
  536. package/src/duckdb/src/optimizer/filter_combiner.cpp +20 -14
  537. package/src/duckdb/src/optimizer/in_clause_rewriter.cpp +5 -1
  538. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +23 -11
  539. package/src/duckdb/src/optimizer/join_order/ub_duckdb_optimizer_join_order.cpp +12 -0
  540. package/src/duckdb/src/optimizer/matcher/ub_duckdb_optimizer_matcher.cpp +2 -0
  541. package/src/duckdb/src/optimizer/pullup/ub_duckdb_optimizer_pullup.cpp +6 -0
  542. package/src/duckdb/src/optimizer/pushdown/pushdown_limit.cpp +1 -1
  543. package/src/duckdb/src/optimizer/pushdown/ub_duckdb_optimizer_pushdown.cpp +12 -0
  544. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +7 -6
  545. package/src/duckdb/src/optimizer/rule/ordered_aggregate_optimizer.cpp +8 -6
  546. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +1 -1
  547. package/src/duckdb/src/optimizer/rule/ub_duckdb_optimizer_rules.cpp +16 -0
  548. package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +71 -1
  549. package/src/duckdb/src/optimizer/statistics/expression/ub_duckdb_optimizer_statistics_expr.cpp +11 -0
  550. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +1 -1
  551. package/src/duckdb/src/optimizer/statistics/operator/propagate_limit.cpp +5 -1
  552. package/src/duckdb/src/optimizer/statistics/operator/ub_duckdb_optimizer_statistics_op.cpp +11 -0
  553. package/src/duckdb/src/optimizer/topn_optimizer.cpp +12 -11
  554. package/src/duckdb/src/optimizer/ub_duckdb_optimizer.cpp +20 -0
  555. package/src/duckdb/src/parallel/executor.cpp +8 -19
  556. package/src/duckdb/src/parallel/executor_task.cpp +6 -2
  557. package/src/duckdb/src/parallel/pipeline.cpp +12 -6
  558. package/src/duckdb/src/parallel/pipeline_executor.cpp +1 -1
  559. package/src/duckdb/src/parallel/pipeline_finish_event.cpp +2 -2
  560. package/src/duckdb/src/parallel/pipeline_initialize_event.cpp +1 -2
  561. package/src/duckdb/src/parallel/task_scheduler.cpp +15 -8
  562. package/src/duckdb/src/parallel/ub_duckdb_parallel.cpp +15 -0
  563. package/src/duckdb/src/parser/constraints/ub_duckdb_constraints.cpp +5 -0
  564. package/src/duckdb/src/parser/expression/ub_duckdb_expression.cpp +18 -0
  565. package/src/duckdb/src/parser/parsed_data/alter_table_info.cpp +0 -18
  566. package/src/duckdb/src/parser/parsed_data/comment_on_column_info.cpp +44 -0
  567. package/src/duckdb/src/parser/parsed_data/create_view_info.cpp +1 -0
  568. package/src/duckdb/src/parser/parsed_data/extra_drop_info.cpp +2 -0
  569. package/src/duckdb/src/parser/parsed_data/ub_duckdb_parsed_data.cpp +24 -0
  570. package/src/duckdb/src/parser/parser.cpp +1 -1
  571. package/src/duckdb/src/parser/query_error_context.cpp +15 -1
  572. package/src/duckdb/src/parser/query_node/ub_duckdb_query_node.cpp +5 -0
  573. package/src/duckdb/src/parser/statement/export_statement.cpp +2 -1
  574. package/src/duckdb/src/parser/statement/relation_statement.cpp +2 -2
  575. package/src/duckdb/src/parser/statement/ub_duckdb_statement.cpp +25 -0
  576. package/src/duckdb/src/parser/tableref/pivotref.cpp +3 -3
  577. package/src/duckdb/src/parser/tableref/showref.cpp +2 -0
  578. package/src/duckdb/src/parser/tableref/ub_duckdb_parser_tableref.cpp +8 -0
  579. package/src/duckdb/src/parser/transform/constraint/ub_duckdb_transformer_constraint.cpp +2 -0
  580. package/src/duckdb/src/parser/transform/expression/transform_boolean_test.cpp +1 -1
  581. package/src/duckdb/src/parser/transform/expression/transform_cast.cpp +10 -2
  582. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +7 -7
  583. package/src/duckdb/src/parser/transform/expression/transform_interval.cpp +2 -1
  584. package/src/duckdb/src/parser/transform/expression/ub_duckdb_transformer_expression.cpp +20 -0
  585. package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +2 -2
  586. package/src/duckdb/src/parser/transform/helpers/ub_duckdb_transformer_helpers.cpp +8 -0
  587. package/src/duckdb/src/parser/transform/statement/transform_comment_on.cpp +3 -8
  588. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +3 -1
  589. package/src/duckdb/src/parser/transform/statement/transform_show_select.cpp +2 -3
  590. package/src/duckdb/src/parser/transform/statement/ub_duckdb_transformer_statement.cpp +37 -0
  591. package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +16 -0
  592. package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +49 -32
  593. package/src/duckdb/src/parser/transform/tableref/ub_duckdb_transformer_tableref.cpp +8 -0
  594. package/src/duckdb/src/parser/ub_duckdb_parser.cpp +15 -0
  595. package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +11 -4
  596. package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +4 -1
  597. package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +71 -5
  598. package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +35 -1
  599. package/src/duckdb/src/planner/binder/expression/ub_duckdb_bind_expression.cpp +20 -0
  600. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +55 -51
  601. package/src/duckdb/src/planner/binder/query_node/plan_query_node.cpp +1 -11
  602. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +2 -2
  603. package/src/duckdb/src/planner/binder/query_node/ub_duckdb_bind_query_node.cpp +12 -0
  604. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +12 -6
  605. package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +1 -1
  606. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +8 -2
  607. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +6 -3
  608. package/src/duckdb/src/planner/binder/statement/bind_simple.cpp +13 -2
  609. package/src/duckdb/src/planner/binder/statement/ub_duckdb_bind_statement.cpp +26 -0
  610. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +15 -4
  611. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +9 -2
  612. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +93 -45
  613. package/src/duckdb/src/planner/binder/tableref/bind_subqueryref.cpp +6 -1
  614. package/src/duckdb/src/planner/binder/tableref/ub_duckdb_bind_tableref.cpp +17 -0
  615. package/src/duckdb/src/planner/binder.cpp +22 -23
  616. package/src/duckdb/src/planner/bound_result_modifier.cpp +67 -4
  617. package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +5 -1
  618. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +3 -0
  619. package/src/duckdb/src/planner/expression/ub_duckdb_planner_expression.cpp +19 -0
  620. package/src/duckdb/src/planner/expression_binder/column_alias_binder.cpp +14 -9
  621. package/src/duckdb/src/planner/expression_binder/having_binder.cpp +17 -9
  622. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +61 -37
  623. package/src/duckdb/src/planner/expression_binder/qualify_binder.cpp +8 -4
  624. package/src/duckdb/src/planner/expression_binder/ub_duckdb_expression_binders.cpp +20 -0
  625. package/src/duckdb/src/planner/expression_binder/where_binder.cpp +3 -2
  626. package/src/duckdb/src/planner/expression_iterator.cpp +73 -52
  627. package/src/duckdb/src/planner/filter/ub_duckdb_planner_filter.cpp +4 -0
  628. package/src/duckdb/src/planner/logical_operator_visitor.cpp +4 -14
  629. package/src/duckdb/src/planner/operator/logical_limit.cpp +14 -6
  630. package/src/duckdb/src/planner/operator/ub_duckdb_planner_operator.cpp +43 -0
  631. package/src/duckdb/src/planner/parsed_data/ub_duckdb_planner_parsed_data.cpp +2 -0
  632. package/src/duckdb/src/planner/planner.cpp +3 -0
  633. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +46 -18
  634. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +39 -33
  635. package/src/duckdb/src/planner/subquery/ub_duckdb_planner_subquery.cpp +4 -0
  636. package/src/duckdb/src/planner/ub_duckdb_planner.cpp +15 -0
  637. package/src/duckdb/src/storage/arena_allocator.cpp +9 -0
  638. package/src/duckdb/src/storage/buffer/block_handle.cpp +7 -2
  639. package/src/duckdb/src/storage/buffer/block_manager.cpp +7 -3
  640. package/src/duckdb/src/storage/buffer/buffer_pool.cpp +121 -24
  641. package/src/duckdb/src/storage/buffer/ub_duckdb_storage_buffer.cpp +6 -0
  642. package/src/duckdb/src/storage/checkpoint/ub_duckdb_storage_checkpoint.cpp +5 -0
  643. package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +4 -4
  644. package/src/duckdb/src/storage/compression/chimp/ub_duckdb_storage_compression_chimp.cpp +6 -0
  645. package/src/duckdb/src/storage/compression/dictionary_compression.cpp +6 -5
  646. package/src/duckdb/src/storage/compression/fsst.cpp +3 -2
  647. package/src/duckdb/src/storage/compression/rle.cpp +1 -1
  648. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +4 -4
  649. package/src/duckdb/src/storage/compression/ub_duckdb_storage_compression.cpp +12 -0
  650. package/src/duckdb/src/storage/data_table.cpp +6 -3
  651. package/src/duckdb/src/storage/local_storage.cpp +5 -2
  652. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +4 -4
  653. package/src/duckdb/src/storage/metadata/metadata_reader.cpp +1 -1
  654. package/src/duckdb/src/storage/metadata/metadata_writer.cpp +1 -1
  655. package/src/duckdb/src/storage/metadata/ub_duckdb_storage_metadata.cpp +4 -0
  656. package/src/duckdb/src/storage/partial_block_manager.cpp +1 -1
  657. package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +2 -0
  658. package/src/duckdb/src/storage/serialization/serialize_expression.cpp +3 -0
  659. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +5 -29
  660. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +22 -4
  661. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +12 -32
  662. package/src/duckdb/src/storage/serialization/ub_duckdb_storage_serialization.cpp +16 -0
  663. package/src/duckdb/src/storage/standard_buffer_manager.cpp +29 -397
  664. package/src/duckdb/src/storage/statistics/string_stats.cpp +1 -1
  665. package/src/duckdb/src/storage/statistics/ub_duckdb_storage_statistics.cpp +10 -0
  666. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  667. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -3
  668. package/src/duckdb/src/storage/table/column_data.cpp +7 -16
  669. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +4 -1
  670. package/src/duckdb/src/storage/table/column_segment.cpp +68 -78
  671. package/src/duckdb/src/storage/table/list_column_data.cpp +1 -0
  672. package/src/duckdb/src/storage/table/row_group.cpp +17 -7
  673. package/src/duckdb/src/storage/table/row_group_collection.cpp +28 -27
  674. package/src/duckdb/src/storage/table/ub_duckdb_storage_table.cpp +17 -0
  675. package/src/duckdb/src/storage/table/update_segment.cpp +7 -7
  676. package/src/duckdb/src/storage/temporary_file_manager.cpp +334 -0
  677. package/src/duckdb/src/storage/ub_duckdb_storage.cpp +20 -0
  678. package/src/duckdb/src/storage/write_ahead_log.cpp +3 -2
  679. package/src/duckdb/src/transaction/commit_state.cpp +4 -2
  680. package/src/duckdb/src/transaction/duck_transaction_manager.cpp +32 -17
  681. package/src/duckdb/src/transaction/meta_transaction.cpp +24 -0
  682. package/src/duckdb/src/transaction/transaction_context.cpp +0 -9
  683. package/src/duckdb/src/transaction/ub_duckdb_transaction.cpp +11 -0
  684. package/src/duckdb/src/transaction/undo_buffer.cpp +1 -1
  685. package/src/duckdb/third_party/jaro_winkler/details/common.hpp +1 -1
  686. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +2 -0
  687. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +1032 -551
  688. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +24122 -24304
  689. package/src/duckdb/third_party/mbedtls/include/mbedtls/mbedtls_config.h +0 -3
  690. package/src/duckdb/third_party/mbedtls/library/entropy_poll.cpp +32 -4
  691. package/src/duckdb/third_party/miniz/miniz_wrapper.hpp +4 -4
  692. package/src/duckdb/third_party/tdigest/t_digest.hpp +3 -3
  693. package/src/duckdb/ub_src_common_arrow_appender.cpp +2 -0
  694. package/src/duckdb/ub_src_common_exception.cpp +2 -0
  695. package/src/duckdb/ub_src_execution_operator_helper.cpp +2 -0
  696. package/src/duckdb/ub_src_execution_operator_persistent.cpp +0 -2
  697. package/src/duckdb/ub_src_execution_physical_plan.cpp +0 -2
  698. package/src/duckdb/ub_src_parser_parsed_data.cpp +1 -1
  699. package/src/duckdb/ub_src_planner_operator.cpp +0 -2
  700. package/src/duckdb/ub_src_storage.cpp +2 -0
  701. package/test/columns.test.ts +1 -1
  702. package/test/test_all_types.test.ts +1 -1
@@ -1,60 +1,128 @@
1
1
  #include "duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp"
2
-
3
- #include "duckdb/common/allocator.hpp"
4
- #include "duckdb/common/types/batched_data_collection.hpp"
5
- #include "duckdb/common/vector_operations/vector_operations.hpp"
6
2
  #include "duckdb/execution/operator/persistent/physical_copy_to_file.hpp"
7
3
  #include "duckdb/parallel/base_pipeline_event.hpp"
8
-
4
+ #include "duckdb/common/vector_operations/vector_operations.hpp"
5
+ #include "duckdb/common/types/batched_data_collection.hpp"
6
+ #include "duckdb/common/allocator.hpp"
7
+ #include "duckdb/common/queue.hpp"
8
+ #include "duckdb/storage/buffer_manager.hpp"
9
+ #include "duckdb/execution/operator/persistent/batch_memory_manager.hpp"
10
+ #include "duckdb/execution/operator/persistent/batch_task_manager.hpp"
11
+ #include "duckdb/parallel/executor_task.hpp"
9
12
  #include <algorithm>
10
13
 
11
14
  namespace duckdb {
12
15
 
16
+ struct ActiveFlushGuard {
17
+ explicit ActiveFlushGuard(atomic<bool> &bool_value_p) : bool_value(bool_value_p) {
18
+ bool_value = true;
19
+ }
20
+ ~ActiveFlushGuard() {
21
+ bool_value = false;
22
+ }
23
+
24
+ atomic<bool> &bool_value;
25
+ };
26
+
13
27
  PhysicalBatchCopyToFile::PhysicalBatchCopyToFile(vector<LogicalType> types, CopyFunction function_p,
14
28
  unique_ptr<FunctionData> bind_data_p, idx_t estimated_cardinality)
15
29
  : PhysicalOperator(PhysicalOperatorType::BATCH_COPY_TO_FILE, std::move(types), estimated_cardinality),
16
30
  function(std::move(function_p)), bind_data(std::move(bind_data_p)) {
17
31
  if (!function.flush_batch || !function.prepare_batch) {
18
- throw InternalException(
19
- "PhysicalBatchCopyToFile created for copy function that does not have prepare_batch/flush_batch defined");
32
+ throw InternalException("PhysicalFixedBatchCopy created for copy function that does not have "
33
+ "prepare_batch/flush_batch defined");
20
34
  }
21
35
  }
22
36
 
23
37
  //===--------------------------------------------------------------------===//
24
38
  // States
25
39
  //===--------------------------------------------------------------------===//
26
- class BatchCopyToGlobalState : public GlobalSinkState {
40
+ class BatchCopyTask {
41
+ public:
42
+ virtual ~BatchCopyTask() {
43
+ }
44
+
45
+ virtual void Execute(const PhysicalBatchCopyToFile &op, ClientContext &context, GlobalSinkState &gstate_p) = 0;
46
+ };
47
+
48
+ struct FixedRawBatchData {
49
+ FixedRawBatchData(idx_t memory_usage_p, unique_ptr<ColumnDataCollection> collection_p)
50
+ : memory_usage(memory_usage_p), collection(std::move(collection_p)) {
51
+ }
52
+
53
+ idx_t memory_usage;
54
+ unique_ptr<ColumnDataCollection> collection;
55
+ };
56
+
57
+ struct FixedPreparedBatchData {
58
+ idx_t memory_usage;
59
+ unique_ptr<PreparedBatchData> prepared_data;
60
+ };
61
+
62
+ class FixedBatchCopyGlobalState : public GlobalSinkState {
63
+ public:
64
+ // heuristic - we need at least 4MB of cache space per column per thread we launch
65
+ static constexpr const idx_t MINIMUM_MEMORY_PER_COLUMN_PER_THREAD = 4 * 1024 * 1024;
66
+
27
67
  public:
28
- explicit BatchCopyToGlobalState(unique_ptr<GlobalFunctionData> global_state)
29
- : rows_copied(0), global_state(std::move(global_state)), any_flushing(false) {
68
+ explicit FixedBatchCopyGlobalState(ClientContext &context_p, unique_ptr<GlobalFunctionData> global_state,
69
+ idx_t minimum_memory_per_thread)
70
+ : memory_manager(context_p, minimum_memory_per_thread), rows_copied(0), global_state(std::move(global_state)),
71
+ batch_size(0), scheduled_batch_index(0), flushed_batch_index(0), any_flushing(false), any_finished(false),
72
+ minimum_memory_per_thread(minimum_memory_per_thread) {
30
73
  }
31
74
 
75
+ BatchMemoryManager memory_manager;
76
+ BatchTaskManager<BatchCopyTask> task_manager;
32
77
  mutex lock;
78
+ mutex flush_lock;
33
79
  //! The total number of rows copied to the file
34
80
  atomic<idx_t> rows_copied;
35
81
  //! Global copy state
36
82
  unique_ptr<GlobalFunctionData> global_state;
83
+ //! The desired batch size (if any)
84
+ idx_t batch_size;
85
+ //! Unpartitioned batches
86
+ map<idx_t, unique_ptr<FixedRawBatchData>> raw_batches;
37
87
  //! The prepared batch data by batch index - ready to flush
38
- map<idx_t, unique_ptr<PreparedBatchData>> batch_data;
39
- //! Lock for flushing to disk
40
- mutex flush_lock;
41
- //! Whether or not any threads are flushing (only one thread can flush at a time)
88
+ map<idx_t, unique_ptr<FixedPreparedBatchData>> batch_data;
89
+ //! The index of the latest batch index that has been scheduled
90
+ atomic<idx_t> scheduled_batch_index;
91
+ //! The index of the latest batch index that has been flushed
92
+ atomic<idx_t> flushed_batch_index;
93
+ //! Whether or not any thread is flushing
42
94
  atomic<bool> any_flushing;
95
+ //! Whether or not any threads are finished
96
+ atomic<bool> any_finished;
97
+ //! Minimum memory per thread
98
+ idx_t minimum_memory_per_thread;
43
99
 
44
- void AddBatchData(idx_t batch_index, unique_ptr<PreparedBatchData> new_batch) {
100
+ void AddBatchData(idx_t batch_index, unique_ptr<PreparedBatchData> new_batch, idx_t memory_usage) {
45
101
  // move the batch data to the set of prepared batch data
46
102
  lock_guard<mutex> l(lock);
47
- auto entry = batch_data.insert(make_pair(batch_index, std::move(new_batch)));
103
+ auto prepared_data = make_uniq<FixedPreparedBatchData>();
104
+ prepared_data->prepared_data = std::move(new_batch);
105
+ prepared_data->memory_usage = memory_usage;
106
+ auto entry = batch_data.insert(make_pair(batch_index, std::move(prepared_data)));
48
107
  if (!entry.second) {
49
- throw InternalException("Duplicate batch index %llu encountered in PhysicalBatchCopyToFile", batch_index);
108
+ throw InternalException("Duplicate batch index %llu encountered in PhysicalFixedBatchCopy", batch_index);
50
109
  }
51
110
  }
111
+
112
+ idx_t MaxThreads(idx_t source_max_threads) override {
113
+ // try to request 4MB per column per thread
114
+ memory_manager.SetMemorySize(source_max_threads * minimum_memory_per_thread);
115
+ // cap the concurrent threads working on this task based on the amount of available memory
116
+ return MinValue<idx_t>(source_max_threads, memory_manager.AvailableMemory() / minimum_memory_per_thread + 1);
117
+ }
52
118
  };
53
119
 
54
- class BatchCopyToLocalState : public LocalSinkState {
120
+ enum class FixedBatchCopyState { SINKING_DATA = 1, PROCESSING_TASKS = 2 };
121
+
122
+ class FixedBatchCopyLocalState : public LocalSinkState {
55
123
  public:
56
- explicit BatchCopyToLocalState(unique_ptr<LocalFunctionData> local_state_p)
57
- : local_state(std::move(local_state_p)), rows_copied(0) {
124
+ explicit FixedBatchCopyLocalState(unique_ptr<LocalFunctionData> local_state_p)
125
+ : local_state(std::move(local_state_p)), rows_copied(0), local_memory_usage(0) {
58
126
  }
59
127
 
60
128
  //! Local copy state
@@ -65,12 +133,17 @@ public:
65
133
  ColumnDataAppendState append_state;
66
134
  //! How many rows have been copied in total
67
135
  idx_t rows_copied;
136
+ //! Memory usage of the thread-local collection
137
+ idx_t local_memory_usage;
68
138
  //! The current batch index
69
139
  optional_idx batch_index;
140
+ //! Current task
141
+ FixedBatchCopyState current_task = FixedBatchCopyState::SINKING_DATA;
70
142
 
71
143
  void InitializeCollection(ClientContext &context, const PhysicalOperator &op) {
72
- collection = make_uniq<ColumnDataCollection>(BufferAllocator::Get(context), op.children[0]->types);
144
+ collection = make_uniq<ColumnDataCollection>(context, op.children[0]->types, ColumnDataAllocatorType::HYBRID);
73
145
  collection->InitializeAppend(append_state);
146
+ local_memory_usage = 0;
74
147
  }
75
148
  };
76
149
 
@@ -79,32 +152,138 @@ public:
79
152
  //===--------------------------------------------------------------------===//
80
153
  SinkResultType PhysicalBatchCopyToFile::Sink(ExecutionContext &context, DataChunk &chunk,
81
154
  OperatorSinkInput &input) const {
82
- auto &state = input.local_state.Cast<BatchCopyToLocalState>();
155
+ auto &state = input.local_state.Cast<FixedBatchCopyLocalState>();
156
+ auto &gstate = input.global_state.Cast<FixedBatchCopyGlobalState>();
157
+ auto &memory_manager = gstate.memory_manager;
158
+ auto batch_index = state.partition_info.batch_index.GetIndex();
159
+ if (state.current_task == FixedBatchCopyState::PROCESSING_TASKS) {
160
+ ExecuteTasks(context.client, gstate);
161
+ FlushBatchData(context.client, gstate, memory_manager.GetMinimumBatchIndex());
162
+
163
+ if (!memory_manager.IsMinimumBatchIndex(batch_index) && memory_manager.OutOfMemory(batch_index)) {
164
+ lock_guard<mutex> l(memory_manager.GetBlockedTaskLock());
165
+ if (!memory_manager.IsMinimumBatchIndex(batch_index)) {
166
+ // no tasks to process, we are not the minimum batch index and we have no memory available to buffer
167
+ // block the task for now
168
+ memory_manager.BlockTask(input.interrupt_state);
169
+ return SinkResultType::BLOCKED;
170
+ }
171
+ }
172
+ state.current_task = FixedBatchCopyState::SINKING_DATA;
173
+ }
174
+ if (!memory_manager.IsMinimumBatchIndex(batch_index)) {
175
+ memory_manager.UpdateMinBatchIndex(state.partition_info.min_batch_index.GetIndex());
176
+
177
+ // we are not processing the current min batch index
178
+ // check if we have exceeded the maximum number of unflushed rows
179
+ if (memory_manager.OutOfMemory(batch_index)) {
180
+ // out-of-memory - stop sinking chunks and instead assist in processing tasks for the minimum batch index
181
+ state.current_task = FixedBatchCopyState::PROCESSING_TASKS;
182
+ return Sink(context, chunk, input);
183
+ }
184
+ }
83
185
  if (!state.collection) {
84
186
  state.InitializeCollection(context.client, *this);
85
- state.batch_index = state.partition_info.batch_index.GetIndex();
187
+ state.batch_index = batch_index;
86
188
  }
87
189
  state.rows_copied += chunk.size();
88
190
  state.collection->Append(state.append_state, chunk);
191
+ auto new_memory_usage = state.collection->AllocationSize();
192
+ if (new_memory_usage > state.local_memory_usage) {
193
+ // memory usage increased - add to global state
194
+ memory_manager.IncreaseUnflushedMemory(new_memory_usage - state.local_memory_usage);
195
+ state.local_memory_usage = new_memory_usage;
196
+ } else if (new_memory_usage < state.local_memory_usage) {
197
+ throw InternalException("PhysicalFixedBatchCopy - memory usage decreased somehow?");
198
+ }
89
199
  return SinkResultType::NEED_MORE_INPUT;
90
200
  }
91
201
 
92
202
  SinkCombineResultType PhysicalBatchCopyToFile::Combine(ExecutionContext &context,
93
203
  OperatorSinkCombineInput &input) const {
94
- auto &state = input.local_state.Cast<BatchCopyToLocalState>();
95
- auto &gstate = input.global_state.Cast<BatchCopyToGlobalState>();
204
+ auto &state = input.local_state.Cast<FixedBatchCopyLocalState>();
205
+ auto &gstate = input.global_state.Cast<FixedBatchCopyGlobalState>();
206
+ auto &memory_manager = gstate.memory_manager;
96
207
  gstate.rows_copied += state.rows_copied;
97
208
 
209
+ // add any final remaining local batches
210
+ AddLocalBatch(context.client, gstate, state);
211
+
212
+ if (!gstate.any_finished) {
213
+ // signal that this thread is finished processing batches and that we should move on to Finalize
214
+ lock_guard<mutex> l(gstate.lock);
215
+ gstate.any_finished = true;
216
+ }
217
+ memory_manager.UpdateMinBatchIndex(state.partition_info.min_batch_index.GetIndex());
218
+ ExecuteTasks(context.client, gstate);
219
+
98
220
  return SinkCombineResultType::FINISHED;
99
221
  }
100
222
 
223
+ //===--------------------------------------------------------------------===//
224
+ // ProcessRemainingBatchesEvent
225
+ //===--------------------------------------------------------------------===//
226
+ class ProcessRemainingBatchesTask : public ExecutorTask {
227
+ public:
228
+ ProcessRemainingBatchesTask(Executor &executor, shared_ptr<Event> event_p, FixedBatchCopyGlobalState &state_p,
229
+ ClientContext &context, const PhysicalBatchCopyToFile &op)
230
+ : ExecutorTask(executor, std::move(event_p)), op(op), gstate(state_p), context(context) {
231
+ }
232
+
233
+ TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override {
234
+ while (op.ExecuteTask(context, gstate)) {
235
+ op.FlushBatchData(context, gstate, 0);
236
+ }
237
+ event->FinishTask();
238
+ return TaskExecutionResult::TASK_FINISHED;
239
+ }
240
+
241
+ private:
242
+ const PhysicalBatchCopyToFile &op;
243
+ FixedBatchCopyGlobalState &gstate;
244
+ ClientContext &context;
245
+ };
246
+
247
+ class ProcessRemainingBatchesEvent : public BasePipelineEvent {
248
+ public:
249
+ ProcessRemainingBatchesEvent(const PhysicalBatchCopyToFile &op_p, FixedBatchCopyGlobalState &gstate_p,
250
+ Pipeline &pipeline_p, ClientContext &context)
251
+ : BasePipelineEvent(pipeline_p), op(op_p), gstate(gstate_p), context(context) {
252
+ }
253
+ const PhysicalBatchCopyToFile &op;
254
+ FixedBatchCopyGlobalState &gstate;
255
+ ClientContext &context;
256
+
257
+ public:
258
+ void Schedule() override {
259
+ vector<shared_ptr<Task>> tasks;
260
+ for (idx_t i = 0; i < idx_t(TaskScheduler::GetScheduler(context).NumberOfThreads()); i++) {
261
+ auto process_task =
262
+ make_uniq<ProcessRemainingBatchesTask>(pipeline->executor, shared_from_this(), gstate, context, op);
263
+ tasks.push_back(std::move(process_task));
264
+ }
265
+ D_ASSERT(!tasks.empty());
266
+ SetTasks(std::move(tasks));
267
+ }
268
+
269
+ void FinishEvent() override {
270
+ //! Now that all batches are processed we finish flushing the file to disk
271
+ op.FinalFlush(context, gstate);
272
+ }
273
+ };
101
274
  //===--------------------------------------------------------------------===//
102
275
  // Finalize
103
276
  //===--------------------------------------------------------------------===//
104
277
  SinkFinalizeType PhysicalBatchCopyToFile::FinalFlush(ClientContext &context, GlobalSinkState &gstate_p) const {
105
- auto &gstate = gstate_p.Cast<BatchCopyToGlobalState>();
106
- idx_t min_batch_index = idx_t(NumericLimits<int64_t>::Maximum());
278
+ auto &gstate = gstate_p.Cast<FixedBatchCopyGlobalState>();
279
+ if (gstate.task_manager.TaskCount() != 0) {
280
+ throw InternalException("Unexecuted tasks are remaining in PhysicalFixedBatchCopy::FinalFlush!?");
281
+ }
282
+ auto min_batch_index = idx_t(NumericLimits<int64_t>::Maximum());
107
283
  FlushBatchData(context, gstate_p, min_batch_index);
284
+ if (gstate.scheduled_batch_index != gstate.flushed_batch_index) {
285
+ throw InternalException("Not all batches were flushed to disk - incomplete file?");
286
+ }
108
287
  if (function.copy_to_finalize) {
109
288
  function.copy_to_finalize(context, *bind_data, *gstate.global_state);
110
289
 
@@ -112,29 +291,193 @@ SinkFinalizeType PhysicalBatchCopyToFile::FinalFlush(ClientContext &context, Glo
112
291
  PhysicalCopyToFile::MoveTmpFile(context, file_path);
113
292
  }
114
293
  }
294
+ gstate.memory_manager.FinalCheck();
115
295
  return SinkFinalizeType::READY;
116
296
  }
117
297
 
118
298
  SinkFinalizeType PhysicalBatchCopyToFile::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
119
299
  OperatorSinkFinalizeInput &input) const {
120
- FinalFlush(context, input.global_state);
300
+ auto &gstate = input.global_state.Cast<FixedBatchCopyGlobalState>();
301
+ auto min_batch_index = idx_t(NumericLimits<int64_t>::Maximum());
302
+ // repartition any remaining batches
303
+ RepartitionBatches(context, input.global_state, min_batch_index, true);
304
+ // check if we have multiple tasks to execute
305
+ if (gstate.task_manager.TaskCount() <= 1) {
306
+ // we don't - just execute the remaining task and finish flushing to disk
307
+ ExecuteTasks(context, input.global_state);
308
+ FinalFlush(context, input.global_state);
309
+ } else {
310
+ // we have multiple tasks remaining - launch an event to execute the tasks in parallel
311
+ auto new_event = make_shared<ProcessRemainingBatchesEvent>(*this, gstate, pipeline, context);
312
+ event.InsertEvent(std::move(new_event));
313
+ }
121
314
  return SinkFinalizeType::READY;
122
315
  }
123
316
 
317
+ //===--------------------------------------------------------------------===//
318
+ // Tasks
319
+ //===--------------------------------------------------------------------===//
320
+ class RepartitionedFlushTask : public BatchCopyTask {
321
+ public:
322
+ RepartitionedFlushTask() {
323
+ }
324
+
325
+ void Execute(const PhysicalBatchCopyToFile &op, ClientContext &context, GlobalSinkState &gstate_p) override {
326
+ op.FlushBatchData(context, gstate_p, 0);
327
+ }
328
+ };
329
+
330
+ class PrepareBatchTask : public BatchCopyTask {
331
+ public:
332
+ PrepareBatchTask(idx_t batch_index, unique_ptr<FixedRawBatchData> batch_data_p)
333
+ : batch_index(batch_index), batch_data(std::move(batch_data_p)) {
334
+ }
335
+
336
+ idx_t batch_index;
337
+ unique_ptr<FixedRawBatchData> batch_data;
338
+
339
+ void Execute(const PhysicalBatchCopyToFile &op, ClientContext &context, GlobalSinkState &gstate_p) override {
340
+ auto &gstate = gstate_p.Cast<FixedBatchCopyGlobalState>();
341
+ auto memory_usage = batch_data->memory_usage;
342
+ auto prepared_batch =
343
+ op.function.prepare_batch(context, *op.bind_data, *gstate.global_state, std::move(batch_data->collection));
344
+ gstate.AddBatchData(batch_index, std::move(prepared_batch), memory_usage);
345
+ if (batch_index == gstate.flushed_batch_index) {
346
+ gstate.task_manager.AddTask(make_uniq<RepartitionedFlushTask>());
347
+ }
348
+ }
349
+ };
350
+
124
351
  //===--------------------------------------------------------------------===//
125
352
  // Batch Data Handling
126
353
  //===--------------------------------------------------------------------===//
127
- void PhysicalBatchCopyToFile::PrepareBatchData(ClientContext &context, GlobalSinkState &gstate_p, idx_t batch_index,
128
- unique_ptr<ColumnDataCollection> collection) const {
129
- auto &gstate = gstate_p.Cast<BatchCopyToGlobalState>();
354
+ void PhysicalBatchCopyToFile::AddRawBatchData(ClientContext &context, GlobalSinkState &gstate_p, idx_t batch_index,
355
+ unique_ptr<FixedRawBatchData> raw_batch) const {
356
+ auto &gstate = gstate_p.Cast<FixedBatchCopyGlobalState>();
357
+
358
+ // add the batch index to the set of raw batches
359
+ lock_guard<mutex> l(gstate.lock);
360
+ auto entry = gstate.raw_batches.insert(make_pair(batch_index, std::move(raw_batch)));
361
+ if (!entry.second) {
362
+ throw InternalException("Duplicate batch index %llu encountered in PhysicalFixedBatchCopy", batch_index);
363
+ }
364
+ }
365
+
366
+ static bool CorrectSizeForBatch(idx_t collection_size, idx_t desired_size) {
367
+ if (desired_size == 0) {
368
+ // a batch size of 0 indicates we are happy with any batch size
369
+ return true;
370
+ }
371
+ return idx_t(AbsValue<int64_t>(int64_t(collection_size) - int64_t(desired_size))) < STANDARD_VECTOR_SIZE;
372
+ }
373
+
374
+ void PhysicalBatchCopyToFile::RepartitionBatches(ClientContext &context, GlobalSinkState &gstate_p, idx_t min_index,
375
+ bool final) const {
376
+ auto &gstate = gstate_p.Cast<FixedBatchCopyGlobalState>();
377
+ auto &task_manager = gstate.task_manager;
378
+
379
+ // repartition batches until the min index is reached
380
+ lock_guard<mutex> l(gstate.lock);
381
+ if (gstate.raw_batches.empty()) {
382
+ return;
383
+ }
384
+ if (!final) {
385
+ if (gstate.any_finished) {
386
+ // we only repartition in ::NextBatch if all threads are still busy processing batches
387
+ // otherwise we might end up repartitioning a lot of data with only a few threads remaining
388
+ // which causes erratic performance
389
+ return;
390
+ }
391
+ // if this is not the final flush we first check if we have enough data to merge past the batch threshold
392
+ idx_t candidate_rows = 0;
393
+ for (auto entry = gstate.raw_batches.begin(); entry != gstate.raw_batches.end(); entry++) {
394
+ if (entry->first >= min_index) {
395
+ // we have exceeded the minimum batch
396
+ break;
397
+ }
398
+ candidate_rows += entry->second->collection->Count();
399
+ }
400
+ if (candidate_rows < gstate.batch_size) {
401
+ // not enough rows - cancel!
402
+ return;
403
+ }
404
+ }
405
+ // gather all collections we can repartition
406
+ idx_t max_batch_index = 0;
407
+ vector<unique_ptr<FixedRawBatchData>> raw_batches;
408
+ for (auto entry = gstate.raw_batches.begin(); entry != gstate.raw_batches.end();) {
409
+ if (entry->first >= min_index) {
410
+ break;
411
+ }
412
+ max_batch_index = entry->first;
413
+ raw_batches.push_back(std::move(entry->second));
414
+ entry = gstate.raw_batches.erase(entry);
415
+ }
416
+ unique_ptr<FixedRawBatchData> append_batch;
417
+ ColumnDataAppendState append_state;
418
+ // now perform the actual repartitioning
419
+ for (auto &current_batch : raw_batches) {
420
+ if (!append_batch) {
421
+ auto current_count = current_batch->collection->Count();
422
+ if (CorrectSizeForBatch(current_count, gstate.batch_size)) {
423
+ // the collection is ~approximately equal to the batch size (off by at most one vector)
424
+ // use it directly
425
+ task_manager.AddTask(
426
+ make_uniq<PrepareBatchTask>(gstate.scheduled_batch_index++, std::move(current_batch)));
427
+ current_batch.reset();
428
+ } else if (current_count < gstate.batch_size) {
429
+ // the collection is smaller than the batch size - use it as a starting point
430
+ append_batch = std::move(current_batch);
431
+ current_batch.reset();
432
+ } else {
433
+ // the collection is too large for a batch - we need to repartition
434
+ // create an empty collection
435
+ auto new_collection =
436
+ make_uniq<ColumnDataCollection>(context, children[0]->types, ColumnDataAllocatorType::HYBRID);
437
+ append_batch = make_uniq<FixedRawBatchData>(0, std::move(new_collection));
438
+ }
439
+ if (append_batch) {
440
+ append_batch->collection->InitializeAppend(append_state);
441
+ }
442
+ }
443
+ if (!current_batch) {
444
+ // we have consumed the collection already - no need to append
445
+ continue;
446
+ }
447
+ auto &current_collection = *current_batch->collection;
448
+ append_batch->memory_usage += current_batch->memory_usage;
449
+ // iterate the collection while appending
450
+ for (auto &chunk : current_collection.Chunks()) {
451
+ // append the chunk to the collection
452
+ append_batch->collection->Append(append_state, chunk);
453
+ if (append_batch->collection->Count() < gstate.batch_size) {
454
+ // the collection is still under the batch size - continue
455
+ continue;
456
+ }
457
+ // the collection is full - move it to the result and create a new one
458
+ task_manager.AddTask(make_uniq<PrepareBatchTask>(gstate.scheduled_batch_index++, std::move(append_batch)));
130
459
 
131
- // prepare the batch
132
- auto batch_data = function.prepare_batch(context, *bind_data, *gstate.global_state, std::move(collection));
133
- gstate.AddBatchData(batch_index, std::move(batch_data));
460
+ auto new_collection =
461
+ make_uniq<ColumnDataCollection>(context, children[0]->types, ColumnDataAllocatorType::HYBRID);
462
+ append_batch = make_uniq<FixedRawBatchData>(0, std::move(new_collection));
463
+ append_batch->collection->InitializeAppend(append_state);
464
+ }
465
+ }
466
+ if (append_batch && append_batch->collection->Count() > 0) {
467
+ // if there are any remaining batches that are not filled up to the batch size
468
+ // AND this is not the final collection
469
+ // re-add it to the set of raw (to-be-merged) batches
470
+ if (final || CorrectSizeForBatch(append_batch->collection->Count(), gstate.batch_size)) {
471
+ task_manager.AddTask(make_uniq<PrepareBatchTask>(gstate.scheduled_batch_index++, std::move(append_batch)));
472
+ } else {
473
+ gstate.raw_batches[max_batch_index] = std::move(append_batch);
474
+ }
475
+ }
134
476
  }
135
477
 
136
478
  void PhysicalBatchCopyToFile::FlushBatchData(ClientContext &context, GlobalSinkState &gstate_p, idx_t min_index) const {
137
- auto &gstate = gstate_p.Cast<BatchCopyToGlobalState>();
479
+ auto &gstate = gstate_p.Cast<FixedBatchCopyGlobalState>();
480
+ auto &memory_manager = gstate.memory_manager;
138
481
 
139
482
  // flush batch data to disk (if there are any to flush)
140
483
  // grab the flush lock - we can only call flush_batch with this lock
@@ -148,45 +491,92 @@ void PhysicalBatchCopyToFile::FlushBatchData(ClientContext &context, GlobalSinkS
148
491
  }
149
492
  ActiveFlushGuard active_flush(gstate.any_flushing);
150
493
  while (true) {
151
- unique_ptr<PreparedBatchData> batch_data;
494
+ unique_ptr<FixedPreparedBatchData> batch_data;
152
495
  {
153
- // fetch the next batch to flush (if any)
154
496
  lock_guard<mutex> l(gstate.lock);
155
497
  if (gstate.batch_data.empty()) {
156
498
  // no batch data left to flush
157
499
  break;
158
500
  }
159
501
  auto entry = gstate.batch_data.begin();
160
- if (entry->first >= min_index) {
161
- // this data is past the min_index - we cannot write it yet
502
+ if (entry->first != gstate.flushed_batch_index) {
503
+ // this entry is not yet ready to be flushed
162
504
  break;
163
505
  }
164
- if (!entry->second) {
165
- // this batch is in process of being prepared but is not ready yet
166
- break;
506
+ if (entry->first < gstate.flushed_batch_index) {
507
+ throw InternalException("Batch index was out of order!?");
167
508
  }
168
509
  batch_data = std::move(entry->second);
169
510
  gstate.batch_data.erase(entry);
170
511
  }
171
- function.flush_batch(context, *bind_data, *gstate.global_state, *batch_data);
512
+ function.flush_batch(context, *bind_data, *gstate.global_state, *batch_data->prepared_data);
513
+ batch_data->prepared_data.reset();
514
+ memory_manager.ReduceUnflushedMemory(batch_data->memory_usage);
515
+ gstate.flushed_batch_index++;
516
+ }
517
+ }
518
+
519
+ //===--------------------------------------------------------------------===//
520
+ // Tasks
521
+ //===--------------------------------------------------------------------===//
522
+ bool PhysicalBatchCopyToFile::ExecuteTask(ClientContext &context, GlobalSinkState &gstate_p) const {
523
+ auto &gstate = gstate_p.Cast<FixedBatchCopyGlobalState>();
524
+ auto task = gstate.task_manager.GetTask();
525
+ if (!task) {
526
+ return false;
527
+ }
528
+ task->Execute(*this, context, gstate_p);
529
+ return true;
530
+ }
531
+
532
+ void PhysicalBatchCopyToFile::ExecuteTasks(ClientContext &context, GlobalSinkState &gstate_p) const {
533
+ while (ExecuteTask(context, gstate_p)) {
172
534
  }
173
535
  }
174
536
 
175
537
  //===--------------------------------------------------------------------===//
176
538
  // Next Batch
177
539
  //===--------------------------------------------------------------------===//
540
+ void PhysicalBatchCopyToFile::AddLocalBatch(ClientContext &context, GlobalSinkState &gstate_p,
541
+ LocalSinkState &lstate) const {
542
+ auto &state = lstate.Cast<FixedBatchCopyLocalState>();
543
+ auto &gstate = gstate_p.Cast<FixedBatchCopyGlobalState>();
544
+ auto &memory_manager = gstate.memory_manager;
545
+ if (!state.collection || state.collection->Count() == 0) {
546
+ return;
547
+ }
548
+ // we finished processing this batch
549
+ // start flushing data
550
+ auto min_batch_index = state.partition_info.min_batch_index.GetIndex();
551
+ // push the raw batch data into the set of unprocessed batches
552
+ auto raw_batch = make_uniq<FixedRawBatchData>(state.local_memory_usage, std::move(state.collection));
553
+ AddRawBatchData(context, gstate, state.batch_index.GetIndex(), std::move(raw_batch));
554
+ // attempt to repartition to our desired batch size
555
+ RepartitionBatches(context, gstate, min_batch_index);
556
+ // unblock tasks so they can help process batches (if any are blocked)
557
+ auto any_unblocked = memory_manager.UnblockTasks();
558
+ // if any threads were unblocked they can pick up execution of the tasks
559
+ // otherwise we will execute a task and flush here
560
+ if (!any_unblocked) {
561
+ //! Execute a single repartition task
562
+ ExecuteTask(context, gstate);
563
+ //! Flush batch data to disk (if any is ready)
564
+ FlushBatchData(context, gstate, memory_manager.GetMinimumBatchIndex());
565
+ }
566
+ }
567
+
178
568
  SinkNextBatchType PhysicalBatchCopyToFile::NextBatch(ExecutionContext &context,
179
569
  OperatorSinkNextBatchInput &input) const {
180
570
  auto &lstate = input.local_state;
181
- auto &gstate_p = input.global_state;
182
- auto &state = lstate.Cast<BatchCopyToLocalState>();
183
- if (state.collection && state.collection->Count() > 0) {
184
- // we finished processing this batch
185
- // start flushing data
186
- auto min_batch_index = lstate.partition_info.min_batch_index.GetIndex();
187
- PrepareBatchData(context.client, gstate_p, state.batch_index.GetIndex(), std::move(state.collection));
188
- FlushBatchData(context.client, gstate_p, min_batch_index);
189
- }
571
+ auto &state = lstate.Cast<FixedBatchCopyLocalState>();
572
+ auto &gstate = input.global_state.Cast<FixedBatchCopyGlobalState>();
573
+ auto &memory_manager = gstate.memory_manager;
574
+
575
+ // add the previously finished batch (if any) to the state
576
+ AddLocalBatch(context.client, gstate, state);
577
+
578
+ // update the minimum batch index
579
+ memory_manager.UpdateMinBatchIndex(state.partition_info.min_batch_index.GetIndex());
190
580
  state.batch_index = lstate.partition_info.batch_index.GetIndex();
191
581
 
192
582
  state.InitializeCollection(context.client, *this);
@@ -194,11 +584,17 @@ SinkNextBatchType PhysicalBatchCopyToFile::NextBatch(ExecutionContext &context,
194
584
  }
195
585
 
196
586
  unique_ptr<LocalSinkState> PhysicalBatchCopyToFile::GetLocalSinkState(ExecutionContext &context) const {
197
- return make_uniq<BatchCopyToLocalState>(function.copy_to_initialize_local(context, *bind_data));
587
+ return make_uniq<FixedBatchCopyLocalState>(function.copy_to_initialize_local(context, *bind_data));
198
588
  }
199
589
 
200
590
  unique_ptr<GlobalSinkState> PhysicalBatchCopyToFile::GetGlobalSinkState(ClientContext &context) const {
201
- return make_uniq<BatchCopyToGlobalState>(function.copy_to_initialize_global(context, *bind_data, file_path));
591
+ // request memory based on the minimum amount of memory per column
592
+ auto minimum_memory_per_thread =
593
+ FixedBatchCopyGlobalState::MINIMUM_MEMORY_PER_COLUMN_PER_THREAD * children[0]->types.size();
594
+ auto result = make_uniq<FixedBatchCopyGlobalState>(
595
+ context, function.copy_to_initialize_global(context, *bind_data, file_path), minimum_memory_per_thread);
596
+ result->batch_size = function.desired_batch_size ? function.desired_batch_size(context, *bind_data) : 0;
597
+ return std::move(result);
202
598
  }
203
599
 
204
600
  //===--------------------------------------------------------------------===//
@@ -206,7 +602,7 @@ unique_ptr<GlobalSinkState> PhysicalBatchCopyToFile::GetGlobalSinkState(ClientCo
206
602
  //===--------------------------------------------------------------------===//
207
603
  SourceResultType PhysicalBatchCopyToFile::GetData(ExecutionContext &context, DataChunk &chunk,
208
604
  OperatorSourceInput &input) const {
209
- auto &g = sink_state->Cast<BatchCopyToGlobalState>();
605
+ auto &g = sink_state->Cast<FixedBatchCopyGlobalState>();
210
606
 
211
607
  chunk.SetCardinality(1);
212
608
  chunk.SetValue(0, 0, Value::BIGINT(g.rows_copied));