duckdb 0.7.2-dev2867.0 → 0.7.2-dev3117.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319) hide show
  1. package/binding.gyp +2 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/icu/icu-datepart.cpp +5 -1
  4. package/src/duckdb/extension/json/include/json_deserializer.hpp +1 -0
  5. package/src/duckdb/extension/json/include/json_serializer.hpp +8 -1
  6. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +1 -3
  7. package/src/duckdb/extension/json/json_functions/json_structure.cpp +3 -3
  8. package/src/duckdb/extension/json/json_functions/json_transform.cpp +3 -2
  9. package/src/duckdb/extension/parquet/parquet-extension.cpp +9 -7
  10. package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +18 -7
  11. package/src/duckdb/src/catalog/default/default_functions.cpp +2 -0
  12. package/src/duckdb/src/common/arrow/arrow_appender.cpp +3 -3
  13. package/src/duckdb/src/common/arrow/arrow_converter.cpp +2 -2
  14. package/src/duckdb/src/common/enum_util.cpp +5908 -0
  15. package/src/duckdb/src/common/enums/expression_type.cpp +216 -4
  16. package/src/duckdb/src/common/enums/join_type.cpp +6 -5
  17. package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
  18. package/src/duckdb/src/common/exception.cpp +1 -1
  19. package/src/duckdb/src/common/exception_format_value.cpp +2 -2
  20. package/src/duckdb/src/common/multi_file_reader.cpp +14 -0
  21. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +143 -0
  22. package/src/duckdb/src/common/serializer/binary_serializer.cpp +160 -0
  23. package/src/duckdb/src/common/sort/partition_state.cpp +1 -1
  24. package/src/duckdb/src/common/string_util.cpp +6 -1
  25. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +3 -3
  26. package/src/duckdb/src/common/types.cpp +11 -10
  27. package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +4 -4
  28. package/src/duckdb/src/core_functions/function_list.cpp +2 -0
  29. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +2 -1
  30. package/src/duckdb/src/core_functions/scalar/list/list_sort.cpp +2 -3
  31. package/src/duckdb/src/core_functions/scalar/string/format_bytes.cpp +29 -0
  32. package/src/duckdb/src/execution/aggregate_hashtable.cpp +3 -3
  33. package/src/duckdb/src/execution/index/art/art.cpp +5 -1
  34. package/src/duckdb/src/execution/operator/aggregate/distinct_aggregate_data.cpp +1 -1
  35. package/src/duckdb/src/execution/operator/aggregate/grouped_aggregate_data.cpp +2 -2
  36. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +65 -45
  37. package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +17 -11
  38. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -39
  39. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +10 -9
  40. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -4
  41. package/src/duckdb/src/execution/operator/helper/physical_explain_analyze.cpp +6 -21
  42. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +13 -13
  43. package/src/duckdb/src/execution/operator/helper/physical_limit_percent.cpp +15 -14
  44. package/src/duckdb/src/execution/operator/helper/physical_load.cpp +3 -2
  45. package/src/duckdb/src/execution/operator/helper/physical_materialized_collector.cpp +4 -4
  46. package/src/duckdb/src/execution/operator/helper/physical_pragma.cpp +4 -2
  47. package/src/duckdb/src/execution/operator/helper/physical_prepare.cpp +4 -2
  48. package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +10 -8
  49. package/src/duckdb/src/execution/operator/helper/physical_reset.cpp +4 -3
  50. package/src/duckdb/src/execution/operator/helper/physical_set.cpp +7 -6
  51. package/src/duckdb/src/execution/operator/helper/physical_streaming_sample.cpp +2 -1
  52. package/src/duckdb/src/execution/operator/helper/physical_transaction.cpp +4 -2
  53. package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +8 -8
  54. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +17 -16
  55. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +12 -9
  56. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +2 -1
  57. package/src/duckdb/src/execution/operator/join/physical_cross_product.cpp +3 -4
  58. package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +5 -5
  59. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +16 -15
  60. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +13 -12
  61. package/src/duckdb/src/execution/operator/join/physical_nested_loop_join.cpp +12 -10
  62. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +13 -11
  63. package/src/duckdb/src/execution/operator/join/physical_positional_join.cpp +8 -6
  64. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +1 -1
  65. package/src/duckdb/src/execution/operator/order/physical_order.cpp +13 -13
  66. package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +8 -8
  67. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +165 -0
  68. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +160 -145
  69. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +11 -26
  70. package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +14 -19
  71. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +7 -6
  72. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +18 -30
  73. package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +14 -18
  74. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +6 -4
  75. package/src/duckdb/src/execution/operator/scan/physical_dummy_scan.cpp +4 -19
  76. package/src/duckdb/src/execution/operator/scan/physical_empty_result.cpp +3 -2
  77. package/src/duckdb/src/execution/operator/scan/physical_positional_scan.cpp +14 -5
  78. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +6 -4
  79. package/src/duckdb/src/execution/operator/schema/physical_alter.cpp +3 -19
  80. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +13 -25
  81. package/src/duckdb/src/execution/operator/schema/physical_create_function.cpp +4 -19
  82. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +8 -9
  83. package/src/duckdb/src/execution/operator/schema/physical_create_schema.cpp +4 -19
  84. package/src/duckdb/src/execution/operator/schema/physical_create_sequence.cpp +4 -19
  85. package/src/duckdb/src/execution/operator/schema/physical_create_table.cpp +4 -19
  86. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +9 -26
  87. package/src/duckdb/src/execution/operator/schema/physical_create_view.cpp +4 -19
  88. package/src/duckdb/src/execution/operator/schema/physical_detach.cpp +4 -19
  89. package/src/duckdb/src/execution/operator/schema/physical_drop.cpp +3 -19
  90. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +9 -8
  91. package/src/duckdb/src/execution/operator/set/physical_union.cpp +1 -1
  92. package/src/duckdb/src/execution/partitionable_hashtable.cpp +2 -2
  93. package/src/duckdb/src/execution/physical_operator.cpp +11 -5
  94. package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +25 -4
  95. package/src/duckdb/src/execution/physical_plan/plan_sample.cpp +2 -1
  96. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +16 -16
  97. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +5 -4
  98. package/src/duckdb/src/function/table/arrow_conversion.cpp +3 -3
  99. package/src/duckdb/src/function/table/copy_csv.cpp +85 -29
  100. package/src/duckdb/src/function/table/read_csv.cpp +17 -11
  101. package/src/duckdb/src/function/table/system/duckdb_settings.cpp +2 -1
  102. package/src/duckdb/src/function/table/system/duckdb_types.cpp +2 -1
  103. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  104. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +7 -1
  105. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +958 -0
  106. package/src/duckdb/src/include/duckdb/common/enums/join_type.hpp +3 -3
  107. package/src/duckdb/src/include/duckdb/common/enums/operator_result_type.hpp +16 -4
  108. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
  109. package/src/duckdb/src/include/duckdb/common/exception.hpp +4 -4
  110. package/src/duckdb/src/include/duckdb/common/exception_format_value.hpp +3 -2
  111. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +44 -0
  112. package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +45 -0
  113. package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +93 -0
  114. package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +92 -0
  115. package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +7 -3
  116. package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +2 -2
  117. package/src/duckdb/src/include/duckdb/common/set.hpp +2 -1
  118. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -1
  119. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +1 -1
  120. package/src/duckdb/src/include/duckdb/common/types.hpp +1 -0
  121. package/src/duckdb/src/include/duckdb/common/vector.hpp +61 -14
  122. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +15 -0
  123. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +3 -2
  124. package/src/duckdb/src/include/duckdb/execution/executor.hpp +10 -1
  125. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/distinct_aggregate_data.hpp +2 -2
  126. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/grouped_aggregate_data.hpp +2 -2
  127. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +8 -11
  128. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +2 -4
  129. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp +3 -7
  130. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +2 -4
  131. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +1 -2
  132. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_explain_analyze.hpp +2 -5
  133. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit.hpp +2 -4
  134. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit_percent.hpp +2 -4
  135. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_load.hpp +1 -2
  136. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_materialized_collector.hpp +1 -2
  137. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_pragma.hpp +1 -2
  138. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_prepare.hpp +1 -2
  139. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_reservoir_sample.hpp +2 -4
  140. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_reset.hpp +1 -2
  141. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_set.hpp +1 -2
  142. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_transaction.hpp +1 -2
  143. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_vacuum.hpp +2 -4
  144. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -4
  145. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_blockwise_nl_join.hpp +2 -4
  146. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +1 -2
  147. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_delim_join.hpp +1 -2
  148. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +2 -4
  149. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +2 -4
  150. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_nested_loop_join.hpp +2 -4
  151. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +2 -4
  152. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_positional_join.hpp +2 -4
  153. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_order.hpp +2 -4
  154. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_top_n.hpp +2 -4
  155. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +68 -0
  156. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +3 -5
  157. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +4 -5
  158. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_delete.hpp +2 -4
  159. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_export.hpp +2 -4
  160. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +2 -4
  161. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_update.hpp +2 -4
  162. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +1 -2
  163. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_dummy_scan.hpp +1 -3
  164. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_empty_result.hpp +1 -2
  165. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_positional_scan.hpp +1 -2
  166. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +1 -2
  167. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_alter.hpp +1 -3
  168. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_attach.hpp +1 -3
  169. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_function.hpp +1 -3
  170. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_index.hpp +2 -4
  171. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_schema.hpp +1 -3
  172. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_sequence.hpp +1 -3
  173. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_table.hpp +1 -3
  174. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_type.hpp +2 -5
  175. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_view.hpp +1 -3
  176. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_detach.hpp +1 -3
  177. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_drop.hpp +1 -3
  178. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +2 -4
  179. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +3 -3
  180. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +7 -4
  181. package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +26 -6
  182. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +6 -6
  183. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +2 -1
  184. package/src/duckdb/src/include/duckdb/function/copy_function.hpp +32 -4
  185. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +4 -2
  186. package/src/duckdb/src/include/duckdb/function/table_function.hpp +0 -1
  187. package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
  188. package/src/duckdb/src/include/duckdb/main/config.hpp +4 -0
  189. package/src/duckdb/src/include/duckdb/main/database.hpp +1 -3
  190. package/src/duckdb/src/include/duckdb/main/database_path_and_type.hpp +24 -0
  191. package/src/duckdb/src/include/duckdb/main/relation/setop_relation.hpp +1 -0
  192. package/src/duckdb/src/include/duckdb/parallel/event.hpp +1 -1
  193. package/src/duckdb/src/include/duckdb/parallel/interrupt.hpp +63 -0
  194. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +16 -3
  195. package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +51 -7
  196. package/src/duckdb/src/include/duckdb/parallel/task.hpp +21 -2
  197. package/src/duckdb/src/include/duckdb/parallel/task_counter.hpp +2 -2
  198. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +2 -2
  199. package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +1 -0
  200. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +6 -0
  201. package/src/duckdb/src/include/duckdb/planner/expression/bound_columnref_expression.hpp +1 -0
  202. package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +2 -0
  203. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +2 -0
  204. package/src/duckdb/src/include/duckdb/planner/operator/logical_aggregate.hpp +2 -1
  205. package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +1 -0
  206. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +1 -0
  207. package/src/duckdb/src/include/duckdb/planner/operator/logical_delete.hpp +1 -0
  208. package/src/duckdb/src/include/duckdb/planner/operator/logical_delim_get.hpp +1 -0
  209. package/src/duckdb/src/include/duckdb/planner/operator/logical_dummy_scan.hpp +1 -0
  210. package/src/duckdb/src/include/duckdb/planner/operator/logical_expression_get.hpp +1 -0
  211. package/src/duckdb/src/include/duckdb/planner/operator/logical_insert.hpp +1 -0
  212. package/src/duckdb/src/include/duckdb/planner/operator/logical_pivot.hpp +1 -0
  213. package/src/duckdb/src/include/duckdb/planner/operator/logical_projection.hpp +1 -0
  214. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +1 -0
  215. package/src/duckdb/src/include/duckdb/planner/operator/logical_set_operation.hpp +1 -0
  216. package/src/duckdb/src/include/duckdb/planner/operator/logical_unnest.hpp +1 -0
  217. package/src/duckdb/src/include/duckdb/planner/operator/logical_update.hpp +1 -0
  218. package/src/duckdb/src/include/duckdb/planner/operator/logical_window.hpp +1 -0
  219. package/src/duckdb/src/include/duckdb/planner/query_node/bound_select_node.hpp +1 -1
  220. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -0
  221. package/src/duckdb/src/include/duckdb/storage/optimistic_data_writer.hpp +46 -0
  222. package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +24 -3
  223. package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +46 -1
  224. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +9 -10
  225. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +1 -1
  226. package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +2 -2
  227. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -3
  228. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -0
  229. package/src/duckdb/src/include/duckdb/storage/table/segment_base.hpp +1 -1
  230. package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +22 -0
  231. package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +3 -3
  232. package/src/duckdb/src/include/duckdb/storage/table/struct_column_data.hpp +2 -2
  233. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +0 -2
  234. package/src/duckdb/src/include/duckdb/storage/table/validity_column_data.hpp +1 -2
  235. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +9 -34
  236. package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +26 -0
  237. package/src/duckdb/src/include/duckdb/verification/no_operator_caching_verifier.hpp +25 -0
  238. package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +6 -0
  239. package/src/duckdb/src/main/client_context.cpp +1 -0
  240. package/src/duckdb/src/main/client_verify.cpp +5 -0
  241. package/src/duckdb/src/main/config.cpp +4 -0
  242. package/src/duckdb/src/main/database.cpp +22 -34
  243. package/src/duckdb/src/main/database_path_and_type.cpp +23 -0
  244. package/src/duckdb/src/main/extension/extension_load.cpp +19 -15
  245. package/src/duckdb/src/main/relation/join_relation.cpp +2 -1
  246. package/src/duckdb/src/main/relation/setop_relation.cpp +2 -3
  247. package/src/duckdb/src/parallel/event.cpp +1 -1
  248. package/src/duckdb/src/parallel/executor.cpp +39 -3
  249. package/src/duckdb/src/parallel/executor_task.cpp +11 -0
  250. package/src/duckdb/src/parallel/interrupt.cpp +57 -0
  251. package/src/duckdb/src/parallel/pipeline.cpp +49 -6
  252. package/src/duckdb/src/parallel/pipeline_executor.cpp +248 -69
  253. package/src/duckdb/src/parallel/pipeline_initialize_event.cpp +1 -1
  254. package/src/duckdb/src/parallel/task_scheduler.cpp +57 -22
  255. package/src/duckdb/src/parser/base_expression.cpp +6 -0
  256. package/src/duckdb/src/parser/expression/window_expression.cpp +1 -1
  257. package/src/duckdb/src/parser/parsed_data/sample_options.cpp +2 -2
  258. package/src/duckdb/src/parser/query_node/select_node.cpp +1 -1
  259. package/src/duckdb/src/parser/result_modifier.cpp +2 -2
  260. package/src/duckdb/src/parser/statement/select_statement.cpp +0 -44
  261. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -3
  262. package/src/duckdb/src/parser/tableref.cpp +1 -1
  263. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +3 -3
  264. package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +6 -0
  265. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +4 -1
  266. package/src/duckdb/src/planner/expression/bound_columnref_expression.cpp +17 -3
  267. package/src/duckdb/src/planner/expression/bound_reference_expression.cpp +8 -2
  268. package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +7 -0
  269. package/src/duckdb/src/planner/operator/logical_aggregate.cpp +14 -2
  270. package/src/duckdb/src/planner/operator/logical_column_data_get.cpp +11 -0
  271. package/src/duckdb/src/planner/operator/logical_comparison_join.cpp +2 -2
  272. package/src/duckdb/src/planner/operator/logical_cteref.cpp +11 -0
  273. package/src/duckdb/src/planner/operator/logical_delete.cpp +10 -0
  274. package/src/duckdb/src/planner/operator/logical_delim_get.cpp +12 -1
  275. package/src/duckdb/src/planner/operator/logical_dummy_scan.cpp +12 -1
  276. package/src/duckdb/src/planner/operator/logical_expression_get.cpp +12 -1
  277. package/src/duckdb/src/planner/operator/logical_get.cpp +10 -4
  278. package/src/duckdb/src/planner/operator/logical_insert.cpp +12 -1
  279. package/src/duckdb/src/planner/operator/logical_pivot.cpp +11 -0
  280. package/src/duckdb/src/planner/operator/logical_projection.cpp +11 -0
  281. package/src/duckdb/src/planner/operator/logical_recursive_cte.cpp +11 -0
  282. package/src/duckdb/src/planner/operator/logical_set_operation.cpp +11 -0
  283. package/src/duckdb/src/planner/operator/logical_unnest.cpp +12 -1
  284. package/src/duckdb/src/planner/operator/logical_update.cpp +10 -0
  285. package/src/duckdb/src/planner/operator/logical_window.cpp +11 -0
  286. package/src/duckdb/src/storage/checkpoint_manager.cpp +1 -1
  287. package/src/duckdb/src/storage/data_table.cpp +5 -0
  288. package/src/duckdb/src/storage/local_storage.cpp +40 -110
  289. package/src/duckdb/src/storage/optimistic_data_writer.cpp +96 -0
  290. package/src/duckdb/src/storage/partial_block_manager.cpp +73 -9
  291. package/src/duckdb/src/storage/single_file_block_manager.cpp +3 -1
  292. package/src/duckdb/src/storage/standard_buffer_manager.cpp +17 -12
  293. package/src/duckdb/src/storage/statistics/base_statistics.cpp +3 -0
  294. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +90 -82
  295. package/src/duckdb/src/storage/table/column_data.cpp +19 -45
  296. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +7 -7
  297. package/src/duckdb/src/storage/table/column_segment.cpp +1 -1
  298. package/src/duckdb/src/storage/table/list_column_data.cpp +6 -11
  299. package/src/duckdb/src/storage/table/row_group.cpp +13 -14
  300. package/src/duckdb/src/storage/table/row_group_collection.cpp +10 -4
  301. package/src/duckdb/src/storage/table/standard_column_data.cpp +6 -10
  302. package/src/duckdb/src/storage/table/struct_column_data.cpp +7 -13
  303. package/src/duckdb/src/storage/table/update_segment.cpp +0 -25
  304. package/src/duckdb/src/storage/table/validity_column_data.cpp +2 -6
  305. package/src/duckdb/src/transaction/commit_state.cpp +4 -4
  306. package/src/duckdb/src/verification/deserialized_statement_verifier.cpp +2 -1
  307. package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +20 -0
  308. package/src/duckdb/src/verification/no_operator_caching_verifier.cpp +13 -0
  309. package/src/duckdb/src/verification/statement_verifier.cpp +7 -0
  310. package/src/duckdb/ub_src_common.cpp +2 -2
  311. package/src/duckdb/ub_src_common_serializer.cpp +4 -2
  312. package/src/duckdb/ub_src_core_functions_scalar_string.cpp +2 -0
  313. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  314. package/src/duckdb/ub_src_main.cpp +2 -0
  315. package/src/duckdb/ub_src_parallel.cpp +2 -0
  316. package/src/duckdb/ub_src_storage.cpp +2 -0
  317. package/src/duckdb/src/common/serializer/enum_serializer.cpp +0 -1180
  318. package/src/duckdb/src/common/vector.cpp +0 -12
  319. package/src/duckdb/src/include/duckdb/common/serializer/enum_serializer.hpp +0 -113
@@ -6,27 +6,12 @@ namespace duckdb {
6
6
  //===--------------------------------------------------------------------===//
7
7
  // Source
8
8
  //===--------------------------------------------------------------------===//
9
- class CreateSequenceSourceState : public GlobalSourceState {
10
- public:
11
- CreateSequenceSourceState() : finished(false) {
12
- }
13
-
14
- bool finished;
15
- };
16
-
17
- unique_ptr<GlobalSourceState> PhysicalCreateSequence::GetGlobalSourceState(ClientContext &context) const {
18
- return make_uniq<CreateSequenceSourceState>();
19
- }
20
-
21
- void PhysicalCreateSequence::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
22
- LocalSourceState &lstate) const {
23
- auto &state = gstate.Cast<CreateSequenceSourceState>();
24
- if (state.finished) {
25
- return;
26
- }
9
+ SourceResultType PhysicalCreateSequence::GetData(ExecutionContext &context, DataChunk &chunk,
10
+ OperatorSourceInput &input) const {
27
11
  auto &catalog = Catalog::GetCatalog(context.client, info->catalog);
28
12
  catalog.CreateSequence(context.client, *info);
29
- state.finished = true;
13
+
14
+ return SourceResultType::FINISHED;
30
15
  }
31
16
 
32
17
  } // namespace duckdb
@@ -16,27 +16,12 @@ PhysicalCreateTable::PhysicalCreateTable(LogicalOperator &op, SchemaCatalogEntry
16
16
  //===--------------------------------------------------------------------===//
17
17
  // Source
18
18
  //===--------------------------------------------------------------------===//
19
- class CreateTableSourceState : public GlobalSourceState {
20
- public:
21
- CreateTableSourceState() : finished(false) {
22
- }
23
-
24
- bool finished;
25
- };
26
-
27
- unique_ptr<GlobalSourceState> PhysicalCreateTable::GetGlobalSourceState(ClientContext &context) const {
28
- return make_uniq<CreateTableSourceState>();
29
- }
30
-
31
- void PhysicalCreateTable::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
32
- LocalSourceState &lstate) const {
33
- auto &state = gstate.Cast<CreateTableSourceState>();
34
- if (state.finished) {
35
- return;
36
- }
19
+ SourceResultType PhysicalCreateTable::GetData(ExecutionContext &context, DataChunk &chunk,
20
+ OperatorSourceInput &input) const {
37
21
  auto &catalog = schema.catalog;
38
22
  catalog.CreateTable(catalog.GetCatalogTransaction(context.client), schema, *info);
39
- state.finished = true;
23
+
24
+ return SourceResultType::FINISHED;
40
25
  }
41
26
 
42
27
  } // namespace duckdb
@@ -29,16 +29,15 @@ unique_ptr<GlobalSinkState> PhysicalCreateType::GetGlobalSinkState(ClientContext
29
29
  return make_uniq<CreateTypeGlobalState>(context);
30
30
  }
31
31
 
32
- SinkResultType PhysicalCreateType::Sink(ExecutionContext &context, GlobalSinkState &gstate_p, LocalSinkState &lstate_p,
33
- DataChunk &input) const {
34
- auto &gstate = gstate_p.Cast<CreateTypeGlobalState>();
35
- idx_t total_row_count = gstate.size + input.size();
32
+ SinkResultType PhysicalCreateType::Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const {
33
+ auto &gstate = input.global_state.Cast<CreateTypeGlobalState>();
34
+ idx_t total_row_count = gstate.size + chunk.size();
36
35
  if (total_row_count > NumericLimits<uint32_t>::Maximum()) {
37
36
  throw InvalidInputException("Attempted to create ENUM of size %llu, which exceeds the maximum size of %llu",
38
37
  total_row_count, NumericLimits<uint32_t>::Maximum());
39
38
  }
40
39
  UnifiedVectorFormat sdata;
41
- input.data[0].ToUnifiedFormat(input.size(), sdata);
40
+ chunk.data[0].ToUnifiedFormat(chunk.size(), sdata);
42
41
 
43
42
  if (total_row_count > gstate.capacity) {
44
43
  // We must resize our result vector
@@ -49,7 +48,7 @@ SinkResultType PhysicalCreateType::Sink(ExecutionContext &context, GlobalSinkSta
49
48
  auto src_ptr = (string_t *)sdata.data;
50
49
  auto result_ptr = FlatVector::GetData<string_t>(gstate.result);
51
50
  // Input vector has NULL value, we just throw an exception
52
- for (idx_t i = 0; i < input.size(); i++) {
51
+ for (idx_t i = 0; i < chunk.size(); i++) {
53
52
  idx_t idx = sdata.sel->get_index(i);
54
53
  if (!sdata.validity.RowIsValid(idx)) {
55
54
  throw InvalidInputException("Attempted to create ENUM type with NULL value!");
@@ -70,25 +69,8 @@ SinkResultType PhysicalCreateType::Sink(ExecutionContext &context, GlobalSinkSta
70
69
  //===--------------------------------------------------------------------===//
71
70
  // Source
72
71
  //===--------------------------------------------------------------------===//
73
- class CreateTypeSourceState : public GlobalSourceState {
74
- public:
75
- CreateTypeSourceState() : finished(false) {
76
- }
77
-
78
- bool finished;
79
- };
80
-
81
- unique_ptr<GlobalSourceState> PhysicalCreateType::GetGlobalSourceState(ClientContext &context) const {
82
- return make_uniq<CreateTypeSourceState>();
83
- }
84
-
85
- void PhysicalCreateType::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
86
- LocalSourceState &lstate) const {
87
- auto &state = gstate.Cast<CreateTypeSourceState>();
88
- if (state.finished) {
89
- return;
90
- }
91
-
72
+ SourceResultType PhysicalCreateType::GetData(ExecutionContext &context, DataChunk &chunk,
73
+ OperatorSourceInput &input) const {
92
74
  if (IsSink()) {
93
75
  D_ASSERT(info->type == LogicalType::INVALID);
94
76
  auto &g_sink_state = sink_state->Cast<CreateTypeGlobalState>();
@@ -100,7 +82,8 @@ void PhysicalCreateType::GetData(ExecutionContext &context, DataChunk &chunk, Gl
100
82
  D_ASSERT(catalog_entry->type == CatalogType::TYPE_ENTRY);
101
83
  auto &catalog_type = catalog_entry->Cast<TypeCatalogEntry>();
102
84
  EnumType::SetCatalog(info->type, &catalog_type);
103
- state.finished = true;
85
+
86
+ return SourceResultType::FINISHED;
104
87
  }
105
88
 
106
89
  } // namespace duckdb
@@ -6,27 +6,12 @@ namespace duckdb {
6
6
  //===--------------------------------------------------------------------===//
7
7
  // Source
8
8
  //===--------------------------------------------------------------------===//
9
- class CreateViewSourceState : public GlobalSourceState {
10
- public:
11
- CreateViewSourceState() : finished(false) {
12
- }
13
-
14
- bool finished;
15
- };
16
-
17
- unique_ptr<GlobalSourceState> PhysicalCreateView::GetGlobalSourceState(ClientContext &context) const {
18
- return make_uniq<CreateViewSourceState>();
19
- }
20
-
21
- void PhysicalCreateView::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
22
- LocalSourceState &lstate) const {
23
- auto &state = gstate.Cast<CreateViewSourceState>();
24
- if (state.finished) {
25
- return;
26
- }
9
+ SourceResultType PhysicalCreateView::GetData(ExecutionContext &context, DataChunk &chunk,
10
+ OperatorSourceInput &input) const {
27
11
  auto &catalog = Catalog::GetCatalog(context.client, info->catalog);
28
12
  catalog.CreateView(context.client, *info);
29
- state.finished = true;
13
+
14
+ return SourceResultType::FINISHED;
30
15
  }
31
16
 
32
17
  } // namespace duckdb
@@ -11,27 +11,12 @@ namespace duckdb {
11
11
  //===--------------------------------------------------------------------===//
12
12
  // Source
13
13
  //===--------------------------------------------------------------------===//
14
- class DetachSourceState : public GlobalSourceState {
15
- public:
16
- DetachSourceState() : finished(false) {
17
- }
18
-
19
- bool finished;
20
- };
21
-
22
- unique_ptr<GlobalSourceState> PhysicalDetach::GetGlobalSourceState(ClientContext &context) const {
23
- return make_uniq<DetachSourceState>();
24
- }
25
-
26
- void PhysicalDetach::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
27
- LocalSourceState &lstate) const {
28
- auto &state = gstate.Cast<DetachSourceState>();
29
- if (state.finished) {
30
- return;
31
- }
14
+ SourceResultType PhysicalDetach::GetData(ExecutionContext &context, DataChunk &chunk,
15
+ OperatorSourceInput &input) const {
32
16
  auto &db_manager = DatabaseManager::Get(context.client);
33
17
  db_manager.DetachDatabase(context.client, info->name, info->if_not_found);
34
- state.finished = true;
18
+
19
+ return SourceResultType::FINISHED;
35
20
  }
36
21
 
37
22
  } // namespace duckdb
@@ -11,24 +11,7 @@ namespace duckdb {
11
11
  //===--------------------------------------------------------------------===//
12
12
  // Source
13
13
  //===--------------------------------------------------------------------===//
14
- class DropSourceState : public GlobalSourceState {
15
- public:
16
- DropSourceState() : finished(false) {
17
- }
18
-
19
- bool finished;
20
- };
21
-
22
- unique_ptr<GlobalSourceState> PhysicalDrop::GetGlobalSourceState(ClientContext &context) const {
23
- return make_uniq<DropSourceState>();
24
- }
25
-
26
- void PhysicalDrop::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
27
- LocalSourceState &lstate) const {
28
- auto &state = gstate.Cast<DropSourceState>();
29
- if (state.finished) {
30
- return;
31
- }
14
+ SourceResultType PhysicalDrop::GetData(ExecutionContext &context, DataChunk &chunk, OperatorSourceInput &input) const {
32
15
  switch (info->type) {
33
16
  case CatalogType::PREPARED_STATEMENT: {
34
17
  // DEALLOCATE silently ignores errors
@@ -62,7 +45,8 @@ void PhysicalDrop::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSo
62
45
  break;
63
46
  }
64
47
  }
65
- state.finished = true;
48
+
49
+ return SourceResultType::FINISHED;
66
50
  }
67
51
 
68
52
  } // namespace duckdb
@@ -61,16 +61,15 @@ idx_t PhysicalRecursiveCTE::ProbeHT(DataChunk &chunk, RecursiveCTEState &state)
61
61
  return new_group_count;
62
62
  }
63
63
 
64
- SinkResultType PhysicalRecursiveCTE::Sink(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate,
65
- DataChunk &input) const {
66
- auto &gstate = state.Cast<RecursiveCTEState>();
64
+ SinkResultType PhysicalRecursiveCTE::Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const {
65
+ auto &gstate = input.global_state.Cast<RecursiveCTEState>();
67
66
  if (!union_all) {
68
- idx_t match_count = ProbeHT(input, gstate);
67
+ idx_t match_count = ProbeHT(chunk, gstate);
69
68
  if (match_count > 0) {
70
- gstate.intermediate_table.Append(input);
69
+ gstate.intermediate_table.Append(chunk);
71
70
  }
72
71
  } else {
73
- gstate.intermediate_table.Append(input);
72
+ gstate.intermediate_table.Append(chunk);
74
73
  }
75
74
  return SinkResultType::NEED_MORE_INPUT;
76
75
  }
@@ -78,8 +77,8 @@ SinkResultType PhysicalRecursiveCTE::Sink(ExecutionContext &context, GlobalSinkS
78
77
  //===--------------------------------------------------------------------===//
79
78
  // Source
80
79
  //===--------------------------------------------------------------------===//
81
- void PhysicalRecursiveCTE::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate_p,
82
- LocalSourceState &lstate) const {
80
+ SourceResultType PhysicalRecursiveCTE::GetData(ExecutionContext &context, DataChunk &chunk,
81
+ OperatorSourceInput &input) const {
83
82
  auto &gstate = sink_state->Cast<RecursiveCTEState>();
84
83
  if (!gstate.initialized) {
85
84
  gstate.intermediate_table.InitializeScan(gstate.scan_state);
@@ -117,6 +116,8 @@ void PhysicalRecursiveCTE::GetData(ExecutionContext &context, DataChunk &chunk,
117
116
  gstate.intermediate_table.InitializeScan(gstate.scan_state);
118
117
  }
119
118
  }
119
+
120
+ return chunk.size() == 0 ? SourceResultType::FINISHED : SourceResultType::HAVE_MORE_OUTPUT;
120
121
  }
121
122
 
122
123
  void PhysicalRecursiveCTE::ExecuteRecursivePipelines(ExecutionContext &context) const {
@@ -28,7 +28,7 @@ void PhysicalUnion::BuildPipelines(Pipeline &current, MetaPipeline &meta_pipelin
28
28
  order_matters = true;
29
29
  }
30
30
  if (sink) {
31
- if (sink->SinkOrderDependent() && !sink->RequiresBatchIndex()) {
31
+ if (sink->SinkOrderDependent() || sink->RequiresBatchIndex()) {
32
32
  order_matters = true;
33
33
  }
34
34
  if (!sink->ParallelSink()) {
@@ -48,7 +48,7 @@ HtEntryType PartitionableHashTable::GetHTEntrySize() {
48
48
  }
49
49
 
50
50
  idx_t PartitionableHashTable::ListAddChunk(HashTableList &list, DataChunk &groups, Vector &group_hashes,
51
- DataChunk &payload, const vector<idx_t> &filter) {
51
+ DataChunk &payload, const unsafe_vector<idx_t> &filter) {
52
52
  // If this is false, a single AddChunk would overflow the max capacity
53
53
  D_ASSERT(list.empty() || groups.size() <= list.back()->MaxCapacity());
54
54
  if (list.empty() || list.back()->Count() + groups.size() >= list.back()->MaxCapacity()) {
@@ -65,7 +65,7 @@ idx_t PartitionableHashTable::ListAddChunk(HashTableList &list, DataChunk &group
65
65
  }
66
66
 
67
67
  idx_t PartitionableHashTable::AddChunk(DataChunk &groups, DataChunk &payload, bool do_partition,
68
- const vector<idx_t> &filter) {
68
+ const unsafe_vector<idx_t> &filter) {
69
69
  groups.Hash(hashes);
70
70
 
71
71
  // we partition when we are asked to or when the unpartitioned ht runs out of space
@@ -72,8 +72,8 @@ unique_ptr<GlobalSourceState> PhysicalOperator::GetGlobalSourceState(ClientConte
72
72
  }
73
73
 
74
74
  // LCOV_EXCL_START
75
- void PhysicalOperator::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
76
- LocalSourceState &lstate) const {
75
+ SourceResultType PhysicalOperator::GetData(ExecutionContext &context, DataChunk &chunk,
76
+ OperatorSourceInput &input) const {
77
77
  throw InternalException("Calling GetData on a node that is not a source!");
78
78
  }
79
79
 
@@ -91,10 +91,10 @@ double PhysicalOperator::GetProgress(ClientContext &context, GlobalSourceState &
91
91
  // Sink
92
92
  //===--------------------------------------------------------------------===//
93
93
  // LCOV_EXCL_START
94
- SinkResultType PhysicalOperator::Sink(ExecutionContext &context, GlobalSinkState &gstate, LocalSinkState &lstate,
95
- DataChunk &input) const {
94
+ SinkResultType PhysicalOperator::Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const {
96
95
  throw InternalException("Calling Sink on a node that is not a sink!");
97
96
  }
97
+
98
98
  // LCOV_EXCL_STOP
99
99
 
100
100
  void PhysicalOperator::Combine(ExecutionContext &context, GlobalSinkState &gstate, LocalSinkState &lstate) const {
@@ -105,6 +105,9 @@ SinkFinalizeType PhysicalOperator::Finalize(Pipeline &pipeline, Event &event, Cl
105
105
  return SinkFinalizeType::READY;
106
106
  }
107
107
 
108
+ void PhysicalOperator::NextBatch(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate_p) const {
109
+ }
110
+
108
111
  unique_ptr<LocalSinkState> PhysicalOperator::GetLocalSinkState(ExecutionContext &context) const {
109
112
  return make_uniq<LocalSinkState>();
110
113
  }
@@ -238,7 +241,9 @@ OperatorResultType CachingPhysicalOperator::Execute(ExecutionContext &context, D
238
241
  state.initialized = true;
239
242
  state.can_cache_chunk = true;
240
243
 
241
- if (!context.pipeline || !caching_supported) {
244
+ if (!context.client.config.enable_caching_operators) {
245
+ state.can_cache_chunk = false;
246
+ } else if (!context.pipeline || !caching_supported) {
242
247
  state.can_cache_chunk = false;
243
248
  } else if (!context.pipeline->GetSink()) {
244
249
  // Disabling for pipelines without Sink, i.e. when pulling
@@ -252,6 +257,7 @@ OperatorResultType CachingPhysicalOperator::Execute(ExecutionContext &context, D
252
257
  if (!state.can_cache_chunk) {
253
258
  return child_result;
254
259
  }
260
+ // TODO chunk size of 0 should not result in a cache being created!
255
261
  if (chunk.size() < CACHE_THRESHOLD) {
256
262
  // we have filtered out a significant amount of tuples
257
263
  // add this chunk to the cache and continue
@@ -1,17 +1,40 @@
1
1
  #include "duckdb/execution/physical_plan_generator.hpp"
2
2
  #include "duckdb/execution/operator/persistent/physical_copy_to_file.hpp"
3
+ #include "duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp"
3
4
  #include "duckdb/planner/operator/logical_copy_to_file.hpp"
4
5
 
5
6
  namespace duckdb {
6
7
 
7
8
  unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCopyToFile &op) {
8
9
  auto plan = CreatePlan(*op.children[0]);
10
+ bool preserve_insertion_order = PhysicalPlanGenerator::PreserveInsertionOrder(context, *plan);
11
+ bool supports_batch_index = PhysicalPlanGenerator::UseBatchIndex(context, *plan);
9
12
  auto &fs = FileSystem::GetFileSystem(context);
10
13
  op.file_path = fs.ExpandPath(op.file_path, FileSystem::GetFileOpener(context));
11
-
12
14
  if (op.use_tmp_file) {
13
15
  op.file_path += ".tmp";
14
16
  }
17
+ if (op.per_thread_output || op.partition_output || !op.partition_columns.empty() || op.overwrite_or_ignore) {
18
+ // hive-partitioning/per-thread output does not care about insertion order, and does not support batch indexes
19
+ preserve_insertion_order = false;
20
+ supports_batch_index = false;
21
+ }
22
+ auto mode = CopyFunctionExecutionMode::REGULAR_COPY_TO_FILE;
23
+ if (op.function.execution_mode) {
24
+ mode = op.function.execution_mode(preserve_insertion_order, supports_batch_index);
25
+ }
26
+ if (mode == CopyFunctionExecutionMode::BATCH_COPY_TO_FILE) {
27
+ if (!supports_batch_index) {
28
+ throw InternalException("BATCH_COPY_TO_FILE can only be used if batch indexes are supported");
29
+ }
30
+ // batched copy to file
31
+ auto copy = make_uniq<PhysicalBatchCopyToFile>(op.types, op.function, std::move(op.bind_data),
32
+ op.estimated_cardinality);
33
+ copy->file_path = op.file_path;
34
+ copy->use_tmp_file = op.use_tmp_file;
35
+ copy->children.push_back(std::move(plan));
36
+ return std::move(copy);
37
+ }
15
38
  // COPY from select statement to file
16
39
  auto copy = make_uniq<PhysicalCopyToFile>(op.types, op.function, std::move(op.bind_data), op.estimated_cardinality);
17
40
  copy->file_path = op.file_path;
@@ -23,9 +46,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCopyToFile
23
46
  copy->partition_columns = op.partition_columns;
24
47
  copy->names = op.names;
25
48
  copy->expected_types = op.expected_types;
26
- if (op.function.parallel) {
27
- copy->parallel = op.function.parallel(context, *copy->bind_data);
28
- }
49
+ copy->parallel = mode == CopyFunctionExecutionMode::PARALLEL_COPY_TO_FILE;
29
50
 
30
51
  copy->children.push_back(std::move(plan));
31
52
  return std::move(copy);
@@ -2,6 +2,7 @@
2
2
  #include "duckdb/execution/operator/helper/physical_streaming_sample.hpp"
3
3
  #include "duckdb/execution/physical_plan_generator.hpp"
4
4
  #include "duckdb/planner/operator/logical_sample.hpp"
5
+ #include "duckdb/common/enum_util.hpp"
5
6
 
6
7
  namespace duckdb {
7
8
 
@@ -20,7 +21,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalSample &op
20
21
  if (!op.sample_options->is_percentage) {
21
22
  throw ParserException("Sample method %s cannot be used with a discrete sample count, either switch to "
22
23
  "reservoir sampling or use a sample_size",
23
- SampleMethodToString(op.sample_options->method));
24
+ EnumUtil::ToString(op.sample_options->method));
24
25
  }
25
26
  sample = make_uniq<PhysicalStreamingSample>(op.types, op.sample_options->method,
26
27
  op.sample_options->sample_size.GetValue<double>(),
@@ -130,19 +130,18 @@ void RadixPartitionedHashTable::PopulateGroupChunk(DataChunk &group_chunk, DataC
130
130
  group_chunk.Verify();
131
131
  }
132
132
 
133
- void RadixPartitionedHashTable::Sink(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate,
134
- DataChunk &groups_input, DataChunk &payload_input,
135
- const vector<idx_t> &filter) const {
136
- auto &llstate = lstate.Cast<RadixHTLocalState>();
137
- auto &gstate = state.Cast<RadixHTGlobalState>();
133
+ void RadixPartitionedHashTable::Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input,
134
+ DataChunk &payload_input, const unsafe_vector<idx_t> &filter) const {
135
+ auto &llstate = input.local_state.Cast<RadixHTLocalState>();
136
+ auto &gstate = input.global_state.Cast<RadixHTGlobalState>();
138
137
  D_ASSERT(!gstate.is_finalized);
139
138
 
140
139
  DataChunk &group_chunk = llstate.group_chunk;
141
- PopulateGroupChunk(group_chunk, groups_input);
140
+ PopulateGroupChunk(group_chunk, chunk);
142
141
 
143
142
  // if we have non-combinable aggregates (e.g. string_agg) we cannot keep parallel hash
144
143
  // tables
145
- if (ForceSingleHT(state)) {
144
+ if (ForceSingleHT(input.global_state)) {
146
145
  lock_guard<mutex> glock(gstate.lock);
147
146
  gstate.is_empty = gstate.is_empty && group_chunk.size() == 0;
148
147
  if (gstate.finalized_hts.empty()) {
@@ -304,7 +303,7 @@ private:
304
303
  };
305
304
 
306
305
  void RadixPartitionedHashTable::ScheduleTasks(Executor &executor, const shared_ptr<Event> &event,
307
- GlobalSinkState &state, vector<unique_ptr<Task>> &tasks) const {
306
+ GlobalSinkState &state, vector<shared_ptr<Task>> &tasks) const {
308
307
  auto &gstate = state.Cast<RadixHTGlobalState>();
309
308
  if (!gstate.is_partitioned) {
310
309
  return;
@@ -382,14 +381,14 @@ idx_t RadixPartitionedHashTable::Size(GlobalSinkState &sink_state) const {
382
381
  return count;
383
382
  }
384
383
 
385
- void RadixPartitionedHashTable::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSinkState &sink_state,
386
- GlobalSourceState &gsstate, LocalSourceState &lsstate) const {
384
+ SourceResultType RadixPartitionedHashTable::GetData(ExecutionContext &context, DataChunk &chunk,
385
+ GlobalSinkState &sink_state, OperatorSourceInput &input) const {
387
386
  auto &gstate = sink_state.Cast<RadixHTGlobalState>();
388
- auto &state = gsstate.Cast<RadixHTGlobalSourceState>();
389
- auto &lstate = lsstate.Cast<RadixHTLocalSourceState>();
387
+ auto &state = input.global_state.Cast<RadixHTGlobalSourceState>();
388
+ auto &lstate = input.local_state.Cast<RadixHTLocalSourceState>();
390
389
  D_ASSERT(gstate.is_finalized);
391
390
  if (state.finished) {
392
- return;
391
+ return SourceResultType::FINISHED;
393
392
  }
394
393
 
395
394
  // special case hack to sort out aggregating from empty intermediates
@@ -421,11 +420,11 @@ void RadixPartitionedHashTable::GetData(ExecutionContext &context, DataChunk &ch
421
420
  chunk.data[null_groups.size() + op.aggregates.size() + i].Reference(grouping_values[i]);
422
421
  }
423
422
  state.finished = true;
424
- return;
423
+ return chunk.size() == 0 ? SourceResultType::FINISHED : SourceResultType::HAVE_MORE_OUTPUT;
425
424
  }
426
425
  if (gstate.is_empty) {
427
426
  state.finished = true;
428
- return;
427
+ return chunk.size() == 0 ? SourceResultType::FINISHED : SourceResultType::HAVE_MORE_OUTPUT;
429
428
  }
430
429
  idx_t elements_found = 0;
431
430
 
@@ -461,7 +460,7 @@ void RadixPartitionedHashTable::GetData(ExecutionContext &context, DataChunk &ch
461
460
  ht_index = state.ht_index;
462
461
  if (ht_index >= gstate.finalized_hts.size()) {
463
462
  state.finished = true;
464
- return;
463
+ return chunk.size() == 0 ? SourceResultType::FINISHED : SourceResultType::HAVE_MORE_OUTPUT;
465
464
  }
466
465
  }
467
466
  D_ASSERT(ht_index < gstate.finalized_hts.size());
@@ -511,6 +510,7 @@ void RadixPartitionedHashTable::GetData(ExecutionContext &context, DataChunk &ch
511
510
  for (idx_t i = 0; i < op.grouping_functions.size(); i++) {
512
511
  chunk.data[op.GroupCount() + op.aggregates.size() + i].Reference(grouping_values[i]);
513
512
  }
513
+ return chunk.size() == 0 ? SourceResultType::FINISHED : SourceResultType::HAVE_MORE_OUTPUT;
514
514
  }
515
515
 
516
516
  } // namespace duckdb
@@ -10,6 +10,7 @@
10
10
  #include "duckdb/common/types/time.hpp"
11
11
  #include "duckdb/common/types/timestamp.hpp"
12
12
  #include "duckdb/common/vector_operations/vector_operations.hpp"
13
+ #include "duckdb/common/enum_util.hpp"
13
14
  #include "duckdb/function/scalar/operators.hpp"
14
15
  #include "duckdb/planner/expression/bound_function_expression.hpp"
15
16
  #include "duckdb/function/scalar/nested_functions.hpp"
@@ -365,8 +366,8 @@ ScalarFunction AddFun::GetFunction(const LogicalType &left_type, const LogicalTy
365
366
  break;
366
367
  }
367
368
  // LCOV_EXCL_START
368
- throw NotImplementedException("AddFun for types %s, %s", LogicalTypeIdToString(left_type.id()),
369
- LogicalTypeIdToString(right_type.id()));
369
+ throw NotImplementedException("AddFun for types %s, %s", EnumUtil::ToString(left_type.id()),
370
+ EnumUtil::ToString(right_type.id()));
370
371
  // LCOV_EXCL_STOP
371
372
  }
372
373
 
@@ -617,8 +618,8 @@ ScalarFunction SubtractFun::GetFunction(const LogicalType &left_type, const Logi
617
618
  break;
618
619
  }
619
620
  // LCOV_EXCL_START
620
- throw NotImplementedException("SubtractFun for types %s, %s", LogicalTypeIdToString(left_type.id()),
621
- LogicalTypeIdToString(right_type.id()));
621
+ throw NotImplementedException("SubtractFun for types %s, %s", EnumUtil::ToString(left_type.id()),
622
+ EnumUtil::ToString(right_type.id()));
622
623
  // LCOV_EXCL_STOP
623
624
  }
624
625
 
@@ -253,6 +253,9 @@ static void SetVectorString(Vector &vector, idx_t size, char *cdata, T *offsets)
253
253
  }
254
254
  auto cptr = cdata + offsets[row_idx];
255
255
  auto str_len = offsets[row_idx + 1] - offsets[row_idx];
256
+ if (str_len > NumericLimits<uint32_t>::Maximum()) { // LCOV_EXCL_START
257
+ throw ConversionException("DuckDB does not support Strings over 4GB");
258
+ } // LCOV_EXCL_STOP
256
259
  strings[row_idx] = string_t(cptr, str_len);
257
260
  }
258
261
  }
@@ -406,9 +409,6 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLoca
406
409
  auto original_type = arrow_convert_data[col_idx]->variable_sz_type[arrow_convert_idx.variable_sized_index++];
407
410
  auto cdata = (char *)array.buffers[2];
408
411
  if (original_type.first == ArrowVariableSizeType::SUPER_SIZE) {
409
- if (((uint64_t *)array.buffers[1])[array.length] > NumericLimits<uint32_t>::Maximum()) { // LCOV_EXCL_START
410
- throw ConversionException("DuckDB does not support Strings over 4GB");
411
- } // LCOV_EXCL_STOP
412
412
  auto offsets = (uint64_t *)array.buffers[1] + array.offset + scan_state.chunk_offset;
413
413
  if (nested_offset != -1) {
414
414
  offsets = (uint64_t *)array.buffers[1] + array.offset + nested_offset;