duckdb 0.7.2-dev2867.0 → 0.7.2-dev3117.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319)
  1. package/binding.gyp +2 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/icu/icu-datepart.cpp +5 -1
  4. package/src/duckdb/extension/json/include/json_deserializer.hpp +1 -0
  5. package/src/duckdb/extension/json/include/json_serializer.hpp +8 -1
  6. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +1 -3
  7. package/src/duckdb/extension/json/json_functions/json_structure.cpp +3 -3
  8. package/src/duckdb/extension/json/json_functions/json_transform.cpp +3 -2
  9. package/src/duckdb/extension/parquet/parquet-extension.cpp +9 -7
  10. package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +18 -7
  11. package/src/duckdb/src/catalog/default/default_functions.cpp +2 -0
  12. package/src/duckdb/src/common/arrow/arrow_appender.cpp +3 -3
  13. package/src/duckdb/src/common/arrow/arrow_converter.cpp +2 -2
  14. package/src/duckdb/src/common/enum_util.cpp +5908 -0
  15. package/src/duckdb/src/common/enums/expression_type.cpp +216 -4
  16. package/src/duckdb/src/common/enums/join_type.cpp +6 -5
  17. package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
  18. package/src/duckdb/src/common/exception.cpp +1 -1
  19. package/src/duckdb/src/common/exception_format_value.cpp +2 -2
  20. package/src/duckdb/src/common/multi_file_reader.cpp +14 -0
  21. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +143 -0
  22. package/src/duckdb/src/common/serializer/binary_serializer.cpp +160 -0
  23. package/src/duckdb/src/common/sort/partition_state.cpp +1 -1
  24. package/src/duckdb/src/common/string_util.cpp +6 -1
  25. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +3 -3
  26. package/src/duckdb/src/common/types.cpp +11 -10
  27. package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +4 -4
  28. package/src/duckdb/src/core_functions/function_list.cpp +2 -0
  29. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +2 -1
  30. package/src/duckdb/src/core_functions/scalar/list/list_sort.cpp +2 -3
  31. package/src/duckdb/src/core_functions/scalar/string/format_bytes.cpp +29 -0
  32. package/src/duckdb/src/execution/aggregate_hashtable.cpp +3 -3
  33. package/src/duckdb/src/execution/index/art/art.cpp +5 -1
  34. package/src/duckdb/src/execution/operator/aggregate/distinct_aggregate_data.cpp +1 -1
  35. package/src/duckdb/src/execution/operator/aggregate/grouped_aggregate_data.cpp +2 -2
  36. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +65 -45
  37. package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +17 -11
  38. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -39
  39. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +10 -9
  40. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -4
  41. package/src/duckdb/src/execution/operator/helper/physical_explain_analyze.cpp +6 -21
  42. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +13 -13
  43. package/src/duckdb/src/execution/operator/helper/physical_limit_percent.cpp +15 -14
  44. package/src/duckdb/src/execution/operator/helper/physical_load.cpp +3 -2
  45. package/src/duckdb/src/execution/operator/helper/physical_materialized_collector.cpp +4 -4
  46. package/src/duckdb/src/execution/operator/helper/physical_pragma.cpp +4 -2
  47. package/src/duckdb/src/execution/operator/helper/physical_prepare.cpp +4 -2
  48. package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +10 -8
  49. package/src/duckdb/src/execution/operator/helper/physical_reset.cpp +4 -3
  50. package/src/duckdb/src/execution/operator/helper/physical_set.cpp +7 -6
  51. package/src/duckdb/src/execution/operator/helper/physical_streaming_sample.cpp +2 -1
  52. package/src/duckdb/src/execution/operator/helper/physical_transaction.cpp +4 -2
  53. package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +8 -8
  54. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +17 -16
  55. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +12 -9
  56. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +2 -1
  57. package/src/duckdb/src/execution/operator/join/physical_cross_product.cpp +3 -4
  58. package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +5 -5
  59. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +16 -15
  60. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +13 -12
  61. package/src/duckdb/src/execution/operator/join/physical_nested_loop_join.cpp +12 -10
  62. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +13 -11
  63. package/src/duckdb/src/execution/operator/join/physical_positional_join.cpp +8 -6
  64. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +1 -1
  65. package/src/duckdb/src/execution/operator/order/physical_order.cpp +13 -13
  66. package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +8 -8
  67. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +165 -0
  68. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +160 -145
  69. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +11 -26
  70. package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +14 -19
  71. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +7 -6
  72. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +18 -30
  73. package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +14 -18
  74. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +6 -4
  75. package/src/duckdb/src/execution/operator/scan/physical_dummy_scan.cpp +4 -19
  76. package/src/duckdb/src/execution/operator/scan/physical_empty_result.cpp +3 -2
  77. package/src/duckdb/src/execution/operator/scan/physical_positional_scan.cpp +14 -5
  78. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +6 -4
  79. package/src/duckdb/src/execution/operator/schema/physical_alter.cpp +3 -19
  80. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +13 -25
  81. package/src/duckdb/src/execution/operator/schema/physical_create_function.cpp +4 -19
  82. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +8 -9
  83. package/src/duckdb/src/execution/operator/schema/physical_create_schema.cpp +4 -19
  84. package/src/duckdb/src/execution/operator/schema/physical_create_sequence.cpp +4 -19
  85. package/src/duckdb/src/execution/operator/schema/physical_create_table.cpp +4 -19
  86. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +9 -26
  87. package/src/duckdb/src/execution/operator/schema/physical_create_view.cpp +4 -19
  88. package/src/duckdb/src/execution/operator/schema/physical_detach.cpp +4 -19
  89. package/src/duckdb/src/execution/operator/schema/physical_drop.cpp +3 -19
  90. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +9 -8
  91. package/src/duckdb/src/execution/operator/set/physical_union.cpp +1 -1
  92. package/src/duckdb/src/execution/partitionable_hashtable.cpp +2 -2
  93. package/src/duckdb/src/execution/physical_operator.cpp +11 -5
  94. package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +25 -4
  95. package/src/duckdb/src/execution/physical_plan/plan_sample.cpp +2 -1
  96. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +16 -16
  97. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +5 -4
  98. package/src/duckdb/src/function/table/arrow_conversion.cpp +3 -3
  99. package/src/duckdb/src/function/table/copy_csv.cpp +85 -29
  100. package/src/duckdb/src/function/table/read_csv.cpp +17 -11
  101. package/src/duckdb/src/function/table/system/duckdb_settings.cpp +2 -1
  102. package/src/duckdb/src/function/table/system/duckdb_types.cpp +2 -1
  103. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  104. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +7 -1
  105. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +958 -0
  106. package/src/duckdb/src/include/duckdb/common/enums/join_type.hpp +3 -3
  107. package/src/duckdb/src/include/duckdb/common/enums/operator_result_type.hpp +16 -4
  108. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
  109. package/src/duckdb/src/include/duckdb/common/exception.hpp +4 -4
  110. package/src/duckdb/src/include/duckdb/common/exception_format_value.hpp +3 -2
  111. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +44 -0
  112. package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +45 -0
  113. package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +93 -0
  114. package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +92 -0
  115. package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +7 -3
  116. package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +2 -2
  117. package/src/duckdb/src/include/duckdb/common/set.hpp +2 -1
  118. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -1
  119. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +1 -1
  120. package/src/duckdb/src/include/duckdb/common/types.hpp +1 -0
  121. package/src/duckdb/src/include/duckdb/common/vector.hpp +61 -14
  122. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +15 -0
  123. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +3 -2
  124. package/src/duckdb/src/include/duckdb/execution/executor.hpp +10 -1
  125. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/distinct_aggregate_data.hpp +2 -2
  126. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/grouped_aggregate_data.hpp +2 -2
  127. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +8 -11
  128. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +2 -4
  129. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp +3 -7
  130. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +2 -4
  131. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +1 -2
  132. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_explain_analyze.hpp +2 -5
  133. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit.hpp +2 -4
  134. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit_percent.hpp +2 -4
  135. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_load.hpp +1 -2
  136. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_materialized_collector.hpp +1 -2
  137. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_pragma.hpp +1 -2
  138. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_prepare.hpp +1 -2
  139. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_reservoir_sample.hpp +2 -4
  140. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_reset.hpp +1 -2
  141. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_set.hpp +1 -2
  142. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_transaction.hpp +1 -2
  143. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_vacuum.hpp +2 -4
  144. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -4
  145. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_blockwise_nl_join.hpp +2 -4
  146. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +1 -2
  147. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_delim_join.hpp +1 -2
  148. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +2 -4
  149. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +2 -4
  150. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_nested_loop_join.hpp +2 -4
  151. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +2 -4
  152. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_positional_join.hpp +2 -4
  153. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_order.hpp +2 -4
  154. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_top_n.hpp +2 -4
  155. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +68 -0
  156. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +3 -5
  157. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +4 -5
  158. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_delete.hpp +2 -4
  159. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_export.hpp +2 -4
  160. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +2 -4
  161. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_update.hpp +2 -4
  162. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +1 -2
  163. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_dummy_scan.hpp +1 -3
  164. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_empty_result.hpp +1 -2
  165. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_positional_scan.hpp +1 -2
  166. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +1 -2
  167. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_alter.hpp +1 -3
  168. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_attach.hpp +1 -3
  169. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_function.hpp +1 -3
  170. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_index.hpp +2 -4
  171. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_schema.hpp +1 -3
  172. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_sequence.hpp +1 -3
  173. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_table.hpp +1 -3
  174. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_type.hpp +2 -5
  175. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_view.hpp +1 -3
  176. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_detach.hpp +1 -3
  177. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_drop.hpp +1 -3
  178. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +2 -4
  179. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +3 -3
  180. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +7 -4
  181. package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +26 -6
  182. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +6 -6
  183. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +2 -1
  184. package/src/duckdb/src/include/duckdb/function/copy_function.hpp +32 -4
  185. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +4 -2
  186. package/src/duckdb/src/include/duckdb/function/table_function.hpp +0 -1
  187. package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
  188. package/src/duckdb/src/include/duckdb/main/config.hpp +4 -0
  189. package/src/duckdb/src/include/duckdb/main/database.hpp +1 -3
  190. package/src/duckdb/src/include/duckdb/main/database_path_and_type.hpp +24 -0
  191. package/src/duckdb/src/include/duckdb/main/relation/setop_relation.hpp +1 -0
  192. package/src/duckdb/src/include/duckdb/parallel/event.hpp +1 -1
  193. package/src/duckdb/src/include/duckdb/parallel/interrupt.hpp +63 -0
  194. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +16 -3
  195. package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +51 -7
  196. package/src/duckdb/src/include/duckdb/parallel/task.hpp +21 -2
  197. package/src/duckdb/src/include/duckdb/parallel/task_counter.hpp +2 -2
  198. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +2 -2
  199. package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +1 -0
  200. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +6 -0
  201. package/src/duckdb/src/include/duckdb/planner/expression/bound_columnref_expression.hpp +1 -0
  202. package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +2 -0
  203. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +2 -0
  204. package/src/duckdb/src/include/duckdb/planner/operator/logical_aggregate.hpp +2 -1
  205. package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +1 -0
  206. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +1 -0
  207. package/src/duckdb/src/include/duckdb/planner/operator/logical_delete.hpp +1 -0
  208. package/src/duckdb/src/include/duckdb/planner/operator/logical_delim_get.hpp +1 -0
  209. package/src/duckdb/src/include/duckdb/planner/operator/logical_dummy_scan.hpp +1 -0
  210. package/src/duckdb/src/include/duckdb/planner/operator/logical_expression_get.hpp +1 -0
  211. package/src/duckdb/src/include/duckdb/planner/operator/logical_insert.hpp +1 -0
  212. package/src/duckdb/src/include/duckdb/planner/operator/logical_pivot.hpp +1 -0
  213. package/src/duckdb/src/include/duckdb/planner/operator/logical_projection.hpp +1 -0
  214. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +1 -0
  215. package/src/duckdb/src/include/duckdb/planner/operator/logical_set_operation.hpp +1 -0
  216. package/src/duckdb/src/include/duckdb/planner/operator/logical_unnest.hpp +1 -0
  217. package/src/duckdb/src/include/duckdb/planner/operator/logical_update.hpp +1 -0
  218. package/src/duckdb/src/include/duckdb/planner/operator/logical_window.hpp +1 -0
  219. package/src/duckdb/src/include/duckdb/planner/query_node/bound_select_node.hpp +1 -1
  220. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -0
  221. package/src/duckdb/src/include/duckdb/storage/optimistic_data_writer.hpp +46 -0
  222. package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +24 -3
  223. package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +46 -1
  224. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +9 -10
  225. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +1 -1
  226. package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +2 -2
  227. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -3
  228. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -0
  229. package/src/duckdb/src/include/duckdb/storage/table/segment_base.hpp +1 -1
  230. package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +22 -0
  231. package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +3 -3
  232. package/src/duckdb/src/include/duckdb/storage/table/struct_column_data.hpp +2 -2
  233. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +0 -2
  234. package/src/duckdb/src/include/duckdb/storage/table/validity_column_data.hpp +1 -2
  235. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +9 -34
  236. package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +26 -0
  237. package/src/duckdb/src/include/duckdb/verification/no_operator_caching_verifier.hpp +25 -0
  238. package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +6 -0
  239. package/src/duckdb/src/main/client_context.cpp +1 -0
  240. package/src/duckdb/src/main/client_verify.cpp +5 -0
  241. package/src/duckdb/src/main/config.cpp +4 -0
  242. package/src/duckdb/src/main/database.cpp +22 -34
  243. package/src/duckdb/src/main/database_path_and_type.cpp +23 -0
  244. package/src/duckdb/src/main/extension/extension_load.cpp +19 -15
  245. package/src/duckdb/src/main/relation/join_relation.cpp +2 -1
  246. package/src/duckdb/src/main/relation/setop_relation.cpp +2 -3
  247. package/src/duckdb/src/parallel/event.cpp +1 -1
  248. package/src/duckdb/src/parallel/executor.cpp +39 -3
  249. package/src/duckdb/src/parallel/executor_task.cpp +11 -0
  250. package/src/duckdb/src/parallel/interrupt.cpp +57 -0
  251. package/src/duckdb/src/parallel/pipeline.cpp +49 -6
  252. package/src/duckdb/src/parallel/pipeline_executor.cpp +248 -69
  253. package/src/duckdb/src/parallel/pipeline_initialize_event.cpp +1 -1
  254. package/src/duckdb/src/parallel/task_scheduler.cpp +57 -22
  255. package/src/duckdb/src/parser/base_expression.cpp +6 -0
  256. package/src/duckdb/src/parser/expression/window_expression.cpp +1 -1
  257. package/src/duckdb/src/parser/parsed_data/sample_options.cpp +2 -2
  258. package/src/duckdb/src/parser/query_node/select_node.cpp +1 -1
  259. package/src/duckdb/src/parser/result_modifier.cpp +2 -2
  260. package/src/duckdb/src/parser/statement/select_statement.cpp +0 -44
  261. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -3
  262. package/src/duckdb/src/parser/tableref.cpp +1 -1
  263. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +3 -3
  264. package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +6 -0
  265. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +4 -1
  266. package/src/duckdb/src/planner/expression/bound_columnref_expression.cpp +17 -3
  267. package/src/duckdb/src/planner/expression/bound_reference_expression.cpp +8 -2
  268. package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +7 -0
  269. package/src/duckdb/src/planner/operator/logical_aggregate.cpp +14 -2
  270. package/src/duckdb/src/planner/operator/logical_column_data_get.cpp +11 -0
  271. package/src/duckdb/src/planner/operator/logical_comparison_join.cpp +2 -2
  272. package/src/duckdb/src/planner/operator/logical_cteref.cpp +11 -0
  273. package/src/duckdb/src/planner/operator/logical_delete.cpp +10 -0
  274. package/src/duckdb/src/planner/operator/logical_delim_get.cpp +12 -1
  275. package/src/duckdb/src/planner/operator/logical_dummy_scan.cpp +12 -1
  276. package/src/duckdb/src/planner/operator/logical_expression_get.cpp +12 -1
  277. package/src/duckdb/src/planner/operator/logical_get.cpp +10 -4
  278. package/src/duckdb/src/planner/operator/logical_insert.cpp +12 -1
  279. package/src/duckdb/src/planner/operator/logical_pivot.cpp +11 -0
  280. package/src/duckdb/src/planner/operator/logical_projection.cpp +11 -0
  281. package/src/duckdb/src/planner/operator/logical_recursive_cte.cpp +11 -0
  282. package/src/duckdb/src/planner/operator/logical_set_operation.cpp +11 -0
  283. package/src/duckdb/src/planner/operator/logical_unnest.cpp +12 -1
  284. package/src/duckdb/src/planner/operator/logical_update.cpp +10 -0
  285. package/src/duckdb/src/planner/operator/logical_window.cpp +11 -0
  286. package/src/duckdb/src/storage/checkpoint_manager.cpp +1 -1
  287. package/src/duckdb/src/storage/data_table.cpp +5 -0
  288. package/src/duckdb/src/storage/local_storage.cpp +40 -110
  289. package/src/duckdb/src/storage/optimistic_data_writer.cpp +96 -0
  290. package/src/duckdb/src/storage/partial_block_manager.cpp +73 -9
  291. package/src/duckdb/src/storage/single_file_block_manager.cpp +3 -1
  292. package/src/duckdb/src/storage/standard_buffer_manager.cpp +17 -12
  293. package/src/duckdb/src/storage/statistics/base_statistics.cpp +3 -0
  294. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +90 -82
  295. package/src/duckdb/src/storage/table/column_data.cpp +19 -45
  296. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +7 -7
  297. package/src/duckdb/src/storage/table/column_segment.cpp +1 -1
  298. package/src/duckdb/src/storage/table/list_column_data.cpp +6 -11
  299. package/src/duckdb/src/storage/table/row_group.cpp +13 -14
  300. package/src/duckdb/src/storage/table/row_group_collection.cpp +10 -4
  301. package/src/duckdb/src/storage/table/standard_column_data.cpp +6 -10
  302. package/src/duckdb/src/storage/table/struct_column_data.cpp +7 -13
  303. package/src/duckdb/src/storage/table/update_segment.cpp +0 -25
  304. package/src/duckdb/src/storage/table/validity_column_data.cpp +2 -6
  305. package/src/duckdb/src/transaction/commit_state.cpp +4 -4
  306. package/src/duckdb/src/verification/deserialized_statement_verifier.cpp +2 -1
  307. package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +20 -0
  308. package/src/duckdb/src/verification/no_operator_caching_verifier.cpp +13 -0
  309. package/src/duckdb/src/verification/statement_verifier.cpp +7 -0
  310. package/src/duckdb/ub_src_common.cpp +2 -2
  311. package/src/duckdb/ub_src_common_serializer.cpp +4 -2
  312. package/src/duckdb/ub_src_core_functions_scalar_string.cpp +2 -0
  313. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  314. package/src/duckdb/ub_src_main.cpp +2 -0
  315. package/src/duckdb/ub_src_parallel.cpp +2 -0
  316. package/src/duckdb/ub_src_storage.cpp +2 -0
  317. package/src/duckdb/src/common/serializer/enum_serializer.cpp +0 -1180
  318. package/src/duckdb/src/common/vector.cpp +0 -12
  319. package/src/duckdb/src/include/duckdb/common/serializer/enum_serializer.hpp +0 -113
@@ -17,6 +17,7 @@ namespace duckdb {
17
17
 
18
18
  class Binder;
19
19
  struct BoundStatement;
20
+ class ColumnDataCollection;
20
21
  class ExecutionContext;
21
22
 
22
23
  struct LocalFunctionData {
@@ -51,6 +52,24 @@ struct GlobalFunctionData {
51
52
  }
52
53
  };
53
54
 
55
+ struct PreparedBatchData {
56
+ virtual ~PreparedBatchData() {
57
+ }
58
+
59
+ template <class TARGET>
60
+ TARGET &Cast() {
61
+ D_ASSERT(dynamic_cast<TARGET *>(this));
62
+ return (TARGET &)*this;
63
+ }
64
+ template <class TARGET>
65
+ const TARGET &Cast() const {
66
+ D_ASSERT(dynamic_cast<const TARGET *>(this));
67
+ return (const TARGET &)*this;
68
+ }
69
+ };
70
+
71
+ enum class CopyFunctionExecutionMode { REGULAR_COPY_TO_FILE, PARALLEL_COPY_TO_FILE, BATCH_COPY_TO_FILE };
72
+
54
73
  typedef BoundStatement (*copy_to_plan_t)(Binder &binder, CopyStatement &stmt);
55
74
  typedef unique_ptr<FunctionData> (*copy_to_bind_t)(ClientContext &context, CopyInfo &info, vector<string> &names,
56
75
  vector<LogicalType> &sql_types);
@@ -71,15 +90,21 @@ typedef unique_ptr<FunctionData> (*copy_to_deserialize_t)(ClientContext &context
71
90
  typedef unique_ptr<FunctionData> (*copy_from_bind_t)(ClientContext &context, CopyInfo &info,
72
91
  vector<string> &expected_names,
73
92
  vector<LogicalType> &expected_types);
74
- typedef bool (*copy_to_is_parallel_t)(ClientContext &context, FunctionData &bind_data);
93
+ typedef CopyFunctionExecutionMode (*copy_to_execution_mode_t)(bool preserve_insertion_order, bool supports_batch_index);
94
+
95
+ typedef unique_ptr<PreparedBatchData> (*copy_prepare_batch_t)(ClientContext &context, FunctionData &bind_data,
96
+ GlobalFunctionData &gstate,
97
+ unique_ptr<ColumnDataCollection> collection);
98
+ typedef void (*copy_flush_batch_t)(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate,
99
+ PreparedBatchData &batch);
75
100
 
76
101
  class CopyFunction : public Function {
77
102
  public:
78
103
  explicit CopyFunction(string name)
79
104
  : Function(name), plan(nullptr), copy_to_bind(nullptr), copy_to_initialize_local(nullptr),
80
105
  copy_to_initialize_global(nullptr), copy_to_sink(nullptr), copy_to_combine(nullptr),
81
- copy_to_finalize(nullptr), parallel(nullptr), serialize(nullptr), deserialize(nullptr),
82
- copy_from_bind(nullptr) {
106
+ copy_to_finalize(nullptr), execution_mode(nullptr), prepare_batch(nullptr), flush_batch(nullptr),
107
+ serialize(nullptr), deserialize(nullptr), copy_from_bind(nullptr) {
83
108
  }
84
109
 
85
110
  //! Plan rewrite copy function
@@ -91,7 +116,10 @@ public:
91
116
  copy_to_sink_t copy_to_sink;
92
117
  copy_to_combine_t copy_to_combine;
93
118
  copy_to_finalize_t copy_to_finalize;
94
- copy_to_is_parallel_t parallel;
119
+ copy_to_execution_mode_t execution_mode;
120
+
121
+ copy_prepare_batch_t prepare_batch;
122
+ copy_flush_batch_t flush_batch;
95
123
 
96
124
  copy_to_serialize_t serialize;
97
125
  copy_to_deserialize_t deserialize;
@@ -54,6 +54,8 @@ struct WriteCSVData : public BaseCSVData {
54
54
  bool is_simple;
55
55
  //! The size of the CSV file (in bytes) that we buffer before we flush it to disk
56
56
  idx_t flush_size = 4096 * 8;
57
+ //! For each byte whether or not the CSV file requires quotes when containing the byte
58
+ unique_ptr<bool[]> requires_quotes;
57
59
  };
58
60
 
59
61
  struct ColumnInfo {
@@ -97,8 +99,8 @@ struct ReadCSVData : public BaseCSVData {
97
99
  bool single_threaded = false;
98
100
  //! Reader bind data
99
101
  MultiFileReaderBindData reader_bind;
100
- //! If all files are On-Disk file (e.g., not a pipe)
101
- bool file_exists = true;
102
+ //! If any file is a pipe
103
+ bool is_pipe = false;
102
104
  vector<ColumnInfo> column_info;
103
105
 
104
106
  void Initialize(unique_ptr<BufferedCSVReader> &reader) {
@@ -182,7 +182,6 @@ typedef unique_ptr<LocalTableFunctionState> (*table_function_init_local_t)(Execu
182
182
  typedef unique_ptr<BaseStatistics> (*table_statistics_t)(ClientContext &context, const FunctionData *bind_data,
183
183
  column_t column_index);
184
184
  typedef void (*table_function_t)(ClientContext &context, TableFunctionInput &data, DataChunk &output);
185
-
186
185
  typedef OperatorResultType (*table_in_out_function_t)(ExecutionContext &context, TableFunctionInput &data,
187
186
  DataChunk &input, DataChunk &output);
188
187
  typedef OperatorFinalizeResultType (*table_in_out_function_final_t)(ExecutionContext &context, TableFunctionInput &data,
@@ -63,6 +63,8 @@ struct ClientConfig {
63
63
  bool verify_serializer = false;
64
64
  //! Enable the running of optimizers
65
65
  bool enable_optimizer = true;
66
+ //! Enable caching operators
67
+ bool enable_caching_operators = true;
66
68
  //! Force parallelism of small tables, used for testing
67
69
  bool verify_parallelism = false;
68
70
  //! Force index join independent of table cardinality, used for testing
@@ -84,6 +84,8 @@ struct ExtensionOption {
84
84
  struct DBConfigOptions {
85
85
  //! Database file path. May be empty for in-memory mode
86
86
  string database_path;
87
+ //! Database type. If empty, automatically extracted from `database_path`, where a `type:path` syntax is expected
88
+ string database_type;
87
89
  //! Access mode of the database (AUTOMATIC, READ_ONLY or READ_WRITE)
88
90
  AccessMode access_mode = AccessMode::AUTOMATIC;
89
91
  //! Checkpoint when WAL reaches this size (default: 16MB)
@@ -149,6 +151,8 @@ struct DBConfigOptions {
149
151
  DebugInitialize debug_initialize = DebugInitialize::NO_INITIALIZE;
150
152
  //! The set of unrecognized (other) options
151
153
  unordered_map<string, Value> unrecognized_options;
154
+ //! Whether to print bindings when printing the plan (debug mode only)
155
+ static bool debug_print_bindings;
152
156
 
153
157
  bool operator==(const DBConfigOptions &other) const;
154
158
  };
@@ -54,13 +54,11 @@ public:
54
54
 
55
55
  DUCKDB_API bool TryGetCurrentSetting(const std::string &key, Value &result);
56
56
 
57
- //! Get the database extension type from a given path
58
- string ExtractDatabaseType(string &path);
59
57
  unique_ptr<AttachedDatabase> CreateAttachedDatabase(AttachInfo &info, const string &type, AccessMode access_mode);
60
58
 
61
59
  private:
62
60
  void Initialize(const char *path, DBConfig *config);
63
- void CreateDatabase(const string &database_type);
61
+ void CreateMainDatabase();
64
62
 
65
63
  void Configure(DBConfig &config);
66
64
 
@@ -0,0 +1,24 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/main/database_path_and_type.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include <string>
12
+ #include "duckdb/main/config.hpp"
13
+
14
+ namespace duckdb {
15
+
16
+ struct DBPathAndType {
17
+
18
+ //! Parse database extension type and rest of path from combined form (type:path)
19
+ static DBPathAndType Parse(const string &combined_path, const DBConfig &config);
20
+
21
+ const string path;
22
+ const string type;
23
+ };
24
+ } // namespace duckdb
@@ -20,6 +20,7 @@ public:
20
20
  shared_ptr<Relation> left;
21
21
  shared_ptr<Relation> right;
22
22
  SetOperationType setop_type;
23
+ vector<ColumnDefinition> columns;
23
24
 
24
25
  public:
25
26
  unique_ptr<QueryNode> GetQueryNode() override;
@@ -41,7 +41,7 @@ public:
41
41
 
42
42
  void CompleteDependency();
43
43
 
44
- void SetTasks(vector<unique_ptr<Task>> tasks);
44
+ void SetTasks(vector<shared_ptr<Task>> tasks);
45
45
 
46
46
  void InsertEvent(shared_ptr<Event> replacement_event);
47
47
 
@@ -0,0 +1,63 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // src/include/duckdb/parallel/interrupt.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/common/atomic.hpp"
12
+ #include "duckdb/common/mutex.hpp"
13
+ #include "duckdb/parallel/task.hpp"
14
+ #include <condition_variable>
15
+ #include <memory>
16
+
17
+ namespace duckdb {
18
+
19
+ //! InterruptMode specifies how operators should block/unblock, note that this will happen transparently to the
20
+ //! operator, as the operator only needs to return a BLOCKED result and call the callback using the InterruptState.
21
+ //! NO_INTERRUPTS: No blocking mode is specified, an error will be thrown when the operator blocks. Should only be used
22
+ //! when manually calling operators that are known never to block.
23
+ //! TASK: A weak pointer to a task is provided. On the callback, this task will be signalled. If the Task has
24
+ //! been deleted, this callback becomes a NOP. This is the preferred way to await blocked pipelines.
25
+ //! BLOCKING: The caller has blocked awaiting some synchronization primitive to wait for the callback.
26
+ enum class InterruptMode : uint8_t { NO_INTERRUPTS, TASK, BLOCKING };
27
+
28
+ //! Synchronization primitive used to await a callback in InterruptMode::BLOCKING.
29
+ struct InterruptDoneSignalState {
30
+ //! Called by the callback to signal the interrupt is over
31
+ void Signal();
32
+ //! Await the callback signalling the interrupt is over
33
+ void Await();
34
+
35
+ protected:
36
+ mutex lock;
37
+ std::condition_variable cv;
38
+ bool done = false;
39
+ };
40
+
41
+ //! State required to make the callback after some asynchronous operation within an operator source / sink.
42
+ class InterruptState {
43
+ public:
44
+ //! Default interrupt state will be set to InterruptMode::NO_INTERRUPTS and throw an error on use of Callback()
45
+ InterruptState();
46
+ //! Register the task to be interrupted and set mode to InterruptMode::TASK, the preferred way to handle interrupts
47
+ InterruptState(weak_ptr<Task> task);
48
+ //! Register signal state and set mode to InterruptMode::BLOCKING, used for code paths without Task.
49
+ InterruptState(weak_ptr<InterruptDoneSignalState> done_signal);
50
+
51
+ //! Perform the callback to indicate the Interrupt is over
52
+ DUCKDB_API void Callback() const;
53
+
54
+ protected:
55
+ //! Current interrupt mode
56
+ InterruptMode mode;
57
+ //! Task ptr for InterruptMode::TASK
58
+ weak_ptr<Task> current_task;
59
+ //! Signal state for InterruptMode::BLOCKING
60
+ weak_ptr<InterruptDoneSignalState> signal_state;
61
+ };
62
+
63
+ } // namespace duckdb
@@ -9,6 +9,8 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/common/atomic.hpp"
12
+ #include "duckdb/common/unordered_set.hpp"
13
+ #include "duckdb/common/set.hpp"
12
14
  #include "duckdb/execution/physical_operator.hpp"
13
15
  #include "duckdb/function/table_function.hpp"
14
16
  #include "duckdb/parallel/task_scheduler.hpp"
@@ -64,9 +66,7 @@ public:
64
66
  void Reset();
65
67
  void ResetSink();
66
68
  void ResetSource(bool force);
67
- void ClearSource() {
68
- source_state.reset();
69
- }
69
+ void ClearSource();
70
70
  void Schedule(shared_ptr<Event> &event);
71
71
 
72
72
  //! Finalize this pipeline
@@ -94,6 +94,12 @@ public:
94
94
  //! Returns whether any of the operators in the pipeline care about preserving order
95
95
  bool IsOrderDependent() const;
96
96
 
97
+ //! Registers a new batch index for a pipeline executor - returns the current minimum batch index
98
+ idx_t RegisterNewBatchIndex();
99
+
100
+ //! Updates the batch index of a pipeline (and returns the new minimum batch index)
101
+ idx_t UpdateBatchIndex(idx_t old_index, idx_t new_index);
102
+
97
103
  private:
98
104
  //! Whether or not the pipeline has been readied
99
105
  bool ready;
@@ -116,6 +122,13 @@ private:
116
122
 
117
123
  //! The base batch index of this pipeline
118
124
  idx_t base_batch_index = 0;
125
+ //! Lock for accessing the set of batch indexes
126
+ mutex batch_lock;
127
+ //! The set of batch indexes that are currently being processed
128
+ //! Despite batch indexes being unique - this is a multiset
129
+ //! The reason is that when we start a new pipeline we insert the current minimum batch index as a placeholder
130
+ //! Which leads to duplicate entries in the set of active batch indexes
131
+ multiset<idx_t> batch_indexes;
119
132
 
120
133
  private:
121
134
  void ScheduleSequentialTask(shared_ptr<Event> &event);
@@ -9,6 +9,7 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/common/types/data_chunk.hpp"
12
+ #include "duckdb/parallel/interrupt.hpp"
12
13
  #include "duckdb/parallel/pipeline.hpp"
13
14
  #include "duckdb/execution/physical_operator.hpp"
14
15
  #include "duckdb/parallel/thread_context.hpp"
@@ -20,16 +21,27 @@
20
21
  namespace duckdb {
21
22
  class Executor;
22
23
 
24
+ //! The result of executing a PipelineExecutor
25
+ enum class PipelineExecuteResult {
26
+ //! PipelineExecutor is fully executed: the source is completely exhausted
27
+ FINISHED,
28
+ //! PipelineExecutor is not yet fully executed and can be called again immediately
29
+ NOT_FINISHED,
30
+ //! The PipelineExecutor was interrupted and should not be called again until the interrupt is handled as specified
31
+ //! in the InterruptMode
32
+ INTERRUPTED
33
+ };
34
+
23
35
  //! The PipelineExecutor class executes a single pipeline
24
36
  class PipelineExecutor {
25
37
  public:
26
38
  PipelineExecutor(ClientContext &context, Pipeline &pipeline);
27
39
 
28
40
  //! Fully execute a pipeline with a source and a sink until the source is completely exhausted
29
- void Execute();
30
- //! Execute a pipeline with a source and a sink until finished, or until max_chunks have been processed
41
+ PipelineExecuteResult Execute();
42
+ //! Execute a pipeline with a source and a sink until finished, or until max_chunks were processed from the source
31
43
  //! Returns FINISHED if execution is finished, NOT_FINISHED if Execute should be called again, or INTERRUPTED if it was interrupted
32
- bool Execute(idx_t max_chunks);
44
+ PipelineExecuteResult Execute(idx_t max_chunks);
33
45
 
34
46
  //! Push a single input DataChunk into the pipeline.
35
47
  //! Returns either OperatorResultType::NEED_MORE_INPUT or OperatorResultType::FINISHED
@@ -48,6 +60,9 @@ public:
48
60
  //! This flushes profiler states
49
61
  void PullFinalize();
50
62
 
63
+ //! Registers the task in the interrupt_state to allow Source/Sink operators to block the task
64
+ void SetTaskForInterrupts(weak_ptr<Task> current_task);
65
+
51
66
  private:
52
67
  //! The pipeline to process
53
68
  Pipeline &pipeline;
@@ -65,6 +80,8 @@ private:
65
80
  unique_ptr<LocalSourceState> local_source_state;
66
81
  //! The local sink state (if any)
67
82
  unique_ptr<LocalSinkState> local_sink_state;
83
+ //! The interrupt state, holding required information for sink/source operators to block
84
+ InterruptState interrupt_state;
68
85
 
69
86
  //! The final chunk used for moving data into the sink
70
87
  DataChunk final_chunk;
@@ -79,28 +96,55 @@ private:
79
96
  //! Whether or not this pipeline requires keeping track of the batch index of the source
80
97
  bool requires_batch_index = false;
81
98
 
99
+ //! Source has indicated it is exhausted
100
+ bool exhausted_source = false;
101
+ //! Flushing of intermediate operators has started
102
+ bool started_flushing = false;
103
+ //! Flushing of caching operators is done
104
+ bool done_flushing = false;
105
+
106
+ //! This flag is set when the pipeline gets interrupted by the Sink -> the final_chunk should be re-sink-ed.
107
+ bool remaining_sink_chunk = false;
108
+
109
+ //! Current operator being flushed
110
+ idx_t flushing_idx;
111
+ //! Whether the current flushing_idx should be flushed: this needs to be stored to make flushing code re-entrant
112
+ bool should_flush_current_idx = true;
113
+
82
114
  private:
83
115
  void StartOperator(PhysicalOperator &op);
84
116
  void EndOperator(PhysicalOperator &op, optional_ptr<DataChunk> chunk);
85
117
 
86
118
  //! Reset the operator index to the first operator
87
119
  void GoToSource(idx_t &current_idx, idx_t initial_idx);
88
- void FetchFromSource(DataChunk &result);
120
+ SourceResultType FetchFromSource(DataChunk &result);
89
121
 
90
122
  void FinishProcessing(int32_t operator_idx = -1);
91
123
  bool IsFinished();
92
124
 
125
+ //! Wrappers for sink/source calls to respective operators
126
+ SourceResultType GetData(DataChunk &chunk, OperatorSourceInput &input);
127
+ SinkResultType Sink(DataChunk &chunk, OperatorSinkInput &input);
128
+
93
129
  OperatorResultType ExecutePushInternal(DataChunk &input, idx_t initial_idx = 0);
94
130
  //! Pushes a chunk through the pipeline and returns a single result chunk
95
131
  //! Returns whether or not a new input chunk is needed, or whether or not we are finished
96
132
  OperatorResultType Execute(DataChunk &input, DataChunk &result, idx_t initial_index = 0);
97
133
 
98
- //! FlushCachedOperators methods push/pull any remaining cached results through the pipeline
99
- void FlushCachingOperatorsPull(DataChunk &result);
100
- void FlushCachingOperatorsPush();
134
+ //! Tries to flush all state from intermediate operators. Will return true if all state is flushed, false in the
135
+ //! case of a blocked sink.
136
+ bool TryFlushCachingOperators();
101
137
 
102
138
  static bool CanCacheType(const LogicalType &type);
103
139
  void CacheChunk(DataChunk &input, idx_t operator_idx);
140
+
141
+ #ifdef DUCKDB_DEBUG_ASYNC_SINK_SOURCE
142
+ //! Debugging state: number of times blocked
143
+ int debug_blocked_sink_count = 0;
144
+ int debug_blocked_source_count = 0;
145
+ //! Number of times the Sink/Source will block before actually returning data
146
+ int debug_blocked_target_count = 1;
147
+ #endif
104
148
  };
105
149
 
106
150
  } // namespace duckdb
@@ -13,13 +13,16 @@
13
13
  namespace duckdb {
14
14
  class ClientContext;
15
15
  class Executor;
16
+ class Task;
17
+ class DatabaseInstance;
18
+ struct ProducerToken;
16
19
 
17
20
  enum class TaskExecutionMode : uint8_t { PROCESS_ALL, PROCESS_PARTIAL };
18
21
 
19
- enum class TaskExecutionResult : uint8_t { TASK_FINISHED, TASK_NOT_FINISHED, TASK_ERROR };
22
+ enum class TaskExecutionResult : uint8_t { TASK_FINISHED, TASK_NOT_FINISHED, TASK_ERROR, TASK_BLOCKED };
20
23
 
21
24
  //! Generic parallel task
22
- class Task {
25
+ class Task : public std::enable_shared_from_this<Task> {
23
26
  public:
24
27
  virtual ~Task() {
25
28
  }
@@ -28,7 +31,20 @@ public:
28
31
  //! If mode is PROCESS_ALL, Execute should always finish processing and return TASK_FINISHED
29
32
  //! If mode is PROCESS_PARTIAL, Execute can return TASK_NOT_FINISHED, in which case Execute will be called again
30
33
  //! In case of an error, TASK_ERROR is returned
34
+ //! In case the task was interrupted, TASK_BLOCKED is returned.
31
35
  virtual TaskExecutionResult Execute(TaskExecutionMode mode) = 0;
36
+
37
+ //! Descheduling a task ensures the task is not executed, but remains available for rescheduling as long as
38
+ //! required, generally until some code in an operator calls the InterruptState::Callback() method of a state of the
39
+ //! InterruptMode::TASK mode.
40
+ virtual void Deschedule() {
41
+ throw InternalException("Cannot deschedule task of base Task class");
42
+ };
43
+
44
+ //! Ensures a task is rescheduled to the correct queue
45
+ virtual void Reschedule() {
46
+ throw InternalException("Cannot reschedule task of base Task class");
47
+ }
32
48
  };
33
49
 
34
50
  //! Execute a task within an executor, including exception handling
@@ -39,6 +55,9 @@ public:
39
55
  ExecutorTask(ClientContext &context);
40
56
  virtual ~ExecutorTask();
41
57
 
58
+ void Deschedule() override;
59
+ void Reschedule() override;
60
+
42
61
  Executor &executor;
43
62
 
44
63
  public:
@@ -18,7 +18,7 @@ public:
18
18
  : scheduler(scheduler_p), token(scheduler_p.CreateProducer()), task_count(0), tasks_completed(0) {
19
19
  }
20
20
 
21
- virtual void AddTask(unique_ptr<Task> task) {
21
+ virtual void AddTask(shared_ptr<Task> task) {
22
22
  ++task_count;
23
23
  scheduler.ScheduleTask(*token, std::move(task));
24
24
  }
@@ -29,7 +29,7 @@ public:
29
29
 
30
30
  virtual void Finish() {
31
31
  while (tasks_completed < task_count) {
32
- unique_ptr<Task> task;
32
+ shared_ptr<Task> task;
33
33
  if (scheduler.GetTaskFromProducer(*token, task)) {
34
34
  task->Execute();
35
35
  task.reset();
@@ -47,9 +47,9 @@ public:
47
47
 
48
48
  unique_ptr<ProducerToken> CreateProducer();
49
49
  //! Schedule a task to be executed by the task scheduler
50
- void ScheduleTask(ProducerToken &producer, unique_ptr<Task> task);
50
+ void ScheduleTask(ProducerToken &producer, shared_ptr<Task> task);
51
51
  //! Fetches a task from a specific producer, returns true if successful or false if no tasks were available
52
- bool GetTaskFromProducer(ProducerToken &token, unique_ptr<Task> &task);
52
+ bool GetTaskFromProducer(ProducerToken &token, shared_ptr<Task> &task);
53
53
  //! Run tasks forever until "marker" is set to false, "marker" must remain valid until the thread is joined
54
54
  void ExecuteForever(atomic<bool> *marker);
55
55
  //! Run tasks until `marker` is set to false, `max_tasks` have been completed, or until there are no more tasks
@@ -17,6 +17,7 @@ namespace duckdb {
17
17
 
18
18
  enum class SampleMethod : uint8_t { SYSTEM_SAMPLE = 0, BERNOULLI_SAMPLE = 1, RESERVOIR_SAMPLE = 2 };
19
19
 
20
+ // **DEPRECATED**: Use EnumUtil directly instead.
20
21
  string SampleMethodToString(SampleMethod method);
21
22
 
22
23
  struct SampleOptions {
@@ -9,6 +9,8 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/common/common.hpp"
12
+ #include "duckdb/common/to_string.hpp"
13
+
12
14
  #include <functional>
13
15
 
14
16
  namespace duckdb {
@@ -23,6 +25,10 @@ struct ColumnBinding {
23
25
  ColumnBinding(idx_t table, idx_t column) : table_index(table), column_index(column) {
24
26
  }
25
27
 
28
+ string ToString() const {
29
+ return "#[" + to_string(table_index) + "." + to_string(column_index) + "]";
30
+ }
31
+
26
32
  bool operator==(const ColumnBinding &rhs) const {
27
33
  return table_index == rhs.table_index && column_index == rhs.column_index;
28
34
  }
@@ -42,6 +42,7 @@ public:
42
42
  }
43
43
 
44
44
  string ToString() const override;
45
+ string GetName() const override;
45
46
 
46
47
  bool Equals(const BaseExpression *other) const override;
47
48
  hash_t Hash() const override;
@@ -59,6 +59,8 @@ protected:
59
59
 
60
60
  idx_t TryBindGroup(ParsedExpression &expr, idx_t depth);
61
61
  BindResult BindGroup(ParsedExpression &expr, idx_t depth, idx_t group_index);
62
+
63
+ bool QualifyColumnAlias(const ColumnRefExpression &colref) override;
62
64
  };
63
65
 
64
66
  } // namespace duckdb
@@ -101,6 +101,8 @@ public:
101
101
  static bool ContainsType(const LogicalType &type, LogicalTypeId target);
102
102
  static LogicalType ExchangeType(const LogicalType &type, LogicalTypeId target, LogicalType new_type);
103
103
 
104
+ virtual bool QualifyColumnAlias(const ColumnRefExpression &colref);
105
+
104
106
  //! Bind the given expression. Unlike Bind(), this does *not* mutate the given ParsedExpression.
105
107
  //! Exposed to be used from sub-binders that aren't subclasses of ExpressionBinder.
106
108
  virtual BindResult BindExpression(unique_ptr<ParsedExpression> &expr_ptr, idx_t depth,
@@ -35,7 +35,7 @@ public:
35
35
  //! The set of grouping sets (optional).
36
36
  vector<GroupingSet> grouping_sets;
37
37
  //! The list of grouping function calls (optional)
38
- vector<vector<idx_t>> grouping_functions;
38
+ vector<unsafe_vector<idx_t>> grouping_functions;
39
39
  //! Group statistics (optional)
40
40
  vector<unique_ptr<BaseStatistics>> group_stats;
41
41
 
@@ -47,6 +47,7 @@ public:
47
47
  static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
48
48
  idx_t EstimateCardinality(ClientContext &context) override;
49
49
  vector<idx_t> GetTableIndex() const override;
50
+ string GetName() const override;
50
51
 
51
52
  protected:
52
53
  void ResolveTypes() override;
@@ -34,6 +34,7 @@ public:
34
34
  void Serialize(FieldWriter &writer) const override;
35
35
  static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
36
36
  vector<idx_t> GetTableIndex() const override;
37
+ string GetName() const override;
37
38
 
38
39
  protected:
39
40
  void ResolveTypes() override {
@@ -41,6 +41,7 @@ public:
41
41
  void Serialize(FieldWriter &writer) const override;
42
42
  static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
43
43
  vector<idx_t> GetTableIndex() const override;
44
+ string GetName() const override;
44
45
 
45
46
  protected:
46
47
  void ResolveTypes() override {
@@ -29,6 +29,7 @@ public:
29
29
  static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
30
30
  idx_t EstimateCardinality(ClientContext &context) override;
31
31
  vector<idx_t> GetTableIndex() const override;
32
+ string GetName() const override;
32
33
 
33
34
  protected:
34
35
  vector<ColumnBinding> GetColumnBindings() override;
@@ -36,6 +36,7 @@ public:
36
36
  void Serialize(FieldWriter &writer) const override;
37
37
  static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
38
38
  vector<idx_t> GetTableIndex() const override;
39
+ string GetName() const override;
39
40
 
40
41
  protected:
41
42
  void ResolveTypes() override {
@@ -35,6 +35,7 @@ public:
35
35
  void Serialize(FieldWriter &writer) const override;
36
36
  static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
37
37
  vector<idx_t> GetTableIndex() const override;
38
+ string GetName() const override;
38
39
 
39
40
  protected:
40
41
  void ResolveTypes() override {
@@ -41,6 +41,7 @@ public:
41
41
  return expressions.size();
42
42
  }
43
43
  vector<idx_t> GetTableIndex() const override;
44
+ string GetName() const override;
44
45
 
45
46
  protected:
46
47
  void ResolveTypes() override {
@@ -69,5 +69,6 @@ protected:
69
69
 
70
70
  idx_t EstimateCardinality(ClientContext &context) override;
71
71
  vector<idx_t> GetTableIndex() const override;
72
+ string GetName() const override;
72
73
  };
73
74
  } // namespace duckdb
@@ -29,6 +29,7 @@ public:
29
29
  void Serialize(FieldWriter &writer) const override;
30
30
  static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
31
31
  vector<idx_t> GetTableIndex() const override;
32
+ string GetName() const override;
32
33
 
33
34
  protected:
34
35
  void ResolveTypes() override;
@@ -27,6 +27,7 @@ public:
27
27
  void Serialize(FieldWriter &writer) const override;
28
28
  static unique_ptr<LogicalOperator> Deserialize(LogicalDeserializationState &state, FieldReader &reader);
29
29
  vector<idx_t> GetTableIndex() const override;
30
+ string GetName() const override;
30
31
 
31
32
  protected:
32
33
  void ResolveTypes() override;