duckdb 0.7.2-dev2820.0 → 0.7.2-dev2995.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (254)
  1. package/binding.gyp +1 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/icu/icu-datepart.cpp +55 -1
  4. package/src/duckdb/extension/parquet/parquet-extension.cpp +5 -4
  5. package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +18 -7
  6. package/src/duckdb/src/catalog/default/default_functions.cpp +2 -0
  7. package/src/duckdb/src/common/arrow/arrow_appender.cpp +3 -3
  8. package/src/duckdb/src/common/arrow/arrow_converter.cpp +2 -2
  9. package/src/duckdb/src/common/local_file_system.cpp +1 -3
  10. package/src/duckdb/src/common/multi_file_reader.cpp +11 -8
  11. package/src/duckdb/src/common/sort/partition_state.cpp +1 -1
  12. package/src/duckdb/src/common/string_util.cpp +6 -1
  13. package/src/duckdb/src/core_functions/function_list.cpp +2 -0
  14. package/src/duckdb/src/core_functions/scalar/string/format_bytes.cpp +29 -0
  15. package/src/duckdb/src/execution/index/art/art.cpp +5 -1
  16. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +62 -43
  17. package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +17 -11
  18. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -39
  19. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +10 -9
  20. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -4
  21. package/src/duckdb/src/execution/operator/helper/physical_explain_analyze.cpp +6 -21
  22. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +13 -13
  23. package/src/duckdb/src/execution/operator/helper/physical_limit_percent.cpp +15 -14
  24. package/src/duckdb/src/execution/operator/helper/physical_load.cpp +3 -2
  25. package/src/duckdb/src/execution/operator/helper/physical_materialized_collector.cpp +4 -4
  26. package/src/duckdb/src/execution/operator/helper/physical_pragma.cpp +4 -2
  27. package/src/duckdb/src/execution/operator/helper/physical_prepare.cpp +4 -2
  28. package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +10 -8
  29. package/src/duckdb/src/execution/operator/helper/physical_reset.cpp +4 -3
  30. package/src/duckdb/src/execution/operator/helper/physical_set.cpp +7 -6
  31. package/src/duckdb/src/execution/operator/helper/physical_transaction.cpp +4 -2
  32. package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +8 -8
  33. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +17 -16
  34. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +10 -8
  35. package/src/duckdb/src/execution/operator/join/physical_cross_product.cpp +3 -4
  36. package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +5 -5
  37. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +16 -15
  38. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +13 -12
  39. package/src/duckdb/src/execution/operator/join/physical_nested_loop_join.cpp +12 -10
  40. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +13 -11
  41. package/src/duckdb/src/execution/operator/join/physical_positional_join.cpp +8 -6
  42. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +1 -1
  43. package/src/duckdb/src/execution/operator/order/physical_order.cpp +13 -13
  44. package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +8 -8
  45. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +160 -145
  46. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +10 -25
  47. package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +14 -19
  48. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +7 -6
  49. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +18 -30
  50. package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +14 -18
  51. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +6 -4
  52. package/src/duckdb/src/execution/operator/scan/physical_dummy_scan.cpp +4 -19
  53. package/src/duckdb/src/execution/operator/scan/physical_empty_result.cpp +3 -2
  54. package/src/duckdb/src/execution/operator/scan/physical_positional_scan.cpp +14 -5
  55. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +6 -4
  56. package/src/duckdb/src/execution/operator/schema/physical_alter.cpp +3 -19
  57. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +4 -18
  58. package/src/duckdb/src/execution/operator/schema/physical_create_function.cpp +4 -19
  59. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +8 -9
  60. package/src/duckdb/src/execution/operator/schema/physical_create_schema.cpp +4 -19
  61. package/src/duckdb/src/execution/operator/schema/physical_create_sequence.cpp +4 -19
  62. package/src/duckdb/src/execution/operator/schema/physical_create_table.cpp +4 -19
  63. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +20 -28
  64. package/src/duckdb/src/execution/operator/schema/physical_create_view.cpp +4 -19
  65. package/src/duckdb/src/execution/operator/schema/physical_detach.cpp +4 -19
  66. package/src/duckdb/src/execution/operator/schema/physical_drop.cpp +3 -19
  67. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +9 -8
  68. package/src/duckdb/src/execution/operator/set/physical_union.cpp +1 -1
  69. package/src/duckdb/src/execution/physical_operator.cpp +11 -5
  70. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +16 -16
  71. package/src/duckdb/src/function/table/arrow_conversion.cpp +3 -3
  72. package/src/duckdb/src/function/table/read_csv.cpp +7 -4
  73. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  74. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +7 -1
  75. package/src/duckdb/src/include/duckdb/common/enums/operator_result_type.hpp +16 -4
  76. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +5 -4
  77. package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +45 -0
  78. package/src/duckdb/src/include/duckdb/common/set.hpp +2 -1
  79. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +15 -0
  80. package/src/duckdb/src/include/duckdb/execution/executor.hpp +10 -1
  81. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +5 -8
  82. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +2 -4
  83. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp +3 -7
  84. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +2 -4
  85. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +1 -2
  86. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_explain_analyze.hpp +2 -5
  87. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit.hpp +2 -4
  88. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit_percent.hpp +2 -4
  89. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_load.hpp +1 -2
  90. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_materialized_collector.hpp +1 -2
  91. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_pragma.hpp +1 -2
  92. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_prepare.hpp +1 -2
  93. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_reservoir_sample.hpp +2 -4
  94. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_reset.hpp +1 -2
  95. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_set.hpp +1 -2
  96. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_transaction.hpp +1 -2
  97. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_vacuum.hpp +2 -4
  98. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -4
  99. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_blockwise_nl_join.hpp +2 -4
  100. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +1 -2
  101. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_delim_join.hpp +1 -2
  102. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +2 -4
  103. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +2 -4
  104. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_nested_loop_join.hpp +2 -4
  105. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +2 -4
  106. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_positional_join.hpp +2 -4
  107. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_order.hpp +2 -4
  108. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_top_n.hpp +2 -4
  109. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +3 -5
  110. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +2 -5
  111. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_delete.hpp +2 -4
  112. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_export.hpp +2 -4
  113. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +2 -4
  114. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_update.hpp +2 -4
  115. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +1 -2
  116. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_dummy_scan.hpp +1 -3
  117. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_empty_result.hpp +1 -2
  118. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_positional_scan.hpp +1 -2
  119. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +1 -2
  120. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_alter.hpp +1 -3
  121. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_attach.hpp +1 -3
  122. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_function.hpp +1 -3
  123. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_index.hpp +2 -4
  124. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_schema.hpp +1 -3
  125. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_sequence.hpp +1 -3
  126. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_table.hpp +1 -3
  127. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_type.hpp +6 -5
  128. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_view.hpp +1 -3
  129. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_detach.hpp +1 -3
  130. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_drop.hpp +1 -3
  131. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +2 -4
  132. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +7 -4
  133. package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +26 -6
  134. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +5 -5
  135. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +2 -1
  136. package/src/duckdb/src/include/duckdb/function/table_function.hpp +0 -1
  137. package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
  138. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  139. package/src/duckdb/src/include/duckdb/main/database.hpp +1 -0
  140. package/src/duckdb/src/include/duckdb/main/database_manager.hpp +3 -0
  141. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +0 -2
  142. package/src/duckdb/src/include/duckdb/parallel/event.hpp +1 -1
  143. package/src/duckdb/src/include/duckdb/parallel/interrupt.hpp +63 -0
  144. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +16 -3
  145. package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +51 -7
  146. package/src/duckdb/src/include/duckdb/parallel/task.hpp +21 -2
  147. package/src/duckdb/src/include/duckdb/parallel/task_counter.hpp +2 -2
  148. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +2 -2
  149. package/src/duckdb/src/include/duckdb/parser/tableref/pivotref.hpp +3 -0
  150. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +5 -1
  151. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +6 -0
  152. package/src/duckdb/src/include/duckdb/planner/expression/bound_columnref_expression.hpp +1 -0
  153. package/src/duckdb/src/include/duckdb/planner/operator/logical_aggregate.hpp +1 -0
  154. package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +1 -0
  155. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +1 -0
  156. package/src/duckdb/src/include/duckdb/planner/operator/logical_delete.hpp +1 -0
  157. package/src/duckdb/src/include/duckdb/planner/operator/logical_delim_get.hpp +1 -0
  158. package/src/duckdb/src/include/duckdb/planner/operator/logical_dummy_scan.hpp +1 -0
  159. package/src/duckdb/src/include/duckdb/planner/operator/logical_expression_get.hpp +1 -0
  160. package/src/duckdb/src/include/duckdb/planner/operator/logical_insert.hpp +1 -0
  161. package/src/duckdb/src/include/duckdb/planner/operator/logical_pivot.hpp +1 -0
  162. package/src/duckdb/src/include/duckdb/planner/operator/logical_projection.hpp +1 -0
  163. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +1 -0
  164. package/src/duckdb/src/include/duckdb/planner/operator/logical_set_operation.hpp +1 -0
  165. package/src/duckdb/src/include/duckdb/planner/operator/logical_unnest.hpp +1 -0
  166. package/src/duckdb/src/include/duckdb/planner/operator/logical_update.hpp +1 -0
  167. package/src/duckdb/src/include/duckdb/planner/operator/logical_window.hpp +1 -0
  168. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -0
  169. package/src/duckdb/src/include/duckdb/storage/optimistic_data_writer.hpp +46 -0
  170. package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +24 -3
  171. package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +46 -1
  172. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +9 -10
  173. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +1 -1
  174. package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +2 -2
  175. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -3
  176. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -0
  177. package/src/duckdb/src/include/duckdb/storage/table/segment_base.hpp +1 -1
  178. package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +22 -0
  179. package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +3 -3
  180. package/src/duckdb/src/include/duckdb/storage/table/struct_column_data.hpp +2 -2
  181. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +0 -2
  182. package/src/duckdb/src/include/duckdb/storage/table/validity_column_data.hpp +1 -2
  183. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +9 -34
  184. package/src/duckdb/src/include/duckdb/verification/no_operator_caching_verifier.hpp +25 -0
  185. package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +5 -0
  186. package/src/duckdb/src/main/attached_database.cpp +5 -3
  187. package/src/duckdb/src/main/client_verify.cpp +4 -0
  188. package/src/duckdb/src/main/config.cpp +4 -0
  189. package/src/duckdb/src/main/database.cpp +45 -48
  190. package/src/duckdb/src/main/extension/extension_load.cpp +32 -49
  191. package/src/duckdb/src/parallel/event.cpp +1 -1
  192. package/src/duckdb/src/parallel/executor.cpp +39 -3
  193. package/src/duckdb/src/parallel/executor_task.cpp +11 -0
  194. package/src/duckdb/src/parallel/interrupt.cpp +57 -0
  195. package/src/duckdb/src/parallel/pipeline.cpp +49 -6
  196. package/src/duckdb/src/parallel/pipeline_executor.cpp +248 -69
  197. package/src/duckdb/src/parallel/pipeline_initialize_event.cpp +1 -1
  198. package/src/duckdb/src/parallel/task_scheduler.cpp +57 -22
  199. package/src/duckdb/src/parser/base_expression.cpp +6 -0
  200. package/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +1 -4
  201. package/src/duckdb/src/parser/transform/statement/transform_create_view.cpp +2 -4
  202. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +43 -24
  203. package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +3 -0
  204. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +17 -28
  205. package/src/duckdb/src/planner/expression/bound_columnref_expression.cpp +17 -3
  206. package/src/duckdb/src/planner/expression/bound_reference_expression.cpp +8 -2
  207. package/src/duckdb/src/planner/operator/logical_aggregate.cpp +13 -1
  208. package/src/duckdb/src/planner/operator/logical_column_data_get.cpp +11 -0
  209. package/src/duckdb/src/planner/operator/logical_cteref.cpp +11 -0
  210. package/src/duckdb/src/planner/operator/logical_delete.cpp +10 -0
  211. package/src/duckdb/src/planner/operator/logical_delim_get.cpp +12 -1
  212. package/src/duckdb/src/planner/operator/logical_dummy_scan.cpp +12 -1
  213. package/src/duckdb/src/planner/operator/logical_expression_get.cpp +12 -1
  214. package/src/duckdb/src/planner/operator/logical_get.cpp +10 -4
  215. package/src/duckdb/src/planner/operator/logical_insert.cpp +12 -1
  216. package/src/duckdb/src/planner/operator/logical_pivot.cpp +11 -0
  217. package/src/duckdb/src/planner/operator/logical_projection.cpp +11 -0
  218. package/src/duckdb/src/planner/operator/logical_recursive_cte.cpp +11 -0
  219. package/src/duckdb/src/planner/operator/logical_set_operation.cpp +11 -0
  220. package/src/duckdb/src/planner/operator/logical_unnest.cpp +12 -1
  221. package/src/duckdb/src/planner/operator/logical_update.cpp +10 -0
  222. package/src/duckdb/src/planner/operator/logical_window.cpp +11 -0
  223. package/src/duckdb/src/storage/checkpoint_manager.cpp +1 -1
  224. package/src/duckdb/src/storage/data_table.cpp +5 -0
  225. package/src/duckdb/src/storage/local_storage.cpp +40 -110
  226. package/src/duckdb/src/storage/optimistic_data_writer.cpp +96 -0
  227. package/src/duckdb/src/storage/partial_block_manager.cpp +73 -9
  228. package/src/duckdb/src/storage/single_file_block_manager.cpp +3 -1
  229. package/src/duckdb/src/storage/standard_buffer_manager.cpp +17 -12
  230. package/src/duckdb/src/storage/statistics/base_statistics.cpp +3 -0
  231. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +90 -82
  232. package/src/duckdb/src/storage/table/column_data.cpp +19 -45
  233. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +7 -7
  234. package/src/duckdb/src/storage/table/column_segment.cpp +1 -1
  235. package/src/duckdb/src/storage/table/list_column_data.cpp +6 -11
  236. package/src/duckdb/src/storage/table/row_group.cpp +13 -14
  237. package/src/duckdb/src/storage/table/row_group_collection.cpp +10 -4
  238. package/src/duckdb/src/storage/table/standard_column_data.cpp +6 -10
  239. package/src/duckdb/src/storage/table/struct_column_data.cpp +7 -13
  240. package/src/duckdb/src/storage/table/update_segment.cpp +0 -25
  241. package/src/duckdb/src/storage/table/validity_column_data.cpp +2 -6
  242. package/src/duckdb/src/transaction/commit_state.cpp +4 -4
  243. package/src/duckdb/src/verification/no_operator_caching_verifier.cpp +13 -0
  244. package/src/duckdb/src/verification/statement_verifier.cpp +4 -0
  245. package/src/duckdb/third_party/fmt/format.cc +0 -5
  246. package/src/duckdb/third_party/fmt/include/fmt/core.h +10 -12
  247. package/src/duckdb/third_party/fmt/include/fmt/format-inl.h +2 -33
  248. package/src/duckdb/third_party/fmt/include/fmt/format.h +61 -24
  249. package/src/duckdb/third_party/fmt/include/fmt/printf.h +15 -1
  250. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +1 -0
  251. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +10735 -10674
  252. package/src/duckdb/ub_src_core_functions_scalar_string.cpp +2 -0
  253. package/src/duckdb/ub_src_parallel.cpp +2 -0
  254. package/src/duckdb/ub_src_storage.cpp +2 -0
@@ -3,6 +3,7 @@
3
3
  #include "duckdb/catalog/catalog.hpp"
4
4
  #include "duckdb/common/types/column/column_data_collection.hpp"
5
5
  #include "duckdb/catalog/catalog_entry/type_catalog_entry.hpp"
6
+ #include "duckdb/common/string_map_set.hpp"
6
7
 
7
8
  namespace duckdb {
8
9
 
@@ -21,22 +22,22 @@ public:
21
22
  Vector result;
22
23
  idx_t size = 0;
23
24
  idx_t capacity = STANDARD_VECTOR_SIZE;
25
+ string_set_t found_strings;
24
26
  };
25
27
 
26
28
  unique_ptr<GlobalSinkState> PhysicalCreateType::GetGlobalSinkState(ClientContext &context) const {
27
29
  return make_uniq<CreateTypeGlobalState>(context);
28
30
  }
29
31
 
30
- SinkResultType PhysicalCreateType::Sink(ExecutionContext &context, GlobalSinkState &gstate_p, LocalSinkState &lstate_p,
31
- DataChunk &input) const {
32
- auto &gstate = gstate_p.Cast<CreateTypeGlobalState>();
33
- idx_t total_row_count = gstate.size + input.size();
32
+ SinkResultType PhysicalCreateType::Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const {
33
+ auto &gstate = input.global_state.Cast<CreateTypeGlobalState>();
34
+ idx_t total_row_count = gstate.size + chunk.size();
34
35
  if (total_row_count > NumericLimits<uint32_t>::Maximum()) {
35
36
  throw InvalidInputException("Attempted to create ENUM of size %llu, which exceeds the maximum size of %llu",
36
37
  total_row_count, NumericLimits<uint32_t>::Maximum());
37
38
  }
38
39
  UnifiedVectorFormat sdata;
39
- input.data[0].ToUnifiedFormat(input.size(), sdata);
40
+ chunk.data[0].ToUnifiedFormat(chunk.size(), sdata);
40
41
 
41
42
  if (total_row_count > gstate.capacity) {
42
43
  // We must resize our result vector
@@ -47,13 +48,20 @@ SinkResultType PhysicalCreateType::Sink(ExecutionContext &context, GlobalSinkSta
47
48
  auto src_ptr = (string_t *)sdata.data;
48
49
  auto result_ptr = FlatVector::GetData<string_t>(gstate.result);
49
50
  // Input vector has NULL value, we just throw an exception
50
- for (idx_t i = 0; i < input.size(); i++) {
51
+ for (idx_t i = 0; i < chunk.size(); i++) {
51
52
  idx_t idx = sdata.sel->get_index(i);
52
53
  if (!sdata.validity.RowIsValid(idx)) {
53
54
  throw InvalidInputException("Attempted to create ENUM type with NULL value!");
54
55
  }
55
- result_ptr[gstate.size++] =
56
- StringVector::AddStringOrBlob(gstate.result, src_ptr[idx].GetData(), src_ptr[idx].GetSize());
56
+ auto str = src_ptr[idx];
57
+ auto entry = gstate.found_strings.find(src_ptr[idx]);
58
+ if (entry != gstate.found_strings.end()) {
59
+ // entry was already found - skip
60
+ continue;
61
+ }
62
+ auto owned_string = StringVector::AddStringOrBlob(gstate.result, str.GetData(), str.GetSize());
63
+ gstate.found_strings.insert(owned_string);
64
+ result_ptr[gstate.size++] = owned_string;
57
65
  }
58
66
  return SinkResultType::NEED_MORE_INPUT;
59
67
  }
@@ -61,25 +69,8 @@ SinkResultType PhysicalCreateType::Sink(ExecutionContext &context, GlobalSinkSta
61
69
  //===--------------------------------------------------------------------===//
62
70
  // Source
63
71
  //===--------------------------------------------------------------------===//
64
- class CreateTypeSourceState : public GlobalSourceState {
65
- public:
66
- CreateTypeSourceState() : finished(false) {
67
- }
68
-
69
- bool finished;
70
- };
71
-
72
- unique_ptr<GlobalSourceState> PhysicalCreateType::GetGlobalSourceState(ClientContext &context) const {
73
- return make_uniq<CreateTypeSourceState>();
74
- }
75
-
76
- void PhysicalCreateType::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
77
- LocalSourceState &lstate) const {
78
- auto &state = gstate.Cast<CreateTypeSourceState>();
79
- if (state.finished) {
80
- return;
81
- }
82
-
72
+ SourceResultType PhysicalCreateType::GetData(ExecutionContext &context, DataChunk &chunk,
73
+ OperatorSourceInput &input) const {
83
74
  if (IsSink()) {
84
75
  D_ASSERT(info->type == LogicalType::INVALID);
85
76
  auto &g_sink_state = sink_state->Cast<CreateTypeGlobalState>();
@@ -91,7 +82,8 @@ void PhysicalCreateType::GetData(ExecutionContext &context, DataChunk &chunk, Gl
91
82
  D_ASSERT(catalog_entry->type == CatalogType::TYPE_ENTRY);
92
83
  auto &catalog_type = catalog_entry->Cast<TypeCatalogEntry>();
93
84
  EnumType::SetCatalog(info->type, &catalog_type);
94
- state.finished = true;
85
+
86
+ return SourceResultType::FINISHED;
95
87
  }
96
88
 
97
89
  } // namespace duckdb
@@ -6,27 +6,12 @@ namespace duckdb {
6
6
  //===--------------------------------------------------------------------===//
7
7
  // Source
8
8
  //===--------------------------------------------------------------------===//
9
- class CreateViewSourceState : public GlobalSourceState {
10
- public:
11
- CreateViewSourceState() : finished(false) {
12
- }
13
-
14
- bool finished;
15
- };
16
-
17
- unique_ptr<GlobalSourceState> PhysicalCreateView::GetGlobalSourceState(ClientContext &context) const {
18
- return make_uniq<CreateViewSourceState>();
19
- }
20
-
21
- void PhysicalCreateView::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
22
- LocalSourceState &lstate) const {
23
- auto &state = gstate.Cast<CreateViewSourceState>();
24
- if (state.finished) {
25
- return;
26
- }
9
+ SourceResultType PhysicalCreateView::GetData(ExecutionContext &context, DataChunk &chunk,
10
+ OperatorSourceInput &input) const {
27
11
  auto &catalog = Catalog::GetCatalog(context.client, info->catalog);
28
12
  catalog.CreateView(context.client, *info);
29
- state.finished = true;
13
+
14
+ return SourceResultType::FINISHED;
30
15
  }
31
16
 
32
17
  } // namespace duckdb
@@ -11,27 +11,12 @@ namespace duckdb {
11
11
  //===--------------------------------------------------------------------===//
12
12
  // Source
13
13
  //===--------------------------------------------------------------------===//
14
- class DetachSourceState : public GlobalSourceState {
15
- public:
16
- DetachSourceState() : finished(false) {
17
- }
18
-
19
- bool finished;
20
- };
21
-
22
- unique_ptr<GlobalSourceState> PhysicalDetach::GetGlobalSourceState(ClientContext &context) const {
23
- return make_uniq<DetachSourceState>();
24
- }
25
-
26
- void PhysicalDetach::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
27
- LocalSourceState &lstate) const {
28
- auto &state = gstate.Cast<DetachSourceState>();
29
- if (state.finished) {
30
- return;
31
- }
14
+ SourceResultType PhysicalDetach::GetData(ExecutionContext &context, DataChunk &chunk,
15
+ OperatorSourceInput &input) const {
32
16
  auto &db_manager = DatabaseManager::Get(context.client);
33
17
  db_manager.DetachDatabase(context.client, info->name, info->if_not_found);
34
- state.finished = true;
18
+
19
+ return SourceResultType::FINISHED;
35
20
  }
36
21
 
37
22
  } // namespace duckdb
@@ -11,24 +11,7 @@ namespace duckdb {
11
11
  //===--------------------------------------------------------------------===//
12
12
  // Source
13
13
  //===--------------------------------------------------------------------===//
14
- class DropSourceState : public GlobalSourceState {
15
- public:
16
- DropSourceState() : finished(false) {
17
- }
18
-
19
- bool finished;
20
- };
21
-
22
- unique_ptr<GlobalSourceState> PhysicalDrop::GetGlobalSourceState(ClientContext &context) const {
23
- return make_uniq<DropSourceState>();
24
- }
25
-
26
- void PhysicalDrop::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
27
- LocalSourceState &lstate) const {
28
- auto &state = gstate.Cast<DropSourceState>();
29
- if (state.finished) {
30
- return;
31
- }
14
+ SourceResultType PhysicalDrop::GetData(ExecutionContext &context, DataChunk &chunk, OperatorSourceInput &input) const {
32
15
  switch (info->type) {
33
16
  case CatalogType::PREPARED_STATEMENT: {
34
17
  // DEALLOCATE silently ignores errors
@@ -62,7 +45,8 @@ void PhysicalDrop::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSo
62
45
  break;
63
46
  }
64
47
  }
65
- state.finished = true;
48
+
49
+ return SourceResultType::FINISHED;
66
50
  }
67
51
 
68
52
  } // namespace duckdb
@@ -61,16 +61,15 @@ idx_t PhysicalRecursiveCTE::ProbeHT(DataChunk &chunk, RecursiveCTEState &state)
61
61
  return new_group_count;
62
62
  }
63
63
 
64
- SinkResultType PhysicalRecursiveCTE::Sink(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate,
65
- DataChunk &input) const {
66
- auto &gstate = state.Cast<RecursiveCTEState>();
64
+ SinkResultType PhysicalRecursiveCTE::Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const {
65
+ auto &gstate = input.global_state.Cast<RecursiveCTEState>();
67
66
  if (!union_all) {
68
- idx_t match_count = ProbeHT(input, gstate);
67
+ idx_t match_count = ProbeHT(chunk, gstate);
69
68
  if (match_count > 0) {
70
- gstate.intermediate_table.Append(input);
69
+ gstate.intermediate_table.Append(chunk);
71
70
  }
72
71
  } else {
73
- gstate.intermediate_table.Append(input);
72
+ gstate.intermediate_table.Append(chunk);
74
73
  }
75
74
  return SinkResultType::NEED_MORE_INPUT;
76
75
  }
@@ -78,8 +77,8 @@ SinkResultType PhysicalRecursiveCTE::Sink(ExecutionContext &context, GlobalSinkS
78
77
  //===--------------------------------------------------------------------===//
79
78
  // Source
80
79
  //===--------------------------------------------------------------------===//
81
- void PhysicalRecursiveCTE::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate_p,
82
- LocalSourceState &lstate) const {
80
+ SourceResultType PhysicalRecursiveCTE::GetData(ExecutionContext &context, DataChunk &chunk,
81
+ OperatorSourceInput &input) const {
83
82
  auto &gstate = sink_state->Cast<RecursiveCTEState>();
84
83
  if (!gstate.initialized) {
85
84
  gstate.intermediate_table.InitializeScan(gstate.scan_state);
@@ -117,6 +116,8 @@ void PhysicalRecursiveCTE::GetData(ExecutionContext &context, DataChunk &chunk,
117
116
  gstate.intermediate_table.InitializeScan(gstate.scan_state);
118
117
  }
119
118
  }
119
+
120
+ return chunk.size() == 0 ? SourceResultType::FINISHED : SourceResultType::HAVE_MORE_OUTPUT;
120
121
  }
121
122
 
122
123
  void PhysicalRecursiveCTE::ExecuteRecursivePipelines(ExecutionContext &context) const {
@@ -28,7 +28,7 @@ void PhysicalUnion::BuildPipelines(Pipeline &current, MetaPipeline &meta_pipelin
28
28
  order_matters = true;
29
29
  }
30
30
  if (sink) {
31
- if (sink->SinkOrderDependent() && !sink->RequiresBatchIndex()) {
31
+ if (sink->SinkOrderDependent() || sink->RequiresBatchIndex()) {
32
32
  order_matters = true;
33
33
  }
34
34
  if (!sink->ParallelSink()) {
@@ -72,8 +72,8 @@ unique_ptr<GlobalSourceState> PhysicalOperator::GetGlobalSourceState(ClientConte
72
72
  }
73
73
 
74
74
  // LCOV_EXCL_START
75
- void PhysicalOperator::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
76
- LocalSourceState &lstate) const {
75
+ SourceResultType PhysicalOperator::GetData(ExecutionContext &context, DataChunk &chunk,
76
+ OperatorSourceInput &input) const {
77
77
  throw InternalException("Calling GetData on a node that is not a source!");
78
78
  }
79
79
 
@@ -91,10 +91,10 @@ double PhysicalOperator::GetProgress(ClientContext &context, GlobalSourceState &
91
91
  // Sink
92
92
  //===--------------------------------------------------------------------===//
93
93
  // LCOV_EXCL_START
94
- SinkResultType PhysicalOperator::Sink(ExecutionContext &context, GlobalSinkState &gstate, LocalSinkState &lstate,
95
- DataChunk &input) const {
94
+ SinkResultType PhysicalOperator::Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const {
96
95
  throw InternalException("Calling Sink on a node that is not a sink!");
97
96
  }
97
+
98
98
  // LCOV_EXCL_STOP
99
99
 
100
100
  void PhysicalOperator::Combine(ExecutionContext &context, GlobalSinkState &gstate, LocalSinkState &lstate) const {
@@ -105,6 +105,9 @@ SinkFinalizeType PhysicalOperator::Finalize(Pipeline &pipeline, Event &event, Cl
105
105
  return SinkFinalizeType::READY;
106
106
  }
107
107
 
108
+ void PhysicalOperator::NextBatch(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate_p) const {
109
+ }
110
+
108
111
  unique_ptr<LocalSinkState> PhysicalOperator::GetLocalSinkState(ExecutionContext &context) const {
109
112
  return make_uniq<LocalSinkState>();
110
113
  }
@@ -238,7 +241,9 @@ OperatorResultType CachingPhysicalOperator::Execute(ExecutionContext &context, D
238
241
  state.initialized = true;
239
242
  state.can_cache_chunk = true;
240
243
 
241
- if (!context.pipeline || !caching_supported) {
244
+ if (!context.client.config.enable_caching_operators) {
245
+ state.can_cache_chunk = false;
246
+ } else if (!context.pipeline || !caching_supported) {
242
247
  state.can_cache_chunk = false;
243
248
  } else if (!context.pipeline->GetSink()) {
244
249
  // Disabling for pipelines without Sink, i.e. when pulling
@@ -252,6 +257,7 @@ OperatorResultType CachingPhysicalOperator::Execute(ExecutionContext &context, D
252
257
  if (!state.can_cache_chunk) {
253
258
  return child_result;
254
259
  }
260
+ // TODO chunk size of 0 should not result in a cache being created!
255
261
  if (chunk.size() < CACHE_THRESHOLD) {
256
262
  // we have filtered out a significant amount of tuples
257
263
  // add this chunk to the cache and continue
@@ -130,19 +130,18 @@ void RadixPartitionedHashTable::PopulateGroupChunk(DataChunk &group_chunk, DataC
130
130
  group_chunk.Verify();
131
131
  }
132
132
 
133
- void RadixPartitionedHashTable::Sink(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate,
134
- DataChunk &groups_input, DataChunk &payload_input,
135
- const vector<idx_t> &filter) const {
136
- auto &llstate = lstate.Cast<RadixHTLocalState>();
137
- auto &gstate = state.Cast<RadixHTGlobalState>();
133
+ void RadixPartitionedHashTable::Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input,
134
+ DataChunk &payload_input, const vector<idx_t> &filter) const {
135
+ auto &llstate = input.local_state.Cast<RadixHTLocalState>();
136
+ auto &gstate = input.global_state.Cast<RadixHTGlobalState>();
138
137
  D_ASSERT(!gstate.is_finalized);
139
138
 
140
139
  DataChunk &group_chunk = llstate.group_chunk;
141
- PopulateGroupChunk(group_chunk, groups_input);
140
+ PopulateGroupChunk(group_chunk, chunk);
142
141
 
143
142
  // if we have non-combinable aggregates (e.g. string_agg) we cannot keep parallel hash
144
143
  // tables
145
- if (ForceSingleHT(state)) {
144
+ if (ForceSingleHT(input.global_state)) {
146
145
  lock_guard<mutex> glock(gstate.lock);
147
146
  gstate.is_empty = gstate.is_empty && group_chunk.size() == 0;
148
147
  if (gstate.finalized_hts.empty()) {
@@ -304,7 +303,7 @@ private:
304
303
  };
305
304
 
306
305
  void RadixPartitionedHashTable::ScheduleTasks(Executor &executor, const shared_ptr<Event> &event,
307
- GlobalSinkState &state, vector<unique_ptr<Task>> &tasks) const {
306
+ GlobalSinkState &state, vector<shared_ptr<Task>> &tasks) const {
308
307
  auto &gstate = state.Cast<RadixHTGlobalState>();
309
308
  if (!gstate.is_partitioned) {
310
309
  return;
@@ -382,14 +381,14 @@ idx_t RadixPartitionedHashTable::Size(GlobalSinkState &sink_state) const {
382
381
  return count;
383
382
  }
384
383
 
385
- void RadixPartitionedHashTable::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSinkState &sink_state,
386
- GlobalSourceState &gsstate, LocalSourceState &lsstate) const {
384
+ SourceResultType RadixPartitionedHashTable::GetData(ExecutionContext &context, DataChunk &chunk,
385
+ GlobalSinkState &sink_state, OperatorSourceInput &input) const {
387
386
  auto &gstate = sink_state.Cast<RadixHTGlobalState>();
388
- auto &state = gsstate.Cast<RadixHTGlobalSourceState>();
389
- auto &lstate = lsstate.Cast<RadixHTLocalSourceState>();
387
+ auto &state = input.global_state.Cast<RadixHTGlobalSourceState>();
388
+ auto &lstate = input.local_state.Cast<RadixHTLocalSourceState>();
390
389
  D_ASSERT(gstate.is_finalized);
391
390
  if (state.finished) {
392
- return;
391
+ return SourceResultType::FINISHED;
393
392
  }
394
393
 
395
394
  // special case hack to sort out aggregating from empty intermediates
@@ -421,11 +420,11 @@ void RadixPartitionedHashTable::GetData(ExecutionContext &context, DataChunk &ch
421
420
  chunk.data[null_groups.size() + op.aggregates.size() + i].Reference(grouping_values[i]);
422
421
  }
423
422
  state.finished = true;
424
- return;
423
+ return chunk.size() == 0 ? SourceResultType::FINISHED : SourceResultType::HAVE_MORE_OUTPUT;
425
424
  }
426
425
  if (gstate.is_empty) {
427
426
  state.finished = true;
428
- return;
427
+ return chunk.size() == 0 ? SourceResultType::FINISHED : SourceResultType::HAVE_MORE_OUTPUT;
429
428
  }
430
429
  idx_t elements_found = 0;
431
430
 
@@ -461,7 +460,7 @@ void RadixPartitionedHashTable::GetData(ExecutionContext &context, DataChunk &ch
461
460
  ht_index = state.ht_index;
462
461
  if (ht_index >= gstate.finalized_hts.size()) {
463
462
  state.finished = true;
464
- return;
463
+ return chunk.size() == 0 ? SourceResultType::FINISHED : SourceResultType::HAVE_MORE_OUTPUT;
465
464
  }
466
465
  }
467
466
  D_ASSERT(ht_index < gstate.finalized_hts.size());
@@ -511,6 +510,7 @@ void RadixPartitionedHashTable::GetData(ExecutionContext &context, DataChunk &ch
511
510
  for (idx_t i = 0; i < op.grouping_functions.size(); i++) {
512
511
  chunk.data[op.GroupCount() + op.aggregates.size() + i].Reference(grouping_values[i]);
513
512
  }
513
+ return chunk.size() == 0 ? SourceResultType::FINISHED : SourceResultType::HAVE_MORE_OUTPUT;
514
514
  }
515
515
 
516
516
  } // namespace duckdb
@@ -253,6 +253,9 @@ static void SetVectorString(Vector &vector, idx_t size, char *cdata, T *offsets)
253
253
  }
254
254
  auto cptr = cdata + offsets[row_idx];
255
255
  auto str_len = offsets[row_idx + 1] - offsets[row_idx];
256
+ if (str_len > NumericLimits<uint32_t>::Maximum()) { // LCOV_EXCL_START
257
+ throw ConversionException("DuckDB does not support Strings over 4GB");
258
+ } // LCOV_EXCL_STOP
256
259
  strings[row_idx] = string_t(cptr, str_len);
257
260
  }
258
261
  }
@@ -406,9 +409,6 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLoca
406
409
  auto original_type = arrow_convert_data[col_idx]->variable_sz_type[arrow_convert_idx.variable_sized_index++];
407
410
  auto cdata = (char *)array.buffers[2];
408
411
  if (original_type.first == ArrowVariableSizeType::SUPER_SIZE) {
409
- if (((uint64_t *)array.buffers[1])[array.length] > NumericLimits<uint32_t>::Maximum()) { // LCOV_EXCL_START
410
- throw ConversionException("DuckDB does not support Strings over 4GB");
411
- } // LCOV_EXCL_STOP
412
412
  auto offsets = (uint64_t *)array.buffers[1] + array.offset + scan_state.chunk_offset;
413
413
  if (nested_offset != -1) {
414
414
  offsets = (uint64_t *)array.buffers[1] + array.offset + nested_offset;
@@ -497,7 +497,8 @@ bool ParallelCSVGlobalState::Next(ClientContext &context, const ReadCSVData &bin
497
497
  }
498
498
  reader->options.file_path = current_file_path;
499
499
  MultiFileReader::InitializeReader(*reader, bind_data.options.file_options, bind_data.reader_bind,
500
- bind_data.return_types, bind_data.return_names, column_ids, nullptr);
500
+ bind_data.return_types, bind_data.return_names, column_ids, nullptr,
501
+ bind_data.files.front());
501
502
  } else {
502
503
  // update the current reader
503
504
  reader->SetBufferRead(std::move(result));
@@ -660,7 +661,8 @@ struct SingleThreadedCSVState : public GlobalTableFunctionState {
660
661
  result->names = csv_names;
661
662
  }
662
663
  MultiFileReader::InitializeReader(*result, bind_data.options.file_options, bind_data.reader_bind,
663
- bind_data.return_types, bind_data.return_names, column_ids, nullptr);
664
+ bind_data.return_types, bind_data.return_names, column_ids, nullptr,
665
+ bind_data.files.front());
664
666
  }
665
667
  total_size = result->file_handle->FileSize();
666
668
  return result;
@@ -707,14 +709,15 @@ static unique_ptr<GlobalTableFunctionState> SingleThreadedCSVInit(ClientContext
707
709
  }
708
710
  }
709
711
  MultiFileReader::InitializeReader(*result->initial_reader, bind_data.options.file_options, bind_data.reader_bind,
710
- bind_data.return_types, bind_data.return_names, input.column_ids, input.filters);
712
+ bind_data.return_types, bind_data.return_names, input.column_ids, input.filters,
713
+ bind_data.files.front());
711
714
  for (auto &reader : bind_data.union_readers) {
712
715
  if (!reader) {
713
716
  continue;
714
717
  }
715
718
  MultiFileReader::InitializeReader(*reader, bind_data.options.file_options, bind_data.reader_bind,
716
719
  bind_data.return_types, bind_data.return_names, input.column_ids,
717
- input.filters);
720
+ input.filters, bind_data.files.front());
718
721
  }
719
722
  result->column_ids = input.column_ids;
720
723
 
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.7.2-dev2820"
2
+ #define DUCKDB_VERSION "0.7.2-dev2995"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "fc797c18cf"
5
+ #define DUCKDB_SOURCE_ID "9b1d80a9ee"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -80,7 +80,7 @@ public:
80
80
  virtual unique_ptr<BaseStatistics> GetStatistics(ClientContext &context, column_t column_id) = 0;
81
81
 
82
82
  //! Serialize the meta information of the TableCatalogEntry to a serializer
83
- virtual void Serialize(Serializer &serializer) const;
83
+ void Serialize(Serializer &serializer) const;
84
84
  //! Deserializes to a CreateTableInfo
85
85
  static unique_ptr<CreateTableInfo> Deserialize(Deserializer &source, ClientContext &context);
86
86
 
@@ -103,6 +103,12 @@ public:
103
103
  virtual TableStorageInfo GetStorageInfo(ClientContext &context) = 0;
104
104
 
105
105
  protected:
106
+ // This is used to serialize the entry by #Serialize(Serializer& ). It is virtual to allow
107
+ // custom catalog implementations to override the default implementation. We cannot make
108
+ // the Serialize method itself virtual, as the logic is tightly coupled to the static
109
+ // Deserialize method.
110
+ virtual CreateTableInfo GetTableInfoForSerialization() const;
111
+
106
112
  //! A list of columns that are part of this table
107
113
  ColumnList columns;
108
114
  //! A list of constraints that are part of this table
@@ -14,25 +14,37 @@ namespace duckdb {
14
14
 
15
15
  //! The OperatorResultType is used to indicate how data should flow around a regular (i.e. non-sink and non-source)
16
16
  //! physical operator
17
- //! There are three possible results:
17
+ //! There are four possible results:
18
18
  //! NEED_MORE_INPUT means the operator is done with the current input and can consume more input if available
19
19
  //! If there is more input the operator will be called with more input, otherwise the operator will not be called again.
20
20
  //! HAVE_MORE_OUTPUT means the operator is not finished yet with the current input.
21
21
  //! The operator will be called again with the same input.
22
22
  //! FINISHED means the operator has finished the entire pipeline and no more processing is necessary.
23
23
  //! The operator will not be called again, and neither will any other operators in this pipeline.
24
- enum class OperatorResultType : uint8_t { NEED_MORE_INPUT, HAVE_MORE_OUTPUT, FINISHED };
24
+ //! BLOCKED means the operator does not want to be called right now, e.g. because it's currently doing async I/O. The
25
+ //! operator has set the interrupt state and the caller is expected to handle it. Note that intermediate operators
26
+ //! should currently not emit this state.
27
+ enum class OperatorResultType : uint8_t { NEED_MORE_INPUT, HAVE_MORE_OUTPUT, FINISHED, BLOCKED };
25
28
 
26
29
  //! OperatorFinalizeResultType is used to indicate whether operators have finished flushing their cached results.
27
30
  //! FINISHED means the operator has flushed all cached data.
28
31
  //! HAVE_MORE_OUTPUT means the operator contains more results.
29
32
  enum class OperatorFinalizeResultType : uint8_t { HAVE_MORE_OUTPUT, FINISHED };
30
33
 
34
+ //! SourceResultType is used to indicate the result of data being pulled out of a source.
35
+ //! There are three possible results:
36
+ //! HAVE_MORE_OUTPUT means the source has more output. This flag should only be set when data is returned; empty results
37
+ //! should only occur for the FINISHED and BLOCKED flags
38
+ //! FINISHED means the source is exhausted
39
+ //! BLOCKED means the source is currently blocked, e.g. by some async I/O
40
+ enum class SourceResultType : uint8_t { HAVE_MORE_OUTPUT, FINISHED, BLOCKED };
41
+
31
42
  //! The SinkResultType is used to indicate the result of data flowing into a sink
32
- //! There are two possible results:
43
+ //! There are three possible results:
33
44
  //! NEED_MORE_INPUT means the sink needs more input
34
45
  //! FINISHED means the sink is finished executing, and more input will not change the result any further
35
- enum class SinkResultType : uint8_t { NEED_MORE_INPUT, FINISHED };
46
+ //! BLOCKED means the sink is currently blocked, e.g. by some async I/O.
47
+ enum class SinkResultType : uint8_t { NEED_MORE_INPUT, FINISHED, BLOCKED };
36
48
 
37
49
  //! The SinkFinalizeType is used to indicate the result of a Finalize call on a sink
38
50
  //! There are two possible results:
@@ -107,7 +107,8 @@ struct MultiFileReader {
107
107
  DUCKDB_API static void CreateMapping(const string &file_name, const vector<LogicalType> &local_types,
108
108
  const vector<string> &local_names, const vector<LogicalType> &global_types,
109
109
  const vector<string> &global_names, const vector<column_t> &global_column_ids,
110
- optional_ptr<TableFilterSet> filters, MultiFileReaderData &reader_data);
110
+ optional_ptr<TableFilterSet> filters, MultiFileReaderData &reader_data,
111
+ const string &initial_file);
111
112
  //! Finalize the reading of a chunk - applying any constants that are required
112
113
  DUCKDB_API static void FinalizeChunk(const MultiFileReaderBindData &bind_data,
113
114
  const MultiFileReaderData &reader_data, DataChunk &chunk);
@@ -156,11 +157,11 @@ struct MultiFileReader {
156
157
  static void InitializeReader(READER_CLASS &reader, const MultiFileReaderOptions &options,
157
158
  const MultiFileReaderBindData &bind_data, const vector<LogicalType> &global_types,
158
159
  const vector<string> &global_names, const vector<column_t> &global_column_ids,
159
- optional_ptr<TableFilterSet> table_filters) {
160
+ optional_ptr<TableFilterSet> table_filters, const string &initial_file) {
160
161
  FinalizeBind(options, bind_data, reader.GetFileName(), reader.GetNames(), global_types, global_names,
161
162
  global_column_ids, reader.reader_data);
162
163
  CreateMapping(reader.GetFileName(), reader.GetTypes(), reader.GetNames(), global_types, global_names,
163
- global_column_ids, table_filters, reader.reader_data);
164
+ global_column_ids, table_filters, reader.reader_data, initial_file);
164
165
  reader.reader_data.filters = table_filters;
165
166
  }
166
167
 
@@ -193,7 +194,7 @@ private:
193
194
  static void CreateNameMapping(const string &file_name, const vector<LogicalType> &local_types,
194
195
  const vector<string> &local_names, const vector<LogicalType> &global_types,
195
196
  const vector<string> &global_names, const vector<column_t> &global_column_ids,
196
- MultiFileReaderData &reader_data);
197
+ MultiFileReaderData &reader_data, const string &initial_file);
197
198
  };
198
199
 
199
200
  } // namespace duckdb
@@ -0,0 +1,45 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/common/optional_idx.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/common/exception.hpp"
12
+
13
+ namespace duckdb {
14
+
15
+ class optional_idx {
16
+ static constexpr const idx_t INVALID_INDEX = idx_t(-1);
17
+
18
+ public:
19
+ optional_idx() : index(INVALID_INDEX) {
20
+ }
21
+ optional_idx(idx_t index) : index(index) { // NOLINT: allow implicit conversion from idx_t
22
+ if (index == INVALID_INDEX) {
23
+ throw InternalException("optional_idx cannot be initialized with an invalid index");
24
+ }
25
+ }
26
+
27
+ static optional_idx Invalid() {
28
+ return INVALID_INDEX;
29
+ }
30
+
31
+ bool IsValid() const {
32
+ return index != DConstants::INVALID_INDEX;
33
+ }
34
+ idx_t GetIndex() {
35
+ if (index == INVALID_INDEX) {
36
+ throw InternalException("Attempting to get the index of an optional_idx that is not set");
37
+ }
38
+ return index;
39
+ }
40
+
41
+ private:
42
+ idx_t index;
43
+ };
44
+
45
+ } // namespace duckdb
@@ -11,5 +11,6 @@
11
11
  #include <set>
12
12
 
13
13
  namespace duckdb {
14
+ using std::multiset;
14
15
  using std::set;
15
- }
16
+ } // namespace duckdb
@@ -88,6 +88,21 @@ struct FormatFun {
88
88
  static ScalarFunction GetFunction();
89
89
  };
90
90
 
91
+ struct FormatBytesFun {
92
+ static constexpr const char *Name = "format_bytes";
93
+ static constexpr const char *Parameters = "bytes";
94
+ static constexpr const char *Description = "Converts bytes to a human-readable presentation (e.g. 16000 -> 16KB)";
95
+ static constexpr const char *Example = "format_bytes(1000 * 16)";
96
+
97
+ static ScalarFunction GetFunction();
98
+ };
99
+
100
+ struct FormatreadabledecimalsizeFun {
101
+ using ALIAS = FormatBytesFun;
102
+
103
+ static constexpr const char *Name = "formatReadableDecimalSize";
104
+ };
105
+
91
106
  struct HammingFun {
92
107
  static constexpr const char *Name = "hamming";
93
108
  static constexpr const char *Parameters = "str1,str2";