duckdb 0.7.2-dev2867.0 → 0.7.2-dev3117.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319)
  1. package/binding.gyp +2 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/icu/icu-datepart.cpp +5 -1
  4. package/src/duckdb/extension/json/include/json_deserializer.hpp +1 -0
  5. package/src/duckdb/extension/json/include/json_serializer.hpp +8 -1
  6. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +1 -3
  7. package/src/duckdb/extension/json/json_functions/json_structure.cpp +3 -3
  8. package/src/duckdb/extension/json/json_functions/json_transform.cpp +3 -2
  9. package/src/duckdb/extension/parquet/parquet-extension.cpp +9 -7
  10. package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +18 -7
  11. package/src/duckdb/src/catalog/default/default_functions.cpp +2 -0
  12. package/src/duckdb/src/common/arrow/arrow_appender.cpp +3 -3
  13. package/src/duckdb/src/common/arrow/arrow_converter.cpp +2 -2
  14. package/src/duckdb/src/common/enum_util.cpp +5908 -0
  15. package/src/duckdb/src/common/enums/expression_type.cpp +216 -4
  16. package/src/duckdb/src/common/enums/join_type.cpp +6 -5
  17. package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
  18. package/src/duckdb/src/common/exception.cpp +1 -1
  19. package/src/duckdb/src/common/exception_format_value.cpp +2 -2
  20. package/src/duckdb/src/common/multi_file_reader.cpp +14 -0
  21. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +143 -0
  22. package/src/duckdb/src/common/serializer/binary_serializer.cpp +160 -0
  23. package/src/duckdb/src/common/sort/partition_state.cpp +1 -1
  24. package/src/duckdb/src/common/string_util.cpp +6 -1
  25. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +3 -3
  26. package/src/duckdb/src/common/types.cpp +11 -10
  27. package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +4 -4
  28. package/src/duckdb/src/core_functions/function_list.cpp +2 -0
  29. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +2 -1
  30. package/src/duckdb/src/core_functions/scalar/list/list_sort.cpp +2 -3
  31. package/src/duckdb/src/core_functions/scalar/string/format_bytes.cpp +29 -0
  32. package/src/duckdb/src/execution/aggregate_hashtable.cpp +3 -3
  33. package/src/duckdb/src/execution/index/art/art.cpp +5 -1
  34. package/src/duckdb/src/execution/operator/aggregate/distinct_aggregate_data.cpp +1 -1
  35. package/src/duckdb/src/execution/operator/aggregate/grouped_aggregate_data.cpp +2 -2
  36. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +65 -45
  37. package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +17 -11
  38. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -39
  39. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +10 -9
  40. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -4
  41. package/src/duckdb/src/execution/operator/helper/physical_explain_analyze.cpp +6 -21
  42. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +13 -13
  43. package/src/duckdb/src/execution/operator/helper/physical_limit_percent.cpp +15 -14
  44. package/src/duckdb/src/execution/operator/helper/physical_load.cpp +3 -2
  45. package/src/duckdb/src/execution/operator/helper/physical_materialized_collector.cpp +4 -4
  46. package/src/duckdb/src/execution/operator/helper/physical_pragma.cpp +4 -2
  47. package/src/duckdb/src/execution/operator/helper/physical_prepare.cpp +4 -2
  48. package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +10 -8
  49. package/src/duckdb/src/execution/operator/helper/physical_reset.cpp +4 -3
  50. package/src/duckdb/src/execution/operator/helper/physical_set.cpp +7 -6
  51. package/src/duckdb/src/execution/operator/helper/physical_streaming_sample.cpp +2 -1
  52. package/src/duckdb/src/execution/operator/helper/physical_transaction.cpp +4 -2
  53. package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +8 -8
  54. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +17 -16
  55. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +12 -9
  56. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +2 -1
  57. package/src/duckdb/src/execution/operator/join/physical_cross_product.cpp +3 -4
  58. package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +5 -5
  59. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +16 -15
  60. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +13 -12
  61. package/src/duckdb/src/execution/operator/join/physical_nested_loop_join.cpp +12 -10
  62. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +13 -11
  63. package/src/duckdb/src/execution/operator/join/physical_positional_join.cpp +8 -6
  64. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +1 -1
  65. package/src/duckdb/src/execution/operator/order/physical_order.cpp +13 -13
  66. package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +8 -8
  67. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +165 -0
  68. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +160 -145
  69. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +11 -26
  70. package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +14 -19
  71. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +7 -6
  72. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +18 -30
  73. package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +14 -18
  74. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +6 -4
  75. package/src/duckdb/src/execution/operator/scan/physical_dummy_scan.cpp +4 -19
  76. package/src/duckdb/src/execution/operator/scan/physical_empty_result.cpp +3 -2
  77. package/src/duckdb/src/execution/operator/scan/physical_positional_scan.cpp +14 -5
  78. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +6 -4
  79. package/src/duckdb/src/execution/operator/schema/physical_alter.cpp +3 -19
  80. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +13 -25
  81. package/src/duckdb/src/execution/operator/schema/physical_create_function.cpp +4 -19
  82. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +8 -9
  83. package/src/duckdb/src/execution/operator/schema/physical_create_schema.cpp +4 -19
  84. package/src/duckdb/src/execution/operator/schema/physical_create_sequence.cpp +4 -19
  85. package/src/duckdb/src/execution/operator/schema/physical_create_table.cpp +4 -19
  86. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +9 -26
  87. package/src/duckdb/src/execution/operator/schema/physical_create_view.cpp +4 -19
  88. package/src/duckdb/src/execution/operator/schema/physical_detach.cpp +4 -19
  89. package/src/duckdb/src/execution/operator/schema/physical_drop.cpp +3 -19
  90. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +9 -8
  91. package/src/duckdb/src/execution/operator/set/physical_union.cpp +1 -1
  92. package/src/duckdb/src/execution/partitionable_hashtable.cpp +2 -2
  93. package/src/duckdb/src/execution/physical_operator.cpp +11 -5
  94. package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +25 -4
  95. package/src/duckdb/src/execution/physical_plan/plan_sample.cpp +2 -1
  96. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +16 -16
  97. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +5 -4
  98. package/src/duckdb/src/function/table/arrow_conversion.cpp +3 -3
  99. package/src/duckdb/src/function/table/copy_csv.cpp +85 -29
  100. package/src/duckdb/src/function/table/read_csv.cpp +17 -11
  101. package/src/duckdb/src/function/table/system/duckdb_settings.cpp +2 -1
  102. package/src/duckdb/src/function/table/system/duckdb_types.cpp +2 -1
  103. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  104. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +7 -1
  105. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +958 -0
  106. package/src/duckdb/src/include/duckdb/common/enums/join_type.hpp +3 -3
  107. package/src/duckdb/src/include/duckdb/common/enums/operator_result_type.hpp +16 -4
  108. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
  109. package/src/duckdb/src/include/duckdb/common/exception.hpp +4 -4
  110. package/src/duckdb/src/include/duckdb/common/exception_format_value.hpp +3 -2
  111. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +44 -0
  112. package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +45 -0
  113. package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +93 -0
  114. package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +92 -0
  115. package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +7 -3
  116. package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +2 -2
  117. package/src/duckdb/src/include/duckdb/common/set.hpp +2 -1
  118. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -1
  119. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +1 -1
  120. package/src/duckdb/src/include/duckdb/common/types.hpp +1 -0
  121. package/src/duckdb/src/include/duckdb/common/vector.hpp +61 -14
  122. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +15 -0
  123. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +3 -2
  124. package/src/duckdb/src/include/duckdb/execution/executor.hpp +10 -1
  125. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/distinct_aggregate_data.hpp +2 -2
  126. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/grouped_aggregate_data.hpp +2 -2
  127. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +8 -11
  128. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +2 -4
  129. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp +3 -7
  130. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +2 -4
  131. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +1 -2
  132. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_explain_analyze.hpp +2 -5
  133. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit.hpp +2 -4
  134. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit_percent.hpp +2 -4
  135. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_load.hpp +1 -2
  136. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_materialized_collector.hpp +1 -2
  137. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_pragma.hpp +1 -2
  138. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_prepare.hpp +1 -2
  139. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_reservoir_sample.hpp +2 -4
  140. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_reset.hpp +1 -2
  141. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_set.hpp +1 -2
  142. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_transaction.hpp +1 -2
  143. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_vacuum.hpp +2 -4
  144. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -4
  145. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_blockwise_nl_join.hpp +2 -4
  146. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +1 -2
  147. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_delim_join.hpp +1 -2
  148. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +2 -4
  149. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +2 -4
  150. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_nested_loop_join.hpp +2 -4
  151. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +2 -4
  152. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_positional_join.hpp +2 -4
  153. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_order.hpp +2 -4
  154. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_top_n.hpp +2 -4
  155. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +68 -0
  156. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +3 -5
  157. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +4 -5
  158. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_delete.hpp +2 -4
  159. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_export.hpp +2 -4
  160. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +2 -4
  161. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_update.hpp +2 -4
  162. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +1 -2
  163. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_dummy_scan.hpp +1 -3
  164. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_empty_result.hpp +1 -2
  165. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_positional_scan.hpp +1 -2
  166. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +1 -2
  167. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_alter.hpp +1 -3
  168. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_attach.hpp +1 -3
  169. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_function.hpp +1 -3
  170. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_index.hpp +2 -4
  171. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_schema.hpp +1 -3
  172. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_sequence.hpp +1 -3
  173. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_table.hpp +1 -3
  174. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_type.hpp +2 -5
  175. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_view.hpp +1 -3
  176. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_detach.hpp +1 -3
  177. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_drop.hpp +1 -3
  178. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +2 -4
  179. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +3 -3
  180. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +7 -4
  181. package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +26 -6
  182. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +6 -6
  183. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +2 -1
  184. package/src/duckdb/src/include/duckdb/function/copy_function.hpp +32 -4
  185. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +4 -2
  186. package/src/duckdb/src/include/duckdb/function/table_function.hpp +0 -1
  187. package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
  188. package/src/duckdb/src/include/duckdb/main/config.hpp +4 -0
  189. package/src/duckdb/src/include/duckdb/main/database.hpp +1 -3
  190. package/src/duckdb/src/include/duckdb/main/database_path_and_type.hpp +24 -0
  191. package/src/duckdb/src/include/duckdb/main/relation/setop_relation.hpp +1 -0
  192. package/src/duckdb/src/include/duckdb/parallel/event.hpp +1 -1
  193. package/src/duckdb/src/include/duckdb/parallel/interrupt.hpp +63 -0
  194. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +16 -3
  195. package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +51 -7
  196. package/src/duckdb/src/include/duckdb/parallel/task.hpp +21 -2
  197. package/src/duckdb/src/include/duckdb/parallel/task_counter.hpp +2 -2
  198. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +2 -2
  199. package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +1 -0
  200. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +6 -0
  201. package/src/duckdb/src/include/duckdb/planner/expression/bound_columnref_expression.hpp +1 -0
  202. package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +2 -0
  203. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +2 -0
  204. package/src/duckdb/src/include/duckdb/planner/operator/logical_aggregate.hpp +2 -1
  205. package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +1 -0
  206. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +1 -0
  207. package/src/duckdb/src/include/duckdb/planner/operator/logical_delete.hpp +1 -0
  208. package/src/duckdb/src/include/duckdb/planner/operator/logical_delim_get.hpp +1 -0
  209. package/src/duckdb/src/include/duckdb/planner/operator/logical_dummy_scan.hpp +1 -0
  210. package/src/duckdb/src/include/duckdb/planner/operator/logical_expression_get.hpp +1 -0
  211. package/src/duckdb/src/include/duckdb/planner/operator/logical_insert.hpp +1 -0
  212. package/src/duckdb/src/include/duckdb/planner/operator/logical_pivot.hpp +1 -0
  213. package/src/duckdb/src/include/duckdb/planner/operator/logical_projection.hpp +1 -0
  214. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +1 -0
  215. package/src/duckdb/src/include/duckdb/planner/operator/logical_set_operation.hpp +1 -0
  216. package/src/duckdb/src/include/duckdb/planner/operator/logical_unnest.hpp +1 -0
  217. package/src/duckdb/src/include/duckdb/planner/operator/logical_update.hpp +1 -0
  218. package/src/duckdb/src/include/duckdb/planner/operator/logical_window.hpp +1 -0
  219. package/src/duckdb/src/include/duckdb/planner/query_node/bound_select_node.hpp +1 -1
  220. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -0
  221. package/src/duckdb/src/include/duckdb/storage/optimistic_data_writer.hpp +46 -0
  222. package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +24 -3
  223. package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +46 -1
  224. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +9 -10
  225. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +1 -1
  226. package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +2 -2
  227. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -3
  228. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -0
  229. package/src/duckdb/src/include/duckdb/storage/table/segment_base.hpp +1 -1
  230. package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +22 -0
  231. package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +3 -3
  232. package/src/duckdb/src/include/duckdb/storage/table/struct_column_data.hpp +2 -2
  233. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +0 -2
  234. package/src/duckdb/src/include/duckdb/storage/table/validity_column_data.hpp +1 -2
  235. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +9 -34
  236. package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +26 -0
  237. package/src/duckdb/src/include/duckdb/verification/no_operator_caching_verifier.hpp +25 -0
  238. package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +6 -0
  239. package/src/duckdb/src/main/client_context.cpp +1 -0
  240. package/src/duckdb/src/main/client_verify.cpp +5 -0
  241. package/src/duckdb/src/main/config.cpp +4 -0
  242. package/src/duckdb/src/main/database.cpp +22 -34
  243. package/src/duckdb/src/main/database_path_and_type.cpp +23 -0
  244. package/src/duckdb/src/main/extension/extension_load.cpp +19 -15
  245. package/src/duckdb/src/main/relation/join_relation.cpp +2 -1
  246. package/src/duckdb/src/main/relation/setop_relation.cpp +2 -3
  247. package/src/duckdb/src/parallel/event.cpp +1 -1
  248. package/src/duckdb/src/parallel/executor.cpp +39 -3
  249. package/src/duckdb/src/parallel/executor_task.cpp +11 -0
  250. package/src/duckdb/src/parallel/interrupt.cpp +57 -0
  251. package/src/duckdb/src/parallel/pipeline.cpp +49 -6
  252. package/src/duckdb/src/parallel/pipeline_executor.cpp +248 -69
  253. package/src/duckdb/src/parallel/pipeline_initialize_event.cpp +1 -1
  254. package/src/duckdb/src/parallel/task_scheduler.cpp +57 -22
  255. package/src/duckdb/src/parser/base_expression.cpp +6 -0
  256. package/src/duckdb/src/parser/expression/window_expression.cpp +1 -1
  257. package/src/duckdb/src/parser/parsed_data/sample_options.cpp +2 -2
  258. package/src/duckdb/src/parser/query_node/select_node.cpp +1 -1
  259. package/src/duckdb/src/parser/result_modifier.cpp +2 -2
  260. package/src/duckdb/src/parser/statement/select_statement.cpp +0 -44
  261. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -3
  262. package/src/duckdb/src/parser/tableref.cpp +1 -1
  263. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +3 -3
  264. package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +6 -0
  265. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +4 -1
  266. package/src/duckdb/src/planner/expression/bound_columnref_expression.cpp +17 -3
  267. package/src/duckdb/src/planner/expression/bound_reference_expression.cpp +8 -2
  268. package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +7 -0
  269. package/src/duckdb/src/planner/operator/logical_aggregate.cpp +14 -2
  270. package/src/duckdb/src/planner/operator/logical_column_data_get.cpp +11 -0
  271. package/src/duckdb/src/planner/operator/logical_comparison_join.cpp +2 -2
  272. package/src/duckdb/src/planner/operator/logical_cteref.cpp +11 -0
  273. package/src/duckdb/src/planner/operator/logical_delete.cpp +10 -0
  274. package/src/duckdb/src/planner/operator/logical_delim_get.cpp +12 -1
  275. package/src/duckdb/src/planner/operator/logical_dummy_scan.cpp +12 -1
  276. package/src/duckdb/src/planner/operator/logical_expression_get.cpp +12 -1
  277. package/src/duckdb/src/planner/operator/logical_get.cpp +10 -4
  278. package/src/duckdb/src/planner/operator/logical_insert.cpp +12 -1
  279. package/src/duckdb/src/planner/operator/logical_pivot.cpp +11 -0
  280. package/src/duckdb/src/planner/operator/logical_projection.cpp +11 -0
  281. package/src/duckdb/src/planner/operator/logical_recursive_cte.cpp +11 -0
  282. package/src/duckdb/src/planner/operator/logical_set_operation.cpp +11 -0
  283. package/src/duckdb/src/planner/operator/logical_unnest.cpp +12 -1
  284. package/src/duckdb/src/planner/operator/logical_update.cpp +10 -0
  285. package/src/duckdb/src/planner/operator/logical_window.cpp +11 -0
  286. package/src/duckdb/src/storage/checkpoint_manager.cpp +1 -1
  287. package/src/duckdb/src/storage/data_table.cpp +5 -0
  288. package/src/duckdb/src/storage/local_storage.cpp +40 -110
  289. package/src/duckdb/src/storage/optimistic_data_writer.cpp +96 -0
  290. package/src/duckdb/src/storage/partial_block_manager.cpp +73 -9
  291. package/src/duckdb/src/storage/single_file_block_manager.cpp +3 -1
  292. package/src/duckdb/src/storage/standard_buffer_manager.cpp +17 -12
  293. package/src/duckdb/src/storage/statistics/base_statistics.cpp +3 -0
  294. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +90 -82
  295. package/src/duckdb/src/storage/table/column_data.cpp +19 -45
  296. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +7 -7
  297. package/src/duckdb/src/storage/table/column_segment.cpp +1 -1
  298. package/src/duckdb/src/storage/table/list_column_data.cpp +6 -11
  299. package/src/duckdb/src/storage/table/row_group.cpp +13 -14
  300. package/src/duckdb/src/storage/table/row_group_collection.cpp +10 -4
  301. package/src/duckdb/src/storage/table/standard_column_data.cpp +6 -10
  302. package/src/duckdb/src/storage/table/struct_column_data.cpp +7 -13
  303. package/src/duckdb/src/storage/table/update_segment.cpp +0 -25
  304. package/src/duckdb/src/storage/table/validity_column_data.cpp +2 -6
  305. package/src/duckdb/src/transaction/commit_state.cpp +4 -4
  306. package/src/duckdb/src/verification/deserialized_statement_verifier.cpp +2 -1
  307. package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +20 -0
  308. package/src/duckdb/src/verification/no_operator_caching_verifier.cpp +13 -0
  309. package/src/duckdb/src/verification/statement_verifier.cpp +7 -0
  310. package/src/duckdb/ub_src_common.cpp +2 -2
  311. package/src/duckdb/ub_src_common_serializer.cpp +4 -2
  312. package/src/duckdb/ub_src_core_functions_scalar_string.cpp +2 -0
  313. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  314. package/src/duckdb/ub_src_main.cpp +2 -0
  315. package/src/duckdb/ub_src_parallel.cpp +2 -0
  316. package/src/duckdb/ub_src_storage.cpp +2 -0
  317. package/src/duckdb/src/common/serializer/enum_serializer.cpp +0 -1180
  318. package/src/duckdb/src/common/vector.cpp +0 -12
  319. package/src/duckdb/src/include/duckdb/common/serializer/enum_serializer.hpp +0 -113
@@ -22,86 +22,103 @@ unique_ptr<BaseStatistics> ColumnCheckpointState::GetStatistics() {
22
22
  return std::move(global_stats);
23
23
  }
24
24
 
25
- struct PartialBlockForCheckpoint : PartialBlock {
26
- struct PartialColumnSegment {
27
- ColumnData *data;
28
- ColumnSegment *segment;
29
- uint32_t offset_in_block;
30
- };
31
-
32
- public:
33
- PartialBlockForCheckpoint(ColumnData *first_data, ColumnSegment *first_segment, BlockManager &block_manager,
34
- PartialBlockState state)
35
- : PartialBlock(state), first_data(first_data), first_segment(first_segment), block_manager(block_manager) {
36
- }
25
+ PartialBlockForCheckpoint::PartialBlockForCheckpoint(ColumnData &data, ColumnSegment &segment,
26
+ BlockManager &block_manager, PartialBlockState state)
27
+ : PartialBlock(state), block_manager(block_manager), block(segment.block) {
28
+ AddSegmentToTail(data, segment, 0);
29
+ }
37
30
 
38
- ~PartialBlockForCheckpoint() override {
39
- D_ASSERT(IsFlushed() || Exception::UncaughtException());
40
- }
31
+ PartialBlockForCheckpoint::~PartialBlockForCheckpoint() {
32
+ D_ASSERT(IsFlushed() || Exception::UncaughtException());
33
+ }
41
34
 
42
- // We will copy all subsequent segment data into the memory corresponding
43
- // to the first segment. Once the block is full (or checkpoint is complete)
44
- // we'll invoke Flush(), which will cause
45
- // the block to get written to storage (via BlockManger::ConvertToPersistent),
46
- // and all segments to have their references updated
47
- // (via ColumnSegment::ConvertToPersistent)
48
- ColumnData *first_data;
49
- ColumnSegment *first_segment;
50
- BlockManager &block_manager;
51
- vector<PartialColumnSegment> tail_segments;
52
-
53
- private:
54
- struct UninitializedRegion {
55
- idx_t start;
56
- idx_t end;
57
- };
58
- vector<UninitializedRegion> uninitialized_regions;
59
-
60
- public:
61
- bool IsFlushed() {
62
- // first_segment is zeroed on Flush
63
- return !first_segment;
64
- }
35
+ bool PartialBlockForCheckpoint::IsFlushed() {
36
+ // segments are cleared on Flush
37
+ return segments.empty();
38
+ }
65
39
 
66
- void AddUninitializedRegion(idx_t start, idx_t end) override {
67
- uninitialized_regions.push_back({start, end});
68
- }
40
+ void PartialBlockForCheckpoint::AddUninitializedRegion(idx_t start, idx_t end) {
41
+ uninitialized_regions.push_back({start, end});
42
+ }
69
43
 
70
- void Flush(idx_t free_space_left) override {
71
- // At this point, we've already copied all data from tail_segments
72
- // into the page owned by first_segment. We flush all segment data to
73
- // disk with the following call.
74
- if (free_space_left > 0 || !uninitialized_regions.empty()) {
75
- auto handle = block_manager.buffer_manager.Pin(first_segment->block);
76
- // memset any uninitialized regions
77
- for (auto &uninitialized : uninitialized_regions) {
78
- memset(handle.Ptr() + uninitialized.start, 0, uninitialized.end - uninitialized.start);
79
- }
80
- // memset any free space at the end of the block to 0 prior to writing to disk
81
- memset(handle.Ptr() + Storage::BLOCK_SIZE - free_space_left, 0, free_space_left);
44
+ void PartialBlockForCheckpoint::Flush(idx_t free_space_left) {
45
+ if (IsFlushed()) {
46
+ throw InternalException("Flush called on partial block that was already flushed");
47
+ }
48
+ // if we have any free space or uninitialized regions we need to zero-initialize them
49
+ if (free_space_left > 0 || !uninitialized_regions.empty()) {
50
+ auto handle = block_manager.buffer_manager.Pin(block);
51
+ // memset any uninitialized regions
52
+ for (auto &uninitialized : uninitialized_regions) {
53
+ memset(handle.Ptr() + uninitialized.start, 0, uninitialized.end - uninitialized.start);
82
54
  }
83
- first_data->IncrementVersion();
84
- first_segment->ConvertToPersistent(&block_manager, state.block_id);
85
- // Now that the page is persistent, update tail_segments to point to the
86
- // newly persistent block.
87
- for (auto e : tail_segments) {
88
- e.data->IncrementVersion();
89
- e.segment->MarkAsPersistent(first_segment->block, e.offset_in_block);
55
+ // memset any free space at the end of the block to 0 prior to writing to disk
56
+ memset(handle.Ptr() + Storage::BLOCK_SIZE - free_space_left, 0, free_space_left);
57
+ }
58
+ // At this point, we've already copied all data from tail_segments
59
+ // into the page owned by first_segment. We flush all segment data to
60
+ // disk with the following call.
61
+ // persist the first segment to disk and point the remaining segments to the same block
62
+ bool fetch_new_block = state.block_id == INVALID_BLOCK;
63
+ if (fetch_new_block) {
64
+ state.block_id = block_manager.GetFreeBlockId();
65
+ }
66
+ for (idx_t i = 0; i < segments.size(); i++) {
67
+ auto &segment = segments[i];
68
+ segment.data.IncrementVersion();
69
+ if (i == 0) {
70
+ // the first segment is converted to persistent - this writes the data for ALL segments to disk
71
+ D_ASSERT(segment.offset_in_block == 0);
72
+ segment.segment.ConvertToPersistent(&block_manager, state.block_id);
73
+ // update the block after it has been converted to a persistent segment
74
+ block = segment.segment.block;
75
+ } else {
76
+ // subsequent segments are MARKED as persistent - they don't need to be rewritten
77
+ segment.segment.MarkAsPersistent(block, segment.offset_in_block);
78
+ if (fetch_new_block) {
79
+ // if we fetched a new block we need to increase the reference count to the block
80
+ block_manager.IncreaseBlockReferenceCount(state.block_id);
81
+ }
90
82
  }
91
- first_segment = nullptr;
92
- tail_segments.clear();
93
83
  }
84
+ Clear();
85
+ }
94
86
 
95
- void Clear() override {
96
- first_data = nullptr;
97
- first_segment = nullptr;
98
- tail_segments.clear();
87
+ void PartialBlockForCheckpoint::Clear() {
88
+ uninitialized_regions.clear();
89
+ block.reset();
90
+ segments.clear();
91
+ }
92
+
93
+ void PartialBlockForCheckpoint::Merge(PartialBlock &other_p, idx_t offset, idx_t other_size) {
94
+ auto &other = other_p.Cast<PartialBlockForCheckpoint>();
95
+
96
+ auto &buffer_manager = block_manager.buffer_manager;
97
+ // pin the source block
98
+ auto old_handle = buffer_manager.Pin(other.block);
99
+ // pin the target block
100
+ auto new_handle = buffer_manager.Pin(block);
101
+ // memcpy the contents of the old block to the new block
102
+ memcpy(new_handle.Ptr() + offset, old_handle.Ptr(), other_size);
103
+
104
+ // now copy over all of the segments to the new block
105
+ // move over the uninitialized regions
106
+ for (auto &region : other.uninitialized_regions) {
107
+ region.start += offset;
108
+ region.end += offset;
109
+ uninitialized_regions.push_back(region);
99
110
  }
100
111
 
101
- void AddSegmentToTail(ColumnData *data, ColumnSegment *segment, uint32_t offset_in_block) {
102
- tail_segments.push_back({data, segment, offset_in_block});
112
+ // move over the segments
113
+ for (auto &segment : other.segments) {
114
+ AddSegmentToTail(segment.data, segment.segment, segment.offset_in_block + offset);
103
115
  }
104
- };
116
+ other.Clear();
117
+ }
118
+
119
+ void PartialBlockForCheckpoint::AddSegmentToTail(ColumnData &data, ColumnSegment &segment, uint32_t offset_in_block) {
120
+ segments.emplace_back(data, segment, offset_in_block);
121
+ }
105
122
 
106
123
  void ColumnCheckpointState::FlushSegment(unique_ptr<ColumnSegment> segment, idx_t segment_size) {
107
124
  D_ASSERT(segment_size <= Storage::BLOCK_SIZE);
@@ -128,14 +145,14 @@ void ColumnCheckpointState::FlushSegment(unique_ptr<ColumnSegment> segment, idx_
128
145
  if (allocation.partial_block) {
129
146
  // Use an existing block.
130
147
  D_ASSERT(offset_in_block > 0);
131
- auto pstate = (PartialBlockForCheckpoint *)allocation.partial_block.get();
148
+ auto &pstate = allocation.partial_block->Cast<PartialBlockForCheckpoint>();
132
149
  // pin the source block
133
150
  auto old_handle = buffer_manager.Pin(segment->block);
134
151
  // pin the target block
135
- auto new_handle = buffer_manager.Pin(pstate->first_segment->block);
152
+ auto new_handle = buffer_manager.Pin(pstate.block);
136
153
  // memcpy the contents of the old block to the new block
137
154
  memcpy(new_handle.Ptr() + offset_in_block, old_handle.Ptr(), segment_size);
138
- pstate->AddSegmentToTail(&column_data, segment.get(), offset_in_block);
155
+ pstate.AddSegmentToTail(column_data, *segment, offset_in_block);
139
156
  } else {
140
157
  // Create a new block for future reuse.
141
158
  if (segment->SegmentSize() != Storage::BLOCK_SIZE) {
@@ -146,7 +163,7 @@ void ColumnCheckpointState::FlushSegment(unique_ptr<ColumnSegment> segment, idx_
146
163
  }
147
164
  D_ASSERT(offset_in_block == 0);
148
165
  allocation.partial_block = make_uniq<PartialBlockForCheckpoint>(
149
- &column_data, segment.get(), *allocation.block_manager, allocation.state);
166
+ column_data, *segment, *allocation.block_manager, allocation.state);
150
167
  }
151
168
  // Writer will decide whether to reuse this block.
152
169
  partial_block_manager.RegisterPartialBlock(std::move(allocation));
@@ -180,13 +197,4 @@ void ColumnCheckpointState::WriteDataPointers(RowGroupWriter &writer) {
180
197
  writer.WriteColumnDataPointers(*this);
181
198
  }
182
199
 
183
- void ColumnCheckpointState::GetBlockIds(unordered_set<block_id_t> &result) {
184
- for (auto &pointer : data_pointers) {
185
- if (pointer.block_pointer.block_id == INVALID_BLOCK) {
186
- continue;
187
- }
188
- result.insert(pointer.block_pointer.block_id);
189
- }
190
- }
191
-
192
200
  } // namespace duckdb
@@ -21,7 +21,7 @@
21
21
  namespace duckdb {
22
22
 
23
23
  ColumnData::ColumnData(BlockManager &block_manager, DataTableInfo &info, idx_t column_index, idx_t start_row,
24
- LogicalType type_p, ColumnData *parent)
24
+ LogicalType type_p, optional_ptr<ColumnData> parent)
25
25
  : start(start_row), count(0), block_manager(block_manager), info(info), column_index(column_index),
26
26
  type(std::move(type_p)), parent(parent), version(0) {
27
27
  if (!parent) {
@@ -29,24 +29,17 @@ ColumnData::ColumnData(BlockManager &block_manager, DataTableInfo &info, idx_t c
29
29
  }
30
30
  }
31
31
 
32
- ColumnData::ColumnData(ColumnData &other, idx_t start, ColumnData *parent)
33
- : start(start), count(other.count), block_manager(other.block_manager), info(other.info),
34
- column_index(other.column_index), type(std::move(other.type)), parent(parent),
35
- version(parent ? parent->version + 1 : 0) {
36
- if (other.updates) {
37
- updates = make_uniq<UpdateSegment>(*other.updates, *this);
38
- }
39
- if (other.stats) {
40
- stats = make_uniq<SegmentStatistics>(other.stats->statistics.Copy());
41
- }
32
+ ColumnData::~ColumnData() {
33
+ }
34
+
35
+ void ColumnData::SetStart(idx_t new_start) {
36
+ this->start = new_start;
42
37
  idx_t offset = 0;
43
- for (auto &segment : other.data.Segments()) {
44
- this->data.AppendSegment(ColumnSegment::CreateSegment(segment, start + offset));
38
+ for (auto &segment : data.Segments()) {
39
+ segment.start = start + offset;
45
40
  offset += segment.count;
46
41
  }
47
- }
48
-
49
- ColumnData::~ColumnData() {
42
+ data.Reinitialize();
50
43
  }
51
44
 
52
45
  DatabaseInstance &ColumnData::GetDatabase() const {
@@ -278,8 +271,8 @@ void ColumnData::InitializeAppend(ColumnAppendState &state) {
278
271
  AppendTransientSegment(l, start);
279
272
  }
280
273
  auto segment = data.GetLastSegment(l);
281
- if (segment->segment_type == ColumnSegmentType::PERSISTENT) {
282
- // no transient segments yet
274
+ if (segment->segment_type == ColumnSegmentType::PERSISTENT || !segment->function.get().init_append) {
275
+ // we cannot append to this segment - append a new segment
283
276
  auto total_rows = segment->start + segment->count;
284
277
  AppendTransientSegment(l, total_rows);
285
278
  state.current = data.GetLastSegment(l);
@@ -418,9 +411,9 @@ unique_ptr<ColumnCheckpointState> ColumnData::CreateCheckpointState(RowGroup &ro
418
411
  return make_uniq<ColumnCheckpointState>(row_group, *this, partial_block_manager);
419
412
  }
420
413
 
421
- void ColumnData::CheckpointScan(ColumnSegment *segment, ColumnScanState &state, idx_t row_group_start, idx_t count,
414
+ void ColumnData::CheckpointScan(ColumnSegment &segment, ColumnScanState &state, idx_t row_group_start, idx_t count,
422
415
  Vector &scan_vector) {
423
- segment->Scan(state, count, scan_vector, 0, true);
416
+ segment.Scan(state, count, scan_vector, 0, true);
424
417
  if (updates) {
425
418
  scan_vector.Flatten(count);
426
419
  updates->FetchCommittedRange(state.row_index - row_group_start, count, scan_vector);
@@ -489,7 +482,7 @@ void ColumnData::DeserializeColumn(Deserializer &source) {
489
482
 
490
483
  shared_ptr<ColumnData> ColumnData::Deserialize(BlockManager &block_manager, DataTableInfo &info, idx_t column_index,
491
484
  idx_t start_row, Deserializer &source, const LogicalType &type,
492
- ColumnData *parent) {
485
+ optional_ptr<ColumnData> parent) {
493
486
  auto entry = ColumnData::CreateColumn(block_manager, info, column_index, start_row, type, parent);
494
487
  entry->DeserializeColumn(source);
495
488
  return entry;
@@ -565,48 +558,29 @@ void ColumnData::Verify(RowGroup &parent) {
565
558
 
566
559
  template <class RET, class OP>
567
560
  static RET CreateColumnInternal(BlockManager &block_manager, DataTableInfo &info, idx_t column_index, idx_t start_row,
568
- const LogicalType &type, ColumnData *parent) {
561
+ const LogicalType &type, optional_ptr<ColumnData> parent) {
569
562
  if (type.InternalType() == PhysicalType::STRUCT) {
570
563
  return OP::template Create<StructColumnData>(block_manager, info, column_index, start_row, type, parent);
571
564
  } else if (type.InternalType() == PhysicalType::LIST) {
572
565
  return OP::template Create<ListColumnData>(block_manager, info, column_index, start_row, type, parent);
573
566
  } else if (type.id() == LogicalTypeId::VALIDITY) {
574
- return OP::template Create<ValidityColumnData>(block_manager, info, column_index, start_row, parent);
567
+ return OP::template Create<ValidityColumnData>(block_manager, info, column_index, start_row, *parent);
575
568
  }
576
569
  return OP::template Create<StandardColumnData>(block_manager, info, column_index, start_row, type, parent);
577
570
  }
578
571
 
579
- template <class RET, class OP>
580
- static RET CreateColumnInternal(ColumnData &other, idx_t start_row, ColumnData *parent) {
581
- if (other.type.InternalType() == PhysicalType::STRUCT) {
582
- return OP::template Create<StructColumnData>(other, start_row, parent);
583
- } else if (other.type.InternalType() == PhysicalType::LIST) {
584
- return OP::template Create<ListColumnData>(other, start_row, parent);
585
- } else if (other.type.id() == LogicalTypeId::VALIDITY) {
586
- return OP::template Create<ValidityColumnData>(other, start_row, parent);
587
- }
588
- return OP::template Create<StandardColumnData>(other, start_row, parent);
589
- }
590
-
591
572
  shared_ptr<ColumnData> ColumnData::CreateColumn(BlockManager &block_manager, DataTableInfo &info, idx_t column_index,
592
- idx_t start_row, const LogicalType &type, ColumnData *parent) {
573
+ idx_t start_row, const LogicalType &type,
574
+ optional_ptr<ColumnData> parent) {
593
575
  return CreateColumnInternal<shared_ptr<ColumnData>, SharedConstructor>(block_manager, info, column_index, start_row,
594
576
  type, parent);
595
577
  }
596
578
 
597
- shared_ptr<ColumnData> ColumnData::CreateColumn(ColumnData &other, idx_t start_row, ColumnData *parent) {
598
- return CreateColumnInternal<shared_ptr<ColumnData>, SharedConstructor>(other, start_row, parent);
599
- }
600
-
601
579
  unique_ptr<ColumnData> ColumnData::CreateColumnUnique(BlockManager &block_manager, DataTableInfo &info,
602
580
  idx_t column_index, idx_t start_row, const LogicalType &type,
603
- ColumnData *parent) {
581
+ optional_ptr<ColumnData> parent) {
604
582
  return CreateColumnInternal<unique_ptr<ColumnData>, UniqueConstructor>(block_manager, info, column_index, start_row,
605
583
  type, parent);
606
584
  }
607
585
 
608
- unique_ptr<ColumnData> ColumnData::CreateColumnUnique(ColumnData &other, idx_t start_row, ColumnData *parent) {
609
- return CreateColumnInternal<unique_ptr<ColumnData>, UniqueConstructor>(other, start_row, parent);
610
- }
611
-
612
586
  } // namespace duckdb
@@ -43,16 +43,16 @@ ColumnCheckpointState &ColumnDataCheckpointer::GetCheckpointState() {
43
43
  void ColumnDataCheckpointer::ScanSegments(const std::function<void(Vector &, idx_t)> &callback) {
44
44
  Vector scan_vector(intermediate.GetType(), nullptr);
45
45
  for (idx_t segment_idx = 0; segment_idx < nodes.size(); segment_idx++) {
46
- auto segment = nodes[segment_idx].node.get();
46
+ auto &segment = *nodes[segment_idx].node;
47
47
  ColumnScanState scan_state;
48
- scan_state.current = segment;
49
- segment->InitializeScan(scan_state);
48
+ scan_state.current = &segment;
49
+ segment.InitializeScan(scan_state);
50
50
 
51
- for (idx_t base_row_index = 0; base_row_index < segment->count; base_row_index += STANDARD_VECTOR_SIZE) {
51
+ for (idx_t base_row_index = 0; base_row_index < segment.count; base_row_index += STANDARD_VECTOR_SIZE) {
52
52
  scan_vector.Reference(intermediate);
53
53
 
54
- idx_t count = MinValue<idx_t>(segment->count - base_row_index, STANDARD_VECTOR_SIZE);
55
- scan_state.row_index = segment->start + base_row_index;
54
+ idx_t count = MinValue<idx_t>(segment.count - base_row_index, STANDARD_VECTOR_SIZE);
55
+ scan_state.row_index = segment.start + base_row_index;
56
56
 
57
57
  col_data.CheckpointScan(segment, scan_state, row_group.start, count, scan_vector);
58
58
 
@@ -250,7 +250,7 @@ void ColumnDataCheckpointer::Checkpoint(vector<SegmentNode<ColumnSegment>> nodes
250
250
  // no changes: only need to write the metadata for this column
251
251
  WritePersistentSegments();
252
252
  } else {
253
- // there are changes: rewrite the set of columns
253
+ // there are changes: rewrite the set of columns);
254
254
  WriteToDisk();
255
255
  }
256
256
  }
@@ -172,7 +172,7 @@ void ColumnSegment::RevertAppend(idx_t start_row) {
172
172
  //===--------------------------------------------------------------------===//
173
173
  // Convert To Persistent
174
174
  //===--------------------------------------------------------------------===//
175
- void ColumnSegment::ConvertToPersistent(BlockManager *block_manager, block_id_t block_id_p) {
175
+ void ColumnSegment::ConvertToPersistent(optional_ptr<BlockManager> block_manager, block_id_t block_id_p) {
176
176
  D_ASSERT(segment_type == ColumnSegmentType::TRANSIENT);
177
177
  segment_type = ColumnSegmentType::PERSISTENT;
178
178
 
@@ -8,19 +8,19 @@
8
8
  namespace duckdb {
9
9
 
10
10
  ListColumnData::ListColumnData(BlockManager &block_manager, DataTableInfo &info, idx_t column_index, idx_t start_row,
11
- LogicalType type_p, ColumnData *parent)
11
+ LogicalType type_p, optional_ptr<ColumnData> parent)
12
12
  : ColumnData(block_manager, info, column_index, start_row, std::move(type_p), parent),
13
- validity(block_manager, info, 0, start_row, this) {
13
+ validity(block_manager, info, 0, start_row, *this) {
14
14
  D_ASSERT(type.InternalType() == PhysicalType::LIST);
15
15
  auto &child_type = ListType::GetChildType(type);
16
16
  // the child column, with column index 1 (0 is the validity mask)
17
17
  child_column = ColumnData::CreateColumnUnique(block_manager, info, 1, start_row, child_type, this);
18
18
  }
19
19
 
20
- ListColumnData::ListColumnData(ColumnData &original, idx_t start_row, ColumnData *parent)
21
- : ColumnData(original, start_row, parent), validity(((ListColumnData &)original).validity, start_row, this) {
22
- auto &list_data = (ListColumnData &)original;
23
- child_column = ColumnData::CreateColumnUnique(*list_data.child_column, start_row, this);
20
+ void ListColumnData::SetStart(idx_t new_start) {
21
+ ColumnData::SetStart(new_start);
22
+ child_column->SetStart(new_start);
23
+ validity.SetStart(new_start);
24
24
  }
25
25
 
26
26
  bool ListColumnData::CheckZonemap(ColumnScanState &state, TableFilter &filter) {
@@ -322,11 +322,6 @@ public:
322
322
  validity_state->WriteDataPointers(writer);
323
323
  child_state->WriteDataPointers(writer);
324
324
  }
325
- void GetBlockIds(unordered_set<block_id_t> &result) override {
326
- ColumnCheckpointState::GetBlockIds(result);
327
- validity_state->GetBlockIds(result);
328
- child_state->GetBlockIds(result);
329
- }
330
325
  };
331
326
 
332
327
  unique_ptr<ColumnCheckpointState> ListColumnData::CreateCheckpointState(RowGroup &row_group,
@@ -45,16 +45,15 @@ RowGroup::RowGroup(RowGroupCollection &collection, RowGroupPointer &&pointer)
45
45
  Verify();
46
46
  }
47
47
 
48
- RowGroup::RowGroup(RowGroup &row_group, RowGroupCollection &collection, idx_t start)
49
- : SegmentBase<RowGroup>(start, row_group.count.load()), collection(collection),
50
- version_info(std::move(row_group.version_info)) {
51
- for (auto &column : row_group.GetColumns()) {
52
- this->columns.push_back(ColumnData::CreateColumn(*column, start));
48
+ void RowGroup::MoveToCollection(RowGroupCollection &collection, idx_t new_start) {
49
+ this->collection = collection;
50
+ this->start = new_start;
51
+ for (auto &column : GetColumns()) {
52
+ column->SetStart(new_start);
53
53
  }
54
54
  if (version_info) {
55
- version_info->SetStart(start);
55
+ version_info->SetStart(new_start);
56
56
  }
57
- Verify();
58
57
  }
59
58
 
60
59
  void VersionNode::SetStart(idx_t start) {
@@ -101,8 +100,8 @@ ColumnData &RowGroup::GetColumn(idx_t c) {
101
100
  if (column_pointers.size() != columns.size()) {
102
101
  throw InternalException("Lazy loading a column but the pointer was not set");
103
102
  }
104
- auto &block_manager = collection.GetBlockManager();
105
- auto &types = collection.GetTypes();
103
+ auto &block_manager = GetCollection().GetBlockManager();
104
+ auto &types = GetCollection().GetTypes();
106
105
  auto &block_pointer = column_pointers[c];
107
106
  MetaBlockReader column_data_reader(block_manager, block_pointer.block_id);
108
107
  column_data_reader.offset = block_pointer.offset;
@@ -113,14 +112,14 @@ ColumnData &RowGroup::GetColumn(idx_t c) {
113
112
  }
114
113
 
115
114
  DatabaseInstance &RowGroup::GetDatabase() {
116
- return collection.GetDatabase();
115
+ return GetCollection().GetDatabase();
117
116
  }
118
117
 
119
118
  BlockManager &RowGroup::GetBlockManager() {
120
- return collection.GetBlockManager();
119
+ return GetCollection().GetBlockManager();
121
120
  }
122
121
  DataTableInfo &RowGroup::GetTableInfo() {
123
- return collection.GetTableInfo();
122
+ return GetCollection().GetTableInfo();
124
123
  }
125
124
 
126
125
  void RowGroup::InitializeEmpty(const vector<LogicalType> &types) {
@@ -231,7 +230,7 @@ unique_ptr<RowGroup> RowGroup::AlterType(RowGroupCollection &new_collection, con
231
230
  column_data->InitializeAppend(append_state);
232
231
 
233
232
  // scan the original table, and fill the new column with the transformed value
234
- scan_state.Initialize(collection.GetTypes());
233
+ scan_state.Initialize(GetCollection().GetTypes());
235
234
  InitializeScan(scan_state);
236
235
 
237
236
  DataChunk append_chunk;
@@ -537,7 +536,7 @@ void RowGroup::Scan(TransactionData transaction, CollectionScanState &state, Dat
537
536
  }
538
537
 
539
538
  void RowGroup::ScanCommitted(CollectionScanState &state, DataChunk &result, TableScanType type) {
540
- auto &transaction_manager = DuckTransactionManager::Get(collection.GetAttached());
539
+ auto &transaction_manager = DuckTransactionManager::Get(GetCollection().GetAttached());
541
540
 
542
541
  auto lowest_active_start = transaction_manager.LowestActiveStart();
543
542
  auto lowest_active_id = transaction_manager.LowestActiveId();
@@ -96,6 +96,10 @@ RowGroup *RowGroupCollection::GetRowGroup(int64_t index) {
96
96
  return (RowGroup *)row_groups->GetSegmentByIndex(index);
97
97
  }
98
98
 
99
+ idx_t RowGroupCollection::RowGroupCount() {
100
+ return row_groups->GetSegmentCount();
101
+ }
102
+
99
103
  void RowGroupCollection::Verify() {
100
104
  #ifdef DEBUG
101
105
  idx_t current_total_rows = 0;
@@ -444,10 +448,12 @@ void RowGroupCollection::RevertAppendInternal(idx_t start_row, idx_t count) {
444
448
  void RowGroupCollection::MergeStorage(RowGroupCollection &data) {
445
449
  D_ASSERT(data.types == types);
446
450
  auto index = row_start + total_rows.load();
447
- for (auto &row_group : data.row_groups->Segments()) {
448
- auto new_group = make_uniq<RowGroup>(row_group, *this, index);
449
- index += new_group->count;
450
- row_groups->AppendSegment(std::move(new_group));
451
+ auto segments = data.row_groups->MoveSegments();
452
+ for (auto &entry : segments) {
453
+ auto &row_group = entry.node;
454
+ row_group->MoveToCollection(*this, index);
455
+ index += row_group->count;
456
+ row_groups->AppendSegment(std::move(row_group));
451
457
  }
452
458
  stats.MergeStats(data.stats);
453
459
  total_rows += data.total_rows.load();
@@ -10,13 +10,14 @@
10
10
  namespace duckdb {
11
11
 
12
12
  StandardColumnData::StandardColumnData(BlockManager &block_manager, DataTableInfo &info, idx_t column_index,
13
- idx_t start_row, LogicalType type, ColumnData *parent)
13
+ idx_t start_row, LogicalType type, optional_ptr<ColumnData> parent)
14
14
  : ColumnData(block_manager, info, column_index, start_row, std::move(type), parent),
15
- validity(block_manager, info, 0, start_row, this) {
15
+ validity(block_manager, info, 0, start_row, *this) {
16
16
  }
17
17
 
18
- StandardColumnData::StandardColumnData(ColumnData &original, idx_t start_row, ColumnData *parent)
19
- : ColumnData(original, start_row, parent), validity(((StandardColumnData &)original).validity, start_row, this) {
18
+ void StandardColumnData::SetStart(idx_t new_start) {
19
+ ColumnData::SetStart(new_start);
20
+ validity.SetStart(new_start);
20
21
  }
21
22
 
22
23
  bool StandardColumnData::CheckZonemap(ColumnScanState &state, TableFilter &filter) {
@@ -176,11 +177,6 @@ public:
176
177
  ColumnCheckpointState::WriteDataPointers(writer);
177
178
  validity_state->WriteDataPointers(writer);
178
179
  }
179
-
180
- void GetBlockIds(unordered_set<block_id_t> &result) override {
181
- ColumnCheckpointState::GetBlockIds(result);
182
- validity_state->GetBlockIds(result);
183
- }
184
180
  };
185
181
 
186
182
  unique_ptr<ColumnCheckpointState>
@@ -198,7 +194,7 @@ unique_ptr<ColumnCheckpointState> StandardColumnData::Checkpoint(RowGroup &row_g
198
194
  return base_state;
199
195
  }
200
196
 
201
- void StandardColumnData::CheckpointScan(ColumnSegment *segment, ColumnScanState &state, idx_t row_group_start,
197
+ void StandardColumnData::CheckpointScan(ColumnSegment &segment, ColumnScanState &state, idx_t row_group_start,
202
198
  idx_t count, Vector &scan_vector) {
203
199
  ColumnData::CheckpointScan(segment, state, row_group_start, count, scan_vector);
204
200
 
@@ -8,9 +8,9 @@
8
8
  namespace duckdb {
9
9
 
10
10
  StructColumnData::StructColumnData(BlockManager &block_manager, DataTableInfo &info, idx_t column_index,
11
- idx_t start_row, LogicalType type_p, ColumnData *parent)
11
+ idx_t start_row, LogicalType type_p, optional_ptr<ColumnData> parent)
12
12
  : ColumnData(block_manager, info, column_index, start_row, std::move(type_p), parent),
13
- validity(block_manager, info, 0, start_row, this) {
13
+ validity(block_manager, info, 0, start_row, *this) {
14
14
  D_ASSERT(type.InternalType() == PhysicalType::STRUCT);
15
15
  auto &child_types = StructType::GetChildTypes(type);
16
16
  D_ASSERT(child_types.size() > 0);
@@ -23,12 +23,12 @@ StructColumnData::StructColumnData(BlockManager &block_manager, DataTableInfo &i
23
23
  }
24
24
  }
25
25
 
26
- StructColumnData::StructColumnData(ColumnData &original, idx_t start_row, ColumnData *parent)
27
- : ColumnData(original, start_row, parent), validity(((StructColumnData &)original).validity, start_row, this) {
28
- auto &struct_data = (StructColumnData &)original;
29
- for (auto &child_col : struct_data.sub_columns) {
30
- sub_columns.push_back(ColumnData::CreateColumnUnique(*child_col, start_row, this));
26
+ void StructColumnData::SetStart(idx_t new_start) {
27
+ this->start = new_start;
28
+ for (auto &sub_column : sub_columns) {
29
+ sub_column->SetStart(new_start);
31
30
  }
31
+ validity.SetStart(new_start);
32
32
  }
33
33
 
34
34
  bool StructColumnData::CheckZonemap(ColumnScanState &state, TableFilter &filter) {
@@ -246,12 +246,6 @@ public:
246
246
  state->WriteDataPointers(writer);
247
247
  }
248
248
  }
249
- void GetBlockIds(unordered_set<block_id_t> &result) override {
250
- validity_state->GetBlockIds(result);
251
- for (auto &state : child_states) {
252
- state->GetBlockIds(result);
253
- }
254
- }
255
249
  };
256
250
 
257
251
  unique_ptr<ColumnCheckpointState> StructColumnData::CreateCheckpointState(RowGroup &row_group,
@@ -37,31 +37,6 @@ UpdateSegment::UpdateSegment(ColumnData &column_data)
37
37
  this->statistics_update_function = GetStatisticsUpdateFunction(physical_type);
38
38
  }
39
39
 
40
- UpdateSegment::UpdateSegment(UpdateSegment &other, ColumnData &owner)
41
- : column_data(owner), root(std::move(other.root)), stats(std::move(other.stats)), type_size(other.type_size) {
42
-
43
- this->heap.Move(other.heap);
44
- // update the segment links
45
- if (root) {
46
- for (idx_t i = 0; i < RowGroup::ROW_GROUP_VECTOR_COUNT; i++) {
47
- if (!root->info[i]) {
48
- continue;
49
- }
50
- for (auto info = root->info[i]->info.get(); info; info = info->next) {
51
- info->segment = this;
52
- }
53
- }
54
- }
55
- initialize_update_function = other.initialize_update_function;
56
- merge_update_function = other.merge_update_function;
57
- fetch_update_function = other.fetch_update_function;
58
- fetch_committed_function = other.fetch_committed_function;
59
- fetch_committed_range = other.fetch_committed_range;
60
- fetch_row_function = other.fetch_row_function;
61
- rollback_update_function = other.rollback_update_function;
62
- statistics_update_function = other.statistics_update_function;
63
- }
64
-
65
40
  UpdateSegment::~UpdateSegment() {
66
41
  }
67
42
 
@@ -5,12 +5,8 @@
5
5
  namespace duckdb {
6
6
 
7
7
  ValidityColumnData::ValidityColumnData(BlockManager &block_manager, DataTableInfo &info, idx_t column_index,
8
- idx_t start_row, ColumnData *parent)
9
- : ColumnData(block_manager, info, column_index, start_row, LogicalType(LogicalTypeId::VALIDITY), parent) {
10
- }
11
-
12
- ValidityColumnData::ValidityColumnData(ColumnData &original, idx_t start_row, ColumnData *parent)
13
- : ColumnData(original, start_row, parent) {
8
+ idx_t start_row, ColumnData &parent)
9
+ : ColumnData(block_manager, info, column_index, start_row, LogicalType(LogicalTypeId::VALIDITY), &parent) {
14
10
  }
15
11
 
16
12
  bool ValidityColumnData::CheckZonemap(ColumnScanState &state, TableFilter &filter) {