duckdb 0.7.2-dev2867.0 → 0.7.2-dev2995.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231)
  1. package/binding.gyp +1 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/icu/icu-datepart.cpp +5 -1
  4. package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +18 -7
  5. package/src/duckdb/src/catalog/default/default_functions.cpp +2 -0
  6. package/src/duckdb/src/common/arrow/arrow_appender.cpp +3 -3
  7. package/src/duckdb/src/common/arrow/arrow_converter.cpp +2 -2
  8. package/src/duckdb/src/common/sort/partition_state.cpp +1 -1
  9. package/src/duckdb/src/common/string_util.cpp +6 -1
  10. package/src/duckdb/src/core_functions/function_list.cpp +2 -0
  11. package/src/duckdb/src/core_functions/scalar/string/format_bytes.cpp +29 -0
  12. package/src/duckdb/src/execution/index/art/art.cpp +5 -1
  13. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +62 -43
  14. package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +17 -11
  15. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -39
  16. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +10 -9
  17. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -4
  18. package/src/duckdb/src/execution/operator/helper/physical_explain_analyze.cpp +6 -21
  19. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +13 -13
  20. package/src/duckdb/src/execution/operator/helper/physical_limit_percent.cpp +15 -14
  21. package/src/duckdb/src/execution/operator/helper/physical_load.cpp +3 -2
  22. package/src/duckdb/src/execution/operator/helper/physical_materialized_collector.cpp +4 -4
  23. package/src/duckdb/src/execution/operator/helper/physical_pragma.cpp +4 -2
  24. package/src/duckdb/src/execution/operator/helper/physical_prepare.cpp +4 -2
  25. package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +10 -8
  26. package/src/duckdb/src/execution/operator/helper/physical_reset.cpp +4 -3
  27. package/src/duckdb/src/execution/operator/helper/physical_set.cpp +7 -6
  28. package/src/duckdb/src/execution/operator/helper/physical_transaction.cpp +4 -2
  29. package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +8 -8
  30. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +17 -16
  31. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +10 -8
  32. package/src/duckdb/src/execution/operator/join/physical_cross_product.cpp +3 -4
  33. package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +5 -5
  34. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +16 -15
  35. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +13 -12
  36. package/src/duckdb/src/execution/operator/join/physical_nested_loop_join.cpp +12 -10
  37. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +13 -11
  38. package/src/duckdb/src/execution/operator/join/physical_positional_join.cpp +8 -6
  39. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +1 -1
  40. package/src/duckdb/src/execution/operator/order/physical_order.cpp +13 -13
  41. package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +8 -8
  42. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +160 -145
  43. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +10 -25
  44. package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +14 -19
  45. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +7 -6
  46. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +18 -30
  47. package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +14 -18
  48. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +6 -4
  49. package/src/duckdb/src/execution/operator/scan/physical_dummy_scan.cpp +4 -19
  50. package/src/duckdb/src/execution/operator/scan/physical_empty_result.cpp +3 -2
  51. package/src/duckdb/src/execution/operator/scan/physical_positional_scan.cpp +14 -5
  52. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +6 -4
  53. package/src/duckdb/src/execution/operator/schema/physical_alter.cpp +3 -19
  54. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +4 -18
  55. package/src/duckdb/src/execution/operator/schema/physical_create_function.cpp +4 -19
  56. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +8 -9
  57. package/src/duckdb/src/execution/operator/schema/physical_create_schema.cpp +4 -19
  58. package/src/duckdb/src/execution/operator/schema/physical_create_sequence.cpp +4 -19
  59. package/src/duckdb/src/execution/operator/schema/physical_create_table.cpp +4 -19
  60. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +9 -26
  61. package/src/duckdb/src/execution/operator/schema/physical_create_view.cpp +4 -19
  62. package/src/duckdb/src/execution/operator/schema/physical_detach.cpp +4 -19
  63. package/src/duckdb/src/execution/operator/schema/physical_drop.cpp +3 -19
  64. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +9 -8
  65. package/src/duckdb/src/execution/operator/set/physical_union.cpp +1 -1
  66. package/src/duckdb/src/execution/physical_operator.cpp +11 -5
  67. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +16 -16
  68. package/src/duckdb/src/function/table/arrow_conversion.cpp +3 -3
  69. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  70. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +7 -1
  71. package/src/duckdb/src/include/duckdb/common/enums/operator_result_type.hpp +16 -4
  72. package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +45 -0
  73. package/src/duckdb/src/include/duckdb/common/set.hpp +2 -1
  74. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +15 -0
  75. package/src/duckdb/src/include/duckdb/execution/executor.hpp +10 -1
  76. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +5 -8
  77. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +2 -4
  78. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp +3 -7
  79. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +2 -4
  80. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +1 -2
  81. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_explain_analyze.hpp +2 -5
  82. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit.hpp +2 -4
  83. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit_percent.hpp +2 -4
  84. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_load.hpp +1 -2
  85. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_materialized_collector.hpp +1 -2
  86. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_pragma.hpp +1 -2
  87. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_prepare.hpp +1 -2
  88. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_reservoir_sample.hpp +2 -4
  89. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_reset.hpp +1 -2
  90. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_set.hpp +1 -2
  91. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_transaction.hpp +1 -2
  92. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_vacuum.hpp +2 -4
  93. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -4
  94. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_blockwise_nl_join.hpp +2 -4
  95. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +1 -2
  96. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_delim_join.hpp +1 -2
  97. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +2 -4
  98. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +2 -4
  99. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_nested_loop_join.hpp +2 -4
  100. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +2 -4
  101. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_positional_join.hpp +2 -4
  102. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_order.hpp +2 -4
  103. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_top_n.hpp +2 -4
  104. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +3 -5
  105. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +2 -5
  106. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_delete.hpp +2 -4
  107. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_export.hpp +2 -4
  108. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +2 -4
  109. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_update.hpp +2 -4
  110. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +1 -2
  111. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_dummy_scan.hpp +1 -3
  112. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_empty_result.hpp +1 -2
  113. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_positional_scan.hpp +1 -2
  114. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +1 -2
  115. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_alter.hpp +1 -3
  116. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_attach.hpp +1 -3
  117. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_function.hpp +1 -3
  118. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_index.hpp +2 -4
  119. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_schema.hpp +1 -3
  120. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_sequence.hpp +1 -3
  121. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_table.hpp +1 -3
  122. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_type.hpp +2 -5
  123. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_view.hpp +1 -3
  124. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_detach.hpp +1 -3
  125. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_drop.hpp +1 -3
  126. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +2 -4
  127. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +7 -4
  128. package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +26 -6
  129. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +5 -5
  130. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +2 -1
  131. package/src/duckdb/src/include/duckdb/function/table_function.hpp +0 -1
  132. package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
  133. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  134. package/src/duckdb/src/include/duckdb/parallel/event.hpp +1 -1
  135. package/src/duckdb/src/include/duckdb/parallel/interrupt.hpp +63 -0
  136. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +16 -3
  137. package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +51 -7
  138. package/src/duckdb/src/include/duckdb/parallel/task.hpp +21 -2
  139. package/src/duckdb/src/include/duckdb/parallel/task_counter.hpp +2 -2
  140. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +2 -2
  141. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +6 -0
  142. package/src/duckdb/src/include/duckdb/planner/expression/bound_columnref_expression.hpp +1 -0
  143. package/src/duckdb/src/include/duckdb/planner/operator/logical_aggregate.hpp +1 -0
  144. package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +1 -0
  145. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +1 -0
  146. package/src/duckdb/src/include/duckdb/planner/operator/logical_delete.hpp +1 -0
  147. package/src/duckdb/src/include/duckdb/planner/operator/logical_delim_get.hpp +1 -0
  148. package/src/duckdb/src/include/duckdb/planner/operator/logical_dummy_scan.hpp +1 -0
  149. package/src/duckdb/src/include/duckdb/planner/operator/logical_expression_get.hpp +1 -0
  150. package/src/duckdb/src/include/duckdb/planner/operator/logical_insert.hpp +1 -0
  151. package/src/duckdb/src/include/duckdb/planner/operator/logical_pivot.hpp +1 -0
  152. package/src/duckdb/src/include/duckdb/planner/operator/logical_projection.hpp +1 -0
  153. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +1 -0
  154. package/src/duckdb/src/include/duckdb/planner/operator/logical_set_operation.hpp +1 -0
  155. package/src/duckdb/src/include/duckdb/planner/operator/logical_unnest.hpp +1 -0
  156. package/src/duckdb/src/include/duckdb/planner/operator/logical_update.hpp +1 -0
  157. package/src/duckdb/src/include/duckdb/planner/operator/logical_window.hpp +1 -0
  158. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -0
  159. package/src/duckdb/src/include/duckdb/storage/optimistic_data_writer.hpp +46 -0
  160. package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +24 -3
  161. package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +46 -1
  162. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +9 -10
  163. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +1 -1
  164. package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +2 -2
  165. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -3
  166. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -0
  167. package/src/duckdb/src/include/duckdb/storage/table/segment_base.hpp +1 -1
  168. package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +22 -0
  169. package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +3 -3
  170. package/src/duckdb/src/include/duckdb/storage/table/struct_column_data.hpp +2 -2
  171. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +0 -2
  172. package/src/duckdb/src/include/duckdb/storage/table/validity_column_data.hpp +1 -2
  173. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +9 -34
  174. package/src/duckdb/src/include/duckdb/verification/no_operator_caching_verifier.hpp +25 -0
  175. package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +5 -0
  176. package/src/duckdb/src/main/client_verify.cpp +4 -0
  177. package/src/duckdb/src/main/config.cpp +4 -0
  178. package/src/duckdb/src/main/database.cpp +11 -11
  179. package/src/duckdb/src/main/extension/extension_load.cpp +19 -15
  180. package/src/duckdb/src/parallel/event.cpp +1 -1
  181. package/src/duckdb/src/parallel/executor.cpp +39 -3
  182. package/src/duckdb/src/parallel/executor_task.cpp +11 -0
  183. package/src/duckdb/src/parallel/interrupt.cpp +57 -0
  184. package/src/duckdb/src/parallel/pipeline.cpp +49 -6
  185. package/src/duckdb/src/parallel/pipeline_executor.cpp +248 -69
  186. package/src/duckdb/src/parallel/pipeline_initialize_event.cpp +1 -1
  187. package/src/duckdb/src/parallel/task_scheduler.cpp +57 -22
  188. package/src/duckdb/src/parser/base_expression.cpp +6 -0
  189. package/src/duckdb/src/planner/expression/bound_columnref_expression.cpp +17 -3
  190. package/src/duckdb/src/planner/expression/bound_reference_expression.cpp +8 -2
  191. package/src/duckdb/src/planner/operator/logical_aggregate.cpp +13 -1
  192. package/src/duckdb/src/planner/operator/logical_column_data_get.cpp +11 -0
  193. package/src/duckdb/src/planner/operator/logical_cteref.cpp +11 -0
  194. package/src/duckdb/src/planner/operator/logical_delete.cpp +10 -0
  195. package/src/duckdb/src/planner/operator/logical_delim_get.cpp +12 -1
  196. package/src/duckdb/src/planner/operator/logical_dummy_scan.cpp +12 -1
  197. package/src/duckdb/src/planner/operator/logical_expression_get.cpp +12 -1
  198. package/src/duckdb/src/planner/operator/logical_get.cpp +10 -4
  199. package/src/duckdb/src/planner/operator/logical_insert.cpp +12 -1
  200. package/src/duckdb/src/planner/operator/logical_pivot.cpp +11 -0
  201. package/src/duckdb/src/planner/operator/logical_projection.cpp +11 -0
  202. package/src/duckdb/src/planner/operator/logical_recursive_cte.cpp +11 -0
  203. package/src/duckdb/src/planner/operator/logical_set_operation.cpp +11 -0
  204. package/src/duckdb/src/planner/operator/logical_unnest.cpp +12 -1
  205. package/src/duckdb/src/planner/operator/logical_update.cpp +10 -0
  206. package/src/duckdb/src/planner/operator/logical_window.cpp +11 -0
  207. package/src/duckdb/src/storage/checkpoint_manager.cpp +1 -1
  208. package/src/duckdb/src/storage/data_table.cpp +5 -0
  209. package/src/duckdb/src/storage/local_storage.cpp +40 -110
  210. package/src/duckdb/src/storage/optimistic_data_writer.cpp +96 -0
  211. package/src/duckdb/src/storage/partial_block_manager.cpp +73 -9
  212. package/src/duckdb/src/storage/single_file_block_manager.cpp +3 -1
  213. package/src/duckdb/src/storage/standard_buffer_manager.cpp +17 -12
  214. package/src/duckdb/src/storage/statistics/base_statistics.cpp +3 -0
  215. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +90 -82
  216. package/src/duckdb/src/storage/table/column_data.cpp +19 -45
  217. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +7 -7
  218. package/src/duckdb/src/storage/table/column_segment.cpp +1 -1
  219. package/src/duckdb/src/storage/table/list_column_data.cpp +6 -11
  220. package/src/duckdb/src/storage/table/row_group.cpp +13 -14
  221. package/src/duckdb/src/storage/table/row_group_collection.cpp +10 -4
  222. package/src/duckdb/src/storage/table/standard_column_data.cpp +6 -10
  223. package/src/duckdb/src/storage/table/struct_column_data.cpp +7 -13
  224. package/src/duckdb/src/storage/table/update_segment.cpp +0 -25
  225. package/src/duckdb/src/storage/table/validity_column_data.cpp +2 -6
  226. package/src/duckdb/src/transaction/commit_state.cpp +4 -4
  227. package/src/duckdb/src/verification/no_operator_caching_verifier.cpp +13 -0
  228. package/src/duckdb/src/verification/statement_verifier.cpp +4 -0
  229. package/src/duckdb/ub_src_core_functions_scalar_string.cpp +2 -0
  230. package/src/duckdb/ub_src_parallel.cpp +2 -0
  231. package/src/duckdb/ub_src_storage.cpp +2 -0
@@ -0,0 +1,96 @@
1
+ #include "duckdb/storage/optimistic_data_writer.hpp"
2
+ #include "duckdb/storage/table/column_segment.hpp"
3
+ #include "duckdb/storage/partial_block_manager.hpp"
4
+ #include "duckdb/storage/table/column_checkpoint_state.hpp"
5
+
6
+ namespace duckdb {
7
+
8
+ OptimisticDataWriter::OptimisticDataWriter(DataTable &table) : table(table) {
9
+ }
10
+
11
+ OptimisticDataWriter::OptimisticDataWriter(DataTable &table, OptimisticDataWriter &parent) : table(table) {
12
+ if (parent.partial_manager) {
13
+ parent.partial_manager->ClearBlocks();
14
+ }
15
+ }
16
+
17
+ OptimisticDataWriter::~OptimisticDataWriter() {
18
+ }
19
+
20
+ bool OptimisticDataWriter::PrepareWrite() {
21
+ // check if we should pre-emptively write the table to disk
22
+ if (table.info->IsTemporary() || StorageManager::Get(table.info->db).InMemory()) {
23
+ return false;
24
+ }
25
+ // we should! write the second-to-last row group to disk
26
+ // allocate the partial block-manager if none is allocated yet
27
+ if (!partial_manager) {
28
+ auto &block_manager = table.info->table_io_manager->GetBlockManagerForRowData();
29
+ partial_manager = make_uniq<PartialBlockManager>(block_manager, CheckpointType::APPEND_TO_TABLE);
30
+ }
31
+ return true;
32
+ }
33
+
34
+ void OptimisticDataWriter::WriteNewRowGroup(RowGroupCollection &row_groups) {
35
+ // we finished writing a complete row group
36
+ if (!PrepareWrite()) {
37
+ return;
38
+ }
39
+ // flush second-to-last row group
40
+ auto row_group = row_groups.GetRowGroup(-2);
41
+ FlushToDisk(row_group);
42
+ }
43
+
44
+ void OptimisticDataWriter::WriteLastRowGroup(RowGroupCollection &row_groups) {
45
+ // we finished writing a complete row group
46
+ if (!PrepareWrite()) {
47
+ return;
48
+ }
49
+ // flush the last row group
50
+ auto row_group = row_groups.GetRowGroup(-1);
51
+ if (!row_group) {
52
+ return;
53
+ }
54
+ FlushToDisk(row_group);
55
+ }
56
+
57
+ void OptimisticDataWriter::FlushToDisk(RowGroup *row_group) {
58
+ if (!row_group) {
59
+ throw InternalException("FlushToDisk called without a RowGroup");
60
+ }
61
+ //! The set of column compression types (if any)
62
+ vector<CompressionType> compression_types;
63
+ D_ASSERT(compression_types.empty());
64
+ for (auto &column : table.column_definitions) {
65
+ compression_types.push_back(column.CompressionType());
66
+ }
67
+ row_group->WriteToDisk(*partial_manager, compression_types);
68
+ }
69
+
70
+ void OptimisticDataWriter::Merge(OptimisticDataWriter &other) {
71
+ if (!other.partial_manager) {
72
+ return;
73
+ }
74
+ if (!partial_manager) {
75
+ partial_manager = std::move(other.partial_manager);
76
+ return;
77
+ }
78
+ partial_manager->Merge(*other.partial_manager);
79
+ other.partial_manager.reset();
80
+ }
81
+
82
+ void OptimisticDataWriter::FinalFlush() {
83
+ if (partial_manager) {
84
+ partial_manager->FlushPartialBlocks();
85
+ partial_manager.reset();
86
+ }
87
+ }
88
+
89
+ void OptimisticDataWriter::Rollback() {
90
+ if (partial_manager) {
91
+ partial_manager->Rollback();
92
+ partial_manager.reset();
93
+ }
94
+ }
95
+
96
+ } // namespace duckdb
@@ -2,9 +2,10 @@
2
2
 
3
3
  namespace duckdb {
4
4
 
5
- PartialBlockManager::PartialBlockManager(BlockManager &block_manager, uint32_t max_partial_block_size,
6
- uint32_t max_use_count)
7
- : block_manager(block_manager), max_partial_block_size(max_partial_block_size), max_use_count(max_use_count) {
5
+ PartialBlockManager::PartialBlockManager(BlockManager &block_manager, CheckpointType checkpoint_type,
6
+ uint32_t max_partial_block_size, uint32_t max_use_count)
7
+ : block_manager(block_manager), checkpoint_type(checkpoint_type), max_partial_block_size(max_partial_block_size),
8
+ max_use_count(max_use_count) {
8
9
  }
9
10
  PartialBlockManager::~PartialBlockManager() {
10
11
  }
@@ -23,7 +24,9 @@ PartialBlockAllocation PartialBlockManager::GetBlockAllocation(uint32_t segment_
23
24
  //! there is! increase the reference count of this block
24
25
  allocation.partial_block->state.block_use_count += 1;
25
26
  allocation.state = allocation.partial_block->state;
26
- block_manager.IncreaseBlockReferenceCount(allocation.state.block_id);
27
+ if (checkpoint_type == CheckpointType::FULL_CHECKPOINT) {
28
+ block_manager.IncreaseBlockReferenceCount(allocation.state.block_id);
29
+ }
27
30
  } else {
28
31
  // full block: get a free block to write to
29
32
  AllocateBlock(allocation.state, segment_size);
@@ -31,9 +34,18 @@ PartialBlockAllocation PartialBlockManager::GetBlockAllocation(uint32_t segment_
31
34
  return allocation;
32
35
  }
33
36
 
37
+ bool PartialBlockManager::HasBlockAllocation(uint32_t segment_size) {
38
+ return segment_size <= max_partial_block_size &&
39
+ partially_filled_blocks.lower_bound(segment_size) != partially_filled_blocks.end();
40
+ }
41
+
34
42
  void PartialBlockManager::AllocateBlock(PartialBlockState &state, uint32_t segment_size) {
35
43
  D_ASSERT(segment_size <= Storage::BLOCK_SIZE);
36
- state.block_id = block_manager.GetFreeBlockId();
44
+ if (checkpoint_type == CheckpointType::FULL_CHECKPOINT) {
45
+ state.block_id = block_manager.GetFreeBlockId();
46
+ } else {
47
+ state.block_id = INVALID_BLOCK;
48
+ }
37
49
  state.block_size = Storage::BLOCK_SIZE;
38
50
  state.offset_in_block = 0;
39
51
  state.block_use_count = 1;
@@ -82,21 +94,73 @@ void PartialBlockManager::RegisterPartialBlock(PartialBlockAllocation &&allocati
82
94
  // Flush any block that we're not going to reuse.
83
95
  if (block_to_free) {
84
96
  block_to_free->Flush(free_space);
97
+ AddWrittenBlock(block_to_free->state.block_id);
85
98
  }
86
99
  }
87
100
 
88
- void PartialBlockManager::FlushPartialBlocks() {
101
+ void PartialBlock::Merge(PartialBlock &other, idx_t offset, idx_t other_size) {
102
+ throw InternalException("PartialBlock::Merge not implemented for this block type");
103
+ }
104
+
105
+ void PartialBlockManager::Merge(PartialBlockManager &other) {
106
+ if (&other == this) {
107
+ throw InternalException("Cannot merge into itself");
108
+ }
109
+ // for each partially filled block in the other manager, check if we can merge it into an existing block in this
110
+ // manager
111
+ for (auto &e : other.partially_filled_blocks) {
112
+ if (!e.second) {
113
+ throw InternalException("Empty partially filled block found");
114
+ }
115
+ auto used_space = Storage::BLOCK_SIZE - e.first;
116
+ if (HasBlockAllocation(used_space)) {
117
+ // we can merge this block into an existing block - merge them
118
+ // merge blocks
119
+ auto allocation = GetBlockAllocation(used_space);
120
+ allocation.partial_block->Merge(*e.second, allocation.state.offset_in_block, used_space);
121
+
122
+ // re-register the partial block
123
+ allocation.state.offset_in_block += used_space;
124
+ RegisterPartialBlock(std::move(allocation));
125
+ } else {
126
+ // we cannot merge this block - append it directly to the current block manager
127
+ partially_filled_blocks.insert(make_pair(e.first, std::move(e.second)));
128
+ }
129
+ }
130
+ // copy over the written blocks
131
+ for (auto &block_id : other.written_blocks) {
132
+ AddWrittenBlock(block_id);
133
+ }
134
+ other.written_blocks.clear();
135
+ other.partially_filled_blocks.clear();
136
+ }
137
+
138
+ void PartialBlockManager::AddWrittenBlock(block_id_t block) {
139
+ auto entry = written_blocks.insert(block);
140
+ if (!entry.second) {
141
+ throw InternalException("Written block already exists");
142
+ }
143
+ }
144
+
145
+ void PartialBlockManager::ClearBlocks() {
89
146
  for (auto &e : partially_filled_blocks) {
90
- e.second->Flush(e.first);
147
+ e.second->Clear();
91
148
  }
92
149
  partially_filled_blocks.clear();
93
150
  }
94
151
 
95
- void PartialBlockManager::Clear() {
152
+ void PartialBlockManager::FlushPartialBlocks() {
96
153
  for (auto &e : partially_filled_blocks) {
97
- e.second->Clear();
154
+ e.second->Flush(e.first);
98
155
  }
99
156
  partially_filled_blocks.clear();
100
157
  }
101
158
 
159
+ void PartialBlockManager::Rollback() {
160
+ ClearBlocks();
161
+ for (auto &block_id : written_blocks) {
162
+ block_manager.MarkBlockAsFree(block_id);
163
+ }
164
+ }
165
+
102
166
  } // namespace duckdb
@@ -287,7 +287,9 @@ void SingleFileBlockManager::MarkBlockAsFree(block_id_t block_id) {
287
287
  lock_guard<mutex> lock(block_lock);
288
288
  D_ASSERT(block_id >= 0);
289
289
  D_ASSERT(block_id < max_block);
290
- D_ASSERT(free_list.find(block_id) == free_list.end());
290
+ if (free_list.find(block_id) != free_list.end()) {
291
+ throw InternalException("MarkBlockAsFree called but block %llu was already freed!", block_id);
292
+ }
291
293
  multi_use_blocks.erase(block_id);
292
294
  free_list.insert(block_id);
293
295
  }
@@ -90,15 +90,18 @@ TempBufferPoolReservation StandardBufferManager::EvictBlocksOrThrow(idx_t memory
90
90
  ARGS... args) {
91
91
  auto r = buffer_pool.EvictBlocks(memory_delta, buffer_pool.maximum_memory, buffer);
92
92
  if (!r.success) {
93
- throw OutOfMemoryException(args..., InMemoryWarning());
93
+ string extra_text = StringUtil::Format(" (%s/%s used)", StringUtil::BytesToHumanReadableString(GetUsedMemory()),
94
+ StringUtil::BytesToHumanReadableString(GetMaxMemory()));
95
+ extra_text += InMemoryWarning();
96
+ throw OutOfMemoryException(args..., extra_text);
94
97
  }
95
98
  return std::move(r.reservation);
96
99
  }
97
100
 
98
101
  shared_ptr<BlockHandle> StandardBufferManager::RegisterSmallMemory(idx_t block_size) {
99
102
  D_ASSERT(block_size < Storage::BLOCK_SIZE);
100
- auto res = EvictBlocksOrThrow(block_size, nullptr, "could not allocate block of %lld bytes (%lld/%lld used) %s",
101
- block_size, GetUsedMemory(), GetMaxMemory());
103
+ auto res = EvictBlocksOrThrow(block_size, nullptr, "could not allocate block of size %s%s",
104
+ StringUtil::BytesToHumanReadableString(block_size));
102
105
 
103
106
  auto buffer = ConstructManagedBuffer(block_size, nullptr, FileBufferType::TINY_BUFFER);
104
107
 
@@ -112,9 +115,8 @@ shared_ptr<BlockHandle> StandardBufferManager::RegisterMemory(idx_t block_size,
112
115
  auto alloc_size = GetAllocSize(block_size);
113
116
  // first evict blocks until we have enough memory to store this buffer
114
117
  unique_ptr<FileBuffer> reusable_buffer;
115
- auto res =
116
- EvictBlocksOrThrow(alloc_size, &reusable_buffer, "could not allocate block of %lld bytes (%lld/%lld used) %s",
117
- alloc_size, GetUsedMemory(), GetMaxMemory());
118
+ auto res = EvictBlocksOrThrow(alloc_size, &reusable_buffer, "could not allocate block of size %s%s",
119
+ StringUtil::BytesToHumanReadableString(alloc_size));
118
120
 
119
121
  auto buffer = ConstructManagedBuffer(block_size, std::move(reusable_buffer));
120
122
 
@@ -144,8 +146,9 @@ void StandardBufferManager::ReAllocate(shared_ptr<BlockHandle> &handle, idx_t bl
144
146
  return;
145
147
  } else if (memory_delta > 0) {
146
148
  // evict blocks until we have space to resize this block
147
- auto reservation = EvictBlocksOrThrow(memory_delta, nullptr, "failed to resize block from %lld to %lld%s",
148
- handle->memory_usage, req.alloc_size);
149
+ auto reservation = EvictBlocksOrThrow(memory_delta, nullptr, "failed to resize block from %s to %s%s",
150
+ StringUtil::BytesToHumanReadableString(handle->memory_usage),
151
+ StringUtil::BytesToHumanReadableString(req.alloc_size));
149
152
  // EvictBlocks decrements 'current_memory' for us.
150
153
  handle->memory_charge.Merge(std::move(reservation));
151
154
  } else {
@@ -171,8 +174,8 @@ BufferHandle StandardBufferManager::Pin(shared_ptr<BlockHandle> &handle) {
171
174
  }
172
175
  // evict blocks until we have space for the current block
173
176
  unique_ptr<FileBuffer> reusable_buffer;
174
- auto reservation =
175
- EvictBlocksOrThrow(required_memory, &reusable_buffer, "failed to pin block of size %lld%s", required_memory);
177
+ auto reservation = EvictBlocksOrThrow(required_memory, &reusable_buffer, "failed to pin block of size %s%s",
178
+ StringUtil::BytesToHumanReadableString(required_memory));
176
179
  // lock the handle again and repeat the check (in case anybody loaded in the mean time)
177
180
  lock_guard<mutex> lock(handle->lock);
178
181
  // check if the block is already loaded
@@ -732,7 +735,8 @@ void StandardBufferManager::ReserveMemory(idx_t size) {
732
735
  if (size == 0) {
733
736
  return;
734
737
  }
735
- auto reservation = EvictBlocksOrThrow(size, nullptr, "failed to reserve memory data of size %lld%s", size);
738
+ auto reservation = EvictBlocksOrThrow(size, nullptr, "failed to reserve memory data of size %s%s",
739
+ StringUtil::BytesToHumanReadableString(size));
736
740
  reservation.size = 0;
737
741
  }
738
742
 
@@ -748,7 +752,8 @@ void StandardBufferManager::FreeReservedMemory(idx_t size) {
748
752
  //===--------------------------------------------------------------------===//
749
753
  data_ptr_t StandardBufferManager::BufferAllocatorAllocate(PrivateAllocatorData *private_data, idx_t size) {
750
754
  auto &data = (BufferAllocatorData &)*private_data;
751
- auto reservation = data.manager.EvictBlocksOrThrow(size, nullptr, "failed to allocate data of size %lld%s", size);
755
+ auto reservation = data.manager.EvictBlocksOrThrow(size, nullptr, "failed to allocate data of size %s%s",
756
+ StringUtil::BytesToHumanReadableString(size));
752
757
  // We rely on manual tracking of this one. :(
753
758
  reservation.size = 0;
754
759
  return Allocator::Get(data.manager.db).AllocateData(size);
@@ -271,6 +271,7 @@ void BaseStatistics::Serialize(Serializer &serializer) const {
271
271
  FieldWriter writer(serializer);
272
272
  writer.WriteField<bool>(has_null);
273
273
  writer.WriteField<bool>(has_no_null);
274
+ writer.WriteField<idx_t>(distinct_count);
274
275
  Serialize(writer);
275
276
  writer.Finalize();
276
277
  }
@@ -316,9 +317,11 @@ BaseStatistics BaseStatistics::Deserialize(Deserializer &source, LogicalType typ
316
317
  FieldReader reader(source);
317
318
  bool has_null = reader.ReadRequired<bool>();
318
319
  bool has_no_null = reader.ReadRequired<bool>();
320
+ idx_t distinct_count = reader.ReadRequired<idx_t>();
319
321
  auto result = DeserializeType(reader, std::move(type));
320
322
  result.has_null = has_null;
321
323
  result.has_no_null = has_no_null;
324
+ result.distinct_count = distinct_count;
322
325
  reader.Finalize();
323
326
  return result;
324
327
  }
@@ -22,86 +22,103 @@ unique_ptr<BaseStatistics> ColumnCheckpointState::GetStatistics() {
22
22
  return std::move(global_stats);
23
23
  }
24
24
 
25
- struct PartialBlockForCheckpoint : PartialBlock {
26
- struct PartialColumnSegment {
27
- ColumnData *data;
28
- ColumnSegment *segment;
29
- uint32_t offset_in_block;
30
- };
31
-
32
- public:
33
- PartialBlockForCheckpoint(ColumnData *first_data, ColumnSegment *first_segment, BlockManager &block_manager,
34
- PartialBlockState state)
35
- : PartialBlock(state), first_data(first_data), first_segment(first_segment), block_manager(block_manager) {
36
- }
25
+ PartialBlockForCheckpoint::PartialBlockForCheckpoint(ColumnData &data, ColumnSegment &segment,
26
+ BlockManager &block_manager, PartialBlockState state)
27
+ : PartialBlock(state), block_manager(block_manager), block(segment.block) {
28
+ AddSegmentToTail(data, segment, 0);
29
+ }
37
30
 
38
- ~PartialBlockForCheckpoint() override {
39
- D_ASSERT(IsFlushed() || Exception::UncaughtException());
40
- }
31
+ PartialBlockForCheckpoint::~PartialBlockForCheckpoint() {
32
+ D_ASSERT(IsFlushed() || Exception::UncaughtException());
33
+ }
41
34
 
42
- // We will copy all subsequent segment data into the memory corresponding
43
- // to the first segment. Once the block is full (or checkpoint is complete)
44
- // we'll invoke Flush(), which will cause
45
- // the block to get written to storage (via BlockManger::ConvertToPersistent),
46
- // and all segments to have their references updated
47
- // (via ColumnSegment::ConvertToPersistent)
48
- ColumnData *first_data;
49
- ColumnSegment *first_segment;
50
- BlockManager &block_manager;
51
- vector<PartialColumnSegment> tail_segments;
52
-
53
- private:
54
- struct UninitializedRegion {
55
- idx_t start;
56
- idx_t end;
57
- };
58
- vector<UninitializedRegion> uninitialized_regions;
59
-
60
- public:
61
- bool IsFlushed() {
62
- // first_segment is zeroed on Flush
63
- return !first_segment;
64
- }
35
+ bool PartialBlockForCheckpoint::IsFlushed() {
36
+ // segments are cleared on Flush
37
+ return segments.empty();
38
+ }
65
39
 
66
- void AddUninitializedRegion(idx_t start, idx_t end) override {
67
- uninitialized_regions.push_back({start, end});
68
- }
40
+ void PartialBlockForCheckpoint::AddUninitializedRegion(idx_t start, idx_t end) {
41
+ uninitialized_regions.push_back({start, end});
42
+ }
69
43
 
70
- void Flush(idx_t free_space_left) override {
71
- // At this point, we've already copied all data from tail_segments
72
- // into the page owned by first_segment. We flush all segment data to
73
- // disk with the following call.
74
- if (free_space_left > 0 || !uninitialized_regions.empty()) {
75
- auto handle = block_manager.buffer_manager.Pin(first_segment->block);
76
- // memset any uninitialized regions
77
- for (auto &uninitialized : uninitialized_regions) {
78
- memset(handle.Ptr() + uninitialized.start, 0, uninitialized.end - uninitialized.start);
79
- }
80
- // memset any free space at the end of the block to 0 prior to writing to disk
81
- memset(handle.Ptr() + Storage::BLOCK_SIZE - free_space_left, 0, free_space_left);
44
+ void PartialBlockForCheckpoint::Flush(idx_t free_space_left) {
45
+ if (IsFlushed()) {
46
+ throw InternalException("Flush called on partial block that was already flushed");
47
+ }
48
+ // if we have any free space or uninitialized regions we need to zero-initialize them
49
+ if (free_space_left > 0 || !uninitialized_regions.empty()) {
50
+ auto handle = block_manager.buffer_manager.Pin(block);
51
+ // memset any uninitialized regions
52
+ for (auto &uninitialized : uninitialized_regions) {
53
+ memset(handle.Ptr() + uninitialized.start, 0, uninitialized.end - uninitialized.start);
82
54
  }
83
- first_data->IncrementVersion();
84
- first_segment->ConvertToPersistent(&block_manager, state.block_id);
85
- // Now that the page is persistent, update tail_segments to point to the
86
- // newly persistent block.
87
- for (auto e : tail_segments) {
88
- e.data->IncrementVersion();
89
- e.segment->MarkAsPersistent(first_segment->block, e.offset_in_block);
55
+ // memset any free space at the end of the block to 0 prior to writing to disk
56
+ memset(handle.Ptr() + Storage::BLOCK_SIZE - free_space_left, 0, free_space_left);
57
+ }
58
+ // At this point, we've already copied all data from tail_segments
59
+ // into the page owned by first_segment. We flush all segment data to
60
+ // disk with the following call.
61
+ // persist the first segment to disk and point the remaining segments to the same block
62
+ bool fetch_new_block = state.block_id == INVALID_BLOCK;
63
+ if (fetch_new_block) {
64
+ state.block_id = block_manager.GetFreeBlockId();
65
+ }
66
+ for (idx_t i = 0; i < segments.size(); i++) {
67
+ auto &segment = segments[i];
68
+ segment.data.IncrementVersion();
69
+ if (i == 0) {
70
+ // the first segment is converted to persistent - this writes the data for ALL segments to disk
71
+ D_ASSERT(segment.offset_in_block == 0);
72
+ segment.segment.ConvertToPersistent(&block_manager, state.block_id);
73
+ // update the block after it has been converted to a persistent segment
74
+ block = segment.segment.block;
75
+ } else {
76
+ // subsequent segments are MARKED as persistent - they don't need to be rewritten
77
+ segment.segment.MarkAsPersistent(block, segment.offset_in_block);
78
+ if (fetch_new_block) {
79
+ // if we fetched a new block we need to increase the reference count to the block
80
+ block_manager.IncreaseBlockReferenceCount(state.block_id);
81
+ }
90
82
  }
91
- first_segment = nullptr;
92
- tail_segments.clear();
93
83
  }
84
+ Clear();
85
+ }
94
86
 
95
- void Clear() override {
96
- first_data = nullptr;
97
- first_segment = nullptr;
98
- tail_segments.clear();
87
+ void PartialBlockForCheckpoint::Clear() {
88
+ uninitialized_regions.clear();
89
+ block.reset();
90
+ segments.clear();
91
+ }
92
+
93
+ void PartialBlockForCheckpoint::Merge(PartialBlock &other_p, idx_t offset, idx_t other_size) {
94
+ auto &other = other_p.Cast<PartialBlockForCheckpoint>();
95
+
96
+ auto &buffer_manager = block_manager.buffer_manager;
97
+ // pin the source block
98
+ auto old_handle = buffer_manager.Pin(other.block);
99
+ // pin the target block
100
+ auto new_handle = buffer_manager.Pin(block);
101
+ // memcpy the contents of the old block to the new block
102
+ memcpy(new_handle.Ptr() + offset, old_handle.Ptr(), other_size);
103
+
104
+ // now copy over all of the segments to the new block
105
+ // move over the uninitialized regions
106
+ for (auto &region : other.uninitialized_regions) {
107
+ region.start += offset;
108
+ region.end += offset;
109
+ uninitialized_regions.push_back(region);
99
110
  }
100
111
 
101
- void AddSegmentToTail(ColumnData *data, ColumnSegment *segment, uint32_t offset_in_block) {
102
- tail_segments.push_back({data, segment, offset_in_block});
112
+ // move over the segments
113
+ for (auto &segment : other.segments) {
114
+ AddSegmentToTail(segment.data, segment.segment, segment.offset_in_block + offset);
103
115
  }
104
- };
116
+ other.Clear();
117
+ }
118
+
119
+ void PartialBlockForCheckpoint::AddSegmentToTail(ColumnData &data, ColumnSegment &segment, uint32_t offset_in_block) {
120
+ segments.emplace_back(data, segment, offset_in_block);
121
+ }
105
122
 
106
123
  void ColumnCheckpointState::FlushSegment(unique_ptr<ColumnSegment> segment, idx_t segment_size) {
107
124
  D_ASSERT(segment_size <= Storage::BLOCK_SIZE);
@@ -128,14 +145,14 @@ void ColumnCheckpointState::FlushSegment(unique_ptr<ColumnSegment> segment, idx_
128
145
  if (allocation.partial_block) {
129
146
  // Use an existing block.
130
147
  D_ASSERT(offset_in_block > 0);
131
- auto pstate = (PartialBlockForCheckpoint *)allocation.partial_block.get();
148
+ auto &pstate = allocation.partial_block->Cast<PartialBlockForCheckpoint>();
132
149
  // pin the source block
133
150
  auto old_handle = buffer_manager.Pin(segment->block);
134
151
  // pin the target block
135
- auto new_handle = buffer_manager.Pin(pstate->first_segment->block);
152
+ auto new_handle = buffer_manager.Pin(pstate.block);
136
153
  // memcpy the contents of the old block to the new block
137
154
  memcpy(new_handle.Ptr() + offset_in_block, old_handle.Ptr(), segment_size);
138
- pstate->AddSegmentToTail(&column_data, segment.get(), offset_in_block);
155
+ pstate.AddSegmentToTail(column_data, *segment, offset_in_block);
139
156
  } else {
140
157
  // Create a new block for future reuse.
141
158
  if (segment->SegmentSize() != Storage::BLOCK_SIZE) {
@@ -146,7 +163,7 @@ void ColumnCheckpointState::FlushSegment(unique_ptr<ColumnSegment> segment, idx_
146
163
  }
147
164
  D_ASSERT(offset_in_block == 0);
148
165
  allocation.partial_block = make_uniq<PartialBlockForCheckpoint>(
149
- &column_data, segment.get(), *allocation.block_manager, allocation.state);
166
+ column_data, *segment, *allocation.block_manager, allocation.state);
150
167
  }
151
168
  // Writer will decide whether to reuse this block.
152
169
  partial_block_manager.RegisterPartialBlock(std::move(allocation));
@@ -180,13 +197,4 @@ void ColumnCheckpointState::WriteDataPointers(RowGroupWriter &writer) {
180
197
  writer.WriteColumnDataPointers(*this);
181
198
  }
182
199
 
183
- void ColumnCheckpointState::GetBlockIds(unordered_set<block_id_t> &result) {
184
- for (auto &pointer : data_pointers) {
185
- if (pointer.block_pointer.block_id == INVALID_BLOCK) {
186
- continue;
187
- }
188
- result.insert(pointer.block_pointer.block_id);
189
- }
190
- }
191
-
192
200
  } // namespace duckdb
@@ -21,7 +21,7 @@
21
21
  namespace duckdb {
22
22
 
23
23
  ColumnData::ColumnData(BlockManager &block_manager, DataTableInfo &info, idx_t column_index, idx_t start_row,
24
- LogicalType type_p, ColumnData *parent)
24
+ LogicalType type_p, optional_ptr<ColumnData> parent)
25
25
  : start(start_row), count(0), block_manager(block_manager), info(info), column_index(column_index),
26
26
  type(std::move(type_p)), parent(parent), version(0) {
27
27
  if (!parent) {
@@ -29,24 +29,17 @@ ColumnData::ColumnData(BlockManager &block_manager, DataTableInfo &info, idx_t c
29
29
  }
30
30
  }
31
31
 
32
- ColumnData::ColumnData(ColumnData &other, idx_t start, ColumnData *parent)
33
- : start(start), count(other.count), block_manager(other.block_manager), info(other.info),
34
- column_index(other.column_index), type(std::move(other.type)), parent(parent),
35
- version(parent ? parent->version + 1 : 0) {
36
- if (other.updates) {
37
- updates = make_uniq<UpdateSegment>(*other.updates, *this);
38
- }
39
- if (other.stats) {
40
- stats = make_uniq<SegmentStatistics>(other.stats->statistics.Copy());
41
- }
32
+ ColumnData::~ColumnData() {
33
+ }
34
+
35
+ void ColumnData::SetStart(idx_t new_start) {
36
+ this->start = new_start;
42
37
  idx_t offset = 0;
43
- for (auto &segment : other.data.Segments()) {
44
- this->data.AppendSegment(ColumnSegment::CreateSegment(segment, start + offset));
38
+ for (auto &segment : data.Segments()) {
39
+ segment.start = start + offset;
45
40
  offset += segment.count;
46
41
  }
47
- }
48
-
49
- ColumnData::~ColumnData() {
42
+ data.Reinitialize();
50
43
  }
51
44
 
52
45
  DatabaseInstance &ColumnData::GetDatabase() const {
@@ -278,8 +271,8 @@ void ColumnData::InitializeAppend(ColumnAppendState &state) {
278
271
  AppendTransientSegment(l, start);
279
272
  }
280
273
  auto segment = data.GetLastSegment(l);
281
- if (segment->segment_type == ColumnSegmentType::PERSISTENT) {
282
- // no transient segments yet
274
+ if (segment->segment_type == ColumnSegmentType::PERSISTENT || !segment->function.get().init_append) {
275
+ // we cannot append to this segment - append a new segment
283
276
  auto total_rows = segment->start + segment->count;
284
277
  AppendTransientSegment(l, total_rows);
285
278
  state.current = data.GetLastSegment(l);
@@ -418,9 +411,9 @@ unique_ptr<ColumnCheckpointState> ColumnData::CreateCheckpointState(RowGroup &ro
418
411
  return make_uniq<ColumnCheckpointState>(row_group, *this, partial_block_manager);
419
412
  }
420
413
 
421
- void ColumnData::CheckpointScan(ColumnSegment *segment, ColumnScanState &state, idx_t row_group_start, idx_t count,
414
+ void ColumnData::CheckpointScan(ColumnSegment &segment, ColumnScanState &state, idx_t row_group_start, idx_t count,
422
415
  Vector &scan_vector) {
423
- segment->Scan(state, count, scan_vector, 0, true);
416
+ segment.Scan(state, count, scan_vector, 0, true);
424
417
  if (updates) {
425
418
  scan_vector.Flatten(count);
426
419
  updates->FetchCommittedRange(state.row_index - row_group_start, count, scan_vector);
@@ -489,7 +482,7 @@ void ColumnData::DeserializeColumn(Deserializer &source) {
489
482
 
490
483
  shared_ptr<ColumnData> ColumnData::Deserialize(BlockManager &block_manager, DataTableInfo &info, idx_t column_index,
491
484
  idx_t start_row, Deserializer &source, const LogicalType &type,
492
- ColumnData *parent) {
485
+ optional_ptr<ColumnData> parent) {
493
486
  auto entry = ColumnData::CreateColumn(block_manager, info, column_index, start_row, type, parent);
494
487
  entry->DeserializeColumn(source);
495
488
  return entry;
@@ -565,48 +558,29 @@ void ColumnData::Verify(RowGroup &parent) {
565
558
 
566
559
  template <class RET, class OP>
567
560
  static RET CreateColumnInternal(BlockManager &block_manager, DataTableInfo &info, idx_t column_index, idx_t start_row,
568
- const LogicalType &type, ColumnData *parent) {
561
+ const LogicalType &type, optional_ptr<ColumnData> parent) {
569
562
  if (type.InternalType() == PhysicalType::STRUCT) {
570
563
  return OP::template Create<StructColumnData>(block_manager, info, column_index, start_row, type, parent);
571
564
  } else if (type.InternalType() == PhysicalType::LIST) {
572
565
  return OP::template Create<ListColumnData>(block_manager, info, column_index, start_row, type, parent);
573
566
  } else if (type.id() == LogicalTypeId::VALIDITY) {
574
- return OP::template Create<ValidityColumnData>(block_manager, info, column_index, start_row, parent);
567
+ return OP::template Create<ValidityColumnData>(block_manager, info, column_index, start_row, *parent);
575
568
  }
576
569
  return OP::template Create<StandardColumnData>(block_manager, info, column_index, start_row, type, parent);
577
570
  }
578
571
 
579
- template <class RET, class OP>
580
- static RET CreateColumnInternal(ColumnData &other, idx_t start_row, ColumnData *parent) {
581
- if (other.type.InternalType() == PhysicalType::STRUCT) {
582
- return OP::template Create<StructColumnData>(other, start_row, parent);
583
- } else if (other.type.InternalType() == PhysicalType::LIST) {
584
- return OP::template Create<ListColumnData>(other, start_row, parent);
585
- } else if (other.type.id() == LogicalTypeId::VALIDITY) {
586
- return OP::template Create<ValidityColumnData>(other, start_row, parent);
587
- }
588
- return OP::template Create<StandardColumnData>(other, start_row, parent);
589
- }
590
-
591
572
  shared_ptr<ColumnData> ColumnData::CreateColumn(BlockManager &block_manager, DataTableInfo &info, idx_t column_index,
592
- idx_t start_row, const LogicalType &type, ColumnData *parent) {
573
+ idx_t start_row, const LogicalType &type,
574
+ optional_ptr<ColumnData> parent) {
593
575
  return CreateColumnInternal<shared_ptr<ColumnData>, SharedConstructor>(block_manager, info, column_index, start_row,
594
576
  type, parent);
595
577
  }
596
578
 
597
- shared_ptr<ColumnData> ColumnData::CreateColumn(ColumnData &other, idx_t start_row, ColumnData *parent) {
598
- return CreateColumnInternal<shared_ptr<ColumnData>, SharedConstructor>(other, start_row, parent);
599
- }
600
-
601
579
  unique_ptr<ColumnData> ColumnData::CreateColumnUnique(BlockManager &block_manager, DataTableInfo &info,
602
580
  idx_t column_index, idx_t start_row, const LogicalType &type,
603
- ColumnData *parent) {
581
+ optional_ptr<ColumnData> parent) {
604
582
  return CreateColumnInternal<unique_ptr<ColumnData>, UniqueConstructor>(block_manager, info, column_index, start_row,
605
583
  type, parent);
606
584
  }
607
585
 
608
- unique_ptr<ColumnData> ColumnData::CreateColumnUnique(ColumnData &other, idx_t start_row, ColumnData *parent) {
609
- return CreateColumnInternal<unique_ptr<ColumnData>, UniqueConstructor>(other, start_row, parent);
610
- }
611
-
612
586
  } // namespace duckdb