duckdb 0.7.2-dev16.0 → 0.7.2-dev314.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242)
  1. package/binding.gyp +2 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/icu/icu-extension.cpp +2 -0
  4. package/src/duckdb/extension/icu/icu-table-range.cpp +194 -0
  5. package/src/duckdb/extension/icu/include/icu-table-range.hpp +17 -0
  6. package/src/duckdb/extension/parquet/column_reader.cpp +5 -6
  7. package/src/duckdb/extension/parquet/column_writer.cpp +0 -1
  8. package/src/duckdb/extension/parquet/include/column_reader.hpp +1 -2
  9. package/src/duckdb/extension/parquet/include/generated_column_reader.hpp +1 -11
  10. package/src/duckdb/extension/parquet/parquet-extension.cpp +11 -2
  11. package/src/duckdb/extension/parquet/parquet_statistics.cpp +26 -32
  12. package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +4 -0
  13. package/src/duckdb/src/catalog/catalog_entry/scalar_function_catalog_entry.cpp +7 -6
  14. package/src/duckdb/src/catalog/catalog_entry/table_function_catalog_entry.cpp +20 -1
  15. package/src/duckdb/src/common/enums/statement_type.cpp +2 -0
  16. package/src/duckdb/src/common/sort/sort_state.cpp +5 -7
  17. package/src/duckdb/src/common/types/bit.cpp +95 -58
  18. package/src/duckdb/src/common/types/value.cpp +149 -53
  19. package/src/duckdb/src/common/types/vector.cpp +13 -10
  20. package/src/duckdb/src/execution/column_binding_resolver.cpp +6 -0
  21. package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +4 -5
  22. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
  23. package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +2 -3
  24. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +32 -6
  25. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +1 -1
  26. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +15 -15
  27. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +18 -12
  28. package/src/duckdb/src/function/aggregate/algebraic/avg.cpp +0 -6
  29. package/src/duckdb/src/function/aggregate/distributive/bitagg.cpp +99 -95
  30. package/src/duckdb/src/function/aggregate/distributive/bitstring_agg.cpp +254 -0
  31. package/src/duckdb/src/function/aggregate/distributive/count.cpp +2 -4
  32. package/src/duckdb/src/function/aggregate/distributive/sum.cpp +11 -16
  33. package/src/duckdb/src/function/aggregate/distributive_functions.cpp +1 -0
  34. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +16 -5
  35. package/src/duckdb/src/function/cast/bit_cast.cpp +0 -2
  36. package/src/duckdb/src/function/cast/blob_cast.cpp +0 -1
  37. package/src/duckdb/src/function/scalar/bit/bitstring.cpp +99 -0
  38. package/src/duckdb/src/function/scalar/date/date_diff.cpp +0 -1
  39. package/src/duckdb/src/function/scalar/date/date_part.cpp +17 -25
  40. package/src/duckdb/src/function/scalar/date/date_sub.cpp +0 -1
  41. package/src/duckdb/src/function/scalar/date/date_trunc.cpp +10 -14
  42. package/src/duckdb/src/function/scalar/generic/stats.cpp +2 -4
  43. package/src/duckdb/src/function/scalar/list/flatten.cpp +5 -12
  44. package/src/duckdb/src/function/scalar/list/list_concat.cpp +3 -8
  45. package/src/duckdb/src/function/scalar/list/list_extract.cpp +5 -12
  46. package/src/duckdb/src/function/scalar/list/list_value.cpp +5 -9
  47. package/src/duckdb/src/function/scalar/map/map_entries.cpp +61 -0
  48. package/src/duckdb/src/function/scalar/map/map_keys_values.cpp +97 -0
  49. package/src/duckdb/src/function/scalar/math/numeric.cpp +14 -17
  50. package/src/duckdb/src/function/scalar/nested_functions.cpp +3 -0
  51. package/src/duckdb/src/function/scalar/operators/add.cpp +0 -9
  52. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +29 -48
  53. package/src/duckdb/src/function/scalar/operators/bitwise.cpp +0 -63
  54. package/src/duckdb/src/function/scalar/operators/multiply.cpp +0 -6
  55. package/src/duckdb/src/function/scalar/operators/subtract.cpp +0 -6
  56. package/src/duckdb/src/function/scalar/string/caseconvert.cpp +2 -6
  57. package/src/duckdb/src/function/scalar/string/instr.cpp +2 -6
  58. package/src/duckdb/src/function/scalar/string/length.cpp +2 -6
  59. package/src/duckdb/src/function/scalar/string/like.cpp +2 -6
  60. package/src/duckdb/src/function/scalar/string/substring.cpp +2 -6
  61. package/src/duckdb/src/function/scalar/string_functions.cpp +1 -0
  62. package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +4 -9
  63. package/src/duckdb/src/function/scalar/struct/struct_insert.cpp +10 -13
  64. package/src/duckdb/src/function/scalar/struct/struct_pack.cpp +5 -6
  65. package/src/duckdb/src/function/table/read_csv.cpp +9 -0
  66. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  67. package/src/duckdb/src/function/table_function.cpp +19 -0
  68. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp +6 -8
  69. package/src/duckdb/src/include/duckdb/common/constants.hpp +0 -19
  70. package/src/duckdb/src/include/duckdb/common/enums/statement_type.hpp +2 -1
  71. package/src/duckdb/src/include/duckdb/common/enums/tableref_type.hpp +2 -1
  72. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +5 -1
  73. package/src/duckdb/src/include/duckdb/common/types/value.hpp +2 -8
  74. package/src/duckdb/src/include/duckdb/common/types.hpp +1 -2
  75. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
  76. package/src/duckdb/src/include/duckdb/function/aggregate/distributive_functions.hpp +5 -0
  77. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +12 -3
  78. package/src/duckdb/src/include/duckdb/function/scalar/bit_functions.hpp +4 -0
  79. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +12 -0
  80. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +2 -2
  81. package/src/duckdb/src/include/duckdb/function/table_function.hpp +2 -0
  82. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +2 -0
  83. package/src/duckdb/src/include/duckdb/main/config.hpp +3 -0
  84. package/src/duckdb/src/include/duckdb/main/database.hpp +1 -0
  85. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +2 -2
  86. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  87. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +2 -1
  88. package/src/duckdb/src/include/duckdb/parser/parsed_data/{alter_function_info.hpp → alter_scalar_function_info.hpp} +13 -13
  89. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_function_info.hpp +47 -0
  90. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_table_function_info.hpp +2 -1
  91. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
  92. package/src/duckdb/src/include/duckdb/parser/statement/multi_statement.hpp +28 -0
  93. package/src/duckdb/src/include/duckdb/parser/tableref/list.hpp +1 -0
  94. package/src/duckdb/src/include/duckdb/parser/tableref/pivotref.hpp +76 -0
  95. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +2 -0
  96. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +28 -0
  97. package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +2 -0
  98. package/src/duckdb/src/include/duckdb/planner/binder.hpp +8 -0
  99. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +2 -0
  100. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +76 -44
  101. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -2
  102. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +2 -2
  103. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_fetch.hpp +1 -1
  104. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -1
  105. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +2 -2
  106. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_fetch.hpp +1 -1
  107. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -1
  108. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +5 -2
  109. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  110. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +93 -29
  111. package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +22 -3
  112. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +6 -6
  113. package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +41 -0
  114. package/src/duckdb/src/include/duckdb/storage/statistics/node_statistics.hpp +26 -0
  115. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +157 -0
  116. package/src/duckdb/src/include/duckdb/storage/statistics/segment_statistics.hpp +2 -7
  117. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +74 -0
  118. package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +42 -0
  119. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +2 -3
  120. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +2 -2
  121. package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +1 -1
  122. package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +2 -1
  123. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -3
  124. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +3 -2
  125. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +2 -0
  126. package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
  127. package/src/duckdb/src/include/duckdb.h +49 -1
  128. package/src/duckdb/src/include/duckdb.hpp +0 -1
  129. package/src/duckdb/src/main/capi/pending-c.cpp +16 -3
  130. package/src/duckdb/src/main/capi/result-c.cpp +27 -1
  131. package/src/duckdb/src/main/capi/stream-c.cpp +25 -0
  132. package/src/duckdb/src/main/client_context.cpp +8 -1
  133. package/src/duckdb/src/main/config.cpp +66 -1
  134. package/src/duckdb/src/main/database.cpp +10 -2
  135. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +98 -67
  136. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +16 -3
  137. package/src/duckdb/src/optimizer/statistics/expression/propagate_aggregate.cpp +9 -3
  138. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +6 -7
  139. package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -11
  140. package/src/duckdb/src/optimizer/statistics/expression/propagate_columnref.cpp +1 -1
  141. package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +13 -15
  142. package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +0 -1
  143. package/src/duckdb/src/optimizer/statistics/expression/propagate_constant.cpp +3 -75
  144. package/src/duckdb/src/optimizer/statistics/expression/propagate_function.cpp +7 -2
  145. package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +10 -0
  146. package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +2 -3
  147. package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +28 -31
  148. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +4 -5
  149. package/src/duckdb/src/optimizer/statistics/operator/propagate_set_operation.cpp +3 -3
  150. package/src/duckdb/src/optimizer/statistics_propagator.cpp +1 -1
  151. package/src/duckdb/src/parser/parsed_data/alter_info.cpp +7 -3
  152. package/src/duckdb/src/parser/parsed_data/alter_scalar_function_info.cpp +56 -0
  153. package/src/duckdb/src/parser/parsed_data/alter_table_function_info.cpp +51 -0
  154. package/src/duckdb/src/parser/parsed_data/create_scalar_function_info.cpp +3 -2
  155. package/src/duckdb/src/parser/parsed_data/create_table_function_info.cpp +6 -0
  156. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +8 -0
  157. package/src/duckdb/src/parser/query_node.cpp +1 -1
  158. package/src/duckdb/src/parser/statement/multi_statement.cpp +18 -0
  159. package/src/duckdb/src/parser/tableref/pivotref.cpp +296 -0
  160. package/src/duckdb/src/parser/tableref.cpp +3 -0
  161. package/src/duckdb/src/parser/transform/helpers/transform_alias.cpp +12 -6
  162. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +24 -0
  163. package/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +4 -0
  164. package/src/duckdb/src/parser/transform/statement/transform_create_view.cpp +4 -0
  165. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +150 -0
  166. package/src/duckdb/src/parser/transform/statement/transform_select.cpp +8 -0
  167. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +1 -1
  168. package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +4 -0
  169. package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +105 -0
  170. package/src/duckdb/src/parser/transform/tableref/transform_tableref.cpp +2 -0
  171. package/src/duckdb/src/parser/transformer.cpp +15 -3
  172. package/src/duckdb/src/planner/bind_context.cpp +16 -0
  173. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +11 -3
  174. package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +0 -1
  175. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +1 -1
  176. package/src/duckdb/src/planner/binder/statement/bind_logical_plan.cpp +17 -0
  177. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +9 -0
  178. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +365 -0
  179. package/src/duckdb/src/planner/binder.cpp +7 -1
  180. package/src/duckdb/src/planner/bound_result_modifier.cpp +1 -1
  181. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +1 -1
  182. package/src/duckdb/src/planner/filter/constant_filter.cpp +4 -6
  183. package/src/duckdb/src/planner/pragma_handler.cpp +10 -2
  184. package/src/duckdb/src/storage/buffer_manager.cpp +44 -46
  185. package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -1
  186. package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +1 -4
  187. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +4 -4
  188. package/src/duckdb/src/storage/compression/bitpacking.cpp +28 -24
  189. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +43 -45
  190. package/src/duckdb/src/storage/compression/numeric_constant.cpp +9 -10
  191. package/src/duckdb/src/storage/compression/patas.cpp +1 -1
  192. package/src/duckdb/src/storage/compression/rle.cpp +19 -15
  193. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +5 -5
  194. package/src/duckdb/src/storage/data_table.cpp +4 -6
  195. package/src/duckdb/src/storage/statistics/base_statistics.cpp +373 -128
  196. package/src/duckdb/src/storage/statistics/column_statistics.cpp +58 -3
  197. package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +4 -9
  198. package/src/duckdb/src/storage/statistics/list_stats.cpp +117 -0
  199. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +529 -0
  200. package/src/duckdb/src/storage/statistics/segment_statistics.cpp +2 -11
  201. package/src/duckdb/src/storage/statistics/string_stats.cpp +273 -0
  202. package/src/duckdb/src/storage/statistics/struct_stats.cpp +131 -0
  203. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  204. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -4
  205. package/src/duckdb/src/storage/table/column_data.cpp +16 -11
  206. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +2 -3
  207. package/src/duckdb/src/storage/table/column_segment.cpp +6 -8
  208. package/src/duckdb/src/storage/table/list_column_data.cpp +39 -58
  209. package/src/duckdb/src/storage/table/row_group.cpp +24 -23
  210. package/src/duckdb/src/storage/table/row_group_collection.cpp +12 -12
  211. package/src/duckdb/src/storage/table/standard_column_data.cpp +6 -6
  212. package/src/duckdb/src/storage/table/struct_column_data.cpp +15 -16
  213. package/src/duckdb/src/storage/table/table_statistics.cpp +27 -7
  214. package/src/duckdb/src/storage/table/update_segment.cpp +10 -12
  215. package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +3 -0
  216. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +34 -1
  217. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +1020 -530
  218. package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +7 -0
  219. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +23560 -22737
  220. package/src/duckdb/ub_src_function_aggregate_distributive.cpp +2 -0
  221. package/src/duckdb/ub_src_function_scalar_bit.cpp +2 -0
  222. package/src/duckdb/ub_src_function_scalar_map.cpp +4 -0
  223. package/src/duckdb/ub_src_main_capi.cpp +2 -0
  224. package/src/duckdb/ub_src_parser_parsed_data.cpp +4 -2
  225. package/src/duckdb/ub_src_parser_statement.cpp +2 -0
  226. package/src/duckdb/ub_src_parser_tableref.cpp +2 -0
  227. package/src/duckdb/ub_src_parser_transform_statement.cpp +2 -0
  228. package/src/duckdb/ub_src_parser_transform_tableref.cpp +2 -0
  229. package/src/duckdb/ub_src_planner_binder_tableref.cpp +2 -0
  230. package/src/duckdb/ub_src_storage_statistics.cpp +4 -6
  231. package/src/duckdb/src/include/duckdb/main/loadable_extension.hpp +0 -59
  232. package/src/duckdb/src/include/duckdb/storage/statistics/list_statistics.hpp +0 -36
  233. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_statistics.hpp +0 -75
  234. package/src/duckdb/src/include/duckdb/storage/statistics/string_statistics.hpp +0 -49
  235. package/src/duckdb/src/include/duckdb/storage/statistics/struct_statistics.hpp +0 -36
  236. package/src/duckdb/src/include/duckdb/storage/statistics/validity_statistics.hpp +0 -45
  237. package/src/duckdb/src/parser/parsed_data/alter_function_info.cpp +0 -55
  238. package/src/duckdb/src/storage/statistics/list_statistics.cpp +0 -94
  239. package/src/duckdb/src/storage/statistics/numeric_statistics.cpp +0 -307
  240. package/src/duckdb/src/storage/statistics/string_statistics.cpp +0 -220
  241. package/src/duckdb/src/storage/statistics/struct_statistics.cpp +0 -108
  242. package/src/duckdb/src/storage/statistics/validity_statistics.cpp +0 -91
@@ -1,5 +1,5 @@
1
1
  #include "duckdb/storage/table/list_column_data.hpp"
2
- #include "duckdb/storage/statistics/list_statistics.hpp"
2
+ #include "duckdb/storage/statistics/list_stats.hpp"
3
3
  #include "duckdb/transaction/transaction.hpp"
4
4
 
5
5
  namespace duckdb {
@@ -39,15 +39,14 @@ void ListColumnData::InitializeScan(ColumnScanState &state) {
39
39
  state.child_states.push_back(std::move(child_state));
40
40
  }
41
41
 
42
- list_entry_t ListColumnData::FetchListEntry(idx_t row_idx) {
42
+ uint64_t ListColumnData::FetchListOffset(idx_t row_idx) {
43
43
  auto segment = (ColumnSegment *)data.GetSegment(row_idx);
44
44
  ColumnFetchState fetch_state;
45
45
  Vector result(type, 1);
46
46
  segment->FetchRow(fetch_state, row_idx, result, 0);
47
47
 
48
48
  // initialize the child scan with the required offset
49
- auto list_data = FlatVector::GetData<list_entry_t>(result);
50
- return list_data[0];
49
+ return FlatVector::GetData<uint64_t>(result)[0];
51
50
  }
52
51
 
53
52
  void ListColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx) {
@@ -63,8 +62,7 @@ void ListColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t row_
63
62
  state.child_states.push_back(std::move(validity_state));
64
63
 
65
64
  // we need to read the list at position row_idx to get the correct row offset of the child
66
- auto list_entry = FetchListEntry(row_idx);
67
- auto child_offset = list_entry.offset;
65
+ auto child_offset = row_idx == start ? 0 : FetchListOffset(row_idx - 1);
68
66
 
69
67
  D_ASSERT(child_offset <= child_column->GetMaxEntry());
70
68
  ColumnScanState child_state;
@@ -89,26 +87,26 @@ idx_t ListColumnData::ScanCount(ColumnScanState &state, Vector &result, idx_t co
89
87
  // updates not supported for lists
90
88
  D_ASSERT(!updates);
91
89
 
92
- idx_t scan_count = ScanVector(state, result, count);
90
+ Vector offset_vector(LogicalType::UBIGINT, count);
91
+ idx_t scan_count = ScanVector(state, offset_vector, count);
93
92
  D_ASSERT(scan_count > 0);
94
93
  validity.ScanCount(state.child_states[0], result, count);
95
94
 
96
- auto data = FlatVector::GetData<list_entry_t>(result);
97
- auto first_entry = data[0];
95
+ auto data = FlatVector::GetData<uint64_t>(offset_vector);
98
96
  auto last_entry = data[scan_count - 1];
99
97
 
100
- #ifdef DEBUG
101
- for (idx_t i = 1; i < scan_count; i++) {
102
- D_ASSERT(data[i].offset == data[i - 1].offset + data[i - 1].length);
103
- }
104
- #endif
105
98
  // shift all offsets so they are 0 at the first entry
99
+ auto result_data = FlatVector::GetData<list_entry_t>(result);
100
+ auto base_offset = state.last_offset;
101
+ idx_t current_offset = 0;
106
102
  for (idx_t i = 0; i < scan_count; i++) {
107
- data[i].offset -= first_entry.offset;
103
+ result_data[i].offset = current_offset;
104
+ result_data[i].length = data[i] - current_offset - base_offset;
105
+ current_offset += result_data[i].length;
108
106
  }
109
107
 
110
- D_ASSERT(last_entry.offset >= first_entry.offset);
111
- idx_t child_scan_count = last_entry.offset + last_entry.length - first_entry.offset;
108
+ D_ASSERT(last_entry >= base_offset);
109
+ idx_t child_scan_count = last_entry - base_offset;
112
110
  ListVector::Reserve(result, child_scan_count);
113
111
 
114
112
  if (child_scan_count > 0) {
@@ -118,6 +116,7 @@ idx_t ListColumnData::ScanCount(ColumnScanState &state, Vector &result, idx_t co
118
116
  child_column->start + child_column->GetMaxEntry());
119
117
  child_column->ScanCount(state.child_states[1], child_entry, child_scan_count);
120
118
  }
119
+ state.last_offset = last_entry;
121
120
 
122
121
  ListVector::SetListSize(result, child_scan_count);
123
122
  return scan_count;
@@ -130,19 +129,19 @@ void ListColumnData::Skip(ColumnScanState &state, idx_t count) {
130
129
  // we need to read the list entries/offsets to figure out how much to skip
131
130
  // note that we only need to read the first and last entry
132
131
  // however, let's just read all "count" entries for now
133
- auto data = unique_ptr<list_entry_t[]>(new list_entry_t[count]);
134
- Vector result(type, (data_ptr_t)data.get());
132
+ Vector result(LogicalType::UBIGINT, count);
135
133
  idx_t scan_count = ScanVector(state, result, count);
136
134
  if (scan_count == 0) {
137
135
  return;
138
136
  }
139
137
 
140
- auto &first_entry = data[0];
141
- auto &last_entry = data[scan_count - 1];
142
- idx_t child_scan_count = last_entry.offset + last_entry.length - first_entry.offset;
138
+ auto data = FlatVector::GetData<uint64_t>(result);
139
+ auto last_entry = data[scan_count - 1];
140
+ idx_t child_scan_count = last_entry - state.last_offset;
143
141
  if (child_scan_count == 0) {
144
142
  return;
145
143
  }
144
+ state.last_offset = last_entry;
146
145
 
147
146
  // skip the child state forward by the child_scan_count
148
147
  child_column->Skip(state.child_states[1], child_scan_count);
@@ -163,10 +162,8 @@ void ListColumnData::InitializeAppend(ColumnAppendState &state) {
163
162
  state.child_appends.push_back(std::move(child_append_state));
164
163
  }
165
164
 
166
- void ListColumnData::Append(BaseStatistics &stats_p, ColumnAppendState &state, Vector &vector, idx_t count) {
165
+ void ListColumnData::Append(BaseStatistics &stats, ColumnAppendState &state, Vector &vector, idx_t count) {
167
166
  D_ASSERT(count > 0);
168
- auto &stats = (ListStatistics &)stats_p;
169
-
170
167
  UnifiedVectorFormat list_data;
171
168
  vector.ToUnifiedFormat(count, list_data);
172
169
  auto &list_validity = list_data.validity;
@@ -177,8 +174,8 @@ void ListColumnData::Append(BaseStatistics &stats_p, ColumnAppendState &state, V
177
174
  idx_t child_count = 0;
178
175
 
179
176
  ValidityMask append_mask(count);
180
- auto append_offsets = unique_ptr<list_entry_t[]>(new list_entry_t[count]);
181
- bool child_contiguous = false;
177
+ auto append_offsets = unique_ptr<uint64_t[]>(new uint64_t[count]);
178
+ bool child_contiguous = true;
182
179
  for (idx_t i = 0; i < count; i++) {
183
180
  auto input_idx = list_data.sel->get_index(i);
184
181
  if (list_validity.RowIsValid(input_idx)) {
@@ -186,17 +183,11 @@ void ListColumnData::Append(BaseStatistics &stats_p, ColumnAppendState &state, V
186
183
  if (input_list.offset != child_count) {
187
184
  child_contiguous = false;
188
185
  }
189
- append_offsets[i].offset = start_offset + child_count;
190
- append_offsets[i].length = input_list.length;
186
+ append_offsets[i] = start_offset + child_count + input_list.length;
191
187
  child_count += input_list.length;
192
188
  } else {
193
189
  append_mask.SetInvalid(i);
194
- if (i > 0) {
195
- append_offsets[i].offset = append_offsets[i - 1].offset + append_offsets[i - 1].length;
196
- } else {
197
- append_offsets[i].offset = start_offset;
198
- }
199
- append_offsets[i].length = 0;
190
+ append_offsets[i] = start_offset + child_count;
200
191
  }
201
192
  }
202
193
  auto &list_child = ListVector::GetEntry(vector);
@@ -218,27 +209,19 @@ void ListColumnData::Append(BaseStatistics &stats_p, ColumnAppendState &state, V
218
209
  D_ASSERT(current_count == child_count);
219
210
  child_vector.Slice(list_child, child_sel, child_count);
220
211
  }
221
- #ifdef DEBUG
222
- D_ASSERT(append_offsets[0].offset == start_offset);
223
- for (idx_t i = 1; i < count; i++) {
224
- D_ASSERT(append_offsets[i].offset == append_offsets[i - 1].offset + append_offsets[i - 1].length);
225
- }
226
- D_ASSERT(append_offsets[count - 1].offset + append_offsets[count - 1].length - append_offsets[0].offset ==
227
- child_count);
228
- #endif
229
212
 
230
213
  UnifiedVectorFormat vdata;
231
- vdata.validity = append_mask;
232
214
  vdata.sel = FlatVector::IncrementalSelectionVector();
233
215
  vdata.data = (data_ptr_t)append_offsets.get();
234
216
 
235
217
  // append the list offsets
236
218
  ColumnData::AppendData(stats, state, vdata, count);
237
219
  // append the validity data
238
- validity.AppendData(*stats.validity_stats, state.child_appends[0], vdata, count);
220
+ vdata.validity = append_mask;
221
+ validity.AppendData(stats, state.child_appends[0], vdata, count);
239
222
  // append the child vector
240
223
  if (child_count > 0) {
241
- child_column->Append(*stats.child_stats, state.child_appends[1], child_vector, child_count);
224
+ child_column->Append(ListStats::GetChildStats(stats), state.child_appends[1], child_vector, child_count);
242
225
  }
243
226
  }
244
227
 
@@ -248,8 +231,8 @@ void ListColumnData::RevertAppend(row_t start_row) {
248
231
  auto column_count = GetMaxEntry();
249
232
  if (column_count > start) {
250
233
  // revert append in the child column
251
- auto list_entry = FetchListEntry(column_count - 1);
252
- child_column->RevertAppend(list_entry.offset + list_entry.length);
234
+ auto list_offset = FetchListOffset(column_count - 1);
235
+ child_column->RevertAppend(list_offset);
253
236
  }
254
237
  }
255
238
 
@@ -281,19 +264,18 @@ void ListColumnData::FetchRow(TransactionData transaction, ColumnFetchState &sta
281
264
  auto child_state = make_unique<ColumnFetchState>();
282
265
  state.child_states.push_back(std::move(child_state));
283
266
  }
284
- // fetch the list_entry_t and the validity mask for that list
285
- auto segment = (ColumnSegment *)data.GetSegment(row_id);
286
267
 
287
268
  // now perform the fetch within the segment
288
- segment->FetchRow(state, row_id, result, result_idx);
269
+ auto start_offset = idx_t(row_id) == this->start ? 0 : FetchListOffset(row_id - 1);
270
+ auto end_offset = FetchListOffset(row_id);
289
271
  validity.FetchRow(transaction, *state.child_states[0], row_id, result, result_idx);
290
272
 
291
273
  auto &validity = FlatVector::Validity(result);
292
274
  auto list_data = FlatVector::GetData<list_entry_t>(result);
293
275
  auto &list_entry = list_data[result_idx];
294
- auto original_offset = list_entry.offset;
295
276
  // set the list entry offset to the size of the current list
296
277
  list_entry.offset = ListVector::GetListSize(result);
278
+ list_entry.length = end_offset - start_offset;
297
279
  if (!validity.RowIsValid(result_idx)) {
298
280
  // the list is NULL! no need to fetch the child
299
281
  D_ASSERT(list_entry.length == 0);
@@ -307,7 +289,7 @@ void ListColumnData::FetchRow(TransactionData transaction, ColumnFetchState &sta
307
289
  auto &child_type = ListType::GetChildType(result.GetType());
308
290
  Vector child_scan(child_type, child_scan_count);
309
291
  // seek the scan towards the specified position and read [length] entries
310
- child_column->InitializeScanWithOffset(*child_state, start + original_offset);
292
+ child_column->InitializeScanWithOffset(*child_state, start + start_offset);
311
293
  D_ASSERT(child_type.InternalType() == PhysicalType::STRUCT ||
312
294
  child_state->row_index + child_scan_count - this->start <= child_column->GetMaxEntry());
313
295
  child_column->ScanCount(*child_state, child_scan, child_scan_count);
@@ -324,7 +306,7 @@ void ListColumnData::CommitDropColumn() {
324
306
  struct ListColumnCheckpointState : public ColumnCheckpointState {
325
307
  ListColumnCheckpointState(RowGroup &row_group, ColumnData &column_data, PartialBlockManager &partial_block_manager)
326
308
  : ColumnCheckpointState(row_group, column_data, partial_block_manager) {
327
- global_stats = make_unique<ListStatistics>(column_data.type);
309
+ global_stats = ListStats::CreateEmpty(column_data.type).ToUnique();
328
310
  }
329
311
 
330
312
  unique_ptr<ColumnCheckpointState> validity_state;
@@ -333,10 +315,8 @@ struct ListColumnCheckpointState : public ColumnCheckpointState {
333
315
  public:
334
316
  unique_ptr<BaseStatistics> GetStatistics() override {
335
317
  auto stats = global_stats->Copy();
336
- auto &list_stats = (ListStatistics &)*stats;
337
- stats->validity_stats = validity_state->GetStatistics();
338
- list_stats.child_stats = child_state->GetStatistics();
339
- return stats;
318
+ ListStats::SetChildStats(stats, child_state->GetStatistics());
319
+ return stats.ToUnique();
340
320
  }
341
321
 
342
322
  void WriteDataPointers(RowGroupWriter &writer) override {
@@ -376,6 +356,7 @@ void ListColumnData::DeserializeColumn(Deserializer &source) {
376
356
  }
377
357
 
378
358
  void ListColumnData::GetStorageInfo(idx_t row_group_index, vector<idx_t> col_path, TableStorageInfo &result) {
359
+ ColumnData::GetStorageInfo(row_group_index, col_path, result);
379
360
  col_path.push_back(0);
380
361
  validity.GetStorageInfo(row_group_index, col_path, result);
381
362
  col_path.back() = 1;
@@ -46,8 +46,7 @@ RowGroup::RowGroup(AttachedDatabase &db, BlockManager &block_manager, DataTableI
46
46
 
47
47
  // set up the statistics
48
48
  for (auto &stats : pointer.statistics) {
49
- auto stats_type = stats->type;
50
- this->stats.push_back(make_shared<SegmentStatistics>(stats_type, std::move(stats)));
49
+ this->stats.emplace_back(std::move(stats));
51
50
  }
52
51
  this->version_info = std::move(pointer.versions);
53
52
 
@@ -88,7 +87,7 @@ void RowGroup::InitializeEmpty(const vector<LogicalType> &types) {
88
87
  // set up the segment trees for the column segments
89
88
  for (idx_t i = 0; i < types.size(); i++) {
90
89
  auto column_data = ColumnData::CreateColumn(block_manager, GetTableInfo(), i, start, types[i]);
91
- stats.push_back(make_shared<SegmentStatistics>(types[i]));
90
+ stats.emplace_back(types[i]);
92
91
  columns.push_back(std::move(column_data));
93
92
  }
94
93
  }
@@ -158,7 +157,7 @@ unique_ptr<RowGroup> RowGroup::AlterType(const LogicalType &target_type, idx_t c
158
157
  InitializeScan(scan_state);
159
158
 
160
159
  Vector append_vector(target_type);
161
- auto altered_col_stats = make_shared<SegmentStatistics>(target_type);
160
+ SegmentStatistics altered_col_stats(target_type);
162
161
  while (true) {
163
162
  // scan the table
164
163
  scan_chunk.Reset();
@@ -168,7 +167,7 @@ unique_ptr<RowGroup> RowGroup::AlterType(const LogicalType &target_type, idx_t c
168
167
  }
169
168
  // execute the expression
170
169
  executor.ExecuteExpression(scan_chunk, append_vector);
171
- column_data->Append(*altered_col_stats->statistics, append_state, append_vector, scan_chunk.size());
170
+ column_data->Append(altered_col_stats.statistics, append_state, append_vector, scan_chunk.size());
172
171
  }
173
172
 
174
173
  // set up the row_group based on this row_group
@@ -178,11 +177,11 @@ unique_ptr<RowGroup> RowGroup::AlterType(const LogicalType &target_type, idx_t c
178
177
  if (i == changed_idx) {
179
178
  // this is the altered column: use the new column
180
179
  row_group->columns.push_back(std::move(column_data));
181
- row_group->stats.push_back(std::move(altered_col_stats));
180
+ row_group->stats.push_back(std::move(altered_col_stats)); // NOLINT: false positive
182
181
  } else {
183
182
  // this column was not altered: use the data directly
184
183
  row_group->columns.push_back(columns[i]);
185
- row_group->stats.push_back(stats[i]);
184
+ row_group->stats.emplace_back(stats[i].statistics.Copy());
186
185
  }
187
186
  }
188
187
  row_group->Verify();
@@ -196,8 +195,7 @@ unique_ptr<RowGroup> RowGroup::AddColumn(ColumnDefinition &new_column, Expressio
196
195
  // construct a new column data for the new column
197
196
  auto added_column =
198
197
  ColumnData::CreateColumn(block_manager, GetTableInfo(), columns.size(), start, new_column.Type());
199
- auto added_col_stats = make_shared<SegmentStatistics>(
200
- new_column.Type(), BaseStatistics::CreateEmpty(new_column.Type(), StatisticsType::LOCAL_STATS));
198
+ SegmentStatistics added_col_stats(new_column.Type());
201
199
 
202
200
  idx_t rows_to_write = this->count;
203
201
  if (rows_to_write > 0) {
@@ -211,7 +209,7 @@ unique_ptr<RowGroup> RowGroup::AddColumn(ColumnDefinition &new_column, Expressio
211
209
  dummy_chunk.SetCardinality(rows_in_this_vector);
212
210
  executor.ExecuteExpression(dummy_chunk, result);
213
211
  }
214
- added_column->Append(*added_col_stats->statistics, state, result, rows_in_this_vector);
212
+ added_column->Append(added_col_stats.statistics, state, result, rows_in_this_vector);
215
213
  }
216
214
  }
217
215
 
@@ -219,7 +217,9 @@ unique_ptr<RowGroup> RowGroup::AddColumn(ColumnDefinition &new_column, Expressio
219
217
  auto row_group = make_unique<RowGroup>(db, block_manager, table_info, this->start, this->count);
220
218
  row_group->version_info = version_info;
221
219
  row_group->columns = columns;
222
- row_group->stats = stats;
220
+ for (auto &stat : stats) {
221
+ row_group->stats.emplace_back(stat.statistics.Copy());
222
+ }
223
223
  // now add the new column
224
224
  row_group->columns.push_back(std::move(added_column));
225
225
  row_group->stats.push_back(std::move(added_col_stats));
@@ -236,7 +236,9 @@ unique_ptr<RowGroup> RowGroup::RemoveColumn(idx_t removed_column) {
236
236
  auto row_group = make_unique<RowGroup>(db, block_manager, table_info, this->start, this->count);
237
237
  row_group->version_info = version_info;
238
238
  row_group->columns = columns;
239
- row_group->stats = stats;
239
+ for (auto &stat : stats) {
240
+ row_group->stats.emplace_back(stat.statistics.Copy());
241
+ }
240
242
  // now remove the column
241
243
  row_group->columns.erase(row_group->columns.begin() + removed_column);
242
244
  row_group->stats.erase(row_group->stats.begin() + removed_column);
@@ -275,7 +277,7 @@ bool RowGroup::CheckZonemap(TableFilterSet &filters, const vector<column_t> &col
275
277
  auto &filter = entry.second;
276
278
  auto base_column_index = column_ids[column_index];
277
279
 
278
- auto propagate_result = filter->CheckStatistics(*stats[base_column_index]->statistics);
280
+ auto propagate_result = filter->CheckStatistics(stats[base_column_index].statistics);
279
281
  if (propagate_result == FilterPropagateResult::FILTER_ALWAYS_FALSE ||
280
282
  propagate_result == FilterPropagateResult::FILTER_FALSE_OR_NULL) {
281
283
  return false;
@@ -628,7 +630,7 @@ void RowGroup::InitializeAppend(RowGroupAppendState &append_state) {
628
630
  void RowGroup::Append(RowGroupAppendState &state, DataChunk &chunk, idx_t append_count) {
629
631
  // append to the current row_group
630
632
  for (idx_t i = 0; i < columns.size(); i++) {
631
- columns[i]->Append(*stats[i]->statistics, state.states[i], chunk.data[i], append_count);
633
+ columns[i]->Append(stats[i].statistics, state.states[i], chunk.data[i], append_count);
632
634
  }
633
635
  state.offset_in_row_group += append_count;
634
636
  }
@@ -671,21 +673,21 @@ unique_ptr<BaseStatistics> RowGroup::GetStatistics(idx_t column_idx) {
671
673
  D_ASSERT(column_idx < stats.size());
672
674
 
673
675
  lock_guard<mutex> slock(stats_lock);
674
- return stats[column_idx]->statistics->Copy();
676
+ return stats[column_idx].statistics.ToUnique();
675
677
  }
676
678
 
677
679
  void RowGroup::MergeStatistics(idx_t column_idx, const BaseStatistics &other) {
678
680
  D_ASSERT(column_idx < stats.size());
679
681
 
680
682
  lock_guard<mutex> slock(stats_lock);
681
- stats[column_idx]->statistics->Merge(other);
683
+ stats[column_idx].statistics.Merge(other);
682
684
  }
683
685
 
684
686
  void RowGroup::MergeIntoStatistics(idx_t column_idx, BaseStatistics &other) {
685
687
  D_ASSERT(column_idx < stats.size());
686
688
 
687
689
  lock_guard<mutex> slock(stats_lock);
688
- other.Merge(*stats[column_idx]->statistics);
690
+ other.Merge(stats[column_idx].statistics);
689
691
  }
690
692
 
691
693
  RowGroupWriteData RowGroup::WriteToDisk(PartialBlockManager &manager,
@@ -711,14 +713,14 @@ RowGroupWriteData RowGroup::WriteToDisk(PartialBlockManager &manager,
711
713
  auto stats = checkpoint_state->GetStatistics();
712
714
  D_ASSERT(stats);
713
715
 
714
- result.statistics.push_back(std::move(stats));
716
+ result.statistics.push_back(stats->Copy());
715
717
  result.states.push_back(std::move(checkpoint_state));
716
718
  }
717
719
  D_ASSERT(result.states.size() == result.statistics.size());
718
720
  return result;
719
721
  }
720
722
 
721
- RowGroupPointer RowGroup::Checkpoint(RowGroupWriter &writer, vector<unique_ptr<BaseStatistics>> &global_stats) {
723
+ RowGroupPointer RowGroup::Checkpoint(RowGroupWriter &writer, TableStatistics &global_stats) {
722
724
  RowGroupPointer row_group_pointer;
723
725
 
724
726
  vector<CompressionType> compression_types;
@@ -728,7 +730,7 @@ RowGroupPointer RowGroup::Checkpoint(RowGroupWriter &writer, vector<unique_ptr<B
728
730
  }
729
731
  auto result = WriteToDisk(writer.GetPartialBlockManager(), compression_types);
730
732
  for (idx_t column_idx = 0; column_idx < columns.size(); column_idx++) {
731
- global_stats[column_idx]->Merge(*result.statistics[column_idx]);
733
+ global_stats.GetStats(column_idx).Statistics().Merge(result.statistics[column_idx]);
732
734
  }
733
735
  row_group_pointer.statistics = std::move(result.statistics);
734
736
 
@@ -805,7 +807,7 @@ void RowGroup::Serialize(RowGroupPointer &pointer, Serializer &main_serializer)
805
807
  writer.WriteField<uint64_t>(pointer.tuple_count);
806
808
  auto &serializer = writer.GetSerializer();
807
809
  for (auto &stats : pointer.statistics) {
808
- stats->Serialize(serializer);
810
+ stats.Serialize(serializer);
809
811
  }
810
812
  for (auto &data_pointer : pointer.data_pointers) {
811
813
  serializer.Write<block_id_t>(data_pointer.block_id);
@@ -828,8 +830,7 @@ RowGroupPointer RowGroup::Deserialize(Deserializer &main_source, const ColumnLis
828
830
 
829
831
  auto &source = reader.GetSource();
830
832
  for (auto &col : columns.Physical()) {
831
- auto stats = BaseStatistics::Deserialize(source, col.Type());
832
- result.statistics.push_back(std::move(stats));
833
+ result.statistics.push_back(BaseStatistics::Deserialize(source, col.Type()));
833
834
  }
834
835
  for (idx_t i = 0; i < columns.PhysicalColumnCount(); i++) {
835
836
  BlockPointer pointer;
@@ -280,7 +280,7 @@ bool RowGroupCollection::Append(DataChunk &chunk, TableAppendState &state) {
280
280
  // merge the stats
281
281
  auto stats_lock = stats.GetLock();
282
282
  for (idx_t i = 0; i < types.size(); i++) {
283
- current_row_group->MergeIntoStatistics(i, *stats.GetStats(i).stats);
283
+ current_row_group->MergeIntoStatistics(i, stats.GetStats(i).Statistics());
284
284
  }
285
285
  }
286
286
  remaining -= append_count;
@@ -319,11 +319,7 @@ bool RowGroupCollection::Append(DataChunk &chunk, TableAppendState &state) {
319
319
  state.current_row += append_count;
320
320
  auto stats_lock = stats.GetLock();
321
321
  for (idx_t col_idx = 0; col_idx < types.size(); col_idx++) {
322
- auto type = types[col_idx].InternalType();
323
- if (type == PhysicalType::LIST || type == PhysicalType::STRUCT) {
324
- continue;
325
- }
326
- stats.GetStats(col_idx).stats->UpdateDistinctStatistics(chunk.data[col_idx], chunk.size());
322
+ stats.GetStats(col_idx).UpdateDistinctStatistics(chunk.data[col_idx], chunk.size());
327
323
  }
328
324
  return new_row_group;
329
325
  }
@@ -513,13 +509,13 @@ void RowGroupCollection::UpdateColumn(TransactionData transaction, Vector &row_i
513
509
  auto row_group = (RowGroup *)row_groups->GetSegment(first_id);
514
510
  row_group->UpdateColumn(transaction, updates, row_ids, column_path);
515
511
 
516
- row_group->MergeIntoStatistics(primary_column_idx, *stats.GetStats(primary_column_idx).stats);
512
+ row_group->MergeIntoStatistics(primary_column_idx, stats.GetStats(primary_column_idx).Statistics());
517
513
  }
518
514
 
519
515
  //===--------------------------------------------------------------------===//
520
516
  // Checkpoint
521
517
  //===--------------------------------------------------------------------===//
522
- void RowGroupCollection::Checkpoint(TableDataWriter &writer, vector<unique_ptr<BaseStatistics>> &global_stats) {
518
+ void RowGroupCollection::Checkpoint(TableDataWriter &writer, TableStatistics &global_stats) {
523
519
  for (auto row_group = (RowGroup *)row_groups->GetRootSegment(); row_group;
524
520
  row_group = (RowGroup *)row_group->Next()) {
525
521
  auto rowg_writer = writer.GetRowGroupWriter(*row_group);
@@ -590,7 +586,7 @@ shared_ptr<RowGroupCollection> RowGroupCollection::AddColumn(ClientContext &cont
590
586
  while (current_row_group) {
591
587
  auto new_row_group = current_row_group->AddColumn(new_column, executor, default_value, default_vector);
592
588
  // merge in the statistics
593
- new_row_group->MergeIntoStatistics(new_column_idx, *new_column_stats.stats);
589
+ new_row_group->MergeIntoStatistics(new_column_idx, new_column_stats.Statistics());
594
590
 
595
591
  result->row_groups->AppendSegment(std::move(new_row_group));
596
592
  current_row_group = (RowGroup *)current_row_group->Next();
@@ -651,7 +647,7 @@ shared_ptr<RowGroupCollection> RowGroupCollection::AlterType(ClientContext &cont
651
647
  while (current_row_group) {
652
648
  auto new_row_group = current_row_group->AlterType(target_type, changed_idx, executor,
653
649
  scan_state.table_state.row_group_state, scan_chunk);
654
- new_row_group->MergeIntoStatistics(changed_idx, *changed_stats.stats);
650
+ new_row_group->MergeIntoStatistics(changed_idx, changed_stats.Statistics());
655
651
  result->row_groups->AppendSegment(std::move(new_row_group));
656
652
  current_row_group = (RowGroup *)current_row_group->Next();
657
653
  }
@@ -696,14 +692,18 @@ void RowGroupCollection::VerifyNewConstraint(DataTable &parent, const BoundConst
696
692
  //===--------------------------------------------------------------------===//
697
693
  // Statistics
698
694
  //===--------------------------------------------------------------------===//
695
+ void RowGroupCollection::CopyStats(TableStatistics &other_stats) {
696
+ stats.CopyStats(other_stats);
697
+ }
698
+
699
699
  unique_ptr<BaseStatistics> RowGroupCollection::CopyStats(column_t column_id) {
700
700
  return stats.CopyStats(column_id);
701
701
  }
702
702
 
703
- void RowGroupCollection::SetStatistics(column_t column_id, const std::function<void(BaseStatistics &)> &set_fun) {
703
+ void RowGroupCollection::SetDistinct(column_t column_id, unique_ptr<DistinctStatistics> distinct_stats) {
704
704
  D_ASSERT(column_id != COLUMN_IDENTIFIER_ROW_ID);
705
705
  auto stats_guard = stats.GetLock();
706
- set_fun(*stats.GetStats(column_id).stats);
706
+ stats.GetStats(column_id).SetDistinct(std::move(distinct_stats));
707
707
  }
708
708
 
709
709
  } // namespace duckdb
@@ -24,7 +24,7 @@ bool StandardColumnData::CheckZonemap(ColumnScanState &state, TableFilter &filte
24
24
  return true;
25
25
  }
26
26
  state.segment_checked = true;
27
- auto prune_result = filter.CheckStatistics(*state.current->stats.statistics);
27
+ auto prune_result = filter.CheckStatistics(state.current->stats.statistics);
28
28
  if (prune_result != FilterPropagateResult::FILTER_ALWAYS_FALSE) {
29
29
  return true;
30
30
  }
@@ -91,8 +91,7 @@ void StandardColumnData::InitializeAppend(ColumnAppendState &state) {
91
91
  void StandardColumnData::AppendData(BaseStatistics &stats, ColumnAppendState &state, UnifiedVectorFormat &vdata,
92
92
  idx_t count) {
93
93
  ColumnData::AppendData(stats, state, vdata, count);
94
-
95
- validity.AppendData(*stats.validity_stats, state.child_appends[0], vdata, count);
94
+ validity.AppendData(stats, state.child_appends[0], vdata, count);
96
95
  }
97
96
 
98
97
  void StandardColumnData::RevertAppend(row_t start_row) {
@@ -136,9 +135,11 @@ unique_ptr<BaseStatistics> StandardColumnData::GetUpdateStatistics() {
136
135
  return nullptr;
137
136
  }
138
137
  if (!stats) {
139
- stats = BaseStatistics::CreateEmpty(type, StatisticsType::GLOBAL_STATS);
138
+ stats = BaseStatistics::CreateEmpty(type).ToUnique();
139
+ }
140
+ if (validity_stats) {
141
+ stats->Merge(*validity_stats);
140
142
  }
141
- stats->validity_stats = std::move(validity_stats);
142
143
  return stats;
143
144
  }
144
145
 
@@ -169,7 +170,6 @@ struct StandardColumnCheckpointState : public ColumnCheckpointState {
169
170
  public:
170
171
  unique_ptr<BaseStatistics> GetStatistics() override {
171
172
  D_ASSERT(global_stats);
172
- global_stats->validity_stats = validity_state->GetStatistics();
173
173
  return std::move(global_stats);
174
174
  }
175
175
 
@@ -1,5 +1,5 @@
1
1
  #include "duckdb/storage/table/struct_column_data.hpp"
2
- #include "duckdb/storage/statistics/struct_statistics.hpp"
2
+ #include "duckdb/storage/statistics/struct_stats.hpp"
3
3
  #include "duckdb/transaction/transaction.hpp"
4
4
 
5
5
  namespace duckdb {
@@ -127,12 +127,12 @@ void StructColumnData::Append(BaseStatistics &stats, ColumnAppendState &state, V
127
127
  vector.Flatten(count);
128
128
 
129
129
  // append the null values
130
- validity.Append(*stats.validity_stats, state.child_appends[0], vector, count);
130
+ validity.Append(stats, state.child_appends[0], vector, count);
131
131
 
132
- auto &struct_stats = (StructStatistics &)stats;
133
132
  auto &child_entries = StructVector::GetEntries(vector);
134
133
  for (idx_t i = 0; i < child_entries.size(); i++) {
135
- sub_columns[i]->Append(*struct_stats.child_stats[i], state.child_appends[i + 1], *child_entries[i], count);
134
+ sub_columns[i]->Append(StructStats::GetChildStats(stats, i), state.child_appends[i + 1], *child_entries[i],
135
+ count);
136
136
  }
137
137
  }
138
138
 
@@ -190,16 +190,18 @@ void StructColumnData::UpdateColumn(TransactionData transaction, const vector<co
190
190
 
191
191
  unique_ptr<BaseStatistics> StructColumnData::GetUpdateStatistics() {
192
192
  // check if any child column has updates
193
- auto stats = BaseStatistics::CreateEmpty(type, StatisticsType::GLOBAL_STATS);
194
- auto &struct_stats = (StructStatistics &)*stats;
195
- stats->validity_stats = validity.GetUpdateStatistics();
193
+ auto stats = BaseStatistics::CreateEmpty(type);
194
+ auto validity_stats = validity.GetUpdateStatistics();
195
+ if (validity_stats) {
196
+ stats.Merge(*validity_stats);
197
+ }
196
198
  for (idx_t i = 0; i < sub_columns.size(); i++) {
197
199
  auto child_stats = sub_columns[i]->GetUpdateStatistics();
198
200
  if (child_stats) {
199
- struct_stats.child_stats[i] = std::move(child_stats);
201
+ StructStats::SetChildStats(stats, i, std::move(child_stats));
200
202
  }
201
203
  }
202
- return stats;
204
+ return stats.ToUnique();
203
205
  }
204
206
 
205
207
  void StructColumnData::FetchRow(TransactionData transaction, ColumnFetchState &state, row_t row_id, Vector &result,
@@ -230,7 +232,7 @@ struct StructColumnCheckpointState : public ColumnCheckpointState {
230
232
  StructColumnCheckpointState(RowGroup &row_group, ColumnData &column_data,
231
233
  PartialBlockManager &partial_block_manager)
232
234
  : ColumnCheckpointState(row_group, column_data, partial_block_manager) {
233
- global_stats = make_unique<StructStatistics>(column_data.type);
235
+ global_stats = StructStats::CreateEmpty(column_data.type).ToUnique();
234
236
  }
235
237
 
236
238
  unique_ptr<ColumnCheckpointState> validity_state;
@@ -238,14 +240,11 @@ struct StructColumnCheckpointState : public ColumnCheckpointState {
238
240
 
239
241
  public:
240
242
  unique_ptr<BaseStatistics> GetStatistics() override {
241
- auto stats = make_unique<StructStatistics>(column_data.type);
242
- D_ASSERT(stats->child_stats.size() == child_states.size());
243
- stats->validity_stats = validity_state->GetStatistics();
243
+ auto stats = StructStats::CreateEmpty(column_data.type);
244
244
  for (idx_t i = 0; i < child_states.size(); i++) {
245
- stats->child_stats[i] = child_states[i]->GetStatistics();
246
- D_ASSERT(stats->child_stats[i]);
245
+ StructStats::SetChildStats(stats, i, child_states[i]->GetStatistics());
247
246
  }
248
- return std::move(stats);
247
+ return stats.ToUnique();
249
248
  }
250
249
 
251
250
  void WriteDataPointers(RowGroupWriter &writer) override {