duckdb 0.7.2-dev16.0 → 0.7.2-dev314.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242)
  1. package/binding.gyp +2 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/icu/icu-extension.cpp +2 -0
  4. package/src/duckdb/extension/icu/icu-table-range.cpp +194 -0
  5. package/src/duckdb/extension/icu/include/icu-table-range.hpp +17 -0
  6. package/src/duckdb/extension/parquet/column_reader.cpp +5 -6
  7. package/src/duckdb/extension/parquet/column_writer.cpp +0 -1
  8. package/src/duckdb/extension/parquet/include/column_reader.hpp +1 -2
  9. package/src/duckdb/extension/parquet/include/generated_column_reader.hpp +1 -11
  10. package/src/duckdb/extension/parquet/parquet-extension.cpp +11 -2
  11. package/src/duckdb/extension/parquet/parquet_statistics.cpp +26 -32
  12. package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +4 -0
  13. package/src/duckdb/src/catalog/catalog_entry/scalar_function_catalog_entry.cpp +7 -6
  14. package/src/duckdb/src/catalog/catalog_entry/table_function_catalog_entry.cpp +20 -1
  15. package/src/duckdb/src/common/enums/statement_type.cpp +2 -0
  16. package/src/duckdb/src/common/sort/sort_state.cpp +5 -7
  17. package/src/duckdb/src/common/types/bit.cpp +95 -58
  18. package/src/duckdb/src/common/types/value.cpp +149 -53
  19. package/src/duckdb/src/common/types/vector.cpp +13 -10
  20. package/src/duckdb/src/execution/column_binding_resolver.cpp +6 -0
  21. package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +4 -5
  22. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
  23. package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +2 -3
  24. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +32 -6
  25. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +1 -1
  26. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +15 -15
  27. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +18 -12
  28. package/src/duckdb/src/function/aggregate/algebraic/avg.cpp +0 -6
  29. package/src/duckdb/src/function/aggregate/distributive/bitagg.cpp +99 -95
  30. package/src/duckdb/src/function/aggregate/distributive/bitstring_agg.cpp +254 -0
  31. package/src/duckdb/src/function/aggregate/distributive/count.cpp +2 -4
  32. package/src/duckdb/src/function/aggregate/distributive/sum.cpp +11 -16
  33. package/src/duckdb/src/function/aggregate/distributive_functions.cpp +1 -0
  34. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +16 -5
  35. package/src/duckdb/src/function/cast/bit_cast.cpp +0 -2
  36. package/src/duckdb/src/function/cast/blob_cast.cpp +0 -1
  37. package/src/duckdb/src/function/scalar/bit/bitstring.cpp +99 -0
  38. package/src/duckdb/src/function/scalar/date/date_diff.cpp +0 -1
  39. package/src/duckdb/src/function/scalar/date/date_part.cpp +17 -25
  40. package/src/duckdb/src/function/scalar/date/date_sub.cpp +0 -1
  41. package/src/duckdb/src/function/scalar/date/date_trunc.cpp +10 -14
  42. package/src/duckdb/src/function/scalar/generic/stats.cpp +2 -4
  43. package/src/duckdb/src/function/scalar/list/flatten.cpp +5 -12
  44. package/src/duckdb/src/function/scalar/list/list_concat.cpp +3 -8
  45. package/src/duckdb/src/function/scalar/list/list_extract.cpp +5 -12
  46. package/src/duckdb/src/function/scalar/list/list_value.cpp +5 -9
  47. package/src/duckdb/src/function/scalar/map/map_entries.cpp +61 -0
  48. package/src/duckdb/src/function/scalar/map/map_keys_values.cpp +97 -0
  49. package/src/duckdb/src/function/scalar/math/numeric.cpp +14 -17
  50. package/src/duckdb/src/function/scalar/nested_functions.cpp +3 -0
  51. package/src/duckdb/src/function/scalar/operators/add.cpp +0 -9
  52. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +29 -48
  53. package/src/duckdb/src/function/scalar/operators/bitwise.cpp +0 -63
  54. package/src/duckdb/src/function/scalar/operators/multiply.cpp +0 -6
  55. package/src/duckdb/src/function/scalar/operators/subtract.cpp +0 -6
  56. package/src/duckdb/src/function/scalar/string/caseconvert.cpp +2 -6
  57. package/src/duckdb/src/function/scalar/string/instr.cpp +2 -6
  58. package/src/duckdb/src/function/scalar/string/length.cpp +2 -6
  59. package/src/duckdb/src/function/scalar/string/like.cpp +2 -6
  60. package/src/duckdb/src/function/scalar/string/substring.cpp +2 -6
  61. package/src/duckdb/src/function/scalar/string_functions.cpp +1 -0
  62. package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +4 -9
  63. package/src/duckdb/src/function/scalar/struct/struct_insert.cpp +10 -13
  64. package/src/duckdb/src/function/scalar/struct/struct_pack.cpp +5 -6
  65. package/src/duckdb/src/function/table/read_csv.cpp +9 -0
  66. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  67. package/src/duckdb/src/function/table_function.cpp +19 -0
  68. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp +6 -8
  69. package/src/duckdb/src/include/duckdb/common/constants.hpp +0 -19
  70. package/src/duckdb/src/include/duckdb/common/enums/statement_type.hpp +2 -1
  71. package/src/duckdb/src/include/duckdb/common/enums/tableref_type.hpp +2 -1
  72. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +5 -1
  73. package/src/duckdb/src/include/duckdb/common/types/value.hpp +2 -8
  74. package/src/duckdb/src/include/duckdb/common/types.hpp +1 -2
  75. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
  76. package/src/duckdb/src/include/duckdb/function/aggregate/distributive_functions.hpp +5 -0
  77. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +12 -3
  78. package/src/duckdb/src/include/duckdb/function/scalar/bit_functions.hpp +4 -0
  79. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +12 -0
  80. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +2 -2
  81. package/src/duckdb/src/include/duckdb/function/table_function.hpp +2 -0
  82. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +2 -0
  83. package/src/duckdb/src/include/duckdb/main/config.hpp +3 -0
  84. package/src/duckdb/src/include/duckdb/main/database.hpp +1 -0
  85. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +2 -2
  86. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  87. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +2 -1
  88. package/src/duckdb/src/include/duckdb/parser/parsed_data/{alter_function_info.hpp → alter_scalar_function_info.hpp} +13 -13
  89. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_function_info.hpp +47 -0
  90. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_table_function_info.hpp +2 -1
  91. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
  92. package/src/duckdb/src/include/duckdb/parser/statement/multi_statement.hpp +28 -0
  93. package/src/duckdb/src/include/duckdb/parser/tableref/list.hpp +1 -0
  94. package/src/duckdb/src/include/duckdb/parser/tableref/pivotref.hpp +76 -0
  95. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +2 -0
  96. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +28 -0
  97. package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +2 -0
  98. package/src/duckdb/src/include/duckdb/planner/binder.hpp +8 -0
  99. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +2 -0
  100. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +76 -44
  101. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -2
  102. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +2 -2
  103. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_fetch.hpp +1 -1
  104. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -1
  105. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +2 -2
  106. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_fetch.hpp +1 -1
  107. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -1
  108. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +5 -2
  109. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  110. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +93 -29
  111. package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +22 -3
  112. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +6 -6
  113. package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +41 -0
  114. package/src/duckdb/src/include/duckdb/storage/statistics/node_statistics.hpp +26 -0
  115. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +157 -0
  116. package/src/duckdb/src/include/duckdb/storage/statistics/segment_statistics.hpp +2 -7
  117. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +74 -0
  118. package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +42 -0
  119. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +2 -3
  120. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +2 -2
  121. package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +1 -1
  122. package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +2 -1
  123. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -3
  124. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +3 -2
  125. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +2 -0
  126. package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
  127. package/src/duckdb/src/include/duckdb.h +49 -1
  128. package/src/duckdb/src/include/duckdb.hpp +0 -1
  129. package/src/duckdb/src/main/capi/pending-c.cpp +16 -3
  130. package/src/duckdb/src/main/capi/result-c.cpp +27 -1
  131. package/src/duckdb/src/main/capi/stream-c.cpp +25 -0
  132. package/src/duckdb/src/main/client_context.cpp +8 -1
  133. package/src/duckdb/src/main/config.cpp +66 -1
  134. package/src/duckdb/src/main/database.cpp +10 -2
  135. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +98 -67
  136. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +16 -3
  137. package/src/duckdb/src/optimizer/statistics/expression/propagate_aggregate.cpp +9 -3
  138. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +6 -7
  139. package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -11
  140. package/src/duckdb/src/optimizer/statistics/expression/propagate_columnref.cpp +1 -1
  141. package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +13 -15
  142. package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +0 -1
  143. package/src/duckdb/src/optimizer/statistics/expression/propagate_constant.cpp +3 -75
  144. package/src/duckdb/src/optimizer/statistics/expression/propagate_function.cpp +7 -2
  145. package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +10 -0
  146. package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +2 -3
  147. package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +28 -31
  148. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +4 -5
  149. package/src/duckdb/src/optimizer/statistics/operator/propagate_set_operation.cpp +3 -3
  150. package/src/duckdb/src/optimizer/statistics_propagator.cpp +1 -1
  151. package/src/duckdb/src/parser/parsed_data/alter_info.cpp +7 -3
  152. package/src/duckdb/src/parser/parsed_data/alter_scalar_function_info.cpp +56 -0
  153. package/src/duckdb/src/parser/parsed_data/alter_table_function_info.cpp +51 -0
  154. package/src/duckdb/src/parser/parsed_data/create_scalar_function_info.cpp +3 -2
  155. package/src/duckdb/src/parser/parsed_data/create_table_function_info.cpp +6 -0
  156. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +8 -0
  157. package/src/duckdb/src/parser/query_node.cpp +1 -1
  158. package/src/duckdb/src/parser/statement/multi_statement.cpp +18 -0
  159. package/src/duckdb/src/parser/tableref/pivotref.cpp +296 -0
  160. package/src/duckdb/src/parser/tableref.cpp +3 -0
  161. package/src/duckdb/src/parser/transform/helpers/transform_alias.cpp +12 -6
  162. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +24 -0
  163. package/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +4 -0
  164. package/src/duckdb/src/parser/transform/statement/transform_create_view.cpp +4 -0
  165. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +150 -0
  166. package/src/duckdb/src/parser/transform/statement/transform_select.cpp +8 -0
  167. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +1 -1
  168. package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +4 -0
  169. package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +105 -0
  170. package/src/duckdb/src/parser/transform/tableref/transform_tableref.cpp +2 -0
  171. package/src/duckdb/src/parser/transformer.cpp +15 -3
  172. package/src/duckdb/src/planner/bind_context.cpp +16 -0
  173. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +11 -3
  174. package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +0 -1
  175. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +1 -1
  176. package/src/duckdb/src/planner/binder/statement/bind_logical_plan.cpp +17 -0
  177. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +9 -0
  178. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +365 -0
  179. package/src/duckdb/src/planner/binder.cpp +7 -1
  180. package/src/duckdb/src/planner/bound_result_modifier.cpp +1 -1
  181. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +1 -1
  182. package/src/duckdb/src/planner/filter/constant_filter.cpp +4 -6
  183. package/src/duckdb/src/planner/pragma_handler.cpp +10 -2
  184. package/src/duckdb/src/storage/buffer_manager.cpp +44 -46
  185. package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -1
  186. package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +1 -4
  187. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +4 -4
  188. package/src/duckdb/src/storage/compression/bitpacking.cpp +28 -24
  189. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +43 -45
  190. package/src/duckdb/src/storage/compression/numeric_constant.cpp +9 -10
  191. package/src/duckdb/src/storage/compression/patas.cpp +1 -1
  192. package/src/duckdb/src/storage/compression/rle.cpp +19 -15
  193. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +5 -5
  194. package/src/duckdb/src/storage/data_table.cpp +4 -6
  195. package/src/duckdb/src/storage/statistics/base_statistics.cpp +373 -128
  196. package/src/duckdb/src/storage/statistics/column_statistics.cpp +58 -3
  197. package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +4 -9
  198. package/src/duckdb/src/storage/statistics/list_stats.cpp +117 -0
  199. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +529 -0
  200. package/src/duckdb/src/storage/statistics/segment_statistics.cpp +2 -11
  201. package/src/duckdb/src/storage/statistics/string_stats.cpp +273 -0
  202. package/src/duckdb/src/storage/statistics/struct_stats.cpp +131 -0
  203. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  204. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -4
  205. package/src/duckdb/src/storage/table/column_data.cpp +16 -11
  206. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +2 -3
  207. package/src/duckdb/src/storage/table/column_segment.cpp +6 -8
  208. package/src/duckdb/src/storage/table/list_column_data.cpp +39 -58
  209. package/src/duckdb/src/storage/table/row_group.cpp +24 -23
  210. package/src/duckdb/src/storage/table/row_group_collection.cpp +12 -12
  211. package/src/duckdb/src/storage/table/standard_column_data.cpp +6 -6
  212. package/src/duckdb/src/storage/table/struct_column_data.cpp +15 -16
  213. package/src/duckdb/src/storage/table/table_statistics.cpp +27 -7
  214. package/src/duckdb/src/storage/table/update_segment.cpp +10 -12
  215. package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +3 -0
  216. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +34 -1
  217. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +1020 -530
  218. package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +7 -0
  219. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +23560 -22737
  220. package/src/duckdb/ub_src_function_aggregate_distributive.cpp +2 -0
  221. package/src/duckdb/ub_src_function_scalar_bit.cpp +2 -0
  222. package/src/duckdb/ub_src_function_scalar_map.cpp +4 -0
  223. package/src/duckdb/ub_src_main_capi.cpp +2 -0
  224. package/src/duckdb/ub_src_parser_parsed_data.cpp +4 -2
  225. package/src/duckdb/ub_src_parser_statement.cpp +2 -0
  226. package/src/duckdb/ub_src_parser_tableref.cpp +2 -0
  227. package/src/duckdb/ub_src_parser_transform_statement.cpp +2 -0
  228. package/src/duckdb/ub_src_parser_transform_tableref.cpp +2 -0
  229. package/src/duckdb/ub_src_planner_binder_tableref.cpp +2 -0
  230. package/src/duckdb/ub_src_storage_statistics.cpp +4 -6
  231. package/src/duckdb/src/include/duckdb/main/loadable_extension.hpp +0 -59
  232. package/src/duckdb/src/include/duckdb/storage/statistics/list_statistics.hpp +0 -36
  233. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_statistics.hpp +0 -75
  234. package/src/duckdb/src/include/duckdb/storage/statistics/string_statistics.hpp +0 -49
  235. package/src/duckdb/src/include/duckdb/storage/statistics/struct_statistics.hpp +0 -36
  236. package/src/duckdb/src/include/duckdb/storage/statistics/validity_statistics.hpp +0 -45
  237. package/src/duckdb/src/parser/parsed_data/alter_function_info.cpp +0 -55
  238. package/src/duckdb/src/storage/statistics/list_statistics.cpp +0 -94
  239. package/src/duckdb/src/storage/statistics/numeric_statistics.cpp +0 -307
  240. package/src/duckdb/src/storage/statistics/string_statistics.cpp +0 -220
  241. package/src/duckdb/src/storage/statistics/struct_statistics.cpp +0 -108
  242. package/src/duckdb/src/storage/statistics/validity_statistics.cpp +0 -91
@@ -5,7 +5,7 @@
5
5
  #include "duckdb/storage/buffer_manager.hpp"
6
6
  #include "duckdb/storage/checkpoint/write_overflow_strings_to_disk.hpp"
7
7
  #include "duckdb/storage/segment/uncompressed.hpp"
8
- #include "duckdb/storage/statistics/numeric_statistics.hpp"
8
+
9
9
  #include "duckdb/storage/table/append_state.hpp"
10
10
  #include "duckdb/storage/table/column_data_checkpointer.hpp"
11
11
  #include "duckdb/storage/table/column_segment.hpp"
@@ -153,13 +153,7 @@ void FixedSizeScan(ColumnSegment &segment, ColumnScanState &state, idx_t scan_co
153
153
  auto source_data = data + start * sizeof(T);
154
154
 
155
155
  result.SetVectorType(VectorType::FLAT_VECTOR);
156
- if (std::is_same<T, list_entry_t>()) {
157
- // list columns are modified in-place during the scans to correct the offsets
158
- // so we can't do a zero-copy there
159
- memcpy(FlatVector::GetData(result), source_data, scan_count * sizeof(T));
160
- } else {
161
- FlatVector::SetData(result, source_data);
162
- }
156
+ FlatVector::SetData(result, source_data);
163
157
  }
164
158
 
165
159
  //===--------------------------------------------------------------------===//
@@ -186,48 +180,52 @@ static unique_ptr<CompressionAppendState> FixedSizeInitAppend(ColumnSegment &seg
186
180
  return make_unique<CompressionAppendState>(std::move(handle));
187
181
  }
188
182
 
189
- template <class T>
190
- static void AppendLoop(SegmentStatistics &stats, data_ptr_t target, idx_t target_offset, UnifiedVectorFormat &adata,
191
- idx_t offset, idx_t count) {
192
- auto sdata = (T *)adata.data;
193
- auto tdata = (T *)target;
194
- if (!adata.validity.AllValid()) {
195
- for (idx_t i = 0; i < count; i++) {
196
- auto source_idx = adata.sel->get_index(offset + i);
197
- auto target_idx = target_offset + i;
198
- bool is_null = !adata.validity.RowIsValid(source_idx);
199
- if (!is_null) {
200
- NumericStatistics::Update<T>(stats, sdata[source_idx]);
183
+ struct StandardFixedSizeAppend {
184
+ template <class T>
185
+ static void Append(SegmentStatistics &stats, data_ptr_t target, idx_t target_offset, UnifiedVectorFormat &adata,
186
+ idx_t offset, idx_t count) {
187
+ auto sdata = (T *)adata.data;
188
+ auto tdata = (T *)target;
189
+ if (!adata.validity.AllValid()) {
190
+ for (idx_t i = 0; i < count; i++) {
191
+ auto source_idx = adata.sel->get_index(offset + i);
192
+ auto target_idx = target_offset + i;
193
+ bool is_null = !adata.validity.RowIsValid(source_idx);
194
+ if (!is_null) {
195
+ NumericStats::Update<T>(stats.statistics, sdata[source_idx]);
196
+ tdata[target_idx] = sdata[source_idx];
197
+ } else {
198
+ // we insert a NullValue<T> in the null gap for debuggability
199
+ // this value should never be used or read anywhere
200
+ tdata[target_idx] = NullValue<T>();
201
+ }
202
+ }
203
+ } else {
204
+ for (idx_t i = 0; i < count; i++) {
205
+ auto source_idx = adata.sel->get_index(offset + i);
206
+ auto target_idx = target_offset + i;
207
+ NumericStats::Update<T>(stats.statistics, sdata[source_idx]);
201
208
  tdata[target_idx] = sdata[source_idx];
202
- } else {
203
- // we insert a NullValue<T> in the null gap for debuggability
204
- // this value should never be used or read anywhere
205
- tdata[target_idx] = NullValue<T>();
206
209
  }
207
210
  }
208
- } else {
211
+ }
212
+ };
213
+
214
+ struct ListFixedSizeAppend {
215
+ template <class T>
216
+ static void Append(SegmentStatistics &stats, data_ptr_t target, idx_t target_offset, UnifiedVectorFormat &adata,
217
+ idx_t offset, idx_t count) {
218
+ auto sdata = (uint64_t *)adata.data;
219
+ auto tdata = (uint64_t *)target;
209
220
  for (idx_t i = 0; i < count; i++) {
210
221
  auto source_idx = adata.sel->get_index(offset + i);
211
222
  auto target_idx = target_offset + i;
212
- NumericStatistics::Update<T>(stats, sdata[source_idx]);
213
223
  tdata[target_idx] = sdata[source_idx];
214
224
  }
215
225
  }
216
- }
217
-
218
- template <>
219
- void AppendLoop<list_entry_t>(SegmentStatistics &stats, data_ptr_t target, idx_t target_offset,
220
- UnifiedVectorFormat &adata, idx_t offset, idx_t count) {
221
- auto sdata = (list_entry_t *)adata.data;
222
- auto tdata = (list_entry_t *)target;
223
- for (idx_t i = 0; i < count; i++) {
224
- auto source_idx = adata.sel->get_index(offset + i);
225
- auto target_idx = target_offset + i;
226
- tdata[target_idx] = sdata[source_idx];
227
- }
228
- }
226
+ };
229
227
 
230
- template <class T>
228
+ template <class T, class OP>
231
229
  idx_t FixedSizeAppend(CompressionAppendState &append_state, ColumnSegment &segment, SegmentStatistics &stats,
232
230
  UnifiedVectorFormat &data, idx_t offset, idx_t count) {
233
231
  D_ASSERT(segment.GetBlockOffset() == 0);
@@ -236,7 +234,7 @@ idx_t FixedSizeAppend(CompressionAppendState &append_state, ColumnSegment &segme
236
234
  idx_t max_tuple_count = segment.SegmentSize() / sizeof(T);
237
235
  idx_t copy_count = MinValue<idx_t>(count, max_tuple_count - segment.count);
238
236
 
239
- AppendLoop<T>(stats, target_ptr, segment.count, data, offset, copy_count);
237
+ OP::template Append<T>(stats, target_ptr, segment.count, data, offset, copy_count);
240
238
  segment.count += copy_count;
241
239
  return copy_count;
242
240
  }
@@ -249,14 +247,14 @@ idx_t FixedSizeFinalizeAppend(ColumnSegment &segment, SegmentStatistics &stats)
249
247
  //===--------------------------------------------------------------------===//
250
248
  // Get Function
251
249
  //===--------------------------------------------------------------------===//
252
- template <class T>
250
+ template <class T, class APPENDER = StandardFixedSizeAppend>
253
251
  CompressionFunction FixedSizeGetFunction(PhysicalType data_type) {
254
252
  return CompressionFunction(CompressionType::COMPRESSION_UNCOMPRESSED, data_type, FixedSizeInitAnalyze,
255
253
  FixedSizeAnalyze, FixedSizeFinalAnalyze<T>, UncompressedFunctions::InitCompression,
256
254
  UncompressedFunctions::Compress, UncompressedFunctions::FinalizeCompress,
257
255
  FixedSizeInitScan, FixedSizeScan<T>, FixedSizeScanPartial<T>, FixedSizeFetchRow<T>,
258
- UncompressedFunctions::EmptySkip, nullptr, FixedSizeInitAppend, FixedSizeAppend<T>,
259
- FixedSizeFinalizeAppend<T>, nullptr);
256
+ UncompressedFunctions::EmptySkip, nullptr, FixedSizeInitAppend,
257
+ FixedSizeAppend<T, APPENDER>, FixedSizeFinalizeAppend<T>, nullptr);
260
258
  }
261
259
 
262
260
  CompressionFunction FixedSizeUncompressed::GetFunction(PhysicalType data_type) {
@@ -287,7 +285,7 @@ CompressionFunction FixedSizeUncompressed::GetFunction(PhysicalType data_type) {
287
285
  case PhysicalType::INTERVAL:
288
286
  return FixedSizeGetFunction<interval_t>(data_type);
289
287
  case PhysicalType::LIST:
290
- return FixedSizeGetFunction<list_entry_t>(data_type);
288
+ return FixedSizeGetFunction<uint64_t, ListFixedSizeAppend>(data_type);
291
289
  default:
292
290
  throw InternalException("Unsupported type for FixedSizeUncompressed::GetFunction");
293
291
  }
@@ -1,8 +1,7 @@
1
1
  #include "duckdb/function/compression/compression.hpp"
2
2
  #include "duckdb/storage/buffer_manager.hpp"
3
3
  #include "duckdb/common/types/vector.hpp"
4
- #include "duckdb/storage/statistics/numeric_statistics.hpp"
5
- #include "duckdb/storage/statistics/validity_statistics.hpp"
4
+
6
5
  #include "duckdb/storage/table/column_segment.hpp"
7
6
  #include "duckdb/function/compression_function.hpp"
8
7
  #include "duckdb/storage/segment/uncompressed.hpp"
@@ -20,8 +19,8 @@ unique_ptr<SegmentScanState> ConstantInitScan(ColumnSegment &segment) {
20
19
  // Scan Partial
21
20
  //===--------------------------------------------------------------------===//
22
21
  void ConstantFillFunctionValidity(ColumnSegment &segment, Vector &result, idx_t start_idx, idx_t count) {
23
- auto &validity = (ValidityStatistics &)*segment.stats.statistics;
24
- if (validity.has_null) {
22
+ auto &stats = segment.stats.statistics;
23
+ if (stats.CanHaveNull()) {
25
24
  auto &mask = FlatVector::Validity(result);
26
25
  for (idx_t i = 0; i < count; i++) {
27
26
  mask.SetInvalid(start_idx + i);
@@ -31,10 +30,10 @@ void ConstantFillFunctionValidity(ColumnSegment &segment, Vector &result, idx_t
31
30
 
32
31
  template <class T>
33
32
  void ConstantFillFunction(ColumnSegment &segment, Vector &result, idx_t start_idx, idx_t count) {
34
- auto &nstats = (NumericStatistics &)*segment.stats.statistics;
33
+ auto &nstats = segment.stats.statistics;
35
34
 
36
35
  auto data = FlatVector::GetData<T>(result);
37
- auto constant_value = nstats.min.GetValueUnsafe<T>();
36
+ auto constant_value = NumericStats::GetMinUnsafe<T>(nstats);
38
37
  for (idx_t i = 0; i < count; i++) {
39
38
  data[start_idx + i] = constant_value;
40
39
  }
@@ -55,8 +54,8 @@ void ConstantScanPartial(ColumnSegment &segment, ColumnScanState &state, idx_t s
55
54
  // Scan base data
56
55
  //===--------------------------------------------------------------------===//
57
56
  void ConstantScanFunctionValidity(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, Vector &result) {
58
- auto &validity = (ValidityStatistics &)*segment.stats.statistics;
59
- if (validity.has_null) {
57
+ auto &stats = segment.stats.statistics;
58
+ if (stats.CanHaveNull()) {
60
59
  if (result.GetVectorType() == VectorType::CONSTANT_VECTOR) {
61
60
  result.SetVectorType(VectorType::CONSTANT_VECTOR);
62
61
  ConstantVector::SetNull(result, true);
@@ -69,10 +68,10 @@ void ConstantScanFunctionValidity(ColumnSegment &segment, ColumnScanState &state
69
68
 
70
69
  template <class T>
71
70
  void ConstantScanFunction(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, Vector &result) {
72
- auto &nstats = (NumericStatistics &)*segment.stats.statistics;
71
+ auto &nstats = segment.stats.statistics;
73
72
 
74
73
  auto data = FlatVector::GetData<T>(result);
75
- data[0] = nstats.min.GetValueUnsafe<T>();
74
+ data[0] = NumericStats::GetMinUnsafe<T>(nstats);
76
75
  result.SetVectorType(VectorType::CONSTANT_VECTOR);
77
76
  }
78
77
 
@@ -10,7 +10,7 @@
10
10
  #include "duckdb/function/compression_function.hpp"
11
11
  #include "duckdb/main/config.hpp"
12
12
  #include "duckdb/storage/buffer_manager.hpp"
13
- #include "duckdb/storage/statistics/numeric_statistics.hpp"
13
+
14
14
  #include "duckdb/storage/table/column_data_checkpointer.hpp"
15
15
  #include "duckdb/storage/table/column_segment.hpp"
16
16
  #include "duckdb/common/operator/subtract.hpp"
@@ -1,5 +1,5 @@
1
1
  #include "duckdb/function/compression/compression.hpp"
2
- #include "duckdb/storage/statistics/numeric_statistics.hpp"
2
+
3
3
  #include "duckdb/storage/table/column_segment.hpp"
4
4
  #include "duckdb/function/compression_function.hpp"
5
5
  #include "duckdb/main/config.hpp"
@@ -118,12 +118,12 @@ struct RLEConstants {
118
118
  static constexpr const idx_t RLE_HEADER_SIZE = sizeof(uint64_t);
119
119
  };
120
120
 
121
- template <class T>
121
+ template <class T, bool WRITE_STATISTICS>
122
122
  struct RLECompressState : public CompressionState {
123
123
  struct RLEWriter {
124
124
  template <class VALUE_TYPE>
125
125
  static void Operation(VALUE_TYPE value, rle_count_t count, void *dataptr, bool is_null) {
126
- auto state = (RLECompressState<T> *)dataptr;
126
+ auto state = (RLECompressState<T, WRITE_STATISTICS> *)dataptr;
127
127
  state->WriteValue(value, count, is_null);
128
128
  }
129
129
  };
@@ -160,7 +160,7 @@ struct RLECompressState : public CompressionState {
160
160
  auto data = (T *)vdata.data;
161
161
  for (idx_t i = 0; i < count; i++) {
162
162
  auto idx = vdata.sel->get_index(i);
163
- state.template Update<RLECompressState<T>::RLEWriter>(data, vdata.validity, idx);
163
+ state.template Update<RLECompressState<T, WRITE_STATISTICS>::RLEWriter>(data, vdata.validity, idx);
164
164
  }
165
165
  }
166
166
 
@@ -174,8 +174,8 @@ struct RLECompressState : public CompressionState {
174
174
  entry_count++;
175
175
 
176
176
  // update meta data
177
- if (!is_null) {
178
- NumericStatistics::Update<T>(current_segment->stats, value);
177
+ if (WRITE_STATISTICS && !is_null) {
178
+ NumericStats::Update<T>(current_segment->stats.statistics, value);
179
179
  }
180
180
  current_segment->count += count;
181
181
 
@@ -206,7 +206,7 @@ struct RLECompressState : public CompressionState {
206
206
  }
207
207
 
208
208
  void Finalize() {
209
- state.template Flush<RLECompressState<T>::RLEWriter>();
209
+ state.template Flush<RLECompressState<T, WRITE_STATISTICS>::RLEWriter>();
210
210
 
211
211
  FlushSegment();
212
212
  current_segment.reset();
@@ -222,23 +222,23 @@ struct RLECompressState : public CompressionState {
222
222
  idx_t max_rle_count;
223
223
  };
224
224
 
225
- template <class T>
225
+ template <class T, bool WRITE_STATISTICS>
226
226
  unique_ptr<CompressionState> RLEInitCompression(ColumnDataCheckpointer &checkpointer, unique_ptr<AnalyzeState> state) {
227
- return make_unique<RLECompressState<T>>(checkpointer);
227
+ return make_unique<RLECompressState<T, WRITE_STATISTICS>>(checkpointer);
228
228
  }
229
229
 
230
- template <class T>
230
+ template <class T, bool WRITE_STATISTICS>
231
231
  void RLECompress(CompressionState &state_p, Vector &scan_vector, idx_t count) {
232
- auto &state = (RLECompressState<T> &)state_p;
232
+ auto &state = (RLECompressState<T, WRITE_STATISTICS> &)state_p;
233
233
  UnifiedVectorFormat vdata;
234
234
  scan_vector.ToUnifiedFormat(count, vdata);
235
235
 
236
236
  state.Append(vdata, count);
237
237
  }
238
238
 
239
- template <class T>
239
+ template <class T, bool WRITE_STATISTICS>
240
240
  void RLEFinalizeCompress(CompressionState &state_p) {
241
- auto &state = (RLECompressState<T> &)state_p;
241
+ auto &state = (RLECompressState<T, WRITE_STATISTICS> &)state_p;
242
242
  state.Finalize();
243
243
  }
244
244
 
@@ -341,10 +341,11 @@ void RLEFetchRow(ColumnSegment &segment, ColumnFetchState &state, row_t row_id,
341
341
  //===--------------------------------------------------------------------===//
342
342
  // Get Function
343
343
  //===--------------------------------------------------------------------===//
344
- template <class T>
344
+ template <class T, bool WRITE_STATISTICS = true>
345
345
  CompressionFunction GetRLEFunction(PhysicalType data_type) {
346
346
  return CompressionFunction(CompressionType::COMPRESSION_RLE, data_type, RLEInitAnalyze<T>, RLEAnalyze<T>,
347
- RLEFinalAnalyze<T>, RLEInitCompression<T>, RLECompress<T>, RLEFinalizeCompress<T>,
347
+ RLEFinalAnalyze<T>, RLEInitCompression<T, WRITE_STATISTICS>,
348
+ RLECompress<T, WRITE_STATISTICS>, RLEFinalizeCompress<T, WRITE_STATISTICS>,
348
349
  RLEInitScan<T>, RLEScan<T>, RLEScanPartial<T>, RLEFetchRow<T>, RLESkip<T>);
349
350
  }
350
351
 
@@ -373,6 +374,8 @@ CompressionFunction RLEFun::GetFunction(PhysicalType type) {
373
374
  return GetRLEFunction<float>(type);
374
375
  case PhysicalType::DOUBLE:
375
376
  return GetRLEFunction<double>(type);
377
+ case PhysicalType::LIST:
378
+ return GetRLEFunction<uint64_t, false>(type);
376
379
  default:
377
380
  throw InternalException("Unsupported type for RLE");
378
381
  }
@@ -392,6 +395,7 @@ bool RLEFun::TypeIsSupported(PhysicalType type) {
392
395
  case PhysicalType::UINT64:
393
396
  case PhysicalType::FLOAT:
394
397
  case PhysicalType::DOUBLE:
398
+ case PhysicalType::LIST:
395
399
  return true;
396
400
  default:
397
401
  return false;
@@ -2,7 +2,7 @@
2
2
  #include "duckdb/storage/buffer_manager.hpp"
3
3
  #include "duckdb/common/types/vector.hpp"
4
4
  #include "duckdb/storage/table/append_state.hpp"
5
- #include "duckdb/storage/statistics/validity_statistics.hpp"
5
+
6
6
  #include "duckdb/common/types/null_value.hpp"
7
7
  #include "duckdb/storage/table/column_segment.hpp"
8
8
  #include "duckdb/function/compression_function.hpp"
@@ -410,14 +410,14 @@ unique_ptr<CompressedSegmentState> ValidityInitSegment(ColumnSegment &segment, b
410
410
  idx_t ValidityAppend(CompressionAppendState &append_state, ColumnSegment &segment, SegmentStatistics &stats,
411
411
  UnifiedVectorFormat &data, idx_t offset, idx_t vcount) {
412
412
  D_ASSERT(segment.GetBlockOffset() == 0);
413
- auto &validity_stats = (ValidityStatistics &)*stats.statistics;
413
+ auto &validity_stats = stats.statistics;
414
414
 
415
415
  auto max_tuples = segment.SegmentSize() / ValidityMask::STANDARD_MASK_SIZE * STANDARD_VECTOR_SIZE;
416
416
  idx_t append_count = MinValue<idx_t>(vcount, max_tuples - segment.count);
417
417
  if (data.validity.AllValid()) {
418
418
  // no null values: skip append
419
419
  segment.count += append_count;
420
- validity_stats.has_no_null = true;
420
+ validity_stats.SetHasNoNull();
421
421
  return append_count;
422
422
  }
423
423
 
@@ -426,9 +426,9 @@ idx_t ValidityAppend(CompressionAppendState &append_state, ColumnSegment &segmen
426
426
  auto idx = data.sel->get_index(offset + i);
427
427
  if (!data.validity.RowIsValidUnsafe(idx)) {
428
428
  mask.SetInvalidUnsafe(segment.count + i);
429
- validity_stats.has_null = true;
429
+ validity_stats.SetHasNull();
430
430
  } else {
431
- validity_stats.has_no_null = true;
431
+ validity_stats.SetHasNoNull();
432
432
  }
433
433
  }
434
434
  segment.count += append_count;
@@ -1223,9 +1223,9 @@ unique_ptr<BaseStatistics> DataTable::GetStatistics(ClientContext &context, colu
1223
1223
  return row_groups->CopyStats(column_id);
1224
1224
  }
1225
1225
 
1226
- void DataTable::SetStatistics(column_t column_id, const std::function<void(BaseStatistics &)> &set_fun) {
1226
+ void DataTable::SetDistinct(column_t column_id, unique_ptr<DistinctStatistics> distinct_stats) {
1227
1227
  D_ASSERT(column_id != COLUMN_IDENTIFIER_ROW_ID);
1228
- row_groups->SetStatistics(column_id, set_fun);
1228
+ row_groups->SetDistinct(column_id, std::move(distinct_stats));
1229
1229
  }
1230
1230
 
1231
1231
  //===--------------------------------------------------------------------===//
@@ -1234,10 +1234,8 @@ void DataTable::SetStatistics(column_t column_id, const std::function<void(BaseS
1234
1234
  void DataTable::Checkpoint(TableDataWriter &writer) {
1235
1235
  // checkpoint each individual row group
1236
1236
  // FIXME: we might want to combine adjacent row groups in case they have had deletions...
1237
- vector<unique_ptr<BaseStatistics>> global_stats;
1238
- for (idx_t i = 0; i < column_definitions.size(); i++) {
1239
- global_stats.push_back(row_groups->CopyStats(i));
1240
- }
1237
+ TableStatistics global_stats;
1238
+ row_groups->CopyStats(global_stats);
1241
1239
 
1242
1240
  row_groups->Checkpoint(writer, global_stats);
1243
1241