duckdb 0.7.2-dev16.0 → 0.7.2-dev314.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242):
  1. package/binding.gyp +2 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/icu/icu-extension.cpp +2 -0
  4. package/src/duckdb/extension/icu/icu-table-range.cpp +194 -0
  5. package/src/duckdb/extension/icu/include/icu-table-range.hpp +17 -0
  6. package/src/duckdb/extension/parquet/column_reader.cpp +5 -6
  7. package/src/duckdb/extension/parquet/column_writer.cpp +0 -1
  8. package/src/duckdb/extension/parquet/include/column_reader.hpp +1 -2
  9. package/src/duckdb/extension/parquet/include/generated_column_reader.hpp +1 -11
  10. package/src/duckdb/extension/parquet/parquet-extension.cpp +11 -2
  11. package/src/duckdb/extension/parquet/parquet_statistics.cpp +26 -32
  12. package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +4 -0
  13. package/src/duckdb/src/catalog/catalog_entry/scalar_function_catalog_entry.cpp +7 -6
  14. package/src/duckdb/src/catalog/catalog_entry/table_function_catalog_entry.cpp +20 -1
  15. package/src/duckdb/src/common/enums/statement_type.cpp +2 -0
  16. package/src/duckdb/src/common/sort/sort_state.cpp +5 -7
  17. package/src/duckdb/src/common/types/bit.cpp +95 -58
  18. package/src/duckdb/src/common/types/value.cpp +149 -53
  19. package/src/duckdb/src/common/types/vector.cpp +13 -10
  20. package/src/duckdb/src/execution/column_binding_resolver.cpp +6 -0
  21. package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +4 -5
  22. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
  23. package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +2 -3
  24. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +32 -6
  25. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +1 -1
  26. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +15 -15
  27. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +18 -12
  28. package/src/duckdb/src/function/aggregate/algebraic/avg.cpp +0 -6
  29. package/src/duckdb/src/function/aggregate/distributive/bitagg.cpp +99 -95
  30. package/src/duckdb/src/function/aggregate/distributive/bitstring_agg.cpp +254 -0
  31. package/src/duckdb/src/function/aggregate/distributive/count.cpp +2 -4
  32. package/src/duckdb/src/function/aggregate/distributive/sum.cpp +11 -16
  33. package/src/duckdb/src/function/aggregate/distributive_functions.cpp +1 -0
  34. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +16 -5
  35. package/src/duckdb/src/function/cast/bit_cast.cpp +0 -2
  36. package/src/duckdb/src/function/cast/blob_cast.cpp +0 -1
  37. package/src/duckdb/src/function/scalar/bit/bitstring.cpp +99 -0
  38. package/src/duckdb/src/function/scalar/date/date_diff.cpp +0 -1
  39. package/src/duckdb/src/function/scalar/date/date_part.cpp +17 -25
  40. package/src/duckdb/src/function/scalar/date/date_sub.cpp +0 -1
  41. package/src/duckdb/src/function/scalar/date/date_trunc.cpp +10 -14
  42. package/src/duckdb/src/function/scalar/generic/stats.cpp +2 -4
  43. package/src/duckdb/src/function/scalar/list/flatten.cpp +5 -12
  44. package/src/duckdb/src/function/scalar/list/list_concat.cpp +3 -8
  45. package/src/duckdb/src/function/scalar/list/list_extract.cpp +5 -12
  46. package/src/duckdb/src/function/scalar/list/list_value.cpp +5 -9
  47. package/src/duckdb/src/function/scalar/map/map_entries.cpp +61 -0
  48. package/src/duckdb/src/function/scalar/map/map_keys_values.cpp +97 -0
  49. package/src/duckdb/src/function/scalar/math/numeric.cpp +14 -17
  50. package/src/duckdb/src/function/scalar/nested_functions.cpp +3 -0
  51. package/src/duckdb/src/function/scalar/operators/add.cpp +0 -9
  52. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +29 -48
  53. package/src/duckdb/src/function/scalar/operators/bitwise.cpp +0 -63
  54. package/src/duckdb/src/function/scalar/operators/multiply.cpp +0 -6
  55. package/src/duckdb/src/function/scalar/operators/subtract.cpp +0 -6
  56. package/src/duckdb/src/function/scalar/string/caseconvert.cpp +2 -6
  57. package/src/duckdb/src/function/scalar/string/instr.cpp +2 -6
  58. package/src/duckdb/src/function/scalar/string/length.cpp +2 -6
  59. package/src/duckdb/src/function/scalar/string/like.cpp +2 -6
  60. package/src/duckdb/src/function/scalar/string/substring.cpp +2 -6
  61. package/src/duckdb/src/function/scalar/string_functions.cpp +1 -0
  62. package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +4 -9
  63. package/src/duckdb/src/function/scalar/struct/struct_insert.cpp +10 -13
  64. package/src/duckdb/src/function/scalar/struct/struct_pack.cpp +5 -6
  65. package/src/duckdb/src/function/table/read_csv.cpp +9 -0
  66. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  67. package/src/duckdb/src/function/table_function.cpp +19 -0
  68. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp +6 -8
  69. package/src/duckdb/src/include/duckdb/common/constants.hpp +0 -19
  70. package/src/duckdb/src/include/duckdb/common/enums/statement_type.hpp +2 -1
  71. package/src/duckdb/src/include/duckdb/common/enums/tableref_type.hpp +2 -1
  72. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +5 -1
  73. package/src/duckdb/src/include/duckdb/common/types/value.hpp +2 -8
  74. package/src/duckdb/src/include/duckdb/common/types.hpp +1 -2
  75. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
  76. package/src/duckdb/src/include/duckdb/function/aggregate/distributive_functions.hpp +5 -0
  77. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +12 -3
  78. package/src/duckdb/src/include/duckdb/function/scalar/bit_functions.hpp +4 -0
  79. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +12 -0
  80. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +2 -2
  81. package/src/duckdb/src/include/duckdb/function/table_function.hpp +2 -0
  82. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +2 -0
  83. package/src/duckdb/src/include/duckdb/main/config.hpp +3 -0
  84. package/src/duckdb/src/include/duckdb/main/database.hpp +1 -0
  85. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +2 -2
  86. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  87. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +2 -1
  88. package/src/duckdb/src/include/duckdb/parser/parsed_data/{alter_function_info.hpp → alter_scalar_function_info.hpp} +13 -13
  89. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_function_info.hpp +47 -0
  90. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_table_function_info.hpp +2 -1
  91. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
  92. package/src/duckdb/src/include/duckdb/parser/statement/multi_statement.hpp +28 -0
  93. package/src/duckdb/src/include/duckdb/parser/tableref/list.hpp +1 -0
  94. package/src/duckdb/src/include/duckdb/parser/tableref/pivotref.hpp +76 -0
  95. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +2 -0
  96. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +28 -0
  97. package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +2 -0
  98. package/src/duckdb/src/include/duckdb/planner/binder.hpp +8 -0
  99. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +2 -0
  100. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +76 -44
  101. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -2
  102. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +2 -2
  103. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_fetch.hpp +1 -1
  104. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -1
  105. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +2 -2
  106. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_fetch.hpp +1 -1
  107. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -1
  108. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +5 -2
  109. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  110. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +93 -29
  111. package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +22 -3
  112. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +6 -6
  113. package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +41 -0
  114. package/src/duckdb/src/include/duckdb/storage/statistics/node_statistics.hpp +26 -0
  115. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +157 -0
  116. package/src/duckdb/src/include/duckdb/storage/statistics/segment_statistics.hpp +2 -7
  117. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +74 -0
  118. package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +42 -0
  119. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +2 -3
  120. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +2 -2
  121. package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +1 -1
  122. package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +2 -1
  123. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -3
  124. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +3 -2
  125. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +2 -0
  126. package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
  127. package/src/duckdb/src/include/duckdb.h +49 -1
  128. package/src/duckdb/src/include/duckdb.hpp +0 -1
  129. package/src/duckdb/src/main/capi/pending-c.cpp +16 -3
  130. package/src/duckdb/src/main/capi/result-c.cpp +27 -1
  131. package/src/duckdb/src/main/capi/stream-c.cpp +25 -0
  132. package/src/duckdb/src/main/client_context.cpp +8 -1
  133. package/src/duckdb/src/main/config.cpp +66 -1
  134. package/src/duckdb/src/main/database.cpp +10 -2
  135. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +98 -67
  136. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +16 -3
  137. package/src/duckdb/src/optimizer/statistics/expression/propagate_aggregate.cpp +9 -3
  138. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +6 -7
  139. package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -11
  140. package/src/duckdb/src/optimizer/statistics/expression/propagate_columnref.cpp +1 -1
  141. package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +13 -15
  142. package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +0 -1
  143. package/src/duckdb/src/optimizer/statistics/expression/propagate_constant.cpp +3 -75
  144. package/src/duckdb/src/optimizer/statistics/expression/propagate_function.cpp +7 -2
  145. package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +10 -0
  146. package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +2 -3
  147. package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +28 -31
  148. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +4 -5
  149. package/src/duckdb/src/optimizer/statistics/operator/propagate_set_operation.cpp +3 -3
  150. package/src/duckdb/src/optimizer/statistics_propagator.cpp +1 -1
  151. package/src/duckdb/src/parser/parsed_data/alter_info.cpp +7 -3
  152. package/src/duckdb/src/parser/parsed_data/alter_scalar_function_info.cpp +56 -0
  153. package/src/duckdb/src/parser/parsed_data/alter_table_function_info.cpp +51 -0
  154. package/src/duckdb/src/parser/parsed_data/create_scalar_function_info.cpp +3 -2
  155. package/src/duckdb/src/parser/parsed_data/create_table_function_info.cpp +6 -0
  156. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +8 -0
  157. package/src/duckdb/src/parser/query_node.cpp +1 -1
  158. package/src/duckdb/src/parser/statement/multi_statement.cpp +18 -0
  159. package/src/duckdb/src/parser/tableref/pivotref.cpp +296 -0
  160. package/src/duckdb/src/parser/tableref.cpp +3 -0
  161. package/src/duckdb/src/parser/transform/helpers/transform_alias.cpp +12 -6
  162. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +24 -0
  163. package/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +4 -0
  164. package/src/duckdb/src/parser/transform/statement/transform_create_view.cpp +4 -0
  165. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +150 -0
  166. package/src/duckdb/src/parser/transform/statement/transform_select.cpp +8 -0
  167. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +1 -1
  168. package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +4 -0
  169. package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +105 -0
  170. package/src/duckdb/src/parser/transform/tableref/transform_tableref.cpp +2 -0
  171. package/src/duckdb/src/parser/transformer.cpp +15 -3
  172. package/src/duckdb/src/planner/bind_context.cpp +16 -0
  173. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +11 -3
  174. package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +0 -1
  175. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +1 -1
  176. package/src/duckdb/src/planner/binder/statement/bind_logical_plan.cpp +17 -0
  177. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +9 -0
  178. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +365 -0
  179. package/src/duckdb/src/planner/binder.cpp +7 -1
  180. package/src/duckdb/src/planner/bound_result_modifier.cpp +1 -1
  181. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +1 -1
  182. package/src/duckdb/src/planner/filter/constant_filter.cpp +4 -6
  183. package/src/duckdb/src/planner/pragma_handler.cpp +10 -2
  184. package/src/duckdb/src/storage/buffer_manager.cpp +44 -46
  185. package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -1
  186. package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +1 -4
  187. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +4 -4
  188. package/src/duckdb/src/storage/compression/bitpacking.cpp +28 -24
  189. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +43 -45
  190. package/src/duckdb/src/storage/compression/numeric_constant.cpp +9 -10
  191. package/src/duckdb/src/storage/compression/patas.cpp +1 -1
  192. package/src/duckdb/src/storage/compression/rle.cpp +19 -15
  193. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +5 -5
  194. package/src/duckdb/src/storage/data_table.cpp +4 -6
  195. package/src/duckdb/src/storage/statistics/base_statistics.cpp +373 -128
  196. package/src/duckdb/src/storage/statistics/column_statistics.cpp +58 -3
  197. package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +4 -9
  198. package/src/duckdb/src/storage/statistics/list_stats.cpp +117 -0
  199. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +529 -0
  200. package/src/duckdb/src/storage/statistics/segment_statistics.cpp +2 -11
  201. package/src/duckdb/src/storage/statistics/string_stats.cpp +273 -0
  202. package/src/duckdb/src/storage/statistics/struct_stats.cpp +131 -0
  203. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  204. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -4
  205. package/src/duckdb/src/storage/table/column_data.cpp +16 -11
  206. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +2 -3
  207. package/src/duckdb/src/storage/table/column_segment.cpp +6 -8
  208. package/src/duckdb/src/storage/table/list_column_data.cpp +39 -58
  209. package/src/duckdb/src/storage/table/row_group.cpp +24 -23
  210. package/src/duckdb/src/storage/table/row_group_collection.cpp +12 -12
  211. package/src/duckdb/src/storage/table/standard_column_data.cpp +6 -6
  212. package/src/duckdb/src/storage/table/struct_column_data.cpp +15 -16
  213. package/src/duckdb/src/storage/table/table_statistics.cpp +27 -7
  214. package/src/duckdb/src/storage/table/update_segment.cpp +10 -12
  215. package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +3 -0
  216. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +34 -1
  217. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +1020 -530
  218. package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +7 -0
  219. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +23560 -22737
  220. package/src/duckdb/ub_src_function_aggregate_distributive.cpp +2 -0
  221. package/src/duckdb/ub_src_function_scalar_bit.cpp +2 -0
  222. package/src/duckdb/ub_src_function_scalar_map.cpp +4 -0
  223. package/src/duckdb/ub_src_main_capi.cpp +2 -0
  224. package/src/duckdb/ub_src_parser_parsed_data.cpp +4 -2
  225. package/src/duckdb/ub_src_parser_statement.cpp +2 -0
  226. package/src/duckdb/ub_src_parser_tableref.cpp +2 -0
  227. package/src/duckdb/ub_src_parser_transform_statement.cpp +2 -0
  228. package/src/duckdb/ub_src_parser_transform_tableref.cpp +2 -0
  229. package/src/duckdb/ub_src_planner_binder_tableref.cpp +2 -0
  230. package/src/duckdb/ub_src_storage_statistics.cpp +4 -6
  231. package/src/duckdb/src/include/duckdb/main/loadable_extension.hpp +0 -59
  232. package/src/duckdb/src/include/duckdb/storage/statistics/list_statistics.hpp +0 -36
  233. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_statistics.hpp +0 -75
  234. package/src/duckdb/src/include/duckdb/storage/statistics/string_statistics.hpp +0 -49
  235. package/src/duckdb/src/include/duckdb/storage/statistics/struct_statistics.hpp +0 -36
  236. package/src/duckdb/src/include/duckdb/storage/statistics/validity_statistics.hpp +0 -45
  237. package/src/duckdb/src/parser/parsed_data/alter_function_info.cpp +0 -55
  238. package/src/duckdb/src/storage/statistics/list_statistics.cpp +0 -94
  239. package/src/duckdb/src/storage/statistics/numeric_statistics.cpp +0 -307
  240. package/src/duckdb/src/storage/statistics/string_statistics.cpp +0 -220
  241. package/src/duckdb/src/storage/statistics/struct_statistics.cpp +0 -108
  242. package/src/duckdb/src/storage/statistics/validity_statistics.cpp +0 -91
@@ -2,86 +2,61 @@
2
2
  #include "duckdb/common/field_writer.hpp"
3
3
  #include "duckdb/common/string_util.hpp"
4
4
  #include "duckdb/common/types/vector.hpp"
5
- #include "duckdb/storage/statistics/distinct_statistics.hpp"
6
- #include "duckdb/storage/statistics/list_statistics.hpp"
7
- #include "duckdb/storage/statistics/numeric_statistics.hpp"
8
- #include "duckdb/storage/statistics/string_statistics.hpp"
9
- #include "duckdb/storage/statistics/struct_statistics.hpp"
10
- #include "duckdb/storage/statistics/validity_statistics.hpp"
5
+ #include "duckdb/storage/statistics/base_statistics.hpp"
6
+ #include "duckdb/storage/statistics/list_stats.hpp"
7
+ #include "duckdb/storage/statistics/struct_stats.hpp"
11
8
 
12
9
  namespace duckdb {
13
10
 
14
- BaseStatistics::BaseStatistics(LogicalType type, StatisticsType stats_type)
15
- : type(std::move(type)), stats_type(stats_type) {
11
+ BaseStatistics::BaseStatistics() : type(LogicalType::INVALID) {
16
12
  }
17
13
 
18
- BaseStatistics::~BaseStatistics() {
19
- }
20
-
21
- void BaseStatistics::InitializeBase() {
22
- validity_stats = make_unique<ValidityStatistics>(false);
23
- if (stats_type == GLOBAL_STATS) {
24
- distinct_stats = make_unique<DistinctStatistics>();
25
- }
14
+ BaseStatistics::BaseStatistics(LogicalType type) {
15
+ Construct(*this, std::move(type));
26
16
  }
27
17
 
28
- bool BaseStatistics::CanHaveNull() const {
29
- if (!validity_stats) {
30
- // we don't know
31
- // solid maybe
32
- return true;
18
+ void BaseStatistics::Construct(BaseStatistics &stats, LogicalType type) {
19
+ stats.distinct_count = 0;
20
+ stats.type = std::move(type);
21
+ switch (GetStatsType(stats.type)) {
22
+ case StatisticsType::LIST_STATS:
23
+ ListStats::Construct(stats);
24
+ break;
25
+ case StatisticsType::STRUCT_STATS:
26
+ StructStats::Construct(stats);
27
+ break;
28
+ default:
29
+ break;
33
30
  }
34
- return ((ValidityStatistics &)*validity_stats).has_null;
35
31
  }
36
32
 
37
- bool BaseStatistics::CanHaveNoNull() const {
38
- if (!validity_stats) {
39
- // we don't know
40
- // solid maybe
41
- return true;
42
- }
43
- return ((ValidityStatistics &)*validity_stats).has_no_null;
33
+ BaseStatistics::~BaseStatistics() {
44
34
  }
45
35
 
46
- void BaseStatistics::UpdateDistinctStatistics(Vector &v, idx_t count) {
47
- if (!distinct_stats) {
48
- return;
49
- }
50
- auto &d_stats = (DistinctStatistics &)*distinct_stats;
51
- d_stats.Update(v, count);
36
+ BaseStatistics::BaseStatistics(BaseStatistics &&other) noexcept {
37
+ std::swap(type, other.type);
38
+ has_null = other.has_null;
39
+ has_no_null = other.has_no_null;
40
+ distinct_count = other.distinct_count;
41
+ stats_union = other.stats_union;
42
+ std::swap(child_stats, other.child_stats);
52
43
  }
53
44
 
54
- void MergeInternal(unique_ptr<BaseStatistics> &orig, const unique_ptr<BaseStatistics> &other) {
55
- if (other) {
56
- if (orig) {
57
- orig->Merge(*other);
58
- } else {
59
- orig = other->Copy();
60
- }
61
- }
45
+ BaseStatistics &BaseStatistics::operator=(BaseStatistics &&other) noexcept {
46
+ std::swap(type, other.type);
47
+ has_null = other.has_null;
48
+ has_no_null = other.has_no_null;
49
+ distinct_count = other.distinct_count;
50
+ stats_union = other.stats_union;
51
+ std::swap(child_stats, other.child_stats);
52
+ return *this;
62
53
  }
63
54
 
64
- void BaseStatistics::Merge(const BaseStatistics &other) {
65
- D_ASSERT(type == other.type);
66
- MergeInternal(validity_stats, other.validity_stats);
67
- if (stats_type == GLOBAL_STATS) {
68
- MergeInternal(distinct_stats, other.distinct_stats);
55
+ StatisticsType BaseStatistics::GetStatsType(const LogicalType &type) {
56
+ if (type.id() == LogicalTypeId::SQLNULL) {
57
+ return StatisticsType::BASE_STATS;
69
58
  }
70
- }
71
-
72
- idx_t BaseStatistics::GetDistinctCount() {
73
- if (distinct_stats) {
74
- auto &d_stats = (DistinctStatistics &)*distinct_stats;
75
- return d_stats.GetCount();
76
- }
77
- return 0;
78
- }
79
-
80
- unique_ptr<BaseStatistics> BaseStatistics::CreateEmpty(LogicalType type, StatisticsType stats_type) {
81
- unique_ptr<BaseStatistics> result;
82
59
  switch (type.InternalType()) {
83
- case PhysicalType::BIT:
84
- return make_unique<ValidityStatistics>(false, false);
85
60
  case PhysicalType::BOOL:
86
61
  case PhysicalType::INT8:
87
62
  case PhysicalType::INT16:
@@ -94,113 +69,323 @@ unique_ptr<BaseStatistics> BaseStatistics::CreateEmpty(LogicalType type, Statist
94
69
  case PhysicalType::INT128:
95
70
  case PhysicalType::FLOAT:
96
71
  case PhysicalType::DOUBLE:
97
- result = make_unique<NumericStatistics>(std::move(type), stats_type);
98
- break;
72
+ return StatisticsType::NUMERIC_STATS;
99
73
  case PhysicalType::VARCHAR:
100
- result = make_unique<StringStatistics>(std::move(type), stats_type);
101
- break;
74
+ return StatisticsType::STRING_STATS;
102
75
  case PhysicalType::STRUCT:
103
- result = make_unique<StructStatistics>(std::move(type));
104
- break;
76
+ return StatisticsType::STRUCT_STATS;
105
77
  case PhysicalType::LIST:
106
- result = make_unique<ListStatistics>(std::move(type));
107
- break;
78
+ return StatisticsType::LIST_STATS;
79
+ case PhysicalType::BIT:
108
80
  case PhysicalType::INTERVAL:
109
81
  default:
110
- result = make_unique<BaseStatistics>(std::move(type), stats_type);
82
+ return StatisticsType::BASE_STATS;
111
83
  }
112
- result->InitializeBase();
84
+ }
85
+
86
+ StatisticsType BaseStatistics::GetStatsType() const {
87
+ return GetStatsType(GetType());
88
+ }
89
+
90
+ void BaseStatistics::InitializeUnknown() {
91
+ has_null = true;
92
+ has_no_null = true;
93
+ }
94
+
95
+ void BaseStatistics::InitializeEmpty() {
96
+ has_null = false;
97
+ has_no_null = true;
98
+ }
99
+
100
+ bool BaseStatistics::CanHaveNull() const {
101
+ return has_null;
102
+ }
103
+
104
+ bool BaseStatistics::CanHaveNoNull() const {
105
+ return has_no_null;
106
+ }
107
+
108
+ bool BaseStatistics::IsConstant() const {
109
+ if (type.id() == LogicalTypeId::VALIDITY) {
110
+ // validity mask
111
+ if (CanHaveNull() && !CanHaveNoNull()) {
112
+ return true;
113
+ }
114
+ if (!CanHaveNull() && CanHaveNoNull()) {
115
+ return true;
116
+ }
117
+ return false;
118
+ }
119
+ switch (GetStatsType()) {
120
+ case StatisticsType::NUMERIC_STATS:
121
+ return NumericStats::IsConstant(*this);
122
+ default:
123
+ break;
124
+ }
125
+ return false;
126
+ }
127
+
128
+ void BaseStatistics::Merge(const BaseStatistics &other) {
129
+ has_null = has_null || other.has_null;
130
+ has_no_null = has_no_null || other.has_no_null;
131
+ switch (GetStatsType()) {
132
+ case StatisticsType::NUMERIC_STATS:
133
+ NumericStats::Merge(*this, other);
134
+ break;
135
+ case StatisticsType::STRING_STATS:
136
+ StringStats::Merge(*this, other);
137
+ break;
138
+ case StatisticsType::LIST_STATS:
139
+ ListStats::Merge(*this, other);
140
+ break;
141
+ case StatisticsType::STRUCT_STATS:
142
+ StructStats::Merge(*this, other);
143
+ break;
144
+ default:
145
+ break;
146
+ }
147
+ }
148
+
149
+ idx_t BaseStatistics::GetDistinctCount() {
150
+ return distinct_count;
151
+ }
152
+
153
+ BaseStatistics BaseStatistics::CreateUnknownType(LogicalType type) {
154
+ switch (GetStatsType(type)) {
155
+ case StatisticsType::NUMERIC_STATS:
156
+ return NumericStats::CreateUnknown(std::move(type));
157
+ case StatisticsType::STRING_STATS:
158
+ return StringStats::CreateUnknown(std::move(type));
159
+ case StatisticsType::LIST_STATS:
160
+ return ListStats::CreateUnknown(std::move(type));
161
+ case StatisticsType::STRUCT_STATS:
162
+ return StructStats::CreateUnknown(std::move(type));
163
+ default:
164
+ return BaseStatistics(std::move(type));
165
+ }
166
+ }
167
+
168
+ BaseStatistics BaseStatistics::CreateEmptyType(LogicalType type) {
169
+ switch (GetStatsType(type)) {
170
+ case StatisticsType::NUMERIC_STATS:
171
+ return NumericStats::CreateEmpty(std::move(type));
172
+ case StatisticsType::STRING_STATS:
173
+ return StringStats::CreateEmpty(std::move(type));
174
+ case StatisticsType::LIST_STATS:
175
+ return ListStats::CreateEmpty(std::move(type));
176
+ case StatisticsType::STRUCT_STATS:
177
+ return StructStats::CreateEmpty(std::move(type));
178
+ default:
179
+ return BaseStatistics(std::move(type));
180
+ }
181
+ }
182
+
183
+ BaseStatistics BaseStatistics::CreateUnknown(LogicalType type) {
184
+ auto result = CreateUnknownType(std::move(type));
185
+ result.InitializeUnknown();
113
186
  return result;
114
187
  }
115
188
 
116
- unique_ptr<BaseStatistics> BaseStatistics::Copy() const {
117
- auto result = make_unique<BaseStatistics>(type, stats_type);
118
- result->CopyBase(*this);
189
+ BaseStatistics BaseStatistics::CreateEmpty(LogicalType type) {
190
+ if (type.InternalType() == PhysicalType::BIT) {
191
+ // FIXME: this special case should not be necessary
192
+ // but currently InitializeEmpty sets StatsInfo::CAN_HAVE_VALID_VALUES
193
+ BaseStatistics result(std::move(type));
194
+ result.Set(StatsInfo::CANNOT_HAVE_NULL_VALUES);
195
+ result.Set(StatsInfo::CANNOT_HAVE_VALID_VALUES);
196
+ return result;
197
+ }
198
+ auto result = CreateEmptyType(std::move(type));
199
+ result.InitializeEmpty();
119
200
  return result;
120
201
  }
121
202
 
122
- void BaseStatistics::CopyBase(const BaseStatistics &orig) {
123
- if (orig.validity_stats) {
124
- validity_stats = orig.validity_stats->Copy();
203
+ void BaseStatistics::Copy(const BaseStatistics &other) {
204
+ D_ASSERT(GetType() == other.GetType());
205
+ CopyBase(other);
206
+ stats_union = other.stats_union;
207
+ switch (GetStatsType()) {
208
+ case StatisticsType::LIST_STATS:
209
+ ListStats::Copy(*this, other);
210
+ break;
211
+ case StatisticsType::STRUCT_STATS:
212
+ StructStats::Copy(*this, other);
213
+ break;
214
+ default:
215
+ break;
125
216
  }
126
- if (orig.distinct_stats) {
127
- distinct_stats = orig.distinct_stats->Copy();
217
+ }
218
+
219
+ BaseStatistics BaseStatistics::Copy() const {
220
+ BaseStatistics result(type);
221
+ result.Copy(*this);
222
+ return result;
223
+ }
224
+
225
+ unique_ptr<BaseStatistics> BaseStatistics::ToUnique() const {
226
+ auto result = unique_ptr<BaseStatistics>(new BaseStatistics(type));
227
+ result->Copy(*this);
228
+ return result;
229
+ }
230
+
231
+ void BaseStatistics::CopyBase(const BaseStatistics &other) {
232
+ has_null = other.has_null;
233
+ has_no_null = other.has_no_null;
234
+ distinct_count = other.distinct_count;
235
+ }
236
+
237
+ void BaseStatistics::Set(StatsInfo info) {
238
+ switch (info) {
239
+ case StatsInfo::CAN_HAVE_NULL_VALUES:
240
+ has_null = true;
241
+ break;
242
+ case StatsInfo::CANNOT_HAVE_NULL_VALUES:
243
+ has_null = false;
244
+ break;
245
+ case StatsInfo::CAN_HAVE_VALID_VALUES:
246
+ has_no_null = true;
247
+ break;
248
+ case StatsInfo::CANNOT_HAVE_VALID_VALUES:
249
+ has_no_null = false;
250
+ break;
251
+ case StatsInfo::CAN_HAVE_NULL_AND_VALID_VALUES:
252
+ has_null = true;
253
+ has_no_null = true;
254
+ break;
255
+ default:
256
+ throw InternalException("Unrecognized StatsInfo for BaseStatistics::Set");
128
257
  }
129
258
  }
130
259
 
260
+ void BaseStatistics::CombineValidity(BaseStatistics &left, BaseStatistics &right) {
261
+ has_null = left.has_null || right.has_null;
262
+ has_no_null = left.has_no_null || right.has_no_null;
263
+ }
264
+
265
+ void BaseStatistics::CopyValidity(BaseStatistics &stats) {
266
+ has_null = stats.has_null;
267
+ has_no_null = stats.has_no_null;
268
+ }
269
+
131
270
  void BaseStatistics::Serialize(Serializer &serializer) const {
132
271
  FieldWriter writer(serializer);
133
- ValidityStatistics(CanHaveNull(), CanHaveNoNull()).Serialize(writer);
272
+ writer.WriteField<bool>(has_null);
273
+ writer.WriteField<bool>(has_no_null);
134
274
  Serialize(writer);
135
- auto ptype = type.InternalType();
136
- if (ptype != PhysicalType::BIT) {
137
- writer.WriteField<StatisticsType>(stats_type);
138
- writer.WriteOptional<BaseStatistics>(distinct_stats);
139
- }
140
275
  writer.Finalize();
141
276
  }
142
277
 
143
- void BaseStatistics::Serialize(FieldWriter &writer) const {
278
+ void BaseStatistics::SetDistinctCount(idx_t count) {
279
+ this->distinct_count = count;
144
280
  }
145
281
 
146
- unique_ptr<BaseStatistics> BaseStatistics::Deserialize(Deserializer &source, LogicalType type) {
147
- FieldReader reader(source);
148
- auto validity_stats = ValidityStatistics::Deserialize(reader);
149
- unique_ptr<BaseStatistics> result;
150
- auto ptype = type.InternalType();
151
- switch (ptype) {
152
- case PhysicalType::BIT:
153
- result = ValidityStatistics::Deserialize(reader);
154
- break;
155
- case PhysicalType::BOOL:
156
- case PhysicalType::INT8:
157
- case PhysicalType::INT16:
158
- case PhysicalType::INT32:
159
- case PhysicalType::INT64:
160
- case PhysicalType::UINT8:
161
- case PhysicalType::UINT16:
162
- case PhysicalType::UINT32:
163
- case PhysicalType::UINT64:
164
- case PhysicalType::INT128:
165
- case PhysicalType::FLOAT:
166
- case PhysicalType::DOUBLE:
167
- result = NumericStatistics::Deserialize(reader, std::move(type));
168
- break;
169
- case PhysicalType::VARCHAR:
170
- result = StringStatistics::Deserialize(reader, std::move(type));
282
+ void BaseStatistics::Serialize(FieldWriter &writer) const {
283
+ switch (GetStatsType()) {
284
+ case StatisticsType::NUMERIC_STATS:
285
+ NumericStats::Serialize(*this, writer);
171
286
  break;
172
- case PhysicalType::STRUCT:
173
- result = StructStatistics::Deserialize(reader, std::move(type));
287
+ case StatisticsType::STRING_STATS:
288
+ StringStats::Serialize(*this, writer);
174
289
  break;
175
- case PhysicalType::LIST:
176
- result = ListStatistics::Deserialize(reader, std::move(type));
290
+ case StatisticsType::LIST_STATS:
291
+ ListStats::Serialize(*this, writer);
177
292
  break;
178
- case PhysicalType::INTERVAL:
179
- result = make_unique<BaseStatistics>(std::move(type), StatisticsType::LOCAL_STATS);
293
+ case StatisticsType::STRUCT_STATS:
294
+ StructStats::Serialize(*this, writer);
180
295
  break;
181
296
  default:
182
- throw InternalException("Unimplemented type for statistics deserialization");
297
+ break;
183
298
  }
184
-
185
- if (ptype != PhysicalType::BIT) {
186
- result->validity_stats = std::move(validity_stats);
187
- result->stats_type = reader.ReadField<StatisticsType>(StatisticsType::LOCAL_STATS);
188
- result->distinct_stats = reader.ReadOptional<DistinctStatistics>(nullptr);
299
+ }
300
+ BaseStatistics BaseStatistics::DeserializeType(FieldReader &reader, LogicalType type) {
301
+ switch (GetStatsType(type)) {
302
+ case StatisticsType::NUMERIC_STATS:
303
+ return NumericStats::Deserialize(reader, std::move(type));
304
+ case StatisticsType::STRING_STATS:
305
+ return StringStats::Deserialize(reader, std::move(type));
306
+ case StatisticsType::LIST_STATS:
307
+ return ListStats::Deserialize(reader, std::move(type));
308
+ case StatisticsType::STRUCT_STATS:
309
+ return StructStats::Deserialize(reader, std::move(type));
310
+ default:
311
+ return BaseStatistics(std::move(type));
189
312
  }
313
+ }
190
314
 
315
+ BaseStatistics BaseStatistics::Deserialize(Deserializer &source, LogicalType type) {
316
+ FieldReader reader(source);
317
+ bool has_null = reader.ReadRequired<bool>();
318
+ bool has_no_null = reader.ReadRequired<bool>();
319
+ auto result = DeserializeType(reader, std::move(type));
320
+ result.has_null = has_null;
321
+ result.has_no_null = has_no_null;
191
322
  reader.Finalize();
192
323
  return result;
193
324
  }
194
325
 
195
326
  string BaseStatistics::ToString() const {
196
- return StringUtil::Format("%s%s", validity_stats ? validity_stats->ToString() : "",
197
- distinct_stats ? distinct_stats->ToString() : "");
327
+ auto has_n = has_null ? "true" : "false";
328
+ auto has_n_n = has_no_null ? "true" : "false";
329
+ string result =
330
+ StringUtil::Format("%s%s", StringUtil::Format("[Has Null: %s, Has No Null: %s]", has_n, has_n_n),
331
+ distinct_count > 0 ? StringUtil::Format("[Approx Unique: %lld]", distinct_count) : "");
332
+ switch (GetStatsType()) {
333
+ case StatisticsType::NUMERIC_STATS:
334
+ result = NumericStats::ToString(*this) + result;
335
+ break;
336
+ case StatisticsType::STRING_STATS:
337
+ result = StringStats::ToString(*this) + result;
338
+ break;
339
+ case StatisticsType::LIST_STATS:
340
+ result = ListStats::ToString(*this) + result;
341
+ break;
342
+ case StatisticsType::STRUCT_STATS:
343
+ result = StructStats::ToString(*this) + result;
344
+ break;
345
+ default:
346
+ break;
347
+ }
348
+ return result;
198
349
  }
199
350
 
200
351
  void BaseStatistics::Verify(Vector &vector, const SelectionVector &sel, idx_t count) const {
201
352
  D_ASSERT(vector.GetType() == this->type);
202
- if (validity_stats) {
203
- validity_stats->Verify(vector, sel, count);
353
+ switch (GetStatsType()) {
354
+ case StatisticsType::NUMERIC_STATS:
355
+ NumericStats::Verify(*this, vector, sel, count);
356
+ break;
357
+ case StatisticsType::STRING_STATS:
358
+ StringStats::Verify(*this, vector, sel, count);
359
+ break;
360
+ case StatisticsType::LIST_STATS:
361
+ ListStats::Verify(*this, vector, sel, count);
362
+ break;
363
+ case StatisticsType::STRUCT_STATS:
364
+ StructStats::Verify(*this, vector, sel, count);
365
+ break;
366
+ default:
367
+ break;
368
+ }
369
+ if (has_null && has_no_null) {
370
+ // nothing to verify
371
+ return;
372
+ }
373
+ UnifiedVectorFormat vdata;
374
+ vector.ToUnifiedFormat(count, vdata);
375
+ for (idx_t i = 0; i < count; i++) {
376
+ auto idx = sel.get_index(i);
377
+ auto index = vdata.sel->get_index(idx);
378
+ bool row_is_valid = vdata.validity.RowIsValid(index);
379
+ if (row_is_valid && !has_no_null) {
380
+ throw InternalException(
381
+ "Statistics mismatch: vector labeled as having only NULL values, but vector contains valid values: %s",
382
+ vector.ToString(count));
383
+ }
384
+ if (!row_is_valid && !has_null) {
385
+ throw InternalException(
386
+ "Statistics mismatch: vector labeled as not having NULL values, but vector contains null values: %s",
387
+ vector.ToString(count));
388
+ }
204
389
  }
205
390
  }
206
391
 
@@ -209,4 +394,64 @@ void BaseStatistics::Verify(Vector &vector, idx_t count) const {
209
394
  Verify(vector, *sel, count);
210
395
  }
211
396
 
397
+ BaseStatistics BaseStatistics::FromConstantType(const Value &input) {
398
+ switch (GetStatsType(input.type())) {
399
+ case StatisticsType::NUMERIC_STATS: {
400
+ auto result = NumericStats::CreateEmpty(input.type());
401
+ NumericStats::SetMin(result, input);
402
+ NumericStats::SetMax(result, input);
403
+ return result;
404
+ }
405
+ case StatisticsType::STRING_STATS: {
406
+ auto result = StringStats::CreateEmpty(input.type());
407
+ if (!input.IsNull()) {
408
+ auto &string_value = StringValue::Get(input);
409
+ StringStats::Update(result, string_t(string_value));
410
+ }
411
+ return result;
412
+ }
413
+ case StatisticsType::LIST_STATS: {
414
+ auto result = ListStats::CreateEmpty(input.type());
415
+ auto &child_stats = ListStats::GetChildStats(result);
416
+ if (!input.IsNull()) {
417
+ auto &list_children = ListValue::GetChildren(input);
418
+ for (auto &child_element : list_children) {
419
+ child_stats.Merge(FromConstant(child_element));
420
+ }
421
+ }
422
+ return result;
423
+ }
424
+ case StatisticsType::STRUCT_STATS: {
425
+ auto result = StructStats::CreateEmpty(input.type());
426
+ auto &child_types = StructType::GetChildTypes(input.type());
427
+ if (input.IsNull()) {
428
+ for (idx_t i = 0; i < child_types.size(); i++) {
429
+ StructStats::SetChildStats(result, i, FromConstant(Value(child_types[i].second)));
430
+ }
431
+ } else {
432
+ auto &struct_children = StructValue::GetChildren(input);
433
+ for (idx_t i = 0; i < child_types.size(); i++) {
434
+ StructStats::SetChildStats(result, i, FromConstant(struct_children[i]));
435
+ }
436
+ }
437
+ return result;
438
+ }
439
+ default:
440
+ return BaseStatistics(input.type());
441
+ }
442
+ }
443
+
444
+ BaseStatistics BaseStatistics::FromConstant(const Value &input) {
445
+ auto result = FromConstantType(input);
446
+ result.SetDistinctCount(1);
447
+ if (input.IsNull()) {
448
+ result.Set(StatsInfo::CAN_HAVE_NULL_VALUES);
449
+ result.Set(StatsInfo::CANNOT_HAVE_VALID_VALUES);
450
+ } else {
451
+ result.Set(StatsInfo::CANNOT_HAVE_NULL_VALUES);
452
+ result.Set(StatsInfo::CAN_HAVE_VALID_VALUES);
453
+ }
454
+ return result;
455
+ }
456
+
212
457
  } // namespace duckdb
@@ -1,13 +1,68 @@
1
1
  #include "duckdb/storage/statistics/column_statistics.hpp"
2
+ #include "duckdb/common/serializer.hpp"
2
3
 
3
4
  namespace duckdb {
4
5
 
5
- ColumnStatistics::ColumnStatistics(unique_ptr<BaseStatistics> stats_p) : stats(std::move(stats_p)) {
6
+ ColumnStatistics::ColumnStatistics(BaseStatistics stats_p) : stats(std::move(stats_p)) {
7
+ auto type = stats.GetType().InternalType();
8
+ if (type != PhysicalType::LIST && type != PhysicalType::STRUCT) {
9
+ distinct_stats = make_unique<DistinctStatistics>();
10
+ }
11
+ }
12
+ ColumnStatistics::ColumnStatistics(BaseStatistics stats_p, unique_ptr<DistinctStatistics> distinct_stats_p)
13
+ : stats(std::move(stats_p)), distinct_stats(std::move(distinct_stats_p)) {
6
14
  }
7
15
 
8
16
  shared_ptr<ColumnStatistics> ColumnStatistics::CreateEmptyStats(const LogicalType &type) {
9
- auto col_stats = BaseStatistics::CreateEmpty(type, StatisticsType::GLOBAL_STATS);
10
- return make_shared<ColumnStatistics>(std::move(col_stats));
17
+ return make_shared<ColumnStatistics>(BaseStatistics::CreateEmpty(type));
18
+ }
19
+
20
+ void ColumnStatistics::Merge(ColumnStatistics &other) {
21
+ stats.Merge(other.stats);
22
+ if (distinct_stats) {
23
+ distinct_stats->Merge(*other.distinct_stats);
24
+ }
25
+ }
26
+
27
+ BaseStatistics &ColumnStatistics::Statistics() {
28
+ return stats;
29
+ }
30
+
31
+ bool ColumnStatistics::HasDistinctStats() {
32
+ return distinct_stats.get();
33
+ }
34
+
35
+ DistinctStatistics &ColumnStatistics::DistinctStats() {
36
+ if (!distinct_stats) {
37
+ throw InternalException("DistinctStats called without distinct_stats");
38
+ }
39
+ return *distinct_stats;
40
+ }
41
+
42
+ void ColumnStatistics::SetDistinct(unique_ptr<DistinctStatistics> distinct) {
43
+ this->distinct_stats = std::move(distinct);
44
+ }
45
+
46
+ void ColumnStatistics::UpdateDistinctStatistics(Vector &v, idx_t count) {
47
+ if (!distinct_stats) {
48
+ return;
49
+ }
50
+ auto &d_stats = (DistinctStatistics &)*distinct_stats;
51
+ d_stats.Update(v, count);
52
+ }
53
+
54
+ shared_ptr<ColumnStatistics> ColumnStatistics::Copy() const {
55
+ return make_shared<ColumnStatistics>(stats.Copy(), distinct_stats ? distinct_stats->Copy() : nullptr);
56
+ }
57
+ void ColumnStatistics::Serialize(Serializer &serializer) const {
58
+ stats.Serialize(serializer);
59
+ serializer.WriteOptional(distinct_stats);
60
+ }
61
+
62
+ shared_ptr<ColumnStatistics> ColumnStatistics::Deserialize(Deserializer &source, const LogicalType &type) {
63
+ auto stats = BaseStatistics::Deserialize(source, type);
64
+ auto distinct_stats = source.ReadOptional<DistinctStatistics>();
65
+ return make_shared<ColumnStatistics>(stats.Copy(), std::move(distinct_stats));
11
66
  }
12
67
 
13
68
  } // namespace duckdb
@@ -7,23 +7,18 @@
7
7
 
8
8
  namespace duckdb {
9
9
 
10
- DistinctStatistics::DistinctStatistics()
11
- : BaseStatistics(LogicalType::INVALID, StatisticsType::LOCAL_STATS), log(make_unique<HyperLogLog>()),
12
- sample_count(0), total_count(0) {
10
+ DistinctStatistics::DistinctStatistics() : log(make_unique<HyperLogLog>()), sample_count(0), total_count(0) {
13
11
  }
14
12
 
15
13
  DistinctStatistics::DistinctStatistics(unique_ptr<HyperLogLog> log, idx_t sample_count, idx_t total_count)
16
- : BaseStatistics(LogicalType::INVALID, StatisticsType::LOCAL_STATS), log(std::move(log)),
17
- sample_count(sample_count), total_count(total_count) {
14
+ : log(std::move(log)), sample_count(sample_count), total_count(total_count) {
18
15
  }
19
16
 
20
- unique_ptr<BaseStatistics> DistinctStatistics::Copy() const {
17
+ unique_ptr<DistinctStatistics> DistinctStatistics::Copy() const {
21
18
  return make_unique<DistinctStatistics>(log->Copy(), sample_count, total_count);
22
19
  }
23
20
 
24
- void DistinctStatistics::Merge(const BaseStatistics &other_p) {
25
- BaseStatistics::Merge(other_p);
26
- auto &other = (const DistinctStatistics &)other_p;
21
+ void DistinctStatistics::Merge(const DistinctStatistics &other) {
27
22
  log = log->Merge(*other.log);
28
23
  sample_count += other.sample_count;
29
24
  total_count += other.total_count;