duckdb 0.7.2-dev225.0 → 0.7.2-dev314.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/parquet/column_reader.cpp +5 -6
  3. package/src/duckdb/extension/parquet/include/column_reader.hpp +1 -2
  4. package/src/duckdb/extension/parquet/include/generated_column_reader.hpp +1 -11
  5. package/src/duckdb/extension/parquet/parquet_statistics.cpp +26 -32
  6. package/src/duckdb/src/common/sort/sort_state.cpp +5 -7
  7. package/src/duckdb/src/execution/column_binding_resolver.cpp +6 -0
  8. package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +4 -5
  9. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
  10. package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +2 -3
  11. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +32 -6
  12. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +15 -15
  13. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +18 -12
  14. package/src/duckdb/src/function/aggregate/distributive/bitstring_agg.cpp +6 -13
  15. package/src/duckdb/src/function/aggregate/distributive/count.cpp +2 -4
  16. package/src/duckdb/src/function/aggregate/distributive/sum.cpp +11 -13
  17. package/src/duckdb/src/function/scalar/date/date_diff.cpp +0 -1
  18. package/src/duckdb/src/function/scalar/date/date_part.cpp +17 -25
  19. package/src/duckdb/src/function/scalar/date/date_sub.cpp +0 -1
  20. package/src/duckdb/src/function/scalar/date/date_trunc.cpp +10 -14
  21. package/src/duckdb/src/function/scalar/generic/stats.cpp +2 -4
  22. package/src/duckdb/src/function/scalar/list/flatten.cpp +5 -12
  23. package/src/duckdb/src/function/scalar/list/list_concat.cpp +3 -8
  24. package/src/duckdb/src/function/scalar/list/list_extract.cpp +5 -12
  25. package/src/duckdb/src/function/scalar/list/list_value.cpp +5 -9
  26. package/src/duckdb/src/function/scalar/math/numeric.cpp +14 -17
  27. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +27 -34
  28. package/src/duckdb/src/function/scalar/string/caseconvert.cpp +2 -6
  29. package/src/duckdb/src/function/scalar/string/instr.cpp +2 -6
  30. package/src/duckdb/src/function/scalar/string/length.cpp +2 -6
  31. package/src/duckdb/src/function/scalar/string/like.cpp +2 -6
  32. package/src/duckdb/src/function/scalar/string/substring.cpp +2 -6
  33. package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +4 -9
  34. package/src/duckdb/src/function/scalar/struct/struct_insert.cpp +10 -13
  35. package/src/duckdb/src/function/scalar/struct/struct_pack.cpp +5 -6
  36. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  37. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
  38. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +12 -3
  39. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +2 -2
  40. package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +2 -0
  41. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -2
  42. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +2 -2
  43. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_fetch.hpp +1 -1
  44. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -1
  45. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +2 -2
  46. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_fetch.hpp +1 -1
  47. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -1
  48. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +5 -2
  49. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  50. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +93 -31
  51. package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +22 -3
  52. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +6 -6
  53. package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +41 -0
  54. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +157 -0
  55. package/src/duckdb/src/include/duckdb/storage/statistics/segment_statistics.hpp +2 -7
  56. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +74 -0
  57. package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +42 -0
  58. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +2 -3
  59. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +2 -2
  60. package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +2 -1
  61. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -3
  62. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +3 -2
  63. package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
  64. package/src/duckdb/src/main/config.cpp +66 -1
  65. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +0 -1
  66. package/src/duckdb/src/optimizer/statistics/expression/propagate_aggregate.cpp +9 -3
  67. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +6 -7
  68. package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -11
  69. package/src/duckdb/src/optimizer/statistics/expression/propagate_columnref.cpp +1 -1
  70. package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +13 -15
  71. package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +0 -1
  72. package/src/duckdb/src/optimizer/statistics/expression/propagate_constant.cpp +3 -75
  73. package/src/duckdb/src/optimizer/statistics/expression/propagate_function.cpp +7 -2
  74. package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +10 -0
  75. package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +2 -3
  76. package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +28 -31
  77. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +4 -5
  78. package/src/duckdb/src/optimizer/statistics/operator/propagate_set_operation.cpp +3 -3
  79. package/src/duckdb/src/optimizer/statistics_propagator.cpp +1 -1
  80. package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +4 -0
  81. package/src/duckdb/src/planner/bind_context.cpp +16 -0
  82. package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +0 -1
  83. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +9 -0
  84. package/src/duckdb/src/planner/binder.cpp +2 -1
  85. package/src/duckdb/src/planner/bound_result_modifier.cpp +1 -1
  86. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +1 -1
  87. package/src/duckdb/src/planner/filter/constant_filter.cpp +4 -6
  88. package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -1
  89. package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +1 -4
  90. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +4 -4
  91. package/src/duckdb/src/storage/compression/bitpacking.cpp +3 -3
  92. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +3 -3
  93. package/src/duckdb/src/storage/compression/numeric_constant.cpp +9 -10
  94. package/src/duckdb/src/storage/compression/patas.cpp +1 -1
  95. package/src/duckdb/src/storage/compression/rle.cpp +2 -2
  96. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +5 -5
  97. package/src/duckdb/src/storage/data_table.cpp +4 -6
  98. package/src/duckdb/src/storage/statistics/base_statistics.cpp +373 -128
  99. package/src/duckdb/src/storage/statistics/column_statistics.cpp +58 -3
  100. package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +4 -9
  101. package/src/duckdb/src/storage/statistics/list_stats.cpp +117 -0
  102. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +529 -0
  103. package/src/duckdb/src/storage/statistics/segment_statistics.cpp +2 -11
  104. package/src/duckdb/src/storage/statistics/string_stats.cpp +273 -0
  105. package/src/duckdb/src/storage/statistics/struct_stats.cpp +131 -0
  106. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  107. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -4
  108. package/src/duckdb/src/storage/table/column_data.cpp +16 -14
  109. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +2 -3
  110. package/src/duckdb/src/storage/table/column_segment.cpp +6 -8
  111. package/src/duckdb/src/storage/table/list_column_data.cpp +7 -11
  112. package/src/duckdb/src/storage/table/row_group.cpp +24 -23
  113. package/src/duckdb/src/storage/table/row_group_collection.cpp +12 -12
  114. package/src/duckdb/src/storage/table/standard_column_data.cpp +6 -6
  115. package/src/duckdb/src/storage/table/struct_column_data.cpp +15 -16
  116. package/src/duckdb/src/storage/table/table_statistics.cpp +27 -7
  117. package/src/duckdb/src/storage/table/update_segment.cpp +10 -12
  118. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +923 -919
  119. package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +2 -0
  120. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +15684 -15571
  121. package/src/duckdb/ub_src_storage_statistics.cpp +4 -6
  122. package/src/duckdb/src/include/duckdb/storage/statistics/list_statistics.hpp +0 -36
  123. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_statistics.hpp +0 -75
  124. package/src/duckdb/src/include/duckdb/storage/statistics/string_statistics.hpp +0 -49
  125. package/src/duckdb/src/include/duckdb/storage/statistics/struct_statistics.hpp +0 -36
  126. package/src/duckdb/src/include/duckdb/storage/statistics/validity_statistics.hpp +0 -45
  127. package/src/duckdb/src/storage/statistics/list_statistics.cpp +0 -94
  128. package/src/duckdb/src/storage/statistics/numeric_statistics.cpp +0 -307
  129. package/src/duckdb/src/storage/statistics/string_statistics.cpp +0 -220
  130. package/src/duckdb/src/storage/statistics/struct_statistics.cpp +0 -108
  131. package/src/duckdb/src/storage/statistics/validity_statistics.cpp +0 -91
@@ -3,7 +3,7 @@
3
3
  #include "duckdb/parser/expression/bound_expression.hpp"
4
4
  #include "duckdb/function/scalar/nested_functions.hpp"
5
5
  #include "duckdb/common/case_insensitive_map.hpp"
6
- #include "duckdb/storage/statistics/struct_statistics.hpp"
6
+ #include "duckdb/storage/statistics/struct_stats.hpp"
7
7
  #include "duckdb/planner/expression_binder.hpp"
8
8
 
9
9
  namespace duckdb {
@@ -81,22 +81,19 @@ static unique_ptr<FunctionData> StructInsertBind(ClientContext &context, ScalarF
81
81
  unique_ptr<BaseStatistics> StructInsertStats(ClientContext &context, FunctionStatisticsInput &input) {
82
82
  auto &child_stats = input.child_stats;
83
83
  auto &expr = input.expr;
84
- if (child_stats.empty() || !child_stats[0]) {
85
- return nullptr;
86
- }
87
- auto &existing_struct_stats = (StructStatistics &)*child_stats[0];
88
- auto new_struct_stats = make_unique<StructStatistics>(expr.return_type);
84
+ auto new_struct_stats = StructStats::CreateUnknown(expr.return_type);
89
85
 
90
- for (idx_t i = 0; i < existing_struct_stats.child_stats.size(); i++) {
91
- new_struct_stats->child_stats[i] =
92
- existing_struct_stats.child_stats[i] ? existing_struct_stats.child_stats[i]->Copy() : nullptr;
86
+ auto existing_count = StructType::GetChildCount(child_stats[0].GetType());
87
+ auto existing_stats = StructStats::GetChildStats(child_stats[0]);
88
+ for (idx_t i = 0; i < existing_count; i++) {
89
+ StructStats::SetChildStats(new_struct_stats, i, existing_stats[i]);
93
90
  }
94
-
95
- auto offset = new_struct_stats->child_stats.size() - child_stats.size();
91
+ auto new_count = StructType::GetChildCount(expr.return_type);
92
+ auto offset = new_count - child_stats.size();
96
93
  for (idx_t i = 1; i < child_stats.size(); i++) {
97
- new_struct_stats->child_stats[offset + i] = child_stats[i] ? child_stats[i]->Copy() : nullptr;
94
+ StructStats::SetChildStats(new_struct_stats, offset + i, child_stats[i]);
98
95
  }
99
- return std::move(new_struct_stats);
96
+ return new_struct_stats.ToUnique();
100
97
  }
101
98
 
102
99
  void StructInsertFun::RegisterFunction(BuiltinFunctions &set) {
@@ -3,7 +3,7 @@
3
3
  #include "duckdb/parser/expression/bound_expression.hpp"
4
4
  #include "duckdb/function/scalar/nested_functions.hpp"
5
5
  #include "duckdb/common/case_insensitive_map.hpp"
6
- #include "duckdb/storage/statistics/struct_statistics.hpp"
6
+ #include "duckdb/storage/statistics/struct_stats.hpp"
7
7
  #include "duckdb/planner/expression_binder.hpp"
8
8
 
9
9
  namespace duckdb {
@@ -61,12 +61,11 @@ static unique_ptr<FunctionData> StructPackBind(ClientContext &context, ScalarFun
61
61
  unique_ptr<BaseStatistics> StructPackStats(ClientContext &context, FunctionStatisticsInput &input) {
62
62
  auto &child_stats = input.child_stats;
63
63
  auto &expr = input.expr;
64
- auto struct_stats = make_unique<StructStatistics>(expr.return_type);
65
- D_ASSERT(child_stats.size() == struct_stats->child_stats.size());
66
- for (idx_t i = 0; i < struct_stats->child_stats.size(); i++) {
67
- struct_stats->child_stats[i] = child_stats[i] ? child_stats[i]->Copy() : nullptr;
64
+ auto struct_stats = StructStats::CreateUnknown(expr.return_type);
65
+ for (idx_t i = 0; i < child_stats.size(); i++) {
66
+ StructStats::SetChildStats(struct_stats, i, child_stats[i]);
68
67
  }
69
- return std::move(struct_stats);
68
+ return struct_stats.ToUnique();
70
69
  }
71
70
 
72
71
  void StructPackFun::RegisterFunction(BuiltinFunctions &set) {
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.7.2-dev225"
2
+ #define DUCKDB_VERSION "0.7.2-dev314"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "96a1cada02"
5
+ #define DUCKDB_SOURCE_ID "7e05c8c034"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -20,7 +20,7 @@ class PhysicalPerfectHashAggregate : public PhysicalOperator {
20
20
  public:
21
21
  PhysicalPerfectHashAggregate(ClientContext &context, vector<LogicalType> types,
22
22
  vector<unique_ptr<Expression>> aggregates, vector<unique_ptr<Expression>> groups,
23
- vector<unique_ptr<BaseStatistics>> group_stats, vector<idx_t> required_bits,
23
+ const vector<unique_ptr<BaseStatistics>> &group_stats, vector<idx_t> required_bits,
24
24
  idx_t estimated_cardinality);
25
25
 
26
26
  //! The groups
@@ -29,6 +29,17 @@ struct AggregateInputData {
29
29
  Allocator &allocator;
30
30
  };
31
31
 
32
+ struct AggregateStatisticsInput {
33
+ AggregateStatisticsInput(FunctionData *bind_data_p, vector<BaseStatistics> &child_stats_p,
34
+ NodeStatistics *node_stats_p)
35
+ : bind_data(bind_data_p), child_stats(child_stats_p), node_stats(node_stats_p) {
36
+ }
37
+
38
+ FunctionData *bind_data;
39
+ vector<BaseStatistics> &child_stats;
40
+ NodeStatistics *node_stats;
41
+ };
42
+
32
43
  //! The type used for sizing hashed aggregate function states
33
44
  typedef idx_t (*aggregate_size_t)();
34
45
  //! The type used for initializing hashed aggregate function states
@@ -43,9 +54,7 @@ typedef void (*aggregate_finalize_t)(Vector &state, AggregateInputData &aggr_inp
43
54
  idx_t offset);
44
55
  //! The type used for propagating statistics in aggregate functions (optional)
45
56
  typedef unique_ptr<BaseStatistics> (*aggregate_statistics_t)(ClientContext &context, BoundAggregateExpression &expr,
46
- FunctionData *bind_data,
47
- vector<unique_ptr<BaseStatistics>> &child_stats,
48
- NodeStatistics *node_stats);
57
+ AggregateStatisticsInput &input);
49
58
  //! Binds the scalar function and creates the function data
50
59
  typedef unique_ptr<FunctionData> (*bind_aggregate_function_t)(ClientContext &context, AggregateFunction &function,
51
60
  vector<unique_ptr<Expression>> &arguments);
@@ -29,13 +29,13 @@ class ScalarFunctionCatalogEntry;
29
29
 
30
30
  struct FunctionStatisticsInput {
31
31
  FunctionStatisticsInput(BoundFunctionExpression &expr_p, FunctionData *bind_data_p,
32
- vector<unique_ptr<BaseStatistics>> &child_stats_p, unique_ptr<Expression> *expr_ptr_p)
32
+ vector<BaseStatistics> &child_stats_p, unique_ptr<Expression> *expr_ptr_p)
33
33
  : expr(expr_p), bind_data(bind_data_p), child_stats(child_stats_p), expr_ptr(expr_ptr_p) {
34
34
  }
35
35
 
36
36
  BoundFunctionExpression &expr;
37
37
  FunctionData *bind_data;
38
- vector<unique_ptr<BaseStatistics>> &child_stats;
38
+ vector<BaseStatistics> &child_stats;
39
39
  unique_ptr<Expression> *expr_ptr;
40
40
  };
41
41
 
@@ -141,6 +141,8 @@ public:
141
141
 
142
142
  //! Add all the bindings from a BindContext to this BindContext. The other BindContext is destroyed in the process.
143
143
  void AddContext(BindContext other);
144
+ //! For semi and anti joins we remove the binding context of the right table after binding the condition.
145
+ void RemoveContext(vector<std::pair<string, duckdb::Binding *>> &other_bindings_list);
144
146
 
145
147
  //! Gets a binding of the specified name. Returns a nullptr and sets the out_error if the binding could not be
146
148
  //! found.
@@ -12,6 +12,7 @@
12
12
 
13
13
  namespace duckdb {
14
14
  class DuckTableEntry;
15
+ class TableStatistics;
15
16
 
16
17
  //! The table data writer is responsible for writing the data of a table to
17
18
  //! storage.
@@ -30,7 +31,7 @@ public:
30
31
 
31
32
  CompressionType GetColumnCompressionType(idx_t i);
32
33
 
33
- virtual void FinalizeTable(vector<unique_ptr<BaseStatistics>> &&global_stats, DataTableInfo *info) = 0;
34
+ virtual void FinalizeTable(TableStatistics &&global_stats, DataTableInfo *info) = 0;
34
35
  virtual unique_ptr<RowGroupWriter> GetRowGroupWriter(RowGroup &row_group) = 0;
35
36
 
36
37
  virtual void AddRowGroup(RowGroupPointer &&row_group_pointer, unique_ptr<RowGroupWriter> &&writer);
@@ -47,7 +48,7 @@ public:
47
48
  MetaBlockWriter &table_data_writer, MetaBlockWriter &meta_data_writer);
48
49
 
49
50
  public:
50
- virtual void FinalizeTable(vector<unique_ptr<BaseStatistics>> &&global_stats, DataTableInfo *info) override;
51
+ virtual void FinalizeTable(TableStatistics &&global_stats, DataTableInfo *info) override;
51
52
  virtual unique_ptr<RowGroupWriter> GetRowGroupWriter(RowGroup &row_group) override;
52
53
 
53
54
  private:
@@ -18,7 +18,7 @@
18
18
  #include "duckdb/function/compression/compression.hpp"
19
19
  #include "duckdb/main/config.hpp"
20
20
  #include "duckdb/storage/buffer_manager.hpp"
21
- #include "duckdb/storage/statistics/numeric_statistics.hpp"
21
+
22
22
  #include "duckdb/storage/table/column_data_checkpointer.hpp"
23
23
  #include "duckdb/storage/table/column_segment.hpp"
24
24
  #include "duckdb/common/operator/subtract.hpp"
@@ -150,7 +150,7 @@ public:
150
150
 
151
151
  if (is_valid) {
152
152
  T floating_point_value = Load<T>((const_data_ptr_t)&value);
153
- NumericStatistics::Update<T>(current_segment->stats, floating_point_value);
153
+ NumericStats::Update<T>(current_segment->stats.statistics, floating_point_value);
154
154
  } else {
155
155
  //! FIXME: find a cheaper alternative to storing a NULL
156
156
  // store this as "value_identical", only using 9 bits for a NULL
@@ -17,7 +17,7 @@
17
17
  #include "duckdb/function/compression_function.hpp"
18
18
  #include "duckdb/main/config.hpp"
19
19
  #include "duckdb/storage/buffer_manager.hpp"
20
- #include "duckdb/storage/statistics/numeric_statistics.hpp"
20
+
21
21
  #include "duckdb/storage/table/column_data_checkpointer.hpp"
22
22
  #include "duckdb/storage/table/column_segment.hpp"
23
23
  #include "duckdb/common/operator/subtract.hpp"
@@ -17,7 +17,7 @@
17
17
  #include "duckdb/function/compression_function.hpp"
18
18
  #include "duckdb/main/config.hpp"
19
19
  #include "duckdb/storage/buffer_manager.hpp"
20
- #include "duckdb/storage/statistics/numeric_statistics.hpp"
20
+
21
21
  #include "duckdb/storage/table/column_data_checkpointer.hpp"
22
22
  #include "duckdb/storage/table/column_segment.hpp"
23
23
  #include "duckdb/common/operator/subtract.hpp"
@@ -18,7 +18,7 @@
18
18
  #include "duckdb/function/compression/compression.hpp"
19
19
  #include "duckdb/main/config.hpp"
20
20
  #include "duckdb/storage/buffer_manager.hpp"
21
- #include "duckdb/storage/statistics/numeric_statistics.hpp"
21
+
22
22
  #include "duckdb/storage/table/column_data_checkpointer.hpp"
23
23
  #include "duckdb/storage/table/column_segment.hpp"
24
24
  #include "duckdb/common/operator/subtract.hpp"
@@ -49,7 +49,7 @@ public:
49
49
  }
50
50
 
51
51
  if (is_valid) {
52
- NumericStatistics::Update<VALUE_TYPE>(state_wrapper->current_segment->stats, value);
52
+ NumericStats::Update<VALUE_TYPE>(state_wrapper->current_segment->stats.statistics, value);
53
53
  }
54
54
 
55
55
  state_wrapper->WriteValue(Load<EXACT_TYPE>((const_data_ptr_t)&value));
@@ -17,7 +17,7 @@
17
17
  #include "duckdb/function/compression_function.hpp"
18
18
  #include "duckdb/main/config.hpp"
19
19
  #include "duckdb/storage/buffer_manager.hpp"
20
- #include "duckdb/storage/statistics/numeric_statistics.hpp"
20
+
21
21
  #include "duckdb/storage/table/column_data_checkpointer.hpp"
22
22
  #include "duckdb/storage/table/column_segment.hpp"
23
23
  #include "duckdb/common/operator/subtract.hpp"
@@ -17,7 +17,7 @@
17
17
  #include "duckdb/function/compression_function.hpp"
18
18
  #include "duckdb/main/config.hpp"
19
19
  #include "duckdb/storage/buffer_manager.hpp"
20
- #include "duckdb/storage/statistics/numeric_statistics.hpp"
20
+
21
21
  #include "duckdb/storage/table/column_data_checkpointer.hpp"
22
22
  #include "duckdb/storage/table/column_segment.hpp"
23
23
  #include "duckdb/common/operator/subtract.hpp"
@@ -18,12 +18,15 @@
18
18
  namespace duckdb {
19
19
 
20
20
  struct DataPointer {
21
+ DataPointer(BaseStatistics stats) : statistics(std::move(stats)) {
22
+ }
23
+
21
24
  uint64_t row_start;
22
25
  uint64_t tuple_count;
23
26
  BlockPointer block_pointer;
24
27
  CompressionType compression_type;
25
28
  //! Type-specific statistics of the segment
26
- unique_ptr<BaseStatistics> statistics;
29
+ BaseStatistics statistics;
27
30
  };
28
31
 
29
32
  struct RowGroupPointer {
@@ -32,7 +35,7 @@ struct RowGroupPointer {
32
35
  //! The data pointers of the column segments stored in the row group
33
36
  vector<BlockPointer> data_pointers;
34
37
  //! The per-column statistics of the row group
35
- vector<unique_ptr<BaseStatistics>> statistics;
38
+ vector<BaseStatistics> statistics;
36
39
  //! The versions information of the row group (if any)
37
40
  shared_ptr<VersionNode> versions;
38
41
  };
@@ -169,7 +169,7 @@ public:
169
169
  //! Get statistics of a physical column within the table
170
170
  unique_ptr<BaseStatistics> GetStatistics(ClientContext &context, column_t column_id);
171
171
  //! Sets statistics of a physical column within the table
172
- void SetStatistics(column_t column_id, const std::function<void(BaseStatistics &)> &set_fun);
172
+ void SetDistinct(column_t column_id, unique_ptr<DistinctStatistics> distinct_stats);
173
173
 
174
174
  //! Checkpoint the table to the specified table data writer
175
175
  void Checkpoint(TableDataWriter &writer);
@@ -13,6 +13,8 @@
13
13
  #include "duckdb/common/operator/comparison_operators.hpp"
14
14
  #include "duckdb/common/enums/expression_type.hpp"
15
15
  #include "duckdb/common/types/value.hpp"
16
+ #include "duckdb/storage/statistics/numeric_stats.hpp"
17
+ #include "duckdb/storage/statistics/string_stats.hpp"
16
18
 
17
19
  namespace duckdb {
18
20
  struct SelectionVector;
@@ -22,61 +24,121 @@ class Deserializer;
22
24
  class FieldWriter;
23
25
  class FieldReader;
24
26
  class Vector;
25
- class ValidityStatistics;
26
- class DistinctStatistics;
27
27
  struct UnifiedVectorFormat;
28
28
 
29
- enum StatisticsType { LOCAL_STATS = 0, GLOBAL_STATS = 1 };
29
+ enum class StatsInfo : uint8_t {
30
+ CAN_HAVE_NULL_VALUES = 0,
31
+ CANNOT_HAVE_NULL_VALUES = 1,
32
+ CAN_HAVE_VALID_VALUES = 2,
33
+ CANNOT_HAVE_VALID_VALUES = 3,
34
+ CAN_HAVE_NULL_AND_VALID_VALUES = 4
35
+ };
30
36
 
31
- class BaseStatistics {
32
- public:
33
- BaseStatistics(LogicalType type, StatisticsType stats_type);
34
- virtual ~BaseStatistics();
37
+ enum class StatisticsType : uint8_t { NUMERIC_STATS, STRING_STATS, LIST_STATS, STRUCT_STATS, BASE_STATS };
35
38
 
36
- //! The type of the logical segment
37
- LogicalType type;
38
- //! The validity stats of the column (if any)
39
- unique_ptr<BaseStatistics> validity_stats;
40
- //! The approximate count distinct stats of the column (if any)
41
- unique_ptr<BaseStatistics> distinct_stats;
42
- idx_t distinct_count; // estimate that one may have even if distinct_stats==nullptr
39
+ class BaseStatistics {
40
+ friend struct NumericStats;
41
+ friend struct StringStats;
42
+ friend struct StructStats;
43
+ friend struct ListStats;
43
44
 
44
- //! Whether these are 'global' stats, i.e., over a whole table, or just over a segment
45
- //! Some statistics are more expensive to keep, therefore we only keep them globally
46
- StatisticsType stats_type;
45
+ public:
46
+ DUCKDB_API ~BaseStatistics();
47
+ // disable copy constructors
48
+ BaseStatistics(const BaseStatistics &other) = delete;
49
+ BaseStatistics &operator=(const BaseStatistics &) = delete;
50
+ //! enable move constructors
51
+ DUCKDB_API BaseStatistics(BaseStatistics &&other) noexcept;
52
+ DUCKDB_API BaseStatistics &operator=(BaseStatistics &&) noexcept;
47
53
 
48
54
  public:
49
- static unique_ptr<BaseStatistics> CreateEmpty(LogicalType type, StatisticsType stats_type);
55
+ //! Creates a set of statistics for data that is unknown, i.e. "has_null" is true, "has_no_null" is true, etc
56
+ //! This can be used in case nothing is known about the data - or can be used as a baseline when only a few things
57
+ //! are known
58
+ static BaseStatistics CreateUnknown(LogicalType type);
59
+ //! Creates statistics for an empty database, i.e. "has_null" is false, "has_no_null" is false, etc
60
+ //! This is used when incrementally constructing statistics by constantly adding new values
61
+ static BaseStatistics CreateEmpty(LogicalType type);
62
+
63
+ DUCKDB_API StatisticsType GetStatsType() const;
64
+ DUCKDB_API static StatisticsType GetStatsType(const LogicalType &type);
50
65
 
51
66
  DUCKDB_API bool CanHaveNull() const;
52
67
  DUCKDB_API bool CanHaveNoNull() const;
53
68
 
54
- void UpdateDistinctStatistics(Vector &v, idx_t count);
69
+ void SetDistinctCount(idx_t distinct_count);
70
+
71
+ bool IsConstant() const;
72
+
73
+ const LogicalType &GetType() const {
74
+ return type;
75
+ }
55
76
 
56
- virtual bool IsConstant() const {
57
- return false;
77
+ void Set(StatsInfo info);
78
+ void CombineValidity(BaseStatistics &left, BaseStatistics &right);
79
+ void CopyValidity(BaseStatistics &stats);
80
+ inline void SetHasNull() {
81
+ has_null = true;
82
+ }
83
+ inline void SetHasNoNull() {
84
+ has_no_null = true;
58
85
  }
59
86
 
60
- virtual void Merge(const BaseStatistics &other);
87
+ void Merge(const BaseStatistics &other);
88
+
89
+ void Copy(const BaseStatistics &other);
61
90
 
62
- virtual unique_ptr<BaseStatistics> Copy() const;
91
+ BaseStatistics Copy() const;
92
+ unique_ptr<BaseStatistics> ToUnique() const;
63
93
  void CopyBase(const BaseStatistics &orig);
64
94
 
65
- virtual void Serialize(Serializer &serializer) const;
66
- virtual void Serialize(FieldWriter &writer) const;
95
+ void Serialize(Serializer &serializer) const;
96
+ void Serialize(FieldWriter &writer) const;
67
97
 
68
- virtual idx_t GetDistinctCount();
98
+ idx_t GetDistinctCount();
69
99
 
70
- static unique_ptr<BaseStatistics> Deserialize(Deserializer &source, LogicalType type);
100
+ static BaseStatistics Deserialize(Deserializer &source, LogicalType type);
71
101
 
72
102
  //! Verify that a vector does not violate the statistics
73
- virtual void Verify(Vector &vector, const SelectionVector &sel, idx_t count) const;
103
+ void Verify(Vector &vector, const SelectionVector &sel, idx_t count) const;
74
104
  void Verify(Vector &vector, idx_t count) const;
75
105
 
76
- virtual string ToString() const;
106
+ string ToString() const;
107
+
108
+ static BaseStatistics FromConstant(const Value &input);
109
+
110
+ private:
111
+ BaseStatistics();
112
+ explicit BaseStatistics(LogicalType type);
77
113
 
78
- protected:
79
- void InitializeBase();
114
+ static void Construct(BaseStatistics &stats, LogicalType type);
115
+
116
+ void InitializeUnknown();
117
+ void InitializeEmpty();
118
+
119
+ static BaseStatistics CreateUnknownType(LogicalType type);
120
+ static BaseStatistics CreateEmptyType(LogicalType type);
121
+ static BaseStatistics DeserializeType(FieldReader &reader, LogicalType type);
122
+ static BaseStatistics FromConstantType(const Value &input);
123
+
124
+ private:
125
+ //! The type of the logical segment
126
+ LogicalType type;
127
+ //! Whether or not the segment can contain NULL values
128
+ bool has_null;
129
+ //! Whether or not the segment can contain values that are not null
130
+ bool has_no_null;
131
+ // estimate that one may have even if distinct_stats==nullptr
132
+ idx_t distinct_count;
133
+ //! Numeric and String stats
134
+ union {
135
+ //! Numeric stats data, for numeric stats
136
+ NumericStatsData numeric_data;
137
+ //! String stats data, for string stats
138
+ StringStatsData string_data;
139
+ } stats_union;
140
+ //! Child stats (for LIST and STRUCT)
141
+ unique_ptr<BaseStatistics[]> child_stats;
80
142
  };
81
143
 
82
144
  } // namespace duckdb
@@ -9,17 +9,36 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/storage/statistics/base_statistics.hpp"
12
+ #include "duckdb/storage/statistics/distinct_statistics.hpp"
12
13
 
13
14
  namespace duckdb {
14
15
 
15
16
  class ColumnStatistics {
16
17
  public:
17
- explicit ColumnStatistics(unique_ptr<BaseStatistics> stats_p);
18
-
19
- unique_ptr<BaseStatistics> stats;
18
+ explicit ColumnStatistics(BaseStatistics stats_p);
19
+ ColumnStatistics(BaseStatistics stats_p, unique_ptr<DistinctStatistics> distinct_stats_p);
20
20
 
21
21
  public:
22
22
  static shared_ptr<ColumnStatistics> CreateEmptyStats(const LogicalType &type);
23
+
24
+ void Merge(ColumnStatistics &other);
25
+
26
+ void UpdateDistinctStatistics(Vector &v, idx_t count);
27
+
28
+ BaseStatistics &Statistics();
29
+
30
+ bool HasDistinctStats();
31
+ DistinctStatistics &DistinctStats();
32
+ void SetDistinct(unique_ptr<DistinctStatistics> distinct_stats);
33
+
34
+ shared_ptr<ColumnStatistics> Copy() const;
35
+ void Serialize(Serializer &serializer) const;
36
+ static shared_ptr<ColumnStatistics> Deserialize(Deserializer &source, const LogicalType &type);
37
+
38
+ private:
39
+ BaseStatistics stats;
40
+ //! The approximate count distinct stats of the column
41
+ unique_ptr<DistinctStatistics> distinct_stats;
23
42
  };
24
43
 
25
44
  } // namespace duckdb
@@ -17,7 +17,7 @@ class Serializer;
17
17
  class Deserializer;
18
18
  class Vector;
19
19
 
20
- class DistinctStatistics : public BaseStatistics {
20
+ class DistinctStatistics {
21
21
  public:
22
22
  DistinctStatistics();
23
23
  explicit DistinctStatistics(unique_ptr<HyperLogLog> log, idx_t sample_count, idx_t total_count);
@@ -30,12 +30,12 @@ public:
30
30
  atomic<idx_t> total_count;
31
31
 
32
32
  public:
33
- void Merge(const BaseStatistics &other) override;
33
+ void Merge(const DistinctStatistics &other);
34
34
 
35
- unique_ptr<BaseStatistics> Copy() const override;
35
+ unique_ptr<DistinctStatistics> Copy() const;
36
36
 
37
- void Serialize(Serializer &serializer) const override;
38
- void Serialize(FieldWriter &writer) const override;
37
+ void Serialize(Serializer &serializer) const;
38
+ void Serialize(FieldWriter &writer) const;
39
39
 
40
40
  static unique_ptr<DistinctStatistics> Deserialize(Deserializer &source);
41
41
  static unique_ptr<DistinctStatistics> Deserialize(FieldReader &reader);
@@ -43,7 +43,7 @@ public:
43
43
  void Update(Vector &update, idx_t count, bool sample = true);
44
44
  void Update(UnifiedVectorFormat &update_data, const LogicalType &ptype, idx_t count, bool sample = true);
45
45
 
46
- string ToString() const override;
46
+ string ToString() const;
47
47
  idx_t GetCount() const;
48
48
 
49
49
  private:
@@ -0,0 +1,41 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/storage/statistics/list_stats.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/common/common.hpp"
12
+ #include "duckdb/common/exception.hpp"
13
+ #include "duckdb/common/types/hugeint.hpp"
14
+
15
+ namespace duckdb {
16
+ class BaseStatistics;
17
+ class FieldWriter;
18
+ class FieldReader;
19
+ struct SelectionVector;
20
+ class Vector;
21
+
22
+ struct ListStats {
23
+ DUCKDB_API static void Construct(BaseStatistics &stats);
24
+ DUCKDB_API static BaseStatistics CreateUnknown(LogicalType type);
25
+ DUCKDB_API static BaseStatistics CreateEmpty(LogicalType type);
26
+
27
+ DUCKDB_API static const BaseStatistics &GetChildStats(const BaseStatistics &stats);
28
+ DUCKDB_API static BaseStatistics &GetChildStats(BaseStatistics &stats);
29
+ DUCKDB_API static void SetChildStats(BaseStatistics &stats, unique_ptr<BaseStatistics> new_stats);
30
+
31
+ DUCKDB_API static void Serialize(const BaseStatistics &stats, FieldWriter &writer);
32
+ DUCKDB_API static BaseStatistics Deserialize(FieldReader &reader, LogicalType type);
33
+
34
+ DUCKDB_API static string ToString(const BaseStatistics &stats);
35
+
36
+ DUCKDB_API static void Merge(BaseStatistics &stats, const BaseStatistics &other);
37
+ DUCKDB_API static void Copy(BaseStatistics &stats, const BaseStatistics &other);
38
+ DUCKDB_API static void Verify(const BaseStatistics &stats, Vector &vector, const SelectionVector &sel, idx_t count);
39
+ };
40
+
41
+ } // namespace duckdb