duckdb 0.7.2-dev225.0 → 0.7.2-dev314.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/parquet/column_reader.cpp +5 -6
  3. package/src/duckdb/extension/parquet/include/column_reader.hpp +1 -2
  4. package/src/duckdb/extension/parquet/include/generated_column_reader.hpp +1 -11
  5. package/src/duckdb/extension/parquet/parquet_statistics.cpp +26 -32
  6. package/src/duckdb/src/common/sort/sort_state.cpp +5 -7
  7. package/src/duckdb/src/execution/column_binding_resolver.cpp +6 -0
  8. package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +4 -5
  9. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
  10. package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +2 -3
  11. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +32 -6
  12. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +15 -15
  13. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +18 -12
  14. package/src/duckdb/src/function/aggregate/distributive/bitstring_agg.cpp +6 -13
  15. package/src/duckdb/src/function/aggregate/distributive/count.cpp +2 -4
  16. package/src/duckdb/src/function/aggregate/distributive/sum.cpp +11 -13
  17. package/src/duckdb/src/function/scalar/date/date_diff.cpp +0 -1
  18. package/src/duckdb/src/function/scalar/date/date_part.cpp +17 -25
  19. package/src/duckdb/src/function/scalar/date/date_sub.cpp +0 -1
  20. package/src/duckdb/src/function/scalar/date/date_trunc.cpp +10 -14
  21. package/src/duckdb/src/function/scalar/generic/stats.cpp +2 -4
  22. package/src/duckdb/src/function/scalar/list/flatten.cpp +5 -12
  23. package/src/duckdb/src/function/scalar/list/list_concat.cpp +3 -8
  24. package/src/duckdb/src/function/scalar/list/list_extract.cpp +5 -12
  25. package/src/duckdb/src/function/scalar/list/list_value.cpp +5 -9
  26. package/src/duckdb/src/function/scalar/math/numeric.cpp +14 -17
  27. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +27 -34
  28. package/src/duckdb/src/function/scalar/string/caseconvert.cpp +2 -6
  29. package/src/duckdb/src/function/scalar/string/instr.cpp +2 -6
  30. package/src/duckdb/src/function/scalar/string/length.cpp +2 -6
  31. package/src/duckdb/src/function/scalar/string/like.cpp +2 -6
  32. package/src/duckdb/src/function/scalar/string/substring.cpp +2 -6
  33. package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +4 -9
  34. package/src/duckdb/src/function/scalar/struct/struct_insert.cpp +10 -13
  35. package/src/duckdb/src/function/scalar/struct/struct_pack.cpp +5 -6
  36. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  37. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
  38. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +12 -3
  39. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +2 -2
  40. package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +2 -0
  41. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -2
  42. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +2 -2
  43. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_fetch.hpp +1 -1
  44. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -1
  45. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +2 -2
  46. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_fetch.hpp +1 -1
  47. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -1
  48. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +5 -2
  49. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  50. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +93 -31
  51. package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +22 -3
  52. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +6 -6
  53. package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +41 -0
  54. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +157 -0
  55. package/src/duckdb/src/include/duckdb/storage/statistics/segment_statistics.hpp +2 -7
  56. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +74 -0
  57. package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +42 -0
  58. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +2 -3
  59. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +2 -2
  60. package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +2 -1
  61. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -3
  62. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +3 -2
  63. package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
  64. package/src/duckdb/src/main/config.cpp +66 -1
  65. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +0 -1
  66. package/src/duckdb/src/optimizer/statistics/expression/propagate_aggregate.cpp +9 -3
  67. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +6 -7
  68. package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -11
  69. package/src/duckdb/src/optimizer/statistics/expression/propagate_columnref.cpp +1 -1
  70. package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +13 -15
  71. package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +0 -1
  72. package/src/duckdb/src/optimizer/statistics/expression/propagate_constant.cpp +3 -75
  73. package/src/duckdb/src/optimizer/statistics/expression/propagate_function.cpp +7 -2
  74. package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +10 -0
  75. package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +2 -3
  76. package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +28 -31
  77. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +4 -5
  78. package/src/duckdb/src/optimizer/statistics/operator/propagate_set_operation.cpp +3 -3
  79. package/src/duckdb/src/optimizer/statistics_propagator.cpp +1 -1
  80. package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +4 -0
  81. package/src/duckdb/src/planner/bind_context.cpp +16 -0
  82. package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +0 -1
  83. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +9 -0
  84. package/src/duckdb/src/planner/binder.cpp +2 -1
  85. package/src/duckdb/src/planner/bound_result_modifier.cpp +1 -1
  86. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +1 -1
  87. package/src/duckdb/src/planner/filter/constant_filter.cpp +4 -6
  88. package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -1
  89. package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +1 -4
  90. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +4 -4
  91. package/src/duckdb/src/storage/compression/bitpacking.cpp +3 -3
  92. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +3 -3
  93. package/src/duckdb/src/storage/compression/numeric_constant.cpp +9 -10
  94. package/src/duckdb/src/storage/compression/patas.cpp +1 -1
  95. package/src/duckdb/src/storage/compression/rle.cpp +2 -2
  96. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +5 -5
  97. package/src/duckdb/src/storage/data_table.cpp +4 -6
  98. package/src/duckdb/src/storage/statistics/base_statistics.cpp +373 -128
  99. package/src/duckdb/src/storage/statistics/column_statistics.cpp +58 -3
  100. package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +4 -9
  101. package/src/duckdb/src/storage/statistics/list_stats.cpp +117 -0
  102. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +529 -0
  103. package/src/duckdb/src/storage/statistics/segment_statistics.cpp +2 -11
  104. package/src/duckdb/src/storage/statistics/string_stats.cpp +273 -0
  105. package/src/duckdb/src/storage/statistics/struct_stats.cpp +131 -0
  106. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  107. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -4
  108. package/src/duckdb/src/storage/table/column_data.cpp +16 -14
  109. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +2 -3
  110. package/src/duckdb/src/storage/table/column_segment.cpp +6 -8
  111. package/src/duckdb/src/storage/table/list_column_data.cpp +7 -11
  112. package/src/duckdb/src/storage/table/row_group.cpp +24 -23
  113. package/src/duckdb/src/storage/table/row_group_collection.cpp +12 -12
  114. package/src/duckdb/src/storage/table/standard_column_data.cpp +6 -6
  115. package/src/duckdb/src/storage/table/struct_column_data.cpp +15 -16
  116. package/src/duckdb/src/storage/table/table_statistics.cpp +27 -7
  117. package/src/duckdb/src/storage/table/update_segment.cpp +10 -12
  118. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +923 -919
  119. package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +2 -0
  120. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +15684 -15571
  121. package/src/duckdb/ub_src_storage_statistics.cpp +4 -6
  122. package/src/duckdb/src/include/duckdb/storage/statistics/list_statistics.hpp +0 -36
  123. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_statistics.hpp +0 -75
  124. package/src/duckdb/src/include/duckdb/storage/statistics/string_statistics.hpp +0 -49
  125. package/src/duckdb/src/include/duckdb/storage/statistics/struct_statistics.hpp +0 -36
  126. package/src/duckdb/src/include/duckdb/storage/statistics/validity_statistics.hpp +0 -45
  127. package/src/duckdb/src/storage/statistics/list_statistics.cpp +0 -94
  128. package/src/duckdb/src/storage/statistics/numeric_statistics.cpp +0 -307
  129. package/src/duckdb/src/storage/statistics/string_statistics.cpp +0 -220
  130. package/src/duckdb/src/storage/statistics/struct_statistics.cpp +0 -108
  131. package/src/duckdb/src/storage/statistics/validity_statistics.cpp +0 -91
@@ -9,7 +9,7 @@
9
9
  #include "duckdb/function/scalar/date_functions.hpp"
10
10
  #include "duckdb/function/scalar/nested_functions.hpp"
11
11
  #include "duckdb/planner/expression/bound_function_expression.hpp"
12
- #include "duckdb/storage/statistics/numeric_statistics.hpp"
12
+
13
13
  #include "duckdb/common/field_writer.hpp"
14
14
 
15
15
  namespace duckdb {
@@ -161,34 +161,27 @@ DatePartSpecifier GetDateTypePartSpecifier(const string &specifier, LogicalType
161
161
  }
162
162
 
163
163
  template <int64_t MIN, int64_t MAX>
164
- static unique_ptr<BaseStatistics> PropagateSimpleDatePartStatistics(vector<unique_ptr<BaseStatistics>> &child_stats) {
164
+ static unique_ptr<BaseStatistics> PropagateSimpleDatePartStatistics(vector<BaseStatistics> &child_stats) {
165
165
  // we can always propagate simple date part statistics
166
166
  // since the min and max can never exceed these bounds
167
- auto result = make_unique<NumericStatistics>(LogicalType::BIGINT, Value::BIGINT(MIN), Value::BIGINT(MAX),
168
- StatisticsType::LOCAL_STATS);
169
- if (!child_stats[0]) {
170
- // if there are no child stats, we don't know
171
- result->validity_stats = make_unique<ValidityStatistics>(true);
172
- } else if (child_stats[0]->validity_stats) {
173
- result->validity_stats = child_stats[0]->validity_stats->Copy();
174
- }
175
- return std::move(result);
167
+ auto result = NumericStats::CreateEmpty(LogicalType::BIGINT);
168
+ result.CopyValidity(child_stats[0]);
169
+ NumericStats::SetMin(result, Value::BIGINT(MIN));
170
+ NumericStats::SetMax(result, Value::BIGINT(MAX));
171
+ return result.ToUnique();
176
172
  }
177
173
 
178
174
  struct DatePart {
179
175
  template <class T, class OP>
180
- static unique_ptr<BaseStatistics> PropagateDatePartStatistics(vector<unique_ptr<BaseStatistics>> &child_stats) {
176
+ static unique_ptr<BaseStatistics> PropagateDatePartStatistics(vector<BaseStatistics> &child_stats) {
181
177
  // we can only propagate complex date part stats if the child has stats
182
- if (!child_stats[0]) {
183
- return nullptr;
184
- }
185
- auto &nstats = (NumericStatistics &)*child_stats[0];
186
- if (nstats.min.IsNull() || nstats.max.IsNull()) {
178
+ auto &nstats = child_stats[0];
179
+ if (!NumericStats::HasMinMax(nstats)) {
187
180
  return nullptr;
188
181
  }
189
182
  // run the operator on both the min and the max, this gives us the [min, max] bound
190
- auto min = nstats.min.GetValueUnsafe<T>();
191
- auto max = nstats.max.GetValueUnsafe<T>();
183
+ auto min = NumericStats::GetMinUnsafe<T>(nstats);
184
+ auto max = NumericStats::GetMaxUnsafe<T>(nstats);
192
185
  if (min > max) {
193
186
  return nullptr;
194
187
  }
@@ -198,12 +191,11 @@ struct DatePart {
198
191
  }
199
192
  auto min_part = OP::template Operation<T, int64_t>(min);
200
193
  auto max_part = OP::template Operation<T, int64_t>(max);
201
- auto result = make_unique<NumericStatistics>(LogicalType::BIGINT, Value::BIGINT(min_part),
202
- Value::BIGINT(max_part), StatisticsType::LOCAL_STATS);
203
- if (child_stats[0]->validity_stats) {
204
- result->validity_stats = child_stats[0]->validity_stats->Copy();
205
- }
206
- return std::move(result);
194
+ auto result = NumericStats::CreateEmpty(LogicalType::BIGINT);
195
+ NumericStats::SetMin(result, Value::BIGINT(min_part));
196
+ NumericStats::SetMax(result, Value::BIGINT(max_part));
197
+ result.CopyValidity(child_stats[0]);
198
+ return result.ToUnique();
207
199
  }
208
200
 
209
201
  template <typename OP>
@@ -9,7 +9,6 @@
9
9
  #include "duckdb/common/vector_operations/ternary_executor.hpp"
10
10
  #include "duckdb/common/vector_operations/vector_operations.hpp"
11
11
  #include "duckdb/common/string_util.hpp"
12
- #include "duckdb/storage/statistics/numeric_statistics.hpp"
13
12
 
14
13
  namespace duckdb {
15
14
 
@@ -8,7 +8,6 @@
8
8
  #include "duckdb/common/types/value.hpp"
9
9
  #include "duckdb/common/string_util.hpp"
10
10
  #include "duckdb/execution/expression_executor.hpp"
11
- #include "duckdb/storage/statistics/numeric_statistics.hpp"
12
11
 
13
12
  namespace duckdb {
14
13
 
@@ -588,18 +587,15 @@ static void DateTruncFunction(DataChunk &args, ExpressionState &state, Vector &r
588
587
  }
589
588
 
590
589
  template <class TA, class TR, class OP>
591
- static unique_ptr<BaseStatistics> DateTruncStatistics(vector<unique_ptr<BaseStatistics>> &child_stats) {
590
+ static unique_ptr<BaseStatistics> DateTruncStatistics(vector<BaseStatistics> &child_stats) {
592
591
  // we can only propagate date stats if the child has stats
593
- if (!child_stats[1]) {
594
- return nullptr;
595
- }
596
- auto &nstats = (NumericStatistics &)*child_stats[1];
597
- if (nstats.min.IsNull() || nstats.max.IsNull()) {
592
+ auto &nstats = child_stats[1];
593
+ if (!NumericStats::HasMinMax(nstats)) {
598
594
  return nullptr;
599
595
  }
600
596
  // run the operator on both the min and the max, this gives us the [min, max] bound
601
- auto min = nstats.min.GetValueUnsafe<TA>();
602
- auto max = nstats.max.GetValueUnsafe<TA>();
597
+ auto min = NumericStats::GetMinUnsafe<TA>(nstats);
598
+ auto max = NumericStats::GetMaxUnsafe<TA>(nstats);
603
599
  if (min > max) {
604
600
  return nullptr;
605
601
  }
@@ -610,11 +606,11 @@ static unique_ptr<BaseStatistics> DateTruncStatistics(vector<unique_ptr<BaseStat
610
606
 
611
607
  auto min_value = Value::CreateValue(min_part);
612
608
  auto max_value = Value::CreateValue(max_part);
613
- auto result = make_unique<NumericStatistics>(min_value.type(), min_value, max_value, StatisticsType::LOCAL_STATS);
614
- if (child_stats[0]->validity_stats) {
615
- result->validity_stats = child_stats[1]->validity_stats->Copy();
616
- }
617
- return std::move(result);
609
+ auto result = NumericStats::CreateEmpty(min_value.type());
610
+ NumericStats::SetMin(result, min_value);
611
+ NumericStats::SetMax(result, max_value);
612
+ result.CopyValidity(child_stats[0]);
613
+ return result.ToUnique();
618
614
  }
619
615
 
620
616
  template <class TA, class TR, class OP>
@@ -38,10 +38,8 @@ unique_ptr<FunctionData> StatsBind(ClientContext &context, ScalarFunction &bound
38
38
  static unique_ptr<BaseStatistics> StatsPropagateStats(ClientContext &context, FunctionStatisticsInput &input) {
39
39
  auto &child_stats = input.child_stats;
40
40
  auto &bind_data = input.bind_data;
41
- if (child_stats[0]) {
42
- auto &info = (StatsBindData &)*bind_data;
43
- info.stats = child_stats[0]->ToString();
44
- }
41
+ auto &info = (StatsBindData &)*bind_data;
42
+ info.stats = child_stats[0].ToString();
45
43
  return nullptr;
46
44
  }
47
45
 
@@ -1,7 +1,7 @@
1
1
  #include "duckdb/common/types/data_chunk.hpp"
2
2
  #include "duckdb/function/scalar/nested_functions.hpp"
3
3
  #include "duckdb/planner/expression/bound_function_expression.hpp"
4
- #include "duckdb/storage/statistics/list_statistics.hpp"
4
+ #include "duckdb/storage/statistics/list_stats.hpp"
5
5
 
6
6
  namespace duckdb {
7
7
 
@@ -121,17 +121,10 @@ static unique_ptr<FunctionData> ListFlattenBind(ClientContext &context, ScalarFu
121
121
 
122
122
  static unique_ptr<BaseStatistics> ListFlattenStats(ClientContext &context, FunctionStatisticsInput &input) {
123
123
  auto &child_stats = input.child_stats;
124
- if (!child_stats[0]) {
125
- return nullptr;
126
- }
127
- auto &list_stats = (ListStatistics &)*child_stats[0];
128
- if (!list_stats.child_stats || list_stats.child_stats->type == LogicalTypeId::SQLNULL) {
129
- return nullptr;
130
- }
131
-
132
- auto child_copy = list_stats.child_stats->Copy();
133
- child_copy->validity_stats = make_unique<ValidityStatistics>(true);
134
- return child_copy;
124
+ auto &list_child_stats = ListStats::GetChildStats(child_stats[0]);
125
+ auto child_copy = list_child_stats.Copy();
126
+ child_copy.Set(StatsInfo::CAN_HAVE_NULL_VALUES);
127
+ return child_copy.ToUnique();
135
128
  }
136
129
 
137
130
  void ListFlattenFun::RegisterFunction(BuiltinFunctions &set) {
@@ -3,8 +3,6 @@
3
3
  #include "duckdb/planner/expression/bound_function_expression.hpp"
4
4
  #include "duckdb/planner/expression/bound_parameter_expression.hpp"
5
5
  #include "duckdb/planner/expression_binder.hpp"
6
- #include "duckdb/storage/statistics/list_statistics.hpp"
7
- #include "duckdb/storage/statistics/validity_statistics.hpp"
8
6
 
9
7
  namespace duckdb {
10
8
 
@@ -108,14 +106,11 @@ static unique_ptr<FunctionData> ListConcatBind(ClientContext &context, ScalarFun
108
106
  static unique_ptr<BaseStatistics> ListConcatStats(ClientContext &context, FunctionStatisticsInput &input) {
109
107
  auto &child_stats = input.child_stats;
110
108
  D_ASSERT(child_stats.size() == 2);
111
- if (!child_stats[0] || !child_stats[1]) {
112
- return nullptr;
113
- }
114
109
 
115
- auto &left_stats = (ListStatistics &)*child_stats[0];
116
- auto &right_stats = (ListStatistics &)*child_stats[1];
110
+ auto &left_stats = child_stats[0];
111
+ auto &right_stats = child_stats[1];
117
112
 
118
- auto stats = left_stats.Copy();
113
+ auto stats = left_stats.ToUnique();
119
114
  stats->Merge(right_stats);
120
115
 
121
116
  return stats;
@@ -7,8 +7,7 @@
7
7
  #include "duckdb/function/scalar/string_functions.hpp"
8
8
  #include "duckdb/parser/expression/bound_expression.hpp"
9
9
  #include "duckdb/planner/expression/bound_function_expression.hpp"
10
- #include "duckdb/storage/statistics/list_statistics.hpp"
11
- #include "duckdb/storage/statistics/validity_statistics.hpp"
10
+ #include "duckdb/storage/statistics/list_stats.hpp"
12
11
 
13
12
  namespace duckdb {
14
13
 
@@ -212,17 +211,11 @@ static unique_ptr<FunctionData> ListExtractBind(ClientContext &context, ScalarFu
212
211
 
213
212
  static unique_ptr<BaseStatistics> ListExtractStats(ClientContext &context, FunctionStatisticsInput &input) {
214
213
  auto &child_stats = input.child_stats;
215
- if (!child_stats[0]) {
216
- return nullptr;
217
- }
218
- auto &list_stats = (ListStatistics &)*child_stats[0];
219
- if (!list_stats.child_stats) {
220
- return nullptr;
221
- }
222
- auto child_copy = list_stats.child_stats->Copy();
214
+ auto &list_child_stats = ListStats::GetChildStats(child_stats[0]);
215
+ auto child_copy = list_child_stats.Copy();
223
216
  // list_extract always pushes a NULL, since if the offset is out of range for a list it inserts a null
224
- child_copy->validity_stats = make_unique<ValidityStatistics>(true);
225
- return child_copy;
217
+ child_copy.Set(StatsInfo::CAN_HAVE_NULL_VALUES);
218
+ return child_copy.ToUnique();
226
219
  }
227
220
 
228
221
  void ListExtractFun::RegisterFunction(BuiltinFunctions &set) {
@@ -4,7 +4,7 @@
4
4
  #include "duckdb/function/scalar/nested_functions.hpp"
5
5
  #include "duckdb/common/types/data_chunk.hpp"
6
6
  #include "duckdb/common/pair.hpp"
7
- #include "duckdb/storage/statistics/list_statistics.hpp"
7
+ #include "duckdb/storage/statistics/list_stats.hpp"
8
8
  #include "duckdb/planner/expression_binder.hpp"
9
9
 
10
10
  namespace duckdb {
@@ -49,16 +49,12 @@ static unique_ptr<FunctionData> ListValueBind(ClientContext &context, ScalarFunc
49
49
  unique_ptr<BaseStatistics> ListValueStats(ClientContext &context, FunctionStatisticsInput &input) {
50
50
  auto &child_stats = input.child_stats;
51
51
  auto &expr = input.expr;
52
- auto list_stats = make_unique<ListStatistics>(expr.return_type);
52
+ auto list_stats = ListStats::CreateEmpty(expr.return_type);
53
+ auto &list_child_stats = ListStats::GetChildStats(list_stats);
53
54
  for (idx_t i = 0; i < child_stats.size(); i++) {
54
- if (child_stats[i]) {
55
- list_stats->child_stats->Merge(*child_stats[i]);
56
- } else {
57
- list_stats->child_stats.reset();
58
- return std::move(list_stats);
59
- }
55
+ list_child_stats.Merge(child_stats[i]);
60
56
  }
61
- return std::move(list_stats);
57
+ return list_stats.ToUnique();
62
58
  }
63
59
 
64
60
  void ListValueFun::RegisterFunction(BuiltinFunctions &set) {
@@ -8,7 +8,6 @@
8
8
  #include "duckdb/common/algorithm.hpp"
9
9
  #include "duckdb/execution/expression_executor.hpp"
10
10
  #include "duckdb/common/likely.hpp"
11
- #include "duckdb/storage/statistics/numeric_statistics.hpp"
12
11
  #include "duckdb/common/types/bit.hpp"
13
12
  #include <cmath>
14
13
  #include <errno.h>
@@ -77,25 +76,22 @@ static unique_ptr<BaseStatistics> PropagateAbsStats(ClientContext &context, Func
77
76
  auto &expr = input.expr;
78
77
  D_ASSERT(child_stats.size() == 1);
79
78
  // can only propagate stats if the children have stats
80
- if (!child_stats[0]) {
81
- return nullptr;
82
- }
83
- auto &lstats = (NumericStatistics &)*child_stats[0];
79
+ auto &lstats = child_stats[0];
84
80
  Value new_min, new_max;
85
81
  bool potential_overflow = true;
86
- if (!lstats.min.IsNull() && !lstats.max.IsNull()) {
82
+ if (NumericStats::HasMinMax(lstats)) {
87
83
  switch (expr.return_type.InternalType()) {
88
84
  case PhysicalType::INT8:
89
- potential_overflow = lstats.min.GetValue<int8_t>() == NumericLimits<int8_t>::Minimum();
85
+ potential_overflow = NumericStats::Min(lstats).GetValue<int8_t>() == NumericLimits<int8_t>::Minimum();
90
86
  break;
91
87
  case PhysicalType::INT16:
92
- potential_overflow = lstats.min.GetValue<int16_t>() == NumericLimits<int16_t>::Minimum();
88
+ potential_overflow = NumericStats::Min(lstats).GetValue<int16_t>() == NumericLimits<int16_t>::Minimum();
93
89
  break;
94
90
  case PhysicalType::INT32:
95
- potential_overflow = lstats.min.GetValue<int32_t>() == NumericLimits<int32_t>::Minimum();
91
+ potential_overflow = NumericStats::Min(lstats).GetValue<int32_t>() == NumericLimits<int32_t>::Minimum();
96
92
  break;
97
93
  case PhysicalType::INT64:
98
- potential_overflow = lstats.min.GetValue<int64_t>() == NumericLimits<int64_t>::Minimum();
94
+ potential_overflow = NumericStats::Min(lstats).GetValue<int64_t>() == NumericLimits<int64_t>::Minimum();
99
95
  break;
100
96
  default:
101
97
  return nullptr;
@@ -108,8 +104,8 @@ static unique_ptr<BaseStatistics> PropagateAbsStats(ClientContext &context, Func
108
104
  // no potential overflow
109
105
 
110
106
  // compute stats
111
- auto current_min = lstats.min.GetValue<int64_t>();
112
- auto current_max = lstats.max.GetValue<int64_t>();
107
+ auto current_min = NumericStats::Min(lstats).GetValue<int64_t>();
108
+ auto current_max = NumericStats::Max(lstats).GetValue<int64_t>();
113
109
 
114
110
  int64_t min_val, max_val;
115
111
 
@@ -125,16 +121,17 @@ static unique_ptr<BaseStatistics> PropagateAbsStats(ClientContext &context, Func
125
121
  } else {
126
122
  // if both current_min and current_max are > 0, then the abs is a no-op and can be removed entirely
127
123
  *input.expr_ptr = std::move(input.expr.children[0]);
128
- return std::move(child_stats[0]);
124
+ return child_stats[0].ToUnique();
129
125
  }
130
126
  new_min = Value::Numeric(expr.return_type, min_val);
131
127
  new_max = Value::Numeric(expr.return_type, max_val);
132
128
  expr.function.function = ScalarFunction::GetScalarUnaryFunction<AbsOperator>(expr.return_type);
133
129
  }
134
- auto stats = make_unique<NumericStatistics>(expr.return_type, std::move(new_min), std::move(new_max),
135
- StatisticsType::LOCAL_STATS);
136
- stats->validity_stats = lstats.validity_stats->Copy();
137
- return std::move(stats);
130
+ auto stats = NumericStats::CreateEmpty(expr.return_type);
131
+ NumericStats::SetMin(stats, new_min);
132
+ NumericStats::SetMax(stats, new_max);
133
+ stats.CopyValidity(lstats);
134
+ return stats.ToUnique();
138
135
  }
139
136
 
140
137
  template <class OP>
@@ -12,7 +12,6 @@
12
12
  #include "duckdb/common/vector_operations/vector_operations.hpp"
13
13
  #include "duckdb/function/scalar/operators.hpp"
14
14
  #include "duckdb/planner/expression/bound_function_expression.hpp"
15
- #include "duckdb/storage/statistics/numeric_statistics.hpp"
16
15
  #include "duckdb/function/scalar/nested_functions.hpp"
17
16
 
18
17
  #include <limits>
@@ -78,15 +77,15 @@ static scalar_function_t GetScalarBinaryFunction(PhysicalType type) {
78
77
  //===--------------------------------------------------------------------===//
79
78
  struct AddPropagateStatistics {
80
79
  template <class T, class OP>
81
- static bool Operation(LogicalType type, NumericStatistics &lstats, NumericStatistics &rstats, Value &new_min,
80
+ static bool Operation(LogicalType type, BaseStatistics &lstats, BaseStatistics &rstats, Value &new_min,
82
81
  Value &new_max) {
83
82
  T min, max;
84
83
  // new min is min+min
85
- if (!OP::Operation(lstats.min.GetValueUnsafe<T>(), rstats.min.GetValueUnsafe<T>(), min)) {
84
+ if (!OP::Operation(NumericStats::GetMinUnsafe<T>(lstats), NumericStats::GetMinUnsafe<T>(rstats), min)) {
86
85
  return true;
87
86
  }
88
87
  // new max is max+max
89
- if (!OP::Operation(lstats.max.GetValueUnsafe<T>(), rstats.max.GetValueUnsafe<T>(), max)) {
88
+ if (!OP::Operation(NumericStats::GetMaxUnsafe<T>(lstats), NumericStats::GetMaxUnsafe<T>(rstats), max)) {
90
89
  return true;
91
90
  }
92
91
  new_min = Value::Numeric(type, min);
@@ -97,13 +96,13 @@ struct AddPropagateStatistics {
97
96
 
98
97
  struct SubtractPropagateStatistics {
99
98
  template <class T, class OP>
100
- static bool Operation(LogicalType type, NumericStatistics &lstats, NumericStatistics &rstats, Value &new_min,
99
+ static bool Operation(LogicalType type, BaseStatistics &lstats, BaseStatistics &rstats, Value &new_min,
101
100
  Value &new_max) {
102
101
  T min, max;
103
- if (!OP::Operation(lstats.min.GetValueUnsafe<T>(), rstats.max.GetValueUnsafe<T>(), min)) {
102
+ if (!OP::Operation(NumericStats::GetMinUnsafe<T>(lstats), NumericStats::GetMaxUnsafe<T>(rstats), min)) {
104
103
  return true;
105
104
  }
106
- if (!OP::Operation(lstats.max.GetValueUnsafe<T>(), rstats.min.GetValueUnsafe<T>(), max)) {
105
+ if (!OP::Operation(NumericStats::GetMaxUnsafe<T>(lstats), NumericStats::GetMinUnsafe<T>(rstats), max)) {
107
106
  return true;
108
107
  }
109
108
  new_min = Value::Numeric(type, min);
@@ -136,14 +135,11 @@ static unique_ptr<BaseStatistics> PropagateNumericStats(ClientContext &context,
136
135
  auto &expr = input.expr;
137
136
  D_ASSERT(child_stats.size() == 2);
138
137
  // can only propagate stats if the children have stats
139
- if (!child_stats[0] || !child_stats[1]) {
140
- return nullptr;
141
- }
142
- auto &lstats = (NumericStatistics &)*child_stats[0];
143
- auto &rstats = (NumericStatistics &)*child_stats[1];
138
+ auto &lstats = child_stats[0];
139
+ auto &rstats = child_stats[1];
144
140
  Value new_min, new_max;
145
141
  bool potential_overflow = true;
146
- if (!lstats.min.IsNull() && !lstats.max.IsNull() && !rstats.min.IsNull() && !rstats.max.IsNull()) {
142
+ if (NumericStats::HasMinMax(lstats) && NumericStats::HasMinMax(rstats)) {
147
143
  switch (expr.return_type.InternalType()) {
148
144
  case PhysicalType::INT8:
149
145
  potential_overflow =
@@ -176,10 +172,11 @@ static unique_ptr<BaseStatistics> PropagateNumericStats(ClientContext &context,
176
172
  }
177
173
  expr.function.function = GetScalarIntegerFunction<BASEOP>(expr.return_type.InternalType());
178
174
  }
179
- auto stats = make_unique<NumericStatistics>(expr.return_type, std::move(new_min), std::move(new_max),
180
- StatisticsType::LOCAL_STATS);
181
- stats->validity_stats = ValidityStatistics::Combine(lstats.validity_stats, rstats.validity_stats);
182
- return std::move(stats);
175
+ auto result = NumericStats::CreateEmpty(expr.return_type);
176
+ NumericStats::SetMin(result, new_min);
177
+ NumericStats::SetMax(result, new_max);
178
+ result.CombineValidity(lstats, rstats);
179
+ return result.ToUnique();
183
180
  }
184
181
 
185
182
  template <class OP, class OPOVERFLOWCHECK, bool IS_SUBTRACT = false>
@@ -491,9 +488,9 @@ unique_ptr<FunctionData> DecimalNegateBind(ClientContext &context, ScalarFunctio
491
488
 
492
489
  struct NegatePropagateStatistics {
493
490
  template <class T>
494
- static bool Operation(LogicalType type, NumericStatistics &istats, Value &new_min, Value &new_max) {
495
- auto max_value = istats.max.GetValueUnsafe<T>();
496
- auto min_value = istats.min.GetValueUnsafe<T>();
491
+ static bool Operation(LogicalType type, BaseStatistics &istats, Value &new_min, Value &new_max) {
492
+ auto max_value = NumericStats::GetMaxUnsafe<T>(istats);
493
+ auto min_value = NumericStats::GetMinUnsafe<T>(istats);
497
494
  if (!NegateOperator::CanNegate<T>(min_value) || !NegateOperator::CanNegate<T>(max_value)) {
498
495
  return true;
499
496
  }
@@ -510,13 +507,10 @@ static unique_ptr<BaseStatistics> NegateBindStatistics(ClientContext &context, F
510
507
  auto &expr = input.expr;
511
508
  D_ASSERT(child_stats.size() == 1);
512
509
  // can only propagate stats if the children have stats
513
- if (!child_stats[0]) {
514
- return nullptr;
515
- }
516
- auto &istats = (NumericStatistics &)*child_stats[0];
510
+ auto &istats = child_stats[0];
517
511
  Value new_min, new_max;
518
512
  bool potential_overflow = true;
519
- if (!istats.min.IsNull() && !istats.max.IsNull()) {
513
+ if (NumericStats::HasMinMax(istats)) {
520
514
  switch (expr.return_type.InternalType()) {
521
515
  case PhysicalType::INT8:
522
516
  potential_overflow =
@@ -542,12 +536,11 @@ static unique_ptr<BaseStatistics> NegateBindStatistics(ClientContext &context, F
542
536
  new_min = Value(expr.return_type);
543
537
  new_max = Value(expr.return_type);
544
538
  }
545
- auto stats = make_unique<NumericStatistics>(expr.return_type, std::move(new_min), std::move(new_max),
546
- StatisticsType::LOCAL_STATS);
547
- if (istats.validity_stats) {
548
- stats->validity_stats = istats.validity_stats->Copy();
549
- }
550
- return std::move(stats);
539
+ auto stats = NumericStats::CreateEmpty(expr.return_type);
540
+ NumericStats::SetMin(stats, new_min);
541
+ NumericStats::SetMax(stats, new_max);
542
+ stats.CopyValidity(istats);
543
+ return stats.ToUnique();
551
544
  }
552
545
 
553
546
  ScalarFunction SubtractFun::GetFunction(const LogicalType &type) {
@@ -662,7 +655,7 @@ void SubtractFun::RegisterFunction(BuiltinFunctions &set) {
662
655
  //===--------------------------------------------------------------------===//
663
656
  struct MultiplyPropagateStatistics {
664
657
  template <class T, class OP>
665
- static bool Operation(LogicalType type, NumericStatistics &lstats, NumericStatistics &rstats, Value &new_min,
658
+ static bool Operation(LogicalType type, BaseStatistics &lstats, BaseStatistics &rstats, Value &new_min,
666
659
  Value &new_max) {
667
660
  // statistics propagation on the multiplication is slightly less straightforward because of negative numbers
668
661
  // the new min/max depend on the signs of the input types
@@ -671,8 +664,8 @@ struct MultiplyPropagateStatistics {
671
664
  // etc
672
665
  // rather than doing all this switcheroo we just multiply all combinations of lmin/lmax with rmin/rmax
673
666
  // and check what the minimum/maximum value is
674
- T lvals[] {lstats.min.GetValueUnsafe<T>(), lstats.max.GetValueUnsafe<T>()};
675
- T rvals[] {rstats.min.GetValueUnsafe<T>(), rstats.max.GetValueUnsafe<T>()};
667
+ T lvals[] {NumericStats::GetMinUnsafe<T>(lstats), NumericStats::GetMaxUnsafe<T>(lstats)};
668
+ T rvals[] {NumericStats::GetMinUnsafe<T>(rstats), NumericStats::GetMaxUnsafe<T>(rstats)};
676
669
  T min = NumericLimits<T>::Maximum();
677
670
  T max = NumericLimits<T>::Minimum();
678
671
  // multiplications
@@ -4,7 +4,7 @@
4
4
  #include "duckdb/common/vector_operations/vector_operations.hpp"
5
5
  #include "duckdb/common/vector_operations/unary_executor.hpp"
6
6
  #include "duckdb/planner/expression/bound_function_expression.hpp"
7
- #include "duckdb/storage/statistics/string_statistics.hpp"
7
+
8
8
  #include "utf8proc.hpp"
9
9
 
10
10
  #include <string.h>
@@ -153,11 +153,7 @@ static unique_ptr<BaseStatistics> CaseConvertPropagateStats(ClientContext &conte
153
153
  auto &expr = input.expr;
154
154
  D_ASSERT(child_stats.size() == 1);
155
155
  // can only propagate stats if the children have stats
156
- if (!child_stats[0]) {
157
- return nullptr;
158
- }
159
- auto &sstats = (StringStatistics &)*child_stats[0];
160
- if (!sstats.has_unicode) {
156
+ if (!StringStats::CanContainUnicode(child_stats[0])) {
161
157
  expr.function.function = CaseConvertFunctionASCII<IS_UPPER>;
162
158
  }
163
159
  return nullptr;
@@ -3,7 +3,7 @@
3
3
  #include "duckdb/common/exception.hpp"
4
4
  #include "duckdb/common/vector_operations/vector_operations.hpp"
5
5
  #include "duckdb/planner/expression/bound_function_expression.hpp"
6
- #include "duckdb/storage/statistics/string_statistics.hpp"
6
+
7
7
  #include "utf8proc.hpp"
8
8
 
9
9
  namespace duckdb {
@@ -42,12 +42,8 @@ static unique_ptr<BaseStatistics> InStrPropagateStats(ClientContext &context, Fu
42
42
  auto &expr = input.expr;
43
43
  D_ASSERT(child_stats.size() == 2);
44
44
  // can only propagate stats if the children have stats
45
- if (!child_stats[0]) {
46
- return nullptr;
47
- }
48
45
  // for strpos, we only care if the FIRST string has unicode or not
49
- auto &sstats = (StringStatistics &)*child_stats[0];
50
- if (!sstats.has_unicode) {
46
+ if (!StringStats::CanContainUnicode(child_stats[0])) {
51
47
  expr.function.function = ScalarFunction::BinaryFunction<string_t, string_t, int64_t, InstrAsciiOperator>;
52
48
  }
53
49
  return nullptr;
@@ -4,7 +4,7 @@
4
4
  #include "duckdb/common/exception.hpp"
5
5
  #include "duckdb/common/vector_operations/vector_operations.hpp"
6
6
  #include "duckdb/planner/expression/bound_function_expression.hpp"
7
- #include "duckdb/storage/statistics/string_statistics.hpp"
7
+
8
8
  #include "duckdb/planner/expression/bound_parameter_expression.hpp"
9
9
  #include "utf8proc.hpp"
10
10
 
@@ -78,11 +78,7 @@ static unique_ptr<BaseStatistics> LengthPropagateStats(ClientContext &context, F
78
78
  auto &expr = input.expr;
79
79
  D_ASSERT(child_stats.size() == 1);
80
80
  // can only propagate stats if the children have stats
81
- if (!child_stats[0]) {
82
- return nullptr;
83
- }
84
- auto &sstats = (StringStatistics &)*child_stats[0];
85
- if (!sstats.has_unicode) {
81
+ if (!StringStats::CanContainUnicode(child_stats[0])) {
86
82
  expr.function.function = ScalarFunction::UnaryFunction<string_t, int64_t, StrLenOperator>;
87
83
  }
88
84
  return nullptr;
@@ -2,7 +2,7 @@
2
2
  #include "duckdb/common/vector_operations/vector_operations.hpp"
3
3
  #include "duckdb/function/scalar/string_functions.hpp"
4
4
  #include "duckdb/planner/expression/bound_function_expression.hpp"
5
- #include "duckdb/storage/statistics/string_statistics.hpp"
5
+
6
6
  #include "duckdb/execution/expression_executor.hpp"
7
7
 
8
8
  namespace duckdb {
@@ -482,11 +482,7 @@ static unique_ptr<BaseStatistics> ILikePropagateStats(ClientContext &context, Fu
482
482
  auto &expr = input.expr;
483
483
  D_ASSERT(child_stats.size() >= 1);
484
484
  // can only propagate stats if the children have stats
485
- if (!child_stats[0]) {
486
- return nullptr;
487
- }
488
- auto &sstats = (StringStatistics &)*child_stats[0];
489
- if (!sstats.has_unicode) {
485
+ if (!StringStats::CanContainUnicode(child_stats[0])) {
490
486
  expr.function.function = ScalarFunction::BinaryFunction<string_t, string_t, bool, ASCII_OP>;
491
487
  }
492
488
  return nullptr;
@@ -4,7 +4,7 @@
4
4
  #include "duckdb/common/exception.hpp"
5
5
  #include "duckdb/common/vector_operations/vector_operations.hpp"
6
6
  #include "duckdb/common/vector_operations/ternary_executor.hpp"
7
- #include "duckdb/storage/statistics/string_statistics.hpp"
7
+
8
8
  #include "duckdb/planner/expression/bound_function_expression.hpp"
9
9
  #include "utf8proc.hpp"
10
10
  #include "duckdb/common/types/blob.hpp"
@@ -307,12 +307,8 @@ static unique_ptr<BaseStatistics> SubstringPropagateStats(ClientContext &context
307
307
  auto &child_stats = input.child_stats;
308
308
  auto &expr = input.expr;
309
309
  // can only propagate stats if the children have stats
310
- if (!child_stats[0]) {
311
- return nullptr;
312
- }
313
310
  // we only care about the stats of the first child (i.e. the string)
314
- auto &sstats = (StringStatistics &)*child_stats[0];
315
- if (!sstats.has_unicode) {
311
+ if (!StringStats::CanContainUnicode(child_stats[0])) {
316
312
  expr.function.function = SubstringFunctionASCII;
317
313
  }
318
314
  return nullptr;
@@ -3,7 +3,7 @@
3
3
  #include "duckdb/function/scalar/nested_functions.hpp"
4
4
  #include "duckdb/planner/expression/bound_function_expression.hpp"
5
5
  #include "duckdb/planner/expression/bound_parameter_expression.hpp"
6
- #include "duckdb/storage/statistics/struct_statistics.hpp"
6
+ #include "duckdb/storage/statistics/struct_stats.hpp"
7
7
 
8
8
  namespace duckdb {
9
9
 
@@ -102,15 +102,10 @@ static unique_ptr<FunctionData> StructExtractBind(ClientContext &context, Scalar
102
102
  static unique_ptr<BaseStatistics> PropagateStructExtractStats(ClientContext &context, FunctionStatisticsInput &input) {
103
103
  auto &child_stats = input.child_stats;
104
104
  auto &bind_data = input.bind_data;
105
- if (!child_stats[0]) {
106
- return nullptr;
107
- }
108
- auto &struct_stats = (StructStatistics &)*child_stats[0];
105
+
109
106
  auto &info = (StructExtractBindData &)*bind_data;
110
- if (info.index >= struct_stats.child_stats.size() || !struct_stats.child_stats[info.index]) {
111
- return nullptr;
112
- }
113
- return struct_stats.child_stats[info.index]->Copy();
107
+ auto struct_child_stats = StructStats::GetChildStats(child_stats[0]);
108
+ return struct_child_stats[info.index].ToUnique();
114
109
  }
115
110
 
116
111
  ScalarFunction StructExtractFun::GetFunction() {