duckdb 0.7.2-dev225.0 → 0.7.2-dev314.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/column_reader.cpp +5 -6
- package/src/duckdb/extension/parquet/include/column_reader.hpp +1 -2
- package/src/duckdb/extension/parquet/include/generated_column_reader.hpp +1 -11
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +26 -32
- package/src/duckdb/src/common/sort/sort_state.cpp +5 -7
- package/src/duckdb/src/execution/column_binding_resolver.cpp +6 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +4 -5
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
- package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +2 -3
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +32 -6
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +15 -15
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +18 -12
- package/src/duckdb/src/function/aggregate/distributive/bitstring_agg.cpp +6 -13
- package/src/duckdb/src/function/aggregate/distributive/count.cpp +2 -4
- package/src/duckdb/src/function/aggregate/distributive/sum.cpp +11 -13
- package/src/duckdb/src/function/scalar/date/date_diff.cpp +0 -1
- package/src/duckdb/src/function/scalar/date/date_part.cpp +17 -25
- package/src/duckdb/src/function/scalar/date/date_sub.cpp +0 -1
- package/src/duckdb/src/function/scalar/date/date_trunc.cpp +10 -14
- package/src/duckdb/src/function/scalar/generic/stats.cpp +2 -4
- package/src/duckdb/src/function/scalar/list/flatten.cpp +5 -12
- package/src/duckdb/src/function/scalar/list/list_concat.cpp +3 -8
- package/src/duckdb/src/function/scalar/list/list_extract.cpp +5 -12
- package/src/duckdb/src/function/scalar/list/list_value.cpp +5 -9
- package/src/duckdb/src/function/scalar/math/numeric.cpp +14 -17
- package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +27 -34
- package/src/duckdb/src/function/scalar/string/caseconvert.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/instr.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/length.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/like.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/substring.cpp +2 -6
- package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +4 -9
- package/src/duckdb/src/function/scalar/struct/struct_insert.cpp +10 -13
- package/src/duckdb/src/function/scalar/struct/struct_pack.cpp +5 -6
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +12 -3
- package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_fetch.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_fetch.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +93 -31
- package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +22 -3
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +6 -6
- package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +41 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +157 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/segment_statistics.hpp +2 -7
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +74 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +42 -0
- package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +2 -3
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -3
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
- package/src/duckdb/src/main/config.cpp +66 -1
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +0 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_aggregate.cpp +9 -3
- package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +6 -7
- package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -11
- package/src/duckdb/src/optimizer/statistics/expression/propagate_columnref.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +13 -15
- package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +0 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_constant.cpp +3 -75
- package/src/duckdb/src/optimizer/statistics/expression/propagate_function.cpp +7 -2
- package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +10 -0
- package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +2 -3
- package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +28 -31
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +4 -5
- package/src/duckdb/src/optimizer/statistics/operator/propagate_set_operation.cpp +3 -3
- package/src/duckdb/src/optimizer/statistics_propagator.cpp +1 -1
- package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +4 -0
- package/src/duckdb/src/planner/bind_context.cpp +16 -0
- package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +0 -1
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +9 -0
- package/src/duckdb/src/planner/binder.cpp +2 -1
- package/src/duckdb/src/planner/bound_result_modifier.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +1 -1
- package/src/duckdb/src/planner/filter/constant_filter.cpp +4 -6
- package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -1
- package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +1 -4
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +4 -4
- package/src/duckdb/src/storage/compression/bitpacking.cpp +3 -3
- package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +3 -3
- package/src/duckdb/src/storage/compression/numeric_constant.cpp +9 -10
- package/src/duckdb/src/storage/compression/patas.cpp +1 -1
- package/src/duckdb/src/storage/compression/rle.cpp +2 -2
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +5 -5
- package/src/duckdb/src/storage/data_table.cpp +4 -6
- package/src/duckdb/src/storage/statistics/base_statistics.cpp +373 -128
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +58 -3
- package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +4 -9
- package/src/duckdb/src/storage/statistics/list_stats.cpp +117 -0
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +529 -0
- package/src/duckdb/src/storage/statistics/segment_statistics.cpp +2 -11
- package/src/duckdb/src/storage/statistics/string_stats.cpp +273 -0
- package/src/duckdb/src/storage/statistics/struct_stats.cpp +131 -0
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -4
- package/src/duckdb/src/storage/table/column_data.cpp +16 -14
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +2 -3
- package/src/duckdb/src/storage/table/column_segment.cpp +6 -8
- package/src/duckdb/src/storage/table/list_column_data.cpp +7 -11
- package/src/duckdb/src/storage/table/row_group.cpp +24 -23
- package/src/duckdb/src/storage/table/row_group_collection.cpp +12 -12
- package/src/duckdb/src/storage/table/standard_column_data.cpp +6 -6
- package/src/duckdb/src/storage/table/struct_column_data.cpp +15 -16
- package/src/duckdb/src/storage/table/table_statistics.cpp +27 -7
- package/src/duckdb/src/storage/table/update_segment.cpp +10 -12
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +923 -919
- package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +2 -0
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +15684 -15571
- package/src/duckdb/ub_src_storage_statistics.cpp +4 -6
- package/src/duckdb/src/include/duckdb/storage/statistics/list_statistics.hpp +0 -36
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_statistics.hpp +0 -75
- package/src/duckdb/src/include/duckdb/storage/statistics/string_statistics.hpp +0 -49
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_statistics.hpp +0 -36
- package/src/duckdb/src/include/duckdb/storage/statistics/validity_statistics.hpp +0 -45
- package/src/duckdb/src/storage/statistics/list_statistics.cpp +0 -94
- package/src/duckdb/src/storage/statistics/numeric_statistics.cpp +0 -307
- package/src/duckdb/src/storage/statistics/string_statistics.cpp +0 -220
- package/src/duckdb/src/storage/statistics/struct_statistics.cpp +0 -108
- package/src/duckdb/src/storage/statistics/validity_statistics.cpp +0 -91
@@ -9,7 +9,7 @@
|
|
9
9
|
#include "duckdb/function/scalar/date_functions.hpp"
|
10
10
|
#include "duckdb/function/scalar/nested_functions.hpp"
|
11
11
|
#include "duckdb/planner/expression/bound_function_expression.hpp"
|
12
|
-
|
12
|
+
|
13
13
|
#include "duckdb/common/field_writer.hpp"
|
14
14
|
|
15
15
|
namespace duckdb {
|
@@ -161,34 +161,27 @@ DatePartSpecifier GetDateTypePartSpecifier(const string &specifier, LogicalType
|
|
161
161
|
}
|
162
162
|
|
163
163
|
template <int64_t MIN, int64_t MAX>
|
164
|
-
static unique_ptr<BaseStatistics> PropagateSimpleDatePartStatistics(vector<
|
164
|
+
static unique_ptr<BaseStatistics> PropagateSimpleDatePartStatistics(vector<BaseStatistics> &child_stats) {
|
165
165
|
// we can always propagate simple date part statistics
|
166
166
|
// since the min and max can never exceed these bounds
|
167
|
-
auto result =
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
} else if (child_stats[0]->validity_stats) {
|
173
|
-
result->validity_stats = child_stats[0]->validity_stats->Copy();
|
174
|
-
}
|
175
|
-
return std::move(result);
|
167
|
+
auto result = NumericStats::CreateEmpty(LogicalType::BIGINT);
|
168
|
+
result.CopyValidity(child_stats[0]);
|
169
|
+
NumericStats::SetMin(result, Value::BIGINT(MIN));
|
170
|
+
NumericStats::SetMax(result, Value::BIGINT(MAX));
|
171
|
+
return result.ToUnique();
|
176
172
|
}
|
177
173
|
|
178
174
|
struct DatePart {
|
179
175
|
template <class T, class OP>
|
180
|
-
static unique_ptr<BaseStatistics> PropagateDatePartStatistics(vector<
|
176
|
+
static unique_ptr<BaseStatistics> PropagateDatePartStatistics(vector<BaseStatistics> &child_stats) {
|
181
177
|
// we can only propagate complex date part stats if the child has stats
|
182
|
-
|
183
|
-
|
184
|
-
}
|
185
|
-
auto &nstats = (NumericStatistics &)*child_stats[0];
|
186
|
-
if (nstats.min.IsNull() || nstats.max.IsNull()) {
|
178
|
+
auto &nstats = child_stats[0];
|
179
|
+
if (!NumericStats::HasMinMax(nstats)) {
|
187
180
|
return nullptr;
|
188
181
|
}
|
189
182
|
// run the operator on both the min and the max, this gives us the [min, max] bound
|
190
|
-
auto min =
|
191
|
-
auto max =
|
183
|
+
auto min = NumericStats::GetMinUnsafe<T>(nstats);
|
184
|
+
auto max = NumericStats::GetMaxUnsafe<T>(nstats);
|
192
185
|
if (min > max) {
|
193
186
|
return nullptr;
|
194
187
|
}
|
@@ -198,12 +191,11 @@ struct DatePart {
|
|
198
191
|
}
|
199
192
|
auto min_part = OP::template Operation<T, int64_t>(min);
|
200
193
|
auto max_part = OP::template Operation<T, int64_t>(max);
|
201
|
-
auto result =
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
return std::move(result);
|
194
|
+
auto result = NumericStats::CreateEmpty(LogicalType::BIGINT);
|
195
|
+
NumericStats::SetMin(result, Value::BIGINT(min_part));
|
196
|
+
NumericStats::SetMax(result, Value::BIGINT(max_part));
|
197
|
+
result.CopyValidity(child_stats[0]);
|
198
|
+
return result.ToUnique();
|
207
199
|
}
|
208
200
|
|
209
201
|
template <typename OP>
|
@@ -9,7 +9,6 @@
|
|
9
9
|
#include "duckdb/common/vector_operations/ternary_executor.hpp"
|
10
10
|
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
11
11
|
#include "duckdb/common/string_util.hpp"
|
12
|
-
#include "duckdb/storage/statistics/numeric_statistics.hpp"
|
13
12
|
|
14
13
|
namespace duckdb {
|
15
14
|
|
@@ -8,7 +8,6 @@
|
|
8
8
|
#include "duckdb/common/types/value.hpp"
|
9
9
|
#include "duckdb/common/string_util.hpp"
|
10
10
|
#include "duckdb/execution/expression_executor.hpp"
|
11
|
-
#include "duckdb/storage/statistics/numeric_statistics.hpp"
|
12
11
|
|
13
12
|
namespace duckdb {
|
14
13
|
|
@@ -588,18 +587,15 @@ static void DateTruncFunction(DataChunk &args, ExpressionState &state, Vector &r
|
|
588
587
|
}
|
589
588
|
|
590
589
|
template <class TA, class TR, class OP>
|
591
|
-
static unique_ptr<BaseStatistics> DateTruncStatistics(vector<
|
590
|
+
static unique_ptr<BaseStatistics> DateTruncStatistics(vector<BaseStatistics> &child_stats) {
|
592
591
|
// we can only propagate date stats if the child has stats
|
593
|
-
|
594
|
-
|
595
|
-
}
|
596
|
-
auto &nstats = (NumericStatistics &)*child_stats[1];
|
597
|
-
if (nstats.min.IsNull() || nstats.max.IsNull()) {
|
592
|
+
auto &nstats = child_stats[1];
|
593
|
+
if (!NumericStats::HasMinMax(nstats)) {
|
598
594
|
return nullptr;
|
599
595
|
}
|
600
596
|
// run the operator on both the min and the max, this gives us the [min, max] bound
|
601
|
-
auto min =
|
602
|
-
auto max =
|
597
|
+
auto min = NumericStats::GetMinUnsafe<TA>(nstats);
|
598
|
+
auto max = NumericStats::GetMaxUnsafe<TA>(nstats);
|
603
599
|
if (min > max) {
|
604
600
|
return nullptr;
|
605
601
|
}
|
@@ -610,11 +606,11 @@ static unique_ptr<BaseStatistics> DateTruncStatistics(vector<unique_ptr<BaseStat
|
|
610
606
|
|
611
607
|
auto min_value = Value::CreateValue(min_part);
|
612
608
|
auto max_value = Value::CreateValue(max_part);
|
613
|
-
auto result =
|
614
|
-
|
615
|
-
|
616
|
-
|
617
|
-
return
|
609
|
+
auto result = NumericStats::CreateEmpty(min_value.type());
|
610
|
+
NumericStats::SetMin(result, min_value);
|
611
|
+
NumericStats::SetMax(result, max_value);
|
612
|
+
result.CopyValidity(child_stats[0]);
|
613
|
+
return result.ToUnique();
|
618
614
|
}
|
619
615
|
|
620
616
|
template <class TA, class TR, class OP>
|
@@ -38,10 +38,8 @@ unique_ptr<FunctionData> StatsBind(ClientContext &context, ScalarFunction &bound
|
|
38
38
|
static unique_ptr<BaseStatistics> StatsPropagateStats(ClientContext &context, FunctionStatisticsInput &input) {
|
39
39
|
auto &child_stats = input.child_stats;
|
40
40
|
auto &bind_data = input.bind_data;
|
41
|
-
|
42
|
-
|
43
|
-
info.stats = child_stats[0]->ToString();
|
44
|
-
}
|
41
|
+
auto &info = (StatsBindData &)*bind_data;
|
42
|
+
info.stats = child_stats[0].ToString();
|
45
43
|
return nullptr;
|
46
44
|
}
|
47
45
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
#include "duckdb/common/types/data_chunk.hpp"
|
2
2
|
#include "duckdb/function/scalar/nested_functions.hpp"
|
3
3
|
#include "duckdb/planner/expression/bound_function_expression.hpp"
|
4
|
-
#include "duckdb/storage/statistics/
|
4
|
+
#include "duckdb/storage/statistics/list_stats.hpp"
|
5
5
|
|
6
6
|
namespace duckdb {
|
7
7
|
|
@@ -121,17 +121,10 @@ static unique_ptr<FunctionData> ListFlattenBind(ClientContext &context, ScalarFu
|
|
121
121
|
|
122
122
|
static unique_ptr<BaseStatistics> ListFlattenStats(ClientContext &context, FunctionStatisticsInput &input) {
|
123
123
|
auto &child_stats = input.child_stats;
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
if (!list_stats.child_stats || list_stats.child_stats->type == LogicalTypeId::SQLNULL) {
|
129
|
-
return nullptr;
|
130
|
-
}
|
131
|
-
|
132
|
-
auto child_copy = list_stats.child_stats->Copy();
|
133
|
-
child_copy->validity_stats = make_unique<ValidityStatistics>(true);
|
134
|
-
return child_copy;
|
124
|
+
auto &list_child_stats = ListStats::GetChildStats(child_stats[0]);
|
125
|
+
auto child_copy = list_child_stats.Copy();
|
126
|
+
child_copy.Set(StatsInfo::CAN_HAVE_NULL_VALUES);
|
127
|
+
return child_copy.ToUnique();
|
135
128
|
}
|
136
129
|
|
137
130
|
void ListFlattenFun::RegisterFunction(BuiltinFunctions &set) {
|
@@ -3,8 +3,6 @@
|
|
3
3
|
#include "duckdb/planner/expression/bound_function_expression.hpp"
|
4
4
|
#include "duckdb/planner/expression/bound_parameter_expression.hpp"
|
5
5
|
#include "duckdb/planner/expression_binder.hpp"
|
6
|
-
#include "duckdb/storage/statistics/list_statistics.hpp"
|
7
|
-
#include "duckdb/storage/statistics/validity_statistics.hpp"
|
8
6
|
|
9
7
|
namespace duckdb {
|
10
8
|
|
@@ -108,14 +106,11 @@ static unique_ptr<FunctionData> ListConcatBind(ClientContext &context, ScalarFun
|
|
108
106
|
static unique_ptr<BaseStatistics> ListConcatStats(ClientContext &context, FunctionStatisticsInput &input) {
|
109
107
|
auto &child_stats = input.child_stats;
|
110
108
|
D_ASSERT(child_stats.size() == 2);
|
111
|
-
if (!child_stats[0] || !child_stats[1]) {
|
112
|
-
return nullptr;
|
113
|
-
}
|
114
109
|
|
115
|
-
auto &left_stats =
|
116
|
-
auto &right_stats =
|
110
|
+
auto &left_stats = child_stats[0];
|
111
|
+
auto &right_stats = child_stats[1];
|
117
112
|
|
118
|
-
auto stats = left_stats.
|
113
|
+
auto stats = left_stats.ToUnique();
|
119
114
|
stats->Merge(right_stats);
|
120
115
|
|
121
116
|
return stats;
|
@@ -7,8 +7,7 @@
|
|
7
7
|
#include "duckdb/function/scalar/string_functions.hpp"
|
8
8
|
#include "duckdb/parser/expression/bound_expression.hpp"
|
9
9
|
#include "duckdb/planner/expression/bound_function_expression.hpp"
|
10
|
-
#include "duckdb/storage/statistics/
|
11
|
-
#include "duckdb/storage/statistics/validity_statistics.hpp"
|
10
|
+
#include "duckdb/storage/statistics/list_stats.hpp"
|
12
11
|
|
13
12
|
namespace duckdb {
|
14
13
|
|
@@ -212,17 +211,11 @@ static unique_ptr<FunctionData> ListExtractBind(ClientContext &context, ScalarFu
|
|
212
211
|
|
213
212
|
static unique_ptr<BaseStatistics> ListExtractStats(ClientContext &context, FunctionStatisticsInput &input) {
|
214
213
|
auto &child_stats = input.child_stats;
|
215
|
-
|
216
|
-
|
217
|
-
}
|
218
|
-
auto &list_stats = (ListStatistics &)*child_stats[0];
|
219
|
-
if (!list_stats.child_stats) {
|
220
|
-
return nullptr;
|
221
|
-
}
|
222
|
-
auto child_copy = list_stats.child_stats->Copy();
|
214
|
+
auto &list_child_stats = ListStats::GetChildStats(child_stats[0]);
|
215
|
+
auto child_copy = list_child_stats.Copy();
|
223
216
|
// list_extract always pushes a NULL, since if the offset is out of range for a list it inserts a null
|
224
|
-
child_copy
|
225
|
-
return child_copy;
|
217
|
+
child_copy.Set(StatsInfo::CAN_HAVE_NULL_VALUES);
|
218
|
+
return child_copy.ToUnique();
|
226
219
|
}
|
227
220
|
|
228
221
|
void ListExtractFun::RegisterFunction(BuiltinFunctions &set) {
|
@@ -4,7 +4,7 @@
|
|
4
4
|
#include "duckdb/function/scalar/nested_functions.hpp"
|
5
5
|
#include "duckdb/common/types/data_chunk.hpp"
|
6
6
|
#include "duckdb/common/pair.hpp"
|
7
|
-
#include "duckdb/storage/statistics/
|
7
|
+
#include "duckdb/storage/statistics/list_stats.hpp"
|
8
8
|
#include "duckdb/planner/expression_binder.hpp"
|
9
9
|
|
10
10
|
namespace duckdb {
|
@@ -49,16 +49,12 @@ static unique_ptr<FunctionData> ListValueBind(ClientContext &context, ScalarFunc
|
|
49
49
|
unique_ptr<BaseStatistics> ListValueStats(ClientContext &context, FunctionStatisticsInput &input) {
|
50
50
|
auto &child_stats = input.child_stats;
|
51
51
|
auto &expr = input.expr;
|
52
|
-
auto list_stats =
|
52
|
+
auto list_stats = ListStats::CreateEmpty(expr.return_type);
|
53
|
+
auto &list_child_stats = ListStats::GetChildStats(list_stats);
|
53
54
|
for (idx_t i = 0; i < child_stats.size(); i++) {
|
54
|
-
|
55
|
-
list_stats->child_stats->Merge(*child_stats[i]);
|
56
|
-
} else {
|
57
|
-
list_stats->child_stats.reset();
|
58
|
-
return std::move(list_stats);
|
59
|
-
}
|
55
|
+
list_child_stats.Merge(child_stats[i]);
|
60
56
|
}
|
61
|
-
return
|
57
|
+
return list_stats.ToUnique();
|
62
58
|
}
|
63
59
|
|
64
60
|
void ListValueFun::RegisterFunction(BuiltinFunctions &set) {
|
@@ -8,7 +8,6 @@
|
|
8
8
|
#include "duckdb/common/algorithm.hpp"
|
9
9
|
#include "duckdb/execution/expression_executor.hpp"
|
10
10
|
#include "duckdb/common/likely.hpp"
|
11
|
-
#include "duckdb/storage/statistics/numeric_statistics.hpp"
|
12
11
|
#include "duckdb/common/types/bit.hpp"
|
13
12
|
#include <cmath>
|
14
13
|
#include <errno.h>
|
@@ -77,25 +76,22 @@ static unique_ptr<BaseStatistics> PropagateAbsStats(ClientContext &context, Func
|
|
77
76
|
auto &expr = input.expr;
|
78
77
|
D_ASSERT(child_stats.size() == 1);
|
79
78
|
// can only propagate stats if the children have stats
|
80
|
-
|
81
|
-
return nullptr;
|
82
|
-
}
|
83
|
-
auto &lstats = (NumericStatistics &)*child_stats[0];
|
79
|
+
auto &lstats = child_stats[0];
|
84
80
|
Value new_min, new_max;
|
85
81
|
bool potential_overflow = true;
|
86
|
-
if (
|
82
|
+
if (NumericStats::HasMinMax(lstats)) {
|
87
83
|
switch (expr.return_type.InternalType()) {
|
88
84
|
case PhysicalType::INT8:
|
89
|
-
potential_overflow = lstats.
|
85
|
+
potential_overflow = NumericStats::Min(lstats).GetValue<int8_t>() == NumericLimits<int8_t>::Minimum();
|
90
86
|
break;
|
91
87
|
case PhysicalType::INT16:
|
92
|
-
potential_overflow = lstats.
|
88
|
+
potential_overflow = NumericStats::Min(lstats).GetValue<int16_t>() == NumericLimits<int16_t>::Minimum();
|
93
89
|
break;
|
94
90
|
case PhysicalType::INT32:
|
95
|
-
potential_overflow = lstats.
|
91
|
+
potential_overflow = NumericStats::Min(lstats).GetValue<int32_t>() == NumericLimits<int32_t>::Minimum();
|
96
92
|
break;
|
97
93
|
case PhysicalType::INT64:
|
98
|
-
potential_overflow = lstats.
|
94
|
+
potential_overflow = NumericStats::Min(lstats).GetValue<int64_t>() == NumericLimits<int64_t>::Minimum();
|
99
95
|
break;
|
100
96
|
default:
|
101
97
|
return nullptr;
|
@@ -108,8 +104,8 @@ static unique_ptr<BaseStatistics> PropagateAbsStats(ClientContext &context, Func
|
|
108
104
|
// no potential overflow
|
109
105
|
|
110
106
|
// compute stats
|
111
|
-
auto current_min = lstats.
|
112
|
-
auto current_max = lstats.
|
107
|
+
auto current_min = NumericStats::Min(lstats).GetValue<int64_t>();
|
108
|
+
auto current_max = NumericStats::Max(lstats).GetValue<int64_t>();
|
113
109
|
|
114
110
|
int64_t min_val, max_val;
|
115
111
|
|
@@ -125,16 +121,17 @@ static unique_ptr<BaseStatistics> PropagateAbsStats(ClientContext &context, Func
|
|
125
121
|
} else {
|
126
122
|
// if both current_min and current_max are > 0, then the abs is a no-op and can be removed entirely
|
127
123
|
*input.expr_ptr = std::move(input.expr.children[0]);
|
128
|
-
return
|
124
|
+
return child_stats[0].ToUnique();
|
129
125
|
}
|
130
126
|
new_min = Value::Numeric(expr.return_type, min_val);
|
131
127
|
new_max = Value::Numeric(expr.return_type, max_val);
|
132
128
|
expr.function.function = ScalarFunction::GetScalarUnaryFunction<AbsOperator>(expr.return_type);
|
133
129
|
}
|
134
|
-
auto stats =
|
135
|
-
|
136
|
-
stats
|
137
|
-
|
130
|
+
auto stats = NumericStats::CreateEmpty(expr.return_type);
|
131
|
+
NumericStats::SetMin(stats, new_min);
|
132
|
+
NumericStats::SetMax(stats, new_max);
|
133
|
+
stats.CopyValidity(lstats);
|
134
|
+
return stats.ToUnique();
|
138
135
|
}
|
139
136
|
|
140
137
|
template <class OP>
|
@@ -12,7 +12,6 @@
|
|
12
12
|
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
13
13
|
#include "duckdb/function/scalar/operators.hpp"
|
14
14
|
#include "duckdb/planner/expression/bound_function_expression.hpp"
|
15
|
-
#include "duckdb/storage/statistics/numeric_statistics.hpp"
|
16
15
|
#include "duckdb/function/scalar/nested_functions.hpp"
|
17
16
|
|
18
17
|
#include <limits>
|
@@ -78,15 +77,15 @@ static scalar_function_t GetScalarBinaryFunction(PhysicalType type) {
|
|
78
77
|
//===--------------------------------------------------------------------===//
|
79
78
|
struct AddPropagateStatistics {
|
80
79
|
template <class T, class OP>
|
81
|
-
static bool Operation(LogicalType type,
|
80
|
+
static bool Operation(LogicalType type, BaseStatistics &lstats, BaseStatistics &rstats, Value &new_min,
|
82
81
|
Value &new_max) {
|
83
82
|
T min, max;
|
84
83
|
// new min is min+min
|
85
|
-
if (!OP::Operation(
|
84
|
+
if (!OP::Operation(NumericStats::GetMinUnsafe<T>(lstats), NumericStats::GetMinUnsafe<T>(rstats), min)) {
|
86
85
|
return true;
|
87
86
|
}
|
88
87
|
// new max is max+max
|
89
|
-
if (!OP::Operation(
|
88
|
+
if (!OP::Operation(NumericStats::GetMaxUnsafe<T>(lstats), NumericStats::GetMaxUnsafe<T>(rstats), max)) {
|
90
89
|
return true;
|
91
90
|
}
|
92
91
|
new_min = Value::Numeric(type, min);
|
@@ -97,13 +96,13 @@ struct AddPropagateStatistics {
|
|
97
96
|
|
98
97
|
struct SubtractPropagateStatistics {
|
99
98
|
template <class T, class OP>
|
100
|
-
static bool Operation(LogicalType type,
|
99
|
+
static bool Operation(LogicalType type, BaseStatistics &lstats, BaseStatistics &rstats, Value &new_min,
|
101
100
|
Value &new_max) {
|
102
101
|
T min, max;
|
103
|
-
if (!OP::Operation(
|
102
|
+
if (!OP::Operation(NumericStats::GetMinUnsafe<T>(lstats), NumericStats::GetMaxUnsafe<T>(rstats), min)) {
|
104
103
|
return true;
|
105
104
|
}
|
106
|
-
if (!OP::Operation(
|
105
|
+
if (!OP::Operation(NumericStats::GetMaxUnsafe<T>(lstats), NumericStats::GetMinUnsafe<T>(rstats), max)) {
|
107
106
|
return true;
|
108
107
|
}
|
109
108
|
new_min = Value::Numeric(type, min);
|
@@ -136,14 +135,11 @@ static unique_ptr<BaseStatistics> PropagateNumericStats(ClientContext &context,
|
|
136
135
|
auto &expr = input.expr;
|
137
136
|
D_ASSERT(child_stats.size() == 2);
|
138
137
|
// can only propagate stats if the children have stats
|
139
|
-
|
140
|
-
|
141
|
-
}
|
142
|
-
auto &lstats = (NumericStatistics &)*child_stats[0];
|
143
|
-
auto &rstats = (NumericStatistics &)*child_stats[1];
|
138
|
+
auto &lstats = child_stats[0];
|
139
|
+
auto &rstats = child_stats[1];
|
144
140
|
Value new_min, new_max;
|
145
141
|
bool potential_overflow = true;
|
146
|
-
if (
|
142
|
+
if (NumericStats::HasMinMax(lstats) && NumericStats::HasMinMax(rstats)) {
|
147
143
|
switch (expr.return_type.InternalType()) {
|
148
144
|
case PhysicalType::INT8:
|
149
145
|
potential_overflow =
|
@@ -176,10 +172,11 @@ static unique_ptr<BaseStatistics> PropagateNumericStats(ClientContext &context,
|
|
176
172
|
}
|
177
173
|
expr.function.function = GetScalarIntegerFunction<BASEOP>(expr.return_type.InternalType());
|
178
174
|
}
|
179
|
-
auto
|
180
|
-
|
181
|
-
|
182
|
-
|
175
|
+
auto result = NumericStats::CreateEmpty(expr.return_type);
|
176
|
+
NumericStats::SetMin(result, new_min);
|
177
|
+
NumericStats::SetMax(result, new_max);
|
178
|
+
result.CombineValidity(lstats, rstats);
|
179
|
+
return result.ToUnique();
|
183
180
|
}
|
184
181
|
|
185
182
|
template <class OP, class OPOVERFLOWCHECK, bool IS_SUBTRACT = false>
|
@@ -491,9 +488,9 @@ unique_ptr<FunctionData> DecimalNegateBind(ClientContext &context, ScalarFunctio
|
|
491
488
|
|
492
489
|
struct NegatePropagateStatistics {
|
493
490
|
template <class T>
|
494
|
-
static bool Operation(LogicalType type,
|
495
|
-
auto max_value =
|
496
|
-
auto min_value =
|
491
|
+
static bool Operation(LogicalType type, BaseStatistics &istats, Value &new_min, Value &new_max) {
|
492
|
+
auto max_value = NumericStats::GetMaxUnsafe<T>(istats);
|
493
|
+
auto min_value = NumericStats::GetMinUnsafe<T>(istats);
|
497
494
|
if (!NegateOperator::CanNegate<T>(min_value) || !NegateOperator::CanNegate<T>(max_value)) {
|
498
495
|
return true;
|
499
496
|
}
|
@@ -510,13 +507,10 @@ static unique_ptr<BaseStatistics> NegateBindStatistics(ClientContext &context, F
|
|
510
507
|
auto &expr = input.expr;
|
511
508
|
D_ASSERT(child_stats.size() == 1);
|
512
509
|
// can only propagate stats if the children have stats
|
513
|
-
|
514
|
-
return nullptr;
|
515
|
-
}
|
516
|
-
auto &istats = (NumericStatistics &)*child_stats[0];
|
510
|
+
auto &istats = child_stats[0];
|
517
511
|
Value new_min, new_max;
|
518
512
|
bool potential_overflow = true;
|
519
|
-
if (
|
513
|
+
if (NumericStats::HasMinMax(istats)) {
|
520
514
|
switch (expr.return_type.InternalType()) {
|
521
515
|
case PhysicalType::INT8:
|
522
516
|
potential_overflow =
|
@@ -542,12 +536,11 @@ static unique_ptr<BaseStatistics> NegateBindStatistics(ClientContext &context, F
|
|
542
536
|
new_min = Value(expr.return_type);
|
543
537
|
new_max = Value(expr.return_type);
|
544
538
|
}
|
545
|
-
auto stats =
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
return std::move(stats);
|
539
|
+
auto stats = NumericStats::CreateEmpty(expr.return_type);
|
540
|
+
NumericStats::SetMin(stats, new_min);
|
541
|
+
NumericStats::SetMax(stats, new_max);
|
542
|
+
stats.CopyValidity(istats);
|
543
|
+
return stats.ToUnique();
|
551
544
|
}
|
552
545
|
|
553
546
|
ScalarFunction SubtractFun::GetFunction(const LogicalType &type) {
|
@@ -662,7 +655,7 @@ void SubtractFun::RegisterFunction(BuiltinFunctions &set) {
|
|
662
655
|
//===--------------------------------------------------------------------===//
|
663
656
|
struct MultiplyPropagateStatistics {
|
664
657
|
template <class T, class OP>
|
665
|
-
static bool Operation(LogicalType type,
|
658
|
+
static bool Operation(LogicalType type, BaseStatistics &lstats, BaseStatistics &rstats, Value &new_min,
|
666
659
|
Value &new_max) {
|
667
660
|
// statistics propagation on the multiplication is slightly less straightforward because of negative numbers
|
668
661
|
// the new min/max depend on the signs of the input types
|
@@ -671,8 +664,8 @@ struct MultiplyPropagateStatistics {
|
|
671
664
|
// etc
|
672
665
|
// rather than doing all this switcheroo we just multiply all combinations of lmin/lmax with rmin/rmax
|
673
666
|
// and check what the minimum/maximum value is
|
674
|
-
T lvals[] {
|
675
|
-
T rvals[] {
|
667
|
+
T lvals[] {NumericStats::GetMinUnsafe<T>(lstats), NumericStats::GetMaxUnsafe<T>(lstats)};
|
668
|
+
T rvals[] {NumericStats::GetMinUnsafe<T>(rstats), NumericStats::GetMaxUnsafe<T>(rstats)};
|
676
669
|
T min = NumericLimits<T>::Maximum();
|
677
670
|
T max = NumericLimits<T>::Minimum();
|
678
671
|
// multiplications
|
@@ -4,7 +4,7 @@
|
|
4
4
|
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
5
5
|
#include "duckdb/common/vector_operations/unary_executor.hpp"
|
6
6
|
#include "duckdb/planner/expression/bound_function_expression.hpp"
|
7
|
-
|
7
|
+
|
8
8
|
#include "utf8proc.hpp"
|
9
9
|
|
10
10
|
#include <string.h>
|
@@ -153,11 +153,7 @@ static unique_ptr<BaseStatistics> CaseConvertPropagateStats(ClientContext &conte
|
|
153
153
|
auto &expr = input.expr;
|
154
154
|
D_ASSERT(child_stats.size() == 1);
|
155
155
|
// can only propagate stats if the children have stats
|
156
|
-
if (!child_stats[0]) {
|
157
|
-
return nullptr;
|
158
|
-
}
|
159
|
-
auto &sstats = (StringStatistics &)*child_stats[0];
|
160
|
-
if (!sstats.has_unicode) {
|
156
|
+
if (!StringStats::CanContainUnicode(child_stats[0])) {
|
161
157
|
expr.function.function = CaseConvertFunctionASCII<IS_UPPER>;
|
162
158
|
}
|
163
159
|
return nullptr;
|
@@ -3,7 +3,7 @@
|
|
3
3
|
#include "duckdb/common/exception.hpp"
|
4
4
|
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
5
5
|
#include "duckdb/planner/expression/bound_function_expression.hpp"
|
6
|
-
|
6
|
+
|
7
7
|
#include "utf8proc.hpp"
|
8
8
|
|
9
9
|
namespace duckdb {
|
@@ -42,12 +42,8 @@ static unique_ptr<BaseStatistics> InStrPropagateStats(ClientContext &context, Fu
|
|
42
42
|
auto &expr = input.expr;
|
43
43
|
D_ASSERT(child_stats.size() == 2);
|
44
44
|
// can only propagate stats if the children have stats
|
45
|
-
if (!child_stats[0]) {
|
46
|
-
return nullptr;
|
47
|
-
}
|
48
45
|
// for strpos, we only care if the FIRST string has unicode or not
|
49
|
-
|
50
|
-
if (!sstats.has_unicode) {
|
46
|
+
if (!StringStats::CanContainUnicode(child_stats[0])) {
|
51
47
|
expr.function.function = ScalarFunction::BinaryFunction<string_t, string_t, int64_t, InstrAsciiOperator>;
|
52
48
|
}
|
53
49
|
return nullptr;
|
@@ -4,7 +4,7 @@
|
|
4
4
|
#include "duckdb/common/exception.hpp"
|
5
5
|
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
6
6
|
#include "duckdb/planner/expression/bound_function_expression.hpp"
|
7
|
-
|
7
|
+
|
8
8
|
#include "duckdb/planner/expression/bound_parameter_expression.hpp"
|
9
9
|
#include "utf8proc.hpp"
|
10
10
|
|
@@ -78,11 +78,7 @@ static unique_ptr<BaseStatistics> LengthPropagateStats(ClientContext &context, F
|
|
78
78
|
auto &expr = input.expr;
|
79
79
|
D_ASSERT(child_stats.size() == 1);
|
80
80
|
// can only propagate stats if the children have stats
|
81
|
-
if (!child_stats[0]) {
|
82
|
-
return nullptr;
|
83
|
-
}
|
84
|
-
auto &sstats = (StringStatistics &)*child_stats[0];
|
85
|
-
if (!sstats.has_unicode) {
|
81
|
+
if (!StringStats::CanContainUnicode(child_stats[0])) {
|
86
82
|
expr.function.function = ScalarFunction::UnaryFunction<string_t, int64_t, StrLenOperator>;
|
87
83
|
}
|
88
84
|
return nullptr;
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
3
3
|
#include "duckdb/function/scalar/string_functions.hpp"
|
4
4
|
#include "duckdb/planner/expression/bound_function_expression.hpp"
|
5
|
-
|
5
|
+
|
6
6
|
#include "duckdb/execution/expression_executor.hpp"
|
7
7
|
|
8
8
|
namespace duckdb {
|
@@ -482,11 +482,7 @@ static unique_ptr<BaseStatistics> ILikePropagateStats(ClientContext &context, Fu
|
|
482
482
|
auto &expr = input.expr;
|
483
483
|
D_ASSERT(child_stats.size() >= 1);
|
484
484
|
// can only propagate stats if the children have stats
|
485
|
-
if (!child_stats[0]) {
|
486
|
-
return nullptr;
|
487
|
-
}
|
488
|
-
auto &sstats = (StringStatistics &)*child_stats[0];
|
489
|
-
if (!sstats.has_unicode) {
|
485
|
+
if (!StringStats::CanContainUnicode(child_stats[0])) {
|
490
486
|
expr.function.function = ScalarFunction::BinaryFunction<string_t, string_t, bool, ASCII_OP>;
|
491
487
|
}
|
492
488
|
return nullptr;
|
@@ -4,7 +4,7 @@
|
|
4
4
|
#include "duckdb/common/exception.hpp"
|
5
5
|
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
6
6
|
#include "duckdb/common/vector_operations/ternary_executor.hpp"
|
7
|
-
|
7
|
+
|
8
8
|
#include "duckdb/planner/expression/bound_function_expression.hpp"
|
9
9
|
#include "utf8proc.hpp"
|
10
10
|
#include "duckdb/common/types/blob.hpp"
|
@@ -307,12 +307,8 @@ static unique_ptr<BaseStatistics> SubstringPropagateStats(ClientContext &context
|
|
307
307
|
auto &child_stats = input.child_stats;
|
308
308
|
auto &expr = input.expr;
|
309
309
|
// can only propagate stats if the children have stats
|
310
|
-
if (!child_stats[0]) {
|
311
|
-
return nullptr;
|
312
|
-
}
|
313
310
|
// we only care about the stats of the first child (i.e. the string)
|
314
|
-
|
315
|
-
if (!sstats.has_unicode) {
|
311
|
+
if (!StringStats::CanContainUnicode(child_stats[0])) {
|
316
312
|
expr.function.function = SubstringFunctionASCII;
|
317
313
|
}
|
318
314
|
return nullptr;
|
@@ -3,7 +3,7 @@
|
|
3
3
|
#include "duckdb/function/scalar/nested_functions.hpp"
|
4
4
|
#include "duckdb/planner/expression/bound_function_expression.hpp"
|
5
5
|
#include "duckdb/planner/expression/bound_parameter_expression.hpp"
|
6
|
-
#include "duckdb/storage/statistics/
|
6
|
+
#include "duckdb/storage/statistics/struct_stats.hpp"
|
7
7
|
|
8
8
|
namespace duckdb {
|
9
9
|
|
@@ -102,15 +102,10 @@ static unique_ptr<FunctionData> StructExtractBind(ClientContext &context, Scalar
|
|
102
102
|
static unique_ptr<BaseStatistics> PropagateStructExtractStats(ClientContext &context, FunctionStatisticsInput &input) {
|
103
103
|
auto &child_stats = input.child_stats;
|
104
104
|
auto &bind_data = input.bind_data;
|
105
|
-
|
106
|
-
return nullptr;
|
107
|
-
}
|
108
|
-
auto &struct_stats = (StructStatistics &)*child_stats[0];
|
105
|
+
|
109
106
|
auto &info = (StructExtractBindData &)*bind_data;
|
110
|
-
|
111
|
-
|
112
|
-
}
|
113
|
-
return struct_stats.child_stats[info.index]->Copy();
|
107
|
+
auto struct_child_stats = StructStats::GetChildStats(child_stats[0]);
|
108
|
+
return struct_child_stats[info.index].ToUnique();
|
114
109
|
}
|
115
110
|
|
116
111
|
ScalarFunction StructExtractFun::GetFunction() {
|