duckdb 0.7.2-dev225.0 → 0.7.2-dev314.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/column_reader.cpp +5 -6
- package/src/duckdb/extension/parquet/include/column_reader.hpp +1 -2
- package/src/duckdb/extension/parquet/include/generated_column_reader.hpp +1 -11
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +26 -32
- package/src/duckdb/src/common/sort/sort_state.cpp +5 -7
- package/src/duckdb/src/execution/column_binding_resolver.cpp +6 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +4 -5
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
- package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +2 -3
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +32 -6
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +15 -15
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +18 -12
- package/src/duckdb/src/function/aggregate/distributive/bitstring_agg.cpp +6 -13
- package/src/duckdb/src/function/aggregate/distributive/count.cpp +2 -4
- package/src/duckdb/src/function/aggregate/distributive/sum.cpp +11 -13
- package/src/duckdb/src/function/scalar/date/date_diff.cpp +0 -1
- package/src/duckdb/src/function/scalar/date/date_part.cpp +17 -25
- package/src/duckdb/src/function/scalar/date/date_sub.cpp +0 -1
- package/src/duckdb/src/function/scalar/date/date_trunc.cpp +10 -14
- package/src/duckdb/src/function/scalar/generic/stats.cpp +2 -4
- package/src/duckdb/src/function/scalar/list/flatten.cpp +5 -12
- package/src/duckdb/src/function/scalar/list/list_concat.cpp +3 -8
- package/src/duckdb/src/function/scalar/list/list_extract.cpp +5 -12
- package/src/duckdb/src/function/scalar/list/list_value.cpp +5 -9
- package/src/duckdb/src/function/scalar/math/numeric.cpp +14 -17
- package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +27 -34
- package/src/duckdb/src/function/scalar/string/caseconvert.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/instr.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/length.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/like.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/substring.cpp +2 -6
- package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +4 -9
- package/src/duckdb/src/function/scalar/struct/struct_insert.cpp +10 -13
- package/src/duckdb/src/function/scalar/struct/struct_pack.cpp +5 -6
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +12 -3
- package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_fetch.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_fetch.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +93 -31
- package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +22 -3
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +6 -6
- package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +41 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +157 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/segment_statistics.hpp +2 -7
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +74 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +42 -0
- package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +2 -3
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -3
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
- package/src/duckdb/src/main/config.cpp +66 -1
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +0 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_aggregate.cpp +9 -3
- package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +6 -7
- package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -11
- package/src/duckdb/src/optimizer/statistics/expression/propagate_columnref.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +13 -15
- package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +0 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_constant.cpp +3 -75
- package/src/duckdb/src/optimizer/statistics/expression/propagate_function.cpp +7 -2
- package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +10 -0
- package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +2 -3
- package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +28 -31
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +4 -5
- package/src/duckdb/src/optimizer/statistics/operator/propagate_set_operation.cpp +3 -3
- package/src/duckdb/src/optimizer/statistics_propagator.cpp +1 -1
- package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +4 -0
- package/src/duckdb/src/planner/bind_context.cpp +16 -0
- package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +0 -1
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +9 -0
- package/src/duckdb/src/planner/binder.cpp +2 -1
- package/src/duckdb/src/planner/bound_result_modifier.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +1 -1
- package/src/duckdb/src/planner/filter/constant_filter.cpp +4 -6
- package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -1
- package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +1 -4
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +4 -4
- package/src/duckdb/src/storage/compression/bitpacking.cpp +3 -3
- package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +3 -3
- package/src/duckdb/src/storage/compression/numeric_constant.cpp +9 -10
- package/src/duckdb/src/storage/compression/patas.cpp +1 -1
- package/src/duckdb/src/storage/compression/rle.cpp +2 -2
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +5 -5
- package/src/duckdb/src/storage/data_table.cpp +4 -6
- package/src/duckdb/src/storage/statistics/base_statistics.cpp +373 -128
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +58 -3
- package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +4 -9
- package/src/duckdb/src/storage/statistics/list_stats.cpp +117 -0
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +529 -0
- package/src/duckdb/src/storage/statistics/segment_statistics.cpp +2 -11
- package/src/duckdb/src/storage/statistics/string_stats.cpp +273 -0
- package/src/duckdb/src/storage/statistics/struct_stats.cpp +131 -0
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -4
- package/src/duckdb/src/storage/table/column_data.cpp +16 -14
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +2 -3
- package/src/duckdb/src/storage/table/column_segment.cpp +6 -8
- package/src/duckdb/src/storage/table/list_column_data.cpp +7 -11
- package/src/duckdb/src/storage/table/row_group.cpp +24 -23
- package/src/duckdb/src/storage/table/row_group_collection.cpp +12 -12
- package/src/duckdb/src/storage/table/standard_column_data.cpp +6 -6
- package/src/duckdb/src/storage/table/struct_column_data.cpp +15 -16
- package/src/duckdb/src/storage/table/table_statistics.cpp +27 -7
- package/src/duckdb/src/storage/table/update_segment.cpp +10 -12
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +923 -919
- package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +2 -0
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +15684 -15571
- package/src/duckdb/ub_src_storage_statistics.cpp +4 -6
- package/src/duckdb/src/include/duckdb/storage/statistics/list_statistics.hpp +0 -36
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_statistics.hpp +0 -75
- package/src/duckdb/src/include/duckdb/storage/statistics/string_statistics.hpp +0 -49
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_statistics.hpp +0 -36
- package/src/duckdb/src/include/duckdb/storage/statistics/validity_statistics.hpp +0 -45
- package/src/duckdb/src/storage/statistics/list_statistics.cpp +0 -94
- package/src/duckdb/src/storage/statistics/numeric_statistics.cpp +0 -307
- package/src/duckdb/src/storage/statistics/string_statistics.cpp +0 -220
- package/src/duckdb/src/storage/statistics/struct_statistics.cpp +0 -108
- package/src/duckdb/src/storage/statistics/validity_statistics.cpp +0 -91
@@ -2,86 +2,61 @@
|
|
2
2
|
#include "duckdb/common/field_writer.hpp"
|
3
3
|
#include "duckdb/common/string_util.hpp"
|
4
4
|
#include "duckdb/common/types/vector.hpp"
|
5
|
-
#include "duckdb/storage/statistics/
|
6
|
-
#include "duckdb/storage/statistics/
|
7
|
-
#include "duckdb/storage/statistics/
|
8
|
-
#include "duckdb/storage/statistics/string_statistics.hpp"
|
9
|
-
#include "duckdb/storage/statistics/struct_statistics.hpp"
|
10
|
-
#include "duckdb/storage/statistics/validity_statistics.hpp"
|
5
|
+
#include "duckdb/storage/statistics/base_statistics.hpp"
|
6
|
+
#include "duckdb/storage/statistics/list_stats.hpp"
|
7
|
+
#include "duckdb/storage/statistics/struct_stats.hpp"
|
11
8
|
|
12
9
|
namespace duckdb {
|
13
10
|
|
14
|
-
BaseStatistics::BaseStatistics(
|
15
|
-
: type(std::move(type)), distinct_count(0), stats_type(stats_type) {
|
11
|
+
BaseStatistics::BaseStatistics() : type(LogicalType::INVALID) {
|
16
12
|
}
|
17
13
|
|
18
|
-
BaseStatistics
|
19
|
-
|
20
|
-
|
21
|
-
void BaseStatistics::InitializeBase() {
|
22
|
-
validity_stats = make_unique<ValidityStatistics>(false);
|
23
|
-
if (stats_type == GLOBAL_STATS) {
|
24
|
-
distinct_stats = make_unique<DistinctStatistics>();
|
25
|
-
}
|
14
|
+
BaseStatistics::BaseStatistics(LogicalType type) {
|
15
|
+
Construct(*this, std::move(type));
|
26
16
|
}
|
27
17
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
18
|
+
void BaseStatistics::Construct(BaseStatistics &stats, LogicalType type) {
|
19
|
+
stats.distinct_count = 0;
|
20
|
+
stats.type = std::move(type);
|
21
|
+
switch (GetStatsType(stats.type)) {
|
22
|
+
case StatisticsType::LIST_STATS:
|
23
|
+
ListStats::Construct(stats);
|
24
|
+
break;
|
25
|
+
case StatisticsType::STRUCT_STATS:
|
26
|
+
StructStats::Construct(stats);
|
27
|
+
break;
|
28
|
+
default:
|
29
|
+
break;
|
33
30
|
}
|
34
|
-
return ((ValidityStatistics &)*validity_stats).has_null;
|
35
31
|
}
|
36
32
|
|
37
|
-
|
38
|
-
if (!validity_stats) {
|
39
|
-
// we don't know
|
40
|
-
// solid maybe
|
41
|
-
return true;
|
42
|
-
}
|
43
|
-
return ((ValidityStatistics &)*validity_stats).has_no_null;
|
33
|
+
BaseStatistics::~BaseStatistics() {
|
44
34
|
}
|
45
35
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
36
|
+
BaseStatistics::BaseStatistics(BaseStatistics &&other) noexcept {
|
37
|
+
std::swap(type, other.type);
|
38
|
+
has_null = other.has_null;
|
39
|
+
has_no_null = other.has_no_null;
|
40
|
+
distinct_count = other.distinct_count;
|
41
|
+
stats_union = other.stats_union;
|
42
|
+
std::swap(child_stats, other.child_stats);
|
52
43
|
}
|
53
44
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
45
|
+
BaseStatistics &BaseStatistics::operator=(BaseStatistics &&other) noexcept {
|
46
|
+
std::swap(type, other.type);
|
47
|
+
has_null = other.has_null;
|
48
|
+
has_no_null = other.has_no_null;
|
49
|
+
distinct_count = other.distinct_count;
|
50
|
+
stats_union = other.stats_union;
|
51
|
+
std::swap(child_stats, other.child_stats);
|
52
|
+
return *this;
|
62
53
|
}
|
63
54
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
if (stats_type == GLOBAL_STATS) {
|
68
|
-
MergeInternal(distinct_stats, other.distinct_stats);
|
55
|
+
StatisticsType BaseStatistics::GetStatsType(const LogicalType &type) {
|
56
|
+
if (type.id() == LogicalTypeId::SQLNULL) {
|
57
|
+
return StatisticsType::BASE_STATS;
|
69
58
|
}
|
70
|
-
}
|
71
|
-
|
72
|
-
idx_t BaseStatistics::GetDistinctCount() {
|
73
|
-
if (distinct_stats) {
|
74
|
-
auto &d_stats = (DistinctStatistics &)*distinct_stats;
|
75
|
-
distinct_count = d_stats.GetCount();
|
76
|
-
}
|
77
|
-
return distinct_count;
|
78
|
-
}
|
79
|
-
|
80
|
-
unique_ptr<BaseStatistics> BaseStatistics::CreateEmpty(LogicalType type, StatisticsType stats_type) {
|
81
|
-
unique_ptr<BaseStatistics> result;
|
82
59
|
switch (type.InternalType()) {
|
83
|
-
case PhysicalType::BIT:
|
84
|
-
return make_unique<ValidityStatistics>(false, false);
|
85
60
|
case PhysicalType::BOOL:
|
86
61
|
case PhysicalType::INT8:
|
87
62
|
case PhysicalType::INT16:
|
@@ -94,113 +69,323 @@ unique_ptr<BaseStatistics> BaseStatistics::CreateEmpty(LogicalType type, Statist
|
|
94
69
|
case PhysicalType::INT128:
|
95
70
|
case PhysicalType::FLOAT:
|
96
71
|
case PhysicalType::DOUBLE:
|
97
|
-
|
98
|
-
break;
|
72
|
+
return StatisticsType::NUMERIC_STATS;
|
99
73
|
case PhysicalType::VARCHAR:
|
100
|
-
|
101
|
-
break;
|
74
|
+
return StatisticsType::STRING_STATS;
|
102
75
|
case PhysicalType::STRUCT:
|
103
|
-
|
104
|
-
break;
|
76
|
+
return StatisticsType::STRUCT_STATS;
|
105
77
|
case PhysicalType::LIST:
|
106
|
-
|
107
|
-
|
78
|
+
return StatisticsType::LIST_STATS;
|
79
|
+
case PhysicalType::BIT:
|
108
80
|
case PhysicalType::INTERVAL:
|
109
81
|
default:
|
110
|
-
|
82
|
+
return StatisticsType::BASE_STATS;
|
83
|
+
}
|
84
|
+
}
|
85
|
+
|
86
|
+
StatisticsType BaseStatistics::GetStatsType() const {
|
87
|
+
return GetStatsType(GetType());
|
88
|
+
}
|
89
|
+
|
90
|
+
void BaseStatistics::InitializeUnknown() {
|
91
|
+
has_null = true;
|
92
|
+
has_no_null = true;
|
93
|
+
}
|
94
|
+
|
95
|
+
void BaseStatistics::InitializeEmpty() {
|
96
|
+
has_null = false;
|
97
|
+
has_no_null = true;
|
98
|
+
}
|
99
|
+
|
100
|
+
bool BaseStatistics::CanHaveNull() const {
|
101
|
+
return has_null;
|
102
|
+
}
|
103
|
+
|
104
|
+
bool BaseStatistics::CanHaveNoNull() const {
|
105
|
+
return has_no_null;
|
106
|
+
}
|
107
|
+
|
108
|
+
bool BaseStatistics::IsConstant() const {
|
109
|
+
if (type.id() == LogicalTypeId::VALIDITY) {
|
110
|
+
// validity mask
|
111
|
+
if (CanHaveNull() && !CanHaveNoNull()) {
|
112
|
+
return true;
|
113
|
+
}
|
114
|
+
if (!CanHaveNull() && CanHaveNoNull()) {
|
115
|
+
return true;
|
116
|
+
}
|
117
|
+
return false;
|
118
|
+
}
|
119
|
+
switch (GetStatsType()) {
|
120
|
+
case StatisticsType::NUMERIC_STATS:
|
121
|
+
return NumericStats::IsConstant(*this);
|
122
|
+
default:
|
123
|
+
break;
|
111
124
|
}
|
112
|
-
|
125
|
+
return false;
|
126
|
+
}
|
127
|
+
|
128
|
+
void BaseStatistics::Merge(const BaseStatistics &other) {
|
129
|
+
has_null = has_null || other.has_null;
|
130
|
+
has_no_null = has_no_null || other.has_no_null;
|
131
|
+
switch (GetStatsType()) {
|
132
|
+
case StatisticsType::NUMERIC_STATS:
|
133
|
+
NumericStats::Merge(*this, other);
|
134
|
+
break;
|
135
|
+
case StatisticsType::STRING_STATS:
|
136
|
+
StringStats::Merge(*this, other);
|
137
|
+
break;
|
138
|
+
case StatisticsType::LIST_STATS:
|
139
|
+
ListStats::Merge(*this, other);
|
140
|
+
break;
|
141
|
+
case StatisticsType::STRUCT_STATS:
|
142
|
+
StructStats::Merge(*this, other);
|
143
|
+
break;
|
144
|
+
default:
|
145
|
+
break;
|
146
|
+
}
|
147
|
+
}
|
148
|
+
|
149
|
+
idx_t BaseStatistics::GetDistinctCount() {
|
150
|
+
return distinct_count;
|
151
|
+
}
|
152
|
+
|
153
|
+
BaseStatistics BaseStatistics::CreateUnknownType(LogicalType type) {
|
154
|
+
switch (GetStatsType(type)) {
|
155
|
+
case StatisticsType::NUMERIC_STATS:
|
156
|
+
return NumericStats::CreateUnknown(std::move(type));
|
157
|
+
case StatisticsType::STRING_STATS:
|
158
|
+
return StringStats::CreateUnknown(std::move(type));
|
159
|
+
case StatisticsType::LIST_STATS:
|
160
|
+
return ListStats::CreateUnknown(std::move(type));
|
161
|
+
case StatisticsType::STRUCT_STATS:
|
162
|
+
return StructStats::CreateUnknown(std::move(type));
|
163
|
+
default:
|
164
|
+
return BaseStatistics(std::move(type));
|
165
|
+
}
|
166
|
+
}
|
167
|
+
|
168
|
+
BaseStatistics BaseStatistics::CreateEmptyType(LogicalType type) {
|
169
|
+
switch (GetStatsType(type)) {
|
170
|
+
case StatisticsType::NUMERIC_STATS:
|
171
|
+
return NumericStats::CreateEmpty(std::move(type));
|
172
|
+
case StatisticsType::STRING_STATS:
|
173
|
+
return StringStats::CreateEmpty(std::move(type));
|
174
|
+
case StatisticsType::LIST_STATS:
|
175
|
+
return ListStats::CreateEmpty(std::move(type));
|
176
|
+
case StatisticsType::STRUCT_STATS:
|
177
|
+
return StructStats::CreateEmpty(std::move(type));
|
178
|
+
default:
|
179
|
+
return BaseStatistics(std::move(type));
|
180
|
+
}
|
181
|
+
}
|
182
|
+
|
183
|
+
BaseStatistics BaseStatistics::CreateUnknown(LogicalType type) {
|
184
|
+
auto result = CreateUnknownType(std::move(type));
|
185
|
+
result.InitializeUnknown();
|
113
186
|
return result;
|
114
187
|
}
|
115
188
|
|
116
|
-
|
117
|
-
|
118
|
-
|
189
|
+
BaseStatistics BaseStatistics::CreateEmpty(LogicalType type) {
|
190
|
+
if (type.InternalType() == PhysicalType::BIT) {
|
191
|
+
// FIXME: this special case should not be necessary
|
192
|
+
// but currently InitializeEmpty sets StatsInfo::CAN_HAVE_VALID_VALUES
|
193
|
+
BaseStatistics result(std::move(type));
|
194
|
+
result.Set(StatsInfo::CANNOT_HAVE_NULL_VALUES);
|
195
|
+
result.Set(StatsInfo::CANNOT_HAVE_VALID_VALUES);
|
196
|
+
return result;
|
197
|
+
}
|
198
|
+
auto result = CreateEmptyType(std::move(type));
|
199
|
+
result.InitializeEmpty();
|
119
200
|
return result;
|
120
201
|
}
|
121
202
|
|
122
|
-
void BaseStatistics::
|
123
|
-
|
124
|
-
|
203
|
+
void BaseStatistics::Copy(const BaseStatistics &other) {
|
204
|
+
D_ASSERT(GetType() == other.GetType());
|
205
|
+
CopyBase(other);
|
206
|
+
stats_union = other.stats_union;
|
207
|
+
switch (GetStatsType()) {
|
208
|
+
case StatisticsType::LIST_STATS:
|
209
|
+
ListStats::Copy(*this, other);
|
210
|
+
break;
|
211
|
+
case StatisticsType::STRUCT_STATS:
|
212
|
+
StructStats::Copy(*this, other);
|
213
|
+
break;
|
214
|
+
default:
|
215
|
+
break;
|
125
216
|
}
|
126
|
-
|
127
|
-
|
217
|
+
}
|
218
|
+
|
219
|
+
BaseStatistics BaseStatistics::Copy() const {
|
220
|
+
BaseStatistics result(type);
|
221
|
+
result.Copy(*this);
|
222
|
+
return result;
|
223
|
+
}
|
224
|
+
|
225
|
+
unique_ptr<BaseStatistics> BaseStatistics::ToUnique() const {
|
226
|
+
auto result = unique_ptr<BaseStatistics>(new BaseStatistics(type));
|
227
|
+
result->Copy(*this);
|
228
|
+
return result;
|
229
|
+
}
|
230
|
+
|
231
|
+
void BaseStatistics::CopyBase(const BaseStatistics &other) {
|
232
|
+
has_null = other.has_null;
|
233
|
+
has_no_null = other.has_no_null;
|
234
|
+
distinct_count = other.distinct_count;
|
235
|
+
}
|
236
|
+
|
237
|
+
void BaseStatistics::Set(StatsInfo info) {
|
238
|
+
switch (info) {
|
239
|
+
case StatsInfo::CAN_HAVE_NULL_VALUES:
|
240
|
+
has_null = true;
|
241
|
+
break;
|
242
|
+
case StatsInfo::CANNOT_HAVE_NULL_VALUES:
|
243
|
+
has_null = false;
|
244
|
+
break;
|
245
|
+
case StatsInfo::CAN_HAVE_VALID_VALUES:
|
246
|
+
has_no_null = true;
|
247
|
+
break;
|
248
|
+
case StatsInfo::CANNOT_HAVE_VALID_VALUES:
|
249
|
+
has_no_null = false;
|
250
|
+
break;
|
251
|
+
case StatsInfo::CAN_HAVE_NULL_AND_VALID_VALUES:
|
252
|
+
has_null = true;
|
253
|
+
has_no_null = true;
|
254
|
+
break;
|
255
|
+
default:
|
256
|
+
throw InternalException("Unrecognized StatsInfo for BaseStatistics::Set");
|
128
257
|
}
|
129
258
|
}
|
130
259
|
|
260
|
+
void BaseStatistics::CombineValidity(BaseStatistics &left, BaseStatistics &right) {
|
261
|
+
has_null = left.has_null || right.has_null;
|
262
|
+
has_no_null = left.has_no_null || right.has_no_null;
|
263
|
+
}
|
264
|
+
|
265
|
+
void BaseStatistics::CopyValidity(BaseStatistics &stats) {
|
266
|
+
has_null = stats.has_null;
|
267
|
+
has_no_null = stats.has_no_null;
|
268
|
+
}
|
269
|
+
|
131
270
|
void BaseStatistics::Serialize(Serializer &serializer) const {
|
132
271
|
FieldWriter writer(serializer);
|
133
|
-
|
272
|
+
writer.WriteField<bool>(has_null);
|
273
|
+
writer.WriteField<bool>(has_no_null);
|
134
274
|
Serialize(writer);
|
135
|
-
auto ptype = type.InternalType();
|
136
|
-
if (ptype != PhysicalType::BIT) {
|
137
|
-
writer.WriteField<StatisticsType>(stats_type);
|
138
|
-
writer.WriteOptional<BaseStatistics>(distinct_stats);
|
139
|
-
}
|
140
275
|
writer.Finalize();
|
141
276
|
}
|
142
277
|
|
143
|
-
void BaseStatistics::
|
278
|
+
void BaseStatistics::SetDistinctCount(idx_t count) {
|
279
|
+
this->distinct_count = count;
|
144
280
|
}
|
145
281
|
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
auto ptype = type.InternalType();
|
151
|
-
switch (ptype) {
|
152
|
-
case PhysicalType::BIT:
|
153
|
-
result = ValidityStatistics::Deserialize(reader);
|
154
|
-
break;
|
155
|
-
case PhysicalType::BOOL:
|
156
|
-
case PhysicalType::INT8:
|
157
|
-
case PhysicalType::INT16:
|
158
|
-
case PhysicalType::INT32:
|
159
|
-
case PhysicalType::INT64:
|
160
|
-
case PhysicalType::UINT8:
|
161
|
-
case PhysicalType::UINT16:
|
162
|
-
case PhysicalType::UINT32:
|
163
|
-
case PhysicalType::UINT64:
|
164
|
-
case PhysicalType::INT128:
|
165
|
-
case PhysicalType::FLOAT:
|
166
|
-
case PhysicalType::DOUBLE:
|
167
|
-
result = NumericStatistics::Deserialize(reader, std::move(type));
|
168
|
-
break;
|
169
|
-
case PhysicalType::VARCHAR:
|
170
|
-
result = StringStatistics::Deserialize(reader, std::move(type));
|
282
|
+
void BaseStatistics::Serialize(FieldWriter &writer) const {
|
283
|
+
switch (GetStatsType()) {
|
284
|
+
case StatisticsType::NUMERIC_STATS:
|
285
|
+
NumericStats::Serialize(*this, writer);
|
171
286
|
break;
|
172
|
-
case
|
173
|
-
|
287
|
+
case StatisticsType::STRING_STATS:
|
288
|
+
StringStats::Serialize(*this, writer);
|
174
289
|
break;
|
175
|
-
case
|
176
|
-
|
290
|
+
case StatisticsType::LIST_STATS:
|
291
|
+
ListStats::Serialize(*this, writer);
|
177
292
|
break;
|
178
|
-
case
|
179
|
-
|
293
|
+
case StatisticsType::STRUCT_STATS:
|
294
|
+
StructStats::Serialize(*this, writer);
|
180
295
|
break;
|
181
296
|
default:
|
182
|
-
|
297
|
+
break;
|
183
298
|
}
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
299
|
+
}
|
300
|
+
BaseStatistics BaseStatistics::DeserializeType(FieldReader &reader, LogicalType type) {
|
301
|
+
switch (GetStatsType(type)) {
|
302
|
+
case StatisticsType::NUMERIC_STATS:
|
303
|
+
return NumericStats::Deserialize(reader, std::move(type));
|
304
|
+
case StatisticsType::STRING_STATS:
|
305
|
+
return StringStats::Deserialize(reader, std::move(type));
|
306
|
+
case StatisticsType::LIST_STATS:
|
307
|
+
return ListStats::Deserialize(reader, std::move(type));
|
308
|
+
case StatisticsType::STRUCT_STATS:
|
309
|
+
return StructStats::Deserialize(reader, std::move(type));
|
310
|
+
default:
|
311
|
+
return BaseStatistics(std::move(type));
|
189
312
|
}
|
313
|
+
}
|
190
314
|
|
315
|
+
BaseStatistics BaseStatistics::Deserialize(Deserializer &source, LogicalType type) {
|
316
|
+
FieldReader reader(source);
|
317
|
+
bool has_null = reader.ReadRequired<bool>();
|
318
|
+
bool has_no_null = reader.ReadRequired<bool>();
|
319
|
+
auto result = DeserializeType(reader, std::move(type));
|
320
|
+
result.has_null = has_null;
|
321
|
+
result.has_no_null = has_no_null;
|
191
322
|
reader.Finalize();
|
192
323
|
return result;
|
193
324
|
}
|
194
325
|
|
195
326
|
string BaseStatistics::ToString() const {
|
196
|
-
|
197
|
-
|
327
|
+
auto has_n = has_null ? "true" : "false";
|
328
|
+
auto has_n_n = has_no_null ? "true" : "false";
|
329
|
+
string result =
|
330
|
+
StringUtil::Format("%s%s", StringUtil::Format("[Has Null: %s, Has No Null: %s]", has_n, has_n_n),
|
331
|
+
distinct_count > 0 ? StringUtil::Format("[Approx Unique: %lld]", distinct_count) : "");
|
332
|
+
switch (GetStatsType()) {
|
333
|
+
case StatisticsType::NUMERIC_STATS:
|
334
|
+
result = NumericStats::ToString(*this) + result;
|
335
|
+
break;
|
336
|
+
case StatisticsType::STRING_STATS:
|
337
|
+
result = StringStats::ToString(*this) + result;
|
338
|
+
break;
|
339
|
+
case StatisticsType::LIST_STATS:
|
340
|
+
result = ListStats::ToString(*this) + result;
|
341
|
+
break;
|
342
|
+
case StatisticsType::STRUCT_STATS:
|
343
|
+
result = StructStats::ToString(*this) + result;
|
344
|
+
break;
|
345
|
+
default:
|
346
|
+
break;
|
347
|
+
}
|
348
|
+
return result;
|
198
349
|
}
|
199
350
|
|
200
351
|
void BaseStatistics::Verify(Vector &vector, const SelectionVector &sel, idx_t count) const {
|
201
352
|
D_ASSERT(vector.GetType() == this->type);
|
202
|
-
|
203
|
-
|
353
|
+
switch (GetStatsType()) {
|
354
|
+
case StatisticsType::NUMERIC_STATS:
|
355
|
+
NumericStats::Verify(*this, vector, sel, count);
|
356
|
+
break;
|
357
|
+
case StatisticsType::STRING_STATS:
|
358
|
+
StringStats::Verify(*this, vector, sel, count);
|
359
|
+
break;
|
360
|
+
case StatisticsType::LIST_STATS:
|
361
|
+
ListStats::Verify(*this, vector, sel, count);
|
362
|
+
break;
|
363
|
+
case StatisticsType::STRUCT_STATS:
|
364
|
+
StructStats::Verify(*this, vector, sel, count);
|
365
|
+
break;
|
366
|
+
default:
|
367
|
+
break;
|
368
|
+
}
|
369
|
+
if (has_null && has_no_null) {
|
370
|
+
// nothing to verify
|
371
|
+
return;
|
372
|
+
}
|
373
|
+
UnifiedVectorFormat vdata;
|
374
|
+
vector.ToUnifiedFormat(count, vdata);
|
375
|
+
for (idx_t i = 0; i < count; i++) {
|
376
|
+
auto idx = sel.get_index(i);
|
377
|
+
auto index = vdata.sel->get_index(idx);
|
378
|
+
bool row_is_valid = vdata.validity.RowIsValid(index);
|
379
|
+
if (row_is_valid && !has_no_null) {
|
380
|
+
throw InternalException(
|
381
|
+
"Statistics mismatch: vector labeled as having only NULL values, but vector contains valid values: %s",
|
382
|
+
vector.ToString(count));
|
383
|
+
}
|
384
|
+
if (!row_is_valid && !has_null) {
|
385
|
+
throw InternalException(
|
386
|
+
"Statistics mismatch: vector labeled as not having NULL values, but vector contains null values: %s",
|
387
|
+
vector.ToString(count));
|
388
|
+
}
|
204
389
|
}
|
205
390
|
}
|
206
391
|
|
@@ -209,4 +394,64 @@ void BaseStatistics::Verify(Vector &vector, idx_t count) const {
|
|
209
394
|
Verify(vector, *sel, count);
|
210
395
|
}
|
211
396
|
|
397
|
+
BaseStatistics BaseStatistics::FromConstantType(const Value &input) {
|
398
|
+
switch (GetStatsType(input.type())) {
|
399
|
+
case StatisticsType::NUMERIC_STATS: {
|
400
|
+
auto result = NumericStats::CreateEmpty(input.type());
|
401
|
+
NumericStats::SetMin(result, input);
|
402
|
+
NumericStats::SetMax(result, input);
|
403
|
+
return result;
|
404
|
+
}
|
405
|
+
case StatisticsType::STRING_STATS: {
|
406
|
+
auto result = StringStats::CreateEmpty(input.type());
|
407
|
+
if (!input.IsNull()) {
|
408
|
+
auto &string_value = StringValue::Get(input);
|
409
|
+
StringStats::Update(result, string_t(string_value));
|
410
|
+
}
|
411
|
+
return result;
|
412
|
+
}
|
413
|
+
case StatisticsType::LIST_STATS: {
|
414
|
+
auto result = ListStats::CreateEmpty(input.type());
|
415
|
+
auto &child_stats = ListStats::GetChildStats(result);
|
416
|
+
if (!input.IsNull()) {
|
417
|
+
auto &list_children = ListValue::GetChildren(input);
|
418
|
+
for (auto &child_element : list_children) {
|
419
|
+
child_stats.Merge(FromConstant(child_element));
|
420
|
+
}
|
421
|
+
}
|
422
|
+
return result;
|
423
|
+
}
|
424
|
+
case StatisticsType::STRUCT_STATS: {
|
425
|
+
auto result = StructStats::CreateEmpty(input.type());
|
426
|
+
auto &child_types = StructType::GetChildTypes(input.type());
|
427
|
+
if (input.IsNull()) {
|
428
|
+
for (idx_t i = 0; i < child_types.size(); i++) {
|
429
|
+
StructStats::SetChildStats(result, i, FromConstant(Value(child_types[i].second)));
|
430
|
+
}
|
431
|
+
} else {
|
432
|
+
auto &struct_children = StructValue::GetChildren(input);
|
433
|
+
for (idx_t i = 0; i < child_types.size(); i++) {
|
434
|
+
StructStats::SetChildStats(result, i, FromConstant(struct_children[i]));
|
435
|
+
}
|
436
|
+
}
|
437
|
+
return result;
|
438
|
+
}
|
439
|
+
default:
|
440
|
+
return BaseStatistics(input.type());
|
441
|
+
}
|
442
|
+
}
|
443
|
+
|
444
|
+
BaseStatistics BaseStatistics::FromConstant(const Value &input) {
|
445
|
+
auto result = FromConstantType(input);
|
446
|
+
result.SetDistinctCount(1);
|
447
|
+
if (input.IsNull()) {
|
448
|
+
result.Set(StatsInfo::CAN_HAVE_NULL_VALUES);
|
449
|
+
result.Set(StatsInfo::CANNOT_HAVE_VALID_VALUES);
|
450
|
+
} else {
|
451
|
+
result.Set(StatsInfo::CANNOT_HAVE_NULL_VALUES);
|
452
|
+
result.Set(StatsInfo::CAN_HAVE_VALID_VALUES);
|
453
|
+
}
|
454
|
+
return result;
|
455
|
+
}
|
456
|
+
|
212
457
|
} // namespace duckdb
|
@@ -1,13 +1,68 @@
|
|
1
1
|
#include "duckdb/storage/statistics/column_statistics.hpp"
|
2
|
+
#include "duckdb/common/serializer.hpp"
|
2
3
|
|
3
4
|
namespace duckdb {
|
4
5
|
|
5
|
-
ColumnStatistics::ColumnStatistics(
|
6
|
+
ColumnStatistics::ColumnStatistics(BaseStatistics stats_p) : stats(std::move(stats_p)) {
|
7
|
+
auto type = stats.GetType().InternalType();
|
8
|
+
if (type != PhysicalType::LIST && type != PhysicalType::STRUCT) {
|
9
|
+
distinct_stats = make_unique<DistinctStatistics>();
|
10
|
+
}
|
11
|
+
}
|
12
|
+
ColumnStatistics::ColumnStatistics(BaseStatistics stats_p, unique_ptr<DistinctStatistics> distinct_stats_p)
|
13
|
+
: stats(std::move(stats_p)), distinct_stats(std::move(distinct_stats_p)) {
|
6
14
|
}
|
7
15
|
|
8
16
|
shared_ptr<ColumnStatistics> ColumnStatistics::CreateEmptyStats(const LogicalType &type) {
|
9
|
-
|
10
|
-
|
17
|
+
return make_shared<ColumnStatistics>(BaseStatistics::CreateEmpty(type));
|
18
|
+
}
|
19
|
+
|
20
|
+
void ColumnStatistics::Merge(ColumnStatistics &other) {
|
21
|
+
stats.Merge(other.stats);
|
22
|
+
if (distinct_stats) {
|
23
|
+
distinct_stats->Merge(*other.distinct_stats);
|
24
|
+
}
|
25
|
+
}
|
26
|
+
|
27
|
+
BaseStatistics &ColumnStatistics::Statistics() {
|
28
|
+
return stats;
|
29
|
+
}
|
30
|
+
|
31
|
+
bool ColumnStatistics::HasDistinctStats() {
|
32
|
+
return distinct_stats.get();
|
33
|
+
}
|
34
|
+
|
35
|
+
DistinctStatistics &ColumnStatistics::DistinctStats() {
|
36
|
+
if (!distinct_stats) {
|
37
|
+
throw InternalException("DistinctStats called without distinct_stats");
|
38
|
+
}
|
39
|
+
return *distinct_stats;
|
40
|
+
}
|
41
|
+
|
42
|
+
void ColumnStatistics::SetDistinct(unique_ptr<DistinctStatistics> distinct) {
|
43
|
+
this->distinct_stats = std::move(distinct);
|
44
|
+
}
|
45
|
+
|
46
|
+
void ColumnStatistics::UpdateDistinctStatistics(Vector &v, idx_t count) {
|
47
|
+
if (!distinct_stats) {
|
48
|
+
return;
|
49
|
+
}
|
50
|
+
auto &d_stats = (DistinctStatistics &)*distinct_stats;
|
51
|
+
d_stats.Update(v, count);
|
52
|
+
}
|
53
|
+
|
54
|
+
shared_ptr<ColumnStatistics> ColumnStatistics::Copy() const {
|
55
|
+
return make_shared<ColumnStatistics>(stats.Copy(), distinct_stats ? distinct_stats->Copy() : nullptr);
|
56
|
+
}
|
57
|
+
void ColumnStatistics::Serialize(Serializer &serializer) const {
|
58
|
+
stats.Serialize(serializer);
|
59
|
+
serializer.WriteOptional(distinct_stats);
|
60
|
+
}
|
61
|
+
|
62
|
+
shared_ptr<ColumnStatistics> ColumnStatistics::Deserialize(Deserializer &source, const LogicalType &type) {
|
63
|
+
auto stats = BaseStatistics::Deserialize(source, type);
|
64
|
+
auto distinct_stats = source.ReadOptional<DistinctStatistics>();
|
65
|
+
return make_shared<ColumnStatistics>(stats.Copy(), std::move(distinct_stats));
|
11
66
|
}
|
12
67
|
|
13
68
|
} // namespace duckdb
|
@@ -7,23 +7,18 @@
|
|
7
7
|
|
8
8
|
namespace duckdb {
|
9
9
|
|
10
|
-
DistinctStatistics::DistinctStatistics()
|
11
|
-
: BaseStatistics(LogicalType::INVALID, StatisticsType::LOCAL_STATS), log(make_unique<HyperLogLog>()),
|
12
|
-
sample_count(0), total_count(0) {
|
10
|
+
DistinctStatistics::DistinctStatistics() : log(make_unique<HyperLogLog>()), sample_count(0), total_count(0) {
|
13
11
|
}
|
14
12
|
|
15
13
|
DistinctStatistics::DistinctStatistics(unique_ptr<HyperLogLog> log, idx_t sample_count, idx_t total_count)
|
16
|
-
:
|
17
|
-
sample_count(sample_count), total_count(total_count) {
|
14
|
+
: log(std::move(log)), sample_count(sample_count), total_count(total_count) {
|
18
15
|
}
|
19
16
|
|
20
|
-
unique_ptr<
|
17
|
+
unique_ptr<DistinctStatistics> DistinctStatistics::Copy() const {
|
21
18
|
return make_unique<DistinctStatistics>(log->Copy(), sample_count, total_count);
|
22
19
|
}
|
23
20
|
|
24
|
-
void DistinctStatistics::Merge(const
|
25
|
-
BaseStatistics::Merge(other_p);
|
26
|
-
auto &other = (const DistinctStatistics &)other_p;
|
21
|
+
void DistinctStatistics::Merge(const DistinctStatistics &other) {
|
27
22
|
log = log->Merge(*other.log);
|
28
23
|
sample_count += other.sample_count;
|
29
24
|
total_count += other.total_count;
|