duckdb 0.7.2-dev225.0 → 0.7.2-dev314.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/parquet/column_reader.cpp +5 -6
  3. package/src/duckdb/extension/parquet/include/column_reader.hpp +1 -2
  4. package/src/duckdb/extension/parquet/include/generated_column_reader.hpp +1 -11
  5. package/src/duckdb/extension/parquet/parquet_statistics.cpp +26 -32
  6. package/src/duckdb/src/common/sort/sort_state.cpp +5 -7
  7. package/src/duckdb/src/execution/column_binding_resolver.cpp +6 -0
  8. package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +4 -5
  9. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
  10. package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +2 -3
  11. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +32 -6
  12. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +15 -15
  13. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +18 -12
  14. package/src/duckdb/src/function/aggregate/distributive/bitstring_agg.cpp +6 -13
  15. package/src/duckdb/src/function/aggregate/distributive/count.cpp +2 -4
  16. package/src/duckdb/src/function/aggregate/distributive/sum.cpp +11 -13
  17. package/src/duckdb/src/function/scalar/date/date_diff.cpp +0 -1
  18. package/src/duckdb/src/function/scalar/date/date_part.cpp +17 -25
  19. package/src/duckdb/src/function/scalar/date/date_sub.cpp +0 -1
  20. package/src/duckdb/src/function/scalar/date/date_trunc.cpp +10 -14
  21. package/src/duckdb/src/function/scalar/generic/stats.cpp +2 -4
  22. package/src/duckdb/src/function/scalar/list/flatten.cpp +5 -12
  23. package/src/duckdb/src/function/scalar/list/list_concat.cpp +3 -8
  24. package/src/duckdb/src/function/scalar/list/list_extract.cpp +5 -12
  25. package/src/duckdb/src/function/scalar/list/list_value.cpp +5 -9
  26. package/src/duckdb/src/function/scalar/math/numeric.cpp +14 -17
  27. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +27 -34
  28. package/src/duckdb/src/function/scalar/string/caseconvert.cpp +2 -6
  29. package/src/duckdb/src/function/scalar/string/instr.cpp +2 -6
  30. package/src/duckdb/src/function/scalar/string/length.cpp +2 -6
  31. package/src/duckdb/src/function/scalar/string/like.cpp +2 -6
  32. package/src/duckdb/src/function/scalar/string/substring.cpp +2 -6
  33. package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +4 -9
  34. package/src/duckdb/src/function/scalar/struct/struct_insert.cpp +10 -13
  35. package/src/duckdb/src/function/scalar/struct/struct_pack.cpp +5 -6
  36. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  37. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
  38. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +12 -3
  39. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +2 -2
  40. package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +2 -0
  41. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -2
  42. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +2 -2
  43. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_fetch.hpp +1 -1
  44. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -1
  45. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +2 -2
  46. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_fetch.hpp +1 -1
  47. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -1
  48. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +5 -2
  49. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  50. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +93 -31
  51. package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +22 -3
  52. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +6 -6
  53. package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +41 -0
  54. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +157 -0
  55. package/src/duckdb/src/include/duckdb/storage/statistics/segment_statistics.hpp +2 -7
  56. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +74 -0
  57. package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +42 -0
  58. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +2 -3
  59. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +2 -2
  60. package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +2 -1
  61. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -3
  62. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +3 -2
  63. package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
  64. package/src/duckdb/src/main/config.cpp +66 -1
  65. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +0 -1
  66. package/src/duckdb/src/optimizer/statistics/expression/propagate_aggregate.cpp +9 -3
  67. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +6 -7
  68. package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -11
  69. package/src/duckdb/src/optimizer/statistics/expression/propagate_columnref.cpp +1 -1
  70. package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +13 -15
  71. package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +0 -1
  72. package/src/duckdb/src/optimizer/statistics/expression/propagate_constant.cpp +3 -75
  73. package/src/duckdb/src/optimizer/statistics/expression/propagate_function.cpp +7 -2
  74. package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +10 -0
  75. package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +2 -3
  76. package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +28 -31
  77. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +4 -5
  78. package/src/duckdb/src/optimizer/statistics/operator/propagate_set_operation.cpp +3 -3
  79. package/src/duckdb/src/optimizer/statistics_propagator.cpp +1 -1
  80. package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +4 -0
  81. package/src/duckdb/src/planner/bind_context.cpp +16 -0
  82. package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +0 -1
  83. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +9 -0
  84. package/src/duckdb/src/planner/binder.cpp +2 -1
  85. package/src/duckdb/src/planner/bound_result_modifier.cpp +1 -1
  86. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +1 -1
  87. package/src/duckdb/src/planner/filter/constant_filter.cpp +4 -6
  88. package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -1
  89. package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +1 -4
  90. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +4 -4
  91. package/src/duckdb/src/storage/compression/bitpacking.cpp +3 -3
  92. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +3 -3
  93. package/src/duckdb/src/storage/compression/numeric_constant.cpp +9 -10
  94. package/src/duckdb/src/storage/compression/patas.cpp +1 -1
  95. package/src/duckdb/src/storage/compression/rle.cpp +2 -2
  96. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +5 -5
  97. package/src/duckdb/src/storage/data_table.cpp +4 -6
  98. package/src/duckdb/src/storage/statistics/base_statistics.cpp +373 -128
  99. package/src/duckdb/src/storage/statistics/column_statistics.cpp +58 -3
  100. package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +4 -9
  101. package/src/duckdb/src/storage/statistics/list_stats.cpp +117 -0
  102. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +529 -0
  103. package/src/duckdb/src/storage/statistics/segment_statistics.cpp +2 -11
  104. package/src/duckdb/src/storage/statistics/string_stats.cpp +273 -0
  105. package/src/duckdb/src/storage/statistics/struct_stats.cpp +131 -0
  106. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  107. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -4
  108. package/src/duckdb/src/storage/table/column_data.cpp +16 -14
  109. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +2 -3
  110. package/src/duckdb/src/storage/table/column_segment.cpp +6 -8
  111. package/src/duckdb/src/storage/table/list_column_data.cpp +7 -11
  112. package/src/duckdb/src/storage/table/row_group.cpp +24 -23
  113. package/src/duckdb/src/storage/table/row_group_collection.cpp +12 -12
  114. package/src/duckdb/src/storage/table/standard_column_data.cpp +6 -6
  115. package/src/duckdb/src/storage/table/struct_column_data.cpp +15 -16
  116. package/src/duckdb/src/storage/table/table_statistics.cpp +27 -7
  117. package/src/duckdb/src/storage/table/update_segment.cpp +10 -12
  118. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +923 -919
  119. package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +2 -0
  120. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +15684 -15571
  121. package/src/duckdb/ub_src_storage_statistics.cpp +4 -6
  122. package/src/duckdb/src/include/duckdb/storage/statistics/list_statistics.hpp +0 -36
  123. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_statistics.hpp +0 -75
  124. package/src/duckdb/src/include/duckdb/storage/statistics/string_statistics.hpp +0 -49
  125. package/src/duckdb/src/include/duckdb/storage/statistics/struct_statistics.hpp +0 -36
  126. package/src/duckdb/src/include/duckdb/storage/statistics/validity_statistics.hpp +0 -45
  127. package/src/duckdb/src/storage/statistics/list_statistics.cpp +0 -94
  128. package/src/duckdb/src/storage/statistics/numeric_statistics.cpp +0 -307
  129. package/src/duckdb/src/storage/statistics/string_statistics.cpp +0 -220
  130. package/src/duckdb/src/storage/statistics/struct_statistics.cpp +0 -108
  131. package/src/duckdb/src/storage/statistics/validity_statistics.cpp +0 -91
@@ -1,220 +0,0 @@
1
- #include "duckdb/storage/statistics/string_statistics.hpp"
2
- #include "duckdb/common/field_writer.hpp"
3
- #include "utf8proc_wrapper.hpp"
4
- #include "duckdb/common/string_util.hpp"
5
- #include "duckdb/common/types/vector.hpp"
6
- #include "duckdb/main/error_manager.hpp"
7
-
8
- namespace duckdb {
9
-
10
- StringStatistics::StringStatistics(LogicalType type_p, StatisticsType stats_type)
11
- : BaseStatistics(std::move(type_p), stats_type) {
12
- InitializeBase();
13
- for (idx_t i = 0; i < MAX_STRING_MINMAX_SIZE; i++) {
14
- min[i] = 0xFF;
15
- max[i] = 0;
16
- }
17
- max_string_length = 0;
18
- has_unicode = false;
19
- has_overflow_strings = false;
20
- }
21
-
22
- unique_ptr<BaseStatistics> StringStatistics::Copy() const {
23
- auto result = make_unique<StringStatistics>(type, stats_type);
24
- result->CopyBase(*this);
25
-
26
- memcpy(result->min, min, MAX_STRING_MINMAX_SIZE);
27
- memcpy(result->max, max, MAX_STRING_MINMAX_SIZE);
28
- result->has_unicode = has_unicode;
29
- result->max_string_length = max_string_length;
30
- return std::move(result);
31
- }
32
-
33
- void StringStatistics::Serialize(FieldWriter &writer) const {
34
- writer.WriteBlob(min, MAX_STRING_MINMAX_SIZE);
35
- writer.WriteBlob(max, MAX_STRING_MINMAX_SIZE);
36
- writer.WriteField<bool>(has_unicode);
37
- writer.WriteField<uint32_t>(max_string_length);
38
- writer.WriteField<bool>(has_overflow_strings);
39
- }
40
-
41
- unique_ptr<BaseStatistics> StringStatistics::Deserialize(FieldReader &reader, LogicalType type) {
42
- auto stats = make_unique<StringStatistics>(std::move(type), StatisticsType::LOCAL_STATS);
43
- reader.ReadBlob(stats->min, MAX_STRING_MINMAX_SIZE);
44
- reader.ReadBlob(stats->max, MAX_STRING_MINMAX_SIZE);
45
- stats->has_unicode = reader.ReadRequired<bool>();
46
- stats->max_string_length = reader.ReadRequired<uint32_t>();
47
- stats->has_overflow_strings = reader.ReadRequired<bool>();
48
- return std::move(stats);
49
- }
50
-
51
- static int StringValueComparison(const_data_ptr_t data, idx_t len, const_data_ptr_t comparison) {
52
- D_ASSERT(len <= StringStatistics::MAX_STRING_MINMAX_SIZE);
53
- for (idx_t i = 0; i < len; i++) {
54
- if (data[i] < comparison[i]) {
55
- return -1;
56
- } else if (data[i] > comparison[i]) {
57
- return 1;
58
- }
59
- }
60
- return 0;
61
- }
62
-
63
- static void ConstructValue(const_data_ptr_t data, idx_t size, data_t target[]) {
64
- idx_t value_size =
65
- size > StringStatistics::MAX_STRING_MINMAX_SIZE ? StringStatistics::MAX_STRING_MINMAX_SIZE : size;
66
- memcpy(target, data, value_size);
67
- for (idx_t i = value_size; i < StringStatistics::MAX_STRING_MINMAX_SIZE; i++) {
68
- target[i] = '\0';
69
- }
70
- }
71
-
72
- void StringStatistics::Update(const string_t &value) {
73
- auto data = (const_data_ptr_t)value.GetDataUnsafe();
74
- auto size = value.GetSize();
75
-
76
- //! we can only fit 8 bytes, so we might need to trim our string
77
- // construct the value
78
- data_t target[MAX_STRING_MINMAX_SIZE];
79
- ConstructValue(data, size, target);
80
-
81
- // update the min and max
82
- if (StringValueComparison(target, MAX_STRING_MINMAX_SIZE, min) < 0) {
83
- memcpy(min, target, MAX_STRING_MINMAX_SIZE);
84
- }
85
- if (StringValueComparison(target, MAX_STRING_MINMAX_SIZE, max) > 0) {
86
- memcpy(max, target, MAX_STRING_MINMAX_SIZE);
87
- }
88
- if (size > max_string_length) {
89
- max_string_length = size;
90
- }
91
- if (type.id() == LogicalTypeId::VARCHAR && !has_unicode) {
92
- auto unicode = Utf8Proc::Analyze((const char *)data, size);
93
- if (unicode == UnicodeType::UNICODE) {
94
- has_unicode = true;
95
- } else if (unicode == UnicodeType::INVALID) {
96
- throw InternalException(
97
- ErrorManager::InvalidUnicodeError(string((char *)data, size), "segment statistics update"));
98
- }
99
- }
100
- }
101
-
102
- void StringStatistics::Merge(const BaseStatistics &other_p) {
103
- BaseStatistics::Merge(other_p);
104
- auto &other = (const StringStatistics &)other_p;
105
- if (StringValueComparison(other.min, MAX_STRING_MINMAX_SIZE, min) < 0) {
106
- memcpy(min, other.min, MAX_STRING_MINMAX_SIZE);
107
- }
108
- if (StringValueComparison(other.max, MAX_STRING_MINMAX_SIZE, max) > 0) {
109
- memcpy(max, other.max, MAX_STRING_MINMAX_SIZE);
110
- }
111
- has_unicode = has_unicode || other.has_unicode;
112
- max_string_length = MaxValue<uint32_t>(max_string_length, other.max_string_length);
113
- has_overflow_strings = has_overflow_strings || other.has_overflow_strings;
114
- }
115
-
116
- FilterPropagateResult StringStatistics::CheckZonemap(ExpressionType comparison_type, const string &constant) const {
117
- auto data = (const_data_ptr_t)constant.c_str();
118
- auto size = constant.size();
119
-
120
- idx_t value_size = size > MAX_STRING_MINMAX_SIZE ? MAX_STRING_MINMAX_SIZE : size;
121
- int min_comp = StringValueComparison(data, value_size, min);
122
- int max_comp = StringValueComparison(data, value_size, max);
123
- switch (comparison_type) {
124
- case ExpressionType::COMPARE_EQUAL:
125
- if (min_comp >= 0 && max_comp <= 0) {
126
- return FilterPropagateResult::NO_PRUNING_POSSIBLE;
127
- } else {
128
- return FilterPropagateResult::FILTER_ALWAYS_FALSE;
129
- }
130
- case ExpressionType::COMPARE_NOTEQUAL:
131
- if (min_comp < 0 || max_comp > 0) {
132
- return FilterPropagateResult::FILTER_ALWAYS_TRUE;
133
- }
134
- return FilterPropagateResult::NO_PRUNING_POSSIBLE;
135
- case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
136
- case ExpressionType::COMPARE_GREATERTHAN:
137
- if (max_comp <= 0) {
138
- return FilterPropagateResult::NO_PRUNING_POSSIBLE;
139
- } else {
140
- return FilterPropagateResult::FILTER_ALWAYS_FALSE;
141
- }
142
- case ExpressionType::COMPARE_LESSTHAN:
143
- case ExpressionType::COMPARE_LESSTHANOREQUALTO:
144
- if (min_comp >= 0) {
145
- return FilterPropagateResult::NO_PRUNING_POSSIBLE;
146
- } else {
147
- return FilterPropagateResult::FILTER_ALWAYS_FALSE;
148
- }
149
- default:
150
- throw InternalException("Expression type not implemented for string statistics zone map");
151
- }
152
- }
153
-
154
- static idx_t GetValidMinMaxSubstring(const_data_ptr_t data) {
155
- for (idx_t i = 0; i < StringStatistics::MAX_STRING_MINMAX_SIZE; i++) {
156
- if (data[i] == '\0') {
157
- return i;
158
- }
159
- if ((data[i] & 0x80) != 0) {
160
- return i;
161
- }
162
- }
163
- return StringStatistics::MAX_STRING_MINMAX_SIZE;
164
- }
165
-
166
- string StringStatistics::ToString() const {
167
- idx_t min_len = GetValidMinMaxSubstring(min);
168
- idx_t max_len = GetValidMinMaxSubstring(max);
169
- return StringUtil::Format("[Min: %s, Max: %s, Has Unicode: %s, Max String Length: %lld]%s",
170
- string((const char *)min, min_len), string((const char *)max, max_len),
171
- has_unicode ? "true" : "false", max_string_length, BaseStatistics::ToString());
172
- }
173
-
174
- void StringStatistics::Verify(Vector &vector, const SelectionVector &sel, idx_t count) const {
175
- BaseStatistics::Verify(vector, sel, count);
176
-
177
- string_t min_string((const char *)min, MAX_STRING_MINMAX_SIZE);
178
- string_t max_string((const char *)max, MAX_STRING_MINMAX_SIZE);
179
-
180
- UnifiedVectorFormat vdata;
181
- vector.ToUnifiedFormat(count, vdata);
182
- auto data = (string_t *)vdata.data;
183
- for (idx_t i = 0; i < count; i++) {
184
- auto idx = sel.get_index(i);
185
- auto index = vdata.sel->get_index(idx);
186
- if (!vdata.validity.RowIsValid(index)) {
187
- continue;
188
- }
189
- auto value = data[index];
190
- auto data = value.GetDataUnsafe();
191
- auto len = value.GetSize();
192
- // LCOV_EXCL_START
193
- if (len > max_string_length) {
194
- throw InternalException(
195
- "Statistics mismatch: string value exceeds maximum string length.\nStatistics: %s\nVector: %s",
196
- ToString(), vector.ToString(count));
197
- }
198
- if (type.id() == LogicalTypeId::VARCHAR && !has_unicode) {
199
- auto unicode = Utf8Proc::Analyze(data, len);
200
- if (unicode == UnicodeType::UNICODE) {
201
- throw InternalException("Statistics mismatch: string value contains unicode, but statistics says it "
202
- "shouldn't.\nStatistics: %s\nVector: %s",
203
- ToString(), vector.ToString(count));
204
- } else if (unicode == UnicodeType::INVALID) {
205
- throw InternalException("Invalid unicode detected in vector: %s", vector.ToString(count));
206
- }
207
- }
208
- if (StringValueComparison((const_data_ptr_t)data, MinValue<idx_t>(len, MAX_STRING_MINMAX_SIZE), min) < 0) {
209
- throw InternalException("Statistics mismatch: value is smaller than min.\nStatistics: %s\nVector: %s",
210
- ToString(), vector.ToString(count));
211
- }
212
- if (StringValueComparison((const_data_ptr_t)data, MinValue<idx_t>(len, MAX_STRING_MINMAX_SIZE), max) > 0) {
213
- throw InternalException("Statistics mismatch: value is bigger than max.\nStatistics: %s\nVector: %s",
214
- ToString(), vector.ToString(count));
215
- }
216
- // LCOV_EXCL_STOP
217
- }
218
- }
219
-
220
- } // namespace duckdb
@@ -1,108 +0,0 @@
1
- #include "duckdb/storage/statistics/struct_statistics.hpp"
2
-
3
- #include "duckdb/common/field_writer.hpp"
4
- #include "duckdb/common/types/vector.hpp"
5
-
6
- namespace duckdb {
7
-
8
- StructStatistics::StructStatistics(LogicalType type_p)
9
- : BaseStatistics(std::move(type_p), StatisticsType::LOCAL_STATS) {
10
- D_ASSERT(type.InternalType() == PhysicalType::STRUCT);
11
- InitializeBase();
12
-
13
- auto &child_types = StructType::GetChildTypes(type);
14
- child_stats.resize(child_types.size());
15
- for (idx_t i = 0; i < child_types.size(); i++) {
16
- child_stats[i] = BaseStatistics::CreateEmpty(child_types[i].second, StatisticsType::LOCAL_STATS);
17
- }
18
- }
19
-
20
- void StructStatistics::Merge(const BaseStatistics &other_p) {
21
- BaseStatistics::Merge(other_p);
22
-
23
- auto &other = (const StructStatistics &)other_p;
24
- D_ASSERT(other.child_stats.size() == child_stats.size());
25
- for (idx_t i = 0; i < child_stats.size(); i++) {
26
- if (child_stats[i] && other.child_stats[i]) {
27
- child_stats[i]->Merge(*other.child_stats[i]);
28
- } else {
29
- child_stats[i].reset();
30
- }
31
- }
32
- }
33
-
34
- // LCOV_EXCL_START
35
- FilterPropagateResult StructStatistics::CheckZonemap(ExpressionType comparison_type, const Value &constant) const {
36
- throw InternalException("Struct zonemaps are not supported yet");
37
- }
38
- // LCOV_EXCL_STOP
39
-
40
- unique_ptr<BaseStatistics> StructStatistics::Copy() const {
41
- auto result = make_unique<StructStatistics>(type);
42
- result->CopyBase(*this);
43
-
44
- for (idx_t i = 0; i < child_stats.size(); i++) {
45
- result->child_stats[i] = child_stats[i] ? child_stats[i]->Copy() : nullptr;
46
- }
47
- return std::move(result);
48
- }
49
-
50
- void StructStatistics::Serialize(FieldWriter &writer) const {
51
- writer.WriteField<uint32_t>(child_stats.size());
52
- auto &serializer = writer.GetSerializer();
53
- for (idx_t i = 0; i < child_stats.size(); i++) {
54
- serializer.Write<bool>(child_stats[i] ? true : false);
55
- if (child_stats[i]) {
56
- child_stats[i]->Serialize(serializer);
57
- }
58
- }
59
- }
60
-
61
- unique_ptr<BaseStatistics> StructStatistics::Deserialize(FieldReader &reader, LogicalType type) {
62
- D_ASSERT(type.InternalType() == PhysicalType::STRUCT);
63
- auto result = make_unique<StructStatistics>(std::move(type));
64
- auto &child_types = StructType::GetChildTypes(result->type);
65
-
66
- auto child_type_count = reader.ReadRequired<uint32_t>();
67
- if (child_types.size() != child_type_count) {
68
- throw InternalException("Struct stats deserialization failure: child count does not match type count!");
69
- }
70
- auto &source = reader.GetSource();
71
- for (idx_t i = 0; i < child_types.size(); i++) {
72
- auto has_child = source.Read<bool>();
73
- if (has_child) {
74
- result->child_stats[i] = BaseStatistics::Deserialize(source, child_types[i].second);
75
- } else {
76
- result->child_stats[i].reset();
77
- }
78
- }
79
- return std::move(result);
80
- }
81
-
82
- string StructStatistics::ToString() const {
83
- string result;
84
- result += " {";
85
- auto &child_types = StructType::GetChildTypes(type);
86
- for (idx_t i = 0; i < child_types.size(); i++) {
87
- if (i > 0) {
88
- result += ", ";
89
- }
90
- result += child_types[i].first + ": " + (child_stats[i] ? child_stats[i]->ToString() : "No Stats");
91
- }
92
- result += "}";
93
- result += BaseStatistics::ToString();
94
- return result;
95
- }
96
-
97
- void StructStatistics::Verify(Vector &vector, const SelectionVector &sel, idx_t count) const {
98
- BaseStatistics::Verify(vector, sel, count);
99
-
100
- auto &child_entries = StructVector::GetEntries(vector);
101
- for (idx_t i = 0; i < child_entries.size(); i++) {
102
- if (child_stats[i]) {
103
- child_stats[i]->Verify(*child_entries[i], sel, count);
104
- }
105
- }
106
- }
107
-
108
- } // namespace duckdb
@@ -1,91 +0,0 @@
1
- #include "duckdb/storage/statistics/validity_statistics.hpp"
2
-
3
- #include "duckdb/common/exception.hpp"
4
- #include "duckdb/common/field_writer.hpp"
5
- #include "duckdb/common/string_util.hpp"
6
- #include "duckdb/common/vector_operations/vector_operations.hpp"
7
-
8
- namespace duckdb {
9
-
10
- ValidityStatistics::ValidityStatistics(bool has_null, bool has_no_null)
11
- : BaseStatistics(LogicalType(LogicalTypeId::VALIDITY), StatisticsType::LOCAL_STATS), has_null(has_null),
12
- has_no_null(has_no_null) {
13
- }
14
-
15
- unique_ptr<BaseStatistics> ValidityStatistics::Combine(const unique_ptr<BaseStatistics> &lstats,
16
- const unique_ptr<BaseStatistics> &rstats) {
17
- if (!lstats && !rstats) {
18
- return nullptr;
19
- } else if (!lstats) {
20
- return rstats->Copy();
21
- } else if (!rstats) {
22
- return lstats->Copy();
23
- } else {
24
- auto &l = (ValidityStatistics &)*lstats;
25
- auto &r = (ValidityStatistics &)*rstats;
26
- return make_unique<ValidityStatistics>(l.has_null || r.has_null, l.has_no_null || r.has_no_null);
27
- }
28
- }
29
-
30
- bool ValidityStatistics::IsConstant() const {
31
- if (!has_null) {
32
- return true;
33
- }
34
- if (!has_no_null) {
35
- return true;
36
- }
37
- return false;
38
- }
39
-
40
- void ValidityStatistics::Merge(const BaseStatistics &other_p) {
41
- auto &other = (ValidityStatistics &)other_p;
42
- has_null = has_null || other.has_null;
43
- has_no_null = has_no_null || other.has_no_null;
44
- }
45
-
46
- unique_ptr<BaseStatistics> ValidityStatistics::Copy() const {
47
- return make_unique<ValidityStatistics>(has_null, has_no_null);
48
- }
49
-
50
- void ValidityStatistics::Serialize(FieldWriter &writer) const {
51
- writer.WriteField<bool>(has_null);
52
- writer.WriteField<bool>(has_no_null);
53
- }
54
-
55
- unique_ptr<ValidityStatistics> ValidityStatistics::Deserialize(FieldReader &reader) {
56
- bool has_null = reader.ReadRequired<bool>();
57
- bool has_no_null = reader.ReadRequired<bool>();
58
- return make_unique<ValidityStatistics>(has_null, has_no_null);
59
- }
60
-
61
- void ValidityStatistics::Verify(Vector &vector, const SelectionVector &sel, idx_t count) const {
62
- if (has_null && has_no_null) {
63
- // nothing to verify
64
- return;
65
- }
66
- UnifiedVectorFormat vdata;
67
- vector.ToUnifiedFormat(count, vdata);
68
- for (idx_t i = 0; i < count; i++) {
69
- auto idx = sel.get_index(i);
70
- auto index = vdata.sel->get_index(idx);
71
- bool row_is_valid = vdata.validity.RowIsValid(index);
72
- if (row_is_valid && !has_no_null) {
73
- throw InternalException(
74
- "Statistics mismatch: vector labeled as having only NULL values, but vector contains valid values: %s",
75
- vector.ToString(count));
76
- }
77
- if (!row_is_valid && !has_null) {
78
- throw InternalException(
79
- "Statistics mismatch: vector labeled as not having NULL values, but vector contains null values: %s",
80
- vector.ToString(count));
81
- }
82
- }
83
- }
84
-
85
- string ValidityStatistics::ToString() const {
86
- auto has_n = has_null ? "true" : "false";
87
- auto has_n_n = has_no_null ? "true" : "false";
88
- return StringUtil::Format("[Has Null: %s, Has No Null: %s]", has_n, has_n_n);
89
- }
90
-
91
- } // namespace duckdb