duckdb 0.7.2-dev225.0 → 0.7.2-dev314.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/column_reader.cpp +5 -6
- package/src/duckdb/extension/parquet/include/column_reader.hpp +1 -2
- package/src/duckdb/extension/parquet/include/generated_column_reader.hpp +1 -11
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +26 -32
- package/src/duckdb/src/common/sort/sort_state.cpp +5 -7
- package/src/duckdb/src/execution/column_binding_resolver.cpp +6 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +4 -5
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
- package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +2 -3
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +32 -6
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +15 -15
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +18 -12
- package/src/duckdb/src/function/aggregate/distributive/bitstring_agg.cpp +6 -13
- package/src/duckdb/src/function/aggregate/distributive/count.cpp +2 -4
- package/src/duckdb/src/function/aggregate/distributive/sum.cpp +11 -13
- package/src/duckdb/src/function/scalar/date/date_diff.cpp +0 -1
- package/src/duckdb/src/function/scalar/date/date_part.cpp +17 -25
- package/src/duckdb/src/function/scalar/date/date_sub.cpp +0 -1
- package/src/duckdb/src/function/scalar/date/date_trunc.cpp +10 -14
- package/src/duckdb/src/function/scalar/generic/stats.cpp +2 -4
- package/src/duckdb/src/function/scalar/list/flatten.cpp +5 -12
- package/src/duckdb/src/function/scalar/list/list_concat.cpp +3 -8
- package/src/duckdb/src/function/scalar/list/list_extract.cpp +5 -12
- package/src/duckdb/src/function/scalar/list/list_value.cpp +5 -9
- package/src/duckdb/src/function/scalar/math/numeric.cpp +14 -17
- package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +27 -34
- package/src/duckdb/src/function/scalar/string/caseconvert.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/instr.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/length.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/like.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/substring.cpp +2 -6
- package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +4 -9
- package/src/duckdb/src/function/scalar/struct/struct_insert.cpp +10 -13
- package/src/duckdb/src/function/scalar/struct/struct_pack.cpp +5 -6
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +12 -3
- package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_fetch.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_fetch.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +93 -31
- package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +22 -3
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +6 -6
- package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +41 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +157 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/segment_statistics.hpp +2 -7
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +74 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +42 -0
- package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +2 -3
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -3
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
- package/src/duckdb/src/main/config.cpp +66 -1
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +0 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_aggregate.cpp +9 -3
- package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +6 -7
- package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -11
- package/src/duckdb/src/optimizer/statistics/expression/propagate_columnref.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +13 -15
- package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +0 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_constant.cpp +3 -75
- package/src/duckdb/src/optimizer/statistics/expression/propagate_function.cpp +7 -2
- package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +10 -0
- package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +2 -3
- package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +28 -31
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +4 -5
- package/src/duckdb/src/optimizer/statistics/operator/propagate_set_operation.cpp +3 -3
- package/src/duckdb/src/optimizer/statistics_propagator.cpp +1 -1
- package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +4 -0
- package/src/duckdb/src/planner/bind_context.cpp +16 -0
- package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +0 -1
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +9 -0
- package/src/duckdb/src/planner/binder.cpp +2 -1
- package/src/duckdb/src/planner/bound_result_modifier.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +1 -1
- package/src/duckdb/src/planner/filter/constant_filter.cpp +4 -6
- package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -1
- package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +1 -4
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +4 -4
- package/src/duckdb/src/storage/compression/bitpacking.cpp +3 -3
- package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +3 -3
- package/src/duckdb/src/storage/compression/numeric_constant.cpp +9 -10
- package/src/duckdb/src/storage/compression/patas.cpp +1 -1
- package/src/duckdb/src/storage/compression/rle.cpp +2 -2
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +5 -5
- package/src/duckdb/src/storage/data_table.cpp +4 -6
- package/src/duckdb/src/storage/statistics/base_statistics.cpp +373 -128
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +58 -3
- package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +4 -9
- package/src/duckdb/src/storage/statistics/list_stats.cpp +117 -0
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +529 -0
- package/src/duckdb/src/storage/statistics/segment_statistics.cpp +2 -11
- package/src/duckdb/src/storage/statistics/string_stats.cpp +273 -0
- package/src/duckdb/src/storage/statistics/struct_stats.cpp +131 -0
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -4
- package/src/duckdb/src/storage/table/column_data.cpp +16 -14
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +2 -3
- package/src/duckdb/src/storage/table/column_segment.cpp +6 -8
- package/src/duckdb/src/storage/table/list_column_data.cpp +7 -11
- package/src/duckdb/src/storage/table/row_group.cpp +24 -23
- package/src/duckdb/src/storage/table/row_group_collection.cpp +12 -12
- package/src/duckdb/src/storage/table/standard_column_data.cpp +6 -6
- package/src/duckdb/src/storage/table/struct_column_data.cpp +15 -16
- package/src/duckdb/src/storage/table/table_statistics.cpp +27 -7
- package/src/duckdb/src/storage/table/update_segment.cpp +10 -12
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +923 -919
- package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +2 -0
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +15684 -15571
- package/src/duckdb/ub_src_storage_statistics.cpp +4 -6
- package/src/duckdb/src/include/duckdb/storage/statistics/list_statistics.hpp +0 -36
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_statistics.hpp +0 -75
- package/src/duckdb/src/include/duckdb/storage/statistics/string_statistics.hpp +0 -49
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_statistics.hpp +0 -36
- package/src/duckdb/src/include/duckdb/storage/statistics/validity_statistics.hpp +0 -45
- package/src/duckdb/src/storage/statistics/list_statistics.cpp +0 -94
- package/src/duckdb/src/storage/statistics/numeric_statistics.cpp +0 -307
- package/src/duckdb/src/storage/statistics/string_statistics.cpp +0 -220
- package/src/duckdb/src/storage/statistics/struct_statistics.cpp +0 -108
- package/src/duckdb/src/storage/statistics/validity_statistics.cpp +0 -91
@@ -46,8 +46,7 @@ RowGroup::RowGroup(AttachedDatabase &db, BlockManager &block_manager, DataTableI
|
|
46
46
|
|
47
47
|
// set up the statistics
|
48
48
|
for (auto &stats : pointer.statistics) {
|
49
|
-
|
50
|
-
this->stats.push_back(make_shared<SegmentStatistics>(stats_type, std::move(stats)));
|
49
|
+
this->stats.emplace_back(std::move(stats));
|
51
50
|
}
|
52
51
|
this->version_info = std::move(pointer.versions);
|
53
52
|
|
@@ -88,7 +87,7 @@ void RowGroup::InitializeEmpty(const vector<LogicalType> &types) {
|
|
88
87
|
// set up the segment trees for the column segments
|
89
88
|
for (idx_t i = 0; i < types.size(); i++) {
|
90
89
|
auto column_data = ColumnData::CreateColumn(block_manager, GetTableInfo(), i, start, types[i]);
|
91
|
-
stats.
|
90
|
+
stats.emplace_back(types[i]);
|
92
91
|
columns.push_back(std::move(column_data));
|
93
92
|
}
|
94
93
|
}
|
@@ -158,7 +157,7 @@ unique_ptr<RowGroup> RowGroup::AlterType(const LogicalType &target_type, idx_t c
|
|
158
157
|
InitializeScan(scan_state);
|
159
158
|
|
160
159
|
Vector append_vector(target_type);
|
161
|
-
|
160
|
+
SegmentStatistics altered_col_stats(target_type);
|
162
161
|
while (true) {
|
163
162
|
// scan the table
|
164
163
|
scan_chunk.Reset();
|
@@ -168,7 +167,7 @@ unique_ptr<RowGroup> RowGroup::AlterType(const LogicalType &target_type, idx_t c
|
|
168
167
|
}
|
169
168
|
// execute the expression
|
170
169
|
executor.ExecuteExpression(scan_chunk, append_vector);
|
171
|
-
column_data->Append(
|
170
|
+
column_data->Append(altered_col_stats.statistics, append_state, append_vector, scan_chunk.size());
|
172
171
|
}
|
173
172
|
|
174
173
|
// set up the row_group based on this row_group
|
@@ -178,11 +177,11 @@ unique_ptr<RowGroup> RowGroup::AlterType(const LogicalType &target_type, idx_t c
|
|
178
177
|
if (i == changed_idx) {
|
179
178
|
// this is the altered column: use the new column
|
180
179
|
row_group->columns.push_back(std::move(column_data));
|
181
|
-
row_group->stats.push_back(std::move(altered_col_stats));
|
180
|
+
row_group->stats.push_back(std::move(altered_col_stats)); // NOLINT: false positive
|
182
181
|
} else {
|
183
182
|
// this column was not altered: use the data directly
|
184
183
|
row_group->columns.push_back(columns[i]);
|
185
|
-
row_group->stats.
|
184
|
+
row_group->stats.emplace_back(stats[i].statistics.Copy());
|
186
185
|
}
|
187
186
|
}
|
188
187
|
row_group->Verify();
|
@@ -196,8 +195,7 @@ unique_ptr<RowGroup> RowGroup::AddColumn(ColumnDefinition &new_column, Expressio
|
|
196
195
|
// construct a new column data for the new column
|
197
196
|
auto added_column =
|
198
197
|
ColumnData::CreateColumn(block_manager, GetTableInfo(), columns.size(), start, new_column.Type());
|
199
|
-
|
200
|
-
new_column.Type(), BaseStatistics::CreateEmpty(new_column.Type(), StatisticsType::LOCAL_STATS));
|
198
|
+
SegmentStatistics added_col_stats(new_column.Type());
|
201
199
|
|
202
200
|
idx_t rows_to_write = this->count;
|
203
201
|
if (rows_to_write > 0) {
|
@@ -211,7 +209,7 @@ unique_ptr<RowGroup> RowGroup::AddColumn(ColumnDefinition &new_column, Expressio
|
|
211
209
|
dummy_chunk.SetCardinality(rows_in_this_vector);
|
212
210
|
executor.ExecuteExpression(dummy_chunk, result);
|
213
211
|
}
|
214
|
-
added_column->Append(
|
212
|
+
added_column->Append(added_col_stats.statistics, state, result, rows_in_this_vector);
|
215
213
|
}
|
216
214
|
}
|
217
215
|
|
@@ -219,7 +217,9 @@ unique_ptr<RowGroup> RowGroup::AddColumn(ColumnDefinition &new_column, Expressio
|
|
219
217
|
auto row_group = make_unique<RowGroup>(db, block_manager, table_info, this->start, this->count);
|
220
218
|
row_group->version_info = version_info;
|
221
219
|
row_group->columns = columns;
|
222
|
-
|
220
|
+
for (auto &stat : stats) {
|
221
|
+
row_group->stats.emplace_back(stat.statistics.Copy());
|
222
|
+
}
|
223
223
|
// now add the new column
|
224
224
|
row_group->columns.push_back(std::move(added_column));
|
225
225
|
row_group->stats.push_back(std::move(added_col_stats));
|
@@ -236,7 +236,9 @@ unique_ptr<RowGroup> RowGroup::RemoveColumn(idx_t removed_column) {
|
|
236
236
|
auto row_group = make_unique<RowGroup>(db, block_manager, table_info, this->start, this->count);
|
237
237
|
row_group->version_info = version_info;
|
238
238
|
row_group->columns = columns;
|
239
|
-
|
239
|
+
for (auto &stat : stats) {
|
240
|
+
row_group->stats.emplace_back(stat.statistics.Copy());
|
241
|
+
}
|
240
242
|
// now remove the column
|
241
243
|
row_group->columns.erase(row_group->columns.begin() + removed_column);
|
242
244
|
row_group->stats.erase(row_group->stats.begin() + removed_column);
|
@@ -275,7 +277,7 @@ bool RowGroup::CheckZonemap(TableFilterSet &filters, const vector<column_t> &col
|
|
275
277
|
auto &filter = entry.second;
|
276
278
|
auto base_column_index = column_ids[column_index];
|
277
279
|
|
278
|
-
auto propagate_result = filter->CheckStatistics(
|
280
|
+
auto propagate_result = filter->CheckStatistics(stats[base_column_index].statistics);
|
279
281
|
if (propagate_result == FilterPropagateResult::FILTER_ALWAYS_FALSE ||
|
280
282
|
propagate_result == FilterPropagateResult::FILTER_FALSE_OR_NULL) {
|
281
283
|
return false;
|
@@ -628,7 +630,7 @@ void RowGroup::InitializeAppend(RowGroupAppendState &append_state) {
|
|
628
630
|
void RowGroup::Append(RowGroupAppendState &state, DataChunk &chunk, idx_t append_count) {
|
629
631
|
// append to the current row_group
|
630
632
|
for (idx_t i = 0; i < columns.size(); i++) {
|
631
|
-
columns[i]->Append(
|
633
|
+
columns[i]->Append(stats[i].statistics, state.states[i], chunk.data[i], append_count);
|
632
634
|
}
|
633
635
|
state.offset_in_row_group += append_count;
|
634
636
|
}
|
@@ -671,21 +673,21 @@ unique_ptr<BaseStatistics> RowGroup::GetStatistics(idx_t column_idx) {
|
|
671
673
|
D_ASSERT(column_idx < stats.size());
|
672
674
|
|
673
675
|
lock_guard<mutex> slock(stats_lock);
|
674
|
-
return stats[column_idx]
|
676
|
+
return stats[column_idx].statistics.ToUnique();
|
675
677
|
}
|
676
678
|
|
677
679
|
void RowGroup::MergeStatistics(idx_t column_idx, const BaseStatistics &other) {
|
678
680
|
D_ASSERT(column_idx < stats.size());
|
679
681
|
|
680
682
|
lock_guard<mutex> slock(stats_lock);
|
681
|
-
stats[column_idx]
|
683
|
+
stats[column_idx].statistics.Merge(other);
|
682
684
|
}
|
683
685
|
|
684
686
|
void RowGroup::MergeIntoStatistics(idx_t column_idx, BaseStatistics &other) {
|
685
687
|
D_ASSERT(column_idx < stats.size());
|
686
688
|
|
687
689
|
lock_guard<mutex> slock(stats_lock);
|
688
|
-
other.Merge(
|
690
|
+
other.Merge(stats[column_idx].statistics);
|
689
691
|
}
|
690
692
|
|
691
693
|
RowGroupWriteData RowGroup::WriteToDisk(PartialBlockManager &manager,
|
@@ -711,14 +713,14 @@ RowGroupWriteData RowGroup::WriteToDisk(PartialBlockManager &manager,
|
|
711
713
|
auto stats = checkpoint_state->GetStatistics();
|
712
714
|
D_ASSERT(stats);
|
713
715
|
|
714
|
-
result.statistics.push_back(
|
716
|
+
result.statistics.push_back(stats->Copy());
|
715
717
|
result.states.push_back(std::move(checkpoint_state));
|
716
718
|
}
|
717
719
|
D_ASSERT(result.states.size() == result.statistics.size());
|
718
720
|
return result;
|
719
721
|
}
|
720
722
|
|
721
|
-
RowGroupPointer RowGroup::Checkpoint(RowGroupWriter &writer,
|
723
|
+
RowGroupPointer RowGroup::Checkpoint(RowGroupWriter &writer, TableStatistics &global_stats) {
|
722
724
|
RowGroupPointer row_group_pointer;
|
723
725
|
|
724
726
|
vector<CompressionType> compression_types;
|
@@ -728,7 +730,7 @@ RowGroupPointer RowGroup::Checkpoint(RowGroupWriter &writer, vector<unique_ptr<B
|
|
728
730
|
}
|
729
731
|
auto result = WriteToDisk(writer.GetPartialBlockManager(), compression_types);
|
730
732
|
for (idx_t column_idx = 0; column_idx < columns.size(); column_idx++) {
|
731
|
-
global_stats
|
733
|
+
global_stats.GetStats(column_idx).Statistics().Merge(result.statistics[column_idx]);
|
732
734
|
}
|
733
735
|
row_group_pointer.statistics = std::move(result.statistics);
|
734
736
|
|
@@ -805,7 +807,7 @@ void RowGroup::Serialize(RowGroupPointer &pointer, Serializer &main_serializer)
|
|
805
807
|
writer.WriteField<uint64_t>(pointer.tuple_count);
|
806
808
|
auto &serializer = writer.GetSerializer();
|
807
809
|
for (auto &stats : pointer.statistics) {
|
808
|
-
stats
|
810
|
+
stats.Serialize(serializer);
|
809
811
|
}
|
810
812
|
for (auto &data_pointer : pointer.data_pointers) {
|
811
813
|
serializer.Write<block_id_t>(data_pointer.block_id);
|
@@ -828,8 +830,7 @@ RowGroupPointer RowGroup::Deserialize(Deserializer &main_source, const ColumnLis
|
|
828
830
|
|
829
831
|
auto &source = reader.GetSource();
|
830
832
|
for (auto &col : columns.Physical()) {
|
831
|
-
|
832
|
-
result.statistics.push_back(std::move(stats));
|
833
|
+
result.statistics.push_back(BaseStatistics::Deserialize(source, col.Type()));
|
833
834
|
}
|
834
835
|
for (idx_t i = 0; i < columns.PhysicalColumnCount(); i++) {
|
835
836
|
BlockPointer pointer;
|
@@ -280,7 +280,7 @@ bool RowGroupCollection::Append(DataChunk &chunk, TableAppendState &state) {
|
|
280
280
|
// merge the stats
|
281
281
|
auto stats_lock = stats.GetLock();
|
282
282
|
for (idx_t i = 0; i < types.size(); i++) {
|
283
|
-
current_row_group->MergeIntoStatistics(i,
|
283
|
+
current_row_group->MergeIntoStatistics(i, stats.GetStats(i).Statistics());
|
284
284
|
}
|
285
285
|
}
|
286
286
|
remaining -= append_count;
|
@@ -319,11 +319,7 @@ bool RowGroupCollection::Append(DataChunk &chunk, TableAppendState &state) {
|
|
319
319
|
state.current_row += append_count;
|
320
320
|
auto stats_lock = stats.GetLock();
|
321
321
|
for (idx_t col_idx = 0; col_idx < types.size(); col_idx++) {
|
322
|
-
|
323
|
-
if (type == PhysicalType::LIST || type == PhysicalType::STRUCT) {
|
324
|
-
continue;
|
325
|
-
}
|
326
|
-
stats.GetStats(col_idx).stats->UpdateDistinctStatistics(chunk.data[col_idx], chunk.size());
|
322
|
+
stats.GetStats(col_idx).UpdateDistinctStatistics(chunk.data[col_idx], chunk.size());
|
327
323
|
}
|
328
324
|
return new_row_group;
|
329
325
|
}
|
@@ -513,13 +509,13 @@ void RowGroupCollection::UpdateColumn(TransactionData transaction, Vector &row_i
|
|
513
509
|
auto row_group = (RowGroup *)row_groups->GetSegment(first_id);
|
514
510
|
row_group->UpdateColumn(transaction, updates, row_ids, column_path);
|
515
511
|
|
516
|
-
row_group->MergeIntoStatistics(primary_column_idx,
|
512
|
+
row_group->MergeIntoStatistics(primary_column_idx, stats.GetStats(primary_column_idx).Statistics());
|
517
513
|
}
|
518
514
|
|
519
515
|
//===--------------------------------------------------------------------===//
|
520
516
|
// Checkpoint
|
521
517
|
//===--------------------------------------------------------------------===//
|
522
|
-
void RowGroupCollection::Checkpoint(TableDataWriter &writer,
|
518
|
+
void RowGroupCollection::Checkpoint(TableDataWriter &writer, TableStatistics &global_stats) {
|
523
519
|
for (auto row_group = (RowGroup *)row_groups->GetRootSegment(); row_group;
|
524
520
|
row_group = (RowGroup *)row_group->Next()) {
|
525
521
|
auto rowg_writer = writer.GetRowGroupWriter(*row_group);
|
@@ -590,7 +586,7 @@ shared_ptr<RowGroupCollection> RowGroupCollection::AddColumn(ClientContext &cont
|
|
590
586
|
while (current_row_group) {
|
591
587
|
auto new_row_group = current_row_group->AddColumn(new_column, executor, default_value, default_vector);
|
592
588
|
// merge in the statistics
|
593
|
-
new_row_group->MergeIntoStatistics(new_column_idx,
|
589
|
+
new_row_group->MergeIntoStatistics(new_column_idx, new_column_stats.Statistics());
|
594
590
|
|
595
591
|
result->row_groups->AppendSegment(std::move(new_row_group));
|
596
592
|
current_row_group = (RowGroup *)current_row_group->Next();
|
@@ -651,7 +647,7 @@ shared_ptr<RowGroupCollection> RowGroupCollection::AlterType(ClientContext &cont
|
|
651
647
|
while (current_row_group) {
|
652
648
|
auto new_row_group = current_row_group->AlterType(target_type, changed_idx, executor,
|
653
649
|
scan_state.table_state.row_group_state, scan_chunk);
|
654
|
-
new_row_group->MergeIntoStatistics(changed_idx,
|
650
|
+
new_row_group->MergeIntoStatistics(changed_idx, changed_stats.Statistics());
|
655
651
|
result->row_groups->AppendSegment(std::move(new_row_group));
|
656
652
|
current_row_group = (RowGroup *)current_row_group->Next();
|
657
653
|
}
|
@@ -696,14 +692,18 @@ void RowGroupCollection::VerifyNewConstraint(DataTable &parent, const BoundConst
|
|
696
692
|
//===--------------------------------------------------------------------===//
|
697
693
|
// Statistics
|
698
694
|
//===--------------------------------------------------------------------===//
|
695
|
+
void RowGroupCollection::CopyStats(TableStatistics &other_stats) {
|
696
|
+
stats.CopyStats(other_stats);
|
697
|
+
}
|
698
|
+
|
699
699
|
unique_ptr<BaseStatistics> RowGroupCollection::CopyStats(column_t column_id) {
|
700
700
|
return stats.CopyStats(column_id);
|
701
701
|
}
|
702
702
|
|
703
|
-
void RowGroupCollection::
|
703
|
+
void RowGroupCollection::SetDistinct(column_t column_id, unique_ptr<DistinctStatistics> distinct_stats) {
|
704
704
|
D_ASSERT(column_id != COLUMN_IDENTIFIER_ROW_ID);
|
705
705
|
auto stats_guard = stats.GetLock();
|
706
|
-
|
706
|
+
stats.GetStats(column_id).SetDistinct(std::move(distinct_stats));
|
707
707
|
}
|
708
708
|
|
709
709
|
} // namespace duckdb
|
@@ -24,7 +24,7 @@ bool StandardColumnData::CheckZonemap(ColumnScanState &state, TableFilter &filte
|
|
24
24
|
return true;
|
25
25
|
}
|
26
26
|
state.segment_checked = true;
|
27
|
-
auto prune_result = filter.CheckStatistics(
|
27
|
+
auto prune_result = filter.CheckStatistics(state.current->stats.statistics);
|
28
28
|
if (prune_result != FilterPropagateResult::FILTER_ALWAYS_FALSE) {
|
29
29
|
return true;
|
30
30
|
}
|
@@ -91,8 +91,7 @@ void StandardColumnData::InitializeAppend(ColumnAppendState &state) {
|
|
91
91
|
void StandardColumnData::AppendData(BaseStatistics &stats, ColumnAppendState &state, UnifiedVectorFormat &vdata,
|
92
92
|
idx_t count) {
|
93
93
|
ColumnData::AppendData(stats, state, vdata, count);
|
94
|
-
|
95
|
-
validity.AppendData(*stats.validity_stats, state.child_appends[0], vdata, count);
|
94
|
+
validity.AppendData(stats, state.child_appends[0], vdata, count);
|
96
95
|
}
|
97
96
|
|
98
97
|
void StandardColumnData::RevertAppend(row_t start_row) {
|
@@ -136,9 +135,11 @@ unique_ptr<BaseStatistics> StandardColumnData::GetUpdateStatistics() {
|
|
136
135
|
return nullptr;
|
137
136
|
}
|
138
137
|
if (!stats) {
|
139
|
-
stats = BaseStatistics::CreateEmpty(type
|
138
|
+
stats = BaseStatistics::CreateEmpty(type).ToUnique();
|
139
|
+
}
|
140
|
+
if (validity_stats) {
|
141
|
+
stats->Merge(*validity_stats);
|
140
142
|
}
|
141
|
-
stats->validity_stats = std::move(validity_stats);
|
142
143
|
return stats;
|
143
144
|
}
|
144
145
|
|
@@ -169,7 +170,6 @@ struct StandardColumnCheckpointState : public ColumnCheckpointState {
|
|
169
170
|
public:
|
170
171
|
unique_ptr<BaseStatistics> GetStatistics() override {
|
171
172
|
D_ASSERT(global_stats);
|
172
|
-
global_stats->validity_stats = validity_state->GetStatistics();
|
173
173
|
return std::move(global_stats);
|
174
174
|
}
|
175
175
|
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#include "duckdb/storage/table/struct_column_data.hpp"
|
2
|
-
#include "duckdb/storage/statistics/
|
2
|
+
#include "duckdb/storage/statistics/struct_stats.hpp"
|
3
3
|
#include "duckdb/transaction/transaction.hpp"
|
4
4
|
|
5
5
|
namespace duckdb {
|
@@ -127,12 +127,12 @@ void StructColumnData::Append(BaseStatistics &stats, ColumnAppendState &state, V
|
|
127
127
|
vector.Flatten(count);
|
128
128
|
|
129
129
|
// append the null values
|
130
|
-
validity.Append(
|
130
|
+
validity.Append(stats, state.child_appends[0], vector, count);
|
131
131
|
|
132
|
-
auto &struct_stats = (StructStatistics &)stats;
|
133
132
|
auto &child_entries = StructVector::GetEntries(vector);
|
134
133
|
for (idx_t i = 0; i < child_entries.size(); i++) {
|
135
|
-
sub_columns[i]->Append(
|
134
|
+
sub_columns[i]->Append(StructStats::GetChildStats(stats, i), state.child_appends[i + 1], *child_entries[i],
|
135
|
+
count);
|
136
136
|
}
|
137
137
|
}
|
138
138
|
|
@@ -190,16 +190,18 @@ void StructColumnData::UpdateColumn(TransactionData transaction, const vector<co
|
|
190
190
|
|
191
191
|
unique_ptr<BaseStatistics> StructColumnData::GetUpdateStatistics() {
|
192
192
|
// check if any child column has updates
|
193
|
-
auto stats = BaseStatistics::CreateEmpty(type
|
194
|
-
auto
|
195
|
-
|
193
|
+
auto stats = BaseStatistics::CreateEmpty(type);
|
194
|
+
auto validity_stats = validity.GetUpdateStatistics();
|
195
|
+
if (validity_stats) {
|
196
|
+
stats.Merge(*validity_stats);
|
197
|
+
}
|
196
198
|
for (idx_t i = 0; i < sub_columns.size(); i++) {
|
197
199
|
auto child_stats = sub_columns[i]->GetUpdateStatistics();
|
198
200
|
if (child_stats) {
|
199
|
-
|
201
|
+
StructStats::SetChildStats(stats, i, std::move(child_stats));
|
200
202
|
}
|
201
203
|
}
|
202
|
-
return stats;
|
204
|
+
return stats.ToUnique();
|
203
205
|
}
|
204
206
|
|
205
207
|
void StructColumnData::FetchRow(TransactionData transaction, ColumnFetchState &state, row_t row_id, Vector &result,
|
@@ -230,7 +232,7 @@ struct StructColumnCheckpointState : public ColumnCheckpointState {
|
|
230
232
|
StructColumnCheckpointState(RowGroup &row_group, ColumnData &column_data,
|
231
233
|
PartialBlockManager &partial_block_manager)
|
232
234
|
: ColumnCheckpointState(row_group, column_data, partial_block_manager) {
|
233
|
-
global_stats =
|
235
|
+
global_stats = StructStats::CreateEmpty(column_data.type).ToUnique();
|
234
236
|
}
|
235
237
|
|
236
238
|
unique_ptr<ColumnCheckpointState> validity_state;
|
@@ -238,14 +240,11 @@ struct StructColumnCheckpointState : public ColumnCheckpointState {
|
|
238
240
|
|
239
241
|
public:
|
240
242
|
unique_ptr<BaseStatistics> GetStatistics() override {
|
241
|
-
auto stats =
|
242
|
-
D_ASSERT(stats->child_stats.size() == child_states.size());
|
243
|
-
stats->validity_stats = validity_state->GetStatistics();
|
243
|
+
auto stats = StructStats::CreateEmpty(column_data.type);
|
244
244
|
for (idx_t i = 0; i < child_states.size(); i++) {
|
245
|
-
stats
|
246
|
-
D_ASSERT(stats->child_stats[i]);
|
245
|
+
StructStats::SetChildStats(stats, i, child_states[i]->GetStatistics());
|
247
246
|
}
|
248
|
-
return
|
247
|
+
return stats.ToUnique();
|
249
248
|
}
|
250
249
|
|
251
250
|
void WriteDataPointers(RowGroupWriter &writer) override {
|
@@ -6,10 +6,7 @@ namespace duckdb {
|
|
6
6
|
void TableStatistics::Initialize(const vector<LogicalType> &types, PersistentTableData &data) {
|
7
7
|
D_ASSERT(Empty());
|
8
8
|
|
9
|
-
column_stats
|
10
|
-
for (auto &stats : data.column_stats) {
|
11
|
-
column_stats.push_back(make_shared<ColumnStatistics>(std::move(stats)));
|
12
|
-
}
|
9
|
+
column_stats = std::move(data.table_stats.column_stats);
|
13
10
|
if (column_stats.size() != types.size()) { // LCOV_EXCL_START
|
14
11
|
throw IOException("Table statistics column count is not aligned with table column count. Corrupt file?");
|
15
12
|
} // LCOV_EXCL_STOP
|
@@ -70,7 +67,7 @@ void TableStatistics::MergeStats(TableStatistics &other) {
|
|
70
67
|
auto l = GetLock();
|
71
68
|
D_ASSERT(column_stats.size() == other.column_stats.size());
|
72
69
|
for (idx_t i = 0; i < column_stats.size(); i++) {
|
73
|
-
column_stats[i]->
|
70
|
+
column_stats[i]->Merge(*other.column_stats[i]);
|
74
71
|
}
|
75
72
|
}
|
76
73
|
|
@@ -80,7 +77,7 @@ void TableStatistics::MergeStats(idx_t i, BaseStatistics &stats) {
|
|
80
77
|
}
|
81
78
|
|
82
79
|
void TableStatistics::MergeStats(TableStatisticsLock &lock, idx_t i, BaseStatistics &stats) {
|
83
|
-
column_stats[i]->
|
80
|
+
column_stats[i]->Statistics().Merge(stats);
|
84
81
|
}
|
85
82
|
|
86
83
|
ColumnStatistics &TableStatistics::GetStats(idx_t i) {
|
@@ -89,7 +86,30 @@ ColumnStatistics &TableStatistics::GetStats(idx_t i) {
|
|
89
86
|
|
90
87
|
unique_ptr<BaseStatistics> TableStatistics::CopyStats(idx_t i) {
|
91
88
|
lock_guard<mutex> l(stats_lock);
|
92
|
-
|
89
|
+
auto result = column_stats[i]->Statistics().Copy();
|
90
|
+
if (column_stats[i]->HasDistinctStats()) {
|
91
|
+
result.SetDistinctCount(column_stats[i]->DistinctStats().GetCount());
|
92
|
+
}
|
93
|
+
return result.ToUnique();
|
94
|
+
}
|
95
|
+
|
96
|
+
void TableStatistics::CopyStats(TableStatistics &other) {
|
97
|
+
for (auto &stats : column_stats) {
|
98
|
+
other.column_stats.push_back(stats->Copy());
|
99
|
+
}
|
100
|
+
}
|
101
|
+
|
102
|
+
void TableStatistics::Serialize(Serializer &serializer) {
|
103
|
+
for (auto &stats : column_stats) {
|
104
|
+
stats->Serialize(serializer);
|
105
|
+
}
|
106
|
+
}
|
107
|
+
|
108
|
+
void TableStatistics::Deserialize(Deserializer &source, ColumnList &columns) {
|
109
|
+
for (auto &col : columns.Physical()) {
|
110
|
+
auto stats = ColumnStatistics::Deserialize(source, col.GetType());
|
111
|
+
column_stats.push_back(std::move(stats));
|
112
|
+
}
|
93
113
|
}
|
94
114
|
|
95
115
|
unique_ptr<TableStatisticsLock> TableStatistics::GetLock() {
|
@@ -1,9 +1,7 @@
|
|
1
1
|
#include "duckdb/storage/table/update_segment.hpp"
|
2
2
|
|
3
3
|
#include "duckdb/storage/statistics/distinct_statistics.hpp"
|
4
|
-
|
5
|
-
#include "duckdb/storage/statistics/string_statistics.hpp"
|
6
|
-
#include "duckdb/storage/statistics/validity_statistics.hpp"
|
4
|
+
|
7
5
|
#include "duckdb/storage/table/column_data.hpp"
|
8
6
|
#include "duckdb/transaction/duck_transaction.hpp"
|
9
7
|
#include "duckdb/transaction/update_info.hpp"
|
@@ -55,7 +53,7 @@ UpdateSegment::~UpdateSegment() {
|
|
55
53
|
}
|
56
54
|
|
57
55
|
void UpdateSegment::ClearUpdates() {
|
58
|
-
stats.
|
56
|
+
stats.statistics.Copy(BaseStatistics::CreateEmpty(stats.statistics.GetType()));
|
59
57
|
root.reset();
|
60
58
|
heap.Destroy();
|
61
59
|
}
|
@@ -905,17 +903,17 @@ static UpdateSegment::merge_update_function_t GetMergeUpdateFunction(PhysicalTyp
|
|
905
903
|
//===--------------------------------------------------------------------===//
|
906
904
|
unique_ptr<BaseStatistics> UpdateSegment::GetStatistics() {
|
907
905
|
lock_guard<mutex> stats_guard(stats_lock);
|
908
|
-
return stats.statistics
|
906
|
+
return stats.statistics.ToUnique();
|
909
907
|
}
|
910
908
|
|
911
909
|
idx_t UpdateValidityStatistics(UpdateSegment *segment, SegmentStatistics &stats, Vector &update, idx_t count,
|
912
910
|
SelectionVector &sel) {
|
913
911
|
auto &mask = FlatVector::Validity(update);
|
914
|
-
auto &validity =
|
915
|
-
if (!mask.AllValid() && !validity.
|
912
|
+
auto &validity = stats.statistics;
|
913
|
+
if (!mask.AllValid() && !validity.CanHaveNull()) {
|
916
914
|
for (idx_t i = 0; i < count; i++) {
|
917
915
|
if (!mask.RowIsValid(i)) {
|
918
|
-
validity.
|
916
|
+
validity.SetHasNull();
|
919
917
|
break;
|
920
918
|
}
|
921
919
|
}
|
@@ -932,7 +930,7 @@ idx_t TemplatedUpdateNumericStatistics(UpdateSegment *segment, SegmentStatistics
|
|
932
930
|
|
933
931
|
if (mask.AllValid()) {
|
934
932
|
for (idx_t i = 0; i < count; i++) {
|
935
|
-
|
933
|
+
NumericStats::Update<T>(stats.statistics, update_data[i]);
|
936
934
|
}
|
937
935
|
sel.Initialize(nullptr);
|
938
936
|
return count;
|
@@ -942,7 +940,7 @@ idx_t TemplatedUpdateNumericStatistics(UpdateSegment *segment, SegmentStatistics
|
|
942
940
|
for (idx_t i = 0; i < count; i++) {
|
943
941
|
if (mask.RowIsValid(i)) {
|
944
942
|
sel.set_index(not_null_count++, i);
|
945
|
-
|
943
|
+
NumericStats::Update<T>(stats.statistics, update_data[i]);
|
946
944
|
}
|
947
945
|
}
|
948
946
|
return not_null_count;
|
@@ -955,7 +953,7 @@ idx_t UpdateStringStatistics(UpdateSegment *segment, SegmentStatistics &stats, V
|
|
955
953
|
auto &mask = FlatVector::Validity(update);
|
956
954
|
if (mask.AllValid()) {
|
957
955
|
for (idx_t i = 0; i < count; i++) {
|
958
|
-
(
|
956
|
+
StringStats::Update(stats.statistics, update_data[i]);
|
959
957
|
if (!update_data[i].IsInlined()) {
|
960
958
|
update_data[i] = segment->GetStringHeap().AddBlob(update_data[i]);
|
961
959
|
}
|
@@ -968,7 +966,7 @@ idx_t UpdateStringStatistics(UpdateSegment *segment, SegmentStatistics &stats, V
|
|
968
966
|
for (idx_t i = 0; i < count; i++) {
|
969
967
|
if (mask.RowIsValid(i)) {
|
970
968
|
sel.set_index(not_null_count++, i);
|
971
|
-
(
|
969
|
+
StringStats::Update(stats.statistics, update_data[i]);
|
972
970
|
if (!update_data[i].IsInlined()) {
|
973
971
|
update_data[i] = segment->GetStringHeap().AddBlob(update_data[i]);
|
974
972
|
}
|