duckdb 0.7.2-dev225.0 → 0.7.2-dev314.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/parquet/column_reader.cpp +5 -6
  3. package/src/duckdb/extension/parquet/include/column_reader.hpp +1 -2
  4. package/src/duckdb/extension/parquet/include/generated_column_reader.hpp +1 -11
  5. package/src/duckdb/extension/parquet/parquet_statistics.cpp +26 -32
  6. package/src/duckdb/src/common/sort/sort_state.cpp +5 -7
  7. package/src/duckdb/src/execution/column_binding_resolver.cpp +6 -0
  8. package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +4 -5
  9. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
  10. package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +2 -3
  11. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +32 -6
  12. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +15 -15
  13. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +18 -12
  14. package/src/duckdb/src/function/aggregate/distributive/bitstring_agg.cpp +6 -13
  15. package/src/duckdb/src/function/aggregate/distributive/count.cpp +2 -4
  16. package/src/duckdb/src/function/aggregate/distributive/sum.cpp +11 -13
  17. package/src/duckdb/src/function/scalar/date/date_diff.cpp +0 -1
  18. package/src/duckdb/src/function/scalar/date/date_part.cpp +17 -25
  19. package/src/duckdb/src/function/scalar/date/date_sub.cpp +0 -1
  20. package/src/duckdb/src/function/scalar/date/date_trunc.cpp +10 -14
  21. package/src/duckdb/src/function/scalar/generic/stats.cpp +2 -4
  22. package/src/duckdb/src/function/scalar/list/flatten.cpp +5 -12
  23. package/src/duckdb/src/function/scalar/list/list_concat.cpp +3 -8
  24. package/src/duckdb/src/function/scalar/list/list_extract.cpp +5 -12
  25. package/src/duckdb/src/function/scalar/list/list_value.cpp +5 -9
  26. package/src/duckdb/src/function/scalar/math/numeric.cpp +14 -17
  27. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +27 -34
  28. package/src/duckdb/src/function/scalar/string/caseconvert.cpp +2 -6
  29. package/src/duckdb/src/function/scalar/string/instr.cpp +2 -6
  30. package/src/duckdb/src/function/scalar/string/length.cpp +2 -6
  31. package/src/duckdb/src/function/scalar/string/like.cpp +2 -6
  32. package/src/duckdb/src/function/scalar/string/substring.cpp +2 -6
  33. package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +4 -9
  34. package/src/duckdb/src/function/scalar/struct/struct_insert.cpp +10 -13
  35. package/src/duckdb/src/function/scalar/struct/struct_pack.cpp +5 -6
  36. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  37. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
  38. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +12 -3
  39. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +2 -2
  40. package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +2 -0
  41. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -2
  42. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +2 -2
  43. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_fetch.hpp +1 -1
  44. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -1
  45. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +2 -2
  46. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_fetch.hpp +1 -1
  47. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -1
  48. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +5 -2
  49. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  50. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +93 -31
  51. package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +22 -3
  52. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +6 -6
  53. package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +41 -0
  54. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +157 -0
  55. package/src/duckdb/src/include/duckdb/storage/statistics/segment_statistics.hpp +2 -7
  56. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +74 -0
  57. package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +42 -0
  58. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +2 -3
  59. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +2 -2
  60. package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +2 -1
  61. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -3
  62. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +3 -2
  63. package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
  64. package/src/duckdb/src/main/config.cpp +66 -1
  65. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +0 -1
  66. package/src/duckdb/src/optimizer/statistics/expression/propagate_aggregate.cpp +9 -3
  67. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +6 -7
  68. package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -11
  69. package/src/duckdb/src/optimizer/statistics/expression/propagate_columnref.cpp +1 -1
  70. package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +13 -15
  71. package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +0 -1
  72. package/src/duckdb/src/optimizer/statistics/expression/propagate_constant.cpp +3 -75
  73. package/src/duckdb/src/optimizer/statistics/expression/propagate_function.cpp +7 -2
  74. package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +10 -0
  75. package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +2 -3
  76. package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +28 -31
  77. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +4 -5
  78. package/src/duckdb/src/optimizer/statistics/operator/propagate_set_operation.cpp +3 -3
  79. package/src/duckdb/src/optimizer/statistics_propagator.cpp +1 -1
  80. package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +4 -0
  81. package/src/duckdb/src/planner/bind_context.cpp +16 -0
  82. package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +0 -1
  83. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +9 -0
  84. package/src/duckdb/src/planner/binder.cpp +2 -1
  85. package/src/duckdb/src/planner/bound_result_modifier.cpp +1 -1
  86. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +1 -1
  87. package/src/duckdb/src/planner/filter/constant_filter.cpp +4 -6
  88. package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -1
  89. package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +1 -4
  90. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +4 -4
  91. package/src/duckdb/src/storage/compression/bitpacking.cpp +3 -3
  92. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +3 -3
  93. package/src/duckdb/src/storage/compression/numeric_constant.cpp +9 -10
  94. package/src/duckdb/src/storage/compression/patas.cpp +1 -1
  95. package/src/duckdb/src/storage/compression/rle.cpp +2 -2
  96. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +5 -5
  97. package/src/duckdb/src/storage/data_table.cpp +4 -6
  98. package/src/duckdb/src/storage/statistics/base_statistics.cpp +373 -128
  99. package/src/duckdb/src/storage/statistics/column_statistics.cpp +58 -3
  100. package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +4 -9
  101. package/src/duckdb/src/storage/statistics/list_stats.cpp +117 -0
  102. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +529 -0
  103. package/src/duckdb/src/storage/statistics/segment_statistics.cpp +2 -11
  104. package/src/duckdb/src/storage/statistics/string_stats.cpp +273 -0
  105. package/src/duckdb/src/storage/statistics/struct_stats.cpp +131 -0
  106. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  107. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -4
  108. package/src/duckdb/src/storage/table/column_data.cpp +16 -14
  109. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +2 -3
  110. package/src/duckdb/src/storage/table/column_segment.cpp +6 -8
  111. package/src/duckdb/src/storage/table/list_column_data.cpp +7 -11
  112. package/src/duckdb/src/storage/table/row_group.cpp +24 -23
  113. package/src/duckdb/src/storage/table/row_group_collection.cpp +12 -12
  114. package/src/duckdb/src/storage/table/standard_column_data.cpp +6 -6
  115. package/src/duckdb/src/storage/table/struct_column_data.cpp +15 -16
  116. package/src/duckdb/src/storage/table/table_statistics.cpp +27 -7
  117. package/src/duckdb/src/storage/table/update_segment.cpp +10 -12
  118. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +923 -919
  119. package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +2 -0
  120. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +15684 -15571
  121. package/src/duckdb/ub_src_storage_statistics.cpp +4 -6
  122. package/src/duckdb/src/include/duckdb/storage/statistics/list_statistics.hpp +0 -36
  123. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_statistics.hpp +0 -75
  124. package/src/duckdb/src/include/duckdb/storage/statistics/string_statistics.hpp +0 -49
  125. package/src/duckdb/src/include/duckdb/storage/statistics/struct_statistics.hpp +0 -36
  126. package/src/duckdb/src/include/duckdb/storage/statistics/validity_statistics.hpp +0 -45
  127. package/src/duckdb/src/storage/statistics/list_statistics.cpp +0 -94
  128. package/src/duckdb/src/storage/statistics/numeric_statistics.cpp +0 -307
  129. package/src/duckdb/src/storage/statistics/string_statistics.cpp +0 -220
  130. package/src/duckdb/src/storage/statistics/struct_statistics.cpp +0 -108
  131. package/src/duckdb/src/storage/statistics/validity_statistics.cpp +0 -91
@@ -46,8 +46,7 @@ RowGroup::RowGroup(AttachedDatabase &db, BlockManager &block_manager, DataTableI
46
46
 
47
47
  // set up the statistics
48
48
  for (auto &stats : pointer.statistics) {
49
- auto stats_type = stats->type;
50
- this->stats.push_back(make_shared<SegmentStatistics>(stats_type, std::move(stats)));
49
+ this->stats.emplace_back(std::move(stats));
51
50
  }
52
51
  this->version_info = std::move(pointer.versions);
53
52
 
@@ -88,7 +87,7 @@ void RowGroup::InitializeEmpty(const vector<LogicalType> &types) {
88
87
  // set up the segment trees for the column segments
89
88
  for (idx_t i = 0; i < types.size(); i++) {
90
89
  auto column_data = ColumnData::CreateColumn(block_manager, GetTableInfo(), i, start, types[i]);
91
- stats.push_back(make_shared<SegmentStatistics>(types[i]));
90
+ stats.emplace_back(types[i]);
92
91
  columns.push_back(std::move(column_data));
93
92
  }
94
93
  }
@@ -158,7 +157,7 @@ unique_ptr<RowGroup> RowGroup::AlterType(const LogicalType &target_type, idx_t c
158
157
  InitializeScan(scan_state);
159
158
 
160
159
  Vector append_vector(target_type);
161
- auto altered_col_stats = make_shared<SegmentStatistics>(target_type);
160
+ SegmentStatistics altered_col_stats(target_type);
162
161
  while (true) {
163
162
  // scan the table
164
163
  scan_chunk.Reset();
@@ -168,7 +167,7 @@ unique_ptr<RowGroup> RowGroup::AlterType(const LogicalType &target_type, idx_t c
168
167
  }
169
168
  // execute the expression
170
169
  executor.ExecuteExpression(scan_chunk, append_vector);
171
- column_data->Append(*altered_col_stats->statistics, append_state, append_vector, scan_chunk.size());
170
+ column_data->Append(altered_col_stats.statistics, append_state, append_vector, scan_chunk.size());
172
171
  }
173
172
 
174
173
  // set up the row_group based on this row_group
@@ -178,11 +177,11 @@ unique_ptr<RowGroup> RowGroup::AlterType(const LogicalType &target_type, idx_t c
178
177
  if (i == changed_idx) {
179
178
  // this is the altered column: use the new column
180
179
  row_group->columns.push_back(std::move(column_data));
181
- row_group->stats.push_back(std::move(altered_col_stats));
180
+ row_group->stats.push_back(std::move(altered_col_stats)); // NOLINT: false positive
182
181
  } else {
183
182
  // this column was not altered: use the data directly
184
183
  row_group->columns.push_back(columns[i]);
185
- row_group->stats.push_back(stats[i]);
184
+ row_group->stats.emplace_back(stats[i].statistics.Copy());
186
185
  }
187
186
  }
188
187
  row_group->Verify();
@@ -196,8 +195,7 @@ unique_ptr<RowGroup> RowGroup::AddColumn(ColumnDefinition &new_column, Expressio
196
195
  // construct a new column data for the new column
197
196
  auto added_column =
198
197
  ColumnData::CreateColumn(block_manager, GetTableInfo(), columns.size(), start, new_column.Type());
199
- auto added_col_stats = make_shared<SegmentStatistics>(
200
- new_column.Type(), BaseStatistics::CreateEmpty(new_column.Type(), StatisticsType::LOCAL_STATS));
198
+ SegmentStatistics added_col_stats(new_column.Type());
201
199
 
202
200
  idx_t rows_to_write = this->count;
203
201
  if (rows_to_write > 0) {
@@ -211,7 +209,7 @@ unique_ptr<RowGroup> RowGroup::AddColumn(ColumnDefinition &new_column, Expressio
211
209
  dummy_chunk.SetCardinality(rows_in_this_vector);
212
210
  executor.ExecuteExpression(dummy_chunk, result);
213
211
  }
214
- added_column->Append(*added_col_stats->statistics, state, result, rows_in_this_vector);
212
+ added_column->Append(added_col_stats.statistics, state, result, rows_in_this_vector);
215
213
  }
216
214
  }
217
215
 
@@ -219,7 +217,9 @@ unique_ptr<RowGroup> RowGroup::AddColumn(ColumnDefinition &new_column, Expressio
219
217
  auto row_group = make_unique<RowGroup>(db, block_manager, table_info, this->start, this->count);
220
218
  row_group->version_info = version_info;
221
219
  row_group->columns = columns;
222
- row_group->stats = stats;
220
+ for (auto &stat : stats) {
221
+ row_group->stats.emplace_back(stat.statistics.Copy());
222
+ }
223
223
  // now add the new column
224
224
  row_group->columns.push_back(std::move(added_column));
225
225
  row_group->stats.push_back(std::move(added_col_stats));
@@ -236,7 +236,9 @@ unique_ptr<RowGroup> RowGroup::RemoveColumn(idx_t removed_column) {
236
236
  auto row_group = make_unique<RowGroup>(db, block_manager, table_info, this->start, this->count);
237
237
  row_group->version_info = version_info;
238
238
  row_group->columns = columns;
239
- row_group->stats = stats;
239
+ for (auto &stat : stats) {
240
+ row_group->stats.emplace_back(stat.statistics.Copy());
241
+ }
240
242
  // now remove the column
241
243
  row_group->columns.erase(row_group->columns.begin() + removed_column);
242
244
  row_group->stats.erase(row_group->stats.begin() + removed_column);
@@ -275,7 +277,7 @@ bool RowGroup::CheckZonemap(TableFilterSet &filters, const vector<column_t> &col
275
277
  auto &filter = entry.second;
276
278
  auto base_column_index = column_ids[column_index];
277
279
 
278
- auto propagate_result = filter->CheckStatistics(*stats[base_column_index]->statistics);
280
+ auto propagate_result = filter->CheckStatistics(stats[base_column_index].statistics);
279
281
  if (propagate_result == FilterPropagateResult::FILTER_ALWAYS_FALSE ||
280
282
  propagate_result == FilterPropagateResult::FILTER_FALSE_OR_NULL) {
281
283
  return false;
@@ -628,7 +630,7 @@ void RowGroup::InitializeAppend(RowGroupAppendState &append_state) {
628
630
  void RowGroup::Append(RowGroupAppendState &state, DataChunk &chunk, idx_t append_count) {
629
631
  // append to the current row_group
630
632
  for (idx_t i = 0; i < columns.size(); i++) {
631
- columns[i]->Append(*stats[i]->statistics, state.states[i], chunk.data[i], append_count);
633
+ columns[i]->Append(stats[i].statistics, state.states[i], chunk.data[i], append_count);
632
634
  }
633
635
  state.offset_in_row_group += append_count;
634
636
  }
@@ -671,21 +673,21 @@ unique_ptr<BaseStatistics> RowGroup::GetStatistics(idx_t column_idx) {
671
673
  D_ASSERT(column_idx < stats.size());
672
674
 
673
675
  lock_guard<mutex> slock(stats_lock);
674
- return stats[column_idx]->statistics->Copy();
676
+ return stats[column_idx].statistics.ToUnique();
675
677
  }
676
678
 
677
679
  void RowGroup::MergeStatistics(idx_t column_idx, const BaseStatistics &other) {
678
680
  D_ASSERT(column_idx < stats.size());
679
681
 
680
682
  lock_guard<mutex> slock(stats_lock);
681
- stats[column_idx]->statistics->Merge(other);
683
+ stats[column_idx].statistics.Merge(other);
682
684
  }
683
685
 
684
686
  void RowGroup::MergeIntoStatistics(idx_t column_idx, BaseStatistics &other) {
685
687
  D_ASSERT(column_idx < stats.size());
686
688
 
687
689
  lock_guard<mutex> slock(stats_lock);
688
- other.Merge(*stats[column_idx]->statistics);
690
+ other.Merge(stats[column_idx].statistics);
689
691
  }
690
692
 
691
693
  RowGroupWriteData RowGroup::WriteToDisk(PartialBlockManager &manager,
@@ -711,14 +713,14 @@ RowGroupWriteData RowGroup::WriteToDisk(PartialBlockManager &manager,
711
713
  auto stats = checkpoint_state->GetStatistics();
712
714
  D_ASSERT(stats);
713
715
 
714
- result.statistics.push_back(std::move(stats));
716
+ result.statistics.push_back(stats->Copy());
715
717
  result.states.push_back(std::move(checkpoint_state));
716
718
  }
717
719
  D_ASSERT(result.states.size() == result.statistics.size());
718
720
  return result;
719
721
  }
720
722
 
721
- RowGroupPointer RowGroup::Checkpoint(RowGroupWriter &writer, vector<unique_ptr<BaseStatistics>> &global_stats) {
723
+ RowGroupPointer RowGroup::Checkpoint(RowGroupWriter &writer, TableStatistics &global_stats) {
722
724
  RowGroupPointer row_group_pointer;
723
725
 
724
726
  vector<CompressionType> compression_types;
@@ -728,7 +730,7 @@ RowGroupPointer RowGroup::Checkpoint(RowGroupWriter &writer, vector<unique_ptr<B
728
730
  }
729
731
  auto result = WriteToDisk(writer.GetPartialBlockManager(), compression_types);
730
732
  for (idx_t column_idx = 0; column_idx < columns.size(); column_idx++) {
731
- global_stats[column_idx]->Merge(*result.statistics[column_idx]);
733
+ global_stats.GetStats(column_idx).Statistics().Merge(result.statistics[column_idx]);
732
734
  }
733
735
  row_group_pointer.statistics = std::move(result.statistics);
734
736
 
@@ -805,7 +807,7 @@ void RowGroup::Serialize(RowGroupPointer &pointer, Serializer &main_serializer)
805
807
  writer.WriteField<uint64_t>(pointer.tuple_count);
806
808
  auto &serializer = writer.GetSerializer();
807
809
  for (auto &stats : pointer.statistics) {
808
- stats->Serialize(serializer);
810
+ stats.Serialize(serializer);
809
811
  }
810
812
  for (auto &data_pointer : pointer.data_pointers) {
811
813
  serializer.Write<block_id_t>(data_pointer.block_id);
@@ -828,8 +830,7 @@ RowGroupPointer RowGroup::Deserialize(Deserializer &main_source, const ColumnLis
828
830
 
829
831
  auto &source = reader.GetSource();
830
832
  for (auto &col : columns.Physical()) {
831
- auto stats = BaseStatistics::Deserialize(source, col.Type());
832
- result.statistics.push_back(std::move(stats));
833
+ result.statistics.push_back(BaseStatistics::Deserialize(source, col.Type()));
833
834
  }
834
835
  for (idx_t i = 0; i < columns.PhysicalColumnCount(); i++) {
835
836
  BlockPointer pointer;
@@ -280,7 +280,7 @@ bool RowGroupCollection::Append(DataChunk &chunk, TableAppendState &state) {
280
280
  // merge the stats
281
281
  auto stats_lock = stats.GetLock();
282
282
  for (idx_t i = 0; i < types.size(); i++) {
283
- current_row_group->MergeIntoStatistics(i, *stats.GetStats(i).stats);
283
+ current_row_group->MergeIntoStatistics(i, stats.GetStats(i).Statistics());
284
284
  }
285
285
  }
286
286
  remaining -= append_count;
@@ -319,11 +319,7 @@ bool RowGroupCollection::Append(DataChunk &chunk, TableAppendState &state) {
319
319
  state.current_row += append_count;
320
320
  auto stats_lock = stats.GetLock();
321
321
  for (idx_t col_idx = 0; col_idx < types.size(); col_idx++) {
322
- auto type = types[col_idx].InternalType();
323
- if (type == PhysicalType::LIST || type == PhysicalType::STRUCT) {
324
- continue;
325
- }
326
- stats.GetStats(col_idx).stats->UpdateDistinctStatistics(chunk.data[col_idx], chunk.size());
322
+ stats.GetStats(col_idx).UpdateDistinctStatistics(chunk.data[col_idx], chunk.size());
327
323
  }
328
324
  return new_row_group;
329
325
  }
@@ -513,13 +509,13 @@ void RowGroupCollection::UpdateColumn(TransactionData transaction, Vector &row_i
513
509
  auto row_group = (RowGroup *)row_groups->GetSegment(first_id);
514
510
  row_group->UpdateColumn(transaction, updates, row_ids, column_path);
515
511
 
516
- row_group->MergeIntoStatistics(primary_column_idx, *stats.GetStats(primary_column_idx).stats);
512
+ row_group->MergeIntoStatistics(primary_column_idx, stats.GetStats(primary_column_idx).Statistics());
517
513
  }
518
514
 
519
515
  //===--------------------------------------------------------------------===//
520
516
  // Checkpoint
521
517
  //===--------------------------------------------------------------------===//
522
- void RowGroupCollection::Checkpoint(TableDataWriter &writer, vector<unique_ptr<BaseStatistics>> &global_stats) {
518
+ void RowGroupCollection::Checkpoint(TableDataWriter &writer, TableStatistics &global_stats) {
523
519
  for (auto row_group = (RowGroup *)row_groups->GetRootSegment(); row_group;
524
520
  row_group = (RowGroup *)row_group->Next()) {
525
521
  auto rowg_writer = writer.GetRowGroupWriter(*row_group);
@@ -590,7 +586,7 @@ shared_ptr<RowGroupCollection> RowGroupCollection::AddColumn(ClientContext &cont
590
586
  while (current_row_group) {
591
587
  auto new_row_group = current_row_group->AddColumn(new_column, executor, default_value, default_vector);
592
588
  // merge in the statistics
593
- new_row_group->MergeIntoStatistics(new_column_idx, *new_column_stats.stats);
589
+ new_row_group->MergeIntoStatistics(new_column_idx, new_column_stats.Statistics());
594
590
 
595
591
  result->row_groups->AppendSegment(std::move(new_row_group));
596
592
  current_row_group = (RowGroup *)current_row_group->Next();
@@ -651,7 +647,7 @@ shared_ptr<RowGroupCollection> RowGroupCollection::AlterType(ClientContext &cont
651
647
  while (current_row_group) {
652
648
  auto new_row_group = current_row_group->AlterType(target_type, changed_idx, executor,
653
649
  scan_state.table_state.row_group_state, scan_chunk);
654
- new_row_group->MergeIntoStatistics(changed_idx, *changed_stats.stats);
650
+ new_row_group->MergeIntoStatistics(changed_idx, changed_stats.Statistics());
655
651
  result->row_groups->AppendSegment(std::move(new_row_group));
656
652
  current_row_group = (RowGroup *)current_row_group->Next();
657
653
  }
@@ -696,14 +692,18 @@ void RowGroupCollection::VerifyNewConstraint(DataTable &parent, const BoundConst
696
692
  //===--------------------------------------------------------------------===//
697
693
  // Statistics
698
694
  //===--------------------------------------------------------------------===//
695
+ void RowGroupCollection::CopyStats(TableStatistics &other_stats) {
696
+ stats.CopyStats(other_stats);
697
+ }
698
+
699
699
  unique_ptr<BaseStatistics> RowGroupCollection::CopyStats(column_t column_id) {
700
700
  return stats.CopyStats(column_id);
701
701
  }
702
702
 
703
- void RowGroupCollection::SetStatistics(column_t column_id, const std::function<void(BaseStatistics &)> &set_fun) {
703
+ void RowGroupCollection::SetDistinct(column_t column_id, unique_ptr<DistinctStatistics> distinct_stats) {
704
704
  D_ASSERT(column_id != COLUMN_IDENTIFIER_ROW_ID);
705
705
  auto stats_guard = stats.GetLock();
706
- set_fun(*stats.GetStats(column_id).stats);
706
+ stats.GetStats(column_id).SetDistinct(std::move(distinct_stats));
707
707
  }
708
708
 
709
709
  } // namespace duckdb
@@ -24,7 +24,7 @@ bool StandardColumnData::CheckZonemap(ColumnScanState &state, TableFilter &filte
24
24
  return true;
25
25
  }
26
26
  state.segment_checked = true;
27
- auto prune_result = filter.CheckStatistics(*state.current->stats.statistics);
27
+ auto prune_result = filter.CheckStatistics(state.current->stats.statistics);
28
28
  if (prune_result != FilterPropagateResult::FILTER_ALWAYS_FALSE) {
29
29
  return true;
30
30
  }
@@ -91,8 +91,7 @@ void StandardColumnData::InitializeAppend(ColumnAppendState &state) {
91
91
  void StandardColumnData::AppendData(BaseStatistics &stats, ColumnAppendState &state, UnifiedVectorFormat &vdata,
92
92
  idx_t count) {
93
93
  ColumnData::AppendData(stats, state, vdata, count);
94
-
95
- validity.AppendData(*stats.validity_stats, state.child_appends[0], vdata, count);
94
+ validity.AppendData(stats, state.child_appends[0], vdata, count);
96
95
  }
97
96
 
98
97
  void StandardColumnData::RevertAppend(row_t start_row) {
@@ -136,9 +135,11 @@ unique_ptr<BaseStatistics> StandardColumnData::GetUpdateStatistics() {
136
135
  return nullptr;
137
136
  }
138
137
  if (!stats) {
139
- stats = BaseStatistics::CreateEmpty(type, StatisticsType::GLOBAL_STATS);
138
+ stats = BaseStatistics::CreateEmpty(type).ToUnique();
139
+ }
140
+ if (validity_stats) {
141
+ stats->Merge(*validity_stats);
140
142
  }
141
- stats->validity_stats = std::move(validity_stats);
142
143
  return stats;
143
144
  }
144
145
 
@@ -169,7 +170,6 @@ struct StandardColumnCheckpointState : public ColumnCheckpointState {
169
170
  public:
170
171
  unique_ptr<BaseStatistics> GetStatistics() override {
171
172
  D_ASSERT(global_stats);
172
- global_stats->validity_stats = validity_state->GetStatistics();
173
173
  return std::move(global_stats);
174
174
  }
175
175
 
@@ -1,5 +1,5 @@
1
1
  #include "duckdb/storage/table/struct_column_data.hpp"
2
- #include "duckdb/storage/statistics/struct_statistics.hpp"
2
+ #include "duckdb/storage/statistics/struct_stats.hpp"
3
3
  #include "duckdb/transaction/transaction.hpp"
4
4
 
5
5
  namespace duckdb {
@@ -127,12 +127,12 @@ void StructColumnData::Append(BaseStatistics &stats, ColumnAppendState &state, V
127
127
  vector.Flatten(count);
128
128
 
129
129
  // append the null values
130
- validity.Append(*stats.validity_stats, state.child_appends[0], vector, count);
130
+ validity.Append(stats, state.child_appends[0], vector, count);
131
131
 
132
- auto &struct_stats = (StructStatistics &)stats;
133
132
  auto &child_entries = StructVector::GetEntries(vector);
134
133
  for (idx_t i = 0; i < child_entries.size(); i++) {
135
- sub_columns[i]->Append(*struct_stats.child_stats[i], state.child_appends[i + 1], *child_entries[i], count);
134
+ sub_columns[i]->Append(StructStats::GetChildStats(stats, i), state.child_appends[i + 1], *child_entries[i],
135
+ count);
136
136
  }
137
137
  }
138
138
 
@@ -190,16 +190,18 @@ void StructColumnData::UpdateColumn(TransactionData transaction, const vector<co
190
190
 
191
191
  unique_ptr<BaseStatistics> StructColumnData::GetUpdateStatistics() {
192
192
  // check if any child column has updates
193
- auto stats = BaseStatistics::CreateEmpty(type, StatisticsType::GLOBAL_STATS);
194
- auto &struct_stats = (StructStatistics &)*stats;
195
- stats->validity_stats = validity.GetUpdateStatistics();
193
+ auto stats = BaseStatistics::CreateEmpty(type);
194
+ auto validity_stats = validity.GetUpdateStatistics();
195
+ if (validity_stats) {
196
+ stats.Merge(*validity_stats);
197
+ }
196
198
  for (idx_t i = 0; i < sub_columns.size(); i++) {
197
199
  auto child_stats = sub_columns[i]->GetUpdateStatistics();
198
200
  if (child_stats) {
199
- struct_stats.child_stats[i] = std::move(child_stats);
201
+ StructStats::SetChildStats(stats, i, std::move(child_stats));
200
202
  }
201
203
  }
202
- return stats;
204
+ return stats.ToUnique();
203
205
  }
204
206
 
205
207
  void StructColumnData::FetchRow(TransactionData transaction, ColumnFetchState &state, row_t row_id, Vector &result,
@@ -230,7 +232,7 @@ struct StructColumnCheckpointState : public ColumnCheckpointState {
230
232
  StructColumnCheckpointState(RowGroup &row_group, ColumnData &column_data,
231
233
  PartialBlockManager &partial_block_manager)
232
234
  : ColumnCheckpointState(row_group, column_data, partial_block_manager) {
233
- global_stats = make_unique<StructStatistics>(column_data.type);
235
+ global_stats = StructStats::CreateEmpty(column_data.type).ToUnique();
234
236
  }
235
237
 
236
238
  unique_ptr<ColumnCheckpointState> validity_state;
@@ -238,14 +240,11 @@ struct StructColumnCheckpointState : public ColumnCheckpointState {
238
240
 
239
241
  public:
240
242
  unique_ptr<BaseStatistics> GetStatistics() override {
241
- auto stats = make_unique<StructStatistics>(column_data.type);
242
- D_ASSERT(stats->child_stats.size() == child_states.size());
243
- stats->validity_stats = validity_state->GetStatistics();
243
+ auto stats = StructStats::CreateEmpty(column_data.type);
244
244
  for (idx_t i = 0; i < child_states.size(); i++) {
245
- stats->child_stats[i] = child_states[i]->GetStatistics();
246
- D_ASSERT(stats->child_stats[i]);
245
+ StructStats::SetChildStats(stats, i, child_states[i]->GetStatistics());
247
246
  }
248
- return std::move(stats);
247
+ return stats.ToUnique();
249
248
  }
250
249
 
251
250
  void WriteDataPointers(RowGroupWriter &writer) override {
@@ -6,10 +6,7 @@ namespace duckdb {
6
6
  void TableStatistics::Initialize(const vector<LogicalType> &types, PersistentTableData &data) {
7
7
  D_ASSERT(Empty());
8
8
 
9
- column_stats.reserve(data.column_stats.size());
10
- for (auto &stats : data.column_stats) {
11
- column_stats.push_back(make_shared<ColumnStatistics>(std::move(stats)));
12
- }
9
+ column_stats = std::move(data.table_stats.column_stats);
13
10
  if (column_stats.size() != types.size()) { // LCOV_EXCL_START
14
11
  throw IOException("Table statistics column count is not aligned with table column count. Corrupt file?");
15
12
  } // LCOV_EXCL_STOP
@@ -70,7 +67,7 @@ void TableStatistics::MergeStats(TableStatistics &other) {
70
67
  auto l = GetLock();
71
68
  D_ASSERT(column_stats.size() == other.column_stats.size());
72
69
  for (idx_t i = 0; i < column_stats.size(); i++) {
73
- column_stats[i]->stats->Merge(*other.column_stats[i]->stats);
70
+ column_stats[i]->Merge(*other.column_stats[i]);
74
71
  }
75
72
  }
76
73
 
@@ -80,7 +77,7 @@ void TableStatistics::MergeStats(idx_t i, BaseStatistics &stats) {
80
77
  }
81
78
 
82
79
  void TableStatistics::MergeStats(TableStatisticsLock &lock, idx_t i, BaseStatistics &stats) {
83
- column_stats[i]->stats->Merge(stats);
80
+ column_stats[i]->Statistics().Merge(stats);
84
81
  }
85
82
 
86
83
  ColumnStatistics &TableStatistics::GetStats(idx_t i) {
@@ -89,7 +86,30 @@ ColumnStatistics &TableStatistics::GetStats(idx_t i) {
89
86
 
90
87
  unique_ptr<BaseStatistics> TableStatistics::CopyStats(idx_t i) {
91
88
  lock_guard<mutex> l(stats_lock);
92
- return column_stats[i]->stats->Copy();
89
+ auto result = column_stats[i]->Statistics().Copy();
90
+ if (column_stats[i]->HasDistinctStats()) {
91
+ result.SetDistinctCount(column_stats[i]->DistinctStats().GetCount());
92
+ }
93
+ return result.ToUnique();
94
+ }
95
+
96
+ void TableStatistics::CopyStats(TableStatistics &other) {
97
+ for (auto &stats : column_stats) {
98
+ other.column_stats.push_back(stats->Copy());
99
+ }
100
+ }
101
+
102
+ void TableStatistics::Serialize(Serializer &serializer) {
103
+ for (auto &stats : column_stats) {
104
+ stats->Serialize(serializer);
105
+ }
106
+ }
107
+
108
+ void TableStatistics::Deserialize(Deserializer &source, ColumnList &columns) {
109
+ for (auto &col : columns.Physical()) {
110
+ auto stats = ColumnStatistics::Deserialize(source, col.GetType());
111
+ column_stats.push_back(std::move(stats));
112
+ }
93
113
  }
94
114
 
95
115
  unique_ptr<TableStatisticsLock> TableStatistics::GetLock() {
@@ -1,9 +1,7 @@
1
1
  #include "duckdb/storage/table/update_segment.hpp"
2
2
 
3
3
  #include "duckdb/storage/statistics/distinct_statistics.hpp"
4
- #include "duckdb/storage/statistics/numeric_statistics.hpp"
5
- #include "duckdb/storage/statistics/string_statistics.hpp"
6
- #include "duckdb/storage/statistics/validity_statistics.hpp"
4
+
7
5
  #include "duckdb/storage/table/column_data.hpp"
8
6
  #include "duckdb/transaction/duck_transaction.hpp"
9
7
  #include "duckdb/transaction/update_info.hpp"
@@ -55,7 +53,7 @@ UpdateSegment::~UpdateSegment() {
55
53
  }
56
54
 
57
55
  void UpdateSegment::ClearUpdates() {
58
- stats.Reset();
56
+ stats.statistics.Copy(BaseStatistics::CreateEmpty(stats.statistics.GetType()));
59
57
  root.reset();
60
58
  heap.Destroy();
61
59
  }
@@ -905,17 +903,17 @@ static UpdateSegment::merge_update_function_t GetMergeUpdateFunction(PhysicalTyp
905
903
  //===--------------------------------------------------------------------===//
906
904
  unique_ptr<BaseStatistics> UpdateSegment::GetStatistics() {
907
905
  lock_guard<mutex> stats_guard(stats_lock);
908
- return stats.statistics->Copy();
906
+ return stats.statistics.ToUnique();
909
907
  }
910
908
 
911
909
  idx_t UpdateValidityStatistics(UpdateSegment *segment, SegmentStatistics &stats, Vector &update, idx_t count,
912
910
  SelectionVector &sel) {
913
911
  auto &mask = FlatVector::Validity(update);
914
- auto &validity = (ValidityStatistics &)*stats.statistics;
915
- if (!mask.AllValid() && !validity.has_null) {
912
+ auto &validity = stats.statistics;
913
+ if (!mask.AllValid() && !validity.CanHaveNull()) {
916
914
  for (idx_t i = 0; i < count; i++) {
917
915
  if (!mask.RowIsValid(i)) {
918
- validity.has_null = true;
916
+ validity.SetHasNull();
919
917
  break;
920
918
  }
921
919
  }
@@ -932,7 +930,7 @@ idx_t TemplatedUpdateNumericStatistics(UpdateSegment *segment, SegmentStatistics
932
930
 
933
931
  if (mask.AllValid()) {
934
932
  for (idx_t i = 0; i < count; i++) {
935
- NumericStatistics::Update<T>(stats, update_data[i]);
933
+ NumericStats::Update<T>(stats.statistics, update_data[i]);
936
934
  }
937
935
  sel.Initialize(nullptr);
938
936
  return count;
@@ -942,7 +940,7 @@ idx_t TemplatedUpdateNumericStatistics(UpdateSegment *segment, SegmentStatistics
942
940
  for (idx_t i = 0; i < count; i++) {
943
941
  if (mask.RowIsValid(i)) {
944
942
  sel.set_index(not_null_count++, i);
945
- NumericStatistics::Update<T>(stats, update_data[i]);
943
+ NumericStats::Update<T>(stats.statistics, update_data[i]);
946
944
  }
947
945
  }
948
946
  return not_null_count;
@@ -955,7 +953,7 @@ idx_t UpdateStringStatistics(UpdateSegment *segment, SegmentStatistics &stats, V
955
953
  auto &mask = FlatVector::Validity(update);
956
954
  if (mask.AllValid()) {
957
955
  for (idx_t i = 0; i < count; i++) {
958
- ((StringStatistics &)*stats.statistics).Update(update_data[i]);
956
+ StringStats::Update(stats.statistics, update_data[i]);
959
957
  if (!update_data[i].IsInlined()) {
960
958
  update_data[i] = segment->GetStringHeap().AddBlob(update_data[i]);
961
959
  }
@@ -968,7 +966,7 @@ idx_t UpdateStringStatistics(UpdateSegment *segment, SegmentStatistics &stats, V
968
966
  for (idx_t i = 0; i < count; i++) {
969
967
  if (mask.RowIsValid(i)) {
970
968
  sel.set_index(not_null_count++, i);
971
- ((StringStatistics &)*stats.statistics).Update(update_data[i]);
969
+ StringStats::Update(stats.statistics, update_data[i]);
972
970
  if (!update_data[i].IsInlined()) {
973
971
  update_data[i] = segment->GetStringHeap().AddBlob(update_data[i]);
974
972
  }