duckdb 0.4.1-dev696.0 → 0.4.1-dev723.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/duckdb.cpp CHANGED
@@ -29957,7 +29957,7 @@ class RowDataCollection;
29957
29957
  struct SelectionVector;
29958
29958
  class StringHeap;
29959
29959
  class Vector;
29960
- struct VectorData;
29960
+ struct UnifiedVectorFormat;
29961
29961
 
29962
29962
  // RowOperations contains a set of operations that operate on data using a RowLayout
29963
29963
  struct RowOperations {
@@ -29982,7 +29982,7 @@ struct RowOperations {
29982
29982
  // Read/Write Operators
29983
29983
  //===--------------------------------------------------------------------===//
29984
29984
  //! Scatter group data to the rows. Initialises the ValidityMask.
29985
- static void Scatter(DataChunk &columns, VectorData col_data[], const RowLayout &layout, Vector &rows,
29985
+ static void Scatter(DataChunk &columns, UnifiedVectorFormat col_data[], const RowLayout &layout, Vector &rows,
29986
29986
  RowDataCollection &string_heap, const SelectionVector &sel, idx_t count);
29987
29987
  //! Gather a single column.
29988
29988
  static void Gather(Vector &rows, const SelectionVector &row_sel, Vector &col, const SelectionVector &col_sel,
@@ -29998,7 +29998,7 @@ struct RowOperations {
29998
29998
  //! Returns the number of matches remaining in the selection.
29999
29999
  using Predicates = vector<ExpressionType>;
30000
30000
 
30001
- static idx_t Match(DataChunk &columns, VectorData col_data[], const RowLayout &layout, Vector &rows,
30001
+ static idx_t Match(DataChunk &columns, UnifiedVectorFormat col_data[], const RowLayout &layout, Vector &rows,
30002
30002
  const Predicates &predicates, SelectionVector &sel, idx_t count, SelectionVector *no_match,
30003
30003
  idx_t &no_match_count);
30004
30004
 
@@ -30009,15 +30009,15 @@ struct RowOperations {
30009
30009
  static void ComputeEntrySizes(Vector &v, idx_t entry_sizes[], idx_t vcount, idx_t ser_count,
30010
30010
  const SelectionVector &sel, idx_t offset = 0);
30011
30011
  //! Compute the entry sizes of vector data with variable size type (used before building heap buffer space).
30012
- static void ComputeEntrySizes(Vector &v, VectorData &vdata, idx_t entry_sizes[], idx_t vcount, idx_t ser_count,
30013
- const SelectionVector &sel, idx_t offset = 0);
30012
+ static void ComputeEntrySizes(Vector &v, UnifiedVectorFormat &vdata, idx_t entry_sizes[], idx_t vcount,
30013
+ idx_t ser_count, const SelectionVector &sel, idx_t offset = 0);
30014
30014
  //! Scatter vector with variable size type to the heap.
30015
30015
  static void HeapScatter(Vector &v, idx_t vcount, const SelectionVector &sel, idx_t ser_count, idx_t col_idx,
30016
30016
  data_ptr_t *key_locations, data_ptr_t *validitymask_locations, idx_t offset = 0);
30017
30017
  //! Scatter vector data with variable size type to the heap.
30018
- static void HeapScatterVData(VectorData &vdata, PhysicalType type, const SelectionVector &sel, idx_t ser_count,
30019
- idx_t col_idx, data_ptr_t *key_locations, data_ptr_t *validitymask_locations,
30020
- idx_t offset = 0);
30018
+ static void HeapScatterVData(UnifiedVectorFormat &vdata, PhysicalType type, const SelectionVector &sel,
30019
+ idx_t ser_count, idx_t col_idx, data_ptr_t *key_locations,
30020
+ data_ptr_t *validitymask_locations, idx_t offset = 0);
30021
30021
  //! Gather a single column with variable size type from the heap.
30022
30022
  static void HeapGather(Vector &v, const idx_t &vcount, const SelectionVector &sel, const idx_t &col_idx,
30023
30023
  data_ptr_t key_locations[], data_ptr_t validitymask_locations[]);
@@ -30256,7 +30256,7 @@ void RowOperations::UpdateFilteredStates(AggregateFilterData &filter_data, Aggre
30256
30256
  idx_t count = filter_data.ApplyFilter(payload);
30257
30257
 
30258
30258
  Vector filtered_addresses(addresses, filter_data.true_sel, count);
30259
- filtered_addresses.Normalify(count);
30259
+ filtered_addresses.Flatten(count);
30260
30260
 
30261
30261
  UpdateStates(aggr, filtered_addresses, filter_data.filtered_payload, arg_idx, count);
30262
30262
  }
@@ -30950,7 +30950,7 @@ namespace duckdb {
30950
30950
 
30951
30951
  using ValidityBytes = TemplatedValidityMask<uint8_t>;
30952
30952
 
30953
- static void ComputeStringEntrySizes(VectorData &vdata, idx_t entry_sizes[], const idx_t ser_count,
30953
+ static void ComputeStringEntrySizes(UnifiedVectorFormat &vdata, idx_t entry_sizes[], const idx_t ser_count,
30954
30954
  const SelectionVector &sel, const idx_t offset) {
30955
30955
  auto strings = (string_t *)vdata.data;
30956
30956
  for (idx_t i = 0; i < ser_count; i++) {
@@ -30979,7 +30979,7 @@ static void ComputeStructEntrySizes(Vector &v, idx_t entry_sizes[], idx_t vcount
30979
30979
  }
30980
30980
  }
30981
30981
 
30982
- static void ComputeListEntrySizes(Vector &v, VectorData &vdata, idx_t entry_sizes[], idx_t ser_count,
30982
+ static void ComputeListEntrySizes(Vector &v, UnifiedVectorFormat &vdata, idx_t entry_sizes[], idx_t ser_count,
30983
30983
  const SelectionVector &sel, idx_t offset) {
30984
30984
  auto list_data = ListVector::GetData(v);
30985
30985
  auto &child_vector = ListVector::GetEntry(v);
@@ -31022,8 +31022,8 @@ static void ComputeListEntrySizes(Vector &v, VectorData &vdata, idx_t entry_size
31022
31022
  }
31023
31023
  }
31024
31024
 
31025
- void RowOperations::ComputeEntrySizes(Vector &v, VectorData &vdata, idx_t entry_sizes[], idx_t vcount, idx_t ser_count,
31026
- const SelectionVector &sel, idx_t offset) {
31025
+ void RowOperations::ComputeEntrySizes(Vector &v, UnifiedVectorFormat &vdata, idx_t entry_sizes[], idx_t vcount,
31026
+ idx_t ser_count, const SelectionVector &sel, idx_t offset) {
31027
31027
  const auto physical_type = v.GetType().InternalType();
31028
31028
  if (TypeIsConstantSize(physical_type)) {
31029
31029
  const auto type_size = GetTypeIdSize(physical_type);
@@ -31052,13 +31052,13 @@ void RowOperations::ComputeEntrySizes(Vector &v, VectorData &vdata, idx_t entry_
31052
31052
 
31053
31053
  void RowOperations::ComputeEntrySizes(Vector &v, idx_t entry_sizes[], idx_t vcount, idx_t ser_count,
31054
31054
  const SelectionVector &sel, idx_t offset) {
31055
- VectorData vdata;
31056
- v.Orrify(vcount, vdata);
31055
+ UnifiedVectorFormat vdata;
31056
+ v.ToUnifiedFormat(vcount, vdata);
31057
31057
  ComputeEntrySizes(v, vdata, entry_sizes, vcount, ser_count, sel, offset);
31058
31058
  }
31059
31059
 
31060
31060
  template <class T>
31061
- static void TemplatedHeapScatter(VectorData &vdata, const SelectionVector &sel, idx_t count, idx_t col_idx,
31061
+ static void TemplatedHeapScatter(UnifiedVectorFormat &vdata, const SelectionVector &sel, idx_t count, idx_t col_idx,
31062
31062
  data_ptr_t *key_locations, data_ptr_t *validitymask_locations, idx_t offset) {
31063
31063
  auto source = (T *)vdata.data;
31064
31064
  if (!validitymask_locations) {
@@ -31093,8 +31093,8 @@ static void TemplatedHeapScatter(VectorData &vdata, const SelectionVector &sel,
31093
31093
 
31094
31094
  static void HeapScatterStringVector(Vector &v, idx_t vcount, const SelectionVector &sel, idx_t ser_count, idx_t col_idx,
31095
31095
  data_ptr_t *key_locations, data_ptr_t *validitymask_locations, idx_t offset) {
31096
- VectorData vdata;
31097
- v.Orrify(vcount, vdata);
31096
+ UnifiedVectorFormat vdata;
31097
+ v.ToUnifiedFormat(vcount, vdata);
31098
31098
 
31099
31099
  auto strings = (string_t *)vdata.data;
31100
31100
  if (!validitymask_locations) {
@@ -31137,8 +31137,8 @@ static void HeapScatterStringVector(Vector &v, idx_t vcount, const SelectionVect
31137
31137
 
31138
31138
  static void HeapScatterStructVector(Vector &v, idx_t vcount, const SelectionVector &sel, idx_t ser_count, idx_t col_idx,
31139
31139
  data_ptr_t *key_locations, data_ptr_t *validitymask_locations, idx_t offset) {
31140
- VectorData vdata;
31141
- v.Orrify(vcount, vdata);
31140
+ UnifiedVectorFormat vdata;
31141
+ v.ToUnifiedFormat(vcount, vdata);
31142
31142
 
31143
31143
  auto &children = StructVector::GetEntries(v);
31144
31144
  idx_t num_children = children.size();
@@ -31176,8 +31176,8 @@ static void HeapScatterStructVector(Vector &v, idx_t vcount, const SelectionVect
31176
31176
 
31177
31177
  static void HeapScatterListVector(Vector &v, idx_t vcount, const SelectionVector &sel, idx_t ser_count, idx_t col_no,
31178
31178
  data_ptr_t *key_locations, data_ptr_t *validitymask_locations, idx_t offset) {
31179
- VectorData vdata;
31180
- v.Orrify(vcount, vdata);
31179
+ UnifiedVectorFormat vdata;
31180
+ v.ToUnifiedFormat(vcount, vdata);
31181
31181
 
31182
31182
  idx_t entry_idx;
31183
31183
  idx_t idx_in_entry;
@@ -31187,8 +31187,8 @@ static void HeapScatterListVector(Vector &v, idx_t vcount, const SelectionVector
31187
31187
 
31188
31188
  auto &child_vector = ListVector::GetEntry(v);
31189
31189
 
31190
- VectorData list_vdata;
31191
- child_vector.Orrify(ListVector::GetListSize(v), list_vdata);
31190
+ UnifiedVectorFormat list_vdata;
31191
+ child_vector.ToUnifiedFormat(ListVector::GetListSize(v), list_vdata);
31192
31192
  auto child_type = ListType::GetChildType(v.GetType()).InternalType();
31193
31193
 
31194
31194
  idx_t list_entry_sizes[STANDARD_VECTOR_SIZE];
@@ -31278,8 +31278,8 @@ static void HeapScatterListVector(Vector &v, idx_t vcount, const SelectionVector
31278
31278
  void RowOperations::HeapScatter(Vector &v, idx_t vcount, const SelectionVector &sel, idx_t ser_count, idx_t col_idx,
31279
31279
  data_ptr_t *key_locations, data_ptr_t *validitymask_locations, idx_t offset) {
31280
31280
  if (TypeIsConstantSize(v.GetType().InternalType())) {
31281
- VectorData vdata;
31282
- v.Orrify(vcount, vdata);
31281
+ UnifiedVectorFormat vdata;
31282
+ v.ToUnifiedFormat(vcount, vdata);
31283
31283
  RowOperations::HeapScatterVData(vdata, v.GetType().InternalType(), sel, ser_count, col_idx, key_locations,
31284
31284
  validitymask_locations, offset);
31285
31285
  } else {
@@ -31302,9 +31302,9 @@ void RowOperations::HeapScatter(Vector &v, idx_t vcount, const SelectionVector &
31302
31302
  }
31303
31303
  }
31304
31304
 
31305
- void RowOperations::HeapScatterVData(VectorData &vdata, PhysicalType type, const SelectionVector &sel, idx_t ser_count,
31306
- idx_t col_idx, data_ptr_t *key_locations, data_ptr_t *validitymask_locations,
31307
- idx_t offset) {
31305
+ void RowOperations::HeapScatterVData(UnifiedVectorFormat &vdata, PhysicalType type, const SelectionVector &sel,
31306
+ idx_t ser_count, idx_t col_idx, data_ptr_t *key_locations,
31307
+ data_ptr_t *validitymask_locations, idx_t offset) {
31308
31308
  switch (type) {
31309
31309
  case PhysicalType::BOOL:
31310
31310
  case PhysicalType::INT8:
@@ -31408,8 +31408,8 @@ idx_t SelectComparison<LessThanEquals>(Vector &left, Vector &right, const Select
31408
31408
  }
31409
31409
 
31410
31410
  template <class T, class OP, bool NO_MATCH_SEL>
31411
- static void TemplatedMatchType(VectorData &col, Vector &rows, SelectionVector &sel, idx_t &count, idx_t col_offset,
31412
- idx_t col_no, SelectionVector *no_match, idx_t &no_match_count) {
31411
+ static void TemplatedMatchType(UnifiedVectorFormat &col, Vector &rows, SelectionVector &sel, idx_t &count,
31412
+ idx_t col_offset, idx_t col_no, SelectionVector *no_match, idx_t &no_match_count) {
31413
31413
  // Precompute row_mask indexes
31414
31414
  idx_t entry_idx;
31415
31415
  idx_t idx_in_entry;
@@ -31490,8 +31490,9 @@ static void TemplatedMatchNested(Vector &col, Vector &rows, SelectionVector &sel
31490
31490
  }
31491
31491
 
31492
31492
  template <class OP, bool NO_MATCH_SEL>
31493
- static void TemplatedMatchOp(Vector &vec, VectorData &col, const RowLayout &layout, Vector &rows, SelectionVector &sel,
31494
- idx_t &count, idx_t col_no, SelectionVector *no_match, idx_t &no_match_count) {
31493
+ static void TemplatedMatchOp(Vector &vec, UnifiedVectorFormat &col, const RowLayout &layout, Vector &rows,
31494
+ SelectionVector &sel, idx_t &count, idx_t col_no, SelectionVector *no_match,
31495
+ idx_t &no_match_count) {
31495
31496
  if (count == 0) {
31496
31497
  return;
31497
31498
  }
@@ -31561,7 +31562,7 @@ static void TemplatedMatchOp(Vector &vec, VectorData &col, const RowLayout &layo
31561
31562
  }
31562
31563
 
31563
31564
  template <bool NO_MATCH_SEL>
31564
- static void TemplatedMatch(DataChunk &columns, VectorData col_data[], const RowLayout &layout, Vector &rows,
31565
+ static void TemplatedMatch(DataChunk &columns, UnifiedVectorFormat col_data[], const RowLayout &layout, Vector &rows,
31565
31566
  const Predicates &predicates, SelectionVector &sel, idx_t &count, SelectionVector *no_match,
31566
31567
  idx_t &no_match_count) {
31567
31568
  for (idx_t col_no = 0; col_no < predicates.size(); ++col_no) {
@@ -31600,7 +31601,7 @@ static void TemplatedMatch(DataChunk &columns, VectorData col_data[], const RowL
31600
31601
  }
31601
31602
  }
31602
31603
 
31603
- idx_t RowOperations::Match(DataChunk &columns, VectorData col_data[], const RowLayout &layout, Vector &rows,
31604
+ idx_t RowOperations::Match(DataChunk &columns, UnifiedVectorFormat col_data[], const RowLayout &layout, Vector &rows,
31604
31605
  const Predicates &predicates, SelectionVector &sel, idx_t count, SelectionVector *no_match,
31605
31606
  idx_t &no_match_count) {
31606
31607
  if (no_match) {
@@ -31621,8 +31622,9 @@ idx_t RowOperations::Match(DataChunk &columns, VectorData col_data[], const RowL
31621
31622
  namespace duckdb {
31622
31623
 
31623
31624
  template <class T>
31624
- void TemplatedRadixScatter(VectorData &vdata, const SelectionVector &sel, idx_t add_count, data_ptr_t *key_locations,
31625
- const bool desc, const bool has_null, const bool nulls_first, const idx_t offset) {
31625
+ void TemplatedRadixScatter(UnifiedVectorFormat &vdata, const SelectionVector &sel, idx_t add_count,
31626
+ data_ptr_t *key_locations, const bool desc, const bool has_null, const bool nulls_first,
31627
+ const idx_t offset) {
31626
31628
  auto source = (T *)vdata.data;
31627
31629
  if (has_null) {
31628
31630
  auto &validity = vdata.validity;
@@ -31665,9 +31667,9 @@ void TemplatedRadixScatter(VectorData &vdata, const SelectionVector &sel, idx_t
31665
31667
  }
31666
31668
  }
31667
31669
 
31668
- void RadixScatterStringVector(VectorData &vdata, const SelectionVector &sel, idx_t add_count, data_ptr_t *key_locations,
31669
- const bool desc, const bool has_null, const bool nulls_first, const idx_t prefix_len,
31670
- idx_t offset) {
31670
+ void RadixScatterStringVector(UnifiedVectorFormat &vdata, const SelectionVector &sel, idx_t add_count,
31671
+ data_ptr_t *key_locations, const bool desc, const bool has_null, const bool nulls_first,
31672
+ const idx_t prefix_len, idx_t offset) {
31671
31673
  auto source = (string_t *)vdata.data;
31672
31674
  if (has_null) {
31673
31675
  auto &validity = vdata.validity;
@@ -31710,7 +31712,7 @@ void RadixScatterStringVector(VectorData &vdata, const SelectionVector &sel, idx
31710
31712
  }
31711
31713
  }
31712
31714
 
31713
- void RadixScatterListVector(Vector &v, VectorData &vdata, const SelectionVector &sel, idx_t add_count,
31715
+ void RadixScatterListVector(Vector &v, UnifiedVectorFormat &vdata, const SelectionVector &sel, idx_t add_count,
31714
31716
  data_ptr_t *key_locations, const bool desc, const bool has_null, const bool nulls_first,
31715
31717
  const idx_t prefix_len, const idx_t width, const idx_t offset) {
31716
31718
  auto list_data = ListVector::GetData(v);
@@ -31786,9 +31788,9 @@ void RadixScatterListVector(Vector &v, VectorData &vdata, const SelectionVector
31786
31788
  }
31787
31789
  }
31788
31790
 
31789
- void RadixScatterStructVector(Vector &v, VectorData &vdata, idx_t vcount, const SelectionVector &sel, idx_t add_count,
31790
- data_ptr_t *key_locations, const bool desc, const bool has_null, const bool nulls_first,
31791
- const idx_t prefix_len, idx_t width, const idx_t offset) {
31791
+ void RadixScatterStructVector(Vector &v, UnifiedVectorFormat &vdata, idx_t vcount, const SelectionVector &sel,
31792
+ idx_t add_count, data_ptr_t *key_locations, const bool desc, const bool has_null,
31793
+ const bool nulls_first, const idx_t prefix_len, idx_t width, const idx_t offset) {
31792
31794
  // serialize null values
31793
31795
  if (has_null) {
31794
31796
  auto &validity = vdata.validity;
@@ -31825,8 +31827,8 @@ void RadixScatterStructVector(Vector &v, VectorData &vdata, idx_t vcount, const
31825
31827
  void RowOperations::RadixScatter(Vector &v, idx_t vcount, const SelectionVector &sel, idx_t ser_count,
31826
31828
  data_ptr_t *key_locations, bool desc, bool has_null, bool nulls_first,
31827
31829
  idx_t prefix_len, idx_t width, idx_t offset) {
31828
- VectorData vdata;
31829
- v.Orrify(vcount, vdata);
31830
+ UnifiedVectorFormat vdata;
31831
+ v.ToUnifiedFormat(vcount, vdata);
31830
31832
  switch (v.GetType().InternalType()) {
31831
31833
  case PhysicalType::BOOL:
31832
31834
  case PhysicalType::INT8:
@@ -31902,7 +31904,7 @@ namespace duckdb {
31902
31904
  using ValidityBytes = RowLayout::ValidityBytes;
31903
31905
 
31904
31906
  template <class T>
31905
- static void TemplatedScatter(VectorData &col, Vector &rows, const SelectionVector &sel, const idx_t count,
31907
+ static void TemplatedScatter(UnifiedVectorFormat &col, Vector &rows, const SelectionVector &sel, const idx_t count,
31906
31908
  const idx_t col_offset, const idx_t col_no) {
31907
31909
  auto data = (T *)col.data;
31908
31910
  auto ptrs = FlatVector::GetData<data_ptr_t>(rows);
@@ -31932,7 +31934,7 @@ static void TemplatedScatter(VectorData &col, Vector &rows, const SelectionVecto
31932
31934
  }
31933
31935
  }
31934
31936
 
31935
- static void ComputeStringEntrySizes(const VectorData &col, idx_t entry_sizes[], const SelectionVector &sel,
31937
+ static void ComputeStringEntrySizes(const UnifiedVectorFormat &col, idx_t entry_sizes[], const SelectionVector &sel,
31936
31938
  const idx_t count, const idx_t offset = 0) {
31937
31939
  auto data = (const string_t *)col.data;
31938
31940
  for (idx_t i = 0; i < count; i++) {
@@ -31945,8 +31947,9 @@ static void ComputeStringEntrySizes(const VectorData &col, idx_t entry_sizes[],
31945
31947
  }
31946
31948
  }
31947
31949
 
31948
- static void ScatterStringVector(VectorData &col, Vector &rows, data_ptr_t str_locations[], const SelectionVector &sel,
31949
- const idx_t count, const idx_t col_offset, const idx_t col_no) {
31950
+ static void ScatterStringVector(UnifiedVectorFormat &col, Vector &rows, data_ptr_t str_locations[],
31951
+ const SelectionVector &sel, const idx_t count, const idx_t col_offset,
31952
+ const idx_t col_no) {
31950
31953
  auto string_data = (string_t *)col.data;
31951
31954
  auto ptrs = FlatVector::GetData<data_ptr_t>(rows);
31952
31955
 
@@ -31971,7 +31974,7 @@ static void ScatterStringVector(VectorData &col, Vector &rows, data_ptr_t str_lo
31971
31974
  }
31972
31975
  }
31973
31976
 
31974
- static void ScatterNestedVector(Vector &vec, VectorData &col, Vector &rows, data_ptr_t data_locations[],
31977
+ static void ScatterNestedVector(Vector &vec, UnifiedVectorFormat &col, Vector &rows, data_ptr_t data_locations[],
31975
31978
  const SelectionVector &sel, const idx_t count, const idx_t col_offset,
31976
31979
  const idx_t col_no, const idx_t vcount) {
31977
31980
  // Store pointers to the data in the row
@@ -31990,7 +31993,7 @@ static void ScatterNestedVector(Vector &vec, VectorData &col, Vector &rows, data
31990
31993
  RowOperations::HeapScatter(vec, vcount, sel, count, col_no, data_locations, validitymask_locations);
31991
31994
  }
31992
31995
 
31993
- void RowOperations::Scatter(DataChunk &columns, VectorData col_data[], const RowLayout &layout, Vector &rows,
31996
+ void RowOperations::Scatter(DataChunk &columns, UnifiedVectorFormat col_data[], const RowLayout &layout, Vector &rows,
31994
31997
  RowDataCollection &string_heap, const SelectionVector &sel, idx_t count) {
31995
31998
  if (count == 0) {
31996
31999
  return;
@@ -35027,14 +35030,14 @@ void LocalSortState::SinkChunk(DataChunk &sort, DataChunk &payload) {
35027
35030
  }
35028
35031
  }
35029
35032
  handles = blob_sorting_data->Build(blob_chunk.size(), data_pointers, nullptr);
35030
- auto blob_data = blob_chunk.Orrify();
35033
+ auto blob_data = blob_chunk.ToUnifiedFormat();
35031
35034
  RowOperations::Scatter(blob_chunk, blob_data.get(), sort_layout->blob_layout, addresses, *blob_sorting_heap,
35032
35035
  sel_ptr, blob_chunk.size());
35033
35036
  }
35034
35037
 
35035
35038
  // Finally, serialize payload data
35036
35039
  handles = payload_data->Build(payload.size(), data_pointers, nullptr);
35037
- auto input_data = payload.Orrify();
35040
+ auto input_data = payload.ToUnifiedFormat();
35038
35041
  RowOperations::Scatter(payload, input_data.get(), *payload_layout, addresses, *payload_heap, sel_ptr,
35039
35042
  payload.size());
35040
35043
  }
@@ -38109,7 +38112,7 @@ void ChunkCollection::Append(DataChunk &new_chunk) {
38109
38112
  idx_t added_data = MinValue<idx_t>(remaining_data, STANDARD_VECTOR_SIZE - last_chunk.size());
38110
38113
  if (added_data > 0) {
38111
38114
  // copy <added_data> elements to the last chunk
38112
- new_chunk.Normalify();
38115
+ new_chunk.Flatten();
38113
38116
  // have to be careful here: setting the cardinality without calling normalify can cause incorrect partial
38114
38117
  // decompression
38115
38118
  idx_t old_count = new_chunk.size();
@@ -38739,9 +38742,9 @@ void DataChunk::Append(const DataChunk &other, bool resize, SelectionVector *sel
38739
38742
  SetCardinality(new_size);
38740
38743
  }
38741
38744
 
38742
- void DataChunk::Normalify() {
38745
+ void DataChunk::Flatten() {
38743
38746
  for (idx_t i = 0; i < ColumnCount(); i++) {
38744
- data[i].Normalify(size());
38747
+ data[i].Flatten(size());
38745
38748
  }
38746
38749
  }
38747
38750
 
@@ -38815,10 +38818,10 @@ void DataChunk::Slice(DataChunk &other, const SelectionVector &sel, idx_t count_
38815
38818
  }
38816
38819
  }
38817
38820
 
38818
- unique_ptr<VectorData[]> DataChunk::Orrify() {
38819
- auto orrified_data = unique_ptr<VectorData[]>(new VectorData[ColumnCount()]);
38821
+ unique_ptr<UnifiedVectorFormat[]> DataChunk::ToUnifiedFormat() {
38822
+ auto orrified_data = unique_ptr<UnifiedVectorFormat[]>(new UnifiedVectorFormat[ColumnCount()]);
38820
38823
  for (idx_t col_idx = 0; col_idx < ColumnCount(); col_idx++) {
38821
- data[col_idx].Orrify(size(), orrified_data[col_idx]);
38824
+ data[col_idx].ToUnifiedFormat(size(), orrified_data[col_idx]);
38822
38825
  }
38823
38826
  return orrified_data;
38824
38827
  }
@@ -38977,8 +38980,8 @@ void SetStructMap(DuckDBArrowArrayChildHolder &child_holder, const LogicalType &
38977
38980
  for (idx_t child_idx = 0; child_idx < child_holder.children.size(); child_idx++) {
38978
38981
  auto &list_vector_child = ListVector::GetEntry(*children[child_idx]);
38979
38982
  if (child_idx == 0) {
38980
- VectorData list_data;
38981
- children[child_idx]->Orrify(size, list_data);
38983
+ UnifiedVectorFormat list_data;
38984
+ children[child_idx]->ToUnifiedFormat(size, list_data);
38982
38985
  auto list_child_validity = FlatVector::Validity(list_vector_child);
38983
38986
  if (!list_child_validity.AllValid()) {
38984
38987
  //! Get the offsets to check from the selection vector
@@ -39259,7 +39262,7 @@ void SetArrowChild(DuckDBArrowArrayChildHolder &child_holder, const LogicalType
39259
39262
  }
39260
39263
 
39261
39264
  void DataChunk::ToArrowArray(ArrowArray *out_array) {
39262
- Normalify();
39265
+ Flatten();
39263
39266
  D_ASSERT(out_array);
39264
39267
 
39265
39268
  // Allocate as unique_ptr first to cleanup properly on error
@@ -40847,10 +40850,10 @@ uint64_t MurmurHash64A(const void *key, int len, unsigned int seed);
40847
40850
 
40848
40851
  namespace duckdb {
40849
40852
 
40850
- void AddToLogsInternal(VectorData &vdata, idx_t count, uint64_t indices[], uint8_t counts[], void ***logs[],
40853
+ void AddToLogsInternal(UnifiedVectorFormat &vdata, idx_t count, uint64_t indices[], uint8_t counts[], void ***logs[],
40851
40854
  const SelectionVector *log_sel);
40852
40855
 
40853
- void AddToSingleLogInternal(VectorData &vdata, idx_t count, uint64_t indices[], uint8_t counts[], void *log);
40856
+ void AddToSingleLogInternal(UnifiedVectorFormat &vdata, idx_t count, uint64_t indices[], uint8_t counts[], void *log);
40854
40857
 
40855
40858
  } // namespace duckdb
40856
40859
 
@@ -40895,13 +40898,13 @@ public:
40895
40898
  public:
40896
40899
  //! Compute HLL hashes over vdata, and store them in 'hashes'
40897
40900
  //! Then, compute register indices and prefix lengths, and also store them in 'hashes' as a pair of uint32_t
40898
- static void ProcessEntries(VectorData &vdata, const LogicalType &type, uint64_t hashes[], uint8_t counts[],
40901
+ static void ProcessEntries(UnifiedVectorFormat &vdata, const LogicalType &type, uint64_t hashes[], uint8_t counts[],
40899
40902
  idx_t count);
40900
40903
  //! Add the indices and counts to the logs
40901
- static void AddToLogs(VectorData &vdata, idx_t count, uint64_t indices[], uint8_t counts[], HyperLogLog **logs[],
40902
- const SelectionVector *log_sel);
40904
+ static void AddToLogs(UnifiedVectorFormat &vdata, idx_t count, uint64_t indices[], uint8_t counts[],
40905
+ HyperLogLog **logs[], const SelectionVector *log_sel);
40903
40906
  //! Add the indices and counts to THIS log
40904
- void AddToLog(VectorData &vdata, idx_t count, uint64_t indices[], uint8_t counts[]);
40907
+ void AddToLog(UnifiedVectorFormat &vdata, idx_t count, uint64_t indices[], uint8_t counts[]);
40905
40908
 
40906
40909
  private:
40907
40910
  explicit HyperLogLog(void *hll);
@@ -41105,7 +41108,7 @@ inline uint64_t TemplatedHash(const string_t &elem) {
41105
41108
  }
41106
41109
 
41107
41110
  template <class T>
41108
- void TemplatedComputeHashes(VectorData &vdata, const idx_t &count, uint64_t hashes[]) {
41111
+ void TemplatedComputeHashes(UnifiedVectorFormat &vdata, const idx_t &count, uint64_t hashes[]) {
41109
41112
  T *data = (T *)vdata.data;
41110
41113
  for (idx_t i = 0; i < count; i++) {
41111
41114
  auto idx = vdata.sel->get_index(i);
@@ -41117,7 +41120,7 @@ void TemplatedComputeHashes(VectorData &vdata, const idx_t &count, uint64_t hash
41117
41120
  }
41118
41121
  }
41119
41122
 
41120
- static void ComputeHashes(VectorData &vdata, const LogicalType &type, uint64_t hashes[], idx_t count) {
41123
+ static void ComputeHashes(UnifiedVectorFormat &vdata, const LogicalType &type, uint64_t hashes[], idx_t count) {
41121
41124
  switch (type.InternalType()) {
41122
41125
  case PhysicalType::BOOL:
41123
41126
  case PhysicalType::INT8:
@@ -41164,20 +41167,20 @@ static inline void ComputeIndexAndCount(uint64_t &hash, uint8_t &prefix) {
41164
41167
  hash = index;
41165
41168
  }
41166
41169
 
41167
- void HyperLogLog::ProcessEntries(VectorData &vdata, const LogicalType &type, uint64_t hashes[], uint8_t counts[],
41168
- idx_t count) {
41170
+ void HyperLogLog::ProcessEntries(UnifiedVectorFormat &vdata, const LogicalType &type, uint64_t hashes[],
41171
+ uint8_t counts[], idx_t count) {
41169
41172
  ComputeHashes(vdata, type, hashes, count);
41170
41173
  for (idx_t i = 0; i < count; i++) {
41171
41174
  ComputeIndexAndCount(hashes[i], counts[i]);
41172
41175
  }
41173
41176
  }
41174
41177
 
41175
- void HyperLogLog::AddToLogs(VectorData &vdata, idx_t count, uint64_t indices[], uint8_t counts[], HyperLogLog **logs[],
41176
- const SelectionVector *log_sel) {
41178
+ void HyperLogLog::AddToLogs(UnifiedVectorFormat &vdata, idx_t count, uint64_t indices[], uint8_t counts[],
41179
+ HyperLogLog **logs[], const SelectionVector *log_sel) {
41177
41180
  AddToLogsInternal(vdata, count, indices, counts, (void ****)logs, log_sel);
41178
41181
  }
41179
41182
 
41180
- void HyperLogLog::AddToLog(VectorData &vdata, idx_t count, uint64_t indices[], uint8_t counts[]) {
41183
+ void HyperLogLog::AddToLog(UnifiedVectorFormat &vdata, idx_t count, uint64_t indices[], uint8_t counts[]) {
41181
41184
  lock_guard<mutex> guard(lock);
41182
41185
  AddToSingleLogInternal(vdata, count, indices, counts, hll);
41183
41186
  }
@@ -46024,7 +46027,7 @@ static void TemplatedFlattenConstantVector(data_ptr_t data, data_ptr_t old_data,
46024
46027
  }
46025
46028
  }
46026
46029
 
46027
- void Vector::Normalify(idx_t count) {
46030
+ void Vector::Flatten(idx_t count) {
46028
46031
  switch (GetVectorType()) {
46029
46032
  case VectorType::FLAT_VECTOR:
46030
46033
  // already a flat vector
@@ -46109,13 +46112,13 @@ void Vector::Normalify(idx_t count) {
46109
46112
  for (auto &child : child_entries) {
46110
46113
  D_ASSERT(child->GetVectorType() == VectorType::CONSTANT_VECTOR);
46111
46114
  auto vector = make_unique<Vector>(*child);
46112
- vector->Normalify(count);
46115
+ vector->Flatten(count);
46113
46116
  new_children.push_back(move(vector));
46114
46117
  }
46115
46118
  auxiliary = move(normalified_buffer);
46116
46119
  } break;
46117
46120
  default:
46118
- throw InternalException("Unimplemented type for VectorOperations::Normalify");
46121
+ throw InternalException("Unimplemented type for VectorOperations::Flatten");
46119
46122
  }
46120
46123
  break;
46121
46124
  }
@@ -46133,7 +46136,7 @@ void Vector::Normalify(idx_t count) {
46133
46136
  }
46134
46137
  }
46135
46138
 
46136
- void Vector::Normalify(const SelectionVector &sel, idx_t count) {
46139
+ void Vector::Flatten(const SelectionVector &sel, idx_t count) {
46137
46140
  switch (GetVectorType()) {
46138
46141
  case VectorType::FLAT_VECTOR:
46139
46142
  // already a flat vector
@@ -46152,7 +46155,7 @@ void Vector::Normalify(const SelectionVector &sel, idx_t count) {
46152
46155
  }
46153
46156
  }
46154
46157
 
46155
- void Vector::Orrify(idx_t count, VectorData &data) {
46158
+ void Vector::ToUnifiedFormat(idx_t count, UnifiedVectorFormat &data) {
46156
46159
  switch (GetVectorType()) {
46157
46160
  case VectorType::DICTIONARY_VECTOR: {
46158
46161
  auto &sel = DictionaryVector::SelVector(*this);
@@ -46164,7 +46167,7 @@ void Vector::Orrify(idx_t count, VectorData &data) {
46164
46167
  } else {
46165
46168
  // dictionary with non-flat child: create a new reference to the child and normalify it
46166
46169
  Vector child_vector(child);
46167
- child_vector.Normalify(sel, count);
46170
+ child_vector.Flatten(sel, count);
46168
46171
  auto new_aux = make_buffer<VectorChildBuffer>(move(child_vector));
46169
46172
 
46170
46173
  data.sel = &sel;
@@ -46180,7 +46183,7 @@ void Vector::Orrify(idx_t count, VectorData &data) {
46180
46183
  data.validity = ConstantVector::Validity(*this);
46181
46184
  break;
46182
46185
  default:
46183
- Normalify(count);
46186
+ Flatten(count);
46184
46187
  data.sel = FlatVector::IncrementalSelectionVector();
46185
46188
  data.data = FlatVector::GetData(*this);
46186
46189
  data.validity = FlatVector::Validity(*this);
@@ -46201,8 +46204,8 @@ void Vector::Sequence(int64_t start, int64_t increment) {
46201
46204
  void Vector::Serialize(idx_t count, Serializer &serializer) {
46202
46205
  auto &type = GetType();
46203
46206
 
46204
- VectorData vdata;
46205
- Orrify(count, vdata);
46207
+ UnifiedVectorFormat vdata;
46208
+ ToUnifiedFormat(count, vdata);
46206
46209
 
46207
46210
  const auto write_validity = (count > 0) && !vdata.validity.AllValid();
46208
46211
  serializer.Write<bool>(write_validity);
@@ -46232,7 +46235,7 @@ void Vector::Serialize(idx_t count, Serializer &serializer) {
46232
46235
  break;
46233
46236
  }
46234
46237
  case PhysicalType::STRUCT: {
46235
- Normalify(count);
46238
+ Flatten(count);
46236
46239
  auto &entries = StructVector::GetEntries(*this);
46237
46240
  for (auto &entry : entries) {
46238
46241
  entry->Serialize(count, serializer);
@@ -46576,8 +46579,8 @@ void ConstantVector::Reference(Vector &vector, Vector &source, idx_t position, i
46576
46579
  switch (source_type.InternalType()) {
46577
46580
  case PhysicalType::LIST: {
46578
46581
  // retrieve the list entry from the source vector
46579
- VectorData vdata;
46580
- source.Orrify(count, vdata);
46582
+ UnifiedVectorFormat vdata;
46583
+ source.ToUnifiedFormat(count, vdata);
46581
46584
 
46582
46585
  auto list_index = vdata.sel->get_index(position);
46583
46586
  if (!vdata.validity.RowIsValid(list_index)) {
@@ -46604,8 +46607,8 @@ void ConstantVector::Reference(Vector &vector, Vector &source, idx_t position, i
46604
46607
  break;
46605
46608
  }
46606
46609
  case PhysicalType::STRUCT: {
46607
- VectorData vdata;
46608
- source.Orrify(count, vdata);
46610
+ UnifiedVectorFormat vdata;
46611
+ source.ToUnifiedFormat(count, vdata);
46609
46612
 
46610
46613
  auto struct_index = vdata.sel->get_index(position);
46611
46614
  if (!vdata.validity.RowIsValid(struct_index)) {
@@ -46772,8 +46775,8 @@ void ListVector::Reserve(Vector &vector, idx_t required_capacity) {
46772
46775
  template <class T>
46773
46776
  void TemplatedSearchInMap(Vector &list, T key, vector<idx_t> &offsets, bool is_key_null, idx_t offset, idx_t length) {
46774
46777
  auto &list_vector = ListVector::GetEntry(list);
46775
- VectorData vector_data;
46776
- list_vector.Orrify(ListVector::GetListSize(list), vector_data);
46778
+ UnifiedVectorFormat vector_data;
46779
+ list_vector.ToUnifiedFormat(ListVector::GetListSize(list), vector_data);
46777
46780
  auto data = (T *)vector_data.data;
46778
46781
  auto validity_mask = vector_data.validity;
46779
46782
 
@@ -46804,8 +46807,8 @@ void TemplatedSearchInMap(Vector &list, const Value &key, vector<idx_t> &offsets
46804
46807
  void SearchStringInMap(Vector &list, const string &key, vector<idx_t> &offsets, bool is_key_null, idx_t offset,
46805
46808
  idx_t length) {
46806
46809
  auto &list_vector = ListVector::GetEntry(list);
46807
- VectorData vector_data;
46808
- list_vector.Orrify(ListVector::GetListSize(list), vector_data);
46810
+ UnifiedVectorFormat vector_data;
46811
+ list_vector.ToUnifiedFormat(ListVector::GetListSize(list), vector_data);
46809
46812
  auto data = (string_t *)vector_data.data;
46810
46813
  auto validity_mask = vector_data.validity;
46811
46814
  if (is_key_null) {
@@ -48917,9 +48920,9 @@ static void TemplatedBooleanNullmask(Vector &left, Vector &right, Vector &result
48917
48920
  ConstantVector::SetNull(result, is_null);
48918
48921
  } else {
48919
48922
  // perform generic loop
48920
- VectorData ldata, rdata;
48921
- left.Orrify(count, ldata);
48922
- right.Orrify(count, rdata);
48923
+ UnifiedVectorFormat ldata, rdata;
48924
+ left.ToUnifiedFormat(count, ldata);
48925
+ right.ToUnifiedFormat(count, rdata);
48923
48926
 
48924
48927
  result.SetVectorType(VectorType::FLAT_VECTOR);
48925
48928
  auto left_data = (uint8_t *)ldata.data; // we use uint8 to avoid load of gunk bools
@@ -49219,7 +49222,8 @@ inline idx_t ComparisonSelector::Select<duckdb::LessThanEquals>(Vector &left, Ve
49219
49222
  return VectorOperations::LessThanEquals(left, right, sel, count, true_sel, false_sel);
49220
49223
  }
49221
49224
 
49222
- static void ComparesNotNull(VectorData &ldata, VectorData &rdata, ValidityMask &vresult, idx_t count) {
49225
+ static void ComparesNotNull(UnifiedVectorFormat &ldata, UnifiedVectorFormat &rdata, ValidityMask &vresult,
49226
+ idx_t count) {
49223
49227
  for (idx_t i = 0; i < count; ++i) {
49224
49228
  auto lidx = ldata.sel->get_index(i);
49225
49229
  auto ridx = rdata.sel->get_index(i);
@@ -49255,9 +49259,9 @@ static void NestedComparisonExecutor(Vector &left, Vector &right, Vector &result
49255
49259
  auto result_data = FlatVector::GetData<bool>(result);
49256
49260
  auto &result_validity = FlatVector::Validity(result);
49257
49261
 
49258
- VectorData leftv, rightv;
49259
- left.Orrify(count, leftv);
49260
- right.Orrify(count, rightv);
49262
+ UnifiedVectorFormat leftv, rightv;
49263
+ left.ToUnifiedFormat(count, leftv);
49264
+ right.ToUnifiedFormat(count, rightv);
49261
49265
  if (!leftv.validity.AllValid() || !rightv.validity.AllValid()) {
49262
49266
  ComparesNotNull(leftv, rightv, result_validity, count);
49263
49267
  }
@@ -49511,10 +49515,10 @@ static void DistinctExecuteGeneric(Vector &left, Vector &right, Vector &result,
49511
49515
  if (left.GetVectorType() == VectorType::CONSTANT_VECTOR && right.GetVectorType() == VectorType::CONSTANT_VECTOR) {
49512
49516
  DistinctExecuteConstant<LEFT_TYPE, RIGHT_TYPE, RESULT_TYPE, OP>(left, right, result);
49513
49517
  } else {
49514
- VectorData ldata, rdata;
49518
+ UnifiedVectorFormat ldata, rdata;
49515
49519
 
49516
- left.Orrify(count, ldata);
49517
- right.Orrify(count, rdata);
49520
+ left.ToUnifiedFormat(count, ldata);
49521
+ right.ToUnifiedFormat(count, rdata);
49518
49522
 
49519
49523
  result.SetVectorType(VectorType::FLAT_VECTOR);
49520
49524
  auto result_data = FlatVector::GetData<RESULT_TYPE>(result);
@@ -49610,10 +49614,10 @@ DistinctSelectGenericLoopSwitch(LEFT_TYPE *__restrict ldata, RIGHT_TYPE *__restr
49610
49614
  template <class LEFT_TYPE, class RIGHT_TYPE, class OP>
49611
49615
  static idx_t DistinctSelectGeneric(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
49612
49616
  SelectionVector *true_sel, SelectionVector *false_sel) {
49613
- VectorData ldata, rdata;
49617
+ UnifiedVectorFormat ldata, rdata;
49614
49618
 
49615
- left.Orrify(count, ldata);
49616
- right.Orrify(count, rdata);
49619
+ left.ToUnifiedFormat(count, ldata);
49620
+ right.ToUnifiedFormat(count, rdata);
49617
49621
 
49618
49622
  return DistinctSelectGenericLoopSwitch<LEFT_TYPE, RIGHT_TYPE, OP>((LEFT_TYPE *)ldata.data, (RIGHT_TYPE *)rdata.data,
49619
49623
  ldata.sel, rdata.sel, sel, count, ldata.validity,
@@ -49749,9 +49753,9 @@ template <class OP>
49749
49753
  static idx_t DistinctSelectNotNull(Vector &left, Vector &right, const idx_t count, idx_t &true_count,
49750
49754
  const SelectionVector &sel, SelectionVector &maybe_vec, OptionalSelection &true_opt,
49751
49755
  OptionalSelection &false_opt) {
49752
- VectorData lvdata, rvdata;
49753
- left.Orrify(count, lvdata);
49754
- right.Orrify(count, rvdata);
49756
+ UnifiedVectorFormat lvdata, rvdata;
49757
+ left.ToUnifiedFormat(count, lvdata);
49758
+ right.ToUnifiedFormat(count, rvdata);
49755
49759
 
49756
49760
  auto &lmask = lvdata.validity;
49757
49761
  auto &rmask = rvdata.validity;
@@ -49959,11 +49963,11 @@ static idx_t DistinctSelectStruct(Vector &left, Vector &right, idx_t count, cons
49959
49963
  for (idx_t col_no = 0; col_no < lchildren.size(); ++col_no) {
49960
49964
  // Slice the children to maintain density
49961
49965
  Vector lchild(*lchildren[col_no]);
49962
- lchild.Normalify(vcount);
49966
+ lchild.Flatten(vcount);
49963
49967
  lchild.Slice(slice_sel, count);
49964
49968
 
49965
49969
  Vector rchild(*rchildren[col_no]);
49966
- rchild.Normalify(vcount);
49970
+ rchild.Flatten(vcount);
49967
49971
  rchild.Slice(slice_sel, count);
49968
49972
 
49969
49973
  // Find everything that definitely matches
@@ -50011,7 +50015,7 @@ static idx_t DistinctSelectStruct(Vector &left, Vector &right, idx_t count, cons
50011
50015
  return match_count;
50012
50016
  }
50013
50017
 
50014
- static void PositionListCursor(SelectionVector &cursor, VectorData &vdata, const idx_t pos,
50018
+ static void PositionListCursor(SelectionVector &cursor, UnifiedVectorFormat &vdata, const idx_t pos,
50015
50019
  const SelectionVector &slice_sel, const idx_t count) {
50016
50020
  const auto data = (const list_entry_t *)vdata.data;
50017
50021
  for (idx_t i = 0; i < count; ++i) {
@@ -50034,8 +50038,8 @@ static idx_t DistinctSelectList(Vector &left, Vector &right, idx_t count, const
50034
50038
  SelectionVector lcursor(count);
50035
50039
  SelectionVector rcursor(count);
50036
50040
 
50037
- ListVector::GetEntry(left).Normalify(count);
50038
- ListVector::GetEntry(right).Normalify(count);
50041
+ ListVector::GetEntry(left).Flatten(count);
50042
+ ListVector::GetEntry(right).Flatten(count);
50039
50043
  Vector lchild(ListVector::GetEntry(left), lcursor, count);
50040
50044
  Vector rchild(ListVector::GetEntry(right), rcursor, count);
50041
50045
 
@@ -50054,12 +50058,12 @@ static idx_t DistinctSelectList(Vector &left, Vector &right, idx_t count, const
50054
50058
  // }
50055
50059
 
50056
50060
  // Get pointers to the list entries
50057
- VectorData lvdata;
50058
- left.Orrify(count, lvdata);
50061
+ UnifiedVectorFormat lvdata;
50062
+ left.ToUnifiedFormat(count, lvdata);
50059
50063
  const auto ldata = (const list_entry_t *)lvdata.data;
50060
50064
 
50061
- VectorData rvdata;
50062
- right.Orrify(count, rvdata);
50065
+ UnifiedVectorFormat rvdata;
50066
+ right.ToUnifiedFormat(count, rvdata);
50063
50067
  const auto rdata = (const list_entry_t *)rvdata.data;
50064
50068
 
50065
50069
  // In order to reuse the comparators, we have to track what passed and failed internally.
@@ -50415,8 +50419,8 @@ void IsNullLoop(Vector &input, Vector &result, idx_t count) {
50415
50419
  auto result_data = ConstantVector::GetData<bool>(result);
50416
50420
  *result_data = INVERSE ? !ConstantVector::IsNull(input) : ConstantVector::IsNull(input);
50417
50421
  } else {
50418
- VectorData data;
50419
- input.Orrify(count, data);
50422
+ UnifiedVectorFormat data;
50423
+ input.ToUnifiedFormat(count, data);
50420
50424
 
50421
50425
  result.SetVectorType(VectorType::FLAT_VECTOR);
50422
50426
  auto result_data = FlatVector::GetData<bool>(result);
@@ -50442,8 +50446,8 @@ bool VectorOperations::HasNotNull(Vector &input, idx_t count) {
50442
50446
  if (input.GetVectorType() == VectorType::CONSTANT_VECTOR) {
50443
50447
  return !ConstantVector::IsNull(input);
50444
50448
  } else {
50445
- VectorData data;
50446
- input.Orrify(count, data);
50449
+ UnifiedVectorFormat data;
50450
+ input.ToUnifiedFormat(count, data);
50447
50451
 
50448
50452
  if (data.validity.AllValid()) {
50449
50453
  return true;
@@ -50465,8 +50469,8 @@ bool VectorOperations::HasNull(Vector &input, idx_t count) {
50465
50469
  if (input.GetVectorType() == VectorType::CONSTANT_VECTOR) {
50466
50470
  return ConstantVector::IsNull(input);
50467
50471
  } else {
50468
- VectorData data;
50469
- input.Orrify(count, data);
50472
+ UnifiedVectorFormat data;
50473
+ input.ToUnifiedFormat(count, data);
50470
50474
 
50471
50475
  if (data.validity.AllValid()) {
50472
50476
  return false;
@@ -50484,8 +50488,8 @@ bool VectorOperations::HasNull(Vector &input, idx_t count) {
50484
50488
  idx_t VectorOperations::CountNotNull(Vector &input, const idx_t count) {
50485
50489
  idx_t valid = 0;
50486
50490
 
50487
- VectorData vdata;
50488
- input.Orrify(count, vdata);
50491
+ UnifiedVectorFormat vdata;
50492
+ input.ToUnifiedFormat(count, vdata);
50489
50493
  if (vdata.validity.AllValid()) {
50490
50494
  return count;
50491
50495
  }
@@ -51155,8 +51159,8 @@ bool TransformEnum(Vector &source, Vector &result, idx_t count, string *error_me
51155
51159
  error_message, nullptr);
51156
51160
  }
51157
51161
  default: {
51158
- VectorData vdata;
51159
- source.Orrify(count, vdata);
51162
+ UnifiedVectorFormat vdata;
51163
+ source.ToUnifiedFormat(count, vdata);
51160
51164
 
51161
51165
  result.SetVectorType(VectorType::FLAT_VECTOR);
51162
51166
 
@@ -51526,7 +51530,7 @@ static bool ListCastSwitch(Vector &source, Vector &result, idx_t count, string *
51526
51530
  auto tdata = ConstantVector::GetData<list_entry_t>(result);
51527
51531
  *tdata = *ldata;
51528
51532
  } else {
51529
- source.Normalify(count);
51533
+ source.Flatten(count);
51530
51534
  result.SetVectorType(VectorType::FLAT_VECTOR);
51531
51535
  FlatVector::SetValidity(result, FlatVector::Validity(source));
51532
51536
 
@@ -51562,8 +51566,8 @@ bool FillEnum(Vector &source, Vector &result, idx_t count, string *error_message
51562
51566
 
51563
51567
  auto res_enum_type = result.GetType();
51564
51568
 
51565
- VectorData vdata;
51566
- source.Orrify(count, vdata);
51569
+ UnifiedVectorFormat vdata;
51570
+ source.ToUnifiedFormat(count, vdata);
51567
51571
 
51568
51572
  auto source_data = (SRC_TYPE *)vdata.data;
51569
51573
  auto source_sel = vdata.sel;
@@ -51720,7 +51724,7 @@ static bool StructCastSwitch(Vector &source, Vector &result, idx_t count, string
51720
51724
  result.SetVectorType(VectorType::CONSTANT_VECTOR);
51721
51725
  ConstantVector::SetNull(result, ConstantVector::IsNull(source));
51722
51726
  } else {
51723
- source.Normalify(count);
51727
+ source.Flatten(count);
51724
51728
  FlatVector::Validity(result) = FlatVector::Validity(source);
51725
51729
  }
51726
51730
  return true;
@@ -52129,8 +52133,8 @@ static inline void TemplatedLoopHash(Vector &input, Vector &result, const Select
52129
52133
  } else {
52130
52134
  result.SetVectorType(VectorType::FLAT_VECTOR);
52131
52135
 
52132
- VectorData idata;
52133
- input.Orrify(count, idata);
52136
+ UnifiedVectorFormat idata;
52137
+ input.ToUnifiedFormat(count, idata);
52134
52138
 
52135
52139
  TightLoopHash<HAS_RSEL, T>((T *)idata.data, FlatVector::GetData<hash_t>(result), rsel, count, idata.sel,
52136
52140
  idata.validity);
@@ -52168,8 +52172,8 @@ template <bool HAS_RSEL, bool FIRST_HASH>
52168
52172
  static inline void ListLoopHash(Vector &input, Vector &hashes, const SelectionVector *rsel, idx_t count) {
52169
52173
  auto hdata = FlatVector::GetData<hash_t>(hashes);
52170
52174
 
52171
- VectorData idata;
52172
- input.Orrify(count, idata);
52175
+ UnifiedVectorFormat idata;
52176
+ input.ToUnifiedFormat(count, idata);
52173
52177
  const auto ldata = (const list_entry_t *)idata.data;
52174
52178
 
52175
52179
  // Hash the children into a temporary
@@ -52364,8 +52368,8 @@ void TemplatedLoopCombineHash(Vector &input, Vector &hashes, const SelectionVect
52364
52368
  auto other_hash = HashOp::Operation(*ldata, ConstantVector::IsNull(input));
52365
52369
  *hash_data = CombineHashScalar(*hash_data, other_hash);
52366
52370
  } else {
52367
- VectorData idata;
52368
- input.Orrify(count, idata);
52371
+ UnifiedVectorFormat idata;
52372
+ input.ToUnifiedFormat(count, idata);
52369
52373
  if (hashes.GetVectorType() == VectorType::CONSTANT_VECTOR) {
52370
52374
  // mix constant with non-constant, first get the constant value
52371
52375
  auto constant_hash = *ConstantVector::GetData<hash_t>(hashes);
@@ -52454,7 +52458,7 @@ void VectorOperations::CombineHash(Vector &hashes, Vector &input, const Selectio
52454
52458
  namespace duckdb {
52455
52459
 
52456
52460
  template <class T>
52457
- static void CopyToStorageLoop(VectorData &vdata, idx_t count, data_ptr_t target) {
52461
+ static void CopyToStorageLoop(UnifiedVectorFormat &vdata, idx_t count, data_ptr_t target) {
52458
52462
  auto ldata = (T *)vdata.data;
52459
52463
  auto result_data = (T *)target;
52460
52464
  for (idx_t i = 0; i < count; i++) {
@@ -52471,8 +52475,8 @@ void VectorOperations::WriteToStorage(Vector &source, idx_t count, data_ptr_t ta
52471
52475
  if (count == 0) {
52472
52476
  return;
52473
52477
  }
52474
- VectorData vdata;
52475
- source.Orrify(count, vdata);
52478
+ UnifiedVectorFormat vdata;
52479
+ source.ToUnifiedFormat(count, vdata);
52476
52480
 
52477
52481
  switch (source.GetType().InternalType()) {
52478
52482
  case PhysicalType::BOOL:
@@ -53200,7 +53204,7 @@ idx_t GroupedAggregateHashTable::AddChunk(DataChunk &groups, Vector &group_hashe
53200
53204
  distinct_addresses.Verify(new_group_count);
53201
53205
 
53202
53206
  if (aggr.filter) {
53203
- distinct_addresses.Normalify(new_group_count);
53207
+ distinct_addresses.Flatten(new_group_count);
53204
53208
  RowOperations::UpdateFilteredStates(filter_set.GetFilterData(aggr_idx), aggr, distinct_addresses,
53205
53209
  distinct_payload, payload_idx);
53206
53210
  } else {
@@ -53262,14 +53266,14 @@ idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(DataChunk &groups, V
53262
53266
  D_ASSERT(capacity - entries >= groups.size());
53263
53267
  D_ASSERT(group_hashes.GetType() == LogicalType::HASH);
53264
53268
 
53265
- group_hashes.Normalify(groups.size());
53269
+ group_hashes.Flatten(groups.size());
53266
53270
  auto group_hashes_ptr = FlatVector::GetData<hash_t>(group_hashes);
53267
53271
 
53268
53272
  D_ASSERT(ht_offsets.GetVectorType() == VectorType::FLAT_VECTOR);
53269
53273
  D_ASSERT(ht_offsets.GetType() == LogicalType::BIGINT);
53270
53274
 
53271
53275
  D_ASSERT(addresses.GetType() == LogicalType::POINTER);
53272
- addresses.Normalify(groups.size());
53276
+ addresses.Flatten(groups.size());
53273
53277
  auto addresses_ptr = FlatVector::GetData<data_ptr_t>(addresses);
53274
53278
 
53275
53279
  // now compute the entry in the table based on the hash using a modulo
@@ -53299,8 +53303,8 @@ idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(DataChunk &groups, V
53299
53303
  group_chunk.data[groups.ColumnCount()].Reference(group_hashes);
53300
53304
  group_chunk.SetCardinality(groups);
53301
53305
 
53302
- // orrify all the groups
53303
- auto group_data = group_chunk.Orrify();
53306
+ // convert all vectors to unified format
53307
+ auto group_data = group_chunk.ToUnifiedFormat();
53304
53308
 
53305
53309
  idx_t new_group_count = 0;
53306
53310
  while (remaining_entries > 0) {
@@ -54220,8 +54224,8 @@ void TemplatedFillLoop(Vector &vector, Vector &result, const SelectionVector &se
54220
54224
  }
54221
54225
  }
54222
54226
  } else {
54223
- VectorData vdata;
54224
- vector.Orrify(count, vdata);
54227
+ UnifiedVectorFormat vdata;
54228
+ vector.ToUnifiedFormat(count, vdata);
54225
54229
  auto data = (T *)vdata.data;
54226
54230
  for (idx_t i = 0; i < count; i++) {
54227
54231
  auto source_idx = vdata.sel->get_index(i);
@@ -54243,8 +54247,8 @@ void ValidityFillLoop(Vector &vector, Vector &result, const SelectionVector &sel
54243
54247
  }
54244
54248
  }
54245
54249
  } else {
54246
- VectorData vdata;
54247
- vector.Orrify(count, vdata);
54250
+ UnifiedVectorFormat vdata;
54251
+ vector.ToUnifiedFormat(count, vdata);
54248
54252
  if (vdata.validity.AllValid()) {
54249
54253
  return;
54250
54254
  }
@@ -54525,9 +54529,9 @@ idx_t NestedSelector::Select<duckdb::GreaterThanEquals>(Vector &left, Vector &ri
54525
54529
  static inline idx_t SelectNotNull(Vector &left, Vector &right, const idx_t count, const SelectionVector &sel,
54526
54530
  SelectionVector &maybe_vec, OptionalSelection &false_opt) {
54527
54531
 
54528
- VectorData lvdata, rvdata;
54529
- left.Orrify(count, lvdata);
54530
- right.Orrify(count, rvdata);
54532
+ UnifiedVectorFormat lvdata, rvdata;
54533
+ left.ToUnifiedFormat(count, lvdata);
54534
+ right.ToUnifiedFormat(count, rvdata);
54531
54535
 
54532
54536
  auto &lmask = lvdata.validity;
54533
54537
  auto &rmask = rvdata.validity;
@@ -54880,8 +54884,8 @@ static void VerifyNullHandling(const BoundFunctionExpression &expr, DataChunk &a
54880
54884
  idx_t count = args.size();
54881
54885
  ValidityMask combined_mask(count);
54882
54886
  for (auto &arg : args.data) {
54883
- VectorData arg_data;
54884
- arg.Orrify(count, arg_data);
54887
+ UnifiedVectorFormat arg_data;
54888
+ arg.ToUnifiedFormat(count, arg_data);
54885
54889
 
54886
54890
  for (idx_t i = 0; i < count; i++) {
54887
54891
  auto idx = arg_data.sel->get_index(i);
@@ -54892,8 +54896,8 @@ static void VerifyNullHandling(const BoundFunctionExpression &expr, DataChunk &a
54892
54896
  }
54893
54897
 
54894
54898
  // Default is that if any of the arguments are NULL, the result is also NULL
54895
- VectorData result_data;
54896
- result.Orrify(count, result_data);
54899
+ UnifiedVectorFormat result_data;
54900
+ result.ToUnifiedFormat(count, result_data);
54897
54901
  for (idx_t i = 0; i < count; i++) {
54898
54902
  if (!combined_mask.RowIsValid(i)) {
54899
54903
  auto idx = result_data.sel->get_index(i);
@@ -55005,8 +55009,8 @@ void ExpressionExecutor::Execute(const BoundOperatorExpression &expr, Expression
55005
55009
  Execute(*expr.children[child], state->child_states[child].get(), current_sel, remaining_count,
55006
55010
  vector_to_check);
55007
55011
 
55008
- VectorData vdata;
55009
- vector_to_check.Orrify(remaining_count, vdata);
55012
+ UnifiedVectorFormat vdata;
55013
+ vector_to_check.ToUnifiedFormat(remaining_count, vdata);
55010
55014
 
55011
55015
  idx_t result_count = 0;
55012
55016
  next_count = 0;
@@ -55348,7 +55352,7 @@ static inline idx_t DefaultSelectLoop(const SelectionVector *bsel, uint8_t *__re
55348
55352
  }
55349
55353
 
55350
55354
  template <bool NO_NULL>
55351
- static inline idx_t DefaultSelectSwitch(VectorData &idata, const SelectionVector *sel, idx_t count,
55355
+ static inline idx_t DefaultSelectSwitch(UnifiedVectorFormat &idata, const SelectionVector *sel, idx_t count,
55352
55356
  SelectionVector *true_sel, SelectionVector *false_sel) {
55353
55357
  if (true_sel && false_sel) {
55354
55358
  return DefaultSelectLoop<NO_NULL, true, true>(idata.sel, (uint8_t *)idata.data, idata.validity, sel, count,
@@ -55372,8 +55376,8 @@ idx_t ExpressionExecutor::DefaultSelect(const Expression &expr, ExpressionState
55372
55376
  Vector intermediate(LogicalType::BOOLEAN, (data_ptr_t)intermediate_bools);
55373
55377
  Execute(expr, state, sel, count, intermediate);
55374
55378
 
55375
- VectorData idata;
55376
- intermediate.Orrify(count, idata);
55379
+ UnifiedVectorFormat idata;
55380
+ intermediate.ToUnifiedFormat(count, idata);
55377
55381
 
55378
55382
  if (!sel) {
55379
55383
  sel = FlatVector::IncrementalSelectionVector();
@@ -55498,8 +55502,8 @@ unique_ptr<IndexScanState> ART::InitializeScanTwoPredicates(Transaction &transac
55498
55502
  //===--------------------------------------------------------------------===//
55499
55503
  template <class T>
55500
55504
  static void TemplatedGenerateKeys(Vector &input, idx_t count, vector<unique_ptr<Key>> &keys) {
55501
- VectorData idata;
55502
- input.Orrify(count, idata);
55505
+ UnifiedVectorFormat idata;
55506
+ input.ToUnifiedFormat(count, idata);
55503
55507
 
55504
55508
  auto input_data = (T *)idata.data;
55505
55509
  for (idx_t i = 0; i < count; i++) {
@@ -55514,8 +55518,8 @@ static void TemplatedGenerateKeys(Vector &input, idx_t count, vector<unique_ptr<
55514
55518
 
55515
55519
  template <class T>
55516
55520
  static void ConcatenateKeys(Vector &input, idx_t count, vector<unique_ptr<Key>> &keys) {
55517
- VectorData idata;
55518
- input.Orrify(count, idata);
55521
+ UnifiedVectorFormat idata;
55522
+ input.ToUnifiedFormat(count, idata);
55519
55523
 
55520
55524
  auto input_data = (T *)idata.data;
55521
55525
  for (idx_t i = 0; i < count; i++) {
@@ -55640,7 +55644,7 @@ bool ART::Insert(IndexLock &lock, DataChunk &input, Vector &row_ids) {
55640
55644
  GenerateKeys(input, keys);
55641
55645
 
55642
55646
  // now insert the elements into the index
55643
- row_ids.Normalify(input.size());
55647
+ row_ids.Flatten(input.size());
55644
55648
  auto row_identifiers = FlatVector::GetData<row_t>(row_ids);
55645
55649
  idx_t failed_index = DConstants::INVALID_INDEX;
55646
55650
  for (idx_t i = 0; i < input.size(); i++) {
@@ -55796,7 +55800,7 @@ void ART::Delete(IndexLock &state, DataChunk &input, Vector &row_ids) {
55796
55800
  GenerateKeys(expression, keys);
55797
55801
 
55798
55802
  // now erase the elements from the database
55799
- row_ids.Normalify(input.size());
55803
+ row_ids.Flatten(input.size());
55800
55804
  auto row_identifiers = FlatVector::GetData<row_t>(row_ids);
55801
55805
 
55802
55806
  for (idx_t i = 0; i < input.size(); i++) {
@@ -57327,7 +57331,7 @@ public:
57327
57331
  //! returned by the JoinHashTable::Scan function and can be used to resume a
57328
57332
  //! probe.
57329
57333
  struct ScanStructure {
57330
- unique_ptr<VectorData[]> key_data;
57334
+ unique_ptr<UnifiedVectorFormat[]> key_data;
57331
57335
  Vector pointers;
57332
57336
  idx_t count;
57333
57337
  SelectionVector sel_vector;
@@ -57457,7 +57461,7 @@ private:
57457
57461
  //! hashes. Caller should hold lock in parallel HT.
57458
57462
  void InsertHashes(Vector &hashes, idx_t count, data_ptr_t key_locations[]);
57459
57463
 
57460
- idx_t PrepareKeys(DataChunk &keys, unique_ptr<VectorData[]> &key_data, const SelectionVector *&current_sel,
57464
+ idx_t PrepareKeys(DataChunk &keys, unique_ptr<UnifiedVectorFormat[]> &key_data, const SelectionVector *&current_sel,
57461
57465
  SelectionVector &sel, bool build_side);
57462
57466
 
57463
57467
  //! The RowDataCollection holding the main data of the hash table
@@ -57546,7 +57550,7 @@ void JoinHashTable::ApplyBitmask(Vector &hashes, idx_t count) {
57546
57550
  auto indices = ConstantVector::GetData<hash_t>(hashes);
57547
57551
  *indices = *indices & bitmask;
57548
57552
  } else {
57549
- hashes.Normalify(count);
57553
+ hashes.Flatten(count);
57550
57554
  auto indices = FlatVector::GetData<hash_t>(hashes);
57551
57555
  for (idx_t i = 0; i < count; i++) {
57552
57556
  indices[i] &= bitmask;
@@ -57555,8 +57559,8 @@ void JoinHashTable::ApplyBitmask(Vector &hashes, idx_t count) {
57555
57559
  }
57556
57560
 
57557
57561
  void JoinHashTable::ApplyBitmask(Vector &hashes, const SelectionVector &sel, idx_t count, Vector &pointers) {
57558
- VectorData hdata;
57559
- hashes.Orrify(count, hdata);
57562
+ UnifiedVectorFormat hdata;
57563
+ hashes.ToUnifiedFormat(count, hdata);
57560
57564
 
57561
57565
  auto hash_data = (hash_t *)hdata.data;
57562
57566
  auto result_data = FlatVector::GetData<data_ptr_t *>(pointers);
@@ -57585,7 +57589,8 @@ void JoinHashTable::Hash(DataChunk &keys, const SelectionVector &sel, idx_t coun
57585
57589
  }
57586
57590
  }
57587
57591
 
57588
- static idx_t FilterNullValues(VectorData &vdata, const SelectionVector &sel, idx_t count, SelectionVector &result) {
57592
+ static idx_t FilterNullValues(UnifiedVectorFormat &vdata, const SelectionVector &sel, idx_t count,
57593
+ SelectionVector &result) {
57589
57594
  idx_t result_count = 0;
57590
57595
  for (idx_t i = 0; i < count; i++) {
57591
57596
  auto idx = sel.get_index(i);
@@ -57597,9 +57602,9 @@ static idx_t FilterNullValues(VectorData &vdata, const SelectionVector &sel, idx
57597
57602
  return result_count;
57598
57603
  }
57599
57604
 
57600
- idx_t JoinHashTable::PrepareKeys(DataChunk &keys, unique_ptr<VectorData[]> &key_data,
57605
+ idx_t JoinHashTable::PrepareKeys(DataChunk &keys, unique_ptr<UnifiedVectorFormat[]> &key_data,
57601
57606
  const SelectionVector *&current_sel, SelectionVector &sel, bool build_side) {
57602
- key_data = keys.Orrify();
57607
+ key_data = keys.ToUnifiedFormat();
57603
57608
 
57604
57609
  // figure out which keys are NULL, and create a selection vector out of them
57605
57610
  current_sel = FlatVector::IncrementalSelectionVector();
@@ -57650,7 +57655,7 @@ void JoinHashTable::Build(DataChunk &keys, DataChunk &payload) {
57650
57655
  }
57651
57656
 
57652
57657
  // prepare the keys for processing
57653
- unique_ptr<VectorData[]> key_data;
57658
+ unique_ptr<UnifiedVectorFormat[]> key_data;
57654
57659
  const SelectionVector *current_sel;
57655
57660
  SelectionVector sel(STANDARD_VECTOR_SIZE);
57656
57661
  idx_t added_count = PrepareKeys(keys, key_data, current_sel, sel, true);
@@ -57675,7 +57680,7 @@ void JoinHashTable::Build(DataChunk &keys, DataChunk &payload) {
57675
57680
  DataChunk source_chunk;
57676
57681
  source_chunk.InitializeEmpty(layout.GetTypes());
57677
57682
 
57678
- vector<VectorData> source_data;
57683
+ vector<UnifiedVectorFormat> source_data;
57679
57684
  source_data.reserve(layout.ColumnCount());
57680
57685
 
57681
57686
  // serialize the keys to the key locations
@@ -57687,22 +57692,22 @@ void JoinHashTable::Build(DataChunk &keys, DataChunk &payload) {
57687
57692
  D_ASSERT(build_types.size() == payload.ColumnCount());
57688
57693
  for (idx_t i = 0; i < payload.ColumnCount(); i++) {
57689
57694
  source_chunk.data[source_data.size()].Reference(payload.data[i]);
57690
- VectorData pdata;
57691
- payload.data[i].Orrify(payload.size(), pdata);
57695
+ UnifiedVectorFormat pdata;
57696
+ payload.data[i].ToUnifiedFormat(payload.size(), pdata);
57692
57697
  source_data.emplace_back(move(pdata));
57693
57698
  }
57694
57699
  if (IsRightOuterJoin(join_type)) {
57695
57700
  // for FULL/RIGHT OUTER joins initialize the "found" boolean to false
57696
57701
  source_chunk.data[source_data.size()].Reference(vfound);
57697
- VectorData fdata;
57698
- vfound.Orrify(keys.size(), fdata);
57702
+ UnifiedVectorFormat fdata;
57703
+ vfound.ToUnifiedFormat(keys.size(), fdata);
57699
57704
  source_data.emplace_back(move(fdata));
57700
57705
  }
57701
57706
 
57702
57707
  // serialise the hashes at the end
57703
57708
  source_chunk.data[source_data.size()].Reference(hash_values);
57704
- VectorData hdata;
57705
- hash_values.Orrify(keys.size(), hdata);
57709
+ UnifiedVectorFormat hdata;
57710
+ hash_values.ToUnifiedFormat(keys.size(), hdata);
57706
57711
  source_data.emplace_back(move(hdata));
57707
57712
 
57708
57713
  source_chunk.SetCardinality(keys);
@@ -57717,7 +57722,7 @@ void JoinHashTable::InsertHashes(Vector &hashes, idx_t count, data_ptr_t key_loc
57717
57722
  // use bitmask to get position in array
57718
57723
  ApplyBitmask(hashes, count);
57719
57724
 
57720
- hashes.Normalify(count);
57725
+ hashes.Flatten(count);
57721
57726
 
57722
57727
  D_ASSERT(hashes.GetVectorType() == VectorType::FLAT_VECTOR);
57723
57728
  auto pointers = (data_ptr_t *)hash_map.Ptr();
@@ -58027,8 +58032,8 @@ void ScanStructure::ConstructMarkJoinResult(DataChunk &join_keys, DataChunk &chi
58027
58032
  if (ht.null_values_are_equal[col_idx]) {
58028
58033
  continue;
58029
58034
  }
58030
- VectorData jdata;
58031
- join_keys.data[col_idx].Orrify(join_keys.size(), jdata);
58035
+ UnifiedVectorFormat jdata;
58036
+ join_keys.data[col_idx].ToUnifiedFormat(join_keys.size(), jdata);
58032
58037
  if (!jdata.validity.AllValid()) {
58033
58038
  for (idx_t i = 0; i < join_keys.size(); i++) {
58034
58039
  auto jidx = jdata.sel->get_index(i);
@@ -58096,8 +58101,8 @@ void ScanStructure::NextMarkJoin(DataChunk &keys, DataChunk &input, DataChunk &r
58096
58101
  mask.Copy(FlatVector::Validity(last_key), input.size());
58097
58102
  break;
58098
58103
  default: {
58099
- VectorData kdata;
58100
- last_key.Orrify(keys.size(), kdata);
58104
+ UnifiedVectorFormat kdata;
58105
+ last_key.ToUnifiedFormat(keys.size(), kdata);
58101
58106
  for (idx_t i = 0; i < input.size(); i++) {
58102
58107
  auto kidx = kdata.sel->get_index(i);
58103
58108
  mask.Set(i, kdata.validity.RowIsValid(kidx));
@@ -58322,9 +58327,9 @@ struct InitialNestedLoopJoin {
58322
58327
  SelectionVector &lvector, SelectionVector &rvector, idx_t current_match_count) {
58323
58328
  // initialize phase of nested loop join
58324
58329
  // fill lvector and rvector with matches from the base vectors
58325
- VectorData left_data, right_data;
58326
- left.Orrify(left_size, left_data);
58327
- right.Orrify(right_size, right_data);
58330
+ UnifiedVectorFormat left_data, right_data;
58331
+ left.ToUnifiedFormat(left_size, left_data);
58332
+ right.ToUnifiedFormat(right_size, right_data);
58328
58333
 
58329
58334
  auto ldata = (T *)left_data.data;
58330
58335
  auto rdata = (T *)right_data.data;
@@ -58356,9 +58361,9 @@ struct RefineNestedLoopJoin {
58356
58361
  template <class T, class OP>
58357
58362
  static idx_t Operation(Vector &left, Vector &right, idx_t left_size, idx_t right_size, idx_t &lpos, idx_t &rpos,
58358
58363
  SelectionVector &lvector, SelectionVector &rvector, idx_t current_match_count) {
58359
- VectorData left_data, right_data;
58360
- left.Orrify(left_size, left_data);
58361
- right.Orrify(right_size, right_data);
58364
+ UnifiedVectorFormat left_data, right_data;
58365
+ left.ToUnifiedFormat(left_size, left_data);
58366
+ right.ToUnifiedFormat(right_size, right_data);
58362
58367
 
58363
58368
  // refine phase of the nested loop join
58364
58369
  // refine lvector and rvector based on matches of subsequent conditions (in case there are multiple conditions
@@ -58504,9 +58509,9 @@ namespace duckdb {
58504
58509
 
58505
58510
  template <class T, class OP>
58506
58511
  static void TemplatedMarkJoin(Vector &left, Vector &right, idx_t lcount, idx_t rcount, bool found_match[]) {
58507
- VectorData left_data, right_data;
58508
- left.Orrify(lcount, left_data);
58509
- right.Orrify(rcount, right_data);
58512
+ UnifiedVectorFormat left_data, right_data;
58513
+ left.ToUnifiedFormat(lcount, left_data);
58514
+ right.ToUnifiedFormat(rcount, right_data);
58510
58515
 
58511
58516
  auto ldata = (T *)left_data.data;
58512
58517
  auto rdata = (T *)right_data.data;
@@ -59895,8 +59900,8 @@ void VerifyNullHandling(DataChunk &chunk, AggregateState &state, const vector<un
59895
59900
  auto &aggr = (BoundAggregateExpression &)*aggregates[aggr_idx];
59896
59901
  if (state.counts[aggr_idx] == 0 && aggr.function.null_handling == FunctionNullHandling::DEFAULT_NULL_HANDLING) {
59897
59902
  // Default is when 0 values go in, NULL comes out
59898
- VectorData vdata;
59899
- chunk.data[aggr_idx].Orrify(1, vdata);
59903
+ UnifiedVectorFormat vdata;
59904
+ chunk.data[aggr_idx].ToUnifiedFormat(1, vdata);
59900
59905
  D_ASSERT(!vdata.validity.RowIsValid(vdata.sel->get_index(0)));
59901
59906
  }
59902
59907
  }
@@ -60533,11 +60538,11 @@ void WindowLocalSinkState::Sink(DataChunk &input_chunk, WindowGlobalSinkState &g
60533
60538
  Vector addresses(LogicalType::POINTER);
60534
60539
  auto key_locations = FlatVector::GetData<data_ptr_t>(addresses);
60535
60540
  auto handles = rows->Build(row_count, key_locations, nullptr, row_sel);
60536
- vector<VectorData> payload_data;
60541
+ vector<UnifiedVectorFormat> payload_data;
60537
60542
  payload_data.reserve(payload_chunk.ColumnCount());
60538
60543
  for (idx_t i = 0; i < payload_chunk.ColumnCount(); i++) {
60539
- VectorData pdata;
60540
- payload_chunk.data[i].Orrify(row_count, pdata);
60544
+ UnifiedVectorFormat pdata;
60545
+ payload_chunk.data[i].ToUnifiedFormat(row_count, pdata);
60541
60546
  payload_data.emplace_back(move(pdata));
60542
60547
  }
60543
60548
  RowOperations::Scatter(payload_chunk, payload_data.data(), payload_layout, addresses, *strings, *row_sel,
@@ -61410,8 +61415,8 @@ static void ComputeWindowExpression(BoundWindowExpression *wexpr, ChunkCollectio
61410
61415
  idx_t pos = 0;
61411
61416
  for (auto &chunk : payload_collection.Chunks()) {
61412
61417
  const auto count = chunk->size();
61413
- VectorData vdata;
61414
- chunk->data[0].Orrify(count, vdata);
61418
+ UnifiedVectorFormat vdata;
61419
+ chunk->data[0].ToUnifiedFormat(count, vdata);
61415
61420
  if (!vdata.validity.AllValid()) {
61416
61421
  // Lazily materialise the contents when we find the first NULL
61417
61422
  if (ignore_nulls.AllValid()) {
@@ -64042,7 +64047,7 @@ public:
64042
64047
  static unique_ptr<DistinctStatistics> Deserialize(FieldReader &reader);
64043
64048
 
64044
64049
  void Update(Vector &update, idx_t count, bool sample = true);
64045
- void Update(VectorData &update_data, const LogicalType &ptype, idx_t count, bool sample = true);
64050
+ void Update(UnifiedVectorFormat &update_data, const LogicalType &ptype, idx_t count, bool sample = true);
64046
64051
 
64047
64052
  string ToString() const override;
64048
64053
  idx_t GetCount() const;
@@ -64481,8 +64486,8 @@ bool PerfectHashJoinExecutor::TemplatedFillSelectionVectorBuild(Vector &source,
64481
64486
  }
64482
64487
  auto min_value = perfect_join_statistics.build_min.GetValueUnsafe<T>();
64483
64488
  auto max_value = perfect_join_statistics.build_max.GetValueUnsafe<T>();
64484
- VectorData vector_data;
64485
- source.Orrify(count, vector_data);
64489
+ UnifiedVectorFormat vector_data;
64490
+ source.ToUnifiedFormat(count, vector_data);
64486
64491
  auto data = reinterpret_cast<T *>(vector_data.data);
64487
64492
  // generate the selection vector
64488
64493
  for (idx_t i = 0, sel_idx = 0; i < count; ++i) {
@@ -64604,8 +64609,8 @@ void PerfectHashJoinExecutor::TemplatedFillSelectionVectorProbe(Vector &source,
64604
64609
  auto min_value = perfect_join_statistics.build_min.GetValueUnsafe<T>();
64605
64610
  auto max_value = perfect_join_statistics.build_max.GetValueUnsafe<T>();
64606
64611
 
64607
- VectorData vector_data;
64608
- source.Orrify(count, vector_data);
64612
+ UnifiedVectorFormat vector_data;
64613
+ source.ToUnifiedFormat(count, vector_data);
64609
64614
  auto data = reinterpret_cast<T *>(vector_data.data);
64610
64615
  auto validity_mask = &vector_data.validity;
64611
64616
  // build selection vector for non-dense build
@@ -67741,8 +67746,8 @@ PhysicalNestedLoopJoin::PhysicalNestedLoopJoin(LogicalOperator &op, unique_ptr<P
67741
67746
 
67742
67747
  static bool HasNullValues(DataChunk &chunk) {
67743
67748
  for (idx_t col_idx = 0; col_idx < chunk.ColumnCount(); col_idx++) {
67744
- VectorData vdata;
67745
- chunk.data[col_idx].Orrify(chunk.size(), vdata);
67749
+ UnifiedVectorFormat vdata;
67750
+ chunk.data[col_idx].ToUnifiedFormat(chunk.size(), vdata);
67746
67751
 
67747
67752
  if (vdata.validity.AllValid()) {
67748
67753
  continue;
@@ -67801,8 +67806,8 @@ void PhysicalJoin::ConstructMarkJoinResult(DataChunk &join_keys, DataChunk &left
67801
67806
  auto bool_result = FlatVector::GetData<bool>(mark_vector);
67802
67807
  auto &mask = FlatVector::Validity(mark_vector);
67803
67808
  for (idx_t col_idx = 0; col_idx < join_keys.ColumnCount(); col_idx++) {
67804
- VectorData jdata;
67805
- join_keys.data[col_idx].Orrify(join_keys.size(), jdata);
67809
+ UnifiedVectorFormat jdata;
67810
+ join_keys.data[col_idx].ToUnifiedFormat(join_keys.size(), jdata);
67806
67811
  if (!jdata.validity.AllValid()) {
67807
67812
  for (idx_t i = 0; i < join_keys.size(); i++) {
67808
67813
  auto jidx = jdata.sel->get_index(i);
@@ -68654,7 +68659,7 @@ void PhysicalPiecewiseMergeJoin::ResolveSimpleJoin(ExecutionContext &context, Da
68654
68659
  // The only part of the join keys that is actually used is the validity mask.
68655
68660
  // Since the payload is sorted, we can just set the tail end of the validity masks to invalid.
68656
68661
  for (auto &key : lhs_table.keys.data) {
68657
- key.Normalify(lhs_table.keys.size());
68662
+ key.Flatten(lhs_table.keys.size());
68658
68663
  auto &mask = FlatVector::Validity(key);
68659
68664
  if (mask.AllValid()) {
68660
68665
  continue;
@@ -69201,8 +69206,8 @@ idx_t PhysicalRangeJoin::LocalSortedTable::MergeNulls(const vector<JoinCondition
69201
69206
  }
69202
69207
  return 0;
69203
69208
  } else if (keys.ColumnCount() > 1) {
69204
- // Normalify the primary, as it will need to merge arbitrary validity masks
69205
- primary.Normalify(count);
69209
+ // Flatten the primary, as it will need to merge arbitrary validity masks
69210
+ primary.Flatten(count);
69206
69211
  auto &pvalidity = FlatVector::Validity(primary);
69207
69212
  D_ASSERT(keys.ColumnCount() == conditions.size());
69208
69213
  for (size_t c = 1; c < keys.data.size(); ++c) {
@@ -69210,10 +69215,10 @@ idx_t PhysicalRangeJoin::LocalSortedTable::MergeNulls(const vector<JoinCondition
69210
69215
  if (conditions[c].comparison == ExpressionType::COMPARE_DISTINCT_FROM) {
69211
69216
  continue;
69212
69217
  }
69213
- // Orrify the rest, as the sort code will do this anyway.
69218
+ // Convert the rest to unified format, as the sort code will do this anyway.
69214
69219
  auto &v = keys.data[c];
69215
- VectorData vdata;
69216
- v.Orrify(count, vdata);
69220
+ UnifiedVectorFormat vdata;
69221
+ v.ToUnifiedFormat(count, vdata);
69217
69222
  auto &vvalidity = vdata.validity;
69218
69223
  if (vvalidity.AllValid()) {
69219
69224
  continue;
@@ -72517,7 +72522,7 @@ SinkResultType PhysicalDelete::Sink(ExecutionContext &context, GlobalSinkState &
72517
72522
 
72518
72523
  lock_guard<mutex> delete_guard(gstate.delete_lock);
72519
72524
  if (return_chunk) {
72520
- row_identifiers.Normalify(input.size());
72525
+ row_identifiers.Flatten(input.size());
72521
72526
  table.Fetch(transaction, ustate.delete_chunk, column_ids, row_identifiers, input.size(), cfs);
72522
72527
  gstate.return_chunk_collection.Append(ustate.delete_chunk);
72523
72528
  }
@@ -72947,7 +72952,7 @@ SinkResultType PhysicalInsert::Sink(ExecutionContext &context, GlobalSinkState &
72947
72952
  auto &gstate = (InsertGlobalState &)state;
72948
72953
  auto &istate = (InsertLocalState &)lstate;
72949
72954
 
72950
- chunk.Normalify();
72955
+ chunk.Flatten();
72951
72956
  istate.default_executor.SetChunk(chunk);
72952
72957
 
72953
72958
  istate.insert_chunk.Reset();
@@ -73163,7 +73168,7 @@ SinkResultType PhysicalUpdate::Sink(ExecutionContext &context, GlobalSinkState &
73163
73168
  DataChunk &update_chunk = ustate.update_chunk;
73164
73169
  DataChunk &mock_chunk = ustate.mock_chunk;
73165
73170
 
73166
- chunk.Normalify();
73171
+ chunk.Flatten();
73167
73172
  ustate.default_executor.SetChunk(chunk);
73168
73173
 
73169
73174
  // update data in the base table
@@ -73541,8 +73546,8 @@ public:
73541
73546
 
73542
73547
  ExpressionExecutor executor;
73543
73548
  DataChunk list_data;
73544
- vector<VectorData> list_vector_data;
73545
- vector<VectorData> list_child_data;
73549
+ vector<UnifiedVectorFormat> list_vector_data;
73550
+ vector<UnifiedVectorFormat> list_child_data;
73546
73551
  };
73547
73552
 
73548
73553
  // this implements a sorted window functions variant
@@ -73572,7 +73577,7 @@ static void UnnestNull(idx_t start, idx_t end, Vector &result) {
73572
73577
  }
73573
73578
 
73574
73579
  template <class T>
73575
- static void TemplatedUnnest(VectorData &vdata, idx_t start, idx_t end, Vector &result) {
73580
+ static void TemplatedUnnest(UnifiedVectorFormat &vdata, idx_t start, idx_t end, Vector &result) {
73576
73581
  auto source_data = (T *)vdata.data;
73577
73582
  auto &source_mask = vdata.validity;
73578
73583
  auto result_data = FlatVector::GetData<T>(result);
@@ -73590,7 +73595,7 @@ static void TemplatedUnnest(VectorData &vdata, idx_t start, idx_t end, Vector &r
73590
73595
  }
73591
73596
  }
73592
73597
 
73593
- static void UnnestValidity(VectorData &vdata, idx_t start, idx_t end, Vector &result) {
73598
+ static void UnnestValidity(UnifiedVectorFormat &vdata, idx_t start, idx_t end, Vector &result) {
73594
73599
  auto &source_mask = vdata.validity;
73595
73600
  auto &result_mask = FlatVector::Validity(result);
73596
73601
 
@@ -73601,7 +73606,8 @@ static void UnnestValidity(VectorData &vdata, idx_t start, idx_t end, Vector &re
73601
73606
  }
73602
73607
  }
73603
73608
 
73604
- static void UnnestVector(VectorData &vdata, Vector &source, idx_t list_size, idx_t start, idx_t end, Vector &result) {
73609
+ static void UnnestVector(UnifiedVectorFormat &vdata, Vector &source, idx_t list_size, idx_t start, idx_t end,
73610
+ Vector &result) {
73605
73611
  switch (result.GetType().InternalType()) {
73606
73612
  case PhysicalType::BOOL:
73607
73613
  case PhysicalType::INT8:
@@ -73655,8 +73661,8 @@ static void UnnestVector(VectorData &vdata, Vector &source, idx_t list_size, idx
73655
73661
  auto &target_entries = StructVector::GetEntries(result);
73656
73662
  UnnestValidity(vdata, start, end, result);
73657
73663
  for (idx_t i = 0; i < source_entries.size(); i++) {
73658
- VectorData sdata;
73659
- source_entries[i]->Orrify(list_size, sdata);
73664
+ UnifiedVectorFormat sdata;
73665
+ source_entries[i]->ToUnifiedFormat(list_size, sdata);
73660
73666
  UnnestVector(sdata, *source_entries[i], list_size, start, end, *target_entries[i]);
73661
73667
  }
73662
73668
  break;
@@ -73693,19 +73699,19 @@ OperatorResultType PhysicalUnnest::ExecuteInternal(ExecutionContext &context, Da
73693
73699
  D_ASSERT(state.list_vector_data.size() == state.list_data.ColumnCount());
73694
73700
  D_ASSERT(state.list_child_data.size() == state.list_data.ColumnCount());
73695
73701
 
73696
- // initialize VectorData object so the nullmask can accessed
73702
+ // initialize UnifiedVectorFormat object so the nullmask can be accessed
73697
73703
  for (idx_t col_idx = 0; col_idx < state.list_data.ColumnCount(); col_idx++) {
73698
73704
  auto &list_vector = state.list_data.data[col_idx];
73699
- list_vector.Orrify(state.list_data.size(), state.list_vector_data[col_idx]);
73705
+ list_vector.ToUnifiedFormat(state.list_data.size(), state.list_vector_data[col_idx]);
73700
73706
 
73701
73707
  if (list_vector.GetType() == LogicalType::SQLNULL) {
73702
73708
  // UNNEST(NULL)
73703
73709
  auto &child_vector = list_vector;
73704
- child_vector.Orrify(0, state.list_child_data[col_idx]);
73710
+ child_vector.ToUnifiedFormat(0, state.list_child_data[col_idx]);
73705
73711
  } else {
73706
73712
  auto list_size = ListVector::GetListSize(list_vector);
73707
73713
  auto &child_vector = ListVector::GetEntry(list_vector);
73708
- child_vector.Orrify(list_size, state.list_child_data[col_idx]);
73714
+ child_vector.ToUnifiedFormat(list_size, state.list_child_data[col_idx]);
73709
73715
  }
73710
73716
  }
73711
73717
  state.first_fetch = false;
@@ -75363,7 +75369,7 @@ idx_t PartitionableHashTable::AddChunk(DataChunk &groups, DataChunk &payload, bo
75363
75369
  sel_vector_sizes[r] = 0;
75364
75370
  }
75365
75371
 
75366
- hashes.Normalify(groups.size());
75372
+ hashes.Flatten(groups.size());
75367
75373
  auto hashes_ptr = FlatVector::GetData<hash_t>(hashes);
75368
75374
 
75369
75375
  for (idx_t i = 0; i < groups.size(); i++) {
@@ -75480,7 +75486,7 @@ PerfectAggregateHashTable::~PerfectAggregateHashTable() {
75480
75486
  }
75481
75487
 
75482
75488
  template <class T>
75483
- static void ComputeGroupLocationTemplated(VectorData &group_data, Value &min, uintptr_t *address_data,
75489
+ static void ComputeGroupLocationTemplated(UnifiedVectorFormat &group_data, Value &min, uintptr_t *address_data,
75484
75490
  idx_t current_shift, idx_t count) {
75485
75491
  auto data = (T *)group_data.data;
75486
75492
  auto min_val = min.GetValueUnsafe<T>();
@@ -75508,8 +75514,8 @@ static void ComputeGroupLocationTemplated(VectorData &group_data, Value &min, ui
75508
75514
  }
75509
75515
 
75510
75516
  static void ComputeGroupLocation(Vector &group, Value &min, uintptr_t *address_data, idx_t current_shift, idx_t count) {
75511
- VectorData vdata;
75512
- group.Orrify(count, vdata);
75517
+ UnifiedVectorFormat vdata;
75518
+ group.ToUnifiedFormat(count, vdata);
75513
75519
 
75514
75520
  switch (group.GetType().InternalType()) {
75515
75521
  case PhysicalType::INT8:
@@ -80187,7 +80193,7 @@ void ReservoirSample::ReplaceElement(DataChunk &input, idx_t index_in_chunk) {
80187
80193
 
80188
80194
  idx_t ReservoirSample::FillReservoir(DataChunk &input) {
80189
80195
  idx_t chunk_count = input.size();
80190
- input.Normalify();
80196
+ input.Flatten();
80191
80197
 
80192
80198
  // we have not: append to the reservoir
80193
80199
  idx_t required_count;
@@ -80237,7 +80243,7 @@ void ReservoirSamplePercentage::AddToReservoir(DataChunk &input) {
80237
80243
  idx_t append_to_next_sample = input.size() - append_to_current_sample_count;
80238
80244
  if (append_to_current_sample_count > 0) {
80239
80245
  // we have elements remaining, first add them to the current sample
80240
- input.Normalify();
80246
+ input.Flatten();
80241
80247
 
80242
80248
  input.SetCardinality(append_to_current_sample_count);
80243
80249
  current_sample->AddToReservoir(input);
@@ -80374,7 +80380,7 @@ WindowSegmentTree::WindowSegmentTree(AggregateFunction &aggregate, FunctionData
80374
80380
  #if STANDARD_VECTOR_SIZE < 512
80375
80381
  throw NotImplementedException("Window functions are not supported for vector sizes < 512");
80376
80382
  #endif
80377
- statep.Normalify(STANDARD_VECTOR_SIZE);
80383
+ statep.Flatten(STANDARD_VECTOR_SIZE);
80378
80384
  statev.SetVectorType(VectorType::FLAT_VECTOR); // Prevent conversion of results to constants
80379
80385
 
80380
80386
  if (input_ref && input_ref->ColumnCount() > 0) {
@@ -81570,8 +81576,8 @@ static void ApproxCountDistinctSimpleUpdateFunction(Vector inputs[], AggregateIn
81570
81576
  agg_state->log = new HyperLogLog();
81571
81577
  }
81572
81578
 
81573
- VectorData vdata;
81574
- inputs[0].Orrify(count, vdata);
81579
+ UnifiedVectorFormat vdata;
81580
+ inputs[0].ToUnifiedFormat(count, vdata);
81575
81581
 
81576
81582
  uint64_t indices[STANDARD_VECTOR_SIZE];
81577
81583
  uint8_t counts[STANDARD_VECTOR_SIZE];
@@ -81584,8 +81590,8 @@ static void ApproxCountDistinctUpdateFunction(Vector inputs[], AggregateInputDat
81584
81590
  Vector &state_vector, idx_t count) {
81585
81591
  D_ASSERT(input_count == 1);
81586
81592
 
81587
- VectorData sdata;
81588
- state_vector.Orrify(count, sdata);
81593
+ UnifiedVectorFormat sdata;
81594
+ state_vector.ToUnifiedFormat(count, sdata);
81589
81595
  auto states = (ApproxDistinctCountState **)sdata.data;
81590
81596
 
81591
81597
  for (idx_t i = 0; i < count; i++) {
@@ -81595,8 +81601,8 @@ static void ApproxCountDistinctUpdateFunction(Vector inputs[], AggregateInputDat
81595
81601
  }
81596
81602
  }
81597
81603
 
81598
- VectorData vdata;
81599
- inputs[0].Orrify(count, vdata);
81604
+ UnifiedVectorFormat vdata;
81605
+ inputs[0].ToUnifiedFormat(count, vdata);
81600
81606
 
81601
81607
  uint64_t indices[STANDARD_VECTOR_SIZE];
81602
81608
  uint8_t counts[STANDARD_VECTOR_SIZE];
@@ -82620,8 +82626,8 @@ struct FirstVectorFunction {
82620
82626
 
82621
82627
  static void Update(Vector inputs[], AggregateInputData &, idx_t input_count, Vector &state_vector, idx_t count) {
82622
82628
  auto &input = inputs[0];
82623
- VectorData sdata;
82624
- state_vector.Orrify(count, sdata);
82629
+ UnifiedVectorFormat sdata;
82630
+ state_vector.ToUnifiedFormat(count, sdata);
82625
82631
 
82626
82632
  auto states = (FirstStateVector **)sdata.data;
82627
82633
  for (idx_t i = 0; i < count; i++) {
@@ -83095,9 +83101,9 @@ struct MaxOperationString : public StringMinMaxBase {
83095
83101
 
83096
83102
  template <typename T, class OP>
83097
83103
  static bool TemplatedOptimumType(Vector &left, idx_t lidx, idx_t lcount, Vector &right, idx_t ridx, idx_t rcount) {
83098
- VectorData lvdata, rvdata;
83099
- left.Orrify(lcount, lvdata);
83100
- right.Orrify(rcount, rvdata);
83104
+ UnifiedVectorFormat lvdata, rvdata;
83105
+ left.ToUnifiedFormat(lcount, lvdata);
83106
+ right.ToUnifiedFormat(rcount, rvdata);
83101
83107
 
83102
83108
  lidx = lvdata.sel->get_index(lidx);
83103
83109
  ridx = rvdata.sel->get_index(ridx);
@@ -83166,9 +83172,9 @@ static bool TemplatedOptimumStruct(Vector &left, idx_t lidx_p, idx_t lcount, Vec
83166
83172
  idx_t rcount) {
83167
83173
  // STRUCT dictionaries apply to all the children
83168
83174
  // so map the indexes first
83169
- VectorData lvdata, rvdata;
83170
- left.Orrify(lcount, lvdata);
83171
- right.Orrify(rcount, rvdata);
83175
+ UnifiedVectorFormat lvdata, rvdata;
83176
+ left.ToUnifiedFormat(lcount, lvdata);
83177
+ right.ToUnifiedFormat(rcount, rvdata);
83172
83178
 
83173
83179
  idx_t lidx = lvdata.sel->get_index(lidx_p);
83174
83180
  idx_t ridx = rvdata.sel->get_index(ridx_p);
@@ -83208,9 +83214,9 @@ static bool TemplatedOptimumStruct(Vector &left, idx_t lidx_p, idx_t lcount, Vec
83208
83214
 
83209
83215
  template <class OP>
83210
83216
  static bool TemplatedOptimumList(Vector &left, idx_t lidx, idx_t lcount, Vector &right, idx_t ridx, idx_t rcount) {
83211
- VectorData lvdata, rvdata;
83212
- left.Orrify(lcount, lvdata);
83213
- right.Orrify(rcount, rvdata);
83217
+ UnifiedVectorFormat lvdata, rvdata;
83218
+ left.ToUnifiedFormat(lcount, lvdata);
83219
+ right.ToUnifiedFormat(rcount, rvdata);
83214
83220
 
83215
83221
  // Update the indexes and vector sizes for recursion.
83216
83222
  lidx = lvdata.sel->get_index(lidx);
@@ -83298,11 +83304,11 @@ struct VectorMinMaxBase {
83298
83304
  template <class STATE, class OP>
83299
83305
  static void Update(Vector inputs[], AggregateInputData &, idx_t input_count, Vector &state_vector, idx_t count) {
83300
83306
  auto &input = inputs[0];
83301
- VectorData idata;
83302
- input.Orrify(count, idata);
83307
+ UnifiedVectorFormat idata;
83308
+ input.ToUnifiedFormat(count, idata);
83303
83309
 
83304
- VectorData sdata;
83305
- state_vector.Orrify(count, sdata);
83310
+ UnifiedVectorFormat sdata;
83311
+ state_vector.ToUnifiedFormat(count, sdata);
83306
83312
 
83307
83313
  auto states = (STATE **)sdata.data;
83308
83314
  for (idx_t i = 0; i < count; i++) {
@@ -87255,7 +87261,7 @@ namespace duckdb {
87255
87261
 
87256
87262
  struct HistogramFunctor {
87257
87263
  template <class T, class MAP_TYPE = map<T, idx_t>>
87258
- static void HistogramUpdate(VectorData &sdata, VectorData &input_data, idx_t count) {
87264
+ static void HistogramUpdate(UnifiedVectorFormat &sdata, UnifiedVectorFormat &input_data, idx_t count) {
87259
87265
 
87260
87266
  auto states = (HistogramAggState<T, MAP_TYPE> **)sdata.data;
87261
87267
  for (idx_t i = 0; i < count; i++) {
@@ -87278,7 +87284,7 @@ struct HistogramFunctor {
87278
87284
 
87279
87285
  struct HistogramStringFunctor {
87280
87286
  template <class T, class MAP_TYPE = map<T, idx_t>>
87281
- static void HistogramUpdate(VectorData &sdata, VectorData &input_data, idx_t count) {
87287
+ static void HistogramUpdate(UnifiedVectorFormat &sdata, UnifiedVectorFormat &input_data, idx_t count) {
87282
87288
 
87283
87289
  auto states = (HistogramAggState<T, MAP_TYPE> **)sdata.data;
87284
87290
  for (idx_t i = 0; i < count; i++) {
@@ -87325,10 +87331,10 @@ static void HistogramUpdateFunction(Vector inputs[], AggregateInputData &, idx_t
87325
87331
  D_ASSERT(input_count == 1);
87326
87332
 
87327
87333
  auto &input = inputs[0];
87328
- VectorData sdata;
87329
- state_vector.Orrify(count, sdata);
87330
- VectorData input_data;
87331
- input.Orrify(count, input_data);
87334
+ UnifiedVectorFormat sdata;
87335
+ state_vector.ToUnifiedFormat(count, sdata);
87336
+ UnifiedVectorFormat input_data;
87337
+ input.ToUnifiedFormat(count, input_data);
87332
87338
 
87333
87339
  OP::template HistogramUpdate<T, MAP_TYPE>(sdata, input_data, count);
87334
87340
  }
@@ -87336,8 +87342,8 @@ static void HistogramUpdateFunction(Vector inputs[], AggregateInputData &, idx_t
87336
87342
  template <class T, class MAP_TYPE>
87337
87343
  static void HistogramCombineFunction(Vector &state, Vector &combined, AggregateInputData &, idx_t count) {
87338
87344
 
87339
- VectorData sdata;
87340
- state.Orrify(count, sdata);
87345
+ UnifiedVectorFormat sdata;
87346
+ state.ToUnifiedFormat(count, sdata);
87341
87347
  auto states_ptr = (HistogramAggState<T, MAP_TYPE> **)sdata.data;
87342
87348
 
87343
87349
  auto combined_ptr = FlatVector::GetData<HistogramAggState<T, MAP_TYPE> *>(combined);
@@ -87362,8 +87368,8 @@ template <class OP, class T, class MAP_TYPE>
87362
87368
  static void HistogramFinalizeFunction(Vector &state_vector, AggregateInputData &, Vector &result, idx_t count,
87363
87369
  idx_t offset) {
87364
87370
 
87365
- VectorData sdata;
87366
- state_vector.Orrify(count, sdata);
87371
+ UnifiedVectorFormat sdata;
87372
+ state_vector.ToUnifiedFormat(count, sdata);
87367
87373
  auto states = (HistogramAggState<T, MAP_TYPE> **)sdata.data;
87368
87374
 
87369
87375
  auto &mask = FlatVector::Validity(result);
@@ -87553,14 +87559,14 @@ static void ListUpdateFunction(Vector inputs[], AggregateInputData &, idx_t inpu
87553
87559
  D_ASSERT(input_count == 1);
87554
87560
 
87555
87561
  auto &input = inputs[0];
87556
- VectorData sdata;
87557
- state_vector.Orrify(count, sdata);
87562
+ UnifiedVectorFormat sdata;
87563
+ state_vector.ToUnifiedFormat(count, sdata);
87558
87564
 
87559
87565
  auto list_vector_type = LogicalType::LIST(input.GetType());
87560
87566
 
87561
87567
  auto states = (ListAggState **)sdata.data;
87562
87568
  if (input.GetVectorType() == VectorType::SEQUENCE_VECTOR) {
87563
- input.Normalify(count);
87569
+ input.Flatten(count);
87564
87570
  }
87565
87571
  for (idx_t i = 0; i < count; i++) {
87566
87572
  auto state = states[sdata.sel->get_index(i)];
@@ -87574,8 +87580,8 @@ static void ListUpdateFunction(Vector inputs[], AggregateInputData &, idx_t inpu
87574
87580
  }
87575
87581
 
87576
87582
  static void ListCombineFunction(Vector &state, Vector &combined, AggregateInputData &, idx_t count) {
87577
- VectorData sdata;
87578
- state.Orrify(count, sdata);
87583
+ UnifiedVectorFormat sdata;
87584
+ state.ToUnifiedFormat(count, sdata);
87579
87585
  auto states_ptr = (ListAggState **)sdata.data;
87580
87586
 
87581
87587
  auto combined_ptr = FlatVector::GetData<ListAggState *>(combined);
@@ -87597,8 +87603,8 @@ static void ListCombineFunction(Vector &state, Vector &combined, AggregateInputD
87597
87603
  }
87598
87604
 
87599
87605
  static void ListFinalize(Vector &state_vector, AggregateInputData &, Vector &result, idx_t count, idx_t offset) {
87600
- VectorData sdata;
87601
- state_vector.Orrify(count, sdata);
87606
+ UnifiedVectorFormat sdata;
87607
+ state_vector.ToUnifiedFormat(count, sdata);
87602
87608
  auto states = (ListAggState **)sdata.data;
87603
87609
 
87604
87610
  D_ASSERT(result.GetType().id() == LogicalTypeId::LIST);
@@ -88371,8 +88377,8 @@ struct SortedAggregateFunction {
88371
88377
 
88372
88378
  // We have to scatter the chunks one at a time
88373
88379
  // so build a selection vector for each one.
88374
- VectorData svdata;
88375
- states.Orrify(count, svdata);
88380
+ UnifiedVectorFormat svdata;
88381
+ states.ToUnifiedFormat(count, svdata);
88376
88382
 
88377
88383
  // Build the selection vector for each state.
88378
88384
  auto sdata = (SortedAggregateState **)svdata.data;
@@ -91975,8 +91981,8 @@ struct StructDatePart {
91975
91981
  }
91976
91982
  }
91977
91983
  } else {
91978
- VectorData rdata;
91979
- input.Orrify(count, rdata);
91984
+ UnifiedVectorFormat rdata;
91985
+ input.ToUnifiedFormat(count, rdata);
91980
91986
 
91981
91987
  const auto &arg_valid = rdata.validity;
91982
91988
  auto tdata = (const INPUT_TYPE *)rdata.data;
@@ -93427,9 +93433,9 @@ struct SenaryExecutor {
93427
93433
  auto &result_validity = FlatVector::Validity(result);
93428
93434
 
93429
93435
  bool all_valid = true;
93430
- vector<VectorData> vdata(NCOLS);
93436
+ vector<UnifiedVectorFormat> vdata(NCOLS);
93431
93437
  for (size_t c = 0; c < NCOLS; ++c) {
93432
- input.data[c].Orrify(count, vdata[c]);
93438
+ input.data[c].ToUnifiedFormat(count, vdata[c]);
93433
93439
  all_valid = all_valid && vdata[c].validity.AllValid();
93434
93440
  }
93435
93441
 
@@ -95340,7 +95346,7 @@ static void ConstantOrNullFunction(DataChunk &args, ExpressionState &state, Vect
95340
95346
  auto &input_mask = FlatVector::Validity(args.data[idx]);
95341
95347
  if (!input_mask.AllValid()) {
95342
95348
  // there are null values: need to merge them into the result
95343
- result.Normalify(args.size());
95349
+ result.Flatten(args.size());
95344
95350
  auto &result_mask = FlatVector::Validity(result);
95345
95351
  result_mask.Combine(input_mask, args.size());
95346
95352
  }
@@ -95356,10 +95362,10 @@ static void ConstantOrNullFunction(DataChunk &args, ExpressionState &state, Vect
95356
95362
  break;
95357
95363
  }
95358
95364
  default: {
95359
- VectorData vdata;
95360
- args.data[idx].Orrify(args.size(), vdata);
95365
+ UnifiedVectorFormat vdata;
95366
+ args.data[idx].ToUnifiedFormat(args.size(), vdata);
95361
95367
  if (!vdata.validity.AllValid()) {
95362
- result.Normalify(args.size());
95368
+ result.Flatten(args.size());
95363
95369
  auto &result_mask = FlatVector::Validity(result);
95364
95370
  for (idx_t i = 0; i < args.size(); i++) {
95365
95371
  if (!vdata.validity.RowIsValid(vdata.sel->get_index(i))) {
@@ -95513,8 +95519,8 @@ static void LeastGreatestFunction(DataChunk &args, ExpressionState &state, Vecto
95513
95519
  // copy over the first column
95514
95520
  bool result_has_value[STANDARD_VECTOR_SIZE];
95515
95521
  {
95516
- VectorData vdata;
95517
- args.data[0].Orrify(args.size(), vdata);
95522
+ UnifiedVectorFormat vdata;
95523
+ args.data[0].ToUnifiedFormat(args.size(), vdata);
95518
95524
  auto input_data = (T *)vdata.data;
95519
95525
  for (idx_t i = 0; i < args.size(); i++) {
95520
95526
  auto vindex = vdata.sel->get_index(i);
@@ -95534,8 +95540,8 @@ static void LeastGreatestFunction(DataChunk &args, ExpressionState &state, Vecto
95534
95540
  continue;
95535
95541
  }
95536
95542
 
95537
- VectorData vdata;
95538
- args.data[col_idx].Orrify(args.size(), vdata);
95543
+ UnifiedVectorFormat vdata;
95544
+ args.data[col_idx].ToUnifiedFormat(args.size(), vdata);
95539
95545
 
95540
95546
  auto input_data = (T *)vdata.data;
95541
95547
  if (!vdata.validity.AllValid()) {
@@ -95806,11 +95812,11 @@ static void ExecuteSlice(Vector &result, Vector &s, Vector &b, Vector &e, const
95806
95812
  rdata[0] = SliceValue<INPUT_TYPE, INDEX_TYPE>(result, sliced, begin, end);
95807
95813
  }
95808
95814
  } else {
95809
- VectorData sdata, bdata, edata;
95815
+ UnifiedVectorFormat sdata, bdata, edata;
95810
95816
 
95811
- s.Orrify(count, sdata);
95812
- b.Orrify(count, bdata);
95813
- e.Orrify(count, edata);
95817
+ s.ToUnifiedFormat(count, sdata);
95818
+ b.ToUnifiedFormat(count, bdata);
95819
+ e.ToUnifiedFormat(count, edata);
95814
95820
 
95815
95821
  auto rdata = FlatVector::GetData<INPUT_TYPE>(result);
95816
95822
  auto &rmask = FlatVector::Validity(result);
@@ -95851,7 +95857,7 @@ static void ArraySliceFunction(DataChunk &args, ExpressionState &state, Vector &
95851
95857
  Vector &b = args.data[1];
95852
95858
  Vector &e = args.data[2];
95853
95859
 
95854
- s.Normalify(count);
95860
+ s.Flatten(count);
95855
95861
  switch (result.GetType().id()) {
95856
95862
  case LogicalTypeId::LIST:
95857
95863
  // Share the value dictionary as we are just going to slice it
@@ -95965,15 +95971,15 @@ static void TemplatedContainsOrPosition(DataChunk &args, ExpressionState &state,
95965
95971
  auto list_size = ListVector::GetListSize(list);
95966
95972
  auto &child_vector = ListVector::GetEntry(list);
95967
95973
 
95968
- VectorData child_data;
95969
- child_vector.Orrify(list_size, child_data);
95974
+ UnifiedVectorFormat child_data;
95975
+ child_vector.ToUnifiedFormat(list_size, child_data);
95970
95976
 
95971
- VectorData list_data;
95972
- list.Orrify(count, list_data);
95977
+ UnifiedVectorFormat list_data;
95978
+ list.ToUnifiedFormat(count, list_data);
95973
95979
  auto list_entries = (list_entry_t *)list_data.data;
95974
95980
 
95975
- VectorData value_data;
95976
- value_vector.Orrify(count, value_data);
95981
+ UnifiedVectorFormat value_data;
95982
+ value_vector.ToUnifiedFormat(count, value_data);
95977
95983
 
95978
95984
  // not required for a comparison of nested types
95979
95985
  auto child_value = FlatVector::GetData<CHILD_TYPE>(child_vector);
@@ -96192,8 +96198,8 @@ void ListFlattenFunction(DataChunk &args, ExpressionState &state, Vector &result
96192
96198
 
96193
96199
  idx_t count = args.size();
96194
96200
 
96195
- VectorData list_data;
96196
- input.Orrify(count, list_data);
96201
+ UnifiedVectorFormat list_data;
96202
+ input.ToUnifiedFormat(count, list_data);
96197
96203
  auto list_entries = (list_entry_t *)list_data.data;
96198
96204
 
96199
96205
  auto &child_vector = ListVector::GetEntry(input);
@@ -96217,8 +96223,8 @@ void ListFlattenFunction(DataChunk &args, ExpressionState &state, Vector &result
96217
96223
  }
96218
96224
 
96219
96225
  auto child_size = ListVector::GetListSize(input);
96220
- VectorData child_data;
96221
- child_vector.Orrify(child_size, child_data);
96226
+ UnifiedVectorFormat child_data;
96227
+ child_vector.ToUnifiedFormat(child_size, child_data);
96222
96228
  auto child_entries = (list_entry_t *)child_data.data;
96223
96229
  auto &data_vector = ListVector::GetEntry(child_vector);
96224
96230
 
@@ -96395,8 +96401,8 @@ struct DistinctFunctor {
96395
96401
  template <class OP, class T, class MAP_TYPE = unordered_map<T, idx_t>>
96396
96402
  static void ListExecuteFunction(Vector &result, Vector &state_vector, idx_t count) {
96397
96403
 
96398
- VectorData sdata;
96399
- state_vector.Orrify(count, sdata);
96404
+ UnifiedVectorFormat sdata;
96405
+ state_vector.ToUnifiedFormat(count, sdata);
96400
96406
  auto states = (HistogramAggState<T, MAP_TYPE> **)sdata.data;
96401
96407
 
96402
96408
  auto result_data = FlatVector::GetData<list_entry_t>(result);
@@ -96428,8 +96434,8 @@ struct UniqueFunctor {
96428
96434
  template <class OP, class T, class MAP_TYPE = unordered_map<T, idx_t>>
96429
96435
  static void ListExecuteFunction(Vector &result, Vector &state_vector, idx_t count) {
96430
96436
 
96431
- VectorData sdata;
96432
- state_vector.Orrify(count, sdata);
96437
+ UnifiedVectorFormat sdata;
96438
+ state_vector.ToUnifiedFormat(count, sdata);
96433
96439
  auto states = (HistogramAggState<T, MAP_TYPE> **)sdata.data;
96434
96440
 
96435
96441
  auto result_data = FlatVector::GetData<uint64_t>(result);
@@ -96475,11 +96481,11 @@ static void ListAggregatesFunction(DataChunk &args, ExpressionState &state, Vect
96475
96481
  auto lists_size = ListVector::GetListSize(lists);
96476
96482
  auto &child_vector = ListVector::GetEntry(lists);
96477
96483
 
96478
- VectorData child_data;
96479
- child_vector.Orrify(lists_size, child_data);
96484
+ UnifiedVectorFormat child_data;
96485
+ child_vector.ToUnifiedFormat(lists_size, child_data);
96480
96486
 
96481
- VectorData lists_data;
96482
- lists.Orrify(count, lists_data);
96487
+ UnifiedVectorFormat lists_data;
96488
+ lists.ToUnifiedFormat(count, lists_data);
96483
96489
  auto list_entries = (list_entry_t *)lists_data.data;
96484
96490
 
96485
96491
  // state_buffer holds the state for each list of this chunk
@@ -96818,10 +96824,10 @@ static void ListConcatFunction(DataChunk &args, ExpressionState &state, Vector &
96818
96824
  return;
96819
96825
  }
96820
96826
 
96821
- VectorData lhs_data;
96822
- VectorData rhs_data;
96823
- lhs.Orrify(count, lhs_data);
96824
- rhs.Orrify(count, rhs_data);
96827
+ UnifiedVectorFormat lhs_data;
96828
+ UnifiedVectorFormat rhs_data;
96829
+ lhs.ToUnifiedFormat(count, lhs_data);
96830
+ rhs.ToUnifiedFormat(count, rhs_data);
96825
96831
  auto lhs_entries = (list_entry_t *)lhs_data.data;
96826
96832
  auto rhs_entries = (list_entry_t *)rhs_data.data;
96827
96833
 
@@ -96829,10 +96835,10 @@ static void ListConcatFunction(DataChunk &args, ExpressionState &state, Vector &
96829
96835
  auto rhs_list_size = ListVector::GetListSize(rhs);
96830
96836
  auto &lhs_child = ListVector::GetEntry(lhs);
96831
96837
  auto &rhs_child = ListVector::GetEntry(rhs);
96832
- VectorData lhs_child_data;
96833
- VectorData rhs_child_data;
96834
- lhs_child.Orrify(lhs_list_size, lhs_child_data);
96835
- rhs_child.Orrify(rhs_list_size, rhs_child_data);
96838
+ UnifiedVectorFormat lhs_child_data;
96839
+ UnifiedVectorFormat rhs_child_data;
96840
+ lhs_child.ToUnifiedFormat(lhs_list_size, lhs_child_data);
96841
+ rhs_child.ToUnifiedFormat(rhs_list_size, rhs_child_data);
96836
96842
 
96837
96843
  result.SetVectorType(VectorType::FLAT_VECTOR);
96838
96844
  auto result_entries = FlatVector::GetData<list_entry_t>(result);
@@ -96945,10 +96951,10 @@ void ListConcatFun::RegisterFunction(BuiltinFunctions &set) {
96945
96951
  namespace duckdb {
96946
96952
 
96947
96953
  template <class T, bool HEAP_REF = false, bool VALIDITY_ONLY = false>
96948
- void ListExtractTemplate(idx_t count, VectorData &list_data, VectorData &offsets_data, Vector &child_vector,
96949
- idx_t list_size, Vector &result) {
96950
- VectorData child_data;
96951
- child_vector.Orrify(list_size, child_data);
96954
+ void ListExtractTemplate(idx_t count, UnifiedVectorFormat &list_data, UnifiedVectorFormat &offsets_data,
96955
+ Vector &child_vector, idx_t list_size, Vector &result) {
96956
+ UnifiedVectorFormat child_data;
96957
+ child_vector.ToUnifiedFormat(list_size, child_data);
96952
96958
 
96953
96959
  T *result_data;
96954
96960
 
@@ -97008,8 +97014,8 @@ void ListExtractTemplate(idx_t count, VectorData &list_data, VectorData &offsets
97008
97014
  result.SetVectorType(VectorType::CONSTANT_VECTOR);
97009
97015
  }
97010
97016
  }
97011
- static void ExecuteListExtractInternal(const idx_t count, VectorData &list, VectorData &offsets, Vector &child_vector,
97012
- idx_t list_size, Vector &result) {
97017
+ static void ExecuteListExtractInternal(const idx_t count, UnifiedVectorFormat &list, UnifiedVectorFormat &offsets,
97018
+ Vector &child_vector, idx_t list_size, Vector &result) {
97013
97019
  D_ASSERT(child_vector.GetType() == result.GetType());
97014
97020
  switch (result.GetType().InternalType()) {
97015
97021
  case PhysicalType::BOOL:
@@ -97080,11 +97086,11 @@ static void ExecuteListExtractInternal(const idx_t count, VectorData &list, Vect
97080
97086
 
97081
97087
  static void ExecuteListExtract(Vector &result, Vector &list, Vector &offsets, const idx_t count) {
97082
97088
  D_ASSERT(list.GetType().id() == LogicalTypeId::LIST);
97083
- VectorData list_data;
97084
- VectorData offsets_data;
97089
+ UnifiedVectorFormat list_data;
97090
+ UnifiedVectorFormat offsets_data;
97085
97091
 
97086
- list.Orrify(count, list_data);
97087
- offsets.Orrify(count, offsets_data);
97092
+ list.ToUnifiedFormat(count, list_data);
97093
+ offsets.ToUnifiedFormat(count, offsets_data);
97088
97094
  ExecuteListExtractInternal(count, list_data, offsets_data, ListVector::GetEntry(list),
97089
97095
  ListVector::GetListSize(list), result);
97090
97096
  result.Verify(count);
@@ -97217,8 +97223,8 @@ ListLambdaBindData::~ListLambdaBindData() {
97217
97223
  static void AppendTransformedToResult(Vector &lambda_vector, idx_t &elem_cnt, Vector &result) {
97218
97224
 
97219
97225
  // append the lambda_vector to the result list
97220
- VectorData lambda_child_data;
97221
- lambda_vector.Orrify(elem_cnt, lambda_child_data);
97226
+ UnifiedVectorFormat lambda_child_data;
97227
+ lambda_vector.ToUnifiedFormat(elem_cnt, lambda_child_data);
97222
97228
  ListVector::Append(result, lambda_vector, *lambda_child_data.sel, elem_cnt, 0);
97223
97229
  }
97224
97230
 
@@ -97267,9 +97273,9 @@ static void AppendFilteredToResult(Vector &lambda_vector, list_entry_t *result_e
97267
97273
 
97268
97274
  // slice to get the new lists and append them to the result
97269
97275
  Vector new_lists(input_chunk.data[0], true_sel, true_count);
97270
- new_lists.Normalify(true_count);
97271
- VectorData new_lists_child_data;
97272
- new_lists.Orrify(true_count, new_lists_child_data);
97276
+ new_lists.Flatten(true_count);
97277
+ UnifiedVectorFormat new_lists_child_data;
97278
+ new_lists.ToUnifiedFormat(true_count, new_lists_child_data);
97273
97279
  ListVector::Append(result, new_lists, *new_lists_child_data.sel, true_count, 0);
97274
97280
  }
97275
97281
 
@@ -97283,14 +97289,14 @@ static void ExecuteExpression(vector<LogicalType> &types, vector<LogicalType> &r
97283
97289
 
97284
97290
  // set the list child vector
97285
97291
  Vector slice(child_vector, sel, elem_cnt);
97286
- slice.Normalify(elem_cnt);
97292
+ slice.Flatten(elem_cnt);
97287
97293
  input_chunk.data[0].Reference(slice);
97288
97294
 
97289
97295
  // set the other vectors
97290
97296
  vector<Vector> slices;
97291
97297
  for (idx_t col_idx = 0; col_idx < args.ColumnCount() - 1; col_idx++) {
97292
97298
  slices.emplace_back(Vector(args.data[col_idx + 1], sel_vectors[col_idx], elem_cnt));
97293
- slices[col_idx].Normalify(elem_cnt);
97299
+ slices[col_idx].Flatten(elem_cnt);
97294
97300
  input_chunk.data[col_idx + 1].Reference(slices[col_idx]);
97295
97301
  }
97296
97302
 
@@ -97317,8 +97323,8 @@ static void ListLambdaFunction(DataChunk &args, ExpressionState &state, Vector &
97317
97323
  }
97318
97324
 
97319
97325
  // get the lists data
97320
- VectorData lists_data;
97321
- lists.Orrify(count, lists_data);
97326
+ UnifiedVectorFormat lists_data;
97327
+ lists.ToUnifiedFormat(count, lists_data);
97322
97328
  auto list_entries = (list_entry_t *)lists_data.data;
97323
97329
 
97324
97330
  // get the lambda expression
@@ -97329,8 +97335,8 @@ static void ListLambdaFunction(DataChunk &args, ExpressionState &state, Vector &
97329
97335
  // get the child vector and child data
97330
97336
  auto lists_size = ListVector::GetListSize(lists);
97331
97337
  auto &child_vector = ListVector::GetEntry(lists);
97332
- VectorData child_data;
97333
- child_vector.Orrify(lists_size, child_data);
97338
+ UnifiedVectorFormat child_data;
97339
+ child_vector.ToUnifiedFormat(lists_size, child_data);
97334
97340
 
97335
97341
  // to slice the child vector
97336
97342
  SelectionVector sel(STANDARD_VECTOR_SIZE);
@@ -97340,7 +97346,7 @@ static void ListLambdaFunction(DataChunk &args, ExpressionState &state, Vector &
97340
97346
  result_types.push_back(lambda_expr->return_type);
97341
97347
 
97342
97348
  // non-lambda parameter columns
97343
- vector<VectorData> columns;
97349
+ vector<UnifiedVectorFormat> columns;
97344
97350
  vector<idx_t> indexes;
97345
97351
  vector<SelectionVector> sel_vectors;
97346
97352
 
@@ -97349,8 +97355,8 @@ static void ListLambdaFunction(DataChunk &args, ExpressionState &state, Vector &
97349
97355
 
97350
97356
  // skip the list column
97351
97357
  for (idx_t i = 1; i < args.ColumnCount(); i++) {
97352
- columns.emplace_back(VectorData());
97353
- args.data[i].Orrify(count, columns[i - 1]);
97358
+ columns.emplace_back(UnifiedVectorFormat());
97359
+ args.data[i].ToUnifiedFormat(count, columns[i - 1]);
97354
97360
  indexes.push_back(0);
97355
97361
  sel_vectors.emplace_back(SelectionVector(STANDARD_VECTOR_SIZE));
97356
97362
  types.push_back(args.data[i].GetType());
@@ -97674,12 +97680,12 @@ static void ListSortFunction(DataChunk &args, ExpressionState &state, Vector &re
97674
97680
  // get the child vector
97675
97681
  auto lists_size = ListVector::GetListSize(lists);
97676
97682
  auto &child_vector = ListVector::GetEntry(lists);
97677
- VectorData child_data;
97678
- child_vector.Orrify(lists_size, child_data);
97683
+ UnifiedVectorFormat child_data;
97684
+ child_vector.ToUnifiedFormat(lists_size, child_data);
97679
97685
 
97680
97686
  // get the lists data
97681
- VectorData lists_data;
97682
- lists.Orrify(count, lists_data);
97687
+ UnifiedVectorFormat lists_data;
97688
+ lists.ToUnifiedFormat(count, lists_data);
97683
97689
  auto list_entries = (list_entry_t *)lists_data.data;
97684
97690
 
97685
97691
  // create the lists_indices vector, this contains an element for each list's entry,
@@ -97775,7 +97781,7 @@ static void ListSortFunction(DataChunk &args, ExpressionState &state, Vector &re
97775
97781
 
97776
97782
  D_ASSERT(sel_sorted_idx == incr_payload_count);
97777
97783
  child_vector.Slice(sel_sorted, sel_sorted_idx);
97778
- child_vector.Normalify(sel_sorted_idx);
97784
+ child_vector.Flatten(sel_sorted_idx);
97779
97785
  }
97780
97786
 
97781
97787
  result.Reference(lists);
@@ -98108,16 +98114,16 @@ public:
98108
98114
  explicit RangeInfoStruct(DataChunk &args_p) : args(args_p) {
98109
98115
  switch (args.ColumnCount()) {
98110
98116
  case 1:
98111
- args.data[0].Orrify(args.size(), vdata[0]);
98117
+ args.data[0].ToUnifiedFormat(args.size(), vdata[0]);
98112
98118
  break;
98113
98119
  case 2:
98114
- args.data[0].Orrify(args.size(), vdata[0]);
98115
- args.data[1].Orrify(args.size(), vdata[1]);
98120
+ args.data[0].ToUnifiedFormat(args.size(), vdata[0]);
98121
+ args.data[1].ToUnifiedFormat(args.size(), vdata[1]);
98116
98122
  break;
98117
98123
  case 3:
98118
- args.data[0].Orrify(args.size(), vdata[0]);
98119
- args.data[1].Orrify(args.size(), vdata[1]);
98120
- args.data[2].Orrify(args.size(), vdata[2]);
98124
+ args.data[0].ToUnifiedFormat(args.size(), vdata[0]);
98125
+ args.data[1].ToUnifiedFormat(args.size(), vdata[1]);
98126
+ args.data[2].ToUnifiedFormat(args.size(), vdata[2]);
98121
98127
  break;
98122
98128
  default:
98123
98129
  throw InternalException("Unsupported number of parameters for range");
@@ -98178,7 +98184,7 @@ public:
98178
98184
 
98179
98185
  private:
98180
98186
  DataChunk &args;
98181
- VectorData vdata[3];
98187
+ UnifiedVectorFormat vdata[3];
98182
98188
  };
98183
98189
 
98184
98190
  template <class OP, bool INCLUSIVE_BOUND>
@@ -98275,12 +98281,12 @@ namespace duckdb {
98275
98281
 
98276
98282
  static void CardinalityFunction(DataChunk &args, ExpressionState &state, Vector &result) {
98277
98283
  auto &map = args.data[0];
98278
- VectorData list_data;
98284
+ UnifiedVectorFormat list_data;
98279
98285
  result.SetVectorType(VectorType::FLAT_VECTOR);
98280
98286
  auto result_data = FlatVector::GetData<uint64_t>(result);
98281
98287
 
98282
98288
  auto &children = StructVector::GetEntries(map);
98283
- children[0]->Orrify(args.size(), list_data);
98289
+ children[0]->ToUnifiedFormat(args.size(), list_data);
98284
98290
  for (idx_t row = 0; row < args.size(); row++) {
98285
98291
  auto list_entry = ((list_entry_t *)list_data.data)[list_data.sel->get_index(row)];
98286
98292
  result_data[row] = list_entry.length;
@@ -98358,19 +98364,19 @@ namespace duckdb {
98358
98364
  // TODO: this doesn't recursively verify maps if maps are nested
98359
98365
  MapInvalidReason CheckMapValidity(Vector &map, idx_t count, const SelectionVector &sel) {
98360
98366
  D_ASSERT(map.GetType().id() == LogicalTypeId::MAP);
98361
- VectorData map_vdata;
98362
- map.Orrify(count, map_vdata);
98367
+ UnifiedVectorFormat map_vdata;
98368
+ map.ToUnifiedFormat(count, map_vdata);
98363
98369
  auto &map_validity = map_vdata.validity;
98364
98370
 
98365
98371
  auto &key_vector = *(StructVector::GetEntries(map)[0]);
98366
- VectorData key_vdata;
98367
- key_vector.Orrify(count, key_vdata);
98372
+ UnifiedVectorFormat key_vdata;
98373
+ key_vector.ToUnifiedFormat(count, key_vdata);
98368
98374
  auto key_data = (list_entry_t *)key_vdata.data;
98369
98375
  auto &key_validity = key_vdata.validity;
98370
98376
 
98371
98377
  auto &key_entries = ListVector::GetEntry(key_vector);
98372
- VectorData key_entry_vdata;
98373
- key_entries.Orrify(count, key_entry_vdata);
98378
+ UnifiedVectorFormat key_entry_vdata;
98379
+ key_entries.ToUnifiedFormat(count, key_entry_vdata);
98374
98380
  auto &entry_validity = key_entry_vdata.validity;
98375
98381
 
98376
98382
  for (idx_t row = 0; row < count; row++) {
@@ -98534,10 +98540,10 @@ static void MapExtractFunction(DataChunk &args, ExpressionState &state, Vector &
98534
98540
  auto &key = args.data[1];
98535
98541
 
98536
98542
  auto key_value = key.GetValue(0);
98537
- VectorData offset_data;
98543
+ UnifiedVectorFormat offset_data;
98538
98544
 
98539
98545
  auto &children = StructVector::GetEntries(map);
98540
- children[0]->Orrify(args.size(), offset_data);
98546
+ children[0]->ToUnifiedFormat(args.size(), offset_data);
98541
98547
  auto &key_type = ListType::GetChildType(children[0]->GetType());
98542
98548
  if (key_type != LogicalTypeId::SQLNULL) {
98543
98549
  key_value = key_value.CastAs(key_type);
@@ -100003,7 +100009,7 @@ static void SetSeedFunction(DataChunk &args, ExpressionState &state, Vector &res
100003
100009
  auto &func_expr = (BoundFunctionExpression &)state.expr;
100004
100010
  auto &info = (SetseedBindData &)*func_expr.bind_info;
100005
100011
  auto &input = args.data[0];
100006
- input.Normalify(args.size());
100012
+ input.Flatten(args.size());
100007
100013
 
100008
100014
  auto input_seeds = FlatVector::GetData<double>(input);
100009
100015
  uint32_t half_max = NumericLimits<uint32_t>::Maximum() / 2;
@@ -102451,8 +102457,8 @@ static void ConcatFunction(DataChunk &args, ExpressionState &state, Vector &resu
102451
102457
  // non-constant vector: set the result type to a flat vector
102452
102458
  result.SetVectorType(VectorType::FLAT_VECTOR);
102453
102459
  // now get the lengths of each of the input elements
102454
- VectorData vdata;
102455
- input.Orrify(args.size(), vdata);
102460
+ UnifiedVectorFormat vdata;
102461
+ input.ToUnifiedFormat(args.size(), vdata);
102456
102462
 
102457
102463
  auto input_data = (string_t *)vdata.data;
102458
102464
  // now add the length of each vector to the result length
@@ -102497,8 +102503,8 @@ static void ConcatFunction(DataChunk &args, ExpressionState &state, Vector &resu
102497
102503
  }
102498
102504
  } else {
102499
102505
  // standard vector
102500
- VectorData idata;
102501
- input.Orrify(args.size(), idata);
102506
+ UnifiedVectorFormat idata;
102507
+ input.ToUnifiedFormat(args.size(), idata);
102502
102508
 
102503
102509
  auto input_data = (string_t *)idata.data;
102504
102510
  for (idx_t i = 0; i < args.size(); i++) {
@@ -102541,9 +102547,9 @@ static void TemplatedConcatWS(DataChunk &args, string_t *sep_data, const Selecti
102541
102547
  const SelectionVector &rsel, idx_t count, Vector &result) {
102542
102548
  vector<idx_t> result_lengths(args.size(), 0);
102543
102549
  vector<bool> has_results(args.size(), false);
102544
- auto orrified_data = unique_ptr<VectorData[]>(new VectorData[args.ColumnCount() - 1]);
102550
+ auto orrified_data = unique_ptr<UnifiedVectorFormat[]>(new UnifiedVectorFormat[args.ColumnCount() - 1]);
102545
102551
  for (idx_t col_idx = 1; col_idx < args.ColumnCount(); col_idx++) {
102546
- args.data[col_idx].Orrify(args.size(), orrified_data[col_idx - 1]);
102552
+ args.data[col_idx].ToUnifiedFormat(args.size(), orrified_data[col_idx - 1]);
102547
102553
  }
102548
102554
 
102549
102555
  // first figure out the lengths
@@ -102609,8 +102615,8 @@ static void TemplatedConcatWS(DataChunk &args, string_t *sep_data, const Selecti
102609
102615
 
102610
102616
  static void ConcatWSFunction(DataChunk &args, ExpressionState &state, Vector &result) {
102611
102617
  auto &separator = args.data[0];
102612
- VectorData vdata;
102613
- separator.Orrify(args.size(), vdata);
102618
+ UnifiedVectorFormat vdata;
102619
+ separator.ToUnifiedFormat(args.size(), vdata);
102614
102620
 
102615
102621
  result.SetVectorType(VectorType::CONSTANT_VECTOR);
102616
102622
  for (idx_t col_idx = 0; col_idx < args.ColumnCount(); col_idx++) {
@@ -104276,7 +104282,7 @@ static void PrintfFunction(DataChunk &args, ExpressionState &state, Vector &resu
104276
104282
  break;
104277
104283
  default:
104278
104284
  // FLAT VECTOR, we can directly OR the nullmask
104279
- args.data[i].Normalify(args.size());
104285
+ args.data[i].Flatten(args.size());
104280
104286
  result.SetVectorType(VectorType::FLAT_VECTOR);
104281
104287
  result_validity.Combine(FlatVector::Validity(args.data[i]), args.size());
104282
104288
  break;
@@ -105168,12 +105174,12 @@ unique_ptr<Vector> BaseStringSplitFunction(string_t input, string_t delim, const
105168
105174
  }
105169
105175
 
105170
105176
  static void StringSplitExecutor(DataChunk &args, ExpressionState &state, Vector &result, const bool regex) {
105171
- VectorData input_data;
105172
- args.data[0].Orrify(args.size(), input_data);
105177
+ UnifiedVectorFormat input_data;
105178
+ args.data[0].ToUnifiedFormat(args.size(), input_data);
105173
105179
  auto inputs = (string_t *)input_data.data;
105174
105180
 
105175
- VectorData delim_data;
105176
- args.data[1].Orrify(args.size(), delim_data);
105181
+ UnifiedVectorFormat delim_data;
105182
+ args.data[1].ToUnifiedFormat(args.size(), delim_data);
105177
105183
  auto delims = (string_t *)delim_data.data;
105178
105184
 
105179
105185
  D_ASSERT(result.GetType().id() == LogicalTypeId::LIST);
@@ -106158,8 +106164,8 @@ static void AggregateStateFinalize(DataChunk &input, ExpressionState &state_p, V
106158
106164
 
106159
106165
  auto state_vec_ptr = FlatVector::GetData<data_ptr_t>(local_state.addresses);
106160
106166
 
106161
- VectorData state_data;
106162
- input.data[0].Orrify(input.size(), state_data);
106167
+ UnifiedVectorFormat state_data;
106168
+ input.data[0].ToUnifiedFormat(input.size(), state_data);
106163
106169
  for (idx_t i = 0; i < input.size(); i++) {
106164
106170
  auto state_idx = state_data.sel->get_index(i);
106165
106171
  auto state_entry = &((string_t *)state_data.data)[state_idx];
@@ -106202,9 +106208,9 @@ static void AggregateStateCombine(DataChunk &input, ExpressionState &state_p, Ve
106202
106208
  input.data[0].GetType().ToString(), input.data[1].GetType().ToString());
106203
106209
  }
106204
106210
 
106205
- VectorData state0_data, state1_data;
106206
- input.data[0].Orrify(input.size(), state0_data);
106207
- input.data[1].Orrify(input.size(), state1_data);
106211
+ UnifiedVectorFormat state0_data, state1_data;
106212
+ input.data[0].ToUnifiedFormat(input.size(), state0_data);
106213
+ input.data[1].ToUnifiedFormat(input.size(), state1_data);
106208
106214
 
106209
106215
  auto result_ptr = FlatVector::GetData<string_t>(result);
106210
106216
 
@@ -108410,7 +108416,7 @@ static void WriteCSVSink(ExecutionContext &context, FunctionData &bind_data, Glo
108410
108416
  }
108411
108417
  }
108412
108418
 
108413
- cast_chunk.Normalify();
108419
+ cast_chunk.Flatten();
108414
108420
  auto &writer = local_data.serializer;
108415
108421
  // now loop over the vectors and output the values
108416
108422
  for (idx_t row_idx = 0; row_idx < cast_chunk.size(); row_idx++) {
@@ -112898,7 +112904,7 @@ unique_ptr<GlobalTableFunctionState> TestVectorTypesInit(ClientContext &context,
112898
112904
  }
112899
112905
  if (bind_data.all_flat) {
112900
112906
  for (auto &entry : result->entries) {
112901
- entry->Normalify();
112907
+ entry->Flatten();
112902
112908
  entry->Verify();
112903
112909
  }
112904
112910
  }
@@ -129828,7 +129834,7 @@ unique_ptr<DataChunk> QueryResult::Fetch() {
129828
129834
  if (!chunk) {
129829
129835
  return nullptr;
129830
129836
  }
129831
- chunk->Normalify();
129837
+ chunk->Flatten();
129832
129838
  return chunk;
129833
129839
  }
129834
129840
 
@@ -181584,7 +181590,7 @@ public:
181584
181590
  virtual void InitializeAppend(ColumnAppendState &state);
181585
181591
  //! Append a vector of type [type] to the end of the column
181586
181592
  virtual void Append(BaseStatistics &stats, ColumnAppendState &state, Vector &vector, idx_t count);
181587
- virtual void AppendData(BaseStatistics &stats, ColumnAppendState &state, VectorData &vdata, idx_t count);
181593
+ virtual void AppendData(BaseStatistics &stats, ColumnAppendState &state, UnifiedVectorFormat &vdata, idx_t count);
181588
181594
  //! Revert a set of appends to the ColumnData
181589
181595
  virtual void RevertAppend(row_t start_row);
181590
181596
 
@@ -181753,8 +181759,8 @@ unique_ptr<AnalyzeState> BitpackingInitAnalyze(ColumnData &col_data, PhysicalTyp
181753
181759
  template <class T>
181754
181760
  bool BitpackingAnalyze(AnalyzeState &state, Vector &input, idx_t count) {
181755
181761
  auto &analyze_state = (BitpackingAnalyzeState<T> &)state;
181756
- VectorData vdata;
181757
- input.Orrify(count, vdata);
181762
+ UnifiedVectorFormat vdata;
181763
+ input.ToUnifiedFormat(count, vdata);
181758
181764
 
181759
181765
  auto data = (T *)vdata.data;
181760
181766
  for (idx_t i = 0; i < count; i++) {
@@ -181842,7 +181848,7 @@ public:
181842
181848
  width_ptr = handle.Ptr() + current_segment->GetBlockOffset() + Storage::BLOCK_SIZE - sizeof(bitpacking_width_t);
181843
181849
  }
181844
181850
 
181845
- void Append(VectorData &vdata, idx_t count) {
181851
+ void Append(UnifiedVectorFormat &vdata, idx_t count) {
181846
181852
  // TODO Optimization: avoid use of compression buffer if we can compress straight to result vector
181847
181853
  auto data = (T *)vdata.data;
181848
181854
 
@@ -181896,8 +181902,8 @@ unique_ptr<CompressionState> BitpackingInitCompression(ColumnDataCheckpointer &c
181896
181902
  template <class T>
181897
181903
  void BitpackingCompress(CompressionState &state_p, Vector &scan_vector, idx_t count) {
181898
181904
  auto &state = (BitpackingCompressState<T> &)state_p;
181899
- VectorData vdata;
181900
- scan_vector.Orrify(count, vdata);
181905
+ UnifiedVectorFormat vdata;
181906
+ scan_vector.ToUnifiedFormat(count, vdata);
181901
181907
  state.Append(vdata, count);
181902
181908
  }
181903
181909
 
@@ -182252,14 +182258,15 @@ public:
182252
182258
  idx_t result_idx);
182253
182259
  static unique_ptr<CompressedSegmentState> StringInitSegment(ColumnSegment &segment, block_id_t block_id);
182254
182260
 
182255
- static idx_t StringAppend(ColumnSegment &segment, SegmentStatistics &stats, VectorData &data, idx_t offset,
182261
+ static idx_t StringAppend(ColumnSegment &segment, SegmentStatistics &stats, UnifiedVectorFormat &data, idx_t offset,
182256
182262
  idx_t count) {
182257
182263
  return StringAppendBase(segment, stats, data, offset, count);
182258
182264
  }
182259
182265
 
182260
182266
  template <bool DUPLICATE_ELIMINATE = false>
182261
- static idx_t StringAppendBase(ColumnSegment &segment, SegmentStatistics &stats, VectorData &data, idx_t offset,
182262
- idx_t count, std::unordered_map<string, int32_t> *seen_strings = nullptr) {
182267
+ static idx_t StringAppendBase(ColumnSegment &segment, SegmentStatistics &stats, UnifiedVectorFormat &data,
182268
+ idx_t offset, idx_t count,
182269
+ std::unordered_map<string, int32_t> *seen_strings = nullptr) {
182263
182270
  auto &buffer_manager = BufferManager::GetBufferManager(segment.db);
182264
182271
  auto handle = buffer_manager.Pin(segment.block);
182265
182272
 
@@ -182419,8 +182426,8 @@ using string_set_t = unordered_set<string_t, StringHash, StringEquality>;
182419
182426
  class DictionaryCompressionState : public CompressionState {
182420
182427
  public:
182421
182428
  bool UpdateState(Vector &scan_vector, idx_t count) {
182422
- VectorData vdata;
182423
- scan_vector.Orrify(count, vdata);
182429
+ UnifiedVectorFormat vdata;
182430
+ scan_vector.ToUnifiedFormat(count, vdata);
182424
182431
  auto data = (string_t *)vdata.data;
182425
182432
  Verify();
182426
182433
 
@@ -183125,8 +183132,8 @@ unique_ptr<CompressionState> UncompressedFunctions::InitCompression(ColumnDataCh
183125
183132
 
183126
183133
  void UncompressedFunctions::Compress(CompressionState &state_p, Vector &data, idx_t count) {
183127
183134
  auto &state = (UncompressedCompressState &)state_p;
183128
- VectorData vdata;
183129
- data.Orrify(count, vdata);
183135
+ UnifiedVectorFormat vdata;
183136
+ data.ToUnifiedFormat(count, vdata);
183130
183137
 
183131
183138
  ColumnAppendState append_state;
183132
183139
  idx_t offset = 0;
@@ -183220,7 +183227,7 @@ void FixedSizeFetchRow(ColumnSegment &segment, ColumnFetchState &state, row_t ro
183220
183227
  // Append
183221
183228
  //===--------------------------------------------------------------------===//
183222
183229
  template <class T>
183223
- static void AppendLoop(SegmentStatistics &stats, data_ptr_t target, idx_t target_offset, VectorData &adata,
183230
+ static void AppendLoop(SegmentStatistics &stats, data_ptr_t target, idx_t target_offset, UnifiedVectorFormat &adata,
183224
183231
  idx_t offset, idx_t count) {
183225
183232
  auto sdata = (T *)adata.data;
183226
183233
  auto tdata = (T *)target;
@@ -183249,8 +183256,8 @@ static void AppendLoop(SegmentStatistics &stats, data_ptr_t target, idx_t target
183249
183256
  }
183250
183257
 
183251
183258
  template <>
183252
- void AppendLoop<list_entry_t>(SegmentStatistics &stats, data_ptr_t target, idx_t target_offset, VectorData &adata,
183253
- idx_t offset, idx_t count) {
183259
+ void AppendLoop<list_entry_t>(SegmentStatistics &stats, data_ptr_t target, idx_t target_offset,
183260
+ UnifiedVectorFormat &adata, idx_t offset, idx_t count) {
183254
183261
  auto sdata = (list_entry_t *)adata.data;
183255
183262
  auto tdata = (list_entry_t *)target;
183256
183263
  for (idx_t i = 0; i < count; i++) {
@@ -183261,7 +183268,8 @@ void AppendLoop<list_entry_t>(SegmentStatistics &stats, data_ptr_t target, idx_t
183261
183268
  }
183262
183269
 
183263
183270
  template <class T>
183264
- idx_t FixedSizeAppend(ColumnSegment &segment, SegmentStatistics &stats, VectorData &data, idx_t offset, idx_t count) {
183271
+ idx_t FixedSizeAppend(ColumnSegment &segment, SegmentStatistics &stats, UnifiedVectorFormat &data, idx_t offset,
183272
+ idx_t count) {
183265
183273
  auto &buffer_manager = BufferManager::GetBufferManager(segment.db);
183266
183274
  auto handle = buffer_manager.Pin(segment.block);
183267
183275
  D_ASSERT(segment.GetBlockOffset() == 0);
@@ -183581,8 +183589,8 @@ unique_ptr<AnalyzeState> RLEInitAnalyze(ColumnData &col_data, PhysicalType type)
183581
183589
  template <class T>
183582
183590
  bool RLEAnalyze(AnalyzeState &state, Vector &input, idx_t count) {
183583
183591
  auto &rle_state = (RLEAnalyzeState<T> &)state;
183584
- VectorData vdata;
183585
- input.Orrify(count, vdata);
183592
+ UnifiedVectorFormat vdata;
183593
+ input.ToUnifiedFormat(count, vdata);
183586
183594
 
183587
183595
  auto data = (T *)vdata.data;
183588
183596
  for (idx_t i = 0; i < count; i++) {
@@ -183643,7 +183651,7 @@ struct RLECompressState : public CompressionState {
183643
183651
  handle = buffer_manager.Pin(current_segment->block);
183644
183652
  }
183645
183653
 
183646
- void Append(VectorData &vdata, idx_t count) {
183654
+ void Append(UnifiedVectorFormat &vdata, idx_t count) {
183647
183655
  auto data = (T *)vdata.data;
183648
183656
  for (idx_t i = 0; i < count; i++) {
183649
183657
  auto idx = vdata.sel->get_index(i);
@@ -183717,8 +183725,8 @@ unique_ptr<CompressionState> RLEInitCompression(ColumnDataCheckpointer &checkpoi
183717
183725
  template <class T>
183718
183726
  void RLECompress(CompressionState &state_p, Vector &scan_vector, idx_t count) {
183719
183727
  auto &state = (RLECompressState<T> &)state_p;
183720
- VectorData vdata;
183721
- scan_vector.Orrify(count, vdata);
183728
+ UnifiedVectorFormat vdata;
183729
+ scan_vector.ToUnifiedFormat(count, vdata);
183722
183730
 
183723
183731
  state.Append(vdata, count);
183724
183732
  }
@@ -183921,8 +183929,8 @@ unique_ptr<AnalyzeState> UncompressedStringStorage::StringInitAnalyze(ColumnData
183921
183929
 
183922
183930
  bool UncompressedStringStorage::StringAnalyze(AnalyzeState &state_p, Vector &input, idx_t count) {
183923
183931
  auto &state = (StringAnalyzeState &)state_p;
183924
- VectorData vdata;
183925
- input.Orrify(count, vdata);
183932
+ UnifiedVectorFormat vdata;
183933
+ input.ToUnifiedFormat(count, vdata);
183926
183934
 
183927
183935
  state.count += count;
183928
183936
  auto data = (string_t *)vdata.data;
@@ -184670,7 +184678,7 @@ void ValidityScanPartial(ColumnSegment &segment, ColumnScanState &state, idx_t s
184670
184678
  }
184671
184679
 
184672
184680
  void ValidityScan(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, Vector &result) {
184673
- result.Normalify(scan_count);
184681
+ result.Flatten(scan_count);
184674
184682
 
184675
184683
  auto start = segment.GetRelativeIndex(state.row_index);
184676
184684
  if (start % ValidityMask::BITS_PER_VALUE == 0) {
@@ -184729,7 +184737,8 @@ unique_ptr<CompressedSegmentState> ValidityInitSegment(ColumnSegment &segment, b
184729
184737
  return nullptr;
184730
184738
  }
184731
184739
 
184732
- idx_t ValidityAppend(ColumnSegment &segment, SegmentStatistics &stats, VectorData &data, idx_t offset, idx_t vcount) {
184740
+ idx_t ValidityAppend(ColumnSegment &segment, SegmentStatistics &stats, UnifiedVectorFormat &data, idx_t offset,
184741
+ idx_t vcount) {
184733
184742
  D_ASSERT(segment.GetBlockOffset() == 0);
184734
184743
  auto &validity_stats = (ValidityStatistics &)*stats.statistics;
184735
184744
 
@@ -184874,7 +184883,7 @@ public:
184874
184883
  idx_t ScanCount(ColumnScanState &state, Vector &result, idx_t count) override;
184875
184884
 
184876
184885
  void InitializeAppend(ColumnAppendState &state) override;
184877
- void AppendData(BaseStatistics &stats, ColumnAppendState &state, VectorData &vdata, idx_t count) override;
184886
+ void AppendData(BaseStatistics &stats, ColumnAppendState &state, UnifiedVectorFormat &vdata, idx_t count) override;
184878
184887
  void RevertAppend(row_t start_row) override;
184879
184888
  idx_t Fetch(ColumnScanState &state, row_t row_id, Vector &result) override;
184880
184889
  void FetchRow(Transaction &transaction, ColumnFetchState &state, row_t row_id, Vector &result,
@@ -185351,8 +185360,8 @@ static void VerifyCheckConstraint(TableCatalogEntry &table, Expression &expr, Da
185351
185360
  } catch (...) { // LCOV_EXCL_START
185352
185361
  throw ConstraintException("CHECK constraint failed: %s (Unknown Error)", table.name);
185353
185362
  } // LCOV_EXCL_STOP
185354
- VectorData vdata;
185355
- result.Orrify(chunk.size(), vdata);
185363
+ UnifiedVectorFormat vdata;
185364
+ result.ToUnifiedFormat(chunk.size(), vdata);
185356
185365
 
185357
185366
  auto dataptr = (int32_t *)vdata.data;
185358
185367
  for (idx_t i = 0; i < chunk.size(); i++) {
@@ -185894,7 +185903,7 @@ idx_t DataTable::Delete(TableCatalogEntry &table, ClientContext &context, Vector
185894
185903
 
185895
185904
  auto &transaction = Transaction::GetTransaction(context);
185896
185905
 
185897
- row_identifiers.Normalify(count);
185906
+ row_identifiers.Flatten(count);
185898
185907
  auto ids = FlatVector::GetData<row_t>(row_identifiers);
185899
185908
  auto first_id = ids[0];
185900
185909
 
@@ -186046,8 +186055,8 @@ void DataTable::Update(TableCatalogEntry &table, ClientContext &context, Vector
186046
186055
  // now perform the actual update
186047
186056
  auto &transaction = Transaction::GetTransaction(context);
186048
186057
 
186049
- updates.Normalify();
186050
- row_ids.Normalify(count);
186058
+ updates.Flatten();
186059
+ row_ids.Flatten(count);
186051
186060
  auto ids = FlatVector::GetData<row_t>(row_ids);
186052
186061
  auto first_id = FlatVector::GetValue<row_t>(row_ids, 0);
186053
186062
  if (first_id >= MAX_ROW_ID) {
@@ -186104,8 +186113,8 @@ void DataTable::UpdateColumn(TableCatalogEntry &table, ClientContext &context, V
186104
186113
  // now perform the actual update
186105
186114
  auto &transaction = Transaction::GetTransaction(context);
186106
186115
 
186107
- updates.Normalify();
186108
- row_ids.Normalify(updates.size());
186116
+ updates.Flatten();
186117
+ row_ids.Flatten(updates.size());
186109
186118
  auto first_id = FlatVector::GetValue<row_t>(row_ids, 0);
186110
186119
  if (first_id >= MAX_ROW_ID) {
186111
186120
  throw NotImplementedException("Cannot update a column-path on transaction local data");
@@ -186606,8 +186615,8 @@ idx_t LocalStorage::Delete(DataTable *table, Vector &row_ids, idx_t count) {
186606
186615
  // Slice out the rows that are being deleted from the storage Chunk
186607
186616
  auto &chunk = storage->collection.GetChunk(chunk_idx);
186608
186617
 
186609
- VectorData row_ids_data;
186610
- row_ids.Orrify(count, row_ids_data);
186618
+ UnifiedVectorFormat row_ids_data;
186619
+ row_ids.ToUnifiedFormat(count, row_ids_data);
186611
186620
  auto row_identifiers = (const row_t *)row_ids_data.data;
186612
186621
  SelectionVector sel(count);
186613
186622
  for (idx_t i = 0; i < count; ++i) {
@@ -186655,8 +186664,8 @@ idx_t LocalStorage::Delete(DataTable *table, Vector &row_ids, idx_t count) {
186655
186664
  template <class T>
186656
186665
  static void TemplatedUpdateLoop(Vector &data_vector, Vector &update_vector, Vector &row_ids, idx_t count,
186657
186666
  idx_t base_index) {
186658
- VectorData udata;
186659
- update_vector.Orrify(count, udata);
186667
+ UnifiedVectorFormat udata;
186668
+ update_vector.ToUnifiedFormat(count, udata);
186660
186669
 
186661
186670
  auto target = FlatVector::GetData<T>(data_vector);
186662
186671
  auto &mask = FlatVector::Validity(data_vector);
@@ -186840,7 +186849,7 @@ void LocalStorage::AddColumn(DataTable *old_dt, DataTable *new_dt, ColumnDefinit
186840
186849
  } else {
186841
186850
  FlatVector::Validity(result).SetAllInvalid(chunk.size());
186842
186851
  }
186843
- result.Normalify(chunk.size());
186852
+ result.Flatten(chunk.size());
186844
186853
  chunk.data.push_back(move(result));
186845
186854
  }
186846
186855
 
@@ -186863,8 +186872,8 @@ void LocalStorage::FetchChunk(DataTable *table, Vector &row_ids, idx_t count, Da
186863
186872
  idx_t chunk_idx = GetChunk(row_ids);
186864
186873
  auto &chunk = storage->collection.GetChunk(chunk_idx);
186865
186874
 
186866
- VectorData row_ids_data;
186867
- row_ids.Orrify(count, row_ids_data);
186875
+ UnifiedVectorFormat row_ids_data;
186876
+ row_ids.ToUnifiedFormat(count, row_ids_data);
186868
186877
  auto row_identifiers = (const row_t *)row_ids_data.data;
186869
186878
  SelectionVector sel(count);
186870
186879
  for (idx_t i = 0; i < count; ++i) {
@@ -187775,12 +187784,12 @@ unique_ptr<DistinctStatistics> DistinctStatistics::Deserialize(FieldReader &read
187775
187784
  }
187776
187785
 
187777
187786
  void DistinctStatistics::Update(Vector &v, idx_t count, bool sample) {
187778
- VectorData vdata;
187779
- v.Orrify(count, vdata);
187787
+ UnifiedVectorFormat vdata;
187788
+ v.ToUnifiedFormat(count, vdata);
187780
187789
  Update(vdata, v.GetType(), count, sample);
187781
187790
  }
187782
187791
 
187783
- void DistinctStatistics::Update(VectorData &vdata, const LogicalType &type, idx_t count, bool sample) {
187792
+ void DistinctStatistics::Update(UnifiedVectorFormat &vdata, const LogicalType &type, idx_t count, bool sample) {
187784
187793
  if (count == 0) {
187785
187794
  return;
187786
187795
  }
@@ -187881,8 +187890,8 @@ void ListStatistics::Verify(Vector &vector, const SelectionVector &sel, idx_t co
187881
187890
 
187882
187891
  if (child_stats) {
187883
187892
  auto &child_entry = ListVector::GetEntry(vector);
187884
- VectorData vdata;
187885
- vector.Orrify(count, vdata);
187893
+ UnifiedVectorFormat vdata;
187894
+ vector.ToUnifiedFormat(count, vdata);
187886
187895
 
187887
187896
  auto list_data = (list_entry_t *)vdata.data;
187888
187897
  idx_t total_list_count = 0;
@@ -188057,8 +188066,8 @@ string NumericStatistics::ToString() const {
188057
188066
 
188058
188067
  template <class T>
188059
188068
  void NumericStatistics::TemplatedVerify(Vector &vector, const SelectionVector &sel, idx_t count) const {
188060
- VectorData vdata;
188061
- vector.Orrify(count, vdata);
188069
+ UnifiedVectorFormat vdata;
188070
+ vector.ToUnifiedFormat(count, vdata);
188062
188071
 
188063
188072
  auto data = (T *)vdata.data;
188064
188073
  for (idx_t i = 0; i < count; i++) {
@@ -188322,8 +188331,8 @@ void StringStatistics::Verify(Vector &vector, const SelectionVector &sel, idx_t
188322
188331
  string_t min_string((const char *)min, MAX_STRING_MINMAX_SIZE);
188323
188332
  string_t max_string((const char *)max, MAX_STRING_MINMAX_SIZE);
188324
188333
 
188325
- VectorData vdata;
188326
- vector.Orrify(count, vdata);
188334
+ UnifiedVectorFormat vdata;
188335
+ vector.ToUnifiedFormat(count, vdata);
188327
188336
  auto data = (string_t *)vdata.data;
188328
188337
  for (idx_t i = 0; i < count; i++) {
188329
188338
  auto idx = sel.get_index(i);
@@ -188535,8 +188544,8 @@ void ValidityStatistics::Verify(Vector &vector, const SelectionVector &sel, idx_
188535
188544
  // nothing to verify
188536
188545
  return;
188537
188546
  }
188538
- VectorData vdata;
188539
- vector.Orrify(count, vdata);
188547
+ UnifiedVectorFormat vdata;
188548
+ vector.ToUnifiedFormat(count, vdata);
188540
188549
  for (idx_t i = 0; i < count; i++) {
188541
188550
  auto idx = sel.get_index(i);
188542
188551
  auto index = vdata.sel->get_index(idx);
@@ -189551,7 +189560,7 @@ idx_t ColumnData::ScanVector(Transaction *transaction, idx_t vector_index, Colum
189551
189560
  if (!ALLOW_UPDATES && updates->HasUncommittedUpdates(vector_index)) {
189552
189561
  throw TransactionException("Cannot create index with outstanding updates");
189553
189562
  }
189554
- result.Normalify(scan_count);
189563
+ result.Flatten(scan_count);
189555
189564
  if (SCAN_COMMITTED) {
189556
189565
  updates->FetchCommitted(vector_index, result);
189557
189566
  } else {
@@ -189588,7 +189597,7 @@ void ColumnData::ScanCommittedRange(idx_t row_group_start, idx_t offset_in_row_g
189588
189597
  InitializeScanWithOffset(child_state, row_group_start + offset_in_row_group);
189589
189598
  auto scan_count = ScanVector(child_state, result, count);
189590
189599
  if (updates) {
189591
- result.Normalify(scan_count);
189600
+ result.Flatten(scan_count);
189592
189601
  updates->FetchCommittedRange(offset_in_row_group, count, result);
189593
189602
  }
189594
189603
  }
@@ -189605,7 +189614,7 @@ idx_t ColumnData::ScanCount(ColumnScanState &state, Vector &result, idx_t count)
189605
189614
  void ColumnData::Select(Transaction &transaction, idx_t vector_index, ColumnScanState &state, Vector &result,
189606
189615
  SelectionVector &sel, idx_t &count, const TableFilter &filter) {
189607
189616
  idx_t scan_count = Scan(transaction, vector_index, state, result);
189608
- result.Normalify(scan_count);
189617
+ result.Flatten(scan_count);
189609
189618
  ColumnSegment::FilterSelection(sel, result, filter, count, FlatVector::Validity(result));
189610
189619
  }
189611
189620
 
@@ -189654,8 +189663,8 @@ void ColumnScanState::NextVector() {
189654
189663
  }
189655
189664
 
189656
189665
  void ColumnData::Append(BaseStatistics &stats, ColumnAppendState &state, Vector &vector, idx_t count) {
189657
- VectorData vdata;
189658
- vector.Orrify(count, vdata);
189666
+ UnifiedVectorFormat vdata;
189667
+ vector.ToUnifiedFormat(count, vdata);
189659
189668
  AppendData(stats, state, vdata, count);
189660
189669
  }
189661
189670
 
@@ -189679,7 +189688,7 @@ void ColumnData::InitializeAppend(ColumnAppendState &state) {
189679
189688
  state.current->InitializeAppend(state);
189680
189689
  }
189681
189690
 
189682
- void ColumnData::AppendData(BaseStatistics &stats, ColumnAppendState &state, VectorData &vdata, idx_t count) {
189691
+ void ColumnData::AppendData(BaseStatistics &stats, ColumnAppendState &state, UnifiedVectorFormat &vdata, idx_t count) {
189683
189692
  idx_t offset = 0;
189684
189693
  while (true) {
189685
189694
  // append the data from the vector
@@ -189757,7 +189766,7 @@ void ColumnData::Update(Transaction &transaction, idx_t column_index, Vector &up
189757
189766
  ColumnScanState state;
189758
189767
  auto fetch_count = Fetch(state, row_ids[0], base_vector);
189759
189768
 
189760
- base_vector.Normalify(fetch_count);
189769
+ base_vector.Flatten(fetch_count);
189761
189770
  updates->Update(transaction, column_index, update_vector, row_ids, update_count, base_vector);
189762
189771
  }
189763
189772
 
@@ -189800,7 +189809,7 @@ void ColumnData::CheckpointScan(ColumnSegment *segment, ColumnScanState &state,
189800
189809
  Vector &scan_vector) {
189801
189810
  segment->Scan(state, count, scan_vector, 0, true);
189802
189811
  if (updates) {
189803
- scan_vector.Normalify(count);
189812
+ scan_vector.Flatten(count);
189804
189813
  updates->FetchCommittedRange(state.row_index - row_group_start, count, scan_vector);
189805
189814
  }
189806
189815
  }
@@ -190313,7 +190322,7 @@ void ColumnSegment::InitializeAppend(ColumnAppendState &state) {
190313
190322
  //===--------------------------------------------------------------------===//
190314
190323
  // Append
190315
190324
  //===--------------------------------------------------------------------===//
190316
- idx_t ColumnSegment::Append(ColumnAppendState &state, VectorData &append_data, idx_t offset, idx_t count) {
190325
+ idx_t ColumnSegment::Append(ColumnAppendState &state, UnifiedVectorFormat &append_data, idx_t offset, idx_t count) {
190317
190326
  D_ASSERT(segment_type == ColumnSegmentType::TRANSIENT);
190318
190327
  if (!function->append) {
190319
190328
  throw InternalException("Attempting to append to a segment without append method");
@@ -190808,7 +190817,7 @@ void ListColumnData::Append(BaseStatistics &stats_p, ColumnAppendState &state, V
190808
190817
  D_ASSERT(count > 0);
190809
190818
  auto &stats = (ListStatistics &)stats_p;
190810
190819
 
190811
- vector.Normalify(count);
190820
+ vector.Flatten(count);
190812
190821
  auto &list_validity = FlatVector::Validity(vector);
190813
190822
 
190814
190823
  // construct the list_entry_t entries to append to the column data
@@ -190840,7 +190849,7 @@ void ListColumnData::Append(BaseStatistics &stats_p, ColumnAppendState &state, V
190840
190849
  child_count);
190841
190850
  #endif
190842
190851
 
190843
- VectorData vdata;
190852
+ UnifiedVectorFormat vdata;
190844
190853
  vdata.validity = list_validity;
190845
190854
  vdata.sel = FlatVector::IncrementalSelectionVector();
190846
190855
  vdata.data = (data_ptr_t)append_offsets.get();
@@ -191617,7 +191626,7 @@ void RowGroup::Update(Transaction &transaction, DataChunk &update_chunk, row_t *
191617
191626
  D_ASSERT(columns[column]->type.id() == update_chunk.data[i].GetType().id());
191618
191627
  if (offset > 0) {
191619
191628
  Vector sliced_vector(update_chunk.data[i], offset);
191620
- sliced_vector.Normalify(count);
191629
+ sliced_vector.Flatten(count);
191621
191630
  columns[column]->Update(transaction, column, sliced_vector, ids + offset, count);
191622
191631
  } else {
191623
191632
  columns[column]->Update(transaction, column, update_chunk.data[i], ids, count);
@@ -192043,7 +192052,8 @@ void StandardColumnData::InitializeAppend(ColumnAppendState &state) {
192043
192052
  state.child_appends.push_back(move(child_append));
192044
192053
  }
192045
192054
 
192046
- void StandardColumnData::AppendData(BaseStatistics &stats, ColumnAppendState &state, VectorData &vdata, idx_t count) {
192055
+ void StandardColumnData::AppendData(BaseStatistics &stats, ColumnAppendState &state, UnifiedVectorFormat &vdata,
192056
+ idx_t count) {
192047
192057
  ColumnData::AppendData(stats, state, vdata, count);
192048
192058
 
192049
192059
  validity.AppendData(*stats.validity_stats, state.child_appends[0], vdata, count);
@@ -192280,7 +192290,7 @@ void StructColumnData::InitializeAppend(ColumnAppendState &state) {
192280
192290
  }
192281
192291
 
192282
192292
  void StructColumnData::Append(BaseStatistics &stats, ColumnAppendState &state, Vector &vector, idx_t count) {
192283
- vector.Normalify(count);
192293
+ vector.Flatten(count);
192284
192294
 
192285
192295
  // append the null values
192286
192296
  validity.Append(*stats.validity_stats, state.child_appends[0], vector, count);
@@ -192293,7 +192303,7 @@ void StructColumnData::Append(BaseStatistics &stats, ColumnAppendState &state, V
192293
192303
  if (!struct_validity.AllValid()) {
192294
192304
  // we set the child entries of the struct to NULL
192295
192305
  // for any values in which the struct itself is NULL
192296
- child_entries[i]->Normalify(count);
192306
+ child_entries[i]->Flatten(count);
192297
192307
 
192298
192308
  auto &child_validity = FlatVector::Validity(*child_entries[i]);
192299
192309
  child_validity.Combine(struct_validity, count);
@@ -193575,7 +193585,7 @@ void UpdateSegment::Update(Transaction &transaction, idx_t column_index, Vector
193575
193585
  // obtain an exclusive lock
193576
193586
  auto write_lock = lock.GetExclusiveLock();
193577
193587
 
193578
- update.Normalify(count);
193588
+ update.Flatten(count);
193579
193589
 
193580
193590
  // update statistics
193581
193591
  SelectionVector sel;
@@ -229460,7 +229470,7 @@ static inline int AddToLog(void *log, const uint64_t &index, const uint8_t &coun
229460
229470
  return duckdb_hll::hllDenseSet(hdr->registers + 1, index, count);
229461
229471
  }
229462
229472
 
229463
- void AddToLogsInternal(VectorData &vdata, idx_t count, uint64_t indices[], uint8_t counts[], void ***logs[],
229473
+ void AddToLogsInternal(UnifiedVectorFormat &vdata, idx_t count, uint64_t indices[], uint8_t counts[], void ***logs[],
229464
229474
  const SelectionVector *log_sel) {
229465
229475
  // 'logs' is an array of pointers to AggregateStates
229466
229476
  // AggregateStates have a pointer to a HyperLogLog object
@@ -229473,7 +229483,7 @@ void AddToLogsInternal(VectorData &vdata, idx_t count, uint64_t indices[], uint8
229473
229483
  }
229474
229484
  }
229475
229485
 
229476
- void AddToSingleLogInternal(VectorData &vdata, idx_t count, uint64_t indices[], uint8_t counts[], void *log) {
229486
+ void AddToSingleLogInternal(UnifiedVectorFormat &vdata, idx_t count, uint64_t indices[], uint8_t counts[], void *log) {
229477
229487
  const auto o = (duckdb_hll::robj *)log;
229478
229488
  duckdb_hll::hllhdr *hdr = (duckdb_hll::hllhdr *)o->ptr;
229479
229489
  D_ASSERT(hdr->encoding == HLL_DENSE);