duckdb 0.3.5-dev1285.0 → 0.3.5-dev1297.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/duckdb.cpp CHANGED
@@ -31721,32 +31721,16 @@ static void ComputeStructEntrySizes(Vector &v, idx_t entry_sizes[], idx_t vcount
31721
31721
  const SelectionVector &sel, idx_t offset) {
31722
31722
  // obtain child vectors
31723
31723
  idx_t num_children;
31724
- vector<Vector> struct_vectors;
31725
- if (v.GetVectorType() == VectorType::DICTIONARY_VECTOR) {
31726
- auto &child = DictionaryVector::Child(v);
31727
- auto &dict_sel = DictionaryVector::SelVector(v);
31728
- auto &children = StructVector::GetEntries(child);
31729
- num_children = children.size();
31730
- for (auto &struct_child : children) {
31731
- Vector struct_vector(*struct_child, dict_sel, vcount);
31732
- struct_vectors.push_back(move(struct_vector));
31733
- }
31734
- } else {
31735
- auto &children = StructVector::GetEntries(v);
31736
- num_children = children.size();
31737
- for (auto &struct_child : children) {
31738
- Vector struct_vector(*struct_child);
31739
- struct_vectors.push_back(move(struct_vector));
31740
- }
31741
- }
31724
+ auto &children = StructVector::GetEntries(v);
31725
+ num_children = children.size();
31742
31726
  // add struct validitymask size
31743
31727
  const idx_t struct_validitymask_size = (num_children + 7) / 8;
31744
31728
  for (idx_t i = 0; i < ser_count; i++) {
31745
31729
  entry_sizes[i] += struct_validitymask_size;
31746
31730
  }
31747
31731
  // compute size of child vectors
31748
- for (auto &struct_vector : struct_vectors) {
31749
- RowOperations::ComputeEntrySizes(struct_vector, entry_sizes, vcount, ser_count, sel, offset);
31732
+ for (auto &struct_vector : children) {
31733
+ RowOperations::ComputeEntrySizes(*struct_vector, entry_sizes, vcount, ser_count, sel, offset);
31750
31734
  }
31751
31735
  }
31752
31736
 
@@ -31911,25 +31895,8 @@ static void HeapScatterStructVector(Vector &v, idx_t vcount, const SelectionVect
31911
31895
  VectorData vdata;
31912
31896
  v.Orrify(vcount, vdata);
31913
31897
 
31914
- idx_t num_children;
31915
- vector<Vector> struct_vectors;
31916
- if (v.GetVectorType() == VectorType::DICTIONARY_VECTOR) {
31917
- auto &child = DictionaryVector::Child(v);
31918
- auto &dict_sel = DictionaryVector::SelVector(v);
31919
- auto &children = StructVector::GetEntries(child);
31920
- num_children = children.size();
31921
- for (auto &struct_child : children) {
31922
- Vector struct_vector(*struct_child, dict_sel, vcount);
31923
- struct_vectors.push_back(move(struct_vector));
31924
- }
31925
- } else {
31926
- auto &children = StructVector::GetEntries(v);
31927
- num_children = children.size();
31928
- for (auto &struct_child : children) {
31929
- Vector struct_vector(*struct_child);
31930
- struct_vectors.push_back(move(struct_vector));
31931
- }
31932
- }
31898
+ auto &children = StructVector::GetEntries(v);
31899
+ idx_t num_children = children.size();
31933
31900
 
31934
31901
  // the whole struct itself can be NULL
31935
31902
  idx_t entry_idx;
@@ -31955,8 +31922,8 @@ static void HeapScatterStructVector(Vector &v, idx_t vcount, const SelectionVect
31955
31922
  }
31956
31923
 
31957
31924
  // now serialize the struct vectors
31958
- for (idx_t i = 0; i < struct_vectors.size(); i++) {
31959
- auto &struct_vector = struct_vectors[i];
31925
+ for (idx_t i = 0; i < children.size(); i++) {
31926
+ auto &struct_vector = *children[i];
31960
31927
  RowOperations::HeapScatter(struct_vector, vcount, sel, ser_count, i, key_locations,
31961
31928
  struct_validitymask_locations, offset);
31962
31929
  }
@@ -42828,6 +42795,15 @@ void RowLayout::Initialize(Aggregates aggregates_p, bool align) {
42828
42795
 
42829
42796
  namespace duckdb {
42830
42797
 
42798
+ SelectionData::SelectionData(idx_t count) {
42799
+ owned_data = unique_ptr<sel_t[]>(new sel_t[count]);
42800
+ #ifdef DEBUG
42801
+ for (idx_t i = 0; i < count; i++) {
42802
+ owned_data[i] = std::numeric_limits<sel_t>::max();
42803
+ }
42804
+ #endif
42805
+ }
42806
+
42831
42807
  // LCOV_EXCL_START
42832
42808
  string SelectionVector::ToString(idx_t count) const {
42833
42809
  string result = "Selection Vector (" + to_string(count) + ") [";
@@ -45717,9 +45693,20 @@ void Vector::Slice(const SelectionVector &sel, idx_t count) {
45717
45693
  auto &current_sel = DictionaryVector::SelVector(*this);
45718
45694
  auto sliced_dictionary = current_sel.Slice(sel, count);
45719
45695
  buffer = make_buffer<DictionaryBuffer>(move(sliced_dictionary));
45696
+ if (GetType().InternalType() == PhysicalType::STRUCT) {
45697
+ auto &child_vector = DictionaryVector::Child(*this);
45698
+
45699
+ Vector new_child(child_vector);
45700
+ new_child.auxiliary = make_buffer<VectorStructBuffer>(new_child, sel, count);
45701
+ auxiliary = make_buffer<VectorChildBuffer>(move(new_child));
45702
+ }
45720
45703
  return;
45721
45704
  }
45722
45705
  Vector child_vector(*this);
45706
+ auto internal_type = GetType().InternalType();
45707
+ if (internal_type == PhysicalType::STRUCT) {
45708
+ child_vector.auxiliary = make_buffer<VectorStructBuffer>(*this, sel, count);
45709
+ }
45723
45710
  auto child_ref = make_buffer<VectorChildBuffer>(move(child_vector));
45724
45711
  auto dict_buffer = make_buffer<DictionaryBuffer>(sel);
45725
45712
  vector_type = VectorType::DICTIONARY_VECTOR;
@@ -45728,7 +45715,7 @@ void Vector::Slice(const SelectionVector &sel, idx_t count) {
45728
45715
  }
45729
45716
 
45730
45717
  void Vector::Slice(const SelectionVector &sel, idx_t count, SelCache &cache) {
45731
- if (GetVectorType() == VectorType::DICTIONARY_VECTOR) {
45718
+ if (GetVectorType() == VectorType::DICTIONARY_VECTOR && GetType().InternalType() != PhysicalType::STRUCT) {
45732
45719
  // dictionary vector: need to merge dictionaries
45733
45720
  // check if we have a cached entry
45734
45721
  auto &current_sel = DictionaryVector::SelVector(*this);
@@ -45938,32 +45925,44 @@ void Vector::SetValue(idx_t index, const Value &val) {
45938
45925
  }
45939
45926
  }
45940
45927
 
45941
- Value Vector::GetValue(idx_t index) const {
45942
- switch (GetVectorType()) {
45943
- case VectorType::CONSTANT_VECTOR:
45944
- index = 0;
45945
- break;
45946
- case VectorType::FLAT_VECTOR:
45947
- break;
45948
- // dictionary: apply dictionary and forward to child
45949
- case VectorType::DICTIONARY_VECTOR: {
45950
- auto &sel_vector = DictionaryVector::SelVector(*this);
45951
- auto &child = DictionaryVector::Child(*this);
45952
- return child.GetValue(sel_vector.get_index(index));
45953
- }
45954
- case VectorType::SEQUENCE_VECTOR: {
45955
- int64_t start, increment;
45956
- SequenceVector::GetSequence(*this, start, increment);
45957
- return Value::Numeric(GetType(), start + increment * index);
45958
- }
45959
- default:
45960
- throw InternalException("Unimplemented vector type for Vector::GetValue");
45928
+ Value Vector::GetValue(const Vector &v_p, idx_t index_p) {
45929
+ const Vector *vector = &v_p;
45930
+ idx_t index = index_p;
45931
+ bool finished = false;
45932
+ while (!finished) {
45933
+ switch (vector->GetVectorType()) {
45934
+ case VectorType::CONSTANT_VECTOR:
45935
+ index = 0;
45936
+ finished = true;
45937
+ break;
45938
+ case VectorType::FLAT_VECTOR:
45939
+ finished = true;
45940
+ break;
45941
+ // dictionary: apply dictionary and forward to child
45942
+ case VectorType::DICTIONARY_VECTOR: {
45943
+ auto &sel_vector = DictionaryVector::SelVector(*vector);
45944
+ auto &child = DictionaryVector::Child(*vector);
45945
+ vector = &child;
45946
+ index = sel_vector.get_index(index);
45947
+ break;
45948
+ }
45949
+ case VectorType::SEQUENCE_VECTOR: {
45950
+ int64_t start, increment;
45951
+ SequenceVector::GetSequence(*vector, start, increment);
45952
+ return Value::Numeric(vector->GetType(), start + increment * index);
45953
+ }
45954
+ default:
45955
+ throw InternalException("Unimplemented vector type for Vector::GetValue");
45956
+ }
45961
45957
  }
45958
+ auto data = vector->data;
45959
+ auto &validity = vector->validity;
45960
+ auto &type = vector->GetType();
45962
45961
 
45963
45962
  if (!validity.RowIsValid(index)) {
45964
- return Value(GetType());
45963
+ return Value(vector->GetType());
45965
45964
  }
45966
- switch (GetType().id()) {
45965
+ switch (vector->GetType().id()) {
45967
45966
  case LogicalTypeId::BOOLEAN:
45968
45967
  return Value::BOOLEAN(((bool *)data)[index]);
45969
45968
  case LogicalTypeId::TINYINT:
@@ -46003,9 +46002,9 @@ Value Vector::GetValue(idx_t index) const {
46003
46002
  case LogicalTypeId::UUID:
46004
46003
  return Value::UUID(((hugeint_t *)data)[index]);
46005
46004
  case LogicalTypeId::DECIMAL: {
46006
- auto width = DecimalType::GetWidth(GetType());
46007
- auto scale = DecimalType::GetScale(GetType());
46008
- switch (GetType().InternalType()) {
46005
+ auto width = DecimalType::GetWidth(type);
46006
+ auto scale = DecimalType::GetScale(type);
46007
+ switch (type.InternalType()) {
46009
46008
  case PhysicalType::INT16:
46010
46009
  return Value::DECIMAL(((int16_t *)data)[index], width, scale);
46011
46010
  case PhysicalType::INT32:
@@ -46021,13 +46020,13 @@ Value Vector::GetValue(idx_t index) const {
46021
46020
  case LogicalTypeId::ENUM: {
46022
46021
  switch (type.InternalType()) {
46023
46022
  case PhysicalType::UINT8:
46024
- return Value::ENUM(((uint8_t *)data)[index], GetType());
46023
+ return Value::ENUM(((uint8_t *)data)[index], type);
46025
46024
  case PhysicalType::UINT16:
46026
- return Value::ENUM(((uint16_t *)data)[index], GetType());
46025
+ return Value::ENUM(((uint16_t *)data)[index], type);
46027
46026
  case PhysicalType::UINT32:
46028
- return Value::ENUM(((uint32_t *)data)[index], GetType());
46027
+ return Value::ENUM(((uint32_t *)data)[index], type);
46029
46028
  case PhysicalType::UINT64: // DEDUP_POINTER_ENUM
46030
- return Value::ENUM(((uint64_t *)data)[index], GetType());
46029
+ return Value::ENUM(((uint64_t *)data)[index], type);
46031
46030
  default:
46032
46031
  throw InternalException("ENUM can only have unsigned integers as physical types");
46033
46032
  }
@@ -46056,36 +46055,39 @@ Value Vector::GetValue(idx_t index) const {
46056
46055
  return Value::BLOB((const_data_ptr_t)str.GetDataUnsafe(), str.GetSize());
46057
46056
  }
46058
46057
  case LogicalTypeId::MAP: {
46059
- auto &child_entries = StructVector::GetEntries(*this);
46058
+ auto &child_entries = StructVector::GetEntries(*vector);
46060
46059
  Value key = child_entries[0]->GetValue(index);
46061
46060
  Value value = child_entries[1]->GetValue(index);
46062
46061
  return Value::MAP(move(key), move(value));
46063
46062
  }
46064
46063
  case LogicalTypeId::STRUCT: {
46065
46064
  // we can derive the value schema from the vector schema
46066
- auto &child_entries = StructVector::GetEntries(*this);
46065
+ auto &child_entries = StructVector::GetEntries(*vector);
46067
46066
  child_list_t<Value> children;
46068
46067
  for (idx_t child_idx = 0; child_idx < child_entries.size(); child_idx++) {
46069
46068
  auto &struct_child = child_entries[child_idx];
46070
- children.push_back(
46071
- make_pair(StructType::GetChildName(GetType(), child_idx), struct_child->GetValue(index)));
46069
+ children.push_back(make_pair(StructType::GetChildName(type, child_idx), struct_child->GetValue(index_p)));
46072
46070
  }
46073
46071
  return Value::STRUCT(move(children));
46074
46072
  }
46075
46073
  case LogicalTypeId::LIST: {
46076
46074
  auto offlen = ((list_entry_t *)data)[index];
46077
- auto &child_vec = ListVector::GetEntry(*this);
46078
- vector<Value> children;
46075
+ auto &child_vec = ListVector::GetEntry(*vector);
46076
+ std::vector<Value> children;
46079
46077
  for (idx_t i = offlen.offset; i < offlen.offset + offlen.length; i++) {
46080
46078
  children.push_back(child_vec.GetValue(i));
46081
46079
  }
46082
- return Value::LIST(ListType::GetChildType(GetType()), move(children));
46080
+ return Value::LIST(ListType::GetChildType(type), move(children));
46083
46081
  }
46084
46082
  default:
46085
46083
  throw InternalException("Unimplemented type for value access");
46086
46084
  }
46087
46085
  }
46088
46086
 
46087
+ Value Vector::GetValue(idx_t index) const {
46088
+ return GetValue(*this, index);
46089
+ }
46090
+
46089
46091
  // LCOV_EXCL_START
46090
46092
  string VectorTypeToString(VectorType type) {
46091
46093
  switch (type) {
@@ -46526,32 +46528,39 @@ void Vector::UTFVerify(idx_t count) {
46526
46528
  UTFVerify(*flat_sel, count);
46527
46529
  }
46528
46530
 
46529
- void Vector::Verify(const SelectionVector &sel, idx_t count) {
46531
+ void Vector::Verify(Vector &vector_p, const SelectionVector &sel_p, idx_t count) {
46530
46532
  #ifdef DEBUG
46531
46533
  if (count == 0) {
46532
46534
  return;
46533
46535
  }
46534
- if (GetVectorType() == VectorType::DICTIONARY_VECTOR) {
46535
- auto &child = DictionaryVector::Child(*this);
46536
+ Vector *vector = &vector_p;
46537
+ const SelectionVector *sel = &sel_p;
46538
+ SelectionVector owned_sel;
46539
+ auto &type = vector->GetType();
46540
+ auto vtype = vector->GetVectorType();
46541
+ if (vector->GetVectorType() == VectorType::DICTIONARY_VECTOR) {
46542
+ auto &child = DictionaryVector::Child(*vector);
46536
46543
  D_ASSERT(child.GetVectorType() != VectorType::DICTIONARY_VECTOR);
46537
- auto &dict_sel = DictionaryVector::SelVector(*this);
46544
+ auto &dict_sel = DictionaryVector::SelVector(*vector);
46538
46545
  // merge the selection vectors and verify the child
46539
- auto new_buffer = dict_sel.Slice(sel, count);
46540
- SelectionVector new_sel(new_buffer);
46541
- child.Verify(new_sel, count);
46542
- return;
46546
+ auto new_buffer = dict_sel.Slice(*sel, count);
46547
+ owned_sel.Initialize(new_buffer);
46548
+ sel = &owned_sel;
46549
+ vector = &child;
46550
+ vtype = vector->GetVectorType();
46543
46551
  }
46544
- if (TypeIsConstantSize(GetType().InternalType()) &&
46545
- (GetVectorType() == VectorType::CONSTANT_VECTOR || GetVectorType() == VectorType::FLAT_VECTOR)) {
46546
- D_ASSERT(!auxiliary);
46552
+ if (TypeIsConstantSize(type.InternalType()) &&
46553
+ (vtype == VectorType::CONSTANT_VECTOR || vtype == VectorType::FLAT_VECTOR)) {
46554
+ D_ASSERT(!vector->auxiliary);
46547
46555
  }
46548
- if (GetType().id() == LogicalTypeId::VARCHAR || GetType().id() == LogicalTypeId::JSON) {
46556
+ if (type.id() == LogicalTypeId::VARCHAR || type.id() == LogicalTypeId::JSON) {
46549
46557
  // verify that there are no '\0' bytes in string values
46550
- switch (GetVectorType()) {
46558
+ switch (vtype) {
46551
46559
  case VectorType::FLAT_VECTOR: {
46552
- auto strings = FlatVector::GetData<string_t>(*this);
46560
+ auto &validity = FlatVector::Validity(*vector);
46561
+ auto strings = FlatVector::GetData<string_t>(*vector);
46553
46562
  for (idx_t i = 0; i < count; i++) {
46554
- auto oidx = sel.get_index(i);
46563
+ auto oidx = sel->get_index(i);
46555
46564
  if (validity.RowIsValid(oidx)) {
46556
46565
  strings[oidx].VerifyNull();
46557
46566
  }
@@ -46563,57 +46572,76 @@ void Vector::Verify(const SelectionVector &sel, idx_t count) {
46563
46572
  }
46564
46573
  }
46565
46574
 
46566
- if (GetType().InternalType() == PhysicalType::STRUCT) {
46567
- auto &child_types = StructType::GetChildTypes(GetType());
46575
+ if (type.InternalType() == PhysicalType::STRUCT) {
46576
+ auto &child_types = StructType::GetChildTypes(type);
46568
46577
  D_ASSERT(!child_types.empty());
46569
- if (GetVectorType() == VectorType::FLAT_VECTOR || GetVectorType() == VectorType::CONSTANT_VECTOR) {
46570
- // create a selection vector of the non-null entries of the struct vector
46571
- auto &children = StructVector::GetEntries(*this);
46572
- D_ASSERT(child_types.size() == children.size());
46573
- for (idx_t child_idx = 0; child_idx < children.size(); child_idx++) {
46574
- if (GetVectorType() == VectorType::CONSTANT_VECTOR) {
46575
- D_ASSERT(children[child_idx]->GetVectorType() == VectorType::CONSTANT_VECTOR);
46576
- if (ConstantVector::IsNull(*this)) {
46577
- D_ASSERT(ConstantVector::IsNull(*children[child_idx]));
46578
- }
46579
- } else if (GetVectorType() == VectorType::FLAT_VECTOR &&
46580
- children[child_idx]->GetVectorType() == VectorType::FLAT_VECTOR) {
46581
- // for any NULL entry in the struct, the child should be NULL as well
46582
- auto &validity = FlatVector::Validity(*this);
46583
- auto &child_validity = FlatVector::Validity(*children[child_idx]);
46584
- for (idx_t i = 0; i < count; i++) {
46585
- auto index = sel.get_index(i);
46586
- if (!validity.RowIsValid(index)) {
46587
- D_ASSERT(!child_validity.RowIsValid(index));
46588
- }
46589
- }
46578
+ // create a selection vector of the non-null entries of the struct vector
46579
+ auto &children = StructVector::GetEntries(*vector);
46580
+ D_ASSERT(child_types.size() == children.size());
46581
+ for (idx_t child_idx = 0; child_idx < children.size(); child_idx++) {
46582
+ D_ASSERT(children[child_idx]->GetType() == child_types[child_idx].second);
46583
+ children[child_idx]->Verify(count);
46584
+ if (vtype == VectorType::CONSTANT_VECTOR) {
46585
+ D_ASSERT(children[child_idx]->GetVectorType() == VectorType::CONSTANT_VECTOR);
46586
+ if (ConstantVector::IsNull(*vector)) {
46587
+ D_ASSERT(ConstantVector::IsNull(*children[child_idx]));
46588
+ }
46589
+ }
46590
+ if (vtype != VectorType::FLAT_VECTOR) {
46591
+ continue;
46592
+ }
46593
+ ValidityMask *child_validity;
46594
+ SelectionVector owned_child_sel;
46595
+ const SelectionVector *child_sel = &owned_child_sel;
46596
+ if (children[child_idx]->GetVectorType() == VectorType::FLAT_VECTOR) {
46597
+ child_sel = FlatVector::IncrementalSelectionVector();
46598
+ child_validity = &FlatVector::Validity(*children[child_idx]);
46599
+ } else if (children[child_idx]->GetVectorType() == VectorType::DICTIONARY_VECTOR) {
46600
+ auto &child = DictionaryVector::Child(*children[child_idx]);
46601
+ if (child.GetVectorType() != VectorType::FLAT_VECTOR) {
46602
+ continue;
46603
+ }
46604
+ child_validity = &FlatVector::Validity(child);
46605
+ child_sel = &DictionaryVector::SelVector(*children[child_idx]);
46606
+ } else if (children[child_idx]->GetVectorType() == VectorType::CONSTANT_VECTOR) {
46607
+ child_sel = ConstantVector::ZeroSelectionVector(count, owned_child_sel);
46608
+ child_validity = &ConstantVector::Validity(*children[child_idx]);
46609
+ } else {
46610
+ continue;
46611
+ }
46612
+ // for any NULL entry in the struct, the child should be NULL as well
46613
+ auto &validity = FlatVector::Validity(*vector);
46614
+ for (idx_t i = 0; i < count; i++) {
46615
+ auto index = sel->get_index(i);
46616
+ if (!validity.RowIsValid(index)) {
46617
+ auto child_index = child_sel->get_index(sel_p.get_index(i));
46618
+ D_ASSERT(!child_validity->RowIsValid(child_index));
46590
46619
  }
46591
- D_ASSERT(children[child_idx]->GetType() == child_types[child_idx].second);
46592
- children[child_idx]->Verify(sel, count);
46593
46620
  }
46594
46621
  }
46595
46622
  }
46596
46623
 
46597
- if (GetType().InternalType() == PhysicalType::LIST) {
46598
- if (GetVectorType() == VectorType::CONSTANT_VECTOR) {
46599
- if (!ConstantVector::IsNull(*this)) {
46600
- auto &child = ListVector::GetEntry(*this);
46601
- SelectionVector child_sel(ListVector::GetListSize(*this));
46624
+ if (type.InternalType() == PhysicalType::LIST) {
46625
+ if (vtype == VectorType::CONSTANT_VECTOR) {
46626
+ if (!ConstantVector::IsNull(*vector)) {
46627
+ auto &child = ListVector::GetEntry(*vector);
46628
+ SelectionVector child_sel(ListVector::GetListSize(*vector));
46602
46629
  idx_t child_count = 0;
46603
- auto le = ConstantVector::GetData<list_entry_t>(*this);
46604
- D_ASSERT(le->offset + le->length <= ListVector::GetListSize(*this));
46630
+ auto le = ConstantVector::GetData<list_entry_t>(*vector);
46631
+ D_ASSERT(le->offset + le->length <= ListVector::GetListSize(*vector));
46605
46632
  for (idx_t k = 0; k < le->length; k++) {
46606
46633
  child_sel.set_index(child_count++, le->offset + k);
46607
46634
  }
46608
- child.Verify(child_sel, child_count);
46635
+ Vector::Verify(child, child_sel, child_count);
46609
46636
  }
46610
- } else if (GetVectorType() == VectorType::FLAT_VECTOR) {
46611
- auto &child = ListVector::GetEntry(*this);
46612
- auto child_size = ListVector::GetListSize(*this);
46613
- auto list_data = FlatVector::GetData<list_entry_t>(*this);
46637
+ } else if (vtype == VectorType::FLAT_VECTOR) {
46638
+ auto &validity = FlatVector::Validity(*vector);
46639
+ auto &child = ListVector::GetEntry(*vector);
46640
+ auto child_size = ListVector::GetListSize(*vector);
46641
+ auto list_data = FlatVector::GetData<list_entry_t>(*vector);
46614
46642
  idx_t total_size = 0;
46615
46643
  for (idx_t i = 0; i < count; i++) {
46616
- auto idx = sel.get_index(i);
46644
+ auto idx = sel->get_index(i);
46617
46645
  auto &le = list_data[idx];
46618
46646
  if (validity.RowIsValid(idx)) {
46619
46647
  D_ASSERT(le.offset + le.length <= child_size);
@@ -46623,7 +46651,7 @@ void Vector::Verify(const SelectionVector &sel, idx_t count) {
46623
46651
  SelectionVector child_sel(total_size);
46624
46652
  idx_t child_count = 0;
46625
46653
  for (idx_t i = 0; i < count; i++) {
46626
- auto idx = sel.get_index(i);
46654
+ auto idx = sel->get_index(i);
46627
46655
  auto &le = list_data[idx];
46628
46656
  if (validity.RowIsValid(idx)) {
46629
46657
  D_ASSERT(le.offset + le.length <= child_size);
@@ -46632,7 +46660,7 @@ void Vector::Verify(const SelectionVector &sel, idx_t count) {
46632
46660
  }
46633
46661
  }
46634
46662
  }
46635
- child.Verify(child_sel, child_count);
46663
+ Vector::Verify(child, child_sel, child_count);
46636
46664
  }
46637
46665
  }
46638
46666
  #endif
@@ -46640,7 +46668,7 @@ void Vector::Verify(const SelectionVector &sel, idx_t count) {
46640
46668
 
46641
46669
  void Vector::Verify(idx_t count) {
46642
46670
  auto flat_sel = FlatVector::IncrementalSelectionVector();
46643
- Verify(*flat_sel, count);
46671
+ Verify(*this, *flat_sel, count);
46644
46672
  }
46645
46673
 
46646
46674
  void FlatVector::SetNull(Vector &vector, idx_t idx, bool is_null) {
@@ -46727,7 +46755,7 @@ void ConstantVector::Reference(Vector &vector, Vector &source, idx_t position, i
46727
46755
  auto &source_entries = StructVector::GetEntries(source);
46728
46756
  auto &target_entries = StructVector::GetEntries(vector);
46729
46757
  for (idx_t i = 0; i < source_entries.size(); i++) {
46730
- ConstantVector::Reference(*target_entries[i], *source_entries[i], struct_index, count);
46758
+ ConstantVector::Reference(*target_entries[i], *source_entries[i], position, count);
46731
46759
  }
46732
46760
  vector.SetVectorType(VectorType::CONSTANT_VECTOR);
46733
46761
  break;
@@ -47087,6 +47115,15 @@ VectorStructBuffer::VectorStructBuffer(const LogicalType &type, idx_t capacity)
47087
47115
  }
47088
47116
  }
47089
47117
 
47118
+ VectorStructBuffer::VectorStructBuffer(Vector &other, const SelectionVector &sel, idx_t count)
47119
+ : VectorBuffer(VectorBufferType::STRUCT_BUFFER) {
47120
+ auto &other_vector = StructVector::GetEntries(other);
47121
+ for (auto &child_vector : other_vector) {
47122
+ auto vector = make_unique<Vector>(*child_vector, sel, count);
47123
+ children.push_back(move(vector));
47124
+ }
47125
+ }
47126
+
47090
47127
  VectorStructBuffer::~VectorStructBuffer() {
47091
47128
  }
47092
47129
 
@@ -49990,22 +50027,6 @@ idx_t PositionComparator::Final<duckdb::DistinctGreaterThan>(Vector &left, Vecto
49990
50027
 
49991
50028
  using StructEntries = vector<unique_ptr<Vector>>;
49992
50029
 
49993
- static StructEntries &StructVectorGetSlicedEntries(Vector &parent, StructEntries &sliced, const idx_t count) {
49994
- // We have to manually slice STRUCT dictionaries.
49995
- auto &children = StructVector::GetEntries(parent);
49996
- if (parent.GetVectorType() == VectorType::DICTIONARY_VECTOR) {
49997
- auto &dict_sel = DictionaryVector::SelVector(parent);
49998
- for (auto &child : children) {
49999
- auto v = make_unique<Vector>(*child, dict_sel, count);
50000
- sliced.push_back(move(v));
50001
- }
50002
-
50003
- return sliced;
50004
- }
50005
-
50006
- return children;
50007
- }
50008
-
50009
50030
  static void ExtractNestedSelection(const SelectionVector &slice_sel, const idx_t count, const SelectionVector &sel,
50010
50031
  OptionalSelection &opt) {
50011
50032
 
@@ -50032,8 +50053,8 @@ static idx_t DistinctSelectStruct(Vector &left, Vector &right, idx_t count, cons
50032
50053
 
50033
50054
  // Avoid allocating in the 99% of the cases where we don't need to.
50034
50055
  StructEntries lsliced, rsliced;
50035
- auto &lchildren = StructVectorGetSlicedEntries(left, lsliced, count);
50036
- auto &rchildren = StructVectorGetSlicedEntries(right, rsliced, count);
50056
+ auto &lchildren = StructVector::GetEntries(left);
50057
+ auto &rchildren = StructVector::GetEntries(right);
50037
50058
  D_ASSERT(lchildren.size() == rchildren.size());
50038
50059
 
50039
50060
  // In order to reuse the comparators, we have to track what passed and failed internally.
@@ -50100,7 +50121,6 @@ static idx_t DistinctSelectStruct(Vector &left, Vector &right, idx_t count, cons
50100
50121
  match_count += true_count;
50101
50122
  }
50102
50123
  }
50103
-
50104
50124
  return match_count;
50105
50125
  }
50106
50126
 
@@ -50127,6 +50147,8 @@ static idx_t DistinctSelectList(Vector &left, Vector &right, idx_t count, const
50127
50147
  SelectionVector lcursor(count);
50128
50148
  SelectionVector rcursor(count);
50129
50149
 
50150
+ ListVector::GetEntry(left).Normalify(count);
50151
+ ListVector::GetEntry(right).Normalify(count);
50130
50152
  Vector lchild(ListVector::GetEntry(left), lcursor, count);
50131
50153
  Vector rchild(ListVector::GetEntry(right), rcursor, count);
50132
50154
 
@@ -51939,40 +51961,48 @@ static void TemplatedCopy(const Vector &source, const SelectionVector &sel, Vect
51939
51961
  }
51940
51962
  }
51941
51963
 
51942
- void VectorOperations::Copy(const Vector &source, Vector &target, const SelectionVector &sel_p, idx_t source_count,
51964
+ void VectorOperations::Copy(const Vector &source_p, Vector &target, const SelectionVector &sel_p, idx_t source_count,
51943
51965
  idx_t source_offset, idx_t target_offset) {
51944
51966
  D_ASSERT(source_offset <= source_count);
51945
- D_ASSERT(source.GetType() == target.GetType());
51967
+ D_ASSERT(source_p.GetType() == target.GetType());
51946
51968
  idx_t copy_count = source_count - source_offset;
51947
51969
 
51948
51970
  SelectionVector owned_sel;
51949
51971
  const SelectionVector *sel = &sel_p;
51950
- switch (source.GetVectorType()) {
51951
- case VectorType::DICTIONARY_VECTOR: {
51952
- // dictionary vector: merge selection vectors
51953
- auto &child = DictionaryVector::Child(source);
51954
- auto &dict_sel = DictionaryVector::SelVector(source);
51955
- // merge the selection vectors and verify the child
51956
- auto new_buffer = dict_sel.Slice(*sel, source_count);
51957
- SelectionVector merged_sel(new_buffer);
51958
- VectorOperations::Copy(child, target, merged_sel, source_count, source_offset, target_offset);
51959
- return;
51960
- }
51961
- case VectorType::SEQUENCE_VECTOR: {
51962
- int64_t start, increment;
51963
- Vector seq(source.GetType());
51964
- SequenceVector::GetSequence(source, start, increment);
51965
- VectorOperations::GenerateSequence(seq, source_count, *sel, start, increment);
51966
- VectorOperations::Copy(seq, target, *sel, source_count, source_offset, target_offset);
51967
- return;
51968
- }
51969
- case VectorType::CONSTANT_VECTOR:
51970
- sel = ConstantVector::ZeroSelectionVector(copy_count, owned_sel);
51971
- break; // carry on with below code
51972
- case VectorType::FLAT_VECTOR:
51973
- break;
51974
- default:
51975
- throw NotImplementedException("FIXME unimplemented vector type for VectorOperations::Copy");
51972
+
51973
+ const Vector *source = &source_p;
51974
+ bool finished = false;
51975
+ while (!finished) {
51976
+ switch (source->GetVectorType()) {
51977
+ case VectorType::DICTIONARY_VECTOR: {
51978
+ // dictionary vector: merge selection vectors
51979
+ auto &child = DictionaryVector::Child(*source);
51980
+ auto &dict_sel = DictionaryVector::SelVector(*source);
51981
+ // merge the selection vectors and verify the child
51982
+ auto new_buffer = dict_sel.Slice(*sel, source_count);
51983
+ owned_sel.Initialize(new_buffer);
51984
+ sel = &owned_sel;
51985
+ source = &child;
51986
+ break;
51987
+ }
51988
+ case VectorType::SEQUENCE_VECTOR: {
51989
+ int64_t start, increment;
51990
+ Vector seq(source->GetType());
51991
+ SequenceVector::GetSequence(*source, start, increment);
51992
+ VectorOperations::GenerateSequence(seq, source_count, *sel, start, increment);
51993
+ VectorOperations::Copy(seq, target, *sel, source_count, source_offset, target_offset);
51994
+ return;
51995
+ }
51996
+ case VectorType::CONSTANT_VECTOR:
51997
+ sel = ConstantVector::ZeroSelectionVector(copy_count, owned_sel);
51998
+ finished = true;
51999
+ break;
52000
+ case VectorType::FLAT_VECTOR:
52001
+ finished = true;
52002
+ break;
52003
+ default:
52004
+ throw NotImplementedException("FIXME unimplemented vector type for VectorOperations::Copy");
52005
+ }
51976
52006
  }
51977
52007
 
51978
52008
  if (copy_count == 0) {
@@ -51989,13 +52019,13 @@ void VectorOperations::Copy(const Vector &source, Vector &target, const Selectio
51989
52019
 
51990
52020
  // first copy the nullmask
51991
52021
  auto &tmask = FlatVector::Validity(target);
51992
- if (source.GetVectorType() == VectorType::CONSTANT_VECTOR) {
51993
- const bool valid = !ConstantVector::IsNull(source);
52022
+ if (source->GetVectorType() == VectorType::CONSTANT_VECTOR) {
52023
+ const bool valid = !ConstantVector::IsNull(*source);
51994
52024
  for (idx_t i = 0; i < copy_count; i++) {
51995
52025
  tmask.Set(target_offset + i, valid);
51996
52026
  }
51997
52027
  } else {
51998
- auto &smask = FlatVector::Validity(source);
52028
+ auto &smask = FlatVector::Validity(*source);
51999
52029
  if (smask.IsMaskSet()) {
52000
52030
  for (idx_t i = 0; i < copy_count; i++) {
52001
52031
  auto idx = sel->get_index(source_offset + i);
@@ -52020,46 +52050,46 @@ void VectorOperations::Copy(const Vector &source, Vector &target, const Selectio
52020
52050
  D_ASSERT(sel);
52021
52051
 
52022
52052
  // now copy over the data
52023
- switch (source.GetType().InternalType()) {
52053
+ switch (source->GetType().InternalType()) {
52024
52054
  case PhysicalType::BOOL:
52025
52055
  case PhysicalType::INT8:
52026
- TemplatedCopy<int8_t>(source, *sel, target, source_offset, target_offset, copy_count);
52056
+ TemplatedCopy<int8_t>(*source, *sel, target, source_offset, target_offset, copy_count);
52027
52057
  break;
52028
52058
  case PhysicalType::INT16:
52029
- TemplatedCopy<int16_t>(source, *sel, target, source_offset, target_offset, copy_count);
52059
+ TemplatedCopy<int16_t>(*source, *sel, target, source_offset, target_offset, copy_count);
52030
52060
  break;
52031
52061
  case PhysicalType::INT32:
52032
- TemplatedCopy<int32_t>(source, *sel, target, source_offset, target_offset, copy_count);
52062
+ TemplatedCopy<int32_t>(*source, *sel, target, source_offset, target_offset, copy_count);
52033
52063
  break;
52034
52064
  case PhysicalType::INT64:
52035
- TemplatedCopy<int64_t>(source, *sel, target, source_offset, target_offset, copy_count);
52065
+ TemplatedCopy<int64_t>(*source, *sel, target, source_offset, target_offset, copy_count);
52036
52066
  break;
52037
52067
  case PhysicalType::UINT8:
52038
- TemplatedCopy<uint8_t>(source, *sel, target, source_offset, target_offset, copy_count);
52068
+ TemplatedCopy<uint8_t>(*source, *sel, target, source_offset, target_offset, copy_count);
52039
52069
  break;
52040
52070
  case PhysicalType::UINT16:
52041
- TemplatedCopy<uint16_t>(source, *sel, target, source_offset, target_offset, copy_count);
52071
+ TemplatedCopy<uint16_t>(*source, *sel, target, source_offset, target_offset, copy_count);
52042
52072
  break;
52043
52073
  case PhysicalType::UINT32:
52044
- TemplatedCopy<uint32_t>(source, *sel, target, source_offset, target_offset, copy_count);
52074
+ TemplatedCopy<uint32_t>(*source, *sel, target, source_offset, target_offset, copy_count);
52045
52075
  break;
52046
52076
  case PhysicalType::UINT64:
52047
- TemplatedCopy<uint64_t>(source, *sel, target, source_offset, target_offset, copy_count);
52077
+ TemplatedCopy<uint64_t>(*source, *sel, target, source_offset, target_offset, copy_count);
52048
52078
  break;
52049
52079
  case PhysicalType::INT128:
52050
- TemplatedCopy<hugeint_t>(source, *sel, target, source_offset, target_offset, copy_count);
52080
+ TemplatedCopy<hugeint_t>(*source, *sel, target, source_offset, target_offset, copy_count);
52051
52081
  break;
52052
52082
  case PhysicalType::FLOAT:
52053
- TemplatedCopy<float>(source, *sel, target, source_offset, target_offset, copy_count);
52083
+ TemplatedCopy<float>(*source, *sel, target, source_offset, target_offset, copy_count);
52054
52084
  break;
52055
52085
  case PhysicalType::DOUBLE:
52056
- TemplatedCopy<double>(source, *sel, target, source_offset, target_offset, copy_count);
52086
+ TemplatedCopy<double>(*source, *sel, target, source_offset, target_offset, copy_count);
52057
52087
  break;
52058
52088
  case PhysicalType::INTERVAL:
52059
- TemplatedCopy<interval_t>(source, *sel, target, source_offset, target_offset, copy_count);
52089
+ TemplatedCopy<interval_t>(*source, *sel, target, source_offset, target_offset, copy_count);
52060
52090
  break;
52061
52091
  case PhysicalType::VARCHAR: {
52062
- auto ldata = FlatVector::GetData<string_t>(source);
52092
+ auto ldata = FlatVector::GetData<string_t>(*source);
52063
52093
  auto tdata = FlatVector::GetData<string_t>(target);
52064
52094
  for (idx_t i = 0; i < copy_count; i++) {
52065
52095
  auto source_idx = sel->get_index(source_offset + i);
@@ -52071,11 +52101,11 @@ void VectorOperations::Copy(const Vector &source, Vector &target, const Selectio
52071
52101
  break;
52072
52102
  }
52073
52103
  case PhysicalType::STRUCT: {
52074
- auto &source_children = StructVector::GetEntries(source);
52104
+ auto &source_children = StructVector::GetEntries(*source);
52075
52105
  auto &target_children = StructVector::GetEntries(target);
52076
52106
  D_ASSERT(source_children.size() == target_children.size());
52077
52107
  for (idx_t i = 0; i < source_children.size(); i++) {
52078
- VectorOperations::Copy(*source_children[i], *target_children[i], *sel, source_count, source_offset,
52108
+ VectorOperations::Copy(*source_children[i], *target_children[i], sel_p, source_count, source_offset,
52079
52109
  target_offset);
52080
52110
  }
52081
52111
  break;
@@ -52083,8 +52113,8 @@ void VectorOperations::Copy(const Vector &source, Vector &target, const Selectio
52083
52113
  case PhysicalType::LIST: {
52084
52114
  D_ASSERT(target.GetType().InternalType() == PhysicalType::LIST);
52085
52115
 
52086
- auto &source_child = ListVector::GetEntry(source);
52087
- auto sdata = FlatVector::GetData<list_entry_t>(source);
52116
+ auto &source_child = ListVector::GetEntry(*source);
52117
+ auto sdata = FlatVector::GetData<list_entry_t>(*source);
52088
52118
  auto tdata = FlatVector::GetData<list_entry_t>(target);
52089
52119
 
52090
52120
  if (target_vector_type == VectorType::CONSTANT_VECTOR) {
@@ -52142,7 +52172,7 @@ void VectorOperations::Copy(const Vector &source, Vector &target, const Selectio
52142
52172
  }
52143
52173
  default:
52144
52174
  throw NotImplementedException("Unimplemented type '%s' for copy!",
52145
- TypeIdToString(source.GetType().InternalType()));
52175
+ TypeIdToString(source->GetType().InternalType()));
52146
52176
  }
52147
52177
 
52148
52178
  if (target_vector_type != VectorType::FLAT_VECTOR) {
@@ -52152,38 +52182,8 @@ void VectorOperations::Copy(const Vector &source, Vector &target, const Selectio
52152
52182
 
52153
52183
  void VectorOperations::Copy(const Vector &source, Vector &target, idx_t source_count, idx_t source_offset,
52154
52184
  idx_t target_offset) {
52155
- switch (source.GetVectorType()) {
52156
- case VectorType::DICTIONARY_VECTOR: {
52157
- // dictionary: continue into child with selection vector
52158
- auto &child = DictionaryVector::Child(source);
52159
- auto &dict_sel = DictionaryVector::SelVector(source);
52160
- VectorOperations::Copy(child, target, dict_sel, source_count, source_offset, target_offset);
52161
- break;
52162
- }
52163
- case VectorType::CONSTANT_VECTOR: {
52164
- SelectionVector owned_sel;
52165
- auto sel = ConstantVector::ZeroSelectionVector(source_count, owned_sel);
52166
- VectorOperations::Copy(source, target, *sel, source_count, source_offset, target_offset);
52167
- break;
52168
- }
52169
- case VectorType::FLAT_VECTOR: {
52170
- auto sel = FlatVector::IncrementalSelectionVector();
52171
- VectorOperations::Copy(source, target, *sel, source_count, source_offset, target_offset);
52172
- break;
52173
- }
52174
- case VectorType::SEQUENCE_VECTOR: {
52175
- int64_t start, increment;
52176
- SequenceVector::GetSequence(source, start, increment);
52177
- Vector flattened(source.GetType());
52178
- VectorOperations::GenerateSequence(flattened, source_count, start, increment);
52179
-
52180
- auto sel = FlatVector::IncrementalSelectionVector();
52181
- VectorOperations::Copy(flattened, target, *sel, source_count, source_offset, target_offset);
52182
- break;
52183
- }
52184
- default:
52185
- throw NotImplementedException("FIXME: unimplemented vector type for VectorOperations::Copy");
52186
- }
52185
+ VectorOperations::Copy(source, target, *FlatVector::IncrementalSelectionVector(), source_count, source_offset,
52186
+ target_offset);
52187
52187
  }
52188
52188
 
52189
52189
  } // namespace duckdb
@@ -54339,11 +54339,14 @@ void ValidityFillLoop(Vector &vector, Vector &result, const SelectionVector &sel
54339
54339
  } else {
54340
54340
  VectorData vdata;
54341
54341
  vector.Orrify(count, vdata);
54342
+ if (vdata.validity.AllValid()) {
54343
+ return;
54344
+ }
54342
54345
  for (idx_t i = 0; i < count; i++) {
54343
54346
  auto source_idx = vdata.sel->get_index(i);
54344
- auto res_idx = sel.get_index(i);
54345
-
54346
- result_mask.Set(res_idx, vdata.validity.RowIsValid(source_idx));
54347
+ if (!vdata.validity.RowIsValid(source_idx)) {
54348
+ result_mask.SetInvalid(sel.get_index(i));
54349
+ }
54347
54350
  }
54348
54351
  }
54349
54352
  }
@@ -54418,7 +54421,7 @@ void ExpressionExecutor::FillSwitch(Vector &vector, Vector &result, const Select
54418
54421
  result_data[result_idx].offset += offset;
54419
54422
  }
54420
54423
 
54421
- result.Verify(sel, count);
54424
+ Vector::Verify(result, sel, count);
54422
54425
  break;
54423
54426
  }
54424
54427
  default:
@@ -67734,6 +67737,7 @@ OperatorResultType PhysicalPiecewiseMergeJoin::Execute(ExecutionContext &context
67734
67737
  }
67735
67738
  }
67736
67739
 
67740
+ input.Verify();
67737
67741
  switch (join_type) {
67738
67742
  case JoinType::SEMI:
67739
67743
  case JoinType::ANTI:
@@ -68332,6 +68336,8 @@ SinkResultType PhysicalOrder::Sink(ExecutionContext &context, GlobalSinkState &g
68332
68336
  lstate.executor.Execute(input, sort);
68333
68337
 
68334
68338
  // Sink the data into the local sort state
68339
+ sort.Verify();
68340
+ input.Verify();
68335
68341
  local_sort_state.SinkChunk(sort, input);
68336
68342
 
68337
68343
  // When sorting data reaches a certain size, we sort it
@@ -82027,15 +82033,16 @@ static bool TemplatedOptimumValue(Vector &left, idx_t lidx, idx_t lcount, Vector
82027
82033
  }
82028
82034
 
82029
82035
  template <class OP>
82030
- static bool TemplatedOptimumStruct(Vector &left, idx_t lidx, idx_t lcount, Vector &right, idx_t ridx, idx_t rcount) {
82036
+ static bool TemplatedOptimumStruct(Vector &left, idx_t lidx_p, idx_t lcount, Vector &right, idx_t ridx_p,
82037
+ idx_t rcount) {
82031
82038
  // STRUCT dictionaries apply to all the children
82032
82039
  // so map the indexes first
82033
82040
  VectorData lvdata, rvdata;
82034
82041
  left.Orrify(lcount, lvdata);
82035
82042
  right.Orrify(rcount, rvdata);
82036
82043
 
82037
- lidx = lvdata.sel->get_index(lidx);
82038
- ridx = rvdata.sel->get_index(ridx);
82044
+ idx_t lidx = lvdata.sel->get_index(lidx_p);
82045
+ idx_t ridx = rvdata.sel->get_index(ridx_p);
82039
82046
 
82040
82047
  // DISTINCT semantics are in effect for nested types
82041
82048
  auto lnull = !lvdata.validity.RowIsValid(lidx);
@@ -82053,7 +82060,7 @@ static bool TemplatedOptimumStruct(Vector &left, idx_t lidx, idx_t lcount, Vecto
82053
82060
  auto &rchild = *rchildren[col_no];
82054
82061
 
82055
82062
  // Strict comparisons use the OP for definite
82056
- if (TemplatedOptimumValue<OP>(lchild, lidx, lcount, rchild, ridx, rcount)) {
82063
+ if (TemplatedOptimumValue<OP>(lchild, lidx_p, lcount, rchild, ridx_p, rcount)) {
82057
82064
  return true;
82058
82065
  }
82059
82066
 
@@ -82062,7 +82069,7 @@ static bool TemplatedOptimumStruct(Vector &left, idx_t lidx, idx_t lcount, Vecto
82062
82069
  }
82063
82070
 
82064
82071
  // Strict comparisons use IS NOT DISTINCT for possible
82065
- if (!TemplatedOptimumValue<NotDistinctFrom>(lchild, lidx, lcount, rchild, ridx, rcount)) {
82072
+ if (!TemplatedOptimumValue<NotDistinctFrom>(lchild, lidx_p, lcount, rchild, ridx_p, rcount)) {
82066
82073
  return false;
82067
82074
  }
82068
82075
  }
@@ -96522,23 +96529,11 @@ static void CardinalityFunction(DataChunk &args, ExpressionState &state, Vector
96522
96529
  result.SetVectorType(VectorType::FLAT_VECTOR);
96523
96530
  auto result_data = FlatVector::GetData<uint64_t>(result);
96524
96531
 
96525
- if (map.GetVectorType() == VectorType::DICTIONARY_VECTOR) {
96526
- auto &child = DictionaryVector::Child(map);
96527
- auto &dict_sel = DictionaryVector::SelVector(map);
96528
-
96529
- auto &children = StructVector::GetEntries(child);
96530
- children[0]->Orrify(args.size(), list_data);
96531
- for (idx_t row = 0; row < args.size(); row++) {
96532
- auto list_entry = ((list_entry_t *)list_data.data)[list_data.sel->get_index(dict_sel.get_index(row))];
96533
- result_data[row] = list_entry.length;
96534
- }
96535
- } else {
96536
- auto &children = StructVector::GetEntries(map);
96537
- children[0]->Orrify(args.size(), list_data);
96538
- for (idx_t row = 0; row < args.size(); row++) {
96539
- auto list_entry = ((list_entry_t *)list_data.data)[list_data.sel->get_index(row)];
96540
- result_data[row] = list_entry.length;
96541
- }
96532
+ auto &children = StructVector::GetEntries(map);
96533
+ children[0]->Orrify(args.size(), list_data);
96534
+ for (idx_t row = 0; row < args.size(); row++) {
96535
+ auto list_entry = ((list_entry_t *)list_data.data)[list_data.sel->get_index(row)];
96536
+ result_data[row] = list_entry.length;
96542
96537
  }
96543
96538
 
96544
96539
  if (args.size() == 1) {
@@ -96686,33 +96681,16 @@ static void MapExtractFunction(DataChunk &args, ExpressionState &state, Vector &
96686
96681
  auto key_value = key.GetValue(0);
96687
96682
  VectorData offset_data;
96688
96683
 
96689
- if (map.GetVectorType() == VectorType::DICTIONARY_VECTOR) {
96690
- auto &child = DictionaryVector::Child(map);
96691
- auto &children = StructVector::GetEntries(child);
96692
- auto &dict_sel = DictionaryVector::SelVector(map);
96693
- children[0]->Orrify(args.size(), offset_data);
96694
- auto &key_type = ListType::GetChildType(children[0]->GetType());
96695
- if (key_type != LogicalTypeId::SQLNULL) {
96696
- key_value = key_value.CastAs(key_type);
96697
- }
96698
- for (idx_t row = 0; row < args.size(); row++) {
96699
- auto offsets =
96700
- ListVector::Search(*children[0], key_value, offset_data.sel->get_index(dict_sel.get_index(row)));
96701
- auto values = ListVector::GetValuesFromOffsets(*children[1], offsets);
96702
- FillResult(values, result, row);
96703
- }
96704
- } else {
96705
- auto &children = StructVector::GetEntries(map);
96706
- children[0]->Orrify(args.size(), offset_data);
96707
- auto &key_type = ListType::GetChildType(children[0]->GetType());
96708
- if (key_type != LogicalTypeId::SQLNULL) {
96709
- key_value = key_value.CastAs(key_type);
96710
- }
96711
- for (idx_t row = 0; row < args.size(); row++) {
96712
- auto offsets = ListVector::Search(*children[0], key_value, offset_data.sel->get_index(row));
96713
- auto values = ListVector::GetValuesFromOffsets(*children[1], offsets);
96714
- FillResult(values, result, row);
96715
- }
96684
+ auto &children = StructVector::GetEntries(map);
96685
+ children[0]->Orrify(args.size(), offset_data);
96686
+ auto &key_type = ListType::GetChildType(children[0]->GetType());
96687
+ if (key_type != LogicalTypeId::SQLNULL) {
96688
+ key_value = key_value.CastAs(key_type);
96689
+ }
96690
+ for (idx_t row = 0; row < args.size(); row++) {
96691
+ auto offsets = ListVector::Search(*children[0], key_value, offset_data.sel->get_index(row));
96692
+ auto values = ListVector::GetValuesFromOffsets(*children[1], offsets);
96693
+ FillResult(values, result, row);
96716
96694
  }
96717
96695
 
96718
96696
  if (args.size() == 1) {
@@ -103967,19 +103945,10 @@ static void StructExtractFunction(DataChunk &args, ExpressionState &state, Vecto
103967
103945
  auto &vec = args.data[0];
103968
103946
 
103969
103947
  vec.Verify(args.size());
103970
- if (vec.GetVectorType() == VectorType::DICTIONARY_VECTOR) {
103971
- auto &child = DictionaryVector::Child(vec);
103972
- auto &dict_sel = DictionaryVector::SelVector(vec);
103973
- auto &children = StructVector::GetEntries(child);
103974
- D_ASSERT(info.index < children.size());
103975
- auto &struct_child = children[info.index];
103976
- result.Slice(*struct_child, dict_sel, args.size());
103977
- } else {
103978
- auto &children = StructVector::GetEntries(vec);
103979
- D_ASSERT(info.index < children.size());
103980
- auto &struct_child = children[info.index];
103981
- result.Reference(*struct_child);
103982
- }
103948
+ auto &children = StructVector::GetEntries(vec);
103949
+ D_ASSERT(info.index < children.size());
103950
+ auto &struct_child = children[info.index];
103951
+ result.Reference(*struct_child);
103983
103952
  result.Verify(args.size());
103984
103953
  }
103985
103954
 
@@ -106648,8 +106617,28 @@ struct DuckDBViewsFun {
106648
106617
  static void RegisterFunction(BuiltinFunctions &set);
106649
106618
  };
106650
106619
 
106620
+ struct TestType {
106621
+ TestType(LogicalType type_p, string name_p)
106622
+ : type(move(type_p)), name(move(name_p)), min_value(Value::MinimumValue(type)),
106623
+ max_value(Value::MaximumValue(type)) {
106624
+ }
106625
+ TestType(LogicalType type_p, string name_p, Value min, Value max)
106626
+ : type(move(type_p)), name(move(name_p)), min_value(move(min)), max_value(move(max)) {
106627
+ }
106628
+
106629
+ LogicalType type;
106630
+ string name;
106631
+ Value min_value;
106632
+ Value max_value;
106633
+ };
106634
+
106651
106635
  struct TestAllTypesFun {
106652
106636
  static void RegisterFunction(BuiltinFunctions &set);
106637
+ static vector<TestType> GetTestTypes();
106638
+ };
106639
+
106640
+ struct TestVectorTypesFun {
106641
+ static void RegisterFunction(BuiltinFunctions &set);
106653
106642
  };
106654
106643
 
106655
106644
  } // namespace duckdb
@@ -110259,22 +110248,7 @@ struct TestAllTypesData : public GlobalTableFunctionState {
110259
110248
  idx_t offset;
110260
110249
  };
110261
110250
 
110262
- struct TestType {
110263
- TestType(LogicalType type_p, string name_p)
110264
- : type(move(type_p)), name(move(name_p)), min_value(Value::MinimumValue(type)),
110265
- max_value(Value::MaximumValue(type)) {
110266
- }
110267
- TestType(LogicalType type_p, string name_p, Value min, Value max)
110268
- : type(move(type_p)), name(move(name_p)), min_value(move(min)), max_value(move(max)) {
110269
- }
110270
-
110271
- LogicalType type;
110272
- string name;
110273
- Value min_value;
110274
- Value max_value;
110275
- };
110276
-
110277
- static vector<TestType> GetTestTypes() {
110251
+ vector<TestType> TestAllTypesFun::GetTestTypes() {
110278
110252
  vector<TestType> result;
110279
110253
  // scalar types/numerics
110280
110254
  result.emplace_back(LogicalType::BOOLEAN, "bool");
@@ -110445,7 +110419,7 @@ static vector<TestType> GetTestTypes() {
110445
110419
 
110446
110420
  static unique_ptr<FunctionData> TestAllTypesBind(ClientContext &context, TableFunctionBindInput &input,
110447
110421
  vector<LogicalType> &return_types, vector<string> &names) {
110448
- auto test_types = GetTestTypes();
110422
+ auto test_types = TestAllTypesFun::GetTestTypes();
110449
110423
  for (auto &test_type : test_types) {
110450
110424
  return_types.push_back(move(test_type.type));
110451
110425
  names.push_back(move(test_type.name));
@@ -110455,7 +110429,7 @@ static unique_ptr<FunctionData> TestAllTypesBind(ClientContext &context, TableFu
110455
110429
 
110456
110430
  unique_ptr<GlobalTableFunctionState> TestAllTypesInit(ClientContext &context, TableFunctionInitInput &input) {
110457
110431
  auto result = make_unique<TestAllTypesData>();
110458
- auto test_types = GetTestTypes();
110432
+ auto test_types = TestAllTypesFun::GetTestTypes();
110459
110433
  // 3 rows: min, max and NULL
110460
110434
  result->entries.resize(3);
110461
110435
  // initialize the values
@@ -110493,6 +110467,256 @@ void TestAllTypesFun::RegisterFunction(BuiltinFunctions &set) {
110493
110467
  } // namespace duckdb
110494
110468
 
110495
110469
 
110470
+
110471
+
110472
+ namespace duckdb {
110473
+
110474
+ // FLAT, CONSTANT, DICTIONARY, SEQUENCE
110475
+ struct TestVectorBindData : public TableFunctionData {
110476
+ LogicalType type;
110477
+ bool all_flat;
110478
+ };
110479
+
110480
+ struct TestVectorTypesData : public GlobalTableFunctionState {
110481
+ TestVectorTypesData() : offset(0) {
110482
+ }
110483
+
110484
+ vector<unique_ptr<DataChunk>> entries;
110485
+ idx_t offset;
110486
+ };
110487
+
110488
+ struct TestVectorInfo {
110489
+ TestVectorInfo(const LogicalType &type, const map<LogicalTypeId, TestType> &test_type_map,
110490
+ vector<unique_ptr<DataChunk>> &entries)
110491
+ : type(type), test_type_map(test_type_map), entries(entries) {
110492
+ }
110493
+
110494
+ const LogicalType &type;
110495
+ const map<LogicalTypeId, TestType> &test_type_map;
110496
+ vector<unique_ptr<DataChunk>> &entries;
110497
+ };
110498
+
110499
+ struct TestVectorFlat {
110500
+ static constexpr const idx_t TEST_VECTOR_CARDINALITY = 3;
110501
+
110502
+ static vector<Value> GenerateValues(TestVectorInfo &info, const LogicalType &type) {
110503
+ vector<Value> result;
110504
+ switch (type.InternalType()) {
110505
+ case PhysicalType::STRUCT: {
110506
+ vector<child_list_t<Value>> struct_children;
110507
+ auto &child_types = StructType::GetChildTypes(type);
110508
+
110509
+ struct_children.resize(TEST_VECTOR_CARDINALITY);
110510
+ for (auto &child_type : child_types) {
110511
+ auto child_values = GenerateValues(info, child_type.second);
110512
+
110513
+ for (idx_t i = 0; i < child_values.size(); i++) {
110514
+ struct_children[i].push_back(make_pair(child_type.first, move(child_values[i])));
110515
+ }
110516
+ }
110517
+ for (auto &struct_child : struct_children) {
110518
+ result.push_back(Value::STRUCT(move(struct_child)));
110519
+ }
110520
+ break;
110521
+ }
110522
+ case PhysicalType::LIST: {
110523
+ auto &child_type = ListType::GetChildType(type);
110524
+ auto child_values = GenerateValues(info, child_type);
110525
+
110526
+ result.push_back(Value::LIST(child_type, {child_values[0], child_values[1]}));
110527
+ result.push_back(Value::LIST(child_type, {}));
110528
+ result.push_back(Value::LIST(child_type, {child_values[2]}));
110529
+ break;
110530
+ }
110531
+ default: {
110532
+ auto entry = info.test_type_map.find(type.id());
110533
+ if (entry == info.test_type_map.end()) {
110534
+ throw NotImplementedException("Unimplemented type for test_vector_types %s", type.ToString());
110535
+ }
110536
+ result.push_back(entry->second.min_value);
110537
+ result.push_back(entry->second.max_value);
110538
+ result.emplace_back(type);
110539
+ break;
110540
+ }
110541
+ }
110542
+ return result;
110543
+ }
110544
+
110545
+ static void Generate(TestVectorInfo &info) {
110546
+ vector<Value> result_values = GenerateValues(info, info.type);
110547
+ for (idx_t cur_row = 0; cur_row < result_values.size(); cur_row += STANDARD_VECTOR_SIZE) {
110548
+ auto result = make_unique<DataChunk>();
110549
+ result->Initialize({info.type});
110550
+ auto cardinality = MinValue<idx_t>(STANDARD_VECTOR_SIZE, result_values.size() - cur_row);
110551
+ for (idx_t i = 0; i < cardinality; i++) {
110552
+ result->data[0].SetValue(i, result_values[cur_row + i]);
110553
+ }
110554
+ result->SetCardinality(cardinality);
110555
+ info.entries.push_back(move(result));
110556
+ }
110557
+ }
110558
+ };
110559
+
110560
+ struct TestVectorConstant {
110561
+ static void Generate(TestVectorInfo &info) {
110562
+ auto values = TestVectorFlat::GenerateValues(info, info.type);
110563
+ for (idx_t cur_row = 0; cur_row < TestVectorFlat::TEST_VECTOR_CARDINALITY; cur_row += STANDARD_VECTOR_SIZE) {
110564
+ auto result = make_unique<DataChunk>();
110565
+ result->Initialize({info.type});
110566
+ auto cardinality = MinValue<idx_t>(STANDARD_VECTOR_SIZE, TestVectorFlat::TEST_VECTOR_CARDINALITY - cur_row);
110567
+ result->data[0].SetValue(0, values[0]);
110568
+ result->data[0].SetVectorType(VectorType::CONSTANT_VECTOR);
110569
+ result->SetCardinality(cardinality);
110570
+
110571
+ info.entries.push_back(move(result));
110572
+ }
110573
+ }
110574
+ };
110575
+
110576
+ struct TestVectorSequence {
110577
+ static void GenerateVector(TestVectorInfo &info, const LogicalType &type, Vector &result) {
110578
+ D_ASSERT(type == result.GetType());
110579
+ switch (type.id()) {
110580
+ case LogicalTypeId::TINYINT:
110581
+ case LogicalTypeId::SMALLINT:
110582
+ case LogicalTypeId::INTEGER:
110583
+ case LogicalTypeId::BIGINT:
110584
+ case LogicalTypeId::UTINYINT:
110585
+ case LogicalTypeId::USMALLINT:
110586
+ case LogicalTypeId::UINTEGER:
110587
+ case LogicalTypeId::UBIGINT:
110588
+ result.Sequence(3, 2);
110589
+ return;
110590
+ default:
110591
+ break;
110592
+ }
110593
+ switch (type.InternalType()) {
110594
+ case PhysicalType::STRUCT: {
110595
+ auto &child_entries = StructVector::GetEntries(result);
110596
+ for (auto &child_entry : child_entries) {
110597
+ GenerateVector(info, child_entry->GetType(), *child_entry);
110598
+ }
110599
+ break;
110600
+ }
110601
+ case PhysicalType::LIST: {
110602
+ auto data = FlatVector::GetData<list_entry_t>(result);
110603
+ data[0].offset = 0;
110604
+ data[0].length = 2;
110605
+ data[1].offset = 2;
110606
+ data[1].length = 0;
110607
+ data[2].offset = 2;
110608
+ data[2].length = 1;
110609
+
110610
+ GenerateVector(info, ListType::GetChildType(type), ListVector::GetEntry(result));
110611
+ ListVector::SetListSize(result, 3);
110612
+ break;
110613
+ }
110614
+ default: {
110615
+ auto entry = info.test_type_map.find(type.id());
110616
+ if (entry == info.test_type_map.end()) {
110617
+ throw NotImplementedException("Unimplemented type for test_vector_types %s", type.ToString());
110618
+ }
110619
+ result.SetValue(0, entry->second.min_value);
110620
+ result.SetValue(1, entry->second.max_value);
110621
+ result.SetValue(2, Value(type));
110622
+ break;
110623
+ }
110624
+ }
110625
+ }
110626
+
110627
+ static void Generate(TestVectorInfo &info) {
110628
+ #if STANDARD_VECTOR_SIZE > 2
110629
+ auto result = make_unique<DataChunk>();
110630
+ result->Initialize({info.type});
110631
+
110632
+ GenerateVector(info, info.type, result->data[0]);
110633
+ result->SetCardinality(3);
110634
+ info.entries.push_back(move(result));
110635
+ #endif
110636
+ }
110637
+ };
110638
+
110639
+ struct TestVectorDictionary {
110640
+ static void Generate(TestVectorInfo &info) {
110641
+ idx_t current_chunk = info.entries.size();
110642
+
110643
+ unordered_set<idx_t> slice_entries {1, 2};
110644
+
110645
+ TestVectorFlat::Generate(info);
110646
+ idx_t current_idx = 0;
110647
+ for (idx_t i = current_chunk; i < info.entries.size(); i++) {
110648
+ auto &chunk = *info.entries[i];
110649
+ SelectionVector sel(STANDARD_VECTOR_SIZE);
110650
+ idx_t sel_idx = 0;
110651
+ for (idx_t k = 0; k < chunk.size(); k++) {
110652
+ if (slice_entries.count(current_idx + k) > 0) {
110653
+ sel.set_index(sel_idx++, k);
110654
+ }
110655
+ }
110656
+ chunk.Slice(sel, sel_idx);
110657
+ current_idx += chunk.size();
110658
+ }
110659
+ }
110660
+ };
110661
+
110662
+ static unique_ptr<FunctionData> TestVectorTypesBind(ClientContext &context, TableFunctionBindInput &input,
110663
+ vector<LogicalType> &return_types, vector<string> &names) {
110664
+ auto result = make_unique<TestVectorBindData>();
110665
+ result->type = input.inputs[0].type();
110666
+ result->all_flat = BooleanValue::Get(input.inputs[1]);
110667
+
110668
+ return_types.push_back(result->type);
110669
+ names.emplace_back("test_vector");
110670
+ return move(result);
110671
+ }
110672
+
110673
+ unique_ptr<GlobalTableFunctionState> TestVectorTypesInit(ClientContext &context, TableFunctionInitInput &input) {
110674
+ auto &bind_data = (TestVectorBindData &)*input.bind_data;
110675
+
110676
+ auto result = make_unique<TestVectorTypesData>();
110677
+
110678
+ auto test_types = TestAllTypesFun::GetTestTypes();
110679
+
110680
+ map<LogicalTypeId, TestType> test_type_map;
110681
+ for (auto &test_type : test_types) {
110682
+ test_type_map.insert(make_pair(test_type.type.id(), move(test_type)));
110683
+ }
110684
+
110685
+ TestVectorInfo info(bind_data.type, test_type_map, result->entries);
110686
+ TestVectorFlat::Generate(info);
110687
+ TestVectorConstant::Generate(info);
110688
+ TestVectorDictionary::Generate(info);
110689
+ TestVectorSequence::Generate(info);
110690
+ for (auto &entry : result->entries) {
110691
+ entry->Verify();
110692
+ }
110693
+ if (bind_data.all_flat) {
110694
+ for (auto &entry : result->entries) {
110695
+ entry->Normalify();
110696
+ entry->Verify();
110697
+ }
110698
+ }
110699
+ return move(result);
110700
+ }
110701
+
110702
+ void TestVectorTypesFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
110703
+ auto &data = (TestVectorTypesData &)*data_p.global_state;
110704
+ if (data.offset >= data.entries.size()) {
110705
+ // finished returning values
110706
+ return;
110707
+ }
110708
+ output.Reference(*data.entries[data.offset]);
110709
+ data.offset++;
110710
+ }
110711
+
110712
+ void TestVectorTypesFun::RegisterFunction(BuiltinFunctions &set) {
110713
+ set.AddFunction(TableFunction("test_vector_types", {LogicalType::ANY, LogicalType::BOOLEAN},
110714
+ TestVectorTypesFunction, TestVectorTypesBind, TestVectorTypesInit));
110715
+ }
110716
+
110717
+ } // namespace duckdb
110718
+
110719
+
110496
110720
  //===----------------------------------------------------------------------===//
110497
110721
  // DuckDB
110498
110722
  //
@@ -110608,6 +110832,7 @@ void BuiltinFunctions::RegisterSQLiteFunctions() {
110608
110832
  DuckDBTypesFun::RegisterFunction(*this);
110609
110833
  DuckDBViewsFun::RegisterFunction(*this);
110610
110834
  TestAllTypesFun::RegisterFunction(*this);
110835
+ TestVectorTypesFun::RegisterFunction(*this);
110611
110836
  }
110612
110837
 
110613
110838
  } // namespace duckdb