duckdb 0.5.2-dev1579.0 → 0.5.2-dev1610.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/duckdb.cpp CHANGED
@@ -9200,7 +9200,7 @@ void BoxRenderer::Render(ClientContext &context, const vector<string> &names, co
9200
9200
 
9201
9201
  // for each column, figure out the width
9202
9202
  // start off by figuring out the name of the header by looking at the column name and column type
9203
- idx_t min_width = has_hidden_rows ? minimum_row_length : 0;
9203
+ idx_t min_width = has_hidden_rows || row_count == 0 ? minimum_row_length : 0;
9204
9204
  vector<idx_t> column_map;
9205
9205
  idx_t total_length;
9206
9206
  auto widths = ComputeRenderWidths(names, result, collections, min_width, max_width, column_map, total_length);
@@ -52186,6 +52186,15 @@ idx_t ListVector::GetListSize(const Vector &vec) {
52186
52186
  return ((VectorListBuffer &)*vec.auxiliary).size;
52187
52187
  }
52188
52188
 
52189
+ idx_t ListVector::GetListCapacity(const Vector &vec) {
52190
+ if (vec.GetVectorType() == VectorType::DICTIONARY_VECTOR) {
52191
+ auto &child = DictionaryVector::Child(vec);
52192
+ return ListVector::GetListSize(child);
52193
+ }
52194
+ D_ASSERT(vec.auxiliary);
52195
+ return ((VectorListBuffer &)*vec.auxiliary).capacity;
52196
+ }
52197
+
52189
52198
  void ListVector::ReferenceEntry(Vector &vector, Vector &other) {
52190
52199
  D_ASSERT(vector.GetType().id() == LogicalTypeId::LIST);
52191
52200
  D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR ||
@@ -98646,6 +98655,7 @@ struct HandleVectorCastError {
98646
98655
 
98647
98656
 
98648
98657
 
98658
+
98649
98659
  namespace duckdb {
98650
98660
 
98651
98661
  template <class OP>
@@ -98829,9 +98839,20 @@ struct VectorCastHelpers {
98829
98839
  }
98830
98840
  };
98831
98841
 
98832
- struct VectorStringifiedListParser {
98842
+ struct VectorStringToList {
98833
98843
  static idx_t CountParts(const string_t &input);
98834
98844
  static bool SplitStringifiedList(const string_t &input, string_t *child_data, idx_t &child_start, Vector &child);
98845
+ static bool StringToNestedTypeCastLoop(string_t *source_data, ValidityMask &source_mask, Vector &result,
98846
+ ValidityMask &result_mask, idx_t count, CastParameters &parameters,
98847
+ const SelectionVector *sel);
98848
+ };
98849
+
98850
+ struct VectorStringToStruct {
98851
+ static bool SplitStruct(string_t &input, std::vector<std::unique_ptr<Vector>> &varchar_vectors, idx_t &row_idx,
98852
+ string_map_t<idx_t> &child_names, std::vector<ValidityMask *> &child_masks);
98853
+ static bool StringToNestedTypeCastLoop(string_t *source_data, ValidityMask &source_mask, Vector &result,
98854
+ ValidityMask &result_mask, idx_t count, CastParameters &parameters,
98855
+ const SelectionVector *sel);
98835
98856
  };
98836
98857
 
98837
98858
  } // namespace duckdb
@@ -99956,6 +99977,8 @@ BoundCastInfo DefaultCasts::PointerCastSwitch(BindCastInput &input, const Logica
99956
99977
 
99957
99978
 
99958
99979
 
99980
+
99981
+
99959
99982
  namespace duckdb {
99960
99983
 
99961
99984
  template <class T>
@@ -100067,8 +100090,9 @@ static BoundCastInfo VectorStringCastNumericSwitch(BindCastInput &input, const L
100067
100090
  }
100068
100091
  }
100069
100092
 
100070
- bool StringListCastLoop(string_t *source_data, ValidityMask &source_mask, Vector &result, ValidityMask &result_mask,
100071
- idx_t count, CastParameters &parameters, const SelectionVector *sel) {
100093
+ bool VectorStringToList::StringToNestedTypeCastLoop(string_t *source_data, ValidityMask &source_mask, Vector &result,
100094
+ ValidityMask &result_mask, idx_t count, CastParameters &parameters,
100095
+ const SelectionVector *sel) {
100072
100096
 
100073
100097
  idx_t total_list_size = 0;
100074
100098
  for (idx_t i = 0; i < count; i++) {
@@ -100079,7 +100103,7 @@ bool StringListCastLoop(string_t *source_data, ValidityMask &source_mask, Vector
100079
100103
  if (!source_mask.RowIsValid(idx)) {
100080
100104
  continue;
100081
100105
  }
100082
- total_list_size += VectorStringifiedListParser::CountParts(source_data[idx]);
100106
+ total_list_size += VectorStringToList::CountParts(source_data[idx]);
100083
100107
  }
100084
100108
 
100085
100109
  Vector varchar_vector(LogicalType::VARCHAR, total_list_size);
@@ -100103,9 +100127,7 @@ bool StringListCastLoop(string_t *source_data, ValidityMask &source_mask, Vector
100103
100127
  }
100104
100128
 
100105
100129
  list_data[i].offset = total;
100106
- auto valid =
100107
- VectorStringifiedListParser::SplitStringifiedList(source_data[idx], child_data, total, varchar_vector);
100108
- if (!valid) {
100130
+ if (!VectorStringToList::SplitStringifiedList(source_data[idx], child_data, total, varchar_vector)) {
100109
100131
  string text = "Type VARCHAR with value '" + source_data[idx].GetString() +
100110
100132
  "' can't be cast to the destination type LIST";
100111
100133
  HandleVectorCastError::Operation<string_t>(text, result_mask, idx, parameters.error_message, all_converted);
@@ -100121,23 +100143,85 @@ bool StringListCastLoop(string_t *source_data, ValidityMask &source_mask, Vector
100121
100143
  all_converted;
100122
100144
  }
100123
100145
 
100124
- bool StringListCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
100146
+ static LogicalType InitVarcharStructType(const LogicalType &target) {
100147
+ child_list_t<LogicalType> child_types;
100148
+ for (auto &child : StructType::GetChildTypes(target)) {
100149
+ child_types.push_back(make_pair(child.first, LogicalType::VARCHAR));
100150
+ }
100151
+
100152
+ return LogicalType::STRUCT(child_types);
100153
+ }
100154
+
100155
+ bool VectorStringToStruct::StringToNestedTypeCastLoop(string_t *source_data, ValidityMask &source_mask, Vector &result,
100156
+ ValidityMask &result_mask, idx_t count,
100157
+ CastParameters &parameters, const SelectionVector *sel) {
100158
+
100159
+ auto varchar_struct_type = InitVarcharStructType(result.GetType());
100160
+ Vector varchar_vector(varchar_struct_type, count);
100161
+ auto &child_vectors = StructVector::GetEntries(varchar_vector);
100162
+ auto &result_children = StructVector::GetEntries(result);
100163
+
100164
+ string_map_t<idx_t> child_names;
100165
+ vector<ValidityMask *> child_masks;
100166
+ for (idx_t child_idx = 0; child_idx < result_children.size(); child_idx++) {
100167
+ child_names.insert({StructType::GetChildName(result.GetType(), child_idx), child_idx});
100168
+ child_masks.emplace_back(&FlatVector::Validity(*child_vectors[child_idx]));
100169
+ child_masks[child_idx]->SetAllInvalid(count);
100170
+ }
100171
+
100172
+ bool all_converted = true;
100173
+ for (idx_t i = 0; i < count; i++) {
100174
+ idx_t idx = i;
100175
+ if (sel) {
100176
+ idx = sel->get_index(i);
100177
+ }
100178
+ if (!source_mask.RowIsValid(idx)) {
100179
+ result_mask.SetInvalid(i);
100180
+ continue;
100181
+ }
100182
+ if (!VectorStringToStruct::SplitStruct(source_data[idx], child_vectors, i, child_names, child_masks)) {
100183
+ string text = "Type VARCHAR with value '" + source_data[idx].GetString() +
100184
+ "' can't be cast to the destination type STRUCT";
100185
+ for (auto &child_mask : child_masks) {
100186
+ child_mask->SetInvalid(idx); // some values may have already been found and set valid
100187
+ }
100188
+ HandleVectorCastError::Operation<string_t>(text, result_mask, idx, parameters.error_message, all_converted);
100189
+ }
100190
+ }
100191
+
100192
+ auto &cast_data = (StructBoundCastData &)*parameters.cast_data;
100193
+ D_ASSERT(cast_data.child_cast_info.size() == result_children.size());
100194
+
100195
+ for (idx_t child_idx = 0; child_idx < result_children.size(); child_idx++) {
100196
+ auto &varchar_vector = *child_vectors[child_idx];
100197
+ auto &result_child_vector = *result_children[child_idx];
100198
+ auto &child_cast_info = cast_data.child_cast_info[child_idx];
100199
+ // get the correct casting function (VARCHAR -> result_child_type) from cast_data
100200
+ // casting functions are determined by BindStructtoStructCast
100201
+ CastParameters child_parameters(parameters, child_cast_info.cast_data.get());
100202
+ if (!child_cast_info.function(varchar_vector, result_child_vector, count, child_parameters)) {
100203
+ all_converted = false;
100204
+ }
100205
+ }
100206
+ return all_converted;
100207
+ }
100208
+
100209
+ template <class T>
100210
+ bool StringToNestedTypeCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
100125
100211
  D_ASSERT(source.GetType().id() == LogicalTypeId::VARCHAR);
100126
- D_ASSERT(result.GetType().id() == LogicalTypeId::LIST);
100127
100212
 
100128
100213
  switch (source.GetVectorType()) {
100129
100214
  case VectorType::CONSTANT_VECTOR: {
100130
- result.SetVectorType(VectorType::CONSTANT_VECTOR);
100131
-
100132
100215
  auto source_data = ConstantVector::GetData<string_t>(source);
100133
100216
  auto &source_mask = ConstantVector::Validity(source);
100134
- auto &result_mask = ConstantVector::Validity(result);
100217
+ auto &result_mask = FlatVector::Validity(result);
100135
100218
 
100136
- return StringListCastLoop(source_data, source_mask, result, result_mask, 1, parameters, nullptr);
100219
+ auto ret = T::StringToNestedTypeCastLoop(source_data, source_mask, result, result_mask, 1, parameters, nullptr);
100220
+ result.SetVectorType(VectorType::CONSTANT_VECTOR);
100221
+ return ret;
100137
100222
  }
100138
100223
  default: {
100139
100224
  UnifiedVectorFormat unified_source;
100140
- result.SetVectorType(VectorType::FLAT_VECTOR);
100141
100225
 
100142
100226
  source.ToUnifiedFormat(count, unified_source);
100143
100227
  auto source_sel = unified_source.sel;
@@ -100145,17 +100229,12 @@ bool StringListCast(Vector &source, Vector &result, idx_t count, CastParameters
100145
100229
  auto &source_mask = unified_source.validity;
100146
100230
  auto &result_mask = FlatVector::Validity(result);
100147
100231
 
100148
- return StringListCastLoop(source_data, source_mask, result, result_mask, count, parameters, source_sel);
100232
+ return T::StringToNestedTypeCastLoop(source_data, source_mask, result, result_mask, count, parameters,
100233
+ source_sel);
100149
100234
  }
100150
100235
  }
100151
100236
  }
100152
100237
 
100153
- BoundCastInfo StringToListCast(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
100154
- // second argument allows for a secondary casting function to be passed in the CastParameters
100155
- return BoundCastInfo(&StringListCast,
100156
- ListBoundCastData::BindListToListCast(input, LogicalType::LIST(LogicalType::VARCHAR), target));
100157
- }
100158
-
100159
100238
  BoundCastInfo DefaultCasts::StringCastSwitch(BindCastInput &input, const LogicalType &source,
100160
100239
  const LogicalType &target) {
100161
100240
  switch (target.id()) {
@@ -100186,7 +100265,13 @@ BoundCastInfo DefaultCasts::StringCastSwitch(BindCastInput &input, const Logical
100186
100265
  case LogicalTypeId::JSON:
100187
100266
  return &DefaultCasts::ReinterpretCast;
100188
100267
  case LogicalTypeId::LIST:
100189
- return StringToListCast(input, source, target);
100268
+ // the second argument allows for a secondary casting function to be passed in the CastParameters
100269
+ return BoundCastInfo(
100270
+ &StringToNestedTypeCast<VectorStringToList>,
100271
+ ListBoundCastData::BindListToListCast(input, LogicalType::LIST(LogicalType::VARCHAR), target));
100272
+ case LogicalTypeId::STRUCT:
100273
+ return BoundCastInfo(&StringToNestedTypeCast<VectorStringToStruct>,
100274
+ StructBoundCastData::BindStructToStructCast(input, InitVarcharStructType(target), target));
100190
100275
  default:
100191
100276
  return VectorStringCastNumericSwitch(input, source, target);
100192
100277
  }
@@ -100198,26 +100283,8 @@ BoundCastInfo DefaultCasts::StringCastSwitch(BindCastInput &input, const Logical
100198
100283
 
100199
100284
  namespace duckdb {
100200
100285
 
100201
- struct StructBoundCastData : public BoundCastData {
100202
- StructBoundCastData(vector<BoundCastInfo> child_casts, LogicalType target_p)
100203
- : child_cast_info(move(child_casts)), target(move(target_p)) {
100204
- }
100205
-
100206
- vector<BoundCastInfo> child_cast_info;
100207
- LogicalType target;
100208
-
100209
- public:
100210
- unique_ptr<BoundCastData> Copy() const override {
100211
- vector<BoundCastInfo> copy_info;
100212
- for (auto &info : child_cast_info) {
100213
- copy_info.push_back(info.Copy());
100214
- }
100215
- return make_unique<StructBoundCastData>(move(copy_info), target);
100216
- }
100217
- };
100218
-
100219
- unique_ptr<BoundCastData> BindStructToStructCast(BindCastInput &input, const LogicalType &source,
100220
- const LogicalType &target) {
100286
+ unique_ptr<BoundCastData> StructBoundCastData::BindStructToStructCast(BindCastInput &input, const LogicalType &source,
100287
+ const LogicalType &target) {
100221
100288
  vector<BoundCastInfo> child_cast_info;
100222
100289
  auto &source_child_types = StructType::GetChildTypes(source);
100223
100290
  auto &result_child_types = StructType::GetChildTypes(target);
@@ -100238,13 +100305,14 @@ static bool StructToStructCast(Vector &source, Vector &result, idx_t count, Cast
100238
100305
  D_ASSERT(source_children.size() == StructType::GetChildTypes(result.GetType()).size());
100239
100306
 
100240
100307
  auto &result_children = StructVector::GetEntries(result);
100308
+ bool all_converted = true;
100241
100309
  for (idx_t c_idx = 0; c_idx < source_child_types.size(); c_idx++) {
100242
100310
  auto &result_child_vector = *result_children[c_idx];
100243
100311
  auto &source_child_vector = *source_children[c_idx];
100244
100312
  CastParameters child_parameters(parameters, cast_data.child_cast_info[c_idx].cast_data.get());
100245
100313
  if (!cast_data.child_cast_info[c_idx].function(source_child_vector, result_child_vector, count,
100246
100314
  child_parameters)) {
100247
- return false;
100315
+ all_converted = false;
100248
100316
  }
100249
100317
  }
100250
100318
  if (source.GetVectorType() == VectorType::CONSTANT_VECTOR) {
@@ -100254,7 +100322,7 @@ static bool StructToStructCast(Vector &source, Vector &result, idx_t count, Cast
100254
100322
  source.Flatten(count);
100255
100323
  FlatVector::Validity(result) = FlatVector::Validity(source);
100256
100324
  }
100257
- return true;
100325
+ return all_converted;
100258
100326
  }
100259
100327
 
100260
100328
  static bool StructToVarcharCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
@@ -100333,7 +100401,7 @@ BoundCastInfo DefaultCasts::StructCastSwitch(BindCastInput &input, const Logical
100333
100401
  const LogicalType &target) {
100334
100402
  switch (target.id()) {
100335
100403
  case LogicalTypeId::STRUCT:
100336
- return BoundCastInfo(StructToStructCast, BindStructToStructCast(input, source, target));
100404
+ return BoundCastInfo(StructToStructCast, StructBoundCastData::BindStructToStructCast(input, source, target));
100337
100405
  case LogicalTypeId::JSON:
100338
100406
  case LogicalTypeId::VARCHAR: {
100339
100407
  // bind a cast in which we convert all child entries to VARCHAR entries
@@ -100343,7 +100411,8 @@ BoundCastInfo DefaultCasts::StructCastSwitch(BindCastInput &input, const Logical
100343
100411
  varchar_children.push_back(make_pair(child_entry.first, LogicalType::VARCHAR));
100344
100412
  }
100345
100413
  auto varchar_type = LogicalType::STRUCT(move(varchar_children));
100346
- return BoundCastInfo(StructToVarcharCast, BindStructToStructCast(input, source, varchar_type));
100414
+ return BoundCastInfo(StructToVarcharCast,
100415
+ StructBoundCastData::BindStructToStructCast(input, source, varchar_type));
100347
100416
  }
100348
100417
  default:
100349
100418
  return TryVectorNullCast;
@@ -100898,6 +100967,33 @@ BoundCastInfo DefaultCasts::UUIDCastSwitch(BindCastInput &input, const LogicalTy
100898
100967
 
100899
100968
  namespace duckdb {
100900
100969
 
100970
+ static bool IsNull(const char *buf, idx_t start_pos, Vector &child, idx_t row_idx) {
100971
+ if (buf[start_pos] == 'N' && buf[start_pos + 1] == 'U' && buf[start_pos + 2] == 'L' && buf[start_pos + 3] == 'L') {
100972
+ FlatVector::SetNull(child, row_idx, true);
100973
+ return true;
100974
+ }
100975
+ return false;
100976
+ }
100977
+
100978
+ inline static void SkipWhitespace(const char *buf, idx_t &pos, idx_t len) {
100979
+ while (pos < len && StringUtil::CharacterIsSpace(buf[pos])) {
100980
+ pos++;
100981
+ }
100982
+ }
100983
+
100984
+ static idx_t StringTrim(const char *buf, idx_t &start_pos, idx_t pos) {
100985
+ idx_t trailing_whitespace = 0;
100986
+ while (StringUtil::CharacterIsSpace(buf[pos - trailing_whitespace - 1])) {
100987
+ trailing_whitespace++;
100988
+ }
100989
+ if ((buf[start_pos] == '"' && buf[pos - trailing_whitespace - 1] == '"') ||
100990
+ (buf[start_pos] == '\'' && buf[pos - trailing_whitespace - 1] == '\'')) {
100991
+ start_pos++;
100992
+ trailing_whitespace++;
100993
+ }
100994
+ return (pos - trailing_whitespace);
100995
+ }
100996
+
100901
100997
  struct CountPartOperation {
100902
100998
  idx_t count = 0;
100903
100999
 
@@ -100916,10 +101012,7 @@ struct SplitStringOperation {
100916
101012
  Vector &child;
100917
101013
 
100918
101014
  void HandleValue(const char *buf, idx_t start_pos, idx_t pos) {
100919
-
100920
- if ((pos - start_pos) >= 4 && buf[start_pos] == 'N' && buf[start_pos + 1] == 'U' && buf[start_pos + 2] == 'L' &&
100921
- buf[start_pos + 3] == 'L') {
100922
- FlatVector::SetNull(child, child_start, true);
101015
+ if ((pos - start_pos) == 4 && IsNull(buf, start_pos, child, child_start)) {
100923
101016
  child_start++;
100924
101017
  return;
100925
101018
  }
@@ -100941,20 +101034,27 @@ static bool SkipToCloseQuotes(idx_t &pos, const char *buf, idx_t &len) {
100941
101034
  return false;
100942
101035
  }
100943
101036
 
100944
- static bool SkipToClose(idx_t &idx, const char *buf, idx_t &len, idx_t &lvl) {
101037
+ static bool SkipToClose(idx_t &idx, const char *buf, idx_t &len, idx_t &lvl, char close_bracket) {
101038
+ idx++;
101039
+
100945
101040
  while (idx < len) {
100946
- if (buf[idx] == '[') {
100947
- if (!SkipToClose(++idx, buf, len, lvl)) {
101041
+ if (buf[idx] == '"' || buf[idx] == '\'') {
101042
+ if (!SkipToCloseQuotes(idx, buf, len)) {
101043
+ return false;
101044
+ }
101045
+ } else if (buf[idx] == '{') {
101046
+ if (!SkipToClose(idx, buf, len, lvl, '}')) {
101047
+ return false;
101048
+ }
101049
+ } else if (buf[idx] == '[') {
101050
+ if (!SkipToClose(idx, buf, len, lvl, ']')) {
100948
101051
  return false;
100949
101052
  }
100950
101053
  lvl++;
100951
- idx++;
100952
- }
100953
- if (buf[idx] == '"' || buf[idx] == '\'') {
100954
- SkipToCloseQuotes(idx, buf, len);
100955
- }
100956
- if (buf[idx] == ']') {
100957
- lvl--;
101054
+ } else if (buf[idx] == close_bracket) {
101055
+ if (close_bracket == ']') {
101056
+ lvl--;
101057
+ }
100958
101058
  return true;
100959
101059
  }
100960
101060
  idx++;
@@ -100969,68 +101069,133 @@ static bool SplitStringifiedListInternal(const string_t &input, OP &state) {
100969
101069
  idx_t lvl = 1;
100970
101070
  idx_t pos = 0;
100971
101071
 
100972
- while (pos < len && StringUtil::CharacterIsSpace(buf[pos])) {
100973
- pos++;
100974
- }
101072
+ SkipWhitespace(buf, pos, len);
100975
101073
  if (pos == len || buf[pos] != '[') {
100976
101074
  return false;
100977
101075
  }
100978
- pos++;
100979
- while (pos < len && StringUtil::CharacterIsSpace(buf[pos])) {
100980
- pos++;
100981
- }
100982
101076
 
101077
+ SkipWhitespace(buf, ++pos, len);
100983
101078
  idx_t start_pos = pos;
100984
101079
  while (pos < len) {
100985
101080
  if (buf[pos] == '[') {
100986
- if (!SkipToClose(++pos, buf, len, ++lvl)) {
101081
+ if (!SkipToClose(pos, buf, len, ++lvl, ']')) {
100987
101082
  return false;
100988
101083
  }
100989
101084
  } else if (buf[pos] == '"' || buf[pos] == '\'') {
100990
101085
  SkipToCloseQuotes(pos, buf, len);
101086
+ } else if (buf[pos] == '{') {
101087
+ idx_t struct_lvl = 0;
101088
+ SkipToClose(pos, buf, len, struct_lvl, '}');
100991
101089
  } else if (buf[pos] == ',' || buf[pos] == ']') {
100992
101090
  idx_t trailing_whitespace = 0;
100993
101091
  while (StringUtil::CharacterIsSpace(buf[pos - trailing_whitespace - 1])) {
100994
101092
  trailing_whitespace++;
100995
101093
  }
100996
- if (!(buf[pos] == ']' && start_pos == (pos))) {
101094
+ if (!(buf[pos] == ']' && start_pos == pos)) {
100997
101095
  state.HandleValue(buf, start_pos, pos - trailing_whitespace);
100998
101096
  } // else the list is empty
100999
101097
  if (buf[pos] == ']') {
101000
101098
  lvl--;
101001
101099
  break;
101002
101100
  }
101003
- while (pos + 1 < len && StringUtil::CharacterIsSpace(buf[pos + 1])) {
101004
- pos++;
101005
- }
101006
- start_pos = pos + 1;
101007
- }
101008
- pos++;
101009
- }
101010
- pos++;
101011
- while (pos < len) {
101012
- if (!StringUtil::CharacterIsSpace(buf[pos])) {
101013
- return false;
101101
+ SkipWhitespace(buf, ++pos, len);
101102
+ start_pos = pos;
101103
+ continue;
101014
101104
  }
101015
101105
  pos++;
101016
101106
  }
101017
- if (lvl != 0) {
101018
- return false;
101019
- }
101020
- return true;
101107
+ SkipWhitespace(buf, ++pos, len);
101108
+ return (pos == len && lvl == 0);
101021
101109
  }
101022
101110
 
101023
- bool VectorStringifiedListParser::SplitStringifiedList(const string_t &input, string_t *child_data, idx_t &child_start,
101024
- Vector &child) {
101111
+ bool VectorStringToList::SplitStringifiedList(const string_t &input, string_t *child_data, idx_t &child_start,
101112
+ Vector &child) {
101025
101113
  SplitStringOperation state(child_data, child_start, child);
101026
101114
  return SplitStringifiedListInternal<SplitStringOperation>(input, state);
101027
101115
  }
101028
101116
 
101029
- idx_t VectorStringifiedListParser::CountParts(const string_t &input) {
101117
+ idx_t VectorStringToList::CountParts(const string_t &input) {
101030
101118
  CountPartOperation state;
101031
101119
  SplitStringifiedListInternal<CountPartOperation>(input, state);
101032
101120
  return state.count;
101033
101121
  }
101122
+
101123
+ static bool FindKey(const char *buf, idx_t len, idx_t &pos) {
101124
+ while (pos < len) {
101125
+ if (buf[pos] == ':') {
101126
+ return true;
101127
+ }
101128
+ pos++;
101129
+ }
101130
+ return false;
101131
+ }
101132
+
101133
+ static bool FindValue(const char *buf, idx_t len, idx_t &pos, Vector &varchar_child, idx_t &row_idx,
101134
+ ValidityMask *child_mask) {
101135
+ auto start_pos = pos;
101136
+ idx_t lvl = 0;
101137
+ while (pos < len) {
101138
+ if (buf[pos] == '"' || buf[pos] == '\'') {
101139
+ SkipToCloseQuotes(pos, buf, len);
101140
+ } else if (buf[pos] == '{') {
101141
+ SkipToClose(pos, buf, len, lvl, '}');
101142
+ } else if (buf[pos] == '[') {
101143
+ SkipToClose(pos, buf, len, lvl, ']');
101144
+ } else if (buf[pos] == ',' || buf[pos] == '}') {
101145
+ idx_t end_pos = StringTrim(buf, start_pos, pos);
101146
+ if ((end_pos - start_pos) == 4 && IsNull(buf, start_pos, varchar_child, row_idx)) {
101147
+ return true;
101148
+ }
101149
+ FlatVector::GetData<string_t>(varchar_child)[row_idx] =
101150
+ StringVector::AddString(varchar_child, buf + start_pos, end_pos - start_pos);
101151
+ child_mask->SetValid(row_idx); // any child not set to valid will remain invalid
101152
+ return true;
101153
+ }
101154
+ pos++;
101155
+ }
101156
+ return false;
101157
+ }
101158
+
101159
+ bool VectorStringToStruct::SplitStruct(string_t &input, std::vector<std::unique_ptr<Vector>> &varchar_vectors,
101160
+ idx_t &row_idx, string_map_t<idx_t> &child_names,
101161
+ std::vector<ValidityMask *> &child_masks) {
101162
+ const char *buf = input.GetDataUnsafe();
101163
+ idx_t len = input.GetSize();
101164
+ idx_t pos = 0;
101165
+ idx_t child_idx;
101166
+
101167
+ SkipWhitespace(buf, pos, len);
101168
+ if (pos == len || buf[pos] != '{') {
101169
+ return false;
101170
+ }
101171
+ SkipWhitespace(buf, ++pos, len);
101172
+ if (buf[pos] == '}') {
101173
+ pos++;
101174
+ } else {
101175
+ while (pos < len) {
101176
+ auto key_start = pos;
101177
+ if (!FindKey(buf, len, pos)) {
101178
+ return false;
101179
+ }
101180
+ auto key_end = StringTrim(buf, key_start, pos);
101181
+ string_t found_key(buf + key_start, key_end - key_start);
101182
+
101183
+ auto it = child_names.find(found_key);
101184
+ if (it == child_names.end()) {
101185
+ return false; // false key
101186
+ }
101187
+ child_idx = it->second;
101188
+ SkipWhitespace(buf, ++pos, len);
101189
+ if (!FindValue(buf, len, pos, *varchar_vectors[child_idx], row_idx, child_masks[child_idx])) {
101190
+ return false;
101191
+ }
101192
+ SkipWhitespace(buf, ++pos, len);
101193
+ }
101194
+ }
101195
+ SkipWhitespace(buf, pos, len);
101196
+ return (pos == len);
101197
+ }
101198
+
101034
101199
  } // namespace duckdb
101035
101200
 
101036
101201
 
@@ -118794,6 +118959,20 @@ struct RegexpExtractBindData : public RegexpBaseBindData {
118794
118959
  bool Equals(const FunctionData &other_p) const override;
118795
118960
  };
118796
118961
 
118962
+ struct RegexLocalState : public FunctionLocalState {
118963
+ explicit RegexLocalState(RegexpBaseBindData &info)
118964
+ : constant_pattern(duckdb_re2::StringPiece(info.constant_string.c_str(), info.constant_string.size()),
118965
+ info.options) {
118966
+ D_ASSERT(info.constant_pattern);
118967
+ }
118968
+
118969
+ RE2 constant_pattern;
118970
+ };
118971
+
118972
+ unique_ptr<FunctionLocalState> RegexInitLocalState(const BoundFunctionExpression &expr, FunctionData *bind_data);
118973
+ unique_ptr<FunctionData> RegexpMatchesBind(ClientContext &context, ScalarFunction &bound_function,
118974
+ vector<unique_ptr<Expression>> &arguments);
118975
+
118797
118976
  } // namespace duckdb
118798
118977
 
118799
118978
 
@@ -118833,18 +119012,7 @@ static inline duckdb_re2::StringPiece CreateStringPiece(string_t &input) {
118833
119012
  return duckdb_re2::StringPiece(input.GetDataUnsafe(), input.GetSize());
118834
119013
  }
118835
119014
 
118836
- struct RegexLocalState : public FunctionLocalState {
118837
- explicit RegexLocalState(RegexpBaseBindData &info)
118838
- : constant_pattern(duckdb_re2::StringPiece(info.constant_string.c_str(), info.constant_string.size()),
118839
- info.options) {
118840
- D_ASSERT(info.constant_pattern);
118841
- }
118842
-
118843
- RE2 constant_pattern;
118844
- };
118845
-
118846
- static unique_ptr<FunctionLocalState> RegexInitLocalState(const BoundFunctionExpression &expr,
118847
- FunctionData *bind_data) {
119015
+ unique_ptr<FunctionLocalState> RegexInitLocalState(const BoundFunctionExpression &expr, FunctionData *bind_data) {
118848
119016
  auto &info = (RegexpBaseBindData &)*bind_data;
118849
119017
  if (info.constant_pattern) {
118850
119018
  return make_unique<RegexLocalState>(info);
@@ -118951,8 +119119,8 @@ unique_ptr<FunctionData> RegexpMatchesBindData::Copy() const {
118951
119119
  range_success);
118952
119120
  }
118953
119121
 
118954
- static unique_ptr<FunctionData> RegexpMatchesBind(ClientContext &context, ScalarFunction &bound_function,
118955
- vector<unique_ptr<Expression>> &arguments) {
119122
+ unique_ptr<FunctionData> RegexpMatchesBind(ClientContext &context, ScalarFunction &bound_function,
119123
+ vector<unique_ptr<Expression>> &arguments) {
118956
119124
  // pattern is the second argument. If its constant, we can already prepare the pattern and store it for later.
118957
119125
  D_ASSERT(arguments.size() == 2 || arguments.size() == 3);
118958
119126
  RE2::Options options;
@@ -119398,188 +119566,106 @@ void ReverseFun::RegisterFunction(BuiltinFunctions &set) {
119398
119566
 
119399
119567
 
119400
119568
 
119401
-
119402
-
119403
-
119404
119569
  namespace duckdb {
119405
119570
 
119406
- struct StringSplitIterator {
119407
- public:
119408
- explicit StringSplitIterator(idx_t size) : size(size) {
119409
- }
119410
- virtual ~StringSplitIterator() {
119571
+ struct StringSplitInput {
119572
+ StringSplitInput(Vector &result_list, Vector &result_child, idx_t offset)
119573
+ : result_list(result_list), result_child(result_child), offset(offset) {
119411
119574
  }
119412
119575
 
119413
- idx_t size;
119576
+ Vector &result_list;
119577
+ Vector &result_child;
119578
+ idx_t offset;
119414
119579
 
119415
- public:
119416
- virtual idx_t Next(const char *input) = 0;
119417
- bool HasNext() {
119418
- return offset < size;
119419
- }
119420
- idx_t Start() {
119421
- return start;
119580
+ void AddSplit(const char *split_data, idx_t split_size, idx_t list_idx) {
119581
+ auto list_entry = offset + list_idx;
119582
+ if (list_entry >= ListVector::GetListCapacity(result_list)) {
119583
+ ListVector::SetListSize(result_list, offset + list_idx);
119584
+ ListVector::Reserve(result_list, ListVector::GetListCapacity(result_list) * 2);
119585
+ }
119586
+ FlatVector::GetData<string_t>(result_child)[list_entry] =
119587
+ StringVector::AddString(result_child, split_data, split_size);
119422
119588
  }
119423
-
119424
- protected:
119425
- idx_t start = 0; // end of last place a delim match was found
119426
- idx_t offset = 0; // current position
119427
119589
  };
119428
119590
 
119429
- struct AsciiStringSplitIterator : virtual public StringSplitIterator {
119430
- public:
119431
- AsciiStringSplitIterator(size_t size, const char *delim, const size_t delim_size)
119432
- : StringSplitIterator(size), delim(delim), delim_size(delim_size) {
119433
- }
119434
- idx_t Next(const char *input) override {
119435
- // special case: separate by empty delimiter
119591
+ struct RegularStringSplit {
119592
+ static idx_t Find(const char *input_data, idx_t input_size, const char *delim_data, idx_t delim_size,
119593
+ idx_t &match_size, void *data) {
119594
+ match_size = delim_size;
119436
119595
  if (delim_size == 0) {
119437
- offset++;
119438
- start = offset;
119439
- return offset;
119440
- }
119441
- for (offset = start; HasNext(); offset++) {
119442
- // potential delimiter match
119443
- if (input[offset] == delim[0] && offset + delim_size <= size) {
119444
- idx_t i;
119445
- for (i = 1; i < delim_size; i++) {
119446
- if (input[offset + i] != delim[i]) {
119447
- break;
119448
- }
119449
- }
119450
- // delimiter found: skip start over delimiter
119451
- if (i == delim_size) {
119452
- start = offset + delim_size;
119453
- return offset;
119454
- }
119455
- }
119596
+ return 0;
119456
119597
  }
119457
- return offset;
119598
+ return ContainsFun::Find((const unsigned char *)input_data, input_size, (const unsigned char *)delim_data,
119599
+ delim_size);
119458
119600
  }
119601
+ };
119459
119602
 
119460
- protected:
119461
- const char *delim;
119462
- size_t delim_size;
119603
+ struct ConstantRegexpStringSplit {
119604
+ static idx_t Find(const char *input_data, idx_t input_size, const char *delim_data, idx_t delim_size,
119605
+ idx_t &match_size, void *data) {
119606
+ D_ASSERT(data);
119607
+ auto regex = (duckdb_re2::RE2 *)data;
119608
+ duckdb_re2::StringPiece match;
119609
+ if (!regex->Match(duckdb_re2::StringPiece(input_data, input_size), 0, input_size, RE2::UNANCHORED, &match, 1)) {
119610
+ return DConstants::INVALID_INDEX;
119611
+ }
119612
+ match_size = match.size();
119613
+ return match.data() - input_data;
119614
+ }
119463
119615
  };
119464
119616
 
119465
- struct UnicodeStringSplitIterator : virtual public StringSplitIterator {
119466
- public:
119467
- UnicodeStringSplitIterator(size_t input_size, const char *delim, const size_t delim_size)
119468
- : StringSplitIterator(input_size), delim_size(delim_size) {
119469
- int cp_sz;
119470
- for (idx_t i = 0; i < delim_size; i += cp_sz) {
119471
- delim_cps.push_back(utf8proc_codepoint(delim, cp_sz));
119617
+ struct RegexpStringSplit {
119618
+ static idx_t Find(const char *input_data, idx_t input_size, const char *delim_data, idx_t delim_size,
119619
+ idx_t &match_size, void *data) {
119620
+ duckdb_re2::RE2 regex(duckdb_re2::StringPiece(delim_data, delim_size));
119621
+ if (!regex.ok()) {
119622
+ throw InvalidInputException(regex.error());
119472
119623
  }
119624
+ return ConstantRegexpStringSplit::Find(input_data, input_size, delim_data, delim_size, match_size, &regex);
119473
119625
  }
119474
- idx_t Next(const char *input) override {
119475
- // special case: separate by empty delimiter
119476
- if (delim_size == 0) {
119477
- offset = utf8proc_next_grapheme(input, size, offset);
119478
- start = offset;
119479
- return offset;
119480
- }
119481
- int cp_sz;
119482
- for (offset = start; HasNext(); offset = utf8proc_next_grapheme(input, size, offset)) {
119483
- // potential delimiter match
119484
- if (utf8proc_codepoint(&input[offset], cp_sz) == delim_cps[0] && offset + delim_size <= size) {
119485
- idx_t delim_offset = cp_sz;
119486
- for (idx_t i = 1; i < delim_cps.size(); i++) {
119487
- if (utf8proc_codepoint(&input[offset + delim_offset], cp_sz) != delim_cps[i]) {
119626
+ };
119627
+
119628
+ struct StringSplitter {
119629
+ template <class OP>
119630
+ static idx_t Split(string_t input, string_t delim, StringSplitInput &state, void *data) {
119631
+ auto input_data = input.GetDataUnsafe();
119632
+ auto input_size = input.GetSize();
119633
+ auto delim_data = delim.GetDataUnsafe();
119634
+ auto delim_size = delim.GetSize();
119635
+ idx_t list_idx = 0;
119636
+ while (input_size > 0) {
119637
+ idx_t match_size;
119638
+ auto pos = OP::Find(input_data, input_size, delim_data, delim_size, match_size, data);
119639
+ if (pos > input_size) {
119640
+ break;
119641
+ }
119642
+ if (match_size == 0 && pos == 0) {
119643
+ // special case: 0 length match and pos is 0
119644
+ // move to the next character
119645
+ for (pos++; pos < input_size; pos++) {
119646
+ if (LengthFun::IsCharacter(input_data[pos])) {
119488
119647
  break;
119489
119648
  }
119490
- delim_offset += cp_sz;
119491
119649
  }
119492
- // delimiter found: skip start over delimiter
119493
- if (delim_offset == delim_size) {
119494
- start = offset + delim_size;
119495
- return offset;
119650
+ if (pos == input_size) {
119651
+ break;
119496
119652
  }
119497
119653
  }
119498
- }
119499
- return offset;
119500
- }
119501
-
119502
- protected:
119503
- vector<utf8proc_int32_t> delim_cps;
119504
- size_t delim_size;
119505
- };
119654
+ D_ASSERT(input_size >= pos + match_size);
119655
+ state.AddSplit(input_data, pos, list_idx);
119506
119656
 
119507
- struct RegexStringSplitIterator : virtual public StringSplitIterator {
119508
- public:
119509
- RegexStringSplitIterator(size_t input_size, unique_ptr<RE2> re, const bool ascii_only)
119510
- : StringSplitIterator(input_size), re(move(re)), ascii_only(ascii_only) {
119511
- }
119512
- idx_t Next(const char *input) override {
119513
- duckdb_re2::StringPiece input_sp(input, size);
119514
- duckdb_re2::StringPiece match;
119515
- if (re->Match(input_sp, start, size, RE2::UNANCHORED, &match, 1)) {
119516
- offset = match.data() - input;
119517
- // special case: 0 length match
119518
- if (match.empty() && start < size) {
119519
- if (ascii_only) {
119520
- offset++;
119521
- } else {
119522
- offset = utf8proc_next_grapheme(input, size, offset);
119523
- }
119524
- start = offset;
119525
- } else {
119526
- start = offset + match.size();
119527
- }
119528
- } else {
119529
- offset = size;
119657
+ list_idx++;
119658
+ input_data += (pos + match_size);
119659
+ input_size -= (pos + match_size);
119530
119660
  }
119531
- return offset;
119661
+ state.AddSplit(input_data, input_size, list_idx);
119662
+ list_idx++;
119663
+ return list_idx;
119532
119664
  }
119533
-
119534
- protected:
119535
- unique_ptr<RE2> re;
119536
- bool ascii_only;
119537
119665
  };
119538
119666
 
119539
- void BaseStringSplitFunction(const char *input, StringSplitIterator &iter, Vector &result) {
119540
- // special case: empty string
119541
- if (iter.size == 0) {
119542
- Value val = StringVector::AddString(ListVector::GetEntry(result), &input[0], 0);
119543
- ListVector::PushBack(result, val);
119544
- return;
119545
- }
119546
- while (iter.HasNext()) {
119547
- idx_t start = iter.Start();
119548
- idx_t end = iter.Next(input);
119549
- size_t length = end - start;
119550
- Value to_insert(StringVector::AddString(ListVector::GetEntry(result), &input[start], length));
119551
- ListVector::PushBack(result, to_insert);
119552
- }
119553
- }
119554
-
119555
- unique_ptr<Vector> BaseStringSplitFunction(string_t input, string_t delim, const bool regex) {
119556
- const char *input_data = input.GetDataUnsafe();
119557
- size_t input_size = input.GetSize();
119558
- const char *delim_data = delim.GetDataUnsafe();
119559
- size_t delim_size = delim.GetSize();
119560
-
119561
- bool ascii_only = Utf8Proc::Analyze(input_data, input_size) == UnicodeType::ASCII;
119562
-
119563
- auto list_type = LogicalType::LIST(LogicalType::VARCHAR);
119564
- auto output = make_unique<Vector>(list_type);
119565
- unique_ptr<StringSplitIterator> iter;
119566
- if (regex) {
119567
- auto re = make_unique<RE2>(duckdb_re2::StringPiece(delim_data, delim_size));
119568
- if (!re->ok()) {
119569
- throw Exception(re->error());
119570
- }
119571
- iter = make_unique_base<StringSplitIterator, RegexStringSplitIterator>(input_size, move(re), ascii_only);
119572
- } else if (ascii_only) {
119573
- iter = make_unique_base<StringSplitIterator, AsciiStringSplitIterator>(input_size, delim_data, delim_size);
119574
- } else {
119575
- iter = make_unique_base<StringSplitIterator, UnicodeStringSplitIterator>(input_size, delim_data, delim_size);
119576
- }
119577
- BaseStringSplitFunction(input_data, *iter, *output);
119578
-
119579
- return output;
119580
- }
119581
-
119582
- static void StringSplitExecutor(DataChunk &args, ExpressionState &state, Vector &result, const bool regex) {
119667
+ template <class OP>
119668
+ static void StringSplitExecutor(DataChunk &args, ExpressionState &state, Vector &result, void *data = nullptr) {
119583
119669
  UnifiedVectorFormat input_data;
119584
119670
  args.data[0].ToUnifiedFormat(args.size(), input_data);
119585
119671
  auto inputs = (string_t *)input_data.data;
@@ -119594,10 +119680,11 @@ static void StringSplitExecutor(DataChunk &args, ExpressionState &state, Vector
119594
119680
  ListVector::SetListSize(result, 0);
119595
119681
 
119596
119682
  auto list_struct_data = FlatVector::GetData<list_entry_t>(result);
119597
- auto list_vector_type = LogicalType::LIST(LogicalType::VARCHAR);
119598
119683
 
119599
- idx_t total_len = 0;
119684
+ // count all the splits and set up the list entries
119685
+ auto &child_entry = ListVector::GetEntry(result);
119600
119686
  auto &result_mask = FlatVector::Validity(result);
119687
+ idx_t total_splits = 0;
119601
119688
  for (idx_t i = 0; i < args.size(); i++) {
119602
119689
  auto input_idx = input_data.sel->get_index(i);
119603
119690
  auto delim_idx = delim_data.sel->get_index(i);
@@ -119605,36 +119692,43 @@ static void StringSplitExecutor(DataChunk &args, ExpressionState &state, Vector
119605
119692
  result_mask.SetInvalid(i);
119606
119693
  continue;
119607
119694
  }
119608
- string_t input = inputs[input_idx];
119609
-
119610
- unique_ptr<Vector> split_input;
119695
+ StringSplitInput split_input(result, child_entry, total_splits);
119611
119696
  if (!delim_data.validity.RowIsValid(delim_idx)) {
119612
- // special case: delimiter is NULL
119613
- split_input = make_unique<Vector>(list_vector_type);
119614
- Value val(input);
119615
- ListVector::PushBack(*split_input, val);
119616
- } else {
119617
- string_t delim = delims[delim_idx];
119618
- split_input = BaseStringSplitFunction(input, delim, regex);
119697
+ // delim is NULL: copy the complete entry
119698
+ split_input.AddSplit(inputs[input_idx].GetDataUnsafe(), inputs[input_idx].GetSize(), 0);
119699
+ list_struct_data[i].length = 1;
119700
+ list_struct_data[i].offset = total_splits;
119701
+ total_splits++;
119702
+ continue;
119619
119703
  }
119620
- list_struct_data[i].length = ListVector::GetListSize(*split_input);
119621
- list_struct_data[i].offset = total_len;
119622
- total_len += ListVector::GetListSize(*split_input);
119623
- ListVector::Append(result, ListVector::GetEntry(*split_input), ListVector::GetListSize(*split_input));
119704
+ auto list_length = StringSplitter::Split<OP>(inputs[input_idx], delims[delim_idx], split_input, data);
119705
+ list_struct_data[i].length = list_length;
119706
+ list_struct_data[i].offset = total_splits;
119707
+ total_splits += list_length;
119624
119708
  }
119709
+ ListVector::SetListSize(result, total_splits);
119710
+ D_ASSERT(ListVector::GetListSize(result) == total_splits);
119625
119711
 
119626
- D_ASSERT(ListVector::GetListSize(result) == total_len);
119627
119712
  if (args.AllConstant()) {
119628
119713
  result.SetVectorType(VectorType::CONSTANT_VECTOR);
119629
119714
  }
119630
119715
  }
119631
119716
 
119632
119717
  static void StringSplitFunction(DataChunk &args, ExpressionState &state, Vector &result) {
119633
- StringSplitExecutor(args, state, result, false);
119718
+ StringSplitExecutor<RegularStringSplit>(args, state, result, nullptr);
119634
119719
  }
119635
119720
 
119636
119721
  static void StringSplitRegexFunction(DataChunk &args, ExpressionState &state, Vector &result) {
119637
- StringSplitExecutor(args, state, result, true);
119722
+ auto &func_expr = (BoundFunctionExpression &)state.expr;
119723
+ auto &info = (RegexpMatchesBindData &)*func_expr.bind_info;
119724
+ if (info.constant_pattern) {
119725
+ // fast path: pre-compiled regex
119726
+ auto &lstate = (RegexLocalState &)*ExecuteFunctionState::GetFunctionState(state);
119727
+ StringSplitExecutor<ConstantRegexpStringSplit>(args, state, result, &lstate.constant_pattern);
119728
+ } else {
119729
+ // slow path: have to re-compile regex for every row
119730
+ StringSplitExecutor<RegexpStringSplit>(args, state, result);
119731
+ }
119638
119732
  }
119639
119733
 
119640
119734
  void StringSplitFun::RegisterFunction(BuiltinFunctions &set) {
@@ -119645,10 +119739,18 @@ void StringSplitFun::RegisterFunction(BuiltinFunctions &set) {
119645
119739
  regular_fun.null_handling = FunctionNullHandling::SPECIAL_HANDLING;
119646
119740
  set.AddFunction({"string_split", "str_split", "string_to_array", "split"}, regular_fun);
119647
119741
 
119648
- auto regex_fun =
119649
- ScalarFunction({LogicalType::VARCHAR, LogicalType::VARCHAR}, varchar_list_type, StringSplitRegexFunction);
119650
- regex_fun.null_handling = FunctionNullHandling::SPECIAL_HANDLING;
119651
- set.AddFunction({"string_split_regex", "str_split_regex", "regexp_split_to_array"}, regex_fun);
119742
+ ScalarFunctionSet regexp_split("string_split_regex");
119743
+ ScalarFunction regex_fun({LogicalType::VARCHAR, LogicalType::VARCHAR}, varchar_list_type, StringSplitRegexFunction,
119744
+ RegexpMatchesBind, nullptr, nullptr, RegexInitLocalState, LogicalType::INVALID,
119745
+ FunctionSideEffects::NO_SIDE_EFFECTS, FunctionNullHandling::SPECIAL_HANDLING);
119746
+ regexp_split.AddFunction(regex_fun);
119747
+ // regexp options
119748
+ regex_fun.arguments.emplace_back(LogicalType::VARCHAR);
119749
+ regexp_split.AddFunction(regex_fun);
119750
+ for (auto &name : {"string_split_regex", "str_split_regex", "regexp_split_to_array"}) {
119751
+ regexp_split.name = name;
119752
+ set.AddFunction(regexp_split);
119753
+ }
119652
119754
  }
119653
119755
 
119654
119756
  } // namespace duckdb