duckdb 0.5.2-dev582.0 → 0.5.2-dev602.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
- "version": "0.5.2-dev582.0",
4
+ "version": "0.5.2-dev602.0",
5
5
  "description": "DuckDB node.js API",
6
6
  "gypfile": true,
7
7
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -15918,6 +15918,7 @@ void FileBuffer::Clear() {
15918
15918
 
15919
15919
  namespace duckdb {
15920
15920
 
15921
+ class ClientContext;
15921
15922
  class Value;
15922
15923
 
15923
15924
  //! Abstract type that provides client-specific context to FileSystem.
@@ -15926,6 +15927,8 @@ public:
15926
15927
  virtual ~FileOpener() {};
15927
15928
 
15928
15929
  virtual bool TryGetCurrentSetting(const string &key, Value &result) = 0;
15930
+
15931
+ static FileOpener *Get(ClientContext &context);
15929
15932
  };
15930
15933
 
15931
15934
  } // namespace duckdb
@@ -38481,8 +38484,9 @@ vector<string> StringUtil::TopNStrings(vector<pair<string, idx_t>> scores, idx_t
38481
38484
  if (scores.empty()) {
38482
38485
  return vector<string>();
38483
38486
  }
38484
- sort(scores.begin(), scores.end(),
38485
- [](const pair<string, idx_t> &a, const pair<string, idx_t> &b) -> bool { return a.second < b.second; });
38487
+ sort(scores.begin(), scores.end(), [](const pair<string, idx_t> &a, const pair<string, idx_t> &b) -> bool {
38488
+ return a.second < b.second || (a.second == b.second && a.first.size() < b.first.size());
38489
+ });
38486
38490
  vector<string> result;
38487
38491
  result.push_back(scores[0].first);
38488
38492
  for (idx_t i = 1; i < MinValue<idx_t>(scores.size(), n); i++) {
@@ -38554,7 +38558,11 @@ vector<string> StringUtil::TopNLevenshtein(const vector<string> &strings, const
38554
38558
  vector<pair<string, idx_t>> scores;
38555
38559
  scores.reserve(strings.size());
38556
38560
  for (auto &str : strings) {
38557
- scores.emplace_back(str, LevenshteinDistance(str, target));
38561
+ if (target.size() < str.size()) {
38562
+ scores.emplace_back(str, LevenshteinDistance(str.substr(0, target.size()), target));
38563
+ } else {
38564
+ scores.emplace_back(str, LevenshteinDistance(str, target));
38565
+ }
38558
38566
  }
38559
38567
  return TopNStrings(scores, n, threshold);
38560
38568
  }
@@ -114621,37 +114629,48 @@ void PrintfFun::RegisterFunction(BuiltinFunctions &set) {
114621
114629
 
114622
114630
  namespace duckdb {
114623
114631
 
114624
- struct RegexpMatchesBindData : public FunctionData {
114625
- RegexpMatchesBindData(duckdb_re2::RE2::Options options, string constant_string);
114626
- ~RegexpMatchesBindData() override;
114632
+ struct RegexpBaseBindData : public FunctionData {
114633
+ RegexpBaseBindData();
114634
+ RegexpBaseBindData(duckdb_re2::RE2::Options options, string constant_string, bool constant_pattern = true);
114635
+ virtual ~RegexpBaseBindData();
114627
114636
 
114628
114637
  duckdb_re2::RE2::Options options;
114629
114638
  string constant_string;
114630
114639
  bool constant_pattern;
114640
+
114641
+ virtual bool Equals(const FunctionData &other_p) const override;
114642
+ };
114643
+
114644
+ struct RegexpMatchesBindData : public RegexpBaseBindData {
114645
+ RegexpMatchesBindData(duckdb_re2::RE2::Options options, string constant_string, bool constant_pattern);
114646
+ RegexpMatchesBindData(duckdb_re2::RE2::Options options, string constant_string, bool constant_pattern,
114647
+ string range_min, string range_max, bool range_success);
114648
+
114631
114649
  string range_min;
114632
114650
  string range_max;
114633
114651
  bool range_success;
114634
114652
 
114635
114653
  unique_ptr<FunctionData> Copy() const override;
114636
- bool Equals(const FunctionData &other_p) const override;
114637
114654
  };
114638
114655
 
114639
- struct RegexpReplaceBindData : public FunctionData {
114640
- duckdb_re2::RE2::Options options;
114656
+ struct RegexpReplaceBindData : public RegexpBaseBindData {
114657
+ RegexpReplaceBindData();
114658
+ RegexpReplaceBindData(duckdb_re2::RE2::Options options, string constant_string, bool constant_pattern,
114659
+ bool global_replace);
114660
+
114641
114661
  bool global_replace;
114642
114662
 
114643
114663
  unique_ptr<FunctionData> Copy() const override;
114644
114664
  bool Equals(const FunctionData &other_p) const override;
114645
114665
  };
114646
114666
 
114647
- struct RegexpExtractBindData : public FunctionData {
114648
- RegexpExtractBindData(bool constant_pattern, const string &pattern, const string &group_string_p);
114667
+ struct RegexpExtractBindData : public RegexpBaseBindData {
114668
+ RegexpExtractBindData();
114669
+ RegexpExtractBindData(duckdb_re2::RE2::Options options, string constant_string, bool constant_pattern,
114670
+ string group_string);
114649
114671
 
114650
- const bool constant_pattern;
114651
- const string constant_string;
114652
-
114653
- const string group_string;
114654
- const duckdb_re2::StringPiece rewrite;
114672
+ string group_string;
114673
+ duckdb_re2::StringPiece rewrite;
114655
114674
 
114656
114675
  unique_ptr<FunctionData> Copy() const override;
114657
114676
  bool Equals(const FunctionData &other_p) const override;
@@ -114672,41 +114691,49 @@ struct RegexpExtractBindData : public FunctionData {
114672
114691
 
114673
114692
  namespace duckdb {
114674
114693
 
114675
- RegexpMatchesBindData::RegexpMatchesBindData(duckdb_re2::RE2::Options options, string constant_string_p)
114676
- : options(options), constant_string(move(constant_string_p)) {
114677
- constant_pattern = !constant_string.empty();
114678
- if (constant_pattern) {
114679
- auto pattern = make_unique<RE2>(constant_string, options);
114680
- if (!pattern->ok()) {
114681
- throw Exception(pattern->error());
114682
- }
114683
-
114684
- range_success = pattern->PossibleMatchRange(&range_min, &range_max, 1000);
114685
- } else {
114686
- range_success = false;
114687
- }
114688
- }
114689
-
114690
114694
  static bool RegexOptionsEquals(const duckdb_re2::RE2::Options &opt_a, const duckdb_re2::RE2::Options &opt_b) {
114691
114695
  return opt_a.case_sensitive() == opt_b.case_sensitive();
114692
114696
  }
114693
114697
 
114694
- RegexpMatchesBindData::~RegexpMatchesBindData() {
114698
+ RegexpBaseBindData::RegexpBaseBindData() : constant_pattern(false) {
114699
+ }
114700
+ RegexpBaseBindData::RegexpBaseBindData(duckdb_re2::RE2::Options options, string constant_string_p,
114701
+ bool constant_pattern)
114702
+ : options(options), constant_string(move(constant_string_p)), constant_pattern(constant_pattern) {
114695
114703
  }
114696
114704
 
114697
- unique_ptr<FunctionData> RegexpMatchesBindData::Copy() const {
114698
- return make_unique<RegexpMatchesBindData>(options, constant_string);
114705
+ RegexpBaseBindData::~RegexpBaseBindData() {
114699
114706
  }
114700
114707
 
114701
- bool RegexpMatchesBindData::Equals(const FunctionData &other_p) const {
114702
- auto &other = (const RegexpMatchesBindData &)other_p;
114703
- return constant_string == other.constant_string && RegexOptionsEquals(options, other.options);
114708
+ bool RegexpBaseBindData::Equals(const FunctionData &other_p) const {
114709
+ auto &other = (RegexpBaseBindData &)other_p;
114710
+ return constant_pattern == other.constant_pattern && constant_string == other.constant_string &&
114711
+ RegexOptionsEquals(options, other.options);
114704
114712
  }
114705
114713
 
114706
114714
  static inline duckdb_re2::StringPiece CreateStringPiece(string_t &input) {
114707
114715
  return duckdb_re2::StringPiece(input.GetDataUnsafe(), input.GetSize());
114708
114716
  }
114709
114717
 
114718
+ struct RegexLocalState : public FunctionLocalState {
114719
+ explicit RegexLocalState(RegexpBaseBindData &info)
114720
+ : constant_pattern(duckdb_re2::StringPiece(info.constant_string.c_str(), info.constant_string.size()),
114721
+ info.options) {
114722
+ D_ASSERT(info.constant_pattern);
114723
+ }
114724
+
114725
+ RE2 constant_pattern;
114726
+ };
114727
+
114728
+ static unique_ptr<FunctionLocalState> RegexInitLocalState(const BoundFunctionExpression &expr,
114729
+ FunctionData *bind_data) {
114730
+ auto &info = (RegexpBaseBindData &)*bind_data;
114731
+ if (info.constant_pattern) {
114732
+ return make_unique<RegexLocalState>(info);
114733
+ }
114734
+ return nullptr;
114735
+ }
114736
+
114710
114737
  static void ParseRegexOptions(const string &options, duckdb_re2::RE2::Options &result, bool *global_replace = nullptr) {
114711
114738
  for (idx_t i = 0; i < options.size(); i++) {
114712
114739
  switch (options[i]) {
@@ -114751,41 +114778,88 @@ static void ParseRegexOptions(const string &options, duckdb_re2::RE2::Options &r
114751
114778
  }
114752
114779
  }
114753
114780
 
114754
- struct RegexPartialMatch {
114755
- static inline bool Operation(const duckdb_re2::StringPiece &input, duckdb_re2::RE2 &re) {
114756
- return duckdb_re2::RE2::PartialMatch(input, re);
114781
+ void ParseRegexOptions(Expression &expr, RE2::Options &target, bool *global_replace = nullptr) {
114782
+ if (expr.HasParameter()) {
114783
+ throw ParameterNotResolvedException();
114757
114784
  }
114758
- };
114785
+ if (!expr.IsFoldable()) {
114786
+ throw InvalidInputException("Regex options field must be a constant");
114787
+ }
114788
+ Value options_str = ExpressionExecutor::EvaluateScalar(expr);
114789
+ if (!options_str.IsNull() && options_str.type().id() == LogicalTypeId::VARCHAR) {
114790
+ ParseRegexOptions(StringValue::Get(options_str), target, global_replace);
114791
+ }
114792
+ }
114759
114793
 
114760
- struct RegexFullMatch {
114761
- static inline bool Operation(const duckdb_re2::StringPiece &input, duckdb_re2::RE2 &re) {
114762
- return duckdb_re2::RE2::FullMatch(input, re);
114794
+ static bool TryParseConstantPattern(Expression &expr, string &constant_string) {
114795
+ if (!expr.IsFoldable()) {
114796
+ return false;
114763
114797
  }
114764
- };
114798
+ Value pattern_str = ExpressionExecutor::EvaluateScalar(expr);
114799
+ if (!pattern_str.IsNull() && pattern_str.type().id() == LogicalTypeId::VARCHAR) {
114800
+ constant_string = StringValue::Get(pattern_str);
114801
+ return true;
114802
+ }
114803
+ return false;
114804
+ }
114765
114805
 
114766
- struct RegexLocalState : public FunctionLocalState {
114767
- explicit RegexLocalState(RegexpMatchesBindData &info)
114768
- : constant_pattern(duckdb_re2::StringPiece(info.constant_string.c_str(), info.constant_string.size()),
114769
- info.options) {
114770
- D_ASSERT(info.constant_pattern);
114806
+ //===--------------------------------------------------------------------===//
114807
+ // Regexp Matches
114808
+ //===--------------------------------------------------------------------===//
114809
+ RegexpMatchesBindData::RegexpMatchesBindData(duckdb_re2::RE2::Options options, string constant_string_p,
114810
+ bool constant_pattern)
114811
+ : RegexpBaseBindData(options, move(constant_string_p), constant_pattern) {
114812
+ if (constant_pattern) {
114813
+ auto pattern = make_unique<RE2>(constant_string, options);
114814
+ if (!pattern->ok()) {
114815
+ throw Exception(pattern->error());
114816
+ }
114817
+
114818
+ range_success = pattern->PossibleMatchRange(&range_min, &range_max, 1000);
114819
+ } else {
114820
+ range_success = false;
114771
114821
  }
114822
+ }
114772
114823
 
114773
- explicit RegexLocalState(RegexpExtractBindData &info)
114774
- : constant_pattern(duckdb_re2::StringPiece(info.constant_string.c_str(), info.constant_string.size())) {
114775
- D_ASSERT(info.constant_pattern);
114824
+ RegexpMatchesBindData::RegexpMatchesBindData(duckdb_re2::RE2::Options options, string constant_string_p,
114825
+ bool constant_pattern, string range_min_p, string range_max_p,
114826
+ bool range_success)
114827
+ : RegexpBaseBindData(options, move(constant_string_p), constant_pattern), range_min(move(range_min_p)),
114828
+ range_max(move(range_max_p)), range_success(range_success) {
114829
+ }
114830
+
114831
+ unique_ptr<FunctionData> RegexpMatchesBindData::Copy() const {
114832
+ return make_unique<RegexpMatchesBindData>(options, constant_string, constant_pattern, range_min, range_max,
114833
+ range_success);
114834
+ }
114835
+
114836
+ static unique_ptr<FunctionData> RegexpMatchesBind(ClientContext &context, ScalarFunction &bound_function,
114837
+ vector<unique_ptr<Expression>> &arguments) {
114838
+ // pattern is the second argument. If it's constant, we can already prepare the pattern and store it for later.
114839
+ D_ASSERT(arguments.size() == 2 || arguments.size() == 3);
114840
+ RE2::Options options;
114841
+ options.set_log_errors(false);
114842
+ if (arguments.size() == 3) {
114843
+ ParseRegexOptions(*arguments[2], options);
114776
114844
  }
114777
114845
 
114778
- RE2 constant_pattern;
114846
+ string constant_string;
114847
+ bool constant_pattern;
114848
+ constant_pattern = TryParseConstantPattern(*arguments[1], constant_string);
114849
+ return make_unique<RegexpMatchesBindData>(options, move(constant_string), constant_pattern);
114850
+ }
114851
+
114852
+ struct RegexPartialMatch {
114853
+ static inline bool Operation(const duckdb_re2::StringPiece &input, duckdb_re2::RE2 &re) {
114854
+ return duckdb_re2::RE2::PartialMatch(input, re);
114855
+ }
114779
114856
  };
114780
114857
 
114781
- static unique_ptr<FunctionLocalState> RegexInitLocalState(const BoundFunctionExpression &expr,
114782
- FunctionData *bind_data) {
114783
- auto &info = (RegexpMatchesBindData &)*bind_data;
114784
- if (info.constant_pattern) {
114785
- return make_unique<RegexLocalState>(info);
114858
+ struct RegexFullMatch {
114859
+ static inline bool Operation(const duckdb_re2::StringPiece &input, duckdb_re2::RE2 &re) {
114860
+ return duckdb_re2::RE2::FullMatch(input, re);
114786
114861
  }
114787
- return nullptr;
114788
- }
114862
+ };
114789
114863
 
114790
114864
  template <class OP>
114791
114865
  static void RegexpMatchesFunction(DataChunk &args, ExpressionState &state, Vector &result) {
@@ -114812,153 +114886,106 @@ static void RegexpMatchesFunction(DataChunk &args, ExpressionState &state, Vecto
114812
114886
  }
114813
114887
  }
114814
114888
 
114815
- static unique_ptr<FunctionData> RegexpMatchesBind(ClientContext &context, ScalarFunction &bound_function,
114816
- vector<unique_ptr<Expression>> &arguments) {
114817
- // pattern is the second argument. If it's constant, we can already prepare the pattern and store it for later.
114818
- D_ASSERT(arguments.size() == 2 || arguments.size() == 3);
114819
- RE2::Options options;
114820
- options.set_log_errors(false);
114821
- if (arguments.size() == 3) {
114822
- if (arguments[2]->HasParameter()) {
114823
- throw ParameterNotResolvedException();
114824
- }
114825
- if (!arguments[2]->IsFoldable()) {
114826
- throw InvalidInputException("Regex options field must be a constant");
114827
- }
114828
- Value options_str = ExpressionExecutor::EvaluateScalar(*arguments[2]);
114829
- if (!options_str.IsNull() && options_str.type().id() == LogicalTypeId::VARCHAR) {
114830
- ParseRegexOptions(StringValue::Get(options_str), options);
114831
- }
114832
- }
114833
-
114834
- if (arguments[1]->IsFoldable()) {
114835
- Value pattern_str = ExpressionExecutor::EvaluateScalar(*arguments[1]);
114836
- if (!pattern_str.IsNull() && pattern_str.type().id() == LogicalTypeId::VARCHAR) {
114837
- return make_unique<RegexpMatchesBindData>(options, StringValue::Get(pattern_str));
114838
- }
114839
- }
114840
- return make_unique<RegexpMatchesBindData>(options, "");
114889
+ //===--------------------------------------------------------------------===//
114890
+ // Regexp Replace
114891
+ //===--------------------------------------------------------------------===//
114892
+ RegexpReplaceBindData::RegexpReplaceBindData() : global_replace(false) {
114841
114893
  }
114842
114894
 
114843
- static void RegexReplaceFunction(DataChunk &args, ExpressionState &state, Vector &result) {
114844
- auto &func_expr = (BoundFunctionExpression &)state.expr;
114845
- auto &info = (RegexpReplaceBindData &)*func_expr.bind_info;
114846
-
114847
- auto &strings = args.data[0];
114848
- auto &patterns = args.data[1];
114849
- auto &replaces = args.data[2];
114850
-
114851
- TernaryExecutor::Execute<string_t, string_t, string_t, string_t>(
114852
- strings, patterns, replaces, result, args.size(), [&](string_t input, string_t pattern, string_t replace) {
114853
- RE2 re(CreateStringPiece(pattern), info.options);
114854
- std::string sstring = input.GetString();
114855
- if (info.global_replace) {
114856
- RE2::GlobalReplace(&sstring, re, CreateStringPiece(replace));
114857
- } else {
114858
- RE2::Replace(&sstring, re, CreateStringPiece(replace));
114859
- }
114860
- return StringVector::AddString(result, sstring);
114861
- });
114895
+ RegexpReplaceBindData::RegexpReplaceBindData(duckdb_re2::RE2::Options options, string constant_string_p,
114896
+ bool constant_pattern, bool global_replace)
114897
+ : RegexpBaseBindData(options, move(constant_string_p), constant_pattern), global_replace(global_replace) {
114862
114898
  }
114863
114899
 
114864
114900
  unique_ptr<FunctionData> RegexpReplaceBindData::Copy() const {
114865
- auto copy = make_unique<RegexpReplaceBindData>();
114866
- copy->options = options;
114867
- copy->global_replace = global_replace;
114901
+ auto copy = make_unique<RegexpReplaceBindData>(options, constant_string, constant_pattern, global_replace);
114868
114902
  return move(copy);
114869
114903
  }
114870
114904
 
114871
114905
  bool RegexpReplaceBindData::Equals(const FunctionData &other_p) const {
114872
114906
  auto &other = (const RegexpReplaceBindData &)other_p;
114873
- return global_replace == other.global_replace && RegexOptionsEquals(options, other.options);
114907
+ return RegexpBaseBindData::Equals(other) && global_replace == other.global_replace;
114874
114908
  }
114875
114909
 
114876
114910
  static unique_ptr<FunctionData> RegexReplaceBind(ClientContext &context, ScalarFunction &bound_function,
114877
114911
  vector<unique_ptr<Expression>> &arguments) {
114878
114912
  auto data = make_unique<RegexpReplaceBindData>();
114879
- data->options.set_log_errors(false);
114913
+
114914
+ data->constant_pattern = TryParseConstantPattern(*arguments[1], data->constant_string);
114880
114915
  if (arguments.size() == 4) {
114881
- if (arguments[3]->HasParameter()) {
114882
- throw ParameterNotResolvedException();
114883
- }
114884
- if (!arguments[3]->IsFoldable()) {
114885
- throw InvalidInputException("Regex options field must be a constant");
114886
- }
114887
- Value options_str = ExpressionExecutor::EvaluateScalar(*arguments[3]);
114888
- if (!options_str.IsNull() && options_str.type().id() == LogicalTypeId::VARCHAR) {
114889
- ParseRegexOptions(StringValue::Get(options_str), data->options, &data->global_replace);
114890
- }
114916
+ ParseRegexOptions(*arguments[3], data->options, &data->global_replace);
114891
114917
  }
114892
-
114918
+ data->options.set_log_errors(false);
114893
114919
  return move(data);
114894
114920
  }
114895
114921
 
114896
- inline static string_t Extract(const string_t &input, Vector &result, const RE2 &re,
114897
- const duckdb_re2::StringPiece &rewrite) {
114898
- std::string extracted;
114899
- RE2::Extract(input.GetString(), re, rewrite, &extracted);
114900
- return StringVector::AddString(result, extracted.c_str(), extracted.size());
114901
- }
114902
-
114903
- static void RegexExtractFunction(DataChunk &args, ExpressionState &state, Vector &result) {
114922
+ static void RegexReplaceFunction(DataChunk &args, ExpressionState &state, Vector &result) {
114904
114923
  auto &func_expr = (BoundFunctionExpression &)state.expr;
114905
- const auto &info = (RegexpExtractBindData &)*func_expr.bind_info;
114924
+ auto &info = (RegexpReplaceBindData &)*func_expr.bind_info;
114906
114925
 
114907
114926
  auto &strings = args.data[0];
114908
114927
  auto &patterns = args.data[1];
114928
+ auto &replaces = args.data[2];
114929
+
114909
114930
  if (info.constant_pattern) {
114910
114931
  auto &lstate = (RegexLocalState &)*ExecuteFunctionState::GetFunctionState(state);
114911
- UnaryExecutor::Execute<string_t, string_t>(strings, result, args.size(), [&](string_t input) {
114912
- return Extract(input, result, lstate.constant_pattern, info.rewrite);
114913
- });
114932
+ BinaryExecutor::Execute<string_t, string_t, string_t>(
114933
+ strings, replaces, result, args.size(), [&](string_t input, string_t replace) {
114934
+ std::string sstring = input.GetString();
114935
+ if (info.global_replace) {
114936
+ RE2::GlobalReplace(&sstring, lstate.constant_pattern, CreateStringPiece(replace));
114937
+ } else {
114938
+ RE2::Replace(&sstring, lstate.constant_pattern, CreateStringPiece(replace));
114939
+ }
114940
+ return StringVector::AddString(result, sstring);
114941
+ });
114914
114942
  } else {
114915
- BinaryExecutor::Execute<string_t, string_t, string_t>(strings, patterns, result, args.size(),
114916
- [&](string_t input, string_t pattern) {
114917
- RE2 re(CreateStringPiece(pattern));
114918
- return Extract(input, result, re, info.rewrite);
114919
- });
114943
+ TernaryExecutor::Execute<string_t, string_t, string_t, string_t>(
114944
+ strings, patterns, replaces, result, args.size(), [&](string_t input, string_t pattern, string_t replace) {
114945
+ RE2 re(CreateStringPiece(pattern), info.options);
114946
+ std::string sstring = input.GetString();
114947
+ if (info.global_replace) {
114948
+ RE2::GlobalReplace(&sstring, re, CreateStringPiece(replace));
114949
+ } else {
114950
+ RE2::Replace(&sstring, re, CreateStringPiece(replace));
114951
+ }
114952
+ return StringVector::AddString(result, sstring);
114953
+ });
114920
114954
  }
114921
114955
  }
114922
114956
 
114923
- static unique_ptr<FunctionLocalState> RegexExtractInitLocalState(const BoundFunctionExpression &expr,
114924
- FunctionData *bind_data) {
114925
- auto &info = (RegexpExtractBindData &)*bind_data;
114926
- if (info.constant_pattern) {
114927
- return make_unique<RegexLocalState>(info);
114928
- }
114929
- return nullptr;
114957
+ //===--------------------------------------------------------------------===//
114958
+ // Regexp Extract
114959
+ //===--------------------------------------------------------------------===//
114960
+ RegexpExtractBindData::RegexpExtractBindData() {
114930
114961
  }
114931
114962
 
114932
- RegexpExtractBindData::RegexpExtractBindData(bool constant_pattern, const string &constant_string,
114933
- const string &group_string_p)
114934
- : constant_pattern(constant_pattern), constant_string(constant_string), group_string(group_string_p),
114963
+ RegexpExtractBindData::RegexpExtractBindData(duckdb_re2::RE2::Options options, string constant_string_p,
114964
+ bool constant_pattern, string group_string_p)
114965
+ : RegexpBaseBindData(options, move(constant_string_p), constant_pattern), group_string(move(group_string_p)),
114935
114966
  rewrite(group_string) {
114936
114967
  }
114937
114968
 
114938
114969
  unique_ptr<FunctionData> RegexpExtractBindData::Copy() const {
114939
- return make_unique<RegexpExtractBindData>(constant_pattern, constant_string, group_string);
114970
+ return make_unique<RegexpExtractBindData>(options, constant_string, constant_pattern, group_string);
114940
114971
  }
114941
114972
 
114942
114973
  bool RegexpExtractBindData::Equals(const FunctionData &other_p) const {
114943
114974
  auto &other = (const RegexpExtractBindData &)other_p;
114944
- return constant_string == other.constant_string && group_string == other.group_string;
114975
+ return RegexpBaseBindData::Equals(other) && group_string == other.group_string;
114945
114976
  }
114946
114977
 
114947
114978
  static unique_ptr<FunctionData> RegexExtractBind(ClientContext &context, ScalarFunction &bound_function,
114948
114979
  vector<unique_ptr<Expression>> &arguments) {
114949
114980
  D_ASSERT(arguments.size() >= 2);
114950
114981
 
114951
- bool constant_pattern = arguments[1]->IsFoldable();
114952
- string pattern = "";
114953
- if (constant_pattern) {
114954
- Value pattern_str = ExpressionExecutor::EvaluateScalar(*arguments[1]);
114955
- if (!pattern_str.IsNull() && pattern_str.type().id() == LogicalTypeId::VARCHAR) {
114956
- pattern = StringValue::Get(pattern_str);
114957
- }
114958
- }
114982
+ duckdb_re2::RE2::Options options;
114983
+
114984
+ string constant_string;
114985
+ bool constant_pattern = TryParseConstantPattern(*arguments[1], constant_string);
114959
114986
 
114960
114987
  string group_string = "";
114961
- if (arguments.size() == 3) {
114988
+ if (arguments.size() >= 3) {
114962
114989
  if (arguments[2]->HasParameter()) {
114963
114990
  throw ParameterNotResolvedException();
114964
114991
  }
@@ -114976,8 +115003,37 @@ static unique_ptr<FunctionData> RegexExtractBind(ClientContext &context, ScalarF
114976
115003
  } else {
114977
115004
  group_string = "\\0";
114978
115005
  }
115006
+ if (arguments.size() >= 4) {
115007
+ ParseRegexOptions(*arguments[3], options);
115008
+ }
115009
+ return make_unique<RegexpExtractBindData>(options, move(constant_string), constant_pattern, move(group_string));
115010
+ }
115011
+
115012
+ inline static string_t Extract(const string_t &input, Vector &result, const RE2 &re,
115013
+ const duckdb_re2::StringPiece &rewrite) {
115014
+ std::string extracted;
115015
+ RE2::Extract(input.GetString(), re, rewrite, &extracted);
115016
+ return StringVector::AddString(result, extracted.c_str(), extracted.size());
115017
+ }
115018
+
115019
+ static void RegexExtractFunction(DataChunk &args, ExpressionState &state, Vector &result) {
115020
+ auto &func_expr = (BoundFunctionExpression &)state.expr;
115021
+ const auto &info = (RegexpExtractBindData &)*func_expr.bind_info;
114979
115022
 
114980
- return make_unique<RegexpExtractBindData>(constant_pattern, pattern, group_string);
115023
+ auto &strings = args.data[0];
115024
+ auto &patterns = args.data[1];
115025
+ if (info.constant_pattern) {
115026
+ auto &lstate = (RegexLocalState &)*ExecuteFunctionState::GetFunctionState(state);
115027
+ UnaryExecutor::Execute<string_t, string_t>(strings, result, args.size(), [&](string_t input) {
115028
+ return Extract(input, result, lstate.constant_pattern, info.rewrite);
115029
+ });
115030
+ } else {
115031
+ BinaryExecutor::Execute<string_t, string_t, string_t>(strings, patterns, result, args.size(),
115032
+ [&](string_t input, string_t pattern) {
115033
+ RE2 re(CreateStringPiece(pattern), info.options);
115034
+ return Extract(input, result, re, info.rewrite);
115035
+ });
115036
+ }
114981
115037
  }
114982
115038
 
114983
115039
  void RegexpFun::RegisterFunction(BuiltinFunctions &set) {
@@ -115003,19 +115059,24 @@ void RegexpFun::RegisterFunction(BuiltinFunctions &set) {
115003
115059
 
115004
115060
  ScalarFunctionSet regexp_replace("regexp_replace");
115005
115061
  regexp_replace.AddFunction(ScalarFunction({LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::VARCHAR},
115006
- LogicalType::VARCHAR, RegexReplaceFunction, RegexReplaceBind));
115007
- regexp_replace.AddFunction(
115008
- ScalarFunction({LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::VARCHAR},
115009
- LogicalType::VARCHAR, RegexReplaceFunction, RegexReplaceBind));
115062
+ LogicalType::VARCHAR, RegexReplaceFunction, RegexReplaceBind, nullptr,
115063
+ nullptr, RegexInitLocalState));
115064
+ regexp_replace.AddFunction(ScalarFunction(
115065
+ {LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::VARCHAR}, LogicalType::VARCHAR,
115066
+ RegexReplaceFunction, RegexReplaceBind, nullptr, nullptr, RegexInitLocalState));
115010
115067
 
115011
115068
  ScalarFunctionSet regexp_extract("regexp_extract");
115012
115069
  regexp_extract.AddFunction(
115013
115070
  ScalarFunction({LogicalType::VARCHAR, LogicalType::VARCHAR}, LogicalType::VARCHAR, RegexExtractFunction,
115014
- RegexExtractBind, nullptr, nullptr, RegexExtractInitLocalState, LogicalType::INVALID,
115071
+ RegexExtractBind, nullptr, nullptr, RegexInitLocalState, LogicalType::INVALID,
115015
115072
  FunctionSideEffects::NO_SIDE_EFFECTS, FunctionNullHandling::SPECIAL_HANDLING));
115016
115073
  regexp_extract.AddFunction(ScalarFunction(
115017
115074
  {LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::INTEGER}, LogicalType::VARCHAR, RegexExtractFunction,
115018
- RegexExtractBind, nullptr, nullptr, RegexExtractInitLocalState, LogicalType::INVALID,
115075
+ RegexExtractBind, nullptr, nullptr, RegexInitLocalState, LogicalType::INVALID,
115076
+ FunctionSideEffects::NO_SIDE_EFFECTS, FunctionNullHandling::SPECIAL_HANDLING));
115077
+ regexp_extract.AddFunction(ScalarFunction(
115078
+ {LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::INTEGER, LogicalType::VARCHAR}, LogicalType::VARCHAR,
115079
+ RegexExtractFunction, RegexExtractBind, nullptr, nullptr, RegexInitLocalState, LogicalType::INVALID,
115019
115080
  FunctionSideEffects::NO_SIDE_EFFECTS, FunctionNullHandling::SPECIAL_HANDLING));
115020
115081
 
115021
115082
  set.AddFunction(regexp_full_match);
@@ -128400,6 +128461,10 @@ Executor &ClientContext::GetExecutor() {
128400
128461
  return *active_query->executor;
128401
128462
  }
128402
128463
 
128464
+ FileOpener *FileOpener::Get(ClientContext &context) {
128465
+ return ClientData::Get(context).file_opener.get();
128466
+ }
128467
+
128403
128468
  const string &ClientContext::GetCurrentQuery() {
128404
128469
  D_ASSERT(active_query);
128405
128470
  return active_query->query;
@@ -162735,7 +162800,7 @@ bool KeywordHelper::IsKeyword(const string &text) {
162735
162800
  return Parser::IsKeyword(text);
162736
162801
  }
162737
162802
 
162738
- bool KeywordHelper::RequiresQuotes(const string &text) {
162803
+ bool KeywordHelper::RequiresQuotes(const string &text, bool allow_caps) {
162739
162804
  for (size_t i = 0; i < text.size(); i++) {
162740
162805
  if (i > 0 && (text[i] >= '0' && text[i] <= '9')) {
162741
162806
  continue;
@@ -162743,6 +162808,11 @@ bool KeywordHelper::RequiresQuotes(const string &text) {
162743
162808
  if (text[i] >= 'a' && text[i] <= 'z') {
162744
162809
  continue;
162745
162810
  }
162811
+ if (allow_caps) {
162812
+ if (text[i] >= 'A' && text[i] <= 'Z') {
162813
+ continue;
162814
+ }
162815
+ }
162746
162816
  if (text[i] == '_') {
162747
162817
  continue;
162748
162818
  }
@@ -162751,8 +162821,8 @@ bool KeywordHelper::RequiresQuotes(const string &text) {
162751
162821
  return IsKeyword(text);
162752
162822
  }
162753
162823
 
162754
- string KeywordHelper::WriteOptionallyQuoted(const string &text, char quote) {
162755
- if (!RequiresQuotes(text)) {
162824
+ string KeywordHelper::WriteOptionallyQuoted(const string &text, char quote, bool allow_caps) {
162825
+ if (!RequiresQuotes(text, allow_caps)) {
162756
162826
  return text;
162757
162827
  }
162758
162828
  return string(1, quote) + StringUtil::Replace(text, string(1, quote), string(2, quote)) + string(1, quote);