duckdb 0.5.2-dev582.0 → 0.5.2-dev602.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +240 -170
- package/src/duckdb.hpp +26 -5
- package/src/parquet-amalgamation.cpp +32272 -32272
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -15918,6 +15918,7 @@ void FileBuffer::Clear() {
|
|
|
15918
15918
|
|
|
15919
15919
|
namespace duckdb {
|
|
15920
15920
|
|
|
15921
|
+
class ClientContext;
|
|
15921
15922
|
class Value;
|
|
15922
15923
|
|
|
15923
15924
|
//! Abstract type that provide client-specific context to FileSystem.
|
|
@@ -15926,6 +15927,8 @@ public:
|
|
|
15926
15927
|
virtual ~FileOpener() {};
|
|
15927
15928
|
|
|
15928
15929
|
virtual bool TryGetCurrentSetting(const string &key, Value &result) = 0;
|
|
15930
|
+
|
|
15931
|
+
static FileOpener *Get(ClientContext &context);
|
|
15929
15932
|
};
|
|
15930
15933
|
|
|
15931
15934
|
} // namespace duckdb
|
|
@@ -38481,8 +38484,9 @@ vector<string> StringUtil::TopNStrings(vector<pair<string, idx_t>> scores, idx_t
|
|
|
38481
38484
|
if (scores.empty()) {
|
|
38482
38485
|
return vector<string>();
|
|
38483
38486
|
}
|
|
38484
|
-
sort(scores.begin(), scores.end(),
|
|
38485
|
-
|
|
38487
|
+
sort(scores.begin(), scores.end(), [](const pair<string, idx_t> &a, const pair<string, idx_t> &b) -> bool {
|
|
38488
|
+
return a.second < b.second || (a.second == b.second && a.first.size() < b.first.size());
|
|
38489
|
+
});
|
|
38486
38490
|
vector<string> result;
|
|
38487
38491
|
result.push_back(scores[0].first);
|
|
38488
38492
|
for (idx_t i = 1; i < MinValue<idx_t>(scores.size(), n); i++) {
|
|
@@ -38554,7 +38558,11 @@ vector<string> StringUtil::TopNLevenshtein(const vector<string> &strings, const
|
|
|
38554
38558
|
vector<pair<string, idx_t>> scores;
|
|
38555
38559
|
scores.reserve(strings.size());
|
|
38556
38560
|
for (auto &str : strings) {
|
|
38557
|
-
|
|
38561
|
+
if (target.size() < str.size()) {
|
|
38562
|
+
scores.emplace_back(str, LevenshteinDistance(str.substr(0, target.size()), target));
|
|
38563
|
+
} else {
|
|
38564
|
+
scores.emplace_back(str, LevenshteinDistance(str, target));
|
|
38565
|
+
}
|
|
38558
38566
|
}
|
|
38559
38567
|
return TopNStrings(scores, n, threshold);
|
|
38560
38568
|
}
|
|
@@ -114621,37 +114629,48 @@ void PrintfFun::RegisterFunction(BuiltinFunctions &set) {
|
|
|
114621
114629
|
|
|
114622
114630
|
namespace duckdb {
|
|
114623
114631
|
|
|
114624
|
-
struct
|
|
114625
|
-
|
|
114626
|
-
|
|
114632
|
+
struct RegexpBaseBindData : public FunctionData {
|
|
114633
|
+
RegexpBaseBindData();
|
|
114634
|
+
RegexpBaseBindData(duckdb_re2::RE2::Options options, string constant_string, bool constant_pattern = true);
|
|
114635
|
+
virtual ~RegexpBaseBindData();
|
|
114627
114636
|
|
|
114628
114637
|
duckdb_re2::RE2::Options options;
|
|
114629
114638
|
string constant_string;
|
|
114630
114639
|
bool constant_pattern;
|
|
114640
|
+
|
|
114641
|
+
virtual bool Equals(const FunctionData &other_p) const override;
|
|
114642
|
+
};
|
|
114643
|
+
|
|
114644
|
+
struct RegexpMatchesBindData : public RegexpBaseBindData {
|
|
114645
|
+
RegexpMatchesBindData(duckdb_re2::RE2::Options options, string constant_string, bool constant_pattern);
|
|
114646
|
+
RegexpMatchesBindData(duckdb_re2::RE2::Options options, string constant_string, bool constant_pattern,
|
|
114647
|
+
string range_min, string range_max, bool range_success);
|
|
114648
|
+
|
|
114631
114649
|
string range_min;
|
|
114632
114650
|
string range_max;
|
|
114633
114651
|
bool range_success;
|
|
114634
114652
|
|
|
114635
114653
|
unique_ptr<FunctionData> Copy() const override;
|
|
114636
|
-
bool Equals(const FunctionData &other_p) const override;
|
|
114637
114654
|
};
|
|
114638
114655
|
|
|
114639
|
-
struct RegexpReplaceBindData : public
|
|
114640
|
-
|
|
114656
|
+
struct RegexpReplaceBindData : public RegexpBaseBindData {
|
|
114657
|
+
RegexpReplaceBindData();
|
|
114658
|
+
RegexpReplaceBindData(duckdb_re2::RE2::Options options, string constant_string, bool constant_pattern,
|
|
114659
|
+
bool global_replace);
|
|
114660
|
+
|
|
114641
114661
|
bool global_replace;
|
|
114642
114662
|
|
|
114643
114663
|
unique_ptr<FunctionData> Copy() const override;
|
|
114644
114664
|
bool Equals(const FunctionData &other_p) const override;
|
|
114645
114665
|
};
|
|
114646
114666
|
|
|
114647
|
-
struct RegexpExtractBindData : public
|
|
114648
|
-
RegexpExtractBindData(
|
|
114667
|
+
struct RegexpExtractBindData : public RegexpBaseBindData {
|
|
114668
|
+
RegexpExtractBindData();
|
|
114669
|
+
RegexpExtractBindData(duckdb_re2::RE2::Options options, string constant_string, bool constant_pattern,
|
|
114670
|
+
string group_string);
|
|
114649
114671
|
|
|
114650
|
-
|
|
114651
|
-
|
|
114652
|
-
|
|
114653
|
-
const string group_string;
|
|
114654
|
-
const duckdb_re2::StringPiece rewrite;
|
|
114672
|
+
string group_string;
|
|
114673
|
+
duckdb_re2::StringPiece rewrite;
|
|
114655
114674
|
|
|
114656
114675
|
unique_ptr<FunctionData> Copy() const override;
|
|
114657
114676
|
bool Equals(const FunctionData &other_p) const override;
|
|
@@ -114672,41 +114691,49 @@ struct RegexpExtractBindData : public FunctionData {
|
|
|
114672
114691
|
|
|
114673
114692
|
namespace duckdb {
|
|
114674
114693
|
|
|
114675
|
-
RegexpMatchesBindData::RegexpMatchesBindData(duckdb_re2::RE2::Options options, string constant_string_p)
|
|
114676
|
-
: options(options), constant_string(move(constant_string_p)) {
|
|
114677
|
-
constant_pattern = !constant_string.empty();
|
|
114678
|
-
if (constant_pattern) {
|
|
114679
|
-
auto pattern = make_unique<RE2>(constant_string, options);
|
|
114680
|
-
if (!pattern->ok()) {
|
|
114681
|
-
throw Exception(pattern->error());
|
|
114682
|
-
}
|
|
114683
|
-
|
|
114684
|
-
range_success = pattern->PossibleMatchRange(&range_min, &range_max, 1000);
|
|
114685
|
-
} else {
|
|
114686
|
-
range_success = false;
|
|
114687
|
-
}
|
|
114688
|
-
}
|
|
114689
|
-
|
|
114690
114694
|
static bool RegexOptionsEquals(const duckdb_re2::RE2::Options &opt_a, const duckdb_re2::RE2::Options &opt_b) {
|
|
114691
114695
|
return opt_a.case_sensitive() == opt_b.case_sensitive();
|
|
114692
114696
|
}
|
|
114693
114697
|
|
|
114694
|
-
|
|
114698
|
+
RegexpBaseBindData::RegexpBaseBindData() : constant_pattern(false) {
|
|
114699
|
+
}
|
|
114700
|
+
RegexpBaseBindData::RegexpBaseBindData(duckdb_re2::RE2::Options options, string constant_string_p,
|
|
114701
|
+
bool constant_pattern)
|
|
114702
|
+
: options(options), constant_string(move(constant_string_p)), constant_pattern(constant_pattern) {
|
|
114695
114703
|
}
|
|
114696
114704
|
|
|
114697
|
-
|
|
114698
|
-
return make_unique<RegexpMatchesBindData>(options, constant_string);
|
|
114705
|
+
RegexpBaseBindData::~RegexpBaseBindData() {
|
|
114699
114706
|
}
|
|
114700
114707
|
|
|
114701
|
-
bool
|
|
114702
|
-
auto &other = (
|
|
114703
|
-
return
|
|
114708
|
+
bool RegexpBaseBindData::Equals(const FunctionData &other_p) const {
|
|
114709
|
+
auto &other = (RegexpBaseBindData &)other_p;
|
|
114710
|
+
return constant_pattern == other.constant_pattern && constant_string == other.constant_string &&
|
|
114711
|
+
RegexOptionsEquals(options, other.options);
|
|
114704
114712
|
}
|
|
114705
114713
|
|
|
114706
114714
|
static inline duckdb_re2::StringPiece CreateStringPiece(string_t &input) {
|
|
114707
114715
|
return duckdb_re2::StringPiece(input.GetDataUnsafe(), input.GetSize());
|
|
114708
114716
|
}
|
|
114709
114717
|
|
|
114718
|
+
struct RegexLocalState : public FunctionLocalState {
|
|
114719
|
+
explicit RegexLocalState(RegexpBaseBindData &info)
|
|
114720
|
+
: constant_pattern(duckdb_re2::StringPiece(info.constant_string.c_str(), info.constant_string.size()),
|
|
114721
|
+
info.options) {
|
|
114722
|
+
D_ASSERT(info.constant_pattern);
|
|
114723
|
+
}
|
|
114724
|
+
|
|
114725
|
+
RE2 constant_pattern;
|
|
114726
|
+
};
|
|
114727
|
+
|
|
114728
|
+
static unique_ptr<FunctionLocalState> RegexInitLocalState(const BoundFunctionExpression &expr,
|
|
114729
|
+
FunctionData *bind_data) {
|
|
114730
|
+
auto &info = (RegexpBaseBindData &)*bind_data;
|
|
114731
|
+
if (info.constant_pattern) {
|
|
114732
|
+
return make_unique<RegexLocalState>(info);
|
|
114733
|
+
}
|
|
114734
|
+
return nullptr;
|
|
114735
|
+
}
|
|
114736
|
+
|
|
114710
114737
|
static void ParseRegexOptions(const string &options, duckdb_re2::RE2::Options &result, bool *global_replace = nullptr) {
|
|
114711
114738
|
for (idx_t i = 0; i < options.size(); i++) {
|
|
114712
114739
|
switch (options[i]) {
|
|
@@ -114751,41 +114778,88 @@ static void ParseRegexOptions(const string &options, duckdb_re2::RE2::Options &r
|
|
|
114751
114778
|
}
|
|
114752
114779
|
}
|
|
114753
114780
|
|
|
114754
|
-
|
|
114755
|
-
|
|
114756
|
-
|
|
114781
|
+
void ParseRegexOptions(Expression &expr, RE2::Options &target, bool *global_replace = nullptr) {
|
|
114782
|
+
if (expr.HasParameter()) {
|
|
114783
|
+
throw ParameterNotResolvedException();
|
|
114757
114784
|
}
|
|
114758
|
-
|
|
114785
|
+
if (!expr.IsFoldable()) {
|
|
114786
|
+
throw InvalidInputException("Regex options field must be a constant");
|
|
114787
|
+
}
|
|
114788
|
+
Value options_str = ExpressionExecutor::EvaluateScalar(expr);
|
|
114789
|
+
if (!options_str.IsNull() && options_str.type().id() == LogicalTypeId::VARCHAR) {
|
|
114790
|
+
ParseRegexOptions(StringValue::Get(options_str), target, global_replace);
|
|
114791
|
+
}
|
|
114792
|
+
}
|
|
114759
114793
|
|
|
114760
|
-
|
|
114761
|
-
|
|
114762
|
-
return
|
|
114794
|
+
static bool TryParseConstantPattern(Expression &expr, string &constant_string) {
|
|
114795
|
+
if (!expr.IsFoldable()) {
|
|
114796
|
+
return false;
|
|
114763
114797
|
}
|
|
114764
|
-
|
|
114798
|
+
Value pattern_str = ExpressionExecutor::EvaluateScalar(expr);
|
|
114799
|
+
if (!pattern_str.IsNull() && pattern_str.type().id() == LogicalTypeId::VARCHAR) {
|
|
114800
|
+
constant_string = StringValue::Get(pattern_str);
|
|
114801
|
+
return true;
|
|
114802
|
+
}
|
|
114803
|
+
return false;
|
|
114804
|
+
}
|
|
114765
114805
|
|
|
114766
|
-
|
|
114767
|
-
|
|
114768
|
-
|
|
114769
|
-
|
|
114770
|
-
|
|
114806
|
+
//===--------------------------------------------------------------------===//
|
|
114807
|
+
// Regexp Matches
|
|
114808
|
+
//===--------------------------------------------------------------------===//
|
|
114809
|
+
RegexpMatchesBindData::RegexpMatchesBindData(duckdb_re2::RE2::Options options, string constant_string_p,
|
|
114810
|
+
bool constant_pattern)
|
|
114811
|
+
: RegexpBaseBindData(options, move(constant_string_p), constant_pattern) {
|
|
114812
|
+
if (constant_pattern) {
|
|
114813
|
+
auto pattern = make_unique<RE2>(constant_string, options);
|
|
114814
|
+
if (!pattern->ok()) {
|
|
114815
|
+
throw Exception(pattern->error());
|
|
114816
|
+
}
|
|
114817
|
+
|
|
114818
|
+
range_success = pattern->PossibleMatchRange(&range_min, &range_max, 1000);
|
|
114819
|
+
} else {
|
|
114820
|
+
range_success = false;
|
|
114771
114821
|
}
|
|
114822
|
+
}
|
|
114772
114823
|
|
|
114773
|
-
|
|
114774
|
-
|
|
114775
|
-
|
|
114824
|
+
RegexpMatchesBindData::RegexpMatchesBindData(duckdb_re2::RE2::Options options, string constant_string_p,
|
|
114825
|
+
bool constant_pattern, string range_min_p, string range_max_p,
|
|
114826
|
+
bool range_success)
|
|
114827
|
+
: RegexpBaseBindData(options, move(constant_string_p), constant_pattern), range_min(move(range_min_p)),
|
|
114828
|
+
range_max(move(range_max_p)), range_success(range_success) {
|
|
114829
|
+
}
|
|
114830
|
+
|
|
114831
|
+
unique_ptr<FunctionData> RegexpMatchesBindData::Copy() const {
|
|
114832
|
+
return make_unique<RegexpMatchesBindData>(options, constant_string, constant_pattern, range_min, range_max,
|
|
114833
|
+
range_success);
|
|
114834
|
+
}
|
|
114835
|
+
|
|
114836
|
+
static unique_ptr<FunctionData> RegexpMatchesBind(ClientContext &context, ScalarFunction &bound_function,
|
|
114837
|
+
vector<unique_ptr<Expression>> &arguments) {
|
|
114838
|
+
// pattern is the second argument. If its constant, we can already prepare the pattern and store it for later.
|
|
114839
|
+
D_ASSERT(arguments.size() == 2 || arguments.size() == 3);
|
|
114840
|
+
RE2::Options options;
|
|
114841
|
+
options.set_log_errors(false);
|
|
114842
|
+
if (arguments.size() == 3) {
|
|
114843
|
+
ParseRegexOptions(*arguments[2], options);
|
|
114776
114844
|
}
|
|
114777
114845
|
|
|
114778
|
-
|
|
114846
|
+
string constant_string;
|
|
114847
|
+
bool constant_pattern;
|
|
114848
|
+
constant_pattern = TryParseConstantPattern(*arguments[1], constant_string);
|
|
114849
|
+
return make_unique<RegexpMatchesBindData>(options, move(constant_string), constant_pattern);
|
|
114850
|
+
}
|
|
114851
|
+
|
|
114852
|
+
struct RegexPartialMatch {
|
|
114853
|
+
static inline bool Operation(const duckdb_re2::StringPiece &input, duckdb_re2::RE2 &re) {
|
|
114854
|
+
return duckdb_re2::RE2::PartialMatch(input, re);
|
|
114855
|
+
}
|
|
114779
114856
|
};
|
|
114780
114857
|
|
|
114781
|
-
|
|
114782
|
-
|
|
114783
|
-
|
|
114784
|
-
if (info.constant_pattern) {
|
|
114785
|
-
return make_unique<RegexLocalState>(info);
|
|
114858
|
+
struct RegexFullMatch {
|
|
114859
|
+
static inline bool Operation(const duckdb_re2::StringPiece &input, duckdb_re2::RE2 &re) {
|
|
114860
|
+
return duckdb_re2::RE2::FullMatch(input, re);
|
|
114786
114861
|
}
|
|
114787
|
-
|
|
114788
|
-
}
|
|
114862
|
+
};
|
|
114789
114863
|
|
|
114790
114864
|
template <class OP>
|
|
114791
114865
|
static void RegexpMatchesFunction(DataChunk &args, ExpressionState &state, Vector &result) {
|
|
@@ -114812,153 +114886,106 @@ static void RegexpMatchesFunction(DataChunk &args, ExpressionState &state, Vecto
|
|
|
114812
114886
|
}
|
|
114813
114887
|
}
|
|
114814
114888
|
|
|
114815
|
-
|
|
114816
|
-
|
|
114817
|
-
|
|
114818
|
-
|
|
114819
|
-
RE2::Options options;
|
|
114820
|
-
options.set_log_errors(false);
|
|
114821
|
-
if (arguments.size() == 3) {
|
|
114822
|
-
if (arguments[2]->HasParameter()) {
|
|
114823
|
-
throw ParameterNotResolvedException();
|
|
114824
|
-
}
|
|
114825
|
-
if (!arguments[2]->IsFoldable()) {
|
|
114826
|
-
throw InvalidInputException("Regex options field must be a constant");
|
|
114827
|
-
}
|
|
114828
|
-
Value options_str = ExpressionExecutor::EvaluateScalar(*arguments[2]);
|
|
114829
|
-
if (!options_str.IsNull() && options_str.type().id() == LogicalTypeId::VARCHAR) {
|
|
114830
|
-
ParseRegexOptions(StringValue::Get(options_str), options);
|
|
114831
|
-
}
|
|
114832
|
-
}
|
|
114833
|
-
|
|
114834
|
-
if (arguments[1]->IsFoldable()) {
|
|
114835
|
-
Value pattern_str = ExpressionExecutor::EvaluateScalar(*arguments[1]);
|
|
114836
|
-
if (!pattern_str.IsNull() && pattern_str.type().id() == LogicalTypeId::VARCHAR) {
|
|
114837
|
-
return make_unique<RegexpMatchesBindData>(options, StringValue::Get(pattern_str));
|
|
114838
|
-
}
|
|
114839
|
-
}
|
|
114840
|
-
return make_unique<RegexpMatchesBindData>(options, "");
|
|
114889
|
+
//===--------------------------------------------------------------------===//
|
|
114890
|
+
// Regexp Replace
|
|
114891
|
+
//===--------------------------------------------------------------------===//
|
|
114892
|
+
RegexpReplaceBindData::RegexpReplaceBindData() : global_replace(false) {
|
|
114841
114893
|
}
|
|
114842
114894
|
|
|
114843
|
-
|
|
114844
|
-
|
|
114845
|
-
|
|
114846
|
-
|
|
114847
|
-
auto &strings = args.data[0];
|
|
114848
|
-
auto &patterns = args.data[1];
|
|
114849
|
-
auto &replaces = args.data[2];
|
|
114850
|
-
|
|
114851
|
-
TernaryExecutor::Execute<string_t, string_t, string_t, string_t>(
|
|
114852
|
-
strings, patterns, replaces, result, args.size(), [&](string_t input, string_t pattern, string_t replace) {
|
|
114853
|
-
RE2 re(CreateStringPiece(pattern), info.options);
|
|
114854
|
-
std::string sstring = input.GetString();
|
|
114855
|
-
if (info.global_replace) {
|
|
114856
|
-
RE2::GlobalReplace(&sstring, re, CreateStringPiece(replace));
|
|
114857
|
-
} else {
|
|
114858
|
-
RE2::Replace(&sstring, re, CreateStringPiece(replace));
|
|
114859
|
-
}
|
|
114860
|
-
return StringVector::AddString(result, sstring);
|
|
114861
|
-
});
|
|
114895
|
+
RegexpReplaceBindData::RegexpReplaceBindData(duckdb_re2::RE2::Options options, string constant_string_p,
|
|
114896
|
+
bool constant_pattern, bool global_replace)
|
|
114897
|
+
: RegexpBaseBindData(options, move(constant_string_p), constant_pattern), global_replace(global_replace) {
|
|
114862
114898
|
}
|
|
114863
114899
|
|
|
114864
114900
|
unique_ptr<FunctionData> RegexpReplaceBindData::Copy() const {
|
|
114865
|
-
auto copy = make_unique<RegexpReplaceBindData>();
|
|
114866
|
-
copy->options = options;
|
|
114867
|
-
copy->global_replace = global_replace;
|
|
114901
|
+
auto copy = make_unique<RegexpReplaceBindData>(options, constant_string, constant_pattern, global_replace);
|
|
114868
114902
|
return move(copy);
|
|
114869
114903
|
}
|
|
114870
114904
|
|
|
114871
114905
|
bool RegexpReplaceBindData::Equals(const FunctionData &other_p) const {
|
|
114872
114906
|
auto &other = (const RegexpReplaceBindData &)other_p;
|
|
114873
|
-
return global_replace == other.global_replace
|
|
114907
|
+
return RegexpBaseBindData::Equals(other) && global_replace == other.global_replace;
|
|
114874
114908
|
}
|
|
114875
114909
|
|
|
114876
114910
|
static unique_ptr<FunctionData> RegexReplaceBind(ClientContext &context, ScalarFunction &bound_function,
|
|
114877
114911
|
vector<unique_ptr<Expression>> &arguments) {
|
|
114878
114912
|
auto data = make_unique<RegexpReplaceBindData>();
|
|
114879
|
-
|
|
114913
|
+
|
|
114914
|
+
data->constant_pattern = TryParseConstantPattern(*arguments[1], data->constant_string);
|
|
114880
114915
|
if (arguments.size() == 4) {
|
|
114881
|
-
|
|
114882
|
-
throw ParameterNotResolvedException();
|
|
114883
|
-
}
|
|
114884
|
-
if (!arguments[3]->IsFoldable()) {
|
|
114885
|
-
throw InvalidInputException("Regex options field must be a constant");
|
|
114886
|
-
}
|
|
114887
|
-
Value options_str = ExpressionExecutor::EvaluateScalar(*arguments[3]);
|
|
114888
|
-
if (!options_str.IsNull() && options_str.type().id() == LogicalTypeId::VARCHAR) {
|
|
114889
|
-
ParseRegexOptions(StringValue::Get(options_str), data->options, &data->global_replace);
|
|
114890
|
-
}
|
|
114916
|
+
ParseRegexOptions(*arguments[3], data->options, &data->global_replace);
|
|
114891
114917
|
}
|
|
114892
|
-
|
|
114918
|
+
data->options.set_log_errors(false);
|
|
114893
114919
|
return move(data);
|
|
114894
114920
|
}
|
|
114895
114921
|
|
|
114896
|
-
|
|
114897
|
-
const duckdb_re2::StringPiece &rewrite) {
|
|
114898
|
-
std::string extracted;
|
|
114899
|
-
RE2::Extract(input.GetString(), re, rewrite, &extracted);
|
|
114900
|
-
return StringVector::AddString(result, extracted.c_str(), extracted.size());
|
|
114901
|
-
}
|
|
114902
|
-
|
|
114903
|
-
static void RegexExtractFunction(DataChunk &args, ExpressionState &state, Vector &result) {
|
|
114922
|
+
static void RegexReplaceFunction(DataChunk &args, ExpressionState &state, Vector &result) {
|
|
114904
114923
|
auto &func_expr = (BoundFunctionExpression &)state.expr;
|
|
114905
|
-
|
|
114924
|
+
auto &info = (RegexpReplaceBindData &)*func_expr.bind_info;
|
|
114906
114925
|
|
|
114907
114926
|
auto &strings = args.data[0];
|
|
114908
114927
|
auto &patterns = args.data[1];
|
|
114928
|
+
auto &replaces = args.data[2];
|
|
114929
|
+
|
|
114909
114930
|
if (info.constant_pattern) {
|
|
114910
114931
|
auto &lstate = (RegexLocalState &)*ExecuteFunctionState::GetFunctionState(state);
|
|
114911
|
-
|
|
114912
|
-
|
|
114913
|
-
|
|
114932
|
+
BinaryExecutor::Execute<string_t, string_t, string_t>(
|
|
114933
|
+
strings, replaces, result, args.size(), [&](string_t input, string_t replace) {
|
|
114934
|
+
std::string sstring = input.GetString();
|
|
114935
|
+
if (info.global_replace) {
|
|
114936
|
+
RE2::GlobalReplace(&sstring, lstate.constant_pattern, CreateStringPiece(replace));
|
|
114937
|
+
} else {
|
|
114938
|
+
RE2::Replace(&sstring, lstate.constant_pattern, CreateStringPiece(replace));
|
|
114939
|
+
}
|
|
114940
|
+
return StringVector::AddString(result, sstring);
|
|
114941
|
+
});
|
|
114914
114942
|
} else {
|
|
114915
|
-
|
|
114916
|
-
|
|
114917
|
-
|
|
114918
|
-
|
|
114919
|
-
|
|
114943
|
+
TernaryExecutor::Execute<string_t, string_t, string_t, string_t>(
|
|
114944
|
+
strings, patterns, replaces, result, args.size(), [&](string_t input, string_t pattern, string_t replace) {
|
|
114945
|
+
RE2 re(CreateStringPiece(pattern), info.options);
|
|
114946
|
+
std::string sstring = input.GetString();
|
|
114947
|
+
if (info.global_replace) {
|
|
114948
|
+
RE2::GlobalReplace(&sstring, re, CreateStringPiece(replace));
|
|
114949
|
+
} else {
|
|
114950
|
+
RE2::Replace(&sstring, re, CreateStringPiece(replace));
|
|
114951
|
+
}
|
|
114952
|
+
return StringVector::AddString(result, sstring);
|
|
114953
|
+
});
|
|
114920
114954
|
}
|
|
114921
114955
|
}
|
|
114922
114956
|
|
|
114923
|
-
|
|
114924
|
-
|
|
114925
|
-
|
|
114926
|
-
|
|
114927
|
-
return make_unique<RegexLocalState>(info);
|
|
114928
|
-
}
|
|
114929
|
-
return nullptr;
|
|
114957
|
+
//===--------------------------------------------------------------------===//
|
|
114958
|
+
// Regexp Extract
|
|
114959
|
+
//===--------------------------------------------------------------------===//
|
|
114960
|
+
RegexpExtractBindData::RegexpExtractBindData() {
|
|
114930
114961
|
}
|
|
114931
114962
|
|
|
114932
|
-
RegexpExtractBindData::RegexpExtractBindData(
|
|
114933
|
-
|
|
114934
|
-
:
|
|
114963
|
+
RegexpExtractBindData::RegexpExtractBindData(duckdb_re2::RE2::Options options, string constant_string_p,
|
|
114964
|
+
bool constant_pattern, string group_string_p)
|
|
114965
|
+
: RegexpBaseBindData(options, move(constant_string_p), constant_pattern), group_string(move(group_string_p)),
|
|
114935
114966
|
rewrite(group_string) {
|
|
114936
114967
|
}
|
|
114937
114968
|
|
|
114938
114969
|
unique_ptr<FunctionData> RegexpExtractBindData::Copy() const {
|
|
114939
|
-
return make_unique<RegexpExtractBindData>(
|
|
114970
|
+
return make_unique<RegexpExtractBindData>(options, constant_string, constant_pattern, group_string);
|
|
114940
114971
|
}
|
|
114941
114972
|
|
|
114942
114973
|
bool RegexpExtractBindData::Equals(const FunctionData &other_p) const {
|
|
114943
114974
|
auto &other = (const RegexpExtractBindData &)other_p;
|
|
114944
|
-
return
|
|
114975
|
+
return RegexpBaseBindData::Equals(other) && group_string == other.group_string;
|
|
114945
114976
|
}
|
|
114946
114977
|
|
|
114947
114978
|
static unique_ptr<FunctionData> RegexExtractBind(ClientContext &context, ScalarFunction &bound_function,
|
|
114948
114979
|
vector<unique_ptr<Expression>> &arguments) {
|
|
114949
114980
|
D_ASSERT(arguments.size() >= 2);
|
|
114950
114981
|
|
|
114951
|
-
|
|
114952
|
-
|
|
114953
|
-
|
|
114954
|
-
|
|
114955
|
-
if (!pattern_str.IsNull() && pattern_str.type().id() == LogicalTypeId::VARCHAR) {
|
|
114956
|
-
pattern = StringValue::Get(pattern_str);
|
|
114957
|
-
}
|
|
114958
|
-
}
|
|
114982
|
+
duckdb_re2::RE2::Options options;
|
|
114983
|
+
|
|
114984
|
+
string constant_string;
|
|
114985
|
+
bool constant_pattern = TryParseConstantPattern(*arguments[1], constant_string);
|
|
114959
114986
|
|
|
114960
114987
|
string group_string = "";
|
|
114961
|
-
if (arguments.size()
|
|
114988
|
+
if (arguments.size() >= 3) {
|
|
114962
114989
|
if (arguments[2]->HasParameter()) {
|
|
114963
114990
|
throw ParameterNotResolvedException();
|
|
114964
114991
|
}
|
|
@@ -114976,8 +115003,37 @@ static unique_ptr<FunctionData> RegexExtractBind(ClientContext &context, ScalarF
|
|
|
114976
115003
|
} else {
|
|
114977
115004
|
group_string = "\\0";
|
|
114978
115005
|
}
|
|
115006
|
+
if (arguments.size() >= 4) {
|
|
115007
|
+
ParseRegexOptions(*arguments[3], options);
|
|
115008
|
+
}
|
|
115009
|
+
return make_unique<RegexpExtractBindData>(options, move(constant_string), constant_pattern, move(group_string));
|
|
115010
|
+
}
|
|
115011
|
+
|
|
115012
|
+
inline static string_t Extract(const string_t &input, Vector &result, const RE2 &re,
|
|
115013
|
+
const duckdb_re2::StringPiece &rewrite) {
|
|
115014
|
+
std::string extracted;
|
|
115015
|
+
RE2::Extract(input.GetString(), re, rewrite, &extracted);
|
|
115016
|
+
return StringVector::AddString(result, extracted.c_str(), extracted.size());
|
|
115017
|
+
}
|
|
115018
|
+
|
|
115019
|
+
static void RegexExtractFunction(DataChunk &args, ExpressionState &state, Vector &result) {
|
|
115020
|
+
auto &func_expr = (BoundFunctionExpression &)state.expr;
|
|
115021
|
+
const auto &info = (RegexpExtractBindData &)*func_expr.bind_info;
|
|
114979
115022
|
|
|
114980
|
-
|
|
115023
|
+
auto &strings = args.data[0];
|
|
115024
|
+
auto &patterns = args.data[1];
|
|
115025
|
+
if (info.constant_pattern) {
|
|
115026
|
+
auto &lstate = (RegexLocalState &)*ExecuteFunctionState::GetFunctionState(state);
|
|
115027
|
+
UnaryExecutor::Execute<string_t, string_t>(strings, result, args.size(), [&](string_t input) {
|
|
115028
|
+
return Extract(input, result, lstate.constant_pattern, info.rewrite);
|
|
115029
|
+
});
|
|
115030
|
+
} else {
|
|
115031
|
+
BinaryExecutor::Execute<string_t, string_t, string_t>(strings, patterns, result, args.size(),
|
|
115032
|
+
[&](string_t input, string_t pattern) {
|
|
115033
|
+
RE2 re(CreateStringPiece(pattern), info.options);
|
|
115034
|
+
return Extract(input, result, re, info.rewrite);
|
|
115035
|
+
});
|
|
115036
|
+
}
|
|
114981
115037
|
}
|
|
114982
115038
|
|
|
114983
115039
|
void RegexpFun::RegisterFunction(BuiltinFunctions &set) {
|
|
@@ -115003,19 +115059,24 @@ void RegexpFun::RegisterFunction(BuiltinFunctions &set) {
|
|
|
115003
115059
|
|
|
115004
115060
|
ScalarFunctionSet regexp_replace("regexp_replace");
|
|
115005
115061
|
regexp_replace.AddFunction(ScalarFunction({LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::VARCHAR},
|
|
115006
|
-
LogicalType::VARCHAR, RegexReplaceFunction, RegexReplaceBind
|
|
115007
|
-
|
|
115008
|
-
|
|
115009
|
-
|
|
115062
|
+
LogicalType::VARCHAR, RegexReplaceFunction, RegexReplaceBind, nullptr,
|
|
115063
|
+
nullptr, RegexInitLocalState));
|
|
115064
|
+
regexp_replace.AddFunction(ScalarFunction(
|
|
115065
|
+
{LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::VARCHAR}, LogicalType::VARCHAR,
|
|
115066
|
+
RegexReplaceFunction, RegexReplaceBind, nullptr, nullptr, RegexInitLocalState));
|
|
115010
115067
|
|
|
115011
115068
|
ScalarFunctionSet regexp_extract("regexp_extract");
|
|
115012
115069
|
regexp_extract.AddFunction(
|
|
115013
115070
|
ScalarFunction({LogicalType::VARCHAR, LogicalType::VARCHAR}, LogicalType::VARCHAR, RegexExtractFunction,
|
|
115014
|
-
RegexExtractBind, nullptr, nullptr,
|
|
115071
|
+
RegexExtractBind, nullptr, nullptr, RegexInitLocalState, LogicalType::INVALID,
|
|
115015
115072
|
FunctionSideEffects::NO_SIDE_EFFECTS, FunctionNullHandling::SPECIAL_HANDLING));
|
|
115016
115073
|
regexp_extract.AddFunction(ScalarFunction(
|
|
115017
115074
|
{LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::INTEGER}, LogicalType::VARCHAR, RegexExtractFunction,
|
|
115018
|
-
RegexExtractBind, nullptr, nullptr,
|
|
115075
|
+
RegexExtractBind, nullptr, nullptr, RegexInitLocalState, LogicalType::INVALID,
|
|
115076
|
+
FunctionSideEffects::NO_SIDE_EFFECTS, FunctionNullHandling::SPECIAL_HANDLING));
|
|
115077
|
+
regexp_extract.AddFunction(ScalarFunction(
|
|
115078
|
+
{LogicalType::VARCHAR, LogicalType::VARCHAR, LogicalType::INTEGER, LogicalType::VARCHAR}, LogicalType::VARCHAR,
|
|
115079
|
+
RegexExtractFunction, RegexExtractBind, nullptr, nullptr, RegexInitLocalState, LogicalType::INVALID,
|
|
115019
115080
|
FunctionSideEffects::NO_SIDE_EFFECTS, FunctionNullHandling::SPECIAL_HANDLING));
|
|
115020
115081
|
|
|
115021
115082
|
set.AddFunction(regexp_full_match);
|
|
@@ -128400,6 +128461,10 @@ Executor &ClientContext::GetExecutor() {
|
|
|
128400
128461
|
return *active_query->executor;
|
|
128401
128462
|
}
|
|
128402
128463
|
|
|
128464
|
+
FileOpener *FileOpener::Get(ClientContext &context) {
|
|
128465
|
+
return ClientData::Get(context).file_opener.get();
|
|
128466
|
+
}
|
|
128467
|
+
|
|
128403
128468
|
const string &ClientContext::GetCurrentQuery() {
|
|
128404
128469
|
D_ASSERT(active_query);
|
|
128405
128470
|
return active_query->query;
|
|
@@ -162735,7 +162800,7 @@ bool KeywordHelper::IsKeyword(const string &text) {
|
|
|
162735
162800
|
return Parser::IsKeyword(text);
|
|
162736
162801
|
}
|
|
162737
162802
|
|
|
162738
|
-
bool KeywordHelper::RequiresQuotes(const string &text) {
|
|
162803
|
+
bool KeywordHelper::RequiresQuotes(const string &text, bool allow_caps) {
|
|
162739
162804
|
for (size_t i = 0; i < text.size(); i++) {
|
|
162740
162805
|
if (i > 0 && (text[i] >= '0' && text[i] <= '9')) {
|
|
162741
162806
|
continue;
|
|
@@ -162743,6 +162808,11 @@ bool KeywordHelper::RequiresQuotes(const string &text) {
|
|
|
162743
162808
|
if (text[i] >= 'a' && text[i] <= 'z') {
|
|
162744
162809
|
continue;
|
|
162745
162810
|
}
|
|
162811
|
+
if (allow_caps) {
|
|
162812
|
+
if (text[i] >= 'A' && text[i] <= 'Z') {
|
|
162813
|
+
continue;
|
|
162814
|
+
}
|
|
162815
|
+
}
|
|
162746
162816
|
if (text[i] == '_') {
|
|
162747
162817
|
continue;
|
|
162748
162818
|
}
|
|
@@ -162751,8 +162821,8 @@ bool KeywordHelper::RequiresQuotes(const string &text) {
|
|
|
162751
162821
|
return IsKeyword(text);
|
|
162752
162822
|
}
|
|
162753
162823
|
|
|
162754
|
-
string KeywordHelper::WriteOptionallyQuoted(const string &text, char quote) {
|
|
162755
|
-
if (!RequiresQuotes(text)) {
|
|
162824
|
+
string KeywordHelper::WriteOptionallyQuoted(const string &text, char quote, bool allow_caps) {
|
|
162825
|
+
if (!RequiresQuotes(text, allow_caps)) {
|
|
162756
162826
|
return text;
|
|
162757
162827
|
}
|
|
162758
162828
|
return string(1, quote) + StringUtil::Replace(text, string(1, quote), string(2, quote)) + string(1, quote);
|