duckdb 0.5.2-dev1819.0 → 0.5.2-dev1840.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +156 -100
- package/src/duckdb.hpp +32 -5
- package/src/parquet-amalgamation.cpp +28175 -28179
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -8617,7 +8617,7 @@ private:
|
|
|
8617
8617
|
namespace duckdb {
|
|
8618
8618
|
|
|
8619
8619
|
enum class UnicodeType { INVALID, ASCII, UNICODE };
|
|
8620
|
-
enum class UnicodeInvalidReason { BYTE_MISMATCH, NULL_BYTE, INVALID_UNICODE };
|
|
8620
|
+
enum class UnicodeInvalidReason { BYTE_MISMATCH, INVALID_UNICODE };
|
|
8621
8621
|
|
|
8622
8622
|
class Utf8Proc {
|
|
8623
8623
|
public:
|
|
@@ -8845,7 +8845,7 @@ list<ColumnDataCollection> BoxRenderer::FetchRenderCollections(ClientContext &co
|
|
|
8845
8845
|
}
|
|
8846
8846
|
|
|
8847
8847
|
string ConvertRenderValue(const string &input) {
|
|
8848
|
-
return StringUtil::Replace(input, "\n", "\\n");
|
|
8848
|
+
return StringUtil::Replace(StringUtil::Replace(input, "\n", "\\n"), string("\0", 1), "\\0");
|
|
8849
8849
|
}
|
|
8850
8850
|
|
|
8851
8851
|
string BoxRenderer::GetRenderValue(ColumnDataRowCollection &rows, idx_t c, idx_t r) {
|
|
@@ -30071,15 +30071,15 @@ PreservedError::PreservedError() : initialized(false) {
|
|
|
30071
30071
|
}
|
|
30072
30072
|
|
|
30073
30073
|
PreservedError::PreservedError(const Exception &exception)
|
|
30074
|
-
: initialized(true), type(exception.type), raw_message(exception.RawMessage()) {
|
|
30074
|
+
: initialized(true), type(exception.type), raw_message(SanitizeErrorMessage(exception.RawMessage())) {
|
|
30075
30075
|
}
|
|
30076
30076
|
|
|
30077
30077
|
PreservedError::PreservedError(const std::exception &exception)
|
|
30078
|
-
: initialized(true), type(ExceptionType::INVALID), raw_message(exception.what()) {
|
|
30078
|
+
: initialized(true), type(ExceptionType::INVALID), raw_message(SanitizeErrorMessage(exception.what())) {
|
|
30079
30079
|
}
|
|
30080
30080
|
|
|
30081
30081
|
PreservedError::PreservedError(const string &message)
|
|
30082
|
-
: initialized(true), type(ExceptionType::INVALID), raw_message(message) {
|
|
30082
|
+
: initialized(true), type(ExceptionType::INVALID), raw_message(SanitizeErrorMessage(message)) {
|
|
30083
30083
|
}
|
|
30084
30084
|
|
|
30085
30085
|
const string &PreservedError::Message() {
|
|
@@ -30089,6 +30089,10 @@ const string &PreservedError::Message() {
|
|
|
30089
30089
|
return final_message;
|
|
30090
30090
|
}
|
|
30091
30091
|
|
|
30092
|
+
string PreservedError::SanitizeErrorMessage(string error) {
|
|
30093
|
+
return StringUtil::Replace(move(error), string("\0", 1), "\\0");
|
|
30094
|
+
}
|
|
30095
|
+
|
|
30092
30096
|
void PreservedError::Throw(const string &prepended_message) const {
|
|
30093
30097
|
D_ASSERT(initialized);
|
|
30094
30098
|
if (!prepended_message.empty()) {
|
|
@@ -40041,6 +40045,9 @@ vector<string> StringUtil::Split(const string &input, const string &split) {
|
|
|
40041
40045
|
}
|
|
40042
40046
|
|
|
40043
40047
|
string StringUtil::Replace(string source, const string &from, const string &to) {
|
|
40048
|
+
if (from.empty()) {
|
|
40049
|
+
throw InternalException("Invalid argument to StringUtil::Replace - empty FROM");
|
|
40050
|
+
}
|
|
40044
40051
|
idx_t start_pos = 0;
|
|
40045
40052
|
while ((start_pos = source.find(from, start_pos)) != string::npos) {
|
|
40046
40053
|
source.replace(start_pos, from.length(), to);
|
|
@@ -41812,7 +41819,7 @@ bool Blob::TryGetBlobSize(string_t str, idx_t &str_len, string *error_message) {
|
|
|
41812
41819
|
}
|
|
41813
41820
|
str_len++;
|
|
41814
41821
|
i += 3;
|
|
41815
|
-
} else if (data[i]
|
|
41822
|
+
} else if (data[i] <= 127) {
|
|
41816
41823
|
str_len++;
|
|
41817
41824
|
} else {
|
|
41818
41825
|
string error = "Invalid byte encountered in STRING -> BLOB conversion. All non-ascii characters "
|
|
@@ -41846,7 +41853,7 @@ void Blob::ToBlob(string_t str, data_ptr_t output) {
|
|
|
41846
41853
|
D_ASSERT(data[i + 1] == 'x');
|
|
41847
41854
|
output[blob_idx++] = (byte_a << 4) + byte_b;
|
|
41848
41855
|
i += 3;
|
|
41849
|
-
} else if (data[i]
|
|
41856
|
+
} else if (data[i] <= 127) {
|
|
41850
41857
|
output[blob_idx++] = data_t(data[i]);
|
|
41851
41858
|
} else {
|
|
41852
41859
|
throw ConversionException("Invalid byte encountered in STRING -> BLOB conversion. All non-ascii characters "
|
|
@@ -47731,12 +47738,6 @@ void string_t::Verify() const {
|
|
|
47731
47738
|
}
|
|
47732
47739
|
}
|
|
47733
47740
|
|
|
47734
|
-
void string_t::VerifyNull() const {
|
|
47735
|
-
for (idx_t i = 0; i < GetSize(); i++) {
|
|
47736
|
-
D_ASSERT(GetDataUnsafe()[i] != '\0');
|
|
47737
|
-
}
|
|
47738
|
-
}
|
|
47739
|
-
|
|
47740
47741
|
} // namespace duckdb
|
|
47741
47742
|
|
|
47742
47743
|
|
|
@@ -50430,6 +50431,17 @@ bool Value::NotDistinctFrom(const Value &lvalue, const Value &rvalue) {
|
|
|
50430
50431
|
return ValueOperations::NotDistinctFrom(lvalue, rvalue);
|
|
50431
50432
|
}
|
|
50432
50433
|
|
|
50434
|
+
static string SanitizeValue(string input) {
|
|
50435
|
+
// some results might contain padding spaces, e.g. when rendering
|
|
50436
|
+
// VARCHAR(10) and the string only has 6 characters, they will be padded
|
|
50437
|
+
// with spaces to 10 in the rendering. We don't do that here yet as we
|
|
50438
|
+
// are looking at internal structures. So just ignore any extra spaces
|
|
50439
|
+
// on the right
|
|
50440
|
+
StringUtil::RTrim(input);
|
|
50441
|
+
// for result checking code, replace null bytes with their escaped value (\0)
|
|
50442
|
+
return StringUtil::Replace(input, string("\0", 1), "\\0");
|
|
50443
|
+
}
|
|
50444
|
+
|
|
50433
50445
|
bool Value::ValuesAreEqual(CastFunctionSet &set, GetCastFunctionInput &get_input, const Value &result_value,
|
|
50434
50446
|
const Value &value) {
|
|
50435
50447
|
if (result_value.IsNull() != value.IsNull()) {
|
|
@@ -50454,15 +50466,8 @@ bool Value::ValuesAreEqual(CastFunctionSet &set, GetCastFunctionInput &get_input
|
|
|
50454
50466
|
}
|
|
50455
50467
|
case LogicalTypeId::VARCHAR: {
|
|
50456
50468
|
auto other = result_value.CastAs(set, get_input, LogicalType::VARCHAR);
|
|
50457
|
-
|
|
50458
|
-
|
|
50459
|
-
// with spaces to 10 in the rendering. We don't do that here yet as we
|
|
50460
|
-
// are looking at internal structures. So just ignore any extra spaces
|
|
50461
|
-
// on the right
|
|
50462
|
-
string left = other.str_value;
|
|
50463
|
-
string right = value.str_value;
|
|
50464
|
-
StringUtil::RTrim(left);
|
|
50465
|
-
StringUtil::RTrim(right);
|
|
50469
|
+
string left = SanitizeValue(other.str_value);
|
|
50470
|
+
string right = SanitizeValue(value.str_value);
|
|
50466
50471
|
return left == right;
|
|
50467
50472
|
}
|
|
50468
50473
|
default:
|
|
@@ -51767,7 +51772,7 @@ void Vector::Verify(Vector &vector_p, const SelectionVector &sel_p, idx_t count)
|
|
|
51767
51772
|
D_ASSERT(!vector->auxiliary);
|
|
51768
51773
|
}
|
|
51769
51774
|
if (type.id() == LogicalTypeId::VARCHAR || type.id() == LogicalTypeId::JSON) {
|
|
51770
|
-
// verify that
|
|
51775
|
+
// verify that the string is correct unicode
|
|
51771
51776
|
switch (vtype) {
|
|
51772
51777
|
case VectorType::FLAT_VECTOR: {
|
|
51773
51778
|
auto &validity = FlatVector::Validity(*vector);
|
|
@@ -51775,7 +51780,7 @@ void Vector::Verify(Vector &vector_p, const SelectionVector &sel_p, idx_t count)
|
|
|
51775
51780
|
for (idx_t i = 0; i < count; i++) {
|
|
51776
51781
|
auto oidx = sel->get_index(i);
|
|
51777
51782
|
if (validity.RowIsValid(oidx)) {
|
|
51778
|
-
strings[oidx].VerifyNull();
|
|
51783
|
+
strings[oidx].Verify();
|
|
51779
51784
|
}
|
|
51780
51785
|
}
|
|
51781
51786
|
break;
|
|
@@ -79839,7 +79844,7 @@ normal:
|
|
|
79839
79844
|
} while (ReadBuffer(start));
|
|
79840
79845
|
goto final_state;
|
|
79841
79846
|
add_value:
|
|
79842
|
-
AddValue(buffer.get() + start, position - start - offset, column, escape_positions, has_quotes);
|
|
79847
|
+
AddValue(string_t(buffer.get() + start, position - start - offset), column, escape_positions, has_quotes);
|
|
79843
79848
|
// increase position by 1 and move start to the new position
|
|
79844
79849
|
offset = 0;
|
|
79845
79850
|
has_quotes = false;
|
|
@@ -79852,7 +79857,7 @@ add_value:
|
|
|
79852
79857
|
add_row : {
|
|
79853
79858
|
// check type of newline (\r or \n)
|
|
79854
79859
|
bool carriage_return = buffer[position] == '\r';
|
|
79855
|
-
AddValue(buffer.get() + start, position - start - offset, column, escape_positions, has_quotes);
|
|
79860
|
+
AddValue(string_t(buffer.get() + start, position - start - offset), column, escape_positions, has_quotes);
|
|
79856
79861
|
finished_chunk = AddRow(insert_chunk, column);
|
|
79857
79862
|
// increase position by 1 and move start to the new position
|
|
79858
79863
|
offset = 0;
|
|
@@ -79990,7 +79995,7 @@ final_state:
|
|
|
79990
79995
|
}
|
|
79991
79996
|
if (column > 0 || position > start) {
|
|
79992
79997
|
// remaining values to be added to the chunk
|
|
79993
|
-
AddValue(buffer.get() + start, position - start - offset, column, escape_positions, has_quotes);
|
|
79998
|
+
AddValue(string_t(buffer.get() + start, position - start - offset), column, escape_positions, has_quotes);
|
|
79994
79999
|
finished_chunk = AddRow(insert_chunk, column);
|
|
79995
80000
|
}
|
|
79996
80001
|
// final stage, only reached after parsing the file is finished
|
|
@@ -80050,7 +80055,7 @@ normal:
|
|
|
80050
80055
|
// file ends during normal scan: go to end state
|
|
80051
80056
|
goto final_state;
|
|
80052
80057
|
add_value:
|
|
80053
|
-
AddValue(buffer.get() + start, position - start - offset, column, escape_positions, has_quotes);
|
|
80058
|
+
AddValue(string_t(buffer.get() + start, position - start - offset), column, escape_positions, has_quotes);
|
|
80054
80059
|
// increase position by 1 and move start to the new position
|
|
80055
80060
|
offset = 0;
|
|
80056
80061
|
has_quotes = false;
|
|
@@ -80063,7 +80068,7 @@ add_value:
|
|
|
80063
80068
|
add_row : {
|
|
80064
80069
|
// check type of newline (\r or \n)
|
|
80065
80070
|
bool carriage_return = buffer[position] == '\r';
|
|
80066
|
-
AddValue(buffer.get() + start, position - start - offset, column, escape_positions, has_quotes);
|
|
80071
|
+
AddValue(string_t(buffer.get() + start, position - start - offset), column, escape_positions, has_quotes);
|
|
80067
80072
|
finished_chunk = AddRow(insert_chunk, column);
|
|
80068
80073
|
// increase position by 1 and move start to the new position
|
|
80069
80074
|
offset = 0;
|
|
@@ -80174,7 +80179,7 @@ final_state:
|
|
|
80174
80179
|
|
|
80175
80180
|
if (column > 0 || position > start) {
|
|
80176
80181
|
// remaining values to be added to the chunk
|
|
80177
|
-
AddValue(buffer.get() + start, position - start - offset, column, escape_positions, has_quotes);
|
|
80182
|
+
AddValue(string_t(buffer.get() + start, position - start - offset), column, escape_positions, has_quotes);
|
|
80178
80183
|
finished_chunk = AddRow(insert_chunk, column);
|
|
80179
80184
|
}
|
|
80180
80185
|
|
|
@@ -80275,8 +80280,8 @@ bool BufferedCSVReader::TryParseCSV(ParserMode parser_mode, DataChunk &insert_ch
|
|
|
80275
80280
|
}
|
|
80276
80281
|
}
|
|
80277
80282
|
|
|
80278
|
-
void BufferedCSVReader::AddValue(
|
|
80279
|
-
|
|
80283
|
+
void BufferedCSVReader::AddValue(string_t str_val, idx_t &column, vector<idx_t> &escape_positions, bool has_quotes) {
|
|
80284
|
+
auto length = str_val.GetSize();
|
|
80280
80285
|
if (length == 0 && column == 0) {
|
|
80281
80286
|
row_empty = true;
|
|
80282
80287
|
} else {
|
|
@@ -80305,18 +80310,16 @@ void BufferedCSVReader::AddValue(char *str_val, idx_t length, idx_t &column, vec
|
|
|
80305
80310
|
// insert the line number into the chunk
|
|
80306
80311
|
idx_t row_entry = parse_chunk.size();
|
|
80307
80312
|
|
|
80308
|
-
str_val[length] = '\0';
|
|
80309
|
-
|
|
80310
80313
|
// test against null string, but only if the value was not quoted
|
|
80311
80314
|
if ((!has_quotes || sql_types[column].id() != LogicalTypeId::VARCHAR) && !options.force_not_null[column] &&
|
|
80312
|
-
|
|
80315
|
+
Equals::Operation(str_val, string_t(options.null_str))) {
|
|
80313
80316
|
FlatVector::SetNull(parse_chunk.data[column], row_entry, true);
|
|
80314
80317
|
} else {
|
|
80315
80318
|
auto &v = parse_chunk.data[column];
|
|
80316
80319
|
auto parse_data = FlatVector::GetData<string_t>(v);
|
|
80317
80320
|
if (!escape_positions.empty()) {
|
|
80318
80321
|
// remove escape characters (if any)
|
|
80319
|
-
string old_val = str_val;
|
|
80322
|
+
string old_val = str_val.GetString();
|
|
80320
80323
|
string new_val = "";
|
|
80321
80324
|
idx_t prev_pos = 0;
|
|
80322
80325
|
for (idx_t i = 0; i < escape_positions.size(); i++) {
|
|
@@ -80333,7 +80336,7 @@ void BufferedCSVReader::AddValue(char *str_val, idx_t length, idx_t &column, vec
|
|
|
80333
80336
|
escape_positions.clear();
|
|
80334
80337
|
parse_data[row_entry] = StringVector::AddStringOrBlob(v, string_t(new_val));
|
|
80335
80338
|
} else {
|
|
80336
|
-
parse_data[row_entry] =
|
|
80339
|
+
parse_data[row_entry] = str_val;
|
|
80337
80340
|
}
|
|
80338
80341
|
}
|
|
80339
80342
|
|
|
@@ -115875,20 +115878,44 @@ void UpperFun::RegisterFunction(BuiltinFunctions &set) {
|
|
|
115875
115878
|
namespace duckdb {
|
|
115876
115879
|
|
|
115877
115880
|
struct ChrOperator {
|
|
115878
|
-
|
|
115879
|
-
static inline TR Operation(const TA &input) {
|
|
115880
|
-
char c[5] = {'\0', '\0', '\0', '\0', '\0'};
|
|
115881
|
-
int utf8_bytes = 4;
|
|
115881
|
+
static void GetCodepoint(int32_t input, char c[], int &utf8_bytes) {
|
|
115882
115882
|
if (input < 0 || !Utf8Proc::CodepointToUtf8(input, utf8_bytes, &c[0])) {
|
|
115883
115883
|
throw InvalidInputException("Invalid UTF8 Codepoint %d", input);
|
|
115884
115884
|
}
|
|
115885
|
-
|
|
115885
|
+
}
|
|
115886
|
+
|
|
115887
|
+
template <class TA, class TR>
|
|
115888
|
+
static inline TR Operation(const TA &input) {
|
|
115889
|
+
char c[5] = {'\0', '\0', '\0', '\0', '\0'};
|
|
115890
|
+
int utf8_bytes;
|
|
115891
|
+
GetCodepoint(input, c, utf8_bytes);
|
|
115892
|
+
return string_t(&c[0], utf8_bytes);
|
|
115886
115893
|
}
|
|
115887
115894
|
};
|
|
115888
115895
|
|
|
115896
|
+
#ifdef DUCKDB_DEBUG_NO_INLINE
|
|
115897
|
+
// the chr function depends on the data always being inlined (which is always possible, since it outputs max 4 bytes)
|
|
115898
|
+
// to enable chr when string inlining is disabled we create a special function here
|
|
115899
|
+
static void ChrFunction(DataChunk &args, ExpressionState &state, Vector &result) {
|
|
115900
|
+
auto &code_vec = args.data[0];
|
|
115901
|
+
|
|
115902
|
+
char c[5] = {'\0', '\0', '\0', '\0', '\0'};
|
|
115903
|
+
int utf8_bytes;
|
|
115904
|
+
UnaryExecutor::Execute<int32_t, string_t>(code_vec, result, args.size(), [&](int32_t input) {
|
|
115905
|
+
ChrOperator::GetCodepoint(input, c, utf8_bytes);
|
|
115906
|
+
return StringVector::AddString(result, &c[0], utf8_bytes);
|
|
115907
|
+
});
|
|
115908
|
+
}
|
|
115909
|
+
#endif
|
|
115910
|
+
|
|
115889
115911
|
void CHR::RegisterFunction(BuiltinFunctions &set) {
|
|
115890
115912
|
ScalarFunction chr("chr", {LogicalType::INTEGER}, LogicalType::VARCHAR,
|
|
115891
|
-
|
|
115913
|
+
#ifdef DUCKDB_DEBUG_NO_INLINE
|
|
115914
|
+
ChrFunction
|
|
115915
|
+
#else
|
|
115916
|
+
ScalarFunction::UnaryFunction<int32_t, string_t, ChrOperator>
|
|
115917
|
+
#endif
|
|
115918
|
+
);
|
|
115892
115919
|
set.AddFunction(chr);
|
|
115893
115920
|
}
|
|
115894
115921
|
|
|
@@ -117985,14 +118012,14 @@ struct ASCIILCaseReader {
|
|
|
117985
118012
|
}
|
|
117986
118013
|
};
|
|
117987
118014
|
|
|
117988
|
-
template <char PERCENTAGE, char UNDERSCORE, class READER = StandardCharacterReader>
|
|
118015
|
+
template <char PERCENTAGE, char UNDERSCORE, bool HAS_ESCAPE, class READER = StandardCharacterReader>
|
|
117989
118016
|
bool TemplatedLikeOperator(const char *sdata, idx_t slen, const char *pdata, idx_t plen, char escape) {
|
|
117990
118017
|
idx_t pidx = 0;
|
|
117991
118018
|
idx_t sidx = 0;
|
|
117992
118019
|
for (; pidx < plen && sidx < slen; pidx++) {
|
|
117993
118020
|
char pchar = READER::Operation(pdata, pidx);
|
|
117994
118021
|
char schar = READER::Operation(sdata, sidx);
|
|
117995
|
-
if (pchar == escape) {
|
|
118022
|
+
if (HAS_ESCAPE && pchar == escape) {
|
|
117996
118023
|
pidx++;
|
|
117997
118024
|
if (pidx == plen) {
|
|
117998
118025
|
throw SyntaxException("Like pattern must not end with escape character!");
|
|
@@ -118012,8 +118039,8 @@ bool TemplatedLikeOperator(const char *sdata, idx_t slen, const char *pdata, idx
|
|
|
118012
118039
|
return true; /* tail is acceptable */
|
|
118013
118040
|
}
|
|
118014
118041
|
for (; sidx < slen; sidx++) {
|
|
118015
|
-
if (TemplatedLikeOperator<PERCENTAGE, UNDERSCORE, READER>(
|
|
118016
|
-
|
|
118042
|
+
if (TemplatedLikeOperator<PERCENTAGE, UNDERSCORE, HAS_ESCAPE, READER>(
|
|
118043
|
+
sdata + sidx, slen - sidx, pdata + pidx, plen - pidx, escape)) {
|
|
118017
118044
|
return true;
|
|
118018
118045
|
}
|
|
118019
118046
|
}
|
|
@@ -118170,10 +118197,18 @@ static unique_ptr<FunctionData> LikeBindFunction(ClientContext &context, ScalarF
|
|
|
118170
118197
|
}
|
|
118171
118198
|
|
|
118172
118199
|
bool LikeOperatorFunction(const char *s, idx_t slen, const char *pattern, idx_t plen, char escape) {
|
|
118173
|
-
return TemplatedLikeOperator<'%', '_'>(s, slen, pattern, plen, escape);
|
|
118200
|
+
return TemplatedLikeOperator<'%', '_', true>(s, slen, pattern, plen, escape);
|
|
118201
|
+
}
|
|
118202
|
+
|
|
118203
|
+
bool LikeOperatorFunction(const char *s, idx_t slen, const char *pattern, idx_t plen) {
|
|
118204
|
+
return TemplatedLikeOperator<'%', '_', false>(s, slen, pattern, plen, '\0');
|
|
118174
118205
|
}
|
|
118175
118206
|
|
|
118176
|
-
bool LikeOperatorFunction(string_t &s, string_t &pat, char escape) {
|
|
118207
|
+
bool LikeOperatorFunction(string_t &s, string_t &pat) {
|
|
118208
|
+
return LikeOperatorFunction(s.GetDataUnsafe(), s.GetSize(), pat.GetDataUnsafe(), pat.GetSize());
|
|
118209
|
+
}
|
|
118210
|
+
|
|
118211
|
+
bool LikeOperatorFunction(string_t &s, string_t &pat, char escape) {
|
|
118177
118212
|
return LikeOperatorFunction(s.GetDataUnsafe(), s.GetSize(), pat.GetDataUnsafe(), pat.GetSize(), escape);
|
|
118178
118213
|
}
|
|
118179
118214
|
|
|
@@ -118400,8 +118435,8 @@ struct NotILikeOperator {
|
|
|
118400
118435
|
struct ILikeOperatorASCII {
|
|
118401
118436
|
template <class TA, class TB, class TR>
|
|
118402
118437
|
static inline TR Operation(TA str, TB pattern) {
|
|
118403
|
-
return TemplatedLikeOperator<'%', '_', ASCIILCaseReader>(
|
|
118404
|
-
|
|
118438
|
+
return TemplatedLikeOperator<'%', '_', false, ASCIILCaseReader>(
|
|
118439
|
+
str.GetDataUnsafe(), str.GetSize(), pattern.GetDataUnsafe(), pattern.GetSize(), '\0');
|
|
118405
118440
|
}
|
|
118406
118441
|
};
|
|
118407
118442
|
|
|
@@ -128040,7 +128075,8 @@ vector<TestType> TestAllTypesFun::GetTestTypes() {
|
|
|
128040
128075
|
result.emplace_back(LogicalType::INTERVAL, "interval", Value::INTERVAL(min_interval),
|
|
128041
128076
|
Value::INTERVAL(max_interval));
|
|
128042
128077
|
// strings/blobs
|
|
128043
|
-
result.emplace_back(LogicalType::VARCHAR, "varchar", Value("🦆🦆🦆🦆🦆🦆"),
|
|
128078
|
+
result.emplace_back(LogicalType::VARCHAR, "varchar", Value("🦆🦆🦆🦆🦆🦆"),
|
|
128079
|
+
Value(string("goo\x00se", 6)));
|
|
128044
128080
|
result.emplace_back(LogicalType::JSON, "json", Value("🦆🦆🦆🦆🦆🦆"), Value("goose"));
|
|
128045
128081
|
result.emplace_back(LogicalType::BLOB, "blob", Value::BLOB("thisisalongblob\\x00withnullbytes"),
|
|
128046
128082
|
Value::BLOB("\\x00\\x00\\x00a"));
|
|
@@ -130142,6 +130178,8 @@ interval_t FetchDefaultValue::Operation();
|
|
|
130142
130178
|
template <>
|
|
130143
130179
|
char *FetchDefaultValue::Operation();
|
|
130144
130180
|
template <>
|
|
130181
|
+
duckdb_string FetchDefaultValue::Operation();
|
|
130182
|
+
template <>
|
|
130145
130183
|
duckdb_blob FetchDefaultValue::Operation();
|
|
130146
130184
|
|
|
130147
130185
|
//===--------------------------------------------------------------------===//
|
|
@@ -130165,9 +130203,11 @@ struct ToCStringCastWrapper {
|
|
|
130165
130203
|
auto result_size = result_string.GetSize();
|
|
130166
130204
|
auto result_data = result_string.GetDataUnsafe();
|
|
130167
130205
|
|
|
130168
|
-
|
|
130169
|
-
memcpy(
|
|
130170
|
-
|
|
130206
|
+
char *allocated_data = (char *)duckdb_malloc(result_size + 1);
|
|
130207
|
+
memcpy(allocated_data, result_data, result_size);
|
|
130208
|
+
allocated_data[result_size] = '\0';
|
|
130209
|
+
result.data = allocated_data;
|
|
130210
|
+
result.size = result_size;
|
|
130171
130211
|
return true;
|
|
130172
130212
|
}
|
|
130173
130213
|
};
|
|
@@ -130183,7 +130223,7 @@ struct FromCBlobCastWrapper {
|
|
|
130183
130223
|
};
|
|
130184
130224
|
|
|
130185
130225
|
template <>
|
|
130186
|
-
bool FromCBlobCastWrapper::Operation(duckdb_blob input,
|
|
130226
|
+
bool FromCBlobCastWrapper::Operation(duckdb_blob input, duckdb_string &result);
|
|
130187
130227
|
|
|
130188
130228
|
template <class SOURCE_TYPE, class RESULT_TYPE, class OP>
|
|
130189
130229
|
RESULT_TYPE TryCastCInternal(duckdb_result *result, idx_t col, idx_t row) {
|
|
@@ -130236,7 +130276,7 @@ bool CastDecimalCInternal(duckdb_result *source, RESULT_TYPE &result, idx_t col,
|
|
|
130236
130276
|
|
|
130237
130277
|
//! DECIMAL -> VARCHAR
|
|
130238
130278
|
template <>
|
|
130239
|
-
bool CastDecimalCInternal(duckdb_result *source,
|
|
130279
|
+
bool CastDecimalCInternal(duckdb_result *source, duckdb_string &result, idx_t col, idx_t row);
|
|
130240
130280
|
|
|
130241
130281
|
//! DECIMAL -> DECIMAL (internal fetch)
|
|
130242
130282
|
template <>
|
|
@@ -130264,7 +130304,7 @@ namespace duckdb {
|
|
|
130264
130304
|
|
|
130265
130305
|
//! DECIMAL -> VARCHAR
|
|
130266
130306
|
template <>
|
|
130267
|
-
bool CastDecimalCInternal(duckdb_result *source,
|
|
130307
|
+
bool CastDecimalCInternal(duckdb_result *source, duckdb_string &result, idx_t col, idx_t row) {
|
|
130268
130308
|
auto result_data = (duckdb::DuckDBResultData *)source->internal_data;
|
|
130269
130309
|
auto &query_result = result_data->result;
|
|
130270
130310
|
auto &source_type = query_result->types[col];
|
|
@@ -130293,9 +130333,10 @@ bool CastDecimalCInternal(duckdb_result *source, char *&result, idx_t col, idx_t
|
|
|
130293
130333
|
default:
|
|
130294
130334
|
throw duckdb::InternalException("Unimplemented internal type for decimal");
|
|
130295
130335
|
}
|
|
130296
|
-
result = (char *)duckdb_malloc(sizeof(char) * (result_string.GetSize() + 1));
|
|
130297
|
-
memcpy(result, result_string.GetDataUnsafe(), result_string.GetSize());
|
|
130298
|
-
result[result_string.GetSize()] = '\0';
|
|
130336
|
+
result.data = (char *)duckdb_malloc(sizeof(char) * (result_string.GetSize() + 1));
|
|
130337
|
+
memcpy(result.data, result_string.GetDataUnsafe(), result_string.GetSize());
|
|
130338
|
+
result.data[result_string.GetSize()] = '\0';
|
|
130339
|
+
result.size = result_string.GetSize();
|
|
130299
130340
|
return true;
|
|
130300
130341
|
}
|
|
130301
130342
|
|
|
@@ -130424,6 +130465,14 @@ char *FetchDefaultValue::Operation() {
|
|
|
130424
130465
|
return nullptr;
|
|
130425
130466
|
}
|
|
130426
130467
|
|
|
130468
|
+
template <>
|
|
130469
|
+
duckdb_string FetchDefaultValue::Operation() {
|
|
130470
|
+
duckdb_string result;
|
|
130471
|
+
result.data = nullptr;
|
|
130472
|
+
result.size = 0;
|
|
130473
|
+
return result;
|
|
130474
|
+
}
|
|
130475
|
+
|
|
130427
130476
|
template <>
|
|
130428
130477
|
duckdb_blob FetchDefaultValue::Operation() {
|
|
130429
130478
|
duckdb_blob result;
|
|
@@ -130437,9 +130486,9 @@ duckdb_blob FetchDefaultValue::Operation() {
|
|
|
130437
130486
|
//===--------------------------------------------------------------------===//
|
|
130438
130487
|
|
|
130439
130488
|
template <>
|
|
130440
|
-
bool FromCBlobCastWrapper::Operation(duckdb_blob input,
|
|
130489
|
+
bool FromCBlobCastWrapper::Operation(duckdb_blob input, duckdb_string &result) {
|
|
130441
130490
|
string_t input_str((const char *)input.data, input.size);
|
|
130442
|
-
return ToCStringCastWrapper<duckdb::CastFromBlob>::template Operation<string_t,
|
|
130491
|
+
return ToCStringCastWrapper<duckdb::CastFromBlob>::template Operation<string_t, duckdb_string>(input_str, result);
|
|
130443
130492
|
}
|
|
130444
130493
|
|
|
130445
130494
|
} // namespace duckdb
|
|
@@ -132976,6 +133025,8 @@ RESULT_TYPE GetInternalCValue(duckdb_result *result, idx_t col, idx_t row) {
|
|
|
132976
133025
|
} // namespace duckdb
|
|
132977
133026
|
|
|
132978
133027
|
|
|
133028
|
+
#include <cstring>
|
|
133029
|
+
|
|
132979
133030
|
using duckdb::date_t;
|
|
132980
133031
|
using duckdb::dtime_t;
|
|
132981
133032
|
using duckdb::FetchDefaultValue;
|
|
@@ -133088,17 +133139,31 @@ duckdb_interval duckdb_value_interval(duckdb_result *result, idx_t col, idx_t ro
|
|
|
133088
133139
|
}
|
|
133089
133140
|
|
|
133090
133141
|
char *duckdb_value_varchar(duckdb_result *result, idx_t col, idx_t row) {
|
|
133091
|
-
return
|
|
133142
|
+
return duckdb_value_string(result, col, row).data;
|
|
133143
|
+
}
|
|
133144
|
+
|
|
133145
|
+
duckdb_string duckdb_value_string(duckdb_result *result, idx_t col, idx_t row) {
|
|
133146
|
+
return GetInternalCValue<duckdb_string, ToCStringCastWrapper<StringCast>>(result, col, row);
|
|
133092
133147
|
}
|
|
133093
133148
|
|
|
133094
133149
|
char *duckdb_value_varchar_internal(duckdb_result *result, idx_t col, idx_t row) {
|
|
133150
|
+
return duckdb_value_string_internal(result, col, row).data;
|
|
133151
|
+
}
|
|
133152
|
+
|
|
133153
|
+
duckdb_string duckdb_value_string_internal(duckdb_result *result, idx_t col, idx_t row) {
|
|
133095
133154
|
if (!CanFetchValue(result, col, row)) {
|
|
133096
|
-
return
|
|
133155
|
+
return FetchDefaultValue::Operation<duckdb_string>();
|
|
133097
133156
|
}
|
|
133098
133157
|
if (duckdb_column_type(result, col) != DUCKDB_TYPE_VARCHAR) {
|
|
133099
|
-
return
|
|
133100
|
-
}
|
|
133101
|
-
|
|
133158
|
+
return FetchDefaultValue::Operation<duckdb_string>();
|
|
133159
|
+
}
|
|
133160
|
+
// FIXME: this obviously does not work when there are null bytes in the string
|
|
133161
|
+
// we need to remove the deprecated C result materialization to get that to work correctly
|
|
133162
|
+
// since the deprecated C result materialization stores strings as null-terminated
|
|
133163
|
+
duckdb_string res;
|
|
133164
|
+
res.data = UnsafeFetch<char *>(result, col, row);
|
|
133165
|
+
res.size = strlen(res.data);
|
|
133166
|
+
return res;
|
|
133102
133167
|
}
|
|
133103
133168
|
|
|
133104
133169
|
duckdb_blob duckdb_value_blob(duckdb_result *result, idx_t col, idx_t row) {
|
|
@@ -136411,9 +136476,6 @@ string ErrorManager::InvalidUnicodeError(const string &input, const string &cont
|
|
|
136411
136476
|
}
|
|
136412
136477
|
string base_message;
|
|
136413
136478
|
switch (reason) {
|
|
136414
|
-
case UnicodeInvalidReason::NULL_BYTE:
|
|
136415
|
-
base_message = "Null-byte (\\0)";
|
|
136416
|
-
break;
|
|
136417
136479
|
case UnicodeInvalidReason::BYTE_MISMATCH:
|
|
136418
136480
|
base_message = "Invalid unicode (byte sequence mismatch)";
|
|
136419
136481
|
break;
|
|
@@ -145605,7 +145667,7 @@ string MaterializedQueryResult::ToString() {
|
|
|
145605
145667
|
result += "\t";
|
|
145606
145668
|
}
|
|
145607
145669
|
auto val = row.GetValue(col_idx);
|
|
145608
|
-
result += val.IsNull() ? "NULL" : val.ToString();
|
|
145670
|
+
result += val.IsNull() ? "NULL" : StringUtil::Replace(val.ToString(), string("\0", 1), "\\0");
|
|
145609
145671
|
}
|
|
145610
145672
|
result += "\n";
|
|
145611
145673
|
}
|
|
@@ -276403,35 +276465,29 @@ UnicodeType Utf8Proc::Analyze(const char *s, size_t len, UnicodeInvalidReason *i
|
|
|
276403
276465
|
int c = (int) s[i];
|
|
276404
276466
|
|
|
276405
276467
|
if ((c & 0x80) == 0) {
|
|
276406
|
-
|
|
276407
|
-
|
|
276408
|
-
|
|
276409
|
-
|
|
276410
|
-
|
|
276411
|
-
|
|
276468
|
+
continue;
|
|
276469
|
+
}
|
|
276470
|
+
int first_pos_seq = i;
|
|
276471
|
+
|
|
276472
|
+
if ((c & 0xE0) == 0xC0) {
|
|
276473
|
+
/* 2 byte sequence */
|
|
276474
|
+
int utf8char = c & 0x1F;
|
|
276475
|
+
type = UTF8ExtraByteLoop<1, 0x000780>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
|
|
276476
|
+
} else if ((c & 0xF0) == 0xE0) {
|
|
276477
|
+
/* 3 byte sequence */
|
|
276478
|
+
int utf8char = c & 0x0F;
|
|
276479
|
+
type = UTF8ExtraByteLoop<2, 0x00F800>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
|
|
276480
|
+
} else if ((c & 0xF8) == 0xF0) {
|
|
276481
|
+
/* 4 byte sequence */
|
|
276482
|
+
int utf8char = c & 0x07;
|
|
276483
|
+
type = UTF8ExtraByteLoop<3, 0x1F0000>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
|
|
276412
276484
|
} else {
|
|
276413
|
-
|
|
276414
|
-
|
|
276415
|
-
|
|
276416
|
-
|
|
276417
|
-
|
|
276418
|
-
|
|
276419
|
-
} else if ((c & 0xF0) == 0xE0) {
|
|
276420
|
-
/* 3 byte sequence */
|
|
276421
|
-
int utf8char = c & 0x0F;
|
|
276422
|
-
type = UTF8ExtraByteLoop<2, 0x00F800>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
|
|
276423
|
-
} else if ((c & 0xF8) == 0xF0) {
|
|
276424
|
-
/* 4 byte sequence */
|
|
276425
|
-
int utf8char = c & 0x07;
|
|
276426
|
-
type = UTF8ExtraByteLoop<3, 0x1F0000>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
|
|
276427
|
-
} else {
|
|
276428
|
-
/* invalid UTF-8 start byte */
|
|
276429
|
-
AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
|
|
276430
|
-
return UnicodeType::INVALID;
|
|
276431
|
-
}
|
|
276432
|
-
if (type == UnicodeType::INVALID) {
|
|
276433
|
-
return type;
|
|
276434
|
-
}
|
|
276485
|
+
/* invalid UTF-8 start byte */
|
|
276486
|
+
AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
|
|
276487
|
+
return UnicodeType::INVALID;
|
|
276488
|
+
}
|
|
276489
|
+
if (type == UnicodeType::INVALID) {
|
|
276490
|
+
return type;
|
|
276435
276491
|
}
|
|
276436
276492
|
}
|
|
276437
276493
|
return type;
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.5.2-dev1819"
|
|
14
|
+
#define DUCKDB_SOURCE_ID "a24535880b"
|
|
15
|
+
#define DUCKDB_VERSION "v0.5.2-dev1840"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|
|
@@ -4807,6 +4807,9 @@ private:
|
|
|
4807
4807
|
string raw_message;
|
|
4808
4808
|
//! The final message (stored in the preserved error for compatibility reasons with C-API)
|
|
4809
4809
|
string final_message;
|
|
4810
|
+
|
|
4811
|
+
private:
|
|
4812
|
+
static string SanitizeErrorMessage(string error);
|
|
4810
4813
|
};
|
|
4811
4814
|
|
|
4812
4815
|
} // namespace duckdb
|
|
@@ -17041,6 +17044,11 @@ typedef struct {
|
|
|
17041
17044
|
duckdb_hugeint value;
|
|
17042
17045
|
} duckdb_decimal;
|
|
17043
17046
|
|
|
17047
|
+
typedef struct {
|
|
17048
|
+
char *data;
|
|
17049
|
+
idx_t size;
|
|
17050
|
+
} duckdb_string;
|
|
17051
|
+
|
|
17044
17052
|
typedef struct {
|
|
17045
17053
|
void *data;
|
|
17046
17054
|
idx_t size;
|
|
@@ -17488,12 +17496,21 @@ DUCKDB_API duckdb_timestamp duckdb_value_timestamp(duckdb_result *result, idx_t
|
|
|
17488
17496
|
DUCKDB_API duckdb_interval duckdb_value_interval(duckdb_result *result, idx_t col, idx_t row);
|
|
17489
17497
|
|
|
17490
17498
|
/*!
|
|
17491
|
-
*
|
|
17492
|
-
The
|
|
17499
|
+
* DEPRECATED: use duckdb_value_string instead. This function does not work correctly if the string contains null bytes.
|
|
17500
|
+
* returns: The text value at the specified location as a null-terminated string, or nullptr if the value cannot be
|
|
17501
|
+
converted. The result must be freed with `duckdb_free`.
|
|
17493
17502
|
*/
|
|
17494
17503
|
DUCKDB_API char *duckdb_value_varchar(duckdb_result *result, idx_t col, idx_t row);
|
|
17495
17504
|
|
|
17505
|
+
/*!
|
|
17506
|
+
* returns: The string value at the specified location.
|
|
17507
|
+
The `data` member of the result must be freed with `duckdb_free`.
|
|
17508
|
+
*/
|
|
17509
|
+
DUCKDB_API duckdb_string duckdb_value_string(duckdb_result *result, idx_t col, idx_t row);
|
|
17510
|
+
|
|
17496
17511
|
/*!
|
|
17512
|
+
* DEPRECATED: use duckdb_value_string_internal instead. This function does not work correctly if the string contains
|
|
17513
|
+
null bytes.
|
|
17497
17514
|
* returns: The char* value at the specified location. ONLY works on VARCHAR columns and does not auto-cast.
|
|
17498
17515
|
If the column is NOT a VARCHAR column this function will return NULL.
|
|
17499
17516
|
|
|
@@ -17501,6 +17518,16 @@ The result must NOT be freed.
|
|
|
17501
17518
|
*/
|
|
17502
17519
|
DUCKDB_API char *duckdb_value_varchar_internal(duckdb_result *result, idx_t col, idx_t row);
|
|
17503
17520
|
|
|
17521
|
+
/*!
|
|
17522
|
+
* NOTE: the returned size is computed with strlen, so this function does not yet work correctly if the string contains
|
|
17523
|
+
null bytes.
|
|
17524
|
+
* returns: The duckdb_string value at the specified location. ONLY works on VARCHAR columns and does not auto-cast.
|
|
17525
|
+
If the column is NOT a VARCHAR column this function will return a duckdb_string with data set to nullptr and size 0.
|
|
17526
|
+
|
|
17527
|
+
The result must NOT be freed.
|
|
17528
|
+
*/
|
|
17529
|
+
DUCKDB_API duckdb_string duckdb_value_string_internal(duckdb_result *result, idx_t col, idx_t row);
|
|
17530
|
+
|
|
17504
17531
|
/*!
|
|
17505
17532
|
* returns: The duckdb_blob value at the specified location. Returns a blob with blob.data set to nullptr if the
|
|
17506
17533
|
value cannot be converted. The resulting "blob.data" must be freed with `duckdb_free.`
|
|
@@ -27933,7 +27960,7 @@ private:
|
|
|
27933
27960
|
bool TryParseComplexCSV(DataChunk &insert_chunk, string &error_message);
|
|
27934
27961
|
|
|
27935
27962
|
//! Adds a value to the current row
|
|
27936
|
-
void AddValue(
|
|
27963
|
+
void AddValue(string_t str_val, idx_t &column, vector<idx_t> &escape_positions, bool has_quotes);
|
|
27937
27964
|
//! Adds a row to the insert_chunk, returns true if the chunk is filled as a result of this row being added
|
|
27938
27965
|
bool AddRow(DataChunk &insert_chunk, idx_t &column);
|
|
27939
27966
|
//! Finalizes a chunk, parsing all values that have been added so far and adding them to the insert_chunk
|