duckdb 0.5.2-dev1819.0 → 0.5.2-dev1840.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.5.2-dev1819.0",
5
+ "version": "0.5.2-dev1840.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -8617,7 +8617,7 @@ private:
8617
8617
  namespace duckdb {
8618
8618
 
8619
8619
  enum class UnicodeType { INVALID, ASCII, UNICODE };
8620
- enum class UnicodeInvalidReason { BYTE_MISMATCH, NULL_BYTE, INVALID_UNICODE };
8620
+ enum class UnicodeInvalidReason { BYTE_MISMATCH, INVALID_UNICODE };
8621
8621
 
8622
8622
  class Utf8Proc {
8623
8623
  public:
@@ -8845,7 +8845,7 @@ list<ColumnDataCollection> BoxRenderer::FetchRenderCollections(ClientContext &co
8845
8845
  }
8846
8846
 
8847
8847
  string ConvertRenderValue(const string &input) {
8848
- return StringUtil::Replace(input, "\n", "\\n");
8848
+ return StringUtil::Replace(StringUtil::Replace(input, "\n", "\\n"), string("\0", 1), "\\0");
8849
8849
  }
8850
8850
 
8851
8851
  string BoxRenderer::GetRenderValue(ColumnDataRowCollection &rows, idx_t c, idx_t r) {
@@ -30071,15 +30071,15 @@ PreservedError::PreservedError() : initialized(false) {
30071
30071
  }
30072
30072
 
30073
30073
  PreservedError::PreservedError(const Exception &exception)
30074
- : initialized(true), type(exception.type), raw_message(exception.RawMessage()) {
30074
+ : initialized(true), type(exception.type), raw_message(SanitizeErrorMessage(exception.RawMessage())) {
30075
30075
  }
30076
30076
 
30077
30077
  PreservedError::PreservedError(const std::exception &exception)
30078
- : initialized(true), type(ExceptionType::INVALID), raw_message(exception.what()) {
30078
+ : initialized(true), type(ExceptionType::INVALID), raw_message(SanitizeErrorMessage(exception.what())) {
30079
30079
  }
30080
30080
 
30081
30081
  PreservedError::PreservedError(const string &message)
30082
- : initialized(true), type(ExceptionType::INVALID), raw_message(message) {
30082
+ : initialized(true), type(ExceptionType::INVALID), raw_message(SanitizeErrorMessage(message)) {
30083
30083
  }
30084
30084
 
30085
30085
  const string &PreservedError::Message() {
@@ -30089,6 +30089,10 @@ const string &PreservedError::Message() {
30089
30089
  return final_message;
30090
30090
  }
30091
30091
 
30092
+ string PreservedError::SanitizeErrorMessage(string error) {
30093
+ return StringUtil::Replace(move(error), string("\0", 1), "\\0");
30094
+ }
30095
+
30092
30096
  void PreservedError::Throw(const string &prepended_message) const {
30093
30097
  D_ASSERT(initialized);
30094
30098
  if (!prepended_message.empty()) {
@@ -40041,6 +40045,9 @@ vector<string> StringUtil::Split(const string &input, const string &split) {
40041
40045
  }
40042
40046
 
40043
40047
  string StringUtil::Replace(string source, const string &from, const string &to) {
40048
+ if (from.empty()) {
40049
+ throw InternalException("Invalid argument to StringUtil::Replace - empty FROM");
40050
+ }
40044
40051
  idx_t start_pos = 0;
40045
40052
  while ((start_pos = source.find(from, start_pos)) != string::npos) {
40046
40053
  source.replace(start_pos, from.length(), to);
@@ -41812,7 +41819,7 @@ bool Blob::TryGetBlobSize(string_t str, idx_t &str_len, string *error_message) {
41812
41819
  }
41813
41820
  str_len++;
41814
41821
  i += 3;
41815
- } else if (data[i] >= 32 && data[i] <= 127) {
41822
+ } else if (data[i] <= 127) {
41816
41823
  str_len++;
41817
41824
  } else {
41818
41825
  string error = "Invalid byte encountered in STRING -> BLOB conversion. All non-ascii characters "
@@ -41846,7 +41853,7 @@ void Blob::ToBlob(string_t str, data_ptr_t output) {
41846
41853
  D_ASSERT(data[i + 1] == 'x');
41847
41854
  output[blob_idx++] = (byte_a << 4) + byte_b;
41848
41855
  i += 3;
41849
- } else if (data[i] >= 32 && data[i] <= 127) {
41856
+ } else if (data[i] <= 127) {
41850
41857
  output[blob_idx++] = data_t(data[i]);
41851
41858
  } else {
41852
41859
  throw ConversionException("Invalid byte encountered in STRING -> BLOB conversion. All non-ascii characters "
@@ -47731,12 +47738,6 @@ void string_t::Verify() const {
47731
47738
  }
47732
47739
  }
47733
47740
 
47734
- void string_t::VerifyNull() const {
47735
- for (idx_t i = 0; i < GetSize(); i++) {
47736
- D_ASSERT(GetDataUnsafe()[i] != '\0');
47737
- }
47738
- }
47739
-
47740
47741
  } // namespace duckdb
47741
47742
 
47742
47743
 
@@ -50430,6 +50431,17 @@ bool Value::NotDistinctFrom(const Value &lvalue, const Value &rvalue) {
50430
50431
  return ValueOperations::NotDistinctFrom(lvalue, rvalue);
50431
50432
  }
50432
50433
 
50434
+ static string SanitizeValue(string input) {
50435
+ // some results might contain padding spaces, e.g. when rendering
50436
+ // VARCHAR(10) and the string only has 6 characters, they will be padded
50437
+ // with spaces to 10 in the rendering. We don't do that here yet as we
50438
+ // are looking at internal structures. So just ignore any extra spaces
50439
+ // on the right
50440
+ StringUtil::RTrim(input);
50441
+ // for result checking code, replace null bytes with their escaped value (\0)
50442
+ return StringUtil::Replace(input, string("\0", 1), "\\0");
50443
+ }
50444
+
50433
50445
  bool Value::ValuesAreEqual(CastFunctionSet &set, GetCastFunctionInput &get_input, const Value &result_value,
50434
50446
  const Value &value) {
50435
50447
  if (result_value.IsNull() != value.IsNull()) {
@@ -50454,15 +50466,8 @@ bool Value::ValuesAreEqual(CastFunctionSet &set, GetCastFunctionInput &get_input
50454
50466
  }
50455
50467
  case LogicalTypeId::VARCHAR: {
50456
50468
  auto other = result_value.CastAs(set, get_input, LogicalType::VARCHAR);
50457
- // some results might contain padding spaces, e.g. when rendering
50458
- // VARCHAR(10) and the string only has 6 characters, they will be padded
50459
- // with spaces to 10 in the rendering. We don't do that here yet as we
50460
- // are looking at internal structures. So just ignore any extra spaces
50461
- // on the right
50462
- string left = other.str_value;
50463
- string right = value.str_value;
50464
- StringUtil::RTrim(left);
50465
- StringUtil::RTrim(right);
50469
+ string left = SanitizeValue(other.str_value);
50470
+ string right = SanitizeValue(value.str_value);
50466
50471
  return left == right;
50467
50472
  }
50468
50473
  default:
@@ -51767,7 +51772,7 @@ void Vector::Verify(Vector &vector_p, const SelectionVector &sel_p, idx_t count)
51767
51772
  D_ASSERT(!vector->auxiliary);
51768
51773
  }
51769
51774
  if (type.id() == LogicalTypeId::VARCHAR || type.id() == LogicalTypeId::JSON) {
51770
- // verify that there are no '\0' bytes in string values
51775
+ // verify that the string is correct unicode
51771
51776
  switch (vtype) {
51772
51777
  case VectorType::FLAT_VECTOR: {
51773
51778
  auto &validity = FlatVector::Validity(*vector);
@@ -51775,7 +51780,7 @@ void Vector::Verify(Vector &vector_p, const SelectionVector &sel_p, idx_t count)
51775
51780
  for (idx_t i = 0; i < count; i++) {
51776
51781
  auto oidx = sel->get_index(i);
51777
51782
  if (validity.RowIsValid(oidx)) {
51778
- strings[oidx].VerifyNull();
51783
+ strings[oidx].Verify();
51779
51784
  }
51780
51785
  }
51781
51786
  break;
@@ -79839,7 +79844,7 @@ normal:
79839
79844
  } while (ReadBuffer(start));
79840
79845
  goto final_state;
79841
79846
  add_value:
79842
- AddValue(buffer.get() + start, position - start - offset, column, escape_positions, has_quotes);
79847
+ AddValue(string_t(buffer.get() + start, position - start - offset), column, escape_positions, has_quotes);
79843
79848
  // increase position by 1 and move start to the new position
79844
79849
  offset = 0;
79845
79850
  has_quotes = false;
@@ -79852,7 +79857,7 @@ add_value:
79852
79857
  add_row : {
79853
79858
  // check type of newline (\r or \n)
79854
79859
  bool carriage_return = buffer[position] == '\r';
79855
- AddValue(buffer.get() + start, position - start - offset, column, escape_positions, has_quotes);
79860
+ AddValue(string_t(buffer.get() + start, position - start - offset), column, escape_positions, has_quotes);
79856
79861
  finished_chunk = AddRow(insert_chunk, column);
79857
79862
  // increase position by 1 and move start to the new position
79858
79863
  offset = 0;
@@ -79990,7 +79995,7 @@ final_state:
79990
79995
  }
79991
79996
  if (column > 0 || position > start) {
79992
79997
  // remaining values to be added to the chunk
79993
- AddValue(buffer.get() + start, position - start - offset, column, escape_positions, has_quotes);
79998
+ AddValue(string_t(buffer.get() + start, position - start - offset), column, escape_positions, has_quotes);
79994
79999
  finished_chunk = AddRow(insert_chunk, column);
79995
80000
  }
79996
80001
  // final stage, only reached after parsing the file is finished
@@ -80050,7 +80055,7 @@ normal:
80050
80055
  // file ends during normal scan: go to end state
80051
80056
  goto final_state;
80052
80057
  add_value:
80053
- AddValue(buffer.get() + start, position - start - offset, column, escape_positions, has_quotes);
80058
+ AddValue(string_t(buffer.get() + start, position - start - offset), column, escape_positions, has_quotes);
80054
80059
  // increase position by 1 and move start to the new position
80055
80060
  offset = 0;
80056
80061
  has_quotes = false;
@@ -80063,7 +80068,7 @@ add_value:
80063
80068
  add_row : {
80064
80069
  // check type of newline (\r or \n)
80065
80070
  bool carriage_return = buffer[position] == '\r';
80066
- AddValue(buffer.get() + start, position - start - offset, column, escape_positions, has_quotes);
80071
+ AddValue(string_t(buffer.get() + start, position - start - offset), column, escape_positions, has_quotes);
80067
80072
  finished_chunk = AddRow(insert_chunk, column);
80068
80073
  // increase position by 1 and move start to the new position
80069
80074
  offset = 0;
@@ -80174,7 +80179,7 @@ final_state:
80174
80179
 
80175
80180
  if (column > 0 || position > start) {
80176
80181
  // remaining values to be added to the chunk
80177
- AddValue(buffer.get() + start, position - start - offset, column, escape_positions, has_quotes);
80182
+ AddValue(string_t(buffer.get() + start, position - start - offset), column, escape_positions, has_quotes);
80178
80183
  finished_chunk = AddRow(insert_chunk, column);
80179
80184
  }
80180
80185
 
@@ -80275,8 +80280,8 @@ bool BufferedCSVReader::TryParseCSV(ParserMode parser_mode, DataChunk &insert_ch
80275
80280
  }
80276
80281
  }
80277
80282
 
80278
- void BufferedCSVReader::AddValue(char *str_val, idx_t length, idx_t &column, vector<idx_t> &escape_positions,
80279
- bool has_quotes) {
80283
+ void BufferedCSVReader::AddValue(string_t str_val, idx_t &column, vector<idx_t> &escape_positions, bool has_quotes) {
80284
+ auto length = str_val.GetSize();
80280
80285
  if (length == 0 && column == 0) {
80281
80286
  row_empty = true;
80282
80287
  } else {
@@ -80305,18 +80310,16 @@ void BufferedCSVReader::AddValue(char *str_val, idx_t length, idx_t &column, vec
80305
80310
  // insert the line number into the chunk
80306
80311
  idx_t row_entry = parse_chunk.size();
80307
80312
 
80308
- str_val[length] = '\0';
80309
-
80310
80313
  // test against null string, but only if the value was not quoted
80311
80314
  if ((!has_quotes || sql_types[column].id() != LogicalTypeId::VARCHAR) && !options.force_not_null[column] &&
80312
- strcmp(options.null_str.c_str(), str_val) == 0) {
80315
+ Equals::Operation(str_val, string_t(options.null_str))) {
80313
80316
  FlatVector::SetNull(parse_chunk.data[column], row_entry, true);
80314
80317
  } else {
80315
80318
  auto &v = parse_chunk.data[column];
80316
80319
  auto parse_data = FlatVector::GetData<string_t>(v);
80317
80320
  if (!escape_positions.empty()) {
80318
80321
  // remove escape characters (if any)
80319
- string old_val = str_val;
80322
+ string old_val = str_val.GetString();
80320
80323
  string new_val = "";
80321
80324
  idx_t prev_pos = 0;
80322
80325
  for (idx_t i = 0; i < escape_positions.size(); i++) {
@@ -80333,7 +80336,7 @@ void BufferedCSVReader::AddValue(char *str_val, idx_t length, idx_t &column, vec
80333
80336
  escape_positions.clear();
80334
80337
  parse_data[row_entry] = StringVector::AddStringOrBlob(v, string_t(new_val));
80335
80338
  } else {
80336
- parse_data[row_entry] = string_t(str_val, length);
80339
+ parse_data[row_entry] = str_val;
80337
80340
  }
80338
80341
  }
80339
80342
 
@@ -115875,20 +115878,44 @@ void UpperFun::RegisterFunction(BuiltinFunctions &set) {
115875
115878
  namespace duckdb {
115876
115879
 
115877
115880
  struct ChrOperator {
115878
- template <class TA, class TR>
115879
- static inline TR Operation(const TA &input) {
115880
- char c[5] = {'\0', '\0', '\0', '\0', '\0'};
115881
- int utf8_bytes = 4;
115881
+ static void GetCodepoint(int32_t input, char c[], int &utf8_bytes) {
115882
115882
  if (input < 0 || !Utf8Proc::CodepointToUtf8(input, utf8_bytes, &c[0])) {
115883
115883
  throw InvalidInputException("Invalid UTF8 Codepoint %d", input);
115884
115884
  }
115885
- return string_t(&c[0]);
115885
+ }
115886
+
115887
+ template <class TA, class TR>
115888
+ static inline TR Operation(const TA &input) {
115889
+ char c[5] = {'\0', '\0', '\0', '\0', '\0'};
115890
+ int utf8_bytes;
115891
+ GetCodepoint(input, c, utf8_bytes);
115892
+ return string_t(&c[0], utf8_bytes);
115886
115893
  }
115887
115894
  };
115888
115895
 
115896
+ #ifdef DUCKDB_DEBUG_NO_INLINE
115897
+ // the chr function depends on the data always being inlined (which is always possible, since it outputs max 4 bytes)
115898
+ // to enable chr when string inlining is disabled we create a special function here
115899
+ static void ChrFunction(DataChunk &args, ExpressionState &state, Vector &result) {
115900
+ auto &code_vec = args.data[0];
115901
+
115902
+ char c[5] = {'\0', '\0', '\0', '\0', '\0'};
115903
+ int utf8_bytes;
115904
+ UnaryExecutor::Execute<int32_t, string_t>(code_vec, result, args.size(), [&](int32_t input) {
115905
+ ChrOperator::GetCodepoint(input, c, utf8_bytes);
115906
+ return StringVector::AddString(result, &c[0], utf8_bytes);
115907
+ });
115908
+ }
115909
+ #endif
115910
+
115889
115911
  void CHR::RegisterFunction(BuiltinFunctions &set) {
115890
115912
  ScalarFunction chr("chr", {LogicalType::INTEGER}, LogicalType::VARCHAR,
115891
- ScalarFunction::UnaryFunction<int32_t, string_t, ChrOperator>);
115913
+ #ifdef DUCKDB_DEBUG_NO_INLINE
115914
+ ChrFunction
115915
+ #else
115916
+ ScalarFunction::UnaryFunction<int32_t, string_t, ChrOperator>
115917
+ #endif
115918
+ );
115892
115919
  set.AddFunction(chr);
115893
115920
  }
115894
115921
 
@@ -117985,14 +118012,14 @@ struct ASCIILCaseReader {
117985
118012
  }
117986
118013
  };
117987
118014
 
117988
- template <char PERCENTAGE, char UNDERSCORE, class READER = StandardCharacterReader>
118015
+ template <char PERCENTAGE, char UNDERSCORE, bool HAS_ESCAPE, class READER = StandardCharacterReader>
117989
118016
  bool TemplatedLikeOperator(const char *sdata, idx_t slen, const char *pdata, idx_t plen, char escape) {
117990
118017
  idx_t pidx = 0;
117991
118018
  idx_t sidx = 0;
117992
118019
  for (; pidx < plen && sidx < slen; pidx++) {
117993
118020
  char pchar = READER::Operation(pdata, pidx);
117994
118021
  char schar = READER::Operation(sdata, sidx);
117995
- if (pchar == escape) {
118022
+ if (HAS_ESCAPE && pchar == escape) {
117996
118023
  pidx++;
117997
118024
  if (pidx == plen) {
117998
118025
  throw SyntaxException("Like pattern must not end with escape character!");
@@ -118012,8 +118039,8 @@ bool TemplatedLikeOperator(const char *sdata, idx_t slen, const char *pdata, idx
118012
118039
  return true; /* tail is acceptable */
118013
118040
  }
118014
118041
  for (; sidx < slen; sidx++) {
118015
- if (TemplatedLikeOperator<PERCENTAGE, UNDERSCORE, READER>(sdata + sidx, slen - sidx, pdata + pidx,
118016
- plen - pidx, escape)) {
118042
+ if (TemplatedLikeOperator<PERCENTAGE, UNDERSCORE, HAS_ESCAPE, READER>(
118043
+ sdata + sidx, slen - sidx, pdata + pidx, plen - pidx, escape)) {
118017
118044
  return true;
118018
118045
  }
118019
118046
  }
@@ -118170,10 +118197,18 @@ static unique_ptr<FunctionData> LikeBindFunction(ClientContext &context, ScalarF
118170
118197
  }
118171
118198
 
118172
118199
  bool LikeOperatorFunction(const char *s, idx_t slen, const char *pattern, idx_t plen, char escape) {
118173
- return TemplatedLikeOperator<'%', '_'>(s, slen, pattern, plen, escape);
118200
+ return TemplatedLikeOperator<'%', '_', true>(s, slen, pattern, plen, escape);
118201
+ }
118202
+
118203
+ bool LikeOperatorFunction(const char *s, idx_t slen, const char *pattern, idx_t plen) {
118204
+ return TemplatedLikeOperator<'%', '_', false>(s, slen, pattern, plen, '\0');
118174
118205
  }
118175
118206
 
118176
- bool LikeOperatorFunction(string_t &s, string_t &pat, char escape = '\0') {
118207
+ bool LikeOperatorFunction(string_t &s, string_t &pat) {
118208
+ return LikeOperatorFunction(s.GetDataUnsafe(), s.GetSize(), pat.GetDataUnsafe(), pat.GetSize());
118209
+ }
118210
+
118211
+ bool LikeOperatorFunction(string_t &s, string_t &pat, char escape) {
118177
118212
  return LikeOperatorFunction(s.GetDataUnsafe(), s.GetSize(), pat.GetDataUnsafe(), pat.GetSize(), escape);
118178
118213
  }
118179
118214
 
@@ -118400,8 +118435,8 @@ struct NotILikeOperator {
118400
118435
  struct ILikeOperatorASCII {
118401
118436
  template <class TA, class TB, class TR>
118402
118437
  static inline TR Operation(TA str, TB pattern) {
118403
- return TemplatedLikeOperator<'%', '_', ASCIILCaseReader>(str.GetDataUnsafe(), str.GetSize(),
118404
- pattern.GetDataUnsafe(), pattern.GetSize(), '\0');
118438
+ return TemplatedLikeOperator<'%', '_', false, ASCIILCaseReader>(
118439
+ str.GetDataUnsafe(), str.GetSize(), pattern.GetDataUnsafe(), pattern.GetSize(), '\0');
118405
118440
  }
118406
118441
  };
118407
118442
 
@@ -128040,7 +128075,8 @@ vector<TestType> TestAllTypesFun::GetTestTypes() {
128040
128075
  result.emplace_back(LogicalType::INTERVAL, "interval", Value::INTERVAL(min_interval),
128041
128076
  Value::INTERVAL(max_interval));
128042
128077
  // strings/blobs
128043
- result.emplace_back(LogicalType::VARCHAR, "varchar", Value("🦆🦆🦆🦆🦆🦆"), Value("goose"));
128078
+ result.emplace_back(LogicalType::VARCHAR, "varchar", Value("🦆🦆🦆🦆🦆🦆"),
128079
+ Value(string("goo\x00se", 6)));
128044
128080
  result.emplace_back(LogicalType::JSON, "json", Value("🦆🦆🦆🦆🦆🦆"), Value("goose"));
128045
128081
  result.emplace_back(LogicalType::BLOB, "blob", Value::BLOB("thisisalongblob\\x00withnullbytes"),
128046
128082
  Value::BLOB("\\x00\\x00\\x00a"));
@@ -130142,6 +130178,8 @@ interval_t FetchDefaultValue::Operation();
130142
130178
  template <>
130143
130179
  char *FetchDefaultValue::Operation();
130144
130180
  template <>
130181
+ duckdb_string FetchDefaultValue::Operation();
130182
+ template <>
130145
130183
  duckdb_blob FetchDefaultValue::Operation();
130146
130184
 
130147
130185
  //===--------------------------------------------------------------------===//
@@ -130165,9 +130203,11 @@ struct ToCStringCastWrapper {
130165
130203
  auto result_size = result_string.GetSize();
130166
130204
  auto result_data = result_string.GetDataUnsafe();
130167
130205
 
130168
- result = (char *)duckdb_malloc(result_size + 1);
130169
- memcpy(result, result_data, result_size);
130170
- result[result_size] = '\0';
130206
+ char *allocated_data = (char *)duckdb_malloc(result_size + 1);
130207
+ memcpy(allocated_data, result_data, result_size);
130208
+ allocated_data[result_size] = '\0';
130209
+ result.data = allocated_data;
130210
+ result.size = result_size;
130171
130211
  return true;
130172
130212
  }
130173
130213
  };
@@ -130183,7 +130223,7 @@ struct FromCBlobCastWrapper {
130183
130223
  };
130184
130224
 
130185
130225
  template <>
130186
- bool FromCBlobCastWrapper::Operation(duckdb_blob input, char *&result);
130226
+ bool FromCBlobCastWrapper::Operation(duckdb_blob input, duckdb_string &result);
130187
130227
 
130188
130228
  template <class SOURCE_TYPE, class RESULT_TYPE, class OP>
130189
130229
  RESULT_TYPE TryCastCInternal(duckdb_result *result, idx_t col, idx_t row) {
@@ -130236,7 +130276,7 @@ bool CastDecimalCInternal(duckdb_result *source, RESULT_TYPE &result, idx_t col,
130236
130276
 
130237
130277
  //! DECIMAL -> VARCHAR
130238
130278
  template <>
130239
- bool CastDecimalCInternal(duckdb_result *source, char *&result, idx_t col, idx_t row);
130279
+ bool CastDecimalCInternal(duckdb_result *source, duckdb_string &result, idx_t col, idx_t row);
130240
130280
 
130241
130281
  //! DECIMAL -> DECIMAL (internal fetch)
130242
130282
  template <>
@@ -130264,7 +130304,7 @@ namespace duckdb {
130264
130304
 
130265
130305
  //! DECIMAL -> VARCHAR
130266
130306
  template <>
130267
- bool CastDecimalCInternal(duckdb_result *source, char *&result, idx_t col, idx_t row) {
130307
+ bool CastDecimalCInternal(duckdb_result *source, duckdb_string &result, idx_t col, idx_t row) {
130268
130308
  auto result_data = (duckdb::DuckDBResultData *)source->internal_data;
130269
130309
  auto &query_result = result_data->result;
130270
130310
  auto &source_type = query_result->types[col];
@@ -130293,9 +130333,10 @@ bool CastDecimalCInternal(duckdb_result *source, char *&result, idx_t col, idx_t
130293
130333
  default:
130294
130334
  throw duckdb::InternalException("Unimplemented internal type for decimal");
130295
130335
  }
130296
- result = (char *)duckdb_malloc(sizeof(char) * (result_string.GetSize() + 1));
130297
- memcpy(result, result_string.GetDataUnsafe(), result_string.GetSize());
130298
- result[result_string.GetSize()] = '\0';
130336
+ result.data = (char *)duckdb_malloc(sizeof(char) * (result_string.GetSize() + 1));
130337
+ memcpy(result.data, result_string.GetDataUnsafe(), result_string.GetSize());
130338
+ result.data[result_string.GetSize()] = '\0';
130339
+ result.size = result_string.GetSize();
130299
130340
  return true;
130300
130341
  }
130301
130342
 
@@ -130424,6 +130465,14 @@ char *FetchDefaultValue::Operation() {
130424
130465
  return nullptr;
130425
130466
  }
130426
130467
 
130468
+ template <>
130469
+ duckdb_string FetchDefaultValue::Operation() {
130470
+ duckdb_string result;
130471
+ result.data = nullptr;
130472
+ result.size = 0;
130473
+ return result;
130474
+ }
130475
+
130427
130476
  template <>
130428
130477
  duckdb_blob FetchDefaultValue::Operation() {
130429
130478
  duckdb_blob result;
@@ -130437,9 +130486,9 @@ duckdb_blob FetchDefaultValue::Operation() {
130437
130486
  //===--------------------------------------------------------------------===//
130438
130487
 
130439
130488
  template <>
130440
- bool FromCBlobCastWrapper::Operation(duckdb_blob input, char *&result) {
130489
+ bool FromCBlobCastWrapper::Operation(duckdb_blob input, duckdb_string &result) {
130441
130490
  string_t input_str((const char *)input.data, input.size);
130442
- return ToCStringCastWrapper<duckdb::CastFromBlob>::template Operation<string_t, char *>(input_str, result);
130491
+ return ToCStringCastWrapper<duckdb::CastFromBlob>::template Operation<string_t, duckdb_string>(input_str, result);
130443
130492
  }
130444
130493
 
130445
130494
  } // namespace duckdb
@@ -132976,6 +133025,8 @@ RESULT_TYPE GetInternalCValue(duckdb_result *result, idx_t col, idx_t row) {
132976
133025
  } // namespace duckdb
132977
133026
 
132978
133027
 
133028
+ #include <cstring>
133029
+
132979
133030
  using duckdb::date_t;
132980
133031
  using duckdb::dtime_t;
132981
133032
  using duckdb::FetchDefaultValue;
@@ -133088,17 +133139,31 @@ duckdb_interval duckdb_value_interval(duckdb_result *result, idx_t col, idx_t ro
133088
133139
  }
133089
133140
 
133090
133141
  char *duckdb_value_varchar(duckdb_result *result, idx_t col, idx_t row) {
133091
- return GetInternalCValue<char *, ToCStringCastWrapper<StringCast>>(result, col, row);
133142
+ return duckdb_value_string(result, col, row).data;
133143
+ }
133144
+
133145
+ duckdb_string duckdb_value_string(duckdb_result *result, idx_t col, idx_t row) {
133146
+ return GetInternalCValue<duckdb_string, ToCStringCastWrapper<StringCast>>(result, col, row);
133092
133147
  }
133093
133148
 
133094
133149
  char *duckdb_value_varchar_internal(duckdb_result *result, idx_t col, idx_t row) {
133150
+ return duckdb_value_string_internal(result, col, row).data;
133151
+ }
133152
+
133153
+ duckdb_string duckdb_value_string_internal(duckdb_result *result, idx_t col, idx_t row) {
133095
133154
  if (!CanFetchValue(result, col, row)) {
133096
- return nullptr;
133155
+ return FetchDefaultValue::Operation<duckdb_string>();
133097
133156
  }
133098
133157
  if (duckdb_column_type(result, col) != DUCKDB_TYPE_VARCHAR) {
133099
- return nullptr;
133100
- }
133101
- return UnsafeFetch<char *>(result, col, row);
133158
+ return FetchDefaultValue::Operation<duckdb_string>();
133159
+ }
133160
+ // FIXME: this obviously does not work when there are null bytes in the string
133161
+ // we need to remove the deprecated C result materialization to get that to work correctly
133162
+ // since the deprecated C result materialization stores strings as null-terminated
133163
+ duckdb_string res;
133164
+ res.data = UnsafeFetch<char *>(result, col, row);
133165
+ res.size = strlen(res.data);
133166
+ return res;
133102
133167
  }
133103
133168
 
133104
133169
  duckdb_blob duckdb_value_blob(duckdb_result *result, idx_t col, idx_t row) {
@@ -136411,9 +136476,6 @@ string ErrorManager::InvalidUnicodeError(const string &input, const string &cont
136411
136476
  }
136412
136477
  string base_message;
136413
136478
  switch (reason) {
136414
- case UnicodeInvalidReason::NULL_BYTE:
136415
- base_message = "Null-byte (\\0)";
136416
- break;
136417
136479
  case UnicodeInvalidReason::BYTE_MISMATCH:
136418
136480
  base_message = "Invalid unicode (byte sequence mismatch)";
136419
136481
  break;
@@ -145605,7 +145667,7 @@ string MaterializedQueryResult::ToString() {
145605
145667
  result += "\t";
145606
145668
  }
145607
145669
  auto val = row.GetValue(col_idx);
145608
- result += val.IsNull() ? "NULL" : val.ToString();
145670
+ result += val.IsNull() ? "NULL" : StringUtil::Replace(val.ToString(), string("\0", 1), "\\0");
145609
145671
  }
145610
145672
  result += "\n";
145611
145673
  }
@@ -276403,35 +276465,29 @@ UnicodeType Utf8Proc::Analyze(const char *s, size_t len, UnicodeInvalidReason *i
276403
276465
  int c = (int) s[i];
276404
276466
 
276405
276467
  if ((c & 0x80) == 0) {
276406
- /* 1 byte sequence */
276407
- if (c == '\0') {
276408
- /* NULL byte not allowed */
276409
- AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::NULL_BYTE);
276410
- return UnicodeType::INVALID;
276411
- }
276468
+ continue;
276469
+ }
276470
+ int first_pos_seq = i;
276471
+
276472
+ if ((c & 0xE0) == 0xC0) {
276473
+ /* 2 byte sequence */
276474
+ int utf8char = c & 0x1F;
276475
+ type = UTF8ExtraByteLoop<1, 0x000780>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
276476
+ } else if ((c & 0xF0) == 0xE0) {
276477
+ /* 3 byte sequence */
276478
+ int utf8char = c & 0x0F;
276479
+ type = UTF8ExtraByteLoop<2, 0x00F800>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
276480
+ } else if ((c & 0xF8) == 0xF0) {
276481
+ /* 4 byte sequence */
276482
+ int utf8char = c & 0x07;
276483
+ type = UTF8ExtraByteLoop<3, 0x1F0000>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
276412
276484
  } else {
276413
- int first_pos_seq = i;
276414
-
276415
- if ((c & 0xE0) == 0xC0) {
276416
- /* 2 byte sequence */
276417
- int utf8char = c & 0x1F;
276418
- type = UTF8ExtraByteLoop<1, 0x000780>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
276419
- } else if ((c & 0xF0) == 0xE0) {
276420
- /* 3 byte sequence */
276421
- int utf8char = c & 0x0F;
276422
- type = UTF8ExtraByteLoop<2, 0x00F800>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
276423
- } else if ((c & 0xF8) == 0xF0) {
276424
- /* 4 byte sequence */
276425
- int utf8char = c & 0x07;
276426
- type = UTF8ExtraByteLoop<3, 0x1F0000>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
276427
- } else {
276428
- /* invalid UTF-8 start byte */
276429
- AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
276430
- return UnicodeType::INVALID;
276431
- }
276432
- if (type == UnicodeType::INVALID) {
276433
- return type;
276434
- }
276485
+ /* invalid UTF-8 start byte */
276486
+ AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
276487
+ return UnicodeType::INVALID;
276488
+ }
276489
+ if (type == UnicodeType::INVALID) {
276490
+ return type;
276435
276491
  }
276436
276492
  }
276437
276493
  return type;
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "bf5b49fcee"
15
- #define DUCKDB_VERSION "v0.5.2-dev1819"
14
+ #define DUCKDB_SOURCE_ID "a24535880b"
15
+ #define DUCKDB_VERSION "v0.5.2-dev1840"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //
@@ -4807,6 +4807,9 @@ private:
4807
4807
  string raw_message;
4808
4808
  //! The final message (stored in the preserved error for compatibility reasons with C-API)
4809
4809
  string final_message;
4810
+
4811
+ private:
4812
+ static string SanitizeErrorMessage(string error);
4810
4813
  };
4811
4814
 
4812
4815
  } // namespace duckdb
@@ -17041,6 +17044,11 @@ typedef struct {
17041
17044
  duckdb_hugeint value;
17042
17045
  } duckdb_decimal;
17043
17046
 
17047
+ typedef struct {
17048
+ char *data;
17049
+ idx_t size;
17050
+ } duckdb_string;
17051
+
17044
17052
  typedef struct {
17045
17053
  void *data;
17046
17054
  idx_t size;
@@ -17488,12 +17496,21 @@ DUCKDB_API duckdb_timestamp duckdb_value_timestamp(duckdb_result *result, idx_t
17488
17496
  DUCKDB_API duckdb_interval duckdb_value_interval(duckdb_result *result, idx_t col, idx_t row);
17489
17497
 
17490
17498
  /*!
17491
- * returns: The char* value at the specified location, or nullptr if the value cannot be converted.
17492
- The result must be freed with `duckdb_free`.
17499
+ * DEPRECATED: use duckdb_value_string instead. This function does not work correctly if the string contains null bytes.
17500
+ * returns: The text value at the specified location as a null-terminated string, or nullptr if the value cannot be
17501
+ converted. The result must be freed with `duckdb_free`.
17493
17502
  */
17494
17503
  DUCKDB_API char *duckdb_value_varchar(duckdb_result *result, idx_t col, idx_t row);
17495
17504
 
17505
+ /*!
17506
+ * returns: The string value at the specified location.
17507
+ The `data` member of the result must be freed with `duckdb_free`.
17508
+ */
17509
+ DUCKDB_API duckdb_string duckdb_value_string(duckdb_result *result, idx_t col, idx_t row);
17510
+
17496
17511
  /*!
17512
+ * DEPRECATED: use duckdb_value_string_internal instead. This function does not work correctly if the string contains
17513
+ null bytes.
17497
17514
  * returns: The char* value at the specified location. ONLY works on VARCHAR columns and does not auto-cast.
17498
17515
  If the column is NOT a VARCHAR column this function will return NULL.
17499
17516
 
@@ -17501,6 +17518,16 @@ The result must NOT be freed.
17501
17518
  */
17502
17519
  DUCKDB_API char *duckdb_value_varchar_internal(duckdb_result *result, idx_t col, idx_t row);
17503
17520
 
17521
+ /*!
17522
+ * NOTE: This function does not work correctly if the string contains null bytes, because the string length is
17523
+ computed with strlen.
17524
+ * returns: The duckdb_string value at the specified location. ONLY works on VARCHAR columns and does not auto-cast.
17525
+ If the column is NOT a VARCHAR column this function will return a duckdb_string with `data` set to NULL and `size` 0.
17526
+
17527
+ The result must NOT be freed.
17528
+ */
17529
+ DUCKDB_API duckdb_string duckdb_value_string_internal(duckdb_result *result, idx_t col, idx_t row);
17530
+
17504
17531
  /*!
17505
17532
  * returns: The duckdb_blob value at the specified location. Returns a blob with blob.data set to nullptr if the
17506
17533
  value cannot be converted. The resulting "blob.data" must be freed with `duckdb_free.`
@@ -27933,7 +27960,7 @@ private:
27933
27960
  bool TryParseComplexCSV(DataChunk &insert_chunk, string &error_message);
27934
27961
 
27935
27962
  //! Adds a value to the current row
27936
- void AddValue(char *str_val, idx_t length, idx_t &column, vector<idx_t> &escape_positions, bool has_quotes);
27963
+ void AddValue(string_t str_val, idx_t &column, vector<idx_t> &escape_positions, bool has_quotes);
27937
27964
  //! Adds a row to the insert_chunk, returns true if the chunk is filled as a result of this row being added
27938
27965
  bool AddRow(DataChunk &insert_chunk, idx_t &column);
27939
27966
  //! Finalizes a chunk, parsing all values that have been added so far and adding them to the insert_chunk