duckdb 0.5.2-dev486.0 → 0.5.2-dev490.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
- "version": "0.5.2-dev486.0",
4
+ "version": "0.5.2-dev490.0",
5
5
  "description": "DuckDB node.js API",
6
6
  "gypfile": true,
7
7
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -76745,6 +76745,7 @@ bool BufferedCSVReader::TryParseComplexCSV(DataChunk &insert_chunk, string &erro
76745
76745
  bool finished_chunk = false;
76746
76746
  idx_t column = 0;
76747
76747
  vector<idx_t> escape_positions;
76748
+ bool has_quotes = false;
76748
76749
  uint8_t delimiter_pos = 0, escape_pos = 0, quote_pos = 0;
76749
76750
  idx_t offset = 0;
76750
76751
 
@@ -76807,9 +76808,10 @@ normal:
76807
76808
  } while (ReadBuffer(start));
76808
76809
  goto final_state;
76809
76810
  add_value:
76810
- AddValue(buffer.get() + start, position - start - offset, column, escape_positions);
76811
+ AddValue(buffer.get() + start, position - start - offset, column, escape_positions, has_quotes);
76811
76812
  // increase position by 1 and move start to the new position
76812
76813
  offset = 0;
76814
+ has_quotes = false;
76813
76815
  start = ++position;
76814
76816
  if (position >= buffer_size && !ReadBuffer(start)) {
76815
76817
  // file ends right after delimiter, go to final state
@@ -76819,10 +76821,11 @@ add_value:
76819
76821
  add_row : {
76820
76822
  // check type of newline (\r or \n)
76821
76823
  bool carriage_return = buffer[position] == '\r';
76822
- AddValue(buffer.get() + start, position - start - offset, column, escape_positions);
76824
+ AddValue(buffer.get() + start, position - start - offset, column, escape_positions, has_quotes);
76823
76825
  finished_chunk = AddRow(insert_chunk, column);
76824
76826
  // increase position by 1 and move start to the new position
76825
76827
  offset = 0;
76828
+ has_quotes = false;
76826
76829
  start = ++position;
76827
76830
  if (position >= buffer_size && !ReadBuffer(start)) {
76828
76831
  // file ends right after newline, go to final state
@@ -76844,6 +76847,7 @@ in_quotes:
76844
76847
  // this state parses the remainder of a quoted value
76845
76848
  quote_pos = 0;
76846
76849
  escape_pos = 0;
76850
+ has_quotes = true;
76847
76851
  position++;
76848
76852
  do {
76849
76853
  for (; position < buffer_size; position++) {
@@ -76955,7 +76959,7 @@ final_state:
76955
76959
  }
76956
76960
  if (column > 0 || position > start) {
76957
76961
  // remaining values to be added to the chunk
76958
- AddValue(buffer.get() + start, position - start - offset, column, escape_positions);
76962
+ AddValue(buffer.get() + start, position - start - offset, column, escape_positions, has_quotes);
76959
76963
  finished_chunk = AddRow(insert_chunk, column);
76960
76964
  }
76961
76965
  // final stage, only reached after parsing the file is finished
@@ -76973,6 +76977,7 @@ bool BufferedCSVReader::TryParseSimpleCSV(DataChunk &insert_chunk, string &error
76973
76977
  bool finished_chunk = false;
76974
76978
  idx_t column = 0;
76975
76979
  idx_t offset = 0;
76980
+ bool has_quotes = false;
76976
76981
  vector<idx_t> escape_positions;
76977
76982
 
76978
76983
  // read values into the buffer (if any)
@@ -77014,9 +77019,10 @@ normal:
77014
77019
  // file ends during normal scan: go to end state
77015
77020
  goto final_state;
77016
77021
  add_value:
77017
- AddValue(buffer.get() + start, position - start - offset, column, escape_positions);
77022
+ AddValue(buffer.get() + start, position - start - offset, column, escape_positions, has_quotes);
77018
77023
  // increase position by 1 and move start to the new position
77019
77024
  offset = 0;
77025
+ has_quotes = false;
77020
77026
  start = ++position;
77021
77027
  if (position >= buffer_size && !ReadBuffer(start)) {
77022
77028
  // file ends right after delimiter, go to final state
@@ -77026,10 +77032,11 @@ add_value:
77026
77032
  add_row : {
77027
77033
  // check type of newline (\r or \n)
77028
77034
  bool carriage_return = buffer[position] == '\r';
77029
- AddValue(buffer.get() + start, position - start - offset, column, escape_positions);
77035
+ AddValue(buffer.get() + start, position - start - offset, column, escape_positions, has_quotes);
77030
77036
  finished_chunk = AddRow(insert_chunk, column);
77031
77037
  // increase position by 1 and move start to the new position
77032
77038
  offset = 0;
77039
+ has_quotes = false;
77033
77040
  start = ++position;
77034
77041
  if (position >= buffer_size && !ReadBuffer(start)) {
77035
77042
  // file ends right after delimiter, go to final state
@@ -77049,6 +77056,7 @@ add_row : {
77049
77056
  in_quotes:
77050
77057
  /* state: in_quotes */
77051
77058
  // this state parses the remainder of a quoted value
77059
+ has_quotes = true;
77052
77060
  position++;
77053
77061
  do {
77054
77062
  for (; position < buffer_size; position++) {
@@ -77135,7 +77143,7 @@ final_state:
77135
77143
 
77136
77144
  if (column > 0 || position > start) {
77137
77145
  // remaining values to be added to the chunk
77138
- AddValue(buffer.get() + start, position - start - offset, column, escape_positions);
77146
+ AddValue(buffer.get() + start, position - start - offset, column, escape_positions, has_quotes);
77139
77147
  finished_chunk = AddRow(insert_chunk, column);
77140
77148
  }
77141
77149
 
@@ -77235,7 +77243,8 @@ bool BufferedCSVReader::TryParseCSV(ParserMode parser_mode, DataChunk &insert_ch
77235
77243
  }
77236
77244
  }
77237
77245
 
77238
- void BufferedCSVReader::AddValue(char *str_val, idx_t length, idx_t &column, vector<idx_t> &escape_positions) {
77246
+ void BufferedCSVReader::AddValue(char *str_val, idx_t length, idx_t &column, vector<idx_t> &escape_positions,
77247
+ bool has_quotes) {
77239
77248
  if (length == 0 && column == 0) {
77240
77249
  row_empty = true;
77241
77250
  } else {
@@ -77266,8 +77275,9 @@ void BufferedCSVReader::AddValue(char *str_val, idx_t length, idx_t &column, vec
77266
77275
 
77267
77276
  str_val[length] = '\0';
77268
77277
 
77269
- // test against null string
77270
- if (!options.force_not_null[column] && strcmp(options.null_str.c_str(), str_val) == 0) {
77278
+ // test against null string, but only if the value was not quoted
77279
+ if ((!has_quotes || sql_types[column].id() != LogicalTypeId::VARCHAR) && !options.force_not_null[column] &&
77280
+ strcmp(options.null_str.c_str(), str_val) == 0) {
77271
77281
  FlatVector::SetNull(parse_chunk.data[column], row_entry, true);
77272
77282
  } else {
77273
77283
  auto &v = parse_chunk.data[column];
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "33b4ebcc3"
15
- #define DUCKDB_VERSION "v0.5.2-dev486"
14
+ #define DUCKDB_SOURCE_ID "04c95fcbd"
15
+ #define DUCKDB_VERSION "v0.5.2-dev490"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //
@@ -27270,7 +27270,7 @@ private:
27270
27270
  bool TryParseComplexCSV(DataChunk &insert_chunk, string &error_message);
27271
27271
 
27272
27272
  //! Adds a value to the current row
27273
- void AddValue(char *str_val, idx_t length, idx_t &column, vector<idx_t> &escape_positions);
27273
+ void AddValue(char *str_val, idx_t length, idx_t &column, vector<idx_t> &escape_positions, bool has_quotes);
27274
27274
  //! Adds a row to the insert_chunk, returns true if the chunk is filled as a result of this row being added
27275
27275
  bool AddRow(DataChunk &insert_chunk, idx_t &column);
27276
27276
  //! Finalizes a chunk, parsing all values that have been added so far and adding them to the insert_chunk