duckdb 0.5.2-dev486.0 → 0.5.2-dev492.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +19 -9
- package/src/duckdb.hpp +3 -3
- package/src/parquet-amalgamation.cpp +29321 -29321
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -76745,6 +76745,7 @@ bool BufferedCSVReader::TryParseComplexCSV(DataChunk &insert_chunk, string &erro
|
|
|
76745
76745
|
bool finished_chunk = false;
|
|
76746
76746
|
idx_t column = 0;
|
|
76747
76747
|
vector<idx_t> escape_positions;
|
|
76748
|
+
bool has_quotes = false;
|
|
76748
76749
|
uint8_t delimiter_pos = 0, escape_pos = 0, quote_pos = 0;
|
|
76749
76750
|
idx_t offset = 0;
|
|
76750
76751
|
|
|
@@ -76807,9 +76808,10 @@ normal:
|
|
|
76807
76808
|
} while (ReadBuffer(start));
|
|
76808
76809
|
goto final_state;
|
|
76809
76810
|
add_value:
|
|
76810
|
-
AddValue(buffer.get() + start, position - start - offset, column, escape_positions);
|
|
76811
|
+
AddValue(buffer.get() + start, position - start - offset, column, escape_positions, has_quotes);
|
|
76811
76812
|
// increase position by 1 and move start to the new position
|
|
76812
76813
|
offset = 0;
|
|
76814
|
+
has_quotes = false;
|
|
76813
76815
|
start = ++position;
|
|
76814
76816
|
if (position >= buffer_size && !ReadBuffer(start)) {
|
|
76815
76817
|
// file ends right after delimiter, go to final state
|
|
@@ -76819,10 +76821,11 @@ add_value:
|
|
|
76819
76821
|
add_row : {
|
|
76820
76822
|
// check type of newline (\r or \n)
|
|
76821
76823
|
bool carriage_return = buffer[position] == '\r';
|
|
76822
|
-
AddValue(buffer.get() + start, position - start - offset, column, escape_positions);
|
|
76824
|
+
AddValue(buffer.get() + start, position - start - offset, column, escape_positions, has_quotes);
|
|
76823
76825
|
finished_chunk = AddRow(insert_chunk, column);
|
|
76824
76826
|
// increase position by 1 and move start to the new position
|
|
76825
76827
|
offset = 0;
|
|
76828
|
+
has_quotes = false;
|
|
76826
76829
|
start = ++position;
|
|
76827
76830
|
if (position >= buffer_size && !ReadBuffer(start)) {
|
|
76828
76831
|
// file ends right after newline, go to final state
|
|
@@ -76844,6 +76847,7 @@ in_quotes:
|
|
|
76844
76847
|
// this state parses the remainder of a quoted value
|
|
76845
76848
|
quote_pos = 0;
|
|
76846
76849
|
escape_pos = 0;
|
|
76850
|
+
has_quotes = true;
|
|
76847
76851
|
position++;
|
|
76848
76852
|
do {
|
|
76849
76853
|
for (; position < buffer_size; position++) {
|
|
@@ -76955,7 +76959,7 @@ final_state:
|
|
|
76955
76959
|
}
|
|
76956
76960
|
if (column > 0 || position > start) {
|
|
76957
76961
|
// remaining values to be added to the chunk
|
|
76958
|
-
AddValue(buffer.get() + start, position - start - offset, column, escape_positions);
|
|
76962
|
+
AddValue(buffer.get() + start, position - start - offset, column, escape_positions, has_quotes);
|
|
76959
76963
|
finished_chunk = AddRow(insert_chunk, column);
|
|
76960
76964
|
}
|
|
76961
76965
|
// final stage, only reached after parsing the file is finished
|
|
@@ -76973,6 +76977,7 @@ bool BufferedCSVReader::TryParseSimpleCSV(DataChunk &insert_chunk, string &error
|
|
|
76973
76977
|
bool finished_chunk = false;
|
|
76974
76978
|
idx_t column = 0;
|
|
76975
76979
|
idx_t offset = 0;
|
|
76980
|
+
bool has_quotes = false;
|
|
76976
76981
|
vector<idx_t> escape_positions;
|
|
76977
76982
|
|
|
76978
76983
|
// read values into the buffer (if any)
|
|
@@ -77014,9 +77019,10 @@ normal:
|
|
|
77014
77019
|
// file ends during normal scan: go to end state
|
|
77015
77020
|
goto final_state;
|
|
77016
77021
|
add_value:
|
|
77017
|
-
AddValue(buffer.get() + start, position - start - offset, column, escape_positions);
|
|
77022
|
+
AddValue(buffer.get() + start, position - start - offset, column, escape_positions, has_quotes);
|
|
77018
77023
|
// increase position by 1 and move start to the new position
|
|
77019
77024
|
offset = 0;
|
|
77025
|
+
has_quotes = false;
|
|
77020
77026
|
start = ++position;
|
|
77021
77027
|
if (position >= buffer_size && !ReadBuffer(start)) {
|
|
77022
77028
|
// file ends right after delimiter, go to final state
|
|
@@ -77026,10 +77032,11 @@ add_value:
|
|
|
77026
77032
|
add_row : {
|
|
77027
77033
|
// check type of newline (\r or \n)
|
|
77028
77034
|
bool carriage_return = buffer[position] == '\r';
|
|
77029
|
-
AddValue(buffer.get() + start, position - start - offset, column, escape_positions);
|
|
77035
|
+
AddValue(buffer.get() + start, position - start - offset, column, escape_positions, has_quotes);
|
|
77030
77036
|
finished_chunk = AddRow(insert_chunk, column);
|
|
77031
77037
|
// increase position by 1 and move start to the new position
|
|
77032
77038
|
offset = 0;
|
|
77039
|
+
has_quotes = false;
|
|
77033
77040
|
start = ++position;
|
|
77034
77041
|
if (position >= buffer_size && !ReadBuffer(start)) {
|
|
77035
77042
|
// file ends right after delimiter, go to final state
|
|
@@ -77049,6 +77056,7 @@ add_row : {
|
|
|
77049
77056
|
in_quotes:
|
|
77050
77057
|
/* state: in_quotes */
|
|
77051
77058
|
// this state parses the remainder of a quoted value
|
|
77059
|
+
has_quotes = true;
|
|
77052
77060
|
position++;
|
|
77053
77061
|
do {
|
|
77054
77062
|
for (; position < buffer_size; position++) {
|
|
@@ -77135,7 +77143,7 @@ final_state:
|
|
|
77135
77143
|
|
|
77136
77144
|
if (column > 0 || position > start) {
|
|
77137
77145
|
// remaining values to be added to the chunk
|
|
77138
|
-
AddValue(buffer.get() + start, position - start - offset, column, escape_positions);
|
|
77146
|
+
AddValue(buffer.get() + start, position - start - offset, column, escape_positions, has_quotes);
|
|
77139
77147
|
finished_chunk = AddRow(insert_chunk, column);
|
|
77140
77148
|
}
|
|
77141
77149
|
|
|
@@ -77235,7 +77243,8 @@ bool BufferedCSVReader::TryParseCSV(ParserMode parser_mode, DataChunk &insert_ch
|
|
|
77235
77243
|
}
|
|
77236
77244
|
}
|
|
77237
77245
|
|
|
77238
|
-
void BufferedCSVReader::AddValue(char *str_val, idx_t length, idx_t &column, vector<idx_t> &escape_positions) {
|
|
77246
|
+
void BufferedCSVReader::AddValue(char *str_val, idx_t length, idx_t &column, vector<idx_t> &escape_positions,
|
|
77247
|
+
bool has_quotes) {
|
|
77239
77248
|
if (length == 0 && column == 0) {
|
|
77240
77249
|
row_empty = true;
|
|
77241
77250
|
} else {
|
|
@@ -77266,8 +77275,9 @@ void BufferedCSVReader::AddValue(char *str_val, idx_t length, idx_t &column, vec
|
|
|
77266
77275
|
|
|
77267
77276
|
str_val[length] = '\0';
|
|
77268
77277
|
|
|
77269
|
-
// test against null string
|
|
77270
|
-
if (!options.force_not_null[column] && strcmp(options.null_str.c_str(), str_val) == 0) {
|
|
77278
|
+
// test against null string, but only if the value was not quoted
|
|
77279
|
+
if ((!has_quotes || sql_types[column].id() != LogicalTypeId::VARCHAR) && !options.force_not_null[column] &&
|
|
77280
|
+
strcmp(options.null_str.c_str(), str_val) == 0) {
|
|
77271
77281
|
FlatVector::SetNull(parse_chunk.data[column], row_entry, true);
|
|
77272
77282
|
} else {
|
|
77273
77283
|
auto &v = parse_chunk.data[column];
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.5.2-dev486"
|
|
14
|
+
#define DUCKDB_SOURCE_ID "27a7b4d19"
|
|
15
|
+
#define DUCKDB_VERSION "v0.5.2-dev492"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|
|
@@ -27270,7 +27270,7 @@ private:
|
|
|
27270
27270
|
bool TryParseComplexCSV(DataChunk &insert_chunk, string &error_message);
|
|
27271
27271
|
|
|
27272
27272
|
//! Adds a value to the current row
|
|
27273
|
-
void AddValue(char *str_val, idx_t length, idx_t &column, vector<idx_t> &escape_positions);
|
|
27273
|
+
void AddValue(char *str_val, idx_t length, idx_t &column, vector<idx_t> &escape_positions, bool has_quotes);
|
|
27274
27274
|
//! Adds a row to the insert_chunk, returns true if the chunk is filled as a result of this row being added
|
|
27275
27275
|
bool AddRow(DataChunk &insert_chunk, idx_t &column);
|
|
27276
27276
|
//! Finalizes a chunk, parsing all values that have been added so far and adding them to the insert_chunk
|