duckdb 0.7.2-dev2144.0 → 0.7.2-dev2233.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/column_reader.cpp +3 -0
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +7 -2
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +3 -0
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +71 -22
- package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +17 -13
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +0 -7
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +124 -29
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
- package/src/duckdb/src/function/table/read_csv.cpp +124 -58
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +4 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +8 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +5 -7
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +5 -1
- package/src/duckdb/src/include/duckdb/function/function.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +25 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +0 -2
- package/src/duckdb/src/main/settings/settings.cpp +3 -4
- package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +13 -0
- package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +9 -0
package/package.json
CHANGED
@@ -647,6 +647,7 @@ void StringColumnReader::PrepareDeltaLengthByteArray(ResizeableBuffer &buffer) {
|
|
647
647
|
auto length_data = (uint32_t *)length_buffer->ptr;
|
648
648
|
byte_array_data = make_uniq<Vector>(LogicalType::VARCHAR, value_count);
|
649
649
|
byte_array_count = value_count;
|
650
|
+
delta_offset = 0;
|
650
651
|
auto string_data = FlatVector::GetData<string_t>(*byte_array_data);
|
651
652
|
for (idx_t i = 0; i < value_count; i++) {
|
652
653
|
auto str_len = length_data[i];
|
@@ -674,6 +675,7 @@ void StringColumnReader::PrepareDeltaByteArray(ResizeableBuffer &buffer) {
|
|
674
675
|
auto suffix_data = (uint32_t *)suffix_buffer->ptr;
|
675
676
|
byte_array_data = make_uniq<Vector>(LogicalType::VARCHAR, prefix_count);
|
676
677
|
byte_array_count = prefix_count;
|
678
|
+
delta_offset = 0;
|
677
679
|
auto string_data = FlatVector::GetData<string_t>(*byte_array_data);
|
678
680
|
for (idx_t i = 0; i < prefix_count; i++) {
|
679
681
|
auto str_len = prefix_data[i] + suffix_data[i];
|
@@ -715,6 +717,7 @@ void StringColumnReader::DeltaByteArray(uint8_t *defines, idx_t num_values, parq
|
|
715
717
|
delta_offset++;
|
716
718
|
}
|
717
719
|
}
|
720
|
+
StringVector::AddHeapReference(result, *byte_array_data);
|
718
721
|
}
|
719
722
|
|
720
723
|
class ParquetStringVectorBuffer : public VectorBuffer {
|
@@ -970,7 +970,7 @@ struct ValueResultEquals {
|
|
970
970
|
};
|
971
971
|
|
972
972
|
bool ColumnDataCollection::ResultEquals(const ColumnDataCollection &left, const ColumnDataCollection &right,
|
973
|
-
string &error_message) {
|
973
|
+
string &error_message, bool ordered) {
|
974
974
|
if (left.ColumnCount() != right.ColumnCount()) {
|
975
975
|
error_message = "Column count mismatch";
|
976
976
|
return false;
|
@@ -985,6 +985,7 @@ bool ColumnDataCollection::ResultEquals(const ColumnDataCollection &left, const
|
|
985
985
|
for (idx_t c = 0; c < left.ColumnCount(); c++) {
|
986
986
|
auto lvalue = left_rows.GetValue(c, r);
|
987
987
|
auto rvalue = right_rows.GetValue(c, r);
|
988
|
+
|
988
989
|
if (!Value::DefaultValuesAreEqual(lvalue, rvalue)) {
|
989
990
|
error_message =
|
990
991
|
StringUtil::Format("%s <> %s (row: %lld, col: %lld)\n", lvalue.ToString(), rvalue.ToString(), r, c);
|
@@ -992,7 +993,11 @@ bool ColumnDataCollection::ResultEquals(const ColumnDataCollection &left, const
|
|
992
993
|
}
|
993
994
|
}
|
994
995
|
if (!error_message.empty()) {
|
995
|
-
|
996
|
+
if (ordered) {
|
997
|
+
return false;
|
998
|
+
} else {
|
999
|
+
break;
|
1000
|
+
}
|
996
1001
|
}
|
997
1002
|
}
|
998
1003
|
if (!error_message.empty()) {
|
@@ -42,6 +42,9 @@ BaseCSVReader::~BaseCSVReader() {
|
|
42
42
|
unique_ptr<CSVFileHandle> BaseCSVReader::OpenCSV(const BufferedCSVReaderOptions &options_p) {
|
43
43
|
auto file_handle = fs.OpenFile(options_p.file_path.c_str(), FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK,
|
44
44
|
options_p.compression, this->opener);
|
45
|
+
if (file_handle->CanSeek()) {
|
46
|
+
file_handle->Reset();
|
47
|
+
}
|
45
48
|
return make_uniq<CSVFileHandle>(std::move(file_handle));
|
46
49
|
}
|
47
50
|
|
@@ -15,6 +15,7 @@
|
|
15
15
|
#include "utf8proc.hpp"
|
16
16
|
#include "duckdb/parser/keyword_helper.hpp"
|
17
17
|
#include "duckdb/main/error_manager.hpp"
|
18
|
+
#include "duckdb/main/client_data.hpp"
|
18
19
|
|
19
20
|
#include <algorithm>
|
20
21
|
#include <cctype>
|
@@ -954,10 +955,10 @@ bool BufferedCSVReader::TryParseComplexCSV(DataChunk &insert_chunk, string &erro
|
|
954
955
|
bool has_quotes = false;
|
955
956
|
uint8_t delimiter_pos = 0, escape_pos = 0, quote_pos = 0;
|
956
957
|
idx_t offset = 0;
|
957
|
-
|
958
|
+
idx_t line_start = 0;
|
958
959
|
// read values into the buffer (if any)
|
959
960
|
if (position >= buffer_size) {
|
960
|
-
if (!ReadBuffer(start)) {
|
961
|
+
if (!ReadBuffer(start, line_start)) {
|
961
962
|
return true;
|
962
963
|
}
|
963
964
|
}
|
@@ -994,7 +995,7 @@ value_start:
|
|
994
995
|
goto in_quotes;
|
995
996
|
}
|
996
997
|
}
|
997
|
-
} while (ReadBuffer(start));
|
998
|
+
} while (ReadBuffer(start, line_start));
|
998
999
|
// file ends while scanning for quote/delimiter, go to final state
|
999
1000
|
goto final_state;
|
1000
1001
|
normal:
|
@@ -1011,7 +1012,7 @@ normal:
|
|
1011
1012
|
goto add_row;
|
1012
1013
|
}
|
1013
1014
|
}
|
1014
|
-
} while (ReadBuffer(start));
|
1015
|
+
} while (ReadBuffer(start, line_start));
|
1015
1016
|
goto final_state;
|
1016
1017
|
add_value:
|
1017
1018
|
AddValue(string_t(buffer.get() + start, position - start - offset), column, escape_positions, has_quotes);
|
@@ -1019,7 +1020,7 @@ add_value:
|
|
1019
1020
|
offset = 0;
|
1020
1021
|
has_quotes = false;
|
1021
1022
|
start = ++position;
|
1022
|
-
if (position >= buffer_size && !ReadBuffer(start)) {
|
1023
|
+
if (position >= buffer_size && !ReadBuffer(start, line_start)) {
|
1023
1024
|
// file ends right after delimiter, go to final state
|
1024
1025
|
goto final_state;
|
1025
1026
|
}
|
@@ -1029,14 +1030,17 @@ add_row : {
|
|
1029
1030
|
bool carriage_return = buffer[position] == '\r';
|
1030
1031
|
AddValue(string_t(buffer.get() + start, position - start - offset), column, escape_positions, has_quotes);
|
1031
1032
|
finished_chunk = AddRow(insert_chunk, column, error_message);
|
1033
|
+
|
1032
1034
|
if (!error_message.empty()) {
|
1033
1035
|
return false;
|
1034
1036
|
}
|
1035
1037
|
// increase position by 1 and move start to the new position
|
1036
1038
|
offset = 0;
|
1037
1039
|
has_quotes = false;
|
1038
|
-
|
1039
|
-
|
1040
|
+
position++;
|
1041
|
+
SkipEmptyLines();
|
1042
|
+
start = position;
|
1043
|
+
if (position >= buffer_size && !ReadBuffer(start, line_start)) {
|
1040
1044
|
// file ends right after newline, go to final state
|
1041
1045
|
goto final_state;
|
1042
1046
|
}
|
@@ -1069,7 +1073,7 @@ in_quotes:
|
|
1069
1073
|
goto handle_escape;
|
1070
1074
|
}
|
1071
1075
|
}
|
1072
|
-
} while (ReadBuffer(start));
|
1076
|
+
} while (ReadBuffer(start, line_start));
|
1073
1077
|
// still in quoted state at the end of the file, error:
|
1074
1078
|
error_message = StringUtil::Format("Error in file \"%s\" on line %s: unterminated quotes. (%s)", options.file_path,
|
1075
1079
|
GetLineNumberStr(linenr, linenr_estimated).c_str(), options.ToString());
|
@@ -1082,7 +1086,7 @@ unquote:
|
|
1082
1086
|
delimiter_pos = 0;
|
1083
1087
|
quote_pos = 0;
|
1084
1088
|
position++;
|
1085
|
-
if (position >= buffer_size && !ReadBuffer(start)) {
|
1089
|
+
if (position >= buffer_size && !ReadBuffer(start, line_start)) {
|
1086
1090
|
// file ends right after unquote, go to final state
|
1087
1091
|
offset = options.quote.size();
|
1088
1092
|
goto final_state;
|
@@ -1116,7 +1120,7 @@ unquote:
|
|
1116
1120
|
goto in_quotes;
|
1117
1121
|
}
|
1118
1122
|
}
|
1119
|
-
} while (ReadBuffer(start));
|
1123
|
+
} while (ReadBuffer(start, line_start));
|
1120
1124
|
error_message = StringUtil::Format(
|
1121
1125
|
"Error in file \"%s\" on line %s: quote should be followed by end of value, end of row or another quote. (%s)",
|
1122
1126
|
options.file_path, GetLineNumberStr(linenr, linenr_estimated).c_str(), options.ToString());
|
@@ -1142,7 +1146,7 @@ handle_escape:
|
|
1142
1146
|
goto in_quotes;
|
1143
1147
|
}
|
1144
1148
|
}
|
1145
|
-
} while (ReadBuffer(start));
|
1149
|
+
} while (ReadBuffer(start, line_start));
|
1146
1150
|
error_message =
|
1147
1151
|
StringUtil::Format("Error in file \"%s\" on line %s: neither QUOTE nor ESCAPE is proceeded by ESCAPE. (%s)",
|
1148
1152
|
options.file_path, GetLineNumberStr(linenr, linenr_estimated).c_str(), options.ToString());
|
@@ -1153,7 +1157,7 @@ carriage_return:
|
|
1153
1157
|
if (buffer[position] == '\n') {
|
1154
1158
|
// newline after carriage return: skip
|
1155
1159
|
start = ++position;
|
1156
|
-
if (position >= buffer_size && !ReadBuffer(start)) {
|
1160
|
+
if (position >= buffer_size && !ReadBuffer(start, line_start)) {
|
1157
1161
|
// file ends right after newline, go to final state
|
1158
1162
|
goto final_state;
|
1159
1163
|
}
|
@@ -1170,6 +1174,7 @@ final_state:
|
|
1170
1174
|
// remaining values to be added to the chunk
|
1171
1175
|
AddValue(string_t(buffer.get() + start, position - start - offset), column, escape_positions, has_quotes);
|
1172
1176
|
finished_chunk = AddRow(insert_chunk, column, error_message);
|
1177
|
+
SkipEmptyLines();
|
1173
1178
|
if (!error_message.empty()) {
|
1174
1179
|
return false;
|
1175
1180
|
}
|
@@ -1184,6 +1189,18 @@ final_state:
|
|
1184
1189
|
return true;
|
1185
1190
|
}
|
1186
1191
|
|
1192
|
+
void BufferedCSVReader::SkipEmptyLines() {
|
1193
|
+
if (parse_chunk.data.size() == 1) {
|
1194
|
+
// Empty lines are null data.
|
1195
|
+
return;
|
1196
|
+
}
|
1197
|
+
for (; position < buffer_size; position++) {
|
1198
|
+
if (!StringUtil::CharacterIsNewline(buffer[position])) {
|
1199
|
+
return;
|
1200
|
+
}
|
1201
|
+
}
|
1202
|
+
}
|
1203
|
+
|
1187
1204
|
bool BufferedCSVReader::TryParseSimpleCSV(DataChunk &insert_chunk, string &error_message) {
|
1188
1205
|
// used for parsing algorithm
|
1189
1206
|
bool finished_chunk = false;
|
@@ -1192,12 +1209,14 @@ bool BufferedCSVReader::TryParseSimpleCSV(DataChunk &insert_chunk, string &error
|
|
1192
1209
|
bool has_quotes = false;
|
1193
1210
|
vector<idx_t> escape_positions;
|
1194
1211
|
|
1212
|
+
idx_t line_start = position;
|
1195
1213
|
// read values into the buffer (if any)
|
1196
1214
|
if (position >= buffer_size) {
|
1197
|
-
if (!ReadBuffer(start)) {
|
1215
|
+
if (!ReadBuffer(start, line_start)) {
|
1198
1216
|
return true;
|
1199
1217
|
}
|
1200
1218
|
}
|
1219
|
+
|
1201
1220
|
// start parsing the first value
|
1202
1221
|
goto value_start;
|
1203
1222
|
value_start:
|
@@ -1227,7 +1246,7 @@ normal:
|
|
1227
1246
|
goto add_row;
|
1228
1247
|
}
|
1229
1248
|
}
|
1230
|
-
} while (ReadBuffer(start));
|
1249
|
+
} while (ReadBuffer(start, line_start));
|
1231
1250
|
// file ends during normal scan: go to end state
|
1232
1251
|
goto final_state;
|
1233
1252
|
add_value:
|
@@ -1236,7 +1255,7 @@ add_value:
|
|
1236
1255
|
offset = 0;
|
1237
1256
|
has_quotes = false;
|
1238
1257
|
start = ++position;
|
1239
|
-
if (position >= buffer_size && !ReadBuffer(start)) {
|
1258
|
+
if (position >= buffer_size && !ReadBuffer(start, line_start)) {
|
1240
1259
|
// file ends right after delimiter, go to final state
|
1241
1260
|
goto final_state;
|
1242
1261
|
}
|
@@ -1249,14 +1268,19 @@ add_row : {
|
|
1249
1268
|
return false;
|
1250
1269
|
}
|
1251
1270
|
finished_chunk = AddRow(insert_chunk, column, error_message);
|
1271
|
+
if (context.client_data->max_line_length < position - line_start) {
|
1272
|
+
context.client_data->max_line_length = position - line_start;
|
1273
|
+
}
|
1252
1274
|
if (!error_message.empty()) {
|
1253
1275
|
return false;
|
1254
1276
|
}
|
1255
1277
|
// increase position by 1 and move start to the new position
|
1256
1278
|
offset = 0;
|
1257
1279
|
has_quotes = false;
|
1258
|
-
|
1259
|
-
|
1280
|
+
position++;
|
1281
|
+
start = position;
|
1282
|
+
line_start = position;
|
1283
|
+
if (position >= buffer_size && !ReadBuffer(start, line_start)) {
|
1260
1284
|
// file ends right after delimiter, go to final state
|
1261
1285
|
goto final_state;
|
1262
1286
|
}
|
@@ -1265,6 +1289,14 @@ add_row : {
|
|
1265
1289
|
goto carriage_return;
|
1266
1290
|
} else {
|
1267
1291
|
SetNewLineDelimiter();
|
1292
|
+
SkipEmptyLines();
|
1293
|
+
|
1294
|
+
start = position;
|
1295
|
+
line_start = position;
|
1296
|
+
if (position >= buffer_size && !ReadBuffer(start, line_start)) {
|
1297
|
+
// file ends right after delimiter, go to final state
|
1298
|
+
goto final_state;
|
1299
|
+
}
|
1268
1300
|
// \n newline, move to value start
|
1269
1301
|
if (finished_chunk) {
|
1270
1302
|
return true;
|
@@ -1288,7 +1320,7 @@ in_quotes:
|
|
1288
1320
|
goto handle_escape;
|
1289
1321
|
}
|
1290
1322
|
}
|
1291
|
-
} while (ReadBuffer(start));
|
1323
|
+
} while (ReadBuffer(start, line_start));
|
1292
1324
|
// still in quoted state at the end of the file, error:
|
1293
1325
|
throw InvalidInputException("Error in file \"%s\" on line %s: unterminated quotes. (%s)", options.file_path,
|
1294
1326
|
GetLineNumberStr(linenr, linenr_estimated).c_str(), options.ToString());
|
@@ -1298,7 +1330,7 @@ unquote:
|
|
1298
1330
|
// in this state we expect either another quote (entering the quoted state again, and escaping the quote)
|
1299
1331
|
// or a delimiter/newline, ending the current value and moving on to the next value
|
1300
1332
|
position++;
|
1301
|
-
if (position >= buffer_size && !ReadBuffer(start)) {
|
1333
|
+
if (position >= buffer_size && !ReadBuffer(start, line_start)) {
|
1302
1334
|
// file ends right after unquote, go to final state
|
1303
1335
|
offset = 1;
|
1304
1336
|
goto final_state;
|
@@ -1325,7 +1357,7 @@ handle_escape:
|
|
1325
1357
|
/* state: handle_escape */
|
1326
1358
|
// escape should be followed by a quote or another escape character
|
1327
1359
|
position++;
|
1328
|
-
if (position >= buffer_size && !ReadBuffer(start)) {
|
1360
|
+
if (position >= buffer_size && !ReadBuffer(start, line_start)) {
|
1329
1361
|
error_message = StringUtil::Format(
|
1330
1362
|
"Error in file \"%s\" on line %s: neither QUOTE nor ESCAPE is proceeded by ESCAPE. (%s)", options.file_path,
|
1331
1363
|
GetLineNumberStr(linenr, linenr_estimated).c_str(), options.ToString());
|
@@ -1347,7 +1379,7 @@ carriage_return:
|
|
1347
1379
|
// newline after carriage return: skip
|
1348
1380
|
// increase position by 1 and move start to the new position
|
1349
1381
|
start = ++position;
|
1350
|
-
if (position >= buffer_size && !ReadBuffer(start)) {
|
1382
|
+
if (position >= buffer_size && !ReadBuffer(start, line_start)) {
|
1351
1383
|
// file ends right after delimiter, go to final state
|
1352
1384
|
goto final_state;
|
1353
1385
|
}
|
@@ -1357,6 +1389,14 @@ carriage_return:
|
|
1357
1389
|
if (finished_chunk) {
|
1358
1390
|
return true;
|
1359
1391
|
}
|
1392
|
+
SkipEmptyLines();
|
1393
|
+
start = position;
|
1394
|
+
line_start = position;
|
1395
|
+
if (position >= buffer_size && !ReadBuffer(start, line_start)) {
|
1396
|
+
// file ends right after delimiter, go to final state
|
1397
|
+
goto final_state;
|
1398
|
+
}
|
1399
|
+
|
1360
1400
|
goto value_start;
|
1361
1401
|
final_state:
|
1362
1402
|
if (finished_chunk) {
|
@@ -1367,6 +1407,10 @@ final_state:
|
|
1367
1407
|
// remaining values to be added to the chunk
|
1368
1408
|
AddValue(string_t(buffer.get() + start, position - start - offset), column, escape_positions, has_quotes);
|
1369
1409
|
finished_chunk = AddRow(insert_chunk, column, error_message);
|
1410
|
+
SkipEmptyLines();
|
1411
|
+
if (context.client_data->max_line_length < position - line_start) {
|
1412
|
+
context.client_data->max_line_length = position - line_start;
|
1413
|
+
}
|
1370
1414
|
if (!error_message.empty()) {
|
1371
1415
|
return false;
|
1372
1416
|
}
|
@@ -1382,7 +1426,10 @@ final_state:
|
|
1382
1426
|
return true;
|
1383
1427
|
}
|
1384
1428
|
|
1385
|
-
bool BufferedCSVReader::ReadBuffer(idx_t &start) {
|
1429
|
+
bool BufferedCSVReader::ReadBuffer(idx_t &start, idx_t &line_start) {
|
1430
|
+
if (start > buffer_size) {
|
1431
|
+
return false;
|
1432
|
+
}
|
1386
1433
|
auto old_buffer = std::move(buffer);
|
1387
1434
|
|
1388
1435
|
// the remaining part of the last buffer
|
@@ -1420,9 +1467,11 @@ bool BufferedCSVReader::ReadBuffer(idx_t &start) {
|
|
1420
1467
|
if (!bom_checked) {
|
1421
1468
|
bom_checked = true;
|
1422
1469
|
if (read_count >= 3 && buffer[0] == '\xEF' && buffer[1] == '\xBB' && buffer[2] == '\xBF') {
|
1470
|
+
start += 3;
|
1423
1471
|
position += 3;
|
1424
1472
|
}
|
1425
1473
|
}
|
1474
|
+
line_start = start;
|
1426
1475
|
|
1427
1476
|
return read_count > 0;
|
1428
1477
|
}
|
@@ -4,8 +4,8 @@
|
|
4
4
|
namespace duckdb {
|
5
5
|
|
6
6
|
CSVBuffer::CSVBuffer(ClientContext &context, idx_t buffer_size_p, CSVFileHandle &file_handle,
|
7
|
-
idx_t &global_csv_current_position)
|
8
|
-
: context(context), first_buffer(true) {
|
7
|
+
idx_t &global_csv_current_position, idx_t file_number_p)
|
8
|
+
: context(context), first_buffer(true), file_number(file_number_p) {
|
9
9
|
this->handle = AllocateBuffer(buffer_size_p);
|
10
10
|
|
11
11
|
auto buffer = Ptr();
|
@@ -19,23 +19,23 @@ CSVBuffer::CSVBuffer(ClientContext &context, idx_t buffer_size_p, CSVFileHandle
|
|
19
19
|
}
|
20
20
|
|
21
21
|
CSVBuffer::CSVBuffer(ClientContext &context, BufferHandle buffer_p, idx_t buffer_size_p, idx_t actual_size_p,
|
22
|
-
bool final_buffer, idx_t global_csv_current_position)
|
22
|
+
bool final_buffer, idx_t global_csv_current_position, idx_t file_number_p)
|
23
23
|
: context(context), handle(std::move(buffer_p)), actual_size(actual_size_p), last_buffer(final_buffer),
|
24
|
-
global_csv_start(global_csv_current_position) {
|
24
|
+
global_csv_start(global_csv_current_position), file_number(file_number_p) {
|
25
25
|
}
|
26
26
|
|
27
|
-
unique_ptr<CSVBuffer> CSVBuffer::Next(CSVFileHandle &file_handle, idx_t buffer_size,
|
28
|
-
idx_t
|
29
|
-
if (file_handle.FinishedReading()) {
|
30
|
-
// this was the last buffer
|
31
|
-
return nullptr;
|
32
|
-
}
|
33
|
-
|
27
|
+
unique_ptr<CSVBuffer> CSVBuffer::Next(CSVFileHandle &file_handle, idx_t buffer_size, idx_t &global_csv_current_position,
|
28
|
+
idx_t file_number_p) {
|
34
29
|
auto next_buffer = AllocateBuffer(buffer_size);
|
35
30
|
idx_t next_buffer_actual_size = file_handle.Read(next_buffer.Ptr(), buffer_size);
|
31
|
+
if (next_buffer_actual_size == 0) {
|
32
|
+
// We are done reading
|
33
|
+
return nullptr;
|
34
|
+
}
|
36
35
|
|
37
|
-
auto next_csv_buffer =
|
38
|
-
|
36
|
+
auto next_csv_buffer =
|
37
|
+
make_uniq<CSVBuffer>(context, std::move(next_buffer), buffer_size, next_buffer_actual_size,
|
38
|
+
file_handle.FinishedReading(), global_csv_current_position, file_number_p);
|
39
39
|
global_csv_current_position += next_buffer_actual_size;
|
40
40
|
return next_csv_buffer;
|
41
41
|
}
|
@@ -65,4 +65,8 @@ idx_t CSVBuffer::GetCSVGlobalStart() {
|
|
65
65
|
return global_csv_start;
|
66
66
|
}
|
67
67
|
|
68
|
+
idx_t CSVBuffer::GetFileNumber() {
|
69
|
+
return file_number;
|
70
|
+
}
|
71
|
+
|
68
72
|
} // namespace duckdb
|
@@ -74,11 +74,6 @@ void BufferedCSVReaderOptions::SetEscape(const string &input) {
|
|
74
74
|
this->has_escape = true;
|
75
75
|
}
|
76
76
|
|
77
|
-
void BufferedCSVReaderOptions::SetParallel(bool use_parallel) {
|
78
|
-
this->has_parallel = true;
|
79
|
-
this->use_parallel = use_parallel;
|
80
|
-
}
|
81
|
-
|
82
77
|
void BufferedCSVReaderOptions::SetDelimiter(const string &input) {
|
83
78
|
this->delimiter = StringUtil::Replace(input, "\\t", "\t");
|
84
79
|
this->has_delimiter = true;
|
@@ -126,8 +121,6 @@ void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value
|
|
126
121
|
}
|
127
122
|
if (loption == "auto_detect") {
|
128
123
|
auto_detect = ParseBoolean(value, loption);
|
129
|
-
} else if (loption == "parallel") {
|
130
|
-
SetParallel(ParseBoolean(value, loption));
|
131
124
|
} else if (loption == "sample_size") {
|
132
125
|
int64_t sample_size = ParseInteger(value, loption);
|
133
126
|
if (sample_size < 1 && sample_size != -1) {
|