duckdb 0.6.2-dev735.0 → 0.6.2-dev758.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
   "name": "duckdb",
   "main": "./lib/duckdb.js",
   "types": "./lib/duckdb.d.ts",
- "version": "0.6.2-dev735.0",
+ "version": "0.6.2-dev758.0",
   "description": "DuckDB node.js API",
   "gypfile": true,
   "dependencies": {
@@ -160,7 +160,6 @@ void BaseCSVReader::AddValue(string_t str_val, idx_t &column, vector<idx_t> &esc
  } else {
  row_empty = false;
  }
-
  if (!sql_types.empty() && column == sql_types.size() && length == 0) {
  // skip a single trailing delimiter in last column
  return;
@@ -249,7 +248,7 @@ bool BaseCSVReader::AddRow(DataChunk &insert_chunk, idx_t &column, string &error
  return false;
  } else {
  throw InvalidInputException(
- "Error in file \"%s\" on line %s: expected %lld values per row, but got %d. (%s)",
+ "Error in file \"%s\" on line %s: expected %lld values per row, but got %d.\nParser options:\n%s",
  options.file_path, GetLineNumberStr(linenr, linenr_estimated).c_str(), sql_types.size(), column,
  options.ToString());
  }
@@ -309,7 +308,7 @@ void BaseCSVReader::VerifyUTF8(idx_t col_idx, idx_t row_idx, DataChunk &chunk, i
  int64_t error_line = linenr - (chunk.size() - row_idx) + 1 + offset;
  D_ASSERT(error_line >= 0);
  throw InvalidInputException("Error in file \"%s\" at line %llu in column \"%s\": "
- "%s. Parser options: %s",
+ "%s. Parser options:\n%s",
  options.file_path, error_line, col_name,
  ErrorManager::InvalidUnicodeError(s.GetString(), "CSV file"), options.ToString());
  }
@@ -332,29 +331,27 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, bool try_add_line) {
  // convert the columns in the parsed chunk to the types of the table
  insert_chunk.SetCardinality(parse_chunk);
  for (idx_t col_idx = 0; col_idx < sql_types.size(); col_idx++) {
- if (sql_types[col_idx].id() == LogicalTypeId::VARCHAR) {
+ auto insert_idx = insert_cols_idx[col_idx];
+ auto &type = sql_types[col_idx];
+ if (type.id() == LogicalTypeId::VARCHAR) {
  // target type is varchar: no need to convert
  // just test that all strings are valid utf-8 strings
  VerifyUTF8(col_idx);
- insert_chunk.data[insert_cols_idx[col_idx]].Reference(parse_chunk.data[col_idx]);
+ insert_chunk.data[insert_idx].Reference(parse_chunk.data[col_idx]);
  } else {
  string error_message;
  bool success;
- if (options.has_format[LogicalTypeId::DATE] && sql_types[col_idx].id() == LogicalTypeId::DATE) {
+ if (options.has_format[LogicalTypeId::DATE] && type.id() == LogicalTypeId::DATE) {
  // use the date format to cast the chunk
- success =
- TryCastDateVector(options, parse_chunk.data[col_idx], insert_chunk.data[insert_cols_idx[col_idx]],
- parse_chunk.size(), error_message);
- } else if (options.has_format[LogicalTypeId::TIMESTAMP] &&
- sql_types[col_idx].id() == LogicalTypeId::TIMESTAMP) {
+ success = TryCastDateVector(options, parse_chunk.data[col_idx], insert_chunk.data[insert_idx],
+ parse_chunk.size(), error_message);
+ } else if (options.has_format[LogicalTypeId::TIMESTAMP] && type.id() == LogicalTypeId::TIMESTAMP) {
  // use the date format to cast the chunk
- success = TryCastTimestampVector(options, parse_chunk.data[col_idx],
- insert_chunk.data[insert_cols_idx[col_idx]], parse_chunk.size(),
- error_message);
+ success = TryCastTimestampVector(options, parse_chunk.data[col_idx], insert_chunk.data[insert_idx],
+ parse_chunk.size(), error_message);
  } else {
  // target type is not varchar: perform a cast
- success = VectorOperations::DefaultTryCast(parse_chunk.data[col_idx],
- insert_chunk.data[insert_cols_idx[col_idx]],
+ success = VectorOperations::DefaultTryCast(parse_chunk.data[col_idx], insert_chunk.data[insert_idx],
  parse_chunk.size(), &error_message);
  }
  if (success) {
@@ -385,13 +382,13 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, bool try_add_line) {
  auto error_line = linenr - (parse_chunk.size() - row_idx) + 1;

  if (options.auto_detect) {
- throw InvalidInputException("%s in column %s, at line %llu. Parser "
- "options: %s. Consider either increasing the sample size "
+ throw InvalidInputException("%s in column %s, at line %llu.\n\nParser "
+ "options:\n%s.\n\nConsider either increasing the sample size "
  "(SAMPLE_SIZE=X [X rows] or SAMPLE_SIZE=-1 [all rows]), "
  "or skipping column conversion (ALL_VARCHAR=1)",
  error_message, col_name, error_line, options.ToString());
  } else {
- throw InvalidInputException("%s at line %llu in column %s. Parser options: %s ", error_message,
+ throw InvalidInputException("%s at line %llu in column %s. Parser options:\n%s ", error_message,
  error_line, col_name, options.ToString());
  }
  }
@@ -851,10 +851,13 @@ vector<LogicalType> BufferedCSVReader::SniffCSV(const vector<LogicalType> &reque
  // #######
  // type candidates, ordered by descending specificity (~ from high to low)
  vector<LogicalType> type_candidates = {
- LogicalType::VARCHAR, LogicalType::TIMESTAMP,
- LogicalType::DATE, LogicalType::TIME,
- LogicalType::DOUBLE, /* LogicalType::FLOAT,*/ LogicalType::BIGINT,
- LogicalType::INTEGER, /*LogicalType::SMALLINT, LogicalType::TINYINT,*/ LogicalType::BOOLEAN,
+ LogicalType::VARCHAR,
+ LogicalType::TIMESTAMP,
+ LogicalType::DATE,
+ LogicalType::TIME,
+ LogicalType::DOUBLE,
+ /* LogicalType::FLOAT,*/ LogicalType::BIGINT,
+ /*LogicalType::INTEGER,*/ /*LogicalType::SMALLINT, LogicalType::TINYINT,*/ LogicalType::BOOLEAN,
  LogicalType::SQLNULL};
  // format template candidates, ordered by descending specificity (~ from high to low)
  std::map<LogicalTypeId, vector<const char *>> format_template_candidates = {
@@ -251,13 +251,14 @@ bool BufferedCSVReaderOptions::SetBaseOption(const string &loption, const Value
  }

  std::string BufferedCSVReaderOptions::ToString() const {
- return "DELIMITER='" + delimiter + (has_delimiter ? "'" : (auto_detect ? "' (auto detected)" : "' (default)")) +
- ", QUOTE='" + quote + (has_quote ? "'" : (auto_detect ? "' (auto detected)" : "' (default)")) +
- ", ESCAPE='" + escape + (has_escape ? "'" : (auto_detect ? "' (auto detected)" : "' (default)")) +
- ", HEADER=" + std::to_string(header) +
+ return " file=" + file_path + "\n delimiter='" + delimiter +
+ (has_delimiter ? "'" : (auto_detect ? "' (auto detected)" : "' (default)")) + "\n quote='" + quote +
+ (has_quote ? "'" : (auto_detect ? "' (auto detected)" : "' (default)")) + "\n escape='" + escape +
+ (has_escape ? "'" : (auto_detect ? "' (auto detected)" : "' (default)")) +
+ "\n header=" + std::to_string(header) +
  (has_header ? "" : (auto_detect ? " (auto detected)" : "' (default)")) +
- ", SAMPLE_SIZE=" + std::to_string(sample_chunk_size * sample_chunks) +
- ", IGNORE_ERRORS=" + std::to_string(ignore_errors) + ", ALL_VARCHAR=" + std::to_string(all_varchar);
+ "\n sample_size=" + std::to_string(sample_chunk_size * sample_chunks) +
+ "\n ignore_erros=" + std::to_string(ignore_errors) + "\n all_varchar=" + std::to_string(all_varchar);
  }

  } // namespace duckdb
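Editor's note: with the change above, BufferedCSVReaderOptions::ToString() renders the parser options as a multi-line block instead of one comma-separated line. A rough sketch of the new output (the values are hypothetical, not taken from the diff; field names follow the source, including its "ignore_erros" spelling):

  file=data.csv
  delimiter=',' (auto detected)
  quote='"' (auto detected)
  escape='"' (auto detected)
  header=1 (auto detected)
  sample_size=20480
  ignore_erros=0
  all_varchar=0

This block is what the "Parser options:\n%s" placeholder in the updated error messages expands to.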
@@ -271,6 +271,24 @@ public:
  atomic<idx_t> bytes_read;
  //! Size of current file
  idx_t file_size;
+ //! The index of the next file to read (i.e. current file + 1)
+ idx_t file_index = 1;
+
+ double GetProgress(ReadCSVData &bind_data) const {
+ idx_t total_files = bind_data.files.size();
+
+ // get the progress WITHIN the current file
+ double progress;
+ if (file_size == 0) {
+ progress = 1.0;
+ } else {
+ progress = double(bytes_read) / double(file_size);
+ }
+ // now get the total percentage of files read
+ double percentage = double(file_index) / total_files;
+ percentage += (double(1) / double(total_files)) * progress;
+ return percentage * 100;
+ }

 private:
  //! File Handle for current file
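Editor's note: to illustrate the parallel-reader progress formula added above (hypothetical numbers, not from the diff): with bind_data.files.size() == 4, file_index == 2 and the current file half read, GetProgress() evaluates to

  progress   = bytes_read / file_size = 0.5
  percentage = 2/4 + (1/4) * 0.5      = 0.625
  result     = 0.625 * 100            = 62.5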
@@ -278,8 +296,6 @@

  shared_ptr<CSVBuffer> current_buffer;
  shared_ptr<CSVBuffer> next_buffer;
- //! The index of the next file to read (i.e. current file + 1)
- idx_t file_index = 1;

  //! Mutex to lock when getting next batch of bytes (Parallel Only)
  mutex main_mutex;
@@ -348,6 +364,7 @@ unique_ptr<CSVBufferRead> ParallelCSVGlobalState::Next(ClientContext &context, R
  }
  return result;
  }
+
 static unique_ptr<GlobalTableFunctionState> ParallelCSVInitGlobal(ClientContext &context,
  TableFunctionInitInput &input) {
  auto &bind_data = (ReadCSVData &)*input.bind_data;
@@ -359,7 +376,6 @@ static unique_ptr<GlobalTableFunctionState> ParallelCSVInitGlobal(ClientContext

  bind_data.options.file_path = bind_data.files[0];
  file_handle = ReadCSV::OpenCSV(bind_data.options, context);
-
  idx_t rows_to_skip = bind_data.options.skip_rows + (bind_data.options.has_header ? 1 : 0);
  return make_unique<ParallelCSVGlobalState>(context, move(file_handle), bind_data.files,
  context.db->NumberOfThreads(), bind_data.options.buffer_size,
@@ -379,12 +395,9 @@ public:
  CSVBufferRead previous_buffer;
  };

- unique_ptr<LocalTableFunctionState> ReadCSVInitLocal(ExecutionContext &context, TableFunctionInitInput &input,
- GlobalTableFunctionState *global_state_p) {
+ unique_ptr<LocalTableFunctionState> ParallelReadCSVInitLocal(ExecutionContext &context, TableFunctionInitInput &input,
+ GlobalTableFunctionState *global_state_p) {
  auto &csv_data = (ReadCSVData &)*input.bind_data;
- if (csv_data.single_threaded) {
- return nullptr;
- }
  auto &global_state = (ParallelCSVGlobalState &)*global_state_p;
  auto next_local_buffer = global_state.Next(context.client, csv_data);
  unique_ptr<ParallelCSVReader> csv_reader;
@@ -416,7 +429,6 @@ static void ParallelReadCSVFunction(ClientContext &context, TableFunctionInput &
  if (!next_chunk) {
  break;
  }
- // csv_local_state.previous_buffer = csv_local_state.csv_reader->buffer;
  csv_local_state.csv_reader->SetBufferRead(move(next_chunk));
  }
  csv_local_state.csv_reader->ParseCSV(output);
@@ -434,91 +446,172 @@ static void ParallelReadCSVFunction(ClientContext &context, TableFunctionInput &
  }
  }

- static idx_t CSVReaderGetBatchIndex(ClientContext &context, const FunctionData *bind_data_p,
- LocalTableFunctionState *local_state, GlobalTableFunctionState *global_state) {
- auto &bind_data = (ReadCSVData &)*bind_data_p;
- if (bind_data.single_threaded) {
- return 0;
- }
- auto &data = (ParallelCSVLocalState &)*local_state;
- return data.csv_reader->buffer->batch_index;
- }
-
  //===--------------------------------------------------------------------===//
  // Single-Threaded CSV Reader
  //===--------------------------------------------------------------------===//
  struct SingleThreadedCSVState : public GlobalTableFunctionState {
- //! The CSV reader
- unique_ptr<BufferedCSVReader> csv_reader;
+ explicit SingleThreadedCSVState(idx_t total_files) : total_files(total_files), next_file(0), progress_in_files(0) {
+ }
+
+ mutex csv_lock;
+ unique_ptr<BufferedCSVReader> initial_reader;
+ //! The total number of files to read from
+ idx_t total_files;
  //! The index of the next file to read (i.e. current file + 1)
- idx_t file_index;
- //! Total File Size
- idx_t file_size;
- //! How many bytes were read up to this point
- atomic<idx_t> bytes_read;
+ atomic<idx_t> next_file;
+ //! How far along we are in reading the current set of open files
+ //! This goes from [0...next_file] * 100
+ atomic<idx_t> progress_in_files;
+ //! The set of SQL types
+ vector<LogicalType> sql_types;

  idx_t MaxThreads() const override {
- return 1;
+ return total_files;
+ }
+
+ double GetProgress(ReadCSVData &bind_data) const {
+ D_ASSERT(total_files == bind_data.files.size());
+ D_ASSERT(progress_in_files <= total_files * 100);
+ return (double(progress_in_files) / double(total_files));
+ }
+
+ unique_ptr<BufferedCSVReader> GetCSVReader(ClientContext &context, ReadCSVData &bind_data, idx_t &file_index,
+ idx_t &total_size) {
+ BufferedCSVReaderOptions options;
+ {
+ lock_guard<mutex> l(csv_lock);
+ if (initial_reader) {
+ return move(initial_reader);
+ }
+ if (next_file >= total_files) {
+ return nullptr;
+ }
+ options = bind_data.options;
+ file_index = next_file;
+ next_file++;
+ }
+ // reuse csv_readers was created during binding
+ unique_ptr<BufferedCSVReader> result;
+ if (options.union_by_name) {
+ result = move(bind_data.union_readers[file_index]);
+ } else {
+ options.file_path = bind_data.files[file_index];
+ result = make_unique<BufferedCSVReader>(context, move(options), sql_types);
+ }
+ total_size = result->file_handle->FileSize();
+ return result;
+ }
+ };
+
+ struct SingleThreadedCSVLocalState : public LocalTableFunctionState {
+ public:
+ explicit SingleThreadedCSVLocalState() : bytes_read(0), total_size(0), current_progress(0), file_index(0) {
  }
+
+ //! The CSV reader
+ unique_ptr<BufferedCSVReader> csv_reader;
+ //! The current amount of bytes read by this reader
+ idx_t bytes_read;
+ //! The total amount of bytes in the file
+ idx_t total_size;
+ //! The current progress from 0..100
+ idx_t current_progress;
+ //! The file index of this reader
+ idx_t file_index;
  };

  static unique_ptr<GlobalTableFunctionState> SingleThreadedCSVInit(ClientContext &context,
  TableFunctionInitInput &input) {
  auto &bind_data = (ReadCSVData &)*input.bind_data;
- auto result = make_unique<SingleThreadedCSVState>();
+ auto result = make_unique<SingleThreadedCSVState>(bind_data.files.size());
  if (bind_data.initial_reader) {
- result->csv_reader = move(bind_data.initial_reader);
+ result->initial_reader = move(bind_data.initial_reader);
  } else if (bind_data.files.empty()) {
  // This can happen when a filename based filter pushdown has eliminated all possible files for this scan.
  return move(result);
  } else {
  bind_data.options.file_path = bind_data.files[0];
- result->csv_reader = make_unique<BufferedCSVReader>(context, bind_data.options, bind_data.sql_types);
+ result->initial_reader = make_unique<BufferedCSVReader>(context, bind_data.options, bind_data.sql_types);
+ if (bind_data.options.auto_detect) {
+ bind_data.options = result->initial_reader->options;
+ }
+ }
+ if (!bind_data.options.union_by_name) {
+ // if we are reading multiple files - run auto-detect only on the first file
+ // UNLESS union by name is turned on - in that case we assume that different files have different schemas
+ // as such, we need to re-run the auto detection on each file
+ bind_data.options.auto_detect = false;
+ }
+ result->next_file = 1;
+ if (result->initial_reader) {
+ result->sql_types = result->initial_reader->sql_types;
  }
- result->file_size = result->csv_reader->file_handle->FileSize();
- result->file_index = 1;
+ return move(result);
+ }
+
+ unique_ptr<LocalTableFunctionState> SingleThreadedReadCSVInitLocal(ExecutionContext &context,
+ TableFunctionInitInput &input,
+ GlobalTableFunctionState *global_state_p) {
+ auto &bind_data = (ReadCSVData &)*input.bind_data;
+ auto &data = (SingleThreadedCSVState &)*global_state_p;
+ auto result = make_unique<SingleThreadedCSVLocalState>();
+ result->csv_reader = data.GetCSVReader(context.client, bind_data, result->file_index, result->total_size);
  return move(result);
  }

  static void SingleThreadedCSVFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
  auto &bind_data = (ReadCSVData &)*data_p.bind_data;
  auto &data = (SingleThreadedCSVState &)*data_p.global_state;
-
- if (!data.csv_reader) {
+ auto &lstate = (SingleThreadedCSVLocalState &)*data_p.local_state;
+ if (!lstate.csv_reader) {
  // no csv_reader was set, this can happen when a filename-based filter has filtered out all possible files
  return;
  }

  do {
- data.csv_reader->ParseCSV(output);
- data.bytes_read = data.csv_reader->bytes_in_chunk;
- if (output.size() == 0 && data.file_index < bind_data.files.size()) {
- // exhausted this file, but we have more files we can read
- // open the next file and increment the counter
- bind_data.options.file_path = bind_data.files[data.file_index];
- // reuse csv_readers was created during binding
- if (bind_data.options.union_by_name) {
- data.csv_reader = move(bind_data.union_readers[data.file_index]);
- } else {
- data.csv_reader =
- make_unique<BufferedCSVReader>(context, bind_data.options, data.csv_reader->sql_types);
+ lstate.csv_reader->ParseCSV(output);
+ // update the number of bytes read
+ D_ASSERT(lstate.bytes_read <= lstate.csv_reader->bytes_in_chunk);
+ auto bytes_read = MinValue<idx_t>(lstate.total_size, lstate.csv_reader->bytes_in_chunk);
+ auto current_progress = lstate.total_size == 0 ? 100 : 100 * bytes_read / lstate.total_size;
+ if (current_progress > lstate.current_progress) {
+ if (current_progress > 100) {
+ throw InternalException("Progress should never exceed 100");
+ }
+ data.progress_in_files += current_progress - lstate.current_progress;
+ lstate.current_progress = current_progress;
+ }
+ if (output.size() == 0) {
+ // exhausted this file, but we might have more files we can read
+ auto csv_reader = data.GetCSVReader(context, bind_data, lstate.file_index, lstate.total_size);
+ // add any left-over progress for this file to the progress bar
+ if (lstate.current_progress < 100) {
+ data.progress_in_files += 100 - lstate.current_progress;
+ }
+ // reset the current progress
+ lstate.current_progress = 0;
+ lstate.bytes_read = 0;
+ lstate.csv_reader = move(csv_reader);
+ if (!lstate.csv_reader) {
+ // no more files - we are done
+ return;
  }
- data.file_index++;
+ lstate.bytes_read = 0;
  } else {
  break;
  }
  } while (true);

  if (bind_data.options.union_by_name) {
- data.csv_reader->SetNullUnionCols(output);
+ lstate.csv_reader->SetNullUnionCols(output);
  }
  if (bind_data.options.include_file_name) {
  auto &col = output.data[bind_data.filename_col_idx];
- col.SetValue(0, Value(data.csv_reader->options.file_path));
+ col.SetValue(0, Value(lstate.csv_reader->options.file_path));
  col.SetVectorType(VectorType::CONSTANT_VECTOR);
  }
  if (bind_data.options.include_parsed_hive_partitions) {
- auto partitions = HivePartitioning::Parse(data.csv_reader->options.file_path);
+ auto partitions = HivePartitioning::Parse(lstate.csv_reader->options.file_path);

  idx_t i = bind_data.hive_partition_col_idx;

@@ -531,7 +624,7 @@ static void SingleThreadedCSVFunction(ClientContext &context, TableFunctionInput
  for (auto &part : partitions) {
  if (bind_data.options.names[i] != part.first) {
  throw IOException("Hive partition names mismatch, expected '" + bind_data.options.names[i] +
- "' but found '" + part.first + "' for file '" + data.csv_reader->options.file_path +
+ "' but found '" + part.first + "' for file '" + lstate.csv_reader->options.file_path +
  "'");
  }
  auto &col = output.data[i++];
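Editor's note: a quick worked example of the single-threaded progress accounting added above (hypothetical numbers). progress_in_files accumulates up to 100 points per file, and GetProgress() divides by the file count, so with 3 files, two finished and the third 40% read:

  progress_in_files = 100 + 100 + 40 = 240
  GetProgress()     = 240 / 3        = 80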
@@ -553,6 +646,16 @@ static unique_ptr<GlobalTableFunctionState> ReadCSVInitGlobal(ClientContext &con
  }
  }

+ unique_ptr<LocalTableFunctionState> ReadCSVInitLocal(ExecutionContext &context, TableFunctionInitInput &input,
+ GlobalTableFunctionState *global_state_p) {
+ auto &csv_data = (ReadCSVData &)*input.bind_data;
+ if (csv_data.single_threaded) {
+ return SingleThreadedReadCSVInitLocal(context, input, global_state_p);
+ } else {
+ return ParallelReadCSVInitLocal(context, input, global_state_p);
+ }
+ }
+
  static void ReadCSVFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
  auto &bind_data = (ReadCSVData &)*data_p.bind_data;
  if (bind_data.single_threaded) {
@@ -562,6 +665,17 @@ static void ReadCSVFunction(ClientContext &context, TableFunctionInput &data_p,
  }
  }

+ static idx_t CSVReaderGetBatchIndex(ClientContext &context, const FunctionData *bind_data_p,
+ LocalTableFunctionState *local_state, GlobalTableFunctionState *global_state) {
+ auto &bind_data = (ReadCSVData &)*bind_data_p;
+ if (bind_data.single_threaded) {
+ auto &data = (SingleThreadedCSVLocalState &)*local_state;
+ return data.file_index;
+ }
+ auto &data = (ParallelCSVLocalState &)*local_state;
+ return data.csv_reader->buffer->batch_index;
+ }
+
  static void ReadCSVAddNamedParameters(TableFunction &table_function) {
  table_function.named_parameters["sep"] = LogicalType::VARCHAR;
  table_function.named_parameters["delim"] = LogicalType::VARCHAR;
@@ -592,21 +706,13 @@ static void ReadCSVAddNamedParameters(TableFunction &table_function) {
 double CSVReaderProgress(ClientContext &context, const FunctionData *bind_data_p,
  const GlobalTableFunctionState *global_state) {
  auto &bind_data = (ReadCSVData &)*bind_data_p;
- idx_t file_size, bytes_read;
  if (bind_data.single_threaded) {
- auto &data = (const SingleThreadedCSVState &)*global_state;
- file_size = data.file_size;
- bytes_read = data.bytes_read;
+ auto &data = (SingleThreadedCSVState &)*global_state;
+ return data.GetProgress(bind_data);
  } else {
  auto &data = (const ParallelCSVGlobalState &)*global_state;
- file_size = data.file_size;
- bytes_read = data.bytes_read;
- }
- if (file_size == 0) {
- return 100;
+ return data.GetProgress(bind_data);
  }
- auto percentage = (bytes_read * 100.0) / file_size;
- return percentage;
  }

  void CSVComplexFilterPushdown(ClientContext &context, LogicalGet &get, FunctionData *bind_data_p,
@@ -1,8 +1,8 @@
  #ifndef DUCKDB_VERSION
- #define DUCKDB_VERSION "0.6.2-dev735"
+ #define DUCKDB_VERSION "0.6.2-dev758"
  #endif
  #ifndef DUCKDB_SOURCE_ID
- #define DUCKDB_SOURCE_ID "b85fb31ebf"
+ #define DUCKDB_SOURCE_ID "cd29769dcd"
  #endif
  #include "duckdb/function/table/system_functions.hpp"
  #include "duckdb/main/database.hpp"
@@ -41,6 +41,11 @@ public:
  }
  return nullptr;
  }
+
+ bool IsEmpty() {
+ return head_count == 0 && get_count == 0 && put_count == 0 && post_count == 0 && total_bytes_received == 0 &&
+ total_bytes_sent == 0;
+ }
  };

  } // namespace duckdb
@@ -377,7 +377,7 @@ void QueryProfiler::QueryTreeToStream(std::ostream &ss) const {
  return;
  }

- if (context.client_data->http_stats) {
+ if (context.client_data->http_stats && !context.client_data->http_stats->IsEmpty()) {
  string read =
  "in: " + StringUtil::BytesToHumanReadableString(context.client_data->http_stats->total_bytes_received);
  string written =