duckdb 0.8.2-dev4572.0 → 0.8.2-dev4653.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23) hide show
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/json/buffered_json_reader.cpp +8 -8
  3. package/src/duckdb/extension/json/json_functions/read_json.cpp +6 -2
  4. package/src/duckdb/extension/json/json_scan.cpp +4 -6
  5. package/src/duckdb/src/common/enum_util.cpp +24 -0
  6. package/src/duckdb/src/execution/operator/csv_scanner/csv_reader_options.cpp +213 -2
  7. package/src/duckdb/src/function/table/read_csv.cpp +3 -130
  8. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  9. package/src/duckdb/src/include/duckdb/common/box_renderer.hpp +1 -1
  10. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
  11. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_reader_options.hpp +24 -0
  12. package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -1
  13. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +10 -4
  14. package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +3 -3
  15. package/src/duckdb/src/include/duckdb/main/relation/table_function_relation.hpp +1 -0
  16. package/src/duckdb/src/include/duckdb.h +3 -3
  17. package/src/duckdb/src/main/connection.cpp +4 -6
  18. package/src/duckdb/src/main/extension/extension_install.cpp +2 -1
  19. package/src/duckdb/src/main/relation/read_csv_relation.cpp +28 -9
  20. package/src/duckdb/src/main/relation/table_function_relation.cpp +8 -2
  21. package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -3
  22. package/src/duckdb/src/storage/table/table_statistics.cpp +1 -3
  23. package/src/duckdb/src/storage/wal_replay.cpp +8 -2
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.8.2-dev4572.0",
5
+ "version": "0.8.2-dev4653.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -1,9 +1,8 @@
1
1
  #include "buffered_json_reader.hpp"
2
2
 
3
3
  #include "duckdb/common/file_opener.hpp"
4
- #include "duckdb/common/printer.hpp"
5
- #include "duckdb/common/serializer/serializer.hpp"
6
4
  #include "duckdb/common/serializer/deserializer.hpp"
5
+ #include "duckdb/common/serializer/serializer.hpp"
7
6
 
8
7
  #include <utility>
9
8
 
@@ -24,7 +23,7 @@ bool JSONFileHandle::IsOpen() const {
24
23
  }
25
24
 
26
25
  void JSONFileHandle::Close() {
27
- if (IsOpen()) {
26
+ if (IsOpen() && plain_file_source) {
28
27
  file_handle->Close();
29
28
  file_handle = nullptr;
30
29
  }
@@ -174,12 +173,13 @@ BufferedJSONReader::BufferedJSONReader(ClientContext &context, BufferedJSONReade
174
173
  }
175
174
 
176
175
  void BufferedJSONReader::OpenJSONFile() {
177
- D_ASSERT(!IsOpen());
178
176
  lock_guard<mutex> guard(lock);
179
- auto &file_system = FileSystem::GetFileSystem(context);
180
- auto regular_file_handle =
181
- file_system.OpenFile(file_name.c_str(), FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK, options.compression);
182
- file_handle = make_uniq<JSONFileHandle>(std::move(regular_file_handle), BufferAllocator::Get(context));
177
+ if (!IsOpen()) {
178
+ auto &file_system = FileSystem::GetFileSystem(context);
179
+ auto regular_file_handle = file_system.OpenFile(file_name.c_str(), FileFlags::FILE_FLAGS_READ,
180
+ FileLockType::NO_LOCK, options.compression);
181
+ file_handle = make_uniq<JSONFileHandle>(std::move(regular_file_handle), BufferAllocator::Get(context));
182
+ }
183
183
  Reset();
184
184
  }
185
185
 
@@ -17,6 +17,7 @@ void JSONScan::AutoDetect(ClientContext &context, JSONScanData &bind_data, vecto
17
17
  Vector string_vector(LogicalType::VARCHAR);
18
18
 
19
19
  // Loop through the files (if union_by_name, else just sample the first file)
20
+ idx_t remaining = bind_data.sample_size;
20
21
  for (idx_t file_idx = 0; file_idx < bind_data.files.size(); file_idx++) {
21
22
  // Create global/local state and place the reader in the right field
22
23
  JSONScanGlobalState gstate(context, bind_data);
@@ -28,7 +29,6 @@ void JSONScan::AutoDetect(ClientContext &context, JSONScanData &bind_data, vecto
28
29
  }
29
30
 
30
31
  // Read and detect schema
31
- idx_t remaining = bind_data.sample_size;
32
32
  while (remaining != 0) {
33
33
  allocator.Reset();
34
34
  auto read_count = lstate.ReadNext(gstate);
@@ -56,7 +56,11 @@ void JSONScan::AutoDetect(ClientContext &context, JSONScanData &bind_data, vecto
56
56
  }
57
57
 
58
58
  // Close the file and stop detection if not union_by_name
59
- if (!bind_data.options.file_options.union_by_name) {
59
+ if (bind_data.options.file_options.union_by_name) {
60
+ // When union_by_name=true we sample sample_size per file
61
+ remaining = bind_data.sample_size;
62
+ } else if (remaining == 0) {
63
+ // When union_by_name=false, we sample sample_size in total (across the first files)
60
64
  break;
61
65
  }
62
66
  }
@@ -2,11 +2,11 @@
2
2
 
3
3
  #include "duckdb/common/enum_util.hpp"
4
4
  #include "duckdb/common/multi_file_reader.hpp"
5
+ #include "duckdb/common/serializer/deserializer.hpp"
6
+ #include "duckdb/common/serializer/serializer.hpp"
5
7
  #include "duckdb/main/extension_helper.hpp"
6
8
  #include "duckdb/parallel/task_scheduler.hpp"
7
9
  #include "duckdb/storage/buffer_manager.hpp"
8
- #include "duckdb/common/serializer/serializer.hpp"
9
- #include "duckdb/common/serializer/deserializer.hpp"
10
10
 
11
11
  namespace duckdb {
12
12
 
@@ -558,10 +558,8 @@ bool JSONScanLocalState::ReadNextBuffer(JSONScanGlobalState &gstate) {
558
558
  if (current_reader) {
559
559
  // If we performed the final read of this reader in the previous iteration, close it now
560
560
  if (is_last) {
561
- if (gstate.bind_data.type != JSONScanType::SAMPLE) {
562
- TryIncrementFileIndex(gstate);
563
- current_reader->CloseJSONFile();
564
- }
561
+ TryIncrementFileIndex(gstate);
562
+ current_reader->CloseJSONFile();
565
563
  current_reader = nullptr;
566
564
  continue;
567
565
  }
@@ -11,6 +11,7 @@
11
11
 
12
12
  #include "duckdb/common/enum_util.hpp"
13
13
  #include "duckdb/catalog/catalog_entry/table_column_type.hpp"
14
+ #include "duckdb/common/box_renderer.hpp"
14
15
  #include "duckdb/common/enums/access_mode.hpp"
15
16
  #include "duckdb/common/enums/aggregate_handling.hpp"
16
17
  #include "duckdb/common/enums/catalog_type.hpp"
@@ -4797,6 +4798,29 @@ RelationType EnumUtil::FromString<RelationType>(const char *value) {
4797
4798
  throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
4798
4799
  }
4799
4800
 
4801
+ template<>
4802
+ const char* EnumUtil::ToChars<RenderMode>(RenderMode value) {
4803
+ switch(value) {
4804
+ case RenderMode::ROWS:
4805
+ return "ROWS";
4806
+ case RenderMode::COLUMNS:
4807
+ return "COLUMNS";
4808
+ default:
4809
+ throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
4810
+ }
4811
+ }
4812
+
4813
+ template<>
4814
+ RenderMode EnumUtil::FromString<RenderMode>(const char *value) {
4815
+ if (StringUtil::Equals(value, "ROWS")) {
4816
+ return RenderMode::ROWS;
4817
+ }
4818
+ if (StringUtil::Equals(value, "COLUMNS")) {
4819
+ return RenderMode::COLUMNS;
4820
+ }
4821
+ throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
4822
+ }
4823
+
4800
4824
  template<>
4801
4825
  const char* EnumUtil::ToChars<ResultModifierType>(ResultModifierType value) {
4802
4826
  switch(value) {
@@ -2,6 +2,8 @@
2
2
  #include "duckdb/common/bind_helpers.hpp"
3
3
  #include "duckdb/common/vector_size.hpp"
4
4
  #include "duckdb/common/string_util.hpp"
5
+ #include "duckdb/common/enum_util.hpp"
6
+ #include "duckdb/common/multi_file_reader.hpp"
5
7
 
6
8
  namespace duckdb {
7
9
 
@@ -60,6 +62,10 @@ static int64_t ParseInteger(const Value &value, const string &loption) {
60
62
  return value.GetValue<int64_t>();
61
63
  }
62
64
 
65
+ bool CSVReaderOptions::GetHeader() const {
66
+ return this->dialect_options.header;
67
+ }
68
+
63
69
  void CSVReaderOptions::SetHeader(bool input) {
64
70
  this->dialect_options.header = input;
65
71
  this->has_header = true;
@@ -69,6 +75,10 @@ void CSVReaderOptions::SetCompression(const string &compression_p) {
69
75
  this->compression = FileCompressionTypeFromString(compression_p);
70
76
  }
71
77
 
78
+ string CSVReaderOptions::GetEscape() const {
79
+ return std::string(1, this->dialect_options.state_machine_options.escape);
80
+ }
81
+
72
82
  void CSVReaderOptions::SetEscape(const string &input) {
73
83
  auto escape_str = input;
74
84
  if (escape_str.size() > 1) {
@@ -81,6 +91,19 @@ void CSVReaderOptions::SetEscape(const string &input) {
81
91
  this->has_escape = true;
82
92
  }
83
93
 
94
+ int64_t CSVReaderOptions::GetSkipRows() const {
95
+ return this->dialect_options.skip_rows;
96
+ }
97
+
98
+ void CSVReaderOptions::SetSkipRows(int64_t skip_rows) {
99
+ dialect_options.skip_rows = skip_rows;
100
+ skip_rows_set = true;
101
+ }
102
+
103
+ string CSVReaderOptions::GetDelimiter() const {
104
+ return std::string(1, this->dialect_options.state_machine_options.delimiter);
105
+ }
106
+
84
107
  void CSVReaderOptions::SetDelimiter(const string &input) {
85
108
  auto delim_str = StringUtil::Replace(input, "\\t", "\t");
86
109
  if (delim_str.size() > 1) {
@@ -93,6 +116,10 @@ void CSVReaderOptions::SetDelimiter(const string &input) {
93
116
  this->dialect_options.state_machine_options.delimiter = delim_str[0];
94
117
  }
95
118
 
119
+ string CSVReaderOptions::GetQuote() const {
120
+ return std::string(1, this->dialect_options.state_machine_options.quote);
121
+ }
122
+
96
123
  void CSVReaderOptions::SetQuote(const string &quote_p) {
97
124
  auto quote_str = quote_p;
98
125
  if (quote_str.size() > 1) {
@@ -105,6 +132,10 @@ void CSVReaderOptions::SetQuote(const string &quote_p) {
105
132
  this->has_quote = true;
106
133
  }
107
134
 
135
+ NewLineIdentifier CSVReaderOptions::GetNewline() const {
136
+ return dialect_options.new_line;
137
+ }
138
+
108
139
  void CSVReaderOptions::SetNewline(const string &input) {
109
140
  if (input == "\\n" || input == "\\r") {
110
141
  dialect_options.new_line = NewLineIdentifier::SINGLE;
@@ -152,8 +183,7 @@ void CSVReaderOptions::SetReadOption(const string &loption, const Value &value,
152
183
  sample_chunks = sample_size / STANDARD_VECTOR_SIZE + 1;
153
184
  }
154
185
  } else if (loption == "skip") {
155
- dialect_options.skip_rows = ParseInteger(value, loption);
156
- skip_rows_set = true;
186
+ SetSkipRows(ParseInteger(value, loption));
157
187
  } else if (loption == "max_line_size" || loption == "maximum_line_size") {
158
188
  maximum_line_size = ParseInteger(value, loption);
159
189
  } else if (loption == "sample_chunk_size") {
@@ -296,4 +326,185 @@ string CSVReaderOptions::ToString() const {
296
326
  "\n ignore_errors=" + std::to_string(ignore_errors) + "\n all_varchar=" + std::to_string(all_varchar);
297
327
  }
298
328
 
329
+ static Value StringVectorToValue(const vector<string> &vec) {
330
+ vector<Value> content;
331
+ content.reserve(vec.size());
332
+ for (auto &item : vec) {
333
+ content.push_back(Value(item));
334
+ }
335
+ return Value::LIST(std::move(content));
336
+ }
337
+
338
+ static uint8_t GetCandidateSpecificity(const LogicalType &candidate_type) {
339
+ //! Const ht with accepted auto_types and their weights in specificity
340
+ const duckdb::unordered_map<uint8_t, uint8_t> auto_type_candidates_specificity {
341
+ {(uint8_t)LogicalTypeId::VARCHAR, 0}, {(uint8_t)LogicalTypeId::TIMESTAMP, 1},
342
+ {(uint8_t)LogicalTypeId::DATE, 2}, {(uint8_t)LogicalTypeId::TIME, 3},
343
+ {(uint8_t)LogicalTypeId::DOUBLE, 4}, {(uint8_t)LogicalTypeId::FLOAT, 5},
344
+ {(uint8_t)LogicalTypeId::BIGINT, 6}, {(uint8_t)LogicalTypeId::INTEGER, 7},
345
+ {(uint8_t)LogicalTypeId::SMALLINT, 8}, {(uint8_t)LogicalTypeId::TINYINT, 9},
346
+ {(uint8_t)LogicalTypeId::BOOLEAN, 10}, {(uint8_t)LogicalTypeId::SQLNULL, 11}};
347
+
348
+ auto id = (uint8_t)candidate_type.id();
349
+ auto it = auto_type_candidates_specificity.find(id);
350
+ if (it == auto_type_candidates_specificity.end()) {
351
+ throw BinderException("Auto Type Candidate of type %s is not accepted as a valid input",
352
+ EnumUtil::ToString(candidate_type.id()));
353
+ }
354
+ return it->second;
355
+ }
356
+
357
+ void CSVReaderOptions::FromNamedParameters(named_parameter_map_t &in, ClientContext &context,
358
+ vector<LogicalType> &return_types, vector<string> &names) {
359
+ for (auto &kv : in) {
360
+ if (MultiFileReader::ParseOption(kv.first, kv.second, file_options, context)) {
361
+ continue;
362
+ }
363
+ auto loption = StringUtil::Lower(kv.first);
364
+ if (loption == "columns") {
365
+ explicitly_set_columns = true;
366
+ auto &child_type = kv.second.type();
367
+ if (child_type.id() != LogicalTypeId::STRUCT) {
368
+ throw BinderException("read_csv columns requires a struct as input");
369
+ }
370
+ auto &struct_children = StructValue::GetChildren(kv.second);
371
+ D_ASSERT(StructType::GetChildCount(child_type) == struct_children.size());
372
+ for (idx_t i = 0; i < struct_children.size(); i++) {
373
+ auto &name = StructType::GetChildName(child_type, i);
374
+ auto &val = struct_children[i];
375
+ names.push_back(name);
376
+ if (val.type().id() != LogicalTypeId::VARCHAR) {
377
+ throw BinderException("read_csv requires a type specification as string");
378
+ }
379
+ return_types.emplace_back(TransformStringToLogicalType(StringValue::Get(val), context));
380
+ }
381
+ if (names.empty()) {
382
+ throw BinderException("read_csv requires at least a single column as input!");
383
+ }
384
+ } else if (loption == "auto_type_candidates") {
385
+ auto_type_candidates.clear();
386
+ map<uint8_t, LogicalType> candidate_types;
387
+ // We always have the extremes of Null and Varchar, so we can default to varchar if the
388
+ // sniffer is not able to confidently detect that column type
389
+ candidate_types[GetCandidateSpecificity(LogicalType::VARCHAR)] = LogicalType::VARCHAR;
390
+ candidate_types[GetCandidateSpecificity(LogicalType::SQLNULL)] = LogicalType::SQLNULL;
391
+
392
+ auto &child_type = kv.second.type();
393
+ if (child_type.id() != LogicalTypeId::LIST) {
394
+ throw BinderException("read_csv auto_types requires a list as input");
395
+ }
396
+ auto &list_children = ListValue::GetChildren(kv.second);
397
+ if (list_children.empty()) {
398
+ throw BinderException("auto_type_candidates requires at least one type");
399
+ }
400
+ for (auto &child : list_children) {
401
+ if (child.type().id() != LogicalTypeId::VARCHAR) {
402
+ throw BinderException("auto_type_candidates requires a type specification as string");
403
+ }
404
+ auto candidate_type = TransformStringToLogicalType(StringValue::Get(child), context);
405
+ candidate_types[GetCandidateSpecificity(candidate_type)] = candidate_type;
406
+ }
407
+ for (auto &candidate_type : candidate_types) {
408
+ auto_type_candidates.emplace_back(candidate_type.second);
409
+ }
410
+ } else if (loption == "column_names" || loption == "names") {
411
+ if (!name_list.empty()) {
412
+ throw BinderException("read_csv_auto column_names/names can only be supplied once");
413
+ }
414
+ if (kv.second.IsNull()) {
415
+ throw BinderException("read_csv_auto %s cannot be NULL", kv.first);
416
+ }
417
+ auto &children = ListValue::GetChildren(kv.second);
418
+ for (auto &child : children) {
419
+ name_list.push_back(StringValue::Get(child));
420
+ }
421
+ } else if (loption == "column_types" || loption == "types" || loption == "dtypes") {
422
+ auto &child_type = kv.second.type();
423
+ if (child_type.id() != LogicalTypeId::STRUCT && child_type.id() != LogicalTypeId::LIST) {
424
+ throw BinderException("read_csv_auto %s requires a struct or list as input", kv.first);
425
+ }
426
+ if (!sql_type_list.empty()) {
427
+ throw BinderException("read_csv_auto column_types/types/dtypes can only be supplied once");
428
+ }
429
+ vector<string> sql_type_names;
430
+ if (child_type.id() == LogicalTypeId::STRUCT) {
431
+ auto &struct_children = StructValue::GetChildren(kv.second);
432
+ D_ASSERT(StructType::GetChildCount(child_type) == struct_children.size());
433
+ for (idx_t i = 0; i < struct_children.size(); i++) {
434
+ auto &name = StructType::GetChildName(child_type, i);
435
+ auto &val = struct_children[i];
436
+ if (val.type().id() != LogicalTypeId::VARCHAR) {
437
+ throw BinderException("read_csv_auto %s requires a type specification as string", kv.first);
438
+ }
439
+ sql_type_names.push_back(StringValue::Get(val));
440
+ sql_types_per_column[name] = i;
441
+ }
442
+ } else {
443
+ auto &list_child = ListType::GetChildType(child_type);
444
+ if (list_child.id() != LogicalTypeId::VARCHAR) {
445
+ throw BinderException("read_csv_auto %s requires a list of types (varchar) as input", kv.first);
446
+ }
447
+ auto &children = ListValue::GetChildren(kv.second);
448
+ for (auto &child : children) {
449
+ sql_type_names.push_back(StringValue::Get(child));
450
+ }
451
+ }
452
+ sql_type_list.reserve(sql_type_names.size());
453
+ for (auto &sql_type : sql_type_names) {
454
+ auto def_type = TransformStringToLogicalType(sql_type);
455
+ if (def_type.id() == LogicalTypeId::USER) {
456
+ throw BinderException("Unrecognized type \"%s\" for read_csv_auto %s definition", sql_type,
457
+ kv.first);
458
+ }
459
+ sql_type_list.push_back(std::move(def_type));
460
+ }
461
+ } else if (loption == "all_varchar") {
462
+ all_varchar = BooleanValue::Get(kv.second);
463
+ } else if (loption == "normalize_names") {
464
+ normalize_names = BooleanValue::Get(kv.second);
465
+ } else {
466
+ SetReadOption(loption, kv.second, names);
467
+ }
468
+ }
469
+ }
470
+
471
+ //! This function is used to remember options set by the sniffer, for use in ReadCSVRelation
472
+ void CSVReaderOptions::ToNamedParameters(named_parameter_map_t &named_params) {
473
+ if (has_delimiter) {
474
+ named_params["delim"] = Value(GetDelimiter());
475
+ }
476
+ if (has_newline) {
477
+ named_params["newline"] = Value(EnumUtil::ToString(GetNewline()));
478
+ }
479
+ if (has_quote) {
480
+ named_params["quote"] = Value(GetQuote());
481
+ }
482
+ if (has_escape) {
483
+ named_params["escape"] = Value(GetEscape());
484
+ }
485
+ if (has_header) {
486
+ named_params["header"] = Value(GetHeader());
487
+ }
488
+ named_params["max_line_size"] = Value::BIGINT(maximum_line_size);
489
+ if (skip_rows_set) {
490
+ named_params["skip"] = Value::BIGINT(GetSkipRows());
491
+ }
492
+ named_params["sample_chunks"] = Value::BIGINT(sample_chunks);
493
+ named_params["sample_chunk_size"] = Value::BIGINT(sample_chunk_size);
494
+ named_params["null_padding"] = Value::BOOLEAN(null_padding);
495
+ if (!date_format.at(LogicalType::DATE).format_specifier.empty()) {
496
+ named_params["dateformat"] = Value(date_format.at(LogicalType::DATE).format_specifier);
497
+ }
498
+ if (!date_format.at(LogicalType::TIMESTAMP).format_specifier.empty()) {
499
+ named_params["timestampformat"] = Value(date_format.at(LogicalType::TIMESTAMP).format_specifier);
500
+ }
501
+
502
+ named_params["normalize_names"] = Value::BOOLEAN(normalize_names);
503
+ if (!name_list.empty() && !named_params.count("column_names") && !named_params.count("names")) {
504
+ named_params["column_names"] = StringVectorToValue(name_list);
505
+ }
506
+ named_params["all_varchar"] = Value::BOOLEAN(all_varchar);
507
+ named_params["maximum_line_size"] = Value::BIGINT(maximum_line_size);
508
+ }
509
+
299
510
  } // namespace duckdb
@@ -85,25 +85,6 @@ void ReadCSVData::FinalizeRead(ClientContext &context) {
85
85
  }
86
86
  }
87
87
 
88
- uint8_t GetCandidateSpecificity(const LogicalType &candidate_type) {
89
- //! Const ht with accepted auto_types and their weights in specificity
90
- const duckdb::unordered_map<uint8_t, uint8_t> auto_type_candidates_specificity {
91
- {(uint8_t)LogicalTypeId::VARCHAR, 0}, {(uint8_t)LogicalTypeId::TIMESTAMP, 1},
92
- {(uint8_t)LogicalTypeId::DATE, 2}, {(uint8_t)LogicalTypeId::TIME, 3},
93
- {(uint8_t)LogicalTypeId::DOUBLE, 4}, {(uint8_t)LogicalTypeId::FLOAT, 5},
94
- {(uint8_t)LogicalTypeId::BIGINT, 6}, {(uint8_t)LogicalTypeId::INTEGER, 7},
95
- {(uint8_t)LogicalTypeId::SMALLINT, 8}, {(uint8_t)LogicalTypeId::TINYINT, 9},
96
- {(uint8_t)LogicalTypeId::BOOLEAN, 10}, {(uint8_t)LogicalTypeId::SQLNULL, 11}};
97
-
98
- auto id = (uint8_t)candidate_type.id();
99
- auto it = auto_type_candidates_specificity.find(id);
100
- if (it == auto_type_candidates_specificity.end()) {
101
- throw BinderException("Auto Type Candidate of type %s is not accepted as a valid input",
102
- EnumUtil::ToString(candidate_type.id()));
103
- }
104
- return it->second;
105
- }
106
-
107
88
  static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctionBindInput &input,
108
89
  vector<LogicalType> &return_types, vector<string> &names) {
109
90
 
@@ -111,117 +92,9 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
111
92
  auto &options = result->options;
112
93
  result->files = MultiFileReader::GetFileList(context, input.inputs[0], "CSV");
113
94
 
114
- bool explicitly_set_columns = false;
115
- for (auto &kv : input.named_parameters) {
116
- if (MultiFileReader::ParseOption(kv.first, kv.second, options.file_options, context)) {
117
- continue;
118
- }
119
- auto loption = StringUtil::Lower(kv.first);
120
- if (loption == "columns") {
121
- explicitly_set_columns = true;
122
- auto &child_type = kv.second.type();
123
- if (child_type.id() != LogicalTypeId::STRUCT) {
124
- throw BinderException("read_csv columns requires a struct as input");
125
- }
126
- auto &struct_children = StructValue::GetChildren(kv.second);
127
- D_ASSERT(StructType::GetChildCount(child_type) == struct_children.size());
128
- for (idx_t i = 0; i < struct_children.size(); i++) {
129
- auto &name = StructType::GetChildName(child_type, i);
130
- auto &val = struct_children[i];
131
- names.push_back(name);
132
- if (val.type().id() != LogicalTypeId::VARCHAR) {
133
- throw BinderException("read_csv requires a type specification as string");
134
- }
135
- return_types.emplace_back(TransformStringToLogicalType(StringValue::Get(val), context));
136
- }
137
- if (names.empty()) {
138
- throw BinderException("read_csv requires at least a single column as input!");
139
- }
140
- } else if (loption == "auto_type_candidates") {
141
- options.auto_type_candidates.clear();
142
- map<uint8_t, LogicalType> candidate_types;
143
- // We always have the extremes of Null and Varchar, so we can default to varchar if the
144
- // sniffer is not able to confidently detect that column type
145
- candidate_types[GetCandidateSpecificity(LogicalType::VARCHAR)] = LogicalType::VARCHAR;
146
- candidate_types[GetCandidateSpecificity(LogicalType::SQLNULL)] = LogicalType::SQLNULL;
147
-
148
- auto &child_type = kv.second.type();
149
- if (child_type.id() != LogicalTypeId::LIST) {
150
- throw BinderException("read_csv auto_types requires a list as input");
151
- }
152
- auto &list_children = ListValue::GetChildren(kv.second);
153
- if (list_children.empty()) {
154
- throw BinderException("auto_type_candidates requires at least one type");
155
- }
156
- for (auto &child : list_children) {
157
- if (child.type().id() != LogicalTypeId::VARCHAR) {
158
- throw BinderException("auto_type_candidates requires a type specification as string");
159
- }
160
- auto candidate_type = TransformStringToLogicalType(StringValue::Get(child), context);
161
- candidate_types[GetCandidateSpecificity(candidate_type)] = candidate_type;
162
- }
163
- for (auto &candidate_type : candidate_types) {
164
- options.auto_type_candidates.emplace_back(candidate_type.second);
165
- }
166
- } else if (loption == "column_names" || loption == "names") {
167
- if (!options.name_list.empty()) {
168
- throw BinderException("read_csv_auto column_names/names can only be supplied once");
169
- }
170
- if (kv.second.IsNull()) {
171
- throw BinderException("read_csv_auto %s cannot be NULL", kv.first);
172
- }
173
- auto &children = ListValue::GetChildren(kv.second);
174
- for (auto &child : children) {
175
- options.name_list.push_back(StringValue::Get(child));
176
- }
177
- } else if (loption == "column_types" || loption == "types" || loption == "dtypes") {
178
- auto &child_type = kv.second.type();
179
- if (child_type.id() != LogicalTypeId::STRUCT && child_type.id() != LogicalTypeId::LIST) {
180
- throw BinderException("read_csv_auto %s requires a struct or list as input", kv.first);
181
- }
182
- if (!options.sql_type_list.empty()) {
183
- throw BinderException("read_csv_auto column_types/types/dtypes can only be supplied once");
184
- }
185
- vector<string> sql_type_names;
186
- if (child_type.id() == LogicalTypeId::STRUCT) {
187
- auto &struct_children = StructValue::GetChildren(kv.second);
188
- D_ASSERT(StructType::GetChildCount(child_type) == struct_children.size());
189
- for (idx_t i = 0; i < struct_children.size(); i++) {
190
- auto &name = StructType::GetChildName(child_type, i);
191
- auto &val = struct_children[i];
192
- if (val.type().id() != LogicalTypeId::VARCHAR) {
193
- throw BinderException("read_csv_auto %s requires a type specification as string", kv.first);
194
- }
195
- sql_type_names.push_back(StringValue::Get(val));
196
- options.sql_types_per_column[name] = i;
197
- }
198
- } else {
199
- auto &list_child = ListType::GetChildType(child_type);
200
- if (list_child.id() != LogicalTypeId::VARCHAR) {
201
- throw BinderException("read_csv_auto %s requires a list of types (varchar) as input", kv.first);
202
- }
203
- auto &children = ListValue::GetChildren(kv.second);
204
- for (auto &child : children) {
205
- sql_type_names.push_back(StringValue::Get(child));
206
- }
207
- }
208
- options.sql_type_list.reserve(sql_type_names.size());
209
- for (auto &sql_type : sql_type_names) {
210
- auto def_type = TransformStringToLogicalType(sql_type);
211
- if (def_type.id() == LogicalTypeId::USER) {
212
- throw BinderException("Unrecognized type \"%s\" for read_csv_auto %s definition", sql_type,
213
- kv.first);
214
- }
215
- options.sql_type_list.push_back(std::move(def_type));
216
- }
217
- } else if (loption == "all_varchar") {
218
- options.all_varchar = BooleanValue::Get(kv.second);
219
- } else if (loption == "normalize_names") {
220
- options.normalize_names = BooleanValue::Get(kv.second);
221
- } else {
222
- options.SetReadOption(loption, kv.second, names);
223
- }
224
- }
95
+ options.FromNamedParameters(input.named_parameters, context, return_types, names);
96
+ bool explicitly_set_columns = options.explicitly_set_columns;
97
+
225
98
  options.file_options.AutoDetectHivePartitioning(result->files, context);
226
99
 
227
100
  if (!options.auto_detect && return_types.empty()) {
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.8.2-dev4572"
2
+ #define DUCKDB_VERSION "0.8.2-dev4653"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "53dc13de5c"
5
+ #define DUCKDB_SOURCE_ID "bb287d4b22"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -18,7 +18,7 @@ class ColumnDataCollection;
18
18
  class ColumnDataRowCollection;
19
19
 
20
20
  enum class ValueRenderAlignment { LEFT, MIDDLE, RIGHT };
21
- enum class RenderMode { ROWS, COLUMNS };
21
+ enum class RenderMode : uint8_t { ROWS, COLUMNS };
22
22
 
23
23
  struct BoxRendererConfig {
24
24
  // a max_width of 0 means we default to the terminal width
@@ -216,6 +216,8 @@ enum class QuoteRule : uint8_t;
216
216
 
217
217
  enum class RelationType : uint8_t;
218
218
 
219
+ enum class RenderMode : uint8_t;
220
+
219
221
  enum class ResultModifierType : uint8_t;
220
222
 
221
223
  enum class SampleMethod : uint8_t;
@@ -565,6 +567,9 @@ const char* EnumUtil::ToChars<QuoteRule>(QuoteRule value);
565
567
  template<>
566
568
  const char* EnumUtil::ToChars<RelationType>(RelationType value);
567
569
 
570
+ template<>
571
+ const char* EnumUtil::ToChars<RenderMode>(RenderMode value);
572
+
568
573
  template<>
569
574
  const char* EnumUtil::ToChars<ResultModifierType>(ResultModifierType value);
570
575
 
@@ -950,6 +955,9 @@ QuoteRule EnumUtil::FromString<QuoteRule>(const char *value);
950
955
  template<>
951
956
  RelationType EnumUtil::FromString<RelationType>(const char *value);
952
957
 
958
+ template<>
959
+ RenderMode EnumUtil::FromString<RenderMode>(const char *value);
960
+
953
961
  template<>
954
962
  ResultModifierType EnumUtil::FromString<ResultModifierType>(const char *value);
955
963
 
@@ -159,18 +159,33 @@ struct CSVReaderOptions {
159
159
  string suffix;
160
160
  string write_newline;
161
161
 
162
+ //! The date format to use (if any is specified)
163
+ map<LogicalTypeId, StrpTimeFormat> date_format = {{LogicalTypeId::DATE, {}}, {LogicalTypeId::TIMESTAMP, {}}};
162
164
  //! The date format to use for writing (if any is specified)
163
165
  map<LogicalTypeId, StrfTimeFormat> write_date_format = {{LogicalTypeId::DATE, {}}, {LogicalTypeId::TIMESTAMP, {}}};
166
+ //! Whether or not a type format is specified
167
+ map<LogicalTypeId, bool> has_format = {{LogicalTypeId::DATE, false}, {LogicalTypeId::TIMESTAMP, false}};
164
168
 
165
169
  void Serialize(Serializer &serializer) const;
166
170
  static CSVReaderOptions Deserialize(Deserializer &deserializer);
167
171
 
168
172
  void SetCompression(const string &compression);
173
+
174
+ bool GetHeader() const;
169
175
  void SetHeader(bool has_header);
176
+
177
+ string GetEscape() const;
170
178
  void SetEscape(const string &escape);
179
+
180
+ int64_t GetSkipRows() const;
181
+ void SetSkipRows(int64_t rows);
182
+
183
+ string GetQuote() const;
171
184
  void SetQuote(const string &quote);
172
185
  void SetDelimiter(const string &delimiter);
186
+ string GetDelimiter() const;
173
187
 
188
+ NewLineIdentifier GetNewline() const;
174
189
  void SetNewline(const string &input);
175
190
  //! Set an option that is supported by both reading and writing functions, called by
176
191
  //! the SetReadOption and SetWriteOption methods
@@ -182,7 +197,16 @@ struct CSVReaderOptions {
182
197
  void SetReadOption(const string &loption, const Value &value, vector<string> &expected_names);
183
198
  void SetWriteOption(const string &loption, const Value &value);
184
199
  void SetDateFormat(LogicalTypeId type, const string &format, bool read_format);
200
+ void ToNamedParameters(named_parameter_map_t &out);
201
+ void FromNamedParameters(named_parameter_map_t &in, ClientContext &context, vector<LogicalType> &return_types,
202
+ vector<string> &names);
185
203
 
186
204
  string ToString() const;
205
+
206
+ named_parameter_map_t OutputReadSettings();
207
+
208
+ public:
209
+ //! Whether columns were explicitly provided through named parameters
210
+ bool explicitly_set_columns = false;
187
211
  };
188
212
  } // namespace duckdb
@@ -131,7 +131,7 @@ public:
131
131
 
132
132
  //! Reads CSV file
133
133
  DUCKDB_API shared_ptr<Relation> ReadCSV(const string &csv_file);
134
- DUCKDB_API shared_ptr<Relation> ReadCSV(const string &csv_file, CSVReaderOptions &options);
134
+ DUCKDB_API shared_ptr<Relation> ReadCSV(const string &csv_file, named_parameter_map_t &&options);
135
135
  DUCKDB_API shared_ptr<Relation> ReadCSV(const string &csv_file, const vector<string> &columns);
136
136
 
137
137
  //! Reads Parquet file
@@ -118,6 +118,7 @@ static constexpr ExtensionEntry EXTENSION_FUNCTIONS[] = {
118
118
  {"st_dwithin_spheroid", "spatial"},
119
119
  {"st_envelope", "spatial"},
120
120
  {"st_equals", "spatial"},
121
+ {"st_extent", "spatial"},
121
122
  {"st_flipcoordinates", "spatial"},
122
123
  {"st_geometrytype", "spatial"},
123
124
  {"st_geomfromgeojson", "spatial"},
@@ -126,6 +127,7 @@ static constexpr ExtensionEntry EXTENSION_FUNCTIONS[] = {
126
127
  {"st_geomfromtext", "spatial"},
127
128
  {"st_geomfromwkb", "spatial"},
128
129
  {"st_intersection", "spatial"},
130
+ {"st_intersection_agg", "spatial"},
129
131
  {"st_intersects", "spatial"},
130
132
  {"st_isclosed", "spatial"},
131
133
  {"st_isempty", "spatial"},
@@ -159,9 +161,14 @@ static constexpr ExtensionEntry EXTENSION_FUNCTIONS[] = {
159
161
  {"st_touches", "spatial"},
160
162
  {"st_transform", "spatial"},
161
163
  {"st_union", "spatial"},
164
+ {"st_union_agg", "spatial"},
162
165
  {"st_within", "spatial"},
163
166
  {"st_x", "spatial"},
167
+ {"st_xmax", "spatial"},
168
+ {"st_xmin", "spatial"},
164
169
  {"st_y", "spatial"},
170
+ {"st_ymax", "spatial"},
171
+ {"st_ymin", "spatial"},
165
172
  {"stem", "fts"},
166
173
  {"text", "excel"},
167
174
  {"to_arrow_ipc", "arrow"},
@@ -220,10 +227,9 @@ static constexpr ExtensionEntry EXTENSION_FILE_PREFIXES[] = {
220
227
 
221
228
  // Note: these are currently hardcoded in scripts/generate_extensions_function.py
222
229
  // TODO: automate by passing through to script via duckdb
223
- static constexpr ExtensionEntry EXTENSION_FILE_POSTFIXES[] = {{".parquet", "parquet"},
224
- {".json", "json"},
225
- {".jsonl", "json"},
226
- {".ndjson", "json"}}; // END_OF_EXTENSION_FILE_POSTFIXES
230
+ static constexpr ExtensionEntry EXTENSION_FILE_POSTFIXES[] = {
231
+ {".parquet", "parquet"}, {".json", "json"}, {".jsonl", "json"}, {".ndjson", "json"},
232
+ {".shp", "spatial"}, {".gpkg", "spatial"}, {".fgb", "spatial"}}; // END_OF_EXTENSION_FILE_POSTFIXES
227
233
 
228
234
  // Note: these are currently hardcoded in scripts/generate_extensions_function.py
229
235
  // TODO: automate by passing through to script via duckdb
@@ -10,16 +10,16 @@
10
10
 
11
11
  #include "duckdb/execution/operator/scan/csv/csv_reader_options.hpp"
12
12
  #include "duckdb/main/relation/table_function_relation.hpp"
13
+ #include "duckdb/common/shared_ptr.hpp"
14
+ #include "duckdb/common/case_insensitive_map.hpp"
13
15
 
14
16
  namespace duckdb {
15
17
 
16
- struct CSVReaderOptions;
17
-
18
18
  class ReadCSVRelation : public TableFunctionRelation {
19
19
  public:
20
20
  ReadCSVRelation(const shared_ptr<ClientContext> &context, const string &csv_file, vector<ColumnDefinition> columns,
21
21
  string alias = string());
22
- ReadCSVRelation(const shared_ptr<ClientContext> &context, const string &csv_file, CSVReaderOptions options,
22
+ ReadCSVRelation(const shared_ptr<ClientContext> &context, const string &csv_file, named_parameter_map_t &&options,
23
23
  string alias = string());
24
24
 
25
25
  string alias;
@@ -35,6 +35,7 @@ public:
35
35
  string ToString(idx_t depth) override;
36
36
  string GetAlias() override;
37
37
  void AddNamedParameter(const string &name, Value argument);
38
+ void SetNamedParameters(named_parameter_map_t &&named_parameters);
38
39
 
39
40
  private:
40
41
  void InitializeColumns();
@@ -1740,7 +1740,7 @@ DUCKDB_API duckdb_vector duckdb_struct_vector_get_child(duckdb_vector vector, id
1740
1740
  /*!
1741
1741
  Returns whether or not a row is valid (i.e. not NULL) in the given validity mask.
1742
1742
 
1743
- * validity: The validity mask, as obtained through `duckdb_data_chunk_get_validity`
1743
+ * validity: The validity mask, as obtained through `duckdb_vector_get_validity`
1744
1744
  * row: The row index
1745
1745
  * returns: true if the row is valid, false otherwise
1746
1746
  */
@@ -1749,10 +1749,10 @@ DUCKDB_API bool duckdb_validity_row_is_valid(uint64_t *validity, idx_t row);
1749
1749
  /*!
1750
1750
  In a validity mask, sets a specific row to either valid or invalid.
1751
1751
 
1752
- Note that `duckdb_data_chunk_ensure_validity_writable` should be called before calling `duckdb_data_chunk_get_validity`,
1752
+ Note that `duckdb_vector_ensure_validity_writable` should be called before calling `duckdb_vector_get_validity`,
1753
1753
  to ensure that there is a validity mask to write to.
1754
1754
 
1755
- * validity: The validity mask, as obtained through `duckdb_data_chunk_get_validity`.
1755
+ * validity: The validity mask, as obtained through `duckdb_vector_get_validity`.
1756
1756
  * row: The row index
1757
1757
  * valid: Whether or not to set the row to valid, or invalid
1758
1758
  */
@@ -219,14 +219,12 @@ shared_ptr<Relation> Connection::Values(const string &values, const vector<strin
219
219
  }
220
220
 
221
221
  shared_ptr<Relation> Connection::ReadCSV(const string &csv_file) {
222
- CSVReaderOptions options;
223
- return ReadCSV(csv_file, options);
222
+ named_parameter_map_t options;
223
+ return ReadCSV(csv_file, std::move(options));
224
224
  }
225
225
 
226
- shared_ptr<Relation> Connection::ReadCSV(const string &csv_file, CSVReaderOptions &options) {
227
- options.file_path = csv_file;
228
- options.auto_detect = true;
229
- return make_shared<ReadCSVRelation>(context, csv_file, options);
226
+ shared_ptr<Relation> Connection::ReadCSV(const string &csv_file, named_parameter_map_t &&options) {
227
+ return make_shared<ReadCSVRelation>(context, csv_file, std::move(options));
230
228
  }
231
229
 
232
230
  shared_ptr<Relation> Connection::ReadCSV(const string &csv_file, const vector<string> &columns) {
@@ -158,11 +158,12 @@ void WriteExtensionFileToDisk(FileSystem &fs, const string &path, void *data, id
158
158
  }
159
159
 
160
160
  string ExtensionHelper::ExtensionUrlTemplate(optional_ptr<const ClientConfig> client_config, const string &repository) {
161
- string default_endpoint = "http://extensions.duckdb.org";
162
161
  string versioned_path = "/${REVISION}/${PLATFORM}/${NAME}.duckdb_extension";
163
162
  #ifdef WASM_LOADABLE_EXTENSIONS
163
+ string default_endpoint = "https://extensions.duckdb.org";
164
164
  versioned_path = "/duckdb-wasm" + versioned_path + ".wasm";
165
165
  #else
166
+ string default_endpoint = "http://extensions.duckdb.org";
166
167
  versioned_path = versioned_path + ".gz";
167
168
  #endif
168
169
  string custom_endpoint = client_config ? client_config->custom_extension_repo : string();
@@ -1,6 +1,5 @@
1
1
  #include "duckdb/main/relation/read_csv_relation.hpp"
2
2
 
3
- #include "duckdb/common/string_util.hpp"
4
3
  #include "duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp"
5
4
  #include "duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp"
6
5
  #include "duckdb/execution/operator/scan/csv/csv_sniffer.hpp"
@@ -8,6 +7,9 @@
8
7
  #include "duckdb/parser/expression/comparison_expression.hpp"
9
8
  #include "duckdb/parser/expression/constant_expression.hpp"
10
9
  #include "duckdb/parser/expression/function_expression.hpp"
10
+ #include "duckdb/common/string_util.hpp"
11
+ #include "duckdb/execution/operator/scan/csv/csv_reader_options.hpp"
12
+ #include "duckdb/common/multi_file_reader.hpp"
11
13
  #include "duckdb/parser/expression/star_expression.hpp"
12
14
  #include "duckdb/parser/query_node/select_node.hpp"
13
15
  #include "duckdb/parser/tableref/basetableref.hpp"
@@ -34,8 +36,8 @@ ReadCSVRelation::ReadCSVRelation(const shared_ptr<ClientContext> &context, const
34
36
  AddNamedParameter("columns", Value::STRUCT(std::move(column_names)));
35
37
  }
36
38
 
37
- ReadCSVRelation::ReadCSVRelation(const shared_ptr<ClientContext> &context, const string &csv_file,
38
- CSVReaderOptions options, string alias_p)
39
+ ReadCSVRelation::ReadCSVRelation(const std::shared_ptr<ClientContext> &context, const string &csv_file,
40
+ named_parameter_map_t &&options, string alias_p)
39
41
  : TableFunctionRelation(context, "read_csv_auto", {Value(csv_file)}, nullptr, false), alias(std::move(alias_p)),
40
42
  auto_detect(true) {
41
43
 
@@ -43,12 +45,24 @@ ReadCSVRelation::ReadCSVRelation(const shared_ptr<ClientContext> &context, const
43
45
  alias = StringUtil::Split(csv_file, ".")[0];
44
46
  }
45
47
 
46
- // Force auto_detect for this constructor
47
- options.auto_detect = true;
48
- auto bm_file_handle = BaseCSVReader::OpenCSV(*context, options);
49
- auto buffer_manager = make_shared<CSVBufferManager>(*context, std::move(bm_file_handle), options);
48
+ auto files = MultiFileReader::GetFileList(*context, csv_file, "CSV");
49
+ D_ASSERT(!files.empty());
50
+
51
+ auto &file_name = files[0];
52
+ options["auto_detect"] = Value::BOOLEAN(true);
53
+ CSVReaderOptions csv_options;
54
+ csv_options.file_path = file_name;
55
+ vector<string> empty;
56
+
57
+ vector<LogicalType> unused_types;
58
+ vector<string> unused_names;
59
+ csv_options.FromNamedParameters(options, *context, unused_types, unused_names);
60
+ // Run the auto-detect, populating the options with the detected settings
61
+
62
+ auto bm_file_handle = BaseCSVReader::OpenCSV(*context, csv_options);
63
+ auto buffer_manager = make_shared<CSVBufferManager>(*context, std::move(bm_file_handle), csv_options);
50
64
  CSVStateMachineCache state_machine_cache;
51
- CSVSniffer sniffer(options, buffer_manager, state_machine_cache);
65
+ CSVSniffer sniffer(csv_options, buffer_manager, state_machine_cache);
52
66
  auto sniffer_result = sniffer.SniffCSV();
53
67
  auto &types = sniffer_result.return_types;
54
68
  auto &names = sniffer_result.names;
@@ -56,7 +70,12 @@ ReadCSVRelation::ReadCSVRelation(const shared_ptr<ClientContext> &context, const
56
70
  columns.emplace_back(names[i], types[i]);
57
71
  }
58
72
 
59
- AddNamedParameter("auto_detect", Value::BOOLEAN(true));
73
+ //! Capture the options potentially set/altered by the auto detection phase
74
+ csv_options.ToNamedParameters(options);
75
+
76
+ // No need to auto-detect again
77
+ options["auto_detect"] = Value::BOOLEAN(false);
78
+ SetNamedParameters(std::move(options));
60
79
  }
61
80
 
62
81
  string ReadCSVRelation::GetAlias() {
@@ -9,6 +9,7 @@
9
9
  #include "duckdb/main/client_context.hpp"
10
10
  #include "duckdb/parser/expression/comparison_expression.hpp"
11
11
  #include "duckdb/parser/expression/columnref_expression.hpp"
12
+ #include "duckdb/common/shared_ptr.hpp"
12
13
 
13
14
  namespace duckdb {
14
15
 
@@ -16,7 +17,12 @@ void TableFunctionRelation::AddNamedParameter(const string &name, Value argument
16
17
  named_parameters[name] = std::move(argument);
17
18
  }
18
19
 
19
- TableFunctionRelation::TableFunctionRelation(const std::shared_ptr<ClientContext> &context, string name_p,
20
+ void TableFunctionRelation::SetNamedParameters(named_parameter_map_t &&options) {
21
+ D_ASSERT(named_parameters.empty());
22
+ named_parameters = std::move(options);
23
+ }
24
+
25
+ TableFunctionRelation::TableFunctionRelation(const shared_ptr<ClientContext> &context, string name_p,
20
26
  vector<Value> parameters_p, named_parameter_map_t named_parameters,
21
27
  shared_ptr<Relation> input_relation_p, bool auto_init)
22
28
  : Relation(context, RelationType::TABLE_FUNCTION_RELATION), name(std::move(name_p)),
@@ -25,7 +31,7 @@ TableFunctionRelation::TableFunctionRelation(const std::shared_ptr<ClientContext
25
31
  InitializeColumns();
26
32
  }
27
33
 
28
- TableFunctionRelation::TableFunctionRelation(const std::shared_ptr<ClientContext> &context, string name_p,
34
+ TableFunctionRelation::TableFunctionRelation(const shared_ptr<ClientContext> &context, string name_p,
29
35
  vector<Value> parameters_p, shared_ptr<Relation> input_relation_p,
30
36
  bool auto_init)
31
37
  : Relation(context, RelationType::TABLE_FUNCTION_RELATION), name(std::move(name_p)),
@@ -131,11 +131,11 @@ void SingleFileCheckpointWriter::CreateCheckpoint() {
131
131
  throw FatalException("Checkpoint aborted before truncate because of PRAGMA checkpoint_abort flag");
132
132
  }
133
133
 
134
- // truncate the WAL
135
- wal->Truncate(0);
136
-
137
134
  // truncate the file
138
135
  block_manager.Truncate();
136
+
137
+ // truncate the WAL
138
+ wal->Truncate(0);
139
139
  }
140
140
 
141
141
  void CheckpointReader::LoadCheckpoint(ClientContext &context, MetadataReader &reader) {
@@ -102,9 +102,7 @@ void TableStatistics::CopyStats(TableStatistics &other) {
102
102
  }
103
103
 
104
104
  void TableStatistics::Serialize(Serializer &serializer) const {
105
- auto column_count = column_stats.size();
106
- serializer.WriteList(100, "column_stats", column_count,
107
- [&](Serializer::List &list, idx_t i) { list.WriteElement(column_stats[i]); });
105
+ serializer.WriteProperty(100, "column_stats", column_stats);
108
106
  }
109
107
 
110
108
  void TableStatistics::Deserialize(Deserializer &deserializer, ColumnList &columns) {
@@ -57,7 +57,10 @@ bool WriteAheadLog::Replay(AttachedDatabase &database, string &path) {
57
57
  deserializer.End();
58
58
  }
59
59
  }
60
- } catch (std::exception &ex) { // LCOV_EXCL_START
60
+ } catch (SerializationException &ex) { // LCOV_EXCL_START
61
+ // serialization exception - torn WAL
62
+ // continue reading
63
+ } catch (std::exception &ex) {
61
64
  Printer::PrintF("Exception in WAL playback during initial read: %s\n", ex.what());
62
65
  return false;
63
66
  } catch (...) {
@@ -104,7 +107,10 @@ bool WriteAheadLog::Replay(AttachedDatabase &database, string &path) {
104
107
  deserializer.End();
105
108
  }
106
109
  }
107
- } catch (std::exception &ex) { // LCOV_EXCL_START
110
+ } catch (SerializationException &ex) { // LCOV_EXCL_START
111
+ // serialization error during WAL replay: rollback
112
+ con.Rollback();
113
+ } catch (std::exception &ex) {
108
114
  // FIXME: this should report a proper warning in the connection
109
115
  Printer::PrintF("Exception in WAL playback: %s\n", ex.what());
110
116
  // exception thrown in WAL replay: rollback