duckdb 0.3.5-dev167.0 → 0.3.5-dev184.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/duckdb.cpp CHANGED
@@ -1237,8 +1237,8 @@ namespace duckdb {
1237
1237
  class TableMacroFunction : public MacroFunction {
1238
1238
  public:
1239
1239
  TableMacroFunction(unique_ptr<QueryNode> query_node);
1240
-
1241
1240
  TableMacroFunction(void);
1241
+
1242
1242
  //! The main query node
1243
1243
  unique_ptr<QueryNode> query_node;
1244
1244
 
@@ -1427,10 +1427,15 @@ public:
1427
1427
  SchemaCatalogEntry *schema;
1428
1428
 
1429
1429
  DUCKDB_API static unique_ptr<CreateMacroInfo> CreateInternalMacroInfo(DefaultMacro &default_macro);
1430
+ DUCKDB_API static unique_ptr<CreateMacroInfo> CreateInternalTableMacroInfo(DefaultMacro &default_macro);
1430
1431
 
1431
1432
  public:
1432
1433
  unique_ptr<CatalogEntry> CreateDefaultEntry(ClientContext &context, const string &entry_name) override;
1433
1434
  vector<string> GetDefaultEntries() override;
1435
+
1436
+ private:
1437
+ static unique_ptr<CreateMacroInfo> CreateInternalTableMacroInfo(DefaultMacro &default_macro,
1438
+ unique_ptr<MacroFunction> function);
1434
1439
  };
1435
1440
 
1436
1441
  } // namespace duckdb
@@ -4610,6 +4615,7 @@ void CatalogSet::Scan(const std::function<void(CatalogEntry *)> &callback) {
4610
4615
 
4611
4616
 
4612
4617
 
4618
+
4613
4619
  namespace duckdb {
4614
4620
 
4615
4621
  static DefaultMacro internal_macros[] = {
@@ -4735,14 +4741,9 @@ static DefaultMacro internal_macros[] = {
4735
4741
  {nullptr, nullptr, {nullptr}, nullptr}
4736
4742
  };
4737
4743
 
4738
- unique_ptr<CreateMacroInfo> DefaultFunctionGenerator::CreateInternalMacroInfo(DefaultMacro &default_macro) {
4739
- // parse the expression
4740
- auto expressions = Parser::ParseExpressionList(default_macro.macro);
4741
- D_ASSERT(expressions.size() == 1);
4742
-
4743
- auto result = make_unique<ScalarMacroFunction>(move(expressions[0]));
4744
+ unique_ptr<CreateMacroInfo> DefaultFunctionGenerator::CreateInternalTableMacroInfo(DefaultMacro &default_macro, unique_ptr<MacroFunction> function) {
4744
4745
  for (idx_t param_idx = 0; default_macro.parameters[param_idx] != nullptr; param_idx++) {
4745
- result->parameters.push_back(
4746
+ function->parameters.push_back(
4746
4747
  make_unique<ColumnRefExpression>(default_macro.parameters[param_idx]));
4747
4748
  }
4748
4749
 
@@ -4751,8 +4752,30 @@ unique_ptr<CreateMacroInfo> DefaultFunctionGenerator::CreateInternalMacroInfo(De
4751
4752
  bind_info->name = default_macro.name;
4752
4753
  bind_info->temporary = true;
4753
4754
  bind_info->internal = true;
4754
- bind_info->function = move(result);
4755
+ bind_info->type = function->type == MacroType::TABLE_MACRO ? CatalogType::TABLE_MACRO_ENTRY : CatalogType::MACRO_ENTRY;
4756
+ bind_info->function = move(function);
4755
4757
  return bind_info;
4758
+
4759
+ }
4760
+
4761
+ unique_ptr<CreateMacroInfo> DefaultFunctionGenerator::CreateInternalMacroInfo(DefaultMacro &default_macro) {
4762
+ // parse the expression
4763
+ auto expressions = Parser::ParseExpressionList(default_macro.macro);
4764
+ D_ASSERT(expressions.size() == 1);
4765
+
4766
+ auto result = make_unique<ScalarMacroFunction>(move(expressions[0]));
4767
+ return CreateInternalTableMacroInfo(default_macro, move(result));
4768
+ }
4769
+
4770
+ unique_ptr<CreateMacroInfo> DefaultFunctionGenerator::CreateInternalTableMacroInfo(DefaultMacro &default_macro) {
4771
+ Parser parser;
4772
+ parser.ParseQuery(default_macro.macro);
4773
+ D_ASSERT(parser.statements.size() == 1);
4774
+ D_ASSERT(parser.statements[0]->type == StatementType::SELECT_STATEMENT);
4775
+
4776
+ auto &select = (SelectStatement &) *parser.statements[0];
4777
+ auto result = make_unique<TableMacroFunction>(move(select.node));
4778
+ return CreateInternalTableMacroInfo(default_macro, move(result));
4756
4779
  }
4757
4780
 
4758
4781
  static unique_ptr<CreateFunctionInfo> GetDefaultFunction(const string &input_schema, const string &input_name) {
@@ -63446,6 +63469,108 @@ string PhysicalTopN::ParamsToString() const {
63446
63469
 
63447
63470
  namespace duckdb {
63448
63471
 
63472
+ static bool ParseBoolean(const Value &value, const string &loption);
63473
+
63474
+ static bool ParseBoolean(const vector<Value> &set, const string &loption) {
63475
+ if (set.empty()) {
63476
+ // no option specified: default to true
63477
+ return true;
63478
+ }
63479
+ if (set.size() > 1) {
63480
+ throw BinderException("\"%s\" expects a single argument as a boolean value (e.g. TRUE or 1)", loption);
63481
+ }
63482
+ return ParseBoolean(set[0], loption);
63483
+ }
63484
+
63485
+ static bool ParseBoolean(const Value &value, const string &loption) {
63486
+
63487
+ if (value.type().id() == LogicalTypeId::LIST) {
63488
+ auto &children = ListValue::GetChildren(value);
63489
+ return ParseBoolean(children, loption);
63490
+ }
63491
+ if (value.type() == LogicalType::FLOAT || value.type() == LogicalType::DOUBLE ||
63492
+ value.type().id() == LogicalTypeId::DECIMAL) {
63493
+ throw BinderException("\"%s\" expects a boolean value (e.g. TRUE or 1)", loption);
63494
+ }
63495
+ return BooleanValue::Get(value.CastAs(LogicalType::BOOLEAN));
63496
+ }
63497
+
63498
+ static string ParseString(const Value &value, const string &loption) {
63499
+ if (value.type().id() == LogicalTypeId::LIST) {
63500
+ auto &children = ListValue::GetChildren(value);
63501
+ if (children.size() != 1) {
63502
+ throw BinderException("\"%s\" expects a single argument as a string value", loption);
63503
+ }
63504
+ return ParseString(children[0], loption);
63505
+ }
63506
+ if (value.type().id() != LogicalTypeId::VARCHAR) {
63507
+ throw BinderException("\"%s\" expects a string argument!", loption);
63508
+ }
63509
+ return value.GetValue<string>();
63510
+ }
63511
+
63512
+ static int64_t ParseInteger(const Value &value, const string &loption) {
63513
+ if (value.type().id() == LogicalTypeId::LIST) {
63514
+ auto &children = ListValue::GetChildren(value);
63515
+ if (children.size() != 1) {
63516
+ // no option specified or multiple options specified
63517
+ throw BinderException("\"%s\" expects a single argument as an integer value", loption);
63518
+ }
63519
+ return ParseInteger(children[0], loption);
63520
+ }
63521
+ return value.GetValue<int64_t>();
63522
+ }
63523
+
63524
+ static vector<bool> ParseColumnList(const vector<Value> &set, vector<string> &names, const string &loption) {
63525
+ vector<bool> result;
63526
+
63527
+ if (set.empty()) {
63528
+ throw BinderException("\"%s\" expects a column list or * as parameter", loption);
63529
+ }
63530
+ // list of options: parse the list
63531
+ unordered_map<string, bool> option_map;
63532
+ for (idx_t i = 0; i < set.size(); i++) {
63533
+ option_map[set[i].ToString()] = false;
63534
+ }
63535
+ result.resize(names.size(), false);
63536
+ for (idx_t i = 0; i < names.size(); i++) {
63537
+ auto entry = option_map.find(names[i]);
63538
+ if (entry != option_map.end()) {
63539
+ result[i] = true;
63540
+ entry->second = true;
63541
+ }
63542
+ }
63543
+ for (auto &entry : option_map) {
63544
+ if (!entry.second) {
63545
+ throw BinderException("\"%s\" expected to find %s, but it was not found in the table", loption,
63546
+ entry.first.c_str());
63547
+ }
63548
+ }
63549
+ return result;
63550
+ }
63551
+
63552
+ static vector<bool> ParseColumnList(const Value &value, vector<string> &names, const string &loption) {
63553
+ vector<bool> result;
63554
+
63555
+ // Only accept a list of arguments
63556
+ if (value.type().id() != LogicalTypeId::LIST) {
63557
+ // Support a single argument if it's '*'
63558
+ if (value.type().id() == LogicalTypeId::VARCHAR && value.GetValue<string>() == "*") {
63559
+ result.resize(names.size(), true);
63560
+ return result;
63561
+ }
63562
+ throw BinderException("\"%s\" expects a column list or * as parameter", loption);
63563
+ }
63564
+ auto &children = ListValue::GetChildren(value);
63565
+ // accept '*' as single argument
63566
+ if (children.size() == 1 && children[0].type().id() == LogicalTypeId::VARCHAR &&
63567
+ children[0].GetValue<string>() == "*") {
63568
+ result.resize(names.size(), true);
63569
+ return result;
63570
+ }
63571
+ return ParseColumnList(children, names, loption);
63572
+ }
63573
+
63449
63574
  struct CSVFileHandle {
63450
63575
  public:
63451
63576
  explicit CSVFileHandle(unique_ptr<FileHandle> file_handle_p) : file_handle(move(file_handle_p)) {
@@ -63569,10 +63694,123 @@ void BufferedCSVReaderOptions::SetDelimiter(const string &input) {
63569
63694
  this->delimiter = StringUtil::Replace(input, "\\t", "\t");
63570
63695
  this->has_delimiter = true;
63571
63696
  if (input.empty()) {
63572
- throw BinderException("DELIM or SEP must not be empty");
63697
+ this->delimiter = string("\0", 1);
63573
63698
  }
63574
63699
  }
63575
63700
 
63701
+ void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value &value,
63702
+ vector<string> &expected_names) {
63703
+ if (SetBaseOption(loption, value)) {
63704
+ return;
63705
+ }
63706
+ if (loption == "auto_detect") {
63707
+ auto_detect = ParseBoolean(value, loption);
63708
+ } else if (loption == "sample_size") {
63709
+ int64_t sample_size = ParseInteger(value, loption);
63710
+ if (sample_size < 1 && sample_size != -1) {
63711
+ throw BinderException("Unsupported parameter for SAMPLE_SIZE: cannot be smaller than 1");
63712
+ }
63713
+ if (sample_size == -1) {
63714
+ sample_chunks = std::numeric_limits<uint64_t>::max();
63715
+ sample_chunk_size = STANDARD_VECTOR_SIZE;
63716
+ } else if (sample_size <= STANDARD_VECTOR_SIZE) {
63717
+ sample_chunk_size = sample_size;
63718
+ sample_chunks = 1;
63719
+ } else {
63720
+ sample_chunk_size = STANDARD_VECTOR_SIZE;
63721
+ sample_chunks = sample_size / STANDARD_VECTOR_SIZE;
63722
+ }
63723
+ } else if (loption == "skip") {
63724
+ skip_rows = ParseInteger(value, loption);
63725
+ } else if (loption == "max_line_size" || loption == "maximum_line_size") {
63726
+ maximum_line_size = ParseInteger(value, loption);
63727
+ } else if (loption == "sample_chunk_size") {
63728
+ sample_chunk_size = ParseInteger(value, loption);
63729
+ if (sample_chunk_size > STANDARD_VECTOR_SIZE) {
63730
+ throw BinderException(
63731
+ "Unsupported parameter for SAMPLE_CHUNK_SIZE: cannot be bigger than STANDARD_VECTOR_SIZE %d",
63732
+ STANDARD_VECTOR_SIZE);
63733
+ } else if (sample_chunk_size < 1) {
63734
+ throw BinderException("Unsupported parameter for SAMPLE_CHUNK_SIZE: cannot be smaller than 1");
63735
+ }
63736
+ } else if (loption == "sample_chunks") {
63737
+ sample_chunks = ParseInteger(value, loption);
63738
+ if (sample_chunks < 1) {
63739
+ throw BinderException("Unsupported parameter for SAMPLE_CHUNKS: cannot be smaller than 1");
63740
+ }
63741
+ } else if (loption == "force_not_null") {
63742
+ force_not_null = ParseColumnList(value, expected_names, loption);
63743
+ } else if (loption == "date_format" || loption == "dateformat") {
63744
+ string format = ParseString(value, loption);
63745
+ auto &date_format = this->date_format[LogicalTypeId::DATE];
63746
+ string error = StrTimeFormat::ParseFormatSpecifier(format, date_format);
63747
+ date_format.format_specifier = format;
63748
+ if (!error.empty()) {
63749
+ throw InvalidInputException("Could not parse DATEFORMAT: %s", error.c_str());
63750
+ }
63751
+ has_format[LogicalTypeId::DATE] = true;
63752
+ } else if (loption == "timestamp_format" || loption == "timestampformat") {
63753
+ string format = ParseString(value, loption);
63754
+ auto &timestamp_format = date_format[LogicalTypeId::TIMESTAMP];
63755
+ string error = StrTimeFormat::ParseFormatSpecifier(format, timestamp_format);
63756
+ timestamp_format.format_specifier = format;
63757
+ if (!error.empty()) {
63758
+ throw InvalidInputException("Could not parse TIMESTAMPFORMAT: %s", error.c_str());
63759
+ }
63760
+ has_format[LogicalTypeId::TIMESTAMP] = true;
63761
+ } else if (loption == "escape") {
63762
+ escape = ParseString(value, loption);
63763
+ has_escape = true;
63764
+ } else if (loption == "ignore_errors") {
63765
+ ignore_errors = ParseBoolean(value, loption);
63766
+ } else {
63767
+ throw BinderException("Unrecognized option for CSV reader \"%s\"", loption);
63768
+ }
63769
+ }
63770
+
63771
+ void BufferedCSVReaderOptions::SetWriteOption(const string &loption, const Value &value) {
63772
+ if (SetBaseOption(loption, value)) {
63773
+ return;
63774
+ }
63775
+
63776
+ if (loption == "force_quote") {
63777
+ force_quote = ParseColumnList(value, names, loption);
63778
+ } else {
63779
+ throw BinderException("Unrecognized option CSV writer \"%s\"", loption);
63780
+ }
63781
+ }
63782
+
63783
+ bool BufferedCSVReaderOptions::SetBaseOption(const string &loption, const Value &value) {
63784
+ // Make sure this function was only called after the option was turned into lowercase
63785
+ D_ASSERT(!std::any_of(loption.begin(), loption.end(), ::isupper));
63786
+
63787
+ if (StringUtil::StartsWith(loption, "delim") || StringUtil::StartsWith(loption, "sep")) {
63788
+ SetDelimiter(ParseString(value, loption));
63789
+ } else if (loption == "quote") {
63790
+ quote = ParseString(value, loption);
63791
+ has_quote = true;
63792
+ } else if (loption == "escape") {
63793
+ escape = ParseString(value, loption);
63794
+ has_escape = true;
63795
+ } else if (loption == "header") {
63796
+ header = ParseBoolean(value, loption);
63797
+ has_header = true;
63798
+ } else if (loption == "null" || loption == "nullstr") {
63799
+ null_str = ParseString(value, loption);
63800
+ } else if (loption == "encoding") {
63801
+ auto encoding = StringUtil::Lower(ParseString(value, loption));
63802
+ if (encoding != "utf8" && encoding != "utf-8") {
63803
+ throw BinderException("Copy is only supported for UTF-8 encoded files, ENCODING 'UTF-8'");
63804
+ }
63805
+ } else if (loption == "compression") {
63806
+ compression = FileCompressionTypeFromString(ParseString(value, loption));
63807
+ } else {
63808
+ // unrecognized option in base CSV
63809
+ return false;
63810
+ }
63811
+ return true;
63812
+ }
63813
+
63576
63814
  std::string BufferedCSVReaderOptions::ToString() const {
63577
63815
  return "DELIMITER='" + delimiter + (has_delimiter ? "'" : (auto_detect ? "' (auto detected)" : "' (default)")) +
63578
63816
  ", QUOTE='" + quote + (has_quote ? "'" : (auto_detect ? "' (auto detected)" : "' (default)")) +
@@ -98924,17 +99162,13 @@ struct BaseCSVData : public TableFunctionData {
98924
99162
  };
98925
99163
 
98926
99164
  struct WriteCSVData : public BaseCSVData {
98927
- WriteCSVData(string file_path, vector<LogicalType> sql_types, vector<string> names)
98928
- : sql_types(move(sql_types)), names(move(names)) {
99165
+ WriteCSVData(string file_path, vector<LogicalType> sql_types, vector<string> names) : sql_types(move(sql_types)) {
98929
99166
  files.push_back(move(file_path));
99167
+ options.names = move(names);
98930
99168
  }
98931
99169
 
98932
99170
  //! The SQL types to write
98933
99171
  vector<LogicalType> sql_types;
98934
- //! The column names of the columns to write
98935
- vector<string> names;
98936
- //! True, if column with that index must be quoted
98937
- vector<bool> force_quote;
98938
99172
  //! The newline string to write
98939
99173
  string newline = "\n";
98940
99174
  //! Whether or not we are writing a simple CSV (delimiter, quote and escape are all 1 byte in length)
@@ -98946,8 +99180,6 @@ struct WriteCSVData : public BaseCSVData {
98946
99180
  struct ReadCSVData : public BaseCSVData {
98947
99181
  //! The expected SQL types to read
98948
99182
  vector<LogicalType> sql_types;
98949
- //! Whether or not to include a file name column
98950
- bool include_file_name = false;
98951
99183
  //! The initial reader (if any): this is used when automatic detection is used during binding.
98952
99184
  //! In this case, the CSV reader is already created and might as well be re-used.
98953
99185
  unique_ptr<BufferedCSVReader> initial_reader;
@@ -98990,76 +99222,9 @@ void SubstringDetection(string &str_1, string &str_2, const string &name_str_1,
98990
99222
  }
98991
99223
  }
98992
99224
 
98993
- static bool ParseBoolean(vector<Value> &set) {
98994
- if (set.empty()) {
98995
- // no option specified: default to true
98996
- return true;
98997
- }
98998
- if (set.size() > 1) {
98999
- throw BinderException("Expected a single argument as a boolean value (e.g. TRUE or 1)");
99000
- }
99001
- if (set[0].type() == LogicalType::FLOAT || set[0].type() == LogicalType::DOUBLE ||
99002
- set[0].type().id() == LogicalTypeId::DECIMAL) {
99003
- throw BinderException("Expected a boolean value (e.g. TRUE or 1)");
99004
- }
99005
- return BooleanValue::Get(set[0].CastAs(LogicalType::BOOLEAN));
99006
- }
99007
-
99008
- static string ParseString(vector<Value> &set) {
99009
- if (set.size() != 1) {
99010
- // no option specified or multiple options specified
99011
- throw BinderException("Expected a single argument as a string value");
99012
- }
99013
- if (set[0].type().id() != LogicalTypeId::VARCHAR) {
99014
- throw BinderException("Expected a string argument!");
99015
- }
99016
- return set[0].GetValue<string>();
99017
- }
99018
-
99019
- static int64_t ParseInteger(vector<Value> &set) {
99020
- if (set.size() != 1) {
99021
- // no option specified or multiple options specified
99022
- throw BinderException("Expected a single argument as a integer value");
99023
- }
99024
- return set[0].GetValue<int64_t>();
99025
- }
99026
-
99027
99225
  //===--------------------------------------------------------------------===//
99028
99226
  // Bind
99029
99227
  //===--------------------------------------------------------------------===//
99030
- static bool ParseBaseOption(BufferedCSVReaderOptions &options, string &loption, vector<Value> &set) {
99031
- if (StringUtil::StartsWith(loption, "delim") || StringUtil::StartsWith(loption, "sep")) {
99032
- options.SetDelimiter(ParseString(set));
99033
- } else if (loption == "quote") {
99034
- options.quote = ParseString(set);
99035
- options.has_quote = true;
99036
- } else if (loption == "escape") {
99037
- options.escape = ParseString(set);
99038
- options.has_escape = true;
99039
- } else if (loption == "header") {
99040
- options.header = ParseBoolean(set);
99041
- options.has_header = true;
99042
- } else if (loption == "null") {
99043
- options.null_str = ParseString(set);
99044
- } else if (loption == "encoding") {
99045
- auto encoding = StringUtil::Lower(ParseString(set));
99046
- if (encoding != "utf8" && encoding != "utf-8") {
99047
- throw BinderException("Copy is only supported for UTF-8 encoded files, ENCODING 'UTF-8'");
99048
- }
99049
- } else if (loption == "compression") {
99050
- options.compression = FileCompressionTypeFromString(ParseString(set));
99051
- } else if (loption == "skip") {
99052
- options.skip_rows = ParseInteger(set);
99053
- } else if (loption == "max_line_size" || loption == "maximum_line_size") {
99054
- options.maximum_line_size = ParseInteger(set);
99055
- } else if (loption == "ignore_errors") {
99056
- options.ignore_errors = ParseBoolean(set);
99057
- } else {
99058
- // unrecognized option in base CSV
99059
- return false;
99060
- }
99061
- return true;
99062
- }
99063
99228
 
99064
99229
  void BaseCSVData::Finalize() {
99065
99230
  // verify that the options are correct in the final pass
@@ -99093,35 +99258,11 @@ void BaseCSVData::Finalize() {
99093
99258
  }
99094
99259
  }
99095
99260
 
99096
- static vector<bool> ParseColumnList(vector<Value> &set, vector<string> &names) {
99097
- vector<bool> result;
99261
+ static Value ConvertVectorToValue(vector<Value> set) {
99098
99262
  if (set.empty()) {
99099
- throw BinderException("Expected a column list or * as parameter");
99263
+ return Value::EMPTYLIST(LogicalType::BOOLEAN);
99100
99264
  }
99101
- if (set.size() == 1 && set[0].type().id() == LogicalTypeId::VARCHAR && set[0].GetValue<string>() == "*") {
99102
- // *, force_not_null on all columns
99103
- result.resize(names.size(), true);
99104
- } else {
99105
- // list of options: parse the list
99106
- unordered_map<string, bool> option_map;
99107
- for (idx_t i = 0; i < set.size(); i++) {
99108
- option_map[set[i].ToString()] = false;
99109
- }
99110
- result.resize(names.size(), false);
99111
- for (idx_t i = 0; i < names.size(); i++) {
99112
- auto entry = option_map.find(names[i]);
99113
- if (entry != option_map.end()) {
99114
- result[i] = true;
99115
- entry->second = true;
99116
- }
99117
- }
99118
- for (auto &entry : option_map) {
99119
- if (!entry.second) {
99120
- throw BinderException("Column %s not found in table", entry.first.c_str());
99121
- }
99122
- }
99123
- }
99124
- return result;
99265
+ return Value::LIST(move(set));
99125
99266
  }
99126
99267
 
99127
99268
  static unique_ptr<FunctionData> WriteCSVBind(ClientContext &context, CopyInfo &info, vector<string> &names,
@@ -99132,19 +99273,12 @@ static unique_ptr<FunctionData> WriteCSVBind(ClientContext &context, CopyInfo &i
99132
99273
  for (auto &option : info.options) {
99133
99274
  auto loption = StringUtil::Lower(option.first);
99134
99275
  auto &set = option.second;
99135
- if (ParseBaseOption(bind_data->options, loption, set)) {
99136
- // parsed option in base CSV options: continue
99137
- continue;
99138
- } else if (loption == "force_quote") {
99139
- bind_data->force_quote = ParseColumnList(set, names);
99140
- } else {
99141
- throw NotImplementedException("Unrecognized option for CSV: %s", option.first.c_str());
99142
- }
99276
+ bind_data->options.SetWriteOption(loption, ConvertVectorToValue(move(set)));
99143
99277
  }
99144
99278
  // verify the parsed options
99145
- if (bind_data->force_quote.empty()) {
99279
+ if (bind_data->options.force_quote.empty()) {
99146
99280
  // no FORCE_QUOTE specified: initialize to false
99147
- bind_data->force_quote.resize(names.size(), false);
99281
+ bind_data->options.force_quote.resize(names.size(), false);
99148
99282
  }
99149
99283
  bind_data->Finalize();
99150
99284
  bind_data->is_simple = bind_data->options.delimiter.size() == 1 && bind_data->options.escape.size() == 1 &&
@@ -99171,63 +99305,7 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, CopyInfo &in
99171
99305
  for (auto &option : info.options) {
99172
99306
  auto loption = StringUtil::Lower(option.first);
99173
99307
  auto &set = option.second;
99174
- if (loption == "auto_detect") {
99175
- options.auto_detect = ParseBoolean(set);
99176
- } else if (ParseBaseOption(options, loption, set)) {
99177
- // parsed option in base CSV options: continue
99178
- continue;
99179
- } else if (loption == "sample_size") {
99180
- int64_t sample_size = ParseInteger(set);
99181
- if (sample_size < 1 && sample_size != -1) {
99182
- throw BinderException("Unsupported parameter for SAMPLE_SIZE: cannot be smaller than 1");
99183
- }
99184
- if (sample_size == -1) {
99185
- options.sample_chunks = std::numeric_limits<uint64_t>::max();
99186
- options.sample_chunk_size = STANDARD_VECTOR_SIZE;
99187
- } else if (sample_size <= STANDARD_VECTOR_SIZE) {
99188
- options.sample_chunk_size = sample_size;
99189
- options.sample_chunks = 1;
99190
- } else {
99191
- options.sample_chunk_size = STANDARD_VECTOR_SIZE;
99192
- options.sample_chunks = sample_size / STANDARD_VECTOR_SIZE;
99193
- }
99194
- } else if (loption == "sample_chunk_size") {
99195
- options.sample_chunk_size = ParseInteger(set);
99196
- if (options.sample_chunk_size > STANDARD_VECTOR_SIZE) {
99197
- throw BinderException(
99198
- "Unsupported parameter for SAMPLE_CHUNK_SIZE: cannot be bigger than STANDARD_VECTOR_SIZE %d",
99199
- STANDARD_VECTOR_SIZE);
99200
- } else if (options.sample_chunk_size < 1) {
99201
- throw BinderException("Unsupported parameter for SAMPLE_CHUNK_SIZE: cannot be smaller than 1");
99202
- }
99203
- } else if (loption == "sample_chunks") {
99204
- options.sample_chunks = ParseInteger(set);
99205
- if (options.sample_chunks < 1) {
99206
- throw BinderException("Unsupported parameter for SAMPLE_CHUNKS: cannot be smaller than 1");
99207
- }
99208
- } else if (loption == "force_not_null") {
99209
- options.force_not_null = ParseColumnList(set, expected_names);
99210
- } else if (loption == "date_format" || loption == "dateformat") {
99211
- string format = ParseString(set);
99212
- auto &date_format = options.date_format[LogicalTypeId::DATE];
99213
- string error = StrTimeFormat::ParseFormatSpecifier(format, date_format);
99214
- date_format.format_specifier = format;
99215
- if (!error.empty()) {
99216
- throw InvalidInputException("Could not parse DATEFORMAT: %s", error.c_str());
99217
- }
99218
- options.has_format[LogicalTypeId::DATE] = true;
99219
- } else if (loption == "timestamp_format" || loption == "timestampformat") {
99220
- string format = ParseString(set);
99221
- auto &timestamp_format = options.date_format[LogicalTypeId::TIMESTAMP];
99222
- string error = StrTimeFormat::ParseFormatSpecifier(format, timestamp_format);
99223
- timestamp_format.format_specifier = format;
99224
- if (!error.empty()) {
99225
- throw InvalidInputException("Could not parse TIMESTAMPFORMAT: %s", error.c_str());
99226
- }
99227
- options.has_format[LogicalTypeId::TIMESTAMP] = true;
99228
- } else {
99229
- throw NotImplementedException("Unrecognized option for CSV: %s", option.first.c_str());
99230
- }
99308
+ options.SetReadOption(loption, ConvertVectorToValue(move(set)), expected_names);
99231
99309
  }
99232
99310
  // verify the parsed options
99233
99311
  if (options.force_not_null.empty()) {
@@ -99391,7 +99469,7 @@ static unique_ptr<LocalFunctionData> WriteCSVInitializeLocal(ClientContext &cont
99391
99469
 
99392
99470
  // create the chunk with VARCHAR types
99393
99471
  vector<LogicalType> types;
99394
- types.resize(csv_data.names.size(), LogicalType::VARCHAR);
99472
+ types.resize(csv_data.options.names.size(), LogicalType::VARCHAR);
99395
99473
 
99396
99474
  local_data->cast_chunk.Initialize(types);
99397
99475
  return move(local_data);
@@ -99407,11 +99485,12 @@ static unique_ptr<GlobalFunctionData> WriteCSVInitializeGlobal(ClientContext &co
99407
99485
  if (options.header) {
99408
99486
  BufferedSerializer serializer;
99409
99487
  // write the header line to the file
99410
- for (idx_t i = 0; i < csv_data.names.size(); i++) {
99488
+ for (idx_t i = 0; i < csv_data.options.names.size(); i++) {
99411
99489
  if (i != 0) {
99412
99490
  serializer.WriteBufferData(options.delimiter);
99413
99491
  }
99414
- WriteQuotedString(serializer, csv_data, csv_data.names[i].c_str(), csv_data.names[i].size(), false);
99492
+ WriteQuotedString(serializer, csv_data, csv_data.options.names[i].c_str(), csv_data.options.names[i].size(),
99493
+ false);
99415
99494
  }
99416
99495
  serializer.WriteBufferData(csv_data.newline);
99417
99496
 
@@ -99464,7 +99543,7 @@ static void WriteCSVSink(ClientContext &context, FunctionData &bind_data, Global
99464
99543
  // (e.g. integers only require quotes if the delimiter is a number, decimals only require quotes if the
99465
99544
  // delimiter is a number or "." character)
99466
99545
  WriteQuotedString(writer, csv_data, str_value.GetDataUnsafe(), str_value.GetSize(),
99467
- csv_data.force_quote[col_idx]);
99546
+ csv_data.options.force_quote[col_idx]);
99468
99547
  }
99469
99548
  writer.WriteBufferData(csv_data.newline);
99470
99549
  }
@@ -100273,74 +100352,10 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
100273
100352
 
100274
100353
  for (auto &kv : input.named_parameters) {
100275
100354
  auto loption = StringUtil::Lower(kv.first);
100276
- if (loption == "auto_detect") {
100277
- options.auto_detect = BooleanValue::Get(kv.second);
100278
- } else if (loption == "sep" || loption == "delim") {
100279
- options.SetDelimiter(StringValue::Get(kv.second));
100280
- } else if (loption == "header") {
100281
- options.header = BooleanValue::Get(kv.second);
100282
- options.has_header = true;
100283
- } else if (loption == "quote") {
100284
- options.quote = StringValue::Get(kv.second);
100285
- options.has_quote = true;
100286
- } else if (loption == "escape") {
100287
- options.escape = StringValue::Get(kv.second);
100288
- options.has_escape = true;
100289
- } else if (loption == "nullstr") {
100290
- options.null_str = StringValue::Get(kv.second);
100291
- } else if (loption == "sample_size") {
100292
- int64_t sample_size = kv.second.GetValue<int64_t>();
100293
- if (sample_size < 1 && sample_size != -1) {
100294
- throw BinderException("Unsupported parameter for SAMPLE_SIZE: cannot be smaller than 1");
100295
- }
100296
- if (sample_size == -1) {
100297
- options.sample_chunks = std::numeric_limits<uint64_t>::max();
100298
- options.sample_chunk_size = STANDARD_VECTOR_SIZE;
100299
- } else if (sample_size <= STANDARD_VECTOR_SIZE) {
100300
- options.sample_chunk_size = sample_size;
100301
- options.sample_chunks = 1;
100302
- } else {
100303
- options.sample_chunk_size = STANDARD_VECTOR_SIZE;
100304
- options.sample_chunks = sample_size / STANDARD_VECTOR_SIZE;
100305
- }
100306
- } else if (loption == "sample_chunk_size") {
100307
- options.sample_chunk_size = kv.second.GetValue<int64_t>();
100308
- if (options.sample_chunk_size > STANDARD_VECTOR_SIZE) {
100309
- throw BinderException(
100310
- "Unsupported parameter for SAMPLE_CHUNK_SIZE: cannot be bigger than STANDARD_VECTOR_SIZE %d",
100311
- STANDARD_VECTOR_SIZE);
100312
- } else if (options.sample_chunk_size < 1) {
100313
- throw BinderException("Unsupported parameter for SAMPLE_CHUNK_SIZE: cannot be smaller than 1");
100314
- }
100315
- } else if (loption == "sample_chunks") {
100316
- options.sample_chunks = kv.second.GetValue<int64_t>();
100317
- if (options.sample_chunks < 1) {
100318
- throw BinderException("Unsupported parameter for SAMPLE_CHUNKS: cannot be smaller than 1");
100319
- }
100320
- } else if (loption == "all_varchar") {
100321
- options.all_varchar = BooleanValue::Get(kv.second);
100322
- } else if (loption == "dateformat") {
100323
- options.has_format[LogicalTypeId::DATE] = true;
100324
- auto &date_format = options.date_format[LogicalTypeId::DATE];
100325
- date_format.format_specifier = StringValue::Get(kv.second);
100326
- string error = StrTimeFormat::ParseFormatSpecifier(date_format.format_specifier, date_format);
100327
- if (!error.empty()) {
100328
- throw InvalidInputException("Could not parse DATEFORMAT: %s", error.c_str());
100329
- }
100330
- } else if (loption == "timestampformat") {
100331
- options.has_format[LogicalTypeId::TIMESTAMP] = true;
100332
- auto &timestamp_format = options.date_format[LogicalTypeId::TIMESTAMP];
100333
- timestamp_format.format_specifier = StringValue::Get(kv.second);
100334
- string error = StrTimeFormat::ParseFormatSpecifier(timestamp_format.format_specifier, timestamp_format);
100335
- if (!error.empty()) {
100336
- throw InvalidInputException("Could not parse TIMESTAMPFORMAT: %s", error.c_str());
100337
- }
100338
- } else if (loption == "normalize_names") {
100339
- options.normalize_names = BooleanValue::Get(kv.second);
100340
- } else if (loption == "columns") {
100355
+ if (loption == "columns") {
100341
100356
  auto &child_type = kv.second.type();
100342
100357
  if (child_type.id() != LogicalTypeId::STRUCT) {
100343
- throw BinderException("read_csv columns requires a a struct as input");
100358
+ throw BinderException("read_csv columns requires a struct as input");
100344
100359
  }
100345
100360
  auto &struct_children = StructValue::GetChildren(kv.second);
100346
100361
  D_ASSERT(StructType::GetChildCount(child_type) == struct_children.size());
@@ -100356,16 +100371,14 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
100356
100371
  if (names.empty()) {
100357
100372
  throw BinderException("read_csv requires at least a single column as input!");
100358
100373
  }
100359
- } else if (loption == "compression") {
100360
- options.compression = FileCompressionTypeFromString(StringValue::Get(kv.second));
100374
+ } else if (loption == "all_varchar") {
100375
+ options.all_varchar = BooleanValue::Get(kv.second);
100376
+ } else if (loption == "normalize_names") {
100377
+ options.normalize_names = BooleanValue::Get(kv.second);
100361
100378
  } else if (loption == "filename") {
100362
- result->include_file_name = BooleanValue::Get(kv.second);
100363
- } else if (loption == "skip") {
100364
- options.skip_rows = kv.second.GetValue<int64_t>();
100365
- } else if (loption == "max_line_size" || loption == "maximum_line_size") {
100366
- options.maximum_line_size = kv.second.GetValue<int64_t>();
100379
+ options.include_file_name = BooleanValue::Get(kv.second);
100367
100380
  } else {
100368
- throw InternalException("Unrecognized parameter %s", kv.first);
100381
+ options.SetReadOption(loption, kv.second, names);
100369
100382
  }
100370
100383
  }
100371
100384
  if (!options.auto_detect && return_types.empty()) {
@@ -100387,7 +100400,7 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
100387
100400
  result->sql_types = return_types;
100388
100401
  D_ASSERT(return_types.size() == names.size());
100389
100402
  }
100390
- if (result->include_file_name) {
100403
+ if (result->options.include_file_name) {
100391
100404
  return_types.emplace_back(LogicalType::VARCHAR);
100392
100405
  names.emplace_back("filename");
100393
100406
  }
@@ -100441,7 +100454,7 @@ static void ReadCSVFunction(ClientContext &context, const FunctionData *bind_dat
100441
100454
  break;
100442
100455
  }
100443
100456
  } while (true);
100444
- if (bind_data.include_file_name) {
100457
+ if (bind_data.options.include_file_name) {
100445
100458
  auto &col = output.data.back();
100446
100459
  col.SetValue(0, Value(data.csv_reader->options.file_path));
100447
100460
  col.SetVectorType(VectorType::CONSTANT_VECTOR);
@@ -147892,6 +147905,9 @@ unique_ptr<TableRef> SubqueryRef::Deserialize(FieldReader &reader) {
147892
147905
 
147893
147906
  namespace duckdb {
147894
147907
 
147908
+ TableFunctionRef::TableFunctionRef() : TableRef(TableReferenceType::TABLE_FUNCTION) {
147909
+ }
147910
+
147895
147911
  string TableFunctionRef::ToString() const {
147896
147912
  return BaseToString(function->ToString(), column_name_alias);
147897
147913
  }
@@ -159599,9 +159615,7 @@ unique_ptr<BoundTableRef> Binder::Bind(TableFunctionRef &ref) {
159599
159615
 
159600
159616
  if (func_catalog->type == CatalogType::TABLE_FUNCTION_ENTRY) {
159601
159617
  function = (TableFunctionCatalogEntry *)func_catalog;
159602
-
159603
159618
  } else if (func_catalog->type == CatalogType::TABLE_MACRO_ENTRY) {
159604
-
159605
159619
  auto macro_func = (TableMacroCatalogEntry *)func_catalog;
159606
159620
  auto query_node = BindTableMacro(*fexpr, macro_func, 0);
159607
159621
  D_ASSERT(query_node);
@@ -172990,15 +173004,27 @@ static void UpdateChunk(Vector &data, Vector &updates, Vector &row_ids, idx_t co
172990
173004
  case PhysicalType::INT8:
172991
173005
  TemplatedUpdateLoop<int8_t>(data, updates, row_ids, count, base_index);
172992
173006
  break;
173007
+ case PhysicalType::UINT8:
173008
+ TemplatedUpdateLoop<uint8_t>(data, updates, row_ids, count, base_index);
173009
+ break;
172993
173010
  case PhysicalType::INT16:
172994
173011
  TemplatedUpdateLoop<int16_t>(data, updates, row_ids, count, base_index);
172995
173012
  break;
173013
+ case PhysicalType::UINT16:
173014
+ TemplatedUpdateLoop<uint16_t>(data, updates, row_ids, count, base_index);
173015
+ break;
172996
173016
  case PhysicalType::INT32:
172997
173017
  TemplatedUpdateLoop<int32_t>(data, updates, row_ids, count, base_index);
172998
173018
  break;
173019
+ case PhysicalType::UINT32:
173020
+ TemplatedUpdateLoop<uint32_t>(data, updates, row_ids, count, base_index);
173021
+ break;
172999
173022
  case PhysicalType::INT64:
173000
173023
  TemplatedUpdateLoop<int64_t>(data, updates, row_ids, count, base_index);
173001
173024
  break;
173025
+ case PhysicalType::UINT64:
173026
+ TemplatedUpdateLoop<uint64_t>(data, updates, row_ids, count, base_index);
173027
+ break;
173002
173028
  case PhysicalType::FLOAT:
173003
173029
  TemplatedUpdateLoop<float>(data, updates, row_ids, count, base_index);
173004
173030
  break;