duckdb 0.6.2-dev2015.0 → 0.6.2-dev2057.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.6.2-dev2015.0",
5
+ "version": "0.6.2-dev2057.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -60,6 +60,25 @@ static int64_t ParseInteger(const Value &value, const string &loption) {
60
60
  return value.GetValue<int64_t>();
61
61
  }
62
62
 
63
+ void BufferedCSVReaderOptions::SetHeader(bool input) {
64
+ this->header = input;
65
+ this->has_header = true;
66
+ }
67
+
68
+ void BufferedCSVReaderOptions::SetCompression(const string &compression) {
69
+ this->compression = FileCompressionTypeFromString(compression);
70
+ }
71
+
72
+ void BufferedCSVReaderOptions::SetEscape(const string &input) {
73
+ this->escape = input;
74
+ this->has_escape = true;
75
+ }
76
+
77
+ void BufferedCSVReaderOptions::SetParallel(bool use_parallel) {
78
+ this->has_parallel = true;
79
+ this->use_parallel = use_parallel;
80
+ }
81
+
63
82
  void BufferedCSVReaderOptions::SetDelimiter(const string &input) {
64
83
  this->delimiter = StringUtil::Replace(input, "\\t", "\t");
65
84
  this->has_delimiter = true;
@@ -68,6 +87,11 @@ void BufferedCSVReaderOptions::SetDelimiter(const string &input) {
68
87
  }
69
88
  }
70
89
 
90
+ void BufferedCSVReaderOptions::SetQuote(const string &quote) {
91
+ this->quote = quote;
92
+ this->has_quote = true;
93
+ }
94
+
71
95
  void BufferedCSVReaderOptions::SetNewline(const string &input) {
72
96
  if (input == "\\n" || input == "\\r") {
73
97
  new_line = NewLineIdentifier::SINGLE;
@@ -102,6 +126,8 @@ void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value
102
126
  }
103
127
  if (loption == "auto_detect") {
104
128
  auto_detect = ParseBoolean(value, loption);
129
+ } else if (loption == "parallel") {
130
+ SetParallel(ParseBoolean(value, loption));
105
131
  } else if (loption == "sample_size") {
106
132
  int64_t sample_size = ParseInteger(value, loption);
107
133
  if (sample_size < 1 && sample_size != -1) {
@@ -143,9 +169,6 @@ void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value
143
169
  } else if (loption == "timestamp_format" || loption == "timestampformat") {
144
170
  string format = ParseString(value, loption);
145
171
  SetDateFormat(LogicalTypeId::TIMESTAMP, format, true);
146
- } else if (loption == "escape") {
147
- escape = ParseString(value, loption);
148
- has_escape = true;
149
172
  } else if (loption == "ignore_errors") {
150
173
  ignore_errors = ParseBoolean(value, loption);
151
174
  } else if (loption == "union_by_name") {
@@ -193,16 +216,13 @@ bool BufferedCSVReaderOptions::SetBaseOption(const string &loption, const Value
193
216
  if (StringUtil::StartsWith(loption, "delim") || StringUtil::StartsWith(loption, "sep")) {
194
217
  SetDelimiter(ParseString(value, loption));
195
218
  } else if (loption == "quote") {
196
- quote = ParseString(value, loption);
197
- has_quote = true;
219
+ SetQuote(ParseString(value, loption));
198
220
  } else if (loption == "new_line") {
199
221
  SetNewline(ParseString(value, loption));
200
222
  } else if (loption == "escape") {
201
- escape = ParseString(value, loption);
202
- has_escape = true;
223
+ SetEscape(ParseString(value, loption));
203
224
  } else if (loption == "header") {
204
- header = ParseBoolean(value, loption);
205
- has_header = true;
225
+ SetHeader(ParseBoolean(value, loption));
206
226
  } else if (loption == "null" || loption == "nullstr") {
207
227
  null_str = ParseString(value, loption);
208
228
  } else if (loption == "encoding") {
@@ -211,7 +231,7 @@ bool BufferedCSVReaderOptions::SetBaseOption(const string &loption, const Value
211
231
  throw BinderException("Copy is only supported for UTF-8 encoded files, ENCODING 'UTF-8'");
212
232
  }
213
233
  } else if (loption == "compression") {
214
- compression = FileCompressionTypeFromString(ParseString(value, loption));
234
+ SetCompression(ParseString(value, loption));
215
235
  } else {
216
236
  // unrecognized option in base CSV
217
237
  return false;
@@ -227,7 +247,7 @@ std::string BufferedCSVReaderOptions::ToString() const {
227
247
  "\n header=" + std::to_string(header) +
228
248
  (has_header ? "" : (auto_detect ? " (auto detected)" : "' (default)")) +
229
249
  "\n sample_size=" + std::to_string(sample_chunk_size * sample_chunks) +
230
- "\n ignore_erros=" + std::to_string(ignore_errors) + "\n all_varchar=" + std::to_string(all_varchar);
250
+ "\n ignore_errors=" + std::to_string(ignore_errors) + "\n all_varchar=" + std::to_string(all_varchar);
231
251
  }
232
252
 
233
253
  } // namespace duckdb
@@ -0,0 +1,93 @@
1
+ #include "duckdb/common/exception.hpp"
2
+ #include "duckdb/common/operator/cast_operators.hpp"
3
+ #include "duckdb/common/types/string_type.hpp"
4
+ #include "duckdb/common/types/value.hpp"
5
+ #include "duckdb/common/types/vector.hpp"
6
+ #include "duckdb/common/unicode_bar.hpp"
7
+ #include "duckdb/common/vector_operations/generic_executor.hpp"
8
+ #include "duckdb/function/scalar/string_functions.hpp"
9
+
10
+ namespace duckdb {
11
+
12
+ static string_t BarScalarFunction(double x, double min, double max, double max_width, string &result) {
13
+ static const char *FULL_BLOCK = UnicodeBar::FullBlock();
14
+ static const char *const *PARTIAL_BLOCKS = UnicodeBar::PartialBlocks();
15
+ static const idx_t PARTIAL_BLOCKS_COUNT = UnicodeBar::PartialBlocksCount();
16
+
17
+ if (!Value::IsFinite(max_width)) {
18
+ throw ValueOutOfRangeException("Max bar width must not be NaN or infinity");
19
+ }
20
+ if (max_width < 1) {
21
+ throw ValueOutOfRangeException("Max bar width must be >= 1");
22
+ }
23
+ if (max_width > 1000) {
24
+ throw ValueOutOfRangeException("Max bar width must be <= 1000");
25
+ }
26
+
27
+ double width;
28
+
29
+ if (Value::IsNan(x) || Value::IsNan(min) || Value::IsNan(max) || x <= min) {
30
+ width = 0;
31
+ } else if (x >= max) {
32
+ width = max_width;
33
+ } else {
34
+ width = max_width * (x - min) / (max - min);
35
+ }
36
+
37
+ if (!Value::IsFinite(width)) {
38
+ throw ValueOutOfRangeException("Bar width must not be NaN or infinity");
39
+ }
40
+
41
+ result.clear();
42
+
43
+ int32_t width_as_int = static_cast<int32_t>(width * PARTIAL_BLOCKS_COUNT);
44
+ idx_t full_blocks_count = (width_as_int / PARTIAL_BLOCKS_COUNT);
45
+ for (idx_t i = 0; i < full_blocks_count; i++) {
46
+ result += FULL_BLOCK;
47
+ }
48
+
49
+ idx_t remaining = width_as_int % PARTIAL_BLOCKS_COUNT;
50
+
51
+ if (remaining) {
52
+ result += PARTIAL_BLOCKS[remaining];
53
+ }
54
+
55
+ return string_t(result);
56
+ }
57
+
58
+ static void BarFunction(DataChunk &args, ExpressionState &state, Vector &result) {
59
+ D_ASSERT(args.ColumnCount() == 3 || args.ColumnCount() == 4);
60
+ auto &x_arg = args.data[0];
61
+ auto &min_arg = args.data[1];
62
+ auto &max_arg = args.data[2];
63
+ string buffer;
64
+
65
+ if (args.ColumnCount() == 3) {
66
+ GenericExecutor::ExecuteTernary<PrimitiveType<double>, PrimitiveType<double>, PrimitiveType<double>,
67
+ PrimitiveType<string_t>>(
68
+ x_arg, min_arg, max_arg, result, args.size(),
69
+ [&](PrimitiveType<double> x, PrimitiveType<double> min, PrimitiveType<double> max) {
70
+ return StringVector::AddString(result, BarScalarFunction(x.val, min.val, max.val, 80, buffer));
71
+ });
72
+ } else {
73
+ auto &width_arg = args.data[3];
74
+ GenericExecutor::ExecuteQuaternary<PrimitiveType<double>, PrimitiveType<double>, PrimitiveType<double>,
75
+ PrimitiveType<double>, PrimitiveType<string_t>>(
76
+ x_arg, min_arg, max_arg, width_arg, result, args.size(),
77
+ [&](PrimitiveType<double> x, PrimitiveType<double> min, PrimitiveType<double> max,
78
+ PrimitiveType<double> width) {
79
+ return StringVector::AddString(result, BarScalarFunction(x.val, min.val, max.val, width.val, buffer));
80
+ });
81
+ }
82
+ }
83
+
84
+ void BarFun::RegisterFunction(BuiltinFunctions &set) {
85
+ ScalarFunctionSet bar("bar");
86
+ bar.AddFunction(ScalarFunction({LogicalType::DOUBLE, LogicalType::DOUBLE, LogicalType::DOUBLE, LogicalType::DOUBLE},
87
+ LogicalType::VARCHAR, BarFunction));
88
+ bar.AddFunction(ScalarFunction({LogicalType::DOUBLE, LogicalType::DOUBLE, LogicalType::DOUBLE},
89
+ LogicalType::VARCHAR, BarFunction));
90
+ set.AddFunction(bar);
91
+ }
92
+
93
+ } // namespace duckdb
@@ -33,6 +33,7 @@ void BuiltinFunctions::RegisterStringFunctions() {
33
33
  Register<UnicodeFun>();
34
34
  Register<NFCNormalizeFun>();
35
35
  Register<StringSplitFun>();
36
+ Register<BarFun>();
36
37
  Register<ASCII>();
37
38
  Register<CHR>();
38
39
  Register<MismatchesFun>();
@@ -38,6 +38,10 @@ void ReadCSVData::FinalizeRead(ClientContext &context) {
38
38
  BaseCSVData::Finalize();
39
39
  auto &config = DBConfig::GetConfig(context);
40
40
  single_threaded = !config.options.experimental_parallel_csv_reader;
41
+ if (options.has_parallel) {
42
+ // Override the option set in the config
43
+ single_threaded = !options.use_parallel;
44
+ }
41
45
  bool null_or_empty = options.delimiter.empty() || options.escape.empty() || options.quote.empty() ||
42
46
  options.delimiter[0] == '\0' || options.escape[0] == '\0' || options.quote[0] == '\0';
43
47
  bool complex_options = options.delimiter.size() > 1 || options.escape.size() > 1 || options.quote.size() > 1;
@@ -789,6 +793,7 @@ static void ReadCSVAddNamedParameters(TableFunction &table_function) {
789
793
  table_function.named_parameters["union_by_name"] = LogicalType::BOOLEAN;
790
794
  table_function.named_parameters["buffer_size"] = LogicalType::UBIGINT;
791
795
  table_function.named_parameters["decimal_separator"] = LogicalType::VARCHAR;
796
+ table_function.named_parameters["parallel"] = LogicalType::BOOLEAN;
792
797
  }
793
798
 
794
799
  double CSVReaderProgress(ClientContext &context, const FunctionData *bind_data_p,
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.6.2-dev2015"
2
+ #define DUCKDB_VERSION "0.6.2-dev2057"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "cbf43430b3"
5
+ #define DUCKDB_SOURCE_ID "e2cacf2ada"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -10,6 +10,7 @@
10
10
 
11
11
  #include "duckdb/common/constants.hpp"
12
12
  #include "duckdb/common/progress_bar/progress_bar_display.hpp"
13
+ #include "duckdb/common/unicode_bar.hpp"
13
14
 
14
15
  namespace duckdb {
15
16
 
@@ -25,18 +26,16 @@ public:
25
26
  void Finish() override;
26
27
 
27
28
  private:
28
- static constexpr const idx_t PARTIAL_BLOCK_COUNT = 8;
29
+ static constexpr const idx_t PARTIAL_BLOCK_COUNT = UnicodeBar::PartialBlocksCount();
29
30
  #ifndef DUCKDB_ASCII_TREE_RENDERER
30
31
  const char *PROGRESS_EMPTY = " ";
31
- const char *PROGRESS_PARTIAL[PARTIAL_BLOCK_COUNT] {
32
- " ", "\xE2\x96\x8F", "\xE2\x96\x8E", "\xE2\x96\x8D", "\xE2\x96\x8C", "\xE2\x96\x8B", "\xE2\x96\x8A",
33
- "\xE2\x96\x89"};
34
- const char *PROGRESS_BLOCK = "\xE2\x96\x88";
32
+ const char *const *PROGRESS_PARTIAL = UnicodeBar::PartialBlocks();
33
+ const char *PROGRESS_BLOCK = UnicodeBar::FullBlock();
35
34
  const char *PROGRESS_START = "\xE2\x96\x95";
36
35
  const char *PROGRESS_END = "\xE2\x96\x8F";
37
36
  #else
38
37
  const char *PROGRESS_EMPTY = " ";
39
- const char *PROGRESS_PARTIAL[PARTIAL_BLOCK_COUNT] {" ", " ", " ", " ", " ", " ", " ", " "};
38
+ const char *const PROGRESS_PARTIAL[PARTIAL_BLOCK_COUNT] = {" ", " ", " ", " ", " ", " ", " ", " "};
40
39
  const char *PROGRESS_BLOCK = "=";
41
40
  const char *PROGRESS_START = "[";
42
41
  const char *PROGRESS_END = "]";
@@ -0,0 +1,35 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/common/unicode_bar.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ namespace duckdb {
10
+ struct UnicodeBar {
11
+ private:
12
+ static constexpr idx_t PARTIAL_BLOCKS_COUNT = 8;
13
+
14
+ public:
15
+ static constexpr idx_t PartialBlocksCount() {
16
+ return PARTIAL_BLOCKS_COUNT;
17
+ }
18
+
19
+ static const char *const *PartialBlocks() {
20
+ static const char *PARTIAL_BLOCKS[PARTIAL_BLOCKS_COUNT] = {" ",
21
+ "\xE2\x96\x8F",
22
+ "\xE2\x96\x8E",
23
+ "\xE2\x96\x8D",
24
+ "\xE2\x96\x8C",
25
+ "\xE2\x96\x8B",
26
+ "\xE2\x96\x8A",
27
+ "\xE2\x96\x89"};
28
+ return PARTIAL_BLOCKS;
29
+ }
30
+
31
+ static const char *FullBlock() {
32
+ return "\xE2\x96\x88";
33
+ }
34
+ };
35
+ } // namespace duckdb
@@ -274,8 +274,9 @@ private:
274
274
 
275
275
  template <class A_TYPE, class B_TYPE, class C_TYPE, class RESULT_TYPE, class FUNC>
276
276
  static void ExecuteTernaryInternal(Vector &a, Vector &b, Vector &c, Vector &result, idx_t count, FUNC &fun) {
277
- auto constant =
278
- a.GetVectorType() == VectorType::CONSTANT_VECTOR && b.GetVectorType() == VectorType::CONSTANT_VECTOR;
277
+ auto constant = a.GetVectorType() == VectorType::CONSTANT_VECTOR &&
278
+ b.GetVectorType() == VectorType::CONSTANT_VECTOR &&
279
+ c.GetVectorType() == VectorType::CONSTANT_VECTOR;
279
280
 
280
281
  typename A_TYPE::STRUCT_STATE a_state;
281
282
  typename B_TYPE::STRUCT_STATE b_state;
@@ -313,7 +314,8 @@ private:
313
314
  static void ExecuteQuaternaryInternal(Vector &a, Vector &b, Vector &c, Vector &d, Vector &result, idx_t count,
314
315
  FUNC &fun) {
315
316
  auto constant =
316
- a.GetVectorType() == VectorType::CONSTANT_VECTOR && b.GetVectorType() == VectorType::CONSTANT_VECTOR;
317
+ a.GetVectorType() == VectorType::CONSTANT_VECTOR && b.GetVectorType() == VectorType::CONSTANT_VECTOR &&
318
+ c.GetVectorType() == VectorType::CONSTANT_VECTOR && d.GetVectorType() == VectorType::CONSTANT_VECTOR;
317
319
 
318
320
  typename A_TYPE::STRUCT_STATE a_state;
319
321
  typename B_TYPE::STRUCT_STATE b_state;
@@ -38,6 +38,11 @@ struct BufferedCSVReaderOptions {
38
38
  //! New Line separator
39
39
  NewLineIdentifier new_line = NewLineIdentifier::NOT_SET;
40
40
 
41
+ //! Whether or not an option was provided for parallel
42
+ bool has_parallel = false;
43
+ //! Whether or not the read will use the ParallelCSVReader
44
+ bool use_parallel = false;
45
+ //! Whether or not a quote was defined by the user
41
46
  bool has_quote = false;
42
47
  //! Quote used for columns that contain reserved characters, e.g., delimiter
43
48
  string quote = "\"";
@@ -122,7 +127,12 @@ struct BufferedCSVReaderOptions {
122
127
  void Serialize(FieldWriter &writer) const;
123
128
  void Deserialize(FieldReader &reader);
124
129
 
130
+ void SetCompression(const string &compression);
131
+ void SetHeader(bool has_header);
132
+ void SetEscape(const string &escape);
133
+ void SetQuote(const string &quote);
125
134
  void SetDelimiter(const string &delimiter);
135
+ void SetParallel(bool use_parallel);
126
136
 
127
137
  void SetNewline(const string &input);
128
138
  //! Set an option that is supported by both reading and writing functions, called by
@@ -34,9 +34,11 @@ public:
34
34
  return functions.size();
35
35
  }
36
36
  T GetFunctionByOffset(idx_t offset) {
37
+ D_ASSERT(offset < functions.size());
37
38
  return functions[offset];
38
39
  }
39
40
  T &GetFunctionReferenceByOffset(idx_t offset) {
41
+ D_ASSERT(offset < functions.size());
40
42
  return functions[offset];
41
43
  }
42
44
  bool MergeFunctionSet(FunctionSet<T> new_functions) {
@@ -184,6 +184,10 @@ struct StringSplitFun {
184
184
  static void RegisterFunction(BuiltinFunctions &set);
185
185
  };
186
186
 
187
+ struct BarFun {
188
+ static void RegisterFunction(BuiltinFunctions &set);
189
+ };
190
+
187
191
  struct ASCII {
188
192
  static void RegisterFunction(BuiltinFunctions &set);
189
193
  };
@@ -30,6 +30,7 @@ class DatabaseInstance;
30
30
  class DuckDB;
31
31
  class LogicalOperator;
32
32
  class SelectStatement;
33
+ struct BufferedCSVReaderOptions;
33
34
 
34
35
  typedef void (*warning_callback)(std::string);
35
36
 
@@ -127,8 +128,10 @@ public:
127
128
  DUCKDB_API shared_ptr<Relation> Values(const string &values);
128
129
  DUCKDB_API shared_ptr<Relation> Values(const string &values, const vector<string> &column_names,
129
130
  const string &alias = "values");
131
+
130
132
  //! Reads CSV file
131
133
  DUCKDB_API shared_ptr<Relation> ReadCSV(const string &csv_file);
134
+ DUCKDB_API shared_ptr<Relation> ReadCSV(const string &csv_file, BufferedCSVReaderOptions &options);
132
135
  DUCKDB_API shared_ptr<Relation> ReadCSV(const string &csv_file, const vector<string> &columns);
133
136
 
134
137
  //! Reads Parquet file
@@ -8,26 +8,26 @@
8
8
 
9
9
  #pragma once
10
10
 
11
- #include "duckdb/main/relation.hpp"
11
+ #include "duckdb/execution/operator/persistent/csv_reader_options.hpp"
12
+ #include "duckdb/main/relation/table_function_relation.hpp"
12
13
 
13
14
  namespace duckdb {
14
15
 
15
- class ReadCSVRelation : public Relation {
16
+ struct BufferedCSVReaderOptions;
17
+
18
+ class ReadCSVRelation : public TableFunctionRelation {
16
19
  public:
17
20
  ReadCSVRelation(const std::shared_ptr<ClientContext> &context, string csv_file, vector<ColumnDefinition> columns,
18
- bool auto_detect = false, string alias = string());
21
+ string alias = string());
22
+ ReadCSVRelation(const std::shared_ptr<ClientContext> &context, string csv_file, BufferedCSVReaderOptions options,
23
+ string alias = string());
19
24
 
20
- string csv_file;
21
- bool auto_detect;
22
25
  string alias;
23
- vector<ColumnDefinition> columns;
26
+ bool auto_detect;
27
+ string csv_file;
24
28
 
25
29
  public:
26
- unique_ptr<QueryNode> GetQueryNode() override;
27
- const vector<ColumnDefinition> &Columns() override;
28
- string ToString(idx_t depth) override;
29
30
  string GetAlias() override;
30
- unique_ptr<TableRef> GetTableRef() override;
31
31
  };
32
32
 
33
33
  } // namespace duckdb
@@ -15,10 +15,11 @@ namespace duckdb {
15
15
  class TableFunctionRelation : public Relation {
16
16
  public:
17
17
  TableFunctionRelation(const std::shared_ptr<ClientContext> &context, string name, vector<Value> parameters,
18
- named_parameter_map_t named_parameters, shared_ptr<Relation> input_relation_p = nullptr);
18
+ named_parameter_map_t named_parameters, shared_ptr<Relation> input_relation_p = nullptr,
19
+ bool auto_init = true);
19
20
 
20
21
  TableFunctionRelation(const std::shared_ptr<ClientContext> &context, string name, vector<Value> parameters,
21
- shared_ptr<Relation> input_relation_p = nullptr);
22
+ shared_ptr<Relation> input_relation_p = nullptr, bool auto_init = true);
22
23
 
23
24
  string name;
24
25
  vector<Value> parameters;
@@ -33,6 +34,14 @@ public:
33
34
  const vector<ColumnDefinition> &Columns() override;
34
35
  string ToString(idx_t depth) override;
35
36
  string GetAlias() override;
37
+ void AddNamedParameter(const string &name, Value argument);
38
+
39
+ private:
40
+ void InitializeColumns();
41
+
42
+ private:
43
+ //! Whether or not to auto initialize the columns on construction
44
+ bool auto_initialize;
36
45
  };
37
46
 
38
47
  } // namespace duckdb
@@ -26,7 +26,7 @@ public:
26
26
  // if the function takes a subquery as argument its in here
27
27
  unique_ptr<SelectStatement> subquery;
28
28
 
29
- // External dependencies of this table funcion
29
+ // External dependencies of this table function
30
30
  unique_ptr<ExternalDependency> external_dependency;
31
31
 
32
32
  public:
@@ -220,14 +220,13 @@ shared_ptr<Relation> Connection::Values(const string &values, const vector<strin
220
220
 
221
221
  shared_ptr<Relation> Connection::ReadCSV(const string &csv_file) {
222
222
  BufferedCSVReaderOptions options;
223
+ return ReadCSV(csv_file, options);
224
+ }
225
+
226
+ shared_ptr<Relation> Connection::ReadCSV(const string &csv_file, BufferedCSVReaderOptions &options) {
223
227
  options.file_path = csv_file;
224
228
  options.auto_detect = true;
225
- BufferedCSVReader reader(*context, options);
226
- vector<ColumnDefinition> column_list;
227
- for (idx_t i = 0; i < reader.return_types.size(); i++) {
228
- column_list.emplace_back(reader.names[i], reader.return_types[i]);
229
- }
230
- return make_shared<ReadCSVRelation>(context, csv_file, std::move(column_list), true);
229
+ return make_shared<ReadCSVRelation>(context, csv_file, options);
231
230
  }
232
231
 
233
232
  shared_ptr<Relation> Connection::ReadCSV(const string &csv_file, const vector<string> &columns) {
@@ -8,59 +8,51 @@
8
8
  #include "duckdb/parser/expression/constant_expression.hpp"
9
9
  #include "duckdb/parser/expression/function_expression.hpp"
10
10
  #include "duckdb/common/string_util.hpp"
11
+ #include "duckdb/execution/operator/persistent/buffered_csv_reader.hpp"
11
12
 
12
13
  namespace duckdb {
13
14
 
14
15
  ReadCSVRelation::ReadCSVRelation(const std::shared_ptr<ClientContext> &context, string csv_file_p,
15
- vector<ColumnDefinition> columns_p, bool auto_detect, string alias_p)
16
- : Relation(context, RelationType::READ_CSV_RELATION), csv_file(std::move(csv_file_p)), auto_detect(auto_detect),
17
- alias(std::move(alias_p)), columns(std::move(columns_p)) {
16
+ vector<ColumnDefinition> columns_p, string alias_p)
17
+ : TableFunctionRelation(context, "read_csv", {Value(csv_file_p)}, nullptr, false), alias(std::move(alias_p)),
18
+ auto_detect(false), csv_file(std::move(csv_file_p)) {
19
+
18
20
  if (alias.empty()) {
19
21
  alias = StringUtil::Split(csv_file, ".")[0];
20
22
  }
21
- }
22
23
 
23
- unique_ptr<QueryNode> ReadCSVRelation::GetQueryNode() {
24
- auto result = make_unique<SelectNode>();
25
- result->select_list.push_back(make_unique<StarExpression>());
26
- result->from_table = GetTableRef();
27
- return std::move(result);
28
- }
24
+ columns = move(columns_p);
29
25
 
30
- unique_ptr<TableRef> ReadCSVRelation::GetTableRef() {
31
- auto table_ref = make_unique<TableFunctionRef>();
32
- table_ref->alias = alias;
33
- vector<unique_ptr<ParsedExpression>> children;
34
- // CSV file
35
- children.push_back(make_unique<ConstantExpression>(Value(csv_file)));
36
- if (!auto_detect) {
37
- // parameters
38
- child_list_t<Value> column_names;
39
- for (idx_t i = 0; i < columns.size(); i++) {
40
- column_names.push_back(make_pair(columns[i].Name(), Value(columns[i].Type().ToString())));
41
- }
42
- auto colnames = make_unique<ConstantExpression>(Value::STRUCT(std::move(column_names)));
43
- children.push_back(make_unique<ComparisonExpression>(
44
- ExpressionType::COMPARE_EQUAL, make_unique<ColumnRefExpression>("columns"), std::move(colnames)));
45
- } else {
46
- children.push_back(make_unique<ComparisonExpression>(ExpressionType::COMPARE_EQUAL,
47
- make_unique<ColumnRefExpression>("auto_detect"),
48
- make_unique<ConstantExpression>(Value::BOOLEAN(true))));
26
+ child_list_t<Value> column_names;
27
+ for (idx_t i = 0; i < columns.size(); i++) {
28
+ column_names.push_back(make_pair(columns[i].Name(), Value(columns[i].Type().ToString())));
49
29
  }
50
- table_ref->function = make_unique<FunctionExpression>("read_csv", std::move(children));
51
- return std::move(table_ref);
52
- }
53
30
 
54
- string ReadCSVRelation::GetAlias() {
55
- return alias;
31
+ AddNamedParameter("columns", Value::STRUCT(std::move(column_names)));
56
32
  }
57
33
 
58
- const vector<ColumnDefinition> &ReadCSVRelation::Columns() {
59
- return columns;
34
+ ReadCSVRelation::ReadCSVRelation(const std::shared_ptr<ClientContext> &context, string csv_file_p,
35
+ BufferedCSVReaderOptions options, string alias_p)
36
+ : TableFunctionRelation(context, "read_csv_auto", {Value(csv_file_p)}, nullptr, false), alias(std::move(alias_p)),
37
+ auto_detect(true), csv_file(std::move(csv_file_p)) {
38
+
39
+ if (alias.empty()) {
40
+ alias = StringUtil::Split(csv_file, ".")[0];
41
+ }
42
+
43
+ // Force auto_detect for this constructor
44
+ options.auto_detect = true;
45
+ BufferedCSVReader reader(*context, move(options));
46
+
47
+ for (idx_t i = 0; i < reader.return_types.size(); i++) {
48
+ columns.emplace_back(reader.names[i], reader.return_types[i]);
49
+ }
50
+
51
+ AddNamedParameter("auto_detect", Value::BOOLEAN(true));
60
52
  }
61
53
 
62
- string ReadCSVRelation::ToString(idx_t depth) {
63
- return RenderWhitespace(depth) + "Read CSV [" + csv_file + "]";
54
+ string ReadCSVRelation::GetAlias() {
55
+ return alias;
64
56
  }
65
57
 
66
58
  } // namespace duckdb
@@ -12,21 +12,32 @@
12
12
 
13
13
  namespace duckdb {
14
14
 
15
+ void TableFunctionRelation::AddNamedParameter(const string &name, Value argument) {
16
+ named_parameters[name] = move(argument);
17
+ }
18
+
15
19
  TableFunctionRelation::TableFunctionRelation(const std::shared_ptr<ClientContext> &context, string name_p,
16
20
  vector<Value> parameters_p, named_parameter_map_t named_parameters,
17
- shared_ptr<Relation> input_relation_p)
21
+ shared_ptr<Relation> input_relation_p, bool auto_init)
18
22
  : Relation(context, RelationType::TABLE_FUNCTION_RELATION), name(std::move(name_p)),
19
23
  parameters(std::move(parameters_p)), named_parameters(std::move(named_parameters)),
20
- input_relation(std::move(input_relation_p)) {
24
+ input_relation(std::move(input_relation_p)), auto_initialize(auto_init) {
21
25
  context->TryBindRelation(*this, this->columns);
22
26
  }
23
- TableFunctionRelation::TableFunctionRelation(const std::shared_ptr<ClientContext> &context, string name_p,
24
- vector<Value> parameters_p,
25
27
 
26
- shared_ptr<Relation> input_relation_p)
28
+ TableFunctionRelation::TableFunctionRelation(const std::shared_ptr<ClientContext> &context, string name_p,
29
+ vector<Value> parameters_p, shared_ptr<Relation> input_relation_p,
30
+ bool auto_init)
27
31
  : Relation(context, RelationType::TABLE_FUNCTION_RELATION), name(std::move(name_p)),
28
- parameters(std::move(parameters_p)), input_relation(std::move(input_relation_p)) {
29
- context->TryBindRelation(*this, this->columns);
32
+ parameters(std::move(parameters_p)), input_relation(std::move(input_relation_p)), auto_initialize(auto_init) {
33
+ InitializeColumns();
34
+ }
35
+
36
+ void TableFunctionRelation::InitializeColumns() {
37
+ if (!auto_initialize) {
38
+ return;
39
+ }
40
+ context.GetContext()->TryBindRelation(*this, this->columns);
30
41
  }
31
42
 
32
43
  unique_ptr<QueryNode> TableFunctionRelation::GetQueryNode() {
@@ -1,5 +1,7 @@
1
1
  #include "src/function/scalar/string/ascii.cpp"
2
2
 
3
+ #include "src/function/scalar/string/bar.cpp"
4
+
3
5
  #include "src/function/scalar/string/chr.cpp"
4
6
 
5
7
  #include "src/function/scalar/string/reverse.cpp"