duckdb 0.6.2-dev2015.0 → 0.6.2-dev2042.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.6.2-dev2015.0",
5
+ "version": "0.6.2-dev2042.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -60,6 +60,25 @@ static int64_t ParseInteger(const Value &value, const string &loption) {
60
60
  return value.GetValue<int64_t>();
61
61
  }
62
62
 
63
+ void BufferedCSVReaderOptions::SetHeader(bool input) {
64
+ this->header = input;
65
+ this->has_header = true;
66
+ }
67
+
68
+ void BufferedCSVReaderOptions::SetCompression(const string &compression) {
69
+ this->compression = FileCompressionTypeFromString(compression);
70
+ }
71
+
72
+ void BufferedCSVReaderOptions::SetEscape(const string &input) {
73
+ this->escape = input;
74
+ this->has_escape = true;
75
+ }
76
+
77
+ void BufferedCSVReaderOptions::SetParallel(bool use_parallel) {
78
+ this->has_parallel = true;
79
+ this->use_parallel = use_parallel;
80
+ }
81
+
63
82
  void BufferedCSVReaderOptions::SetDelimiter(const string &input) {
64
83
  this->delimiter = StringUtil::Replace(input, "\\t", "\t");
65
84
  this->has_delimiter = true;
@@ -68,6 +87,11 @@ void BufferedCSVReaderOptions::SetDelimiter(const string &input) {
68
87
  }
69
88
  }
70
89
 
90
+ void BufferedCSVReaderOptions::SetQuote(const string &quote) {
91
+ this->quote = quote;
92
+ this->has_quote = true;
93
+ }
94
+
71
95
  void BufferedCSVReaderOptions::SetNewline(const string &input) {
72
96
  if (input == "\\n" || input == "\\r") {
73
97
  new_line = NewLineIdentifier::SINGLE;
@@ -102,6 +126,8 @@ void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value
102
126
  }
103
127
  if (loption == "auto_detect") {
104
128
  auto_detect = ParseBoolean(value, loption);
129
+ } else if (loption == "parallel") {
130
+ SetParallel(ParseBoolean(value, loption));
105
131
  } else if (loption == "sample_size") {
106
132
  int64_t sample_size = ParseInteger(value, loption);
107
133
  if (sample_size < 1 && sample_size != -1) {
@@ -143,9 +169,6 @@ void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value
143
169
  } else if (loption == "timestamp_format" || loption == "timestampformat") {
144
170
  string format = ParseString(value, loption);
145
171
  SetDateFormat(LogicalTypeId::TIMESTAMP, format, true);
146
- } else if (loption == "escape") {
147
- escape = ParseString(value, loption);
148
- has_escape = true;
149
172
  } else if (loption == "ignore_errors") {
150
173
  ignore_errors = ParseBoolean(value, loption);
151
174
  } else if (loption == "union_by_name") {
@@ -193,16 +216,13 @@ bool BufferedCSVReaderOptions::SetBaseOption(const string &loption, const Value
193
216
  if (StringUtil::StartsWith(loption, "delim") || StringUtil::StartsWith(loption, "sep")) {
194
217
  SetDelimiter(ParseString(value, loption));
195
218
  } else if (loption == "quote") {
196
- quote = ParseString(value, loption);
197
- has_quote = true;
219
+ SetQuote(ParseString(value, loption));
198
220
  } else if (loption == "new_line") {
199
221
  SetNewline(ParseString(value, loption));
200
222
  } else if (loption == "escape") {
201
- escape = ParseString(value, loption);
202
- has_escape = true;
223
+ SetEscape(ParseString(value, loption));
203
224
  } else if (loption == "header") {
204
- header = ParseBoolean(value, loption);
205
- has_header = true;
225
+ SetHeader(ParseBoolean(value, loption));
206
226
  } else if (loption == "null" || loption == "nullstr") {
207
227
  null_str = ParseString(value, loption);
208
228
  } else if (loption == "encoding") {
@@ -211,7 +231,7 @@ bool BufferedCSVReaderOptions::SetBaseOption(const string &loption, const Value
211
231
  throw BinderException("Copy is only supported for UTF-8 encoded files, ENCODING 'UTF-8'");
212
232
  }
213
233
  } else if (loption == "compression") {
214
- compression = FileCompressionTypeFromString(ParseString(value, loption));
234
+ SetCompression(ParseString(value, loption));
215
235
  } else {
216
236
  // unrecognized option in base CSV
217
237
  return false;
@@ -227,7 +247,7 @@ std::string BufferedCSVReaderOptions::ToString() const {
227
247
  "\n header=" + std::to_string(header) +
228
248
  (has_header ? "" : (auto_detect ? " (auto detected)" : "' (default)")) +
229
249
  "\n sample_size=" + std::to_string(sample_chunk_size * sample_chunks) +
230
- "\n ignore_erros=" + std::to_string(ignore_errors) + "\n all_varchar=" + std::to_string(all_varchar);
250
+ "\n ignore_errors=" + std::to_string(ignore_errors) + "\n all_varchar=" + std::to_string(all_varchar);
231
251
  }
232
252
 
233
253
  } // namespace duckdb
@@ -38,6 +38,10 @@ void ReadCSVData::FinalizeRead(ClientContext &context) {
38
38
  BaseCSVData::Finalize();
39
39
  auto &config = DBConfig::GetConfig(context);
40
40
  single_threaded = !config.options.experimental_parallel_csv_reader;
41
+ if (options.has_parallel) {
42
+ // Override the option set in the config
43
+ single_threaded = !options.use_parallel;
44
+ }
41
45
  bool null_or_empty = options.delimiter.empty() || options.escape.empty() || options.quote.empty() ||
42
46
  options.delimiter[0] == '\0' || options.escape[0] == '\0' || options.quote[0] == '\0';
43
47
  bool complex_options = options.delimiter.size() > 1 || options.escape.size() > 1 || options.quote.size() > 1;
@@ -789,6 +793,7 @@ static void ReadCSVAddNamedParameters(TableFunction &table_function) {
789
793
  table_function.named_parameters["union_by_name"] = LogicalType::BOOLEAN;
790
794
  table_function.named_parameters["buffer_size"] = LogicalType::UBIGINT;
791
795
  table_function.named_parameters["decimal_separator"] = LogicalType::VARCHAR;
796
+ table_function.named_parameters["parallel"] = LogicalType::BOOLEAN;
792
797
  }
793
798
 
794
799
  double CSVReaderProgress(ClientContext &context, const FunctionData *bind_data_p,
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.6.2-dev2015"
2
+ #define DUCKDB_VERSION "0.6.2-dev2042"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "cbf43430b3"
5
+ #define DUCKDB_SOURCE_ID "67e219929b"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -38,6 +38,11 @@ struct BufferedCSVReaderOptions {
38
38
  //! New Line separator
39
39
  NewLineIdentifier new_line = NewLineIdentifier::NOT_SET;
40
40
 
41
+ //! Whether or not an option was provided for parallel
42
+ bool has_parallel = false;
43
+ //! Whether or not the read will use the ParallelCSVReader
44
+ bool use_parallel = false;
45
+ //! Whether or not a quote was defined by the user
41
46
  bool has_quote = false;
42
47
  //! Quote used for columns that contain reserved characters, e.g., delimiter
43
48
  string quote = "\"";
@@ -122,7 +127,12 @@ struct BufferedCSVReaderOptions {
122
127
  void Serialize(FieldWriter &writer) const;
123
128
  void Deserialize(FieldReader &reader);
124
129
 
130
+ void SetCompression(const string &compression);
131
+ void SetHeader(bool has_header);
132
+ void SetEscape(const string &escape);
133
+ void SetQuote(const string &quote);
125
134
  void SetDelimiter(const string &delimiter);
135
+ void SetParallel(bool use_parallel);
126
136
 
127
137
  void SetNewline(const string &input);
128
138
  //! Set an option that is supported by both reading and writing functions, called by
@@ -34,9 +34,11 @@ public:
34
34
  return functions.size();
35
35
  }
36
36
  T GetFunctionByOffset(idx_t offset) {
37
+ D_ASSERT(offset < functions.size());
37
38
  return functions[offset];
38
39
  }
39
40
  T &GetFunctionReferenceByOffset(idx_t offset) {
41
+ D_ASSERT(offset < functions.size());
40
42
  return functions[offset];
41
43
  }
42
44
  bool MergeFunctionSet(FunctionSet<T> new_functions) {
@@ -30,6 +30,7 @@ class DatabaseInstance;
30
30
  class DuckDB;
31
31
  class LogicalOperator;
32
32
  class SelectStatement;
33
+ struct BufferedCSVReaderOptions;
33
34
 
34
35
  typedef void (*warning_callback)(std::string);
35
36
 
@@ -127,8 +128,10 @@ public:
127
128
  DUCKDB_API shared_ptr<Relation> Values(const string &values);
128
129
  DUCKDB_API shared_ptr<Relation> Values(const string &values, const vector<string> &column_names,
129
130
  const string &alias = "values");
131
+
130
132
  //! Reads CSV file
131
133
  DUCKDB_API shared_ptr<Relation> ReadCSV(const string &csv_file);
134
+ DUCKDB_API shared_ptr<Relation> ReadCSV(const string &csv_file, BufferedCSVReaderOptions &options);
132
135
  DUCKDB_API shared_ptr<Relation> ReadCSV(const string &csv_file, const vector<string> &columns);
133
136
 
134
137
  //! Reads Parquet file
@@ -8,26 +8,26 @@
8
8
 
9
9
  #pragma once
10
10
 
11
- #include "duckdb/main/relation.hpp"
11
+ #include "duckdb/execution/operator/persistent/csv_reader_options.hpp"
12
+ #include "duckdb/main/relation/table_function_relation.hpp"
12
13
 
13
14
  namespace duckdb {
14
15
 
15
- class ReadCSVRelation : public Relation {
16
+ struct BufferedCSVReaderOptions;
17
+
18
+ class ReadCSVRelation : public TableFunctionRelation {
16
19
  public:
17
20
  ReadCSVRelation(const std::shared_ptr<ClientContext> &context, string csv_file, vector<ColumnDefinition> columns,
18
- bool auto_detect = false, string alias = string());
21
+ string alias = string());
22
+ ReadCSVRelation(const std::shared_ptr<ClientContext> &context, string csv_file, BufferedCSVReaderOptions options,
23
+ string alias = string());
19
24
 
20
- string csv_file;
21
- bool auto_detect;
22
25
  string alias;
23
- vector<ColumnDefinition> columns;
26
+ bool auto_detect;
27
+ string csv_file;
24
28
 
25
29
  public:
26
- unique_ptr<QueryNode> GetQueryNode() override;
27
- const vector<ColumnDefinition> &Columns() override;
28
- string ToString(idx_t depth) override;
29
30
  string GetAlias() override;
30
- unique_ptr<TableRef> GetTableRef() override;
31
31
  };
32
32
 
33
33
  } // namespace duckdb
@@ -15,10 +15,11 @@ namespace duckdb {
15
15
  class TableFunctionRelation : public Relation {
16
16
  public:
17
17
  TableFunctionRelation(const std::shared_ptr<ClientContext> &context, string name, vector<Value> parameters,
18
- named_parameter_map_t named_parameters, shared_ptr<Relation> input_relation_p = nullptr);
18
+ named_parameter_map_t named_parameters, shared_ptr<Relation> input_relation_p = nullptr,
19
+ bool auto_init = true);
19
20
 
20
21
  TableFunctionRelation(const std::shared_ptr<ClientContext> &context, string name, vector<Value> parameters,
21
- shared_ptr<Relation> input_relation_p = nullptr);
22
+ shared_ptr<Relation> input_relation_p = nullptr, bool auto_init = true);
22
23
 
23
24
  string name;
24
25
  vector<Value> parameters;
@@ -33,6 +34,14 @@ public:
33
34
  const vector<ColumnDefinition> &Columns() override;
34
35
  string ToString(idx_t depth) override;
35
36
  string GetAlias() override;
37
+ void AddNamedParameter(const string &name, Value argument);
38
+
39
+ private:
40
+ void InitializeColumns();
41
+
42
+ private:
43
+ //! Whether or not to auto initialize the columns on construction
44
+ bool auto_initialize;
36
45
  };
37
46
 
38
47
  } // namespace duckdb
@@ -26,7 +26,7 @@ public:
26
26
  // if the function takes a subquery as argument its in here
27
27
  unique_ptr<SelectStatement> subquery;
28
28
 
29
- // External dependencies of this table funcion
29
+ // External dependencies of this table function
30
30
  unique_ptr<ExternalDependency> external_dependency;
31
31
 
32
32
  public:
@@ -220,14 +220,13 @@ shared_ptr<Relation> Connection::Values(const string &values, const vector<strin
220
220
 
221
221
  shared_ptr<Relation> Connection::ReadCSV(const string &csv_file) {
222
222
  BufferedCSVReaderOptions options;
223
+ return ReadCSV(csv_file, options);
224
+ }
225
+
226
+ shared_ptr<Relation> Connection::ReadCSV(const string &csv_file, BufferedCSVReaderOptions &options) {
223
227
  options.file_path = csv_file;
224
228
  options.auto_detect = true;
225
- BufferedCSVReader reader(*context, options);
226
- vector<ColumnDefinition> column_list;
227
- for (idx_t i = 0; i < reader.return_types.size(); i++) {
228
- column_list.emplace_back(reader.names[i], reader.return_types[i]);
229
- }
230
- return make_shared<ReadCSVRelation>(context, csv_file, std::move(column_list), true);
229
+ return make_shared<ReadCSVRelation>(context, csv_file, options);
231
230
  }
232
231
 
233
232
  shared_ptr<Relation> Connection::ReadCSV(const string &csv_file, const vector<string> &columns) {
@@ -8,59 +8,51 @@
8
8
  #include "duckdb/parser/expression/constant_expression.hpp"
9
9
  #include "duckdb/parser/expression/function_expression.hpp"
10
10
  #include "duckdb/common/string_util.hpp"
11
+ #include "duckdb/execution/operator/persistent/buffered_csv_reader.hpp"
11
12
 
12
13
  namespace duckdb {
13
14
 
14
15
  ReadCSVRelation::ReadCSVRelation(const std::shared_ptr<ClientContext> &context, string csv_file_p,
15
- vector<ColumnDefinition> columns_p, bool auto_detect, string alias_p)
16
- : Relation(context, RelationType::READ_CSV_RELATION), csv_file(std::move(csv_file_p)), auto_detect(auto_detect),
17
- alias(std::move(alias_p)), columns(std::move(columns_p)) {
16
+ vector<ColumnDefinition> columns_p, string alias_p)
17
+ : TableFunctionRelation(context, "read_csv", {Value(csv_file_p)}, nullptr, false), alias(std::move(alias_p)),
18
+ auto_detect(false), csv_file(std::move(csv_file_p)) {
19
+
18
20
  if (alias.empty()) {
19
21
  alias = StringUtil::Split(csv_file, ".")[0];
20
22
  }
21
- }
22
23
 
23
- unique_ptr<QueryNode> ReadCSVRelation::GetQueryNode() {
24
- auto result = make_unique<SelectNode>();
25
- result->select_list.push_back(make_unique<StarExpression>());
26
- result->from_table = GetTableRef();
27
- return std::move(result);
28
- }
24
+ columns = move(columns_p);
29
25
 
30
- unique_ptr<TableRef> ReadCSVRelation::GetTableRef() {
31
- auto table_ref = make_unique<TableFunctionRef>();
32
- table_ref->alias = alias;
33
- vector<unique_ptr<ParsedExpression>> children;
34
- // CSV file
35
- children.push_back(make_unique<ConstantExpression>(Value(csv_file)));
36
- if (!auto_detect) {
37
- // parameters
38
- child_list_t<Value> column_names;
39
- for (idx_t i = 0; i < columns.size(); i++) {
40
- column_names.push_back(make_pair(columns[i].Name(), Value(columns[i].Type().ToString())));
41
- }
42
- auto colnames = make_unique<ConstantExpression>(Value::STRUCT(std::move(column_names)));
43
- children.push_back(make_unique<ComparisonExpression>(
44
- ExpressionType::COMPARE_EQUAL, make_unique<ColumnRefExpression>("columns"), std::move(colnames)));
45
- } else {
46
- children.push_back(make_unique<ComparisonExpression>(ExpressionType::COMPARE_EQUAL,
47
- make_unique<ColumnRefExpression>("auto_detect"),
48
- make_unique<ConstantExpression>(Value::BOOLEAN(true))));
26
+ child_list_t<Value> column_names;
27
+ for (idx_t i = 0; i < columns.size(); i++) {
28
+ column_names.push_back(make_pair(columns[i].Name(), Value(columns[i].Type().ToString())));
49
29
  }
50
- table_ref->function = make_unique<FunctionExpression>("read_csv", std::move(children));
51
- return std::move(table_ref);
52
- }
53
30
 
54
- string ReadCSVRelation::GetAlias() {
55
- return alias;
31
+ AddNamedParameter("columns", Value::STRUCT(std::move(column_names)));
56
32
  }
57
33
 
58
- const vector<ColumnDefinition> &ReadCSVRelation::Columns() {
59
- return columns;
34
+ ReadCSVRelation::ReadCSVRelation(const std::shared_ptr<ClientContext> &context, string csv_file_p,
35
+ BufferedCSVReaderOptions options, string alias_p)
36
+ : TableFunctionRelation(context, "read_csv_auto", {Value(csv_file_p)}, nullptr, false), alias(std::move(alias_p)),
37
+ auto_detect(true), csv_file(std::move(csv_file_p)) {
38
+
39
+ if (alias.empty()) {
40
+ alias = StringUtil::Split(csv_file, ".")[0];
41
+ }
42
+
43
+ // Force auto_detect for this constructor
44
+ options.auto_detect = true;
45
+ BufferedCSVReader reader(*context, move(options));
46
+
47
+ for (idx_t i = 0; i < reader.return_types.size(); i++) {
48
+ columns.emplace_back(reader.names[i], reader.return_types[i]);
49
+ }
50
+
51
+ AddNamedParameter("auto_detect", Value::BOOLEAN(true));
60
52
  }
61
53
 
62
- string ReadCSVRelation::ToString(idx_t depth) {
63
- return RenderWhitespace(depth) + "Read CSV [" + csv_file + "]";
54
+ string ReadCSVRelation::GetAlias() {
55
+ return alias;
64
56
  }
65
57
 
66
58
  } // namespace duckdb
@@ -12,21 +12,32 @@
12
12
 
13
13
  namespace duckdb {
14
14
 
15
+ void TableFunctionRelation::AddNamedParameter(const string &name, Value argument) {
16
+ named_parameters[name] = move(argument);
17
+ }
18
+
15
19
  TableFunctionRelation::TableFunctionRelation(const std::shared_ptr<ClientContext> &context, string name_p,
16
20
  vector<Value> parameters_p, named_parameter_map_t named_parameters,
17
- shared_ptr<Relation> input_relation_p)
21
+ shared_ptr<Relation> input_relation_p, bool auto_init)
18
22
  : Relation(context, RelationType::TABLE_FUNCTION_RELATION), name(std::move(name_p)),
19
23
  parameters(std::move(parameters_p)), named_parameters(std::move(named_parameters)),
20
- input_relation(std::move(input_relation_p)) {
24
+ input_relation(std::move(input_relation_p)), auto_initialize(auto_init) {
21
25
  context->TryBindRelation(*this, this->columns);
22
26
  }
23
- TableFunctionRelation::TableFunctionRelation(const std::shared_ptr<ClientContext> &context, string name_p,
24
- vector<Value> parameters_p,
25
27
 
26
- shared_ptr<Relation> input_relation_p)
28
+ TableFunctionRelation::TableFunctionRelation(const std::shared_ptr<ClientContext> &context, string name_p,
29
+ vector<Value> parameters_p, shared_ptr<Relation> input_relation_p,
30
+ bool auto_init)
27
31
  : Relation(context, RelationType::TABLE_FUNCTION_RELATION), name(std::move(name_p)),
28
- parameters(std::move(parameters_p)), input_relation(std::move(input_relation_p)) {
29
- context->TryBindRelation(*this, this->columns);
32
+ parameters(std::move(parameters_p)), input_relation(std::move(input_relation_p)), auto_initialize(auto_init) {
33
+ InitializeColumns();
34
+ }
35
+
36
+ void TableFunctionRelation::InitializeColumns() {
37
+ if (!auto_initialize) {
38
+ return;
39
+ }
40
+ context.GetContext()->TryBindRelation(*this, this->columns);
30
41
  }
31
42
 
32
43
  unique_ptr<QueryNode> TableFunctionRelation::GetQueryNode() {