duckdb 0.6.2-dev2015.0 → 0.6.2-dev2057.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +31 -11
- package/src/duckdb/src/function/scalar/string/bar.cpp +93 -0
- package/src/duckdb/src/function/scalar/string_functions.cpp +1 -0
- package/src/duckdb/src/function/table/read_csv.cpp +5 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/progress_bar/display/terminal_progress_bar_display.hpp +5 -6
- package/src/duckdb/src/include/duckdb/common/unicode_bar.hpp +35 -0
- package/src/duckdb/src/include/duckdb/common/vector_operations/generic_executor.hpp +5 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +10 -0
- package/src/duckdb/src/include/duckdb/function/function_set.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/connection.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +10 -10
- package/src/duckdb/src/include/duckdb/main/relation/table_function_relation.hpp +11 -2
- package/src/duckdb/src/include/duckdb/parser/tableref/table_function_ref.hpp +1 -1
- package/src/duckdb/src/main/connection.cpp +5 -6
- package/src/duckdb/src/main/relation/read_csv_relation.cpp +30 -38
- package/src/duckdb/src/main/relation/table_function_relation.cpp +18 -7
- package/src/duckdb/ub_src_function_scalar_string.cpp +2 -0
package/package.json
CHANGED
|
@@ -60,6 +60,25 @@ static int64_t ParseInteger(const Value &value, const string &loption) {
|
|
|
60
60
|
return value.GetValue<int64_t>();
|
|
61
61
|
}
|
|
62
62
|
|
|
63
|
+
void BufferedCSVReaderOptions::SetHeader(bool input) {
|
|
64
|
+
this->header = input;
|
|
65
|
+
this->has_header = true;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
void BufferedCSVReaderOptions::SetCompression(const string &compression) {
|
|
69
|
+
this->compression = FileCompressionTypeFromString(compression);
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
void BufferedCSVReaderOptions::SetEscape(const string &input) {
|
|
73
|
+
this->escape = input;
|
|
74
|
+
this->has_escape = true;
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
void BufferedCSVReaderOptions::SetParallel(bool use_parallel) {
|
|
78
|
+
this->has_parallel = true;
|
|
79
|
+
this->use_parallel = use_parallel;
|
|
80
|
+
}
|
|
81
|
+
|
|
63
82
|
void BufferedCSVReaderOptions::SetDelimiter(const string &input) {
|
|
64
83
|
this->delimiter = StringUtil::Replace(input, "\\t", "\t");
|
|
65
84
|
this->has_delimiter = true;
|
|
@@ -68,6 +87,11 @@ void BufferedCSVReaderOptions::SetDelimiter(const string &input) {
|
|
|
68
87
|
}
|
|
69
88
|
}
|
|
70
89
|
|
|
90
|
+
void BufferedCSVReaderOptions::SetQuote(const string &quote) {
|
|
91
|
+
this->quote = quote;
|
|
92
|
+
this->has_quote = true;
|
|
93
|
+
}
|
|
94
|
+
|
|
71
95
|
void BufferedCSVReaderOptions::SetNewline(const string &input) {
|
|
72
96
|
if (input == "\\n" || input == "\\r") {
|
|
73
97
|
new_line = NewLineIdentifier::SINGLE;
|
|
@@ -102,6 +126,8 @@ void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value
|
|
|
102
126
|
}
|
|
103
127
|
if (loption == "auto_detect") {
|
|
104
128
|
auto_detect = ParseBoolean(value, loption);
|
|
129
|
+
} else if (loption == "parallel") {
|
|
130
|
+
SetParallel(ParseBoolean(value, loption));
|
|
105
131
|
} else if (loption == "sample_size") {
|
|
106
132
|
int64_t sample_size = ParseInteger(value, loption);
|
|
107
133
|
if (sample_size < 1 && sample_size != -1) {
|
|
@@ -143,9 +169,6 @@ void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value
|
|
|
143
169
|
} else if (loption == "timestamp_format" || loption == "timestampformat") {
|
|
144
170
|
string format = ParseString(value, loption);
|
|
145
171
|
SetDateFormat(LogicalTypeId::TIMESTAMP, format, true);
|
|
146
|
-
} else if (loption == "escape") {
|
|
147
|
-
escape = ParseString(value, loption);
|
|
148
|
-
has_escape = true;
|
|
149
172
|
} else if (loption == "ignore_errors") {
|
|
150
173
|
ignore_errors = ParseBoolean(value, loption);
|
|
151
174
|
} else if (loption == "union_by_name") {
|
|
@@ -193,16 +216,13 @@ bool BufferedCSVReaderOptions::SetBaseOption(const string &loption, const Value
|
|
|
193
216
|
if (StringUtil::StartsWith(loption, "delim") || StringUtil::StartsWith(loption, "sep")) {
|
|
194
217
|
SetDelimiter(ParseString(value, loption));
|
|
195
218
|
} else if (loption == "quote") {
|
|
196
|
-
|
|
197
|
-
has_quote = true;
|
|
219
|
+
SetQuote(ParseString(value, loption));
|
|
198
220
|
} else if (loption == "new_line") {
|
|
199
221
|
SetNewline(ParseString(value, loption));
|
|
200
222
|
} else if (loption == "escape") {
|
|
201
|
-
|
|
202
|
-
has_escape = true;
|
|
223
|
+
SetEscape(ParseString(value, loption));
|
|
203
224
|
} else if (loption == "header") {
|
|
204
|
-
|
|
205
|
-
has_header = true;
|
|
225
|
+
SetHeader(ParseBoolean(value, loption));
|
|
206
226
|
} else if (loption == "null" || loption == "nullstr") {
|
|
207
227
|
null_str = ParseString(value, loption);
|
|
208
228
|
} else if (loption == "encoding") {
|
|
@@ -211,7 +231,7 @@ bool BufferedCSVReaderOptions::SetBaseOption(const string &loption, const Value
|
|
|
211
231
|
throw BinderException("Copy is only supported for UTF-8 encoded files, ENCODING 'UTF-8'");
|
|
212
232
|
}
|
|
213
233
|
} else if (loption == "compression") {
|
|
214
|
-
|
|
234
|
+
SetCompression(ParseString(value, loption));
|
|
215
235
|
} else {
|
|
216
236
|
// unrecognized option in base CSV
|
|
217
237
|
return false;
|
|
@@ -227,7 +247,7 @@ std::string BufferedCSVReaderOptions::ToString() const {
|
|
|
227
247
|
"\n header=" + std::to_string(header) +
|
|
228
248
|
(has_header ? "" : (auto_detect ? " (auto detected)" : "' (default)")) +
|
|
229
249
|
"\n sample_size=" + std::to_string(sample_chunk_size * sample_chunks) +
|
|
230
|
-
"\n
|
|
250
|
+
"\n ignore_errors=" + std::to_string(ignore_errors) + "\n all_varchar=" + std::to_string(all_varchar);
|
|
231
251
|
}
|
|
232
252
|
|
|
233
253
|
} // namespace duckdb
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
#include "duckdb/common/exception.hpp"
|
|
2
|
+
#include "duckdb/common/operator/cast_operators.hpp"
|
|
3
|
+
#include "duckdb/common/types/string_type.hpp"
|
|
4
|
+
#include "duckdb/common/types/value.hpp"
|
|
5
|
+
#include "duckdb/common/types/vector.hpp"
|
|
6
|
+
#include "duckdb/common/unicode_bar.hpp"
|
|
7
|
+
#include "duckdb/common/vector_operations/generic_executor.hpp"
|
|
8
|
+
#include "duckdb/function/scalar/string_functions.hpp"
|
|
9
|
+
|
|
10
|
+
namespace duckdb {
|
|
11
|
+
|
|
12
|
+
static string_t BarScalarFunction(double x, double min, double max, double max_width, string &result) {
|
|
13
|
+
static const char *FULL_BLOCK = UnicodeBar::FullBlock();
|
|
14
|
+
static const char *const *PARTIAL_BLOCKS = UnicodeBar::PartialBlocks();
|
|
15
|
+
static const idx_t PARTIAL_BLOCKS_COUNT = UnicodeBar::PartialBlocksCount();
|
|
16
|
+
|
|
17
|
+
if (!Value::IsFinite(max_width)) {
|
|
18
|
+
throw ValueOutOfRangeException("Max bar width must not be NaN or infinity");
|
|
19
|
+
}
|
|
20
|
+
if (max_width < 1) {
|
|
21
|
+
throw ValueOutOfRangeException("Max bar width must be >= 1");
|
|
22
|
+
}
|
|
23
|
+
if (max_width > 1000) {
|
|
24
|
+
throw ValueOutOfRangeException("Max bar width must be <= 1000");
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
double width;
|
|
28
|
+
|
|
29
|
+
if (Value::IsNan(x) || Value::IsNan(min) || Value::IsNan(max) || x <= min) {
|
|
30
|
+
width = 0;
|
|
31
|
+
} else if (x >= max) {
|
|
32
|
+
width = max_width;
|
|
33
|
+
} else {
|
|
34
|
+
width = max_width * (x - min) / (max - min);
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
if (!Value::IsFinite(width)) {
|
|
38
|
+
throw ValueOutOfRangeException("Bar width must not be NaN or infinity");
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
result.clear();
|
|
42
|
+
|
|
43
|
+
int32_t width_as_int = static_cast<int32_t>(width * PARTIAL_BLOCKS_COUNT);
|
|
44
|
+
idx_t full_blocks_count = (width_as_int / PARTIAL_BLOCKS_COUNT);
|
|
45
|
+
for (idx_t i = 0; i < full_blocks_count; i++) {
|
|
46
|
+
result += FULL_BLOCK;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
idx_t remaining = width_as_int % PARTIAL_BLOCKS_COUNT;
|
|
50
|
+
|
|
51
|
+
if (remaining) {
|
|
52
|
+
result += PARTIAL_BLOCKS[remaining];
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
return string_t(result);
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
static void BarFunction(DataChunk &args, ExpressionState &state, Vector &result) {
|
|
59
|
+
D_ASSERT(args.ColumnCount() == 3 || args.ColumnCount() == 4);
|
|
60
|
+
auto &x_arg = args.data[0];
|
|
61
|
+
auto &min_arg = args.data[1];
|
|
62
|
+
auto &max_arg = args.data[2];
|
|
63
|
+
string buffer;
|
|
64
|
+
|
|
65
|
+
if (args.ColumnCount() == 3) {
|
|
66
|
+
GenericExecutor::ExecuteTernary<PrimitiveType<double>, PrimitiveType<double>, PrimitiveType<double>,
|
|
67
|
+
PrimitiveType<string_t>>(
|
|
68
|
+
x_arg, min_arg, max_arg, result, args.size(),
|
|
69
|
+
[&](PrimitiveType<double> x, PrimitiveType<double> min, PrimitiveType<double> max) {
|
|
70
|
+
return StringVector::AddString(result, BarScalarFunction(x.val, min.val, max.val, 80, buffer));
|
|
71
|
+
});
|
|
72
|
+
} else {
|
|
73
|
+
auto &width_arg = args.data[3];
|
|
74
|
+
GenericExecutor::ExecuteQuaternary<PrimitiveType<double>, PrimitiveType<double>, PrimitiveType<double>,
|
|
75
|
+
PrimitiveType<double>, PrimitiveType<string_t>>(
|
|
76
|
+
x_arg, min_arg, max_arg, width_arg, result, args.size(),
|
|
77
|
+
[&](PrimitiveType<double> x, PrimitiveType<double> min, PrimitiveType<double> max,
|
|
78
|
+
PrimitiveType<double> width) {
|
|
79
|
+
return StringVector::AddString(result, BarScalarFunction(x.val, min.val, max.val, width.val, buffer));
|
|
80
|
+
});
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
void BarFun::RegisterFunction(BuiltinFunctions &set) {
|
|
85
|
+
ScalarFunctionSet bar("bar");
|
|
86
|
+
bar.AddFunction(ScalarFunction({LogicalType::DOUBLE, LogicalType::DOUBLE, LogicalType::DOUBLE, LogicalType::DOUBLE},
|
|
87
|
+
LogicalType::VARCHAR, BarFunction));
|
|
88
|
+
bar.AddFunction(ScalarFunction({LogicalType::DOUBLE, LogicalType::DOUBLE, LogicalType::DOUBLE},
|
|
89
|
+
LogicalType::VARCHAR, BarFunction));
|
|
90
|
+
set.AddFunction(bar);
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
} // namespace duckdb
|
|
@@ -38,6 +38,10 @@ void ReadCSVData::FinalizeRead(ClientContext &context) {
|
|
|
38
38
|
BaseCSVData::Finalize();
|
|
39
39
|
auto &config = DBConfig::GetConfig(context);
|
|
40
40
|
single_threaded = !config.options.experimental_parallel_csv_reader;
|
|
41
|
+
if (options.has_parallel) {
|
|
42
|
+
// Override the option set in the config
|
|
43
|
+
single_threaded = !options.use_parallel;
|
|
44
|
+
}
|
|
41
45
|
bool null_or_empty = options.delimiter.empty() || options.escape.empty() || options.quote.empty() ||
|
|
42
46
|
options.delimiter[0] == '\0' || options.escape[0] == '\0' || options.quote[0] == '\0';
|
|
43
47
|
bool complex_options = options.delimiter.size() > 1 || options.escape.size() > 1 || options.quote.size() > 1;
|
|
@@ -789,6 +793,7 @@ static void ReadCSVAddNamedParameters(TableFunction &table_function) {
|
|
|
789
793
|
table_function.named_parameters["union_by_name"] = LogicalType::BOOLEAN;
|
|
790
794
|
table_function.named_parameters["buffer_size"] = LogicalType::UBIGINT;
|
|
791
795
|
table_function.named_parameters["decimal_separator"] = LogicalType::VARCHAR;
|
|
796
|
+
table_function.named_parameters["parallel"] = LogicalType::BOOLEAN;
|
|
792
797
|
}
|
|
793
798
|
|
|
794
799
|
double CSVReaderProgress(ClientContext &context, const FunctionData *bind_data_p,
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
#ifndef DUCKDB_VERSION
|
|
2
|
-
#define DUCKDB_VERSION "0.6.2-dev2015"
|
|
2
|
+
#define DUCKDB_VERSION "0.6.2-dev2057"
|
|
3
3
|
#endif
|
|
4
4
|
#ifndef DUCKDB_SOURCE_ID
|
|
5
|
-
#define DUCKDB_SOURCE_ID "
|
|
5
|
+
#define DUCKDB_SOURCE_ID "e2cacf2ada"
|
|
6
6
|
#endif
|
|
7
7
|
#include "duckdb/function/table/system_functions.hpp"
|
|
8
8
|
#include "duckdb/main/database.hpp"
|
package/src/duckdb/src/include/duckdb/common/progress_bar/display/terminal_progress_bar_display.hpp
CHANGED
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
|
|
11
11
|
#include "duckdb/common/constants.hpp"
|
|
12
12
|
#include "duckdb/common/progress_bar/progress_bar_display.hpp"
|
|
13
|
+
#include "duckdb/common/unicode_bar.hpp"
|
|
13
14
|
|
|
14
15
|
namespace duckdb {
|
|
15
16
|
|
|
@@ -25,18 +26,16 @@ public:
|
|
|
25
26
|
void Finish() override;
|
|
26
27
|
|
|
27
28
|
private:
|
|
28
|
-
static constexpr const idx_t PARTIAL_BLOCK_COUNT =
|
|
29
|
+
static constexpr const idx_t PARTIAL_BLOCK_COUNT = UnicodeBar::PartialBlocksCount();
|
|
29
30
|
#ifndef DUCKDB_ASCII_TREE_RENDERER
|
|
30
31
|
const char *PROGRESS_EMPTY = " ";
|
|
31
|
-
const char *PROGRESS_PARTIAL
|
|
32
|
-
|
|
33
|
-
"\xE2\x96\x89"};
|
|
34
|
-
const char *PROGRESS_BLOCK = "\xE2\x96\x88";
|
|
32
|
+
const char *const *PROGRESS_PARTIAL = UnicodeBar::PartialBlocks();
|
|
33
|
+
const char *PROGRESS_BLOCK = UnicodeBar::FullBlock();
|
|
35
34
|
const char *PROGRESS_START = "\xE2\x96\x95";
|
|
36
35
|
const char *PROGRESS_END = "\xE2\x96\x8F";
|
|
37
36
|
#else
|
|
38
37
|
const char *PROGRESS_EMPTY = " ";
|
|
39
|
-
const char *PROGRESS_PARTIAL[PARTIAL_BLOCK_COUNT] {" ", " ", " ", " ", " ", " ", " ", " "};
|
|
38
|
+
const char *const PROGRESS_PARTIAL[PARTIAL_BLOCK_COUNT] = {" ", " ", " ", " ", " ", " ", " ", " "};
|
|
40
39
|
const char *PROGRESS_BLOCK = "=";
|
|
41
40
|
const char *PROGRESS_START = "[";
|
|
42
41
|
const char *PROGRESS_END = "]";
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
|
2
|
+
// DuckDB
|
|
3
|
+
//
|
|
4
|
+
// duckdb/common/unicode_bar.hpp
|
|
5
|
+
//
|
|
6
|
+
//
|
|
7
|
+
//===----------------------------------------------------------------------===//
|
|
8
|
+
|
|
9
|
+
namespace duckdb {
|
|
10
|
+
struct UnicodeBar {
|
|
11
|
+
private:
|
|
12
|
+
static constexpr idx_t PARTIAL_BLOCKS_COUNT = 8;
|
|
13
|
+
|
|
14
|
+
public:
|
|
15
|
+
static constexpr idx_t PartialBlocksCount() {
|
|
16
|
+
return PARTIAL_BLOCKS_COUNT;
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
static const char *const *PartialBlocks() {
|
|
20
|
+
static const char *PARTIAL_BLOCKS[PARTIAL_BLOCKS_COUNT] = {" ",
|
|
21
|
+
"\xE2\x96\x8F",
|
|
22
|
+
"\xE2\x96\x8E",
|
|
23
|
+
"\xE2\x96\x8D",
|
|
24
|
+
"\xE2\x96\x8C",
|
|
25
|
+
"\xE2\x96\x8B",
|
|
26
|
+
"\xE2\x96\x8A",
|
|
27
|
+
"\xE2\x96\x89"};
|
|
28
|
+
return PARTIAL_BLOCKS;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
static const char *FullBlock() {
|
|
32
|
+
return "\xE2\x96\x88";
|
|
33
|
+
}
|
|
34
|
+
};
|
|
35
|
+
} // namespace duckdb
|
|
@@ -274,8 +274,9 @@ private:
|
|
|
274
274
|
|
|
275
275
|
template <class A_TYPE, class B_TYPE, class C_TYPE, class RESULT_TYPE, class FUNC>
|
|
276
276
|
static void ExecuteTernaryInternal(Vector &a, Vector &b, Vector &c, Vector &result, idx_t count, FUNC &fun) {
|
|
277
|
-
auto constant =
|
|
278
|
-
|
|
277
|
+
auto constant = a.GetVectorType() == VectorType::CONSTANT_VECTOR &&
|
|
278
|
+
b.GetVectorType() == VectorType::CONSTANT_VECTOR &&
|
|
279
|
+
c.GetVectorType() == VectorType::CONSTANT_VECTOR;
|
|
279
280
|
|
|
280
281
|
typename A_TYPE::STRUCT_STATE a_state;
|
|
281
282
|
typename B_TYPE::STRUCT_STATE b_state;
|
|
@@ -313,7 +314,8 @@ private:
|
|
|
313
314
|
static void ExecuteQuaternaryInternal(Vector &a, Vector &b, Vector &c, Vector &d, Vector &result, idx_t count,
|
|
314
315
|
FUNC &fun) {
|
|
315
316
|
auto constant =
|
|
316
|
-
a.GetVectorType() == VectorType::CONSTANT_VECTOR && b.GetVectorType() == VectorType::CONSTANT_VECTOR
|
|
317
|
+
a.GetVectorType() == VectorType::CONSTANT_VECTOR && b.GetVectorType() == VectorType::CONSTANT_VECTOR &&
|
|
318
|
+
c.GetVectorType() == VectorType::CONSTANT_VECTOR && d.GetVectorType() == VectorType::CONSTANT_VECTOR;
|
|
317
319
|
|
|
318
320
|
typename A_TYPE::STRUCT_STATE a_state;
|
|
319
321
|
typename B_TYPE::STRUCT_STATE b_state;
|
|
@@ -38,6 +38,11 @@ struct BufferedCSVReaderOptions {
|
|
|
38
38
|
//! New Line separator
|
|
39
39
|
NewLineIdentifier new_line = NewLineIdentifier::NOT_SET;
|
|
40
40
|
|
|
41
|
+
//! Whether or not an option was provided for parallel
|
|
42
|
+
bool has_parallel = false;
|
|
43
|
+
//! Whether or not the read will use the ParallelCSVReader
|
|
44
|
+
bool use_parallel = false;
|
|
45
|
+
//! Whether or not a quote was defined by the user
|
|
41
46
|
bool has_quote = false;
|
|
42
47
|
//! Quote used for columns that contain reserved characters, e.g., delimiter
|
|
43
48
|
string quote = "\"";
|
|
@@ -122,7 +127,12 @@ struct BufferedCSVReaderOptions {
|
|
|
122
127
|
void Serialize(FieldWriter &writer) const;
|
|
123
128
|
void Deserialize(FieldReader &reader);
|
|
124
129
|
|
|
130
|
+
void SetCompression(const string &compression);
|
|
131
|
+
void SetHeader(bool has_header);
|
|
132
|
+
void SetEscape(const string &escape);
|
|
133
|
+
void SetQuote(const string &quote);
|
|
125
134
|
void SetDelimiter(const string &delimiter);
|
|
135
|
+
void SetParallel(bool use_parallel);
|
|
126
136
|
|
|
127
137
|
void SetNewline(const string &input);
|
|
128
138
|
//! Set an option that is supported by both reading and writing functions, called by
|
|
@@ -34,9 +34,11 @@ public:
|
|
|
34
34
|
return functions.size();
|
|
35
35
|
}
|
|
36
36
|
T GetFunctionByOffset(idx_t offset) {
|
|
37
|
+
D_ASSERT(offset < functions.size());
|
|
37
38
|
return functions[offset];
|
|
38
39
|
}
|
|
39
40
|
T &GetFunctionReferenceByOffset(idx_t offset) {
|
|
41
|
+
D_ASSERT(offset < functions.size());
|
|
40
42
|
return functions[offset];
|
|
41
43
|
}
|
|
42
44
|
bool MergeFunctionSet(FunctionSet<T> new_functions) {
|
|
@@ -184,6 +184,10 @@ struct StringSplitFun {
|
|
|
184
184
|
static void RegisterFunction(BuiltinFunctions &set);
|
|
185
185
|
};
|
|
186
186
|
|
|
187
|
+
struct BarFun {
|
|
188
|
+
static void RegisterFunction(BuiltinFunctions &set);
|
|
189
|
+
};
|
|
190
|
+
|
|
187
191
|
struct ASCII {
|
|
188
192
|
static void RegisterFunction(BuiltinFunctions &set);
|
|
189
193
|
};
|
|
@@ -30,6 +30,7 @@ class DatabaseInstance;
|
|
|
30
30
|
class DuckDB;
|
|
31
31
|
class LogicalOperator;
|
|
32
32
|
class SelectStatement;
|
|
33
|
+
struct BufferedCSVReaderOptions;
|
|
33
34
|
|
|
34
35
|
typedef void (*warning_callback)(std::string);
|
|
35
36
|
|
|
@@ -127,8 +128,10 @@ public:
|
|
|
127
128
|
DUCKDB_API shared_ptr<Relation> Values(const string &values);
|
|
128
129
|
DUCKDB_API shared_ptr<Relation> Values(const string &values, const vector<string> &column_names,
|
|
129
130
|
const string &alias = "values");
|
|
131
|
+
|
|
130
132
|
//! Reads CSV file
|
|
131
133
|
DUCKDB_API shared_ptr<Relation> ReadCSV(const string &csv_file);
|
|
134
|
+
DUCKDB_API shared_ptr<Relation> ReadCSV(const string &csv_file, BufferedCSVReaderOptions &options);
|
|
132
135
|
DUCKDB_API shared_ptr<Relation> ReadCSV(const string &csv_file, const vector<string> &columns);
|
|
133
136
|
|
|
134
137
|
//! Reads Parquet file
|
|
@@ -8,26 +8,26 @@
|
|
|
8
8
|
|
|
9
9
|
#pragma once
|
|
10
10
|
|
|
11
|
-
#include "duckdb/
|
|
11
|
+
#include "duckdb/execution/operator/persistent/csv_reader_options.hpp"
|
|
12
|
+
#include "duckdb/main/relation/table_function_relation.hpp"
|
|
12
13
|
|
|
13
14
|
namespace duckdb {
|
|
14
15
|
|
|
15
|
-
|
|
16
|
+
struct BufferedCSVReaderOptions;
|
|
17
|
+
|
|
18
|
+
class ReadCSVRelation : public TableFunctionRelation {
|
|
16
19
|
public:
|
|
17
20
|
ReadCSVRelation(const std::shared_ptr<ClientContext> &context, string csv_file, vector<ColumnDefinition> columns,
|
|
18
|
-
|
|
21
|
+
string alias = string());
|
|
22
|
+
ReadCSVRelation(const std::shared_ptr<ClientContext> &context, string csv_file, BufferedCSVReaderOptions options,
|
|
23
|
+
string alias = string());
|
|
19
24
|
|
|
20
|
-
string csv_file;
|
|
21
|
-
bool auto_detect;
|
|
22
25
|
string alias;
|
|
23
|
-
|
|
26
|
+
bool auto_detect;
|
|
27
|
+
string csv_file;
|
|
24
28
|
|
|
25
29
|
public:
|
|
26
|
-
unique_ptr<QueryNode> GetQueryNode() override;
|
|
27
|
-
const vector<ColumnDefinition> &Columns() override;
|
|
28
|
-
string ToString(idx_t depth) override;
|
|
29
30
|
string GetAlias() override;
|
|
30
|
-
unique_ptr<TableRef> GetTableRef() override;
|
|
31
31
|
};
|
|
32
32
|
|
|
33
33
|
} // namespace duckdb
|
|
@@ -15,10 +15,11 @@ namespace duckdb {
|
|
|
15
15
|
class TableFunctionRelation : public Relation {
|
|
16
16
|
public:
|
|
17
17
|
TableFunctionRelation(const std::shared_ptr<ClientContext> &context, string name, vector<Value> parameters,
|
|
18
|
-
named_parameter_map_t named_parameters, shared_ptr<Relation> input_relation_p = nullptr
|
|
18
|
+
named_parameter_map_t named_parameters, shared_ptr<Relation> input_relation_p = nullptr,
|
|
19
|
+
bool auto_init = true);
|
|
19
20
|
|
|
20
21
|
TableFunctionRelation(const std::shared_ptr<ClientContext> &context, string name, vector<Value> parameters,
|
|
21
|
-
shared_ptr<Relation> input_relation_p = nullptr);
|
|
22
|
+
shared_ptr<Relation> input_relation_p = nullptr, bool auto_init = true);
|
|
22
23
|
|
|
23
24
|
string name;
|
|
24
25
|
vector<Value> parameters;
|
|
@@ -33,6 +34,14 @@ public:
|
|
|
33
34
|
const vector<ColumnDefinition> &Columns() override;
|
|
34
35
|
string ToString(idx_t depth) override;
|
|
35
36
|
string GetAlias() override;
|
|
37
|
+
void AddNamedParameter(const string &name, Value argument);
|
|
38
|
+
|
|
39
|
+
private:
|
|
40
|
+
void InitializeColumns();
|
|
41
|
+
|
|
42
|
+
private:
|
|
43
|
+
//! Whether or not to auto initialize the columns on construction
|
|
44
|
+
bool auto_initialize;
|
|
36
45
|
};
|
|
37
46
|
|
|
38
47
|
} // namespace duckdb
|
|
@@ -26,7 +26,7 @@ public:
|
|
|
26
26
|
// if the function takes a subquery as argument its in here
|
|
27
27
|
unique_ptr<SelectStatement> subquery;
|
|
28
28
|
|
|
29
|
-
// External dependencies of this table
|
|
29
|
+
// External dependencies of this table function
|
|
30
30
|
unique_ptr<ExternalDependency> external_dependency;
|
|
31
31
|
|
|
32
32
|
public:
|
|
@@ -220,14 +220,13 @@ shared_ptr<Relation> Connection::Values(const string &values, const vector<strin
|
|
|
220
220
|
|
|
221
221
|
shared_ptr<Relation> Connection::ReadCSV(const string &csv_file) {
|
|
222
222
|
BufferedCSVReaderOptions options;
|
|
223
|
+
return ReadCSV(csv_file, options);
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
shared_ptr<Relation> Connection::ReadCSV(const string &csv_file, BufferedCSVReaderOptions &options) {
|
|
223
227
|
options.file_path = csv_file;
|
|
224
228
|
options.auto_detect = true;
|
|
225
|
-
|
|
226
|
-
vector<ColumnDefinition> column_list;
|
|
227
|
-
for (idx_t i = 0; i < reader.return_types.size(); i++) {
|
|
228
|
-
column_list.emplace_back(reader.names[i], reader.return_types[i]);
|
|
229
|
-
}
|
|
230
|
-
return make_shared<ReadCSVRelation>(context, csv_file, std::move(column_list), true);
|
|
229
|
+
return make_shared<ReadCSVRelation>(context, csv_file, options);
|
|
231
230
|
}
|
|
232
231
|
|
|
233
232
|
shared_ptr<Relation> Connection::ReadCSV(const string &csv_file, const vector<string> &columns) {
|
|
@@ -8,59 +8,51 @@
|
|
|
8
8
|
#include "duckdb/parser/expression/constant_expression.hpp"
|
|
9
9
|
#include "duckdb/parser/expression/function_expression.hpp"
|
|
10
10
|
#include "duckdb/common/string_util.hpp"
|
|
11
|
+
#include "duckdb/execution/operator/persistent/buffered_csv_reader.hpp"
|
|
11
12
|
|
|
12
13
|
namespace duckdb {
|
|
13
14
|
|
|
14
15
|
ReadCSVRelation::ReadCSVRelation(const std::shared_ptr<ClientContext> &context, string csv_file_p,
|
|
15
|
-
vector<ColumnDefinition> columns_p,
|
|
16
|
-
:
|
|
17
|
-
|
|
16
|
+
vector<ColumnDefinition> columns_p, string alias_p)
|
|
17
|
+
: TableFunctionRelation(context, "read_csv", {Value(csv_file_p)}, nullptr, false), alias(std::move(alias_p)),
|
|
18
|
+
auto_detect(false), csv_file(std::move(csv_file_p)) {
|
|
19
|
+
|
|
18
20
|
if (alias.empty()) {
|
|
19
21
|
alias = StringUtil::Split(csv_file, ".")[0];
|
|
20
22
|
}
|
|
21
|
-
}
|
|
22
23
|
|
|
23
|
-
|
|
24
|
-
auto result = make_unique<SelectNode>();
|
|
25
|
-
result->select_list.push_back(make_unique<StarExpression>());
|
|
26
|
-
result->from_table = GetTableRef();
|
|
27
|
-
return std::move(result);
|
|
28
|
-
}
|
|
24
|
+
columns = move(columns_p);
|
|
29
25
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
vector<unique_ptr<ParsedExpression>> children;
|
|
34
|
-
// CSV file
|
|
35
|
-
children.push_back(make_unique<ConstantExpression>(Value(csv_file)));
|
|
36
|
-
if (!auto_detect) {
|
|
37
|
-
// parameters
|
|
38
|
-
child_list_t<Value> column_names;
|
|
39
|
-
for (idx_t i = 0; i < columns.size(); i++) {
|
|
40
|
-
column_names.push_back(make_pair(columns[i].Name(), Value(columns[i].Type().ToString())));
|
|
41
|
-
}
|
|
42
|
-
auto colnames = make_unique<ConstantExpression>(Value::STRUCT(std::move(column_names)));
|
|
43
|
-
children.push_back(make_unique<ComparisonExpression>(
|
|
44
|
-
ExpressionType::COMPARE_EQUAL, make_unique<ColumnRefExpression>("columns"), std::move(colnames)));
|
|
45
|
-
} else {
|
|
46
|
-
children.push_back(make_unique<ComparisonExpression>(ExpressionType::COMPARE_EQUAL,
|
|
47
|
-
make_unique<ColumnRefExpression>("auto_detect"),
|
|
48
|
-
make_unique<ConstantExpression>(Value::BOOLEAN(true))));
|
|
26
|
+
child_list_t<Value> column_names;
|
|
27
|
+
for (idx_t i = 0; i < columns.size(); i++) {
|
|
28
|
+
column_names.push_back(make_pair(columns[i].Name(), Value(columns[i].Type().ToString())));
|
|
49
29
|
}
|
|
50
|
-
table_ref->function = make_unique<FunctionExpression>("read_csv", std::move(children));
|
|
51
|
-
return std::move(table_ref);
|
|
52
|
-
}
|
|
53
30
|
|
|
54
|
-
|
|
55
|
-
return alias;
|
|
31
|
+
AddNamedParameter("columns", Value::STRUCT(std::move(column_names)));
|
|
56
32
|
}
|
|
57
33
|
|
|
58
|
-
const
|
|
59
|
-
|
|
34
|
+
ReadCSVRelation::ReadCSVRelation(const std::shared_ptr<ClientContext> &context, string csv_file_p,
|
|
35
|
+
BufferedCSVReaderOptions options, string alias_p)
|
|
36
|
+
: TableFunctionRelation(context, "read_csv_auto", {Value(csv_file_p)}, nullptr, false), alias(std::move(alias_p)),
|
|
37
|
+
auto_detect(true), csv_file(std::move(csv_file_p)) {
|
|
38
|
+
|
|
39
|
+
if (alias.empty()) {
|
|
40
|
+
alias = StringUtil::Split(csv_file, ".")[0];
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
// Force auto_detect for this constructor
|
|
44
|
+
options.auto_detect = true;
|
|
45
|
+
BufferedCSVReader reader(*context, move(options));
|
|
46
|
+
|
|
47
|
+
for (idx_t i = 0; i < reader.return_types.size(); i++) {
|
|
48
|
+
columns.emplace_back(reader.names[i], reader.return_types[i]);
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
AddNamedParameter("auto_detect", Value::BOOLEAN(true));
|
|
60
52
|
}
|
|
61
53
|
|
|
62
|
-
string ReadCSVRelation::
|
|
63
|
-
return
|
|
54
|
+
string ReadCSVRelation::GetAlias() {
|
|
55
|
+
return alias;
|
|
64
56
|
}
|
|
65
57
|
|
|
66
58
|
} // namespace duckdb
|
|
@@ -12,21 +12,32 @@
|
|
|
12
12
|
|
|
13
13
|
namespace duckdb {
|
|
14
14
|
|
|
15
|
+
void TableFunctionRelation::AddNamedParameter(const string &name, Value argument) {
|
|
16
|
+
named_parameters[name] = move(argument);
|
|
17
|
+
}
|
|
18
|
+
|
|
15
19
|
TableFunctionRelation::TableFunctionRelation(const std::shared_ptr<ClientContext> &context, string name_p,
|
|
16
20
|
vector<Value> parameters_p, named_parameter_map_t named_parameters,
|
|
17
|
-
shared_ptr<Relation> input_relation_p)
|
|
21
|
+
shared_ptr<Relation> input_relation_p, bool auto_init)
|
|
18
22
|
: Relation(context, RelationType::TABLE_FUNCTION_RELATION), name(std::move(name_p)),
|
|
19
23
|
parameters(std::move(parameters_p)), named_parameters(std::move(named_parameters)),
|
|
20
|
-
input_relation(std::move(input_relation_p)) {
|
|
24
|
+
input_relation(std::move(input_relation_p)), auto_initialize(auto_init) {
|
|
21
25
|
context->TryBindRelation(*this, this->columns);
|
|
22
26
|
}
|
|
23
|
-
TableFunctionRelation::TableFunctionRelation(const std::shared_ptr<ClientContext> &context, string name_p,
|
|
24
|
-
vector<Value> parameters_p,
|
|
25
27
|
|
|
26
|
-
|
|
28
|
+
TableFunctionRelation::TableFunctionRelation(const std::shared_ptr<ClientContext> &context, string name_p,
|
|
29
|
+
vector<Value> parameters_p, shared_ptr<Relation> input_relation_p,
|
|
30
|
+
bool auto_init)
|
|
27
31
|
: Relation(context, RelationType::TABLE_FUNCTION_RELATION), name(std::move(name_p)),
|
|
28
|
-
parameters(std::move(parameters_p)), input_relation(std::move(input_relation_p)) {
|
|
29
|
-
|
|
32
|
+
parameters(std::move(parameters_p)), input_relation(std::move(input_relation_p)), auto_initialize(auto_init) {
|
|
33
|
+
InitializeColumns();
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
void TableFunctionRelation::InitializeColumns() {
|
|
37
|
+
if (!auto_initialize) {
|
|
38
|
+
return;
|
|
39
|
+
}
|
|
40
|
+
context.GetContext()->TryBindRelation(*this, this->columns);
|
|
30
41
|
}
|
|
31
42
|
|
|
32
43
|
unique_ptr<QueryNode> TableFunctionRelation::GetQueryNode() {
|