duckdb 0.6.2-dev1166.0 → 0.6.2-dev1170.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +51 -15
- package/src/duckdb/src/function/table/read_csv.cpp +43 -13
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +5 -2
package/package.json
CHANGED
|
@@ -832,6 +832,27 @@ vector<LogicalType> BufferedCSVReader::RefineTypeDetection(const vector<LogicalT
|
|
|
832
832
|
return detected_types;
|
|
833
833
|
}
|
|
834
834
|
|
|
835
|
+
string BufferedCSVReader::ColumnTypesError(case_insensitive_map_t<idx_t> sql_types_per_column,
|
|
836
|
+
const vector<string> &names) {
|
|
837
|
+
for (idx_t i = 0; i < names.size(); i++) {
|
|
838
|
+
auto it = sql_types_per_column.find(names[i]);
|
|
839
|
+
if (it != sql_types_per_column.end()) {
|
|
840
|
+
sql_types_per_column.erase(names[i]);
|
|
841
|
+
continue;
|
|
842
|
+
}
|
|
843
|
+
}
|
|
844
|
+
if (sql_types_per_column.empty()) {
|
|
845
|
+
return string();
|
|
846
|
+
}
|
|
847
|
+
string exception = "COLUMN_TYPES error: Columns with names: ";
|
|
848
|
+
for (auto &col : sql_types_per_column) {
|
|
849
|
+
exception += "\"" + col.first + "\",";
|
|
850
|
+
}
|
|
851
|
+
exception.pop_back();
|
|
852
|
+
exception += " do not exist in the CSV File";
|
|
853
|
+
return exception;
|
|
854
|
+
}
|
|
855
|
+
|
|
835
856
|
vector<LogicalType> BufferedCSVReader::SniffCSV(const vector<LogicalType> &requested_types) {
|
|
836
857
|
for (auto &type : requested_types) {
|
|
837
858
|
// auto detect for blobs not supported: there may be invalid UTF-8 in the file
|
|
@@ -887,23 +908,38 @@ vector<LogicalType> BufferedCSVReader::SniffCSV(const vector<LogicalType> &reque
|
|
|
887
908
|
// #######
|
|
888
909
|
options.num_cols = best_num_cols;
|
|
889
910
|
DetectHeader(best_sql_types_candidates, best_header_row);
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
if (
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
911
|
+
if (!options.sql_type_list.empty()) {
|
|
912
|
+
// user-defined types were supplied for certain columns
|
|
913
|
+
// override the types
|
|
914
|
+
if (!options.sql_types_per_column.empty()) {
|
|
915
|
+
// types supplied as name -> value map
|
|
916
|
+
idx_t found = 0;
|
|
917
|
+
for (idx_t i = 0; i < names.size(); i++) {
|
|
918
|
+
auto it = options.sql_types_per_column.find(names[i]);
|
|
919
|
+
if (it != options.sql_types_per_column.end()) {
|
|
920
|
+
best_sql_types_candidates[i] = {options.sql_type_list[it->second]};
|
|
921
|
+
found++;
|
|
922
|
+
continue;
|
|
923
|
+
}
|
|
924
|
+
}
|
|
925
|
+
if (!options.union_by_name && found < options.sql_types_per_column.size()) {
|
|
926
|
+
string exception = ColumnTypesError(options.sql_types_per_column, names);
|
|
927
|
+
if (!exception.empty()) {
|
|
928
|
+
throw BinderException(exception);
|
|
929
|
+
}
|
|
930
|
+
}
|
|
931
|
+
} else {
|
|
932
|
+
// types supplied as list
|
|
933
|
+
if (names.size() < options.sql_type_list.size()) {
|
|
934
|
+
throw BinderException("read_csv: %d types were provided, but CSV file only has %d columns",
|
|
935
|
+
options.sql_type_list.size(), names.size());
|
|
936
|
+
}
|
|
937
|
+
for (idx_t i = 0; i < options.sql_type_list.size(); i++) {
|
|
938
|
+
best_sql_types_candidates[i] = {options.sql_type_list[i]};
|
|
939
|
+
}
|
|
902
940
|
}
|
|
903
|
-
exception.pop_back();
|
|
904
|
-
exception += " do not exist in the CSV File";
|
|
905
|
-
throw BinderException(exception);
|
|
906
941
|
}
|
|
942
|
+
|
|
907
943
|
// #######
|
|
908
944
|
// ### type detection (refining)
|
|
909
945
|
// #######
|
|
@@ -89,24 +89,45 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
|
|
|
89
89
|
if (names.empty()) {
|
|
90
90
|
throw BinderException("read_csv requires at least a single column as input!");
|
|
91
91
|
}
|
|
92
|
-
} else if (loption == "column_types") {
|
|
92
|
+
} else if (loption == "column_types" || loption == "types" || loption == "dtypes") {
|
|
93
93
|
auto &child_type = kv.second.type();
|
|
94
|
-
if (child_type.id() != LogicalTypeId::STRUCT) {
|
|
95
|
-
throw BinderException("read_csv_auto
|
|
94
|
+
if (child_type.id() != LogicalTypeId::STRUCT && child_type.id() != LogicalTypeId::LIST) {
|
|
95
|
+
throw BinderException("read_csv_auto %s requires a struct or list as input", kv.first);
|
|
96
96
|
}
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
97
|
+
if (!options.sql_type_list.empty()) {
|
|
98
|
+
throw BinderException("read_csv_auto column_types/types/dtypes can only be supplied once");
|
|
99
|
+
}
|
|
100
|
+
vector<string> sql_type_names;
|
|
101
|
+
if (child_type.id() == LogicalTypeId::STRUCT) {
|
|
102
|
+
auto &struct_children = StructValue::GetChildren(kv.second);
|
|
103
|
+
D_ASSERT(StructType::GetChildCount(child_type) == struct_children.size());
|
|
104
|
+
for (idx_t i = 0; i < struct_children.size(); i++) {
|
|
105
|
+
auto &name = StructType::GetChildName(child_type, i);
|
|
106
|
+
auto &val = struct_children[i];
|
|
107
|
+
if (val.type().id() != LogicalTypeId::VARCHAR) {
|
|
108
|
+
throw BinderException("read_csv_auto %s requires a type specification as string", kv.first);
|
|
109
|
+
}
|
|
110
|
+
sql_type_names.push_back(StringValue::Get(val));
|
|
111
|
+
options.sql_types_per_column[name] = i;
|
|
104
112
|
}
|
|
105
|
-
|
|
113
|
+
} else {
|
|
114
|
+
auto &list_child = ListType::GetChildType(child_type);
|
|
115
|
+
if (list_child.id() != LogicalTypeId::VARCHAR) {
|
|
116
|
+
throw BinderException("read_csv_auto %s requires a list of types (varchar) as input", kv.first);
|
|
117
|
+
}
|
|
118
|
+
auto &children = ListValue::GetChildren(kv.second);
|
|
119
|
+
for (auto &child : children) {
|
|
120
|
+
sql_type_names.push_back(StringValue::Get(child));
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
options.sql_type_list.reserve(sql_type_names.size());
|
|
124
|
+
for (auto &sql_type : sql_type_names) {
|
|
125
|
+
auto def_type = TransformStringToLogicalType(sql_type);
|
|
106
126
|
if (def_type.id() == LogicalTypeId::USER) {
|
|
107
|
-
throw BinderException("Unrecognized type for read_csv_auto
|
|
127
|
+
throw BinderException("Unrecognized type \"%s\" for read_csv_auto %s definition", sql_type,
|
|
128
|
+
kv.first);
|
|
108
129
|
}
|
|
109
|
-
options.
|
|
130
|
+
options.sql_type_list.push_back(move(def_type));
|
|
110
131
|
}
|
|
111
132
|
} else if (loption == "all_varchar") {
|
|
112
133
|
options.all_varchar = BooleanValue::Get(kv.second);
|
|
@@ -173,6 +194,13 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
|
|
|
173
194
|
const idx_t first_file_index = 0;
|
|
174
195
|
result->initial_reader = std::move(result->union_readers[first_file_index]);
|
|
175
196
|
D_ASSERT(names.size() == return_types.size());
|
|
197
|
+
|
|
198
|
+
if (!options.sql_types_per_column.empty()) {
|
|
199
|
+
auto exception = BufferedCSVReader::ColumnTypesError(options.sql_types_per_column, names);
|
|
200
|
+
if (!exception.empty()) {
|
|
201
|
+
throw BinderException(exception);
|
|
202
|
+
}
|
|
203
|
+
}
|
|
176
204
|
}
|
|
177
205
|
|
|
178
206
|
if (result->options.include_file_name) {
|
|
@@ -830,6 +858,8 @@ TableFunction ReadCSVTableFunction::GetAutoFunction(bool list_parameter) {
|
|
|
830
858
|
read_csv_auto.cardinality = CSVReaderCardinality;
|
|
831
859
|
ReadCSVAddNamedParameters(read_csv_auto);
|
|
832
860
|
read_csv_auto.named_parameters["column_types"] = LogicalType::ANY;
|
|
861
|
+
read_csv_auto.named_parameters["dtypes"] = LogicalType::ANY;
|
|
862
|
+
read_csv_auto.named_parameters["types"] = LogicalType::ANY;
|
|
833
863
|
return read_csv_auto;
|
|
834
864
|
}
|
|
835
865
|
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
#ifndef DUCKDB_VERSION
|
|
2
|
-
#define DUCKDB_VERSION "0.6.2-dev1166"
|
|
2
|
+
#define DUCKDB_VERSION "0.6.2-dev1170"
|
|
3
3
|
#endif
|
|
4
4
|
#ifndef DUCKDB_SOURCE_ID
|
|
5
|
-
#define DUCKDB_SOURCE_ID "
|
|
5
|
+
#define DUCKDB_SOURCE_ID "72d187c5ff"
|
|
6
6
|
#endif
|
|
7
7
|
#include "duckdb/function/table/system_functions.hpp"
|
|
8
8
|
#include "duckdb/main/database.hpp"
|
|
@@ -76,6 +76,8 @@ public:
|
|
|
76
76
|
//! Extract a single DataChunk from the CSV file and stores it in insert_chunk
|
|
77
77
|
void ParseCSV(DataChunk &insert_chunk);
|
|
78
78
|
|
|
79
|
+
static string ColumnTypesError(case_insensitive_map_t<idx_t> sql_types_per_column, const vector<string> &names);
|
|
80
|
+
|
|
79
81
|
private:
|
|
80
82
|
//! Initialize Parser
|
|
81
83
|
void Initialize(const vector<LogicalType> &requested_types);
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
#include "duckdb/function/scalar/strftime.hpp"
|
|
14
14
|
#include "duckdb/common/types/value.hpp"
|
|
15
15
|
#include "duckdb/common/field_writer.hpp"
|
|
16
|
+
#include "duckdb/common/case_insensitive_map.hpp"
|
|
16
17
|
|
|
17
18
|
namespace duckdb {
|
|
18
19
|
|
|
@@ -54,8 +55,10 @@ struct BufferedCSVReaderOptions {
|
|
|
54
55
|
//===--------------------------------------------------------------------===//
|
|
55
56
|
// CSVAutoOptions
|
|
56
57
|
//===--------------------------------------------------------------------===//
|
|
57
|
-
//! SQL
|
|
58
|
-
|
|
58
|
+
//! SQL Type list mapping of name to SQL type index in sql_type_list
|
|
59
|
+
case_insensitive_map_t<idx_t> sql_types_per_column;
|
|
60
|
+
//! User-defined SQL type list
|
|
61
|
+
vector<LogicalType> sql_type_list;
|
|
59
62
|
//===--------------------------------------------------------------------===//
|
|
60
63
|
// ReadCSVOptions
|
|
61
64
|
//===--------------------------------------------------------------------===//
|