npm - duckdb - Versions diffs - 0.6.2-dev1166.0 → 0.6.2-dev1170.0 - Mend

duckdb 0.6.2-dev1166.0 → 0.6.2-dev1170.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/package.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "name": "duckdb",
   "main": "./lib/duckdb.js",
   "types": "./lib/duckdb.d.ts",
-  "version": "0.6.2-dev1166.0",
+  "version": "0.6.2-dev1170.0",
   "description": "DuckDB node.js API",
   "gypfile": true,
   "dependencies": {

package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp CHANGED Viewed

@@ -832,6 +832,27 @@ vector<LogicalType> BufferedCSVReader::RefineTypeDetection(const vector<LogicalT
 	return detected_types;
 }
+string BufferedCSVReader::ColumnTypesError(case_insensitive_map_t<idx_t> sql_types_per_column,
+                                           const vector<string> &names) {
+	for (idx_t i = 0; i < names.size(); i++) {
+		auto it = sql_types_per_column.find(names[i]);
+		if (it != sql_types_per_column.end()) {
+			sql_types_per_column.erase(names[i]);
+			continue;
+		}
+	}
+	if (sql_types_per_column.empty()) {
+		return string();
+	}
+	string exception = "COLUMN_TYPES error: Columns with names: ";
+	for (auto &col : sql_types_per_column) {
+		exception += "\"" + col.first + "\",";
+	}
+	exception.pop_back();
+	exception += " do not exist in the CSV File";
+	return exception;
+}
 vector<LogicalType> BufferedCSVReader::SniffCSV(const vector<LogicalType> &requested_types) {
 	for (auto &type : requested_types) {
 		// auto detect for blobs not supported: there may be invalid UTF-8 in the file
@@ -887,23 +908,38 @@ vector<LogicalType> BufferedCSVReader::SniffCSV(const vector<LogicalType> &reque
 	// #######
 	options.num_cols = best_num_cols;
 	DetectHeader(best_sql_types_candidates, best_header_row);
-	auto sql_types_per_column = options.sql_types_per_column;
-	for (idx_t i = 0; i < names.size(); i++) {
-		auto it = sql_types_per_column.find(names[i]);
-		if (it != sql_types_per_column.end()) {
-			best_sql_types_candidates[i] = {it->second};
-			sql_types_per_column.erase(names[i]);
-		}
-	}
-	if (!sql_types_per_column.empty()) {
-		string exception = "COLUMN_TYPES error: Columns with names: ";
-		for (auto &col : sql_types_per_column) {
-			exception += "\"" + col.first + "\",";
+	if (!options.sql_type_list.empty()) {
+		// user-defined types were supplied for certain columns
+		// override the types
+		if (!options.sql_types_per_column.empty()) {
+			// types supplied as name -> value map
+			idx_t found = 0;
+			for (idx_t i = 0; i < names.size(); i++) {
+				auto it = options.sql_types_per_column.find(names[i]);
+				if (it != options.sql_types_per_column.end()) {
+					best_sql_types_candidates[i] = {options.sql_type_list[it->second]};
+					found++;
+					continue;
+				}
+			}
+			if (!options.union_by_name && found < options.sql_types_per_column.size()) {
+				string exception = ColumnTypesError(options.sql_types_per_column, names);
+				if (!exception.empty()) {
+					throw BinderException(exception);
+				}
+			}
+		} else {
+			// types supplied as list
+			if (names.size() < options.sql_type_list.size()) {
+				throw BinderException("read_csv: %d types were provided, but CSV file only has %d columns",
+				                      options.sql_type_list.size(), names.size());
+			}
+			for (idx_t i = 0; i < options.sql_type_list.size(); i++) {
+				best_sql_types_candidates[i] = {options.sql_type_list[i]};
+			}
 		}
-		exception.pop_back();
-		exception += " do not exist in the CSV File";
-		throw BinderException(exception);
 	}
 	// #######
 	// ### type detection (refining)
 	// #######

package/src/duckdb/src/function/table/read_csv.cpp CHANGED Viewed

@@ -89,24 +89,45 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
 			if (names.empty()) {
 				throw BinderException("read_csv requires at least a single column as input!");
 			}
-		} else if (loption == "column_types") {
+		} else if (loption == "column_types" || loption == "types" || loption == "dtypes") {
 			auto &child_type = kv.second.type();
-			if (child_type.id() != LogicalTypeId::STRUCT) {
-				throw BinderException("read_csv_auto column_types requires a struct as input");
+			if (child_type.id() != LogicalTypeId::STRUCT && child_type.id() != LogicalTypeId::LIST) {
+				throw BinderException("read_csv_auto %s requires a struct or list as input", kv.first);
 			}
-			auto &struct_children = StructValue::GetChildren(kv.second);
-			D_ASSERT(StructType::GetChildCount(child_type) == struct_children.size());
-			for (idx_t i = 0; i < struct_children.size(); i++) {
-				auto &name = StructType::GetChildName(child_type, i);
-				auto &val = struct_children[i];
-				if (val.type().id() != LogicalTypeId::VARCHAR) {
-					throw BinderException("read_csv_auto requires a type specification as string");
+			if (!options.sql_type_list.empty()) {
+				throw BinderException("read_csv_auto column_types/types/dtypes can only be supplied once");
+			}
+			vector<string> sql_type_names;
+			if (child_type.id() == LogicalTypeId::STRUCT) {
+				auto &struct_children = StructValue::GetChildren(kv.second);
+				D_ASSERT(StructType::GetChildCount(child_type) == struct_children.size());
+				for (idx_t i = 0; i < struct_children.size(); i++) {
+					auto &name = StructType::GetChildName(child_type, i);
+					auto &val = struct_children[i];
+					if (val.type().id() != LogicalTypeId::VARCHAR) {
+						throw BinderException("read_csv_auto %s requires a type specification as string", kv.first);
+					}
+					sql_type_names.push_back(StringValue::Get(val));
+					options.sql_types_per_column[name] = i;
 				}
-				auto def_type = TransformStringToLogicalType(StringValue::Get(val));
+			} else {
+				auto &list_child = ListType::GetChildType(child_type);
+				if (list_child.id() != LogicalTypeId::VARCHAR) {
+					throw BinderException("read_csv_auto %s requires a list of types (varchar) as input", kv.first);
+				}
+				auto &children = ListValue::GetChildren(kv.second);
+				for (auto &child : children) {
+					sql_type_names.push_back(StringValue::Get(child));
+				}
+			}
+			options.sql_type_list.reserve(sql_type_names.size());
+			for (auto &sql_type : sql_type_names) {
+				auto def_type = TransformStringToLogicalType(sql_type);
 				if (def_type.id() == LogicalTypeId::USER) {
-					throw BinderException("Unrecognized type for read_csv_auto column_types definition");
+					throw BinderException("Unrecognized type \"%s\" for read_csv_auto %s definition", sql_type,
+					                      kv.first);
 				}
-				options.sql_types_per_column[name] = def_type;
+				options.sql_type_list.push_back(move(def_type));
 			}
 		} else if (loption == "all_varchar") {
 			options.all_varchar = BooleanValue::Get(kv.second);
@@ -173,6 +194,13 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
 		const idx_t first_file_index = 0;
 		result->initial_reader = std::move(result->union_readers[first_file_index]);
 		D_ASSERT(names.size() == return_types.size());
+		if (!options.sql_types_per_column.empty()) {
+			auto exception = BufferedCSVReader::ColumnTypesError(options.sql_types_per_column, names);
+			if (!exception.empty()) {
+				throw BinderException(exception);
+			}
+		}
 	}
 	if (result->options.include_file_name) {
@@ -830,6 +858,8 @@ TableFunction ReadCSVTableFunction::GetAutoFunction(bool list_parameter) {
 	read_csv_auto.cardinality = CSVReaderCardinality;
 	ReadCSVAddNamedParameters(read_csv_auto);
 	read_csv_auto.named_parameters["column_types"] = LogicalType::ANY;
+	read_csv_auto.named_parameters["dtypes"] = LogicalType::ANY;
+	read_csv_auto.named_parameters["types"] = LogicalType::ANY;
 	return read_csv_auto;
 }

package/src/duckdb/src/function/table/version/pragma_version.cpp CHANGED Viewed

@@ -1,8 +1,8 @@
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "0.6.2-dev1166"
+#define DUCKDB_VERSION "0.6.2-dev1170"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "67ceaf6e2c"
+#define DUCKDB_SOURCE_ID "72d187c5ff"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"

package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp CHANGED Viewed

@@ -76,6 +76,8 @@ public:
 	//! Extract a single DataChunk from the CSV file and stores it in insert_chunk
 	void ParseCSV(DataChunk &insert_chunk);
+	static string ColumnTypesError(case_insensitive_map_t<idx_t> sql_types_per_column, const vector<string> &names);
 private:
 	//! Initialize Parser
 	void Initialize(const vector<LogicalType> &requested_types);

package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp CHANGED Viewed

@@ -13,6 +13,7 @@
 #include "duckdb/function/scalar/strftime.hpp"
 #include "duckdb/common/types/value.hpp"
 #include "duckdb/common/field_writer.hpp"
+#include "duckdb/common/case_insensitive_map.hpp"
 namespace duckdb {
@@ -54,8 +55,10 @@ struct BufferedCSVReaderOptions {
 	//===--------------------------------------------------------------------===//
 	// CSVAutoOptions
 	//===--------------------------------------------------------------------===//
-	//! SQL Types defined per specific column
-	unordered_map<string, LogicalType> sql_types_per_column;
+	//! SQL Type list mapping of name to SQL type index in sql_type_list
+	case_insensitive_map_t<idx_t> sql_types_per_column;
+	//! User-defined SQL type list
+	vector<LogicalType> sql_type_list;
 	//===--------------------------------------------------------------------===//
 	// ReadCSVOptions
 	//===--------------------------------------------------------------------===//