npm - duckdb - Versions diffs - 0.6.2-dev499.0 → 0.6.2-dev503.0 - Mend

duckdb 0.6.2-dev499.0 → 0.6.2-dev503.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/package.json +1 -1
package/src/duckdb.cpp +38 -1
package/src/duckdb.hpp +581 -576
package/src/parquet-amalgamation.cpp +32070 -32070

package/package.json CHANGED

@@ -2,7 +2,7 @@
   "name": "duckdb",
   "main": "./lib/duckdb.js",
   "types": "./lib/duckdb.d.ts",
-  "version": "0.6.2-dev499.0",
+  "version": "0.6.2-dev503.0",
   "description": "DuckDB node.js API",
   "gypfile": true,
   "dependencies": {

package/src/duckdb.cpp CHANGED

@@ -80940,7 +80940,23 @@ vector<LogicalType> BufferedCSVReader::SniffCSV(const vector<LogicalType> &reque
 	// #######
 	options.num_cols = best_num_cols;
 	DetectHeader(best_sql_types_candidates, best_header_row);
+	auto sql_types_per_column = options.sql_types_per_column;
+	for (idx_t i = 0; i < col_names.size(); i++) {
+		auto it = sql_types_per_column.find(col_names[i]);
+		if (it != sql_types_per_column.end()) {
+			best_sql_types_candidates[i] = {it->second};
+			sql_types_per_column.erase(col_names[i]);
+		}
+	}
+	if (!sql_types_per_column.empty()) {
+		string exception = "COLUMN_TYPES error: Columns with names: ";
+		for (auto &col : sql_types_per_column) {
+			exception += "\"" + col.first + "\",";
+		}
+		exception.pop_back();
+		exception += " do not exist in the CSV File";
+		throw BinderException(exception);
+	}
 	// #######
 	// ### type detection (refining)
 	// #######
@@ -127123,6 +127139,26 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
 			if (names.empty()) {
 				throw BinderException("read_csv requires at least a single column as input!");
 			}
+		} else if (loption == "column_types") {
+			auto &child_type = kv.second.type();
+			if (child_type.id() != LogicalTypeId::STRUCT) {
+				throw BinderException("read_csv_auto column_types requires a struct as input");
+			}
+			auto &struct_children = StructValue::GetChildren(kv.second);
+			D_ASSERT(StructType::GetChildCount(child_type) == struct_children.size());
+			for (idx_t i = 0; i < struct_children.size(); i++) {
+				auto &name = StructType::GetChildName(child_type, i);
+				auto &val = struct_children[i];
+				if (val.type().id() != LogicalTypeId::VARCHAR) {
+					throw BinderException("read_csv_auto requires a type specification as string");
+				}
+				auto def_type = TransformStringToLogicalType(StringValue::Get(val));
+				if (def_type.id() == LogicalTypeId::USER) {
+					throw BinderException("Unrecognized type for read_csv_auto column_types definition");
+				}
+				options.sql_types_per_column[name] = def_type;
+			}
 		} else if (loption == "all_varchar") {
 			options.all_varchar = BooleanValue::Get(kv.second);
 		} else if (loption == "normalize_names") {
@@ -127761,6 +127797,7 @@ TableFunction ReadCSVTableFunction::GetAutoFunction(bool list_parameter) {
 	read_csv_auto.get_batch_index = CSVReaderGetBatchIndex;
 	read_csv_auto.cardinality = CSVReaderCardinality;
 	ReadCSVAddNamedParameters(read_csv_auto);
+	read_csv_auto.named_parameters["column_types"] = LogicalType::ANY;
 	return read_csv_auto;
 }