npm - duckdb - Versions diffs - 0.6.2-dev490.0 → 0.6.2-dev503.0 - Mend

duckdb 0.6.2-dev490.0 → 0.6.2-dev503.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/package.json +1 -1
package/src/duckdb.cpp +50 -3
package/src/duckdb.hpp +581 -576
package/src/parquet-amalgamation.cpp +29129 -29129

package/package.json CHANGED

@@ -2,7 +2,7 @@
   "name": "duckdb",
   "main": "./lib/duckdb.js",
   "types": "./lib/duckdb.d.ts",
-  "version": "0.6.2-dev490.0",
+  "version": "0.6.2-dev503.0",
   "description": "DuckDB node.js API",
   "gypfile": true,
   "dependencies": {

package/src/duckdb.cpp CHANGED

@@ -80940,7 +80940,23 @@ vector<LogicalType> BufferedCSVReader::SniffCSV(const vector<LogicalType> &reque
 	// #######
 	options.num_cols = best_num_cols;
 	DetectHeader(best_sql_types_candidates, best_header_row);
+	auto sql_types_per_column = options.sql_types_per_column;
+	for (idx_t i = 0; i < col_names.size(); i++) {
+		auto it = sql_types_per_column.find(col_names[i]);
+		if (it != sql_types_per_column.end()) {
+			best_sql_types_candidates[i] = {it->second};
+			sql_types_per_column.erase(col_names[i]);
+		}
+	}
+	if (!sql_types_per_column.empty()) {
+		string exception = "COLUMN_TYPES error: Columns with names: ";
+		for (auto &col : sql_types_per_column) {
+			exception += "\"" + col.first + "\",";
+		}
+		exception.pop_back();
+		exception += " do not exist in the CSV File";
+		throw BinderException(exception);
+	}
 	// #######
 	// ### type detection (refining)
 	// #######
@@ -127123,6 +127139,26 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
 			if (names.empty()) {
 				throw BinderException("read_csv requires at least a single column as input!");
 			}
+		} else if (loption == "column_types") {
+			auto &child_type = kv.second.type();
+			if (child_type.id() != LogicalTypeId::STRUCT) {
+				throw BinderException("read_csv_auto column_types requires a struct as input");
+			}
+			auto &struct_children = StructValue::GetChildren(kv.second);
+			D_ASSERT(StructType::GetChildCount(child_type) == struct_children.size());
+			for (idx_t i = 0; i < struct_children.size(); i++) {
+				auto &name = StructType::GetChildName(child_type, i);
+				auto &val = struct_children[i];
+				if (val.type().id() != LogicalTypeId::VARCHAR) {
+					throw BinderException("read_csv_auto requires a type specification as string");
+				}
+				auto def_type = TransformStringToLogicalType(StringValue::Get(val));
+				if (def_type.id() == LogicalTypeId::USER) {
+					throw BinderException("Unrecognized type for read_csv_auto column_types definition");
+				}
+				options.sql_types_per_column[name] = def_type;
+			}
 		} else if (loption == "all_varchar") {
 			options.all_varchar = BooleanValue::Get(kv.second);
 		} else if (loption == "normalize_names") {
@@ -127761,6 +127797,7 @@ TableFunction ReadCSVTableFunction::GetAutoFunction(bool list_parameter) {
 	read_csv_auto.get_batch_index = CSVReaderGetBatchIndex;
 	read_csv_auto.cardinality = CSVReaderCardinality;
 	ReadCSVAddNamedParameters(read_csv_auto);
+	read_csv_auto.named_parameters["column_types"] = LogicalType::ANY;
 	return read_csv_auto;
 }
@@ -156976,6 +157013,7 @@ private:
 	void UpdateDPTree(JoinNode *new_plan);
 	void UpdateJoinNodesInFullPlan(JoinNode *node);
+	bool NodeInFullPlan(JoinNode *node);
 	std::pair<JoinRelationSet *, unique_ptr<LogicalOperator>>
 	GenerateJoins(vector<unique_ptr<LogicalOperator>> &extracted_relations, JoinNode *node);
@@ -157706,7 +157744,6 @@ string JoinNode::ToString() {
 #include <algorithm>
 namespace std {
@@ -157975,6 +158012,10 @@ unique_ptr<JoinNode> JoinOrderOptimizer::CreateJoinTree(JoinRelationSet *set,
 	return result;
 }
+bool JoinOrderOptimizer::NodeInFullPlan(JoinNode *node) {
+	return join_nodes_in_full_plan.find(node->set->ToString()) != join_nodes_in_full_plan.end();
+}
 void JoinOrderOptimizer::UpdateJoinNodesInFullPlan(JoinNode *node) {
 	if (!node) {
 		return;
@@ -158205,7 +158246,7 @@ static vector<unordered_set<idx_t>> AddSuperSets(vector<unordered_set<idx_t>> cu
 }
 // works by first creating all sets with cardinality 1
-// then iterates over each previously create group of subsets and will only add a neighbor if the neighbor
+// then iterates over each previously created group of subsets and will only add a neighbor if the neighbor
 // is greater than all relations in the set.
 static vector<unordered_set<idx_t>> GetAllNeighborSets(JoinRelationSet *new_set, unordered_set<idx_t> &exclusion_set,
                                                        vector<idx_t> neighbors) {
@@ -158241,6 +158282,11 @@ static vector<unordered_set<idx_t>> GetAllNeighborSets(JoinRelationSet *new_set,
 }
 void JoinOrderOptimizer::UpdateDPTree(JoinNode *new_plan) {
+	if (!NodeInFullPlan(new_plan)) {
+		// if the new node is not in the full plan, feel free to return
+		// because you won't be updating the full plan.
+		return;
+	}
 	auto new_set = new_plan->set;
 	// now update every plan that uses this plan
 	unordered_set<idx_t> exclusion_set;
@@ -158302,6 +158348,7 @@ void JoinOrderOptimizer::SolveJoinOrderApproximately() {
 					// update the DP tree in case a plan created by the DP algorithm uses the node
 					// that was potentially just updated by EmitPair. You will get a use-after-free
 					// error if future plans rely on the old node that was just replaced.
+					// if node in FullPath, then updateDP tree.
 					UpdateDPTree(node);
 					if (!best_connection || node->GetCost() < best_connection->GetCost()) {