duckdb 0.6.2-dev490.0 → 0.6.2-dev503.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.6.2-dev490.0",
5
+ "version": "0.6.2-dev503.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -80940,7 +80940,23 @@ vector<LogicalType> BufferedCSVReader::SniffCSV(const vector<LogicalType> &reque
80940
80940
  // #######
80941
80941
  options.num_cols = best_num_cols;
80942
80942
  DetectHeader(best_sql_types_candidates, best_header_row);
80943
-
80943
+ auto sql_types_per_column = options.sql_types_per_column;
80944
+ for (idx_t i = 0; i < col_names.size(); i++) {
80945
+ auto it = sql_types_per_column.find(col_names[i]);
80946
+ if (it != sql_types_per_column.end()) {
80947
+ best_sql_types_candidates[i] = {it->second};
80948
+ sql_types_per_column.erase(col_names[i]);
80949
+ }
80950
+ }
80951
+ if (!sql_types_per_column.empty()) {
80952
+ string exception = "COLUMN_TYPES error: Columns with names: ";
80953
+ for (auto &col : sql_types_per_column) {
80954
+ exception += "\"" + col.first + "\",";
80955
+ }
80956
+ exception.pop_back();
80957
+ exception += " do not exist in the CSV File";
80958
+ throw BinderException(exception);
80959
+ }
80944
80960
  // #######
80945
80961
  // ### type detection (refining)
80946
80962
  // #######
@@ -127123,6 +127139,26 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
127123
127139
  if (names.empty()) {
127124
127140
  throw BinderException("read_csv requires at least a single column as input!");
127125
127141
  }
127142
+ } else if (loption == "column_types") {
127143
+ auto &child_type = kv.second.type();
127144
+ if (child_type.id() != LogicalTypeId::STRUCT) {
127145
+ throw BinderException("read_csv_auto column_types requires a struct as input");
127146
+ }
127147
+ auto &struct_children = StructValue::GetChildren(kv.second);
127148
+ D_ASSERT(StructType::GetChildCount(child_type) == struct_children.size());
127149
+ for (idx_t i = 0; i < struct_children.size(); i++) {
127150
+ auto &name = StructType::GetChildName(child_type, i);
127151
+ auto &val = struct_children[i];
127152
+ if (val.type().id() != LogicalTypeId::VARCHAR) {
127153
+ throw BinderException("read_csv_auto requires a type specification as string");
127154
+ }
127155
+ auto def_type = TransformStringToLogicalType(StringValue::Get(val));
127156
+ if (def_type.id() == LogicalTypeId::USER) {
127157
+ throw BinderException("Unrecognized type for read_csv_auto column_types definition");
127158
+ }
127159
+ options.sql_types_per_column[name] = def_type;
127160
+ }
127161
+
127126
127162
  } else if (loption == "all_varchar") {
127127
127163
  options.all_varchar = BooleanValue::Get(kv.second);
127128
127164
  } else if (loption == "normalize_names") {
@@ -127761,6 +127797,7 @@ TableFunction ReadCSVTableFunction::GetAutoFunction(bool list_parameter) {
127761
127797
  read_csv_auto.get_batch_index = CSVReaderGetBatchIndex;
127762
127798
  read_csv_auto.cardinality = CSVReaderCardinality;
127763
127799
  ReadCSVAddNamedParameters(read_csv_auto);
127800
+ read_csv_auto.named_parameters["column_types"] = LogicalType::ANY;
127764
127801
  return read_csv_auto;
127765
127802
  }
127766
127803
 
@@ -156976,6 +157013,7 @@ private:
156976
157013
  void UpdateDPTree(JoinNode *new_plan);
156977
157014
 
156978
157015
  void UpdateJoinNodesInFullPlan(JoinNode *node);
157016
+ bool NodeInFullPlan(JoinNode *node);
156979
157017
 
156980
157018
  std::pair<JoinRelationSet *, unique_ptr<LogicalOperator>>
156981
157019
  GenerateJoins(vector<unique_ptr<LogicalOperator>> &extracted_relations, JoinNode *node);
@@ -157706,7 +157744,6 @@ string JoinNode::ToString() {
157706
157744
 
157707
157745
 
157708
157746
 
157709
-
157710
157747
  #include <algorithm>
157711
157748
 
157712
157749
  namespace std {
@@ -157975,6 +158012,10 @@ unique_ptr<JoinNode> JoinOrderOptimizer::CreateJoinTree(JoinRelationSet *set,
157975
158012
  return result;
157976
158013
  }
157977
158014
 
158015
+ bool JoinOrderOptimizer::NodeInFullPlan(JoinNode *node) {
158016
+ return join_nodes_in_full_plan.find(node->set->ToString()) != join_nodes_in_full_plan.end();
158017
+ }
158018
+
157978
158019
  void JoinOrderOptimizer::UpdateJoinNodesInFullPlan(JoinNode *node) {
157979
158020
  if (!node) {
157980
158021
  return;
@@ -158205,7 +158246,7 @@ static vector<unordered_set<idx_t>> AddSuperSets(vector<unordered_set<idx_t>> cu
158205
158246
  }
158206
158247
 
158207
158248
  // works by first creating all sets with cardinality 1
158208
- // then iterates over each previously create group of subsets and will only add a neighbor if the neighbor
158249
+ // then iterates over each previously created group of subsets and will only add a neighbor if the neighbor
158209
158250
  // is greater than all relations in the set.
158210
158251
  static vector<unordered_set<idx_t>> GetAllNeighborSets(JoinRelationSet *new_set, unordered_set<idx_t> &exclusion_set,
158211
158252
  vector<idx_t> neighbors) {
@@ -158241,6 +158282,11 @@ static vector<unordered_set<idx_t>> GetAllNeighborSets(JoinRelationSet *new_set,
158241
158282
  }
158242
158283
 
158243
158284
  void JoinOrderOptimizer::UpdateDPTree(JoinNode *new_plan) {
158285
+ if (!NodeInFullPlan(new_plan)) {
158286
+ // if the new node is not part of the full plan, return early:
158287
+ // no plan in the full plan depends on it, so there is nothing to update.
158288
+ return;
158289
+ }
158244
158290
  auto new_set = new_plan->set;
158245
158291
  // now update every plan that uses this plan
158246
158292
  unordered_set<idx_t> exclusion_set;
@@ -158302,6 +158348,7 @@ void JoinOrderOptimizer::SolveJoinOrderApproximately() {
158302
158348
  // update the DP tree in case a plan created by the DP algorithm uses the node
158303
158349
  // that was potentially just updated by EmitPair. You will get a use-after-free
158304
158350
  // error if future plans rely on the old node that was just replaced.
158351
+ // UpdateDPTree only does work when the node is part of the full plan.
158305
158352
  UpdateDPTree(node);
158306
158353
 
158307
158354
  if (!best_connection || node->GetCost() < best_connection->GetCost()) {