duckdb 0.6.2-dev490.0 → 0.6.2-dev503.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +50 -3
- package/src/duckdb.hpp +581 -576
- package/src/parquet-amalgamation.cpp +29129 -29129
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -80940,7 +80940,23 @@ vector<LogicalType> BufferedCSVReader::SniffCSV(const vector<LogicalType> &reque
|
|
|
80940
80940
|
// #######
|
|
80941
80941
|
options.num_cols = best_num_cols;
|
|
80942
80942
|
DetectHeader(best_sql_types_candidates, best_header_row);
|
|
80943
|
-
|
|
80943
|
+
auto sql_types_per_column = options.sql_types_per_column;
|
|
80944
|
+
for (idx_t i = 0; i < col_names.size(); i++) {
|
|
80945
|
+
auto it = sql_types_per_column.find(col_names[i]);
|
|
80946
|
+
if (it != sql_types_per_column.end()) {
|
|
80947
|
+
best_sql_types_candidates[i] = {it->second};
|
|
80948
|
+
sql_types_per_column.erase(col_names[i]);
|
|
80949
|
+
}
|
|
80950
|
+
}
|
|
80951
|
+
if (!sql_types_per_column.empty()) {
|
|
80952
|
+
string exception = "COLUMN_TYPES error: Columns with names: ";
|
|
80953
|
+
for (auto &col : sql_types_per_column) {
|
|
80954
|
+
exception += "\"" + col.first + "\",";
|
|
80955
|
+
}
|
|
80956
|
+
exception.pop_back();
|
|
80957
|
+
exception += " do not exist in the CSV File";
|
|
80958
|
+
throw BinderException(exception);
|
|
80959
|
+
}
|
|
80944
80960
|
// #######
|
|
80945
80961
|
// ### type detection (refining)
|
|
80946
80962
|
// #######
|
|
@@ -127123,6 +127139,26 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
|
|
|
127123
127139
|
if (names.empty()) {
|
|
127124
127140
|
throw BinderException("read_csv requires at least a single column as input!");
|
|
127125
127141
|
}
|
|
127142
|
+
} else if (loption == "column_types") {
|
|
127143
|
+
auto &child_type = kv.second.type();
|
|
127144
|
+
if (child_type.id() != LogicalTypeId::STRUCT) {
|
|
127145
|
+
throw BinderException("read_csv_auto column_types requires a struct as input");
|
|
127146
|
+
}
|
|
127147
|
+
auto &struct_children = StructValue::GetChildren(kv.second);
|
|
127148
|
+
D_ASSERT(StructType::GetChildCount(child_type) == struct_children.size());
|
|
127149
|
+
for (idx_t i = 0; i < struct_children.size(); i++) {
|
|
127150
|
+
auto &name = StructType::GetChildName(child_type, i);
|
|
127151
|
+
auto &val = struct_children[i];
|
|
127152
|
+
if (val.type().id() != LogicalTypeId::VARCHAR) {
|
|
127153
|
+
throw BinderException("read_csv_auto requires a type specification as string");
|
|
127154
|
+
}
|
|
127155
|
+
auto def_type = TransformStringToLogicalType(StringValue::Get(val));
|
|
127156
|
+
if (def_type.id() == LogicalTypeId::USER) {
|
|
127157
|
+
throw BinderException("Unrecognized type for read_csv_auto column_types definition");
|
|
127158
|
+
}
|
|
127159
|
+
options.sql_types_per_column[name] = def_type;
|
|
127160
|
+
}
|
|
127161
|
+
|
|
127126
127162
|
} else if (loption == "all_varchar") {
|
|
127127
127163
|
options.all_varchar = BooleanValue::Get(kv.second);
|
|
127128
127164
|
} else if (loption == "normalize_names") {
|
|
@@ -127761,6 +127797,7 @@ TableFunction ReadCSVTableFunction::GetAutoFunction(bool list_parameter) {
|
|
|
127761
127797
|
read_csv_auto.get_batch_index = CSVReaderGetBatchIndex;
|
|
127762
127798
|
read_csv_auto.cardinality = CSVReaderCardinality;
|
|
127763
127799
|
ReadCSVAddNamedParameters(read_csv_auto);
|
|
127800
|
+
read_csv_auto.named_parameters["column_types"] = LogicalType::ANY;
|
|
127764
127801
|
return read_csv_auto;
|
|
127765
127802
|
}
|
|
127766
127803
|
|
|
@@ -156976,6 +157013,7 @@ private:
|
|
|
156976
157013
|
void UpdateDPTree(JoinNode *new_plan);
|
|
156977
157014
|
|
|
156978
157015
|
void UpdateJoinNodesInFullPlan(JoinNode *node);
|
|
157016
|
+
bool NodeInFullPlan(JoinNode *node);
|
|
156979
157017
|
|
|
156980
157018
|
std::pair<JoinRelationSet *, unique_ptr<LogicalOperator>>
|
|
156981
157019
|
GenerateJoins(vector<unique_ptr<LogicalOperator>> &extracted_relations, JoinNode *node);
|
|
@@ -157706,7 +157744,6 @@ string JoinNode::ToString() {
|
|
|
157706
157744
|
|
|
157707
157745
|
|
|
157708
157746
|
|
|
157709
|
-
|
|
157710
157747
|
#include <algorithm>
|
|
157711
157748
|
|
|
157712
157749
|
namespace std {
|
|
@@ -157975,6 +158012,10 @@ unique_ptr<JoinNode> JoinOrderOptimizer::CreateJoinTree(JoinRelationSet *set,
|
|
|
157975
158012
|
return result;
|
|
157976
158013
|
}
|
|
157977
158014
|
|
|
158015
|
+
// Reports whether `node` is already recorded in join_nodes_in_full_plan.
// The lookup key is the string form of the node's relation set (node->set->ToString()).
// `node` and `node->set` are dereferenced unconditionally, so both must be non-null.
bool JoinOrderOptimizer::NodeInFullPlan(JoinNode *node) {
|
|
158016
|
+
return join_nodes_in_full_plan.find(node->set->ToString()) != join_nodes_in_full_plan.end();
|
|
158017
|
+
}
|
|
158018
|
+
|
|
157978
158019
|
void JoinOrderOptimizer::UpdateJoinNodesInFullPlan(JoinNode *node) {
|
|
157979
158020
|
if (!node) {
|
|
157980
158021
|
return;
|
|
@@ -158205,7 +158246,7 @@ static vector<unordered_set<idx_t>> AddSuperSets(vector<unordered_set<idx_t>> cu
|
|
|
158205
158246
|
}
|
|
158206
158247
|
|
|
158207
158248
|
// works by first creating all sets with cardinality 1
|
|
158208
|
-
// then iterates over each previously
|
|
158249
|
+
// then iterates over each previously created group of subsets and will only add a neighbor if the neighbor
|
|
158209
158250
|
// is greater than all relations in the set.
|
|
158210
158251
|
static vector<unordered_set<idx_t>> GetAllNeighborSets(JoinRelationSet *new_set, unordered_set<idx_t> &exclusion_set,
|
|
158211
158252
|
vector<idx_t> neighbors) {
|
|
@@ -158241,6 +158282,11 @@ static vector<unordered_set<idx_t>> GetAllNeighborSets(JoinRelationSet *new_set,
|
|
|
158241
158282
|
}
|
|
158242
158283
|
|
|
158243
158284
|
void JoinOrderOptimizer::UpdateDPTree(JoinNode *new_plan) {
|
|
158285
|
+
if (!NodeInFullPlan(new_plan)) {
|
|
158286
|
+
// if the new node is not in the full plan, feel free to return
|
|
158287
|
+
// because you won't be updating the full plan.
|
|
158288
|
+
return;
|
|
158289
|
+
}
|
|
158244
158290
|
auto new_set = new_plan->set;
|
|
158245
158291
|
// now update every plan that uses this plan
|
|
158246
158292
|
unordered_set<idx_t> exclusion_set;
|
|
@@ -158302,6 +158348,7 @@ void JoinOrderOptimizer::SolveJoinOrderApproximately() {
|
|
|
158302
158348
|
// update the DP tree in case a plan created by the DP algorithm uses the node
|
|
158303
158349
|
// that was potentially just updated by EmitPair. You will get a use-after-free
|
|
158304
158350
|
// error if future plans rely on the old node that was just replaced.
|
|
158351
|
+
// UpdateDPTree only does work when the node is part of the full plan.
|
|
158305
158352
|
UpdateDPTree(node);
|
|
158306
158353
|
|
|
158307
158354
|
if (!best_connection || node->GetCost() < best_connection->GetCost()) {
|