duckdb 0.7.2-dev904.0 → 0.7.2-dev982.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/src/common/enums/logical_operator_type.cpp +2 -0
- package/src/duckdb/src/common/serializer/enum_serializer.cpp +4 -0
- package/src/duckdb/src/common/types/value.cpp +46 -0
- package/src/duckdb/src/execution/column_binding_resolver.cpp +15 -5
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +40 -19
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +2 -0
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +3 -3
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +5 -13
- package/src/duckdb/src/execution/operator/projection/physical_projection.cpp +34 -0
- package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +97 -0
- package/src/duckdb/src/execution/physical_plan_generator.cpp +3 -0
- package/src/duckdb/src/function/scalar/math/numeric.cpp +87 -0
- package/src/duckdb/src/function/scalar/math_functions.cpp +3 -0
- package/src/duckdb/src/function/scalar/string/hex.cpp +201 -0
- package/src/duckdb/src/function/scalar/string_functions.cpp +1 -0
- package/src/duckdb/src/function/table/read_csv.cpp +46 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +5 -4
- package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/string_util.hpp +13 -0
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +11 -7
- package/src/duckdb/src/include/duckdb/common/vector_operations/unary_executor.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +6 -0
- package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_projection.hpp +5 -0
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/scalar/math_functions.hpp +8 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_asof_join.hpp +22 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -2
- package/src/duckdb/src/include/duckdb.h +1 -1
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -0
- package/src/duckdb/src/optimizer/filter_pullup.cpp +3 -1
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +3 -1
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +4 -0
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +8 -4
- package/src/duckdb/src/optimizer/pullup/pullup_from_left.cpp +2 -2
- package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +1 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_inner_join.cpp +3 -0
- package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +4 -2
- package/src/duckdb/src/optimizer/pushdown/pushdown_mark_join.cpp +1 -1
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +1 -0
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +1 -0
- package/src/duckdb/src/optimizer/statistics_propagator.cpp +1 -0
- package/src/duckdb/src/parser/tableref/joinref.cpp +4 -0
- package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +8 -1
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +10 -3
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +60 -12
- package/src/duckdb/src/planner/logical_operator.cpp +3 -0
- package/src/duckdb/src/planner/logical_operator_visitor.cpp +1 -0
- package/src/duckdb/src/planner/operator/logical_asof_join.cpp +8 -0
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +3 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +32 -0
- package/src/duckdb/third_party/libpg_query/include/nodes/primnodes.hpp +3 -3
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +915 -913
- package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +1 -0
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +17371 -17306
- package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_string.cpp +2 -0
- package/src/duckdb/ub_src_planner_operator.cpp +2 -0
package/package.json
CHANGED
@@ -20,6 +20,8 @@ string LogicalOperatorToString(LogicalOperatorType type) {
|
|
20
20
|
return "EXPRESSION_GET";
|
21
21
|
case LogicalOperatorType::LOGICAL_ANY_JOIN:
|
22
22
|
return "ANY_JOIN";
|
23
|
+
case LogicalOperatorType::LOGICAL_ASOF_JOIN:
|
24
|
+
return "ASOF_JOIN";
|
23
25
|
case LogicalOperatorType::LOGICAL_COMPARISON_JOIN:
|
24
26
|
return "COMPARISON_JOIN";
|
25
27
|
case LogicalOperatorType::LOGICAL_DELIM_JOIN:
|
@@ -236,6 +236,8 @@ JoinRefType EnumSerializer::StringToEnum(const char *value) {
|
|
236
236
|
return JoinRefType::CROSS;
|
237
237
|
} else if (StringUtil::Equals(value, "POSITIONAL")) {
|
238
238
|
return JoinRefType::POSITIONAL;
|
239
|
+
} else if (StringUtil::Equals(value, "ASOF")) {
|
240
|
+
return JoinRefType::ASOF;
|
239
241
|
} else {
|
240
242
|
throw NotImplementedException("EnumSerializer::StringToEnum not implemented for enum value");
|
241
243
|
}
|
@@ -252,6 +254,8 @@ const char *EnumSerializer::EnumToString(JoinRefType value) {
|
|
252
254
|
return "CROSS";
|
253
255
|
case JoinRefType::POSITIONAL:
|
254
256
|
return "POSITIONAL";
|
257
|
+
case JoinRefType::ASOF:
|
258
|
+
return "ASOF";
|
255
259
|
default:
|
256
260
|
throw NotImplementedException("ToString not implemented for enum value");
|
257
261
|
}
|
@@ -330,6 +330,52 @@ Value Value::MaximumValue(const LogicalType &type) {
|
|
330
330
|
}
|
331
331
|
}
|
332
332
|
|
333
|
+
//! Returns the positive-infinity Value for the given type.
//! Supported type ids are FLOAT, DOUBLE, DATE and the TIMESTAMP variants
//! (TIMESTAMP, TIMESTAMP_MS, TIMESTAMP_NS, TIMESTAMP_SEC, TIMESTAMP_TZ).
//! Any other type id raises an InvalidTypeException.
Value Value::Infinity(const LogicalType &type) {
	const auto type_id = type.id();
	switch (type_id) {
	// floating point types use the IEEE infinity
	case LogicalTypeId::DOUBLE:
		return Value::DOUBLE(std::numeric_limits<double>::infinity());
	case LogicalTypeId::FLOAT:
		return Value::FLOAT(std::numeric_limits<float>::infinity());
	// dates use the date_t infinity marker
	case LogicalTypeId::DATE:
		return Value::DATE(date_t::infinity());
	// all timestamp flavours share the timestamp_t infinity marker
	case LogicalTypeId::TIMESTAMP:
		return Value::TIMESTAMP(timestamp_t::infinity());
	case LogicalTypeId::TIMESTAMP_SEC:
		return Value::TIMESTAMPSEC(timestamp_t::infinity());
	case LogicalTypeId::TIMESTAMP_MS:
		return Value::TIMESTAMPMS(timestamp_t::infinity());
	case LogicalTypeId::TIMESTAMP_NS:
		return Value::TIMESTAMPNS(timestamp_t::infinity());
	case LogicalTypeId::TIMESTAMP_TZ:
		return Value::TIMESTAMPTZ(timestamp_t::infinity());
	default:
		break;
	}
	// no infinity representation is provided for this type
	throw InvalidTypeException(type, "Infinity requires numeric type");
}
|
355
|
+
|
356
|
+
//! Returns the negative-infinity Value for the given type.
//! Supported type ids are FLOAT, DOUBLE, DATE and the TIMESTAMP variants
//! (TIMESTAMP, TIMESTAMP_MS, TIMESTAMP_NS, TIMESTAMP_SEC, TIMESTAMP_TZ).
//! Any other type id raises an InvalidTypeException.
Value Value::NegativeInfinity(const LogicalType &type) {
	const auto type_id = type.id();
	switch (type_id) {
	// floating point types use the negated IEEE infinity
	case LogicalTypeId::DOUBLE:
		return Value::DOUBLE(-std::numeric_limits<double>::infinity());
	case LogicalTypeId::FLOAT:
		return Value::FLOAT(-std::numeric_limits<float>::infinity());
	// dates use the date_t negative-infinity marker
	case LogicalTypeId::DATE:
		return Value::DATE(date_t::ninfinity());
	// all timestamp flavours share the timestamp_t negative-infinity marker
	case LogicalTypeId::TIMESTAMP:
		return Value::TIMESTAMP(timestamp_t::ninfinity());
	case LogicalTypeId::TIMESTAMP_SEC:
		return Value::TIMESTAMPSEC(timestamp_t::ninfinity());
	case LogicalTypeId::TIMESTAMP_MS:
		return Value::TIMESTAMPMS(timestamp_t::ninfinity());
	case LogicalTypeId::TIMESTAMP_NS:
		return Value::TIMESTAMPNS(timestamp_t::ninfinity());
	case LogicalTypeId::TIMESTAMP_TZ:
		return Value::TIMESTAMPTZ(timestamp_t::ninfinity());
	default:
		break;
	}
	// no negative-infinity representation is provided for this type
	throw InvalidTypeException(type, "NegativeInfinity requires numeric type");
}
|
378
|
+
|
333
379
|
Value Value::BOOLEAN(int8_t value) {
|
334
380
|
Value result(LogicalType::BOOLEAN);
|
335
381
|
result.value_.boolean = bool(value);
|
@@ -18,7 +18,10 @@ ColumnBindingResolver::ColumnBindingResolver() {
|
|
18
18
|
}
|
19
19
|
|
20
20
|
void ColumnBindingResolver::VisitOperator(LogicalOperator &op) {
|
21
|
-
|
21
|
+
switch (op.type) {
|
22
|
+
case LogicalOperatorType::LOGICAL_ASOF_JOIN:
|
23
|
+
case LogicalOperatorType::LOGICAL_COMPARISON_JOIN:
|
24
|
+
case LogicalOperatorType::LOGICAL_DELIM_JOIN: {
|
22
25
|
// special case: comparison join
|
23
26
|
auto &comp_join = (LogicalComparisonJoin &)op;
|
24
27
|
// first get the bindings of the LHS and resolve the LHS expressions
|
@@ -41,7 +44,8 @@ void ColumnBindingResolver::VisitOperator(LogicalOperator &op) {
|
|
41
44
|
// finally update the bindings with the result bindings of the join
|
42
45
|
bindings = op.GetColumnBindings();
|
43
46
|
return;
|
44
|
-
}
|
47
|
+
}
|
48
|
+
case LogicalOperatorType::LOGICAL_ANY_JOIN: {
|
45
49
|
// ANY join, this join is different because we evaluate the expression on the bindings of BOTH join sides at
|
46
50
|
// once i.e. we set the bindings first to the bindings of the entire join, and then resolve the expressions of
|
47
51
|
// this operator
|
@@ -54,19 +58,22 @@ void ColumnBindingResolver::VisitOperator(LogicalOperator &op) {
|
|
54
58
|
}
|
55
59
|
VisitOperatorExpressions(op);
|
56
60
|
return;
|
57
|
-
}
|
61
|
+
}
|
62
|
+
case LogicalOperatorType::LOGICAL_CREATE_INDEX: {
|
58
63
|
// CREATE INDEX statement, add the columns of the table with table index 0 to the binding set
|
59
64
|
// afterwards bind the expressions of the CREATE INDEX statement
|
60
65
|
auto &create_index = (LogicalCreateIndex &)op;
|
61
66
|
bindings = LogicalOperator::GenerateColumnBindings(0, create_index.table.GetColumns().LogicalColumnCount());
|
62
67
|
VisitOperatorExpressions(op);
|
63
68
|
return;
|
64
|
-
}
|
69
|
+
}
|
70
|
+
case LogicalOperatorType::LOGICAL_GET: {
|
65
71
|
//! We first need to update the current set of bindings and then visit operator expressions
|
66
72
|
bindings = op.GetColumnBindings();
|
67
73
|
VisitOperatorExpressions(op);
|
68
74
|
return;
|
69
|
-
}
|
75
|
+
}
|
76
|
+
case LogicalOperatorType::LOGICAL_INSERT: {
|
70
77
|
//! We want to execute the normal path, but also add a dummy 'excluded' binding if there is a
|
71
78
|
// ON CONFLICT DO UPDATE clause
|
72
79
|
auto &insert_op = (LogicalInsert &)op;
|
@@ -89,6 +96,9 @@ void ColumnBindingResolver::VisitOperator(LogicalOperator &op) {
|
|
89
96
|
return;
|
90
97
|
}
|
91
98
|
}
|
99
|
+
default:
|
100
|
+
break;
|
101
|
+
}
|
92
102
|
// general case
|
93
103
|
// first visit the children of this operator
|
94
104
|
VisitOperatorChildren(op);
|
@@ -130,8 +130,10 @@ OperatorResultType PhysicalBlockwiseNLJoin::ExecuteInternal(ExecutionContext &co
|
|
130
130
|
}
|
131
131
|
|
132
132
|
// now perform the actual join
|
133
|
-
// we perform a cross product, then execute the expression directly on the cross product
|
133
|
+
// we perform a cross product, then execute the expression directly on the cross product result
|
134
134
|
idx_t result_count = 0;
|
135
|
+
bool found_match[STANDARD_VECTOR_SIZE] = {false};
|
136
|
+
|
135
137
|
do {
|
136
138
|
auto result = state.cross_product.Execute(input, *intermediate_chunk);
|
137
139
|
if (result == OperatorResultType::NEED_MORE_INPUT) {
|
@@ -142,39 +144,58 @@ OperatorResultType PhysicalBlockwiseNLJoin::ExecuteInternal(ExecutionContext &co
|
|
142
144
|
state.left_outer.ConstructLeftJoinResult(input, *intermediate_chunk);
|
143
145
|
state.left_outer.Reset();
|
144
146
|
}
|
147
|
+
|
148
|
+
if (join_type == JoinType::SEMI) {
|
149
|
+
PhysicalJoin::ConstructSemiJoinResult(input, chunk, found_match);
|
150
|
+
}
|
151
|
+
if (join_type == JoinType::ANTI) {
|
152
|
+
PhysicalJoin::ConstructAntiJoinResult(input, chunk, found_match);
|
153
|
+
}
|
154
|
+
|
145
155
|
return OperatorResultType::NEED_MORE_INPUT;
|
146
156
|
}
|
147
157
|
|
148
158
|
// now perform the computation
|
149
159
|
result_count = state.executor.SelectExpression(*intermediate_chunk, state.match_sel);
|
160
|
+
|
161
|
+
// handle anti and semi joins with different logic
|
150
162
|
if (result_count > 0) {
|
151
163
|
// found a match!
|
152
|
-
//
|
153
|
-
if (
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
164
|
+
// handle anti semi join conditions first
|
165
|
+
if (join_type == JoinType::ANTI || join_type == JoinType::SEMI) {
|
166
|
+
if (state.cross_product.ScanLHS()) {
|
167
|
+
found_match[state.cross_product.PositionInChunk()] = true;
|
168
|
+
} else {
|
169
|
+
for (idx_t i = 0; i < result_count; i++) {
|
170
|
+
found_match[state.match_sel.get_index(i)] = true;
|
171
|
+
}
|
172
|
+
}
|
173
|
+
intermediate_chunk->Reset();
|
174
|
+
// trick the loop to continue as semi and anti joins will never produce more output than
|
175
|
+
// the LHS cardinality
|
176
|
+
result_count = 0;
|
158
177
|
} else {
|
159
|
-
//
|
160
|
-
|
161
|
-
|
162
|
-
|
178
|
+
// check if the cross product is scanning the LHS or the RHS in its entirety
|
179
|
+
if (!state.cross_product.ScanLHS()) {
|
180
|
+
// set the match flags in the LHS
|
181
|
+
state.left_outer.SetMatches(state.match_sel, result_count);
|
182
|
+
// set the match flag in the RHS
|
183
|
+
gstate.right_outer.SetMatch(state.cross_product.ScanPosition() +
|
184
|
+
state.cross_product.PositionInChunk());
|
185
|
+
} else {
|
186
|
+
// set the match flag in the LHS
|
187
|
+
state.left_outer.SetMatch(state.cross_product.PositionInChunk());
|
188
|
+
// set the match flags in the RHS
|
189
|
+
gstate.right_outer.SetMatches(state.match_sel, result_count, state.cross_product.ScanPosition());
|
190
|
+
}
|
191
|
+
intermediate_chunk->Slice(state.match_sel, result_count);
|
163
192
|
}
|
164
|
-
intermediate_chunk->Slice(state.match_sel, result_count);
|
165
193
|
} else {
|
166
194
|
// no result: reset the chunk
|
167
195
|
intermediate_chunk->Reset();
|
168
196
|
}
|
169
197
|
} while (result_count == 0);
|
170
198
|
|
171
|
-
if (join_type == JoinType::SEMI || join_type == JoinType::ANTI) {
|
172
|
-
for (idx_t col_idx = 0; col_idx < chunk.ColumnCount(); col_idx++) {
|
173
|
-
chunk.data[col_idx].Reference(intermediate_chunk->data[col_idx]);
|
174
|
-
}
|
175
|
-
chunk.SetCardinality(*intermediate_chunk);
|
176
|
-
}
|
177
|
-
|
178
199
|
return OperatorResultType::HAVE_MORE_OUTPUT;
|
179
200
|
}
|
180
201
|
|
@@ -334,7 +334,9 @@ idx_t PhysicalRangeJoin::SelectJoinTail(const ExpressionType &condition, Vector
|
|
334
334
|
case ExpressionType::COMPARE_DISTINCT_FROM:
|
335
335
|
return VectorOperations::DistinctFrom(left, right, sel, count, true_sel, nullptr);
|
336
336
|
case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
|
337
|
+
return VectorOperations::NotDistinctFrom(left, right, sel, count, true_sel, nullptr);
|
337
338
|
case ExpressionType::COMPARE_EQUAL:
|
339
|
+
return VectorOperations::Equals(left, right, sel, count, true_sel, nullptr);
|
338
340
|
default:
|
339
341
|
throw InternalException("Unsupported comparison type for PhysicalRangeJoin");
|
340
342
|
}
|
@@ -498,12 +498,12 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, bool try_add_line) {
|
|
498
498
|
}
|
499
499
|
|
500
500
|
// figure out the exact line number
|
501
|
+
UnifiedVectorFormat inserted_column_data;
|
502
|
+
insert_chunk.data[col_idx].ToUnifiedFormat(parse_chunk.size(), inserted_column_data);
|
501
503
|
idx_t row_idx;
|
502
504
|
for (row_idx = 0; row_idx < parse_chunk.size(); row_idx++) {
|
503
|
-
auto &inserted_column = insert_chunk.data[col_idx];
|
504
505
|
auto &parsed_column = parse_chunk.data[col_idx];
|
505
|
-
|
506
|
-
if (FlatVector::IsNull(inserted_column, row_idx) && !FlatVector::IsNull(parsed_column, row_idx)) {
|
506
|
+
if (!inserted_column_data.validity.RowIsValid(row_idx) && !FlatVector::IsNull(parsed_column, row_idx)) {
|
507
507
|
break;
|
508
508
|
}
|
509
509
|
}
|
@@ -870,16 +870,7 @@ vector<LogicalType> BufferedCSVReader::SniffCSV(const vector<LogicalType> &reque
|
|
870
870
|
// #######
|
871
871
|
// ### type detection (initial)
|
872
872
|
// #######
|
873
|
-
|
874
|
-
vector<LogicalType> type_candidates = {
|
875
|
-
LogicalType::VARCHAR,
|
876
|
-
LogicalType::TIMESTAMP,
|
877
|
-
LogicalType::DATE,
|
878
|
-
LogicalType::TIME,
|
879
|
-
LogicalType::DOUBLE,
|
880
|
-
/* LogicalType::FLOAT,*/ LogicalType::BIGINT,
|
881
|
-
/*LogicalType::INTEGER,*/ /*LogicalType::SMALLINT, LogicalType::TINYINT,*/ LogicalType::BOOLEAN,
|
882
|
-
LogicalType::SQLNULL};
|
873
|
+
|
883
874
|
// format template candidates, ordered by descending specificity (~ from high to low)
|
884
875
|
std::map<LogicalTypeId, vector<const char *>> format_template_candidates = {
|
885
876
|
{LogicalTypeId::DATE, {"%m-%d-%Y", "%m-%d-%y", "%d-%m-%Y", "%d-%m-%y", "%Y-%m-%d", "%y-%m-%d"}},
|
@@ -890,8 +881,8 @@ vector<LogicalType> BufferedCSVReader::SniffCSV(const vector<LogicalType> &reque
|
|
890
881
|
vector<vector<LogicalType>> best_sql_types_candidates;
|
891
882
|
map<LogicalTypeId, vector<string>> best_format_candidates;
|
892
883
|
DataChunk best_header_row;
|
893
|
-
DetectCandidateTypes(
|
894
|
-
best_sql_types_candidates, best_format_candidates, best_header_row);
|
884
|
+
DetectCandidateTypes(options.auto_type_candidates, format_template_candidates, info_candidates, original_options,
|
885
|
+
best_num_cols, best_sql_types_candidates, best_format_candidates, best_header_row);
|
895
886
|
|
896
887
|
if (best_format_candidates.empty() || best_header_row.size() == 0) {
|
897
888
|
throw InvalidInputException(
|
@@ -939,7 +930,8 @@ vector<LogicalType> BufferedCSVReader::SniffCSV(const vector<LogicalType> &reque
|
|
939
930
|
// #######
|
940
931
|
// ### type detection (refining)
|
941
932
|
// #######
|
942
|
-
return RefineTypeDetection(
|
933
|
+
return RefineTypeDetection(options.auto_type_candidates, requested_types, best_sql_types_candidates,
|
934
|
+
best_format_candidates);
|
943
935
|
}
|
944
936
|
|
945
937
|
bool BufferedCSVReader::TryParseComplexCSV(DataChunk &insert_chunk, string &error_message) {
|
@@ -1,6 +1,7 @@
|
|
1
1
|
#include "duckdb/execution/operator/projection/physical_projection.hpp"
|
2
2
|
#include "duckdb/parallel/thread_context.hpp"
|
3
3
|
#include "duckdb/execution/expression_executor.hpp"
|
4
|
+
#include "duckdb/planner/expression/bound_reference_expression.hpp"
|
4
5
|
|
5
6
|
namespace duckdb {
|
6
7
|
|
@@ -35,6 +36,39 @@ unique_ptr<OperatorState> PhysicalProjection::GetOperatorState(ExecutionContext
|
|
35
36
|
return make_unique<ProjectionState>(context, select_list);
|
36
37
|
}
|
37
38
|
|
39
|
+
//! Builds a PhysicalProjection of column references to be placed on top of a join.
//! For each side, an empty projection map selects every column of that side (in order);
//! otherwise only the columns listed in the map are selected (in map order).
//! LHS column i is referenced at input position i, RHS column i at position lhs_types.size() + i.
//! \param proj_types The result types of the projection (moved into the operator)
//! \param lhs_types The types of the left join input
//! \param rhs_types The types of the right join input
//! \param left_projection_map Selected LHS column indexes, or empty for all
//! \param right_projection_map Selected RHS column indexes, or empty for all
//! \param estimated_cardinality Cardinality estimate forwarded to the projection
unique_ptr<PhysicalOperator>
PhysicalProjection::CreateJoinProjection(vector<LogicalType> proj_types, const vector<LogicalType> &lhs_types,
                                         const vector<LogicalType> &rhs_types, const vector<idx_t> &left_projection_map,
                                         const vector<idx_t> &right_projection_map, const idx_t estimated_cardinality) {

	vector<unique_ptr<Expression>> select_exprs;
	select_exprs.reserve(proj_types.size());

	// Appends the references for one side of the join; column_offset is the position of
	// the side's first column in the join output
	const auto append_side = [&select_exprs](const vector<LogicalType> &side_types,
	                                         const vector<idx_t> &projection_map, const idx_t column_offset) {
		if (projection_map.empty()) {
			// no map: reference every column of this side
			for (storage_t col = 0; col < side_types.size(); ++col) {
				select_exprs.emplace_back(
				    make_unique<BoundReferenceExpression>(side_types[col], column_offset + col));
			}
			return;
		}
		// map present: reference only the listed columns, in map order
		for (auto col : projection_map) {
			select_exprs.emplace_back(make_unique<BoundReferenceExpression>(side_types[col], column_offset + col));
		}
	};

	append_side(lhs_types, left_projection_map, 0);
	append_side(rhs_types, right_projection_map, lhs_types.size());

	return make_unique<PhysicalProjection>(std::move(proj_types), std::move(select_exprs), estimated_cardinality);
}
|
71
|
+
|
38
72
|
string PhysicalProjection::ParamsToString() const {
|
39
73
|
string extra_info;
|
40
74
|
for (auto &expr : select_list) {
|
@@ -0,0 +1,97 @@
|
|
1
|
+
#include "duckdb/execution/operator/aggregate/physical_window.hpp"
|
2
|
+
#include "duckdb/execution/operator/join/physical_iejoin.hpp"
|
3
|
+
#include "duckdb/execution/operator/projection/physical_projection.hpp"
|
4
|
+
#include "duckdb/execution/physical_plan_generator.hpp"
|
5
|
+
#include "duckdb/main/client_context.hpp"
|
6
|
+
#include "duckdb/planner/expression/bound_constant_expression.hpp"
|
7
|
+
#include "duckdb/planner/expression/bound_reference_expression.hpp"
|
8
|
+
#include "duckdb/planner/expression/bound_window_expression.hpp"
|
9
|
+
#include "duckdb/planner/operator/logical_asof_join.hpp"
|
10
|
+
|
11
|
+
namespace duckdb {
|
12
|
+
|
13
|
+
//! Creates the physical plan for a LogicalAsOfJoin.
//!
//! Temporary implementation: the join is planned as an IEJoin on top of a Window:
//!   1. The RHS is extended with
//!        LEAD(asof_column, 1, infinity) OVER (PARTITION BY equi_column... ORDER BY asof_column) AS asof_temp
//!   2. The join conditions are extended with (lhs asof expression < asof_temp) and handed to a PhysicalIEJoin
//!   3. A projection removes asof_temp and applies the projection maps
//!
//! The conditions may only be equalities (=, IS NOT DISTINCT FROM) plus one >= condition (the AsOf condition);
//! any other comparison, or a missing >= condition, raises an InternalException.
//! Note that op.types and op.conditions are modified (conditions are moved into the IEJoin).
unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalAsOfJoin &op) {
	// now visit the children
	D_ASSERT(op.children.size() == 2);
	idx_t lhs_cardinality = op.children[0]->EstimateCardinality(context);
	idx_t rhs_cardinality = op.children[1]->EstimateCardinality(context);
	auto left = CreatePlan(*op.children[0]);
	auto right = CreatePlan(*op.children[1]);
	D_ASSERT(left && right);

	// Validate: collect the equality conditions and locate the AsOf (>=) condition
	vector<idx_t> equi_indexes;
	// op.conditions.size() acts as the "not found" sentinel
	auto asof_idx = op.conditions.size();
	for (size_t c = 0; c < op.conditions.size(); ++c) {
		auto &cond = op.conditions[c];
		switch (cond.comparison) {
		case ExpressionType::COMPARE_EQUAL:
		case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
			equi_indexes.emplace_back(c);
			break;
		case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
			D_ASSERT(asof_idx == op.conditions.size());
			asof_idx = c;
			break;
		default:
			throw InternalException("Invalid ASOF JOIN comparison");
		}
	}
	// Checked explicitly rather than with an assertion alone: asof_idx is used to index
	// op.conditions below, which would be out of bounds if no >= condition was found
	if (asof_idx >= op.conditions.size()) {
		throw InternalException("Missing ASOF JOIN inequality");
	}

	// Temporary implementation: IEJoin of Window
	// LEAD(asof_column, 1, infinity) OVER (PARTITION BY equi_column... ORDER BY asof_column) AS asof_temp
	// NOTE: asof_comp/asof_column refer into op.conditions and must not be used after the
	// emplace_back on op.conditions further down
	auto &asof_comp = op.conditions[asof_idx];
	auto &asof_column = asof_comp.right;
	auto asof_type = asof_column->return_type;
	auto asof_temp = make_unique<BoundWindowExpression>(ExpressionType::WINDOW_LEAD, asof_type, nullptr, nullptr);
	asof_temp->children.emplace_back(asof_column->Copy());
	asof_temp->offset_expr = make_unique<BoundConstantExpression>(Value::BIGINT(1));
	// the last row of each partition has no successor: its upper bound is infinity
	asof_temp->default_expr = make_unique<BoundConstantExpression>(Value::Infinity(asof_type));
	for (auto equi_idx : equi_indexes) {
		asof_temp->partitions.emplace_back(op.conditions[equi_idx].right->Copy());
	}
	asof_temp->orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_FIRST, asof_column->Copy());
	asof_temp->start = WindowBoundary::UNBOUNDED_PRECEDING;
	asof_temp->end = WindowBoundary::CURRENT_ROW_ROWS;

	vector<unique_ptr<Expression>> window_select;
	window_select.emplace_back(std::move(asof_temp));

	// the window emits the RHS columns followed by asof_temp
	auto window_types = right->types;
	window_types.emplace_back(asof_type);

	auto window = make_unique<PhysicalWindow>(window_types, std::move(window_select), rhs_cardinality);
	window->children.emplace_back(std::move(right));

	// IEJoin(left, window, conditions || asof_column < asof_temp)
	JoinCondition asof_upper;
	asof_upper.left = asof_comp.left->Copy();
	// asof_temp is the last column of the window output
	asof_upper.right = make_unique<BoundReferenceExpression>(asof_type, window_types.size() - 1);
	asof_upper.comparison = ExpressionType::COMPARE_LESSTHAN;

	// We have an equality condition, so we may have to deal with projection maps.
	// IEJoin does not (currently) support them, so we have to do it manually
	auto proj_types = op.types;

	// the IEJoin emits: all LHS columns, all RHS columns, asof_temp
	auto lhs_types = op.children[0]->types;
	op.types = lhs_types;

	auto rhs_types = op.children[1]->types;
	op.types.insert(op.types.end(), rhs_types.begin(), rhs_types.end());

	op.types.emplace_back(asof_type);
	op.conditions.emplace_back(std::move(asof_upper));
	auto iejoin = make_unique<PhysicalIEJoin>(op, std::move(left), std::move(window), std::move(op.conditions),
	                                          op.join_type, op.estimated_cardinality);

	// Project away asof_temp and anything from the projection maps
	auto proj = PhysicalProjection::CreateJoinProjection(proj_types, lhs_types, rhs_types, op.left_projection_map,
	                                                     op.right_projection_map, lhs_cardinality);
	proj->children.push_back(std::move(iejoin));

	return proj;
}
|
96
|
+
|
97
|
+
} // namespace duckdb
|
@@ -115,6 +115,9 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalOperator &
|
|
115
115
|
case LogicalOperatorType::LOGICAL_DELIM_JOIN:
|
116
116
|
plan = CreatePlan((LogicalDelimJoin &)op);
|
117
117
|
break;
|
118
|
+
case LogicalOperatorType::LOGICAL_ASOF_JOIN:
|
119
|
+
plan = CreatePlan((LogicalAsOfJoin &)op);
|
120
|
+
break;
|
118
121
|
case LogicalOperatorType::LOGICAL_COMPARISON_JOIN:
|
119
122
|
plan = CreatePlan((LogicalComparisonJoin &)op);
|
120
123
|
break;
|
@@ -2,6 +2,7 @@
|
|
2
2
|
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
3
3
|
#include "duckdb/function/scalar/trigonometric_functions.hpp"
|
4
4
|
#include "duckdb/common/operator/abs.hpp"
|
5
|
+
#include "duckdb/common/operator/multiply.hpp"
|
5
6
|
#include "duckdb/common/types/hugeint.hpp"
|
6
7
|
#include "duckdb/common/types/cast_helpers.hpp"
|
7
8
|
#include "duckdb/planner/expression/bound_function_expression.hpp"
|
@@ -1161,4 +1162,90 @@ void EvenFun::RegisterFunction(BuiltinFunctions &set) {
|
|
1161
1162
|
ScalarFunction::UnaryFunction<double, double, EvenOperator>));
|
1162
1163
|
}
|
1163
1164
|
|
1165
|
+
//===--------------------------------------------------------------------===//
|
1166
|
+
// gcd
|
1167
|
+
//===--------------------------------------------------------------------===//
|
1168
|
+
|
1169
|
+
// should be replaced with std::gcd in a newer C++ standard
|
1170
|
+
template <class TA>
|
1171
|
+
TA GreatestCommonDivisor(TA left, TA right) {
|
1172
|
+
TA a = left;
|
1173
|
+
TA b = right;
|
1174
|
+
|
1175
|
+
// This protects the following modulo operations from a corner case,
|
1176
|
+
// where we would get a runtime error due to an integer overflow.
|
1177
|
+
if ((left == NumericLimits<TA>::Minimum() && right == -1) ||
|
1178
|
+
(left == -1 && right == NumericLimits<TA>::Minimum())) {
|
1179
|
+
return 1;
|
1180
|
+
}
|
1181
|
+
|
1182
|
+
while (true) {
|
1183
|
+
if (a == 0) {
|
1184
|
+
return TryAbsOperator::Operation<TA, TA>(b);
|
1185
|
+
}
|
1186
|
+
b %= a;
|
1187
|
+
|
1188
|
+
if (b == 0) {
|
1189
|
+
return TryAbsOperator::Operation<TA, TA>(a);
|
1190
|
+
}
|
1191
|
+
a %= b;
|
1192
|
+
}
|
1193
|
+
}
|
1194
|
+
|
1195
|
+
//! Binary operator wrapper that forwards to GreatestCommonDivisor
struct GreatestCommonDivisorOperator {
	template <class TA, class TB, class TR>
	static inline TR Operation(TA left, TB right) {
		TR divisor = GreatestCommonDivisor(left, right);
		return divisor;
	}
};
|
1201
|
+
|
1202
|
+
//! Registers the gcd scalar function for BIGINT and HUGEINT arguments,
//! under both the name "gcd" and the name "greatest_common_divisor"
void GreatestCommonDivisorFun::RegisterFunction(BuiltinFunctions &set) {
	ScalarFunctionSet gcd_functions("gcd");

	// (BIGINT, BIGINT) -> BIGINT
	gcd_functions.AddFunction(
	    ScalarFunction({LogicalType::BIGINT, LogicalType::BIGINT}, LogicalType::BIGINT,
	                   ScalarFunction::BinaryFunction<int64_t, int64_t, int64_t, GreatestCommonDivisorOperator>));
	// (HUGEINT, HUGEINT) -> HUGEINT
	gcd_functions.AddFunction(
	    ScalarFunction({LogicalType::HUGEINT, LogicalType::HUGEINT}, LogicalType::HUGEINT,
	                   ScalarFunction::BinaryFunction<hugeint_t, hugeint_t, hugeint_t, GreatestCommonDivisorOperator>));

	// register under the short name first, then again under the long name
	set.AddFunction(gcd_functions);
	gcd_functions.name = "greatest_common_divisor";
	set.AddFunction(gcd_functions);
}
|
1216
|
+
|
1217
|
+
//===--------------------------------------------------------------------===//
|
1218
|
+
// lcm
|
1219
|
+
//===--------------------------------------------------------------------===//
|
1220
|
+
|
1221
|
+
// should be replaced with std::lcm in a newer C++ standard
|
1222
|
+
struct LeastCommonMultipleOperator {
|
1223
|
+
template <class TA, class TB, class TR>
|
1224
|
+
static inline TR Operation(TA left, TB right) {
|
1225
|
+
if (left == 0 || right == 0) {
|
1226
|
+
return 0;
|
1227
|
+
}
|
1228
|
+
TR result;
|
1229
|
+
if (!TryMultiplyOperator::Operation<TA, TB, TR>(left, right / GreatestCommonDivisor(left, right), result)) {
|
1230
|
+
throw OutOfRangeException("lcm value is out of range");
|
1231
|
+
}
|
1232
|
+
return TryAbsOperator::Operation<TR, TR>(result);
|
1233
|
+
}
|
1234
|
+
};
|
1235
|
+
|
1236
|
+
//! Registers the lcm scalar function for BIGINT and HUGEINT arguments,
//! under both the name "lcm" and the name "least_common_multiple"
void LeastCommonMultipleFun::RegisterFunction(BuiltinFunctions &set) {
	ScalarFunctionSet lcm_functions("lcm");

	// (BIGINT, BIGINT) -> BIGINT
	lcm_functions.AddFunction(
	    ScalarFunction({LogicalType::BIGINT, LogicalType::BIGINT}, LogicalType::BIGINT,
	                   ScalarFunction::BinaryFunction<int64_t, int64_t, int64_t, LeastCommonMultipleOperator>));
	// (HUGEINT, HUGEINT) -> HUGEINT
	lcm_functions.AddFunction(
	    ScalarFunction({LogicalType::HUGEINT, LogicalType::HUGEINT}, LogicalType::HUGEINT,
	                   ScalarFunction::BinaryFunction<hugeint_t, hugeint_t, hugeint_t, LeastCommonMultipleOperator>));

	// register under the short name first, then again under the long name
	set.AddFunction(lcm_functions);
	lcm_functions.name = "least_common_multiple";
	set.AddFunction(lcm_functions);
}
|
1250
|
+
|
1164
1251
|
} // namespace duckdb
|