duckdb 0.7.2-dev921.0 → 0.7.2-dev982.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/src/common/enums/logical_operator_type.cpp +2 -0
- package/src/duckdb/src/common/serializer/enum_serializer.cpp +4 -0
- package/src/duckdb/src/common/types/value.cpp +46 -0
- package/src/duckdb/src/execution/column_binding_resolver.cpp +15 -5
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +2 -0
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +3 -3
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +5 -13
- package/src/duckdb/src/execution/operator/projection/physical_projection.cpp +34 -0
- package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +97 -0
- package/src/duckdb/src/execution/physical_plan_generator.cpp +3 -0
- package/src/duckdb/src/function/scalar/math/numeric.cpp +87 -0
- package/src/duckdb/src/function/scalar/math_functions.cpp +3 -0
- package/src/duckdb/src/function/scalar/string/hex.cpp +201 -0
- package/src/duckdb/src/function/scalar/string_functions.cpp +1 -0
- package/src/duckdb/src/function/table/read_csv.cpp +46 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +5 -4
- package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/string_util.hpp +13 -0
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +11 -7
- package/src/duckdb/src/include/duckdb/common/vector_operations/unary_executor.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +6 -0
- package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_projection.hpp +5 -0
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/scalar/math_functions.hpp +8 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_asof_join.hpp +22 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -2
- package/src/duckdb/src/include/duckdb.h +1 -1
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -0
- package/src/duckdb/src/optimizer/filter_pullup.cpp +3 -1
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +3 -1
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +4 -0
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +8 -4
- package/src/duckdb/src/optimizer/pullup/pullup_from_left.cpp +2 -2
- package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +1 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_inner_join.cpp +3 -0
- package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +4 -2
- package/src/duckdb/src/optimizer/pushdown/pushdown_mark_join.cpp +1 -1
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +1 -0
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +1 -0
- package/src/duckdb/src/optimizer/statistics_propagator.cpp +1 -0
- package/src/duckdb/src/parser/tableref/joinref.cpp +4 -0
- package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +8 -1
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +10 -3
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +60 -12
- package/src/duckdb/src/planner/logical_operator.cpp +3 -0
- package/src/duckdb/src/planner/logical_operator_visitor.cpp +1 -0
- package/src/duckdb/src/planner/operator/logical_asof_join.cpp +8 -0
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +3 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +32 -0
- package/src/duckdb/third_party/libpg_query/include/nodes/primnodes.hpp +3 -3
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +915 -913
- package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +1 -0
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +17371 -17306
- package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_string.cpp +2 -0
- package/src/duckdb/ub_src_planner_operator.cpp +2 -0
package/package.json
CHANGED
@@ -20,6 +20,8 @@ string LogicalOperatorToString(LogicalOperatorType type) {
|
|
20
20
|
return "EXPRESSION_GET";
|
21
21
|
case LogicalOperatorType::LOGICAL_ANY_JOIN:
|
22
22
|
return "ANY_JOIN";
|
23
|
+
case LogicalOperatorType::LOGICAL_ASOF_JOIN:
|
24
|
+
return "ASOF_JOIN";
|
23
25
|
case LogicalOperatorType::LOGICAL_COMPARISON_JOIN:
|
24
26
|
return "COMPARISON_JOIN";
|
25
27
|
case LogicalOperatorType::LOGICAL_DELIM_JOIN:
|
@@ -236,6 +236,8 @@ JoinRefType EnumSerializer::StringToEnum(const char *value) {
|
|
236
236
|
return JoinRefType::CROSS;
|
237
237
|
} else if (StringUtil::Equals(value, "POSITIONAL")) {
|
238
238
|
return JoinRefType::POSITIONAL;
|
239
|
+
} else if (StringUtil::Equals(value, "ASOF")) {
|
240
|
+
return JoinRefType::ASOF;
|
239
241
|
} else {
|
240
242
|
throw NotImplementedException("EnumSerializer::StringToEnum not implemented for enum value");
|
241
243
|
}
|
@@ -252,6 +254,8 @@ const char *EnumSerializer::EnumToString(JoinRefType value) {
|
|
252
254
|
return "CROSS";
|
253
255
|
case JoinRefType::POSITIONAL:
|
254
256
|
return "POSITIONAL";
|
257
|
+
case JoinRefType::ASOF:
|
258
|
+
return "ASOF";
|
255
259
|
default:
|
256
260
|
throw NotImplementedException("ToString not implemented for enum value");
|
257
261
|
}
|
@@ -330,6 +330,52 @@ Value Value::MaximumValue(const LogicalType &type) {
|
|
330
330
|
}
|
331
331
|
}
|
332
332
|
|
333
|
+
Value Value::Infinity(const LogicalType &type) {
|
334
|
+
switch (type.id()) {
|
335
|
+
case LogicalTypeId::DATE:
|
336
|
+
return Value::DATE(date_t::infinity());
|
337
|
+
case LogicalTypeId::TIMESTAMP:
|
338
|
+
return Value::TIMESTAMP(timestamp_t::infinity());
|
339
|
+
case LogicalTypeId::TIMESTAMP_MS:
|
340
|
+
return Value::TIMESTAMPMS(timestamp_t::infinity());
|
341
|
+
case LogicalTypeId::TIMESTAMP_NS:
|
342
|
+
return Value::TIMESTAMPNS(timestamp_t::infinity());
|
343
|
+
case LogicalTypeId::TIMESTAMP_SEC:
|
344
|
+
return Value::TIMESTAMPSEC(timestamp_t::infinity());
|
345
|
+
case LogicalTypeId::TIMESTAMP_TZ:
|
346
|
+
return Value::TIMESTAMPTZ(timestamp_t::infinity());
|
347
|
+
case LogicalTypeId::FLOAT:
|
348
|
+
return Value::FLOAT(std::numeric_limits<float>::infinity());
|
349
|
+
case LogicalTypeId::DOUBLE:
|
350
|
+
return Value::DOUBLE(std::numeric_limits<double>::infinity());
|
351
|
+
default:
|
352
|
+
throw InvalidTypeException(type, "Infinity requires numeric type");
|
353
|
+
}
|
354
|
+
}
|
355
|
+
|
356
|
+
Value Value::NegativeInfinity(const LogicalType &type) {
|
357
|
+
switch (type.id()) {
|
358
|
+
case LogicalTypeId::DATE:
|
359
|
+
return Value::DATE(date_t::ninfinity());
|
360
|
+
case LogicalTypeId::TIMESTAMP:
|
361
|
+
return Value::TIMESTAMP(timestamp_t::ninfinity());
|
362
|
+
case LogicalTypeId::TIMESTAMP_MS:
|
363
|
+
return Value::TIMESTAMPMS(timestamp_t::ninfinity());
|
364
|
+
case LogicalTypeId::TIMESTAMP_NS:
|
365
|
+
return Value::TIMESTAMPNS(timestamp_t::ninfinity());
|
366
|
+
case LogicalTypeId::TIMESTAMP_SEC:
|
367
|
+
return Value::TIMESTAMPSEC(timestamp_t::ninfinity());
|
368
|
+
case LogicalTypeId::TIMESTAMP_TZ:
|
369
|
+
return Value::TIMESTAMPTZ(timestamp_t::ninfinity());
|
370
|
+
case LogicalTypeId::FLOAT:
|
371
|
+
return Value::FLOAT(-std::numeric_limits<float>::infinity());
|
372
|
+
case LogicalTypeId::DOUBLE:
|
373
|
+
return Value::DOUBLE(-std::numeric_limits<double>::infinity());
|
374
|
+
default:
|
375
|
+
throw InvalidTypeException(type, "NegativeInfinity requires numeric type");
|
376
|
+
}
|
377
|
+
}
|
378
|
+
|
333
379
|
Value Value::BOOLEAN(int8_t value) {
|
334
380
|
Value result(LogicalType::BOOLEAN);
|
335
381
|
result.value_.boolean = bool(value);
|
@@ -18,7 +18,10 @@ ColumnBindingResolver::ColumnBindingResolver() {
|
|
18
18
|
}
|
19
19
|
|
20
20
|
void ColumnBindingResolver::VisitOperator(LogicalOperator &op) {
|
21
|
-
|
21
|
+
switch (op.type) {
|
22
|
+
case LogicalOperatorType::LOGICAL_ASOF_JOIN:
|
23
|
+
case LogicalOperatorType::LOGICAL_COMPARISON_JOIN:
|
24
|
+
case LogicalOperatorType::LOGICAL_DELIM_JOIN: {
|
22
25
|
// special case: comparison join
|
23
26
|
auto &comp_join = (LogicalComparisonJoin &)op;
|
24
27
|
// first get the bindings of the LHS and resolve the LHS expressions
|
@@ -41,7 +44,8 @@ void ColumnBindingResolver::VisitOperator(LogicalOperator &op) {
|
|
41
44
|
// finally update the bindings with the result bindings of the join
|
42
45
|
bindings = op.GetColumnBindings();
|
43
46
|
return;
|
44
|
-
}
|
47
|
+
}
|
48
|
+
case LogicalOperatorType::LOGICAL_ANY_JOIN: {
|
45
49
|
// ANY join, this join is different because we evaluate the expression on the bindings of BOTH join sides at
|
46
50
|
// once i.e. we set the bindings first to the bindings of the entire join, and then resolve the expressions of
|
47
51
|
// this operator
|
@@ -54,19 +58,22 @@ void ColumnBindingResolver::VisitOperator(LogicalOperator &op) {
|
|
54
58
|
}
|
55
59
|
VisitOperatorExpressions(op);
|
56
60
|
return;
|
57
|
-
}
|
61
|
+
}
|
62
|
+
case LogicalOperatorType::LOGICAL_CREATE_INDEX: {
|
58
63
|
// CREATE INDEX statement, add the columns of the table with table index 0 to the binding set
|
59
64
|
// afterwards bind the expressions of the CREATE INDEX statement
|
60
65
|
auto &create_index = (LogicalCreateIndex &)op;
|
61
66
|
bindings = LogicalOperator::GenerateColumnBindings(0, create_index.table.GetColumns().LogicalColumnCount());
|
62
67
|
VisitOperatorExpressions(op);
|
63
68
|
return;
|
64
|
-
}
|
69
|
+
}
|
70
|
+
case LogicalOperatorType::LOGICAL_GET: {
|
65
71
|
//! We first need to update the current set of bindings and then visit operator expressions
|
66
72
|
bindings = op.GetColumnBindings();
|
67
73
|
VisitOperatorExpressions(op);
|
68
74
|
return;
|
69
|
-
}
|
75
|
+
}
|
76
|
+
case LogicalOperatorType::LOGICAL_INSERT: {
|
70
77
|
//! We want to execute the normal path, but also add a dummy 'excluded' binding if there is a
|
71
78
|
// ON CONFLICT DO UPDATE clause
|
72
79
|
auto &insert_op = (LogicalInsert &)op;
|
@@ -89,6 +96,9 @@ void ColumnBindingResolver::VisitOperator(LogicalOperator &op) {
|
|
89
96
|
return;
|
90
97
|
}
|
91
98
|
}
|
99
|
+
default:
|
100
|
+
break;
|
101
|
+
}
|
92
102
|
// general case
|
93
103
|
// first visit the children of this operator
|
94
104
|
VisitOperatorChildren(op);
|
@@ -334,7 +334,9 @@ idx_t PhysicalRangeJoin::SelectJoinTail(const ExpressionType &condition, Vector
|
|
334
334
|
case ExpressionType::COMPARE_DISTINCT_FROM:
|
335
335
|
return VectorOperations::DistinctFrom(left, right, sel, count, true_sel, nullptr);
|
336
336
|
case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
|
337
|
+
return VectorOperations::NotDistinctFrom(left, right, sel, count, true_sel, nullptr);
|
337
338
|
case ExpressionType::COMPARE_EQUAL:
|
339
|
+
return VectorOperations::Equals(left, right, sel, count, true_sel, nullptr);
|
338
340
|
default:
|
339
341
|
throw InternalException("Unsupported comparison type for PhysicalRangeJoin");
|
340
342
|
}
|
@@ -498,12 +498,12 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, bool try_add_line) {
|
|
498
498
|
}
|
499
499
|
|
500
500
|
// figure out the exact line number
|
501
|
+
UnifiedVectorFormat inserted_column_data;
|
502
|
+
insert_chunk.data[col_idx].ToUnifiedFormat(parse_chunk.size(), inserted_column_data);
|
501
503
|
idx_t row_idx;
|
502
504
|
for (row_idx = 0; row_idx < parse_chunk.size(); row_idx++) {
|
503
|
-
auto &inserted_column = insert_chunk.data[col_idx];
|
504
505
|
auto &parsed_column = parse_chunk.data[col_idx];
|
505
|
-
|
506
|
-
if (FlatVector::IsNull(inserted_column, row_idx) && !FlatVector::IsNull(parsed_column, row_idx)) {
|
506
|
+
if (!inserted_column_data.validity.RowIsValid(row_idx) && !FlatVector::IsNull(parsed_column, row_idx)) {
|
507
507
|
break;
|
508
508
|
}
|
509
509
|
}
|
@@ -870,16 +870,7 @@ vector<LogicalType> BufferedCSVReader::SniffCSV(const vector<LogicalType> &reque
|
|
870
870
|
// #######
|
871
871
|
// ### type detection (initial)
|
872
872
|
// #######
|
873
|
-
|
874
|
-
vector<LogicalType> type_candidates = {
|
875
|
-
LogicalType::VARCHAR,
|
876
|
-
LogicalType::TIMESTAMP,
|
877
|
-
LogicalType::DATE,
|
878
|
-
LogicalType::TIME,
|
879
|
-
LogicalType::DOUBLE,
|
880
|
-
/* LogicalType::FLOAT,*/ LogicalType::BIGINT,
|
881
|
-
/*LogicalType::INTEGER,*/ /*LogicalType::SMALLINT, LogicalType::TINYINT,*/ LogicalType::BOOLEAN,
|
882
|
-
LogicalType::SQLNULL};
|
873
|
+
|
883
874
|
// format template candidates, ordered by descending specificity (~ from high to low)
|
884
875
|
std::map<LogicalTypeId, vector<const char *>> format_template_candidates = {
|
885
876
|
{LogicalTypeId::DATE, {"%m-%d-%Y", "%m-%d-%y", "%d-%m-%Y", "%d-%m-%y", "%Y-%m-%d", "%y-%m-%d"}},
|
@@ -890,8 +881,8 @@ vector<LogicalType> BufferedCSVReader::SniffCSV(const vector<LogicalType> &reque
|
|
890
881
|
vector<vector<LogicalType>> best_sql_types_candidates;
|
891
882
|
map<LogicalTypeId, vector<string>> best_format_candidates;
|
892
883
|
DataChunk best_header_row;
|
893
|
-
DetectCandidateTypes(
|
894
|
-
best_sql_types_candidates, best_format_candidates, best_header_row);
|
884
|
+
DetectCandidateTypes(options.auto_type_candidates, format_template_candidates, info_candidates, original_options,
|
885
|
+
best_num_cols, best_sql_types_candidates, best_format_candidates, best_header_row);
|
895
886
|
|
896
887
|
if (best_format_candidates.empty() || best_header_row.size() == 0) {
|
897
888
|
throw InvalidInputException(
|
@@ -939,7 +930,8 @@ vector<LogicalType> BufferedCSVReader::SniffCSV(const vector<LogicalType> &reque
|
|
939
930
|
// #######
|
940
931
|
// ### type detection (refining)
|
941
932
|
// #######
|
942
|
-
return RefineTypeDetection(
|
933
|
+
return RefineTypeDetection(options.auto_type_candidates, requested_types, best_sql_types_candidates,
|
934
|
+
best_format_candidates);
|
943
935
|
}
|
944
936
|
|
945
937
|
bool BufferedCSVReader::TryParseComplexCSV(DataChunk &insert_chunk, string &error_message) {
|
@@ -1,6 +1,7 @@
|
|
1
1
|
#include "duckdb/execution/operator/projection/physical_projection.hpp"
|
2
2
|
#include "duckdb/parallel/thread_context.hpp"
|
3
3
|
#include "duckdb/execution/expression_executor.hpp"
|
4
|
+
#include "duckdb/planner/expression/bound_reference_expression.hpp"
|
4
5
|
|
5
6
|
namespace duckdb {
|
6
7
|
|
@@ -35,6 +36,39 @@ unique_ptr<OperatorState> PhysicalProjection::GetOperatorState(ExecutionContext
|
|
35
36
|
return make_unique<ProjectionState>(context, select_list);
|
36
37
|
}
|
37
38
|
|
39
|
+
unique_ptr<PhysicalOperator>
|
40
|
+
PhysicalProjection::CreateJoinProjection(vector<LogicalType> proj_types, const vector<LogicalType> &lhs_types,
|
41
|
+
const vector<LogicalType> &rhs_types, const vector<idx_t> &left_projection_map,
|
42
|
+
const vector<idx_t> &right_projection_map, const idx_t estimated_cardinality) {
|
43
|
+
|
44
|
+
vector<unique_ptr<Expression>> proj_selects;
|
45
|
+
proj_selects.reserve(proj_types.size());
|
46
|
+
|
47
|
+
if (left_projection_map.empty()) {
|
48
|
+
for (storage_t i = 0; i < lhs_types.size(); ++i) {
|
49
|
+
proj_selects.emplace_back(make_unique<BoundReferenceExpression>(lhs_types[i], i));
|
50
|
+
}
|
51
|
+
} else {
|
52
|
+
for (auto i : left_projection_map) {
|
53
|
+
proj_selects.emplace_back(make_unique<BoundReferenceExpression>(lhs_types[i], i));
|
54
|
+
}
|
55
|
+
}
|
56
|
+
const auto left_cols = lhs_types.size();
|
57
|
+
|
58
|
+
if (right_projection_map.empty()) {
|
59
|
+
for (storage_t i = 0; i < rhs_types.size(); ++i) {
|
60
|
+
proj_selects.emplace_back(make_unique<BoundReferenceExpression>(rhs_types[i], left_cols + i));
|
61
|
+
}
|
62
|
+
|
63
|
+
} else {
|
64
|
+
for (auto i : right_projection_map) {
|
65
|
+
proj_selects.emplace_back(make_unique<BoundReferenceExpression>(rhs_types[i], left_cols + i));
|
66
|
+
}
|
67
|
+
}
|
68
|
+
|
69
|
+
return make_unique<PhysicalProjection>(std::move(proj_types), std::move(proj_selects), estimated_cardinality);
|
70
|
+
}
|
71
|
+
|
38
72
|
string PhysicalProjection::ParamsToString() const {
|
39
73
|
string extra_info;
|
40
74
|
for (auto &expr : select_list) {
|
@@ -0,0 +1,97 @@
|
|
1
|
+
#include "duckdb/execution/operator/aggregate/physical_window.hpp"
|
2
|
+
#include "duckdb/execution/operator/join/physical_iejoin.hpp"
|
3
|
+
#include "duckdb/execution/operator/projection/physical_projection.hpp"
|
4
|
+
#include "duckdb/execution/physical_plan_generator.hpp"
|
5
|
+
#include "duckdb/main/client_context.hpp"
|
6
|
+
#include "duckdb/planner/expression/bound_constant_expression.hpp"
|
7
|
+
#include "duckdb/planner/expression/bound_reference_expression.hpp"
|
8
|
+
#include "duckdb/planner/expression/bound_window_expression.hpp"
|
9
|
+
#include "duckdb/planner/operator/logical_asof_join.hpp"
|
10
|
+
|
11
|
+
namespace duckdb {
|
12
|
+
|
13
|
+
unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalAsOfJoin &op) {
|
14
|
+
// now visit the children
|
15
|
+
D_ASSERT(op.children.size() == 2);
|
16
|
+
idx_t lhs_cardinality = op.children[0]->EstimateCardinality(context);
|
17
|
+
idx_t rhs_cardinality = op.children[1]->EstimateCardinality(context);
|
18
|
+
auto left = CreatePlan(*op.children[0]);
|
19
|
+
auto right = CreatePlan(*op.children[1]);
|
20
|
+
D_ASSERT(left && right);
|
21
|
+
|
22
|
+
// Validate
|
23
|
+
vector<idx_t> equi_indexes;
|
24
|
+
auto asof_idx = op.conditions.size();
|
25
|
+
for (size_t c = 0; c < op.conditions.size(); ++c) {
|
26
|
+
auto &cond = op.conditions[c];
|
27
|
+
switch (cond.comparison) {
|
28
|
+
case ExpressionType::COMPARE_EQUAL:
|
29
|
+
case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
|
30
|
+
equi_indexes.emplace_back(c);
|
31
|
+
break;
|
32
|
+
case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
|
33
|
+
D_ASSERT(asof_idx == op.conditions.size());
|
34
|
+
asof_idx = c;
|
35
|
+
break;
|
36
|
+
default:
|
37
|
+
throw InternalException("Invalid ASOF JOIN comparison");
|
38
|
+
}
|
39
|
+
}
|
40
|
+
D_ASSERT(asof_idx < op.conditions.size());
|
41
|
+
|
42
|
+
// Temporary implementation: IEJoin of Window
|
43
|
+
// LEAD(asof_column, 1, infinity) OVER (PARTITION BY equi_column... ORDER BY asof_column) AS asof_temp
|
44
|
+
auto &asof_comp = op.conditions[asof_idx];
|
45
|
+
auto &asof_column = asof_comp.right;
|
46
|
+
auto asof_type = asof_column->return_type;
|
47
|
+
auto asof_temp = make_unique<BoundWindowExpression>(ExpressionType::WINDOW_LEAD, asof_type, nullptr, nullptr);
|
48
|
+
asof_temp->children.emplace_back(asof_column->Copy());
|
49
|
+
asof_temp->offset_expr = make_unique<BoundConstantExpression>(Value::BIGINT(1));
|
50
|
+
asof_temp->default_expr = make_unique<BoundConstantExpression>(Value::Infinity(asof_type));
|
51
|
+
for (auto equi_idx : equi_indexes) {
|
52
|
+
asof_temp->partitions.emplace_back(op.conditions[equi_idx].right->Copy());
|
53
|
+
}
|
54
|
+
asof_temp->orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_FIRST, asof_column->Copy());
|
55
|
+
asof_temp->start = WindowBoundary::UNBOUNDED_PRECEDING;
|
56
|
+
asof_temp->end = WindowBoundary::CURRENT_ROW_ROWS;
|
57
|
+
|
58
|
+
vector<unique_ptr<Expression>> window_select;
|
59
|
+
window_select.emplace_back(std::move(asof_temp));
|
60
|
+
|
61
|
+
auto window_types = right->types;
|
62
|
+
window_types.emplace_back(asof_type);
|
63
|
+
|
64
|
+
auto window = make_unique<PhysicalWindow>(window_types, std::move(window_select), rhs_cardinality);
|
65
|
+
window->children.emplace_back(std::move(right));
|
66
|
+
|
67
|
+
// IEJoin(left, window, conditions || asof_column < asof_temp)
|
68
|
+
JoinCondition asof_upper;
|
69
|
+
asof_upper.left = asof_comp.left->Copy();
|
70
|
+
asof_upper.right = make_unique<BoundReferenceExpression>(asof_type, window_types.size() - 1);
|
71
|
+
asof_upper.comparison = ExpressionType::COMPARE_LESSTHAN;
|
72
|
+
|
73
|
+
// We have an equality condition, so we may have to deal with projection maps.
|
74
|
+
// IEJoin does not (currently) support them, so we have to do it manually
|
75
|
+
auto proj_types = op.types;
|
76
|
+
op.types.clear();
|
77
|
+
|
78
|
+
auto lhs_types = op.children[0]->types;
|
79
|
+
op.types = lhs_types;
|
80
|
+
|
81
|
+
auto rhs_types = op.children[1]->types;
|
82
|
+
op.types.insert(op.types.end(), rhs_types.begin(), rhs_types.end());
|
83
|
+
|
84
|
+
op.types.emplace_back(asof_type);
|
85
|
+
op.conditions.emplace_back(std::move(asof_upper));
|
86
|
+
auto iejoin = make_unique<PhysicalIEJoin>(op, std::move(left), std::move(window), std::move(op.conditions),
|
87
|
+
op.join_type, op.estimated_cardinality);
|
88
|
+
|
89
|
+
// Project away asof_temp and anything from the projection maps
|
90
|
+
auto proj = PhysicalProjection::CreateJoinProjection(proj_types, lhs_types, rhs_types, op.left_projection_map,
|
91
|
+
op.right_projection_map, lhs_cardinality);
|
92
|
+
proj->children.push_back(std::move(iejoin));
|
93
|
+
|
94
|
+
return proj;
|
95
|
+
}
|
96
|
+
|
97
|
+
} // namespace duckdb
|
@@ -115,6 +115,9 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalOperator &
|
|
115
115
|
case LogicalOperatorType::LOGICAL_DELIM_JOIN:
|
116
116
|
plan = CreatePlan((LogicalDelimJoin &)op);
|
117
117
|
break;
|
118
|
+
case LogicalOperatorType::LOGICAL_ASOF_JOIN:
|
119
|
+
plan = CreatePlan((LogicalAsOfJoin &)op);
|
120
|
+
break;
|
118
121
|
case LogicalOperatorType::LOGICAL_COMPARISON_JOIN:
|
119
122
|
plan = CreatePlan((LogicalComparisonJoin &)op);
|
120
123
|
break;
|
@@ -2,6 +2,7 @@
|
|
2
2
|
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
3
3
|
#include "duckdb/function/scalar/trigonometric_functions.hpp"
|
4
4
|
#include "duckdb/common/operator/abs.hpp"
|
5
|
+
#include "duckdb/common/operator/multiply.hpp"
|
5
6
|
#include "duckdb/common/types/hugeint.hpp"
|
6
7
|
#include "duckdb/common/types/cast_helpers.hpp"
|
7
8
|
#include "duckdb/planner/expression/bound_function_expression.hpp"
|
@@ -1161,4 +1162,90 @@ void EvenFun::RegisterFunction(BuiltinFunctions &set) {
|
|
1161
1162
|
ScalarFunction::UnaryFunction<double, double, EvenOperator>));
|
1162
1163
|
}
|
1163
1164
|
|
1165
|
+
//===--------------------------------------------------------------------===//
|
1166
|
+
// gcd
|
1167
|
+
//===--------------------------------------------------------------------===//
|
1168
|
+
|
1169
|
+
// should be replaced with std::gcd in a newer C++ standard
|
1170
|
+
template <class TA>
|
1171
|
+
TA GreatestCommonDivisor(TA left, TA right) {
|
1172
|
+
TA a = left;
|
1173
|
+
TA b = right;
|
1174
|
+
|
1175
|
+
// This protects the following modulo operations from a corner case,
|
1176
|
+
// where we would get a runtime error due to an integer overflow.
|
1177
|
+
if ((left == NumericLimits<TA>::Minimum() && right == -1) ||
|
1178
|
+
(left == -1 && right == NumericLimits<TA>::Minimum())) {
|
1179
|
+
return 1;
|
1180
|
+
}
|
1181
|
+
|
1182
|
+
while (true) {
|
1183
|
+
if (a == 0) {
|
1184
|
+
return TryAbsOperator::Operation<TA, TA>(b);
|
1185
|
+
}
|
1186
|
+
b %= a;
|
1187
|
+
|
1188
|
+
if (b == 0) {
|
1189
|
+
return TryAbsOperator::Operation<TA, TA>(a);
|
1190
|
+
}
|
1191
|
+
a %= b;
|
1192
|
+
}
|
1193
|
+
}
|
1194
|
+
|
1195
|
+
struct GreatestCommonDivisorOperator {
|
1196
|
+
template <class TA, class TB, class TR>
|
1197
|
+
static inline TR Operation(TA left, TB right) {
|
1198
|
+
return GreatestCommonDivisor(left, right);
|
1199
|
+
}
|
1200
|
+
};
|
1201
|
+
|
1202
|
+
void GreatestCommonDivisorFun::RegisterFunction(BuiltinFunctions &set) {
|
1203
|
+
ScalarFunctionSet funcs("gcd");
|
1204
|
+
|
1205
|
+
funcs.AddFunction(
|
1206
|
+
ScalarFunction({LogicalType::BIGINT, LogicalType::BIGINT}, LogicalType::BIGINT,
|
1207
|
+
ScalarFunction::BinaryFunction<int64_t, int64_t, int64_t, GreatestCommonDivisorOperator>));
|
1208
|
+
funcs.AddFunction(
|
1209
|
+
ScalarFunction({LogicalType::HUGEINT, LogicalType::HUGEINT}, LogicalType::HUGEINT,
|
1210
|
+
ScalarFunction::BinaryFunction<hugeint_t, hugeint_t, hugeint_t, GreatestCommonDivisorOperator>));
|
1211
|
+
|
1212
|
+
set.AddFunction(funcs);
|
1213
|
+
funcs.name = "greatest_common_divisor";
|
1214
|
+
set.AddFunction(funcs);
|
1215
|
+
}
|
1216
|
+
|
1217
|
+
//===--------------------------------------------------------------------===//
|
1218
|
+
// lcm
|
1219
|
+
//===--------------------------------------------------------------------===//
|
1220
|
+
|
1221
|
+
// should be replaced with std::lcm in a newer C++ standard
|
1222
|
+
struct LeastCommonMultipleOperator {
|
1223
|
+
template <class TA, class TB, class TR>
|
1224
|
+
static inline TR Operation(TA left, TB right) {
|
1225
|
+
if (left == 0 || right == 0) {
|
1226
|
+
return 0;
|
1227
|
+
}
|
1228
|
+
TR result;
|
1229
|
+
if (!TryMultiplyOperator::Operation<TA, TB, TR>(left, right / GreatestCommonDivisor(left, right), result)) {
|
1230
|
+
throw OutOfRangeException("lcm value is out of range");
|
1231
|
+
}
|
1232
|
+
return TryAbsOperator::Operation<TR, TR>(result);
|
1233
|
+
}
|
1234
|
+
};
|
1235
|
+
|
1236
|
+
void LeastCommonMultipleFun::RegisterFunction(BuiltinFunctions &set) {
|
1237
|
+
ScalarFunctionSet funcs("lcm");
|
1238
|
+
|
1239
|
+
funcs.AddFunction(
|
1240
|
+
ScalarFunction({LogicalType::BIGINT, LogicalType::BIGINT}, LogicalType::BIGINT,
|
1241
|
+
ScalarFunction::BinaryFunction<int64_t, int64_t, int64_t, LeastCommonMultipleOperator>));
|
1242
|
+
funcs.AddFunction(
|
1243
|
+
ScalarFunction({LogicalType::HUGEINT, LogicalType::HUGEINT}, LogicalType::HUGEINT,
|
1244
|
+
ScalarFunction::BinaryFunction<hugeint_t, hugeint_t, hugeint_t, LeastCommonMultipleOperator>));
|
1245
|
+
|
1246
|
+
set.AddFunction(funcs);
|
1247
|
+
funcs.name = "least_common_multiple";
|
1248
|
+
set.AddFunction(funcs);
|
1249
|
+
}
|
1250
|
+
|
1164
1251
|
} // namespace duckdb
|
@@ -0,0 +1,201 @@
|
|
1
|
+
#include "duckdb/common/exception.hpp"
|
2
|
+
#include "duckdb/common/string_util.hpp"
|
3
|
+
#include "duckdb/common/types/blob.hpp"
|
4
|
+
#include "duckdb/common/vector_operations/unary_executor.hpp"
|
5
|
+
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
6
|
+
#include "duckdb/function/scalar/string_functions.hpp"
|
7
|
+
|
8
|
+
namespace duckdb {
|
9
|
+
|
10
|
+
struct HexStrOperator {
|
11
|
+
template <class INPUT_TYPE, class RESULT_TYPE>
|
12
|
+
static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
|
13
|
+
auto data = input.GetDataUnsafe();
|
14
|
+
auto size = input.GetSize();
|
15
|
+
|
16
|
+
// Allocate empty space
|
17
|
+
auto target = StringVector::EmptyString(result, size * 2);
|
18
|
+
auto output = target.GetDataWriteable();
|
19
|
+
|
20
|
+
for (idx_t i = 0; i < size; ++i) {
|
21
|
+
*output = Blob::HEX_TABLE[(data[i] >> 4) & 0x0F];
|
22
|
+
output++;
|
23
|
+
*output = Blob::HEX_TABLE[data[i] & 0x0F];
|
24
|
+
output++;
|
25
|
+
}
|
26
|
+
|
27
|
+
target.Finalize();
|
28
|
+
return target;
|
29
|
+
}
|
30
|
+
};
|
31
|
+
|
32
|
+
struct FromHexOperator {
|
33
|
+
template <class INPUT_TYPE, class RESULT_TYPE>
|
34
|
+
static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
|
35
|
+
auto data = input.GetDataUnsafe();
|
36
|
+
auto size = input.GetSize();
|
37
|
+
|
38
|
+
if (size > NumericLimits<uint32_t>::Maximum()) {
|
39
|
+
throw InvalidInputException("Hexadecimal input length larger than 2^32 are not supported");
|
40
|
+
}
|
41
|
+
|
42
|
+
D_ASSERT(size <= NumericLimits<uint32_t>::Maximum());
|
43
|
+
auto buffer_size = (size + 1) / 2;
|
44
|
+
|
45
|
+
// Allocate empty space
|
46
|
+
auto target = StringVector::EmptyString(result, buffer_size);
|
47
|
+
auto output = target.GetDataWriteable();
|
48
|
+
|
49
|
+
// Treated as a single byte
|
50
|
+
idx_t i = 0;
|
51
|
+
if (size % 2 != 0) {
|
52
|
+
*output = StringUtil::GetHexValue(data[i]);
|
53
|
+
i++;
|
54
|
+
output++;
|
55
|
+
}
|
56
|
+
|
57
|
+
for (; i < size; i += 2) {
|
58
|
+
uint8_t major = StringUtil::GetHexValue(data[i]);
|
59
|
+
uint8_t minor = StringUtil::GetHexValue(data[i + 1]);
|
60
|
+
*output = (major << 4) | minor;
|
61
|
+
output++;
|
62
|
+
}
|
63
|
+
|
64
|
+
target.Finalize();
|
65
|
+
return target;
|
66
|
+
}
|
67
|
+
};
|
68
|
+
|
69
|
+
struct HexIntegralOperator {
|
70
|
+
template <class INPUT_TYPE, class RESULT_TYPE>
|
71
|
+
static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
|
72
|
+
// Sufficient space for maximum length
|
73
|
+
char buffer[sizeof(INPUT_TYPE) * 2];
|
74
|
+
char *ptr = buffer;
|
75
|
+
idx_t buffer_size = 0;
|
76
|
+
|
77
|
+
bool seen_non_zero = false;
|
78
|
+
for (idx_t offset = sizeof(INPUT_TYPE) * 8; offset >= 4; offset -= 4) {
|
79
|
+
uint8_t byte = (input >> (offset - 4)) & 0x0F;
|
80
|
+
if (byte == 0 && !seen_non_zero && offset > 4) {
|
81
|
+
continue;
|
82
|
+
}
|
83
|
+
seen_non_zero = true;
|
84
|
+
*ptr = Blob::HEX_TABLE[byte];
|
85
|
+
ptr++;
|
86
|
+
buffer_size++;
|
87
|
+
}
|
88
|
+
|
89
|
+
// Allocate empty space
|
90
|
+
auto target = StringVector::EmptyString(result, buffer_size);
|
91
|
+
auto output = target.GetDataWriteable();
|
92
|
+
memcpy(output, buffer, buffer_size);
|
93
|
+
|
94
|
+
target.Finalize();
|
95
|
+
return target;
|
96
|
+
}
|
97
|
+
};
|
98
|
+
|
99
|
+
struct HexHugeIntOperator {
|
100
|
+
template <class INPUT_TYPE, class RESULT_TYPE>
|
101
|
+
static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
|
102
|
+
char buffer[sizeof(INPUT_TYPE) * 2];
|
103
|
+
char *ptr = buffer;
|
104
|
+
idx_t buffer_size = 0;
|
105
|
+
|
106
|
+
uint64_t lower = input.lower;
|
107
|
+
int64_t upper = input.upper;
|
108
|
+
|
109
|
+
bool seen_non_zero = false;
|
110
|
+
for (idx_t offset = 64; offset >= 4; offset -= 4) {
|
111
|
+
uint8_t byte = (upper >> (offset - 4)) & 0x0F;
|
112
|
+
|
113
|
+
if (byte == 0 && !seen_non_zero) {
|
114
|
+
continue;
|
115
|
+
}
|
116
|
+
seen_non_zero = true;
|
117
|
+
*ptr = Blob::HEX_TABLE[byte];
|
118
|
+
ptr++;
|
119
|
+
buffer_size++;
|
120
|
+
}
|
121
|
+
|
122
|
+
for (idx_t offset = 64; offset >= 4; offset -= 4) {
|
123
|
+
uint8_t byte = (lower >> (offset - 4)) & 0x0F;
|
124
|
+
|
125
|
+
// at least one byte space
|
126
|
+
if (byte == 0 && !seen_non_zero && offset > 4) {
|
127
|
+
continue;
|
128
|
+
}
|
129
|
+
seen_non_zero = true;
|
130
|
+
*ptr = Blob::HEX_TABLE[byte];
|
131
|
+
ptr++;
|
132
|
+
buffer_size++;
|
133
|
+
}
|
134
|
+
|
135
|
+
// Allocate empty space
|
136
|
+
auto target = StringVector::EmptyString(result, buffer_size);
|
137
|
+
auto output = target.GetDataWriteable();
|
138
|
+
memcpy(output, buffer, buffer_size);
|
139
|
+
|
140
|
+
target.Finalize();
|
141
|
+
return target;
|
142
|
+
}
|
143
|
+
};
|
144
|
+
|
145
|
+
static void ToHexFunction(DataChunk &args, ExpressionState &state, Vector &result) {
|
146
|
+
D_ASSERT(args.ColumnCount() == 1);
|
147
|
+
auto &input = args.data[0];
|
148
|
+
idx_t count = args.size();
|
149
|
+
|
150
|
+
switch (input.GetType().InternalType()) {
|
151
|
+
case PhysicalType::VARCHAR:
|
152
|
+
UnaryExecutor::ExecuteString<string_t, string_t, HexStrOperator>(input, result, count);
|
153
|
+
break;
|
154
|
+
case PhysicalType::INT64:
|
155
|
+
UnaryExecutor::ExecuteString<int64_t, string_t, HexIntegralOperator>(input, result, count);
|
156
|
+
break;
|
157
|
+
case PhysicalType::INT128:
|
158
|
+
UnaryExecutor::ExecuteString<hugeint_t, string_t, HexHugeIntOperator>(input, result, count);
|
159
|
+
break;
|
160
|
+
case PhysicalType::UINT64:
|
161
|
+
UnaryExecutor::ExecuteString<uint64_t, string_t, HexIntegralOperator>(input, result, count);
|
162
|
+
break;
|
163
|
+
default:
|
164
|
+
throw NotImplementedException("Specifier type not implemented");
|
165
|
+
}
|
166
|
+
}
|
167
|
+
|
168
|
+
static void FromHexFunction(DataChunk &args, ExpressionState &state, Vector &result) {
|
169
|
+
D_ASSERT(args.ColumnCount() == 1);
|
170
|
+
D_ASSERT(args.data[0].GetType().InternalType() == PhysicalType::VARCHAR);
|
171
|
+
auto &input = args.data[0];
|
172
|
+
idx_t count = args.size();
|
173
|
+
|
174
|
+
UnaryExecutor::ExecuteString<string_t, string_t, FromHexOperator>(input, result, count);
|
175
|
+
}
|
176
|
+
|
177
|
+
void HexFun::RegisterFunction(BuiltinFunctions &set) {
|
178
|
+
ScalarFunctionSet to_hex("to_hex");
|
179
|
+
ScalarFunctionSet from_hex("from_hex");
|
180
|
+
|
181
|
+
to_hex.AddFunction(ScalarFunction({LogicalType::VARCHAR}, LogicalType::VARCHAR, ToHexFunction));
|
182
|
+
|
183
|
+
to_hex.AddFunction(ScalarFunction({LogicalType::BIGINT}, LogicalType::VARCHAR, ToHexFunction));
|
184
|
+
|
185
|
+
to_hex.AddFunction(ScalarFunction({LogicalType::UBIGINT}, LogicalType::VARCHAR, ToHexFunction));
|
186
|
+
|
187
|
+
to_hex.AddFunction(ScalarFunction({LogicalType::HUGEINT}, LogicalType::VARCHAR, ToHexFunction));
|
188
|
+
|
189
|
+
from_hex.AddFunction(ScalarFunction({LogicalType::VARCHAR}, LogicalType::BLOB, FromHexFunction));
|
190
|
+
|
191
|
+
set.AddFunction(to_hex);
|
192
|
+
set.AddFunction(from_hex);
|
193
|
+
|
194
|
+
// mysql
|
195
|
+
to_hex.name = "hex";
|
196
|
+
from_hex.name = "unhex";
|
197
|
+
set.AddFunction(to_hex);
|
198
|
+
set.AddFunction(from_hex);
|
199
|
+
}
|
200
|
+
|
201
|
+
} // namespace duckdb
|