duckdb 0.7.2-dev921.0 → 0.7.2-dev982.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61):
  1. package/package.json +1 -1
  2. package/src/duckdb/src/common/enums/logical_operator_type.cpp +2 -0
  3. package/src/duckdb/src/common/serializer/enum_serializer.cpp +4 -0
  4. package/src/duckdb/src/common/types/value.cpp +46 -0
  5. package/src/duckdb/src/execution/column_binding_resolver.cpp +15 -5
  6. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +2 -0
  7. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +3 -3
  8. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +5 -13
  9. package/src/duckdb/src/execution/operator/projection/physical_projection.cpp +34 -0
  10. package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +97 -0
  11. package/src/duckdb/src/execution/physical_plan_generator.cpp +3 -0
  12. package/src/duckdb/src/function/scalar/math/numeric.cpp +87 -0
  13. package/src/duckdb/src/function/scalar/math_functions.cpp +3 -0
  14. package/src/duckdb/src/function/scalar/string/hex.cpp +201 -0
  15. package/src/duckdb/src/function/scalar/string_functions.cpp +1 -0
  16. package/src/duckdb/src/function/table/read_csv.cpp +46 -0
  17. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  18. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +5 -4
  19. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +1 -0
  20. package/src/duckdb/src/include/duckdb/common/string_util.hpp +13 -0
  21. package/src/duckdb/src/include/duckdb/common/types/value.hpp +11 -7
  22. package/src/duckdb/src/include/duckdb/common/vector_operations/unary_executor.hpp +2 -2
  23. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +6 -0
  24. package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_projection.hpp +5 -0
  25. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +1 -0
  26. package/src/duckdb/src/include/duckdb/function/scalar/math_functions.hpp +8 -0
  27. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +4 -0
  28. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
  29. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +1 -0
  30. package/src/duckdb/src/include/duckdb/planner/operator/logical_asof_join.hpp +22 -0
  31. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -2
  32. package/src/duckdb/src/include/duckdb.h +1 -1
  33. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -0
  34. package/src/duckdb/src/optimizer/filter_pullup.cpp +3 -1
  35. package/src/duckdb/src/optimizer/filter_pushdown.cpp +3 -1
  36. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +4 -0
  37. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +8 -4
  38. package/src/duckdb/src/optimizer/pullup/pullup_from_left.cpp +2 -2
  39. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +1 -1
  40. package/src/duckdb/src/optimizer/pushdown/pushdown_inner_join.cpp +3 -0
  41. package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +4 -2
  42. package/src/duckdb/src/optimizer/pushdown/pushdown_mark_join.cpp +1 -1
  43. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +1 -0
  44. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +1 -0
  45. package/src/duckdb/src/optimizer/statistics_propagator.cpp +1 -0
  46. package/src/duckdb/src/parser/tableref/joinref.cpp +4 -0
  47. package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +8 -1
  48. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +10 -3
  49. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +60 -12
  50. package/src/duckdb/src/planner/logical_operator.cpp +3 -0
  51. package/src/duckdb/src/planner/logical_operator_visitor.cpp +1 -0
  52. package/src/duckdb/src/planner/operator/logical_asof_join.cpp +8 -0
  53. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +3 -1
  54. package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +32 -0
  55. package/src/duckdb/third_party/libpg_query/include/nodes/primnodes.hpp +3 -3
  56. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +915 -913
  57. package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +1 -0
  58. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +17371 -17306
  59. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  60. package/src/duckdb/ub_src_function_scalar_string.cpp +2 -0
  61. package/src/duckdb/ub_src_planner_operator.cpp +2 -0
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.7.2-dev921.0",
5
+ "version": "0.7.2-dev982.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -20,6 +20,8 @@ string LogicalOperatorToString(LogicalOperatorType type) {
20
20
  return "EXPRESSION_GET";
21
21
  case LogicalOperatorType::LOGICAL_ANY_JOIN:
22
22
  return "ANY_JOIN";
23
+ case LogicalOperatorType::LOGICAL_ASOF_JOIN:
24
+ return "ASOF_JOIN";
23
25
  case LogicalOperatorType::LOGICAL_COMPARISON_JOIN:
24
26
  return "COMPARISON_JOIN";
25
27
  case LogicalOperatorType::LOGICAL_DELIM_JOIN:
@@ -236,6 +236,8 @@ JoinRefType EnumSerializer::StringToEnum(const char *value) {
236
236
  return JoinRefType::CROSS;
237
237
  } else if (StringUtil::Equals(value, "POSITIONAL")) {
238
238
  return JoinRefType::POSITIONAL;
239
+ } else if (StringUtil::Equals(value, "ASOF")) {
240
+ return JoinRefType::ASOF;
239
241
  } else {
240
242
  throw NotImplementedException("EnumSerializer::StringToEnum not implemented for enum value");
241
243
  }
@@ -252,6 +254,8 @@ const char *EnumSerializer::EnumToString(JoinRefType value) {
252
254
  return "CROSS";
253
255
  case JoinRefType::POSITIONAL:
254
256
  return "POSITIONAL";
257
+ case JoinRefType::ASOF:
258
+ return "ASOF";
255
259
  default:
256
260
  throw NotImplementedException("ToString not implemented for enum value");
257
261
  }
@@ -330,6 +330,52 @@ Value Value::MaximumValue(const LogicalType &type) {
330
330
  }
331
331
  }
332
332
 
333
+ Value Value::Infinity(const LogicalType &type) {
334
+ switch (type.id()) {
335
+ case LogicalTypeId::DATE:
336
+ return Value::DATE(date_t::infinity());
337
+ case LogicalTypeId::TIMESTAMP:
338
+ return Value::TIMESTAMP(timestamp_t::infinity());
339
+ case LogicalTypeId::TIMESTAMP_MS:
340
+ return Value::TIMESTAMPMS(timestamp_t::infinity());
341
+ case LogicalTypeId::TIMESTAMP_NS:
342
+ return Value::TIMESTAMPNS(timestamp_t::infinity());
343
+ case LogicalTypeId::TIMESTAMP_SEC:
344
+ return Value::TIMESTAMPSEC(timestamp_t::infinity());
345
+ case LogicalTypeId::TIMESTAMP_TZ:
346
+ return Value::TIMESTAMPTZ(timestamp_t::infinity());
347
+ case LogicalTypeId::FLOAT:
348
+ return Value::FLOAT(std::numeric_limits<float>::infinity());
349
+ case LogicalTypeId::DOUBLE:
350
+ return Value::DOUBLE(std::numeric_limits<double>::infinity());
351
+ default:
352
+ throw InvalidTypeException(type, "Infinity requires numeric type");
353
+ }
354
+ }
355
+
356
+ Value Value::NegativeInfinity(const LogicalType &type) {
357
+ switch (type.id()) {
358
+ case LogicalTypeId::DATE:
359
+ return Value::DATE(date_t::ninfinity());
360
+ case LogicalTypeId::TIMESTAMP:
361
+ return Value::TIMESTAMP(timestamp_t::ninfinity());
362
+ case LogicalTypeId::TIMESTAMP_MS:
363
+ return Value::TIMESTAMPMS(timestamp_t::ninfinity());
364
+ case LogicalTypeId::TIMESTAMP_NS:
365
+ return Value::TIMESTAMPNS(timestamp_t::ninfinity());
366
+ case LogicalTypeId::TIMESTAMP_SEC:
367
+ return Value::TIMESTAMPSEC(timestamp_t::ninfinity());
368
+ case LogicalTypeId::TIMESTAMP_TZ:
369
+ return Value::TIMESTAMPTZ(timestamp_t::ninfinity());
370
+ case LogicalTypeId::FLOAT:
371
+ return Value::FLOAT(-std::numeric_limits<float>::infinity());
372
+ case LogicalTypeId::DOUBLE:
373
+ return Value::DOUBLE(-std::numeric_limits<double>::infinity());
374
+ default:
375
+ throw InvalidTypeException(type, "NegativeInfinity requires numeric type");
376
+ }
377
+ }
378
+
333
379
  Value Value::BOOLEAN(int8_t value) {
334
380
  Value result(LogicalType::BOOLEAN);
335
381
  result.value_.boolean = bool(value);
@@ -18,7 +18,10 @@ ColumnBindingResolver::ColumnBindingResolver() {
18
18
  }
19
19
 
20
20
  void ColumnBindingResolver::VisitOperator(LogicalOperator &op) {
21
- if (op.type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN || op.type == LogicalOperatorType::LOGICAL_DELIM_JOIN) {
21
+ switch (op.type) {
22
+ case LogicalOperatorType::LOGICAL_ASOF_JOIN:
23
+ case LogicalOperatorType::LOGICAL_COMPARISON_JOIN:
24
+ case LogicalOperatorType::LOGICAL_DELIM_JOIN: {
22
25
  // special case: comparison join
23
26
  auto &comp_join = (LogicalComparisonJoin &)op;
24
27
  // first get the bindings of the LHS and resolve the LHS expressions
@@ -41,7 +44,8 @@ void ColumnBindingResolver::VisitOperator(LogicalOperator &op) {
41
44
  // finally update the bindings with the result bindings of the join
42
45
  bindings = op.GetColumnBindings();
43
46
  return;
44
- } else if (op.type == LogicalOperatorType::LOGICAL_ANY_JOIN) {
47
+ }
48
+ case LogicalOperatorType::LOGICAL_ANY_JOIN: {
45
49
  // ANY join, this join is different because we evaluate the expression on the bindings of BOTH join sides at
46
50
  // once i.e. we set the bindings first to the bindings of the entire join, and then resolve the expressions of
47
51
  // this operator
@@ -54,19 +58,22 @@ void ColumnBindingResolver::VisitOperator(LogicalOperator &op) {
54
58
  }
55
59
  VisitOperatorExpressions(op);
56
60
  return;
57
- } else if (op.type == LogicalOperatorType::LOGICAL_CREATE_INDEX) {
61
+ }
62
+ case LogicalOperatorType::LOGICAL_CREATE_INDEX: {
58
63
  // CREATE INDEX statement, add the columns of the table with table index 0 to the binding set
59
64
  // afterwards bind the expressions of the CREATE INDEX statement
60
65
  auto &create_index = (LogicalCreateIndex &)op;
61
66
  bindings = LogicalOperator::GenerateColumnBindings(0, create_index.table.GetColumns().LogicalColumnCount());
62
67
  VisitOperatorExpressions(op);
63
68
  return;
64
- } else if (op.type == LogicalOperatorType::LOGICAL_GET) {
69
+ }
70
+ case LogicalOperatorType::LOGICAL_GET: {
65
71
  //! We first need to update the current set of bindings and then visit operator expressions
66
72
  bindings = op.GetColumnBindings();
67
73
  VisitOperatorExpressions(op);
68
74
  return;
69
- } else if (op.type == LogicalOperatorType::LOGICAL_INSERT) {
75
+ }
76
+ case LogicalOperatorType::LOGICAL_INSERT: {
70
77
  //! We want to execute the normal path, but also add a dummy 'excluded' binding if there is a
71
78
  // ON CONFLICT DO UPDATE clause
72
79
  auto &insert_op = (LogicalInsert &)op;
@@ -89,6 +96,9 @@ void ColumnBindingResolver::VisitOperator(LogicalOperator &op) {
89
96
  return;
90
97
  }
91
98
  }
99
+ default:
100
+ break;
101
+ }
92
102
  // general case
93
103
  // first visit the children of this operator
94
104
  VisitOperatorChildren(op);
@@ -334,7 +334,9 @@ idx_t PhysicalRangeJoin::SelectJoinTail(const ExpressionType &condition, Vector
334
334
  case ExpressionType::COMPARE_DISTINCT_FROM:
335
335
  return VectorOperations::DistinctFrom(left, right, sel, count, true_sel, nullptr);
336
336
  case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
337
+ return VectorOperations::NotDistinctFrom(left, right, sel, count, true_sel, nullptr);
337
338
  case ExpressionType::COMPARE_EQUAL:
339
+ return VectorOperations::Equals(left, right, sel, count, true_sel, nullptr);
338
340
  default:
339
341
  throw InternalException("Unsupported comparison type for PhysicalRangeJoin");
340
342
  }
@@ -498,12 +498,12 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, bool try_add_line) {
498
498
  }
499
499
 
500
500
  // figure out the exact line number
501
+ UnifiedVectorFormat inserted_column_data;
502
+ insert_chunk.data[col_idx].ToUnifiedFormat(parse_chunk.size(), inserted_column_data);
501
503
  idx_t row_idx;
502
504
  for (row_idx = 0; row_idx < parse_chunk.size(); row_idx++) {
503
- auto &inserted_column = insert_chunk.data[col_idx];
504
505
  auto &parsed_column = parse_chunk.data[col_idx];
505
-
506
- if (FlatVector::IsNull(inserted_column, row_idx) && !FlatVector::IsNull(parsed_column, row_idx)) {
506
+ if (!inserted_column_data.validity.RowIsValid(row_idx) && !FlatVector::IsNull(parsed_column, row_idx)) {
507
507
  break;
508
508
  }
509
509
  }
@@ -870,16 +870,7 @@ vector<LogicalType> BufferedCSVReader::SniffCSV(const vector<LogicalType> &reque
870
870
  // #######
871
871
  // ### type detection (initial)
872
872
  // #######
873
- // type candidates, ordered by descending specificity (~ from high to low)
874
- vector<LogicalType> type_candidates = {
875
- LogicalType::VARCHAR,
876
- LogicalType::TIMESTAMP,
877
- LogicalType::DATE,
878
- LogicalType::TIME,
879
- LogicalType::DOUBLE,
880
- /* LogicalType::FLOAT,*/ LogicalType::BIGINT,
881
- /*LogicalType::INTEGER,*/ /*LogicalType::SMALLINT, LogicalType::TINYINT,*/ LogicalType::BOOLEAN,
882
- LogicalType::SQLNULL};
873
+
883
874
  // format template candidates, ordered by descending specificity (~ from high to low)
884
875
  std::map<LogicalTypeId, vector<const char *>> format_template_candidates = {
885
876
  {LogicalTypeId::DATE, {"%m-%d-%Y", "%m-%d-%y", "%d-%m-%Y", "%d-%m-%y", "%Y-%m-%d", "%y-%m-%d"}},
@@ -890,8 +881,8 @@ vector<LogicalType> BufferedCSVReader::SniffCSV(const vector<LogicalType> &reque
890
881
  vector<vector<LogicalType>> best_sql_types_candidates;
891
882
  map<LogicalTypeId, vector<string>> best_format_candidates;
892
883
  DataChunk best_header_row;
893
- DetectCandidateTypes(type_candidates, format_template_candidates, info_candidates, original_options, best_num_cols,
894
- best_sql_types_candidates, best_format_candidates, best_header_row);
884
+ DetectCandidateTypes(options.auto_type_candidates, format_template_candidates, info_candidates, original_options,
885
+ best_num_cols, best_sql_types_candidates, best_format_candidates, best_header_row);
895
886
 
896
887
  if (best_format_candidates.empty() || best_header_row.size() == 0) {
897
888
  throw InvalidInputException(
@@ -939,7 +930,8 @@ vector<LogicalType> BufferedCSVReader::SniffCSV(const vector<LogicalType> &reque
939
930
  // #######
940
931
  // ### type detection (refining)
941
932
  // #######
942
- return RefineTypeDetection(type_candidates, requested_types, best_sql_types_candidates, best_format_candidates);
933
+ return RefineTypeDetection(options.auto_type_candidates, requested_types, best_sql_types_candidates,
934
+ best_format_candidates);
943
935
  }
944
936
 
945
937
  bool BufferedCSVReader::TryParseComplexCSV(DataChunk &insert_chunk, string &error_message) {
@@ -1,6 +1,7 @@
1
1
  #include "duckdb/execution/operator/projection/physical_projection.hpp"
2
2
  #include "duckdb/parallel/thread_context.hpp"
3
3
  #include "duckdb/execution/expression_executor.hpp"
4
+ #include "duckdb/planner/expression/bound_reference_expression.hpp"
4
5
 
5
6
  namespace duckdb {
6
7
 
@@ -35,6 +36,39 @@ unique_ptr<OperatorState> PhysicalProjection::GetOperatorState(ExecutionContext
35
36
  return make_unique<ProjectionState>(context, select_list);
36
37
  }
37
38
 
39
+ unique_ptr<PhysicalOperator>
40
+ PhysicalProjection::CreateJoinProjection(vector<LogicalType> proj_types, const vector<LogicalType> &lhs_types,
41
+ const vector<LogicalType> &rhs_types, const vector<idx_t> &left_projection_map,
42
+ const vector<idx_t> &right_projection_map, const idx_t estimated_cardinality) {
43
+
44
+ vector<unique_ptr<Expression>> proj_selects;
45
+ proj_selects.reserve(proj_types.size());
46
+
47
+ if (left_projection_map.empty()) {
48
+ for (storage_t i = 0; i < lhs_types.size(); ++i) {
49
+ proj_selects.emplace_back(make_unique<BoundReferenceExpression>(lhs_types[i], i));
50
+ }
51
+ } else {
52
+ for (auto i : left_projection_map) {
53
+ proj_selects.emplace_back(make_unique<BoundReferenceExpression>(lhs_types[i], i));
54
+ }
55
+ }
56
+ const auto left_cols = lhs_types.size();
57
+
58
+ if (right_projection_map.empty()) {
59
+ for (storage_t i = 0; i < rhs_types.size(); ++i) {
60
+ proj_selects.emplace_back(make_unique<BoundReferenceExpression>(rhs_types[i], left_cols + i));
61
+ }
62
+
63
+ } else {
64
+ for (auto i : right_projection_map) {
65
+ proj_selects.emplace_back(make_unique<BoundReferenceExpression>(rhs_types[i], left_cols + i));
66
+ }
67
+ }
68
+
69
+ return make_unique<PhysicalProjection>(std::move(proj_types), std::move(proj_selects), estimated_cardinality);
70
+ }
71
+
38
72
  string PhysicalProjection::ParamsToString() const {
39
73
  string extra_info;
40
74
  for (auto &expr : select_list) {
@@ -0,0 +1,97 @@
1
+ #include "duckdb/execution/operator/aggregate/physical_window.hpp"
2
+ #include "duckdb/execution/operator/join/physical_iejoin.hpp"
3
+ #include "duckdb/execution/operator/projection/physical_projection.hpp"
4
+ #include "duckdb/execution/physical_plan_generator.hpp"
5
+ #include "duckdb/main/client_context.hpp"
6
+ #include "duckdb/planner/expression/bound_constant_expression.hpp"
7
+ #include "duckdb/planner/expression/bound_reference_expression.hpp"
8
+ #include "duckdb/planner/expression/bound_window_expression.hpp"
9
+ #include "duckdb/planner/operator/logical_asof_join.hpp"
10
+
11
+ namespace duckdb {
12
+
13
+ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalAsOfJoin &op) {
14
+ // now visit the children
15
+ D_ASSERT(op.children.size() == 2);
16
+ idx_t lhs_cardinality = op.children[0]->EstimateCardinality(context);
17
+ idx_t rhs_cardinality = op.children[1]->EstimateCardinality(context);
18
+ auto left = CreatePlan(*op.children[0]);
19
+ auto right = CreatePlan(*op.children[1]);
20
+ D_ASSERT(left && right);
21
+
22
+ // Validate
23
+ vector<idx_t> equi_indexes;
24
+ auto asof_idx = op.conditions.size();
25
+ for (size_t c = 0; c < op.conditions.size(); ++c) {
26
+ auto &cond = op.conditions[c];
27
+ switch (cond.comparison) {
28
+ case ExpressionType::COMPARE_EQUAL:
29
+ case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
30
+ equi_indexes.emplace_back(c);
31
+ break;
32
+ case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
33
+ D_ASSERT(asof_idx == op.conditions.size());
34
+ asof_idx = c;
35
+ break;
36
+ default:
37
+ throw InternalException("Invalid ASOF JOIN comparison");
38
+ }
39
+ }
40
+ D_ASSERT(asof_idx < op.conditions.size());
41
+
42
+ // Temporary implementation: IEJoin of Window
43
+ // LEAD(asof_column, 1, infinity) OVER (PARTITION BY equi_column... ORDER BY asof_column) AS asof_temp
44
+ auto &asof_comp = op.conditions[asof_idx];
45
+ auto &asof_column = asof_comp.right;
46
+ auto asof_type = asof_column->return_type;
47
+ auto asof_temp = make_unique<BoundWindowExpression>(ExpressionType::WINDOW_LEAD, asof_type, nullptr, nullptr);
48
+ asof_temp->children.emplace_back(asof_column->Copy());
49
+ asof_temp->offset_expr = make_unique<BoundConstantExpression>(Value::BIGINT(1));
50
+ asof_temp->default_expr = make_unique<BoundConstantExpression>(Value::Infinity(asof_type));
51
+ for (auto equi_idx : equi_indexes) {
52
+ asof_temp->partitions.emplace_back(op.conditions[equi_idx].right->Copy());
53
+ }
54
+ asof_temp->orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_FIRST, asof_column->Copy());
55
+ asof_temp->start = WindowBoundary::UNBOUNDED_PRECEDING;
56
+ asof_temp->end = WindowBoundary::CURRENT_ROW_ROWS;
57
+
58
+ vector<unique_ptr<Expression>> window_select;
59
+ window_select.emplace_back(std::move(asof_temp));
60
+
61
+ auto window_types = right->types;
62
+ window_types.emplace_back(asof_type);
63
+
64
+ auto window = make_unique<PhysicalWindow>(window_types, std::move(window_select), rhs_cardinality);
65
+ window->children.emplace_back(std::move(right));
66
+
67
+ // IEJoin(left, window, conditions || asof_column < asof_temp)
68
+ JoinCondition asof_upper;
69
+ asof_upper.left = asof_comp.left->Copy();
70
+ asof_upper.right = make_unique<BoundReferenceExpression>(asof_type, window_types.size() - 1);
71
+ asof_upper.comparison = ExpressionType::COMPARE_LESSTHAN;
72
+
73
+ // We have an equality condition, so we may have to deal with projection maps.
74
+ // IEJoin does not (currently) support them, so we have to do it manually
75
+ auto proj_types = op.types;
76
+ op.types.clear();
77
+
78
+ auto lhs_types = op.children[0]->types;
79
+ op.types = lhs_types;
80
+
81
+ auto rhs_types = op.children[1]->types;
82
+ op.types.insert(op.types.end(), rhs_types.begin(), rhs_types.end());
83
+
84
+ op.types.emplace_back(asof_type);
85
+ op.conditions.emplace_back(std::move(asof_upper));
86
+ auto iejoin = make_unique<PhysicalIEJoin>(op, std::move(left), std::move(window), std::move(op.conditions),
87
+ op.join_type, op.estimated_cardinality);
88
+
89
+ // Project away asof_temp and anything from the projection maps
90
+ auto proj = PhysicalProjection::CreateJoinProjection(proj_types, lhs_types, rhs_types, op.left_projection_map,
91
+ op.right_projection_map, lhs_cardinality);
92
+ proj->children.push_back(std::move(iejoin));
93
+
94
+ return proj;
95
+ }
96
+
97
+ } // namespace duckdb
@@ -115,6 +115,9 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalOperator &
115
115
  case LogicalOperatorType::LOGICAL_DELIM_JOIN:
116
116
  plan = CreatePlan((LogicalDelimJoin &)op);
117
117
  break;
118
+ case LogicalOperatorType::LOGICAL_ASOF_JOIN:
119
+ plan = CreatePlan((LogicalAsOfJoin &)op);
120
+ break;
118
121
  case LogicalOperatorType::LOGICAL_COMPARISON_JOIN:
119
122
  plan = CreatePlan((LogicalComparisonJoin &)op);
120
123
  break;
@@ -2,6 +2,7 @@
2
2
  #include "duckdb/common/vector_operations/vector_operations.hpp"
3
3
  #include "duckdb/function/scalar/trigonometric_functions.hpp"
4
4
  #include "duckdb/common/operator/abs.hpp"
5
+ #include "duckdb/common/operator/multiply.hpp"
5
6
  #include "duckdb/common/types/hugeint.hpp"
6
7
  #include "duckdb/common/types/cast_helpers.hpp"
7
8
  #include "duckdb/planner/expression/bound_function_expression.hpp"
@@ -1161,4 +1162,90 @@ void EvenFun::RegisterFunction(BuiltinFunctions &set) {
1161
1162
  ScalarFunction::UnaryFunction<double, double, EvenOperator>));
1162
1163
  }
1163
1164
 
1165
+ //===--------------------------------------------------------------------===//
1166
+ // gcd
1167
+ //===--------------------------------------------------------------------===//
1168
+
1169
+ // should be replaced with std::gcd in a newer C++ standard
1170
+ template <class TA>
1171
+ TA GreatestCommonDivisor(TA left, TA right) {
1172
+ TA a = left;
1173
+ TA b = right;
1174
+
1175
+ // This protects the following modulo operations from a corner case,
1176
+ // where we would get a runtime error due to an integer overflow.
1177
+ if ((left == NumericLimits<TA>::Minimum() && right == -1) ||
1178
+ (left == -1 && right == NumericLimits<TA>::Minimum())) {
1179
+ return 1;
1180
+ }
1181
+
1182
+ while (true) {
1183
+ if (a == 0) {
1184
+ return TryAbsOperator::Operation<TA, TA>(b);
1185
+ }
1186
+ b %= a;
1187
+
1188
+ if (b == 0) {
1189
+ return TryAbsOperator::Operation<TA, TA>(a);
1190
+ }
1191
+ a %= b;
1192
+ }
1193
+ }
1194
+
1195
+ struct GreatestCommonDivisorOperator {
1196
+ template <class TA, class TB, class TR>
1197
+ static inline TR Operation(TA left, TB right) {
1198
+ return GreatestCommonDivisor(left, right);
1199
+ }
1200
+ };
1201
+
1202
+ void GreatestCommonDivisorFun::RegisterFunction(BuiltinFunctions &set) {
1203
+ ScalarFunctionSet funcs("gcd");
1204
+
1205
+ funcs.AddFunction(
1206
+ ScalarFunction({LogicalType::BIGINT, LogicalType::BIGINT}, LogicalType::BIGINT,
1207
+ ScalarFunction::BinaryFunction<int64_t, int64_t, int64_t, GreatestCommonDivisorOperator>));
1208
+ funcs.AddFunction(
1209
+ ScalarFunction({LogicalType::HUGEINT, LogicalType::HUGEINT}, LogicalType::HUGEINT,
1210
+ ScalarFunction::BinaryFunction<hugeint_t, hugeint_t, hugeint_t, GreatestCommonDivisorOperator>));
1211
+
1212
+ set.AddFunction(funcs);
1213
+ funcs.name = "greatest_common_divisor";
1214
+ set.AddFunction(funcs);
1215
+ }
1216
+
1217
+ //===--------------------------------------------------------------------===//
1218
+ // lcm
1219
+ //===--------------------------------------------------------------------===//
1220
+
1221
+ // should be replaced with std::lcm in a newer C++ standard
1222
+ struct LeastCommonMultipleOperator {
1223
+ template <class TA, class TB, class TR>
1224
+ static inline TR Operation(TA left, TB right) {
1225
+ if (left == 0 || right == 0) {
1226
+ return 0;
1227
+ }
1228
+ TR result;
1229
+ if (!TryMultiplyOperator::Operation<TA, TB, TR>(left, right / GreatestCommonDivisor(left, right), result)) {
1230
+ throw OutOfRangeException("lcm value is out of range");
1231
+ }
1232
+ return TryAbsOperator::Operation<TR, TR>(result);
1233
+ }
1234
+ };
1235
+
1236
+ void LeastCommonMultipleFun::RegisterFunction(BuiltinFunctions &set) {
1237
+ ScalarFunctionSet funcs("lcm");
1238
+
1239
+ funcs.AddFunction(
1240
+ ScalarFunction({LogicalType::BIGINT, LogicalType::BIGINT}, LogicalType::BIGINT,
1241
+ ScalarFunction::BinaryFunction<int64_t, int64_t, int64_t, LeastCommonMultipleOperator>));
1242
+ funcs.AddFunction(
1243
+ ScalarFunction({LogicalType::HUGEINT, LogicalType::HUGEINT}, LogicalType::HUGEINT,
1244
+ ScalarFunction::BinaryFunction<hugeint_t, hugeint_t, hugeint_t, LeastCommonMultipleOperator>));
1245
+
1246
+ set.AddFunction(funcs);
1247
+ funcs.name = "least_common_multiple";
1248
+ set.AddFunction(funcs);
1249
+ }
1250
+
1164
1251
  } // namespace duckdb
@@ -41,6 +41,9 @@ void BuiltinFunctions::RegisterMathFunctions() {
41
41
  Register<SignBitFun>();
42
42
  Register<IsInfiniteFun>();
43
43
  Register<IsFiniteFun>();
44
+
45
+ Register<GreatestCommonDivisorFun>();
46
+ Register<LeastCommonMultipleFun>();
44
47
  }
45
48
 
46
49
  } // namespace duckdb
@@ -0,0 +1,201 @@
1
+ #include "duckdb/common/exception.hpp"
2
+ #include "duckdb/common/string_util.hpp"
3
+ #include "duckdb/common/types/blob.hpp"
4
+ #include "duckdb/common/vector_operations/unary_executor.hpp"
5
+ #include "duckdb/common/vector_operations/vector_operations.hpp"
6
+ #include "duckdb/function/scalar/string_functions.hpp"
7
+
8
+ namespace duckdb {
9
+
10
+ struct HexStrOperator {
11
+ template <class INPUT_TYPE, class RESULT_TYPE>
12
+ static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
13
+ auto data = input.GetDataUnsafe();
14
+ auto size = input.GetSize();
15
+
16
+ // Allocate empty space
17
+ auto target = StringVector::EmptyString(result, size * 2);
18
+ auto output = target.GetDataWriteable();
19
+
20
+ for (idx_t i = 0; i < size; ++i) {
21
+ *output = Blob::HEX_TABLE[(data[i] >> 4) & 0x0F];
22
+ output++;
23
+ *output = Blob::HEX_TABLE[data[i] & 0x0F];
24
+ output++;
25
+ }
26
+
27
+ target.Finalize();
28
+ return target;
29
+ }
30
+ };
31
+
32
+ struct FromHexOperator {
33
+ template <class INPUT_TYPE, class RESULT_TYPE>
34
+ static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
35
+ auto data = input.GetDataUnsafe();
36
+ auto size = input.GetSize();
37
+
38
+ if (size > NumericLimits<uint32_t>::Maximum()) {
39
+ throw InvalidInputException("Hexadecimal input length larger than 2^32 are not supported");
40
+ }
41
+
42
+ D_ASSERT(size <= NumericLimits<uint32_t>::Maximum());
43
+ auto buffer_size = (size + 1) / 2;
44
+
45
+ // Allocate empty space
46
+ auto target = StringVector::EmptyString(result, buffer_size);
47
+ auto output = target.GetDataWriteable();
48
+
49
+ // Treated as a single byte
50
+ idx_t i = 0;
51
+ if (size % 2 != 0) {
52
+ *output = StringUtil::GetHexValue(data[i]);
53
+ i++;
54
+ output++;
55
+ }
56
+
57
+ for (; i < size; i += 2) {
58
+ uint8_t major = StringUtil::GetHexValue(data[i]);
59
+ uint8_t minor = StringUtil::GetHexValue(data[i + 1]);
60
+ *output = (major << 4) | minor;
61
+ output++;
62
+ }
63
+
64
+ target.Finalize();
65
+ return target;
66
+ }
67
+ };
68
+
69
+ struct HexIntegralOperator {
70
+ template <class INPUT_TYPE, class RESULT_TYPE>
71
+ static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
72
+ // Sufficient space for maximum length
73
+ char buffer[sizeof(INPUT_TYPE) * 2];
74
+ char *ptr = buffer;
75
+ idx_t buffer_size = 0;
76
+
77
+ bool seen_non_zero = false;
78
+ for (idx_t offset = sizeof(INPUT_TYPE) * 8; offset >= 4; offset -= 4) {
79
+ uint8_t byte = (input >> (offset - 4)) & 0x0F;
80
+ if (byte == 0 && !seen_non_zero && offset > 4) {
81
+ continue;
82
+ }
83
+ seen_non_zero = true;
84
+ *ptr = Blob::HEX_TABLE[byte];
85
+ ptr++;
86
+ buffer_size++;
87
+ }
88
+
89
+ // Allocate empty space
90
+ auto target = StringVector::EmptyString(result, buffer_size);
91
+ auto output = target.GetDataWriteable();
92
+ memcpy(output, buffer, buffer_size);
93
+
94
+ target.Finalize();
95
+ return target;
96
+ }
97
+ };
98
+
99
+ struct HexHugeIntOperator {
100
+ template <class INPUT_TYPE, class RESULT_TYPE>
101
+ static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
102
+ char buffer[sizeof(INPUT_TYPE) * 2];
103
+ char *ptr = buffer;
104
+ idx_t buffer_size = 0;
105
+
106
+ uint64_t lower = input.lower;
107
+ int64_t upper = input.upper;
108
+
109
+ bool seen_non_zero = false;
110
+ for (idx_t offset = 64; offset >= 4; offset -= 4) {
111
+ uint8_t byte = (upper >> (offset - 4)) & 0x0F;
112
+
113
+ if (byte == 0 && !seen_non_zero) {
114
+ continue;
115
+ }
116
+ seen_non_zero = true;
117
+ *ptr = Blob::HEX_TABLE[byte];
118
+ ptr++;
119
+ buffer_size++;
120
+ }
121
+
122
+ for (idx_t offset = 64; offset >= 4; offset -= 4) {
123
+ uint8_t byte = (lower >> (offset - 4)) & 0x0F;
124
+
125
+ // at least one byte space
126
+ if (byte == 0 && !seen_non_zero && offset > 4) {
127
+ continue;
128
+ }
129
+ seen_non_zero = true;
130
+ *ptr = Blob::HEX_TABLE[byte];
131
+ ptr++;
132
+ buffer_size++;
133
+ }
134
+
135
+ // Allocate empty space
136
+ auto target = StringVector::EmptyString(result, buffer_size);
137
+ auto output = target.GetDataWriteable();
138
+ memcpy(output, buffer, buffer_size);
139
+
140
+ target.Finalize();
141
+ return target;
142
+ }
143
+ };
144
+
145
+ static void ToHexFunction(DataChunk &args, ExpressionState &state, Vector &result) {
146
+ D_ASSERT(args.ColumnCount() == 1);
147
+ auto &input = args.data[0];
148
+ idx_t count = args.size();
149
+
150
+ switch (input.GetType().InternalType()) {
151
+ case PhysicalType::VARCHAR:
152
+ UnaryExecutor::ExecuteString<string_t, string_t, HexStrOperator>(input, result, count);
153
+ break;
154
+ case PhysicalType::INT64:
155
+ UnaryExecutor::ExecuteString<int64_t, string_t, HexIntegralOperator>(input, result, count);
156
+ break;
157
+ case PhysicalType::INT128:
158
+ UnaryExecutor::ExecuteString<hugeint_t, string_t, HexHugeIntOperator>(input, result, count);
159
+ break;
160
+ case PhysicalType::UINT64:
161
+ UnaryExecutor::ExecuteString<uint64_t, string_t, HexIntegralOperator>(input, result, count);
162
+ break;
163
+ default:
164
+ throw NotImplementedException("Specifier type not implemented");
165
+ }
166
+ }
167
+
168
+ static void FromHexFunction(DataChunk &args, ExpressionState &state, Vector &result) {
169
+ D_ASSERT(args.ColumnCount() == 1);
170
+ D_ASSERT(args.data[0].GetType().InternalType() == PhysicalType::VARCHAR);
171
+ auto &input = args.data[0];
172
+ idx_t count = args.size();
173
+
174
+ UnaryExecutor::ExecuteString<string_t, string_t, FromHexOperator>(input, result, count);
175
+ }
176
+
177
+ void HexFun::RegisterFunction(BuiltinFunctions &set) {
178
+ ScalarFunctionSet to_hex("to_hex");
179
+ ScalarFunctionSet from_hex("from_hex");
180
+
181
+ to_hex.AddFunction(ScalarFunction({LogicalType::VARCHAR}, LogicalType::VARCHAR, ToHexFunction));
182
+
183
+ to_hex.AddFunction(ScalarFunction({LogicalType::BIGINT}, LogicalType::VARCHAR, ToHexFunction));
184
+
185
+ to_hex.AddFunction(ScalarFunction({LogicalType::UBIGINT}, LogicalType::VARCHAR, ToHexFunction));
186
+
187
+ to_hex.AddFunction(ScalarFunction({LogicalType::HUGEINT}, LogicalType::VARCHAR, ToHexFunction));
188
+
189
+ from_hex.AddFunction(ScalarFunction({LogicalType::VARCHAR}, LogicalType::BLOB, FromHexFunction));
190
+
191
+ set.AddFunction(to_hex);
192
+ set.AddFunction(from_hex);
193
+
194
+ // mysql
195
+ to_hex.name = "hex";
196
+ from_hex.name = "unhex";
197
+ set.AddFunction(to_hex);
198
+ set.AddFunction(from_hex);
199
+ }
200
+
201
+ } // namespace duckdb