duckdb 0.7.2-dev904.0 → 0.7.2-dev982.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (63)
  1. package/package.json +1 -1
  2. package/src/duckdb/src/common/enums/logical_operator_type.cpp +2 -0
  3. package/src/duckdb/src/common/serializer/enum_serializer.cpp +4 -0
  4. package/src/duckdb/src/common/types/value.cpp +46 -0
  5. package/src/duckdb/src/execution/column_binding_resolver.cpp +15 -5
  6. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +40 -19
  7. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +2 -0
  8. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +3 -3
  9. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +5 -13
  10. package/src/duckdb/src/execution/operator/projection/physical_projection.cpp +34 -0
  11. package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +97 -0
  12. package/src/duckdb/src/execution/physical_plan_generator.cpp +3 -0
  13. package/src/duckdb/src/function/scalar/math/numeric.cpp +87 -0
  14. package/src/duckdb/src/function/scalar/math_functions.cpp +3 -0
  15. package/src/duckdb/src/function/scalar/string/hex.cpp +201 -0
  16. package/src/duckdb/src/function/scalar/string_functions.cpp +1 -0
  17. package/src/duckdb/src/function/table/read_csv.cpp +46 -0
  18. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  19. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +5 -4
  20. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +1 -0
  21. package/src/duckdb/src/include/duckdb/common/string_util.hpp +13 -0
  22. package/src/duckdb/src/include/duckdb/common/types/value.hpp +11 -7
  23. package/src/duckdb/src/include/duckdb/common/vector_operations/unary_executor.hpp +2 -2
  24. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +2 -0
  25. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +6 -0
  26. package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_projection.hpp +5 -0
  27. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +1 -0
  28. package/src/duckdb/src/include/duckdb/function/scalar/math_functions.hpp +8 -0
  29. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +4 -0
  30. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
  31. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +1 -0
  32. package/src/duckdb/src/include/duckdb/planner/operator/logical_asof_join.hpp +22 -0
  33. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -2
  34. package/src/duckdb/src/include/duckdb.h +1 -1
  35. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -0
  36. package/src/duckdb/src/optimizer/filter_pullup.cpp +3 -1
  37. package/src/duckdb/src/optimizer/filter_pushdown.cpp +3 -1
  38. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +4 -0
  39. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +8 -4
  40. package/src/duckdb/src/optimizer/pullup/pullup_from_left.cpp +2 -2
  41. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +1 -1
  42. package/src/duckdb/src/optimizer/pushdown/pushdown_inner_join.cpp +3 -0
  43. package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +4 -2
  44. package/src/duckdb/src/optimizer/pushdown/pushdown_mark_join.cpp +1 -1
  45. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +1 -0
  46. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +1 -0
  47. package/src/duckdb/src/optimizer/statistics_propagator.cpp +1 -0
  48. package/src/duckdb/src/parser/tableref/joinref.cpp +4 -0
  49. package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +8 -1
  50. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +10 -3
  51. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +60 -12
  52. package/src/duckdb/src/planner/logical_operator.cpp +3 -0
  53. package/src/duckdb/src/planner/logical_operator_visitor.cpp +1 -0
  54. package/src/duckdb/src/planner/operator/logical_asof_join.cpp +8 -0
  55. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +3 -1
  56. package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +32 -0
  57. package/src/duckdb/third_party/libpg_query/include/nodes/primnodes.hpp +3 -3
  58. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +915 -913
  59. package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +1 -0
  60. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +17371 -17306
  61. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  62. package/src/duckdb/ub_src_function_scalar_string.cpp +2 -0
  63. package/src/duckdb/ub_src_planner_operator.cpp +2 -0
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.7.2-dev904.0",
5
+ "version": "0.7.2-dev982.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -20,6 +20,8 @@ string LogicalOperatorToString(LogicalOperatorType type) {
20
20
  return "EXPRESSION_GET";
21
21
  case LogicalOperatorType::LOGICAL_ANY_JOIN:
22
22
  return "ANY_JOIN";
23
+ case LogicalOperatorType::LOGICAL_ASOF_JOIN:
24
+ return "ASOF_JOIN";
23
25
  case LogicalOperatorType::LOGICAL_COMPARISON_JOIN:
24
26
  return "COMPARISON_JOIN";
25
27
  case LogicalOperatorType::LOGICAL_DELIM_JOIN:
@@ -236,6 +236,8 @@ JoinRefType EnumSerializer::StringToEnum(const char *value) {
236
236
  return JoinRefType::CROSS;
237
237
  } else if (StringUtil::Equals(value, "POSITIONAL")) {
238
238
  return JoinRefType::POSITIONAL;
239
+ } else if (StringUtil::Equals(value, "ASOF")) {
240
+ return JoinRefType::ASOF;
239
241
  } else {
240
242
  throw NotImplementedException("EnumSerializer::StringToEnum not implemented for enum value");
241
243
  }
@@ -252,6 +254,8 @@ const char *EnumSerializer::EnumToString(JoinRefType value) {
252
254
  return "CROSS";
253
255
  case JoinRefType::POSITIONAL:
254
256
  return "POSITIONAL";
257
+ case JoinRefType::ASOF:
258
+ return "ASOF";
255
259
  default:
256
260
  throw NotImplementedException("ToString not implemented for enum value");
257
261
  }
@@ -330,6 +330,52 @@ Value Value::MaximumValue(const LogicalType &type) {
330
330
  }
331
331
  }
332
332
 
333
+ Value Value::Infinity(const LogicalType &type) {
334
+ switch (type.id()) {
335
+ case LogicalTypeId::DATE:
336
+ return Value::DATE(date_t::infinity());
337
+ case LogicalTypeId::TIMESTAMP:
338
+ return Value::TIMESTAMP(timestamp_t::infinity());
339
+ case LogicalTypeId::TIMESTAMP_MS:
340
+ return Value::TIMESTAMPMS(timestamp_t::infinity());
341
+ case LogicalTypeId::TIMESTAMP_NS:
342
+ return Value::TIMESTAMPNS(timestamp_t::infinity());
343
+ case LogicalTypeId::TIMESTAMP_SEC:
344
+ return Value::TIMESTAMPSEC(timestamp_t::infinity());
345
+ case LogicalTypeId::TIMESTAMP_TZ:
346
+ return Value::TIMESTAMPTZ(timestamp_t::infinity());
347
+ case LogicalTypeId::FLOAT:
348
+ return Value::FLOAT(std::numeric_limits<float>::infinity());
349
+ case LogicalTypeId::DOUBLE:
350
+ return Value::DOUBLE(std::numeric_limits<double>::infinity());
351
+ default:
352
+ throw InvalidTypeException(type, "Infinity requires numeric type");
353
+ }
354
+ }
355
+
356
+ Value Value::NegativeInfinity(const LogicalType &type) {
357
+ switch (type.id()) {
358
+ case LogicalTypeId::DATE:
359
+ return Value::DATE(date_t::ninfinity());
360
+ case LogicalTypeId::TIMESTAMP:
361
+ return Value::TIMESTAMP(timestamp_t::ninfinity());
362
+ case LogicalTypeId::TIMESTAMP_MS:
363
+ return Value::TIMESTAMPMS(timestamp_t::ninfinity());
364
+ case LogicalTypeId::TIMESTAMP_NS:
365
+ return Value::TIMESTAMPNS(timestamp_t::ninfinity());
366
+ case LogicalTypeId::TIMESTAMP_SEC:
367
+ return Value::TIMESTAMPSEC(timestamp_t::ninfinity());
368
+ case LogicalTypeId::TIMESTAMP_TZ:
369
+ return Value::TIMESTAMPTZ(timestamp_t::ninfinity());
370
+ case LogicalTypeId::FLOAT:
371
+ return Value::FLOAT(-std::numeric_limits<float>::infinity());
372
+ case LogicalTypeId::DOUBLE:
373
+ return Value::DOUBLE(-std::numeric_limits<double>::infinity());
374
+ default:
375
+ throw InvalidTypeException(type, "NegativeInfinity requires numeric type");
376
+ }
377
+ }
378
+
333
379
  Value Value::BOOLEAN(int8_t value) {
334
380
  Value result(LogicalType::BOOLEAN);
335
381
  result.value_.boolean = bool(value);
@@ -18,7 +18,10 @@ ColumnBindingResolver::ColumnBindingResolver() {
18
18
  }
19
19
 
20
20
  void ColumnBindingResolver::VisitOperator(LogicalOperator &op) {
21
- if (op.type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN || op.type == LogicalOperatorType::LOGICAL_DELIM_JOIN) {
21
+ switch (op.type) {
22
+ case LogicalOperatorType::LOGICAL_ASOF_JOIN:
23
+ case LogicalOperatorType::LOGICAL_COMPARISON_JOIN:
24
+ case LogicalOperatorType::LOGICAL_DELIM_JOIN: {
22
25
  // special case: comparison join
23
26
  auto &comp_join = (LogicalComparisonJoin &)op;
24
27
  // first get the bindings of the LHS and resolve the LHS expressions
@@ -41,7 +44,8 @@ void ColumnBindingResolver::VisitOperator(LogicalOperator &op) {
41
44
  // finally update the bindings with the result bindings of the join
42
45
  bindings = op.GetColumnBindings();
43
46
  return;
44
- } else if (op.type == LogicalOperatorType::LOGICAL_ANY_JOIN) {
47
+ }
48
+ case LogicalOperatorType::LOGICAL_ANY_JOIN: {
45
49
  // ANY join, this join is different because we evaluate the expression on the bindings of BOTH join sides at
46
50
  // once i.e. we set the bindings first to the bindings of the entire join, and then resolve the expressions of
47
51
  // this operator
@@ -54,19 +58,22 @@ void ColumnBindingResolver::VisitOperator(LogicalOperator &op) {
54
58
  }
55
59
  VisitOperatorExpressions(op);
56
60
  return;
57
- } else if (op.type == LogicalOperatorType::LOGICAL_CREATE_INDEX) {
61
+ }
62
+ case LogicalOperatorType::LOGICAL_CREATE_INDEX: {
58
63
  // CREATE INDEX statement, add the columns of the table with table index 0 to the binding set
59
64
  // afterwards bind the expressions of the CREATE INDEX statement
60
65
  auto &create_index = (LogicalCreateIndex &)op;
61
66
  bindings = LogicalOperator::GenerateColumnBindings(0, create_index.table.GetColumns().LogicalColumnCount());
62
67
  VisitOperatorExpressions(op);
63
68
  return;
64
- } else if (op.type == LogicalOperatorType::LOGICAL_GET) {
69
+ }
70
+ case LogicalOperatorType::LOGICAL_GET: {
65
71
  //! We first need to update the current set of bindings and then visit operator expressions
66
72
  bindings = op.GetColumnBindings();
67
73
  VisitOperatorExpressions(op);
68
74
  return;
69
- } else if (op.type == LogicalOperatorType::LOGICAL_INSERT) {
75
+ }
76
+ case LogicalOperatorType::LOGICAL_INSERT: {
70
77
  //! We want to execute the normal path, but also add a dummy 'excluded' binding if there is a
71
78
  // ON CONFLICT DO UPDATE clause
72
79
  auto &insert_op = (LogicalInsert &)op;
@@ -89,6 +96,9 @@ void ColumnBindingResolver::VisitOperator(LogicalOperator &op) {
89
96
  return;
90
97
  }
91
98
  }
99
+ default:
100
+ break;
101
+ }
92
102
  // general case
93
103
  // first visit the children of this operator
94
104
  VisitOperatorChildren(op);
@@ -130,8 +130,10 @@ OperatorResultType PhysicalBlockwiseNLJoin::ExecuteInternal(ExecutionContext &co
130
130
  }
131
131
 
132
132
  // now perform the actual join
133
- // we perform a cross product, then execute the expression directly on the cross product' result
133
+ // we perform a cross product, then execute the expression directly on the cross product result
134
134
  idx_t result_count = 0;
135
+ bool found_match[STANDARD_VECTOR_SIZE] = {false};
136
+
135
137
  do {
136
138
  auto result = state.cross_product.Execute(input, *intermediate_chunk);
137
139
  if (result == OperatorResultType::NEED_MORE_INPUT) {
@@ -142,39 +144,58 @@ OperatorResultType PhysicalBlockwiseNLJoin::ExecuteInternal(ExecutionContext &co
142
144
  state.left_outer.ConstructLeftJoinResult(input, *intermediate_chunk);
143
145
  state.left_outer.Reset();
144
146
  }
147
+
148
+ if (join_type == JoinType::SEMI) {
149
+ PhysicalJoin::ConstructSemiJoinResult(input, chunk, found_match);
150
+ }
151
+ if (join_type == JoinType::ANTI) {
152
+ PhysicalJoin::ConstructAntiJoinResult(input, chunk, found_match);
153
+ }
154
+
145
155
  return OperatorResultType::NEED_MORE_INPUT;
146
156
  }
147
157
 
148
158
  // now perform the computation
149
159
  result_count = state.executor.SelectExpression(*intermediate_chunk, state.match_sel);
160
+
161
+ // handle anti and semi joins with different logic
150
162
  if (result_count > 0) {
151
163
  // found a match!
152
- // check if the cross product is scanning the LHS or the RHS in its entirety
153
- if (!state.cross_product.ScanLHS()) {
154
- // set the match flags in the LHS
155
- state.left_outer.SetMatches(state.match_sel, result_count);
156
- // set the match flag in the RHS
157
- gstate.right_outer.SetMatch(state.cross_product.ScanPosition() + state.cross_product.PositionInChunk());
164
+ // handle anti semi join conditions first
165
+ if (join_type == JoinType::ANTI || join_type == JoinType::SEMI) {
166
+ if (state.cross_product.ScanLHS()) {
167
+ found_match[state.cross_product.PositionInChunk()] = true;
168
+ } else {
169
+ for (idx_t i = 0; i < result_count; i++) {
170
+ found_match[state.match_sel.get_index(i)] = true;
171
+ }
172
+ }
173
+ intermediate_chunk->Reset();
174
+ // trick the loop to continue as semi and anti joins will never produce more output than
175
+ // the LHS cardinality
176
+ result_count = 0;
158
177
  } else {
159
- // set the match flag in the LHS
160
- state.left_outer.SetMatch(state.cross_product.PositionInChunk());
161
- // set the match flags in the RHS
162
- gstate.right_outer.SetMatches(state.match_sel, result_count, state.cross_product.ScanPosition());
178
+ // check if the cross product is scanning the LHS or the RHS in its entirety
179
+ if (!state.cross_product.ScanLHS()) {
180
+ // set the match flags in the LHS
181
+ state.left_outer.SetMatches(state.match_sel, result_count);
182
+ // set the match flag in the RHS
183
+ gstate.right_outer.SetMatch(state.cross_product.ScanPosition() +
184
+ state.cross_product.PositionInChunk());
185
+ } else {
186
+ // set the match flag in the LHS
187
+ state.left_outer.SetMatch(state.cross_product.PositionInChunk());
188
+ // set the match flags in the RHS
189
+ gstate.right_outer.SetMatches(state.match_sel, result_count, state.cross_product.ScanPosition());
190
+ }
191
+ intermediate_chunk->Slice(state.match_sel, result_count);
163
192
  }
164
- intermediate_chunk->Slice(state.match_sel, result_count);
165
193
  } else {
166
194
  // no result: reset the chunk
167
195
  intermediate_chunk->Reset();
168
196
  }
169
197
  } while (result_count == 0);
170
198
 
171
- if (join_type == JoinType::SEMI || join_type == JoinType::ANTI) {
172
- for (idx_t col_idx = 0; col_idx < chunk.ColumnCount(); col_idx++) {
173
- chunk.data[col_idx].Reference(intermediate_chunk->data[col_idx]);
174
- }
175
- chunk.SetCardinality(*intermediate_chunk);
176
- }
177
-
178
199
  return OperatorResultType::HAVE_MORE_OUTPUT;
179
200
  }
180
201
 
@@ -334,7 +334,9 @@ idx_t PhysicalRangeJoin::SelectJoinTail(const ExpressionType &condition, Vector
334
334
  case ExpressionType::COMPARE_DISTINCT_FROM:
335
335
  return VectorOperations::DistinctFrom(left, right, sel, count, true_sel, nullptr);
336
336
  case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
337
+ return VectorOperations::NotDistinctFrom(left, right, sel, count, true_sel, nullptr);
337
338
  case ExpressionType::COMPARE_EQUAL:
339
+ return VectorOperations::Equals(left, right, sel, count, true_sel, nullptr);
338
340
  default:
339
341
  throw InternalException("Unsupported comparison type for PhysicalRangeJoin");
340
342
  }
@@ -498,12 +498,12 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, bool try_add_line) {
498
498
  }
499
499
 
500
500
  // figure out the exact line number
501
+ UnifiedVectorFormat inserted_column_data;
502
+ insert_chunk.data[col_idx].ToUnifiedFormat(parse_chunk.size(), inserted_column_data);
501
503
  idx_t row_idx;
502
504
  for (row_idx = 0; row_idx < parse_chunk.size(); row_idx++) {
503
- auto &inserted_column = insert_chunk.data[col_idx];
504
505
  auto &parsed_column = parse_chunk.data[col_idx];
505
-
506
- if (FlatVector::IsNull(inserted_column, row_idx) && !FlatVector::IsNull(parsed_column, row_idx)) {
506
+ if (!inserted_column_data.validity.RowIsValid(row_idx) && !FlatVector::IsNull(parsed_column, row_idx)) {
507
507
  break;
508
508
  }
509
509
  }
@@ -870,16 +870,7 @@ vector<LogicalType> BufferedCSVReader::SniffCSV(const vector<LogicalType> &reque
870
870
  // #######
871
871
  // ### type detection (initial)
872
872
  // #######
873
- // type candidates, ordered by descending specificity (~ from high to low)
874
- vector<LogicalType> type_candidates = {
875
- LogicalType::VARCHAR,
876
- LogicalType::TIMESTAMP,
877
- LogicalType::DATE,
878
- LogicalType::TIME,
879
- LogicalType::DOUBLE,
880
- /* LogicalType::FLOAT,*/ LogicalType::BIGINT,
881
- /*LogicalType::INTEGER,*/ /*LogicalType::SMALLINT, LogicalType::TINYINT,*/ LogicalType::BOOLEAN,
882
- LogicalType::SQLNULL};
873
+
883
874
  // format template candidates, ordered by descending specificity (~ from high to low)
884
875
  std::map<LogicalTypeId, vector<const char *>> format_template_candidates = {
885
876
  {LogicalTypeId::DATE, {"%m-%d-%Y", "%m-%d-%y", "%d-%m-%Y", "%d-%m-%y", "%Y-%m-%d", "%y-%m-%d"}},
@@ -890,8 +881,8 @@ vector<LogicalType> BufferedCSVReader::SniffCSV(const vector<LogicalType> &reque
890
881
  vector<vector<LogicalType>> best_sql_types_candidates;
891
882
  map<LogicalTypeId, vector<string>> best_format_candidates;
892
883
  DataChunk best_header_row;
893
- DetectCandidateTypes(type_candidates, format_template_candidates, info_candidates, original_options, best_num_cols,
894
- best_sql_types_candidates, best_format_candidates, best_header_row);
884
+ DetectCandidateTypes(options.auto_type_candidates, format_template_candidates, info_candidates, original_options,
885
+ best_num_cols, best_sql_types_candidates, best_format_candidates, best_header_row);
895
886
 
896
887
  if (best_format_candidates.empty() || best_header_row.size() == 0) {
897
888
  throw InvalidInputException(
@@ -939,7 +930,8 @@ vector<LogicalType> BufferedCSVReader::SniffCSV(const vector<LogicalType> &reque
939
930
  // #######
940
931
  // ### type detection (refining)
941
932
  // #######
942
- return RefineTypeDetection(type_candidates, requested_types, best_sql_types_candidates, best_format_candidates);
933
+ return RefineTypeDetection(options.auto_type_candidates, requested_types, best_sql_types_candidates,
934
+ best_format_candidates);
943
935
  }
944
936
 
945
937
  bool BufferedCSVReader::TryParseComplexCSV(DataChunk &insert_chunk, string &error_message) {
@@ -1,6 +1,7 @@
1
1
  #include "duckdb/execution/operator/projection/physical_projection.hpp"
2
2
  #include "duckdb/parallel/thread_context.hpp"
3
3
  #include "duckdb/execution/expression_executor.hpp"
4
+ #include "duckdb/planner/expression/bound_reference_expression.hpp"
4
5
 
5
6
  namespace duckdb {
6
7
 
@@ -35,6 +36,39 @@ unique_ptr<OperatorState> PhysicalProjection::GetOperatorState(ExecutionContext
35
36
  return make_unique<ProjectionState>(context, select_list);
36
37
  }
37
38
 
39
+ unique_ptr<PhysicalOperator>
40
+ PhysicalProjection::CreateJoinProjection(vector<LogicalType> proj_types, const vector<LogicalType> &lhs_types,
41
+ const vector<LogicalType> &rhs_types, const vector<idx_t> &left_projection_map,
42
+ const vector<idx_t> &right_projection_map, const idx_t estimated_cardinality) {
43
+
44
+ vector<unique_ptr<Expression>> proj_selects;
45
+ proj_selects.reserve(proj_types.size());
46
+
47
+ if (left_projection_map.empty()) {
48
+ for (storage_t i = 0; i < lhs_types.size(); ++i) {
49
+ proj_selects.emplace_back(make_unique<BoundReferenceExpression>(lhs_types[i], i));
50
+ }
51
+ } else {
52
+ for (auto i : left_projection_map) {
53
+ proj_selects.emplace_back(make_unique<BoundReferenceExpression>(lhs_types[i], i));
54
+ }
55
+ }
56
+ const auto left_cols = lhs_types.size();
57
+
58
+ if (right_projection_map.empty()) {
59
+ for (storage_t i = 0; i < rhs_types.size(); ++i) {
60
+ proj_selects.emplace_back(make_unique<BoundReferenceExpression>(rhs_types[i], left_cols + i));
61
+ }
62
+
63
+ } else {
64
+ for (auto i : right_projection_map) {
65
+ proj_selects.emplace_back(make_unique<BoundReferenceExpression>(rhs_types[i], left_cols + i));
66
+ }
67
+ }
68
+
69
+ return make_unique<PhysicalProjection>(std::move(proj_types), std::move(proj_selects), estimated_cardinality);
70
+ }
71
+
38
72
  string PhysicalProjection::ParamsToString() const {
39
73
  string extra_info;
40
74
  for (auto &expr : select_list) {
@@ -0,0 +1,97 @@
1
+ #include "duckdb/execution/operator/aggregate/physical_window.hpp"
2
+ #include "duckdb/execution/operator/join/physical_iejoin.hpp"
3
+ #include "duckdb/execution/operator/projection/physical_projection.hpp"
4
+ #include "duckdb/execution/physical_plan_generator.hpp"
5
+ #include "duckdb/main/client_context.hpp"
6
+ #include "duckdb/planner/expression/bound_constant_expression.hpp"
7
+ #include "duckdb/planner/expression/bound_reference_expression.hpp"
8
+ #include "duckdb/planner/expression/bound_window_expression.hpp"
9
+ #include "duckdb/planner/operator/logical_asof_join.hpp"
10
+
11
+ namespace duckdb {
12
+
13
+ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalAsOfJoin &op) {
14
+ // now visit the children
15
+ D_ASSERT(op.children.size() == 2);
16
+ idx_t lhs_cardinality = op.children[0]->EstimateCardinality(context);
17
+ idx_t rhs_cardinality = op.children[1]->EstimateCardinality(context);
18
+ auto left = CreatePlan(*op.children[0]);
19
+ auto right = CreatePlan(*op.children[1]);
20
+ D_ASSERT(left && right);
21
+
22
+ // Validate
23
+ vector<idx_t> equi_indexes;
24
+ auto asof_idx = op.conditions.size();
25
+ for (size_t c = 0; c < op.conditions.size(); ++c) {
26
+ auto &cond = op.conditions[c];
27
+ switch (cond.comparison) {
28
+ case ExpressionType::COMPARE_EQUAL:
29
+ case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
30
+ equi_indexes.emplace_back(c);
31
+ break;
32
+ case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
33
+ D_ASSERT(asof_idx == op.conditions.size());
34
+ asof_idx = c;
35
+ break;
36
+ default:
37
+ throw InternalException("Invalid ASOF JOIN comparison");
38
+ }
39
+ }
40
+ D_ASSERT(asof_idx < op.conditions.size());
41
+
42
+ // Temporary implementation: IEJoin of Window
43
+ // LEAD(asof_column, 1, infinity) OVER (PARTITION BY equi_column... ORDER BY asof_column) AS asof_temp
44
+ auto &asof_comp = op.conditions[asof_idx];
45
+ auto &asof_column = asof_comp.right;
46
+ auto asof_type = asof_column->return_type;
47
+ auto asof_temp = make_unique<BoundWindowExpression>(ExpressionType::WINDOW_LEAD, asof_type, nullptr, nullptr);
48
+ asof_temp->children.emplace_back(asof_column->Copy());
49
+ asof_temp->offset_expr = make_unique<BoundConstantExpression>(Value::BIGINT(1));
50
+ asof_temp->default_expr = make_unique<BoundConstantExpression>(Value::Infinity(asof_type));
51
+ for (auto equi_idx : equi_indexes) {
52
+ asof_temp->partitions.emplace_back(op.conditions[equi_idx].right->Copy());
53
+ }
54
+ asof_temp->orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_FIRST, asof_column->Copy());
55
+ asof_temp->start = WindowBoundary::UNBOUNDED_PRECEDING;
56
+ asof_temp->end = WindowBoundary::CURRENT_ROW_ROWS;
57
+
58
+ vector<unique_ptr<Expression>> window_select;
59
+ window_select.emplace_back(std::move(asof_temp));
60
+
61
+ auto window_types = right->types;
62
+ window_types.emplace_back(asof_type);
63
+
64
+ auto window = make_unique<PhysicalWindow>(window_types, std::move(window_select), rhs_cardinality);
65
+ window->children.emplace_back(std::move(right));
66
+
67
+ // IEJoin(left, window, conditions || asof_column < asof_temp)
68
+ JoinCondition asof_upper;
69
+ asof_upper.left = asof_comp.left->Copy();
70
+ asof_upper.right = make_unique<BoundReferenceExpression>(asof_type, window_types.size() - 1);
71
+ asof_upper.comparison = ExpressionType::COMPARE_LESSTHAN;
72
+
73
+ // We have an equality condition, so we may have to deal with projection maps.
74
+ // IEJoin does not (currently) support them, so we have to do it manually
75
+ auto proj_types = op.types;
76
+ op.types.clear();
77
+
78
+ auto lhs_types = op.children[0]->types;
79
+ op.types = lhs_types;
80
+
81
+ auto rhs_types = op.children[1]->types;
82
+ op.types.insert(op.types.end(), rhs_types.begin(), rhs_types.end());
83
+
84
+ op.types.emplace_back(asof_type);
85
+ op.conditions.emplace_back(std::move(asof_upper));
86
+ auto iejoin = make_unique<PhysicalIEJoin>(op, std::move(left), std::move(window), std::move(op.conditions),
87
+ op.join_type, op.estimated_cardinality);
88
+
89
+ // Project away asof_temp and anything from the projection maps
90
+ auto proj = PhysicalProjection::CreateJoinProjection(proj_types, lhs_types, rhs_types, op.left_projection_map,
91
+ op.right_projection_map, lhs_cardinality);
92
+ proj->children.push_back(std::move(iejoin));
93
+
94
+ return proj;
95
+ }
96
+
97
+ } // namespace duckdb
@@ -115,6 +115,9 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalOperator &
115
115
  case LogicalOperatorType::LOGICAL_DELIM_JOIN:
116
116
  plan = CreatePlan((LogicalDelimJoin &)op);
117
117
  break;
118
+ case LogicalOperatorType::LOGICAL_ASOF_JOIN:
119
+ plan = CreatePlan((LogicalAsOfJoin &)op);
120
+ break;
118
121
  case LogicalOperatorType::LOGICAL_COMPARISON_JOIN:
119
122
  plan = CreatePlan((LogicalComparisonJoin &)op);
120
123
  break;
@@ -2,6 +2,7 @@
2
2
  #include "duckdb/common/vector_operations/vector_operations.hpp"
3
3
  #include "duckdb/function/scalar/trigonometric_functions.hpp"
4
4
  #include "duckdb/common/operator/abs.hpp"
5
+ #include "duckdb/common/operator/multiply.hpp"
5
6
  #include "duckdb/common/types/hugeint.hpp"
6
7
  #include "duckdb/common/types/cast_helpers.hpp"
7
8
  #include "duckdb/planner/expression/bound_function_expression.hpp"
@@ -1161,4 +1162,90 @@ void EvenFun::RegisterFunction(BuiltinFunctions &set) {
1161
1162
  ScalarFunction::UnaryFunction<double, double, EvenOperator>));
1162
1163
  }
1163
1164
 
1165
+ //===--------------------------------------------------------------------===//
1166
+ // gcd
1167
+ //===--------------------------------------------------------------------===//
1168
+
1169
+ // should be replaced with std::gcd in a newer C++ standard
1170
+ template <class TA>
1171
+ TA GreatestCommonDivisor(TA left, TA right) {
1172
+ TA a = left;
1173
+ TA b = right;
1174
+
1175
+ // This protects the following modulo operations from a corner case,
1176
+ // where we would get a runtime error due to an integer overflow.
1177
+ if ((left == NumericLimits<TA>::Minimum() && right == -1) ||
1178
+ (left == -1 && right == NumericLimits<TA>::Minimum())) {
1179
+ return 1;
1180
+ }
1181
+
1182
+ while (true) {
1183
+ if (a == 0) {
1184
+ return TryAbsOperator::Operation<TA, TA>(b);
1185
+ }
1186
+ b %= a;
1187
+
1188
+ if (b == 0) {
1189
+ return TryAbsOperator::Operation<TA, TA>(a);
1190
+ }
1191
+ a %= b;
1192
+ }
1193
+ }
1194
+
1195
+ struct GreatestCommonDivisorOperator {
1196
+ template <class TA, class TB, class TR>
1197
+ static inline TR Operation(TA left, TB right) {
1198
+ return GreatestCommonDivisor(left, right);
1199
+ }
1200
+ };
1201
+
1202
+ void GreatestCommonDivisorFun::RegisterFunction(BuiltinFunctions &set) {
1203
+ ScalarFunctionSet funcs("gcd");
1204
+
1205
+ funcs.AddFunction(
1206
+ ScalarFunction({LogicalType::BIGINT, LogicalType::BIGINT}, LogicalType::BIGINT,
1207
+ ScalarFunction::BinaryFunction<int64_t, int64_t, int64_t, GreatestCommonDivisorOperator>));
1208
+ funcs.AddFunction(
1209
+ ScalarFunction({LogicalType::HUGEINT, LogicalType::HUGEINT}, LogicalType::HUGEINT,
1210
+ ScalarFunction::BinaryFunction<hugeint_t, hugeint_t, hugeint_t, GreatestCommonDivisorOperator>));
1211
+
1212
+ set.AddFunction(funcs);
1213
+ funcs.name = "greatest_common_divisor";
1214
+ set.AddFunction(funcs);
1215
+ }
1216
+
1217
+ //===--------------------------------------------------------------------===//
1218
+ // lcm
1219
+ //===--------------------------------------------------------------------===//
1220
+
1221
+ // should be replaced with std::lcm in a newer C++ standard
1222
+ struct LeastCommonMultipleOperator {
1223
+ template <class TA, class TB, class TR>
1224
+ static inline TR Operation(TA left, TB right) {
1225
+ if (left == 0 || right == 0) {
1226
+ return 0;
1227
+ }
1228
+ TR result;
1229
+ if (!TryMultiplyOperator::Operation<TA, TB, TR>(left, right / GreatestCommonDivisor(left, right), result)) {
1230
+ throw OutOfRangeException("lcm value is out of range");
1231
+ }
1232
+ return TryAbsOperator::Operation<TR, TR>(result);
1233
+ }
1234
+ };
1235
+
1236
+ void LeastCommonMultipleFun::RegisterFunction(BuiltinFunctions &set) {
1237
+ ScalarFunctionSet funcs("lcm");
1238
+
1239
+ funcs.AddFunction(
1240
+ ScalarFunction({LogicalType::BIGINT, LogicalType::BIGINT}, LogicalType::BIGINT,
1241
+ ScalarFunction::BinaryFunction<int64_t, int64_t, int64_t, LeastCommonMultipleOperator>));
1242
+ funcs.AddFunction(
1243
+ ScalarFunction({LogicalType::HUGEINT, LogicalType::HUGEINT}, LogicalType::HUGEINT,
1244
+ ScalarFunction::BinaryFunction<hugeint_t, hugeint_t, hugeint_t, LeastCommonMultipleOperator>));
1245
+
1246
+ set.AddFunction(funcs);
1247
+ funcs.name = "least_common_multiple";
1248
+ set.AddFunction(funcs);
1249
+ }
1250
+
1164
1251
  } // namespace duckdb
@@ -41,6 +41,9 @@ void BuiltinFunctions::RegisterMathFunctions() {
41
41
  Register<SignBitFun>();
42
42
  Register<IsInfiniteFun>();
43
43
  Register<IsFiniteFun>();
44
+
45
+ Register<GreatestCommonDivisorFun>();
46
+ Register<LeastCommonMultipleFun>();
44
47
  }
45
48
 
46
49
  } // namespace duckdb