duckdb 0.7.2-dev921.0 → 0.7.2-dev982.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/package.json +1 -1
  2. package/src/duckdb/src/common/enums/logical_operator_type.cpp +2 -0
  3. package/src/duckdb/src/common/serializer/enum_serializer.cpp +4 -0
  4. package/src/duckdb/src/common/types/value.cpp +46 -0
  5. package/src/duckdb/src/execution/column_binding_resolver.cpp +15 -5
  6. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +2 -0
  7. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +3 -3
  8. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +5 -13
  9. package/src/duckdb/src/execution/operator/projection/physical_projection.cpp +34 -0
  10. package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +97 -0
  11. package/src/duckdb/src/execution/physical_plan_generator.cpp +3 -0
  12. package/src/duckdb/src/function/scalar/math/numeric.cpp +87 -0
  13. package/src/duckdb/src/function/scalar/math_functions.cpp +3 -0
  14. package/src/duckdb/src/function/scalar/string/hex.cpp +201 -0
  15. package/src/duckdb/src/function/scalar/string_functions.cpp +1 -0
  16. package/src/duckdb/src/function/table/read_csv.cpp +46 -0
  17. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  18. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +5 -4
  19. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +1 -0
  20. package/src/duckdb/src/include/duckdb/common/string_util.hpp +13 -0
  21. package/src/duckdb/src/include/duckdb/common/types/value.hpp +11 -7
  22. package/src/duckdb/src/include/duckdb/common/vector_operations/unary_executor.hpp +2 -2
  23. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +6 -0
  24. package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_projection.hpp +5 -0
  25. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +1 -0
  26. package/src/duckdb/src/include/duckdb/function/scalar/math_functions.hpp +8 -0
  27. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +4 -0
  28. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
  29. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +1 -0
  30. package/src/duckdb/src/include/duckdb/planner/operator/logical_asof_join.hpp +22 -0
  31. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -2
  32. package/src/duckdb/src/include/duckdb.h +1 -1
  33. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -0
  34. package/src/duckdb/src/optimizer/filter_pullup.cpp +3 -1
  35. package/src/duckdb/src/optimizer/filter_pushdown.cpp +3 -1
  36. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +4 -0
  37. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +8 -4
  38. package/src/duckdb/src/optimizer/pullup/pullup_from_left.cpp +2 -2
  39. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +1 -1
  40. package/src/duckdb/src/optimizer/pushdown/pushdown_inner_join.cpp +3 -0
  41. package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +4 -2
  42. package/src/duckdb/src/optimizer/pushdown/pushdown_mark_join.cpp +1 -1
  43. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +1 -0
  44. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +1 -0
  45. package/src/duckdb/src/optimizer/statistics_propagator.cpp +1 -0
  46. package/src/duckdb/src/parser/tableref/joinref.cpp +4 -0
  47. package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +8 -1
  48. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +10 -3
  49. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +60 -12
  50. package/src/duckdb/src/planner/logical_operator.cpp +3 -0
  51. package/src/duckdb/src/planner/logical_operator_visitor.cpp +1 -0
  52. package/src/duckdb/src/planner/operator/logical_asof_join.cpp +8 -0
  53. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +3 -1
  54. package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +32 -0
  55. package/src/duckdb/third_party/libpg_query/include/nodes/primnodes.hpp +3 -3
  56. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +915 -913
  57. package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +1 -0
  58. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +17371 -17306
  59. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  60. package/src/duckdb/ub_src_function_scalar_string.cpp +2 -0
  61. package/src/duckdb/ub_src_planner_operator.cpp +2 -0
@@ -7,6 +7,7 @@
7
7
  #include "duckdb/planner/expression_iterator.hpp"
8
8
  #include "duckdb/planner/binder.hpp"
9
9
  #include "duckdb/planner/operator/logical_any_join.hpp"
10
+ #include "duckdb/planner/operator/logical_asof_join.hpp"
10
11
  #include "duckdb/planner/operator/logical_comparison_join.hpp"
11
12
  #include "duckdb/planner/operator/logical_cross_product.hpp"
12
13
  #include "duckdb/planner/operator/logical_filter.hpp"
@@ -104,12 +105,43 @@ void LogicalComparisonJoin::ExtractJoinConditions(JoinType type, unique_ptr<Logi
104
105
  return ExtractJoinConditions(type, left_child, right_child, expressions, conditions, arbitrary_expressions);
105
106
  }
106
107
 
107
- unique_ptr<LogicalOperator> LogicalComparisonJoin::CreateJoin(JoinType type, unique_ptr<LogicalOperator> left_child,
108
+ unique_ptr<LogicalOperator> LogicalComparisonJoin::CreateJoin(JoinType type, JoinRefType reftype,
109
+ unique_ptr<LogicalOperator> left_child,
108
110
  unique_ptr<LogicalOperator> right_child,
109
111
  vector<JoinCondition> conditions,
110
112
  vector<unique_ptr<Expression>> arbitrary_expressions) {
113
+ // Validate the conditions
111
114
  bool need_to_consider_arbitrary_expressions = true;
112
- if (type == JoinType::INNER) {
115
+ switch (reftype) {
116
+ case JoinRefType::ASOF: {
117
+ need_to_consider_arbitrary_expressions = false;
118
+ auto asof_idx = conditions.size();
119
+ for (size_t c = 0; c < conditions.size(); ++c) {
120
+ auto &cond = conditions[c];
121
+ switch (cond.comparison) {
122
+ case ExpressionType::COMPARE_EQUAL:
123
+ case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
124
+ break;
125
+ case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
126
+ if (asof_idx < conditions.size()) {
127
+ throw BinderException("Multiple ASOF JOIN inequalities");
128
+ }
129
+ asof_idx = c;
130
+ break;
131
+ default:
132
+ throw BinderException("Invalid ASOF JOIN comparison");
133
+ }
134
+ }
135
+ if (asof_idx == conditions.size()) {
136
+ throw BinderException("Missing ASOF JOIN inequality");
137
+ }
138
+ break;
139
+ }
140
+ default:
141
+ break;
142
+ }
143
+
144
+ if (type == JoinType::INNER && reftype == JoinRefType::REGULAR) {
113
145
  // for inner joins we can push arbitrary expressions as a filter
114
146
  // here we prefer to create a comparison join if possible
115
147
  // that way we can use the much faster hash join to process the main join
@@ -144,7 +176,12 @@ unique_ptr<LogicalOperator> LogicalComparisonJoin::CreateJoin(JoinType type, uni
144
176
  } else {
145
177
  // we successfully converted expressions into JoinConditions
146
178
  // create a LogicalComparisonJoin
147
- auto comp_join = make_unique<LogicalComparisonJoin>(type);
179
+ unique_ptr<LogicalComparisonJoin> comp_join;
180
+ if (reftype == JoinRefType::ASOF) {
181
+ comp_join = make_unique<LogicalAsOfJoin>(type);
182
+ } else {
183
+ comp_join = make_unique<LogicalComparisonJoin>(type);
184
+ }
148
185
  comp_join->conditions = std::move(conditions);
149
186
  comp_join->children.push_back(std::move(left_child));
150
187
  comp_join->children.push_back(std::move(right_child));
@@ -179,15 +216,16 @@ static bool HasCorrelatedColumns(Expression &expression) {
179
216
  return has_correlated_columns;
180
217
  }
181
218
 
182
- unique_ptr<LogicalOperator> LogicalComparisonJoin::CreateJoin(JoinType type, unique_ptr<LogicalOperator> left_child,
219
+ unique_ptr<LogicalOperator> LogicalComparisonJoin::CreateJoin(JoinType type, JoinRefType reftype,
220
+ unique_ptr<LogicalOperator> left_child,
183
221
  unique_ptr<LogicalOperator> right_child,
184
222
  unique_ptr<Expression> condition) {
185
223
  vector<JoinCondition> conditions;
186
224
  vector<unique_ptr<Expression>> arbitrary_expressions;
187
225
  LogicalComparisonJoin::ExtractJoinConditions(type, left_child, right_child, std::move(condition), conditions,
188
226
  arbitrary_expressions);
189
- return LogicalComparisonJoin::CreateJoin(type, std::move(left_child), std::move(right_child), std::move(conditions),
190
- std::move(arbitrary_expressions));
227
+ return LogicalComparisonJoin::CreateJoin(type, reftype, std::move(left_child), std::move(right_child),
228
+ std::move(conditions), std::move(arbitrary_expressions));
191
229
  }
192
230
 
193
231
  unique_ptr<LogicalOperator> Binder::CreatePlan(BoundJoinRef &ref) {
@@ -201,7 +239,8 @@ unique_ptr<LogicalOperator> Binder::CreatePlan(BoundJoinRef &ref) {
201
239
  // we reduce expression depth of all columns in the "ref.correlated_columns" set by 1
202
240
  LateralBinder::ReduceExpressionDepth(*right, ref.correlated_columns);
203
241
  }
204
- if (ref.type == JoinType::RIGHT && ClientConfig::GetConfig(context).enable_optimizer) {
242
+ if (ref.type == JoinType::RIGHT && ref.ref_type != JoinRefType::ASOF &&
243
+ ClientConfig::GetConfig(context).enable_optimizer) {
205
244
  // we turn any right outer joins into left outer joins for optimization purposes
206
245
  // they are the same but with sides flipped, so treating them the same simplifies life
207
246
  ref.type = JoinType::LEFT;
@@ -220,7 +259,8 @@ unique_ptr<LogicalOperator> Binder::CreatePlan(BoundJoinRef &ref) {
220
259
  default:
221
260
  break;
222
261
  }
223
- if (ref.type == JoinType::INNER && (ref.condition->HasSubquery() || HasCorrelatedColumns(*ref.condition))) {
262
+ if (ref.type == JoinType::INNER && (ref.condition->HasSubquery() || HasCorrelatedColumns(*ref.condition)) &&
263
+ ref.ref_type == JoinRefType::REGULAR) {
224
264
  // inner join, generate a cross product + filter
225
265
  // this will be later turned into a proper join by the join order optimizer
226
266
  auto root = LogicalCrossProduct::Create(std::move(left), std::move(right));
@@ -235,8 +275,8 @@ unique_ptr<LogicalOperator> Binder::CreatePlan(BoundJoinRef &ref) {
235
275
  }
236
276
 
237
277
  // now create the join operator from the join condition
238
- auto result =
239
- LogicalComparisonJoin::CreateJoin(ref.type, std::move(left), std::move(right), std::move(ref.condition));
278
+ auto result = LogicalComparisonJoin::CreateJoin(ref.type, ref.ref_type, std::move(left), std::move(right),
279
+ std::move(ref.condition));
240
280
 
241
281
  LogicalOperator *join;
242
282
  if (result->type == LogicalOperatorType::LOGICAL_FILTER) {
@@ -254,7 +294,9 @@ unique_ptr<LogicalOperator> Binder::CreatePlan(BoundJoinRef &ref) {
254
294
  }
255
295
 
256
296
  // we visit the expressions depending on the type of join
257
- if (join->type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN) {
297
+ switch (join->type) {
298
+ case LogicalOperatorType::LOGICAL_ASOF_JOIN:
299
+ case LogicalOperatorType::LOGICAL_COMPARISON_JOIN: {
258
300
  // comparison join
259
301
  // in this join we visit the expressions on the LHS with the LHS as root node
260
302
  // and the expressions on the RHS with the RHS as root node
@@ -263,12 +305,18 @@ unique_ptr<LogicalOperator> Binder::CreatePlan(BoundJoinRef &ref) {
263
305
  PlanSubqueries(&comp_join.conditions[i].left, &comp_join.children[0]);
264
306
  PlanSubqueries(&comp_join.conditions[i].right, &comp_join.children[1]);
265
307
  }
266
- } else if (join->type == LogicalOperatorType::LOGICAL_ANY_JOIN) {
308
+ break;
309
+ }
310
+ case LogicalOperatorType::LOGICAL_ANY_JOIN: {
267
311
  auto &any_join = (LogicalAnyJoin &)*join;
268
312
  // for the any join we just visit the condition
269
313
  if (any_join.condition->HasSubquery()) {
270
314
  throw NotImplementedException("Cannot perform non-inner join on subquery!");
271
315
  }
316
+ break;
317
+ }
318
+ default:
319
+ break;
272
320
  }
273
321
  return result;
274
322
  }
@@ -257,6 +257,9 @@ unique_ptr<LogicalOperator> LogicalOperator::Deserialize(Deserializer &deseriali
257
257
  case LogicalOperatorType::LOGICAL_DELIM_JOIN:
258
258
  result = LogicalDelimJoin::Deserialize(state, reader);
259
259
  break;
260
+ case LogicalOperatorType::LOGICAL_ASOF_JOIN:
261
+ result = LogicalAsOfJoin::Deserialize(state, reader);
262
+ break;
260
263
  case LogicalOperatorType::LOGICAL_COMPARISON_JOIN:
261
264
  result = LogicalComparisonJoin::Deserialize(state, reader);
262
265
  break;
@@ -65,6 +65,7 @@ void LogicalOperatorVisitor::EnumerateExpressions(LogicalOperator &op,
65
65
  }
66
66
  break;
67
67
  }
68
+ case LogicalOperatorType::LOGICAL_ASOF_JOIN:
68
69
  case LogicalOperatorType::LOGICAL_DELIM_JOIN:
69
70
  case LogicalOperatorType::LOGICAL_COMPARISON_JOIN: {
70
71
  if (op.type == LogicalOperatorType::LOGICAL_DELIM_JOIN) {
@@ -0,0 +1,8 @@
1
+ #include "duckdb/planner/operator/logical_asof_join.hpp"
2
+
3
+ namespace duckdb {
4
+
5
+ LogicalAsOfJoin::LogicalAsOfJoin(JoinType type) : LogicalComparisonJoin(type, LogicalOperatorType::LOGICAL_ASOF_JOIN) {
6
+ }
7
+
8
+ } // namespace duckdb
@@ -262,6 +262,7 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
262
262
  return std::move(join);
263
263
  }
264
264
  case LogicalOperatorType::LOGICAL_ANY_JOIN:
265
+ case LogicalOperatorType::LOGICAL_ASOF_JOIN:
265
266
  case LogicalOperatorType::LOGICAL_COMPARISON_JOIN: {
266
267
  auto &join = (LogicalJoin &)*plan;
267
268
  D_ASSERT(plan->children.size() == 2);
@@ -334,7 +335,8 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
334
335
  auto right = make_unique<BoundColumnRefExpression>(
335
336
  correlated_columns[i].type, ColumnBinding(right_binding.table_index, right_binding.column_index + i));
336
337
 
337
- if (join.type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN) {
338
+ if (join.type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN ||
339
+ join.type == LogicalOperatorType::LOGICAL_ASOF_JOIN) {
338
340
  JoinCondition cond;
339
341
  cond.left = std::move(left);
340
342
  cond.right = std::move(right);
@@ -681,6 +681,38 @@ typedef enum PGJoinType {
681
681
  */
682
682
  } PGJoinType;
683
683
 
684
+ /*
685
+ * PGJoinRefType -
686
+ * enums for the types of implied conditions
687
+ *
688
+ * PGJoinRefType specifies the semantics of interpreting the join conditions.
689
+ * These can be explicit (e.g., REGULAR), implied (e.g., NATURAL)
690
+ * or interpreted in a particular manner (e.g., ASOF)
691
+ *
692
+ * This is a generalisation of the old Postgres isNatural flag.
693
+ */
694
+ typedef enum PGJoinRefType {
695
+ PG_JOIN_REGULAR, /* Join conditions are interpreted as is */
696
+ PG_JOIN_NATURAL, /* Join conditions are inferred from the column names */
697
+
698
+ /*
699
+ * ASOF joins are joins with a single inequality predicate
700
+ * and optional equality predicates.
701
+ * The semantics are equivalent to the following window join:
702
+ * times t
703
+ * <jointype> JOIN (
704
+ * SELECT *,
705
+ * LEAD(begin, 1, 'infinity') OVER ([PARTITION BY key] ORDER BY begin) AS end
706
+ * FROM events) e
707
+ * ON t.ts >= e.begin AND t.ts < e.end [AND t.key = e.key]
708
+ */
709
+ PG_JOIN_ASOF
710
+
711
+ /*
712
+ * Positional join is a candidate to move here
713
+ */
714
+ } PGJoinRefType;
715
+
684
716
  /*
685
717
  * OUTER joins are those for which pushed-down quals must behave differently
686
718
  * from the join's own quals. This is in fact everything except INNER and
@@ -1324,7 +1324,7 @@ typedef struct PGRangeTblRef {
1324
1324
  /*----------
1325
1325
  * PGJoinExpr - for SQL JOIN expressions
1326
1326
  *
1327
- * isNatural, usingClause, and quals are interdependent. The user can write
1327
+ * joinreftype, usingClause, and quals are interdependent. The user can write
1328
1328
  * only one of NATURAL, USING(), or ON() (this is enforced by the grammar).
1329
1329
  * If he writes NATURAL then parse analysis generates the equivalent USING()
1330
1330
  * list, and from that fills in "quals" with the right equality comparisons.
@@ -1347,7 +1347,7 @@ typedef struct PGRangeTblRef {
1347
1347
  typedef struct PGJoinExpr {
1348
1348
  PGNodeTag type;
1349
1349
  PGJoinType jointype; /* type of join */
1350
- bool isNatural; /* Natural join? Will need to shape table */
1350
+ PGJoinRefType joinreftype; /* Regular/Natural/AsOf join? Will need to shape table */
1351
1351
  PGNode *larg; /* left subtree */
1352
1352
  PGNode *rarg; /* right subtree */
1353
1353
  PGList *usingClause; /* USING clause, if any (list of String) */
@@ -1398,4 +1398,4 @@ typedef struct PGOnConflictExpr {
1398
1398
  PGList *exclRelTlist; /* tlist of the EXCLUDED pseudo relation */
1399
1399
  } PGOnConflictExpr;
1400
1400
 
1401
- }
1401
+ }