duckdb 0.8.2-dev2700.0 → 0.8.2-dev2809.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/icu/icu-makedate.cpp +12 -6
  3. package/src/duckdb/src/common/adbc/adbc.cpp +52 -21
  4. package/src/duckdb/src/common/adbc/driver_manager.cpp +12 -2
  5. package/src/duckdb/src/common/enum_util.cpp +5 -0
  6. package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +35 -5
  7. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +283 -91
  8. package/src/duckdb/src/execution/operator/filter/physical_filter.cpp +1 -1
  9. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -2
  10. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
  11. package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -6
  12. package/src/duckdb/src/execution/window_executor.cpp +10 -1
  13. package/src/duckdb/src/function/table/version/pragma_version.cpp +5 -2
  14. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +2 -0
  15. package/src/duckdb/src/include/duckdb/common/enums/pending_execution_result.hpp +1 -1
  16. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -1
  17. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +0 -2
  18. package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +5 -0
  19. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +37 -63
  20. package/src/duckdb/src/include/duckdb/optimizer/join_order/cost_model.hpp +37 -0
  21. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +14 -29
  22. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +7 -21
  23. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +0 -11
  24. package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +89 -0
  25. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +17 -31
  26. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +113 -0
  27. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +73 -0
  28. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +73 -0
  29. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +4 -1
  30. package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +0 -2
  31. package/src/duckdb/src/include/duckdb.h +11 -1
  32. package/src/duckdb/src/main/capi/pending-c.cpp +17 -0
  33. package/src/duckdb/src/main/pending_query_result.cpp +9 -1
  34. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +79 -325
  35. package/src/duckdb/src/optimizer/join_order/cost_model.cpp +19 -0
  36. package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -37
  37. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +48 -1078
  38. package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +552 -0
  39. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +32 -29
  40. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +409 -0
  41. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +356 -0
  42. package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +351 -0
  43. package/src/duckdb/src/parallel/executor.cpp +6 -0
  44. package/src/duckdb/src/parallel/task_scheduler.cpp +7 -0
  45. package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +1 -1
  46. package/src/duckdb/src/planner/operator/logical_get.cpp +4 -0
  47. package/src/duckdb/ub_src_optimizer_join_order.cpp +10 -0
@@ -34,6 +34,8 @@ public:
34
34
  //! If this returns RESULT_READY, the Execute function can be called to obtain a pointer to the result.
35
35
  //! If this returns RESULT_NOT_READY, the ExecuteTask function should be called again.
36
36
  //! If this returns EXECUTION_ERROR, an error occurred during execution.
37
+ //! If this returns NO_TASKS_AVAILABLE, this means currently no meaningful work can be done by the current executor,
38
+ //! but tasks may become available in the future.
37
39
  //! The error message can be obtained by calling GetError() on the PendingQueryResult.
38
40
  DUCKDB_API PendingExecutionResult ExecuteTask();
39
41
 
@@ -43,6 +45,9 @@ public:
43
45
 
44
46
  DUCKDB_API void Close();
45
47
 
48
+ //! Function to determine whether execution is considered finished
49
+ DUCKDB_API static bool IsFinished(PendingExecutionResult result);
50
+
46
51
  private:
47
52
  shared_ptr<ClientContext> context;
48
53
  bool allow_stream_result;
@@ -7,21 +7,14 @@
7
7
  //===----------------------------------------------------------------------===//
8
8
  #pragma once
9
9
 
10
- #include "duckdb/optimizer/join_order/join_node.hpp"
11
- #include "duckdb/planner/column_binding.hpp"
12
10
  #include "duckdb/planner/column_binding_map.hpp"
13
- #include "duckdb/planner/filter/conjunction_filter.hpp"
14
- #include "duckdb/planner/filter/constant_filter.hpp"
11
+ #include "duckdb/optimizer/join_order/query_graph.hpp"
12
+
13
+ #include "duckdb/optimizer/join_order/relation_statistics_helper.hpp"
15
14
 
16
15
  namespace duckdb {
17
16
 
18
- struct RelationAttributes {
19
- string original_name;
20
- // the relation columns used in join filters
21
- // Needed when iterating over columns and initializing total domain values.
22
- unordered_set<idx_t> columns;
23
- double cardinality;
24
- };
17
+ struct FilterInfo;
25
18
 
26
19
  struct RelationsToTDom {
27
20
  //! column binding sets that are equivalent in a join plan.
@@ -33,19 +26,13 @@ struct RelationsToTDom {
33
26
  idx_t tdom_no_hll;
34
27
  bool has_tdom_hll;
35
28
  vector<FilterInfo *> filters;
29
+ vector<string> column_names;
36
30
 
37
31
  RelationsToTDom(const column_binding_set_t &column_binding_set)
38
32
  : equivalent_relations(column_binding_set), tdom_hll(0), tdom_no_hll(NumericLimits<idx_t>::Maximum()),
39
33
  has_tdom_hll(false) {};
40
34
  };
41
35
 
42
- struct NodeOp {
43
- unique_ptr<JoinNode> node;
44
- LogicalOperator &op;
45
-
46
- NodeOp(unique_ptr<JoinNode> node, LogicalOperator &op) : node(std::move(node)), op(op) {};
47
- };
48
-
49
36
  struct Subgraph2Denominator {
50
37
  unordered_set<idx_t> relations;
51
38
  double denom;
@@ -53,69 +40,56 @@ struct Subgraph2Denominator {
53
40
  Subgraph2Denominator() : relations(), denom(1) {};
54
41
  };
55
42
 
56
- class CardinalityEstimator {
43
+ class CardinalityHelper {
57
44
  public:
58
- explicit CardinalityEstimator(ClientContext &context) : context(context) {
45
+ CardinalityHelper() {
59
46
  }
47
+ CardinalityHelper(double cardinality_before_filters, double filter_string)
48
+ : cardinality_before_filters(cardinality_before_filters), filter_strength(filter_string) {};
60
49
 
61
- private:
62
- ClientContext &context;
50
+ public:
51
+ double cardinality_before_filters;
52
+ double filter_strength;
53
+
54
+ vector<string> table_names_joined;
55
+ vector<string> column_names;
56
+ };
63
57
 
64
- //! A mapping of relation id -> RelationAttributes
65
- unordered_map<idx_t, RelationAttributes> relation_attributes;
66
- //! A mapping of (relation, bound_column) -> (actual table, actual column)
67
- column_binding_map_t<ColumnBinding> relation_column_to_original_column;
58
+ class CardinalityEstimator {
59
+ public:
60
+ explicit CardinalityEstimator() {};
68
61
 
62
+ private:
69
63
  vector<RelationsToTDom> relations_to_tdoms;
64
+ unordered_map<string, CardinalityHelper> relation_set_2_cardinality;
65
+ JoinRelationSetManager set_manager;
66
+ vector<RelationStats> relation_stats;
70
67
 
71
68
  public:
72
- static constexpr double DEFAULT_SELECTIVITY = 0.2;
73
-
74
- static void VerifySymmetry(JoinNode &result, JoinNode &entry);
75
-
76
- //! given a binding of (relation, column) used for DP, and a (table, column) in that catalog
77
- //! Add the key value entry into the relation_column_to_original_column
78
- void AddRelationToColumnMapping(ColumnBinding key, ColumnBinding value);
79
- //! Add a column to the relation_to_columns map.
80
- void AddColumnToRelationMap(idx_t table_index, idx_t column_index);
81
- //! Dump all bindings in relation_column_to_original_column into the child_binding_map
82
- // If you have a non-reorderable join, this function is used to keep track of bindings
83
- // in the child join plan.
84
- void CopyRelationMap(column_binding_map_t<ColumnBinding> &child_binding_map);
85
- void MergeBindings(idx_t, idx_t relation_id, vector<column_binding_map_t<ColumnBinding>> &child_binding_maps);
86
- void AddRelationColumnMapping(LogicalGet &get, idx_t relation_id);
87
-
88
- void InitTotalDomains();
89
- void UpdateTotalDomains(JoinNode &node, LogicalOperator &op);
90
- void InitEquivalentRelations(vector<unique_ptr<FilterInfo>> &filter_infos);
91
-
92
- void InitCardinalityEstimatorProps(vector<NodeOp> &node_ops, vector<unique_ptr<FilterInfo>> &filter_infos);
93
- double EstimateCardinalityWithSet(JoinRelationSet &new_set);
94
- void EstimateBaseTableCardinality(JoinNode &node, LogicalOperator &op);
95
- double EstimateCrossProduct(const JoinNode &left, const JoinNode &right);
96
- static double ComputeCost(JoinNode &left, JoinNode &right, double expected_cardinality);
69
+ void RemoveEmptyTotalDomains();
70
+ void UpdateTotalDomains(optional_ptr<JoinRelationSet> set, RelationStats &stats);
71
+ void InitEquivalentRelations(const vector<unique_ptr<FilterInfo>> &filter_infos);
72
+
73
+ void InitCardinalityEstimatorProps(optional_ptr<JoinRelationSet> set, RelationStats &stats);
74
+
75
+ //! cost model needs estimated cardinalities to the fraction since the formula captures
76
+ //! distinct count selectivities and multiplicities. Hence the template
77
+ template <class T>
78
+ T EstimateCardinalityWithSet(JoinRelationSet &new_set);
79
+
80
+ //! used for debugging.
81
+ void AddRelationNamesToTdoms(vector<RelationStats> &stats);
82
+ void PrintRelationToTdomInfo();
97
83
 
98
84
  private:
99
85
  bool SingleColumnFilter(FilterInfo &filter_info);
100
- //! Filter & bindings -> list of indexes into the equivalent_relations array.
101
- // The column binding set at each index is an equivalence set.
102
86
  vector<idx_t> DetermineMatchingEquivalentSets(FilterInfo *filter_info);
103
-
104
87
  //! Given a filter, add the column bindings to the matching equivalent set at the index
105
88
  //! given in matching equivalent sets.
106
89
  //! If there are multiple equivalence sets, they are merged.
107
90
  void AddToEquivalenceSets(FilterInfo *filter_info, vector<idx_t> matching_equivalent_sets);
108
-
109
- optional_ptr<TableFilterSet> GetTableFilters(LogicalOperator &op, idx_t table_index);
110
-
111
91
  void AddRelationTdom(FilterInfo &filter_info);
112
92
  bool EmptyFilter(FilterInfo &filter_info);
113
-
114
- idx_t InspectConjunctionAND(idx_t cardinality, idx_t column_index, ConjunctionAndFilter &fil,
115
- unique_ptr<BaseStatistics> base_stats);
116
- idx_t InspectConjunctionOR(idx_t cardinality, idx_t column_index, ConjunctionOrFilter &fil,
117
- unique_ptr<BaseStatistics> base_stats);
118
- idx_t InspectTableFilters(idx_t cardinality, LogicalOperator &op, TableFilterSet &table_filters, idx_t table_index);
119
93
  };
120
94
 
121
95
  } // namespace duckdb
@@ -0,0 +1,37 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/optimizer/join_order/cost_model.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+ #pragma once
9
+
10
+ #include "duckdb/optimizer/join_order/join_node.hpp"
11
+ #include "duckdb/optimizer/join_order/cardinality_estimator.hpp"
12
+
13
+ namespace duckdb {
14
+
15
+ class QueryGraphManager;
16
+
17
+ class CostModel {
18
+ public:
19
+ CostModel(QueryGraphManager &query_graph_manager);
20
+
21
+ private:
22
+ //! query graph storing relation manager information
23
+ QueryGraphManager &query_graph_manager;
24
+
25
+ public:
26
+ void InitCostModel();
27
+
28
+ //! Compute cost of a join relation set
29
+ double ComputeCost(JoinNode &left, JoinNode &right);
30
+
31
+ //! Cardinality Estimator used to calculate cost
32
+ CardinalityEstimator cardinality_estimator;
33
+
34
+ private:
35
+ };
36
+
37
+ } // namespace duckdb
@@ -5,62 +5,47 @@
5
5
  //
6
6
  //
7
7
  //===----------------------------------------------------------------------===//
8
-
9
8
  #pragma once
10
9
 
11
- #include "duckdb/common/unordered_map.hpp"
12
- #include "duckdb/common/unordered_set.hpp"
13
- #include "duckdb/optimizer/join_order/estimated_properties.hpp"
14
10
  #include "duckdb/optimizer/join_order/join_relation.hpp"
15
11
  #include "duckdb/optimizer/join_order/query_graph.hpp"
16
- #include "duckdb/parser/expression_map.hpp"
17
- #include "duckdb/planner/logical_operator_visitor.hpp"
18
- #include "duckdb/planner/table_filter.hpp"
19
- #include "duckdb/storage/statistics/distinct_statistics.hpp"
20
12
 
21
13
  namespace duckdb {
22
14
 
23
- class JoinOrderOptimizer;
15
+ struct NeighborInfo;
24
16
 
25
17
  class JoinNode {
26
18
  public:
27
19
  //! Represents a node in the join plan
28
20
  JoinRelationSet &set;
21
+ //! information on how left and right are connected
29
22
  optional_ptr<NeighborInfo> info;
30
- //! If the JoinNode is a base table, then base_cardinality is the cardinality before filters
31
- //! estimated_props.cardinality will be the cardinality after filters. With no filters, the two are equal
32
- bool has_filter;
23
+ //! left and right plans
33
24
  optional_ptr<JoinNode> left;
34
25
  optional_ptr<JoinNode> right;
35
26
 
36
- unique_ptr<EstimatedProperties> estimated_props;
27
+ //! The cost of the join node. The cost is stored here so that the cost of
28
+ //! a join node stays in sync with how the join node is constructed. Storing the cost in an unordered_set
29
+ //! in the cost model is error prone. If the plan enumerator join node is updated and not the cost model
30
+ //! the whole Join Order Optimizer can start exhibiting undesired behavior.
31
+ double cost;
32
+ //! used only to populate logical operators with estimated cardinalities after the best join plan has been found.
33
+ idx_t cardinality;
34
+
35
+ //! Create an intermediate node in the join tree. base_cardinality = estimated_props.cardinality
36
+ JoinNode(JoinRelationSet &set, optional_ptr<NeighborInfo> info, JoinNode &left, JoinNode &right, double cost);
37
37
 
38
38
  //! Create a leaf node in the join tree
39
39
  //! set cost to 0 for leaf nodes
40
40
  //! cost will be the cost to *produce* an intermediate table
41
- JoinNode(JoinRelationSet &set, const double base_cardinality);
42
-
43
- //! Create an intermediate node in the join tree. base_cardinality = estimated_props.cardinality
44
- JoinNode(JoinRelationSet &set, optional_ptr<NeighborInfo> info, JoinNode &left, JoinNode &right,
45
- const double base_cardinality, double cost);
41
+ JoinNode(JoinRelationSet &set);
46
42
 
47
43
  bool operator==(const JoinNode &other) {
48
44
  return other.set.ToString().compare(set.ToString()) == 0;
49
45
  }
50
46
 
51
47
  private:
52
- double base_cardinality;
53
-
54
48
  public:
55
- template <class CARDINALITY_TYPE>
56
- CARDINALITY_TYPE GetCardinality() const {
57
- return estimated_props->GetCardinality<CARDINALITY_TYPE>();
58
- }
59
- double GetCost();
60
- void SetCost(double cost);
61
- double GetBaseTableCardinality();
62
- void SetBaseTableCardinality(double base_card);
63
- void SetEstimatedCardinality(double estimated_card);
64
49
  void PrintJoinNode();
65
50
  string ToString();
66
51
  };
@@ -10,6 +10,7 @@
10
10
 
11
11
  #include "duckdb/common/unordered_map.hpp"
12
12
  #include "duckdb/common/unordered_set.hpp"
13
+ #include "duckdb/optimizer/join_order/query_graph_manager.hpp"
13
14
  #include "duckdb/optimizer/join_order/join_relation.hpp"
14
15
  #include "duckdb/optimizer/join_order/cardinality_estimator.hpp"
15
16
  #include "duckdb/optimizer/join_order/query_graph.hpp"
@@ -22,22 +23,13 @@
22
23
 
23
24
  namespace duckdb {
24
25
 
25
- struct GenerateJoinRelation {
26
- GenerateJoinRelation(JoinRelationSet &set, unique_ptr<LogicalOperator> op_p) : set(set), op(std::move(op_p)) {
27
- }
28
-
29
- JoinRelationSet &set;
30
- unique_ptr<LogicalOperator> op;
31
- };
32
-
33
26
  class JoinOrderOptimizer {
34
27
  public:
35
- explicit JoinOrderOptimizer(ClientContext &context)
36
- : context(context), cardinality_estimator(context), full_plan_found(false), must_update_full_plan(false) {
28
+ explicit JoinOrderOptimizer(ClientContext &context) : context(context), query_graph_manager(context) {
37
29
  }
38
30
 
39
31
  //! Perform join reordering inside a plan
40
- unique_ptr<LogicalOperator> Optimize(unique_ptr<LogicalOperator> plan);
32
+ unique_ptr<LogicalOperator> Optimize(unique_ptr<LogicalOperator> plan, optional_ptr<RelationStats> stats = nullptr);
41
33
 
42
34
  unique_ptr<JoinNode> CreateJoinTree(JoinRelationSet &set,
43
35
  const vector<reference<NeighborInfo>> &possible_connections, JoinNode &left,
@@ -45,16 +37,10 @@ public:
45
37
 
46
38
  private:
47
39
  ClientContext &context;
48
- //! The total amount of join pairs that have been considered
49
- idx_t pairs = 0;
50
- //! Set of all relations considered in the join optimizer
51
- vector<unique_ptr<SingleJoinRelation>> relations;
52
- //! A mapping of base table index -> index into relations array (relation number)
53
- unordered_map<idx_t, idx_t> relation_mapping;
54
- //! A structure holding all the created JoinRelationSet objects
55
- JoinRelationSetManager set_manager;
56
- //! The set of edges used in the join optimizer
57
- QueryGraph query_graph;
40
+
41
+ //! manages the query graph, relations, and edges between relations
42
+ QueryGraphManager query_graph_manager;
43
+
58
44
  //! The optimal join plan found for the specific JoinRelationSet*
59
45
  unordered_map<JoinRelationSet *, unique_ptr<JoinNode>> plans;
60
46
 
@@ -11,19 +11,8 @@
11
11
  #include "duckdb/common/common.hpp"
12
12
  #include "duckdb/common/unordered_map.hpp"
13
13
  #include "duckdb/common/unordered_set.hpp"
14
- #include "duckdb/common/optional_ptr.hpp"
15
14
 
16
15
  namespace duckdb {
17
- class LogicalOperator;
18
-
19
- //! Represents a single relation and any metadata accompanying that relation
20
- struct SingleJoinRelation {
21
- LogicalOperator &op;
22
- optional_ptr<LogicalOperator> parent;
23
-
24
- SingleJoinRelation(LogicalOperator &op, optional_ptr<LogicalOperator> parent) : op(op), parent(parent) {
25
- }
26
- };
27
16
 
28
17
  //! Set of relations, used in the join graph.
29
18
  struct JoinRelationSet {
@@ -0,0 +1,89 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/optimizer/join_order/plan_enumerator.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/common/unordered_map.hpp"
12
+ #include "duckdb/common/unordered_set.hpp"
13
+ #include "duckdb/optimizer/join_order/join_relation.hpp"
14
+ #include "duckdb/optimizer/join_order/cardinality_estimator.hpp"
15
+ #include "duckdb/optimizer/join_order/query_graph.hpp"
16
+ #include "duckdb/optimizer/join_order/join_node.hpp"
17
+ #include "duckdb/optimizer/join_order/cost_model.hpp"
18
+ #include "duckdb/parser/expression_map.hpp"
19
+ #include "duckdb/common/reference_map.hpp"
20
+ #include "duckdb/planner/logical_operator.hpp"
21
+ #include "duckdb/planner/logical_operator_visitor.hpp"
22
+
23
+ #include <functional>
24
+
25
+ namespace duckdb {
26
+
27
+ class QueryGraphManager;
28
+
29
+ class PlanEnumerator {
30
+ public:
31
+ explicit PlanEnumerator(QueryGraphManager &query_graph_manager, CostModel &cost_model,
32
+ const QueryGraphEdges &query_graph)
33
+ : query_graph(query_graph), query_graph_manager(query_graph_manager), cost_model(cost_model),
34
+ full_plan_found(false), must_update_full_plan(false) {
35
+ }
36
+
37
+ //! Perform the join order solving
38
+ unique_ptr<JoinNode> SolveJoinOrder();
39
+ void InitLeafPlans();
40
+
41
+ static unique_ptr<LogicalOperator> BuildSideProbeSideSwaps(unique_ptr<LogicalOperator> plan);
42
+
43
+ private:
44
+ QueryGraphEdges const &query_graph;
45
+ //! The total amount of join pairs that have been considered
46
+ idx_t pairs = 0;
47
+ //! The set of edges used in the join optimizer
48
+ QueryGraphManager &query_graph_manager;
49
+ //! Cost model to evaluate cost of joins
50
+ CostModel &cost_model;
51
+ //! A map to store the optimal join plan found for a specific JoinRelationSet*
52
+ reference_map_t<JoinRelationSet, unique_ptr<JoinNode>> plans;
53
+
54
+ bool full_plan_found;
55
+ bool must_update_full_plan;
56
+ unordered_set<string> join_nodes_in_full_plan;
57
+
58
+ unique_ptr<JoinNode> CreateJoinTree(JoinRelationSet &set,
59
+ const vector<reference<NeighborInfo>> &possible_connections, JoinNode &left,
60
+ JoinNode &right);
61
+
62
+ //! Emit a pair as a potential join candidate. Returns the best plan found for the (left, right) connection (either
63
+ //! the newly created plan, or an existing plan)
64
+ JoinNode &EmitPair(JoinRelationSet &left, JoinRelationSet &right, const vector<reference<NeighborInfo>> &info);
65
+ //! Tries to emit a potential join candidate pair. Returns false if too many pairs have already been emitted,
66
+ //! cancelling the dynamic programming step.
67
+ bool TryEmitPair(JoinRelationSet &left, JoinRelationSet &right, const vector<reference<NeighborInfo>> &info);
68
+
69
+ bool EnumerateCmpRecursive(JoinRelationSet &left, JoinRelationSet &right, unordered_set<idx_t> &exclusion_set);
70
+ //! Emit a relation set node
71
+ bool EmitCSG(JoinRelationSet &node);
72
+ //! Enumerate the possible connected subgraphs that can be joined together in the join graph
73
+ bool EnumerateCSGRecursive(JoinRelationSet &node, unordered_set<idx_t> &exclusion_set);
74
+ //! Generate cross product edges inside the side
75
+ void GenerateCrossProducts();
76
+
77
+ //! Solve the join order exactly using dynamic programming. Returns true if it was completed successfully (i.e. did
78
+ //! not time-out)
79
+ bool SolveJoinOrderExactly();
80
+ //! Solve the join order approximately using a greedy algorithm
81
+ void SolveJoinOrderApproximately();
82
+
83
+ void UpdateDPTree(JoinNode &new_plan);
84
+
85
+ void UpdateJoinNodesInFullPlan(JoinNode &node);
86
+ bool NodeInFullPlan(JoinNode &node);
87
+ };
88
+
89
+ } // namespace duckdb
@@ -9,47 +9,32 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/common/common.hpp"
12
+ #include "duckdb/common/optional_ptr.hpp"
13
+ #include "duckdb/optimizer/join_order/join_relation.hpp"
14
+ #include "duckdb/optimizer/join_order/join_node.hpp"
15
+ #include "duckdb/optimizer/join_order/relation_manager.hpp"
12
16
  #include "duckdb/common/pair.hpp"
13
17
  #include "duckdb/common/unordered_map.hpp"
14
18
  #include "duckdb/common/unordered_set.hpp"
15
- #include "duckdb/optimizer/join_order/join_relation.hpp"
16
19
  #include "duckdb/common/vector.hpp"
17
20
  #include "duckdb/planner/column_binding.hpp"
18
- #include "duckdb/common/optional_ptr.hpp"
19
21
 
20
22
  #include <functional>
21
23
 
22
24
  namespace duckdb {
23
- class Expression;
24
- class LogicalOperator;
25
-
26
- struct FilterInfo {
27
- FilterInfo(JoinRelationSet &set, idx_t filter_index) : set(set), filter_index(filter_index) {
28
- }
29
-
30
- JoinRelationSet &set;
31
- idx_t filter_index;
32
- optional_ptr<JoinRelationSet> left_set;
33
- optional_ptr<JoinRelationSet> right_set;
34
- ColumnBinding left_binding;
35
- ColumnBinding right_binding;
36
- };
37
25
 
38
- struct FilterNode {
39
- vector<reference<FilterInfo>> filters;
40
- unordered_map<idx_t, unique_ptr<FilterNode>> children;
41
- };
26
+ struct FilterInfo;
42
27
 
43
28
  struct NeighborInfo {
44
- NeighborInfo(JoinRelationSet &neighbor) : neighbor(neighbor) {
29
+ NeighborInfo(optional_ptr<JoinRelationSet> neighbor) : neighbor(neighbor) {
45
30
  }
46
31
 
47
- JoinRelationSet &neighbor;
48
- vector<reference<FilterInfo>> filters;
32
+ optional_ptr<JoinRelationSet> neighbor;
33
+ vector<optional_ptr<FilterInfo>> filters;
49
34
  };
50
35
 
51
36
  //! The QueryGraph contains edges between relations and allows edges to be created/queried
52
- class QueryGraph {
37
+ class QueryGraphEdges {
53
38
  public:
54
39
  //! Contains a node with info about neighboring relations and child edge infos
55
40
  struct QueryEdge {
@@ -61,22 +46,23 @@ public:
61
46
  string ToString() const;
62
47
  void Print();
63
48
 
64
- //! Create an edge in the edge_set
65
- void CreateEdge(JoinRelationSet &left, JoinRelationSet &right, optional_ptr<FilterInfo> info);
66
49
  //! Returns a connection if there is an edge that connects these two sets, or nullptr otherwise
67
- vector<reference<NeighborInfo>> GetConnections(JoinRelationSet &node, JoinRelationSet &other);
50
+ const vector<reference<NeighborInfo>> GetConnections(JoinRelationSet &node, JoinRelationSet &other) const;
68
51
  //! Enumerate the neighbors of a specific node that do not belong to any of the exclusion_set. Note that if a
69
52
  //! neighbor has multiple nodes, this function will return the lowest entry in that set.
70
- vector<idx_t> GetNeighbors(JoinRelationSet &node, unordered_set<idx_t> &exclusion_set);
53
+ const vector<idx_t> GetNeighbors(JoinRelationSet &node, unordered_set<idx_t> &exclusion_set) const;
54
+
71
55
  //! Enumerate all neighbors of a given JoinRelationSet node
72
- void EnumerateNeighbors(JoinRelationSet &node, const std::function<bool(NeighborInfo &)> &callback);
56
+ void EnumerateNeighbors(JoinRelationSet &node, const std::function<bool(NeighborInfo &)> &callback) const;
57
+ //! Create an edge in the edge_set
58
+ void CreateEdge(JoinRelationSet &left, JoinRelationSet &right, optional_ptr<FilterInfo> info);
73
59
 
74
60
  private:
75
61
  //! Get the QueryEdge of a specific node
76
- QueryEdge &GetQueryEdge(JoinRelationSet &left);
62
+ optional_ptr<QueryEdge> GetQueryEdge(JoinRelationSet &left);
77
63
 
78
64
  void EnumerateNeighborsDFS(JoinRelationSet &node, reference<QueryEdge> info, idx_t index,
79
- const std::function<bool(NeighborInfo &)> &callback);
65
+ const std::function<bool(NeighborInfo &)> &callback) const;
80
66
 
81
67
  QueryEdge root;
82
68
  };
@@ -0,0 +1,113 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/optimizer/join_order/query_graph_manager.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/common/common.hpp"
12
+ #include "duckdb/common/optional_ptr.hpp"
13
+ #include "duckdb/common/pair.hpp"
14
+ #include "duckdb/common/unordered_map.hpp"
15
+ #include "duckdb/common/unordered_set.hpp"
16
+ #include "duckdb/common/vector.hpp"
17
+ #include "duckdb/optimizer/join_order/join_node.hpp"
18
+ #include "duckdb/optimizer/join_order/join_relation.hpp"
19
+ #include "duckdb/optimizer/join_order/query_graph.hpp"
20
+ #include "duckdb/optimizer/join_order/relation_manager.hpp"
21
+ #include "duckdb/planner/column_binding.hpp"
22
+ #include "duckdb/planner/logical_operator.hpp"
23
+
24
+ #include <functional>
25
+
26
+ namespace duckdb {
27
+
28
+ struct GenerateJoinRelation {
29
+ GenerateJoinRelation(optional_ptr<JoinRelationSet> set, unique_ptr<LogicalOperator> op_p)
30
+ : set(set), op(std::move(op_p)) {
31
+ }
32
+
33
+ optional_ptr<JoinRelationSet> set;
34
+ unique_ptr<LogicalOperator> op;
35
+ };
36
+
37
+ //! Filter info struct that is used by the cardinality estimator to set the initial cardinality
38
+ //! but is also eventually transformed into a query edge.
39
+ struct FilterInfo {
40
+ FilterInfo(unique_ptr<Expression> filter, JoinRelationSet &set, idx_t filter_index)
41
+ : filter(std::move(filter)), set(set), filter_index(filter_index) {
42
+ }
43
+
44
+ unique_ptr<Expression> filter;
45
+ JoinRelationSet &set;
46
+ idx_t filter_index;
47
+ optional_ptr<JoinRelationSet> left_set;
48
+ optional_ptr<JoinRelationSet> right_set;
49
+ ColumnBinding left_binding;
50
+ ColumnBinding right_binding;
51
+ };
52
+
53
+ //! The QueryGraphManager manages the process of extracting the reorderable and nonreorderable operations
54
+ //! from the logical plan and creating the intermediate structures needed by the plan enumerator.
55
+ //! When the plan enumerator finishes, the Query Graph Manager can then recreate the logical plan.
56
+ class QueryGraphManager {
57
+ public:
58
+ QueryGraphManager(ClientContext &context) : relation_manager(context), context(context) {
59
+ }
60
+
61
+ //! manage relations and the logical operators they represent
62
+ RelationManager relation_manager;
63
+
64
+ //! A structure holding all the created JoinRelationSet objects
65
+ JoinRelationSetManager set_manager;
66
+
67
+ ClientContext &context;
68
+
69
+ //! Extract the join relations, optimizing non-reorderable relations when encountered
70
+ bool Build(LogicalOperator &op);
71
+
72
+ //! Reconstruct the logical plan using the plan found by the plan enumerator
73
+ unique_ptr<LogicalOperator> Reconstruct(unique_ptr<LogicalOperator> plan, JoinNode &node);
74
+
75
+ //! Get a reference to the QueryGraphEdges structure that stores edges between
76
+ //! nodes and hypernodes.
77
+ const QueryGraphEdges &GetQueryGraphEdges() const;
78
+
79
+ //! Get a list of the join filters in the join plan that are eventually
80
+ //! transformed into the query graph edges
81
+ const vector<unique_ptr<FilterInfo>> &GetFilterBindings() const;
82
+
83
+ //! Plan enumerator may not find a full plan and therefore will need to create cross
84
+ //! products to create edges.
85
+ void CreateQueryGraphCrossProduct(JoinRelationSet &left, JoinRelationSet &right);
86
+
87
+ //! after join order optimization, we perform build side probe side optimizations.
88
+ //! (Basically we put lower expected cardinality columns on the build side, and larger
89
+ //! tables on the probe side)
90
+ unique_ptr<LogicalOperator> LeftRightOptimizations(unique_ptr<LogicalOperator> op);
91
+
92
+ private:
93
+ vector<reference<LogicalOperator>> filter_operators;
94
+
95
+ //! Filter information including the column_bindings that join filters
96
+ //! used by the cardinality estimator to estimate distinct counts
97
+ vector<unique_ptr<FilterInfo>> filters_and_bindings;
98
+
99
+ QueryGraphEdges query_graph;
100
+
101
+ void GetColumnBinding(Expression &expression, ColumnBinding &binding);
102
+
103
+ bool ExtractBindings(Expression &expression, unordered_set<idx_t> &bindings);
104
+ bool LeftCardLessThanRight(LogicalOperator &op);
105
+
106
+ void CreateHyperGraphEdges();
107
+
108
+ GenerateJoinRelation GenerateJoins(vector<unique_ptr<LogicalOperator>> &extracted_relations, JoinNode &node);
109
+
110
+ unique_ptr<LogicalOperator> RewritePlan(unique_ptr<LogicalOperator> plan, JoinNode &node);
111
+ };
112
+
113
+ } // namespace duckdb