duckdb 0.8.2-dev2700.0 → 0.8.2-dev2842.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-makedate.cpp +12 -6
- package/src/duckdb/extension/json/include/json_deserializer.hpp +1 -1
- package/src/duckdb/extension/json/include/json_serializer.hpp +1 -1
- package/src/duckdb/extension/json/json_deserializer.cpp +10 -10
- package/src/duckdb/extension/json/json_scan.cpp +2 -2
- package/src/duckdb/extension/json/json_serializer.cpp +11 -10
- package/src/duckdb/extension/json/serialize_json.cpp +44 -44
- package/src/duckdb/extension/parquet/parquet_extension.cpp +11 -10
- package/src/duckdb/extension/parquet/serialize_parquet.cpp +6 -6
- package/src/duckdb/src/common/adbc/adbc.cpp +52 -21
- package/src/duckdb/src/common/adbc/driver_manager.cpp +12 -2
- package/src/duckdb/src/common/enum_util.cpp +5 -0
- package/src/duckdb/src/common/extra_type_info.cpp +2 -2
- package/src/duckdb/src/common/serializer/binary_deserializer.cpp +5 -3
- package/src/duckdb/src/common/serializer/binary_serializer.cpp +10 -5
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +4 -4
- package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +35 -5
- package/src/duckdb/src/common/types/value.cpp +33 -33
- package/src/duckdb/src/common/types/vector.cpp +20 -20
- package/src/duckdb/src/core_functions/aggregate/holistic/approximate_quantile.cpp +2 -2
- package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +6 -6
- package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +4 -4
- package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +4 -4
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +283 -91
- package/src/duckdb/src/execution/operator/filter/physical_filter.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -2
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -6
- package/src/duckdb/src/execution/window_executor.cpp +10 -1
- package/src/duckdb/src/function/table/read_csv.cpp +4 -4
- package/src/duckdb/src/function/table/table_scan.cpp +14 -14
- package/src/duckdb/src/function/table/version/pragma_version.cpp +5 -2
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/pending_execution_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/index_vector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +7 -3
- package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +18 -17
- package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +10 -9
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +0 -2
- package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +10 -10
- package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/relation/aggregate_relation.hpp +4 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +37 -63
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cost_model.hpp +37 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +14 -29
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +7 -21
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +0 -11
- package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +89 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +17 -31
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +113 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +73 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +73 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/group_by_node.hpp +11 -0
- package/src/duckdb/src/include/duckdb/parser/parser.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +0 -2
- package/src/duckdb/src/include/duckdb.h +11 -1
- package/src/duckdb/src/main/capi/pending-c.cpp +17 -0
- package/src/duckdb/src/main/pending_query_result.cpp +9 -1
- package/src/duckdb/src/main/relation/aggregate_relation.cpp +20 -10
- package/src/duckdb/src/main/relation.cpp +4 -4
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +79 -325
- package/src/duckdb/src/optimizer/join_order/cost_model.cpp +19 -0
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -37
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +48 -1078
- package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +552 -0
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +32 -29
- package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +409 -0
- package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +356 -0
- package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +351 -0
- package/src/duckdb/src/parallel/executor.cpp +6 -0
- package/src/duckdb/src/parallel/task_scheduler.cpp +7 -0
- package/src/duckdb/src/parser/parser.cpp +18 -3
- package/src/duckdb/src/parser/tableref/pivotref.cpp +6 -6
- package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +10 -10
- package/src/duckdb/src/planner/expression/bound_function_expression.cpp +6 -6
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +24 -24
- package/src/duckdb/src/planner/operator/logical_extension_operator.cpp +2 -2
- package/src/duckdb/src/planner/operator/logical_get.cpp +26 -22
- package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +26 -26
- package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +66 -66
- package/src/duckdb/src/storage/serialization/serialize_expression.cpp +78 -78
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +250 -250
- package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +10 -10
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +206 -206
- package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +116 -116
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +110 -110
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +48 -48
- package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +16 -16
- package/src/duckdb/src/storage/serialization/serialize_statement.cpp +2 -2
- package/src/duckdb/src/storage/serialization/serialize_table_filter.cpp +10 -10
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +54 -54
- package/src/duckdb/src/storage/serialization/serialize_types.cpp +22 -22
- package/src/duckdb/src/storage/table/update_segment.cpp +1 -1
- package/src/duckdb/ub_src_optimizer_join_order.cpp +10 -0
@@ -10,6 +10,7 @@
|
|
10
10
|
|
11
11
|
#include "duckdb/common/unordered_map.hpp"
|
12
12
|
#include "duckdb/common/unordered_set.hpp"
|
13
|
+
#include "duckdb/optimizer/join_order/query_graph_manager.hpp"
|
13
14
|
#include "duckdb/optimizer/join_order/join_relation.hpp"
|
14
15
|
#include "duckdb/optimizer/join_order/cardinality_estimator.hpp"
|
15
16
|
#include "duckdb/optimizer/join_order/query_graph.hpp"
|
@@ -22,22 +23,13 @@
|
|
22
23
|
|
23
24
|
namespace duckdb {
|
24
25
|
|
25
|
-
struct GenerateJoinRelation {
|
26
|
-
GenerateJoinRelation(JoinRelationSet &set, unique_ptr<LogicalOperator> op_p) : set(set), op(std::move(op_p)) {
|
27
|
-
}
|
28
|
-
|
29
|
-
JoinRelationSet &set;
|
30
|
-
unique_ptr<LogicalOperator> op;
|
31
|
-
};
|
32
|
-
|
33
26
|
class JoinOrderOptimizer {
|
34
27
|
public:
|
35
|
-
explicit JoinOrderOptimizer(ClientContext &context)
|
36
|
-
: context(context), cardinality_estimator(context), full_plan_found(false), must_update_full_plan(false) {
|
28
|
+
explicit JoinOrderOptimizer(ClientContext &context) : context(context), query_graph_manager(context) {
|
37
29
|
}
|
38
30
|
|
39
31
|
//! Perform join reordering inside a plan
|
40
|
-
unique_ptr<LogicalOperator> Optimize(unique_ptr<LogicalOperator> plan);
|
32
|
+
unique_ptr<LogicalOperator> Optimize(unique_ptr<LogicalOperator> plan, optional_ptr<RelationStats> stats = nullptr);
|
41
33
|
|
42
34
|
unique_ptr<JoinNode> CreateJoinTree(JoinRelationSet &set,
|
43
35
|
const vector<reference<NeighborInfo>> &possible_connections, JoinNode &left,
|
@@ -45,16 +37,10 @@ public:
|
|
45
37
|
|
46
38
|
private:
|
47
39
|
ClientContext &context;
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
//! A mapping of base table index -> index into relations array (relation number)
|
53
|
-
unordered_map<idx_t, idx_t> relation_mapping;
|
54
|
-
//! A structure holding all the created JoinRelationSet objects
|
55
|
-
JoinRelationSetManager set_manager;
|
56
|
-
//! The set of edges used in the join optimizer
|
57
|
-
QueryGraph query_graph;
|
40
|
+
|
41
|
+
//! manages the query graph, relations, and edges between relations
|
42
|
+
QueryGraphManager query_graph_manager;
|
43
|
+
|
58
44
|
//! The optimal join plan found for the specific JoinRelationSet*
|
59
45
|
unordered_map<JoinRelationSet *, unique_ptr<JoinNode>> plans;
|
60
46
|
|
@@ -11,19 +11,8 @@
|
|
11
11
|
#include "duckdb/common/common.hpp"
|
12
12
|
#include "duckdb/common/unordered_map.hpp"
|
13
13
|
#include "duckdb/common/unordered_set.hpp"
|
14
|
-
#include "duckdb/common/optional_ptr.hpp"
|
15
14
|
|
16
15
|
namespace duckdb {
|
17
|
-
class LogicalOperator;
|
18
|
-
|
19
|
-
//! Represents a single relation and any metadata accompanying that relation
|
20
|
-
struct SingleJoinRelation {
|
21
|
-
LogicalOperator &op;
|
22
|
-
optional_ptr<LogicalOperator> parent;
|
23
|
-
|
24
|
-
SingleJoinRelation(LogicalOperator &op, optional_ptr<LogicalOperator> parent) : op(op), parent(parent) {
|
25
|
-
}
|
26
|
-
};
|
27
16
|
|
28
17
|
//! Set of relations, used in the join graph.
|
29
18
|
struct JoinRelationSet {
|
@@ -0,0 +1,89 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/optimizer/join_order/plan_enumerator.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/common/unordered_map.hpp"
|
12
|
+
#include "duckdb/common/unordered_set.hpp"
|
13
|
+
#include "duckdb/optimizer/join_order/join_relation.hpp"
|
14
|
+
#include "duckdb/optimizer/join_order/cardinality_estimator.hpp"
|
15
|
+
#include "duckdb/optimizer/join_order/query_graph.hpp"
|
16
|
+
#include "duckdb/optimizer/join_order/join_node.hpp"
|
17
|
+
#include "duckdb/optimizer/join_order/cost_model.hpp"
|
18
|
+
#include "duckdb/parser/expression_map.hpp"
|
19
|
+
#include "duckdb/common/reference_map.hpp"
|
20
|
+
#include "duckdb/planner/logical_operator.hpp"
|
21
|
+
#include "duckdb/planner/logical_operator_visitor.hpp"
|
22
|
+
|
23
|
+
#include <functional>
|
24
|
+
|
25
|
+
namespace duckdb {
|
26
|
+
|
27
|
+
class QueryGraphManager;
|
28
|
+
|
29
|
+
class PlanEnumerator {
|
30
|
+
public:
|
31
|
+
explicit PlanEnumerator(QueryGraphManager &query_graph_manager, CostModel &cost_model,
|
32
|
+
const QueryGraphEdges &query_graph)
|
33
|
+
: query_graph(query_graph), query_graph_manager(query_graph_manager), cost_model(cost_model),
|
34
|
+
full_plan_found(false), must_update_full_plan(false) {
|
35
|
+
}
|
36
|
+
|
37
|
+
//! Perform the join order solving
|
38
|
+
unique_ptr<JoinNode> SolveJoinOrder();
|
39
|
+
void InitLeafPlans();
|
40
|
+
|
41
|
+
static unique_ptr<LogicalOperator> BuildSideProbeSideSwaps(unique_ptr<LogicalOperator> plan);
|
42
|
+
|
43
|
+
private:
|
44
|
+
QueryGraphEdges const &query_graph;
|
45
|
+
//! The total amount of join pairs that have been considered
|
46
|
+
idx_t pairs = 0;
|
47
|
+
//! The set of edges used in the join optimizer
|
48
|
+
QueryGraphManager &query_graph_manager;
|
49
|
+
//! Cost model to evaluate cost of joins
|
50
|
+
CostModel &cost_model;
|
51
|
+
//! A map to store the optimal join plan found for a specific JoinRelationSet*
|
52
|
+
reference_map_t<JoinRelationSet, unique_ptr<JoinNode>> plans;
|
53
|
+
|
54
|
+
bool full_plan_found;
|
55
|
+
bool must_update_full_plan;
|
56
|
+
unordered_set<string> join_nodes_in_full_plan;
|
57
|
+
|
58
|
+
unique_ptr<JoinNode> CreateJoinTree(JoinRelationSet &set,
|
59
|
+
const vector<reference<NeighborInfo>> &possible_connections, JoinNode &left,
|
60
|
+
JoinNode &right);
|
61
|
+
|
62
|
+
//! Emit a pair as a potential join candidate. Returns the best plan found for the (left, right) connection (either
|
63
|
+
//! the newly created plan, or an existing plan)
|
64
|
+
JoinNode &EmitPair(JoinRelationSet &left, JoinRelationSet &right, const vector<reference<NeighborInfo>> &info);
|
65
|
+
//! Tries to emit a potential join candidate pair. Returns false if too many pairs have already been emitted,
|
66
|
+
//! cancelling the dynamic programming step.
|
67
|
+
bool TryEmitPair(JoinRelationSet &left, JoinRelationSet &right, const vector<reference<NeighborInfo>> &info);
|
68
|
+
|
69
|
+
bool EnumerateCmpRecursive(JoinRelationSet &left, JoinRelationSet &right, unordered_set<idx_t> &exclusion_set);
|
70
|
+
//! Emit a relation set node
|
71
|
+
bool EmitCSG(JoinRelationSet &node);
|
72
|
+
//! Enumerate the possible connected subgraphs that can be joined together in the join graph
|
73
|
+
bool EnumerateCSGRecursive(JoinRelationSet &node, unordered_set<idx_t> &exclusion_set);
|
74
|
+
//! Generate cross product edges inside the side
|
75
|
+
void GenerateCrossProducts();
|
76
|
+
|
77
|
+
//! Solve the join order exactly using dynamic programming. Returns true if it was completed successfully (i.e. did
|
78
|
+
//! not time-out)
|
79
|
+
bool SolveJoinOrderExactly();
|
80
|
+
//! Solve the join order approximately using a greedy algorithm
|
81
|
+
void SolveJoinOrderApproximately();
|
82
|
+
|
83
|
+
void UpdateDPTree(JoinNode &new_plan);
|
84
|
+
|
85
|
+
void UpdateJoinNodesInFullPlan(JoinNode &node);
|
86
|
+
bool NodeInFullPlan(JoinNode &node);
|
87
|
+
};
|
88
|
+
|
89
|
+
} // namespace duckdb
|
@@ -9,47 +9,32 @@
|
|
9
9
|
#pragma once
|
10
10
|
|
11
11
|
#include "duckdb/common/common.hpp"
|
12
|
+
#include "duckdb/common/optional_ptr.hpp"
|
13
|
+
#include "duckdb/optimizer/join_order/join_relation.hpp"
|
14
|
+
#include "duckdb/optimizer/join_order/join_node.hpp"
|
15
|
+
#include "duckdb/optimizer/join_order/relation_manager.hpp"
|
12
16
|
#include "duckdb/common/pair.hpp"
|
13
17
|
#include "duckdb/common/unordered_map.hpp"
|
14
18
|
#include "duckdb/common/unordered_set.hpp"
|
15
|
-
#include "duckdb/optimizer/join_order/join_relation.hpp"
|
16
19
|
#include "duckdb/common/vector.hpp"
|
17
20
|
#include "duckdb/planner/column_binding.hpp"
|
18
|
-
#include "duckdb/common/optional_ptr.hpp"
|
19
21
|
|
20
22
|
#include <functional>
|
21
23
|
|
22
24
|
namespace duckdb {
|
23
|
-
class Expression;
|
24
|
-
class LogicalOperator;
|
25
|
-
|
26
|
-
struct FilterInfo {
|
27
|
-
FilterInfo(JoinRelationSet &set, idx_t filter_index) : set(set), filter_index(filter_index) {
|
28
|
-
}
|
29
|
-
|
30
|
-
JoinRelationSet &set;
|
31
|
-
idx_t filter_index;
|
32
|
-
optional_ptr<JoinRelationSet> left_set;
|
33
|
-
optional_ptr<JoinRelationSet> right_set;
|
34
|
-
ColumnBinding left_binding;
|
35
|
-
ColumnBinding right_binding;
|
36
|
-
};
|
37
25
|
|
38
|
-
struct
|
39
|
-
vector<reference<FilterInfo>> filters;
|
40
|
-
unordered_map<idx_t, unique_ptr<FilterNode>> children;
|
41
|
-
};
|
26
|
+
struct FilterInfo;
|
42
27
|
|
43
28
|
struct NeighborInfo {
|
44
|
-
NeighborInfo(JoinRelationSet
|
29
|
+
NeighborInfo(optional_ptr<JoinRelationSet> neighbor) : neighbor(neighbor) {
|
45
30
|
}
|
46
31
|
|
47
|
-
JoinRelationSet
|
48
|
-
vector<
|
32
|
+
optional_ptr<JoinRelationSet> neighbor;
|
33
|
+
vector<optional_ptr<FilterInfo>> filters;
|
49
34
|
};
|
50
35
|
|
51
36
|
//! The QueryGraph contains edges between relations and allows edges to be created/queried
|
52
|
-
class
|
37
|
+
class QueryGraphEdges {
|
53
38
|
public:
|
54
39
|
//! Contains a node with info about neighboring relations and child edge infos
|
55
40
|
struct QueryEdge {
|
@@ -61,22 +46,23 @@ public:
|
|
61
46
|
string ToString() const;
|
62
47
|
void Print();
|
63
48
|
|
64
|
-
//! Create an edge in the edge_set
|
65
|
-
void CreateEdge(JoinRelationSet &left, JoinRelationSet &right, optional_ptr<FilterInfo> info);
|
66
49
|
//! Returns a connection if there is an edge that connects these two sets, or nullptr otherwise
|
67
|
-
vector<reference<NeighborInfo>> GetConnections(JoinRelationSet &node, JoinRelationSet &other);
|
50
|
+
const vector<reference<NeighborInfo>> GetConnections(JoinRelationSet &node, JoinRelationSet &other) const;
|
68
51
|
//! Enumerate the neighbors of a specific node that do not belong to any of the exclusion_set. Note that if a
|
69
52
|
//! neighbor has multiple nodes, this function will return the lowest entry in that set.
|
70
|
-
vector<idx_t> GetNeighbors(JoinRelationSet &node, unordered_set<idx_t> &exclusion_set);
|
53
|
+
const vector<idx_t> GetNeighbors(JoinRelationSet &node, unordered_set<idx_t> &exclusion_set) const;
|
54
|
+
|
71
55
|
//! Enumerate all neighbors of a given JoinRelationSet node
|
72
|
-
void EnumerateNeighbors(JoinRelationSet &node, const std::function<bool(NeighborInfo &)> &callback);
|
56
|
+
void EnumerateNeighbors(JoinRelationSet &node, const std::function<bool(NeighborInfo &)> &callback) const;
|
57
|
+
//! Create an edge in the edge_set
|
58
|
+
void CreateEdge(JoinRelationSet &left, JoinRelationSet &right, optional_ptr<FilterInfo> info);
|
73
59
|
|
74
60
|
private:
|
75
61
|
//! Get the QueryEdge of a specific node
|
76
|
-
QueryEdge
|
62
|
+
optional_ptr<QueryEdge> GetQueryEdge(JoinRelationSet &left);
|
77
63
|
|
78
64
|
void EnumerateNeighborsDFS(JoinRelationSet &node, reference<QueryEdge> info, idx_t index,
|
79
|
-
const std::function<bool(NeighborInfo &)> &callback);
|
65
|
+
const std::function<bool(NeighborInfo &)> &callback) const;
|
80
66
|
|
81
67
|
QueryEdge root;
|
82
68
|
};
|
@@ -0,0 +1,113 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/optimizer/join_order/query_graph_manager.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/common/common.hpp"
|
12
|
+
#include "duckdb/common/optional_ptr.hpp"
|
13
|
+
#include "duckdb/common/pair.hpp"
|
14
|
+
#include "duckdb/common/unordered_map.hpp"
|
15
|
+
#include "duckdb/common/unordered_set.hpp"
|
16
|
+
#include "duckdb/common/vector.hpp"
|
17
|
+
#include "duckdb/optimizer/join_order/join_node.hpp"
|
18
|
+
#include "duckdb/optimizer/join_order/join_relation.hpp"
|
19
|
+
#include "duckdb/optimizer/join_order/query_graph.hpp"
|
20
|
+
#include "duckdb/optimizer/join_order/relation_manager.hpp"
|
21
|
+
#include "duckdb/planner/column_binding.hpp"
|
22
|
+
#include "duckdb/planner/logical_operator.hpp"
|
23
|
+
|
24
|
+
#include <functional>
|
25
|
+
|
26
|
+
namespace duckdb {
|
27
|
+
|
28
|
+
struct GenerateJoinRelation {
|
29
|
+
GenerateJoinRelation(optional_ptr<JoinRelationSet> set, unique_ptr<LogicalOperator> op_p)
|
30
|
+
: set(set), op(std::move(op_p)) {
|
31
|
+
}
|
32
|
+
|
33
|
+
optional_ptr<JoinRelationSet> set;
|
34
|
+
unique_ptr<LogicalOperator> op;
|
35
|
+
};
|
36
|
+
|
37
|
+
//! Filter info struct that is used by the cardinality estimator to set the initial cardinality
|
38
|
+
//! but is also eventually transformed into a query edge.
|
39
|
+
struct FilterInfo {
|
40
|
+
FilterInfo(unique_ptr<Expression> filter, JoinRelationSet &set, idx_t filter_index)
|
41
|
+
: filter(std::move(filter)), set(set), filter_index(filter_index) {
|
42
|
+
}
|
43
|
+
|
44
|
+
unique_ptr<Expression> filter;
|
45
|
+
JoinRelationSet &set;
|
46
|
+
idx_t filter_index;
|
47
|
+
optional_ptr<JoinRelationSet> left_set;
|
48
|
+
optional_ptr<JoinRelationSet> right_set;
|
49
|
+
ColumnBinding left_binding;
|
50
|
+
ColumnBinding right_binding;
|
51
|
+
};
|
52
|
+
|
53
|
+
//! The QueryGraphManager manages the process of extracting the reorderable and nonreorderable operations
|
54
|
+
//! from the logical plan and creating the intermediate structures needed by the plan enumerator.
|
55
|
+
//! When the plan enumerator finishes, the Query Graph Manager can then recreate the logical plan.
|
56
|
+
class QueryGraphManager {
|
57
|
+
public:
|
58
|
+
QueryGraphManager(ClientContext &context) : relation_manager(context), context(context) {
|
59
|
+
}
|
60
|
+
|
61
|
+
//! manage relations and the logical operators they represent
|
62
|
+
RelationManager relation_manager;
|
63
|
+
|
64
|
+
//! A structure holding all the created JoinRelationSet objects
|
65
|
+
JoinRelationSetManager set_manager;
|
66
|
+
|
67
|
+
ClientContext &context;
|
68
|
+
|
69
|
+
//! Extract the join relations, optimizing non-reorderable relations when encountered
|
70
|
+
bool Build(LogicalOperator &op);
|
71
|
+
|
72
|
+
//! Reconstruct the logical plan using the plan found by the plan enumerator
|
73
|
+
unique_ptr<LogicalOperator> Reconstruct(unique_ptr<LogicalOperator> plan, JoinNode &node);
|
74
|
+
|
75
|
+
//! Get a reference to the QueryGraphEdges structure that stores edges between
|
76
|
+
//! nodes and hypernodes.
|
77
|
+
const QueryGraphEdges &GetQueryGraphEdges() const;
|
78
|
+
|
79
|
+
//! Get a list of the join filters in the join plan that eventually are
|
80
|
+
//! transformed into the query graph edges
|
81
|
+
const vector<unique_ptr<FilterInfo>> &GetFilterBindings() const;
|
82
|
+
|
83
|
+
//! Plan enumerator may not find a full plan and therefore will need to create cross
|
84
|
+
//! products to create edges.
|
85
|
+
void CreateQueryGraphCrossProduct(JoinRelationSet &left, JoinRelationSet &right);
|
86
|
+
|
87
|
+
//! after join order optimization, we perform build side probe side optimizations.
|
88
|
+
//! (Basically we put lower expected cardinality columns on the build side, and larger
|
89
|
+
//! tables on the probe side)
|
90
|
+
unique_ptr<LogicalOperator> LeftRightOptimizations(unique_ptr<LogicalOperator> op);
|
91
|
+
|
92
|
+
private:
|
93
|
+
vector<reference<LogicalOperator>> filter_operators;
|
94
|
+
|
95
|
+
//! Filter information, including the column bindings of the join filters,
|
96
|
+
//! used by the cardinality estimator to estimate distinct counts
|
97
|
+
vector<unique_ptr<FilterInfo>> filters_and_bindings;
|
98
|
+
|
99
|
+
QueryGraphEdges query_graph;
|
100
|
+
|
101
|
+
void GetColumnBinding(Expression &expression, ColumnBinding &binding);
|
102
|
+
|
103
|
+
bool ExtractBindings(Expression &expression, unordered_set<idx_t> &bindings);
|
104
|
+
bool LeftCardLessThanRight(LogicalOperator &op);
|
105
|
+
|
106
|
+
void CreateHyperGraphEdges();
|
107
|
+
|
108
|
+
GenerateJoinRelation GenerateJoins(vector<unique_ptr<LogicalOperator>> &extracted_relations, JoinNode &node);
|
109
|
+
|
110
|
+
unique_ptr<LogicalOperator> RewritePlan(unique_ptr<LogicalOperator> plan, JoinNode &node);
|
111
|
+
};
|
112
|
+
|
113
|
+
} // namespace duckdb
|
@@ -0,0 +1,73 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/optimizer/join_order/relation_manager.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/common/unordered_map.hpp"
|
12
|
+
#include "duckdb/common/unordered_set.hpp"
|
13
|
+
#include "duckdb/optimizer/join_order/join_relation.hpp"
|
14
|
+
#include "duckdb/optimizer/join_order/cardinality_estimator.hpp"
|
15
|
+
#include "duckdb/optimizer/join_order/relation_statistics_helper.hpp"
|
16
|
+
#include "duckdb/optimizer/join_order/join_node.hpp"
|
17
|
+
#include "duckdb/parser/expression_map.hpp"
|
18
|
+
#include "duckdb/planner/logical_operator.hpp"
|
19
|
+
#include "duckdb/planner/logical_operator_visitor.hpp"
|
20
|
+
|
21
|
+
namespace duckdb {
|
22
|
+
|
23
|
+
struct FilterInfo;
|
24
|
+
|
25
|
+
//! Represents a single relation and any metadata accompanying that relation
|
26
|
+
struct SingleJoinRelation {
|
27
|
+
LogicalOperator &op;
|
28
|
+
optional_ptr<LogicalOperator> parent;
|
29
|
+
RelationStats stats;
|
30
|
+
|
31
|
+
SingleJoinRelation(LogicalOperator &op, optional_ptr<LogicalOperator> parent) : op(op), parent(parent) {
|
32
|
+
}
|
33
|
+
SingleJoinRelation(LogicalOperator &op, optional_ptr<LogicalOperator> parent, RelationStats stats)
|
34
|
+
: op(op), parent(parent), stats(stats) {
|
35
|
+
}
|
36
|
+
};
|
37
|
+
|
38
|
+
class RelationManager {
|
39
|
+
public:
|
40
|
+
explicit RelationManager(ClientContext &context) : context(context) {
|
41
|
+
}
|
42
|
+
|
43
|
+
idx_t NumRelations();
|
44
|
+
|
45
|
+
bool ExtractJoinRelations(LogicalOperator &input_op, vector<reference<LogicalOperator>> &filter_operators,
|
46
|
+
optional_ptr<LogicalOperator> parent = nullptr);
|
47
|
+
|
48
|
+
//! for each join filter in the logical plan op, extract the relations that are referred to on
|
49
|
+
//! both sides of the join filter, along with the tables & indexes.
|
50
|
+
vector<unique_ptr<FilterInfo>> ExtractEdges(LogicalOperator &op,
|
51
|
+
vector<reference<LogicalOperator>> &filter_operators,
|
52
|
+
JoinRelationSetManager &set_manager);
|
53
|
+
|
54
|
+
//! Extract the set of relations referred to inside an expression
|
55
|
+
bool ExtractBindings(Expression &expression, unordered_set<idx_t> &bindings);
|
56
|
+
void AddRelation(LogicalOperator &op, optional_ptr<LogicalOperator> parent, const RelationStats &stats);
|
57
|
+
|
58
|
+
void AddAggregateRelation(LogicalOperator &op, optional_ptr<LogicalOperator> parent, const RelationStats &stats);
|
59
|
+
vector<unique_ptr<SingleJoinRelation>> GetRelations();
|
60
|
+
|
61
|
+
const vector<RelationStats> GetRelationStats();
|
62
|
+
//! A mapping of base table index -> index into relations array (relation number)
|
63
|
+
unordered_map<idx_t, idx_t> relation_mapping;
|
64
|
+
|
65
|
+
void PrintRelationStats();
|
66
|
+
|
67
|
+
private:
|
68
|
+
ClientContext &context;
|
69
|
+
//! Set of all relations considered in the join optimizer
|
70
|
+
vector<unique_ptr<SingleJoinRelation>> relations;
|
71
|
+
};
|
72
|
+
|
73
|
+
} // namespace duckdb
|
@@ -0,0 +1,73 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/optimizer/join_order/relation_statistics_helper.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
#pragma once
|
9
|
+
|
10
|
+
#include "duckdb/planner/filter/conjunction_filter.hpp"
|
11
|
+
#include "duckdb/planner/logical_operator.hpp"
|
12
|
+
|
13
|
+
namespace duckdb {
|
14
|
+
|
15
|
+
class CardinalityEstimator;
|
16
|
+
|
17
|
+
struct DistinctCount {
|
18
|
+
idx_t distinct_count;
|
19
|
+
bool from_hll;
|
20
|
+
};
|
21
|
+
|
22
|
+
struct ExpressionBinding {
|
23
|
+
bool found_expression = false;
|
24
|
+
ColumnBinding child_binding;
|
25
|
+
bool expression_is_constant = false;
|
26
|
+
};
|
27
|
+
|
28
|
+
struct RelationStats {
|
29
|
+
// column_id -> estimated distinct count for column
|
30
|
+
vector<DistinctCount> column_distinct_count;
|
31
|
+
idx_t cardinality;
|
32
|
+
double filter_strength = 1;
|
33
|
+
bool stats_initialized = false;
|
34
|
+
|
35
|
+
// for debug, column names and tables
|
36
|
+
vector<string> column_names;
|
37
|
+
string table_name;
|
38
|
+
|
39
|
+
RelationStats() : cardinality(1), filter_strength(1), stats_initialized(false) {
|
40
|
+
}
|
41
|
+
};
|
42
|
+
|
43
|
+
class RelationStatisticsHelper {
|
44
|
+
public:
|
45
|
+
static constexpr double DEFAULT_SELECTIVITY = 0.2;
|
46
|
+
|
47
|
+
public:
|
48
|
+
static idx_t InspectConjunctionAND(idx_t cardinality, idx_t column_index, ConjunctionAndFilter &filter,
|
49
|
+
BaseStatistics &base_stats);
|
50
|
+
// static idx_t InspectConjunctionOR(idx_t cardinality, idx_t column_index, ConjunctionOrFilter &filter,
|
51
|
+
// BaseStatistics &base_stats);
|
52
|
+
//! Extract Statistics from a LogicalGet.
|
53
|
+
static RelationStats ExtractGetStats(LogicalGet &get, ClientContext &context);
|
54
|
+
static RelationStats ExtractDelimGetStats(LogicalDelimGet &delim_get, ClientContext &context);
|
55
|
+
//! Create the statistics for a projection using the statistics of the operator that sits underneath the
|
56
|
+
//! projection. Then also create statistics for any extra columns the projection creates.
|
57
|
+
static RelationStats ExtractDummyScanStats(LogicalDummyScan &dummy_scan, ClientContext &context);
|
58
|
+
static RelationStats ExtractExpressionGetStats(LogicalExpressionGet &expression_get, ClientContext &context);
|
59
|
+
//! All relation extractors for blocking relations
|
60
|
+
static RelationStats ExtractProjectionStats(LogicalProjection &proj, RelationStats &child_stats);
|
61
|
+
static RelationStats ExtractAggregationStats(LogicalAggregate &aggr, RelationStats &child_stats);
|
62
|
+
static RelationStats ExtractWindowStats(LogicalWindow &window, RelationStats &child_stats);
|
63
|
+
//! Called after reordering a query plan with potentially 2+ relations.
|
64
|
+
static RelationStats CombineStatsOfReorderableOperator(vector<ColumnBinding> &bindings,
|
65
|
+
vector<RelationStats> relation_stats);
|
66
|
+
//! Called after reordering a query plan with potentially 2+ relations.
|
67
|
+
static RelationStats CombineStatsOfNonReorderableOperator(LogicalOperator &op, vector<RelationStats> child_stats);
|
68
|
+
static void CopyRelationStats(RelationStats &to, const RelationStats &from);
|
69
|
+
|
70
|
+
private:
|
71
|
+
};
|
72
|
+
|
73
|
+
} // namespace duckdb
|
@@ -39,7 +39,7 @@ class TaskScheduler {
|
|
39
39
|
constexpr static int64_t TASK_TIMEOUT_USECS = 5000;
|
40
40
|
|
41
41
|
public:
|
42
|
-
TaskScheduler(DatabaseInstance &db);
|
42
|
+
explicit TaskScheduler(DatabaseInstance &db);
|
43
43
|
~TaskScheduler();
|
44
44
|
|
45
45
|
DUCKDB_API static TaskScheduler &GetScheduler(ClientContext &context);
|
@@ -67,6 +67,9 @@ public:
|
|
67
67
|
//! Send signals to n threads, signalling for them to wake up and attempt to execute a task
|
68
68
|
void Signal(idx_t n);
|
69
69
|
|
70
|
+
//! Yield to other threads
|
71
|
+
void YieldThread();
|
72
|
+
|
70
73
|
//! Set the allocator flush threshold
|
71
74
|
void SetAllocatorFlushTreshold(idx_t threshold);
|
72
75
|
|
@@ -22,6 +22,17 @@ public:
|
|
22
22
|
vector<unique_ptr<ParsedExpression>> group_expressions;
|
23
23
|
//! The different grouping sets as they map to the group expressions
|
24
24
|
vector<GroupingSet> grouping_sets;
|
25
|
+
|
26
|
+
public:
|
27
|
+
GroupByNode Copy() {
|
28
|
+
GroupByNode node;
|
29
|
+
node.group_expressions.reserve(group_expressions.size());
|
30
|
+
for (auto &expr : group_expressions) {
|
31
|
+
node.group_expressions.push_back(expr->Copy());
|
32
|
+
}
|
33
|
+
node.grouping_sets = grouping_sets;
|
34
|
+
return node;
|
35
|
+
}
|
25
36
|
};
|
26
37
|
|
27
38
|
} // namespace duckdb
|
@@ -22,6 +22,8 @@ struct PGList;
|
|
22
22
|
|
23
23
|
namespace duckdb {
|
24
24
|
|
25
|
+
class GroupByNode;
|
26
|
+
|
25
27
|
//! The parser is responsible for parsing the query and converting it into a set
|
26
28
|
//! of parsed statements. The parsed statements can then be converted into a
|
27
29
|
//! plan and executed.
|
@@ -50,6 +52,8 @@ public:
|
|
50
52
|
//! Parses a list of expressions (i.e. the list found in a SELECT clause)
|
51
53
|
DUCKDB_API static vector<unique_ptr<ParsedExpression>> ParseExpressionList(const string &select_list,
|
52
54
|
ParserOptions options = ParserOptions());
|
55
|
+
//! Parses a list of GROUP BY expressions
|
56
|
+
static GroupByNode ParseGroupByList(const string &group_by, ParserOptions options = ParserOptions());
|
53
57
|
//! Parses a list as found in an ORDER BY expression (i.e. including optional ASCENDING/DESCENDING modifiers)
|
54
58
|
static vector<OrderByNode> ParseOrderList(const string &select_list, ParserOptions options = ParserOptions());
|
55
59
|
//! Parses an update list (i.e. the list found in the SET clause of an UPDATE statement)
|
@@ -49,8 +49,6 @@ public:
|
|
49
49
|
idx_t estimated_cardinality;
|
50
50
|
bool has_estimated_cardinality;
|
51
51
|
|
52
|
-
unique_ptr<EstimatedProperties> estimated_props;
|
53
|
-
|
54
52
|
public:
|
55
53
|
virtual vector<ColumnBinding> GetColumnBindings();
|
56
54
|
static vector<ColumnBinding> GenerateColumnBindings(idx_t table_idx, idx_t column_count);
|
@@ -308,7 +308,8 @@ typedef enum { DuckDBSuccess = 0, DuckDBError = 1 } duckdb_state;
|
|
308
308
|
typedef enum {
|
309
309
|
DUCKDB_PENDING_RESULT_READY = 0,
|
310
310
|
DUCKDB_PENDING_RESULT_NOT_READY = 1,
|
311
|
-
DUCKDB_PENDING_ERROR = 2
|
311
|
+
DUCKDB_PENDING_ERROR = 2,
|
312
|
+
DUCKDB_PENDING_NO_TASKS_AVAILABLE = 3
|
312
313
|
} duckdb_pending_state;
|
313
314
|
|
314
315
|
//===--------------------------------------------------------------------===//
|
@@ -1260,6 +1261,15 @@ Otherwise, all remaining tasks must be executed first.
|
|
1260
1261
|
*/
|
1261
1262
|
DUCKDB_API duckdb_state duckdb_execute_pending(duckdb_pending_result pending_result, duckdb_result *out_result);
|
1262
1263
|
|
1264
|
+
/*!
|
1265
|
+
Returns whether a duckdb_pending_state is finished executing. For example if `pending_state` is
|
1266
|
+
DUCKDB_PENDING_RESULT_READY, this function will return true.
|
1267
|
+
|
1268
|
+
* pending_state: The pending state on which to decide whether to finish execution.
|
1269
|
+
* returns: Boolean indicating pending execution should be considered finished.
|
1270
|
+
*/
|
1271
|
+
DUCKDB_API bool duckdb_pending_execution_is_finished(duckdb_pending_state pending_state);
|
1272
|
+
|
1263
1273
|
//===--------------------------------------------------------------------===//
|
1264
1274
|
// Value Interface
|
1265
1275
|
//===--------------------------------------------------------------------===//
|
@@ -92,6 +92,8 @@ duckdb_pending_state duckdb_pending_execute_task(duckdb_pending_result pending_r
|
|
92
92
|
switch (return_value) {
|
93
93
|
case PendingExecutionResult::RESULT_READY:
|
94
94
|
return DUCKDB_PENDING_RESULT_READY;
|
95
|
+
case PendingExecutionResult::NO_TASKS_AVAILABLE:
|
96
|
+
return DUCKDB_PENDING_NO_TASKS_AVAILABLE;
|
95
97
|
case PendingExecutionResult::RESULT_NOT_READY:
|
96
98
|
return DUCKDB_PENDING_RESULT_NOT_READY;
|
97
99
|
default:
|
@@ -99,6 +101,21 @@ duckdb_pending_state duckdb_pending_execute_task(duckdb_pending_result pending_r
|
|
99
101
|
}
|
100
102
|
}
|
101
103
|
|
104
|
+
bool duckdb_pending_execution_is_finished(duckdb_pending_state pending_state) {
|
105
|
+
switch (pending_state) {
|
106
|
+
case DUCKDB_PENDING_RESULT_READY:
|
107
|
+
return PendingQueryResult::IsFinished(PendingExecutionResult::RESULT_READY);
|
108
|
+
case DUCKDB_PENDING_NO_TASKS_AVAILABLE:
|
109
|
+
return PendingQueryResult::IsFinished(PendingExecutionResult::NO_TASKS_AVAILABLE);
|
110
|
+
case DUCKDB_PENDING_RESULT_NOT_READY:
|
111
|
+
return PendingQueryResult::IsFinished(PendingExecutionResult::RESULT_NOT_READY);
|
112
|
+
case DUCKDB_PENDING_ERROR:
|
113
|
+
return PendingQueryResult::IsFinished(PendingExecutionResult::EXECUTION_ERROR);
|
114
|
+
default:
|
115
|
+
return PendingQueryResult::IsFinished(PendingExecutionResult::EXECUTION_ERROR);
|
116
|
+
}
|
117
|
+
}
|
118
|
+
|
102
119
|
duckdb_state duckdb_execute_pending(duckdb_pending_result pending_result, duckdb_result *out_result) {
|
103
120
|
if (!pending_result || !out_result) {
|
104
121
|
return DuckDBError;
|
@@ -55,7 +55,8 @@ PendingExecutionResult PendingQueryResult::ExecuteTaskInternal(ClientContextLock
|
|
55
55
|
|
56
56
|
unique_ptr<QueryResult> PendingQueryResult::ExecuteInternal(ClientContextLock &lock) {
|
57
57
|
CheckExecutableInternal(lock);
|
58
|
-
while
|
58
|
+
// Busy wait while execution is not finished
|
59
|
+
while (!IsFinished(ExecuteTaskInternal(lock))) {
|
59
60
|
}
|
60
61
|
if (HasError()) {
|
61
62
|
return make_uniq<MaterializedQueryResult>(error);
|
@@ -74,4 +75,11 @@ void PendingQueryResult::Close() {
|
|
74
75
|
context.reset();
|
75
76
|
}
|
76
77
|
|
78
|
+
bool PendingQueryResult::IsFinished(PendingExecutionResult result) {
|
79
|
+
if (result == PendingExecutionResult::RESULT_READY || result == PendingExecutionResult::EXECUTION_ERROR) {
|
80
|
+
return true;
|
81
|
+
}
|
82
|
+
return false;
|
83
|
+
}
|
84
|
+
|
77
85
|
} // namespace duckdb
|