duckdb 0.8.2-dev2673.0 → 0.8.2-dev2809.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +1 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-makedate.cpp +12 -6
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +6 -6
- package/src/duckdb/src/common/adbc/adbc.cpp +52 -21
- package/src/duckdb/src/common/adbc/driver_manager.cpp +12 -2
- package/src/duckdb/src/common/enum_util.cpp +5 -0
- package/src/duckdb/src/common/field_writer.cpp +1 -0
- package/src/duckdb/src/common/local_file_system.cpp +5 -0
- package/src/duckdb/src/common/types/interval.cpp +3 -0
- package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +35 -5
- package/src/duckdb/src/execution/index/art/art.cpp +6 -9
- package/src/duckdb/src/execution/index/art/leaf.cpp +4 -4
- package/src/duckdb/src/execution/index/art/node.cpp +9 -12
- package/src/duckdb/src/execution/index/art/node16.cpp +4 -4
- package/src/duckdb/src/execution/index/art/node256.cpp +4 -4
- package/src/duckdb/src/execution/index/art/node4.cpp +4 -5
- package/src/duckdb/src/execution/index/art/node48.cpp +4 -4
- package/src/duckdb/src/execution/index/art/prefix.cpp +4 -6
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +283 -91
- package/src/duckdb/src/execution/operator/filter/physical_filter.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -2
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -6
- package/src/duckdb/src/execution/window_executor.cpp +10 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +5 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/pending_execution_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +4 -4
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +6 -4
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +0 -2
- package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +5 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +37 -63
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cost_model.hpp +37 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +14 -29
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +7 -21
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +0 -11
- package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +89 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +17 -31
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +113 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +73 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +73 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +4 -1
- package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +0 -2
- package/src/duckdb/src/include/duckdb/storage/block.hpp +27 -4
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +9 -9
- package/src/duckdb/src/include/duckdb/storage/checkpoint/row_group_writer.hpp +5 -5
- package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_reader.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +19 -16
- package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/index.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +88 -0
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp +54 -0
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +45 -0
- package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +6 -5
- package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/table/row_group_segment_tree.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table_io_manager.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +3 -4
- package/src/duckdb/src/include/duckdb.h +11 -1
- package/src/duckdb/src/main/capi/pending-c.cpp +17 -0
- package/src/duckdb/src/main/pending_query_result.cpp +9 -1
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +79 -325
- package/src/duckdb/src/optimizer/join_order/cost_model.cpp +19 -0
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -37
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +48 -1078
- package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +552 -0
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +32 -29
- package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +409 -0
- package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +356 -0
- package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +351 -0
- package/src/duckdb/src/parallel/executor.cpp +6 -0
- package/src/duckdb/src/parallel/task_scheduler.cpp +7 -0
- package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +1 -1
- package/src/duckdb/src/planner/operator/logical_get.cpp +4 -0
- package/src/duckdb/src/storage/buffer/block_manager.cpp +10 -9
- package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -1
- package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +3 -4
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +7 -7
- package/src/duckdb/src/storage/checkpoint_manager.cpp +49 -43
- package/src/duckdb/src/storage/index.cpp +1 -1
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +267 -0
- package/src/duckdb/src/storage/metadata/metadata_reader.cpp +80 -0
- package/src/duckdb/src/storage/metadata/metadata_writer.cpp +86 -0
- package/src/duckdb/src/storage/single_file_block_manager.cpp +47 -52
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/storage_manager.cpp +4 -3
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +3 -3
- package/src/duckdb/src/storage/table/persistent_table_data.cpp +1 -2
- package/src/duckdb/src/storage/table/row_group.cpp +9 -10
- package/src/duckdb/src/storage/table/row_group_collection.cpp +6 -3
- package/src/duckdb/src/storage/table_index_list.cpp +1 -1
- package/src/duckdb/src/storage/wal_replay.cpp +3 -2
- package/src/duckdb/src/storage/write_ahead_log.cpp +3 -2
- package/src/duckdb/ub_src_optimizer_join_order.cpp +10 -0
- package/src/duckdb/ub_src_storage.cpp +0 -4
- package/src/duckdb/ub_src_storage_metadata.cpp +6 -0
- package/src/duckdb/src/include/duckdb/storage/meta_block_reader.hpp +0 -46
- package/src/duckdb/src/include/duckdb/storage/meta_block_writer.hpp +0 -50
- package/src/duckdb/src/storage/meta_block_reader.cpp +0 -69
- package/src/duckdb/src/storage/meta_block_writer.cpp +0 -80
@@ -9,47 +9,32 @@
|
|
9
9
|
#pragma once
|
10
10
|
|
11
11
|
#include "duckdb/common/common.hpp"
|
12
|
+
#include "duckdb/common/optional_ptr.hpp"
|
13
|
+
#include "duckdb/optimizer/join_order/join_relation.hpp"
|
14
|
+
#include "duckdb/optimizer/join_order/join_node.hpp"
|
15
|
+
#include "duckdb/optimizer/join_order/relation_manager.hpp"
|
12
16
|
#include "duckdb/common/pair.hpp"
|
13
17
|
#include "duckdb/common/unordered_map.hpp"
|
14
18
|
#include "duckdb/common/unordered_set.hpp"
|
15
|
-
#include "duckdb/optimizer/join_order/join_relation.hpp"
|
16
19
|
#include "duckdb/common/vector.hpp"
|
17
20
|
#include "duckdb/planner/column_binding.hpp"
|
18
|
-
#include "duckdb/common/optional_ptr.hpp"
|
19
21
|
|
20
22
|
#include <functional>
|
21
23
|
|
22
24
|
namespace duckdb {
|
23
|
-
class Expression;
|
24
|
-
class LogicalOperator;
|
25
|
-
|
26
|
-
struct FilterInfo {
|
27
|
-
FilterInfo(JoinRelationSet &set, idx_t filter_index) : set(set), filter_index(filter_index) {
|
28
|
-
}
|
29
|
-
|
30
|
-
JoinRelationSet &set;
|
31
|
-
idx_t filter_index;
|
32
|
-
optional_ptr<JoinRelationSet> left_set;
|
33
|
-
optional_ptr<JoinRelationSet> right_set;
|
34
|
-
ColumnBinding left_binding;
|
35
|
-
ColumnBinding right_binding;
|
36
|
-
};
|
37
25
|
|
38
|
-
struct
|
39
|
-
vector<reference<FilterInfo>> filters;
|
40
|
-
unordered_map<idx_t, unique_ptr<FilterNode>> children;
|
41
|
-
};
|
26
|
+
struct FilterInfo;
|
42
27
|
|
43
28
|
struct NeighborInfo {
|
44
|
-
NeighborInfo(JoinRelationSet
|
29
|
+
NeighborInfo(optional_ptr<JoinRelationSet> neighbor) : neighbor(neighbor) {
|
45
30
|
}
|
46
31
|
|
47
|
-
JoinRelationSet
|
48
|
-
vector<
|
32
|
+
optional_ptr<JoinRelationSet> neighbor;
|
33
|
+
vector<optional_ptr<FilterInfo>> filters;
|
49
34
|
};
|
50
35
|
|
51
36
|
//! The QueryGraph contains edges between relations and allows edges to be created/queried
|
52
|
-
class
|
37
|
+
class QueryGraphEdges {
|
53
38
|
public:
|
54
39
|
//! Contains a node with info about neighboring relations and child edge infos
|
55
40
|
struct QueryEdge {
|
@@ -61,22 +46,23 @@ public:
|
|
61
46
|
string ToString() const;
|
62
47
|
void Print();
|
63
48
|
|
64
|
-
//! Create an edge in the edge_set
|
65
|
-
void CreateEdge(JoinRelationSet &left, JoinRelationSet &right, optional_ptr<FilterInfo> info);
|
66
49
|
//! Returns a connection if there is an edge that connects these two sets, or nullptr otherwise
|
67
|
-
vector<reference<NeighborInfo>> GetConnections(JoinRelationSet &node, JoinRelationSet &other);
|
50
|
+
const vector<reference<NeighborInfo>> GetConnections(JoinRelationSet &node, JoinRelationSet &other) const;
|
68
51
|
//! Enumerate the neighbors of a specific node that do not belong to any of the exclusion_set. Note that if a
|
69
52
|
//! neighbor has multiple nodes, this function will return the lowest entry in that set.
|
70
|
-
vector<idx_t> GetNeighbors(JoinRelationSet &node, unordered_set<idx_t> &exclusion_set);
|
53
|
+
const vector<idx_t> GetNeighbors(JoinRelationSet &node, unordered_set<idx_t> &exclusion_set) const;
|
54
|
+
|
71
55
|
//! Enumerate all neighbors of a given JoinRelationSet node
|
72
|
-
void EnumerateNeighbors(JoinRelationSet &node, const std::function<bool(NeighborInfo &)> &callback);
|
56
|
+
void EnumerateNeighbors(JoinRelationSet &node, const std::function<bool(NeighborInfo &)> &callback) const;
|
57
|
+
//! Create an edge in the edge_set
|
58
|
+
void CreateEdge(JoinRelationSet &left, JoinRelationSet &right, optional_ptr<FilterInfo> info);
|
73
59
|
|
74
60
|
private:
|
75
61
|
//! Get the QueryEdge of a specific node
|
76
|
-
QueryEdge
|
62
|
+
optional_ptr<QueryEdge> GetQueryEdge(JoinRelationSet &left);
|
77
63
|
|
78
64
|
void EnumerateNeighborsDFS(JoinRelationSet &node, reference<QueryEdge> info, idx_t index,
|
79
|
-
const std::function<bool(NeighborInfo &)> &callback);
|
65
|
+
const std::function<bool(NeighborInfo &)> &callback) const;
|
80
66
|
|
81
67
|
QueryEdge root;
|
82
68
|
};
|
@@ -0,0 +1,113 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/optimizer/join_order/query_graph_manager.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/common/common.hpp"
|
12
|
+
#include "duckdb/common/optional_ptr.hpp"
|
13
|
+
#include "duckdb/common/pair.hpp"
|
14
|
+
#include "duckdb/common/unordered_map.hpp"
|
15
|
+
#include "duckdb/common/unordered_set.hpp"
|
16
|
+
#include "duckdb/common/vector.hpp"
|
17
|
+
#include "duckdb/optimizer/join_order/join_node.hpp"
|
18
|
+
#include "duckdb/optimizer/join_order/join_relation.hpp"
|
19
|
+
#include "duckdb/optimizer/join_order/query_graph.hpp"
|
20
|
+
#include "duckdb/optimizer/join_order/relation_manager.hpp"
|
21
|
+
#include "duckdb/planner/column_binding.hpp"
|
22
|
+
#include "duckdb/planner/logical_operator.hpp"
|
23
|
+
|
24
|
+
#include <functional>
|
25
|
+
|
26
|
+
namespace duckdb {
|
27
|
+
|
28
|
+
struct GenerateJoinRelation {
|
29
|
+
GenerateJoinRelation(optional_ptr<JoinRelationSet> set, unique_ptr<LogicalOperator> op_p)
|
30
|
+
: set(set), op(std::move(op_p)) {
|
31
|
+
}
|
32
|
+
|
33
|
+
optional_ptr<JoinRelationSet> set;
|
34
|
+
unique_ptr<LogicalOperator> op;
|
35
|
+
};
|
36
|
+
|
37
|
+
//! Filter info struct that is used by the cardinality estimator to set the initial cardinality
|
38
|
+
//! but is also eventually transformed into a query edge.
|
39
|
+
struct FilterInfo {
|
40
|
+
FilterInfo(unique_ptr<Expression> filter, JoinRelationSet &set, idx_t filter_index)
|
41
|
+
: filter(std::move(filter)), set(set), filter_index(filter_index) {
|
42
|
+
}
|
43
|
+
|
44
|
+
unique_ptr<Expression> filter;
|
45
|
+
JoinRelationSet &set;
|
46
|
+
idx_t filter_index;
|
47
|
+
optional_ptr<JoinRelationSet> left_set;
|
48
|
+
optional_ptr<JoinRelationSet> right_set;
|
49
|
+
ColumnBinding left_binding;
|
50
|
+
ColumnBinding right_binding;
|
51
|
+
};
|
52
|
+
|
53
|
+
//! The QueryGraphManager manages the process of extracting the reorderable and nonreorderable operations
|
54
|
+
//! from the logical plan and creating the intermediate structures needed by the plan enumerator.
|
55
|
+
//! When the plan enumerator finishes, the Query Graph Manager can then recreate the logical plan.
|
56
|
+
class QueryGraphManager {
|
57
|
+
public:
|
58
|
+
QueryGraphManager(ClientContext &context) : relation_manager(context), context(context) {
|
59
|
+
}
|
60
|
+
|
61
|
+
//! manage relations and the logical operators they represent
|
62
|
+
RelationManager relation_manager;
|
63
|
+
|
64
|
+
//! A structure holding all the created JoinRelationSet objects
|
65
|
+
JoinRelationSetManager set_manager;
|
66
|
+
|
67
|
+
ClientContext &context;
|
68
|
+
|
69
|
+
//! Extract the join relations, optimizing non-reorderable relations when encountered
|
70
|
+
bool Build(LogicalOperator &op);
|
71
|
+
|
72
|
+
//! Reconstruct the logical plan using the plan found by the plan enumerator
|
73
|
+
unique_ptr<LogicalOperator> Reconstruct(unique_ptr<LogicalOperator> plan, JoinNode &node);
|
74
|
+
|
75
|
+
//! Get a reference to the QueryGraphEdges structure that stores edges between
|
76
|
+
//! nodes and hypernodes.
|
77
|
+
const QueryGraphEdges &GetQueryGraphEdges() const;
|
78
|
+
|
79
|
+
//! Get a list of the join filters in the join plan that eventually are
|
80
|
+
//! transformed into the query graph edges
|
81
|
+
const vector<unique_ptr<FilterInfo>> &GetFilterBindings() const;
|
82
|
+
|
83
|
+
//! Plan enumerator may not find a full plan and therefore will need to create cross
|
84
|
+
//! products to create edges.
|
85
|
+
void CreateQueryGraphCrossProduct(JoinRelationSet &left, JoinRelationSet &right);
|
86
|
+
|
87
|
+
//! after join order optimization, we perform build side probe side optimizations.
|
88
|
+
//! (Basically we put lower expected cardinality columns on the build side, and larger
|
89
|
+
//! tables on the probe side)
|
90
|
+
unique_ptr<LogicalOperator> LeftRightOptimizations(unique_ptr<LogicalOperator> op);
|
91
|
+
|
92
|
+
private:
|
93
|
+
vector<reference<LogicalOperator>> filter_operators;
|
94
|
+
|
95
|
+
//! Filter information including the column_bindings that join filters
|
96
|
+
//! used by the cardinality estimator to estimate distinct counts
|
97
|
+
vector<unique_ptr<FilterInfo>> filters_and_bindings;
|
98
|
+
|
99
|
+
QueryGraphEdges query_graph;
|
100
|
+
|
101
|
+
void GetColumnBinding(Expression &expression, ColumnBinding &binding);
|
102
|
+
|
103
|
+
bool ExtractBindings(Expression &expression, unordered_set<idx_t> &bindings);
|
104
|
+
bool LeftCardLessThanRight(LogicalOperator &op);
|
105
|
+
|
106
|
+
void CreateHyperGraphEdges();
|
107
|
+
|
108
|
+
GenerateJoinRelation GenerateJoins(vector<unique_ptr<LogicalOperator>> &extracted_relations, JoinNode &node);
|
109
|
+
|
110
|
+
unique_ptr<LogicalOperator> RewritePlan(unique_ptr<LogicalOperator> plan, JoinNode &node);
|
111
|
+
};
|
112
|
+
|
113
|
+
} // namespace duckdb
|
@@ -0,0 +1,73 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/optimizer/join_order/relation_manager.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/common/unordered_map.hpp"
|
12
|
+
#include "duckdb/common/unordered_set.hpp"
|
13
|
+
#include "duckdb/optimizer/join_order/join_relation.hpp"
|
14
|
+
#include "duckdb/optimizer/join_order/cardinality_estimator.hpp"
|
15
|
+
#include "duckdb/optimizer/join_order/relation_statistics_helper.hpp"
|
16
|
+
#include "duckdb/optimizer/join_order/join_node.hpp"
|
17
|
+
#include "duckdb/parser/expression_map.hpp"
|
18
|
+
#include "duckdb/planner/logical_operator.hpp"
|
19
|
+
#include "duckdb/planner/logical_operator_visitor.hpp"
|
20
|
+
|
21
|
+
namespace duckdb {
|
22
|
+
|
23
|
+
struct FilterInfo;
|
24
|
+
|
25
|
+
//! Represents a single relation and any metadata accompanying that relation
|
26
|
+
struct SingleJoinRelation {
|
27
|
+
LogicalOperator &op;
|
28
|
+
optional_ptr<LogicalOperator> parent;
|
29
|
+
RelationStats stats;
|
30
|
+
|
31
|
+
SingleJoinRelation(LogicalOperator &op, optional_ptr<LogicalOperator> parent) : op(op), parent(parent) {
|
32
|
+
}
|
33
|
+
SingleJoinRelation(LogicalOperator &op, optional_ptr<LogicalOperator> parent, RelationStats stats)
|
34
|
+
: op(op), parent(parent), stats(stats) {
|
35
|
+
}
|
36
|
+
};
|
37
|
+
|
38
|
+
class RelationManager {
|
39
|
+
public:
|
40
|
+
explicit RelationManager(ClientContext &context) : context(context) {
|
41
|
+
}
|
42
|
+
|
43
|
+
idx_t NumRelations();
|
44
|
+
|
45
|
+
bool ExtractJoinRelations(LogicalOperator &input_op, vector<reference<LogicalOperator>> &filter_operators,
|
46
|
+
optional_ptr<LogicalOperator> parent = nullptr);
|
47
|
+
|
48
|
+
//! for each join filter in the logical plan op, extract the relations that are referred to on
|
49
|
+
//! both sides of the join filter, along with the tables & indexes.
|
50
|
+
vector<unique_ptr<FilterInfo>> ExtractEdges(LogicalOperator &op,
|
51
|
+
vector<reference<LogicalOperator>> &filter_operators,
|
52
|
+
JoinRelationSetManager &set_manager);
|
53
|
+
|
54
|
+
//! Extract the set of relations referred to inside an expression
|
55
|
+
bool ExtractBindings(Expression &expression, unordered_set<idx_t> &bindings);
|
56
|
+
void AddRelation(LogicalOperator &op, optional_ptr<LogicalOperator> parent, const RelationStats &stats);
|
57
|
+
|
58
|
+
void AddAggregateRelation(LogicalOperator &op, optional_ptr<LogicalOperator> parent, const RelationStats &stats);
|
59
|
+
vector<unique_ptr<SingleJoinRelation>> GetRelations();
|
60
|
+
|
61
|
+
const vector<RelationStats> GetRelationStats();
|
62
|
+
//! A mapping of base table index -> index into relations array (relation number)
|
63
|
+
unordered_map<idx_t, idx_t> relation_mapping;
|
64
|
+
|
65
|
+
void PrintRelationStats();
|
66
|
+
|
67
|
+
private:
|
68
|
+
ClientContext &context;
|
69
|
+
//! Set of all relations considered in the join optimizer
|
70
|
+
vector<unique_ptr<SingleJoinRelation>> relations;
|
71
|
+
};
|
72
|
+
|
73
|
+
} // namespace duckdb
|
@@ -0,0 +1,73 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/optimizer/join_order/relation_statistics_helper.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
#pragma once
|
9
|
+
|
10
|
+
#include "duckdb/planner/filter/conjunction_filter.hpp"
|
11
|
+
#include "duckdb/planner/logical_operator.hpp"
|
12
|
+
|
13
|
+
namespace duckdb {
|
14
|
+
|
15
|
+
class CardinalityEstimator;
|
16
|
+
|
17
|
+
struct DistinctCount {
|
18
|
+
idx_t distinct_count;
|
19
|
+
bool from_hll;
|
20
|
+
};
|
21
|
+
|
22
|
+
struct ExpressionBinding {
|
23
|
+
bool found_expression = false;
|
24
|
+
ColumnBinding child_binding;
|
25
|
+
bool expression_is_constant = false;
|
26
|
+
};
|
27
|
+
|
28
|
+
struct RelationStats {
|
29
|
+
// column_id -> estimated distinct count for column
|
30
|
+
vector<DistinctCount> column_distinct_count;
|
31
|
+
idx_t cardinality;
|
32
|
+
double filter_strength = 1;
|
33
|
+
bool stats_initialized = false;
|
34
|
+
|
35
|
+
// for debug, column names and tables
|
36
|
+
vector<string> column_names;
|
37
|
+
string table_name;
|
38
|
+
|
39
|
+
RelationStats() : cardinality(1), filter_strength(1), stats_initialized(false) {
|
40
|
+
}
|
41
|
+
};
|
42
|
+
|
43
|
+
class RelationStatisticsHelper {
|
44
|
+
public:
|
45
|
+
static constexpr double DEFAULT_SELECTIVITY = 0.2;
|
46
|
+
|
47
|
+
public:
|
48
|
+
static idx_t InspectConjunctionAND(idx_t cardinality, idx_t column_index, ConjunctionAndFilter &filter,
|
49
|
+
BaseStatistics &base_stats);
|
50
|
+
// static idx_t InspectConjunctionOR(idx_t cardinality, idx_t column_index, ConjunctionOrFilter &filter,
|
51
|
+
// BaseStatistics &base_stats);
|
52
|
+
//! Extract Statistics from a LogicalGet.
|
53
|
+
static RelationStats ExtractGetStats(LogicalGet &get, ClientContext &context);
|
54
|
+
static RelationStats ExtractDelimGetStats(LogicalDelimGet &delim_get, ClientContext &context);
|
55
|
+
//! Create the statistics for a projection using the statistics of the operator that sits underneath the
|
56
|
+
//! projection. Then also create statistics for any extra columns the projection creates.
|
57
|
+
static RelationStats ExtractDummyScanStats(LogicalDummyScan &dummy_scan, ClientContext &context);
|
58
|
+
static RelationStats ExtractExpressionGetStats(LogicalExpressionGet &expression_get, ClientContext &context);
|
59
|
+
//! All relation extractors for blocking relations
|
60
|
+
static RelationStats ExtractProjectionStats(LogicalProjection &proj, RelationStats &child_stats);
|
61
|
+
static RelationStats ExtractAggregationStats(LogicalAggregate &aggr, RelationStats &child_stats);
|
62
|
+
static RelationStats ExtractWindowStats(LogicalWindow &window, RelationStats &child_stats);
|
63
|
+
//! Called after reordering a query plan with potentially 2+ relations.
|
64
|
+
static RelationStats CombineStatsOfReorderableOperator(vector<ColumnBinding> &bindings,
|
65
|
+
vector<RelationStats> relation_stats);
|
66
|
+
//! Called after reordering a query plan with potentially 2+ relations.
|
67
|
+
static RelationStats CombineStatsOfNonReorderableOperator(LogicalOperator &op, vector<RelationStats> child_stats);
|
68
|
+
static void CopyRelationStats(RelationStats &to, const RelationStats &from);
|
69
|
+
|
70
|
+
private:
|
71
|
+
};
|
72
|
+
|
73
|
+
} // namespace duckdb
|
@@ -39,7 +39,7 @@ class TaskScheduler {
|
|
39
39
|
constexpr static int64_t TASK_TIMEOUT_USECS = 5000;
|
40
40
|
|
41
41
|
public:
|
42
|
-
TaskScheduler(DatabaseInstance &db);
|
42
|
+
explicit TaskScheduler(DatabaseInstance &db);
|
43
43
|
~TaskScheduler();
|
44
44
|
|
45
45
|
DUCKDB_API static TaskScheduler &GetScheduler(ClientContext &context);
|
@@ -67,6 +67,9 @@ public:
|
|
67
67
|
//! Send signals to n threads, signalling for them to wake up and attempt to execute a task
|
68
68
|
void Signal(idx_t n);
|
69
69
|
|
70
|
+
//! Yield to other threads
|
71
|
+
void YieldThread();
|
72
|
+
|
70
73
|
//! Set the allocator flush threshold
|
71
74
|
void SetAllocatorFlushTreshold(idx_t threshold);
|
72
75
|
|
@@ -49,8 +49,6 @@ public:
|
|
49
49
|
idx_t estimated_cardinality;
|
50
50
|
bool has_estimated_cardinality;
|
51
51
|
|
52
|
-
unique_ptr<EstimatedProperties> estimated_props;
|
53
|
-
|
54
52
|
public:
|
55
53
|
virtual vector<ColumnBinding> GetColumnBindings();
|
56
54
|
static vector<ColumnBinding> GenerateColumnBindings(idx_t table_idx, idx_t column_count);
|
@@ -24,10 +24,33 @@ public:
|
|
24
24
|
};
|
25
25
|
|
26
26
|
struct BlockPointer {
|
27
|
-
BlockPointer(block_id_t block_id_p, uint32_t offset_p) : block_id(block_id_p), offset(offset_p) {
|
28
|
-
|
29
|
-
|
30
|
-
|
27
|
+
BlockPointer(block_id_t block_id_p, uint32_t offset_p) : block_id(block_id_p), offset(offset_p) {
|
28
|
+
}
|
29
|
+
BlockPointer() : block_id(INVALID_BLOCK), offset(0) {
|
30
|
+
}
|
31
|
+
|
32
|
+
block_id_t block_id;
|
33
|
+
uint32_t offset;
|
34
|
+
|
35
|
+
bool IsValid() {
|
36
|
+
return block_id != INVALID_BLOCK;
|
37
|
+
}
|
38
|
+
};
|
39
|
+
|
40
|
+
struct MetaBlockPointer {
|
41
|
+
MetaBlockPointer(idx_t block_pointer, uint32_t offset_p) : block_pointer(block_pointer), offset(offset_p) {
|
42
|
+
}
|
43
|
+
MetaBlockPointer() : block_pointer(DConstants::INVALID_INDEX), offset(0) {
|
44
|
+
}
|
45
|
+
|
46
|
+
idx_t block_pointer;
|
47
|
+
uint32_t offset;
|
48
|
+
|
49
|
+
bool IsValid() {
|
50
|
+
return block_pointer != DConstants::INVALID_INDEX;
|
51
|
+
}
|
52
|
+
block_id_t GetBlockId();
|
53
|
+
uint32_t GetBlockIndex();
|
31
54
|
};
|
32
55
|
|
33
56
|
} // namespace duckdb
|
@@ -19,13 +19,13 @@ class BlockHandle;
|
|
19
19
|
class BufferManager;
|
20
20
|
class ClientContext;
|
21
21
|
class DatabaseInstance;
|
22
|
+
class MetadataManager;
|
22
23
|
|
23
24
|
//! BlockManager is an abstract representation to manage blocks on DuckDB. When writing or reading blocks, the
|
24
25
|
//! BlockManager creates and accesses blocks. The concrete types implement how blocks are stored.
|
25
26
|
class BlockManager {
|
26
27
|
public:
|
27
|
-
explicit BlockManager(BufferManager &buffer_manager)
|
28
|
-
}
|
28
|
+
explicit BlockManager(BufferManager &buffer_manager);
|
29
29
|
virtual ~BlockManager() = default;
|
30
30
|
|
31
31
|
//! The buffer manager
|
@@ -38,7 +38,7 @@ public:
|
|
38
38
|
//! Return the next free block id
|
39
39
|
virtual block_id_t GetFreeBlockId() = 0;
|
40
40
|
//! Returns whether or not a specified block is the root block
|
41
|
-
virtual bool IsRootBlock(
|
41
|
+
virtual bool IsRootBlock(MetaBlockPointer root) = 0;
|
42
42
|
//! Mark a block as "free"; free blocks are immediately added to the free list and can be immediately overwritten
|
43
43
|
virtual void MarkBlockAsFree(block_id_t block_id) = 0;
|
44
44
|
//! Mark a block as "modified"; modified blocks are added to the free list after a checkpoint (i.e. their data is
|
@@ -48,7 +48,7 @@ public:
|
|
48
48
|
//! called.
|
49
49
|
virtual void IncreaseBlockReferenceCount(block_id_t block_id) = 0;
|
50
50
|
//! Get the first meta block id
|
51
|
-
virtual
|
51
|
+
virtual idx_t GetMetaBlock() = 0;
|
52
52
|
//! Read the content of the block from disk
|
53
53
|
virtual void Read(Block &block) = 0;
|
54
54
|
//! Writes the block to disk
|
@@ -69,20 +69,20 @@ public:
|
|
69
69
|
virtual void Truncate();
|
70
70
|
|
71
71
|
//! Register a block with the given block id in the base file
|
72
|
-
shared_ptr<BlockHandle> RegisterBlock(block_id_t block_id
|
73
|
-
//! Clear cached handles for meta blocks
|
74
|
-
void ClearMetaBlockHandles();
|
72
|
+
shared_ptr<BlockHandle> RegisterBlock(block_id_t block_id);
|
75
73
|
//! Convert an existing in-memory buffer into a persistent disk-backed block
|
76
74
|
shared_ptr<BlockHandle> ConvertToPersistent(block_id_t block_id, shared_ptr<BlockHandle> old_block);
|
77
75
|
|
78
76
|
void UnregisterBlock(block_id_t block_id, bool can_destroy);
|
79
77
|
|
78
|
+
MetadataManager &GetMetadataManager();
|
79
|
+
|
80
80
|
private:
|
81
81
|
//! The lock for the set of blocks
|
82
82
|
mutex blocks_lock;
|
83
83
|
//! A mapping of block id -> BlockHandle
|
84
84
|
unordered_map<block_id_t, weak_ptr<BlockHandle>> blocks;
|
85
|
-
//!
|
86
|
-
|
85
|
+
//! The metadata manager
|
86
|
+
unique_ptr<MetadataManager> metadata_manager;
|
87
87
|
};
|
88
88
|
} // namespace duckdb
|
@@ -32,7 +32,7 @@ public:
|
|
32
32
|
|
33
33
|
virtual void WriteColumnDataPointers(ColumnCheckpointState &column_checkpoint_state) = 0;
|
34
34
|
|
35
|
-
virtual
|
35
|
+
virtual MetadataWriter &GetPayloadWriter() = 0;
|
36
36
|
|
37
37
|
void RegisterPartialBlock(PartialBlockAllocation &&allocation);
|
38
38
|
PartialBlockAllocation GetBlockAllocation(uint32_t segment_size);
|
@@ -50,18 +50,18 @@ protected:
|
|
50
50
|
class SingleFileRowGroupWriter : public RowGroupWriter {
|
51
51
|
public:
|
52
52
|
SingleFileRowGroupWriter(TableCatalogEntry &table, PartialBlockManager &partial_block_manager,
|
53
|
-
|
53
|
+
MetadataWriter &table_data_writer)
|
54
54
|
: RowGroupWriter(table, partial_block_manager), table_data_writer(table_data_writer) {
|
55
55
|
}
|
56
56
|
|
57
|
-
//!
|
57
|
+
//! MetadataWriter is a cursor on a given BlockManager. This returns the
|
58
58
|
//! cursor against which we should write payload data for the specified RowGroup.
|
59
|
-
|
59
|
+
MetadataWriter &table_data_writer;
|
60
60
|
|
61
61
|
public:
|
62
62
|
virtual void WriteColumnDataPointers(ColumnCheckpointState &column_checkpoint_state) override;
|
63
63
|
|
64
|
-
virtual
|
64
|
+
virtual MetadataWriter &GetPayloadWriter() override;
|
65
65
|
};
|
66
66
|
|
67
67
|
} // namespace duckdb
|
@@ -16,12 +16,12 @@ struct BoundCreateTableInfo;
|
|
16
16
|
//! The table data reader is responsible for reading the data of a table from the block manager
|
17
17
|
class TableDataReader {
|
18
18
|
public:
|
19
|
-
TableDataReader(
|
19
|
+
TableDataReader(MetadataReader &reader, BoundCreateTableInfo &info);
|
20
20
|
|
21
21
|
void ReadTableData();
|
22
22
|
|
23
23
|
private:
|
24
|
-
|
24
|
+
MetadataReader &reader;
|
25
25
|
BoundCreateTableInfo &info;
|
26
26
|
};
|
27
27
|
|
@@ -45,7 +45,7 @@ protected:
|
|
45
45
|
class SingleFileTableDataWriter : public TableDataWriter {
|
46
46
|
public:
|
47
47
|
SingleFileTableDataWriter(SingleFileCheckpointWriter &checkpoint_manager, TableCatalogEntry &table,
|
48
|
-
|
48
|
+
MetadataWriter &table_data_writer, MetadataWriter &meta_data_writer);
|
49
49
|
|
50
50
|
public:
|
51
51
|
virtual void FinalizeTable(TableStatistics &&global_stats, DataTableInfo *info) override;
|
@@ -54,9 +54,9 @@ public:
|
|
54
54
|
private:
|
55
55
|
SingleFileCheckpointWriter &checkpoint_manager;
|
56
56
|
// Writes the actual table data
|
57
|
-
|
57
|
+
MetadataWriter &table_data_writer;
|
58
58
|
// Writes the metadata of the table
|
59
|
-
|
59
|
+
MetadataWriter &meta_data_writer;
|
60
60
|
};
|
61
61
|
|
62
62
|
} // namespace duckdb
|
@@ -16,7 +16,7 @@ namespace duckdb {
|
|
16
16
|
class DatabaseInstance;
|
17
17
|
class ClientContext;
|
18
18
|
class ColumnSegment;
|
19
|
-
class
|
19
|
+
class MetadataReader;
|
20
20
|
class SchemaCatalogEntry;
|
21
21
|
class SequenceCatalogEntry;
|
22
22
|
class TableCatalogEntry;
|
@@ -33,7 +33,8 @@ public:
|
|
33
33
|
//! The database
|
34
34
|
AttachedDatabase &db;
|
35
35
|
|
36
|
-
virtual
|
36
|
+
virtual MetadataManager &GetMetadataManager() = 0;
|
37
|
+
virtual MetadataWriter &GetMetadataWriter() = 0;
|
37
38
|
virtual unique_ptr<TableDataWriter> GetTableDataWriter(TableCatalogEntry &table) = 0;
|
38
39
|
|
39
40
|
protected:
|
@@ -58,17 +59,17 @@ protected:
|
|
58
59
|
Catalog &catalog;
|
59
60
|
|
60
61
|
protected:
|
61
|
-
virtual void LoadCheckpoint(ClientContext &context,
|
62
|
-
virtual void ReadSchema(ClientContext &context,
|
63
|
-
virtual void ReadTable(ClientContext &context,
|
64
|
-
virtual void ReadView(ClientContext &context,
|
65
|
-
virtual void ReadSequence(ClientContext &context,
|
66
|
-
virtual void ReadMacro(ClientContext &context,
|
67
|
-
virtual void ReadTableMacro(ClientContext &context,
|
68
|
-
virtual void ReadIndex(ClientContext &context,
|
69
|
-
virtual void ReadType(ClientContext &context,
|
70
|
-
|
71
|
-
virtual void ReadTableData(ClientContext &context,
|
62
|
+
virtual void LoadCheckpoint(ClientContext &context, MetadataReader &reader);
|
63
|
+
virtual void ReadSchema(ClientContext &context, MetadataReader &reader);
|
64
|
+
virtual void ReadTable(ClientContext &context, MetadataReader &reader);
|
65
|
+
virtual void ReadView(ClientContext &context, MetadataReader &reader);
|
66
|
+
virtual void ReadSequence(ClientContext &context, MetadataReader &reader);
|
67
|
+
virtual void ReadMacro(ClientContext &context, MetadataReader &reader);
|
68
|
+
virtual void ReadTableMacro(ClientContext &context, MetadataReader &reader);
|
69
|
+
virtual void ReadIndex(ClientContext &context, MetadataReader &reader);
|
70
|
+
virtual void ReadType(ClientContext &context, MetadataReader &reader);
|
71
|
+
|
72
|
+
virtual void ReadTableData(ClientContext &context, MetadataReader &reader, BoundCreateTableInfo &bound_info);
|
72
73
|
};
|
73
74
|
|
74
75
|
class SingleFileCheckpointReader final : public CheckpointReader {
|
@@ -78,6 +79,7 @@ public:
|
|
78
79
|
}
|
79
80
|
|
80
81
|
void LoadFromStorage();
|
82
|
+
MetadataManager &GetMetadataManager();
|
81
83
|
|
82
84
|
//! The database
|
83
85
|
SingleFileStorageManager &storage;
|
@@ -98,16 +100,17 @@ public:
|
|
98
100
|
//! connection is available because right now the checkpointing cannot be done online. (TODO)
|
99
101
|
void CreateCheckpoint();
|
100
102
|
|
101
|
-
virtual
|
103
|
+
virtual MetadataWriter &GetMetadataWriter() override;
|
104
|
+
virtual MetadataManager &GetMetadataManager() override;
|
102
105
|
virtual unique_ptr<TableDataWriter> GetTableDataWriter(TableCatalogEntry &table) override;
|
103
106
|
|
104
107
|
BlockManager &GetBlockManager();
|
105
108
|
|
106
109
|
private:
|
107
110
|
//! The metadata writer is responsible for writing schema information
|
108
|
-
unique_ptr<
|
111
|
+
unique_ptr<MetadataWriter> metadata_writer;
|
109
112
|
//! The table data writer is responsible for writing the DataPointers used by the table chunks
|
110
|
-
unique_ptr<
|
113
|
+
unique_ptr<MetadataWriter> table_metadata_writer;
|
111
114
|
//! Because this is single-file storage, we can share partial blocks across
|
112
115
|
//! an entire checkpoint.
|
113
116
|
PartialBlockManager partial_block_manager;
|
@@ -33,7 +33,7 @@ struct RowGroupPointer {
|
|
33
33
|
uint64_t row_start;
|
34
34
|
uint64_t tuple_count;
|
35
35
|
//! The data pointers of the column segments stored in the row group
|
36
|
-
vector<
|
36
|
+
vector<MetaBlockPointer> data_pointers;
|
37
37
|
//! The versions information of the row group (if any)
|
38
38
|
shared_ptr<VersionNode> versions;
|
39
39
|
};
|
@@ -29,7 +29,7 @@ public:
|
|
29
29
|
block_id_t GetFreeBlockId() override {
|
30
30
|
throw InternalException("Cannot perform IO in in-memory database - GetFreeBlockId!");
|
31
31
|
}
|
32
|
-
bool IsRootBlock(
|
32
|
+
bool IsRootBlock(MetaBlockPointer root) override {
|
33
33
|
throw InternalException("Cannot perform IO in in-memory database - IsRootBlock!");
|
34
34
|
}
|
35
35
|
void MarkBlockAsFree(block_id_t block_id) override {
|
@@ -41,7 +41,7 @@ public:
|
|
41
41
|
void IncreaseBlockReferenceCount(block_id_t block_id) override {
|
42
42
|
throw InternalException("Cannot perform IO in in-memory database - IncreaseBlockReferenceCount!");
|
43
43
|
}
|
44
|
-
|
44
|
+
idx_t GetMetaBlock() override {
|
45
45
|
throw InternalException("Cannot perform IO in in-memory database - GetMetaBlock!");
|
46
46
|
}
|
47
47
|
void Read(Block &block) override {
|