duckdb 0.8.2-dev2673.0 → 0.8.2-dev2809.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +1 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-makedate.cpp +12 -6
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +6 -6
- package/src/duckdb/src/common/adbc/adbc.cpp +52 -21
- package/src/duckdb/src/common/adbc/driver_manager.cpp +12 -2
- package/src/duckdb/src/common/enum_util.cpp +5 -0
- package/src/duckdb/src/common/field_writer.cpp +1 -0
- package/src/duckdb/src/common/local_file_system.cpp +5 -0
- package/src/duckdb/src/common/types/interval.cpp +3 -0
- package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +35 -5
- package/src/duckdb/src/execution/index/art/art.cpp +6 -9
- package/src/duckdb/src/execution/index/art/leaf.cpp +4 -4
- package/src/duckdb/src/execution/index/art/node.cpp +9 -12
- package/src/duckdb/src/execution/index/art/node16.cpp +4 -4
- package/src/duckdb/src/execution/index/art/node256.cpp +4 -4
- package/src/duckdb/src/execution/index/art/node4.cpp +4 -5
- package/src/duckdb/src/execution/index/art/node48.cpp +4 -4
- package/src/duckdb/src/execution/index/art/prefix.cpp +4 -6
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +283 -91
- package/src/duckdb/src/execution/operator/filter/physical_filter.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -2
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -6
- package/src/duckdb/src/execution/window_executor.cpp +10 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +5 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/pending_execution_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +4 -4
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +6 -4
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +0 -2
- package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +5 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +37 -63
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cost_model.hpp +37 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +14 -29
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +7 -21
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +0 -11
- package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +89 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +17 -31
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +113 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +73 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +73 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +4 -1
- package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +0 -2
- package/src/duckdb/src/include/duckdb/storage/block.hpp +27 -4
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +9 -9
- package/src/duckdb/src/include/duckdb/storage/checkpoint/row_group_writer.hpp +5 -5
- package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_reader.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +19 -16
- package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/index.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +88 -0
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp +54 -0
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +45 -0
- package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +6 -5
- package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/table/row_group_segment_tree.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table_io_manager.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +3 -4
- package/src/duckdb/src/include/duckdb.h +11 -1
- package/src/duckdb/src/main/capi/pending-c.cpp +17 -0
- package/src/duckdb/src/main/pending_query_result.cpp +9 -1
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +79 -325
- package/src/duckdb/src/optimizer/join_order/cost_model.cpp +19 -0
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -37
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +48 -1078
- package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +552 -0
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +32 -29
- package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +409 -0
- package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +356 -0
- package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +351 -0
- package/src/duckdb/src/parallel/executor.cpp +6 -0
- package/src/duckdb/src/parallel/task_scheduler.cpp +7 -0
- package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +1 -1
- package/src/duckdb/src/planner/operator/logical_get.cpp +4 -0
- package/src/duckdb/src/storage/buffer/block_manager.cpp +10 -9
- package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -1
- package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +3 -4
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +7 -7
- package/src/duckdb/src/storage/checkpoint_manager.cpp +49 -43
- package/src/duckdb/src/storage/index.cpp +1 -1
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +267 -0
- package/src/duckdb/src/storage/metadata/metadata_reader.cpp +80 -0
- package/src/duckdb/src/storage/metadata/metadata_writer.cpp +86 -0
- package/src/duckdb/src/storage/single_file_block_manager.cpp +47 -52
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/storage_manager.cpp +4 -3
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +3 -3
- package/src/duckdb/src/storage/table/persistent_table_data.cpp +1 -2
- package/src/duckdb/src/storage/table/row_group.cpp +9 -10
- package/src/duckdb/src/storage/table/row_group_collection.cpp +6 -3
- package/src/duckdb/src/storage/table_index_list.cpp +1 -1
- package/src/duckdb/src/storage/wal_replay.cpp +3 -2
- package/src/duckdb/src/storage/write_ahead_log.cpp +3 -2
- package/src/duckdb/ub_src_optimizer_join_order.cpp +10 -0
- package/src/duckdb/ub_src_storage.cpp +0 -4
- package/src/duckdb/ub_src_storage_metadata.cpp +6 -0
- package/src/duckdb/src/include/duckdb/storage/meta_block_reader.hpp +0 -46
- package/src/duckdb/src/include/duckdb/storage/meta_block_writer.hpp +0 -50
- package/src/duckdb/src/storage/meta_block_reader.cpp +0 -69
- package/src/duckdb/src/storage/meta_block_writer.cpp +0 -80
@@ -50,6 +50,10 @@ public:
|
|
50
50
|
RowDataCollectionScanner(RowDataCollection &rows, RowDataCollection &heap, const RowLayout &layout, bool external,
|
51
51
|
bool flush = true);
|
52
52
|
|
53
|
+
// Single block scan
|
54
|
+
RowDataCollectionScanner(RowDataCollection &rows, RowDataCollection &heap, const RowLayout &layout, bool external,
|
55
|
+
idx_t block_idx, bool flush);
|
56
|
+
|
53
57
|
//! The type layout of the payload
|
54
58
|
inline const vector<LogicalType> &GetTypes() const {
|
55
59
|
return layout.GetTypes();
|
@@ -93,7 +97,7 @@ private:
|
|
93
97
|
//! Read state
|
94
98
|
ScanState read_state;
|
95
99
|
//! The total count of sorted_data
|
96
|
-
|
100
|
+
idx_t total_count;
|
97
101
|
//! The number of rows scanned so far
|
98
102
|
idx_t total_scanned;
|
99
103
|
//! Addresses used to gather from the sorted data
|
@@ -36,7 +36,7 @@ public:
|
|
36
36
|
ART(const vector<column_t> &column_ids, TableIOManager &table_io_manager,
|
37
37
|
const vector<unique_ptr<Expression>> &unbound_expressions, const IndexConstraintType constraint_type,
|
38
38
|
AttachedDatabase &db, const shared_ptr<vector<FixedSizeAllocator>> &allocators_ptr = nullptr,
|
39
|
-
|
39
|
+
BlockPointer block = BlockPointer());
|
40
40
|
~ART() override;
|
41
41
|
|
42
42
|
//! Root of the tree
|
@@ -80,7 +80,7 @@ public:
|
|
80
80
|
void SearchEqualJoinNoFetch(ARTKey &key, idx_t &result_size);
|
81
81
|
|
82
82
|
//! Serializes the index and returns the pair of block_id offset positions
|
83
|
-
BlockPointer Serialize(
|
83
|
+
BlockPointer Serialize(MetadataWriter &writer) override;
|
84
84
|
|
85
85
|
//! Merge another index into this index. The lock obtained from InitializeLock must be held, and the other
|
86
86
|
//! index must also be locked during the merge
|
@@ -15,8 +15,8 @@
|
|
15
15
|
namespace duckdb {
|
16
16
|
|
17
17
|
// classes
|
18
|
-
class
|
19
|
-
class
|
18
|
+
class MetadataWriter;
|
19
|
+
class MetadataReader;
|
20
20
|
|
21
21
|
// structs
|
22
22
|
struct BlockPointer;
|
@@ -70,9 +70,9 @@ public:
|
|
70
70
|
static string VerifyAndToString(ART &art, Node &node);
|
71
71
|
|
72
72
|
//! Serialize the leaf (chain)
|
73
|
-
static BlockPointer Serialize(ART &art, Node &node,
|
73
|
+
static BlockPointer Serialize(ART &art, Node &node, MetadataWriter &writer);
|
74
74
|
//! Deserialize the leaf (chain)
|
75
|
-
static void Deserialize(ART &art, Node &node,
|
75
|
+
static void Deserialize(ART &art, Node &node, MetadataReader &reader);
|
76
76
|
|
77
77
|
//! Vacuum the leaf (chain)
|
78
78
|
static void Vacuum(ART &art, Node &node);
|
@@ -12,6 +12,7 @@
|
|
12
12
|
#include "duckdb/common/optional_ptr.hpp"
|
13
13
|
#include "duckdb/common/to_string.hpp"
|
14
14
|
#include "duckdb/common/typedefs.hpp"
|
15
|
+
#include "duckdb/common/limits.hpp"
|
15
16
|
|
16
17
|
namespace duckdb {
|
17
18
|
|
@@ -28,12 +29,13 @@ enum class NType : uint8_t {
|
|
28
29
|
class FixedSizeAllocator;
|
29
30
|
class ART;
|
30
31
|
class Prefix;
|
31
|
-
class
|
32
|
-
class
|
32
|
+
class MetadataReader;
|
33
|
+
class MetadataWriter;
|
33
34
|
|
34
35
|
// structs
|
35
36
|
struct BlockPointer;
|
36
37
|
struct ARTFlags;
|
38
|
+
struct MetaBlockPointer;
|
37
39
|
|
38
40
|
//! The Node is the pointer class of the ART index.
|
39
41
|
//! If the node is serialized, then the pointer points to a storage address (and has no type),
|
@@ -70,7 +72,7 @@ public:
|
|
70
72
|
//! Constructs an empty Node
|
71
73
|
Node() : data(0) {};
|
72
74
|
//! Constructs a serialized Node pointer from a block ID and an offset
|
73
|
-
explicit Node(
|
75
|
+
explicit Node(MetadataReader &reader);
|
74
76
|
//! Constructs an in-memory Node from a buffer ID and an offset
|
75
77
|
Node(const uint32_t buffer_id, const uint32_t offset) : data(0) {
|
76
78
|
SetPtr(buffer_id, offset);
|
@@ -95,7 +97,7 @@ public:
|
|
95
97
|
optional_ptr<Node> GetNextChild(ART &art, uint8_t &byte, const bool deserialize = true) const;
|
96
98
|
|
97
99
|
//! Serialize the node
|
98
|
-
BlockPointer Serialize(ART &art,
|
100
|
+
BlockPointer Serialize(ART &art, MetadataWriter &writer);
|
99
101
|
//! Deserialize the node
|
100
102
|
void Deserialize(ART &art);
|
101
103
|
|
@@ -56,9 +56,9 @@ public:
|
|
56
56
|
optional_ptr<Node> GetNextChild(uint8_t &byte);
|
57
57
|
|
58
58
|
//! Serialize this node
|
59
|
-
BlockPointer Serialize(ART &art,
|
59
|
+
BlockPointer Serialize(ART &art, MetadataWriter &writer);
|
60
60
|
//! Deserialize this node
|
61
|
-
void Deserialize(
|
61
|
+
void Deserialize(MetadataReader &reader);
|
62
62
|
|
63
63
|
//! Vacuum the children of the node
|
64
64
|
void Vacuum(ART &art, const ARTFlags &flags);
|
@@ -59,9 +59,9 @@ public:
|
|
59
59
|
optional_ptr<Node> GetNextChild(uint8_t &byte);
|
60
60
|
|
61
61
|
//! Serialize this node
|
62
|
-
BlockPointer Serialize(ART &art,
|
62
|
+
BlockPointer Serialize(ART &art, MetadataWriter &writer);
|
63
63
|
//! Deserialize this node
|
64
|
-
void Deserialize(
|
64
|
+
void Deserialize(MetadataReader &reader);
|
65
65
|
|
66
66
|
//! Vacuum the children of the node
|
67
67
|
void Vacuum(ART &art, const ARTFlags &flags);
|
@@ -54,9 +54,9 @@ public:
|
|
54
54
|
optional_ptr<Node> GetNextChild(uint8_t &byte);
|
55
55
|
|
56
56
|
//! Serialize this node
|
57
|
-
BlockPointer Serialize(ART &art,
|
57
|
+
BlockPointer Serialize(ART &art, MetadataWriter &writer);
|
58
58
|
//! Deserialize this node
|
59
|
-
void Deserialize(
|
59
|
+
void Deserialize(MetadataReader &reader);
|
60
60
|
|
61
61
|
//! Vacuum the children of the node
|
62
62
|
void Vacuum(ART &art, const ARTFlags &flags);
|
@@ -66,9 +66,9 @@ public:
|
|
66
66
|
optional_ptr<Node> GetNextChild(uint8_t &byte);
|
67
67
|
|
68
68
|
//! Serialize this node
|
69
|
-
BlockPointer Serialize(ART &art,
|
69
|
+
BlockPointer Serialize(ART &art, MetadataWriter &writer);
|
70
70
|
//! Deserialize this node
|
71
|
-
void Deserialize(
|
71
|
+
void Deserialize(MetadataReader &reader);
|
72
72
|
|
73
73
|
//! Vacuum the children of the node
|
74
74
|
void Vacuum(ART &art, const ARTFlags &flags);
|
@@ -74,9 +74,9 @@ public:
|
|
74
74
|
static string VerifyAndToString(ART &art, Node &node, const bool only_verify);
|
75
75
|
|
76
76
|
//! Serialize this node and all subsequent nodes
|
77
|
-
static BlockPointer Serialize(ART &art, Node &node,
|
77
|
+
static BlockPointer Serialize(ART &art, Node &node, MetadataWriter &writer);
|
78
78
|
//! Deserialize this node and all subsequent prefix nodes
|
79
|
-
static void Deserialize(ART &art, Node &node,
|
79
|
+
static void Deserialize(ART &art, Node &node, MetadataReader &reader);
|
80
80
|
|
81
81
|
//! Vacuum the child of the node
|
82
82
|
static void Vacuum(ART &art, Node &node, const ARTFlags &flags);
|
@@ -36,7 +36,6 @@ public:
|
|
36
36
|
public:
|
37
37
|
PhysicalOperator(PhysicalOperatorType type, vector<LogicalType> types, idx_t estimated_cardinality)
|
38
38
|
: type(type), types(std::move(types)), estimated_cardinality(estimated_cardinality) {
|
39
|
-
estimated_props = make_uniq<EstimatedProperties>(estimated_cardinality, 0);
|
40
39
|
}
|
41
40
|
|
42
41
|
virtual ~PhysicalOperator() {
|
@@ -50,7 +49,6 @@ public:
|
|
50
49
|
vector<LogicalType> types;
|
51
50
|
//! The estimated cardinality of this physical operator
|
52
51
|
idx_t estimated_cardinality;
|
53
|
-
unique_ptr<EstimatedProperties> estimated_props;
|
54
52
|
|
55
53
|
//! The global sink state of this operator
|
56
54
|
unique_ptr<GlobalSinkState> sink_state;
|
@@ -34,6 +34,8 @@ public:
|
|
34
34
|
//! If this returns RESULT_READY, the Execute function can be called to obtain a pointer to the result.
|
35
35
|
//! If this returns RESULT_NOT_READY, the ExecuteTask function should be called again.
|
36
36
|
//! If this returns EXECUTION_ERROR, an error occurred during execution.
|
37
|
+
//! If this returns NO_TASKS_AVAILABLE, this means currently no meaningful work can be done by the current executor,
|
38
|
+
//! but tasks may become available in the future.
|
37
39
|
//! The error message can be obtained by calling GetError() on the PendingQueryResult.
|
38
40
|
DUCKDB_API PendingExecutionResult ExecuteTask();
|
39
41
|
|
@@ -43,6 +45,9 @@ public:
|
|
43
45
|
|
44
46
|
DUCKDB_API void Close();
|
45
47
|
|
48
|
+
//! Function to determine whether execution is considered finished
|
49
|
+
DUCKDB_API static bool IsFinished(PendingExecutionResult result);
|
50
|
+
|
46
51
|
private:
|
47
52
|
shared_ptr<ClientContext> context;
|
48
53
|
bool allow_stream_result;
|
@@ -7,21 +7,14 @@
|
|
7
7
|
//===----------------------------------------------------------------------===//
|
8
8
|
#pragma once
|
9
9
|
|
10
|
-
#include "duckdb/optimizer/join_order/join_node.hpp"
|
11
|
-
#include "duckdb/planner/column_binding.hpp"
|
12
10
|
#include "duckdb/planner/column_binding_map.hpp"
|
13
|
-
#include "duckdb/
|
14
|
-
|
11
|
+
#include "duckdb/optimizer/join_order/query_graph.hpp"
|
12
|
+
|
13
|
+
#include "duckdb/optimizer/join_order/relation_statistics_helper.hpp"
|
15
14
|
|
16
15
|
namespace duckdb {
|
17
16
|
|
18
|
-
struct
|
19
|
-
string original_name;
|
20
|
-
// the relation columns used in join filters
|
21
|
-
// Needed when iterating over columns and initializing total domain values.
|
22
|
-
unordered_set<idx_t> columns;
|
23
|
-
double cardinality;
|
24
|
-
};
|
17
|
+
struct FilterInfo;
|
25
18
|
|
26
19
|
struct RelationsToTDom {
|
27
20
|
//! column binding sets that are equivalent in a join plan.
|
@@ -33,19 +26,13 @@ struct RelationsToTDom {
|
|
33
26
|
idx_t tdom_no_hll;
|
34
27
|
bool has_tdom_hll;
|
35
28
|
vector<FilterInfo *> filters;
|
29
|
+
vector<string> column_names;
|
36
30
|
|
37
31
|
RelationsToTDom(const column_binding_set_t &column_binding_set)
|
38
32
|
: equivalent_relations(column_binding_set), tdom_hll(0), tdom_no_hll(NumericLimits<idx_t>::Maximum()),
|
39
33
|
has_tdom_hll(false) {};
|
40
34
|
};
|
41
35
|
|
42
|
-
struct NodeOp {
|
43
|
-
unique_ptr<JoinNode> node;
|
44
|
-
LogicalOperator &op;
|
45
|
-
|
46
|
-
NodeOp(unique_ptr<JoinNode> node, LogicalOperator &op) : node(std::move(node)), op(op) {};
|
47
|
-
};
|
48
|
-
|
49
36
|
struct Subgraph2Denominator {
|
50
37
|
unordered_set<idx_t> relations;
|
51
38
|
double denom;
|
@@ -53,69 +40,56 @@ struct Subgraph2Denominator {
|
|
53
40
|
Subgraph2Denominator() : relations(), denom(1) {};
|
54
41
|
};
|
55
42
|
|
56
|
-
class
|
43
|
+
class CardinalityHelper {
|
57
44
|
public:
|
58
|
-
|
45
|
+
CardinalityHelper() {
|
59
46
|
}
|
47
|
+
CardinalityHelper(double cardinality_before_filters, double filter_string)
|
48
|
+
: cardinality_before_filters(cardinality_before_filters), filter_strength(filter_string) {};
|
60
49
|
|
61
|
-
|
62
|
-
|
50
|
+
public:
|
51
|
+
double cardinality_before_filters;
|
52
|
+
double filter_strength;
|
53
|
+
|
54
|
+
vector<string> table_names_joined;
|
55
|
+
vector<string> column_names;
|
56
|
+
};
|
63
57
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
column_binding_map_t<ColumnBinding> relation_column_to_original_column;
|
58
|
+
class CardinalityEstimator {
|
59
|
+
public:
|
60
|
+
explicit CardinalityEstimator() {};
|
68
61
|
|
62
|
+
private:
|
69
63
|
vector<RelationsToTDom> relations_to_tdoms;
|
64
|
+
unordered_map<string, CardinalityHelper> relation_set_2_cardinality;
|
65
|
+
JoinRelationSetManager set_manager;
|
66
|
+
vector<RelationStats> relation_stats;
|
70
67
|
|
71
68
|
public:
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
//!
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
void
|
85
|
-
void
|
86
|
-
void AddRelationColumnMapping(LogicalGet &get, idx_t relation_id);
|
87
|
-
|
88
|
-
void InitTotalDomains();
|
89
|
-
void UpdateTotalDomains(JoinNode &node, LogicalOperator &op);
|
90
|
-
void InitEquivalentRelations(vector<unique_ptr<FilterInfo>> &filter_infos);
|
91
|
-
|
92
|
-
void InitCardinalityEstimatorProps(vector<NodeOp> &node_ops, vector<unique_ptr<FilterInfo>> &filter_infos);
|
93
|
-
double EstimateCardinalityWithSet(JoinRelationSet &new_set);
|
94
|
-
void EstimateBaseTableCardinality(JoinNode &node, LogicalOperator &op);
|
95
|
-
double EstimateCrossProduct(const JoinNode &left, const JoinNode &right);
|
96
|
-
static double ComputeCost(JoinNode &left, JoinNode &right, double expected_cardinality);
|
69
|
+
void RemoveEmptyTotalDomains();
|
70
|
+
void UpdateTotalDomains(optional_ptr<JoinRelationSet> set, RelationStats &stats);
|
71
|
+
void InitEquivalentRelations(const vector<unique_ptr<FilterInfo>> &filter_infos);
|
72
|
+
|
73
|
+
void InitCardinalityEstimatorProps(optional_ptr<JoinRelationSet> set, RelationStats &stats);
|
74
|
+
|
75
|
+
//! cost model needs estimated cardinalities to the fraction since the formula captures
|
76
|
+
//! distinct count selectivities and multiplicities. Hence the template
|
77
|
+
template <class T>
|
78
|
+
T EstimateCardinalityWithSet(JoinRelationSet &new_set);
|
79
|
+
|
80
|
+
//! used for debugging.
|
81
|
+
void AddRelationNamesToTdoms(vector<RelationStats> &stats);
|
82
|
+
void PrintRelationToTdomInfo();
|
97
83
|
|
98
84
|
private:
|
99
85
|
bool SingleColumnFilter(FilterInfo &filter_info);
|
100
|
-
//! Filter & bindings -> list of indexes into the equivalent_relations array.
|
101
|
-
// The column binding set at each index is an equivalence set.
|
102
86
|
vector<idx_t> DetermineMatchingEquivalentSets(FilterInfo *filter_info);
|
103
|
-
|
104
87
|
//! Given a filter, add the column bindings to the matching equivalent set at the index
|
105
88
|
//! given in matching equivalent sets.
|
106
89
|
//! If there are multiple equivalence sets, they are merged.
|
107
90
|
void AddToEquivalenceSets(FilterInfo *filter_info, vector<idx_t> matching_equivalent_sets);
|
108
|
-
|
109
|
-
optional_ptr<TableFilterSet> GetTableFilters(LogicalOperator &op, idx_t table_index);
|
110
|
-
|
111
91
|
void AddRelationTdom(FilterInfo &filter_info);
|
112
92
|
bool EmptyFilter(FilterInfo &filter_info);
|
113
|
-
|
114
|
-
idx_t InspectConjunctionAND(idx_t cardinality, idx_t column_index, ConjunctionAndFilter &fil,
|
115
|
-
unique_ptr<BaseStatistics> base_stats);
|
116
|
-
idx_t InspectConjunctionOR(idx_t cardinality, idx_t column_index, ConjunctionOrFilter &fil,
|
117
|
-
unique_ptr<BaseStatistics> base_stats);
|
118
|
-
idx_t InspectTableFilters(idx_t cardinality, LogicalOperator &op, TableFilterSet &table_filters, idx_t table_index);
|
119
93
|
};
|
120
94
|
|
121
95
|
} // namespace duckdb
|
@@ -0,0 +1,37 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/optimizer/join_order/cost_model.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
#pragma once
|
9
|
+
|
10
|
+
#include "duckdb/optimizer/join_order/join_node.hpp"
|
11
|
+
#include "duckdb/optimizer/join_order/cardinality_estimator.hpp"
|
12
|
+
|
13
|
+
namespace duckdb {
|
14
|
+
|
15
|
+
class QueryGraphManager;
|
16
|
+
|
17
|
+
class CostModel {
|
18
|
+
public:
|
19
|
+
CostModel(QueryGraphManager &query_graph_manager);
|
20
|
+
|
21
|
+
private:
|
22
|
+
//! query graph storing relation manager information
|
23
|
+
QueryGraphManager &query_graph_manager;
|
24
|
+
|
25
|
+
public:
|
26
|
+
void InitCostModel();
|
27
|
+
|
28
|
+
//! Compute cost of a join relation set
|
29
|
+
double ComputeCost(JoinNode &left, JoinNode &right);
|
30
|
+
|
31
|
+
//! Cardinality Estimator used to calculate cost
|
32
|
+
CardinalityEstimator cardinality_estimator;
|
33
|
+
|
34
|
+
private:
|
35
|
+
};
|
36
|
+
|
37
|
+
} // namespace duckdb
|
@@ -5,62 +5,47 @@
|
|
5
5
|
//
|
6
6
|
//
|
7
7
|
//===----------------------------------------------------------------------===//
|
8
|
-
|
9
8
|
#pragma once
|
10
9
|
|
11
|
-
#include "duckdb/common/unordered_map.hpp"
|
12
|
-
#include "duckdb/common/unordered_set.hpp"
|
13
|
-
#include "duckdb/optimizer/join_order/estimated_properties.hpp"
|
14
10
|
#include "duckdb/optimizer/join_order/join_relation.hpp"
|
15
11
|
#include "duckdb/optimizer/join_order/query_graph.hpp"
|
16
|
-
#include "duckdb/parser/expression_map.hpp"
|
17
|
-
#include "duckdb/planner/logical_operator_visitor.hpp"
|
18
|
-
#include "duckdb/planner/table_filter.hpp"
|
19
|
-
#include "duckdb/storage/statistics/distinct_statistics.hpp"
|
20
12
|
|
21
13
|
namespace duckdb {
|
22
14
|
|
23
|
-
|
15
|
+
struct NeighborInfo;
|
24
16
|
|
25
17
|
class JoinNode {
|
26
18
|
public:
|
27
19
|
//! Represents a node in the join plan
|
28
20
|
JoinRelationSet &set;
|
21
|
+
//! information on how left and right are connected
|
29
22
|
optional_ptr<NeighborInfo> info;
|
30
|
-
//!
|
31
|
-
//! estimated_props.cardinality will be the cardinality after filters. With no filters, the two are equal
|
32
|
-
bool has_filter;
|
23
|
+
//! left and right plans
|
33
24
|
optional_ptr<JoinNode> left;
|
34
25
|
optional_ptr<JoinNode> right;
|
35
26
|
|
36
|
-
|
27
|
+
//! The cost of the join node. The cost is stored here so that the cost of
|
28
|
+
//! a join node stays in sync with how the join node is constructed. Storing the cost in an unordered_set
|
29
|
+
//! in the cost model is error prone. If the plan enumerator join node is updated and not the cost model
|
30
|
+
//! the whole Join Order Optimizer can start exhibiting undesired behavior.
|
31
|
+
double cost;
|
32
|
+
//! used only to populate logical operators with estimated caridnalities after the best join plan has been found.
|
33
|
+
idx_t cardinality;
|
34
|
+
|
35
|
+
//! Create an intermediate node in the join tree. base_cardinality = estimated_props.cardinality
|
36
|
+
JoinNode(JoinRelationSet &set, optional_ptr<NeighborInfo> info, JoinNode &left, JoinNode &right, double cost);
|
37
37
|
|
38
38
|
//! Create a leaf node in the join tree
|
39
39
|
//! set cost to 0 for leaf nodes
|
40
40
|
//! cost will be the cost to *produce* an intermediate table
|
41
|
-
JoinNode(JoinRelationSet &set
|
42
|
-
|
43
|
-
//! Create an intermediate node in the join tree. base_cardinality = estimated_props.cardinality
|
44
|
-
JoinNode(JoinRelationSet &set, optional_ptr<NeighborInfo> info, JoinNode &left, JoinNode &right,
|
45
|
-
const double base_cardinality, double cost);
|
41
|
+
JoinNode(JoinRelationSet &set);
|
46
42
|
|
47
43
|
bool operator==(const JoinNode &other) {
|
48
44
|
return other.set.ToString().compare(set.ToString()) == 0;
|
49
45
|
}
|
50
46
|
|
51
47
|
private:
|
52
|
-
double base_cardinality;
|
53
|
-
|
54
48
|
public:
|
55
|
-
template <class CARDINALITY_TYPE>
|
56
|
-
CARDINALITY_TYPE GetCardinality() const {
|
57
|
-
return estimated_props->GetCardinality<CARDINALITY_TYPE>();
|
58
|
-
}
|
59
|
-
double GetCost();
|
60
|
-
void SetCost(double cost);
|
61
|
-
double GetBaseTableCardinality();
|
62
|
-
void SetBaseTableCardinality(double base_card);
|
63
|
-
void SetEstimatedCardinality(double estimated_card);
|
64
49
|
void PrintJoinNode();
|
65
50
|
string ToString();
|
66
51
|
};
|
@@ -10,6 +10,7 @@
|
|
10
10
|
|
11
11
|
#include "duckdb/common/unordered_map.hpp"
|
12
12
|
#include "duckdb/common/unordered_set.hpp"
|
13
|
+
#include "duckdb/optimizer/join_order/query_graph_manager.hpp"
|
13
14
|
#include "duckdb/optimizer/join_order/join_relation.hpp"
|
14
15
|
#include "duckdb/optimizer/join_order/cardinality_estimator.hpp"
|
15
16
|
#include "duckdb/optimizer/join_order/query_graph.hpp"
|
@@ -22,22 +23,13 @@
|
|
22
23
|
|
23
24
|
namespace duckdb {
|
24
25
|
|
25
|
-
struct GenerateJoinRelation {
|
26
|
-
GenerateJoinRelation(JoinRelationSet &set, unique_ptr<LogicalOperator> op_p) : set(set), op(std::move(op_p)) {
|
27
|
-
}
|
28
|
-
|
29
|
-
JoinRelationSet &set;
|
30
|
-
unique_ptr<LogicalOperator> op;
|
31
|
-
};
|
32
|
-
|
33
26
|
class JoinOrderOptimizer {
|
34
27
|
public:
|
35
|
-
explicit JoinOrderOptimizer(ClientContext &context)
|
36
|
-
: context(context), cardinality_estimator(context), full_plan_found(false), must_update_full_plan(false) {
|
28
|
+
explicit JoinOrderOptimizer(ClientContext &context) : context(context), query_graph_manager(context) {
|
37
29
|
}
|
38
30
|
|
39
31
|
//! Perform join reordering inside a plan
|
40
|
-
unique_ptr<LogicalOperator> Optimize(unique_ptr<LogicalOperator> plan);
|
32
|
+
unique_ptr<LogicalOperator> Optimize(unique_ptr<LogicalOperator> plan, optional_ptr<RelationStats> stats = nullptr);
|
41
33
|
|
42
34
|
unique_ptr<JoinNode> CreateJoinTree(JoinRelationSet &set,
|
43
35
|
const vector<reference<NeighborInfo>> &possible_connections, JoinNode &left,
|
@@ -45,16 +37,10 @@ public:
|
|
45
37
|
|
46
38
|
private:
|
47
39
|
ClientContext &context;
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
//! A mapping of base table index -> index into relations array (relation number)
|
53
|
-
unordered_map<idx_t, idx_t> relation_mapping;
|
54
|
-
//! A structure holding all the created JoinRelationSet objects
|
55
|
-
JoinRelationSetManager set_manager;
|
56
|
-
//! The set of edges used in the join optimizer
|
57
|
-
QueryGraph query_graph;
|
40
|
+
|
41
|
+
//! manages the query graph, relations, and edges between relations
|
42
|
+
QueryGraphManager query_graph_manager;
|
43
|
+
|
58
44
|
//! The optimal join plan found for the specific JoinRelationSet*
|
59
45
|
unordered_map<JoinRelationSet *, unique_ptr<JoinNode>> plans;
|
60
46
|
|
@@ -11,19 +11,8 @@
|
|
11
11
|
#include "duckdb/common/common.hpp"
|
12
12
|
#include "duckdb/common/unordered_map.hpp"
|
13
13
|
#include "duckdb/common/unordered_set.hpp"
|
14
|
-
#include "duckdb/common/optional_ptr.hpp"
|
15
14
|
|
16
15
|
namespace duckdb {
|
17
|
-
class LogicalOperator;
|
18
|
-
|
19
|
-
//! Represents a single relation and any metadata accompanying that relation
|
20
|
-
struct SingleJoinRelation {
|
21
|
-
LogicalOperator &op;
|
22
|
-
optional_ptr<LogicalOperator> parent;
|
23
|
-
|
24
|
-
SingleJoinRelation(LogicalOperator &op, optional_ptr<LogicalOperator> parent) : op(op), parent(parent) {
|
25
|
-
}
|
26
|
-
};
|
27
16
|
|
28
17
|
//! Set of relations, used in the join graph.
|
29
18
|
struct JoinRelationSet {
|
@@ -0,0 +1,89 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/optimizer/join_order/plan_enumerator.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/common/unordered_map.hpp"
|
12
|
+
#include "duckdb/common/unordered_set.hpp"
|
13
|
+
#include "duckdb/optimizer/join_order/join_relation.hpp"
|
14
|
+
#include "duckdb/optimizer/join_order/cardinality_estimator.hpp"
|
15
|
+
#include "duckdb/optimizer/join_order/query_graph.hpp"
|
16
|
+
#include "duckdb/optimizer/join_order/join_node.hpp"
|
17
|
+
#include "duckdb/optimizer/join_order/cost_model.hpp"
|
18
|
+
#include "duckdb/parser/expression_map.hpp"
|
19
|
+
#include "duckdb/common/reference_map.hpp"
|
20
|
+
#include "duckdb/planner/logical_operator.hpp"
|
21
|
+
#include "duckdb/planner/logical_operator_visitor.hpp"
|
22
|
+
|
23
|
+
#include <functional>
|
24
|
+
|
25
|
+
namespace duckdb {
|
26
|
+
|
27
|
+
class QueryGraphManager;
|
28
|
+
|
29
|
+
class PlanEnumerator {
|
30
|
+
public:
|
31
|
+
explicit PlanEnumerator(QueryGraphManager &query_graph_manager, CostModel &cost_model,
|
32
|
+
const QueryGraphEdges &query_graph)
|
33
|
+
: query_graph(query_graph), query_graph_manager(query_graph_manager), cost_model(cost_model),
|
34
|
+
full_plan_found(false), must_update_full_plan(false) {
|
35
|
+
}
|
36
|
+
|
37
|
+
//! Perform the join order solving
|
38
|
+
unique_ptr<JoinNode> SolveJoinOrder();
|
39
|
+
void InitLeafPlans();
|
40
|
+
|
41
|
+
static unique_ptr<LogicalOperator> BuildSideProbeSideSwaps(unique_ptr<LogicalOperator> plan);
|
42
|
+
|
43
|
+
private:
|
44
|
+
QueryGraphEdges const &query_graph;
|
45
|
+
//! The total amount of join pairs that have been considered
|
46
|
+
idx_t pairs = 0;
|
47
|
+
//! The set of edges used in the join optimizer
|
48
|
+
QueryGraphManager &query_graph_manager;
|
49
|
+
//! Cost model to evaluate cost of joins
|
50
|
+
CostModel &cost_model;
|
51
|
+
//! A map to store the optimal join plan found for a specific JoinRelationSet*
|
52
|
+
reference_map_t<JoinRelationSet, unique_ptr<JoinNode>> plans;
|
53
|
+
|
54
|
+
bool full_plan_found;
|
55
|
+
bool must_update_full_plan;
|
56
|
+
unordered_set<string> join_nodes_in_full_plan;
|
57
|
+
|
58
|
+
unique_ptr<JoinNode> CreateJoinTree(JoinRelationSet &set,
|
59
|
+
const vector<reference<NeighborInfo>> &possible_connections, JoinNode &left,
|
60
|
+
JoinNode &right);
|
61
|
+
|
62
|
+
//! Emit a pair as a potential join candidate. Returns the best plan found for the (left, right) connection (either
|
63
|
+
//! the newly created plan, or an existing plan)
|
64
|
+
JoinNode &EmitPair(JoinRelationSet &left, JoinRelationSet &right, const vector<reference<NeighborInfo>> &info);
|
65
|
+
//! Tries to emit a potential join candidate pair. Returns false if too many pairs have already been emitted,
|
66
|
+
//! cancelling the dynamic programming step.
|
67
|
+
bool TryEmitPair(JoinRelationSet &left, JoinRelationSet &right, const vector<reference<NeighborInfo>> &info);
|
68
|
+
|
69
|
+
bool EnumerateCmpRecursive(JoinRelationSet &left, JoinRelationSet &right, unordered_set<idx_t> &exclusion_set);
|
70
|
+
//! Emit a relation set node
|
71
|
+
bool EmitCSG(JoinRelationSet &node);
|
72
|
+
//! Enumerate the possible connected subgraphs that can be joined together in the join graph
|
73
|
+
bool EnumerateCSGRecursive(JoinRelationSet &node, unordered_set<idx_t> &exclusion_set);
|
74
|
+
//! Generate cross product edges inside the side
|
75
|
+
void GenerateCrossProducts();
|
76
|
+
|
77
|
+
//! Solve the join order exactly using dynamic programming. Returns true if it was completed successfully (i.e. did
|
78
|
+
//! not time-out)
|
79
|
+
bool SolveJoinOrderExactly();
|
80
|
+
//! Solve the join order approximately using a greedy algorithm
|
81
|
+
void SolveJoinOrderApproximately();
|
82
|
+
|
83
|
+
void UpdateDPTree(JoinNode &new_plan);
|
84
|
+
|
85
|
+
void UpdateJoinNodesInFullPlan(JoinNode &node);
|
86
|
+
bool NodeInFullPlan(JoinNode &node);
|
87
|
+
};
|
88
|
+
|
89
|
+
} // namespace duckdb
|