duckdb 0.8.2-dev2673.0 → 0.8.2-dev2809.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. package/binding.gyp +1 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/icu/icu-makedate.cpp +12 -6
  4. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +6 -6
  5. package/src/duckdb/src/common/adbc/adbc.cpp +52 -21
  6. package/src/duckdb/src/common/adbc/driver_manager.cpp +12 -2
  7. package/src/duckdb/src/common/enum_util.cpp +5 -0
  8. package/src/duckdb/src/common/field_writer.cpp +1 -0
  9. package/src/duckdb/src/common/local_file_system.cpp +5 -0
  10. package/src/duckdb/src/common/types/interval.cpp +3 -0
  11. package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +35 -5
  12. package/src/duckdb/src/execution/index/art/art.cpp +6 -9
  13. package/src/duckdb/src/execution/index/art/leaf.cpp +4 -4
  14. package/src/duckdb/src/execution/index/art/node.cpp +9 -12
  15. package/src/duckdb/src/execution/index/art/node16.cpp +4 -4
  16. package/src/duckdb/src/execution/index/art/node256.cpp +4 -4
  17. package/src/duckdb/src/execution/index/art/node4.cpp +4 -5
  18. package/src/duckdb/src/execution/index/art/node48.cpp +4 -4
  19. package/src/duckdb/src/execution/index/art/prefix.cpp +4 -6
  20. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +283 -91
  21. package/src/duckdb/src/execution/operator/filter/physical_filter.cpp +1 -1
  22. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -2
  23. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
  24. package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -6
  25. package/src/duckdb/src/execution/window_executor.cpp +10 -1
  26. package/src/duckdb/src/function/table/version/pragma_version.cpp +5 -2
  27. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
  28. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +2 -0
  29. package/src/duckdb/src/include/duckdb/common/enums/pending_execution_result.hpp +1 -1
  30. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -1
  31. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +2 -2
  32. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +4 -4
  33. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +6 -4
  34. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +2 -2
  35. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +2 -2
  36. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +2 -2
  37. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +2 -2
  38. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +2 -2
  39. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +0 -2
  40. package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +5 -0
  41. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +37 -63
  42. package/src/duckdb/src/include/duckdb/optimizer/join_order/cost_model.hpp +37 -0
  43. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +14 -29
  44. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +7 -21
  45. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +0 -11
  46. package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +89 -0
  47. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +17 -31
  48. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +113 -0
  49. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +73 -0
  50. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +73 -0
  51. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +4 -1
  52. package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +0 -2
  53. package/src/duckdb/src/include/duckdb/storage/block.hpp +27 -4
  54. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +9 -9
  55. package/src/duckdb/src/include/duckdb/storage/checkpoint/row_group_writer.hpp +5 -5
  56. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_reader.hpp +2 -2
  57. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -3
  58. package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +19 -16
  59. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +1 -1
  60. package/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp +2 -2
  61. package/src/duckdb/src/include/duckdb/storage/index.hpp +2 -2
  62. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +88 -0
  63. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp +54 -0
  64. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +45 -0
  65. package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +2 -2
  66. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +6 -5
  67. package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +2 -2
  68. package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +2 -2
  69. package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +2 -2
  70. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +1 -1
  71. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +2 -0
  72. package/src/duckdb/src/include/duckdb/storage/table/row_group_segment_tree.hpp +2 -2
  73. package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
  74. package/src/duckdb/src/include/duckdb/storage/table_io_manager.hpp +3 -0
  75. package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +3 -4
  76. package/src/duckdb/src/include/duckdb.h +11 -1
  77. package/src/duckdb/src/main/capi/pending-c.cpp +17 -0
  78. package/src/duckdb/src/main/pending_query_result.cpp +9 -1
  79. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +79 -325
  80. package/src/duckdb/src/optimizer/join_order/cost_model.cpp +19 -0
  81. package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -37
  82. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +48 -1078
  83. package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +552 -0
  84. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +32 -29
  85. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +409 -0
  86. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +356 -0
  87. package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +351 -0
  88. package/src/duckdb/src/parallel/executor.cpp +6 -0
  89. package/src/duckdb/src/parallel/task_scheduler.cpp +7 -0
  90. package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +1 -1
  91. package/src/duckdb/src/planner/operator/logical_get.cpp +4 -0
  92. package/src/duckdb/src/storage/buffer/block_manager.cpp +10 -9
  93. package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -1
  94. package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +3 -4
  95. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +7 -7
  96. package/src/duckdb/src/storage/checkpoint_manager.cpp +49 -43
  97. package/src/duckdb/src/storage/index.cpp +1 -1
  98. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +267 -0
  99. package/src/duckdb/src/storage/metadata/metadata_reader.cpp +80 -0
  100. package/src/duckdb/src/storage/metadata/metadata_writer.cpp +86 -0
  101. package/src/duckdb/src/storage/single_file_block_manager.cpp +47 -52
  102. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  103. package/src/duckdb/src/storage/storage_manager.cpp +4 -3
  104. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +3 -3
  105. package/src/duckdb/src/storage/table/persistent_table_data.cpp +1 -2
  106. package/src/duckdb/src/storage/table/row_group.cpp +9 -10
  107. package/src/duckdb/src/storage/table/row_group_collection.cpp +6 -3
  108. package/src/duckdb/src/storage/table_index_list.cpp +1 -1
  109. package/src/duckdb/src/storage/wal_replay.cpp +3 -2
  110. package/src/duckdb/src/storage/write_ahead_log.cpp +3 -2
  111. package/src/duckdb/ub_src_optimizer_join_order.cpp +10 -0
  112. package/src/duckdb/ub_src_storage.cpp +0 -4
  113. package/src/duckdb/ub_src_storage_metadata.cpp +6 -0
  114. package/src/duckdb/src/include/duckdb/storage/meta_block_reader.hpp +0 -46
  115. package/src/duckdb/src/include/duckdb/storage/meta_block_writer.hpp +0 -50
  116. package/src/duckdb/src/storage/meta_block_reader.cpp +0 -69
  117. package/src/duckdb/src/storage/meta_block_writer.cpp +0 -80
@@ -9,47 +9,32 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/common/common.hpp"
12
+ #include "duckdb/common/optional_ptr.hpp"
13
+ #include "duckdb/optimizer/join_order/join_relation.hpp"
14
+ #include "duckdb/optimizer/join_order/join_node.hpp"
15
+ #include "duckdb/optimizer/join_order/relation_manager.hpp"
12
16
  #include "duckdb/common/pair.hpp"
13
17
  #include "duckdb/common/unordered_map.hpp"
14
18
  #include "duckdb/common/unordered_set.hpp"
15
- #include "duckdb/optimizer/join_order/join_relation.hpp"
16
19
  #include "duckdb/common/vector.hpp"
17
20
  #include "duckdb/planner/column_binding.hpp"
18
- #include "duckdb/common/optional_ptr.hpp"
19
21
 
20
22
  #include <functional>
21
23
 
22
24
  namespace duckdb {
23
- class Expression;
24
- class LogicalOperator;
25
-
26
- struct FilterInfo {
27
- FilterInfo(JoinRelationSet &set, idx_t filter_index) : set(set), filter_index(filter_index) {
28
- }
29
-
30
- JoinRelationSet &set;
31
- idx_t filter_index;
32
- optional_ptr<JoinRelationSet> left_set;
33
- optional_ptr<JoinRelationSet> right_set;
34
- ColumnBinding left_binding;
35
- ColumnBinding right_binding;
36
- };
37
25
 
38
- struct FilterNode {
39
- vector<reference<FilterInfo>> filters;
40
- unordered_map<idx_t, unique_ptr<FilterNode>> children;
41
- };
26
+ struct FilterInfo;
42
27
 
43
28
  struct NeighborInfo {
44
- NeighborInfo(JoinRelationSet &neighbor) : neighbor(neighbor) {
29
+ NeighborInfo(optional_ptr<JoinRelationSet> neighbor) : neighbor(neighbor) {
45
30
  }
46
31
 
47
- JoinRelationSet &neighbor;
48
- vector<reference<FilterInfo>> filters;
32
+ optional_ptr<JoinRelationSet> neighbor;
33
+ vector<optional_ptr<FilterInfo>> filters;
49
34
  };
50
35
 
51
36
  //! The QueryGraph contains edges between relations and allows edges to be created/queried
52
- class QueryGraph {
37
+ class QueryGraphEdges {
53
38
  public:
54
39
  //! Contains a node with info about neighboring relations and child edge infos
55
40
  struct QueryEdge {
@@ -61,22 +46,23 @@ public:
61
46
  string ToString() const;
62
47
  void Print();
63
48
 
64
- //! Create an edge in the edge_set
65
- void CreateEdge(JoinRelationSet &left, JoinRelationSet &right, optional_ptr<FilterInfo> info);
66
49
  //! Returns a connection if there is an edge that connects these two sets, or nullptr otherwise
67
- vector<reference<NeighborInfo>> GetConnections(JoinRelationSet &node, JoinRelationSet &other);
50
+ const vector<reference<NeighborInfo>> GetConnections(JoinRelationSet &node, JoinRelationSet &other) const;
68
51
  //! Enumerate the neighbors of a specific node that do not belong to any of the exclusion_set. Note that if a
69
52
  //! neighbor has multiple nodes, this function will return the lowest entry in that set.
70
- vector<idx_t> GetNeighbors(JoinRelationSet &node, unordered_set<idx_t> &exclusion_set);
53
+ const vector<idx_t> GetNeighbors(JoinRelationSet &node, unordered_set<idx_t> &exclusion_set) const;
54
+
71
55
  //! Enumerate all neighbors of a given JoinRelationSet node
72
- void EnumerateNeighbors(JoinRelationSet &node, const std::function<bool(NeighborInfo &)> &callback);
56
+ void EnumerateNeighbors(JoinRelationSet &node, const std::function<bool(NeighborInfo &)> &callback) const;
57
+ //! Create an edge in the edge_set
58
+ void CreateEdge(JoinRelationSet &left, JoinRelationSet &right, optional_ptr<FilterInfo> info);
73
59
 
74
60
  private:
75
61
  //! Get the QueryEdge of a specific node
76
- QueryEdge &GetQueryEdge(JoinRelationSet &left);
62
+ optional_ptr<QueryEdge> GetQueryEdge(JoinRelationSet &left);
77
63
 
78
64
  void EnumerateNeighborsDFS(JoinRelationSet &node, reference<QueryEdge> info, idx_t index,
79
- const std::function<bool(NeighborInfo &)> &callback);
65
+ const std::function<bool(NeighborInfo &)> &callback) const;
80
66
 
81
67
  QueryEdge root;
82
68
  };
@@ -0,0 +1,113 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/optimizer/join_order/query_graph_manager.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/common/common.hpp"
12
+ #include "duckdb/common/optional_ptr.hpp"
13
+ #include "duckdb/common/pair.hpp"
14
+ #include "duckdb/common/unordered_map.hpp"
15
+ #include "duckdb/common/unordered_set.hpp"
16
+ #include "duckdb/common/vector.hpp"
17
+ #include "duckdb/optimizer/join_order/join_node.hpp"
18
+ #include "duckdb/optimizer/join_order/join_relation.hpp"
19
+ #include "duckdb/optimizer/join_order/query_graph.hpp"
20
+ #include "duckdb/optimizer/join_order/relation_manager.hpp"
21
+ #include "duckdb/planner/column_binding.hpp"
22
+ #include "duckdb/planner/logical_operator.hpp"
23
+
24
+ #include <functional>
25
+
26
+ namespace duckdb {
27
+
28
+ struct GenerateJoinRelation {
29
+ GenerateJoinRelation(optional_ptr<JoinRelationSet> set, unique_ptr<LogicalOperator> op_p)
30
+ : set(set), op(std::move(op_p)) {
31
+ }
32
+
33
+ optional_ptr<JoinRelationSet> set;
34
+ unique_ptr<LogicalOperator> op;
35
+ };
36
+
37
+ //! Filter info struct that is used by the cardinality estimator to set the initial cardinality
38
+ //! but is also eventually transformed into a query edge.
39
+ struct FilterInfo {
40
+ FilterInfo(unique_ptr<Expression> filter, JoinRelationSet &set, idx_t filter_index)
41
+ : filter(std::move(filter)), set(set), filter_index(filter_index) {
42
+ }
43
+
44
+ unique_ptr<Expression> filter;
45
+ JoinRelationSet &set;
46
+ idx_t filter_index;
47
+ optional_ptr<JoinRelationSet> left_set;
48
+ optional_ptr<JoinRelationSet> right_set;
49
+ ColumnBinding left_binding;
50
+ ColumnBinding right_binding;
51
+ };
52
+
53
+ //! The QueryGraphManager manages the process of extracting the reorderable and nonreorderable operations
54
+ //! from the logical plan and creating the intermediate structures needed by the plan enumerator.
55
+ //! When the plan enumerator finishes, the QueryGraphManager can then recreate the logical plan.
56
+ class QueryGraphManager {
57
+ public:
58
+ QueryGraphManager(ClientContext &context) : relation_manager(context), context(context) {
59
+ }
60
+
61
+ //! manage relations and the logical operators they represent
62
+ RelationManager relation_manager;
63
+
64
+ //! A structure holding all the created JoinRelationSet objects
65
+ JoinRelationSetManager set_manager;
66
+
67
+ ClientContext &context;
68
+
69
+ //! Extract the join relations, optimizing non-reorderable relations when encountered
70
+ bool Build(LogicalOperator &op);
71
+
72
+ //! Reconstruct the logical plan using the plan found by the plan enumerator
73
+ unique_ptr<LogicalOperator> Reconstruct(unique_ptr<LogicalOperator> plan, JoinNode &node);
74
+
75
+ //! Get a reference to the QueryGraphEdges structure that stores edges between
76
+ //! nodes and hypernodes.
77
+ const QueryGraphEdges &GetQueryGraphEdges() const;
78
+
79
+ //! Get a list of the join filters in the join plan that eventually are
80
+ //! transformed into the query graph edges
81
+ const vector<unique_ptr<FilterInfo>> &GetFilterBindings() const;
82
+
83
+ //! Plan enumerator may not find a full plan and therefore will need to create cross
84
+ //! products to create edges.
85
+ void CreateQueryGraphCrossProduct(JoinRelationSet &left, JoinRelationSet &right);
86
+
87
+ //! after join order optimization, we perform build side probe side optimizations.
88
+ //! (Basically we put lower expected cardinality columns on the build side, and larger
89
+ //! tables on the probe side)
90
+ unique_ptr<LogicalOperator> LeftRightOptimizations(unique_ptr<LogicalOperator> op);
91
+
92
+ private:
93
+ vector<reference<LogicalOperator>> filter_operators;
94
+
95
+ //! Filter information including the column_bindings that join filters
96
+ //! used by the cardinality estimator to estimate distinct counts
97
+ vector<unique_ptr<FilterInfo>> filters_and_bindings;
98
+
99
+ QueryGraphEdges query_graph;
100
+
101
+ void GetColumnBinding(Expression &expression, ColumnBinding &binding);
102
+
103
+ bool ExtractBindings(Expression &expression, unordered_set<idx_t> &bindings);
104
+ bool LeftCardLessThanRight(LogicalOperator &op);
105
+
106
+ void CreateHyperGraphEdges();
107
+
108
+ GenerateJoinRelation GenerateJoins(vector<unique_ptr<LogicalOperator>> &extracted_relations, JoinNode &node);
109
+
110
+ unique_ptr<LogicalOperator> RewritePlan(unique_ptr<LogicalOperator> plan, JoinNode &node);
111
+ };
112
+
113
+ } // namespace duckdb
@@ -0,0 +1,73 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/optimizer/join_order/relation_manager.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/common/unordered_map.hpp"
12
+ #include "duckdb/common/unordered_set.hpp"
13
+ #include "duckdb/optimizer/join_order/join_relation.hpp"
14
+ #include "duckdb/optimizer/join_order/cardinality_estimator.hpp"
15
+ #include "duckdb/optimizer/join_order/relation_statistics_helper.hpp"
16
+ #include "duckdb/optimizer/join_order/join_node.hpp"
17
+ #include "duckdb/parser/expression_map.hpp"
18
+ #include "duckdb/planner/logical_operator.hpp"
19
+ #include "duckdb/planner/logical_operator_visitor.hpp"
20
+
21
+ namespace duckdb {
22
+
23
+ struct FilterInfo;
24
+
25
+ //! Represents a single relation and any metadata accompanying that relation
26
+ struct SingleJoinRelation {
27
+ LogicalOperator &op;
28
+ optional_ptr<LogicalOperator> parent;
29
+ RelationStats stats;
30
+
31
+ SingleJoinRelation(LogicalOperator &op, optional_ptr<LogicalOperator> parent) : op(op), parent(parent) {
32
+ }
33
+ SingleJoinRelation(LogicalOperator &op, optional_ptr<LogicalOperator> parent, RelationStats stats)
34
+ : op(op), parent(parent), stats(stats) {
35
+ }
36
+ };
37
+
38
+ class RelationManager {
39
+ public:
40
+ explicit RelationManager(ClientContext &context) : context(context) {
41
+ }
42
+
43
+ idx_t NumRelations();
44
+
45
+ bool ExtractJoinRelations(LogicalOperator &input_op, vector<reference<LogicalOperator>> &filter_operators,
46
+ optional_ptr<LogicalOperator> parent = nullptr);
47
+
48
+ //! for each join filter in the logical plan op, extract the relations that are referred to on
49
+ //! both sides of the join filter, along with the tables & indexes.
50
+ vector<unique_ptr<FilterInfo>> ExtractEdges(LogicalOperator &op,
51
+ vector<reference<LogicalOperator>> &filter_operators,
52
+ JoinRelationSetManager &set_manager);
53
+
54
+ //! Extract the set of relations referred to inside an expression
55
+ bool ExtractBindings(Expression &expression, unordered_set<idx_t> &bindings);
56
+ void AddRelation(LogicalOperator &op, optional_ptr<LogicalOperator> parent, const RelationStats &stats);
57
+
58
+ void AddAggregateRelation(LogicalOperator &op, optional_ptr<LogicalOperator> parent, const RelationStats &stats);
59
+ vector<unique_ptr<SingleJoinRelation>> GetRelations();
60
+
61
+ const vector<RelationStats> GetRelationStats();
62
+ //! A mapping of base table index -> index into relations array (relation number)
63
+ unordered_map<idx_t, idx_t> relation_mapping;
64
+
65
+ void PrintRelationStats();
66
+
67
+ private:
68
+ ClientContext &context;
69
+ //! Set of all relations considered in the join optimizer
70
+ vector<unique_ptr<SingleJoinRelation>> relations;
71
+ };
72
+
73
+ } // namespace duckdb
@@ -0,0 +1,73 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/optimizer/join_order/relation_statistics_helper.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+ #pragma once
9
+
10
+ #include "duckdb/planner/filter/conjunction_filter.hpp"
11
+ #include "duckdb/planner/logical_operator.hpp"
12
+
13
+ namespace duckdb {
14
+
15
+ class CardinalityEstimator;
16
+
17
+ struct DistinctCount {
18
+ idx_t distinct_count;
19
+ bool from_hll;
20
+ };
21
+
22
+ struct ExpressionBinding {
23
+ bool found_expression = false;
24
+ ColumnBinding child_binding;
25
+ bool expression_is_constant = false;
26
+ };
27
+
28
+ struct RelationStats {
29
+ // column_id -> estimated distinct count for column
30
+ vector<DistinctCount> column_distinct_count;
31
+ idx_t cardinality;
32
+ double filter_strength = 1;
33
+ bool stats_initialized = false;
34
+
35
+ // for debug, column names and tables
36
+ vector<string> column_names;
37
+ string table_name;
38
+
39
+ RelationStats() : cardinality(1), filter_strength(1), stats_initialized(false) {
40
+ }
41
+ };
42
+
43
+ class RelationStatisticsHelper {
44
+ public:
45
+ static constexpr double DEFAULT_SELECTIVITY = 0.2;
46
+
47
+ public:
48
+ static idx_t InspectConjunctionAND(idx_t cardinality, idx_t column_index, ConjunctionAndFilter &filter,
49
+ BaseStatistics &base_stats);
50
+ // static idx_t InspectConjunctionOR(idx_t cardinality, idx_t column_index, ConjunctionOrFilter &filter,
51
+ // BaseStatistics &base_stats);
52
+ //! Extract Statistics from a LogicalGet.
53
+ static RelationStats ExtractGetStats(LogicalGet &get, ClientContext &context);
54
+ static RelationStats ExtractDelimGetStats(LogicalDelimGet &delim_get, ClientContext &context);
55
+ //! Create the statistics for a projection using the statistics of the operator that sits underneath the
56
+ //! projection. Then also create statistics for any extra columns the projection creates.
57
+ static RelationStats ExtractDummyScanStats(LogicalDummyScan &dummy_scan, ClientContext &context);
58
+ static RelationStats ExtractExpressionGetStats(LogicalExpressionGet &expression_get, ClientContext &context);
59
+ //! All relation extractors for blocking relations
60
+ static RelationStats ExtractProjectionStats(LogicalProjection &proj, RelationStats &child_stats);
61
+ static RelationStats ExtractAggregationStats(LogicalAggregate &aggr, RelationStats &child_stats);
62
+ static RelationStats ExtractWindowStats(LogicalWindow &window, RelationStats &child_stats);
63
+ //! Called after reordering a query plan with potentially 2+ relations.
64
+ static RelationStats CombineStatsOfReorderableOperator(vector<ColumnBinding> &bindings,
65
+ vector<RelationStats> relation_stats);
66
+ //! Called after reordering a query plan with potentially 2+ relations.
67
+ static RelationStats CombineStatsOfNonReorderableOperator(LogicalOperator &op, vector<RelationStats> child_stats);
68
+ static void CopyRelationStats(RelationStats &to, const RelationStats &from);
69
+
70
+ private:
71
+ };
72
+
73
+ } // namespace duckdb
@@ -39,7 +39,7 @@ class TaskScheduler {
39
39
  constexpr static int64_t TASK_TIMEOUT_USECS = 5000;
40
40
 
41
41
  public:
42
- TaskScheduler(DatabaseInstance &db);
42
+ explicit TaskScheduler(DatabaseInstance &db);
43
43
  ~TaskScheduler();
44
44
 
45
45
  DUCKDB_API static TaskScheduler &GetScheduler(ClientContext &context);
@@ -67,6 +67,9 @@ public:
67
67
  //! Send signals to n threads, signalling for them to wake up and attempt to execute a task
68
68
  void Signal(idx_t n);
69
69
 
70
+ //! Yield to other threads
71
+ void YieldThread();
72
+
70
73
  //! Set the allocator flush threshold
71
74
  void SetAllocatorFlushTreshold(idx_t threshold);
72
75
 
@@ -49,8 +49,6 @@ public:
49
49
  idx_t estimated_cardinality;
50
50
  bool has_estimated_cardinality;
51
51
 
52
- unique_ptr<EstimatedProperties> estimated_props;
53
-
54
52
  public:
55
53
  virtual vector<ColumnBinding> GetColumnBindings();
56
54
  static vector<ColumnBinding> GenerateColumnBindings(idx_t table_idx, idx_t column_count);
@@ -24,10 +24,33 @@ public:
24
24
  };
25
25
 
26
26
  struct BlockPointer {
27
- BlockPointer(block_id_t block_id_p, uint32_t offset_p) : block_id(block_id_p), offset(offset_p) {};
28
- BlockPointer() {};
29
- block_id_t block_id {0};
30
- uint32_t offset {0};
27
+ BlockPointer(block_id_t block_id_p, uint32_t offset_p) : block_id(block_id_p), offset(offset_p) {
28
+ }
29
+ BlockPointer() : block_id(INVALID_BLOCK), offset(0) {
30
+ }
31
+
32
+ block_id_t block_id;
33
+ uint32_t offset;
34
+
35
+ bool IsValid() {
36
+ return block_id != INVALID_BLOCK;
37
+ }
38
+ };
39
+
40
+ struct MetaBlockPointer {
41
+ MetaBlockPointer(idx_t block_pointer, uint32_t offset_p) : block_pointer(block_pointer), offset(offset_p) {
42
+ }
43
+ MetaBlockPointer() : block_pointer(DConstants::INVALID_INDEX), offset(0) {
44
+ }
45
+
46
+ idx_t block_pointer;
47
+ uint32_t offset;
48
+
49
+ bool IsValid() {
50
+ return block_pointer != DConstants::INVALID_INDEX;
51
+ }
52
+ block_id_t GetBlockId();
53
+ uint32_t GetBlockIndex();
31
54
  };
32
55
 
33
56
  } // namespace duckdb
@@ -19,13 +19,13 @@ class BlockHandle;
19
19
  class BufferManager;
20
20
  class ClientContext;
21
21
  class DatabaseInstance;
22
+ class MetadataManager;
22
23
 
23
24
  //! BlockManager is an abstract representation to manage blocks on DuckDB. When writing or reading blocks, the
24
25
  //! BlockManager creates and accesses blocks. The concrete types implement how blocks are stored.
25
26
  class BlockManager {
26
27
  public:
27
- explicit BlockManager(BufferManager &buffer_manager) : buffer_manager(buffer_manager) {
28
- }
28
+ explicit BlockManager(BufferManager &buffer_manager);
29
29
  virtual ~BlockManager() = default;
30
30
 
31
31
  //! The buffer manager
@@ -38,7 +38,7 @@ public:
38
38
  //! Return the next free block id
39
39
  virtual block_id_t GetFreeBlockId() = 0;
40
40
  //! Returns whether or not a specified block is the root block
41
- virtual bool IsRootBlock(block_id_t root) = 0;
41
+ virtual bool IsRootBlock(MetaBlockPointer root) = 0;
42
42
  //! Mark a block as "free"; free blocks are immediately added to the free list and can be immediately overwritten
43
43
  virtual void MarkBlockAsFree(block_id_t block_id) = 0;
44
44
  //! Mark a block as "modified"; modified blocks are added to the free list after a checkpoint (i.e. their data is
@@ -48,7 +48,7 @@ public:
48
48
  //! called.
49
49
  virtual void IncreaseBlockReferenceCount(block_id_t block_id) = 0;
50
50
  //! Get the first meta block id
51
- virtual block_id_t GetMetaBlock() = 0;
51
+ virtual idx_t GetMetaBlock() = 0;
52
52
  //! Read the content of the block from disk
53
53
  virtual void Read(Block &block) = 0;
54
54
  //! Writes the block to disk
@@ -69,20 +69,20 @@ public:
69
69
  virtual void Truncate();
70
70
 
71
71
  //! Register a block with the given block id in the base file
72
- shared_ptr<BlockHandle> RegisterBlock(block_id_t block_id, bool is_meta_block = false);
73
- //! Clear cached handles for meta blocks
74
- void ClearMetaBlockHandles();
72
+ shared_ptr<BlockHandle> RegisterBlock(block_id_t block_id);
75
73
  //! Convert an existing in-memory buffer into a persistent disk-backed block
76
74
  shared_ptr<BlockHandle> ConvertToPersistent(block_id_t block_id, shared_ptr<BlockHandle> old_block);
77
75
 
78
76
  void UnregisterBlock(block_id_t block_id, bool can_destroy);
79
77
 
78
+ MetadataManager &GetMetadataManager();
79
+
80
80
  private:
81
81
  //! The lock for the set of blocks
82
82
  mutex blocks_lock;
83
83
  //! A mapping of block id -> BlockHandle
84
84
  unordered_map<block_id_t, weak_ptr<BlockHandle>> blocks;
85
- //! A map to cache the BlockHandles of meta blocks
86
- unordered_map<block_id_t, shared_ptr<BlockHandle>> meta_blocks;
85
+ //! The metadata manager
86
+ unique_ptr<MetadataManager> metadata_manager;
87
87
  };
88
88
  } // namespace duckdb
@@ -32,7 +32,7 @@ public:
32
32
 
33
33
  virtual void WriteColumnDataPointers(ColumnCheckpointState &column_checkpoint_state) = 0;
34
34
 
35
- virtual MetaBlockWriter &GetPayloadWriter() = 0;
35
+ virtual MetadataWriter &GetPayloadWriter() = 0;
36
36
 
37
37
  void RegisterPartialBlock(PartialBlockAllocation &&allocation);
38
38
  PartialBlockAllocation GetBlockAllocation(uint32_t segment_size);
@@ -50,18 +50,18 @@ protected:
50
50
  class SingleFileRowGroupWriter : public RowGroupWriter {
51
51
  public:
52
52
  SingleFileRowGroupWriter(TableCatalogEntry &table, PartialBlockManager &partial_block_manager,
53
- MetaBlockWriter &table_data_writer)
53
+ MetadataWriter &table_data_writer)
54
54
  : RowGroupWriter(table, partial_block_manager), table_data_writer(table_data_writer) {
55
55
  }
56
56
 
57
- //! MetaBlockWriter is a cursor on a given BlockManager. This returns the
57
+ //! MetadataWriter is a cursor on a given BlockManager. This returns the
58
58
  //! cursor against which we should write payload data for the specified RowGroup.
59
- MetaBlockWriter &table_data_writer;
59
+ MetadataWriter &table_data_writer;
60
60
 
61
61
  public:
62
62
  virtual void WriteColumnDataPointers(ColumnCheckpointState &column_checkpoint_state) override;
63
63
 
64
- virtual MetaBlockWriter &GetPayloadWriter() override;
64
+ virtual MetadataWriter &GetPayloadWriter() override;
65
65
  };
66
66
 
67
67
  } // namespace duckdb
@@ -16,12 +16,12 @@ struct BoundCreateTableInfo;
16
16
  //! The table data reader is responsible for reading the data of a table from the block manager
17
17
  class TableDataReader {
18
18
  public:
19
- TableDataReader(MetaBlockReader &reader, BoundCreateTableInfo &info);
19
+ TableDataReader(MetadataReader &reader, BoundCreateTableInfo &info);
20
20
 
21
21
  void ReadTableData();
22
22
 
23
23
  private:
24
- MetaBlockReader &reader;
24
+ MetadataReader &reader;
25
25
  BoundCreateTableInfo &info;
26
26
  };
27
27
 
@@ -45,7 +45,7 @@ protected:
45
45
  class SingleFileTableDataWriter : public TableDataWriter {
46
46
  public:
47
47
  SingleFileTableDataWriter(SingleFileCheckpointWriter &checkpoint_manager, TableCatalogEntry &table,
48
- MetaBlockWriter &table_data_writer, MetaBlockWriter &meta_data_writer);
48
+ MetadataWriter &table_data_writer, MetadataWriter &meta_data_writer);
49
49
 
50
50
  public:
51
51
  virtual void FinalizeTable(TableStatistics &&global_stats, DataTableInfo *info) override;
@@ -54,9 +54,9 @@ public:
54
54
  private:
55
55
  SingleFileCheckpointWriter &checkpoint_manager;
56
56
  // Writes the actual table data
57
- MetaBlockWriter &table_data_writer;
57
+ MetadataWriter &table_data_writer;
58
58
  // Writes the metadata of the table
59
- MetaBlockWriter &meta_data_writer;
59
+ MetadataWriter &meta_data_writer;
60
60
  };
61
61
 
62
62
  } // namespace duckdb
@@ -16,7 +16,7 @@ namespace duckdb {
16
16
  class DatabaseInstance;
17
17
  class ClientContext;
18
18
  class ColumnSegment;
19
- class MetaBlockReader;
19
+ class MetadataReader;
20
20
  class SchemaCatalogEntry;
21
21
  class SequenceCatalogEntry;
22
22
  class TableCatalogEntry;
@@ -33,7 +33,8 @@ public:
33
33
  //! The database
34
34
  AttachedDatabase &db;
35
35
 
36
- virtual MetaBlockWriter &GetMetaBlockWriter() = 0;
36
+ virtual MetadataManager &GetMetadataManager() = 0;
37
+ virtual MetadataWriter &GetMetadataWriter() = 0;
37
38
  virtual unique_ptr<TableDataWriter> GetTableDataWriter(TableCatalogEntry &table) = 0;
38
39
 
39
40
  protected:
@@ -58,17 +59,17 @@ protected:
58
59
  Catalog &catalog;
59
60
 
60
61
  protected:
61
- virtual void LoadCheckpoint(ClientContext &context, MetaBlockReader &reader);
62
- virtual void ReadSchema(ClientContext &context, MetaBlockReader &reader);
63
- virtual void ReadTable(ClientContext &context, MetaBlockReader &reader);
64
- virtual void ReadView(ClientContext &context, MetaBlockReader &reader);
65
- virtual void ReadSequence(ClientContext &context, MetaBlockReader &reader);
66
- virtual void ReadMacro(ClientContext &context, MetaBlockReader &reader);
67
- virtual void ReadTableMacro(ClientContext &context, MetaBlockReader &reader);
68
- virtual void ReadIndex(ClientContext &context, MetaBlockReader &reader);
69
- virtual void ReadType(ClientContext &context, MetaBlockReader &reader);
70
-
71
- virtual void ReadTableData(ClientContext &context, MetaBlockReader &reader, BoundCreateTableInfo &bound_info);
62
+ virtual void LoadCheckpoint(ClientContext &context, MetadataReader &reader);
63
+ virtual void ReadSchema(ClientContext &context, MetadataReader &reader);
64
+ virtual void ReadTable(ClientContext &context, MetadataReader &reader);
65
+ virtual void ReadView(ClientContext &context, MetadataReader &reader);
66
+ virtual void ReadSequence(ClientContext &context, MetadataReader &reader);
67
+ virtual void ReadMacro(ClientContext &context, MetadataReader &reader);
68
+ virtual void ReadTableMacro(ClientContext &context, MetadataReader &reader);
69
+ virtual void ReadIndex(ClientContext &context, MetadataReader &reader);
70
+ virtual void ReadType(ClientContext &context, MetadataReader &reader);
71
+
72
+ virtual void ReadTableData(ClientContext &context, MetadataReader &reader, BoundCreateTableInfo &bound_info);
72
73
  };
73
74
 
74
75
  class SingleFileCheckpointReader final : public CheckpointReader {
@@ -78,6 +79,7 @@ public:
78
79
  }
79
80
 
80
81
  void LoadFromStorage();
82
+ MetadataManager &GetMetadataManager();
81
83
 
82
84
  //! The database
83
85
  SingleFileStorageManager &storage;
@@ -98,16 +100,17 @@ public:
98
100
  //! connection is available because right now the checkpointing cannot be done online. (TODO)
99
101
  void CreateCheckpoint();
100
102
 
101
- virtual MetaBlockWriter &GetMetaBlockWriter() override;
103
+ virtual MetadataWriter &GetMetadataWriter() override;
104
+ virtual MetadataManager &GetMetadataManager() override;
102
105
  virtual unique_ptr<TableDataWriter> GetTableDataWriter(TableCatalogEntry &table) override;
103
106
 
104
107
  BlockManager &GetBlockManager();
105
108
 
106
109
  private:
107
110
  //! The metadata writer is responsible for writing schema information
108
- unique_ptr<MetaBlockWriter> metadata_writer;
111
+ unique_ptr<MetadataWriter> metadata_writer;
109
112
  //! The table data writer is responsible for writing the DataPointers used by the table chunks
110
- unique_ptr<MetaBlockWriter> table_metadata_writer;
113
+ unique_ptr<MetadataWriter> table_metadata_writer;
111
114
  //! Because this is single-file storage, we can share partial blocks across
112
115
  //! an entire checkpoint.
113
116
  PartialBlockManager partial_block_manager;
@@ -33,7 +33,7 @@ struct RowGroupPointer {
33
33
  uint64_t row_start;
34
34
  uint64_t tuple_count;
35
35
  //! The data pointers of the column segments stored in the row group
36
- vector<BlockPointer> data_pointers;
36
+ vector<MetaBlockPointer> data_pointers;
37
37
  //! The versions information of the row group (if any)
38
38
  shared_ptr<VersionNode> versions;
39
39
  };
@@ -29,7 +29,7 @@ public:
29
29
  block_id_t GetFreeBlockId() override {
30
30
  throw InternalException("Cannot perform IO in in-memory database - GetFreeBlockId!");
31
31
  }
32
- bool IsRootBlock(block_id_t root) override {
32
+ bool IsRootBlock(MetaBlockPointer root) override {
33
33
  throw InternalException("Cannot perform IO in in-memory database - IsRootBlock!");
34
34
  }
35
35
  void MarkBlockAsFree(block_id_t block_id) override {
@@ -41,7 +41,7 @@ public:
41
41
  void IncreaseBlockReferenceCount(block_id_t block_id) override {
42
42
  throw InternalException("Cannot perform IO in in-memory database - IncreaseBlockReferenceCount!");
43
43
  }
44
- block_id_t GetMetaBlock() override {
44
+ idx_t GetMetaBlock() override {
45
45
  throw InternalException("Cannot perform IO in in-memory database - GetMetaBlock!");
46
46
  }
47
47
  void Read(Block &block) override {