duckdb 0.7.2-dev1901.0 → 0.7.2-dev2233.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198) hide show
  1. package/binding.gyp +2 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/parquet/column_reader.cpp +3 -0
  4. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +1 -1
  5. package/src/duckdb/extension/parquet/parquet_metadata.cpp +4 -2
  6. package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +1 -1
  7. package/src/duckdb/src/common/arrow/arrow_appender.cpp +69 -44
  8. package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
  9. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +20 -2
  10. package/src/duckdb/src/common/box_renderer.cpp +4 -2
  11. package/src/duckdb/src/common/constants.cpp +10 -1
  12. package/src/duckdb/src/common/filename_pattern.cpp +41 -0
  13. package/src/duckdb/src/common/hive_partitioning.cpp +144 -15
  14. package/src/duckdb/src/common/radix_partitioning.cpp +101 -369
  15. package/src/duckdb/src/common/row_operations/row_aggregate.cpp +8 -9
  16. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  17. package/src/duckdb/src/common/row_operations/row_gather.cpp +5 -3
  18. package/src/duckdb/src/common/row_operations/row_match.cpp +117 -22
  19. package/src/duckdb/src/common/row_operations/row_scatter.cpp +2 -2
  20. package/src/duckdb/src/common/sort/partition_state.cpp +1 -1
  21. package/src/duckdb/src/common/sort/sort_state.cpp +2 -1
  22. package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
  23. package/src/duckdb/src/common/types/{column_data_allocator.cpp → column/column_data_allocator.cpp} +2 -2
  24. package/src/duckdb/src/common/types/{column_data_collection.cpp → column/column_data_collection.cpp} +29 -6
  25. package/src/duckdb/src/common/types/{column_data_collection_segment.cpp → column/column_data_collection_segment.cpp} +2 -1
  26. package/src/duckdb/src/common/types/{column_data_consumer.cpp → column/column_data_consumer.cpp} +1 -1
  27. package/src/duckdb/src/common/types/{partitioned_column_data.cpp → column/partitioned_column_data.cpp} +11 -9
  28. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +316 -0
  29. package/src/duckdb/src/common/types/{row_data_collection.cpp → row/row_data_collection.cpp} +1 -1
  30. package/src/duckdb/src/common/types/{row_data_collection_scanner.cpp → row/row_data_collection_scanner.cpp} +2 -2
  31. package/src/duckdb/src/common/types/{row_layout.cpp → row/row_layout.cpp} +1 -1
  32. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +465 -0
  33. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +511 -0
  34. package/src/duckdb/src/common/types/row/tuple_data_iterator.cpp +96 -0
  35. package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +119 -0
  36. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +1200 -0
  37. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +170 -0
  38. package/src/duckdb/src/common/types/vector.cpp +1 -1
  39. package/src/duckdb/src/execution/aggregate_hashtable.cpp +252 -290
  40. package/src/duckdb/src/execution/join_hashtable.cpp +192 -328
  41. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +4 -4
  42. package/src/duckdb/src/execution/operator/helper/physical_execute.cpp +3 -3
  43. package/src/duckdb/src/execution/operator/helper/physical_limit_percent.cpp +2 -3
  44. package/src/duckdb/src/execution/operator/helper/physical_result_collector.cpp +2 -3
  45. package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +36 -21
  46. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -2
  47. package/src/duckdb/src/execution/operator/join/physical_cross_product.cpp +1 -1
  48. package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +2 -2
  49. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +166 -144
  50. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +5 -5
  51. package/src/duckdb/src/execution/operator/join/physical_join.cpp +2 -10
  52. package/src/duckdb/src/execution/operator/join/physical_positional_join.cpp +0 -1
  53. package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +2 -2
  54. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +3 -0
  55. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +71 -22
  56. package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +17 -13
  57. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +0 -7
  58. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +124 -29
  59. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +13 -11
  60. package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +3 -2
  61. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +25 -24
  62. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  63. package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +4 -3
  64. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
  65. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +1 -1
  66. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +3 -3
  67. package/src/duckdb/src/execution/partitionable_hashtable.cpp +9 -37
  68. package/src/duckdb/src/execution/physical_operator.cpp +1 -1
  69. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +19 -18
  70. package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +2 -1
  71. package/src/duckdb/src/execution/physical_plan/plan_execute.cpp +2 -2
  72. package/src/duckdb/src/execution/physical_plan/plan_explain.cpp +5 -6
  73. package/src/duckdb/src/execution/physical_plan/plan_expression_get.cpp +2 -2
  74. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +3 -3
  75. package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -1
  76. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +39 -17
  77. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -2
  78. package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +5 -5
  79. package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +2 -2
  80. package/src/duckdb/src/function/table/read_csv.cpp +124 -58
  81. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  82. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
  83. package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +1 -1
  84. package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
  85. package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -0
  86. package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +528 -0
  87. package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +34 -0
  88. package/src/duckdb/src/include/duckdb/common/helper.hpp +10 -0
  89. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +13 -3
  90. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +8 -0
  91. package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +34 -0
  92. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +80 -27
  93. package/src/duckdb/src/include/duckdb/common/reference_map.hpp +38 -0
  94. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +7 -6
  95. package/src/duckdb/src/include/duckdb/common/sort/comparators.hpp +1 -1
  96. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +1 -1
  97. package/src/duckdb/src/include/duckdb/common/sort/sort.hpp +1 -1
  98. package/src/duckdb/src/include/duckdb/common/sort/sorted_block.hpp +2 -2
  99. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +1 -1
  100. package/src/duckdb/src/include/duckdb/common/types/{column_data_allocator.hpp → column/column_data_allocator.hpp} +4 -4
  101. package/src/duckdb/src/include/duckdb/common/types/{column_data_collection.hpp → column/column_data_collection.hpp} +4 -4
  102. package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_iterators.hpp → column/column_data_collection_iterators.hpp} +2 -2
  103. package/src/duckdb/src/include/duckdb/common/types/{column_data_collection_segment.hpp → column/column_data_collection_segment.hpp} +3 -3
  104. package/src/duckdb/src/include/duckdb/common/types/{column_data_consumer.hpp → column/column_data_consumer.hpp} +8 -4
  105. package/src/duckdb/src/include/duckdb/common/types/{column_data_scan_states.hpp → column/column_data_scan_states.hpp} +1 -1
  106. package/src/duckdb/src/include/duckdb/common/types/{partitioned_column_data.hpp → column/partitioned_column_data.hpp} +15 -7
  107. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +140 -0
  108. package/src/duckdb/src/include/duckdb/common/types/{row_data_collection.hpp → row/row_data_collection.hpp} +1 -1
  109. package/src/duckdb/src/include/duckdb/common/types/{row_data_collection_scanner.hpp → row/row_data_collection_scanner.hpp} +2 -2
  110. package/src/duckdb/src/include/duckdb/common/types/{row_layout.hpp → row/row_layout.hpp} +3 -1
  111. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +116 -0
  112. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +239 -0
  113. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_iterator.hpp +64 -0
  114. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +113 -0
  115. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +124 -0
  116. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +74 -0
  117. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -0
  118. package/src/duckdb/src/include/duckdb/common/types/value.hpp +4 -12
  119. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +34 -31
  120. package/src/duckdb/src/include/duckdb/execution/base_aggregate_hashtable.hpp +2 -2
  121. package/src/duckdb/src/include/duckdb/execution/execution_context.hpp +3 -2
  122. package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +1 -1
  123. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +41 -67
  124. package/src/duckdb/src/include/duckdb/execution/nested_loop_join.hpp +1 -1
  125. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_execute.hpp +2 -2
  126. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +1 -1
  127. package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +2 -2
  128. package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
  129. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +1 -1
  130. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +0 -2
  131. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +2 -2
  132. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_positional_join.hpp +1 -1
  133. package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +4 -1
  134. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +8 -3
  135. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +5 -7
  136. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +5 -1
  137. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +4 -1
  138. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +1 -1
  139. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +1 -1
  140. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +2 -2
  141. package/src/duckdb/src/include/duckdb/function/function.hpp +2 -0
  142. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +25 -0
  143. package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -0
  144. package/src/duckdb/src/include/duckdb/main/config.hpp +0 -2
  145. package/src/duckdb/src/include/duckdb/main/materialized_query_result.hpp +1 -1
  146. package/src/duckdb/src/include/duckdb/main/query_result.hpp +14 -1
  147. package/src/duckdb/src/include/duckdb/optimizer/expression_rewriter.hpp +3 -3
  148. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +16 -16
  149. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +8 -8
  150. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +23 -15
  151. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +9 -10
  152. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +18 -11
  153. package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +1 -1
  154. package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +5 -1
  155. package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +3 -2
  156. package/src/duckdb/src/include/duckdb/parser/query_error_context.hpp +4 -2
  157. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +9 -35
  158. package/src/duckdb/src/include/duckdb/planner/binder.hpp +24 -23
  159. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +3 -3
  160. package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +1 -1
  161. package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +3 -1
  162. package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
  163. package/src/duckdb/src/main/appender.cpp +6 -6
  164. package/src/duckdb/src/main/client_context.cpp +1 -1
  165. package/src/duckdb/src/main/connection.cpp +2 -2
  166. package/src/duckdb/src/main/query_result.cpp +13 -0
  167. package/src/duckdb/src/main/settings/settings.cpp +3 -4
  168. package/src/duckdb/src/optimizer/expression_rewriter.cpp +4 -4
  169. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +91 -105
  170. package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -8
  171. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +163 -160
  172. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +30 -30
  173. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +37 -38
  174. package/src/duckdb/src/parallel/executor.cpp +1 -1
  175. package/src/duckdb/src/parallel/meta_pipeline.cpp +2 -2
  176. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +1 -1
  177. package/src/duckdb/src/parser/transform/tableref/transform_subquery.cpp +1 -1
  178. package/src/duckdb/src/parser/transformer.cpp +50 -9
  179. package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +13 -0
  180. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +15 -5
  181. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +19 -17
  182. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +4 -4
  183. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +20 -21
  184. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +24 -22
  185. package/src/duckdb/src/planner/binder/tableref/bind_subqueryref.cpp +2 -2
  186. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +9 -0
  187. package/src/duckdb/src/planner/binder.cpp +16 -19
  188. package/src/duckdb/src/planner/expression_binder.cpp +8 -8
  189. package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +3 -3
  190. package/src/duckdb/src/storage/checkpoint_manager.cpp +23 -23
  191. package/src/duckdb/src/storage/standard_buffer_manager.cpp +1 -1
  192. package/src/duckdb/src/storage/table_index_list.cpp +3 -3
  193. package/src/duckdb/src/verification/statement_verifier.cpp +1 -1
  194. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +5552 -5598
  195. package/src/duckdb/ub_src_common.cpp +2 -0
  196. package/src/duckdb/ub_src_common_types.cpp +0 -16
  197. package/src/duckdb/ub_src_common_types_column.cpp +10 -0
  198. package/src/duckdb/ub_src_common_types_row.cpp +20 -0
@@ -15,7 +15,7 @@ namespace std {
15
15
  template <>
16
16
  struct hash<duckdb::JoinNode> {
17
17
  inline string operator()(const duckdb::JoinNode &join_node) const {
18
- return join_node.set->ToString();
18
+ return join_node.set.ToString();
19
19
  }
20
20
  };
21
21
  } // namespace std
@@ -93,15 +93,16 @@ static unique_ptr<LogicalOperator> PushFilter(unique_ptr<LogicalOperator> node,
93
93
  return node;
94
94
  }
95
95
 
96
- bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op, vector<LogicalOperator *> &filter_operators,
97
- LogicalOperator *parent) {
96
+ bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op,
97
+ vector<reference<LogicalOperator>> &filter_operators,
98
+ optional_ptr<LogicalOperator> parent) {
98
99
  LogicalOperator *op = &input_op;
99
100
  while (op->children.size() == 1 &&
100
101
  (op->type != LogicalOperatorType::LOGICAL_PROJECTION &&
101
102
  op->type != LogicalOperatorType::LOGICAL_EXPRESSION_GET && op->type != LogicalOperatorType::LOGICAL_GET)) {
102
103
  if (op->type == LogicalOperatorType::LOGICAL_FILTER) {
103
104
  // extract join conditions from filter
104
- filter_operators.push_back(op);
105
+ filter_operators.push_back(*op);
105
106
  }
106
107
  if (op->type == LogicalOperatorType::LOGICAL_AGGREGATE_AND_GROUP_BY ||
107
108
  op->type == LogicalOperatorType::LOGICAL_WINDOW) {
@@ -124,7 +125,7 @@ bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op, vector<
124
125
  auto &join = op->Cast<LogicalComparisonJoin>();
125
126
  if (join.join_type == JoinType::INNER) {
126
127
  // extract join conditions from inner join
127
- filter_operators.push_back(op);
128
+ filter_operators.push_back(*op);
128
129
  } else {
129
130
  // non-inner join, not reorderable yet
130
131
  non_reorderable_operation = true;
@@ -174,7 +175,7 @@ bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op, vector<
174
175
  unordered_set<idx_t> bindings;
175
176
  LogicalJoin::GetTableReferences(*op, bindings);
176
177
  // now create the relation that refers to all these bindings
177
- auto relation = make_uniq<SingleJoinRelation>(&input_op, parent);
178
+ auto relation = make_uniq<SingleJoinRelation>(input_op, parent);
178
179
  auto relation_id = relations.size();
179
180
  // Add binding information from the nonreorderable join to this relation.
180
181
  for (idx_t it : bindings) {
@@ -194,11 +195,10 @@ bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op, vector<
194
195
  bool can_reorder_right = ExtractJoinRelations(*op->children[1], filter_operators, op);
195
196
  return can_reorder_left && can_reorder_right;
196
197
  }
197
-
198
198
  case LogicalOperatorType::LOGICAL_EXPRESSION_GET: {
199
199
  // base table scan, add to set of relations
200
200
  auto &get = op->Cast<LogicalExpressionGet>();
201
- auto relation = make_uniq<SingleJoinRelation>(&input_op, parent);
201
+ auto relation = make_uniq<SingleJoinRelation>(input_op, parent);
202
202
  //! make sure the optimizer has knowledge of the exact column bindings as well.
203
203
  relation_mapping[get.table_index] = relations.size();
204
204
  relations.push_back(std::move(relation));
@@ -207,7 +207,7 @@ bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op, vector<
207
207
  case LogicalOperatorType::LOGICAL_DUMMY_SCAN: {
208
208
  // table function call, add to set of relations
209
209
  auto &dummy_scan = op->Cast<LogicalDummyScan>();
210
- auto relation = make_uniq<SingleJoinRelation>(&input_op, parent);
210
+ auto relation = make_uniq<SingleJoinRelation>(input_op, parent);
211
211
  relation_mapping[dummy_scan.table_index] = relations.size();
212
212
  relations.push_back(std::move(relation));
213
213
  return true;
@@ -215,13 +215,13 @@ bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op, vector<
215
215
  case LogicalOperatorType::LOGICAL_GET:
216
216
  case LogicalOperatorType::LOGICAL_PROJECTION: {
217
217
  auto table_index = op->GetTableIndex()[0];
218
- auto relation = make_uniq<SingleJoinRelation>(&input_op, parent);
218
+ auto relation = make_uniq<SingleJoinRelation>(input_op, parent);
219
219
  auto relation_id = relations.size();
220
220
 
221
221
  // If the children are empty, operator can't ge a logical get.
222
222
  if (op->children.empty() && op->type == LogicalOperatorType::LOGICAL_GET) {
223
223
  auto &get = op->Cast<LogicalGet>();
224
- cardinality_estimator.AddRelationColumnMapping(&get, relation_id);
224
+ cardinality_estimator.AddRelationColumnMapping(get, relation_id);
225
225
  relation_mapping[table_index] = relation_id;
226
226
  relations.push_back(std::move(relation));
227
227
  return true;
@@ -230,7 +230,6 @@ bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op, vector<
230
230
  // we run the join order optimizer within the subquery as well
231
231
  JoinOrderOptimizer optimizer(context);
232
232
  op->children[0] = optimizer.Optimize(std::move(op->children[0]));
233
-
234
233
  // push one child column binding map back.
235
234
  vector<column_binding_map_t<ColumnBinding>> child_binding_maps;
236
235
  child_binding_maps.emplace_back();
@@ -253,26 +252,26 @@ bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op, vector<
253
252
  }
254
253
 
255
254
  //! Update the exclusion set with all entries in the subgraph
256
- static void UpdateExclusionSet(JoinRelationSet *node, unordered_set<idx_t> &exclusion_set) {
257
- for (idx_t i = 0; i < node->count; i++) {
258
- exclusion_set.insert(node->relations[i]);
255
+ static void UpdateExclusionSet(JoinRelationSet &node, unordered_set<idx_t> &exclusion_set) {
256
+ for (idx_t i = 0; i < node.count; i++) {
257
+ exclusion_set.insert(node.relations[i]);
259
258
  }
260
259
  }
261
260
 
262
261
  //! Create a new JoinTree node by joining together two previous JoinTree nodes
263
- unique_ptr<JoinNode> JoinOrderOptimizer::CreateJoinTree(JoinRelationSet *set,
264
- const vector<NeighborInfo *> &possible_connections,
265
- JoinNode *left, JoinNode *right) {
262
+ unique_ptr<JoinNode> JoinOrderOptimizer::CreateJoinTree(JoinRelationSet &set,
263
+ const vector<reference<NeighborInfo>> &possible_connections,
264
+ JoinNode &left, JoinNode &right) {
266
265
  // for the hash join we want the right side (build side) to have the smallest cardinality
267
266
  // also just a heuristic but for now...
268
267
  // FIXME: we should probably actually benchmark that as well
269
268
  // FIXME: should consider different join algorithms, should we pick a join algorithm here as well? (probably)
270
269
  double expected_cardinality;
271
- NeighborInfo *best_connection = nullptr;
272
- auto plan = plans.find(set);
270
+ optional_ptr<NeighborInfo> best_connection;
271
+ auto plan = plans.find(&set);
273
272
  // if we have already calculated an expected cardinality for this set,
274
273
  // just re-use that cardinality
275
- if (left->GetCardinality<double>() < right->GetCardinality<double>()) {
274
+ if (left.GetCardinality<double>() < right.GetCardinality<double>()) {
276
275
  return CreateJoinTree(set, possible_connections, right, left);
277
276
  }
278
277
  if (plan != plans.end()) {
@@ -280,14 +279,14 @@ unique_ptr<JoinNode> JoinOrderOptimizer::CreateJoinTree(JoinRelationSet *set,
280
279
  throw InternalException("No plan: internal error in join order optimizer");
281
280
  }
282
281
  expected_cardinality = plan->second->GetCardinality<double>();
283
- best_connection = possible_connections.back();
282
+ best_connection = &possible_connections.back().get();
284
283
  } else if (possible_connections.empty()) {
285
284
  // cross product
286
285
  expected_cardinality = cardinality_estimator.EstimateCrossProduct(left, right);
287
286
  } else {
288
287
  // normal join, expect foreign key join
289
288
  expected_cardinality = cardinality_estimator.EstimateCardinalityWithSet(set);
290
- best_connection = possible_connections.back();
289
+ best_connection = &possible_connections.back().get();
291
290
  }
292
291
 
293
292
  auto cost = CardinalityEstimator::ComputeCost(left, right, expected_cardinality);
@@ -296,50 +295,51 @@ unique_ptr<JoinNode> JoinOrderOptimizer::CreateJoinTree(JoinRelationSet *set,
296
295
  return result;
297
296
  }
298
297
 
299
- bool JoinOrderOptimizer::NodeInFullPlan(JoinNode *node) {
300
- return join_nodes_in_full_plan.find(node->set->ToString()) != join_nodes_in_full_plan.end();
298
+ bool JoinOrderOptimizer::NodeInFullPlan(JoinNode &node) {
299
+ return join_nodes_in_full_plan.find(node.set.ToString()) != join_nodes_in_full_plan.end();
301
300
  }
302
301
 
303
- void JoinOrderOptimizer::UpdateJoinNodesInFullPlan(JoinNode *node) {
304
- if (!node) {
305
- return;
306
- }
307
- if (node->set->count == relations.size()) {
302
+ void JoinOrderOptimizer::UpdateJoinNodesInFullPlan(JoinNode &node) {
303
+ if (node.set.count == relations.size()) {
308
304
  join_nodes_in_full_plan.clear();
309
305
  }
310
- if (node->set->count < relations.size()) {
311
- join_nodes_in_full_plan.insert(node->set->ToString());
306
+ if (node.set.count < relations.size()) {
307
+ join_nodes_in_full_plan.insert(node.set.ToString());
308
+ }
309
+ if (node.left) {
310
+ UpdateJoinNodesInFullPlan(*node.left);
311
+ }
312
+ if (node.right) {
313
+ UpdateJoinNodesInFullPlan(*node.right);
312
314
  }
313
- UpdateJoinNodesInFullPlan(node->left);
314
- UpdateJoinNodesInFullPlan(node->right);
315
315
  }
316
316
 
317
- JoinNode *JoinOrderOptimizer::EmitPair(JoinRelationSet *left, JoinRelationSet *right,
318
- const vector<NeighborInfo *> &info) {
317
+ JoinNode &JoinOrderOptimizer::EmitPair(JoinRelationSet &left, JoinRelationSet &right,
318
+ const vector<reference<NeighborInfo>> &info) {
319
319
  // get the left and right join plans
320
- auto &left_plan = plans[left];
321
- auto &right_plan = plans[right];
320
+ auto &left_plan = plans[&left];
321
+ auto &right_plan = plans[&right];
322
322
  if (!left_plan || !right_plan) {
323
323
  throw InternalException("No left or right plan: internal error in join order optimizer");
324
324
  }
325
- auto new_set = set_manager.Union(left, right);
325
+ auto &new_set = set_manager.Union(left, right);
326
326
  // create the join tree based on combining the two plans
327
- auto new_plan = CreateJoinTree(new_set, info, left_plan.get(), right_plan.get());
327
+ auto new_plan = CreateJoinTree(new_set, info, *left_plan, *right_plan);
328
328
  // check if this plan is the optimal plan we found for this set of relations
329
- auto entry = plans.find(new_set);
329
+ auto entry = plans.find(&new_set);
330
330
  if (entry == plans.end() || new_plan->GetCost() < entry->second->GetCost()) {
331
331
  // the plan is the optimal plan, move it into the dynamic programming tree
332
- auto result = new_plan.get();
332
+ auto &result = *new_plan;
333
333
 
334
334
  //! make sure plans are symmetric for cardinality estimation
335
335
  if (entry != plans.end()) {
336
- cardinality_estimator.VerifySymmetry(result, entry->second.get());
336
+ cardinality_estimator.VerifySymmetry(result, *entry->second);
337
337
  }
338
338
  if (full_plan_found &&
339
- join_nodes_in_full_plan.find(new_plan->set->ToString()) != join_nodes_in_full_plan.end()) {
339
+ join_nodes_in_full_plan.find(new_plan->set.ToString()) != join_nodes_in_full_plan.end()) {
340
340
  must_update_full_plan = true;
341
341
  }
342
- if (new_set->count == relations.size()) {
342
+ if (new_set.count == relations.size()) {
343
343
  full_plan_found = true;
344
344
  // If we find a full plan, we need to keep track of which nodes are in the full plan.
345
345
  // It's possible the DP algorithm updates one of these nodes, then goes on to solve
@@ -355,14 +355,14 @@ JoinNode *JoinOrderOptimizer::EmitPair(JoinRelationSet *left, JoinRelationSet *r
355
355
  }
356
356
 
357
357
  D_ASSERT(new_plan);
358
- plans[new_set] = std::move(new_plan);
358
+ plans[&new_set] = std::move(new_plan);
359
359
  return result;
360
360
  }
361
- return entry->second.get();
361
+ return *entry->second;
362
362
  }
363
363
 
364
- bool JoinOrderOptimizer::TryEmitPair(JoinRelationSet *left, JoinRelationSet *right,
365
- const vector<NeighborInfo *> &info) {
364
+ bool JoinOrderOptimizer::TryEmitPair(JoinRelationSet &left, JoinRelationSet &right,
365
+ const vector<reference<NeighborInfo>> &info) {
366
366
  pairs++;
367
367
  // If a full plan is created, it's possible a node in the plan gets updated. When this happens, make sure you keep
368
368
  // emitting pairs until you emit another final plan. Another final plan is guaranteed to be produced because of
@@ -377,13 +377,13 @@ bool JoinOrderOptimizer::TryEmitPair(JoinRelationSet *left, JoinRelationSet *rig
377
377
  return true;
378
378
  }
379
379
 
380
- bool JoinOrderOptimizer::EmitCSG(JoinRelationSet *node) {
381
- if (node->count == relations.size()) {
380
+ bool JoinOrderOptimizer::EmitCSG(JoinRelationSet &node) {
381
+ if (node.count == relations.size()) {
382
382
  return true;
383
383
  }
384
384
  // create the exclusion set as everything inside the subgraph AND anything with members BELOW it
385
385
  unordered_set<idx_t> exclusion_set;
386
- for (idx_t i = 0; i < node->relations[0]; i++) {
386
+ for (idx_t i = 0; i < node.relations[0]; i++) {
387
387
  exclusion_set.insert(i);
388
388
  }
389
389
  UpdateExclusionSet(node, exclusion_set);
@@ -401,7 +401,7 @@ bool JoinOrderOptimizer::EmitCSG(JoinRelationSet *node) {
401
401
  for (auto neighbor : neighbors) {
402
402
  // since the GetNeighbors only returns the smallest element in a list, the entry might not be connected to
403
403
  // (only!) this neighbor, hence we have to do a connectedness check before we can emit it
404
- auto neighbor_relation = set_manager.GetJoinRelation(neighbor);
404
+ auto &neighbor_relation = set_manager.GetJoinRelation(neighbor);
405
405
  auto connections = query_graph.GetConnections(node, neighbor_relation);
406
406
  if (!connections.empty()) {
407
407
  if (!TryEmitPair(node, neighbor_relation, connections)) {
@@ -415,20 +415,20 @@ bool JoinOrderOptimizer::EmitCSG(JoinRelationSet *node) {
415
415
  return true;
416
416
  }
417
417
 
418
- bool JoinOrderOptimizer::EnumerateCmpRecursive(JoinRelationSet *left, JoinRelationSet *right,
418
+ bool JoinOrderOptimizer::EnumerateCmpRecursive(JoinRelationSet &left, JoinRelationSet &right,
419
419
  unordered_set<idx_t> exclusion_set) {
420
420
  // get the neighbors of the second relation under the exclusion set
421
421
  auto neighbors = query_graph.GetNeighbors(right, exclusion_set);
422
422
  if (neighbors.empty()) {
423
423
  return true;
424
424
  }
425
- vector<JoinRelationSet *> union_sets;
426
- union_sets.resize(neighbors.size());
425
+ vector<reference<JoinRelationSet>> union_sets;
426
+ union_sets.reserve(neighbors.size());
427
427
  for (idx_t i = 0; i < neighbors.size(); i++) {
428
- auto neighbor = set_manager.GetJoinRelation(neighbors[i]);
428
+ auto &neighbor = set_manager.GetJoinRelation(neighbors[i]);
429
429
  // emit the combinations of this node and its neighbors
430
- auto combined_set = set_manager.Union(right, neighbor);
431
- if (combined_set->count > right->count && plans.find(combined_set) != plans.end()) {
430
+ auto &combined_set = set_manager.Union(right, neighbor);
431
+ if (combined_set.count > right.count && plans.find(&combined_set) != plans.end()) {
432
432
  auto connections = query_graph.GetConnections(left, combined_set);
433
433
  if (!connections.empty()) {
434
434
  if (!TryEmitPair(left, combined_set, connections)) {
@@ -436,7 +436,7 @@ bool JoinOrderOptimizer::EnumerateCmpRecursive(JoinRelationSet *left, JoinRelati
436
436
  }
437
437
  }
438
438
  }
439
- union_sets[i] = combined_set;
439
+ union_sets.push_back(combined_set);
440
440
  }
441
441
  // recursively enumerate the sets
442
442
  unordered_set<idx_t> new_exclusion_set = exclusion_set;
@@ -450,24 +450,24 @@ bool JoinOrderOptimizer::EnumerateCmpRecursive(JoinRelationSet *left, JoinRelati
450
450
  return true;
451
451
  }
452
452
 
453
- bool JoinOrderOptimizer::EnumerateCSGRecursive(JoinRelationSet *node, unordered_set<idx_t> &exclusion_set) {
453
+ bool JoinOrderOptimizer::EnumerateCSGRecursive(JoinRelationSet &node, unordered_set<idx_t> &exclusion_set) {
454
454
  // find neighbors of S under the exclusion set
455
455
  auto neighbors = query_graph.GetNeighbors(node, exclusion_set);
456
456
  if (neighbors.empty()) {
457
457
  return true;
458
458
  }
459
- vector<JoinRelationSet *> union_sets;
460
- union_sets.resize(neighbors.size());
459
+ vector<reference<JoinRelationSet>> union_sets;
460
+ union_sets.reserve(neighbors.size());
461
461
  for (idx_t i = 0; i < neighbors.size(); i++) {
462
- auto neighbor = set_manager.GetJoinRelation(neighbors[i]);
462
+ auto &neighbor = set_manager.GetJoinRelation(neighbors[i]);
463
463
  // emit the combinations of this node and its neighbors
464
- auto new_set = set_manager.Union(node, neighbor);
465
- if (new_set->count > node->count && plans.find(new_set) != plans.end()) {
464
+ auto &new_set = set_manager.Union(node, neighbor);
465
+ if (new_set.count > node.count && plans.find(&new_set) != plans.end()) {
466
466
  if (!EmitCSG(new_set)) {
467
467
  return false;
468
468
  }
469
469
  }
470
- union_sets[i] = new_set;
470
+ union_sets.push_back(new_set);
471
471
  }
472
472
  // recursively enumerate the sets
473
473
  unordered_set<idx_t> new_exclusion_set = exclusion_set;
@@ -489,7 +489,7 @@ bool JoinOrderOptimizer::SolveJoinOrderExactly() {
489
489
  // we enumerate over all the possible pairs in the neighborhood
490
490
  for (idx_t i = relations.size(); i > 0; i--) {
491
491
  // for every node in the set, we consider it as the start node once
492
- auto start_node = set_manager.GetJoinRelation(i - 1);
492
+ auto &start_node = set_manager.GetJoinRelation(i - 1);
493
493
  // emit the start node
494
494
  if (!EmitCSG(start_node)) {
495
495
  return false;
@@ -532,8 +532,7 @@ static vector<unordered_set<idx_t>> AddSuperSets(vector<unordered_set<idx_t>> cu
532
532
  // works by first creating all sets with cardinality 1
533
533
  // then iterates over each previously created group of subsets and will only add a neighbor if the neighbor
534
534
  // is greater than all relations in the set.
535
- static vector<unordered_set<idx_t>> GetAllNeighborSets(JoinRelationSet *new_set, unordered_set<idx_t> &exclusion_set,
536
- vector<idx_t> neighbors) {
535
+ static vector<unordered_set<idx_t>> GetAllNeighborSets(unordered_set<idx_t> &exclusion_set, vector<idx_t> neighbors) {
537
536
  vector<unordered_set<idx_t>> ret;
538
537
  sort(neighbors.begin(), neighbors.end());
539
538
  vector<unordered_set<idx_t>> added;
@@ -565,25 +564,25 @@ static vector<unordered_set<idx_t>> GetAllNeighborSets(JoinRelationSet *new_set,
565
564
  return ret;
566
565
  }
567
566
 
568
- void JoinOrderOptimizer::UpdateDPTree(JoinNode *new_plan) {
567
+ void JoinOrderOptimizer::UpdateDPTree(JoinNode &new_plan) {
569
568
  if (!NodeInFullPlan(new_plan)) {
570
569
  // if the new node is not in the full plan, feel free to return
571
570
  // because you won't be updating the full plan.
572
571
  return;
573
572
  }
574
- auto new_set = new_plan->set;
573
+ auto &new_set = new_plan.set;
575
574
  // now update every plan that uses this plan
576
575
  unordered_set<idx_t> exclusion_set;
577
- for (idx_t i = 0; i < new_set->count; i++) {
578
- exclusion_set.insert(new_set->relations[i]);
576
+ for (idx_t i = 0; i < new_set.count; i++) {
577
+ exclusion_set.insert(new_set.relations[i]);
579
578
  }
580
579
  auto neighbors = query_graph.GetNeighbors(new_set, exclusion_set);
581
- auto all_neighbors = GetAllNeighborSets(new_set, exclusion_set, neighbors);
580
+ auto all_neighbors = GetAllNeighborSets(exclusion_set, neighbors);
582
581
  for (auto neighbor : all_neighbors) {
583
- auto neighbor_relation = set_manager.GetJoinRelation(neighbor);
584
- auto combined_set = set_manager.Union(new_set, neighbor_relation);
582
+ auto &neighbor_relation = set_manager.GetJoinRelation(neighbor);
583
+ auto &combined_set = set_manager.Union(new_set, neighbor_relation);
585
584
 
586
- auto combined_set_plan = plans.find(combined_set);
585
+ auto combined_set_plan = plans.find(&combined_set);
587
586
  if (combined_set_plan == plans.end()) {
588
587
  continue;
589
588
  }
@@ -592,14 +591,14 @@ void JoinOrderOptimizer::UpdateDPTree(JoinNode *new_plan) {
592
591
  auto connections = query_graph.GetConnections(new_set, neighbor_relation);
593
592
  // recurse and update up the tree if the combined set produces a plan with a lower cost
594
593
  // only recurse on neighbor relations that have plans.
595
- auto right_plan = plans.find(neighbor_relation);
594
+ auto right_plan = plans.find(&neighbor_relation);
596
595
  if (right_plan == plans.end()) {
597
596
  continue;
598
597
  }
599
- auto updated_plan = EmitPair(new_set, neighbor_relation, connections);
598
+ auto &updated_plan = EmitPair(new_set, neighbor_relation, connections);
600
599
  // <= because the child node has already been replaced. You need to
601
600
  // replace the parent node as well in this case
602
- if (updated_plan->GetCost() < combined_set_plan_cost) {
601
+ if (updated_plan.GetCost() < combined_set_plan_cost) {
603
602
  UpdateDPTree(updated_plan);
604
603
  }
605
604
  }
@@ -609,7 +608,7 @@ void JoinOrderOptimizer::SolveJoinOrderApproximately() {
609
608
  // at this point, we exited the dynamic programming but did not compute the final join order because it took too
610
609
  // long instead, we use a greedy heuristic to obtain a join ordering now we use Greedy Operator Ordering to
611
610
  // construct the result tree first we start out with all the base relations (the to-be-joined relations)
612
- vector<JoinRelationSet *> join_relations; // T in the paper
611
+ vector<reference<JoinRelationSet>> join_relations; // T in the paper
613
612
  for (idx_t i = 0; i < relations.size(); i++) {
614
613
  join_relations.push_back(set_manager.GetJoinRelation(i));
615
614
  }
@@ -618,7 +617,7 @@ void JoinOrderOptimizer::SolveJoinOrderApproximately() {
618
617
  // smallest cost. This is O(r^2) per step, and every step will reduce the total amount of relations to-be-joined
619
618
  // by 1, so the total cost is O(r^3) in the amount of relations
620
619
  idx_t best_left = 0, best_right = 0;
621
- JoinNode *best_connection = nullptr;
620
+ optional_ptr<JoinNode> best_connection;
622
621
  for (idx_t i = 0; i < join_relations.size(); i++) {
623
622
  auto left = join_relations[i];
624
623
  for (idx_t j = i + 1; j < join_relations.size(); j++) {
@@ -627,7 +626,7 @@ void JoinOrderOptimizer::SolveJoinOrderApproximately() {
627
626
  auto connection = query_graph.GetConnections(left, right);
628
627
  if (!connection.empty()) {
629
628
  // we can check the cost of this connection
630
- auto node = EmitPair(left, right, connection);
629
+ auto &node = EmitPair(left, right, connection);
631
630
 
632
631
  // update the DP tree in case a plan created by the DP algorithm uses the node
633
632
  // that was potentially just updated by EmitPair. You will get a use-after-free
@@ -635,9 +634,9 @@ void JoinOrderOptimizer::SolveJoinOrderApproximately() {
635
634
  // if node in FullPath, then updateDP tree.
636
635
  UpdateDPTree(node);
637
636
 
638
- if (!best_connection || node->GetCost() < best_connection->GetCost()) {
637
+ if (!best_connection || node.GetCost() < best_connection->GetCost()) {
639
638
  // best pair found so far
640
- best_connection = node;
639
+ best_connection = &node;
641
640
  best_left = i;
642
641
  best_right = j;
643
642
  }
@@ -647,11 +646,11 @@ void JoinOrderOptimizer::SolveJoinOrderApproximately() {
647
646
  if (!best_connection) {
648
647
  // could not find a connection, but we were not done with finding a completed plan
649
648
  // we have to add a cross product; we add it between the two smallest relations
650
- JoinNode *smallest_plans[2] = {nullptr};
649
+ optional_ptr<JoinNode> smallest_plans[2];
651
650
  idx_t smallest_index[2];
652
651
  for (idx_t i = 0; i < join_relations.size(); i++) {
653
652
  // get the plan for this relation
654
- auto current_plan = plans[join_relations[i]].get();
653
+ auto current_plan = plans[&join_relations[i].get()].get();
655
654
  // check if the cardinality is smaller than the smallest two found so far
656
655
  for (idx_t j = 0; j < 2; j++) {
657
656
  if (!smallest_plans[j] ||
@@ -667,19 +666,19 @@ void JoinOrderOptimizer::SolveJoinOrderApproximately() {
667
666
  }
668
667
  D_ASSERT(smallest_plans[0] && smallest_plans[1]);
669
668
  D_ASSERT(smallest_index[0] != smallest_index[1]);
670
- auto left = smallest_plans[0]->set;
671
- auto right = smallest_plans[1]->set;
669
+ auto &left = smallest_plans[0]->set;
670
+ auto &right = smallest_plans[1]->set;
672
671
  // create a cross product edge (i.e. edge with empty filter) between these two sets in the query graph
673
672
  query_graph.CreateEdge(left, right, nullptr);
674
673
  // now emit the pair and continue with the algorithm
675
674
  auto connections = query_graph.GetConnections(left, right);
676
675
  D_ASSERT(!connections.empty());
677
676
 
678
- best_connection = EmitPair(left, right, connections);
677
+ best_connection = &EmitPair(left, right, connections);
679
678
  best_left = smallest_index[0];
680
679
  best_right = smallest_index[1];
681
680
 
682
- UpdateDPTree(best_connection);
681
+ UpdateDPTree(*best_connection);
683
682
  // the code below assumes best_right > best_left
684
683
  if (best_left > best_right) {
685
684
  std::swap(best_left, best_right);
@@ -709,10 +708,10 @@ void JoinOrderOptimizer::GenerateCrossProducts() {
709
708
  // generate a set of cross products to combine the currently available plans into a full join plan
710
709
  // we create edges between every relation with a high cost
711
710
  for (idx_t i = 0; i < relations.size(); i++) {
712
- auto left = set_manager.GetJoinRelation(i);
711
+ auto &left = set_manager.GetJoinRelation(i);
713
712
  for (idx_t j = 0; j < relations.size(); j++) {
714
713
  if (i != j) {
715
- auto right = set_manager.GetJoinRelation(j);
714
+ auto &right = set_manager.GetJoinRelation(j);
716
715
  query_graph.CreateEdge(left, right, nullptr);
717
716
  query_graph.CreateEdge(right, left, nullptr);
718
717
  }
@@ -723,7 +722,7 @@ void JoinOrderOptimizer::GenerateCrossProducts() {
723
722
  static unique_ptr<LogicalOperator> ExtractJoinRelation(SingleJoinRelation &rel) {
724
723
  auto &children = rel.parent->children;
725
724
  for (idx_t i = 0; i < children.size(); i++) {
726
- if (children[i].get() == rel.op) {
725
+ if (children[i].get() == &rel.op) {
727
726
  // found it! take ownership of it from the parent
728
727
  auto result = std::move(children[i]);
729
728
  children.erase(children.begin() + i);
@@ -733,39 +732,41 @@ static unique_ptr<LogicalOperator> ExtractJoinRelation(SingleJoinRelation &rel)
733
732
  throw Exception("Could not find relation in parent node (?)");
734
733
  }
735
734
 
736
- pair<JoinRelationSet *, unique_ptr<LogicalOperator>>
737
- JoinOrderOptimizer::GenerateJoins(vector<unique_ptr<LogicalOperator>> &extracted_relations, JoinNode *node) {
738
- JoinRelationSet *left_node = nullptr, *right_node = nullptr;
739
- JoinRelationSet *result_relation;
735
+ GenerateJoinRelation JoinOrderOptimizer::GenerateJoins(vector<unique_ptr<LogicalOperator>> &extracted_relations,
736
+ JoinNode &node) {
737
+ optional_ptr<JoinRelationSet> left_node;
738
+ optional_ptr<JoinRelationSet> right_node;
739
+ optional_ptr<JoinRelationSet> result_relation;
740
740
  unique_ptr<LogicalOperator> result_operator;
741
- if (node->left && node->right) {
741
+ if (node.left && node.right && node.info) {
742
742
  // generate the left and right children
743
- auto left = GenerateJoins(extracted_relations, node->left);
744
- auto right = GenerateJoins(extracted_relations, node->right);
743
+ auto left = GenerateJoins(extracted_relations, *node.left);
744
+ auto right = GenerateJoins(extracted_relations, *node.right);
745
745
 
746
- if (node->info->filters.empty()) {
746
+ if (node.info->filters.empty()) {
747
747
  // no filters, create a cross product
748
- result_operator = LogicalCrossProduct::Create(std::move(left.second), std::move(right.second));
748
+ result_operator = LogicalCrossProduct::Create(std::move(left.op), std::move(right.op));
749
749
  } else {
750
750
  // we have filters, create a join node
751
751
  auto join = make_uniq<LogicalComparisonJoin>(JoinType::INNER);
752
- join->children.push_back(std::move(left.second));
753
- join->children.push_back(std::move(right.second));
752
+ join->children.push_back(std::move(left.op));
753
+ join->children.push_back(std::move(right.op));
754
754
  // set the join conditions from the join node
755
- for (auto &f : node->info->filters) {
755
+ for (auto &filter_ref : node.info->filters) {
756
+ auto &f = filter_ref.get();
756
757
  // extract the filter from the operator it originally belonged to
757
- D_ASSERT(filters[f->filter_index]);
758
- auto condition = std::move(filters[f->filter_index]);
758
+ D_ASSERT(filters[f.filter_index]);
759
+ auto condition = std::move(filters[f.filter_index]);
759
760
  // now create the actual join condition
760
- D_ASSERT((JoinRelationSet::IsSubset(left.first, f->left_set) &&
761
- JoinRelationSet::IsSubset(right.first, f->right_set)) ||
762
- (JoinRelationSet::IsSubset(left.first, f->right_set) &&
763
- JoinRelationSet::IsSubset(right.first, f->left_set)));
761
+ D_ASSERT((JoinRelationSet::IsSubset(left.set, *f.left_set) &&
762
+ JoinRelationSet::IsSubset(right.set, *f.right_set)) ||
763
+ (JoinRelationSet::IsSubset(left.set, *f.right_set) &&
764
+ JoinRelationSet::IsSubset(right.set, *f.left_set)));
764
765
  JoinCondition cond;
765
766
  D_ASSERT(condition->GetExpressionClass() == ExpressionClass::BOUND_COMPARISON);
766
767
  auto &comparison = condition->Cast<BoundComparisonExpression>();
767
768
  // we need to figure out which side is which by looking at the relations available to us
768
- bool invert = !JoinRelationSet::IsSubset(left.first, f->left_set);
769
+ bool invert = !JoinRelationSet::IsSubset(left.set, *f.left_set);
769
770
  cond.left = !invert ? std::move(comparison.left) : std::move(comparison.right);
770
771
  cond.right = !invert ? std::move(comparison.right) : std::move(comparison.left);
771
772
  cond.comparison = condition->type;
@@ -779,18 +780,18 @@ JoinOrderOptimizer::GenerateJoins(vector<unique_ptr<LogicalOperator>> &extracted
779
780
  D_ASSERT(!join->conditions.empty());
780
781
  result_operator = std::move(join);
781
782
  }
782
- left_node = left.first;
783
- right_node = right.first;
784
- right_node = right.first;
785
- result_relation = set_manager.Union(left_node, right_node);
783
+ left_node = &left.set;
784
+ right_node = &right.set;
785
+ right_node = &right.set;
786
+ result_relation = &set_manager.Union(*left_node, *right_node);
786
787
  } else {
787
788
  // base node, get the entry from the list of extracted relations
788
- D_ASSERT(node->set->count == 1);
789
- D_ASSERT(extracted_relations[node->set->relations[0]]);
790
- result_relation = node->set;
791
- result_operator = std::move(extracted_relations[node->set->relations[0]]);
789
+ D_ASSERT(node.set.count == 1);
790
+ D_ASSERT(extracted_relations[node.set.relations[0]]);
791
+ result_relation = &node.set;
792
+ result_operator = std::move(extracted_relations[node.set.relations[0]]);
792
793
  }
793
- result_operator->estimated_props = node->estimated_props->Copy();
794
+ result_operator->estimated_props = node.estimated_props->Copy();
794
795
  result_operator->estimated_cardinality = result_operator->estimated_props->GetCardinality<idx_t>();
795
796
  result_operator->has_estimated_cardinality = true;
796
797
  if (result_operator->type == LogicalOperatorType::LOGICAL_FILTER &&
@@ -808,16 +809,16 @@ JoinOrderOptimizer::GenerateJoins(vector<unique_ptr<LogicalOperator>> &extracted
808
809
  // hence we should push it here
809
810
  for (auto &filter_info : filter_infos) {
810
811
  // check if the filter has already been extracted
811
- auto info = filter_info.get();
812
- if (filters[info->filter_index]) {
812
+ auto &info = *filter_info;
813
+ if (filters[info.filter_index]) {
813
814
  // now check if the filter is a subset of the current relation
814
815
  // note that infos with an empty relation set are a special case and we do not push them down
815
- if (info->set->count > 0 && JoinRelationSet::IsSubset(result_relation, info->set)) {
816
- auto filter = std::move(filters[info->filter_index]);
816
+ if (info.set.count > 0 && JoinRelationSet::IsSubset(*result_relation, info.set)) {
817
+ auto filter = std::move(filters[info.filter_index]);
817
818
  // if it is, we can push the filter
818
819
  // we can push it either into a join or as a filter
819
820
  // check if we are in a join or in a base table
820
- if (!left_node || !info->left_set) {
821
+ if (!left_node || !info.left_set) {
821
822
  // base table or non-comparison expression, push it as a filter
822
823
  result_operator = PushFilter(std::move(result_operator), std::move(filter));
823
824
  continue;
@@ -826,11 +827,11 @@ JoinOrderOptimizer::GenerateJoins(vector<unique_ptr<LogicalOperator>> &extracted
826
827
  // check if the nodes can be split up into left/right
827
828
  bool found_subset = false;
828
829
  bool invert = false;
829
- if (JoinRelationSet::IsSubset(left_node, info->left_set) &&
830
- JoinRelationSet::IsSubset(right_node, info->right_set)) {
830
+ if (JoinRelationSet::IsSubset(*left_node, *info.left_set) &&
831
+ JoinRelationSet::IsSubset(*right_node, *info.right_set)) {
831
832
  found_subset = true;
832
- } else if (JoinRelationSet::IsSubset(right_node, info->left_set) &&
833
- JoinRelationSet::IsSubset(left_node, info->right_set)) {
833
+ } else if (JoinRelationSet::IsSubset(*right_node, *info.left_set) &&
834
+ JoinRelationSet::IsSubset(*left_node, *info.right_set)) {
834
835
  invert = true;
835
836
  found_subset = true;
836
837
  }
@@ -877,10 +878,10 @@ JoinOrderOptimizer::GenerateJoins(vector<unique_ptr<LogicalOperator>> &extracted
877
878
  }
878
879
  }
879
880
  }
880
- return make_pair(result_relation, std::move(result_operator));
881
+ return GenerateJoinRelation(*result_relation, std::move(result_operator));
881
882
  }
882
883
 
883
- unique_ptr<LogicalOperator> JoinOrderOptimizer::RewritePlan(unique_ptr<LogicalOperator> plan, JoinNode *node) {
884
+ unique_ptr<LogicalOperator> JoinOrderOptimizer::RewritePlan(unique_ptr<LogicalOperator> plan, JoinNode &node) {
884
885
  // now we have to rewrite the plan
885
886
  bool root_is_join = plan->children.size() > 1;
886
887
 
@@ -898,14 +899,14 @@ unique_ptr<LogicalOperator> JoinOrderOptimizer::RewritePlan(unique_ptr<LogicalOp
898
899
  // check if the filter has already been extracted
899
900
  if (filter) {
900
901
  // if not we need to push it
901
- join_tree.second = PushFilter(std::move(join_tree.second), std::move(filter));
902
+ join_tree.op = PushFilter(std::move(join_tree.op), std::move(filter));
902
903
  }
903
904
  }
904
905
 
905
906
  // find the first join in the relation to know where to place this node
906
907
  if (root_is_join) {
907
908
  // first node is the join, return it immediately
908
- return std::move(join_tree.second);
909
+ return std::move(join_tree.op);
909
910
  }
910
911
  D_ASSERT(plan->children.size() == 1);
911
912
  // have to move up through the relations
@@ -919,7 +920,7 @@ unique_ptr<LogicalOperator> JoinOrderOptimizer::RewritePlan(unique_ptr<LogicalOp
919
920
  op = op->children[0].get();
920
921
  }
921
922
  // have to replace at this node
922
- parent->children[0] = std::move(join_tree.second);
923
+ parent->children[0] = std::move(join_tree.op);
923
924
  return plan;
924
925
  }
925
926
 
@@ -935,7 +936,7 @@ unique_ptr<LogicalOperator> JoinOrderOptimizer::Optimize(unique_ptr<LogicalOpera
935
936
  // group by and this filter cannot be reordered
936
937
  // extract a list of all relations that have to be joined together
937
938
  // and a list of all conditions that is applied to them
938
- vector<LogicalOperator *> filter_operators;
939
+ vector<reference<LogicalOperator>> filter_operators;
939
940
  if (!ExtractJoinRelations(*op, filter_operators)) {
940
941
  // do not support reordering this type of plan
941
942
  return plan;
@@ -947,10 +948,11 @@ unique_ptr<LogicalOperator> JoinOrderOptimizer::Optimize(unique_ptr<LogicalOpera
947
948
  // now that we know we are going to perform join ordering we actually extract the filters, eliminating duplicate
948
949
  // filters in the process
949
950
  expression_set_t filter_set;
950
- for (auto &f_op : filter_operators) {
951
- if (f_op->type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN ||
952
- f_op->type == LogicalOperatorType::LOGICAL_ASOF_JOIN) {
953
- auto &join = f_op->Cast<LogicalComparisonJoin>();
951
+ for (auto &filter_op : filter_operators) {
952
+ auto &f_op = filter_op.get();
953
+ if (f_op.type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN ||
954
+ f_op.type == LogicalOperatorType::LOGICAL_ASOF_JOIN) {
955
+ auto &join = f_op.Cast<LogicalComparisonJoin>();
954
956
  D_ASSERT(join.join_type == JoinType::INNER);
955
957
  D_ASSERT(join.expressions.empty());
956
958
  for (auto &cond : join.conditions) {
@@ -963,26 +965,27 @@ unique_ptr<LogicalOperator> JoinOrderOptimizer::Optimize(unique_ptr<LogicalOpera
963
965
  }
964
966
  join.conditions.clear();
965
967
  } else {
966
- for (auto &expression : f_op->expressions) {
968
+ for (auto &expression : f_op.expressions) {
967
969
  if (filter_set.find(*expression) == filter_set.end()) {
968
970
  filter_set.insert(*expression);
969
971
  filters.push_back(std::move(expression));
970
972
  }
971
973
  }
972
- f_op->expressions.clear();
974
+ f_op.expressions.clear();
973
975
  }
974
976
  }
975
977
  // create potential edges from the comparisons
976
978
  for (idx_t i = 0; i < filters.size(); i++) {
977
979
  auto &filter = filters[i];
978
- auto info = make_uniq<FilterInfo>();
979
- auto filter_info = info.get();
980
- filter_infos.push_back(std::move(info));
981
980
  // first extract the relation set for the entire filter
982
981
  unordered_set<idx_t> bindings;
983
982
  ExtractBindings(*filter, bindings);
984
- filter_info->set = set_manager.GetJoinRelation(bindings);
985
- filter_info->filter_index = i;
983
+ auto &set = set_manager.GetJoinRelation(bindings);
984
+
985
+ auto info = make_uniq<FilterInfo>(set, i);
986
+ auto filter_info = info.get();
987
+ filter_infos.push_back(std::move(info));
988
+
986
989
  // now check if it can be used as a join predicate
987
990
  if (filter->GetExpressionClass() == ExpressionClass::BOUND_COMPARISON) {
988
991
  auto &comparison = filter->Cast<BoundComparisonExpression>();
@@ -995,15 +998,15 @@ unique_ptr<LogicalOperator> JoinOrderOptimizer::Optimize(unique_ptr<LogicalOpera
995
998
  if (!left_bindings.empty() && !right_bindings.empty()) {
996
999
  // both the left and the right side have bindings
997
1000
  // first create the relation sets, if they do not exist
998
- filter_info->left_set = set_manager.GetJoinRelation(left_bindings);
999
- filter_info->right_set = set_manager.GetJoinRelation(right_bindings);
1001
+ filter_info->left_set = &set_manager.GetJoinRelation(left_bindings);
1002
+ filter_info->right_set = &set_manager.GetJoinRelation(right_bindings);
1000
1003
  // we can only create a meaningful edge if the sets are not exactly the same
1001
1004
  if (filter_info->left_set != filter_info->right_set) {
1002
1005
  // check if the sets are disjoint
1003
1006
  if (Disjoint(left_bindings, right_bindings)) {
1004
1007
  // they are disjoint, we only need to create one set of edges in the join graph
1005
- query_graph.CreateEdge(filter_info->left_set, filter_info->right_set, filter_info);
1006
- query_graph.CreateEdge(filter_info->right_set, filter_info->left_set, filter_info);
1008
+ query_graph.CreateEdge(*filter_info->left_set, *filter_info->right_set, filter_info);
1009
+ query_graph.CreateEdge(*filter_info->right_set, *filter_info->left_set, filter_info);
1007
1010
  } else {
1008
1011
  continue;
1009
1012
  }
@@ -1019,15 +1022,15 @@ unique_ptr<LogicalOperator> JoinOrderOptimizer::Optimize(unique_ptr<LogicalOpera
1019
1022
  vector<NodeOp> nodes_ops;
1020
1023
  for (idx_t i = 0; i < relations.size(); i++) {
1021
1024
  auto &rel = *relations[i];
1022
- auto node = set_manager.GetJoinRelation(i);
1025
+ auto &node = set_manager.GetJoinRelation(i);
1023
1026
  nodes_ops.emplace_back(make_uniq<JoinNode>(node, 0), rel.op);
1024
1027
  }
1025
1028
 
1026
- cardinality_estimator.InitCardinalityEstimatorProps(&nodes_ops, &filter_infos);
1029
+ cardinality_estimator.InitCardinalityEstimatorProps(nodes_ops, filter_infos);
1027
1030
 
1028
1031
  for (auto &node_op : nodes_ops) {
1029
1032
  D_ASSERT(node_op.node);
1030
- plans[node_op.node->set] = std::move(node_op.node);
1033
+ plans[&node_op.node->set] = std::move(node_op.node);
1031
1034
  }
1032
1035
  // now we perform the actual dynamic programming to compute the final result
1033
1036
  SolveJoinOrder();
@@ -1037,8 +1040,8 @@ unique_ptr<LogicalOperator> JoinOrderOptimizer::Optimize(unique_ptr<LogicalOpera
1037
1040
  for (idx_t i = 0; i < relations.size(); i++) {
1038
1041
  bindings.insert(i);
1039
1042
  }
1040
- auto total_relation = set_manager.GetJoinRelation(bindings);
1041
- auto final_plan = plans.find(total_relation);
1043
+ auto &total_relation = set_manager.GetJoinRelation(bindings);
1044
+ auto final_plan = plans.find(&total_relation);
1042
1045
  if (final_plan == plans.end()) {
1043
1046
  // could not find the final plan
1044
1047
  // this should only happen in case the sets are actually disjunct
@@ -1051,11 +1054,11 @@ unique_ptr<LogicalOperator> JoinOrderOptimizer::Optimize(unique_ptr<LogicalOpera
1051
1054
  //! solve the join order again
1052
1055
  SolveJoinOrder();
1053
1056
  // now we can obtain the final plan!
1054
- final_plan = plans.find(total_relation);
1057
+ final_plan = plans.find(&total_relation);
1055
1058
  D_ASSERT(final_plan != plans.end());
1056
1059
  }
1057
1060
  // now perform the actual reordering
1058
- return RewritePlan(std::move(plan), final_plan->second.get());
1061
+ return RewritePlan(std::move(plan), *final_plan->second);
1059
1062
  }
1060
1063
 
1061
1064
  } // namespace duckdb