duckdb 0.8.2-dev2700.0 → 0.8.2-dev2809.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/icu/icu-makedate.cpp +12 -6
  3. package/src/duckdb/src/common/adbc/adbc.cpp +52 -21
  4. package/src/duckdb/src/common/adbc/driver_manager.cpp +12 -2
  5. package/src/duckdb/src/common/enum_util.cpp +5 -0
  6. package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +35 -5
  7. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +283 -91
  8. package/src/duckdb/src/execution/operator/filter/physical_filter.cpp +1 -1
  9. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -2
  10. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
  11. package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -6
  12. package/src/duckdb/src/execution/window_executor.cpp +10 -1
  13. package/src/duckdb/src/function/table/version/pragma_version.cpp +5 -2
  14. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +2 -0
  15. package/src/duckdb/src/include/duckdb/common/enums/pending_execution_result.hpp +1 -1
  16. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -1
  17. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +0 -2
  18. package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +5 -0
  19. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +37 -63
  20. package/src/duckdb/src/include/duckdb/optimizer/join_order/cost_model.hpp +37 -0
  21. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +14 -29
  22. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +7 -21
  23. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +0 -11
  24. package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +89 -0
  25. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +17 -31
  26. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +113 -0
  27. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +73 -0
  28. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +73 -0
  29. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +4 -1
  30. package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +0 -2
  31. package/src/duckdb/src/include/duckdb.h +11 -1
  32. package/src/duckdb/src/main/capi/pending-c.cpp +17 -0
  33. package/src/duckdb/src/main/pending_query_result.cpp +9 -1
  34. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +79 -325
  35. package/src/duckdb/src/optimizer/join_order/cost_model.cpp +19 -0
  36. package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -37
  37. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +48 -1078
  38. package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +552 -0
  39. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +32 -29
  40. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +409 -0
  41. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +356 -0
  42. package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +351 -0
  43. package/src/duckdb/src/parallel/executor.cpp +6 -0
  44. package/src/duckdb/src/parallel/task_scheduler.cpp +7 -0
  45. package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +1 -1
  46. package/src/duckdb/src/planner/operator/logical_get.cpp +4 -0
  47. package/src/duckdb/ub_src_optimizer_join_order.cpp +10 -0
@@ -0,0 +1,73 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/optimizer/join_order/relation_manager.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/common/unordered_map.hpp"
12
+ #include "duckdb/common/unordered_set.hpp"
13
+ #include "duckdb/optimizer/join_order/join_relation.hpp"
14
+ #include "duckdb/optimizer/join_order/cardinality_estimator.hpp"
15
+ #include "duckdb/optimizer/join_order/relation_statistics_helper.hpp"
16
+ #include "duckdb/optimizer/join_order/join_node.hpp"
17
+ #include "duckdb/parser/expression_map.hpp"
18
+ #include "duckdb/planner/logical_operator.hpp"
19
+ #include "duckdb/planner/logical_operator_visitor.hpp"
20
+
21
+ namespace duckdb {
22
+
23
+ struct FilterInfo;
24
+
25
+ //! Represents a single relation and any metadata accompanying that relation
26
+ struct SingleJoinRelation {
27
+ LogicalOperator &op;
28
+ optional_ptr<LogicalOperator> parent;
29
+ RelationStats stats;
30
+
31
+ SingleJoinRelation(LogicalOperator &op, optional_ptr<LogicalOperator> parent) : op(op), parent(parent) {
32
+ }
33
+ SingleJoinRelation(LogicalOperator &op, optional_ptr<LogicalOperator> parent, RelationStats stats)
34
+ : op(op), parent(parent), stats(stats) {
35
+ }
36
+ };
37
+
38
+ class RelationManager {
39
+ public:
40
+ explicit RelationManager(ClientContext &context) : context(context) {
41
+ }
42
+
43
+ idx_t NumRelations();
44
+
45
+ bool ExtractJoinRelations(LogicalOperator &input_op, vector<reference<LogicalOperator>> &filter_operators,
46
+ optional_ptr<LogicalOperator> parent = nullptr);
47
+
48
+ //! For each join filter in the logical plan op, extract the relations that are referred to on
49
+ //! both sides of the join filter, along with the tables & indexes.
50
+ vector<unique_ptr<FilterInfo>> ExtractEdges(LogicalOperator &op,
51
+ vector<reference<LogicalOperator>> &filter_operators,
52
+ JoinRelationSetManager &set_manager);
53
+
54
+ //! Extract the set of relations referred to inside an expression
55
+ bool ExtractBindings(Expression &expression, unordered_set<idx_t> &bindings);
56
+ void AddRelation(LogicalOperator &op, optional_ptr<LogicalOperator> parent, const RelationStats &stats);
57
+
58
+ void AddAggregateRelation(LogicalOperator &op, optional_ptr<LogicalOperator> parent, const RelationStats &stats);
59
+ vector<unique_ptr<SingleJoinRelation>> GetRelations();
60
+
61
+ const vector<RelationStats> GetRelationStats();
62
+ //! A mapping of base table index -> index into relations array (relation number)
63
+ unordered_map<idx_t, idx_t> relation_mapping;
64
+
65
+ void PrintRelationStats();
66
+
67
+ private:
68
+ ClientContext &context;
69
+ //! Set of all relations considered in the join optimizer
70
+ vector<unique_ptr<SingleJoinRelation>> relations;
71
+ };
72
+
73
+ } // namespace duckdb
@@ -0,0 +1,73 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/optimizer/join_order/relation_statistics_helper.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+ #pragma once
9
+
10
+ #include "duckdb/planner/filter/conjunction_filter.hpp"
11
+ #include "duckdb/planner/logical_operator.hpp"
12
+
13
+ namespace duckdb {
14
+
15
+ class CardinalityEstimator;
16
+
17
+ struct DistinctCount {
18
+ idx_t distinct_count;
19
+ bool from_hll;
20
+ };
21
+
22
+ struct ExpressionBinding {
23
+ bool found_expression = false;
24
+ ColumnBinding child_binding;
25
+ bool expression_is_constant = false;
26
+ };
27
+
28
+ struct RelationStats {
29
+ // column_id -> estimated distinct count for column
30
+ vector<DistinctCount> column_distinct_count;
31
+ idx_t cardinality;
32
+ double filter_strength = 1;
33
+ bool stats_initialized = false;
34
+
35
+ // for debug, column names and tables
36
+ vector<string> column_names;
37
+ string table_name;
38
+
39
+ RelationStats() : cardinality(1), filter_strength(1), stats_initialized(false) {
40
+ }
41
+ };
42
+
43
+ class RelationStatisticsHelper {
44
+ public:
45
+ static constexpr double DEFAULT_SELECTIVITY = 0.2;
46
+
47
+ public:
48
+ static idx_t InspectConjunctionAND(idx_t cardinality, idx_t column_index, ConjunctionAndFilter &filter,
49
+ BaseStatistics &base_stats);
50
+ // static idx_t InspectConjunctionOR(idx_t cardinality, idx_t column_index, ConjunctionOrFilter &filter,
51
+ // BaseStatistics &base_stats);
52
+ //! Extract Statistics from a LogicalGet.
53
+ static RelationStats ExtractGetStats(LogicalGet &get, ClientContext &context);
54
+ static RelationStats ExtractDelimGetStats(LogicalDelimGet &delim_get, ClientContext &context);
55
+ //! Create the statistics for a projection using the statistics of the operator that sits underneath the
56
+ //! projection. Then also create statistics for any extra columns the projection creates.
57
+ static RelationStats ExtractDummyScanStats(LogicalDummyScan &dummy_scan, ClientContext &context);
58
+ static RelationStats ExtractExpressionGetStats(LogicalExpressionGet &expression_get, ClientContext &context);
59
+ //! All relation extractors for blocking relations
60
+ static RelationStats ExtractProjectionStats(LogicalProjection &proj, RelationStats &child_stats);
61
+ static RelationStats ExtractAggregationStats(LogicalAggregate &aggr, RelationStats &child_stats);
62
+ static RelationStats ExtractWindowStats(LogicalWindow &window, RelationStats &child_stats);
63
+ //! Called after reordering a query plan with potentially 2+ relations.
64
+ static RelationStats CombineStatsOfReorderableOperator(vector<ColumnBinding> &bindings,
65
+ vector<RelationStats> relation_stats);
66
+ //! Called after reordering a query plan with potentially 2+ relations.
67
+ static RelationStats CombineStatsOfNonReorderableOperator(LogicalOperator &op, vector<RelationStats> child_stats);
68
+ static void CopyRelationStats(RelationStats &to, const RelationStats &from);
69
+
70
+ private:
71
+ };
72
+
73
+ } // namespace duckdb
@@ -39,7 +39,7 @@ class TaskScheduler {
39
39
  constexpr static int64_t TASK_TIMEOUT_USECS = 5000;
40
40
 
41
41
  public:
42
- TaskScheduler(DatabaseInstance &db);
42
+ explicit TaskScheduler(DatabaseInstance &db);
43
43
  ~TaskScheduler();
44
44
 
45
45
  DUCKDB_API static TaskScheduler &GetScheduler(ClientContext &context);
@@ -67,6 +67,9 @@ public:
67
67
  //! Send signals to n threads, signalling for them to wake up and attempt to execute a task
68
68
  void Signal(idx_t n);
69
69
 
70
+ //! Yield to other threads
71
+ void YieldThread();
72
+
70
73
  //! Set the allocator flush threshold
71
74
  void SetAllocatorFlushTreshold(idx_t threshold);
72
75
 
@@ -49,8 +49,6 @@ public:
49
49
  idx_t estimated_cardinality;
50
50
  bool has_estimated_cardinality;
51
51
 
52
- unique_ptr<EstimatedProperties> estimated_props;
53
-
54
52
  public:
55
53
  virtual vector<ColumnBinding> GetColumnBindings();
56
54
  static vector<ColumnBinding> GenerateColumnBindings(idx_t table_idx, idx_t column_count);
@@ -308,7 +308,8 @@ typedef enum { DuckDBSuccess = 0, DuckDBError = 1 } duckdb_state;
308
308
  typedef enum {
309
309
  DUCKDB_PENDING_RESULT_READY = 0,
310
310
  DUCKDB_PENDING_RESULT_NOT_READY = 1,
311
- DUCKDB_PENDING_ERROR = 2
311
+ DUCKDB_PENDING_ERROR = 2,
312
+ DUCKDB_PENDING_NO_TASKS_AVAILABLE = 3
312
313
  } duckdb_pending_state;
313
314
 
314
315
  //===--------------------------------------------------------------------===//
@@ -1260,6 +1261,15 @@ Otherwise, all remaining tasks must be executed first.
1260
1261
  */
1261
1262
  DUCKDB_API duckdb_state duckdb_execute_pending(duckdb_pending_result pending_result, duckdb_result *out_result);
1262
1263
 
1264
+ /*!
1265
+ Returns whether a duckdb_pending_state is finished executing. For example if `pending_state` is
1266
+ DUCKDB_PENDING_RESULT_READY, this function will return true.
1267
+
1268
+ * pending_state: The pending state on which to decide whether to finish execution.
1269
+ * returns: Boolean indicating whether the pending execution should be considered finished.
1270
+ */
1271
+ DUCKDB_API bool duckdb_pending_execution_is_finished(duckdb_pending_state pending_state);
1272
+
1263
1273
  //===--------------------------------------------------------------------===//
1264
1274
  // Value Interface
1265
1275
  //===--------------------------------------------------------------------===//
@@ -92,6 +92,8 @@ duckdb_pending_state duckdb_pending_execute_task(duckdb_pending_result pending_r
92
92
  switch (return_value) {
93
93
  case PendingExecutionResult::RESULT_READY:
94
94
  return DUCKDB_PENDING_RESULT_READY;
95
+ case PendingExecutionResult::NO_TASKS_AVAILABLE:
96
+ return DUCKDB_PENDING_NO_TASKS_AVAILABLE;
95
97
  case PendingExecutionResult::RESULT_NOT_READY:
96
98
  return DUCKDB_PENDING_RESULT_NOT_READY;
97
99
  default:
@@ -99,6 +101,21 @@ duckdb_pending_state duckdb_pending_execute_task(duckdb_pending_result pending_r
99
101
  }
100
102
  }
101
103
 
104
+ bool duckdb_pending_execution_is_finished(duckdb_pending_state pending_state) {
105
+ switch (pending_state) {
106
+ case DUCKDB_PENDING_RESULT_READY:
107
+ return PendingQueryResult::IsFinished(PendingExecutionResult::RESULT_READY);
108
+ case DUCKDB_PENDING_NO_TASKS_AVAILABLE:
109
+ return PendingQueryResult::IsFinished(PendingExecutionResult::NO_TASKS_AVAILABLE);
110
+ case DUCKDB_PENDING_RESULT_NOT_READY:
111
+ return PendingQueryResult::IsFinished(PendingExecutionResult::RESULT_NOT_READY);
112
+ case DUCKDB_PENDING_ERROR:
113
+ return PendingQueryResult::IsFinished(PendingExecutionResult::EXECUTION_ERROR);
114
+ default:
115
+ return PendingQueryResult::IsFinished(PendingExecutionResult::EXECUTION_ERROR);
116
+ }
117
+ }
118
+
102
119
  duckdb_state duckdb_execute_pending(duckdb_pending_result pending_result, duckdb_result *out_result) {
103
120
  if (!pending_result || !out_result) {
104
121
  return DuckDBError;
@@ -55,7 +55,8 @@ PendingExecutionResult PendingQueryResult::ExecuteTaskInternal(ClientContextLock
55
55
 
56
56
  unique_ptr<QueryResult> PendingQueryResult::ExecuteInternal(ClientContextLock &lock) {
57
57
  CheckExecutableInternal(lock);
58
- while (ExecuteTaskInternal(lock) == PendingExecutionResult::RESULT_NOT_READY) {
58
+ // Busy wait while execution is not finished
59
+ while (!IsFinished(ExecuteTaskInternal(lock))) {
59
60
  }
60
61
  if (HasError()) {
61
62
  return make_uniq<MaterializedQueryResult>(error);
@@ -74,4 +75,11 @@ void PendingQueryResult::Close() {
74
75
  context.reset();
75
76
  }
76
77
 
78
+ bool PendingQueryResult::IsFinished(PendingExecutionResult result) {
79
+ if (result == PendingExecutionResult::RESULT_READY || result == PendingExecutionResult::EXECUTION_ERROR) {
80
+ return true;
81
+ }
82
+ return false;
83
+ }
84
+
77
85
  } // namespace duckdb