duckdb 0.7.2-dev225.0 → 0.7.2-dev314.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/parquet/column_reader.cpp +5 -6
  3. package/src/duckdb/extension/parquet/include/column_reader.hpp +1 -2
  4. package/src/duckdb/extension/parquet/include/generated_column_reader.hpp +1 -11
  5. package/src/duckdb/extension/parquet/parquet_statistics.cpp +26 -32
  6. package/src/duckdb/src/common/sort/sort_state.cpp +5 -7
  7. package/src/duckdb/src/execution/column_binding_resolver.cpp +6 -0
  8. package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +4 -5
  9. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
  10. package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +2 -3
  11. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +32 -6
  12. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +15 -15
  13. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +18 -12
  14. package/src/duckdb/src/function/aggregate/distributive/bitstring_agg.cpp +6 -13
  15. package/src/duckdb/src/function/aggregate/distributive/count.cpp +2 -4
  16. package/src/duckdb/src/function/aggregate/distributive/sum.cpp +11 -13
  17. package/src/duckdb/src/function/scalar/date/date_diff.cpp +0 -1
  18. package/src/duckdb/src/function/scalar/date/date_part.cpp +17 -25
  19. package/src/duckdb/src/function/scalar/date/date_sub.cpp +0 -1
  20. package/src/duckdb/src/function/scalar/date/date_trunc.cpp +10 -14
  21. package/src/duckdb/src/function/scalar/generic/stats.cpp +2 -4
  22. package/src/duckdb/src/function/scalar/list/flatten.cpp +5 -12
  23. package/src/duckdb/src/function/scalar/list/list_concat.cpp +3 -8
  24. package/src/duckdb/src/function/scalar/list/list_extract.cpp +5 -12
  25. package/src/duckdb/src/function/scalar/list/list_value.cpp +5 -9
  26. package/src/duckdb/src/function/scalar/math/numeric.cpp +14 -17
  27. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +27 -34
  28. package/src/duckdb/src/function/scalar/string/caseconvert.cpp +2 -6
  29. package/src/duckdb/src/function/scalar/string/instr.cpp +2 -6
  30. package/src/duckdb/src/function/scalar/string/length.cpp +2 -6
  31. package/src/duckdb/src/function/scalar/string/like.cpp +2 -6
  32. package/src/duckdb/src/function/scalar/string/substring.cpp +2 -6
  33. package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +4 -9
  34. package/src/duckdb/src/function/scalar/struct/struct_insert.cpp +10 -13
  35. package/src/duckdb/src/function/scalar/struct/struct_pack.cpp +5 -6
  36. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  37. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
  38. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +12 -3
  39. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +2 -2
  40. package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +2 -0
  41. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -2
  42. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +2 -2
  43. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_fetch.hpp +1 -1
  44. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -1
  45. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +2 -2
  46. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_fetch.hpp +1 -1
  47. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -1
  48. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +5 -2
  49. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  50. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +93 -31
  51. package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +22 -3
  52. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +6 -6
  53. package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +41 -0
  54. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +157 -0
  55. package/src/duckdb/src/include/duckdb/storage/statistics/segment_statistics.hpp +2 -7
  56. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +74 -0
  57. package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +42 -0
  58. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +2 -3
  59. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +2 -2
  60. package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +2 -1
  61. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -3
  62. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +3 -2
  63. package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
  64. package/src/duckdb/src/main/config.cpp +66 -1
  65. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +0 -1
  66. package/src/duckdb/src/optimizer/statistics/expression/propagate_aggregate.cpp +9 -3
  67. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +6 -7
  68. package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -11
  69. package/src/duckdb/src/optimizer/statistics/expression/propagate_columnref.cpp +1 -1
  70. package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +13 -15
  71. package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +0 -1
  72. package/src/duckdb/src/optimizer/statistics/expression/propagate_constant.cpp +3 -75
  73. package/src/duckdb/src/optimizer/statistics/expression/propagate_function.cpp +7 -2
  74. package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +10 -0
  75. package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +2 -3
  76. package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +28 -31
  77. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +4 -5
  78. package/src/duckdb/src/optimizer/statistics/operator/propagate_set_operation.cpp +3 -3
  79. package/src/duckdb/src/optimizer/statistics_propagator.cpp +1 -1
  80. package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +4 -0
  81. package/src/duckdb/src/planner/bind_context.cpp +16 -0
  82. package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +0 -1
  83. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +9 -0
  84. package/src/duckdb/src/planner/binder.cpp +2 -1
  85. package/src/duckdb/src/planner/bound_result_modifier.cpp +1 -1
  86. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +1 -1
  87. package/src/duckdb/src/planner/filter/constant_filter.cpp +4 -6
  88. package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -1
  89. package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +1 -4
  90. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +4 -4
  91. package/src/duckdb/src/storage/compression/bitpacking.cpp +3 -3
  92. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +3 -3
  93. package/src/duckdb/src/storage/compression/numeric_constant.cpp +9 -10
  94. package/src/duckdb/src/storage/compression/patas.cpp +1 -1
  95. package/src/duckdb/src/storage/compression/rle.cpp +2 -2
  96. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +5 -5
  97. package/src/duckdb/src/storage/data_table.cpp +4 -6
  98. package/src/duckdb/src/storage/statistics/base_statistics.cpp +373 -128
  99. package/src/duckdb/src/storage/statistics/column_statistics.cpp +58 -3
  100. package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +4 -9
  101. package/src/duckdb/src/storage/statistics/list_stats.cpp +117 -0
  102. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +529 -0
  103. package/src/duckdb/src/storage/statistics/segment_statistics.cpp +2 -11
  104. package/src/duckdb/src/storage/statistics/string_stats.cpp +273 -0
  105. package/src/duckdb/src/storage/statistics/struct_stats.cpp +131 -0
  106. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  107. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -4
  108. package/src/duckdb/src/storage/table/column_data.cpp +16 -14
  109. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +2 -3
  110. package/src/duckdb/src/storage/table/column_segment.cpp +6 -8
  111. package/src/duckdb/src/storage/table/list_column_data.cpp +7 -11
  112. package/src/duckdb/src/storage/table/row_group.cpp +24 -23
  113. package/src/duckdb/src/storage/table/row_group_collection.cpp +12 -12
  114. package/src/duckdb/src/storage/table/standard_column_data.cpp +6 -6
  115. package/src/duckdb/src/storage/table/struct_column_data.cpp +15 -16
  116. package/src/duckdb/src/storage/table/table_statistics.cpp +27 -7
  117. package/src/duckdb/src/storage/table/update_segment.cpp +10 -12
  118. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +923 -919
  119. package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +2 -0
  120. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +15684 -15571
  121. package/src/duckdb/ub_src_storage_statistics.cpp +4 -6
  122. package/src/duckdb/src/include/duckdb/storage/statistics/list_statistics.hpp +0 -36
  123. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_statistics.hpp +0 -75
  124. package/src/duckdb/src/include/duckdb/storage/statistics/string_statistics.hpp +0 -49
  125. package/src/duckdb/src/include/duckdb/storage/statistics/struct_statistics.hpp +0 -36
  126. package/src/duckdb/src/include/duckdb/storage/statistics/validity_statistics.hpp +0 -45
  127. package/src/duckdb/src/storage/statistics/list_statistics.cpp +0 -94
  128. package/src/duckdb/src/storage/statistics/numeric_statistics.cpp +0 -307
  129. package/src/duckdb/src/storage/statistics/string_statistics.cpp +0 -220
  130. package/src/duckdb/src/storage/statistics/struct_statistics.cpp +0 -108
  131. package/src/duckdb/src/storage/statistics/validity_statistics.cpp +0 -91
@@ -1,15 +1,14 @@
1
1
  #include "duckdb/optimizer/statistics_propagator.hpp"
2
2
  #include "duckdb/planner/expression/bound_comparison_expression.hpp"
3
3
  #include "duckdb/planner/expression/bound_constant_expression.hpp"
4
- #include "duckdb/storage/statistics/numeric_statistics.hpp"
5
4
  #include "duckdb/optimizer/expression_rewriter.hpp"
6
5
 
7
6
  namespace duckdb {
8
7
 
9
- FilterPropagateResult StatisticsPropagator::PropagateComparison(BaseStatistics &left, BaseStatistics &right,
8
+ FilterPropagateResult StatisticsPropagator::PropagateComparison(BaseStatistics &lstats, BaseStatistics &rstats,
10
9
  ExpressionType comparison) {
11
10
  // only handle numerics for now
12
- switch (left.type.InternalType()) {
11
+ switch (lstats.GetType().InternalType()) {
13
12
  case PhysicalType::BOOL:
14
13
  case PhysicalType::UINT8:
15
14
  case PhysicalType::UINT16:
@@ -26,9 +25,7 @@ FilterPropagateResult StatisticsPropagator::PropagateComparison(BaseStatistics &
26
25
  default:
27
26
  return FilterPropagateResult::NO_PRUNING_POSSIBLE;
28
27
  }
29
- auto &lstats = (NumericStatistics &)left;
30
- auto &rstats = (NumericStatistics &)right;
31
- if (lstats.min.IsNull() || lstats.max.IsNull() || rstats.min.IsNull() || rstats.max.IsNull()) {
28
+ if (!NumericStats::HasMinMax(lstats) || !NumericStats::HasMinMax(rstats)) {
32
29
  // no stats available: nothing to prune
33
30
  return FilterPropagateResult::NO_PRUNING_POSSIBLE;
34
31
  }
@@ -38,52 +35,53 @@ FilterPropagateResult StatisticsPropagator::PropagateComparison(BaseStatistics &
38
35
  switch (comparison) {
39
36
  case ExpressionType::COMPARE_EQUAL:
40
37
  // l = r, if l.min > r.max or r.min > l.max equality is not possible
41
- if (lstats.min > rstats.max || rstats.min > lstats.max) {
38
+ if (NumericStats::Min(lstats) > NumericStats::Max(rstats) ||
39
+ NumericStats::Min(rstats) > NumericStats::Max(lstats)) {
42
40
  return has_null ? FilterPropagateResult::FILTER_FALSE_OR_NULL : FilterPropagateResult::FILTER_ALWAYS_FALSE;
43
41
  } else {
44
42
  return FilterPropagateResult::NO_PRUNING_POSSIBLE;
45
43
  }
46
44
  case ExpressionType::COMPARE_GREATERTHAN:
47
45
  // l > r
48
- if (lstats.min > rstats.max) {
46
+ if (NumericStats::Min(lstats) > NumericStats::Max(rstats)) {
49
47
  // if l.min > r.max, it is always true ONLY if neither side contains nulls
50
48
  return has_null ? FilterPropagateResult::FILTER_TRUE_OR_NULL : FilterPropagateResult::FILTER_ALWAYS_TRUE;
51
49
  }
52
50
  // if r.min is bigger or equal to l.max, the filter is always false
53
- if (rstats.min >= lstats.max) {
51
+ if (NumericStats::Min(rstats) >= NumericStats::Max(lstats)) {
54
52
  return has_null ? FilterPropagateResult::FILTER_FALSE_OR_NULL : FilterPropagateResult::FILTER_ALWAYS_FALSE;
55
53
  }
56
54
  return FilterPropagateResult::NO_PRUNING_POSSIBLE;
57
55
  case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
58
56
  // l >= r
59
- if (lstats.min >= rstats.max) {
57
+ if (NumericStats::Min(lstats) >= NumericStats::Max(rstats)) {
60
58
  // if l.min >= r.max, it is always true ONLY if neither side contains nulls
61
59
  return has_null ? FilterPropagateResult::FILTER_TRUE_OR_NULL : FilterPropagateResult::FILTER_ALWAYS_TRUE;
62
60
  }
63
61
  // if r.min > l.max, the filter is always false
64
- if (rstats.min > lstats.max) {
62
+ if (NumericStats::Min(rstats) > NumericStats::Max(lstats)) {
65
63
  return has_null ? FilterPropagateResult::FILTER_FALSE_OR_NULL : FilterPropagateResult::FILTER_ALWAYS_FALSE;
66
64
  }
67
65
  return FilterPropagateResult::NO_PRUNING_POSSIBLE;
68
66
  case ExpressionType::COMPARE_LESSTHAN:
69
67
  // l < r
70
- if (lstats.max < rstats.min) {
68
+ if (NumericStats::Max(lstats) < NumericStats::Min(rstats)) {
71
69
  // if l.max < r.min, it is always true ONLY if neither side contains nulls
72
70
  return has_null ? FilterPropagateResult::FILTER_TRUE_OR_NULL : FilterPropagateResult::FILTER_ALWAYS_TRUE;
73
71
  }
74
72
  // if l.min >= rstats.max, the filter is always false
75
- if (lstats.min >= rstats.max) {
73
+ if (NumericStats::Min(lstats) >= NumericStats::Max(rstats)) {
76
74
  return has_null ? FilterPropagateResult::FILTER_FALSE_OR_NULL : FilterPropagateResult::FILTER_ALWAYS_FALSE;
77
75
  }
78
76
  return FilterPropagateResult::NO_PRUNING_POSSIBLE;
79
77
  case ExpressionType::COMPARE_LESSTHANOREQUALTO:
80
78
  // l <= r
81
- if (lstats.max <= rstats.min) {
79
+ if (NumericStats::Max(lstats) <= NumericStats::Min(rstats)) {
82
80
  // if l.max <= r.min, it is always true ONLY if neither side contains nulls
83
81
  return has_null ? FilterPropagateResult::FILTER_TRUE_OR_NULL : FilterPropagateResult::FILTER_ALWAYS_TRUE;
84
82
  }
85
83
  // if l.min > rstats.max, the filter is always false
86
- if (lstats.min > rstats.max) {
84
+ if (NumericStats::Min(lstats) > NumericStats::Max(rstats)) {
87
85
  return has_null ? FilterPropagateResult::FILTER_FALSE_OR_NULL : FilterPropagateResult::FILTER_ALWAYS_FALSE;
88
86
  }
89
87
  return FilterPropagateResult::NO_PRUNING_POSSIBLE;
@@ -2,7 +2,6 @@
2
2
  #include "duckdb/optimizer/statistics_propagator.hpp"
3
3
  #include "duckdb/planner/expression/bound_conjunction_expression.hpp"
4
4
  #include "duckdb/planner/expression/bound_constant_expression.hpp"
5
- #include "duckdb/storage/statistics/numeric_statistics.hpp"
6
5
  #include "duckdb/optimizer/expression_rewriter.hpp"
7
6
  #include "duckdb/execution/expression_executor.hpp"
8
7
 
@@ -1,85 +1,13 @@
1
1
  #include "duckdb/optimizer/statistics_propagator.hpp"
2
2
  #include "duckdb/planner/expression/bound_constant_expression.hpp"
3
3
  #include "duckdb/storage/statistics/distinct_statistics.hpp"
4
- #include "duckdb/storage/statistics/list_statistics.hpp"
5
- #include "duckdb/storage/statistics/numeric_statistics.hpp"
6
- #include "duckdb/storage/statistics/string_statistics.hpp"
7
- #include "duckdb/storage/statistics/struct_statistics.hpp"
4
+ #include "duckdb/storage/statistics/list_stats.hpp"
5
+ #include "duckdb/storage/statistics/struct_stats.hpp"
8
6
 
9
7
  namespace duckdb {
10
8
 
11
- void UpdateDistinctStats(BaseStatistics &distinct_stats, const Value &input) {
12
- Vector v(input);
13
- auto &d_stats = (DistinctStatistics &)distinct_stats;
14
- d_stats.Update(v, 1);
15
- }
16
-
17
9
  unique_ptr<BaseStatistics> StatisticsPropagator::StatisticsFromValue(const Value &input) {
18
- switch (input.type().InternalType()) {
19
- case PhysicalType::BOOL:
20
- case PhysicalType::UINT8:
21
- case PhysicalType::UINT16:
22
- case PhysicalType::UINT32:
23
- case PhysicalType::UINT64:
24
- case PhysicalType::INT8:
25
- case PhysicalType::INT16:
26
- case PhysicalType::INT32:
27
- case PhysicalType::INT64:
28
- case PhysicalType::INT128:
29
- case PhysicalType::FLOAT:
30
- case PhysicalType::DOUBLE: {
31
- auto result = make_unique<NumericStatistics>(input.type(), input, input, StatisticsType::GLOBAL_STATS);
32
- result->validity_stats = make_unique<ValidityStatistics>(input.IsNull(), !input.IsNull());
33
- UpdateDistinctStats(*result->distinct_stats, input);
34
- return std::move(result);
35
- }
36
- case PhysicalType::VARCHAR: {
37
- auto result = make_unique<StringStatistics>(input.type(), StatisticsType::GLOBAL_STATS);
38
- result->validity_stats = make_unique<ValidityStatistics>(input.IsNull(), !input.IsNull());
39
- UpdateDistinctStats(*result->distinct_stats, input);
40
- if (!input.IsNull()) {
41
- auto &string_value = StringValue::Get(input);
42
- result->Update(string_t(string_value));
43
- }
44
- return std::move(result);
45
- }
46
- case PhysicalType::STRUCT: {
47
- auto result = make_unique<StructStatistics>(input.type());
48
- result->validity_stats = make_unique<ValidityStatistics>(input.IsNull(), !input.IsNull());
49
- if (input.IsNull()) {
50
- for (auto &child_stat : result->child_stats) {
51
- child_stat.reset();
52
- }
53
- } else {
54
- auto &struct_children = StructValue::GetChildren(input);
55
- D_ASSERT(result->child_stats.size() == struct_children.size());
56
- for (idx_t i = 0; i < result->child_stats.size(); i++) {
57
- result->child_stats[i] = StatisticsFromValue(struct_children[i]);
58
- }
59
- }
60
- return std::move(result);
61
- }
62
- case PhysicalType::LIST: {
63
- auto result = make_unique<ListStatistics>(input.type());
64
- result->validity_stats = make_unique<ValidityStatistics>(input.IsNull(), !input.IsNull());
65
- if (input.IsNull()) {
66
- result->child_stats.reset();
67
- } else {
68
- auto &list_children = ListValue::GetChildren(input);
69
- for (auto &child_element : list_children) {
70
- auto child_element_stats = StatisticsFromValue(child_element);
71
- if (child_element_stats) {
72
- result->child_stats->Merge(*child_element_stats);
73
- } else {
74
- result->child_stats.reset();
75
- }
76
- }
77
- }
78
- return std::move(result);
79
- }
80
- default:
81
- return nullptr;
82
- }
10
+ return BaseStatistics::FromConstant(input).ToUnique();
83
11
  }
84
12
 
85
13
  unique_ptr<BaseStatistics> StatisticsPropagator::PropagateExpression(BoundConstantExpression &constant,
@@ -5,10 +5,15 @@ namespace duckdb {
5
5
 
6
6
  unique_ptr<BaseStatistics> StatisticsPropagator::PropagateExpression(BoundFunctionExpression &func,
7
7
  unique_ptr<Expression> *expr_ptr) {
8
- vector<unique_ptr<BaseStatistics>> stats;
8
+ vector<BaseStatistics> stats;
9
9
  stats.reserve(func.children.size());
10
10
  for (idx_t i = 0; i < func.children.size(); i++) {
11
- stats.push_back(PropagateExpression(func.children[i]));
11
+ auto stat = PropagateExpression(func.children[i]);
12
+ if (!stat) {
13
+ stats.push_back(BaseStatistics::CreateUnknown(func.children[i]->return_type));
14
+ } else {
15
+ stats.push_back(stat->Copy());
16
+ }
12
17
  }
13
18
  if (!func.function.statistics) {
14
19
  return nullptr;
@@ -62,6 +62,11 @@ unique_ptr<BaseStatistics> StatisticsPropagator::PropagateExpression(BoundOperat
62
62
  *expr_ptr = make_unique<BoundConstantExpression>(Value::BOOLEAN(false));
63
63
  return PropagateExpression(*expr_ptr);
64
64
  }
65
+ if (!child_stats[0]->CanHaveNoNull()) {
66
+ // child has no valid values: x IS NULL will always be true
67
+ *expr_ptr = make_unique<BoundConstantExpression>(Value::BOOLEAN(true));
68
+ return PropagateExpression(*expr_ptr);
69
+ }
65
70
  return nullptr;
66
71
  case ExpressionType::OPERATOR_IS_NOT_NULL:
67
72
  if (!child_stats[0]->CanHaveNull()) {
@@ -69,6 +74,11 @@ unique_ptr<BaseStatistics> StatisticsPropagator::PropagateExpression(BoundOperat
69
74
  *expr_ptr = make_unique<BoundConstantExpression>(Value::BOOLEAN(true));
70
75
  return PropagateExpression(*expr_ptr);
71
76
  }
77
+ if (!child_stats[0]->CanHaveNoNull()) {
78
+ // child has no valid values: x IS NOT NULL will always be false
79
+ *expr_ptr = make_unique<BoundConstantExpression>(Value::BOOLEAN(false));
80
+ return PropagateExpression(*expr_ptr);
81
+ }
72
82
  return nullptr;
73
83
  default:
74
84
  return nullptr;
@@ -1,6 +1,5 @@
1
1
  #include "duckdb/optimizer/statistics_propagator.hpp"
2
2
  #include "duckdb/planner/operator/logical_aggregate.hpp"
3
- #include "duckdb/storage/statistics/validity_statistics.hpp"
4
3
 
5
4
  namespace duckdb {
6
5
 
@@ -13,14 +12,14 @@ unique_ptr<NodeStatistics> StatisticsPropagator::PropagateStatistics(LogicalAggr
13
12
  aggr.group_stats.resize(aggr.groups.size());
14
13
  for (idx_t group_idx = 0; group_idx < aggr.groups.size(); group_idx++) {
15
14
  auto stats = PropagateExpression(aggr.groups[group_idx]);
16
- aggr.group_stats[group_idx] = stats ? stats->Copy() : nullptr;
15
+ aggr.group_stats[group_idx] = stats ? stats->ToUnique() : nullptr;
17
16
  if (!stats) {
18
17
  continue;
19
18
  }
20
19
  if (aggr.grouping_sets.size() > 1) {
21
20
  // aggregates with multiple grouping sets can introduce NULL values to certain groups
22
21
  // FIXME: actually figure out WHICH groups can have null values introduced
23
- stats->validity_stats = make_unique<ValidityStatistics>(true, true);
22
+ stats->Set(StatsInfo::CAN_HAVE_NULL_VALUES);
24
23
  continue;
25
24
  }
26
25
  ColumnBinding group_binding(aggr.group_index, group_idx);
@@ -5,7 +5,7 @@
5
5
  #include "duckdb/planner/expression/bound_comparison_expression.hpp"
6
6
  #include "duckdb/planner/expression/bound_constant_expression.hpp"
7
7
  #include "duckdb/planner/operator/logical_filter.hpp"
8
- #include "duckdb/storage/statistics/numeric_statistics.hpp"
8
+ #include "duckdb/storage/statistics/base_statistics.hpp"
9
9
 
10
10
  namespace duckdb {
11
11
 
@@ -35,21 +35,20 @@ void StatisticsPropagator::SetStatisticsNotNull(ColumnBinding binding) {
35
35
  if (entry == statistics_map.end()) {
36
36
  return;
37
37
  }
38
- entry->second->validity_stats = make_unique<ValidityStatistics>(false);
38
+ entry->second->Set(StatsInfo::CANNOT_HAVE_NULL_VALUES);
39
39
  }
40
40
 
41
41
  void StatisticsPropagator::UpdateFilterStatistics(BaseStatistics &stats, ExpressionType comparison_type,
42
42
  const Value &constant) {
43
43
  // regular comparisons removes all null values
44
44
  if (!IsCompareDistinct(comparison_type)) {
45
- stats.validity_stats = make_unique<ValidityStatistics>(false);
45
+ stats.Set(StatsInfo::CANNOT_HAVE_NULL_VALUES);
46
46
  }
47
- if (!stats.type.IsNumeric()) {
47
+ if (!stats.GetType().IsNumeric()) {
48
48
  // don't handle non-numeric columns here (yet)
49
49
  return;
50
50
  }
51
- auto &numeric_stats = (NumericStatistics &)stats;
52
- if (numeric_stats.min.IsNull() || numeric_stats.max.IsNull()) {
51
+ if (!NumericStats::HasMinMax(stats)) {
53
52
  // no stats available: skip this
54
53
  return;
55
54
  }
@@ -58,19 +57,19 @@ void StatisticsPropagator::UpdateFilterStatistics(BaseStatistics &stats, Express
58
57
  case ExpressionType::COMPARE_LESSTHANOREQUALTO:
59
58
  // X < constant OR X <= constant
60
59
  // max becomes the constant
61
- numeric_stats.max = constant;
60
+ NumericStats::SetMax(stats, constant);
62
61
  break;
63
62
  case ExpressionType::COMPARE_GREATERTHAN:
64
63
  case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
65
64
  // X > constant OR X >= constant
66
65
  // min becomes the constant
67
- numeric_stats.min = constant;
66
+ NumericStats::SetMin(stats, constant);
68
67
  break;
69
68
  case ExpressionType::COMPARE_EQUAL:
70
69
  // X = constant
71
70
  // both min and max become the constant
72
- numeric_stats.min = constant;
73
- numeric_stats.max = constant;
71
+ NumericStats::SetMin(stats, constant);
72
+ NumericStats::SetMax(stats, constant);
74
73
  break;
75
74
  default:
76
75
  break;
@@ -81,17 +80,15 @@ void StatisticsPropagator::UpdateFilterStatistics(BaseStatistics &lstats, BaseSt
81
80
  ExpressionType comparison_type) {
82
81
  // regular comparisons removes all null values
83
82
  if (!IsCompareDistinct(comparison_type)) {
84
- lstats.validity_stats = make_unique<ValidityStatistics>(false);
85
- rstats.validity_stats = make_unique<ValidityStatistics>(false);
83
+ lstats.Set(StatsInfo::CANNOT_HAVE_NULL_VALUES);
84
+ rstats.Set(StatsInfo::CANNOT_HAVE_NULL_VALUES);
86
85
  }
87
- D_ASSERT(lstats.type == rstats.type);
88
- if (!lstats.type.IsNumeric()) {
86
+ D_ASSERT(lstats.GetType() == rstats.GetType());
87
+ if (!lstats.GetType().IsNumeric()) {
89
88
  // don't handle non-numeric columns here (yet)
90
89
  return;
91
90
  }
92
- auto &left_stats = (NumericStatistics &)lstats;
93
- auto &right_stats = (NumericStatistics &)rstats;
94
- if (left_stats.min.IsNull() || left_stats.max.IsNull() || right_stats.min.IsNull() || right_stats.max.IsNull()) {
91
+ if (!NumericStats::HasMinMax(lstats) || !NumericStats::HasMinMax(rstats)) {
95
92
  // no stats available: skip this
96
93
  return;
97
94
  }
@@ -104,14 +101,14 @@ void StatisticsPropagator::UpdateFilterStatistics(BaseStatistics &lstats, BaseSt
104
101
 
105
102
  // we know that left.max is AT MOST equal to right.max
106
103
  // because any value in left that is BIGGER than right.max will not pass the filter
107
- if (left_stats.max > right_stats.max) {
108
- left_stats.max = right_stats.max;
104
+ if (NumericStats::Max(lstats) > NumericStats::Max(rstats)) {
105
+ NumericStats::SetMax(lstats, NumericStats::Max(rstats));
109
106
  }
110
107
 
111
108
  // we also know that right.min is AT MOST equal to left.min
112
109
  // because any value in right that is SMALLER than left.min will not pass the filter
113
- if (right_stats.min < left_stats.min) {
114
- right_stats.min = left_stats.min;
110
+ if (NumericStats::Min(rstats) < NumericStats::Min(lstats)) {
111
+ NumericStats::SetMin(rstats, NumericStats::Min(lstats));
115
112
  }
116
113
  // so in our example, the bounds get updated as follows:
117
114
  // left: [-50, 100], right: [-50, 100]
@@ -121,11 +118,11 @@ void StatisticsPropagator::UpdateFilterStatistics(BaseStatistics &lstats, BaseSt
121
118
  // LEFT > RIGHT OR LEFT >= RIGHT
122
119
  // we know that every value of left is bigger (or equal to) every value in right
123
120
  // this is essentially the inverse of the less than (or equal to) scenario
124
- if (right_stats.max > left_stats.max) {
125
- right_stats.max = left_stats.max;
121
+ if (NumericStats::Max(rstats) > NumericStats::Max(lstats)) {
122
+ NumericStats::SetMax(rstats, NumericStats::Max(lstats));
126
123
  }
127
- if (left_stats.min < right_stats.min) {
128
- left_stats.min = right_stats.min;
124
+ if (NumericStats::Min(lstats) < NumericStats::Min(rstats)) {
125
+ NumericStats::SetMin(lstats, NumericStats::Min(rstats));
129
126
  }
130
127
  break;
131
128
  case ExpressionType::COMPARE_EQUAL:
@@ -135,16 +132,16 @@ void StatisticsPropagator::UpdateFilterStatistics(BaseStatistics &lstats, BaseSt
135
132
  // so if we have e.g. left = [-50, 250] and right = [-100, 100]
136
133
  // the tighest bounds are [-50, 100]
137
134
  // select the highest min
138
- if (left_stats.min > right_stats.min) {
139
- right_stats.min = left_stats.min;
135
+ if (NumericStats::Min(lstats) > NumericStats::Min(rstats)) {
136
+ NumericStats::SetMin(rstats, NumericStats::Min(lstats));
140
137
  } else {
141
- left_stats.min = right_stats.min;
138
+ NumericStats::SetMin(lstats, NumericStats::Min(rstats));
142
139
  }
143
140
  // select the lowest max
144
- if (left_stats.max < right_stats.max) {
145
- right_stats.max = left_stats.max;
141
+ if (NumericStats::Max(lstats) < NumericStats::Max(rstats)) {
142
+ NumericStats::SetMax(rstats, NumericStats::Max(lstats));
146
143
  } else {
147
- left_stats.max = right_stats.max;
144
+ NumericStats::SetMax(lstats, NumericStats::Max(rstats));
148
145
  }
149
146
  break;
150
147
  default:
@@ -7,7 +7,6 @@
7
7
  #include "duckdb/planner/operator/logical_join.hpp"
8
8
  #include "duckdb/planner/operator/logical_limit.hpp"
9
9
  #include "duckdb/planner/operator/logical_positional_join.hpp"
10
- #include "duckdb/storage/statistics/validity_statistics.hpp"
11
10
 
12
11
  namespace duckdb {
13
12
 
@@ -210,7 +209,7 @@ unique_ptr<NodeStatistics> StatisticsPropagator::PropagateStatistics(LogicalJoin
210
209
  for (auto &binding : right_bindings) {
211
210
  auto stats = statistics_map.find(binding);
212
211
  if (stats != statistics_map.end()) {
213
- stats->second->validity_stats = make_unique<ValidityStatistics>(true);
212
+ stats->second->Set(StatsInfo::CAN_HAVE_NULL_VALUES);
214
213
  }
215
214
  }
216
215
  }
@@ -219,7 +218,7 @@ unique_ptr<NodeStatistics> StatisticsPropagator::PropagateStatistics(LogicalJoin
219
218
  for (auto &binding : left_bindings) {
220
219
  auto stats = statistics_map.find(binding);
221
220
  if (stats != statistics_map.end()) {
222
- stats->second->validity_stats = make_unique<ValidityStatistics>(true);
221
+ stats->second->Set(StatsInfo::CAN_HAVE_NULL_VALUES);
223
222
  }
224
223
  }
225
224
  }
@@ -265,7 +264,7 @@ unique_ptr<NodeStatistics> StatisticsPropagator::PropagateStatistics(LogicalPosi
265
264
  for (auto &binding : left_bindings) {
266
265
  auto stats = statistics_map.find(binding);
267
266
  if (stats != statistics_map.end()) {
268
- stats->second->validity_stats = make_unique<ValidityStatistics>(true);
267
+ stats->second->Set(StatsInfo::CAN_HAVE_NULL_VALUES);
269
268
  }
270
269
  }
271
270
 
@@ -274,7 +273,7 @@ unique_ptr<NodeStatistics> StatisticsPropagator::PropagateStatistics(LogicalPosi
274
273
  for (auto &binding : right_bindings) {
275
274
  auto stats = statistics_map.find(binding);
276
275
  if (stats != statistics_map.end()) {
277
- stats->second->validity_stats = make_unique<ValidityStatistics>(true);
276
+ stats->second->Set(StatsInfo::CAN_HAVE_NULL_VALUES);
278
277
  }
279
278
  }
280
279
 
@@ -47,18 +47,18 @@ unique_ptr<NodeStatistics> StatisticsPropagator::PropagateStatistics(LogicalSetO
47
47
  switch (setop.type) {
48
48
  case LogicalOperatorType::LOGICAL_UNION:
49
49
  // union: merge the stats of the LHS and RHS together
50
- new_stats = left_entry->second->Copy();
50
+ new_stats = left_entry->second->ToUnique();
51
51
  new_stats->Merge(*right_entry->second);
52
52
  break;
53
53
  case LogicalOperatorType::LOGICAL_EXCEPT:
54
54
  // except: use the stats of the LHS
55
- new_stats = left_entry->second->Copy();
55
+ new_stats = left_entry->second->ToUnique();
56
56
  break;
57
57
  case LogicalOperatorType::LOGICAL_INTERSECT:
58
58
  // intersect: intersect the two stats
59
59
  // FIXME: for now we just use the stats of the LHS, as this is correct
60
60
  // however, the stats can be further refined to the minimal subset of the LHS and RHS
61
- new_stats = left_entry->second->Copy();
61
+ new_stats = left_entry->second->ToUnique();
62
62
  break;
63
63
  default:
64
64
  throw InternalException("Unsupported setop type");
@@ -92,7 +92,7 @@ unique_ptr<BaseStatistics> StatisticsPropagator::PropagateExpression(Expression
92
92
  unique_ptr<BaseStatistics> StatisticsPropagator::PropagateExpression(unique_ptr<Expression> &expr) {
93
93
  auto stats = PropagateExpression(*expr, &expr);
94
94
  if (ClientConfig::GetConfig(context).query_verification_enabled && stats) {
95
- expr->verification_stats = stats->Copy();
95
+ expr->verification_stats = stats->ToUnique();
96
96
  }
97
97
  return stats;
98
98
  }
@@ -28,6 +28,10 @@ unique_ptr<TableRef> Transformer::TransformJoin(duckdb_libpgquery::PGJoinExpr *r
28
28
  result->type = JoinType::SEMI;
29
29
  break;
30
30
  }
31
+ case duckdb_libpgquery::PG_JOIN_ANTI: {
32
+ result->type = JoinType::ANTI;
33
+ break;
34
+ }
31
35
  case duckdb_libpgquery::PG_JOIN_POSITION: {
32
36
  result->ref_type = JoinRefType::POSITIONAL;
33
37
  break;
@@ -555,4 +555,20 @@ void BindContext::AddContext(BindContext other) {
555
555
  }
556
556
  }
557
557
 
558
+ void BindContext::RemoveContext(vector<std::pair<string, duckdb::Binding *>> &other_bindings_list) {
559
+ for (auto &other_binding : other_bindings_list) {
560
+ if (bindings.find(other_binding.first) != bindings.end()) {
561
+ bindings.erase(other_binding.first);
562
+ }
563
+ }
564
+
565
+ vector<idx_t> delete_list_indexes;
566
+ for (auto &other_binding : other_bindings_list) {
567
+ auto it =
568
+ std::remove_if(bindings_list.begin(), bindings_list.end(),
569
+ [other_binding](std::pair<string, Binding *> &x) { return x.first == other_binding.first; });
570
+ bindings_list.erase(it, bindings_list.end());
571
+ }
572
+ }
573
+
558
574
  } // namespace duckdb
@@ -104,7 +104,6 @@ unique_ptr<LogicalOperator> Binder::CreatePlan(BoundSelectNode &statement) {
104
104
  PlanSubqueries(&expr, &root);
105
105
  }
106
106
 
107
- // create the projection
108
107
  auto proj = make_unique<LogicalProjection>(statement.projection_index, std::move(statement.select_list));
109
108
  auto &projection = *proj;
110
109
  proj->AddChild(std::move(root));
@@ -7,9 +7,11 @@
7
7
  #include "duckdb/parser/expression/constant_expression.hpp"
8
8
  #include "duckdb/parser/expression/conjunction_expression.hpp"
9
9
  #include "duckdb/parser/expression/bound_expression.hpp"
10
+ #include "duckdb/parser/expression/star_expression.hpp"
10
11
  #include "duckdb/common/string_util.hpp"
11
12
  #include "duckdb/common/case_insensitive_map.hpp"
12
13
  #include "duckdb/planner/expression_binder/lateral_binder.hpp"
14
+ #include "duckdb/planner/query_node/bound_select_node.hpp"
13
15
 
14
16
  namespace duckdb {
15
17
 
@@ -253,6 +255,8 @@ unique_ptr<BoundTableRef> Binder::Bind(JoinRef &ref) {
253
255
  }
254
256
  }
255
257
 
258
+ auto right_bindings_list_copy = right_binder.bind_context.GetBindingsList();
259
+
256
260
  bind_context.AddContext(std::move(left_binder.bind_context));
257
261
  bind_context.AddContext(std::move(right_binder.bind_context));
258
262
  MoveCorrelatedExpressions(left_binder);
@@ -269,6 +273,11 @@ unique_ptr<BoundTableRef> Binder::Bind(JoinRef &ref) {
269
273
  WhereBinder binder(*this, context);
270
274
  result->condition = binder.Bind(ref.condition);
271
275
  }
276
+
277
+ if (result->type == JoinType::SEMI || result->type == JoinType::ANTI) {
278
+ bind_context.RemoveContext(right_bindings_list_copy);
279
+ }
280
+
272
281
  return std::move(result);
273
282
  }
274
283
 
@@ -3,8 +3,10 @@
3
3
  #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
4
4
  #include "duckdb/catalog/catalog_entry/view_catalog_entry.hpp"
5
5
  #include "duckdb/main/config.hpp"
6
+ #include "duckdb/parser/parsed_expression_iterator.hpp"
6
7
  #include "duckdb/parser/query_node/select_node.hpp"
7
8
  #include "duckdb/parser/statement/list.hpp"
9
+ #include "duckdb/parser/tableref/joinref.hpp"
8
10
  #include "duckdb/parser/tableref/table_function_ref.hpp"
9
11
  #include "duckdb/planner/bound_query_node.hpp"
10
12
  #include "duckdb/planner/bound_tableref.hpp"
@@ -13,7 +15,6 @@
13
15
  #include "duckdb/planner/expression_iterator.hpp"
14
16
  #include "duckdb/planner/operator/logical_projection.hpp"
15
17
  #include "duckdb/planner/operator/logical_sample.hpp"
16
- #include "duckdb/parser/parsed_expression_iterator.hpp"
17
18
 
18
19
  #include <algorithm>
19
20
 
@@ -19,7 +19,7 @@ BoundOrderByNode::BoundOrderByNode(OrderType type, OrderByNullType null_order, u
19
19
 
20
20
  BoundOrderByNode BoundOrderByNode::Copy() const {
21
21
  if (stats) {
22
- return BoundOrderByNode(type, null_order, expression->Copy(), stats->Copy());
22
+ return BoundOrderByNode(type, null_order, expression->Copy(), stats->ToUnique());
23
23
  } else {
24
24
  return BoundOrderByNode(type, null_order, expression->Copy());
25
25
  }
@@ -102,7 +102,7 @@ unique_ptr<Expression> BoundWindowExpression::Copy() {
102
102
  }
103
103
  for (auto &ps : partitions_stats) {
104
104
  if (ps) {
105
- new_window->partitions_stats.push_back(ps->Copy());
105
+ new_window->partitions_stats.push_back(ps->ToUnique());
106
106
  } else {
107
107
  new_window->partitions_stats.push_back(nullptr);
108
108
  }
@@ -1,8 +1,6 @@
1
1
  #include "duckdb/planner/filter/constant_filter.hpp"
2
-
2
+ #include "duckdb/storage/statistics/base_statistics.hpp"
3
3
  #include "duckdb/common/field_writer.hpp"
4
- #include "duckdb/storage/statistics/numeric_statistics.hpp"
5
- #include "duckdb/storage/statistics/string_statistics.hpp"
6
4
 
7
5
  namespace duckdb {
8
6
 
@@ -12,7 +10,7 @@ ConstantFilter::ConstantFilter(ExpressionType comparison_type_p, Value constant_
12
10
  }
13
11
 
14
12
  FilterPropagateResult ConstantFilter::CheckStatistics(BaseStatistics &stats) {
15
- D_ASSERT(constant.type().id() == stats.type.id());
13
+ D_ASSERT(constant.type().id() == stats.GetType().id());
16
14
  switch (constant.type().InternalType()) {
17
15
  case PhysicalType::UINT8:
18
16
  case PhysicalType::UINT16:
@@ -25,9 +23,9 @@ FilterPropagateResult ConstantFilter::CheckStatistics(BaseStatistics &stats) {
25
23
  case PhysicalType::INT128:
26
24
  case PhysicalType::FLOAT:
27
25
  case PhysicalType::DOUBLE:
28
- return ((NumericStatistics &)stats).CheckZonemap(comparison_type, constant);
26
+ return NumericStats::CheckZonemap(stats, comparison_type, constant);
29
27
  case PhysicalType::VARCHAR:
30
- return ((StringStatistics &)stats).CheckZonemap(comparison_type, StringValue::Get(constant));
28
+ return StringStats::CheckZonemap(stats, comparison_type, StringValue::Get(constant));
31
29
  default:
32
30
  return FilterPropagateResult::NO_PRUNING_POSSIBLE;
33
31
  }
@@ -32,7 +32,7 @@ void SingleFileRowGroupWriter::WriteColumnDataPointers(ColumnCheckpointState &co
32
32
  meta_writer.Write<block_id_t>(data_pointer.block_pointer.block_id);
33
33
  meta_writer.Write<uint32_t>(data_pointer.block_pointer.offset);
34
34
  meta_writer.Write<CompressionType>(data_pointer.compression_type);
35
- data_pointer.statistics->Serialize(meta_writer);
35
+ data_pointer.statistics.Serialize(meta_writer);
36
36
  }
37
37
  }
38
38
 
@@ -24,10 +24,7 @@ void TableDataReader::ReadTableData() {
24
24
  D_ASSERT(!columns.empty());
25
25
 
26
26
  // deserialize the total table statistics
27
- info.data->column_stats.reserve(columns.PhysicalColumnCount());
28
- for (auto &col : columns.Physical()) {
29
- info.data->column_stats.push_back(BaseStatistics::Deserialize(reader, col.Type()));
30
- }
27
+ info.data->table_stats.Deserialize(reader, columns);
31
28
 
32
29
  // deserialize each of the individual row groups
33
30
  auto row_group_count = reader.Read<uint64_t>();