duckdb 0.7.2-dev225.0 → 0.7.2-dev314.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/parquet/column_reader.cpp +5 -6
  3. package/src/duckdb/extension/parquet/include/column_reader.hpp +1 -2
  4. package/src/duckdb/extension/parquet/include/generated_column_reader.hpp +1 -11
  5. package/src/duckdb/extension/parquet/parquet_statistics.cpp +26 -32
  6. package/src/duckdb/src/common/sort/sort_state.cpp +5 -7
  7. package/src/duckdb/src/execution/column_binding_resolver.cpp +6 -0
  8. package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +4 -5
  9. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
  10. package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +2 -3
  11. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +32 -6
  12. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +15 -15
  13. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +18 -12
  14. package/src/duckdb/src/function/aggregate/distributive/bitstring_agg.cpp +6 -13
  15. package/src/duckdb/src/function/aggregate/distributive/count.cpp +2 -4
  16. package/src/duckdb/src/function/aggregate/distributive/sum.cpp +11 -13
  17. package/src/duckdb/src/function/scalar/date/date_diff.cpp +0 -1
  18. package/src/duckdb/src/function/scalar/date/date_part.cpp +17 -25
  19. package/src/duckdb/src/function/scalar/date/date_sub.cpp +0 -1
  20. package/src/duckdb/src/function/scalar/date/date_trunc.cpp +10 -14
  21. package/src/duckdb/src/function/scalar/generic/stats.cpp +2 -4
  22. package/src/duckdb/src/function/scalar/list/flatten.cpp +5 -12
  23. package/src/duckdb/src/function/scalar/list/list_concat.cpp +3 -8
  24. package/src/duckdb/src/function/scalar/list/list_extract.cpp +5 -12
  25. package/src/duckdb/src/function/scalar/list/list_value.cpp +5 -9
  26. package/src/duckdb/src/function/scalar/math/numeric.cpp +14 -17
  27. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +27 -34
  28. package/src/duckdb/src/function/scalar/string/caseconvert.cpp +2 -6
  29. package/src/duckdb/src/function/scalar/string/instr.cpp +2 -6
  30. package/src/duckdb/src/function/scalar/string/length.cpp +2 -6
  31. package/src/duckdb/src/function/scalar/string/like.cpp +2 -6
  32. package/src/duckdb/src/function/scalar/string/substring.cpp +2 -6
  33. package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +4 -9
  34. package/src/duckdb/src/function/scalar/struct/struct_insert.cpp +10 -13
  35. package/src/duckdb/src/function/scalar/struct/struct_pack.cpp +5 -6
  36. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  37. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
  38. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +12 -3
  39. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +2 -2
  40. package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +2 -0
  41. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -2
  42. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +2 -2
  43. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_fetch.hpp +1 -1
  44. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -1
  45. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +2 -2
  46. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_fetch.hpp +1 -1
  47. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -1
  48. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +5 -2
  49. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  50. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +93 -31
  51. package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +22 -3
  52. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +6 -6
  53. package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +41 -0
  54. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +157 -0
  55. package/src/duckdb/src/include/duckdb/storage/statistics/segment_statistics.hpp +2 -7
  56. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +74 -0
  57. package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +42 -0
  58. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +2 -3
  59. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +2 -2
  60. package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +2 -1
  61. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -3
  62. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +3 -2
  63. package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
  64. package/src/duckdb/src/main/config.cpp +66 -1
  65. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +0 -1
  66. package/src/duckdb/src/optimizer/statistics/expression/propagate_aggregate.cpp +9 -3
  67. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +6 -7
  68. package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -11
  69. package/src/duckdb/src/optimizer/statistics/expression/propagate_columnref.cpp +1 -1
  70. package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +13 -15
  71. package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +0 -1
  72. package/src/duckdb/src/optimizer/statistics/expression/propagate_constant.cpp +3 -75
  73. package/src/duckdb/src/optimizer/statistics/expression/propagate_function.cpp +7 -2
  74. package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +10 -0
  75. package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +2 -3
  76. package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +28 -31
  77. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +4 -5
  78. package/src/duckdb/src/optimizer/statistics/operator/propagate_set_operation.cpp +3 -3
  79. package/src/duckdb/src/optimizer/statistics_propagator.cpp +1 -1
  80. package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +4 -0
  81. package/src/duckdb/src/planner/bind_context.cpp +16 -0
  82. package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +0 -1
  83. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +9 -0
  84. package/src/duckdb/src/planner/binder.cpp +2 -1
  85. package/src/duckdb/src/planner/bound_result_modifier.cpp +1 -1
  86. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +1 -1
  87. package/src/duckdb/src/planner/filter/constant_filter.cpp +4 -6
  88. package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -1
  89. package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +1 -4
  90. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +4 -4
  91. package/src/duckdb/src/storage/compression/bitpacking.cpp +3 -3
  92. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +3 -3
  93. package/src/duckdb/src/storage/compression/numeric_constant.cpp +9 -10
  94. package/src/duckdb/src/storage/compression/patas.cpp +1 -1
  95. package/src/duckdb/src/storage/compression/rle.cpp +2 -2
  96. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +5 -5
  97. package/src/duckdb/src/storage/data_table.cpp +4 -6
  98. package/src/duckdb/src/storage/statistics/base_statistics.cpp +373 -128
  99. package/src/duckdb/src/storage/statistics/column_statistics.cpp +58 -3
  100. package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +4 -9
  101. package/src/duckdb/src/storage/statistics/list_stats.cpp +117 -0
  102. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +529 -0
  103. package/src/duckdb/src/storage/statistics/segment_statistics.cpp +2 -11
  104. package/src/duckdb/src/storage/statistics/string_stats.cpp +273 -0
  105. package/src/duckdb/src/storage/statistics/struct_stats.cpp +131 -0
  106. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  107. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -4
  108. package/src/duckdb/src/storage/table/column_data.cpp +16 -14
  109. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +2 -3
  110. package/src/duckdb/src/storage/table/column_segment.cpp +6 -8
  111. package/src/duckdb/src/storage/table/list_column_data.cpp +7 -11
  112. package/src/duckdb/src/storage/table/row_group.cpp +24 -23
  113. package/src/duckdb/src/storage/table/row_group_collection.cpp +12 -12
  114. package/src/duckdb/src/storage/table/standard_column_data.cpp +6 -6
  115. package/src/duckdb/src/storage/table/struct_column_data.cpp +15 -16
  116. package/src/duckdb/src/storage/table/table_statistics.cpp +27 -7
  117. package/src/duckdb/src/storage/table/update_segment.cpp +10 -12
  118. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +923 -919
  119. package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +2 -0
  120. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +15684 -15571
  121. package/src/duckdb/ub_src_storage_statistics.cpp +4 -6
  122. package/src/duckdb/src/include/duckdb/storage/statistics/list_statistics.hpp +0 -36
  123. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_statistics.hpp +0 -75
  124. package/src/duckdb/src/include/duckdb/storage/statistics/string_statistics.hpp +0 -49
  125. package/src/duckdb/src/include/duckdb/storage/statistics/struct_statistics.hpp +0 -36
  126. package/src/duckdb/src/include/duckdb/storage/statistics/validity_statistics.hpp +0 -45
  127. package/src/duckdb/src/storage/statistics/list_statistics.cpp +0 -94
  128. package/src/duckdb/src/storage/statistics/numeric_statistics.cpp +0 -307
  129. package/src/duckdb/src/storage/statistics/string_statistics.cpp +0 -220
  130. package/src/duckdb/src/storage/statistics/struct_statistics.cpp +0 -108
  131. package/src/duckdb/src/storage/statistics/validity_statistics.cpp +0 -91
@@ -0,0 +1,157 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/storage/statistics/numeric_stats.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/common/common.hpp"
12
+ #include "duckdb/common/exception.hpp"
13
+ #include "duckdb/common/types/hugeint.hpp"
14
+ #include "duckdb/common/enums/filter_propagate_result.hpp"
15
+ #include "duckdb/common/enums/expression_type.hpp"
16
+ #include "duckdb/common/operator/comparison_operators.hpp"
17
+ #include "duckdb/common/types/value.hpp"
18
+
19
+ namespace duckdb {
20
+ class BaseStatistics;
21
+ class FieldWriter;
22
+ class FieldReader;
23
+ struct SelectionVector;
24
+ class Vector;
25
+
26
+ struct NumericValueUnion {
27
+ union Val {
28
+ int8_t boolean;
29
+ int8_t tinyint;
30
+ int16_t smallint;
31
+ int32_t integer;
32
+ int64_t bigint;
33
+ uint8_t utinyint;
34
+ uint16_t usmallint;
35
+ uint32_t uinteger;
36
+ uint64_t ubigint;
37
+ hugeint_t hugeint;
38
+ float float_;
39
+ double double_;
40
+ } value_;
41
+
42
+ template <class T>
43
+ T &GetReferenceUnsafe() {
44
+ throw InternalException("NumericValueUnion::GetReferenceUnsafe called on unsupported type");
45
+ }
46
+ };
47
+
48
+ struct NumericStatsData {
49
+ //! Whether or not the segment has a min value
50
+ bool has_min;
51
+ //! Whether or not the segment has a max value
52
+ bool has_max;
53
+ //! The minimum value of the segment
54
+ NumericValueUnion min;
55
+ //! The maximum value of the segment
56
+ NumericValueUnion max;
57
+ };
58
+
59
+ struct NumericStats {
60
+ //! Unknown statistics - i.e. "has_min" is false, "has_max" is false
61
+ DUCKDB_API static BaseStatistics CreateUnknown(LogicalType type);
62
+ //! Empty statistics - i.e. "min = MaxValue<type>, max = MinValue<type>"
63
+ DUCKDB_API static BaseStatistics CreateEmpty(LogicalType type);
64
+
65
+ //! Returns true if the stats has a constant value
66
+ DUCKDB_API static bool IsConstant(const BaseStatistics &stats);
67
+ //! Returns true if the stats has both a min and max value defined
68
+ DUCKDB_API static bool HasMinMax(const BaseStatistics &stats);
69
+ //! Returns true if the stats has a min value defined
70
+ DUCKDB_API static bool HasMin(const BaseStatistics &stats);
71
+ //! Returns true if the stats has a max value defined
72
+ DUCKDB_API static bool HasMax(const BaseStatistics &stats);
73
+ //! Returns the min value - throws an exception if there is no min value
74
+ DUCKDB_API static Value Min(const BaseStatistics &stats);
75
+ //! Returns the max value - throws an exception if there is no max value
76
+ DUCKDB_API static Value Max(const BaseStatistics &stats);
77
+ //! Sets the min value of the statistics
78
+ DUCKDB_API static void SetMin(BaseStatistics &stats, const Value &val);
79
+ //! Sets the max value of the statistics
80
+ DUCKDB_API static void SetMax(BaseStatistics &stats, const Value &val);
81
+
82
+ //! Check whether or not a given comparison with a constant could possibly be satisfied by rows given the statistics
83
+ DUCKDB_API static FilterPropagateResult CheckZonemap(const BaseStatistics &stats, ExpressionType comparison_type,
84
+ const Value &constant);
85
+
86
+ DUCKDB_API static void Merge(BaseStatistics &stats, const BaseStatistics &other_p);
87
+
88
+ DUCKDB_API static void Serialize(const BaseStatistics &stats, FieldWriter &writer);
89
+ DUCKDB_API static BaseStatistics Deserialize(FieldReader &reader, LogicalType type);
90
+
91
+ DUCKDB_API static string ToString(const BaseStatistics &stats);
92
+
93
+ template <class T>
94
+ static inline void UpdateValue(T new_value, T &min, T &max) {
95
+ if (LessThan::Operation(new_value, min)) {
96
+ min = new_value;
97
+ }
98
+ if (GreaterThan::Operation(new_value, max)) {
99
+ max = new_value;
100
+ }
101
+ }
102
+
103
+ template <class T>
104
+ static inline void Update(BaseStatistics &stats, T new_value) {
105
+ auto &nstats = NumericStats::GetDataUnsafe(stats);
106
+ UpdateValue<T>(new_value, nstats.min.GetReferenceUnsafe<T>(), nstats.max.GetReferenceUnsafe<T>());
107
+ }
108
+
109
+ static void Verify(const BaseStatistics &stats, Vector &vector, const SelectionVector &sel, idx_t count);
110
+
111
+ template <class T>
112
+ static T GetMinUnsafe(const BaseStatistics &stats) {
113
+ return NumericStats::Min(stats).template GetValueUnsafe<T>();
114
+ }
115
+ template <class T>
116
+ static T GetMaxUnsafe(const BaseStatistics &stats) {
117
+ return NumericStats::Max(stats).template GetValueUnsafe<T>();
118
+ }
119
+
120
+ private:
121
+ static NumericStatsData &GetDataUnsafe(BaseStatistics &stats);
122
+ static const NumericStatsData &GetDataUnsafe(const BaseStatistics &stats);
123
+ static Value MinOrNull(const BaseStatistics &stats);
124
+ static Value MaxOrNull(const BaseStatistics &stats);
125
+ template <class T>
126
+ static void TemplatedVerify(const BaseStatistics &stats, Vector &vector, const SelectionVector &sel, idx_t count);
127
+ };
128
+
129
+ template <>
130
+ void NumericStats::Update<interval_t>(BaseStatistics &stats, interval_t new_value);
131
+ template <>
132
+ void NumericStats::Update<list_entry_t>(BaseStatistics &stats, list_entry_t new_value);
133
+
134
+ template <>
135
+ int8_t &NumericValueUnion::GetReferenceUnsafe();
136
+ template <>
137
+ int16_t &NumericValueUnion::GetReferenceUnsafe();
138
+ template <>
139
+ int32_t &NumericValueUnion::GetReferenceUnsafe();
140
+ template <>
141
+ int64_t &NumericValueUnion::GetReferenceUnsafe();
142
+ template <>
143
+ hugeint_t &NumericValueUnion::GetReferenceUnsafe();
144
+ template <>
145
+ uint8_t &NumericValueUnion::GetReferenceUnsafe();
146
+ template <>
147
+ uint16_t &NumericValueUnion::GetReferenceUnsafe();
148
+ template <>
149
+ uint32_t &NumericValueUnion::GetReferenceUnsafe();
150
+ template <>
151
+ uint64_t &NumericValueUnion::GetReferenceUnsafe();
152
+ template <>
153
+ float &NumericValueUnion::GetReferenceUnsafe();
154
+ template <>
155
+ double &NumericValueUnion::GetReferenceUnsafe();
156
+
157
+ } // namespace duckdb
@@ -17,15 +17,10 @@ namespace duckdb {
17
17
  class SegmentStatistics {
18
18
  public:
19
19
  SegmentStatistics(LogicalType type);
20
- SegmentStatistics(LogicalType type, unique_ptr<BaseStatistics> statistics);
21
-
22
- LogicalType type;
20
+ SegmentStatistics(BaseStatistics statistics);
23
21
 
24
22
  //! Type-specific statistics of the segment
25
- unique_ptr<BaseStatistics> statistics;
26
-
27
- public:
28
- void Reset();
23
+ BaseStatistics statistics;
29
24
  };
30
25
 
31
26
  } // namespace duckdb
@@ -0,0 +1,74 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/storage/statistics/string_stats.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/common/common.hpp"
12
+ #include "duckdb/common/exception.hpp"
13
+ #include "duckdb/common/types/hugeint.hpp"
14
+ #include "duckdb/common/enums/filter_propagate_result.hpp"
15
+ #include "duckdb/common/enums/expression_type.hpp"
16
+ #include "duckdb/common/operator/comparison_operators.hpp"
17
+
18
+ namespace duckdb {
19
+ class BaseStatistics;
20
+ class FieldWriter;
21
+ class FieldReader;
22
+ struct SelectionVector;
23
+ class Vector;
24
+
25
+ struct StringStatsData {
26
+ constexpr static uint32_t MAX_STRING_MINMAX_SIZE = 8;
27
+
28
+ //! The minimum value of the segment, potentially truncated
29
+ data_t min[MAX_STRING_MINMAX_SIZE];
30
+ //! The maximum value of the segment, potentially truncated
31
+ data_t max[MAX_STRING_MINMAX_SIZE];
32
+ //! Whether or not the column can contain unicode characters
33
+ bool has_unicode;
34
+ //! Whether or not the maximum string length is known
35
+ bool has_max_string_length;
36
+ //! The maximum string length in bytes
37
+ uint32_t max_string_length;
38
+ };
39
+
40
+ struct StringStats {
41
+ //! Unknown statistics - i.e. "has_unicode" is true, "max_string_length" is unknown, "min" is \x00, "max" is \xFF
42
+ DUCKDB_API static BaseStatistics CreateUnknown(LogicalType type);
43
+ //! Empty statistics - i.e. "has_unicode" is false, "max_string_length" is 0, "min" is \xFF, "max" is \x00
44
+ DUCKDB_API static BaseStatistics CreateEmpty(LogicalType type);
45
+ //! Whether or not the statistics have a maximum string length defined
46
+ DUCKDB_API static bool HasMaxStringLength(const BaseStatistics &stats);
47
+ //! Returns the maximum string length, or throws an exception if !HasMaxStringLength()
48
+ DUCKDB_API static uint32_t MaxStringLength(const BaseStatistics &stats);
49
+ //! Whether or not the strings can contain unicode
50
+ DUCKDB_API static bool CanContainUnicode(const BaseStatistics &stats);
51
+
52
+ //! Resets the max string length so HasMaxStringLength() is false
53
+ DUCKDB_API static void ResetMaxStringLength(BaseStatistics &stats);
54
+ //! FIXME: make this part of Set on statistics
55
+ DUCKDB_API static void SetContainsUnicode(BaseStatistics &stats);
56
+
57
+ DUCKDB_API static void Serialize(const BaseStatistics &stats, FieldWriter &writer);
58
+ DUCKDB_API static BaseStatistics Deserialize(FieldReader &reader, LogicalType type);
59
+
60
+ DUCKDB_API static string ToString(const BaseStatistics &stats);
61
+
62
+ DUCKDB_API static FilterPropagateResult CheckZonemap(const BaseStatistics &stats, ExpressionType comparison_type,
63
+ const string &value);
64
+
65
+ DUCKDB_API static void Update(BaseStatistics &stats, const string_t &value);
66
+ DUCKDB_API static void Merge(BaseStatistics &stats, const BaseStatistics &other);
67
+ DUCKDB_API static void Verify(const BaseStatistics &stats, Vector &vector, const SelectionVector &sel, idx_t count);
68
+
69
+ private:
70
+ static StringStatsData &GetDataUnsafe(BaseStatistics &stats);
71
+ static const StringStatsData &GetDataUnsafe(const BaseStatistics &stats);
72
+ };
73
+
74
+ } // namespace duckdb
@@ -0,0 +1,42 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/storage/statistics/struct_stats.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/common/common.hpp"
12
+ #include "duckdb/common/exception.hpp"
13
+
14
+ namespace duckdb {
15
+ class BaseStatistics;
16
+ class FieldWriter;
17
+ class FieldReader;
18
+ struct SelectionVector;
19
+ class Vector;
20
+
21
+ struct StructStats {
22
+ DUCKDB_API static void Construct(BaseStatistics &stats);
23
+ DUCKDB_API static BaseStatistics CreateUnknown(LogicalType type);
24
+ DUCKDB_API static BaseStatistics CreateEmpty(LogicalType type);
25
+
26
+ DUCKDB_API static const BaseStatistics *GetChildStats(const BaseStatistics &stats);
27
+ DUCKDB_API static const BaseStatistics &GetChildStats(const BaseStatistics &stats, idx_t i);
28
+ DUCKDB_API static BaseStatistics &GetChildStats(BaseStatistics &stats, idx_t i);
29
+ DUCKDB_API static void SetChildStats(BaseStatistics &stats, idx_t i, const BaseStatistics &new_stats);
30
+ DUCKDB_API static void SetChildStats(BaseStatistics &stats, idx_t i, unique_ptr<BaseStatistics> new_stats);
31
+
32
+ DUCKDB_API static void Serialize(const BaseStatistics &stats, FieldWriter &writer);
33
+ DUCKDB_API static BaseStatistics Deserialize(FieldReader &reader, LogicalType type);
34
+
35
+ DUCKDB_API static string ToString(const BaseStatistics &stats);
36
+
37
+ DUCKDB_API static void Merge(BaseStatistics &stats, const BaseStatistics &other);
38
+ DUCKDB_API static void Copy(BaseStatistics &stats, const BaseStatistics &other);
39
+ DUCKDB_API static void Verify(const BaseStatistics &stats, Vector &vector, const SelectionVector &sel, idx_t count);
40
+ };
41
+
42
+ } // namespace duckdb
@@ -9,7 +9,7 @@
9
9
  #include "duckdb/storage/buffer_manager.hpp"
10
10
  #include "duckdb/storage/checkpoint/string_checkpoint_state.hpp"
11
11
  #include "duckdb/storage/segment/uncompressed.hpp"
12
- #include "duckdb/storage/statistics/string_statistics.hpp"
12
+
13
13
  #include "duckdb/storage/string_uncompressed.hpp"
14
14
  #include "duckdb/storage/table/append_state.hpp"
15
15
  #include "duckdb/storage/table/column_segment.hpp"
@@ -173,8 +173,7 @@ public:
173
173
 
174
174
  public:
175
175
  static inline void UpdateStringStats(SegmentStatistics &stats, const string_t &new_value) {
176
- auto &sstats = (StringStatistics &)*stats.statistics;
177
- sstats.Update(new_value);
176
+ StringStats::Update(stats.statistics, new_value);
178
177
  }
179
178
 
180
179
  static void SetDictionary(ColumnSegment &segment, BufferHandle &handle, StringDictionaryContainer dict);
@@ -57,7 +57,7 @@ public:
57
57
  static unique_ptr<ColumnSegment> CreatePersistentSegment(DatabaseInstance &db, BlockManager &block_manager,
58
58
  block_id_t id, idx_t offset, const LogicalType &type_p,
59
59
  idx_t start, idx_t count, CompressionType compression_type,
60
- unique_ptr<BaseStatistics> statistics);
60
+ BaseStatistics statistics);
61
61
  static unique_ptr<ColumnSegment> CreateTransientSegment(DatabaseInstance &db, const LogicalType &type, idx_t start,
62
62
  idx_t segment_size = Storage::BLOCK_SIZE);
63
63
  static unique_ptr<ColumnSegment> CreateSegment(ColumnSegment &other, idx_t start);
@@ -124,7 +124,7 @@ public:
124
124
 
125
125
  public:
126
126
  ColumnSegment(DatabaseInstance &db, shared_ptr<BlockHandle> block, LogicalType type, ColumnSegmentType segment_type,
127
- idx_t start, idx_t count, CompressionFunction *function, unique_ptr<BaseStatistics> statistics,
127
+ idx_t start, idx_t count, CompressionFunction *function, BaseStatistics statistics,
128
128
  block_id_t block_id, idx_t offset, idx_t segment_size);
129
129
  ColumnSegment(ColumnSegment &other, idx_t start);
130
130
 
@@ -12,6 +12,7 @@
12
12
  #include "duckdb/common/vector.hpp"
13
13
  #include "duckdb/storage/table/segment_tree.hpp"
14
14
  #include "duckdb/storage/data_pointer.hpp"
15
+ #include "duckdb/storage/table/table_statistics.hpp"
15
16
 
16
17
  namespace duckdb {
17
18
  class BaseStatistics;
@@ -22,7 +23,7 @@ public:
22
23
  ~PersistentTableData();
23
24
 
24
25
  vector<RowGroupPointer> row_groups;
25
- vector<unique_ptr<BaseStatistics>> column_stats;
26
+ TableStatistics table_stats;
26
27
  };
27
28
 
28
29
  } // namespace duckdb
@@ -29,6 +29,7 @@ struct DataTableInfo;
29
29
  class ExpressionExecutor;
30
30
  class RowGroupWriter;
31
31
  class UpdateSegment;
32
+ class TableStatistics;
32
33
  class TableStorageInfo;
33
34
  class Vector;
34
35
  struct ColumnCheckpointState;
@@ -38,7 +39,7 @@ struct VersionNode;
38
39
 
39
40
  struct RowGroupWriteData {
40
41
  vector<unique_ptr<ColumnCheckpointState>> states;
41
- vector<unique_ptr<BaseStatistics>> statistics;
42
+ vector<BaseStatistics> statistics;
42
43
  };
43
44
 
44
45
  class RowGroup : public SegmentBase {
@@ -69,7 +70,7 @@ private:
69
70
  //! The column data of the row_group
70
71
  vector<shared_ptr<ColumnData>> columns;
71
72
  //! The segment statistics for each of the columns
72
- vector<shared_ptr<SegmentStatistics>> stats;
73
+ vector<SegmentStatistics> stats;
73
74
 
74
75
  public:
75
76
  DatabaseInstance &GetDatabase();
@@ -132,7 +133,7 @@ public:
132
133
  idx_t Delete(TransactionData transaction, DataTable *table, row_t *row_ids, idx_t count);
133
134
 
134
135
  RowGroupWriteData WriteToDisk(PartialBlockManager &manager, const vector<CompressionType> &compression_types);
135
- RowGroupPointer Checkpoint(RowGroupWriter &writer, vector<unique_ptr<BaseStatistics>> &global_stats);
136
+ RowGroupPointer Checkpoint(RowGroupWriter &writer, TableStatistics &global_stats);
136
137
  static void Serialize(RowGroupPointer &pointer, Serializer &serializer);
137
138
  static RowGroupPointer Deserialize(Deserializer &source, const ColumnList &columns);
138
139
 
@@ -78,7 +78,7 @@ public:
78
78
  void UpdateColumn(TransactionData transaction, Vector &row_ids, const vector<column_t> &column_path,
79
79
  DataChunk &updates);
80
80
 
81
- void Checkpoint(TableDataWriter &writer, vector<unique_ptr<BaseStatistics>> &global_stats);
81
+ void Checkpoint(TableDataWriter &writer, TableStatistics &global_stats);
82
82
 
83
83
  void CommitDropColumn(idx_t index);
84
84
  void CommitDropTable();
@@ -93,8 +93,9 @@ public:
93
93
  vector<column_t> bound_columns, Expression &cast_expr);
94
94
  void VerifyNewConstraint(DataTable &parent, const BoundConstraint &constraint);
95
95
 
96
+ void CopyStats(TableStatistics &stats);
96
97
  unique_ptr<BaseStatistics> CopyStats(column_t column_id);
97
- void SetStatistics(column_t column_id, const std::function<void(BaseStatistics &)> &set_fun);
98
+ void SetDistinct(column_t column_id, unique_ptr<DistinctStatistics> distinct_stats);
98
99
 
99
100
  private:
100
101
  bool IsEmpty(SegmentLock &) const;
@@ -14,6 +14,7 @@
14
14
  #include "duckdb/storage/statistics/column_statistics.hpp"
15
15
 
16
16
  namespace duckdb {
17
+ class ColumnList;
17
18
  class PersistentTableData;
18
19
 
19
20
  class TableStatisticsLock {
@@ -38,6 +39,7 @@ public:
38
39
  void MergeStats(idx_t i, BaseStatistics &stats);
39
40
  void MergeStats(TableStatisticsLock &lock, idx_t i, BaseStatistics &stats);
40
41
 
42
+ void CopyStats(TableStatistics &other);
41
43
  unique_ptr<BaseStatistics> CopyStats(idx_t i);
42
44
  ColumnStatistics &GetStats(idx_t i);
43
45
 
@@ -45,6 +47,9 @@ public:
45
47
 
46
48
  unique_ptr<TableStatisticsLock> GetLock();
47
49
 
50
+ void Serialize(Serializer &serializer);
51
+ void Deserialize(Deserializer &source, ColumnList &columns);
52
+
48
53
  private:
49
54
  //! The statistics lock
50
55
  mutex stats_lock;
@@ -9,6 +9,9 @@
9
9
  #include "duckdb/common/thread.hpp"
10
10
  #endif
11
11
 
12
+ #include <cstdio>
13
+ #include <inttypes.h>
14
+
12
15
  namespace duckdb {
13
16
 
14
17
  #define DUCKDB_GLOBAL(_PARAM) \
@@ -214,9 +217,71 @@ void DBConfig::SetDefaultMaxMemory() {
214
217
  }
215
218
  }
216
219
 
220
+ idx_t CGroupBandwidthQuota(idx_t physical_cores, FileSystem &fs) {
221
+ static constexpr const char *CPU_MAX = "/sys/fs/cgroup/cpu.max";
222
+ static constexpr const char *CFS_QUOTA = "/sys/fs/cgroup/cpu/cpu.cfs_quota_us";
223
+ static constexpr const char *CFS_PERIOD = "/sys/fs/cgroup/cpu/cpu.cfs_period_us";
224
+
225
+ int64_t quota, period;
226
+ char byte_buffer[1000];
227
+ unique_ptr<FileHandle> handle;
228
+ int64_t read_bytes;
229
+
230
+ if (fs.FileExists(CPU_MAX)) {
231
+ // cgroup v2
232
+ // https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html
233
+ handle =
234
+ fs.OpenFile(CPU_MAX, FileFlags::FILE_FLAGS_READ, FileSystem::DEFAULT_LOCK, FileSystem::DEFAULT_COMPRESSION);
235
+ read_bytes = fs.Read(*handle, (void *)byte_buffer, 999);
236
+ byte_buffer[read_bytes] = '\0';
237
+ if (std::sscanf(byte_buffer, "%" SCNd64 " %" SCNd64 "", &quota, &period) != 2) {
238
+ return physical_cores;
239
+ }
240
+ } else if (fs.FileExists(CFS_QUOTA) && fs.FileExists(CFS_PERIOD)) {
241
+ // cgroup v1
242
+ // https://www.kernel.org/doc/html/latest/scheduler/sched-bwc.html#management
243
+
244
+ // Read the quota: the number of microseconds of CPU time this cgroup may use per period
245
+ handle = fs.OpenFile(CFS_QUOTA, FileFlags::FILE_FLAGS_READ, FileSystem::DEFAULT_LOCK,
246
+ FileSystem::DEFAULT_COMPRESSION);
247
+ read_bytes = fs.Read(*handle, (void *)byte_buffer, 999);
248
+ byte_buffer[read_bytes] = '\0';
249
+ if (std::sscanf(byte_buffer, "%" SCNd64 "", &quota) != 1) {
250
+ return physical_cores;
251
+ }
252
+
253
+ // Read the period length: a cgroup can utilize the CPU for up to quota microseconds every period
254
+ handle = fs.OpenFile(CFS_PERIOD, FileFlags::FILE_FLAGS_READ, FileSystem::DEFAULT_LOCK,
255
+ FileSystem::DEFAULT_COMPRESSION);
256
+ read_bytes = fs.Read(*handle, (void *)byte_buffer, 999);
257
+ byte_buffer[read_bytes] = '\0';
258
+ if (std::sscanf(byte_buffer, "%" SCNd64 "", &period) != 1) {
259
+ return physical_cores;
260
+ }
261
+ } else {
262
+ // No cgroup quota
263
+ return physical_cores;
264
+ }
265
+ if (quota > 0 && period > 0) {
266
+ return idx_t(std::ceil((double)quota / (double)period));
267
+ } else {
268
+ return physical_cores;
269
+ }
270
+ }
271
+
272
+ idx_t GetSystemMaxThreadsInternal(FileSystem &fs) {
273
+ idx_t physical_cores = std::thread::hardware_concurrency();
274
+ #ifdef __linux__
275
+ auto cores_available_per_period = CGroupBandwidthQuota(physical_cores, fs);
276
+ return MaxValue<idx_t>(cores_available_per_period, 1);
277
+ #else
278
+ return physical_cores;
279
+ #endif
280
+ }
281
+
217
282
  void DBConfig::SetDefaultMaxThreads() {
218
283
  #ifndef DUCKDB_NO_THREADS
219
- options.maximum_threads = std::thread::hardware_concurrency();
284
+ options.maximum_threads = GetSystemMaxThreadsInternal(*file_system);
220
285
  #else
221
286
  options.maximum_threads = 1;
222
287
  #endif
@@ -6,7 +6,6 @@
6
6
  #include "duckdb/planner/operator/logical_comparison_join.hpp"
7
7
  #include "duckdb/planner/operator/logical_get.hpp"
8
8
  #include "duckdb/storage/data_table.hpp"
9
- #include "duckdb/storage/statistics/numeric_statistics.hpp"
10
9
  #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
11
10
 
12
11
  namespace duckdb {
@@ -5,15 +5,21 @@ namespace duckdb {
5
5
 
6
6
  // Statistics propagation for a bound aggregate expression.
  // Each child expression is propagated first and its result is collected in `stats`, one entry per
  // child; this happens before the callback check, so it runs even when no callback exists.
  // In the added (+) lines `stats` holds BaseStatistics by value: a child whose propagation yields
  // null is represented by BaseStatistics::CreateUnknown(child->return_type), otherwise its
  // statistics are copied in.
  // Returns nullptr when the aggregate function has no `statistics` callback. Otherwise the callback's
  // result is returned; in the added (+) lines the bind info, `stats` and `node_stats` are bundled
  // into an AggregateStatisticsInput instead of being passed as separate arguments.
  unique_ptr<BaseStatistics> StatisticsPropagator::PropagateExpression(BoundAggregateExpression &aggr,
7
7
  unique_ptr<Expression> *expr_ptr) {
8
- vector<unique_ptr<BaseStatistics>> stats;
8
+ vector<BaseStatistics> stats;
9
9
  stats.reserve(aggr.children.size());
10
10
  for (auto &child : aggr.children) {
11
- stats.push_back(PropagateExpression(child));
11
+ auto stat = PropagateExpression(child);
12
+ if (!stat) {
13
+ stats.push_back(BaseStatistics::CreateUnknown(child->return_type));
14
+ } else {
15
+ stats.push_back(stat->Copy());
16
+ }
12
17
  }
13
18
  if (!aggr.function.statistics) {
14
19
  return nullptr;
15
20
  }
16
- return aggr.function.statistics(context, aggr, aggr.bind_info.get(), stats, node_stats.get());
21
+ AggregateStatisticsInput input(aggr.bind_info.get(), stats, node_stats.get());
22
+ return aggr.function.statistics(context, aggr, input);
17
23
  }
18
24
 
19
25
  } // namespace duckdb
@@ -5,7 +5,6 @@
5
5
  #include "duckdb/planner/expression/bound_constant_expression.hpp"
6
6
  #include "duckdb/planner/expression/bound_function_expression.hpp"
7
7
  #include "duckdb/storage/statistics/base_statistics.hpp"
8
- #include "duckdb/storage/statistics/numeric_statistics.hpp"
9
8
  #include "duckdb/common/operator/subtract.hpp"
10
9
 
11
10
  namespace duckdb {
@@ -44,14 +43,14 @@ bool GetCastType(hugeint_t range, LogicalType &cast_type) {
44
43
  }
45
44
 
46
45
  template <class T>
47
- unique_ptr<Expression> TemplatedCastToSmallestType(unique_ptr<Expression> expr, NumericStatistics &num_stats) {
46
+ unique_ptr<Expression> TemplatedCastToSmallestType(unique_ptr<Expression> expr, BaseStatistics &stats) {
48
47
  // Compute range
49
- if (num_stats.min.IsNull() || num_stats.max.IsNull()) {
48
+ if (!NumericStats::HasMinMax(stats)) {
50
49
  return expr;
51
50
  }
52
51
 
53
- auto signed_min_val = num_stats.min.GetValue<T>();
54
- auto signed_max_val = num_stats.max.GetValue<T>();
52
+ auto signed_min_val = NumericStats::Min(stats).GetValue<T>();
53
+ auto signed_max_val = NumericStats::Max(stats).GetValue<T>();
55
54
  if (signed_max_val < signed_min_val) {
56
55
  return expr;
57
56
  }
@@ -82,7 +81,7 @@ unique_ptr<Expression> TemplatedCastToSmallestType(unique_ptr<Expression> expr,
82
81
  return BoundCastExpression::AddDefaultCastToType(std::move(minus_expr), cast_type);
83
82
  }
84
83
 
85
- unique_ptr<Expression> CastToSmallestType(unique_ptr<Expression> expr, NumericStatistics &num_stats) {
84
+ unique_ptr<Expression> CastToSmallestType(unique_ptr<Expression> expr, BaseStatistics &num_stats) {
86
85
  auto physical_type = expr->return_type.InternalType();
87
86
  switch (physical_type) {
88
87
  case PhysicalType::UINT8:
@@ -111,7 +110,7 @@ void StatisticsPropagator::PropagateAndCompress(unique_ptr<Expression> &expr, un
111
110
  stats = PropagateExpression(expr);
112
111
  if (stats) {
113
112
  if (expr->return_type.IsIntegral()) {
114
- expr = CastToSmallestType(std::move(expr), (NumericStatistics &)*stats);
113
+ expr = CastToSmallestType(std::move(expr), *stats);
115
114
  }
116
115
  }
117
116
  }
@@ -1,24 +1,27 @@
1
1
  #include "duckdb/optimizer/statistics_propagator.hpp"
2
2
  #include "duckdb/planner/expression/bound_cast_expression.hpp"
3
- #include "duckdb/storage/statistics/numeric_statistics.hpp"
4
3
 
5
4
  namespace duckdb {
6
5
 
7
// Derives the statistics of a numeric-to-numeric cast to `target`.
// Added (+) version: the input statistics are taken by const reference (the removed version took a
// pointer and cast it to NumericStatistics), and nullptr is returned up front when
// NumericStats::HasMinMax(input) is false.
// The min and max Values are then cast to `target` with DefaultTryCastAs; if either cast fails the
// function returns nullptr.
// On success a statistics object is created with NumericStats::CreateEmpty(target), CopyBase(input)
// is applied to it, the cast min/max are stored with SetMin/SetMax, and it is returned via ToUnique().
- static unique_ptr<BaseStatistics> StatisticsOperationsNumericNumericCast(const BaseStatistics *input_p,
6
+ static unique_ptr<BaseStatistics> StatisticsOperationsNumericNumericCast(const BaseStatistics &input,
8
7
  const LogicalType &target) {
9
- auto &input = (NumericStatistics &)*input_p;
10
-
11
- Value min = input.min, max = input.max;
8
+ if (!NumericStats::HasMinMax(input)) {
9
+ return nullptr;
10
+ }
11
+ Value min = NumericStats::Min(input);
12
+ Value max = NumericStats::Max(input);
12
13
  if (!min.DefaultTryCastAs(target) || !max.DefaultTryCastAs(target)) {
13
14
  // overflow in cast: bailout
14
15
  return nullptr;
15
16
  }
16
- auto stats = make_unique<NumericStatistics>(target, std::move(min), std::move(max), input.stats_type);
17
- stats->CopyBase(*input_p);
18
- return std::move(stats);
17
+ auto result = NumericStats::CreateEmpty(target);
18
+ result.CopyBase(input);
19
+ NumericStats::SetMin(result, min);
20
+ NumericStats::SetMax(result, max);
21
+ return result.ToUnique();
19
22
  }
20
23
 
21
- static unique_ptr<BaseStatistics> StatisticsNumericCastSwitch(const BaseStatistics *input, const LogicalType &target) {
24
+ static unique_ptr<BaseStatistics> StatisticsNumericCastSwitch(const BaseStatistics &input, const LogicalType &target) {
22
25
  switch (target.InternalType()) {
23
26
  case PhysicalType::INT8:
24
27
  case PhysicalType::INT16:
@@ -48,13 +51,13 @@ unique_ptr<BaseStatistics> StatisticsPropagator::PropagateExpression(BoundCastEx
48
51
  case PhysicalType::INT128:
49
52
  case PhysicalType::FLOAT:
50
53
  case PhysicalType::DOUBLE:
51
- result_stats = StatisticsNumericCastSwitch(child_stats.get(), cast.return_type);
54
+ result_stats = StatisticsNumericCastSwitch(*child_stats, cast.return_type);
52
55
  break;
53
56
  default:
54
57
  return nullptr;
55
58
  }
56
59
  if (cast.try_cast && result_stats) {
57
- result_stats->validity_stats = make_unique<ValidityStatistics>(true, true);
60
+ result_stats->Set(StatsInfo::CAN_HAVE_NULL_VALUES);
58
61
  }
59
62
  return result_stats;
60
63
  }
@@ -9,7 +9,7 @@ unique_ptr<BaseStatistics> StatisticsPropagator::PropagateExpression(BoundColumn
9
9
  if (stats == statistics_map.end()) {
10
10
  return nullptr;
11
11
  }
12
- return stats->second->Copy();
12
+ return stats->second->ToUnique();
13
13
  }
14
14
 
15
15
  } // namespace duckdb