duckdb 0.7.2-dev225.0 → 0.7.2-dev314.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/column_reader.cpp +5 -6
- package/src/duckdb/extension/parquet/include/column_reader.hpp +1 -2
- package/src/duckdb/extension/parquet/include/generated_column_reader.hpp +1 -11
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +26 -32
- package/src/duckdb/src/common/sort/sort_state.cpp +5 -7
- package/src/duckdb/src/execution/column_binding_resolver.cpp +6 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +4 -5
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
- package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +2 -3
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +32 -6
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +15 -15
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +18 -12
- package/src/duckdb/src/function/aggregate/distributive/bitstring_agg.cpp +6 -13
- package/src/duckdb/src/function/aggregate/distributive/count.cpp +2 -4
- package/src/duckdb/src/function/aggregate/distributive/sum.cpp +11 -13
- package/src/duckdb/src/function/scalar/date/date_diff.cpp +0 -1
- package/src/duckdb/src/function/scalar/date/date_part.cpp +17 -25
- package/src/duckdb/src/function/scalar/date/date_sub.cpp +0 -1
- package/src/duckdb/src/function/scalar/date/date_trunc.cpp +10 -14
- package/src/duckdb/src/function/scalar/generic/stats.cpp +2 -4
- package/src/duckdb/src/function/scalar/list/flatten.cpp +5 -12
- package/src/duckdb/src/function/scalar/list/list_concat.cpp +3 -8
- package/src/duckdb/src/function/scalar/list/list_extract.cpp +5 -12
- package/src/duckdb/src/function/scalar/list/list_value.cpp +5 -9
- package/src/duckdb/src/function/scalar/math/numeric.cpp +14 -17
- package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +27 -34
- package/src/duckdb/src/function/scalar/string/caseconvert.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/instr.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/length.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/like.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/substring.cpp +2 -6
- package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +4 -9
- package/src/duckdb/src/function/scalar/struct/struct_insert.cpp +10 -13
- package/src/duckdb/src/function/scalar/struct/struct_pack.cpp +5 -6
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +12 -3
- package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_fetch.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_fetch.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +93 -31
- package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +22 -3
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +6 -6
- package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +41 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +157 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/segment_statistics.hpp +2 -7
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +74 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +42 -0
- package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +2 -3
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -3
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
- package/src/duckdb/src/main/config.cpp +66 -1
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +0 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_aggregate.cpp +9 -3
- package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +6 -7
- package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -11
- package/src/duckdb/src/optimizer/statistics/expression/propagate_columnref.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +13 -15
- package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +0 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_constant.cpp +3 -75
- package/src/duckdb/src/optimizer/statistics/expression/propagate_function.cpp +7 -2
- package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +10 -0
- package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +2 -3
- package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +28 -31
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +4 -5
- package/src/duckdb/src/optimizer/statistics/operator/propagate_set_operation.cpp +3 -3
- package/src/duckdb/src/optimizer/statistics_propagator.cpp +1 -1
- package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +4 -0
- package/src/duckdb/src/planner/bind_context.cpp +16 -0
- package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +0 -1
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +9 -0
- package/src/duckdb/src/planner/binder.cpp +2 -1
- package/src/duckdb/src/planner/bound_result_modifier.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +1 -1
- package/src/duckdb/src/planner/filter/constant_filter.cpp +4 -6
- package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -1
- package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +1 -4
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +4 -4
- package/src/duckdb/src/storage/compression/bitpacking.cpp +3 -3
- package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +3 -3
- package/src/duckdb/src/storage/compression/numeric_constant.cpp +9 -10
- package/src/duckdb/src/storage/compression/patas.cpp +1 -1
- package/src/duckdb/src/storage/compression/rle.cpp +2 -2
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +5 -5
- package/src/duckdb/src/storage/data_table.cpp +4 -6
- package/src/duckdb/src/storage/statistics/base_statistics.cpp +373 -128
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +58 -3
- package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +4 -9
- package/src/duckdb/src/storage/statistics/list_stats.cpp +117 -0
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +529 -0
- package/src/duckdb/src/storage/statistics/segment_statistics.cpp +2 -11
- package/src/duckdb/src/storage/statistics/string_stats.cpp +273 -0
- package/src/duckdb/src/storage/statistics/struct_stats.cpp +131 -0
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -4
- package/src/duckdb/src/storage/table/column_data.cpp +16 -14
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +2 -3
- package/src/duckdb/src/storage/table/column_segment.cpp +6 -8
- package/src/duckdb/src/storage/table/list_column_data.cpp +7 -11
- package/src/duckdb/src/storage/table/row_group.cpp +24 -23
- package/src/duckdb/src/storage/table/row_group_collection.cpp +12 -12
- package/src/duckdb/src/storage/table/standard_column_data.cpp +6 -6
- package/src/duckdb/src/storage/table/struct_column_data.cpp +15 -16
- package/src/duckdb/src/storage/table/table_statistics.cpp +27 -7
- package/src/duckdb/src/storage/table/update_segment.cpp +10 -12
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +923 -919
- package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +2 -0
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +15684 -15571
- package/src/duckdb/ub_src_storage_statistics.cpp +4 -6
- package/src/duckdb/src/include/duckdb/storage/statistics/list_statistics.hpp +0 -36
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_statistics.hpp +0 -75
- package/src/duckdb/src/include/duckdb/storage/statistics/string_statistics.hpp +0 -49
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_statistics.hpp +0 -36
- package/src/duckdb/src/include/duckdb/storage/statistics/validity_statistics.hpp +0 -45
- package/src/duckdb/src/storage/statistics/list_statistics.cpp +0 -94
- package/src/duckdb/src/storage/statistics/numeric_statistics.cpp +0 -307
- package/src/duckdb/src/storage/statistics/string_statistics.cpp +0 -220
- package/src/duckdb/src/storage/statistics/struct_statistics.cpp +0 -108
- package/src/duckdb/src/storage/statistics/validity_statistics.cpp +0 -91
@@ -0,0 +1,157 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/storage/statistics/numeric_stats.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/common/common.hpp"
|
12
|
+
#include "duckdb/common/exception.hpp"
|
13
|
+
#include "duckdb/common/types/hugeint.hpp"
|
14
|
+
#include "duckdb/common/enums/filter_propagate_result.hpp"
|
15
|
+
#include "duckdb/common/enums/expression_type.hpp"
|
16
|
+
#include "duckdb/common/operator/comparison_operators.hpp"
|
17
|
+
#include "duckdb/common/types/value.hpp"
|
18
|
+
|
19
|
+
namespace duckdb {
|
20
|
+
class BaseStatistics;
|
21
|
+
class FieldWriter;
|
22
|
+
class FieldReader;
|
23
|
+
struct SelectionVector;
|
24
|
+
class Vector;
|
25
|
+
|
26
|
+
struct NumericValueUnion {
|
27
|
+
union Val {
|
28
|
+
int8_t boolean;
|
29
|
+
int8_t tinyint;
|
30
|
+
int16_t smallint;
|
31
|
+
int32_t integer;
|
32
|
+
int64_t bigint;
|
33
|
+
uint8_t utinyint;
|
34
|
+
uint16_t usmallint;
|
35
|
+
uint32_t uinteger;
|
36
|
+
uint64_t ubigint;
|
37
|
+
hugeint_t hugeint;
|
38
|
+
float float_;
|
39
|
+
double double_;
|
40
|
+
} value_;
|
41
|
+
|
42
|
+
template <class T>
|
43
|
+
T &GetReferenceUnsafe() {
|
44
|
+
throw InternalException("NumericValueUnion::GetReferenceUnsafe called on unsupported type");
|
45
|
+
}
|
46
|
+
};
|
47
|
+
|
48
|
+
struct NumericStatsData {
|
49
|
+
//! Whether or not the segment has a min value
|
50
|
+
bool has_min;
|
51
|
+
//! Whether or not the segment has a max value
|
52
|
+
bool has_max;
|
53
|
+
//! The minimum value of the segment
|
54
|
+
NumericValueUnion min;
|
55
|
+
//! The maximum value of the segment
|
56
|
+
NumericValueUnion max;
|
57
|
+
};
|
58
|
+
|
59
|
+
struct NumericStats {
|
60
|
+
//! Unknown statistics - i.e. "has_min" is false, "has_max" is false
|
61
|
+
DUCKDB_API static BaseStatistics CreateUnknown(LogicalType type);
|
62
|
+
//! Empty statistics - i.e. "min = MaxValue<type>, max = MinValue<type>"
|
63
|
+
DUCKDB_API static BaseStatistics CreateEmpty(LogicalType type);
|
64
|
+
|
65
|
+
//! Returns true if the stats has a constant value
|
66
|
+
DUCKDB_API static bool IsConstant(const BaseStatistics &stats);
|
67
|
+
//! Returns true if the stats has both a min and max value defined
|
68
|
+
DUCKDB_API static bool HasMinMax(const BaseStatistics &stats);
|
69
|
+
//! Returns true if the stats has a min value defined
|
70
|
+
DUCKDB_API static bool HasMin(const BaseStatistics &stats);
|
71
|
+
//! Returns true if the stats has a max value defined
|
72
|
+
DUCKDB_API static bool HasMax(const BaseStatistics &stats);
|
73
|
+
//! Returns the min value - throws an exception if there is no min value
|
74
|
+
DUCKDB_API static Value Min(const BaseStatistics &stats);
|
75
|
+
//! Returns the max value - throws an exception if there is no max value
|
76
|
+
DUCKDB_API static Value Max(const BaseStatistics &stats);
|
77
|
+
//! Sets the min value of the statistics
|
78
|
+
DUCKDB_API static void SetMin(BaseStatistics &stats, const Value &val);
|
79
|
+
//! Sets the max value of the statistics
|
80
|
+
DUCKDB_API static void SetMax(BaseStatistics &stats, const Value &val);
|
81
|
+
|
82
|
+
//! Check whether or not a given comparison with a constant could possibly be satisfied by rows given the statistics
|
83
|
+
DUCKDB_API static FilterPropagateResult CheckZonemap(const BaseStatistics &stats, ExpressionType comparison_type,
|
84
|
+
const Value &constant);
|
85
|
+
|
86
|
+
DUCKDB_API static void Merge(BaseStatistics &stats, const BaseStatistics &other_p);
|
87
|
+
|
88
|
+
DUCKDB_API static void Serialize(const BaseStatistics &stats, FieldWriter &writer);
|
89
|
+
DUCKDB_API static BaseStatistics Deserialize(FieldReader &reader, LogicalType type);
|
90
|
+
|
91
|
+
DUCKDB_API static string ToString(const BaseStatistics &stats);
|
92
|
+
|
93
|
+
template <class T>
|
94
|
+
static inline void UpdateValue(T new_value, T &min, T &max) {
|
95
|
+
if (LessThan::Operation(new_value, min)) {
|
96
|
+
min = new_value;
|
97
|
+
}
|
98
|
+
if (GreaterThan::Operation(new_value, max)) {
|
99
|
+
max = new_value;
|
100
|
+
}
|
101
|
+
}
|
102
|
+
|
103
|
+
template <class T>
|
104
|
+
static inline void Update(BaseStatistics &stats, T new_value) {
|
105
|
+
auto &nstats = NumericStats::GetDataUnsafe(stats);
|
106
|
+
UpdateValue<T>(new_value, nstats.min.GetReferenceUnsafe<T>(), nstats.max.GetReferenceUnsafe<T>());
|
107
|
+
}
|
108
|
+
|
109
|
+
static void Verify(const BaseStatistics &stats, Vector &vector, const SelectionVector &sel, idx_t count);
|
110
|
+
|
111
|
+
template <class T>
|
112
|
+
static T GetMinUnsafe(const BaseStatistics &stats) {
|
113
|
+
return NumericStats::Min(stats).template GetValueUnsafe<T>();
|
114
|
+
}
|
115
|
+
template <class T>
|
116
|
+
static T GetMaxUnsafe(const BaseStatistics &stats) {
|
117
|
+
return NumericStats::Max(stats).template GetValueUnsafe<T>();
|
118
|
+
}
|
119
|
+
|
120
|
+
private:
|
121
|
+
static NumericStatsData &GetDataUnsafe(BaseStatistics &stats);
|
122
|
+
static const NumericStatsData &GetDataUnsafe(const BaseStatistics &stats);
|
123
|
+
static Value MinOrNull(const BaseStatistics &stats);
|
124
|
+
static Value MaxOrNull(const BaseStatistics &stats);
|
125
|
+
template <class T>
|
126
|
+
static void TemplatedVerify(const BaseStatistics &stats, Vector &vector, const SelectionVector &sel, idx_t count);
|
127
|
+
};
|
128
|
+
|
129
|
+
template <>
|
130
|
+
void NumericStats::Update<interval_t>(BaseStatistics &stats, interval_t new_value);
|
131
|
+
template <>
|
132
|
+
void NumericStats::Update<list_entry_t>(BaseStatistics &stats, list_entry_t new_value);
|
133
|
+
|
134
|
+
template <>
|
135
|
+
int8_t &NumericValueUnion::GetReferenceUnsafe();
|
136
|
+
template <>
|
137
|
+
int16_t &NumericValueUnion::GetReferenceUnsafe();
|
138
|
+
template <>
|
139
|
+
int32_t &NumericValueUnion::GetReferenceUnsafe();
|
140
|
+
template <>
|
141
|
+
int64_t &NumericValueUnion::GetReferenceUnsafe();
|
142
|
+
template <>
|
143
|
+
hugeint_t &NumericValueUnion::GetReferenceUnsafe();
|
144
|
+
template <>
|
145
|
+
uint8_t &NumericValueUnion::GetReferenceUnsafe();
|
146
|
+
template <>
|
147
|
+
uint16_t &NumericValueUnion::GetReferenceUnsafe();
|
148
|
+
template <>
|
149
|
+
uint32_t &NumericValueUnion::GetReferenceUnsafe();
|
150
|
+
template <>
|
151
|
+
uint64_t &NumericValueUnion::GetReferenceUnsafe();
|
152
|
+
template <>
|
153
|
+
float &NumericValueUnion::GetReferenceUnsafe();
|
154
|
+
template <>
|
155
|
+
double &NumericValueUnion::GetReferenceUnsafe();
|
156
|
+
|
157
|
+
} // namespace duckdb
|
@@ -17,15 +17,10 @@ namespace duckdb {
|
|
17
17
|
class SegmentStatistics {
|
18
18
|
public:
|
19
19
|
SegmentStatistics(LogicalType type);
|
20
|
-
SegmentStatistics(
|
21
|
-
|
22
|
-
LogicalType type;
|
20
|
+
SegmentStatistics(BaseStatistics statistics);
|
23
21
|
|
24
22
|
//! Type-specific statistics of the segment
|
25
|
-
|
26
|
-
|
27
|
-
public:
|
28
|
-
void Reset();
|
23
|
+
BaseStatistics statistics;
|
29
24
|
};
|
30
25
|
|
31
26
|
} // namespace duckdb
|
@@ -0,0 +1,74 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/storage/statistics/string_stats.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/common/common.hpp"
|
12
|
+
#include "duckdb/common/exception.hpp"
|
13
|
+
#include "duckdb/common/types/hugeint.hpp"
|
14
|
+
#include "duckdb/common/enums/filter_propagate_result.hpp"
|
15
|
+
#include "duckdb/common/enums/expression_type.hpp"
|
16
|
+
#include "duckdb/common/operator/comparison_operators.hpp"
|
17
|
+
|
18
|
+
namespace duckdb {
|
19
|
+
class BaseStatistics;
|
20
|
+
class FieldWriter;
|
21
|
+
class FieldReader;
|
22
|
+
struct SelectionVector;
|
23
|
+
class Vector;
|
24
|
+
|
25
|
+
struct StringStatsData {
|
26
|
+
constexpr static uint32_t MAX_STRING_MINMAX_SIZE = 8;
|
27
|
+
|
28
|
+
//! The minimum value of the segment, potentially truncated
|
29
|
+
data_t min[MAX_STRING_MINMAX_SIZE];
|
30
|
+
//! The maximum value of the segment, potentially truncated
|
31
|
+
data_t max[MAX_STRING_MINMAX_SIZE];
|
32
|
+
//! Whether or not the column can contain unicode characters
|
33
|
+
bool has_unicode;
|
34
|
+
//! Whether or not the maximum string length is known
|
35
|
+
bool has_max_string_length;
|
36
|
+
//! The maximum string length in bytes
|
37
|
+
uint32_t max_string_length;
|
38
|
+
};
|
39
|
+
|
40
|
+
struct StringStats {
|
41
|
+
//! Unknown statistics - i.e. "has_unicode" is true, "max_string_length" is unknown, "min" is \0, max is \xFF
|
42
|
+
DUCKDB_API static BaseStatistics CreateUnknown(LogicalType type);
|
43
|
+
//! Empty statistics - i.e. "has_unicode" is false, "max_string_length" is 0, "min" is \xFF, max is \x00
|
44
|
+
DUCKDB_API static BaseStatistics CreateEmpty(LogicalType type);
|
45
|
+
//! Whether or not the statistics have a maximum string length defined
|
46
|
+
DUCKDB_API static bool HasMaxStringLength(const BaseStatistics &stats);
|
47
|
+
//! Returns the maximum string length, or throws an exception if !HasMaxStringLength()
|
48
|
+
DUCKDB_API static uint32_t MaxStringLength(const BaseStatistics &stats);
|
49
|
+
//! Whether or not the strings can contain unicode
|
50
|
+
DUCKDB_API static bool CanContainUnicode(const BaseStatistics &stats);
|
51
|
+
|
52
|
+
//! Resets the max string length so HasMaxStringLength() is false
|
53
|
+
DUCKDB_API static void ResetMaxStringLength(BaseStatistics &stats);
|
54
|
+
//! FIXME: make this part of Set on statistics
|
55
|
+
DUCKDB_API static void SetContainsUnicode(BaseStatistics &stats);
|
56
|
+
|
57
|
+
DUCKDB_API static void Serialize(const BaseStatistics &stats, FieldWriter &writer);
|
58
|
+
DUCKDB_API static BaseStatistics Deserialize(FieldReader &reader, LogicalType type);
|
59
|
+
|
60
|
+
DUCKDB_API static string ToString(const BaseStatistics &stats);
|
61
|
+
|
62
|
+
DUCKDB_API static FilterPropagateResult CheckZonemap(const BaseStatistics &stats, ExpressionType comparison_type,
|
63
|
+
const string &value);
|
64
|
+
|
65
|
+
DUCKDB_API static void Update(BaseStatistics &stats, const string_t &value);
|
66
|
+
DUCKDB_API static void Merge(BaseStatistics &stats, const BaseStatistics &other);
|
67
|
+
DUCKDB_API static void Verify(const BaseStatistics &stats, Vector &vector, const SelectionVector &sel, idx_t count);
|
68
|
+
|
69
|
+
private:
|
70
|
+
static StringStatsData &GetDataUnsafe(BaseStatistics &stats);
|
71
|
+
static const StringStatsData &GetDataUnsafe(const BaseStatistics &stats);
|
72
|
+
};
|
73
|
+
|
74
|
+
} // namespace duckdb
|
@@ -0,0 +1,42 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/storage/statistics/struct_stats.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/common/common.hpp"
|
12
|
+
#include "duckdb/common/exception.hpp"
|
13
|
+
|
14
|
+
namespace duckdb {
|
15
|
+
class BaseStatistics;
|
16
|
+
class FieldWriter;
|
17
|
+
class FieldReader;
|
18
|
+
struct SelectionVector;
|
19
|
+
class Vector;
|
20
|
+
|
21
|
+
struct StructStats {
|
22
|
+
DUCKDB_API static void Construct(BaseStatistics &stats);
|
23
|
+
DUCKDB_API static BaseStatistics CreateUnknown(LogicalType type);
|
24
|
+
DUCKDB_API static BaseStatistics CreateEmpty(LogicalType type);
|
25
|
+
|
26
|
+
DUCKDB_API static const BaseStatistics *GetChildStats(const BaseStatistics &stats);
|
27
|
+
DUCKDB_API static const BaseStatistics &GetChildStats(const BaseStatistics &stats, idx_t i);
|
28
|
+
DUCKDB_API static BaseStatistics &GetChildStats(BaseStatistics &stats, idx_t i);
|
29
|
+
DUCKDB_API static void SetChildStats(BaseStatistics &stats, idx_t i, const BaseStatistics &new_stats);
|
30
|
+
DUCKDB_API static void SetChildStats(BaseStatistics &stats, idx_t i, unique_ptr<BaseStatistics> new_stats);
|
31
|
+
|
32
|
+
DUCKDB_API static void Serialize(const BaseStatistics &stats, FieldWriter &writer);
|
33
|
+
DUCKDB_API static BaseStatistics Deserialize(FieldReader &reader, LogicalType type);
|
34
|
+
|
35
|
+
DUCKDB_API static string ToString(const BaseStatistics &stats);
|
36
|
+
|
37
|
+
DUCKDB_API static void Merge(BaseStatistics &stats, const BaseStatistics &other);
|
38
|
+
DUCKDB_API static void Copy(BaseStatistics &stats, const BaseStatistics &other);
|
39
|
+
DUCKDB_API static void Verify(const BaseStatistics &stats, Vector &vector, const SelectionVector &sel, idx_t count);
|
40
|
+
};
|
41
|
+
|
42
|
+
} // namespace duckdb
|
@@ -9,7 +9,7 @@
|
|
9
9
|
#include "duckdb/storage/buffer_manager.hpp"
|
10
10
|
#include "duckdb/storage/checkpoint/string_checkpoint_state.hpp"
|
11
11
|
#include "duckdb/storage/segment/uncompressed.hpp"
|
12
|
-
|
12
|
+
|
13
13
|
#include "duckdb/storage/string_uncompressed.hpp"
|
14
14
|
#include "duckdb/storage/table/append_state.hpp"
|
15
15
|
#include "duckdb/storage/table/column_segment.hpp"
|
@@ -173,8 +173,7 @@ public:
|
|
173
173
|
|
174
174
|
public:
|
175
175
|
static inline void UpdateStringStats(SegmentStatistics &stats, const string_t &new_value) {
|
176
|
-
|
177
|
-
sstats.Update(new_value);
|
176
|
+
StringStats::Update(stats.statistics, new_value);
|
178
177
|
}
|
179
178
|
|
180
179
|
static void SetDictionary(ColumnSegment &segment, BufferHandle &handle, StringDictionaryContainer dict);
|
@@ -57,7 +57,7 @@ public:
|
|
57
57
|
static unique_ptr<ColumnSegment> CreatePersistentSegment(DatabaseInstance &db, BlockManager &block_manager,
|
58
58
|
block_id_t id, idx_t offset, const LogicalType &type_p,
|
59
59
|
idx_t start, idx_t count, CompressionType compression_type,
|
60
|
-
|
60
|
+
BaseStatistics statistics);
|
61
61
|
static unique_ptr<ColumnSegment> CreateTransientSegment(DatabaseInstance &db, const LogicalType &type, idx_t start,
|
62
62
|
idx_t segment_size = Storage::BLOCK_SIZE);
|
63
63
|
static unique_ptr<ColumnSegment> CreateSegment(ColumnSegment &other, idx_t start);
|
@@ -124,7 +124,7 @@ public:
|
|
124
124
|
|
125
125
|
public:
|
126
126
|
ColumnSegment(DatabaseInstance &db, shared_ptr<BlockHandle> block, LogicalType type, ColumnSegmentType segment_type,
|
127
|
-
idx_t start, idx_t count, CompressionFunction *function,
|
127
|
+
idx_t start, idx_t count, CompressionFunction *function, BaseStatistics statistics,
|
128
128
|
block_id_t block_id, idx_t offset, idx_t segment_size);
|
129
129
|
ColumnSegment(ColumnSegment &other, idx_t start);
|
130
130
|
|
@@ -12,6 +12,7 @@
|
|
12
12
|
#include "duckdb/common/vector.hpp"
|
13
13
|
#include "duckdb/storage/table/segment_tree.hpp"
|
14
14
|
#include "duckdb/storage/data_pointer.hpp"
|
15
|
+
#include "duckdb/storage/table/table_statistics.hpp"
|
15
16
|
|
16
17
|
namespace duckdb {
|
17
18
|
class BaseStatistics;
|
@@ -22,7 +23,7 @@ public:
|
|
22
23
|
~PersistentTableData();
|
23
24
|
|
24
25
|
vector<RowGroupPointer> row_groups;
|
25
|
-
|
26
|
+
TableStatistics table_stats;
|
26
27
|
};
|
27
28
|
|
28
29
|
} // namespace duckdb
|
@@ -29,6 +29,7 @@ struct DataTableInfo;
|
|
29
29
|
class ExpressionExecutor;
|
30
30
|
class RowGroupWriter;
|
31
31
|
class UpdateSegment;
|
32
|
+
class TableStatistics;
|
32
33
|
class TableStorageInfo;
|
33
34
|
class Vector;
|
34
35
|
struct ColumnCheckpointState;
|
@@ -38,7 +39,7 @@ struct VersionNode;
|
|
38
39
|
|
39
40
|
struct RowGroupWriteData {
|
40
41
|
vector<unique_ptr<ColumnCheckpointState>> states;
|
41
|
-
vector<
|
42
|
+
vector<BaseStatistics> statistics;
|
42
43
|
};
|
43
44
|
|
44
45
|
class RowGroup : public SegmentBase {
|
@@ -69,7 +70,7 @@ private:
|
|
69
70
|
//! The column data of the row_group
|
70
71
|
vector<shared_ptr<ColumnData>> columns;
|
71
72
|
//! The segment statistics for each of the columns
|
72
|
-
vector<
|
73
|
+
vector<SegmentStatistics> stats;
|
73
74
|
|
74
75
|
public:
|
75
76
|
DatabaseInstance &GetDatabase();
|
@@ -132,7 +133,7 @@ public:
|
|
132
133
|
idx_t Delete(TransactionData transaction, DataTable *table, row_t *row_ids, idx_t count);
|
133
134
|
|
134
135
|
RowGroupWriteData WriteToDisk(PartialBlockManager &manager, const vector<CompressionType> &compression_types);
|
135
|
-
RowGroupPointer Checkpoint(RowGroupWriter &writer,
|
136
|
+
RowGroupPointer Checkpoint(RowGroupWriter &writer, TableStatistics &global_stats);
|
136
137
|
static void Serialize(RowGroupPointer &pointer, Serializer &serializer);
|
137
138
|
static RowGroupPointer Deserialize(Deserializer &source, const ColumnList &columns);
|
138
139
|
|
@@ -78,7 +78,7 @@ public:
|
|
78
78
|
void UpdateColumn(TransactionData transaction, Vector &row_ids, const vector<column_t> &column_path,
|
79
79
|
DataChunk &updates);
|
80
80
|
|
81
|
-
void Checkpoint(TableDataWriter &writer,
|
81
|
+
void Checkpoint(TableDataWriter &writer, TableStatistics &global_stats);
|
82
82
|
|
83
83
|
void CommitDropColumn(idx_t index);
|
84
84
|
void CommitDropTable();
|
@@ -93,8 +93,9 @@ public:
|
|
93
93
|
vector<column_t> bound_columns, Expression &cast_expr);
|
94
94
|
void VerifyNewConstraint(DataTable &parent, const BoundConstraint &constraint);
|
95
95
|
|
96
|
+
void CopyStats(TableStatistics &stats);
|
96
97
|
unique_ptr<BaseStatistics> CopyStats(column_t column_id);
|
97
|
-
void
|
98
|
+
void SetDistinct(column_t column_id, unique_ptr<DistinctStatistics> distinct_stats);
|
98
99
|
|
99
100
|
private:
|
100
101
|
bool IsEmpty(SegmentLock &) const;
|
@@ -14,6 +14,7 @@
|
|
14
14
|
#include "duckdb/storage/statistics/column_statistics.hpp"
|
15
15
|
|
16
16
|
namespace duckdb {
|
17
|
+
class ColumnList;
|
17
18
|
class PersistentTableData;
|
18
19
|
|
19
20
|
class TableStatisticsLock {
|
@@ -38,6 +39,7 @@ public:
|
|
38
39
|
void MergeStats(idx_t i, BaseStatistics &stats);
|
39
40
|
void MergeStats(TableStatisticsLock &lock, idx_t i, BaseStatistics &stats);
|
40
41
|
|
42
|
+
void CopyStats(TableStatistics &other);
|
41
43
|
unique_ptr<BaseStatistics> CopyStats(idx_t i);
|
42
44
|
ColumnStatistics &GetStats(idx_t i);
|
43
45
|
|
@@ -45,6 +47,9 @@ public:
|
|
45
47
|
|
46
48
|
unique_ptr<TableStatisticsLock> GetLock();
|
47
49
|
|
50
|
+
void Serialize(Serializer &serializer);
|
51
|
+
void Deserialize(Deserializer &source, ColumnList &columns);
|
52
|
+
|
48
53
|
private:
|
49
54
|
//! The statistics lock
|
50
55
|
mutex stats_lock;
|
@@ -9,6 +9,9 @@
|
|
9
9
|
#include "duckdb/common/thread.hpp"
|
10
10
|
#endif
|
11
11
|
|
12
|
+
#include <cstdio>
|
13
|
+
#include <inttypes.h>
|
14
|
+
|
12
15
|
namespace duckdb {
|
13
16
|
|
14
17
|
#define DUCKDB_GLOBAL(_PARAM) \
|
@@ -214,9 +217,71 @@ void DBConfig::SetDefaultMaxMemory() {
|
|
214
217
|
}
|
215
218
|
}
|
216
219
|
|
220
|
+
idx_t CGroupBandwidthQuota(idx_t physical_cores, FileSystem &fs) {
|
221
|
+
static constexpr const char *CPU_MAX = "/sys/fs/cgroup/cpu.max";
|
222
|
+
static constexpr const char *CFS_QUOTA = "/sys/fs/cgroup/cpu/cpu.cfs_quota_us";
|
223
|
+
static constexpr const char *CFS_PERIOD = "/sys/fs/cgroup/cpu/cpu.cfs_period_us";
|
224
|
+
|
225
|
+
int64_t quota, period;
|
226
|
+
char byte_buffer[1000];
|
227
|
+
unique_ptr<FileHandle> handle;
|
228
|
+
int64_t read_bytes;
|
229
|
+
|
230
|
+
if (fs.FileExists(CPU_MAX)) {
|
231
|
+
// cgroup v2
|
232
|
+
// https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html
|
233
|
+
handle =
|
234
|
+
fs.OpenFile(CPU_MAX, FileFlags::FILE_FLAGS_READ, FileSystem::DEFAULT_LOCK, FileSystem::DEFAULT_COMPRESSION);
|
235
|
+
read_bytes = fs.Read(*handle, (void *)byte_buffer, 999);
|
236
|
+
byte_buffer[read_bytes] = '\0';
|
237
|
+
if (std::sscanf(byte_buffer, "%" SCNd64 " %" SCNd64 "", &quota, &period) != 2) {
|
238
|
+
return physical_cores;
|
239
|
+
}
|
240
|
+
} else if (fs.FileExists(CFS_QUOTA) && fs.FileExists(CFS_PERIOD)) {
|
241
|
+
// cgroup v1
|
242
|
+
// https://www.kernel.org/doc/html/latest/scheduler/sched-bwc.html#management
|
243
|
+
|
244
|
+
// Read the quota, this indicates how many microseconds the CPU can be utilized by this cgroup per period
|
245
|
+
handle = fs.OpenFile(CFS_QUOTA, FileFlags::FILE_FLAGS_READ, FileSystem::DEFAULT_LOCK,
|
246
|
+
FileSystem::DEFAULT_COMPRESSION);
|
247
|
+
read_bytes = fs.Read(*handle, (void *)byte_buffer, 999);
|
248
|
+
byte_buffer[read_bytes] = '\0';
|
249
|
+
if (std::sscanf(byte_buffer, "%" SCNd64 "", &quota) != 1) {
|
250
|
+
return physical_cores;
|
251
|
+
}
|
252
|
+
|
253
|
+
// Read the time period, a cgroup can utilize the CPU up to quota microseconds every period
|
254
|
+
handle = fs.OpenFile(CFS_PERIOD, FileFlags::FILE_FLAGS_READ, FileSystem::DEFAULT_LOCK,
|
255
|
+
FileSystem::DEFAULT_COMPRESSION);
|
256
|
+
read_bytes = fs.Read(*handle, (void *)byte_buffer, 999);
|
257
|
+
byte_buffer[read_bytes] = '\0';
|
258
|
+
if (std::sscanf(byte_buffer, "%" SCNd64 "", &period) != 1) {
|
259
|
+
return physical_cores;
|
260
|
+
}
|
261
|
+
} else {
|
262
|
+
// No cgroup quota
|
263
|
+
return physical_cores;
|
264
|
+
}
|
265
|
+
if (quota > 0 && period > 0) {
|
266
|
+
return idx_t(std::ceil((double)quota / (double)period));
|
267
|
+
} else {
|
268
|
+
return physical_cores;
|
269
|
+
}
|
270
|
+
}
|
271
|
+
|
272
|
+
idx_t GetSystemMaxThreadsInternal(FileSystem &fs) {
|
273
|
+
idx_t physical_cores = std::thread::hardware_concurrency();
|
274
|
+
#ifdef __linux__
|
275
|
+
auto cores_available_per_period = CGroupBandwidthQuota(physical_cores, fs);
|
276
|
+
return MaxValue<idx_t>(cores_available_per_period, 1);
|
277
|
+
#else
|
278
|
+
return physical_cores;
|
279
|
+
#endif
|
280
|
+
}
|
281
|
+
|
217
282
|
void DBConfig::SetDefaultMaxThreads() {
|
218
283
|
#ifndef DUCKDB_NO_THREADS
|
219
|
-
options.maximum_threads =
|
284
|
+
options.maximum_threads = GetSystemMaxThreadsInternal(*file_system);
|
220
285
|
#else
|
221
286
|
options.maximum_threads = 1;
|
222
287
|
#endif
|
@@ -6,7 +6,6 @@
|
|
6
6
|
#include "duckdb/planner/operator/logical_comparison_join.hpp"
|
7
7
|
#include "duckdb/planner/operator/logical_get.hpp"
|
8
8
|
#include "duckdb/storage/data_table.hpp"
|
9
|
-
#include "duckdb/storage/statistics/numeric_statistics.hpp"
|
10
9
|
#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
|
11
10
|
|
12
11
|
namespace duckdb {
|
@@ -5,15 +5,21 @@ namespace duckdb {
|
|
5
5
|
|
6
6
|
unique_ptr<BaseStatistics> StatisticsPropagator::PropagateExpression(BoundAggregateExpression &aggr,
|
7
7
|
unique_ptr<Expression> *expr_ptr) {
|
8
|
-
vector<
|
8
|
+
vector<BaseStatistics> stats;
|
9
9
|
stats.reserve(aggr.children.size());
|
10
10
|
for (auto &child : aggr.children) {
|
11
|
-
|
11
|
+
auto stat = PropagateExpression(child);
|
12
|
+
if (!stat) {
|
13
|
+
stats.push_back(BaseStatistics::CreateUnknown(child->return_type));
|
14
|
+
} else {
|
15
|
+
stats.push_back(stat->Copy());
|
16
|
+
}
|
12
17
|
}
|
13
18
|
if (!aggr.function.statistics) {
|
14
19
|
return nullptr;
|
15
20
|
}
|
16
|
-
|
21
|
+
AggregateStatisticsInput input(aggr.bind_info.get(), stats, node_stats.get());
|
22
|
+
return aggr.function.statistics(context, aggr, input);
|
17
23
|
}
|
18
24
|
|
19
25
|
} // namespace duckdb
|
@@ -5,7 +5,6 @@
|
|
5
5
|
#include "duckdb/planner/expression/bound_constant_expression.hpp"
|
6
6
|
#include "duckdb/planner/expression/bound_function_expression.hpp"
|
7
7
|
#include "duckdb/storage/statistics/base_statistics.hpp"
|
8
|
-
#include "duckdb/storage/statistics/numeric_statistics.hpp"
|
9
8
|
#include "duckdb/common/operator/subtract.hpp"
|
10
9
|
|
11
10
|
namespace duckdb {
|
@@ -44,14 +43,14 @@ bool GetCastType(hugeint_t range, LogicalType &cast_type) {
|
|
44
43
|
}
|
45
44
|
|
46
45
|
template <class T>
|
47
|
-
unique_ptr<Expression> TemplatedCastToSmallestType(unique_ptr<Expression> expr,
|
46
|
+
unique_ptr<Expression> TemplatedCastToSmallestType(unique_ptr<Expression> expr, BaseStatistics &stats) {
|
48
47
|
// Compute range
|
49
|
-
if (
|
48
|
+
if (!NumericStats::HasMinMax(stats)) {
|
50
49
|
return expr;
|
51
50
|
}
|
52
51
|
|
53
|
-
auto signed_min_val =
|
54
|
-
auto signed_max_val =
|
52
|
+
auto signed_min_val = NumericStats::Min(stats).GetValue<T>();
|
53
|
+
auto signed_max_val = NumericStats::Max(stats).GetValue<T>();
|
55
54
|
if (signed_max_val < signed_min_val) {
|
56
55
|
return expr;
|
57
56
|
}
|
@@ -82,7 +81,7 @@ unique_ptr<Expression> TemplatedCastToSmallestType(unique_ptr<Expression> expr,
|
|
82
81
|
return BoundCastExpression::AddDefaultCastToType(std::move(minus_expr), cast_type);
|
83
82
|
}
|
84
83
|
|
85
|
-
unique_ptr<Expression> CastToSmallestType(unique_ptr<Expression> expr,
|
84
|
+
unique_ptr<Expression> CastToSmallestType(unique_ptr<Expression> expr, BaseStatistics &num_stats) {
|
86
85
|
auto physical_type = expr->return_type.InternalType();
|
87
86
|
switch (physical_type) {
|
88
87
|
case PhysicalType::UINT8:
|
@@ -111,7 +110,7 @@ void StatisticsPropagator::PropagateAndCompress(unique_ptr<Expression> &expr, un
|
|
111
110
|
stats = PropagateExpression(expr);
|
112
111
|
if (stats) {
|
113
112
|
if (expr->return_type.IsIntegral()) {
|
114
|
-
expr = CastToSmallestType(std::move(expr),
|
113
|
+
expr = CastToSmallestType(std::move(expr), *stats);
|
115
114
|
}
|
116
115
|
}
|
117
116
|
}
|
@@ -1,24 +1,27 @@
|
|
1
1
|
#include "duckdb/optimizer/statistics_propagator.hpp"
|
2
2
|
#include "duckdb/planner/expression/bound_cast_expression.hpp"
|
3
|
-
#include "duckdb/storage/statistics/numeric_statistics.hpp"
|
4
3
|
|
5
4
|
namespace duckdb {
|
6
5
|
|
7
|
-
static unique_ptr<BaseStatistics> StatisticsOperationsNumericNumericCast(const BaseStatistics
|
6
|
+
static unique_ptr<BaseStatistics> StatisticsOperationsNumericNumericCast(const BaseStatistics &input,
|
8
7
|
const LogicalType &target) {
|
9
|
-
|
10
|
-
|
11
|
-
|
8
|
+
if (!NumericStats::HasMinMax(input)) {
|
9
|
+
return nullptr;
|
10
|
+
}
|
11
|
+
Value min = NumericStats::Min(input);
|
12
|
+
Value max = NumericStats::Max(input);
|
12
13
|
if (!min.DefaultTryCastAs(target) || !max.DefaultTryCastAs(target)) {
|
13
14
|
// overflow in cast: bailout
|
14
15
|
return nullptr;
|
15
16
|
}
|
16
|
-
auto
|
17
|
-
|
18
|
-
|
17
|
+
auto result = NumericStats::CreateEmpty(target);
|
18
|
+
result.CopyBase(input);
|
19
|
+
NumericStats::SetMin(result, min);
|
20
|
+
NumericStats::SetMax(result, max);
|
21
|
+
return result.ToUnique();
|
19
22
|
}
|
20
23
|
|
21
|
-
static unique_ptr<BaseStatistics> StatisticsNumericCastSwitch(const BaseStatistics
|
24
|
+
static unique_ptr<BaseStatistics> StatisticsNumericCastSwitch(const BaseStatistics &input, const LogicalType &target) {
|
22
25
|
switch (target.InternalType()) {
|
23
26
|
case PhysicalType::INT8:
|
24
27
|
case PhysicalType::INT16:
|
@@ -48,13 +51,13 @@ unique_ptr<BaseStatistics> StatisticsPropagator::PropagateExpression(BoundCastEx
|
|
48
51
|
case PhysicalType::INT128:
|
49
52
|
case PhysicalType::FLOAT:
|
50
53
|
case PhysicalType::DOUBLE:
|
51
|
-
result_stats = StatisticsNumericCastSwitch(child_stats
|
54
|
+
result_stats = StatisticsNumericCastSwitch(*child_stats, cast.return_type);
|
52
55
|
break;
|
53
56
|
default:
|
54
57
|
return nullptr;
|
55
58
|
}
|
56
59
|
if (cast.try_cast && result_stats) {
|
57
|
-
result_stats->
|
60
|
+
result_stats->Set(StatsInfo::CAN_HAVE_NULL_VALUES);
|
58
61
|
}
|
59
62
|
return result_stats;
|
60
63
|
}
|