duckdb 0.7.2-dev225.0 → 0.7.2-dev314.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/column_reader.cpp +5 -6
- package/src/duckdb/extension/parquet/include/column_reader.hpp +1 -2
- package/src/duckdb/extension/parquet/include/generated_column_reader.hpp +1 -11
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +26 -32
- package/src/duckdb/src/common/sort/sort_state.cpp +5 -7
- package/src/duckdb/src/execution/column_binding_resolver.cpp +6 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +4 -5
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
- package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +2 -3
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +32 -6
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +15 -15
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +18 -12
- package/src/duckdb/src/function/aggregate/distributive/bitstring_agg.cpp +6 -13
- package/src/duckdb/src/function/aggregate/distributive/count.cpp +2 -4
- package/src/duckdb/src/function/aggregate/distributive/sum.cpp +11 -13
- package/src/duckdb/src/function/scalar/date/date_diff.cpp +0 -1
- package/src/duckdb/src/function/scalar/date/date_part.cpp +17 -25
- package/src/duckdb/src/function/scalar/date/date_sub.cpp +0 -1
- package/src/duckdb/src/function/scalar/date/date_trunc.cpp +10 -14
- package/src/duckdb/src/function/scalar/generic/stats.cpp +2 -4
- package/src/duckdb/src/function/scalar/list/flatten.cpp +5 -12
- package/src/duckdb/src/function/scalar/list/list_concat.cpp +3 -8
- package/src/duckdb/src/function/scalar/list/list_extract.cpp +5 -12
- package/src/duckdb/src/function/scalar/list/list_value.cpp +5 -9
- package/src/duckdb/src/function/scalar/math/numeric.cpp +14 -17
- package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +27 -34
- package/src/duckdb/src/function/scalar/string/caseconvert.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/instr.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/length.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/like.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/substring.cpp +2 -6
- package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +4 -9
- package/src/duckdb/src/function/scalar/struct/struct_insert.cpp +10 -13
- package/src/duckdb/src/function/scalar/struct/struct_pack.cpp +5 -6
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +12 -3
- package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_fetch.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_fetch.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +93 -31
- package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +22 -3
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +6 -6
- package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +41 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +157 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/segment_statistics.hpp +2 -7
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +74 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +42 -0
- package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +2 -3
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -3
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
- package/src/duckdb/src/main/config.cpp +66 -1
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +0 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_aggregate.cpp +9 -3
- package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +6 -7
- package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -11
- package/src/duckdb/src/optimizer/statistics/expression/propagate_columnref.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +13 -15
- package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +0 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_constant.cpp +3 -75
- package/src/duckdb/src/optimizer/statistics/expression/propagate_function.cpp +7 -2
- package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +10 -0
- package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +2 -3
- package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +28 -31
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +4 -5
- package/src/duckdb/src/optimizer/statistics/operator/propagate_set_operation.cpp +3 -3
- package/src/duckdb/src/optimizer/statistics_propagator.cpp +1 -1
- package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +4 -0
- package/src/duckdb/src/planner/bind_context.cpp +16 -0
- package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +0 -1
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +9 -0
- package/src/duckdb/src/planner/binder.cpp +2 -1
- package/src/duckdb/src/planner/bound_result_modifier.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +1 -1
- package/src/duckdb/src/planner/filter/constant_filter.cpp +4 -6
- package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -1
- package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +1 -4
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +4 -4
- package/src/duckdb/src/storage/compression/bitpacking.cpp +3 -3
- package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +3 -3
- package/src/duckdb/src/storage/compression/numeric_constant.cpp +9 -10
- package/src/duckdb/src/storage/compression/patas.cpp +1 -1
- package/src/duckdb/src/storage/compression/rle.cpp +2 -2
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +5 -5
- package/src/duckdb/src/storage/data_table.cpp +4 -6
- package/src/duckdb/src/storage/statistics/base_statistics.cpp +373 -128
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +58 -3
- package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +4 -9
- package/src/duckdb/src/storage/statistics/list_stats.cpp +117 -0
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +529 -0
- package/src/duckdb/src/storage/statistics/segment_statistics.cpp +2 -11
- package/src/duckdb/src/storage/statistics/string_stats.cpp +273 -0
- package/src/duckdb/src/storage/statistics/struct_stats.cpp +131 -0
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -4
- package/src/duckdb/src/storage/table/column_data.cpp +16 -14
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +2 -3
- package/src/duckdb/src/storage/table/column_segment.cpp +6 -8
- package/src/duckdb/src/storage/table/list_column_data.cpp +7 -11
- package/src/duckdb/src/storage/table/row_group.cpp +24 -23
- package/src/duckdb/src/storage/table/row_group_collection.cpp +12 -12
- package/src/duckdb/src/storage/table/standard_column_data.cpp +6 -6
- package/src/duckdb/src/storage/table/struct_column_data.cpp +15 -16
- package/src/duckdb/src/storage/table/table_statistics.cpp +27 -7
- package/src/duckdb/src/storage/table/update_segment.cpp +10 -12
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +923 -919
- package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +2 -0
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +15684 -15571
- package/src/duckdb/ub_src_storage_statistics.cpp +4 -6
- package/src/duckdb/src/include/duckdb/storage/statistics/list_statistics.hpp +0 -36
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_statistics.hpp +0 -75
- package/src/duckdb/src/include/duckdb/storage/statistics/string_statistics.hpp +0 -49
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_statistics.hpp +0 -36
- package/src/duckdb/src/include/duckdb/storage/statistics/validity_statistics.hpp +0 -45
- package/src/duckdb/src/storage/statistics/list_statistics.cpp +0 -94
- package/src/duckdb/src/storage/statistics/numeric_statistics.cpp +0 -307
- package/src/duckdb/src/storage/statistics/string_statistics.cpp +0 -220
- package/src/duckdb/src/storage/statistics/struct_statistics.cpp +0 -108
- package/src/duckdb/src/storage/statistics/validity_statistics.cpp +0 -91
@@ -4,15 +4,13 @@
|
|
4
4
|
|
5
5
|
#include "src/storage/statistics/distinct_statistics.cpp"
|
6
6
|
|
7
|
-
#include "src/storage/statistics/list_statistics.cpp"
|
7
|
+
#include "src/storage/statistics/list_stats.cpp"
|
8
8
|
|
9
|
-
#include "src/storage/statistics/numeric_statistics.cpp"
|
9
|
+
#include "src/storage/statistics/numeric_stats.cpp"
|
10
10
|
|
11
11
|
#include "src/storage/statistics/segment_statistics.cpp"
|
12
12
|
|
13
|
-
#include "src/storage/statistics/string_statistics.cpp"
|
13
|
+
#include "src/storage/statistics/string_stats.cpp"
|
14
14
|
|
15
|
-
#include "src/storage/statistics/struct_statistics.cpp"
|
16
|
-
|
17
|
-
#include "src/storage/statistics/validity_statistics.cpp"
|
15
|
+
#include "src/storage/statistics/struct_stats.cpp"
|
18
16
|
|
@@ -1,36 +0,0 @@
|
|
1
|
-
//===----------------------------------------------------------------------===//
|
2
|
-
// DuckDB
|
3
|
-
//
|
4
|
-
// duckdb/storage/statistics/list_statistics.hpp
|
5
|
-
//
|
6
|
-
//
|
7
|
-
//===----------------------------------------------------------------------===//
|
8
|
-
|
9
|
-
#pragma once
|
10
|
-
|
11
|
-
#include "duckdb/storage/statistics/base_statistics.hpp"
|
12
|
-
#include "duckdb/common/enums/filter_propagate_result.hpp"
|
13
|
-
#include "duckdb/storage/statistics/validity_statistics.hpp"
|
14
|
-
|
15
|
-
namespace duckdb {
|
16
|
-
class Value;
|
17
|
-
|
18
|
-
class ListStatistics : public BaseStatistics {
|
19
|
-
public:
|
20
|
-
explicit ListStatistics(LogicalType type);
|
21
|
-
|
22
|
-
unique_ptr<BaseStatistics> child_stats;
|
23
|
-
|
24
|
-
public:
|
25
|
-
void Merge(const BaseStatistics &other) override;
|
26
|
-
FilterPropagateResult CheckZonemap(ExpressionType comparison_type, const Value &constant) const;
|
27
|
-
|
28
|
-
unique_ptr<BaseStatistics> Copy() const override;
|
29
|
-
void Serialize(FieldWriter &serializer) const override;
|
30
|
-
static unique_ptr<BaseStatistics> Deserialize(FieldReader &source, LogicalType type);
|
31
|
-
void Verify(Vector &vector, const SelectionVector &sel, idx_t count) const override;
|
32
|
-
|
33
|
-
string ToString() const override;
|
34
|
-
};
|
35
|
-
|
36
|
-
} // namespace duckdb
|
@@ -1,75 +0,0 @@
|
|
1
|
-
//===----------------------------------------------------------------------===//
|
2
|
-
// DuckDB
|
3
|
-
//
|
4
|
-
// duckdb/storage/statistics/numeric_statistics.hpp
|
5
|
-
//
|
6
|
-
//
|
7
|
-
//===----------------------------------------------------------------------===//
|
8
|
-
|
9
|
-
#pragma once
|
10
|
-
|
11
|
-
#include "duckdb/storage/statistics/base_statistics.hpp"
|
12
|
-
#include "duckdb/storage/statistics/segment_statistics.hpp"
|
13
|
-
#include "duckdb/storage/statistics/validity_statistics.hpp"
|
14
|
-
#include "duckdb/common/serializer.hpp"
|
15
|
-
#include "duckdb/common/limits.hpp"
|
16
|
-
#include "duckdb/common/exception.hpp"
|
17
|
-
#include "duckdb/common/string_util.hpp"
|
18
|
-
#include "duckdb/common/types/value.hpp"
|
19
|
-
#include "duckdb/common/windows_undefs.hpp"
|
20
|
-
#include "duckdb/common/enums/filter_propagate_result.hpp"
|
21
|
-
|
22
|
-
namespace duckdb {
|
23
|
-
|
24
|
-
class NumericStatistics : public BaseStatistics {
|
25
|
-
public:
|
26
|
-
DUCKDB_API explicit NumericStatistics(LogicalType type, StatisticsType stats_type);
|
27
|
-
DUCKDB_API NumericStatistics(LogicalType type, Value min, Value max, StatisticsType stats_type);
|
28
|
-
|
29
|
-
//! The minimum value of the segment
|
30
|
-
Value min;
|
31
|
-
//! The maximum value of the segment
|
32
|
-
Value max;
|
33
|
-
|
34
|
-
public:
|
35
|
-
DUCKDB_API void Merge(const BaseStatistics &other) override;
|
36
|
-
|
37
|
-
DUCKDB_API bool IsConstant() const override;
|
38
|
-
|
39
|
-
DUCKDB_API FilterPropagateResult CheckZonemap(ExpressionType comparison_type, const Value &constant) const;
|
40
|
-
|
41
|
-
unique_ptr<BaseStatistics> Copy() const override;
|
42
|
-
void Serialize(FieldWriter &writer) const override;
|
43
|
-
static unique_ptr<BaseStatistics> Deserialize(FieldReader &reader, LogicalType type);
|
44
|
-
void Verify(Vector &vector, const SelectionVector &sel, idx_t count) const override;
|
45
|
-
|
46
|
-
string ToString() const override;
|
47
|
-
|
48
|
-
private:
|
49
|
-
template <class T>
|
50
|
-
void TemplatedVerify(Vector &vector, const SelectionVector &sel, idx_t count) const;
|
51
|
-
|
52
|
-
public:
|
53
|
-
template <class T>
|
54
|
-
static inline void UpdateValue(T new_value, T &min, T &max) {
|
55
|
-
if (LessThan::Operation(new_value, min)) {
|
56
|
-
min = new_value;
|
57
|
-
}
|
58
|
-
if (GreaterThan::Operation(new_value, max)) {
|
59
|
-
max = new_value;
|
60
|
-
}
|
61
|
-
}
|
62
|
-
|
63
|
-
template <class T>
|
64
|
-
static inline void Update(SegmentStatistics &stats, T new_value) {
|
65
|
-
auto &nstats = (NumericStatistics &)*stats.statistics;
|
66
|
-
UpdateValue<T>(new_value, nstats.min.GetReferenceUnsafe<T>(), nstats.max.GetReferenceUnsafe<T>());
|
67
|
-
}
|
68
|
-
};
|
69
|
-
|
70
|
-
template <>
|
71
|
-
void NumericStatistics::Update<interval_t>(SegmentStatistics &stats, interval_t new_value);
|
72
|
-
template <>
|
73
|
-
void NumericStatistics::Update<list_entry_t>(SegmentStatistics &stats, list_entry_t new_value);
|
74
|
-
|
75
|
-
} // namespace duckdb
|
@@ -1,49 +0,0 @@
|
|
1
|
-
//===----------------------------------------------------------------------===//
|
2
|
-
// DuckDB
|
3
|
-
//
|
4
|
-
// duckdb/storage/statistics/string_statistics.hpp
|
5
|
-
//
|
6
|
-
//
|
7
|
-
//===----------------------------------------------------------------------===//
|
8
|
-
|
9
|
-
#pragma once
|
10
|
-
|
11
|
-
#include "duckdb/storage/statistics/base_statistics.hpp"
|
12
|
-
#include "duckdb/common/enums/filter_propagate_result.hpp"
|
13
|
-
#include "duckdb/storage/statistics/validity_statistics.hpp"
|
14
|
-
|
15
|
-
namespace duckdb {
|
16
|
-
|
17
|
-
class StringStatistics : public BaseStatistics {
|
18
|
-
public:
|
19
|
-
constexpr static uint32_t MAX_STRING_MINMAX_SIZE = 8;
|
20
|
-
|
21
|
-
public:
|
22
|
-
DUCKDB_API explicit StringStatistics(LogicalType type, StatisticsType stats_type);
|
23
|
-
|
24
|
-
//! The minimum value of the segment, potentially truncated
|
25
|
-
data_t min[MAX_STRING_MINMAX_SIZE];
|
26
|
-
//! The maximum value of the segment, potentially truncated
|
27
|
-
data_t max[MAX_STRING_MINMAX_SIZE];
|
28
|
-
//! Whether or not the column can contain unicode characters
|
29
|
-
bool has_unicode;
|
30
|
-
//! The maximum string length in bytes
|
31
|
-
uint32_t max_string_length;
|
32
|
-
//! Whether or not the segment contains any big strings in overflow blocks
|
33
|
-
bool has_overflow_strings;
|
34
|
-
|
35
|
-
public:
|
36
|
-
DUCKDB_API void Update(const string_t &value);
|
37
|
-
DUCKDB_API void Merge(const BaseStatistics &other) override;
|
38
|
-
|
39
|
-
unique_ptr<BaseStatistics> Copy() const override;
|
40
|
-
void Serialize(FieldWriter &writer) const override;
|
41
|
-
static unique_ptr<BaseStatistics> Deserialize(FieldReader &reader, LogicalType type);
|
42
|
-
void Verify(Vector &vector, const SelectionVector &sel, idx_t count) const override;
|
43
|
-
|
44
|
-
FilterPropagateResult CheckZonemap(ExpressionType comparison_type, const string &value) const;
|
45
|
-
|
46
|
-
string ToString() const override;
|
47
|
-
};
|
48
|
-
|
49
|
-
} // namespace duckdb
|
@@ -1,36 +0,0 @@
|
|
1
|
-
//===----------------------------------------------------------------------===//
|
2
|
-
// DuckDB
|
3
|
-
//
|
4
|
-
// duckdb/storage/statistics/struct_statistics.hpp
|
5
|
-
//
|
6
|
-
//
|
7
|
-
//===----------------------------------------------------------------------===//
|
8
|
-
|
9
|
-
#pragma once
|
10
|
-
|
11
|
-
#include "duckdb/storage/statistics/base_statistics.hpp"
|
12
|
-
#include "duckdb/common/enums/filter_propagate_result.hpp"
|
13
|
-
#include "duckdb/storage/statistics/validity_statistics.hpp"
|
14
|
-
|
15
|
-
namespace duckdb {
|
16
|
-
class Value;
|
17
|
-
|
18
|
-
class StructStatistics : public BaseStatistics {
|
19
|
-
public:
|
20
|
-
explicit StructStatistics(LogicalType type);
|
21
|
-
|
22
|
-
vector<unique_ptr<BaseStatistics>> child_stats;
|
23
|
-
|
24
|
-
public:
|
25
|
-
void Merge(const BaseStatistics &other) override;
|
26
|
-
FilterPropagateResult CheckZonemap(ExpressionType comparison_type, const Value &constant) const;
|
27
|
-
|
28
|
-
unique_ptr<BaseStatistics> Copy() const override;
|
29
|
-
void Serialize(FieldWriter &writer) const override;
|
30
|
-
static unique_ptr<BaseStatistics> Deserialize(FieldReader &reader, LogicalType type);
|
31
|
-
void Verify(Vector &vector, const SelectionVector &sel, idx_t count) const override;
|
32
|
-
|
33
|
-
string ToString() const override;
|
34
|
-
};
|
35
|
-
|
36
|
-
} // namespace duckdb
|
@@ -1,45 +0,0 @@
|
|
1
|
-
//===----------------------------------------------------------------------===//
|
2
|
-
// DuckDB
|
3
|
-
//
|
4
|
-
// duckdb/storage/statistics/validity_statistics.hpp
|
5
|
-
//
|
6
|
-
//
|
7
|
-
//===----------------------------------------------------------------------===//
|
8
|
-
|
9
|
-
#pragma once
|
10
|
-
|
11
|
-
#include "duckdb/storage/statistics/base_statistics.hpp"
|
12
|
-
|
13
|
-
namespace duckdb {
|
14
|
-
class Serializer;
|
15
|
-
class Deserializer;
|
16
|
-
class Vector;
|
17
|
-
|
18
|
-
class ValidityStatistics : public BaseStatistics {
|
19
|
-
public:
|
20
|
-
DUCKDB_API explicit ValidityStatistics(bool has_null = false, bool has_no_null = true);
|
21
|
-
|
22
|
-
//! Whether or not the segment can contain NULL values
|
23
|
-
bool has_null;
|
24
|
-
//! Whether or not the segment can contain values that are not null
|
25
|
-
bool has_no_null;
|
26
|
-
|
27
|
-
public:
|
28
|
-
DUCKDB_API void Merge(const BaseStatistics &other) override;
|
29
|
-
|
30
|
-
DUCKDB_API bool IsConstant() const override;
|
31
|
-
|
32
|
-
unique_ptr<BaseStatistics> Copy() const override;
|
33
|
-
|
34
|
-
void Serialize(FieldWriter &writer) const override;
|
35
|
-
static unique_ptr<ValidityStatistics> Deserialize(FieldReader &reader);
|
36
|
-
|
37
|
-
void Verify(Vector &vector, const SelectionVector &sel, idx_t count) const override;
|
38
|
-
|
39
|
-
static unique_ptr<BaseStatistics> Combine(const unique_ptr<BaseStatistics> &lstats,
|
40
|
-
const unique_ptr<BaseStatistics> &rstats);
|
41
|
-
|
42
|
-
string ToString() const override;
|
43
|
-
};
|
44
|
-
|
45
|
-
} // namespace duckdb
|
@@ -1,94 +0,0 @@
|
|
1
|
-
#include "duckdb/storage/statistics/list_statistics.hpp"
|
2
|
-
|
3
|
-
#include "duckdb/common/field_writer.hpp"
|
4
|
-
#include "duckdb/common/string_util.hpp"
|
5
|
-
#include "duckdb/common/types/vector.hpp"
|
6
|
-
|
7
|
-
namespace duckdb {
|
8
|
-
|
9
|
-
ListStatistics::ListStatistics(LogicalType type_p) : BaseStatistics(std::move(type_p), StatisticsType::LOCAL_STATS) {
|
10
|
-
D_ASSERT(type.InternalType() == PhysicalType::LIST);
|
11
|
-
InitializeBase();
|
12
|
-
auto &child_type = ListType::GetChildType(type);
|
13
|
-
child_stats = BaseStatistics::CreateEmpty(child_type, StatisticsType::LOCAL_STATS);
|
14
|
-
}
|
15
|
-
|
16
|
-
void ListStatistics::Merge(const BaseStatistics &other_p) {
|
17
|
-
BaseStatistics::Merge(other_p);
|
18
|
-
|
19
|
-
auto &other = (const ListStatistics &)other_p;
|
20
|
-
if (child_stats && other.child_stats) {
|
21
|
-
child_stats->Merge(*other.child_stats);
|
22
|
-
} else {
|
23
|
-
child_stats.reset();
|
24
|
-
}
|
25
|
-
}
|
26
|
-
|
27
|
-
// LCOV_EXCL_START
|
28
|
-
FilterPropagateResult ListStatistics::CheckZonemap(ExpressionType comparison_type, const Value &constant) const {
|
29
|
-
throw InternalException("List zonemaps are not supported yet");
|
30
|
-
}
|
31
|
-
// LCOV_EXCL_STOP
|
32
|
-
|
33
|
-
unique_ptr<BaseStatistics> ListStatistics::Copy() const {
|
34
|
-
auto result = make_unique<ListStatistics>(type);
|
35
|
-
result->CopyBase(*this);
|
36
|
-
|
37
|
-
result->child_stats = child_stats ? child_stats->Copy() : nullptr;
|
38
|
-
return std::move(result);
|
39
|
-
}
|
40
|
-
|
41
|
-
void ListStatistics::Serialize(FieldWriter &writer) const {
|
42
|
-
writer.WriteSerializable(*child_stats);
|
43
|
-
}
|
44
|
-
|
45
|
-
unique_ptr<BaseStatistics> ListStatistics::Deserialize(FieldReader &reader, LogicalType type) {
|
46
|
-
D_ASSERT(type.InternalType() == PhysicalType::LIST);
|
47
|
-
auto result = make_unique<ListStatistics>(std::move(type));
|
48
|
-
auto &child_type = ListType::GetChildType(result->type);
|
49
|
-
result->child_stats = reader.ReadRequiredSerializable<BaseStatistics>(child_type);
|
50
|
-
return std::move(result);
|
51
|
-
}
|
52
|
-
|
53
|
-
string ListStatistics::ToString() const {
|
54
|
-
return StringUtil::Format("[%s]%s", child_stats ? child_stats->ToString() : "No Stats", BaseStatistics::ToString());
|
55
|
-
}
|
56
|
-
|
57
|
-
void ListStatistics::Verify(Vector &vector, const SelectionVector &sel, idx_t count) const {
|
58
|
-
BaseStatistics::Verify(vector, sel, count);
|
59
|
-
|
60
|
-
if (child_stats) {
|
61
|
-
auto &child_entry = ListVector::GetEntry(vector);
|
62
|
-
UnifiedVectorFormat vdata;
|
63
|
-
vector.ToUnifiedFormat(count, vdata);
|
64
|
-
|
65
|
-
auto list_data = (list_entry_t *)vdata.data;
|
66
|
-
idx_t total_list_count = 0;
|
67
|
-
for (idx_t i = 0; i < count; i++) {
|
68
|
-
auto idx = sel.get_index(i);
|
69
|
-
auto index = vdata.sel->get_index(idx);
|
70
|
-
auto list = list_data[index];
|
71
|
-
if (vdata.validity.RowIsValid(index)) {
|
72
|
-
for (idx_t list_idx = 0; list_idx < list.length; list_idx++) {
|
73
|
-
total_list_count++;
|
74
|
-
}
|
75
|
-
}
|
76
|
-
}
|
77
|
-
SelectionVector list_sel(total_list_count);
|
78
|
-
idx_t list_count = 0;
|
79
|
-
for (idx_t i = 0; i < count; i++) {
|
80
|
-
auto idx = sel.get_index(i);
|
81
|
-
auto index = vdata.sel->get_index(idx);
|
82
|
-
auto list = list_data[index];
|
83
|
-
if (vdata.validity.RowIsValid(index)) {
|
84
|
-
for (idx_t list_idx = 0; list_idx < list.length; list_idx++) {
|
85
|
-
list_sel.set_index(list_count++, list.offset + list_idx);
|
86
|
-
}
|
87
|
-
}
|
88
|
-
}
|
89
|
-
|
90
|
-
child_stats->Verify(child_entry, list_sel, list_count);
|
91
|
-
}
|
92
|
-
}
|
93
|
-
|
94
|
-
} // namespace duckdb
|
@@ -1,307 +0,0 @@
|
|
1
|
-
#include "duckdb/storage/statistics/numeric_statistics.hpp"
|
2
|
-
|
3
|
-
#include "duckdb/common/field_writer.hpp"
|
4
|
-
#include "duckdb/common/operator/comparison_operators.hpp"
|
5
|
-
#include "duckdb/common/types/vector.hpp"
|
6
|
-
|
7
|
-
namespace duckdb {
|
8
|
-
|
9
|
-
template <>
|
10
|
-
void NumericStatistics::Update<interval_t>(SegmentStatistics &stats, interval_t new_value) {
|
11
|
-
}
|
12
|
-
|
13
|
-
template <>
|
14
|
-
void NumericStatistics::Update<list_entry_t>(SegmentStatistics &stats, list_entry_t new_value) {
|
15
|
-
}
|
16
|
-
|
17
|
-
NumericStatistics::NumericStatistics(LogicalType type_p, StatisticsType stats_type)
|
18
|
-
: BaseStatistics(std::move(type_p), stats_type) {
|
19
|
-
InitializeBase();
|
20
|
-
min = Value::MaximumValue(type);
|
21
|
-
max = Value::MinimumValue(type);
|
22
|
-
}
|
23
|
-
|
24
|
-
NumericStatistics::NumericStatistics(LogicalType type_p, Value min_p, Value max_p, StatisticsType stats_type)
|
25
|
-
: BaseStatistics(std::move(type_p), stats_type), min(std::move(min_p)), max(std::move(max_p)) {
|
26
|
-
InitializeBase();
|
27
|
-
}
|
28
|
-
|
29
|
-
void NumericStatistics::Merge(const BaseStatistics &other_p) {
|
30
|
-
BaseStatistics::Merge(other_p);
|
31
|
-
auto &other = (const NumericStatistics &)other_p;
|
32
|
-
if (other.min.IsNull() || min.IsNull()) {
|
33
|
-
min = Value(type);
|
34
|
-
} else if (other.min < min) {
|
35
|
-
min = other.min;
|
36
|
-
}
|
37
|
-
if (other.max.IsNull() || max.IsNull()) {
|
38
|
-
max = Value(type);
|
39
|
-
} else if (other.max > max) {
|
40
|
-
max = other.max;
|
41
|
-
}
|
42
|
-
}
|
43
|
-
|
44
|
-
FilterPropagateResult NumericStatistics::CheckZonemap(ExpressionType comparison_type, const Value &constant) const {
|
45
|
-
if (constant.IsNull()) {
|
46
|
-
return FilterPropagateResult::FILTER_ALWAYS_FALSE;
|
47
|
-
}
|
48
|
-
if (min.IsNull() || max.IsNull()) {
|
49
|
-
return FilterPropagateResult::NO_PRUNING_POSSIBLE;
|
50
|
-
}
|
51
|
-
switch (comparison_type) {
|
52
|
-
case ExpressionType::COMPARE_EQUAL:
|
53
|
-
if (constant == min && constant == max) {
|
54
|
-
return FilterPropagateResult::FILTER_ALWAYS_TRUE;
|
55
|
-
} else if (constant >= min && constant <= max) {
|
56
|
-
return FilterPropagateResult::NO_PRUNING_POSSIBLE;
|
57
|
-
} else {
|
58
|
-
return FilterPropagateResult::FILTER_ALWAYS_FALSE;
|
59
|
-
}
|
60
|
-
case ExpressionType::COMPARE_NOTEQUAL:
|
61
|
-
if (constant < min || constant > max) {
|
62
|
-
return FilterPropagateResult::FILTER_ALWAYS_TRUE;
|
63
|
-
} else if (min == max && min == constant) {
|
64
|
-
// corner case of a cluster with one numeric equal to the target constant
|
65
|
-
return FilterPropagateResult::FILTER_ALWAYS_FALSE;
|
66
|
-
}
|
67
|
-
return FilterPropagateResult::NO_PRUNING_POSSIBLE;
|
68
|
-
case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
|
69
|
-
// X >= C
|
70
|
-
// this can be true only if max(X) >= C
|
71
|
-
// if min(X) >= C, then this is always true
|
72
|
-
if (min >= constant) {
|
73
|
-
return FilterPropagateResult::FILTER_ALWAYS_TRUE;
|
74
|
-
} else if (max >= constant) {
|
75
|
-
return FilterPropagateResult::NO_PRUNING_POSSIBLE;
|
76
|
-
} else {
|
77
|
-
return FilterPropagateResult::FILTER_ALWAYS_FALSE;
|
78
|
-
}
|
79
|
-
case ExpressionType::COMPARE_GREATERTHAN:
|
80
|
-
// X > C
|
81
|
-
// this can be true only if max(X) > C
|
82
|
-
// if min(X) > C, then this is always true
|
83
|
-
if (min > constant) {
|
84
|
-
return FilterPropagateResult::FILTER_ALWAYS_TRUE;
|
85
|
-
} else if (max > constant) {
|
86
|
-
return FilterPropagateResult::NO_PRUNING_POSSIBLE;
|
87
|
-
} else {
|
88
|
-
return FilterPropagateResult::FILTER_ALWAYS_FALSE;
|
89
|
-
}
|
90
|
-
case ExpressionType::COMPARE_LESSTHANOREQUALTO:
|
91
|
-
// X <= C
|
92
|
-
// this can be true only if min(X) <= C
|
93
|
-
// if max(X) <= C, then this is always true
|
94
|
-
if (max <= constant) {
|
95
|
-
return FilterPropagateResult::FILTER_ALWAYS_TRUE;
|
96
|
-
} else if (min <= constant) {
|
97
|
-
return FilterPropagateResult::NO_PRUNING_POSSIBLE;
|
98
|
-
} else {
|
99
|
-
return FilterPropagateResult::FILTER_ALWAYS_FALSE;
|
100
|
-
}
|
101
|
-
case ExpressionType::COMPARE_LESSTHAN:
|
102
|
-
// X < C
|
103
|
-
// this can be true only if min(X) < C
|
104
|
-
// if max(X) < C, then this is always true
|
105
|
-
if (max < constant) {
|
106
|
-
return FilterPropagateResult::FILTER_ALWAYS_TRUE;
|
107
|
-
} else if (min < constant) {
|
108
|
-
return FilterPropagateResult::NO_PRUNING_POSSIBLE;
|
109
|
-
} else {
|
110
|
-
return FilterPropagateResult::FILTER_ALWAYS_FALSE;
|
111
|
-
}
|
112
|
-
default:
|
113
|
-
throw InternalException("Expression type in zonemap check not implemented");
|
114
|
-
}
|
115
|
-
}
|
116
|
-
|
117
|
-
unique_ptr<BaseStatistics> NumericStatistics::Copy() const {
|
118
|
-
auto result = make_unique<NumericStatistics>(type, min, max, stats_type);
|
119
|
-
result->CopyBase(*this);
|
120
|
-
return std::move(result);
|
121
|
-
}
|
122
|
-
|
123
|
-
bool NumericStatistics::IsConstant() const {
|
124
|
-
return max <= min;
|
125
|
-
}
|
126
|
-
|
127
|
-
void SerializeNumericStatsValue(const Value &val, FieldWriter &writer) {
|
128
|
-
writer.WriteField<bool>(val.IsNull());
|
129
|
-
if (val.IsNull()) {
|
130
|
-
return;
|
131
|
-
}
|
132
|
-
switch (val.type().InternalType()) {
|
133
|
-
case PhysicalType::BOOL:
|
134
|
-
writer.WriteField<bool>(BooleanValue::Get(val));
|
135
|
-
break;
|
136
|
-
case PhysicalType::INT8:
|
137
|
-
writer.WriteField<int8_t>(TinyIntValue::Get(val));
|
138
|
-
break;
|
139
|
-
case PhysicalType::INT16:
|
140
|
-
writer.WriteField<int16_t>(SmallIntValue::Get(val));
|
141
|
-
break;
|
142
|
-
case PhysicalType::INT32:
|
143
|
-
writer.WriteField<int32_t>(IntegerValue::Get(val));
|
144
|
-
break;
|
145
|
-
case PhysicalType::INT64:
|
146
|
-
writer.WriteField<int64_t>(BigIntValue::Get(val));
|
147
|
-
break;
|
148
|
-
case PhysicalType::UINT8:
|
149
|
-
writer.WriteField<int8_t>(UTinyIntValue::Get(val));
|
150
|
-
break;
|
151
|
-
case PhysicalType::UINT16:
|
152
|
-
writer.WriteField<int16_t>(USmallIntValue::Get(val));
|
153
|
-
break;
|
154
|
-
case PhysicalType::UINT32:
|
155
|
-
writer.WriteField<int32_t>(UIntegerValue::Get(val));
|
156
|
-
break;
|
157
|
-
case PhysicalType::UINT64:
|
158
|
-
writer.WriteField<int64_t>(UBigIntValue::Get(val));
|
159
|
-
break;
|
160
|
-
case PhysicalType::INT128:
|
161
|
-
writer.WriteField<hugeint_t>(HugeIntValue::Get(val));
|
162
|
-
break;
|
163
|
-
case PhysicalType::FLOAT:
|
164
|
-
writer.WriteField<float>(FloatValue::Get(val));
|
165
|
-
break;
|
166
|
-
case PhysicalType::DOUBLE:
|
167
|
-
writer.WriteField<double>(DoubleValue::Get(val));
|
168
|
-
break;
|
169
|
-
default:
|
170
|
-
throw InternalException("Unsupported type for serializing numeric statistics");
|
171
|
-
}
|
172
|
-
}
|
173
|
-
|
174
|
-
void NumericStatistics::Serialize(FieldWriter &writer) const {
|
175
|
-
SerializeNumericStatsValue(min, writer);
|
176
|
-
SerializeNumericStatsValue(max, writer);
|
177
|
-
}
|
178
|
-
|
179
|
-
//! Reads back one numeric statistics bound from `reader`.
//! A bool flag is read first; when it is set, a Value constructed from `type` alone is returned
//! and nothing else is consumed. Otherwise the payload is read with the C++ type matching the
//! physical type of `type`, and the resulting value is reinterpreted as `type`.
//! Throws an InternalException for physical types that are not handled below.
Value DeserializeNumericStatsValue(const LogicalType &type, FieldReader &reader) {
	if (reader.ReadRequired<bool>()) {
		// "is NULL" flag set: no payload follows
		return Value(type);
	}
	Value decoded;
	switch (type.InternalType()) {
	case PhysicalType::BOOL: {
		decoded = Value::BOOLEAN(reader.ReadRequired<bool>());
		break;
	}
	case PhysicalType::INT8: {
		decoded = Value::TINYINT(reader.ReadRequired<int8_t>());
		break;
	}
	case PhysicalType::INT16: {
		decoded = Value::SMALLINT(reader.ReadRequired<int16_t>());
		break;
	}
	case PhysicalType::INT32: {
		decoded = Value::INTEGER(reader.ReadRequired<int32_t>());
		break;
	}
	case PhysicalType::INT64: {
		decoded = Value::BIGINT(reader.ReadRequired<int64_t>());
		break;
	}
	case PhysicalType::UINT8: {
		decoded = Value::UTINYINT(reader.ReadRequired<uint8_t>());
		break;
	}
	case PhysicalType::UINT16: {
		decoded = Value::USMALLINT(reader.ReadRequired<uint16_t>());
		break;
	}
	case PhysicalType::UINT32: {
		decoded = Value::UINTEGER(reader.ReadRequired<uint32_t>());
		break;
	}
	case PhysicalType::UINT64: {
		decoded = Value::UBIGINT(reader.ReadRequired<uint64_t>());
		break;
	}
	case PhysicalType::INT128: {
		decoded = Value::HUGEINT(reader.ReadRequired<hugeint_t>());
		break;
	}
	case PhysicalType::FLOAT: {
		decoded = Value::FLOAT(reader.ReadRequired<float>());
		break;
	}
	case PhysicalType::DOUBLE: {
		decoded = Value::DOUBLE(reader.ReadRequired<double>());
		break;
	}
	default:
		throw InternalException("Unsupported type for deserializing numeric statistics");
	}
	// the value was built from the physical type only; tag it with the requested logical type
	decoded.Reinterpret(type);
	return decoded;
}
|
228
|
-
|
229
|
-
//! Rebuilds NumericStatistics from `reader`: the minimum is read first, then the maximum.
//! The result is created with StatisticsType::LOCAL_STATS and returned as a BaseStatistics pointer.
unique_ptr<BaseStatistics> NumericStatistics::Deserialize(FieldReader &reader, LogicalType type) {
	// both bounds must be read (in this order) before `type` is moved into the result
	Value lower_bound = DeserializeNumericStatsValue(type, reader);
	Value upper_bound = DeserializeNumericStatsValue(type, reader);
	return make_unique_base<BaseStatistics, NumericStatistics>(std::move(type), std::move(lower_bound),
	                                                           std::move(upper_bound), StatisticsType::LOCAL_STATS);
}
|
235
|
-
|
236
|
-
//! Renders the statistics as "[Min: <min>, Max: <max>]" followed by the base statistics text.
string NumericStatistics::ToString() const {
	const auto min_text = min.ToString();
	const auto max_text = max.ToString();
	const auto base_text = BaseStatistics::ToString();
	return StringUtil::Format("[Min: %s, Max: %s]%s", min_text, max_text, base_text);
}
|
239
|
-
|
240
|
-
template <class T>
|
241
|
-
void NumericStatistics::TemplatedVerify(Vector &vector, const SelectionVector &sel, idx_t count) const {
|
242
|
-
UnifiedVectorFormat vdata;
|
243
|
-
vector.ToUnifiedFormat(count, vdata);
|
244
|
-
|
245
|
-
auto data = (T *)vdata.data;
|
246
|
-
for (idx_t i = 0; i < count; i++) {
|
247
|
-
auto idx = sel.get_index(i);
|
248
|
-
auto index = vdata.sel->get_index(idx);
|
249
|
-
if (!vdata.validity.RowIsValid(index)) {
|
250
|
-
continue;
|
251
|
-
}
|
252
|
-
if (!min.IsNull() && LessThan::Operation(data[index], min.GetValueUnsafe<T>())) { // LCOV_EXCL_START
|
253
|
-
throw InternalException("Statistics mismatch: value is smaller than min.\nStatistics: %s\nVector: %s",
|
254
|
-
ToString(), vector.ToString(count));
|
255
|
-
} // LCOV_EXCL_STOP
|
256
|
-
if (!max.IsNull() && GreaterThan::Operation(data[index], max.GetValueUnsafe<T>())) {
|
257
|
-
throw InternalException("Statistics mismatch: value is bigger than max.\nStatistics: %s\nVector: %s",
|
258
|
-
ToString(), vector.ToString(count));
|
259
|
-
}
|
260
|
-
}
|
261
|
-
}
|
262
|
-
|
263
|
-
//! Verifies `vector` against these statistics: runs the base-class verification first, then
//! dispatches on the physical type to the matching TemplatedVerify instantiation.
//! BOOL gets no min/max check; any unhandled physical type raises an InternalException.
void NumericStatistics::Verify(Vector &vector, const SelectionVector &sel, idx_t count) const {
	BaseStatistics::Verify(vector, sel, count);

	const auto physical_type = type.InternalType();
	switch (physical_type) {
	case PhysicalType::BOOL:
		// nothing to compare for booleans
		return;
	case PhysicalType::INT8:
		return TemplatedVerify<int8_t>(vector, sel, count);
	case PhysicalType::INT16:
		return TemplatedVerify<int16_t>(vector, sel, count);
	case PhysicalType::INT32:
		return TemplatedVerify<int32_t>(vector, sel, count);
	case PhysicalType::INT64:
		return TemplatedVerify<int64_t>(vector, sel, count);
	case PhysicalType::UINT8:
		return TemplatedVerify<uint8_t>(vector, sel, count);
	case PhysicalType::UINT16:
		return TemplatedVerify<uint16_t>(vector, sel, count);
	case PhysicalType::UINT32:
		return TemplatedVerify<uint32_t>(vector, sel, count);
	case PhysicalType::UINT64:
		return TemplatedVerify<uint64_t>(vector, sel, count);
	case PhysicalType::INT128:
		return TemplatedVerify<hugeint_t>(vector, sel, count);
	case PhysicalType::FLOAT:
		return TemplatedVerify<float>(vector, sel, count);
	case PhysicalType::DOUBLE:
		return TemplatedVerify<double>(vector, sel, count);
	default:
		throw InternalException("Unsupported type %s for numeric statistics verify", type.ToString());
	}
}
|
306
|
-
|
307
|
-
} // namespace duckdb
|