duckdb 0.7.2-dev225.0 → 0.7.2-dev314.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/column_reader.cpp +5 -6
- package/src/duckdb/extension/parquet/include/column_reader.hpp +1 -2
- package/src/duckdb/extension/parquet/include/generated_column_reader.hpp +1 -11
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +26 -32
- package/src/duckdb/src/common/sort/sort_state.cpp +5 -7
- package/src/duckdb/src/execution/column_binding_resolver.cpp +6 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +4 -5
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
- package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +2 -3
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +32 -6
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +15 -15
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +18 -12
- package/src/duckdb/src/function/aggregate/distributive/bitstring_agg.cpp +6 -13
- package/src/duckdb/src/function/aggregate/distributive/count.cpp +2 -4
- package/src/duckdb/src/function/aggregate/distributive/sum.cpp +11 -13
- package/src/duckdb/src/function/scalar/date/date_diff.cpp +0 -1
- package/src/duckdb/src/function/scalar/date/date_part.cpp +17 -25
- package/src/duckdb/src/function/scalar/date/date_sub.cpp +0 -1
- package/src/duckdb/src/function/scalar/date/date_trunc.cpp +10 -14
- package/src/duckdb/src/function/scalar/generic/stats.cpp +2 -4
- package/src/duckdb/src/function/scalar/list/flatten.cpp +5 -12
- package/src/duckdb/src/function/scalar/list/list_concat.cpp +3 -8
- package/src/duckdb/src/function/scalar/list/list_extract.cpp +5 -12
- package/src/duckdb/src/function/scalar/list/list_value.cpp +5 -9
- package/src/duckdb/src/function/scalar/math/numeric.cpp +14 -17
- package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +27 -34
- package/src/duckdb/src/function/scalar/string/caseconvert.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/instr.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/length.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/like.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/substring.cpp +2 -6
- package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +4 -9
- package/src/duckdb/src/function/scalar/struct/struct_insert.cpp +10 -13
- package/src/duckdb/src/function/scalar/struct/struct_pack.cpp +5 -6
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +12 -3
- package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_fetch.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_fetch.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +93 -31
- package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +22 -3
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +6 -6
- package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +41 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +157 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/segment_statistics.hpp +2 -7
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +74 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +42 -0
- package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +2 -3
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -3
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
- package/src/duckdb/src/main/config.cpp +66 -1
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +0 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_aggregate.cpp +9 -3
- package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +6 -7
- package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -11
- package/src/duckdb/src/optimizer/statistics/expression/propagate_columnref.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +13 -15
- package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +0 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_constant.cpp +3 -75
- package/src/duckdb/src/optimizer/statistics/expression/propagate_function.cpp +7 -2
- package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +10 -0
- package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +2 -3
- package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +28 -31
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +4 -5
- package/src/duckdb/src/optimizer/statistics/operator/propagate_set_operation.cpp +3 -3
- package/src/duckdb/src/optimizer/statistics_propagator.cpp +1 -1
- package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +4 -0
- package/src/duckdb/src/planner/bind_context.cpp +16 -0
- package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +0 -1
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +9 -0
- package/src/duckdb/src/planner/binder.cpp +2 -1
- package/src/duckdb/src/planner/bound_result_modifier.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +1 -1
- package/src/duckdb/src/planner/filter/constant_filter.cpp +4 -6
- package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -1
- package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +1 -4
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +4 -4
- package/src/duckdb/src/storage/compression/bitpacking.cpp +3 -3
- package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +3 -3
- package/src/duckdb/src/storage/compression/numeric_constant.cpp +9 -10
- package/src/duckdb/src/storage/compression/patas.cpp +1 -1
- package/src/duckdb/src/storage/compression/rle.cpp +2 -2
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +5 -5
- package/src/duckdb/src/storage/data_table.cpp +4 -6
- package/src/duckdb/src/storage/statistics/base_statistics.cpp +373 -128
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +58 -3
- package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +4 -9
- package/src/duckdb/src/storage/statistics/list_stats.cpp +117 -0
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +529 -0
- package/src/duckdb/src/storage/statistics/segment_statistics.cpp +2 -11
- package/src/duckdb/src/storage/statistics/string_stats.cpp +273 -0
- package/src/duckdb/src/storage/statistics/struct_stats.cpp +131 -0
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -4
- package/src/duckdb/src/storage/table/column_data.cpp +16 -14
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +2 -3
- package/src/duckdb/src/storage/table/column_segment.cpp +6 -8
- package/src/duckdb/src/storage/table/list_column_data.cpp +7 -11
- package/src/duckdb/src/storage/table/row_group.cpp +24 -23
- package/src/duckdb/src/storage/table/row_group_collection.cpp +12 -12
- package/src/duckdb/src/storage/table/standard_column_data.cpp +6 -6
- package/src/duckdb/src/storage/table/struct_column_data.cpp +15 -16
- package/src/duckdb/src/storage/table/table_statistics.cpp +27 -7
- package/src/duckdb/src/storage/table/update_segment.cpp +10 -12
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +923 -919
- package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +2 -0
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +15684 -15571
- package/src/duckdb/ub_src_storage_statistics.cpp +4 -6
- package/src/duckdb/src/include/duckdb/storage/statistics/list_statistics.hpp +0 -36
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_statistics.hpp +0 -75
- package/src/duckdb/src/include/duckdb/storage/statistics/string_statistics.hpp +0 -49
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_statistics.hpp +0 -36
- package/src/duckdb/src/include/duckdb/storage/statistics/validity_statistics.hpp +0 -45
- package/src/duckdb/src/storage/statistics/list_statistics.cpp +0 -94
- package/src/duckdb/src/storage/statistics/numeric_statistics.cpp +0 -307
- package/src/duckdb/src/storage/statistics/string_statistics.cpp +0 -220
- package/src/duckdb/src/storage/statistics/struct_statistics.cpp +0 -108
- package/src/duckdb/src/storage/statistics/validity_statistics.cpp +0 -91
@@ -3,7 +3,7 @@
|
|
3
3
|
#include "duckdb/parser/expression/bound_expression.hpp"
|
4
4
|
#include "duckdb/function/scalar/nested_functions.hpp"
|
5
5
|
#include "duckdb/common/case_insensitive_map.hpp"
|
6
|
-
#include "duckdb/storage/statistics/struct_statistics.hpp"
|
6
|
+
#include "duckdb/storage/statistics/struct_stats.hpp"
|
7
7
|
#include "duckdb/planner/expression_binder.hpp"
|
8
8
|
|
9
9
|
namespace duckdb {
|
@@ -81,22 +81,19 @@ static unique_ptr<FunctionData> StructInsertBind(ClientContext &context, ScalarF
|
|
81
81
|
unique_ptr<BaseStatistics> StructInsertStats(ClientContext &context, FunctionStatisticsInput &input) {
|
82
82
|
auto &child_stats = input.child_stats;
|
83
83
|
auto &expr = input.expr;
|
84
|
-
|
85
|
-
return nullptr;
|
86
|
-
}
|
87
|
-
auto &existing_struct_stats = (StructStatistics &)*child_stats[0];
|
88
|
-
auto new_struct_stats = make_unique<StructStatistics>(expr.return_type);
|
84
|
+
auto new_struct_stats = StructStats::CreateUnknown(expr.return_type);
|
89
85
|
|
90
|
-
|
91
|
-
|
92
|
-
|
86
|
+
auto existing_count = StructType::GetChildCount(child_stats[0].GetType());
|
87
|
+
auto existing_stats = StructStats::GetChildStats(child_stats[0]);
|
88
|
+
for (idx_t i = 0; i < existing_count; i++) {
|
89
|
+
StructStats::SetChildStats(new_struct_stats, i, existing_stats[i]);
|
93
90
|
}
|
94
|
-
|
95
|
-
auto offset =
|
91
|
+
auto new_count = StructType::GetChildCount(expr.return_type);
|
92
|
+
auto offset = new_count - child_stats.size();
|
96
93
|
for (idx_t i = 1; i < child_stats.size(); i++) {
|
97
|
-
new_struct_stats
|
94
|
+
StructStats::SetChildStats(new_struct_stats, offset + i, child_stats[i]);
|
98
95
|
}
|
99
|
-
return
|
96
|
+
return new_struct_stats.ToUnique();
|
100
97
|
}
|
101
98
|
|
102
99
|
void StructInsertFun::RegisterFunction(BuiltinFunctions &set) {
|
@@ -3,7 +3,7 @@
|
|
3
3
|
#include "duckdb/parser/expression/bound_expression.hpp"
|
4
4
|
#include "duckdb/function/scalar/nested_functions.hpp"
|
5
5
|
#include "duckdb/common/case_insensitive_map.hpp"
|
6
|
-
#include "duckdb/storage/statistics/struct_statistics.hpp"
|
6
|
+
#include "duckdb/storage/statistics/struct_stats.hpp"
|
7
7
|
#include "duckdb/planner/expression_binder.hpp"
|
8
8
|
|
9
9
|
namespace duckdb {
|
@@ -61,12 +61,11 @@ static unique_ptr<FunctionData> StructPackBind(ClientContext &context, ScalarFun
|
|
61
61
|
unique_ptr<BaseStatistics> StructPackStats(ClientContext &context, FunctionStatisticsInput &input) {
|
62
62
|
auto &child_stats = input.child_stats;
|
63
63
|
auto &expr = input.expr;
|
64
|
-
auto struct_stats =
|
65
|
-
|
66
|
-
|
67
|
-
struct_stats->child_stats[i] = child_stats[i] ? child_stats[i]->Copy() : nullptr;
|
64
|
+
auto struct_stats = StructStats::CreateUnknown(expr.return_type);
|
65
|
+
for (idx_t i = 0; i < child_stats.size(); i++) {
|
66
|
+
StructStats::SetChildStats(struct_stats, i, child_stats[i]);
|
68
67
|
}
|
69
|
-
return
|
68
|
+
return struct_stats.ToUnique();
|
70
69
|
}
|
71
70
|
|
72
71
|
void StructPackFun::RegisterFunction(BuiltinFunctions &set) {
|
@@ -1,8 +1,8 @@
|
|
1
1
|
#ifndef DUCKDB_VERSION
|
2
|
-
#define DUCKDB_VERSION "0.7.2-dev225"
|
2
|
+
#define DUCKDB_VERSION "0.7.2-dev314"
|
3
3
|
#endif
|
4
4
|
#ifndef DUCKDB_SOURCE_ID
|
5
|
-
#define DUCKDB_SOURCE_ID "
|
5
|
+
#define DUCKDB_SOURCE_ID "7e05c8c034"
|
6
6
|
#endif
|
7
7
|
#include "duckdb/function/table/system_functions.hpp"
|
8
8
|
#include "duckdb/main/database.hpp"
|
@@ -20,7 +20,7 @@ class PhysicalPerfectHashAggregate : public PhysicalOperator {
|
|
20
20
|
public:
|
21
21
|
PhysicalPerfectHashAggregate(ClientContext &context, vector<LogicalType> types,
|
22
22
|
vector<unique_ptr<Expression>> aggregates, vector<unique_ptr<Expression>> groups,
|
23
|
-
vector<unique_ptr<BaseStatistics>> group_stats, vector<idx_t> required_bits,
|
23
|
+
const vector<unique_ptr<BaseStatistics>> &group_stats, vector<idx_t> required_bits,
|
24
24
|
idx_t estimated_cardinality);
|
25
25
|
|
26
26
|
//! The groups
|
@@ -29,6 +29,17 @@ struct AggregateInputData {
|
|
29
29
|
Allocator &allocator;
|
30
30
|
};
|
31
31
|
|
32
|
+
struct AggregateStatisticsInput {
|
33
|
+
AggregateStatisticsInput(FunctionData *bind_data_p, vector<BaseStatistics> &child_stats_p,
|
34
|
+
NodeStatistics *node_stats_p)
|
35
|
+
: bind_data(bind_data_p), child_stats(child_stats_p), node_stats(node_stats_p) {
|
36
|
+
}
|
37
|
+
|
38
|
+
FunctionData *bind_data;
|
39
|
+
vector<BaseStatistics> &child_stats;
|
40
|
+
NodeStatistics *node_stats;
|
41
|
+
};
|
42
|
+
|
32
43
|
//! The type used for sizing hashed aggregate function states
|
33
44
|
typedef idx_t (*aggregate_size_t)();
|
34
45
|
//! The type used for initializing hashed aggregate function states
|
@@ -43,9 +54,7 @@ typedef void (*aggregate_finalize_t)(Vector &state, AggregateInputData &aggr_inp
|
|
43
54
|
idx_t offset);
|
44
55
|
//! The type used for propagating statistics in aggregate functions (optional)
|
45
56
|
typedef unique_ptr<BaseStatistics> (*aggregate_statistics_t)(ClientContext &context, BoundAggregateExpression &expr,
|
46
|
-
|
47
|
-
vector<unique_ptr<BaseStatistics>> &child_stats,
|
48
|
-
NodeStatistics *node_stats);
|
57
|
+
AggregateStatisticsInput &input);
|
49
58
|
//! Binds the scalar function and creates the function data
|
50
59
|
typedef unique_ptr<FunctionData> (*bind_aggregate_function_t)(ClientContext &context, AggregateFunction &function,
|
51
60
|
vector<unique_ptr<Expression>> &arguments);
|
@@ -29,13 +29,13 @@ class ScalarFunctionCatalogEntry;
|
|
29
29
|
|
30
30
|
struct FunctionStatisticsInput {
|
31
31
|
FunctionStatisticsInput(BoundFunctionExpression &expr_p, FunctionData *bind_data_p,
|
32
|
-
vector<unique_ptr<BaseStatistics>> &child_stats_p, unique_ptr<Expression> *expr_ptr_p)
|
32
|
+
vector<BaseStatistics> &child_stats_p, unique_ptr<Expression> *expr_ptr_p)
|
33
33
|
: expr(expr_p), bind_data(bind_data_p), child_stats(child_stats_p), expr_ptr(expr_ptr_p) {
|
34
34
|
}
|
35
35
|
|
36
36
|
BoundFunctionExpression &expr;
|
37
37
|
FunctionData *bind_data;
|
38
|
-
vector<unique_ptr<BaseStatistics>> &child_stats;
|
38
|
+
vector<BaseStatistics> &child_stats;
|
39
39
|
unique_ptr<Expression> *expr_ptr;
|
40
40
|
};
|
41
41
|
|
@@ -141,6 +141,8 @@ public:
|
|
141
141
|
|
142
142
|
//! Add all the bindings from a BindContext to this BindContext. The other BindContext is destroyed in the process.
|
143
143
|
void AddContext(BindContext other);
|
144
|
+
//! For semi and anti joins we remove the binding context of the right table after binding the condition.
|
145
|
+
void RemoveContext(vector<std::pair<string, duckdb::Binding *>> &other_bindings_list);
|
144
146
|
|
145
147
|
//! Gets a binding of the specified name. Returns a nullptr and sets the out_error if the binding could not be
|
146
148
|
//! found.
|
@@ -12,6 +12,7 @@
|
|
12
12
|
|
13
13
|
namespace duckdb {
|
14
14
|
class DuckTableEntry;
|
15
|
+
class TableStatistics;
|
15
16
|
|
16
17
|
//! The table data writer is responsible for writing the data of a table to
|
17
18
|
//! storage.
|
@@ -30,7 +31,7 @@ public:
|
|
30
31
|
|
31
32
|
CompressionType GetColumnCompressionType(idx_t i);
|
32
33
|
|
33
|
-
virtual void FinalizeTable(
|
34
|
+
virtual void FinalizeTable(TableStatistics &&global_stats, DataTableInfo *info) = 0;
|
34
35
|
virtual unique_ptr<RowGroupWriter> GetRowGroupWriter(RowGroup &row_group) = 0;
|
35
36
|
|
36
37
|
virtual void AddRowGroup(RowGroupPointer &&row_group_pointer, unique_ptr<RowGroupWriter> &&writer);
|
@@ -47,7 +48,7 @@ public:
|
|
47
48
|
MetaBlockWriter &table_data_writer, MetaBlockWriter &meta_data_writer);
|
48
49
|
|
49
50
|
public:
|
50
|
-
virtual void FinalizeTable(
|
51
|
+
virtual void FinalizeTable(TableStatistics &&global_stats, DataTableInfo *info) override;
|
51
52
|
virtual unique_ptr<RowGroupWriter> GetRowGroupWriter(RowGroup &row_group) override;
|
52
53
|
|
53
54
|
private:
|
@@ -18,7 +18,7 @@
|
|
18
18
|
#include "duckdb/function/compression/compression.hpp"
|
19
19
|
#include "duckdb/main/config.hpp"
|
20
20
|
#include "duckdb/storage/buffer_manager.hpp"
|
21
|
-
|
21
|
+
|
22
22
|
#include "duckdb/storage/table/column_data_checkpointer.hpp"
|
23
23
|
#include "duckdb/storage/table/column_segment.hpp"
|
24
24
|
#include "duckdb/common/operator/subtract.hpp"
|
@@ -150,7 +150,7 @@ public:
|
|
150
150
|
|
151
151
|
if (is_valid) {
|
152
152
|
T floating_point_value = Load<T>((const_data_ptr_t)&value);
|
153
|
-
|
153
|
+
NumericStats::Update<T>(current_segment->stats.statistics, floating_point_value);
|
154
154
|
} else {
|
155
155
|
//! FIXME: find a cheaper alternative to storing a NULL
|
156
156
|
// store this as "value_identical", only using 9 bits for a NULL
|
@@ -17,7 +17,7 @@
|
|
17
17
|
#include "duckdb/function/compression_function.hpp"
|
18
18
|
#include "duckdb/main/config.hpp"
|
19
19
|
#include "duckdb/storage/buffer_manager.hpp"
|
20
|
-
|
20
|
+
|
21
21
|
#include "duckdb/storage/table/column_data_checkpointer.hpp"
|
22
22
|
#include "duckdb/storage/table/column_segment.hpp"
|
23
23
|
#include "duckdb/common/operator/subtract.hpp"
|
@@ -17,7 +17,7 @@
|
|
17
17
|
#include "duckdb/function/compression_function.hpp"
|
18
18
|
#include "duckdb/main/config.hpp"
|
19
19
|
#include "duckdb/storage/buffer_manager.hpp"
|
20
|
-
|
20
|
+
|
21
21
|
#include "duckdb/storage/table/column_data_checkpointer.hpp"
|
22
22
|
#include "duckdb/storage/table/column_segment.hpp"
|
23
23
|
#include "duckdb/common/operator/subtract.hpp"
|
@@ -18,7 +18,7 @@
|
|
18
18
|
#include "duckdb/function/compression/compression.hpp"
|
19
19
|
#include "duckdb/main/config.hpp"
|
20
20
|
#include "duckdb/storage/buffer_manager.hpp"
|
21
|
-
|
21
|
+
|
22
22
|
#include "duckdb/storage/table/column_data_checkpointer.hpp"
|
23
23
|
#include "duckdb/storage/table/column_segment.hpp"
|
24
24
|
#include "duckdb/common/operator/subtract.hpp"
|
@@ -49,7 +49,7 @@ public:
|
|
49
49
|
}
|
50
50
|
|
51
51
|
if (is_valid) {
|
52
|
-
|
52
|
+
NumericStats::Update<VALUE_TYPE>(state_wrapper->current_segment->stats.statistics, value);
|
53
53
|
}
|
54
54
|
|
55
55
|
state_wrapper->WriteValue(Load<EXACT_TYPE>((const_data_ptr_t)&value));
|
@@ -17,7 +17,7 @@
|
|
17
17
|
#include "duckdb/function/compression_function.hpp"
|
18
18
|
#include "duckdb/main/config.hpp"
|
19
19
|
#include "duckdb/storage/buffer_manager.hpp"
|
20
|
-
|
20
|
+
|
21
21
|
#include "duckdb/storage/table/column_data_checkpointer.hpp"
|
22
22
|
#include "duckdb/storage/table/column_segment.hpp"
|
23
23
|
#include "duckdb/common/operator/subtract.hpp"
|
@@ -17,7 +17,7 @@
|
|
17
17
|
#include "duckdb/function/compression_function.hpp"
|
18
18
|
#include "duckdb/main/config.hpp"
|
19
19
|
#include "duckdb/storage/buffer_manager.hpp"
|
20
|
-
|
20
|
+
|
21
21
|
#include "duckdb/storage/table/column_data_checkpointer.hpp"
|
22
22
|
#include "duckdb/storage/table/column_segment.hpp"
|
23
23
|
#include "duckdb/common/operator/subtract.hpp"
|
@@ -18,12 +18,15 @@
|
|
18
18
|
namespace duckdb {
|
19
19
|
|
20
20
|
struct DataPointer {
|
21
|
+
DataPointer(BaseStatistics stats) : statistics(std::move(stats)) {
|
22
|
+
}
|
23
|
+
|
21
24
|
uint64_t row_start;
|
22
25
|
uint64_t tuple_count;
|
23
26
|
BlockPointer block_pointer;
|
24
27
|
CompressionType compression_type;
|
25
28
|
//! Type-specific statistics of the segment
|
26
|
-
|
29
|
+
BaseStatistics statistics;
|
27
30
|
};
|
28
31
|
|
29
32
|
struct RowGroupPointer {
|
@@ -32,7 +35,7 @@ struct RowGroupPointer {
|
|
32
35
|
//! The data pointers of the column segments stored in the row group
|
33
36
|
vector<BlockPointer> data_pointers;
|
34
37
|
//! The per-column statistics of the row group
|
35
|
-
vector<
|
38
|
+
vector<BaseStatistics> statistics;
|
36
39
|
//! The versions information of the row group (if any)
|
37
40
|
shared_ptr<VersionNode> versions;
|
38
41
|
};
|
@@ -169,7 +169,7 @@ public:
|
|
169
169
|
//! Get statistics of a physical column within the table
|
170
170
|
unique_ptr<BaseStatistics> GetStatistics(ClientContext &context, column_t column_id);
|
171
171
|
//! Sets statistics of a physical column within the table
|
172
|
-
void
|
172
|
+
void SetDistinct(column_t column_id, unique_ptr<DistinctStatistics> distinct_stats);
|
173
173
|
|
174
174
|
//! Checkpoint the table to the specified table data writer
|
175
175
|
void Checkpoint(TableDataWriter &writer);
|
@@ -13,6 +13,8 @@
|
|
13
13
|
#include "duckdb/common/operator/comparison_operators.hpp"
|
14
14
|
#include "duckdb/common/enums/expression_type.hpp"
|
15
15
|
#include "duckdb/common/types/value.hpp"
|
16
|
+
#include "duckdb/storage/statistics/numeric_stats.hpp"
|
17
|
+
#include "duckdb/storage/statistics/string_stats.hpp"
|
16
18
|
|
17
19
|
namespace duckdb {
|
18
20
|
struct SelectionVector;
|
@@ -22,61 +24,121 @@ class Deserializer;
|
|
22
24
|
class FieldWriter;
|
23
25
|
class FieldReader;
|
24
26
|
class Vector;
|
25
|
-
class ValidityStatistics;
|
26
|
-
class DistinctStatistics;
|
27
27
|
struct UnifiedVectorFormat;
|
28
28
|
|
29
|
-
enum
|
29
|
+
enum class StatsInfo : uint8_t {
|
30
|
+
CAN_HAVE_NULL_VALUES = 0,
|
31
|
+
CANNOT_HAVE_NULL_VALUES = 1,
|
32
|
+
CAN_HAVE_VALID_VALUES = 2,
|
33
|
+
CANNOT_HAVE_VALID_VALUES = 3,
|
34
|
+
CAN_HAVE_NULL_AND_VALID_VALUES = 4
|
35
|
+
};
|
30
36
|
|
31
|
-
class
|
32
|
-
public:
|
33
|
-
BaseStatistics(LogicalType type, StatisticsType stats_type);
|
34
|
-
virtual ~BaseStatistics();
|
37
|
+
enum class StatisticsType : uint8_t { NUMERIC_STATS, STRING_STATS, LIST_STATS, STRUCT_STATS, BASE_STATS };
|
35
38
|
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
unique_ptr<BaseStatistics> distinct_stats;
|
42
|
-
idx_t distinct_count; // estimate that one may have even if distinct_stats==nullptr
|
39
|
+
class BaseStatistics {
|
40
|
+
friend struct NumericStats;
|
41
|
+
friend struct StringStats;
|
42
|
+
friend struct StructStats;
|
43
|
+
friend struct ListStats;
|
43
44
|
|
44
|
-
|
45
|
-
|
46
|
-
|
45
|
+
public:
|
46
|
+
DUCKDB_API ~BaseStatistics();
|
47
|
+
// disable copy constructors
|
48
|
+
BaseStatistics(const BaseStatistics &other) = delete;
|
49
|
+
BaseStatistics &operator=(const BaseStatistics &) = delete;
|
50
|
+
//! enable move constructors
|
51
|
+
DUCKDB_API BaseStatistics(BaseStatistics &&other) noexcept;
|
52
|
+
DUCKDB_API BaseStatistics &operator=(BaseStatistics &&) noexcept;
|
47
53
|
|
48
54
|
public:
|
49
|
-
|
55
|
+
//! Creates a set of statistics for data that is unknown, i.e. "has_null" is true, "has_no_null" is true, etc
|
56
|
+
//! This can be used in case nothing is known about the data - or can be used as a baseline when only a few things
|
57
|
+
//! are known
|
58
|
+
static BaseStatistics CreateUnknown(LogicalType type);
|
59
|
+
//! Creates statistics for an empty database, i.e. "has_null" is false, "has_no_null" is false, etc
|
60
|
+
//! This is used when incrementally constructing statistics by constantly adding new values
|
61
|
+
static BaseStatistics CreateEmpty(LogicalType type);
|
62
|
+
|
63
|
+
DUCKDB_API StatisticsType GetStatsType() const;
|
64
|
+
DUCKDB_API static StatisticsType GetStatsType(const LogicalType &type);
|
50
65
|
|
51
66
|
DUCKDB_API bool CanHaveNull() const;
|
52
67
|
DUCKDB_API bool CanHaveNoNull() const;
|
53
68
|
|
54
|
-
void
|
69
|
+
void SetDistinctCount(idx_t distinct_count);
|
70
|
+
|
71
|
+
bool IsConstant() const;
|
72
|
+
|
73
|
+
const LogicalType &GetType() const {
|
74
|
+
return type;
|
75
|
+
}
|
55
76
|
|
56
|
-
|
57
|
-
|
77
|
+
void Set(StatsInfo info);
|
78
|
+
void CombineValidity(BaseStatistics &left, BaseStatistics &right);
|
79
|
+
void CopyValidity(BaseStatistics &stats);
|
80
|
+
inline void SetHasNull() {
|
81
|
+
has_null = true;
|
82
|
+
}
|
83
|
+
inline void SetHasNoNull() {
|
84
|
+
has_no_null = true;
|
58
85
|
}
|
59
86
|
|
60
|
-
|
87
|
+
void Merge(const BaseStatistics &other);
|
88
|
+
|
89
|
+
void Copy(const BaseStatistics &other);
|
61
90
|
|
62
|
-
|
91
|
+
BaseStatistics Copy() const;
|
92
|
+
unique_ptr<BaseStatistics> ToUnique() const;
|
63
93
|
void CopyBase(const BaseStatistics &orig);
|
64
94
|
|
65
|
-
|
66
|
-
|
95
|
+
void Serialize(Serializer &serializer) const;
|
96
|
+
void Serialize(FieldWriter &writer) const;
|
67
97
|
|
68
|
-
|
98
|
+
idx_t GetDistinctCount();
|
69
99
|
|
70
|
-
static
|
100
|
+
static BaseStatistics Deserialize(Deserializer &source, LogicalType type);
|
71
101
|
|
72
102
|
//! Verify that a vector does not violate the statistics
|
73
|
-
|
103
|
+
void Verify(Vector &vector, const SelectionVector &sel, idx_t count) const;
|
74
104
|
void Verify(Vector &vector, idx_t count) const;
|
75
105
|
|
76
|
-
|
106
|
+
string ToString() const;
|
107
|
+
|
108
|
+
static BaseStatistics FromConstant(const Value &input);
|
109
|
+
|
110
|
+
private:
|
111
|
+
BaseStatistics();
|
112
|
+
explicit BaseStatistics(LogicalType type);
|
77
113
|
|
78
|
-
|
79
|
-
|
114
|
+
static void Construct(BaseStatistics &stats, LogicalType type);
|
115
|
+
|
116
|
+
void InitializeUnknown();
|
117
|
+
void InitializeEmpty();
|
118
|
+
|
119
|
+
static BaseStatistics CreateUnknownType(LogicalType type);
|
120
|
+
static BaseStatistics CreateEmptyType(LogicalType type);
|
121
|
+
static BaseStatistics DeserializeType(FieldReader &reader, LogicalType type);
|
122
|
+
static BaseStatistics FromConstantType(const Value &input);
|
123
|
+
|
124
|
+
private:
|
125
|
+
//! The type of the logical segment
|
126
|
+
LogicalType type;
|
127
|
+
//! Whether or not the segment can contain NULL values
|
128
|
+
bool has_null;
|
129
|
+
//! Whether or not the segment can contain values that are not null
|
130
|
+
bool has_no_null;
|
131
|
+
// estimate that one may have even if distinct_stats==nullptr
|
132
|
+
idx_t distinct_count;
|
133
|
+
//! Numeric and String stats
|
134
|
+
union {
|
135
|
+
//! Numeric stats data, for numeric stats
|
136
|
+
NumericStatsData numeric_data;
|
137
|
+
//! String stats data, for string stats
|
138
|
+
StringStatsData string_data;
|
139
|
+
} stats_union;
|
140
|
+
//! Child stats (for LIST and STRUCT)
|
141
|
+
unique_ptr<BaseStatistics[]> child_stats;
|
80
142
|
};
|
81
143
|
|
82
144
|
} // namespace duckdb
|
@@ -9,17 +9,36 @@
|
|
9
9
|
#pragma once
|
10
10
|
|
11
11
|
#include "duckdb/storage/statistics/base_statistics.hpp"
|
12
|
+
#include "duckdb/storage/statistics/distinct_statistics.hpp"
|
12
13
|
|
13
14
|
namespace duckdb {
|
14
15
|
|
15
16
|
class ColumnStatistics {
|
16
17
|
public:
|
17
|
-
explicit ColumnStatistics(
|
18
|
-
|
19
|
-
unique_ptr<BaseStatistics> stats;
|
18
|
+
explicit ColumnStatistics(BaseStatistics stats_p);
|
19
|
+
ColumnStatistics(BaseStatistics stats_p, unique_ptr<DistinctStatistics> distinct_stats_p);
|
20
20
|
|
21
21
|
public:
|
22
22
|
static shared_ptr<ColumnStatistics> CreateEmptyStats(const LogicalType &type);
|
23
|
+
|
24
|
+
void Merge(ColumnStatistics &other);
|
25
|
+
|
26
|
+
void UpdateDistinctStatistics(Vector &v, idx_t count);
|
27
|
+
|
28
|
+
BaseStatistics &Statistics();
|
29
|
+
|
30
|
+
bool HasDistinctStats();
|
31
|
+
DistinctStatistics &DistinctStats();
|
32
|
+
void SetDistinct(unique_ptr<DistinctStatistics> distinct_stats);
|
33
|
+
|
34
|
+
shared_ptr<ColumnStatistics> Copy() const;
|
35
|
+
void Serialize(Serializer &serializer) const;
|
36
|
+
static shared_ptr<ColumnStatistics> Deserialize(Deserializer &source, const LogicalType &type);
|
37
|
+
|
38
|
+
private:
|
39
|
+
BaseStatistics stats;
|
40
|
+
//! The approximate count distinct stats of the column
|
41
|
+
unique_ptr<DistinctStatistics> distinct_stats;
|
23
42
|
};
|
24
43
|
|
25
44
|
} // namespace duckdb
|
@@ -17,7 +17,7 @@ class Serializer;
|
|
17
17
|
class Deserializer;
|
18
18
|
class Vector;
|
19
19
|
|
20
|
-
class DistinctStatistics
|
20
|
+
class DistinctStatistics {
|
21
21
|
public:
|
22
22
|
DistinctStatistics();
|
23
23
|
explicit DistinctStatistics(unique_ptr<HyperLogLog> log, idx_t sample_count, idx_t total_count);
|
@@ -30,12 +30,12 @@ public:
|
|
30
30
|
atomic<idx_t> total_count;
|
31
31
|
|
32
32
|
public:
|
33
|
-
void Merge(const
|
33
|
+
void Merge(const DistinctStatistics &other);
|
34
34
|
|
35
|
-
unique_ptr<
|
35
|
+
unique_ptr<DistinctStatistics> Copy() const;
|
36
36
|
|
37
|
-
void Serialize(Serializer &serializer) const
|
38
|
-
void Serialize(FieldWriter &writer) const
|
37
|
+
void Serialize(Serializer &serializer) const;
|
38
|
+
void Serialize(FieldWriter &writer) const;
|
39
39
|
|
40
40
|
static unique_ptr<DistinctStatistics> Deserialize(Deserializer &source);
|
41
41
|
static unique_ptr<DistinctStatistics> Deserialize(FieldReader &reader);
|
@@ -43,7 +43,7 @@ public:
|
|
43
43
|
void Update(Vector &update, idx_t count, bool sample = true);
|
44
44
|
void Update(UnifiedVectorFormat &update_data, const LogicalType &ptype, idx_t count, bool sample = true);
|
45
45
|
|
46
|
-
string ToString() const
|
46
|
+
string ToString() const;
|
47
47
|
idx_t GetCount() const;
|
48
48
|
|
49
49
|
private:
|
@@ -0,0 +1,41 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/storage/statistics/list_stats.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/common/common.hpp"
|
12
|
+
#include "duckdb/common/exception.hpp"
|
13
|
+
#include "duckdb/common/types/hugeint.hpp"
|
14
|
+
|
15
|
+
namespace duckdb {
|
16
|
+
class BaseStatistics;
|
17
|
+
class FieldWriter;
|
18
|
+
class FieldReader;
|
19
|
+
struct SelectionVector;
|
20
|
+
class Vector;
|
21
|
+
|
22
|
+
struct ListStats {
|
23
|
+
DUCKDB_API static void Construct(BaseStatistics &stats);
|
24
|
+
DUCKDB_API static BaseStatistics CreateUnknown(LogicalType type);
|
25
|
+
DUCKDB_API static BaseStatistics CreateEmpty(LogicalType type);
|
26
|
+
|
27
|
+
DUCKDB_API static const BaseStatistics &GetChildStats(const BaseStatistics &stats);
|
28
|
+
DUCKDB_API static BaseStatistics &GetChildStats(BaseStatistics &stats);
|
29
|
+
DUCKDB_API static void SetChildStats(BaseStatistics &stats, unique_ptr<BaseStatistics> new_stats);
|
30
|
+
|
31
|
+
DUCKDB_API static void Serialize(const BaseStatistics &stats, FieldWriter &writer);
|
32
|
+
DUCKDB_API static BaseStatistics Deserialize(FieldReader &reader, LogicalType type);
|
33
|
+
|
34
|
+
DUCKDB_API static string ToString(const BaseStatistics &stats);
|
35
|
+
|
36
|
+
DUCKDB_API static void Merge(BaseStatistics &stats, const BaseStatistics &other);
|
37
|
+
DUCKDB_API static void Copy(BaseStatistics &stats, const BaseStatistics &other);
|
38
|
+
DUCKDB_API static void Verify(const BaseStatistics &stats, Vector &vector, const SelectionVector &sel, idx_t count);
|
39
|
+
};
|
40
|
+
|
41
|
+
} // namespace duckdb
|