duckdb 0.7.2-dev16.0 → 0.7.2-dev314.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +2 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-extension.cpp +2 -0
- package/src/duckdb/extension/icu/icu-table-range.cpp +194 -0
- package/src/duckdb/extension/icu/include/icu-table-range.hpp +17 -0
- package/src/duckdb/extension/parquet/column_reader.cpp +5 -6
- package/src/duckdb/extension/parquet/column_writer.cpp +0 -1
- package/src/duckdb/extension/parquet/include/column_reader.hpp +1 -2
- package/src/duckdb/extension/parquet/include/generated_column_reader.hpp +1 -11
- package/src/duckdb/extension/parquet/parquet-extension.cpp +11 -2
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +26 -32
- package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +4 -0
- package/src/duckdb/src/catalog/catalog_entry/scalar_function_catalog_entry.cpp +7 -6
- package/src/duckdb/src/catalog/catalog_entry/table_function_catalog_entry.cpp +20 -1
- package/src/duckdb/src/common/enums/statement_type.cpp +2 -0
- package/src/duckdb/src/common/sort/sort_state.cpp +5 -7
- package/src/duckdb/src/common/types/bit.cpp +95 -58
- package/src/duckdb/src/common/types/value.cpp +149 -53
- package/src/duckdb/src/common/types/vector.cpp +13 -10
- package/src/duckdb/src/execution/column_binding_resolver.cpp +6 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +4 -5
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
- package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +2 -3
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +32 -6
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +15 -15
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +18 -12
- package/src/duckdb/src/function/aggregate/algebraic/avg.cpp +0 -6
- package/src/duckdb/src/function/aggregate/distributive/bitagg.cpp +99 -95
- package/src/duckdb/src/function/aggregate/distributive/bitstring_agg.cpp +254 -0
- package/src/duckdb/src/function/aggregate/distributive/count.cpp +2 -4
- package/src/duckdb/src/function/aggregate/distributive/sum.cpp +11 -16
- package/src/duckdb/src/function/aggregate/distributive_functions.cpp +1 -0
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +16 -5
- package/src/duckdb/src/function/cast/bit_cast.cpp +0 -2
- package/src/duckdb/src/function/cast/blob_cast.cpp +0 -1
- package/src/duckdb/src/function/scalar/bit/bitstring.cpp +99 -0
- package/src/duckdb/src/function/scalar/date/date_diff.cpp +0 -1
- package/src/duckdb/src/function/scalar/date/date_part.cpp +17 -25
- package/src/duckdb/src/function/scalar/date/date_sub.cpp +0 -1
- package/src/duckdb/src/function/scalar/date/date_trunc.cpp +10 -14
- package/src/duckdb/src/function/scalar/generic/stats.cpp +2 -4
- package/src/duckdb/src/function/scalar/list/flatten.cpp +5 -12
- package/src/duckdb/src/function/scalar/list/list_concat.cpp +3 -8
- package/src/duckdb/src/function/scalar/list/list_extract.cpp +5 -12
- package/src/duckdb/src/function/scalar/list/list_value.cpp +5 -9
- package/src/duckdb/src/function/scalar/map/map_entries.cpp +61 -0
- package/src/duckdb/src/function/scalar/map/map_keys_values.cpp +97 -0
- package/src/duckdb/src/function/scalar/math/numeric.cpp +14 -17
- package/src/duckdb/src/function/scalar/nested_functions.cpp +3 -0
- package/src/duckdb/src/function/scalar/operators/add.cpp +0 -9
- package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +29 -48
- package/src/duckdb/src/function/scalar/operators/bitwise.cpp +0 -63
- package/src/duckdb/src/function/scalar/operators/multiply.cpp +0 -6
- package/src/duckdb/src/function/scalar/operators/subtract.cpp +0 -6
- package/src/duckdb/src/function/scalar/string/caseconvert.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/instr.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/length.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/like.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/substring.cpp +2 -6
- package/src/duckdb/src/function/scalar/string_functions.cpp +1 -0
- package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +4 -9
- package/src/duckdb/src/function/scalar/struct/struct_insert.cpp +10 -13
- package/src/duckdb/src/function/scalar/struct/struct_pack.cpp +5 -6
- package/src/duckdb/src/function/table/read_csv.cpp +9 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/function/table_function.cpp +19 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp +6 -8
- package/src/duckdb/src/include/duckdb/common/constants.hpp +0 -19
- package/src/duckdb/src/include/duckdb/common/enums/statement_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/enums/tableref_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/types/bit.hpp +5 -1
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +2 -8
- package/src/duckdb/src/include/duckdb/common/types.hpp +1 -2
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/aggregate/distributive_functions.hpp +5 -0
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +12 -3
- package/src/duckdb/src/include/duckdb/function/scalar/bit_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +12 -0
- package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/table_function.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/database.hpp +1 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/{alter_function_info.hpp → alter_scalar_function_info.hpp} +13 -13
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_function_info.hpp +47 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_table_function_info.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/statement/multi_statement.hpp +28 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/pivotref.hpp +76 -0
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +28 -0
- package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +8 -0
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +76 -44
- package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_fetch.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_fetch.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +93 -29
- package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +22 -3
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +6 -6
- package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +41 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/node_statistics.hpp +26 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +157 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/segment_statistics.hpp +2 -7
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +74 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +42 -0
- package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +2 -3
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -3
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
- package/src/duckdb/src/include/duckdb.h +49 -1
- package/src/duckdb/src/include/duckdb.hpp +0 -1
- package/src/duckdb/src/main/capi/pending-c.cpp +16 -3
- package/src/duckdb/src/main/capi/result-c.cpp +27 -1
- package/src/duckdb/src/main/capi/stream-c.cpp +25 -0
- package/src/duckdb/src/main/client_context.cpp +8 -1
- package/src/duckdb/src/main/config.cpp +66 -1
- package/src/duckdb/src/main/database.cpp +10 -2
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +98 -67
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +16 -3
- package/src/duckdb/src/optimizer/statistics/expression/propagate_aggregate.cpp +9 -3
- package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +6 -7
- package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -11
- package/src/duckdb/src/optimizer/statistics/expression/propagate_columnref.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +13 -15
- package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +0 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_constant.cpp +3 -75
- package/src/duckdb/src/optimizer/statistics/expression/propagate_function.cpp +7 -2
- package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +10 -0
- package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +2 -3
- package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +28 -31
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +4 -5
- package/src/duckdb/src/optimizer/statistics/operator/propagate_set_operation.cpp +3 -3
- package/src/duckdb/src/optimizer/statistics_propagator.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/alter_info.cpp +7 -3
- package/src/duckdb/src/parser/parsed_data/alter_scalar_function_info.cpp +56 -0
- package/src/duckdb/src/parser/parsed_data/alter_table_function_info.cpp +51 -0
- package/src/duckdb/src/parser/parsed_data/create_scalar_function_info.cpp +3 -2
- package/src/duckdb/src/parser/parsed_data/create_table_function_info.cpp +6 -0
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +8 -0
- package/src/duckdb/src/parser/query_node.cpp +1 -1
- package/src/duckdb/src/parser/statement/multi_statement.cpp +18 -0
- package/src/duckdb/src/parser/tableref/pivotref.cpp +296 -0
- package/src/duckdb/src/parser/tableref.cpp +3 -0
- package/src/duckdb/src/parser/transform/helpers/transform_alias.cpp +12 -6
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +24 -0
- package/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +4 -0
- package/src/duckdb/src/parser/transform/statement/transform_create_view.cpp +4 -0
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +150 -0
- package/src/duckdb/src/parser/transform/statement/transform_select.cpp +8 -0
- package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +1 -1
- package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +4 -0
- package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +105 -0
- package/src/duckdb/src/parser/transform/tableref/transform_tableref.cpp +2 -0
- package/src/duckdb/src/parser/transformer.cpp +15 -3
- package/src/duckdb/src/planner/bind_context.cpp +16 -0
- package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +11 -3
- package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +0 -1
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_logical_plan.cpp +17 -0
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +9 -0
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +365 -0
- package/src/duckdb/src/planner/binder.cpp +7 -1
- package/src/duckdb/src/planner/bound_result_modifier.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +1 -1
- package/src/duckdb/src/planner/filter/constant_filter.cpp +4 -6
- package/src/duckdb/src/planner/pragma_handler.cpp +10 -2
- package/src/duckdb/src/storage/buffer_manager.cpp +44 -46
- package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -1
- package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +1 -4
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +4 -4
- package/src/duckdb/src/storage/compression/bitpacking.cpp +28 -24
- package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +43 -45
- package/src/duckdb/src/storage/compression/numeric_constant.cpp +9 -10
- package/src/duckdb/src/storage/compression/patas.cpp +1 -1
- package/src/duckdb/src/storage/compression/rle.cpp +19 -15
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +5 -5
- package/src/duckdb/src/storage/data_table.cpp +4 -6
- package/src/duckdb/src/storage/statistics/base_statistics.cpp +373 -128
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +58 -3
- package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +4 -9
- package/src/duckdb/src/storage/statistics/list_stats.cpp +117 -0
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +529 -0
- package/src/duckdb/src/storage/statistics/segment_statistics.cpp +2 -11
- package/src/duckdb/src/storage/statistics/string_stats.cpp +273 -0
- package/src/duckdb/src/storage/statistics/struct_stats.cpp +131 -0
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -4
- package/src/duckdb/src/storage/table/column_data.cpp +16 -11
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +2 -3
- package/src/duckdb/src/storage/table/column_segment.cpp +6 -8
- package/src/duckdb/src/storage/table/list_column_data.cpp +39 -58
- package/src/duckdb/src/storage/table/row_group.cpp +24 -23
- package/src/duckdb/src/storage/table/row_group_collection.cpp +12 -12
- package/src/duckdb/src/storage/table/standard_column_data.cpp +6 -6
- package/src/duckdb/src/storage/table/struct_column_data.cpp +15 -16
- package/src/duckdb/src/storage/table/table_statistics.cpp +27 -7
- package/src/duckdb/src/storage/table/update_segment.cpp +10 -12
- package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +3 -0
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +34 -1
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +1020 -530
- package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +7 -0
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +23560 -22737
- package/src/duckdb/ub_src_function_aggregate_distributive.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_bit.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_map.cpp +4 -0
- package/src/duckdb/ub_src_main_capi.cpp +2 -0
- package/src/duckdb/ub_src_parser_parsed_data.cpp +4 -2
- package/src/duckdb/ub_src_parser_statement.cpp +2 -0
- package/src/duckdb/ub_src_parser_tableref.cpp +2 -0
- package/src/duckdb/ub_src_parser_transform_statement.cpp +2 -0
- package/src/duckdb/ub_src_parser_transform_tableref.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_tableref.cpp +2 -0
- package/src/duckdb/ub_src_storage_statistics.cpp +4 -6
- package/src/duckdb/src/include/duckdb/main/loadable_extension.hpp +0 -59
- package/src/duckdb/src/include/duckdb/storage/statistics/list_statistics.hpp +0 -36
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_statistics.hpp +0 -75
- package/src/duckdb/src/include/duckdb/storage/statistics/string_statistics.hpp +0 -49
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_statistics.hpp +0 -36
- package/src/duckdb/src/include/duckdb/storage/statistics/validity_statistics.hpp +0 -45
- package/src/duckdb/src/parser/parsed_data/alter_function_info.cpp +0 -55
- package/src/duckdb/src/storage/statistics/list_statistics.cpp +0 -94
- package/src/duckdb/src/storage/statistics/numeric_statistics.cpp +0 -307
- package/src/duckdb/src/storage/statistics/string_statistics.cpp +0 -220
- package/src/duckdb/src/storage/statistics/struct_statistics.cpp +0 -108
- package/src/duckdb/src/storage/statistics/validity_statistics.cpp +0 -91
@@ -1,307 +0,0 @@
|
|
1
|
-
#include "duckdb/storage/statistics/numeric_statistics.hpp"
|
2
|
-
|
3
|
-
#include "duckdb/common/field_writer.hpp"
|
4
|
-
#include "duckdb/common/operator/comparison_operators.hpp"
|
5
|
-
#include "duckdb/common/types/vector.hpp"
|
6
|
-
|
7
|
-
namespace duckdb {
|
8
|
-
|
9
|
-
template <>
|
10
|
-
void NumericStatistics::Update<interval_t>(SegmentStatistics &stats, interval_t new_value) {
|
11
|
-
}
|
12
|
-
|
13
|
-
template <>
|
14
|
-
void NumericStatistics::Update<list_entry_t>(SegmentStatistics &stats, list_entry_t new_value) {
|
15
|
-
}
|
16
|
-
|
17
|
-
//! Creates numeric statistics for the given type without explicit bounds.
//! The bounds start out inverted: min is set to the maximum value of the type and max to the
//! minimum value of the type.
//! NOTE(review): presumably this makes the first merged/updated value replace both bounds - confirm.
NumericStatistics::NumericStatistics(LogicalType type_p, StatisticsType stats_type)
    : BaseStatistics(std::move(type_p), stats_type) {
	InitializeBase();
	// "type" is the member set up by the BaseStatistics constructor above
	min = Value::MaximumValue(type);
	max = Value::MinimumValue(type);
}
|
23
|
-
|
24
|
-
//! Creates numeric statistics for the given type with explicit bounds.
//! \param type_p     logical type the statistics describe (moved into the base class)
//! \param min_p      lower bound (moved into the min member)
//! \param max_p      upper bound (moved into the max member)
//! \param stats_type statistics kind forwarded to BaseStatistics
NumericStatistics::NumericStatistics(LogicalType type_p, Value min_p, Value max_p, StatisticsType stats_type)
    : BaseStatistics(std::move(type_p), stats_type), min(std::move(min_p)), max(std::move(max_p)) {
	InitializeBase();
}
|
28
|
-
|
29
|
-
void NumericStatistics::Merge(const BaseStatistics &other_p) {
|
30
|
-
BaseStatistics::Merge(other_p);
|
31
|
-
auto &other = (const NumericStatistics &)other_p;
|
32
|
-
if (other.min.IsNull() || min.IsNull()) {
|
33
|
-
min = Value(type);
|
34
|
-
} else if (other.min < min) {
|
35
|
-
min = other.min;
|
36
|
-
}
|
37
|
-
if (other.max.IsNull() || max.IsNull()) {
|
38
|
-
max = Value(type);
|
39
|
-
} else if (other.max > max) {
|
40
|
-
max = other.max;
|
41
|
-
}
|
42
|
-
}
|
43
|
-
|
44
|
-
//! Checks whether the comparison "X <comparison_type> constant" can be decided from [min, max] alone.
//! \param comparison_type one of the six COMPARE_* expression types handled below
//! \param constant        right-hand side of the comparison
//! \return FILTER_ALWAYS_TRUE / FILTER_ALWAYS_FALSE when the bounds decide the comparison,
//!         NO_PRUNING_POSSIBLE otherwise (including when min or max is NULL)
//! \throws InternalException for any other expression type
FilterPropagateResult NumericStatistics::CheckZonemap(ExpressionType comparison_type, const Value &constant) const {
	const auto always_true = FilterPropagateResult::FILTER_ALWAYS_TRUE;
	const auto always_false = FilterPropagateResult::FILTER_ALWAYS_FALSE;
	const auto undecided = FilterPropagateResult::NO_PRUNING_POSSIBLE;

	// a NULL constant is reported as "always false"
	if (constant.IsNull()) {
		return always_false;
	}
	// without both bounds nothing can be decided
	if (min.IsNull() || max.IsNull()) {
		return undecided;
	}

	switch (comparison_type) {
	case ExpressionType::COMPARE_EQUAL:
		// X = C: always true only if min(X) = max(X) = C; impossible if C lies outside [min, max]
		if (constant == min && constant == max) {
			return always_true;
		}
		if (constant >= min && constant <= max) {
			return undecided;
		}
		return always_false;
	case ExpressionType::COMPARE_NOTEQUAL:
		// X != C: always true if C lies outside [min, max]
		if (constant < min || constant > max) {
			return always_true;
		}
		// corner case: the range holds a single value that equals the constant
		if (min == max && min == constant) {
			return always_false;
		}
		return undecided;
	case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
		// X >= C: always true if min(X) >= C, possible only if max(X) >= C
		if (min >= constant) {
			return always_true;
		}
		if (max >= constant) {
			return undecided;
		}
		return always_false;
	case ExpressionType::COMPARE_GREATERTHAN:
		// X > C: always true if min(X) > C, possible only if max(X) > C
		if (min > constant) {
			return always_true;
		}
		if (max > constant) {
			return undecided;
		}
		return always_false;
	case ExpressionType::COMPARE_LESSTHANOREQUALTO:
		// X <= C: always true if max(X) <= C, possible only if min(X) <= C
		if (max <= constant) {
			return always_true;
		}
		if (min <= constant) {
			return undecided;
		}
		return always_false;
	case ExpressionType::COMPARE_LESSTHAN:
		// X < C: always true if max(X) < C, possible only if min(X) < C
		if (max < constant) {
			return always_true;
		}
		if (min < constant) {
			return undecided;
		}
		return always_false;
	default:
		throw InternalException("Expression type in zonemap check not implemented");
	}
}
|
116
|
-
|
117
|
-
//! Returns a new NumericStatistics with the same type, bounds and statistics kind;
//! the base-class state is transferred with CopyBase.
unique_ptr<BaseStatistics> NumericStatistics::Copy() const {
	auto copy = make_unique<NumericStatistics>(type, min, max, stats_type);
	copy->CopyBase(*this);
	// explicit move: the local is a unique_ptr to the derived type, the return type is the base type
	return std::move(copy);
}
|
122
|
-
|
123
|
-
bool NumericStatistics::IsConstant() const {
|
124
|
-
return max <= min;
|
125
|
-
}
|
126
|
-
|
127
|
-
void SerializeNumericStatsValue(const Value &val, FieldWriter &writer) {
|
128
|
-
writer.WriteField<bool>(val.IsNull());
|
129
|
-
if (val.IsNull()) {
|
130
|
-
return;
|
131
|
-
}
|
132
|
-
switch (val.type().InternalType()) {
|
133
|
-
case PhysicalType::BOOL:
|
134
|
-
writer.WriteField<bool>(BooleanValue::Get(val));
|
135
|
-
break;
|
136
|
-
case PhysicalType::INT8:
|
137
|
-
writer.WriteField<int8_t>(TinyIntValue::Get(val));
|
138
|
-
break;
|
139
|
-
case PhysicalType::INT16:
|
140
|
-
writer.WriteField<int16_t>(SmallIntValue::Get(val));
|
141
|
-
break;
|
142
|
-
case PhysicalType::INT32:
|
143
|
-
writer.WriteField<int32_t>(IntegerValue::Get(val));
|
144
|
-
break;
|
145
|
-
case PhysicalType::INT64:
|
146
|
-
writer.WriteField<int64_t>(BigIntValue::Get(val));
|
147
|
-
break;
|
148
|
-
case PhysicalType::UINT8:
|
149
|
-
writer.WriteField<int8_t>(UTinyIntValue::Get(val));
|
150
|
-
break;
|
151
|
-
case PhysicalType::UINT16:
|
152
|
-
writer.WriteField<int16_t>(USmallIntValue::Get(val));
|
153
|
-
break;
|
154
|
-
case PhysicalType::UINT32:
|
155
|
-
writer.WriteField<int32_t>(UIntegerValue::Get(val));
|
156
|
-
break;
|
157
|
-
case PhysicalType::UINT64:
|
158
|
-
writer.WriteField<int64_t>(UBigIntValue::Get(val));
|
159
|
-
break;
|
160
|
-
case PhysicalType::INT128:
|
161
|
-
writer.WriteField<hugeint_t>(HugeIntValue::Get(val));
|
162
|
-
break;
|
163
|
-
case PhysicalType::FLOAT:
|
164
|
-
writer.WriteField<float>(FloatValue::Get(val));
|
165
|
-
break;
|
166
|
-
case PhysicalType::DOUBLE:
|
167
|
-
writer.WriteField<double>(DoubleValue::Get(val));
|
168
|
-
break;
|
169
|
-
default:
|
170
|
-
throw InternalException("Unsupported type for serializing numeric statistics");
|
171
|
-
}
|
172
|
-
}
|
173
|
-
|
174
|
-
//! Serializes the bounds: min first, then max, each through SerializeNumericStatsValue.
void NumericStatistics::Serialize(FieldWriter &writer) const {
	// order matters: Deserialize reads min before max
	SerializeNumericStatsValue(min, writer);
	SerializeNumericStatsValue(max, writer);
}
|
178
|
-
|
179
|
-
//! Reads a single numeric bound: a bool "is NULL" field, followed (only when not NULL) by one
//! field holding the value in the native type of the physical type of "type".
//! \param type   logical type of the bound being read
//! \param reader source field reader
//! \return a NULL Value of the given type, or the value that was read after Reinterpret(type)
//! \throws InternalException if the physical type is not one of the handled numeric types
Value DeserializeNumericStatsValue(const LogicalType &type, FieldReader &reader) {
	const bool stored_null = reader.ReadRequired<bool>();
	if (stored_null) {
		return Value(type);
	}

	Value bound;
	switch (type.InternalType()) {
	case PhysicalType::BOOL:
		bound = Value::BOOLEAN(reader.ReadRequired<bool>());
		break;
	case PhysicalType::INT8:
		bound = Value::TINYINT(reader.ReadRequired<int8_t>());
		break;
	case PhysicalType::INT16:
		bound = Value::SMALLINT(reader.ReadRequired<int16_t>());
		break;
	case PhysicalType::INT32:
		bound = Value::INTEGER(reader.ReadRequired<int32_t>());
		break;
	case PhysicalType::INT64:
		bound = Value::BIGINT(reader.ReadRequired<int64_t>());
		break;
	case PhysicalType::UINT8:
		bound = Value::UTINYINT(reader.ReadRequired<uint8_t>());
		break;
	case PhysicalType::UINT16:
		bound = Value::USMALLINT(reader.ReadRequired<uint16_t>());
		break;
	case PhysicalType::UINT32:
		bound = Value::UINTEGER(reader.ReadRequired<uint32_t>());
		break;
	case PhysicalType::UINT64:
		bound = Value::UBIGINT(reader.ReadRequired<uint64_t>());
		break;
	case PhysicalType::INT128:
		bound = Value::HUGEINT(reader.ReadRequired<hugeint_t>());
		break;
	case PhysicalType::FLOAT:
		bound = Value::FLOAT(reader.ReadRequired<float>());
		break;
	case PhysicalType::DOUBLE:
		bound = Value::DOUBLE(reader.ReadRequired<double>());
		break;
	default:
		throw InternalException("Unsupported type for deserializing numeric statistics");
	}
	// NOTE(review): presumably re-tags the physically typed value with the requested logical type - confirm
	bound.Reinterpret(type);
	return bound;
}
|
228
|
-
|
229
|
-
//! Reads min and then max from the reader and builds NumericStatistics from them.
//! The result is always created with StatisticsType::LOCAL_STATS.
unique_ptr<BaseStatistics> NumericStatistics::Deserialize(FieldReader &reader, LogicalType type) {
	// two separate statements keep the read order fixed: min first, then max
	auto lower = DeserializeNumericStatsValue(type, reader);
	auto upper = DeserializeNumericStatsValue(type, reader);
	// "type" is only moved after both bounds have been read with it
	return make_unique_base<BaseStatistics, NumericStatistics>(std::move(type), std::move(lower), std::move(upper),
	                                                           StatisticsType::LOCAL_STATS);
}
|
235
|
-
|
236
|
-
//! Renders the statistics as "[Min: <min>, Max: <max>]" followed by the base-class description.
string NumericStatistics::ToString() const {
	const auto min_text = min.ToString();
	const auto max_text = max.ToString();
	return StringUtil::Format("[Min: %s, Max: %s]%s", min_text, max_text, BaseStatistics::ToString());
}
|
239
|
-
|
240
|
-
template <class T>
|
241
|
-
void NumericStatistics::TemplatedVerify(Vector &vector, const SelectionVector &sel, idx_t count) const {
|
242
|
-
UnifiedVectorFormat vdata;
|
243
|
-
vector.ToUnifiedFormat(count, vdata);
|
244
|
-
|
245
|
-
auto data = (T *)vdata.data;
|
246
|
-
for (idx_t i = 0; i < count; i++) {
|
247
|
-
auto idx = sel.get_index(i);
|
248
|
-
auto index = vdata.sel->get_index(idx);
|
249
|
-
if (!vdata.validity.RowIsValid(index)) {
|
250
|
-
continue;
|
251
|
-
}
|
252
|
-
if (!min.IsNull() && LessThan::Operation(data[index], min.GetValueUnsafe<T>())) { // LCOV_EXCL_START
|
253
|
-
throw InternalException("Statistics mismatch: value is smaller than min.\nStatistics: %s\nVector: %s",
|
254
|
-
ToString(), vector.ToString(count));
|
255
|
-
} // LCOV_EXCL_STOP
|
256
|
-
if (!max.IsNull() && GreaterThan::Operation(data[index], max.GetValueUnsafe<T>())) {
|
257
|
-
throw InternalException("Statistics mismatch: value is bigger than max.\nStatistics: %s\nVector: %s",
|
258
|
-
ToString(), vector.ToString(count));
|
259
|
-
}
|
260
|
-
}
|
261
|
-
}
|
262
|
-
|
263
|
-
//! Verifies the vector against these statistics: runs the base-class verification, then the
//! min/max check with the native type that matches the physical type.
//! BOOL has no min/max check here.
//! \throws InternalException if the physical type is not one of the handled numeric types
void NumericStatistics::Verify(Vector &vector, const SelectionVector &sel, idx_t count) const {
	BaseStatistics::Verify(vector, sel, count);

	const auto physical_type = type.InternalType();
	switch (physical_type) {
	case PhysicalType::BOOL:
		// no range check for booleans
		break;
	// signed integers
	case PhysicalType::INT8:
		TemplatedVerify<int8_t>(vector, sel, count);
		break;
	case PhysicalType::INT16:
		TemplatedVerify<int16_t>(vector, sel, count);
		break;
	case PhysicalType::INT32:
		TemplatedVerify<int32_t>(vector, sel, count);
		break;
	case PhysicalType::INT64:
		TemplatedVerify<int64_t>(vector, sel, count);
		break;
	// unsigned integers
	case PhysicalType::UINT8:
		TemplatedVerify<uint8_t>(vector, sel, count);
		break;
	case PhysicalType::UINT16:
		TemplatedVerify<uint16_t>(vector, sel, count);
		break;
	case PhysicalType::UINT32:
		TemplatedVerify<uint32_t>(vector, sel, count);
		break;
	case PhysicalType::UINT64:
		TemplatedVerify<uint64_t>(vector, sel, count);
		break;
	case PhysicalType::INT128:
		TemplatedVerify<hugeint_t>(vector, sel, count);
		break;
	// floating point
	case PhysicalType::FLOAT:
		TemplatedVerify<float>(vector, sel, count);
		break;
	case PhysicalType::DOUBLE:
		TemplatedVerify<double>(vector, sel, count);
		break;
	default:
		throw InternalException("Unsupported type %s for numeric statistics verify", type.ToString());
	}
}
|
306
|
-
|
307
|
-
} // namespace duckdb
|
@@ -1,220 +0,0 @@
|
|
1
|
-
#include "duckdb/storage/statistics/string_statistics.hpp"
|
2
|
-
#include "duckdb/common/field_writer.hpp"
|
3
|
-
#include "utf8proc_wrapper.hpp"
|
4
|
-
#include "duckdb/common/string_util.hpp"
|
5
|
-
#include "duckdb/common/types/vector.hpp"
|
6
|
-
#include "duckdb/main/error_manager.hpp"
|
7
|
-
|
8
|
-
namespace duckdb {
|
9
|
-
|
10
|
-
//! Creates string statistics with inverted prefix bounds: every byte of min is 0xFF and every
//! byte of max is 0. The length is 0 and both flags are false.
//! NOTE(review): presumably the inverted bounds make the first updated string replace both - confirm.
StringStatistics::StringStatistics(LogicalType type_p, StatisticsType stats_type)
    : BaseStatistics(std::move(type_p), stats_type) {
	InitializeBase();

	has_overflow_strings = false;
	has_unicode = false;
	max_string_length = 0;
	for (idx_t pos = 0; pos < MAX_STRING_MINMAX_SIZE; pos++) {
		min[pos] = 0xFF;
		max[pos] = 0;
	}
}
|
21
|
-
|
22
|
-
//! Returns a new StringStatistics with the same type and statistics kind, carrying over the
//! base-class state (via CopyBase), both prefix bounds, the unicode flag, the maximum string
//! length and the overflow-strings flag.
unique_ptr<BaseStatistics> StringStatistics::Copy() const {
	auto result = make_unique<StringStatistics>(type, stats_type);
	result->CopyBase(*this);

	memcpy(result->min, min, MAX_STRING_MINMAX_SIZE);
	memcpy(result->max, max, MAX_STRING_MINMAX_SIZE);
	result->has_unicode = has_unicode;
	result->max_string_length = max_string_length;
	// the constructor resets this flag to false; without this line the copy would silently drop it,
	// even though Serialize/Deserialize round-trip it
	result->has_overflow_strings = has_overflow_strings;
	// explicit move: the local is a unique_ptr to the derived type, the return type is the base type
	return std::move(result);
}
|
32
|
-
|
33
|
-
//! Serializes the statistics in a fixed order: min prefix, max prefix, unicode flag,
//! maximum string length (as uint32_t), overflow-strings flag.
void StringStatistics::Serialize(FieldWriter &writer) const {
	// prefix bounds, each MAX_STRING_MINMAX_SIZE bytes
	writer.WriteBlob(min, MAX_STRING_MINMAX_SIZE);
	writer.WriteBlob(max, MAX_STRING_MINMAX_SIZE);
	// scalar fields; Deserialize reads them back in this order
	writer.WriteField<bool>(has_unicode);
	writer.WriteField<uint32_t>(max_string_length);
	writer.WriteField<bool>(has_overflow_strings);
}
|
40
|
-
|
41
|
-
//! Reads string statistics in the order Serialize writes them: min prefix, max prefix,
//! unicode flag, maximum string length (uint32_t), overflow-strings flag.
//! The result is always created with StatisticsType::LOCAL_STATS.
unique_ptr<BaseStatistics> StringStatistics::Deserialize(FieldReader &reader, LogicalType type) {
	auto loaded = make_unique<StringStatistics>(std::move(type), StatisticsType::LOCAL_STATS);

	// prefix bounds, each MAX_STRING_MINMAX_SIZE bytes
	reader.ReadBlob(loaded->min, MAX_STRING_MINMAX_SIZE);
	reader.ReadBlob(loaded->max, MAX_STRING_MINMAX_SIZE);

	// scalar fields
	loaded->has_unicode = reader.ReadRequired<bool>();
	loaded->max_string_length = reader.ReadRequired<uint32_t>();
	loaded->has_overflow_strings = reader.ReadRequired<bool>();

	// explicit move: the local is a unique_ptr to the derived type, the return type is the base type
	return std::move(loaded);
}
|
50
|
-
|
51
|
-
//! Compares the first "len" bytes of "data" against "comparison" byte by byte.
//! Returns -1 if data sorts before comparison, 1 if it sorts after, and 0 if all len bytes are equal.
static int StringValueComparison(const_data_ptr_t data, idx_t len, const_data_ptr_t comparison) {
	D_ASSERT(len <= StringStatistics::MAX_STRING_MINMAX_SIZE);
	for (idx_t pos = 0; pos < len; pos++) {
		const auto lhs = data[pos];
		const auto rhs = comparison[pos];
		if (lhs != rhs) {
			// the first differing byte decides the result
			return lhs < rhs ? -1 : 1;
		}
	}
	return 0;
}
|
62
|
-
|
63
|
-
//! Fills "target" (MAX_STRING_MINMAX_SIZE bytes) with the leading bytes of "data":
//! at most MAX_STRING_MINMAX_SIZE bytes are copied, any remaining bytes are set to '\0'.
static void ConstructValue(const_data_ptr_t data, idx_t size, data_t target[]) {
	const idx_t capacity = StringStatistics::MAX_STRING_MINMAX_SIZE;
	idx_t copy_size = size;
	if (copy_size > capacity) {
		// longer values are truncated to the capacity of the min/max buffer
		copy_size = capacity;
	}
	memcpy(target, data, copy_size);
	// zero-pad the tail of the buffer
	memset(target + copy_size, '\0', capacity - copy_size);
}
|
71
|
-
|
72
|
-
void StringStatistics::Update(const string_t &value) {
|
73
|
-
auto data = (const_data_ptr_t)value.GetDataUnsafe();
|
74
|
-
auto size = value.GetSize();
|
75
|
-
|
76
|
-
//! we can only fit 8 bytes, so we might need to trim our string
|
77
|
-
// construct the value
|
78
|
-
data_t target[MAX_STRING_MINMAX_SIZE];
|
79
|
-
ConstructValue(data, size, target);
|
80
|
-
|
81
|
-
// update the min and max
|
82
|
-
if (StringValueComparison(target, MAX_STRING_MINMAX_SIZE, min) < 0) {
|
83
|
-
memcpy(min, target, MAX_STRING_MINMAX_SIZE);
|
84
|
-
}
|
85
|
-
if (StringValueComparison(target, MAX_STRING_MINMAX_SIZE, max) > 0) {
|
86
|
-
memcpy(max, target, MAX_STRING_MINMAX_SIZE);
|
87
|
-
}
|
88
|
-
if (size > max_string_length) {
|
89
|
-
max_string_length = size;
|
90
|
-
}
|
91
|
-
if (type.id() == LogicalTypeId::VARCHAR && !has_unicode) {
|
92
|
-
auto unicode = Utf8Proc::Analyze((const char *)data, size);
|
93
|
-
if (unicode == UnicodeType::UNICODE) {
|
94
|
-
has_unicode = true;
|
95
|
-
} else if (unicode == UnicodeType::INVALID) {
|
96
|
-
throw InternalException(
|
97
|
-
ErrorManager::InvalidUnicodeError(string((char *)data, size), "segment statistics update"));
|
98
|
-
}
|
99
|
-
}
|
100
|
-
}
|
101
|
-
|
102
|
-
void StringStatistics::Merge(const BaseStatistics &other_p) {
|
103
|
-
BaseStatistics::Merge(other_p);
|
104
|
-
auto &other = (const StringStatistics &)other_p;
|
105
|
-
if (StringValueComparison(other.min, MAX_STRING_MINMAX_SIZE, min) < 0) {
|
106
|
-
memcpy(min, other.min, MAX_STRING_MINMAX_SIZE);
|
107
|
-
}
|
108
|
-
if (StringValueComparison(other.max, MAX_STRING_MINMAX_SIZE, max) > 0) {
|
109
|
-
memcpy(max, other.max, MAX_STRING_MINMAX_SIZE);
|
110
|
-
}
|
111
|
-
has_unicode = has_unicode || other.has_unicode;
|
112
|
-
max_string_length = MaxValue<uint32_t>(max_string_length, other.max_string_length);
|
113
|
-
has_overflow_strings = has_overflow_strings || other.has_overflow_strings;
|
114
|
-
}
|
115
|
-
|
116
|
-
//! Checks a comparison against a constant using the min/max prefixes.
//! Only the first min(constant.size(), MAX_STRING_MINMAX_SIZE) bytes of the constant are compared.
//! Throws an InternalException for comparison types other than =, <>, >, >=, <, <=.
FilterPropagateResult StringStatistics::CheckZonemap(ExpressionType comparison_type, const string &constant) const {
	const auto data = (const_data_ptr_t)constant.c_str();
	const idx_t value_size = MinValue<idx_t>(constant.size(), MAX_STRING_MINMAX_SIZE);

	// constant >= min and constant <= max (on the compared prefix)
	const bool not_below_min = StringValueComparison(data, value_size, min) >= 0;
	const bool not_above_max = StringValueComparison(data, value_size, max) <= 0;
	const bool within_range = not_below_min && not_above_max;

	switch (comparison_type) {
	case ExpressionType::COMPARE_EQUAL:
		// a constant outside of [min, max] can never match
		return within_range ? FilterPropagateResult::NO_PRUNING_POSSIBLE : FilterPropagateResult::FILTER_ALWAYS_FALSE;
	case ExpressionType::COMPARE_NOTEQUAL:
		// a constant outside of [min, max] differs from every value
		return within_range ? FilterPropagateResult::NO_PRUNING_POSSIBLE : FilterPropagateResult::FILTER_ALWAYS_TRUE;
	case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
	case ExpressionType::COMPARE_GREATERTHAN:
		// values can only exceed the constant if the constant does not exceed max
		return not_above_max ? FilterPropagateResult::NO_PRUNING_POSSIBLE : FilterPropagateResult::FILTER_ALWAYS_FALSE;
	case ExpressionType::COMPARE_LESSTHAN:
	case ExpressionType::COMPARE_LESSTHANOREQUALTO:
		// values can only be below the constant if the constant is not below min
		return not_below_min ? FilterPropagateResult::NO_PRUNING_POSSIBLE : FilterPropagateResult::FILTER_ALWAYS_FALSE;
	default:
		throw InternalException("Expression type not implemented for string statistics zone map");
	}
}
|
153
|
-
|
154
|
-
//! Returns the length of the leading part of a min/max buffer that precedes the first '\0' byte
//! or the first byte with the high bit (0x80) set; at most MAX_STRING_MINMAX_SIZE.
static idx_t GetValidMinMaxSubstring(const_data_ptr_t data) {
	idx_t length = 0;
	while (length < StringStatistics::MAX_STRING_MINMAX_SIZE) {
		const auto byte = data[length];
		if (byte == '\0' || (byte & 0x80) != 0) {
			break;
		}
		length++;
	}
	return length;
}
|
165
|
-
|
166
|
-
//! Renders the statistics as text: the printable parts of min and max (as determined by
//! GetValidMinMaxSubstring), the unicode flag, the maximum string length and the base statistics.
string StringStatistics::ToString() const {
	const string min_text((const char *)min, GetValidMinMaxSubstring(min));
	const string max_text((const char *)max, GetValidMinMaxSubstring(max));
	const char *unicode_text = has_unicode ? "true" : "false";
	return StringUtil::Format("[Min: %s, Max: %s, Has Unicode: %s, Max String Length: %lld]%s", min_text, max_text,
	                          unicode_text, max_string_length, BaseStatistics::ToString());
}
|
173
|
-
|
174
|
-
//! Verifies that every valid string selected by "sel" (the first "count" entries) is consistent
//! with these statistics: its length does not exceed max_string_length, it only contains unicode
//! if has_unicode is set (VARCHAR only), and its prefix lies within [min, max].
//! Throws an InternalException on the first mismatch or on invalid unicode.
void StringStatistics::Verify(Vector &vector, const SelectionVector &sel, idx_t count) const {
	BaseStatistics::Verify(vector, sel, count);

	UnifiedVectorFormat vdata;
	vector.ToUnifiedFormat(count, vdata);
	auto strings = (string_t *)vdata.data;
	for (idx_t i = 0; i < count; i++) {
		// map the position through the caller's selection and then through the vector's own selection
		auto idx = sel.get_index(i);
		auto index = vdata.sel->get_index(idx);
		if (!vdata.validity.RowIsValid(index)) {
			// NULL values are not covered by the string statistics
			continue;
		}
		auto value = strings[index];
		auto data = value.GetDataUnsafe();
		auto len = value.GetSize();
		// LCOV_EXCL_START
		if (len > max_string_length) {
			throw InternalException(
			    "Statistics mismatch: string value exceeds maximum string length.\nStatistics: %s\nVector: %s",
			    ToString(), vector.ToString(count));
		}
		if (type.id() == LogicalTypeId::VARCHAR && !has_unicode) {
			auto unicode = Utf8Proc::Analyze(data, len);
			if (unicode == UnicodeType::UNICODE) {
				throw InternalException("Statistics mismatch: string value contains unicode, but statistics says it "
				                        "shouldn't.\nStatistics: %s\nVector: %s",
				                        ToString(), vector.ToString(count));
			} else if (unicode == UnicodeType::INVALID) {
				throw InternalException("Invalid unicode detected in vector: %s", vector.ToString(count));
			}
		}
		// only the first MAX_STRING_MINMAX_SIZE bytes take part in the min/max comparison
		const idx_t compare_len = MinValue<idx_t>(len, MAX_STRING_MINMAX_SIZE);
		if (StringValueComparison((const_data_ptr_t)data, compare_len, min) < 0) {
			throw InternalException("Statistics mismatch: value is smaller than min.\nStatistics: %s\nVector: %s",
			                        ToString(), vector.ToString(count));
		}
		if (StringValueComparison((const_data_ptr_t)data, compare_len, max) > 0) {
			throw InternalException("Statistics mismatch: value is bigger than max.\nStatistics: %s\nVector: %s",
			                        ToString(), vector.ToString(count));
		}
		// LCOV_EXCL_STOP
	}
}
|
219
|
-
|
220
|
-
} // namespace duckdb
|
@@ -1,108 +0,0 @@
|
|
1
|
-
#include "duckdb/storage/statistics/struct_statistics.hpp"
|
2
|
-
|
3
|
-
#include "duckdb/common/field_writer.hpp"
|
4
|
-
#include "duckdb/common/types/vector.hpp"
|
5
|
-
|
6
|
-
namespace duckdb {
|
7
|
-
|
8
|
-
//! Creates struct statistics (always LOCAL_STATS) for a STRUCT type, with one set of empty
//! child statistics per struct child.
StructStatistics::StructStatistics(LogicalType type_p)
    : BaseStatistics(std::move(type_p), StatisticsType::LOCAL_STATS) {
	D_ASSERT(type.InternalType() == PhysicalType::STRUCT);
	InitializeBase();

	auto &child_types = StructType::GetChildTypes(type);
	child_stats.resize(child_types.size());
	idx_t child_idx = 0;
	for (auto &child_type : child_types) {
		// child_type.second is the logical type of the child
		child_stats[child_idx] = BaseStatistics::CreateEmpty(child_type.second, StatisticsType::LOCAL_STATS);
		child_idx++;
	}
}
|
19
|
-
|
20
|
-
void StructStatistics::Merge(const BaseStatistics &other_p) {
|
21
|
-
BaseStatistics::Merge(other_p);
|
22
|
-
|
23
|
-
auto &other = (const StructStatistics &)other_p;
|
24
|
-
D_ASSERT(other.child_stats.size() == child_stats.size());
|
25
|
-
for (idx_t i = 0; i < child_stats.size(); i++) {
|
26
|
-
if (child_stats[i] && other.child_stats[i]) {
|
27
|
-
child_stats[i]->Merge(*other.child_stats[i]);
|
28
|
-
} else {
|
29
|
-
child_stats[i].reset();
|
30
|
-
}
|
31
|
-
}
|
32
|
-
}
|
33
|
-
|
34
|
-
// LCOV_EXCL_START
|
35
|
-
//! Zonemap checks are not implemented for structs: this always throws an InternalException.
FilterPropagateResult StructStatistics::CheckZonemap(ExpressionType comparison_type, const Value &constant) const {
	// neither the comparison type nor the constant is inspected
	throw InternalException("Struct zonemaps are not supported yet");
}
|
38
|
-
// LCOV_EXCL_STOP
|
39
|
-
|
40
|
-
//! Creates a copy of these struct statistics, including a copy of every child's statistics.
//! Children without statistics stay without statistics in the copy.
unique_ptr<BaseStatistics> StructStatistics::Copy() const {
	auto result = make_unique<StructStatistics>(type);
	result->CopyBase(*this);

	for (idx_t i = 0; i < child_stats.size(); i++) {
		auto &target = result->child_stats[i];
		if (child_stats[i]) {
			target = child_stats[i]->Copy();
		} else {
			// the constructor created empty stats for this child: clear them again
			target = nullptr;
		}
	}
	return std::move(result);
}
|
49
|
-
|
50
|
-
//! Writes the struct statistics: the child count as a field, followed (directly on the underlying
//! serializer) by a presence flag per child and the child's statistics if present.
void StructStatistics::Serialize(FieldWriter &writer) const {
	writer.WriteField<uint32_t>(child_stats.size());
	auto &serializer = writer.GetSerializer();
	for (auto &child : child_stats) {
		const bool has_child = static_cast<bool>(child);
		serializer.Write<bool>(has_child);
		if (has_child) {
			child->Serialize(serializer);
		}
	}
}
|
60
|
-
|
61
|
-
//! Reads struct statistics for the given STRUCT type, mirroring Serialize: the child count,
//! then a presence flag per child followed by the child's statistics if the flag is set.
//! Throws an InternalException if the stored child count differs from the type's child count.
unique_ptr<BaseStatistics> StructStatistics::Deserialize(FieldReader &reader, LogicalType type) {
	D_ASSERT(type.InternalType() == PhysicalType::STRUCT);
	auto result = make_unique<StructStatistics>(std::move(type));
	auto &child_types = StructType::GetChildTypes(result->type);

	const auto stored_child_count = reader.ReadRequired<uint32_t>();
	if (child_types.size() != stored_child_count) {
		throw InternalException("Struct stats deserialization failure: child count does not match type count!");
	}
	auto &source = reader.GetSource();
	for (idx_t i = 0; i < child_types.size(); i++) {
		auto &child = result->child_stats[i];
		if (!source.Read<bool>()) {
			// no statistics were stored for this child
			child.reset();
			continue;
		}
		child = BaseStatistics::Deserialize(source, child_types[i].second);
	}
	return std::move(result);
}
|
81
|
-
|
82
|
-
//! Renders the statistics as " {name: stats, ...}" followed by the base statistics.
//! Children without statistics are rendered as "No Stats".
string StructStatistics::ToString() const {
	string result = " {";
	auto &child_types = StructType::GetChildTypes(type);
	for (idx_t i = 0; i < child_types.size(); i++) {
		if (i != 0) {
			result += ", ";
		}
		// child_types[i].first is the name of the child
		result += child_types[i].first;
		result += ": ";
		result += child_stats[i] ? child_stats[i]->ToString() : "No Stats";
	}
	result += "}";
	result += BaseStatistics::ToString();
	return result;
}
|
96
|
-
|
97
|
-
//! Verifies the base statistics and then every child vector against its child statistics.
//! Children without statistics are skipped.
void StructStatistics::Verify(Vector &vector, const SelectionVector &sel, idx_t count) const {
	BaseStatistics::Verify(vector, sel, count);

	auto &child_entries = StructVector::GetEntries(vector);
	for (idx_t i = 0; i < child_entries.size(); i++) {
		auto &stats = child_stats[i];
		if (!stats) {
			continue;
		}
		stats->Verify(*child_entries[i], sel, count);
	}
}
|
107
|
-
|
108
|
-
} // namespace duckdb
|