duckdb 0.7.2-dev16.0 → 0.7.2-dev314.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +2 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-extension.cpp +2 -0
- package/src/duckdb/extension/icu/icu-table-range.cpp +194 -0
- package/src/duckdb/extension/icu/include/icu-table-range.hpp +17 -0
- package/src/duckdb/extension/parquet/column_reader.cpp +5 -6
- package/src/duckdb/extension/parquet/column_writer.cpp +0 -1
- package/src/duckdb/extension/parquet/include/column_reader.hpp +1 -2
- package/src/duckdb/extension/parquet/include/generated_column_reader.hpp +1 -11
- package/src/duckdb/extension/parquet/parquet-extension.cpp +11 -2
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +26 -32
- package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +4 -0
- package/src/duckdb/src/catalog/catalog_entry/scalar_function_catalog_entry.cpp +7 -6
- package/src/duckdb/src/catalog/catalog_entry/table_function_catalog_entry.cpp +20 -1
- package/src/duckdb/src/common/enums/statement_type.cpp +2 -0
- package/src/duckdb/src/common/sort/sort_state.cpp +5 -7
- package/src/duckdb/src/common/types/bit.cpp +95 -58
- package/src/duckdb/src/common/types/value.cpp +149 -53
- package/src/duckdb/src/common/types/vector.cpp +13 -10
- package/src/duckdb/src/execution/column_binding_resolver.cpp +6 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +4 -5
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
- package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +2 -3
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +32 -6
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +15 -15
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +18 -12
- package/src/duckdb/src/function/aggregate/algebraic/avg.cpp +0 -6
- package/src/duckdb/src/function/aggregate/distributive/bitagg.cpp +99 -95
- package/src/duckdb/src/function/aggregate/distributive/bitstring_agg.cpp +254 -0
- package/src/duckdb/src/function/aggregate/distributive/count.cpp +2 -4
- package/src/duckdb/src/function/aggregate/distributive/sum.cpp +11 -16
- package/src/duckdb/src/function/aggregate/distributive_functions.cpp +1 -0
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +16 -5
- package/src/duckdb/src/function/cast/bit_cast.cpp +0 -2
- package/src/duckdb/src/function/cast/blob_cast.cpp +0 -1
- package/src/duckdb/src/function/scalar/bit/bitstring.cpp +99 -0
- package/src/duckdb/src/function/scalar/date/date_diff.cpp +0 -1
- package/src/duckdb/src/function/scalar/date/date_part.cpp +17 -25
- package/src/duckdb/src/function/scalar/date/date_sub.cpp +0 -1
- package/src/duckdb/src/function/scalar/date/date_trunc.cpp +10 -14
- package/src/duckdb/src/function/scalar/generic/stats.cpp +2 -4
- package/src/duckdb/src/function/scalar/list/flatten.cpp +5 -12
- package/src/duckdb/src/function/scalar/list/list_concat.cpp +3 -8
- package/src/duckdb/src/function/scalar/list/list_extract.cpp +5 -12
- package/src/duckdb/src/function/scalar/list/list_value.cpp +5 -9
- package/src/duckdb/src/function/scalar/map/map_entries.cpp +61 -0
- package/src/duckdb/src/function/scalar/map/map_keys_values.cpp +97 -0
- package/src/duckdb/src/function/scalar/math/numeric.cpp +14 -17
- package/src/duckdb/src/function/scalar/nested_functions.cpp +3 -0
- package/src/duckdb/src/function/scalar/operators/add.cpp +0 -9
- package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +29 -48
- package/src/duckdb/src/function/scalar/operators/bitwise.cpp +0 -63
- package/src/duckdb/src/function/scalar/operators/multiply.cpp +0 -6
- package/src/duckdb/src/function/scalar/operators/subtract.cpp +0 -6
- package/src/duckdb/src/function/scalar/string/caseconvert.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/instr.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/length.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/like.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/substring.cpp +2 -6
- package/src/duckdb/src/function/scalar/string_functions.cpp +1 -0
- package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +4 -9
- package/src/duckdb/src/function/scalar/struct/struct_insert.cpp +10 -13
- package/src/duckdb/src/function/scalar/struct/struct_pack.cpp +5 -6
- package/src/duckdb/src/function/table/read_csv.cpp +9 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/function/table_function.cpp +19 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp +6 -8
- package/src/duckdb/src/include/duckdb/common/constants.hpp +0 -19
- package/src/duckdb/src/include/duckdb/common/enums/statement_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/enums/tableref_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/types/bit.hpp +5 -1
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +2 -8
- package/src/duckdb/src/include/duckdb/common/types.hpp +1 -2
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/aggregate/distributive_functions.hpp +5 -0
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +12 -3
- package/src/duckdb/src/include/duckdb/function/scalar/bit_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +12 -0
- package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/table_function.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/database.hpp +1 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/{alter_function_info.hpp → alter_scalar_function_info.hpp} +13 -13
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_function_info.hpp +47 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_table_function_info.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/statement/multi_statement.hpp +28 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/pivotref.hpp +76 -0
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +28 -0
- package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +8 -0
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +76 -44
- package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_fetch.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_fetch.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +93 -29
- package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +22 -3
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +6 -6
- package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +41 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/node_statistics.hpp +26 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +157 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/segment_statistics.hpp +2 -7
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +74 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +42 -0
- package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +2 -3
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -3
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
- package/src/duckdb/src/include/duckdb.h +49 -1
- package/src/duckdb/src/include/duckdb.hpp +0 -1
- package/src/duckdb/src/main/capi/pending-c.cpp +16 -3
- package/src/duckdb/src/main/capi/result-c.cpp +27 -1
- package/src/duckdb/src/main/capi/stream-c.cpp +25 -0
- package/src/duckdb/src/main/client_context.cpp +8 -1
- package/src/duckdb/src/main/config.cpp +66 -1
- package/src/duckdb/src/main/database.cpp +10 -2
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +98 -67
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +16 -3
- package/src/duckdb/src/optimizer/statistics/expression/propagate_aggregate.cpp +9 -3
- package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +6 -7
- package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -11
- package/src/duckdb/src/optimizer/statistics/expression/propagate_columnref.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +13 -15
- package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +0 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_constant.cpp +3 -75
- package/src/duckdb/src/optimizer/statistics/expression/propagate_function.cpp +7 -2
- package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +10 -0
- package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +2 -3
- package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +28 -31
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +4 -5
- package/src/duckdb/src/optimizer/statistics/operator/propagate_set_operation.cpp +3 -3
- package/src/duckdb/src/optimizer/statistics_propagator.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/alter_info.cpp +7 -3
- package/src/duckdb/src/parser/parsed_data/alter_scalar_function_info.cpp +56 -0
- package/src/duckdb/src/parser/parsed_data/alter_table_function_info.cpp +51 -0
- package/src/duckdb/src/parser/parsed_data/create_scalar_function_info.cpp +3 -2
- package/src/duckdb/src/parser/parsed_data/create_table_function_info.cpp +6 -0
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +8 -0
- package/src/duckdb/src/parser/query_node.cpp +1 -1
- package/src/duckdb/src/parser/statement/multi_statement.cpp +18 -0
- package/src/duckdb/src/parser/tableref/pivotref.cpp +296 -0
- package/src/duckdb/src/parser/tableref.cpp +3 -0
- package/src/duckdb/src/parser/transform/helpers/transform_alias.cpp +12 -6
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +24 -0
- package/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +4 -0
- package/src/duckdb/src/parser/transform/statement/transform_create_view.cpp +4 -0
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +150 -0
- package/src/duckdb/src/parser/transform/statement/transform_select.cpp +8 -0
- package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +1 -1
- package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +4 -0
- package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +105 -0
- package/src/duckdb/src/parser/transform/tableref/transform_tableref.cpp +2 -0
- package/src/duckdb/src/parser/transformer.cpp +15 -3
- package/src/duckdb/src/planner/bind_context.cpp +16 -0
- package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +11 -3
- package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +0 -1
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_logical_plan.cpp +17 -0
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +9 -0
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +365 -0
- package/src/duckdb/src/planner/binder.cpp +7 -1
- package/src/duckdb/src/planner/bound_result_modifier.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +1 -1
- package/src/duckdb/src/planner/filter/constant_filter.cpp +4 -6
- package/src/duckdb/src/planner/pragma_handler.cpp +10 -2
- package/src/duckdb/src/storage/buffer_manager.cpp +44 -46
- package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -1
- package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +1 -4
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +4 -4
- package/src/duckdb/src/storage/compression/bitpacking.cpp +28 -24
- package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +43 -45
- package/src/duckdb/src/storage/compression/numeric_constant.cpp +9 -10
- package/src/duckdb/src/storage/compression/patas.cpp +1 -1
- package/src/duckdb/src/storage/compression/rle.cpp +19 -15
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +5 -5
- package/src/duckdb/src/storage/data_table.cpp +4 -6
- package/src/duckdb/src/storage/statistics/base_statistics.cpp +373 -128
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +58 -3
- package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +4 -9
- package/src/duckdb/src/storage/statistics/list_stats.cpp +117 -0
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +529 -0
- package/src/duckdb/src/storage/statistics/segment_statistics.cpp +2 -11
- package/src/duckdb/src/storage/statistics/string_stats.cpp +273 -0
- package/src/duckdb/src/storage/statistics/struct_stats.cpp +131 -0
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -4
- package/src/duckdb/src/storage/table/column_data.cpp +16 -11
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +2 -3
- package/src/duckdb/src/storage/table/column_segment.cpp +6 -8
- package/src/duckdb/src/storage/table/list_column_data.cpp +39 -58
- package/src/duckdb/src/storage/table/row_group.cpp +24 -23
- package/src/duckdb/src/storage/table/row_group_collection.cpp +12 -12
- package/src/duckdb/src/storage/table/standard_column_data.cpp +6 -6
- package/src/duckdb/src/storage/table/struct_column_data.cpp +15 -16
- package/src/duckdb/src/storage/table/table_statistics.cpp +27 -7
- package/src/duckdb/src/storage/table/update_segment.cpp +10 -12
- package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +3 -0
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +34 -1
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +1020 -530
- package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +7 -0
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +23560 -22737
- package/src/duckdb/ub_src_function_aggregate_distributive.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_bit.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_map.cpp +4 -0
- package/src/duckdb/ub_src_main_capi.cpp +2 -0
- package/src/duckdb/ub_src_parser_parsed_data.cpp +4 -2
- package/src/duckdb/ub_src_parser_statement.cpp +2 -0
- package/src/duckdb/ub_src_parser_tableref.cpp +2 -0
- package/src/duckdb/ub_src_parser_transform_statement.cpp +2 -0
- package/src/duckdb/ub_src_parser_transform_tableref.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_tableref.cpp +2 -0
- package/src/duckdb/ub_src_storage_statistics.cpp +4 -6
- package/src/duckdb/src/include/duckdb/main/loadable_extension.hpp +0 -59
- package/src/duckdb/src/include/duckdb/storage/statistics/list_statistics.hpp +0 -36
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_statistics.hpp +0 -75
- package/src/duckdb/src/include/duckdb/storage/statistics/string_statistics.hpp +0 -49
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_statistics.hpp +0 -36
- package/src/duckdb/src/include/duckdb/storage/statistics/validity_statistics.hpp +0 -45
- package/src/duckdb/src/parser/parsed_data/alter_function_info.cpp +0 -55
- package/src/duckdb/src/storage/statistics/list_statistics.cpp +0 -94
- package/src/duckdb/src/storage/statistics/numeric_statistics.cpp +0 -307
- package/src/duckdb/src/storage/statistics/string_statistics.cpp +0 -220
- package/src/duckdb/src/storage/statistics/struct_statistics.cpp +0 -108
- package/src/duckdb/src/storage/statistics/validity_statistics.cpp +0 -91
@@ -0,0 +1,273 @@
|
|
1
|
+
#include "duckdb/storage/statistics/string_stats.hpp"
|
2
|
+
#include "duckdb/storage/statistics/base_statistics.hpp"
|
3
|
+
#include "duckdb/common/field_writer.hpp"
|
4
|
+
#include "utf8proc_wrapper.hpp"
|
5
|
+
#include "duckdb/common/string_util.hpp"
|
6
|
+
#include "duckdb/common/types/vector.hpp"
|
7
|
+
#include "duckdb/main/error_manager.hpp"
|
8
|
+
|
9
|
+
namespace duckdb {
|
10
|
+
|
11
|
+
//! Creates string statistics in the "unknown" state for the given type.
//! The min bound is filled with 0x00 bytes and the max bound with 0xFF bytes, no maximum string
//! length is recorded, and the statistics are flagged as possibly containing unicode.
BaseStatistics StringStats::CreateUnknown(LogicalType type) {
	BaseStatistics stats(std::move(type));
	stats.InitializeUnknown();

	auto &payload = StringStats::GetDataUnsafe(stats);
	// widest possible byte range: lowest min, highest max
	for (idx_t pos = 0; pos < StringStatsData::MAX_STRING_MINMAX_SIZE; pos++) {
		payload.max[pos] = 0xFF;
		payload.min[pos] = 0;
	}
	payload.has_unicode = true;
	payload.has_max_string_length = false;
	payload.max_string_length = 0;
	return stats;
}
|
24
|
+
|
25
|
+
//! Creates string statistics in the "empty" state for the given type.
//! The min bound is filled with 0xFF bytes and the max bound with 0x00 bytes (an inverted range),
//! the maximum string length is recorded as 0, and the unicode flag is cleared.
BaseStatistics StringStats::CreateEmpty(LogicalType type) {
	BaseStatistics stats(std::move(type));
	stats.InitializeEmpty();

	auto &payload = StringStats::GetDataUnsafe(stats);
	// inverted byte range: highest min, lowest max
	for (idx_t pos = 0; pos < StringStatsData::MAX_STRING_MINMAX_SIZE; pos++) {
		payload.max[pos] = 0;
		payload.min[pos] = 0xFF;
	}
	payload.has_unicode = false;
	payload.has_max_string_length = true;
	payload.max_string_length = 0;
	return stats;
}
|
38
|
+
|
39
|
+
//! Returns a mutable reference to the string-statistics payload stored in `stats`.
//! Only asserts (D_ASSERT) that `stats` holds string statistics.
StringStatsData &StringStats::GetDataUnsafe(BaseStatistics &stats) {
	D_ASSERT(stats.GetStatsType() == StatisticsType::STRING_STATS);
	auto &payload = stats.stats_union.string_data;
	return payload;
}
|
43
|
+
|
44
|
+
//! Returns a read-only reference to the string-statistics payload stored in `stats`.
//! Only asserts (D_ASSERT) that `stats` holds string statistics.
const StringStatsData &StringStats::GetDataUnsafe(const BaseStatistics &stats) {
	D_ASSERT(stats.GetStatsType() == StatisticsType::STRING_STATS);
	const auto &payload = stats.stats_union.string_data;
	return payload;
}
|
48
|
+
|
49
|
+
bool StringStats::HasMaxStringLength(const BaseStatistics &stats) {
|
50
|
+
if (stats.GetType().id() == LogicalTypeId::SQLNULL) {
|
51
|
+
return false;
|
52
|
+
}
|
53
|
+
return StringStats::GetDataUnsafe(stats).has_max_string_length;
|
54
|
+
}
|
55
|
+
|
56
|
+
uint32_t StringStats::MaxStringLength(const BaseStatistics &stats) {
|
57
|
+
if (!HasMaxStringLength(stats)) {
|
58
|
+
throw InternalException("MaxStringLength called on statistics that does not have a max string length");
|
59
|
+
}
|
60
|
+
return StringStats::GetDataUnsafe(stats).max_string_length;
|
61
|
+
}
|
62
|
+
|
63
|
+
bool StringStats::CanContainUnicode(const BaseStatistics &stats) {
|
64
|
+
if (stats.GetType().id() == LogicalTypeId::SQLNULL) {
|
65
|
+
return true;
|
66
|
+
}
|
67
|
+
return StringStats::GetDataUnsafe(stats).has_unicode;
|
68
|
+
}
|
69
|
+
|
70
|
+
//! Clears the "has maximum string length" flag of `stats`; the stored length value is left as-is.
void StringStats::ResetMaxStringLength(BaseStatistics &stats) {
	auto &payload = StringStats::GetDataUnsafe(stats);
	payload.has_max_string_length = false;
}
|
73
|
+
|
74
|
+
//! Flags `stats` as possibly containing unicode.
void StringStats::SetContainsUnicode(BaseStatistics &stats) {
	auto &payload = StringStats::GetDataUnsafe(stats);
	payload.has_unicode = true;
}
|
77
|
+
|
78
|
+
//! Writes the string statistics of `stats` to `writer`.
//! Field order: min bound blob, max bound blob, unicode flag, max-length flag, max length.
//! StringStats::Deserialize reads the fields back in the same order.
void StringStats::Serialize(const BaseStatistics &stats, FieldWriter &writer) {
	const auto &payload = StringStats::GetDataUnsafe(stats);

	// fixed-size min/max prefix bounds
	writer.WriteBlob(payload.min, StringStatsData::MAX_STRING_MINMAX_SIZE);
	writer.WriteBlob(payload.max, StringStatsData::MAX_STRING_MINMAX_SIZE);

	// flags followed by the length they qualify
	writer.WriteField<bool>(payload.has_unicode);
	writer.WriteField<bool>(payload.has_max_string_length);
	writer.WriteField<uint32_t>(payload.max_string_length);
}
|
86
|
+
|
87
|
+
//! Reads string statistics for `type` from `reader`.
//! Field order: min bound blob, max bound blob, unicode flag, max-length flag, max length,
//! matching what StringStats::Serialize writes.
BaseStatistics StringStats::Deserialize(FieldReader &reader, LogicalType type) {
	BaseStatistics stats(std::move(type));
	auto &payload = StringStats::GetDataUnsafe(stats);

	// fixed-size min/max prefix bounds
	reader.ReadBlob(payload.min, StringStatsData::MAX_STRING_MINMAX_SIZE);
	reader.ReadBlob(payload.max, StringStatsData::MAX_STRING_MINMAX_SIZE);

	// flags followed by the length they qualify
	payload.has_unicode = reader.ReadRequired<bool>();
	payload.has_max_string_length = reader.ReadRequired<bool>();
	payload.max_string_length = reader.ReadRequired<uint32_t>();
	return stats;
}
|
97
|
+
|
98
|
+
//! Byte-wise comparison of the first `len` bytes of `data` against `comparison`.
//! Returns -1 if `data` is smaller at the first differing byte, 1 if it is larger, and 0 if all
//! `len` bytes are equal. `len` is asserted not to exceed MAX_STRING_MINMAX_SIZE.
static int StringValueComparison(const_data_ptr_t data, idx_t len, const_data_ptr_t comparison) {
	D_ASSERT(len <= StringStatsData::MAX_STRING_MINMAX_SIZE);
	for (idx_t pos = 0; pos < len; pos++) {
		const auto lhs = data[pos];
		const auto rhs = comparison[pos];
		if (lhs == rhs) {
			continue;
		}
		// the first differing byte decides the ordering
		return lhs < rhs ? -1 : 1;
	}
	return 0;
}
|
109
|
+
|
110
|
+
//! Fills `target` (MAX_STRING_MINMAX_SIZE bytes) with a fixed-size prefix of `data`:
//! at most MAX_STRING_MINMAX_SIZE bytes are copied and any remaining bytes are set to '\0'.
static void ConstructValue(const_data_ptr_t data, idx_t size, data_t target[]) {
	idx_t copy_len = size;
	if (copy_len > StringStatsData::MAX_STRING_MINMAX_SIZE) {
		// longer inputs are trimmed to the prefix size
		copy_len = StringStatsData::MAX_STRING_MINMAX_SIZE;
	}
	memcpy(target, data, copy_len);

	// zero-pad the tail for inputs shorter than the prefix size
	idx_t pos = copy_len;
	while (pos < StringStatsData::MAX_STRING_MINMAX_SIZE) {
		target[pos] = '\0';
		pos++;
	}
}
|
117
|
+
|
118
|
+
//! Folds a single string value into `stats`: widens the min/max prefix bounds, raises the stored
//! maximum string length and, for VARCHAR statistics not yet flagged as unicode, analyzes the
//! value's encoding.
//! Throws an InternalException if the analysis reports the value as invalid unicode.
void StringStats::Update(BaseStatistics &stats, const string_t &value) {
	auto data = (const_data_ptr_t)value.GetDataUnsafe();
	auto size = value.GetSize();

	// only a fixed-size prefix takes part in the bounds: ConstructValue trims longer strings and
	// zero-pads shorter ones
	data_t prefix[StringStatsData::MAX_STRING_MINMAX_SIZE];
	ConstructValue(data, size, prefix);

	auto &payload = StringStats::GetDataUnsafe(stats);

	// widen the min/max bounds where the new prefix falls outside of them
	const bool below_min = StringValueComparison(prefix, StringStatsData::MAX_STRING_MINMAX_SIZE, payload.min) < 0;
	if (below_min) {
		memcpy(payload.min, prefix, StringStatsData::MAX_STRING_MINMAX_SIZE);
	}
	const bool above_max = StringValueComparison(prefix, StringStatsData::MAX_STRING_MINMAX_SIZE, payload.max) > 0;
	if (above_max) {
		memcpy(payload.max, prefix, StringStatsData::MAX_STRING_MINMAX_SIZE);
	}

	// track the longest string seen
	if (size > payload.max_string_length) {
		payload.max_string_length = size;
	}

	// the encoding only needs to be analyzed for VARCHAR stats that are not yet flagged as unicode
	if (stats.GetType().id() != LogicalTypeId::VARCHAR || payload.has_unicode) {
		return;
	}
	switch (Utf8Proc::Analyze((const char *)data, size)) {
	case UnicodeType::UNICODE:
		payload.has_unicode = true;
		break;
	case UnicodeType::INVALID:
		throw InternalException(
		    ErrorManager::InvalidUnicodeError(string((const char *)data, size), "segment statistics update"));
	default:
		break;
	}
}
|
148
|
+
|
149
|
+
//! Merges the string statistics of `other` into `stats`.
//! Does nothing when `other` has type VALIDITY. Otherwise the min/max bounds are widened, the unicode
//! flag is OR-ed, the max-length flag is AND-ed and the larger maximum string length is kept.
void StringStats::Merge(BaseStatistics &stats, const BaseStatistics &other) {
	if (other.GetType().id() == LogicalTypeId::VALIDITY) {
		return;
	}
	auto &target = StringStats::GetDataUnsafe(stats);
	const auto &source = StringStats::GetDataUnsafe(other);

	// take over the other side's bounds where they extend past ours
	const bool lower_min = StringValueComparison(source.min, StringStatsData::MAX_STRING_MINMAX_SIZE, target.min) < 0;
	if (lower_min) {
		memcpy(target.min, source.min, StringStatsData::MAX_STRING_MINMAX_SIZE);
	}
	const bool higher_max = StringValueComparison(source.max, StringStatsData::MAX_STRING_MINMAX_SIZE, target.max) > 0;
	if (higher_max) {
		memcpy(target.max, source.max, StringStatsData::MAX_STRING_MINMAX_SIZE);
	}

	// unicode if either side has it; a max length is only known if both sides know theirs
	target.has_unicode = target.has_unicode || source.has_unicode;
	target.has_max_string_length = target.has_max_string_length && source.has_max_string_length;
	target.max_string_length = MaxValue<uint32_t>(target.max_string_length, source.max_string_length);
}
|
165
|
+
|
166
|
+
//! Evaluates a comparison against `constant` using only the min/max prefix bounds of `stats`.
//! At most the first MAX_STRING_MINMAX_SIZE bytes of the constant are compared with each bound.
//! Returns FILTER_ALWAYS_FALSE / FILTER_ALWAYS_TRUE when the bounds decide the comparison and
//! NO_PRUNING_POSSIBLE otherwise. Throws an InternalException for unsupported comparison types.
FilterPropagateResult StringStats::CheckZonemap(const BaseStatistics &stats, ExpressionType comparison_type,
                                                const string &constant) {
	const auto &payload = StringStats::GetDataUnsafe(stats);
	auto constant_data = (const_data_ptr_t)constant.c_str();
	auto constant_size = constant.size();

	// the constant is compared on a prefix no longer than the stored bounds
	idx_t prefix_len = constant_size;
	if (prefix_len > StringStatsData::MAX_STRING_MINMAX_SIZE) {
		prefix_len = StringStatsData::MAX_STRING_MINMAX_SIZE;
	}
	const int min_comp = StringValueComparison(constant_data, prefix_len, payload.min);
	const int max_comp = StringValueComparison(constant_data, prefix_len, payload.max);

	switch (comparison_type) {
	case ExpressionType::COMPARE_EQUAL: {
		// the constant can only match if it lies within [min, max]
		const bool within_bounds = min_comp >= 0 && max_comp <= 0;
		return within_bounds ? FilterPropagateResult::NO_PRUNING_POSSIBLE : FilterPropagateResult::FILTER_ALWAYS_FALSE;
	}
	case ExpressionType::COMPARE_NOTEQUAL: {
		// a constant outside of [min, max] differs from every value
		const bool within_bounds = min_comp >= 0 && max_comp <= 0;
		return within_bounds ? FilterPropagateResult::NO_PRUNING_POSSIBLE : FilterPropagateResult::FILTER_ALWAYS_TRUE;
	}
	case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
	case ExpressionType::COMPARE_GREATERTHAN:
		// constant above the max bound: no value can be greater
		if (max_comp > 0) {
			return FilterPropagateResult::FILTER_ALWAYS_FALSE;
		}
		return FilterPropagateResult::NO_PRUNING_POSSIBLE;
	case ExpressionType::COMPARE_LESSTHAN:
	case ExpressionType::COMPARE_LESSTHANOREQUALTO:
		// constant below the min bound: no value can be smaller
		if (min_comp < 0) {
			return FilterPropagateResult::FILTER_ALWAYS_FALSE;
		}
		return FilterPropagateResult::NO_PRUNING_POSSIBLE;
	default:
		throw InternalException("Expression type not implemented for string statistics zone map");
	}
}
|
205
|
+
|
206
|
+
//! Returns the length of the leading run of bytes in `data` that are neither '\0' nor have the
//! high bit (0x80) set, capped at MAX_STRING_MINMAX_SIZE.
static idx_t GetValidMinMaxSubstring(const_data_ptr_t data) {
	idx_t len = 0;
	while (len < StringStatsData::MAX_STRING_MINMAX_SIZE) {
		const auto byte = data[len];
		// stop at the zero padding or at the first byte with the high bit set
		if (byte == '\0' || (byte & 0x80) != 0) {
			break;
		}
		len++;
	}
	return len;
}
|
217
|
+
|
218
|
+
//! Renders the string statistics as text: the printable prefix of the min and max bounds, the
//! unicode flag and the maximum string length ("?" when no maximum length is recorded).
string StringStats::ToString(const BaseStatistics &stats) {
	const auto &payload = StringStats::GetDataUnsafe(stats);

	// only the leading part of each bound accepted by GetValidMinMaxSubstring is printed
	string min_text((const char *)payload.min, GetValidMinMaxSubstring(payload.min));
	string max_text((const char *)payload.max, GetValidMinMaxSubstring(payload.max));

	const char *unicode_text = payload.has_unicode ? "true" : "false";
	string length_text = payload.has_max_string_length ? to_string(payload.max_string_length) : "?";

	return StringUtil::Format("[Min: %s, Max: %s, Has Unicode: %s, Max String Length: %s]", min_text, max_text,
	                          unicode_text, length_text);
}
|
227
|
+
|
228
|
+
void StringStats::Verify(const BaseStatistics &stats, Vector &vector, const SelectionVector &sel, idx_t count) {
|
229
|
+
auto &string_data = StringStats::GetDataUnsafe(stats);
|
230
|
+
|
231
|
+
UnifiedVectorFormat vdata;
|
232
|
+
vector.ToUnifiedFormat(count, vdata);
|
233
|
+
auto data = (string_t *)vdata.data;
|
234
|
+
for (idx_t i = 0; i < count; i++) {
|
235
|
+
auto idx = sel.get_index(i);
|
236
|
+
auto index = vdata.sel->get_index(idx);
|
237
|
+
if (!vdata.validity.RowIsValid(index)) {
|
238
|
+
continue;
|
239
|
+
}
|
240
|
+
auto value = data[index];
|
241
|
+
auto data = value.GetDataUnsafe();
|
242
|
+
auto len = value.GetSize();
|
243
|
+
// LCOV_EXCL_START
|
244
|
+
if (string_data.has_max_string_length && len > string_data.max_string_length) {
|
245
|
+
throw InternalException(
|
246
|
+
"Statistics mismatch: string value exceeds maximum string length.\nStatistics: %s\nVector: %s",
|
247
|
+
stats.ToString(), vector.ToString(count));
|
248
|
+
}
|
249
|
+
if (stats.GetType().id() == LogicalTypeId::VARCHAR && !string_data.has_unicode) {
|
250
|
+
auto unicode = Utf8Proc::Analyze(data, len);
|
251
|
+
if (unicode == UnicodeType::UNICODE) {
|
252
|
+
throw InternalException("Statistics mismatch: string value contains unicode, but statistics says it "
|
253
|
+
"shouldn't.\nStatistics: %s\nVector: %s",
|
254
|
+
stats.ToString(), vector.ToString(count));
|
255
|
+
} else if (unicode == UnicodeType::INVALID) {
|
256
|
+
throw InternalException("Invalid unicode detected in vector: %s", vector.ToString(count));
|
257
|
+
}
|
258
|
+
}
|
259
|
+
if (StringValueComparison((const_data_ptr_t)data, MinValue<idx_t>(len, StringStatsData::MAX_STRING_MINMAX_SIZE),
|
260
|
+
string_data.min) < 0) {
|
261
|
+
throw InternalException("Statistics mismatch: value is smaller than min.\nStatistics: %s\nVector: %s",
|
262
|
+
stats.ToString(), vector.ToString(count));
|
263
|
+
}
|
264
|
+
if (StringValueComparison((const_data_ptr_t)data, MinValue<idx_t>(len, StringStatsData::MAX_STRING_MINMAX_SIZE),
|
265
|
+
string_data.max) > 0) {
|
266
|
+
throw InternalException("Statistics mismatch: value is bigger than max.\nStatistics: %s\nVector: %s",
|
267
|
+
stats.ToString(), vector.ToString(count));
|
268
|
+
}
|
269
|
+
// LCOV_EXCL_STOP
|
270
|
+
}
|
271
|
+
}
|
272
|
+
|
273
|
+
} // namespace duckdb
|
@@ -0,0 +1,131 @@
|
|
1
|
+
#include "duckdb/storage/statistics/struct_stats.hpp"
|
2
|
+
#include "duckdb/storage/statistics/base_statistics.hpp"
|
3
|
+
#include "duckdb/common/field_writer.hpp"
|
4
|
+
#include "duckdb/common/types/vector.hpp"
|
5
|
+
|
6
|
+
namespace duckdb {
|
7
|
+
|
8
|
+
//! Allocates one child statistics slot per struct child of `stats`' type and constructs each slot
//! with the corresponding child type.
void StructStats::Construct(BaseStatistics &stats) {
	auto &child_types = StructType::GetChildTypes(stats.GetType());
	const idx_t child_count = child_types.size();
	stats.child_stats = unique_ptr<BaseStatistics[]>(new BaseStatistics[child_count]);

	idx_t child_idx = 0;
	for (auto &child : child_types) {
		// child.second is the child's logical type
		BaseStatistics::Construct(stats.child_stats[child_idx], child.second);
		child_idx++;
	}
}
|
15
|
+
|
16
|
+
//! Creates struct statistics in the "unknown" state: the result is initialized as unknown and
//! every child slot receives a copy of unknown statistics for its child type.
BaseStatistics StructStats::CreateUnknown(LogicalType type) {
	// NOTE(review): child_types is read after `type` has been moved into the result; presumably the
	// child list stays alive through the moved-to type - confirm against LogicalType.
	auto &child_types = StructType::GetChildTypes(type);
	BaseStatistics stats(std::move(type));
	stats.InitializeUnknown();

	idx_t child_idx = 0;
	for (auto &child : child_types) {
		stats.child_stats[child_idx].Copy(BaseStatistics::CreateUnknown(child.second));
		child_idx++;
	}
	return stats;
}
|
25
|
+
|
26
|
+
// Creates statistics for the struct type `type` in which the struct itself is
// initialized as empty and every child slot is overwritten with
// BaseStatistics::CreateEmpty of the corresponding child type.
BaseStatistics StructStats::CreateEmpty(LogicalType type) {
	BaseStatistics result(std::move(type));
	result.InitializeEmpty();
	// `type` has been moved from at this point, so look the child types up through
	// the statistics object instead of holding a reference derived from the parameter
	const auto &child_types = StructType::GetChildTypes(result.GetType());
	for (idx_t i = 0; i < child_types.size(); i++) {
		result.child_stats[i].Copy(BaseStatistics::CreateEmpty(child_types[i].second));
	}
	return result;
}
|
35
|
+
|
36
|
+
// Returns a pointer to the first element of the child statistics array of `stats`.
// No bounds information is returned alongside the pointer.
const BaseStatistics *StructStats::GetChildStats(const BaseStatistics &stats) {
	D_ASSERT(stats.GetStatsType() == StatisticsType::STRUCT_STATS);
	const BaseStatistics *children = stats.child_stats.get();
	return children;
}
|
40
|
+
|
41
|
+
// Returns the statistics of the i-th struct child (read-only).
// Throws InternalException when `i` is not smaller than the struct's child count.
const BaseStatistics &StructStats::GetChildStats(const BaseStatistics &stats, idx_t i) {
	D_ASSERT(stats.GetStatsType() == StatisticsType::STRUCT_STATS);
	const auto child_count = StructType::GetChildCount(stats.GetType());
	if (i < child_count) {
		return stats.child_stats[i];
	}
	throw InternalException("Calling StructStats::GetChildStats but there are no stats for this index");
}
|
48
|
+
|
49
|
+
// Returns the statistics of the i-th struct child (mutable).
// Throws InternalException when `i` is not smaller than the struct's child count.
BaseStatistics &StructStats::GetChildStats(BaseStatistics &stats, idx_t i) {
	D_ASSERT(stats.GetStatsType() == StatisticsType::STRUCT_STATS);
	const auto child_count = StructType::GetChildCount(stats.GetType());
	if (i < child_count) {
		return stats.child_stats[i];
	}
	throw InternalException("Calling StructStats::GetChildStats but there are no stats for this index");
}
|
56
|
+
|
57
|
+
// Overwrites the statistics of the i-th struct child with a copy of `new_stats`.
// Throws InternalException when `i` is not smaller than the struct's child count,
// mirroring the bounds handling of the indexed GetChildStats accessors; an
// assertion alone would leave the array write unchecked when assertions are disabled.
void StructStats::SetChildStats(BaseStatistics &stats, idx_t i, const BaseStatistics &new_stats) {
	D_ASSERT(stats.GetStatsType() == StatisticsType::STRUCT_STATS);
	if (i >= StructType::GetChildCount(stats.GetType())) {
		throw InternalException("Calling StructStats::SetChildStats but there are no stats for this index");
	}
	stats.child_stats[i].Copy(new_stats);
}
|
62
|
+
|
63
|
+
// Overwrites the statistics of the i-th struct child from an optional pointer.
// A null `new_stats` is replaced by BaseStatistics::CreateUnknown of the child's type;
// otherwise the pointed-to statistics are copied in via the reference overload.
void StructStats::SetChildStats(BaseStatistics &stats, idx_t i, unique_ptr<BaseStatistics> new_stats) {
	D_ASSERT(stats.GetStatsType() == StatisticsType::STRUCT_STATS);
	if (new_stats) {
		StructStats::SetChildStats(stats, i, *new_stats);
		return;
	}
	// no statistics supplied: fall back to unknown statistics for this child
	StructStats::SetChildStats(stats, i,
	                           BaseStatistics::CreateUnknown(StructType::GetChildType(stats.GetType(), i)));
}
|
72
|
+
|
73
|
+
// Copies every child statistics entry of `other` into the matching slot of `stats`.
// The number of children is taken from the type of `stats`.
void StructStats::Copy(BaseStatistics &stats, const BaseStatistics &other) {
	const auto child_count = StructType::GetChildCount(stats.GetType());
	idx_t child_idx = 0;
	while (child_idx < child_count) {
		stats.child_stats[child_idx].Copy(other.child_stats[child_idx]);
		child_idx++;
	}
}
|
79
|
+
|
80
|
+
// Merges the child statistics of `other` into those of `stats`, child by child.
// When `other` has the VALIDITY type nothing is merged.
void StructStats::Merge(BaseStatistics &stats, const BaseStatistics &other) {
	const bool other_is_validity = other.GetType().id() == LogicalTypeId::VALIDITY;
	if (!other_is_validity) {
		D_ASSERT(stats.GetType() == other.GetType());
		const auto member_count = StructType::GetChildCount(stats.GetType());
		for (idx_t member_idx = 0; member_idx != member_count; ++member_idx) {
			stats.child_stats[member_idx].Merge(other.child_stats[member_idx]);
		}
	}
}
|
90
|
+
|
91
|
+
void StructStats::Serialize(const BaseStatistics &stats, FieldWriter &writer) {
|
92
|
+
auto child_stats = StructStats::GetChildStats(stats);
|
93
|
+
auto child_count = StructType::GetChildCount(stats.GetType());
|
94
|
+
for (idx_t i = 0; i < child_count; i++) {
|
95
|
+
writer.WriteSerializable(child_stats[i]);
|
96
|
+
}
|
97
|
+
}
|
98
|
+
|
99
|
+
// Reads struct statistics for `type` from `reader`: one serialized BaseStatistics
// per struct child, in child order, each deserialized with the child's type and
// copied into the matching child slot of the result.
BaseStatistics StructStats::Deserialize(FieldReader &reader, LogicalType type) {
	D_ASSERT(type.InternalType() == PhysicalType::STRUCT);
	BaseStatistics result(std::move(type));
	// `type` has been moved from at this point, so look the child types up through
	// the statistics object instead of holding a reference derived from the parameter
	const auto &child_types = StructType::GetChildTypes(result.GetType());
	for (idx_t i = 0; i < child_types.size(); i++) {
		result.child_stats[i].Copy(
		    reader.ReadRequiredSerializable<BaseStatistics, BaseStatistics>(child_types[i].second));
	}
	return result;
}
|
109
|
+
|
110
|
+
// Renders the child statistics as " {name: <child stats>, name: <child stats>}",
// using each struct member's name and the ToString() of its statistics.
string StructStats::ToString(const BaseStatistics &stats) {
	const auto &members = StructType::GetChildTypes(stats.GetType());
	string text = " {";
	for (idx_t member_idx = 0; member_idx < members.size(); member_idx++) {
		if (member_idx != 0) {
			// separator between consecutive members
			text += ", ";
		}
		text += members[member_idx].first;
		text += ": ";
		text += stats.child_stats[member_idx].ToString();
	}
	text += "}";
	return text;
}
|
123
|
+
|
124
|
+
void StructStats::Verify(const BaseStatistics &stats, Vector &vector, const SelectionVector &sel, idx_t count) {
|
125
|
+
auto &child_entries = StructVector::GetEntries(vector);
|
126
|
+
for (idx_t i = 0; i < child_entries.size(); i++) {
|
127
|
+
stats.child_stats[i].Verify(*child_entries[i], sel, count);
|
128
|
+
}
|
129
|
+
}
|
130
|
+
|
131
|
+
} // namespace duckdb
|
@@ -96,7 +96,7 @@ void ColumnCheckpointState::FlushSegment(unique_ptr<ColumnSegment> segment, idx_
|
|
96
96
|
} // LCOV_EXCL_STOP
|
97
97
|
|
98
98
|
// merge the segment stats into the global stats
|
99
|
-
global_stats->Merge(
|
99
|
+
global_stats->Merge(segment->stats.statistics);
|
100
100
|
|
101
101
|
// get the buffer of the segment and pin it
|
102
102
|
auto &db = column_data.GetDatabase();
|
@@ -104,7 +104,7 @@ void ColumnCheckpointState::FlushSegment(unique_ptr<ColumnSegment> segment, idx_
|
|
104
104
|
block_id_t block_id = INVALID_BLOCK;
|
105
105
|
uint32_t offset_in_block = 0;
|
106
106
|
|
107
|
-
if (!segment->stats.statistics
|
107
|
+
if (!segment->stats.statistics.IsConstant()) {
|
108
108
|
// non-constant block
|
109
109
|
PartialBlockAllocation allocation = partial_block_manager.GetBlockAllocation(segment_size);
|
110
110
|
block_id = allocation.state.block_id;
|
@@ -145,7 +145,7 @@ void ColumnCheckpointState::FlushSegment(unique_ptr<ColumnSegment> segment, idx_
|
|
145
145
|
}
|
146
146
|
|
147
147
|
// construct the data pointer
|
148
|
-
DataPointer data_pointer;
|
148
|
+
DataPointer data_pointer(segment->stats.statistics.Copy());
|
149
149
|
data_pointer.block_pointer.block_id = block_id;
|
150
150
|
data_pointer.block_pointer.offset = offset_in_block;
|
151
151
|
data_pointer.row_start = row_group.start;
|
@@ -155,7 +155,6 @@ void ColumnCheckpointState::FlushSegment(unique_ptr<ColumnSegment> segment, idx_
|
|
155
155
|
}
|
156
156
|
data_pointer.tuple_count = tuple_count;
|
157
157
|
data_pointer.compression_type = segment->function->type;
|
158
|
-
data_pointer.statistics = segment->stats.statistics->Copy();
|
159
158
|
|
160
159
|
// append the segment to the new segment tree
|
161
160
|
new_tree.AppendSegment(std::move(segment));
|
@@ -254,7 +254,7 @@ void ColumnData::AppendData(BaseStatistics &stats, ColumnAppendState &state, Uni
|
|
254
254
|
while (true) {
|
255
255
|
// append the data from the vector
|
256
256
|
idx_t copied_elements = state.current->Append(state, vdata, offset, count);
|
257
|
-
stats.Merge(
|
257
|
+
stats.Merge(state.current->stats.statistics);
|
258
258
|
if (copied_elements == count) {
|
259
259
|
// finished copying everything
|
260
260
|
break;
|
@@ -389,7 +389,7 @@ unique_ptr<ColumnCheckpointState> ColumnData::Checkpoint(RowGroup &row_group,
|
|
389
389
|
// scan the segments of the column data
|
390
390
|
// set up the checkpoint state
|
391
391
|
auto checkpoint_state = CreateCheckpointState(row_group, partial_block_manager);
|
392
|
-
checkpoint_state->global_stats = BaseStatistics::CreateEmpty(type
|
392
|
+
checkpoint_state->global_stats = BaseStatistics::CreateEmpty(type).ToUnique();
|
393
393
|
|
394
394
|
auto l = data.Lock();
|
395
395
|
auto nodes = data.MoveSegments(l);
|
@@ -414,13 +414,19 @@ void ColumnData::DeserializeColumn(Deserializer &source) {
|
|
414
414
|
idx_t data_pointer_count = source.Read<idx_t>();
|
415
415
|
for (idx_t data_ptr = 0; data_ptr < data_pointer_count; data_ptr++) {
|
416
416
|
// read the data pointer
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
417
|
+
auto row_start = source.Read<idx_t>();
|
418
|
+
auto tuple_count = source.Read<idx_t>();
|
419
|
+
auto block_pointer_block_id = source.Read<block_id_t>();
|
420
|
+
auto block_pointer_offset = source.Read<uint32_t>();
|
421
|
+
auto compression_type = source.Read<CompressionType>();
|
422
|
+
auto stats = BaseStatistics::Deserialize(source, type);
|
423
|
+
|
424
|
+
DataPointer data_pointer(std::move(stats));
|
425
|
+
data_pointer.row_start = row_start;
|
426
|
+
data_pointer.tuple_count = tuple_count;
|
427
|
+
data_pointer.block_pointer.block_id = block_pointer_block_id;
|
428
|
+
data_pointer.block_pointer.offset = block_pointer_offset;
|
429
|
+
data_pointer.compression_type = compression_type;
|
424
430
|
|
425
431
|
// create a persistent segment
|
426
432
|
auto segment = ColumnSegment::CreatePersistentSegment(
|
@@ -466,8 +472,7 @@ void ColumnData::GetStorageInfo(idx_t row_group_index, vector<idx_t> col_path, T
|
|
466
472
|
column_info.segment_start = segment->start;
|
467
473
|
column_info.segment_count = segment->count;
|
468
474
|
column_info.compression_type = CompressionTypeToString(segment->function->type);
|
469
|
-
column_info.segment_stats =
|
470
|
-
segment->stats.statistics ? segment->stats.statistics->ToString() : string("No Stats");
|
475
|
+
column_info.segment_stats = segment->stats.statistics.ToString();
|
471
476
|
column_info.has_updates = updates ? true : false;
|
472
477
|
// persistent
|
473
478
|
// block_id
|
@@ -218,16 +218,15 @@ void ColumnDataCheckpointer::WritePersistentSegments() {
|
|
218
218
|
D_ASSERT(segment->segment_type == ColumnSegmentType::PERSISTENT);
|
219
219
|
|
220
220
|
// set up the data pointer directly using the data from the persistent segment
|
221
|
-
DataPointer pointer;
|
221
|
+
DataPointer pointer(segment->stats.statistics.Copy());
|
222
222
|
pointer.block_pointer.block_id = segment->GetBlockId();
|
223
223
|
pointer.block_pointer.offset = segment->GetBlockOffset();
|
224
224
|
pointer.row_start = segment->start;
|
225
225
|
pointer.tuple_count = segment->count;
|
226
226
|
pointer.compression_type = segment->function->type;
|
227
|
-
pointer.statistics = segment->stats.statistics->Copy();
|
228
227
|
|
229
228
|
// merge the persistent stats into the global column stats
|
230
|
-
state.global_stats->Merge(
|
229
|
+
state.global_stats->Merge(segment->stats.statistics);
|
231
230
|
|
232
231
|
// directly append the current segment to the new tree
|
233
232
|
state.new_tree.AppendSegment(std::move(nodes[segment_idx].node));
|
@@ -19,7 +19,7 @@ unique_ptr<ColumnSegment> ColumnSegment::CreatePersistentSegment(DatabaseInstanc
|
|
19
19
|
block_id_t block_id, idx_t offset,
|
20
20
|
const LogicalType &type, idx_t start, idx_t count,
|
21
21
|
CompressionType compression_type,
|
22
|
-
|
22
|
+
BaseStatistics statistics) {
|
23
23
|
auto &config = DBConfig::GetConfig(db);
|
24
24
|
CompressionFunction *function;
|
25
25
|
shared_ptr<BlockHandle> block;
|
@@ -48,7 +48,7 @@ unique_ptr<ColumnSegment> ColumnSegment::CreateTransientSegment(DatabaseInstance
|
|
48
48
|
buffer_manager.Allocate(segment_size, false, &block);
|
49
49
|
}
|
50
50
|
return make_unique<ColumnSegment>(db, std::move(block), type, ColumnSegmentType::TRANSIENT, start, 0, function,
|
51
|
-
|
51
|
+
BaseStatistics::CreateEmpty(type), INVALID_BLOCK, 0, segment_size);
|
52
52
|
}
|
53
53
|
|
54
54
|
unique_ptr<ColumnSegment> ColumnSegment::CreateSegment(ColumnSegment &other, idx_t start) {
|
@@ -57,10 +57,9 @@ unique_ptr<ColumnSegment> ColumnSegment::CreateSegment(ColumnSegment &other, idx
|
|
57
57
|
|
58
58
|
ColumnSegment::ColumnSegment(DatabaseInstance &db, shared_ptr<BlockHandle> block, LogicalType type_p,
|
59
59
|
ColumnSegmentType segment_type, idx_t start, idx_t count, CompressionFunction *function_p,
|
60
|
-
|
61
|
-
idx_t segment_size_p)
|
60
|
+
BaseStatistics statistics, block_id_t block_id_p, idx_t offset_p, idx_t segment_size_p)
|
62
61
|
: SegmentBase(start, count), db(db), type(std::move(type_p)), type_size(GetTypeIdSize(type.InternalType())),
|
63
|
-
segment_type(segment_type), function(function_p), stats(
|
62
|
+
segment_type(segment_type), function(function_p), stats(std::move(statistics)), block(std::move(block)),
|
64
63
|
block_id(block_id_p), offset(offset_p), segment_size(segment_size_p) {
|
65
64
|
D_ASSERT(function);
|
66
65
|
if (function->init_segment) {
|
@@ -181,13 +180,12 @@ void ColumnSegment::ConvertToPersistent(BlockManager *block_manager, block_id_t
|
|
181
180
|
block_id = block_id_p;
|
182
181
|
offset = 0;
|
183
182
|
|
184
|
-
D_ASSERT(stats.statistics);
|
185
183
|
if (block_id == INVALID_BLOCK) {
|
186
184
|
// constant block: reset the block buffer
|
187
|
-
D_ASSERT(stats.statistics
|
185
|
+
D_ASSERT(stats.statistics.IsConstant());
|
188
186
|
block.reset();
|
189
187
|
} else {
|
190
|
-
D_ASSERT(!stats.statistics
|
188
|
+
D_ASSERT(!stats.statistics.IsConstant());
|
191
189
|
// non-constant block: write the block to disk
|
192
190
|
// the data for the block already exists in-memory of our block
|
193
191
|
// instead of copying the data we alter some metadata so the buffer points to an on-disk block
|