duckdb 0.7.2-dev16.0 → 0.7.2-dev314.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +2 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-extension.cpp +2 -0
- package/src/duckdb/extension/icu/icu-table-range.cpp +194 -0
- package/src/duckdb/extension/icu/include/icu-table-range.hpp +17 -0
- package/src/duckdb/extension/parquet/column_reader.cpp +5 -6
- package/src/duckdb/extension/parquet/column_writer.cpp +0 -1
- package/src/duckdb/extension/parquet/include/column_reader.hpp +1 -2
- package/src/duckdb/extension/parquet/include/generated_column_reader.hpp +1 -11
- package/src/duckdb/extension/parquet/parquet-extension.cpp +11 -2
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +26 -32
- package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +4 -0
- package/src/duckdb/src/catalog/catalog_entry/scalar_function_catalog_entry.cpp +7 -6
- package/src/duckdb/src/catalog/catalog_entry/table_function_catalog_entry.cpp +20 -1
- package/src/duckdb/src/common/enums/statement_type.cpp +2 -0
- package/src/duckdb/src/common/sort/sort_state.cpp +5 -7
- package/src/duckdb/src/common/types/bit.cpp +95 -58
- package/src/duckdb/src/common/types/value.cpp +149 -53
- package/src/duckdb/src/common/types/vector.cpp +13 -10
- package/src/duckdb/src/execution/column_binding_resolver.cpp +6 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +4 -5
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +1 -1
- package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +2 -3
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +32 -6
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +15 -15
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +18 -12
- package/src/duckdb/src/function/aggregate/algebraic/avg.cpp +0 -6
- package/src/duckdb/src/function/aggregate/distributive/bitagg.cpp +99 -95
- package/src/duckdb/src/function/aggregate/distributive/bitstring_agg.cpp +254 -0
- package/src/duckdb/src/function/aggregate/distributive/count.cpp +2 -4
- package/src/duckdb/src/function/aggregate/distributive/sum.cpp +11 -16
- package/src/duckdb/src/function/aggregate/distributive_functions.cpp +1 -0
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +16 -5
- package/src/duckdb/src/function/cast/bit_cast.cpp +0 -2
- package/src/duckdb/src/function/cast/blob_cast.cpp +0 -1
- package/src/duckdb/src/function/scalar/bit/bitstring.cpp +99 -0
- package/src/duckdb/src/function/scalar/date/date_diff.cpp +0 -1
- package/src/duckdb/src/function/scalar/date/date_part.cpp +17 -25
- package/src/duckdb/src/function/scalar/date/date_sub.cpp +0 -1
- package/src/duckdb/src/function/scalar/date/date_trunc.cpp +10 -14
- package/src/duckdb/src/function/scalar/generic/stats.cpp +2 -4
- package/src/duckdb/src/function/scalar/list/flatten.cpp +5 -12
- package/src/duckdb/src/function/scalar/list/list_concat.cpp +3 -8
- package/src/duckdb/src/function/scalar/list/list_extract.cpp +5 -12
- package/src/duckdb/src/function/scalar/list/list_value.cpp +5 -9
- package/src/duckdb/src/function/scalar/map/map_entries.cpp +61 -0
- package/src/duckdb/src/function/scalar/map/map_keys_values.cpp +97 -0
- package/src/duckdb/src/function/scalar/math/numeric.cpp +14 -17
- package/src/duckdb/src/function/scalar/nested_functions.cpp +3 -0
- package/src/duckdb/src/function/scalar/operators/add.cpp +0 -9
- package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +29 -48
- package/src/duckdb/src/function/scalar/operators/bitwise.cpp +0 -63
- package/src/duckdb/src/function/scalar/operators/multiply.cpp +0 -6
- package/src/duckdb/src/function/scalar/operators/subtract.cpp +0 -6
- package/src/duckdb/src/function/scalar/string/caseconvert.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/instr.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/length.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/like.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/substring.cpp +2 -6
- package/src/duckdb/src/function/scalar/string_functions.cpp +1 -0
- package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +4 -9
- package/src/duckdb/src/function/scalar/struct/struct_insert.cpp +10 -13
- package/src/duckdb/src/function/scalar/struct/struct_pack.cpp +5 -6
- package/src/duckdb/src/function/table/read_csv.cpp +9 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/function/table_function.cpp +19 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp +6 -8
- package/src/duckdb/src/include/duckdb/common/constants.hpp +0 -19
- package/src/duckdb/src/include/duckdb/common/enums/statement_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/enums/tableref_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/types/bit.hpp +5 -1
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +2 -8
- package/src/duckdb/src/include/duckdb/common/types.hpp +1 -2
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/aggregate/distributive_functions.hpp +5 -0
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +12 -3
- package/src/duckdb/src/include/duckdb/function/scalar/bit_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +12 -0
- package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/table_function.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/database.hpp +1 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/{alter_function_info.hpp → alter_scalar_function_info.hpp} +13 -13
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_function_info.hpp +47 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_table_function_info.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/statement/multi_statement.hpp +28 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/pivotref.hpp +76 -0
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +28 -0
- package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +8 -0
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +76 -44
- package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_fetch.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_fetch.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +93 -29
- package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +22 -3
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +6 -6
- package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +41 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/node_statistics.hpp +26 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +157 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/segment_statistics.hpp +2 -7
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +74 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +42 -0
- package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +2 -3
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -3
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
- package/src/duckdb/src/include/duckdb.h +49 -1
- package/src/duckdb/src/include/duckdb.hpp +0 -1
- package/src/duckdb/src/main/capi/pending-c.cpp +16 -3
- package/src/duckdb/src/main/capi/result-c.cpp +27 -1
- package/src/duckdb/src/main/capi/stream-c.cpp +25 -0
- package/src/duckdb/src/main/client_context.cpp +8 -1
- package/src/duckdb/src/main/config.cpp +66 -1
- package/src/duckdb/src/main/database.cpp +10 -2
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +98 -67
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +16 -3
- package/src/duckdb/src/optimizer/statistics/expression/propagate_aggregate.cpp +9 -3
- package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +6 -7
- package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -11
- package/src/duckdb/src/optimizer/statistics/expression/propagate_columnref.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +13 -15
- package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +0 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_constant.cpp +3 -75
- package/src/duckdb/src/optimizer/statistics/expression/propagate_function.cpp +7 -2
- package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +10 -0
- package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +2 -3
- package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +28 -31
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +4 -5
- package/src/duckdb/src/optimizer/statistics/operator/propagate_set_operation.cpp +3 -3
- package/src/duckdb/src/optimizer/statistics_propagator.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/alter_info.cpp +7 -3
- package/src/duckdb/src/parser/parsed_data/alter_scalar_function_info.cpp +56 -0
- package/src/duckdb/src/parser/parsed_data/alter_table_function_info.cpp +51 -0
- package/src/duckdb/src/parser/parsed_data/create_scalar_function_info.cpp +3 -2
- package/src/duckdb/src/parser/parsed_data/create_table_function_info.cpp +6 -0
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +8 -0
- package/src/duckdb/src/parser/query_node.cpp +1 -1
- package/src/duckdb/src/parser/statement/multi_statement.cpp +18 -0
- package/src/duckdb/src/parser/tableref/pivotref.cpp +296 -0
- package/src/duckdb/src/parser/tableref.cpp +3 -0
- package/src/duckdb/src/parser/transform/helpers/transform_alias.cpp +12 -6
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +24 -0
- package/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +4 -0
- package/src/duckdb/src/parser/transform/statement/transform_create_view.cpp +4 -0
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +150 -0
- package/src/duckdb/src/parser/transform/statement/transform_select.cpp +8 -0
- package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +1 -1
- package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +4 -0
- package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +105 -0
- package/src/duckdb/src/parser/transform/tableref/transform_tableref.cpp +2 -0
- package/src/duckdb/src/parser/transformer.cpp +15 -3
- package/src/duckdb/src/planner/bind_context.cpp +16 -0
- package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +11 -3
- package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +0 -1
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_logical_plan.cpp +17 -0
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +9 -0
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +365 -0
- package/src/duckdb/src/planner/binder.cpp +7 -1
- package/src/duckdb/src/planner/bound_result_modifier.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +1 -1
- package/src/duckdb/src/planner/filter/constant_filter.cpp +4 -6
- package/src/duckdb/src/planner/pragma_handler.cpp +10 -2
- package/src/duckdb/src/storage/buffer_manager.cpp +44 -46
- package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -1
- package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +1 -4
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +4 -4
- package/src/duckdb/src/storage/compression/bitpacking.cpp +28 -24
- package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +43 -45
- package/src/duckdb/src/storage/compression/numeric_constant.cpp +9 -10
- package/src/duckdb/src/storage/compression/patas.cpp +1 -1
- package/src/duckdb/src/storage/compression/rle.cpp +19 -15
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +5 -5
- package/src/duckdb/src/storage/data_table.cpp +4 -6
- package/src/duckdb/src/storage/statistics/base_statistics.cpp +373 -128
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +58 -3
- package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +4 -9
- package/src/duckdb/src/storage/statistics/list_stats.cpp +117 -0
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +529 -0
- package/src/duckdb/src/storage/statistics/segment_statistics.cpp +2 -11
- package/src/duckdb/src/storage/statistics/string_stats.cpp +273 -0
- package/src/duckdb/src/storage/statistics/struct_stats.cpp +131 -0
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -4
- package/src/duckdb/src/storage/table/column_data.cpp +16 -11
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +2 -3
- package/src/duckdb/src/storage/table/column_segment.cpp +6 -8
- package/src/duckdb/src/storage/table/list_column_data.cpp +39 -58
- package/src/duckdb/src/storage/table/row_group.cpp +24 -23
- package/src/duckdb/src/storage/table/row_group_collection.cpp +12 -12
- package/src/duckdb/src/storage/table/standard_column_data.cpp +6 -6
- package/src/duckdb/src/storage/table/struct_column_data.cpp +15 -16
- package/src/duckdb/src/storage/table/table_statistics.cpp +27 -7
- package/src/duckdb/src/storage/table/update_segment.cpp +10 -12
- package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +3 -0
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +34 -1
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +1020 -530
- package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +7 -0
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +23560 -22737
- package/src/duckdb/ub_src_function_aggregate_distributive.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_bit.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_map.cpp +4 -0
- package/src/duckdb/ub_src_main_capi.cpp +2 -0
- package/src/duckdb/ub_src_parser_parsed_data.cpp +4 -2
- package/src/duckdb/ub_src_parser_statement.cpp +2 -0
- package/src/duckdb/ub_src_parser_tableref.cpp +2 -0
- package/src/duckdb/ub_src_parser_transform_statement.cpp +2 -0
- package/src/duckdb/ub_src_parser_transform_tableref.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_tableref.cpp +2 -0
- package/src/duckdb/ub_src_storage_statistics.cpp +4 -6
- package/src/duckdb/src/include/duckdb/main/loadable_extension.hpp +0 -59
- package/src/duckdb/src/include/duckdb/storage/statistics/list_statistics.hpp +0 -36
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_statistics.hpp +0 -75
- package/src/duckdb/src/include/duckdb/storage/statistics/string_statistics.hpp +0 -49
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_statistics.hpp +0 -36
- package/src/duckdb/src/include/duckdb/storage/statistics/validity_statistics.hpp +0 -45
- package/src/duckdb/src/parser/parsed_data/alter_function_info.cpp +0 -55
- package/src/duckdb/src/storage/statistics/list_statistics.cpp +0 -94
- package/src/duckdb/src/storage/statistics/numeric_statistics.cpp +0 -307
- package/src/duckdb/src/storage/statistics/string_statistics.cpp +0 -220
- package/src/duckdb/src/storage/statistics/struct_statistics.cpp +0 -108
- package/src/duckdb/src/storage/statistics/validity_statistics.cpp +0 -91
@@ -1,5 +1,5 @@
|
|
1
1
|
#include "duckdb/storage/table/list_column_data.hpp"
|
2
|
-
#include "duckdb/storage/statistics/list_statistics.hpp"
|
2
|
+
#include "duckdb/storage/statistics/list_stats.hpp"
|
3
3
|
#include "duckdb/transaction/transaction.hpp"
|
4
4
|
|
5
5
|
namespace duckdb {
|
@@ -39,15 +39,14 @@ void ListColumnData::InitializeScan(ColumnScanState &state) {
|
|
39
39
|
state.child_states.push_back(std::move(child_state));
|
40
40
|
}
|
41
41
|
|
42
|
-
|
42
|
+
uint64_t ListColumnData::FetchListOffset(idx_t row_idx) {
|
43
43
|
auto segment = (ColumnSegment *)data.GetSegment(row_idx);
|
44
44
|
ColumnFetchState fetch_state;
|
45
45
|
Vector result(type, 1);
|
46
46
|
segment->FetchRow(fetch_state, row_idx, result, 0);
|
47
47
|
|
48
48
|
// initialize the child scan with the required offset
|
49
|
-
|
50
|
-
return list_data[0];
|
49
|
+
return FlatVector::GetData<uint64_t>(result)[0];
|
51
50
|
}
|
52
51
|
|
53
52
|
void ListColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx) {
|
@@ -63,8 +62,7 @@ void ListColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t row_
|
|
63
62
|
state.child_states.push_back(std::move(validity_state));
|
64
63
|
|
65
64
|
// we need to read the list at position row_idx to get the correct row offset of the child
|
66
|
-
auto
|
67
|
-
auto child_offset = list_entry.offset;
|
65
|
+
auto child_offset = row_idx == start ? 0 : FetchListOffset(row_idx - 1);
|
68
66
|
|
69
67
|
D_ASSERT(child_offset <= child_column->GetMaxEntry());
|
70
68
|
ColumnScanState child_state;
|
@@ -89,26 +87,26 @@ idx_t ListColumnData::ScanCount(ColumnScanState &state, Vector &result, idx_t co
|
|
89
87
|
// updates not supported for lists
|
90
88
|
D_ASSERT(!updates);
|
91
89
|
|
92
|
-
|
90
|
+
Vector offset_vector(LogicalType::UBIGINT, count);
|
91
|
+
idx_t scan_count = ScanVector(state, offset_vector, count);
|
93
92
|
D_ASSERT(scan_count > 0);
|
94
93
|
validity.ScanCount(state.child_states[0], result, count);
|
95
94
|
|
96
|
-
auto data = FlatVector::GetData<
|
97
|
-
auto first_entry = data[0];
|
95
|
+
auto data = FlatVector::GetData<uint64_t>(offset_vector);
|
98
96
|
auto last_entry = data[scan_count - 1];
|
99
97
|
|
100
|
-
#ifdef DEBUG
|
101
|
-
for (idx_t i = 1; i < scan_count; i++) {
|
102
|
-
D_ASSERT(data[i].offset == data[i - 1].offset + data[i - 1].length);
|
103
|
-
}
|
104
|
-
#endif
|
105
98
|
// shift all offsets so they are 0 at the first entry
|
99
|
+
auto result_data = FlatVector::GetData<list_entry_t>(result);
|
100
|
+
auto base_offset = state.last_offset;
|
101
|
+
idx_t current_offset = 0;
|
106
102
|
for (idx_t i = 0; i < scan_count; i++) {
|
107
|
-
|
103
|
+
result_data[i].offset = current_offset;
|
104
|
+
result_data[i].length = data[i] - current_offset - base_offset;
|
105
|
+
current_offset += result_data[i].length;
|
108
106
|
}
|
109
107
|
|
110
|
-
D_ASSERT(last_entry
|
111
|
-
idx_t child_scan_count = last_entry
|
108
|
+
D_ASSERT(last_entry >= base_offset);
|
109
|
+
idx_t child_scan_count = last_entry - base_offset;
|
112
110
|
ListVector::Reserve(result, child_scan_count);
|
113
111
|
|
114
112
|
if (child_scan_count > 0) {
|
@@ -118,6 +116,7 @@ idx_t ListColumnData::ScanCount(ColumnScanState &state, Vector &result, idx_t co
|
|
118
116
|
child_column->start + child_column->GetMaxEntry());
|
119
117
|
child_column->ScanCount(state.child_states[1], child_entry, child_scan_count);
|
120
118
|
}
|
119
|
+
state.last_offset = last_entry;
|
121
120
|
|
122
121
|
ListVector::SetListSize(result, child_scan_count);
|
123
122
|
return scan_count;
|
@@ -130,19 +129,19 @@ void ListColumnData::Skip(ColumnScanState &state, idx_t count) {
|
|
130
129
|
// we need to read the list entries/offsets to figure out how much to skip
|
131
130
|
// note that we only need to read the first and last entry
|
132
131
|
// however, let's just read all "count" entries for now
|
133
|
-
|
134
|
-
Vector result(type, (data_ptr_t)data.get());
|
132
|
+
Vector result(LogicalType::UBIGINT, count);
|
135
133
|
idx_t scan_count = ScanVector(state, result, count);
|
136
134
|
if (scan_count == 0) {
|
137
135
|
return;
|
138
136
|
}
|
139
137
|
|
140
|
-
auto
|
141
|
-
auto
|
142
|
-
idx_t child_scan_count = last_entry
|
138
|
+
auto data = FlatVector::GetData<uint64_t>(result);
|
139
|
+
auto last_entry = data[scan_count - 1];
|
140
|
+
idx_t child_scan_count = last_entry - state.last_offset;
|
143
141
|
if (child_scan_count == 0) {
|
144
142
|
return;
|
145
143
|
}
|
144
|
+
state.last_offset = last_entry;
|
146
145
|
|
147
146
|
// skip the child state forward by the child_scan_count
|
148
147
|
child_column->Skip(state.child_states[1], child_scan_count);
|
@@ -163,10 +162,8 @@ void ListColumnData::InitializeAppend(ColumnAppendState &state) {
|
|
163
162
|
state.child_appends.push_back(std::move(child_append_state));
|
164
163
|
}
|
165
164
|
|
166
|
-
void ListColumnData::Append(BaseStatistics &stats_p, ColumnAppendState &state, Vector &vector, idx_t count) {
|
165
|
+
void ListColumnData::Append(BaseStatistics &stats, ColumnAppendState &state, Vector &vector, idx_t count) {
|
167
166
|
D_ASSERT(count > 0);
|
168
|
-
auto &stats = (ListStatistics &)stats_p;
|
169
|
-
|
170
167
|
UnifiedVectorFormat list_data;
|
171
168
|
vector.ToUnifiedFormat(count, list_data);
|
172
169
|
auto &list_validity = list_data.validity;
|
@@ -177,8 +174,8 @@ void ListColumnData::Append(BaseStatistics &stats_p, ColumnAppendState &state, V
|
|
177
174
|
idx_t child_count = 0;
|
178
175
|
|
179
176
|
ValidityMask append_mask(count);
|
180
|
-
auto append_offsets = unique_ptr<
|
181
|
-
bool child_contiguous =
|
177
|
+
auto append_offsets = unique_ptr<uint64_t[]>(new uint64_t[count]);
|
178
|
+
bool child_contiguous = true;
|
182
179
|
for (idx_t i = 0; i < count; i++) {
|
183
180
|
auto input_idx = list_data.sel->get_index(i);
|
184
181
|
if (list_validity.RowIsValid(input_idx)) {
|
@@ -186,17 +183,11 @@ void ListColumnData::Append(BaseStatistics &stats_p, ColumnAppendState &state, V
|
|
186
183
|
if (input_list.offset != child_count) {
|
187
184
|
child_contiguous = false;
|
188
185
|
}
|
189
|
-
append_offsets[i]
|
190
|
-
append_offsets[i].length = input_list.length;
|
186
|
+
append_offsets[i] = start_offset + child_count + input_list.length;
|
191
187
|
child_count += input_list.length;
|
192
188
|
} else {
|
193
189
|
append_mask.SetInvalid(i);
|
194
|
-
|
195
|
-
append_offsets[i].offset = append_offsets[i - 1].offset + append_offsets[i - 1].length;
|
196
|
-
} else {
|
197
|
-
append_offsets[i].offset = start_offset;
|
198
|
-
}
|
199
|
-
append_offsets[i].length = 0;
|
190
|
+
append_offsets[i] = start_offset + child_count;
|
200
191
|
}
|
201
192
|
}
|
202
193
|
auto &list_child = ListVector::GetEntry(vector);
|
@@ -218,27 +209,19 @@ void ListColumnData::Append(BaseStatistics &stats_p, ColumnAppendState &state, V
|
|
218
209
|
D_ASSERT(current_count == child_count);
|
219
210
|
child_vector.Slice(list_child, child_sel, child_count);
|
220
211
|
}
|
221
|
-
#ifdef DEBUG
|
222
|
-
D_ASSERT(append_offsets[0].offset == start_offset);
|
223
|
-
for (idx_t i = 1; i < count; i++) {
|
224
|
-
D_ASSERT(append_offsets[i].offset == append_offsets[i - 1].offset + append_offsets[i - 1].length);
|
225
|
-
}
|
226
|
-
D_ASSERT(append_offsets[count - 1].offset + append_offsets[count - 1].length - append_offsets[0].offset ==
|
227
|
-
child_count);
|
228
|
-
#endif
|
229
212
|
|
230
213
|
UnifiedVectorFormat vdata;
|
231
|
-
vdata.validity = append_mask;
|
232
214
|
vdata.sel = FlatVector::IncrementalSelectionVector();
|
233
215
|
vdata.data = (data_ptr_t)append_offsets.get();
|
234
216
|
|
235
217
|
// append the list offsets
|
236
218
|
ColumnData::AppendData(stats, state, vdata, count);
|
237
219
|
// append the validity data
|
238
|
-
validity
|
220
|
+
vdata.validity = append_mask;
|
221
|
+
validity.AppendData(stats, state.child_appends[0], vdata, count);
|
239
222
|
// append the child vector
|
240
223
|
if (child_count > 0) {
|
241
|
-
child_column->Append(
|
224
|
+
child_column->Append(ListStats::GetChildStats(stats), state.child_appends[1], child_vector, child_count);
|
242
225
|
}
|
243
226
|
}
|
244
227
|
|
@@ -248,8 +231,8 @@ void ListColumnData::RevertAppend(row_t start_row) {
|
|
248
231
|
auto column_count = GetMaxEntry();
|
249
232
|
if (column_count > start) {
|
250
233
|
// revert append in the child column
|
251
|
-
auto
|
252
|
-
child_column->RevertAppend(
|
234
|
+
auto list_offset = FetchListOffset(column_count - 1);
|
235
|
+
child_column->RevertAppend(list_offset);
|
253
236
|
}
|
254
237
|
}
|
255
238
|
|
@@ -281,19 +264,18 @@ void ListColumnData::FetchRow(TransactionData transaction, ColumnFetchState &sta
|
|
281
264
|
auto child_state = make_unique<ColumnFetchState>();
|
282
265
|
state.child_states.push_back(std::move(child_state));
|
283
266
|
}
|
284
|
-
// fetch the list_entry_t and the validity mask for that list
|
285
|
-
auto segment = (ColumnSegment *)data.GetSegment(row_id);
|
286
267
|
|
287
268
|
// now perform the fetch within the segment
|
288
|
-
|
269
|
+
auto start_offset = idx_t(row_id) == this->start ? 0 : FetchListOffset(row_id - 1);
|
270
|
+
auto end_offset = FetchListOffset(row_id);
|
289
271
|
validity.FetchRow(transaction, *state.child_states[0], row_id, result, result_idx);
|
290
272
|
|
291
273
|
auto &validity = FlatVector::Validity(result);
|
292
274
|
auto list_data = FlatVector::GetData<list_entry_t>(result);
|
293
275
|
auto &list_entry = list_data[result_idx];
|
294
|
-
auto original_offset = list_entry.offset;
|
295
276
|
// set the list entry offset to the size of the current list
|
296
277
|
list_entry.offset = ListVector::GetListSize(result);
|
278
|
+
list_entry.length = end_offset - start_offset;
|
297
279
|
if (!validity.RowIsValid(result_idx)) {
|
298
280
|
// the list is NULL! no need to fetch the child
|
299
281
|
D_ASSERT(list_entry.length == 0);
|
@@ -307,7 +289,7 @@ void ListColumnData::FetchRow(TransactionData transaction, ColumnFetchState &sta
|
|
307
289
|
auto &child_type = ListType::GetChildType(result.GetType());
|
308
290
|
Vector child_scan(child_type, child_scan_count);
|
309
291
|
// seek the scan towards the specified position and read [length] entries
|
310
|
-
child_column->InitializeScanWithOffset(*child_state, start +
|
292
|
+
child_column->InitializeScanWithOffset(*child_state, start + start_offset);
|
311
293
|
D_ASSERT(child_type.InternalType() == PhysicalType::STRUCT ||
|
312
294
|
child_state->row_index + child_scan_count - this->start <= child_column->GetMaxEntry());
|
313
295
|
child_column->ScanCount(*child_state, child_scan, child_scan_count);
|
@@ -324,7 +306,7 @@ void ListColumnData::CommitDropColumn() {
|
|
324
306
|
struct ListColumnCheckpointState : public ColumnCheckpointState {
|
325
307
|
ListColumnCheckpointState(RowGroup &row_group, ColumnData &column_data, PartialBlockManager &partial_block_manager)
|
326
308
|
: ColumnCheckpointState(row_group, column_data, partial_block_manager) {
|
327
|
-
global_stats =
|
309
|
+
global_stats = ListStats::CreateEmpty(column_data.type).ToUnique();
|
328
310
|
}
|
329
311
|
|
330
312
|
unique_ptr<ColumnCheckpointState> validity_state;
|
@@ -333,10 +315,8 @@ struct ListColumnCheckpointState : public ColumnCheckpointState {
|
|
333
315
|
public:
|
334
316
|
unique_ptr<BaseStatistics> GetStatistics() override {
|
335
317
|
auto stats = global_stats->Copy();
|
336
|
-
|
337
|
-
stats
|
338
|
-
list_stats.child_stats = child_state->GetStatistics();
|
339
|
-
return stats;
|
318
|
+
ListStats::SetChildStats(stats, child_state->GetStatistics());
|
319
|
+
return stats.ToUnique();
|
340
320
|
}
|
341
321
|
|
342
322
|
void WriteDataPointers(RowGroupWriter &writer) override {
|
@@ -376,6 +356,7 @@ void ListColumnData::DeserializeColumn(Deserializer &source) {
|
|
376
356
|
}
|
377
357
|
|
378
358
|
void ListColumnData::GetStorageInfo(idx_t row_group_index, vector<idx_t> col_path, TableStorageInfo &result) {
|
359
|
+
ColumnData::GetStorageInfo(row_group_index, col_path, result);
|
379
360
|
col_path.push_back(0);
|
380
361
|
validity.GetStorageInfo(row_group_index, col_path, result);
|
381
362
|
col_path.back() = 1;
|
@@ -46,8 +46,7 @@ RowGroup::RowGroup(AttachedDatabase &db, BlockManager &block_manager, DataTableI
|
|
46
46
|
|
47
47
|
// set up the statistics
|
48
48
|
for (auto &stats : pointer.statistics) {
|
49
|
-
|
50
|
-
this->stats.push_back(make_shared<SegmentStatistics>(stats_type, std::move(stats)));
|
49
|
+
this->stats.emplace_back(std::move(stats));
|
51
50
|
}
|
52
51
|
this->version_info = std::move(pointer.versions);
|
53
52
|
|
@@ -88,7 +87,7 @@ void RowGroup::InitializeEmpty(const vector<LogicalType> &types) {
|
|
88
87
|
// set up the segment trees for the column segments
|
89
88
|
for (idx_t i = 0; i < types.size(); i++) {
|
90
89
|
auto column_data = ColumnData::CreateColumn(block_manager, GetTableInfo(), i, start, types[i]);
|
91
|
-
stats.
|
90
|
+
stats.emplace_back(types[i]);
|
92
91
|
columns.push_back(std::move(column_data));
|
93
92
|
}
|
94
93
|
}
|
@@ -158,7 +157,7 @@ unique_ptr<RowGroup> RowGroup::AlterType(const LogicalType &target_type, idx_t c
|
|
158
157
|
InitializeScan(scan_state);
|
159
158
|
|
160
159
|
Vector append_vector(target_type);
|
161
|
-
|
160
|
+
SegmentStatistics altered_col_stats(target_type);
|
162
161
|
while (true) {
|
163
162
|
// scan the table
|
164
163
|
scan_chunk.Reset();
|
@@ -168,7 +167,7 @@ unique_ptr<RowGroup> RowGroup::AlterType(const LogicalType &target_type, idx_t c
|
|
168
167
|
}
|
169
168
|
// execute the expression
|
170
169
|
executor.ExecuteExpression(scan_chunk, append_vector);
|
171
|
-
column_data->Append(
|
170
|
+
column_data->Append(altered_col_stats.statistics, append_state, append_vector, scan_chunk.size());
|
172
171
|
}
|
173
172
|
|
174
173
|
// set up the row_group based on this row_group
|
@@ -178,11 +177,11 @@ unique_ptr<RowGroup> RowGroup::AlterType(const LogicalType &target_type, idx_t c
|
|
178
177
|
if (i == changed_idx) {
|
179
178
|
// this is the altered column: use the new column
|
180
179
|
row_group->columns.push_back(std::move(column_data));
|
181
|
-
row_group->stats.push_back(std::move(altered_col_stats));
|
180
|
+
row_group->stats.push_back(std::move(altered_col_stats)); // NOLINT: false positive
|
182
181
|
} else {
|
183
182
|
// this column was not altered: use the data directly
|
184
183
|
row_group->columns.push_back(columns[i]);
|
185
|
-
row_group->stats.
|
184
|
+
row_group->stats.emplace_back(stats[i].statistics.Copy());
|
186
185
|
}
|
187
186
|
}
|
188
187
|
row_group->Verify();
|
@@ -196,8 +195,7 @@ unique_ptr<RowGroup> RowGroup::AddColumn(ColumnDefinition &new_column, Expressio
|
|
196
195
|
// construct a new column data for the new column
|
197
196
|
auto added_column =
|
198
197
|
ColumnData::CreateColumn(block_manager, GetTableInfo(), columns.size(), start, new_column.Type());
|
199
|
-
|
200
|
-
new_column.Type(), BaseStatistics::CreateEmpty(new_column.Type(), StatisticsType::LOCAL_STATS));
|
198
|
+
SegmentStatistics added_col_stats(new_column.Type());
|
201
199
|
|
202
200
|
idx_t rows_to_write = this->count;
|
203
201
|
if (rows_to_write > 0) {
|
@@ -211,7 +209,7 @@ unique_ptr<RowGroup> RowGroup::AddColumn(ColumnDefinition &new_column, Expressio
|
|
211
209
|
dummy_chunk.SetCardinality(rows_in_this_vector);
|
212
210
|
executor.ExecuteExpression(dummy_chunk, result);
|
213
211
|
}
|
214
|
-
added_column->Append(
|
212
|
+
added_column->Append(added_col_stats.statistics, state, result, rows_in_this_vector);
|
215
213
|
}
|
216
214
|
}
|
217
215
|
|
@@ -219,7 +217,9 @@ unique_ptr<RowGroup> RowGroup::AddColumn(ColumnDefinition &new_column, Expressio
|
|
219
217
|
auto row_group = make_unique<RowGroup>(db, block_manager, table_info, this->start, this->count);
|
220
218
|
row_group->version_info = version_info;
|
221
219
|
row_group->columns = columns;
|
222
|
-
|
220
|
+
for (auto &stat : stats) {
|
221
|
+
row_group->stats.emplace_back(stat.statistics.Copy());
|
222
|
+
}
|
223
223
|
// now add the new column
|
224
224
|
row_group->columns.push_back(std::move(added_column));
|
225
225
|
row_group->stats.push_back(std::move(added_col_stats));
|
@@ -236,7 +236,9 @@ unique_ptr<RowGroup> RowGroup::RemoveColumn(idx_t removed_column) {
|
|
236
236
|
auto row_group = make_unique<RowGroup>(db, block_manager, table_info, this->start, this->count);
|
237
237
|
row_group->version_info = version_info;
|
238
238
|
row_group->columns = columns;
|
239
|
-
|
239
|
+
for (auto &stat : stats) {
|
240
|
+
row_group->stats.emplace_back(stat.statistics.Copy());
|
241
|
+
}
|
240
242
|
// now remove the column
|
241
243
|
row_group->columns.erase(row_group->columns.begin() + removed_column);
|
242
244
|
row_group->stats.erase(row_group->stats.begin() + removed_column);
|
@@ -275,7 +277,7 @@ bool RowGroup::CheckZonemap(TableFilterSet &filters, const vector<column_t> &col
|
|
275
277
|
auto &filter = entry.second;
|
276
278
|
auto base_column_index = column_ids[column_index];
|
277
279
|
|
278
|
-
auto propagate_result = filter->CheckStatistics(
|
280
|
+
auto propagate_result = filter->CheckStatistics(stats[base_column_index].statistics);
|
279
281
|
if (propagate_result == FilterPropagateResult::FILTER_ALWAYS_FALSE ||
|
280
282
|
propagate_result == FilterPropagateResult::FILTER_FALSE_OR_NULL) {
|
281
283
|
return false;
|
@@ -628,7 +630,7 @@ void RowGroup::InitializeAppend(RowGroupAppendState &append_state) {
|
|
628
630
|
void RowGroup::Append(RowGroupAppendState &state, DataChunk &chunk, idx_t append_count) {
|
629
631
|
// append to the current row_group
|
630
632
|
for (idx_t i = 0; i < columns.size(); i++) {
|
631
|
-
columns[i]->Append(
|
633
|
+
columns[i]->Append(stats[i].statistics, state.states[i], chunk.data[i], append_count);
|
632
634
|
}
|
633
635
|
state.offset_in_row_group += append_count;
|
634
636
|
}
|
@@ -671,21 +673,21 @@ unique_ptr<BaseStatistics> RowGroup::GetStatistics(idx_t column_idx) {
|
|
671
673
|
D_ASSERT(column_idx < stats.size());
|
672
674
|
|
673
675
|
lock_guard<mutex> slock(stats_lock);
|
674
|
-
return stats[column_idx]
|
676
|
+
return stats[column_idx].statistics.ToUnique();
|
675
677
|
}
|
676
678
|
|
677
679
|
void RowGroup::MergeStatistics(idx_t column_idx, const BaseStatistics &other) {
|
678
680
|
D_ASSERT(column_idx < stats.size());
|
679
681
|
|
680
682
|
lock_guard<mutex> slock(stats_lock);
|
681
|
-
stats[column_idx]
|
683
|
+
stats[column_idx].statistics.Merge(other);
|
682
684
|
}
|
683
685
|
|
684
686
|
void RowGroup::MergeIntoStatistics(idx_t column_idx, BaseStatistics &other) {
|
685
687
|
D_ASSERT(column_idx < stats.size());
|
686
688
|
|
687
689
|
lock_guard<mutex> slock(stats_lock);
|
688
|
-
other.Merge(
|
690
|
+
other.Merge(stats[column_idx].statistics);
|
689
691
|
}
|
690
692
|
|
691
693
|
RowGroupWriteData RowGroup::WriteToDisk(PartialBlockManager &manager,
|
@@ -711,14 +713,14 @@ RowGroupWriteData RowGroup::WriteToDisk(PartialBlockManager &manager,
|
|
711
713
|
auto stats = checkpoint_state->GetStatistics();
|
712
714
|
D_ASSERT(stats);
|
713
715
|
|
714
|
-
result.statistics.push_back(
|
716
|
+
result.statistics.push_back(stats->Copy());
|
715
717
|
result.states.push_back(std::move(checkpoint_state));
|
716
718
|
}
|
717
719
|
D_ASSERT(result.states.size() == result.statistics.size());
|
718
720
|
return result;
|
719
721
|
}
|
720
722
|
|
721
|
-
RowGroupPointer RowGroup::Checkpoint(RowGroupWriter &writer,
|
723
|
+
RowGroupPointer RowGroup::Checkpoint(RowGroupWriter &writer, TableStatistics &global_stats) {
|
722
724
|
RowGroupPointer row_group_pointer;
|
723
725
|
|
724
726
|
vector<CompressionType> compression_types;
|
@@ -728,7 +730,7 @@ RowGroupPointer RowGroup::Checkpoint(RowGroupWriter &writer, vector<unique_ptr<B
|
|
728
730
|
}
|
729
731
|
auto result = WriteToDisk(writer.GetPartialBlockManager(), compression_types);
|
730
732
|
for (idx_t column_idx = 0; column_idx < columns.size(); column_idx++) {
|
731
|
-
global_stats
|
733
|
+
global_stats.GetStats(column_idx).Statistics().Merge(result.statistics[column_idx]);
|
732
734
|
}
|
733
735
|
row_group_pointer.statistics = std::move(result.statistics);
|
734
736
|
|
@@ -805,7 +807,7 @@ void RowGroup::Serialize(RowGroupPointer &pointer, Serializer &main_serializer)
|
|
805
807
|
writer.WriteField<uint64_t>(pointer.tuple_count);
|
806
808
|
auto &serializer = writer.GetSerializer();
|
807
809
|
for (auto &stats : pointer.statistics) {
|
808
|
-
stats
|
810
|
+
stats.Serialize(serializer);
|
809
811
|
}
|
810
812
|
for (auto &data_pointer : pointer.data_pointers) {
|
811
813
|
serializer.Write<block_id_t>(data_pointer.block_id);
|
@@ -828,8 +830,7 @@ RowGroupPointer RowGroup::Deserialize(Deserializer &main_source, const ColumnLis
|
|
828
830
|
|
829
831
|
auto &source = reader.GetSource();
|
830
832
|
for (auto &col : columns.Physical()) {
|
831
|
-
|
832
|
-
result.statistics.push_back(std::move(stats));
|
833
|
+
result.statistics.push_back(BaseStatistics::Deserialize(source, col.Type()));
|
833
834
|
}
|
834
835
|
for (idx_t i = 0; i < columns.PhysicalColumnCount(); i++) {
|
835
836
|
BlockPointer pointer;
|
@@ -280,7 +280,7 @@ bool RowGroupCollection::Append(DataChunk &chunk, TableAppendState &state) {
|
|
280
280
|
// merge the stats
|
281
281
|
auto stats_lock = stats.GetLock();
|
282
282
|
for (idx_t i = 0; i < types.size(); i++) {
|
283
|
-
current_row_group->MergeIntoStatistics(i,
|
283
|
+
current_row_group->MergeIntoStatistics(i, stats.GetStats(i).Statistics());
|
284
284
|
}
|
285
285
|
}
|
286
286
|
remaining -= append_count;
|
@@ -319,11 +319,7 @@ bool RowGroupCollection::Append(DataChunk &chunk, TableAppendState &state) {
|
|
319
319
|
state.current_row += append_count;
|
320
320
|
auto stats_lock = stats.GetLock();
|
321
321
|
for (idx_t col_idx = 0; col_idx < types.size(); col_idx++) {
|
322
|
-
|
323
|
-
if (type == PhysicalType::LIST || type == PhysicalType::STRUCT) {
|
324
|
-
continue;
|
325
|
-
}
|
326
|
-
stats.GetStats(col_idx).stats->UpdateDistinctStatistics(chunk.data[col_idx], chunk.size());
|
322
|
+
stats.GetStats(col_idx).UpdateDistinctStatistics(chunk.data[col_idx], chunk.size());
|
327
323
|
}
|
328
324
|
return new_row_group;
|
329
325
|
}
|
@@ -513,13 +509,13 @@ void RowGroupCollection::UpdateColumn(TransactionData transaction, Vector &row_i
|
|
513
509
|
auto row_group = (RowGroup *)row_groups->GetSegment(first_id);
|
514
510
|
row_group->UpdateColumn(transaction, updates, row_ids, column_path);
|
515
511
|
|
516
|
-
row_group->MergeIntoStatistics(primary_column_idx,
|
512
|
+
row_group->MergeIntoStatistics(primary_column_idx, stats.GetStats(primary_column_idx).Statistics());
|
517
513
|
}
|
518
514
|
|
519
515
|
//===--------------------------------------------------------------------===//
|
520
516
|
// Checkpoint
|
521
517
|
//===--------------------------------------------------------------------===//
|
522
|
-
void RowGroupCollection::Checkpoint(TableDataWriter &writer,
|
518
|
+
void RowGroupCollection::Checkpoint(TableDataWriter &writer, TableStatistics &global_stats) {
|
523
519
|
for (auto row_group = (RowGroup *)row_groups->GetRootSegment(); row_group;
|
524
520
|
row_group = (RowGroup *)row_group->Next()) {
|
525
521
|
auto rowg_writer = writer.GetRowGroupWriter(*row_group);
|
@@ -590,7 +586,7 @@ shared_ptr<RowGroupCollection> RowGroupCollection::AddColumn(ClientContext &cont
|
|
590
586
|
while (current_row_group) {
|
591
587
|
auto new_row_group = current_row_group->AddColumn(new_column, executor, default_value, default_vector);
|
592
588
|
// merge in the statistics
|
593
|
-
new_row_group->MergeIntoStatistics(new_column_idx,
|
589
|
+
new_row_group->MergeIntoStatistics(new_column_idx, new_column_stats.Statistics());
|
594
590
|
|
595
591
|
result->row_groups->AppendSegment(std::move(new_row_group));
|
596
592
|
current_row_group = (RowGroup *)current_row_group->Next();
|
@@ -651,7 +647,7 @@ shared_ptr<RowGroupCollection> RowGroupCollection::AlterType(ClientContext &cont
|
|
651
647
|
while (current_row_group) {
|
652
648
|
auto new_row_group = current_row_group->AlterType(target_type, changed_idx, executor,
|
653
649
|
scan_state.table_state.row_group_state, scan_chunk);
|
654
|
-
new_row_group->MergeIntoStatistics(changed_idx,
|
650
|
+
new_row_group->MergeIntoStatistics(changed_idx, changed_stats.Statistics());
|
655
651
|
result->row_groups->AppendSegment(std::move(new_row_group));
|
656
652
|
current_row_group = (RowGroup *)current_row_group->Next();
|
657
653
|
}
|
@@ -696,14 +692,18 @@ void RowGroupCollection::VerifyNewConstraint(DataTable &parent, const BoundConst
|
|
696
692
|
//===--------------------------------------------------------------------===//
|
697
693
|
// Statistics
|
698
694
|
//===--------------------------------------------------------------------===//
|
695
|
+
void RowGroupCollection::CopyStats(TableStatistics &other_stats) {
|
696
|
+
stats.CopyStats(other_stats);
|
697
|
+
}
|
698
|
+
|
699
699
|
unique_ptr<BaseStatistics> RowGroupCollection::CopyStats(column_t column_id) {
|
700
700
|
return stats.CopyStats(column_id);
|
701
701
|
}
|
702
702
|
|
703
|
-
void RowGroupCollection::
|
703
|
+
void RowGroupCollection::SetDistinct(column_t column_id, unique_ptr<DistinctStatistics> distinct_stats) {
|
704
704
|
D_ASSERT(column_id != COLUMN_IDENTIFIER_ROW_ID);
|
705
705
|
auto stats_guard = stats.GetLock();
|
706
|
-
|
706
|
+
stats.GetStats(column_id).SetDistinct(std::move(distinct_stats));
|
707
707
|
}
|
708
708
|
|
709
709
|
} // namespace duckdb
|
@@ -24,7 +24,7 @@ bool StandardColumnData::CheckZonemap(ColumnScanState &state, TableFilter &filte
|
|
24
24
|
return true;
|
25
25
|
}
|
26
26
|
state.segment_checked = true;
|
27
|
-
auto prune_result = filter.CheckStatistics(
|
27
|
+
auto prune_result = filter.CheckStatistics(state.current->stats.statistics);
|
28
28
|
if (prune_result != FilterPropagateResult::FILTER_ALWAYS_FALSE) {
|
29
29
|
return true;
|
30
30
|
}
|
@@ -91,8 +91,7 @@ void StandardColumnData::InitializeAppend(ColumnAppendState &state) {
|
|
91
91
|
void StandardColumnData::AppendData(BaseStatistics &stats, ColumnAppendState &state, UnifiedVectorFormat &vdata,
|
92
92
|
idx_t count) {
|
93
93
|
ColumnData::AppendData(stats, state, vdata, count);
|
94
|
-
|
95
|
-
validity.AppendData(*stats.validity_stats, state.child_appends[0], vdata, count);
|
94
|
+
validity.AppendData(stats, state.child_appends[0], vdata, count);
|
96
95
|
}
|
97
96
|
|
98
97
|
void StandardColumnData::RevertAppend(row_t start_row) {
|
@@ -136,9 +135,11 @@ unique_ptr<BaseStatistics> StandardColumnData::GetUpdateStatistics() {
|
|
136
135
|
return nullptr;
|
137
136
|
}
|
138
137
|
if (!stats) {
|
139
|
-
stats = BaseStatistics::CreateEmpty(type
|
138
|
+
stats = BaseStatistics::CreateEmpty(type).ToUnique();
|
139
|
+
}
|
140
|
+
if (validity_stats) {
|
141
|
+
stats->Merge(*validity_stats);
|
140
142
|
}
|
141
|
-
stats->validity_stats = std::move(validity_stats);
|
142
143
|
return stats;
|
143
144
|
}
|
144
145
|
|
@@ -169,7 +170,6 @@ struct StandardColumnCheckpointState : public ColumnCheckpointState {
|
|
169
170
|
public:
|
170
171
|
unique_ptr<BaseStatistics> GetStatistics() override {
|
171
172
|
D_ASSERT(global_stats);
|
172
|
-
global_stats->validity_stats = validity_state->GetStatistics();
|
173
173
|
return std::move(global_stats);
|
174
174
|
}
|
175
175
|
|
@@ -1,5 +1,5 @@
|
|
1
1
|
#include "duckdb/storage/table/struct_column_data.hpp"
|
2
|
-
#include "duckdb/storage/statistics/
|
2
|
+
#include "duckdb/storage/statistics/struct_stats.hpp"
|
3
3
|
#include "duckdb/transaction/transaction.hpp"
|
4
4
|
|
5
5
|
namespace duckdb {
|
@@ -127,12 +127,12 @@ void StructColumnData::Append(BaseStatistics &stats, ColumnAppendState &state, V
|
|
127
127
|
vector.Flatten(count);
|
128
128
|
|
129
129
|
// append the null values
|
130
|
-
validity.Append(
|
130
|
+
validity.Append(stats, state.child_appends[0], vector, count);
|
131
131
|
|
132
|
-
auto &struct_stats = (StructStatistics &)stats;
|
133
132
|
auto &child_entries = StructVector::GetEntries(vector);
|
134
133
|
for (idx_t i = 0; i < child_entries.size(); i++) {
|
135
|
-
sub_columns[i]->Append(
|
134
|
+
sub_columns[i]->Append(StructStats::GetChildStats(stats, i), state.child_appends[i + 1], *child_entries[i],
|
135
|
+
count);
|
136
136
|
}
|
137
137
|
}
|
138
138
|
|
@@ -190,16 +190,18 @@ void StructColumnData::UpdateColumn(TransactionData transaction, const vector<co
|
|
190
190
|
|
191
191
|
unique_ptr<BaseStatistics> StructColumnData::GetUpdateStatistics() {
|
192
192
|
// check if any child column has updates
|
193
|
-
auto stats = BaseStatistics::CreateEmpty(type
|
194
|
-
auto
|
195
|
-
|
193
|
+
auto stats = BaseStatistics::CreateEmpty(type);
|
194
|
+
auto validity_stats = validity.GetUpdateStatistics();
|
195
|
+
if (validity_stats) {
|
196
|
+
stats.Merge(*validity_stats);
|
197
|
+
}
|
196
198
|
for (idx_t i = 0; i < sub_columns.size(); i++) {
|
197
199
|
auto child_stats = sub_columns[i]->GetUpdateStatistics();
|
198
200
|
if (child_stats) {
|
199
|
-
|
201
|
+
StructStats::SetChildStats(stats, i, std::move(child_stats));
|
200
202
|
}
|
201
203
|
}
|
202
|
-
return stats;
|
204
|
+
return stats.ToUnique();
|
203
205
|
}
|
204
206
|
|
205
207
|
void StructColumnData::FetchRow(TransactionData transaction, ColumnFetchState &state, row_t row_id, Vector &result,
|
@@ -230,7 +232,7 @@ struct StructColumnCheckpointState : public ColumnCheckpointState {
|
|
230
232
|
StructColumnCheckpointState(RowGroup &row_group, ColumnData &column_data,
|
231
233
|
PartialBlockManager &partial_block_manager)
|
232
234
|
: ColumnCheckpointState(row_group, column_data, partial_block_manager) {
|
233
|
-
global_stats =
|
235
|
+
global_stats = StructStats::CreateEmpty(column_data.type).ToUnique();
|
234
236
|
}
|
235
237
|
|
236
238
|
unique_ptr<ColumnCheckpointState> validity_state;
|
@@ -238,14 +240,11 @@ struct StructColumnCheckpointState : public ColumnCheckpointState {
|
|
238
240
|
|
239
241
|
public:
|
240
242
|
unique_ptr<BaseStatistics> GetStatistics() override {
|
241
|
-
auto stats =
|
242
|
-
D_ASSERT(stats->child_stats.size() == child_states.size());
|
243
|
-
stats->validity_stats = validity_state->GetStatistics();
|
243
|
+
auto stats = StructStats::CreateEmpty(column_data.type);
|
244
244
|
for (idx_t i = 0; i < child_states.size(); i++) {
|
245
|
-
stats
|
246
|
-
D_ASSERT(stats->child_stats[i]);
|
245
|
+
StructStats::SetChildStats(stats, i, child_states[i]->GetStatistics());
|
247
246
|
}
|
248
|
-
return
|
247
|
+
return stats.ToUnique();
|
249
248
|
}
|
250
249
|
|
251
250
|
void WriteDataPointers(RowGroupWriter &writer) override {
|