duckdb 0.7.1 → 0.7.2-dev1034.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +12 -7
- package/lib/duckdb.d.ts +55 -2
- package/lib/duckdb.js +20 -1
- package/package.json +1 -1
- package/src/connection.cpp +1 -2
- package/src/database.cpp +1 -1
- package/src/duckdb/extension/icu/icu-extension.cpp +4 -0
- package/src/duckdb/extension/icu/icu-list-range.cpp +207 -0
- package/src/duckdb/extension/icu/icu-table-range.cpp +194 -0
- package/src/duckdb/extension/icu/include/icu-list-range.hpp +17 -0
- package/src/duckdb/extension/icu/include/icu-table-range.hpp +17 -0
- package/src/duckdb/extension/json/include/json_common.hpp +1 -0
- package/src/duckdb/extension/json/include/json_functions.hpp +2 -0
- package/src/duckdb/extension/json/include/json_serializer.hpp +77 -0
- package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +147 -0
- package/src/duckdb/extension/json/json_functions/read_json.cpp +6 -5
- package/src/duckdb/extension/json/json_functions.cpp +12 -4
- package/src/duckdb/extension/json/json_scan.cpp +2 -2
- package/src/duckdb/extension/json/json_serializer.cpp +217 -0
- package/src/duckdb/extension/parquet/column_reader.cpp +94 -15
- package/src/duckdb/extension/parquet/column_writer.cpp +0 -1
- package/src/duckdb/extension/parquet/include/column_reader.hpp +1 -2
- package/src/duckdb/extension/parquet/include/decode_utils.hpp +5 -4
- package/src/duckdb/extension/parquet/include/generated_column_reader.hpp +1 -11
- package/src/duckdb/extension/parquet/include/parquet_timestamp.hpp +2 -1
- package/src/duckdb/extension/parquet/parquet-extension.cpp +12 -2
- package/src/duckdb/extension/parquet/parquet_reader.cpp +1 -1
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +26 -32
- package/src/duckdb/extension/parquet/parquet_timestamp.cpp +16 -6
- package/src/duckdb/src/catalog/catalog.cpp +34 -5
- package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +4 -0
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +2 -21
- package/src/duckdb/src/catalog/catalog_entry/scalar_function_catalog_entry.cpp +7 -6
- package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +3 -3
- package/src/duckdb/src/catalog/catalog_entry/table_function_catalog_entry.cpp +20 -1
- package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +8 -2
- package/src/duckdb/src/catalog/catalog_set.cpp +1 -0
- package/src/duckdb/src/catalog/default/default_functions.cpp +3 -0
- package/src/duckdb/src/catalog/dependency_list.cpp +12 -0
- package/src/duckdb/src/catalog/duck_catalog.cpp +34 -7
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +48 -4
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
- package/src/duckdb/src/common/box_renderer.cpp +109 -23
- package/src/duckdb/src/common/enums/expression_type.cpp +8 -222
- package/src/duckdb/src/common/enums/join_type.cpp +3 -22
- package/src/duckdb/src/common/enums/logical_operator_type.cpp +2 -0
- package/src/duckdb/src/common/enums/statement_type.cpp +2 -0
- package/src/duckdb/src/common/exception.cpp +15 -1
- package/src/duckdb/src/common/field_writer.cpp +1 -0
- package/src/duckdb/src/common/operator/cast_operators.cpp +1 -1
- package/src/duckdb/src/common/preserved_error.cpp +7 -5
- package/src/duckdb/src/common/serializer/buffered_deserializer.cpp +4 -0
- package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +15 -2
- package/src/duckdb/src/common/serializer/enum_serializer.cpp +1176 -0
- package/src/duckdb/src/common/sort/sort_state.cpp +5 -7
- package/src/duckdb/src/common/sort/sorted_block.cpp +0 -1
- package/src/duckdb/src/common/string_util.cpp +4 -1
- package/src/duckdb/src/common/types/bit.cpp +166 -87
- package/src/duckdb/src/common/types/blob.cpp +1 -1
- package/src/duckdb/src/common/types/chunk_collection.cpp +2 -2
- package/src/duckdb/src/common/types/column_data_collection.cpp +39 -2
- package/src/duckdb/src/common/types/column_data_collection_segment.cpp +11 -6
- package/src/duckdb/src/common/types/data_chunk.cpp +1 -1
- package/src/duckdb/src/common/types/time.cpp +13 -0
- package/src/duckdb/src/common/types/value.cpp +320 -154
- package/src/duckdb/src/common/types/vector.cpp +155 -127
- package/src/duckdb/src/common/types.cpp +313 -153
- package/src/duckdb/src/common/vector_operations/vector_cast.cpp +2 -1
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +10 -5
- package/src/duckdb/src/execution/column_binding_resolver.cpp +21 -5
- package/src/duckdb/src/execution/expression_executor/execute_cast.cpp +2 -1
- package/src/duckdb/src/execution/index/art/art.cpp +6 -5
- package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +4 -5
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +117 -26
- package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +3 -0
- package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +5 -3
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +64 -17
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +12 -4
- package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +6 -11
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +3 -1
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +6 -3
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +6 -14
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +2 -2
- package/src/duckdb/src/execution/operator/projection/physical_projection.cpp +34 -0
- package/src/duckdb/src/execution/operator/scan/physical_positional_scan.cpp +20 -5
- package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +20 -40
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +14 -2
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +21 -16
- package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +97 -0
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +95 -47
- package/src/duckdb/src/execution/physical_plan/plan_distinct.cpp +5 -8
- package/src/duckdb/src/execution/physical_plan/plan_positional_join.cpp +14 -5
- package/src/duckdb/src/execution/physical_plan_generator.cpp +3 -0
- package/src/duckdb/src/execution/window_segment_tree.cpp +173 -1
- package/src/duckdb/src/function/aggregate/algebraic/avg.cpp +0 -6
- package/src/duckdb/src/function/aggregate/distributive/bitagg.cpp +99 -95
- package/src/duckdb/src/function/aggregate/distributive/bitstring_agg.cpp +269 -0
- package/src/duckdb/src/function/aggregate/distributive/bool.cpp +2 -0
- package/src/duckdb/src/function/aggregate/distributive/count.cpp +3 -4
- package/src/duckdb/src/function/aggregate/distributive/first.cpp +1 -0
- package/src/duckdb/src/function/aggregate/distributive/minmax.cpp +2 -0
- package/src/duckdb/src/function/aggregate/distributive/sum.cpp +19 -16
- package/src/duckdb/src/function/aggregate/distributive_functions.cpp +1 -0
- package/src/duckdb/src/function/aggregate/holistic/approximate_quantile.cpp +5 -2
- package/src/duckdb/src/function/aggregate/holistic/mode.cpp +1 -1
- package/src/duckdb/src/function/aggregate/holistic/quantile.cpp +16 -1
- package/src/duckdb/src/function/aggregate/nested/list.cpp +8 -8
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +58 -16
- package/src/duckdb/src/function/cast/bit_cast.cpp +0 -2
- package/src/duckdb/src/function/cast/blob_cast.cpp +0 -1
- package/src/duckdb/src/function/cast/cast_function_set.cpp +1 -1
- package/src/duckdb/src/function/cast/enum_casts.cpp +25 -3
- package/src/duckdb/src/function/cast/list_casts.cpp +17 -4
- package/src/duckdb/src/function/cast/map_cast.cpp +5 -2
- package/src/duckdb/src/function/cast/string_cast.cpp +36 -10
- package/src/duckdb/src/function/cast/struct_cast.cpp +24 -4
- package/src/duckdb/src/function/cast/time_casts.cpp +2 -2
- package/src/duckdb/src/function/cast/union_casts.cpp +33 -7
- package/src/duckdb/src/function/function_binder.cpp +1 -8
- package/src/duckdb/src/function/scalar/bit/bitstring.cpp +100 -0
- package/src/duckdb/src/function/scalar/date/current.cpp +0 -2
- package/src/duckdb/src/function/scalar/date/date_diff.cpp +0 -1
- package/src/duckdb/src/function/scalar/date/date_part.cpp +18 -26
- package/src/duckdb/src/function/scalar/date/date_sub.cpp +0 -1
- package/src/duckdb/src/function/scalar/date/date_trunc.cpp +10 -14
- package/src/duckdb/src/function/scalar/generic/stats.cpp +2 -4
- package/src/duckdb/src/function/scalar/list/contains_or_position.cpp +4 -146
- package/src/duckdb/src/function/scalar/list/flatten.cpp +5 -12
- package/src/duckdb/src/function/scalar/list/list_aggregates.cpp +1 -1
- package/src/duckdb/src/function/scalar/list/list_concat.cpp +8 -12
- package/src/duckdb/src/function/scalar/list/list_extract.cpp +5 -12
- package/src/duckdb/src/function/scalar/list/list_lambdas.cpp +7 -3
- package/src/duckdb/src/function/scalar/list/list_value.cpp +6 -10
- package/src/duckdb/src/function/scalar/map/map.cpp +47 -1
- package/src/duckdb/src/function/scalar/map/map_entries.cpp +61 -0
- package/src/duckdb/src/function/scalar/map/map_extract.cpp +68 -26
- package/src/duckdb/src/function/scalar/map/map_keys_values.cpp +97 -0
- package/src/duckdb/src/function/scalar/math/numeric.cpp +101 -17
- package/src/duckdb/src/function/scalar/math_functions.cpp +3 -0
- package/src/duckdb/src/function/scalar/nested_functions.cpp +3 -0
- package/src/duckdb/src/function/scalar/operators/add.cpp +0 -9
- package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +29 -48
- package/src/duckdb/src/function/scalar/operators/bitwise.cpp +0 -63
- package/src/duckdb/src/function/scalar/operators/multiply.cpp +5 -6
- package/src/duckdb/src/function/scalar/operators/subtract.cpp +0 -6
- package/src/duckdb/src/function/scalar/string/caseconvert.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/hex.cpp +201 -0
- package/src/duckdb/src/function/scalar/string/instr.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/length.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/like.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/regexp/regexp_extract_all.cpp +243 -0
- package/src/duckdb/src/function/scalar/string/regexp/regexp_util.cpp +79 -0
- package/src/duckdb/src/function/scalar/string/regexp.cpp +21 -80
- package/src/duckdb/src/function/scalar/string/substring.cpp +2 -6
- package/src/duckdb/src/function/scalar/string_functions.cpp +2 -0
- package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +5 -10
- package/src/duckdb/src/function/scalar/struct/struct_insert.cpp +11 -14
- package/src/duckdb/src/function/scalar/struct/struct_pack.cpp +6 -7
- package/src/duckdb/src/function/table/arrow.cpp +5 -2
- package/src/duckdb/src/function/table/arrow_conversion.cpp +25 -1
- package/src/duckdb/src/function/table/checkpoint.cpp +5 -1
- package/src/duckdb/src/function/table/read_csv.cpp +55 -0
- package/src/duckdb/src/function/table/system/duckdb_constraints.cpp +2 -2
- package/src/duckdb/src/function/table/system/test_all_types.cpp +2 -2
- package/src/duckdb/src/function/table/table_scan.cpp +1 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/function/table_function.cpp +30 -11
- package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +6 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_table_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp +6 -8
- package/src/duckdb/src/include/duckdb/catalog/dependency_list.hpp +3 -0
- package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/box_renderer.hpp +8 -2
- package/src/duckdb/src/include/duckdb/common/constants.hpp +0 -19
- package/src/duckdb/src/include/duckdb/common/enums/aggregate_handling.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/expression_type.hpp +2 -3
- package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +7 -4
- package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/enums/order_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/set_operation_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/enums/statement_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/enums/tableref_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/exception.hpp +69 -2
- package/src/duckdb/src/include/duckdb/common/field_writer.hpp +12 -4
- package/src/duckdb/src/include/duckdb/common/{http_stats.hpp → http_state.hpp} +18 -4
- package/src/duckdb/src/include/duckdb/common/operator/multiply.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +45 -0
- package/src/duckdb/src/include/duckdb/common/preserved_error.hpp +6 -1
- package/src/duckdb/src/include/duckdb/common/serializer/buffered_deserializer.hpp +4 -2
- package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +8 -2
- package/src/duckdb/src/include/duckdb/common/serializer/enum_serializer.hpp +113 -0
- package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +336 -0
- package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +268 -0
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +126 -0
- package/src/duckdb/src/include/duckdb/common/serializer.hpp +13 -0
- package/src/duckdb/src/include/duckdb/common/string_util.hpp +25 -0
- package/src/duckdb/src/include/duckdb/common/types/bit.hpp +12 -7
- package/src/duckdb/src/include/duckdb/common/types/time.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +17 -48
- package/src/duckdb/src/include/duckdb/common/types/value_map.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types.hpp +45 -8
- package/src/duckdb/src/include/duckdb/common/vector_operations/unary_executor.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_file_handle.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +6 -0
- package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_projection.hpp +5 -0
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +1 -3
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +54 -0
- package/src/duckdb/src/include/duckdb/function/aggregate/distributive_functions.hpp +5 -0
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +18 -6
- package/src/duckdb/src/include/duckdb/function/cast/bound_cast_data.hpp +84 -0
- package/src/duckdb/src/include/duckdb/function/cast/cast_function_set.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/cast/default_casts.hpp +28 -64
- package/src/duckdb/src/include/duckdb/function/function_binder.hpp +3 -6
- package/src/duckdb/src/include/duckdb/function/scalar/bit_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +138 -0
- package/src/duckdb/src/include/duckdb/function/scalar/math_functions.hpp +8 -0
- package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +59 -0
- package/src/duckdb/src/include/duckdb/function/scalar/regexp.hpp +81 -1
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +12 -1
- package/src/duckdb/src/include/duckdb/function/table_function.hpp +10 -0
- package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -3
- package/src/duckdb/src/include/duckdb/main/config.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/connection_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/database.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/relation/explain_relation.hpp +2 -1
- package/src/duckdb/src/include/duckdb/main/relation.hpp +2 -1
- package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +2 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +2 -2
- package/src/duckdb/src/include/duckdb/optimizer/rule/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/optimizer/rule/ordered_aggregate_optimizer.hpp +24 -0
- package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +4 -0
- package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/bound_expression.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/expression/case_expression.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/expression/conjunction_expression.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +4 -2
- package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/expression/parameter_expression.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +4 -2
- package/src/duckdb/src/include/duckdb/parser/expression/subquery_expression.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +5 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/{alter_function_info.hpp → alter_scalar_function_info.hpp} +13 -13
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_function_info.hpp +47 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +6 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_table_function_info.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_expression.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/recursive_cte_node.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/select_node.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/set_operation_node.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/query_node.hpp +13 -2
- package/src/duckdb/src/include/duckdb/parser/result_modifier.hpp +24 -1
- package/src/duckdb/src/include/duckdb/parser/sql_statement.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/statement/multi_statement.hpp +28 -0
- package/src/duckdb/src/include/duckdb/parser/statement/select_statement.hpp +6 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/basetableref.hpp +4 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/expressionlistref.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/pivotref.hpp +87 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/table_function_ref.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/tableref.hpp +3 -1
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +33 -0
- package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +15 -4
- package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_aggregate_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +64 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/having_binder.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/expression_binder/order_binder.hpp +4 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/qualify_binder.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/expression_binder/select_binder.hpp +9 -38
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_asof_join.hpp +22 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -2
- package/src/duckdb/src/include/duckdb/planner/operator/logical_distinct.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/query_node/bound_select_node.hpp +8 -2
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +76 -44
- package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_fetch.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_fetch.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/index.hpp +4 -3
- package/src/duckdb/src/include/duckdb/storage/meta_block_reader.hpp +7 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +93 -29
- package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +22 -3
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +8 -6
- package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +41 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/node_statistics.hpp +26 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +114 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats_union.hpp +62 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/segment_statistics.hpp +2 -7
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +74 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +42 -0
- package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +2 -3
- package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +6 -3
- package/src/duckdb/src/include/duckdb/storage/table/column_data_checkpointer.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +7 -5
- package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +6 -2
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +10 -6
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +8 -5
- package/src/duckdb/src/include/duckdb/storage/table/row_group_segment_tree.hpp +37 -0
- package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +10 -1
- package/src/duckdb/src/include/duckdb/storage/table/segment_base.hpp +4 -3
- package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +271 -26
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
- package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +0 -1
- package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +1 -1
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -2
- package/src/duckdb/src/include/duckdb.h +50 -2
- package/src/duckdb/src/include/duckdb.hpp +0 -1
- package/src/duckdb/src/main/capi/pending-c.cpp +16 -3
- package/src/duckdb/src/main/capi/result-c.cpp +27 -1
- package/src/duckdb/src/main/capi/stream-c.cpp +25 -0
- package/src/duckdb/src/main/client_context.cpp +38 -34
- package/src/duckdb/src/main/client_data.cpp +7 -6
- package/src/duckdb/src/main/config.cpp +70 -1
- package/src/duckdb/src/main/database.cpp +19 -2
- package/src/duckdb/src/main/extension/extension_install.cpp +7 -2
- package/src/duckdb/src/main/prepared_statement.cpp +4 -0
- package/src/duckdb/src/main/query_profiler.cpp +17 -15
- package/src/duckdb/src/main/relation/explain_relation.cpp +3 -3
- package/src/duckdb/src/main/relation.cpp +3 -2
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -0
- package/src/duckdb/src/optimizer/deliminator.cpp +1 -1
- package/src/duckdb/src/optimizer/filter_combiner.cpp +1 -1
- package/src/duckdb/src/optimizer/filter_pullup.cpp +3 -1
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +14 -8
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +105 -71
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +31 -12
- package/src/duckdb/src/optimizer/optimizer.cpp +1 -0
- package/src/duckdb/src/optimizer/pullup/pullup_from_left.cpp +2 -2
- package/src/duckdb/src/optimizer/pushdown/pushdown_aggregate.cpp +33 -5
- package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +1 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_inner_join.cpp +3 -0
- package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +5 -12
- package/src/duckdb/src/optimizer/pushdown/pushdown_mark_join.cpp +2 -2
- package/src/duckdb/src/optimizer/pushdown/pushdown_single_join.cpp +1 -1
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +1 -0
- package/src/duckdb/src/optimizer/rule/move_constants.cpp +10 -4
- package/src/duckdb/src/optimizer/rule/ordered_aggregate_optimizer.cpp +30 -0
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +9 -2
- package/src/duckdb/src/optimizer/statistics/expression/propagate_aggregate.cpp +9 -3
- package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +6 -7
- package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +14 -11
- package/src/duckdb/src/optimizer/statistics/expression/propagate_columnref.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +13 -15
- package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +0 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_constant.cpp +3 -75
- package/src/duckdb/src/optimizer/statistics/expression/propagate_function.cpp +7 -2
- package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +10 -0
- package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +2 -3
- package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +29 -32
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +5 -5
- package/src/duckdb/src/optimizer/statistics/operator/propagate_set_operation.cpp +3 -3
- package/src/duckdb/src/optimizer/statistics_propagator.cpp +2 -1
- package/src/duckdb/src/optimizer/unnest_rewriter.cpp +2 -2
- package/src/duckdb/src/parallel/meta_pipeline.cpp +0 -4
- package/src/duckdb/src/parser/common_table_expression_info.cpp +19 -0
- package/src/duckdb/src/parser/expression/between_expression.cpp +17 -0
- package/src/duckdb/src/parser/expression/case_expression.cpp +28 -0
- package/src/duckdb/src/parser/expression/cast_expression.cpp +17 -0
- package/src/duckdb/src/parser/expression/collate_expression.cpp +16 -0
- package/src/duckdb/src/parser/expression/columnref_expression.cpp +15 -0
- package/src/duckdb/src/parser/expression/comparison_expression.cpp +16 -0
- package/src/duckdb/src/parser/expression/conjunction_expression.cpp +17 -0
- package/src/duckdb/src/parser/expression/constant_expression.cpp +14 -0
- package/src/duckdb/src/parser/expression/default_expression.cpp +7 -0
- package/src/duckdb/src/parser/expression/function_expression.cpp +35 -0
- package/src/duckdb/src/parser/expression/lambda_expression.cpp +16 -0
- package/src/duckdb/src/parser/expression/operator_expression.cpp +15 -0
- package/src/duckdb/src/parser/expression/parameter_expression.cpp +15 -0
- package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +14 -0
- package/src/duckdb/src/parser/expression/star_expression.cpp +26 -6
- package/src/duckdb/src/parser/expression/subquery_expression.cpp +20 -0
- package/src/duckdb/src/parser/expression/window_expression.cpp +43 -0
- package/src/duckdb/src/parser/parsed_data/alter_info.cpp +7 -3
- package/src/duckdb/src/parser/parsed_data/alter_scalar_function_info.cpp +56 -0
- package/src/duckdb/src/parser/parsed_data/alter_table_function_info.cpp +51 -0
- package/src/duckdb/src/parser/parsed_data/create_scalar_function_info.cpp +3 -2
- package/src/duckdb/src/parser/parsed_data/create_table_function_info.cpp +6 -0
- package/src/duckdb/src/parser/parsed_data/sample_options.cpp +22 -10
- package/src/duckdb/src/parser/parsed_expression.cpp +72 -0
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +15 -1
- package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +21 -0
- package/src/duckdb/src/parser/query_node/select_node.cpp +31 -0
- package/src/duckdb/src/parser/query_node/set_operation_node.cpp +17 -0
- package/src/duckdb/src/parser/query_node.cpp +51 -1
- package/src/duckdb/src/parser/result_modifier.cpp +78 -0
- package/src/duckdb/src/parser/statement/multi_statement.cpp +18 -0
- package/src/duckdb/src/parser/statement/select_statement.cpp +12 -0
- package/src/duckdb/src/parser/tableref/basetableref.cpp +21 -0
- package/src/duckdb/src/parser/tableref/emptytableref.cpp +4 -0
- package/src/duckdb/src/parser/tableref/expressionlistref.cpp +17 -0
- package/src/duckdb/src/parser/tableref/joinref.cpp +29 -0
- package/src/duckdb/src/parser/tableref/pivotref.cpp +373 -0
- package/src/duckdb/src/parser/tableref/subqueryref.cpp +15 -0
- package/src/duckdb/src/parser/tableref/table_function.cpp +17 -0
- package/src/duckdb/src/parser/tableref.cpp +49 -0
- package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +11 -0
- package/src/duckdb/src/parser/transform/expression/transform_bool_expr.cpp +1 -1
- package/src/duckdb/src/parser/transform/expression/transform_columnref.cpp +17 -2
- package/src/duckdb/src/parser/transform/expression/transform_function.cpp +63 -42
- package/src/duckdb/src/parser/transform/expression/transform_operator.cpp +1 -1
- package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +1 -1
- package/src/duckdb/src/parser/transform/helpers/transform_alias.cpp +12 -6
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +24 -0
- package/src/duckdb/src/parser/transform/helpers/transform_groupby.cpp +7 -0
- package/src/duckdb/src/parser/transform/helpers/transform_orderby.cpp +0 -7
- package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +3 -2
- package/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +4 -0
- package/src/duckdb/src/parser/transform/statement/transform_create_view.cpp +4 -0
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +179 -0
- package/src/duckdb/src/parser/transform/statement/transform_rename.cpp +3 -4
- package/src/duckdb/src/parser/transform/statement/transform_select.cpp +8 -0
- package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +2 -3
- package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +12 -1
- package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +121 -0
- package/src/duckdb/src/parser/transform/tableref/transform_tableref.cpp +2 -0
- package/src/duckdb/src/parser/transformer.cpp +15 -3
- package/src/duckdb/src/planner/bind_context.cpp +18 -25
- package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +9 -7
- package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +4 -3
- package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +23 -12
- package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +3 -2
- package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +176 -0
- package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +4 -0
- package/src/duckdb/src/planner/binder/expression/bind_unnest_expression.cpp +163 -24
- package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +2 -2
- package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +109 -94
- package/src/duckdb/src/planner/binder/query_node/plan_query_node.cpp +11 -0
- package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +9 -4
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +5 -3
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +3 -2
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +9 -1
- package/src/duckdb/src/planner/binder/statement/bind_delete.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +12 -8
- package/src/duckdb/src/planner/binder/statement/bind_logical_plan.cpp +17 -0
- package/src/duckdb/src/planner/binder/statement/bind_update.cpp +4 -2
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +19 -3
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +366 -0
- package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +11 -1
- package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -0
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +61 -13
- package/src/duckdb/src/planner/binder.cpp +19 -24
- package/src/duckdb/src/planner/bound_result_modifier.cpp +27 -1
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +9 -2
- package/src/duckdb/src/planner/expression/bound_expression.cpp +4 -0
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +146 -0
- package/src/duckdb/src/planner/expression_binder/having_binder.cpp +6 -3
- package/src/duckdb/src/planner/expression_binder/qualify_binder.cpp +3 -3
- package/src/duckdb/src/planner/expression_binder/select_binder.cpp +1 -132
- package/src/duckdb/src/planner/expression_binder.cpp +10 -3
- package/src/duckdb/src/planner/expression_iterator.cpp +17 -10
- package/src/duckdb/src/planner/filter/constant_filter.cpp +4 -6
- package/src/duckdb/src/planner/logical_operator.cpp +7 -2
- package/src/duckdb/src/planner/logical_operator_visitor.cpp +6 -0
- package/src/duckdb/src/planner/operator/logical_asof_join.cpp +8 -0
- package/src/duckdb/src/planner/operator/logical_distinct.cpp +3 -0
- package/src/duckdb/src/planner/planner.cpp +2 -1
- package/src/duckdb/src/planner/pragma_handler.cpp +10 -2
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +3 -1
- package/src/duckdb/src/storage/buffer_manager.cpp +44 -46
- package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -1
- package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +4 -15
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +10 -4
- package/src/duckdb/src/storage/checkpoint_manager.cpp +9 -3
- package/src/duckdb/src/storage/compression/bitpacking.cpp +28 -24
- package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +43 -45
- package/src/duckdb/src/storage/compression/numeric_constant.cpp +9 -10
- package/src/duckdb/src/storage/compression/patas.cpp +1 -1
- package/src/duckdb/src/storage/compression/rle.cpp +19 -15
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +5 -5
- package/src/duckdb/src/storage/data_table.cpp +20 -20
- package/src/duckdb/src/storage/index.cpp +12 -1
- package/src/duckdb/src/storage/local_storage.cpp +20 -23
- package/src/duckdb/src/storage/meta_block_reader.cpp +22 -0
- package/src/duckdb/src/storage/statistics/base_statistics.cpp +373 -128
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +57 -3
- package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +8 -9
- package/src/duckdb/src/storage/statistics/list_stats.cpp +121 -0
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +591 -0
- package/src/duckdb/src/storage/statistics/numeric_stats_union.cpp +65 -0
- package/src/duckdb/src/storage/statistics/segment_statistics.cpp +2 -11
- package/src/duckdb/src/storage/statistics/string_stats.cpp +273 -0
- package/src/duckdb/src/storage/statistics/struct_stats.cpp +133 -0
- package/src/duckdb/src/storage/storage_info.cpp +2 -2
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +4 -10
- package/src/duckdb/src/storage/table/column_data.cpp +45 -46
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +7 -8
- package/src/duckdb/src/storage/table/column_segment.cpp +13 -14
- package/src/duckdb/src/storage/table/list_column_data.cpp +41 -59
- package/src/duckdb/src/storage/table/persistent_table_data.cpp +2 -1
- package/src/duckdb/src/storage/table/row_group.cpp +38 -32
- package/src/duckdb/src/storage/table/row_group_collection.cpp +94 -78
- package/src/duckdb/src/storage/table/scan_state.cpp +22 -3
- package/src/duckdb/src/storage/table/standard_column_data.cpp +7 -6
- package/src/duckdb/src/storage/table/struct_column_data.cpp +16 -16
- package/src/duckdb/src/storage/table/table_statistics.cpp +27 -7
- package/src/duckdb/src/storage/table/update_segment.cpp +20 -18
- package/src/duckdb/src/storage/wal_replay.cpp +8 -5
- package/src/duckdb/src/storage/write_ahead_log.cpp +2 -2
- package/src/duckdb/src/transaction/commit_state.cpp +11 -7
- package/src/duckdb/src/verification/deserialized_statement_verifier.cpp +0 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +35 -0
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +36 -2
- package/src/duckdb/third_party/libpg_query/include/nodes/primnodes.hpp +3 -3
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +1022 -530
- package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +8 -0
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +24462 -22828
- package/src/duckdb/third_party/re2/re2/re2.cc +9 -0
- package/src/duckdb/third_party/re2/re2/re2.h +2 -0
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
- package/src/duckdb/ub_extension_json_json_functions.cpp +2 -0
- package/src/duckdb/ub_src_common_serializer.cpp +2 -0
- package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
- package/src/duckdb/ub_src_function_aggregate_distributive.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_bit.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_map.cpp +4 -0
- package/src/duckdb/ub_src_function_scalar_string.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_string_regexp.cpp +4 -0
- package/src/duckdb/ub_src_main_capi.cpp +2 -0
- package/src/duckdb/ub_src_optimizer_rule.cpp +2 -0
- package/src/duckdb/ub_src_parser.cpp +2 -0
- package/src/duckdb/ub_src_parser_parsed_data.cpp +4 -2
- package/src/duckdb/ub_src_parser_statement.cpp +2 -0
- package/src/duckdb/ub_src_parser_tableref.cpp +2 -0
- package/src/duckdb/ub_src_parser_transform_statement.cpp +2 -0
- package/src/duckdb/ub_src_parser_transform_tableref.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_expression.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_tableref.cpp +2 -0
- package/src/duckdb/ub_src_planner_expression_binder.cpp +2 -0
- package/src/duckdb/ub_src_planner_operator.cpp +2 -0
- package/src/duckdb/ub_src_storage_statistics.cpp +6 -6
- package/src/duckdb/ub_src_storage_table.cpp +0 -2
- package/src/duckdb_node.hpp +2 -1
- package/src/statement.cpp +5 -5
- package/src/utils.cpp +27 -2
- package/test/extension.test.ts +44 -26
- package/test/syntax_error.test.ts +3 -1
- package/filelist.cache +0 -0
- package/src/duckdb/src/include/duckdb/main/loadable_extension.hpp +0 -59
- package/src/duckdb/src/include/duckdb/storage/statistics/list_statistics.hpp +0 -36
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_statistics.hpp +0 -75
- package/src/duckdb/src/include/duckdb/storage/statistics/string_statistics.hpp +0 -49
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_statistics.hpp +0 -36
- package/src/duckdb/src/include/duckdb/storage/statistics/validity_statistics.hpp +0 -45
- package/src/duckdb/src/parser/parsed_data/alter_function_info.cpp +0 -55
- package/src/duckdb/src/storage/statistics/list_statistics.cpp +0 -94
- package/src/duckdb/src/storage/statistics/numeric_statistics.cpp +0 -307
- package/src/duckdb/src/storage/statistics/string_statistics.cpp +0 -220
- package/src/duckdb/src/storage/statistics/struct_statistics.cpp +0 -108
- package/src/duckdb/src/storage/statistics/validity_statistics.cpp +0 -91
- package/src/duckdb/src/storage/table/segment_tree.cpp +0 -179
| @@ -32,10 +32,9 @@ ColumnData::ColumnData(ColumnData &other, idx_t start, ColumnData *parent) | |
| 32 32 | 
             
            		updates = make_unique<UpdateSegment>(*other.updates, *this);
         | 
| 33 33 | 
             
            	}
         | 
| 34 34 | 
             
            	idx_t offset = 0;
         | 
| 35 | 
            -
            	for (auto segment  | 
| 36 | 
            -
            		 | 
| 37 | 
            -
            		 | 
| 38 | 
            -
            		offset += segment->count;
         | 
| 35 | 
            +
            	for (auto &segment : other.data.Segments()) {
         | 
| 36 | 
            +
            		this->data.AppendSegment(ColumnSegment::CreateSegment(segment, start + offset));
         | 
| 37 | 
            +
            		offset += segment.count;
         | 
| 39 38 | 
             
            	}
         | 
| 40 39 | 
             
            }
         | 
| 41 40 |  | 
| @@ -75,7 +74,8 @@ idx_t ColumnData::GetMaxEntry() { | |
| 75 74 | 
             
            }
         | 
| 76 75 |  | 
| 77 76 | 
             
            void ColumnData::InitializeScan(ColumnScanState &state) {
         | 
| 78 | 
            -
            	state.current =  | 
| 77 | 
            +
            	state.current = data.GetRootSegment();
         | 
| 78 | 
            +
            	state.segment_tree = &data;
         | 
| 79 79 | 
             
            	state.row_index = state.current ? state.current->start : 0;
         | 
| 80 80 | 
             
            	state.internal_index = state.row_index;
         | 
| 81 81 | 
             
            	state.initialized = false;
         | 
| @@ -84,7 +84,8 @@ void ColumnData::InitializeScan(ColumnScanState &state) { | |
| 84 84 | 
             
            }
         | 
| 85 85 |  | 
| 86 86 | 
             
            void ColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx) {
         | 
| 87 | 
            -
            	state.current =  | 
| 87 | 
            +
            	state.current = data.GetSegment(row_idx);
         | 
| 88 | 
            +
            	state.segment_tree = &data;
         | 
| 88 89 | 
             
            	state.row_index = row_idx;
         | 
| 89 90 | 
             
            	state.internal_index = state.current->start;
         | 
| 90 91 | 
             
            	state.initialized = false;
         | 
| @@ -125,11 +126,12 @@ idx_t ColumnData::ScanVector(ColumnScanState &state, Vector &result, idx_t remai | |
| 125 126 | 
             
            		}
         | 
| 126 127 |  | 
| 127 128 | 
             
            		if (remaining > 0) {
         | 
| 128 | 
            -
            			 | 
| 129 | 
            +
            			auto next = data.GetNextSegment(state.current);
         | 
| 130 | 
            +
            			if (!next) {
         | 
| 129 131 | 
             
            				break;
         | 
| 130 132 | 
             
            			}
         | 
| 131 133 | 
             
            			state.previous_states.emplace_back(std::move(state.scan_state));
         | 
| 132 | 
            -
            			state.current =  | 
| 134 | 
            +
            			state.current = next;
         | 
| 133 135 | 
             
            			state.current->InitializeScan(state);
         | 
| 134 136 | 
             
            			state.segment_checked = false;
         | 
| 135 137 | 
             
            			D_ASSERT(state.row_index >= state.current->start &&
         | 
| @@ -234,14 +236,14 @@ void ColumnData::InitializeAppend(ColumnAppendState &state) { | |
| 234 236 | 
             
            		// no segments yet, append an empty segment
         | 
| 235 237 | 
             
            		AppendTransientSegment(l, start);
         | 
| 236 238 | 
             
            	}
         | 
| 237 | 
            -
            	auto segment =  | 
| 239 | 
            +
            	auto segment = data.GetLastSegment(l);
         | 
| 238 240 | 
             
            	if (segment->segment_type == ColumnSegmentType::PERSISTENT) {
         | 
| 239 241 | 
             
            		// no transient segments yet
         | 
| 240 242 | 
             
            		auto total_rows = segment->start + segment->count;
         | 
| 241 243 | 
             
            		AppendTransientSegment(l, total_rows);
         | 
| 242 | 
            -
            		state.current =  | 
| 244 | 
            +
            		state.current = data.GetLastSegment(l);
         | 
| 243 245 | 
             
            	} else {
         | 
| 244 | 
            -
            		state.current =  | 
| 246 | 
            +
            		state.current = segment;
         | 
| 245 247 | 
             
            	}
         | 
| 246 248 |  | 
| 247 249 | 
             
            	D_ASSERT(state.current->segment_type == ColumnSegmentType::TRANSIENT);
         | 
| @@ -254,7 +256,7 @@ void ColumnData::AppendData(BaseStatistics &stats, ColumnAppendState &state, Uni | |
| 254 256 | 
             
            	while (true) {
         | 
| 255 257 | 
             
            		// append the data from the vector
         | 
| 256 258 | 
             
            		idx_t copied_elements = state.current->Append(state, vdata, offset, count);
         | 
| 257 | 
            -
            		stats.Merge( | 
| 259 | 
            +
            		stats.Merge(state.current->stats.statistics);
         | 
| 258 260 | 
             
            		if (copied_elements == count) {
         | 
| 259 261 | 
             
            			// finished copying everything
         | 
| 260 262 | 
             
            			break;
         | 
| @@ -264,7 +266,7 @@ void ColumnData::AppendData(BaseStatistics &stats, ColumnAppendState &state, Uni | |
| 264 266 | 
             
            		{
         | 
| 265 267 | 
             
            			auto l = data.Lock();
         | 
| 266 268 | 
             
            			AppendTransientSegment(l, state.current->start + state.current->count);
         | 
| 267 | 
            -
            			state.current =  | 
| 269 | 
            +
            			state.current = data.GetLastSegment(l);
         | 
| 268 270 | 
             
            			state.current->InitializeAppend(state);
         | 
| 269 271 | 
             
            		}
         | 
| 270 272 | 
             
            		offset += copied_elements;
         | 
| @@ -284,7 +286,7 @@ void ColumnData::RevertAppend(row_t start_row) { | |
| 284 286 | 
             
            	// find the segment index that the current row belongs to
         | 
| 285 287 | 
             
            	idx_t segment_index = data.GetSegmentIndex(l, start_row);
         | 
| 286 288 | 
             
            	auto segment = data.GetSegmentByIndex(l, segment_index);
         | 
| 287 | 
            -
            	auto &transient =  | 
| 289 | 
            +
            	auto &transient = *segment;
         | 
| 288 290 | 
             
            	D_ASSERT(transient.segment_type == ColumnSegmentType::TRANSIENT);
         | 
| 289 291 |  | 
| 290 292 | 
             
            	// remove any segments AFTER this segment: they should be deleted entirely
         | 
| @@ -299,14 +301,14 @@ idx_t ColumnData::Fetch(ColumnScanState &state, row_t row_id, Vector &result) { | |
| 299 301 | 
             
            	D_ASSERT(idx_t(row_id) >= start);
         | 
| 300 302 | 
             
            	// perform the fetch within the segment
         | 
| 301 303 | 
             
            	state.row_index = start + ((row_id - start) / STANDARD_VECTOR_SIZE * STANDARD_VECTOR_SIZE);
         | 
| 302 | 
            -
            	state.current =  | 
| 304 | 
            +
            	state.current = data.GetSegment(state.row_index);
         | 
| 303 305 | 
             
            	state.internal_index = state.current->start;
         | 
| 304 306 | 
             
            	return ScanVector(state, result, STANDARD_VECTOR_SIZE);
         | 
| 305 307 | 
             
            }
         | 
| 306 308 |  | 
| 307 309 | 
             
            void ColumnData::FetchRow(TransactionData transaction, ColumnFetchState &state, row_t row_id, Vector &result,
         | 
| 308 310 | 
             
                                      idx_t result_idx) {
         | 
| 309 | 
            -
            	auto segment =  | 
| 311 | 
            +
            	auto segment = data.GetSegment(row_id);
         | 
| 310 312 |  | 
| 311 313 | 
             
            	// now perform the fetch within the segment
         | 
| 312 314 | 
             
            	segment->FetchRow(state, row_id, result, result_idx);
         | 
| @@ -357,15 +359,14 @@ void ColumnData::AppendTransientSegment(SegmentLock &l, idx_t start_row) { | |
| 357 359 | 
             
            }
         | 
| 358 360 |  | 
| 359 361 | 
             
            void ColumnData::CommitDropColumn() {
         | 
| 360 | 
            -
            	auto  | 
| 361 | 
            -
             | 
| 362 | 
            -
            		if (segment | 
| 363 | 
            -
            			auto block_id = segment | 
| 362 | 
            +
            	for (auto &segment_p : data.Segments()) {
         | 
| 363 | 
            +
            		auto &segment = segment_p;
         | 
| 364 | 
            +
            		if (segment.segment_type == ColumnSegmentType::PERSISTENT) {
         | 
| 365 | 
            +
            			auto block_id = segment.GetBlockId();
         | 
| 364 366 | 
             
            			if (block_id != INVALID_BLOCK) {
         | 
| 365 367 | 
             
            				block_manager.MarkBlockAsModified(block_id);
         | 
| 366 368 | 
             
            			}
         | 
| 367 369 | 
             
            		}
         | 
| 368 | 
            -
            		segment = (ColumnSegment *)segment->Next();
         | 
| 369 370 | 
             
            	}
         | 
| 370 371 | 
             
            }
         | 
| 371 372 |  | 
| @@ -389,7 +390,7 @@ unique_ptr<ColumnCheckpointState> ColumnData::Checkpoint(RowGroup &row_group, | |
| 389 390 | 
             
            	// scan the segments of the column data
         | 
| 390 391 | 
             
            	// set up the checkpoint state
         | 
| 391 392 | 
             
            	auto checkpoint_state = CreateCheckpointState(row_group, partial_block_manager);
         | 
| 392 | 
            -
            	checkpoint_state->global_stats = BaseStatistics::CreateEmpty(type | 
| 393 | 
            +
            	checkpoint_state->global_stats = BaseStatistics::CreateEmpty(type).ToUnique();
         | 
| 393 394 |  | 
| 394 395 | 
             
            	auto l = data.Lock();
         | 
| 395 396 | 
             
            	auto nodes = data.MoveSegments(l);
         | 
| @@ -414,13 +415,19 @@ void ColumnData::DeserializeColumn(Deserializer &source) { | |
| 414 415 | 
             
            	idx_t data_pointer_count = source.Read<idx_t>();
         | 
| 415 416 | 
             
            	for (idx_t data_ptr = 0; data_ptr < data_pointer_count; data_ptr++) {
         | 
| 416 417 | 
             
            		// read the data pointer
         | 
| 417 | 
            -
            		 | 
| 418 | 
            -
            		 | 
| 419 | 
            -
            		 | 
| 420 | 
            -
            		 | 
| 421 | 
            -
            		 | 
| 422 | 
            -
            		 | 
| 423 | 
            -
             | 
| 418 | 
            +
            		auto row_start = source.Read<idx_t>();
         | 
| 419 | 
            +
            		auto tuple_count = source.Read<idx_t>();
         | 
| 420 | 
            +
            		auto block_pointer_block_id = source.Read<block_id_t>();
         | 
| 421 | 
            +
            		auto block_pointer_offset = source.Read<uint32_t>();
         | 
| 422 | 
            +
            		auto compression_type = source.Read<CompressionType>();
         | 
| 423 | 
            +
            		auto stats = BaseStatistics::Deserialize(source, type);
         | 
| 424 | 
            +
             | 
| 425 | 
            +
            		DataPointer data_pointer(std::move(stats));
         | 
| 426 | 
            +
            		data_pointer.row_start = row_start;
         | 
| 427 | 
            +
            		data_pointer.tuple_count = tuple_count;
         | 
| 428 | 
            +
            		data_pointer.block_pointer.block_id = block_pointer_block_id;
         | 
| 429 | 
            +
            		data_pointer.block_pointer.offset = block_pointer_offset;
         | 
| 430 | 
            +
            		data_pointer.compression_type = compression_type;
         | 
| 424 431 |  | 
| 425 432 | 
             
            		// create a persistent segment
         | 
| 426 433 | 
             
            		auto segment = ColumnSegment::CreatePersistentSegment(
         | 
| @@ -458,7 +465,6 @@ void ColumnData::GetStorageInfo(idx_t row_group_index, vector<idx_t> col_path, T | |
| 458 465 | 
             
            	while (segment) {
         | 
| 459 466 | 
             
            		ColumnSegmentInfo column_info;
         | 
| 460 467 | 
             
            		column_info.row_group_index = row_group_index;
         | 
| 461 | 
            -
            		;
         | 
| 462 468 | 
             
            		column_info.column_id = col_path[0];
         | 
| 463 469 | 
             
            		column_info.column_path = col_path_str;
         | 
| 464 470 | 
             
            		column_info.segment_idx = segment_idx;
         | 
| @@ -466,8 +472,7 @@ void ColumnData::GetStorageInfo(idx_t row_group_index, vector<idx_t> col_path, T | |
| 466 472 | 
             
            		column_info.segment_start = segment->start;
         | 
| 467 473 | 
             
            		column_info.segment_count = segment->count;
         | 
| 468 474 | 
             
            		column_info.compression_type = CompressionTypeToString(segment->function->type);
         | 
| 469 | 
            -
            		column_info.segment_stats =
         | 
| 470 | 
            -
            		    segment->stats.statistics ? segment->stats.statistics->ToString() : string("No Stats");
         | 
| 475 | 
            +
            		column_info.segment_stats = segment->stats.statistics.ToString();
         | 
| 471 476 | 
             
            		column_info.has_updates = updates ? true : false;
         | 
| 472 477 | 
             
            		// persistent
         | 
| 473 478 | 
             
            		// block_id
         | 
| @@ -482,7 +487,7 @@ void ColumnData::GetStorageInfo(idx_t row_group_index, vector<idx_t> col_path, T | |
| 482 487 | 
             
            		result.column_segments.push_back(std::move(column_info));
         | 
| 483 488 |  | 
| 484 489 | 
             
            		segment_idx++;
         | 
| 485 | 
            -
            		segment = (ColumnSegment *)segment | 
| 490 | 
            +
            		segment = (ColumnSegment *)data.GetNextSegment(segment);
         | 
| 486 491 | 
             
            	}
         | 
| 487 492 | 
             
            }
         | 
| 488 493 |  | 
| @@ -490,19 +495,13 @@ void ColumnData::Verify(RowGroup &parent) { | |
| 490 495 | 
             
            #ifdef DEBUG
         | 
| 491 496 | 
             
            	D_ASSERT(this->start == parent.start);
         | 
| 492 497 | 
             
            	data.Verify();
         | 
| 493 | 
            -
            	 | 
| 494 | 
            -
            	 | 
| 495 | 
            -
             | 
| 496 | 
            -
            		D_ASSERT( | 
| 497 | 
            -
            		 | 
| 498 | 
            -
            		 | 
| 499 | 
            -
             | 
| 500 | 
            -
            			prev_end = root->start + root->count;
         | 
| 501 | 
            -
            			if (!root->next) {
         | 
| 502 | 
            -
            				D_ASSERT(prev_end == parent.start + parent.count);
         | 
| 503 | 
            -
            			}
         | 
| 504 | 
            -
            			root = root->Next();
         | 
| 505 | 
            -
            		}
         | 
| 498 | 
            +
            	idx_t current_index = 0;
         | 
| 499 | 
            +
            	idx_t current_start = this->start;
         | 
| 500 | 
            +
            	for (auto &segment : data.Segments()) {
         | 
| 501 | 
            +
            		D_ASSERT(segment.index == current_index);
         | 
| 502 | 
            +
            		D_ASSERT(segment.start == current_start);
         | 
| 503 | 
            +
            		current_start += segment.count;
         | 
| 504 | 
            +
            		current_index++;
         | 
| 506 505 | 
             
            	}
         | 
| 507 506 | 
             
            #endif
         | 
| 508 507 | 
             
            }
         | 
| @@ -38,7 +38,7 @@ ColumnCheckpointState &ColumnDataCheckpointer::GetCheckpointState() { | |
| 38 38 | 
             
            void ColumnDataCheckpointer::ScanSegments(const std::function<void(Vector &, idx_t)> &callback) {
         | 
| 39 39 | 
             
            	Vector scan_vector(intermediate.GetType(), nullptr);
         | 
| 40 40 | 
             
            	for (idx_t segment_idx = 0; segment_idx < nodes.size(); segment_idx++) {
         | 
| 41 | 
            -
            		auto segment =  | 
| 41 | 
            +
            		auto segment = nodes[segment_idx].node.get();
         | 
| 42 42 | 
             
            		ColumnScanState scan_state;
         | 
| 43 43 | 
             
            		scan_state.current = segment;
         | 
| 44 44 | 
             
            		segment->InitializeScan(scan_state);
         | 
| @@ -163,7 +163,7 @@ void ColumnDataCheckpointer::WriteToDisk() { | |
| 163 163 | 
             
            	// since the segments will be rewritten their old on disk data is no longer required
         | 
| 164 164 | 
             
            	auto &block_manager = col_data.block_manager;
         | 
| 165 165 | 
             
            	for (idx_t segment_idx = 0; segment_idx < nodes.size(); segment_idx++) {
         | 
| 166 | 
            -
            		auto segment =  | 
| 166 | 
            +
            		auto segment = nodes[segment_idx].node.get();
         | 
| 167 167 | 
             
            		if (segment->segment_type == ColumnSegmentType::PERSISTENT) {
         | 
| 168 168 | 
             
            			// persistent segment has updates: mark it as modified and rewrite the block with the merged updates
         | 
| 169 169 | 
             
            			auto block_id = segment->GetBlockId();
         | 
| @@ -194,7 +194,7 @@ void ColumnDataCheckpointer::WriteToDisk() { | |
| 194 194 |  | 
| 195 195 | 
             
            bool ColumnDataCheckpointer::HasChanges() {
         | 
| 196 196 | 
             
            	for (idx_t segment_idx = 0; segment_idx < nodes.size(); segment_idx++) {
         | 
| 197 | 
            -
            		auto segment =  | 
| 197 | 
            +
            		auto segment = nodes[segment_idx].node.get();
         | 
| 198 198 | 
             
            		if (segment->segment_type == ColumnSegmentType::TRANSIENT) {
         | 
| 199 199 | 
             
            			// transient segment: always need to write to disk
         | 
| 200 200 | 
             
            			return true;
         | 
| @@ -214,20 +214,19 @@ void ColumnDataCheckpointer::WritePersistentSegments() { | |
| 214 214 | 
             
            	// all segments are persistent and there are no updates
         | 
| 215 215 | 
             
            	// we only need to write the metadata
         | 
| 216 216 | 
             
            	for (idx_t segment_idx = 0; segment_idx < nodes.size(); segment_idx++) {
         | 
| 217 | 
            -
            		auto segment =  | 
| 217 | 
            +
            		auto segment = nodes[segment_idx].node.get();
         | 
| 218 218 | 
             
            		D_ASSERT(segment->segment_type == ColumnSegmentType::PERSISTENT);
         | 
| 219 219 |  | 
| 220 220 | 
             
            		// set up the data pointer directly using the data from the persistent segment
         | 
| 221 | 
            -
            		DataPointer pointer;
         | 
| 221 | 
            +
            		DataPointer pointer(segment->stats.statistics.Copy());
         | 
| 222 222 | 
             
            		pointer.block_pointer.block_id = segment->GetBlockId();
         | 
| 223 223 | 
             
            		pointer.block_pointer.offset = segment->GetBlockOffset();
         | 
| 224 224 | 
             
            		pointer.row_start = segment->start;
         | 
| 225 225 | 
             
            		pointer.tuple_count = segment->count;
         | 
| 226 226 | 
             
            		pointer.compression_type = segment->function->type;
         | 
| 227 | 
            -
            		pointer.statistics = segment->stats.statistics->Copy();
         | 
| 228 227 |  | 
| 229 228 | 
             
            		// merge the persistent stats into the global column stats
         | 
| 230 | 
            -
            		state.global_stats->Merge( | 
| 229 | 
            +
            		state.global_stats->Merge(segment->stats.statistics);
         | 
| 231 230 |  | 
| 232 231 | 
             
            		// directly append the current segment to the new tree
         | 
| 233 232 | 
             
            		state.new_tree.AppendSegment(std::move(nodes[segment_idx].node));
         | 
| @@ -236,7 +235,7 @@ void ColumnDataCheckpointer::WritePersistentSegments() { | |
| 236 235 | 
             
            	}
         | 
| 237 236 | 
             
            }
         | 
| 238 237 |  | 
| 239 | 
            -
            void ColumnDataCheckpointer::Checkpoint(vector<SegmentNode | 
| 238 | 
            +
            void ColumnDataCheckpointer::Checkpoint(vector<SegmentNode<ColumnSegment>> nodes) {
         | 
| 240 239 | 
             
            	D_ASSERT(!nodes.empty());
         | 
| 241 240 | 
             
            	this->nodes = std::move(nodes);
         | 
| 242 241 | 
             
            	// first check if any of the segments have changes
         | 
| @@ -19,7 +19,7 @@ unique_ptr<ColumnSegment> ColumnSegment::CreatePersistentSegment(DatabaseInstanc | |
| 19 19 | 
             
                                                                             block_id_t block_id, idx_t offset,
         | 
| 20 20 | 
             
                                                                             const LogicalType &type, idx_t start, idx_t count,
         | 
| 21 21 | 
             
                                                                             CompressionType compression_type,
         | 
| 22 | 
            -
                                                                              | 
| 22 | 
            +
                                                                             BaseStatistics statistics) {
         | 
| 23 23 | 
             
            	auto &config = DBConfig::GetConfig(db);
         | 
| 24 24 | 
             
            	CompressionFunction *function;
         | 
| 25 25 | 
             
            	shared_ptr<BlockHandle> block;
         | 
| @@ -48,7 +48,7 @@ unique_ptr<ColumnSegment> ColumnSegment::CreateTransientSegment(DatabaseInstance | |
| 48 48 | 
             
            		buffer_manager.Allocate(segment_size, false, &block);
         | 
| 49 49 | 
             
            	}
         | 
| 50 50 | 
             
            	return make_unique<ColumnSegment>(db, std::move(block), type, ColumnSegmentType::TRANSIENT, start, 0, function,
         | 
| 51 | 
            -
            	                                   | 
| 51 | 
            +
            	                                  BaseStatistics::CreateEmpty(type), INVALID_BLOCK, 0, segment_size);
         | 
| 52 52 | 
             
            }
         | 
| 53 53 |  | 
| 54 54 | 
             
            unique_ptr<ColumnSegment> ColumnSegment::CreateSegment(ColumnSegment &other, idx_t start) {
         | 
| @@ -57,11 +57,11 @@ unique_ptr<ColumnSegment> ColumnSegment::CreateSegment(ColumnSegment &other, idx | |
| 57 57 |  | 
| 58 58 | 
             
            ColumnSegment::ColumnSegment(DatabaseInstance &db, shared_ptr<BlockHandle> block, LogicalType type_p,
         | 
| 59 59 | 
             
                                         ColumnSegmentType segment_type, idx_t start, idx_t count, CompressionFunction *function_p,
         | 
| 60 | 
            -
                                          | 
| 61 | 
            -
             | 
| 62 | 
            -
             | 
| 63 | 
            -
                   | 
| 64 | 
            -
                   | 
| 60 | 
            +
                                         BaseStatistics statistics, block_id_t block_id_p, idx_t offset_p, idx_t segment_size_p)
         | 
| 61 | 
            +
                : SegmentBase<ColumnSegment>(start, count), db(db), type(std::move(type_p)),
         | 
| 62 | 
            +
                  type_size(GetTypeIdSize(type.InternalType())), segment_type(segment_type), function(function_p),
         | 
| 63 | 
            +
                  stats(std::move(statistics)), block(std::move(block)), block_id(block_id_p), offset(offset_p),
         | 
| 64 | 
            +
                  segment_size(segment_size_p) {
         | 
| 65 65 | 
             
            	D_ASSERT(function);
         | 
| 66 66 | 
             
            	if (function->init_segment) {
         | 
| 67 67 | 
             
            		segment_state = function->init_segment(*this, block_id);
         | 
| @@ -69,10 +69,10 @@ ColumnSegment::ColumnSegment(DatabaseInstance &db, shared_ptr<BlockHandle> block | |
| 69 69 | 
             
            }
         | 
| 70 70 |  | 
| 71 71 | 
             
            ColumnSegment::ColumnSegment(ColumnSegment &other, idx_t start)
         | 
| 72 | 
            -
                : SegmentBase(start, other.count), db(other.db), type(std::move(other.type)), | 
| 73 | 
            -
                   | 
| 74 | 
            -
                  block(std::move(other.block)), block_id(other.block_id), offset(other.offset), | 
| 75 | 
            -
                  segment_state(std::move(other.segment_state)) {
         | 
| 72 | 
            +
                : SegmentBase<ColumnSegment>(start, other.count.load()), db(other.db), type(std::move(other.type)),
         | 
| 73 | 
            +
                  type_size(other.type_size), segment_type(other.segment_type), function(other.function),
         | 
| 74 | 
            +
                  stats(std::move(other.stats)), block(std::move(other.block)), block_id(other.block_id), offset(other.offset),
         | 
| 75 | 
            +
                  segment_size(other.segment_size), segment_state(std::move(other.segment_state)) {
         | 
| 76 76 | 
             
            }
         | 
| 77 77 |  | 
| 78 78 | 
             
            ColumnSegment::~ColumnSegment() {
         | 
| @@ -181,13 +181,12 @@ void ColumnSegment::ConvertToPersistent(BlockManager *block_manager, block_id_t | |
| 181 181 | 
             
            	block_id = block_id_p;
         | 
| 182 182 | 
             
            	offset = 0;
         | 
| 183 183 |  | 
| 184 | 
            -
            	D_ASSERT(stats.statistics);
         | 
| 185 184 | 
             
            	if (block_id == INVALID_BLOCK) {
         | 
| 186 185 | 
             
            		// constant block: reset the block buffer
         | 
| 187 | 
            -
            		D_ASSERT(stats.statistics | 
| 186 | 
            +
            		D_ASSERT(stats.statistics.IsConstant());
         | 
| 188 187 | 
             
            		block.reset();
         | 
| 189 188 | 
             
            	} else {
         | 
| 190 | 
            -
            		D_ASSERT(!stats.statistics | 
| 189 | 
            +
            		D_ASSERT(!stats.statistics.IsConstant());
         | 
| 191 190 | 
             
            		// non-constant block: write the block to disk
         | 
| 192 191 | 
             
            		// the data for the block already exists in-memory of our block
         | 
| 193 192 | 
             
            		// instead of copying the data we alter some metadata so the buffer points to an on-disk block
         | 
| @@ -1,6 +1,7 @@ | |
| 1 1 | 
             
            #include "duckdb/storage/table/list_column_data.hpp"
         | 
| 2 | 
            -
            #include "duckdb/storage/statistics/ | 
| 2 | 
            +
            #include "duckdb/storage/statistics/list_stats.hpp"
         | 
| 3 3 | 
             
            #include "duckdb/transaction/transaction.hpp"
         | 
| 4 | 
            +
            #include "duckdb/storage/table/column_checkpoint_state.hpp"
         | 
| 4 5 |  | 
| 5 6 | 
             
            namespace duckdb {
         | 
| 6 7 |  | 
| @@ -39,15 +40,14 @@ void ListColumnData::InitializeScan(ColumnScanState &state) { | |
| 39 40 | 
             
            	state.child_states.push_back(std::move(child_state));
         | 
| 40 41 | 
             
            }
         | 
| 41 42 |  | 
| 42 | 
            -
             | 
| 43 | 
            -
            	auto segment =  | 
| 43 | 
            +
            uint64_t ListColumnData::FetchListOffset(idx_t row_idx) {
         | 
| 44 | 
            +
            	auto segment = data.GetSegment(row_idx);
         | 
| 44 45 | 
             
            	ColumnFetchState fetch_state;
         | 
| 45 46 | 
             
            	Vector result(type, 1);
         | 
| 46 47 | 
             
            	segment->FetchRow(fetch_state, row_idx, result, 0);
         | 
| 47 48 |  | 
| 48 49 | 
             
            	// initialize the child scan with the required offset
         | 
| 49 | 
            -
            	 | 
| 50 | 
            -
            	return list_data[0];
         | 
| 50 | 
            +
            	return FlatVector::GetData<uint64_t>(result)[0];
         | 
| 51 51 | 
             
            }
         | 
| 52 52 |  | 
| 53 53 | 
             
            void ListColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx) {
         | 
| @@ -63,8 +63,7 @@ void ListColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t row_ | |
| 63 63 | 
             
            	state.child_states.push_back(std::move(validity_state));
         | 
| 64 64 |  | 
| 65 65 | 
             
            	// we need to read the list at position row_idx to get the correct row offset of the child
         | 
| 66 | 
            -
            	auto  | 
| 67 | 
            -
            	auto child_offset = list_entry.offset;
         | 
| 66 | 
            +
            	auto child_offset = row_idx == start ? 0 : FetchListOffset(row_idx - 1);
         | 
| 68 67 |  | 
| 69 68 | 
             
            	D_ASSERT(child_offset <= child_column->GetMaxEntry());
         | 
| 70 69 | 
             
            	ColumnScanState child_state;
         | 
| @@ -89,26 +88,26 @@ idx_t ListColumnData::ScanCount(ColumnScanState &state, Vector &result, idx_t co | |
| 89 88 | 
             
            	// updates not supported for lists
         | 
| 90 89 | 
             
            	D_ASSERT(!updates);
         | 
| 91 90 |  | 
| 92 | 
            -
            	 | 
| 91 | 
            +
            	Vector offset_vector(LogicalType::UBIGINT, count);
         | 
| 92 | 
            +
            	idx_t scan_count = ScanVector(state, offset_vector, count);
         | 
| 93 93 | 
             
            	D_ASSERT(scan_count > 0);
         | 
| 94 94 | 
             
            	validity.ScanCount(state.child_states[0], result, count);
         | 
| 95 95 |  | 
| 96 | 
            -
            	auto data = FlatVector::GetData< | 
| 97 | 
            -
            	auto first_entry = data[0];
         | 
| 96 | 
            +
            	auto data = FlatVector::GetData<uint64_t>(offset_vector);
         | 
| 98 97 | 
             
            	auto last_entry = data[scan_count - 1];
         | 
| 99 98 |  | 
| 100 | 
            -
            #ifdef DEBUG
         | 
| 101 | 
            -
            	for (idx_t i = 1; i < scan_count; i++) {
         | 
| 102 | 
            -
            		D_ASSERT(data[i].offset == data[i - 1].offset + data[i - 1].length);
         | 
| 103 | 
            -
            	}
         | 
| 104 | 
            -
            #endif
         | 
| 105 99 | 
             
            	// shift all offsets so they are 0 at the first entry
         | 
| 100 | 
            +
            	auto result_data = FlatVector::GetData<list_entry_t>(result);
         | 
| 101 | 
            +
            	auto base_offset = state.last_offset;
         | 
| 102 | 
            +
            	idx_t current_offset = 0;
         | 
| 106 103 | 
             
            	for (idx_t i = 0; i < scan_count; i++) {
         | 
| 107 | 
            -
            		 | 
| 104 | 
            +
            		result_data[i].offset = current_offset;
         | 
| 105 | 
            +
            		result_data[i].length = data[i] - current_offset - base_offset;
         | 
| 106 | 
            +
            		current_offset += result_data[i].length;
         | 
| 108 107 | 
             
            	}
         | 
| 109 108 |  | 
| 110 | 
            -
            	D_ASSERT(last_entry | 
| 111 | 
            -
            	idx_t child_scan_count = last_entry | 
| 109 | 
            +
            	D_ASSERT(last_entry >= base_offset);
         | 
| 110 | 
            +
            	idx_t child_scan_count = last_entry - base_offset;
         | 
| 112 111 | 
             
            	ListVector::Reserve(result, child_scan_count);
         | 
| 113 112 |  | 
| 114 113 | 
             
            	if (child_scan_count > 0) {
         | 
| @@ -118,6 +117,7 @@ idx_t ListColumnData::ScanCount(ColumnScanState &state, Vector &result, idx_t co | |
| 118 117 | 
             
            		             child_column->start + child_column->GetMaxEntry());
         | 
| 119 118 | 
             
            		child_column->ScanCount(state.child_states[1], child_entry, child_scan_count);
         | 
| 120 119 | 
             
            	}
         | 
| 120 | 
            +
            	state.last_offset = last_entry;
         | 
| 121 121 |  | 
| 122 122 | 
             
            	ListVector::SetListSize(result, child_scan_count);
         | 
| 123 123 | 
             
            	return scan_count;
         | 
| @@ -130,19 +130,19 @@ void ListColumnData::Skip(ColumnScanState &state, idx_t count) { | |
| 130 130 | 
             
            	// we need to read the list entries/offsets to figure out how much to skip
         | 
| 131 131 | 
             
            	// note that we only need to read the first and last entry
         | 
| 132 132 | 
             
            	// however, let's just read all "count" entries for now
         | 
| 133 | 
            -
            	 | 
| 134 | 
            -
            	Vector result(type, (data_ptr_t)data.get());
         | 
| 133 | 
            +
            	Vector result(LogicalType::UBIGINT, count);
         | 
| 135 134 | 
             
            	idx_t scan_count = ScanVector(state, result, count);
         | 
| 136 135 | 
             
            	if (scan_count == 0) {
         | 
| 137 136 | 
             
            		return;
         | 
| 138 137 | 
             
            	}
         | 
| 139 138 |  | 
| 140 | 
            -
            	auto  | 
| 141 | 
            -
            	auto  | 
| 142 | 
            -
            	idx_t child_scan_count = last_entry | 
| 139 | 
            +
            	auto data = FlatVector::GetData<uint64_t>(result);
         | 
| 140 | 
            +
            	auto last_entry = data[scan_count - 1];
         | 
| 141 | 
            +
            	idx_t child_scan_count = last_entry - state.last_offset;
         | 
| 143 142 | 
             
            	if (child_scan_count == 0) {
         | 
| 144 143 | 
             
            		return;
         | 
| 145 144 | 
             
            	}
         | 
| 145 | 
            +
            	state.last_offset = last_entry;
         | 
| 146 146 |  | 
| 147 147 | 
             
            	// skip the child state forward by the child_scan_count
         | 
| 148 148 | 
             
            	child_column->Skip(state.child_states[1], child_scan_count);
         | 
| @@ -163,10 +163,8 @@ void ListColumnData::InitializeAppend(ColumnAppendState &state) { | |
| 163 163 | 
             
            	state.child_appends.push_back(std::move(child_append_state));
         | 
| 164 164 | 
             
            }
         | 
| 165 165 |  | 
| 166 | 
            -
            void ListColumnData::Append(BaseStatistics & | 
| 166 | 
            +
            void ListColumnData::Append(BaseStatistics &stats, ColumnAppendState &state, Vector &vector, idx_t count) {
         | 
| 167 167 | 
             
            	D_ASSERT(count > 0);
         | 
| 168 | 
            -
            	auto &stats = (ListStatistics &)stats_p;
         | 
| 169 | 
            -
             | 
| 170 168 | 
             
            	UnifiedVectorFormat list_data;
         | 
| 171 169 | 
             
            	vector.ToUnifiedFormat(count, list_data);
         | 
| 172 170 | 
             
            	auto &list_validity = list_data.validity;
         | 
| @@ -177,8 +175,8 @@ void ListColumnData::Append(BaseStatistics &stats_p, ColumnAppendState &state, V | |
| 177 175 | 
             
            	idx_t child_count = 0;
         | 
| 178 176 |  | 
| 179 177 | 
             
            	ValidityMask append_mask(count);
         | 
| 180 | 
            -
            	auto append_offsets = unique_ptr< | 
| 181 | 
            -
            	bool child_contiguous =  | 
| 178 | 
            +
            	auto append_offsets = unique_ptr<uint64_t[]>(new uint64_t[count]);
         | 
| 179 | 
            +
            	bool child_contiguous = true;
         | 
| 182 180 | 
             
            	for (idx_t i = 0; i < count; i++) {
         | 
| 183 181 | 
             
            		auto input_idx = list_data.sel->get_index(i);
         | 
| 184 182 | 
             
            		if (list_validity.RowIsValid(input_idx)) {
         | 
| @@ -186,17 +184,11 @@ void ListColumnData::Append(BaseStatistics &stats_p, ColumnAppendState &state, V | |
| 186 184 | 
             
            			if (input_list.offset != child_count) {
         | 
| 187 185 | 
             
            				child_contiguous = false;
         | 
| 188 186 | 
             
            			}
         | 
| 189 | 
            -
            			append_offsets[i] | 
| 190 | 
            -
            			append_offsets[i].length = input_list.length;
         | 
| 187 | 
            +
            			append_offsets[i] = start_offset + child_count + input_list.length;
         | 
| 191 188 | 
             
            			child_count += input_list.length;
         | 
| 192 189 | 
             
            		} else {
         | 
| 193 190 | 
             
            			append_mask.SetInvalid(i);
         | 
| 194 | 
            -
            			 | 
| 195 | 
            -
            				append_offsets[i].offset = append_offsets[i - 1].offset + append_offsets[i - 1].length;
         | 
| 196 | 
            -
            			} else {
         | 
| 197 | 
            -
            				append_offsets[i].offset = start_offset;
         | 
| 198 | 
            -
            			}
         | 
| 199 | 
            -
            			append_offsets[i].length = 0;
         | 
| 191 | 
            +
            			append_offsets[i] = start_offset + child_count;
         | 
| 200 192 | 
             
            		}
         | 
| 201 193 | 
             
            	}
         | 
| 202 194 | 
             
            	auto &list_child = ListVector::GetEntry(vector);
         | 
| @@ -218,27 +210,19 @@ void ListColumnData::Append(BaseStatistics &stats_p, ColumnAppendState &state, V | |
| 218 210 | 
             
            		D_ASSERT(current_count == child_count);
         | 
| 219 211 | 
             
            		child_vector.Slice(list_child, child_sel, child_count);
         | 
| 220 212 | 
             
            	}
         | 
| 221 | 
            -
            #ifdef DEBUG
         | 
| 222 | 
            -
            	D_ASSERT(append_offsets[0].offset == start_offset);
         | 
| 223 | 
            -
            	for (idx_t i = 1; i < count; i++) {
         | 
| 224 | 
            -
            		D_ASSERT(append_offsets[i].offset == append_offsets[i - 1].offset + append_offsets[i - 1].length);
         | 
| 225 | 
            -
            	}
         | 
| 226 | 
            -
            	D_ASSERT(append_offsets[count - 1].offset + append_offsets[count - 1].length - append_offsets[0].offset ==
         | 
| 227 | 
            -
            	         child_count);
         | 
| 228 | 
            -
            #endif
         | 
| 229 213 |  | 
| 230 214 | 
             
            	UnifiedVectorFormat vdata;
         | 
| 231 | 
            -
            	vdata.validity = append_mask;
         | 
| 232 215 | 
             
            	vdata.sel = FlatVector::IncrementalSelectionVector();
         | 
| 233 216 | 
             
            	vdata.data = (data_ptr_t)append_offsets.get();
         | 
| 234 217 |  | 
| 235 218 | 
             
            	// append the list offsets
         | 
| 236 219 | 
             
            	ColumnData::AppendData(stats, state, vdata, count);
         | 
| 237 220 | 
             
            	// append the validity data
         | 
| 238 | 
            -
            	validity | 
| 221 | 
            +
            	vdata.validity = append_mask;
         | 
| 222 | 
            +
            	validity.AppendData(stats, state.child_appends[0], vdata, count);
         | 
| 239 223 | 
             
            	// append the child vector
         | 
| 240 224 | 
             
            	if (child_count > 0) {
         | 
| 241 | 
            -
            		child_column->Append( | 
| 225 | 
            +
            		child_column->Append(ListStats::GetChildStats(stats), state.child_appends[1], child_vector, child_count);
         | 
| 242 226 | 
             
            	}
         | 
| 243 227 | 
             
            }
         | 
| 244 228 |  | 
| @@ -248,8 +232,8 @@ void ListColumnData::RevertAppend(row_t start_row) { | |
| 248 232 | 
             
            	auto column_count = GetMaxEntry();
         | 
| 249 233 | 
             
            	if (column_count > start) {
         | 
| 250 234 | 
             
            		// revert append in the child column
         | 
| 251 | 
            -
            		auto  | 
| 252 | 
            -
            		child_column->RevertAppend( | 
| 235 | 
            +
            		auto list_offset = FetchListOffset(column_count - 1);
         | 
| 236 | 
            +
            		child_column->RevertAppend(list_offset);
         | 
| 253 237 | 
             
            	}
         | 
| 254 238 | 
             
            }
         | 
| 255 239 |  | 
| @@ -281,19 +265,18 @@ void ListColumnData::FetchRow(TransactionData transaction, ColumnFetchState &sta | |
| 281 265 | 
             
            		auto child_state = make_unique<ColumnFetchState>();
         | 
| 282 266 | 
             
            		state.child_states.push_back(std::move(child_state));
         | 
| 283 267 | 
             
            	}
         | 
| 284 | 
            -
            	// fetch the list_entry_t and the validity mask for that list
         | 
| 285 | 
            -
            	auto segment = (ColumnSegment *)data.GetSegment(row_id);
         | 
| 286 268 |  | 
| 287 269 | 
             
            	// now perform the fetch within the segment
         | 
| 288 | 
            -
            	 | 
| 270 | 
            +
            	auto start_offset = idx_t(row_id) == this->start ? 0 : FetchListOffset(row_id - 1);
         | 
| 271 | 
            +
            	auto end_offset = FetchListOffset(row_id);
         | 
| 289 272 | 
             
            	validity.FetchRow(transaction, *state.child_states[0], row_id, result, result_idx);
         | 
| 290 273 |  | 
| 291 274 | 
             
            	auto &validity = FlatVector::Validity(result);
         | 
| 292 275 | 
             
            	auto list_data = FlatVector::GetData<list_entry_t>(result);
         | 
| 293 276 | 
             
            	auto &list_entry = list_data[result_idx];
         | 
| 294 | 
            -
            	auto original_offset = list_entry.offset;
         | 
| 295 277 | 
             
            	// set the list entry offset to the size of the current list
         | 
| 296 278 | 
             
            	list_entry.offset = ListVector::GetListSize(result);
         | 
| 279 | 
            +
            	list_entry.length = end_offset - start_offset;
         | 
| 297 280 | 
             
            	if (!validity.RowIsValid(result_idx)) {
         | 
| 298 281 | 
             
            		// the list is NULL! no need to fetch the child
         | 
| 299 282 | 
             
            		D_ASSERT(list_entry.length == 0);
         | 
| @@ -307,7 +290,7 @@ void ListColumnData::FetchRow(TransactionData transaction, ColumnFetchState &sta | |
| 307 290 | 
             
            		auto &child_type = ListType::GetChildType(result.GetType());
         | 
| 308 291 | 
             
            		Vector child_scan(child_type, child_scan_count);
         | 
| 309 292 | 
             
            		// seek the scan towards the specified position and read [length] entries
         | 
| 310 | 
            -
            		child_column->InitializeScanWithOffset(*child_state, start +  | 
| 293 | 
            +
            		child_column->InitializeScanWithOffset(*child_state, start + start_offset);
         | 
| 311 294 | 
             
            		D_ASSERT(child_type.InternalType() == PhysicalType::STRUCT ||
         | 
| 312 295 | 
             
            		         child_state->row_index + child_scan_count - this->start <= child_column->GetMaxEntry());
         | 
| 313 296 | 
             
            		child_column->ScanCount(*child_state, child_scan, child_scan_count);
         | 
| @@ -324,7 +307,7 @@ void ListColumnData::CommitDropColumn() { | |
| 324 307 | 
             
            struct ListColumnCheckpointState : public ColumnCheckpointState {
         | 
| 325 308 | 
             
            	ListColumnCheckpointState(RowGroup &row_group, ColumnData &column_data, PartialBlockManager &partial_block_manager)
         | 
| 326 309 | 
             
            	    : ColumnCheckpointState(row_group, column_data, partial_block_manager) {
         | 
| 327 | 
            -
            		global_stats =  | 
| 310 | 
            +
            		global_stats = ListStats::CreateEmpty(column_data.type).ToUnique();
         | 
| 328 311 | 
             
            	}
         | 
| 329 312 |  | 
| 330 313 | 
             
            	unique_ptr<ColumnCheckpointState> validity_state;
         | 
| @@ -333,10 +316,8 @@ struct ListColumnCheckpointState : public ColumnCheckpointState { | |
| 333 316 | 
             
            public:
         | 
| 334 317 | 
             
            	unique_ptr<BaseStatistics> GetStatistics() override {
         | 
| 335 318 | 
             
            		auto stats = global_stats->Copy();
         | 
| 336 | 
            -
            		 | 
| 337 | 
            -
            		stats | 
| 338 | 
            -
            		list_stats.child_stats = child_state->GetStatistics();
         | 
| 339 | 
            -
            		return stats;
         | 
| 319 | 
            +
            		ListStats::SetChildStats(stats, child_state->GetStatistics());
         | 
| 320 | 
            +
            		return stats.ToUnique();
         | 
| 340 321 | 
             
            	}
         | 
| 341 322 |  | 
| 342 323 | 
             
            	void WriteDataPointers(RowGroupWriter &writer) override {
         | 
| @@ -376,6 +357,7 @@ void ListColumnData::DeserializeColumn(Deserializer &source) { | |
| 376 357 | 
             
            }
         | 
| 377 358 |  | 
| 378 359 | 
             
            void ListColumnData::GetStorageInfo(idx_t row_group_index, vector<idx_t> col_path, TableStorageInfo &result) {
         | 
| 360 | 
            +
            	ColumnData::GetStorageInfo(row_group_index, col_path, result);
         | 
| 379 361 | 
             
            	col_path.push_back(0);
         | 
| 380 362 | 
             
            	validity.GetStorageInfo(row_group_index, col_path, result);
         | 
| 381 363 | 
             
            	col_path.back() = 1;
         | 
| @@ -3,7 +3,8 @@ | |
| 3 3 |  | 
| 4 4 | 
             
            namespace duckdb {
         | 
| 5 5 |  | 
| 6 | 
            -
            PersistentTableData::PersistentTableData(idx_t column_count) | 
| 6 | 
            +
            PersistentTableData::PersistentTableData(idx_t column_count)
         | 
| 7 | 
            +
                : total_rows(0), row_group_count(0), block_id(INVALID_BLOCK), offset(0) {
         | 
| 7 8 | 
             
            }
         | 
| 8 9 |  | 
| 9 10 | 
             
            PersistentTableData::~PersistentTableData() {
         |