duckdb 0.7.2-dev2867.0 → 0.7.2-dev3117.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +2 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-datepart.cpp +5 -1
- package/src/duckdb/extension/json/include/json_deserializer.hpp +1 -0
- package/src/duckdb/extension/json/include/json_serializer.hpp +8 -1
- package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +1 -3
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +3 -3
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +3 -2
- package/src/duckdb/extension/parquet/parquet-extension.cpp +9 -7
- package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +18 -7
- package/src/duckdb/src/catalog/default/default_functions.cpp +2 -0
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +3 -3
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +2 -2
- package/src/duckdb/src/common/enum_util.cpp +5908 -0
- package/src/duckdb/src/common/enums/expression_type.cpp +216 -4
- package/src/duckdb/src/common/enums/join_type.cpp +6 -5
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
- package/src/duckdb/src/common/exception.cpp +1 -1
- package/src/duckdb/src/common/exception_format_value.cpp +2 -2
- package/src/duckdb/src/common/multi_file_reader.cpp +14 -0
- package/src/duckdb/src/common/serializer/binary_deserializer.cpp +143 -0
- package/src/duckdb/src/common/serializer/binary_serializer.cpp +160 -0
- package/src/duckdb/src/common/sort/partition_state.cpp +1 -1
- package/src/duckdb/src/common/string_util.cpp +6 -1
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +3 -3
- package/src/duckdb/src/common/types.cpp +11 -10
- package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +4 -4
- package/src/duckdb/src/core_functions/function_list.cpp +2 -0
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +2 -1
- package/src/duckdb/src/core_functions/scalar/list/list_sort.cpp +2 -3
- package/src/duckdb/src/core_functions/scalar/string/format_bytes.cpp +29 -0
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +3 -3
- package/src/duckdb/src/execution/index/art/art.cpp +5 -1
- package/src/duckdb/src/execution/operator/aggregate/distinct_aggregate_data.cpp +1 -1
- package/src/duckdb/src/execution/operator/aggregate/grouped_aggregate_data.cpp +2 -2
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +65 -45
- package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +17 -11
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -39
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +10 -9
- package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -4
- package/src/duckdb/src/execution/operator/helper/physical_explain_analyze.cpp +6 -21
- package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +13 -13
- package/src/duckdb/src/execution/operator/helper/physical_limit_percent.cpp +15 -14
- package/src/duckdb/src/execution/operator/helper/physical_load.cpp +3 -2
- package/src/duckdb/src/execution/operator/helper/physical_materialized_collector.cpp +4 -4
- package/src/duckdb/src/execution/operator/helper/physical_pragma.cpp +4 -2
- package/src/duckdb/src/execution/operator/helper/physical_prepare.cpp +4 -2
- package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +10 -8
- package/src/duckdb/src/execution/operator/helper/physical_reset.cpp +4 -3
- package/src/duckdb/src/execution/operator/helper/physical_set.cpp +7 -6
- package/src/duckdb/src/execution/operator/helper/physical_streaming_sample.cpp +2 -1
- package/src/duckdb/src/execution/operator/helper/physical_transaction.cpp +4 -2
- package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +8 -8
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +17 -16
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +12 -9
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +2 -1
- package/src/duckdb/src/execution/operator/join/physical_cross_product.cpp +3 -4
- package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +16 -15
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +13 -12
- package/src/duckdb/src/execution/operator/join/physical_nested_loop_join.cpp +12 -10
- package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +13 -11
- package/src/duckdb/src/execution/operator/join/physical_positional_join.cpp +8 -6
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/order/physical_order.cpp +13 -13
- package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +8 -8
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +165 -0
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +160 -145
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +11 -26
- package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +14 -19
- package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +7 -6
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +18 -30
- package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +14 -18
- package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +6 -4
- package/src/duckdb/src/execution/operator/scan/physical_dummy_scan.cpp +4 -19
- package/src/duckdb/src/execution/operator/scan/physical_empty_result.cpp +3 -2
- package/src/duckdb/src/execution/operator/scan/physical_positional_scan.cpp +14 -5
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +6 -4
- package/src/duckdb/src/execution/operator/schema/physical_alter.cpp +3 -19
- package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +13 -25
- package/src/duckdb/src/execution/operator/schema/physical_create_function.cpp +4 -19
- package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +8 -9
- package/src/duckdb/src/execution/operator/schema/physical_create_schema.cpp +4 -19
- package/src/duckdb/src/execution/operator/schema/physical_create_sequence.cpp +4 -19
- package/src/duckdb/src/execution/operator/schema/physical_create_table.cpp +4 -19
- package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +9 -26
- package/src/duckdb/src/execution/operator/schema/physical_create_view.cpp +4 -19
- package/src/duckdb/src/execution/operator/schema/physical_detach.cpp +4 -19
- package/src/duckdb/src/execution/operator/schema/physical_drop.cpp +3 -19
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +9 -8
- package/src/duckdb/src/execution/operator/set/physical_union.cpp +1 -1
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +2 -2
- package/src/duckdb/src/execution/physical_operator.cpp +11 -5
- package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +25 -4
- package/src/duckdb/src/execution/physical_plan/plan_sample.cpp +2 -1
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +16 -16
- package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +5 -4
- package/src/duckdb/src/function/table/arrow_conversion.cpp +3 -3
- package/src/duckdb/src/function/table/copy_csv.cpp +85 -29
- package/src/duckdb/src/function/table/read_csv.cpp +17 -11
- package/src/duckdb/src/function/table/system/duckdb_settings.cpp +2 -1
- package/src/duckdb/src/function/table/system/duckdb_types.cpp +2 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +7 -1
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +958 -0
- package/src/duckdb/src/include/duckdb/common/enums/join_type.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/enums/operator_result_type.hpp +16 -4
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/exception.hpp +4 -4
- package/src/duckdb/src/include/duckdb/common/exception_format_value.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +44 -0
- package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +45 -0
- package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +93 -0
- package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +92 -0
- package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +7 -3
- package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/set.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/vector.hpp +61 -14
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +15 -0
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +3 -2
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +10 -1
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/distinct_aggregate_data.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/grouped_aggregate_data.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +8 -11
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +2 -4
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp +3 -7
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +2 -4
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +1 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_explain_analyze.hpp +2 -5
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit.hpp +2 -4
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit_percent.hpp +2 -4
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_load.hpp +1 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_materialized_collector.hpp +1 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_pragma.hpp +1 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_prepare.hpp +1 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_reservoir_sample.hpp +2 -4
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_reset.hpp +1 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_set.hpp +1 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_transaction.hpp +1 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_vacuum.hpp +2 -4
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -4
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_blockwise_nl_join.hpp +2 -4
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +1 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_delim_join.hpp +1 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +2 -4
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +2 -4
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_nested_loop_join.hpp +2 -4
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +2 -4
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_positional_join.hpp +2 -4
- package/src/duckdb/src/include/duckdb/execution/operator/order/physical_order.hpp +2 -4
- package/src/duckdb/src/include/duckdb/execution/operator/order/physical_top_n.hpp +2 -4
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +68 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +3 -5
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +4 -5
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_delete.hpp +2 -4
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_export.hpp +2 -4
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +2 -4
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_update.hpp +2 -4
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +1 -2
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_dummy_scan.hpp +1 -3
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_empty_result.hpp +1 -2
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_positional_scan.hpp +1 -2
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +1 -2
- package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_alter.hpp +1 -3
- package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_attach.hpp +1 -3
- package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_function.hpp +1 -3
- package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_index.hpp +2 -4
- package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_schema.hpp +1 -3
- package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_sequence.hpp +1 -3
- package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_table.hpp +1 -3
- package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_type.hpp +2 -5
- package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_view.hpp +1 -3
- package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_detach.hpp +1 -3
- package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_drop.hpp +1 -3
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +2 -4
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +3 -3
- package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +7 -4
- package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +26 -6
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +6 -6
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +2 -1
- package/src/duckdb/src/include/duckdb/function/copy_function.hpp +32 -4
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +4 -2
- package/src/duckdb/src/include/duckdb/function/table_function.hpp +0 -1
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/database.hpp +1 -3
- package/src/duckdb/src/include/duckdb/main/database_path_and_type.hpp +24 -0
- package/src/duckdb/src/include/duckdb/main/relation/setop_relation.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parallel/event.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parallel/interrupt.hpp +63 -0
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +16 -3
- package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +51 -7
- package/src/duckdb/src/include/duckdb/parallel/task.hpp +21 -2
- package/src/duckdb/src/include/duckdb/parallel/task_counter.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +6 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_columnref_expression.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_aggregate.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_delete.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_delim_get.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_dummy_scan.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_expression_get.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_insert.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_pivot.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_projection.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_set_operation.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_unnest.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_update.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_window.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/query_node/bound_select_node.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/optimistic_data_writer.hpp +46 -0
- package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +24 -3
- package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +46 -1
- package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +9 -10
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/table/segment_base.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +22 -0
- package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/table/struct_column_data.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +0 -2
- package/src/duckdb/src/include/duckdb/storage/table/validity_column_data.hpp +1 -2
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +9 -34
- package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +26 -0
- package/src/duckdb/src/include/duckdb/verification/no_operator_caching_verifier.hpp +25 -0
- package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +6 -0
- package/src/duckdb/src/main/client_context.cpp +1 -0
- package/src/duckdb/src/main/client_verify.cpp +5 -0
- package/src/duckdb/src/main/config.cpp +4 -0
- package/src/duckdb/src/main/database.cpp +22 -34
- package/src/duckdb/src/main/database_path_and_type.cpp +23 -0
- package/src/duckdb/src/main/extension/extension_load.cpp +19 -15
- package/src/duckdb/src/main/relation/join_relation.cpp +2 -1
- package/src/duckdb/src/main/relation/setop_relation.cpp +2 -3
- package/src/duckdb/src/parallel/event.cpp +1 -1
- package/src/duckdb/src/parallel/executor.cpp +39 -3
- package/src/duckdb/src/parallel/executor_task.cpp +11 -0
- package/src/duckdb/src/parallel/interrupt.cpp +57 -0
- package/src/duckdb/src/parallel/pipeline.cpp +49 -6
- package/src/duckdb/src/parallel/pipeline_executor.cpp +248 -69
- package/src/duckdb/src/parallel/pipeline_initialize_event.cpp +1 -1
- package/src/duckdb/src/parallel/task_scheduler.cpp +57 -22
- package/src/duckdb/src/parser/base_expression.cpp +6 -0
- package/src/duckdb/src/parser/expression/window_expression.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/sample_options.cpp +2 -2
- package/src/duckdb/src/parser/query_node/select_node.cpp +1 -1
- package/src/duckdb/src/parser/result_modifier.cpp +2 -2
- package/src/duckdb/src/parser/statement/select_statement.cpp +0 -44
- package/src/duckdb/src/parser/tableref/joinref.cpp +3 -3
- package/src/duckdb/src/parser/tableref.cpp +1 -1
- package/src/duckdb/src/parser/transform/expression/transform_function.cpp +3 -3
- package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +6 -0
- package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +4 -1
- package/src/duckdb/src/planner/expression/bound_columnref_expression.cpp +17 -3
- package/src/duckdb/src/planner/expression/bound_reference_expression.cpp +8 -2
- package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +7 -0
- package/src/duckdb/src/planner/operator/logical_aggregate.cpp +14 -2
- package/src/duckdb/src/planner/operator/logical_column_data_get.cpp +11 -0
- package/src/duckdb/src/planner/operator/logical_comparison_join.cpp +2 -2
- package/src/duckdb/src/planner/operator/logical_cteref.cpp +11 -0
- package/src/duckdb/src/planner/operator/logical_delete.cpp +10 -0
- package/src/duckdb/src/planner/operator/logical_delim_get.cpp +12 -1
- package/src/duckdb/src/planner/operator/logical_dummy_scan.cpp +12 -1
- package/src/duckdb/src/planner/operator/logical_expression_get.cpp +12 -1
- package/src/duckdb/src/planner/operator/logical_get.cpp +10 -4
- package/src/duckdb/src/planner/operator/logical_insert.cpp +12 -1
- package/src/duckdb/src/planner/operator/logical_pivot.cpp +11 -0
- package/src/duckdb/src/planner/operator/logical_projection.cpp +11 -0
- package/src/duckdb/src/planner/operator/logical_recursive_cte.cpp +11 -0
- package/src/duckdb/src/planner/operator/logical_set_operation.cpp +11 -0
- package/src/duckdb/src/planner/operator/logical_unnest.cpp +12 -1
- package/src/duckdb/src/planner/operator/logical_update.cpp +10 -0
- package/src/duckdb/src/planner/operator/logical_window.cpp +11 -0
- package/src/duckdb/src/storage/checkpoint_manager.cpp +1 -1
- package/src/duckdb/src/storage/data_table.cpp +5 -0
- package/src/duckdb/src/storage/local_storage.cpp +40 -110
- package/src/duckdb/src/storage/optimistic_data_writer.cpp +96 -0
- package/src/duckdb/src/storage/partial_block_manager.cpp +73 -9
- package/src/duckdb/src/storage/single_file_block_manager.cpp +3 -1
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +17 -12
- package/src/duckdb/src/storage/statistics/base_statistics.cpp +3 -0
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +90 -82
- package/src/duckdb/src/storage/table/column_data.cpp +19 -45
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +7 -7
- package/src/duckdb/src/storage/table/column_segment.cpp +1 -1
- package/src/duckdb/src/storage/table/list_column_data.cpp +6 -11
- package/src/duckdb/src/storage/table/row_group.cpp +13 -14
- package/src/duckdb/src/storage/table/row_group_collection.cpp +10 -4
- package/src/duckdb/src/storage/table/standard_column_data.cpp +6 -10
- package/src/duckdb/src/storage/table/struct_column_data.cpp +7 -13
- package/src/duckdb/src/storage/table/update_segment.cpp +0 -25
- package/src/duckdb/src/storage/table/validity_column_data.cpp +2 -6
- package/src/duckdb/src/transaction/commit_state.cpp +4 -4
- package/src/duckdb/src/verification/deserialized_statement_verifier.cpp +2 -1
- package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +20 -0
- package/src/duckdb/src/verification/no_operator_caching_verifier.cpp +13 -0
- package/src/duckdb/src/verification/statement_verifier.cpp +7 -0
- package/src/duckdb/ub_src_common.cpp +2 -2
- package/src/duckdb/ub_src_common_serializer.cpp +4 -2
- package/src/duckdb/ub_src_core_functions_scalar_string.cpp +2 -0
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
- package/src/duckdb/ub_src_main.cpp +2 -0
- package/src/duckdb/ub_src_parallel.cpp +2 -0
- package/src/duckdb/ub_src_storage.cpp +2 -0
- package/src/duckdb/src/common/serializer/enum_serializer.cpp +0 -1180
- package/src/duckdb/src/common/vector.cpp +0 -12
- package/src/duckdb/src/include/duckdb/common/serializer/enum_serializer.hpp +0 -113
@@ -0,0 +1,165 @@
|
|
1
|
+
#include "duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp"
|
2
|
+
#include "duckdb/execution/operator/persistent/physical_copy_to_file.hpp"
|
3
|
+
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
4
|
+
#include "duckdb/common/types/batched_data_collection.hpp"
|
5
|
+
#include "duckdb/common/file_system.hpp"
|
6
|
+
#include "duckdb/common/file_opener.hpp"
|
7
|
+
#include "duckdb/common/allocator.hpp"
|
8
|
+
#include <algorithm>
|
9
|
+
|
10
|
+
namespace duckdb {
|
11
|
+
|
12
|
+
//! Constructs the operator: forwards the types and estimated cardinality to PhysicalOperator (operator type
//! BATCH_COPY_TO_FILE) and takes ownership of the copy function and its bind data.
//! Throws an InternalException if the copy function is missing either the prepare_batch or flush_batch callback.
PhysicalBatchCopyToFile::PhysicalBatchCopyToFile(vector<LogicalType> types, CopyFunction function_p,
                                                 unique_ptr<FunctionData> bind_data, idx_t estimated_cardinality)
    : PhysicalOperator(PhysicalOperatorType::BATCH_COPY_TO_FILE, std::move(types), estimated_cardinality),
      function(std::move(function_p)), bind_data(std::move(bind_data)) {
	// both callbacks are required by this operator
	const bool has_batch_callbacks = function.flush_batch && function.prepare_batch;
	if (has_batch_callbacks) {
		return;
	}
	throw InternalException(
	    "PhysicalBatchCopyToFile created for copy function that does not have prepare_batch/flush_batch defined");
}
|
21
|
+
|
22
|
+
//===--------------------------------------------------------------------===//
|
23
|
+
// Sink
|
24
|
+
//===--------------------------------------------------------------------===//
|
25
|
+
class BatchCopyToGlobalState : public GlobalSinkState {
|
26
|
+
public:
|
27
|
+
explicit BatchCopyToGlobalState(unique_ptr<GlobalFunctionData> global_state)
|
28
|
+
: rows_copied(0), global_state(std::move(global_state)) {
|
29
|
+
}
|
30
|
+
|
31
|
+
mutex lock;
|
32
|
+
mutex flush_lock;
|
33
|
+
atomic<idx_t> rows_copied;
|
34
|
+
unique_ptr<GlobalFunctionData> global_state;
|
35
|
+
map<idx_t, unique_ptr<PreparedBatchData>> batch_data;
|
36
|
+
};
|
37
|
+
|
38
|
+
class BatchCopyToLocalState : public LocalSinkState {
|
39
|
+
public:
|
40
|
+
explicit BatchCopyToLocalState(unique_ptr<LocalFunctionData> local_state_p)
|
41
|
+
: local_state(std::move(local_state_p)), rows_copied(0), batch_index(0) {
|
42
|
+
}
|
43
|
+
|
44
|
+
unique_ptr<LocalFunctionData> local_state;
|
45
|
+
unique_ptr<ColumnDataCollection> collection;
|
46
|
+
ColumnDataAppendState append_state;
|
47
|
+
idx_t rows_copied;
|
48
|
+
idx_t batch_index;
|
49
|
+
|
50
|
+
void InitializeCollection(ClientContext &context, const PhysicalOperator &op) {
|
51
|
+
collection = make_uniq<ColumnDataCollection>(Allocator::Get(context), op.children[0]->types);
|
52
|
+
collection->InitializeAppend(append_state);
|
53
|
+
}
|
54
|
+
};
|
55
|
+
|
56
|
+
//! Buffers the incoming chunk in the local collection (creating the collection on first use)
//! and adds the chunk size to the local row count. Always returns NEED_MORE_INPUT.
SinkResultType PhysicalBatchCopyToFile::Sink(ExecutionContext &context, DataChunk &chunk,
                                             OperatorSinkInput &input) const {
	auto &lstate = input.local_state.Cast<BatchCopyToLocalState>();
	if (!lstate.collection) {
		// no collection yet - create one before appending
		lstate.InitializeCollection(context.client, *this);
	}
	const auto chunk_rows = chunk.size();
	lstate.rows_copied += chunk_rows;
	lstate.collection->Append(lstate.append_state, chunk);
	return SinkResultType::NEED_MORE_INPUT;
}
|
66
|
+
|
67
|
+
//! Adds the row count gathered by the local state to the global row counter
void PhysicalBatchCopyToFile::Combine(ExecutionContext &context, GlobalSinkState &gstate_p,
                                      LocalSinkState &lstate) const {
	auto &local = lstate.Cast<BatchCopyToLocalState>();
	auto &global = gstate_p.Cast<BatchCopyToGlobalState>();
	const idx_t local_rows = local.rows_copied;
	global.rows_copied += local_rows;
}
|
73
|
+
|
74
|
+
//! Flushes the remaining prepared batches and, if the copy function provides copy_to_finalize, invokes it.
//! The temporary file is moved into place only when copy_to_finalize exists and use_tmp_file is set.
//! Always returns READY.
SinkFinalizeType PhysicalBatchCopyToFile::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
                                                   GlobalSinkState &gstate_p) const {
	auto &global = gstate_p.Cast<BatchCopyToGlobalState>();
	// use the largest possible bound so that no batch is held back by the min_index check
	FlushBatchData(context, gstate_p, NumericLimits<int64_t>::Maximum());
	if (!function.copy_to_finalize) {
		// nothing to finalize
		return SinkFinalizeType::READY;
	}
	function.copy_to_finalize(context, *bind_data, *global.global_state);
	if (use_tmp_file) {
		PhysicalCopyToFile::MoveTmpFile(context, file_path);
	}
	return SinkFinalizeType::READY;
}
|
87
|
+
|
88
|
+
//! Runs the copy function's prepare_batch callback on "collection" and registers the result
//! in the global state under "batch_index"
void PhysicalBatchCopyToFile::PrepareBatchData(ClientContext &context, GlobalSinkState &gstate_p, idx_t batch_index,
                                               unique_ptr<ColumnDataCollection> collection) const {
	auto &global = gstate_p.Cast<BatchCopyToGlobalState>();

	// the preparation itself runs without holding the batch_data lock
	auto prepared = function.prepare_batch(context, *bind_data, *global.global_state, std::move(collection));

	// publish the prepared batch in the set of prepared batch data
	lock_guard<mutex> guard(global.lock);
	global.batch_data[batch_index] = std::move(prepared);
}
|
98
|
+
|
99
|
+
//! Repeatedly takes the first entry of the prepared batch map and hands it to the copy function's flush_batch
//! callback. Stops as soon as the map is empty, the first entry has a batch index >= min_index, or the first
//! entry holds no prepared data.
void PhysicalBatchCopyToFile::FlushBatchData(ClientContext &context, GlobalSinkState &gstate_p, idx_t min_index) const {
	auto &global = gstate_p.Cast<BatchCopyToGlobalState>();

	// flush batch data to disk (if there are any to flush)
	for (;;) {
		// flush_batch may only be called while holding the flush lock
		// otherwise the data might end up in the wrong order
		lock_guard<mutex> flush_guard(global.flush_lock);
		unique_ptr<PreparedBatchData> next_batch;
		{
			// fetch the next batch to flush (if any) while holding the batch_data lock
			lock_guard<mutex> data_guard(global.lock);
			auto &pending = global.batch_data;
			if (pending.empty()) {
				// no batch data left to flush
				break;
			}
			auto first = pending.begin();
			// data at or past min_index cannot be written yet
			const bool past_min_index = first->first >= min_index;
			// an entry without data is a batch that is being prepared but is not ready yet
			if (past_min_index || !first->second) {
				break;
			}
			next_batch = std::move(first->second);
			pending.erase(first);
		}
		// the batch_data lock is released here; only the flush lock is still held
		function.flush_batch(context, *bind_data, *global.global_state, *next_batch);
	}
}
|
130
|
+
|
131
|
+
//! Called when the local state moves on to a new batch: prepares the collection gathered so far (if any),
//! flushes whatever can be flushed, then records the new batch index and starts a fresh collection
void PhysicalBatchCopyToFile::NextBatch(ExecutionContext &context, GlobalSinkState &gstate_p,
                                        LocalSinkState &lstate) const {
	auto &local = lstate.Cast<BatchCopyToLocalState>();
	auto &client = context.client;
	if (local.collection) {
		// we finished processing this batch - hand it over and start flushing data
		PrepareBatchData(client, gstate_p, local.batch_index, std::move(local.collection));
		const auto min_batch_index = lstate.partition_info.min_batch_index.GetIndex();
		FlushBatchData(client, gstate_p, min_batch_index);
	}
	local.batch_index = lstate.partition_info.batch_index.GetIndex();

	local.InitializeCollection(client, *this);
}
|
144
|
+
|
145
|
+
//! Creates the local sink state, wrapping the local state produced by the copy function's
//! copy_to_initialize_local callback
unique_ptr<LocalSinkState> PhysicalBatchCopyToFile::GetLocalSinkState(ExecutionContext &context) const {
	auto function_local_state = function.copy_to_initialize_local(context, *bind_data);
	return make_uniq<BatchCopyToLocalState>(std::move(function_local_state));
}
|
148
|
+
|
149
|
+
//! Creates the global sink state, wrapping the global state produced by the copy function's
//! copy_to_initialize_global callback for file_path
unique_ptr<GlobalSinkState> PhysicalBatchCopyToFile::GetGlobalSinkState(ClientContext &context) const {
	auto function_global_state = function.copy_to_initialize_global(context, *bind_data, file_path);
	return make_uniq<BatchCopyToGlobalState>(std::move(function_global_state));
}
|
152
|
+
|
153
|
+
//===--------------------------------------------------------------------===//
|
154
|
+
// Source
|
155
|
+
//===--------------------------------------------------------------------===//
|
156
|
+
// Emits a single row whose only column is the number of rows copied (as BIGINT),
// then reports that the source is finished.
SourceResultType PhysicalBatchCopyToFile::GetData(ExecutionContext &context, DataChunk &chunk,
                                                  OperatorSourceInput &input) const {
    auto &global_sink = sink_state->Cast<BatchCopyToGlobalState>();
    const auto copied_rows = Value::BIGINT(global_sink.rows_copied);

    chunk.SetCardinality(1);
    chunk.SetValue(0, 0, copied_rows);
    return SourceResultType::FINISHED;
}
|
164
|
+
|
165
|
+
} // namespace duckdb
|
@@ -81,20 +81,35 @@ public:
|
|
81
81
|
}
|
82
82
|
auto new_row_group = new_collection->Append(scan_chunk, append_state);
|
83
83
|
if (new_row_group) {
|
84
|
-
writer.
|
84
|
+
writer.WriteNewRowGroup(*new_collection);
|
85
85
|
}
|
86
86
|
}
|
87
87
|
}
|
88
|
-
|
89
88
|
new_collection->FinalizeAppend(TransactionData(0, 0), append_state);
|
90
|
-
writer.
|
89
|
+
writer.WriteLastRowGroup(*new_collection);
|
91
90
|
}
|
92
91
|
current_collections.clear();
|
93
92
|
return new_collection;
|
94
93
|
}
|
95
94
|
};
|
96
95
|
|
96
|
+
// Flush state carried by a RowGroupBatchEntry.
enum class RowGroupBatchType : uint8_t {
    FLUSHED,
    NOT_FLUSHED
};
|
97
|
+
struct RowGroupBatchEntry {
|
98
|
+
RowGroupBatchEntry(idx_t batch_idx, unique_ptr<RowGroupCollection> collection_p, RowGroupBatchType type)
|
99
|
+
: batch_idx(batch_idx), total_rows(collection_p->GetTotalRows()), collection(std::move(collection_p)),
|
100
|
+
type(type) {
|
101
|
+
}
|
102
|
+
|
103
|
+
idx_t batch_idx;
|
104
|
+
idx_t total_rows;
|
105
|
+
unique_ptr<RowGroupCollection> collection;
|
106
|
+
RowGroupBatchType type;
|
107
|
+
};
|
108
|
+
|
97
109
|
class BatchInsertGlobalState : public GlobalSinkState {
|
110
|
+
public:
|
111
|
+
static constexpr const idx_t BATCH_FLUSH_THRESHOLD = LocalStorage::MERGE_THRESHOLD * 3;
|
112
|
+
|
98
113
|
public:
|
99
114
|
explicit BatchInsertGlobalState(DuckTableEntry &table) : table(table), insert_count(0) {
|
100
115
|
}
|
@@ -102,38 +117,60 @@ public:
|
|
102
117
|
mutex lock;
|
103
118
|
DuckTableEntry &table;
|
104
119
|
idx_t insert_count;
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
120
|
+
vector<RowGroupBatchEntry> collections;
|
121
|
+
idx_t next_start = 0;
|
122
|
+
|
123
|
+
// Scans `collections` from `next_start` for a run of consecutive NOT_FLUSHED entries whose batch
// index lies below `min_batch_index`. A run is selected for merging when it is followed by a
// FLUSHED entry or when its accumulated row count reaches BATCH_FLUSH_THRESHOLD.
//
// On selection: `merged_batch_index` is set to the batch index of the first entry of the run, the
// collections of the run are moved into `result`, every entry of the run is marked FLUSHED, and all
// but the first entry of the run are erased from `collections`.
// If nothing is selected, `merged_batch_index` and `result` are left untouched.
void FindMergeCollections(idx_t min_batch_index, optional_idx &merged_batch_index,
                          vector<unique_ptr<RowGroupCollection>> &result) {
    idx_t run_begin = next_start;
    idx_t run_end = run_begin;
    idx_t pending_rows = 0;
    bool should_merge = false;

    while (run_end < collections.size()) {
        auto &candidate = collections[run_end];
        if (candidate.batch_idx >= min_batch_index) {
            // this entry is AFTER the min_batch_index
            // we might still find new entries!
            break;
        }
        if (candidate.type != RowGroupBatchType::FLUSHED) {
            // not flushed - add to the set of entries to flush
            pending_rows += candidate.total_rows;
            if (pending_rows >= BATCH_FLUSH_THRESHOLD) {
                // on this exit run_end still refers to the entry that was just counted, so that
                // entry is not part of [run_begin, run_end) below
                // NOTE(review): confirm that leaving this entry out of the merge is intended
                should_merge = true;
                break;
            }
            run_end++;
            continue;
        }
        // already flushed: a pending run ends here
        if (pending_rows > 0) {
            should_merge = true;
            break;
        }
        // nothing pending: restart the run after this flushed entry
        run_begin = run_end + 1;
        if (run_begin > next_start) {
            // avoid checking this segment again in the future
            next_start = run_begin;
        }
        run_end++;
    }

    if (!should_merge || pending_rows == 0) {
        return;
    }

    D_ASSERT(run_end > run_begin);
    merged_batch_index = collections[run_begin].batch_idx;
    for (idx_t pos = run_begin; pos < run_end; pos++) {
        auto &merged = collections[pos];
        const bool already_flushed = merged.type == RowGroupBatchType::FLUSHED;
        if (already_flushed || !merged.collection) {
            throw InternalException("Adding a row group collection that should not be flushed");
        }
        result.push_back(std::move(merged.collection));
        merged.total_rows = pending_rows;
        merged.type = RowGroupBatchType::FLUSHED;
    }
    if (run_end > run_begin + 1) {
        // erase all entries except the first one
        collections.erase(collections.begin() + run_begin + 1, collections.begin() + run_end);
    }
}
|
138
175
|
|
139
176
|
unique_ptr<RowGroupCollection> MergeCollections(ClientContext &context,
|
@@ -146,83 +183,65 @@ public:
|
|
146
183
|
return merger.Flush(writer);
|
147
184
|
}
|
148
185
|
|
149
|
-
void
|
150
|
-
|
151
|
-
throw InternalException("PhysicalBatchInsert::AddCollection error: batch index %d is present in multiple "
|
152
|
-
"collections. This occurs when "
|
153
|
-
"batch indexes are not uniquely distributed over threads",
|
154
|
-
batch_index);
|
155
|
-
}
|
156
|
-
}
|
157
|
-
|
158
|
-
void AddCollection(ClientContext &context, idx_t batch_index, unique_ptr<RowGroupCollection> current_collection,
|
186
|
+
void AddCollection(ClientContext &context, idx_t batch_index, idx_t min_batch_index,
|
187
|
+
unique_ptr<RowGroupCollection> current_collection,
|
159
188
|
optional_ptr<OptimisticDataWriter> writer = nullptr,
|
160
189
|
optional_ptr<bool> written_to_disk = nullptr) {
|
190
|
+
if (batch_index < min_batch_index) {
|
191
|
+
throw InternalException(
|
192
|
+
"Batch index of the added collection (%llu) is smaller than the min batch index (%llu)", batch_index,
|
193
|
+
min_batch_index);
|
194
|
+
}
|
195
|
+
auto new_count = current_collection->GetTotalRows();
|
196
|
+
auto batch_type =
|
197
|
+
new_count < RowGroup::ROW_GROUP_SIZE ? RowGroupBatchType::NOT_FLUSHED : RowGroupBatchType::FLUSHED;
|
198
|
+
if (batch_type == RowGroupBatchType::FLUSHED && writer) {
|
199
|
+
writer->WriteLastRowGroup(*current_collection);
|
200
|
+
}
|
201
|
+
optional_idx merged_batch_index;
|
161
202
|
vector<unique_ptr<RowGroupCollection>> merge_collections;
|
162
|
-
idx_t merge_count;
|
163
203
|
{
|
164
204
|
lock_guard<mutex> l(lock);
|
165
|
-
auto new_count = current_collection->GetTotalRows();
|
166
205
|
insert_count += new_count;
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
}
|
181
|
-
}
|
182
|
-
// check forwards
|
183
|
-
for (end_batch_index = batch_index;; end_batch_index++) {
|
184
|
-
if (!CheckMerge(end_batch_index + 1, merge_count)) {
|
185
|
-
break;
|
186
|
-
}
|
187
|
-
}
|
188
|
-
// merging together creates a big enough row group
|
189
|
-
// merge!
|
190
|
-
if (merge_count >= RowGroup::ROW_GROUP_SIZE) {
|
191
|
-
// gather the row groups to merge
|
192
|
-
// note that we need to gather them in order of batch index
|
193
|
-
for (idx_t i = start_batch_index; i <= end_batch_index; i++) {
|
194
|
-
if (i == batch_index) {
|
195
|
-
merge_collections.push_back(std::move(current_collection));
|
196
|
-
current_collection.reset();
|
197
|
-
continue;
|
198
|
-
}
|
199
|
-
auto can_merge = CheckMerge(i, merge_collections);
|
200
|
-
if (!can_merge) {
|
201
|
-
throw InternalException("Could not merge row group in batch insert?!");
|
202
|
-
}
|
203
|
-
}
|
204
|
-
}
|
206
|
+
|
207
|
+
// add the collection to the batch index
|
208
|
+
RowGroupBatchEntry new_entry(batch_index, std::move(current_collection), batch_type);
|
209
|
+
|
210
|
+
auto it = std::lower_bound(
|
211
|
+
collections.begin(), collections.end(), new_entry,
|
212
|
+
[&](const RowGroupBatchEntry &a, const RowGroupBatchEntry &b) { return a.batch_idx < b.batch_idx; });
|
213
|
+
if (it != collections.end() && it->batch_idx == new_entry.batch_idx) {
|
214
|
+
throw InternalException(
|
215
|
+
"PhysicalBatchInsert::AddCollection error: batch index %d is present in multiple "
|
216
|
+
"collections. This occurs when "
|
217
|
+
"batch indexes are not uniquely distributed over threads",
|
218
|
+
batch_index);
|
205
219
|
}
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
current_collection.reset();
|
220
|
+
collections.insert(it, std::move(new_entry));
|
221
|
+
if (writer) {
|
222
|
+
FindMergeCollections(min_batch_index, merged_batch_index, merge_collections);
|
210
223
|
}
|
211
224
|
}
|
212
225
|
if (!merge_collections.empty()) {
|
213
226
|
// merge together the collections
|
214
227
|
D_ASSERT(writer);
|
215
228
|
auto final_collection = MergeCollections(context, std::move(merge_collections), *writer);
|
216
|
-
D_ASSERT(final_collection->GetTotalRows() == merge_count);
|
217
|
-
D_ASSERT(final_collection->GetTotalRows() >= RowGroup::ROW_GROUP_SIZE);
|
218
229
|
if (written_to_disk) {
|
219
230
|
*written_to_disk = true;
|
220
231
|
}
|
221
|
-
// add the merged-together collection to the
|
232
|
+
// add the merged-together collection to the set of batch indexes
|
222
233
|
{
|
223
234
|
lock_guard<mutex> l(lock);
|
224
|
-
|
225
|
-
|
235
|
+
RowGroupBatchEntry new_entry(merged_batch_index.GetIndex(), std::move(final_collection),
|
236
|
+
RowGroupBatchType::FLUSHED);
|
237
|
+
auto it = std::lower_bound(collections.begin(), collections.end(), new_entry,
|
238
|
+
[&](const RowGroupBatchEntry &a, const RowGroupBatchEntry &b) {
|
239
|
+
return a.batch_idx < b.batch_idx;
|
240
|
+
});
|
241
|
+
if (it->batch_idx != merged_batch_index.GetIndex()) {
|
242
|
+
throw InternalException("Merged batch index was no longer present in collection");
|
243
|
+
}
|
244
|
+
it->collection = std::move(new_entry.collection);
|
226
245
|
}
|
227
246
|
}
|
228
247
|
}
|
@@ -244,16 +263,6 @@ public:
|
|
244
263
|
optional_ptr<OptimisticDataWriter> writer;
|
245
264
|
bool written_to_disk;
|
246
265
|
|
247
|
-
void FlushToDisk() {
|
248
|
-
if (!current_collection) {
|
249
|
-
return;
|
250
|
-
}
|
251
|
-
if (!written_to_disk && current_collection->GetTotalRows() < LocalStorage::MERGE_THRESHOLD) {
|
252
|
-
return;
|
253
|
-
}
|
254
|
-
writer->FlushToDisk(*current_collection, true);
|
255
|
-
}
|
256
|
-
|
257
266
|
void CreateNewCollection(DuckTableEntry &table, const vector<LogicalType> &insert_types) {
|
258
267
|
auto &table_info = table.GetStorage().info;
|
259
268
|
auto &block_manager = TableIOManager::Get(table.GetStorage()).GetBlockManagerForRowData();
|
@@ -286,35 +295,50 @@ unique_ptr<LocalSinkState> PhysicalBatchInsert::GetLocalSinkState(ExecutionConte
|
|
286
295
|
return make_uniq<BatchInsertLocalState>(context.client, insert_types, bound_defaults);
|
287
296
|
}
|
288
297
|
|
289
|
-
|
290
|
-
DataChunk &chunk) const {
|
298
|
+
// Invoked when this thread moves on to a different batch index.
// If a collection is held for the previous batch, its append is finalized, it is handed to the
// global state via AddCollection, and a new local collection is created. The local state then
// records the new batch index.
// Throws InternalException when a collection is held and the batch index did not actually change.
void PhysicalBatchInsert::NextBatch(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate_p) const {
    auto &global_sink = state.Cast<BatchInsertGlobalState>();
    auto &local_sink = lstate_p.Cast<BatchInsertLocalState>();

    const auto next_batch_index = local_sink.partition_info.batch_index.GetIndex();
    if (local_sink.current_collection) {
        if (local_sink.current_index == next_batch_index) {
            throw InternalException("NextBatch called with the same batch index?");
        }
        // batch index has changed: move the old collection to the global state and create a new collection
        TransactionData transaction(0, 0);
        local_sink.current_collection->FinalizeAppend(transaction, local_sink.current_append_state);
        const auto min_batch_index = local_sink.partition_info.min_batch_index.GetIndex();
        global_sink.AddCollection(context.client, local_sink.current_index, min_batch_index,
                                  std::move(local_sink.current_collection), local_sink.writer,
                                  &local_sink.written_to_disk);
        local_sink.CreateNewCollection(global_sink.table, insert_types);
    }
    local_sink.current_index = next_batch_index;
}
|
317
|
+
|
318
|
+
SinkResultType PhysicalBatchInsert::Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const {
|
319
|
+
auto &gstate = input.global_state.Cast<BatchInsertGlobalState>();
|
320
|
+
auto &lstate = input.local_state.Cast<BatchInsertLocalState>();
|
321
|
+
|
294
322
|
auto &table = gstate.table;
|
295
323
|
PhysicalInsert::ResolveDefaults(table, chunk, column_index_map, lstate.default_executor, lstate.insert_chunk);
|
296
324
|
|
325
|
+
auto batch_index = lstate.partition_info.batch_index.GetIndex();
|
297
326
|
if (!lstate.current_collection) {
|
298
327
|
lock_guard<mutex> l(gstate.lock);
|
299
328
|
// no collection yet: create a new one
|
300
329
|
lstate.CreateNewCollection(table, insert_types);
|
301
330
|
lstate.writer = &table.GetStorage().CreateOptimisticWriter(context.client);
|
302
|
-
} else if (lstate.current_index !=
|
303
|
-
|
304
|
-
TransactionData tdata(0, 0);
|
305
|
-
lstate.current_collection->FinalizeAppend(tdata, lstate.current_append_state);
|
306
|
-
lstate.FlushToDisk();
|
307
|
-
gstate.AddCollection(context.client, lstate.current_index, std::move(lstate.current_collection), lstate.writer,
|
308
|
-
&lstate.written_to_disk);
|
309
|
-
lstate.CreateNewCollection(table, insert_types);
|
331
|
+
} else if (lstate.current_index != batch_index) {
|
332
|
+
throw InternalException("Current batch differs from batch - but NextBatch was not called!?");
|
310
333
|
}
|
311
|
-
lstate.current_index =
|
334
|
+
lstate.current_index = batch_index;
|
312
335
|
|
313
336
|
table.GetStorage().VerifyAppendConstraints(table, context.client, lstate.insert_chunk);
|
314
337
|
|
315
338
|
auto new_row_group = lstate.current_collection->Append(lstate.insert_chunk, lstate.current_append_state);
|
316
339
|
if (new_row_group) {
|
317
|
-
|
340
|
+
// we have already written to disk - flush the next row group as well
|
341
|
+
lstate.writer->WriteNewRowGroup(*lstate.current_collection);
|
318
342
|
lstate.written_to_disk = true;
|
319
343
|
}
|
320
344
|
return SinkResultType::NEED_MORE_INPUT;
|
@@ -331,12 +355,17 @@ void PhysicalBatchInsert::Combine(ExecutionContext &context, GlobalSinkState &gs
|
|
331
355
|
if (!lstate.current_collection) {
|
332
356
|
return;
|
333
357
|
}
|
334
|
-
lstate.FlushToDisk();
|
335
|
-
lstate.writer->FinalFlush();
|
336
358
|
|
337
|
-
|
338
|
-
|
339
|
-
|
359
|
+
if (lstate.current_collection->GetTotalRows() > 0) {
|
360
|
+
TransactionData tdata(0, 0);
|
361
|
+
lstate.current_collection->FinalizeAppend(tdata, lstate.current_append_state);
|
362
|
+
gstate.AddCollection(context.client, lstate.current_index, lstate.partition_info.min_batch_index.GetIndex(),
|
363
|
+
std::move(lstate.current_collection));
|
364
|
+
}
|
365
|
+
{
|
366
|
+
lock_guard<mutex> l(gstate.lock);
|
367
|
+
gstate.table.GetStorage().FinalizeOptimisticWriter(context.client, *lstate.writer);
|
368
|
+
}
|
340
369
|
}
|
341
370
|
|
342
371
|
SinkFinalizeType PhysicalBatchInsert::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
|
@@ -349,15 +378,15 @@ SinkFinalizeType PhysicalBatchInsert::Finalize(Pipeline &pipeline, Event &event,
|
|
349
378
|
unique_ptr<CollectionMerger> current_merger;
|
350
379
|
|
351
380
|
auto &storage = gstate.table.GetStorage();
|
352
|
-
for (auto &
|
353
|
-
if (
|
354
|
-
// this collection has
|
381
|
+
for (auto &entry : gstate.collections) {
|
382
|
+
if (entry.type == RowGroupBatchType::NOT_FLUSHED) {
|
383
|
+
// this collection has not been flushed: add it to the merge set
|
355
384
|
if (!current_merger) {
|
356
385
|
current_merger = make_uniq<CollectionMerger>(context);
|
357
386
|
}
|
358
|
-
current_merger->AddCollection(std::move(collection
|
387
|
+
current_merger->AddCollection(std::move(entry.collection));
|
359
388
|
} else {
|
360
|
-
// this collection has
|
389
|
+
// this collection has been flushed: it does not need to be merged
|
361
390
|
// create a separate collection merger only for this entry
|
362
391
|
if (current_merger) {
|
363
392
|
// we have small collections remaining: flush them
|
@@ -365,7 +394,7 @@ SinkFinalizeType PhysicalBatchInsert::Finalize(Pipeline &pipeline, Event &event,
|
|
365
394
|
current_merger.reset();
|
366
395
|
}
|
367
396
|
auto larger_merger = make_uniq<CollectionMerger>(context);
|
368
|
-
larger_merger->AddCollection(std::move(collection
|
397
|
+
larger_merger->AddCollection(std::move(entry.collection));
|
369
398
|
mergers.push_back(std::move(larger_merger));
|
370
399
|
}
|
371
400
|
}
|
@@ -380,7 +409,7 @@ SinkFinalizeType PhysicalBatchInsert::Finalize(Pipeline &pipeline, Event &event,
|
|
380
409
|
for (auto &merger : mergers) {
|
381
410
|
final_collections.push_back(merger->Flush(writer));
|
382
411
|
}
|
383
|
-
|
412
|
+
storage.FinalizeOptimisticWriter(context, writer);
|
384
413
|
|
385
414
|
// finally, merge the row groups into the local storage
|
386
415
|
for (auto &collection : final_collections) {
|
@@ -392,29 +421,15 @@ SinkFinalizeType PhysicalBatchInsert::Finalize(Pipeline &pipeline, Event &event,
|
|
392
421
|
//===--------------------------------------------------------------------===//
|
393
422
|
// Source
|
394
423
|
//===--------------------------------------------------------------------===//
|
395
|
-
class BatchInsertSourceState : public GlobalSourceState {
|
396
|
-
public:
|
397
|
-
explicit BatchInsertSourceState() : finished(false) {
|
398
|
-
}
|
399
424
|
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
unique_ptr<GlobalSourceState> PhysicalBatchInsert::GetGlobalSourceState(ClientContext &context) const {
|
404
|
-
return make_uniq<BatchInsertSourceState>();
|
405
|
-
}
|
406
|
-
|
407
|
-
void PhysicalBatchInsert::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
|
408
|
-
LocalSourceState &lstate) const {
|
409
|
-
auto &state = gstate.Cast<BatchInsertSourceState>();
|
425
|
+
// Emits a single row whose only column is the global insert count (as BIGINT),
// then reports that the source is finished.
SourceResultType PhysicalBatchInsert::GetData(ExecutionContext &context, DataChunk &chunk,
                                              OperatorSourceInput &input) const {
    auto &global_sink = sink_state->Cast<BatchInsertGlobalState>();
    const auto inserted_rows = Value::BIGINT(global_sink.insert_count);

    chunk.SetCardinality(1);
    chunk.SetValue(0, 0, inserted_rows);
    return SourceResultType::FINISHED;
}
|
419
434
|
|
420
435
|
} // namespace duckdb
|
@@ -43,7 +43,7 @@ public:
|
|
43
43
|
// Sink
|
44
44
|
//===--------------------------------------------------------------------===//
|
45
45
|
|
46
|
-
void MoveTmpFile(ClientContext &context, const string &tmp_file_path) {
|
46
|
+
void PhysicalCopyToFile::MoveTmpFile(ClientContext &context, const string &tmp_file_path) {
|
47
47
|
auto &fs = FileSystem::GetFileSystem(context);
|
48
48
|
auto file_path = tmp_file_path.substr(0, tmp_file_path.length() - 4);
|
49
49
|
if (fs.FileExists(file_path)) {
|
@@ -58,22 +58,21 @@ PhysicalCopyToFile::PhysicalCopyToFile(vector<LogicalType> types, CopyFunction f
|
|
58
58
|
function(std::move(function_p)), bind_data(std::move(bind_data)), parallel(false) {
|
59
59
|
}
|
60
60
|
|
61
|
-
SinkResultType PhysicalCopyToFile::Sink(ExecutionContext &context,
|
62
|
-
|
63
|
-
auto &
|
64
|
-
auto &l = lstate.Cast<CopyToFunctionLocalState>();
|
61
|
+
// Consumes one input chunk.
// With partitioned output the chunk is only appended to the thread-local partition buffer.
// Otherwise the global row counter is increased under the global lock and the chunk is passed to
// the copy function's sink callback, using the thread-local global state when per-thread output is
// enabled and the shared one otherwise.
// Always returns NEED_MORE_INPUT.
SinkResultType PhysicalCopyToFile::Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const {
    auto &global_sink = input.global_state.Cast<CopyToFunctionGlobalState>();
    auto &local_sink = input.local_state.Cast<CopyToFunctionLocalState>();

    if (!partition_output) {
        {
            // keep the critical section limited to the counter update
            lock_guard<mutex> glock(global_sink.lock);
            global_sink.rows_copied += chunk.size();
        }
        auto &target_state = per_thread_output ? *local_sink.global_state : *global_sink.global_state;
        function.copy_to_sink(context, *bind_data, target_state, *local_sink.local_state, chunk);
        return SinkResultType::NEED_MORE_INPUT;
    }

    local_sink.part_buffer->Append(*local_sink.part_buffer_append_state, chunk);
    return SinkResultType::NEED_MORE_INPUT;
}
|
79
78
|
|
@@ -231,29 +230,15 @@ unique_ptr<GlobalSinkState> PhysicalCopyToFile::GetGlobalSinkState(ClientContext
|
|
231
230
|
//===--------------------------------------------------------------------===//
|
232
231
|
// Source
|
233
232
|
//===--------------------------------------------------------------------===//
|
234
|
-
class CopyToFileState : public GlobalSourceState {
|
235
|
-
public:
|
236
|
-
CopyToFileState() : finished(false) {
|
237
|
-
}
|
238
|
-
|
239
|
-
bool finished;
|
240
|
-
};
|
241
233
|
|
242
|
-
|
243
|
-
|
244
|
-
}
|
245
|
-
|
246
|
-
void PhysicalCopyToFile::GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
|
247
|
-
LocalSourceState &lstate) const {
|
248
|
-
auto &state = (CopyToFileState &)gstate;
|
234
|
+
// Emits a single row whose only column is the number of rows copied (as BIGINT),
// then reports that the source is finished.
SourceResultType PhysicalCopyToFile::GetData(ExecutionContext &context, DataChunk &chunk,
                                             OperatorSourceInput &input) const {
    auto &global_sink = sink_state->Cast<CopyToFunctionGlobalState>();
    const auto copied_rows = Value::BIGINT(global_sink.rows_copied);

    chunk.SetCardinality(1);
    chunk.SetValue(0, 0, copied_rows);
    return SourceResultType::FINISHED;
}
|
258
243
|
|
259
244
|
} // namespace duckdb
|