duckdb 1.1.2-dev2.0 → 1.1.2-dev6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +4 -2
- package/package.json +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/putil.cpp +0 -5
- package/src/duckdb/extension/icu/third_party/icu/common/rbbiscan.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/rbbitblb.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/ucurr.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/uresbund.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/uresimp.h +31 -31
- package/src/duckdb/extension/icu/third_party/icu/common/ustring.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/uvector.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/coleitr.cpp +12 -12
- package/src/duckdb/extension/icu/third_party/icu/i18n/format.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/listformatter.cpp +4 -4
- package/src/duckdb/extension/icu/third_party/icu/i18n/number_decimalquantity.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/tzgnames.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/coleitr.h +28 -28
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/format.h +7 -7
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/ucol.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/ucoleitr.h +41 -41
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/umsg.h +41 -41
- package/src/duckdb/extension/icu/third_party/icu/i18n/usrchimp.h +3 -3
- package/src/duckdb/extension/json/include/json_common.hpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +13 -7
- package/src/duckdb/extension/parquet/column_writer.cpp +2 -1
- package/src/duckdb/extension/parquet/geo_parquet.cpp +24 -9
- package/src/duckdb/extension/parquet/include/geo_parquet.hpp +3 -1
- package/src/duckdb/extension/parquet/include/parquet_reader.hpp +1 -0
- package/src/duckdb/extension/parquet/include/parquet_rle_bp_decoder.hpp +1 -1
- package/src/duckdb/extension/parquet/include/templated_column_reader.hpp +0 -4
- package/src/duckdb/extension/parquet/parquet_extension.cpp +20 -6
- package/src/duckdb/extension/parquet/parquet_reader.cpp +1 -2
- package/src/duckdb/extension/parquet/parquet_writer.cpp +1 -1
- package/src/duckdb/extension/parquet/serialize_parquet.cpp +0 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +8 -1
- package/src/duckdb/src/catalog/default/default_functions.cpp +5 -5
- package/src/duckdb/src/common/allocator.cpp +3 -2
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +1 -0
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +11 -0
- package/src/duckdb/src/common/arrow/schema_metadata.cpp +6 -4
- package/src/duckdb/src/common/enum_util.cpp +33 -0
- package/src/duckdb/src/common/exception.cpp +3 -0
- package/src/duckdb/src/common/extra_type_info.cpp +1 -44
- package/src/duckdb/src/common/field_writer.cpp +97 -0
- package/src/duckdb/src/common/render_tree.cpp +7 -5
- package/src/duckdb/src/common/row_operations/row_match.cpp +359 -0
- package/src/duckdb/src/common/serializer/buffered_deserializer.cpp +27 -0
- package/src/duckdb/src/common/serializer/buffered_serializer.cpp +36 -0
- package/src/duckdb/src/common/serializer/format_serializer.cpp +15 -0
- package/src/duckdb/src/common/serializer.cpp +24 -0
- package/src/duckdb/src/common/sort/comparators.cpp +2 -2
- package/src/duckdb/src/common/types/bit.cpp +57 -34
- package/src/duckdb/src/common/types/data_chunk.cpp +32 -29
- package/src/duckdb/src/common/types/vector_cache.cpp +12 -6
- package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +14 -0
- package/src/duckdb/src/core_functions/aggregate/distributive/bitstring_agg.cpp +20 -1
- package/src/duckdb/src/core_functions/aggregate/distributive/minmax.cpp +2 -2
- package/src/duckdb/src/core_functions/aggregate/holistic/approx_top_k.cpp +32 -7
- package/src/duckdb/src/core_functions/function_list.cpp +1 -2
- package/src/duckdb/src/core_functions/scalar/bit/bitstring.cpp +23 -5
- package/src/duckdb/src/core_functions/scalar/date/date_diff.cpp +12 -6
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +1 -1
- package/src/duckdb/src/execution/expression_executor/execute_between.cpp +4 -3
- package/src/duckdb/src/execution/expression_executor/execute_case.cpp +4 -3
- package/src/duckdb/src/execution/expression_executor/execute_cast.cpp +2 -1
- package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +3 -2
- package/src/duckdb/src/execution/expression_executor/execute_conjunction.cpp +2 -1
- package/src/duckdb/src/execution/expression_executor/execute_function.cpp +2 -1
- package/src/duckdb/src/execution/expression_executor/execute_operator.cpp +3 -2
- package/src/duckdb/src/execution/expression_executor/execute_reference.cpp +1 -1
- package/src/duckdb/src/execution/expression_executor.cpp +9 -3
- package/src/duckdb/src/execution/expression_executor_state.cpp +11 -9
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +238 -0
- package/src/duckdb/src/execution/index/art/plan_art.cpp +94 -0
- package/src/duckdb/src/execution/index/index_type_set.cpp +4 -1
- package/src/duckdb/src/execution/join_hashtable.cpp +7 -8
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +6 -4
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp +4 -4
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +44 -5
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +28 -24
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +25 -26
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +5 -3
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +4 -4
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +2 -2
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +2 -2
- package/src/duckdb/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +73 -27
- package/src/duckdb/src/execution/operator/helper/physical_buffered_collector.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +695 -0
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +1487 -0
- package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +72 -0
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +158 -0
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +280 -0
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +666 -0
- package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +14 -4
- package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +207 -0
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +207 -0
- package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +6 -1
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +0 -4
- package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +14 -87
- package/src/duckdb/src/execution/physical_plan/plan_export.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_get.cpp +1 -1
- package/src/duckdb/src/execution/reservoir_sample.cpp +1 -1
- package/src/duckdb/src/execution/window_executor.cpp +3 -3
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +1 -1
- package/src/duckdb/src/function/scalar/strftime_format.cpp +1 -2
- package/src/duckdb/src/function/scalar/string/concat.cpp +118 -151
- package/src/duckdb/src/function/table/arrow.cpp +13 -0
- package/src/duckdb/src/function/table/arrow_conversion.cpp +12 -7
- package/src/duckdb/src/function/table/copy_csv.cpp +1 -1
- package/src/duckdb/src/function/table/read_csv.cpp +2 -30
- package/src/duckdb/src/function/table/sniff_csv.cpp +2 -1
- package/src/duckdb/src/function/table/system/duckdb_secrets.cpp +15 -7
- package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry_retriever.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/atomic.hpp +13 -1
- package/src/duckdb/src/include/duckdb/common/bitpacking.hpp +3 -4
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/exception.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/extra_type_info/enum_type_info.hpp +53 -0
- package/src/duckdb/src/include/duckdb/common/insertion_order_preserving_map.hpp +5 -5
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/types/bit.hpp +36 -33
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +10 -13
- package/src/duckdb/src/include/duckdb/common/types/uhugeint.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/vector_cache.hpp +7 -5
- package/src/duckdb/src/include/duckdb/common/windows_undefs.hpp +2 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/minmax_n_helpers.hpp +2 -0
- package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +1 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +0 -6
- package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/expression_executor_state.hpp +3 -2
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/index/index_type.hpp +16 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp +4 -4
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp +4 -2
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +3 -2
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +91 -36
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp +36 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +0 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_export.hpp +2 -5
- package/src/duckdb/src/include/duckdb/function/table_function.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/database.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/profiling_info.hpp +20 -22
- package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +7 -9
- package/src/duckdb/src/include/duckdb/main/secret/secret.hpp +8 -1
- package/src/duckdb/src/include/duckdb/main/table_description.hpp +14 -0
- package/src/duckdb/src/include/duckdb/optimizer/unnest_rewriter.hpp +5 -5
- package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +15 -5
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/order_binder.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_export.hpp +10 -13
- package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +0 -2
- package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +1 -0
- package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +5 -1
- package/src/duckdb/src/include/duckdb.h +2 -2
- package/src/duckdb/src/main/appender.cpp +3 -0
- package/src/duckdb/src/main/capi/profiling_info-c.cpp +5 -2
- package/src/duckdb/src/main/client_context.cpp +8 -2
- package/src/duckdb/src/main/connection.cpp +1 -1
- package/src/duckdb/src/main/database.cpp +13 -0
- package/src/duckdb/src/main/extension/extension_helper.cpp +1 -1
- package/src/duckdb/src/main/extension/extension_install.cpp +9 -1
- package/src/duckdb/src/main/extension/extension_load.cpp +3 -2
- package/src/duckdb/src/main/extension_install_info.cpp +1 -1
- package/src/duckdb/src/main/profiling_info.cpp +78 -58
- package/src/duckdb/src/main/query_profiler.cpp +79 -89
- package/src/duckdb/src/main/relation/read_csv_relation.cpp +1 -1
- package/src/duckdb/src/main/secret/secret.cpp +2 -1
- package/src/duckdb/src/main/secret/secret_manager.cpp +14 -0
- package/src/duckdb/src/optimizer/cte_filter_pusher.cpp +4 -2
- package/src/duckdb/src/optimizer/deliminator.cpp +0 -7
- package/src/duckdb/src/optimizer/in_clause_rewriter.cpp +7 -0
- package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +4 -1
- package/src/duckdb/src/optimizer/unnest_rewriter.cpp +21 -21
- package/src/duckdb/src/parallel/task_scheduler.cpp +9 -0
- package/src/duckdb/src/parser/parsed_data/exported_table_data.cpp +22 -0
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +3 -0
- package/src/duckdb/src/parser/statement/insert_statement.cpp +7 -1
- package/src/duckdb/src/parser/transform/expression/transform_boolean_test.cpp +1 -1
- package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +89 -87
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +2 -2
- package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +4 -9
- package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +4 -0
- package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +2 -2
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +4 -1
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +4 -3
- package/src/duckdb/src/planner/expression_binder/order_binder.cpp +13 -3
- package/src/duckdb/src/planner/expression_binder.cpp +1 -1
- package/src/duckdb/src/planner/operator/logical_export.cpp +28 -0
- package/src/duckdb/src/planner/table_binding.cpp +1 -2
- package/src/duckdb/src/planner/table_filter.cpp +6 -2
- package/src/duckdb/src/storage/buffer/buffer_pool.cpp +2 -1
- package/src/duckdb/src/storage/checkpoint_manager.cpp +1 -1
- package/src/duckdb/src/storage/compression/bitpacking.cpp +7 -3
- package/src/duckdb/src/storage/compression/dictionary_compression.cpp +1 -1
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +2 -2
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +16 -0
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +29 -0
- package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +15 -0
- package/src/duckdb/src/storage/single_file_block_manager.cpp +2 -1
- package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +3 -5
- package/src/duckdb/src/storage/storage_info.cpp +4 -4
- package/src/duckdb/src/storage/table/row_group_collection.cpp +1 -1
- package/src/duckdb/src/storage/table/row_version_manager.cpp +5 -1
- package/src/duckdb/src/storage/temporary_file_manager.cpp +1 -1
- package/src/duckdb/src/transaction/duck_transaction.cpp +15 -14
- package/src/duckdb/third_party/brotli/common/brotli_platform.h +1 -1
- package/src/duckdb/third_party/brotli/dec/decode.cpp +1 -1
- package/src/duckdb/third_party/brotli/enc/memory.cpp +4 -4
- package/src/duckdb/third_party/fsst/libfsst.cpp +1 -1
- package/src/duckdb/third_party/hyperloglog/sds.cpp +1 -1
- package/src/duckdb/third_party/hyperloglog/sds.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/include/common/keywords.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/include/datatype/timestamp.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/include/mb/pg_wchar.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/bitmapset.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/lockoptions.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/makefuncs.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/pg_list.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/value.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/include/parser/gramparse.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/include/parser/parser.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/include/parser/scanner.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/include/parser/scansup.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/include/pg_functions.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/pg_functions.cpp +1 -1
- package/src/duckdb/third_party/libpg_query/src_backend_nodes_list.cpp +1 -1
- package/src/duckdb/third_party/libpg_query/src_backend_nodes_makefuncs.cpp +1 -1
- package/src/duckdb/third_party/libpg_query/src_backend_nodes_value.cpp +1 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +1964 -1964
- package/src/duckdb/third_party/libpg_query/src_backend_parser_parser.cpp +1 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_scansup.cpp +1 -1
- package/src/duckdb/third_party/libpg_query/src_common_keywords.cpp +1 -1
- package/src/duckdb/third_party/lz4/lz4.cpp +1 -1
- package/src/duckdb/third_party/mbedtls/include/des_alt.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/aes_alt.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/aria_alt.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/asn1write.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/camellia_alt.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/ccm_alt.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/chacha20.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/chachapoly.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/cmac.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/config_psa.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/ecdsa.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/ecp.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/gcm_alt.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/md5.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/nist_kw.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/pkcs12.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/pkcs5.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/psa_util.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/ripemd160.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/threading.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/mbedtls/timing.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/platform_alt.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/psa/crypto.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/rsa_alt.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/sha1_alt.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/sha256_alt.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/sha512_alt.h +1 -1
- package/src/duckdb/third_party/mbedtls/include/ssl_misc.h +1 -1
- package/src/duckdb/third_party/mbedtls/library/aesni.h +1 -1
- package/src/duckdb/third_party/mbedtls/library/padlock.h +1 -1
- package/src/duckdb/third_party/miniz/miniz.cpp +1 -1
- package/src/duckdb/third_party/parquet/parquet_types.cpp +1 -1
- package/src/duckdb/third_party/parquet/windows_compatibility.h +1 -1
- package/src/duckdb/third_party/pcg/pcg_extras.hpp +1 -1
- package/src/duckdb/third_party/pcg/pcg_uint128.hpp +1 -1
- package/src/duckdb/third_party/skiplist/Node.h +4 -4
- package/src/duckdb/third_party/snappy/snappy.cc +1 -1
- package/src/duckdb/third_party/snappy/snappy_version.hpp +1 -1
- package/src/duckdb/third_party/thrift/thrift/thrift-config.h +1 -1
- package/src/duckdb/third_party/zstd/decompress/zstd_decompress_block.cpp +1 -1
- package/src/duckdb/third_party/zstd/include/zstd_static.h +1 -1
- package/src/duckdb/ub_src_execution_index_art.cpp +2 -0
- package/src/duckdb/ub_src_parser_parsed_data.cpp +2 -0
- package/src/duckdb/ub_src_planner_operator.cpp +2 -0
@@ -19,6 +19,12 @@ using std::stringstream;
|
|
19
19
|
|
20
20
|
void ReorderTableEntries(catalog_entry_vector_t &tables);
|
21
21
|
|
22
|
+
PhysicalExport::PhysicalExport(vector<LogicalType> types, CopyFunction function, unique_ptr<CopyInfo> info,
|
23
|
+
idx_t estimated_cardinality, unique_ptr<BoundExportData> exported_tables)
|
24
|
+
: PhysicalOperator(PhysicalOperatorType::EXPORT, std::move(types), estimated_cardinality),
|
25
|
+
function(std::move(function)), info(std::move(info)), exported_tables(std::move(exported_tables)) {
|
26
|
+
}
|
27
|
+
|
22
28
|
static void WriteCatalogEntries(stringstream &ss, catalog_entry_vector_t &entries) {
|
23
29
|
for (auto &entry : entries) {
|
24
30
|
if (entry.get().internal) {
|
@@ -121,6 +127,10 @@ void PhysicalExport::ExtractEntries(ClientContext &context, vector<reference<Sch
|
|
121
127
|
ExportEntries &result) {
|
122
128
|
for (auto &schema_p : schema_list) {
|
123
129
|
auto &schema = schema_p.get();
|
130
|
+
auto &catalog = schema.ParentCatalog();
|
131
|
+
if (catalog.IsSystemCatalog() || catalog.IsTemporaryCatalog()) {
|
132
|
+
continue;
|
133
|
+
}
|
124
134
|
if (!schema.internal) {
|
125
135
|
result.schemas.push_back(schema);
|
126
136
|
}
|
@@ -225,8 +235,8 @@ SourceResultType PhysicalExport::GetData(ExecutionContext &context, DataChunk &c
|
|
225
235
|
|
226
236
|
// consider the order of tables because of foreign key constraint
|
227
237
|
entries.tables.clear();
|
228
|
-
for (idx_t i = 0; i < exported_tables.data.size(); i++) {
|
229
|
-
entries.tables.push_back(exported_tables.data[i].entry);
|
238
|
+
for (idx_t i = 0; i < exported_tables->data.size(); i++) {
|
239
|
+
entries.tables.push_back(exported_tables->data[i].entry);
|
230
240
|
}
|
231
241
|
|
232
242
|
// order macro's by timestamp so nested macro's are imported nicely
|
@@ -252,8 +262,8 @@ SourceResultType PhysicalExport::GetData(ExecutionContext &context, DataChunk &c
|
|
252
262
|
// write the load.sql file
|
253
263
|
// for every table, we write COPY INTO statement with the specified options
|
254
264
|
stringstream load_ss;
|
255
|
-
for (idx_t i = 0; i < exported_tables.data.size(); i++) {
|
256
|
-
auto exported_table_info = exported_tables.data[i].table_data;
|
265
|
+
for (idx_t i = 0; i < exported_tables->data.size(); i++) {
|
266
|
+
auto exported_table_info = exported_tables->data[i].table_data;
|
257
267
|
WriteCopyStatement(fs, load_ss, *info, exported_table_info, function);
|
258
268
|
}
|
259
269
|
WriteStringStreamToFile(fs, load_ss, fs.JoinPath(info->file_path, "load.sql"));
|
@@ -0,0 +1,207 @@
|
|
1
|
+
#include "duckdb/execution/operator/schema/physical_create_index.hpp"
|
2
|
+
|
3
|
+
#include "duckdb/catalog/catalog_entry/duck_table_entry.hpp"
|
4
|
+
#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
|
5
|
+
#include "duckdb/catalog/catalog_entry/duck_index_entry.hpp"
|
6
|
+
#include "duckdb/main/client_context.hpp"
|
7
|
+
#include "duckdb/storage/index.hpp"
|
8
|
+
#include "duckdb/storage/storage_manager.hpp"
|
9
|
+
#include "duckdb/storage/table/append_state.hpp"
|
10
|
+
#include "duckdb/main/database_manager.hpp"
|
11
|
+
#include "duckdb/execution/index/art/art_key.hpp"
|
12
|
+
#include "duckdb/execution/index/art/node.hpp"
|
13
|
+
#include "duckdb/execution/index/art/leaf.hpp"
|
14
|
+
|
15
|
+
namespace duckdb {
|
16
|
+
|
17
|
+
PhysicalCreateIndex::PhysicalCreateIndex(LogicalOperator &op, TableCatalogEntry &table_p,
|
18
|
+
const vector<column_t> &column_ids, unique_ptr<CreateIndexInfo> info,
|
19
|
+
vector<unique_ptr<Expression>> unbound_expressions,
|
20
|
+
idx_t estimated_cardinality, const bool sorted)
|
21
|
+
: PhysicalOperator(PhysicalOperatorType::CREATE_INDEX, op.types, estimated_cardinality),
|
22
|
+
table(table_p.Cast<DuckTableEntry>()), info(std::move(info)), unbound_expressions(std::move(unbound_expressions)),
|
23
|
+
sorted(sorted) {
|
24
|
+
// convert virtual column ids to storage column ids
|
25
|
+
for (auto &column_id : column_ids) {
|
26
|
+
storage_ids.push_back(table.GetColumns().LogicalToPhysical(LogicalIndex(column_id)).index);
|
27
|
+
}
|
28
|
+
}
|
29
|
+
|
30
|
+
//===--------------------------------------------------------------------===//
|
31
|
+
// Sink
|
32
|
+
//===--------------------------------------------------------------------===//
|
33
|
+
|
34
|
+
class CreateIndexGlobalSinkState : public GlobalSinkState {
|
35
|
+
public:
|
36
|
+
//! Global index to be added to the table
|
37
|
+
unique_ptr<Index> global_index;
|
38
|
+
};
|
39
|
+
|
40
|
+
class CreateIndexLocalSinkState : public LocalSinkState {
|
41
|
+
public:
|
42
|
+
explicit CreateIndexLocalSinkState(ClientContext &context) : arena_allocator(Allocator::Get(context)) {};
|
43
|
+
|
44
|
+
unique_ptr<Index> local_index;
|
45
|
+
ArenaAllocator arena_allocator;
|
46
|
+
vector<ARTKey> keys;
|
47
|
+
DataChunk key_chunk;
|
48
|
+
vector<column_t> key_column_ids;
|
49
|
+
};
|
50
|
+
|
51
|
+
unique_ptr<GlobalSinkState> PhysicalCreateIndex::GetGlobalSinkState(ClientContext &context) const {
|
52
|
+
auto state = make_uniq<CreateIndexGlobalSinkState>();
|
53
|
+
|
54
|
+
// create the global index
|
55
|
+
switch (info->index_type) {
|
56
|
+
case IndexType::ART: {
|
57
|
+
auto &storage = table.GetStorage();
|
58
|
+
state->global_index = make_uniq<ART>(storage_ids, TableIOManager::Get(storage), unbound_expressions,
|
59
|
+
info->constraint_type, storage.db);
|
60
|
+
break;
|
61
|
+
}
|
62
|
+
default:
|
63
|
+
throw InternalException("Unimplemented index type");
|
64
|
+
}
|
65
|
+
return (std::move(state));
|
66
|
+
}
|
67
|
+
|
68
|
+
unique_ptr<LocalSinkState> PhysicalCreateIndex::GetLocalSinkState(ExecutionContext &context) const {
|
69
|
+
auto state = make_uniq<CreateIndexLocalSinkState>(context.client);
|
70
|
+
|
71
|
+
// create the local index
|
72
|
+
switch (info->index_type) {
|
73
|
+
case IndexType::ART: {
|
74
|
+
auto &storage = table.GetStorage();
|
75
|
+
state->local_index = make_uniq<ART>(storage_ids, TableIOManager::Get(storage), unbound_expressions,
|
76
|
+
info->constraint_type, storage.db);
|
77
|
+
break;
|
78
|
+
}
|
79
|
+
default:
|
80
|
+
throw InternalException("Unimplemented index type");
|
81
|
+
}
|
82
|
+
state->keys = vector<ARTKey>(STANDARD_VECTOR_SIZE);
|
83
|
+
state->key_chunk.Initialize(Allocator::Get(context.client), state->local_index->logical_types);
|
84
|
+
|
85
|
+
for (idx_t i = 0; i < state->key_chunk.ColumnCount(); i++) {
|
86
|
+
state->key_column_ids.push_back(i);
|
87
|
+
}
|
88
|
+
return std::move(state);
|
89
|
+
}
|
90
|
+
|
91
|
+
SinkResultType PhysicalCreateIndex::SinkUnsorted(Vector &row_identifiers, OperatorSinkInput &input) const {
|
92
|
+
|
93
|
+
auto &l_state = input.local_state.Cast<CreateIndexLocalSinkState>();
|
94
|
+
auto count = l_state.key_chunk.size();
|
95
|
+
|
96
|
+
// get the corresponding row IDs
|
97
|
+
row_identifiers.Flatten(count);
|
98
|
+
auto row_ids = FlatVector::GetData<row_t>(row_identifiers);
|
99
|
+
|
100
|
+
// insert the row IDs
|
101
|
+
auto &art = l_state.local_index->Cast<ART>();
|
102
|
+
for (idx_t i = 0; i < count; i++) {
|
103
|
+
if (!art.Insert(*art.tree, l_state.keys[i], 0, row_ids[i])) {
|
104
|
+
throw ConstraintException("Data contains duplicates on indexed column(s)");
|
105
|
+
}
|
106
|
+
}
|
107
|
+
|
108
|
+
return SinkResultType::NEED_MORE_INPUT;
|
109
|
+
}
|
110
|
+
|
111
|
+
SinkResultType PhysicalCreateIndex::SinkSorted(Vector &row_identifiers, OperatorSinkInput &input) const {
|
112
|
+
|
113
|
+
auto &l_state = input.local_state.Cast<CreateIndexLocalSinkState>();
|
114
|
+
auto &storage = table.GetStorage();
|
115
|
+
auto &l_index = l_state.local_index;
|
116
|
+
|
117
|
+
// create an ART from the chunk
|
118
|
+
auto art = make_uniq<ART>(l_index->column_ids, l_index->table_io_manager, l_index->unbound_expressions,
|
119
|
+
l_index->constraint_type, storage.db, l_index->Cast<ART>().allocators);
|
120
|
+
if (!art->ConstructFromSorted(l_state.key_chunk.size(), l_state.keys, row_identifiers)) {
|
121
|
+
throw ConstraintException("Data contains duplicates on indexed column(s)");
|
122
|
+
}
|
123
|
+
|
124
|
+
// merge into the local ART
|
125
|
+
if (!l_index->MergeIndexes(*art)) {
|
126
|
+
throw ConstraintException("Data contains duplicates on indexed column(s)");
|
127
|
+
}
|
128
|
+
|
129
|
+
return SinkResultType::NEED_MORE_INPUT;
|
130
|
+
}
|
131
|
+
|
132
|
+
SinkResultType PhysicalCreateIndex::Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const {
|
133
|
+
|
134
|
+
D_ASSERT(chunk.ColumnCount() >= 2);
|
135
|
+
|
136
|
+
// generate the keys for the given input
|
137
|
+
auto &l_state = input.local_state.Cast<CreateIndexLocalSinkState>();
|
138
|
+
l_state.key_chunk.ReferenceColumns(chunk, l_state.key_column_ids);
|
139
|
+
l_state.arena_allocator.Reset();
|
140
|
+
ART::GenerateKeys(l_state.arena_allocator, l_state.key_chunk, l_state.keys);
|
141
|
+
|
142
|
+
// insert the keys and their corresponding row IDs
|
143
|
+
auto &row_identifiers = chunk.data[chunk.ColumnCount() - 1];
|
144
|
+
if (sorted) {
|
145
|
+
return SinkSorted(row_identifiers, input);
|
146
|
+
}
|
147
|
+
return SinkUnsorted(row_identifiers, input);
|
148
|
+
}
|
149
|
+
|
150
|
+
SinkCombineResultType PhysicalCreateIndex::Combine(ExecutionContext &context, OperatorSinkCombineInput &input) const {
|
151
|
+
|
152
|
+
auto &gstate = input.global_state.Cast<CreateIndexGlobalSinkState>();
|
153
|
+
auto &lstate = input.local_state.Cast<CreateIndexLocalSinkState>();
|
154
|
+
|
155
|
+
// merge the local index into the global index
|
156
|
+
if (!gstate.global_index->MergeIndexes(*lstate.local_index)) {
|
157
|
+
throw ConstraintException("Data contains duplicates on indexed column(s)");
|
158
|
+
}
|
159
|
+
|
160
|
+
return SinkCombineResultType::FINISHED;
|
161
|
+
}
|
162
|
+
|
163
|
+
// Installs the finished global index as the new index of the table: vacuums and verifies it, creates the
// catalog entry, fills in the entry's fields, and finally moves the index into the table storage.
// Returns READY both when the index was added and when creation was skipped because the entry already exists.
SinkFinalizeType PhysicalCreateIndex::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
                                               OperatorSinkFinalizeInput &input) const {
	auto &gstate = input.global_state.Cast<CreateIndexGlobalSinkState>();
	auto &built_index = gstate.global_index;

	// vacuum excess memory and verify
	built_index->Vacuum();
	D_ASSERT(!built_index->VerifyAndToString(true).empty());

	// the index can only be attached while the table storage is still the root
	auto &data_table = table.GetStorage();
	if (!data_table.IsRoot()) {
		throw TransactionException("Transaction conflict: cannot add an index to a table that has been altered!");
	}

	auto created_entry = table.schema.CreateIndex(context, *info, table).get();
	if (!created_entry) {
		// index already exists, but error ignored because of IF NOT EXISTS
		D_ASSERT(info->on_conflict == OnCreateConflict::IGNORE_ON_CONFLICT);
		return SinkFinalizeType::READY;
	}

	// populate the catalog entry; the raw index pointer has to be taken before ownership is moved below
	auto &duck_index = created_entry->Cast<DuckIndexEntry>();
	duck_index.index = built_index.get();
	duck_index.info = data_table.info;
	for (auto &expression : info->parsed_expressions) {
		duck_index.parsed_expressions.push_back(expression->Copy());
	}

	// hand the index over to the storage
	data_table.info->indexes.AddIndex(std::move(built_index));
	return SinkFinalizeType::READY;
}
|
197
|
+
|
198
|
+
//===--------------------------------------------------------------------===//
|
199
|
+
// Source
|
200
|
+
//===--------------------------------------------------------------------===//
|
201
|
+
|
202
|
+
// Source side of the operator: leaves the output chunk untouched and reports FINISHED right away.
SourceResultType PhysicalCreateIndex::GetData(ExecutionContext &context, DataChunk &chunk,
                                              OperatorSourceInput &input) const {
	const auto source_result = SourceResultType::FINISHED;
	return source_result;
}
|
206
|
+
|
207
|
+
} // namespace duckdb
|
@@ -0,0 +1,207 @@
|
|
1
|
+
#include "duckdb/execution/partitionable_hashtable.hpp"
|
2
|
+
|
3
|
+
#include "duckdb/common/radix_partitioning.hpp"
|
4
|
+
|
5
|
+
namespace duckdb {
|
6
|
+
|
7
|
+
// Derives the radix partitioning parameters from an upper bound on the partition count.
// n_partitions is PreviousPowerOfTwo(n_partitions_upper_bound); radix_bits is computed from n_partitions,
// and radix_mask / radix_shift are computed from radix_bits.
// NOTE(review): each initializer reads a previously listed member, so this presumably relies on the members
// being declared in the same order in the header — confirm.
RadixPartitionInfo::RadixPartitionInfo(const idx_t n_partitions_upper_bound)
    : n_partitions(PreviousPowerOfTwo(n_partitions_upper_bound)),
      radix_bits(RadixPartitioning::RadixBits(n_partitions)),
      radix_mask(RadixPartitioning::Mask(radix_bits)),
      radix_shift(RadixPartitioning::Shift(radix_bits)) {

	// sanity checks on the derived values
	D_ASSERT(radix_bits <= RadixPartitioning::MAX_RADIX_BITS);
	D_ASSERT(n_partitions > 0);
	D_ASSERT(n_partitions == RadixPartitioning::NumberOfPartitions(radix_bits));
	D_ASSERT(IsPowerOfTwo(n_partitions));
}
|
17
|
+
|
18
|
+
// Sets up a hash table that starts out unpartitioned (is_partitioned = false).
// Allocates one selection vector and one size counter per partition, initializes the scratch chunks used
// for slicing, and records the row width of the group + aggregate layout in tuple_size.
PartitionableHashTable::PartitionableHashTable(ClientContext &context, Allocator &allocator,
                                               RadixPartitionInfo &partition_info_p, vector<LogicalType> group_types_p,
                                               vector<LogicalType> payload_types_p,
                                               vector<BoundAggregateExpression *> bindings_p)
    : context(context), allocator(allocator), group_types(std::move(group_types_p)),
      payload_types(std::move(payload_types_p)), bindings(std::move(bindings_p)), is_partitioned(false),
      partition_info(partition_info_p), hashes(LogicalType::HASH), hashes_subset(LogicalType::HASH) {

	// one selection vector (plus its fill count) per partition
	sel_vectors.resize(partition_info.n_partitions);
	sel_vector_sizes.resize(partition_info.n_partitions);

	// scratch chunks; the payload chunk is only initialized when there are payload columns
	group_subset.Initialize(allocator, group_types);
	if (!payload_types.empty()) {
		payload_subset.Initialize(allocator, payload_types);
	}

	for (auto &sel_vector : sel_vectors) {
		sel_vector.Initialize();
	}

	// the row width of the layout determines the tuple size
	RowLayout row_layout;
	row_layout.Initialize(group_types, AggregateObject::CreateAggregateObjects(bindings));
	tuple_size = row_layout.GetRowWidth();
}
|
41
|
+
|
42
|
+
// Chooses the hash table entry width: HT_WIDTH_32 unless its maximum capacity for this tuple size is below
// STANDARD_VECTOR_SIZE, in which case HT_WIDTH_64 is used.
HtEntryType PartitionableHashTable::GetHTEntrySize() {
	// we need at least STANDARD_VECTOR_SIZE entries to fit in the hash table
	const auto max_capacity_32 = GroupedAggregateHashTable::GetMaxCapacity(HtEntryType::HT_WIDTH_32, tuple_size);
	const bool too_small = max_capacity_32 < STANDARD_VECTOR_SIZE;
	return too_small ? HtEntryType::HT_WIDTH_64 : HtEntryType::HT_WIDTH_32;
}
|
49
|
+
|
50
|
+
// Returns true when the total size of `ht` exceeds its share of the memory budget.
// The share is 0.6 * max memory, divided by the number of threads and by the number of partitions
// (an unpartitioned table counts as one partition).
bool OverMemoryLimit(ClientContext &context, const bool is_partitioned, const RadixPartitionInfo &partition_info,
                     const GroupedAggregateHashTable &ht) {
	const auto partition_count = is_partitioned ? partition_info.n_partitions : 1;
	const auto memory_limit = BufferManager::GetBufferManager(context).GetMaxMemory();
	const auto thread_count = TaskScheduler::GetScheduler(context).NumberOfThreads();

	// evaluated left to right in floating point, starting from the 0.6 factor
	const auto partition_budget = 0.6 * memory_limit / thread_count / partition_count;
	return ht.TotalSize() > partition_budget;
}
|
58
|
+
|
59
|
+
// Adds a chunk to the last hash table of `list`, first appending a fresh hash table when the list is empty,
// when the last table would reach its max capacity with this chunk, or when it is over the memory limit.
// Returns whatever the hash table's AddChunk returns.
idx_t PartitionableHashTable::ListAddChunk(HashTableList &list, DataChunk &groups, Vector &group_hashes,
                                           DataChunk &payload, const unsafe_vector<idx_t> &filter) {
	// If this is false, a single AddChunk would overflow the max capacity
	D_ASSERT(list.empty() || groups.size() <= list.back()->MaxCapacity());

	// decide whether the chunk needs a new hash table; the last table is only inspected if there is one
	bool needs_new_ht = list.empty();
	if (!needs_new_ht) {
		auto &current_ht = *list.back();
		needs_new_ht = current_ht.Count() + groups.size() >= current_ht.MaxCapacity() ||
		               OverMemoryLimit(context, is_partitioned, partition_info, current_ht);
	}

	if (needs_new_ht) {
		idx_t capacity = GroupedAggregateHashTable::InitialCapacity();
		if (!list.empty()) {
			auto &previous_ht = *list.back();
			// the new table starts with the capacity of the one it replaces
			capacity = previous_ht.Capacity();
			// early release first part of ht and prevent adding of more data
			previous_ht.Finalize();
		}
		list.push_back(make_uniq<GroupedAggregateHashTable>(context, allocator, group_types, payload_types, bindings,
		                                                    GetHTEntrySize(), capacity));
	}

	return list.back()->AddChunk(append_state, groups, group_hashes, payload, filter);
}
|
76
|
+
|
77
|
+
// Hashes the group columns and adds the chunk to the hash table(s).
// If `do_partition` is set and the table is not partitioned yet, it is partitioned first. An unpartitioned
// table forwards the whole chunk to ListAddChunk; a partitioned table splits the rows by hash partition and
// adds each slice to the matching partition, returning the sum of the per-partition ListAddChunk results.
idx_t PartitionableHashTable::AddChunk(DataChunk &groups, DataChunk &payload, bool do_partition,
                                       const unsafe_vector<idx_t> &filter) {
	groups.Hash(hashes);

	// partition when we are asked to, unless that already happened
	if (do_partition && !IsPartitioned()) {
		Partition(false);
	}

	// still unpartitioned: the chunk goes into the unpartitioned list as a whole
	if (!IsPartitioned()) {
		return ListAddChunk(unpartitioned_hts, groups, hashes, payload, filter);
	}

	// makes no sense to do this with 1 partition
	// NOTE(review): the assertion below only checks > 0, whereas Partition() asserts > 1 — confirm intent
	D_ASSERT(partition_info.n_partitions > 0);

	// reset the fill count of every partition's selection vector
	for (hash_t part = 0; part < partition_info.n_partitions; ++part) {
		sel_vector_sizes[part] = 0;
	}

	const auto row_count = groups.size();
	hashes.Flatten(row_count);
	const auto hash_data = FlatVector::GetData<hash_t>(hashes);

	// Determine for every partition how much data will be sinked into it
	for (idx_t row = 0; row < row_count; ++row) {
		const auto target = partition_info.GetHashPartition(hash_data[row]);
		D_ASSERT(target < partition_info.n_partitions);
		auto &fill_count = sel_vector_sizes[target];
		sel_vectors[target].set_index(fill_count, row);
		++fill_count;
	}

#ifdef DEBUG
	// make sure we have lost no rows
	idx_t routed_rows = 0;
	for (idx_t part = 0; part < partition_info.n_partitions; ++part) {
		routed_rows += sel_vector_sizes[part];
	}
	D_ASSERT(routed_rows == groups.size());
#endif

	// slice groups, payload and hashes per partition and add each slice to that partition's list
	idx_t added_groups = 0;
	for (hash_t part = 0; part < partition_info.n_partitions; ++part) {
		auto &part_sel = sel_vectors[part];
		const auto part_rows = sel_vector_sizes[part];

		group_subset.Slice(groups, part_sel, part_rows);
		if (payload_types.empty()) {
			// no payload columns: only the cardinality has to match
			payload_subset.SetCardinality(part_rows);
		} else {
			payload_subset.Slice(payload, part_sel, part_rows);
		}
		hashes_subset.Slice(hashes, part_sel, part_rows);

		added_groups += ListAddChunk(radix_partitioned_hts[part], group_subset, hashes_subset, payload_subset, filter);
	}
	return added_groups;
}
|
129
|
+
|
130
|
+
void PartitionableHashTable::Partition(bool sink_done) {
|
131
|
+
D_ASSERT(!IsPartitioned());
|
132
|
+
D_ASSERT(radix_partitioned_hts.empty());
|
133
|
+
D_ASSERT(partition_info.n_partitions > 1);
|
134
|
+
|
135
|
+
vector<GroupedAggregateHashTable *> partition_hts(partition_info.n_partitions);
|
136
|
+
radix_partitioned_hts.resize(partition_info.n_partitions);
|
137
|
+
for (auto &unpartitioned_ht : unpartitioned_hts) {
|
138
|
+
for (idx_t r = 0; r < partition_info.n_partitions; r++) {
|
139
|
+
radix_partitioned_hts[r].push_back(make_uniq<GroupedAggregateHashTable>(
|
140
|
+
context, allocator, group_types, payload_types, bindings, GetHTEntrySize()));
|
141
|
+
partition_hts[r] = radix_partitioned_hts[r].back().get();
|
142
|
+
}
|
143
|
+
unpartitioned_ht->Partition(partition_hts, partition_info.radix_bits, sink_done);
|
144
|
+
unpartitioned_ht.reset();
|
145
|
+
}
|
146
|
+
unpartitioned_hts.clear();
|
147
|
+
is_partitioned = true;
|
148
|
+
}
|
149
|
+
|
150
|
+
bool PartitionableHashTable::IsPartitioned() {
|
151
|
+
return is_partitioned;
|
152
|
+
}
|
153
|
+
|
154
|
+
// Moves the hash table list of the given partition out of this object and returns it.
// Only valid on a partitioned table and for an in-range partition index (checked by assertions).
HashTableList PartitionableHashTable::GetPartition(idx_t partition) {
	D_ASSERT(IsPartitioned());
	D_ASSERT(partition < partition_info.n_partitions);
	D_ASSERT(radix_partitioned_hts.size() > partition);

	// the stored list is moved from, not copied
	auto &requested_list = radix_partitioned_hts[partition];
	return std::move(requested_list);
}
|
160
|
+
|
161
|
+
// Moves the list of unpartitioned hash tables out of this object and returns it.
// Only valid while the table has not been partitioned (checked by assertion).
HashTableList PartitionableHashTable::GetUnpartitioned() {
	D_ASSERT(!IsPartitioned());
	auto &unpartitioned_list = unpartitioned_hts;
	return std::move(unpartitioned_list);
}
|
165
|
+
|
166
|
+
// Returns the sum of Count() over all hash tables of the given partition.
// NOTE(review): unlike GetPartition, `partition` is not range-checked by an assertion here — confirm callers
// only pass valid indices.
idx_t PartitionableHashTable::GetPartitionCount(idx_t partition) const {
	const auto &partition_list = radix_partitioned_hts[partition];
	idx_t count_sum = 0;
	for (const auto &partition_ht : partition_list) {
		count_sum += partition_ht->Count();
	}
	return count_sum;
}
|
173
|
+
|
174
|
+
// Returns the sum of DataSize() over all hash tables of the given partition.
// NOTE(review): unlike GetPartition, `partition` is not range-checked by an assertion here — confirm callers
// only pass valid indices.
idx_t PartitionableHashTable::GetPartitionSize(idx_t partition) const {
	const auto &partition_list = radix_partitioned_hts[partition];
	idx_t size_sum = 0;
	for (const auto &partition_ht : partition_list) {
		size_sum += partition_ht->DataSize();
	}
	return size_sum;
}
|
181
|
+
|
182
|
+
void PartitionableHashTable::Finalize() {
|
183
|
+
if (IsPartitioned()) {
|
184
|
+
for (auto &ht_list : radix_partitioned_hts) {
|
185
|
+
for (auto &ht : ht_list) {
|
186
|
+
D_ASSERT(ht);
|
187
|
+
ht->Finalize();
|
188
|
+
}
|
189
|
+
}
|
190
|
+
} else {
|
191
|
+
for (auto &ht : unpartitioned_hts) {
|
192
|
+
D_ASSERT(ht);
|
193
|
+
ht->Finalize();
|
194
|
+
}
|
195
|
+
}
|
196
|
+
}
|
197
|
+
|
198
|
+
// Appends the contents of `ht` to the last unpartitioned hash table, creating a first table with the
// initial capacity when the unpartitioned list is still empty.
void PartitionableHashTable::Append(GroupedAggregateHashTable &ht) {
	if (unpartitioned_hts.empty()) {
		auto first_ht =
		    make_uniq<GroupedAggregateHashTable>(context, allocator, group_types, payload_types, bindings,
		                                         GetHTEntrySize(), GroupedAggregateHashTable::InitialCapacity());
		unpartitioned_hts.push_back(std::move(first_ht));
	}

	auto &target_ht = unpartitioned_hts.back();
	target_ht->Append(ht);
}
|
206
|
+
|
207
|
+
} // namespace duckdb
|
@@ -130,7 +130,12 @@ void PerfectAggregateHashTable::AddChunk(DataChunk &groups, DataChunk &payload)
|
|
130
130
|
// compute the actual pointer to the data by adding it to the base HT pointer and multiplying by the tuple size
|
131
131
|
for (idx_t i = 0; i < groups.size(); i++) {
|
132
132
|
const auto group = address_data[i];
|
133
|
-
|
133
|
+
if (group >= total_groups) {
|
134
|
+
throw InvalidInputException("Perfect hash aggregate: aggregate group %llu exceeded total groups %llu. This "
|
135
|
+
"likely means that the statistics in your data source are corrupt.\n* PRAGMA "
|
136
|
+
"disable_optimizer to disable optimizations that rely on correct statistics",
|
137
|
+
group, total_groups);
|
138
|
+
}
|
134
139
|
group_is_set[group] = true;
|
135
140
|
address_data[i] = uintptr_t(data) + group * tuple_size;
|
136
141
|
}
|
@@ -117,10 +117,6 @@ void CheckForPerfectJoinOpt(LogicalComparisonJoin &op, PerfectHashJoinStats &joi
|
|
117
117
|
if (join_state.build_range > MAX_BUILD_SIZE) {
|
118
118
|
return;
|
119
119
|
}
|
120
|
-
if (NumericStats::Min(stats_build) <= NumericStats::Min(stats_probe) &&
|
121
|
-
NumericStats::Max(stats_probe) <= NumericStats::Max(stats_build)) {
|
122
|
-
join_state.is_probe_in_domain = true;
|
123
|
-
}
|
124
120
|
join_state.is_build_small = true;
|
125
121
|
return;
|
126
122
|
}
|
@@ -1,24 +1,16 @@
|
|
1
1
|
#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
|
2
|
-
#include "duckdb/execution/operator/projection/physical_projection.hpp"
|
3
2
|
#include "duckdb/execution/operator/filter/physical_filter.hpp"
|
4
|
-
#include "duckdb/execution/operator/scan/physical_table_scan.hpp"
|
5
3
|
#include "duckdb/execution/operator/schema/physical_create_art_index.hpp"
|
6
|
-
#include "duckdb/execution/operator/order/physical_order.hpp"
|
7
4
|
#include "duckdb/execution/physical_plan_generator.hpp"
|
8
5
|
#include "duckdb/planner/operator/logical_create_index.hpp"
|
9
|
-
|
10
|
-
#include "duckdb/
|
11
|
-
#include "duckdb/
|
12
|
-
#include "duckdb/
|
6
|
+
|
7
|
+
#include "duckdb/main/database.hpp"
|
8
|
+
#include "duckdb/execution/index/index_type.hpp"
|
9
|
+
#include "duckdb/execution/index/bound_index.hpp"
|
13
10
|
|
14
11
|
namespace duckdb {
|
15
12
|
|
16
13
|
unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCreateIndex &op) {
|
17
|
-
// generate a physical plan for the parallel index creation which consists of the following operators
|
18
|
-
// table scan - projection (for expression execution) - filter (NOT NULL) - order (if applicable) - create index
|
19
|
-
|
20
|
-
D_ASSERT(op.children.size() == 1);
|
21
|
-
auto table_scan = CreatePlan(*op.children[0]);
|
22
14
|
|
23
15
|
// validate that all expressions contain valid scalar functions
|
24
16
|
// e.g. get_current_timestamp(), random(), and sequence values are not allowed as index keys
|
@@ -30,12 +22,14 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCreateInde
|
|
30
22
|
}
|
31
23
|
}
|
32
24
|
|
33
|
-
//
|
34
|
-
|
35
|
-
|
36
|
-
if (op.info->index_type != ART::TYPE_NAME) {
|
25
|
+
// Do we have a valid index type?
|
26
|
+
const auto index_type = context.db->config.GetIndexTypes().FindByName(op.info->index_type);
|
27
|
+
if (!index_type) {
|
37
28
|
throw BinderException("Unknown index type: " + op.info->index_type);
|
38
29
|
}
|
30
|
+
if (!index_type->create_plan) {
|
31
|
+
throw InternalException("Index type '%s' is missing a create_plan function", op.info->index_type);
|
32
|
+
}
|
39
33
|
|
40
34
|
// table scan operator for index key columns and row IDs
|
41
35
|
dependencies.AddDependency(op.table);
|
@@ -43,78 +37,11 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCreateInde
|
|
43
37
|
D_ASSERT(op.info->scan_types.size() - 1 <= op.info->names.size());
|
44
38
|
D_ASSERT(op.info->scan_types.size() - 1 <= op.info->column_ids.size());
|
45
39
|
|
46
|
-
|
47
|
-
|
48
|
-
vector<LogicalType> new_column_types;
|
49
|
-
vector<unique_ptr<Expression>> select_list;
|
50
|
-
for (idx_t i = 0; i < op.expressions.size(); i++) {
|
51
|
-
new_column_types.push_back(op.expressions[i]->return_type);
|
52
|
-
select_list.push_back(std::move(op.expressions[i]));
|
53
|
-
}
|
54
|
-
new_column_types.emplace_back(LogicalType::ROW_TYPE);
|
55
|
-
select_list.push_back(make_uniq<BoundReferenceExpression>(LogicalType::ROW_TYPE, op.info->scan_types.size() - 1));
|
56
|
-
|
57
|
-
auto projection = make_uniq<PhysicalProjection>(new_column_types, std::move(select_list), op.estimated_cardinality);
|
58
|
-
projection->children.push_back(std::move(table_scan));
|
59
|
-
|
60
|
-
// filter operator for IS_NOT_NULL on each key column
|
61
|
-
|
62
|
-
vector<LogicalType> filter_types;
|
63
|
-
vector<unique_ptr<Expression>> filter_select_list;
|
64
|
-
|
65
|
-
for (idx_t i = 0; i < new_column_types.size() - 1; i++) {
|
66
|
-
filter_types.push_back(new_column_types[i]);
|
67
|
-
auto is_not_null_expr =
|
68
|
-
make_uniq<BoundOperatorExpression>(ExpressionType::OPERATOR_IS_NOT_NULL, LogicalType::BOOLEAN);
|
69
|
-
auto bound_ref = make_uniq<BoundReferenceExpression>(new_column_types[i], i);
|
70
|
-
is_not_null_expr->children.push_back(std::move(bound_ref));
|
71
|
-
filter_select_list.push_back(std::move(is_not_null_expr));
|
72
|
-
}
|
73
|
-
|
74
|
-
auto null_filter =
|
75
|
-
make_uniq<PhysicalFilter>(std::move(filter_types), std::move(filter_select_list), op.estimated_cardinality);
|
76
|
-
null_filter->types.emplace_back(LogicalType::ROW_TYPE);
|
77
|
-
null_filter->children.push_back(std::move(projection));
|
78
|
-
|
79
|
-
// determine if we sort the data prior to index creation
|
80
|
-
// we don't sort, if either VARCHAR or compound key
|
81
|
-
auto perform_sorting = true;
|
82
|
-
if (op.unbound_expressions.size() > 1) {
|
83
|
-
perform_sorting = false;
|
84
|
-
} else if (op.unbound_expressions[0]->return_type.InternalType() == PhysicalType::VARCHAR) {
|
85
|
-
perform_sorting = false;
|
86
|
-
}
|
87
|
-
|
88
|
-
// actual physical create index operator
|
89
|
-
|
90
|
-
auto physical_create_index =
|
91
|
-
make_uniq<PhysicalCreateARTIndex>(op, op.table, op.info->column_ids, std::move(op.info),
|
92
|
-
std::move(op.unbound_expressions), op.estimated_cardinality, perform_sorting);
|
93
|
-
|
94
|
-
if (perform_sorting) {
|
95
|
-
|
96
|
-
// optional order operator
|
97
|
-
vector<BoundOrderByNode> orders;
|
98
|
-
vector<idx_t> projections;
|
99
|
-
for (idx_t i = 0; i < new_column_types.size() - 1; i++) {
|
100
|
-
auto col_expr = make_uniq_base<Expression, BoundReferenceExpression>(new_column_types[i], i);
|
101
|
-
orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_FIRST, std::move(col_expr));
|
102
|
-
projections.emplace_back(i);
|
103
|
-
}
|
104
|
-
projections.emplace_back(new_column_types.size() - 1);
|
105
|
-
|
106
|
-
auto physical_order = make_uniq<PhysicalOrder>(new_column_types, std::move(orders), std::move(projections),
|
107
|
-
op.estimated_cardinality);
|
108
|
-
physical_order->children.push_back(std::move(null_filter));
|
109
|
-
|
110
|
-
physical_create_index->children.push_back(std::move(physical_order));
|
111
|
-
} else {
|
112
|
-
|
113
|
-
// no ordering
|
114
|
-
physical_create_index->children.push_back(std::move(null_filter));
|
115
|
-
}
|
40
|
+
D_ASSERT(op.children.size() == 1);
|
41
|
+
auto table_scan = CreatePlan(*op.children[0]);
|
116
42
|
|
117
|
-
|
43
|
+
PlanIndexInput input(context, op, table_scan);
|
44
|
+
return index_type->create_plan(input);
|
118
45
|
}
|
119
46
|
|
120
47
|
} // namespace duckdb
|
@@ -11,7 +11,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalExport &op
|
|
11
11
|
throw PermissionException("Export is disabled through configuration");
|
12
12
|
}
|
13
13
|
auto export_node = make_uniq<PhysicalExport>(op.types, op.function, std::move(op.copy_info),
|
14
|
-
op.estimated_cardinality, op.exported_tables);
|
14
|
+
op.estimated_cardinality, std::move(op.exported_tables));
|
15
15
|
// plan the underlying copy statements, if any
|
16
16
|
if (!op.children.empty()) {
|
17
17
|
auto plan = CreatePlan(*op.children[0]);
|
@@ -159,7 +159,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalGet &op) {
|
|
159
159
|
vector<unique_ptr<Expression>> expressions;
|
160
160
|
for (auto &column_id : column_ids) {
|
161
161
|
if (column_id == COLUMN_IDENTIFIER_ROW_ID) {
|
162
|
-
types.emplace_back(LogicalType::
|
162
|
+
types.emplace_back(LogicalType::ROW_TYPE);
|
163
163
|
expressions.push_back(make_uniq<BoundConstantExpression>(Value::BIGINT(0)));
|
164
164
|
} else {
|
165
165
|
auto type = op.returned_types[column_id];
|
@@ -79,7 +79,7 @@ unique_ptr<DataChunk> ReservoirSample::GetChunk() {
|
|
79
79
|
for (idx_t i = samples_remaining; i < collected_sample_count; i++) {
|
80
80
|
sel.set_index(i - samples_remaining, i);
|
81
81
|
}
|
82
|
-
ret->Initialize(allocator, reservoir_types
|
82
|
+
ret->Initialize(allocator, reservoir_types);
|
83
83
|
ret->Slice(*reservoir_data_chunk, sel, STANDARD_VECTOR_SIZE);
|
84
84
|
ret->SetCardinality(STANDARD_VECTOR_SIZE);
|
85
85
|
// reduce capacity and cardinality of the sample data chunk
|
@@ -1671,23 +1671,23 @@ void WindowLeadLagExecutor::EvaluateInternal(WindowExecutorGlobalState &gstate,
|
|
1671
1671
|
// else offset is zero, so don't move.
|
1672
1672
|
|
1673
1673
|
if (can_shift) {
|
1674
|
+
const auto target_limit = MinValue(partition_end[i], row_end) - row_idx;
|
1674
1675
|
if (!delta) {
|
1675
1676
|
// Copy source[index:index+width] => result[i:]
|
1676
1677
|
const auto index = NumericCast<idx_t>(val_idx);
|
1677
1678
|
const auto source_limit = partition_end[i] - index;
|
1678
|
-
const auto target_limit = MinValue(partition_end[i], row_end) - row_idx;
|
1679
1679
|
const auto width = MinValue(source_limit, target_limit);
|
1680
1680
|
auto &source = payload_collection.data[0];
|
1681
1681
|
VectorOperations::Copy(source, result, index + width, index, i);
|
1682
1682
|
i += width;
|
1683
1683
|
row_idx += width;
|
1684
1684
|
} else if (wexpr.default_expr) {
|
1685
|
-
const auto width = MinValue(delta,
|
1685
|
+
const auto width = MinValue(delta, target_limit);
|
1686
1686
|
llstate.leadlag_default.CopyCell(result, i, width);
|
1687
1687
|
i += width;
|
1688
1688
|
row_idx += width;
|
1689
1689
|
} else {
|
1690
|
-
for (idx_t nulls = MinValue(delta,
|
1690
|
+
for (idx_t nulls = MinValue(delta, target_limit); nulls--; ++i, ++row_idx) {
|
1691
1691
|
FlatVector::SetNull(result, i, true);
|
1692
1692
|
}
|
1693
1693
|
}
|