duckdb 0.8.2-dev145.0 → 0.8.2-dev1493.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +15 -12
- package/binding.gyp.in +1 -1
- package/configure.py +1 -1
- package/duckdb_extension_config.cmake +10 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
- package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
- package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
- package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
- package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
- package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
- package/src/duckdb/extension/icu/icu_extension.cpp +3 -3
- package/src/duckdb/extension/json/include/json_common.hpp +47 -231
- package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
- package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
- package/src/duckdb/extension/json/json_common.cpp +272 -40
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +17 -37
- package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
- package/src/duckdb/extension/json/json_functions.cpp +24 -24
- package/src/duckdb/extension/json/json_scan.cpp +3 -6
- package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
- package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
- package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
- package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
- package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
- package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
- package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
- package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
- package/src/duckdb/extension/parquet/parquet_extension.cpp +192 -20
- package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
- package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
- package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
- package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
- package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
- package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
- package/src/duckdb/src/common/allocator.cpp +14 -2
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +5 -11
- package/src/duckdb/src/common/assert.cpp +3 -0
- package/src/duckdb/src/common/enum_util.cpp +42 -5
- package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/exception.cpp +2 -2
- package/src/duckdb/src/common/file_system.cpp +19 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
- package/src/duckdb/src/common/local_file_system.cpp +2 -2
- package/src/duckdb/src/common/multi_file_reader.cpp +184 -20
- package/src/duckdb/src/common/operator/cast_operators.cpp +35 -1
- package/src/duckdb/src/common/radix_partitioning.cpp +26 -8
- package/src/duckdb/src/common/re2_regex.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
- package/src/duckdb/src/common/sort/partition_state.cpp +44 -11
- package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
- package/src/duckdb/src/common/types/bit.cpp +51 -0
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
- package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
- package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
- package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
- package/src/duckdb/src/common/types/date.cpp +9 -0
- package/src/duckdb/src/common/types/list_segment.cpp +24 -74
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
- package/src/duckdb/src/common/types/uuid.cpp +2 -2
- package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
- package/src/duckdb/src/common/virtual_file_system.cpp +138 -1
- package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
- package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
- package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
- package/src/duckdb/src/core_functions/function_list.cpp +4 -2
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
- package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
- package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
- package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
- package/src/duckdb/src/execution/expression_executor.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +149 -139
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
- package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
- package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
- package/src/duckdb/src/execution/index/art/node.cpp +113 -120
- package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
- package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
- package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
- package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
- package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
- package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
- package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
- package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
- package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +414 -283
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +22 -3
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
- package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
- package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
- package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -2
- package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
- package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
- package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
- package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
- package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
- package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
- package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
- package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
- package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
- package/src/duckdb/src/function/function.cpp +3 -1
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
- package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
- package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
- package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
- package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
- package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
- package/src/duckdb/src/function/table/read_csv.cpp +100 -17
- package/src/duckdb/src/function/table/table_scan.cpp +9 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
- package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
- package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
- package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +43 -3
- package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -0
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
- package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
- package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +38 -97
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +31 -11
- package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -10
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
- package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
- package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
- package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
- package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
- package/src/duckdb/src/include/duckdb/main/settings.hpp +30 -1
- package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
- package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
- package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
- package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
- package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
- package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
- package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +8 -5
- package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
- package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
- package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
- package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
- package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
- package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
- package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
- package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
- package/src/duckdb/src/include/duckdb.h +28 -0
- package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
- package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
- package/src/duckdb/src/main/config.cpp +3 -0
- package/src/duckdb/src/main/database.cpp +1 -1
- package/src/duckdb/src/main/extension/extension_helper.cpp +96 -89
- package/src/duckdb/src/main/extension/extension_install.cpp +6 -0
- package/src/duckdb/src/main/extension/extension_load.cpp +10 -1
- package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
- package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
- package/src/duckdb/src/main/relation.cpp +6 -5
- package/src/duckdb/src/main/settings/settings.cpp +64 -18
- package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
- package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
- package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
- package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
- package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
- package/src/duckdb/src/optimizer/optimizer.cpp +51 -14
- package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
- package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
- package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
- package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
- package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
- package/src/duckdb/src/parallel/executor.cpp +15 -0
- package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
- package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
- package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
- package/src/duckdb/src/parser/expression/case_expression.cpp +0 -13
- package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
- package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
- package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
- package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
- package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
- package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
- package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/parameter_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
- package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
- package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
- package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
- package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
- package/src/duckdb/src/parser/parser.cpp +8 -2
- package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
- package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
- package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
- package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
- package/src/duckdb/src/parser/query_node.cpp +15 -37
- package/src/duckdb/src/parser/result_modifier.cpp +0 -74
- package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
- package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
- package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
- package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
- package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -23
- package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
- package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
- package/src/duckdb/src/parser/tableref.cpp +0 -44
- package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
- package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
- package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
- package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
- package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
- package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
- package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
- package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
- package/src/duckdb/src/parser/transformer.cpp +15 -0
- package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
- package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
- package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
- package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +5 -4
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -49
- package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +61 -26
- package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
- package/src/duckdb/src/planner/binder.cpp +5 -0
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
- package/src/duckdb/src/planner/expression_binder.cpp +3 -0
- package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
- package/src/duckdb/src/planner/logical_operator.cpp +5 -0
- package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
- package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
- package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
- package/src/duckdb/src/planner/operator/logical_get.cpp +9 -4
- package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
- package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
- package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
- package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
- package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
- package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
- package/src/duckdb/src/storage/compression/rle.cpp +0 -1
- package/src/duckdb/src/storage/data_table.cpp +1 -1
- package/src/duckdb/src/storage/local_storage.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +340 -0
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
- package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +86 -0
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +166 -0
- package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
- package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
- package/src/duckdb/src/storage/table/row_group.cpp +25 -9
- package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
- package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
- package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
- package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
- package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
- package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
- package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
- package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
- package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
- package/src/duckdb/ub_src_optimizer.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
- package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
- package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
- package/src/duckdb/ub_src_planner_operator.cpp +4 -0
- package/src/duckdb/ub_src_storage_serialization.cpp +8 -0
- package/src/statement.cpp +10 -3
- package/test/test_all_types.test.ts +233 -0
- package/tsconfig.json +1 -0
- package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
- package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -32,7 +32,7 @@ string PhysicalComparisonJoin::ParamsToString() const {
|
|
32
32
|
}
|
33
33
|
extra_info += "\n[INFOSEPARATOR]\n";
|
34
34
|
extra_info += StringUtil::Format("EC: %llu\n", estimated_props->GetCardinality<idx_t>());
|
35
|
-
extra_info += StringUtil::Format("Cost: %llu",
|
35
|
+
extra_info += StringUtil::Format("Cost: %llu", estimated_props->GetCost<idx_t>());
|
36
36
|
return extra_info;
|
37
37
|
}
|
38
38
|
|
@@ -96,7 +96,7 @@ public:
|
|
96
96
|
class HashJoinLocalSinkState : public LocalSinkState {
|
97
97
|
public:
|
98
98
|
HashJoinLocalSinkState(const PhysicalHashJoin &op, ClientContext &context) : build_executor(context) {
|
99
|
-
auto &allocator =
|
99
|
+
auto &allocator = BufferAllocator::Get(context);
|
100
100
|
if (!op.right_projection_map.empty()) {
|
101
101
|
build_chunk.Initialize(allocator, op.build_types);
|
102
102
|
}
|
@@ -124,7 +124,7 @@ public:
|
|
124
124
|
unique_ptr<JoinHashTable> PhysicalHashJoin::InitializeHashTable(ClientContext &context) const {
|
125
125
|
auto result =
|
126
126
|
make_uniq<JoinHashTable>(BufferManager::GetBufferManager(context), conditions, build_types, join_type);
|
127
|
-
result->max_ht_size = double(BufferManager::GetBufferManager(context).GetMaxMemory()
|
127
|
+
result->max_ht_size = double(0.6) * BufferManager::GetBufferManager(context).GetMaxMemory();
|
128
128
|
if (!delim_types.empty() && join_type == JoinType::MARK) {
|
129
129
|
// correlated MARK join
|
130
130
|
if (delim_types.size() + 1 == conditions.size()) {
|
@@ -162,7 +162,7 @@ unique_ptr<JoinHashTable> PhysicalHashJoin::InitializeHashTable(ClientContext &c
|
|
162
162
|
payload_types.push_back(aggr->return_type);
|
163
163
|
info.correlated_aggregates.push_back(std::move(aggr));
|
164
164
|
|
165
|
-
auto &allocator =
|
165
|
+
auto &allocator = BufferAllocator::Get(context);
|
166
166
|
info.correlated_counts = make_uniq<GroupedAggregateHashTable>(context, allocator, delim_types,
|
167
167
|
payload_types, correlated_aggregates);
|
168
168
|
info.correlated_types = delim_types;
|
@@ -312,10 +312,10 @@ void HashJoinGlobalSinkState::InitializeProbeSpill() {
|
|
312
312
|
}
|
313
313
|
}
|
314
314
|
|
315
|
-
class
|
315
|
+
class HashJoinRepartitionTask : public ExecutorTask {
|
316
316
|
public:
|
317
|
-
|
318
|
-
|
317
|
+
HashJoinRepartitionTask(shared_ptr<Event> event_p, ClientContext &context, JoinHashTable &global_ht,
|
318
|
+
JoinHashTable &local_ht)
|
319
319
|
: ExecutorTask(context), event(std::move(event_p)), global_ht(global_ht), local_ht(local_ht) {
|
320
320
|
}
|
321
321
|
|
@@ -349,7 +349,7 @@ public:
|
|
349
349
|
partition_tasks.reserve(local_hts.size());
|
350
350
|
for (auto &local_ht : local_hts) {
|
351
351
|
partition_tasks.push_back(
|
352
|
-
make_uniq<
|
352
|
+
make_uniq<HashJoinRepartitionTask>(shared_from_this(), context, *sink.hash_table, *local_ht));
|
353
353
|
}
|
354
354
|
SetTasks(std::move(partition_tasks));
|
355
355
|
}
|
@@ -434,7 +434,7 @@ public:
|
|
434
434
|
};
|
435
435
|
|
436
436
|
unique_ptr<OperatorState> PhysicalHashJoin::GetOperatorState(ExecutionContext &context) const {
|
437
|
-
auto &allocator =
|
437
|
+
auto &allocator = BufferAllocator::Get(context.client);
|
438
438
|
auto &sink = sink_state->Cast<HashJoinGlobalSinkState>();
|
439
439
|
auto state = make_uniq<HashJoinOperatorState>(context.client);
|
440
440
|
if (sink.perfect_join_executor) {
|
@@ -532,7 +532,18 @@ public:
|
|
532
532
|
bool AssignTask(HashJoinGlobalSinkState &sink, HashJoinLocalSourceState &lstate);
|
533
533
|
|
534
534
|
idx_t MaxThreads() override {
|
535
|
-
|
535
|
+
D_ASSERT(op.sink_state);
|
536
|
+
auto &gstate = op.sink_state->Cast<HashJoinGlobalSinkState>();
|
537
|
+
|
538
|
+
idx_t count;
|
539
|
+
if (gstate.probe_spill) {
|
540
|
+
count = probe_count;
|
541
|
+
} else if (IsRightOuterJoin(op.join_type)) {
|
542
|
+
count = gstate.hash_table->Count();
|
543
|
+
} else {
|
544
|
+
return 0;
|
545
|
+
}
|
546
|
+
return count / ((idx_t)STANDARD_VECTOR_SIZE * parallel_scan_chunk_count);
|
536
547
|
}
|
537
548
|
|
538
549
|
public:
|
@@ -611,7 +622,7 @@ unique_ptr<GlobalSourceState> PhysicalHashJoin::GetGlobalSourceState(ClientConte
|
|
611
622
|
|
612
623
|
unique_ptr<LocalSourceState> PhysicalHashJoin::GetLocalSourceState(ExecutionContext &context,
|
613
624
|
GlobalSourceState &gstate) const {
|
614
|
-
return make_uniq<HashJoinLocalSourceState>(*this,
|
625
|
+
return make_uniq<HashJoinLocalSourceState>(*this, BufferAllocator::Get(context.client));
|
615
626
|
}
|
616
627
|
|
617
628
|
HashJoinGlobalSourceState::HashJoinGlobalSourceState(const PhysicalHashJoin &op, ClientContext &context)
|
@@ -60,7 +60,7 @@ void PhysicalJoin::BuildJoinPipelines(Pipeline &current, MetaPipeline &meta_pipe
|
|
60
60
|
// Join can become a source operator if it's RIGHT/OUTER, or if the hash join goes out-of-core
|
61
61
|
bool add_child_pipeline = false;
|
62
62
|
auto &join_op = op.Cast<PhysicalJoin>();
|
63
|
-
if (
|
63
|
+
if (join_op.IsSource()) {
|
64
64
|
add_child_pipeline = true;
|
65
65
|
}
|
66
66
|
|
@@ -208,6 +208,7 @@ public:
|
|
208
208
|
idx_t right_position;
|
209
209
|
idx_t right_chunk_index;
|
210
210
|
idx_t right_base;
|
211
|
+
idx_t prev_left_index;
|
211
212
|
|
212
213
|
// Secondary predicate shared data
|
213
214
|
SelectionVector sel;
|
@@ -431,7 +432,8 @@ void PhysicalPiecewiseMergeJoin::ResolveSimpleJoin(ExecutionContext &context, Da
|
|
431
432
|
}
|
432
433
|
}
|
433
434
|
|
434
|
-
static idx_t MergeJoinComplexBlocks(BlockMergeInfo &l, BlockMergeInfo &r, const ExpressionType comparison
|
435
|
+
static idx_t MergeJoinComplexBlocks(BlockMergeInfo &l, BlockMergeInfo &r, const ExpressionType comparison,
|
436
|
+
idx_t &prev_left_index) {
|
435
437
|
const auto cmp = MergeJoinComparisonValue(comparison);
|
436
438
|
|
437
439
|
// The sort parameters should all be the same
|
@@ -465,6 +467,20 @@ static idx_t MergeJoinComplexBlocks(BlockMergeInfo &l, BlockMergeInfo &r, const
|
|
465
467
|
|
466
468
|
idx_t result_count = 0;
|
467
469
|
while (true) {
|
470
|
+
if (l.entry_idx < prev_left_index) {
|
471
|
+
// left side smaller: found match
|
472
|
+
l.result.set_index(result_count, sel_t(l.entry_idx));
|
473
|
+
r.result.set_index(result_count, sel_t(r.entry_idx));
|
474
|
+
result_count++;
|
475
|
+
// move left side forward
|
476
|
+
l.entry_idx++;
|
477
|
+
l_ptr += entry_size;
|
478
|
+
if (result_count == STANDARD_VECTOR_SIZE) {
|
479
|
+
// out of space!
|
480
|
+
break;
|
481
|
+
}
|
482
|
+
continue;
|
483
|
+
}
|
468
484
|
if (l.entry_idx < l.not_null) {
|
469
485
|
int comp_res;
|
470
486
|
if (all_constant) {
|
@@ -474,7 +490,6 @@ static idx_t MergeJoinComplexBlocks(BlockMergeInfo &l, BlockMergeInfo &r, const
|
|
474
490
|
rread.entry_idx = r.entry_idx;
|
475
491
|
comp_res = Comparators::CompareTuple(lread, rread, l_ptr, r_ptr, l.state.sort_layout, external);
|
476
492
|
}
|
477
|
-
|
478
493
|
if (comp_res <= cmp) {
|
479
494
|
// left side smaller: found match
|
480
495
|
l.result.set_index(result_count, sel_t(l.entry_idx));
|
@@ -490,6 +505,8 @@ static idx_t MergeJoinComplexBlocks(BlockMergeInfo &l, BlockMergeInfo &r, const
|
|
490
505
|
continue;
|
491
506
|
}
|
492
507
|
}
|
508
|
+
|
509
|
+
prev_left_index = l.entry_idx;
|
493
510
|
// right side smaller or equal, or left side exhausted: move
|
494
511
|
// right pointer forward reset left side to start
|
495
512
|
r.entry_idx++;
|
@@ -521,6 +538,7 @@ OperatorResultType PhysicalPiecewiseMergeJoin::ResolveComplexJoin(ExecutionConte
|
|
521
538
|
state.right_chunk_index = 0;
|
522
539
|
state.right_base = 0;
|
523
540
|
state.left_position = 0;
|
541
|
+
state.prev_left_index = 0;
|
524
542
|
state.right_position = 0;
|
525
543
|
state.first_fetch = false;
|
526
544
|
state.finished = false;
|
@@ -547,7 +565,8 @@ OperatorResultType PhysicalPiecewiseMergeJoin::ResolveComplexJoin(ExecutionConte
|
|
547
565
|
BlockMergeInfo right_info(gstate.table->global_sort_state, state.right_chunk_index, state.right_position,
|
548
566
|
rhs_not_null);
|
549
567
|
|
550
|
-
idx_t result_count =
|
568
|
+
idx_t result_count =
|
569
|
+
MergeJoinComplexBlocks(left_info, right_info, conditions[0].comparison, state.prev_left_index);
|
551
570
|
if (result_count == 0) {
|
552
571
|
// exhausted this chunk on the right side
|
553
572
|
// move to the next right chunk
|
@@ -167,8 +167,8 @@ PhysicalRangeJoin::PhysicalRangeJoin(LogicalOperator &op, PhysicalOperatorType t
|
|
167
167
|
// TODO: use stats to improve the choice?
|
168
168
|
// TODO: Prefer fixed length types?
|
169
169
|
if (conditions.size() > 1) {
|
170
|
-
|
171
|
-
|
170
|
+
vector<JoinCondition> conditions_p(conditions.size());
|
171
|
+
std::swap(conditions_p, conditions);
|
172
172
|
idx_t range_position = 0;
|
173
173
|
idx_t other_position = conditions_p.size();
|
174
174
|
for (idx_t i = 0; i < conditions_p.size(); ++i) {
|
@@ -1,5 +1,4 @@
|
|
1
1
|
#include "duckdb/execution/operator/persistent/base_csv_reader.hpp"
|
2
|
-
|
3
2
|
#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
|
4
3
|
#include "duckdb/common/file_system.hpp"
|
5
4
|
#include "duckdb/common/string_util.hpp"
|
@@ -10,6 +9,7 @@
|
|
10
9
|
#include "duckdb/common/vector_operations/unary_executor.hpp"
|
11
10
|
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
12
11
|
#include "duckdb/function/scalar/strftime_format.hpp"
|
12
|
+
#include "duckdb/main/appender.hpp"
|
13
13
|
#include "duckdb/main/database.hpp"
|
14
14
|
#include "duckdb/parser/column_definition.hpp"
|
15
15
|
#include "duckdb/storage/data_table.hpp"
|
@@ -18,7 +18,8 @@
|
|
18
18
|
#include "duckdb/parser/keyword_helper.hpp"
|
19
19
|
#include "duckdb/main/error_manager.hpp"
|
20
20
|
#include "duckdb/execution/operator/persistent/parallel_csv_reader.hpp"
|
21
|
-
|
21
|
+
#include "duckdb/execution/operator/persistent/csv_rejects_table.hpp"
|
22
|
+
#include "duckdb/main/client_data.hpp"
|
22
23
|
#include <algorithm>
|
23
24
|
#include <cctype>
|
24
25
|
#include <cstring>
|
@@ -448,6 +449,17 @@ bool TryCastFloatingVectorCommaSeparated(BufferedCSVReaderOptions &options, Vect
|
|
448
449
|
}
|
449
450
|
}
|
450
451
|
|
452
|
+
// Location of erroneous value in the current parse chunk
|
453
|
+
struct ErrorLocation {
|
454
|
+
idx_t row_idx;
|
455
|
+
idx_t col_idx;
|
456
|
+
idx_t row_line;
|
457
|
+
|
458
|
+
ErrorLocation(idx_t row_idx, idx_t col_idx, idx_t row_line)
|
459
|
+
: row_idx(row_idx), col_idx(col_idx), row_line(row_line) {
|
460
|
+
}
|
461
|
+
};
|
462
|
+
|
451
463
|
bool BaseCSVReader::Flush(DataChunk &insert_chunk, idx_t buffer_idx, bool try_add_line) {
|
452
464
|
if (parse_chunk.size() == 0) {
|
453
465
|
return true;
|
@@ -506,10 +518,7 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, idx_t buffer_idx, bool try_ad
|
|
506
518
|
if (try_add_line) {
|
507
519
|
return false;
|
508
520
|
}
|
509
|
-
|
510
|
-
conversion_error_ignored = true;
|
511
|
-
continue;
|
512
|
-
}
|
521
|
+
|
513
522
|
string col_name = to_string(col_idx);
|
514
523
|
if (col_idx < names.size()) {
|
515
524
|
col_name = "\"" + names[col_idx] + "\"";
|
@@ -527,16 +536,18 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, idx_t buffer_idx, bool try_ad
|
|
527
536
|
}
|
528
537
|
}
|
529
538
|
|
530
|
-
idx_t error_line;
|
531
539
|
// The line_error must be summed with linenr (All lines emmited from this batch)
|
532
540
|
// But subtracted from the parse_chunk
|
533
541
|
D_ASSERT(line_error + linenr >= parse_chunk.size());
|
534
542
|
line_error += linenr;
|
535
543
|
line_error -= parse_chunk.size();
|
536
544
|
|
537
|
-
error_line = GetLineError(line_error, buffer_idx);
|
545
|
+
auto error_line = GetLineError(line_error, buffer_idx);
|
546
|
+
|
547
|
+
if (options.ignore_errors) {
|
548
|
+
conversion_error_ignored = true;
|
538
549
|
|
539
|
-
if (options.auto_detect) {
|
550
|
+
} else if (options.auto_detect) {
|
540
551
|
throw InvalidInputException("%s in column %s, at line %llu.\n\nParser "
|
541
552
|
"options:\n%s.\n\nConsider either increasing the sample size "
|
542
553
|
"(SAMPLE_SIZE=X [X rows] or SAMPLE_SIZE=-1 [all rows]), "
|
@@ -550,11 +561,19 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, idx_t buffer_idx, bool try_ad
|
|
550
561
|
}
|
551
562
|
if (conversion_error_ignored) {
|
552
563
|
D_ASSERT(options.ignore_errors);
|
564
|
+
|
553
565
|
SelectionVector succesful_rows(parse_chunk.size());
|
554
566
|
idx_t sel_size = 0;
|
555
567
|
|
568
|
+
// Keep track of failed cells
|
569
|
+
vector<ErrorLocation> failed_cells;
|
570
|
+
|
556
571
|
for (idx_t row_idx = 0; row_idx < parse_chunk.size(); row_idx++) {
|
557
|
-
|
572
|
+
|
573
|
+
auto global_row_idx = row_idx + linenr - parse_chunk.size();
|
574
|
+
auto row_line = GetLineError(global_row_idx, buffer_idx, false);
|
575
|
+
|
576
|
+
bool row_failed = false;
|
558
577
|
for (idx_t c = 0; c < reader_data.column_ids.size(); c++) {
|
559
578
|
auto col_idx = reader_data.column_ids[c];
|
560
579
|
auto result_idx = reader_data.column_mapping[c];
|
@@ -564,14 +583,82 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, idx_t buffer_idx, bool try_ad
|
|
564
583
|
|
565
584
|
bool was_already_null = FlatVector::IsNull(parse_vector, row_idx);
|
566
585
|
if (!was_already_null && FlatVector::IsNull(result_vector, row_idx)) {
|
567
|
-
|
568
|
-
|
586
|
+
row_failed = true;
|
587
|
+
failed_cells.emplace_back(row_idx, col_idx, row_line);
|
569
588
|
}
|
570
589
|
}
|
571
|
-
if (!
|
590
|
+
if (!row_failed) {
|
572
591
|
succesful_rows.set_index(sel_size++, row_idx);
|
573
592
|
}
|
574
593
|
}
|
594
|
+
|
595
|
+
// Now do a second pass to produce the reject table entries
|
596
|
+
if (!failed_cells.empty() && !options.rejects_table_name.empty()) {
|
597
|
+
auto limit = options.rejects_limit;
|
598
|
+
|
599
|
+
auto rejects = CSVRejectsTable::GetOrCreate(context, options.rejects_table_name);
|
600
|
+
lock_guard<mutex> lock(rejects->write_lock);
|
601
|
+
|
602
|
+
// short circuit if we already have too many rejects
|
603
|
+
if (limit == 0 || rejects->count < limit) {
|
604
|
+
auto &table = rejects->GetTable(context);
|
605
|
+
InternalAppender appender(context, table);
|
606
|
+
auto file_name = GetFileName();
|
607
|
+
|
608
|
+
for (auto &cell : failed_cells) {
|
609
|
+
if (limit != 0 && rejects->count >= limit) {
|
610
|
+
break;
|
611
|
+
}
|
612
|
+
rejects->count++;
|
613
|
+
|
614
|
+
auto row_idx = cell.row_idx;
|
615
|
+
auto col_idx = cell.col_idx;
|
616
|
+
auto row_line = cell.row_line;
|
617
|
+
|
618
|
+
auto col_name = to_string(col_idx);
|
619
|
+
if (col_idx < names.size()) {
|
620
|
+
col_name = "\"" + names[col_idx] + "\"";
|
621
|
+
}
|
622
|
+
|
623
|
+
auto &parse_vector = parse_chunk.data[col_idx];
|
624
|
+
auto parsed_str = FlatVector::GetData<string_t>(parse_vector)[row_idx];
|
625
|
+
auto &type = insert_chunk.data[col_idx].GetType();
|
626
|
+
auto row_error_msg = StringUtil::Format("Could not convert string '%s' to '%s'",
|
627
|
+
parsed_str.GetString(), type.ToString());
|
628
|
+
|
629
|
+
// Add the row to the rejects table
|
630
|
+
appender.BeginRow();
|
631
|
+
appender.Append(string_t(file_name));
|
632
|
+
appender.Append(row_line);
|
633
|
+
appender.Append(col_idx);
|
634
|
+
appender.Append(string_t(col_name));
|
635
|
+
appender.Append(parsed_str);
|
636
|
+
|
637
|
+
if (!options.rejects_recovery_columns.empty()) {
|
638
|
+
child_list_t<Value> recovery_key;
|
639
|
+
for (auto &key_idx : options.rejects_recovery_column_ids) {
|
640
|
+
// Figure out if the recovery key is valid.
|
641
|
+
// If not, error out for real.
|
642
|
+
auto &component_vector = parse_chunk.data[key_idx];
|
643
|
+
if (FlatVector::IsNull(component_vector, row_idx)) {
|
644
|
+
throw InvalidInputException("%s at line %llu in column %s. Parser options:\n%s ",
|
645
|
+
"Could not parse recovery column", row_line, col_name,
|
646
|
+
options.ToString());
|
647
|
+
}
|
648
|
+
auto component = Value(FlatVector::GetData<string_t>(component_vector)[row_idx]);
|
649
|
+
recovery_key.emplace_back(names[key_idx], component);
|
650
|
+
}
|
651
|
+
appender.Append(Value::STRUCT(recovery_key));
|
652
|
+
}
|
653
|
+
|
654
|
+
appender.Append(string_t(row_error_msg));
|
655
|
+
appender.EndRow();
|
656
|
+
}
|
657
|
+
appender.Close();
|
658
|
+
}
|
659
|
+
}
|
660
|
+
|
661
|
+
// Now slice the insert chunk to only include the succesful rows
|
575
662
|
insert_chunk.Slice(succesful_rows, sel_size);
|
576
663
|
}
|
577
664
|
parse_chunk.Reset();
|
@@ -13,7 +13,7 @@ CSVFileHandle::CSVFileHandle(FileSystem &fs, Allocator &allocator, unique_ptr<Fi
|
|
13
13
|
|
14
14
|
unique_ptr<FileHandle> CSVFileHandle::OpenFileHandle(FileSystem &fs, Allocator &allocator, const string &path,
|
15
15
|
FileCompressionType compression) {
|
16
|
-
auto file_handle = fs.OpenFile(path
|
16
|
+
auto file_handle = fs.OpenFile(path, FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK, compression);
|
17
17
|
if (file_handle->CanSeek()) {
|
18
18
|
file_handle->Reset();
|
19
19
|
}
|
@@ -179,6 +179,26 @@ void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value
|
|
179
179
|
allow_quoted_nulls = ParseBoolean(value, loption);
|
180
180
|
} else if (loption == "parallel") {
|
181
181
|
parallel_mode = ParseBoolean(value, loption) ? ParallelMode::PARALLEL : ParallelMode::SINGLE_THREADED;
|
182
|
+
} else if (loption == "rejects_table") {
|
183
|
+
// skip, handled in SetRejectsOptions
|
184
|
+
auto table_name = ParseString(value, loption);
|
185
|
+
if (table_name.empty()) {
|
186
|
+
throw BinderException("REJECTS_TABLE option cannot be empty");
|
187
|
+
}
|
188
|
+
rejects_table_name = table_name;
|
189
|
+
} else if (loption == "rejects_recovery_columns") {
|
190
|
+
// Get the list of columns to use as a recovery key
|
191
|
+
auto &children = ListValue::GetChildren(value);
|
192
|
+
for (auto &child : children) {
|
193
|
+
auto col_name = child.GetValue<string>();
|
194
|
+
rejects_recovery_columns.push_back(col_name);
|
195
|
+
}
|
196
|
+
} else if (loption == "rejects_limit") {
|
197
|
+
int64_t limit = ParseInteger(value, loption);
|
198
|
+
if (limit < 0) {
|
199
|
+
throw BinderException("Unsupported parameter for REJECTS_LIMIT: cannot be negative");
|
200
|
+
}
|
201
|
+
rejects_limit = limit;
|
182
202
|
} else {
|
183
203
|
throw BinderException("Unrecognized option for CSV reader \"%s\"", loption);
|
184
204
|
}
|
@@ -0,0 +1,48 @@
|
|
1
|
+
#include "duckdb/main/appender.hpp"
|
2
|
+
#include "duckdb/parser/parsed_data/create_table_info.hpp"
|
3
|
+
#include "duckdb/function/table/read_csv.hpp"
|
4
|
+
#include "duckdb/execution/operator/persistent/csv_rejects_table.hpp"
|
5
|
+
#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
|
6
|
+
|
7
|
+
namespace duckdb {
|
8
|
+
|
9
|
+
TableCatalogEntry &CSVRejectsTable::GetTable(ClientContext &context) {
|
10
|
+
auto &temp_catalog = Catalog::GetCatalog(context, TEMP_CATALOG);
|
11
|
+
auto &table_entry = temp_catalog.GetEntry<TableCatalogEntry>(context, TEMP_CATALOG, DEFAULT_SCHEMA, name);
|
12
|
+
return table_entry;
|
13
|
+
}
|
14
|
+
|
15
|
+
shared_ptr<CSVRejectsTable> CSVRejectsTable::GetOrCreate(ClientContext &context, const string &name) {
|
16
|
+
auto key = "CSV_REJECTS_TABLE_CACHE_ENTRY_" + StringUtil::Upper(name);
|
17
|
+
auto &cache = ObjectCache::GetObjectCache(context);
|
18
|
+
return cache.GetOrCreate<CSVRejectsTable>(key, name);
|
19
|
+
}
|
20
|
+
|
21
|
+
void CSVRejectsTable::InitializeTable(ClientContext &context, const ReadCSVData &data) {
|
22
|
+
// (Re)Create the temporary rejects table
|
23
|
+
auto &catalog = Catalog::GetCatalog(context, TEMP_CATALOG);
|
24
|
+
auto info = make_uniq<CreateTableInfo>(TEMP_CATALOG, DEFAULT_SCHEMA, name);
|
25
|
+
info->temporary = true;
|
26
|
+
info->on_conflict = OnCreateConflict::ERROR_ON_CONFLICT;
|
27
|
+
info->columns.AddColumn(ColumnDefinition("file", LogicalType::VARCHAR));
|
28
|
+
info->columns.AddColumn(ColumnDefinition("line", LogicalType::BIGINT));
|
29
|
+
info->columns.AddColumn(ColumnDefinition("column", LogicalType::BIGINT));
|
30
|
+
info->columns.AddColumn(ColumnDefinition("column_name", LogicalType::VARCHAR));
|
31
|
+
info->columns.AddColumn(ColumnDefinition("parsed_value", LogicalType::VARCHAR));
|
32
|
+
|
33
|
+
if (!data.options.rejects_recovery_columns.empty()) {
|
34
|
+
child_list_t<LogicalType> recovery_key_components;
|
35
|
+
for (auto &col_name : data.options.rejects_recovery_columns) {
|
36
|
+
recovery_key_components.emplace_back(col_name, LogicalType::VARCHAR);
|
37
|
+
}
|
38
|
+
info->columns.AddColumn(ColumnDefinition("recovery_columns", LogicalType::STRUCT(recovery_key_components)));
|
39
|
+
}
|
40
|
+
|
41
|
+
info->columns.AddColumn(ColumnDefinition("error", LogicalType::VARCHAR));
|
42
|
+
|
43
|
+
catalog.CreateTable(context, std::move(info));
|
44
|
+
|
45
|
+
count = 0;
|
46
|
+
}
|
47
|
+
|
48
|
+
} // namespace duckdb
|
@@ -635,12 +635,11 @@ void ParallelCSVReader::ParseCSV(DataChunk &insert_chunk) {
|
|
635
635
|
}
|
636
636
|
}
|
637
637
|
|
638
|
-
idx_t ParallelCSVReader::GetLineError(idx_t line_error, idx_t buffer_idx) {
|
638
|
+
idx_t ParallelCSVReader::GetLineError(idx_t line_error, idx_t buffer_idx, bool stop_at_first) {
|
639
639
|
while (true) {
|
640
640
|
if (buffer->line_info->CanItGetLine(file_idx, buffer_idx)) {
|
641
641
|
auto cur_start = verification_positions.beginning_of_first_line + buffer->buffer->GetCSVGlobalStart();
|
642
|
-
|
643
|
-
return buffer->line_info->GetLine(buffer_idx, line_error, file_idx, cur_start, false);
|
642
|
+
return buffer->line_info->GetLine(buffer_idx, line_error, file_idx, cur_start, false, stop_at_first);
|
644
643
|
}
|
645
644
|
}
|
646
645
|
}
|
@@ -1,9 +1,11 @@
|
|
1
1
|
#include "duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp"
|
2
|
+
|
3
|
+
#include "duckdb/common/allocator.hpp"
|
4
|
+
#include "duckdb/common/types/batched_data_collection.hpp"
|
5
|
+
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
2
6
|
#include "duckdb/execution/operator/persistent/physical_copy_to_file.hpp"
|
3
7
|
#include "duckdb/parallel/base_pipeline_event.hpp"
|
4
|
-
|
5
|
-
#include "duckdb/common/types/batched_data_collection.hpp"
|
6
|
-
#include "duckdb/common/allocator.hpp"
|
8
|
+
|
7
9
|
#include <algorithm>
|
8
10
|
|
9
11
|
namespace duckdb {
|
@@ -67,7 +69,7 @@ public:
|
|
67
69
|
optional_idx batch_index;
|
68
70
|
|
69
71
|
void InitializeCollection(ClientContext &context, const PhysicalOperator &op) {
|
70
|
-
collection = make_uniq<ColumnDataCollection>(
|
72
|
+
collection = make_uniq<ColumnDataCollection>(BufferAllocator::Get(context), op.children[0]->types);
|
71
73
|
collection->InitializeAppend(append_state);
|
72
74
|
}
|
73
75
|
};
|
@@ -116,7 +116,7 @@ public:
|
|
116
116
|
optional_idx batch_index;
|
117
117
|
|
118
118
|
void InitializeCollection(ClientContext &context, const PhysicalOperator &op) {
|
119
|
-
collection = make_uniq<ColumnDataCollection>(
|
119
|
+
collection = make_uniq<ColumnDataCollection>(BufferAllocator::Get(context), op.children[0]->types);
|
120
120
|
collection->InitializeAppend(append_state);
|
121
121
|
}
|
122
122
|
};
|
@@ -353,7 +353,7 @@ void PhysicalFixedBatchCopy::RepartitionBatches(ClientContext &context, GlobalSi
|
|
353
353
|
} else {
|
354
354
|
// the collection is too large for a batch - we need to repartition
|
355
355
|
// create an empty collection
|
356
|
-
current_collection = make_uniq<ColumnDataCollection>(
|
356
|
+
current_collection = make_uniq<ColumnDataCollection>(BufferAllocator::Get(context), children[0]->types);
|
357
357
|
}
|
358
358
|
if (current_collection) {
|
359
359
|
current_collection->InitializeAppend(append_state);
|
@@ -373,7 +373,7 @@ void PhysicalFixedBatchCopy::RepartitionBatches(ClientContext &context, GlobalSi
|
|
373
373
|
}
|
374
374
|
// the collection is full - move it to the result and create a new one
|
375
375
|
gstate.AddTask(make_uniq<PrepareBatchTask>(gstate.scheduled_batch_index++, std::move(current_collection)));
|
376
|
-
current_collection = make_uniq<ColumnDataCollection>(
|
376
|
+
current_collection = make_uniq<ColumnDataCollection>(BufferAllocator::Get(context), children[0]->types);
|
377
377
|
current_collection->InitializeAppend(append_state);
|
378
378
|
}
|
379
379
|
}
|
@@ -41,7 +41,7 @@ PhysicalInsert::PhysicalInsert(vector<LogicalType> types_p, TableCatalogEntry &t
|
|
41
41
|
return;
|
42
42
|
}
|
43
43
|
|
44
|
-
D_ASSERT(set_expressions.size() == set_columns.size());
|
44
|
+
D_ASSERT(this->set_expressions.size() == this->set_columns.size());
|
45
45
|
|
46
46
|
// One or more columns are referenced from the existing table,
|
47
47
|
// we use the 'insert_types' to figure out which types these columns have
|
@@ -16,6 +16,7 @@ PhysicalPivot::PhysicalPivot(vector<LogicalType> types_p, unique_ptr<PhysicalOpe
|
|
16
16
|
pivot_map[bound_pivot.pivot_values[p]] = bound_pivot.group_count + p;
|
17
17
|
}
|
18
18
|
// extract the empty aggregate expressions
|
19
|
+
ArenaAllocator allocator(Allocator::DefaultAllocator());
|
19
20
|
for (auto &aggr_expr : bound_pivot.aggregates) {
|
20
21
|
auto &aggr = aggr_expr->Cast<BoundAggregateExpression>();
|
21
22
|
// for each aggregate, initialize an empty aggregate state and finalize it immediately
|
@@ -23,7 +24,7 @@ PhysicalPivot::PhysicalPivot(vector<LogicalType> types_p, unique_ptr<PhysicalOpe
|
|
23
24
|
aggr.function.initialize(state.get());
|
24
25
|
Vector state_vector(Value::POINTER(CastPointerToValue(state.get())));
|
25
26
|
Vector result_vector(aggr_expr->return_type);
|
26
|
-
AggregateInputData aggr_input_data(aggr.bind_info.get(),
|
27
|
+
AggregateInputData aggr_input_data(aggr.bind_info.get(), allocator);
|
27
28
|
aggr.function.finalize(state_vector, aggr_input_data, result_vector, 1, 0);
|
28
29
|
empty_aggregates.push_back(result_vector.GetValue(0));
|
29
30
|
}
|
@@ -64,6 +64,9 @@ void PhysicalColumnDataScan::BuildPipelines(Pipeline &current, MetaPipeline &met
|
|
64
64
|
state.SetPipelineSource(current, delim_join.distinct->Cast<PhysicalOperator>());
|
65
65
|
return;
|
66
66
|
}
|
67
|
+
case PhysicalOperatorType::CTE_SCAN: {
|
68
|
+
break;
|
69
|
+
}
|
67
70
|
case PhysicalOperatorType::RECURSIVE_CTE_SCAN:
|
68
71
|
if (!meta_pipeline.HasRecursiveCTE()) {
|
69
72
|
throw InternalException("Recursive CTE scan found without recursive CTE node");
|
@@ -76,4 +79,20 @@ void PhysicalColumnDataScan::BuildPipelines(Pipeline &current, MetaPipeline &met
|
|
76
79
|
state.SetPipelineSource(current, *this);
|
77
80
|
}
|
78
81
|
|
82
|
+
string PhysicalColumnDataScan::ParamsToString() const {
|
83
|
+
string result = "";
|
84
|
+
switch (type) {
|
85
|
+
case PhysicalOperatorType::CTE_SCAN:
|
86
|
+
case PhysicalOperatorType::RECURSIVE_CTE_SCAN: {
|
87
|
+
result += "\n[INFOSEPARATOR]\n";
|
88
|
+
result += StringUtil::Format("idx: %llu", cte_index);
|
89
|
+
break;
|
90
|
+
}
|
91
|
+
default:
|
92
|
+
break;
|
93
|
+
}
|
94
|
+
|
95
|
+
return result;
|
96
|
+
}
|
97
|
+
|
79
98
|
} // namespace duckdb
|
@@ -16,17 +16,18 @@ PhysicalTableScan::PhysicalTableScan(vector<LogicalType> types, TableFunction fu
|
|
16
16
|
: PhysicalOperator(PhysicalOperatorType::TABLE_SCAN, std::move(types), estimated_cardinality),
|
17
17
|
function(std::move(function_p)), bind_data(std::move(bind_data_p)), column_ids(std::move(column_ids_p)),
|
18
18
|
names(std::move(names_p)), table_filters(std::move(table_filters_p)) {
|
19
|
+
extra_info.file_filters = "";
|
19
20
|
}
|
20
21
|
|
21
22
|
PhysicalTableScan::PhysicalTableScan(vector<LogicalType> types, TableFunction function_p,
|
22
23
|
unique_ptr<FunctionData> bind_data_p, vector<LogicalType> returned_types_p,
|
23
24
|
vector<column_t> column_ids_p, vector<idx_t> projection_ids_p,
|
24
25
|
vector<string> names_p, unique_ptr<TableFilterSet> table_filters_p,
|
25
|
-
idx_t estimated_cardinality)
|
26
|
+
idx_t estimated_cardinality, ExtraOperatorInfo extra_info)
|
26
27
|
: PhysicalOperator(PhysicalOperatorType::TABLE_SCAN, std::move(types), estimated_cardinality),
|
27
28
|
function(std::move(function_p)), bind_data(std::move(bind_data_p)), returned_types(std::move(returned_types_p)),
|
28
29
|
column_ids(std::move(column_ids_p)), projection_ids(std::move(projection_ids_p)), names(std::move(names_p)),
|
29
|
-
table_filters(std::move(table_filters_p)) {
|
30
|
+
table_filters(std::move(table_filters_p)), extra_info(extra_info) {
|
30
31
|
}
|
31
32
|
|
32
33
|
class TableScanGlobalSourceState : public GlobalSourceState {
|
@@ -149,6 +150,10 @@ string PhysicalTableScan::ParamsToString() const {
|
|
149
150
|
}
|
150
151
|
}
|
151
152
|
}
|
153
|
+
if (!extra_info.file_filters.empty()) {
|
154
|
+
result += "\n[INFOSEPARATOR]\n";
|
155
|
+
result += "File Filters: " + extra_info.file_filters;
|
156
|
+
}
|
152
157
|
result += "\n[INFOSEPARATOR]\n";
|
153
158
|
result += StringUtil::Format("EC: %llu", estimated_props->GetCardinality<idx_t>());
|
154
159
|
return result;
|