duckdb 0.8.2-dev157.0 → 0.8.2-dev1573.0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/binding.gyp +15 -12
- package/binding.gyp.in +1 -1
- package/configure.py +1 -1
- package/duckdb_extension_config.cmake +10 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
- package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
- package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
- package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
- package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
- package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
- package/src/duckdb/extension/icu/icu_extension.cpp +3 -3
- package/src/duckdb/extension/json/include/json_common.hpp +47 -231
- package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
- package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
- package/src/duckdb/extension/json/json_common.cpp +272 -40
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +17 -37
- package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
- package/src/duckdb/extension/json/json_functions.cpp +24 -24
- package/src/duckdb/extension/json/json_scan.cpp +3 -6
- package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
- package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
- package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
- package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
- package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
- package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
- package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
- package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
- package/src/duckdb/extension/parquet/parquet_extension.cpp +192 -20
- package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
- package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
- package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
- package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
- package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
- package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
- package/src/duckdb/src/common/allocator.cpp +14 -2
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +5 -11
- package/src/duckdb/src/common/assert.cpp +3 -0
- package/src/duckdb/src/common/enum_util.cpp +4619 -4446
- package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/exception.cpp +2 -2
- package/src/duckdb/src/common/extra_type_info.cpp +506 -0
- package/src/duckdb/src/common/file_system.cpp +19 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
- package/src/duckdb/src/common/local_file_system.cpp +14 -14
- package/src/duckdb/src/common/multi_file_reader.cpp +184 -20
- package/src/duckdb/src/common/operator/cast_operators.cpp +35 -1
- package/src/duckdb/src/common/radix_partitioning.cpp +26 -8
- package/src/duckdb/src/common/re2_regex.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
- package/src/duckdb/src/common/sort/partition_state.cpp +44 -11
- package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
- package/src/duckdb/src/common/types/bit.cpp +51 -0
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
- package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
- package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
- package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
- package/src/duckdb/src/common/types/date.cpp +9 -0
- package/src/duckdb/src/common/types/list_segment.cpp +24 -74
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
- package/src/duckdb/src/common/types/uuid.cpp +2 -2
- package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
- package/src/duckdb/src/common/types.cpp +8 -655
- package/src/duckdb/src/common/virtual_file_system.cpp +138 -1
- package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
- package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
- package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
- package/src/duckdb/src/core_functions/function_list.cpp +4 -2
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
- package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
- package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
- package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
- package/src/duckdb/src/execution/expression_executor.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +149 -139
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
- package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
- package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
- package/src/duckdb/src/execution/index/art/node.cpp +113 -120
- package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
- package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
- package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
- package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
- package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
- package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
- package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
- package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
- package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +444 -284
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +28 -12
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +23 -4
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +41 -5
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
- package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
- package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
- package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -2
- package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
- package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
- package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +56 -33
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +17 -13
- package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
- package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
- package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
- package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
- package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
- package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
- package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
- package/src/duckdb/src/function/function.cpp +3 -1
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -0
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
- package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
- package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
- package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
- package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
- package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
- package/src/duckdb/src/function/table/read_csv.cpp +100 -17
- package/src/duckdb/src/function/table/system_functions.cpp +1 -0
- package/src/duckdb/src/function/table/table_scan.cpp +9 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +46 -2
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
- package/src/duckdb/src/include/duckdb/common/dl.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +616 -584
- package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
- package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
- package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +219 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
- package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
- package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +43 -3
- package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -0
- package/src/duckdb/src/include/duckdb/common/string_util.hpp +11 -0
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
- package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
- package/src/duckdb/src/include/duckdb/common/types.hpp +1 -15
- package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +38 -97
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +31 -11
- package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +3 -10
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +12 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
- package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
- package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
- package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
- package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
- package/src/duckdb/src/include/duckdb/main/settings.hpp +39 -1
- package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
- package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
- package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
- package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
- package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
- package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
- package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +12 -5
- package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
- package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
- package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
- package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
- package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
- package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
- package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
- package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
- package/src/duckdb/src/include/duckdb.h +28 -0
- package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
- package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
- package/src/duckdb/src/main/config.cpp +4 -0
- package/src/duckdb/src/main/database.cpp +1 -1
- package/src/duckdb/src/main/extension/extension_helper.cpp +96 -89
- package/src/duckdb/src/main/extension/extension_install.cpp +9 -0
- package/src/duckdb/src/main/extension/extension_load.cpp +10 -1
- package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
- package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
- package/src/duckdb/src/main/relation.cpp +6 -5
- package/src/duckdb/src/main/settings/settings.cpp +79 -18
- package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
- package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
- package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
- package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
- package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
- package/src/duckdb/src/optimizer/optimizer.cpp +51 -14
- package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
- package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
- package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
- package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
- package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
- package/src/duckdb/src/parallel/executor.cpp +15 -0
- package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
- package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
- package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
- package/src/duckdb/src/parser/expression/case_expression.cpp +0 -13
- package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
- package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
- package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
- package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
- package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
- package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
- package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/parameter_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
- package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
- package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
- package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
- package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
- package/src/duckdb/src/parser/parser.cpp +8 -2
- package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
- package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
- package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
- package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
- package/src/duckdb/src/parser/query_node.cpp +15 -37
- package/src/duckdb/src/parser/result_modifier.cpp +0 -74
- package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
- package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
- package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
- package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
- package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -23
- package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
- package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
- package/src/duckdb/src/parser/tableref.cpp +0 -44
- package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
- package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
- package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
- package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
- package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
- package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
- package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
- package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
- package/src/duckdb/src/parser/transformer.cpp +15 -0
- package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
- package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
- package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
- package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +5 -4
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -49
- package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +64 -26
- package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
- package/src/duckdb/src/planner/binder.cpp +44 -31
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
- package/src/duckdb/src/planner/expression_binder.cpp +3 -0
- package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
- package/src/duckdb/src/planner/logical_operator.cpp +5 -0
- package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
- package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
- package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
- package/src/duckdb/src/planner/operator/logical_get.cpp +9 -4
- package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
- package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
- package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
- package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
- package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
- package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
- package/src/duckdb/src/storage/compression/rle.cpp +0 -1
- package/src/duckdb/src/storage/data_table.cpp +1 -1
- package/src/duckdb/src/storage/local_storage.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +340 -0
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
- package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +86 -0
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +166 -0
- package/src/duckdb/src/storage/serialization/serialize_types.cpp +127 -0
- package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
- package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
- package/src/duckdb/src/storage/table/row_group.cpp +25 -9
- package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
- package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
- package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
- package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
- package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
- package/src/duckdb/ub_src_common.cpp +2 -0
- package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
- package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
- package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
- package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
- package/src/duckdb/ub_src_optimizer.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
- package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
- package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
- package/src/duckdb/ub_src_planner_operator.cpp +4 -0
- package/src/duckdb/ub_src_storage_serialization.cpp +10 -0
- package/src/statement.cpp +10 -3
- package/test/test_all_types.test.ts +233 -0
- package/tsconfig.json +1 -0
- package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
- package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -32,7 +32,7 @@ string PhysicalComparisonJoin::ParamsToString() const {
|
|
32
32
|
}
|
33
33
|
extra_info += "\n[INFOSEPARATOR]\n";
|
34
34
|
extra_info += StringUtil::Format("EC: %llu\n", estimated_props->GetCardinality<idx_t>());
|
35
|
-
extra_info += StringUtil::Format("Cost: %llu",
|
35
|
+
extra_info += StringUtil::Format("Cost: %llu", estimated_props->GetCost<idx_t>());
|
36
36
|
return extra_info;
|
37
37
|
}
|
38
38
|
|
@@ -96,7 +96,7 @@ public:
|
|
96
96
|
class HashJoinLocalSinkState : public LocalSinkState {
|
97
97
|
public:
|
98
98
|
HashJoinLocalSinkState(const PhysicalHashJoin &op, ClientContext &context) : build_executor(context) {
|
99
|
-
auto &allocator =
|
99
|
+
auto &allocator = BufferAllocator::Get(context);
|
100
100
|
if (!op.right_projection_map.empty()) {
|
101
101
|
build_chunk.Initialize(allocator, op.build_types);
|
102
102
|
}
|
@@ -124,7 +124,7 @@ public:
|
|
124
124
|
unique_ptr<JoinHashTable> PhysicalHashJoin::InitializeHashTable(ClientContext &context) const {
|
125
125
|
auto result =
|
126
126
|
make_uniq<JoinHashTable>(BufferManager::GetBufferManager(context), conditions, build_types, join_type);
|
127
|
-
result->max_ht_size = double(BufferManager::GetBufferManager(context).GetMaxMemory()
|
127
|
+
result->max_ht_size = double(0.6) * BufferManager::GetBufferManager(context).GetMaxMemory();
|
128
128
|
if (!delim_types.empty() && join_type == JoinType::MARK) {
|
129
129
|
// correlated MARK join
|
130
130
|
if (delim_types.size() + 1 == conditions.size()) {
|
@@ -162,7 +162,7 @@ unique_ptr<JoinHashTable> PhysicalHashJoin::InitializeHashTable(ClientContext &c
|
|
162
162
|
payload_types.push_back(aggr->return_type);
|
163
163
|
info.correlated_aggregates.push_back(std::move(aggr));
|
164
164
|
|
165
|
-
auto &allocator =
|
165
|
+
auto &allocator = BufferAllocator::Get(context);
|
166
166
|
info.correlated_counts = make_uniq<GroupedAggregateHashTable>(context, allocator, delim_types,
|
167
167
|
payload_types, correlated_aggregates);
|
168
168
|
info.correlated_types = delim_types;
|
@@ -312,10 +312,10 @@ void HashJoinGlobalSinkState::InitializeProbeSpill() {
|
|
312
312
|
}
|
313
313
|
}
|
314
314
|
|
315
|
-
class
|
315
|
+
class HashJoinRepartitionTask : public ExecutorTask {
|
316
316
|
public:
|
317
|
-
|
318
|
-
|
317
|
+
HashJoinRepartitionTask(shared_ptr<Event> event_p, ClientContext &context, JoinHashTable &global_ht,
|
318
|
+
JoinHashTable &local_ht)
|
319
319
|
: ExecutorTask(context), event(std::move(event_p)), global_ht(global_ht), local_ht(local_ht) {
|
320
320
|
}
|
321
321
|
|
@@ -349,7 +349,7 @@ public:
|
|
349
349
|
partition_tasks.reserve(local_hts.size());
|
350
350
|
for (auto &local_ht : local_hts) {
|
351
351
|
partition_tasks.push_back(
|
352
|
-
make_uniq<
|
352
|
+
make_uniq<HashJoinRepartitionTask>(shared_from_this(), context, *sink.hash_table, *local_ht));
|
353
353
|
}
|
354
354
|
SetTasks(std::move(partition_tasks));
|
355
355
|
}
|
@@ -434,7 +434,7 @@ public:
|
|
434
434
|
};
|
435
435
|
|
436
436
|
unique_ptr<OperatorState> PhysicalHashJoin::GetOperatorState(ExecutionContext &context) const {
|
437
|
-
auto &allocator =
|
437
|
+
auto &allocator = BufferAllocator::Get(context.client);
|
438
438
|
auto &sink = sink_state->Cast<HashJoinGlobalSinkState>();
|
439
439
|
auto state = make_uniq<HashJoinOperatorState>(context.client);
|
440
440
|
if (sink.perfect_join_executor) {
|
@@ -532,7 +532,18 @@ public:
|
|
532
532
|
bool AssignTask(HashJoinGlobalSinkState &sink, HashJoinLocalSourceState &lstate);
|
533
533
|
|
534
534
|
idx_t MaxThreads() override {
|
535
|
-
|
535
|
+
D_ASSERT(op.sink_state);
|
536
|
+
auto &gstate = op.sink_state->Cast<HashJoinGlobalSinkState>();
|
537
|
+
|
538
|
+
idx_t count;
|
539
|
+
if (gstate.probe_spill) {
|
540
|
+
count = probe_count;
|
541
|
+
} else if (IsRightOuterJoin(op.join_type)) {
|
542
|
+
count = gstate.hash_table->Count();
|
543
|
+
} else {
|
544
|
+
return 0;
|
545
|
+
}
|
546
|
+
return count / ((idx_t)STANDARD_VECTOR_SIZE * parallel_scan_chunk_count);
|
536
547
|
}
|
537
548
|
|
538
549
|
public:
|
@@ -611,7 +622,7 @@ unique_ptr<GlobalSourceState> PhysicalHashJoin::GetGlobalSourceState(ClientConte
|
|
611
622
|
|
612
623
|
unique_ptr<LocalSourceState> PhysicalHashJoin::GetLocalSourceState(ExecutionContext &context,
|
613
624
|
GlobalSourceState &gstate) const {
|
614
|
-
return make_uniq<HashJoinLocalSourceState>(*this,
|
625
|
+
return make_uniq<HashJoinLocalSourceState>(*this, BufferAllocator::Get(context.client));
|
615
626
|
}
|
616
627
|
|
617
628
|
HashJoinGlobalSourceState::HashJoinGlobalSourceState(const PhysicalHashJoin &op, ClientContext &context)
|
@@ -16,7 +16,7 @@
|
|
16
16
|
|
17
17
|
namespace duckdb {
|
18
18
|
|
19
|
-
PhysicalIEJoin::PhysicalIEJoin(
|
19
|
+
PhysicalIEJoin::PhysicalIEJoin(LogicalComparisonJoin &op, unique_ptr<PhysicalOperator> left,
|
20
20
|
unique_ptr<PhysicalOperator> right, vector<JoinCondition> cond, JoinType join_type,
|
21
21
|
idx_t estimated_cardinality)
|
22
22
|
: PhysicalRangeJoin(op, PhysicalOperatorType::IE_JOIN, std::move(left), std::move(right), std::move(cond),
|
@@ -641,6 +641,8 @@ public:
|
|
641
641
|
: op(op), true_sel(STANDARD_VECTOR_SIZE), left_executor(context), right_executor(context),
|
642
642
|
left_matches(nullptr), right_matches(nullptr) {
|
643
643
|
auto &allocator = Allocator::Get(context);
|
644
|
+
unprojected.Initialize(allocator, op.unprojected_types);
|
645
|
+
|
644
646
|
if (op.conditions.size() < 3) {
|
645
647
|
return;
|
646
648
|
}
|
@@ -696,6 +698,8 @@ public:
|
|
696
698
|
ExpressionExecutor right_executor;
|
697
699
|
DataChunk right_keys;
|
698
700
|
|
701
|
+
DataChunk unprojected;
|
702
|
+
|
699
703
|
// Outer joins
|
700
704
|
idx_t outer_idx;
|
701
705
|
idx_t outer_count;
|
@@ -703,13 +707,14 @@ public:
|
|
703
707
|
bool *right_matches;
|
704
708
|
};
|
705
709
|
|
706
|
-
void PhysicalIEJoin::ResolveComplexJoin(ExecutionContext &context, DataChunk &
|
710
|
+
void PhysicalIEJoin::ResolveComplexJoin(ExecutionContext &context, DataChunk &result, LocalSourceState &state_p) const {
|
707
711
|
auto &state = state_p.Cast<IEJoinLocalSourceState>();
|
708
712
|
auto &ie_sink = sink_state->Cast<IEJoinGlobalState>();
|
709
713
|
auto &left_table = *ie_sink.tables[0];
|
710
714
|
auto &right_table = *ie_sink.tables[1];
|
711
715
|
|
712
716
|
const auto left_cols = children[0]->GetTypes().size();
|
717
|
+
auto &chunk = state.unprojected;
|
713
718
|
do {
|
714
719
|
SelectionVector lsel(STANDARD_VECTOR_SIZE);
|
715
720
|
SelectionVector rsel(STANDARD_VECTOR_SIZE);
|
@@ -720,6 +725,7 @@ void PhysicalIEJoin::ResolveComplexJoin(ExecutionContext &context, DataChunk &ch
|
|
720
725
|
}
|
721
726
|
|
722
727
|
// found matches: extract them
|
728
|
+
|
723
729
|
chunk.Reset();
|
724
730
|
SliceSortedPayload(chunk, left_table.global_sort_state, state.left_block_index, lsel, result_count, 0);
|
725
731
|
SliceSortedPayload(chunk, right_table.global_sort_state, state.right_block_index, rsel, result_count,
|
@@ -762,6 +768,10 @@ void PhysicalIEJoin::ResolveComplexJoin(ExecutionContext &context, DataChunk &ch
|
|
762
768
|
}
|
763
769
|
}
|
764
770
|
|
771
|
+
// We need all of the data to compute other predicates,
|
772
|
+
// but we only return what is in the projection map
|
773
|
+
ProjectResult(chunk, result);
|
774
|
+
|
765
775
|
// found matches: mark the found matches if required
|
766
776
|
if (left_table.found_match) {
|
767
777
|
for (idx_t i = 0; i < result_count; i++) {
|
@@ -773,8 +783,8 @@ void PhysicalIEJoin::ResolveComplexJoin(ExecutionContext &context, DataChunk &ch
|
|
773
783
|
right_table.found_match[state.right_base + rsel[sel->get_index(i)]] = true;
|
774
784
|
}
|
775
785
|
}
|
776
|
-
|
777
|
-
} while (
|
786
|
+
result.Verify();
|
787
|
+
} while (result.size() == 0);
|
778
788
|
}
|
779
789
|
|
780
790
|
class IEJoinGlobalSourceState : public GlobalSourceState {
|
@@ -961,15 +971,18 @@ SourceResultType PhysicalIEJoin::GetData(ExecutionContext &context, DataChunk &r
|
|
961
971
|
ie_gstate.GetNextPair(context.client, ie_sink, ie_lstate);
|
962
972
|
continue;
|
963
973
|
}
|
964
|
-
|
974
|
+
auto &chunk = ie_lstate.unprojected;
|
975
|
+
chunk.Reset();
|
976
|
+
SliceSortedPayload(chunk, ie_sink.tables[0]->global_sort_state, ie_lstate.left_block_index, ie_lstate.true_sel,
|
965
977
|
count);
|
966
978
|
|
967
979
|
// Fill in NULLs to the right
|
968
|
-
for (auto col_idx = left_cols; col_idx <
|
969
|
-
|
970
|
-
ConstantVector::SetNull(
|
980
|
+
for (auto col_idx = left_cols; col_idx < chunk.ColumnCount(); ++col_idx) {
|
981
|
+
chunk.data[col_idx].SetVectorType(VectorType::CONSTANT_VECTOR);
|
982
|
+
ConstantVector::SetNull(chunk.data[col_idx], true);
|
971
983
|
}
|
972
984
|
|
985
|
+
ProjectResult(chunk, result);
|
973
986
|
result.SetCardinality(count);
|
974
987
|
result.Verify();
|
975
988
|
|
@@ -984,15 +997,18 @@ SourceResultType PhysicalIEJoin::GetData(ExecutionContext &context, DataChunk &r
|
|
984
997
|
continue;
|
985
998
|
}
|
986
999
|
|
987
|
-
|
988
|
-
|
1000
|
+
auto &chunk = ie_lstate.unprojected;
|
1001
|
+
chunk.Reset();
|
1002
|
+
SliceSortedPayload(chunk, ie_sink.tables[1]->global_sort_state, ie_lstate.right_block_index, ie_lstate.true_sel,
|
1003
|
+
count, left_cols);
|
989
1004
|
|
990
1005
|
// Fill in NULLs to the left
|
991
1006
|
for (idx_t col_idx = 0; col_idx < left_cols; ++col_idx) {
|
992
|
-
|
993
|
-
ConstantVector::SetNull(
|
1007
|
+
chunk.data[col_idx].SetVectorType(VectorType::CONSTANT_VECTOR);
|
1008
|
+
ConstantVector::SetNull(chunk.data[col_idx], true);
|
994
1009
|
}
|
995
1010
|
|
1011
|
+
ProjectResult(chunk, result);
|
996
1012
|
result.SetCardinality(count);
|
997
1013
|
result.Verify();
|
998
1014
|
|
@@ -60,7 +60,7 @@ void PhysicalJoin::BuildJoinPipelines(Pipeline &current, MetaPipeline &meta_pipe
|
|
60
60
|
// Join can become a source operator if it's RIGHT/OUTER, or if the hash join goes out-of-core
|
61
61
|
bool add_child_pipeline = false;
|
62
62
|
auto &join_op = op.Cast<PhysicalJoin>();
|
63
|
-
if (
|
63
|
+
if (join_op.IsSource()) {
|
64
64
|
add_child_pipeline = true;
|
65
65
|
}
|
66
66
|
|
@@ -14,7 +14,7 @@
|
|
14
14
|
|
15
15
|
namespace duckdb {
|
16
16
|
|
17
|
-
PhysicalPiecewiseMergeJoin::PhysicalPiecewiseMergeJoin(
|
17
|
+
PhysicalPiecewiseMergeJoin::PhysicalPiecewiseMergeJoin(LogicalComparisonJoin &op, unique_ptr<PhysicalOperator> left,
|
18
18
|
unique_ptr<PhysicalOperator> right, vector<JoinCondition> cond,
|
19
19
|
JoinType join_type, idx_t estimated_cardinality)
|
20
20
|
: PhysicalRangeJoin(op, PhysicalOperatorType::PIECEWISE_MERGE_JOIN, std::move(left), std::move(right),
|
@@ -208,6 +208,7 @@ public:
|
|
208
208
|
idx_t right_position;
|
209
209
|
idx_t right_chunk_index;
|
210
210
|
idx_t right_base;
|
211
|
+
idx_t prev_left_index;
|
211
212
|
|
212
213
|
// Secondary predicate shared data
|
213
214
|
SelectionVector sel;
|
@@ -431,7 +432,8 @@ void PhysicalPiecewiseMergeJoin::ResolveSimpleJoin(ExecutionContext &context, Da
|
|
431
432
|
}
|
432
433
|
}
|
433
434
|
|
434
|
-
static idx_t MergeJoinComplexBlocks(BlockMergeInfo &l, BlockMergeInfo &r, const ExpressionType comparison
|
435
|
+
static idx_t MergeJoinComplexBlocks(BlockMergeInfo &l, BlockMergeInfo &r, const ExpressionType comparison,
|
436
|
+
idx_t &prev_left_index) {
|
435
437
|
const auto cmp = MergeJoinComparisonValue(comparison);
|
436
438
|
|
437
439
|
// The sort parameters should all be the same
|
@@ -465,6 +467,20 @@ static idx_t MergeJoinComplexBlocks(BlockMergeInfo &l, BlockMergeInfo &r, const
|
|
465
467
|
|
466
468
|
idx_t result_count = 0;
|
467
469
|
while (true) {
|
470
|
+
if (l.entry_idx < prev_left_index) {
|
471
|
+
// left side smaller: found match
|
472
|
+
l.result.set_index(result_count, sel_t(l.entry_idx));
|
473
|
+
r.result.set_index(result_count, sel_t(r.entry_idx));
|
474
|
+
result_count++;
|
475
|
+
// move left side forward
|
476
|
+
l.entry_idx++;
|
477
|
+
l_ptr += entry_size;
|
478
|
+
if (result_count == STANDARD_VECTOR_SIZE) {
|
479
|
+
// out of space!
|
480
|
+
break;
|
481
|
+
}
|
482
|
+
continue;
|
483
|
+
}
|
468
484
|
if (l.entry_idx < l.not_null) {
|
469
485
|
int comp_res;
|
470
486
|
if (all_constant) {
|
@@ -474,7 +490,6 @@ static idx_t MergeJoinComplexBlocks(BlockMergeInfo &l, BlockMergeInfo &r, const
|
|
474
490
|
rread.entry_idx = r.entry_idx;
|
475
491
|
comp_res = Comparators::CompareTuple(lread, rread, l_ptr, r_ptr, l.state.sort_layout, external);
|
476
492
|
}
|
477
|
-
|
478
493
|
if (comp_res <= cmp) {
|
479
494
|
// left side smaller: found match
|
480
495
|
l.result.set_index(result_count, sel_t(l.entry_idx));
|
@@ -490,6 +505,8 @@ static idx_t MergeJoinComplexBlocks(BlockMergeInfo &l, BlockMergeInfo &r, const
|
|
490
505
|
continue;
|
491
506
|
}
|
492
507
|
}
|
508
|
+
|
509
|
+
prev_left_index = l.entry_idx;
|
493
510
|
// right side smaller or equal, or left side exhausted: move
|
494
511
|
// right pointer forward reset left side to start
|
495
512
|
r.entry_idx++;
|
@@ -521,6 +538,7 @@ OperatorResultType PhysicalPiecewiseMergeJoin::ResolveComplexJoin(ExecutionConte
|
|
521
538
|
state.right_chunk_index = 0;
|
522
539
|
state.right_base = 0;
|
523
540
|
state.left_position = 0;
|
541
|
+
state.prev_left_index = 0;
|
524
542
|
state.right_position = 0;
|
525
543
|
state.first_fetch = false;
|
526
544
|
state.finished = false;
|
@@ -547,7 +565,8 @@ OperatorResultType PhysicalPiecewiseMergeJoin::ResolveComplexJoin(ExecutionConte
|
|
547
565
|
BlockMergeInfo right_info(gstate.table->global_sort_state, state.right_chunk_index, state.right_position,
|
548
566
|
rhs_not_null);
|
549
567
|
|
550
|
-
idx_t result_count =
|
568
|
+
idx_t result_count =
|
569
|
+
MergeJoinComplexBlocks(left_info, right_info, conditions[0].comparison, state.prev_left_index);
|
551
570
|
if (result_count == 0) {
|
552
571
|
// exhausted this chunk on the right side
|
553
572
|
// move to the next right chunk
|
@@ -159,16 +159,16 @@ void PhysicalRangeJoin::GlobalSortedTable::Finalize(Pipeline &pipeline, Event &e
|
|
159
159
|
}
|
160
160
|
}
|
161
161
|
|
162
|
-
PhysicalRangeJoin::PhysicalRangeJoin(
|
163
|
-
unique_ptr<PhysicalOperator>
|
164
|
-
idx_t estimated_cardinality)
|
162
|
+
PhysicalRangeJoin::PhysicalRangeJoin(LogicalComparisonJoin &op, PhysicalOperatorType type,
|
163
|
+
unique_ptr<PhysicalOperator> left, unique_ptr<PhysicalOperator> right,
|
164
|
+
vector<JoinCondition> cond, JoinType join_type, idx_t estimated_cardinality)
|
165
165
|
: PhysicalComparisonJoin(op, type, std::move(cond), join_type, estimated_cardinality) {
|
166
166
|
// Reorder the conditions so that ranges are at the front.
|
167
167
|
// TODO: use stats to improve the choice?
|
168
168
|
// TODO: Prefer fixed length types?
|
169
169
|
if (conditions.size() > 1) {
|
170
|
-
|
171
|
-
|
170
|
+
vector<JoinCondition> conditions_p(conditions.size());
|
171
|
+
std::swap(conditions_p, conditions);
|
172
172
|
idx_t range_position = 0;
|
173
173
|
idx_t other_position = conditions_p.size();
|
174
174
|
for (idx_t i = 0; i < conditions_p.size(); ++i) {
|
@@ -188,6 +188,30 @@ PhysicalRangeJoin::PhysicalRangeJoin(LogicalOperator &op, PhysicalOperatorType t
|
|
188
188
|
|
189
189
|
children.push_back(std::move(left));
|
190
190
|
children.push_back(std::move(right));
|
191
|
+
|
192
|
+
// Fill out the left projection map.
|
193
|
+
left_projection_map = op.left_projection_map;
|
194
|
+
if (left_projection_map.empty()) {
|
195
|
+
const auto left_count = children[0]->types.size();
|
196
|
+
left_projection_map.reserve(left_count);
|
197
|
+
for (column_t i = 0; i < left_count; ++i) {
|
198
|
+
left_projection_map.emplace_back(i);
|
199
|
+
}
|
200
|
+
}
|
201
|
+
// Fill out the right projection map.
|
202
|
+
right_projection_map = op.right_projection_map;
|
203
|
+
if (right_projection_map.empty()) {
|
204
|
+
const auto right_count = children[1]->types.size();
|
205
|
+
right_projection_map.reserve(right_count);
|
206
|
+
for (column_t i = 0; i < right_count; ++i) {
|
207
|
+
right_projection_map.emplace_back(i);
|
208
|
+
}
|
209
|
+
}
|
210
|
+
|
211
|
+
// Construct the unprojected type layout from the children's types
|
212
|
+
unprojected_types = children[0]->GetTypes();
|
213
|
+
auto &types = children[1]->GetTypes();
|
214
|
+
unprojected_types.insert(unprojected_types.end(), types.begin(), types.end());
|
191
215
|
}
|
192
216
|
|
193
217
|
idx_t PhysicalRangeJoin::LocalSortedTable::MergeNulls(const vector<JoinCondition> &conditions) {
|
@@ -266,6 +290,18 @@ idx_t PhysicalRangeJoin::LocalSortedTable::MergeNulls(const vector<JoinCondition
|
|
266
290
|
}
|
267
291
|
}
|
268
292
|
|
293
|
+
void PhysicalRangeJoin::ProjectResult(DataChunk &chunk, DataChunk &result) const {
|
294
|
+
const auto left_projected = left_projection_map.size();
|
295
|
+
for (idx_t i = 0; i < left_projected; ++i) {
|
296
|
+
result.data[i].Reference(chunk.data[left_projection_map[i]]);
|
297
|
+
}
|
298
|
+
const auto left_width = children[0]->types.size();
|
299
|
+
for (idx_t i = 0; i < right_projection_map.size(); ++i) {
|
300
|
+
result.data[left_projected + i].Reference(chunk.data[left_width + right_projection_map[i]]);
|
301
|
+
}
|
302
|
+
result.SetCardinality(chunk);
|
303
|
+
}
|
304
|
+
|
269
305
|
BufferHandle PhysicalRangeJoin::SliceSortedPayload(DataChunk &payload, GlobalSortState &state, const idx_t block_idx,
|
270
306
|
const SelectionVector &result, const idx_t result_count,
|
271
307
|
const idx_t left_cols) {
|
@@ -1,5 +1,4 @@
|
|
1
1
|
#include "duckdb/execution/operator/persistent/base_csv_reader.hpp"
|
2
|
-
|
3
2
|
#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
|
4
3
|
#include "duckdb/common/file_system.hpp"
|
5
4
|
#include "duckdb/common/string_util.hpp"
|
@@ -10,6 +9,7 @@
|
|
10
9
|
#include "duckdb/common/vector_operations/unary_executor.hpp"
|
11
10
|
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
12
11
|
#include "duckdb/function/scalar/strftime_format.hpp"
|
12
|
+
#include "duckdb/main/appender.hpp"
|
13
13
|
#include "duckdb/main/database.hpp"
|
14
14
|
#include "duckdb/parser/column_definition.hpp"
|
15
15
|
#include "duckdb/storage/data_table.hpp"
|
@@ -18,7 +18,8 @@
|
|
18
18
|
#include "duckdb/parser/keyword_helper.hpp"
|
19
19
|
#include "duckdb/main/error_manager.hpp"
|
20
20
|
#include "duckdb/execution/operator/persistent/parallel_csv_reader.hpp"
|
21
|
-
|
21
|
+
#include "duckdb/execution/operator/persistent/csv_rejects_table.hpp"
|
22
|
+
#include "duckdb/main/client_data.hpp"
|
22
23
|
#include <algorithm>
|
23
24
|
#include <cctype>
|
24
25
|
#include <cstring>
|
@@ -448,6 +449,17 @@ bool TryCastFloatingVectorCommaSeparated(BufferedCSVReaderOptions &options, Vect
|
|
448
449
|
}
|
449
450
|
}
|
450
451
|
|
452
|
+
// Location of erroneous value in the current parse chunk
|
453
|
+
struct ErrorLocation {
|
454
|
+
idx_t row_idx;
|
455
|
+
idx_t col_idx;
|
456
|
+
idx_t row_line;
|
457
|
+
|
458
|
+
ErrorLocation(idx_t row_idx, idx_t col_idx, idx_t row_line)
|
459
|
+
: row_idx(row_idx), col_idx(col_idx), row_line(row_line) {
|
460
|
+
}
|
461
|
+
};
|
462
|
+
|
451
463
|
bool BaseCSVReader::Flush(DataChunk &insert_chunk, idx_t buffer_idx, bool try_add_line) {
|
452
464
|
if (parse_chunk.size() == 0) {
|
453
465
|
return true;
|
@@ -506,10 +518,7 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, idx_t buffer_idx, bool try_ad
|
|
506
518
|
if (try_add_line) {
|
507
519
|
return false;
|
508
520
|
}
|
509
|
-
|
510
|
-
conversion_error_ignored = true;
|
511
|
-
continue;
|
512
|
-
}
|
521
|
+
|
513
522
|
string col_name = to_string(col_idx);
|
514
523
|
if (col_idx < names.size()) {
|
515
524
|
col_name = "\"" + names[col_idx] + "\"";
|
@@ -527,16 +536,18 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, idx_t buffer_idx, bool try_ad
|
|
527
536
|
}
|
528
537
|
}
|
529
538
|
|
530
|
-
idx_t error_line;
|
531
539
|
// The line_error must be summed with linenr (All lines emmited from this batch)
|
532
540
|
// But subtracted from the parse_chunk
|
533
541
|
D_ASSERT(line_error + linenr >= parse_chunk.size());
|
534
542
|
line_error += linenr;
|
535
543
|
line_error -= parse_chunk.size();
|
536
544
|
|
537
|
-
error_line = GetLineError(line_error, buffer_idx);
|
545
|
+
auto error_line = GetLineError(line_error, buffer_idx);
|
546
|
+
|
547
|
+
if (options.ignore_errors) {
|
548
|
+
conversion_error_ignored = true;
|
538
549
|
|
539
|
-
if (options.auto_detect) {
|
550
|
+
} else if (options.auto_detect) {
|
540
551
|
throw InvalidInputException("%s in column %s, at line %llu.\n\nParser "
|
541
552
|
"options:\n%s.\n\nConsider either increasing the sample size "
|
542
553
|
"(SAMPLE_SIZE=X [X rows] or SAMPLE_SIZE=-1 [all rows]), "
|
@@ -550,11 +561,19 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, idx_t buffer_idx, bool try_ad
|
|
550
561
|
}
|
551
562
|
if (conversion_error_ignored) {
|
552
563
|
D_ASSERT(options.ignore_errors);
|
564
|
+
|
553
565
|
SelectionVector succesful_rows(parse_chunk.size());
|
554
566
|
idx_t sel_size = 0;
|
555
567
|
|
568
|
+
// Keep track of failed cells
|
569
|
+
vector<ErrorLocation> failed_cells;
|
570
|
+
|
556
571
|
for (idx_t row_idx = 0; row_idx < parse_chunk.size(); row_idx++) {
|
557
|
-
|
572
|
+
|
573
|
+
auto global_row_idx = row_idx + linenr - parse_chunk.size();
|
574
|
+
auto row_line = GetLineError(global_row_idx, buffer_idx, false);
|
575
|
+
|
576
|
+
bool row_failed = false;
|
558
577
|
for (idx_t c = 0; c < reader_data.column_ids.size(); c++) {
|
559
578
|
auto col_idx = reader_data.column_ids[c];
|
560
579
|
auto result_idx = reader_data.column_mapping[c];
|
@@ -564,14 +583,82 @@ bool BaseCSVReader::Flush(DataChunk &insert_chunk, idx_t buffer_idx, bool try_ad
|
|
564
583
|
|
565
584
|
bool was_already_null = FlatVector::IsNull(parse_vector, row_idx);
|
566
585
|
if (!was_already_null && FlatVector::IsNull(result_vector, row_idx)) {
|
567
|
-
|
568
|
-
|
586
|
+
row_failed = true;
|
587
|
+
failed_cells.emplace_back(row_idx, col_idx, row_line);
|
569
588
|
}
|
570
589
|
}
|
571
|
-
if (!
|
590
|
+
if (!row_failed) {
|
572
591
|
succesful_rows.set_index(sel_size++, row_idx);
|
573
592
|
}
|
574
593
|
}
|
594
|
+
|
595
|
+
// Now do a second pass to produce the reject table entries
|
596
|
+
if (!failed_cells.empty() && !options.rejects_table_name.empty()) {
|
597
|
+
auto limit = options.rejects_limit;
|
598
|
+
|
599
|
+
auto rejects = CSVRejectsTable::GetOrCreate(context, options.rejects_table_name);
|
600
|
+
lock_guard<mutex> lock(rejects->write_lock);
|
601
|
+
|
602
|
+
// short circuit if we already have too many rejects
|
603
|
+
if (limit == 0 || rejects->count < limit) {
|
604
|
+
auto &table = rejects->GetTable(context);
|
605
|
+
InternalAppender appender(context, table);
|
606
|
+
auto file_name = GetFileName();
|
607
|
+
|
608
|
+
for (auto &cell : failed_cells) {
|
609
|
+
if (limit != 0 && rejects->count >= limit) {
|
610
|
+
break;
|
611
|
+
}
|
612
|
+
rejects->count++;
|
613
|
+
|
614
|
+
auto row_idx = cell.row_idx;
|
615
|
+
auto col_idx = cell.col_idx;
|
616
|
+
auto row_line = cell.row_line;
|
617
|
+
|
618
|
+
auto col_name = to_string(col_idx);
|
619
|
+
if (col_idx < names.size()) {
|
620
|
+
col_name = "\"" + names[col_idx] + "\"";
|
621
|
+
}
|
622
|
+
|
623
|
+
auto &parse_vector = parse_chunk.data[col_idx];
|
624
|
+
auto parsed_str = FlatVector::GetData<string_t>(parse_vector)[row_idx];
|
625
|
+
auto &type = insert_chunk.data[col_idx].GetType();
|
626
|
+
auto row_error_msg = StringUtil::Format("Could not convert string '%s' to '%s'",
|
627
|
+
parsed_str.GetString(), type.ToString());
|
628
|
+
|
629
|
+
// Add the row to the rejects table
|
630
|
+
appender.BeginRow();
|
631
|
+
appender.Append(string_t(file_name));
|
632
|
+
appender.Append(row_line);
|
633
|
+
appender.Append(col_idx);
|
634
|
+
appender.Append(string_t(col_name));
|
635
|
+
appender.Append(parsed_str);
|
636
|
+
|
637
|
+
if (!options.rejects_recovery_columns.empty()) {
|
638
|
+
child_list_t<Value> recovery_key;
|
639
|
+
for (auto &key_idx : options.rejects_recovery_column_ids) {
|
640
|
+
// Figure out if the recovery key is valid.
|
641
|
+
// If not, error out for real.
|
642
|
+
auto &component_vector = parse_chunk.data[key_idx];
|
643
|
+
if (FlatVector::IsNull(component_vector, row_idx)) {
|
644
|
+
throw InvalidInputException("%s at line %llu in column %s. Parser options:\n%s ",
|
645
|
+
"Could not parse recovery column", row_line, col_name,
|
646
|
+
options.ToString());
|
647
|
+
}
|
648
|
+
auto component = Value(FlatVector::GetData<string_t>(component_vector)[row_idx]);
|
649
|
+
recovery_key.emplace_back(names[key_idx], component);
|
650
|
+
}
|
651
|
+
appender.Append(Value::STRUCT(recovery_key));
|
652
|
+
}
|
653
|
+
|
654
|
+
appender.Append(string_t(row_error_msg));
|
655
|
+
appender.EndRow();
|
656
|
+
}
|
657
|
+
appender.Close();
|
658
|
+
}
|
659
|
+
}
|
660
|
+
|
661
|
+
// Now slice the insert chunk to only include the succesful rows
|
575
662
|
insert_chunk.Slice(succesful_rows, sel_size);
|
576
663
|
}
|
577
664
|
parse_chunk.Reset();
|
@@ -13,7 +13,7 @@ CSVFileHandle::CSVFileHandle(FileSystem &fs, Allocator &allocator, unique_ptr<Fi
|
|
13
13
|
|
14
14
|
unique_ptr<FileHandle> CSVFileHandle::OpenFileHandle(FileSystem &fs, Allocator &allocator, const string &path,
|
15
15
|
FileCompressionType compression) {
|
16
|
-
auto file_handle = fs.OpenFile(path
|
16
|
+
auto file_handle = fs.OpenFile(path, FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK, compression);
|
17
17
|
if (file_handle->CanSeek()) {
|
18
18
|
file_handle->Reset();
|
19
19
|
}
|
@@ -179,6 +179,26 @@ void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value
|
|
179
179
|
allow_quoted_nulls = ParseBoolean(value, loption);
|
180
180
|
} else if (loption == "parallel") {
|
181
181
|
parallel_mode = ParseBoolean(value, loption) ? ParallelMode::PARALLEL : ParallelMode::SINGLE_THREADED;
|
182
|
+
} else if (loption == "rejects_table") {
|
183
|
+
// skip, handled in SetRejectsOptions
|
184
|
+
auto table_name = ParseString(value, loption);
|
185
|
+
if (table_name.empty()) {
|
186
|
+
throw BinderException("REJECTS_TABLE option cannot be empty");
|
187
|
+
}
|
188
|
+
rejects_table_name = table_name;
|
189
|
+
} else if (loption == "rejects_recovery_columns") {
|
190
|
+
// Get the list of columns to use as a recovery key
|
191
|
+
auto &children = ListValue::GetChildren(value);
|
192
|
+
for (auto &child : children) {
|
193
|
+
auto col_name = child.GetValue<string>();
|
194
|
+
rejects_recovery_columns.push_back(col_name);
|
195
|
+
}
|
196
|
+
} else if (loption == "rejects_limit") {
|
197
|
+
int64_t limit = ParseInteger(value, loption);
|
198
|
+
if (limit < 0) {
|
199
|
+
throw BinderException("Unsupported parameter for REJECTS_LIMIT: cannot be negative");
|
200
|
+
}
|
201
|
+
rejects_limit = limit;
|
182
202
|
} else {
|
183
203
|
throw BinderException("Unrecognized option for CSV reader \"%s\"", loption);
|
184
204
|
}
|
@@ -0,0 +1,48 @@
|
|
1
|
+
#include "duckdb/main/appender.hpp"
|
2
|
+
#include "duckdb/parser/parsed_data/create_table_info.hpp"
|
3
|
+
#include "duckdb/function/table/read_csv.hpp"
|
4
|
+
#include "duckdb/execution/operator/persistent/csv_rejects_table.hpp"
|
5
|
+
#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
|
6
|
+
|
7
|
+
namespace duckdb {
|
8
|
+
|
9
|
+
TableCatalogEntry &CSVRejectsTable::GetTable(ClientContext &context) {
|
10
|
+
auto &temp_catalog = Catalog::GetCatalog(context, TEMP_CATALOG);
|
11
|
+
auto &table_entry = temp_catalog.GetEntry<TableCatalogEntry>(context, TEMP_CATALOG, DEFAULT_SCHEMA, name);
|
12
|
+
return table_entry;
|
13
|
+
}
|
14
|
+
|
15
|
+
shared_ptr<CSVRejectsTable> CSVRejectsTable::GetOrCreate(ClientContext &context, const string &name) {
|
16
|
+
auto key = "CSV_REJECTS_TABLE_CACHE_ENTRY_" + StringUtil::Upper(name);
|
17
|
+
auto &cache = ObjectCache::GetObjectCache(context);
|
18
|
+
return cache.GetOrCreate<CSVRejectsTable>(key, name);
|
19
|
+
}
|
20
|
+
|
21
|
+
void CSVRejectsTable::InitializeTable(ClientContext &context, const ReadCSVData &data) {
|
22
|
+
// (Re)Create the temporary rejects table
|
23
|
+
auto &catalog = Catalog::GetCatalog(context, TEMP_CATALOG);
|
24
|
+
auto info = make_uniq<CreateTableInfo>(TEMP_CATALOG, DEFAULT_SCHEMA, name);
|
25
|
+
info->temporary = true;
|
26
|
+
info->on_conflict = OnCreateConflict::ERROR_ON_CONFLICT;
|
27
|
+
info->columns.AddColumn(ColumnDefinition("file", LogicalType::VARCHAR));
|
28
|
+
info->columns.AddColumn(ColumnDefinition("line", LogicalType::BIGINT));
|
29
|
+
info->columns.AddColumn(ColumnDefinition("column", LogicalType::BIGINT));
|
30
|
+
info->columns.AddColumn(ColumnDefinition("column_name", LogicalType::VARCHAR));
|
31
|
+
info->columns.AddColumn(ColumnDefinition("parsed_value", LogicalType::VARCHAR));
|
32
|
+
|
33
|
+
if (!data.options.rejects_recovery_columns.empty()) {
|
34
|
+
child_list_t<LogicalType> recovery_key_components;
|
35
|
+
for (auto &col_name : data.options.rejects_recovery_columns) {
|
36
|
+
recovery_key_components.emplace_back(col_name, LogicalType::VARCHAR);
|
37
|
+
}
|
38
|
+
info->columns.AddColumn(ColumnDefinition("recovery_columns", LogicalType::STRUCT(recovery_key_components)));
|
39
|
+
}
|
40
|
+
|
41
|
+
info->columns.AddColumn(ColumnDefinition("error", LogicalType::VARCHAR));
|
42
|
+
|
43
|
+
catalog.CreateTable(context, std::move(info));
|
44
|
+
|
45
|
+
count = 0;
|
46
|
+
}
|
47
|
+
|
48
|
+
} // namespace duckdb
|
@@ -635,12 +635,11 @@ void ParallelCSVReader::ParseCSV(DataChunk &insert_chunk) {
|
|
635
635
|
}
|
636
636
|
}
|
637
637
|
|
638
|
-
idx_t ParallelCSVReader::GetLineError(idx_t line_error, idx_t buffer_idx) {
|
638
|
+
idx_t ParallelCSVReader::GetLineError(idx_t line_error, idx_t buffer_idx, bool stop_at_first) {
|
639
639
|
while (true) {
|
640
640
|
if (buffer->line_info->CanItGetLine(file_idx, buffer_idx)) {
|
641
641
|
auto cur_start = verification_positions.beginning_of_first_line + buffer->buffer->GetCSVGlobalStart();
|
642
|
-
|
643
|
-
return buffer->line_info->GetLine(buffer_idx, line_error, file_idx, cur_start, false);
|
642
|
+
return buffer->line_info->GetLine(buffer_idx, line_error, file_idx, cur_start, false, stop_at_first);
|
644
643
|
}
|
645
644
|
}
|
646
645
|
}
|