duckdb 0.8.2-dev150.0 → 0.8.2-dev1549.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +15 -12
- package/binding.gyp.in +1 -1
- package/configure.py +1 -1
- package/duckdb_extension_config.cmake +10 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
- package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
- package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
- package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
- package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
- package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
- package/src/duckdb/extension/icu/icu_extension.cpp +3 -3
- package/src/duckdb/extension/json/include/json_common.hpp +47 -231
- package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
- package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
- package/src/duckdb/extension/json/json_common.cpp +272 -40
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +17 -37
- package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
- package/src/duckdb/extension/json/json_functions.cpp +24 -24
- package/src/duckdb/extension/json/json_scan.cpp +3 -6
- package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
- package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
- package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
- package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
- package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
- package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
- package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
- package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
- package/src/duckdb/extension/parquet/parquet_extension.cpp +192 -20
- package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
- package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
- package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
- package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
- package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
- package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
- package/src/duckdb/src/common/allocator.cpp +14 -2
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +5 -11
- package/src/duckdb/src/common/assert.cpp +3 -0
- package/src/duckdb/src/common/enum_util.cpp +4619 -4446
- package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/exception.cpp +2 -2
- package/src/duckdb/src/common/extra_type_info.cpp +506 -0
- package/src/duckdb/src/common/file_system.cpp +19 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
- package/src/duckdb/src/common/local_file_system.cpp +14 -14
- package/src/duckdb/src/common/multi_file_reader.cpp +184 -20
- package/src/duckdb/src/common/operator/cast_operators.cpp +35 -1
- package/src/duckdb/src/common/radix_partitioning.cpp +26 -8
- package/src/duckdb/src/common/re2_regex.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
- package/src/duckdb/src/common/sort/partition_state.cpp +44 -11
- package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
- package/src/duckdb/src/common/types/bit.cpp +51 -0
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
- package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
- package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
- package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
- package/src/duckdb/src/common/types/date.cpp +9 -0
- package/src/duckdb/src/common/types/list_segment.cpp +24 -74
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
- package/src/duckdb/src/common/types/uuid.cpp +2 -2
- package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
- package/src/duckdb/src/common/types.cpp +8 -655
- package/src/duckdb/src/common/virtual_file_system.cpp +138 -1
- package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
- package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
- package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
- package/src/duckdb/src/core_functions/function_list.cpp +4 -2
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
- package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
- package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
- package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
- package/src/duckdb/src/execution/expression_executor.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +149 -139
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
- package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
- package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
- package/src/duckdb/src/execution/index/art/node.cpp +113 -120
- package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
- package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
- package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
- package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
- package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
- package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
- package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
- package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
- package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +414 -283
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +28 -12
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +23 -4
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +41 -5
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
- package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
- package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
- package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -2
- package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
- package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
- package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +13 -22
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +17 -13
- package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
- package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
- package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
- package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
- package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
- package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
- package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
- package/src/duckdb/src/function/function.cpp +3 -1
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
- package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
- package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
- package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
- package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
- package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
- package/src/duckdb/src/function/table/read_csv.cpp +100 -17
- package/src/duckdb/src/function/table/table_scan.cpp +9 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
- package/src/duckdb/src/include/duckdb/common/dl.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +616 -584
- package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
- package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
- package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +219 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
- package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
- package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +43 -3
- package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -0
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
- package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
- package/src/duckdb/src/include/duckdb/common/types.hpp +1 -15
- package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +38 -97
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +31 -11
- package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -10
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +12 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
- package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
- package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
- package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
- package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
- package/src/duckdb/src/include/duckdb/main/settings.hpp +39 -1
- package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
- package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
- package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
- package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
- package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
- package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
- package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +8 -5
- package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
- package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
- package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
- package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
- package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
- package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
- package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
- package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
- package/src/duckdb/src/include/duckdb.h +28 -0
- package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
- package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
- package/src/duckdb/src/main/config.cpp +4 -0
- package/src/duckdb/src/main/database.cpp +1 -1
- package/src/duckdb/src/main/extension/extension_helper.cpp +96 -89
- package/src/duckdb/src/main/extension/extension_install.cpp +6 -0
- package/src/duckdb/src/main/extension/extension_load.cpp +10 -1
- package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
- package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
- package/src/duckdb/src/main/relation.cpp +6 -5
- package/src/duckdb/src/main/settings/settings.cpp +79 -18
- package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
- package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
- package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
- package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
- package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
- package/src/duckdb/src/optimizer/optimizer.cpp +51 -14
- package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
- package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
- package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
- package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
- package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
- package/src/duckdb/src/parallel/executor.cpp +15 -0
- package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
- package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
- package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
- package/src/duckdb/src/parser/expression/case_expression.cpp +0 -13
- package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
- package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
- package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
- package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
- package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
- package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
- package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/parameter_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
- package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
- package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
- package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
- package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
- package/src/duckdb/src/parser/parser.cpp +8 -2
- package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
- package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
- package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
- package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
- package/src/duckdb/src/parser/query_node.cpp +15 -37
- package/src/duckdb/src/parser/result_modifier.cpp +0 -74
- package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
- package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
- package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
- package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
- package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -23
- package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
- package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
- package/src/duckdb/src/parser/tableref.cpp +0 -44
- package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
- package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
- package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
- package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
- package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
- package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
- package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
- package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
- package/src/duckdb/src/parser/transformer.cpp +15 -0
- package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
- package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
- package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
- package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +5 -4
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -49
- package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +61 -26
- package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
- package/src/duckdb/src/planner/binder.cpp +5 -0
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
- package/src/duckdb/src/planner/expression_binder.cpp +3 -0
- package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
- package/src/duckdb/src/planner/logical_operator.cpp +5 -0
- package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
- package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
- package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
- package/src/duckdb/src/planner/operator/logical_get.cpp +9 -4
- package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
- package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
- package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
- package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
- package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
- package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
- package/src/duckdb/src/storage/compression/rle.cpp +0 -1
- package/src/duckdb/src/storage/data_table.cpp +1 -1
- package/src/duckdb/src/storage/local_storage.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +340 -0
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
- package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +86 -0
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +166 -0
- package/src/duckdb/src/storage/serialization/serialize_types.cpp +127 -0
- package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
- package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
- package/src/duckdb/src/storage/table/row_group.cpp +25 -9
- package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
- package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
- package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
- package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
- package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
- package/src/duckdb/ub_src_common.cpp +2 -0
- package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
- package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
- package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
- package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
- package/src/duckdb/ub_src_optimizer.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
- package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
- package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
- package/src/duckdb/ub_src_planner_operator.cpp +4 -0
- package/src/duckdb/ub_src_storage_serialization.cpp +10 -0
- package/src/statement.cpp +10 -3
- package/test/test_all_types.test.ts +233 -0
- package/tsconfig.json +1 -0
- package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
- package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -15,7 +15,7 @@ public:
|
|
15
15
|
// some derivatives
|
16
16
|
D_ASSERT(miniblocks_per_block > 0);
|
17
17
|
values_per_miniblock = block_value_count / miniblocks_per_block;
|
18
|
-
miniblock_bit_widths =
|
18
|
+
miniblock_bit_widths = unique_ptr<uint8_t[]>(new data_t[miniblocks_per_block]);
|
19
19
|
|
20
20
|
// init state to something sane
|
21
21
|
values_left_in_block = 0;
|
@@ -96,7 +96,7 @@ public:
|
|
96
96
|
if (values_left_in_miniblock == 0) {
|
97
97
|
return;
|
98
98
|
}
|
99
|
-
auto data =
|
99
|
+
auto data = unique_ptr<uint32_t[]>(new uint32_t[values_left_in_miniblock]);
|
100
100
|
GetBatch<uint32_t>(data_ptr_cast(data.get()), values_left_in_miniblock);
|
101
101
|
}
|
102
102
|
|
@@ -112,7 +112,7 @@ private:
|
|
112
112
|
int64_t start_value;
|
113
113
|
idx_t values_per_miniblock;
|
114
114
|
|
115
|
-
|
115
|
+
unique_ptr<uint8_t[]> miniblock_bit_widths;
|
116
116
|
idx_t values_left_in_block;
|
117
117
|
idx_t values_left_in_miniblock;
|
118
118
|
idx_t miniblock_offset;
|
@@ -35,9 +35,9 @@ public:
|
|
35
35
|
return res;
|
36
36
|
}
|
37
37
|
|
38
|
-
static
|
39
|
-
|
40
|
-
|
38
|
+
static unique_ptr<ColumnReader> CreateReader(ParquetReader &reader, const LogicalType &type_p,
|
39
|
+
const SchemaElement &schema_p, idx_t file_idx_p, idx_t max_define,
|
40
|
+
idx_t max_repeat);
|
41
41
|
};
|
42
42
|
|
43
43
|
} // namespace duckdb
|
@@ -20,14 +20,14 @@ class ParquetFileMetadataCache : public ObjectCacheEntry {
|
|
20
20
|
public:
|
21
21
|
ParquetFileMetadataCache() : metadata(nullptr) {
|
22
22
|
}
|
23
|
-
ParquetFileMetadataCache(
|
23
|
+
ParquetFileMetadataCache(unique_ptr<duckdb_parquet::format::FileMetaData> file_metadata, time_t r_time)
|
24
24
|
: metadata(std::move(file_metadata)), read_time(r_time) {
|
25
25
|
}
|
26
26
|
|
27
27
|
~ParquetFileMetadataCache() override = default;
|
28
28
|
|
29
29
|
//! Parquet file metadata
|
30
|
-
|
30
|
+
unique_ptr<const duckdb_parquet::format::FileMetaData> metadata;
|
31
31
|
|
32
32
|
//! read time
|
33
33
|
time_t read_time;
|
@@ -15,8 +15,8 @@ struct LogicalType;
|
|
15
15
|
|
16
16
|
struct ParquetStatisticsUtils {
|
17
17
|
|
18
|
-
static
|
19
|
-
|
18
|
+
static unique_ptr<BaseStatistics> TransformColumnStatistics(const SchemaElement &s_ele, const LogicalType &type,
|
19
|
+
const ColumnChunk &column_chunk);
|
20
20
|
|
21
21
|
static Value ConvertValue(const LogicalType &type, const duckdb_parquet::format::SchemaElement &schema_ele,
|
22
22
|
const std::string &stats);
|
@@ -37,8 +37,7 @@ public:
|
|
37
37
|
* @param throwIfNotFound fail if a stream is required and not found
|
38
38
|
* @return the new stream
|
39
39
|
*/
|
40
|
-
virtual
|
41
|
-
const = 0;
|
40
|
+
virtual unique_ptr<SeekableInputStream> getStream(const StreamIdentifier &si, bool throwIfNotFound) const = 0;
|
42
41
|
|
43
42
|
/**
|
44
43
|
* visit all streams of given node and execute visitor logic
|
@@ -63,7 +62,7 @@ public:
|
|
63
62
|
* Get the RowGroupIndex.
|
64
63
|
* @return a vector of RowIndex belonging to the stripe
|
65
64
|
*/
|
66
|
-
virtual
|
65
|
+
virtual unique_ptr<proto::RowIndex> getRowGroupIndex(const StreamIdentifier &si) const = 0;
|
67
66
|
|
68
67
|
/**
|
69
68
|
* Get stride index provider which is used by string dictionary reader to
|
@@ -84,8 +83,7 @@ public:
|
|
84
83
|
* @param throwIfNotFound fail if a stream is required and not found
|
85
84
|
* @return the new stream
|
86
85
|
*/
|
87
|
-
virtual
|
88
|
-
bool throwIfNotFound) const = 0;
|
86
|
+
virtual unique_ptr<SeekableInputStream> getStream(const StreamIdentifier &si, bool throwIfNotFound) const = 0;
|
89
87
|
|
90
88
|
/**
|
91
89
|
* visit all streams of given node and execute visitor logic
|
@@ -110,7 +108,7 @@ public:
|
|
110
108
|
* Get the RowGroupIndex.
|
111
109
|
* @return a vector of RowIndex belonging to the stripe
|
112
110
|
*/
|
113
|
-
virtual
|
111
|
+
virtual unique_ptr<proto::RowIndex> getRowGroupIndex(const StreamIdentifier &si) const = 0;
|
114
112
|
|
115
113
|
/**
|
116
114
|
* Get stride index provider which is used by string dictionary reader to
|
@@ -157,10 +155,10 @@ public:
|
|
157
155
|
}
|
158
156
|
|
159
157
|
// Creates a reader for the given stripe.
|
160
|
-
static
|
161
|
-
|
162
|
-
|
163
|
-
|
158
|
+
static unique_ptr<SelectiveColumnReader> build(const std::shared_ptr<const dwio::common::TypeWithId> &requestedType,
|
159
|
+
const std::shared_ptr<const dwio::common::TypeWithId> &dataType,
|
160
|
+
StripeStreams &stripe, common::ScanSpec *scanSpec,
|
161
|
+
uint32_t sequence = 0);
|
164
162
|
|
165
163
|
// Seeks to offset and reads the rows in 'rows' and applies
|
166
164
|
// filters and value processing as given by 'scanSpec supplied at
|
@@ -336,7 +334,7 @@ public:
|
|
336
334
|
return kind_;
|
337
335
|
}
|
338
336
|
|
339
|
-
virtual
|
337
|
+
virtual unique_ptr<Filter> clone() const = 0;
|
340
338
|
|
341
339
|
/**
|
342
340
|
* A filter becomes non-deterministic when applies to nested column,
|
@@ -17,8 +17,8 @@
|
|
17
17
|
#include "duckdb/common/types/column/column_data_collection.hpp"
|
18
18
|
#endif
|
19
19
|
|
20
|
-
#include "parquet_types.h"
|
21
20
|
#include "column_writer.hpp"
|
21
|
+
#include "parquet_types.h"
|
22
22
|
#include "thrift/protocol/TCompactProtocol.h"
|
23
23
|
|
24
24
|
namespace duckdb {
|
@@ -27,13 +27,31 @@ class FileOpener;
|
|
27
27
|
|
28
28
|
struct PreparedRowGroup {
|
29
29
|
duckdb_parquet::format::RowGroup row_group;
|
30
|
-
vector<
|
30
|
+
vector<unique_ptr<ColumnWriterState>> states;
|
31
|
+
vector<shared_ptr<StringHeap>> heaps;
|
32
|
+
};
|
33
|
+
|
34
|
+
struct FieldID;
|
35
|
+
struct ChildFieldIDs {
|
36
|
+
ChildFieldIDs();
|
37
|
+
ChildFieldIDs Copy() const;
|
38
|
+
unique_ptr<case_insensitive_map_t<FieldID>> ids;
|
39
|
+
};
|
40
|
+
|
41
|
+
struct FieldID {
|
42
|
+
static constexpr const auto DUCKDB_FIELD_ID = "__duckdb_field_id";
|
43
|
+
FieldID();
|
44
|
+
explicit FieldID(int32_t field_id);
|
45
|
+
FieldID Copy() const;
|
46
|
+
bool set;
|
47
|
+
int32_t field_id;
|
48
|
+
ChildFieldIDs child_field_ids;
|
31
49
|
};
|
32
50
|
|
33
51
|
class ParquetWriter {
|
34
52
|
public:
|
35
53
|
ParquetWriter(FileSystem &fs, string file_name, vector<LogicalType> types, vector<string> names,
|
36
|
-
duckdb_parquet::format::CompressionCodec::type codec);
|
54
|
+
duckdb_parquet::format::CompressionCodec::type codec, ChildFieldIDs field_ids);
|
37
55
|
|
38
56
|
public:
|
39
57
|
void PrepareRowGroup(ColumnDataCollection &buffer, PreparedRowGroup &result);
|
@@ -62,13 +80,14 @@ private:
|
|
62
80
|
vector<LogicalType> sql_types;
|
63
81
|
vector<string> column_names;
|
64
82
|
duckdb_parquet::format::CompressionCodec::type codec;
|
83
|
+
ChildFieldIDs field_ids;
|
65
84
|
|
66
|
-
|
85
|
+
unique_ptr<BufferedFileWriter> writer;
|
67
86
|
shared_ptr<duckdb_apache::thrift::protocol::TProtocol> protocol;
|
68
87
|
duckdb_parquet::format::FileMetaData file_meta_data;
|
69
88
|
std::mutex lock;
|
70
89
|
|
71
|
-
vector<
|
90
|
+
vector<unique_ptr<ColumnWriter>> column_writers;
|
72
91
|
};
|
73
92
|
|
74
93
|
} // namespace duckdb
|
@@ -28,7 +28,7 @@ public:
|
|
28
28
|
StringColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t schema_idx_p,
|
29
29
|
idx_t max_define_p, idx_t max_repeat_p);
|
30
30
|
|
31
|
-
|
31
|
+
unique_ptr<string_t[]> dict_strings;
|
32
32
|
idx_t fixed_width_string_length;
|
33
33
|
idx_t delta_offset = 0;
|
34
34
|
|
@@ -19,10 +19,9 @@ public:
|
|
19
19
|
|
20
20
|
public:
|
21
21
|
StructColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t schema_idx_p,
|
22
|
-
idx_t max_define_p, idx_t max_repeat_p,
|
23
|
-
vector<duckdb::unique_ptr<ColumnReader>> child_readers_p);
|
22
|
+
idx_t max_define_p, idx_t max_repeat_p, vector<unique_ptr<ColumnReader>> child_readers_p);
|
24
23
|
|
25
|
-
vector<
|
24
|
+
vector<unique_ptr<ColumnReader>> child_readers;
|
26
25
|
|
27
26
|
public:
|
28
27
|
ColumnReader *GetChildReader(idx_t child_idx);
|
@@ -17,13 +17,13 @@ namespace duckdb {
|
|
17
17
|
|
18
18
|
class ZStdFileSystem : public CompressedFileSystem {
|
19
19
|
public:
|
20
|
-
|
20
|
+
unique_ptr<FileHandle> OpenCompressedFile(unique_ptr<FileHandle> handle, bool write) override;
|
21
21
|
|
22
22
|
std::string GetName() const override {
|
23
23
|
return "ZStdFileSystem";
|
24
24
|
}
|
25
25
|
|
26
|
-
|
26
|
+
unique_ptr<StreamWrapper> CreateStream() override;
|
27
27
|
idx_t InBufferSize() override;
|
28
28
|
idx_t OutBufferSize() override;
|
29
29
|
};
|
@@ -1,7 +1,8 @@
|
|
1
1
|
#define DUCKDB_EXTENSION_MAIN
|
2
2
|
|
3
|
-
#include "duckdb.hpp"
|
4
3
|
#include "parquet_extension.hpp"
|
4
|
+
|
5
|
+
#include "duckdb.hpp"
|
5
6
|
#include "parquet_metadata.hpp"
|
6
7
|
#include "parquet_reader.hpp"
|
7
8
|
#include "parquet_writer.hpp"
|
@@ -14,15 +15,18 @@
|
|
14
15
|
#include <vector>
|
15
16
|
#ifndef DUCKDB_AMALGAMATION
|
16
17
|
#include "duckdb/catalog/catalog.hpp"
|
18
|
+
#include "duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp"
|
17
19
|
#include "duckdb/common/constants.hpp"
|
18
20
|
#include "duckdb/common/enums/file_compression_type.hpp"
|
19
21
|
#include "duckdb/common/field_writer.hpp"
|
20
22
|
#include "duckdb/common/file_system.hpp"
|
23
|
+
#include "duckdb/common/multi_file_reader.hpp"
|
21
24
|
#include "duckdb/common/types/chunk_collection.hpp"
|
22
25
|
#include "duckdb/function/copy_function.hpp"
|
23
26
|
#include "duckdb/function/table_function.hpp"
|
24
27
|
#include "duckdb/main/client_context.hpp"
|
25
28
|
#include "duckdb/main/config.hpp"
|
29
|
+
#include "duckdb/main/extension_util.hpp"
|
26
30
|
#include "duckdb/parser/expression/constant_expression.hpp"
|
27
31
|
#include "duckdb/parser/expression/function_expression.hpp"
|
28
32
|
#include "duckdb/parser/parsed_data/create_copy_function_info.hpp"
|
@@ -30,10 +34,7 @@
|
|
30
34
|
#include "duckdb/parser/tableref/table_function_ref.hpp"
|
31
35
|
#include "duckdb/planner/operator/logical_get.hpp"
|
32
36
|
#include "duckdb/storage/statistics/base_statistics.hpp"
|
33
|
-
#include "duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp"
|
34
|
-
#include "duckdb/common/multi_file_reader.hpp"
|
35
37
|
#include "duckdb/storage/table/row_group.hpp"
|
36
|
-
#include "duckdb/main/extension_util.hpp"
|
37
38
|
#endif
|
38
39
|
|
39
40
|
namespace duckdb {
|
@@ -115,6 +116,7 @@ struct ParquetWriteBindData : public TableFunctionData {
|
|
115
116
|
vector<string> column_names;
|
116
117
|
duckdb_parquet::format::CompressionCodec::type codec = duckdb_parquet::format::CompressionCodec::SNAPPY;
|
117
118
|
idx_t row_group_size = RowGroup::ROW_GROUP_SIZE;
|
119
|
+
ChildFieldIDs field_ids;
|
118
120
|
};
|
119
121
|
|
120
122
|
struct ParquetWriteGlobalState : public GlobalFunctionData {
|
@@ -123,10 +125,12 @@ struct ParquetWriteGlobalState : public GlobalFunctionData {
|
|
123
125
|
|
124
126
|
struct ParquetWriteLocalState : public LocalFunctionData {
|
125
127
|
explicit ParquetWriteLocalState(ClientContext &context, const vector<LogicalType> &types)
|
126
|
-
: buffer(
|
128
|
+
: buffer(context, types, ColumnDataAllocatorType::HYBRID) {
|
129
|
+
buffer.InitializeAppend(append_state);
|
127
130
|
}
|
128
131
|
|
129
132
|
ColumnDataCollection buffer;
|
133
|
+
ColumnDataAppendState append_state;
|
130
134
|
};
|
131
135
|
|
132
136
|
void ParquetOptions::Serialize(FieldWriter &writer) const {
|
@@ -171,7 +175,6 @@ public:
|
|
171
175
|
table_function.serialize = ParquetScanSerialize;
|
172
176
|
table_function.deserialize = ParquetScanDeserialize;
|
173
177
|
table_function.get_batch_info = ParquetGetBatchInfo;
|
174
|
-
|
175
178
|
table_function.projection_pushdown = true;
|
176
179
|
table_function.filter_pushdown = true;
|
177
180
|
table_function.filter_prune = true;
|
@@ -293,7 +296,7 @@ public:
|
|
293
296
|
ParquetOptions parquet_options(context);
|
294
297
|
for (auto &kv : input.named_parameters) {
|
295
298
|
auto loption = StringUtil::Lower(kv.first);
|
296
|
-
if (MultiFileReader::ParseOption(kv.first, kv.second, parquet_options.file_options)) {
|
299
|
+
if (MultiFileReader::ParseOption(kv.first, kv.second, parquet_options.file_options, context)) {
|
297
300
|
continue;
|
298
301
|
}
|
299
302
|
if (loption == "binary_as_string") {
|
@@ -302,9 +305,7 @@ public:
|
|
302
305
|
parquet_options.file_row_number = BooleanValue::Get(kv.second);
|
303
306
|
}
|
304
307
|
}
|
305
|
-
|
306
|
-
parquet_options.file_options.hive_partitioning = MultiFileReaderOptions::AutoDetectHivePartitioning(files);
|
307
|
-
}
|
308
|
+
parquet_options.file_options.AutoDetectHivePartitioning(files, context);
|
308
309
|
return ParquetScanBindInternal(context, std::move(files), return_types, names, parquet_options);
|
309
310
|
}
|
310
311
|
|
@@ -371,7 +372,7 @@ public:
|
|
371
372
|
}
|
372
373
|
MultiFileReader::InitializeReader(*reader, bind_data.parquet_options.file_options, bind_data.reader_bind,
|
373
374
|
bind_data.types, bind_data.names, input.column_ids, input.filters,
|
374
|
-
bind_data.files[0]);
|
375
|
+
bind_data.files[0], context);
|
375
376
|
}
|
376
377
|
|
377
378
|
result->column_ids = input.column_ids;
|
@@ -518,6 +519,7 @@ public:
|
|
518
519
|
static void ParquetComplexFilterPushdown(ClientContext &context, LogicalGet &get, FunctionData *bind_data_p,
|
519
520
|
vector<unique_ptr<Expression>> &filters) {
|
520
521
|
auto &data = bind_data_p->Cast<ParquetReadBindData>();
|
522
|
+
|
521
523
|
auto reset_reader = MultiFileReader::ComplexFilterPushdown(context, data.files,
|
522
524
|
data.parquet_options.file_options, get, filters);
|
523
525
|
if (reset_reader) {
|
@@ -564,9 +566,10 @@ public:
|
|
564
566
|
shared_ptr<ParquetReader> reader;
|
565
567
|
try {
|
566
568
|
reader = make_shared<ParquetReader>(context, file, pq_options);
|
567
|
-
MultiFileReader::InitializeReader(
|
568
|
-
|
569
|
-
|
569
|
+
MultiFileReader::InitializeReader(*reader, bind_data.parquet_options.file_options,
|
570
|
+
bind_data.reader_bind, bind_data.types, bind_data.names,
|
571
|
+
parallel_state.column_ids, parallel_state.filters,
|
572
|
+
bind_data.files.front(), context);
|
570
573
|
} catch (...) {
|
571
574
|
parallel_lock.lock();
|
572
575
|
parallel_state.error_opening_file = true;
|
@@ -585,8 +588,157 @@ public:
|
|
585
588
|
}
|
586
589
|
};
|
587
590
|
|
591
|
+
static case_insensitive_map_t<LogicalType> GetChildNameToTypeMap(const LogicalType &type) {
|
592
|
+
case_insensitive_map_t<LogicalType> name_to_type_map;
|
593
|
+
switch (type.id()) {
|
594
|
+
case LogicalTypeId::LIST:
|
595
|
+
name_to_type_map.emplace("element", ListType::GetChildType(type));
|
596
|
+
break;
|
597
|
+
case LogicalTypeId::MAP:
|
598
|
+
name_to_type_map.emplace("key", MapType::KeyType(type));
|
599
|
+
name_to_type_map.emplace("value", MapType::ValueType(type));
|
600
|
+
break;
|
601
|
+
case LogicalTypeId::STRUCT:
|
602
|
+
for (auto &child_type : StructType::GetChildTypes(type)) {
|
603
|
+
if (child_type.first == FieldID::DUCKDB_FIELD_ID) {
|
604
|
+
throw BinderException("Cannot have column named \"%s\" with FIELD_IDS", FieldID::DUCKDB_FIELD_ID);
|
605
|
+
}
|
606
|
+
name_to_type_map.emplace(child_type);
|
607
|
+
}
|
608
|
+
break;
|
609
|
+
default: // LCOV_EXCL_START
|
610
|
+
throw InternalException("Unexpected type in GetChildNameToTypeMap");
|
611
|
+
} // LCOV_EXCL_STOP
|
612
|
+
return name_to_type_map;
|
613
|
+
}
|
614
|
+
|
615
|
+
static void GetChildNamesAndTypes(const LogicalType &type, vector<string> &child_names,
|
616
|
+
vector<LogicalType> &child_types) {
|
617
|
+
switch (type.id()) {
|
618
|
+
case LogicalTypeId::LIST:
|
619
|
+
child_names.emplace_back("element");
|
620
|
+
child_types.emplace_back(ListType::GetChildType(type));
|
621
|
+
break;
|
622
|
+
case LogicalTypeId::MAP:
|
623
|
+
child_names.emplace_back("key");
|
624
|
+
child_names.emplace_back("value");
|
625
|
+
child_types.emplace_back(MapType::KeyType(type));
|
626
|
+
child_types.emplace_back(MapType::ValueType(type));
|
627
|
+
break;
|
628
|
+
case LogicalTypeId::STRUCT:
|
629
|
+
for (auto &child_type : StructType::GetChildTypes(type)) {
|
630
|
+
child_names.emplace_back(child_type.first);
|
631
|
+
child_types.emplace_back(child_type.second);
|
632
|
+
}
|
633
|
+
break;
|
634
|
+
default: // LCOV_EXCL_START
|
635
|
+
throw InternalException("Unexpected type in GetChildNamesAndTypes");
|
636
|
+
} // LCOV_EXCL_STOP
|
637
|
+
}
|
638
|
+
|
639
|
+
static void GenerateFieldIDs(ChildFieldIDs &field_ids, idx_t &field_id, const vector<string> &names,
|
640
|
+
const vector<LogicalType> &sql_types) {
|
641
|
+
D_ASSERT(names.size() == sql_types.size());
|
642
|
+
for (idx_t col_idx = 0; col_idx < names.size(); col_idx++) {
|
643
|
+
const auto &col_name = names[col_idx];
|
644
|
+
auto inserted = field_ids.ids->insert(make_pair(col_name, FieldID(field_id++)));
|
645
|
+
D_ASSERT(inserted.second);
|
646
|
+
|
647
|
+
const auto &col_type = sql_types[col_idx];
|
648
|
+
if (col_type.id() != LogicalTypeId::LIST && col_type.id() != LogicalTypeId::MAP &&
|
649
|
+
col_type.id() != LogicalTypeId::STRUCT) {
|
650
|
+
continue;
|
651
|
+
}
|
652
|
+
|
653
|
+
// Cannot use GetChildNameToTypeMap here because we lose order, and we want to generate depth-first
|
654
|
+
vector<string> child_names;
|
655
|
+
vector<LogicalType> child_types;
|
656
|
+
GetChildNamesAndTypes(col_type, child_names, child_types);
|
657
|
+
|
658
|
+
GenerateFieldIDs(inserted.first->second.child_field_ids, field_id, child_names, child_types);
|
659
|
+
}
|
660
|
+
}
|
661
|
+
|
662
|
+
static void GetFieldIDs(const Value &field_ids_value, ChildFieldIDs &field_ids,
|
663
|
+
unordered_set<uint32_t> &unique_field_ids,
|
664
|
+
const case_insensitive_map_t<LogicalType> &name_to_type_map) {
|
665
|
+
const auto &struct_type = field_ids_value.type();
|
666
|
+
if (struct_type.id() != LogicalTypeId::STRUCT) {
|
667
|
+
throw BinderException(
|
668
|
+
"Expected FIELD_IDS to be a STRUCT, e.g., {col1: 42, col2: {%s: 43, nested_col: 44}, col3: 44}",
|
669
|
+
FieldID::DUCKDB_FIELD_ID);
|
670
|
+
}
|
671
|
+
const auto &struct_children = StructValue::GetChildren(field_ids_value);
|
672
|
+
D_ASSERT(StructType::GetChildTypes(struct_type).size() == struct_children.size());
|
673
|
+
for (idx_t i = 0; i < struct_children.size(); i++) {
|
674
|
+
const auto &col_name = StringUtil::Lower(StructType::GetChildName(struct_type, i));
|
675
|
+
if (col_name == FieldID::DUCKDB_FIELD_ID) {
|
676
|
+
continue;
|
677
|
+
}
|
678
|
+
|
679
|
+
auto it = name_to_type_map.find(col_name);
|
680
|
+
if (it == name_to_type_map.end()) {
|
681
|
+
string names;
|
682
|
+
for (const auto &name : name_to_type_map) {
|
683
|
+
if (!names.empty()) {
|
684
|
+
names += ", ";
|
685
|
+
}
|
686
|
+
names += name.first;
|
687
|
+
}
|
688
|
+
throw BinderException("Column name \"%s\" specified in FIELD_IDS not found. Available column names: [%s]",
|
689
|
+
col_name, names);
|
690
|
+
}
|
691
|
+
D_ASSERT(field_ids.ids->find(col_name) == field_ids.ids->end()); // Caught by STRUCT - deduplicates keys
|
692
|
+
|
693
|
+
const auto &child_value = struct_children[i];
|
694
|
+
const auto &child_type = child_value.type();
|
695
|
+
optional_ptr<const Value> field_id_value;
|
696
|
+
optional_ptr<const Value> child_field_ids_value;
|
697
|
+
|
698
|
+
if (child_type.id() == LogicalTypeId::STRUCT) {
|
699
|
+
const auto &nested_children = StructValue::GetChildren(child_value);
|
700
|
+
D_ASSERT(StructType::GetChildTypes(child_type).size() == nested_children.size());
|
701
|
+
for (idx_t nested_i = 0; nested_i < nested_children.size(); nested_i++) {
|
702
|
+
const auto &field_id_or_nested_col = StructType::GetChildName(child_type, nested_i);
|
703
|
+
if (field_id_or_nested_col == FieldID::DUCKDB_FIELD_ID) {
|
704
|
+
field_id_value = &nested_children[nested_i];
|
705
|
+
} else {
|
706
|
+
child_field_ids_value = &child_value;
|
707
|
+
}
|
708
|
+
}
|
709
|
+
} else {
|
710
|
+
field_id_value = &child_value;
|
711
|
+
}
|
712
|
+
|
713
|
+
FieldID field_id;
|
714
|
+
if (field_id_value) {
|
715
|
+
Value field_id_integer_value = field_id_value->DefaultCastAs(LogicalType::INTEGER);
|
716
|
+
const uint32_t field_id_int = IntegerValue::Get(field_id_integer_value);
|
717
|
+
if (!unique_field_ids.insert(field_id_int).second) {
|
718
|
+
throw BinderException("Duplicate field_id %s found in FIELD_IDS", field_id_integer_value.ToString());
|
719
|
+
}
|
720
|
+
field_id = FieldID(field_id_int);
|
721
|
+
}
|
722
|
+
auto inserted = field_ids.ids->insert(make_pair(col_name, std::move(field_id)));
|
723
|
+
D_ASSERT(inserted.second);
|
724
|
+
|
725
|
+
if (child_field_ids_value) {
|
726
|
+
const auto &col_type = it->second;
|
727
|
+
if (col_type.id() != LogicalTypeId::LIST && col_type.id() != LogicalTypeId::MAP &&
|
728
|
+
col_type.id() != LogicalTypeId::STRUCT) {
|
729
|
+
throw BinderException("Column \"%s\" with type \"%s\" cannot have a nested FIELD_IDS specification",
|
730
|
+
col_name, LogicalTypeIdToString(col_type.id()));
|
731
|
+
}
|
732
|
+
|
733
|
+
GetFieldIDs(*child_field_ids_value, inserted.first->second.child_field_ids, unique_field_ids,
|
734
|
+
GetChildNameToTypeMap(col_type));
|
735
|
+
}
|
736
|
+
}
|
737
|
+
}
|
738
|
+
|
588
739
|
unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyInfo &info, vector<string> &names,
|
589
740
|
vector<LogicalType> &sql_types) {
|
741
|
+
D_ASSERT(names.size() == sql_types.size());
|
590
742
|
auto bind_data = make_uniq<ParquetWriteBindData>();
|
591
743
|
for (auto &option : info.options) {
|
592
744
|
auto loption = StringUtil::Lower(option.first);
|
@@ -609,7 +761,27 @@ unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyInfo &info
|
|
609
761
|
continue;
|
610
762
|
}
|
611
763
|
}
|
612
|
-
throw
|
764
|
+
throw BinderException("Expected %s argument to be either [uncompressed, snappy, gzip or zstd]", loption);
|
765
|
+
} else if (loption == "field_ids") {
|
766
|
+
if (option.second.size() != 1) {
|
767
|
+
throw BinderException("FIELD_IDS requires exactly one argument");
|
768
|
+
}
|
769
|
+
if (option.second[0].type().id() == LogicalTypeId::VARCHAR &&
|
770
|
+
StringUtil::Lower(StringValue::Get(option.second[0])) == "auto") {
|
771
|
+
idx_t field_id = 0;
|
772
|
+
GenerateFieldIDs(bind_data->field_ids, field_id, names, sql_types);
|
773
|
+
} else {
|
774
|
+
unordered_set<uint32_t> unique_field_ids;
|
775
|
+
case_insensitive_map_t<LogicalType> name_to_type_map;
|
776
|
+
for (idx_t col_idx = 0; col_idx < names.size(); col_idx++) {
|
777
|
+
if (names[col_idx] == FieldID::DUCKDB_FIELD_ID) {
|
778
|
+
throw BinderException("Cannot have a column named \"%s\" when writing FIELD_IDS",
|
779
|
+
FieldID::DUCKDB_FIELD_ID);
|
780
|
+
}
|
781
|
+
name_to_type_map.emplace(names[col_idx], sql_types[col_idx]);
|
782
|
+
}
|
783
|
+
GetFieldIDs(option.second[0], bind_data->field_ids, unique_field_ids, name_to_type_map);
|
784
|
+
}
|
613
785
|
} else {
|
614
786
|
throw NotImplementedException("Unrecognized option for PARQUET: %s", option.first.c_str());
|
615
787
|
}
|
@@ -625,8 +797,8 @@ unique_ptr<GlobalFunctionData> ParquetWriteInitializeGlobal(ClientContext &conte
|
|
625
797
|
auto &parquet_bind = bind_data.Cast<ParquetWriteBindData>();
|
626
798
|
|
627
799
|
auto &fs = FileSystem::GetFileSystem(context);
|
628
|
-
global_state->writer =
|
629
|
-
|
800
|
+
global_state->writer = make_uniq<ParquetWriter>(fs, file_path, parquet_bind.sql_types, parquet_bind.column_names,
|
801
|
+
parquet_bind.codec, parquet_bind.field_ids.Copy());
|
630
802
|
return std::move(global_state);
|
631
803
|
}
|
632
804
|
|
@@ -637,12 +809,12 @@ void ParquetWriteSink(ExecutionContext &context, FunctionData &bind_data_p, Glob
|
|
637
809
|
auto &local_state = lstate.Cast<ParquetWriteLocalState>();
|
638
810
|
|
639
811
|
// append data to the local (buffered) chunk collection
|
640
|
-
local_state.buffer.Append(input);
|
812
|
+
local_state.buffer.Append(local_state.append_state, input);
|
641
813
|
if (local_state.buffer.Count() > bind_data.row_group_size) {
|
642
814
|
// if the chunk collection exceeds a certain size we flush it to the parquet file
|
815
|
+
local_state.append_state.current_chunk_state.handles.clear();
|
643
816
|
global_state.writer->Flush(local_state.buffer);
|
644
|
-
|
645
|
-
local_state.buffer.Reset();
|
817
|
+
local_state.buffer.InitializeAppend(local_state.append_state);
|
646
818
|
}
|
647
819
|
}
|
648
820
|
|
@@ -48,7 +48,7 @@ using duckdb_parquet::format::SchemaElement;
|
|
48
48
|
using duckdb_parquet::format::Statistics;
|
49
49
|
using duckdb_parquet::format::Type;
|
50
50
|
|
51
|
-
static
|
51
|
+
static unique_ptr<duckdb_apache::thrift::protocol::TProtocol>
|
52
52
|
CreateThriftProtocol(Allocator &allocator, FileHandle &file_handle, bool prefetch_mode) {
|
53
53
|
auto transport = make_shared<ThriftFileTransport>(allocator, file_handle, prefetch_mode);
|
54
54
|
return make_uniq<duckdb_apache::thrift::protocol::TCompactProtocolT<ThriftFileTransport>>(std::move(transport));
|
@@ -76,7 +76,7 @@ static shared_ptr<ParquetFileMetadataCache> LoadMetadata(Allocator &allocator, F
|
|
76
76
|
}
|
77
77
|
// read four-byte footer length from just before the end magic bytes
|
78
78
|
auto footer_len = *reinterpret_cast<uint32_t *>(buf.ptr);
|
79
|
-
if (footer_len
|
79
|
+
if (footer_len == 0 || file_size < 12 + footer_len) {
|
80
80
|
throw InvalidInputException("Footer length error in file '%s'", file_handle.path);
|
81
81
|
}
|
82
82
|
auto metadata_pos = file_size - (footer_len + 8);
|
@@ -271,7 +271,7 @@ unique_ptr<ColumnReader> ParquetReader::CreateReaderRecursive(idx_t depth, idx_t
|
|
271
271
|
}
|
272
272
|
if (s_ele.__isset.num_children && s_ele.num_children > 0) { // inner node
|
273
273
|
child_list_t<LogicalType> child_types;
|
274
|
-
vector<
|
274
|
+
vector<unique_ptr<ColumnReader>> child_readers;
|
275
275
|
|
276
276
|
idx_t c_idx = 0;
|
277
277
|
while (c_idx < (idx_t)s_ele.num_children) {
|
@@ -287,7 +287,7 @@ unique_ptr<ColumnReader> ParquetReader::CreateReaderRecursive(idx_t depth, idx_t
|
|
287
287
|
c_idx++;
|
288
288
|
}
|
289
289
|
D_ASSERT(!child_types.empty());
|
290
|
-
|
290
|
+
unique_ptr<ColumnReader> result;
|
291
291
|
LogicalType result_type;
|
292
292
|
|
293
293
|
bool is_repeated = repetition_type == FieldRepetitionType::REPEATED;
|
@@ -429,7 +429,7 @@ ParquetOptions::ParquetOptions(ClientContext &context) {
|
|
429
429
|
|
430
430
|
ParquetReader::ParquetReader(ClientContext &context_p, string file_name_p, ParquetOptions parquet_options_p)
|
431
431
|
: fs(FileSystem::GetFileSystem(context_p)), allocator(BufferAllocator::Get(context_p)),
|
432
|
-
parquet_options(parquet_options_p) {
|
432
|
+
parquet_options(std::move(parquet_options_p)) {
|
433
433
|
file_name = std::move(file_name_p);
|
434
434
|
file_handle = fs.OpenFile(file_name, FileFlags::FILE_FLAGS_READ);
|
435
435
|
if (!file_handle->CanSeek()) {
|
@@ -457,7 +457,7 @@ ParquetReader::ParquetReader(ClientContext &context_p, string file_name_p, Parqu
|
|
457
457
|
ParquetReader::ParquetReader(ClientContext &context_p, ParquetOptions parquet_options_p,
|
458
458
|
shared_ptr<ParquetFileMetadataCache> metadata_p)
|
459
459
|
: fs(FileSystem::GetFileSystem(context_p)), allocator(BufferAllocator::Get(context_p)),
|
460
|
-
metadata(std::move(metadata_p)), parquet_options(parquet_options_p) {
|
460
|
+
metadata(std::move(metadata_p)), parquet_options(std::move(parquet_options_p)) {
|
461
461
|
InitializeSchema();
|
462
462
|
}
|
463
463
|
|
@@ -1,12 +1,13 @@
|
|
1
1
|
#include "parquet_statistics.hpp"
|
2
|
+
|
3
|
+
#include "duckdb.hpp"
|
2
4
|
#include "parquet_decimal_utils.hpp"
|
3
5
|
#include "parquet_timestamp.hpp"
|
4
6
|
#include "string_column_reader.hpp"
|
5
|
-
#include "duckdb.hpp"
|
6
7
|
#ifndef DUCKDB_AMALGAMATION
|
7
8
|
#include "duckdb/common/types/blob.hpp"
|
8
|
-
#include "duckdb/common/types/value.hpp"
|
9
9
|
#include "duckdb/common/types/time.hpp"
|
10
|
+
#include "duckdb/common/types/value.hpp"
|
10
11
|
#endif
|
11
12
|
|
12
13
|
namespace duckdb {
|
@@ -14,9 +15,9 @@ namespace duckdb {
|
|
14
15
|
using duckdb_parquet::format::ConvertedType;
|
15
16
|
using duckdb_parquet::format::Type;
|
16
17
|
|
17
|
-
static
|
18
|
-
|
19
|
-
|
18
|
+
static unique_ptr<BaseStatistics> CreateNumericStats(const LogicalType &type,
|
19
|
+
const duckdb_parquet::format::SchemaElement &schema_ele,
|
20
|
+
const duckdb_parquet::format::Statistics &parquet_stats) {
|
20
21
|
auto stats = NumericStats::CreateUnknown(type);
|
21
22
|
|
22
23
|
// for reasons unknown to science, Parquet defines *both* `min` and `min_value` as well as `max` and
|
@@ -226,7 +227,7 @@ unique_ptr<BaseStatistics> ParquetStatisticsUtils::TransformColumnStatistics(con
|
|
226
227
|
return nullptr;
|
227
228
|
}
|
228
229
|
auto &parquet_stats = column_chunk.meta_data.statistics;
|
229
|
-
|
230
|
+
unique_ptr<BaseStatistics> row_group_stats;
|
230
231
|
|
231
232
|
switch (type.id()) {
|
232
233
|
case LogicalTypeId::UTINYINT:
|