duckdb 0.8.2-dev161.0 → 0.8.2-dev1764.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +15 -12
- package/binding.gyp.in +1 -1
- package/configure.py +1 -1
- package/duckdb_extension_config.cmake +10 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
- package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
- package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
- package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
- package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
- package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
- package/src/duckdb/extension/icu/icu_extension.cpp +3 -3
- package/src/duckdb/extension/json/include/json_common.hpp +47 -231
- package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
- package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
- package/src/duckdb/extension/json/json_common.cpp +272 -40
- package/src/duckdb/extension/json/json_functions/json_create.cpp +21 -2
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +91 -38
- package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
- package/src/duckdb/extension/json/json_functions.cpp +24 -24
- package/src/duckdb/extension/json/json_scan.cpp +3 -6
- package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
- package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
- package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
- package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
- package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
- package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
- package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
- package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
- package/src/duckdb/extension/parquet/parquet_extension.cpp +194 -20
- package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
- package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
- package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
- package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
- package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
- package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
- package/src/duckdb/src/common/allocator.cpp +14 -2
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +79 -12
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +44 -19
- package/src/duckdb/src/common/assert.cpp +3 -0
- package/src/duckdb/src/common/enum_util.cpp +4619 -4446
- package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/exception.cpp +2 -2
- package/src/duckdb/src/common/extra_type_info.cpp +506 -0
- package/src/duckdb/src/common/file_system.cpp +19 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
- package/src/duckdb/src/common/local_file_system.cpp +14 -14
- package/src/duckdb/src/common/multi_file_reader.cpp +184 -20
- package/src/duckdb/src/common/operator/cast_operators.cpp +35 -1
- package/src/duckdb/src/common/radix_partitioning.cpp +26 -8
- package/src/duckdb/src/common/re2_regex.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
- package/src/duckdb/src/common/sort/partition_state.cpp +70 -50
- package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
- package/src/duckdb/src/common/types/bit.cpp +51 -0
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
- package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
- package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
- package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
- package/src/duckdb/src/common/types/date.cpp +9 -0
- package/src/duckdb/src/common/types/list_segment.cpp +24 -74
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
- package/src/duckdb/src/common/types/uuid.cpp +2 -2
- package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
- package/src/duckdb/src/common/types/value.cpp +11 -6
- package/src/duckdb/src/common/types.cpp +9 -656
- package/src/duckdb/src/common/virtual_file_system.cpp +138 -1
- package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
- package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
- package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
- package/src/duckdb/src/core_functions/function_list.cpp +4 -2
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
- package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
- package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
- package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
- package/src/duckdb/src/execution/expression_executor.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +149 -139
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
- package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
- package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
- package/src/duckdb/src/execution/index/art/node.cpp +113 -120
- package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
- package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
- package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
- package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
- package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
- package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
- package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
- package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
- package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +444 -284
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +28 -12
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +23 -4
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +41 -5
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
- package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
- package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
- package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -2
- package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
- package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
- package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +56 -33
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +17 -13
- package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
- package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
- package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
- package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
- package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
- package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
- package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
- package/src/duckdb/src/function/function.cpp +3 -1
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -0
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
- package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
- package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
- package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
- package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
- package/src/duckdb/src/function/table/arrow.cpp +19 -0
- package/src/duckdb/src/function/table/arrow_conversion.cpp +35 -1
- package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
- package/src/duckdb/src/function/table/read_csv.cpp +100 -17
- package/src/duckdb/src/function/table/system/test_all_types.cpp +7 -0
- package/src/duckdb/src/function/table/system_functions.cpp +1 -0
- package/src/duckdb/src/function/table/table_scan.cpp +9 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +46 -2
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
- package/src/duckdb/src/include/duckdb/common/dl.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +616 -584
- package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
- package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
- package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +219 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
- package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
- package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +43 -3
- package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +23 -8
- package/src/duckdb/src/include/duckdb/common/string_util.hpp +11 -0
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
- package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +5 -2
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/types.hpp +1 -15
- package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +38 -97
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +31 -11
- package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +3 -10
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +12 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
- package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
- package/src/duckdb/src/include/duckdb/main/extension/generated_extension_loader.hpp +22 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
- package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
- package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
- package/src/duckdb/src/include/duckdb/main/settings.hpp +39 -1
- package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
- package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
- package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
- package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
- package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
- package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
- package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +12 -5
- package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
- package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
- package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
- package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
- package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
- package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
- package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
- package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
- package/src/duckdb/src/include/duckdb.h +28 -0
- package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
- package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
- package/src/duckdb/src/main/config.cpp +4 -0
- package/src/duckdb/src/main/database.cpp +1 -1
- package/src/duckdb/src/main/extension/extension_helper.cpp +93 -88
- package/src/duckdb/src/main/extension/extension_install.cpp +9 -0
- package/src/duckdb/src/main/extension/extension_load.cpp +10 -1
- package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
- package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
- package/src/duckdb/src/main/relation.cpp +6 -5
- package/src/duckdb/src/main/settings/settings.cpp +79 -18
- package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
- package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
- package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
- package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
- package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
- package/src/duckdb/src/optimizer/optimizer.cpp +49 -14
- package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
- package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
- package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
- package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
- package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
- package/src/duckdb/src/parallel/executor.cpp +15 -0
- package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
- package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
- package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
- package/src/duckdb/src/parser/expression/case_expression.cpp +0 -13
- package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
- package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
- package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
- package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
- package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
- package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
- package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/parameter_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
- package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
- package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
- package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
- package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
- package/src/duckdb/src/parser/parser.cpp +8 -2
- package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
- package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
- package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
- package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
- package/src/duckdb/src/parser/query_node.cpp +15 -37
- package/src/duckdb/src/parser/result_modifier.cpp +0 -74
- package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
- package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
- package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
- package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
- package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -23
- package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
- package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
- package/src/duckdb/src/parser/tableref.cpp +0 -44
- package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
- package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
- package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
- package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
- package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
- package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
- package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
- package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
- package/src/duckdb/src/parser/transformer.cpp +15 -0
- package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
- package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
- package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
- package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +5 -4
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -49
- package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +64 -26
- package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
- package/src/duckdb/src/planner/binder.cpp +44 -31
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
- package/src/duckdb/src/planner/expression_binder.cpp +3 -0
- package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
- package/src/duckdb/src/planner/logical_operator.cpp +5 -0
- package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
- package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
- package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
- package/src/duckdb/src/planner/operator/logical_get.cpp +9 -4
- package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
- package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
- package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
- package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
- package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
- package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
- package/src/duckdb/src/storage/compression/rle.cpp +0 -1
- package/src/duckdb/src/storage/data_table.cpp +1 -1
- package/src/duckdb/src/storage/local_storage.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +340 -0
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
- package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +86 -0
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +166 -0
- package/src/duckdb/src/storage/serialization/serialize_types.cpp +127 -0
- package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
- package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
- package/src/duckdb/src/storage/table/row_group.cpp +25 -9
- package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
- package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
- package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
- package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
- package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
- package/src/duckdb/ub_src_common.cpp +2 -0
- package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
- package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
- package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
- package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
- package/src/duckdb/ub_src_optimizer.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
- package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
- package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
- package/src/duckdb/ub_src_planner_operator.cpp +4 -0
- package/src/duckdb/ub_src_storage_serialization.cpp +10 -0
- package/src/statement.cpp +10 -3
- package/test/columns.test.ts +24 -1
- package/test/test_all_types.test.ts +234 -0
- package/tsconfig.json +1 -0
- package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
- package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -12,24 +12,23 @@
|
|
12
12
|
|
13
13
|
namespace duckdb {
|
14
14
|
|
15
|
-
|
16
|
-
|
17
|
-
|
15
|
+
using JSONPathType = JSONCommon::JSONPathType;
|
16
|
+
|
17
|
+
static JSONPathType CheckPath(const Value &path_val, string &path, size_t &len) {
|
18
18
|
if (path_val.IsNull()) {
|
19
19
|
throw InvalidInputException("JSON path cannot be NULL");
|
20
20
|
}
|
21
|
-
|
22
|
-
throw InvalidInputException(error);
|
23
|
-
}
|
21
|
+
const auto path_str_val = path_val.DefaultCastAs(LogicalType::VARCHAR);
|
24
22
|
auto path_str = path_str_val.GetValueUnsafe<string_t>();
|
25
23
|
len = path_str.GetSize();
|
26
24
|
auto ptr = path_str.GetData();
|
27
25
|
// Empty strings and invalid $ paths yield an error
|
28
26
|
if (len == 0) {
|
29
|
-
throw
|
27
|
+
throw BinderException("Empty JSON path");
|
30
28
|
}
|
29
|
+
JSONPathType path_type = JSONPathType::REGULAR;
|
31
30
|
if (*ptr == '$') {
|
32
|
-
JSONCommon::
|
31
|
+
path_type = JSONCommon::ValidatePath(ptr, len, true);
|
33
32
|
}
|
34
33
|
// Copy over string to the bind data
|
35
34
|
if (*ptr == '/' || *ptr == '$') {
|
@@ -38,19 +37,20 @@ static void CheckPath(const Value &path_val, string &path, size_t &len) {
|
|
38
37
|
path = "/" + string(ptr, len);
|
39
38
|
len++;
|
40
39
|
}
|
40
|
+
return path_type;
|
41
41
|
}
|
42
42
|
|
43
|
-
JSONReadFunctionData::JSONReadFunctionData(bool constant, string path_p, idx_t len)
|
44
|
-
: constant(constant), path(std::move(path_p)), ptr(path.c_str()), len(len) {
|
43
|
+
JSONReadFunctionData::JSONReadFunctionData(bool constant, string path_p, idx_t len, JSONPathType path_type_p)
|
44
|
+
: constant(constant), path(std::move(path_p)), path_type(path_type_p), ptr(path.c_str()), len(len) {
|
45
45
|
}
|
46
46
|
|
47
47
|
unique_ptr<FunctionData> JSONReadFunctionData::Copy() const {
|
48
|
-
return make_uniq<JSONReadFunctionData>(constant, path, len);
|
48
|
+
return make_uniq<JSONReadFunctionData>(constant, path, len, path_type);
|
49
49
|
}
|
50
50
|
|
51
51
|
bool JSONReadFunctionData::Equals(const FunctionData &other_p) const {
|
52
52
|
auto &other = (const JSONReadFunctionData &)other_p;
|
53
|
-
return constant == other.constant && path == other.path && len == other.len;
|
53
|
+
return constant == other.constant && path == other.path && len == other.len && path_type == other.path_type;
|
54
54
|
}
|
55
55
|
|
56
56
|
unique_ptr<FunctionData> JSONReadFunctionData::Bind(ClientContext &context, ScalarFunction &bound_function,
|
@@ -59,12 +59,16 @@ unique_ptr<FunctionData> JSONReadFunctionData::Bind(ClientContext &context, Scal
|
|
59
59
|
bool constant = false;
|
60
60
|
string path = "";
|
61
61
|
size_t len = 0;
|
62
|
+
JSONPathType path_type = JSONPathType::REGULAR;
|
62
63
|
if (arguments[1]->return_type.id() != LogicalTypeId::SQLNULL && arguments[1]->IsFoldable()) {
|
63
64
|
constant = true;
|
64
65
|
const auto path_val = ExpressionExecutor::EvaluateScalar(context, *arguments[1]);
|
65
|
-
CheckPath(path_val, path, len);
|
66
|
+
path_type = CheckPath(path_val, path, len);
|
67
|
+
}
|
68
|
+
if (path_type == JSONCommon::JSONPathType::WILDCARD) {
|
69
|
+
bound_function.return_type = LogicalType::LIST(bound_function.return_type);
|
66
70
|
}
|
67
|
-
return make_uniq<JSONReadFunctionData>(constant, std::move(path), len);
|
71
|
+
return make_uniq<JSONReadFunctionData>(constant, std::move(path), len, path_type);
|
68
72
|
}
|
69
73
|
|
70
74
|
JSONReadManyFunctionData::JSONReadManyFunctionData(vector<string> paths_p, vector<size_t> lens_p)
|
@@ -90,10 +94,7 @@ unique_ptr<FunctionData> JSONReadManyFunctionData::Bind(ClientContext &context,
|
|
90
94
|
throw ParameterNotResolvedException();
|
91
95
|
}
|
92
96
|
if (!arguments[1]->IsFoldable()) {
|
93
|
-
throw
|
94
|
-
}
|
95
|
-
if (arguments[1]->return_type.id() == LogicalTypeId::SQLNULL) {
|
96
|
-
return make_uniq<JSONReadManyFunctionData>(vector<string>(), vector<size_t>());
|
97
|
+
throw BinderException("List of paths must be constant");
|
97
98
|
}
|
98
99
|
|
99
100
|
vector<string> paths;
|
@@ -102,7 +103,9 @@ unique_ptr<FunctionData> JSONReadManyFunctionData::Bind(ClientContext &context,
|
|
102
103
|
for (auto &path_val : ListValue::GetChildren(paths_val)) {
|
103
104
|
paths.emplace_back("");
|
104
105
|
lens.push_back(0);
|
105
|
-
CheckPath(path_val, paths.back(), lens.back())
|
106
|
+
if (CheckPath(path_val, paths.back(), lens.back()) == JSONPathType::WILDCARD) {
|
107
|
+
throw BinderException("Cannot have wildcards in JSON path when supplying multiple paths");
|
108
|
+
}
|
106
109
|
}
|
107
110
|
|
108
111
|
return make_uniq<JSONReadManyFunctionData>(std::move(paths), std::move(lens));
|
@@ -120,11 +123,8 @@ unique_ptr<FunctionLocalState> JSONFunctionLocalState::Init(ExpressionState &sta
|
|
120
123
|
}
|
121
124
|
|
122
125
|
unique_ptr<FunctionLocalState> JSONFunctionLocalState::InitCastLocalState(CastLocalStateParameters &parameters) {
|
123
|
-
|
124
|
-
|
125
|
-
} else {
|
126
|
-
return make_uniq<JSONFunctionLocalState>(Allocator::DefaultAllocator());
|
127
|
-
}
|
126
|
+
return parameters.context ? make_uniq<JSONFunctionLocalState>(*parameters.context)
|
127
|
+
: make_uniq<JSONFunctionLocalState>(Allocator::DefaultAllocator());
|
128
128
|
}
|
129
129
|
|
130
130
|
JSONFunctionLocalState &JSONFunctionLocalState::ResetAndGet(ExpressionState &state) {
|
@@ -19,7 +19,7 @@ void JSONScanData::Bind(ClientContext &context, TableFunctionBindInput &input) {
|
|
19
19
|
auto_detect = info.auto_detect;
|
20
20
|
|
21
21
|
for (auto &kv : input.named_parameters) {
|
22
|
-
if (MultiFileReader::ParseOption(kv.first, kv.second, options.file_options)) {
|
22
|
+
if (MultiFileReader::ParseOption(kv.first, kv.second, options.file_options, context)) {
|
23
23
|
continue;
|
24
24
|
}
|
25
25
|
auto loption = StringUtil::Lower(kv.first);
|
@@ -51,10 +51,7 @@ void JSONScanData::Bind(ClientContext &context, TableFunctionBindInput &input) {
|
|
51
51
|
}
|
52
52
|
|
53
53
|
files = MultiFileReader::GetFileList(context, input.inputs[0], "JSON");
|
54
|
-
|
55
|
-
if (options.file_options.auto_detect_hive_partitioning) {
|
56
|
-
options.file_options.hive_partitioning = MultiFileReaderOptions::AutoDetectHivePartitioning(files);
|
57
|
-
}
|
54
|
+
options.file_options.AutoDetectHivePartitioning(files, context);
|
58
55
|
|
59
56
|
InitializeReaders(context);
|
60
57
|
}
|
@@ -236,7 +233,7 @@ unique_ptr<GlobalTableFunctionState> JSONGlobalTableFunctionState::Init(ClientCo
|
|
236
233
|
for (auto &reader : gstate.json_readers) {
|
237
234
|
MultiFileReader::FinalizeBind(reader->GetOptions().file_options, gstate.bind_data.reader_bind,
|
238
235
|
reader->GetFileName(), gstate.names, dummy_types, bind_data.names,
|
239
|
-
input.column_ids, reader->reader_data);
|
236
|
+
input.column_ids, reader->reader_data, context);
|
240
237
|
}
|
241
238
|
|
242
239
|
return std::move(result);
|
@@ -1,27 +1,25 @@
|
|
1
1
|
#include "column_reader.hpp"
|
2
|
-
#include "parquet_timestamp.hpp"
|
3
|
-
#include "utf8proc_wrapper.hpp"
|
4
|
-
#include "parquet_reader.hpp"
|
5
2
|
|
6
3
|
#include "boolean_column_reader.hpp"
|
7
|
-
#include "cast_column_reader.hpp"
|
8
|
-
#include "row_number_column_reader.hpp"
|
9
4
|
#include "callback_column_reader.hpp"
|
10
|
-
#include "
|
5
|
+
#include "cast_column_reader.hpp"
|
6
|
+
#include "duckdb.hpp"
|
11
7
|
#include "list_column_reader.hpp"
|
8
|
+
#include "miniz_wrapper.hpp"
|
9
|
+
#include "parquet_decimal_utils.hpp"
|
10
|
+
#include "parquet_reader.hpp"
|
11
|
+
#include "parquet_timestamp.hpp"
|
12
|
+
#include "row_number_column_reader.hpp"
|
13
|
+
#include "snappy.h"
|
12
14
|
#include "string_column_reader.hpp"
|
13
15
|
#include "struct_column_reader.hpp"
|
14
16
|
#include "templated_column_reader.hpp"
|
15
|
-
|
16
|
-
#include "snappy.h"
|
17
|
-
#include "miniz_wrapper.hpp"
|
17
|
+
#include "utf8proc_wrapper.hpp"
|
18
18
|
#include "zstd.h"
|
19
|
-
#include <iostream>
|
20
19
|
|
21
|
-
#include "duckdb.hpp"
|
22
20
|
#ifndef DUCKDB_AMALGAMATION
|
23
|
-
#include "duckdb/common/types/blob.hpp"
|
24
21
|
#include "duckdb/common/types/bit.hpp"
|
22
|
+
#include "duckdb/common/types/blob.hpp"
|
25
23
|
#include "duckdb/common/types/chunk_collection.hpp"
|
26
24
|
#endif
|
27
25
|
|
@@ -614,7 +612,7 @@ uint32_t StringColumnReader::VerifyString(const char *str_data, uint32_t str_len
|
|
614
612
|
|
615
613
|
void StringColumnReader::Dictionary(shared_ptr<ResizeableBuffer> data, idx_t num_entries) {
|
616
614
|
dict = std::move(data);
|
617
|
-
dict_strings =
|
615
|
+
dict_strings = unique_ptr<string_t[]>(new string_t[num_entries]);
|
618
616
|
for (idx_t dict_idx = 0; dict_idx < num_entries; dict_idx++) {
|
619
617
|
uint32_t str_len;
|
620
618
|
if (fixed_width_string_length == 0) {
|
@@ -873,7 +871,7 @@ idx_t ListColumnReader::Read(uint64_t num_values, parquet_filter_t &filter, data
|
|
873
871
|
|
874
872
|
ListColumnReader::ListColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p,
|
875
873
|
idx_t schema_idx_p, idx_t max_define_p, idx_t max_repeat_p,
|
876
|
-
|
874
|
+
unique_ptr<ColumnReader> child_column_reader_p)
|
877
875
|
: ColumnReader(reader, std::move(type_p), schema_p, schema_idx_p, max_define_p, max_repeat_p),
|
878
876
|
child_column_reader(std::move(child_column_reader_p)),
|
879
877
|
read_cache(reader.allocator, ListType::GetChildType(Type())), read_vector(read_cache), overflow_child_count(0) {
|
@@ -889,8 +887,8 @@ ListColumnReader::ListColumnReader(ParquetReader &reader, LogicalType type_p, co
|
|
889
887
|
void ListColumnReader::ApplyPendingSkips(idx_t num_values) {
|
890
888
|
pending_skips -= num_values;
|
891
889
|
|
892
|
-
auto define_out =
|
893
|
-
auto repeat_out =
|
890
|
+
auto define_out = unique_ptr<uint8_t[]>(new uint8_t[num_values]);
|
891
|
+
auto repeat_out = unique_ptr<uint8_t[]>(new uint8_t[num_values]);
|
894
892
|
|
895
893
|
idx_t remaining = num_values;
|
896
894
|
idx_t read = 0;
|
@@ -953,7 +951,7 @@ idx_t RowNumberColumnReader::Read(uint64_t num_values, parquet_filter_t &filter,
|
|
953
951
|
//===--------------------------------------------------------------------===//
|
954
952
|
// Cast Column Reader
|
955
953
|
//===--------------------------------------------------------------------===//
|
956
|
-
CastColumnReader::CastColumnReader(
|
954
|
+
CastColumnReader::CastColumnReader(unique_ptr<ColumnReader> child_reader_p, LogicalType target_type_p)
|
957
955
|
: ColumnReader(child_reader_p->Reader(), std::move(target_type_p), child_reader_p->Schema(),
|
958
956
|
child_reader_p->FileIdx(), child_reader_p->MaxDefine(), child_reader_p->MaxRepeat()),
|
959
957
|
child_reader(std::move(child_reader_p)) {
|
@@ -1005,7 +1003,7 @@ idx_t CastColumnReader::GroupRowsAvailable() {
|
|
1005
1003
|
//===--------------------------------------------------------------------===//
|
1006
1004
|
StructColumnReader::StructColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p,
|
1007
1005
|
idx_t schema_idx_p, idx_t max_define_p, idx_t max_repeat_p,
|
1008
|
-
vector<
|
1006
|
+
vector<unique_ptr<ColumnReader>> child_readers_p)
|
1009
1007
|
: ColumnReader(reader, std::move(type_p), schema_p, schema_idx_p, max_define_p, max_repeat_p),
|
1010
1008
|
child_readers(std::move(child_readers_p)) {
|
1011
1009
|
D_ASSERT(type.InternalType() == PhysicalType::STRUCT);
|
@@ -1155,9 +1153,9 @@ protected:
|
|
1155
1153
|
};
|
1156
1154
|
|
1157
1155
|
template <bool FIXED_LENGTH>
|
1158
|
-
static
|
1159
|
-
|
1160
|
-
|
1156
|
+
static unique_ptr<ColumnReader> CreateDecimalReaderInternal(ParquetReader &reader, const LogicalType &type_p,
|
1157
|
+
const SchemaElement &schema_p, idx_t file_idx_p,
|
1158
|
+
idx_t max_define, idx_t max_repeat) {
|
1161
1159
|
switch (type_p.InternalType()) {
|
1162
1160
|
case PhysicalType::INT16:
|
1163
1161
|
return make_uniq<DecimalColumnReader<int16_t, FIXED_LENGTH>>(reader, type_p, schema_p, file_idx_p, max_define,
|
@@ -184,7 +184,7 @@ ColumnWriterState::~ColumnWriterState() {
|
|
184
184
|
}
|
185
185
|
|
186
186
|
void ColumnWriter::CompressPage(BufferedSerializer &temp_writer, size_t &compressed_size, data_ptr_t &compressed_data,
|
187
|
-
|
187
|
+
unique_ptr<data_t[]> &compressed_buf) {
|
188
188
|
switch (writer.GetCodec()) {
|
189
189
|
case CompressionCodec::UNCOMPRESSED:
|
190
190
|
compressed_size = temp_writer.blob.size;
|
@@ -192,7 +192,7 @@ void ColumnWriter::CompressPage(BufferedSerializer &temp_writer, size_t &compres
|
|
192
192
|
break;
|
193
193
|
case CompressionCodec::SNAPPY: {
|
194
194
|
compressed_size = duckdb_snappy::MaxCompressedLength(temp_writer.blob.size);
|
195
|
-
compressed_buf =
|
195
|
+
compressed_buf = unique_ptr<data_t[]>(new data_t[compressed_size]);
|
196
196
|
duckdb_snappy::RawCompress(const_char_ptr_cast(temp_writer.blob.data.get()), temp_writer.blob.size,
|
197
197
|
char_ptr_cast(compressed_buf.get()), &compressed_size);
|
198
198
|
compressed_data = compressed_buf.get();
|
@@ -202,7 +202,7 @@ void ColumnWriter::CompressPage(BufferedSerializer &temp_writer, size_t &compres
|
|
202
202
|
case CompressionCodec::GZIP: {
|
203
203
|
MiniZStream s;
|
204
204
|
compressed_size = s.MaxCompressedLength(temp_writer.blob.size);
|
205
|
-
compressed_buf =
|
205
|
+
compressed_buf = unique_ptr<data_t[]>(new data_t[compressed_size]);
|
206
206
|
s.Compress(const_char_ptr_cast(temp_writer.blob.data.get()), temp_writer.blob.size,
|
207
207
|
char_ptr_cast(compressed_buf.get()), &compressed_size);
|
208
208
|
compressed_data = compressed_buf.get();
|
@@ -210,7 +210,7 @@ void ColumnWriter::CompressPage(BufferedSerializer &temp_writer, size_t &compres
|
|
210
210
|
}
|
211
211
|
case CompressionCodec::ZSTD: {
|
212
212
|
compressed_size = duckdb_zstd::ZSTD_compressBound(temp_writer.blob.size);
|
213
|
-
compressed_buf =
|
213
|
+
compressed_buf = unique_ptr<data_t[]>(new data_t[compressed_size]);
|
214
214
|
compressed_size = duckdb_zstd::ZSTD_compress((void *)compressed_buf.get(), compressed_size,
|
215
215
|
(const void *)temp_writer.blob.data.get(), temp_writer.blob.size,
|
216
216
|
ZSTD_CLEVEL_DEFAULT);
|
@@ -303,14 +303,14 @@ struct PageInformation {
|
|
303
303
|
|
304
304
|
struct PageWriteInformation {
|
305
305
|
PageHeader page_header;
|
306
|
-
|
307
|
-
|
306
|
+
unique_ptr<BufferedSerializer> temp_writer;
|
307
|
+
unique_ptr<ColumnWriterPageState> page_state;
|
308
308
|
idx_t write_page_idx = 0;
|
309
309
|
idx_t write_count = 0;
|
310
310
|
idx_t max_write_count = 0;
|
311
311
|
size_t compressed_size;
|
312
312
|
data_ptr_t compressed_data;
|
313
|
-
|
313
|
+
unique_ptr<data_t[]> compressed_buf;
|
314
314
|
};
|
315
315
|
|
316
316
|
class BasicColumnWriterState : public ColumnWriterState {
|
@@ -325,7 +325,7 @@ public:
|
|
325
325
|
idx_t col_idx;
|
326
326
|
vector<PageInformation> page_info;
|
327
327
|
vector<PageWriteInformation> write_info;
|
328
|
-
|
328
|
+
unique_ptr<ColumnWriterStatistics> stats_state;
|
329
329
|
idx_t current_page = 0;
|
330
330
|
};
|
331
331
|
|
@@ -355,8 +355,7 @@ public:
|
|
355
355
|
static constexpr const idx_t STRING_LENGTH_SIZE = sizeof(uint32_t);
|
356
356
|
|
357
357
|
public:
|
358
|
-
|
359
|
-
Allocator &allocator) override;
|
358
|
+
unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) override;
|
360
359
|
void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) override;
|
361
360
|
void BeginWrite(ColumnWriterState &state) override;
|
362
361
|
void Write(ColumnWriterState &state, Vector &vector, idx_t count) override;
|
@@ -372,10 +371,10 @@ protected:
|
|
372
371
|
void FlushPage(BasicColumnWriterState &state);
|
373
372
|
|
374
373
|
//! Initializes the state used to track statistics during writing. Only used for scalar types.
|
375
|
-
virtual
|
374
|
+
virtual unique_ptr<ColumnWriterStatistics> InitializeStatsState();
|
376
375
|
|
377
376
|
//! Initialize the writer for a specific page. Only used for scalar types.
|
378
|
-
virtual
|
377
|
+
virtual unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state);
|
379
378
|
|
380
379
|
//! Flushes the writer for a specific page. Only used for scalar types.
|
381
380
|
virtual void FlushPageState(Serializer &temp_writer, ColumnWriterPageState *state);
|
@@ -391,16 +390,14 @@ protected:
|
|
391
390
|
}
|
392
391
|
//! The number of elements in the dictionary
|
393
392
|
virtual idx_t DictionarySize(BasicColumnWriterState &state_p);
|
394
|
-
void WriteDictionary(BasicColumnWriterState &state,
|
395
|
-
idx_t row_count);
|
393
|
+
void WriteDictionary(BasicColumnWriterState &state, unique_ptr<BufferedSerializer> temp_writer, idx_t row_count);
|
396
394
|
virtual void FlushDictionary(BasicColumnWriterState &state, ColumnWriterStatistics *stats);
|
397
395
|
|
398
396
|
void SetParquetStatistics(BasicColumnWriterState &state, duckdb_parquet::format::ColumnChunk &column);
|
399
397
|
void RegisterToRowGroup(duckdb_parquet::format::RowGroup &row_group);
|
400
398
|
};
|
401
399
|
|
402
|
-
unique_ptr<ColumnWriterState> BasicColumnWriter::InitializeWriteState(duckdb_parquet::format::RowGroup &row_group
|
403
|
-
Allocator &allocator) {
|
400
|
+
unique_ptr<ColumnWriterState> BasicColumnWriter::InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) {
|
404
401
|
auto result = make_uniq<BasicColumnWriterState>(row_group, row_group.columns.size());
|
405
402
|
RegisterToRowGroup(row_group);
|
406
403
|
return std::move(result);
|
@@ -698,8 +695,8 @@ idx_t BasicColumnWriter::DictionarySize(BasicColumnWriterState &state) {
|
|
698
695
|
throw InternalException("This page does not have a dictionary");
|
699
696
|
}
|
700
697
|
|
701
|
-
void BasicColumnWriter::WriteDictionary(BasicColumnWriterState &state,
|
702
|
-
|
698
|
+
void BasicColumnWriter::WriteDictionary(BasicColumnWriterState &state, unique_ptr<BufferedSerializer> temp_writer,
|
699
|
+
idx_t row_count) {
|
703
700
|
D_ASSERT(temp_writer);
|
704
701
|
D_ASSERT(temp_writer->blob.size > 0);
|
705
702
|
|
@@ -761,7 +758,7 @@ public:
|
|
761
758
|
|
762
759
|
struct BaseParquetOperator {
|
763
760
|
template <class SRC, class TGT>
|
764
|
-
static
|
761
|
+
static unique_ptr<ColumnWriterStatistics> InitializeStats() {
|
765
762
|
return make_uniq<NumericStatisticsState<SRC, TGT, BaseParquetOperator>>();
|
766
763
|
}
|
767
764
|
|
@@ -805,7 +802,7 @@ struct ParquetHugeintOperator {
|
|
805
802
|
}
|
806
803
|
|
807
804
|
template <class SRC, class TGT>
|
808
|
-
static
|
805
|
+
static unique_ptr<ColumnWriterStatistics> InitializeStats() {
|
809
806
|
return make_uniq<ColumnWriterStatistics>();
|
810
807
|
}
|
811
808
|
|
@@ -837,7 +834,7 @@ public:
|
|
837
834
|
~StandardColumnWriter() override = default;
|
838
835
|
|
839
836
|
public:
|
840
|
-
|
837
|
+
unique_ptr<ColumnWriterStatistics> InitializeStatsState() override {
|
841
838
|
return OP::template InitializeStats<SRC, TGT>();
|
842
839
|
}
|
843
840
|
|
@@ -897,7 +894,7 @@ public:
|
|
897
894
|
~BooleanColumnWriter() override = default;
|
898
895
|
|
899
896
|
public:
|
900
|
-
|
897
|
+
unique_ptr<ColumnWriterStatistics> InitializeStatsState() override {
|
901
898
|
return make_uniq<BooleanStatisticsState>();
|
902
899
|
}
|
903
900
|
|
@@ -928,7 +925,7 @@ public:
|
|
928
925
|
}
|
929
926
|
}
|
930
927
|
|
931
|
-
|
928
|
+
unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state) override {
|
932
929
|
return make_uniq<BooleanWriterPageState>();
|
933
930
|
}
|
934
931
|
|
@@ -1022,7 +1019,7 @@ public:
|
|
1022
1019
|
~FixedDecimalColumnWriter() override = default;
|
1023
1020
|
|
1024
1021
|
public:
|
1025
|
-
|
1022
|
+
unique_ptr<ColumnWriterStatistics> InitializeStatsState() override {
|
1026
1023
|
return make_uniq<FixedDecimalStatistics>();
|
1027
1024
|
}
|
1028
1025
|
|
@@ -1196,8 +1193,8 @@ public:
|
|
1196
1193
|
|
1197
1194
|
class StringColumnWriterState : public BasicColumnWriterState {
|
1198
1195
|
public:
|
1199
|
-
StringColumnWriterState(duckdb_parquet::format::RowGroup &row_group,
|
1200
|
-
: BasicColumnWriterState(row_group, col_idx)
|
1196
|
+
StringColumnWriterState(duckdb_parquet::format::RowGroup &row_group, idx_t col_idx)
|
1197
|
+
: BasicColumnWriterState(row_group, col_idx) {
|
1201
1198
|
}
|
1202
1199
|
~StringColumnWriterState() override = default;
|
1203
1200
|
|
@@ -1208,7 +1205,6 @@ public:
|
|
1208
1205
|
|
1209
1206
|
// Dictionary and accompanying string heap
|
1210
1207
|
string_map_t<uint32_t> dictionary;
|
1211
|
-
StringHeap dictionary_heap;
|
1212
1208
|
// key_bit_width== 0 signifies the chunk is written in plain encoding
|
1213
1209
|
uint32_t key_bit_width;
|
1214
1210
|
|
@@ -1243,13 +1239,12 @@ public:
|
|
1243
1239
|
~StringColumnWriter() override = default;
|
1244
1240
|
|
1245
1241
|
public:
|
1246
|
-
|
1242
|
+
unique_ptr<ColumnWriterStatistics> InitializeStatsState() override {
|
1247
1243
|
return make_uniq<StringStatisticsState>();
|
1248
1244
|
}
|
1249
1245
|
|
1250
|
-
|
1251
|
-
|
1252
|
-
auto result = make_uniq<StringColumnWriterState>(row_group, allocator, row_group.columns.size());
|
1246
|
+
unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) override {
|
1247
|
+
auto result = make_uniq<StringColumnWriterState>(row_group, row_group.columns.size());
|
1253
1248
|
RegisterToRowGroup(row_group);
|
1254
1249
|
return std::move(result);
|
1255
1250
|
}
|
@@ -1279,11 +1274,8 @@ public:
|
|
1279
1274
|
if (validity.RowIsValid(vector_index)) {
|
1280
1275
|
run_length++;
|
1281
1276
|
const auto &value = strings[vector_index];
|
1282
|
-
//
|
1283
|
-
auto found =
|
1284
|
-
? state.dictionary.insert(string_map_t<uint32_t>::value_type(
|
1285
|
-
state.dictionary_heap.AddBlob(value), new_value_index))
|
1286
|
-
: state.dictionary.insert(string_map_t<uint32_t>::value_type(value, new_value_index));
|
1277
|
+
// Try to insert into the dictionary. If it's already there, we get back the value index
|
1278
|
+
auto found = state.dictionary.insert(string_map_t<uint32_t>::value_type(value, new_value_index));
|
1287
1279
|
state.estimated_plain_size += value.GetSize() + STRING_LENGTH_SIZE;
|
1288
1280
|
if (found.second) {
|
1289
1281
|
// string didn't exist yet in the dictionary
|
@@ -1359,7 +1351,7 @@ public:
|
|
1359
1351
|
}
|
1360
1352
|
}
|
1361
1353
|
|
1362
|
-
|
1354
|
+
unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state_p) override {
|
1363
1355
|
auto &state = state_p.Cast<StringColumnWriterState>();
|
1364
1356
|
return make_uniq<StringWriterPageState>(state.key_bit_width, state.dictionary);
|
1365
1357
|
}
|
@@ -1456,7 +1448,7 @@ public:
|
|
1456
1448
|
uint32_t bit_width;
|
1457
1449
|
|
1458
1450
|
public:
|
1459
|
-
|
1451
|
+
unique_ptr<ColumnWriterStatistics> InitializeStatsState() override {
|
1460
1452
|
return make_uniq<StringStatisticsState>();
|
1461
1453
|
}
|
1462
1454
|
|
@@ -1499,7 +1491,7 @@ public:
|
|
1499
1491
|
}
|
1500
1492
|
}
|
1501
1493
|
|
1502
|
-
|
1494
|
+
unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state) override {
|
1503
1495
|
return make_uniq<EnumWriterPageState>(bit_width);
|
1504
1496
|
}
|
1505
1497
|
|
@@ -1557,17 +1549,16 @@ public:
|
|
1557
1549
|
class StructColumnWriter : public ColumnWriter {
|
1558
1550
|
public:
|
1559
1551
|
StructColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector<string> schema_path_p, idx_t max_repeat,
|
1560
|
-
idx_t max_define, vector<
|
1552
|
+
idx_t max_define, vector<unique_ptr<ColumnWriter>> child_writers_p, bool can_have_nulls)
|
1561
1553
|
: ColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls),
|
1562
1554
|
child_writers(std::move(child_writers_p)) {
|
1563
1555
|
}
|
1564
1556
|
~StructColumnWriter() override = default;
|
1565
1557
|
|
1566
|
-
vector<
|
1558
|
+
vector<unique_ptr<ColumnWriter>> child_writers;
|
1567
1559
|
|
1568
1560
|
public:
|
1569
|
-
|
1570
|
-
Allocator &allocator) override;
|
1561
|
+
unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) override;
|
1571
1562
|
bool HasAnalyze() override;
|
1572
1563
|
void Analyze(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) override;
|
1573
1564
|
void FinalizeAnalyze(ColumnWriterState &state) override;
|
@@ -1587,16 +1578,15 @@ public:
|
|
1587
1578
|
|
1588
1579
|
duckdb_parquet::format::RowGroup &row_group;
|
1589
1580
|
idx_t col_idx;
|
1590
|
-
vector<
|
1581
|
+
vector<unique_ptr<ColumnWriterState>> child_states;
|
1591
1582
|
};
|
1592
1583
|
|
1593
|
-
unique_ptr<ColumnWriterState> StructColumnWriter::InitializeWriteState(duckdb_parquet::format::RowGroup &row_group
|
1594
|
-
Allocator &allocator) {
|
1584
|
+
unique_ptr<ColumnWriterState> StructColumnWriter::InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) {
|
1595
1585
|
auto result = make_uniq<StructColumnWriterState>(row_group, row_group.columns.size());
|
1596
1586
|
|
1597
1587
|
result->child_states.reserve(child_writers.size());
|
1598
1588
|
for (auto &child_writer : child_writers) {
|
1599
|
-
result->child_states.push_back(child_writer->InitializeWriteState(row_group
|
1589
|
+
result->child_states.push_back(child_writer->InitializeWriteState(row_group));
|
1600
1590
|
}
|
1601
1591
|
return std::move(result);
|
1602
1592
|
}
|
@@ -1680,17 +1670,16 @@ void StructColumnWriter::FinalizeWrite(ColumnWriterState &state_p) {
|
|
1680
1670
|
class ListColumnWriter : public ColumnWriter {
|
1681
1671
|
public:
|
1682
1672
|
ListColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector<string> schema_path_p, idx_t max_repeat,
|
1683
|
-
idx_t max_define,
|
1673
|
+
idx_t max_define, unique_ptr<ColumnWriter> child_writer_p, bool can_have_nulls)
|
1684
1674
|
: ColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls),
|
1685
1675
|
child_writer(std::move(child_writer_p)) {
|
1686
1676
|
}
|
1687
1677
|
~ListColumnWriter() override = default;
|
1688
1678
|
|
1689
|
-
|
1679
|
+
unique_ptr<ColumnWriter> child_writer;
|
1690
1680
|
|
1691
1681
|
public:
|
1692
|
-
|
1693
|
-
Allocator &allocator) override;
|
1682
|
+
unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) override;
|
1694
1683
|
bool HasAnalyze() override;
|
1695
1684
|
void Analyze(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) override;
|
1696
1685
|
void FinalizeAnalyze(ColumnWriterState &state) override;
|
@@ -1710,14 +1699,13 @@ public:
|
|
1710
1699
|
|
1711
1700
|
duckdb_parquet::format::RowGroup &row_group;
|
1712
1701
|
idx_t col_idx;
|
1713
|
-
|
1702
|
+
unique_ptr<ColumnWriterState> child_state;
|
1714
1703
|
idx_t parent_index = 0;
|
1715
1704
|
};
|
1716
1705
|
|
1717
|
-
unique_ptr<ColumnWriterState> ListColumnWriter::InitializeWriteState(duckdb_parquet::format::RowGroup &row_group
|
1718
|
-
Allocator &allocator) {
|
1706
|
+
unique_ptr<ColumnWriterState> ListColumnWriter::InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) {
|
1719
1707
|
auto result = make_uniq<ListColumnWriterState>(row_group, row_group.columns.size());
|
1720
|
-
result->child_state = child_writer->InitializeWriteState(row_group
|
1708
|
+
result->child_state = child_writer->InitializeWriteState(row_group);
|
1721
1709
|
return std::move(result);
|
1722
1710
|
}
|
1723
1711
|
|
@@ -1818,12 +1806,24 @@ void ListColumnWriter::FinalizeWrite(ColumnWriterState &state_p) {
|
|
1818
1806
|
unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(vector<duckdb_parquet::format::SchemaElement> &schemas,
|
1819
1807
|
ParquetWriter &writer, const LogicalType &type,
|
1820
1808
|
const string &name, vector<string> schema_path,
|
1809
|
+
optional_ptr<const ChildFieldIDs> field_ids,
|
1821
1810
|
idx_t max_repeat, idx_t max_define, bool can_have_nulls) {
|
1822
1811
|
auto null_type = can_have_nulls ? FieldRepetitionType::OPTIONAL : FieldRepetitionType::REQUIRED;
|
1823
1812
|
if (!can_have_nulls) {
|
1824
1813
|
max_define--;
|
1825
1814
|
}
|
1826
1815
|
idx_t schema_idx = schemas.size();
|
1816
|
+
|
1817
|
+
optional_ptr<const FieldID> field_id;
|
1818
|
+
optional_ptr<const ChildFieldIDs> child_field_ids;
|
1819
|
+
if (field_ids) {
|
1820
|
+
auto field_id_it = field_ids->ids->find(name);
|
1821
|
+
if (field_id_it != field_ids->ids->end()) {
|
1822
|
+
field_id = &field_id_it->second;
|
1823
|
+
child_field_ids = &field_id->child_field_ids;
|
1824
|
+
}
|
1825
|
+
}
|
1826
|
+
|
1827
1827
|
if (type.id() == LogicalTypeId::STRUCT) {
|
1828
1828
|
auto &child_types = StructType::GetChildTypes(type);
|
1829
1829
|
// set up the schema element for this struct
|
@@ -1834,15 +1834,19 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(vector<duckdb_parqu
|
|
1834
1834
|
schema_element.__isset.type = false;
|
1835
1835
|
schema_element.__isset.repetition_type = true;
|
1836
1836
|
schema_element.name = name;
|
1837
|
+
if (field_id && field_id->set) {
|
1838
|
+
schema_element.__isset.field_id = true;
|
1839
|
+
schema_element.field_id = field_id->field_id;
|
1840
|
+
}
|
1837
1841
|
schemas.push_back(std::move(schema_element));
|
1838
1842
|
schema_path.push_back(name);
|
1839
1843
|
|
1840
1844
|
// construct the child types recursively
|
1841
|
-
vector<
|
1845
|
+
vector<unique_ptr<ColumnWriter>> child_writers;
|
1842
1846
|
child_writers.reserve(child_types.size());
|
1843
1847
|
for (auto &child_type : child_types) {
|
1844
1848
|
child_writers.push_back(CreateWriterRecursive(schemas, writer, child_type.second, child_type.first,
|
1845
|
-
schema_path, max_repeat, max_define + 1));
|
1849
|
+
schema_path, child_field_ids, max_repeat, max_define + 1));
|
1846
1850
|
}
|
1847
1851
|
return make_uniq<StructColumnWriter>(writer, schema_idx, std::move(schema_path), max_repeat, max_define,
|
1848
1852
|
std::move(child_writers), can_have_nulls);
|
@@ -1861,6 +1865,10 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(vector<duckdb_parqu
|
|
1861
1865
|
optional_element.__isset.repetition_type = true;
|
1862
1866
|
optional_element.__isset.converted_type = true;
|
1863
1867
|
optional_element.name = name;
|
1868
|
+
if (field_id && field_id->set) {
|
1869
|
+
optional_element.__isset.field_id = true;
|
1870
|
+
optional_element.field_id = field_id->field_id;
|
1871
|
+
}
|
1864
1872
|
schemas.push_back(std::move(optional_element));
|
1865
1873
|
schema_path.push_back(name);
|
1866
1874
|
|
@@ -1875,8 +1883,8 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(vector<duckdb_parqu
|
|
1875
1883
|
schemas.push_back(std::move(repeated_element));
|
1876
1884
|
schema_path.emplace_back("list");
|
1877
1885
|
|
1878
|
-
auto child_writer =
|
1879
|
-
|
1886
|
+
auto child_writer = CreateWriterRecursive(schemas, writer, child_type, "element", schema_path, child_field_ids,
|
1887
|
+
max_repeat + 1, max_define + 2);
|
1880
1888
|
return make_uniq<ListColumnWriter>(writer, schema_idx, std::move(schema_path), max_repeat, max_define,
|
1881
1889
|
std::move(child_writer), can_have_nulls);
|
1882
1890
|
}
|
@@ -1899,6 +1907,10 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(vector<duckdb_parqu
|
|
1899
1907
|
top_element.__isset.converted_type = true;
|
1900
1908
|
top_element.__isset.type = false;
|
1901
1909
|
top_element.name = name;
|
1910
|
+
if (field_id && field_id->set) {
|
1911
|
+
top_element.__isset.field_id = true;
|
1912
|
+
top_element.field_id = field_id->field_id;
|
1913
|
+
}
|
1902
1914
|
schemas.push_back(std::move(top_element));
|
1903
1915
|
schema_path.push_back(name);
|
1904
1916
|
|
@@ -1916,13 +1928,13 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(vector<duckdb_parqu
|
|
1916
1928
|
// construct the child types recursively
|
1917
1929
|
vector<LogicalType> kv_types {MapType::KeyType(type), MapType::ValueType(type)};
|
1918
1930
|
vector<string> kv_names {"key", "value"};
|
1919
|
-
vector<
|
1931
|
+
vector<unique_ptr<ColumnWriter>> child_writers;
|
1920
1932
|
child_writers.reserve(2);
|
1921
1933
|
for (idx_t i = 0; i < 2; i++) {
|
1922
1934
|
// key needs to be marked as REQUIRED
|
1923
1935
|
bool is_key = i == 0;
|
1924
1936
|
auto child_writer = CreateWriterRecursive(schemas, writer, kv_types[i], kv_names[i], schema_path,
|
1925
|
-
max_repeat + 1, max_define + 2, !is_key);
|
1937
|
+
child_field_ids, max_repeat + 1, max_define + 2, !is_key);
|
1926
1938
|
|
1927
1939
|
child_writers.push_back(std::move(child_writer));
|
1928
1940
|
}
|
@@ -1938,6 +1950,10 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(vector<duckdb_parqu
|
|
1938
1950
|
schema_element.__isset.type = true;
|
1939
1951
|
schema_element.__isset.repetition_type = true;
|
1940
1952
|
schema_element.name = name;
|
1953
|
+
if (field_id && field_id->set) {
|
1954
|
+
schema_element.__isset.field_id = true;
|
1955
|
+
schema_element.field_id = field_id->field_id;
|
1956
|
+
}
|
1941
1957
|
ParquetWriter::SetSchemaProperties(type, schema_element);
|
1942
1958
|
schemas.push_back(std::move(schema_element));
|
1943
1959
|
schema_path.push_back(name);
|