duckdb 0.8.2-dev11.0 → 0.8.2-dev1182.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +14 -12
- package/binding.gyp.in +1 -1
- package/configure.py +1 -1
- package/duckdb_extension_config.cmake +10 -0
- package/lib/duckdb.d.ts +59 -0
- package/lib/duckdb.js +21 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
- package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
- package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
- package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
- package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
- package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
- package/src/duckdb/extension/icu/icu_extension.cpp +5 -7
- package/src/duckdb/extension/json/include/json_common.hpp +47 -231
- package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
- package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
- package/src/duckdb/extension/json/json_common.cpp +272 -40
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +17 -37
- package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
- package/src/duckdb/extension/json/json_functions.cpp +24 -24
- package/src/duckdb/extension/json/json_scan.cpp +3 -6
- package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
- package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
- package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
- package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
- package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
- package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
- package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
- package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
- package/src/duckdb/extension/parquet/parquet_extension.cpp +191 -19
- package/src/duckdb/extension/parquet/parquet_reader.cpp +5 -5
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
- package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
- package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
- package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
- package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
- package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
- package/src/duckdb/src/common/allocator.cpp +14 -2
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +5 -10
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +0 -12
- package/src/duckdb/src/common/assert.cpp +3 -0
- package/src/duckdb/src/common/enum_util.cpp +42 -5
- package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/file_system.cpp +15 -0
- package/src/duckdb/src/common/local_file_system.cpp +1 -1
- package/src/duckdb/src/common/multi_file_reader.cpp +181 -18
- package/src/duckdb/src/common/radix_partitioning.cpp +27 -9
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
- package/src/duckdb/src/common/sort/partition_state.cpp +44 -11
- package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
- package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
- package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
- package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
- package/src/duckdb/src/common/types/date.cpp +9 -0
- package/src/duckdb/src/common/types/list_segment.cpp +24 -74
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
- package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
- package/src/duckdb/src/common/types/vector.cpp +15 -14
- package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +6 -4
- package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
- package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
- package/src/duckdb/src/core_functions/function_list.cpp +4 -2
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
- package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +0 -17
- package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
- package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
- package/src/duckdb/src/execution/index/art/art.cpp +149 -139
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
- package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
- package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
- package/src/duckdb/src/execution/index/art/node.cpp +113 -120
- package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
- package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
- package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
- package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
- package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
- package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
- package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
- package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
- package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +413 -282
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +22 -3
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
- package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +3 -2
- package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
- package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
- package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
- package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
- package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
- package/src/duckdb/src/execution/physical_operator.cpp +17 -14
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
- package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
- package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
- package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
- package/src/duckdb/src/function/function.cpp +2 -0
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
- package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
- package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
- package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
- package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
- package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
- package/src/duckdb/src/function/table/read_csv.cpp +100 -17
- package/src/duckdb/src/function/table/system/test_all_types.cpp +38 -18
- package/src/duckdb/src/function/table/table_scan.cpp +9 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
- package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
- package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
- package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -0
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
- package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
- package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +24 -6
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -10
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
- package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/settings.hpp +21 -1
- package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
- package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
- package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
- package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
- package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
- package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
- package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +8 -5
- package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
- package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
- package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
- package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
- package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
- package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
- package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
- package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
- package/src/duckdb/src/include/duckdb.h +28 -0
- package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
- package/src/duckdb/src/main/config.cpp +2 -0
- package/src/duckdb/src/main/extension/extension_helper.cpp +96 -89
- package/src/duckdb/src/main/settings/settings.cpp +40 -18
- package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
- package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
- package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
- package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
- package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
- package/src/duckdb/src/optimizer/optimizer.cpp +51 -14
- package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
- package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
- package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
- package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
- package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
- package/src/duckdb/src/parallel/executor.cpp +15 -0
- package/src/duckdb/src/parallel/pipeline_executor.cpp +7 -6
- package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
- package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
- package/src/duckdb/src/parser/expression/lambda_expression.cpp +1 -1
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
- package/src/duckdb/src/parser/query_node/cte_node.cpp +75 -0
- package/src/duckdb/src/parser/query_node.cpp +18 -1
- package/src/duckdb/src/parser/tableref/joinref.cpp +3 -0
- package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
- package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
- package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
- package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
- package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
- package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
- package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
- package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
- package/src/duckdb/src/parser/transformer.cpp +15 -0
- package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
- package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
- package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
- package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -50
- package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +61 -26
- package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
- package/src/duckdb/src/planner/binder.cpp +5 -0
- package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
- package/src/duckdb/src/planner/expression_binder.cpp +3 -0
- package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
- package/src/duckdb/src/planner/logical_operator.cpp +5 -0
- package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
- package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
- package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
- package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
- package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
- package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
- package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
- package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
- package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
- package/src/duckdb/src/storage/data_table.cpp +1 -1
- package/src/duckdb/src/storage/local_storage.cpp +3 -3
- package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
- package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/storage_manager.cpp +7 -2
- package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
- package/src/duckdb/src/storage/table/row_group.cpp +25 -9
- package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
- package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
- package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
- package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
- package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +5 -5
- package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
- package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
- package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
- package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
- package/src/duckdb/ub_src_optimizer.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
- package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
- package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
- package/src/duckdb/ub_src_planner_operator.cpp +4 -0
- package/src/duckdb_node.hpp +1 -0
- package/src/statement.cpp +103 -4
- package/test/columns.test.ts +243 -0
- package/test/test_all_types.test.ts +233 -0
- package/tsconfig.json +1 -0
- package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
- package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -2,6 +2,8 @@
|
|
2
2
|
|
3
3
|
namespace duckdb {
|
4
4
|
|
5
|
+
using JSONPathType = JSONCommon::JSONPathType;
|
6
|
+
|
5
7
|
string JSONCommon::ValToString(yyjson_val *val, idx_t max_len) {
|
6
8
|
JSONAllocator json_allocator(Allocator::DefaultAllocator());
|
7
9
|
idx_t len;
|
@@ -18,61 +20,291 @@ void JSONCommon::ThrowValFormatError(string error_string, yyjson_val *val) {
|
|
18
20
|
throw InvalidInputException(error_string);
|
19
21
|
}
|
20
22
|
|
21
|
-
string ThrowPathError(const char *ptr, const char *end) {
|
23
|
+
// Reports a malformed JSON path by throwing; this function never returns normally.
// The message quotes the path text from one character before `ptr` up to `end`.
// Throws BinderException when `binder` is true, InvalidInputException otherwise.
string ThrowPathError(const char *ptr, const char *end, const bool binder) {
	// Step back one character so the quoted text starts just before the current position
	const char *const error_start = ptr - 1;
	auto msg = StringUtil::Format("JSON path error near '%s'", string(error_start, end - error_start));
	if (!binder) {
		throw InvalidInputException(msg);
	}
	throw BinderException(msg);
}
|
32
|
+
|
33
|
+
// Measures the key that starts at `ptr`; `ptr` is taken by value, so the caller's cursor is not moved.
// Quoted keys (`escaped` is true) run up to the next '"'; if no '"' is found before `end`, 0 is returned.
// Unquoted keys run up to the next '.' or '[', or up to `end` if neither occurs.
// Returns the key length in bytes. A result of 0 means the key is empty or, when quoted, unterminated.
static inline idx_t ReadString(const char *ptr, const char *const end, const bool escaped) {
	const char *cursor = ptr;
	if (!escaped) {
		for (; cursor != end; cursor++) {
			if (*cursor == '.' || *cursor == '[') {
				break;
			}
		}
		return cursor - ptr;
	}
	for (; cursor != end; cursor++) {
		if (*cursor == '"') {
			break;
		}
	}
	if (cursor == end) {
		// Ran off the end without seeing the closing '"'
		return 0;
	}
	return cursor - ptr;
}
|
53
|
+
|
54
|
+
// Parses the decimal array index that starts at `ptr` and must be followed directly by ']'.
// `ptr` is taken by value: on success `idx` receives the parsed value and the number of digits is
// returned, so the caller can advance its own cursor by that length and then step over the ']'.
// Returns 0 (failure) when there are no digits, a non-digit character is found, `end` is reached
// before ']', or the digits are not followed by ']' within IDX_T_SAFE_DIG characters.
static inline idx_t ReadInteger(const char *ptr, const char *const end, idx_t &idx) {
	// Upper bound on the number of digits accepted
	// NOTE(review): presumably chosen so the accumulation cannot wrap a 64-bit idx_t - confirm
	static constexpr auto IDX_T_SAFE_DIG = 19;
	static constexpr auto IDX_T_MAX = ((idx_t)(~(idx_t)0));

	const char *const before = ptr;
	idx = 0;
	for (idx_t i = 0; i < IDX_T_SAFE_DIG; i++) {
		if (ptr == end) {
			// No closing ']'
			return 0;
		}
		if (*ptr == ']') {
			break;
		}
		uint8_t add = (uint8_t)(*ptr - '0');
		if (add <= 9) {
			idx = add + idx * 10;
		} else {
			// Not a digit
			return 0;
		}
		ptr++;
	}
	// The loop also ends after IDX_T_SAFE_DIG digits without having seen ']'. The caller steps one
	// character past the returned length on the assumption that it is ']', so the index is rejected
	// unless the bracket really is there; otherwise the caller's cursor could move beyond `end`
	if (ptr == end || *ptr != ']') {
		return 0;
	}
	// Invalid if overflow
	return idx >= (idx_t)IDX_T_MAX ? 0 : ptr - before;
}
|
80
|
+
|
81
|
+
static inline bool ReadKey(const char *&ptr, const char *const end, const char *&key_ptr, idx_t &key_len) {
|
82
|
+
D_ASSERT(ptr != end);
|
83
|
+
if (*ptr == '*') { // Wildcard
|
84
|
+
ptr++;
|
85
|
+
key_len = DConstants::INVALID_INDEX;
|
86
|
+
return true;
|
87
|
+
}
|
88
|
+
bool escaped = false;
|
89
|
+
if (*ptr == '"') {
|
90
|
+
ptr++; // Skip past opening '"'
|
91
|
+
escaped = true;
|
92
|
+
}
|
93
|
+
key_ptr = ptr;
|
94
|
+
key_len = ReadString(ptr, end, escaped);
|
95
|
+
if (key_len == 0) {
|
96
|
+
return false;
|
97
|
+
}
|
98
|
+
ptr += key_len;
|
99
|
+
if (escaped) {
|
100
|
+
ptr++; // Skip past closing '"'
|
101
|
+
}
|
102
|
+
return true;
|
24
103
|
}
|
25
104
|
|
26
|
-
|
105
|
+
static inline bool ReadArrayIndex(const char *&ptr, const char *const end, idx_t &array_index, bool &from_back) {
|
106
|
+
D_ASSERT(ptr != end);
|
107
|
+
from_back = false;
|
108
|
+
if (*ptr == '*') { // Wildcard
|
109
|
+
ptr++;
|
110
|
+
if (ptr == end || *ptr != ']') {
|
111
|
+
return false;
|
112
|
+
}
|
113
|
+
array_index = DConstants::INVALID_INDEX;
|
114
|
+
} else {
|
115
|
+
if (*ptr == '#') { // SQLite syntax to index from back of array
|
116
|
+
ptr++; // Skip over '#'
|
117
|
+
if (ptr == end) {
|
118
|
+
return false;
|
119
|
+
}
|
120
|
+
if (*ptr == ']') {
|
121
|
+
// [#] always returns NULL in SQLite, so we return an array index that will do the same
|
122
|
+
array_index = NumericLimits<uint32_t>::Maximum();
|
123
|
+
ptr++;
|
124
|
+
return true;
|
125
|
+
}
|
126
|
+
if (*ptr != '-') {
|
127
|
+
return false;
|
128
|
+
}
|
129
|
+
from_back = true;
|
130
|
+
}
|
131
|
+
if (*ptr == '-') {
|
132
|
+
ptr++; // Skip over '-'
|
133
|
+
from_back = true;
|
134
|
+
}
|
135
|
+
auto idx_len = ReadInteger(ptr, end, array_index);
|
136
|
+
if (idx_len == 0) {
|
137
|
+
return false;
|
138
|
+
}
|
139
|
+
ptr += idx_len;
|
140
|
+
}
|
141
|
+
ptr++; // Skip past closing ']'
|
142
|
+
return true;
|
143
|
+
}
|
144
|
+
|
145
|
+
// Walks a '$'-rooted JSON path and checks that every segment is well-formed.
//   ptr/len - the path text; it must be non-empty and start with '$' (asserted)
//   binder  - forwarded unchanged to ThrowPathError
// Returns JSONPathType::WILDCARD if any key or array subscript is a wildcard, JSONPathType::REGULAR otherwise.
// Malformed input is reported through ThrowPathError.
JSONPathType JSONCommon::ValidatePath(const char *ptr, const idx_t &len, const bool binder) {
	D_ASSERT(len >= 1 && *ptr == '$');
	const char *const end = ptr + len;
	bool has_wildcard = false;
	// The init-statement steps over the leading '$'
	for (ptr++; ptr != end;) {
		const char segment_start = *ptr;
		ptr++;
		if (ptr == end) {
			// Nothing follows the character that should introduce a segment
			ThrowPathError(ptr, end, binder);
		}
		if (segment_start == '.') {
			// Object field
			const char *key_ptr;
			idx_t key_len;
			if (!ReadKey(ptr, end, key_ptr, key_len)) {
				ThrowPathError(ptr, end, binder);
			}
			has_wildcard = has_wildcard || key_len == DConstants::INVALID_INDEX;
		} else if (segment_start == '[') {
			// Array index
			idx_t array_index;
			bool from_back;
			if (!ReadArrayIndex(ptr, end, array_index, from_back)) {
				ThrowPathError(ptr, end, binder);
			}
			has_wildcard = has_wildcard || array_index == DConstants::INVALID_INDEX;
		} else {
			// Only '.' and '[' may start a segment
			ThrowPathError(ptr, end, binder);
		}
	}
	return has_wildcard ? JSONPathType::WILDCARD : JSONPathType::REGULAR;
}
|
184
|
+
|
185
|
+
// Follows a '$'-rooted path through "val" and returns the value it designates.
// Returns nullptr as soon as a segment does not match (the current value is not an object / array,
// or the lookup itself yields nullptr).
// The path is expected to have gone through JSONCommon::ValidatePath already: the results of
// ReadKey / ReadArrayIndex are only asserted in DEBUG builds.
yyjson_val *JSONCommon::GetPath(yyjson_val *val, const char *ptr, const idx_t &len) {
	const char *const end = ptr + len;
	// The init-statement steps over the leading '$'
	for (ptr++; val != nullptr && ptr != end;) {
		const char token = *ptr;
		ptr++;
		D_ASSERT(ptr != end);
		if (token == '.') {
			// Object field
			if (!unsafe_yyjson_is_obj(val)) {
				return nullptr;
			}
			const char *key_ptr;
			idx_t key_len;
#ifdef DEBUG
			const bool key_ok = ReadKey(ptr, end, key_ptr, key_len);
			D_ASSERT(key_ok);
#else
			ReadKey(ptr, end, key_ptr, key_len);
#endif
			val = yyjson_obj_getn(val, key_ptr, key_len);
		} else if (token == '[') {
			// Array index
			if (!unsafe_yyjson_is_arr(val)) {
				return nullptr;
			}
			idx_t array_index;
			bool from_back;
#ifdef DEBUG
			const bool index_ok = ReadArrayIndex(ptr, end, array_index, from_back);
			D_ASSERT(index_ok);
#else
			ReadArrayIndex(ptr, end, array_index, from_back);
#endif
			if (from_back && array_index != 0) {
				// Turn "N from the back" into a position counted from the front
				array_index = unsafe_yyjson_get_len(val) - array_index;
			}
			val = yyjson_arr_get(val, array_index);
		} else { // LCOV_EXCL_START
			throw InternalException(
			    "Invalid JSON Path encountered in JSONCommon::GetPath, call JSONCommon::ValidatePath first!");
		} // LCOV_EXCL_STOP
	}
	return val;
}
|
235
|
+
|
236
|
+
// Recursive worker for wildcard paths: follows the segments in [ptr, end) starting from "val" and
// appends every value that the path designates to "vals".
// A wildcard segment recurses into each member / element with the remainder of the path.
// Branches that do not match (wrong container type, failed lookup) contribute nothing.
// The results of ReadKey / ReadArrayIndex are only asserted in DEBUG builds.
void GetWildcardPathInternal(yyjson_val *val, const char *ptr, const char *const end, vector<yyjson_val *> &vals) {
	while (val != nullptr && ptr != end) {
		const char token = *ptr;
		ptr++;
		D_ASSERT(ptr != end);
		switch (token) {
		case '.': { // Object field
			if (!unsafe_yyjson_is_obj(val)) {
				return;
			}
			const char *key_ptr;
			idx_t key_len;
#ifdef DEBUG
			const bool key_ok = ReadKey(ptr, end, key_ptr, key_len);
			D_ASSERT(key_ok);
#else
			ReadKey(ptr, end, key_ptr, key_len);
#endif
			if (key_len != DConstants::INVALID_INDEX) {
				// Plain key: descend and continue with the next segment
				val = yyjson_obj_getn(val, key_ptr, key_len);
				break;
			}
			// Wildcard: fan out over all members, each one finishes the rest of the path itself
			size_t member_idx, member_count;
			yyjson_val *member_key, *member_val;
			yyjson_obj_foreach(val, member_idx, member_count, member_key, member_val) {
				GetWildcardPathInternal(member_val, ptr, end, vals);
			}
			return;
		}
		case '[': { // Array index
			if (!unsafe_yyjson_is_arr(val)) {
				return;
			}
			idx_t array_index;
			bool from_back;
#ifdef DEBUG
			const bool index_ok = ReadArrayIndex(ptr, end, array_index, from_back);
			D_ASSERT(index_ok);
#else
			ReadArrayIndex(ptr, end, array_index, from_back);
#endif
			if (array_index != DConstants::INVALID_INDEX) {
				// Plain index: descend and continue with the next segment
				if (from_back && array_index != 0) {
					// Turn "N from the back" into a position counted from the front
					array_index = unsafe_yyjson_get_len(val) - array_index;
				}
				val = yyjson_arr_get(val, array_index);
				break;
			}
			// Wildcard: fan out over all elements, each one finishes the rest of the path itself
			size_t elem_idx, elem_count;
			yyjson_val *elem_val;
			yyjson_arr_foreach(val, elem_idx, elem_count, elem_val) {
				GetWildcardPathInternal(elem_val, ptr, end, vals);
			}
			return;
		}
		default: // LCOV_EXCL_START
			throw InternalException(
			    "Invalid JSON Path encountered in GetWildcardPathInternal, call JSONCommon::ValidatePath first!");
		} // LCOV_EXCL_STOP
	}
	// The whole path was consumed (or a lookup came back empty): keep the value if there is one
	if (val != nullptr) {
		vals.emplace_back(val);
	}
}
|
302
|
+
|
303
|
+
// Collects into "vals" every value of "val" that the '$'-rooted path (ptr, len) designates.
// The path is expected to have gone through JSONCommon::ValidatePath already.
void JSONCommon::GetWildcardPath(yyjson_val *val, const char *ptr, const idx_t &len, vector<yyjson_val *> &vals) {
	const char *const path_end = ptr + len;
	// The first segment starts right after the leading '$'
	const char *const first_segment = ptr + 1;
	GetWildcardPathInternal(val, first_segment, path_end, vals);
}
|
77
309
|
|
78
310
|
} // namespace duckdb
|
@@ -395,7 +395,7 @@ static inline void ExtractStructureObject(yyjson_val *obj, JSONStructureNode &no
|
|
395
395
|
|
396
396
|
// Records the type of a scalar JSON value (neither array nor object, asserted) in "node".
static inline void ExtractStructureVal(yyjson_val *val, JSONStructureNode &node) {
	D_ASSERT(!yyjson_is_arr(val) && !yyjson_is_obj(val));
	const auto scalar_type = JSONCommon::ValTypeToLogicalTypeId(val);
	node.GetOrCreateDescription(scalar_type);
}
|
400
400
|
|
401
401
|
void JSONStructure::ExtractStructure(yyjson_val *val, JSONStructureNode &node) {
|
@@ -41,7 +41,7 @@ static LogicalType StructureStringToType(yyjson_val *val, ClientContext &context
|
|
41
41
|
|
42
42
|
// Converts a JSON structure array into a LIST type.
// The array must hold exactly one value, which describes the child type of the list.
// Throws BinderException when the array is empty or holds more than one value.
static LogicalType StructureStringToTypeArray(yyjson_val *arr, ClientContext &context) {
	const auto child_count = yyjson_arr_size(arr);
	if (child_count == 0) {
		// Previously reported as "Too many values", which is misleading for an empty array
		throw BinderException("Empty array in JSON structure");
	}
	if (child_count != 1) {
		throw BinderException("Too many values in array of JSON structure");
	}
	return LogicalType::LIST(StructureStringToType(yyjson_arr_get_first(arr), context));
}
|
@@ -62,7 +62,7 @@ static LogicalType StructureToTypeObject(yyjson_val *obj, ClientContext &context
|
|
62
62
|
}
|
63
63
|
D_ASSERT(yyjson_obj_size(obj) == names.size());
|
64
64
|
if (child_types.empty()) {
|
65
|
-
throw
|
65
|
+
throw BinderException("Empty object in JSON structure");
|
66
66
|
}
|
67
67
|
return LogicalType::STRUCT(child_types);
|
68
68
|
}
|
@@ -76,7 +76,7 @@ static LogicalType StructureStringToType(yyjson_val *val, ClientContext &context
|
|
76
76
|
case YYJSON_TYPE_STR | YYJSON_SUBTYPE_NONE:
|
77
77
|
return TransformStringToLogicalType(unsafe_yyjson_get_str(val), context);
|
78
78
|
default:
|
79
|
-
throw
|
79
|
+
throw BinderException("invalid JSON structure");
|
80
80
|
}
|
81
81
|
}
|
82
82
|
|
@@ -89,20 +89,15 @@ static unique_ptr<FunctionData> JSONTransformBind(ClientContext &context, Scalar
|
|
89
89
|
if (arguments[1]->return_type == LogicalTypeId::SQLNULL) {
|
90
90
|
bound_function.return_type = LogicalTypeId::SQLNULL;
|
91
91
|
} else if (!arguments[1]->IsFoldable()) {
|
92
|
-
throw
|
92
|
+
throw BinderException("JSON structure must be a constant!");
|
93
93
|
} else {
|
94
94
|
auto structure_val = ExpressionExecutor::EvaluateScalar(context, *arguments[1]);
|
95
95
|
if (!structure_val.DefaultTryCastAs(JSONCommon::JSONType())) {
|
96
|
-
throw
|
96
|
+
throw BinderException("Cannot cast JSON structure to string");
|
97
97
|
}
|
98
98
|
auto structure_string = structure_val.GetValueUnsafe<string_t>();
|
99
99
|
JSONAllocator json_allocator(Allocator::DefaultAllocator());
|
100
|
-
|
101
|
-
auto doc =
|
102
|
-
JSONCommon::ReadDocumentUnsafe(structure_string, JSONCommon::READ_FLAG, json_allocator.GetYYAlc(), &err);
|
103
|
-
if (err.code != YYJSON_READ_SUCCESS) {
|
104
|
-
JSONCommon::ThrowParseError(structure_string.GetData(), structure_string.GetSize(), err);
|
105
|
-
}
|
100
|
+
auto doc = JSONCommon::ReadDocument(structure_string, JSONCommon::READ_FLAG, json_allocator.GetYYAlc());
|
106
101
|
bound_function.return_type = StructureStringToType(doc->root, context);
|
107
102
|
}
|
108
103
|
return make_uniq<VariableReturnBindData>(bound_function.return_type);
|
@@ -114,10 +109,9 @@ static inline string_t GetString(yyjson_val *val) {
|
|
114
109
|
|
115
110
|
template <class T, class OP = TryCast>
|
116
111
|
static inline bool GetValueNumerical(yyjson_val *val, T &result, JSONTransformOptions &options) {
|
112
|
+
D_ASSERT(unsafe_yyjson_get_tag(val) != (YYJSON_TYPE_NULL | YYJSON_SUBTYPE_NONE));
|
117
113
|
bool success;
|
118
114
|
switch (unsafe_yyjson_get_tag(val)) {
|
119
|
-
case YYJSON_TYPE_NULL | YYJSON_SUBTYPE_NONE:
|
120
|
-
return false;
|
121
115
|
case YYJSON_TYPE_STR | YYJSON_SUBTYPE_NONE:
|
122
116
|
success = OP::template Operation<string_t, T>(GetString(val), result, options.strict_cast);
|
123
117
|
break;
|
@@ -150,10 +144,9 @@ static inline bool GetValueNumerical(yyjson_val *val, T &result, JSONTransformOp
|
|
150
144
|
|
151
145
|
template <class T, class OP = TryCastToDecimal>
|
152
146
|
static inline bool GetValueDecimal(yyjson_val *val, T &result, uint8_t w, uint8_t s, JSONTransformOptions &options) {
|
147
|
+
D_ASSERT(unsafe_yyjson_get_tag(val) != (YYJSON_TYPE_NULL | YYJSON_SUBTYPE_NONE));
|
153
148
|
bool success;
|
154
149
|
switch (unsafe_yyjson_get_tag(val)) {
|
155
|
-
case YYJSON_TYPE_NULL | YYJSON_SUBTYPE_NONE:
|
156
|
-
return false;
|
157
150
|
case YYJSON_TYPE_STR | YYJSON_SUBTYPE_NONE:
|
158
151
|
success = OP::template Operation<string_t, T>(GetString(val), result, &options.error_message, w, s);
|
159
152
|
break;
|
@@ -186,9 +179,8 @@ static inline bool GetValueDecimal(yyjson_val *val, T &result, uint8_t w, uint8_
|
|
186
179
|
}
|
187
180
|
|
188
181
|
static inline bool GetValueString(yyjson_val *val, yyjson_alc *alc, string_t &result, Vector &vector) {
|
182
|
+
D_ASSERT(unsafe_yyjson_get_tag(val) != (YYJSON_TYPE_NULL | YYJSON_SUBTYPE_NONE));
|
189
183
|
switch (unsafe_yyjson_get_tag(val)) {
|
190
|
-
case YYJSON_TYPE_NULL | YYJSON_SUBTYPE_NONE:
|
191
|
-
return true;
|
192
184
|
case YYJSON_TYPE_STR | YYJSON_SUBTYPE_NONE:
|
193
185
|
result = string_t(unsafe_yyjson_get_str(val), unsafe_yyjson_get_len(val));
|
194
186
|
return true;
|
@@ -318,26 +310,14 @@ static bool TransformStringWithFormat(Vector &string_vector, StrpTimeFormat &for
|
|
318
310
|
auto &target_validity = FlatVector::Validity(result);
|
319
311
|
|
320
312
|
bool success = true;
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
}
|
330
|
-
}
|
331
|
-
} else {
|
332
|
-
for (idx_t i = 0; i < count; i++) {
|
333
|
-
if (!source_validity.RowIsValid(i)) {
|
334
|
-
target_validity.SetInvalid(i);
|
335
|
-
} else if (!OP::template Operation<T>(format, source_strings[i], target_vals[i], options.error_message)) {
|
336
|
-
target_validity.SetInvalid(i);
|
337
|
-
if (success && options.strict_cast) {
|
338
|
-
options.object_index = i;
|
339
|
-
success = false;
|
340
|
-
}
|
313
|
+
for (idx_t i = 0; i < count; i++) {
|
314
|
+
if (!source_validity.RowIsValid(i)) {
|
315
|
+
target_validity.SetInvalid(i);
|
316
|
+
} else if (!OP::template Operation<T>(format, source_strings[i], target_vals[i], options.error_message)) {
|
317
|
+
target_validity.SetInvalid(i);
|
318
|
+
if (success && options.strict_cast) {
|
319
|
+
options.object_index = i;
|
320
|
+
success = false;
|
341
321
|
}
|
342
322
|
}
|
343
323
|
}
|
@@ -3,7 +3,7 @@
|
|
3
3
|
namespace duckdb {
|
4
4
|
|
5
5
|
// Returns the type name of "val" as produced by JSONCommon::ValTypeToStringT.
// "alc" and "result" are not used by this function.
static inline string_t GetType(yyjson_val *val, yyjson_alc *alc, Vector &result) {
	const string_t type_name = JSONCommon::ValTypeToStringT(val);
	return type_name;
}
|
8
8
|
|
9
9
|
static void UnaryTypeFunction(DataChunk &args, ExpressionState &state, Vector &result) {
|
@@ -12,24 +12,23 @@
|
|
12
12
|
|
13
13
|
namespace duckdb {
|
14
14
|
|
15
|
-
|
16
|
-
|
17
|
-
|
15
|
+
using JSONPathType = JSONCommon::JSONPathType;
|
16
|
+
|
17
|
+
static JSONPathType CheckPath(const Value &path_val, string &path, size_t &len) {
|
18
18
|
if (path_val.IsNull()) {
|
19
19
|
throw InvalidInputException("JSON path cannot be NULL");
|
20
20
|
}
|
21
|
-
|
22
|
-
throw InvalidInputException(error);
|
23
|
-
}
|
21
|
+
const auto path_str_val = path_val.DefaultCastAs(LogicalType::VARCHAR);
|
24
22
|
auto path_str = path_str_val.GetValueUnsafe<string_t>();
|
25
23
|
len = path_str.GetSize();
|
26
24
|
auto ptr = path_str.GetData();
|
27
25
|
// Empty strings and invalid $ paths yield an error
|
28
26
|
if (len == 0) {
|
29
|
-
throw
|
27
|
+
throw BinderException("Empty JSON path");
|
30
28
|
}
|
29
|
+
JSONPathType path_type = JSONPathType::REGULAR;
|
31
30
|
if (*ptr == '$') {
|
32
|
-
JSONCommon::
|
31
|
+
path_type = JSONCommon::ValidatePath(ptr, len, true);
|
33
32
|
}
|
34
33
|
// Copy over string to the bind data
|
35
34
|
if (*ptr == '/' || *ptr == '$') {
|
@@ -38,19 +37,20 @@ static void CheckPath(const Value &path_val, string &path, size_t &len) {
|
|
38
37
|
path = "/" + string(ptr, len);
|
39
38
|
len++;
|
40
39
|
}
|
40
|
+
return path_type;
|
41
41
|
}
|
42
42
|
|
43
|
-
JSONReadFunctionData::JSONReadFunctionData(bool constant, string path_p, idx_t len)
|
44
|
-
: constant(constant), path(std::move(path_p)), ptr(path.c_str()), len(len) {
|
43
|
+
// Bind data for JSON functions that take a path argument.
//   constant    - stored as given
//   path_p      - path text, moved into the "path" member
//   len         - stored as given
//   path_type_p - stored as given
// "ptr" is taken from the "path" member, not from the moved-from parameter.
// NOTE(review): this relies on "path" being declared before "ptr" in the class - verify against the header.
JSONReadFunctionData::JSONReadFunctionData(bool constant, string path_p, idx_t len, JSONPathType path_type_p)
    : constant(constant),
      path(std::move(path_p)),
      path_type(path_type_p),
      ptr(path.c_str()),
      len(len) {
}
|
46
46
|
|
47
47
|
// Creates a new JSONReadFunctionData from this object's constant, path, len and path_type.
unique_ptr<FunctionData> JSONReadFunctionData::Copy() const {
	auto duplicate = make_uniq<JSONReadFunctionData>(constant, path, len, path_type);
	return std::move(duplicate);
}
|
50
50
|
|
51
51
|
bool JSONReadFunctionData::Equals(const FunctionData &other_p) const {
|
52
52
|
auto &other = (const JSONReadFunctionData &)other_p;
|
53
|
-
return constant == other.constant && path == other.path && len == other.len;
|
53
|
+
return constant == other.constant && path == other.path && len == other.len && path_type == other.path_type;
|
54
54
|
}
|
55
55
|
|
56
56
|
unique_ptr<FunctionData> JSONReadFunctionData::Bind(ClientContext &context, ScalarFunction &bound_function,
|
@@ -59,12 +59,16 @@ unique_ptr<FunctionData> JSONReadFunctionData::Bind(ClientContext &context, Scal
|
|
59
59
|
bool constant = false;
|
60
60
|
string path = "";
|
61
61
|
size_t len = 0;
|
62
|
+
JSONPathType path_type = JSONPathType::REGULAR;
|
62
63
|
if (arguments[1]->return_type.id() != LogicalTypeId::SQLNULL && arguments[1]->IsFoldable()) {
|
63
64
|
constant = true;
|
64
65
|
const auto path_val = ExpressionExecutor::EvaluateScalar(context, *arguments[1]);
|
65
|
-
CheckPath(path_val, path, len);
|
66
|
+
path_type = CheckPath(path_val, path, len);
|
67
|
+
}
|
68
|
+
if (path_type == JSONCommon::JSONPathType::WILDCARD) {
|
69
|
+
bound_function.return_type = LogicalType::LIST(bound_function.return_type);
|
66
70
|
}
|
67
|
-
return make_uniq<JSONReadFunctionData>(constant, std::move(path), len);
|
71
|
+
return make_uniq<JSONReadFunctionData>(constant, std::move(path), len, path_type);
|
68
72
|
}
|
69
73
|
|
70
74
|
JSONReadManyFunctionData::JSONReadManyFunctionData(vector<string> paths_p, vector<size_t> lens_p)
|
@@ -90,10 +94,7 @@ unique_ptr<FunctionData> JSONReadManyFunctionData::Bind(ClientContext &context,
|
|
90
94
|
throw ParameterNotResolvedException();
|
91
95
|
}
|
92
96
|
if (!arguments[1]->IsFoldable()) {
|
93
|
-
throw
|
94
|
-
}
|
95
|
-
if (arguments[1]->return_type.id() == LogicalTypeId::SQLNULL) {
|
96
|
-
return make_uniq<JSONReadManyFunctionData>(vector<string>(), vector<size_t>());
|
97
|
+
throw BinderException("List of paths must be constant");
|
97
98
|
}
|
98
99
|
|
99
100
|
vector<string> paths;
|
@@ -102,7 +103,9 @@ unique_ptr<FunctionData> JSONReadManyFunctionData::Bind(ClientContext &context,
|
|
102
103
|
for (auto &path_val : ListValue::GetChildren(paths_val)) {
|
103
104
|
paths.emplace_back("");
|
104
105
|
lens.push_back(0);
|
105
|
-
CheckPath(path_val, paths.back(), lens.back())
|
106
|
+
if (CheckPath(path_val, paths.back(), lens.back()) == JSONPathType::WILDCARD) {
|
107
|
+
throw BinderException("Cannot have wildcards in JSON path when supplying multiple paths");
|
108
|
+
}
|
106
109
|
}
|
107
110
|
|
108
111
|
return make_uniq<JSONReadManyFunctionData>(std::move(paths), std::move(lens));
|
@@ -120,11 +123,8 @@ unique_ptr<FunctionLocalState> JSONFunctionLocalState::Init(ExpressionState &sta
|
|
120
123
|
}
|
121
124
|
|
122
125
|
// Creates the local state used by JSON casts.
// When the cast parameters carry a client context the state is constructed from that context,
// otherwise it is constructed from Allocator::DefaultAllocator().
unique_ptr<FunctionLocalState> JSONFunctionLocalState::InitCastLocalState(CastLocalStateParameters &parameters) {
	if (parameters.context) {
		return make_uniq<JSONFunctionLocalState>(*parameters.context);
	}
	return make_uniq<JSONFunctionLocalState>(Allocator::DefaultAllocator());
}
|
129
129
|
|
130
130
|
JSONFunctionLocalState &JSONFunctionLocalState::ResetAndGet(ExpressionState &state) {
|
@@ -19,7 +19,7 @@ void JSONScanData::Bind(ClientContext &context, TableFunctionBindInput &input) {
|
|
19
19
|
auto_detect = info.auto_detect;
|
20
20
|
|
21
21
|
for (auto &kv : input.named_parameters) {
|
22
|
-
if (MultiFileReader::ParseOption(kv.first, kv.second, options.file_options)) {
|
22
|
+
if (MultiFileReader::ParseOption(kv.first, kv.second, options.file_options, context)) {
|
23
23
|
continue;
|
24
24
|
}
|
25
25
|
auto loption = StringUtil::Lower(kv.first);
|
@@ -51,10 +51,7 @@ void JSONScanData::Bind(ClientContext &context, TableFunctionBindInput &input) {
|
|
51
51
|
}
|
52
52
|
|
53
53
|
files = MultiFileReader::GetFileList(context, input.inputs[0], "JSON");
|
54
|
-
|
55
|
-
if (options.file_options.auto_detect_hive_partitioning) {
|
56
|
-
options.file_options.hive_partitioning = MultiFileReaderOptions::AutoDetectHivePartitioning(files);
|
57
|
-
}
|
54
|
+
options.file_options.AutoDetectHivePartitioning(files, context);
|
58
55
|
|
59
56
|
InitializeReaders(context);
|
60
57
|
}
|
@@ -236,7 +233,7 @@ unique_ptr<GlobalTableFunctionState> JSONGlobalTableFunctionState::Init(ClientCo
|
|
236
233
|
for (auto &reader : gstate.json_readers) {
|
237
234
|
MultiFileReader::FinalizeBind(reader->GetOptions().file_options, gstate.bind_data.reader_bind,
|
238
235
|
reader->GetFileName(), gstate.names, dummy_types, bind_data.names,
|
239
|
-
input.column_ids, reader->reader_data);
|
236
|
+
input.column_ids, reader->reader_data, context);
|
240
237
|
}
|
241
238
|
|
242
239
|
return std::move(result);
|