duckdb 0.8.2-dev161.0 → 0.8.2-dev1764.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +15 -12
- package/binding.gyp.in +1 -1
- package/configure.py +1 -1
- package/duckdb_extension_config.cmake +10 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
- package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
- package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
- package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
- package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
- package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
- package/src/duckdb/extension/icu/icu_extension.cpp +3 -3
- package/src/duckdb/extension/json/include/json_common.hpp +47 -231
- package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
- package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
- package/src/duckdb/extension/json/json_common.cpp +272 -40
- package/src/duckdb/extension/json/json_functions/json_create.cpp +21 -2
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +91 -38
- package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
- package/src/duckdb/extension/json/json_functions.cpp +24 -24
- package/src/duckdb/extension/json/json_scan.cpp +3 -6
- package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
- package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
- package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
- package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
- package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
- package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
- package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
- package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
- package/src/duckdb/extension/parquet/parquet_extension.cpp +194 -20
- package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
- package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
- package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
- package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
- package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
- package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
- package/src/duckdb/src/common/allocator.cpp +14 -2
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +79 -12
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +44 -19
- package/src/duckdb/src/common/assert.cpp +3 -0
- package/src/duckdb/src/common/enum_util.cpp +4619 -4446
- package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/exception.cpp +2 -2
- package/src/duckdb/src/common/extra_type_info.cpp +506 -0
- package/src/duckdb/src/common/file_system.cpp +19 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
- package/src/duckdb/src/common/local_file_system.cpp +14 -14
- package/src/duckdb/src/common/multi_file_reader.cpp +184 -20
- package/src/duckdb/src/common/operator/cast_operators.cpp +35 -1
- package/src/duckdb/src/common/radix_partitioning.cpp +26 -8
- package/src/duckdb/src/common/re2_regex.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
- package/src/duckdb/src/common/sort/partition_state.cpp +70 -50
- package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
- package/src/duckdb/src/common/types/bit.cpp +51 -0
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
- package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
- package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
- package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
- package/src/duckdb/src/common/types/date.cpp +9 -0
- package/src/duckdb/src/common/types/list_segment.cpp +24 -74
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
- package/src/duckdb/src/common/types/uuid.cpp +2 -2
- package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
- package/src/duckdb/src/common/types/value.cpp +11 -6
- package/src/duckdb/src/common/types.cpp +9 -656
- package/src/duckdb/src/common/virtual_file_system.cpp +138 -1
- package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
- package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
- package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
- package/src/duckdb/src/core_functions/function_list.cpp +4 -2
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
- package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
- package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
- package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
- package/src/duckdb/src/execution/expression_executor.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +149 -139
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
- package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
- package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
- package/src/duckdb/src/execution/index/art/node.cpp +113 -120
- package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
- package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
- package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
- package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
- package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
- package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
- package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
- package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
- package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +444 -284
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +28 -12
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +23 -4
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +41 -5
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
- package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
- package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
- package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
- package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -2
- package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
- package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
- package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +56 -33
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +17 -13
- package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
- package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
- package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
- package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
- package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
- package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
- package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
- package/src/duckdb/src/function/function.cpp +3 -1
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -0
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
- package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
- package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
- package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
- package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
- package/src/duckdb/src/function/table/arrow.cpp +19 -0
- package/src/duckdb/src/function/table/arrow_conversion.cpp +35 -1
- package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
- package/src/duckdb/src/function/table/read_csv.cpp +100 -17
- package/src/duckdb/src/function/table/system/test_all_types.cpp +7 -0
- package/src/duckdb/src/function/table/system_functions.cpp +1 -0
- package/src/duckdb/src/function/table/table_scan.cpp +9 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +46 -2
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
- package/src/duckdb/src/include/duckdb/common/dl.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +616 -584
- package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
- package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
- package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +219 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
- package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
- package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +43 -3
- package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +23 -8
- package/src/duckdb/src/include/duckdb/common/string_util.hpp +11 -0
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
- package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +5 -2
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/types.hpp +1 -15
- package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +38 -97
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +31 -11
- package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +3 -10
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +12 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
- package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
- package/src/duckdb/src/include/duckdb/main/extension/generated_extension_loader.hpp +22 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
- package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
- package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
- package/src/duckdb/src/include/duckdb/main/settings.hpp +39 -1
- package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
- package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
- package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
- package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
- package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
- package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
- package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +12 -5
- package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
- package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
- package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
- package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
- package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
- package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
- package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
- package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
- package/src/duckdb/src/include/duckdb.h +28 -0
- package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
- package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
- package/src/duckdb/src/main/config.cpp +4 -0
- package/src/duckdb/src/main/database.cpp +1 -1
- package/src/duckdb/src/main/extension/extension_helper.cpp +93 -88
- package/src/duckdb/src/main/extension/extension_install.cpp +9 -0
- package/src/duckdb/src/main/extension/extension_load.cpp +10 -1
- package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
- package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
- package/src/duckdb/src/main/relation.cpp +6 -5
- package/src/duckdb/src/main/settings/settings.cpp +79 -18
- package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
- package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
- package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
- package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
- package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
- package/src/duckdb/src/optimizer/optimizer.cpp +49 -14
- package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
- package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
- package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
- package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
- package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
- package/src/duckdb/src/parallel/executor.cpp +15 -0
- package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
- package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
- package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
- package/src/duckdb/src/parser/expression/case_expression.cpp +0 -13
- package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
- package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
- package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
- package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
- package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
- package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
- package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
- package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/parameter_expression.cpp +0 -12
- package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
- package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
- package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
- package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
- package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
- package/src/duckdb/src/parser/parser.cpp +8 -2
- package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
- package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
- package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
- package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
- package/src/duckdb/src/parser/query_node.cpp +15 -37
- package/src/duckdb/src/parser/result_modifier.cpp +0 -74
- package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
- package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
- package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
- package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
- package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -23
- package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
- package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
- package/src/duckdb/src/parser/tableref.cpp +0 -44
- package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
- package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
- package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
- package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
- package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
- package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
- package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
- package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
- package/src/duckdb/src/parser/transformer.cpp +15 -0
- package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
- package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
- package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
- package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +5 -4
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -49
- package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +64 -26
- package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
- package/src/duckdb/src/planner/binder.cpp +44 -31
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
- package/src/duckdb/src/planner/expression_binder.cpp +3 -0
- package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
- package/src/duckdb/src/planner/logical_operator.cpp +5 -0
- package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
- package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
- package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
- package/src/duckdb/src/planner/operator/logical_get.cpp +9 -4
- package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
- package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
- package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
- package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
- package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
- package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
- package/src/duckdb/src/storage/compression/rle.cpp +0 -1
- package/src/duckdb/src/storage/data_table.cpp +1 -1
- package/src/duckdb/src/storage/local_storage.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +340 -0
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
- package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +86 -0
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +166 -0
- package/src/duckdb/src/storage/serialization/serialize_types.cpp +127 -0
- package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
- package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
- package/src/duckdb/src/storage/table/row_group.cpp +25 -9
- package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
- package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
- package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
- package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
- package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
- package/src/duckdb/ub_src_common.cpp +2 -0
- package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
- package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
- package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
- package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
- package/src/duckdb/ub_src_optimizer.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
- package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
- package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
- package/src/duckdb/ub_src_planner_operator.cpp +4 -0
- package/src/duckdb/ub_src_storage_serialization.cpp +10 -0
- package/src/statement.cpp +10 -3
- package/test/columns.test.ts +24 -1
- package/test/test_all_types.test.ts +234 -0
- package/tsconfig.json +1 -0
- package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
- package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -22,6 +22,9 @@ struct SelectionVector;
|
|
22
22
|
//! Generic radix partitioning functions
|
23
23
|
struct RadixPartitioning {
|
24
24
|
public:
|
25
|
+
//! 4096 partitions ought to be enough to go out-of-core properly
|
26
|
+
static constexpr const idx_t MAX_RADIX_BITS = 12;
|
27
|
+
|
25
28
|
//! The number of partitions for a given number of radix bits
|
26
29
|
static inline constexpr idx_t NumberOfPartitions(idx_t radix_bits) {
|
27
30
|
return idx_t(1) << radix_bits;
|
@@ -38,10 +41,12 @@ public:
|
|
38
41
|
throw InternalException("RadixPartitioning::RadixBits unable to find partition count!");
|
39
42
|
}
|
40
43
|
|
44
|
+
//! Radix bits begin after uint16_t because these bits are used as salt in the aggregate HT
|
41
45
|
static inline constexpr idx_t Shift(idx_t radix_bits) {
|
42
|
-
return
|
46
|
+
return (sizeof(hash_t) - sizeof(uint16_t)) * 8 - radix_bits;
|
43
47
|
}
|
44
48
|
|
49
|
+
//! Mask of the radix bits of the hash
|
45
50
|
static inline constexpr hash_t Mask(idx_t radix_bits) {
|
46
51
|
return (hash_t(1 << radix_bits) - 1) << Shift(radix_bits);
|
47
52
|
}
|
@@ -49,26 +54,6 @@ public:
|
|
49
54
|
//! Select using a cutoff on the radix bits of the hash
|
50
55
|
static idx_t Select(Vector &hashes, const SelectionVector *sel, idx_t count, idx_t radix_bits, idx_t cutoff,
|
51
56
|
SelectionVector *true_sel, SelectionVector *false_sel);
|
52
|
-
|
53
|
-
//! Convert hashes to bins
|
54
|
-
static void HashesToBins(Vector &hashes, idx_t radix_bits, Vector &bins, idx_t count);
|
55
|
-
};
|
56
|
-
|
57
|
-
//! Templated radix partitioning constants, can be templated to the number of radix bits
|
58
|
-
template <idx_t radix_bits>
|
59
|
-
struct RadixPartitioningConstants {
|
60
|
-
public:
|
61
|
-
//! Bitmask of the upper bits of the 5th byte
|
62
|
-
static constexpr const idx_t NUM_PARTITIONS = RadixPartitioning::NumberOfPartitions(radix_bits);
|
63
|
-
static constexpr const idx_t SHIFT = RadixPartitioning::Shift(radix_bits);
|
64
|
-
static constexpr const hash_t MASK = RadixPartitioning::Mask(radix_bits);
|
65
|
-
|
66
|
-
public:
|
67
|
-
//! Apply bitmask and right shift to get a number between 0 and NUM_PARTITIONS
|
68
|
-
static inline hash_t ApplyMask(hash_t hash) {
|
69
|
-
D_ASSERT((hash & MASK) >> SHIFT < NUM_PARTITIONS);
|
70
|
-
return (hash & MASK) >> SHIFT;
|
71
|
-
}
|
72
57
|
};
|
73
58
|
|
74
59
|
//! RadixPartitionedColumnData is a PartitionedColumnData that partitions input based on the radix of a hash
|
@@ -13,7 +13,7 @@
|
|
13
13
|
|
14
14
|
namespace duckdb {
|
15
15
|
|
16
|
-
class
|
16
|
+
class ArenaAllocator;
|
17
17
|
struct AggregateObject;
|
18
18
|
struct AggregateFilterData;
|
19
19
|
class DataChunk;
|
@@ -26,10 +26,10 @@ class Vector;
|
|
26
26
|
struct UnifiedVectorFormat;
|
27
27
|
|
28
28
|
struct RowOperationsState {
|
29
|
-
RowOperationsState(
|
29
|
+
explicit RowOperationsState(ArenaAllocator &allocator) : allocator(allocator) {
|
30
30
|
}
|
31
31
|
|
32
|
-
|
32
|
+
ArenaAllocator &allocator;
|
33
33
|
};
|
34
34
|
|
35
35
|
// RowOperations contains a set of operations that operate on data using a RowLayout
|
@@ -42,8 +42,8 @@ public:
|
|
42
42
|
using Orders = vector<BoundOrderByNode>;
|
43
43
|
using Types = vector<LogicalType>;
|
44
44
|
|
45
|
-
using GroupingPartition = unique_ptr<
|
46
|
-
using GroupingAppend = unique_ptr<
|
45
|
+
using GroupingPartition = unique_ptr<PartitionedTupleData>;
|
46
|
+
using GroupingAppend = unique_ptr<PartitionedTupleDataAppendState>;
|
47
47
|
|
48
48
|
static void GenerateOrderings(Orders &partitions, Orders &orders,
|
49
49
|
const vector<unique_ptr<Expression>> &partition_bys, const Orders &order_bys,
|
@@ -53,10 +53,14 @@ public:
|
|
53
53
|
const vector<BoundOrderByNode> &order_bys, const Types &payload_types,
|
54
54
|
const vector<unique_ptr<BaseStatistics>> &partitions_stats, idx_t estimated_cardinality);
|
55
55
|
|
56
|
+
unique_ptr<RadixPartitionedTupleData> CreatePartition(idx_t new_bits) const;
|
57
|
+
void SyncPartitioning(const PartitionGlobalSinkState &other);
|
58
|
+
|
56
59
|
void UpdateLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append);
|
57
60
|
void CombineLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append);
|
58
61
|
|
59
|
-
void BuildSortState(
|
62
|
+
void BuildSortState(TupleDataCollection &group_data, GlobalSortState &global_sort) const;
|
63
|
+
void BuildSortState(TupleDataCollection &group_data, PartitionGlobalHashGroup &global_sort);
|
60
64
|
|
61
65
|
ClientContext &context;
|
62
66
|
BufferManager &buffer_manager;
|
@@ -64,9 +68,11 @@ public:
|
|
64
68
|
mutex lock;
|
65
69
|
|
66
70
|
// OVER(PARTITION BY...) (hash grouping)
|
67
|
-
unique_ptr<
|
71
|
+
unique_ptr<RadixPartitionedTupleData> grouping_data;
|
68
72
|
//! Payload plus hash column
|
69
|
-
|
73
|
+
TupleDataLayout grouping_types;
|
74
|
+
//! The number of radix bits if this partition is being synced with another
|
75
|
+
idx_t fixed_bits;
|
70
76
|
|
71
77
|
// OVER(...) (sorting)
|
72
78
|
Orders partitions;
|
@@ -83,6 +89,7 @@ public:
|
|
83
89
|
|
84
90
|
// Threading
|
85
91
|
idx_t memory_per_thread;
|
92
|
+
idx_t max_bits;
|
86
93
|
atomic<idx_t> count;
|
87
94
|
|
88
95
|
private:
|
@@ -102,8 +109,8 @@ public:
|
|
102
109
|
ExpressionExecutor executor;
|
103
110
|
DataChunk group_chunk;
|
104
111
|
DataChunk payload_chunk;
|
105
|
-
unique_ptr<
|
106
|
-
unique_ptr<
|
112
|
+
unique_ptr<PartitionedTupleData> local_partition;
|
113
|
+
unique_ptr<PartitionedTupleDataAppendState> local_append;
|
107
114
|
|
108
115
|
// OVER(...) (sorting)
|
109
116
|
size_t sort_cols;
|
@@ -127,7 +134,7 @@ class PartitionLocalMergeState;
|
|
127
134
|
|
128
135
|
class PartitionGlobalMergeState {
|
129
136
|
public:
|
130
|
-
using GroupDataPtr = unique_ptr<
|
137
|
+
using GroupDataPtr = unique_ptr<TupleDataCollection>;
|
131
138
|
|
132
139
|
PartitionGlobalMergeState(PartitionGlobalSinkState &sink, GroupDataPtr group_data, hash_t hash_bin);
|
133
140
|
|
@@ -175,10 +182,18 @@ public:
|
|
175
182
|
|
176
183
|
class PartitionGlobalMergeStates {
|
177
184
|
public:
|
185
|
+
struct Callback {
|
186
|
+
virtual bool HasError() const {
|
187
|
+
return false;
|
188
|
+
}
|
189
|
+
};
|
190
|
+
|
178
191
|
using PartitionGlobalMergeStatePtr = unique_ptr<PartitionGlobalMergeState>;
|
179
192
|
|
180
193
|
explicit PartitionGlobalMergeStates(PartitionGlobalSinkState &sink);
|
181
194
|
|
195
|
+
bool ExecuteTask(PartitionLocalMergeState &local_state, Callback &callback);
|
196
|
+
|
182
197
|
vector<PartitionGlobalMergeStatePtr> states;
|
183
198
|
};
|
184
199
|
|
@@ -16,6 +16,17 @@
|
|
16
16
|
|
17
17
|
namespace duckdb {
|
18
18
|
|
19
|
+
#ifndef DUCKDB_QUOTE_DEFINE
|
20
|
+
// Preprocessor trick to allow text to be converted to C-string / string
|
21
|
+
// Expecte use is:
|
22
|
+
// #ifdef SOME_DEFINE
|
23
|
+
// string str = DUCKDB_QUOTE_DEFINE(SOME_DEFINE)
|
24
|
+
// ...do something with str
|
25
|
+
// #endif SOME_DEFINE
|
26
|
+
#define DUCKDB_QUOTE_DEFINE_IMPL(x) #x
|
27
|
+
#define DUCKDB_QUOTE_DEFINE(x) DUCKDB_QUOTE_DEFINE_IMPL(x)
|
28
|
+
#endif
|
29
|
+
|
19
30
|
/**
|
20
31
|
* String Utility Functions
|
21
32
|
* Note that these are not the most efficient implementations (i.e., they copy
|
@@ -24,7 +24,7 @@ struct BatchedChunkScanState {
|
|
24
24
|
//! Scans over a BatchedDataCollection are ordered by batch index
|
25
25
|
class BatchedDataCollection {
|
26
26
|
public:
|
27
|
-
DUCKDB_API BatchedDataCollection(vector<LogicalType> types);
|
27
|
+
DUCKDB_API BatchedDataCollection(ClientContext &context, vector<LogicalType> types, bool buffer_managed = false);
|
28
28
|
|
29
29
|
//! Appends a datachunk with the given batch index to the batched collection
|
30
30
|
DUCKDB_API void Append(DataChunk &input, idx_t batch_index);
|
@@ -51,7 +51,9 @@ private:
|
|
51
51
|
ColumnDataAppendState append_state;
|
52
52
|
};
|
53
53
|
|
54
|
+
ClientContext &context;
|
54
55
|
vector<LogicalType> types;
|
56
|
+
bool buffer_managed;
|
55
57
|
//! The data of the batched chunk collection - a set of batch_index -> ColumnDataCollection pointers
|
56
58
|
map<idx_t, unique_ptr<ColumnDataCollection>> data;
|
57
59
|
//! The last batch collection that was inserted into
|
@@ -8,8 +8,12 @@
|
|
8
8
|
|
9
9
|
#pragma once
|
10
10
|
|
11
|
+
#include "duckdb/common/assert.hpp"
|
11
12
|
#include "duckdb/common/common.hpp"
|
13
|
+
#include "duckdb/common/hugeint.hpp"
|
14
|
+
#include "duckdb/common/limits.hpp"
|
12
15
|
#include "duckdb/common/types.hpp"
|
16
|
+
#include "duckdb/common/types/string_type.hpp"
|
13
17
|
|
14
18
|
namespace duckdb {
|
15
19
|
|
@@ -37,7 +41,33 @@ public:
|
|
37
41
|
//! Convert a string to a bit. This function should ONLY be called after calling GetBitSize, since it does NOT
|
38
42
|
//! perform data validation.
|
39
43
|
DUCKDB_API static void ToBit(string_t str, string_t &output);
|
44
|
+
|
40
45
|
DUCKDB_API static string ToBit(string_t str);
|
46
|
+
|
47
|
+
//! output needs to have enough space allocated before calling this function (blob size + 1)
|
48
|
+
DUCKDB_API static void BlobToBit(string_t blob, string_t &output);
|
49
|
+
|
50
|
+
DUCKDB_API static string BlobToBit(string_t blob);
|
51
|
+
|
52
|
+
//! output_str needs to have enough space allocated before calling this function (sizeof(T) + 1)
|
53
|
+
template <class T>
|
54
|
+
static void NumericToBit(T numeric, string_t &output_str);
|
55
|
+
|
56
|
+
template <class T>
|
57
|
+
static string NumericToBit(T numeric);
|
58
|
+
|
59
|
+
//! bit is expected to fit inside of output num (bit size <= sizeof(T) + 1)
|
60
|
+
template <class T>
|
61
|
+
static void BitToNumeric(string_t bit, T &output_num);
|
62
|
+
|
63
|
+
template <class T>
|
64
|
+
static T BitToNumeric(string_t bit);
|
65
|
+
|
66
|
+
//! bit is expected to fit inside of output_blob (bit size = output_blob + 1)
|
67
|
+
static void BitToBlob(string_t bit, string_t &output_blob);
|
68
|
+
|
69
|
+
static string BitToBlob(string_t bit);
|
70
|
+
|
41
71
|
//! Creates a new bitstring of determined length
|
42
72
|
DUCKDB_API static void BitString(const string_t &input, const idx_t &len, string_t &result);
|
43
73
|
DUCKDB_API static void SetEmptyBitString(string_t &target, string_t &input);
|
@@ -58,5 +88,56 @@ private:
|
|
58
88
|
static idx_t GetBitInternal(string_t bit_string, idx_t n);
|
59
89
|
static void SetBitInternal(string_t &bit_string, idx_t n, idx_t new_value);
|
60
90
|
static idx_t GetBitIndex(idx_t n);
|
91
|
+
static uint8_t GetFirstByte(const string_t &str);
|
61
92
|
};
|
93
|
+
|
94
|
+
//===--------------------------------------------------------------------===//
|
95
|
+
// Bit Template definitions
|
96
|
+
//===--------------------------------------------------------------------===//
|
97
|
+
template <class T>
|
98
|
+
void Bit::NumericToBit(T numeric, string_t &output_str) {
|
99
|
+
D_ASSERT(output_str.GetSize() >= sizeof(T) + 1);
|
100
|
+
|
101
|
+
auto output = output_str.GetDataWriteable();
|
102
|
+
auto data = const_data_ptr_cast(&numeric);
|
103
|
+
|
104
|
+
*output = 0; // set padding to 0
|
105
|
+
++output;
|
106
|
+
for (idx_t idx = 0; idx < sizeof(T); ++idx) {
|
107
|
+
output[idx] = data[sizeof(T) - idx - 1];
|
108
|
+
}
|
109
|
+
Bit::Finalize(output_str);
|
110
|
+
}
|
111
|
+
|
112
|
+
template <class T>
|
113
|
+
string Bit::NumericToBit(T numeric) {
|
114
|
+
auto bit_len = sizeof(T) + 1;
|
115
|
+
auto buffer = make_unsafe_uniq_array<char>(bit_len);
|
116
|
+
string_t output_str(buffer.get(), bit_len);
|
117
|
+
Bit::NumericToBit(numeric, output_str);
|
118
|
+
return output_str.GetString();
|
119
|
+
}
|
120
|
+
|
121
|
+
template <class T>
|
122
|
+
T Bit::BitToNumeric(string_t bit) {
|
123
|
+
T output;
|
124
|
+
Bit::BitToNumeric(bit, output);
|
125
|
+
return (output);
|
126
|
+
}
|
127
|
+
|
128
|
+
template <class T>
|
129
|
+
void Bit::BitToNumeric(string_t bit, T &output_num) {
|
130
|
+
D_ASSERT(bit.GetSize() <= sizeof(T) + 1);
|
131
|
+
|
132
|
+
output_num = 0;
|
133
|
+
auto data = const_data_ptr_cast(bit.GetData());
|
134
|
+
auto output = data_ptr_cast(&output_num);
|
135
|
+
|
136
|
+
idx_t padded_byte_idx = sizeof(T) - bit.GetSize() + 1;
|
137
|
+
output[sizeof(T) - 1 - padded_byte_idx] = GetFirstByte(bit);
|
138
|
+
for (idx_t idx = padded_byte_idx + 1; idx < sizeof(T); ++idx) {
|
139
|
+
output[sizeof(T) - 1 - idx] = data[1 + idx - padded_byte_idx];
|
140
|
+
}
|
141
|
+
}
|
142
|
+
|
62
143
|
} // namespace duckdb
|
@@ -53,7 +53,7 @@ public:
|
|
53
53
|
void Initialize(ColumnDataAllocator &other);
|
54
54
|
void InitializeChunkState(ChunkManagementState &state, ChunkMetaData &meta_data);
|
55
55
|
data_ptr_t GetDataPointer(ChunkManagementState &state, uint32_t block_id, uint32_t offset);
|
56
|
-
void UnswizzlePointers(ChunkManagementState &state, Vector &result,
|
56
|
+
void UnswizzlePointers(ChunkManagementState &state, Vector &result, idx_t v_offset, uint16_t count,
|
57
57
|
uint32_t block_id, uint32_t offset);
|
58
58
|
|
59
59
|
//! Deletes the block with the given id
|
@@ -143,7 +143,12 @@ public:
|
|
143
143
|
//! Initialize the column data collection
|
144
144
|
void Initialize(vector<LogicalType> types);
|
145
145
|
|
146
|
-
//! Get
|
146
|
+
//! Get references to the string heaps in this ColumnDataCollection
|
147
|
+
vector<shared_ptr<StringHeap>> GetHeapReferences();
|
148
|
+
//! Get the allocator type of this ColumnDataCollection
|
149
|
+
ColumnDataAllocatorType GetAllocatorType() const;
|
150
|
+
|
151
|
+
//! Get a vector of the segments in this ColumnDataCollection
|
147
152
|
const vector<unique_ptr<ColumnDataCollectionSegment>> &GetSegments() const;
|
148
153
|
|
149
154
|
private:
|
package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp
CHANGED
@@ -94,7 +94,7 @@ public:
|
|
94
94
|
//! The set of child indices
|
95
95
|
vector<VectorDataIndex> child_indices;
|
96
96
|
//! The string heap for the column data collection (only used for IN_MEMORY_ALLOCATOR)
|
97
|
-
StringHeap heap;
|
97
|
+
shared_ptr<StringHeap> heap;
|
98
98
|
|
99
99
|
public:
|
100
100
|
void AllocateNewChunk();
|
@@ -20,7 +20,9 @@ enum class ColumnDataAllocatorType : uint8_t {
|
|
20
20
|
BUFFER_MANAGER_ALLOCATOR,
|
21
21
|
//! Use an in-memory allocator, allocating data for every chunk
|
22
22
|
//! This causes the column data collection to allocate blocks that are not tied to a buffer manager
|
23
|
-
IN_MEMORY_ALLOCATOR
|
23
|
+
IN_MEMORY_ALLOCATOR,
|
24
|
+
//! Use a buffer manager to allocate vectors, but use a StringHeap for strings
|
25
|
+
HYBRID
|
24
26
|
};
|
25
27
|
|
26
28
|
enum class ColumnDataScanProperties : uint8_t {
|
@@ -72,13 +72,13 @@ struct date_t { // NOLINT
|
|
72
72
|
};
|
73
73
|
|
74
74
|
// special values
|
75
|
-
static inline date_t infinity() {
|
75
|
+
static inline date_t infinity() { // NOLINT
|
76
76
|
return date_t(NumericLimits<int32_t>::Maximum());
|
77
|
-
}
|
78
|
-
static inline date_t ninfinity() {
|
77
|
+
} // NOLINT
|
78
|
+
static inline date_t ninfinity() { // NOLINT
|
79
79
|
return date_t(-NumericLimits<int32_t>::Maximum());
|
80
|
-
}
|
81
|
-
static inline date_t epoch() {
|
80
|
+
} // NOLINT
|
81
|
+
static inline date_t epoch() { // NOLINT
|
82
82
|
return date_t(0);
|
83
83
|
} // NOLINT
|
84
84
|
};
|
@@ -158,6 +158,8 @@ public:
|
|
158
158
|
DUCKDB_API static int64_t EpochNanoseconds(date_t date);
|
159
159
|
//! Extract the epoch from the date (microseconds since 1970-01-01)
|
160
160
|
DUCKDB_API static int64_t EpochMicroseconds(date_t date);
|
161
|
+
//! Extract the epoch from the date (milliseconds since 1970-01-01)
|
162
|
+
DUCKDB_API static int64_t EpochMilliseconds(date_t date);
|
161
163
|
//! Convert the epoch (seconds since 1970-01-01) to a date_t
|
162
164
|
DUCKDB_API static date_t EpochToDate(int64_t epoch);
|
163
165
|
|
@@ -34,28 +34,26 @@ struct LinkedList {
|
|
34
34
|
|
35
35
|
// forward declarations
|
36
36
|
struct ListSegmentFunctions;
|
37
|
-
typedef ListSegment *(*create_segment_t)(const ListSegmentFunctions &functions,
|
37
|
+
typedef ListSegment *(*create_segment_t)(const ListSegmentFunctions &functions, ArenaAllocator &allocator,
|
38
38
|
uint16_t capacity);
|
39
|
-
typedef void (*write_data_to_segment_t)(const ListSegmentFunctions &functions,
|
39
|
+
typedef void (*write_data_to_segment_t)(const ListSegmentFunctions &functions, ArenaAllocator &allocator,
|
40
40
|
ListSegment *segment, Vector &input, idx_t &entry_idx, idx_t &count);
|
41
41
|
typedef void (*read_data_from_segment_t)(const ListSegmentFunctions &functions, const ListSegment *segment,
|
42
42
|
Vector &result, idx_t &total_count);
|
43
43
|
typedef ListSegment *(*copy_data_from_segment_t)(const ListSegmentFunctions &functions, const ListSegment *source,
|
44
|
-
|
45
|
-
typedef void (*destroy_segment_t)(const ListSegmentFunctions &functions, ListSegment *segment, Allocator &allocator);
|
44
|
+
ArenaAllocator &allocator);
|
46
45
|
|
47
46
|
struct ListSegmentFunctions {
|
48
47
|
create_segment_t create_segment;
|
49
48
|
write_data_to_segment_t write_data;
|
50
49
|
read_data_from_segment_t read_data;
|
51
50
|
copy_data_from_segment_t copy_data;
|
52
|
-
destroy_segment_t destroy;
|
53
51
|
vector<ListSegmentFunctions> child_functions;
|
54
52
|
|
55
|
-
void AppendRow(
|
53
|
+
void AppendRow(ArenaAllocator &allocator, LinkedList &linked_list, Vector &input, idx_t &entry_idx,
|
54
|
+
idx_t &count) const;
|
56
55
|
void BuildListVector(const LinkedList &linked_list, Vector &result, idx_t &initial_total_count) const;
|
57
|
-
void CopyLinkedList(const LinkedList &source_list, LinkedList &target_list,
|
58
|
-
void Destroy(Allocator &allocator, LinkedList &linked_list) const;
|
56
|
+
void CopyLinkedList(const LinkedList &source_list, LinkedList &target_list, ArenaAllocator &allocator) const;
|
59
57
|
};
|
60
58
|
|
61
59
|
void GetSegmentDataFunctions(ListSegmentFunctions &functions, const LogicalType &type);
|
@@ -48,7 +48,6 @@ struct PartitionTupleDataAllocators {
|
|
48
48
|
//! partitioning, e.g., radix, hive
|
49
49
|
class PartitionedTupleData {
|
50
50
|
public:
|
51
|
-
unique_ptr<PartitionedTupleData> CreateShared();
|
52
51
|
virtual ~PartitionedTupleData();
|
53
52
|
|
54
53
|
public:
|
@@ -124,7 +123,11 @@ protected:
|
|
124
123
|
void BuildBufferSpace(PartitionedTupleDataAppendState &state);
|
125
124
|
//! Create a collection for a specific a partition
|
126
125
|
unique_ptr<TupleDataCollection> CreatePartitionCollection(idx_t partition_index) const {
|
127
|
-
|
126
|
+
if (allocators) {
|
127
|
+
return make_uniq<TupleDataCollection>(allocators->allocators[partition_index]);
|
128
|
+
} else {
|
129
|
+
return make_uniq<TupleDataCollection>(buffer_manager, layout);
|
130
|
+
}
|
128
131
|
}
|
129
132
|
|
130
133
|
protected:
|
@@ -17,6 +17,7 @@ namespace duckdb {
|
|
17
17
|
class TupleDataAllocator;
|
18
18
|
struct TupleDataScatterFunction;
|
19
19
|
struct TupleDataGatherFunction;
|
20
|
+
struct RowOperationsState;
|
20
21
|
|
21
22
|
typedef void (*tuple_data_scatter_function_t)(const Vector &source, const TupleDataVectorFormat &source_format,
|
22
23
|
const SelectionVector &append_sel, const idx_t append_count,
|
@@ -38,6 +38,9 @@ struct CombinedListData {
|
|
38
38
|
};
|
39
39
|
|
40
40
|
struct TupleDataVectorFormat {
|
41
|
+
const SelectionVector *original_sel;
|
42
|
+
SelectionVector original_owned_sel;
|
43
|
+
|
41
44
|
UnifiedVectorFormat data;
|
42
45
|
vector<TupleDataVectorFormat> child_formats;
|
43
46
|
unique_ptr<CombinedListData> combined_list_data;
|
@@ -83,6 +83,10 @@ public:
|
|
83
83
|
return value.pointer.prefix;
|
84
84
|
}
|
85
85
|
|
86
|
+
char *GetPrefixWriteable() const {
|
87
|
+
return (char *)value.pointer.prefix;
|
88
|
+
}
|
89
|
+
|
86
90
|
idx_t GetSize() const {
|
87
91
|
return value.inlined.length;
|
88
92
|
}
|
@@ -95,6 +99,11 @@ public:
|
|
95
99
|
return GetString();
|
96
100
|
}
|
97
101
|
|
102
|
+
char *GetPointer() const {
|
103
|
+
D_ASSERT(!IsInlined());
|
104
|
+
return value.pointer.ptr;
|
105
|
+
}
|
106
|
+
|
98
107
|
void SetPointer(char *new_ptr) {
|
99
108
|
D_ASSERT(!IsInlined());
|
100
109
|
value.pointer.ptr = new_ptr;
|
@@ -394,6 +394,7 @@ struct ListValue {
|
|
394
394
|
struct UnionValue {
|
395
395
|
DUCKDB_API static const Value &GetValue(const Value &value);
|
396
396
|
DUCKDB_API static uint8_t GetTag(const Value &value);
|
397
|
+
DUCKDB_API static const LogicalType &GetType(const Value &value);
|
397
398
|
};
|
398
399
|
|
399
400
|
//! Return the internal integral value for any type that is stored as an integral value internally
|
@@ -27,19 +27,6 @@ class Vector;
|
|
27
27
|
class ClientContext;
|
28
28
|
class FieldWriter;
|
29
29
|
|
30
|
-
//! Extra Type Info Type
|
31
|
-
enum class ExtraTypeInfoType : uint8_t {
|
32
|
-
INVALID_TYPE_INFO = 0,
|
33
|
-
GENERIC_TYPE_INFO = 1,
|
34
|
-
DECIMAL_TYPE_INFO = 2,
|
35
|
-
STRING_TYPE_INFO = 3,
|
36
|
-
LIST_TYPE_INFO = 4,
|
37
|
-
STRUCT_TYPE_INFO = 5,
|
38
|
-
ENUM_TYPE_INFO = 6,
|
39
|
-
USER_TYPE_INFO = 7,
|
40
|
-
AGGREGATE_STATE_TYPE_INFO = 8
|
41
|
-
};
|
42
|
-
|
43
30
|
struct string_t;
|
44
31
|
|
45
32
|
template <class T>
|
@@ -328,8 +315,6 @@ struct LogicalType {
|
|
328
315
|
|
329
316
|
DUCKDB_API static LogicalType MaxLogicalType(const LogicalType &left, const LogicalType &right);
|
330
317
|
|
331
|
-
DUCKDB_API static ExtraTypeInfoType GetExtraTypeInfoType(const ExtraTypeInfo &type);
|
332
|
-
|
333
318
|
//! Gets the decimal properties of a numeric type. Fails if the type is not numeric.
|
334
319
|
DUCKDB_API bool GetDecimalProperties(uint8_t &width, uint8_t &scale) const;
|
335
320
|
|
@@ -480,6 +465,7 @@ bool ApproxEqual(float l, float r);
|
|
480
465
|
bool ApproxEqual(double l, double r);
|
481
466
|
|
482
467
|
struct aggregate_state_t {
|
468
|
+
aggregate_state_t() {}
|
483
469
|
aggregate_state_t(string function_name_p, LogicalType return_type_p, vector<LogicalType> bound_argument_types_p) : function_name(std::move(function_name_p)), return_type(std::move(return_type_p)), bound_argument_types(std::move(bound_argument_types_p)) {
|
484
470
|
}
|
485
471
|
|