duckdb 0.8.2-dev1.0 → 0.8.2-dev1182.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +16 -14
- package/binding.gyp.in +1 -1
- package/configure.py +1 -1
- package/duckdb_extension_config.cmake +10 -0
- package/lib/duckdb.d.ts +59 -0
- package/lib/duckdb.js +21 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
- package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
- package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
- package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
- package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
- package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
- package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
- package/src/duckdb/extension/icu/{icu-extension.cpp → icu_extension.cpp} +29 -34
- package/src/duckdb/extension/icu/include/{icu-extension.hpp → icu_extension.hpp} +2 -2
- package/src/duckdb/extension/json/include/json_common.hpp +47 -231
- package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
- package/src/duckdb/extension/json/include/{json-extension.hpp → json_extension.hpp} +2 -2
- package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
- package/src/duckdb/extension/json/json_common.cpp +272 -40
- package/src/duckdb/extension/json/{json-extension.cpp → json_extension.cpp} +4 -4
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +17 -37
- package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
- package/src/duckdb/extension/json/json_functions.cpp +24 -24
- package/src/duckdb/extension/json/json_scan.cpp +3 -6
- package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
- package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
- package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
- package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
- package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
- package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
- package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
- package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
- package/src/duckdb/extension/parquet/{parquet-extension.cpp → parquet_extension.cpp} +190 -19
- package/src/duckdb/extension/parquet/parquet_reader.cpp +5 -5
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
- package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
- package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
- package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
- package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
- package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
- package/src/duckdb/src/common/allocator.cpp +16 -4
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +5 -10
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +0 -12
- package/src/duckdb/src/common/assert.cpp +3 -0
- package/src/duckdb/src/common/enum_util.cpp +42 -5
- package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
- package/src/duckdb/src/common/file_system.cpp +15 -0
- package/src/duckdb/src/common/local_file_system.cpp +1 -1
- package/src/duckdb/src/common/multi_file_reader.cpp +181 -18
- package/src/duckdb/src/common/radix_partitioning.cpp +27 -9
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
- package/src/duckdb/src/common/sort/partition_state.cpp +44 -11
- package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
- package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
- package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
- package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
- package/src/duckdb/src/common/types/date.cpp +9 -0
- package/src/duckdb/src/common/types/list_segment.cpp +24 -74
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
- package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
- package/src/duckdb/src/common/types/vector.cpp +15 -14
- package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +6 -4
- package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
- package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
- package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
- package/src/duckdb/src/core_functions/function_list.cpp +4 -2
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
- package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +0 -17
- package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
- package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
- package/src/duckdb/src/execution/index/art/art.cpp +149 -139
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
- package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
- package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
- package/src/duckdb/src/execution/index/art/node.cpp +113 -120
- package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
- package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
- package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
- package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
- package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
- package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
- package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
- package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
- package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +413 -282
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +22 -3
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
- package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +3 -2
- package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
- package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
- package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
- package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
- package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
- package/src/duckdb/src/execution/physical_operator.cpp +17 -14
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
- package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
- package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
- package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
- package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
- package/src/duckdb/src/function/function.cpp +2 -0
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
- package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
- package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
- package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
- package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
- package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
- package/src/duckdb/src/function/table/read_csv.cpp +100 -17
- package/src/duckdb/src/function/table/system/test_all_types.cpp +38 -18
- package/src/duckdb/src/function/table/table_scan.cpp +9 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
- package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
- package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
- package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -0
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
- package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
- package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +24 -6
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -10
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
- package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/settings.hpp +21 -1
- package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
- package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
- package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
- package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
- package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
- package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
- package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
- package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +8 -5
- package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
- package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
- package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
- package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
- package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
- package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
- package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
- package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
- package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
- package/src/duckdb/src/include/duckdb.h +28 -0
- package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
- package/src/duckdb/src/main/config.cpp +2 -0
- package/src/duckdb/src/main/extension/extension_helper.cpp +106 -99
- package/src/duckdb/src/main/settings/settings.cpp +40 -18
- package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
- package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
- package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
- package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
- package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
- package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
- package/src/duckdb/src/optimizer/optimizer.cpp +51 -14
- package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
- package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
- package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
- package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
- package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
- package/src/duckdb/src/parallel/executor.cpp +15 -0
- package/src/duckdb/src/parallel/pipeline_executor.cpp +7 -6
- package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
- package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
- package/src/duckdb/src/parser/expression/lambda_expression.cpp +1 -1
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
- package/src/duckdb/src/parser/query_node/cte_node.cpp +75 -0
- package/src/duckdb/src/parser/query_node.cpp +18 -1
- package/src/duckdb/src/parser/tableref/joinref.cpp +3 -0
- package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
- package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
- package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
- package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
- package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
- package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
- package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
- package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
- package/src/duckdb/src/parser/transformer.cpp +15 -0
- package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
- package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
- package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
- package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -50
- package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +61 -26
- package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
- package/src/duckdb/src/planner/binder.cpp +5 -0
- package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
- package/src/duckdb/src/planner/expression_binder.cpp +3 -0
- package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
- package/src/duckdb/src/planner/logical_operator.cpp +5 -0
- package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
- package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
- package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
- package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
- package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
- package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
- package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
- package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
- package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
- package/src/duckdb/src/storage/data_table.cpp +1 -1
- package/src/duckdb/src/storage/local_storage.cpp +3 -3
- package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
- package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/storage_manager.cpp +7 -2
- package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
- package/src/duckdb/src/storage/table/row_group.cpp +25 -9
- package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
- package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
- package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
- package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
- package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +5 -5
- package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
- package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
- package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
- package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
- package/src/duckdb/ub_src_optimizer.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
- package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
- package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
- package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
- package/src/duckdb/ub_src_planner_operator.cpp +4 -0
- package/src/duckdb_node.hpp +1 -0
- package/src/statement.cpp +103 -4
- package/test/columns.test.ts +243 -0
- package/test/test_all_types.test.ts +233 -0
- package/tsconfig.json +1 -0
- package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
- package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
- /package/src/duckdb/extension/parquet/include/{parquet-extension.hpp → parquet_extension.hpp} +0 -0
@@ -333,7 +333,7 @@ struct StandardValueCopy : public BaseValueCopy<T> {
|
|
333
333
|
|
334
334
|
struct StringValueCopy : public BaseValueCopy<string_t> {
|
335
335
|
static string_t Operation(ColumnDataMetaData &meta_data, string_t input) {
|
336
|
-
return input.IsInlined() ? input : meta_data.segment.heap
|
336
|
+
return input.IsInlined() ? input : meta_data.segment.heap->AddBlob(input);
|
337
337
|
}
|
338
338
|
};
|
339
339
|
|
@@ -423,7 +423,8 @@ void ColumnDataCopy<string_t>(ColumnDataMetaData &meta_data, const UnifiedVector
|
|
423
423
|
idx_t offset, idx_t copy_count) {
|
424
424
|
|
425
425
|
const auto &allocator_type = meta_data.segment.allocator->GetType();
|
426
|
-
if (allocator_type == ColumnDataAllocatorType::IN_MEMORY_ALLOCATOR) {
|
426
|
+
if (allocator_type == ColumnDataAllocatorType::IN_MEMORY_ALLOCATOR ||
|
427
|
+
allocator_type == ColumnDataAllocatorType::HYBRID) {
|
427
428
|
// strings cannot be spilled to disk - use StringHeap
|
428
429
|
TemplatedColumnDataCopy<StringValueCopy>(meta_data, source_data, source, offset, copy_count);
|
429
430
|
return;
|
@@ -930,6 +931,7 @@ void ColumnDataCollection::Verify() {
|
|
930
931
|
#endif
|
931
932
|
}
|
932
933
|
|
934
|
+
// LCOV_EXCL_START
|
933
935
|
string ColumnDataCollection::ToString() const {
|
934
936
|
DataChunk chunk;
|
935
937
|
InitializeScanChunk(chunk);
|
@@ -950,6 +952,7 @@ string ColumnDataCollection::ToString() const {
|
|
950
952
|
|
951
953
|
return result;
|
952
954
|
}
|
955
|
+
// LCOV_EXCL_STOP
|
953
956
|
|
954
957
|
void ColumnDataCollection::Print() const {
|
955
958
|
Printer::Print(ToString());
|
@@ -1030,6 +1033,18 @@ bool ColumnDataCollection::ResultEquals(const ColumnDataCollection &left, const
|
|
1030
1033
|
return true;
|
1031
1034
|
}
|
1032
1035
|
|
1036
|
+
vector<shared_ptr<StringHeap>> ColumnDataCollection::GetHeapReferences() {
|
1037
|
+
vector<shared_ptr<StringHeap>> result(segments.size(), nullptr);
|
1038
|
+
for (idx_t segment_idx = 0; segment_idx < segments.size(); segment_idx++) {
|
1039
|
+
result[segment_idx] = segments[segment_idx]->heap;
|
1040
|
+
}
|
1041
|
+
return result;
|
1042
|
+
}
|
1043
|
+
|
1044
|
+
ColumnDataAllocatorType ColumnDataCollection::GetAllocatorType() const {
|
1045
|
+
return allocator->GetType();
|
1046
|
+
}
|
1047
|
+
|
1033
1048
|
const vector<unique_ptr<ColumnDataCollectionSegment>> &ColumnDataCollection::GetSegments() const {
|
1034
1049
|
return segments;
|
1035
1050
|
}
|
@@ -6,7 +6,8 @@ namespace duckdb {
|
|
6
6
|
|
7
7
|
ColumnDataCollectionSegment::ColumnDataCollectionSegment(shared_ptr<ColumnDataAllocator> allocator_p,
|
8
8
|
vector<LogicalType> types_p)
|
9
|
-
: allocator(std::move(allocator_p)), types(std::move(types_p)), count(0),
|
9
|
+
: allocator(std::move(allocator_p)), types(std::move(types_p)), count(0),
|
10
|
+
heap(make_shared<StringHeap>(allocator->GetAllocator())) {
|
10
11
|
}
|
11
12
|
|
12
13
|
idx_t ColumnDataCollectionSegment::GetDataSize(idx_t type_size) {
|
@@ -26,7 +27,8 @@ VectorDataIndex ColumnDataCollectionSegment::AllocateVectorInternal(const Logica
|
|
26
27
|
auto type_size = internal_type == PhysicalType::STRUCT ? 0 : GetTypeIdSize(internal_type);
|
27
28
|
allocator->AllocateData(GetDataSize(type_size) + ValidityMask::STANDARD_MASK_SIZE, meta_data.block_id,
|
28
29
|
meta_data.offset, chunk_state);
|
29
|
-
if (allocator->GetType() == ColumnDataAllocatorType::BUFFER_MANAGER_ALLOCATOR) {
|
30
|
+
if (allocator->GetType() == ColumnDataAllocatorType::BUFFER_MANAGER_ALLOCATOR ||
|
31
|
+
allocator->GetType() == ColumnDataAllocatorType::HYBRID) {
|
30
32
|
chunk_meta.block_ids.insert(meta_data.block_id);
|
31
33
|
}
|
32
34
|
|
@@ -203,10 +205,17 @@ idx_t ColumnDataCollectionSegment::ReadVector(ChunkManagementState &state, Vecto
|
|
203
205
|
}
|
204
206
|
} else if (internal_type == PhysicalType::VARCHAR) {
|
205
207
|
if (allocator->GetType() == ColumnDataAllocatorType::BUFFER_MANAGER_ALLOCATOR) {
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
208
|
+
auto next_index = vector_index;
|
209
|
+
idx_t offset = 0;
|
210
|
+
while (next_index.IsValid()) {
|
211
|
+
auto &current_vdata = GetVectorData(next_index);
|
212
|
+
for (auto &swizzle_segment : current_vdata.swizzle_data) {
|
213
|
+
auto &string_heap_segment = GetVectorData(swizzle_segment.child_index);
|
214
|
+
allocator->UnswizzlePointers(state, result, offset + swizzle_segment.offset, swizzle_segment.count,
|
215
|
+
string_heap_segment.block_id, string_heap_segment.offset);
|
216
|
+
}
|
217
|
+
offset += current_vdata.count;
|
218
|
+
next_index = current_vdata.next_data;
|
210
219
|
}
|
211
220
|
}
|
212
221
|
if (state.properties == ColumnDataScanProperties::DISALLOW_ZERO_COPY) {
|
@@ -32,13 +32,13 @@ PartitionedColumnData::~PartitionedColumnData() {
|
|
32
32
|
|
33
33
|
void PartitionedColumnData::InitializeAppendState(PartitionedColumnDataAppendState &state) const {
|
34
34
|
state.partition_sel.Initialize();
|
35
|
-
state.slice_chunk.Initialize(context, types);
|
35
|
+
state.slice_chunk.Initialize(BufferAllocator::Get(context), types);
|
36
36
|
InitializeAppendStateInternal(state);
|
37
37
|
}
|
38
38
|
|
39
39
|
unique_ptr<DataChunk> PartitionedColumnData::CreatePartitionBuffer() const {
|
40
40
|
auto result = make_uniq<DataChunk>();
|
41
|
-
result->Initialize(
|
41
|
+
result->Initialize(BufferAllocator::Get(context), types, BufferSize());
|
42
42
|
return result;
|
43
43
|
}
|
44
44
|
|
@@ -309,7 +309,7 @@ void DataChunk::Hash(Vector &result) {
|
|
309
309
|
|
310
310
|
void DataChunk::Hash(vector<idx_t> &column_ids, Vector &result) {
|
311
311
|
D_ASSERT(result.GetType().id() == LogicalType::HASH);
|
312
|
-
D_ASSERT(column_ids.
|
312
|
+
D_ASSERT(!column_ids.empty());
|
313
313
|
|
314
314
|
VectorOperations::Hash(data[column_ids[0]], result, size());
|
315
315
|
for (idx_t i = 1; i < column_ids.size(); i++) {
|
@@ -327,7 +327,7 @@ void DataChunk::Verify() {
|
|
327
327
|
#endif
|
328
328
|
}
|
329
329
|
|
330
|
-
void DataChunk::Print() {
|
330
|
+
void DataChunk::Print() const {
|
331
331
|
Printer::Print(ToString());
|
332
332
|
}
|
333
333
|
|
@@ -441,6 +441,15 @@ int64_t Date::EpochMicroseconds(date_t date) {
|
|
441
441
|
return result;
|
442
442
|
}
|
443
443
|
|
444
|
+
int64_t Date::EpochMilliseconds(date_t date) {
|
445
|
+
int64_t result;
|
446
|
+
const auto MILLIS_PER_DAY = Interval::MICROS_PER_DAY / Interval::MICROS_PER_MSEC;
|
447
|
+
if (!TryMultiplyOperator::Operation<int64_t, int64_t, int64_t>(date.days, MILLIS_PER_DAY, result)) {
|
448
|
+
throw ConversionException("Could not convert DATE (%s) to milliseconds", Date::ToString(date));
|
449
|
+
}
|
450
|
+
return result;
|
451
|
+
}
|
452
|
+
|
444
453
|
int32_t Date::ExtractYear(date_t d, int32_t *last_year) {
|
445
454
|
auto n = d.days;
|
446
455
|
// cached look up: check if year of this date is the same as the last one we looked up
|
@@ -12,8 +12,8 @@ static idx_t GetAllocationSize(uint16_t capacity) {
|
|
12
12
|
}
|
13
13
|
|
14
14
|
template <class T>
|
15
|
-
static data_ptr_t AllocatePrimitiveData(
|
16
|
-
return allocator.
|
15
|
+
static data_ptr_t AllocatePrimitiveData(ArenaAllocator &allocator, uint16_t capacity) {
|
16
|
+
return allocator.Allocate(GetAllocationSize<T>(capacity));
|
17
17
|
}
|
18
18
|
|
19
19
|
template <class T>
|
@@ -34,8 +34,8 @@ static idx_t GetAllocationSizeList(uint16_t capacity) {
|
|
34
34
|
return AlignValue(sizeof(ListSegment) + capacity * (sizeof(bool) + sizeof(uint64_t)) + sizeof(LinkedList));
|
35
35
|
}
|
36
36
|
|
37
|
-
static data_ptr_t AllocateListData(
|
38
|
-
return allocator.
|
37
|
+
static data_ptr_t AllocateListData(ArenaAllocator &allocator, uint16_t capacity) {
|
38
|
+
return allocator.Allocate(GetAllocationSizeList(capacity));
|
39
39
|
}
|
40
40
|
|
41
41
|
static uint64_t *GetListLengthData(ListSegment *segment) {
|
@@ -65,8 +65,8 @@ static idx_t GetAllocationSizeStruct(uint16_t capacity, idx_t child_count) {
|
|
65
65
|
return AlignValue(sizeof(ListSegment) + capacity * sizeof(bool) + child_count * sizeof(ListSegment *));
|
66
66
|
}
|
67
67
|
|
68
|
-
static data_ptr_t AllocateStructData(
|
69
|
-
return allocator.
|
68
|
+
static data_ptr_t AllocateStructData(ArenaAllocator &allocator, uint16_t capacity, idx_t child_count) {
|
69
|
+
return allocator.Allocate(GetAllocationSizeStruct(capacity, child_count));
|
70
70
|
}
|
71
71
|
|
72
72
|
static ListSegment **GetStructData(ListSegment *segment) {
|
@@ -98,20 +98,8 @@ static uint16_t GetCapacityForNewSegment(uint16_t capacity) {
|
|
98
98
|
//===--------------------------------------------------------------------===//
|
99
99
|
// Create & Destroy
|
100
100
|
//===--------------------------------------------------------------------===//
|
101
|
-
static void DestroyLinkedList(const ListSegmentFunctions &functions, Allocator &allocator, LinkedList &list) {
|
102
|
-
auto segment = list.first_segment;
|
103
|
-
while (segment) {
|
104
|
-
auto next_segment = segment->next;
|
105
|
-
functions.destroy(functions, segment, allocator);
|
106
|
-
segment = next_segment;
|
107
|
-
}
|
108
|
-
list.first_segment = nullptr;
|
109
|
-
list.last_segment = nullptr;
|
110
|
-
list.total_capacity = 0;
|
111
|
-
}
|
112
|
-
|
113
101
|
template <class T>
|
114
|
-
static ListSegment *CreatePrimitiveSegment(const ListSegmentFunctions &,
|
102
|
+
static ListSegment *CreatePrimitiveSegment(const ListSegmentFunctions &, ArenaAllocator &allocator, uint16_t capacity) {
|
115
103
|
// allocate data and set the header
|
116
104
|
auto segment = (ListSegment *)AllocatePrimitiveData<T>(allocator, capacity);
|
117
105
|
segment->capacity = capacity;
|
@@ -120,13 +108,7 @@ static ListSegment *CreatePrimitiveSegment(const ListSegmentFunctions &, Allocat
|
|
120
108
|
return segment;
|
121
109
|
}
|
122
110
|
|
123
|
-
|
124
|
-
void DestroyPrimitiveSegment(const ListSegmentFunctions &, ListSegment *segment, Allocator &allocator) {
|
125
|
-
D_ASSERT(segment);
|
126
|
-
allocator.FreeData(data_ptr_cast(segment), GetAllocationSize<T>(segment->capacity));
|
127
|
-
}
|
128
|
-
|
129
|
-
static ListSegment *CreateListSegment(const ListSegmentFunctions &, Allocator &allocator, uint16_t capacity) {
|
111
|
+
static ListSegment *CreateListSegment(const ListSegmentFunctions &, ArenaAllocator &allocator, uint16_t capacity) {
|
130
112
|
// allocate data and set the header
|
131
113
|
auto segment = reinterpret_cast<ListSegment *>(AllocateListData(allocator, capacity));
|
132
114
|
segment->capacity = capacity;
|
@@ -141,16 +123,7 @@ static ListSegment *CreateListSegment(const ListSegmentFunctions &, Allocator &a
|
|
141
123
|
return segment;
|
142
124
|
}
|
143
125
|
|
144
|
-
|
145
|
-
// destroy the child list
|
146
|
-
auto linked_child_list = Load<LinkedList>(data_ptr_cast(GetListChildData(segment)));
|
147
|
-
DestroyLinkedList(functions.child_functions[0], allocator, linked_child_list);
|
148
|
-
|
149
|
-
// destroy the list segment itself
|
150
|
-
allocator.FreeData(data_ptr_cast(segment), GetAllocationSizeList(segment->capacity));
|
151
|
-
}
|
152
|
-
|
153
|
-
static ListSegment *CreateStructSegment(const ListSegmentFunctions &functions, Allocator &allocator,
|
126
|
+
static ListSegment *CreateStructSegment(const ListSegmentFunctions &functions, ArenaAllocator &allocator,
|
154
127
|
uint16_t capacity) {
|
155
128
|
// allocate data and set header
|
156
129
|
auto segment =
|
@@ -170,21 +143,8 @@ static ListSegment *CreateStructSegment(const ListSegmentFunctions &functions, A
|
|
170
143
|
return segment;
|
171
144
|
}
|
172
145
|
|
173
|
-
|
174
|
-
|
175
|
-
auto child_segments = GetStructData(segment);
|
176
|
-
for (idx_t i = 0; i < functions.child_functions.size(); i++) {
|
177
|
-
auto child_function = functions.child_functions[i];
|
178
|
-
auto child_segment = Load<ListSegment *>(data_ptr_cast(child_segments + i));
|
179
|
-
child_function.destroy(child_function, child_segment, allocator);
|
180
|
-
}
|
181
|
-
|
182
|
-
// destroy the struct segment itself
|
183
|
-
allocator.FreeData(data_ptr_cast(segment),
|
184
|
-
GetAllocationSizeStruct(segment->capacity, functions.child_functions.size()));
|
185
|
-
}
|
186
|
-
|
187
|
-
static ListSegment *GetSegment(const ListSegmentFunctions &functions, Allocator &allocator, LinkedList &linked_list) {
|
146
|
+
static ListSegment *GetSegment(const ListSegmentFunctions &functions, ArenaAllocator &allocator,
|
147
|
+
LinkedList &linked_list) {
|
188
148
|
ListSegment *segment;
|
189
149
|
|
190
150
|
// determine segment
|
@@ -214,7 +174,7 @@ static ListSegment *GetSegment(const ListSegmentFunctions &functions, Allocator
|
|
214
174
|
// Append
|
215
175
|
//===--------------------------------------------------------------------===//
|
216
176
|
template <class T>
|
217
|
-
static void WriteDataToPrimitiveSegment(const ListSegmentFunctions &functions,
|
177
|
+
static void WriteDataToPrimitiveSegment(const ListSegmentFunctions &functions, ArenaAllocator &allocator,
|
218
178
|
ListSegment *segment, Vector &input, idx_t &entry_idx, idx_t &count) {
|
219
179
|
|
220
180
|
// get the vector data and the source index of the entry that we want to write
|
@@ -232,8 +192,8 @@ static void WriteDataToPrimitiveSegment(const ListSegmentFunctions &functions, A
|
|
232
192
|
}
|
233
193
|
}
|
234
194
|
|
235
|
-
static void WriteDataToVarcharSegment(const ListSegmentFunctions &functions,
|
236
|
-
Vector &input, idx_t &entry_idx, idx_t &count) {
|
195
|
+
static void WriteDataToVarcharSegment(const ListSegmentFunctions &functions, ArenaAllocator &allocator,
|
196
|
+
ListSegment *segment, Vector &input, idx_t &entry_idx, idx_t &count) {
|
237
197
|
|
238
198
|
// get the vector data and the source index of the entry that we want to write
|
239
199
|
auto input_data = FlatVector::GetData<string_t>(input);
|
@@ -275,8 +235,8 @@ static void WriteDataToVarcharSegment(const ListSegmentFunctions &functions, All
|
|
275
235
|
Store<LinkedList>(child_segments, data_ptr_cast(GetListChildData(segment)));
|
276
236
|
}
|
277
237
|
|
278
|
-
static void WriteDataToListSegment(const ListSegmentFunctions &functions,
|
279
|
-
Vector &input, idx_t &entry_idx, idx_t &count) {
|
238
|
+
static void WriteDataToListSegment(const ListSegmentFunctions &functions, ArenaAllocator &allocator,
|
239
|
+
ListSegment *segment, Vector &input, idx_t &entry_idx, idx_t &count) {
|
280
240
|
|
281
241
|
// get the vector data and the source index of the entry that we want to write
|
282
242
|
auto input_data = FlatVector::GetData<list_entry_t>(input);
|
@@ -315,8 +275,8 @@ static void WriteDataToListSegment(const ListSegmentFunctions &functions, Alloca
|
|
315
275
|
Store<uint64_t>(list_length, data_ptr_cast(list_length_data + segment->count));
|
316
276
|
}
|
317
277
|
|
318
|
-
static void WriteDataToStructSegment(const ListSegmentFunctions &functions,
|
319
|
-
Vector &input, idx_t &entry_idx, idx_t &count) {
|
278
|
+
static void WriteDataToStructSegment(const ListSegmentFunctions &functions, ArenaAllocator &allocator,
|
279
|
+
ListSegment *segment, Vector &input, idx_t &entry_idx, idx_t &count) {
|
320
280
|
|
321
281
|
// write null validity
|
322
282
|
auto null_mask = GetNullMask(segment);
|
@@ -338,8 +298,8 @@ static void WriteDataToStructSegment(const ListSegmentFunctions &functions, Allo
|
|
338
298
|
}
|
339
299
|
}
|
340
300
|
|
341
|
-
void ListSegmentFunctions::AppendRow(
|
342
|
-
idx_t &count) const {
|
301
|
+
void ListSegmentFunctions::AppendRow(ArenaAllocator &allocator, LinkedList &linked_list, Vector &input,
|
302
|
+
idx_t &entry_idx, idx_t &count) const {
|
343
303
|
|
344
304
|
D_ASSERT(input.GetVectorType() == VectorType::FLAT_VECTOR);
|
345
305
|
auto &write_data_to_segment = *this;
|
@@ -503,7 +463,7 @@ void ListSegmentFunctions::BuildListVector(const LinkedList &linked_list, Vector
|
|
503
463
|
//===--------------------------------------------------------------------===//
|
504
464
|
template <class T>
|
505
465
|
static ListSegment *CopyDataFromPrimitiveSegment(const ListSegmentFunctions &, const ListSegment *source,
|
506
|
-
|
466
|
+
ArenaAllocator &allocator) {
|
507
467
|
|
508
468
|
auto target = (ListSegment *)AllocatePrimitiveData<T>(allocator, source->capacity);
|
509
469
|
memcpy(target, source, sizeof(ListSegment) + source->capacity * (sizeof(bool) + sizeof(T)));
|
@@ -512,7 +472,7 @@ static ListSegment *CopyDataFromPrimitiveSegment(const ListSegmentFunctions &, c
|
|
512
472
|
}
|
513
473
|
|
514
474
|
static ListSegment *CopyDataFromListSegment(const ListSegmentFunctions &functions, const ListSegment *source,
|
515
|
-
|
475
|
+
ArenaAllocator &allocator) {
|
516
476
|
|
517
477
|
// create an empty linked list for the child vector of target
|
518
478
|
auto source_linked_child_list = Load<LinkedList>(const_data_ptr_cast(GetListChildData(source)));
|
@@ -538,7 +498,7 @@ static ListSegment *CopyDataFromListSegment(const ListSegmentFunctions &function
|
|
538
498
|
}
|
539
499
|
|
540
500
|
static ListSegment *CopyDataFromStructSegment(const ListSegmentFunctions &functions, const ListSegment *source,
|
541
|
-
|
501
|
+
ArenaAllocator &allocator) {
|
542
502
|
|
543
503
|
auto source_child_count = functions.child_functions.size();
|
544
504
|
auto target = reinterpret_cast<ListSegment *>(AllocateStructData(allocator, source->capacity, source_child_count));
|
@@ -560,7 +520,7 @@ static ListSegment *CopyDataFromStructSegment(const ListSegmentFunctions &functi
|
|
560
520
|
}
|
561
521
|
|
562
522
|
void ListSegmentFunctions::CopyLinkedList(const LinkedList &source_list, LinkedList &target_list,
|
563
|
-
|
523
|
+
ArenaAllocator &allocator) const {
|
564
524
|
auto &copy_data_from_segment = *this;
|
565
525
|
auto source_segment = source_list.first_segment;
|
566
526
|
|
@@ -578,12 +538,6 @@ void ListSegmentFunctions::CopyLinkedList(const LinkedList &source_list, LinkedL
|
|
578
538
|
}
|
579
539
|
}
|
580
540
|
|
581
|
-
//===--------------------------------------------------------------------===//
|
582
|
-
// Destroy
|
583
|
-
//===--------------------------------------------------------------------===//
|
584
|
-
void ListSegmentFunctions::Destroy(Allocator &allocator, LinkedList &linked_list) const {
|
585
|
-
DestroyLinkedList(*this, allocator, linked_list);
|
586
|
-
}
|
587
541
|
//===--------------------------------------------------------------------===//
|
588
542
|
// Functions
|
589
543
|
//===--------------------------------------------------------------------===//
|
@@ -593,7 +547,6 @@ void SegmentPrimitiveFunction(ListSegmentFunctions &functions) {
|
|
593
547
|
functions.write_data = WriteDataToPrimitiveSegment<T>;
|
594
548
|
functions.read_data = ReadDataFromPrimitiveSegment<T>;
|
595
549
|
functions.copy_data = CopyDataFromPrimitiveSegment<T>;
|
596
|
-
functions.destroy = DestroyPrimitiveSegment<T>;
|
597
550
|
}
|
598
551
|
|
599
552
|
void GetSegmentDataFunctions(ListSegmentFunctions &functions, const LogicalType &type) {
|
@@ -645,7 +598,6 @@ void GetSegmentDataFunctions(ListSegmentFunctions &functions, const LogicalType
|
|
645
598
|
functions.write_data = WriteDataToVarcharSegment;
|
646
599
|
functions.read_data = ReadDataFromVarcharSegment;
|
647
600
|
functions.copy_data = CopyDataFromListSegment;
|
648
|
-
functions.destroy = DestroyListSegment;
|
649
601
|
|
650
602
|
functions.child_functions.emplace_back();
|
651
603
|
SegmentPrimitiveFunction<char>(functions.child_functions.back());
|
@@ -656,7 +608,6 @@ void GetSegmentDataFunctions(ListSegmentFunctions &functions, const LogicalType
|
|
656
608
|
functions.write_data = WriteDataToListSegment;
|
657
609
|
functions.read_data = ReadDataFromListSegment;
|
658
610
|
functions.copy_data = CopyDataFromListSegment;
|
659
|
-
functions.destroy = DestroyListSegment;
|
660
611
|
|
661
612
|
// recurse
|
662
613
|
functions.child_functions.emplace_back();
|
@@ -668,7 +619,6 @@ void GetSegmentDataFunctions(ListSegmentFunctions &functions, const LogicalType
|
|
668
619
|
functions.write_data = WriteDataToStructSegment;
|
669
620
|
functions.read_data = ReadDataFromStructSegment;
|
670
621
|
functions.copy_data = CopyDataFromStructSegment;
|
671
|
-
functions.destroy = DestroyStructSegment;
|
672
622
|
|
673
623
|
// recurse
|
674
624
|
auto child_types = StructType::GetChildTypes(type);
|
@@ -16,15 +16,6 @@ PartitionedTupleData::PartitionedTupleData(const PartitionedTupleData &other)
|
|
16
16
|
: type(other.type), buffer_manager(other.buffer_manager), layout(other.layout.Copy()) {
|
17
17
|
}
|
18
18
|
|
19
|
-
unique_ptr<PartitionedTupleData> PartitionedTupleData::CreateShared() {
|
20
|
-
switch (type) {
|
21
|
-
case PartitionedTupleDataType::RADIX:
|
22
|
-
return make_uniq<RadixPartitionedTupleData>(Cast<RadixPartitionedTupleData>());
|
23
|
-
default:
|
24
|
-
throw NotImplementedException("CreateShared for this type of PartitionedTupleData");
|
25
|
-
}
|
26
|
-
}
|
27
|
-
|
28
19
|
PartitionedTupleData::~PartitionedTupleData() {
|
29
20
|
}
|
30
21
|
|
@@ -233,6 +224,9 @@ void PartitionedTupleData::Combine(PartitionedTupleData &other) {
|
|
233
224
|
}
|
234
225
|
|
235
226
|
void PartitionedTupleData::Partition(TupleDataCollection &source, TupleDataPinProperties properties) {
|
227
|
+
if (source.Count() == 0) {
|
228
|
+
return;
|
229
|
+
}
|
236
230
|
#ifdef DEBUG
|
237
231
|
const auto count_before = source.Count();
|
238
232
|
#endif
|
@@ -220,6 +220,8 @@ void TupleDataCollection::AppendUnified(TupleDataPinState &pin_state, TupleDataC
|
|
220
220
|
|
221
221
|
static inline void ToUnifiedFormatInternal(TupleDataVectorFormat &format, Vector &vector, const idx_t count) {
|
222
222
|
vector.ToUnifiedFormat(count, format.data);
|
223
|
+
format.original_sel = format.data.sel;
|
224
|
+
format.original_owned_sel.Initialize(format.data.owned_sel);
|
223
225
|
switch (vector.GetType().InternalType()) {
|
224
226
|
case PhysicalType::STRUCT: {
|
225
227
|
auto &entries = StructVector::GetEntries(vector);
|
@@ -296,7 +296,7 @@ static void ApplySliceRecursive(const Vector &source_v, TupleDataVectorFormat &s
|
|
296
296
|
D_ASSERT(source_format.combined_list_data);
|
297
297
|
auto &combined_list_data = *source_format.combined_list_data;
|
298
298
|
|
299
|
-
combined_list_data.selection_data = source_format.
|
299
|
+
combined_list_data.selection_data = source_format.original_sel->Slice(combined_sel, count);
|
300
300
|
source_format.data.owned_sel.Initialize(combined_list_data.selection_data);
|
301
301
|
source_format.data.sel = &source_format.data.owned_sel;
|
302
302
|
|
@@ -376,8 +376,8 @@ void TupleDataCollection::ListWithinListComputeHeapSizes(Vector &heap_sizes_v, c
|
|
376
376
|
for (idx_t i = 0; i < child_list_child_count; i++) {
|
377
377
|
combined_sel.set_index(i, 0);
|
378
378
|
}
|
379
|
-
idx_t combined_list_offset = 0;
|
380
379
|
|
380
|
+
idx_t combined_list_offset = 0;
|
381
381
|
for (idx_t i = 0; i < append_count; i++) {
|
382
382
|
const auto list_idx = list_sel.get_index(append_sel.get_index(i));
|
383
383
|
if (!list_validity.RowIsValid(list_idx)) {
|
@@ -96,6 +96,39 @@ void ValidityMask::SliceInPlace(const ValidityMask &other, idx_t target_offset,
|
|
96
96
|
memcpy(target_validity + target_offset_entries, source_validity + source_offset_entries,
|
97
97
|
sizeof(validity_t) * EntryCount(count));
|
98
98
|
return;
|
99
|
+
} else if (IsAligned(target_offset)) {
|
100
|
+
// Simple common case where we are shifting into an aligned mask (e.g., 0 in Slice above)
|
101
|
+
const idx_t entire_units = count / BITS_PER_VALUE;
|
102
|
+
const idx_t ragged = count % BITS_PER_VALUE;
|
103
|
+
const idx_t tail = source_offset % BITS_PER_VALUE;
|
104
|
+
const idx_t head = BITS_PER_VALUE - tail;
|
105
|
+
auto source_validity = other.GetData() + (source_offset / BITS_PER_VALUE);
|
106
|
+
auto target_validity = this->GetData() + (target_offset / BITS_PER_VALUE);
|
107
|
+
auto src_entry = *source_validity++;
|
108
|
+
for (idx_t i = 0; i < entire_units; ++i) {
|
109
|
+
// Start with head of previous src
|
110
|
+
validity_t tgt_entry = src_entry >> tail;
|
111
|
+
src_entry = *source_validity++;
|
112
|
+
// Add in tail of current src
|
113
|
+
tgt_entry |= (src_entry << head);
|
114
|
+
*target_validity++ = tgt_entry;
|
115
|
+
}
|
116
|
+
// Finish last ragged entry
|
117
|
+
if (ragged) {
|
118
|
+
// Start with head of previous src
|
119
|
+
validity_t tgt_entry = (src_entry >> tail);
|
120
|
+
// Add in the tail of the next src, if head was too small
|
121
|
+
if (head < ragged) {
|
122
|
+
src_entry = *source_validity++;
|
123
|
+
tgt_entry |= (src_entry << head);
|
124
|
+
}
|
125
|
+
// Mask off the bits that go past the ragged end
|
126
|
+
tgt_entry &= (ValidityBuffer::MAX_ENTRY >> (BITS_PER_VALUE - ragged));
|
127
|
+
// Restore the ragged end of the target
|
128
|
+
tgt_entry |= *target_validity & (ValidityBuffer::MAX_ENTRY << ragged);
|
129
|
+
*target_validity++ = tgt_entry;
|
130
|
+
}
|
131
|
+
return;
|
99
132
|
}
|
100
133
|
|
101
134
|
// FIXME: use bitwise operations here
|
@@ -855,38 +855,39 @@ void Vector::Flatten(const SelectionVector &sel, idx_t count) {
|
|
855
855
|
}
|
856
856
|
}
|
857
857
|
|
858
|
-
void Vector::ToUnifiedFormat(idx_t count, UnifiedVectorFormat &
|
858
|
+
void Vector::ToUnifiedFormat(idx_t count, UnifiedVectorFormat &format) {
|
859
859
|
switch (GetVectorType()) {
|
860
860
|
case VectorType::DICTIONARY_VECTOR: {
|
861
861
|
auto &sel = DictionaryVector::SelVector(*this);
|
862
|
+
format.owned_sel.Initialize(sel);
|
863
|
+
format.sel = &format.owned_sel;
|
864
|
+
|
862
865
|
auto &child = DictionaryVector::Child(*this);
|
863
866
|
if (child.GetVectorType() == VectorType::FLAT_VECTOR) {
|
864
|
-
data
|
865
|
-
|
866
|
-
data.validity = FlatVector::Validity(child);
|
867
|
+
format.data = FlatVector::GetData(child);
|
868
|
+
format.validity = FlatVector::Validity(child);
|
867
869
|
} else {
|
868
|
-
// dictionary with non-flat child: create a new reference to the child and
|
870
|
+
// dictionary with non-flat child: create a new reference to the child and flatten it
|
869
871
|
Vector child_vector(child);
|
870
872
|
child_vector.Flatten(sel, count);
|
871
873
|
auto new_aux = make_buffer<VectorChildBuffer>(std::move(child_vector));
|
872
874
|
|
873
|
-
data
|
874
|
-
|
875
|
-
data.validity = FlatVector::Validity(new_aux->data);
|
875
|
+
format.data = FlatVector::GetData(new_aux->data);
|
876
|
+
format.validity = FlatVector::Validity(new_aux->data);
|
876
877
|
this->auxiliary = std::move(new_aux);
|
877
878
|
}
|
878
879
|
break;
|
879
880
|
}
|
880
881
|
case VectorType::CONSTANT_VECTOR:
|
881
|
-
|
882
|
-
|
883
|
-
|
882
|
+
format.sel = ConstantVector::ZeroSelectionVector(count, format.owned_sel);
|
883
|
+
format.data = ConstantVector::GetData(*this);
|
884
|
+
format.validity = ConstantVector::Validity(*this);
|
884
885
|
break;
|
885
886
|
default:
|
886
887
|
Flatten(count);
|
887
|
-
|
888
|
-
|
889
|
-
|
888
|
+
format.sel = FlatVector::IncrementalSelectionVector();
|
889
|
+
format.data = FlatVector::GetData(*this);
|
890
|
+
format.validity = FlatVector::Validity(*this);
|
890
891
|
break;
|
891
892
|
}
|
892
893
|
}
|
@@ -564,10 +564,12 @@ static idx_t DistinctSelectList(Vector &left, Vector &right, idx_t count, const
|
|
564
564
|
SelectionVector lcursor(count);
|
565
565
|
SelectionVector rcursor(count);
|
566
566
|
|
567
|
-
ListVector::GetEntry(left)
|
568
|
-
ListVector::GetEntry(right)
|
569
|
-
|
570
|
-
|
567
|
+
Vector lentry_flattened(ListVector::GetEntry(left));
|
568
|
+
Vector rentry_flattened(ListVector::GetEntry(right));
|
569
|
+
lentry_flattened.Flatten(ListVector::GetListSize(left));
|
570
|
+
rentry_flattened.Flatten(ListVector::GetListSize(right));
|
571
|
+
Vector lchild(lentry_flattened, lcursor, count);
|
572
|
+
Vector rchild(rentry_flattened, rcursor, count);
|
571
573
|
|
572
574
|
// To perform the positional comparison, we use a vectorisation of the following algorithm:
|
573
575
|
// bool CompareLists(T *left, idx_t nleft, T *right, nright) {
|
@@ -63,9 +63,9 @@ struct ListFunction {
|
|
63
63
|
|
64
64
|
template <class STATE>
|
65
65
|
static void Destroy(STATE &state, AggregateInputData &aggr_input_data) {
|
66
|
-
|
67
|
-
list_bind_data.functions.Destroy(aggr_input_data.allocator, state.linked_list);
|
66
|
+
// nop
|
68
67
|
}
|
68
|
+
|
69
69
|
static bool IgnoreNull() {
|
70
70
|
return false;
|
71
71
|
}
|
@@ -37,16 +37,16 @@ struct RegrAvgFunction {
|
|
37
37
|
};
|
38
38
|
struct RegrAvgXFunction : RegrAvgFunction {
|
39
39
|
template <class A_TYPE, class B_TYPE, class STATE, class OP>
|
40
|
-
static void Operation(STATE &state, const A_TYPE &
|
41
|
-
state.sum +=
|
40
|
+
static void Operation(STATE &state, const A_TYPE &y, const B_TYPE &x, AggregateBinaryInput &idata) {
|
41
|
+
state.sum += x;
|
42
42
|
state.count++;
|
43
43
|
}
|
44
44
|
};
|
45
45
|
|
46
46
|
struct RegrAvgYFunction : RegrAvgFunction {
|
47
47
|
template <class A_TYPE, class B_TYPE, class STATE, class OP>
|
48
|
-
static void Operation(STATE &state, const A_TYPE &
|
49
|
-
state.sum +=
|
48
|
+
static void Operation(STATE &state, const A_TYPE &y, const B_TYPE &x, AggregateBinaryInput &idata) {
|
49
|
+
state.sum += y;
|
50
50
|
state.count++;
|
51
51
|
}
|
52
52
|
};
|
@@ -23,11 +23,11 @@ struct RegrInterceptOperation {
|
|
23
23
|
}
|
24
24
|
|
25
25
|
template <class A_TYPE, class B_TYPE, class STATE, class OP>
|
26
|
-
static void Operation(STATE &state, const A_TYPE &
|
26
|
+
static void Operation(STATE &state, const A_TYPE &y, const B_TYPE &x, AggregateBinaryInput &idata) {
|
27
27
|
state.count++;
|
28
|
-
state.sum_x +=
|
29
|
-
state.sum_y +=
|
30
|
-
RegrSlopeOperation::Operation<A_TYPE, B_TYPE, RegrSlopeState, OP>(state.slope,
|
28
|
+
state.sum_x += x;
|
29
|
+
state.sum_y += y;
|
30
|
+
RegrSlopeOperation::Operation<A_TYPE, B_TYPE, RegrSlopeState, OP>(state.slope, y, x, idata);
|
31
31
|
}
|
32
32
|
|
33
33
|
template <class STATE, class OP>
|
@@ -1,3 +1,4 @@
|
|
1
|
+
// REGR_R2(y, x)
|
1
2
|
// Returns the coefficient of determination for non-null pairs in a group.
|
2
3
|
// It is computed for non-null pairs using the following formula:
|
3
4
|
// null if var_pop(x) = 0, else
|
@@ -24,10 +25,10 @@ struct RegrR2Operation {
|
|
24
25
|
}
|
25
26
|
|
26
27
|
template <class A_TYPE, class B_TYPE, class STATE, class OP>
|
27
|
-
static void Operation(STATE &state, const A_TYPE &
|
28
|
-
CorrOperation::Operation<A_TYPE, B_TYPE, CorrState, OP>(state.corr,
|
29
|
-
STDDevBaseOperation::Execute<A_TYPE, StddevState>(state.var_pop_x,
|
30
|
-
STDDevBaseOperation::Execute<A_TYPE, StddevState>(state.var_pop_y,
|
28
|
+
static void Operation(STATE &state, const A_TYPE &y, const B_TYPE &x, AggregateBinaryInput &idata) {
|
29
|
+
CorrOperation::Operation<A_TYPE, B_TYPE, CorrState, OP>(state.corr, y, x, idata);
|
30
|
+
STDDevBaseOperation::Execute<A_TYPE, StddevState>(state.var_pop_x, x);
|
31
|
+
STDDevBaseOperation::Execute<A_TYPE, StddevState>(state.var_pop_y, y);
|
31
32
|
}
|
32
33
|
|
33
34
|
template <class STATE, class OP>
|