duckdb 1.0.1-dev22.0 → 1.0.1-dev27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/NodeJS.yml +1 -1
- package/binding.gyp +41 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-dateadd.cpp +4 -2
- package/src/duckdb/extension/icu/icu-datefunc.cpp +6 -2
- package/src/duckdb/extension/icu/icu-datesub.cpp +13 -2
- package/src/duckdb/extension/icu/icu-strptime.cpp +6 -6
- package/src/duckdb/extension/icu/icu-table-range.cpp +92 -73
- package/src/duckdb/extension/icu/icu-timebucket.cpp +12 -2
- package/src/duckdb/extension/icu/icu-timezone.cpp +3 -3
- package/src/duckdb/extension/icu/icu_extension.cpp +61 -9
- package/src/duckdb/extension/json/include/json_executors.hpp +20 -23
- package/src/duckdb/extension/json/include/json_functions.hpp +4 -0
- package/src/duckdb/extension/json/include/json_scan.hpp +6 -2
- package/src/duckdb/extension/json/include/json_structure.hpp +12 -9
- package/src/duckdb/extension/json/json_common.cpp +66 -10
- package/src/duckdb/extension/json/json_extension.cpp +13 -5
- package/src/duckdb/extension/json/json_functions/json_array_length.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_create.cpp +21 -4
- package/src/duckdb/extension/json/json_functions/json_exists.cpp +32 -0
- package/src/duckdb/extension/json/json_functions/json_extract.cpp +2 -2
- package/src/duckdb/extension/json/json_functions/json_keys.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_pretty.cpp +32 -0
- package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +5 -1
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +305 -94
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_type.cpp +3 -3
- package/src/duckdb/extension/json/json_functions/json_value.cpp +42 -0
- package/src/duckdb/extension/json/json_functions/read_json.cpp +16 -2
- package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +3 -2
- package/src/duckdb/extension/json/json_functions.cpp +5 -1
- package/src/duckdb/extension/json/json_scan.cpp +13 -12
- package/src/duckdb/extension/json/serialize_json.cpp +5 -3
- package/src/duckdb/extension/parquet/column_reader.cpp +206 -43
- package/src/duckdb/extension/parquet/column_writer.cpp +133 -62
- package/src/duckdb/extension/parquet/geo_parquet.cpp +391 -0
- package/src/duckdb/extension/parquet/include/boolean_column_reader.hpp +16 -5
- package/src/duckdb/extension/parquet/include/column_reader.hpp +37 -12
- package/src/duckdb/extension/parquet/include/column_writer.hpp +10 -11
- package/src/duckdb/extension/parquet/include/expression_column_reader.hpp +52 -0
- package/src/duckdb/extension/parquet/include/geo_parquet.hpp +139 -0
- package/src/duckdb/extension/parquet/include/parquet_crypto.hpp +13 -8
- package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -0
- package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +7 -3
- package/src/duckdb/extension/parquet/include/parquet_reader.hpp +55 -8
- package/src/duckdb/extension/parquet/include/parquet_rle_bp_decoder.hpp +3 -3
- package/src/duckdb/extension/parquet/include/parquet_rle_bp_encoder.hpp +1 -1
- package/src/duckdb/extension/parquet/include/parquet_timestamp.hpp +8 -0
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +21 -7
- package/src/duckdb/extension/parquet/include/resizable_buffer.hpp +33 -11
- package/src/duckdb/extension/parquet/include/string_column_reader.hpp +5 -2
- package/src/duckdb/extension/parquet/include/templated_column_reader.hpp +48 -14
- package/src/duckdb/extension/parquet/parquet_crypto.cpp +109 -61
- package/src/duckdb/extension/parquet/parquet_extension.cpp +305 -72
- package/src/duckdb/extension/parquet/parquet_metadata.cpp +4 -4
- package/src/duckdb/extension/parquet/parquet_reader.cpp +151 -40
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +50 -16
- package/src/duckdb/extension/parquet/parquet_timestamp.cpp +42 -1
- package/src/duckdb/extension/parquet/parquet_writer.cpp +67 -75
- package/src/duckdb/extension/parquet/serialize_parquet.cpp +3 -1
- package/src/duckdb/extension/parquet/zstd_file_system.cpp +5 -1
- package/src/duckdb/src/catalog/catalog.cpp +14 -16
- package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +14 -11
- package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +39 -19
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +92 -78
- package/src/duckdb/src/catalog/catalog_entry/index_catalog_entry.cpp +10 -2
- package/src/duckdb/src/catalog/catalog_entry/macro_catalog_entry.cpp +10 -3
- package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +3 -3
- package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +7 -7
- package/src/duckdb/src/catalog/catalog_entry.cpp +6 -3
- package/src/duckdb/src/catalog/catalog_set.cpp +14 -19
- package/src/duckdb/src/catalog/default/default_functions.cpp +179 -166
- package/src/duckdb/src/catalog/default/default_generator.cpp +24 -0
- package/src/duckdb/src/catalog/default/default_schemas.cpp +4 -3
- package/src/duckdb/src/catalog/default/default_table_functions.cpp +148 -0
- package/src/duckdb/src/catalog/default/default_views.cpp +7 -3
- package/src/duckdb/src/catalog/duck_catalog.cpp +7 -1
- package/src/duckdb/src/common/adbc/adbc.cpp +120 -58
- package/src/duckdb/src/common/allocator.cpp +71 -6
- package/src/duckdb/src/common/arrow/appender/bool_data.cpp +8 -7
- package/src/duckdb/src/common/arrow/appender/fixed_size_list_data.cpp +1 -1
- package/src/duckdb/src/common/arrow/appender/union_data.cpp +4 -5
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +55 -21
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +85 -10
- package/src/duckdb/src/common/arrow/arrow_merge_event.cpp +142 -0
- package/src/duckdb/src/common/arrow/arrow_query_result.cpp +56 -0
- package/src/duckdb/src/common/arrow/physical_arrow_batch_collector.cpp +37 -0
- package/src/duckdb/src/common/arrow/physical_arrow_collector.cpp +128 -0
- package/src/duckdb/src/common/arrow/schema_metadata.cpp +101 -0
- package/src/duckdb/src/common/cgroups.cpp +189 -0
- package/src/duckdb/src/common/compressed_file_system.cpp +6 -3
- package/src/duckdb/src/common/encryption_state.cpp +38 -0
- package/src/duckdb/src/common/enum_util.cpp +682 -14
- package/src/duckdb/src/common/enums/file_compression_type.cpp +24 -0
- package/src/duckdb/src/common/enums/metric_type.cpp +208 -0
- package/src/duckdb/src/common/enums/optimizer_type.cpp +8 -2
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
- package/src/duckdb/src/common/enums/relation_type.cpp +4 -0
- package/src/duckdb/src/common/enums/statement_type.cpp +15 -0
- package/src/duckdb/src/common/error_data.cpp +22 -20
- package/src/duckdb/src/common/exception/binder_exception.cpp +5 -0
- package/src/duckdb/src/common/exception.cpp +11 -1
- package/src/duckdb/src/common/extra_type_info.cpp +3 -0
- package/src/duckdb/src/common/file_buffer.cpp +1 -1
- package/src/duckdb/src/common/file_system.cpp +25 -3
- package/src/duckdb/src/common/filename_pattern.cpp +1 -0
- package/src/duckdb/src/common/fsst.cpp +15 -14
- package/src/duckdb/src/common/gzip_file_system.cpp +3 -1
- package/src/duckdb/src/common/hive_partitioning.cpp +103 -43
- package/src/duckdb/src/common/http_util.cpp +25 -0
- package/src/duckdb/src/common/local_file_system.cpp +48 -27
- package/src/duckdb/src/common/multi_file_list.cpp +113 -22
- package/src/duckdb/src/common/multi_file_reader.cpp +59 -58
- package/src/duckdb/src/common/operator/cast_operators.cpp +133 -34
- package/src/duckdb/src/common/operator/string_cast.cpp +42 -11
- package/src/duckdb/src/common/progress_bar/progress_bar.cpp +2 -2
- package/src/duckdb/src/common/progress_bar/terminal_progress_bar_display.cpp +1 -1
- package/src/duckdb/src/common/radix_partitioning.cpp +31 -21
- package/src/duckdb/src/common/random_engine.cpp +4 -0
- package/src/duckdb/src/common/re2_regex.cpp +47 -12
- package/src/duckdb/src/common/render_tree.cpp +243 -0
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_gather.cpp +2 -2
- package/src/duckdb/src/common/row_operations/row_matcher.cpp +58 -5
- package/src/duckdb/src/common/row_operations/row_radix_scatter.cpp +79 -43
- package/src/duckdb/src/common/serializer/binary_deserializer.cpp +1 -1
- package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +6 -4
- package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +18 -9
- package/src/duckdb/src/common/serializer/memory_stream.cpp +1 -0
- package/src/duckdb/src/common/sort/partition_state.cpp +33 -18
- package/src/duckdb/src/common/sort/radix_sort.cpp +22 -15
- package/src/duckdb/src/common/sort/sort_state.cpp +19 -16
- package/src/duckdb/src/common/sort/sorted_block.cpp +11 -10
- package/src/duckdb/src/common/string_util.cpp +167 -10
- package/src/duckdb/src/common/tree_renderer/graphviz_tree_renderer.cpp +108 -0
- package/src/duckdb/src/common/tree_renderer/html_tree_renderer.cpp +267 -0
- package/src/duckdb/src/common/tree_renderer/json_tree_renderer.cpp +116 -0
- package/src/duckdb/src/common/tree_renderer/text_tree_renderer.cpp +482 -0
- package/src/duckdb/src/common/tree_renderer/tree_renderer.cpp +12 -0
- package/src/duckdb/src/common/tree_renderer.cpp +16 -508
- package/src/duckdb/src/common/types/batched_data_collection.cpp +78 -9
- package/src/duckdb/src/common/types/bit.cpp +24 -22
- package/src/duckdb/src/common/types/blob.cpp +15 -11
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +18 -9
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +4 -4
- package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +3 -4
- package/src/duckdb/src/common/types/column/column_data_consumer.cpp +2 -2
- package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +70 -21
- package/src/duckdb/src/common/types/data_chunk.cpp +10 -1
- package/src/duckdb/src/common/types/date.cpp +8 -19
- package/src/duckdb/src/common/types/decimal.cpp +3 -2
- package/src/duckdb/src/common/types/hugeint.cpp +11 -3
- package/src/duckdb/src/common/types/hyperloglog.cpp +212 -227
- package/src/duckdb/src/common/types/interval.cpp +1 -1
- package/src/duckdb/src/common/types/list_segment.cpp +83 -49
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +22 -83
- package/src/duckdb/src/common/types/row/row_data_collection.cpp +2 -2
- package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +20 -4
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +28 -7
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +29 -14
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +152 -102
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +4 -1
- package/src/duckdb/src/common/types/selection_vector.cpp +17 -1
- package/src/duckdb/src/common/types/time.cpp +62 -31
- package/src/duckdb/src/common/types/timestamp.cpp +70 -12
- package/src/duckdb/src/common/types/uuid.cpp +1 -1
- package/src/duckdb/src/common/types/validity_mask.cpp +40 -5
- package/src/duckdb/src/common/types/value.cpp +50 -8
- package/src/duckdb/src/common/types/varint.cpp +295 -0
- package/src/duckdb/src/common/types/vector.cpp +165 -54
- package/src/duckdb/src/common/types/vector_buffer.cpp +5 -4
- package/src/duckdb/src/common/types.cpp +106 -26
- package/src/duckdb/src/common/vector_operations/vector_copy.cpp +13 -25
- package/src/duckdb/src/common/vector_operations/vector_hash.cpp +6 -0
- package/src/duckdb/src/common/virtual_file_system.cpp +3 -3
- package/src/duckdb/src/core_functions/aggregate/distributive/approx_count.cpp +35 -82
- package/src/duckdb/src/core_functions/aggregate/distributive/arg_min_max.cpp +283 -46
- package/src/duckdb/src/core_functions/aggregate/distributive/bitagg.cpp +4 -4
- package/src/duckdb/src/core_functions/aggregate/distributive/entropy.cpp +3 -2
- package/src/duckdb/src/core_functions/aggregate/distributive/minmax.cpp +226 -338
- package/src/duckdb/src/core_functions/aggregate/distributive/sum.cpp +2 -0
- package/src/duckdb/src/core_functions/aggregate/holistic/approx_top_k.cpp +388 -0
- package/src/duckdb/src/core_functions/aggregate/holistic/approximate_quantile.cpp +63 -21
- package/src/duckdb/src/core_functions/aggregate/holistic/mad.cpp +330 -0
- package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +136 -97
- package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +601 -1485
- package/src/duckdb/src/core_functions/aggregate/nested/binned_histogram.cpp +405 -0
- package/src/duckdb/src/core_functions/aggregate/nested/histogram.cpp +136 -165
- package/src/duckdb/src/core_functions/function_list.cpp +35 -8
- package/src/duckdb/src/core_functions/lambda_functions.cpp +5 -7
- package/src/duckdb/src/core_functions/scalar/array/array_functions.cpp +172 -198
- package/src/duckdb/src/core_functions/scalar/blob/create_sort_key.cpp +341 -54
- package/src/duckdb/src/core_functions/scalar/date/date_diff.cpp +2 -2
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +89 -29
- package/src/duckdb/src/core_functions/scalar/date/date_trunc.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +2 -2
- package/src/duckdb/src/core_functions/scalar/date/strftime.cpp +133 -71
- package/src/duckdb/src/core_functions/scalar/date/to_interval.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/enum/enum_functions.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/generic/can_implicitly_cast.cpp +40 -0
- package/src/duckdb/src/core_functions/scalar/generic/error.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/generic/least.cpp +161 -58
- package/src/duckdb/src/core_functions/scalar/generic/typeof.cpp +13 -0
- package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +59 -75
- package/src/duckdb/src/core_functions/scalar/list/list_distance.cpp +93 -40
- package/src/duckdb/src/core_functions/scalar/list/list_has_any_or_all.cpp +227 -0
- package/src/duckdb/src/core_functions/scalar/list/list_reduce.cpp +20 -19
- package/src/duckdb/src/core_functions/scalar/list/list_sort.cpp +0 -2
- package/src/duckdb/src/core_functions/scalar/list/list_value.cpp +106 -8
- package/src/duckdb/src/core_functions/scalar/map/map_contains.cpp +56 -0
- package/src/duckdb/src/core_functions/scalar/map/map_extract.cpp +73 -118
- package/src/duckdb/src/core_functions/scalar/math/numeric.cpp +98 -2
- package/src/duckdb/src/core_functions/scalar/operators/bitwise.cpp +1 -2
- package/src/duckdb/src/core_functions/scalar/random/setseed.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/string/bar.cpp +1 -1
- package/src/duckdb/src/core_functions/scalar/string/hex.cpp +5 -1
- package/src/duckdb/src/core_functions/scalar/string/md5.cpp +10 -37
- package/src/duckdb/src/core_functions/scalar/string/printf.cpp +18 -2
- package/src/duckdb/src/core_functions/scalar/string/repeat.cpp +45 -0
- package/src/duckdb/src/core_functions/scalar/string/reverse.cpp +4 -5
- package/src/duckdb/src/core_functions/scalar/string/sha1.cpp +35 -0
- package/src/duckdb/src/core_functions/scalar/string/sha256.cpp +5 -2
- package/src/duckdb/src/core_functions/scalar/string/url_encode.cpp +49 -0
- package/src/duckdb/src/core_functions/scalar/struct/struct_pack.cpp +1 -2
- package/src/duckdb/src/core_functions/scalar/union/union_extract.cpp +4 -2
- package/src/duckdb/src/execution/adaptive_filter.cpp +30 -11
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +13 -18
- package/src/duckdb/src/execution/expression_executor/execute_conjunction.cpp +4 -9
- package/src/duckdb/src/execution/expression_executor.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +683 -670
- package/src/duckdb/src/execution/index/art/art_key.cpp +121 -38
- package/src/duckdb/src/execution/index/art/base_leaf.cpp +168 -0
- package/src/duckdb/src/execution/index/art/base_node.cpp +163 -0
- package/src/duckdb/src/execution/index/art/iterator.cpp +148 -77
- package/src/duckdb/src/execution/index/art/leaf.cpp +159 -263
- package/src/duckdb/src/execution/index/art/node.cpp +493 -247
- package/src/duckdb/src/execution/index/art/node256.cpp +31 -91
- package/src/duckdb/src/execution/index/art/node256_leaf.cpp +71 -0
- package/src/duckdb/src/execution/index/art/node48.cpp +75 -143
- package/src/duckdb/src/execution/index/art/prefix.cpp +424 -244
- package/src/duckdb/src/execution/index/bound_index.cpp +7 -1
- package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +22 -18
- package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +22 -73
- package/src/duckdb/src/execution/join_hashtable.cpp +637 -179
- package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +4 -4
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +15 -10
- package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +13 -8
- package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +525 -132
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +147 -138
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +531 -312
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp +4 -3
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp +9 -2
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +13 -17
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +60 -16
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +105 -0
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +24 -24
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/skip_scanner.cpp +25 -2
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +275 -112
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +106 -11
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +253 -115
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +93 -52
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +116 -76
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +29 -14
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +70 -26
- package/src/duckdb/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +81 -60
- package/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +88 -50
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +161 -51
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +59 -17
- package/src/duckdb/src/execution/operator/filter/physical_filter.cpp +5 -5
- package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +0 -21
- package/src/duckdb/src/execution/operator/helper/physical_buffered_batch_collector.cpp +109 -0
- package/src/duckdb/src/execution/operator/helper/physical_buffered_collector.cpp +5 -13
- package/src/duckdb/src/execution/operator/helper/physical_explain_analyze.cpp +1 -1
- package/src/duckdb/src/execution/operator/helper/physical_load.cpp +12 -4
- package/src/duckdb/src/execution/operator/helper/physical_materialized_collector.cpp +0 -16
- package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +4 -2
- package/src/duckdb/src/execution/operator/helper/physical_reset.cpp +5 -0
- package/src/duckdb/src/execution/operator/helper/physical_result_collector.cpp +3 -1
- package/src/duckdb/src/execution/operator/helper/physical_set_variable.cpp +39 -0
- package/src/duckdb/src/execution/operator/helper/physical_streaming_sample.cpp +4 -2
- package/src/duckdb/src/execution/operator/helper/physical_transaction.cpp +16 -5
- package/src/duckdb/src/execution/operator/join/outer_join_marker.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +5 -4
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +59 -21
- package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +7 -4
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +333 -176
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +57 -34
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +16 -8
- package/src/duckdb/src/execution/operator/join/physical_left_delim_join.cpp +10 -4
- package/src/duckdb/src/execution/operator/join/physical_nested_loop_join.cpp +2 -5
- package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +3 -3
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +5 -5
- package/src/duckdb/src/execution/operator/join/physical_right_delim_join.cpp +7 -2
- package/src/duckdb/src/execution/operator/order/physical_order.cpp +17 -12
- package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +12 -9
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +35 -17
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +17 -11
- package/src/duckdb/src/execution/operator/persistent/physical_copy_database.cpp +5 -1
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +156 -47
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +10 -2
- package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +1 -3
- package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -2
- package/src/duckdb/src/execution/operator/projection/physical_projection.cpp +13 -6
- package/src/duckdb/src/execution/operator/projection/physical_tableinout_function.cpp +22 -3
- package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +19 -3
- package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +37 -22
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +77 -21
- package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +27 -55
- package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +41 -44
- package/src/duckdb/src/execution/operator/set/physical_cte.cpp +4 -6
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +4 -6
- package/src/duckdb/src/execution/operator/set/physical_union.cpp +18 -4
- package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +3 -2
- package/src/duckdb/src/execution/physical_operator.cpp +45 -4
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +18 -7
- package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +8 -3
- package/src/duckdb/src/execution/physical_plan/plan_delim_join.cpp +13 -6
- package/src/duckdb/src/execution/physical_plan/plan_explain.cpp +3 -3
- package/src/duckdb/src/execution/physical_plan/plan_get.cpp +111 -19
- package/src/duckdb/src/execution/physical_plan/plan_limit.cpp +19 -2
- package/src/duckdb/src/execution/physical_plan/plan_set.cpp +9 -0
- package/src/duckdb/src/execution/physical_plan/plan_window.cpp +3 -1
- package/src/duckdb/src/execution/physical_plan_generator.cpp +3 -3
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +49 -49
- package/src/duckdb/src/execution/reservoir_sample.cpp +2 -2
- package/src/duckdb/src/execution/window_executor.cpp +556 -318
- package/src/duckdb/src/execution/window_segment_tree.cpp +1058 -485
- package/src/duckdb/src/function/aggregate/distributive/count.cpp +5 -5
- package/src/duckdb/src/function/aggregate/distributive/first.cpp +92 -95
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +10 -9
- package/src/duckdb/src/function/aggregate_function.cpp +8 -0
- package/src/duckdb/src/function/cast/cast_function_set.cpp +10 -1
- package/src/duckdb/src/function/cast/decimal_cast.cpp +10 -1
- package/src/duckdb/src/function/cast/default_casts.cpp +2 -0
- package/src/duckdb/src/function/cast/numeric_casts.cpp +3 -0
- package/src/duckdb/src/function/cast/string_cast.cpp +8 -5
- package/src/duckdb/src/function/cast/time_casts.cpp +2 -2
- package/src/duckdb/src/function/cast/union_casts.cpp +1 -1
- package/src/duckdb/src/function/cast/varint_casts.cpp +283 -0
- package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +3 -1
- package/src/duckdb/src/function/cast_rules.cpp +104 -15
- package/src/duckdb/src/function/compression_config.cpp +35 -33
- package/src/duckdb/src/function/copy_function.cpp +27 -0
- package/src/duckdb/src/function/function_binder.cpp +39 -11
- package/src/duckdb/src/function/macro_function.cpp +75 -32
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +10 -0
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +1 -0
- package/src/duckdb/src/function/scalar/generic/binning.cpp +507 -0
- package/src/duckdb/src/function/scalar/generic/getvariable.cpp +58 -0
- package/src/duckdb/src/function/scalar/generic_functions.cpp +1 -0
- package/src/duckdb/src/function/scalar/list/contains_or_position.cpp +33 -47
- package/src/duckdb/src/function/scalar/list/list_extract.cpp +70 -143
- package/src/duckdb/src/function/scalar/list/list_resize.cpp +93 -84
- package/src/duckdb/src/function/scalar/list/list_zip.cpp +3 -0
- package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +24 -11
- package/src/duckdb/src/function/scalar/sequence/nextval.cpp +4 -4
- package/src/duckdb/src/function/scalar/strftime_format.cpp +196 -57
- package/src/duckdb/src/function/scalar/string/caseconvert.cpp +9 -7
- package/src/duckdb/src/function/scalar/string/concat.cpp +239 -123
- package/src/duckdb/src/function/scalar/string/concat_ws.cpp +149 -0
- package/src/duckdb/src/function/scalar/string/contains.cpp +18 -7
- package/src/duckdb/src/function/scalar/string/like.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/substring.cpp +6 -11
- package/src/duckdb/src/function/scalar/string_functions.cpp +1 -0
- package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +7 -3
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -5
- package/src/duckdb/src/function/scalar_function.cpp +5 -2
- package/src/duckdb/src/function/scalar_macro_function.cpp +2 -2
- package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +20 -39
- package/src/duckdb/src/function/table/arrow/arrow_type_info.cpp +135 -0
- package/src/duckdb/src/function/table/arrow.cpp +194 -52
- package/src/duckdb/src/function/table/arrow_conversion.cpp +212 -69
- package/src/duckdb/src/function/table/copy_csv.cpp +43 -14
- package/src/duckdb/src/function/table/query_function.cpp +80 -0
- package/src/duckdb/src/function/table/range.cpp +222 -142
- package/src/duckdb/src/function/table/read_csv.cpp +25 -13
- package/src/duckdb/src/function/table/sniff_csv.cpp +55 -35
- package/src/duckdb/src/function/table/system/duckdb_constraints.cpp +141 -129
- package/src/duckdb/src/function/table/system/duckdb_extensions.cpp +25 -14
- package/src/duckdb/src/function/table/system/duckdb_functions.cpp +20 -14
- package/src/duckdb/src/function/table/system/duckdb_indexes.cpp +15 -1
- package/src/duckdb/src/function/table/system/duckdb_variables.cpp +84 -0
- package/src/duckdb/src/function/table/system/test_all_types.cpp +1 -0
- package/src/duckdb/src/function/table/system/test_vector_types.cpp +33 -3
- package/src/duckdb/src/function/table/system_functions.cpp +1 -0
- package/src/duckdb/src/function/table/table_scan.cpp +45 -22
- package/src/duckdb/src/function/table/unnest.cpp +2 -2
- package/src/duckdb/src/function/table/version/pragma_version.cpp +4 -4
- package/src/duckdb/src/function/table_function.cpp +5 -4
- package/src/duckdb/src/function/table_macro_function.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +8 -4
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +5 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_schema_entry.hpp +3 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/macro_catalog_entry.hpp +3 -4
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +5 -5
- package/src/duckdb/src/include/duckdb/catalog/default/builtin_types/types.hpp +2 -1
- package/src/duckdb/src/include/duckdb/catalog/default/default_functions.hpp +4 -5
- package/src/duckdb/src/include/duckdb/catalog/default/default_generator.hpp +4 -5
- package/src/duckdb/src/include/duckdb/catalog/default/default_schemas.hpp +2 -1
- package/src/duckdb/src/include/duckdb/catalog/default/default_table_functions.hpp +47 -0
- package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +2 -0
- package/src/duckdb/src/include/duckdb/catalog/similar_catalog_entry.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +9 -1
- package/src/duckdb/src/include/duckdb/common/array_ptr.hpp +120 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +37 -11
- package/src/duckdb/src/include/duckdb/common/arrow/appender/enum_data.hpp +9 -8
- package/src/duckdb/src/include/duckdb/common/arrow/appender/list.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +6 -4
- package/src/duckdb/src/include/duckdb/common/arrow/appender/list_view_data.hpp +92 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/map_data.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/arrow/appender/scalar_data.hpp +26 -4
- package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +90 -11
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +6 -6
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_buffer.hpp +8 -1
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_merge_event.hpp +62 -0
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_query_result.hpp +52 -0
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_types_extension.hpp +42 -0
- package/src/duckdb/src/include/duckdb/common/arrow/physical_arrow_batch_collector.hpp +30 -0
- package/src/duckdb/src/include/duckdb/common/arrow/physical_arrow_collector.hpp +65 -0
- package/src/duckdb/src/include/duckdb/common/arrow/schema_metadata.hpp +43 -0
- package/src/duckdb/src/include/duckdb/common/bswap.hpp +18 -16
- package/src/duckdb/src/include/duckdb/common/cgroups.hpp +30 -0
- package/src/duckdb/src/include/duckdb/common/compressed_file_system.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/dl.hpp +8 -1
- package/src/duckdb/src/include/duckdb/common/encryption_state.hpp +48 -0
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +88 -0
- package/src/duckdb/src/include/duckdb/common/enums/checkpoint_type.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/enums/copy_overwrite_mode.hpp +6 -1
- package/src/duckdb/src/include/duckdb/common/enums/destroy_buffer_upon.hpp +21 -0
- package/src/duckdb/src/include/duckdb/common/enums/explain_format.hpp +17 -0
- package/src/duckdb/src/include/duckdb/common/enums/file_compression_type.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/enums/join_type.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp +88 -0
- package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +6 -1
- package/src/duckdb/src/include/duckdb/common/enums/pending_execution_result.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/enums/profiler_format.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/enums/relation_type.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/enums/set_scope.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/enums/statement_type.hpp +23 -2
- package/src/duckdb/src/include/duckdb/common/enums/stream_execution_result.hpp +25 -0
- package/src/duckdb/src/include/duckdb/common/enums/tableref_type.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/enums/wal_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/error_data.hpp +5 -2
- package/src/duckdb/src/include/duckdb/common/exception/binder_exception.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/exception.hpp +20 -2
- package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +12 -0
- package/src/duckdb/src/include/duckdb/common/file_buffer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/file_open_flags.hpp +16 -0
- package/src/duckdb/src/include/duckdb/common/file_opener.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/fixed_size_map.hpp +160 -96
- package/src/duckdb/src/include/duckdb/common/fsst.hpp +9 -2
- package/src/duckdb/src/include/duckdb/common/helper.hpp +22 -8
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +16 -7
- package/src/duckdb/src/include/duckdb/common/http_util.hpp +19 -0
- package/src/duckdb/src/include/duckdb/common/insertion_order_preserving_map.hpp +19 -6
- package/src/duckdb/src/include/duckdb/common/limits.hpp +9 -2
- package/src/duckdb/src/include/duckdb/common/multi_file_list.hpp +38 -6
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +9 -2
- package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +5 -1
- package/src/duckdb/src/include/duckdb/common/numeric_utils.hpp +82 -50
- package/src/duckdb/src/include/duckdb/common/operator/abs.hpp +11 -0
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +7 -3
- package/src/duckdb/src/include/duckdb/common/operator/decimal_cast_operators.hpp +23 -1
- package/src/duckdb/src/include/duckdb/common/operator/double_cast_operator.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/operator/integer_cast_operator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/operator/string_cast.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +10 -5
- package/src/duckdb/src/include/duckdb/common/optionally_owned_ptr.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/owning_string_map.hpp +155 -0
- package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +2 -3
- package/src/duckdb/src/include/duckdb/common/platform.hpp +58 -0
- package/src/duckdb/src/include/duckdb/common/radix.hpp +172 -27
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +5 -1
- package/src/duckdb/src/include/duckdb/common/random_engine.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/re2_regex.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/render_tree.hpp +77 -0
- package/src/duckdb/src/include/duckdb/common/row_operations/row_matcher.hpp +12 -0
- package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +6 -2
- package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +5 -3
- package/src/duckdb/src/include/duckdb/common/serializer/deserializer.hpp +15 -7
- package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_data.hpp +245 -0
- package/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp +10 -11
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +12 -6
- package/src/duckdb/src/include/duckdb/common/string_util.hpp +37 -7
- package/src/duckdb/src/include/duckdb/common/tree_renderer/graphviz_tree_renderer.hpp +44 -0
- package/src/duckdb/src/include/duckdb/common/tree_renderer/html_tree_renderer.hpp +44 -0
- package/src/duckdb/src/include/duckdb/common/tree_renderer/json_tree_renderer.hpp +44 -0
- package/src/duckdb/src/include/duckdb/common/tree_renderer/text_tree_renderer.hpp +119 -0
- package/src/duckdb/src/include/duckdb/common/tree_renderer.hpp +9 -123
- package/src/duckdb/src/include/duckdb/common/type_visitor.hpp +96 -0
- package/src/duckdb/src/include/duckdb/common/typedefs.hpp +11 -1
- package/src/duckdb/src/include/duckdb/common/types/arrow_string_view_type.hpp +84 -0
- package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +36 -1
- package/src/duckdb/src/include/duckdb/common/types/bit.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/cast_helpers.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +4 -2
- package/src/duckdb/src/include/duckdb/common/types/column/partitioned_column_data.hpp +52 -0
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/types/date.hpp +0 -3
- package/src/duckdb/src/include/duckdb/common/types/date_lookup_cache.hpp +65 -0
- package/src/duckdb/src/include/duckdb/common/types/datetime.hpp +5 -2
- package/src/duckdb/src/include/duckdb/common/types/hyperloglog.hpp +49 -40
- package/src/duckdb/src/include/duckdb/common/types/interval.hpp +5 -1
- package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +41 -9
- package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection.hpp +4 -3
- package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/selection_vector.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +4 -1
- package/src/duckdb/src/include/duckdb/common/types/time.hpp +11 -6
- package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +13 -3
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +103 -12
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +12 -3
- package/src/duckdb/src/include/duckdb/common/types/varint.hpp +107 -0
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +5 -1
- package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +7 -2
- package/src/duckdb/src/include/duckdb/common/types.hpp +6 -39
- package/src/duckdb/src/include/duckdb/common/union_by_name.hpp +42 -10
- package/src/duckdb/src/include/duckdb/common/vector_operations/generic_executor.hpp +29 -0
- package/src/duckdb/src/include/duckdb/common/vector_operations/unary_executor.hpp +0 -7
- package/src/duckdb/src/include/duckdb/common/vector_operations/vector_operations.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/winapi.hpp +8 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +8 -4
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/stddev.hpp +8 -4
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +4 -2
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/histogram_helpers.hpp +99 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +16 -7
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/minmax_n_helpers.hpp +396 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +10 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/quantile_helpers.hpp +65 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/quantile_sort_tree.hpp +349 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/quantile_state.hpp +300 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +1 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/sort_key_helpers.hpp +55 -0
- package/src/duckdb/src/include/duckdb/core_functions/array_kernels.hpp +107 -0
- package/src/duckdb/src/include/duckdb/core_functions/create_sort_key.hpp +55 -0
- package/src/duckdb/src/include/duckdb/core_functions/lambda_functions.hpp +1 -2
- package/src/duckdb/src/include/duckdb/core_functions/scalar/array_functions.hpp +24 -0
- package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +9 -0
- package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +27 -0
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +80 -8
- package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +9 -0
- package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +54 -0
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +30 -21
- package/src/duckdb/src/include/duckdb/execution/adaptive_filter.hpp +25 -14
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +2 -48
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +25 -2
- package/src/duckdb/src/include/duckdb/execution/ht_entry.hpp +102 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +94 -101
- package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +43 -25
- package/src/duckdb/src/include/duckdb/execution/index/art/base_leaf.hpp +109 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/base_node.hpp +140 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +43 -24
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +41 -52
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +133 -74
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +46 -29
- package/src/duckdb/src/include/duckdb/execution/index/art/node256_leaf.hpp +53 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +52 -35
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +96 -57
- package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +9 -4
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +48 -10
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +0 -2
- package/src/duckdb/src/include/duckdb/execution/index/index_pointer.hpp +4 -2
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +114 -36
- package/src/duckdb/src/include/duckdb/execution/merge_sort_tree.hpp +158 -67
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/aggregate_object.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_streaming_window.hpp +19 -2
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/ungrouped_aggregate_state.hpp +75 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +81 -23
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp +27 -8
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp +31 -22
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp +4 -2
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp +48 -5
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +7 -3
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp +22 -12
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +35 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp +81 -39
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine.hpp +18 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine_cache.hpp +9 -7
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/global_csv_state.hpp +5 -4
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/header_value.hpp +26 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner_boundary.hpp +6 -9
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/skip_scanner.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine_options.hpp +5 -3
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +36 -19
- package/src/duckdb/src/include/duckdb/execution/operator/filter/physical_filter.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +21 -0
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_buffered_batch_collector.hpp +53 -0
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_buffered_collector.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_explain_analyze.hpp +6 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_materialized_collector.hpp +18 -0
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_reservoir_sample.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +6 -0
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_set.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_set_variable.hpp +43 -0
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_streaming_sample.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/join_filter_pushdown.hpp +59 -0
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_blockwise_nl_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_comparison_join.hpp +8 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_delim_join.hpp +5 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +4 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_left_delim_join.hpp +3 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +4 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_right_delim_join.hpp +3 -1
- package/src/duckdb/src/include/duckdb/execution/operator/order/physical_order.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/order/physical_top_n.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/batch_memory_manager.hpp +5 -37
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +5 -4
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +8 -2
- package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_projection.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_tableinout_function.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +9 -3
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +8 -6
- package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_art_index.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +21 -6
- package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +3 -2
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +137 -110
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +57 -126
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +21 -4
- package/src/duckdb/src/include/duckdb/function/cast/default_casts.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/compression/compression.hpp +10 -10
- package/src/duckdb/src/include/duckdb/function/compression_function.hpp +37 -7
- package/src/duckdb/src/include/duckdb/function/copy_function.hpp +24 -11
- package/src/duckdb/src/include/duckdb/function/function_binder.hpp +4 -4
- package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +41 -1
- package/src/duckdb/src/include/duckdb/function/macro_function.hpp +15 -5
- package/src/duckdb/src/include/duckdb/function/pragma/pragma_functions.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/replacement_scan.hpp +20 -4
- package/src/duckdb/src/include/duckdb/function/scalar/generic_functions.hpp +6 -0
- package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +77 -109
- package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/scalar/regexp.hpp +6 -3
- package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +25 -12
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +9 -8
- package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +38 -4
- package/src/duckdb/src/include/duckdb/function/scalar_macro_function.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +11 -57
- package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_type_info.hpp +142 -0
- package/src/duckdb/src/include/duckdb/function/table/arrow/enum/arrow_datetime_type.hpp +18 -0
- package/src/duckdb/src/include/duckdb/function/table/arrow/enum/arrow_type_info_type.hpp +7 -0
- package/src/duckdb/src/include/duckdb/function/table/arrow/enum/arrow_variable_size_type.hpp +10 -0
- package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/range.hpp +4 -0
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +4 -1
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/function/table/table_scan.hpp +5 -5
- package/src/duckdb/src/include/duckdb/function/table_function.hpp +14 -2
- package/src/duckdb/src/include/duckdb/function/table_macro_function.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/appender.hpp +14 -4
- package/src/duckdb/src/include/duckdb/main/attached_database.hpp +25 -7
- package/src/duckdb/src/include/duckdb/main/buffered_data/batched_buffered_data.hpp +79 -0
- package/src/duckdb/src/include/duckdb/main/buffered_data/buffered_data.hpp +10 -20
- package/src/duckdb/src/include/duckdb/main/buffered_data/simple_buffered_data.hpp +11 -12
- package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +7 -2
- package/src/duckdb/src/include/duckdb/main/capi/cast/generic.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/capi/cast/utils.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp +809 -0
- package/src/duckdb/src/include/duckdb/main/chunk_scan_state/batched_data_collection.hpp +35 -0
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +68 -2
- package/src/duckdb/src/include/duckdb/main/client_context.hpp +30 -22
- package/src/duckdb/src/include/duckdb/main/client_context_state.hpp +79 -1
- package/src/duckdb/src/include/duckdb/main/client_properties.hpp +9 -3
- package/src/duckdb/src/include/duckdb/main/config.hpp +55 -7
- package/src/duckdb/src/include/duckdb/main/connection.hpp +5 -1
- package/src/duckdb/src/include/duckdb/main/database.hpp +16 -5
- package/src/duckdb/src/include/duckdb/main/database_manager.hpp +9 -8
- package/src/duckdb/src/include/duckdb/main/db_instance_cache.hpp +21 -6
- package/src/duckdb/src/include/duckdb/main/extension.hpp +20 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +25 -0
- package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +29 -23
- package/src/duckdb/src/include/duckdb/main/extension_install_info.hpp +6 -0
- package/src/duckdb/src/include/duckdb/main/extension_util.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +4 -2
- package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +5 -6
- package/src/duckdb/src/include/duckdb/main/prepared_statement_data.hpp +2 -5
- package/src/duckdb/src/include/duckdb/main/profiling_info.hpp +87 -0
- package/src/duckdb/src/include/duckdb/main/profiling_node.hpp +60 -0
- package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +72 -34
- package/src/duckdb/src/include/duckdb/main/query_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/relation/create_table_relation.hpp +2 -1
- package/src/duckdb/src/include/duckdb/main/relation/delim_get_relation.hpp +30 -0
- package/src/duckdb/src/include/duckdb/main/relation/explain_relation.hpp +3 -1
- package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/relation/materialized_relation.hpp +1 -4
- package/src/duckdb/src/include/duckdb/main/relation/query_relation.hpp +4 -1
- package/src/duckdb/src/include/duckdb/main/relation/read_json_relation.hpp +6 -0
- package/src/duckdb/src/include/duckdb/main/relation/table_function_relation.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/relation/view_relation.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/relation.hpp +7 -4
- package/src/duckdb/src/include/duckdb/main/secret/default_secrets.hpp +36 -0
- package/src/duckdb/src/include/duckdb/main/secret/secret.hpp +108 -0
- package/src/duckdb/src/include/duckdb/main/secret/secret_manager.hpp +14 -4
- package/src/duckdb/src/include/duckdb/main/settings.hpp +227 -3
- package/src/duckdb/src/include/duckdb/main/stream_query_result.hpp +8 -0
- package/src/duckdb/src/include/duckdb/optimizer/build_probe_side_optimizer.hpp +51 -0
- package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +7 -0
- package/src/duckdb/src/include/duckdb/optimizer/cte_filter_pusher.hpp +46 -0
- package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +7 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_filter_pushdown_optimizer.hpp +31 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +51 -10
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cost_model.hpp +1 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +17 -5
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +15 -13
- package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +9 -4
- package/src/duckdb/src/include/duckdb/optimizer/limit_pushdown.hpp +25 -0
- package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +1 -0
- package/src/duckdb/src/include/duckdb/optimizer/rule/join_dependent_filter.hpp +37 -0
- package/src/duckdb/src/include/duckdb/parallel/executor_task.hpp +6 -1
- package/src/duckdb/src/include/duckdb/parallel/interrupt.hpp +54 -2
- package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +27 -8
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parallel/pipeline_prepare_finish_event.hpp +25 -0
- package/src/duckdb/src/include/duckdb/parallel/task_executor.hpp +63 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +10 -1
- package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/attach_info.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_index_info.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_macro_info.hpp +11 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/transaction_info.hpp +9 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_expression_iterator.hpp +13 -6
- package/src/duckdb/src/include/duckdb/parser/parser_extension.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/sql_statement.hpp +1 -3
- package/src/duckdb/src/include/duckdb/parser/statement/copy_statement.hpp +2 -0
- package/src/duckdb/src/include/duckdb/parser/statement/explain_statement.hpp +5 -1
- package/src/duckdb/src/include/duckdb/parser/statement/set_statement.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parser/statement/transaction_statement.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/basetableref.hpp +0 -2
- package/src/duckdb/src/include/duckdb/parser/tableref/column_data_ref.hpp +9 -7
- package/src/duckdb/src/include/duckdb/parser/tableref/delimgetref.hpp +37 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +4 -0
- package/src/duckdb/src/include/duckdb/parser/tableref/pivotref.hpp +0 -2
- package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +0 -2
- package/src/duckdb/src/include/duckdb/parser/tableref/table_function_ref.hpp +0 -1
- package/src/duckdb/src/include/duckdb/parser/tableref.hpp +3 -1
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +17 -9
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +24 -14
- package/src/duckdb/src/include/duckdb/planner/collation_binding.hpp +44 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_aggregate_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_between_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_case_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_cast_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_columnref_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_comparison_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_conjunction_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_constant_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_default_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_expanded_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_function_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_lambda_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_lambdaref_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_operator_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_data.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_reference_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_subquery_expression.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/expression/bound_unnest_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/expression_binder/column_alias_binder.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/group_binder.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/order_binder.hpp +6 -5
- package/src/duckdb/src/include/duckdb/planner/expression_binder/where_binder.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +19 -11
- package/src/duckdb/src/include/duckdb/planner/filter/conjunction_filter.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/filter/constant_filter.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/filter/null_filter.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/filter/struct_filter.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +7 -2
- package/src/duckdb/src/include/duckdb/planner/logical_operator_visitor.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_aggregate.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_any_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +6 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +10 -2
- package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_delim_get.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_distinct.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_execute.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_explain.hpp +4 -2
- package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +15 -5
- package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_order.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +24 -2
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_delimgetref.hpp +26 -0
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_joinref.hpp +6 -0
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_subqueryref.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_table_function.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/tableref/list.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/block.hpp +4 -2
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +48 -3
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +21 -7
- package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +65 -51
- package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +14 -5
- package/src/duckdb/src/include/duckdb/storage/checkpoint/row_group_writer.hpp +0 -4
- package/src/duckdb/src/include/duckdb/storage/checkpoint/string_checkpoint_state.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/checkpoint/write_overflow_strings_to_disk.hpp +3 -4
- package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/compression/alp/algorithm/alp.hpp +4 -4
- package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_analyze.hpp +6 -4
- package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_compress.hpp +19 -17
- package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_constants.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_scan.hpp +3 -4
- package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_utils.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/compression/alprd/algorithm/alprd.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_analyze.hpp +13 -11
- package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_compress.hpp +19 -19
- package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_scan.hpp +3 -4
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +10 -2
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp +15 -0
- package/src/duckdb/src/include/duckdb/storage/index_storage_info.hpp +14 -10
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +6 -8
- package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +7 -4
- package/src/duckdb/src/include/duckdb/storage/segment/uncompressed.hpp +4 -7
- package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +29 -4
- package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +22 -7
- package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +15 -2
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +8 -2
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +5 -16
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats_union.hpp +51 -13
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +6 -3
- package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +29 -19
- package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +23 -7
- package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +27 -18
- package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp +6 -3
- package/src/duckdb/src/include/duckdb/storage/table/array_column_data.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +5 -1
- package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +77 -6
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +23 -11
- package/src/duckdb/src/include/duckdb/storage/table/data_table_info.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +5 -2
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +18 -4
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +7 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +89 -14
- package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +4 -2
- package/src/duckdb/src/include/duckdb/storage/table/struct_column_data.hpp +4 -2
- package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/table/validity_column_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/temporary_memory_manager.hpp +33 -15
- package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +9 -9
- package/src/duckdb/src/include/duckdb/transaction/cleanup_state.hpp +3 -1
- package/src/duckdb/src/include/duckdb/transaction/commit_state.hpp +4 -16
- package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +27 -4
- package/src/duckdb/src/include/duckdb/transaction/duck_transaction_manager.hpp +11 -0
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +6 -2
- package/src/duckdb/src/include/duckdb/transaction/meta_transaction.hpp +5 -5
- package/src/duckdb/src/include/duckdb/transaction/transaction_context.hpp +6 -2
- package/src/duckdb/src/include/duckdb/transaction/undo_buffer.hpp +5 -3
- package/src/duckdb/src/include/duckdb/transaction/wal_write_state.hpp +48 -0
- package/src/duckdb/src/include/duckdb.h +1779 -739
- package/src/duckdb/src/include/duckdb_extension.h +921 -0
- package/src/duckdb/src/main/appender.cpp +53 -7
- package/src/duckdb/src/main/attached_database.cpp +87 -17
- package/src/duckdb/src/main/buffered_data/batched_buffered_data.cpp +226 -0
- package/src/duckdb/src/main/buffered_data/buffered_data.cpp +35 -0
- package/src/duckdb/src/main/buffered_data/simple_buffered_data.cpp +48 -23
- package/src/duckdb/src/main/capi/aggregate_function-c.cpp +327 -0
- package/src/duckdb/src/main/capi/appender-c.cpp +18 -0
- package/src/duckdb/src/main/capi/cast/utils-c.cpp +2 -2
- package/src/duckdb/src/main/capi/cast_function-c.cpp +210 -0
- package/src/duckdb/src/main/capi/config-c.cpp +3 -3
- package/src/duckdb/src/main/capi/data_chunk-c.cpp +18 -7
- package/src/duckdb/src/main/capi/duckdb_value-c.cpp +223 -24
- package/src/duckdb/src/main/capi/helper-c.cpp +51 -11
- package/src/duckdb/src/main/capi/logical_types-c.cpp +105 -46
- package/src/duckdb/src/main/capi/pending-c.cpp +7 -6
- package/src/duckdb/src/main/capi/prepared-c.cpp +18 -7
- package/src/duckdb/src/main/capi/profiling_info-c.cpp +84 -0
- package/src/duckdb/src/main/capi/result-c.cpp +139 -37
- package/src/duckdb/src/main/capi/scalar_function-c.cpp +269 -0
- package/src/duckdb/src/main/capi/table_description-c.cpp +82 -0
- package/src/duckdb/src/main/capi/table_function-c.cpp +161 -95
- package/src/duckdb/src/main/capi/value-c.cpp +2 -2
- package/src/duckdb/src/main/chunk_scan_state/batched_data_collection.cpp +57 -0
- package/src/duckdb/src/main/client_config.cpp +17 -0
- package/src/duckdb/src/main/client_context.cpp +67 -52
- package/src/duckdb/src/main/client_data.cpp +3 -3
- package/src/duckdb/src/main/config.cpp +120 -62
- package/src/duckdb/src/main/connection.cpp +14 -2
- package/src/duckdb/src/main/database.cpp +96 -35
- package/src/duckdb/src/main/database_manager.cpp +25 -23
- package/src/duckdb/src/main/database_path_and_type.cpp +2 -2
- package/src/duckdb/src/main/db_instance_cache.cpp +54 -19
- package/src/duckdb/src/main/extension/extension_helper.cpp +47 -42
- package/src/duckdb/src/main/extension/extension_install.cpp +155 -87
- package/src/duckdb/src/main/extension/extension_load.cpp +180 -26
- package/src/duckdb/src/main/extension/extension_util.cpp +8 -0
- package/src/duckdb/src/main/extension.cpp +72 -5
- package/src/duckdb/src/main/pending_query_result.cpp +20 -12
- package/src/duckdb/src/main/prepared_statement.cpp +6 -6
- package/src/duckdb/src/main/prepared_statement_data.cpp +28 -17
- package/src/duckdb/src/main/profiling_info.cpp +196 -0
- package/src/duckdb/src/main/query_profiler.cpp +413 -224
- package/src/duckdb/src/main/query_result.cpp +1 -1
- package/src/duckdb/src/main/relation/create_table_relation.cpp +4 -2
- package/src/duckdb/src/main/relation/create_view_relation.cpp +0 -6
- package/src/duckdb/src/main/relation/delim_get_relation.cpp +44 -0
- package/src/duckdb/src/main/relation/explain_relation.cpp +4 -3
- package/src/duckdb/src/main/relation/join_relation.cpp +5 -0
- package/src/duckdb/src/main/relation/limit_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/materialized_relation.cpp +3 -3
- package/src/duckdb/src/main/relation/query_relation.cpp +42 -15
- package/src/duckdb/src/main/relation/read_csv_relation.cpp +7 -14
- package/src/duckdb/src/main/relation/read_json_relation.cpp +20 -0
- package/src/duckdb/src/main/relation/setop_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/table_function_relation.cpp +6 -0
- package/src/duckdb/src/main/relation/view_relation.cpp +10 -0
- package/src/duckdb/src/main/relation.cpp +12 -8
- package/src/duckdb/src/main/secret/default_secrets.cpp +108 -0
- package/src/duckdb/src/main/secret/secret.cpp +145 -2
- package/src/duckdb/src/main/secret/secret_manager.cpp +85 -35
- package/src/duckdb/src/main/secret/secret_storage.cpp +29 -17
- package/src/duckdb/src/main/settings/settings.cpp +503 -11
- package/src/duckdb/src/main/stream_query_result.cpp +75 -2
- package/src/duckdb/src/optimizer/build_probe_side_optimizer.cpp +248 -0
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +28 -6
- package/src/duckdb/src/optimizer/compressed_materialization/compress_comparison_join.cpp +152 -0
- package/src/duckdb/src/optimizer/compressed_materialization.cpp +11 -1
- package/src/duckdb/src/optimizer/cse_optimizer.cpp +3 -0
- package/src/duckdb/src/optimizer/cte_filter_pusher.cpp +117 -0
- package/src/duckdb/src/optimizer/filter_combiner.cpp +30 -9
- package/src/duckdb/src/optimizer/filter_pullup.cpp +54 -2
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +71 -3
- package/src/duckdb/src/optimizer/join_filter_pushdown_optimizer.cpp +154 -0
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +245 -114
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +42 -20
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +6 -2
- package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +32 -10
- package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +97 -131
- package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +265 -51
- package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +21 -17
- package/src/duckdb/src/optimizer/limit_pushdown.cpp +42 -0
- package/src/duckdb/src/optimizer/optimizer.cpp +51 -8
- package/src/duckdb/src/optimizer/pushdown/pushdown_aggregate.cpp +17 -17
- package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +22 -4
- package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +1 -18
- package/src/duckdb/src/optimizer/pushdown/pushdown_inner_join.cpp +6 -0
- package/src/duckdb/src/optimizer/pushdown/pushdown_mark_join.cpp +4 -2
- package/src/duckdb/src/optimizer/pushdown/pushdown_window.cpp +91 -0
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +21 -25
- package/src/duckdb/src/optimizer/rule/comparison_simplification.cpp +1 -0
- package/src/duckdb/src/optimizer/rule/empty_needle_removal.cpp +3 -0
- package/src/duckdb/src/optimizer/rule/equal_or_null_simplification.cpp +2 -2
- package/src/duckdb/src/optimizer/rule/in_clause_simplification_rule.cpp +8 -2
- package/src/duckdb/src/optimizer/rule/join_dependent_filter.cpp +135 -0
- package/src/duckdb/src/optimizer/rule/like_optimizations.cpp +1 -1
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +6 -1
- package/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp +7 -6
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +1 -1
- package/src/duckdb/src/optimizer/topn_optimizer.cpp +46 -7
- package/src/duckdb/src/parallel/executor.cpp +129 -51
- package/src/duckdb/src/parallel/executor_task.cpp +16 -3
- package/src/duckdb/src/parallel/meta_pipeline.cpp +98 -29
- package/src/duckdb/src/parallel/pipeline.cpp +17 -3
- package/src/duckdb/src/parallel/pipeline_executor.cpp +14 -2
- package/src/duckdb/src/parallel/pipeline_prepare_finish_event.cpp +34 -0
- package/src/duckdb/src/parallel/task_executor.cpp +84 -0
- package/src/duckdb/src/parallel/task_scheduler.cpp +94 -16
- package/src/duckdb/src/parallel/thread_context.cpp +1 -1
- package/src/duckdb/src/parser/expression/function_expression.cpp +14 -0
- package/src/duckdb/src/parser/expression/star_expression.cpp +35 -2
- package/src/duckdb/src/parser/parsed_data/alter_table_info.cpp +5 -1
- package/src/duckdb/src/parser/parsed_data/attach_info.cpp +17 -0
- package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +37 -28
- package/src/duckdb/src/parser/parsed_data/create_macro_info.cpp +44 -2
- package/src/duckdb/src/parser/parsed_data/transaction_info.cpp +21 -1
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +29 -25
- package/src/duckdb/src/parser/parser.cpp +41 -1
- package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +1 -0
- package/src/duckdb/src/parser/statement/explain_statement.cpp +28 -13
- package/src/duckdb/src/parser/statement/relation_statement.cpp +5 -0
- package/src/duckdb/src/parser/statement/set_statement.cpp +4 -2
- package/src/duckdb/src/parser/statement/transaction_statement.cpp +3 -3
- package/src/duckdb/src/parser/tableref/column_data_ref.cpp +1 -27
- package/src/duckdb/src/parser/tableref/delimgetref.cpp +30 -0
- package/src/duckdb/src/parser/tableref/joinref.cpp +4 -0
- package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +35 -29
- package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +32 -32
- package/src/duckdb/src/parser/transform/expression/transform_columnref.cpp +2 -1
- package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +17 -0
- package/src/duckdb/src/parser/transform/expression/transform_function.cpp +5 -0
- package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +36 -34
- package/src/duckdb/src/parser/transform/expression/transform_operator.cpp +30 -14
- package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +1 -1
- package/src/duckdb/src/parser/transform/helpers/transform_alias.cpp +2 -1
- package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +27 -19
- package/src/duckdb/src/parser/transform/helpers/transform_orderby.cpp +31 -28
- package/src/duckdb/src/parser/transform/statement/transform_alter_table.cpp +25 -27
- package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +53 -42
- package/src/duckdb/src/parser/transform/statement/transform_create_table.cpp +6 -6
- package/src/duckdb/src/parser/transform/statement/transform_create_table_as.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_create_type.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_create_view.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_explain.cpp +38 -3
- package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +1 -2
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_prepare.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_select.cpp +26 -21
- package/src/duckdb/src/parser/transform/statement/transform_set.cpp +8 -8
- package/src/duckdb/src/parser/transform/statement/transform_show.cpp +5 -2
- package/src/duckdb/src/parser/transform/statement/transform_show_select.cpp +6 -4
- package/src/duckdb/src/parser/transform/statement/transform_transaction.cpp +27 -6
- package/src/duckdb/src/parser/transform/statement/transform_update.cpp +8 -9
- package/src/duckdb/src/parser/transform/statement/transform_upsert.cpp +11 -12
- package/src/duckdb/src/parser/transform/statement/transform_vacuum.cpp +3 -3
- package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +16 -10
- package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +1 -1
- package/src/duckdb/src/parser/transform/tableref/transform_subquery.cpp +1 -1
- package/src/duckdb/src/parser/transformer.cpp +11 -7
- package/src/duckdb/src/planner/bind_context.cpp +3 -3
- package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +22 -7
- package/src/duckdb/src/planner/binder/expression/bind_between_expression.cpp +3 -3
- package/src/duckdb/src/planner/binder/expression/bind_collate_expression.cpp +3 -2
- package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +11 -4
- package/src/duckdb/src/planner/binder/expression/bind_comparison_expression.cpp +9 -54
- package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +3 -5
- package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +24 -27
- package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +7 -7
- package/src/duckdb/src/planner/binder/expression/bind_parameter_expression.cpp +9 -2
- package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +26 -7
- package/src/duckdb/src/planner/binder/expression/bind_unnest_expression.cpp +5 -0
- package/src/duckdb/src/planner/binder/expression/bind_unpacked_star_expression.cpp +91 -0
- package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +2 -2
- package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +11 -8
- package/src/duckdb/src/planner/binder/query_node/bind_setop_node.cpp +1 -1
- package/src/duckdb/src/planner/binder/query_node/bind_table_macro_node.cpp +6 -10
- package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +14 -10
- package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +3 -3
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +46 -7
- package/src/duckdb/src/planner/binder/statement/bind_call.cpp +13 -20
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +105 -13
- package/src/duckdb/src/planner/binder/statement/bind_copy_database.cpp +7 -3
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +75 -55
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_delete.cpp +5 -4
- package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +2 -2
- package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +24 -8
- package/src/duckdb/src/planner/binder/statement/bind_explain.cpp +2 -2
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +5 -105
- package/src/duckdb/src/planner/binder/statement/bind_extension.cpp +2 -2
- package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +109 -41
- package/src/duckdb/src/planner/binder/statement/bind_set.cpp +23 -7
- package/src/duckdb/src/planner/binder/statement/bind_simple.cpp +4 -1
- package/src/duckdb/src/planner/binder/statement/bind_summarize.cpp +17 -3
- package/src/duckdb/src/planner/binder/statement/bind_update.cpp +5 -4
- package/src/duckdb/src/planner/binder/statement/bind_vacuum.cpp +8 -6
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +55 -42
- package/src/duckdb/src/planner/binder/tableref/bind_column_data_ref.cpp +3 -2
- package/src/duckdb/src/planner/binder/tableref/bind_delimgetref.cpp +16 -0
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +31 -1
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +6 -0
- package/src/duckdb/src/planner/binder/tableref/bind_showref.cpp +2 -0
- package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +106 -46
- package/src/duckdb/src/planner/binder/tableref/plan_delimgetref.cpp +11 -0
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +15 -2
- package/src/duckdb/src/planner/binder/tableref/plan_table_function.cpp +4 -0
- package/src/duckdb/src/planner/binder.cpp +172 -15
- package/src/duckdb/src/planner/collation_binding.cpp +99 -0
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +10 -4
- package/src/duckdb/src/planner/expression/bound_between_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_case_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +14 -12
- package/src/duckdb/src/planner/expression/bound_columnref_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_comparison_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_conjunction_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_constant_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_expanded_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_function_expression.cpp +8 -2
- package/src/duckdb/src/planner/expression/bound_lambda_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_lambdaref_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_operator_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_parameter_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_reference_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_subquery_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_unnest_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +6 -6
- package/src/duckdb/src/planner/expression_binder/aggregate_binder.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/alter_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/column_alias_binder.cpp +7 -0
- package/src/duckdb/src/planner/expression_binder/constant_binder.cpp +3 -3
- package/src/duckdb/src/planner/expression_binder/group_binder.cpp +26 -22
- package/src/duckdb/src/planner/expression_binder/having_binder.cpp +7 -1
- package/src/duckdb/src/planner/expression_binder/index_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/insert_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/order_binder.cpp +61 -43
- package/src/duckdb/src/planner/expression_binder/qualify_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/relation_binder.cpp +4 -4
- package/src/duckdb/src/planner/expression_binder/returning_binder.cpp +3 -2
- package/src/duckdb/src/planner/expression_binder/table_function_binder.cpp +10 -3
- package/src/duckdb/src/planner/expression_binder/update_binder.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/where_binder.cpp +9 -2
- package/src/duckdb/src/planner/expression_binder.cpp +121 -21
- package/src/duckdb/src/planner/expression_iterator.cpp +26 -1
- package/src/duckdb/src/planner/filter/conjunction_filter.cpp +33 -0
- package/src/duckdb/src/planner/filter/constant_filter.cpp +15 -0
- package/src/duckdb/src/planner/filter/null_filter.cpp +22 -0
- package/src/duckdb/src/planner/filter/struct_filter.cpp +16 -0
- package/src/duckdb/src/planner/logical_operator.cpp +24 -7
- package/src/duckdb/src/planner/operator/logical_aggregate.cpp +13 -7
- package/src/duckdb/src/planner/operator/logical_any_join.cpp +5 -2
- package/src/duckdb/src/planner/operator/logical_comparison_join.cpp +13 -5
- package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +64 -8
- package/src/duckdb/src/planner/operator/logical_cteref.cpp +7 -0
- package/src/duckdb/src/planner/operator/logical_distinct.cpp +6 -5
- package/src/duckdb/src/planner/operator/logical_get.cpp +60 -18
- package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +7 -0
- package/src/duckdb/src/planner/operator/logical_order.cpp +7 -4
- package/src/duckdb/src/planner/operator/logical_top_n.cpp +2 -2
- package/src/duckdb/src/planner/operator/logical_vacuum.cpp +1 -1
- package/src/duckdb/src/planner/planner.cpp +2 -3
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +27 -10
- package/src/duckdb/src/planner/table_filter.cpp +51 -0
- package/src/duckdb/src/storage/arena_allocator.cpp +28 -10
- package/src/duckdb/src/storage/block.cpp +3 -2
- package/src/duckdb/src/storage/buffer/block_handle.cpp +29 -14
- package/src/duckdb/src/storage/buffer/block_manager.cpp +6 -5
- package/src/duckdb/src/storage/buffer/buffer_handle.cpp +1 -1
- package/src/duckdb/src/storage/buffer/buffer_pool.cpp +264 -125
- package/src/duckdb/src/storage/buffer_manager.cpp +5 -1
- package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +0 -6
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +26 -3
- package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +21 -9
- package/src/duckdb/src/storage/checkpoint_manager.cpp +49 -24
- package/src/duckdb/src/storage/compression/alp/alp.cpp +6 -11
- package/src/duckdb/src/storage/compression/alprd.cpp +5 -9
- package/src/duckdb/src/storage/compression/bitpacking.cpp +35 -31
- package/src/duckdb/src/storage/compression/chimp/chimp.cpp +6 -8
- package/src/duckdb/src/storage/compression/dictionary_compression.cpp +71 -58
- package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +15 -13
- package/src/duckdb/src/storage/compression/fsst.cpp +66 -53
- package/src/duckdb/src/storage/compression/numeric_constant.cpp +4 -5
- package/src/duckdb/src/storage/compression/patas.cpp +6 -17
- package/src/duckdb/src/storage/compression/rle.cpp +20 -18
- package/src/duckdb/src/storage/compression/string_uncompressed.cpp +71 -52
- package/src/duckdb/src/storage/compression/uncompressed.cpp +2 -2
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +8 -7
- package/src/duckdb/src/storage/data_pointer.cpp +22 -0
- package/src/duckdb/src/storage/data_table.cpp +41 -12
- package/src/duckdb/src/storage/local_storage.cpp +22 -8
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +33 -17
- package/src/duckdb/src/storage/metadata/metadata_reader.cpp +4 -4
- package/src/duckdb/src/storage/metadata/metadata_writer.cpp +3 -3
- package/src/duckdb/src/storage/partial_block_manager.cpp +19 -8
- package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +11 -8
- package/src/duckdb/src/storage/serialization/serialize_expression.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_extension_install_info.cpp +2 -0
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +19 -5
- package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +21 -1
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +4 -2
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +2 -2
- package/src/duckdb/src/storage/serialization/serialize_storage.cpp +2 -0
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +8 -4
- package/src/duckdb/src/storage/serialization/serialize_types.cpp +4 -4
- package/src/duckdb/src/storage/single_file_block_manager.cpp +170 -34
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +221 -64
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +4 -3
- package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +36 -26
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +4 -15
- package/src/duckdb/src/storage/statistics/string_stats.cpp +14 -8
- package/src/duckdb/src/storage/statistics/struct_stats.cpp +2 -1
- package/src/duckdb/src/storage/storage_info.cpp +34 -9
- package/src/duckdb/src/storage/storage_manager.cpp +147 -74
- package/src/duckdb/src/storage/table/array_column_data.cpp +37 -17
- package/src/duckdb/src/storage/table/chunk_info.cpp +38 -0
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +10 -6
- package/src/duckdb/src/storage/table/column_data.cpp +252 -31
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +2 -12
- package/src/duckdb/src/storage/table/column_segment.cpp +63 -34
- package/src/duckdb/src/storage/table/list_column_data.cpp +34 -15
- package/src/duckdb/src/storage/table/row_group.cpp +228 -120
- package/src/duckdb/src/storage/table/row_group_collection.cpp +122 -120
- package/src/duckdb/src/storage/table/row_version_manager.cpp +27 -1
- package/src/duckdb/src/storage/table/scan_state.cpp +101 -18
- package/src/duckdb/src/storage/table/standard_column_data.cpp +20 -34
- package/src/duckdb/src/storage/table/struct_column_data.cpp +39 -42
- package/src/duckdb/src/storage/table/table_statistics.cpp +2 -1
- package/src/duckdb/src/storage/table/update_segment.cpp +9 -8
- package/src/duckdb/src/storage/table/validity_column_data.cpp +2 -2
- package/src/duckdb/src/storage/table_index_list.cpp +8 -7
- package/src/duckdb/src/storage/temporary_file_manager.cpp +11 -9
- package/src/duckdb/src/storage/temporary_memory_manager.cpp +227 -39
- package/src/duckdb/src/storage/wal_replay.cpp +68 -28
- package/src/duckdb/src/storage/write_ahead_log.cpp +56 -47
- package/src/duckdb/src/transaction/cleanup_state.cpp +9 -1
- package/src/duckdb/src/transaction/commit_state.cpp +7 -170
- package/src/duckdb/src/transaction/duck_transaction.cpp +87 -19
- package/src/duckdb/src/transaction/duck_transaction_manager.cpp +65 -10
- package/src/duckdb/src/transaction/meta_transaction.cpp +18 -3
- package/src/duckdb/src/transaction/transaction_context.cpp +21 -17
- package/src/duckdb/src/transaction/undo_buffer.cpp +20 -14
- package/src/duckdb/src/transaction/wal_write_state.cpp +292 -0
- package/src/duckdb/src/verification/prepared_statement_verifier.cpp +0 -1
- package/src/duckdb/third_party/brotli/common/brotli_constants.h +204 -0
- package/src/duckdb/third_party/brotli/common/brotli_platform.h +543 -0
- package/src/duckdb/third_party/brotli/common/constants.cpp +17 -0
- package/src/duckdb/third_party/brotli/common/context.cpp +156 -0
- package/src/duckdb/third_party/brotli/common/context.h +110 -0
- package/src/duckdb/third_party/brotli/common/dictionary.cpp +5912 -0
- package/src/duckdb/third_party/brotli/common/dictionary.h +60 -0
- package/src/duckdb/third_party/brotli/common/platform.cpp +24 -0
- package/src/duckdb/third_party/brotli/common/shared_dictionary.cpp +517 -0
- package/src/duckdb/third_party/brotli/common/shared_dictionary_internal.h +71 -0
- package/src/duckdb/third_party/brotli/common/transform.cpp +287 -0
- package/src/duckdb/third_party/brotli/common/transform.h +77 -0
- package/src/duckdb/third_party/brotli/common/version.h +51 -0
- package/src/duckdb/third_party/brotli/dec/bit_reader.cpp +74 -0
- package/src/duckdb/third_party/brotli/dec/bit_reader.h +419 -0
- package/src/duckdb/third_party/brotli/dec/decode.cpp +2758 -0
- package/src/duckdb/third_party/brotli/dec/huffman.cpp +338 -0
- package/src/duckdb/third_party/brotli/dec/huffman.h +118 -0
- package/src/duckdb/third_party/brotli/dec/prefix.h +733 -0
- package/src/duckdb/third_party/brotli/dec/state.cpp +178 -0
- package/src/duckdb/third_party/brotli/dec/state.h +386 -0
- package/src/duckdb/third_party/brotli/enc/backward_references.cpp +3775 -0
- package/src/duckdb/third_party/brotli/enc/backward_references.h +36 -0
- package/src/duckdb/third_party/brotli/enc/backward_references_hq.cpp +935 -0
- package/src/duckdb/third_party/brotli/enc/backward_references_hq.h +92 -0
- package/src/duckdb/third_party/brotli/enc/bit_cost.cpp +410 -0
- package/src/duckdb/third_party/brotli/enc/bit_cost.h +60 -0
- package/src/duckdb/third_party/brotli/enc/block_splitter.cpp +1653 -0
- package/src/duckdb/third_party/brotli/enc/block_splitter.h +48 -0
- package/src/duckdb/third_party/brotli/enc/brotli_bit_stream.cpp +1431 -0
- package/src/duckdb/third_party/brotli/enc/brotli_bit_stream.h +85 -0
- package/src/duckdb/third_party/brotli/enc/brotli_hash.h +4352 -0
- package/src/duckdb/third_party/brotli/enc/brotli_params.h +47 -0
- package/src/duckdb/third_party/brotli/enc/cluster.cpp +1025 -0
- package/src/duckdb/third_party/brotli/enc/cluster.h +1017 -0
- package/src/duckdb/third_party/brotli/enc/command.cpp +24 -0
- package/src/duckdb/third_party/brotli/enc/command.h +187 -0
- package/src/duckdb/third_party/brotli/enc/compound_dictionary.cpp +209 -0
- package/src/duckdb/third_party/brotli/enc/compound_dictionary.h +75 -0
- package/src/duckdb/third_party/brotli/enc/compress_fragment.cpp +796 -0
- package/src/duckdb/third_party/brotli/enc/compress_fragment.h +82 -0
- package/src/duckdb/third_party/brotli/enc/compress_fragment_two_pass.cpp +653 -0
- package/src/duckdb/third_party/brotli/enc/compress_fragment_two_pass.h +68 -0
- package/src/duckdb/third_party/brotli/enc/dictionary_hash.cpp +1844 -0
- package/src/duckdb/third_party/brotli/enc/dictionary_hash.h +21 -0
- package/src/duckdb/third_party/brotli/enc/encode.cpp +1990 -0
- package/src/duckdb/third_party/brotli/enc/encoder_dict.cpp +636 -0
- package/src/duckdb/third_party/brotli/enc/encoder_dict.h +153 -0
- package/src/duckdb/third_party/brotli/enc/entropy_encode.cpp +500 -0
- package/src/duckdb/third_party/brotli/enc/entropy_encode.h +119 -0
- package/src/duckdb/third_party/brotli/enc/entropy_encode_static.h +538 -0
- package/src/duckdb/third_party/brotli/enc/fast_log.cpp +101 -0
- package/src/duckdb/third_party/brotli/enc/fast_log.h +63 -0
- package/src/duckdb/third_party/brotli/enc/find_match_length.h +68 -0
- package/src/duckdb/third_party/brotli/enc/histogram.cpp +96 -0
- package/src/duckdb/third_party/brotli/enc/histogram.h +210 -0
- package/src/duckdb/third_party/brotli/enc/literal_cost.cpp +176 -0
- package/src/duckdb/third_party/brotli/enc/literal_cost.h +28 -0
- package/src/duckdb/third_party/brotli/enc/memory.cpp +190 -0
- package/src/duckdb/third_party/brotli/enc/memory.h +127 -0
- package/src/duckdb/third_party/brotli/enc/metablock.cpp +1225 -0
- package/src/duckdb/third_party/brotli/enc/metablock.h +102 -0
- package/src/duckdb/third_party/brotli/enc/prefix.h +50 -0
- package/src/duckdb/third_party/brotli/enc/quality.h +202 -0
- package/src/duckdb/third_party/brotli/enc/ringbuffer.h +164 -0
- package/src/duckdb/third_party/brotli/enc/state.h +106 -0
- package/src/duckdb/third_party/brotli/enc/static_dict.cpp +538 -0
- package/src/duckdb/third_party/brotli/enc/static_dict.h +37 -0
- package/src/duckdb/third_party/brotli/enc/static_dict_lut.h +5862 -0
- package/src/duckdb/third_party/brotli/enc/utf8_util.cpp +81 -0
- package/src/duckdb/third_party/brotli/enc/utf8_util.h +29 -0
- package/src/duckdb/third_party/brotli/enc/write_bits.h +84 -0
- package/src/duckdb/third_party/brotli/include/brotli/decode.h +405 -0
- package/src/duckdb/third_party/brotli/include/brotli/encode.h +489 -0
- package/src/duckdb/third_party/brotli/include/brotli/port.h +238 -0
- package/src/duckdb/third_party/brotli/include/brotli/shared_dictionary.h +96 -0
- package/src/duckdb/third_party/brotli/include/brotli/types.h +83 -0
- package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +20 -4
- package/src/duckdb/third_party/fmt/include/fmt/format.h +54 -10
- package/src/duckdb/third_party/fsst/fsst.h +2 -2
- package/src/duckdb/third_party/fsst/libfsst.hpp +2 -2
- package/src/duckdb/third_party/httplib/httplib.hpp +6763 -5580
- package/src/duckdb/third_party/hyperloglog/hyperloglog.cpp +13 -30
- package/src/duckdb/third_party/hyperloglog/hyperloglog.hpp +8 -2
- package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +1 -0
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +22 -9
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +1041 -554
- package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +1 -0
- package/src/duckdb/third_party/libpg_query/postgres_parser.cpp +2 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +21605 -21752
- package/src/duckdb/third_party/libpg_query/src_backend_parser_scan.cpp +538 -299
- package/src/duckdb/third_party/mbedtls/include/mbedtls/mbedtls_config.h +1 -0
- package/src/duckdb/third_party/mbedtls/include/mbedtls_wrapper.hpp +36 -12
- package/src/duckdb/third_party/mbedtls/library/md.cpp +6 -6
- package/src/duckdb/third_party/mbedtls/library/sha1.cpp +2 -0
- package/src/duckdb/third_party/mbedtls/library/sha256.cpp +3 -0
- package/src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp +99 -47
- package/src/duckdb/third_party/pcg/pcg_extras.hpp +1 -1
- package/src/duckdb/third_party/re2/re2/prog.cc +2 -2
- package/src/duckdb/third_party/snappy/snappy-internal.h +398 -0
- package/src/duckdb/third_party/snappy/snappy-sinksource.cc +111 -9
- package/src/duckdb/third_party/snappy/snappy-sinksource.h +158 -0
- package/src/duckdb/third_party/snappy/snappy-stubs-internal.h +523 -3
- package/src/duckdb/third_party/snappy/snappy-stubs-public.h +34 -1
- package/src/duckdb/third_party/snappy/snappy.cc +2626 -0
- package/src/duckdb/third_party/snappy/snappy.h +223 -0
- package/src/duckdb/third_party/snappy/snappy_version.hpp +11 -0
- package/src/duckdb/third_party/utf8proc/include/utf8proc.hpp +69 -101
- package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +53 -0
- package/src/duckdb/third_party/utf8proc/utf8proc.cpp +627 -678
- package/src/duckdb/third_party/utf8proc/utf8proc_data.cpp +15008 -12868
- package/src/duckdb/third_party/utf8proc/utf8proc_wrapper.cpp +185 -29
- package/src/duckdb/ub_extension_json_json_functions.cpp +6 -0
- package/src/duckdb/ub_src_catalog_default.cpp +4 -0
- package/src/duckdb/ub_src_common.cpp +7 -1
- package/src/duckdb/ub_src_common_arrow.cpp +10 -0
- package/src/duckdb/ub_src_common_enums.cpp +2 -0
- package/src/duckdb/ub_src_common_tree_renderer.cpp +10 -0
- package/src/duckdb/ub_src_common_types.cpp +2 -0
- package/src/duckdb/ub_src_core_functions_aggregate_holistic.cpp +4 -0
- package/src/duckdb/ub_src_core_functions_aggregate_nested.cpp +2 -0
- package/src/duckdb/ub_src_core_functions_scalar_generic.cpp +2 -0
- package/src/duckdb/ub_src_core_functions_scalar_list.cpp +2 -4
- package/src/duckdb/ub_src_core_functions_scalar_map.cpp +2 -0
- package/src/duckdb/ub_src_core_functions_scalar_string.cpp +4 -0
- package/src/duckdb/ub_src_execution_index_art.cpp +5 -3
- package/src/duckdb/ub_src_execution_operator_csv_scanner_scanner.cpp +2 -0
- package/src/duckdb/ub_src_execution_operator_helper.cpp +4 -0
- package/src/duckdb/ub_src_function.cpp +4 -0
- package/src/duckdb/ub_src_function_cast.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_generic.cpp +4 -0
- package/src/duckdb/ub_src_function_scalar_list.cpp +0 -2
- package/src/duckdb/ub_src_function_scalar_string.cpp +2 -0
- package/src/duckdb/ub_src_function_table.cpp +2 -0
- package/src/duckdb/ub_src_function_table_arrow.cpp +2 -0
- package/src/duckdb/ub_src_function_table_system.cpp +2 -0
- package/src/duckdb/ub_src_main.cpp +4 -0
- package/src/duckdb/ub_src_main_buffered_data.cpp +4 -0
- package/src/duckdb/ub_src_main_capi.cpp +10 -0
- package/src/duckdb/ub_src_main_chunk_scan_state.cpp +2 -0
- package/src/duckdb/ub_src_main_relation.cpp +2 -0
- package/src/duckdb/ub_src_main_secret.cpp +2 -0
- package/src/duckdb/ub_src_optimizer.cpp +8 -0
- package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +2 -0
- package/src/duckdb/ub_src_optimizer_pushdown.cpp +2 -0
- package/src/duckdb/ub_src_optimizer_rule.cpp +2 -0
- package/src/duckdb/ub_src_parallel.cpp +4 -0
- package/src/duckdb/ub_src_parser_tableref.cpp +2 -0
- package/src/duckdb/ub_src_planner.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_expression.cpp +2 -0
- package/src/duckdb/ub_src_planner_binder_tableref.cpp +4 -0
- package/src/duckdb/ub_src_storage_statistics.cpp +0 -2
- package/src/duckdb/ub_src_transaction.cpp +2 -0
- package/test/columns.test.ts +1 -1
- package/test/prepare.test.ts +1 -1
- package/test/test_all_types.test.ts +1 -1
@@ -2,12 +2,14 @@
|
|
2
2
|
|
3
3
|
#include "duckdb/common/algorithm.hpp"
|
4
4
|
#include "duckdb/common/helper.hpp"
|
5
|
+
#include "duckdb/common/sort/partition_state.hpp"
|
5
6
|
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
6
7
|
#include "duckdb/execution/merge_sort_tree.hpp"
|
7
8
|
#include "duckdb/planner/expression/bound_constant_expression.hpp"
|
8
9
|
#include "duckdb/execution/window_executor.hpp"
|
9
10
|
|
10
11
|
#include <numeric>
|
12
|
+
#include <thread>
|
11
13
|
#include <utility>
|
12
14
|
|
13
15
|
namespace duckdb {
|
@@ -18,50 +20,201 @@ namespace duckdb {
|
|
18
20
|
WindowAggregatorState::WindowAggregatorState() : allocator(Allocator::DefaultAllocator()) {
|
19
21
|
}
|
20
22
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
23
|
+
class WindowAggregatorGlobalState : public WindowAggregatorState {
|
24
|
+
public:
|
25
|
+
WindowAggregatorGlobalState(const WindowAggregator &aggregator_p, idx_t group_count)
|
26
|
+
: aggregator(aggregator_p), winputs(inputs), locals(0), finalized(0) {
|
27
|
+
|
28
|
+
if (!aggregator.arg_types.empty()) {
|
29
|
+
winputs.Initialize(Allocator::DefaultAllocator(), aggregator.arg_types, group_count);
|
30
|
+
}
|
31
|
+
if (aggregator.aggr.filter) {
|
32
|
+
// Start with all invalid and set the ones that pass
|
33
|
+
filter_mask.Initialize(group_count, false);
|
34
|
+
}
|
35
|
+
}
|
36
|
+
|
37
|
+
//! The aggregator data
|
38
|
+
const WindowAggregator &aggregator;
|
39
|
+
|
40
|
+
//! Partition data chunk
|
41
|
+
DataChunk inputs;
|
42
|
+
WindowDataChunk winputs;
|
43
|
+
|
44
|
+
//! The filtered rows in inputs.
|
45
|
+
ValidityArray filter_mask;
|
46
|
+
|
47
|
+
//! Lock for single threading
|
48
|
+
mutable mutex lock;
|
49
|
+
|
50
|
+
//! Count of local tasks
|
51
|
+
mutable std::atomic<idx_t> locals;
|
52
|
+
|
53
|
+
//! Number of finalised states
|
54
|
+
std::atomic<idx_t> finalized;
|
55
|
+
};
|
56
|
+
|
57
|
+
WindowAggregator::WindowAggregator(AggregateObject aggr_p, const vector<LogicalType> &arg_types_p,
|
58
|
+
const LogicalType &result_type_p, const WindowExcludeMode exclude_mode_p)
|
59
|
+
: aggr(std::move(aggr_p)), arg_types(arg_types_p), result_type(result_type_p),
|
60
|
+
state_size(aggr.function.state_size(aggr.function)), exclude_mode(exclude_mode_p) {
|
25
61
|
}
|
26
62
|
|
27
63
|
WindowAggregator::~WindowAggregator() {
|
28
64
|
}
|
29
65
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
66
|
+
unique_ptr<WindowAggregatorState> WindowAggregator::GetGlobalState(idx_t group_count, const ValidityMask &) const {
|
67
|
+
return make_uniq<WindowAggregatorGlobalState>(*this, group_count);
|
68
|
+
}
|
69
|
+
|
70
|
+
void WindowAggregator::Sink(WindowAggregatorState &gsink, WindowAggregatorState &lstate, DataChunk &arg_chunk,
|
71
|
+
idx_t input_idx, optional_ptr<SelectionVector> filter_sel, idx_t filtered) {
|
72
|
+
auto &gasink = gsink.Cast<WindowAggregatorGlobalState>();
|
73
|
+
auto &winputs = gasink.winputs;
|
74
|
+
auto &filter_mask = gasink.filter_mask;
|
75
|
+
if (winputs.chunk.ColumnCount()) {
|
76
|
+
winputs.Copy(arg_chunk, input_idx);
|
36
77
|
}
|
37
78
|
if (filter_sel) {
|
38
|
-
// Lazy instantiation
|
39
|
-
if (!filter_mask.IsMaskSet()) {
|
40
|
-
// Start with all invalid and set the ones that pass
|
41
|
-
filter_bits.resize(ValidityMask::ValidityMaskSize(partition_count), 0);
|
42
|
-
filter_mask.Initialize(filter_bits.data());
|
43
|
-
}
|
44
79
|
for (idx_t f = 0; f < filtered; ++f) {
|
45
|
-
filter_mask.SetValid(
|
80
|
+
filter_mask.SetValid(input_idx + filter_sel->get_index(f));
|
46
81
|
}
|
47
|
-
filter_pos += payload_chunk.size();
|
48
82
|
}
|
49
83
|
}
|
50
84
|
|
51
|
-
void WindowAggregator::Finalize(const FrameStats &stats) {
|
85
|
+
void WindowAggregator::Finalize(WindowAggregatorState &gstate, WindowAggregatorState &lstate, const FrameStats &stats) {
|
52
86
|
}
|
53
87
|
|
54
88
|
//===--------------------------------------------------------------------===//
|
55
|
-
//
|
89
|
+
// WindowConstantAggregator
|
56
90
|
//===--------------------------------------------------------------------===//
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
statef(Value::POINTER(CastPointerToValue(state.data()))) {
|
91
|
+
struct WindowAggregateStates {
|
92
|
+
explicit WindowAggregateStates(const AggregateObject &aggr);
|
93
|
+
~WindowAggregateStates() {
|
94
|
+
Destroy();
|
95
|
+
}
|
63
96
|
|
64
|
-
|
97
|
+
//! The number of states
|
98
|
+
idx_t GetCount() const {
|
99
|
+
return states.size() / state_size;
|
100
|
+
}
|
101
|
+
data_ptr_t *GetData() {
|
102
|
+
return FlatVector::GetData<data_ptr_t>(*statef);
|
103
|
+
}
|
104
|
+
data_ptr_t GetStatePtr(idx_t idx) {
|
105
|
+
return states.data() + idx * state_size;
|
106
|
+
}
|
107
|
+
const_data_ptr_t GetStatePtr(idx_t idx) const {
|
108
|
+
return states.data() + idx * state_size;
|
109
|
+
}
|
110
|
+
//! Initialise all the states
|
111
|
+
void Initialize(idx_t count);
|
112
|
+
//! Combine the states into the target
|
113
|
+
void Combine(WindowAggregateStates &target,
|
114
|
+
AggregateCombineType combine_type = AggregateCombineType::PRESERVE_INPUT);
|
115
|
+
//! Finalize the states into an output vector
|
116
|
+
void Finalize(Vector &result);
|
117
|
+
//! Destroy the states
|
118
|
+
void Destroy();
|
119
|
+
|
120
|
+
//! A description of the aggregator
|
121
|
+
const AggregateObject aggr;
|
122
|
+
//! The size of each state
|
123
|
+
const idx_t state_size;
|
124
|
+
//! The allocator to use
|
125
|
+
ArenaAllocator allocator;
|
126
|
+
//! Data pointer that contains the state data
|
127
|
+
vector<data_t> states;
|
128
|
+
//! Reused result state container for the window functions
|
129
|
+
unique_ptr<Vector> statef;
|
130
|
+
};
|
131
|
+
|
132
|
+
WindowAggregateStates::WindowAggregateStates(const AggregateObject &aggr)
|
133
|
+
: aggr(aggr), state_size(aggr.function.state_size(aggr.function)), allocator(Allocator::DefaultAllocator()) {
|
134
|
+
}
|
135
|
+
|
136
|
+
void WindowAggregateStates::Initialize(idx_t count) {
|
137
|
+
states.resize(count * state_size);
|
138
|
+
auto state_ptr = states.data();
|
139
|
+
|
140
|
+
statef = make_uniq<Vector>(LogicalType::POINTER, count);
|
141
|
+
auto state_f_data = FlatVector::GetData<data_ptr_t>(*statef);
|
142
|
+
|
143
|
+
for (idx_t i = 0; i < count; ++i, state_ptr += state_size) {
|
144
|
+
state_f_data[i] = state_ptr;
|
145
|
+
aggr.function.initialize(aggr.function, state_ptr);
|
146
|
+
}
|
147
|
+
|
148
|
+
// Prevent conversion of results to constants
|
149
|
+
statef->SetVectorType(VectorType::FLAT_VECTOR);
|
150
|
+
}
|
151
|
+
|
152
|
+
void WindowAggregateStates::Combine(WindowAggregateStates &target, AggregateCombineType combine_type) {
|
153
|
+
AggregateInputData aggr_input_data(aggr.GetFunctionData(), allocator, AggregateCombineType::ALLOW_DESTRUCTIVE);
|
154
|
+
aggr.function.combine(*statef, *target.statef, aggr_input_data, GetCount());
|
155
|
+
}
|
156
|
+
|
157
|
+
void WindowAggregateStates::Finalize(Vector &result) {
|
158
|
+
AggregateInputData aggr_input_data(aggr.GetFunctionData(), allocator);
|
159
|
+
aggr.function.finalize(*statef, aggr_input_data, result, GetCount(), 0);
|
160
|
+
}
|
161
|
+
|
162
|
+
void WindowAggregateStates::Destroy() {
|
163
|
+
if (states.empty()) {
|
164
|
+
return;
|
165
|
+
}
|
166
|
+
|
167
|
+
AggregateInputData aggr_input_data(aggr.GetFunctionData(), allocator);
|
168
|
+
if (aggr.function.destructor) {
|
169
|
+
aggr.function.destructor(*statef, aggr_input_data, GetCount());
|
170
|
+
}
|
171
|
+
|
172
|
+
states.clear();
|
173
|
+
}
|
174
|
+
|
175
|
+
class WindowConstantAggregatorGlobalState : public WindowAggregatorGlobalState {
|
176
|
+
public:
|
177
|
+
WindowConstantAggregatorGlobalState(const WindowConstantAggregator &aggregator, idx_t count,
|
178
|
+
const ValidityMask &partition_mask);
|
179
|
+
|
180
|
+
void Finalize(const FrameStats &stats);
|
181
|
+
|
182
|
+
//! Partition starts
|
183
|
+
vector<idx_t> partition_offsets;
|
184
|
+
//! Reused result state container for the window functions
|
185
|
+
WindowAggregateStates statef;
|
186
|
+
//! Aggregate results
|
187
|
+
unique_ptr<Vector> results;
|
188
|
+
};
|
189
|
+
|
190
|
+
class WindowConstantAggregatorLocalState : public WindowAggregatorState {
|
191
|
+
public:
|
192
|
+
explicit WindowConstantAggregatorLocalState(const WindowConstantAggregatorGlobalState &gstate);
|
193
|
+
~WindowConstantAggregatorLocalState() override {
|
194
|
+
}
|
195
|
+
|
196
|
+
void Sink(DataChunk &payload_chunk, idx_t input_idx, optional_ptr<SelectionVector> filter_sel, idx_t filtered);
|
197
|
+
void Combine(WindowConstantAggregatorGlobalState &gstate);
|
198
|
+
|
199
|
+
public:
|
200
|
+
//! The global state we are sharing
|
201
|
+
const WindowConstantAggregatorGlobalState &gstate;
|
202
|
+
//! Reusable chunk for sinking
|
203
|
+
DataChunk inputs;
|
204
|
+
//! A vector of pointers to "state", used for intermediate window segment aggregation
|
205
|
+
Vector statep;
|
206
|
+
//! Reused result state container for the window functions
|
207
|
+
WindowAggregateStates statef;
|
208
|
+
//! The current result partition being read
|
209
|
+
idx_t partition;
|
210
|
+
//! Shared SV for evaluation
|
211
|
+
SelectionVector matches;
|
212
|
+
};
|
213
|
+
|
214
|
+
WindowConstantAggregatorGlobalState::WindowConstantAggregatorGlobalState(const WindowConstantAggregator &aggregator,
|
215
|
+
idx_t group_count,
|
216
|
+
const ValidityMask &partition_mask)
|
217
|
+
: WindowAggregatorGlobalState(aggregator, STANDARD_VECTOR_SIZE), statef(aggregator.aggr) {
|
65
218
|
|
66
219
|
// Locate the partition boundaries
|
67
220
|
if (partition_mask.AllValid()) {
|
@@ -69,7 +222,7 @@ WindowConstantAggregator::WindowConstantAggregator(AggregateObject aggr, const L
|
|
69
222
|
} else {
|
70
223
|
idx_t entry_idx;
|
71
224
|
idx_t shift;
|
72
|
-
for (idx_t start = 0; start <
|
225
|
+
for (idx_t start = 0; start < group_count;) {
|
73
226
|
partition_mask.GetEntryIndex(start, entry_idx, shift);
|
74
227
|
|
75
228
|
// If start is aligned with the start of a block,
|
@@ -81,7 +234,7 @@ WindowConstantAggregator::WindowConstantAggregator(AggregateObject aggr, const L
|
|
81
234
|
}
|
82
235
|
|
83
236
|
// Loop over the block
|
84
|
-
for (; shift < ValidityMask::BITS_PER_VALUE && start <
|
237
|
+
for (; shift < ValidityMask::BITS_PER_VALUE && start < group_count; ++shift, ++start) {
|
85
238
|
if (partition_mask.RowIsValid(block, shift)) {
|
86
239
|
partition_offsets.emplace_back(start);
|
87
240
|
}
|
@@ -90,45 +243,70 @@ WindowConstantAggregator::WindowConstantAggregator(AggregateObject aggr, const L
|
|
90
243
|
}
|
91
244
|
|
92
245
|
// Initialise the vector for caching the results
|
93
|
-
results = make_uniq<Vector>(result_type, partition_offsets.size());
|
94
|
-
partition_offsets.emplace_back(count);
|
246
|
+
results = make_uniq<Vector>(aggregator.result_type, partition_offsets.size());
|
95
247
|
|
96
|
-
//
|
97
|
-
|
248
|
+
// Initialise the final states
|
249
|
+
statef.Initialize(partition_offsets.size());
|
98
250
|
|
99
|
-
//
|
100
|
-
|
251
|
+
// Add final guard
|
252
|
+
partition_offsets.emplace_back(group_count);
|
101
253
|
}
|
102
254
|
|
103
|
-
|
104
|
-
|
255
|
+
WindowConstantAggregatorLocalState::WindowConstantAggregatorLocalState(
|
256
|
+
const WindowConstantAggregatorGlobalState &gstate)
|
257
|
+
: gstate(gstate), statep(Value::POINTER(0)), statef(gstate.statef.aggr), partition(0) {
|
258
|
+
matches.Initialize();
|
259
|
+
|
260
|
+
// Start the aggregates
|
261
|
+
auto &partition_offsets = gstate.partition_offsets;
|
262
|
+
auto &aggregator = gstate.aggregator;
|
263
|
+
statef.Initialize(partition_offsets.size() - 1);
|
264
|
+
|
265
|
+
// Set up shared buffer
|
266
|
+
inputs.Initialize(Allocator::DefaultAllocator(), aggregator.arg_types);
|
267
|
+
|
268
|
+
gstate.locals++;
|
105
269
|
}
|
106
270
|
|
107
|
-
|
108
|
-
|
109
|
-
|
271
|
+
WindowConstantAggregator::WindowConstantAggregator(AggregateObject aggr, const vector<LogicalType> &arg_types,
|
272
|
+
const LogicalType &result_type,
|
273
|
+
const WindowExcludeMode exclude_mode_p)
|
274
|
+
: WindowAggregator(std::move(aggr), arg_types, result_type, exclude_mode_p) {
|
275
|
+
}
|
110
276
|
|
111
|
-
|
112
|
-
|
113
|
-
|
277
|
+
unique_ptr<WindowAggregatorState> WindowConstantAggregator::GetGlobalState(idx_t group_count,
|
278
|
+
const ValidityMask &partition_mask) const {
|
279
|
+
return make_uniq<WindowConstantAggregatorGlobalState>(*this, group_count, partition_mask);
|
114
280
|
}
|
115
281
|
|
116
|
-
void WindowConstantAggregator::Sink(
|
282
|
+
void WindowConstantAggregator::Sink(WindowAggregatorState &gsink, WindowAggregatorState &lstate, DataChunk &arg_chunk,
|
283
|
+
idx_t input_idx, optional_ptr<SelectionVector> filter_sel, idx_t filtered) {
|
284
|
+
auto &lastate = lstate.Cast<WindowConstantAggregatorLocalState>();
|
285
|
+
|
286
|
+
lastate.Sink(arg_chunk, input_idx, filter_sel, filtered);
|
287
|
+
}
|
288
|
+
|
289
|
+
void WindowConstantAggregatorLocalState::Sink(DataChunk &payload_chunk, idx_t row,
|
290
|
+
optional_ptr<SelectionVector> filter_sel, idx_t filtered) {
|
291
|
+
auto &partition_offsets = gstate.partition_offsets;
|
292
|
+
auto &aggregator = gstate.aggregator;
|
293
|
+
const auto &aggr = aggregator.aggr;
|
117
294
|
const auto chunk_begin = row;
|
118
295
|
const auto chunk_end = chunk_begin + payload_chunk.size();
|
296
|
+
idx_t partition =
|
297
|
+
idx_t(std::upper_bound(partition_offsets.begin(), partition_offsets.end(), row) - partition_offsets.begin()) -
|
298
|
+
1;
|
119
299
|
|
120
|
-
|
121
|
-
|
122
|
-
}
|
300
|
+
auto state_f_data = statef.GetData();
|
301
|
+
auto state_p_data = FlatVector::GetData<data_ptr_t>(statep);
|
123
302
|
|
124
|
-
AggregateInputData aggr_input_data(aggr.GetFunctionData(),
|
303
|
+
AggregateInputData aggr_input_data(aggr.GetFunctionData(), allocator);
|
125
304
|
idx_t begin = 0;
|
126
305
|
idx_t filter_idx = 0;
|
127
306
|
auto partition_end = partition_offsets[partition + 1];
|
128
307
|
while (row < chunk_end) {
|
129
308
|
if (row == partition_end) {
|
130
|
-
|
131
|
-
AggregateInit();
|
309
|
+
++partition;
|
132
310
|
partition_end = partition_offsets[partition + 1];
|
133
311
|
}
|
134
312
|
partition_end = MinValue(partition_end, chunk_end);
|
@@ -174,9 +352,11 @@ void WindowConstantAggregator::Sink(DataChunk &payload_chunk, SelectionVector *f
|
|
174
352
|
|
175
353
|
// Aggregate the filtered rows into a single state
|
176
354
|
const auto count = inputs.size();
|
355
|
+
auto state = state_f_data[partition];
|
177
356
|
if (aggr.function.simple_update) {
|
178
|
-
aggr.function.simple_update(inputs.data.data(), aggr_input_data, inputs.ColumnCount(), state
|
357
|
+
aggr.function.simple_update(inputs.data.data(), aggr_input_data, inputs.ColumnCount(), state, count);
|
179
358
|
} else {
|
359
|
+
state_p_data[0] = state_f_data[partition];
|
180
360
|
aggr.function.update(inputs.data.data(), aggr_input_data, inputs.ColumnCount(), statep, count);
|
181
361
|
}
|
182
362
|
|
@@ -186,34 +366,36 @@ void WindowConstantAggregator::Sink(DataChunk &payload_chunk, SelectionVector *f
|
|
186
366
|
}
|
187
367
|
}
|
188
368
|
|
189
|
-
void WindowConstantAggregator::Finalize(
|
190
|
-
|
191
|
-
|
369
|
+
void WindowConstantAggregator::Finalize(WindowAggregatorState &gstate, WindowAggregatorState &lstate,
|
370
|
+
const FrameStats &stats) {
|
371
|
+
auto &gastate = gstate.Cast<WindowConstantAggregatorGlobalState>();
|
372
|
+
auto &lastate = lstate.Cast<WindowConstantAggregatorLocalState>();
|
192
373
|
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
}
|
198
|
-
~WindowConstantAggregatorState() override {
|
199
|
-
}
|
374
|
+
// Single-threaded combine
|
375
|
+
lock_guard<mutex> finalize_guard(gastate.lock);
|
376
|
+
lastate.statef.Combine(gastate.statef);
|
377
|
+
lastate.statef.Destroy();
|
200
378
|
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
}
|
379
|
+
// Last one out turns off the lights!
|
380
|
+
if (++gastate.finalized == gastate.locals) {
|
381
|
+
gastate.statef.Finalize(*gastate.results);
|
382
|
+
gastate.statef.Destroy();
|
383
|
+
}
|
384
|
+
}
|
207
385
|
|
208
|
-
unique_ptr<WindowAggregatorState> WindowConstantAggregator::GetLocalState() const {
|
209
|
-
return make_uniq<
|
386
|
+
unique_ptr<WindowAggregatorState> WindowConstantAggregator::GetLocalState(const WindowAggregatorState &gstate) const {
|
387
|
+
return make_uniq<WindowConstantAggregatorLocalState>(gstate.Cast<WindowConstantAggregatorGlobalState>());
|
210
388
|
}
|
211
389
|
|
212
|
-
void WindowConstantAggregator::Evaluate(
|
213
|
-
idx_t count, idx_t row_idx) const {
|
390
|
+
void WindowConstantAggregator::Evaluate(const WindowAggregatorState &gsink, WindowAggregatorState &lstate,
|
391
|
+
const DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) const {
|
392
|
+
auto &gasink = gsink.Cast<WindowConstantAggregatorGlobalState>();
|
393
|
+
const auto &partition_offsets = gasink.partition_offsets;
|
394
|
+
const auto &results = *gasink.results;
|
395
|
+
|
214
396
|
auto begins = FlatVector::GetData<const idx_t>(bounds.data[WINDOW_BEGIN]);
|
215
397
|
// Chunk up the constants and copy them one at a time
|
216
|
-
auto &lcstate = lstate.Cast<
|
398
|
+
auto &lcstate = lstate.Cast<WindowConstantAggregatorLocalState>();
|
217
399
|
idx_t matched = 0;
|
218
400
|
idx_t target_offset = 0;
|
219
401
|
for (idx_t i = 0; i < count; ++i) {
|
@@ -222,7 +404,7 @@ void WindowConstantAggregator::Evaluate(WindowAggregatorState &lstate, const Dat
|
|
222
404
|
while (partition_offsets[lcstate.partition + 1] <= begin) {
|
223
405
|
// Flush the previous partition's data
|
224
406
|
if (matched) {
|
225
|
-
VectorOperations::Copy(
|
407
|
+
VectorOperations::Copy(results, result, lcstate.matches, matched, 0, target_offset);
|
226
408
|
target_offset += matched;
|
227
409
|
matched = 0;
|
228
410
|
}
|
@@ -234,16 +416,22 @@ void WindowConstantAggregator::Evaluate(WindowAggregatorState &lstate, const Dat
|
|
234
416
|
|
235
417
|
// Flush the last partition
|
236
418
|
if (matched) {
|
237
|
-
|
419
|
+
// Optimize constant result
|
420
|
+
if (target_offset == 0 && matched == count) {
|
421
|
+
VectorOperations::Copy(results, result, lcstate.matches, 1, 0, target_offset);
|
422
|
+
result.SetVectorType(VectorType::CONSTANT_VECTOR);
|
423
|
+
} else {
|
424
|
+
VectorOperations::Copy(results, result, lcstate.matches, matched, 0, target_offset);
|
425
|
+
}
|
238
426
|
}
|
239
427
|
}
|
240
428
|
|
241
429
|
//===--------------------------------------------------------------------===//
|
242
430
|
// WindowCustomAggregator
|
243
431
|
//===--------------------------------------------------------------------===//
|
244
|
-
WindowCustomAggregator::WindowCustomAggregator(AggregateObject aggr, const LogicalType &
|
245
|
-
const
|
246
|
-
: WindowAggregator(std::move(aggr),
|
432
|
+
WindowCustomAggregator::WindowCustomAggregator(AggregateObject aggr, const vector<LogicalType> &arg_types,
|
433
|
+
const LogicalType &result_type, const WindowExcludeMode exclude_mode)
|
434
|
+
: WindowAggregator(std::move(aggr), arg_types, result_type, exclude_mode) {
|
247
435
|
}
|
248
436
|
|
249
437
|
WindowCustomAggregator::~WindowCustomAggregator() {
|
@@ -282,12 +470,28 @@ static void InitSubFrames(SubFrames &frames, const WindowExcludeMode exclude_mod
|
|
282
470
|
frames.resize(nframes, {0, 0});
|
283
471
|
}
|
284
472
|
|
473
|
+
class WindowCustomAggregatorGlobalState : public WindowAggregatorGlobalState {
|
474
|
+
public:
|
475
|
+
explicit WindowCustomAggregatorGlobalState(const WindowCustomAggregator &aggregator, idx_t group_count)
|
476
|
+
: WindowAggregatorGlobalState(aggregator, group_count) {
|
477
|
+
|
478
|
+
gcstate = make_uniq<WindowCustomAggregatorState>(aggregator.aggr, aggregator.exclude_mode);
|
479
|
+
}
|
480
|
+
|
481
|
+
//! Traditional packed filter mask for API
|
482
|
+
ValidityMask filter_packed;
|
483
|
+
//! Data pointer that contains a single local state, used for global custom window execution state
|
484
|
+
unique_ptr<WindowCustomAggregatorState> gcstate;
|
485
|
+
//! Partition description for custom window APIs
|
486
|
+
unique_ptr<WindowPartitionInput> partition_input;
|
487
|
+
};
|
488
|
+
|
285
489
|
WindowCustomAggregatorState::WindowCustomAggregatorState(const AggregateObject &aggr,
|
286
490
|
const WindowExcludeMode exclude_mode)
|
287
|
-
: aggr(aggr), state(aggr.function.state_size(
|
288
|
-
frames(3, {0, 0}) {
|
491
|
+
: aggr(aggr), state(aggr.function.state_size(aggr.function)),
|
492
|
+
statef(Value::POINTER(CastPointerToValue(state.data()))), frames(3, {0, 0}) {
|
289
493
|
// if we have a frame-by-frame method, share the single state
|
290
|
-
aggr.function.initialize(state.data());
|
494
|
+
aggr.function.initialize(aggr.function, state.data());
|
291
495
|
|
292
496
|
InitSubFrames(frames, exclude_mode);
|
293
497
|
}
|
@@ -299,21 +503,41 @@ WindowCustomAggregatorState::~WindowCustomAggregatorState() {
|
|
299
503
|
}
|
300
504
|
}
|
301
505
|
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
506
|
+
unique_ptr<WindowAggregatorState> WindowCustomAggregator::GetGlobalState(idx_t group_count,
|
507
|
+
const ValidityMask &) const {
|
508
|
+
return make_uniq<WindowCustomAggregatorGlobalState>(*this, group_count);
|
509
|
+
}
|
510
|
+
|
511
|
+
void WindowCustomAggregator::Finalize(WindowAggregatorState &gsink, WindowAggregatorState &lstate,
|
512
|
+
const FrameStats &stats) {
|
513
|
+
// Single threaded Finalize for now
|
514
|
+
auto &gcsink = gsink.Cast<WindowCustomAggregatorGlobalState>();
|
515
|
+
lock_guard<mutex> gestate_guard(gcsink.lock);
|
516
|
+
if (gcsink.finalized) {
|
517
|
+
return;
|
518
|
+
}
|
519
|
+
|
520
|
+
WindowAggregator::Finalize(gsink, lstate, stats);
|
521
|
+
|
522
|
+
auto &inputs = gcsink.inputs;
|
523
|
+
auto &filter_mask = gcsink.filter_mask;
|
524
|
+
auto &filter_packed = gcsink.filter_packed;
|
525
|
+
filter_mask.Pack(filter_packed, filter_mask.target_count);
|
526
|
+
|
527
|
+
gcsink.partition_input =
|
528
|
+
make_uniq<WindowPartitionInput>(inputs.data.data(), inputs.ColumnCount(), inputs.size(), filter_packed, stats);
|
306
529
|
|
307
530
|
if (aggr.function.window_init) {
|
308
|
-
|
309
|
-
auto &gcstate = gstate->Cast<WindowCustomAggregatorState>();
|
531
|
+
auto &gcstate = *gcsink.gcstate;
|
310
532
|
|
311
533
|
AggregateInputData aggr_input_data(aggr.GetFunctionData(), gcstate.allocator);
|
312
|
-
aggr.function.window_init(aggr_input_data, *partition_input, gcstate.state.data());
|
534
|
+
aggr.function.window_init(aggr_input_data, *gcsink.partition_input, gcstate.state.data());
|
313
535
|
}
|
536
|
+
|
537
|
+
++gcsink.finalized;
|
314
538
|
}
|
315
539
|
|
316
|
-
unique_ptr<WindowAggregatorState> WindowCustomAggregator::GetLocalState() const {
|
540
|
+
unique_ptr<WindowAggregatorState> WindowCustomAggregator::GetLocalState(const WindowAggregatorState &gstate) const {
|
317
541
|
return make_uniq<WindowCustomAggregatorState>(aggr, exclude_mode);
|
318
542
|
}
|
319
543
|
|
@@ -374,29 +598,30 @@ static void EvaluateSubFrames(const DataChunk &bounds, const WindowExcludeMode e
|
|
374
598
|
}
|
375
599
|
}
|
376
600
|
|
377
|
-
void WindowCustomAggregator::Evaluate(
|
378
|
-
idx_t count, idx_t row_idx) const {
|
601
|
+
void WindowCustomAggregator::Evaluate(const WindowAggregatorState &gsink, WindowAggregatorState &lstate,
|
602
|
+
const DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) const {
|
379
603
|
auto &lcstate = lstate.Cast<WindowCustomAggregatorState>();
|
380
604
|
auto &frames = lcstate.frames;
|
381
605
|
const_data_ptr_t gstate_p = nullptr;
|
382
|
-
|
383
|
-
|
384
|
-
gstate_p = gcstate
|
606
|
+
auto &gcsink = gsink.Cast<WindowCustomAggregatorGlobalState>();
|
607
|
+
if (gcsink.gcstate) {
|
608
|
+
gstate_p = gcsink.gcstate->state.data();
|
385
609
|
}
|
386
610
|
|
387
611
|
EvaluateSubFrames(bounds, exclude_mode, count, row_idx, frames, [&](idx_t i) {
|
388
612
|
// Extract the range
|
389
613
|
AggregateInputData aggr_input_data(aggr.GetFunctionData(), lstate.allocator);
|
390
|
-
aggr.function.window(aggr_input_data, *partition_input, gstate_p, lcstate.state.data(), frames, result,
|
614
|
+
aggr.function.window(aggr_input_data, *gcsink.partition_input, gstate_p, lcstate.state.data(), frames, result,
|
615
|
+
i);
|
391
616
|
});
|
392
617
|
}
|
393
618
|
|
394
619
|
//===--------------------------------------------------------------------===//
|
395
620
|
// WindowNaiveAggregator
|
396
621
|
//===--------------------------------------------------------------------===//
|
397
|
-
WindowNaiveAggregator::WindowNaiveAggregator(AggregateObject aggr, const LogicalType &
|
398
|
-
const
|
399
|
-
: WindowAggregator(std::move(aggr),
|
622
|
+
WindowNaiveAggregator::WindowNaiveAggregator(AggregateObject aggr, const vector<LogicalType> &arg_types,
|
623
|
+
const LogicalType &result_type, const WindowExcludeMode exclude_mode)
|
624
|
+
: WindowAggregator(std::move(aggr), arg_types, result_type, exclude_mode) {
|
400
625
|
}
|
401
626
|
|
402
627
|
WindowNaiveAggregator::~WindowNaiveAggregator() {
|
@@ -405,44 +630,47 @@ WindowNaiveAggregator::~WindowNaiveAggregator() {
|
|
405
630
|
class WindowNaiveState : public WindowAggregatorState {
|
406
631
|
public:
|
407
632
|
struct HashRow {
|
408
|
-
|
633
|
+
HashRow(WindowNaiveState &state, const DataChunk &inputs) : state(state), inputs(inputs) {
|
409
634
|
}
|
410
635
|
|
411
636
|
size_t operator()(const idx_t &i) const {
|
412
|
-
return state.Hash(i);
|
637
|
+
return state.Hash(inputs, i);
|
413
638
|
}
|
414
639
|
|
415
640
|
WindowNaiveState &state;
|
641
|
+
const DataChunk &inputs;
|
416
642
|
};
|
417
643
|
|
418
644
|
struct EqualRow {
|
419
|
-
|
645
|
+
EqualRow(WindowNaiveState &state, const DataChunk &inputs) : state(state), inputs(inputs) {
|
420
646
|
}
|
421
647
|
|
422
648
|
bool operator()(const idx_t &lhs, const idx_t &rhs) const {
|
423
|
-
return state.KeyEqual(lhs, rhs);
|
649
|
+
return state.KeyEqual(inputs, lhs, rhs);
|
424
650
|
}
|
425
651
|
|
426
652
|
WindowNaiveState &state;
|
653
|
+
const DataChunk &inputs;
|
427
654
|
};
|
428
655
|
|
429
656
|
using RowSet = std::unordered_set<idx_t, HashRow, EqualRow>;
|
430
657
|
|
431
|
-
explicit WindowNaiveState(const WindowNaiveAggregator &
|
658
|
+
explicit WindowNaiveState(const WindowNaiveAggregator &gsink);
|
432
659
|
|
433
|
-
void Evaluate(const DataChunk &bounds, Vector &result, idx_t count,
|
660
|
+
void Evaluate(const WindowAggregatorGlobalState &gsink, const DataChunk &bounds, Vector &result, idx_t count,
|
661
|
+
idx_t row_idx);
|
434
662
|
|
435
663
|
protected:
|
436
664
|
//! Flush the accumulated intermediate states into the result states
|
437
|
-
void FlushStates();
|
665
|
+
void FlushStates(const WindowAggregatorGlobalState &gsink);
|
438
666
|
|
439
667
|
//! Hashes a value for the hash table
|
440
|
-
size_t Hash(idx_t rid);
|
668
|
+
size_t Hash(const DataChunk &inputs, idx_t rid);
|
441
669
|
//! Compares two values for the hash table
|
442
|
-
bool KeyEqual(const idx_t &lhs, const idx_t &rhs);
|
670
|
+
bool KeyEqual(const DataChunk &inputs, const idx_t &lhs, const idx_t &rhs);
|
443
671
|
|
444
672
|
//! The global state
|
445
|
-
const WindowNaiveAggregator &
|
673
|
+
const WindowNaiveAggregator &aggregator;
|
446
674
|
//! Data pointer that contains a vector of states, used for row aggregation
|
447
675
|
vector<data_t> state;
|
448
676
|
//! Reused result state container for the aggregate
|
@@ -459,21 +687,12 @@ protected:
|
|
459
687
|
SubFrames frames;
|
460
688
|
//! The optional hash table used for DISTINCT
|
461
689
|
Vector hashes;
|
462
|
-
HashRow hash_row;
|
463
|
-
EqualRow equal_row;
|
464
|
-
RowSet row_set;
|
465
690
|
};
|
466
691
|
|
467
|
-
WindowNaiveState::WindowNaiveState(const WindowNaiveAggregator &
|
468
|
-
:
|
469
|
-
statep((LogicalType::POINTER)), flush_count(0), hashes(LogicalType::HASH)
|
470
|
-
|
471
|
-
InitSubFrames(frames, gstate.exclude_mode);
|
472
|
-
|
473
|
-
auto &inputs = gstate.GetInputs();
|
474
|
-
if (inputs.ColumnCount() > 0) {
|
475
|
-
leaves.Initialize(Allocator::DefaultAllocator(), inputs.GetTypes());
|
476
|
-
}
|
692
|
+
WindowNaiveState::WindowNaiveState(const WindowNaiveAggregator &aggregator_p)
|
693
|
+
: aggregator(aggregator_p), state(aggregator.state_size * STANDARD_VECTOR_SIZE), statef(LogicalType::POINTER),
|
694
|
+
statep((LogicalType::POINTER)), flush_count(0), hashes(LogicalType::HASH) {
|
695
|
+
InitSubFrames(frames, aggregator.exclude_mode);
|
477
696
|
|
478
697
|
update_sel.Initialize();
|
479
698
|
|
@@ -485,28 +704,26 @@ WindowNaiveState::WindowNaiveState(const WindowNaiveAggregator &gstate)
|
|
485
704
|
auto fdata = FlatVector::GetData<data_ptr_t>(statef);
|
486
705
|
for (idx_t i = 0; i < STANDARD_VECTOR_SIZE; ++i) {
|
487
706
|
fdata[i] = state_ptr;
|
488
|
-
state_ptr +=
|
707
|
+
state_ptr += aggregator.state_size;
|
489
708
|
}
|
490
709
|
}
|
491
710
|
|
492
|
-
void WindowNaiveState::FlushStates() {
|
711
|
+
void WindowNaiveState::FlushStates(const WindowAggregatorGlobalState &gsink) {
|
493
712
|
if (!flush_count) {
|
494
713
|
return;
|
495
714
|
}
|
496
715
|
|
497
|
-
auto &inputs =
|
716
|
+
auto &inputs = gsink.inputs;
|
498
717
|
leaves.Slice(inputs, update_sel, flush_count);
|
499
718
|
|
500
|
-
auto &aggr =
|
719
|
+
auto &aggr = aggregator.aggr;
|
501
720
|
AggregateInputData aggr_input_data(aggr.GetFunctionData(), allocator);
|
502
721
|
aggr.function.update(leaves.data.data(), aggr_input_data, leaves.ColumnCount(), statep, flush_count);
|
503
722
|
|
504
723
|
flush_count = 0;
|
505
724
|
}
|
506
725
|
|
507
|
-
size_t WindowNaiveState::Hash(idx_t rid) {
|
508
|
-
auto &inputs = gstate.GetInputs();
|
509
|
-
|
726
|
+
size_t WindowNaiveState::Hash(const DataChunk &inputs, idx_t rid) {
|
510
727
|
auto s = UnsafeNumericCast<sel_t>(rid);
|
511
728
|
SelectionVector sel(&s);
|
512
729
|
leaves.Slice(inputs, sel, 1);
|
@@ -515,9 +732,7 @@ size_t WindowNaiveState::Hash(idx_t rid) {
|
|
515
732
|
return *FlatVector::GetData<hash_t>(hashes);
|
516
733
|
}
|
517
734
|
|
518
|
-
bool WindowNaiveState::KeyEqual(const idx_t &lhs, const idx_t &rhs) {
|
519
|
-
auto &inputs = gstate.GetInputs();
|
520
|
-
|
735
|
+
bool WindowNaiveState::KeyEqual(const DataChunk &inputs, const idx_t &lhs, const idx_t &rhs) {
|
521
736
|
auto l = UnsafeNumericCast<sel_t>(lhs);
|
522
737
|
SelectionVector lsel(&l);
|
523
738
|
|
@@ -538,16 +753,26 @@ bool WindowNaiveState::KeyEqual(const idx_t &lhs, const idx_t &rhs) {
|
|
538
753
|
return true;
|
539
754
|
}
|
540
755
|
|
541
|
-
void WindowNaiveState::Evaluate(const DataChunk &bounds, Vector &result,
|
542
|
-
|
543
|
-
auto &
|
756
|
+
void WindowNaiveState::Evaluate(const WindowAggregatorGlobalState &gsink, const DataChunk &bounds, Vector &result,
|
757
|
+
idx_t count, idx_t row_idx) {
|
758
|
+
auto &aggr = aggregator.aggr;
|
759
|
+
auto &filter_mask = gsink.filter_mask;
|
760
|
+
auto &inputs = gsink.inputs;
|
761
|
+
|
762
|
+
if (leaves.ColumnCount() == 0 && inputs.ColumnCount() > 0) {
|
763
|
+
leaves.Initialize(Allocator::DefaultAllocator(), inputs.GetTypes());
|
764
|
+
}
|
544
765
|
|
545
766
|
auto fdata = FlatVector::GetData<data_ptr_t>(statef);
|
546
767
|
auto pdata = FlatVector::GetData<data_ptr_t>(statep);
|
547
768
|
|
548
|
-
|
769
|
+
HashRow hash_row(*this, inputs);
|
770
|
+
EqualRow equal_row(*this, inputs);
|
771
|
+
RowSet row_set(STANDARD_VECTOR_SIZE, hash_row, equal_row);
|
772
|
+
|
773
|
+
EvaluateSubFrames(bounds, aggregator.exclude_mode, count, row_idx, frames, [&](idx_t rid) {
|
549
774
|
auto agg_state = fdata[rid];
|
550
|
-
aggr.function.initialize(agg_state);
|
775
|
+
aggr.function.initialize(aggr.function, agg_state);
|
551
776
|
|
552
777
|
// Just update the aggregate with the unfiltered input rows
|
553
778
|
row_set.clear();
|
@@ -565,14 +790,14 @@ void WindowNaiveState::Evaluate(const DataChunk &bounds, Vector &result, idx_t c
|
|
565
790
|
pdata[flush_count] = agg_state;
|
566
791
|
update_sel[flush_count++] = UnsafeNumericCast<sel_t>(f);
|
567
792
|
if (flush_count >= STANDARD_VECTOR_SIZE) {
|
568
|
-
FlushStates();
|
793
|
+
FlushStates(gsink);
|
569
794
|
}
|
570
795
|
}
|
571
796
|
}
|
572
797
|
});
|
573
798
|
|
574
799
|
// Flush the final states
|
575
|
-
FlushStates();
|
800
|
+
FlushStates(gsink);
|
576
801
|
|
577
802
|
// Finalise the result aggregates and write to the result
|
578
803
|
AggregateInputData aggr_input_data(aggr.GetFunctionData(), allocator);
|
@@ -584,55 +809,57 @@ void WindowNaiveState::Evaluate(const DataChunk &bounds, Vector &result, idx_t c
|
|
584
809
|
}
|
585
810
|
}
|
586
811
|
|
587
|
-
unique_ptr<WindowAggregatorState> WindowNaiveAggregator::GetLocalState() const {
|
812
|
+
unique_ptr<WindowAggregatorState> WindowNaiveAggregator::GetLocalState(const WindowAggregatorState &gstate) const {
|
588
813
|
return make_uniq<WindowNaiveState>(*this);
|
589
814
|
}
|
590
815
|
|
591
|
-
void WindowNaiveAggregator::Evaluate(
|
592
|
-
idx_t count, idx_t row_idx) const {
|
593
|
-
auto &
|
594
|
-
|
816
|
+
void WindowNaiveAggregator::Evaluate(const WindowAggregatorState &gsink, WindowAggregatorState &lstate,
|
817
|
+
const DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) const {
|
818
|
+
const auto &gnstate = gsink.Cast<WindowAggregatorGlobalState>();
|
819
|
+
auto &lnstate = lstate.Cast<WindowNaiveState>();
|
820
|
+
lnstate.Evaluate(gnstate, bounds, result, count, row_idx);
|
595
821
|
}
|
596
822
|
|
597
823
|
//===--------------------------------------------------------------------===//
|
598
824
|
// WindowSegmentTree
|
599
825
|
//===--------------------------------------------------------------------===//
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
}
|
826
|
+
class WindowSegmentTreeGlobalState : public WindowAggregatorGlobalState {
|
827
|
+
public:
|
828
|
+
using AtomicCounters = vector<std::atomic<idx_t>>;
|
604
829
|
|
605
|
-
|
606
|
-
WindowAggregator::Finalize(stats);
|
830
|
+
WindowSegmentTreeGlobalState(const WindowSegmentTree &aggregator, idx_t group_count);
|
607
831
|
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
}
|
832
|
+
ArenaAllocator &CreateTreeAllocator() {
|
833
|
+
lock_guard<mutex> tree_lock(lock);
|
834
|
+
tree_allocators.emplace_back(make_uniq<ArenaAllocator>(Allocator::DefaultAllocator()));
|
835
|
+
return *tree_allocators.back();
|
613
836
|
}
|
614
|
-
}
|
615
837
|
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
838
|
+
//! The owning aggregator
|
839
|
+
const WindowSegmentTree &tree;
|
840
|
+
//! The actual window segment tree: an array of aggregate states that represent all the intermediate nodes
|
841
|
+
WindowAggregateStates levels_flat_native;
|
842
|
+
//! For each level, the starting location in the levels_flat_native array
|
843
|
+
vector<idx_t> levels_flat_start;
|
844
|
+
//! The level being built (read)
|
845
|
+
std::atomic<idx_t> build_level;
|
846
|
+
//! The number of entries started so far at each level
|
847
|
+
unique_ptr<AtomicCounters> build_started;
|
848
|
+
//! The number of entries completed so far at each level
|
849
|
+
unique_ptr<AtomicCounters> build_completed;
|
850
|
+
//! The tree allocators.
|
851
|
+
//! We need to hold onto them for the tree lifetime,
|
852
|
+
//! not the lifetime of the local state that constructed part of the tree
|
853
|
+
vector<unique_ptr<ArenaAllocator>> tree_allocators;
|
854
|
+
|
855
|
+
// TREE_FANOUT needs to cleanly divide STANDARD_VECTOR_SIZE
|
856
|
+
static constexpr idx_t TREE_FANOUT = 16;
|
857
|
+
};
|
858
|
+
|
859
|
+
WindowSegmentTree::WindowSegmentTree(AggregateObject aggr, const vector<LogicalType> &arg_types,
|
860
|
+
const LogicalType &result_type, WindowAggregationMode mode_p,
|
861
|
+
const WindowExcludeMode exclude_mode_p)
|
862
|
+
: WindowAggregator(std::move(aggr), arg_types, result_type, exclude_mode_p), mode(mode_p) {
|
636
863
|
}
|
637
864
|
|
638
865
|
class WindowSegmentTreePart {
|
@@ -643,7 +870,7 @@ public:
|
|
643
870
|
enum FramePart : uint8_t { FULL = 0, LEFT = 1, RIGHT = 2 };
|
644
871
|
|
645
872
|
WindowSegmentTreePart(ArenaAllocator &allocator, const AggregateObject &aggr, const DataChunk &inputs,
|
646
|
-
const
|
873
|
+
const ValidityArray &filter_mask);
|
647
874
|
~WindowSegmentTreePart();
|
648
875
|
|
649
876
|
unique_ptr<WindowSegmentTreePart> Copy() const {
|
@@ -652,23 +879,23 @@ public:
|
|
652
879
|
|
653
880
|
void FlushStates(bool combining);
|
654
881
|
void ExtractFrame(idx_t begin, idx_t end, data_ptr_t current_state);
|
655
|
-
void WindowSegmentValue(const
|
882
|
+
void WindowSegmentValue(const WindowSegmentTreeGlobalState &tree, idx_t l_idx, idx_t begin, idx_t end,
|
656
883
|
data_ptr_t current_state);
|
657
884
|
//! Writes result and calls destructors
|
658
885
|
void Finalize(Vector &result, idx_t count);
|
659
886
|
|
660
887
|
void Combine(WindowSegmentTreePart &other, idx_t count);
|
661
888
|
|
662
|
-
void Evaluate(const
|
663
|
-
idx_t row_idx, FramePart frame_part);
|
889
|
+
void Evaluate(const WindowSegmentTreeGlobalState &tree, const idx_t *begins, const idx_t *ends, Vector &result,
|
890
|
+
idx_t count, idx_t row_idx, FramePart frame_part);
|
664
891
|
|
665
892
|
protected:
|
666
893
|
//! Initialises the accumulation state vector (statef)
|
667
894
|
void Initialize(idx_t count);
|
668
895
|
//! Accumulate upper tree levels
|
669
|
-
void EvaluateUpperLevels(const
|
670
|
-
idx_t row_idx, FramePart frame_part);
|
671
|
-
void EvaluateLeaves(const
|
896
|
+
void EvaluateUpperLevels(const WindowSegmentTreeGlobalState &tree, const idx_t *begins, const idx_t *ends,
|
897
|
+
idx_t count, idx_t row_idx, FramePart frame_part);
|
898
|
+
void EvaluateLeaves(const WindowSegmentTreeGlobalState &tree, const idx_t *begins, const idx_t *ends, idx_t count,
|
672
899
|
idx_t row_idx, FramePart frame_part, FramePart leaf_part);
|
673
900
|
|
674
901
|
public:
|
@@ -681,7 +908,7 @@ public:
|
|
681
908
|
//! The partition arguments
|
682
909
|
const DataChunk &inputs;
|
683
910
|
//! The filtered rows in inputs
|
684
|
-
const
|
911
|
+
const ValidityArray &filter_mask;
|
685
912
|
//! The size of a single aggregate state
|
686
913
|
const idx_t state_size;
|
687
914
|
//! Data pointer that contains a vector of states, used for intermediate window segment aggregation
|
@@ -704,28 +931,41 @@ public:
|
|
704
931
|
|
705
932
|
class WindowSegmentTreeState : public WindowAggregatorState {
|
706
933
|
public:
|
707
|
-
WindowSegmentTreeState(
|
708
|
-
: aggr(aggr), inputs(inputs), filter_mask(filter_mask), part(allocator, aggr, inputs, filter_mask) {
|
934
|
+
WindowSegmentTreeState() {
|
709
935
|
}
|
710
936
|
|
711
|
-
|
712
|
-
const
|
713
|
-
|
714
|
-
const DataChunk &inputs;
|
715
|
-
//! The filtered rows in inputs
|
716
|
-
const ValidityMask &filter_mask;
|
937
|
+
void Finalize(WindowSegmentTreeGlobalState &gstate);
|
938
|
+
void Evaluate(const WindowSegmentTreeGlobalState &gsink, const DataChunk &bounds, Vector &result, idx_t count,
|
939
|
+
idx_t row_idx);
|
717
940
|
//! The left (default) segment tree part
|
718
|
-
WindowSegmentTreePart part;
|
941
|
+
unique_ptr<WindowSegmentTreePart> part;
|
719
942
|
//! The right segment tree part (for EXCLUDE)
|
720
943
|
unique_ptr<WindowSegmentTreePart> right_part;
|
721
944
|
};
|
722
945
|
|
946
|
+
void WindowSegmentTree::Finalize(WindowAggregatorState &gsink, WindowAggregatorState &lstate, const FrameStats &stats) {
|
947
|
+
|
948
|
+
auto &gasink = gsink.Cast<WindowSegmentTreeGlobalState>();
|
949
|
+
auto &inputs = gasink.inputs;
|
950
|
+
|
951
|
+
WindowAggregator::Finalize(gsink, lstate, stats);
|
952
|
+
|
953
|
+
if (inputs.ColumnCount() > 0) {
|
954
|
+
if (aggr.function.combine && UseCombineAPI()) {
|
955
|
+
lstate.Cast<WindowSegmentTreeState>().Finalize(gasink);
|
956
|
+
}
|
957
|
+
}
|
958
|
+
|
959
|
+
++gasink.finalized;
|
960
|
+
}
|
961
|
+
|
723
962
|
WindowSegmentTreePart::WindowSegmentTreePart(ArenaAllocator &allocator, const AggregateObject &aggr,
|
724
|
-
const DataChunk &inputs, const
|
963
|
+
const DataChunk &inputs, const ValidityArray &filter_mask)
|
725
964
|
: allocator(allocator), aggr(aggr),
|
726
965
|
order_insensitive(aggr.function.order_dependent == AggregateOrderDependent::NOT_ORDER_DEPENDENT), inputs(inputs),
|
727
|
-
filter_mask(filter_mask), state_size(aggr.function.state_size()),
|
728
|
-
|
966
|
+
filter_mask(filter_mask), state_size(aggr.function.state_size(aggr.function)),
|
967
|
+
state(state_size * STANDARD_VECTOR_SIZE), statep(LogicalType::POINTER), statel(LogicalType::POINTER),
|
968
|
+
statef(LogicalType::POINTER), flush_count(0) {
|
729
969
|
if (inputs.ColumnCount() > 0) {
|
730
970
|
leaves.Initialize(Allocator::DefaultAllocator(), inputs.GetTypes());
|
731
971
|
filter_sel.Initialize();
|
@@ -746,8 +986,13 @@ WindowSegmentTreePart::WindowSegmentTreePart(ArenaAllocator &allocator, const Ag
|
|
746
986
|
WindowSegmentTreePart::~WindowSegmentTreePart() {
|
747
987
|
}
|
748
988
|
|
749
|
-
unique_ptr<WindowAggregatorState> WindowSegmentTree::
|
750
|
-
|
989
|
+
unique_ptr<WindowAggregatorState> WindowSegmentTree::GetGlobalState(idx_t group_count,
|
990
|
+
const ValidityMask &partition_mask) const {
|
991
|
+
return make_uniq<WindowSegmentTreeGlobalState>(*this, group_count);
|
992
|
+
}
|
993
|
+
|
994
|
+
unique_ptr<WindowAggregatorState> WindowSegmentTree::GetLocalState(const WindowAggregatorState &gstate) const {
|
995
|
+
return make_uniq<WindowSegmentTreeState>();
|
751
996
|
}
|
752
997
|
|
753
998
|
void WindowSegmentTreePart::FlushStates(bool combining) {
|
@@ -800,8 +1045,8 @@ void WindowSegmentTreePart::ExtractFrame(idx_t begin, idx_t end, data_ptr_t stat
|
|
800
1045
|
}
|
801
1046
|
}
|
802
1047
|
|
803
|
-
void WindowSegmentTreePart::WindowSegmentValue(const
|
804
|
-
data_ptr_t state_ptr) {
|
1048
|
+
void WindowSegmentTreePart::WindowSegmentValue(const WindowSegmentTreeGlobalState &tree, idx_t l_idx, idx_t begin,
|
1049
|
+
idx_t end, data_ptr_t state_ptr) {
|
805
1050
|
D_ASSERT(begin <= end);
|
806
1051
|
if (begin == end || inputs.ColumnCount() == 0) {
|
807
1052
|
return;
|
@@ -812,9 +1057,9 @@ void WindowSegmentTreePart::WindowSegmentValue(const WindowSegmentTree &tree, id
|
|
812
1057
|
ExtractFrame(begin, end, state_ptr);
|
813
1058
|
} else {
|
814
1059
|
// find out where the states begin
|
815
|
-
auto begin_ptr = tree.levels_flat_native.
|
1060
|
+
auto begin_ptr = tree.levels_flat_native.GetStatePtr(begin + tree.levels_flat_start[l_idx - 1]);
|
816
1061
|
// set up a vector of pointers that point towards the set of states
|
817
|
-
auto ldata = FlatVector::GetData<
|
1062
|
+
auto ldata = FlatVector::GetData<const_data_ptr_t>(statel);
|
818
1063
|
auto pdata = FlatVector::GetData<data_ptr_t>(statep);
|
819
1064
|
for (idx_t i = 0; i < count; i++) {
|
820
1065
|
pdata[flush_count] = state_ptr;
|
@@ -837,20 +1082,12 @@ void WindowSegmentTreePart::Finalize(Vector &result, idx_t count) {
|
|
837
1082
|
}
|
838
1083
|
}
|
839
1084
|
|
840
|
-
|
841
|
-
|
1085
|
+
WindowSegmentTreeGlobalState::WindowSegmentTreeGlobalState(const WindowSegmentTree &aggregator, idx_t group_count)
|
1086
|
+
: WindowAggregatorGlobalState(aggregator, group_count), tree(aggregator), levels_flat_native(aggregator.aggr) {
|
842
1087
|
|
843
|
-
|
844
|
-
auto &gtstate = gstate->Cast<WindowSegmentTreeState>().part;
|
1088
|
+
D_ASSERT(inputs.ColumnCount() > 0);
|
845
1089
|
|
846
1090
|
// compute space required to store internal nodes of segment tree
|
847
|
-
internal_nodes = 0;
|
848
|
-
idx_t level_nodes = inputs.size();
|
849
|
-
do {
|
850
|
-
level_nodes = (level_nodes + (TREE_FANOUT - 1)) / TREE_FANOUT;
|
851
|
-
internal_nodes += level_nodes;
|
852
|
-
} while (level_nodes > 1);
|
853
|
-
levels_flat_native = make_unsafe_uniq_array<data_t>(internal_nodes * state_size);
|
854
1091
|
levels_flat_start.push_back(0);
|
855
1092
|
|
856
1093
|
idx_t levels_flat_offset = 0;
|
@@ -861,12 +1098,6 @@ void WindowSegmentTree::ConstructTree() {
|
|
861
1098
|
while ((level_size =
|
862
1099
|
(level_current == 0 ? inputs.size() : levels_flat_offset - levels_flat_start[level_current - 1])) > 1) {
|
863
1100
|
for (idx_t pos = 0; pos < level_size; pos += TREE_FANOUT) {
|
864
|
-
// compute the aggregate for this entry in the segment tree
|
865
|
-
data_ptr_t state_ptr = levels_flat_native.get() + (levels_flat_offset * state_size);
|
866
|
-
aggr.function.initialize(state_ptr);
|
867
|
-
gtstate.WindowSegmentValue(*this, level_current, pos, MinValue(level_size, pos + TREE_FANOUT), state_ptr);
|
868
|
-
gtstate.FlushStates(level_current > 0);
|
869
|
-
|
870
1101
|
levels_flat_offset++;
|
871
1102
|
}
|
872
1103
|
|
@@ -876,46 +1107,120 @@ void WindowSegmentTree::ConstructTree() {
|
|
876
1107
|
|
877
1108
|
// Corner case: single element in the window
|
878
1109
|
if (levels_flat_offset == 0) {
|
879
|
-
|
1110
|
+
++levels_flat_offset;
|
1111
|
+
}
|
1112
|
+
|
1113
|
+
levels_flat_native.Initialize(levels_flat_offset);
|
1114
|
+
|
1115
|
+
// Start by building from the bottom level
|
1116
|
+
build_level = 0;
|
1117
|
+
|
1118
|
+
build_started = make_uniq<AtomicCounters>(levels_flat_start.size());
|
1119
|
+
for (auto &counter : *build_started) {
|
1120
|
+
counter = 0;
|
1121
|
+
}
|
1122
|
+
|
1123
|
+
build_completed = make_uniq<AtomicCounters>(levels_flat_start.size());
|
1124
|
+
for (auto &counter : *build_completed) {
|
1125
|
+
counter = 0;
|
880
1126
|
}
|
881
1127
|
}
|
882
1128
|
|
883
|
-
void
|
884
|
-
|
1129
|
+
void WindowSegmentTreeState::Finalize(WindowSegmentTreeGlobalState &gstate) {
|
1130
|
+
// Single part for constructing the tree
|
1131
|
+
auto &inputs = gstate.inputs;
|
1132
|
+
auto &tree = gstate.tree;
|
1133
|
+
auto &filter_mask = gstate.filter_mask;
|
1134
|
+
WindowSegmentTreePart gtstate(gstate.CreateTreeAllocator(), tree.aggr, inputs, filter_mask);
|
1135
|
+
|
1136
|
+
auto &levels_flat_native = gstate.levels_flat_native;
|
1137
|
+
const auto &levels_flat_start = gstate.levels_flat_start;
|
1138
|
+
// iterate over the levels of the segment tree
|
1139
|
+
for (;;) {
|
1140
|
+
const idx_t level_current = gstate.build_level.load();
|
1141
|
+
if (level_current >= levels_flat_start.size()) {
|
1142
|
+
break;
|
1143
|
+
}
|
1144
|
+
|
1145
|
+
// level 0 is data itself
|
1146
|
+
const auto level_size =
|
1147
|
+
(level_current == 0 ? inputs.size()
|
1148
|
+
: levels_flat_start[level_current] - levels_flat_start[level_current - 1]);
|
1149
|
+
if (level_size <= 1) {
|
1150
|
+
break;
|
1151
|
+
}
|
1152
|
+
const idx_t build_count = (level_size + gstate.TREE_FANOUT - 1) / gstate.TREE_FANOUT;
|
1153
|
+
|
1154
|
+
// Build the next fan-in
|
1155
|
+
const idx_t build_idx = (*gstate.build_started).at(level_current)++;
|
1156
|
+
if (build_idx >= build_count) {
|
1157
|
+
// Nothing left at this level, so wait until other threads are done.
|
1158
|
+
// Since we are only building TREE_FANOUT values at a time, this will be quick.
|
1159
|
+
while (level_current == gstate.build_level.load()) {
|
1160
|
+
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
1161
|
+
}
|
1162
|
+
continue;
|
1163
|
+
}
|
1164
|
+
|
1165
|
+
// compute the aggregate for this entry in the segment tree
|
1166
|
+
const idx_t pos = build_idx * gstate.TREE_FANOUT;
|
1167
|
+
const idx_t levels_flat_offset = levels_flat_start[level_current] + build_idx;
|
1168
|
+
auto state_ptr = levels_flat_native.GetStatePtr(levels_flat_offset);
|
1169
|
+
gtstate.WindowSegmentValue(gstate, level_current, pos, MinValue(level_size, pos + gstate.TREE_FANOUT),
|
1170
|
+
state_ptr);
|
1171
|
+
gtstate.FlushStates(level_current > 0);
|
1172
|
+
|
1173
|
+
// If that was the last one, mark the level as complete.
|
1174
|
+
const idx_t build_complete = ++(*gstate.build_completed).at(level_current);
|
1175
|
+
if (build_complete == build_count) {
|
1176
|
+
gstate.build_level++;
|
1177
|
+
continue;
|
1178
|
+
}
|
1179
|
+
}
|
1180
|
+
}
|
885
1181
|
|
1182
|
+
void WindowSegmentTree::Evaluate(const WindowAggregatorState &gsink, WindowAggregatorState &lstate,
|
1183
|
+
const DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) const {
|
1184
|
+
const auto &gtstate = gsink.Cast<WindowSegmentTreeGlobalState>();
|
886
1185
|
auto &ltstate = lstate.Cast<WindowSegmentTreeState>();
|
1186
|
+
ltstate.Evaluate(gtstate, bounds, result, count, row_idx);
|
1187
|
+
}
|
1188
|
+
|
1189
|
+
void WindowSegmentTreeState::Evaluate(const WindowSegmentTreeGlobalState &gtstate, const DataChunk &bounds,
|
1190
|
+
Vector &result, idx_t count, idx_t row_idx) {
|
887
1191
|
auto window_begin = FlatVector::GetData<const idx_t>(bounds.data[WINDOW_BEGIN]);
|
888
1192
|
auto window_end = FlatVector::GetData<const idx_t>(bounds.data[WINDOW_END]);
|
889
1193
|
auto peer_begin = FlatVector::GetData<const idx_t>(bounds.data[PEER_BEGIN]);
|
890
1194
|
auto peer_end = FlatVector::GetData<const idx_t>(bounds.data[PEER_END]);
|
891
1195
|
|
892
|
-
|
893
|
-
|
1196
|
+
if (!part) {
|
1197
|
+
part =
|
1198
|
+
make_uniq<WindowSegmentTreePart>(allocator, gtstate.aggregator.aggr, gtstate.inputs, gtstate.filter_mask);
|
1199
|
+
}
|
1200
|
+
|
1201
|
+
if (gtstate.aggregator.exclude_mode != WindowExcludeMode::NO_OTHER) {
|
894
1202
|
// 1. evaluate the tree left of the excluded part
|
895
|
-
part
|
1203
|
+
part->Evaluate(gtstate, window_begin, peer_begin, result, count, row_idx, WindowSegmentTreePart::LEFT);
|
896
1204
|
|
897
1205
|
// 2. set up a second state for the right of the excluded part
|
898
|
-
if (!
|
899
|
-
|
1206
|
+
if (!right_part) {
|
1207
|
+
right_part = part->Copy();
|
900
1208
|
}
|
901
|
-
auto &right_part = *ltstate.right_part;
|
902
1209
|
|
903
1210
|
// 3. evaluate the tree right of the excluded part
|
904
|
-
right_part
|
1211
|
+
right_part->Evaluate(gtstate, peer_end, window_end, result, count, row_idx, WindowSegmentTreePart::RIGHT);
|
905
1212
|
|
906
1213
|
// 4. combine the buffer state into the Segment Tree State
|
907
|
-
part
|
1214
|
+
part->Combine(*right_part, count);
|
908
1215
|
} else {
|
909
|
-
part
|
1216
|
+
part->Evaluate(gtstate, window_begin, window_end, result, count, row_idx, WindowSegmentTreePart::FULL);
|
910
1217
|
}
|
911
1218
|
|
912
|
-
part
|
1219
|
+
part->Finalize(result, count);
|
913
1220
|
}
|
914
1221
|
|
915
|
-
void WindowSegmentTreePart::Evaluate(const
|
1222
|
+
void WindowSegmentTreePart::Evaluate(const WindowSegmentTreeGlobalState &tree, const idx_t *begins, const idx_t *ends,
|
916
1223
|
Vector &result, idx_t count, idx_t row_idx, FramePart frame_part) {
|
917
|
-
D_ASSERT(aggr.function.combine && tree.UseCombineAPI());
|
918
|
-
|
919
1224
|
Initialize(count);
|
920
1225
|
|
921
1226
|
if (order_insensitive) {
|
@@ -936,15 +1241,15 @@ void WindowSegmentTreePart::Initialize(idx_t count) {
|
|
936
1241
|
auto fdata = FlatVector::GetData<data_ptr_t>(statef);
|
937
1242
|
for (idx_t rid = 0; rid < count; ++rid) {
|
938
1243
|
auto state_ptr = fdata[rid];
|
939
|
-
aggr.function.initialize(state_ptr);
|
1244
|
+
aggr.function.initialize(aggr.function, state_ptr);
|
940
1245
|
}
|
941
1246
|
}
|
942
1247
|
|
943
|
-
void WindowSegmentTreePart::EvaluateUpperLevels(const
|
944
|
-
idx_t count, idx_t row_idx, FramePart frame_part) {
|
1248
|
+
void WindowSegmentTreePart::EvaluateUpperLevels(const WindowSegmentTreeGlobalState &tree, const idx_t *begins,
|
1249
|
+
const idx_t *ends, idx_t count, idx_t row_idx, FramePart frame_part) {
|
945
1250
|
auto fdata = FlatVector::GetData<data_ptr_t>(statef);
|
946
1251
|
|
947
|
-
const auto exclude_mode = tree.exclude_mode;
|
1252
|
+
const auto exclude_mode = tree.tree.exclude_mode;
|
948
1253
|
const bool begin_on_curr_row = frame_part == FramePart::RIGHT && exclude_mode == WindowExcludeMode::CURRENT_ROW;
|
949
1254
|
const bool end_on_curr_row = frame_part == FramePart::LEFT && exclude_mode == WindowExcludeMode::CURRENT_ROW;
|
950
1255
|
|
@@ -1034,8 +1339,9 @@ void WindowSegmentTreePart::EvaluateUpperLevels(const WindowSegmentTree &tree, c
|
|
1034
1339
|
FlushStates(true);
|
1035
1340
|
}
|
1036
1341
|
|
1037
|
-
void WindowSegmentTreePart::EvaluateLeaves(const
|
1038
|
-
idx_t count, idx_t row_idx, FramePart frame_part,
|
1342
|
+
void WindowSegmentTreePart::EvaluateLeaves(const WindowSegmentTreeGlobalState &tree, const idx_t *begins,
|
1343
|
+
const idx_t *ends, idx_t count, idx_t row_idx, FramePart frame_part,
|
1344
|
+
FramePart leaf_part) {
|
1039
1345
|
|
1040
1346
|
auto fdata = FlatVector::GetData<data_ptr_t>(statef);
|
1041
1347
|
|
@@ -1044,7 +1350,7 @@ void WindowSegmentTreePart::EvaluateLeaves(const WindowSegmentTree &tree, const
|
|
1044
1350
|
// The current row is the leftmost value of the right hand side.
|
1045
1351
|
const bool compute_left = leaf_part != FramePart::RIGHT;
|
1046
1352
|
const bool compute_right = leaf_part != FramePart::LEFT;
|
1047
|
-
const auto exclude_mode = tree.exclude_mode;
|
1353
|
+
const auto exclude_mode = tree.tree.exclude_mode;
|
1048
1354
|
const bool begin_on_curr_row = frame_part == FramePart::RIGHT && exclude_mode == WindowExcludeMode::CURRENT_ROW;
|
1049
1355
|
const bool end_on_curr_row = frame_part == FramePart::LEFT && exclude_mode == WindowExcludeMode::CURRENT_ROW;
|
1050
1356
|
// with EXCLUDE TIES, in addition to the frame part right of the peer group's end, we also need to consider the
|
@@ -1087,81 +1393,236 @@ void WindowSegmentTreePart::EvaluateLeaves(const WindowSegmentTree &tree, const
|
|
1087
1393
|
//===--------------------------------------------------------------------===//
|
1088
1394
|
// WindowDistinctAggregator
|
1089
1395
|
//===--------------------------------------------------------------------===//
|
1090
|
-
WindowDistinctAggregator::WindowDistinctAggregator(AggregateObject aggr, const LogicalType &
|
1091
|
-
const
|
1092
|
-
ClientContext &context)
|
1093
|
-
: WindowAggregator(std::move(aggr), result_type, exclude_mode_p
|
1094
|
-
|
1396
|
+
WindowDistinctAggregator::WindowDistinctAggregator(AggregateObject aggr, const vector<LogicalType> &arg_types,
|
1397
|
+
const LogicalType &result_type,
|
1398
|
+
const WindowExcludeMode exclude_mode_p, ClientContext &context)
|
1399
|
+
: WindowAggregator(std::move(aggr), arg_types, result_type, exclude_mode_p), context(context) {
|
1400
|
+
}
|
1401
|
+
|
1402
|
+
class WindowDistinctAggregatorLocalState;
|
1403
|
+
|
1404
|
+
class WindowDistinctAggregatorGlobalState;
|
1405
|
+
|
1406
|
+
class WindowDistinctSortTree : public MergeSortTree<idx_t, idx_t> {
|
1407
|
+
public:
|
1408
|
+
// prev_idx, input_idx
|
1409
|
+
using ZippedTuple = std::tuple<idx_t, idx_t>;
|
1410
|
+
using ZippedElements = vector<ZippedTuple>;
|
1411
|
+
|
1412
|
+
explicit WindowDistinctSortTree(WindowDistinctAggregatorGlobalState &gdastate, idx_t count) : gdastate(gdastate) {
|
1413
|
+
// Set up for parallel build
|
1414
|
+
build_level = 0;
|
1415
|
+
build_complete = 0;
|
1416
|
+
build_run = 0;
|
1417
|
+
build_run_length = 1;
|
1418
|
+
build_num_runs = count;
|
1419
|
+
}
|
1420
|
+
|
1421
|
+
void Build(WindowDistinctAggregatorLocalState &ldastate);
|
1422
|
+
|
1423
|
+
protected:
|
1424
|
+
bool TryNextRun(idx_t &level_idx, idx_t &run_idx);
|
1425
|
+
void BuildRun(idx_t level_nr, idx_t i, WindowDistinctAggregatorLocalState &ldastate);
|
1095
1426
|
|
1427
|
+
WindowDistinctAggregatorGlobalState &gdastate;
|
1428
|
+
};
|
1429
|
+
|
1430
|
+
class WindowDistinctAggregatorGlobalState : public WindowAggregatorGlobalState {
|
1431
|
+
public:
|
1432
|
+
using GlobalSortStatePtr = unique_ptr<GlobalSortState>;
|
1433
|
+
using ZippedTuple = WindowDistinctSortTree::ZippedTuple;
|
1434
|
+
using ZippedElements = WindowDistinctSortTree::ZippedElements;
|
1435
|
+
|
1436
|
+
WindowDistinctAggregatorGlobalState(const WindowDistinctAggregator &aggregator, idx_t group_count);
|
1437
|
+
|
1438
|
+
//! Compute the block starts
|
1439
|
+
void MeasurePayloadBlocks();
|
1440
|
+
//! Patch up the previous index block boundaries
|
1441
|
+
void PatchPrevIdcs();
|
1442
|
+
bool TryPrepareNextStage(WindowDistinctAggregatorLocalState &lstate);
|
1443
|
+
|
1444
|
+
// Single threaded sorting for now
|
1445
|
+
ClientContext &context;
|
1446
|
+
idx_t memory_per_thread;
|
1447
|
+
|
1448
|
+
//! Finalize guard
|
1449
|
+
mutex lock;
|
1450
|
+
//! Finalize stage
|
1451
|
+
atomic<PartitionSortStage> stage;
|
1452
|
+
//! Total number of tasks in the current stage
|
1453
|
+
idx_t total_tasks = 0;
|
1454
|
+
//! Tasks assigned
|
1455
|
+
idx_t tasks_assigned = 0;
|
1456
|
+
//! Tasks landed
|
1457
|
+
mutable atomic<idx_t> tasks_completed;
|
1458
|
+
|
1459
|
+
//! The sorted payload data types (partition index)
|
1460
|
+
vector<LogicalType> payload_types;
|
1461
|
+
//! The aggregate arguments + partition index
|
1462
|
+
vector<LogicalType> sort_types;
|
1463
|
+
|
1464
|
+
//! Sorting operations
|
1465
|
+
GlobalSortStatePtr global_sort;
|
1466
|
+
//! The block starts (the scanner doesn't know this) plus the total count
|
1467
|
+
vector<idx_t> block_starts;
|
1468
|
+
|
1469
|
+
//! Per-block boundary values (first input index, last value of i), used to patch the block boundaries
|
1470
|
+
mutable ZippedElements seconds;
|
1471
|
+
//! The MST with the distinct back pointers
|
1472
|
+
mutable MergeSortTree<ZippedTuple> zipped_tree;
|
1473
|
+
//! The merge sort tree for the aggregate.
|
1474
|
+
WindowDistinctSortTree merge_sort_tree;
|
1475
|
+
|
1476
|
+
//! The actual window segment tree: an array of aggregate states that represent all the intermediate nodes
|
1477
|
+
WindowAggregateStates levels_flat_native;
|
1478
|
+
//! For each level, the starting location in the levels_flat_native array
|
1479
|
+
vector<idx_t> levels_flat_start;
|
1480
|
+
};
|
1481
|
+
|
1482
|
+
WindowDistinctAggregatorGlobalState::WindowDistinctAggregatorGlobalState(const WindowDistinctAggregator &aggregator,
|
1483
|
+
idx_t group_count)
|
1484
|
+
: WindowAggregatorGlobalState(aggregator, group_count), context(aggregator.context),
|
1485
|
+
stage(PartitionSortStage::INIT), tasks_completed(0), merge_sort_tree(*this, group_count),
|
1486
|
+
levels_flat_native(aggregator.aggr) {
|
1096
1487
|
payload_types.emplace_back(LogicalType::UBIGINT);
|
1097
|
-
payload_chunk.Initialize(Allocator::DefaultAllocator(), payload_types);
|
1098
|
-
}
|
1099
1488
|
|
1100
|
-
|
1101
|
-
|
1102
|
-
|
1103
|
-
|
1489
|
+
// 1: function ComputePrevIdcs(𝑖𝑛)
|
1490
|
+
// 2: sorted ← []
|
1491
|
+
// We sort the aggregate arguments and use the partition index as a tie-breaker.
|
1492
|
+
// TODO: Use a hash table?
|
1493
|
+
sort_types = aggregator.arg_types;
|
1494
|
+
for (const auto &type : payload_types) {
|
1495
|
+
sort_types.emplace_back(type);
|
1104
1496
|
}
|
1105
|
-
|
1106
|
-
|
1107
|
-
|
1108
|
-
|
1109
|
-
|
1110
|
-
for (idx_t i = 0; i < internal_nodes; i++) {
|
1111
|
-
address_data[count++] = data_ptr_t(levels_flat_native.get() + i * state_size);
|
1112
|
-
if (count == STANDARD_VECTOR_SIZE) {
|
1113
|
-
aggr.function.destructor(addresses, aggr_input_data, count);
|
1114
|
-
count = 0;
|
1115
|
-
}
|
1497
|
+
|
1498
|
+
vector<BoundOrderByNode> orders;
|
1499
|
+
for (const auto &type : sort_types) {
|
1500
|
+
auto expr = make_uniq<BoundConstantExpression>(Value(type));
|
1501
|
+
orders.emplace_back(BoundOrderByNode(OrderType::ASCENDING, OrderByNullType::NULLS_FIRST, std::move(expr)));
|
1116
1502
|
}
|
1117
|
-
|
1118
|
-
|
1503
|
+
|
1504
|
+
RowLayout payload_layout;
|
1505
|
+
payload_layout.Initialize(payload_types);
|
1506
|
+
|
1507
|
+
global_sort = make_uniq<GlobalSortState>(BufferManager::GetBufferManager(context), orders, payload_layout);
|
1508
|
+
|
1509
|
+
memory_per_thread = PhysicalOperator::GetMaxThreadMemory(context);
|
1510
|
+
|
1511
|
+
// 6: prevIdcs ← []
|
1512
|
+
// 7: prevIdcs[0] ← “-”
|
1513
|
+
auto &prev_idcs = zipped_tree.Allocate(group_count);
|
1514
|
+
|
1515
|
+
// To handle FILTER clauses we make the missing elements
|
1516
|
+
// point to themselves so they won't be counted.
|
1517
|
+
for (idx_t i = 0; i < group_count; ++i) {
|
1518
|
+
prev_idcs[i] = ZippedTuple(i + 1, i);
|
1519
|
+
}
|
1520
|
+
|
1521
|
+
// compute space required to store aggregation states of merge sort tree
|
1522
|
+
// this is one aggregate state per entry per level
|
1523
|
+
idx_t internal_nodes = 0;
|
1524
|
+
levels_flat_start.push_back(internal_nodes);
|
1525
|
+
for (idx_t level_nr = 0; level_nr < zipped_tree.tree.size(); ++level_nr) {
|
1526
|
+
internal_nodes += zipped_tree.tree[level_nr].first.size();
|
1527
|
+
levels_flat_start.push_back(internal_nodes);
|
1528
|
+
}
|
1529
|
+
levels_flat_native.Initialize(internal_nodes);
|
1530
|
+
|
1531
|
+
merge_sort_tree.tree.reserve(zipped_tree.tree.size());
|
1532
|
+
for (idx_t level_nr = 0; level_nr < zipped_tree.tree.size(); ++level_nr) {
|
1533
|
+
auto &zipped_level = zipped_tree.tree[level_nr].first;
|
1534
|
+
WindowDistinctSortTree::Elements level;
|
1535
|
+
WindowDistinctSortTree::Offsets cascades;
|
1536
|
+
level.resize(zipped_level.size());
|
1537
|
+
merge_sort_tree.tree.emplace_back(std::move(level), std::move(cascades));
|
1119
1538
|
}
|
1120
1539
|
}
|
1121
1540
|
|
1122
|
-
|
1123
|
-
|
1541
|
+
class WindowDistinctAggregatorLocalState : public WindowAggregatorState {
|
1542
|
+
public:
|
1543
|
+
explicit WindowDistinctAggregatorLocalState(const WindowDistinctAggregatorGlobalState &aggregator);
|
1544
|
+
|
1545
|
+
void Sink(DataChunk &arg_chunk, idx_t input_idx, optional_ptr<SelectionVector> filter_sel, idx_t filtered);
|
1546
|
+
void Sorted();
|
1547
|
+
void ExecuteTask();
|
1548
|
+
void Evaluate(const WindowDistinctAggregatorGlobalState &gdstate, const DataChunk &bounds, Vector &result,
|
1549
|
+
idx_t count, idx_t row_idx);
|
1550
|
+
|
1551
|
+
//! Thread-local sorting data
|
1552
|
+
LocalSortState local_sort;
|
1553
|
+
//! Finalize stage
|
1554
|
+
PartitionSortStage stage = PartitionSortStage::INIT;
|
1555
|
+
//! Finalize scan block index
|
1556
|
+
idx_t block_idx;
|
1557
|
+
//! Thread-local tree aggregation
|
1558
|
+
Vector update_v;
|
1559
|
+
Vector source_v;
|
1560
|
+
Vector target_v;
|
1561
|
+
DataChunk leaves;
|
1562
|
+
SelectionVector sel;
|
1124
1563
|
|
1125
|
-
|
1126
|
-
|
1127
|
-
|
1128
|
-
// 1: functionComputePrevIdcs(𝑖𝑛)
|
1129
|
-
// 2: sorted ← []
|
1130
|
-
vector<LogicalType> sort_types;
|
1131
|
-
for (const auto &col : arg_chunk.data) {
|
1132
|
-
sort_types.emplace_back(col.GetType());
|
1133
|
-
}
|
1564
|
+
protected:
|
1565
|
+
//! Flush the accumulated intermediate states into the result states
|
1566
|
+
void FlushStates();
|
1134
1567
|
|
1135
|
-
|
1136
|
-
|
1137
|
-
|
1568
|
+
//! The aggregator we are working with
|
1569
|
+
const WindowDistinctAggregatorGlobalState &gastate;
|
1570
|
+
DataChunk sort_chunk;
|
1571
|
+
DataChunk payload_chunk;
|
1572
|
+
//! Reused result state container for the window functions
|
1573
|
+
WindowAggregateStates statef;
|
1574
|
+
//! A vector of pointers to "state", used for buffering intermediate aggregates
|
1575
|
+
Vector statep;
|
1576
|
+
//! Reused state pointers for combining tree elements
|
1577
|
+
Vector statel;
|
1578
|
+
//! Count of buffered values
|
1579
|
+
idx_t flush_count;
|
1580
|
+
//! The frame boundaries, used for the window functions
|
1581
|
+
SubFrames frames;
|
1582
|
+
};
|
1138
1583
|
|
1139
|
-
|
1140
|
-
|
1141
|
-
|
1142
|
-
|
1143
|
-
|
1584
|
+
WindowDistinctAggregatorLocalState::WindowDistinctAggregatorLocalState(
|
1585
|
+
const WindowDistinctAggregatorGlobalState &gastate)
|
1586
|
+
: update_v(LogicalType::POINTER), source_v(LogicalType::POINTER), target_v(LogicalType::POINTER), gastate(gastate),
|
1587
|
+
statef(gastate.aggregator.aggr), statep(LogicalType::POINTER), statel(LogicalType::POINTER), flush_count(0) {
|
1588
|
+
InitSubFrames(frames, gastate.aggregator.exclude_mode);
|
1589
|
+
payload_chunk.Initialize(Allocator::DefaultAllocator(), gastate.payload_types);
|
1144
1590
|
|
1145
|
-
|
1146
|
-
|
1591
|
+
auto &global_sort = gastate.global_sort;
|
1592
|
+
local_sort.Initialize(*global_sort, global_sort->buffer_manager);
|
1147
1593
|
|
1148
|
-
|
1149
|
-
|
1594
|
+
sort_chunk.Initialize(Allocator::DefaultAllocator(), gastate.sort_types);
|
1595
|
+
sort_chunk.data.back().Reference(payload_chunk.data[0]);
|
1150
1596
|
|
1151
|
-
|
1152
|
-
|
1153
|
-
|
1154
|
-
|
1155
|
-
|
1597
|
+
//! Input data chunk, used for leaf segment aggregation
|
1598
|
+
leaves.Initialize(Allocator::DefaultAllocator(), gastate.inputs.GetTypes());
|
1599
|
+
sel.Initialize();
|
1600
|
+
|
1601
|
+
gastate.locals++;
|
1602
|
+
}
|
1603
|
+
|
1604
|
+
unique_ptr<WindowAggregatorState> WindowDistinctAggregator::GetGlobalState(idx_t group_count,
|
1605
|
+
const ValidityMask &partition_mask) const {
|
1606
|
+
return make_uniq<WindowDistinctAggregatorGlobalState>(*this, group_count);
|
1607
|
+
}
|
1608
|
+
|
1609
|
+
void WindowDistinctAggregator::Sink(WindowAggregatorState &gsink, WindowAggregatorState &lstate, DataChunk &arg_chunk,
|
1610
|
+
idx_t input_idx, optional_ptr<SelectionVector> filter_sel, idx_t filtered) {
|
1611
|
+
WindowAggregator::Sink(gsink, lstate, arg_chunk, input_idx, filter_sel, filtered);
|
1156
1612
|
|
1613
|
+
auto &ldstate = lstate.Cast<WindowDistinctAggregatorLocalState>();
|
1614
|
+
ldstate.Sink(arg_chunk, input_idx, filter_sel, filtered);
|
1615
|
+
}
|
1616
|
+
|
1617
|
+
void WindowDistinctAggregatorLocalState::Sink(DataChunk &arg_chunk, idx_t input_idx,
|
1618
|
+
optional_ptr<SelectionVector> filter_sel, idx_t filtered) {
|
1157
1619
|
// 3: for i ← 0 to in.size do
|
1158
1620
|
// 4: sorted[i] ← (in[i], i)
|
1159
1621
|
const auto count = arg_chunk.size();
|
1160
1622
|
payload_chunk.Reset();
|
1161
1623
|
auto &sorted_vec = payload_chunk.data[0];
|
1162
1624
|
auto sorted = FlatVector::GetData<idx_t>(sorted_vec);
|
1163
|
-
std::iota(sorted, sorted + count,
|
1164
|
-
payload_pos += count;
|
1625
|
+
std::iota(sorted, sorted + count, input_idx);
|
1165
1626
|
|
1166
1627
|
for (column_t c = 0; c < arg_chunk.ColumnCount(); ++c) {
|
1167
1628
|
sort_chunk.data[c].Reference(arg_chunk.data[c]);
|
@@ -1178,61 +1639,178 @@ void WindowDistinctAggregator::Sink(DataChunk &arg_chunk, SelectionVector *filte
|
|
1178
1639
|
|
1179
1640
|
local_sort.SinkChunk(sort_chunk, payload_chunk);
|
1180
1641
|
|
1181
|
-
if (local_sort.SizeInBytes() > memory_per_thread) {
|
1182
|
-
local_sort.Sort(*global_sort, true);
|
1642
|
+
if (local_sort.SizeInBytes() > gastate.memory_per_thread) {
|
1643
|
+
local_sort.Sort(*gastate.global_sort, true);
|
1183
1644
|
}
|
1184
1645
|
}
|
1185
1646
|
|
1186
|
-
|
1187
|
-
|
1188
|
-
|
1189
|
-
|
1190
|
-
|
1647
|
+
void WindowDistinctAggregatorLocalState::ExecuteTask() {
|
1648
|
+
auto &global_sort = *gastate.global_sort;
|
1649
|
+
switch (stage) {
|
1650
|
+
case PartitionSortStage::INIT:
|
1651
|
+
// AddLocalState is thread-safe
|
1652
|
+
global_sort.AddLocalState(local_sort);
|
1653
|
+
break;
|
1654
|
+
case PartitionSortStage::MERGE: {
|
1655
|
+
MergeSorter merge_sorter(global_sort, global_sort.buffer_manager);
|
1656
|
+
merge_sorter.PerformInMergeRound();
|
1657
|
+
break;
|
1658
|
+
}
|
1659
|
+
case PartitionSortStage::SORTED:
|
1660
|
+
Sorted();
|
1661
|
+
break;
|
1662
|
+
default:
|
1663
|
+
break;
|
1664
|
+
}
|
1191
1665
|
|
1192
|
-
|
1193
|
-
}
|
1666
|
+
++gastate.tasks_completed;
|
1667
|
+
}
|
1194
1668
|
|
1195
|
-
void
|
1196
|
-
|
1197
|
-
|
1198
|
-
|
1199
|
-
|
1669
|
+
void WindowDistinctAggregatorGlobalState::MeasurePayloadBlocks() {
|
1670
|
+
const auto &blocks = global_sort->sorted_blocks[0]->payload_data->data_blocks;
|
1671
|
+
idx_t count = 0;
|
1672
|
+
for (const auto &block : blocks) {
|
1673
|
+
block_starts.emplace_back(count);
|
1674
|
+
count += block->count;
|
1675
|
+
}
|
1676
|
+
block_starts.emplace_back(count);
|
1677
|
+
}
|
1678
|
+
|
1679
|
+
bool WindowDistinctAggregatorGlobalState::TryPrepareNextStage(WindowDistinctAggregatorLocalState &lstate) {
|
1680
|
+
lock_guard<mutex> stage_guard(lock);
|
1681
|
+
|
1682
|
+
switch (stage.load()) {
|
1683
|
+
case PartitionSortStage::INIT:
|
1684
|
+
// Wait for all the local sorts to be processed
|
1685
|
+
if (tasks_completed < locals) {
|
1686
|
+
return false;
|
1687
|
+
}
|
1688
|
+
global_sort->PrepareMergePhase();
|
1689
|
+
if (!(global_sort->sorted_blocks.size() / 2)) {
|
1690
|
+
if (global_sort->sorted_blocks.empty()) {
|
1691
|
+
lstate.stage = stage = PartitionSortStage::FINISHED;
|
1692
|
+
return true;
|
1693
|
+
}
|
1694
|
+
MeasurePayloadBlocks();
|
1695
|
+
seconds.resize(block_starts.size() - 1);
|
1696
|
+
total_tasks = seconds.size();
|
1697
|
+
tasks_completed = 0;
|
1698
|
+
tasks_assigned = 0;
|
1699
|
+
lstate.stage = stage = PartitionSortStage::SORTED;
|
1700
|
+
lstate.block_idx = tasks_assigned++;
|
1701
|
+
return true;
|
1702
|
+
}
|
1200
1703
|
global_sort->InitializeMergeRound();
|
1201
|
-
|
1202
|
-
|
1704
|
+
lstate.stage = stage = PartitionSortStage::MERGE;
|
1705
|
+
total_tasks = locals;
|
1706
|
+
tasks_assigned = 1;
|
1707
|
+
tasks_completed = 0;
|
1708
|
+
return true;
|
1709
|
+
case PartitionSortStage::MERGE:
|
1710
|
+
if (tasks_assigned < total_tasks) {
|
1711
|
+
lstate.stage = PartitionSortStage::MERGE;
|
1712
|
+
++tasks_assigned;
|
1713
|
+
return true;
|
1714
|
+
} else if (tasks_completed < tasks_assigned) {
|
1715
|
+
return false;
|
1716
|
+
}
|
1203
1717
|
global_sort->CompleteMergeRound(true);
|
1718
|
+
if (!(global_sort->sorted_blocks.size() / 2)) {
|
1719
|
+
MeasurePayloadBlocks();
|
1720
|
+
seconds.resize(block_starts.size() - 1);
|
1721
|
+
total_tasks = seconds.size();
|
1722
|
+
tasks_completed = 0;
|
1723
|
+
tasks_assigned = 0;
|
1724
|
+
lstate.stage = stage = PartitionSortStage::SORTED;
|
1725
|
+
lstate.block_idx = tasks_assigned++;
|
1726
|
+
return true;
|
1727
|
+
}
|
1728
|
+
global_sort->InitializeMergeRound();
|
1729
|
+
lstate.stage = PartitionSortStage::MERGE;
|
1730
|
+
total_tasks = locals;
|
1731
|
+
tasks_assigned = 1;
|
1732
|
+
tasks_completed = 0;
|
1733
|
+
return true;
|
1734
|
+
case PartitionSortStage::SORTED:
|
1735
|
+
if (tasks_assigned < total_tasks) {
|
1736
|
+
lstate.stage = PartitionSortStage::SORTED;
|
1737
|
+
lstate.block_idx = tasks_assigned++;
|
1738
|
+
return true;
|
1739
|
+
} else if (tasks_completed < tasks_assigned) {
|
1740
|
+
lstate.stage = PartitionSortStage::FINISHED;
|
1741
|
+
// Sleep while other tasks finish
|
1742
|
+
return false;
|
1743
|
+
}
|
1744
|
+
// Last task patches the boundaries
|
1745
|
+
PatchPrevIdcs();
|
1746
|
+
break;
|
1747
|
+
default:
|
1748
|
+
break;
|
1204
1749
|
}
|
1205
1750
|
|
1206
|
-
|
1207
|
-
scan_chunk.Initialize(Allocator::DefaultAllocator(), payload_types);
|
1751
|
+
lstate.stage = stage = PartitionSortStage::FINISHED;
|
1208
1752
|
|
1209
|
-
|
1210
|
-
|
1211
|
-
scanner->Scan(scan_chunk);
|
1212
|
-
idx_t scan_idx = 0;
|
1753
|
+
return true;
|
1754
|
+
}
|
1213
1755
|
|
1214
|
-
|
1215
|
-
|
1216
|
-
|
1217
|
-
|
1218
|
-
DistinctSortTree::ZippedElements prev_idcs;
|
1219
|
-
prev_idcs.resize(count);
|
1756
|
+
void WindowDistinctAggregator::Finalize(WindowAggregatorState &gsink, WindowAggregatorState &lstate,
|
1757
|
+
const FrameStats &stats) {
|
1758
|
+
auto &gdsink = gsink.Cast<WindowDistinctAggregatorGlobalState>();
|
1759
|
+
auto &ldstate = lstate.Cast<WindowDistinctAggregatorLocalState>();
|
1220
1760
|
|
1221
|
-
//
|
1222
|
-
|
1223
|
-
|
1224
|
-
|
1225
|
-
|
1761
|
+
// 5: Sort sorted lexicographically increasing
|
1762
|
+
ldstate.ExecuteTask();
|
1763
|
+
|
1764
|
+
// Merge in parallel
|
1765
|
+
while (gdsink.stage.load() != PartitionSortStage::FINISHED) {
|
1766
|
+
if (gdsink.TryPrepareNextStage(ldstate)) {
|
1767
|
+
ldstate.ExecuteTask();
|
1768
|
+
} else {
|
1769
|
+
std::this_thread::yield();
|
1226
1770
|
}
|
1227
1771
|
}
|
1228
1772
|
|
1773
|
+
// These are parallel implementations,
|
1774
|
+
// so every thread can call them.
|
1775
|
+
gdsink.zipped_tree.Build();
|
1776
|
+
gdsink.merge_sort_tree.Build(ldstate);
|
1777
|
+
|
1778
|
+
++gdsink.finalized;
|
1779
|
+
}
|
1780
|
+
|
1781
|
+
void WindowDistinctAggregatorLocalState::Sorted() {
|
1782
|
+
using ZippedTuple = WindowDistinctAggregatorGlobalState::ZippedTuple;
|
1783
|
+
auto &global_sort = gastate.global_sort;
|
1784
|
+
auto &prev_idcs = gastate.zipped_tree.LowestLevel();
|
1785
|
+
auto &aggregator = gastate.aggregator;
|
1786
|
+
auto &scan_chunk = payload_chunk;
|
1787
|
+
|
1788
|
+
auto scanner = make_uniq<PayloadScanner>(*global_sort, block_idx);
|
1789
|
+
const auto in_size = gastate.block_starts.at(block_idx + 1);
|
1790
|
+
scanner->Scan(scan_chunk);
|
1791
|
+
idx_t scan_idx = 0;
|
1792
|
+
|
1229
1793
|
auto *input_idx = FlatVector::GetData<idx_t>(scan_chunk.data[0]);
|
1230
|
-
|
1231
|
-
prev_idcs[i] = ZippedTuple(0, i);
|
1794
|
+
idx_t i = 0;
|
1232
1795
|
|
1233
1796
|
SBIterator curr(*global_sort, ExpressionType::COMPARE_LESSTHAN);
|
1234
1797
|
SBIterator prev(*global_sort, ExpressionType::COMPARE_LESSTHAN);
|
1235
|
-
auto prefix_layout = global_sort->sort_layout.GetPrefixComparisonLayout(
|
1798
|
+
auto prefix_layout = global_sort->sort_layout.GetPrefixComparisonLayout(aggregator.arg_types.size());
|
1799
|
+
|
1800
|
+
const auto block_begin = gastate.block_starts.at(block_idx);
|
1801
|
+
if (!block_begin) {
|
1802
|
+
// First block, so set up initial sentinel
|
1803
|
+
i = input_idx[scan_idx++];
|
1804
|
+
prev_idcs[i] = ZippedTuple(0, i);
|
1805
|
+
std::get<0>(gastate.seconds[block_idx]) = i;
|
1806
|
+
} else {
|
1807
|
+
// Move to the end of the previous block
|
1808
|
+
// so we can record the comparison result for the first row
|
1809
|
+
curr.SetIndex(block_begin - 1);
|
1810
|
+
prev.SetIndex(block_begin - 1);
|
1811
|
+
scan_idx = 0;
|
1812
|
+
std::get<0>(gastate.seconds[block_idx]) = input_idx[scan_idx];
|
1813
|
+
}
|
1236
1814
|
|
1237
1815
|
// 8: for i ← 1 to in.size do
|
1238
1816
|
for (++curr; curr.GetIndex() < in_size; ++curr, ++prev) {
|
@@ -1265,105 +1843,148 @@ void WindowDistinctAggregator::Finalize(const FrameStats &stats) {
|
|
1265
1843
|
prev_idcs[i] = ZippedTuple(0, i);
|
1266
1844
|
}
|
1267
1845
|
}
|
1846
|
+
|
1847
|
+
// Save the last value of i for patching up the block boundaries
|
1848
|
+
std::get<1>(gastate.seconds[block_idx]) = i;
|
1849
|
+
}
|
1850
|
+
|
1851
|
+
void WindowDistinctAggregatorGlobalState::PatchPrevIdcs() {
|
1268
1852
|
// 13: return prevIdcs
|
1269
1853
|
|
1270
|
-
|
1854
|
+
// Patch up the indices at block boundaries
|
1855
|
+
// (We don't need to patch block 0.)
|
1856
|
+
auto &prev_idcs = zipped_tree.LowestLevel();
|
1857
|
+
for (idx_t block_idx = 1; block_idx < seconds.size(); ++block_idx) {
|
1858
|
+
// We only need to patch if the first index in the block
|
1859
|
+
// was a back link to the previous block (10:)
|
1860
|
+
auto i = std::get<0>(seconds.at(block_idx));
|
1861
|
+
if (std::get<0>(prev_idcs[i])) {
|
1862
|
+
auto second = std::get<1>(seconds.at(block_idx - 1));
|
1863
|
+
prev_idcs[i] = ZippedTuple(second + 1, i);
|
1864
|
+
}
|
1865
|
+
}
|
1866
|
+
}
|
1867
|
+
|
1868
|
+
bool WindowDistinctSortTree::TryNextRun(idx_t &level_idx, idx_t &run_idx) {
|
1869
|
+
const auto fanout = FANOUT;
|
1870
|
+
|
1871
|
+
lock_guard<mutex> stage_guard(build_lock);
|
1872
|
+
|
1873
|
+
// Verify we are not done
|
1874
|
+
if (build_level >= tree.size()) {
|
1875
|
+
return false;
|
1876
|
+
}
|
1877
|
+
|
1878
|
+
// Finished with this level?
|
1879
|
+
if (build_complete >= build_num_runs) {
|
1880
|
+
auto &zipped_tree = gdastate.zipped_tree;
|
1881
|
+
std::swap(tree[build_level].second, zipped_tree.tree[build_level].second);
|
1882
|
+
|
1883
|
+
++build_level;
|
1884
|
+
if (build_level >= tree.size()) {
|
1885
|
+
zipped_tree.tree.clear();
|
1886
|
+
return false;
|
1887
|
+
}
|
1888
|
+
|
1889
|
+
const auto count = LowestLevel().size();
|
1890
|
+
build_run_length *= fanout;
|
1891
|
+
build_num_runs = (count + build_run_length - 1) / build_run_length;
|
1892
|
+
build_run = 0;
|
1893
|
+
build_complete = 0;
|
1894
|
+
}
|
1895
|
+
|
1896
|
+
// If all runs are in flight,
|
1897
|
+
// yield until the next level is ready
|
1898
|
+
if (build_run >= build_num_runs) {
|
1899
|
+
return false;
|
1900
|
+
}
|
1901
|
+
|
1902
|
+
level_idx = build_level;
|
1903
|
+
run_idx = build_run++;
|
1904
|
+
|
1905
|
+
return true;
|
1906
|
+
}
|
1907
|
+
|
1908
|
+
void WindowDistinctSortTree::Build(WindowDistinctAggregatorLocalState &ldastate) {
|
1909
|
+
// Fan in parent levels until we are at the top
|
1910
|
+
// Note that we don't build the top layer as that would just be all the data.
|
1911
|
+
while (build_level.load() < tree.size()) {
|
1912
|
+
idx_t level_idx;
|
1913
|
+
idx_t run_idx;
|
1914
|
+
if (TryNextRun(level_idx, run_idx)) {
|
1915
|
+
BuildRun(level_idx, run_idx, ldastate);
|
1916
|
+
} else {
|
1917
|
+
std::this_thread::yield();
|
1918
|
+
}
|
1919
|
+
}
|
1271
1920
|
}
|
1272
1921
|
|
1273
|
-
|
1274
|
-
|
1275
|
-
auto &
|
1276
|
-
auto &
|
1277
|
-
auto &
|
1278
|
-
const auto state_size = wda.state_size;
|
1279
|
-
auto &internal_nodes = wda.internal_nodes;
|
1280
|
-
auto &levels_flat_native = wda.levels_flat_native;
|
1281
|
-
auto &levels_flat_start = wda.levels_flat_start;
|
1922
|
+
void WindowDistinctSortTree::BuildRun(idx_t level_nr, idx_t run_idx, WindowDistinctAggregatorLocalState &ldastate) {
|
1923
|
+
auto &aggr = gdastate.aggregator.aggr;
|
1924
|
+
auto &allocator = gdastate.allocator;
|
1925
|
+
auto &inputs = gdastate.inputs;
|
1926
|
+
auto &levels_flat_native = gdastate.levels_flat_native;
|
1282
1927
|
|
1283
1928
|
//! Input data chunk, used for leaf segment aggregation
|
1284
|
-
|
1285
|
-
|
1286
|
-
SelectionVector sel;
|
1287
|
-
sel.Initialize();
|
1929
|
+
auto &leaves = ldastate.leaves;
|
1930
|
+
auto &sel = ldastate.sel;
|
1288
1931
|
|
1289
1932
|
AggregateInputData aggr_input_data(aggr.GetFunctionData(), allocator);
|
1290
1933
|
|
1291
1934
|
//! The states to update
|
1292
|
-
|
1935
|
+
auto &update_v = ldastate.update_v;
|
1293
1936
|
auto updates = FlatVector::GetData<data_ptr_t>(update_v);
|
1294
|
-
idx_t nupdate = 0;
|
1295
1937
|
|
1296
|
-
|
1938
|
+
auto &source_v = ldastate.source_v;
|
1297
1939
|
auto sources = FlatVector::GetData<data_ptr_t>(source_v);
|
1298
|
-
|
1940
|
+
auto &target_v = ldastate.target_v;
|
1299
1941
|
auto targets = FlatVector::GetData<data_ptr_t>(target_v);
|
1300
|
-
idx_t ncombine = 0;
|
1301
|
-
|
1302
|
-
// compute space required to store aggregation states of merge sort tree
|
1303
|
-
// this is one aggregate state per entry per level
|
1304
|
-
MergeSortTree<ZippedTuple> zipped_tree(std::move(prev_idcs));
|
1305
|
-
internal_nodes = 0;
|
1306
|
-
for (idx_t level_nr = 0; level_nr < zipped_tree.tree.size(); ++level_nr) {
|
1307
|
-
internal_nodes += zipped_tree.tree[level_nr].first.size();
|
1308
|
-
}
|
1309
|
-
levels_flat_native = make_unsafe_uniq_array<data_t>(internal_nodes * state_size);
|
1310
|
-
levels_flat_start.push_back(0);
|
1311
|
-
idx_t levels_flat_offset = 0;
|
1312
1942
|
|
1313
|
-
|
1314
|
-
|
1315
|
-
|
1316
|
-
for (idx_t level_nr = 0; level_nr < zipped_tree.tree.size(); ++level_nr) {
|
1317
|
-
auto &zipped_level = zipped_tree.tree[level_nr].first;
|
1318
|
-
vector<ElementType> level;
|
1319
|
-
level.reserve(zipped_level.size());
|
1320
|
-
|
1321
|
-
for (idx_t i = 0; i < zipped_level.size(); i += level_width) {
|
1322
|
-
// Reset the combine state
|
1323
|
-
data_ptr_t prev_state = nullptr;
|
1324
|
-
auto next_limit = MinValue<idx_t>(zipped_level.size(), i + level_width);
|
1325
|
-
for (auto j = i; j < next_limit; ++j) {
|
1326
|
-
// Initialise the next aggregate
|
1327
|
-
auto curr_state = levels_flat_native.get() + (levels_flat_offset++ * state_size);
|
1328
|
-
aggr.function.initialize(curr_state);
|
1329
|
-
|
1330
|
-
// Update this state (if it matches)
|
1331
|
-
const auto prev_idx = std::get<0>(zipped_level[j]);
|
1332
|
-
level.emplace_back(prev_idx);
|
1333
|
-
if (prev_idx < i + 1) {
|
1334
|
-
updates[nupdate] = curr_state;
|
1335
|
-
// input_idx
|
1336
|
-
sel[nupdate] = UnsafeNumericCast<sel_t>(std::get<1>(zipped_level[j]));
|
1337
|
-
++nupdate;
|
1338
|
-
}
|
1943
|
+
auto &zipped_tree = gdastate.zipped_tree;
|
1944
|
+
auto &zipped_level = zipped_tree.tree[level_nr].first;
|
1945
|
+
auto &level = tree[level_nr].first;
|
1339
1946
|
|
1340
|
-
|
1341
|
-
|
1342
|
-
|
1343
|
-
|
1344
|
-
|
1345
|
-
|
1346
|
-
|
1347
|
-
|
1348
|
-
|
1349
|
-
|
1350
|
-
|
1351
|
-
|
1352
|
-
|
1353
|
-
|
1354
|
-
|
1355
|
-
|
1356
|
-
|
1357
|
-
|
1358
|
-
|
1359
|
-
}
|
1360
|
-
}
|
1947
|
+
// Reset the combine state
|
1948
|
+
idx_t nupdate = 0;
|
1949
|
+
idx_t ncombine = 0;
|
1950
|
+
data_ptr_t prev_state = nullptr;
|
1951
|
+
idx_t i = run_idx * build_run_length;
|
1952
|
+
auto next_limit = MinValue<idx_t>(zipped_level.size(), i + build_run_length);
|
1953
|
+
idx_t levels_flat_offset = level_nr * zipped_level.size() + i;
|
1954
|
+
for (auto j = i; j < next_limit; ++j) {
|
1955
|
+
// Initialise the next aggregate
|
1956
|
+
auto curr_state = levels_flat_native.GetStatePtr(levels_flat_offset++);
|
1957
|
+
|
1958
|
+
// Update this state (if it matches)
|
1959
|
+
const auto prev_idx = std::get<0>(zipped_level[j]);
|
1960
|
+
level[j] = prev_idx;
|
1961
|
+
if (prev_idx < i + 1) {
|
1962
|
+
updates[nupdate] = curr_state;
|
1963
|
+
// input_idx
|
1964
|
+
sel[nupdate] = UnsafeNumericCast<sel_t>(std::get<1>(zipped_level[j]));
|
1965
|
+
++nupdate;
|
1361
1966
|
}
|
1362
1967
|
|
1363
|
-
|
1364
|
-
|
1365
|
-
|
1366
|
-
|
1968
|
+
// Merge the previous state (if any)
|
1969
|
+
if (prev_state) {
|
1970
|
+
sources[ncombine] = prev_state;
|
1971
|
+
targets[ncombine] = curr_state;
|
1972
|
+
++ncombine;
|
1973
|
+
}
|
1974
|
+
prev_state = curr_state;
|
1975
|
+
|
1976
|
+
// Flush the states if one is maxed out.
|
1977
|
+
if (MaxValue<idx_t>(ncombine, nupdate) >= STANDARD_VECTOR_SIZE) {
|
1978
|
+
// Push the updates first so they propagate
|
1979
|
+
leaves.Reference(inputs);
|
1980
|
+
leaves.Slice(sel, nupdate);
|
1981
|
+
aggr.function.update(leaves.data.data(), aggr_input_data, leaves.ColumnCount(), update_v, nupdate);
|
1982
|
+
nupdate = 0;
|
1983
|
+
|
1984
|
+
// Combine the states sequentially
|
1985
|
+
aggr.function.combine(source_v, target_v, aggr_input_data, ncombine);
|
1986
|
+
ncombine = 0;
|
1987
|
+
}
|
1367
1988
|
}
|
1368
1989
|
|
1369
1990
|
// Flush any remaining states
|
@@ -1378,64 +1999,16 @@ WindowDistinctAggregator::DistinctSortTree::DistinctSortTree(ZippedElements &&pr
|
|
1378
1999
|
aggr.function.combine(source_v, target_v, aggr_input_data, ncombine);
|
1379
2000
|
ncombine = 0;
|
1380
2001
|
}
|
1381
|
-
}
|
1382
|
-
|
1383
|
-
class WindowDistinctState : public WindowAggregatorState {
|
1384
|
-
public:
|
1385
|
-
WindowDistinctState(const AggregateObject &aggr, const DataChunk &inputs, const WindowDistinctAggregator &tree);
|
1386
|
-
|
1387
|
-
void Evaluate(const DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
|
1388
|
-
|
1389
|
-
protected:
|
1390
|
-
//! Flush the accumulated intermediate states into the result states
|
1391
|
-
void FlushStates();
|
1392
|
-
|
1393
|
-
//! The aggregate function
|
1394
|
-
const AggregateObject &aggr;
|
1395
|
-
//! The aggregate function
|
1396
|
-
const DataChunk &inputs;
|
1397
|
-
//! The merge sort tree data
|
1398
|
-
const WindowDistinctAggregator &tree;
|
1399
|
-
//! The size of a single aggregate state
|
1400
|
-
const idx_t state_size;
|
1401
|
-
//! Data pointer that contains a vector of states, used for row aggregation
|
1402
|
-
vector<data_t> state;
|
1403
|
-
//! Reused result state container for the window functions
|
1404
|
-
Vector statef;
|
1405
|
-
//! A vector of pointers to "state", used for buffering intermediate aggregates
|
1406
|
-
Vector statep;
|
1407
|
-
//! Reused state pointers for combining tree elements
|
1408
|
-
Vector statel;
|
1409
|
-
//! Count of buffered values
|
1410
|
-
idx_t flush_count;
|
1411
|
-
//! The frame boundaries, used for the window functions
|
1412
|
-
SubFrames frames;
|
1413
|
-
};
|
1414
2002
|
|
1415
|
-
|
1416
|
-
const WindowDistinctAggregator &tree)
|
1417
|
-
: aggr(aggr), inputs(inputs), tree(tree), state_size(aggr.function.state_size()),
|
1418
|
-
state((state_size * STANDARD_VECTOR_SIZE)), statef(LogicalType::POINTER), statep(LogicalType::POINTER),
|
1419
|
-
statel(LogicalType::POINTER), flush_count(0) {
|
1420
|
-
InitSubFrames(frames, tree.exclude_mode);
|
1421
|
-
|
1422
|
-
// Build the finalise vector that just points to the result states
|
1423
|
-
data_ptr_t state_ptr = state.data();
|
1424
|
-
D_ASSERT(statef.GetVectorType() == VectorType::FLAT_VECTOR);
|
1425
|
-
statef.SetVectorType(VectorType::CONSTANT_VECTOR);
|
1426
|
-
statef.Flatten(STANDARD_VECTOR_SIZE);
|
1427
|
-
auto fdata = FlatVector::GetData<data_ptr_t>(statef);
|
1428
|
-
for (idx_t i = 0; i < STANDARD_VECTOR_SIZE; ++i) {
|
1429
|
-
fdata[i] = state_ptr;
|
1430
|
-
state_ptr += state_size;
|
1431
|
-
}
|
2003
|
+
++build_complete;
|
1432
2004
|
}
|
1433
2005
|
|
1434
|
-
void
|
2006
|
+
void WindowDistinctAggregatorLocalState::FlushStates() {
|
1435
2007
|
if (!flush_count) {
|
1436
2008
|
return;
|
1437
2009
|
}
|
1438
2010
|
|
2011
|
+
const auto &aggr = gastate.aggregator.aggr;
|
1439
2012
|
AggregateInputData aggr_input_data(aggr.GetFunctionData(), allocator);
|
1440
2013
|
statel.Verify(flush_count);
|
1441
2014
|
aggr.function.combine(statel, statep, aggr_input_data, flush_count);
|
@@ -1443,17 +2016,20 @@ void WindowDistinctState::FlushStates() {
|
|
1443
2016
|
flush_count = 0;
|
1444
2017
|
}
|
1445
2018
|
|
1446
|
-
void
|
1447
|
-
|
1448
|
-
auto ldata = FlatVector::GetData<
|
2019
|
+
void WindowDistinctAggregatorLocalState::Evaluate(const WindowDistinctAggregatorGlobalState &gdstate,
|
2020
|
+
const DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
|
2021
|
+
auto ldata = FlatVector::GetData<const_data_ptr_t>(statel);
|
1449
2022
|
auto pdata = FlatVector::GetData<data_ptr_t>(statep);
|
1450
2023
|
|
1451
|
-
const auto &merge_sort_tree =
|
1452
|
-
const auto
|
2024
|
+
const auto &merge_sort_tree = gdstate.merge_sort_tree;
|
2025
|
+
const auto &levels_flat_native = gdstate.levels_flat_native;
|
2026
|
+
const auto exclude_mode = gdstate.aggregator.exclude_mode;
|
1453
2027
|
|
1454
|
-
|
1455
|
-
|
1456
|
-
|
2028
|
+
// Build the finalise vector that just points to the result states
|
2029
|
+
statef.Initialize(count);
|
2030
|
+
|
2031
|
+
EvaluateSubFrames(bounds, exclude_mode, count, row_idx, frames, [&](idx_t rid) {
|
2032
|
+
auto agg_state = statef.GetStatePtr(rid);
|
1457
2033
|
|
1458
2034
|
// TODO: Extend AggregateLowerBound to handle subframes, just like SelectNth.
|
1459
2035
|
const auto lower = frames[0].start;
|
@@ -1463,8 +2039,8 @@ void WindowDistinctState::Evaluate(const DataChunk &bounds, Vector &result, idx_
|
|
1463
2039
|
if (run_pos != run_begin) {
|
1464
2040
|
// Find the source aggregate
|
1465
2041
|
// Buffer a merge of the indicated state into the current state
|
1466
|
-
const auto agg_idx =
|
1467
|
-
const auto running_agg =
|
2042
|
+
const auto agg_idx = gdstate.levels_flat_start[level] + run_pos - 1;
|
2043
|
+
const auto running_agg = levels_flat_native.GetStatePtr(agg_idx);
|
1468
2044
|
pdata[flush_count] = agg_state;
|
1469
2045
|
ldata[flush_count++] = running_agg;
|
1470
2046
|
if (flush_count >= STANDARD_VECTOR_SIZE) {
|
@@ -1478,23 +2054,20 @@ void WindowDistinctState::Evaluate(const DataChunk &bounds, Vector &result, idx_
|
|
1478
2054
|
FlushStates();
|
1479
2055
|
|
1480
2056
|
// Finalise the result aggregates and write to the result
|
1481
|
-
|
1482
|
-
|
1483
|
-
|
1484
|
-
// Destruct the result aggregates
|
1485
|
-
if (aggr.function.destructor) {
|
1486
|
-
aggr.function.destructor(statef, aggr_input_data, count);
|
1487
|
-
}
|
2057
|
+
statef.Finalize(result);
|
2058
|
+
statef.Destroy();
|
1488
2059
|
}
|
1489
2060
|
|
1490
|
-
unique_ptr<WindowAggregatorState> WindowDistinctAggregator::GetLocalState() const {
|
1491
|
-
return make_uniq<
|
2061
|
+
unique_ptr<WindowAggregatorState> WindowDistinctAggregator::GetLocalState(const WindowAggregatorState &gstate) const {
|
2062
|
+
return make_uniq<WindowDistinctAggregatorLocalState>(gstate.Cast<const WindowDistinctAggregatorGlobalState>());
|
1492
2063
|
}
|
1493
2064
|
|
1494
|
-
void WindowDistinctAggregator::Evaluate(
|
1495
|
-
idx_t count, idx_t row_idx) const {
|
1496
|
-
|
1497
|
-
|
2065
|
+
void WindowDistinctAggregator::Evaluate(const WindowAggregatorState &gsink, WindowAggregatorState &lstate,
|
2066
|
+
const DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) const {
|
2067
|
+
|
2068
|
+
const auto &gdstate = gsink.Cast<WindowDistinctAggregatorGlobalState>();
|
2069
|
+
auto &ldstate = lstate.Cast<WindowDistinctAggregatorLocalState>();
|
2070
|
+
ldstate.Evaluate(gdstate, bounds, result, count, row_idx);
|
1498
2071
|
}
|
1499
2072
|
|
1500
2073
|
} // namespace duckdb
|