duckdb 1.0.1-dev22.0 → 1.0.1-dev27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1389)
  1. package/.github/workflows/NodeJS.yml +1 -1
  2. package/binding.gyp +41 -0
  3. package/package.json +1 -1
  4. package/src/duckdb/extension/icu/icu-dateadd.cpp +4 -2
  5. package/src/duckdb/extension/icu/icu-datefunc.cpp +6 -2
  6. package/src/duckdb/extension/icu/icu-datesub.cpp +13 -2
  7. package/src/duckdb/extension/icu/icu-strptime.cpp +6 -6
  8. package/src/duckdb/extension/icu/icu-table-range.cpp +92 -73
  9. package/src/duckdb/extension/icu/icu-timebucket.cpp +12 -2
  10. package/src/duckdb/extension/icu/icu-timezone.cpp +3 -3
  11. package/src/duckdb/extension/icu/icu_extension.cpp +61 -9
  12. package/src/duckdb/extension/json/include/json_executors.hpp +20 -23
  13. package/src/duckdb/extension/json/include/json_functions.hpp +4 -0
  14. package/src/duckdb/extension/json/include/json_scan.hpp +6 -2
  15. package/src/duckdb/extension/json/include/json_structure.hpp +12 -9
  16. package/src/duckdb/extension/json/json_common.cpp +66 -10
  17. package/src/duckdb/extension/json/json_extension.cpp +13 -5
  18. package/src/duckdb/extension/json/json_functions/json_array_length.cpp +1 -1
  19. package/src/duckdb/extension/json/json_functions/json_create.cpp +21 -4
  20. package/src/duckdb/extension/json/json_functions/json_exists.cpp +32 -0
  21. package/src/duckdb/extension/json/json_functions/json_extract.cpp +2 -2
  22. package/src/duckdb/extension/json/json_functions/json_keys.cpp +1 -1
  23. package/src/duckdb/extension/json/json_functions/json_pretty.cpp +32 -0
  24. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +5 -1
  25. package/src/duckdb/extension/json/json_functions/json_structure.cpp +305 -94
  26. package/src/duckdb/extension/json/json_functions/json_transform.cpp +1 -1
  27. package/src/duckdb/extension/json/json_functions/json_type.cpp +3 -3
  28. package/src/duckdb/extension/json/json_functions/json_value.cpp +42 -0
  29. package/src/duckdb/extension/json/json_functions/read_json.cpp +16 -2
  30. package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +3 -2
  31. package/src/duckdb/extension/json/json_functions.cpp +5 -1
  32. package/src/duckdb/extension/json/json_scan.cpp +13 -12
  33. package/src/duckdb/extension/json/serialize_json.cpp +5 -3
  34. package/src/duckdb/extension/parquet/column_reader.cpp +206 -43
  35. package/src/duckdb/extension/parquet/column_writer.cpp +133 -62
  36. package/src/duckdb/extension/parquet/geo_parquet.cpp +391 -0
  37. package/src/duckdb/extension/parquet/include/boolean_column_reader.hpp +16 -5
  38. package/src/duckdb/extension/parquet/include/column_reader.hpp +37 -12
  39. package/src/duckdb/extension/parquet/include/column_writer.hpp +10 -11
  40. package/src/duckdb/extension/parquet/include/expression_column_reader.hpp +52 -0
  41. package/src/duckdb/extension/parquet/include/geo_parquet.hpp +139 -0
  42. package/src/duckdb/extension/parquet/include/parquet_crypto.hpp +13 -8
  43. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -0
  44. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +7 -3
  45. package/src/duckdb/extension/parquet/include/parquet_reader.hpp +55 -8
  46. package/src/duckdb/extension/parquet/include/parquet_rle_bp_decoder.hpp +3 -3
  47. package/src/duckdb/extension/parquet/include/parquet_rle_bp_encoder.hpp +1 -1
  48. package/src/duckdb/extension/parquet/include/parquet_timestamp.hpp +8 -0
  49. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +21 -7
  50. package/src/duckdb/extension/parquet/include/resizable_buffer.hpp +33 -11
  51. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +5 -2
  52. package/src/duckdb/extension/parquet/include/templated_column_reader.hpp +48 -14
  53. package/src/duckdb/extension/parquet/parquet_crypto.cpp +109 -61
  54. package/src/duckdb/extension/parquet/parquet_extension.cpp +305 -72
  55. package/src/duckdb/extension/parquet/parquet_metadata.cpp +4 -4
  56. package/src/duckdb/extension/parquet/parquet_reader.cpp +151 -40
  57. package/src/duckdb/extension/parquet/parquet_statistics.cpp +50 -16
  58. package/src/duckdb/extension/parquet/parquet_timestamp.cpp +42 -1
  59. package/src/duckdb/extension/parquet/parquet_writer.cpp +67 -75
  60. package/src/duckdb/extension/parquet/serialize_parquet.cpp +3 -1
  61. package/src/duckdb/extension/parquet/zstd_file_system.cpp +5 -1
  62. package/src/duckdb/src/catalog/catalog.cpp +14 -16
  63. package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +14 -11
  64. package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +39 -19
  65. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +92 -78
  66. package/src/duckdb/src/catalog/catalog_entry/index_catalog_entry.cpp +10 -2
  67. package/src/duckdb/src/catalog/catalog_entry/macro_catalog_entry.cpp +10 -3
  68. package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +3 -3
  69. package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +7 -7
  70. package/src/duckdb/src/catalog/catalog_entry.cpp +6 -3
  71. package/src/duckdb/src/catalog/catalog_set.cpp +14 -19
  72. package/src/duckdb/src/catalog/default/default_functions.cpp +179 -166
  73. package/src/duckdb/src/catalog/default/default_generator.cpp +24 -0
  74. package/src/duckdb/src/catalog/default/default_schemas.cpp +4 -3
  75. package/src/duckdb/src/catalog/default/default_table_functions.cpp +148 -0
  76. package/src/duckdb/src/catalog/default/default_views.cpp +7 -3
  77. package/src/duckdb/src/catalog/duck_catalog.cpp +7 -1
  78. package/src/duckdb/src/common/adbc/adbc.cpp +120 -58
  79. package/src/duckdb/src/common/allocator.cpp +71 -6
  80. package/src/duckdb/src/common/arrow/appender/bool_data.cpp +8 -7
  81. package/src/duckdb/src/common/arrow/appender/fixed_size_list_data.cpp +1 -1
  82. package/src/duckdb/src/common/arrow/appender/union_data.cpp +4 -5
  83. package/src/duckdb/src/common/arrow/arrow_appender.cpp +55 -21
  84. package/src/duckdb/src/common/arrow/arrow_converter.cpp +85 -10
  85. package/src/duckdb/src/common/arrow/arrow_merge_event.cpp +142 -0
  86. package/src/duckdb/src/common/arrow/arrow_query_result.cpp +56 -0
  87. package/src/duckdb/src/common/arrow/physical_arrow_batch_collector.cpp +37 -0
  88. package/src/duckdb/src/common/arrow/physical_arrow_collector.cpp +128 -0
  89. package/src/duckdb/src/common/arrow/schema_metadata.cpp +101 -0
  90. package/src/duckdb/src/common/cgroups.cpp +189 -0
  91. package/src/duckdb/src/common/compressed_file_system.cpp +6 -3
  92. package/src/duckdb/src/common/encryption_state.cpp +38 -0
  93. package/src/duckdb/src/common/enum_util.cpp +682 -14
  94. package/src/duckdb/src/common/enums/file_compression_type.cpp +24 -0
  95. package/src/duckdb/src/common/enums/metric_type.cpp +208 -0
  96. package/src/duckdb/src/common/enums/optimizer_type.cpp +8 -2
  97. package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
  98. package/src/duckdb/src/common/enums/relation_type.cpp +4 -0
  99. package/src/duckdb/src/common/enums/statement_type.cpp +15 -0
  100. package/src/duckdb/src/common/error_data.cpp +22 -20
  101. package/src/duckdb/src/common/exception/binder_exception.cpp +5 -0
  102. package/src/duckdb/src/common/exception.cpp +11 -1
  103. package/src/duckdb/src/common/extra_type_info.cpp +3 -0
  104. package/src/duckdb/src/common/file_buffer.cpp +1 -1
  105. package/src/duckdb/src/common/file_system.cpp +25 -3
  106. package/src/duckdb/src/common/filename_pattern.cpp +1 -0
  107. package/src/duckdb/src/common/fsst.cpp +15 -14
  108. package/src/duckdb/src/common/gzip_file_system.cpp +3 -1
  109. package/src/duckdb/src/common/hive_partitioning.cpp +103 -43
  110. package/src/duckdb/src/common/http_util.cpp +25 -0
  111. package/src/duckdb/src/common/local_file_system.cpp +48 -27
  112. package/src/duckdb/src/common/multi_file_list.cpp +113 -22
  113. package/src/duckdb/src/common/multi_file_reader.cpp +59 -58
  114. package/src/duckdb/src/common/operator/cast_operators.cpp +133 -34
  115. package/src/duckdb/src/common/operator/string_cast.cpp +42 -11
  116. package/src/duckdb/src/common/progress_bar/progress_bar.cpp +2 -2
  117. package/src/duckdb/src/common/progress_bar/terminal_progress_bar_display.cpp +1 -1
  118. package/src/duckdb/src/common/radix_partitioning.cpp +31 -21
  119. package/src/duckdb/src/common/random_engine.cpp +4 -0
  120. package/src/duckdb/src/common/re2_regex.cpp +47 -12
  121. package/src/duckdb/src/common/render_tree.cpp +243 -0
  122. package/src/duckdb/src/common/row_operations/row_aggregate.cpp +1 -1
  123. package/src/duckdb/src/common/row_operations/row_gather.cpp +2 -2
  124. package/src/duckdb/src/common/row_operations/row_matcher.cpp +58 -5
  125. package/src/duckdb/src/common/row_operations/row_radix_scatter.cpp +79 -43
  126. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +1 -1
  127. package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +6 -4
  128. package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +18 -9
  129. package/src/duckdb/src/common/serializer/memory_stream.cpp +1 -0
  130. package/src/duckdb/src/common/sort/partition_state.cpp +33 -18
  131. package/src/duckdb/src/common/sort/radix_sort.cpp +22 -15
  132. package/src/duckdb/src/common/sort/sort_state.cpp +19 -16
  133. package/src/duckdb/src/common/sort/sorted_block.cpp +11 -10
  134. package/src/duckdb/src/common/string_util.cpp +167 -10
  135. package/src/duckdb/src/common/tree_renderer/graphviz_tree_renderer.cpp +108 -0
  136. package/src/duckdb/src/common/tree_renderer/html_tree_renderer.cpp +267 -0
  137. package/src/duckdb/src/common/tree_renderer/json_tree_renderer.cpp +116 -0
  138. package/src/duckdb/src/common/tree_renderer/text_tree_renderer.cpp +482 -0
  139. package/src/duckdb/src/common/tree_renderer/tree_renderer.cpp +12 -0
  140. package/src/duckdb/src/common/tree_renderer.cpp +16 -508
  141. package/src/duckdb/src/common/types/batched_data_collection.cpp +78 -9
  142. package/src/duckdb/src/common/types/bit.cpp +24 -22
  143. package/src/duckdb/src/common/types/blob.cpp +15 -11
  144. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +18 -9
  145. package/src/duckdb/src/common/types/column/column_data_collection.cpp +4 -4
  146. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +3 -4
  147. package/src/duckdb/src/common/types/column/column_data_consumer.cpp +2 -2
  148. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +70 -21
  149. package/src/duckdb/src/common/types/data_chunk.cpp +10 -1
  150. package/src/duckdb/src/common/types/date.cpp +8 -19
  151. package/src/duckdb/src/common/types/decimal.cpp +3 -2
  152. package/src/duckdb/src/common/types/hugeint.cpp +11 -3
  153. package/src/duckdb/src/common/types/hyperloglog.cpp +212 -227
  154. package/src/duckdb/src/common/types/interval.cpp +1 -1
  155. package/src/duckdb/src/common/types/list_segment.cpp +83 -49
  156. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +22 -83
  157. package/src/duckdb/src/common/types/row/row_data_collection.cpp +2 -2
  158. package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +20 -4
  159. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +28 -7
  160. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +29 -14
  161. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +152 -102
  162. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +4 -1
  163. package/src/duckdb/src/common/types/selection_vector.cpp +17 -1
  164. package/src/duckdb/src/common/types/time.cpp +62 -31
  165. package/src/duckdb/src/common/types/timestamp.cpp +70 -12
  166. package/src/duckdb/src/common/types/uuid.cpp +1 -1
  167. package/src/duckdb/src/common/types/validity_mask.cpp +40 -5
  168. package/src/duckdb/src/common/types/value.cpp +50 -8
  169. package/src/duckdb/src/common/types/varint.cpp +295 -0
  170. package/src/duckdb/src/common/types/vector.cpp +165 -54
  171. package/src/duckdb/src/common/types/vector_buffer.cpp +5 -4
  172. package/src/duckdb/src/common/types.cpp +106 -26
  173. package/src/duckdb/src/common/vector_operations/vector_copy.cpp +13 -25
  174. package/src/duckdb/src/common/vector_operations/vector_hash.cpp +6 -0
  175. package/src/duckdb/src/common/virtual_file_system.cpp +3 -3
  176. package/src/duckdb/src/core_functions/aggregate/distributive/approx_count.cpp +35 -82
  177. package/src/duckdb/src/core_functions/aggregate/distributive/arg_min_max.cpp +283 -46
  178. package/src/duckdb/src/core_functions/aggregate/distributive/bitagg.cpp +4 -4
  179. package/src/duckdb/src/core_functions/aggregate/distributive/entropy.cpp +3 -2
  180. package/src/duckdb/src/core_functions/aggregate/distributive/minmax.cpp +226 -338
  181. package/src/duckdb/src/core_functions/aggregate/distributive/sum.cpp +2 -0
  182. package/src/duckdb/src/core_functions/aggregate/holistic/approx_top_k.cpp +388 -0
  183. package/src/duckdb/src/core_functions/aggregate/holistic/approximate_quantile.cpp +63 -21
  184. package/src/duckdb/src/core_functions/aggregate/holistic/mad.cpp +330 -0
  185. package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +136 -97
  186. package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +601 -1485
  187. package/src/duckdb/src/core_functions/aggregate/nested/binned_histogram.cpp +405 -0
  188. package/src/duckdb/src/core_functions/aggregate/nested/histogram.cpp +136 -165
  189. package/src/duckdb/src/core_functions/function_list.cpp +35 -8
  190. package/src/duckdb/src/core_functions/lambda_functions.cpp +5 -7
  191. package/src/duckdb/src/core_functions/scalar/array/array_functions.cpp +172 -198
  192. package/src/duckdb/src/core_functions/scalar/blob/create_sort_key.cpp +341 -54
  193. package/src/duckdb/src/core_functions/scalar/date/date_diff.cpp +2 -2
  194. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +89 -29
  195. package/src/duckdb/src/core_functions/scalar/date/date_trunc.cpp +1 -1
  196. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +2 -2
  197. package/src/duckdb/src/core_functions/scalar/date/strftime.cpp +133 -71
  198. package/src/duckdb/src/core_functions/scalar/date/to_interval.cpp +1 -1
  199. package/src/duckdb/src/core_functions/scalar/enum/enum_functions.cpp +1 -1
  200. package/src/duckdb/src/core_functions/scalar/generic/can_implicitly_cast.cpp +40 -0
  201. package/src/duckdb/src/core_functions/scalar/generic/error.cpp +1 -1
  202. package/src/duckdb/src/core_functions/scalar/generic/least.cpp +161 -58
  203. package/src/duckdb/src/core_functions/scalar/generic/typeof.cpp +13 -0
  204. package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +1 -1
  205. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +59 -75
  206. package/src/duckdb/src/core_functions/scalar/list/list_distance.cpp +93 -40
  207. package/src/duckdb/src/core_functions/scalar/list/list_has_any_or_all.cpp +227 -0
  208. package/src/duckdb/src/core_functions/scalar/list/list_reduce.cpp +20 -19
  209. package/src/duckdb/src/core_functions/scalar/list/list_sort.cpp +0 -2
  210. package/src/duckdb/src/core_functions/scalar/list/list_value.cpp +106 -8
  211. package/src/duckdb/src/core_functions/scalar/map/map_contains.cpp +56 -0
  212. package/src/duckdb/src/core_functions/scalar/map/map_extract.cpp +73 -118
  213. package/src/duckdb/src/core_functions/scalar/math/numeric.cpp +98 -2
  214. package/src/duckdb/src/core_functions/scalar/operators/bitwise.cpp +1 -2
  215. package/src/duckdb/src/core_functions/scalar/random/setseed.cpp +1 -1
  216. package/src/duckdb/src/core_functions/scalar/string/bar.cpp +1 -1
  217. package/src/duckdb/src/core_functions/scalar/string/hex.cpp +5 -1
  218. package/src/duckdb/src/core_functions/scalar/string/md5.cpp +10 -37
  219. package/src/duckdb/src/core_functions/scalar/string/printf.cpp +18 -2
  220. package/src/duckdb/src/core_functions/scalar/string/repeat.cpp +45 -0
  221. package/src/duckdb/src/core_functions/scalar/string/reverse.cpp +4 -5
  222. package/src/duckdb/src/core_functions/scalar/string/sha1.cpp +35 -0
  223. package/src/duckdb/src/core_functions/scalar/string/sha256.cpp +5 -2
  224. package/src/duckdb/src/core_functions/scalar/string/url_encode.cpp +49 -0
  225. package/src/duckdb/src/core_functions/scalar/struct/struct_pack.cpp +1 -2
  226. package/src/duckdb/src/core_functions/scalar/union/union_extract.cpp +4 -2
  227. package/src/duckdb/src/execution/adaptive_filter.cpp +30 -11
  228. package/src/duckdb/src/execution/aggregate_hashtable.cpp +13 -18
  229. package/src/duckdb/src/execution/expression_executor/execute_conjunction.cpp +4 -9
  230. package/src/duckdb/src/execution/expression_executor.cpp +1 -1
  231. package/src/duckdb/src/execution/index/art/art.cpp +683 -670
  232. package/src/duckdb/src/execution/index/art/art_key.cpp +121 -38
  233. package/src/duckdb/src/execution/index/art/base_leaf.cpp +168 -0
  234. package/src/duckdb/src/execution/index/art/base_node.cpp +163 -0
  235. package/src/duckdb/src/execution/index/art/iterator.cpp +148 -77
  236. package/src/duckdb/src/execution/index/art/leaf.cpp +159 -263
  237. package/src/duckdb/src/execution/index/art/node.cpp +493 -247
  238. package/src/duckdb/src/execution/index/art/node256.cpp +31 -91
  239. package/src/duckdb/src/execution/index/art/node256_leaf.cpp +71 -0
  240. package/src/duckdb/src/execution/index/art/node48.cpp +75 -143
  241. package/src/duckdb/src/execution/index/art/prefix.cpp +424 -244
  242. package/src/duckdb/src/execution/index/bound_index.cpp +7 -1
  243. package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +22 -18
  244. package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +22 -73
  245. package/src/duckdb/src/execution/join_hashtable.cpp +637 -179
  246. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +4 -4
  247. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +15 -10
  248. package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +13 -8
  249. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +525 -132
  250. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +147 -138
  251. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +531 -312
  252. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +1 -1
  253. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp +4 -3
  254. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp +9 -2
  255. package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +13 -17
  256. package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +60 -16
  257. package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +105 -0
  258. package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +24 -24
  259. package/src/duckdb/src/execution/operator/csv_scanner/scanner/skip_scanner.cpp +25 -2
  260. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +275 -112
  261. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +106 -11
  262. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +253 -115
  263. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +93 -52
  264. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +116 -76
  265. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +29 -14
  266. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +1 -1
  267. package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +70 -26
  268. package/src/duckdb/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +81 -60
  269. package/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +88 -50
  270. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +161 -51
  271. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +59 -17
  272. package/src/duckdb/src/execution/operator/filter/physical_filter.cpp +5 -5
  273. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +0 -21
  274. package/src/duckdb/src/execution/operator/helper/physical_buffered_batch_collector.cpp +109 -0
  275. package/src/duckdb/src/execution/operator/helper/physical_buffered_collector.cpp +5 -13
  276. package/src/duckdb/src/execution/operator/helper/physical_explain_analyze.cpp +1 -1
  277. package/src/duckdb/src/execution/operator/helper/physical_load.cpp +12 -4
  278. package/src/duckdb/src/execution/operator/helper/physical_materialized_collector.cpp +0 -16
  279. package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +4 -2
  280. package/src/duckdb/src/execution/operator/helper/physical_reset.cpp +5 -0
  281. package/src/duckdb/src/execution/operator/helper/physical_result_collector.cpp +3 -1
  282. package/src/duckdb/src/execution/operator/helper/physical_set_variable.cpp +39 -0
  283. package/src/duckdb/src/execution/operator/helper/physical_streaming_sample.cpp +4 -2
  284. package/src/duckdb/src/execution/operator/helper/physical_transaction.cpp +16 -5
  285. package/src/duckdb/src/execution/operator/join/outer_join_marker.cpp +1 -1
  286. package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +1 -1
  287. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +1 -1
  288. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +5 -4
  289. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +59 -21
  290. package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +7 -4
  291. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +333 -176
  292. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +57 -34
  293. package/src/duckdb/src/execution/operator/join/physical_join.cpp +16 -8
  294. package/src/duckdb/src/execution/operator/join/physical_left_delim_join.cpp +10 -4
  295. package/src/duckdb/src/execution/operator/join/physical_nested_loop_join.cpp +2 -5
  296. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +3 -3
  297. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +5 -5
  298. package/src/duckdb/src/execution/operator/join/physical_right_delim_join.cpp +7 -2
  299. package/src/duckdb/src/execution/operator/order/physical_order.cpp +17 -12
  300. package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +12 -9
  301. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +35 -17
  302. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +17 -11
  303. package/src/duckdb/src/execution/operator/persistent/physical_copy_database.cpp +5 -1
  304. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +156 -47
  305. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +10 -2
  306. package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +1 -3
  307. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -2
  308. package/src/duckdb/src/execution/operator/projection/physical_projection.cpp +13 -6
  309. package/src/duckdb/src/execution/operator/projection/physical_tableinout_function.cpp +22 -3
  310. package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +19 -3
  311. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +37 -22
  312. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +77 -21
  313. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +27 -55
  314. package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +41 -44
  315. package/src/duckdb/src/execution/operator/set/physical_cte.cpp +4 -6
  316. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +4 -6
  317. package/src/duckdb/src/execution/operator/set/physical_union.cpp +18 -4
  318. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +3 -2
  319. package/src/duckdb/src/execution/physical_operator.cpp +45 -4
  320. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +18 -7
  321. package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +8 -3
  322. package/src/duckdb/src/execution/physical_plan/plan_delim_join.cpp +13 -6
  323. package/src/duckdb/src/execution/physical_plan/plan_explain.cpp +3 -3
  324. package/src/duckdb/src/execution/physical_plan/plan_get.cpp +111 -19
  325. package/src/duckdb/src/execution/physical_plan/plan_limit.cpp +19 -2
  326. package/src/duckdb/src/execution/physical_plan/plan_set.cpp +9 -0
  327. package/src/duckdb/src/execution/physical_plan/plan_window.cpp +3 -1
  328. package/src/duckdb/src/execution/physical_plan_generator.cpp +3 -3
  329. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +49 -49
  330. package/src/duckdb/src/execution/reservoir_sample.cpp +2 -2
  331. package/src/duckdb/src/execution/window_executor.cpp +556 -318
  332. package/src/duckdb/src/execution/window_segment_tree.cpp +1058 -485
  333. package/src/duckdb/src/function/aggregate/distributive/count.cpp +5 -5
  334. package/src/duckdb/src/function/aggregate/distributive/first.cpp +92 -95
  335. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +10 -9
  336. package/src/duckdb/src/function/aggregate_function.cpp +8 -0
  337. package/src/duckdb/src/function/cast/cast_function_set.cpp +10 -1
  338. package/src/duckdb/src/function/cast/decimal_cast.cpp +10 -1
  339. package/src/duckdb/src/function/cast/default_casts.cpp +2 -0
  340. package/src/duckdb/src/function/cast/numeric_casts.cpp +3 -0
  341. package/src/duckdb/src/function/cast/string_cast.cpp +8 -5
  342. package/src/duckdb/src/function/cast/time_casts.cpp +2 -2
  343. package/src/duckdb/src/function/cast/union_casts.cpp +1 -1
  344. package/src/duckdb/src/function/cast/varint_casts.cpp +283 -0
  345. package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +3 -1
  346. package/src/duckdb/src/function/cast_rules.cpp +104 -15
  347. package/src/duckdb/src/function/compression_config.cpp +35 -33
  348. package/src/duckdb/src/function/copy_function.cpp +27 -0
  349. package/src/duckdb/src/function/function_binder.cpp +39 -11
  350. package/src/duckdb/src/function/macro_function.cpp +75 -32
  351. package/src/duckdb/src/function/pragma/pragma_queries.cpp +10 -0
  352. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +1 -0
  353. package/src/duckdb/src/function/scalar/generic/binning.cpp +507 -0
  354. package/src/duckdb/src/function/scalar/generic/getvariable.cpp +58 -0
  355. package/src/duckdb/src/function/scalar/generic_functions.cpp +1 -0
  356. package/src/duckdb/src/function/scalar/list/contains_or_position.cpp +33 -47
  357. package/src/duckdb/src/function/scalar/list/list_extract.cpp +70 -143
  358. package/src/duckdb/src/function/scalar/list/list_resize.cpp +93 -84
  359. package/src/duckdb/src/function/scalar/list/list_zip.cpp +3 -0
  360. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +24 -11
  361. package/src/duckdb/src/function/scalar/sequence/nextval.cpp +4 -4
  362. package/src/duckdb/src/function/scalar/strftime_format.cpp +196 -57
  363. package/src/duckdb/src/function/scalar/string/caseconvert.cpp +9 -7
  364. package/src/duckdb/src/function/scalar/string/concat.cpp +239 -123
  365. package/src/duckdb/src/function/scalar/string/concat_ws.cpp +149 -0
  366. package/src/duckdb/src/function/scalar/string/contains.cpp +18 -7
  367. package/src/duckdb/src/function/scalar/string/like.cpp +2 -2
  368. package/src/duckdb/src/function/scalar/string/substring.cpp +6 -11
  369. package/src/duckdb/src/function/scalar/string_functions.cpp +1 -0
  370. package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +7 -3
  371. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -5
  372. package/src/duckdb/src/function/scalar_function.cpp +5 -2
  373. package/src/duckdb/src/function/scalar_macro_function.cpp +2 -2
  374. package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +20 -39
  375. package/src/duckdb/src/function/table/arrow/arrow_type_info.cpp +135 -0
  376. package/src/duckdb/src/function/table/arrow.cpp +194 -52
  377. package/src/duckdb/src/function/table/arrow_conversion.cpp +212 -69
  378. package/src/duckdb/src/function/table/copy_csv.cpp +43 -14
  379. package/src/duckdb/src/function/table/query_function.cpp +80 -0
  380. package/src/duckdb/src/function/table/range.cpp +222 -142
  381. package/src/duckdb/src/function/table/read_csv.cpp +25 -13
  382. package/src/duckdb/src/function/table/sniff_csv.cpp +55 -35
  383. package/src/duckdb/src/function/table/system/duckdb_constraints.cpp +141 -129
  384. package/src/duckdb/src/function/table/system/duckdb_extensions.cpp +25 -14
  385. package/src/duckdb/src/function/table/system/duckdb_functions.cpp +20 -14
  386. package/src/duckdb/src/function/table/system/duckdb_indexes.cpp +15 -1
  387. package/src/duckdb/src/function/table/system/duckdb_variables.cpp +84 -0
  388. package/src/duckdb/src/function/table/system/test_all_types.cpp +1 -0
  389. package/src/duckdb/src/function/table/system/test_vector_types.cpp +33 -3
  390. package/src/duckdb/src/function/table/system_functions.cpp +1 -0
  391. package/src/duckdb/src/function/table/table_scan.cpp +45 -22
  392. package/src/duckdb/src/function/table/unnest.cpp +2 -2
  393. package/src/duckdb/src/function/table/version/pragma_version.cpp +4 -4
  394. package/src/duckdb/src/function/table_function.cpp +5 -4
  395. package/src/duckdb/src/function/table_macro_function.cpp +2 -2
  396. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +8 -4
  397. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +5 -2
  398. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_schema_entry.hpp +3 -0
  399. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +2 -2
  400. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/macro_catalog_entry.hpp +3 -4
  401. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +5 -5
  402. package/src/duckdb/src/include/duckdb/catalog/default/builtin_types/types.hpp +2 -1
  403. package/src/duckdb/src/include/duckdb/catalog/default/default_functions.hpp +4 -5
  404. package/src/duckdb/src/include/duckdb/catalog/default/default_generator.hpp +4 -5
  405. package/src/duckdb/src/include/duckdb/catalog/default/default_schemas.hpp +2 -1
  406. package/src/duckdb/src/include/duckdb/catalog/default/default_table_functions.hpp +47 -0
  407. package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +2 -0
  408. package/src/duckdb/src/include/duckdb/catalog/similar_catalog_entry.hpp +2 -2
  409. package/src/duckdb/src/include/duckdb/common/allocator.hpp +9 -1
  410. package/src/duckdb/src/include/duckdb/common/array_ptr.hpp +120 -0
  411. package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +37 -11
  412. package/src/duckdb/src/include/duckdb/common/arrow/appender/enum_data.hpp +9 -8
  413. package/src/duckdb/src/include/duckdb/common/arrow/appender/list.hpp +1 -0
  414. package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +6 -4
  415. package/src/duckdb/src/include/duckdb/common/arrow/appender/list_view_data.hpp +92 -0
  416. package/src/duckdb/src/include/duckdb/common/arrow/appender/map_data.hpp +2 -2
  417. package/src/duckdb/src/include/duckdb/common/arrow/appender/scalar_data.hpp +26 -4
  418. package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +90 -11
  419. package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +6 -6
  420. package/src/duckdb/src/include/duckdb/common/arrow/arrow_buffer.hpp +8 -1
  421. package/src/duckdb/src/include/duckdb/common/arrow/arrow_merge_event.hpp +62 -0
  422. package/src/duckdb/src/include/duckdb/common/arrow/arrow_query_result.hpp +52 -0
  423. package/src/duckdb/src/include/duckdb/common/arrow/arrow_types_extension.hpp +42 -0
  424. package/src/duckdb/src/include/duckdb/common/arrow/physical_arrow_batch_collector.hpp +30 -0
  425. package/src/duckdb/src/include/duckdb/common/arrow/physical_arrow_collector.hpp +65 -0
  426. package/src/duckdb/src/include/duckdb/common/arrow/schema_metadata.hpp +43 -0
  427. package/src/duckdb/src/include/duckdb/common/bswap.hpp +18 -16
  428. package/src/duckdb/src/include/duckdb/common/cgroups.hpp +30 -0
  429. package/src/duckdb/src/include/duckdb/common/compressed_file_system.hpp +3 -0
  430. package/src/duckdb/src/include/duckdb/common/dl.hpp +8 -1
  431. package/src/duckdb/src/include/duckdb/common/encryption_state.hpp +48 -0
  432. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +88 -0
  433. package/src/duckdb/src/include/duckdb/common/enums/checkpoint_type.hpp +2 -2
  434. package/src/duckdb/src/include/duckdb/common/enums/copy_overwrite_mode.hpp +6 -1
  435. package/src/duckdb/src/include/duckdb/common/enums/destroy_buffer_upon.hpp +21 -0
  436. package/src/duckdb/src/include/duckdb/common/enums/explain_format.hpp +17 -0
  437. package/src/duckdb/src/include/duckdb/common/enums/file_compression_type.hpp +4 -0
  438. package/src/duckdb/src/include/duckdb/common/enums/join_type.hpp +2 -2
  439. package/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp +88 -0
  440. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +6 -1
  441. package/src/duckdb/src/include/duckdb/common/enums/pending_execution_result.hpp +2 -1
  442. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
  443. package/src/duckdb/src/include/duckdb/common/enums/profiler_format.hpp +1 -1
  444. package/src/duckdb/src/include/duckdb/common/enums/relation_type.hpp +3 -1
  445. package/src/duckdb/src/include/duckdb/common/enums/set_scope.hpp +2 -1
  446. package/src/duckdb/src/include/duckdb/common/enums/statement_type.hpp +23 -2
  447. package/src/duckdb/src/include/duckdb/common/enums/stream_execution_result.hpp +25 -0
  448. package/src/duckdb/src/include/duckdb/common/enums/tableref_type.hpp +2 -1
  449. package/src/duckdb/src/include/duckdb/common/enums/wal_type.hpp +1 -0
  450. package/src/duckdb/src/include/duckdb/common/error_data.hpp +5 -2
  451. package/src/duckdb/src/include/duckdb/common/exception/binder_exception.hpp +1 -0
  452. package/src/duckdb/src/include/duckdb/common/exception.hpp +20 -2
  453. package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +12 -0
  454. package/src/duckdb/src/include/duckdb/common/file_buffer.hpp +2 -0
  455. package/src/duckdb/src/include/duckdb/common/file_open_flags.hpp +16 -0
  456. package/src/duckdb/src/include/duckdb/common/file_opener.hpp +18 -0
  457. package/src/duckdb/src/include/duckdb/common/file_system.hpp +3 -0
  458. package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +4 -0
  459. package/src/duckdb/src/include/duckdb/common/fixed_size_map.hpp +160 -96
  460. package/src/duckdb/src/include/duckdb/common/fsst.hpp +9 -2
  461. package/src/duckdb/src/include/duckdb/common/helper.hpp +22 -8
  462. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +16 -7
  463. package/src/duckdb/src/include/duckdb/common/http_util.hpp +19 -0
  464. package/src/duckdb/src/include/duckdb/common/insertion_order_preserving_map.hpp +19 -6
  465. package/src/duckdb/src/include/duckdb/common/limits.hpp +9 -2
  466. package/src/duckdb/src/include/duckdb/common/multi_file_list.hpp +38 -6
  467. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +9 -2
  468. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +5 -1
  469. package/src/duckdb/src/include/duckdb/common/numeric_utils.hpp +82 -50
  470. package/src/duckdb/src/include/duckdb/common/operator/abs.hpp +11 -0
  471. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +7 -3
  472. package/src/duckdb/src/include/duckdb/common/operator/decimal_cast_operators.hpp +23 -1
  473. package/src/duckdb/src/include/duckdb/common/operator/double_cast_operator.hpp +2 -1
  474. package/src/duckdb/src/include/duckdb/common/operator/integer_cast_operator.hpp +1 -1
  475. package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +4 -0
  476. package/src/duckdb/src/include/duckdb/common/operator/string_cast.hpp +2 -0
  477. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +10 -5
  478. package/src/duckdb/src/include/duckdb/common/optionally_owned_ptr.hpp +1 -0
  479. package/src/duckdb/src/include/duckdb/common/owning_string_map.hpp +155 -0
  480. package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +2 -3
  481. package/src/duckdb/src/include/duckdb/common/platform.hpp +58 -0
  482. package/src/duckdb/src/include/duckdb/common/radix.hpp +172 -27
  483. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +5 -1
  484. package/src/duckdb/src/include/duckdb/common/random_engine.hpp +1 -0
  485. package/src/duckdb/src/include/duckdb/common/re2_regex.hpp +1 -1
  486. package/src/duckdb/src/include/duckdb/common/render_tree.hpp +77 -0
  487. package/src/duckdb/src/include/duckdb/common/row_operations/row_matcher.hpp +12 -0
  488. package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +6 -2
  489. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +5 -3
  490. package/src/duckdb/src/include/duckdb/common/serializer/deserializer.hpp +15 -7
  491. package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp +3 -1
  492. package/src/duckdb/src/include/duckdb/common/serializer/serialization_data.hpp +245 -0
  493. package/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp +10 -0
  494. package/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp +10 -11
  495. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +12 -6
  496. package/src/duckdb/src/include/duckdb/common/string_util.hpp +37 -7
  497. package/src/duckdb/src/include/duckdb/common/tree_renderer/graphviz_tree_renderer.hpp +44 -0
  498. package/src/duckdb/src/include/duckdb/common/tree_renderer/html_tree_renderer.hpp +44 -0
  499. package/src/duckdb/src/include/duckdb/common/tree_renderer/json_tree_renderer.hpp +44 -0
  500. package/src/duckdb/src/include/duckdb/common/tree_renderer/text_tree_renderer.hpp +119 -0
  501. package/src/duckdb/src/include/duckdb/common/tree_renderer.hpp +9 -123
  502. package/src/duckdb/src/include/duckdb/common/type_visitor.hpp +96 -0
  503. package/src/duckdb/src/include/duckdb/common/typedefs.hpp +11 -1
  504. package/src/duckdb/src/include/duckdb/common/types/arrow_string_view_type.hpp +84 -0
  505. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +36 -1
  506. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +1 -1
  507. package/src/duckdb/src/include/duckdb/common/types/cast_helpers.hpp +2 -2
  508. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +4 -2
  509. package/src/duckdb/src/include/duckdb/common/types/column/partitioned_column_data.hpp +52 -0
  510. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +2 -0
  511. package/src/duckdb/src/include/duckdb/common/types/date.hpp +0 -3
  512. package/src/duckdb/src/include/duckdb/common/types/date_lookup_cache.hpp +65 -0
  513. package/src/duckdb/src/include/duckdb/common/types/datetime.hpp +5 -2
  514. package/src/duckdb/src/include/duckdb/common/types/hyperloglog.hpp +49 -40
  515. package/src/duckdb/src/include/duckdb/common/types/interval.hpp +5 -1
  516. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +2 -1
  517. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +41 -9
  518. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection.hpp +4 -3
  519. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +3 -1
  520. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +4 -0
  521. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +4 -0
  522. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +1 -1
  523. package/src/duckdb/src/include/duckdb/common/types/selection_vector.hpp +4 -0
  524. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +4 -1
  525. package/src/duckdb/src/include/duckdb/common/types/time.hpp +11 -6
  526. package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +13 -3
  527. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +103 -12
  528. package/src/duckdb/src/include/duckdb/common/types/value.hpp +12 -3
  529. package/src/duckdb/src/include/duckdb/common/types/varint.hpp +107 -0
  530. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +5 -1
  531. package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +7 -2
  532. package/src/duckdb/src/include/duckdb/common/types.hpp +6 -39
  533. package/src/duckdb/src/include/duckdb/common/union_by_name.hpp +42 -10
  534. package/src/duckdb/src/include/duckdb/common/vector_operations/generic_executor.hpp +29 -0
  535. package/src/duckdb/src/include/duckdb/common/vector_operations/unary_executor.hpp +0 -7
  536. package/src/duckdb/src/include/duckdb/common/vector_operations/vector_operations.hpp +2 -0
  537. package/src/duckdb/src/include/duckdb/common/winapi.hpp +8 -0
  538. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +8 -4
  539. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/stddev.hpp +8 -4
  540. package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +4 -2
  541. package/src/duckdb/src/include/duckdb/core_functions/aggregate/histogram_helpers.hpp +99 -0
  542. package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +16 -7
  543. package/src/duckdb/src/include/duckdb/core_functions/aggregate/minmax_n_helpers.hpp +396 -0
  544. package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +10 -0
  545. package/src/duckdb/src/include/duckdb/core_functions/aggregate/quantile_helpers.hpp +65 -0
  546. package/src/duckdb/src/include/duckdb/core_functions/aggregate/quantile_sort_tree.hpp +349 -0
  547. package/src/duckdb/src/include/duckdb/core_functions/aggregate/quantile_state.hpp +300 -0
  548. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +1 -1
  549. package/src/duckdb/src/include/duckdb/core_functions/aggregate/sort_key_helpers.hpp +55 -0
  550. package/src/duckdb/src/include/duckdb/core_functions/array_kernels.hpp +107 -0
  551. package/src/duckdb/src/include/duckdb/core_functions/create_sort_key.hpp +55 -0
  552. package/src/duckdb/src/include/duckdb/core_functions/lambda_functions.hpp +1 -2
  553. package/src/duckdb/src/include/duckdb/core_functions/scalar/array_functions.hpp +24 -0
  554. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +9 -0
  555. package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +27 -0
  556. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +80 -8
  557. package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +9 -0
  558. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +54 -0
  559. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +30 -21
  560. package/src/duckdb/src/include/duckdb/execution/adaptive_filter.hpp +25 -14
  561. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +2 -48
  562. package/src/duckdb/src/include/duckdb/execution/executor.hpp +25 -2
  563. package/src/duckdb/src/include/duckdb/execution/ht_entry.hpp +102 -0
  564. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +94 -101
  565. package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +43 -25
  566. package/src/duckdb/src/include/duckdb/execution/index/art/base_leaf.hpp +109 -0
  567. package/src/duckdb/src/include/duckdb/execution/index/art/base_node.hpp +140 -0
  568. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +43 -24
  569. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +41 -52
  570. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +133 -74
  571. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +46 -29
  572. package/src/duckdb/src/include/duckdb/execution/index/art/node256_leaf.hpp +53 -0
  573. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +52 -35
  574. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +96 -57
  575. package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +9 -4
  576. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +48 -10
  577. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +0 -2
  578. package/src/duckdb/src/include/duckdb/execution/index/index_pointer.hpp +4 -2
  579. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +114 -36
  580. package/src/duckdb/src/include/duckdb/execution/merge_sort_tree.hpp +158 -67
  581. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/aggregate_object.hpp +1 -1
  582. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +1 -1
  583. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
  584. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_streaming_window.hpp +19 -2
  585. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp +1 -1
  586. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +1 -1
  587. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/ungrouped_aggregate_state.hpp +75 -0
  588. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +81 -23
  589. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp +27 -8
  590. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp +2 -1
  591. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp +31 -22
  592. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp +4 -2
  593. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp +48 -5
  594. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +7 -3
  595. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp +22 -12
  596. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +35 -0
  597. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp +81 -39
  598. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state.hpp +2 -1
  599. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine.hpp +18 -1
  600. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine_cache.hpp +9 -7
  601. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/global_csv_state.hpp +5 -4
  602. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/header_value.hpp +26 -0
  603. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner_boundary.hpp +6 -9
  604. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/skip_scanner.hpp +3 -0
  605. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine_options.hpp +5 -3
  606. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +36 -19
  607. package/src/duckdb/src/include/duckdb/execution/operator/filter/physical_filter.hpp +1 -1
  608. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +21 -0
  609. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_buffered_batch_collector.hpp +53 -0
  610. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_buffered_collector.hpp +3 -0
  611. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_explain_analyze.hpp +6 -2
  612. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_materialized_collector.hpp +18 -0
  613. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_reservoir_sample.hpp +1 -1
  614. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +6 -0
  615. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_set.hpp +2 -2
  616. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_set_variable.hpp +43 -0
  617. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_streaming_sample.hpp +1 -1
  618. package/src/duckdb/src/include/duckdb/execution/operator/join/join_filter_pushdown.hpp +59 -0
  619. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_blockwise_nl_join.hpp +1 -1
  620. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_comparison_join.hpp +8 -1
  621. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_delim_join.hpp +5 -2
  622. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +4 -2
  623. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +2 -0
  624. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_join.hpp +1 -1
  625. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_left_delim_join.hpp +3 -1
  626. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +4 -1
  627. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_right_delim_join.hpp +3 -1
  628. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_order.hpp +1 -1
  629. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_top_n.hpp +1 -1
  630. package/src/duckdb/src/include/duckdb/execution/operator/persistent/batch_memory_manager.hpp +5 -37
  631. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +5 -4
  632. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +8 -2
  633. package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_projection.hpp +1 -1
  634. package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_tableinout_function.hpp +2 -0
  635. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +9 -3
  636. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +8 -6
  637. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_art_index.hpp +2 -2
  638. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +1 -1
  639. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +1 -1
  640. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +21 -6
  641. package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +3 -2
  642. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
  643. package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +137 -110
  644. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +57 -126
  645. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +21 -4
  646. package/src/duckdb/src/include/duckdb/function/cast/default_casts.hpp +1 -1
  647. package/src/duckdb/src/include/duckdb/function/compression/compression.hpp +10 -10
  648. package/src/duckdb/src/include/duckdb/function/compression_function.hpp +37 -7
  649. package/src/duckdb/src/include/duckdb/function/copy_function.hpp +24 -11
  650. package/src/duckdb/src/include/duckdb/function/function_binder.hpp +4 -4
  651. package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +41 -1
  652. package/src/duckdb/src/include/duckdb/function/macro_function.hpp +15 -5
  653. package/src/duckdb/src/include/duckdb/function/pragma/pragma_functions.hpp +1 -0
  654. package/src/duckdb/src/include/duckdb/function/replacement_scan.hpp +20 -4
  655. package/src/duckdb/src/include/duckdb/function/scalar/generic_functions.hpp +6 -0
  656. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +77 -109
  657. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +1 -1
  658. package/src/duckdb/src/include/duckdb/function/scalar/regexp.hpp +6 -3
  659. package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +25 -12
  660. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +9 -8
  661. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +38 -4
  662. package/src/duckdb/src/include/duckdb/function/scalar_macro_function.hpp +1 -1
  663. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +11 -57
  664. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_type_info.hpp +142 -0
  665. package/src/duckdb/src/include/duckdb/function/table/arrow/enum/arrow_datetime_type.hpp +18 -0
  666. package/src/duckdb/src/include/duckdb/function/table/arrow/enum/arrow_type_info_type.hpp +7 -0
  667. package/src/duckdb/src/include/duckdb/function/table/arrow/enum/arrow_variable_size_type.hpp +10 -0
  668. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +2 -0
  669. package/src/duckdb/src/include/duckdb/function/table/range.hpp +4 -0
  670. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +4 -1
  671. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
  672. package/src/duckdb/src/include/duckdb/function/table/table_scan.hpp +5 -5
  673. package/src/duckdb/src/include/duckdb/function/table_function.hpp +14 -2
  674. package/src/duckdb/src/include/duckdb/function/table_macro_function.hpp +1 -1
  675. package/src/duckdb/src/include/duckdb/main/appender.hpp +14 -4
  676. package/src/duckdb/src/include/duckdb/main/attached_database.hpp +25 -7
  677. package/src/duckdb/src/include/duckdb/main/buffered_data/batched_buffered_data.hpp +79 -0
  678. package/src/duckdb/src/include/duckdb/main/buffered_data/buffered_data.hpp +10 -20
  679. package/src/duckdb/src/include/duckdb/main/buffered_data/simple_buffered_data.hpp +11 -12
  680. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +7 -2
  681. package/src/duckdb/src/include/duckdb/main/capi/cast/generic.hpp +1 -1
  682. package/src/duckdb/src/include/duckdb/main/capi/cast/utils.hpp +2 -2
  683. package/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp +809 -0
  684. package/src/duckdb/src/include/duckdb/main/chunk_scan_state/batched_data_collection.hpp +35 -0
  685. package/src/duckdb/src/include/duckdb/main/client_config.hpp +68 -2
  686. package/src/duckdb/src/include/duckdb/main/client_context.hpp +30 -22
  687. package/src/duckdb/src/include/duckdb/main/client_context_state.hpp +79 -1
  688. package/src/duckdb/src/include/duckdb/main/client_properties.hpp +9 -3
  689. package/src/duckdb/src/include/duckdb/main/config.hpp +55 -7
  690. package/src/duckdb/src/include/duckdb/main/connection.hpp +5 -1
  691. package/src/duckdb/src/include/duckdb/main/database.hpp +16 -5
  692. package/src/duckdb/src/include/duckdb/main/database_manager.hpp +9 -8
  693. package/src/duckdb/src/include/duckdb/main/db_instance_cache.hpp +21 -6
  694. package/src/duckdb/src/include/duckdb/main/extension.hpp +20 -0
  695. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +25 -0
  696. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +29 -23
  697. package/src/duckdb/src/include/duckdb/main/extension_install_info.hpp +6 -0
  698. package/src/duckdb/src/include/duckdb/main/extension_util.hpp +3 -0
  699. package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +4 -2
  700. package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +5 -6
  701. package/src/duckdb/src/include/duckdb/main/prepared_statement_data.hpp +2 -5
  702. package/src/duckdb/src/include/duckdb/main/profiling_info.hpp +87 -0
  703. package/src/duckdb/src/include/duckdb/main/profiling_node.hpp +60 -0
  704. package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +72 -34
  705. package/src/duckdb/src/include/duckdb/main/query_result.hpp +1 -1
  706. package/src/duckdb/src/include/duckdb/main/relation/create_table_relation.hpp +2 -1
  707. package/src/duckdb/src/include/duckdb/main/relation/delim_get_relation.hpp +30 -0
  708. package/src/duckdb/src/include/duckdb/main/relation/explain_relation.hpp +3 -1
  709. package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +3 -0
  710. package/src/duckdb/src/include/duckdb/main/relation/materialized_relation.hpp +1 -4
  711. package/src/duckdb/src/include/duckdb/main/relation/query_relation.hpp +4 -1
  712. package/src/duckdb/src/include/duckdb/main/relation/read_json_relation.hpp +6 -0
  713. package/src/duckdb/src/include/duckdb/main/relation/table_function_relation.hpp +1 -0
  714. package/src/duckdb/src/include/duckdb/main/relation/view_relation.hpp +2 -0
  715. package/src/duckdb/src/include/duckdb/main/relation.hpp +7 -4
  716. package/src/duckdb/src/include/duckdb/main/secret/default_secrets.hpp +36 -0
  717. package/src/duckdb/src/include/duckdb/main/secret/secret.hpp +108 -0
  718. package/src/duckdb/src/include/duckdb/main/secret/secret_manager.hpp +14 -4
  719. package/src/duckdb/src/include/duckdb/main/settings.hpp +227 -3
  720. package/src/duckdb/src/include/duckdb/main/stream_query_result.hpp +8 -0
  721. package/src/duckdb/src/include/duckdb/optimizer/build_probe_side_optimizer.hpp +51 -0
  722. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +7 -0
  723. package/src/duckdb/src/include/duckdb/optimizer/cte_filter_pusher.hpp +46 -0
  724. package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +1 -1
  725. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +7 -0
  726. package/src/duckdb/src/include/duckdb/optimizer/join_filter_pushdown_optimizer.hpp +31 -0
  727. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +51 -10
  728. package/src/duckdb/src/include/duckdb/optimizer/join_order/cost_model.hpp +1 -0
  729. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +17 -5
  730. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +1 -1
  731. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +15 -13
  732. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +9 -4
  733. package/src/duckdb/src/include/duckdb/optimizer/limit_pushdown.hpp +25 -0
  734. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +1 -0
  735. package/src/duckdb/src/include/duckdb/optimizer/rule/join_dependent_filter.hpp +37 -0
  736. package/src/duckdb/src/include/duckdb/parallel/executor_task.hpp +6 -1
  737. package/src/duckdb/src/include/duckdb/parallel/interrupt.hpp +54 -2
  738. package/src/duckdb/src/include/duckdb/parallel/meta_pipeline.hpp +27 -8
  739. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +1 -0
  740. package/src/duckdb/src/include/duckdb/parallel/pipeline_prepare_finish_event.hpp +25 -0
  741. package/src/duckdb/src/include/duckdb/parallel/task_executor.hpp +63 -0
  742. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +10 -1
  743. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +4 -1
  744. package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +5 -0
  745. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +5 -0
  746. package/src/duckdb/src/include/duckdb/parser/parsed_data/attach_info.hpp +5 -0
  747. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_index_info.hpp +2 -0
  748. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_macro_info.hpp +11 -1
  749. package/src/duckdb/src/include/duckdb/parser/parsed_data/transaction_info.hpp +9 -0
  750. package/src/duckdb/src/include/duckdb/parser/parsed_expression_iterator.hpp +13 -6
  751. package/src/duckdb/src/include/duckdb/parser/parser_extension.hpp +1 -1
  752. package/src/duckdb/src/include/duckdb/parser/sql_statement.hpp +1 -3
  753. package/src/duckdb/src/include/duckdb/parser/statement/copy_statement.hpp +2 -0
  754. package/src/duckdb/src/include/duckdb/parser/statement/explain_statement.hpp +5 -1
  755. package/src/duckdb/src/include/duckdb/parser/statement/set_statement.hpp +2 -2
  756. package/src/duckdb/src/include/duckdb/parser/statement/transaction_statement.hpp +1 -1
  757. package/src/duckdb/src/include/duckdb/parser/tableref/basetableref.hpp +0 -2
  758. package/src/duckdb/src/include/duckdb/parser/tableref/column_data_ref.hpp +9 -7
  759. package/src/duckdb/src/include/duckdb/parser/tableref/delimgetref.hpp +37 -0
  760. package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +4 -0
  761. package/src/duckdb/src/include/duckdb/parser/tableref/pivotref.hpp +0 -2
  762. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +0 -2
  763. package/src/duckdb/src/include/duckdb/parser/tableref/table_function_ref.hpp +0 -1
  764. package/src/duckdb/src/include/duckdb/parser/tableref.hpp +3 -1
  765. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +17 -9
  766. package/src/duckdb/src/include/duckdb/planner/binder.hpp +24 -14
  767. package/src/duckdb/src/include/duckdb/planner/collation_binding.hpp +44 -0
  768. package/src/duckdb/src/include/duckdb/planner/expression/bound_aggregate_expression.hpp +1 -1
  769. package/src/duckdb/src/include/duckdb/planner/expression/bound_between_expression.hpp +1 -1
  770. package/src/duckdb/src/include/duckdb/planner/expression/bound_case_expression.hpp +1 -1
  771. package/src/duckdb/src/include/duckdb/planner/expression/bound_cast_expression.hpp +1 -1
  772. package/src/duckdb/src/include/duckdb/planner/expression/bound_columnref_expression.hpp +1 -1
  773. package/src/duckdb/src/include/duckdb/planner/expression/bound_comparison_expression.hpp +1 -1
  774. package/src/duckdb/src/include/duckdb/planner/expression/bound_conjunction_expression.hpp +1 -1
  775. package/src/duckdb/src/include/duckdb/planner/expression/bound_constant_expression.hpp +1 -1
  776. package/src/duckdb/src/include/duckdb/planner/expression/bound_default_expression.hpp +1 -1
  777. package/src/duckdb/src/include/duckdb/planner/expression/bound_expanded_expression.hpp +1 -1
  778. package/src/duckdb/src/include/duckdb/planner/expression/bound_function_expression.hpp +1 -1
  779. package/src/duckdb/src/include/duckdb/planner/expression/bound_lambda_expression.hpp +1 -1
  780. package/src/duckdb/src/include/duckdb/planner/expression/bound_lambdaref_expression.hpp +1 -1
  781. package/src/duckdb/src/include/duckdb/planner/expression/bound_operator_expression.hpp +1 -1
  782. package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_data.hpp +2 -0
  783. package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_expression.hpp +1 -1
  784. package/src/duckdb/src/include/duckdb/planner/expression/bound_reference_expression.hpp +1 -1
  785. package/src/duckdb/src/include/duckdb/planner/expression/bound_subquery_expression.hpp +2 -2
  786. package/src/duckdb/src/include/duckdb/planner/expression/bound_unnest_expression.hpp +1 -1
  787. package/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp +1 -1
  788. package/src/duckdb/src/include/duckdb/planner/expression.hpp +2 -2
  789. package/src/duckdb/src/include/duckdb/planner/expression_binder/column_alias_binder.hpp +2 -0
  790. package/src/duckdb/src/include/duckdb/planner/expression_binder/group_binder.hpp +1 -0
  791. package/src/duckdb/src/include/duckdb/planner/expression_binder/order_binder.hpp +6 -5
  792. package/src/duckdb/src/include/duckdb/planner/expression_binder/where_binder.hpp +1 -0
  793. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +19 -11
  794. package/src/duckdb/src/include/duckdb/planner/filter/conjunction_filter.hpp +4 -0
  795. package/src/duckdb/src/include/duckdb/planner/filter/constant_filter.hpp +2 -0
  796. package/src/duckdb/src/include/duckdb/planner/filter/null_filter.hpp +4 -0
  797. package/src/duckdb/src/include/duckdb/planner/filter/struct_filter.hpp +2 -0
  798. package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +7 -2
  799. package/src/duckdb/src/include/duckdb/planner/logical_operator_visitor.hpp +2 -1
  800. package/src/duckdb/src/include/duckdb/planner/operator/logical_aggregate.hpp +1 -1
  801. package/src/duckdb/src/include/duckdb/planner/operator/logical_any_join.hpp +1 -1
  802. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +6 -1
  803. package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +10 -2
  804. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +1 -0
  805. package/src/duckdb/src/include/duckdb/planner/operator/logical_delim_get.hpp +1 -1
  806. package/src/duckdb/src/include/duckdb/planner/operator/logical_distinct.hpp +1 -1
  807. package/src/duckdb/src/include/duckdb/planner/operator/logical_execute.hpp +1 -1
  808. package/src/duckdb/src/include/duckdb/planner/operator/logical_explain.hpp +4 -2
  809. package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +15 -5
  810. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +1 -0
  811. package/src/duckdb/src/include/duckdb/planner/operator/logical_order.hpp +1 -1
  812. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -1
  813. package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +24 -2
  814. package/src/duckdb/src/include/duckdb/planner/tableref/bound_delimgetref.hpp +26 -0
  815. package/src/duckdb/src/include/duckdb/planner/tableref/bound_joinref.hpp +6 -0
  816. package/src/duckdb/src/include/duckdb/planner/tableref/bound_subqueryref.hpp +1 -1
  817. package/src/duckdb/src/include/duckdb/planner/tableref/bound_table_function.hpp +2 -0
  818. package/src/duckdb/src/include/duckdb/planner/tableref/list.hpp +2 -0
  819. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +2 -1
  820. package/src/duckdb/src/include/duckdb/storage/block.hpp +4 -2
  821. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +48 -3
  822. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +21 -7
  823. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +65 -51
  824. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +14 -5
  825. package/src/duckdb/src/include/duckdb/storage/checkpoint/row_group_writer.hpp +0 -4
  826. package/src/duckdb/src/include/duckdb/storage/checkpoint/string_checkpoint_state.hpp +3 -2
  827. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +1 -0
  828. package/src/duckdb/src/include/duckdb/storage/checkpoint/write_overflow_strings_to_disk.hpp +3 -4
  829. package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +2 -0
  830. package/src/duckdb/src/include/duckdb/storage/compression/alp/algorithm/alp.hpp +4 -4
  831. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_analyze.hpp +6 -4
  832. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_compress.hpp +19 -17
  833. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_constants.hpp +2 -2
  834. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_scan.hpp +3 -4
  835. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_utils.hpp +3 -2
  836. package/src/duckdb/src/include/duckdb/storage/compression/alprd/algorithm/alprd.hpp +3 -2
  837. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_analyze.hpp +13 -11
  838. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_compress.hpp +19 -19
  839. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_scan.hpp +3 -4
  840. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -1
  841. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -1
  842. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +10 -2
  843. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +3 -2
  844. package/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp +15 -0
  845. package/src/duckdb/src/include/duckdb/storage/index_storage_info.hpp +14 -10
  846. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +6 -8
  847. package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +7 -4
  848. package/src/duckdb/src/include/duckdb/storage/segment/uncompressed.hpp +4 -7
  849. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +29 -4
  850. package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +22 -7
  851. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +15 -2
  852. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +8 -2
  853. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +5 -16
  854. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats_union.hpp +51 -13
  855. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +6 -3
  856. package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +29 -19
  857. package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +23 -7
  858. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +27 -18
  859. package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp +6 -3
  860. package/src/duckdb/src/include/duckdb/storage/table/array_column_data.hpp +5 -2
  861. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
  862. package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +5 -1
  863. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +77 -6
  864. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +23 -11
  865. package/src/duckdb/src/include/duckdb/storage/table/data_table_info.hpp +3 -0
  866. package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +5 -2
  867. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +18 -4
  868. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +7 -1
  869. package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +2 -1
  870. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +89 -14
  871. package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +4 -2
  872. package/src/duckdb/src/include/duckdb/storage/table/struct_column_data.hpp +4 -2
  873. package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +2 -2
  874. package/src/duckdb/src/include/duckdb/storage/table/validity_column_data.hpp +1 -1
  875. package/src/duckdb/src/include/duckdb/storage/temporary_memory_manager.hpp +33 -15
  876. package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +9 -9
  877. package/src/duckdb/src/include/duckdb/transaction/cleanup_state.hpp +3 -1
  878. package/src/duckdb/src/include/duckdb/transaction/commit_state.hpp +4 -16
  879. package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +27 -4
  880. package/src/duckdb/src/include/duckdb/transaction/duck_transaction_manager.hpp +11 -0
  881. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +6 -2
  882. package/src/duckdb/src/include/duckdb/transaction/meta_transaction.hpp +5 -5
  883. package/src/duckdb/src/include/duckdb/transaction/transaction_context.hpp +6 -2
  884. package/src/duckdb/src/include/duckdb/transaction/undo_buffer.hpp +5 -3
  885. package/src/duckdb/src/include/duckdb/transaction/wal_write_state.hpp +48 -0
  886. package/src/duckdb/src/include/duckdb.h +1779 -739
  887. package/src/duckdb/src/include/duckdb_extension.h +921 -0
  888. package/src/duckdb/src/main/appender.cpp +53 -7
  889. package/src/duckdb/src/main/attached_database.cpp +87 -17
  890. package/src/duckdb/src/main/buffered_data/batched_buffered_data.cpp +226 -0
  891. package/src/duckdb/src/main/buffered_data/buffered_data.cpp +35 -0
  892. package/src/duckdb/src/main/buffered_data/simple_buffered_data.cpp +48 -23
  893. package/src/duckdb/src/main/capi/aggregate_function-c.cpp +327 -0
  894. package/src/duckdb/src/main/capi/appender-c.cpp +18 -0
  895. package/src/duckdb/src/main/capi/cast/utils-c.cpp +2 -2
  896. package/src/duckdb/src/main/capi/cast_function-c.cpp +210 -0
  897. package/src/duckdb/src/main/capi/config-c.cpp +3 -3
  898. package/src/duckdb/src/main/capi/data_chunk-c.cpp +18 -7
  899. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +223 -24
  900. package/src/duckdb/src/main/capi/helper-c.cpp +51 -11
  901. package/src/duckdb/src/main/capi/logical_types-c.cpp +105 -46
  902. package/src/duckdb/src/main/capi/pending-c.cpp +7 -6
  903. package/src/duckdb/src/main/capi/prepared-c.cpp +18 -7
  904. package/src/duckdb/src/main/capi/profiling_info-c.cpp +84 -0
  905. package/src/duckdb/src/main/capi/result-c.cpp +139 -37
  906. package/src/duckdb/src/main/capi/scalar_function-c.cpp +269 -0
  907. package/src/duckdb/src/main/capi/table_description-c.cpp +82 -0
  908. package/src/duckdb/src/main/capi/table_function-c.cpp +161 -95
  909. package/src/duckdb/src/main/capi/value-c.cpp +2 -2
  910. package/src/duckdb/src/main/chunk_scan_state/batched_data_collection.cpp +57 -0
  911. package/src/duckdb/src/main/client_config.cpp +17 -0
  912. package/src/duckdb/src/main/client_context.cpp +67 -52
  913. package/src/duckdb/src/main/client_data.cpp +3 -3
  914. package/src/duckdb/src/main/config.cpp +120 -62
  915. package/src/duckdb/src/main/connection.cpp +14 -2
  916. package/src/duckdb/src/main/database.cpp +96 -35
  917. package/src/duckdb/src/main/database_manager.cpp +25 -23
  918. package/src/duckdb/src/main/database_path_and_type.cpp +2 -2
  919. package/src/duckdb/src/main/db_instance_cache.cpp +54 -19
  920. package/src/duckdb/src/main/extension/extension_helper.cpp +47 -42
  921. package/src/duckdb/src/main/extension/extension_install.cpp +155 -87
  922. package/src/duckdb/src/main/extension/extension_load.cpp +180 -26
  923. package/src/duckdb/src/main/extension/extension_util.cpp +8 -0
  924. package/src/duckdb/src/main/extension.cpp +72 -5
  925. package/src/duckdb/src/main/pending_query_result.cpp +20 -12
  926. package/src/duckdb/src/main/prepared_statement.cpp +6 -6
  927. package/src/duckdb/src/main/prepared_statement_data.cpp +28 -17
  928. package/src/duckdb/src/main/profiling_info.cpp +196 -0
  929. package/src/duckdb/src/main/query_profiler.cpp +413 -224
  930. package/src/duckdb/src/main/query_result.cpp +1 -1
  931. package/src/duckdb/src/main/relation/create_table_relation.cpp +4 -2
  932. package/src/duckdb/src/main/relation/create_view_relation.cpp +0 -6
  933. package/src/duckdb/src/main/relation/delim_get_relation.cpp +44 -0
  934. package/src/duckdb/src/main/relation/explain_relation.cpp +4 -3
  935. package/src/duckdb/src/main/relation/join_relation.cpp +5 -0
  936. package/src/duckdb/src/main/relation/limit_relation.cpp +1 -1
  937. package/src/duckdb/src/main/relation/materialized_relation.cpp +3 -3
  938. package/src/duckdb/src/main/relation/query_relation.cpp +42 -15
  939. package/src/duckdb/src/main/relation/read_csv_relation.cpp +7 -14
  940. package/src/duckdb/src/main/relation/read_json_relation.cpp +20 -0
  941. package/src/duckdb/src/main/relation/setop_relation.cpp +1 -1
  942. package/src/duckdb/src/main/relation/table_function_relation.cpp +6 -0
  943. package/src/duckdb/src/main/relation/view_relation.cpp +10 -0
  944. package/src/duckdb/src/main/relation.cpp +12 -8
  945. package/src/duckdb/src/main/secret/default_secrets.cpp +108 -0
  946. package/src/duckdb/src/main/secret/secret.cpp +145 -2
  947. package/src/duckdb/src/main/secret/secret_manager.cpp +85 -35
  948. package/src/duckdb/src/main/secret/secret_storage.cpp +29 -17
  949. package/src/duckdb/src/main/settings/settings.cpp +503 -11
  950. package/src/duckdb/src/main/stream_query_result.cpp +75 -2
  951. package/src/duckdb/src/optimizer/build_probe_side_optimizer.cpp +248 -0
  952. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +28 -6
  953. package/src/duckdb/src/optimizer/compressed_materialization/compress_comparison_join.cpp +152 -0
  954. package/src/duckdb/src/optimizer/compressed_materialization.cpp +11 -1
  955. package/src/duckdb/src/optimizer/cse_optimizer.cpp +3 -0
  956. package/src/duckdb/src/optimizer/cte_filter_pusher.cpp +117 -0
  957. package/src/duckdb/src/optimizer/filter_combiner.cpp +30 -9
  958. package/src/duckdb/src/optimizer/filter_pullup.cpp +54 -2
  959. package/src/duckdb/src/optimizer/filter_pushdown.cpp +71 -3
  960. package/src/duckdb/src/optimizer/join_filter_pushdown_optimizer.cpp +154 -0
  961. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +245 -114
  962. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +42 -20
  963. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +6 -2
  964. package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +32 -10
  965. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +97 -131
  966. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +265 -51
  967. package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +21 -17
  968. package/src/duckdb/src/optimizer/limit_pushdown.cpp +42 -0
  969. package/src/duckdb/src/optimizer/optimizer.cpp +51 -8
  970. package/src/duckdb/src/optimizer/pushdown/pushdown_aggregate.cpp +17 -17
  971. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +22 -4
  972. package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +1 -18
  973. package/src/duckdb/src/optimizer/pushdown/pushdown_inner_join.cpp +6 -0
  974. package/src/duckdb/src/optimizer/pushdown/pushdown_mark_join.cpp +4 -2
  975. package/src/duckdb/src/optimizer/pushdown/pushdown_window.cpp +91 -0
  976. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +21 -25
  977. package/src/duckdb/src/optimizer/rule/comparison_simplification.cpp +1 -0
  978. package/src/duckdb/src/optimizer/rule/empty_needle_removal.cpp +3 -0
  979. package/src/duckdb/src/optimizer/rule/equal_or_null_simplification.cpp +2 -2
  980. package/src/duckdb/src/optimizer/rule/in_clause_simplification_rule.cpp +8 -2
  981. package/src/duckdb/src/optimizer/rule/join_dependent_filter.cpp +135 -0
  982. package/src/duckdb/src/optimizer/rule/like_optimizations.cpp +1 -1
  983. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +1 -1
  984. package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +6 -1
  985. package/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp +7 -6
  986. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +1 -1
  987. package/src/duckdb/src/optimizer/topn_optimizer.cpp +46 -7
  988. package/src/duckdb/src/parallel/executor.cpp +129 -51
  989. package/src/duckdb/src/parallel/executor_task.cpp +16 -3
  990. package/src/duckdb/src/parallel/meta_pipeline.cpp +98 -29
  991. package/src/duckdb/src/parallel/pipeline.cpp +17 -3
  992. package/src/duckdb/src/parallel/pipeline_executor.cpp +14 -2
  993. package/src/duckdb/src/parallel/pipeline_prepare_finish_event.cpp +34 -0
  994. package/src/duckdb/src/parallel/task_executor.cpp +84 -0
  995. package/src/duckdb/src/parallel/task_scheduler.cpp +94 -16
  996. package/src/duckdb/src/parallel/thread_context.cpp +1 -1
  997. package/src/duckdb/src/parser/expression/function_expression.cpp +14 -0
  998. package/src/duckdb/src/parser/expression/star_expression.cpp +35 -2
  999. package/src/duckdb/src/parser/parsed_data/alter_table_info.cpp +5 -1
  1000. package/src/duckdb/src/parser/parsed_data/attach_info.cpp +17 -0
  1001. package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +37 -28
  1002. package/src/duckdb/src/parser/parsed_data/create_macro_info.cpp +44 -2
  1003. package/src/duckdb/src/parser/parsed_data/transaction_info.cpp +21 -1
  1004. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +29 -25
  1005. package/src/duckdb/src/parser/parser.cpp +41 -1
  1006. package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +1 -0
  1007. package/src/duckdb/src/parser/statement/explain_statement.cpp +28 -13
  1008. package/src/duckdb/src/parser/statement/relation_statement.cpp +5 -0
  1009. package/src/duckdb/src/parser/statement/set_statement.cpp +4 -2
  1010. package/src/duckdb/src/parser/statement/transaction_statement.cpp +3 -3
  1011. package/src/duckdb/src/parser/tableref/column_data_ref.cpp +1 -27
  1012. package/src/duckdb/src/parser/tableref/delimgetref.cpp +30 -0
  1013. package/src/duckdb/src/parser/tableref/joinref.cpp +4 -0
  1014. package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +35 -29
  1015. package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +32 -32
  1016. package/src/duckdb/src/parser/transform/expression/transform_columnref.cpp +2 -1
  1017. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +17 -0
  1018. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +5 -0
  1019. package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +36 -34
  1020. package/src/duckdb/src/parser/transform/expression/transform_operator.cpp +30 -14
  1021. package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +1 -1
  1022. package/src/duckdb/src/parser/transform/helpers/transform_alias.cpp +2 -1
  1023. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +27 -19
  1024. package/src/duckdb/src/parser/transform/helpers/transform_orderby.cpp +31 -28
  1025. package/src/duckdb/src/parser/transform/statement/transform_alter_table.cpp +25 -27
  1026. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +1 -1
  1027. package/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +53 -42
  1028. package/src/duckdb/src/parser/transform/statement/transform_create_table.cpp +6 -6
  1029. package/src/duckdb/src/parser/transform/statement/transform_create_table_as.cpp +1 -1
  1030. package/src/duckdb/src/parser/transform/statement/transform_create_type.cpp +1 -1
  1031. package/src/duckdb/src/parser/transform/statement/transform_create_view.cpp +1 -1
  1032. package/src/duckdb/src/parser/transform/statement/transform_explain.cpp +38 -3
  1033. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +1 -2
  1034. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +1 -1
  1035. package/src/duckdb/src/parser/transform/statement/transform_prepare.cpp +1 -1
  1036. package/src/duckdb/src/parser/transform/statement/transform_select.cpp +26 -21
  1037. package/src/duckdb/src/parser/transform/statement/transform_set.cpp +8 -8
  1038. package/src/duckdb/src/parser/transform/statement/transform_show.cpp +5 -2
  1039. package/src/duckdb/src/parser/transform/statement/transform_show_select.cpp +6 -4
  1040. package/src/duckdb/src/parser/transform/statement/transform_transaction.cpp +27 -6
  1041. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +8 -9
  1042. package/src/duckdb/src/parser/transform/statement/transform_upsert.cpp +11 -12
  1043. package/src/duckdb/src/parser/transform/statement/transform_vacuum.cpp +3 -3
  1044. package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +16 -10
  1045. package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +1 -1
  1046. package/src/duckdb/src/parser/transform/tableref/transform_subquery.cpp +1 -1
  1047. package/src/duckdb/src/parser/transformer.cpp +11 -7
  1048. package/src/duckdb/src/planner/bind_context.cpp +3 -3
  1049. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +22 -7
  1050. package/src/duckdb/src/planner/binder/expression/bind_between_expression.cpp +3 -3
  1051. package/src/duckdb/src/planner/binder/expression/bind_collate_expression.cpp +3 -2
  1052. package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +11 -4
  1053. package/src/duckdb/src/planner/binder/expression/bind_comparison_expression.cpp +9 -54
  1054. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +3 -5
  1055. package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +24 -27
  1056. package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +7 -7
  1057. package/src/duckdb/src/planner/binder/expression/bind_parameter_expression.cpp +9 -2
  1058. package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +26 -7
  1059. package/src/duckdb/src/planner/binder/expression/bind_unnest_expression.cpp +5 -0
  1060. package/src/duckdb/src/planner/binder/expression/bind_unpacked_star_expression.cpp +91 -0
  1061. package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +2 -2
  1062. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +11 -8
  1063. package/src/duckdb/src/planner/binder/query_node/bind_setop_node.cpp +1 -1
  1064. package/src/duckdb/src/planner/binder/query_node/bind_table_macro_node.cpp +6 -10
  1065. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +14 -10
  1066. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +3 -3
  1067. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +46 -7
  1068. package/src/duckdb/src/planner/binder/statement/bind_call.cpp +13 -20
  1069. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +105 -13
  1070. package/src/duckdb/src/planner/binder/statement/bind_copy_database.cpp +7 -3
  1071. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +75 -55
  1072. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +1 -1
  1073. package/src/duckdb/src/planner/binder/statement/bind_delete.cpp +5 -4
  1074. package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +2 -2
  1075. package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +24 -8
  1076. package/src/duckdb/src/planner/binder/statement/bind_explain.cpp +2 -2
  1077. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +5 -105
  1078. package/src/duckdb/src/planner/binder/statement/bind_extension.cpp +2 -2
  1079. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +109 -41
  1080. package/src/duckdb/src/planner/binder/statement/bind_set.cpp +23 -7
  1081. package/src/duckdb/src/planner/binder/statement/bind_simple.cpp +4 -1
  1082. package/src/duckdb/src/planner/binder/statement/bind_summarize.cpp +17 -3
  1083. package/src/duckdb/src/planner/binder/statement/bind_update.cpp +5 -4
  1084. package/src/duckdb/src/planner/binder/statement/bind_vacuum.cpp +8 -6
  1085. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +55 -42
  1086. package/src/duckdb/src/planner/binder/tableref/bind_column_data_ref.cpp +3 -2
  1087. package/src/duckdb/src/planner/binder/tableref/bind_delimgetref.cpp +16 -0
  1088. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +31 -1
  1089. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +6 -0
  1090. package/src/duckdb/src/planner/binder/tableref/bind_showref.cpp +2 -0
  1091. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +106 -46
  1092. package/src/duckdb/src/planner/binder/tableref/plan_delimgetref.cpp +11 -0
  1093. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +15 -2
  1094. package/src/duckdb/src/planner/binder/tableref/plan_table_function.cpp +4 -0
  1095. package/src/duckdb/src/planner/binder.cpp +172 -15
  1096. package/src/duckdb/src/planner/collation_binding.cpp +99 -0
  1097. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +10 -4
  1098. package/src/duckdb/src/planner/expression/bound_between_expression.cpp +1 -1
  1099. package/src/duckdb/src/planner/expression/bound_case_expression.cpp +1 -1
  1100. package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +14 -12
  1101. package/src/duckdb/src/planner/expression/bound_columnref_expression.cpp +1 -1
  1102. package/src/duckdb/src/planner/expression/bound_comparison_expression.cpp +1 -1
  1103. package/src/duckdb/src/planner/expression/bound_conjunction_expression.cpp +1 -1
  1104. package/src/duckdb/src/planner/expression/bound_constant_expression.cpp +1 -1
  1105. package/src/duckdb/src/planner/expression/bound_expanded_expression.cpp +1 -1
  1106. package/src/duckdb/src/planner/expression/bound_function_expression.cpp +8 -2
  1107. package/src/duckdb/src/planner/expression/bound_lambda_expression.cpp +1 -1
  1108. package/src/duckdb/src/planner/expression/bound_lambdaref_expression.cpp +1 -1
  1109. package/src/duckdb/src/planner/expression/bound_operator_expression.cpp +1 -1
  1110. package/src/duckdb/src/planner/expression/bound_parameter_expression.cpp +1 -1
  1111. package/src/duckdb/src/planner/expression/bound_reference_expression.cpp +1 -1
  1112. package/src/duckdb/src/planner/expression/bound_subquery_expression.cpp +1 -1
  1113. package/src/duckdb/src/planner/expression/bound_unnest_expression.cpp +1 -1
  1114. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +6 -6
  1115. package/src/duckdb/src/planner/expression_binder/aggregate_binder.cpp +1 -1
  1116. package/src/duckdb/src/planner/expression_binder/alter_binder.cpp +2 -2
  1117. package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +1 -1
  1118. package/src/duckdb/src/planner/expression_binder/column_alias_binder.cpp +7 -0
  1119. package/src/duckdb/src/planner/expression_binder/constant_binder.cpp +3 -3
  1120. package/src/duckdb/src/planner/expression_binder/group_binder.cpp +26 -22
  1121. package/src/duckdb/src/planner/expression_binder/having_binder.cpp +7 -1
  1122. package/src/duckdb/src/planner/expression_binder/index_binder.cpp +2 -2
  1123. package/src/duckdb/src/planner/expression_binder/insert_binder.cpp +2 -2
  1124. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +2 -2
  1125. package/src/duckdb/src/planner/expression_binder/order_binder.cpp +61 -43
  1126. package/src/duckdb/src/planner/expression_binder/qualify_binder.cpp +2 -2
  1127. package/src/duckdb/src/planner/expression_binder/relation_binder.cpp +4 -4
  1128. package/src/duckdb/src/planner/expression_binder/returning_binder.cpp +3 -2
  1129. package/src/duckdb/src/planner/expression_binder/table_function_binder.cpp +10 -3
  1130. package/src/duckdb/src/planner/expression_binder/update_binder.cpp +1 -1
  1131. package/src/duckdb/src/planner/expression_binder/where_binder.cpp +9 -2
  1132. package/src/duckdb/src/planner/expression_binder.cpp +121 -21
  1133. package/src/duckdb/src/planner/expression_iterator.cpp +26 -1
  1134. package/src/duckdb/src/planner/filter/conjunction_filter.cpp +33 -0
  1135. package/src/duckdb/src/planner/filter/constant_filter.cpp +15 -0
  1136. package/src/duckdb/src/planner/filter/null_filter.cpp +22 -0
  1137. package/src/duckdb/src/planner/filter/struct_filter.cpp +16 -0
  1138. package/src/duckdb/src/planner/logical_operator.cpp +24 -7
  1139. package/src/duckdb/src/planner/operator/logical_aggregate.cpp +13 -7
  1140. package/src/duckdb/src/planner/operator/logical_any_join.cpp +5 -2
  1141. package/src/duckdb/src/planner/operator/logical_comparison_join.cpp +13 -5
  1142. package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +64 -8
  1143. package/src/duckdb/src/planner/operator/logical_cteref.cpp +7 -0
  1144. package/src/duckdb/src/planner/operator/logical_distinct.cpp +6 -5
  1145. package/src/duckdb/src/planner/operator/logical_get.cpp +60 -18
  1146. package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +7 -0
  1147. package/src/duckdb/src/planner/operator/logical_order.cpp +7 -4
  1148. package/src/duckdb/src/planner/operator/logical_top_n.cpp +2 -2
  1149. package/src/duckdb/src/planner/operator/logical_vacuum.cpp +1 -1
  1150. package/src/duckdb/src/planner/planner.cpp +2 -3
  1151. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +27 -10
  1152. package/src/duckdb/src/planner/table_filter.cpp +51 -0
  1153. package/src/duckdb/src/storage/arena_allocator.cpp +28 -10
  1154. package/src/duckdb/src/storage/block.cpp +3 -2
  1155. package/src/duckdb/src/storage/buffer/block_handle.cpp +29 -14
  1156. package/src/duckdb/src/storage/buffer/block_manager.cpp +6 -5
  1157. package/src/duckdb/src/storage/buffer/buffer_handle.cpp +1 -1
  1158. package/src/duckdb/src/storage/buffer/buffer_pool.cpp +264 -125
  1159. package/src/duckdb/src/storage/buffer_manager.cpp +5 -1
  1160. package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +0 -6
  1161. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +26 -3
  1162. package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +21 -9
  1163. package/src/duckdb/src/storage/checkpoint_manager.cpp +49 -24
  1164. package/src/duckdb/src/storage/compression/alp/alp.cpp +6 -11
  1165. package/src/duckdb/src/storage/compression/alprd.cpp +5 -9
  1166. package/src/duckdb/src/storage/compression/bitpacking.cpp +35 -31
  1167. package/src/duckdb/src/storage/compression/chimp/chimp.cpp +6 -8
  1168. package/src/duckdb/src/storage/compression/dictionary_compression.cpp +71 -58
  1169. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +15 -13
  1170. package/src/duckdb/src/storage/compression/fsst.cpp +66 -53
  1171. package/src/duckdb/src/storage/compression/numeric_constant.cpp +4 -5
  1172. package/src/duckdb/src/storage/compression/patas.cpp +6 -17
  1173. package/src/duckdb/src/storage/compression/rle.cpp +20 -18
  1174. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +71 -52
  1175. package/src/duckdb/src/storage/compression/uncompressed.cpp +2 -2
  1176. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +8 -7
  1177. package/src/duckdb/src/storage/data_pointer.cpp +22 -0
  1178. package/src/duckdb/src/storage/data_table.cpp +41 -12
  1179. package/src/duckdb/src/storage/local_storage.cpp +22 -8
  1180. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +33 -17
  1181. package/src/duckdb/src/storage/metadata/metadata_reader.cpp +4 -4
  1182. package/src/duckdb/src/storage/metadata/metadata_writer.cpp +3 -3
  1183. package/src/duckdb/src/storage/partial_block_manager.cpp +19 -8
  1184. package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +11 -8
  1185. package/src/duckdb/src/storage/serialization/serialize_expression.cpp +1 -1
  1186. package/src/duckdb/src/storage/serialization/serialize_extension_install_info.cpp +2 -0
  1187. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +3 -3
  1188. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +19 -5
  1189. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +21 -1
  1190. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +4 -2
  1191. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +2 -2
  1192. package/src/duckdb/src/storage/serialization/serialize_storage.cpp +2 -0
  1193. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +8 -4
  1194. package/src/duckdb/src/storage/serialization/serialize_types.cpp +4 -4
  1195. package/src/duckdb/src/storage/single_file_block_manager.cpp +170 -34
  1196. package/src/duckdb/src/storage/standard_buffer_manager.cpp +221 -64
  1197. package/src/duckdb/src/storage/statistics/column_statistics.cpp +4 -3
  1198. package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +36 -26
  1199. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +4 -15
  1200. package/src/duckdb/src/storage/statistics/string_stats.cpp +14 -8
  1201. package/src/duckdb/src/storage/statistics/struct_stats.cpp +2 -1
  1202. package/src/duckdb/src/storage/storage_info.cpp +34 -9
  1203. package/src/duckdb/src/storage/storage_manager.cpp +147 -74
  1204. package/src/duckdb/src/storage/table/array_column_data.cpp +37 -17
  1205. package/src/duckdb/src/storage/table/chunk_info.cpp +38 -0
  1206. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +10 -6
  1207. package/src/duckdb/src/storage/table/column_data.cpp +252 -31
  1208. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +2 -12
  1209. package/src/duckdb/src/storage/table/column_segment.cpp +63 -34
  1210. package/src/duckdb/src/storage/table/list_column_data.cpp +34 -15
  1211. package/src/duckdb/src/storage/table/row_group.cpp +228 -120
  1212. package/src/duckdb/src/storage/table/row_group_collection.cpp +122 -120
  1213. package/src/duckdb/src/storage/table/row_version_manager.cpp +27 -1
  1214. package/src/duckdb/src/storage/table/scan_state.cpp +101 -18
  1215. package/src/duckdb/src/storage/table/standard_column_data.cpp +20 -34
  1216. package/src/duckdb/src/storage/table/struct_column_data.cpp +39 -42
  1217. package/src/duckdb/src/storage/table/table_statistics.cpp +2 -1
  1218. package/src/duckdb/src/storage/table/update_segment.cpp +9 -8
  1219. package/src/duckdb/src/storage/table/validity_column_data.cpp +2 -2
  1220. package/src/duckdb/src/storage/table_index_list.cpp +8 -7
  1221. package/src/duckdb/src/storage/temporary_file_manager.cpp +11 -9
  1222. package/src/duckdb/src/storage/temporary_memory_manager.cpp +227 -39
  1223. package/src/duckdb/src/storage/wal_replay.cpp +68 -28
  1224. package/src/duckdb/src/storage/write_ahead_log.cpp +56 -47
  1225. package/src/duckdb/src/transaction/cleanup_state.cpp +9 -1
  1226. package/src/duckdb/src/transaction/commit_state.cpp +7 -170
  1227. package/src/duckdb/src/transaction/duck_transaction.cpp +87 -19
  1228. package/src/duckdb/src/transaction/duck_transaction_manager.cpp +65 -10
  1229. package/src/duckdb/src/transaction/meta_transaction.cpp +18 -3
  1230. package/src/duckdb/src/transaction/transaction_context.cpp +21 -17
  1231. package/src/duckdb/src/transaction/undo_buffer.cpp +20 -14
  1232. package/src/duckdb/src/transaction/wal_write_state.cpp +292 -0
  1233. package/src/duckdb/src/verification/prepared_statement_verifier.cpp +0 -1
  1234. package/src/duckdb/third_party/brotli/common/brotli_constants.h +204 -0
  1235. package/src/duckdb/third_party/brotli/common/brotli_platform.h +543 -0
  1236. package/src/duckdb/third_party/brotli/common/constants.cpp +17 -0
  1237. package/src/duckdb/third_party/brotli/common/context.cpp +156 -0
  1238. package/src/duckdb/third_party/brotli/common/context.h +110 -0
  1239. package/src/duckdb/third_party/brotli/common/dictionary.cpp +5912 -0
  1240. package/src/duckdb/third_party/brotli/common/dictionary.h +60 -0
  1241. package/src/duckdb/third_party/brotli/common/platform.cpp +24 -0
  1242. package/src/duckdb/third_party/brotli/common/shared_dictionary.cpp +517 -0
  1243. package/src/duckdb/third_party/brotli/common/shared_dictionary_internal.h +71 -0
  1244. package/src/duckdb/third_party/brotli/common/transform.cpp +287 -0
  1245. package/src/duckdb/third_party/brotli/common/transform.h +77 -0
  1246. package/src/duckdb/third_party/brotli/common/version.h +51 -0
  1247. package/src/duckdb/third_party/brotli/dec/bit_reader.cpp +74 -0
  1248. package/src/duckdb/third_party/brotli/dec/bit_reader.h +419 -0
  1249. package/src/duckdb/third_party/brotli/dec/decode.cpp +2758 -0
  1250. package/src/duckdb/third_party/brotli/dec/huffman.cpp +338 -0
  1251. package/src/duckdb/third_party/brotli/dec/huffman.h +118 -0
  1252. package/src/duckdb/third_party/brotli/dec/prefix.h +733 -0
  1253. package/src/duckdb/third_party/brotli/dec/state.cpp +178 -0
  1254. package/src/duckdb/third_party/brotli/dec/state.h +386 -0
  1255. package/src/duckdb/third_party/brotli/enc/backward_references.cpp +3775 -0
  1256. package/src/duckdb/third_party/brotli/enc/backward_references.h +36 -0
  1257. package/src/duckdb/third_party/brotli/enc/backward_references_hq.cpp +935 -0
  1258. package/src/duckdb/third_party/brotli/enc/backward_references_hq.h +92 -0
  1259. package/src/duckdb/third_party/brotli/enc/bit_cost.cpp +410 -0
  1260. package/src/duckdb/third_party/brotli/enc/bit_cost.h +60 -0
  1261. package/src/duckdb/third_party/brotli/enc/block_splitter.cpp +1653 -0
  1262. package/src/duckdb/third_party/brotli/enc/block_splitter.h +48 -0
  1263. package/src/duckdb/third_party/brotli/enc/brotli_bit_stream.cpp +1431 -0
  1264. package/src/duckdb/third_party/brotli/enc/brotli_bit_stream.h +85 -0
  1265. package/src/duckdb/third_party/brotli/enc/brotli_hash.h +4352 -0
  1266. package/src/duckdb/third_party/brotli/enc/brotli_params.h +47 -0
  1267. package/src/duckdb/third_party/brotli/enc/cluster.cpp +1025 -0
  1268. package/src/duckdb/third_party/brotli/enc/cluster.h +1017 -0
  1269. package/src/duckdb/third_party/brotli/enc/command.cpp +24 -0
  1270. package/src/duckdb/third_party/brotli/enc/command.h +187 -0
  1271. package/src/duckdb/third_party/brotli/enc/compound_dictionary.cpp +209 -0
  1272. package/src/duckdb/third_party/brotli/enc/compound_dictionary.h +75 -0
  1273. package/src/duckdb/third_party/brotli/enc/compress_fragment.cpp +796 -0
  1274. package/src/duckdb/third_party/brotli/enc/compress_fragment.h +82 -0
  1275. package/src/duckdb/third_party/brotli/enc/compress_fragment_two_pass.cpp +653 -0
  1276. package/src/duckdb/third_party/brotli/enc/compress_fragment_two_pass.h +68 -0
  1277. package/src/duckdb/third_party/brotli/enc/dictionary_hash.cpp +1844 -0
  1278. package/src/duckdb/third_party/brotli/enc/dictionary_hash.h +21 -0
  1279. package/src/duckdb/third_party/brotli/enc/encode.cpp +1990 -0
  1280. package/src/duckdb/third_party/brotli/enc/encoder_dict.cpp +636 -0
  1281. package/src/duckdb/third_party/brotli/enc/encoder_dict.h +153 -0
  1282. package/src/duckdb/third_party/brotli/enc/entropy_encode.cpp +500 -0
  1283. package/src/duckdb/third_party/brotli/enc/entropy_encode.h +119 -0
  1284. package/src/duckdb/third_party/brotli/enc/entropy_encode_static.h +538 -0
  1285. package/src/duckdb/third_party/brotli/enc/fast_log.cpp +101 -0
  1286. package/src/duckdb/third_party/brotli/enc/fast_log.h +63 -0
  1287. package/src/duckdb/third_party/brotli/enc/find_match_length.h +68 -0
  1288. package/src/duckdb/third_party/brotli/enc/histogram.cpp +96 -0
  1289. package/src/duckdb/third_party/brotli/enc/histogram.h +210 -0
  1290. package/src/duckdb/third_party/brotli/enc/literal_cost.cpp +176 -0
  1291. package/src/duckdb/third_party/brotli/enc/literal_cost.h +28 -0
  1292. package/src/duckdb/third_party/brotli/enc/memory.cpp +190 -0
  1293. package/src/duckdb/third_party/brotli/enc/memory.h +127 -0
  1294. package/src/duckdb/third_party/brotli/enc/metablock.cpp +1225 -0
  1295. package/src/duckdb/third_party/brotli/enc/metablock.h +102 -0
  1296. package/src/duckdb/third_party/brotli/enc/prefix.h +50 -0
  1297. package/src/duckdb/third_party/brotli/enc/quality.h +202 -0
  1298. package/src/duckdb/third_party/brotli/enc/ringbuffer.h +164 -0
  1299. package/src/duckdb/third_party/brotli/enc/state.h +106 -0
  1300. package/src/duckdb/third_party/brotli/enc/static_dict.cpp +538 -0
  1301. package/src/duckdb/third_party/brotli/enc/static_dict.h +37 -0
  1302. package/src/duckdb/third_party/brotli/enc/static_dict_lut.h +5862 -0
  1303. package/src/duckdb/third_party/brotli/enc/utf8_util.cpp +81 -0
  1304. package/src/duckdb/third_party/brotli/enc/utf8_util.h +29 -0
  1305. package/src/duckdb/third_party/brotli/enc/write_bits.h +84 -0
  1306. package/src/duckdb/third_party/brotli/include/brotli/decode.h +405 -0
  1307. package/src/duckdb/third_party/brotli/include/brotli/encode.h +489 -0
  1308. package/src/duckdb/third_party/brotli/include/brotli/port.h +238 -0
  1309. package/src/duckdb/third_party/brotli/include/brotli/shared_dictionary.h +96 -0
  1310. package/src/duckdb/third_party/brotli/include/brotli/types.h +83 -0
  1311. package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +20 -4
  1312. package/src/duckdb/third_party/fmt/include/fmt/format.h +54 -10
  1313. package/src/duckdb/third_party/fsst/fsst.h +2 -2
  1314. package/src/duckdb/third_party/fsst/libfsst.hpp +2 -2
  1315. package/src/duckdb/third_party/httplib/httplib.hpp +6763 -5580
  1316. package/src/duckdb/third_party/hyperloglog/hyperloglog.cpp +13 -30
  1317. package/src/duckdb/third_party/hyperloglog/hyperloglog.hpp +8 -2
  1318. package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +1 -0
  1319. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +22 -9
  1320. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +1041 -554
  1321. package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +1 -0
  1322. package/src/duckdb/third_party/libpg_query/postgres_parser.cpp +2 -1
  1323. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +21605 -21752
  1324. package/src/duckdb/third_party/libpg_query/src_backend_parser_scan.cpp +538 -299
  1325. package/src/duckdb/third_party/mbedtls/include/mbedtls/mbedtls_config.h +1 -0
  1326. package/src/duckdb/third_party/mbedtls/include/mbedtls_wrapper.hpp +36 -12
  1327. package/src/duckdb/third_party/mbedtls/library/md.cpp +6 -6
  1328. package/src/duckdb/third_party/mbedtls/library/sha1.cpp +2 -0
  1329. package/src/duckdb/third_party/mbedtls/library/sha256.cpp +3 -0
  1330. package/src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp +99 -47
  1331. package/src/duckdb/third_party/pcg/pcg_extras.hpp +1 -1
  1332. package/src/duckdb/third_party/re2/re2/prog.cc +2 -2
  1333. package/src/duckdb/third_party/snappy/snappy-internal.h +398 -0
  1334. package/src/duckdb/third_party/snappy/snappy-sinksource.cc +111 -9
  1335. package/src/duckdb/third_party/snappy/snappy-sinksource.h +158 -0
  1336. package/src/duckdb/third_party/snappy/snappy-stubs-internal.h +523 -3
  1337. package/src/duckdb/third_party/snappy/snappy-stubs-public.h +34 -1
  1338. package/src/duckdb/third_party/snappy/snappy.cc +2626 -0
  1339. package/src/duckdb/third_party/snappy/snappy.h +223 -0
  1340. package/src/duckdb/third_party/snappy/snappy_version.hpp +11 -0
  1341. package/src/duckdb/third_party/utf8proc/include/utf8proc.hpp +69 -101
  1342. package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +53 -0
  1343. package/src/duckdb/third_party/utf8proc/utf8proc.cpp +627 -678
  1344. package/src/duckdb/third_party/utf8proc/utf8proc_data.cpp +15008 -12868
  1345. package/src/duckdb/third_party/utf8proc/utf8proc_wrapper.cpp +185 -29
  1346. package/src/duckdb/ub_extension_json_json_functions.cpp +6 -0
  1347. package/src/duckdb/ub_src_catalog_default.cpp +4 -0
  1348. package/src/duckdb/ub_src_common.cpp +7 -1
  1349. package/src/duckdb/ub_src_common_arrow.cpp +10 -0
  1350. package/src/duckdb/ub_src_common_enums.cpp +2 -0
  1351. package/src/duckdb/ub_src_common_tree_renderer.cpp +10 -0
  1352. package/src/duckdb/ub_src_common_types.cpp +2 -0
  1353. package/src/duckdb/ub_src_core_functions_aggregate_holistic.cpp +4 -0
  1354. package/src/duckdb/ub_src_core_functions_aggregate_nested.cpp +2 -0
  1355. package/src/duckdb/ub_src_core_functions_scalar_generic.cpp +2 -0
  1356. package/src/duckdb/ub_src_core_functions_scalar_list.cpp +2 -4
  1357. package/src/duckdb/ub_src_core_functions_scalar_map.cpp +2 -0
  1358. package/src/duckdb/ub_src_core_functions_scalar_string.cpp +4 -0
  1359. package/src/duckdb/ub_src_execution_index_art.cpp +5 -3
  1360. package/src/duckdb/ub_src_execution_operator_csv_scanner_scanner.cpp +2 -0
  1361. package/src/duckdb/ub_src_execution_operator_helper.cpp +4 -0
  1362. package/src/duckdb/ub_src_function.cpp +4 -0
  1363. package/src/duckdb/ub_src_function_cast.cpp +2 -0
  1364. package/src/duckdb/ub_src_function_scalar_generic.cpp +4 -0
  1365. package/src/duckdb/ub_src_function_scalar_list.cpp +0 -2
  1366. package/src/duckdb/ub_src_function_scalar_string.cpp +2 -0
  1367. package/src/duckdb/ub_src_function_table.cpp +2 -0
  1368. package/src/duckdb/ub_src_function_table_arrow.cpp +2 -0
  1369. package/src/duckdb/ub_src_function_table_system.cpp +2 -0
  1370. package/src/duckdb/ub_src_main.cpp +4 -0
  1371. package/src/duckdb/ub_src_main_buffered_data.cpp +4 -0
  1372. package/src/duckdb/ub_src_main_capi.cpp +10 -0
  1373. package/src/duckdb/ub_src_main_chunk_scan_state.cpp +2 -0
  1374. package/src/duckdb/ub_src_main_relation.cpp +2 -0
  1375. package/src/duckdb/ub_src_main_secret.cpp +2 -0
  1376. package/src/duckdb/ub_src_optimizer.cpp +8 -0
  1377. package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +2 -0
  1378. package/src/duckdb/ub_src_optimizer_pushdown.cpp +2 -0
  1379. package/src/duckdb/ub_src_optimizer_rule.cpp +2 -0
  1380. package/src/duckdb/ub_src_parallel.cpp +4 -0
  1381. package/src/duckdb/ub_src_parser_tableref.cpp +2 -0
  1382. package/src/duckdb/ub_src_planner.cpp +2 -0
  1383. package/src/duckdb/ub_src_planner_binder_expression.cpp +2 -0
  1384. package/src/duckdb/ub_src_planner_binder_tableref.cpp +4 -0
  1385. package/src/duckdb/ub_src_storage_statistics.cpp +0 -2
  1386. package/src/duckdb/ub_src_transaction.cpp +2 -0
  1387. package/test/columns.test.ts +1 -1
  1388. package/test/prepare.test.ts +1 -1
  1389. package/test/test_all_types.test.ts +1 -1
@@ -2,12 +2,14 @@
2
2
 
3
3
  #include "duckdb/common/algorithm.hpp"
4
4
  #include "duckdb/common/helper.hpp"
5
+ #include "duckdb/common/sort/partition_state.hpp"
5
6
  #include "duckdb/common/vector_operations/vector_operations.hpp"
6
7
  #include "duckdb/execution/merge_sort_tree.hpp"
7
8
  #include "duckdb/planner/expression/bound_constant_expression.hpp"
8
9
  #include "duckdb/execution/window_executor.hpp"
9
10
 
10
11
  #include <numeric>
12
+ #include <thread>
11
13
  #include <utility>
12
14
 
13
15
  namespace duckdb {
@@ -18,50 +20,201 @@ namespace duckdb {
18
20
  WindowAggregatorState::WindowAggregatorState() : allocator(Allocator::DefaultAllocator()) {
19
21
  }
20
22
 
21
- WindowAggregator::WindowAggregator(AggregateObject aggr_p, const LogicalType &result_type_p,
22
- const WindowExcludeMode exclude_mode_p, idx_t partition_count_p)
23
- : aggr(std::move(aggr_p)), result_type(result_type_p), partition_count(partition_count_p),
24
- state_size(aggr.function.state_size()), filter_pos(0), exclude_mode(exclude_mode_p) {
23
+ class WindowAggregatorGlobalState : public WindowAggregatorState {
24
+ public:
25
+ WindowAggregatorGlobalState(const WindowAggregator &aggregator_p, idx_t group_count)
26
+ : aggregator(aggregator_p), winputs(inputs), locals(0), finalized(0) {
27
+
28
+ if (!aggregator.arg_types.empty()) {
29
+ winputs.Initialize(Allocator::DefaultAllocator(), aggregator.arg_types, group_count);
30
+ }
31
+ if (aggregator.aggr.filter) {
32
+ // Start with all invalid and set the ones that pass
33
+ filter_mask.Initialize(group_count, false);
34
+ }
35
+ }
36
+
37
+ //! The aggregator data
38
+ const WindowAggregator &aggregator;
39
+
40
+ //! Partition data chunk
41
+ DataChunk inputs;
42
+ WindowDataChunk winputs;
43
+
44
+ //! The filtered rows in inputs.
45
+ ValidityArray filter_mask;
46
+
47
+ //! Lock for single threading
48
+ mutable mutex lock;
49
+
50
+ //! Count of local tasks
51
+ mutable std::atomic<idx_t> locals;
52
+
53
+ //! Number of finalised states
54
+ std::atomic<idx_t> finalized;
55
+ };
56
+
57
+ WindowAggregator::WindowAggregator(AggregateObject aggr_p, const vector<LogicalType> &arg_types_p,
58
+ const LogicalType &result_type_p, const WindowExcludeMode exclude_mode_p)
59
+ : aggr(std::move(aggr_p)), arg_types(arg_types_p), result_type(result_type_p),
60
+ state_size(aggr.function.state_size(aggr.function)), exclude_mode(exclude_mode_p) {
25
61
  }
26
62
 
27
63
  WindowAggregator::~WindowAggregator() {
28
64
  }
29
65
 
30
- void WindowAggregator::Sink(DataChunk &payload_chunk, SelectionVector *filter_sel, idx_t filtered) {
31
- if (!inputs.ColumnCount() && payload_chunk.ColumnCount()) {
32
- inputs.Initialize(Allocator::DefaultAllocator(), payload_chunk.GetTypes());
33
- }
34
- if (inputs.ColumnCount()) {
35
- inputs.Append(payload_chunk, true);
66
+ unique_ptr<WindowAggregatorState> WindowAggregator::GetGlobalState(idx_t group_count, const ValidityMask &) const {
67
+ return make_uniq<WindowAggregatorGlobalState>(*this, group_count);
68
+ }
69
+
70
+ void WindowAggregator::Sink(WindowAggregatorState &gsink, WindowAggregatorState &lstate, DataChunk &arg_chunk,
71
+ idx_t input_idx, optional_ptr<SelectionVector> filter_sel, idx_t filtered) {
72
+ auto &gasink = gsink.Cast<WindowAggregatorGlobalState>();
73
+ auto &winputs = gasink.winputs;
74
+ auto &filter_mask = gasink.filter_mask;
75
+ if (winputs.chunk.ColumnCount()) {
76
+ winputs.Copy(arg_chunk, input_idx);
36
77
  }
37
78
  if (filter_sel) {
38
- // Lazy instantiation
39
- if (!filter_mask.IsMaskSet()) {
40
- // Start with all invalid and set the ones that pass
41
- filter_bits.resize(ValidityMask::ValidityMaskSize(partition_count), 0);
42
- filter_mask.Initialize(filter_bits.data());
43
- }
44
79
  for (idx_t f = 0; f < filtered; ++f) {
45
- filter_mask.SetValid(filter_pos + filter_sel->get_index(f));
80
+ filter_mask.SetValid(input_idx + filter_sel->get_index(f));
46
81
  }
47
- filter_pos += payload_chunk.size();
48
82
  }
49
83
  }
50
84
 
51
- void WindowAggregator::Finalize(const FrameStats &stats) {
85
+ void WindowAggregator::Finalize(WindowAggregatorState &gstate, WindowAggregatorState &lstate, const FrameStats &stats) {
52
86
  }
53
87
 
54
88
  //===--------------------------------------------------------------------===//
55
- // WindowConstantAggregate
89
+ // WindowConstantAggregator
56
90
  //===--------------------------------------------------------------------===//
57
- WindowConstantAggregator::WindowConstantAggregator(AggregateObject aggr, const LogicalType &result_type,
58
- const ValidityMask &partition_mask,
59
- const WindowExcludeMode exclude_mode_p, const idx_t count)
60
- : WindowAggregator(std::move(aggr), result_type, exclude_mode_p, count), partition(0), row(0), state(state_size),
61
- statep(Value::POINTER(CastPointerToValue(state.data()))),
62
- statef(Value::POINTER(CastPointerToValue(state.data()))) {
91
+ struct WindowAggregateStates {
92
+ explicit WindowAggregateStates(const AggregateObject &aggr);
93
+ ~WindowAggregateStates() {
94
+ Destroy();
95
+ }
63
96
 
64
- statef.SetVectorType(VectorType::FLAT_VECTOR); // Prevent conversion of results to constants
97
+ //! The number of states
98
+ idx_t GetCount() const {
99
+ return states.size() / state_size;
100
+ }
101
+ data_ptr_t *GetData() {
102
+ return FlatVector::GetData<data_ptr_t>(*statef);
103
+ }
104
+ data_ptr_t GetStatePtr(idx_t idx) {
105
+ return states.data() + idx * state_size;
106
+ }
107
+ const_data_ptr_t GetStatePtr(idx_t idx) const {
108
+ return states.data() + idx * state_size;
109
+ }
110
+ //! Initialise all the states
111
+ void Initialize(idx_t count);
112
+ //! Combine the states into the target
113
+ void Combine(WindowAggregateStates &target,
114
+ AggregateCombineType combine_type = AggregateCombineType::PRESERVE_INPUT);
115
+ //! Finalize the states into an output vector
116
+ void Finalize(Vector &result);
117
+ //! Destroy the states
118
+ void Destroy();
119
+
120
+ //! A description of the aggregator
121
+ const AggregateObject aggr;
122
+ //! The size of each state
123
+ const idx_t state_size;
124
+ //! The allocator to use
125
+ ArenaAllocator allocator;
126
+ //! Data pointer that contains the state data
127
+ vector<data_t> states;
128
+ //! Reused result state container for the window functions
129
+ unique_ptr<Vector> statef;
130
+ };
131
+
132
+ WindowAggregateStates::WindowAggregateStates(const AggregateObject &aggr)
133
+ : aggr(aggr), state_size(aggr.function.state_size(aggr.function)), allocator(Allocator::DefaultAllocator()) {
134
+ }
135
+
136
+ void WindowAggregateStates::Initialize(idx_t count) {
137
+ states.resize(count * state_size);
138
+ auto state_ptr = states.data();
139
+
140
+ statef = make_uniq<Vector>(LogicalType::POINTER, count);
141
+ auto state_f_data = FlatVector::GetData<data_ptr_t>(*statef);
142
+
143
+ for (idx_t i = 0; i < count; ++i, state_ptr += state_size) {
144
+ state_f_data[i] = state_ptr;
145
+ aggr.function.initialize(aggr.function, state_ptr);
146
+ }
147
+
148
+ // Prevent conversion of results to constants
149
+ statef->SetVectorType(VectorType::FLAT_VECTOR);
150
+ }
151
+
152
+ void WindowAggregateStates::Combine(WindowAggregateStates &target, AggregateCombineType combine_type) {
153
+ AggregateInputData aggr_input_data(aggr.GetFunctionData(), allocator, AggregateCombineType::ALLOW_DESTRUCTIVE);
154
+ aggr.function.combine(*statef, *target.statef, aggr_input_data, GetCount());
155
+ }
156
+
157
+ void WindowAggregateStates::Finalize(Vector &result) {
158
+ AggregateInputData aggr_input_data(aggr.GetFunctionData(), allocator);
159
+ aggr.function.finalize(*statef, aggr_input_data, result, GetCount(), 0);
160
+ }
161
+
162
+ void WindowAggregateStates::Destroy() {
163
+ if (states.empty()) {
164
+ return;
165
+ }
166
+
167
+ AggregateInputData aggr_input_data(aggr.GetFunctionData(), allocator);
168
+ if (aggr.function.destructor) {
169
+ aggr.function.destructor(*statef, aggr_input_data, GetCount());
170
+ }
171
+
172
+ states.clear();
173
+ }
174
+
175
+ class WindowConstantAggregatorGlobalState : public WindowAggregatorGlobalState {
176
+ public:
177
+ WindowConstantAggregatorGlobalState(const WindowConstantAggregator &aggregator, idx_t count,
178
+ const ValidityMask &partition_mask);
179
+
180
+ void Finalize(const FrameStats &stats);
181
+
182
+ //! Partition starts
183
+ vector<idx_t> partition_offsets;
184
+ //! Reused result state container for the window functions
185
+ WindowAggregateStates statef;
186
+ //! Aggregate results
187
+ unique_ptr<Vector> results;
188
+ };
189
+
190
+ class WindowConstantAggregatorLocalState : public WindowAggregatorState {
191
+ public:
192
+ explicit WindowConstantAggregatorLocalState(const WindowConstantAggregatorGlobalState &gstate);
193
+ ~WindowConstantAggregatorLocalState() override {
194
+ }
195
+
196
+ void Sink(DataChunk &payload_chunk, idx_t input_idx, optional_ptr<SelectionVector> filter_sel, idx_t filtered);
197
+ void Combine(WindowConstantAggregatorGlobalState &gstate);
198
+
199
+ public:
200
+ //! The global state we are sharing
201
+ const WindowConstantAggregatorGlobalState &gstate;
202
+ //! Reusable chunk for sinking
203
+ DataChunk inputs;
204
+ //! A vector of pointers to "state", used for intermediate window segment aggregation
205
+ Vector statep;
206
+ //! Reused result state container for the window functions
207
+ WindowAggregateStates statef;
208
+ //! The current result partition being read
209
+ idx_t partition;
210
+ //! Shared SV for evaluation
211
+ SelectionVector matches;
212
+ };
213
+
214
+ WindowConstantAggregatorGlobalState::WindowConstantAggregatorGlobalState(const WindowConstantAggregator &aggregator,
215
+ idx_t group_count,
216
+ const ValidityMask &partition_mask)
217
+ : WindowAggregatorGlobalState(aggregator, STANDARD_VECTOR_SIZE), statef(aggregator.aggr) {
65
218
 
66
219
  // Locate the partition boundaries
67
220
  if (partition_mask.AllValid()) {
@@ -69,7 +222,7 @@ WindowConstantAggregator::WindowConstantAggregator(AggregateObject aggr, const L
69
222
  } else {
70
223
  idx_t entry_idx;
71
224
  idx_t shift;
72
- for (idx_t start = 0; start < count;) {
225
+ for (idx_t start = 0; start < group_count;) {
73
226
  partition_mask.GetEntryIndex(start, entry_idx, shift);
74
227
 
75
228
  // If start is aligned with the start of a block,
@@ -81,7 +234,7 @@ WindowConstantAggregator::WindowConstantAggregator(AggregateObject aggr, const L
81
234
  }
82
235
 
83
236
  // Loop over the block
84
- for (; shift < ValidityMask::BITS_PER_VALUE && start < count; ++shift, ++start) {
237
+ for (; shift < ValidityMask::BITS_PER_VALUE && start < group_count; ++shift, ++start) {
85
238
  if (partition_mask.RowIsValid(block, shift)) {
86
239
  partition_offsets.emplace_back(start);
87
240
  }
@@ -90,45 +243,70 @@ WindowConstantAggregator::WindowConstantAggregator(AggregateObject aggr, const L
90
243
  }
91
244
 
92
245
  // Initialise the vector for caching the results
93
- results = make_uniq<Vector>(result_type, partition_offsets.size());
94
- partition_offsets.emplace_back(count);
246
+ results = make_uniq<Vector>(aggregator.result_type, partition_offsets.size());
95
247
 
96
- // Create an aggregate state for intermediate aggregates
97
- gstate = make_uniq<WindowAggregatorState>();
248
+ // Initialise the final states
249
+ statef.Initialize(partition_offsets.size());
98
250
 
99
- // Start the first aggregate
100
- AggregateInit();
251
+ // Add final guard
252
+ partition_offsets.emplace_back(group_count);
101
253
  }
102
254
 
103
- void WindowConstantAggregator::AggregateInit() {
104
- aggr.function.initialize(state.data());
255
+ WindowConstantAggregatorLocalState::WindowConstantAggregatorLocalState(
256
+ const WindowConstantAggregatorGlobalState &gstate)
257
+ : gstate(gstate), statep(Value::POINTER(0)), statef(gstate.statef.aggr), partition(0) {
258
+ matches.Initialize();
259
+
260
+ // Start the aggregates
261
+ auto &partition_offsets = gstate.partition_offsets;
262
+ auto &aggregator = gstate.aggregator;
263
+ statef.Initialize(partition_offsets.size() - 1);
264
+
265
+ // Set up shared buffer
266
+ inputs.Initialize(Allocator::DefaultAllocator(), aggregator.arg_types);
267
+
268
+ gstate.locals++;
105
269
  }
106
270
 
107
- void WindowConstantAggregator::AggegateFinal(Vector &result, idx_t rid) {
108
- AggregateInputData aggr_input_data(aggr.GetFunctionData(), gstate->allocator);
109
- aggr.function.finalize(statef, aggr_input_data, result, 1, rid);
271
+ WindowConstantAggregator::WindowConstantAggregator(AggregateObject aggr, const vector<LogicalType> &arg_types,
272
+ const LogicalType &result_type,
273
+ const WindowExcludeMode exclude_mode_p)
274
+ : WindowAggregator(std::move(aggr), arg_types, result_type, exclude_mode_p) {
275
+ }
110
276
 
111
- if (aggr.function.destructor) {
112
- aggr.function.destructor(statef, aggr_input_data, 1);
113
- }
277
+ unique_ptr<WindowAggregatorState> WindowConstantAggregator::GetGlobalState(idx_t group_count,
278
+ const ValidityMask &partition_mask) const {
279
+ return make_uniq<WindowConstantAggregatorGlobalState>(*this, group_count, partition_mask);
114
280
  }
115
281
 
116
- void WindowConstantAggregator::Sink(DataChunk &payload_chunk, SelectionVector *filter_sel, idx_t filtered) {
282
+ void WindowConstantAggregator::Sink(WindowAggregatorState &gsink, WindowAggregatorState &lstate, DataChunk &arg_chunk,
283
+ idx_t input_idx, optional_ptr<SelectionVector> filter_sel, idx_t filtered) {
284
+ auto &lastate = lstate.Cast<WindowConstantAggregatorLocalState>();
285
+
286
+ lastate.Sink(arg_chunk, input_idx, filter_sel, filtered);
287
+ }
288
+
289
+ void WindowConstantAggregatorLocalState::Sink(DataChunk &payload_chunk, idx_t row,
290
+ optional_ptr<SelectionVector> filter_sel, idx_t filtered) {
291
+ auto &partition_offsets = gstate.partition_offsets;
292
+ auto &aggregator = gstate.aggregator;
293
+ const auto &aggr = aggregator.aggr;
117
294
  const auto chunk_begin = row;
118
295
  const auto chunk_end = chunk_begin + payload_chunk.size();
296
+ idx_t partition =
297
+ idx_t(std::upper_bound(partition_offsets.begin(), partition_offsets.end(), row) - partition_offsets.begin()) -
298
+ 1;
119
299
 
120
- if (!inputs.ColumnCount() && payload_chunk.ColumnCount()) {
121
- inputs.Initialize(Allocator::DefaultAllocator(), payload_chunk.GetTypes());
122
- }
300
+ auto state_f_data = statef.GetData();
301
+ auto state_p_data = FlatVector::GetData<data_ptr_t>(statep);
123
302
 
124
- AggregateInputData aggr_input_data(aggr.GetFunctionData(), gstate->allocator);
303
+ AggregateInputData aggr_input_data(aggr.GetFunctionData(), allocator);
125
304
  idx_t begin = 0;
126
305
  idx_t filter_idx = 0;
127
306
  auto partition_end = partition_offsets[partition + 1];
128
307
  while (row < chunk_end) {
129
308
  if (row == partition_end) {
130
- AggegateFinal(*results, partition++);
131
- AggregateInit();
309
+ ++partition;
132
310
  partition_end = partition_offsets[partition + 1];
133
311
  }
134
312
  partition_end = MinValue(partition_end, chunk_end);
@@ -174,9 +352,11 @@ void WindowConstantAggregator::Sink(DataChunk &payload_chunk, SelectionVector *f
174
352
 
175
353
  // Aggregate the filtered rows into a single state
176
354
  const auto count = inputs.size();
355
+ auto state = state_f_data[partition];
177
356
  if (aggr.function.simple_update) {
178
- aggr.function.simple_update(inputs.data.data(), aggr_input_data, inputs.ColumnCount(), state.data(), count);
357
+ aggr.function.simple_update(inputs.data.data(), aggr_input_data, inputs.ColumnCount(), state, count);
179
358
  } else {
359
+ state_p_data[0] = state_f_data[partition];
180
360
  aggr.function.update(inputs.data.data(), aggr_input_data, inputs.ColumnCount(), statep, count);
181
361
  }
182
362
 
@@ -186,34 +366,36 @@ void WindowConstantAggregator::Sink(DataChunk &payload_chunk, SelectionVector *f
186
366
  }
187
367
  }
188
368
 
189
- void WindowConstantAggregator::Finalize(const FrameStats &stats) {
190
- AggegateFinal(*results, partition++);
191
- }
369
+ void WindowConstantAggregator::Finalize(WindowAggregatorState &gstate, WindowAggregatorState &lstate,
370
+ const FrameStats &stats) {
371
+ auto &gastate = gstate.Cast<WindowConstantAggregatorGlobalState>();
372
+ auto &lastate = lstate.Cast<WindowConstantAggregatorLocalState>();
192
373
 
193
- class WindowConstantAggregatorState : public WindowAggregatorState {
194
- public:
195
- WindowConstantAggregatorState() : partition(0) {
196
- matches.Initialize();
197
- }
198
- ~WindowConstantAggregatorState() override {
199
- }
374
+ // Single-threaded combine
375
+ lock_guard<mutex> finalize_guard(gastate.lock);
376
+ lastate.statef.Combine(gastate.statef);
377
+ lastate.statef.Destroy();
200
378
 
201
- public:
202
- //! The current result partition being read
203
- idx_t partition;
204
- //! Shared SV for evaluation
205
- SelectionVector matches;
206
- };
379
+ // Last one out turns off the lights!
380
+ if (++gastate.finalized == gastate.locals) {
381
+ gastate.statef.Finalize(*gastate.results);
382
+ gastate.statef.Destroy();
383
+ }
384
+ }
207
385
 
208
- unique_ptr<WindowAggregatorState> WindowConstantAggregator::GetLocalState() const {
209
- return make_uniq<WindowConstantAggregatorState>();
386
+ unique_ptr<WindowAggregatorState> WindowConstantAggregator::GetLocalState(const WindowAggregatorState &gstate) const {
387
+ return make_uniq<WindowConstantAggregatorLocalState>(gstate.Cast<WindowConstantAggregatorGlobalState>());
210
388
  }
211
389
 
212
- void WindowConstantAggregator::Evaluate(WindowAggregatorState &lstate, const DataChunk &bounds, Vector &target,
213
- idx_t count, idx_t row_idx) const {
390
+ void WindowConstantAggregator::Evaluate(const WindowAggregatorState &gsink, WindowAggregatorState &lstate,
391
+ const DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) const {
392
+ auto &gasink = gsink.Cast<WindowConstantAggregatorGlobalState>();
393
+ const auto &partition_offsets = gasink.partition_offsets;
394
+ const auto &results = *gasink.results;
395
+
214
396
  auto begins = FlatVector::GetData<const idx_t>(bounds.data[WINDOW_BEGIN]);
215
397
  // Chunk up the constants and copy them one at a time
216
- auto &lcstate = lstate.Cast<WindowConstantAggregatorState>();
398
+ auto &lcstate = lstate.Cast<WindowConstantAggregatorLocalState>();
217
399
  idx_t matched = 0;
218
400
  idx_t target_offset = 0;
219
401
  for (idx_t i = 0; i < count; ++i) {
@@ -222,7 +404,7 @@ void WindowConstantAggregator::Evaluate(WindowAggregatorState &lstate, const Dat
222
404
  while (partition_offsets[lcstate.partition + 1] <= begin) {
223
405
  // Flush the previous partition's data
224
406
  if (matched) {
225
- VectorOperations::Copy(*results, target, lcstate.matches, matched, 0, target_offset);
407
+ VectorOperations::Copy(results, result, lcstate.matches, matched, 0, target_offset);
226
408
  target_offset += matched;
227
409
  matched = 0;
228
410
  }
@@ -234,16 +416,22 @@ void WindowConstantAggregator::Evaluate(WindowAggregatorState &lstate, const Dat
234
416
 
235
417
  // Flush the last partition
236
418
  if (matched) {
237
- VectorOperations::Copy(*results, target, lcstate.matches, matched, 0, target_offset);
419
+ // Optimize constant result
420
+ if (target_offset == 0 && matched == count) {
421
+ VectorOperations::Copy(results, result, lcstate.matches, 1, 0, target_offset);
422
+ result.SetVectorType(VectorType::CONSTANT_VECTOR);
423
+ } else {
424
+ VectorOperations::Copy(results, result, lcstate.matches, matched, 0, target_offset);
425
+ }
238
426
  }
239
427
  }
240
428
 
241
429
  //===--------------------------------------------------------------------===//
242
430
  // WindowCustomAggregator
243
431
  //===--------------------------------------------------------------------===//
244
- WindowCustomAggregator::WindowCustomAggregator(AggregateObject aggr, const LogicalType &result_type,
245
- const WindowExcludeMode exclude_mode_p, idx_t count)
246
- : WindowAggregator(std::move(aggr), result_type, exclude_mode_p, count) {
432
+ WindowCustomAggregator::WindowCustomAggregator(AggregateObject aggr, const vector<LogicalType> &arg_types,
433
+ const LogicalType &result_type, const WindowExcludeMode exclude_mode)
434
+ : WindowAggregator(std::move(aggr), arg_types, result_type, exclude_mode) {
247
435
  }
248
436
 
249
437
  WindowCustomAggregator::~WindowCustomAggregator() {
@@ -282,12 +470,28 @@ static void InitSubFrames(SubFrames &frames, const WindowExcludeMode exclude_mod
282
470
  frames.resize(nframes, {0, 0});
283
471
  }
284
472
 
473
+ class WindowCustomAggregatorGlobalState : public WindowAggregatorGlobalState {
474
+ public:
475
+ explicit WindowCustomAggregatorGlobalState(const WindowCustomAggregator &aggregator, idx_t group_count)
476
+ : WindowAggregatorGlobalState(aggregator, group_count) {
477
+
478
+ gcstate = make_uniq<WindowCustomAggregatorState>(aggregator.aggr, aggregator.exclude_mode);
479
+ }
480
+
481
+ //! Traditional packed filter mask for API
482
+ ValidityMask filter_packed;
483
+ //! Data pointer that contains a single local state, used for global custom window execution state
484
+ unique_ptr<WindowCustomAggregatorState> gcstate;
485
+ //! Partition description for custom window APIs
486
+ unique_ptr<WindowPartitionInput> partition_input;
487
+ };
488
+
285
489
  WindowCustomAggregatorState::WindowCustomAggregatorState(const AggregateObject &aggr,
286
490
  const WindowExcludeMode exclude_mode)
287
- : aggr(aggr), state(aggr.function.state_size()), statef(Value::POINTER(CastPointerToValue(state.data()))),
288
- frames(3, {0, 0}) {
491
+ : aggr(aggr), state(aggr.function.state_size(aggr.function)),
492
+ statef(Value::POINTER(CastPointerToValue(state.data()))), frames(3, {0, 0}) {
289
493
  // if we have a frame-by-frame method, share the single state
290
- aggr.function.initialize(state.data());
494
+ aggr.function.initialize(aggr.function, state.data());
291
495
 
292
496
  InitSubFrames(frames, exclude_mode);
293
497
  }
@@ -299,21 +503,41 @@ WindowCustomAggregatorState::~WindowCustomAggregatorState() {
299
503
  }
300
504
  }
301
505
 
302
- void WindowCustomAggregator::Finalize(const FrameStats &stats) {
303
- WindowAggregator::Finalize(stats);
304
- partition_input =
305
- make_uniq<WindowPartitionInput>(inputs.data.data(), inputs.ColumnCount(), inputs.size(), filter_mask, stats);
506
+ unique_ptr<WindowAggregatorState> WindowCustomAggregator::GetGlobalState(idx_t group_count,
507
+ const ValidityMask &) const {
508
+ return make_uniq<WindowCustomAggregatorGlobalState>(*this, group_count);
509
+ }
510
+
511
+ void WindowCustomAggregator::Finalize(WindowAggregatorState &gsink, WindowAggregatorState &lstate,
512
+ const FrameStats &stats) {
513
+ // Single threaded Finalize for now
514
+ auto &gcsink = gsink.Cast<WindowCustomAggregatorGlobalState>();
515
+ lock_guard<mutex> gestate_guard(gcsink.lock);
516
+ if (gcsink.finalized) {
517
+ return;
518
+ }
519
+
520
+ WindowAggregator::Finalize(gsink, lstate, stats);
521
+
522
+ auto &inputs = gcsink.inputs;
523
+ auto &filter_mask = gcsink.filter_mask;
524
+ auto &filter_packed = gcsink.filter_packed;
525
+ filter_mask.Pack(filter_packed, filter_mask.target_count);
526
+
527
+ gcsink.partition_input =
528
+ make_uniq<WindowPartitionInput>(inputs.data.data(), inputs.ColumnCount(), inputs.size(), filter_packed, stats);
306
529
 
307
530
  if (aggr.function.window_init) {
308
- gstate = GetLocalState();
309
- auto &gcstate = gstate->Cast<WindowCustomAggregatorState>();
531
+ auto &gcstate = *gcsink.gcstate;
310
532
 
311
533
  AggregateInputData aggr_input_data(aggr.GetFunctionData(), gcstate.allocator);
312
- aggr.function.window_init(aggr_input_data, *partition_input, gcstate.state.data());
534
+ aggr.function.window_init(aggr_input_data, *gcsink.partition_input, gcstate.state.data());
313
535
  }
536
+
537
+ ++gcsink.finalized;
314
538
  }
315
539
 
316
- unique_ptr<WindowAggregatorState> WindowCustomAggregator::GetLocalState() const {
540
+ unique_ptr<WindowAggregatorState> WindowCustomAggregator::GetLocalState(const WindowAggregatorState &gstate) const {
317
541
  return make_uniq<WindowCustomAggregatorState>(aggr, exclude_mode);
318
542
  }
319
543
 
@@ -374,29 +598,30 @@ static void EvaluateSubFrames(const DataChunk &bounds, const WindowExcludeMode e
374
598
  }
375
599
  }
376
600
 
377
- void WindowCustomAggregator::Evaluate(WindowAggregatorState &lstate, const DataChunk &bounds, Vector &result,
378
- idx_t count, idx_t row_idx) const {
601
+ void WindowCustomAggregator::Evaluate(const WindowAggregatorState &gsink, WindowAggregatorState &lstate,
602
+ const DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) const {
379
603
  auto &lcstate = lstate.Cast<WindowCustomAggregatorState>();
380
604
  auto &frames = lcstate.frames;
381
605
  const_data_ptr_t gstate_p = nullptr;
382
- if (gstate) {
383
- auto &gcstate = gstate->Cast<WindowCustomAggregatorState>();
384
- gstate_p = gcstate.state.data();
606
+ auto &gcsink = gsink.Cast<WindowCustomAggregatorGlobalState>();
607
+ if (gcsink.gcstate) {
608
+ gstate_p = gcsink.gcstate->state.data();
385
609
  }
386
610
 
387
611
  EvaluateSubFrames(bounds, exclude_mode, count, row_idx, frames, [&](idx_t i) {
388
612
  // Extract the range
389
613
  AggregateInputData aggr_input_data(aggr.GetFunctionData(), lstate.allocator);
390
- aggr.function.window(aggr_input_data, *partition_input, gstate_p, lcstate.state.data(), frames, result, i);
614
+ aggr.function.window(aggr_input_data, *gcsink.partition_input, gstate_p, lcstate.state.data(), frames, result,
615
+ i);
391
616
  });
392
617
  }
393
618
 
394
619
  //===--------------------------------------------------------------------===//
395
620
  // WindowNaiveAggregator
396
621
  //===--------------------------------------------------------------------===//
397
- WindowNaiveAggregator::WindowNaiveAggregator(AggregateObject aggr, const LogicalType &result_type,
398
- const WindowExcludeMode exclude_mode_p, idx_t partition_count)
399
- : WindowAggregator(std::move(aggr), result_type, exclude_mode_p, partition_count) {
622
+ WindowNaiveAggregator::WindowNaiveAggregator(AggregateObject aggr, const vector<LogicalType> &arg_types,
623
+ const LogicalType &result_type, const WindowExcludeMode exclude_mode)
624
+ : WindowAggregator(std::move(aggr), arg_types, result_type, exclude_mode) {
400
625
  }
401
626
 
402
627
  WindowNaiveAggregator::~WindowNaiveAggregator() {
@@ -405,44 +630,47 @@ WindowNaiveAggregator::~WindowNaiveAggregator() {
405
630
  class WindowNaiveState : public WindowAggregatorState {
406
631
  public:
407
632
  struct HashRow {
408
- explicit HashRow(WindowNaiveState &state) : state(state) {
633
+ HashRow(WindowNaiveState &state, const DataChunk &inputs) : state(state), inputs(inputs) {
409
634
  }
410
635
 
411
636
  size_t operator()(const idx_t &i) const {
412
- return state.Hash(i);
637
+ return state.Hash(inputs, i);
413
638
  }
414
639
 
415
640
  WindowNaiveState &state;
641
+ const DataChunk &inputs;
416
642
  };
417
643
 
418
644
  struct EqualRow {
419
- explicit EqualRow(WindowNaiveState &state) : state(state) {
645
+ EqualRow(WindowNaiveState &state, const DataChunk &inputs) : state(state), inputs(inputs) {
420
646
  }
421
647
 
422
648
  bool operator()(const idx_t &lhs, const idx_t &rhs) const {
423
- return state.KeyEqual(lhs, rhs);
649
+ return state.KeyEqual(inputs, lhs, rhs);
424
650
  }
425
651
 
426
652
  WindowNaiveState &state;
653
+ const DataChunk &inputs;
427
654
  };
428
655
 
429
656
  using RowSet = std::unordered_set<idx_t, HashRow, EqualRow>;
430
657
 
431
- explicit WindowNaiveState(const WindowNaiveAggregator &gstate);
658
+ explicit WindowNaiveState(const WindowNaiveAggregator &gsink);
432
659
 
433
- void Evaluate(const DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
660
+ void Evaluate(const WindowAggregatorGlobalState &gsink, const DataChunk &bounds, Vector &result, idx_t count,
661
+ idx_t row_idx);
434
662
 
435
663
  protected:
436
664
  //! Flush the accumulated intermediate states into the result states
437
- void FlushStates();
665
+ void FlushStates(const WindowAggregatorGlobalState &gsink);
438
666
 
439
667
  //! Hashes a value for the hash table
440
- size_t Hash(idx_t rid);
668
+ size_t Hash(const DataChunk &inputs, idx_t rid);
441
669
  //! Compares two values for the hash table
442
- bool KeyEqual(const idx_t &lhs, const idx_t &rhs);
670
+ bool KeyEqual(const DataChunk &inputs, const idx_t &lhs, const idx_t &rhs);
443
671
 
444
672
  //! The global state
445
- const WindowNaiveAggregator &gstate;
673
+ const WindowNaiveAggregator &aggregator;
446
674
  //! Data pointer that contains a vector of states, used for row aggregation
447
675
  vector<data_t> state;
448
676
  //! Reused result state container for the aggregate
@@ -459,21 +687,12 @@ protected:
459
687
  SubFrames frames;
460
688
  //! The optional hash table used for DISTINCT
461
689
  Vector hashes;
462
- HashRow hash_row;
463
- EqualRow equal_row;
464
- RowSet row_set;
465
690
  };
466
691
 
467
- WindowNaiveState::WindowNaiveState(const WindowNaiveAggregator &gstate)
468
- : gstate(gstate), state(gstate.state_size * STANDARD_VECTOR_SIZE), statef(LogicalType::POINTER),
469
- statep((LogicalType::POINTER)), flush_count(0), hashes(LogicalType::HASH), hash_row(*this), equal_row(*this),
470
- row_set(STANDARD_VECTOR_SIZE, hash_row, equal_row) {
471
- InitSubFrames(frames, gstate.exclude_mode);
472
-
473
- auto &inputs = gstate.GetInputs();
474
- if (inputs.ColumnCount() > 0) {
475
- leaves.Initialize(Allocator::DefaultAllocator(), inputs.GetTypes());
476
- }
692
+ WindowNaiveState::WindowNaiveState(const WindowNaiveAggregator &aggregator_p)
693
+ : aggregator(aggregator_p), state(aggregator.state_size * STANDARD_VECTOR_SIZE), statef(LogicalType::POINTER),
694
+ statep((LogicalType::POINTER)), flush_count(0), hashes(LogicalType::HASH) {
695
+ InitSubFrames(frames, aggregator.exclude_mode);
477
696
 
478
697
  update_sel.Initialize();
479
698
 
@@ -485,28 +704,26 @@ WindowNaiveState::WindowNaiveState(const WindowNaiveAggregator &gstate)
485
704
  auto fdata = FlatVector::GetData<data_ptr_t>(statef);
486
705
  for (idx_t i = 0; i < STANDARD_VECTOR_SIZE; ++i) {
487
706
  fdata[i] = state_ptr;
488
- state_ptr += gstate.state_size;
707
+ state_ptr += aggregator.state_size;
489
708
  }
490
709
  }
491
710
 
492
- void WindowNaiveState::FlushStates() {
711
+ void WindowNaiveState::FlushStates(const WindowAggregatorGlobalState &gsink) {
493
712
  if (!flush_count) {
494
713
  return;
495
714
  }
496
715
 
497
- auto &inputs = gstate.GetInputs();
716
+ auto &inputs = gsink.inputs;
498
717
  leaves.Slice(inputs, update_sel, flush_count);
499
718
 
500
- auto &aggr = gstate.aggr;
719
+ auto &aggr = aggregator.aggr;
501
720
  AggregateInputData aggr_input_data(aggr.GetFunctionData(), allocator);
502
721
  aggr.function.update(leaves.data.data(), aggr_input_data, leaves.ColumnCount(), statep, flush_count);
503
722
 
504
723
  flush_count = 0;
505
724
  }
506
725
 
507
- size_t WindowNaiveState::Hash(idx_t rid) {
508
- auto &inputs = gstate.GetInputs();
509
-
726
+ size_t WindowNaiveState::Hash(const DataChunk &inputs, idx_t rid) {
510
727
  auto s = UnsafeNumericCast<sel_t>(rid);
511
728
  SelectionVector sel(&s);
512
729
  leaves.Slice(inputs, sel, 1);
@@ -515,9 +732,7 @@ size_t WindowNaiveState::Hash(idx_t rid) {
515
732
  return *FlatVector::GetData<hash_t>(hashes);
516
733
  }
517
734
 
518
- bool WindowNaiveState::KeyEqual(const idx_t &lhs, const idx_t &rhs) {
519
- auto &inputs = gstate.GetInputs();
520
-
735
+ bool WindowNaiveState::KeyEqual(const DataChunk &inputs, const idx_t &lhs, const idx_t &rhs) {
521
736
  auto l = UnsafeNumericCast<sel_t>(lhs);
522
737
  SelectionVector lsel(&l);
523
738
 
@@ -538,16 +753,26 @@ bool WindowNaiveState::KeyEqual(const idx_t &lhs, const idx_t &rhs) {
538
753
  return true;
539
754
  }
540
755
 
541
- void WindowNaiveState::Evaluate(const DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
542
- auto &aggr = gstate.aggr;
543
- auto &filter_mask = gstate.GetFilterMask();
756
+ void WindowNaiveState::Evaluate(const WindowAggregatorGlobalState &gsink, const DataChunk &bounds, Vector &result,
757
+ idx_t count, idx_t row_idx) {
758
+ auto &aggr = aggregator.aggr;
759
+ auto &filter_mask = gsink.filter_mask;
760
+ auto &inputs = gsink.inputs;
761
+
762
+ if (leaves.ColumnCount() == 0 && inputs.ColumnCount() > 0) {
763
+ leaves.Initialize(Allocator::DefaultAllocator(), inputs.GetTypes());
764
+ }
544
765
 
545
766
  auto fdata = FlatVector::GetData<data_ptr_t>(statef);
546
767
  auto pdata = FlatVector::GetData<data_ptr_t>(statep);
547
768
 
548
- EvaluateSubFrames(bounds, gstate.exclude_mode, count, row_idx, frames, [&](idx_t rid) {
769
+ HashRow hash_row(*this, inputs);
770
+ EqualRow equal_row(*this, inputs);
771
+ RowSet row_set(STANDARD_VECTOR_SIZE, hash_row, equal_row);
772
+
773
+ EvaluateSubFrames(bounds, aggregator.exclude_mode, count, row_idx, frames, [&](idx_t rid) {
549
774
  auto agg_state = fdata[rid];
550
- aggr.function.initialize(agg_state);
775
+ aggr.function.initialize(aggr.function, agg_state);
551
776
 
552
777
  // Just update the aggregate with the unfiltered input rows
553
778
  row_set.clear();
@@ -565,14 +790,14 @@ void WindowNaiveState::Evaluate(const DataChunk &bounds, Vector &result, idx_t c
565
790
  pdata[flush_count] = agg_state;
566
791
  update_sel[flush_count++] = UnsafeNumericCast<sel_t>(f);
567
792
  if (flush_count >= STANDARD_VECTOR_SIZE) {
568
- FlushStates();
793
+ FlushStates(gsink);
569
794
  }
570
795
  }
571
796
  }
572
797
  });
573
798
 
574
799
  // Flush the final states
575
- FlushStates();
800
+ FlushStates(gsink);
576
801
 
577
802
  // Finalise the result aggregates and write to the result
578
803
  AggregateInputData aggr_input_data(aggr.GetFunctionData(), allocator);
@@ -584,55 +809,57 @@ void WindowNaiveState::Evaluate(const DataChunk &bounds, Vector &result, idx_t c
584
809
  }
585
810
  }
586
811
 
587
- unique_ptr<WindowAggregatorState> WindowNaiveAggregator::GetLocalState() const {
812
+ unique_ptr<WindowAggregatorState> WindowNaiveAggregator::GetLocalState(const WindowAggregatorState &gstate) const {
588
813
  return make_uniq<WindowNaiveState>(*this);
589
814
  }
590
815
 
591
- void WindowNaiveAggregator::Evaluate(WindowAggregatorState &lstate, const DataChunk &bounds, Vector &result,
592
- idx_t count, idx_t row_idx) const {
593
- auto &ldstate = lstate.Cast<WindowNaiveState>();
594
- ldstate.Evaluate(bounds, result, count, row_idx);
816
+ void WindowNaiveAggregator::Evaluate(const WindowAggregatorState &gsink, WindowAggregatorState &lstate,
817
+ const DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) const {
818
+ const auto &gnstate = gsink.Cast<WindowAggregatorGlobalState>();
819
+ auto &lnstate = lstate.Cast<WindowNaiveState>();
820
+ lnstate.Evaluate(gnstate, bounds, result, count, row_idx);
595
821
  }
596
822
 
597
823
  //===--------------------------------------------------------------------===//
598
824
  // WindowSegmentTree
599
825
  //===--------------------------------------------------------------------===//
600
- WindowSegmentTree::WindowSegmentTree(AggregateObject aggr, const LogicalType &result_type, WindowAggregationMode mode_p,
601
- const WindowExcludeMode exclude_mode_p, idx_t count)
602
- : WindowAggregator(std::move(aggr), result_type, exclude_mode_p, count), internal_nodes(0), mode(mode_p) {
603
- }
826
+ class WindowSegmentTreeGlobalState : public WindowAggregatorGlobalState {
827
+ public:
828
+ using AtomicCounters = vector<std::atomic<idx_t>>;
604
829
 
605
- void WindowSegmentTree::Finalize(const FrameStats &stats) {
606
- WindowAggregator::Finalize(stats);
830
+ WindowSegmentTreeGlobalState(const WindowSegmentTree &aggregator, idx_t group_count);
607
831
 
608
- gstate = GetLocalState();
609
- if (inputs.ColumnCount() > 0) {
610
- if (aggr.function.combine && UseCombineAPI()) {
611
- ConstructTree();
612
- }
832
+ ArenaAllocator &CreateTreeAllocator() {
833
+ lock_guard<mutex> tree_lock(lock);
834
+ tree_allocators.emplace_back(make_uniq<ArenaAllocator>(Allocator::DefaultAllocator()));
835
+ return *tree_allocators.back();
613
836
  }
614
- }
615
837
 
616
- WindowSegmentTree::~WindowSegmentTree() {
617
- if (!aggr.function.destructor || !gstate) {
618
- // nothing to destroy
619
- return;
620
- }
621
- AggregateInputData aggr_input_data(aggr.GetFunctionData(), gstate->allocator);
622
- // call the destructor for all the intermediate states
623
- data_ptr_t address_data[STANDARD_VECTOR_SIZE];
624
- Vector addresses(LogicalType::POINTER, data_ptr_cast(address_data));
625
- idx_t count = 0;
626
- for (idx_t i = 0; i < internal_nodes; i++) {
627
- address_data[count++] = data_ptr_t(levels_flat_native.get() + i * state_size);
628
- if (count == STANDARD_VECTOR_SIZE) {
629
- aggr.function.destructor(addresses, aggr_input_data, count);
630
- count = 0;
631
- }
632
- }
633
- if (count > 0) {
634
- aggr.function.destructor(addresses, aggr_input_data, count);
635
- }
838
+ //! The owning aggregator
839
+ const WindowSegmentTree &tree;
840
+ //! The actual window segment tree: an array of aggregate states that represent all the intermediate nodes
841
+ WindowAggregateStates levels_flat_native;
842
+ //! For each level, the starting location in the levels_flat_native array
843
+ vector<idx_t> levels_flat_start;
844
+ //! The level being built (read)
845
+ std::atomic<idx_t> build_level;
846
+ //! The number of entries started so far at each level
847
+ unique_ptr<AtomicCounters> build_started;
848
+ //! The number of entries completed so far at each level
849
+ unique_ptr<AtomicCounters> build_completed;
850
+ //! The tree allocators.
851
+ //! We need to hold onto them for the tree lifetime,
852
+ //! not the lifetime of the local state that constructed part of the tree
853
+ vector<unique_ptr<ArenaAllocator>> tree_allocators;
854
+
855
+ // TREE_FANOUT needs to cleanly divide STANDARD_VECTOR_SIZE
856
+ static constexpr idx_t TREE_FANOUT = 16;
857
+ };
858
+
859
+ WindowSegmentTree::WindowSegmentTree(AggregateObject aggr, const vector<LogicalType> &arg_types,
860
+ const LogicalType &result_type, WindowAggregationMode mode_p,
861
+ const WindowExcludeMode exclude_mode_p)
862
+ : WindowAggregator(std::move(aggr), arg_types, result_type, exclude_mode_p), mode(mode_p) {
636
863
  }
637
864
 
638
865
  class WindowSegmentTreePart {
@@ -643,7 +870,7 @@ public:
643
870
  enum FramePart : uint8_t { FULL = 0, LEFT = 1, RIGHT = 2 };
644
871
 
645
872
  WindowSegmentTreePart(ArenaAllocator &allocator, const AggregateObject &aggr, const DataChunk &inputs,
646
- const ValidityMask &filter_mask);
873
+ const ValidityArray &filter_mask);
647
874
  ~WindowSegmentTreePart();
648
875
 
649
876
  unique_ptr<WindowSegmentTreePart> Copy() const {
@@ -652,23 +879,23 @@ public:
652
879
 
653
880
  void FlushStates(bool combining);
654
881
  void ExtractFrame(idx_t begin, idx_t end, data_ptr_t current_state);
655
- void WindowSegmentValue(const WindowSegmentTree &tree, idx_t l_idx, idx_t begin, idx_t end,
882
+ void WindowSegmentValue(const WindowSegmentTreeGlobalState &tree, idx_t l_idx, idx_t begin, idx_t end,
656
883
  data_ptr_t current_state);
657
884
  //! Writes result and calls destructors
658
885
  void Finalize(Vector &result, idx_t count);
659
886
 
660
887
  void Combine(WindowSegmentTreePart &other, idx_t count);
661
888
 
662
- void Evaluate(const WindowSegmentTree &tree, const idx_t *begins, const idx_t *ends, Vector &result, idx_t count,
663
- idx_t row_idx, FramePart frame_part);
889
+ void Evaluate(const WindowSegmentTreeGlobalState &tree, const idx_t *begins, const idx_t *ends, Vector &result,
890
+ idx_t count, idx_t row_idx, FramePart frame_part);
664
891
 
665
892
  protected:
666
893
  //! Initialises the accumulation state vector (statef)
667
894
  void Initialize(idx_t count);
668
895
  //! Accumulate upper tree levels
669
- void EvaluateUpperLevels(const WindowSegmentTree &tree, const idx_t *begins, const idx_t *ends, idx_t count,
670
- idx_t row_idx, FramePart frame_part);
671
- void EvaluateLeaves(const WindowSegmentTree &tree, const idx_t *begins, const idx_t *ends, idx_t count,
896
+ void EvaluateUpperLevels(const WindowSegmentTreeGlobalState &tree, const idx_t *begins, const idx_t *ends,
897
+ idx_t count, idx_t row_idx, FramePart frame_part);
898
+ void EvaluateLeaves(const WindowSegmentTreeGlobalState &tree, const idx_t *begins, const idx_t *ends, idx_t count,
672
899
  idx_t row_idx, FramePart frame_part, FramePart leaf_part);
673
900
 
674
901
  public:
@@ -681,7 +908,7 @@ public:
681
908
  //! The partition arguments
682
909
  const DataChunk &inputs;
683
910
  //! The filtered rows in inputs
684
- const ValidityMask &filter_mask;
911
+ const ValidityArray &filter_mask;
685
912
  //! The size of a single aggregate state
686
913
  const idx_t state_size;
687
914
  //! Data pointer that contains a vector of states, used for intermediate window segment aggregation
@@ -704,28 +931,41 @@ public:
704
931
 
705
932
  class WindowSegmentTreeState : public WindowAggregatorState {
706
933
  public:
707
- WindowSegmentTreeState(const AggregateObject &aggr, const DataChunk &inputs, const ValidityMask &filter_mask)
708
- : aggr(aggr), inputs(inputs), filter_mask(filter_mask), part(allocator, aggr, inputs, filter_mask) {
934
+ WindowSegmentTreeState() {
709
935
  }
710
936
 
711
- //! The aggregate function
712
- const AggregateObject &aggr;
713
- //! The aggregate function
714
- const DataChunk &inputs;
715
- //! The filtered rows in inputs
716
- const ValidityMask &filter_mask;
937
+ void Finalize(WindowSegmentTreeGlobalState &gstate);
938
+ void Evaluate(const WindowSegmentTreeGlobalState &gsink, const DataChunk &bounds, Vector &result, idx_t count,
939
+ idx_t row_idx);
717
940
  //! The left (default) segment tree part
718
- WindowSegmentTreePart part;
941
+ unique_ptr<WindowSegmentTreePart> part;
719
942
  //! The right segment tree part (for EXCLUDE)
720
943
  unique_ptr<WindowSegmentTreePart> right_part;
721
944
  };
722
945
 
946
+ void WindowSegmentTree::Finalize(WindowAggregatorState &gsink, WindowAggregatorState &lstate, const FrameStats &stats) {
947
+
948
+ auto &gasink = gsink.Cast<WindowSegmentTreeGlobalState>();
949
+ auto &inputs = gasink.inputs;
950
+
951
+ WindowAggregator::Finalize(gsink, lstate, stats);
952
+
953
+ if (inputs.ColumnCount() > 0) {
954
+ if (aggr.function.combine && UseCombineAPI()) {
955
+ lstate.Cast<WindowSegmentTreeState>().Finalize(gasink);
956
+ }
957
+ }
958
+
959
+ ++gasink.finalized;
960
+ }
961
+
723
962
  WindowSegmentTreePart::WindowSegmentTreePart(ArenaAllocator &allocator, const AggregateObject &aggr,
724
- const DataChunk &inputs, const ValidityMask &filter_mask)
963
+ const DataChunk &inputs, const ValidityArray &filter_mask)
725
964
  : allocator(allocator), aggr(aggr),
726
965
  order_insensitive(aggr.function.order_dependent == AggregateOrderDependent::NOT_ORDER_DEPENDENT), inputs(inputs),
727
- filter_mask(filter_mask), state_size(aggr.function.state_size()), state(state_size * STANDARD_VECTOR_SIZE),
728
- statep(LogicalType::POINTER), statel(LogicalType::POINTER), statef(LogicalType::POINTER), flush_count(0) {
966
+ filter_mask(filter_mask), state_size(aggr.function.state_size(aggr.function)),
967
+ state(state_size * STANDARD_VECTOR_SIZE), statep(LogicalType::POINTER), statel(LogicalType::POINTER),
968
+ statef(LogicalType::POINTER), flush_count(0) {
729
969
  if (inputs.ColumnCount() > 0) {
730
970
  leaves.Initialize(Allocator::DefaultAllocator(), inputs.GetTypes());
731
971
  filter_sel.Initialize();
@@ -746,8 +986,13 @@ WindowSegmentTreePart::WindowSegmentTreePart(ArenaAllocator &allocator, const Ag
746
986
  WindowSegmentTreePart::~WindowSegmentTreePart() {
747
987
  }
748
988
 
749
- unique_ptr<WindowAggregatorState> WindowSegmentTree::GetLocalState() const {
750
- return make_uniq<WindowSegmentTreeState>(aggr, inputs, filter_mask);
989
+ unique_ptr<WindowAggregatorState> WindowSegmentTree::GetGlobalState(idx_t group_count,
990
+ const ValidityMask &partition_mask) const {
991
+ return make_uniq<WindowSegmentTreeGlobalState>(*this, group_count);
992
+ }
993
+
994
+ unique_ptr<WindowAggregatorState> WindowSegmentTree::GetLocalState(const WindowAggregatorState &gstate) const {
995
+ return make_uniq<WindowSegmentTreeState>();
751
996
  }
752
997
 
753
998
  void WindowSegmentTreePart::FlushStates(bool combining) {
@@ -800,8 +1045,8 @@ void WindowSegmentTreePart::ExtractFrame(idx_t begin, idx_t end, data_ptr_t stat
800
1045
  }
801
1046
  }
802
1047
 
803
- void WindowSegmentTreePart::WindowSegmentValue(const WindowSegmentTree &tree, idx_t l_idx, idx_t begin, idx_t end,
804
- data_ptr_t state_ptr) {
1048
+ void WindowSegmentTreePart::WindowSegmentValue(const WindowSegmentTreeGlobalState &tree, idx_t l_idx, idx_t begin,
1049
+ idx_t end, data_ptr_t state_ptr) {
805
1050
  D_ASSERT(begin <= end);
806
1051
  if (begin == end || inputs.ColumnCount() == 0) {
807
1052
  return;
@@ -812,9 +1057,9 @@ void WindowSegmentTreePart::WindowSegmentValue(const WindowSegmentTree &tree, id
812
1057
  ExtractFrame(begin, end, state_ptr);
813
1058
  } else {
814
1059
  // find out where the states begin
815
- auto begin_ptr = tree.levels_flat_native.get() + state_size * (begin + tree.levels_flat_start[l_idx - 1]);
1060
+ auto begin_ptr = tree.levels_flat_native.GetStatePtr(begin + tree.levels_flat_start[l_idx - 1]);
816
1061
  // set up a vector of pointers that point towards the set of states
817
- auto ldata = FlatVector::GetData<data_ptr_t>(statel);
1062
+ auto ldata = FlatVector::GetData<const_data_ptr_t>(statel);
818
1063
  auto pdata = FlatVector::GetData<data_ptr_t>(statep);
819
1064
  for (idx_t i = 0; i < count; i++) {
820
1065
  pdata[flush_count] = state_ptr;
@@ -837,20 +1082,12 @@ void WindowSegmentTreePart::Finalize(Vector &result, idx_t count) {
837
1082
  }
838
1083
  }
839
1084
 
840
- void WindowSegmentTree::ConstructTree() {
841
- D_ASSERT(inputs.ColumnCount() > 0);
1085
+ WindowSegmentTreeGlobalState::WindowSegmentTreeGlobalState(const WindowSegmentTree &aggregator, idx_t group_count)
1086
+ : WindowAggregatorGlobalState(aggregator, group_count), tree(aggregator), levels_flat_native(aggregator.aggr) {
842
1087
 
843
- // Use a temporary scan state to build the tree
844
- auto &gtstate = gstate->Cast<WindowSegmentTreeState>().part;
1088
+ D_ASSERT(inputs.ColumnCount() > 0);
845
1089
 
846
1090
  // compute space required to store internal nodes of segment tree
847
- internal_nodes = 0;
848
- idx_t level_nodes = inputs.size();
849
- do {
850
- level_nodes = (level_nodes + (TREE_FANOUT - 1)) / TREE_FANOUT;
851
- internal_nodes += level_nodes;
852
- } while (level_nodes > 1);
853
- levels_flat_native = make_unsafe_uniq_array<data_t>(internal_nodes * state_size);
854
1091
  levels_flat_start.push_back(0);
855
1092
 
856
1093
  idx_t levels_flat_offset = 0;
@@ -861,12 +1098,6 @@ void WindowSegmentTree::ConstructTree() {
861
1098
  while ((level_size =
862
1099
  (level_current == 0 ? inputs.size() : levels_flat_offset - levels_flat_start[level_current - 1])) > 1) {
863
1100
  for (idx_t pos = 0; pos < level_size; pos += TREE_FANOUT) {
864
- // compute the aggregate for this entry in the segment tree
865
- data_ptr_t state_ptr = levels_flat_native.get() + (levels_flat_offset * state_size);
866
- aggr.function.initialize(state_ptr);
867
- gtstate.WindowSegmentValue(*this, level_current, pos, MinValue(level_size, pos + TREE_FANOUT), state_ptr);
868
- gtstate.FlushStates(level_current > 0);
869
-
870
1101
  levels_flat_offset++;
871
1102
  }
872
1103
 
@@ -876,46 +1107,120 @@ void WindowSegmentTree::ConstructTree() {
876
1107
 
877
1108
  // Corner case: single element in the window
878
1109
  if (levels_flat_offset == 0) {
879
- aggr.function.initialize(levels_flat_native.get());
1110
+ ++levels_flat_offset;
1111
+ }
1112
+
1113
+ levels_flat_native.Initialize(levels_flat_offset);
1114
+
1115
+ // Start by building from the bottom level
1116
+ build_level = 0;
1117
+
1118
+ build_started = make_uniq<AtomicCounters>(levels_flat_start.size());
1119
+ for (auto &counter : *build_started) {
1120
+ counter = 0;
1121
+ }
1122
+
1123
+ build_completed = make_uniq<AtomicCounters>(levels_flat_start.size());
1124
+ for (auto &counter : *build_completed) {
1125
+ counter = 0;
880
1126
  }
881
1127
  }
882
1128
 
883
- void WindowSegmentTree::Evaluate(WindowAggregatorState &lstate, const DataChunk &bounds, Vector &result, idx_t count,
884
- idx_t row_idx) const {
1129
+ void WindowSegmentTreeState::Finalize(WindowSegmentTreeGlobalState &gstate) {
1130
+ // Single part for constructing the tree
1131
+ auto &inputs = gstate.inputs;
1132
+ auto &tree = gstate.tree;
1133
+ auto &filter_mask = gstate.filter_mask;
1134
+ WindowSegmentTreePart gtstate(gstate.CreateTreeAllocator(), tree.aggr, inputs, filter_mask);
1135
+
1136
+ auto &levels_flat_native = gstate.levels_flat_native;
1137
+ const auto &levels_flat_start = gstate.levels_flat_start;
1138
+ // iterate over the levels of the segment tree
1139
+ for (;;) {
1140
+ const idx_t level_current = gstate.build_level.load();
1141
+ if (level_current >= levels_flat_start.size()) {
1142
+ break;
1143
+ }
1144
+
1145
+ // level 0 is data itself
1146
+ const auto level_size =
1147
+ (level_current == 0 ? inputs.size()
1148
+ : levels_flat_start[level_current] - levels_flat_start[level_current - 1]);
1149
+ if (level_size <= 1) {
1150
+ break;
1151
+ }
1152
+ const idx_t build_count = (level_size + gstate.TREE_FANOUT - 1) / gstate.TREE_FANOUT;
1153
+
1154
+ // Build the next fan-in
1155
+ const idx_t build_idx = (*gstate.build_started).at(level_current)++;
1156
+ if (build_idx >= build_count) {
1157
+ // Nothing left at this level, so wait until other threads are done.
1158
+ // Since we are only building TREE_FANOUT values at a time, this will be quick.
1159
+ while (level_current == gstate.build_level.load()) {
1160
+ std::this_thread::sleep_for(std::chrono::milliseconds(1));
1161
+ }
1162
+ continue;
1163
+ }
1164
+
1165
+ // compute the aggregate for this entry in the segment tree
1166
+ const idx_t pos = build_idx * gstate.TREE_FANOUT;
1167
+ const idx_t levels_flat_offset = levels_flat_start[level_current] + build_idx;
1168
+ auto state_ptr = levels_flat_native.GetStatePtr(levels_flat_offset);
1169
+ gtstate.WindowSegmentValue(gstate, level_current, pos, MinValue(level_size, pos + gstate.TREE_FANOUT),
1170
+ state_ptr);
1171
+ gtstate.FlushStates(level_current > 0);
1172
+
1173
+ // If that was the last one, mark the level as complete.
1174
+ const idx_t build_complete = ++(*gstate.build_completed).at(level_current);
1175
+ if (build_complete == build_count) {
1176
+ gstate.build_level++;
1177
+ continue;
1178
+ }
1179
+ }
1180
+ }
885
1181
 
1182
+ void WindowSegmentTree::Evaluate(const WindowAggregatorState &gsink, WindowAggregatorState &lstate,
1183
+ const DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) const {
1184
+ const auto &gtstate = gsink.Cast<WindowSegmentTreeGlobalState>();
886
1185
  auto &ltstate = lstate.Cast<WindowSegmentTreeState>();
1186
+ ltstate.Evaluate(gtstate, bounds, result, count, row_idx);
1187
+ }
1188
+
1189
+ void WindowSegmentTreeState::Evaluate(const WindowSegmentTreeGlobalState &gtstate, const DataChunk &bounds,
1190
+ Vector &result, idx_t count, idx_t row_idx) {
887
1191
  auto window_begin = FlatVector::GetData<const idx_t>(bounds.data[WINDOW_BEGIN]);
888
1192
  auto window_end = FlatVector::GetData<const idx_t>(bounds.data[WINDOW_END]);
889
1193
  auto peer_begin = FlatVector::GetData<const idx_t>(bounds.data[PEER_BEGIN]);
890
1194
  auto peer_end = FlatVector::GetData<const idx_t>(bounds.data[PEER_END]);
891
1195
 
892
- auto &part = ltstate.part;
893
- if (exclude_mode != WindowExcludeMode::NO_OTHER) {
1196
+ if (!part) {
1197
+ part =
1198
+ make_uniq<WindowSegmentTreePart>(allocator, gtstate.aggregator.aggr, gtstate.inputs, gtstate.filter_mask);
1199
+ }
1200
+
1201
+ if (gtstate.aggregator.exclude_mode != WindowExcludeMode::NO_OTHER) {
894
1202
  // 1. evaluate the tree left of the excluded part
895
- part.Evaluate(*this, window_begin, peer_begin, result, count, row_idx, WindowSegmentTreePart::LEFT);
1203
+ part->Evaluate(gtstate, window_begin, peer_begin, result, count, row_idx, WindowSegmentTreePart::LEFT);
896
1204
 
897
1205
  // 2. set up a second state for the right of the excluded part
898
- if (!ltstate.right_part) {
899
- ltstate.right_part = part.Copy();
1206
+ if (!right_part) {
1207
+ right_part = part->Copy();
900
1208
  }
901
- auto &right_part = *ltstate.right_part;
902
1209
 
903
1210
  // 3. evaluate the tree right of the excluded part
904
- right_part.Evaluate(*this, peer_end, window_end, result, count, row_idx, WindowSegmentTreePart::RIGHT);
1211
+ right_part->Evaluate(gtstate, peer_end, window_end, result, count, row_idx, WindowSegmentTreePart::RIGHT);
905
1212
 
906
1213
  // 4. combine the buffer state into the Segment Tree State
907
- part.Combine(right_part, count);
1214
+ part->Combine(*right_part, count);
908
1215
  } else {
909
- part.Evaluate(*this, window_begin, window_end, result, count, row_idx, WindowSegmentTreePart::FULL);
1216
+ part->Evaluate(gtstate, window_begin, window_end, result, count, row_idx, WindowSegmentTreePart::FULL);
910
1217
  }
911
1218
 
912
- part.Finalize(result, count);
1219
+ part->Finalize(result, count);
913
1220
  }
914
1221
 
915
- void WindowSegmentTreePart::Evaluate(const WindowSegmentTree &tree, const idx_t *begins, const idx_t *ends,
1222
+ void WindowSegmentTreePart::Evaluate(const WindowSegmentTreeGlobalState &tree, const idx_t *begins, const idx_t *ends,
916
1223
  Vector &result, idx_t count, idx_t row_idx, FramePart frame_part) {
917
- D_ASSERT(aggr.function.combine && tree.UseCombineAPI());
918
-
919
1224
  Initialize(count);
920
1225
 
921
1226
  if (order_insensitive) {
@@ -936,15 +1241,15 @@ void WindowSegmentTreePart::Initialize(idx_t count) {
936
1241
  auto fdata = FlatVector::GetData<data_ptr_t>(statef);
937
1242
  for (idx_t rid = 0; rid < count; ++rid) {
938
1243
  auto state_ptr = fdata[rid];
939
- aggr.function.initialize(state_ptr);
1244
+ aggr.function.initialize(aggr.function, state_ptr);
940
1245
  }
941
1246
  }
942
1247
 
943
- void WindowSegmentTreePart::EvaluateUpperLevels(const WindowSegmentTree &tree, const idx_t *begins, const idx_t *ends,
944
- idx_t count, idx_t row_idx, FramePart frame_part) {
1248
+ void WindowSegmentTreePart::EvaluateUpperLevels(const WindowSegmentTreeGlobalState &tree, const idx_t *begins,
1249
+ const idx_t *ends, idx_t count, idx_t row_idx, FramePart frame_part) {
945
1250
  auto fdata = FlatVector::GetData<data_ptr_t>(statef);
946
1251
 
947
- const auto exclude_mode = tree.exclude_mode;
1252
+ const auto exclude_mode = tree.tree.exclude_mode;
948
1253
  const bool begin_on_curr_row = frame_part == FramePart::RIGHT && exclude_mode == WindowExcludeMode::CURRENT_ROW;
949
1254
  const bool end_on_curr_row = frame_part == FramePart::LEFT && exclude_mode == WindowExcludeMode::CURRENT_ROW;
950
1255
 
@@ -1034,8 +1339,9 @@ void WindowSegmentTreePart::EvaluateUpperLevels(const WindowSegmentTree &tree, c
1034
1339
  FlushStates(true);
1035
1340
  }
1036
1341
 
1037
- void WindowSegmentTreePart::EvaluateLeaves(const WindowSegmentTree &tree, const idx_t *begins, const idx_t *ends,
1038
- idx_t count, idx_t row_idx, FramePart frame_part, FramePart leaf_part) {
1342
+ void WindowSegmentTreePart::EvaluateLeaves(const WindowSegmentTreeGlobalState &tree, const idx_t *begins,
1343
+ const idx_t *ends, idx_t count, idx_t row_idx, FramePart frame_part,
1344
+ FramePart leaf_part) {
1039
1345
 
1040
1346
  auto fdata = FlatVector::GetData<data_ptr_t>(statef);
1041
1347
 
@@ -1044,7 +1350,7 @@ void WindowSegmentTreePart::EvaluateLeaves(const WindowSegmentTree &tree, const
1044
1350
  // The current row is the leftmost value of the right hand side.
1045
1351
  const bool compute_left = leaf_part != FramePart::RIGHT;
1046
1352
  const bool compute_right = leaf_part != FramePart::LEFT;
1047
- const auto exclude_mode = tree.exclude_mode;
1353
+ const auto exclude_mode = tree.tree.exclude_mode;
1048
1354
  const bool begin_on_curr_row = frame_part == FramePart::RIGHT && exclude_mode == WindowExcludeMode::CURRENT_ROW;
1049
1355
  const bool end_on_curr_row = frame_part == FramePart::LEFT && exclude_mode == WindowExcludeMode::CURRENT_ROW;
1050
1356
  // with EXCLUDE TIES, in addition to the frame part right of the peer group's end, we also need to consider the
@@ -1087,81 +1393,236 @@ void WindowSegmentTreePart::EvaluateLeaves(const WindowSegmentTree &tree, const
1087
1393
  //===--------------------------------------------------------------------===//
1088
1394
  // WindowDistinctAggregator
1089
1395
  //===--------------------------------------------------------------------===//
1090
- WindowDistinctAggregator::WindowDistinctAggregator(AggregateObject aggr, const LogicalType &result_type,
1091
- const WindowExcludeMode exclude_mode_p, idx_t count,
1092
- ClientContext &context)
1093
- : WindowAggregator(std::move(aggr), result_type, exclude_mode_p, count), context(context),
1094
- allocator(Allocator::DefaultAllocator()) {
1396
+ WindowDistinctAggregator::WindowDistinctAggregator(AggregateObject aggr, const vector<LogicalType> &arg_types,
1397
+ const LogicalType &result_type,
1398
+ const WindowExcludeMode exclude_mode_p, ClientContext &context)
1399
+ : WindowAggregator(std::move(aggr), arg_types, result_type, exclude_mode_p), context(context) {
1400
+ }
1401
+
1402
+ class WindowDistinctAggregatorLocalState;
1403
+
1404
+ class WindowDistinctAggregatorGlobalState;
1405
+
1406
+ class WindowDistinctSortTree : public MergeSortTree<idx_t, idx_t> {
1407
+ public:
1408
+ // prev_idx, input_idx
1409
+ using ZippedTuple = std::tuple<idx_t, idx_t>;
1410
+ using ZippedElements = vector<ZippedTuple>;
1411
+
1412
+ explicit WindowDistinctSortTree(WindowDistinctAggregatorGlobalState &gdastate, idx_t count) : gdastate(gdastate) {
1413
+ // Set up for parallel build
1414
+ build_level = 0;
1415
+ build_complete = 0;
1416
+ build_run = 0;
1417
+ build_run_length = 1;
1418
+ build_num_runs = count;
1419
+ }
1420
+
1421
+ void Build(WindowDistinctAggregatorLocalState &ldastate);
1422
+
1423
+ protected:
1424
+ bool TryNextRun(idx_t &level_idx, idx_t &run_idx);
1425
+ void BuildRun(idx_t level_nr, idx_t i, WindowDistinctAggregatorLocalState &ldastate);
1095
1426
 
1427
+ WindowDistinctAggregatorGlobalState &gdastate;
1428
+ };
1429
+
1430
+ class WindowDistinctAggregatorGlobalState : public WindowAggregatorGlobalState {
1431
+ public:
1432
+ using GlobalSortStatePtr = unique_ptr<GlobalSortState>;
1433
+ using ZippedTuple = WindowDistinctSortTree::ZippedTuple;
1434
+ using ZippedElements = WindowDistinctSortTree::ZippedElements;
1435
+
1436
+ WindowDistinctAggregatorGlobalState(const WindowDistinctAggregator &aggregator, idx_t group_count);
1437
+
1438
+ //! Compute the block starts
1439
+ void MeasurePayloadBlocks();
1440
+ //! Patch up the previous index block boundaries
1441
+ void PatchPrevIdcs();
1442
+ bool TryPrepareNextStage(WindowDistinctAggregatorLocalState &lstate);
1443
+
1444
+ // Single threaded sorting for now
1445
+ ClientContext &context;
1446
+ idx_t memory_per_thread;
1447
+
1448
+ //! Finalize guard
1449
+ mutex lock;
1450
+ //! Finalize stage
1451
+ atomic<PartitionSortStage> stage;
1452
+ //! Tasks launched
1453
+ idx_t total_tasks = 0;
1454
+ //! Tasks launched
1455
+ idx_t tasks_assigned = 0;
1456
+ //! Tasks landed
1457
+ mutable atomic<idx_t> tasks_completed;
1458
+
1459
+ //! The sorted payload data types (partition index)
1460
+ vector<LogicalType> payload_types;
1461
+ //! The aggregate arguments + partition index
1462
+ vector<LogicalType> sort_types;
1463
+
1464
+ //! Sorting operations
1465
+ GlobalSortStatePtr global_sort;
1466
+ //! The block starts (the scanner doesn't know this) plus the total count
1467
+ vector<idx_t> block_starts;
1468
+
1469
+ //! The block boundary seconds
1470
+ mutable ZippedElements seconds;
1471
+ //! The MST with the distinct back pointers
1472
+ mutable MergeSortTree<ZippedTuple> zipped_tree;
1473
+ //! The merge sort tree for the aggregate.
1474
+ WindowDistinctSortTree merge_sort_tree;
1475
+
1476
+ //! The actual window segment tree: an array of aggregate states that represent all the intermediate nodes
1477
+ WindowAggregateStates levels_flat_native;
1478
+ //! For each level, the starting location in the levels_flat_native array
1479
+ vector<idx_t> levels_flat_start;
1480
+ };
1481
+
1482
+ WindowDistinctAggregatorGlobalState::WindowDistinctAggregatorGlobalState(const WindowDistinctAggregator &aggregator,
1483
+ idx_t group_count)
1484
+ : WindowAggregatorGlobalState(aggregator, group_count), context(aggregator.context),
1485
+ stage(PartitionSortStage::INIT), tasks_completed(0), merge_sort_tree(*this, group_count),
1486
+ levels_flat_native(aggregator.aggr) {
1096
1487
  payload_types.emplace_back(LogicalType::UBIGINT);
1097
- payload_chunk.Initialize(Allocator::DefaultAllocator(), payload_types);
1098
- }
1099
1488
 
1100
- WindowDistinctAggregator::~WindowDistinctAggregator() {
1101
- if (!aggr.function.destructor) {
1102
- // nothing to destroy
1103
- return;
1489
+ // 1: functionComputePrevIdcs(𝑖𝑛)
1490
+ // 2: sorted ← []
1491
+ // We sort the aggregate arguments and use the partition index as a tie-breaker.
1492
+ // TODO: Use a hash table?
1493
+ sort_types = aggregator.arg_types;
1494
+ for (const auto &type : payload_types) {
1495
+ sort_types.emplace_back(type);
1104
1496
  }
1105
- AggregateInputData aggr_input_data(aggr.GetFunctionData(), allocator);
1106
- // call the destructor for all the intermediate states
1107
- data_ptr_t address_data[STANDARD_VECTOR_SIZE];
1108
- Vector addresses(LogicalType::POINTER, data_ptr_cast(address_data));
1109
- idx_t count = 0;
1110
- for (idx_t i = 0; i < internal_nodes; i++) {
1111
- address_data[count++] = data_ptr_t(levels_flat_native.get() + i * state_size);
1112
- if (count == STANDARD_VECTOR_SIZE) {
1113
- aggr.function.destructor(addresses, aggr_input_data, count);
1114
- count = 0;
1115
- }
1497
+
1498
+ vector<BoundOrderByNode> orders;
1499
+ for (const auto &type : sort_types) {
1500
+ auto expr = make_uniq<BoundConstantExpression>(Value(type));
1501
+ orders.emplace_back(BoundOrderByNode(OrderType::ASCENDING, OrderByNullType::NULLS_FIRST, std::move(expr)));
1116
1502
  }
1117
- if (count > 0) {
1118
- aggr.function.destructor(addresses, aggr_input_data, count);
1503
+
1504
+ RowLayout payload_layout;
1505
+ payload_layout.Initialize(payload_types);
1506
+
1507
+ global_sort = make_uniq<GlobalSortState>(BufferManager::GetBufferManager(context), orders, payload_layout);
1508
+
1509
+ memory_per_thread = PhysicalOperator::GetMaxThreadMemory(context);
1510
+
1511
+ // 6: prevIdcs ← []
1512
+ // 7: prevIdcs[0] ← “-”
1513
+ auto &prev_idcs = zipped_tree.Allocate(group_count);
1514
+
1515
+ // To handle FILTER clauses we make the missing elements
1516
+ // point to themselves so they won't be counted.
1517
+ for (idx_t i = 0; i < group_count; ++i) {
1518
+ prev_idcs[i] = ZippedTuple(i + 1, i);
1519
+ }
1520
+
1521
+ // compute space required to store aggregation states of merge sort tree
1522
+ // this is one aggregate state per entry per level
1523
+ idx_t internal_nodes = 0;
1524
+ levels_flat_start.push_back(internal_nodes);
1525
+ for (idx_t level_nr = 0; level_nr < zipped_tree.tree.size(); ++level_nr) {
1526
+ internal_nodes += zipped_tree.tree[level_nr].first.size();
1527
+ levels_flat_start.push_back(internal_nodes);
1528
+ }
1529
+ levels_flat_native.Initialize(internal_nodes);
1530
+
1531
+ merge_sort_tree.tree.reserve(zipped_tree.tree.size());
1532
+ for (idx_t level_nr = 0; level_nr < zipped_tree.tree.size(); ++level_nr) {
1533
+ auto &zipped_level = zipped_tree.tree[level_nr].first;
1534
+ WindowDistinctSortTree::Elements level;
1535
+ WindowDistinctSortTree::Offsets cascades;
1536
+ level.resize(zipped_level.size());
1537
+ merge_sort_tree.tree.emplace_back(std::move(level), std::move(cascades));
1119
1538
  }
1120
1539
  }
1121
1540
 
1122
- void WindowDistinctAggregator::Sink(DataChunk &arg_chunk, SelectionVector *filter_sel, idx_t filtered) {
1123
- WindowAggregator::Sink(arg_chunk, filter_sel, filtered);
1541
+ class WindowDistinctAggregatorLocalState : public WindowAggregatorState {
1542
+ public:
1543
+ explicit WindowDistinctAggregatorLocalState(const WindowDistinctAggregatorGlobalState &aggregator);
1544
+
1545
+ void Sink(DataChunk &arg_chunk, idx_t input_idx, optional_ptr<SelectionVector> filter_sel, idx_t filtered);
1546
+ void Sorted();
1547
+ void ExecuteTask();
1548
+ void Evaluate(const WindowDistinctAggregatorGlobalState &gdstate, const DataChunk &bounds, Vector &result,
1549
+ idx_t count, idx_t row_idx);
1550
+
1551
+ //! Thread-local sorting data
1552
+ LocalSortState local_sort;
1553
+ //! Finalize stage
1554
+ PartitionSortStage stage = PartitionSortStage::INIT;
1555
+ //! Finalize scan block index
1556
+ idx_t block_idx;
1557
+ //! Thread-local tree aggregation
1558
+ Vector update_v;
1559
+ Vector source_v;
1560
+ Vector target_v;
1561
+ DataChunk leaves;
1562
+ SelectionVector sel;
1124
1563
 
1125
- // We sort the arguments and use the partition index as a tie-breaker.
1126
- // TODO: Use a hash table?
1127
- if (!global_sort) {
1128
- // 1: functionComputePrevIdcs(𝑖𝑛)
1129
- // 2: sorted ← []
1130
- vector<LogicalType> sort_types;
1131
- for (const auto &col : arg_chunk.data) {
1132
- sort_types.emplace_back(col.GetType());
1133
- }
1564
+ protected:
1565
+ //! Flush the accumulated intermediate states into the result states
1566
+ void FlushStates();
1134
1567
 
1135
- for (const auto &type : payload_types) {
1136
- sort_types.emplace_back(type);
1137
- }
1568
+ //! The aggregator we are working with
1569
+ const WindowDistinctAggregatorGlobalState &gastate;
1570
+ DataChunk sort_chunk;
1571
+ DataChunk payload_chunk;
1572
+ //! Reused result state container for the window functions
1573
+ WindowAggregateStates statef;
1574
+ //! A vector of pointers to "state", used for buffering intermediate aggregates
1575
+ Vector statep;
1576
+ //! Reused state pointers for combining tree elements
1577
+ Vector statel;
1578
+ //! Count of buffered values
1579
+ idx_t flush_count;
1580
+ //! The frame boundaries, used for the window functions
1581
+ SubFrames frames;
1582
+ };
1138
1583
 
1139
- vector<BoundOrderByNode> orders;
1140
- for (const auto &type : sort_types) {
1141
- auto expr = make_uniq<BoundConstantExpression>(Value(type));
1142
- orders.emplace_back(BoundOrderByNode(OrderType::ASCENDING, OrderByNullType::NULLS_FIRST, std::move(expr)));
1143
- }
1584
+ WindowDistinctAggregatorLocalState::WindowDistinctAggregatorLocalState(
1585
+ const WindowDistinctAggregatorGlobalState &gastate)
1586
+ : update_v(LogicalType::POINTER), source_v(LogicalType::POINTER), target_v(LogicalType::POINTER), gastate(gastate),
1587
+ statef(gastate.aggregator.aggr), statep(LogicalType::POINTER), statel(LogicalType::POINTER), flush_count(0) {
1588
+ InitSubFrames(frames, gastate.aggregator.exclude_mode);
1589
+ payload_chunk.Initialize(Allocator::DefaultAllocator(), gastate.payload_types);
1144
1590
 
1145
- RowLayout payload_layout;
1146
- payload_layout.Initialize(payload_types);
1591
+ auto &global_sort = gastate.global_sort;
1592
+ local_sort.Initialize(*global_sort, global_sort->buffer_manager);
1147
1593
 
1148
- global_sort = make_uniq<GlobalSortState>(BufferManager::GetBufferManager(context), orders, payload_layout);
1149
- local_sort.Initialize(*global_sort, global_sort->buffer_manager);
1594
+ sort_chunk.Initialize(Allocator::DefaultAllocator(), gastate.sort_types);
1595
+ sort_chunk.data.back().Reference(payload_chunk.data[0]);
1150
1596
 
1151
- sort_chunk.Initialize(Allocator::DefaultAllocator(), sort_types);
1152
- sort_chunk.data.back().Reference(payload_chunk.data[0]);
1153
- payload_pos = 0;
1154
- memory_per_thread = PhysicalOperator::GetMaxThreadMemory(context);
1155
- }
1597
+ //! Input data chunk, used for leaf segment aggregation
1598
+ leaves.Initialize(Allocator::DefaultAllocator(), gastate.inputs.GetTypes());
1599
+ sel.Initialize();
1600
+
1601
+ gastate.locals++;
1602
+ }
1603
+
1604
+ unique_ptr<WindowAggregatorState> WindowDistinctAggregator::GetGlobalState(idx_t group_count,
1605
+ const ValidityMask &partition_mask) const {
1606
+ return make_uniq<WindowDistinctAggregatorGlobalState>(*this, group_count);
1607
+ }
1608
+
1609
+ void WindowDistinctAggregator::Sink(WindowAggregatorState &gsink, WindowAggregatorState &lstate, DataChunk &arg_chunk,
1610
+ idx_t input_idx, optional_ptr<SelectionVector> filter_sel, idx_t filtered) {
1611
+ WindowAggregator::Sink(gsink, lstate, arg_chunk, input_idx, filter_sel, filtered);
1156
1612
 
1613
+ auto &ldstate = lstate.Cast<WindowDistinctAggregatorLocalState>();
1614
+ ldstate.Sink(arg_chunk, input_idx, filter_sel, filtered);
1615
+ }
1616
+
1617
+ void WindowDistinctAggregatorLocalState::Sink(DataChunk &arg_chunk, idx_t input_idx,
1618
+ optional_ptr<SelectionVector> filter_sel, idx_t filtered) {
1157
1619
  // 3: for i ← 0 to in.size do
1158
1620
  // 4: sorted[i] ← (in[i], i)
1159
1621
  const auto count = arg_chunk.size();
1160
1622
  payload_chunk.Reset();
1161
1623
  auto &sorted_vec = payload_chunk.data[0];
1162
1624
  auto sorted = FlatVector::GetData<idx_t>(sorted_vec);
1163
- std::iota(sorted, sorted + count, payload_pos);
1164
- payload_pos += count;
1625
+ std::iota(sorted, sorted + count, input_idx);
1165
1626
 
1166
1627
  for (column_t c = 0; c < arg_chunk.ColumnCount(); ++c) {
1167
1628
  sort_chunk.data[c].Reference(arg_chunk.data[c]);
@@ -1178,61 +1639,178 @@ void WindowDistinctAggregator::Sink(DataChunk &arg_chunk, SelectionVector *filte
1178
1639
 
1179
1640
  local_sort.SinkChunk(sort_chunk, payload_chunk);
1180
1641
 
1181
- if (local_sort.SizeInBytes() > memory_per_thread) {
1182
- local_sort.Sort(*global_sort, true);
1642
+ if (local_sort.SizeInBytes() > gastate.memory_per_thread) {
1643
+ local_sort.Sort(*gastate.global_sort, true);
1183
1644
  }
1184
1645
  }
1185
1646
 
1186
- class WindowDistinctAggregator::DistinctSortTree : public MergeSortTree<idx_t, idx_t> {
1187
- public:
1188
- // prev_idx, input_idx
1189
- using ZippedTuple = std::tuple<idx_t, idx_t>;
1190
- using ZippedElements = vector<ZippedTuple>;
1647
+ void WindowDistinctAggregatorLocalState::ExecuteTask() {
1648
+ auto &global_sort = *gastate.global_sort;
1649
+ switch (stage) {
1650
+ case PartitionSortStage::INIT:
1651
+ // AddLocalState is thread-safe
1652
+ global_sort.AddLocalState(local_sort);
1653
+ break;
1654
+ case PartitionSortStage::MERGE: {
1655
+ MergeSorter merge_sorter(global_sort, global_sort.buffer_manager);
1656
+ merge_sorter.PerformInMergeRound();
1657
+ break;
1658
+ }
1659
+ case PartitionSortStage::SORTED:
1660
+ Sorted();
1661
+ break;
1662
+ default:
1663
+ break;
1664
+ }
1191
1665
 
1192
- DistinctSortTree(ZippedElements &&prev_idcs, WindowDistinctAggregator &wda);
1193
- };
1666
+ ++gastate.tasks_completed;
1667
+ }
1194
1668
 
1195
- void WindowDistinctAggregator::Finalize(const FrameStats &stats) {
1196
- // 5: Sort sorted lexicographically increasing
1197
- global_sort->AddLocalState(local_sort);
1198
- global_sort->PrepareMergePhase();
1199
- while (global_sort->sorted_blocks.size() > 1) {
1669
+ void WindowDistinctAggregatorGlobalState::MeasurePayloadBlocks() {
1670
+ const auto &blocks = global_sort->sorted_blocks[0]->payload_data->data_blocks;
1671
+ idx_t count = 0;
1672
+ for (const auto &block : blocks) {
1673
+ block_starts.emplace_back(count);
1674
+ count += block->count;
1675
+ }
1676
+ block_starts.emplace_back(count);
1677
+ }
1678
+
1679
+ bool WindowDistinctAggregatorGlobalState::TryPrepareNextStage(WindowDistinctAggregatorLocalState &lstate) {
1680
+ lock_guard<mutex> stage_guard(lock);
1681
+
1682
+ switch (stage.load()) {
1683
+ case PartitionSortStage::INIT:
1684
+ // Wait for all the local sorts to be processed
1685
+ if (tasks_completed < locals) {
1686
+ return false;
1687
+ }
1688
+ global_sort->PrepareMergePhase();
1689
+ if (!(global_sort->sorted_blocks.size() / 2)) {
1690
+ if (global_sort->sorted_blocks.empty()) {
1691
+ lstate.stage = stage = PartitionSortStage::FINISHED;
1692
+ return true;
1693
+ }
1694
+ MeasurePayloadBlocks();
1695
+ seconds.resize(block_starts.size() - 1);
1696
+ total_tasks = seconds.size();
1697
+ tasks_completed = 0;
1698
+ tasks_assigned = 0;
1699
+ lstate.stage = stage = PartitionSortStage::SORTED;
1700
+ lstate.block_idx = tasks_assigned++;
1701
+ return true;
1702
+ }
1200
1703
  global_sort->InitializeMergeRound();
1201
- MergeSorter merge_sorter(*global_sort, global_sort->buffer_manager);
1202
- merge_sorter.PerformInMergeRound();
1704
+ lstate.stage = stage = PartitionSortStage::MERGE;
1705
+ total_tasks = locals;
1706
+ tasks_assigned = 1;
1707
+ tasks_completed = 0;
1708
+ return true;
1709
+ case PartitionSortStage::MERGE:
1710
+ if (tasks_assigned < total_tasks) {
1711
+ lstate.stage = PartitionSortStage::MERGE;
1712
+ ++tasks_assigned;
1713
+ return true;
1714
+ } else if (tasks_completed < tasks_assigned) {
1715
+ return false;
1716
+ }
1203
1717
  global_sort->CompleteMergeRound(true);
1718
+ if (!(global_sort->sorted_blocks.size() / 2)) {
1719
+ MeasurePayloadBlocks();
1720
+ seconds.resize(block_starts.size() - 1);
1721
+ total_tasks = seconds.size();
1722
+ tasks_completed = 0;
1723
+ tasks_assigned = 0;
1724
+ lstate.stage = stage = PartitionSortStage::SORTED;
1725
+ lstate.block_idx = tasks_assigned++;
1726
+ return true;
1727
+ }
1728
+ global_sort->InitializeMergeRound();
1729
+ lstate.stage = PartitionSortStage::MERGE;
1730
+ total_tasks = locals;
1731
+ tasks_assigned = 1;
1732
+ tasks_completed = 0;
1733
+ return true;
1734
+ case PartitionSortStage::SORTED:
1735
+ if (tasks_assigned < total_tasks) {
1736
+ lstate.stage = PartitionSortStage::SORTED;
1737
+ lstate.block_idx = tasks_assigned++;
1738
+ return true;
1739
+ } else if (tasks_completed < tasks_assigned) {
1740
+ lstate.stage = PartitionSortStage::FINISHED;
1741
+ // Sleep while other tasks finish
1742
+ return false;
1743
+ }
1744
+ // Last task patches the boundaries
1745
+ PatchPrevIdcs();
1746
+ break;
1747
+ default:
1748
+ break;
1204
1749
  }
1205
1750
 
1206
- DataChunk scan_chunk;
1207
- scan_chunk.Initialize(Allocator::DefaultAllocator(), payload_types);
1751
+ lstate.stage = stage = PartitionSortStage::FINISHED;
1208
1752
 
1209
- auto scanner = make_uniq<PayloadScanner>(*global_sort);
1210
- const auto in_size = scanner->Remaining();
1211
- scanner->Scan(scan_chunk);
1212
- idx_t scan_idx = 0;
1753
+ return true;
1754
+ }
1213
1755
 
1214
- // 6: prevIdcs ← []
1215
- // 7: prevIdcs[0] ← “-”
1216
- const auto count = inputs.size();
1217
- using ZippedTuple = DistinctSortTree::ZippedTuple;
1218
- DistinctSortTree::ZippedElements prev_idcs;
1219
- prev_idcs.resize(count);
1756
+ void WindowDistinctAggregator::Finalize(WindowAggregatorState &gsink, WindowAggregatorState &lstate,
1757
+ const FrameStats &stats) {
1758
+ auto &gdsink = gsink.Cast<WindowDistinctAggregatorGlobalState>();
1759
+ auto &ldstate = lstate.Cast<WindowDistinctAggregatorLocalState>();
1220
1760
 
1221
- // To handle FILTER clauses we make the missing elements
1222
- // point to themselves so they won't be counted.
1223
- if (in_size < count) {
1224
- for (idx_t i = 0; i < count; ++i) {
1225
- prev_idcs[i] = ZippedTuple(i + 1, i);
1761
+ // 5: Sort sorted lexicographically increasing
1762
+ ldstate.ExecuteTask();
1763
+
1764
+ // Merge in parallel
1765
+ while (gdsink.stage.load() != PartitionSortStage::FINISHED) {
1766
+ if (gdsink.TryPrepareNextStage(ldstate)) {
1767
+ ldstate.ExecuteTask();
1768
+ } else {
1769
+ std::this_thread::yield();
1226
1770
  }
1227
1771
  }
1228
1772
 
1773
+ // These are a parallel implementations,
1774
+ // so every thread can call them.
1775
+ gdsink.zipped_tree.Build();
1776
+ gdsink.merge_sort_tree.Build(ldstate);
1777
+
1778
+ ++gdsink.finalized;
1779
+ }
1780
+
1781
+ void WindowDistinctAggregatorLocalState::Sorted() {
1782
+ using ZippedTuple = WindowDistinctAggregatorGlobalState::ZippedTuple;
1783
+ auto &global_sort = gastate.global_sort;
1784
+ auto &prev_idcs = gastate.zipped_tree.LowestLevel();
1785
+ auto &aggregator = gastate.aggregator;
1786
+ auto &scan_chunk = payload_chunk;
1787
+
1788
+ auto scanner = make_uniq<PayloadScanner>(*global_sort, block_idx);
1789
+ const auto in_size = gastate.block_starts.at(block_idx + 1);
1790
+ scanner->Scan(scan_chunk);
1791
+ idx_t scan_idx = 0;
1792
+
1229
1793
  auto *input_idx = FlatVector::GetData<idx_t>(scan_chunk.data[0]);
1230
- auto i = input_idx[scan_idx++];
1231
- prev_idcs[i] = ZippedTuple(0, i);
1794
+ idx_t i = 0;
1232
1795
 
1233
1796
  SBIterator curr(*global_sort, ExpressionType::COMPARE_LESSTHAN);
1234
1797
  SBIterator prev(*global_sort, ExpressionType::COMPARE_LESSTHAN);
1235
- auto prefix_layout = global_sort->sort_layout.GetPrefixComparisonLayout(sort_chunk.ColumnCount() - 1);
1798
+ auto prefix_layout = global_sort->sort_layout.GetPrefixComparisonLayout(aggregator.arg_types.size());
1799
+
1800
+ const auto block_begin = gastate.block_starts.at(block_idx);
1801
+ if (!block_begin) {
1802
+ // First block, so set up initial sentinel
1803
+ i = input_idx[scan_idx++];
1804
+ prev_idcs[i] = ZippedTuple(0, i);
1805
+ std::get<0>(gastate.seconds[block_idx]) = i;
1806
+ } else {
1807
+ // Move to the to end of the previous block
1808
+ // so we can record the comparison result for the first row
1809
+ curr.SetIndex(block_begin - 1);
1810
+ prev.SetIndex(block_begin - 1);
1811
+ scan_idx = 0;
1812
+ std::get<0>(gastate.seconds[block_idx]) = input_idx[scan_idx];
1813
+ }
1236
1814
 
1237
1815
  // 8: for i ← 1 to in.size do
1238
1816
  for (++curr; curr.GetIndex() < in_size; ++curr, ++prev) {
@@ -1265,105 +1843,148 @@ void WindowDistinctAggregator::Finalize(const FrameStats &stats) {
1265
1843
  prev_idcs[i] = ZippedTuple(0, i);
1266
1844
  }
1267
1845
  }
1846
+
1847
+ // Save the last value of i for patching up the block boundaries
1848
+ std::get<1>(gastate.seconds[block_idx]) = i;
1849
+ }
1850
+
1851
+ void WindowDistinctAggregatorGlobalState::PatchPrevIdcs() {
1268
1852
  // 13: return prevIdcs
1269
1853
 
1270
- merge_sort_tree = make_uniq<DistinctSortTree>(std::move(prev_idcs), *this);
1854
+ // Patch up the indices at block boundaries
1855
+ // (We don't need to patch block 0.)
1856
+ auto &prev_idcs = zipped_tree.LowestLevel();
1857
+ for (idx_t block_idx = 1; block_idx < seconds.size(); ++block_idx) {
1858
+ // We only need to patch if the first index in the block
1859
+ // was a back link to the previous block (10:)
1860
+ auto i = std::get<0>(seconds.at(block_idx));
1861
+ if (std::get<0>(prev_idcs[i])) {
1862
+ auto second = std::get<1>(seconds.at(block_idx - 1));
1863
+ prev_idcs[i] = ZippedTuple(second + 1, i);
1864
+ }
1865
+ }
1866
+ }
1867
+
1868
+ bool WindowDistinctSortTree::TryNextRun(idx_t &level_idx, idx_t &run_idx) {
1869
+ const auto fanout = FANOUT;
1870
+
1871
+ lock_guard<mutex> stage_guard(build_lock);
1872
+
1873
+ // Verify we are not done
1874
+ if (build_level >= tree.size()) {
1875
+ return false;
1876
+ }
1877
+
1878
+ // Finished with this level?
1879
+ if (build_complete >= build_num_runs) {
1880
+ auto &zipped_tree = gdastate.zipped_tree;
1881
+ std::swap(tree[build_level].second, zipped_tree.tree[build_level].second);
1882
+
1883
+ ++build_level;
1884
+ if (build_level >= tree.size()) {
1885
+ zipped_tree.tree.clear();
1886
+ return false;
1887
+ }
1888
+
1889
+ const auto count = LowestLevel().size();
1890
+ build_run_length *= fanout;
1891
+ build_num_runs = (count + build_run_length - 1) / build_run_length;
1892
+ build_run = 0;
1893
+ build_complete = 0;
1894
+ }
1895
+
1896
+ // If all runs are in flight,
1897
+ // yield until the next level is ready
1898
+ if (build_run >= build_num_runs) {
1899
+ return false;
1900
+ }
1901
+
1902
+ level_idx = build_level;
1903
+ run_idx = build_run++;
1904
+
1905
+ return true;
1906
+ }
1907
+
1908
+ void WindowDistinctSortTree::Build(WindowDistinctAggregatorLocalState &ldastate) {
1909
+ // Fan in parent levels until we are at the top
1910
+ // Note that we don't build the top layer as that would just be all the data.
1911
+ while (build_level.load() < tree.size()) {
1912
+ idx_t level_idx;
1913
+ idx_t run_idx;
1914
+ if (TryNextRun(level_idx, run_idx)) {
1915
+ BuildRun(level_idx, run_idx, ldastate);
1916
+ } else {
1917
+ std::this_thread::yield();
1918
+ }
1919
+ }
1271
1920
  }
1272
1921
 
1273
- WindowDistinctAggregator::DistinctSortTree::DistinctSortTree(ZippedElements &&prev_idcs,
1274
- WindowDistinctAggregator &wda) {
1275
- auto &inputs = wda.inputs;
1276
- auto &aggr = wda.aggr;
1277
- auto &allocator = wda.allocator;
1278
- const auto state_size = wda.state_size;
1279
- auto &internal_nodes = wda.internal_nodes;
1280
- auto &levels_flat_native = wda.levels_flat_native;
1281
- auto &levels_flat_start = wda.levels_flat_start;
1922
+ void WindowDistinctSortTree::BuildRun(idx_t level_nr, idx_t run_idx, WindowDistinctAggregatorLocalState &ldastate) {
1923
+ auto &aggr = gdastate.aggregator.aggr;
1924
+ auto &allocator = gdastate.allocator;
1925
+ auto &inputs = gdastate.inputs;
1926
+ auto &levels_flat_native = gdastate.levels_flat_native;
1282
1927
 
1283
1928
  //! Input data chunk, used for leaf segment aggregation
1284
- DataChunk leaves;
1285
- leaves.Initialize(Allocator::DefaultAllocator(), inputs.GetTypes());
1286
- SelectionVector sel;
1287
- sel.Initialize();
1929
+ auto &leaves = ldastate.leaves;
1930
+ auto &sel = ldastate.sel;
1288
1931
 
1289
1932
  AggregateInputData aggr_input_data(aggr.GetFunctionData(), allocator);
1290
1933
 
1291
1934
  //! The states to update
1292
- Vector update_v(LogicalType::POINTER);
1935
+ auto &update_v = ldastate.update_v;
1293
1936
  auto updates = FlatVector::GetData<data_ptr_t>(update_v);
1294
- idx_t nupdate = 0;
1295
1937
 
1296
- Vector source_v(LogicalType::POINTER);
1938
+ auto &source_v = ldastate.source_v;
1297
1939
  auto sources = FlatVector::GetData<data_ptr_t>(source_v);
1298
- Vector target_v(LogicalType::POINTER);
1940
+ auto &target_v = ldastate.target_v;
1299
1941
  auto targets = FlatVector::GetData<data_ptr_t>(target_v);
1300
- idx_t ncombine = 0;
1301
-
1302
- // compute space required to store aggregation states of merge sort tree
1303
- // this is one aggregate state per entry per level
1304
- MergeSortTree<ZippedTuple> zipped_tree(std::move(prev_idcs));
1305
- internal_nodes = 0;
1306
- for (idx_t level_nr = 0; level_nr < zipped_tree.tree.size(); ++level_nr) {
1307
- internal_nodes += zipped_tree.tree[level_nr].first.size();
1308
- }
1309
- levels_flat_native = make_unsafe_uniq_array<data_t>(internal_nodes * state_size);
1310
- levels_flat_start.push_back(0);
1311
- idx_t levels_flat_offset = 0;
1312
1942
 
1313
- // Walk the distinct value tree building the intermediate aggregates
1314
- tree.reserve(zipped_tree.tree.size());
1315
- idx_t level_width = 1;
1316
- for (idx_t level_nr = 0; level_nr < zipped_tree.tree.size(); ++level_nr) {
1317
- auto &zipped_level = zipped_tree.tree[level_nr].first;
1318
- vector<ElementType> level;
1319
- level.reserve(zipped_level.size());
1320
-
1321
- for (idx_t i = 0; i < zipped_level.size(); i += level_width) {
1322
- // Reset the combine state
1323
- data_ptr_t prev_state = nullptr;
1324
- auto next_limit = MinValue<idx_t>(zipped_level.size(), i + level_width);
1325
- for (auto j = i; j < next_limit; ++j) {
1326
- // Initialise the next aggregate
1327
- auto curr_state = levels_flat_native.get() + (levels_flat_offset++ * state_size);
1328
- aggr.function.initialize(curr_state);
1329
-
1330
- // Update this state (if it matches)
1331
- const auto prev_idx = std::get<0>(zipped_level[j]);
1332
- level.emplace_back(prev_idx);
1333
- if (prev_idx < i + 1) {
1334
- updates[nupdate] = curr_state;
1335
- // input_idx
1336
- sel[nupdate] = UnsafeNumericCast<sel_t>(std::get<1>(zipped_level[j]));
1337
- ++nupdate;
1338
- }
1943
+ auto &zipped_tree = gdastate.zipped_tree;
1944
+ auto &zipped_level = zipped_tree.tree[level_nr].first;
1945
+ auto &level = tree[level_nr].first;
1339
1946
 
1340
- // Merge the previous state (if any)
1341
- if (prev_state) {
1342
- sources[ncombine] = prev_state;
1343
- targets[ncombine] = curr_state;
1344
- ++ncombine;
1345
- }
1346
- prev_state = curr_state;
1347
-
1348
- // Flush the states if one is maxed out.
1349
- if (MaxValue<idx_t>(ncombine, nupdate) >= STANDARD_VECTOR_SIZE) {
1350
- // Push the updates first so they propagate
1351
- leaves.Reference(inputs);
1352
- leaves.Slice(sel, nupdate);
1353
- aggr.function.update(leaves.data.data(), aggr_input_data, leaves.ColumnCount(), update_v, nupdate);
1354
- nupdate = 0;
1355
-
1356
- // Combine the states sequentially
1357
- aggr.function.combine(source_v, target_v, aggr_input_data, ncombine);
1358
- ncombine = 0;
1359
- }
1360
- }
1947
+ // Reset the combine state
1948
+ idx_t nupdate = 0;
1949
+ idx_t ncombine = 0;
1950
+ data_ptr_t prev_state = nullptr;
1951
+ idx_t i = run_idx * build_run_length;
1952
+ auto next_limit = MinValue<idx_t>(zipped_level.size(), i + build_run_length);
1953
+ idx_t levels_flat_offset = level_nr * zipped_level.size() + i;
1954
+ for (auto j = i; j < next_limit; ++j) {
1955
+ // Initialise the next aggregate
1956
+ auto curr_state = levels_flat_native.GetStatePtr(levels_flat_offset++);
1957
+
1958
+ // Update this state (if it matches)
1959
+ const auto prev_idx = std::get<0>(zipped_level[j]);
1960
+ level[j] = prev_idx;
1961
+ if (prev_idx < i + 1) {
1962
+ updates[nupdate] = curr_state;
1963
+ // input_idx
1964
+ sel[nupdate] = UnsafeNumericCast<sel_t>(std::get<1>(zipped_level[j]));
1965
+ ++nupdate;
1361
1966
  }
1362
1967
 
1363
- tree.emplace_back(std::move(level), std::move(zipped_tree.tree[level_nr].second));
1364
-
1365
- levels_flat_start.push_back(levels_flat_offset);
1366
- level_width *= FANOUT;
1968
+ // Merge the previous state (if any)
1969
+ if (prev_state) {
1970
+ sources[ncombine] = prev_state;
1971
+ targets[ncombine] = curr_state;
1972
+ ++ncombine;
1973
+ }
1974
+ prev_state = curr_state;
1975
+
1976
+ // Flush the states if one is maxed out.
1977
+ if (MaxValue<idx_t>(ncombine, nupdate) >= STANDARD_VECTOR_SIZE) {
1978
+ // Push the updates first so they propagate
1979
+ leaves.Reference(inputs);
1980
+ leaves.Slice(sel, nupdate);
1981
+ aggr.function.update(leaves.data.data(), aggr_input_data, leaves.ColumnCount(), update_v, nupdate);
1982
+ nupdate = 0;
1983
+
1984
+ // Combine the states sequentially
1985
+ aggr.function.combine(source_v, target_v, aggr_input_data, ncombine);
1986
+ ncombine = 0;
1987
+ }
1367
1988
  }
1368
1989
 
1369
1990
  // Flush any remaining states
@@ -1378,64 +1999,16 @@ WindowDistinctAggregator::DistinctSortTree::DistinctSortTree(ZippedElements &&pr
1378
1999
  aggr.function.combine(source_v, target_v, aggr_input_data, ncombine);
1379
2000
  ncombine = 0;
1380
2001
  }
1381
- }
1382
-
1383
- class WindowDistinctState : public WindowAggregatorState {
1384
- public:
1385
- WindowDistinctState(const AggregateObject &aggr, const DataChunk &inputs, const WindowDistinctAggregator &tree);
1386
-
1387
- void Evaluate(const DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
1388
-
1389
- protected:
1390
- //! Flush the accumulated intermediate states into the result states
1391
- void FlushStates();
1392
-
1393
- //! The aggregate function
1394
- const AggregateObject &aggr;
1395
- //! The aggregate function
1396
- const DataChunk &inputs;
1397
- //! The merge sort tree data
1398
- const WindowDistinctAggregator &tree;
1399
- //! The size of a single aggregate state
1400
- const idx_t state_size;
1401
- //! Data pointer that contains a vector of states, used for row aggregation
1402
- vector<data_t> state;
1403
- //! Reused result state container for the window functions
1404
- Vector statef;
1405
- //! A vector of pointers to "state", used for buffering intermediate aggregates
1406
- Vector statep;
1407
- //! Reused state pointers for combining tree elements
1408
- Vector statel;
1409
- //! Count of buffered values
1410
- idx_t flush_count;
1411
- //! The frame boundaries, used for the window functions
1412
- SubFrames frames;
1413
- };
1414
2002
 
1415
- WindowDistinctState::WindowDistinctState(const AggregateObject &aggr, const DataChunk &inputs,
1416
- const WindowDistinctAggregator &tree)
1417
- : aggr(aggr), inputs(inputs), tree(tree), state_size(aggr.function.state_size()),
1418
- state((state_size * STANDARD_VECTOR_SIZE)), statef(LogicalType::POINTER), statep(LogicalType::POINTER),
1419
- statel(LogicalType::POINTER), flush_count(0) {
1420
- InitSubFrames(frames, tree.exclude_mode);
1421
-
1422
- // Build the finalise vector that just points to the result states
1423
- data_ptr_t state_ptr = state.data();
1424
- D_ASSERT(statef.GetVectorType() == VectorType::FLAT_VECTOR);
1425
- statef.SetVectorType(VectorType::CONSTANT_VECTOR);
1426
- statef.Flatten(STANDARD_VECTOR_SIZE);
1427
- auto fdata = FlatVector::GetData<data_ptr_t>(statef);
1428
- for (idx_t i = 0; i < STANDARD_VECTOR_SIZE; ++i) {
1429
- fdata[i] = state_ptr;
1430
- state_ptr += state_size;
1431
- }
2003
+ ++build_complete;
1432
2004
  }
1433
2005
 
1434
- void WindowDistinctState::FlushStates() {
2006
+ void WindowDistinctAggregatorLocalState::FlushStates() {
1435
2007
  if (!flush_count) {
1436
2008
  return;
1437
2009
  }
1438
2010
 
2011
+ const auto &aggr = gastate.aggregator.aggr;
1439
2012
  AggregateInputData aggr_input_data(aggr.GetFunctionData(), allocator);
1440
2013
  statel.Verify(flush_count);
1441
2014
  aggr.function.combine(statel, statep, aggr_input_data, flush_count);
@@ -1443,17 +2016,20 @@ void WindowDistinctState::FlushStates() {
1443
2016
  flush_count = 0;
1444
2017
  }
1445
2018
 
1446
- void WindowDistinctState::Evaluate(const DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
1447
- auto fdata = FlatVector::GetData<data_ptr_t>(statef);
1448
- auto ldata = FlatVector::GetData<data_ptr_t>(statel);
2019
+ void WindowDistinctAggregatorLocalState::Evaluate(const WindowDistinctAggregatorGlobalState &gdstate,
2020
+ const DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
2021
+ auto ldata = FlatVector::GetData<const_data_ptr_t>(statel);
1449
2022
  auto pdata = FlatVector::GetData<data_ptr_t>(statep);
1450
2023
 
1451
- const auto &merge_sort_tree = *tree.merge_sort_tree;
1452
- const auto running_aggs = tree.levels_flat_native.get();
2024
+ const auto &merge_sort_tree = gdstate.merge_sort_tree;
2025
+ const auto &levels_flat_native = gdstate.levels_flat_native;
2026
+ const auto exclude_mode = gdstate.aggregator.exclude_mode;
1453
2027
 
1454
- EvaluateSubFrames(bounds, tree.exclude_mode, count, row_idx, frames, [&](idx_t rid) {
1455
- auto agg_state = fdata[rid];
1456
- aggr.function.initialize(agg_state);
2028
+ // Build the finalise vector that just points to the result states
2029
+ statef.Initialize(count);
2030
+
2031
+ EvaluateSubFrames(bounds, exclude_mode, count, row_idx, frames, [&](idx_t rid) {
2032
+ auto agg_state = statef.GetStatePtr(rid);
1457
2033
 
1458
2034
  // TODO: Extend AggregateLowerBound to handle subframes, just like SelectNth.
1459
2035
  const auto lower = frames[0].start;
@@ -1463,8 +2039,8 @@ void WindowDistinctState::Evaluate(const DataChunk &bounds, Vector &result, idx_
1463
2039
  if (run_pos != run_begin) {
1464
2040
  // Find the source aggregate
1465
2041
  // Buffer a merge of the indicated state into the current state
1466
- const auto agg_idx = tree.levels_flat_start[level] + run_pos - 1;
1467
- const auto running_agg = running_aggs + agg_idx * state_size;
2042
+ const auto agg_idx = gdstate.levels_flat_start[level] + run_pos - 1;
2043
+ const auto running_agg = levels_flat_native.GetStatePtr(agg_idx);
1468
2044
  pdata[flush_count] = agg_state;
1469
2045
  ldata[flush_count++] = running_agg;
1470
2046
  if (flush_count >= STANDARD_VECTOR_SIZE) {
@@ -1478,23 +2054,20 @@ void WindowDistinctState::Evaluate(const DataChunk &bounds, Vector &result, idx_
1478
2054
  FlushStates();
1479
2055
 
1480
2056
  // Finalise the result aggregates and write to the result
1481
- AggregateInputData aggr_input_data(aggr.GetFunctionData(), allocator);
1482
- aggr.function.finalize(statef, aggr_input_data, result, count, 0);
1483
-
1484
- // Destruct the result aggregates
1485
- if (aggr.function.destructor) {
1486
- aggr.function.destructor(statef, aggr_input_data, count);
1487
- }
2057
+ statef.Finalize(result);
2058
+ statef.Destroy();
1488
2059
  }
1489
2060
 
1490
- unique_ptr<WindowAggregatorState> WindowDistinctAggregator::GetLocalState() const {
1491
- return make_uniq<WindowDistinctState>(aggr, inputs, *this);
2061
+ unique_ptr<WindowAggregatorState> WindowDistinctAggregator::GetLocalState(const WindowAggregatorState &gstate) const {
2062
+ return make_uniq<WindowDistinctAggregatorLocalState>(gstate.Cast<const WindowDistinctAggregatorGlobalState>());
1492
2063
  }
1493
2064
 
1494
- void WindowDistinctAggregator::Evaluate(WindowAggregatorState &lstate, const DataChunk &bounds, Vector &result,
1495
- idx_t count, idx_t row_idx) const {
1496
- auto &ldstate = lstate.Cast<WindowDistinctState>();
1497
- ldstate.Evaluate(bounds, result, count, row_idx);
2065
+ void WindowDistinctAggregator::Evaluate(const WindowAggregatorState &gsink, WindowAggregatorState &lstate,
2066
+ const DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) const {
2067
+
2068
+ const auto &gdstate = gsink.Cast<WindowDistinctAggregatorGlobalState>();
2069
+ auto &ldstate = lstate.Cast<WindowDistinctAggregatorLocalState>();
2070
+ ldstate.Evaluate(gdstate, bounds, result, count, row_idx);
1498
2071
  }
1499
2072
 
1500
2073
  } // namespace duckdb