duckdb 0.8.2-dev145.0 → 0.8.2-dev1493.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (476)
  1. package/binding.gyp +15 -12
  2. package/binding.gyp.in +1 -1
  3. package/configure.py +1 -1
  4. package/duckdb_extension_config.cmake +10 -0
  5. package/package.json +1 -1
  6. package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
  7. package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
  8. package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
  9. package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
  10. package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
  11. package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
  12. package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
  13. package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
  14. package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
  15. package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
  16. package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
  17. package/src/duckdb/extension/icu/icu_extension.cpp +3 -3
  18. package/src/duckdb/extension/json/include/json_common.hpp +47 -231
  19. package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
  20. package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
  21. package/src/duckdb/extension/json/json_common.cpp +272 -40
  22. package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
  23. package/src/duckdb/extension/json/json_functions/json_transform.cpp +17 -37
  24. package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
  25. package/src/duckdb/extension/json/json_functions.cpp +24 -24
  26. package/src/duckdb/extension/json/json_scan.cpp +3 -6
  27. package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
  28. package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
  29. package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
  30. package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
  31. package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
  32. package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
  33. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
  34. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
  35. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
  36. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
  37. package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
  38. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
  39. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
  40. package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
  41. package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
  42. package/src/duckdb/extension/parquet/parquet_extension.cpp +192 -20
  43. package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
  44. package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
  45. package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
  46. package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
  47. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  48. package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
  49. package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
  50. package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
  51. package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
  52. package/src/duckdb/src/common/allocator.cpp +14 -2
  53. package/src/duckdb/src/common/arrow/arrow_appender.cpp +5 -11
  54. package/src/duckdb/src/common/assert.cpp +3 -0
  55. package/src/duckdb/src/common/enum_util.cpp +42 -5
  56. package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
  57. package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
  58. package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
  59. package/src/duckdb/src/common/exception.cpp +2 -2
  60. package/src/duckdb/src/common/file_system.cpp +19 -0
  61. package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
  62. package/src/duckdb/src/common/local_file_system.cpp +2 -2
  63. package/src/duckdb/src/common/multi_file_reader.cpp +184 -20
  64. package/src/duckdb/src/common/operator/cast_operators.cpp +35 -1
  65. package/src/duckdb/src/common/radix_partitioning.cpp +26 -8
  66. package/src/duckdb/src/common/re2_regex.cpp +1 -1
  67. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  68. package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
  69. package/src/duckdb/src/common/sort/partition_state.cpp +44 -11
  70. package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
  71. package/src/duckdb/src/common/types/bit.cpp +51 -0
  72. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
  73. package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
  74. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
  75. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
  76. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  77. package/src/duckdb/src/common/types/date.cpp +9 -0
  78. package/src/duckdb/src/common/types/list_segment.cpp +24 -74
  79. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
  80. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
  81. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
  82. package/src/duckdb/src/common/types/uuid.cpp +2 -2
  83. package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
  84. package/src/duckdb/src/common/virtual_file_system.cpp +138 -1
  85. package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
  86. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
  87. package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
  88. package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
  89. package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
  90. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
  91. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
  92. package/src/duckdb/src/core_functions/function_list.cpp +4 -2
  93. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
  94. package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
  95. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
  96. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
  97. package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
  98. package/src/duckdb/src/execution/expression_executor.cpp +1 -1
  99. package/src/duckdb/src/execution/index/art/art.cpp +149 -139
  100. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
  101. package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
  102. package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
  103. package/src/duckdb/src/execution/index/art/node.cpp +113 -120
  104. package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
  105. package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
  106. package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
  107. package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
  108. package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
  109. package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
  110. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
  111. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
  112. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
  113. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
  114. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
  115. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
  116. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +414 -283
  117. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
  118. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
  119. package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
  120. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +22 -3
  121. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +2 -2
  122. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
  123. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
  124. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
  125. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
  126. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
  127. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
  128. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
  129. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  130. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
  131. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
  132. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -2
  133. package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
  134. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
  135. package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
  136. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
  137. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
  138. package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
  139. package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
  140. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
  141. package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
  142. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
  143. package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
  144. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
  145. package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
  146. package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
  147. package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
  148. package/src/duckdb/src/function/function.cpp +3 -1
  149. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
  150. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
  151. package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
  152. package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
  153. package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
  154. package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
  155. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
  156. package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
  157. package/src/duckdb/src/function/table/read_csv.cpp +100 -17
  158. package/src/duckdb/src/function/table/table_scan.cpp +9 -0
  159. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  160. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
  161. package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
  162. package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
  163. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
  164. package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
  165. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
  166. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
  167. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
  168. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
  169. package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
  170. package/src/duckdb/src/include/duckdb/common/file_system.hpp +2 -0
  171. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
  172. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
  173. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
  174. package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
  175. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +43 -3
  176. package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
  177. package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
  178. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
  179. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
  180. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -0
  181. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
  182. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
  183. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
  184. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
  185. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
  186. package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
  187. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
  188. package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
  189. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
  190. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -1
  191. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
  192. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
  193. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
  194. package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +38 -97
  195. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
  196. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
  197. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
  198. package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +3 -1
  199. package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
  200. package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
  201. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
  202. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
  203. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
  204. package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
  205. package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
  206. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +31 -11
  207. package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
  208. package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
  209. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +3 -1
  210. package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
  211. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -1
  212. package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +3 -1
  213. package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
  214. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +3 -1
  215. package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
  216. package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
  217. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
  218. package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
  219. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
  220. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
  221. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
  222. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
  223. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
  224. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
  225. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
  226. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
  227. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
  228. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -10
  229. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
  230. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
  231. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
  232. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
  233. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
  234. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
  235. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -1
  236. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
  237. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
  238. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
  239. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
  240. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
  241. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
  242. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
  243. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
  244. package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
  245. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
  246. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
  247. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
  248. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
  249. package/src/duckdb/src/include/duckdb/main/client_config.hpp +3 -0
  250. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  251. package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
  252. package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
  253. package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
  254. package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
  255. package/src/duckdb/src/include/duckdb/main/settings.hpp +30 -1
  256. package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
  257. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
  258. package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
  259. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
  260. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
  261. package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
  262. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
  263. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
  264. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
  265. package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
  266. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
  267. package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
  268. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
  269. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
  270. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
  271. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
  272. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  273. package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
  274. package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +3 -0
  275. package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +3 -0
  276. package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
  277. package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +3 -0
  278. package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
  279. package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
  280. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +3 -0
  281. package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +3 -0
  282. package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +3 -0
  283. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +3 -0
  284. package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
  285. package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
  286. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
  287. package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
  288. package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
  289. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
  290. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
  291. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
  292. package/src/duckdb/src/include/duckdb/planner/binder.hpp +8 -5
  293. package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
  294. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
  295. package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
  296. package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
  297. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
  298. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
  299. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
  300. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
  301. package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
  302. package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +4 -0
  303. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
  304. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
  305. package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
  306. package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
  307. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
  308. package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
  309. package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
  310. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
  311. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
  312. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
  313. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
  314. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  315. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
  316. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
  317. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
  318. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
  319. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
  320. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
  321. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
  322. package/src/duckdb/src/include/duckdb.h +28 -0
  323. package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
  324. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
  325. package/src/duckdb/src/main/config.cpp +3 -0
  326. package/src/duckdb/src/main/database.cpp +1 -1
  327. package/src/duckdb/src/main/extension/extension_helper.cpp +96 -89
  328. package/src/duckdb/src/main/extension/extension_install.cpp +6 -0
  329. package/src/duckdb/src/main/extension/extension_load.cpp +10 -1
  330. package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
  331. package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
  332. package/src/duckdb/src/main/relation.cpp +6 -5
  333. package/src/duckdb/src/main/settings/settings.cpp +64 -18
  334. package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
  335. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
  336. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
  337. package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
  338. package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
  339. package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
  340. package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
  341. package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
  342. package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
  343. package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
  344. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
  345. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
  346. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
  347. package/src/duckdb/src/optimizer/optimizer.cpp +51 -14
  348. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
  349. package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
  350. package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
  351. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
  352. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
  353. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
  354. package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
  355. package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
  356. package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
  357. package/src/duckdb/src/parallel/executor.cpp +15 -0
  358. package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
  359. package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
  360. package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
  361. package/src/duckdb/src/parser/expression/case_expression.cpp +0 -13
  362. package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
  363. package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
  364. package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
  365. package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
  366. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
  367. package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
  368. package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
  369. package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
  370. package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
  371. package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
  372. package/src/duckdb/src/parser/expression/parameter_expression.cpp +0 -12
  373. package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
  374. package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
  375. package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
  376. package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
  377. package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
  378. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
  379. package/src/duckdb/src/parser/parser.cpp +8 -2
  380. package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
  381. package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
  382. package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
  383. package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
  384. package/src/duckdb/src/parser/query_node.cpp +15 -37
  385. package/src/duckdb/src/parser/result_modifier.cpp +0 -74
  386. package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
  387. package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
  388. package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
  389. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
  390. package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -23
  391. package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
  392. package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
  393. package/src/duckdb/src/parser/tableref.cpp +0 -44
  394. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
  395. package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
  396. package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
  397. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
  398. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
  399. package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
  400. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
  401. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
  402. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
  403. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
  404. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
  405. package/src/duckdb/src/parser/transformer.cpp +15 -0
  406. package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
  407. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
  408. package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
  409. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
  410. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
  411. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +5 -4
  412. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
  413. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
  414. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -49
  415. package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
  416. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +61 -26
  417. package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
  418. package/src/duckdb/src/planner/binder.cpp +5 -0
  419. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
  420. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
  421. package/src/duckdb/src/planner/expression_binder.cpp +3 -0
  422. package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
  423. package/src/duckdb/src/planner/logical_operator.cpp +5 -0
  424. package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
  425. package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
  426. package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
  427. package/src/duckdb/src/planner/operator/logical_get.cpp +9 -4
  428. package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
  429. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
  430. package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
  431. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
  432. package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
  433. package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
  434. package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
  435. package/src/duckdb/src/storage/compression/rle.cpp +0 -1
  436. package/src/duckdb/src/storage/data_table.cpp +1 -1
  437. package/src/duckdb/src/storage/local_storage.cpp +3 -3
  438. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +340 -0
  439. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
  440. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +86 -0
  441. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +166 -0
  442. package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
  443. package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
  444. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  445. package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
  446. package/src/duckdb/src/storage/table/row_group.cpp +25 -9
  447. package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
  448. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
  449. package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
  450. package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
  451. package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
  452. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
  453. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  454. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
  455. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
  456. package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
  457. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  458. package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
  459. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  460. package/src/duckdb/ub_src_function_scalar.cpp +2 -0
  461. package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
  462. package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
  463. package/src/duckdb/ub_src_optimizer.cpp +6 -0
  464. package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
  465. package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
  466. package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
  467. package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
  468. package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
  469. package/src/duckdb/ub_src_planner_operator.cpp +4 -0
  470. package/src/duckdb/ub_src_storage_serialization.cpp +8 -0
  471. package/src/statement.cpp +10 -3
  472. package/test/test_all_types.test.ts +233 -0
  473. package/tsconfig.json +1 -0
  474. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
  475. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
  476. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -271,6 +271,8 @@ static inline bool BoundaryNeedsPeer(const WindowBoundary &boundary) {
271
271
  }
272
272
  }
273
273
 
274
+ enum WindowBounds : uint8_t { PARTITION_BEGIN, PARTITION_END, PEER_BEGIN, PEER_END, WINDOW_BEGIN, WINDOW_END };
275
+
274
276
  struct WindowBoundariesState {
275
277
  static inline bool IsScalar(const unique_ptr<Expression> &expr) {
276
278
  return expr ? expr->IsScalar() : true;
@@ -287,7 +289,11 @@ struct WindowBoundariesState {
287
289
  needs_peer(BoundaryNeedsPeer(wexpr.end) || wexpr.type == ExpressionType::WINDOW_CUME_DIST) {
288
290
  }
289
291
 
290
- void Update(const idx_t row_idx, WindowInputColumn &range_collection, const idx_t source_offset,
292
+ void Update(const idx_t row_idx, WindowInputColumn &range_collection, const idx_t chunk_idx,
293
+ WindowInputExpression &boundary_start, WindowInputExpression &boundary_end,
294
+ const ValidityMask &partition_mask, const ValidityMask &order_mask);
295
+
296
+ void Bounds(DataChunk &bounds, idx_t row_idx, WindowInputColumn &range, const idx_t count,
291
297
  WindowInputExpression &boundary_start, WindowInputExpression &boundary_end,
292
298
  const ValidityMask &partition_mask, const ValidityMask &order_mask);
293
299
 
@@ -311,15 +317,9 @@ struct WindowBoundariesState {
311
317
  idx_t valid_end = 0;
312
318
  int64_t window_start = -1;
313
319
  int64_t window_end = -1;
314
- bool is_same_partition = false;
315
- bool is_peer = false;
320
+ FrameBounds prev;
316
321
  };
317
322
 
318
- static bool WindowNeedsRank(const BoundWindowExpression &wexpr) {
319
- return wexpr.type == ExpressionType::WINDOW_PERCENT_RANK || wexpr.type == ExpressionType::WINDOW_RANK ||
320
- wexpr.type == ExpressionType::WINDOW_RANK_DENSE || wexpr.type == ExpressionType::WINDOW_CUME_DIST;
321
- }
322
-
323
323
  template <typename T>
324
324
  static T GetCell(DataChunk &chunk, idx_t column, idx_t index) {
325
325
  D_ASSERT(chunk.ColumnCount() > column);
@@ -343,7 +343,7 @@ static void CopyCell(DataChunk &chunk, idx_t column, idx_t index, Vector &target
343
343
  template <typename T>
344
344
  struct WindowColumnIterator {
345
345
  using iterator = WindowColumnIterator<T>;
346
- using iterator_category = std::forward_iterator_tag;
346
+ using iterator_category = std::random_access_iterator_tag;
347
347
  using difference_type = std::ptrdiff_t;
348
348
  using value_type = T;
349
349
  using reference = T;
@@ -352,6 +352,7 @@ struct WindowColumnIterator {
352
352
  explicit WindowColumnIterator(WindowInputColumn &coll_p, pointer pos_p = 0) : coll(&coll_p), pos(pos_p) {
353
353
  }
354
354
 
355
+ // Forward iterator
355
356
  inline reference operator*() const {
356
357
  return coll->GetCell<T>(pos);
357
358
  }
@@ -369,12 +370,64 @@ struct WindowColumnIterator {
369
370
  return result;
370
371
  }
371
372
 
373
+ // Bidirectional iterator
374
+ inline iterator &operator--() {
375
+ --pos;
376
+ return *this;
377
+ }
378
+ inline iterator operator--(int) {
379
+ auto result = *this;
380
+ --(*this);
381
+ return result;
382
+ }
383
+
384
+ // Random Access
385
+ inline iterator &operator+=(difference_type n) {
386
+ pos += n;
387
+ return *this;
388
+ }
389
+ inline iterator &operator-=(difference_type n) {
390
+ pos -= n;
391
+ return *this;
392
+ }
393
+
394
+ inline reference operator[](difference_type m) const {
395
+ return coll->GetCell<T>(pos + m);
396
+ }
397
+
398
+ friend inline iterator operator+(const iterator &a, difference_type n) {
399
+ return iterator(a.coll, a.pos + n);
400
+ }
401
+
402
+ friend inline iterator operator-(const iterator &a, difference_type n) {
403
+ return iterator(a.coll, a.pos - n);
404
+ }
405
+
406
+ friend inline iterator operator+(difference_type n, const iterator &a) {
407
+ return a + n;
408
+ }
409
+ friend inline difference_type operator-(const iterator &a, const iterator &b) {
410
+ return difference_type(a.pos - b.pos);
411
+ }
412
+
372
413
  friend inline bool operator==(const iterator &a, const iterator &b) {
373
414
  return a.pos == b.pos;
374
415
  }
375
416
  friend inline bool operator!=(const iterator &a, const iterator &b) {
376
417
  return a.pos != b.pos;
377
418
  }
419
+ friend inline bool operator<(const iterator &a, const iterator &b) {
420
+ return a.pos < b.pos;
421
+ }
422
+ friend inline bool operator<=(const iterator &a, const iterator &b) {
423
+ return a.pos <= b.pos;
424
+ }
425
+ friend inline bool operator>(const iterator &a, const iterator &b) {
426
+ return a.pos > b.pos;
427
+ }
428
+ friend inline bool operator>=(const iterator &a, const iterator &b) {
429
+ return a.pos >= b.pos;
430
+ }
378
431
 
379
432
  private:
380
433
  optional_ptr<WindowInputColumn> coll;
@@ -390,13 +443,30 @@ struct OperationCompare : public std::function<bool(T, T)> {
390
443
 
391
444
  template <typename T, typename OP, bool FROM>
392
445
  static idx_t FindTypedRangeBound(WindowInputColumn &over, const idx_t order_begin, const idx_t order_end,
393
- WindowInputExpression &boundary, const idx_t boundary_row) {
394
- D_ASSERT(!boundary.CellIsNull(boundary_row));
395
- const auto val = boundary.GetCell<T>(boundary_row);
446
+ WindowInputExpression &boundary, const idx_t chunk_idx, const FrameBounds &prev) {
447
+ D_ASSERT(!boundary.CellIsNull(chunk_idx));
448
+ const auto val = boundary.GetCell<T>(chunk_idx);
396
449
 
397
450
  OperationCompare<T, OP> comp;
398
451
  WindowColumnIterator<T> begin(over, order_begin);
399
452
  WindowColumnIterator<T> end(over, order_end);
453
+
454
+ if (order_begin < prev.first && prev.first < order_end) {
455
+ const auto first = over.GetCell<T>(prev.first);
456
+ if (!comp(val, first)) {
457
+ // prev.first <= val, so we can start further forward
458
+ begin += (prev.first - order_begin);
459
+ }
460
+ }
461
+ if (order_begin <= prev.second && prev.second < order_end) {
462
+ const auto second = over.GetCell<T>(prev.second);
463
+ if (!comp(second, val)) {
464
+ // val <= prev.second, so we can end further back
465
+ // (prev.second is the largest peer)
466
+ end -= (order_end - prev.second - 1);
467
+ }
468
+ }
469
+
400
470
  if (FROM) {
401
471
  return idx_t(std::lower_bound(begin, end, val, comp));
402
472
  } else {
@@ -406,35 +476,35 @@ static idx_t FindTypedRangeBound(WindowInputColumn &over, const idx_t order_begi
406
476
 
407
477
  template <typename OP, bool FROM>
408
478
  static idx_t FindRangeBound(WindowInputColumn &over, const idx_t order_begin, const idx_t order_end,
409
- WindowInputExpression &boundary, const idx_t expr_idx) {
479
+ WindowInputExpression &boundary, const idx_t chunk_idx, const FrameBounds &prev) {
410
480
  D_ASSERT(boundary.chunk.ColumnCount() == 1);
411
481
  D_ASSERT(boundary.chunk.data[0].GetType().InternalType() == over.input_expr.ptype);
412
482
 
413
483
  switch (over.input_expr.ptype) {
414
484
  case PhysicalType::INT8:
415
- return FindTypedRangeBound<int8_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
485
+ return FindTypedRangeBound<int8_t, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
416
486
  case PhysicalType::INT16:
417
- return FindTypedRangeBound<int16_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
487
+ return FindTypedRangeBound<int16_t, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
418
488
  case PhysicalType::INT32:
419
- return FindTypedRangeBound<int32_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
489
+ return FindTypedRangeBound<int32_t, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
420
490
  case PhysicalType::INT64:
421
- return FindTypedRangeBound<int64_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
491
+ return FindTypedRangeBound<int64_t, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
422
492
  case PhysicalType::UINT8:
423
- return FindTypedRangeBound<uint8_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
493
+ return FindTypedRangeBound<uint8_t, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
424
494
  case PhysicalType::UINT16:
425
- return FindTypedRangeBound<uint16_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
495
+ return FindTypedRangeBound<uint16_t, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
426
496
  case PhysicalType::UINT32:
427
- return FindTypedRangeBound<uint32_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
497
+ return FindTypedRangeBound<uint32_t, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
428
498
  case PhysicalType::UINT64:
429
- return FindTypedRangeBound<uint64_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
499
+ return FindTypedRangeBound<uint64_t, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
430
500
  case PhysicalType::INT128:
431
- return FindTypedRangeBound<hugeint_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
501
+ return FindTypedRangeBound<hugeint_t, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
432
502
  case PhysicalType::FLOAT:
433
- return FindTypedRangeBound<float, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
503
+ return FindTypedRangeBound<float, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
434
504
  case PhysicalType::DOUBLE:
435
- return FindTypedRangeBound<double, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
505
+ return FindTypedRangeBound<double, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
436
506
  case PhysicalType::INTERVAL:
437
- return FindTypedRangeBound<interval_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
507
+ return FindTypedRangeBound<interval_t, OP, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
438
508
  default:
439
509
  throw InternalException("Unsupported column type for RANGE");
440
510
  }
@@ -442,123 +512,126 @@ static idx_t FindRangeBound(WindowInputColumn &over, const idx_t order_begin, co
442
512
 
443
513
  template <bool FROM>
444
514
  static idx_t FindOrderedRangeBound(WindowInputColumn &over, const OrderType range_sense, const idx_t order_begin,
445
- const idx_t order_end, WindowInputExpression &boundary, const idx_t expr_idx) {
515
+ const idx_t order_end, WindowInputExpression &boundary, const idx_t chunk_idx,
516
+ const FrameBounds &prev) {
446
517
  switch (range_sense) {
447
518
  case OrderType::ASCENDING:
448
- return FindRangeBound<LessThan, FROM>(over, order_begin, order_end, boundary, expr_idx);
519
+ return FindRangeBound<LessThan, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
449
520
  case OrderType::DESCENDING:
450
- return FindRangeBound<GreaterThan, FROM>(over, order_begin, order_end, boundary, expr_idx);
521
+ return FindRangeBound<GreaterThan, FROM>(over, order_begin, order_end, boundary, chunk_idx, prev);
451
522
  default:
452
523
  throw InternalException("Unsupported ORDER BY sense for RANGE");
453
524
  }
454
525
  }
455
526
 
456
- void WindowBoundariesState::Update(const idx_t row_idx, WindowInputColumn &range_collection, const idx_t expr_idx,
527
+ void WindowBoundariesState::Update(const idx_t row_idx, WindowInputColumn &range_collection, const idx_t chunk_idx,
457
528
  WindowInputExpression &boundary_start, WindowInputExpression &boundary_end,
458
529
  const ValidityMask &partition_mask, const ValidityMask &order_mask) {
459
530
 
460
- auto &bounds = *this;
461
- if (bounds.partition_count + bounds.order_count > 0) {
531
+ if (partition_count + order_count > 0) {
462
532
 
463
533
  // determine partition and peer group boundaries to ultimately figure out window size
464
- bounds.is_same_partition = !partition_mask.RowIsValidUnsafe(row_idx);
465
- bounds.is_peer = !order_mask.RowIsValidUnsafe(row_idx);
534
+ const auto is_same_partition = !partition_mask.RowIsValidUnsafe(row_idx);
535
+ const auto is_peer = !order_mask.RowIsValidUnsafe(row_idx);
466
536
 
467
537
  // when the partition changes, recompute the boundaries
468
- if (!bounds.is_same_partition) {
469
- bounds.partition_start = row_idx;
470
- bounds.peer_start = row_idx;
538
+ if (!is_same_partition) {
539
+ partition_start = row_idx;
540
+ peer_start = row_idx;
471
541
 
472
542
  // find end of partition
473
- bounds.partition_end = bounds.input_size;
474
- if (bounds.partition_count) {
543
+ partition_end = input_size;
544
+ if (partition_count) {
475
545
  idx_t n = 1;
476
- bounds.partition_end = FindNextStart(partition_mask, bounds.partition_start + 1, bounds.input_size, n);
546
+ partition_end = FindNextStart(partition_mask, partition_start + 1, input_size, n);
477
547
  }
478
548
 
479
549
  // Find valid ordering values for the new partition
480
550
  // so we can exclude NULLs from RANGE expression computations
481
- bounds.valid_start = bounds.partition_start;
482
- bounds.valid_end = bounds.partition_end;
551
+ valid_start = partition_start;
552
+ valid_end = partition_end;
483
553
 
484
- if ((bounds.valid_start < bounds.valid_end) && bounds.has_preceding_range) {
554
+ if ((valid_start < valid_end) && has_preceding_range) {
485
555
  // Exclude any leading NULLs
486
- if (range_collection.CellIsNull(bounds.valid_start)) {
556
+ if (range_collection.CellIsNull(valid_start)) {
487
557
  idx_t n = 1;
488
- bounds.valid_start = FindNextStart(order_mask, bounds.valid_start + 1, bounds.valid_end, n);
558
+ valid_start = FindNextStart(order_mask, valid_start + 1, valid_end, n);
489
559
  }
490
560
  }
491
561
 
492
- if ((bounds.valid_start < bounds.valid_end) && bounds.has_following_range) {
562
+ if ((valid_start < valid_end) && has_following_range) {
493
563
  // Exclude any trailing NULLs
494
- if (range_collection.CellIsNull(bounds.valid_end - 1)) {
564
+ if (range_collection.CellIsNull(valid_end - 1)) {
495
565
  idx_t n = 1;
496
- bounds.valid_end = FindPrevStart(order_mask, bounds.valid_start, bounds.valid_end, n);
566
+ valid_end = FindPrevStart(order_mask, valid_start, valid_end, n);
497
567
  }
498
- }
499
568
 
500
- } else if (!bounds.is_peer) {
501
- bounds.peer_start = row_idx;
569
+ // Reset range hints
570
+ prev.first = valid_start;
571
+ prev.second = valid_end;
572
+ }
573
+ } else if (!is_peer) {
574
+ peer_start = row_idx;
502
575
  }
503
576
 
504
- if (bounds.needs_peer) {
505
- bounds.peer_end = bounds.partition_end;
506
- if (bounds.order_count) {
577
+ if (needs_peer) {
578
+ peer_end = partition_end;
579
+ if (order_count) {
507
580
  idx_t n = 1;
508
- bounds.peer_end = FindNextStart(order_mask, bounds.peer_start + 1, bounds.partition_end, n);
581
+ peer_end = FindNextStart(order_mask, peer_start + 1, partition_end, n);
509
582
  }
510
583
  }
511
584
 
512
585
  } else {
513
- bounds.is_same_partition = false;
514
- bounds.is_peer = true;
515
- bounds.partition_end = bounds.input_size;
516
- bounds.peer_end = bounds.partition_end;
586
+ // OVER()
587
+ partition_end = input_size;
588
+ peer_end = partition_end;
517
589
  }
518
590
 
519
591
  // determine window boundaries depending on the type of expression
520
- bounds.window_start = -1;
521
- bounds.window_end = -1;
592
+ window_start = -1;
593
+ window_end = -1;
522
594
 
523
- switch (bounds.start_boundary) {
595
+ switch (start_boundary) {
524
596
  case WindowBoundary::UNBOUNDED_PRECEDING:
525
- bounds.window_start = bounds.partition_start;
597
+ window_start = partition_start;
526
598
  break;
527
599
  case WindowBoundary::CURRENT_ROW_ROWS:
528
- bounds.window_start = row_idx;
600
+ window_start = row_idx;
529
601
  break;
530
602
  case WindowBoundary::CURRENT_ROW_RANGE:
531
- bounds.window_start = bounds.peer_start;
603
+ window_start = peer_start;
532
604
  break;
533
605
  case WindowBoundary::EXPR_PRECEDING_ROWS: {
534
- if (!TrySubtractOperator::Operation(int64_t(row_idx), boundary_start.GetCell<int64_t>(expr_idx),
535
- bounds.window_start)) {
606
+ if (!TrySubtractOperator::Operation(int64_t(row_idx), boundary_start.GetCell<int64_t>(chunk_idx),
607
+ window_start)) {
536
608
  throw OutOfRangeException("Overflow computing ROWS PRECEDING start");
537
609
  }
538
610
  break;
539
611
  }
540
612
  case WindowBoundary::EXPR_FOLLOWING_ROWS: {
541
- if (!TryAddOperator::Operation(int64_t(row_idx), boundary_start.GetCell<int64_t>(expr_idx),
542
- bounds.window_start)) {
613
+ if (!TryAddOperator::Operation(int64_t(row_idx), boundary_start.GetCell<int64_t>(chunk_idx), window_start)) {
543
614
  throw OutOfRangeException("Overflow computing ROWS FOLLOWING start");
544
615
  }
545
616
  break;
546
617
  }
547
618
  case WindowBoundary::EXPR_PRECEDING_RANGE: {
548
- if (boundary_start.CellIsNull(expr_idx)) {
549
- bounds.window_start = bounds.peer_start;
619
+ if (boundary_start.CellIsNull(chunk_idx)) {
620
+ window_start = peer_start;
550
621
  } else {
551
- bounds.window_start = FindOrderedRangeBound<true>(range_collection, bounds.range_sense, bounds.valid_start,
552
- row_idx, boundary_start, expr_idx);
622
+ prev.first = FindOrderedRangeBound<true>(range_collection, range_sense, valid_start, row_idx,
623
+ boundary_start, chunk_idx, prev);
624
+ window_start = prev.first;
553
625
  }
554
626
  break;
555
627
  }
556
628
  case WindowBoundary::EXPR_FOLLOWING_RANGE: {
557
- if (boundary_start.CellIsNull(expr_idx)) {
558
- bounds.window_start = bounds.peer_start;
629
+ if (boundary_start.CellIsNull(chunk_idx)) {
630
+ window_start = peer_start;
559
631
  } else {
560
- bounds.window_start = FindOrderedRangeBound<true>(range_collection, bounds.range_sense, row_idx,
561
- bounds.valid_end, boundary_start, expr_idx);
632
+ prev.first = FindOrderedRangeBound<true>(range_collection, range_sense, row_idx, valid_end, boundary_start,
633
+ chunk_idx, prev);
634
+ window_start = prev.first;
562
635
  }
563
636
  break;
564
637
  }
@@ -566,43 +639,44 @@ void WindowBoundariesState::Update(const idx_t row_idx, WindowInputColumn &range
566
639
  throw InternalException("Unsupported window start boundary");
567
640
  }
568
641
 
569
- switch (bounds.end_boundary) {
642
+ switch (end_boundary) {
570
643
  case WindowBoundary::CURRENT_ROW_ROWS:
571
- bounds.window_end = row_idx + 1;
644
+ window_end = row_idx + 1;
572
645
  break;
573
646
  case WindowBoundary::CURRENT_ROW_RANGE:
574
- bounds.window_end = bounds.peer_end;
647
+ window_end = peer_end;
575
648
  break;
576
649
  case WindowBoundary::UNBOUNDED_FOLLOWING:
577
- bounds.window_end = bounds.partition_end;
650
+ window_end = partition_end;
578
651
  break;
579
652
  case WindowBoundary::EXPR_PRECEDING_ROWS:
580
- if (!TrySubtractOperator::Operation(int64_t(row_idx + 1), boundary_end.GetCell<int64_t>(expr_idx),
581
- bounds.window_end)) {
653
+ if (!TrySubtractOperator::Operation(int64_t(row_idx + 1), boundary_end.GetCell<int64_t>(chunk_idx),
654
+ window_end)) {
582
655
  throw OutOfRangeException("Overflow computing ROWS PRECEDING end");
583
656
  }
584
657
  break;
585
658
  case WindowBoundary::EXPR_FOLLOWING_ROWS:
586
- if (!TryAddOperator::Operation(int64_t(row_idx + 1), boundary_end.GetCell<int64_t>(expr_idx),
587
- bounds.window_end)) {
659
+ if (!TryAddOperator::Operation(int64_t(row_idx + 1), boundary_end.GetCell<int64_t>(chunk_idx), window_end)) {
588
660
  throw OutOfRangeException("Overflow computing ROWS FOLLOWING end");
589
661
  }
590
662
  break;
591
663
  case WindowBoundary::EXPR_PRECEDING_RANGE: {
592
- if (boundary_end.CellIsNull(expr_idx)) {
593
- bounds.window_end = bounds.peer_end;
664
+ if (boundary_end.CellIsNull(chunk_idx)) {
665
+ window_end = peer_end;
594
666
  } else {
595
- bounds.window_end = FindOrderedRangeBound<false>(range_collection, bounds.range_sense, bounds.valid_start,
596
- row_idx, boundary_end, expr_idx);
667
+ prev.second = FindOrderedRangeBound<false>(range_collection, range_sense, valid_start, row_idx,
668
+ boundary_end, chunk_idx, prev);
669
+ window_end = prev.second;
597
670
  }
598
671
  break;
599
672
  }
600
673
  case WindowBoundary::EXPR_FOLLOWING_RANGE: {
601
- if (boundary_end.CellIsNull(expr_idx)) {
602
- bounds.window_end = bounds.peer_end;
674
+ if (boundary_end.CellIsNull(chunk_idx)) {
675
+ window_end = peer_end;
603
676
  } else {
604
- bounds.window_end = FindOrderedRangeBound<false>(range_collection, bounds.range_sense, row_idx,
605
- bounds.valid_end, boundary_end, expr_idx);
677
+ prev.second = FindOrderedRangeBound<false>(range_collection, range_sense, row_idx, valid_end, boundary_end,
678
+ chunk_idx, prev);
679
+ window_end = prev.second;
606
680
  }
607
681
  break;
608
682
  }
@@ -611,41 +685,69 @@ void WindowBoundariesState::Update(const idx_t row_idx, WindowInputColumn &range
611
685
  }
612
686
 
613
687
  // clamp windows to partitions if they should exceed
614
- if (bounds.window_start < (int64_t)bounds.partition_start) {
615
- bounds.window_start = bounds.partition_start;
688
+ if (window_start < (int64_t)partition_start) {
689
+ window_start = partition_start;
616
690
  }
617
- if (bounds.window_start > (int64_t)bounds.partition_end) {
618
- bounds.window_start = bounds.partition_end;
691
+ if (window_start > (int64_t)partition_end) {
692
+ window_start = partition_end;
619
693
  }
620
- if (bounds.window_end < (int64_t)bounds.partition_start) {
621
- bounds.window_end = bounds.partition_start;
694
+ if (window_end < (int64_t)partition_start) {
695
+ window_end = partition_start;
622
696
  }
623
- if (bounds.window_end > (int64_t)bounds.partition_end) {
624
- bounds.window_end = bounds.partition_end;
697
+ if (window_end > (int64_t)partition_end) {
698
+ window_end = partition_end;
625
699
  }
626
700
 
627
- if (bounds.window_start < 0 || bounds.window_end < 0) {
701
+ if (window_start < 0 || window_end < 0) {
628
702
  throw InternalException("Failed to compute window boundaries");
629
703
  }
630
704
  }
631
705
 
706
+ void WindowBoundariesState::Bounds(DataChunk &bounds, idx_t row_idx, WindowInputColumn &range, const idx_t count,
707
+ WindowInputExpression &boundary_start, WindowInputExpression &boundary_end,
708
+ const ValidityMask &partition_mask, const ValidityMask &order_mask) {
709
+ bounds.Reset();
710
+ D_ASSERT(bounds.ColumnCount() == 6);
711
+ auto partition_begin_data = FlatVector::GetData<idx_t>(bounds.data[PARTITION_BEGIN]);
712
+ auto partition_end_data = FlatVector::GetData<idx_t>(bounds.data[PARTITION_END]);
713
+ auto peer_begin_data = FlatVector::GetData<idx_t>(bounds.data[PEER_BEGIN]);
714
+ auto peer_end_data = FlatVector::GetData<idx_t>(bounds.data[PEER_END]);
715
+ auto window_begin_data = FlatVector::GetData<int64_t>(bounds.data[WINDOW_BEGIN]);
716
+ auto window_end_data = FlatVector::GetData<int64_t>(bounds.data[WINDOW_END]);
717
+ for (idx_t chunk_idx = 0; chunk_idx < count; ++chunk_idx, ++row_idx) {
718
+ Update(row_idx, range, chunk_idx, boundary_start, boundary_end, partition_mask, order_mask);
719
+ *partition_begin_data++ = partition_start;
720
+ *partition_end_data++ = partition_end;
721
+ if (needs_peer) {
722
+ *peer_begin_data++ = peer_start;
723
+ *peer_end_data++ = peer_end;
724
+ }
725
+ *window_begin_data++ = window_start;
726
+ *window_end_data++ = window_end;
727
+ }
728
+ bounds.SetCardinality(count);
729
+ }
730
+
632
731
  struct WindowExecutor {
633
- static bool IsConstantAggregate(const BoundWindowExpression &wexpr);
732
+ bool IsConstantAggregate();
733
+ bool IsCustomAggregate();
634
734
 
635
735
  WindowExecutor(BoundWindowExpression &wexpr, ClientContext &context, const ValidityMask &partition_mask,
636
- const idx_t count);
736
+ const idx_t count, WindowAggregationMode mode);
637
737
 
638
738
  void Sink(DataChunk &input_chunk, const idx_t input_idx, const idx_t total_count);
639
- void Finalize(WindowAggregationMode mode);
739
+ void Finalize();
640
740
 
641
741
  void Evaluate(idx_t row_idx, DataChunk &input_chunk, Vector &result, const ValidityMask &partition_mask,
642
742
  const ValidityMask &order_mask);
643
743
 
644
744
  // The function
645
745
  BoundWindowExpression &wexpr;
746
+ const WindowAggregationMode mode;
646
747
 
647
748
  // Frame management
648
- WindowBoundariesState bounds;
749
+ WindowBoundariesState state;
750
+ DataChunk bounds;
649
751
  uint64_t dense_rank = 1;
650
752
  uint64_t rank_equal = 0;
651
753
  uint64_t rank = 1;
@@ -656,8 +758,6 @@ struct WindowExecutor {
656
758
  DataChunk payload_chunk;
657
759
 
658
760
  ExpressionExecutor filter_executor;
659
- ValidityMask filter_mask;
660
- vector<validity_t> filter_bits;
661
761
  SelectionVector filter_sel;
662
762
 
663
763
  // LEAD/LAG Evaluation
@@ -674,15 +774,25 @@ struct WindowExecutor {
674
774
  // IGNORE NULLS
675
775
  ValidityMask ignore_nulls;
676
776
 
677
- // build a segment tree for frame-adhering aggregates
678
- // see http://www.vldb.org/pvldb/vol8/p1058-leis.pdf
679
- unique_ptr<WindowSegmentTree> segment_tree = nullptr;
680
-
681
- // all aggregate values are the same for each partition
682
- unique_ptr<WindowConstantAggregate> constant_aggregate = nullptr;
777
+ // aggregate computation algorithm
778
+ unique_ptr<WindowAggregateState> aggregate_state = nullptr;
779
+
780
+ protected:
781
+ void NextRank(idx_t partition_begin, idx_t peer_begin, idx_t row_idx);
782
+ void Aggregate(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
783
+ void RowNumber(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
784
+ void Rank(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
785
+ void DenseRank(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
786
+ void PercentRank(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
787
+ void CumeDist(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
788
+ void Ntile(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
789
+ void LeadLag(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
790
+ void FirstValue(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
791
+ void LastValue(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
792
+ void NthValue(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx);
683
793
  };
684
794
 
685
- bool WindowExecutor::IsConstantAggregate(const BoundWindowExpression &wexpr) {
795
+ bool WindowExecutor::IsConstantAggregate() {
686
796
  if (!wexpr.aggregate) {
687
797
  return false;
688
798
  }
@@ -736,28 +846,44 @@ bool WindowExecutor::IsConstantAggregate(const BoundWindowExpression &wexpr) {
736
846
  return true;
737
847
  }
738
848
 
849
+ bool WindowExecutor::IsCustomAggregate() {
850
+ if (!wexpr.aggregate) {
851
+ return false;
852
+ }
853
+
854
+ if (!AggregateObject(wexpr).function.window) {
855
+ return false;
856
+ }
857
+
858
+ return (mode < WindowAggregationMode::COMBINE);
859
+ }
860
+
739
861
  WindowExecutor::WindowExecutor(BoundWindowExpression &wexpr, ClientContext &context, const ValidityMask &partition_mask,
740
- const idx_t count)
741
- : wexpr(wexpr), bounds(wexpr, count), payload_collection(), payload_executor(context), filter_executor(context),
742
- leadlag_offset(wexpr.offset_expr.get(), context), leadlag_default(wexpr.default_expr.get(), context),
743
- boundary_start(wexpr.start_expr.get(), context), boundary_end(wexpr.end_expr.get(), context),
744
- range((bounds.has_preceding_range || bounds.has_following_range) ? wexpr.orders[0].expression.get() : nullptr,
862
+ const idx_t count, WindowAggregationMode mode)
863
+ : wexpr(wexpr), mode(mode), state(wexpr, count), payload_collection(), payload_executor(context),
864
+ filter_executor(context), leadlag_offset(wexpr.offset_expr.get(), context),
865
+ leadlag_default(wexpr.default_expr.get(), context), boundary_start(wexpr.start_expr.get(), context),
866
+ boundary_end(wexpr.end_expr.get(), context),
867
+ range((state.has_preceding_range || state.has_following_range) ? wexpr.orders[0].expression.get() : nullptr,
745
868
  context, count)
746
869
 
747
870
  {
748
871
  // TODO we could evaluate those expressions in parallel
749
872
 
750
873
  // Check for constant aggregate
751
- if (IsConstantAggregate(wexpr)) {
752
- constant_aggregate =
874
+ if (IsConstantAggregate()) {
875
+ aggregate_state =
753
876
  make_uniq<WindowConstantAggregate>(AggregateObject(wexpr), wexpr.return_type, partition_mask, count);
877
+ } else if (IsCustomAggregate()) {
878
+ aggregate_state = make_uniq<WindowCustomAggregate>(AggregateObject(wexpr), wexpr.return_type, count);
879
+ } else if (wexpr.aggregate) {
880
+ // build a segment tree for frame-adhering aggregates
881
+ // see http://www.vldb.org/pvldb/vol8/p1058-leis.pdf
882
+ aggregate_state = make_uniq<WindowSegmentTree>(AggregateObject(wexpr), wexpr.return_type, count, mode);
754
883
  }
755
884
 
756
885
  // evaluate the FILTER clause and stuff it into a large mask for compactness and reuse
757
886
  if (wexpr.filter_expr) {
758
- // Start with all invalid and set the ones that pass
759
- filter_bits.resize(ValidityMask::ValidityMaskSize(count), 0);
760
- filter_mask.Initialize(filter_bits.data());
761
887
  filter_executor.AddExpression(*wexpr.filter_expr);
762
888
  filter_sel.Initialize(STANDARD_VECTOR_SIZE);
763
889
  }
@@ -771,6 +897,9 @@ WindowExecutor::WindowExecutor(BoundWindowExpression &wexpr, ClientContext &cont
771
897
  if (!types.empty()) {
772
898
  payload_collection.Initialize(Allocator::Get(context), types);
773
899
  }
900
+
901
+ vector<LogicalType> bounds_types(6, LogicalType(LogicalTypeId::UBIGINT));
902
+ bounds.Initialize(Allocator::Get(context), bounds_types);
774
903
  }
775
904
 
776
905
  void WindowExecutor::Sink(DataChunk &input_chunk, const idx_t input_idx, const idx_t total_count) {
@@ -800,17 +929,14 @@ void WindowExecutor::Sink(DataChunk &input_chunk, const idx_t input_idx, const i
800
929
  if (wexpr.filter_expr) {
801
930
  filtering = &filter_sel;
802
931
  filtered = filter_executor.SelectExpression(input_chunk, filter_sel);
803
- for (idx_t f = 0; f < filtered; ++f) {
804
- filter_mask.SetValid(input_idx + filter_sel[f]);
805
- }
806
932
  }
807
933
 
808
934
  if (!wexpr.children.empty()) {
809
935
  payload_chunk.Reset();
810
936
  payload_executor.Execute(input_chunk, payload_chunk);
811
937
  payload_chunk.Verify();
812
- if (constant_aggregate) {
813
- constant_aggregate->Sink(payload_chunk, filtering, filtered);
938
+ if (aggregate_state) {
939
+ aggregate_state->Sink(payload_chunk, filtering, filtered);
814
940
  } else {
815
941
  payload_collection.Append(payload_chunk, true);
816
942
  }
@@ -840,19 +966,18 @@ void WindowExecutor::Sink(DataChunk &input_chunk, const idx_t input_idx, const i
840
966
  }
841
967
  }
842
968
  }
969
+ } else if (aggregate_state) {
970
+ // Zero-argument aggregate (e.g., COUNT(*))
971
+ payload_chunk.SetCardinality(input_chunk);
972
+ aggregate_state->Sink(payload_chunk, filtering, filtered);
843
973
  }
844
974
 
845
975
  range.Append(input_chunk);
846
976
  }
847
977
 
848
- void WindowExecutor::Finalize(WindowAggregationMode mode) {
849
- // build a segment tree for frame-adhering aggregates
850
- // see http://www.vldb.org/pvldb/vol8/p1058-leis.pdf
851
- if (constant_aggregate) {
852
- constant_aggregate->Finalize();
853
- } else if (wexpr.aggregate) {
854
- segment_tree = make_uniq<WindowSegmentTree>(AggregateObject(wexpr), wexpr.return_type, &payload_collection,
855
- filter_mask, mode);
978
+ void WindowExecutor::Finalize() {
979
+ if (aggregate_state) {
980
+ aggregate_state->Finalize();
856
981
  }
857
982
  }
858
983
 
@@ -865,188 +990,275 @@ void WindowExecutor::Evaluate(idx_t row_idx, DataChunk &input_chunk, Vector &res
865
990
  leadlag_offset.Execute(input_chunk);
866
991
  leadlag_default.Execute(input_chunk);
867
992
 
868
- // this is the main loop, go through all sorted rows and compute window function result
869
- for (idx_t output_offset = 0; output_offset < input_chunk.size(); ++output_offset, ++row_idx) {
870
- // special case, OVER (), aggregate over everything
871
- bounds.Update(row_idx, range, output_offset, boundary_start, boundary_end, partition_mask, order_mask);
872
- if (WindowNeedsRank(wexpr)) {
873
- if (!bounds.is_same_partition || row_idx == 0) { // special case for first row, need to init
874
- dense_rank = 1;
875
- rank = 1;
876
- rank_equal = 0;
877
- } else if (!bounds.is_peer) {
878
- dense_rank++;
879
- rank += rank_equal;
880
- rank_equal = 0;
881
- }
882
- rank_equal++;
883
- }
993
+ const auto count = input_chunk.size();
994
+ bounds.Reset();
995
+ state.Bounds(bounds, row_idx, range, input_chunk.size(), boundary_start, boundary_end, partition_mask, order_mask);
884
996
 
885
- // if no values are read for window, result is NULL
886
- if (bounds.window_start >= bounds.window_end) {
887
- FlatVector::SetNull(result, output_offset, true);
888
- continue;
889
- }
997
+ switch (wexpr.type) {
998
+ case ExpressionType::WINDOW_AGGREGATE:
999
+ Aggregate(bounds, result, count, row_idx);
1000
+ break;
1001
+ case ExpressionType::WINDOW_ROW_NUMBER:
1002
+ RowNumber(bounds, result, count, row_idx);
1003
+ break;
1004
+ case ExpressionType::WINDOW_RANK_DENSE:
1005
+ DenseRank(bounds, result, count, row_idx);
1006
+ break;
1007
+ case ExpressionType::WINDOW_RANK:
1008
+ Rank(bounds, result, count, row_idx);
1009
+ break;
1010
+ case ExpressionType::WINDOW_PERCENT_RANK:
1011
+ PercentRank(bounds, result, count, row_idx);
1012
+ break;
1013
+ case ExpressionType::WINDOW_CUME_DIST:
1014
+ CumeDist(bounds, result, count, row_idx);
1015
+ break;
1016
+ case ExpressionType::WINDOW_NTILE:
1017
+ Ntile(bounds, result, count, row_idx);
1018
+ break;
1019
+ case ExpressionType::WINDOW_LEAD:
1020
+ case ExpressionType::WINDOW_LAG:
1021
+ LeadLag(bounds, result, count, row_idx);
1022
+ break;
1023
+ case ExpressionType::WINDOW_FIRST_VALUE:
1024
+ FirstValue(bounds, result, count, row_idx);
1025
+ break;
1026
+ case ExpressionType::WINDOW_LAST_VALUE:
1027
+ LastValue(bounds, result, count, row_idx);
1028
+ break;
1029
+ case ExpressionType::WINDOW_NTH_VALUE:
1030
+ NthValue(bounds, result, count, row_idx);
1031
+ break;
1032
+ default:
1033
+ throw InternalException("Window aggregate type %s", ExpressionTypeToString(wexpr.type));
1034
+ }
890
1035
 
891
- switch (wexpr.type) {
892
- case ExpressionType::WINDOW_AGGREGATE: {
893
- if (constant_aggregate) {
894
- constant_aggregate->Compute(result, output_offset, bounds.window_start, bounds.window_end);
1036
+ result.Verify(count);
1037
+ }
1038
+
1039
+ void WindowExecutor::NextRank(idx_t partition_begin, idx_t peer_begin, idx_t row_idx) {
1040
+ if (partition_begin == row_idx) {
1041
+ dense_rank = 1;
1042
+ rank = 1;
1043
+ rank_equal = 0;
1044
+ } else if (peer_begin == row_idx) {
1045
+ dense_rank++;
1046
+ rank += rank_equal;
1047
+ rank_equal = 0;
1048
+ }
1049
+ rank_equal++;
1050
+ }
1051
+
1052
+ void WindowExecutor::Aggregate(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
1053
+ D_ASSERT(aggregate_state);
1054
+ auto window_begin = FlatVector::GetData<const idx_t>(bounds.data[WINDOW_BEGIN]);
1055
+ auto window_end = FlatVector::GetData<const idx_t>(bounds.data[WINDOW_END]);
1056
+ aggregate_state->Evaluate(window_begin, window_end, result, count);
1057
+ }
1058
+
1059
+ void WindowExecutor::RowNumber(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
1060
+ auto partition_begin = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_BEGIN]);
1061
+ auto rdata = FlatVector::GetData<int64_t>(result);
1062
+ for (idx_t i = 0; i < count; ++i, ++row_idx) {
1063
+ rdata[i] = row_idx - partition_begin[i] + 1;
1064
+ }
1065
+ }
1066
+
1067
+ void WindowExecutor::Rank(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
1068
+ auto partition_begin = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_BEGIN]);
1069
+ auto peer_begin = FlatVector::GetData<const idx_t>(bounds.data[PEER_BEGIN]);
1070
+ auto rdata = FlatVector::GetData<int64_t>(result);
1071
+ for (idx_t i = 0; i < count; ++i, ++row_idx) {
1072
+ NextRank(partition_begin[i], peer_begin[i], row_idx);
1073
+ rdata[i] = rank;
1074
+ }
1075
+ }
1076
+
1077
+ void WindowExecutor::DenseRank(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
1078
+ auto partition_begin = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_BEGIN]);
1079
+ auto peer_begin = FlatVector::GetData<const idx_t>(bounds.data[PEER_BEGIN]);
1080
+ auto rdata = FlatVector::GetData<int64_t>(result);
1081
+ for (idx_t i = 0; i < count; ++i, ++row_idx) {
1082
+ NextRank(partition_begin[i], peer_begin[i], row_idx);
1083
+ rdata[i] = dense_rank;
1084
+ }
1085
+ }
1086
+
1087
+ void WindowExecutor::PercentRank(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
1088
+ auto partition_begin = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_BEGIN]);
1089
+ auto partition_end = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_END]);
1090
+ auto peer_begin = FlatVector::GetData<const idx_t>(bounds.data[PEER_BEGIN]);
1091
+ auto rdata = FlatVector::GetData<double>(result);
1092
+ for (idx_t i = 0; i < count; ++i, ++row_idx) {
1093
+ NextRank(partition_begin[i], peer_begin[i], row_idx);
1094
+ int64_t denom = partition_end[i] - partition_begin[i] - 1;
1095
+ double percent_rank = denom > 0 ? ((double)rank - 1) / denom : 0;
1096
+ rdata[i] = percent_rank;
1097
+ }
1098
+ }
1099
+
1100
+ void WindowExecutor::CumeDist(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
1101
+ auto partition_begin = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_BEGIN]);
1102
+ auto partition_end = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_END]);
1103
+ auto peer_begin = FlatVector::GetData<const idx_t>(bounds.data[PEER_BEGIN]);
1104
+ auto peer_end = FlatVector::GetData<const idx_t>(bounds.data[PEER_END]);
1105
+ auto rdata = FlatVector::GetData<double>(result);
1106
+ for (idx_t i = 0; i < count; ++i, ++row_idx) {
1107
+ NextRank(partition_begin[i], peer_begin[i], row_idx);
1108
+ int64_t denom = partition_end[i] - partition_begin[i];
1109
+ double cume_dist = denom > 0 ? ((double)(peer_end[i] - partition_begin[i])) / denom : 0;
1110
+ rdata[i] = cume_dist;
1111
+ }
1112
+ }
1113
+
1114
+ void WindowExecutor::Ntile(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
1115
+ D_ASSERT(payload_collection.ColumnCount() == 1);
1116
+ auto partition_begin = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_BEGIN]);
1117
+ auto partition_end = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_END]);
1118
+ auto rdata = FlatVector::GetData<int64_t>(result);
1119
+ for (idx_t i = 0; i < count; ++i, ++row_idx) {
1120
+ if (CellIsNull(payload_collection, 0, row_idx)) {
1121
+ FlatVector::SetNull(result, i, true);
1122
+ } else {
1123
+ auto n_param = GetCell<int64_t>(payload_collection, 0, row_idx);
1124
+ if (n_param < 1) {
1125
+ throw InvalidInputException("Argument for ntile must be greater than zero");
1126
+ }
1127
+ // With thanks to SQLite's ntileValueFunc()
1128
+ int64_t n_total = partition_end[i] - partition_begin[i];
1129
+ if (n_param > n_total) {
1130
+ // more groups allowed than we have values
1131
+ // map every entry to a unique group
1132
+ n_param = n_total;
1133
+ }
1134
+ int64_t n_size = (n_total / n_param);
1135
+ // find the row idx within the group
1136
+ D_ASSERT(row_idx >= partition_begin[i]);
1137
+ int64_t adjusted_row_idx = row_idx - partition_begin[i];
1138
+ // now compute the ntile
1139
+ int64_t n_large = n_total - n_param * n_size;
1140
+ int64_t i_small = n_large * (n_size + 1);
1141
+ int64_t result_ntile;
1142
+
1143
+ D_ASSERT((n_large * (n_size + 1) + (n_param - n_large) * n_size) == n_total);
1144
+
1145
+ if (adjusted_row_idx < i_small) {
1146
+ result_ntile = 1 + adjusted_row_idx / (n_size + 1);
895
1147
  } else {
896
- segment_tree->Compute(result, output_offset, bounds.window_start, bounds.window_end);
1148
+ result_ntile = 1 + n_large + (adjusted_row_idx - i_small) / n_size;
897
1149
  }
898
- break;
1150
+ // result has to be between [1, NTILE]
1151
+ D_ASSERT(result_ntile >= 1 && result_ntile <= n_param);
1152
+ rdata[i] = result_ntile;
899
1153
  }
900
- case ExpressionType::WINDOW_ROW_NUMBER: {
901
- auto rdata = FlatVector::GetData<int64_t>(result);
902
- rdata[output_offset] = row_idx - bounds.partition_start + 1;
903
- break;
1154
+ }
1155
+ }
1156
+
1157
+ void WindowExecutor::LeadLag(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
1158
+ auto partition_begin = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_BEGIN]);
1159
+ auto partition_end = FlatVector::GetData<const idx_t>(bounds.data[PARTITION_END]);
1160
+ for (idx_t i = 0; i < count; ++i, ++row_idx) {
1161
+ int64_t offset = 1;
1162
+ if (wexpr.offset_expr) {
1163
+ offset = leadlag_offset.GetCell<int64_t>(i);
904
1164
  }
905
- case ExpressionType::WINDOW_RANK_DENSE: {
906
- auto rdata = FlatVector::GetData<int64_t>(result);
907
- rdata[output_offset] = dense_rank;
908
- break;
1165
+ int64_t val_idx = (int64_t)row_idx;
1166
+ if (wexpr.type == ExpressionType::WINDOW_LEAD) {
1167
+ val_idx += offset;
1168
+ } else {
1169
+ val_idx -= offset;
909
1170
  }
910
- case ExpressionType::WINDOW_RANK: {
911
- auto rdata = FlatVector::GetData<int64_t>(result);
912
- rdata[output_offset] = rank;
913
- break;
1171
+
1172
+ idx_t delta = 0;
1173
+ if (val_idx < (int64_t)row_idx) {
1174
+ // Count backwards
1175
+ delta = idx_t(row_idx - val_idx);
1176
+ val_idx = FindPrevStart(ignore_nulls, partition_begin[i], row_idx, delta);
1177
+ } else if (val_idx > (int64_t)row_idx) {
1178
+ delta = idx_t(val_idx - row_idx);
1179
+ val_idx = FindNextStart(ignore_nulls, row_idx + 1, partition_end[i], delta);
914
1180
  }
915
- case ExpressionType::WINDOW_PERCENT_RANK: {
916
- int64_t denom = (int64_t)bounds.partition_end - bounds.partition_start - 1;
917
- double percent_rank = denom > 0 ? ((double)rank - 1) / denom : 0;
918
- auto rdata = FlatVector::GetData<double>(result);
919
- rdata[output_offset] = percent_rank;
920
- break;
1181
+ // else offset is zero, so don't move.
1182
+
1183
+ if (!delta) {
1184
+ CopyCell(payload_collection, 0, val_idx, result, i);
1185
+ } else if (wexpr.default_expr) {
1186
+ leadlag_default.CopyCell(result, i);
1187
+ } else {
1188
+ FlatVector::SetNull(result, i, true);
921
1189
  }
922
- case ExpressionType::WINDOW_CUME_DIST: {
923
- int64_t denom = (int64_t)bounds.partition_end - bounds.partition_start;
924
- double cume_dist = denom > 0 ? ((double)(bounds.peer_end - bounds.partition_start)) / denom : 0;
925
- auto rdata = FlatVector::GetData<double>(result);
926
- rdata[output_offset] = cume_dist;
927
- break;
1190
+ }
1191
+ }
1192
+
1193
+ void WindowExecutor::FirstValue(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
1194
+ auto window_begin = FlatVector::GetData<const idx_t>(bounds.data[WINDOW_BEGIN]);
1195
+ auto window_end = FlatVector::GetData<const idx_t>(bounds.data[WINDOW_END]);
1196
+ auto &rmask = FlatVector::Validity(result);
1197
+ for (idx_t i = 0; i < count; ++i, ++row_idx) {
1198
+ if (window_begin[i] >= window_end[i]) {
1199
+ rmask.SetInvalid(i);
1200
+ continue;
928
1201
  }
929
- case ExpressionType::WINDOW_NTILE: {
930
- D_ASSERT(payload_collection.ColumnCount() == 1);
931
- if (CellIsNull(payload_collection, 0, row_idx)) {
932
- FlatVector::SetNull(result, output_offset, true);
933
- } else {
934
- auto n_param = GetCell<int64_t>(payload_collection, 0, row_idx);
935
- if (n_param < 1) {
936
- throw InvalidInputException("Argument for ntile must be greater than zero");
937
- }
938
- // With thanks from SQLite's ntileValueFunc()
939
- int64_t n_total = bounds.partition_end - bounds.partition_start;
940
- if (n_param > n_total) {
941
- // more groups allowed than we have values
942
- // map every entry to a unique group
943
- n_param = n_total;
944
- }
945
- int64_t n_size = (n_total / n_param);
946
- // find the row idx within the group
947
- D_ASSERT(row_idx >= bounds.partition_start);
948
- int64_t adjusted_row_idx = row_idx - bounds.partition_start;
949
- // now compute the ntile
950
- int64_t n_large = n_total - n_param * n_size;
951
- int64_t i_small = n_large * (n_size + 1);
952
- int64_t result_ntile;
953
-
954
- D_ASSERT((n_large * (n_size + 1) + (n_param - n_large) * n_size) == n_total);
955
-
956
- if (adjusted_row_idx < i_small) {
957
- result_ntile = 1 + adjusted_row_idx / (n_size + 1);
958
- } else {
959
- result_ntile = 1 + n_large + (adjusted_row_idx - i_small) / n_size;
960
- }
961
- // result has to be between [1, NTILE]
962
- D_ASSERT(result_ntile >= 1 && result_ntile <= n_param);
963
- auto rdata = FlatVector::GetData<int64_t>(result);
964
- rdata[output_offset] = result_ntile;
965
- }
966
- break;
1202
+ // Same as NTH_VALUE(..., 1)
1203
+ idx_t n = 1;
1204
+ const auto first_idx = FindNextStart(ignore_nulls, window_begin[i], window_end[i], n);
1205
+ if (!n) {
1206
+ CopyCell(payload_collection, 0, first_idx, result, i);
1207
+ } else {
1208
+ FlatVector::SetNull(result, i, true);
967
1209
  }
968
- case ExpressionType::WINDOW_LEAD:
969
- case ExpressionType::WINDOW_LAG: {
970
- int64_t offset = 1;
971
- if (wexpr.offset_expr) {
972
- offset = leadlag_offset.GetCell<int64_t>(output_offset);
973
- }
974
- int64_t val_idx = (int64_t)row_idx;
975
- if (wexpr.type == ExpressionType::WINDOW_LEAD) {
976
- val_idx += offset;
977
- } else {
978
- val_idx -= offset;
979
- }
980
-
981
- idx_t delta = 0;
982
- if (val_idx < (int64_t)row_idx) {
983
- // Count backwards
984
- delta = idx_t(row_idx - val_idx);
985
- val_idx = FindPrevStart(ignore_nulls, bounds.partition_start, row_idx, delta);
986
- } else if (val_idx > (int64_t)row_idx) {
987
- delta = idx_t(val_idx - row_idx);
988
- val_idx = FindNextStart(ignore_nulls, row_idx + 1, bounds.partition_end, delta);
989
- }
990
- // else offset is zero, so don't move.
1210
+ }
1211
+ }
991
1212
 
992
- if (!delta) {
993
- CopyCell(payload_collection, 0, val_idx, result, output_offset);
994
- } else if (wexpr.default_expr) {
995
- leadlag_default.CopyCell(result, output_offset);
996
- } else {
997
- FlatVector::SetNull(result, output_offset, true);
998
- }
999
- break;
1213
+ void WindowExecutor::LastValue(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
1214
+ auto window_begin = FlatVector::GetData<const idx_t>(bounds.data[WINDOW_BEGIN]);
1215
+ auto window_end = FlatVector::GetData<const idx_t>(bounds.data[WINDOW_END]);
1216
+ auto &rmask = FlatVector::Validity(result);
1217
+ for (idx_t i = 0; i < count; ++i, ++row_idx) {
1218
+ if (window_begin[i] >= window_end[i]) {
1219
+ rmask.SetInvalid(i);
1220
+ continue;
1000
1221
  }
1001
- case ExpressionType::WINDOW_FIRST_VALUE: {
1002
- // Same as NTH_VALUE(..., 1)
1003
- idx_t n = 1;
1004
- const auto first_idx = FindNextStart(ignore_nulls, bounds.window_start, bounds.window_end, n);
1005
- if (!n) {
1006
- CopyCell(payload_collection, 0, first_idx, result, output_offset);
1007
- } else {
1008
- FlatVector::SetNull(result, output_offset, true);
1009
- }
1010
- break;
1222
+ idx_t n = 1;
1223
+ const auto last_idx = FindPrevStart(ignore_nulls, window_begin[i], window_end[i], n);
1224
+ if (!n) {
1225
+ CopyCell(payload_collection, 0, last_idx, result, i);
1226
+ } else {
1227
+ FlatVector::SetNull(result, i, true);
1011
1228
  }
1012
- case ExpressionType::WINDOW_LAST_VALUE: {
1013
- idx_t n = 1;
1014
- const auto last_idx = FindPrevStart(ignore_nulls, bounds.window_start, bounds.window_end, n);
1015
- if (!n) {
1016
- CopyCell(payload_collection, 0, last_idx, result, output_offset);
1017
- } else {
1018
- FlatVector::SetNull(result, output_offset, true);
1019
- }
1020
- break;
1229
+ }
1230
+ }
1231
+
1232
+ void WindowExecutor::NthValue(DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx) {
1233
+ D_ASSERT(payload_collection.ColumnCount() == 2);
1234
+
1235
+ auto window_begin = FlatVector::GetData<const idx_t>(bounds.data[WINDOW_BEGIN]);
1236
+ auto window_end = FlatVector::GetData<const idx_t>(bounds.data[WINDOW_END]);
1237
+ auto &rmask = FlatVector::Validity(result);
1238
+ for (idx_t i = 0; i < count; ++i, ++row_idx) {
1239
+ if (window_begin[i] >= window_end[i]) {
1240
+ rmask.SetInvalid(i);
1241
+ continue;
1021
1242
  }
1022
- case ExpressionType::WINDOW_NTH_VALUE: {
1023
- D_ASSERT(payload_collection.ColumnCount() == 2);
1024
- // Returns value evaluated at the row that is the n'th row of the window frame (counting from 1);
1025
- // returns NULL if there is no such row.
1026
- if (CellIsNull(payload_collection, 1, row_idx)) {
1027
- FlatVector::SetNull(result, output_offset, true);
1243
+ // Returns value evaluated at the row that is the n'th row of the window frame (counting from 1);
1244
+ // returns NULL if there is no such row.
1245
+ if (CellIsNull(payload_collection, 1, row_idx)) {
1246
+ FlatVector::SetNull(result, i, true);
1247
+ } else {
1248
+ auto n_param = GetCell<int64_t>(payload_collection, 1, row_idx);
1249
+ if (n_param < 1) {
1250
+ FlatVector::SetNull(result, i, true);
1028
1251
  } else {
1029
- auto n_param = GetCell<int64_t>(payload_collection, 1, row_idx);
1030
- if (n_param < 1) {
1031
- FlatVector::SetNull(result, output_offset, true);
1252
+ auto n = idx_t(n_param);
1253
+ const auto nth_index = FindNextStart(ignore_nulls, window_begin[i], window_end[i], n);
1254
+ if (!n) {
1255
+ CopyCell(payload_collection, 0, nth_index, result, i);
1032
1256
  } else {
1033
- auto n = idx_t(n_param);
1034
- const auto nth_index = FindNextStart(ignore_nulls, bounds.window_start, bounds.window_end, n);
1035
- if (!n) {
1036
- CopyCell(payload_collection, 0, nth_index, result, output_offset);
1037
- } else {
1038
- FlatVector::SetNull(result, output_offset, true);
1039
- }
1257
+ FlatVector::SetNull(result, i, true);
1040
1258
  }
1041
1259
  }
1042
- break;
1043
- }
1044
- default:
1045
- throw InternalException("Window aggregate type %s", ExpressionTypeToString(wexpr.type));
1046
1260
  }
1047
1261
  }
1048
-
1049
- result.Verify(input_chunk.size());
1050
1262
  }
1051
1263
 
1052
1264
  //===--------------------------------------------------------------------===//
@@ -1281,7 +1493,7 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
1281
1493
  for (idx_t expr_idx = 0; expr_idx < op.select_list.size(); ++expr_idx) {
1282
1494
  D_ASSERT(op.select_list[expr_idx]->GetExpressionClass() == ExpressionClass::BOUND_WINDOW);
1283
1495
  auto &wexpr = op.select_list[expr_idx]->Cast<BoundWindowExpression>();
1284
- auto wexec = make_uniq<WindowExecutor>(wexpr, context, partition_mask, count);
1496
+ auto wexec = make_uniq<WindowExecutor>(wexpr, context, partition_mask, count, gstate.mode);
1285
1497
  window_execs.emplace_back(std::move(wexec));
1286
1498
  }
1287
1499
 
@@ -1305,7 +1517,7 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
1305
1517
 
1306
1518
  // TODO: Parallelization opportunity
1307
1519
  for (auto &wexec : window_execs) {
1308
- wexec->Finalize(gstate.mode);
1520
+ wexec->Finalize();
1309
1521
  }
1310
1522
 
1311
1523
  // External scanning assumes all blocks are swizzled.