duckdb 0.8.2-dev33.0 → 0.8.2-dev3300.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (998)
  1. package/README.md +7 -0
  2. package/binding.gyp +25 -13
  3. package/binding.gyp.in +1 -1
  4. package/configure.py +8 -3
  5. package/duckdb_extension_config.cmake +10 -0
  6. package/package.json +1 -1
  7. package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
  8. package/src/duckdb/extension/icu/icu-datefunc.cpp +10 -1
  9. package/src/duckdb/extension/icu/icu-datepart.cpp +162 -41
  10. package/src/duckdb/extension/icu/icu-datesub.cpp +3 -2
  11. package/src/duckdb/extension/icu/icu-datetrunc.cpp +2 -1
  12. package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
  13. package/src/duckdb/extension/icu/icu-makedate.cpp +19 -6
  14. package/src/duckdb/extension/icu/icu-strptime.cpp +5 -24
  15. package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
  16. package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
  17. package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
  18. package/src/duckdb/extension/icu/icu_extension.cpp +5 -7
  19. package/src/duckdb/extension/json/buffered_json_reader.cpp +2 -0
  20. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +5 -19
  21. package/src/duckdb/extension/json/include/json_common.hpp +47 -231
  22. package/src/duckdb/extension/json/include/json_deserializer.hpp +1 -1
  23. package/src/duckdb/extension/json/include/json_enums.hpp +60 -0
  24. package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
  25. package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
  26. package/src/duckdb/extension/json/include/json_scan.hpp +14 -10
  27. package/src/duckdb/extension/json/include/json_serializer.hpp +1 -1
  28. package/src/duckdb/extension/json/include/json_transform.hpp +3 -0
  29. package/src/duckdb/extension/json/json_common.cpp +272 -40
  30. package/src/duckdb/extension/json/json_deserializer.cpp +16 -14
  31. package/src/duckdb/extension/json/json_enums.cpp +105 -0
  32. package/src/duckdb/extension/json/json_functions/json_create.cpp +21 -2
  33. package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
  34. package/src/duckdb/extension/json/json_functions/json_transform.cpp +93 -38
  35. package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
  36. package/src/duckdb/extension/json/json_functions.cpp +26 -25
  37. package/src/duckdb/extension/json/json_scan.cpp +47 -6
  38. package/src/duckdb/extension/json/json_serializer.cpp +11 -11
  39. package/src/duckdb/extension/json/serialize_json.cpp +92 -0
  40. package/src/duckdb/extension/parquet/column_reader.cpp +37 -25
  41. package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
  42. package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
  43. package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
  44. package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
  45. package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
  46. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
  47. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
  48. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
  49. package/src/duckdb/extension/parquet/include/parquet_reader.hpp +4 -0
  50. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
  51. package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
  52. package/src/duckdb/extension/parquet/include/parquet_timestamp.hpp +1 -0
  53. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +28 -5
  54. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
  55. package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
  56. package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
  57. package/src/duckdb/extension/parquet/parquet_extension.cpp +258 -40
  58. package/src/duckdb/extension/parquet/parquet_reader.cpp +10 -10
  59. package/src/duckdb/extension/parquet/parquet_statistics.cpp +25 -8
  60. package/src/duckdb/extension/parquet/parquet_timestamp.cpp +6 -0
  61. package/src/duckdb/extension/parquet/parquet_writer.cpp +149 -31
  62. package/src/duckdb/extension/parquet/serialize_parquet.cpp +26 -0
  63. package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
  64. package/src/duckdb/src/catalog/catalog.cpp +3 -7
  65. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +8 -11
  66. package/src/duckdb/src/catalog/catalog_entry/index_catalog_entry.cpp +17 -41
  67. package/src/duckdb/src/catalog/catalog_entry/macro_catalog_entry.cpp +2 -10
  68. package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +4 -14
  69. package/src/duckdb/src/catalog/catalog_entry/sequence_catalog_entry.cpp +11 -28
  70. package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +11 -42
  71. package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +7 -26
  72. package/src/duckdb/src/catalog/catalog_entry/view_catalog_entry.cpp +11 -27
  73. package/src/duckdb/src/catalog/catalog_entry.cpp +25 -1
  74. package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
  75. package/src/duckdb/src/catalog/catalog_set.cpp +0 -63
  76. package/src/duckdb/src/catalog/default/default_functions.cpp +21 -0
  77. package/src/duckdb/src/catalog/dependency_manager.cpp +0 -36
  78. package/src/duckdb/src/common/adbc/adbc.cpp +541 -171
  79. package/src/duckdb/src/common/adbc/driver_manager.cpp +92 -39
  80. package/src/duckdb/src/common/adbc/nanoarrow/allocator.cpp +57 -0
  81. package/src/duckdb/src/common/adbc/nanoarrow/metadata.cpp +121 -0
  82. package/src/duckdb/src/common/adbc/nanoarrow/schema.cpp +474 -0
  83. package/src/duckdb/src/common/adbc/nanoarrow/single_batch_array_stream.cpp +84 -0
  84. package/src/duckdb/src/common/allocator.cpp +14 -2
  85. package/src/duckdb/src/common/arrow/appender/bool_data.cpp +44 -0
  86. package/src/duckdb/src/common/arrow/appender/list_data.cpp +78 -0
  87. package/src/duckdb/src/common/arrow/appender/map_data.cpp +86 -0
  88. package/src/duckdb/src/common/arrow/appender/struct_data.cpp +45 -0
  89. package/src/duckdb/src/common/arrow/appender/union_data.cpp +70 -0
  90. package/src/duckdb/src/common/arrow/arrow_appender.cpp +95 -666
  91. package/src/duckdb/src/common/arrow/arrow_converter.cpp +65 -37
  92. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +37 -42
  93. package/src/duckdb/src/common/assert.cpp +3 -0
  94. package/src/duckdb/src/common/constants.cpp +2 -1
  95. package/src/duckdb/src/common/enum_util.cpp +4838 -4429
  96. package/src/duckdb/src/common/enums/date_part_specifier.cpp +2 -0
  97. package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
  98. package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
  99. package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
  100. package/src/duckdb/src/common/exception.cpp +2 -2
  101. package/src/duckdb/src/common/extra_type_info.cpp +483 -0
  102. package/src/duckdb/src/common/field_writer.cpp +1 -1
  103. package/src/duckdb/src/common/file_system.cpp +25 -6
  104. package/src/duckdb/src/common/filename_pattern.cpp +1 -1
  105. package/src/duckdb/src/common/gzip_file_system.cpp +7 -12
  106. package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
  107. package/src/duckdb/src/common/http_state.cpp +78 -0
  108. package/src/duckdb/src/common/local_file_system.cpp +36 -28
  109. package/src/duckdb/src/common/multi_file_reader.cpp +193 -20
  110. package/src/duckdb/src/common/operator/cast_operators.cpp +92 -1
  111. package/src/duckdb/src/common/operator/string_cast.cpp +45 -8
  112. package/src/duckdb/src/common/radix_partitioning.cpp +26 -8
  113. package/src/duckdb/src/common/re2_regex.cpp +1 -1
  114. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  115. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +8 -3
  116. package/src/duckdb/src/common/serializer/binary_serializer.cpp +14 -9
  117. package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +0 -9
  118. package/src/duckdb/src/common/serializer/format_serializer.cpp +15 -0
  119. package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
  120. package/src/duckdb/src/common/sort/partition_state.cpp +70 -50
  121. package/src/duckdb/src/common/sort/sort_state.cpp +1 -1
  122. package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
  123. package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
  124. package/src/duckdb/src/common/types/bit.cpp +51 -0
  125. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
  126. package/src/duckdb/src/common/types/column/column_data_collection.cpp +68 -2
  127. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +20 -6
  128. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
  129. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  130. package/src/duckdb/src/common/types/date.cpp +15 -0
  131. package/src/duckdb/src/common/types/hugeint.cpp +40 -0
  132. package/src/duckdb/src/common/types/interval.cpp +3 -0
  133. package/src/duckdb/src/common/types/list_segment.cpp +56 -198
  134. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
  135. package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +35 -5
  136. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
  137. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
  138. package/src/duckdb/src/common/types/string_heap.cpp +4 -0
  139. package/src/duckdb/src/common/types/time.cpp +105 -0
  140. package/src/duckdb/src/common/types/timestamp.cpp +7 -0
  141. package/src/duckdb/src/common/types/uuid.cpp +2 -2
  142. package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
  143. package/src/duckdb/src/common/types/value.cpp +65 -47
  144. package/src/duckdb/src/common/types/vector.cpp +52 -25
  145. package/src/duckdb/src/common/types.cpp +38 -724
  146. package/src/duckdb/src/common/virtual_file_system.cpp +142 -1
  147. package/src/duckdb/src/core_functions/aggregate/holistic/approximate_quantile.cpp +26 -0
  148. package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +5 -7
  149. package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +64 -19
  150. package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +30 -0
  151. package/src/duckdb/src/core_functions/aggregate/nested/histogram.cpp +1 -0
  152. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +83 -59
  153. package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
  154. package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
  155. package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
  156. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
  157. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
  158. package/src/duckdb/src/core_functions/function_list.cpp +10 -4
  159. package/src/duckdb/src/core_functions/scalar/date/date_diff.cpp +2 -0
  160. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +380 -89
  161. package/src/duckdb/src/core_functions/scalar/date/date_sub.cpp +2 -0
  162. package/src/duckdb/src/core_functions/scalar/date/date_trunc.cpp +4 -0
  163. package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
  164. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
  165. package/src/duckdb/src/core_functions/scalar/date/strftime.cpp +10 -0
  166. package/src/duckdb/src/core_functions/scalar/debug/vector_type.cpp +23 -0
  167. package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +314 -82
  168. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
  169. package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +22 -3
  170. package/src/duckdb/src/core_functions/scalar/map/map_entries.cpp +2 -2
  171. package/src/duckdb/src/core_functions/scalar/string/to_base.cpp +66 -0
  172. package/src/duckdb/src/core_functions/scalar/union/union_tag.cpp +1 -1
  173. package/src/duckdb/src/execution/aggregate_hashtable.cpp +40 -18
  174. package/src/duckdb/src/execution/column_binding_resolver.cpp +10 -7
  175. package/src/duckdb/src/execution/expression_executor/execute_parameter.cpp +2 -2
  176. package/src/duckdb/src/execution/expression_executor.cpp +1 -1
  177. package/src/duckdb/src/execution/index/art/art.cpp +219 -259
  178. package/src/duckdb/src/execution/index/art/art_key.cpp +0 -11
  179. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +11 -15
  180. package/src/duckdb/src/execution/index/art/iterator.cpp +130 -214
  181. package/src/duckdb/src/execution/index/art/leaf.cpp +300 -266
  182. package/src/duckdb/src/execution/index/art/node.cpp +211 -205
  183. package/src/duckdb/src/execution/index/art/node16.cpp +10 -19
  184. package/src/duckdb/src/execution/index/art/node256.cpp +10 -18
  185. package/src/duckdb/src/execution/index/art/node4.cpp +21 -23
  186. package/src/duckdb/src/execution/index/art/node48.cpp +10 -20
  187. package/src/duckdb/src/execution/index/art/prefix.cpp +308 -338
  188. package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
  189. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
  190. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +14 -11
  191. package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +6 -4
  192. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
  193. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +46 -34
  194. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +332 -1067
  195. package/src/duckdb/src/execution/operator/filter/physical_filter.cpp +1 -1
  196. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +12 -9
  197. package/src/duckdb/src/execution/operator/helper/physical_explain_analyze.cpp +2 -2
  198. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +10 -8
  199. package/src/duckdb/src/execution/operator/helper/physical_materialized_collector.cpp +7 -5
  200. package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +7 -5
  201. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +449 -288
  202. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -2
  203. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -2
  204. package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +13 -6
  205. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +28 -15
  206. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +35 -17
  207. package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
  208. package/src/duckdb/src/execution/operator/join/physical_nested_loop_join.cpp +7 -4
  209. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +31 -10
  210. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +41 -5
  211. package/src/duckdb/src/execution/operator/order/physical_order.cpp +7 -5
  212. package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +7 -5
  213. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
  214. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
  215. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
  216. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
  217. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
  218. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +14 -10
  219. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +11 -9
  220. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +9 -7
  221. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +14 -12
  222. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +11 -11
  223. package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +4 -2
  224. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
  225. package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +24 -27
  226. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
  227. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -12
  228. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +2 -1
  229. package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +198 -0
  230. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +2 -6
  231. package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
  232. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
  233. package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
  234. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +37 -6
  235. package/src/duckdb/src/execution/physical_operator.cpp +20 -16
  236. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
  237. package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +57 -35
  238. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +32 -15
  239. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +45 -34
  240. package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
  241. package/src/duckdb/src/execution/physical_plan/plan_delim_join.cpp +2 -5
  242. package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
  243. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
  244. package/src/duckdb/src/execution/physical_plan_generator.cpp +6 -11
  245. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
  246. package/src/duckdb/src/execution/window_executor.cpp +1284 -0
  247. package/src/duckdb/src/execution/window_segment_tree.cpp +408 -144
  248. package/src/duckdb/src/function/aggregate/distributive/count.cpp +2 -13
  249. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +6 -12
  250. package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
  251. package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
  252. package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
  253. package/src/duckdb/src/function/cast/string_cast.cpp +2 -2
  254. package/src/duckdb/src/function/cast/time_casts.cpp +7 -6
  255. package/src/duckdb/src/function/function.cpp +3 -1
  256. package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -0
  257. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
  258. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
  259. package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
  260. package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
  261. package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
  262. package/src/duckdb/src/function/scalar/operators/add.cpp +9 -0
  263. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +6 -3
  264. package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
  265. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +39 -5
  266. package/src/duckdb/src/function/scalar_function.cpp +5 -20
  267. package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +57 -0
  268. package/src/duckdb/src/function/table/arrow.cpp +110 -88
  269. package/src/duckdb/src/function/table/arrow_conversion.cpp +86 -73
  270. package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
  271. package/src/duckdb/src/function/table/read_csv.cpp +124 -21
  272. package/src/duckdb/src/function/table/system/test_all_types.cpp +48 -21
  273. package/src/duckdb/src/function/table/system_functions.cpp +1 -0
  274. package/src/duckdb/src/function/table/table_scan.cpp +44 -0
  275. package/src/duckdb/src/function/table/version/pragma_version.cpp +49 -2
  276. package/src/duckdb/src/function/table_function.cpp +4 -3
  277. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +3 -3
  278. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/macro_catalog_entry.hpp +1 -4
  279. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/schema_catalog_entry.hpp +2 -5
  280. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/sequence_catalog_entry.hpp +1 -6
  281. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +2 -13
  282. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/type_catalog_entry.hpp +1 -4
  283. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/view_catalog_entry.hpp +2 -5
  284. package/src/duckdb/src/include/duckdb/catalog/catalog_entry.hpp +14 -0
  285. package/src/duckdb/src/include/duckdb/catalog/catalog_set.hpp +0 -6
  286. package/src/duckdb/src/include/duckdb/common/adbc/adbc.h +1 -0
  287. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +4 -1
  288. package/src/duckdb/src/include/duckdb/common/adbc/single_batch_array_stream.hpp +16 -0
  289. package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
  290. package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +109 -0
  291. package/src/duckdb/src/include/duckdb/common/arrow/appender/bool_data.hpp +15 -0
  292. package/src/duckdb/src/include/duckdb/common/arrow/appender/enum_data.hpp +69 -0
  293. package/src/duckdb/src/include/duckdb/common/arrow/appender/list.hpp +8 -0
  294. package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +18 -0
  295. package/src/duckdb/src/include/duckdb/common/arrow/appender/map_data.hpp +18 -0
  296. package/src/duckdb/src/include/duckdb/common/arrow/appender/scalar_data.hpp +88 -0
  297. package/src/duckdb/src/include/duckdb/common/arrow/appender/struct_data.hpp +18 -0
  298. package/src/duckdb/src/include/duckdb/common/arrow/appender/union_data.hpp +21 -0
  299. package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +105 -0
  300. package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +9 -4
  301. package/src/duckdb/src/include/duckdb/common/arrow/arrow_converter.hpp +3 -5
  302. package/src/duckdb/src/include/duckdb/common/arrow/arrow_wrapper.hpp +5 -3
  303. package/src/duckdb/src/include/duckdb/common/arrow/nanoarrow/nanoarrow.h +462 -0
  304. package/src/duckdb/src/include/duckdb/common/arrow/nanoarrow/nanoarrow.hpp +14 -0
  305. package/src/duckdb/src/include/duckdb/common/arrow/result_arrow_wrapper.hpp +4 -0
  306. package/src/duckdb/src/include/duckdb/common/assert.hpp +1 -1
  307. package/src/duckdb/src/include/duckdb/common/bitpacking.hpp +70 -55
  308. package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
  309. package/src/duckdb/src/include/duckdb/common/case_insensitive_map.hpp +1 -0
  310. package/src/duckdb/src/include/duckdb/common/constants.hpp +4 -0
  311. package/src/duckdb/src/include/duckdb/common/dl.hpp +3 -1
  312. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +660 -580
  313. package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
  314. package/src/duckdb/src/include/duckdb/common/enums/date_part_specifier.hpp +9 -1
  315. package/src/duckdb/src/include/duckdb/common/enums/index_type.hpp +4 -3
  316. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
  317. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
  318. package/src/duckdb/src/include/duckdb/common/enums/operator_result_type.hpp +5 -1
  319. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
  320. package/src/duckdb/src/include/duckdb/common/enums/pending_execution_result.hpp +1 -1
  321. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
  322. package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
  323. package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +215 -0
  324. package/src/duckdb/src/include/duckdb/common/field_writer.hpp +0 -4
  325. package/src/duckdb/src/include/duckdb/common/file_system.hpp +10 -8
  326. package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +1 -1
  327. package/src/duckdb/src/include/duckdb/common/helper.hpp +8 -3
  328. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
  329. package/src/duckdb/src/include/duckdb/common/http_state.hpp +61 -28
  330. package/src/duckdb/src/include/duckdb/common/hugeint.hpp +15 -0
  331. package/src/duckdb/src/include/duckdb/common/index_vector.hpp +12 -0
  332. package/src/duckdb/src/include/duckdb/common/limits.hpp +52 -149
  333. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +11 -5
  334. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +12 -42
  335. package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
  336. package/src/duckdb/src/include/duckdb/common/numeric_utils.hpp +48 -0
  337. package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +6 -2
  338. package/src/duckdb/src/include/duckdb/common/operator/add.hpp +5 -2
  339. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +65 -4
  340. package/src/duckdb/src/include/duckdb/common/operator/multiply.hpp +3 -2
  341. package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
  342. package/src/duckdb/src/include/duckdb/common/operator/string_cast.hpp +1 -1
  343. package/src/duckdb/src/include/duckdb/common/operator/subtract.hpp +3 -2
  344. package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
  345. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
  346. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
  347. package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +35 -7
  348. package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +14 -6
  349. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +0 -4
  350. package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +110 -0
  351. package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +94 -16
  352. package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +73 -40
  353. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +26 -4
  354. package/src/duckdb/src/include/duckdb/common/serializer.hpp +0 -7
  355. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +23 -8
  356. package/src/duckdb/src/include/duckdb/common/stack_checker.hpp +34 -0
  357. package/src/duckdb/src/include/duckdb/common/string_util.hpp +11 -0
  358. package/src/duckdb/src/include/duckdb/common/type_util.hpp +8 -0
  359. package/src/duckdb/src/include/duckdb/common/typedefs.hpp +8 -0
  360. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
  361. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
  362. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +11 -1
  363. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +12 -1
  364. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +3 -1
  365. package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
  366. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -3
  367. package/src/duckdb/src/include/duckdb/common/types/date.hpp +9 -5
  368. package/src/duckdb/src/include/duckdb/common/types/datetime.hpp +46 -3
  369. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +11 -15
  370. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +5 -2
  371. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -1
  372. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
  373. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
  374. package/src/duckdb/src/include/duckdb/common/types/string_heap.hpp +3 -0
  375. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
  376. package/src/duckdb/src/include/duckdb/common/types/time.hpp +5 -0
  377. package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +16 -10
  378. package/src/duckdb/src/include/duckdb/common/types/value.hpp +7 -2
  379. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +7 -0
  380. package/src/duckdb/src/include/duckdb/common/types.hpp +6 -25
  381. package/src/duckdb/src/include/duckdb/common/vector_operations/aggregate_executor.hpp +7 -2
  382. package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +40 -97
  383. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
  384. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
  385. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
  386. package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +4 -2
  387. package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
  388. package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
  389. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
  390. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
  391. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
  392. package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
  393. package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
  394. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +40 -11
  395. package/src/duckdb/src/include/duckdb/core_functions/scalar/debug_functions.hpp +27 -0
  396. package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
  397. package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
  398. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +7 -5
  399. package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
  400. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +6 -4
  401. package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +4 -2
  402. package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
  403. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +12 -1
  404. package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
  405. package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
  406. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
  407. package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
  408. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +13 -12
  409. package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +0 -1
  410. package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +22 -24
  411. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +32 -28
  412. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +46 -51
  413. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +134 -53
  414. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +5 -7
  415. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +5 -7
  416. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +7 -9
  417. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +5 -7
  418. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
  419. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +3 -3
  420. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
  421. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp +3 -3
  422. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +2 -2
  423. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +2 -2
  424. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_explain_analyze.hpp +1 -1
  425. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit.hpp +1 -1
  426. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_materialized_collector.hpp +1 -1
  427. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_vacuum.hpp +2 -2
  428. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +5 -12
  429. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_blockwise_nl_join.hpp +1 -1
  430. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_delim_join.hpp +2 -2
  431. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +2 -2
  432. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +3 -3
  433. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_nested_loop_join.hpp +2 -2
  434. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +3 -3
  435. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +12 -1
  436. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_order.hpp +2 -2
  437. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_top_n.hpp +2 -2
  438. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
  439. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
  440. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +10 -1
  441. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
  442. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
  443. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +2 -2
  444. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +2 -2
  445. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +2 -2
  446. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_fixed_batch_copy.hpp +2 -2
  447. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +2 -2
  448. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_update.hpp +1 -1
  449. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
  450. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -5
  451. package/src/duckdb/src/include/duckdb/execution/operator/schema/{physical_create_index.hpp → physical_create_art_index.hpp} +14 -7
  452. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
  453. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
  454. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
  455. package/src/duckdb/src/include/duckdb/execution/perfect_aggregate_hashtable.hpp +4 -2
  456. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +6 -5
  457. package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +11 -0
  458. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +6 -2
  459. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
  460. package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +313 -0
  461. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +79 -63
  462. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +12 -4
  463. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
  464. package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
  465. package/src/duckdb/src/include/duckdb/function/copy_function.hpp +6 -1
  466. package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +81 -0
  467. package/src/duckdb/src/include/duckdb/function/macro_function.hpp +3 -0
  468. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
  469. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
  470. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
  471. package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +8 -0
  472. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
  473. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +8 -3
  474. package/src/duckdb/src/include/duckdb/function/scalar_macro_function.hpp +3 -0
  475. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +99 -0
  476. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +6 -36
  477. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +7 -0
  478. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +5 -1
  479. package/src/duckdb/src/include/duckdb/function/table_function.hpp +8 -0
  480. package/src/duckdb/src/include/duckdb/function/table_macro_function.hpp +3 -0
  481. package/src/duckdb/src/include/duckdb/function/udf_function.hpp +2 -1
  482. package/src/duckdb/src/include/duckdb/main/attached_database.hpp +1 -1
  483. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +4 -3
  484. package/src/duckdb/src/include/duckdb/main/chunk_scan_state/query_result.hpp +29 -0
  485. package/src/duckdb/src/include/duckdb/main/chunk_scan_state.hpp +43 -0
  486. package/src/duckdb/src/include/duckdb/main/client_config.hpp +5 -2
  487. package/src/duckdb/src/include/duckdb/main/client_context.hpp +16 -14
  488. package/src/duckdb/src/include/duckdb/main/client_properties.hpp +25 -0
  489. package/src/duckdb/src/include/duckdb/main/config.hpp +3 -1
  490. package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
  491. package/src/duckdb/src/include/duckdb/main/extension/generated_extension_loader.hpp +22 -0
  492. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +8 -0
  493. package/src/duckdb/src/include/duckdb/main/extension_util.hpp +4 -0
  494. package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +5 -0
  495. package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +73 -5
  496. package/src/duckdb/src/include/duckdb/main/prepared_statement_data.hpp +6 -6
  497. package/src/duckdb/src/include/duckdb/main/query_result.hpp +2 -27
  498. package/src/duckdb/src/include/duckdb/main/relation/aggregate_relation.hpp +4 -1
  499. package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
  500. package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
  501. package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
  502. package/src/duckdb/src/include/duckdb/main/settings.hpp +41 -11
  503. package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
  504. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
  505. package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
  506. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +7 -0
  507. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +38 -64
  508. package/src/duckdb/src/include/duckdb/optimizer/join_order/cost_model.hpp +37 -0
  509. package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
  510. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +14 -29
  511. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +8 -22
  512. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -12
  513. package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +89 -0
  514. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +19 -30
  515. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +113 -0
  516. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +73 -0
  517. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +73 -0
  518. package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
  519. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
  520. package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
  521. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
  522. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
  523. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -3
  524. package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +3 -2
  525. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +9 -1
  526. package/src/duckdb/src/include/duckdb/parser/column_definition.hpp +6 -5
  527. package/src/duckdb/src/include/duckdb/parser/column_list.hpp +4 -0
  528. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  529. package/src/duckdb/src/include/duckdb/parser/constraint.hpp +5 -0
  530. package/src/duckdb/src/include/duckdb/parser/constraints/check_constraint.hpp +3 -0
  531. package/src/duckdb/src/include/duckdb/parser/constraints/foreign_key_constraint.hpp +6 -0
  532. package/src/duckdb/src/include/duckdb/parser/constraints/not_null_constraint.hpp +3 -0
  533. package/src/duckdb/src/include/duckdb/parser/constraints/unique_constraint.hpp +6 -0
  534. package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +4 -1
  535. package/src/duckdb/src/include/duckdb/parser/expression/case_expression.hpp +1 -1
  536. package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +4 -1
  537. package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +4 -1
  538. package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +4 -1
  539. package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +4 -1
  540. package/src/duckdb/src/include/duckdb/parser/expression/conjunction_expression.hpp +1 -1
  541. package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +4 -1
  542. package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
  543. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +4 -1
  544. package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +4 -1
  545. package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +21 -4
  546. package/src/duckdb/src/include/duckdb/parser/expression/parameter_expression.hpp +18 -2
  547. package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +4 -1
  548. package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +1 -1
  549. package/src/duckdb/src/include/duckdb/parser/expression/subquery_expression.hpp +1 -1
  550. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +4 -1
  551. package/src/duckdb/src/include/duckdb/parser/group_by_node.hpp +11 -0
  552. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +12 -1
  553. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +66 -2
  554. package/src/duckdb/src/include/duckdb/parser/parsed_data/attach_info.hpp +8 -1
  555. package/src/duckdb/src/include/duckdb/parser/parsed_data/copy_info.hpp +8 -1
  556. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_index_info.hpp +9 -1
  557. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_info.hpp +9 -2
  558. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_macro_info.hpp +3 -0
  559. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_schema_info.hpp +3 -0
  560. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_sequence_info.hpp +3 -0
  561. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_table_info.hpp +3 -0
  562. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_type_info.hpp +3 -0
  563. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_view_info.hpp +3 -0
  564. package/src/duckdb/src/include/duckdb/parser/parsed_data/detach_info.hpp +7 -0
  565. package/src/duckdb/src/include/duckdb/parser/parsed_data/drop_info.hpp +7 -0
  566. package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +7 -0
  567. package/src/duckdb/src/include/duckdb/parser/parsed_data/load_info.hpp +13 -3
  568. package/src/duckdb/src/include/duckdb/parser/parsed_data/parse_info.hpp +22 -0
  569. package/src/duckdb/src/include/duckdb/parser/parsed_data/pragma_info.hpp +10 -0
  570. package/src/duckdb/src/include/duckdb/parser/parsed_data/show_select_info.hpp +7 -0
  571. package/src/duckdb/src/include/duckdb/parser/parsed_data/transaction_info.hpp +10 -0
  572. package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +10 -0
  573. package/src/duckdb/src/include/duckdb/parser/parser.hpp +4 -0
  574. package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
  575. package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
  576. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
  577. package/src/duckdb/src/include/duckdb/parser/statement/execute_statement.hpp +1 -1
  578. package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
  579. package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
  580. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
  581. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
  582. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +23 -26
  583. package/src/duckdb/src/include/duckdb/planner/binder.hpp +12 -5
  584. package/src/duckdb/src/include/duckdb/planner/bound_constraint.hpp +0 -8
  585. package/src/duckdb/src/include/duckdb/planner/bound_parameter_map.hpp +2 -1
  586. package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +6 -0
  587. package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
  588. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +9 -0
  589. package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
  590. package/src/duckdb/src/include/duckdb/planner/expression/bound_aggregate_expression.hpp +3 -0
  591. package/src/duckdb/src/include/duckdb/planner/expression/bound_between_expression.hpp +6 -0
  592. package/src/duckdb/src/include/duckdb/planner/expression/bound_case_expression.hpp +6 -0
  593. package/src/duckdb/src/include/duckdb/planner/expression/bound_cast_expression.hpp +6 -0
  594. package/src/duckdb/src/include/duckdb/planner/expression/bound_columnref_expression.hpp +3 -0
  595. package/src/duckdb/src/include/duckdb/planner/expression/bound_comparison_expression.hpp +3 -0
  596. package/src/duckdb/src/include/duckdb/planner/expression/bound_conjunction_expression.hpp +3 -0
  597. package/src/duckdb/src/include/duckdb/planner/expression/bound_constant_expression.hpp +3 -0
  598. package/src/duckdb/src/include/duckdb/planner/expression/bound_default_expression.hpp +3 -0
  599. package/src/duckdb/src/include/duckdb/planner/expression/bound_function_expression.hpp +4 -0
  600. package/src/duckdb/src/include/duckdb/planner/expression/bound_lambda_expression.hpp +3 -1
  601. package/src/duckdb/src/include/duckdb/planner/expression/bound_lambdaref_expression.hpp +3 -0
  602. package/src/duckdb/src/include/duckdb/planner/expression/bound_operator_expression.hpp +3 -0
  603. package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_data.hpp +24 -6
  604. package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_expression.hpp +9 -2
  605. package/src/duckdb/src/include/duckdb/planner/expression/bound_reference_expression.hpp +3 -0
  606. package/src/duckdb/src/include/duckdb/planner/expression/bound_unnest_expression.hpp +3 -0
  607. package/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp +3 -0
  608. package/src/duckdb/src/include/duckdb/planner/expression/list.hpp +1 -0
  609. package/src/duckdb/src/include/duckdb/planner/expression.hpp +3 -0
  610. package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
  611. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +13 -1
  612. package/src/duckdb/src/include/duckdb/planner/filter/conjunction_filter.hpp +4 -0
  613. package/src/duckdb/src/include/duckdb/planner/filter/constant_filter.hpp +2 -0
  614. package/src/duckdb/src/include/duckdb/planner/filter/null_filter.hpp +4 -0
  615. package/src/duckdb/src/include/duckdb/planner/joinside.hpp +3 -0
  616. package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +3 -2
  617. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -2
  618. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +3 -3
  619. package/src/duckdb/src/include/duckdb/planner/operator/logical_aggregate.hpp +3 -0
  620. package/src/duckdb/src/include/duckdb/planner/operator/logical_any_join.hpp +3 -0
  621. package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +4 -0
  622. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +12 -7
  623. package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +2 -0
  624. package/src/duckdb/src/include/duckdb/planner/operator/logical_create.hpp +9 -6
  625. package/src/duckdb/src/include/duckdb/planner/operator/logical_create_index.hpp +12 -23
  626. package/src/duckdb/src/include/duckdb/planner/operator/logical_create_table.hpp +10 -6
  627. package/src/duckdb/src/include/duckdb/planner/operator/logical_cross_product.hpp +3 -0
  628. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +9 -2
  629. package/src/duckdb/src/include/duckdb/planner/operator/logical_delete.hpp +7 -0
  630. package/src/duckdb/src/include/duckdb/planner/operator/logical_delim_get.hpp +3 -0
  631. package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
  632. package/src/duckdb/src/include/duckdb/planner/operator/logical_distinct.hpp +6 -10
  633. package/src/duckdb/src/include/duckdb/planner/operator/logical_dummy_scan.hpp +2 -0
  634. package/src/duckdb/src/include/duckdb/planner/operator/logical_empty_result.hpp +2 -0
  635. package/src/duckdb/src/include/duckdb/planner/operator/logical_explain.hpp +4 -0
  636. package/src/duckdb/src/include/duckdb/planner/operator/logical_expression_get.hpp +3 -0
  637. package/src/duckdb/src/include/duckdb/planner/operator/logical_extension_operator.hpp +8 -0
  638. package/src/duckdb/src/include/duckdb/planner/operator/logical_filter.hpp +3 -0
  639. package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +11 -1
  640. package/src/duckdb/src/include/duckdb/planner/operator/logical_insert.hpp +6 -0
  641. package/src/duckdb/src/include/duckdb/planner/operator/logical_limit.hpp +3 -0
  642. package/src/duckdb/src/include/duckdb/planner/operator/logical_limit_percent.hpp +3 -0
  643. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +52 -0
  644. package/src/duckdb/src/include/duckdb/planner/operator/logical_order.hpp +7 -35
  645. package/src/duckdb/src/include/duckdb/planner/operator/logical_pivot.hpp +6 -0
  646. package/src/duckdb/src/include/duckdb/planner/operator/logical_positional_join.hpp +3 -0
  647. package/src/duckdb/src/include/duckdb/planner/operator/logical_projection.hpp +3 -0
  648. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +10 -7
  649. package/src/duckdb/src/include/duckdb/planner/operator/logical_reset.hpp +4 -0
  650. package/src/duckdb/src/include/duckdb/planner/operator/logical_sample.hpp +6 -0
  651. package/src/duckdb/src/include/duckdb/planner/operator/logical_set.hpp +4 -0
  652. package/src/duckdb/src/include/duckdb/planner/operator/logical_set_operation.hpp +4 -0
  653. package/src/duckdb/src/include/duckdb/planner/operator/logical_show.hpp +3 -0
  654. package/src/duckdb/src/include/duckdb/planner/operator/logical_simple.hpp +3 -0
  655. package/src/duckdb/src/include/duckdb/planner/operator/logical_top_n.hpp +4 -0
  656. package/src/duckdb/src/include/duckdb/planner/operator/logical_unnest.hpp +2 -0
  657. package/src/duckdb/src/include/duckdb/planner/operator/logical_update.hpp +6 -0
  658. package/src/duckdb/src/include/duckdb/planner/operator/logical_window.hpp +3 -0
  659. package/src/duckdb/src/include/duckdb/planner/operator_extension.hpp +1 -0
  660. package/src/duckdb/src/include/duckdb/planner/planner.hpp +4 -3
  661. package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
  662. package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
  663. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
  664. package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
  665. package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
  666. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
  667. package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +7 -1
  668. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
  669. package/src/duckdb/src/include/duckdb/planner/tableref/bound_pivotref.hpp +3 -0
  670. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +2 -1
  671. package/src/duckdb/src/include/duckdb/storage/block.hpp +27 -4
  672. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +11 -11
  673. package/src/duckdb/src/include/duckdb/storage/checkpoint/row_group_writer.hpp +5 -5
  674. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_reader.hpp +2 -2
  675. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -3
  676. package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +19 -16
  677. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +1 -1
  678. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +2 -2
  679. package/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp +2 -2
  680. package/src/duckdb/src/include/duckdb/storage/index.hpp +2 -2
  681. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +88 -0
  682. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp +54 -0
  683. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +45 -0
  684. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
  685. package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +2 -2
  686. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +8 -5
  687. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
  688. package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +2 -2
  689. package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +2 -2
  690. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
  691. package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +2 -2
  692. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -3
  693. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +3 -3
  694. package/src/duckdb/src/include/duckdb/storage/table/row_group_segment_tree.hpp +2 -2
  695. package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
  696. package/src/duckdb/src/include/duckdb/storage/table_io_manager.hpp +3 -0
  697. package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +3 -4
  698. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
  699. package/src/duckdb/src/include/duckdb/verification/prepared_statement_verifier.hpp +1 -1
  700. package/src/duckdb/src/include/duckdb.h +86 -1
  701. package/src/duckdb/src/main/appender.cpp +3 -1
  702. package/src/duckdb/src/main/attached_database.cpp +2 -2
  703. package/src/duckdb/src/main/capi/arrow-c.cpp +196 -8
  704. package/src/duckdb/src/main/capi/duckdb-c.cpp +16 -0
  705. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
  706. package/src/duckdb/src/main/capi/pending-c.cpp +23 -0
  707. package/src/duckdb/src/main/capi/prepared-c.cpp +106 -28
  708. package/src/duckdb/src/main/capi/result-c.cpp +3 -1
  709. package/src/duckdb/src/main/chunk_scan_state/query_result.cpp +53 -0
  710. package/src/duckdb/src/main/chunk_scan_state.cpp +48 -0
  711. package/src/duckdb/src/main/client_context.cpp +42 -19
  712. package/src/duckdb/src/main/client_verify.cpp +17 -0
  713. package/src/duckdb/src/main/config.cpp +4 -1
  714. package/src/duckdb/src/main/database.cpp +2 -11
  715. package/src/duckdb/src/main/db_instance_cache.cpp +14 -6
  716. package/src/duckdb/src/main/extension/extension_helper.cpp +107 -88
  717. package/src/duckdb/src/main/extension/extension_install.cpp +10 -1
  718. package/src/duckdb/src/main/extension/extension_load.cpp +26 -6
  719. package/src/duckdb/src/main/extension/extension_util.cpp +16 -0
  720. package/src/duckdb/src/main/pending_query_result.cpp +9 -1
  721. package/src/duckdb/src/main/prepared_statement.cpp +38 -11
  722. package/src/duckdb/src/main/prepared_statement_data.cpp +23 -18
  723. package/src/duckdb/src/main/query_result.cpp +0 -21
  724. package/src/duckdb/src/main/relation/aggregate_relation.cpp +20 -10
  725. package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
  726. package/src/duckdb/src/main/relation/join_relation.cpp +6 -6
  727. package/src/duckdb/src/main/relation.cpp +10 -9
  728. package/src/duckdb/src/main/settings/settings.cpp +79 -33
  729. package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
  730. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +2 -4
  731. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
  732. package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
  733. package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
  734. package/src/duckdb/src/optimizer/compressed_materialization.cpp +477 -0
  735. package/src/duckdb/src/optimizer/deliminator.cpp +180 -323
  736. package/src/duckdb/src/optimizer/filter_pushdown.cpp +23 -6
  737. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +79 -325
  738. package/src/duckdb/src/optimizer/join_order/cost_model.cpp +19 -0
  739. package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
  740. package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -37
  741. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +48 -1047
  742. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
  743. package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +552 -0
  744. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +52 -41
  745. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +409 -0
  746. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +356 -0
  747. package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +351 -0
  748. package/src/duckdb/src/optimizer/optimizer.cpp +49 -14
  749. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
  750. package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
  751. package/src/duckdb/src/optimizer/pushdown/pushdown_projection.cpp +34 -7
  752. package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
  753. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
  754. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
  755. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
  756. package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
  757. package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
  758. package/src/duckdb/src/optimizer/topn_optimizer.cpp +27 -10
  759. package/src/duckdb/src/optimizer/unnest_rewriter.cpp +3 -5
  760. package/src/duckdb/src/parallel/executor.cpp +25 -1
  761. package/src/duckdb/src/parallel/pipeline.cpp +0 -17
  762. package/src/duckdb/src/parallel/pipeline_executor.cpp +33 -13
  763. package/src/duckdb/src/parallel/pipeline_finish_event.cpp +55 -1
  764. package/src/duckdb/src/parallel/task_scheduler.cpp +18 -2
  765. package/src/duckdb/src/parser/column_definition.cpp +20 -32
  766. package/src/duckdb/src/parser/column_list.cpp +8 -0
  767. package/src/duckdb/src/parser/constraints/foreign_key_constraint.cpp +3 -0
  768. package/src/duckdb/src/parser/constraints/unique_constraint.cpp +3 -0
  769. package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
  770. package/src/duckdb/src/parser/expression/case_expression.cpp +0 -25
  771. package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
  772. package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
  773. package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
  774. package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
  775. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
  776. package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
  777. package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
  778. package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
  779. package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
  780. package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
  781. package/src/duckdb/src/parser/expression/parameter_expression.cpp +7 -19
  782. package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
  783. package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
  784. package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
  785. package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
  786. package/src/duckdb/src/parser/parsed_data/alter_info.cpp +5 -2
  787. package/src/duckdb/src/parser/parsed_data/alter_table_info.cpp +38 -0
  788. package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +17 -1
  789. package/src/duckdb/src/parser/parsed_data/create_sequence_info.cpp +2 -0
  790. package/src/duckdb/src/parser/parsed_data/detach_info.cpp +1 -1
  791. package/src/duckdb/src/parser/parsed_data/drop_info.cpp +1 -1
  792. package/src/duckdb/src/parser/parsed_data/sample_options.cpp +0 -18
  793. package/src/duckdb/src/parser/parsed_data/transaction_info.cpp +4 -1
  794. package/src/duckdb/src/parser/parsed_data/vacuum_info.cpp +1 -1
  795. package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
  796. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
  797. package/src/duckdb/src/parser/parser.cpp +62 -36
  798. package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
  799. package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
  800. package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
  801. package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
  802. package/src/duckdb/src/parser/query_node.cpp +15 -47
  803. package/src/duckdb/src/parser/result_modifier.cpp +0 -87
  804. package/src/duckdb/src/parser/statement/execute_statement.cpp +2 -2
  805. package/src/duckdb/src/parser/statement/select_statement.cpp +0 -10
  806. package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
  807. package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
  808. package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
  809. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
  810. package/src/duckdb/src/parser/tableref/pivotref.cpp +6 -45
  811. package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
  812. package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
  813. package/src/duckdb/src/parser/tableref.cpp +0 -44
  814. package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +55 -38
  815. package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +13 -4
  816. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
  817. package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
  818. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +3 -0
  819. package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
  820. package/src/duckdb/src/parser/transform/expression/transform_param_ref.cpp +45 -26
  821. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
  822. package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +16 -1
  823. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
  824. package/src/duckdb/src/parser/transform/statement/transform_create_index.cpp +32 -17
  825. package/src/duckdb/src/parser/transform/statement/transform_create_type.cpp +1 -1
  826. package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
  827. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
  828. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
  829. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
  830. package/src/duckdb/src/parser/transform/statement/transform_prepare.cpp +28 -6
  831. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
  832. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
  833. package/src/duckdb/src/parser/transformer.cpp +44 -25
  834. package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +5 -3
  835. package/src/duckdb/src/planner/binder/expression/bind_parameter_expression.cpp +10 -10
  836. package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
  837. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
  838. package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
  839. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
  840. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +36 -33
  841. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +14 -52
  842. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +0 -23
  843. package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +13 -7
  844. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +29 -4
  845. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +24 -5
  846. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
  847. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -50
  848. package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
  849. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +67 -31
  850. package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
  851. package/src/duckdb/src/planner/binder.cpp +44 -31
  852. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +24 -1
  853. package/src/duckdb/src/planner/expression/bound_between_expression.cpp +4 -0
  854. package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +13 -8
  855. package/src/duckdb/src/planner/expression/bound_function_expression.cpp +22 -0
  856. package/src/duckdb/src/planner/expression/bound_parameter_expression.cpp +28 -20
  857. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +48 -4
  858. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
  859. package/src/duckdb/src/planner/expression_binder.cpp +23 -0
  860. package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
  861. package/src/duckdb/src/planner/logical_operator.cpp +19 -7
  862. package/src/duckdb/src/planner/logical_operator_visitor.cpp +5 -6
  863. package/src/duckdb/src/planner/operator/logical_comparison_join.cpp +4 -2
  864. package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +8 -0
  865. package/src/duckdb/src/planner/operator/logical_create.cpp +14 -0
  866. package/src/duckdb/src/planner/operator/logical_create_index.cpp +36 -7
  867. package/src/duckdb/src/planner/operator/logical_create_table.cpp +16 -0
  868. package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
  869. package/src/duckdb/src/planner/operator/logical_delete.cpp +9 -2
  870. package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
  871. package/src/duckdb/src/planner/operator/logical_distinct.cpp +13 -0
  872. package/src/duckdb/src/planner/operator/logical_explain.cpp +1 -1
  873. package/src/duckdb/src/planner/operator/logical_extension_operator.cpp +39 -0
  874. package/src/duckdb/src/planner/operator/logical_get.cpp +82 -4
  875. package/src/duckdb/src/planner/operator/logical_insert.cpp +8 -2
  876. package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +22 -0
  877. package/src/duckdb/src/planner/operator/logical_order.cpp +39 -0
  878. package/src/duckdb/src/planner/operator/logical_pivot.cpp +3 -0
  879. package/src/duckdb/src/planner/operator/logical_recursive_cte.cpp +5 -5
  880. package/src/duckdb/src/planner/operator/logical_sample.cpp +3 -0
  881. package/src/duckdb/src/planner/operator/logical_update.cpp +8 -2
  882. package/src/duckdb/src/planner/parsed_data/bound_create_table_info.cpp +4 -2
  883. package/src/duckdb/src/planner/planner.cpp +18 -7
  884. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
  885. package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
  886. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
  887. package/src/duckdb/src/storage/arena_allocator.cpp +13 -2
  888. package/src/duckdb/src/storage/buffer/block_manager.cpp +13 -9
  889. package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -1
  890. package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +3 -4
  891. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +7 -7
  892. package/src/duckdb/src/storage/checkpoint_manager.cpp +74 -69
  893. package/src/duckdb/src/storage/compression/bitpacking.cpp +87 -63
  894. package/src/duckdb/src/storage/compression/bitpacking_hugeint.cpp +295 -0
  895. package/src/duckdb/src/storage/compression/fsst.cpp +1 -1
  896. package/src/duckdb/src/storage/compression/rle.cpp +52 -13
  897. package/src/duckdb/src/storage/data_table.cpp +36 -25
  898. package/src/duckdb/src/storage/index.cpp +4 -26
  899. package/src/duckdb/src/storage/local_storage.cpp +3 -4
  900. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +267 -0
  901. package/src/duckdb/src/storage/metadata/metadata_reader.cpp +80 -0
  902. package/src/duckdb/src/storage/metadata/metadata_writer.cpp +86 -0
  903. package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +98 -0
  904. package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +194 -0
  905. package/src/duckdb/src/storage/serialization/serialize_expression.cpp +283 -0
  906. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +762 -0
  907. package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +62 -0
  908. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +432 -0
  909. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +419 -0
  910. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +342 -0
  911. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
  912. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +97 -0
  913. package/src/duckdb/src/storage/serialization/serialize_statement.cpp +22 -0
  914. package/src/duckdb/src/storage/serialization/serialize_table_filter.cpp +97 -0
  915. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +164 -0
  916. package/src/duckdb/src/storage/serialization/serialize_types.cpp +127 -0
  917. package/src/duckdb/src/storage/single_file_block_manager.cpp +69 -51
  918. package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
  919. package/src/duckdb/src/storage/storage_info.cpp +3 -2
  920. package/src/duckdb/src/storage/storage_manager.cpp +11 -5
  921. package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
  922. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +3 -3
  923. package/src/duckdb/src/storage/table/list_column_data.cpp +6 -3
  924. package/src/duckdb/src/storage/table/persistent_table_data.cpp +1 -2
  925. package/src/duckdb/src/storage/table/row_group.cpp +34 -19
  926. package/src/duckdb/src/storage/table/row_group_collection.cpp +23 -19
  927. package/src/duckdb/src/storage/table/update_segment.cpp +1 -1
  928. package/src/duckdb/src/storage/table_index_list.cpp +1 -1
  929. package/src/duckdb/src/storage/wal_replay.cpp +24 -24
  930. package/src/duckdb/src/storage/write_ahead_log.cpp +3 -2
  931. package/src/duckdb/src/verification/prepared_statement_verifier.cpp +16 -11
  932. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
  933. package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +5 -2
  934. package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
  935. package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
  936. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +10 -0
  937. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  938. package/src/duckdb/third_party/libpg_query/pg_functions.cpp +13 -0
  939. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +11057 -10328
  940. package/src/duckdb/third_party/libpg_query/src_backend_parser_scansup.cpp +9 -0
  941. package/src/duckdb/third_party/mbedtls/include/mbedtls_wrapper.hpp +10 -0
  942. package/src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp +31 -1
  943. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
  944. package/src/duckdb/ub_src_common.cpp +4 -0
  945. package/src/duckdb/ub_src_common_adbc_nanoarrow.cpp +8 -0
  946. package/src/duckdb/ub_src_common_arrow_appender.cpp +10 -0
  947. package/src/duckdb/ub_src_common_serializer.cpp +2 -0
  948. package/src/duckdb/ub_src_core_functions_scalar_debug.cpp +2 -0
  949. package/src/duckdb/ub_src_core_functions_scalar_string.cpp +2 -0
  950. package/src/duckdb/ub_src_execution.cpp +2 -0
  951. package/src/duckdb/ub_src_execution_index_art.cpp +0 -6
  952. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  953. package/src/duckdb/ub_src_execution_operator_schema.cpp +1 -1
  954. package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
  955. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  956. package/src/duckdb/ub_src_function_scalar.cpp +2 -0
  957. package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
  958. package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
  959. package/src/duckdb/ub_src_function_table_arrow.cpp +2 -0
  960. package/src/duckdb/ub_src_main.cpp +2 -0
  961. package/src/duckdb/ub_src_main_chunk_scan_state.cpp +2 -0
  962. package/src/duckdb/ub_src_optimizer.cpp +6 -0
  963. package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
  964. package/src/duckdb/ub_src_optimizer_join_order.cpp +10 -0
  965. package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
  966. package/src/duckdb/ub_src_parser.cpp +0 -2
  967. package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
  968. package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
  969. package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
  970. package/src/duckdb/ub_src_planner_operator.cpp +3 -3
  971. package/src/duckdb/ub_src_storage.cpp +0 -4
  972. package/src/duckdb/ub_src_storage_compression.cpp +2 -0
  973. package/src/duckdb/ub_src_storage_metadata.cpp +6 -0
  974. package/src/duckdb/ub_src_storage_serialization.cpp +28 -0
  975. package/src/duckdb_node.hpp +1 -0
  976. package/src/statement.cpp +10 -5
  977. package/test/columns.test.ts +25 -3
  978. package/test/extension.test.ts +1 -1
  979. package/test/test_all_types.test.ts +234 -0
  980. package/tsconfig.json +1 -0
  981. package/src/duckdb/src/execution/index/art/leaf_segment.cpp +0 -52
  982. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
  983. package/src/duckdb/src/execution/index/art/swizzleable_pointer.cpp +0 -22
  984. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +0 -193
  985. package/src/duckdb/src/include/duckdb/common/arrow/arrow_options.hpp +0 -25
  986. package/src/duckdb/src/include/duckdb/execution/index/art/leaf_segment.hpp +0 -38
  987. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
  988. package/src/duckdb/src/include/duckdb/execution/index/art/swizzleable_pointer.hpp +0 -58
  989. package/src/duckdb/src/include/duckdb/planner/operator/logical_asof_join.hpp +0 -27
  990. package/src/duckdb/src/include/duckdb/planner/operator/logical_delim_join.hpp +0 -32
  991. package/src/duckdb/src/include/duckdb/storage/meta_block_reader.hpp +0 -49
  992. package/src/duckdb/src/include/duckdb/storage/meta_block_writer.hpp +0 -50
  993. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
  994. package/src/duckdb/src/parser/common_table_expression_info.cpp +0 -19
  995. package/src/duckdb/src/planner/operator/logical_asof_join.cpp +0 -14
  996. package/src/duckdb/src/planner/operator/logical_delim_join.cpp +0 -27
  997. package/src/duckdb/src/storage/meta_block_reader.cpp +0 -78
  998. package/src/duckdb/src/storage/meta_block_writer.cpp +0 -80
@@ -15,6 +15,7 @@
15
15
  #include "duckdb/common/windows_undefs.hpp"
16
16
  #include "duckdb/execution/expression_executor.hpp"
17
17
  #include "duckdb/execution/partitionable_hashtable.hpp"
18
+ #include "duckdb/execution/window_executor.hpp"
18
19
  #include "duckdb/execution/window_segment_tree.hpp"
19
20
  #include "duckdb/main/client_config.hpp"
20
21
  #include "duckdb/main/config.hpp"
@@ -32,7 +33,7 @@ namespace duckdb {
32
33
  class WindowGlobalSinkState : public GlobalSinkState {
33
34
  public:
34
35
  WindowGlobalSinkState(const PhysicalWindow &op, ClientContext &context)
35
- : mode(DBConfig::GetConfig(context).options.window_mode) {
36
+ : op(op), mode(DBConfig::GetConfig(context).options.window_mode) {
36
37
 
37
38
  D_ASSERT(op.select_list[0]->GetExpressionClass() == ExpressionClass::BOUND_WINDOW);
38
39
  auto &wexpr = op.select_list[0]->Cast<BoundWindowExpression>();
@@ -42,6 +43,7 @@ public:
42
43
  wexpr.partitions_stats, op.estimated_cardinality);
43
44
  }
44
45
 
46
+ const PhysicalWindow &op;
45
47
  unique_ptr<PartitionGlobalSinkState> global_partition;
46
48
  WindowAggregationMode mode;
47
49
  };
@@ -78,975 +80,38 @@ PhysicalWindow::PhysicalWindow(vector<LogicalType> types, vector<unique_ptr<Expr
78
80
  }
79
81
  }
80
82
 
81
- static idx_t FindNextStart(const ValidityMask &mask, idx_t l, const idx_t r, idx_t &n) {
82
- if (mask.AllValid()) {
83
- auto start = MinValue(l + n - 1, r);
84
- n -= MinValue(n, r - l);
85
- return start;
86
- }
87
-
88
- while (l < r) {
89
- // If l is aligned with the start of a block, and the block is blank, then skip forward one block.
90
- idx_t entry_idx;
91
- idx_t shift;
92
- mask.GetEntryIndex(l, entry_idx, shift);
93
-
94
- const auto block = mask.GetValidityEntry(entry_idx);
95
- if (mask.NoneValid(block) && !shift) {
96
- l += ValidityMask::BITS_PER_VALUE;
97
- continue;
98
- }
99
-
100
- // Loop over the block
101
- for (; shift < ValidityMask::BITS_PER_VALUE && l < r; ++shift, ++l) {
102
- if (mask.RowIsValid(block, shift) && --n == 0) {
103
- return MinValue(l, r);
104
- }
105
- }
106
- }
107
-
108
- // Didn't find a start so return the end of the range
109
- return r;
110
- }
111
-
112
- static idx_t FindPrevStart(const ValidityMask &mask, const idx_t l, idx_t r, idx_t &n) {
113
- if (mask.AllValid()) {
114
- auto start = (r <= l + n) ? l : r - n;
115
- n -= r - start;
116
- return start;
117
- }
118
-
119
- while (l < r) {
120
- // If r is aligned with the start of a block, and the previous block is blank,
121
- // then skip backwards one block.
122
- idx_t entry_idx;
123
- idx_t shift;
124
- mask.GetEntryIndex(r - 1, entry_idx, shift);
125
-
126
- const auto block = mask.GetValidityEntry(entry_idx);
127
- if (mask.NoneValid(block) && (shift + 1 == ValidityMask::BITS_PER_VALUE)) {
128
- // r is nonzero (> l) and word aligned, so this will not underflow.
129
- r -= ValidityMask::BITS_PER_VALUE;
130
- continue;
131
- }
132
-
133
- // Loop backwards over the block
134
- // shift is probing r-1 >= l >= 0
135
- for (++shift; shift-- > 0; --r) {
136
- if (mask.RowIsValid(block, shift) && --n == 0) {
137
- return MaxValue(l, r - 1);
138
- }
139
- }
140
- }
141
-
142
- // Didn't find a start so return the start of the range
143
- return l;
144
- }
145
-
146
- static void PrepareInputExpressions(vector<unique_ptr<Expression>> &exprs, ExpressionExecutor &executor,
147
- DataChunk &chunk) {
148
- if (exprs.empty()) {
149
- return;
150
- }
151
-
152
- vector<LogicalType> types;
153
- for (idx_t expr_idx = 0; expr_idx < exprs.size(); ++expr_idx) {
154
- types.push_back(exprs[expr_idx]->return_type);
155
- executor.AddExpression(*exprs[expr_idx]);
156
- }
157
-
158
- if (!types.empty()) {
159
- auto &allocator = executor.GetAllocator();
160
- chunk.Initialize(allocator, types);
161
- }
162
- }
163
-
164
- static void PrepareInputExpression(Expression &expr, ExpressionExecutor &executor, DataChunk &chunk) {
165
- vector<LogicalType> types;
166
- types.push_back(expr.return_type);
167
- executor.AddExpression(expr);
168
-
169
- auto &allocator = executor.GetAllocator();
170
- chunk.Initialize(allocator, types);
171
- }
172
-
173
- struct WindowInputExpression {
174
- WindowInputExpression(optional_ptr<Expression> expr_p, ClientContext &context)
175
- : expr(expr_p), ptype(PhysicalType::INVALID), scalar(true), executor(context) {
176
- if (expr) {
177
- PrepareInputExpression(*expr, executor, chunk);
178
- ptype = expr->return_type.InternalType();
179
- scalar = expr->IsScalar();
180
- }
181
- }
182
-
183
- void Execute(DataChunk &input_chunk) {
184
- if (expr) {
185
- chunk.Reset();
186
- executor.Execute(input_chunk, chunk);
187
- chunk.Verify();
188
- }
189
- }
190
-
191
- template <typename T>
192
- inline T GetCell(idx_t i) const {
193
- D_ASSERT(!chunk.data.empty());
194
- const auto data = FlatVector::GetData<T>(chunk.data[0]);
195
- return data[scalar ? 0 : i];
196
- }
197
-
198
- inline bool CellIsNull(idx_t i) const {
199
- D_ASSERT(!chunk.data.empty());
200
- if (chunk.data[0].GetVectorType() == VectorType::CONSTANT_VECTOR) {
201
- return ConstantVector::IsNull(chunk.data[0]);
202
- }
203
- return FlatVector::IsNull(chunk.data[0], i);
204
- }
205
-
206
- inline void CopyCell(Vector &target, idx_t target_offset) const {
207
- D_ASSERT(!chunk.data.empty());
208
- auto &source = chunk.data[0];
209
- auto source_offset = scalar ? 0 : target_offset;
210
- VectorOperations::Copy(source, target, source_offset + 1, source_offset, target_offset);
211
- }
212
-
213
- optional_ptr<Expression> expr;
214
- PhysicalType ptype;
215
- bool scalar;
216
- ExpressionExecutor executor;
217
- DataChunk chunk;
218
- };
219
-
220
- struct WindowInputColumn {
221
- WindowInputColumn(Expression *expr_p, ClientContext &context, idx_t capacity_p)
222
- : input_expr(expr_p, context), count(0), capacity(capacity_p) {
223
- if (input_expr.expr) {
224
- target = make_uniq<Vector>(input_expr.chunk.data[0].GetType(), capacity);
225
- }
226
- }
227
-
228
- void Append(DataChunk &input_chunk) {
229
- if (input_expr.expr) {
230
- const auto source_count = input_chunk.size();
231
- D_ASSERT(count + source_count <= capacity);
232
- if (!input_expr.scalar || !count) {
233
- input_expr.Execute(input_chunk);
234
- auto &source = input_expr.chunk.data[0];
235
- VectorOperations::Copy(source, *target, source_count, 0, count);
236
- }
237
- count += source_count;
238
- }
239
- }
240
-
241
- inline bool CellIsNull(idx_t i) {
242
- D_ASSERT(target);
243
- D_ASSERT(i < count);
244
- return FlatVector::IsNull(*target, input_expr.scalar ? 0 : i);
245
- }
246
-
247
- template <typename T>
248
- inline T GetCell(idx_t i) const {
249
- D_ASSERT(target);
250
- D_ASSERT(i < count);
251
- const auto data = FlatVector::GetData<T>(*target);
252
- return data[input_expr.scalar ? 0 : i];
253
- }
254
-
255
- WindowInputExpression input_expr;
256
-
257
- private:
258
- unique_ptr<Vector> target;
259
- idx_t count;
260
- idx_t capacity;
261
- };
262
-
263
- static inline bool BoundaryNeedsPeer(const WindowBoundary &boundary) {
264
- switch (boundary) {
265
- case WindowBoundary::CURRENT_ROW_RANGE:
266
- case WindowBoundary::EXPR_PRECEDING_RANGE:
267
- case WindowBoundary::EXPR_FOLLOWING_RANGE:
268
- return true;
269
- default:
270
- return false;
271
- }
272
- }
273
-
274
- struct WindowBoundariesState {
275
- static inline bool IsScalar(const unique_ptr<Expression> &expr) {
276
- return expr ? expr->IsScalar() : true;
277
- }
278
-
279
- WindowBoundariesState(BoundWindowExpression &wexpr, const idx_t input_size)
280
- : type(wexpr.type), input_size(input_size), start_boundary(wexpr.start), end_boundary(wexpr.end),
281
- partition_count(wexpr.partitions.size()), order_count(wexpr.orders.size()),
282
- range_sense(wexpr.orders.empty() ? OrderType::INVALID : wexpr.orders[0].type),
283
- has_preceding_range(wexpr.start == WindowBoundary::EXPR_PRECEDING_RANGE ||
284
- wexpr.end == WindowBoundary::EXPR_PRECEDING_RANGE),
285
- has_following_range(wexpr.start == WindowBoundary::EXPR_FOLLOWING_RANGE ||
286
- wexpr.end == WindowBoundary::EXPR_FOLLOWING_RANGE),
287
- needs_peer(BoundaryNeedsPeer(wexpr.end) || wexpr.type == ExpressionType::WINDOW_CUME_DIST) {
288
- }
289
-
290
- void Update(const idx_t row_idx, WindowInputColumn &range_collection, const idx_t source_offset,
291
- WindowInputExpression &boundary_start, WindowInputExpression &boundary_end,
292
- const ValidityMask &partition_mask, const ValidityMask &order_mask);
293
-
294
- // Cached lookups
295
- const ExpressionType type;
296
- const idx_t input_size;
297
- const WindowBoundary start_boundary;
298
- const WindowBoundary end_boundary;
299
- const size_t partition_count;
300
- const size_t order_count;
301
- const OrderType range_sense;
302
- const bool has_preceding_range;
303
- const bool has_following_range;
304
- const bool needs_peer;
305
-
306
- idx_t partition_start = 0;
307
- idx_t partition_end = 0;
308
- idx_t peer_start = 0;
309
- idx_t peer_end = 0;
310
- idx_t valid_start = 0;
311
- idx_t valid_end = 0;
312
- int64_t window_start = -1;
313
- int64_t window_end = -1;
314
- bool is_same_partition = false;
315
- bool is_peer = false;
316
- };
317
-
318
- static bool WindowNeedsRank(const BoundWindowExpression &wexpr) {
319
- return wexpr.type == ExpressionType::WINDOW_PERCENT_RANK || wexpr.type == ExpressionType::WINDOW_RANK ||
320
- wexpr.type == ExpressionType::WINDOW_RANK_DENSE || wexpr.type == ExpressionType::WINDOW_CUME_DIST;
321
- }
322
-
323
- template <typename T>
324
- static T GetCell(DataChunk &chunk, idx_t column, idx_t index) {
325
- D_ASSERT(chunk.ColumnCount() > column);
326
- auto &source = chunk.data[column];
327
- const auto data = FlatVector::GetData<T>(source);
328
- return data[index];
329
- }
330
-
331
- static bool CellIsNull(DataChunk &chunk, idx_t column, idx_t index) {
332
- D_ASSERT(chunk.ColumnCount() > column);
333
- auto &source = chunk.data[column];
334
- return FlatVector::IsNull(source, index);
335
- }
336
-
337
- static void CopyCell(DataChunk &chunk, idx_t column, idx_t index, Vector &target, idx_t target_offset) {
338
- D_ASSERT(chunk.ColumnCount() > column);
339
- auto &source = chunk.data[column];
340
- VectorOperations::Copy(source, target, index + 1, index, target_offset);
341
- }
342
-
343
- template <typename T>
344
- struct WindowColumnIterator {
345
- using iterator = WindowColumnIterator<T>;
346
- using iterator_category = std::forward_iterator_tag;
347
- using difference_type = std::ptrdiff_t;
348
- using value_type = T;
349
- using reference = T;
350
- using pointer = idx_t;
351
-
352
- explicit WindowColumnIterator(WindowInputColumn &coll_p, pointer pos_p = 0) : coll(&coll_p), pos(pos_p) {
353
- }
354
-
355
- inline reference operator*() const {
356
- return coll->GetCell<T>(pos);
357
- }
358
- inline explicit operator pointer() const {
359
- return pos;
360
- }
361
-
362
- inline iterator &operator++() {
363
- ++pos;
364
- return *this;
365
- }
366
- inline iterator operator++(int) {
367
- auto result = *this;
368
- ++(*this);
369
- return result;
370
- }
371
-
372
- friend inline bool operator==(const iterator &a, const iterator &b) {
373
- return a.pos == b.pos;
374
- }
375
- friend inline bool operator!=(const iterator &a, const iterator &b) {
376
- return a.pos != b.pos;
377
- }
378
-
379
- private:
380
- optional_ptr<WindowInputColumn> coll;
381
- pointer pos;
382
- };
383
-
384
- template <typename T, typename OP>
385
- struct OperationCompare : public std::function<bool(T, T)> {
386
- inline bool operator()(const T &lhs, const T &val) const {
387
- return OP::template Operation(lhs, val);
388
- }
389
- };
390
-
391
- template <typename T, typename OP, bool FROM>
392
- static idx_t FindTypedRangeBound(WindowInputColumn &over, const idx_t order_begin, const idx_t order_end,
393
- WindowInputExpression &boundary, const idx_t boundary_row) {
394
- D_ASSERT(!boundary.CellIsNull(boundary_row));
395
- const auto val = boundary.GetCell<T>(boundary_row);
396
-
397
- OperationCompare<T, OP> comp;
398
- WindowColumnIterator<T> begin(over, order_begin);
399
- WindowColumnIterator<T> end(over, order_end);
400
- if (FROM) {
401
- return idx_t(std::lower_bound(begin, end, val, comp));
402
- } else {
403
- return idx_t(std::upper_bound(begin, end, val, comp));
404
- }
405
- }
406
-
407
- template <typename OP, bool FROM>
408
- static idx_t FindRangeBound(WindowInputColumn &over, const idx_t order_begin, const idx_t order_end,
409
- WindowInputExpression &boundary, const idx_t expr_idx) {
410
- D_ASSERT(boundary.chunk.ColumnCount() == 1);
411
- D_ASSERT(boundary.chunk.data[0].GetType().InternalType() == over.input_expr.ptype);
412
-
413
- switch (over.input_expr.ptype) {
414
- case PhysicalType::INT8:
415
- return FindTypedRangeBound<int8_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
416
- case PhysicalType::INT16:
417
- return FindTypedRangeBound<int16_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
418
- case PhysicalType::INT32:
419
- return FindTypedRangeBound<int32_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
420
- case PhysicalType::INT64:
421
- return FindTypedRangeBound<int64_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
422
- case PhysicalType::UINT8:
423
- return FindTypedRangeBound<uint8_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
424
- case PhysicalType::UINT16:
425
- return FindTypedRangeBound<uint16_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
426
- case PhysicalType::UINT32:
427
- return FindTypedRangeBound<uint32_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
428
- case PhysicalType::UINT64:
429
- return FindTypedRangeBound<uint64_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
430
- case PhysicalType::INT128:
431
- return FindTypedRangeBound<hugeint_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
432
- case PhysicalType::FLOAT:
433
- return FindTypedRangeBound<float, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
434
- case PhysicalType::DOUBLE:
435
- return FindTypedRangeBound<double, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
436
- case PhysicalType::INTERVAL:
437
- return FindTypedRangeBound<interval_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
438
- default:
439
- throw InternalException("Unsupported column type for RANGE");
440
- }
441
- }
442
-
443
- template <bool FROM>
444
- static idx_t FindOrderedRangeBound(WindowInputColumn &over, const OrderType range_sense, const idx_t order_begin,
445
- const idx_t order_end, WindowInputExpression &boundary, const idx_t expr_idx) {
446
- switch (range_sense) {
447
- case OrderType::ASCENDING:
448
- return FindRangeBound<LessThan, FROM>(over, order_begin, order_end, boundary, expr_idx);
449
- case OrderType::DESCENDING:
450
- return FindRangeBound<GreaterThan, FROM>(over, order_begin, order_end, boundary, expr_idx);
451
- default:
452
- throw InternalException("Unsupported ORDER BY sense for RANGE");
453
- }
454
- }
455
-
456
- void WindowBoundariesState::Update(const idx_t row_idx, WindowInputColumn &range_collection, const idx_t expr_idx,
457
- WindowInputExpression &boundary_start, WindowInputExpression &boundary_end,
458
- const ValidityMask &partition_mask, const ValidityMask &order_mask) {
459
-
460
- auto &bounds = *this;
461
- if (bounds.partition_count + bounds.order_count > 0) {
462
-
463
- // determine partition and peer group boundaries to ultimately figure out window size
464
- bounds.is_same_partition = !partition_mask.RowIsValidUnsafe(row_idx);
465
- bounds.is_peer = !order_mask.RowIsValidUnsafe(row_idx);
466
-
467
- // when the partition changes, recompute the boundaries
468
- if (!bounds.is_same_partition) {
469
- bounds.partition_start = row_idx;
470
- bounds.peer_start = row_idx;
471
-
472
- // find end of partition
473
- bounds.partition_end = bounds.input_size;
474
- if (bounds.partition_count) {
475
- idx_t n = 1;
476
- bounds.partition_end = FindNextStart(partition_mask, bounds.partition_start + 1, bounds.input_size, n);
477
- }
478
-
479
- // Find valid ordering values for the new partition
480
- // so we can exclude NULLs from RANGE expression computations
481
- bounds.valid_start = bounds.partition_start;
482
- bounds.valid_end = bounds.partition_end;
483
-
484
- if ((bounds.valid_start < bounds.valid_end) && bounds.has_preceding_range) {
485
- // Exclude any leading NULLs
486
- if (range_collection.CellIsNull(bounds.valid_start)) {
487
- idx_t n = 1;
488
- bounds.valid_start = FindNextStart(order_mask, bounds.valid_start + 1, bounds.valid_end, n);
489
- }
490
- }
491
-
492
- if ((bounds.valid_start < bounds.valid_end) && bounds.has_following_range) {
493
- // Exclude any trailing NULLs
494
- if (range_collection.CellIsNull(bounds.valid_end - 1)) {
495
- idx_t n = 1;
496
- bounds.valid_end = FindPrevStart(order_mask, bounds.valid_start, bounds.valid_end, n);
497
- }
498
- }
499
-
500
- } else if (!bounds.is_peer) {
501
- bounds.peer_start = row_idx;
502
- }
503
-
504
- if (bounds.needs_peer) {
505
- bounds.peer_end = bounds.partition_end;
506
- if (bounds.order_count) {
507
- idx_t n = 1;
508
- bounds.peer_end = FindNextStart(order_mask, bounds.peer_start + 1, bounds.partition_end, n);
509
- }
510
- }
511
-
512
- } else {
513
- bounds.is_same_partition = false;
514
- bounds.is_peer = true;
515
- bounds.partition_end = bounds.input_size;
516
- bounds.peer_end = bounds.partition_end;
517
- }
518
-
519
- // determine window boundaries depending on the type of expression
520
- bounds.window_start = -1;
521
- bounds.window_end = -1;
522
-
523
- switch (bounds.start_boundary) {
524
- case WindowBoundary::UNBOUNDED_PRECEDING:
525
- bounds.window_start = bounds.partition_start;
526
- break;
527
- case WindowBoundary::CURRENT_ROW_ROWS:
528
- bounds.window_start = row_idx;
529
- break;
530
- case WindowBoundary::CURRENT_ROW_RANGE:
531
- bounds.window_start = bounds.peer_start;
532
- break;
533
- case WindowBoundary::EXPR_PRECEDING_ROWS: {
534
- if (!TrySubtractOperator::Operation(int64_t(row_idx), boundary_start.GetCell<int64_t>(expr_idx),
535
- bounds.window_start)) {
536
- throw OutOfRangeException("Overflow computing ROWS PRECEDING start");
537
- }
538
- break;
539
- }
540
- case WindowBoundary::EXPR_FOLLOWING_ROWS: {
541
- if (!TryAddOperator::Operation(int64_t(row_idx), boundary_start.GetCell<int64_t>(expr_idx),
542
- bounds.window_start)) {
543
- throw OutOfRangeException("Overflow computing ROWS FOLLOWING start");
544
- }
545
- break;
546
- }
547
- case WindowBoundary::EXPR_PRECEDING_RANGE: {
548
- if (boundary_start.CellIsNull(expr_idx)) {
549
- bounds.window_start = bounds.peer_start;
550
- } else {
551
- bounds.window_start = FindOrderedRangeBound<true>(range_collection, bounds.range_sense, bounds.valid_start,
552
- row_idx, boundary_start, expr_idx);
553
- }
554
- break;
555
- }
556
- case WindowBoundary::EXPR_FOLLOWING_RANGE: {
557
- if (boundary_start.CellIsNull(expr_idx)) {
558
- bounds.window_start = bounds.peer_start;
559
- } else {
560
- bounds.window_start = FindOrderedRangeBound<true>(range_collection, bounds.range_sense, row_idx,
561
- bounds.valid_end, boundary_start, expr_idx);
562
- }
563
- break;
564
- }
565
- default:
566
- throw InternalException("Unsupported window start boundary");
567
- }
568
-
569
- switch (bounds.end_boundary) {
570
- case WindowBoundary::CURRENT_ROW_ROWS:
571
- bounds.window_end = row_idx + 1;
572
- break;
573
- case WindowBoundary::CURRENT_ROW_RANGE:
574
- bounds.window_end = bounds.peer_end;
575
- break;
576
- case WindowBoundary::UNBOUNDED_FOLLOWING:
577
- bounds.window_end = bounds.partition_end;
578
- break;
579
- case WindowBoundary::EXPR_PRECEDING_ROWS:
580
- if (!TrySubtractOperator::Operation(int64_t(row_idx + 1), boundary_end.GetCell<int64_t>(expr_idx),
581
- bounds.window_end)) {
582
- throw OutOfRangeException("Overflow computing ROWS PRECEDING end");
583
- }
584
- break;
585
- case WindowBoundary::EXPR_FOLLOWING_ROWS:
586
- if (!TryAddOperator::Operation(int64_t(row_idx + 1), boundary_end.GetCell<int64_t>(expr_idx),
587
- bounds.window_end)) {
588
- throw OutOfRangeException("Overflow computing ROWS FOLLOWING end");
589
- }
590
- break;
591
- case WindowBoundary::EXPR_PRECEDING_RANGE: {
592
- if (boundary_end.CellIsNull(expr_idx)) {
593
- bounds.window_end = bounds.peer_end;
594
- } else {
595
- bounds.window_end = FindOrderedRangeBound<false>(range_collection, bounds.range_sense, bounds.valid_start,
596
- row_idx, boundary_end, expr_idx);
597
- }
598
- break;
599
- }
600
- case WindowBoundary::EXPR_FOLLOWING_RANGE: {
601
- if (boundary_end.CellIsNull(expr_idx)) {
602
- bounds.window_end = bounds.peer_end;
603
- } else {
604
- bounds.window_end = FindOrderedRangeBound<false>(range_collection, bounds.range_sense, row_idx,
605
- bounds.valid_end, boundary_end, expr_idx);
606
- }
607
- break;
608
- }
609
- default:
610
- throw InternalException("Unsupported window end boundary");
611
- }
612
-
613
- // clamp windows to partitions if they should exceed
614
- if (bounds.window_start < (int64_t)bounds.partition_start) {
615
- bounds.window_start = bounds.partition_start;
616
- }
617
- if (bounds.window_start > (int64_t)bounds.partition_end) {
618
- bounds.window_start = bounds.partition_end;
619
- }
620
- if (bounds.window_end < (int64_t)bounds.partition_start) {
621
- bounds.window_end = bounds.partition_start;
622
- }
623
- if (bounds.window_end > (int64_t)bounds.partition_end) {
624
- bounds.window_end = bounds.partition_end;
625
- }
626
-
627
- if (bounds.window_start < 0 || bounds.window_end < 0) {
628
- throw InternalException("Failed to compute window boundaries");
629
- }
630
- }
631
-
632
- struct WindowExecutor {
633
- static bool IsConstantAggregate(const BoundWindowExpression &wexpr);
634
-
635
- WindowExecutor(BoundWindowExpression &wexpr, ClientContext &context, const ValidityMask &partition_mask,
636
- const idx_t count);
637
-
638
- void Sink(DataChunk &input_chunk, const idx_t input_idx, const idx_t total_count);
639
- void Finalize(WindowAggregationMode mode);
640
-
641
- void Evaluate(idx_t row_idx, DataChunk &input_chunk, Vector &result, const ValidityMask &partition_mask,
642
- const ValidityMask &order_mask);
643
-
644
- // The function
645
- BoundWindowExpression &wexpr;
646
-
647
- // Frame management
648
- WindowBoundariesState bounds;
649
- uint64_t dense_rank = 1;
650
- uint64_t rank_equal = 0;
651
- uint64_t rank = 1;
652
-
653
- // Expression collections
654
- DataChunk payload_collection;
655
- ExpressionExecutor payload_executor;
656
- DataChunk payload_chunk;
657
-
658
- ExpressionExecutor filter_executor;
659
- ValidityMask filter_mask;
660
- vector<validity_t> filter_bits;
661
- SelectionVector filter_sel;
662
-
663
- // LEAD/LAG Evaluation
664
- WindowInputExpression leadlag_offset;
665
- WindowInputExpression leadlag_default;
666
-
667
- // evaluate boundaries if present. Parser has checked boundary types.
668
- WindowInputExpression boundary_start;
669
- WindowInputExpression boundary_end;
670
-
671
- // evaluate RANGE expressions, if needed
672
- WindowInputColumn range;
673
-
674
- // IGNORE NULLS
675
- ValidityMask ignore_nulls;
676
-
677
- // build a segment tree for frame-adhering aggregates
678
- // see http://www.vldb.org/pvldb/vol8/p1058-leis.pdf
679
- unique_ptr<WindowSegmentTree> segment_tree = nullptr;
680
-
681
- // all aggregate values are the same for each partition
682
- unique_ptr<WindowConstantAggregate> constant_aggregate = nullptr;
683
- };
684
-
685
- bool WindowExecutor::IsConstantAggregate(const BoundWindowExpression &wexpr) {
686
- if (!wexpr.aggregate) {
687
- return false;
688
- }
689
-
690
- // COUNT(*) is already handled efficiently by segment trees.
691
- if (wexpr.children.empty()) {
692
- return false;
693
- }
694
-
695
- /*
696
- The default framing option is RANGE UNBOUNDED PRECEDING, which
697
- is the same as RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT
698
- ROW; it sets the frame to be all rows from the partition start
699
- up through the current row's last peer (a row that the window's
700
- ORDER BY clause considers equivalent to the current row; all
701
- rows are peers if there is no ORDER BY). In general, UNBOUNDED
702
- PRECEDING means that the frame starts with the first row of the
703
- partition, and similarly UNBOUNDED FOLLOWING means that the
704
- frame ends with the last row of the partition, regardless of
705
- RANGE, ROWS or GROUPS mode. In ROWS mode, CURRENT ROW means that
706
- the frame starts or ends with the current row; but in RANGE or
707
- GROUPS mode it means that the frame starts or ends with the
708
- current row's first or last peer in the ORDER BY ordering. The
709
- offset PRECEDING and offset FOLLOWING options vary in meaning
710
- depending on the frame mode.
711
- */
712
- switch (wexpr.start) {
713
- case WindowBoundary::UNBOUNDED_PRECEDING:
714
- break;
715
- case WindowBoundary::CURRENT_ROW_RANGE:
716
- if (!wexpr.orders.empty()) {
717
- return false;
718
- }
719
- break;
720
- default:
721
- return false;
722
- }
723
-
724
- switch (wexpr.end) {
725
- case WindowBoundary::UNBOUNDED_FOLLOWING:
726
- break;
727
- case WindowBoundary::CURRENT_ROW_RANGE:
728
- if (!wexpr.orders.empty()) {
729
- return false;
730
- }
83
+ static unique_ptr<WindowExecutor> WindowExecutorFactory(BoundWindowExpression &wexpr, ClientContext &context,
84
+ const ValidityMask &partition_mask,
85
+ const ValidityMask &order_mask, const idx_t payload_count,
86
+ WindowAggregationMode mode) {
87
+ switch (wexpr.type) {
88
+ case ExpressionType::WINDOW_AGGREGATE:
89
+ return make_uniq<WindowAggregateExecutor>(wexpr, context, payload_count, partition_mask, order_mask, mode);
90
+ case ExpressionType::WINDOW_ROW_NUMBER:
91
+ return make_uniq<WindowRowNumberExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
92
+ case ExpressionType::WINDOW_RANK_DENSE:
93
+ return make_uniq<WindowDenseRankExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
94
+ case ExpressionType::WINDOW_RANK:
95
+ return make_uniq<WindowRankExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
96
+ case ExpressionType::WINDOW_PERCENT_RANK:
97
+ return make_uniq<WindowPercentRankExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
98
+ case ExpressionType::WINDOW_CUME_DIST:
99
+ return make_uniq<WindowCumeDistExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
100
+ case ExpressionType::WINDOW_NTILE:
101
+ return make_uniq<WindowNtileExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
102
+ case ExpressionType::WINDOW_LEAD:
103
+ case ExpressionType::WINDOW_LAG:
104
+ return make_uniq<WindowLeadLagExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
105
+ case ExpressionType::WINDOW_FIRST_VALUE:
106
+ return make_uniq<WindowFirstValueExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
107
+ case ExpressionType::WINDOW_LAST_VALUE:
108
+ return make_uniq<WindowLastValueExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
109
+ case ExpressionType::WINDOW_NTH_VALUE:
110
+ return make_uniq<WindowNthValueExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
731
111
  break;
732
112
  default:
733
- return false;
734
- }
735
-
736
- return true;
737
- }
738
-
739
- WindowExecutor::WindowExecutor(BoundWindowExpression &wexpr, ClientContext &context, const ValidityMask &partition_mask,
740
- const idx_t count)
741
- : wexpr(wexpr), bounds(wexpr, count), payload_collection(), payload_executor(context), filter_executor(context),
742
- leadlag_offset(wexpr.offset_expr.get(), context), leadlag_default(wexpr.default_expr.get(), context),
743
- boundary_start(wexpr.start_expr.get(), context), boundary_end(wexpr.end_expr.get(), context),
744
- range((bounds.has_preceding_range || bounds.has_following_range) ? wexpr.orders[0].expression.get() : nullptr,
745
- context, count)
746
-
747
- {
748
- // TODO we could evaluate those expressions in parallel
749
-
750
- // Check for constant aggregate
751
- if (IsConstantAggregate(wexpr)) {
752
- constant_aggregate =
753
- make_uniq<WindowConstantAggregate>(AggregateObject(wexpr), wexpr.return_type, partition_mask, count);
754
- }
755
-
756
- // evaluate the FILTER clause and stuff it into a large mask for compactness and reuse
757
- if (wexpr.filter_expr) {
758
- // Start with all invalid and set the ones that pass
759
- filter_bits.resize(ValidityMask::ValidityMaskSize(count), 0);
760
- filter_mask.Initialize(filter_bits.data());
761
- filter_executor.AddExpression(*wexpr.filter_expr);
762
- filter_sel.Initialize(STANDARD_VECTOR_SIZE);
763
- }
764
-
765
- // TODO: child may be a scalar, don't need to materialize the whole collection then
766
-
767
- // evaluate inner expressions of window functions, could be more complex
768
- PrepareInputExpressions(wexpr.children, payload_executor, payload_chunk);
769
-
770
- auto types = payload_chunk.GetTypes();
771
- if (!types.empty()) {
772
- payload_collection.Initialize(Allocator::Get(context), types);
773
- }
774
- }
775
-
776
- void WindowExecutor::Sink(DataChunk &input_chunk, const idx_t input_idx, const idx_t total_count) {
777
- // Single pass over the input to produce the global data.
778
- // Vectorisation for the win...
779
-
780
- // Set up a validity mask for IGNORE NULLS
781
- bool check_nulls = false;
782
- if (wexpr.ignore_nulls) {
783
- switch (wexpr.type) {
784
- case ExpressionType::WINDOW_LEAD:
785
- case ExpressionType::WINDOW_LAG:
786
- case ExpressionType::WINDOW_FIRST_VALUE:
787
- case ExpressionType::WINDOW_LAST_VALUE:
788
- case ExpressionType::WINDOW_NTH_VALUE:
789
- check_nulls = true;
790
- break;
791
- default:
792
- break;
793
- }
794
- }
795
-
796
- const auto count = input_chunk.size();
797
-
798
- idx_t filtered = 0;
799
- SelectionVector *filtering = nullptr;
800
- if (wexpr.filter_expr) {
801
- filtering = &filter_sel;
802
- filtered = filter_executor.SelectExpression(input_chunk, filter_sel);
803
- for (idx_t f = 0; f < filtered; ++f) {
804
- filter_mask.SetValid(input_idx + filter_sel[f]);
805
- }
806
- }
807
-
808
- if (!wexpr.children.empty()) {
809
- payload_chunk.Reset();
810
- payload_executor.Execute(input_chunk, payload_chunk);
811
- payload_chunk.Verify();
812
- if (constant_aggregate) {
813
- constant_aggregate->Sink(payload_chunk, filtering, filtered);
814
- } else {
815
- payload_collection.Append(payload_chunk, true);
816
- }
817
-
818
- // process payload chunks while they are still piping hot
819
- if (check_nulls) {
820
- UnifiedVectorFormat vdata;
821
- payload_chunk.data[0].ToUnifiedFormat(count, vdata);
822
- if (!vdata.validity.AllValid()) {
823
- // Lazily materialise the contents when we find the first NULL
824
- if (ignore_nulls.AllValid()) {
825
- ignore_nulls.Initialize(total_count);
826
- }
827
- // Write to the current position
828
- if (input_idx % ValidityMask::BITS_PER_VALUE == 0) {
829
- // If we are at the edge of an output entry, just copy the entries
830
- auto dst = ignore_nulls.GetData() + ignore_nulls.EntryCount(input_idx);
831
- auto src = vdata.validity.GetData();
832
- for (auto entry_count = vdata.validity.EntryCount(count); entry_count-- > 0;) {
833
- *dst++ = *src++;
834
- }
835
- } else {
836
- // If not, we have ragged data and need to copy one bit at a time.
837
- for (idx_t i = 0; i < count; ++i) {
838
- ignore_nulls.Set(input_idx + i, vdata.validity.RowIsValid(i));
839
- }
840
- }
841
- }
842
- }
843
- }
844
-
845
- range.Append(input_chunk);
846
- }
847
-
848
- void WindowExecutor::Finalize(WindowAggregationMode mode) {
849
- // build a segment tree for frame-adhering aggregates
850
- // see http://www.vldb.org/pvldb/vol8/p1058-leis.pdf
851
- if (constant_aggregate) {
852
- constant_aggregate->Finalize();
853
- } else if (wexpr.aggregate) {
854
- segment_tree = make_uniq<WindowSegmentTree>(AggregateObject(wexpr), wexpr.return_type, &payload_collection,
855
- filter_mask, mode);
856
- }
857
- }
858
-
859
- void WindowExecutor::Evaluate(idx_t row_idx, DataChunk &input_chunk, Vector &result, const ValidityMask &partition_mask,
860
- const ValidityMask &order_mask) {
861
- // Evaluate the row-level arguments
862
- boundary_start.Execute(input_chunk);
863
- boundary_end.Execute(input_chunk);
864
-
865
- leadlag_offset.Execute(input_chunk);
866
- leadlag_default.Execute(input_chunk);
867
-
868
- // this is the main loop, go through all sorted rows and compute window function result
869
- for (idx_t output_offset = 0; output_offset < input_chunk.size(); ++output_offset, ++row_idx) {
870
- // special case, OVER (), aggregate over everything
871
- bounds.Update(row_idx, range, output_offset, boundary_start, boundary_end, partition_mask, order_mask);
872
- if (WindowNeedsRank(wexpr)) {
873
- if (!bounds.is_same_partition || row_idx == 0) { // special case for first row, need to init
874
- dense_rank = 1;
875
- rank = 1;
876
- rank_equal = 0;
877
- } else if (!bounds.is_peer) {
878
- dense_rank++;
879
- rank += rank_equal;
880
- rank_equal = 0;
881
- }
882
- rank_equal++;
883
- }
884
-
885
- // if no values are read for window, result is NULL
886
- if (bounds.window_start >= bounds.window_end) {
887
- FlatVector::SetNull(result, output_offset, true);
888
- continue;
889
- }
890
-
891
- switch (wexpr.type) {
892
- case ExpressionType::WINDOW_AGGREGATE: {
893
- if (constant_aggregate) {
894
- constant_aggregate->Compute(result, output_offset, bounds.window_start, bounds.window_end);
895
- } else {
896
- segment_tree->Compute(result, output_offset, bounds.window_start, bounds.window_end);
897
- }
898
- break;
899
- }
900
- case ExpressionType::WINDOW_ROW_NUMBER: {
901
- auto rdata = FlatVector::GetData<int64_t>(result);
902
- rdata[output_offset] = row_idx - bounds.partition_start + 1;
903
- break;
904
- }
905
- case ExpressionType::WINDOW_RANK_DENSE: {
906
- auto rdata = FlatVector::GetData<int64_t>(result);
907
- rdata[output_offset] = dense_rank;
908
- break;
909
- }
910
- case ExpressionType::WINDOW_RANK: {
911
- auto rdata = FlatVector::GetData<int64_t>(result);
912
- rdata[output_offset] = rank;
913
- break;
914
- }
915
- case ExpressionType::WINDOW_PERCENT_RANK: {
916
- int64_t denom = (int64_t)bounds.partition_end - bounds.partition_start - 1;
917
- double percent_rank = denom > 0 ? ((double)rank - 1) / denom : 0;
918
- auto rdata = FlatVector::GetData<double>(result);
919
- rdata[output_offset] = percent_rank;
920
- break;
921
- }
922
- case ExpressionType::WINDOW_CUME_DIST: {
923
- int64_t denom = (int64_t)bounds.partition_end - bounds.partition_start;
924
- double cume_dist = denom > 0 ? ((double)(bounds.peer_end - bounds.partition_start)) / denom : 0;
925
- auto rdata = FlatVector::GetData<double>(result);
926
- rdata[output_offset] = cume_dist;
927
- break;
928
- }
929
- case ExpressionType::WINDOW_NTILE: {
930
- D_ASSERT(payload_collection.ColumnCount() == 1);
931
- if (CellIsNull(payload_collection, 0, row_idx)) {
932
- FlatVector::SetNull(result, output_offset, true);
933
- } else {
934
- auto n_param = GetCell<int64_t>(payload_collection, 0, row_idx);
935
- if (n_param < 1) {
936
- throw InvalidInputException("Argument for ntile must be greater than zero");
937
- }
938
- // With thanks from SQLite's ntileValueFunc()
939
- int64_t n_total = bounds.partition_end - bounds.partition_start;
940
- if (n_param > n_total) {
941
- // more groups allowed than we have values
942
- // map every entry to a unique group
943
- n_param = n_total;
944
- }
945
- int64_t n_size = (n_total / n_param);
946
- // find the row idx within the group
947
- D_ASSERT(row_idx >= bounds.partition_start);
948
- int64_t adjusted_row_idx = row_idx - bounds.partition_start;
949
- // now compute the ntile
950
- int64_t n_large = n_total - n_param * n_size;
951
- int64_t i_small = n_large * (n_size + 1);
952
- int64_t result_ntile;
953
-
954
- D_ASSERT((n_large * (n_size + 1) + (n_param - n_large) * n_size) == n_total);
955
-
956
- if (adjusted_row_idx < i_small) {
957
- result_ntile = 1 + adjusted_row_idx / (n_size + 1);
958
- } else {
959
- result_ntile = 1 + n_large + (adjusted_row_idx - i_small) / n_size;
960
- }
961
- // result has to be between [1, NTILE]
962
- D_ASSERT(result_ntile >= 1 && result_ntile <= n_param);
963
- auto rdata = FlatVector::GetData<int64_t>(result);
964
- rdata[output_offset] = result_ntile;
965
- }
966
- break;
967
- }
968
- case ExpressionType::WINDOW_LEAD:
969
- case ExpressionType::WINDOW_LAG: {
970
- int64_t offset = 1;
971
- if (wexpr.offset_expr) {
972
- offset = leadlag_offset.GetCell<int64_t>(output_offset);
973
- }
974
- int64_t val_idx = (int64_t)row_idx;
975
- if (wexpr.type == ExpressionType::WINDOW_LEAD) {
976
- val_idx += offset;
977
- } else {
978
- val_idx -= offset;
979
- }
980
-
981
- idx_t delta = 0;
982
- if (val_idx < (int64_t)row_idx) {
983
- // Count backwards
984
- delta = idx_t(row_idx - val_idx);
985
- val_idx = FindPrevStart(ignore_nulls, bounds.partition_start, row_idx, delta);
986
- } else if (val_idx > (int64_t)row_idx) {
987
- delta = idx_t(val_idx - row_idx);
988
- val_idx = FindNextStart(ignore_nulls, row_idx + 1, bounds.partition_end, delta);
989
- }
990
- // else offset is zero, so don't move.
991
-
992
- if (!delta) {
993
- CopyCell(payload_collection, 0, val_idx, result, output_offset);
994
- } else if (wexpr.default_expr) {
995
- leadlag_default.CopyCell(result, output_offset);
996
- } else {
997
- FlatVector::SetNull(result, output_offset, true);
998
- }
999
- break;
1000
- }
1001
- case ExpressionType::WINDOW_FIRST_VALUE: {
1002
- // Same as NTH_VALUE(..., 1)
1003
- idx_t n = 1;
1004
- const auto first_idx = FindNextStart(ignore_nulls, bounds.window_start, bounds.window_end, n);
1005
- if (!n) {
1006
- CopyCell(payload_collection, 0, first_idx, result, output_offset);
1007
- } else {
1008
- FlatVector::SetNull(result, output_offset, true);
1009
- }
1010
- break;
1011
- }
1012
- case ExpressionType::WINDOW_LAST_VALUE: {
1013
- idx_t n = 1;
1014
- const auto last_idx = FindPrevStart(ignore_nulls, bounds.window_start, bounds.window_end, n);
1015
- if (!n) {
1016
- CopyCell(payload_collection, 0, last_idx, result, output_offset);
1017
- } else {
1018
- FlatVector::SetNull(result, output_offset, true);
1019
- }
1020
- break;
1021
- }
1022
- case ExpressionType::WINDOW_NTH_VALUE: {
1023
- D_ASSERT(payload_collection.ColumnCount() == 2);
1024
- // Returns value evaluated at the row that is the n'th row of the window frame (counting from 1);
1025
- // returns NULL if there is no such row.
1026
- if (CellIsNull(payload_collection, 1, row_idx)) {
1027
- FlatVector::SetNull(result, output_offset, true);
1028
- } else {
1029
- auto n_param = GetCell<int64_t>(payload_collection, 1, row_idx);
1030
- if (n_param < 1) {
1031
- FlatVector::SetNull(result, output_offset, true);
1032
- } else {
1033
- auto n = idx_t(n_param);
1034
- const auto nth_index = FindNextStart(ignore_nulls, bounds.window_start, bounds.window_end, n);
1035
- if (!n) {
1036
- CopyCell(payload_collection, 0, nth_index, result, output_offset);
1037
- } else {
1038
- FlatVector::SetNull(result, output_offset, true);
1039
- }
1040
- }
1041
- }
1042
- break;
1043
- }
1044
- default:
1045
- throw InternalException("Window aggregate type %s", ExpressionTypeToString(wexpr.type));
1046
- }
113
+ throw InternalException("Window aggregate type %s", ExpressionTypeToString(wexpr.type));
1047
114
  }
1048
-
1049
- result.Verify(input_chunk.size());
1050
115
  }
1051
116
 
1052
117
  //===--------------------------------------------------------------------===//
@@ -1060,9 +125,11 @@ SinkResultType PhysicalWindow::Sink(ExecutionContext &context, DataChunk &chunk,
1060
125
  return SinkResultType::NEED_MORE_INPUT;
1061
126
  }
1062
127
 
1063
- void PhysicalWindow::Combine(ExecutionContext &context, GlobalSinkState &gstate_p, LocalSinkState &lstate_p) const {
1064
- auto &lstate = lstate_p.Cast<WindowLocalSinkState>();
128
+ SinkCombineResultType PhysicalWindow::Combine(ExecutionContext &context, OperatorSinkCombineInput &input) const {
129
+ auto &lstate = input.local_state.Cast<WindowLocalSinkState>();
1065
130
  lstate.Combine();
131
+
132
+ return SinkCombineResultType::FINISHED;
1066
133
  }
1067
134
 
1068
135
  unique_ptr<LocalSinkState> PhysicalWindow::GetLocalSinkState(ExecutionContext &context) const {
@@ -1075,8 +142,8 @@ unique_ptr<GlobalSinkState> PhysicalWindow::GetGlobalSinkState(ClientContext &co
1075
142
  }
1076
143
 
1077
144
  SinkFinalizeType PhysicalWindow::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
1078
- GlobalSinkState &gstate_p) const {
1079
- auto &state = gstate_p.Cast<WindowGlobalSinkState>();
145
+ OperatorSinkFinalizeInput &input) const {
146
+ auto &state = input.global_state.Cast<WindowGlobalSinkState>();
1080
147
 
1081
148
  // Did we get any data?
1082
149
  if (!state.global_partition->count) {
@@ -1106,64 +173,93 @@ SinkFinalizeType PhysicalWindow::Finalize(Pipeline &pipeline, Event &event, Clie
1106
173
  //===--------------------------------------------------------------------===//
1107
174
  // Source
1108
175
  //===--------------------------------------------------------------------===//
176
+ class WindowPartitionSourceState;
177
+
1109
178
  class WindowGlobalSourceState : public GlobalSourceState {
1110
179
  public:
1111
- explicit WindowGlobalSourceState(WindowGlobalSinkState &gsink) : gsink(*gsink.global_partition), next_bin(0) {
1112
- }
180
+ using HashGroupSourcePtr = unique_ptr<WindowPartitionSourceState>;
181
+ using ScannerPtr = unique_ptr<RowDataCollectionScanner>;
182
+ using Task = std::pair<WindowPartitionSourceState *, ScannerPtr>;
1113
183
 
1114
- PartitionGlobalSinkState &gsink;
1115
- //! The output read position.
1116
- atomic<idx_t> next_bin;
184
+ WindowGlobalSourceState(ClientContext &context_p, WindowGlobalSinkState &gsink_p);
185
+
186
+ //! Get the next task
187
+ Task NextTask(idx_t hash_bin);
188
+
189
+ //! Context for executing computations
190
+ ClientContext &context;
191
+ //! All the sunk data
192
+ WindowGlobalSinkState &gsink;
193
+ //! The next group to build.
194
+ atomic<idx_t> next_build;
195
+ //! The built groups
196
+ vector<HashGroupSourcePtr> built;
197
+ //! Serialise access to the built hash groups
198
+ mutable mutex built_lock;
199
+ //! The number of unfinished tasks
200
+ atomic<idx_t> tasks_remaining;
1117
201
 
1118
202
  public:
1119
203
  idx_t MaxThreads() override {
1120
- // If there is only one partition, we have to process it on one thread.
1121
- if (!gsink.grouping_data) {
1122
- return 1;
1123
- }
204
+ return tasks_remaining;
205
+ }
206
+
207
+ private:
208
+ Task CreateTask(idx_t hash_bin);
209
+ Task StealWork();
210
+ };
211
+
212
+ WindowGlobalSourceState::WindowGlobalSourceState(ClientContext &context_p, WindowGlobalSinkState &gsink_p)
213
+ : context(context_p), gsink(gsink_p), next_build(0), tasks_remaining(0) {
214
+ auto &hash_groups = gsink.global_partition->hash_groups;
1124
215
 
1125
- // If there is not a lot of data, process serially.
1126
- if (gsink.count < STANDARD_ROW_GROUPS_SIZE) {
1127
- return 1;
216
+ auto &gpart = gsink.global_partition;
217
+ if (hash_groups.empty()) {
218
+ // OVER()
219
+ built.resize(1);
220
+ if (gpart->rows) {
221
+ tasks_remaining += gpart->rows->blocks.size();
1128
222
  }
223
+ } else {
224
+ built.resize(hash_groups.size());
225
+ for (auto &hash_group : hash_groups) {
226
+ if (!hash_group) {
227
+ continue;
228
+ }
229
+ auto &global_sort_state = *hash_group->global_sort;
230
+ if (global_sort_state.sorted_blocks.empty()) {
231
+ continue;
232
+ }
1129
233
 
1130
- return gsink.hash_groups.size();
234
+ D_ASSERT(global_sort_state.sorted_blocks.size() == 1);
235
+ auto &sb = *global_sort_state.sorted_blocks[0];
236
+ auto &sd = *sb.payload_data;
237
+ tasks_remaining += sd.data_blocks.size();
238
+ }
1131
239
  }
1132
- };
240
+ }
1133
241
 
1134
- // Per-thread read state
1135
- class WindowLocalSourceState : public LocalSourceState {
242
+ // Per-bin evaluation state (build and evaluate)
243
+ class WindowPartitionSourceState {
1136
244
  public:
1137
245
  using HashGroupPtr = unique_ptr<PartitionGlobalHashGroup>;
1138
- using WindowExecutorPtr = unique_ptr<WindowExecutor>;
1139
- using WindowExecutors = vector<WindowExecutorPtr>;
1140
-
1141
- WindowLocalSourceState(const PhysicalWindow &op_p, ExecutionContext &context, WindowGlobalSourceState &gsource)
1142
- : context(context.client), op(op_p), gsink(gsource.gsink) {
1143
-
1144
- vector<LogicalType> output_types;
1145
- for (idx_t expr_idx = 0; expr_idx < op.select_list.size(); ++expr_idx) {
1146
- D_ASSERT(op.select_list[expr_idx]->GetExpressionClass() == ExpressionClass::BOUND_WINDOW);
1147
- auto &wexpr = op.select_list[expr_idx]->Cast<BoundWindowExpression>();
1148
- output_types.emplace_back(wexpr.return_type);
1149
- }
1150
- output_chunk.Initialize(Allocator::Get(context.client), output_types);
246
+ using ExecutorPtr = unique_ptr<WindowExecutor>;
247
+ using Executors = vector<ExecutorPtr>;
1151
248
 
1152
- const auto &input_types = gsink.payload_types;
1153
- layout.Initialize(input_types);
1154
- input_chunk.Initialize(gsink.allocator, input_types);
249
+ WindowPartitionSourceState(ClientContext &context, WindowGlobalSourceState &gsource)
250
+ : context(context), op(gsource.gsink.op), gsource(gsource), read_block_idx(0), unscanned(0) {
251
+ layout.Initialize(gsource.gsink.global_partition->payload_types);
1155
252
  }
1156
253
 
254
+ unique_ptr<RowDataCollectionScanner> GetScanner() const;
1157
255
  void MaterializeSortedData();
1158
- void GeneratePartition(WindowGlobalSinkState &gstate, const idx_t hash_bin);
1159
- void Scan(DataChunk &chunk);
256
+ void BuildPartition(WindowGlobalSinkState &gstate, const idx_t hash_bin);
1160
257
 
1161
- HashGroupPtr hash_group;
1162
258
  ClientContext &context;
1163
259
  const PhysicalWindow &op;
260
+ WindowGlobalSourceState &gsource;
1164
261
 
1165
- PartitionGlobalSinkState &gsink;
1166
-
262
+ HashGroupPtr hash_group;
1167
263
  //! The generated input chunks
1168
264
  unique_ptr<RowDataCollection> rows;
1169
265
  unique_ptr<RowDataCollection> heap;
@@ -1174,20 +270,21 @@ public:
1174
270
  //! The order boundary mask
1175
271
  vector<validity_t> order_bits;
1176
272
  ValidityMask order_mask;
273
+ //! External paging
274
+ bool external;
1177
275
  //! The current execution functions
1178
- WindowExecutors window_execs;
276
+ Executors executors;
1179
277
 
1180
- //! The read partition
278
+ //! The bin number
1181
279
  idx_t hash_bin;
1182
- //! The read cursor
1183
- unique_ptr<RowDataCollectionScanner> scanner;
1184
- //! Buffer for the inputs
1185
- DataChunk input_chunk;
1186
- //! Buffer for window results
1187
- DataChunk output_chunk;
280
+
281
+ //! The next block to read.
282
+ mutable atomic<idx_t> read_block_idx;
283
+ //! The number of remaining unscanned blocks.
284
+ atomic<idx_t> unscanned;
1188
285
  };
1189
286
 
1190
- void WindowLocalSourceState::MaterializeSortedData() {
287
+ void WindowPartitionSourceState::MaterializeSortedData() {
1191
288
  auto &global_sort_state = *hash_group->global_sort;
1192
289
  if (global_sort_state.sorted_blocks.empty()) {
1193
290
  return;
@@ -1226,7 +323,21 @@ void WindowLocalSourceState::MaterializeSortedData() {
1226
323
  [&](idx_t c, const unique_ptr<RowDataBlock> &b) { return c + b->count; });
1227
324
  }
1228
325
 
1229
- void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, const idx_t hash_bin_p) {
326
+ unique_ptr<RowDataCollectionScanner> WindowPartitionSourceState::GetScanner() const {
327
+ auto &gsink = *gsource.gsink.global_partition;
328
+ if ((gsink.rows && !hash_bin) || hash_bin < gsink.hash_groups.size()) {
329
+ const auto block_idx = read_block_idx++;
330
+ if (block_idx >= rows->blocks.size()) {
331
+ return nullptr;
332
+ }
333
+ // Second pass can flush
334
+ --gsource.tasks_remaining;
335
+ return make_uniq<RowDataCollectionScanner>(*rows, *heap, layout, external, block_idx, true);
336
+ }
337
+ return nullptr;
338
+ }
339
+
340
+ void WindowPartitionSourceState::BuildPartition(WindowGlobalSinkState &gstate, const idx_t hash_bin_p) {
1230
341
  // Get rid of any stale data
1231
342
  hash_bin = hash_bin_p;
1232
343
 
@@ -1236,11 +347,12 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
1236
347
  // 3. Multiple partitions (sorting and hashing)
1237
348
 
1238
349
  // How big is the partition?
350
+ auto &gpart = *gsource.gsink.global_partition;
1239
351
  idx_t count = 0;
1240
- if (hash_bin < gsink.hash_groups.size() && gsink.hash_groups[hash_bin]) {
1241
- count = gsink.hash_groups[hash_bin]->count;
1242
- } else if (gsink.rows && !hash_bin) {
1243
- count = gsink.count;
352
+ if (hash_bin < gpart.hash_groups.size() && gpart.hash_groups[hash_bin]) {
353
+ count = gpart.hash_groups[hash_bin]->count;
354
+ } else if (gpart.rows && !hash_bin) {
355
+ count = gpart.count;
1244
356
  } else {
1245
357
  return;
1246
358
  }
@@ -1256,19 +368,20 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
1256
368
  order_mask.Initialize(order_bits.data());
1257
369
 
1258
370
  // Scan the sorted data into new Collections
1259
- auto external = gsink.external;
1260
- if (gsink.rows && !hash_bin) {
371
+ external = gpart.external;
372
+ if (gpart.rows && !hash_bin) {
1261
373
  // Simple mask
1262
374
  partition_mask.SetValidUnsafe(0);
1263
375
  order_mask.SetValidUnsafe(0);
1264
376
  // No partition - align the heap blocks with the row blocks
1265
- rows = gsink.rows->CloneEmpty(gsink.rows->keep_pinned);
1266
- heap = gsink.strings->CloneEmpty(gsink.strings->keep_pinned);
1267
- RowDataCollectionScanner::AlignHeapBlocks(*rows, *heap, *gsink.rows, *gsink.strings, layout);
377
+ rows = gpart.rows->CloneEmpty(gpart.rows->keep_pinned);
378
+ heap = gpart.strings->CloneEmpty(gpart.strings->keep_pinned);
379
+ RowDataCollectionScanner::AlignHeapBlocks(*rows, *heap, *gpart.rows, *gpart.strings, layout);
1268
380
  external = true;
1269
- } else if (hash_bin < gsink.hash_groups.size() && gsink.hash_groups[hash_bin]) {
381
+ } else if (hash_bin < gpart.hash_groups.size()) {
1270
382
  // Overwrite the collections with the sorted data
1271
- hash_group = std::move(gsink.hash_groups[hash_bin]);
383
+ D_ASSERT(gpart.hash_groups[hash_bin].get());
384
+ hash_group = std::move(gpart.hash_groups[hash_bin]);
1272
385
  hash_group->ComputeMasks(partition_mask, order_mask);
1273
386
  external = hash_group->global_sort->external;
1274
387
  MaterializeSortedData();
@@ -1277,17 +390,18 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
1277
390
  }
1278
391
 
1279
392
  // Create the executors for each function
1280
- window_execs.clear();
393
+ executors.clear();
1281
394
  for (idx_t expr_idx = 0; expr_idx < op.select_list.size(); ++expr_idx) {
1282
395
  D_ASSERT(op.select_list[expr_idx]->GetExpressionClass() == ExpressionClass::BOUND_WINDOW);
1283
396
  auto &wexpr = op.select_list[expr_idx]->Cast<BoundWindowExpression>();
1284
- auto wexec = make_uniq<WindowExecutor>(wexpr, context, partition_mask, count);
1285
- window_execs.emplace_back(std::move(wexec));
397
+ auto wexec = WindowExecutorFactory(wexpr, context, partition_mask, order_mask, count, gstate.mode);
398
+ executors.emplace_back(std::move(wexec));
1286
399
  }
1287
400
 
1288
401
  // First pass over the input without flushing
1289
- // TODO: Factor out the constructor data as global state
1290
- scanner = make_uniq<RowDataCollectionScanner>(*rows, *heap, layout, external, false);
402
+ DataChunk input_chunk;
403
+ input_chunk.Initialize(gpart.allocator, gpart.payload_types);
404
+ auto scanner = make_uniq<RowDataCollectionScanner>(*rows, *heap, layout, external, false);
1291
405
  idx_t input_idx = 0;
1292
406
  while (true) {
1293
407
  input_chunk.Reset();
@@ -1297,38 +411,207 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
1297
411
  }
1298
412
 
1299
413
  // TODO: Parallelization opportunity
1300
- for (auto &wexec : window_execs) {
414
+ for (auto &wexec : executors) {
1301
415
  wexec->Sink(input_chunk, input_idx, scanner->Count());
1302
416
  }
1303
417
  input_idx += input_chunk.size();
1304
418
  }
1305
419
 
1306
420
  // TODO: Parallelization opportunity
1307
- for (auto &wexec : window_execs) {
1308
- wexec->Finalize(gstate.mode);
421
+ for (auto &wexec : executors) {
422
+ wexec->Finalize();
1309
423
  }
1310
424
 
1311
425
  // External scanning assumes all blocks are swizzled.
1312
426
  scanner->ReSwizzle();
1313
427
 
1314
- // Second pass can flush
1315
- scanner->Reset(true);
428
+ // Start the block countdown
429
+ unscanned = rows->blocks.size();
430
+ }
431
+
432
+ // Per-thread scan state
433
+ class WindowLocalSourceState : public LocalSourceState {
434
+ public:
435
+ using ReadStatePtr = unique_ptr<WindowExecutorState>;
436
+ using ReadStates = vector<ReadStatePtr>;
437
+
438
+ explicit WindowLocalSourceState(WindowGlobalSourceState &gsource);
439
+ bool NextPartition();
440
+ void Scan(DataChunk &chunk);
441
+
442
+ //! The shared source state
443
+ WindowGlobalSourceState &gsource;
444
+ //! The current bin being processed
445
+ idx_t hash_bin;
446
+ //! The current source being processed
447
+ optional_ptr<WindowPartitionSourceState> partition_source;
448
+ //! The read cursor
449
+ unique_ptr<RowDataCollectionScanner> scanner;
450
+ //! Buffer for the inputs
451
+ DataChunk input_chunk;
452
+ //! Executor read states.
453
+ ReadStates read_states;
454
+ //! Buffer for window results
455
+ DataChunk output_chunk;
456
+ };
457
+
458
+ WindowLocalSourceState::WindowLocalSourceState(WindowGlobalSourceState &gsource)
459
+ : gsource(gsource), hash_bin(gsource.built.size()) {
460
+ auto &gsink = *gsource.gsink.global_partition;
461
+ auto &op = gsource.gsink.op;
462
+
463
+ input_chunk.Initialize(gsink.allocator, gsink.payload_types);
464
+
465
+ vector<LogicalType> output_types;
466
+ for (idx_t expr_idx = 0; expr_idx < op.select_list.size(); ++expr_idx) {
467
+ D_ASSERT(op.select_list[expr_idx]->GetExpressionClass() == ExpressionClass::BOUND_WINDOW);
468
+ auto &wexpr = op.select_list[expr_idx]->Cast<BoundWindowExpression>();
469
+ output_types.emplace_back(wexpr.return_type);
470
+ }
471
+ output_chunk.Initialize(Allocator::Get(gsource.context), output_types);
472
+ }
473
+
474
+ WindowGlobalSourceState::Task WindowGlobalSourceState::CreateTask(idx_t hash_bin) {
475
+ // Build outside the lock so no one tries to steal before we are done.
476
+ auto partition_source = make_uniq<WindowPartitionSourceState>(context, *this);
477
+ partition_source->BuildPartition(gsink, hash_bin);
478
+ Task result(partition_source.get(), partition_source->GetScanner());
479
+
480
+ // Is there any data to scan?
481
+ if (result.second) {
482
+ lock_guard<mutex> built_guard(built_lock);
483
+ built[hash_bin] = std::move(partition_source);
484
+
485
+ return result;
486
+ }
487
+
488
+ return Task();
489
+ }
490
+
491
+ WindowGlobalSourceState::Task WindowGlobalSourceState::StealWork() {
492
+ for (idx_t hash_bin = 0; hash_bin < built.size(); ++hash_bin) {
493
+ lock_guard<mutex> built_guard(built_lock);
494
+ auto &partition_source = built[hash_bin];
495
+ if (!partition_source) {
496
+ continue;
497
+ }
498
+
499
+ Task result(partition_source.get(), partition_source->GetScanner());
500
+
501
+ // Is there any data to scan?
502
+ if (result.second) {
503
+ return result;
504
+ }
505
+ }
506
+
507
+ // Nothing to steal
508
+ return Task();
509
+ }
510
+
511
+ WindowGlobalSourceState::Task WindowGlobalSourceState::NextTask(idx_t hash_bin) {
512
+ auto &hash_groups = gsink.global_partition->hash_groups;
513
+ const auto bin_count = built.size();
514
+
515
+ // Flush unneeded data
516
+ if (hash_bin < bin_count) {
517
+ // Lock and delete when all blocks have been scanned
518
+ // We do this here instead of in NextScan so the WindowLocalSourceState
519
+ // has a chance to delete its state objects first,
520
+ // which may reference the partition_source
521
+
522
+ // Delete data outside the lock in case it is slow
523
+ HashGroupSourcePtr killed;
524
+ lock_guard<mutex> built_guard(built_lock);
525
+ auto &partition_source = built[hash_bin];
526
+ if (partition_source && !partition_source->unscanned) {
527
+ killed = std::move(partition_source);
528
+ }
529
+ }
530
+
531
+ hash_bin = next_build++;
532
+ if (hash_bin < bin_count) {
533
+ // Find a non-empty hash group.
534
+ for (; hash_bin < hash_groups.size(); hash_bin = next_build++) {
535
+ if (hash_groups[hash_bin]) {
536
+ auto result = CreateTask(hash_bin);
537
+ if (result.second) {
538
+ return result;
539
+ }
540
+ }
541
+ }
542
+
543
+ // OVER() doesn't have a hash_group
544
+ if (hash_groups.empty()) {
545
+ auto result = CreateTask(hash_bin);
546
+ if (result.second) {
547
+ return result;
548
+ }
549
+ }
550
+ }
551
+
552
+ // Work stealing
553
+ while (!context.interrupted && tasks_remaining) {
554
+ auto result = StealWork();
555
+ if (result.second) {
556
+ return result;
557
+ }
558
+
559
+ // If there is nothing to steal but there are unfinished partitions,
560
+ // yield until any pending builds are done.
561
+ TaskScheduler::GetScheduler(context).YieldThread();
562
+ }
563
+
564
+ return Task();
565
+ }
566
+
567
+ bool WindowLocalSourceState::NextPartition() {
568
+ // Release old states before the source
569
+ scanner.reset();
570
+ read_states.clear();
571
+
572
+ // Get a partition_source that is not finished
573
+ while (!scanner) {
574
+ auto task = gsource.NextTask(hash_bin);
575
+ if (!task.first) {
576
+ return false;
577
+ }
578
+ partition_source = task.first;
579
+ scanner = std::move(task.second);
580
+ hash_bin = partition_source->hash_bin;
581
+ }
582
+
583
+ for (auto &wexec : partition_source->executors) {
584
+ read_states.emplace_back(wexec->GetExecutorState());
585
+ }
586
+
587
+ return true;
1316
588
  }
1317
589
 
1318
590
  void WindowLocalSourceState::Scan(DataChunk &result) {
1319
591
  D_ASSERT(scanner);
1320
592
  if (!scanner->Remaining()) {
1321
- return;
593
+ lock_guard<mutex> built_guard(gsource.built_lock);
594
+ --partition_source->unscanned;
595
+ scanner = partition_source->GetScanner();
596
+
597
+ if (!scanner) {
598
+ partition_source = nullptr;
599
+ read_states.clear();
600
+ return;
601
+ }
1322
602
  }
1323
603
 
1324
604
  const auto position = scanner->Scanned();
1325
605
  input_chunk.Reset();
1326
606
  scanner->Scan(input_chunk);
1327
607
 
608
+ auto &executors = partition_source->executors;
1328
609
  output_chunk.Reset();
1329
- for (idx_t expr_idx = 0; expr_idx < window_execs.size(); ++expr_idx) {
1330
- auto &executor = *window_execs[expr_idx];
1331
- executor.Evaluate(position, input_chunk, output_chunk.data[expr_idx], partition_mask, order_mask);
610
+ for (idx_t expr_idx = 0; expr_idx < executors.size(); ++expr_idx) {
611
+ auto &executor = *executors[expr_idx];
612
+ auto &lstate = *read_states[expr_idx];
613
+ auto &result = output_chunk.data[expr_idx];
614
+ executor.Evaluate(position, input_chunk, result, lstate);
1332
615
  }
1333
616
  output_chunk.SetCardinality(input_chunk);
1334
617
  output_chunk.Verify();
@@ -1345,43 +628,25 @@ void WindowLocalSourceState::Scan(DataChunk &result) {
1345
628
  }
1346
629
 
1347
630
  unique_ptr<LocalSourceState> PhysicalWindow::GetLocalSourceState(ExecutionContext &context,
1348
- GlobalSourceState &gstate_p) const {
1349
- auto &gstate = gstate_p.Cast<WindowGlobalSourceState>();
1350
- return make_uniq<WindowLocalSourceState>(*this, context, gstate);
631
+ GlobalSourceState &gsource_p) const {
632
+ auto &gsource = gsource_p.Cast<WindowGlobalSourceState>();
633
+ return make_uniq<WindowLocalSourceState>(gsource);
1351
634
  }
1352
635
 
1353
636
  unique_ptr<GlobalSourceState> PhysicalWindow::GetGlobalSourceState(ClientContext &context) const {
1354
637
  auto &gsink = sink_state->Cast<WindowGlobalSinkState>();
1355
- return make_uniq<WindowGlobalSourceState>(gsink);
638
+ return make_uniq<WindowGlobalSourceState>(context, gsink);
1356
639
  }
1357
640
 
1358
641
  SourceResultType PhysicalWindow::GetData(ExecutionContext &context, DataChunk &chunk,
1359
642
  OperatorSourceInput &input) const {
1360
643
  auto &lsource = input.local_state.Cast<WindowLocalSourceState>();
1361
- auto &gsource = input.global_state.Cast<WindowGlobalSourceState>();
1362
- auto &gsink = sink_state->Cast<WindowGlobalSinkState>();
1363
-
1364
- auto &hash_groups = gsink.global_partition->hash_groups;
1365
- const auto bin_count = hash_groups.empty() ? 1 : hash_groups.size();
1366
-
1367
644
  while (chunk.size() == 0) {
1368
645
  // Move to the next bin if we are done.
1369
- while (!lsource.scanner || !lsource.scanner->Remaining()) {
1370
- lsource.scanner.reset();
1371
- lsource.rows.reset();
1372
- lsource.heap.reset();
1373
- lsource.hash_group.reset();
1374
- auto hash_bin = gsource.next_bin++;
1375
- if (hash_bin >= bin_count) {
646
+ while (!lsource.scanner) {
647
+ if (!lsource.NextPartition()) {
1376
648
  return chunk.size() > 0 ? SourceResultType::HAVE_MORE_OUTPUT : SourceResultType::FINISHED;
1377
649
  }
1378
-
1379
- for (; hash_bin < hash_groups.size(); hash_bin = gsource.next_bin++) {
1380
- if (hash_groups[hash_bin]) {
1381
- break;
1382
- }
1383
- }
1384
- lsource.GeneratePartition(gsink, hash_bin);
1385
650
  }
1386
651
 
1387
652
  lsource.Scan(chunk);