duckdb 0.8.2-dev37.0 → 0.8.2-dev3989.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1087)
  1. package/README.md +7 -0
  2. package/binding.gyp +29 -13
  3. package/binding.gyp.in +1 -1
  4. package/configure.py +11 -3
  5. package/duckdb_extension_config.cmake +10 -0
  6. package/package.json +1 -1
  7. package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
  8. package/src/duckdb/extension/icu/icu-datefunc.cpp +10 -1
  9. package/src/duckdb/extension/icu/icu-datepart.cpp +162 -41
  10. package/src/duckdb/extension/icu/icu-datesub.cpp +3 -2
  11. package/src/duckdb/extension/icu/icu-datetrunc.cpp +2 -1
  12. package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
  13. package/src/duckdb/extension/icu/icu-makedate.cpp +19 -6
  14. package/src/duckdb/extension/icu/icu-strptime.cpp +5 -24
  15. package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
  16. package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
  17. package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
  18. package/src/duckdb/extension/icu/icu_extension.cpp +10 -12
  19. package/src/duckdb/extension/json/buffered_json_reader.cpp +2 -0
  20. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +5 -19
  21. package/src/duckdb/extension/json/include/json_common.hpp +47 -231
  22. package/src/duckdb/extension/json/include/json_deserializer.hpp +7 -16
  23. package/src/duckdb/extension/json/include/json_enums.hpp +60 -0
  24. package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
  25. package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
  26. package/src/duckdb/extension/json/include/json_scan.hpp +14 -10
  27. package/src/duckdb/extension/json/include/json_serializer.hpp +9 -15
  28. package/src/duckdb/extension/json/include/json_transform.hpp +3 -0
  29. package/src/duckdb/extension/json/json_common.cpp +272 -40
  30. package/src/duckdb/extension/json/json_deserializer.cpp +37 -73
  31. package/src/duckdb/extension/json/json_enums.cpp +105 -0
  32. package/src/duckdb/extension/json/json_functions/json_create.cpp +21 -2
  33. package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
  34. package/src/duckdb/extension/json/json_functions/json_transform.cpp +93 -38
  35. package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
  36. package/src/duckdb/extension/json/json_functions.cpp +26 -25
  37. package/src/duckdb/extension/json/json_scan.cpp +47 -6
  38. package/src/duckdb/extension/json/json_serializer.cpp +29 -72
  39. package/src/duckdb/extension/json/serialize_json.cpp +92 -0
  40. package/src/duckdb/extension/parquet/column_reader.cpp +37 -25
  41. package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
  42. package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
  43. package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
  44. package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
  45. package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
  46. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
  47. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
  48. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
  49. package/src/duckdb/extension/parquet/include/parquet_reader.hpp +4 -0
  50. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
  51. package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
  52. package/src/duckdb/extension/parquet/include/parquet_timestamp.hpp +1 -0
  53. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +28 -5
  54. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
  55. package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
  56. package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
  57. package/src/duckdb/extension/parquet/parquet_extension.cpp +258 -40
  58. package/src/duckdb/extension/parquet/parquet_reader.cpp +10 -10
  59. package/src/duckdb/extension/parquet/parquet_statistics.cpp +25 -8
  60. package/src/duckdb/extension/parquet/parquet_timestamp.cpp +6 -0
  61. package/src/duckdb/extension/parquet/parquet_writer.cpp +149 -31
  62. package/src/duckdb/extension/parquet/serialize_parquet.cpp +26 -0
  63. package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
  64. package/src/duckdb/src/catalog/catalog.cpp +147 -70
  65. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +8 -11
  66. package/src/duckdb/src/catalog/catalog_entry/index_catalog_entry.cpp +17 -41
  67. package/src/duckdb/src/catalog/catalog_entry/macro_catalog_entry.cpp +2 -10
  68. package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +4 -14
  69. package/src/duckdb/src/catalog/catalog_entry/sequence_catalog_entry.cpp +11 -28
  70. package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +11 -42
  71. package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +7 -26
  72. package/src/duckdb/src/catalog/catalog_entry/view_catalog_entry.cpp +11 -27
  73. package/src/duckdb/src/catalog/catalog_entry.cpp +25 -1
  74. package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
  75. package/src/duckdb/src/catalog/catalog_set.cpp +0 -63
  76. package/src/duckdb/src/catalog/default/default_functions.cpp +21 -0
  77. package/src/duckdb/src/catalog/dependency_manager.cpp +0 -36
  78. package/src/duckdb/src/common/adbc/adbc.cpp +541 -171
  79. package/src/duckdb/src/common/adbc/driver_manager.cpp +92 -39
  80. package/src/duckdb/src/common/adbc/nanoarrow/allocator.cpp +57 -0
  81. package/src/duckdb/src/common/adbc/nanoarrow/metadata.cpp +121 -0
  82. package/src/duckdb/src/common/adbc/nanoarrow/schema.cpp +474 -0
  83. package/src/duckdb/src/common/adbc/nanoarrow/single_batch_array_stream.cpp +84 -0
  84. package/src/duckdb/src/common/allocator.cpp +14 -2
  85. package/src/duckdb/src/common/arrow/appender/bool_data.cpp +44 -0
  86. package/src/duckdb/src/common/arrow/appender/list_data.cpp +78 -0
  87. package/src/duckdb/src/common/arrow/appender/map_data.cpp +86 -0
  88. package/src/duckdb/src/common/arrow/appender/struct_data.cpp +45 -0
  89. package/src/duckdb/src/common/arrow/appender/union_data.cpp +70 -0
  90. package/src/duckdb/src/common/arrow/arrow_appender.cpp +95 -666
  91. package/src/duckdb/src/common/arrow/arrow_converter.cpp +65 -37
  92. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +37 -42
  93. package/src/duckdb/src/common/assert.cpp +3 -0
  94. package/src/duckdb/src/common/constants.cpp +2 -1
  95. package/src/duckdb/src/common/enum_util.cpp +4979 -4458
  96. package/src/duckdb/src/common/enums/date_part_specifier.cpp +2 -0
  97. package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
  98. package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
  99. package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
  100. package/src/duckdb/src/common/exception.cpp +15 -2
  101. package/src/duckdb/src/common/extra_type_info.cpp +487 -0
  102. package/src/duckdb/src/common/field_writer.cpp +1 -1
  103. package/src/duckdb/src/common/file_buffer.cpp +1 -1
  104. package/src/duckdb/src/common/file_system.cpp +46 -12
  105. package/src/duckdb/src/common/filename_pattern.cpp +1 -1
  106. package/src/duckdb/src/common/gzip_file_system.cpp +7 -12
  107. package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
  108. package/src/duckdb/src/common/http_state.cpp +78 -0
  109. package/src/duckdb/src/common/local_file_system.cpp +36 -28
  110. package/src/duckdb/src/common/multi_file_reader.cpp +193 -20
  111. package/src/duckdb/src/common/operator/cast_operators.cpp +92 -1
  112. package/src/duckdb/src/common/operator/string_cast.cpp +45 -8
  113. package/src/duckdb/src/common/radix_partitioning.cpp +34 -39
  114. package/src/duckdb/src/common/re2_regex.cpp +1 -1
  115. package/src/duckdb/src/common/row_operations/row_aggregate.cpp +18 -3
  116. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  117. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +63 -73
  118. package/src/duckdb/src/common/serializer/binary_serializer.cpp +85 -80
  119. package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +0 -9
  120. package/src/duckdb/src/common/serializer/format_serializer.cpp +15 -0
  121. package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
  122. package/src/duckdb/src/common/sort/partition_state.cpp +102 -74
  123. package/src/duckdb/src/common/sort/sort_state.cpp +1 -1
  124. package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
  125. package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
  126. package/src/duckdb/src/common/types/bit.cpp +51 -0
  127. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
  128. package/src/duckdb/src/common/types/column/column_data_collection.cpp +68 -2
  129. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +20 -6
  130. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
  131. package/src/duckdb/src/common/types/data_chunk.cpp +46 -10
  132. package/src/duckdb/src/common/types/date.cpp +15 -0
  133. package/src/duckdb/src/common/types/hugeint.cpp +40 -0
  134. package/src/duckdb/src/common/types/hyperloglog.cpp +21 -0
  135. package/src/duckdb/src/common/types/interval.cpp +6 -0
  136. package/src/duckdb/src/common/types/list_segment.cpp +56 -198
  137. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +251 -131
  138. package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +35 -5
  139. package/src/duckdb/src/common/types/row/row_layout.cpp +3 -31
  140. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +40 -32
  141. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +41 -26
  142. package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +11 -1
  143. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
  144. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +21 -16
  145. package/src/duckdb/src/common/types/string_heap.cpp +4 -0
  146. package/src/duckdb/src/common/types/time.cpp +105 -0
  147. package/src/duckdb/src/common/types/timestamp.cpp +7 -0
  148. package/src/duckdb/src/common/types/uuid.cpp +2 -2
  149. package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
  150. package/src/duckdb/src/common/types/value.cpp +99 -60
  151. package/src/duckdb/src/common/types/vector.cpp +73 -80
  152. package/src/duckdb/src/common/types.cpp +38 -724
  153. package/src/duckdb/src/common/virtual_file_system.cpp +142 -1
  154. package/src/duckdb/src/core_functions/aggregate/holistic/approximate_quantile.cpp +26 -0
  155. package/src/duckdb/src/core_functions/aggregate/holistic/mode.cpp +5 -7
  156. package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +64 -19
  157. package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +30 -0
  158. package/src/duckdb/src/core_functions/aggregate/nested/histogram.cpp +1 -0
  159. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +83 -59
  160. package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
  161. package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
  162. package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
  163. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
  164. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
  165. package/src/duckdb/src/core_functions/function_list.cpp +10 -4
  166. package/src/duckdb/src/core_functions/scalar/date/date_diff.cpp +2 -0
  167. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +380 -89
  168. package/src/duckdb/src/core_functions/scalar/date/date_sub.cpp +2 -0
  169. package/src/duckdb/src/core_functions/scalar/date/date_trunc.cpp +4 -0
  170. package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
  171. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
  172. package/src/duckdb/src/core_functions/scalar/date/strftime.cpp +10 -0
  173. package/src/duckdb/src/core_functions/scalar/debug/vector_type.cpp +23 -0
  174. package/src/duckdb/src/core_functions/scalar/enum/enum_functions.cpp +16 -12
  175. package/src/duckdb/src/core_functions/scalar/generic/current_setting.cpp +3 -1
  176. package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +314 -82
  177. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
  178. package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +23 -3
  179. package/src/duckdb/src/core_functions/scalar/map/map_entries.cpp +2 -2
  180. package/src/duckdb/src/core_functions/scalar/string/to_base.cpp +66 -0
  181. package/src/duckdb/src/core_functions/scalar/union/union_tag.cpp +1 -1
  182. package/src/duckdb/src/execution/aggregate_hashtable.cpp +226 -346
  183. package/src/duckdb/src/execution/column_binding_resolver.cpp +10 -7
  184. package/src/duckdb/src/execution/expression_executor/execute_parameter.cpp +2 -2
  185. package/src/duckdb/src/execution/expression_executor.cpp +1 -1
  186. package/src/duckdb/src/execution/index/art/art.cpp +219 -259
  187. package/src/duckdb/src/execution/index/art/art_key.cpp +0 -11
  188. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +11 -15
  189. package/src/duckdb/src/execution/index/art/iterator.cpp +130 -214
  190. package/src/duckdb/src/execution/index/art/leaf.cpp +300 -266
  191. package/src/duckdb/src/execution/index/art/node.cpp +211 -205
  192. package/src/duckdb/src/execution/index/art/node16.cpp +10 -19
  193. package/src/duckdb/src/execution/index/art/node256.cpp +10 -18
  194. package/src/duckdb/src/execution/index/art/node4.cpp +21 -23
  195. package/src/duckdb/src/execution/index/art/node48.cpp +10 -20
  196. package/src/duckdb/src/execution/index/art/prefix.cpp +308 -338
  197. package/src/duckdb/src/execution/join_hashtable.cpp +9 -10
  198. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
  199. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +250 -317
  200. package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +6 -4
  201. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
  202. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +231 -190
  203. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +367 -1068
  204. package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/base_csv_reader.cpp +157 -174
  205. package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +434 -0
  206. package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +80 -0
  207. package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +90 -0
  208. package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +95 -0
  209. package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/csv_reader_options.cpp +67 -28
  210. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +35 -0
  211. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +107 -0
  212. package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/parallel_csv_reader.cpp +46 -47
  213. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +52 -0
  214. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +336 -0
  215. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +165 -0
  216. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +398 -0
  217. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +175 -0
  218. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +39 -0
  219. package/src/duckdb/src/execution/operator/filter/physical_filter.cpp +1 -1
  220. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +12 -9
  221. package/src/duckdb/src/execution/operator/helper/physical_explain_analyze.cpp +2 -2
  222. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +10 -8
  223. package/src/duckdb/src/execution/operator/helper/physical_load.cpp +2 -1
  224. package/src/duckdb/src/execution/operator/helper/physical_materialized_collector.cpp +7 -5
  225. package/src/duckdb/src/execution/operator/helper/physical_reset.cpp +3 -1
  226. package/src/duckdb/src/execution/operator/helper/physical_set.cpp +3 -1
  227. package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +7 -5
  228. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +449 -288
  229. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -2
  230. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -2
  231. package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +13 -6
  232. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +28 -15
  233. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +35 -17
  234. package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
  235. package/src/duckdb/src/execution/operator/join/physical_nested_loop_join.cpp +7 -4
  236. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +31 -10
  237. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +41 -5
  238. package/src/duckdb/src/execution/operator/order/physical_order.cpp +7 -5
  239. package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +7 -5
  240. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
  241. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +14 -10
  242. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +11 -9
  243. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +9 -7
  244. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +14 -12
  245. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +11 -11
  246. package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +4 -2
  247. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
  248. package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +24 -27
  249. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
  250. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -12
  251. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +2 -1
  252. package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +198 -0
  253. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +2 -6
  254. package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
  255. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +16 -7
  256. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +37 -6
  257. package/src/duckdb/src/execution/physical_operator.cpp +20 -16
  258. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
  259. package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +57 -35
  260. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +32 -15
  261. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +45 -34
  262. package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
  263. package/src/duckdb/src/execution/physical_plan/plan_delim_join.cpp +2 -5
  264. package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
  265. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
  266. package/src/duckdb/src/execution/physical_plan_generator.cpp +6 -11
  267. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +636 -349
  268. package/src/duckdb/src/execution/window_executor.cpp +1285 -0
  269. package/src/duckdb/src/execution/window_segment_tree.cpp +408 -144
  270. package/src/duckdb/src/function/aggregate/distributive/count.cpp +2 -13
  271. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +6 -12
  272. package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
  273. package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
  274. package/src/duckdb/src/function/cast/cast_function_set.cpp +1 -0
  275. package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
  276. package/src/duckdb/src/function/cast/string_cast.cpp +2 -2
  277. package/src/duckdb/src/function/cast/time_casts.cpp +7 -6
  278. package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +7 -2
  279. package/src/duckdb/src/function/function.cpp +3 -1
  280. package/src/duckdb/src/function/pragma/pragma_queries.cpp +7 -1
  281. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
  282. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
  283. package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
  284. package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
  285. package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
  286. package/src/duckdb/src/function/scalar/operators/add.cpp +9 -0
  287. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +6 -3
  288. package/src/duckdb/src/function/scalar/strftime_format.cpp +4 -4
  289. package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
  290. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +39 -5
  291. package/src/duckdb/src/function/scalar_function.cpp +5 -20
  292. package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +57 -0
  293. package/src/duckdb/src/function/table/arrow.cpp +110 -88
  294. package/src/duckdb/src/function/table/arrow_conversion.cpp +86 -73
  295. package/src/duckdb/src/function/table/copy_csv.cpp +102 -97
  296. package/src/duckdb/src/function/table/read_csv.cpp +263 -141
  297. package/src/duckdb/src/function/table/system/test_all_types.cpp +48 -21
  298. package/src/duckdb/src/function/table/system_functions.cpp +1 -0
  299. package/src/duckdb/src/function/table/table_scan.cpp +42 -0
  300. package/src/duckdb/src/function/table/version/pragma_version.cpp +49 -2
  301. package/src/duckdb/src/function/table_function.cpp +4 -3
  302. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +20 -5
  303. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +3 -3
  304. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/macro_catalog_entry.hpp +1 -4
  305. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/schema_catalog_entry.hpp +2 -5
  306. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/sequence_catalog_entry.hpp +1 -6
  307. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +2 -13
  308. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/type_catalog_entry.hpp +1 -4
  309. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/view_catalog_entry.hpp +2 -5
  310. package/src/duckdb/src/include/duckdb/catalog/catalog_entry.hpp +14 -0
  311. package/src/duckdb/src/include/duckdb/catalog/catalog_set.hpp +0 -6
  312. package/src/duckdb/src/include/duckdb/common/adbc/adbc.h +1 -0
  313. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +4 -1
  314. package/src/duckdb/src/include/duckdb/common/adbc/single_batch_array_stream.hpp +16 -0
  315. package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
  316. package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +109 -0
  317. package/src/duckdb/src/include/duckdb/common/arrow/appender/bool_data.hpp +15 -0
  318. package/src/duckdb/src/include/duckdb/common/arrow/appender/enum_data.hpp +69 -0
  319. package/src/duckdb/src/include/duckdb/common/arrow/appender/list.hpp +8 -0
  320. package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +18 -0
  321. package/src/duckdb/src/include/duckdb/common/arrow/appender/map_data.hpp +18 -0
  322. package/src/duckdb/src/include/duckdb/common/arrow/appender/scalar_data.hpp +88 -0
  323. package/src/duckdb/src/include/duckdb/common/arrow/appender/struct_data.hpp +18 -0
  324. package/src/duckdb/src/include/duckdb/common/arrow/appender/union_data.hpp +21 -0
  325. package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +105 -0
  326. package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +9 -4
  327. package/src/duckdb/src/include/duckdb/common/arrow/arrow_converter.hpp +3 -5
  328. package/src/duckdb/src/include/duckdb/common/arrow/arrow_wrapper.hpp +5 -3
  329. package/src/duckdb/src/include/duckdb/common/arrow/nanoarrow/nanoarrow.h +462 -0
  330. package/src/duckdb/src/include/duckdb/common/arrow/nanoarrow/nanoarrow.hpp +14 -0
  331. package/src/duckdb/src/include/duckdb/common/arrow/result_arrow_wrapper.hpp +4 -0
  332. package/src/duckdb/src/include/duckdb/common/assert.hpp +1 -1
  333. package/src/duckdb/src/include/duckdb/common/bitpacking.hpp +70 -55
  334. package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
  335. package/src/duckdb/src/include/duckdb/common/case_insensitive_map.hpp +1 -0
  336. package/src/duckdb/src/include/duckdb/common/constants.hpp +4 -0
  337. package/src/duckdb/src/include/duckdb/common/dl.hpp +3 -1
  338. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +681 -577
  339. package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
  340. package/src/duckdb/src/include/duckdb/common/enums/date_part_specifier.hpp +9 -1
  341. package/src/duckdb/src/include/duckdb/common/enums/index_type.hpp +4 -3
  342. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
  343. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
  344. package/src/duckdb/src/include/duckdb/common/enums/operator_result_type.hpp +5 -1
  345. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
  346. package/src/duckdb/src/include/duckdb/common/enums/pending_execution_result.hpp +1 -1
  347. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
  348. package/src/duckdb/src/include/duckdb/common/exception.hpp +15 -1
  349. package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
  350. package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +215 -0
  351. package/src/duckdb/src/include/duckdb/common/field_writer.hpp +0 -4
  352. package/src/duckdb/src/include/duckdb/common/file_opener.hpp +9 -0
  353. package/src/duckdb/src/include/duckdb/common/file_system.hpp +10 -8
  354. package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +1 -1
  355. package/src/duckdb/src/include/duckdb/common/fixed_size_map.hpp +208 -0
  356. package/src/duckdb/src/include/duckdb/common/helper.hpp +8 -3
  357. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
  358. package/src/duckdb/src/include/duckdb/common/http_state.hpp +61 -28
  359. package/src/duckdb/src/include/duckdb/common/hugeint.hpp +15 -0
  360. package/src/duckdb/src/include/duckdb/common/index_vector.hpp +12 -0
  361. package/src/duckdb/src/include/duckdb/common/limits.hpp +52 -149
  362. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +11 -5
  363. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +12 -42
  364. package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
  365. package/src/duckdb/src/include/duckdb/common/numeric_utils.hpp +48 -0
  366. package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +6 -2
  367. package/src/duckdb/src/include/duckdb/common/operator/add.hpp +5 -2
  368. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +65 -4
  369. package/src/duckdb/src/include/duckdb/common/operator/multiply.hpp +3 -2
  370. package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
  371. package/src/duckdb/src/include/duckdb/common/operator/string_cast.hpp +1 -1
  372. package/src/duckdb/src/include/duckdb/common/operator/subtract.hpp +3 -2
  373. package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +3 -0
  374. package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +2 -1
  375. package/src/duckdb/src/include/duckdb/common/printer.hpp +11 -0
  376. package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
  377. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
  378. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
  379. package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +71 -30
  380. package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +48 -39
  381. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +0 -4
  382. package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +128 -0
  383. package/src/duckdb/src/include/duckdb/common/serializer/encoding_util.hpp +132 -0
  384. package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +186 -133
  385. package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +166 -121
  386. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +27 -4
  387. package/src/duckdb/src/include/duckdb/common/serializer.hpp +0 -7
  388. package/src/duckdb/src/include/duckdb/common/shared_ptr.hpp +8 -0
  389. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +34 -13
  390. package/src/duckdb/src/include/duckdb/common/stack_checker.hpp +34 -0
  391. package/src/duckdb/src/include/duckdb/common/string_util.hpp +11 -0
  392. package/src/duckdb/src/include/duckdb/common/type_util.hpp +8 -0
  393. package/src/duckdb/src/include/duckdb/common/typedefs.hpp +8 -0
  394. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
  395. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
  396. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +11 -1
  397. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +12 -1
  398. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +3 -1
  399. package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
  400. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +6 -3
  401. package/src/duckdb/src/include/duckdb/common/types/date.hpp +9 -5
  402. package/src/duckdb/src/include/duckdb/common/types/datetime.hpp +46 -3
  403. package/src/duckdb/src/include/duckdb/common/types/hyperloglog.hpp +7 -1
  404. package/src/duckdb/src/include/duckdb/common/types/interval.hpp +7 -0
  405. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +11 -15
  406. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +46 -11
  407. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +10 -1
  408. package/src/duckdb/src/include/duckdb/common/types/row/row_layout.hpp +1 -23
  409. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +14 -8
  410. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +7 -3
  411. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +7 -0
  412. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +13 -8
  413. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +6 -2
  414. package/src/duckdb/src/include/duckdb/common/types/string_heap.hpp +3 -0
  415. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
  416. package/src/duckdb/src/include/duckdb/common/types/time.hpp +5 -0
  417. package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +16 -10
  418. package/src/duckdb/src/include/duckdb/common/types/value.hpp +7 -2
  419. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +10 -3
  420. package/src/duckdb/src/include/duckdb/common/types.hpp +6 -25
  421. package/src/duckdb/src/include/duckdb/common/vector.hpp +2 -2
  422. package/src/duckdb/src/include/duckdb/common/vector_operations/aggregate_executor.hpp +7 -2
  423. package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +40 -97
  424. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
  425. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
  426. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
  427. package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +4 -2
  428. package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
  429. package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
  430. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
  431. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
  432. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
  433. package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
  434. package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
  435. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +40 -11
  436. package/src/duckdb/src/include/duckdb/core_functions/scalar/debug_functions.hpp +27 -0
  437. package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
  438. package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
  439. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +7 -5
  440. package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
  441. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +6 -4
  442. package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +4 -2
  443. package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
  444. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +12 -1
  445. package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
  446. package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
  447. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +128 -131
  448. package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
  449. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +13 -12
  450. package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +0 -1
  451. package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +22 -24
  452. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +32 -28
  453. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +46 -51
  454. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +134 -53
  455. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +5 -7
  456. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +5 -7
  457. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +7 -9
  458. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +5 -7
  459. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
  460. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +8 -7
  461. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
  462. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp +3 -3
  463. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +6 -5
  464. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +2 -2
  465. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_explain_analyze.hpp +1 -1
  466. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit.hpp +1 -1
  467. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_materialized_collector.hpp +1 -1
  468. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_vacuum.hpp +2 -2
  469. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +5 -12
  470. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_blockwise_nl_join.hpp +1 -1
  471. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_delim_join.hpp +2 -2
  472. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +2 -2
  473. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +3 -3
  474. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_nested_loop_join.hpp +2 -2
  475. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +3 -3
  476. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +12 -1
  477. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_order.hpp +2 -2
  478. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_top_n.hpp +2 -2
  479. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
  480. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +2 -2
  481. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +2 -2
  482. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +2 -2
  483. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_fixed_batch_copy.hpp +2 -2
  484. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +2 -2
  485. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_update.hpp +1 -1
  486. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/base_csv_reader.hpp +19 -19
  487. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +72 -0
  488. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +110 -0
  489. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +103 -0
  490. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_file_handle.hpp +8 -15
  491. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_line_info.hpp +5 -4
  492. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_reader_options.hpp +61 -28
  493. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +127 -0
  494. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +75 -0
  495. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +51 -0
  496. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/parallel_csv_reader.hpp +22 -28
  497. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +21 -0
  498. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
  499. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -5
  500. package/src/duckdb/src/include/duckdb/execution/operator/schema/{physical_create_index.hpp → physical_create_art_index.hpp} +14 -7
  501. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
  502. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
  503. package/src/duckdb/src/include/duckdb/execution/perfect_aggregate_hashtable.hpp +4 -2
  504. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +6 -5
  505. package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +11 -0
  506. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +6 -2
  507. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +19 -21
  508. package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +313 -0
  509. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +79 -63
  510. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +12 -4
  511. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
  512. package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
  513. package/src/duckdb/src/include/duckdb/function/copy_function.hpp +6 -1
  514. package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +80 -0
  515. package/src/duckdb/src/include/duckdb/function/macro_function.hpp +3 -0
  516. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
  517. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
  518. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
  519. package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +12 -4
  520. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
  521. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +8 -3
  522. package/src/duckdb/src/include/duckdb/function/scalar_macro_function.hpp +3 -0
  523. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +99 -0
  524. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +6 -36
  525. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +24 -12
  526. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +5 -1
  527. package/src/duckdb/src/include/duckdb/function/table_function.hpp +8 -0
  528. package/src/duckdb/src/include/duckdb/function/table_macro_function.hpp +3 -0
  529. package/src/duckdb/src/include/duckdb/function/udf_function.hpp +2 -1
  530. package/src/duckdb/src/include/duckdb/main/attached_database.hpp +1 -1
  531. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +4 -3
  532. package/src/duckdb/src/include/duckdb/main/chunk_scan_state/query_result.hpp +29 -0
  533. package/src/duckdb/src/include/duckdb/main/chunk_scan_state.hpp +43 -0
  534. package/src/duckdb/src/include/duckdb/main/client_config.hpp +7 -2
  535. package/src/duckdb/src/include/duckdb/main/client_context.hpp +16 -14
  536. package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
  537. package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -1
  538. package/src/duckdb/src/include/duckdb/main/client_properties.hpp +25 -0
  539. package/src/duckdb/src/include/duckdb/main/config.hpp +16 -1
  540. package/src/duckdb/src/include/duckdb/main/connection.hpp +3 -4
  541. package/src/duckdb/src/include/duckdb/main/extension/generated_extension_loader.hpp +27 -0
  542. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +210 -144
  543. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +41 -6
  544. package/src/duckdb/src/include/duckdb/main/extension_util.hpp +4 -0
  545. package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +5 -0
  546. package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +73 -5
  547. package/src/duckdb/src/include/duckdb/main/prepared_statement_data.hpp +6 -6
  548. package/src/duckdb/src/include/duckdb/main/query_result.hpp +2 -27
  549. package/src/duckdb/src/include/duckdb/main/relation/aggregate_relation.hpp +4 -1
  550. package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
  551. package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
  552. package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +6 -6
  553. package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
  554. package/src/duckdb/src/include/duckdb/main/settings.hpp +71 -11
  555. package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
  556. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
  557. package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
  558. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +7 -0
  559. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +38 -64
  560. package/src/duckdb/src/include/duckdb/optimizer/join_order/cost_model.hpp +37 -0
  561. package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
  562. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +14 -29
  563. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +8 -22
  564. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -12
  565. package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +89 -0
  566. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +19 -30
  567. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +113 -0
  568. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +73 -0
  569. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +73 -0
  570. package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
  571. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
  572. package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
  573. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
  574. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
  575. package/src/duckdb/src/include/duckdb/parallel/event.hpp +12 -1
  576. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -3
  577. package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +3 -2
  578. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +9 -1
  579. package/src/duckdb/src/include/duckdb/parser/column_definition.hpp +6 -5
  580. package/src/duckdb/src/include/duckdb/parser/column_list.hpp +4 -0
  581. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  582. package/src/duckdb/src/include/duckdb/parser/constraint.hpp +5 -0
  583. package/src/duckdb/src/include/duckdb/parser/constraints/check_constraint.hpp +3 -0
  584. package/src/duckdb/src/include/duckdb/parser/constraints/foreign_key_constraint.hpp +6 -0
  585. package/src/duckdb/src/include/duckdb/parser/constraints/not_null_constraint.hpp +3 -0
  586. package/src/duckdb/src/include/duckdb/parser/constraints/unique_constraint.hpp +6 -0
  587. package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +4 -1
  588. package/src/duckdb/src/include/duckdb/parser/expression/case_expression.hpp +1 -1
  589. package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +4 -1
  590. package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +4 -1
  591. package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +4 -1
  592. package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +4 -1
  593. package/src/duckdb/src/include/duckdb/parser/expression/conjunction_expression.hpp +1 -1
  594. package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +4 -1
  595. package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
  596. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +4 -1
  597. package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +4 -1
  598. package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +21 -4
  599. package/src/duckdb/src/include/duckdb/parser/expression/parameter_expression.hpp +18 -2
  600. package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +4 -1
  601. package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +1 -1
  602. package/src/duckdb/src/include/duckdb/parser/expression/subquery_expression.hpp +1 -1
  603. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +4 -1
  604. package/src/duckdb/src/include/duckdb/parser/group_by_node.hpp +11 -0
  605. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +12 -1
  606. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +66 -2
  607. package/src/duckdb/src/include/duckdb/parser/parsed_data/attach_info.hpp +8 -1
  608. package/src/duckdb/src/include/duckdb/parser/parsed_data/copy_info.hpp +8 -1
  609. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_index_info.hpp +9 -1
  610. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_info.hpp +9 -2
  611. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_macro_info.hpp +3 -0
  612. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_schema_info.hpp +3 -0
  613. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_sequence_info.hpp +3 -0
  614. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_table_info.hpp +3 -0
  615. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_type_info.hpp +3 -0
  616. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_view_info.hpp +3 -0
  617. package/src/duckdb/src/include/duckdb/parser/parsed_data/detach_info.hpp +7 -0
  618. package/src/duckdb/src/include/duckdb/parser/parsed_data/drop_info.hpp +7 -0
  619. package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +7 -0
  620. package/src/duckdb/src/include/duckdb/parser/parsed_data/load_info.hpp +17 -3
  621. package/src/duckdb/src/include/duckdb/parser/parsed_data/parse_info.hpp +22 -0
  622. package/src/duckdb/src/include/duckdb/parser/parsed_data/pragma_info.hpp +10 -0
  623. package/src/duckdb/src/include/duckdb/parser/parsed_data/show_select_info.hpp +7 -0
  624. package/src/duckdb/src/include/duckdb/parser/parsed_data/transaction_info.hpp +10 -0
  625. package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +10 -0
  626. package/src/duckdb/src/include/duckdb/parser/parser.hpp +4 -0
  627. package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
  628. package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
  629. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
  630. package/src/duckdb/src/include/duckdb/parser/statement/execute_statement.hpp +1 -1
  631. package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
  632. package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
  633. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
  634. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
  635. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +23 -26
  636. package/src/duckdb/src/include/duckdb/planner/binder.hpp +16 -5
  637. package/src/duckdb/src/include/duckdb/planner/bound_constraint.hpp +0 -8
  638. package/src/duckdb/src/include/duckdb/planner/bound_parameter_map.hpp +2 -1
  639. package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +6 -0
  640. package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
  641. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +9 -0
  642. package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
  643. package/src/duckdb/src/include/duckdb/planner/expression/bound_aggregate_expression.hpp +3 -0
  644. package/src/duckdb/src/include/duckdb/planner/expression/bound_between_expression.hpp +6 -0
  645. package/src/duckdb/src/include/duckdb/planner/expression/bound_case_expression.hpp +6 -0
  646. package/src/duckdb/src/include/duckdb/planner/expression/bound_cast_expression.hpp +6 -0
  647. package/src/duckdb/src/include/duckdb/planner/expression/bound_columnref_expression.hpp +3 -0
  648. package/src/duckdb/src/include/duckdb/planner/expression/bound_comparison_expression.hpp +3 -0
  649. package/src/duckdb/src/include/duckdb/planner/expression/bound_conjunction_expression.hpp +3 -0
  650. package/src/duckdb/src/include/duckdb/planner/expression/bound_constant_expression.hpp +3 -0
  651. package/src/duckdb/src/include/duckdb/planner/expression/bound_default_expression.hpp +3 -0
  652. package/src/duckdb/src/include/duckdb/planner/expression/bound_function_expression.hpp +4 -0
  653. package/src/duckdb/src/include/duckdb/planner/expression/bound_lambda_expression.hpp +3 -1
  654. package/src/duckdb/src/include/duckdb/planner/expression/bound_lambdaref_expression.hpp +3 -0
  655. package/src/duckdb/src/include/duckdb/planner/expression/bound_operator_expression.hpp +3 -0
  656. package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_data.hpp +24 -6
  657. package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_expression.hpp +9 -2
  658. package/src/duckdb/src/include/duckdb/planner/expression/bound_reference_expression.hpp +3 -0
  659. package/src/duckdb/src/include/duckdb/planner/expression/bound_unnest_expression.hpp +3 -0
  660. package/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp +3 -0
  661. package/src/duckdb/src/include/duckdb/planner/expression/list.hpp +1 -0
  662. package/src/duckdb/src/include/duckdb/planner/expression.hpp +3 -0
  663. package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
  664. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +13 -1
  665. package/src/duckdb/src/include/duckdb/planner/filter/conjunction_filter.hpp +4 -0
  666. package/src/duckdb/src/include/duckdb/planner/filter/constant_filter.hpp +2 -0
  667. package/src/duckdb/src/include/duckdb/planner/filter/null_filter.hpp +4 -0
  668. package/src/duckdb/src/include/duckdb/planner/joinside.hpp +3 -0
  669. package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +3 -2
  670. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -2
  671. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +3 -3
  672. package/src/duckdb/src/include/duckdb/planner/operator/logical_aggregate.hpp +3 -0
  673. package/src/duckdb/src/include/duckdb/planner/operator/logical_any_join.hpp +3 -0
  674. package/src/duckdb/src/include/duckdb/planner/operator/logical_column_data_get.hpp +4 -0
  675. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +12 -7
  676. package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +2 -0
  677. package/src/duckdb/src/include/duckdb/planner/operator/logical_create.hpp +9 -6
  678. package/src/duckdb/src/include/duckdb/planner/operator/logical_create_index.hpp +12 -23
  679. package/src/duckdb/src/include/duckdb/planner/operator/logical_create_table.hpp +10 -6
  680. package/src/duckdb/src/include/duckdb/planner/operator/logical_cross_product.hpp +3 -0
  681. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +9 -2
  682. package/src/duckdb/src/include/duckdb/planner/operator/logical_delete.hpp +7 -0
  683. package/src/duckdb/src/include/duckdb/planner/operator/logical_delim_get.hpp +3 -0
  684. package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
  685. package/src/duckdb/src/include/duckdb/planner/operator/logical_distinct.hpp +6 -10
  686. package/src/duckdb/src/include/duckdb/planner/operator/logical_dummy_scan.hpp +2 -0
  687. package/src/duckdb/src/include/duckdb/planner/operator/logical_empty_result.hpp +2 -0
  688. package/src/duckdb/src/include/duckdb/planner/operator/logical_explain.hpp +4 -0
  689. package/src/duckdb/src/include/duckdb/planner/operator/logical_expression_get.hpp +3 -0
  690. package/src/duckdb/src/include/duckdb/planner/operator/logical_extension_operator.hpp +8 -0
  691. package/src/duckdb/src/include/duckdb/planner/operator/logical_filter.hpp +3 -0
  692. package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +11 -1
  693. package/src/duckdb/src/include/duckdb/planner/operator/logical_insert.hpp +6 -0
  694. package/src/duckdb/src/include/duckdb/planner/operator/logical_limit.hpp +3 -0
  695. package/src/duckdb/src/include/duckdb/planner/operator/logical_limit_percent.hpp +3 -0
  696. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +52 -0
  697. package/src/duckdb/src/include/duckdb/planner/operator/logical_order.hpp +7 -35
  698. package/src/duckdb/src/include/duckdb/planner/operator/logical_pivot.hpp +6 -0
  699. package/src/duckdb/src/include/duckdb/planner/operator/logical_positional_join.hpp +3 -0
  700. package/src/duckdb/src/include/duckdb/planner/operator/logical_projection.hpp +3 -0
  701. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +10 -7
  702. package/src/duckdb/src/include/duckdb/planner/operator/logical_reset.hpp +4 -0
  703. package/src/duckdb/src/include/duckdb/planner/operator/logical_sample.hpp +6 -0
  704. package/src/duckdb/src/include/duckdb/planner/operator/logical_set.hpp +4 -0
  705. package/src/duckdb/src/include/duckdb/planner/operator/logical_set_operation.hpp +4 -0
  706. package/src/duckdb/src/include/duckdb/planner/operator/logical_show.hpp +3 -0
  707. package/src/duckdb/src/include/duckdb/planner/operator/logical_simple.hpp +3 -0
  708. package/src/duckdb/src/include/duckdb/planner/operator/logical_top_n.hpp +4 -0
  709. package/src/duckdb/src/include/duckdb/planner/operator/logical_unnest.hpp +2 -0
  710. package/src/duckdb/src/include/duckdb/planner/operator/logical_update.hpp +6 -0
  711. package/src/duckdb/src/include/duckdb/planner/operator/logical_window.hpp +3 -0
  712. package/src/duckdb/src/include/duckdb/planner/operator_extension.hpp +1 -0
  713. package/src/duckdb/src/include/duckdb/planner/planner.hpp +4 -3
  714. package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
  715. package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
  716. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
  717. package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
  718. package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
  719. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
  720. package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +7 -1
  721. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
  722. package/src/duckdb/src/include/duckdb/planner/tableref/bound_pivotref.hpp +3 -0
  723. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +2 -1
  724. package/src/duckdb/src/include/duckdb/storage/block.hpp +33 -4
  725. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +11 -11
  726. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +3 -0
  727. package/src/duckdb/src/include/duckdb/storage/checkpoint/row_group_writer.hpp +5 -5
  728. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_reader.hpp +2 -2
  729. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -3
  730. package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +19 -16
  731. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +1 -1
  732. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +2 -2
  733. package/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp +2 -2
  734. package/src/duckdb/src/include/duckdb/storage/index.hpp +2 -2
  735. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +88 -0
  736. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp +54 -0
  737. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_writer.hpp +45 -0
  738. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
  739. package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +2 -2
  740. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +8 -5
  741. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +7 -3
  742. package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +4 -0
  743. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -0
  744. package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +3 -0
  745. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +3 -0
  746. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +7 -0
  747. package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +3 -0
  748. package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +2 -2
  749. package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +2 -2
  750. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +18 -3
  751. package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +2 -2
  752. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +8 -3
  753. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +3 -3
  754. package/src/duckdb/src/include/duckdb/storage/table/row_group_segment_tree.hpp +2 -2
  755. package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +1 -1
  756. package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
  757. package/src/duckdb/src/include/duckdb/storage/table_io_manager.hpp +3 -0
  758. package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +3 -4
  759. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
  760. package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +6 -0
  761. package/src/duckdb/src/include/duckdb/verification/prepared_statement_verifier.hpp +1 -1
  762. package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +1 -0
  763. package/src/duckdb/src/include/duckdb.h +98 -1
  764. package/src/duckdb/src/main/appender.cpp +3 -1
  765. package/src/duckdb/src/main/attached_database.cpp +2 -2
  766. package/src/duckdb/src/main/capi/arrow-c.cpp +196 -8
  767. package/src/duckdb/src/main/capi/duckdb-c.cpp +16 -0
  768. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
  769. package/src/duckdb/src/main/capi/logical_types-c.cpp +22 -0
  770. package/src/duckdb/src/main/capi/pending-c.cpp +23 -0
  771. package/src/duckdb/src/main/capi/prepared-c.cpp +106 -28
  772. package/src/duckdb/src/main/capi/result-c.cpp +3 -1
  773. package/src/duckdb/src/main/chunk_scan_state/query_result.cpp +53 -0
  774. package/src/duckdb/src/main/chunk_scan_state.cpp +48 -0
  775. package/src/duckdb/src/main/client_context.cpp +42 -19
  776. package/src/duckdb/src/main/client_context_file_opener.cpp +17 -0
  777. package/src/duckdb/src/main/client_verify.cpp +18 -0
  778. package/src/duckdb/src/main/config.cpp +9 -3
  779. package/src/duckdb/src/main/connection.cpp +3 -3
  780. package/src/duckdb/src/main/database.cpp +3 -12
  781. package/src/duckdb/src/main/db_instance_cache.cpp +14 -6
  782. package/src/duckdb/src/main/extension/extension_helper.cpp +164 -88
  783. package/src/duckdb/src/main/extension/extension_install.cpp +76 -15
  784. package/src/duckdb/src/main/extension/extension_load.cpp +62 -13
  785. package/src/duckdb/src/main/extension/extension_util.cpp +16 -0
  786. package/src/duckdb/src/main/pending_query_result.cpp +9 -1
  787. package/src/duckdb/src/main/prepared_statement.cpp +38 -11
  788. package/src/duckdb/src/main/prepared_statement_data.cpp +23 -18
  789. package/src/duckdb/src/main/query_result.cpp +0 -21
  790. package/src/duckdb/src/main/relation/aggregate_relation.cpp +20 -10
  791. package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
  792. package/src/duckdb/src/main/relation/join_relation.cpp +6 -6
  793. package/src/duckdb/src/main/relation/read_csv_relation.cpp +19 -13
  794. package/src/duckdb/src/main/relation.cpp +10 -9
  795. package/src/duckdb/src/main/settings/settings.cpp +125 -33
  796. package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
  797. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +2 -4
  798. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
  799. package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
  800. package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
  801. package/src/duckdb/src/optimizer/compressed_materialization.cpp +477 -0
  802. package/src/duckdb/src/optimizer/deliminator.cpp +180 -323
  803. package/src/duckdb/src/optimizer/filter_pushdown.cpp +23 -6
  804. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +79 -325
  805. package/src/duckdb/src/optimizer/join_order/cost_model.cpp +19 -0
  806. package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
  807. package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -37
  808. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +48 -1047
  809. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
  810. package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +552 -0
  811. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +52 -41
  812. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +409 -0
  813. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +356 -0
  814. package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +351 -0
  815. package/src/duckdb/src/optimizer/optimizer.cpp +49 -14
  816. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
  817. package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
  818. package/src/duckdb/src/optimizer/pushdown/pushdown_projection.cpp +34 -7
  819. package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
  820. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
  821. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
  822. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
  823. package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
  824. package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
  825. package/src/duckdb/src/optimizer/topn_optimizer.cpp +27 -10
  826. package/src/duckdb/src/optimizer/unnest_rewriter.cpp +3 -5
  827. package/src/duckdb/src/parallel/executor.cpp +25 -1
  828. package/src/duckdb/src/parallel/pipeline.cpp +0 -17
  829. package/src/duckdb/src/parallel/pipeline_executor.cpp +33 -13
  830. package/src/duckdb/src/parallel/pipeline_finish_event.cpp +55 -1
  831. package/src/duckdb/src/parallel/task_scheduler.cpp +18 -2
  832. package/src/duckdb/src/parser/column_definition.cpp +20 -32
  833. package/src/duckdb/src/parser/column_list.cpp +8 -0
  834. package/src/duckdb/src/parser/constraints/foreign_key_constraint.cpp +3 -0
  835. package/src/duckdb/src/parser/constraints/unique_constraint.cpp +3 -0
  836. package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
  837. package/src/duckdb/src/parser/expression/case_expression.cpp +0 -25
  838. package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
  839. package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
  840. package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
  841. package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
  842. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
  843. package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
  844. package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
  845. package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
  846. package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
  847. package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
  848. package/src/duckdb/src/parser/expression/parameter_expression.cpp +7 -19
  849. package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
  850. package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
  851. package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
  852. package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
  853. package/src/duckdb/src/parser/parsed_data/alter_info.cpp +5 -2
  854. package/src/duckdb/src/parser/parsed_data/alter_table_info.cpp +38 -0
  855. package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +17 -1
  856. package/src/duckdb/src/parser/parsed_data/create_sequence_info.cpp +2 -0
  857. package/src/duckdb/src/parser/parsed_data/detach_info.cpp +1 -1
  858. package/src/duckdb/src/parser/parsed_data/drop_info.cpp +1 -1
  859. package/src/duckdb/src/parser/parsed_data/sample_options.cpp +0 -18
  860. package/src/duckdb/src/parser/parsed_data/transaction_info.cpp +4 -1
  861. package/src/duckdb/src/parser/parsed_data/vacuum_info.cpp +1 -1
  862. package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
  863. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
  864. package/src/duckdb/src/parser/parser.cpp +62 -36
  865. package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
  866. package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
  867. package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
  868. package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
  869. package/src/duckdb/src/parser/query_node.cpp +15 -47
  870. package/src/duckdb/src/parser/result_modifier.cpp +0 -87
  871. package/src/duckdb/src/parser/statement/execute_statement.cpp +2 -2
  872. package/src/duckdb/src/parser/statement/select_statement.cpp +0 -10
  873. package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
  874. package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
  875. package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
  876. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
  877. package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -55
  878. package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
  879. package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
  880. package/src/duckdb/src/parser/tableref.cpp +0 -44
  881. package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +55 -38
  882. package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +13 -4
  883. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
  884. package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
  885. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +3 -0
  886. package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
  887. package/src/duckdb/src/parser/transform/expression/transform_param_ref.cpp +45 -26
  888. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
  889. package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +16 -1
  890. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
  891. package/src/duckdb/src/parser/transform/statement/transform_create_index.cpp +32 -17
  892. package/src/duckdb/src/parser/transform/statement/transform_create_type.cpp +1 -1
  893. package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
  894. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
  895. package/src/duckdb/src/parser/transform/statement/transform_load.cpp +1 -0
  896. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
  897. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
  898. package/src/duckdb/src/parser/transform/statement/transform_prepare.cpp +28 -6
  899. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
  900. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
  901. package/src/duckdb/src/parser/transformer.cpp +44 -25
  902. package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +5 -3
  903. package/src/duckdb/src/planner/binder/expression/bind_parameter_expression.cpp +10 -10
  904. package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
  905. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
  906. package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
  907. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
  908. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +36 -33
  909. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +1 -1
  910. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +14 -52
  911. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +0 -23
  912. package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +13 -7
  913. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +70 -29
  914. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +93 -28
  915. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
  916. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -50
  917. package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
  918. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +67 -31
  919. package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
  920. package/src/duckdb/src/planner/binder.cpp +44 -31
  921. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +24 -1
  922. package/src/duckdb/src/planner/expression/bound_between_expression.cpp +4 -0
  923. package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +13 -8
  924. package/src/duckdb/src/planner/expression/bound_function_expression.cpp +22 -0
  925. package/src/duckdb/src/planner/expression/bound_parameter_expression.cpp +28 -20
  926. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +48 -4
  927. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
  928. package/src/duckdb/src/planner/expression_binder/order_binder.cpp +5 -4
  929. package/src/duckdb/src/planner/expression_binder.cpp +23 -0
  930. package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
  931. package/src/duckdb/src/planner/logical_operator.cpp +19 -7
  932. package/src/duckdb/src/planner/logical_operator_visitor.cpp +5 -6
  933. package/src/duckdb/src/planner/operator/logical_comparison_join.cpp +4 -2
  934. package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +8 -0
  935. package/src/duckdb/src/planner/operator/logical_create.cpp +14 -0
  936. package/src/duckdb/src/planner/operator/logical_create_index.cpp +36 -7
  937. package/src/duckdb/src/planner/operator/logical_create_table.cpp +16 -0
  938. package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
  939. package/src/duckdb/src/planner/operator/logical_delete.cpp +9 -2
  940. package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
  941. package/src/duckdb/src/planner/operator/logical_distinct.cpp +13 -0
  942. package/src/duckdb/src/planner/operator/logical_explain.cpp +1 -1
  943. package/src/duckdb/src/planner/operator/logical_extension_operator.cpp +39 -0
  944. package/src/duckdb/src/planner/operator/logical_get.cpp +82 -4
  945. package/src/duckdb/src/planner/operator/logical_insert.cpp +8 -2
  946. package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +22 -0
  947. package/src/duckdb/src/planner/operator/logical_order.cpp +39 -0
  948. package/src/duckdb/src/planner/operator/logical_pivot.cpp +3 -0
  949. package/src/duckdb/src/planner/operator/logical_recursive_cte.cpp +5 -5
  950. package/src/duckdb/src/planner/operator/logical_sample.cpp +3 -0
  951. package/src/duckdb/src/planner/operator/logical_update.cpp +8 -2
  952. package/src/duckdb/src/planner/parsed_data/bound_create_table_info.cpp +4 -2
  953. package/src/duckdb/src/planner/planner.cpp +18 -7
  954. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
  955. package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
  956. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
  957. package/src/duckdb/src/storage/arena_allocator.cpp +13 -2
  958. package/src/duckdb/src/storage/buffer/block_manager.cpp +13 -9
  959. package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -1
  960. package/src/duckdb/src/storage/checkpoint/table_data_reader.cpp +3 -4
  961. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +7 -7
  962. package/src/duckdb/src/storage/checkpoint_manager.cpp +78 -72
  963. package/src/duckdb/src/storage/compression/bitpacking.cpp +87 -63
  964. package/src/duckdb/src/storage/compression/bitpacking_hugeint.cpp +295 -0
  965. package/src/duckdb/src/storage/compression/fsst.cpp +1 -1
  966. package/src/duckdb/src/storage/compression/rle.cpp +52 -13
  967. package/src/duckdb/src/storage/data_table.cpp +36 -25
  968. package/src/duckdb/src/storage/index.cpp +4 -26
  969. package/src/duckdb/src/storage/local_storage.cpp +3 -4
  970. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +267 -0
  971. package/src/duckdb/src/storage/metadata/metadata_reader.cpp +80 -0
  972. package/src/duckdb/src/storage/metadata/metadata_writer.cpp +86 -0
  973. package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +98 -0
  974. package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +194 -0
  975. package/src/duckdb/src/storage/serialization/serialize_expression.cpp +283 -0
  976. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +762 -0
  977. package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +62 -0
  978. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +461 -0
  979. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +421 -0
  980. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +342 -0
  981. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
  982. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +97 -0
  983. package/src/duckdb/src/storage/serialization/serialize_statement.cpp +22 -0
  984. package/src/duckdb/src/storage/serialization/serialize_storage.cpp +39 -0
  985. package/src/duckdb/src/storage/serialization/serialize_table_filter.cpp +97 -0
  986. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +164 -0
  987. package/src/duckdb/src/storage/serialization/serialize_types.cpp +127 -0
  988. package/src/duckdb/src/storage/single_file_block_manager.cpp +69 -51
  989. package/src/duckdb/src/storage/statistics/base_statistics.cpp +67 -4
  990. package/src/duckdb/src/storage/statistics/column_statistics.cpp +16 -0
  991. package/src/duckdb/src/storage/statistics/list_stats.cpp +21 -0
  992. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +126 -1
  993. package/src/duckdb/src/storage/statistics/string_stats.cpp +44 -2
  994. package/src/duckdb/src/storage/statistics/struct_stats.cpp +27 -0
  995. package/src/duckdb/src/storage/storage_info.cpp +3 -2
  996. package/src/duckdb/src/storage/storage_manager.cpp +11 -5
  997. package/src/duckdb/src/storage/table/chunk_info.cpp +99 -3
  998. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +3 -3
  999. package/src/duckdb/src/storage/table/list_column_data.cpp +6 -3
  1000. package/src/duckdb/src/storage/table/persistent_table_data.cpp +1 -2
  1001. package/src/duckdb/src/storage/table/row_group.cpp +102 -20
  1002. package/src/duckdb/src/storage/table/row_group_collection.cpp +23 -19
  1003. package/src/duckdb/src/storage/table/table_statistics.cpp +21 -0
  1004. package/src/duckdb/src/storage/table/update_segment.cpp +1 -1
  1005. package/src/duckdb/src/storage/table_index_list.cpp +1 -1
  1006. package/src/duckdb/src/storage/wal_replay.cpp +26 -26
  1007. package/src/duckdb/src/storage/write_ahead_log.cpp +3 -2
  1008. package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +15 -1
  1009. package/src/duckdb/src/verification/prepared_statement_verifier.cpp +16 -11
  1010. package/src/duckdb/src/verification/statement_verifier.cpp +2 -0
  1011. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
  1012. package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +5 -2
  1013. package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
  1014. package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
  1015. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +11 -0
  1016. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  1017. package/src/duckdb/third_party/libpg_query/pg_functions.cpp +13 -0
  1018. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +11019 -10364
  1019. package/src/duckdb/third_party/libpg_query/src_backend_parser_scansup.cpp +9 -0
  1020. package/src/duckdb/third_party/mbedtls/include/mbedtls_wrapper.hpp +10 -0
  1021. package/src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp +31 -1
  1022. package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +8 -0
  1023. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
  1024. package/src/duckdb/ub_src_common.cpp +4 -0
  1025. package/src/duckdb/ub_src_common_adbc_nanoarrow.cpp +8 -0
  1026. package/src/duckdb/ub_src_common_arrow_appender.cpp +10 -0
  1027. package/src/duckdb/ub_src_common_serializer.cpp +2 -0
  1028. package/src/duckdb/ub_src_core_functions_scalar_debug.cpp +2 -0
  1029. package/src/duckdb/ub_src_core_functions_scalar_string.cpp +2 -0
  1030. package/src/duckdb/ub_src_execution.cpp +2 -2
  1031. package/src/duckdb/ub_src_execution_index_art.cpp +0 -6
  1032. package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +18 -0
  1033. package/src/duckdb/ub_src_execution_operator_csv_scanner_sniffer.cpp +12 -0
  1034. package/src/duckdb/ub_src_execution_operator_persistent.cpp +1 -11
  1035. package/src/duckdb/ub_src_execution_operator_schema.cpp +1 -1
  1036. package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
  1037. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  1038. package/src/duckdb/ub_src_function_scalar.cpp +2 -0
  1039. package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
  1040. package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
  1041. package/src/duckdb/ub_src_function_table_arrow.cpp +2 -0
  1042. package/src/duckdb/ub_src_main.cpp +2 -0
  1043. package/src/duckdb/ub_src_main_chunk_scan_state.cpp +2 -0
  1044. package/src/duckdb/ub_src_optimizer.cpp +6 -0
  1045. package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
  1046. package/src/duckdb/ub_src_optimizer_join_order.cpp +10 -0
  1047. package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
  1048. package/src/duckdb/ub_src_parser.cpp +0 -2
  1049. package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
  1050. package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
  1051. package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
  1052. package/src/duckdb/ub_src_planner_operator.cpp +3 -3
  1053. package/src/duckdb/ub_src_storage.cpp +0 -4
  1054. package/src/duckdb/ub_src_storage_compression.cpp +2 -0
  1055. package/src/duckdb/ub_src_storage_metadata.cpp +6 -0
  1056. package/src/duckdb/ub_src_storage_serialization.cpp +30 -0
  1057. package/src/duckdb_node.hpp +1 -0
  1058. package/src/statement.cpp +10 -5
  1059. package/test/columns.test.ts +25 -3
  1060. package/test/extension.test.ts +1 -1
  1061. package/test/test_all_types.test.ts +234 -0
  1062. package/tsconfig.json +1 -0
  1063. package/src/duckdb/src/execution/index/art/leaf_segment.cpp +0 -52
  1064. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
  1065. package/src/duckdb/src/execution/index/art/swizzleable_pointer.cpp +0 -22
  1066. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
  1067. package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
  1068. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
  1069. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +0 -193
  1070. package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -172
  1071. package/src/duckdb/src/include/duckdb/common/arrow/arrow_options.hpp +0 -25
  1072. package/src/duckdb/src/include/duckdb/execution/index/art/leaf_segment.hpp +0 -38
  1073. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
  1074. package/src/duckdb/src/include/duckdb/execution/index/art/swizzleable_pointer.hpp +0 -58
  1075. package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +0 -133
  1076. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +0 -74
  1077. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +0 -69
  1078. package/src/duckdb/src/include/duckdb/planner/operator/logical_asof_join.hpp +0 -27
  1079. package/src/duckdb/src/include/duckdb/planner/operator/logical_delim_join.hpp +0 -32
  1080. package/src/duckdb/src/include/duckdb/storage/meta_block_reader.hpp +0 -49
  1081. package/src/duckdb/src/include/duckdb/storage/meta_block_writer.hpp +0 -50
  1082. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
  1083. package/src/duckdb/src/parser/common_table_expression_info.cpp +0 -19
  1084. package/src/duckdb/src/planner/operator/logical_asof_join.cpp +0 -14
  1085. package/src/duckdb/src/planner/operator/logical_delim_join.cpp +0 -27
  1086. package/src/duckdb/src/storage/meta_block_reader.cpp +0 -78
  1087. package/src/duckdb/src/storage/meta_block_writer.cpp +0 -80
@@ -14,7 +14,7 @@
14
14
  #include "duckdb/common/vector_operations/vector_operations.hpp"
15
15
  #include "duckdb/common/windows_undefs.hpp"
16
16
  #include "duckdb/execution/expression_executor.hpp"
17
- #include "duckdb/execution/partitionable_hashtable.hpp"
17
+ #include "duckdb/execution/window_executor.hpp"
18
18
  #include "duckdb/execution/window_segment_tree.hpp"
19
19
  #include "duckdb/main/client_config.hpp"
20
20
  #include "duckdb/main/config.hpp"
@@ -32,7 +32,7 @@ namespace duckdb {
32
32
  class WindowGlobalSinkState : public GlobalSinkState {
33
33
  public:
34
34
  WindowGlobalSinkState(const PhysicalWindow &op, ClientContext &context)
35
- : mode(DBConfig::GetConfig(context).options.window_mode) {
35
+ : op(op), mode(DBConfig::GetConfig(context).options.window_mode) {
36
36
 
37
37
  D_ASSERT(op.select_list[0]->GetExpressionClass() == ExpressionClass::BOUND_WINDOW);
38
38
  auto &wexpr = op.select_list[0]->Cast<BoundWindowExpression>();
@@ -42,6 +42,7 @@ public:
42
42
  wexpr.partitions_stats, op.estimated_cardinality);
43
43
  }
44
44
 
45
+ const PhysicalWindow &op;
45
46
  unique_ptr<PartitionGlobalSinkState> global_partition;
46
47
  WindowAggregationMode mode;
47
48
  };
@@ -78,977 +79,40 @@ PhysicalWindow::PhysicalWindow(vector<LogicalType> types, vector<unique_ptr<Expr
78
79
  }
79
80
  }
80
81
 
81
- static idx_t FindNextStart(const ValidityMask &mask, idx_t l, const idx_t r, idx_t &n) {
82
- if (mask.AllValid()) {
83
- auto start = MinValue(l + n - 1, r);
84
- n -= MinValue(n, r - l);
85
- return start;
86
- }
87
-
88
- while (l < r) {
89
- // If l is aligned with the start of a block, and the block is blank, then skip forward one block.
90
- idx_t entry_idx;
91
- idx_t shift;
92
- mask.GetEntryIndex(l, entry_idx, shift);
93
-
94
- const auto block = mask.GetValidityEntry(entry_idx);
95
- if (mask.NoneValid(block) && !shift) {
96
- l += ValidityMask::BITS_PER_VALUE;
97
- continue;
98
- }
99
-
100
- // Loop over the block
101
- for (; shift < ValidityMask::BITS_PER_VALUE && l < r; ++shift, ++l) {
102
- if (mask.RowIsValid(block, shift) && --n == 0) {
103
- return MinValue(l, r);
104
- }
105
- }
106
- }
107
-
108
- // Didn't find a start so return the end of the range
109
- return r;
110
- }
111
-
112
- static idx_t FindPrevStart(const ValidityMask &mask, const idx_t l, idx_t r, idx_t &n) {
113
- if (mask.AllValid()) {
114
- auto start = (r <= l + n) ? l : r - n;
115
- n -= r - start;
116
- return start;
117
- }
118
-
119
- while (l < r) {
120
- // If r is aligned with the start of a block, and the previous block is blank,
121
- // then skip backwards one block.
122
- idx_t entry_idx;
123
- idx_t shift;
124
- mask.GetEntryIndex(r - 1, entry_idx, shift);
125
-
126
- const auto block = mask.GetValidityEntry(entry_idx);
127
- if (mask.NoneValid(block) && (shift + 1 == ValidityMask::BITS_PER_VALUE)) {
128
- // r is nonzero (> l) and word aligned, so this will not underflow.
129
- r -= ValidityMask::BITS_PER_VALUE;
130
- continue;
131
- }
132
-
133
- // Loop backwards over the block
134
- // shift is probing r-1 >= l >= 0
135
- for (++shift; shift-- > 0; --r) {
136
- if (mask.RowIsValid(block, shift) && --n == 0) {
137
- return MaxValue(l, r - 1);
138
- }
139
- }
140
- }
141
-
142
- // Didn't find a start so return the start of the range
143
- return l;
144
- }
145
-
146
- static void PrepareInputExpressions(vector<unique_ptr<Expression>> &exprs, ExpressionExecutor &executor,
147
- DataChunk &chunk) {
148
- if (exprs.empty()) {
149
- return;
150
- }
151
-
152
- vector<LogicalType> types;
153
- for (idx_t expr_idx = 0; expr_idx < exprs.size(); ++expr_idx) {
154
- types.push_back(exprs[expr_idx]->return_type);
155
- executor.AddExpression(*exprs[expr_idx]);
156
- }
157
-
158
- if (!types.empty()) {
159
- auto &allocator = executor.GetAllocator();
160
- chunk.Initialize(allocator, types);
161
- }
162
- }
163
-
164
- static void PrepareInputExpression(Expression &expr, ExpressionExecutor &executor, DataChunk &chunk) {
165
- vector<LogicalType> types;
166
- types.push_back(expr.return_type);
167
- executor.AddExpression(expr);
168
-
169
- auto &allocator = executor.GetAllocator();
170
- chunk.Initialize(allocator, types);
171
- }
172
-
173
- struct WindowInputExpression {
174
- WindowInputExpression(optional_ptr<Expression> expr_p, ClientContext &context)
175
- : expr(expr_p), ptype(PhysicalType::INVALID), scalar(true), executor(context) {
176
- if (expr) {
177
- PrepareInputExpression(*expr, executor, chunk);
178
- ptype = expr->return_type.InternalType();
179
- scalar = expr->IsScalar();
180
- }
181
- }
182
-
183
- void Execute(DataChunk &input_chunk) {
184
- if (expr) {
185
- chunk.Reset();
186
- executor.Execute(input_chunk, chunk);
187
- chunk.Verify();
188
- }
189
- }
190
-
191
- template <typename T>
192
- inline T GetCell(idx_t i) const {
193
- D_ASSERT(!chunk.data.empty());
194
- const auto data = FlatVector::GetData<T>(chunk.data[0]);
195
- return data[scalar ? 0 : i];
196
- }
197
-
198
- inline bool CellIsNull(idx_t i) const {
199
- D_ASSERT(!chunk.data.empty());
200
- if (chunk.data[0].GetVectorType() == VectorType::CONSTANT_VECTOR) {
201
- return ConstantVector::IsNull(chunk.data[0]);
202
- }
203
- return FlatVector::IsNull(chunk.data[0], i);
204
- }
205
-
206
- inline void CopyCell(Vector &target, idx_t target_offset) const {
207
- D_ASSERT(!chunk.data.empty());
208
- auto &source = chunk.data[0];
209
- auto source_offset = scalar ? 0 : target_offset;
210
- VectorOperations::Copy(source, target, source_offset + 1, source_offset, target_offset);
211
- }
212
-
213
- optional_ptr<Expression> expr;
214
- PhysicalType ptype;
215
- bool scalar;
216
- ExpressionExecutor executor;
217
- DataChunk chunk;
218
- };
219
-
220
- struct WindowInputColumn {
221
- WindowInputColumn(Expression *expr_p, ClientContext &context, idx_t capacity_p)
222
- : input_expr(expr_p, context), count(0), capacity(capacity_p) {
223
- if (input_expr.expr) {
224
- target = make_uniq<Vector>(input_expr.chunk.data[0].GetType(), capacity);
225
- }
226
- }
227
-
228
- void Append(DataChunk &input_chunk) {
229
- if (input_expr.expr) {
230
- const auto source_count = input_chunk.size();
231
- D_ASSERT(count + source_count <= capacity);
232
- if (!input_expr.scalar || !count) {
233
- input_expr.Execute(input_chunk);
234
- auto &source = input_expr.chunk.data[0];
235
- VectorOperations::Copy(source, *target, source_count, 0, count);
236
- }
237
- count += source_count;
238
- }
239
- }
240
-
241
- inline bool CellIsNull(idx_t i) {
242
- D_ASSERT(target);
243
- D_ASSERT(i < count);
244
- return FlatVector::IsNull(*target, input_expr.scalar ? 0 : i);
245
- }
246
-
247
- template <typename T>
248
- inline T GetCell(idx_t i) const {
249
- D_ASSERT(target);
250
- D_ASSERT(i < count);
251
- const auto data = FlatVector::GetData<T>(*target);
252
- return data[input_expr.scalar ? 0 : i];
253
- }
254
-
255
- WindowInputExpression input_expr;
256
-
257
- private:
258
- unique_ptr<Vector> target;
259
- idx_t count;
260
- idx_t capacity;
261
- };
262
-
263
- static inline bool BoundaryNeedsPeer(const WindowBoundary &boundary) {
264
- switch (boundary) {
265
- case WindowBoundary::CURRENT_ROW_RANGE:
266
- case WindowBoundary::EXPR_PRECEDING_RANGE:
267
- case WindowBoundary::EXPR_FOLLOWING_RANGE:
268
- return true;
269
- default:
270
- return false;
271
- }
272
- }
273
-
274
- struct WindowBoundariesState {
275
- static inline bool IsScalar(const unique_ptr<Expression> &expr) {
276
- return expr ? expr->IsScalar() : true;
277
- }
278
-
279
- WindowBoundariesState(BoundWindowExpression &wexpr, const idx_t input_size)
280
- : type(wexpr.type), input_size(input_size), start_boundary(wexpr.start), end_boundary(wexpr.end),
281
- partition_count(wexpr.partitions.size()), order_count(wexpr.orders.size()),
282
- range_sense(wexpr.orders.empty() ? OrderType::INVALID : wexpr.orders[0].type),
283
- has_preceding_range(wexpr.start == WindowBoundary::EXPR_PRECEDING_RANGE ||
284
- wexpr.end == WindowBoundary::EXPR_PRECEDING_RANGE),
285
- has_following_range(wexpr.start == WindowBoundary::EXPR_FOLLOWING_RANGE ||
286
- wexpr.end == WindowBoundary::EXPR_FOLLOWING_RANGE),
287
- needs_peer(BoundaryNeedsPeer(wexpr.end) || wexpr.type == ExpressionType::WINDOW_CUME_DIST) {
288
- }
289
-
290
- void Update(const idx_t row_idx, WindowInputColumn &range_collection, const idx_t source_offset,
291
- WindowInputExpression &boundary_start, WindowInputExpression &boundary_end,
292
- const ValidityMask &partition_mask, const ValidityMask &order_mask);
293
-
294
- // Cached lookups
295
- const ExpressionType type;
296
- const idx_t input_size;
297
- const WindowBoundary start_boundary;
298
- const WindowBoundary end_boundary;
299
- const size_t partition_count;
300
- const size_t order_count;
301
- const OrderType range_sense;
302
- const bool has_preceding_range;
303
- const bool has_following_range;
304
- const bool needs_peer;
305
-
306
- idx_t partition_start = 0;
307
- idx_t partition_end = 0;
308
- idx_t peer_start = 0;
309
- idx_t peer_end = 0;
310
- idx_t valid_start = 0;
311
- idx_t valid_end = 0;
312
- int64_t window_start = -1;
313
- int64_t window_end = -1;
314
- bool is_same_partition = false;
315
- bool is_peer = false;
316
- };
317
-
318
- static bool WindowNeedsRank(const BoundWindowExpression &wexpr) {
319
- return wexpr.type == ExpressionType::WINDOW_PERCENT_RANK || wexpr.type == ExpressionType::WINDOW_RANK ||
320
- wexpr.type == ExpressionType::WINDOW_RANK_DENSE || wexpr.type == ExpressionType::WINDOW_CUME_DIST;
321
- }
322
-
323
- template <typename T>
324
- static T GetCell(DataChunk &chunk, idx_t column, idx_t index) {
325
- D_ASSERT(chunk.ColumnCount() > column);
326
- auto &source = chunk.data[column];
327
- const auto data = FlatVector::GetData<T>(source);
328
- return data[index];
329
- }
330
-
331
- static bool CellIsNull(DataChunk &chunk, idx_t column, idx_t index) {
332
- D_ASSERT(chunk.ColumnCount() > column);
333
- auto &source = chunk.data[column];
334
- return FlatVector::IsNull(source, index);
335
- }
336
-
337
- static void CopyCell(DataChunk &chunk, idx_t column, idx_t index, Vector &target, idx_t target_offset) {
338
- D_ASSERT(chunk.ColumnCount() > column);
339
- auto &source = chunk.data[column];
340
- VectorOperations::Copy(source, target, index + 1, index, target_offset);
341
- }
342
-
343
- template <typename T>
344
- struct WindowColumnIterator {
345
- using iterator = WindowColumnIterator<T>;
346
- using iterator_category = std::forward_iterator_tag;
347
- using difference_type = std::ptrdiff_t;
348
- using value_type = T;
349
- using reference = T;
350
- using pointer = idx_t;
351
-
352
- explicit WindowColumnIterator(WindowInputColumn &coll_p, pointer pos_p = 0) : coll(&coll_p), pos(pos_p) {
353
- }
354
-
355
- inline reference operator*() const {
356
- return coll->GetCell<T>(pos);
357
- }
358
- inline explicit operator pointer() const {
359
- return pos;
360
- }
361
-
362
- inline iterator &operator++() {
363
- ++pos;
364
- return *this;
365
- }
366
- inline iterator operator++(int) {
367
- auto result = *this;
368
- ++(*this);
369
- return result;
370
- }
371
-
372
- friend inline bool operator==(const iterator &a, const iterator &b) {
373
- return a.pos == b.pos;
374
- }
375
- friend inline bool operator!=(const iterator &a, const iterator &b) {
376
- return a.pos != b.pos;
377
- }
378
-
379
- private:
380
- optional_ptr<WindowInputColumn> coll;
381
- pointer pos;
382
- };
383
-
384
- template <typename T, typename OP>
385
- struct OperationCompare : public std::function<bool(T, T)> {
386
- inline bool operator()(const T &lhs, const T &val) const {
387
- return OP::template Operation(lhs, val);
388
- }
389
- };
390
-
391
- template <typename T, typename OP, bool FROM>
392
- static idx_t FindTypedRangeBound(WindowInputColumn &over, const idx_t order_begin, const idx_t order_end,
393
- WindowInputExpression &boundary, const idx_t boundary_row) {
394
- D_ASSERT(!boundary.CellIsNull(boundary_row));
395
- const auto val = boundary.GetCell<T>(boundary_row);
396
-
397
- OperationCompare<T, OP> comp;
398
- WindowColumnIterator<T> begin(over, order_begin);
399
- WindowColumnIterator<T> end(over, order_end);
400
- if (FROM) {
401
- return idx_t(std::lower_bound(begin, end, val, comp));
402
- } else {
403
- return idx_t(std::upper_bound(begin, end, val, comp));
404
- }
405
- }
406
-
407
- template <typename OP, bool FROM>
408
- static idx_t FindRangeBound(WindowInputColumn &over, const idx_t order_begin, const idx_t order_end,
409
- WindowInputExpression &boundary, const idx_t expr_idx) {
410
- D_ASSERT(boundary.chunk.ColumnCount() == 1);
411
- D_ASSERT(boundary.chunk.data[0].GetType().InternalType() == over.input_expr.ptype);
412
-
413
- switch (over.input_expr.ptype) {
414
- case PhysicalType::INT8:
415
- return FindTypedRangeBound<int8_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
416
- case PhysicalType::INT16:
417
- return FindTypedRangeBound<int16_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
418
- case PhysicalType::INT32:
419
- return FindTypedRangeBound<int32_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
420
- case PhysicalType::INT64:
421
- return FindTypedRangeBound<int64_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
422
- case PhysicalType::UINT8:
423
- return FindTypedRangeBound<uint8_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
424
- case PhysicalType::UINT16:
425
- return FindTypedRangeBound<uint16_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
426
- case PhysicalType::UINT32:
427
- return FindTypedRangeBound<uint32_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
428
- case PhysicalType::UINT64:
429
- return FindTypedRangeBound<uint64_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
430
- case PhysicalType::INT128:
431
- return FindTypedRangeBound<hugeint_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
432
- case PhysicalType::FLOAT:
433
- return FindTypedRangeBound<float, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
434
- case PhysicalType::DOUBLE:
435
- return FindTypedRangeBound<double, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
436
- case PhysicalType::INTERVAL:
437
- return FindTypedRangeBound<interval_t, OP, FROM>(over, order_begin, order_end, boundary, expr_idx);
438
- default:
439
- throw InternalException("Unsupported column type for RANGE");
440
- }
441
- }
442
-
443
- template <bool FROM>
444
- static idx_t FindOrderedRangeBound(WindowInputColumn &over, const OrderType range_sense, const idx_t order_begin,
445
- const idx_t order_end, WindowInputExpression &boundary, const idx_t expr_idx) {
446
- switch (range_sense) {
447
- case OrderType::ASCENDING:
448
- return FindRangeBound<LessThan, FROM>(over, order_begin, order_end, boundary, expr_idx);
449
- case OrderType::DESCENDING:
450
- return FindRangeBound<GreaterThan, FROM>(over, order_begin, order_end, boundary, expr_idx);
451
- default:
452
- throw InternalException("Unsupported ORDER BY sense for RANGE");
453
- }
454
- }
455
-
456
- void WindowBoundariesState::Update(const idx_t row_idx, WindowInputColumn &range_collection, const idx_t expr_idx,
457
- WindowInputExpression &boundary_start, WindowInputExpression &boundary_end,
458
- const ValidityMask &partition_mask, const ValidityMask &order_mask) {
459
-
460
- auto &bounds = *this;
461
- if (bounds.partition_count + bounds.order_count > 0) {
462
-
463
- // determine partition and peer group boundaries to ultimately figure out window size
464
- bounds.is_same_partition = !partition_mask.RowIsValidUnsafe(row_idx);
465
- bounds.is_peer = !order_mask.RowIsValidUnsafe(row_idx);
466
-
467
- // when the partition changes, recompute the boundaries
468
- if (!bounds.is_same_partition) {
469
- bounds.partition_start = row_idx;
470
- bounds.peer_start = row_idx;
471
-
472
- // find end of partition
473
- bounds.partition_end = bounds.input_size;
474
- if (bounds.partition_count) {
475
- idx_t n = 1;
476
- bounds.partition_end = FindNextStart(partition_mask, bounds.partition_start + 1, bounds.input_size, n);
477
- }
478
-
479
- // Find valid ordering values for the new partition
480
- // so we can exclude NULLs from RANGE expression computations
481
- bounds.valid_start = bounds.partition_start;
482
- bounds.valid_end = bounds.partition_end;
483
-
484
- if ((bounds.valid_start < bounds.valid_end) && bounds.has_preceding_range) {
485
- // Exclude any leading NULLs
486
- if (range_collection.CellIsNull(bounds.valid_start)) {
487
- idx_t n = 1;
488
- bounds.valid_start = FindNextStart(order_mask, bounds.valid_start + 1, bounds.valid_end, n);
489
- }
490
- }
491
-
492
- if ((bounds.valid_start < bounds.valid_end) && bounds.has_following_range) {
493
- // Exclude any trailing NULLs
494
- if (range_collection.CellIsNull(bounds.valid_end - 1)) {
495
- idx_t n = 1;
496
- bounds.valid_end = FindPrevStart(order_mask, bounds.valid_start, bounds.valid_end, n);
497
- }
498
- }
499
-
500
- } else if (!bounds.is_peer) {
501
- bounds.peer_start = row_idx;
502
- }
503
-
504
- if (bounds.needs_peer) {
505
- bounds.peer_end = bounds.partition_end;
506
- if (bounds.order_count) {
507
- idx_t n = 1;
508
- bounds.peer_end = FindNextStart(order_mask, bounds.peer_start + 1, bounds.partition_end, n);
509
- }
510
- }
511
-
512
- } else {
513
- bounds.is_same_partition = false;
514
- bounds.is_peer = true;
515
- bounds.partition_end = bounds.input_size;
516
- bounds.peer_end = bounds.partition_end;
517
- }
518
-
519
- // determine window boundaries depending on the type of expression
520
- bounds.window_start = -1;
521
- bounds.window_end = -1;
522
-
523
- switch (bounds.start_boundary) {
524
- case WindowBoundary::UNBOUNDED_PRECEDING:
525
- bounds.window_start = bounds.partition_start;
526
- break;
527
- case WindowBoundary::CURRENT_ROW_ROWS:
528
- bounds.window_start = row_idx;
529
- break;
530
- case WindowBoundary::CURRENT_ROW_RANGE:
531
- bounds.window_start = bounds.peer_start;
532
- break;
533
- case WindowBoundary::EXPR_PRECEDING_ROWS: {
534
- if (!TrySubtractOperator::Operation(int64_t(row_idx), boundary_start.GetCell<int64_t>(expr_idx),
535
- bounds.window_start)) {
536
- throw OutOfRangeException("Overflow computing ROWS PRECEDING start");
537
- }
538
- break;
539
- }
540
- case WindowBoundary::EXPR_FOLLOWING_ROWS: {
541
- if (!TryAddOperator::Operation(int64_t(row_idx), boundary_start.GetCell<int64_t>(expr_idx),
542
- bounds.window_start)) {
543
- throw OutOfRangeException("Overflow computing ROWS FOLLOWING start");
544
- }
545
- break;
546
- }
547
- case WindowBoundary::EXPR_PRECEDING_RANGE: {
548
- if (boundary_start.CellIsNull(expr_idx)) {
549
- bounds.window_start = bounds.peer_start;
550
- } else {
551
- bounds.window_start = FindOrderedRangeBound<true>(range_collection, bounds.range_sense, bounds.valid_start,
552
- row_idx, boundary_start, expr_idx);
553
- }
554
- break;
555
- }
556
- case WindowBoundary::EXPR_FOLLOWING_RANGE: {
557
- if (boundary_start.CellIsNull(expr_idx)) {
558
- bounds.window_start = bounds.peer_start;
559
- } else {
560
- bounds.window_start = FindOrderedRangeBound<true>(range_collection, bounds.range_sense, row_idx,
561
- bounds.valid_end, boundary_start, expr_idx);
562
- }
82
+ static unique_ptr<WindowExecutor> WindowExecutorFactory(BoundWindowExpression &wexpr, ClientContext &context,
83
+ const ValidityMask &partition_mask,
84
+ const ValidityMask &order_mask, const idx_t payload_count,
85
+ WindowAggregationMode mode) {
86
+ switch (wexpr.type) {
87
+ case ExpressionType::WINDOW_AGGREGATE:
88
+ return make_uniq<WindowAggregateExecutor>(wexpr, context, payload_count, partition_mask, order_mask, mode);
89
+ case ExpressionType::WINDOW_ROW_NUMBER:
90
+ return make_uniq<WindowRowNumberExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
91
+ case ExpressionType::WINDOW_RANK_DENSE:
92
+ return make_uniq<WindowDenseRankExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
93
+ case ExpressionType::WINDOW_RANK:
94
+ return make_uniq<WindowRankExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
95
+ case ExpressionType::WINDOW_PERCENT_RANK:
96
+ return make_uniq<WindowPercentRankExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
97
+ case ExpressionType::WINDOW_CUME_DIST:
98
+ return make_uniq<WindowCumeDistExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
99
+ case ExpressionType::WINDOW_NTILE:
100
+ return make_uniq<WindowNtileExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
101
+ case ExpressionType::WINDOW_LEAD:
102
+ case ExpressionType::WINDOW_LAG:
103
+ return make_uniq<WindowLeadLagExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
104
+ case ExpressionType::WINDOW_FIRST_VALUE:
105
+ return make_uniq<WindowFirstValueExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
106
+ case ExpressionType::WINDOW_LAST_VALUE:
107
+ return make_uniq<WindowLastValueExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
108
+ case ExpressionType::WINDOW_NTH_VALUE:
109
+ return make_uniq<WindowNthValueExecutor>(wexpr, context, payload_count, partition_mask, order_mask);
563
110
  break;
564
- }
565
111
  default:
566
- throw InternalException("Unsupported window start boundary");
567
- }
568
-
569
- switch (bounds.end_boundary) {
570
- case WindowBoundary::CURRENT_ROW_ROWS:
571
- bounds.window_end = row_idx + 1;
572
- break;
573
- case WindowBoundary::CURRENT_ROW_RANGE:
574
- bounds.window_end = bounds.peer_end;
575
- break;
576
- case WindowBoundary::UNBOUNDED_FOLLOWING:
577
- bounds.window_end = bounds.partition_end;
578
- break;
579
- case WindowBoundary::EXPR_PRECEDING_ROWS:
580
- if (!TrySubtractOperator::Operation(int64_t(row_idx + 1), boundary_end.GetCell<int64_t>(expr_idx),
581
- bounds.window_end)) {
582
- throw OutOfRangeException("Overflow computing ROWS PRECEDING end");
583
- }
584
- break;
585
- case WindowBoundary::EXPR_FOLLOWING_ROWS:
586
- if (!TryAddOperator::Operation(int64_t(row_idx + 1), boundary_end.GetCell<int64_t>(expr_idx),
587
- bounds.window_end)) {
588
- throw OutOfRangeException("Overflow computing ROWS FOLLOWING end");
589
- }
590
- break;
591
- case WindowBoundary::EXPR_PRECEDING_RANGE: {
592
- if (boundary_end.CellIsNull(expr_idx)) {
593
- bounds.window_end = bounds.peer_end;
594
- } else {
595
- bounds.window_end = FindOrderedRangeBound<false>(range_collection, bounds.range_sense, bounds.valid_start,
596
- row_idx, boundary_end, expr_idx);
597
- }
598
- break;
599
- }
600
- case WindowBoundary::EXPR_FOLLOWING_RANGE: {
601
- if (boundary_end.CellIsNull(expr_idx)) {
602
- bounds.window_end = bounds.peer_end;
603
- } else {
604
- bounds.window_end = FindOrderedRangeBound<false>(range_collection, bounds.range_sense, row_idx,
605
- bounds.valid_end, boundary_end, expr_idx);
606
- }
607
- break;
608
- }
609
- default:
610
- throw InternalException("Unsupported window end boundary");
611
- }
612
-
613
- // clamp windows to partitions if they should exceed
614
- if (bounds.window_start < (int64_t)bounds.partition_start) {
615
- bounds.window_start = bounds.partition_start;
616
- }
617
- if (bounds.window_start > (int64_t)bounds.partition_end) {
618
- bounds.window_start = bounds.partition_end;
619
- }
620
- if (bounds.window_end < (int64_t)bounds.partition_start) {
621
- bounds.window_end = bounds.partition_start;
622
- }
623
- if (bounds.window_end > (int64_t)bounds.partition_end) {
624
- bounds.window_end = bounds.partition_end;
625
- }
626
-
627
- if (bounds.window_start < 0 || bounds.window_end < 0) {
628
- throw InternalException("Failed to compute window boundaries");
112
+ throw InternalException("Window aggregate type %s", ExpressionTypeToString(wexpr.type));
629
113
  }
630
114
  }
631
115
 
632
- struct WindowExecutor {
633
- static bool IsConstantAggregate(const BoundWindowExpression &wexpr);
634
-
635
- WindowExecutor(BoundWindowExpression &wexpr, ClientContext &context, const ValidityMask &partition_mask,
636
- const idx_t count);
637
-
638
- void Sink(DataChunk &input_chunk, const idx_t input_idx, const idx_t total_count);
639
- void Finalize(WindowAggregationMode mode);
640
-
641
- void Evaluate(idx_t row_idx, DataChunk &input_chunk, Vector &result, const ValidityMask &partition_mask,
642
- const ValidityMask &order_mask);
643
-
644
- // The function
645
- BoundWindowExpression &wexpr;
646
-
647
- // Frame management
648
- WindowBoundariesState bounds;
649
- uint64_t dense_rank = 1;
650
- uint64_t rank_equal = 0;
651
- uint64_t rank = 1;
652
-
653
- // Expression collections
654
- DataChunk payload_collection;
655
- ExpressionExecutor payload_executor;
656
- DataChunk payload_chunk;
657
-
658
- ExpressionExecutor filter_executor;
659
- ValidityMask filter_mask;
660
- vector<validity_t> filter_bits;
661
- SelectionVector filter_sel;
662
-
663
- // LEAD/LAG Evaluation
664
- WindowInputExpression leadlag_offset;
665
- WindowInputExpression leadlag_default;
666
-
667
- // evaluate boundaries if present. Parser has checked boundary types.
668
- WindowInputExpression boundary_start;
669
- WindowInputExpression boundary_end;
670
-
671
- // evaluate RANGE expressions, if needed
672
- WindowInputColumn range;
673
-
674
- // IGNORE NULLS
675
- ValidityMask ignore_nulls;
676
-
677
- // build a segment tree for frame-adhering aggregates
678
- // see http://www.vldb.org/pvldb/vol8/p1058-leis.pdf
679
- unique_ptr<WindowSegmentTree> segment_tree = nullptr;
680
-
681
- // all aggregate values are the same for each partition
682
- unique_ptr<WindowConstantAggregate> constant_aggregate = nullptr;
683
- };
684
-
685
- bool WindowExecutor::IsConstantAggregate(const BoundWindowExpression &wexpr) {
686
- if (!wexpr.aggregate) {
687
- return false;
688
- }
689
-
690
- // COUNT(*) is already handled efficiently by segment trees.
691
- if (wexpr.children.empty()) {
692
- return false;
693
- }
694
-
695
- /*
696
- The default framing option is RANGE UNBOUNDED PRECEDING, which
697
- is the same as RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT
698
- ROW; it sets the frame to be all rows from the partition start
699
- up through the current row's last peer (a row that the window's
700
- ORDER BY clause considers equivalent to the current row; all
701
- rows are peers if there is no ORDER BY). In general, UNBOUNDED
702
- PRECEDING means that the frame starts with the first row of the
703
- partition, and similarly UNBOUNDED FOLLOWING means that the
704
- frame ends with the last row of the partition, regardless of
705
- RANGE, ROWS or GROUPS mode. In ROWS mode, CURRENT ROW means that
706
- the frame starts or ends with the current row; but in RANGE or
707
- GROUPS mode it means that the frame starts or ends with the
708
- current row's first or last peer in the ORDER BY ordering. The
709
- offset PRECEDING and offset FOLLOWING options vary in meaning
710
- depending on the frame mode.
711
- */
712
- switch (wexpr.start) {
713
- case WindowBoundary::UNBOUNDED_PRECEDING:
714
- break;
715
- case WindowBoundary::CURRENT_ROW_RANGE:
716
- if (!wexpr.orders.empty()) {
717
- return false;
718
- }
719
- break;
720
- default:
721
- return false;
722
- }
723
-
724
- switch (wexpr.end) {
725
- case WindowBoundary::UNBOUNDED_FOLLOWING:
726
- break;
727
- case WindowBoundary::CURRENT_ROW_RANGE:
728
- if (!wexpr.orders.empty()) {
729
- return false;
730
- }
731
- break;
732
- default:
733
- return false;
734
- }
735
-
736
- return true;
737
- }
738
-
739
- WindowExecutor::WindowExecutor(BoundWindowExpression &wexpr, ClientContext &context, const ValidityMask &partition_mask,
740
- const idx_t count)
741
- : wexpr(wexpr), bounds(wexpr, count), payload_collection(), payload_executor(context), filter_executor(context),
742
- leadlag_offset(wexpr.offset_expr.get(), context), leadlag_default(wexpr.default_expr.get(), context),
743
- boundary_start(wexpr.start_expr.get(), context), boundary_end(wexpr.end_expr.get(), context),
744
- range((bounds.has_preceding_range || bounds.has_following_range) ? wexpr.orders[0].expression.get() : nullptr,
745
- context, count)
746
-
747
- {
748
- // TODO we could evaluate those expressions in parallel
749
-
750
- // Check for constant aggregate
751
- if (IsConstantAggregate(wexpr)) {
752
- constant_aggregate =
753
- make_uniq<WindowConstantAggregate>(AggregateObject(wexpr), wexpr.return_type, partition_mask, count);
754
- }
755
-
756
- // evaluate the FILTER clause and stuff it into a large mask for compactness and reuse
757
- if (wexpr.filter_expr) {
758
- // Start with all invalid and set the ones that pass
759
- filter_bits.resize(ValidityMask::ValidityMaskSize(count), 0);
760
- filter_mask.Initialize(filter_bits.data());
761
- filter_executor.AddExpression(*wexpr.filter_expr);
762
- filter_sel.Initialize(STANDARD_VECTOR_SIZE);
763
- }
764
-
765
- // TODO: child may be a scalar, don't need to materialize the whole collection then
766
-
767
- // evaluate inner expressions of window functions, could be more complex
768
- PrepareInputExpressions(wexpr.children, payload_executor, payload_chunk);
769
-
770
- auto types = payload_chunk.GetTypes();
771
- if (!types.empty()) {
772
- payload_collection.Initialize(Allocator::Get(context), types);
773
- }
774
- }
775
-
776
- void WindowExecutor::Sink(DataChunk &input_chunk, const idx_t input_idx, const idx_t total_count) {
777
- // Single pass over the input to produce the global data.
778
- // Vectorisation for the win...
779
-
780
- // Set up a validity mask for IGNORE NULLS
781
- bool check_nulls = false;
782
- if (wexpr.ignore_nulls) {
783
- switch (wexpr.type) {
784
- case ExpressionType::WINDOW_LEAD:
785
- case ExpressionType::WINDOW_LAG:
786
- case ExpressionType::WINDOW_FIRST_VALUE:
787
- case ExpressionType::WINDOW_LAST_VALUE:
788
- case ExpressionType::WINDOW_NTH_VALUE:
789
- check_nulls = true;
790
- break;
791
- default:
792
- break;
793
- }
794
- }
795
-
796
- const auto count = input_chunk.size();
797
-
798
- idx_t filtered = 0;
799
- SelectionVector *filtering = nullptr;
800
- if (wexpr.filter_expr) {
801
- filtering = &filter_sel;
802
- filtered = filter_executor.SelectExpression(input_chunk, filter_sel);
803
- for (idx_t f = 0; f < filtered; ++f) {
804
- filter_mask.SetValid(input_idx + filter_sel[f]);
805
- }
806
- }
807
-
808
- if (!wexpr.children.empty()) {
809
- payload_chunk.Reset();
810
- payload_executor.Execute(input_chunk, payload_chunk);
811
- payload_chunk.Verify();
812
- if (constant_aggregate) {
813
- constant_aggregate->Sink(payload_chunk, filtering, filtered);
814
- } else {
815
- payload_collection.Append(payload_chunk, true);
816
- }
817
-
818
- // process payload chunks while they are still piping hot
819
- if (check_nulls) {
820
- UnifiedVectorFormat vdata;
821
- payload_chunk.data[0].ToUnifiedFormat(count, vdata);
822
- if (!vdata.validity.AllValid()) {
823
- // Lazily materialise the contents when we find the first NULL
824
- if (ignore_nulls.AllValid()) {
825
- ignore_nulls.Initialize(total_count);
826
- }
827
- // Write to the current position
828
- if (input_idx % ValidityMask::BITS_PER_VALUE == 0) {
829
- // If we are at the edge of an output entry, just copy the entries
830
- auto dst = ignore_nulls.GetData() + ignore_nulls.EntryCount(input_idx);
831
- auto src = vdata.validity.GetData();
832
- for (auto entry_count = vdata.validity.EntryCount(count); entry_count-- > 0;) {
833
- *dst++ = *src++;
834
- }
835
- } else {
836
- // If not, we have ragged data and need to copy one bit at a time.
837
- for (idx_t i = 0; i < count; ++i) {
838
- ignore_nulls.Set(input_idx + i, vdata.validity.RowIsValid(i));
839
- }
840
- }
841
- }
842
- }
843
- }
844
-
845
- range.Append(input_chunk);
846
- }
847
-
848
- void WindowExecutor::Finalize(WindowAggregationMode mode) {
849
- // build a segment tree for frame-adhering aggregates
850
- // see http://www.vldb.org/pvldb/vol8/p1058-leis.pdf
851
- if (constant_aggregate) {
852
- constant_aggregate->Finalize();
853
- } else if (wexpr.aggregate) {
854
- segment_tree = make_uniq<WindowSegmentTree>(AggregateObject(wexpr), wexpr.return_type, &payload_collection,
855
- filter_mask, mode);
856
- }
857
- }
858
-
859
- void WindowExecutor::Evaluate(idx_t row_idx, DataChunk &input_chunk, Vector &result, const ValidityMask &partition_mask,
860
- const ValidityMask &order_mask) {
861
- // Evaluate the row-level arguments
862
- boundary_start.Execute(input_chunk);
863
- boundary_end.Execute(input_chunk);
864
-
865
- leadlag_offset.Execute(input_chunk);
866
- leadlag_default.Execute(input_chunk);
867
-
868
- // this is the main loop, go through all sorted rows and compute window function result
869
- for (idx_t output_offset = 0; output_offset < input_chunk.size(); ++output_offset, ++row_idx) {
870
- // special case, OVER (), aggregate over everything
871
- bounds.Update(row_idx, range, output_offset, boundary_start, boundary_end, partition_mask, order_mask);
872
- if (WindowNeedsRank(wexpr)) {
873
- if (!bounds.is_same_partition || row_idx == 0) { // special case for first row, need to init
874
- dense_rank = 1;
875
- rank = 1;
876
- rank_equal = 0;
877
- } else if (!bounds.is_peer) {
878
- dense_rank++;
879
- rank += rank_equal;
880
- rank_equal = 0;
881
- }
882
- rank_equal++;
883
- }
884
-
885
- // if no values are read for window, result is NULL
886
- if (bounds.window_start >= bounds.window_end) {
887
- FlatVector::SetNull(result, output_offset, true);
888
- continue;
889
- }
890
-
891
- switch (wexpr.type) {
892
- case ExpressionType::WINDOW_AGGREGATE: {
893
- if (constant_aggregate) {
894
- constant_aggregate->Compute(result, output_offset, bounds.window_start, bounds.window_end);
895
- } else {
896
- segment_tree->Compute(result, output_offset, bounds.window_start, bounds.window_end);
897
- }
898
- break;
899
- }
900
- case ExpressionType::WINDOW_ROW_NUMBER: {
901
- auto rdata = FlatVector::GetData<int64_t>(result);
902
- rdata[output_offset] = row_idx - bounds.partition_start + 1;
903
- break;
904
- }
905
- case ExpressionType::WINDOW_RANK_DENSE: {
906
- auto rdata = FlatVector::GetData<int64_t>(result);
907
- rdata[output_offset] = dense_rank;
908
- break;
909
- }
910
- case ExpressionType::WINDOW_RANK: {
911
- auto rdata = FlatVector::GetData<int64_t>(result);
912
- rdata[output_offset] = rank;
913
- break;
914
- }
915
- case ExpressionType::WINDOW_PERCENT_RANK: {
916
- int64_t denom = (int64_t)bounds.partition_end - bounds.partition_start - 1;
917
- double percent_rank = denom > 0 ? ((double)rank - 1) / denom : 0;
918
- auto rdata = FlatVector::GetData<double>(result);
919
- rdata[output_offset] = percent_rank;
920
- break;
921
- }
922
- case ExpressionType::WINDOW_CUME_DIST: {
923
- int64_t denom = (int64_t)bounds.partition_end - bounds.partition_start;
924
- double cume_dist = denom > 0 ? ((double)(bounds.peer_end - bounds.partition_start)) / denom : 0;
925
- auto rdata = FlatVector::GetData<double>(result);
926
- rdata[output_offset] = cume_dist;
927
- break;
928
- }
929
- case ExpressionType::WINDOW_NTILE: {
930
- D_ASSERT(payload_collection.ColumnCount() == 1);
931
- if (CellIsNull(payload_collection, 0, row_idx)) {
932
- FlatVector::SetNull(result, output_offset, true);
933
- } else {
934
- auto n_param = GetCell<int64_t>(payload_collection, 0, row_idx);
935
- if (n_param < 1) {
936
- throw InvalidInputException("Argument for ntile must be greater than zero");
937
- }
938
- // With thanks from SQLite's ntileValueFunc()
939
- int64_t n_total = bounds.partition_end - bounds.partition_start;
940
- if (n_param > n_total) {
941
- // more groups allowed than we have values
942
- // map every entry to a unique group
943
- n_param = n_total;
944
- }
945
- int64_t n_size = (n_total / n_param);
946
- // find the row idx within the group
947
- D_ASSERT(row_idx >= bounds.partition_start);
948
- int64_t adjusted_row_idx = row_idx - bounds.partition_start;
949
- // now compute the ntile
950
- int64_t n_large = n_total - n_param * n_size;
951
- int64_t i_small = n_large * (n_size + 1);
952
- int64_t result_ntile;
953
-
954
- D_ASSERT((n_large * (n_size + 1) + (n_param - n_large) * n_size) == n_total);
955
-
956
- if (adjusted_row_idx < i_small) {
957
- result_ntile = 1 + adjusted_row_idx / (n_size + 1);
958
- } else {
959
- result_ntile = 1 + n_large + (adjusted_row_idx - i_small) / n_size;
960
- }
961
- // result has to be between [1, NTILE]
962
- D_ASSERT(result_ntile >= 1 && result_ntile <= n_param);
963
- auto rdata = FlatVector::GetData<int64_t>(result);
964
- rdata[output_offset] = result_ntile;
965
- }
966
- break;
967
- }
968
- case ExpressionType::WINDOW_LEAD:
969
- case ExpressionType::WINDOW_LAG: {
970
- int64_t offset = 1;
971
- if (wexpr.offset_expr) {
972
- offset = leadlag_offset.GetCell<int64_t>(output_offset);
973
- }
974
- int64_t val_idx = (int64_t)row_idx;
975
- if (wexpr.type == ExpressionType::WINDOW_LEAD) {
976
- val_idx += offset;
977
- } else {
978
- val_idx -= offset;
979
- }
980
-
981
- idx_t delta = 0;
982
- if (val_idx < (int64_t)row_idx) {
983
- // Count backwards
984
- delta = idx_t(row_idx - val_idx);
985
- val_idx = FindPrevStart(ignore_nulls, bounds.partition_start, row_idx, delta);
986
- } else if (val_idx > (int64_t)row_idx) {
987
- delta = idx_t(val_idx - row_idx);
988
- val_idx = FindNextStart(ignore_nulls, row_idx + 1, bounds.partition_end, delta);
989
- }
990
- // else offset is zero, so don't move.
991
-
992
- if (!delta) {
993
- CopyCell(payload_collection, 0, val_idx, result, output_offset);
994
- } else if (wexpr.default_expr) {
995
- leadlag_default.CopyCell(result, output_offset);
996
- } else {
997
- FlatVector::SetNull(result, output_offset, true);
998
- }
999
- break;
1000
- }
1001
- case ExpressionType::WINDOW_FIRST_VALUE: {
1002
- // Same as NTH_VALUE(..., 1)
1003
- idx_t n = 1;
1004
- const auto first_idx = FindNextStart(ignore_nulls, bounds.window_start, bounds.window_end, n);
1005
- if (!n) {
1006
- CopyCell(payload_collection, 0, first_idx, result, output_offset);
1007
- } else {
1008
- FlatVector::SetNull(result, output_offset, true);
1009
- }
1010
- break;
1011
- }
1012
- case ExpressionType::WINDOW_LAST_VALUE: {
1013
- idx_t n = 1;
1014
- const auto last_idx = FindPrevStart(ignore_nulls, bounds.window_start, bounds.window_end, n);
1015
- if (!n) {
1016
- CopyCell(payload_collection, 0, last_idx, result, output_offset);
1017
- } else {
1018
- FlatVector::SetNull(result, output_offset, true);
1019
- }
1020
- break;
1021
- }
1022
- case ExpressionType::WINDOW_NTH_VALUE: {
1023
- D_ASSERT(payload_collection.ColumnCount() == 2);
1024
- // Returns value evaluated at the row that is the n'th row of the window frame (counting from 1);
1025
- // returns NULL if there is no such row.
1026
- if (CellIsNull(payload_collection, 1, row_idx)) {
1027
- FlatVector::SetNull(result, output_offset, true);
1028
- } else {
1029
- auto n_param = GetCell<int64_t>(payload_collection, 1, row_idx);
1030
- if (n_param < 1) {
1031
- FlatVector::SetNull(result, output_offset, true);
1032
- } else {
1033
- auto n = idx_t(n_param);
1034
- const auto nth_index = FindNextStart(ignore_nulls, bounds.window_start, bounds.window_end, n);
1035
- if (!n) {
1036
- CopyCell(payload_collection, 0, nth_index, result, output_offset);
1037
- } else {
1038
- FlatVector::SetNull(result, output_offset, true);
1039
- }
1040
- }
1041
- }
1042
- break;
1043
- }
1044
- default:
1045
- throw InternalException("Window aggregate type %s", ExpressionTypeToString(wexpr.type));
1046
- }
1047
- }
1048
-
1049
- result.Verify(input_chunk.size());
1050
- }
1051
-
1052
116
  //===--------------------------------------------------------------------===//
1053
117
  // Sink
1054
118
  //===--------------------------------------------------------------------===//
@@ -1060,9 +124,11 @@ SinkResultType PhysicalWindow::Sink(ExecutionContext &context, DataChunk &chunk,
1060
124
  return SinkResultType::NEED_MORE_INPUT;
1061
125
  }
1062
126
 
1063
- void PhysicalWindow::Combine(ExecutionContext &context, GlobalSinkState &gstate_p, LocalSinkState &lstate_p) const {
1064
- auto &lstate = lstate_p.Cast<WindowLocalSinkState>();
127
+ SinkCombineResultType PhysicalWindow::Combine(ExecutionContext &context, OperatorSinkCombineInput &input) const {
128
+ auto &lstate = input.local_state.Cast<WindowLocalSinkState>();
1065
129
  lstate.Combine();
130
+
131
+ return SinkCombineResultType::FINISHED;
1066
132
  }
1067
133
 
1068
134
  unique_ptr<LocalSinkState> PhysicalWindow::GetLocalSinkState(ExecutionContext &context) const {
@@ -1075,8 +141,8 @@ unique_ptr<GlobalSinkState> PhysicalWindow::GetGlobalSinkState(ClientContext &co
1075
141
  }
1076
142
 
1077
143
  SinkFinalizeType PhysicalWindow::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
1078
- GlobalSinkState &gstate_p) const {
1079
- auto &state = gstate_p.Cast<WindowGlobalSinkState>();
144
+ OperatorSinkFinalizeInput &input) const {
145
+ auto &state = input.global_state.Cast<WindowGlobalSinkState>();
1080
146
 
1081
147
  // Did we get any data?
1082
148
  if (!state.global_partition->count) {
@@ -1106,64 +172,97 @@ SinkFinalizeType PhysicalWindow::Finalize(Pipeline &pipeline, Event &event, Clie
1106
172
  //===--------------------------------------------------------------------===//
1107
173
  // Source
1108
174
  //===--------------------------------------------------------------------===//
175
+ class WindowPartitionSourceState;
176
+
1109
177
  class WindowGlobalSourceState : public GlobalSourceState {
1110
178
  public:
1111
- explicit WindowGlobalSourceState(WindowGlobalSinkState &gsink) : gsink(*gsink.global_partition), next_bin(0) {
1112
- }
179
+ using HashGroupSourcePtr = unique_ptr<WindowPartitionSourceState>;
180
+ using ScannerPtr = unique_ptr<RowDataCollectionScanner>;
181
+ using Task = std::pair<WindowPartitionSourceState *, ScannerPtr>;
1113
182
 
1114
- PartitionGlobalSinkState &gsink;
1115
- //! The output read position.
1116
- atomic<idx_t> next_bin;
183
+ WindowGlobalSourceState(ClientContext &context_p, WindowGlobalSinkState &gsink_p);
184
+
185
+ //! Get the next task
186
+ Task NextTask(idx_t hash_bin);
187
+
188
+ //! Context for executing computations
189
+ ClientContext &context;
190
+ //! All the sunk data
191
+ WindowGlobalSinkState &gsink;
192
+ //! The next group to build.
193
+ atomic<idx_t> next_build;
194
+ //! The built groups
195
+ vector<HashGroupSourcePtr> built;
196
+ //! Serialise access to the built hash groups
197
+ mutable mutex built_lock;
198
+ //! The number of unfinished tasks
199
+ atomic<idx_t> tasks_remaining;
1117
200
 
1118
201
  public:
1119
202
  idx_t MaxThreads() override {
1120
- // If there is only one partition, we have to process it on one thread.
1121
- if (!gsink.grouping_data) {
1122
- return 1;
1123
- }
203
+ return tasks_remaining;
204
+ }
1124
205
 
1125
- // If there is not a lot of data, process serially.
1126
- if (gsink.count < STANDARD_ROW_GROUPS_SIZE) {
1127
- return 1;
206
+ private:
207
+ Task CreateTask(idx_t hash_bin);
208
+ Task StealWork();
209
+ };
210
+
211
+ WindowGlobalSourceState::WindowGlobalSourceState(ClientContext &context_p, WindowGlobalSinkState &gsink_p)
212
+ : context(context_p), gsink(gsink_p), next_build(0), tasks_remaining(0) {
213
+ auto &hash_groups = gsink.global_partition->hash_groups;
214
+
215
+ auto &gpart = gsink.global_partition;
216
+ if (hash_groups.empty()) {
217
+ // OVER()
218
+ built.resize(1);
219
+ if (gpart->rows) {
220
+ tasks_remaining += gpart->rows->blocks.size();
1128
221
  }
222
+ } else {
223
+ built.resize(hash_groups.size());
224
+ idx_t batch_base = 0;
225
+ for (auto &hash_group : hash_groups) {
226
+ if (!hash_group) {
227
+ continue;
228
+ }
229
+ auto &global_sort_state = *hash_group->global_sort;
230
+ if (global_sort_state.sorted_blocks.empty()) {
231
+ continue;
232
+ }
233
+
234
+ D_ASSERT(global_sort_state.sorted_blocks.size() == 1);
235
+ auto &sb = *global_sort_state.sorted_blocks[0];
236
+ auto &sd = *sb.payload_data;
237
+ tasks_remaining += sd.data_blocks.size();
1129
238
 
1130
- return gsink.hash_groups.size();
239
+ hash_group->batch_base = batch_base;
240
+ batch_base += sd.data_blocks.size();
241
+ }
1131
242
  }
1132
- };
243
+ }
1133
244
 
1134
- // Per-thread read state
1135
- class WindowLocalSourceState : public LocalSourceState {
245
+ // Per-bin evaluation state (build and evaluate)
246
+ class WindowPartitionSourceState {
1136
247
  public:
1137
248
  using HashGroupPtr = unique_ptr<PartitionGlobalHashGroup>;
1138
- using WindowExecutorPtr = unique_ptr<WindowExecutor>;
1139
- using WindowExecutors = vector<WindowExecutorPtr>;
249
+ using ExecutorPtr = unique_ptr<WindowExecutor>;
250
+ using Executors = vector<ExecutorPtr>;
1140
251
 
1141
- WindowLocalSourceState(const PhysicalWindow &op_p, ExecutionContext &context, WindowGlobalSourceState &gsource)
1142
- : context(context.client), op(op_p), gsink(gsource.gsink) {
1143
-
1144
- vector<LogicalType> output_types;
1145
- for (idx_t expr_idx = 0; expr_idx < op.select_list.size(); ++expr_idx) {
1146
- D_ASSERT(op.select_list[expr_idx]->GetExpressionClass() == ExpressionClass::BOUND_WINDOW);
1147
- auto &wexpr = op.select_list[expr_idx]->Cast<BoundWindowExpression>();
1148
- output_types.emplace_back(wexpr.return_type);
1149
- }
1150
- output_chunk.Initialize(Allocator::Get(context.client), output_types);
1151
-
1152
- const auto &input_types = gsink.payload_types;
1153
- layout.Initialize(input_types);
1154
- input_chunk.Initialize(gsink.allocator, input_types);
252
+ WindowPartitionSourceState(ClientContext &context, WindowGlobalSourceState &gsource)
253
+ : context(context), op(gsource.gsink.op), gsource(gsource), read_block_idx(0), unscanned(0) {
254
+ layout.Initialize(gsource.gsink.global_partition->payload_types);
1155
255
  }
1156
256
 
257
+ unique_ptr<RowDataCollectionScanner> GetScanner() const;
1157
258
  void MaterializeSortedData();
1158
- void GeneratePartition(WindowGlobalSinkState &gstate, const idx_t hash_bin);
1159
- void Scan(DataChunk &chunk);
259
+ void BuildPartition(WindowGlobalSinkState &gstate, const idx_t hash_bin);
1160
260
 
1161
- HashGroupPtr hash_group;
1162
261
  ClientContext &context;
1163
262
  const PhysicalWindow &op;
263
+ WindowGlobalSourceState &gsource;
1164
264
 
1165
- PartitionGlobalSinkState &gsink;
1166
-
265
+ HashGroupPtr hash_group;
1167
266
  //! The generated input chunks
1168
267
  unique_ptr<RowDataCollection> rows;
1169
268
  unique_ptr<RowDataCollection> heap;
@@ -1174,20 +273,21 @@ public:
1174
273
  //! The order boundary mask
1175
274
  vector<validity_t> order_bits;
1176
275
  ValidityMask order_mask;
276
+ //! External paging
277
+ bool external;
1177
278
  //! The current execution functions
1178
- WindowExecutors window_execs;
279
+ Executors executors;
1179
280
 
1180
- //! The read partition
281
+ //! The bin number
1181
282
  idx_t hash_bin;
1182
- //! The read cursor
1183
- unique_ptr<RowDataCollectionScanner> scanner;
1184
- //! Buffer for the inputs
1185
- DataChunk input_chunk;
1186
- //! Buffer for window results
1187
- DataChunk output_chunk;
283
+
284
+ //! The next block to read.
285
+ mutable atomic<idx_t> read_block_idx;
286
+ //! The number of remaining unscanned blocks.
287
+ atomic<idx_t> unscanned;
1188
288
  };
1189
289
 
1190
- void WindowLocalSourceState::MaterializeSortedData() {
290
+ void WindowPartitionSourceState::MaterializeSortedData() {
1191
291
  auto &global_sort_state = *hash_group->global_sort;
1192
292
  if (global_sort_state.sorted_blocks.empty()) {
1193
293
  return;
@@ -1226,7 +326,21 @@ void WindowLocalSourceState::MaterializeSortedData() {
1226
326
  [&](idx_t c, const unique_ptr<RowDataBlock> &b) { return c + b->count; });
1227
327
  }
1228
328
 
1229
- void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, const idx_t hash_bin_p) {
329
+ unique_ptr<RowDataCollectionScanner> WindowPartitionSourceState::GetScanner() const {
330
+ auto &gsink = *gsource.gsink.global_partition;
331
+ if ((gsink.rows && !hash_bin) || hash_bin < gsink.hash_groups.size()) {
332
+ const auto block_idx = read_block_idx++;
333
+ if (block_idx >= rows->blocks.size()) {
334
+ return nullptr;
335
+ }
336
+ // Second pass can flush
337
+ --gsource.tasks_remaining;
338
+ return make_uniq<RowDataCollectionScanner>(*rows, *heap, layout, external, block_idx, true);
339
+ }
340
+ return nullptr;
341
+ }
342
+
343
+ void WindowPartitionSourceState::BuildPartition(WindowGlobalSinkState &gstate, const idx_t hash_bin_p) {
1230
344
  // Get rid of any stale data
1231
345
  hash_bin = hash_bin_p;
1232
346
 
@@ -1236,11 +350,12 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
1236
350
  // 3. Multiple partitions (sorting and hashing)
1237
351
 
1238
352
  // How big is the partition?
353
+ auto &gpart = *gsource.gsink.global_partition;
1239
354
  idx_t count = 0;
1240
- if (hash_bin < gsink.hash_groups.size() && gsink.hash_groups[hash_bin]) {
1241
- count = gsink.hash_groups[hash_bin]->count;
1242
- } else if (gsink.rows && !hash_bin) {
1243
- count = gsink.count;
355
+ if (hash_bin < gpart.hash_groups.size() && gpart.hash_groups[hash_bin]) {
356
+ count = gpart.hash_groups[hash_bin]->count;
357
+ } else if (gpart.rows && !hash_bin) {
358
+ count = gpart.count;
1244
359
  } else {
1245
360
  return;
1246
361
  }
@@ -1256,19 +371,20 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
1256
371
  order_mask.Initialize(order_bits.data());
1257
372
 
1258
373
  // Scan the sorted data into new Collections
1259
- auto external = gsink.external;
1260
- if (gsink.rows && !hash_bin) {
374
+ external = gpart.external;
375
+ if (gpart.rows && !hash_bin) {
1261
376
  // Simple mask
1262
377
  partition_mask.SetValidUnsafe(0);
1263
378
  order_mask.SetValidUnsafe(0);
1264
379
  // No partition - align the heap blocks with the row blocks
1265
- rows = gsink.rows->CloneEmpty(gsink.rows->keep_pinned);
1266
- heap = gsink.strings->CloneEmpty(gsink.strings->keep_pinned);
1267
- RowDataCollectionScanner::AlignHeapBlocks(*rows, *heap, *gsink.rows, *gsink.strings, layout);
380
+ rows = gpart.rows->CloneEmpty(gpart.rows->keep_pinned);
381
+ heap = gpart.strings->CloneEmpty(gpart.strings->keep_pinned);
382
+ RowDataCollectionScanner::AlignHeapBlocks(*rows, *heap, *gpart.rows, *gpart.strings, layout);
1268
383
  external = true;
1269
- } else if (hash_bin < gsink.hash_groups.size() && gsink.hash_groups[hash_bin]) {
384
+ } else if (hash_bin < gpart.hash_groups.size()) {
1270
385
  // Overwrite the collections with the sorted data
1271
- hash_group = std::move(gsink.hash_groups[hash_bin]);
386
+ D_ASSERT(gpart.hash_groups[hash_bin].get());
387
+ hash_group = std::move(gpart.hash_groups[hash_bin]);
1272
388
  hash_group->ComputeMasks(partition_mask, order_mask);
1273
389
  external = hash_group->global_sort->external;
1274
390
  MaterializeSortedData();
@@ -1277,17 +393,18 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
1277
393
  }
1278
394
 
1279
395
  // Create the executors for each function
1280
- window_execs.clear();
396
+ executors.clear();
1281
397
  for (idx_t expr_idx = 0; expr_idx < op.select_list.size(); ++expr_idx) {
1282
398
  D_ASSERT(op.select_list[expr_idx]->GetExpressionClass() == ExpressionClass::BOUND_WINDOW);
1283
399
  auto &wexpr = op.select_list[expr_idx]->Cast<BoundWindowExpression>();
1284
- auto wexec = make_uniq<WindowExecutor>(wexpr, context, partition_mask, count);
1285
- window_execs.emplace_back(std::move(wexec));
400
+ auto wexec = WindowExecutorFactory(wexpr, context, partition_mask, order_mask, count, gstate.mode);
401
+ executors.emplace_back(std::move(wexec));
1286
402
  }
1287
403
 
1288
404
  // First pass over the input without flushing
1289
- // TODO: Factor out the constructor data as global state
1290
- scanner = make_uniq<RowDataCollectionScanner>(*rows, *heap, layout, external, false);
405
+ DataChunk input_chunk;
406
+ input_chunk.Initialize(gpart.allocator, gpart.payload_types);
407
+ auto scanner = make_uniq<RowDataCollectionScanner>(*rows, *heap, layout, external, false);
1291
408
  idx_t input_idx = 0;
1292
409
  while (true) {
1293
410
  input_chunk.Reset();
@@ -1297,38 +414,221 @@ void WindowLocalSourceState::GeneratePartition(WindowGlobalSinkState &gstate, co
1297
414
  }
1298
415
 
1299
416
  // TODO: Parallelization opportunity
1300
- for (auto &wexec : window_execs) {
417
+ for (auto &wexec : executors) {
1301
418
  wexec->Sink(input_chunk, input_idx, scanner->Count());
1302
419
  }
1303
420
  input_idx += input_chunk.size();
1304
421
  }
1305
422
 
1306
423
  // TODO: Parallelization opportunity
1307
- for (auto &wexec : window_execs) {
1308
- wexec->Finalize(gstate.mode);
424
+ for (auto &wexec : executors) {
425
+ wexec->Finalize();
1309
426
  }
1310
427
 
1311
428
  // External scanning assumes all blocks are swizzled.
1312
429
  scanner->ReSwizzle();
1313
430
 
1314
- // Second pass can flush
1315
- scanner->Reset(true);
431
+ // Start the block countdown
432
+ unscanned = rows->blocks.size();
433
+ }
434
+
435
+ // Per-thread scan state
436
+ class WindowLocalSourceState : public LocalSourceState {
437
+ public:
438
+ using ReadStatePtr = unique_ptr<WindowExecutorState>;
439
+ using ReadStates = vector<ReadStatePtr>;
440
+
441
+ explicit WindowLocalSourceState(WindowGlobalSourceState &gsource);
442
+ void UpdateBatchIndex();
443
+ bool NextPartition();
444
+ void Scan(DataChunk &chunk);
445
+
446
+ //! The shared source state
447
+ WindowGlobalSourceState &gsource;
448
+ //! The current bin being processed
449
+ idx_t hash_bin;
450
+ //! The current batch index (for output reordering)
451
+ idx_t batch_index;
452
+ //! The current source being processed
453
+ optional_ptr<WindowPartitionSourceState> partition_source;
454
+ //! The read cursor
455
+ unique_ptr<RowDataCollectionScanner> scanner;
456
+ //! Buffer for the inputs
457
+ DataChunk input_chunk;
458
+ //! Executor read states.
459
+ ReadStates read_states;
460
+ //! Buffer for window results
461
+ DataChunk output_chunk;
462
+ };
463
+
464
+ WindowLocalSourceState::WindowLocalSourceState(WindowGlobalSourceState &gsource)
465
+ : gsource(gsource), hash_bin(gsource.built.size()), batch_index(0) {
466
+ auto &gsink = *gsource.gsink.global_partition;
467
+ auto &op = gsource.gsink.op;
468
+
469
+ input_chunk.Initialize(gsink.allocator, gsink.payload_types);
470
+
471
+ vector<LogicalType> output_types;
472
+ for (idx_t expr_idx = 0; expr_idx < op.select_list.size(); ++expr_idx) {
473
+ D_ASSERT(op.select_list[expr_idx]->GetExpressionClass() == ExpressionClass::BOUND_WINDOW);
474
+ auto &wexpr = op.select_list[expr_idx]->Cast<BoundWindowExpression>();
475
+ output_types.emplace_back(wexpr.return_type);
476
+ }
477
+ output_chunk.Initialize(Allocator::Get(gsource.context), output_types);
478
+ }
479
+
480
+ WindowGlobalSourceState::Task WindowGlobalSourceState::CreateTask(idx_t hash_bin) {
481
+ // Build outside the lock so no one tries to steal before we are done.
482
+ auto partition_source = make_uniq<WindowPartitionSourceState>(context, *this);
483
+ partition_source->BuildPartition(gsink, hash_bin);
484
+ Task result(partition_source.get(), partition_source->GetScanner());
485
+
486
+ // Is there any data to scan?
487
+ if (result.second) {
488
+ lock_guard<mutex> built_guard(built_lock);
489
+ built[hash_bin] = std::move(partition_source);
490
+
491
+ return result;
492
+ }
493
+
494
+ return Task();
495
+ }
496
+
497
+ WindowGlobalSourceState::Task WindowGlobalSourceState::StealWork() {
498
+ for (idx_t hash_bin = 0; hash_bin < built.size(); ++hash_bin) {
499
+ lock_guard<mutex> built_guard(built_lock);
500
+ auto &partition_source = built[hash_bin];
501
+ if (!partition_source) {
502
+ continue;
503
+ }
504
+
505
+ Task result(partition_source.get(), partition_source->GetScanner());
506
+
507
+ // Is there any data to scan?
508
+ if (result.second) {
509
+ return result;
510
+ }
511
+ }
512
+
513
+ // Nothing to steal
514
+ return Task();
515
+ }
516
+
517
+ WindowGlobalSourceState::Task WindowGlobalSourceState::NextTask(idx_t hash_bin) {
518
+ auto &hash_groups = gsink.global_partition->hash_groups;
519
+ const auto bin_count = built.size();
520
+
521
+ // Flush unneeded data
522
+ if (hash_bin < bin_count) {
523
+ // Lock and delete when all blocks have been scanned
524
+ // We do this here instead of in NextScan so the WindowLocalSourceState
525
+ // has a chance to delete its state objects first,
526
+ // which may reference the partition_source
527
+
528
+ // Delete data outside the lock in case it is slow
529
+ HashGroupSourcePtr killed;
530
+ lock_guard<mutex> built_guard(built_lock);
531
+ auto &partition_source = built[hash_bin];
532
+ if (partition_source && !partition_source->unscanned) {
533
+ killed = std::move(partition_source);
534
+ }
535
+ }
536
+
537
+ hash_bin = next_build++;
538
+ if (hash_bin < bin_count) {
539
+ // Find a non-empty hash group.
540
+ for (; hash_bin < hash_groups.size(); hash_bin = next_build++) {
541
+ if (hash_groups[hash_bin]) {
542
+ auto result = CreateTask(hash_bin);
543
+ if (result.second) {
544
+ return result;
545
+ }
546
+ }
547
+ }
548
+
549
+ // OVER() doesn't have a hash_group
550
+ if (hash_groups.empty()) {
551
+ auto result = CreateTask(hash_bin);
552
+ if (result.second) {
553
+ return result;
554
+ }
555
+ }
556
+ }
557
+
558
+ // Work stealing
559
+ while (!context.interrupted && tasks_remaining) {
560
+ auto result = StealWork();
561
+ if (result.second) {
562
+ return result;
563
+ }
564
+
565
+ // If there is nothing to steal but there are unfinished partitions,
566
+ // yield until any pending builds are done.
567
+ TaskScheduler::GetScheduler(context).YieldThread();
568
+ }
569
+
570
+ return Task();
571
+ }
572
+
573
+ void WindowLocalSourceState::UpdateBatchIndex() {
574
+ D_ASSERT(partition_source);
575
+ D_ASSERT(scanner.get());
576
+
577
+ batch_index = partition_source->hash_group ? partition_source->hash_group->batch_base : 0;
578
+ batch_index += scanner->BlockIndex();
579
+ }
580
+
581
+ bool WindowLocalSourceState::NextPartition() {
582
+ // Release old states before the source
583
+ scanner.reset();
584
+ read_states.clear();
585
+
586
+ // Get a partition_source that is not finished
587
+ while (!scanner) {
588
+ auto task = gsource.NextTask(hash_bin);
589
+ if (!task.first) {
590
+ return false;
591
+ }
592
+ partition_source = task.first;
593
+ scanner = std::move(task.second);
594
+ hash_bin = partition_source->hash_bin;
595
+ UpdateBatchIndex();
596
+ }
597
+
598
+ for (auto &wexec : partition_source->executors) {
599
+ read_states.emplace_back(wexec->GetExecutorState());
600
+ }
601
+
602
+ return true;
1316
603
  }
1317
604
 
1318
605
  void WindowLocalSourceState::Scan(DataChunk &result) {
1319
606
  D_ASSERT(scanner);
1320
607
  if (!scanner->Remaining()) {
1321
- return;
608
+ lock_guard<mutex> built_guard(gsource.built_lock);
609
+ --partition_source->unscanned;
610
+ scanner = partition_source->GetScanner();
611
+
612
+ if (!scanner) {
613
+ partition_source = nullptr;
614
+ read_states.clear();
615
+ return;
616
+ }
617
+
618
+ UpdateBatchIndex();
1322
619
  }
1323
620
 
1324
621
  const auto position = scanner->Scanned();
1325
622
  input_chunk.Reset();
1326
623
  scanner->Scan(input_chunk);
1327
624
 
625
+ auto &executors = partition_source->executors;
1328
626
  output_chunk.Reset();
1329
- for (idx_t expr_idx = 0; expr_idx < window_execs.size(); ++expr_idx) {
1330
- auto &executor = *window_execs[expr_idx];
1331
- executor.Evaluate(position, input_chunk, output_chunk.data[expr_idx], partition_mask, order_mask);
627
+ for (idx_t expr_idx = 0; expr_idx < executors.size(); ++expr_idx) {
628
+ auto &executor = *executors[expr_idx];
629
+ auto &lstate = *read_states[expr_idx];
630
+ auto &result = output_chunk.data[expr_idx];
631
+ executor.Evaluate(position, input_chunk, result, lstate);
1332
632
  }
1333
633
  output_chunk.SetCardinality(input_chunk);
1334
634
  output_chunk.Verify();
@@ -1345,43 +645,42 @@ void WindowLocalSourceState::Scan(DataChunk &result) {
1345
645
  }
1346
646
 
1347
647
  unique_ptr<LocalSourceState> PhysicalWindow::GetLocalSourceState(ExecutionContext &context,
1348
- GlobalSourceState &gstate_p) const {
1349
- auto &gstate = gstate_p.Cast<WindowGlobalSourceState>();
1350
- return make_uniq<WindowLocalSourceState>(*this, context, gstate);
648
+ GlobalSourceState &gsource_p) const {
649
+ auto &gsource = gsource_p.Cast<WindowGlobalSourceState>();
650
+ return make_uniq<WindowLocalSourceState>(gsource);
1351
651
  }
1352
652
 
1353
653
  unique_ptr<GlobalSourceState> PhysicalWindow::GetGlobalSourceState(ClientContext &context) const {
1354
654
  auto &gsink = sink_state->Cast<WindowGlobalSinkState>();
1355
- return make_uniq<WindowGlobalSourceState>(gsink);
655
+ return make_uniq<WindowGlobalSourceState>(context, gsink);
656
+ }
657
+
658
+ bool PhysicalWindow::SupportsBatchIndex() const {
659
+ // We can only preserve order for single partitioning
660
+ // or work stealing causes out of order batch numbers
661
+ auto &wexpr = select_list[0]->Cast<BoundWindowExpression>();
662
+ return wexpr.partitions.empty() && !wexpr.orders.empty();
663
+ }
664
+
665
+ OrderPreservationType PhysicalWindow::SourceOrder() const {
666
+ return SupportsBatchIndex() ? OrderPreservationType::FIXED_ORDER : OrderPreservationType::NO_ORDER;
667
+ }
668
+
669
+ idx_t PhysicalWindow::GetBatchIndex(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate_p,
670
+ LocalSourceState &lstate_p) const {
671
+ auto &lstate = lstate_p.Cast<WindowLocalSourceState>();
672
+ return lstate.batch_index;
1356
673
  }
1357
674
 
1358
675
  SourceResultType PhysicalWindow::GetData(ExecutionContext &context, DataChunk &chunk,
1359
676
  OperatorSourceInput &input) const {
1360
677
  auto &lsource = input.local_state.Cast<WindowLocalSourceState>();
1361
- auto &gsource = input.global_state.Cast<WindowGlobalSourceState>();
1362
- auto &gsink = sink_state->Cast<WindowGlobalSinkState>();
1363
-
1364
- auto &hash_groups = gsink.global_partition->hash_groups;
1365
- const auto bin_count = hash_groups.empty() ? 1 : hash_groups.size();
1366
-
1367
678
  while (chunk.size() == 0) {
1368
679
  // Move to the next bin if we are done.
1369
- while (!lsource.scanner || !lsource.scanner->Remaining()) {
1370
- lsource.scanner.reset();
1371
- lsource.rows.reset();
1372
- lsource.heap.reset();
1373
- lsource.hash_group.reset();
1374
- auto hash_bin = gsource.next_bin++;
1375
- if (hash_bin >= bin_count) {
680
+ while (!lsource.scanner) {
681
+ if (!lsource.NextPartition()) {
1376
682
  return chunk.size() > 0 ? SourceResultType::HAVE_MORE_OUTPUT : SourceResultType::FINISHED;
1377
683
  }
1378
-
1379
- for (; hash_bin < hash_groups.size(); hash_bin = gsource.next_bin++) {
1380
- if (hash_groups[hash_bin]) {
1381
- break;
1382
- }
1383
- }
1384
- lsource.GeneratePartition(gsink, hash_bin);
1385
684
  }
1386
685
 
1387
686
  lsource.Scan(chunk);