duckdb 0.8.2-dev150.0 → 0.8.2-dev1559.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (489)
  1. package/binding.gyp +15 -12
  2. package/binding.gyp.in +1 -1
  3. package/configure.py +1 -1
  4. package/duckdb_extension_config.cmake +10 -0
  5. package/package.json +1 -1
  6. package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
  7. package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
  8. package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
  9. package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
  10. package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
  11. package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
  12. package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
  13. package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
  14. package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
  15. package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
  16. package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
  17. package/src/duckdb/extension/icu/icu_extension.cpp +3 -3
  18. package/src/duckdb/extension/json/include/json_common.hpp +47 -231
  19. package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
  20. package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
  21. package/src/duckdb/extension/json/json_common.cpp +272 -40
  22. package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
  23. package/src/duckdb/extension/json/json_functions/json_transform.cpp +17 -37
  24. package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
  25. package/src/duckdb/extension/json/json_functions.cpp +24 -24
  26. package/src/duckdb/extension/json/json_scan.cpp +3 -6
  27. package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
  28. package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
  29. package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
  30. package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
  31. package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
  32. package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
  33. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
  34. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
  35. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
  36. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
  37. package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
  38. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
  39. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
  40. package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
  41. package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
  42. package/src/duckdb/extension/parquet/parquet_extension.cpp +192 -20
  43. package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
  44. package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
  45. package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
  46. package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
  47. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  48. package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
  49. package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
  50. package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
  51. package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
  52. package/src/duckdb/src/common/allocator.cpp +14 -2
  53. package/src/duckdb/src/common/arrow/arrow_appender.cpp +5 -11
  54. package/src/duckdb/src/common/assert.cpp +3 -0
  55. package/src/duckdb/src/common/enum_util.cpp +4619 -4446
  56. package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
  57. package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
  58. package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
  59. package/src/duckdb/src/common/exception.cpp +2 -2
  60. package/src/duckdb/src/common/extra_type_info.cpp +506 -0
  61. package/src/duckdb/src/common/file_system.cpp +19 -0
  62. package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
  63. package/src/duckdb/src/common/local_file_system.cpp +14 -14
  64. package/src/duckdb/src/common/multi_file_reader.cpp +184 -20
  65. package/src/duckdb/src/common/operator/cast_operators.cpp +35 -1
  66. package/src/duckdb/src/common/radix_partitioning.cpp +26 -8
  67. package/src/duckdb/src/common/re2_regex.cpp +1 -1
  68. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  69. package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
  70. package/src/duckdb/src/common/sort/partition_state.cpp +44 -11
  71. package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
  72. package/src/duckdb/src/common/types/bit.cpp +51 -0
  73. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
  74. package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
  75. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
  76. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
  77. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  78. package/src/duckdb/src/common/types/date.cpp +9 -0
  79. package/src/duckdb/src/common/types/list_segment.cpp +24 -74
  80. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
  81. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
  82. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
  83. package/src/duckdb/src/common/types/uuid.cpp +2 -2
  84. package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
  85. package/src/duckdb/src/common/types.cpp +8 -655
  86. package/src/duckdb/src/common/virtual_file_system.cpp +138 -1
  87. package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
  88. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
  89. package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
  90. package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
  91. package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
  92. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
  93. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
  94. package/src/duckdb/src/core_functions/function_list.cpp +4 -2
  95. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
  96. package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
  97. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
  98. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
  99. package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
  100. package/src/duckdb/src/execution/expression_executor.cpp +1 -1
  101. package/src/duckdb/src/execution/index/art/art.cpp +149 -139
  102. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
  103. package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
  104. package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
  105. package/src/duckdb/src/execution/index/art/node.cpp +113 -120
  106. package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
  107. package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
  108. package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
  109. package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
  110. package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
  111. package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
  112. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
  113. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
  114. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
  115. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
  116. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
  117. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
  118. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +414 -283
  119. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
  120. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
  121. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +28 -12
  122. package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
  123. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +23 -4
  124. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +41 -5
  125. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
  126. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
  127. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
  128. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
  129. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
  130. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
  131. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
  132. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  133. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
  134. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
  135. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -2
  136. package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
  137. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
  138. package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
  139. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
  140. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
  141. package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +13 -22
  142. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +17 -13
  143. package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
  144. package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
  145. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
  146. package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
  147. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
  148. package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
  149. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
  150. package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
  151. package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
  152. package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
  153. package/src/duckdb/src/function/function.cpp +3 -1
  154. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
  155. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
  156. package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
  157. package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
  158. package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
  159. package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
  160. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
  161. package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
  162. package/src/duckdb/src/function/table/read_csv.cpp +100 -17
  163. package/src/duckdb/src/function/table/table_scan.cpp +9 -0
  164. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  165. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
  166. package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
  167. package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
  168. package/src/duckdb/src/include/duckdb/common/dl.hpp +3 -1
  169. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +616 -584
  170. package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
  171. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
  172. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
  173. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
  174. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
  175. package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
  176. package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +219 -0
  177. package/src/duckdb/src/include/duckdb/common/file_system.hpp +2 -0
  178. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
  179. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
  180. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
  181. package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
  182. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +43 -3
  183. package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
  184. package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
  185. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
  186. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
  187. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -0
  188. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
  189. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
  190. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
  191. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
  192. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
  193. package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
  194. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
  195. package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
  196. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
  197. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -1
  198. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
  199. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
  200. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
  201. package/src/duckdb/src/include/duckdb/common/types.hpp +1 -15
  202. package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +38 -97
  203. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
  204. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
  205. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
  206. package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +3 -1
  207. package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
  208. package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
  209. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
  210. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
  211. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
  212. package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
  213. package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
  214. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +31 -11
  215. package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
  216. package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
  217. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +3 -1
  218. package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
  219. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -1
  220. package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +3 -1
  221. package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
  222. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +3 -1
  223. package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
  224. package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
  225. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
  226. package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
  227. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
  228. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
  229. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
  230. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
  231. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
  232. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
  233. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
  234. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
  235. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
  236. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -10
  237. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +1 -1
  238. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +1 -1
  239. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +12 -1
  240. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
  241. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
  242. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
  243. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
  244. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
  245. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
  246. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -1
  247. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
  248. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
  249. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
  250. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
  251. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
  252. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
  253. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
  254. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
  255. package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
  256. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
  257. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
  258. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
  259. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
  260. package/src/duckdb/src/include/duckdb/main/client_config.hpp +5 -0
  261. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  262. package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
  263. package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
  264. package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
  265. package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
  266. package/src/duckdb/src/include/duckdb/main/settings.hpp +39 -1
  267. package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
  268. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
  269. package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
  270. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
  271. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
  272. package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
  273. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
  274. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
  275. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
  276. package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
  277. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
  278. package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
  279. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
  280. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
  281. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
  282. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
  283. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  284. package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
  285. package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +3 -0
  286. package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +3 -0
  287. package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
  288. package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +3 -0
  289. package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
  290. package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
  291. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +3 -0
  292. package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +3 -0
  293. package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +3 -0
  294. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +3 -0
  295. package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
  296. package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
  297. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
  298. package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
  299. package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
  300. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
  301. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
  302. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
  303. package/src/duckdb/src/include/duckdb/planner/binder.hpp +12 -5
  304. package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
  305. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
  306. package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
  307. package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
  308. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
  309. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
  310. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
  311. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
  312. package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
  313. package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +4 -0
  314. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
  315. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
  316. package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
  317. package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
  318. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
  319. package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
  320. package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
  321. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
  322. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
  323. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
  324. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
  325. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  326. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
  327. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
  328. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
  329. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
  330. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
  331. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
  332. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
  333. package/src/duckdb/src/include/duckdb.h +28 -0
  334. package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
  335. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
  336. package/src/duckdb/src/main/config.cpp +4 -0
  337. package/src/duckdb/src/main/database.cpp +1 -1
  338. package/src/duckdb/src/main/extension/extension_helper.cpp +96 -89
  339. package/src/duckdb/src/main/extension/extension_install.cpp +6 -0
  340. package/src/duckdb/src/main/extension/extension_load.cpp +10 -1
  341. package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
  342. package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
  343. package/src/duckdb/src/main/relation.cpp +6 -5
  344. package/src/duckdb/src/main/settings/settings.cpp +79 -18
  345. package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
  346. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
  347. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
  348. package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
  349. package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
  350. package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
  351. package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
  352. package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
  353. package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
  354. package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
  355. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
  356. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
  357. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
  358. package/src/duckdb/src/optimizer/optimizer.cpp +51 -14
  359. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
  360. package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
  361. package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
  362. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
  363. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
  364. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
  365. package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
  366. package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
  367. package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
  368. package/src/duckdb/src/parallel/executor.cpp +15 -0
  369. package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
  370. package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
  371. package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
  372. package/src/duckdb/src/parser/expression/case_expression.cpp +0 -13
  373. package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
  374. package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
  375. package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
  376. package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
  377. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
  378. package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
  379. package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
  380. package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
  381. package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
  382. package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
  383. package/src/duckdb/src/parser/expression/parameter_expression.cpp +0 -12
  384. package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
  385. package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
  386. package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
  387. package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
  388. package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
  389. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
  390. package/src/duckdb/src/parser/parser.cpp +8 -2
  391. package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
  392. package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
  393. package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
  394. package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
  395. package/src/duckdb/src/parser/query_node.cpp +15 -37
  396. package/src/duckdb/src/parser/result_modifier.cpp +0 -74
  397. package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
  398. package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
  399. package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
  400. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
  401. package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -23
  402. package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
  403. package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
  404. package/src/duckdb/src/parser/tableref.cpp +0 -44
  405. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
  406. package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
  407. package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
  408. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
  409. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
  410. package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
  411. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
  412. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
  413. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
  414. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
  415. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
  416. package/src/duckdb/src/parser/transformer.cpp +15 -0
  417. package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
  418. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
  419. package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
  420. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
  421. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
  422. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +5 -4
  423. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
  424. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
  425. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -49
  426. package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
  427. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +61 -26
  428. package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
  429. package/src/duckdb/src/planner/binder.cpp +44 -31
  430. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
  431. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
  432. package/src/duckdb/src/planner/expression_binder.cpp +3 -0
  433. package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
  434. package/src/duckdb/src/planner/logical_operator.cpp +5 -0
  435. package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
  436. package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
  437. package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
  438. package/src/duckdb/src/planner/operator/logical_get.cpp +9 -4
  439. package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
  440. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
  441. package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
  442. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
  443. package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
  444. package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
  445. package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
  446. package/src/duckdb/src/storage/compression/rle.cpp +0 -1
  447. package/src/duckdb/src/storage/data_table.cpp +1 -1
  448. package/src/duckdb/src/storage/local_storage.cpp +3 -3
  449. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +340 -0
  450. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
  451. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +86 -0
  452. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +166 -0
  453. package/src/duckdb/src/storage/serialization/serialize_types.cpp +127 -0
  454. package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
  455. package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
  456. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  457. package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
  458. package/src/duckdb/src/storage/table/row_group.cpp +25 -9
  459. package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
  460. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
  461. package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
  462. package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
  463. package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
  464. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
  465. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  466. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
  467. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
  468. package/src/duckdb/ub_src_common.cpp +2 -0
  469. package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
  470. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  471. package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
  472. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  473. package/src/duckdb/ub_src_function_scalar.cpp +2 -0
  474. package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
  475. package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
  476. package/src/duckdb/ub_src_optimizer.cpp +6 -0
  477. package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
  478. package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
  479. package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
  480. package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
  481. package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
  482. package/src/duckdb/ub_src_planner_operator.cpp +4 -0
  483. package/src/duckdb/ub_src_storage_serialization.cpp +10 -0
  484. package/src/statement.cpp +10 -3
  485. package/test/test_all_types.test.ts +233 -0
  486. package/tsconfig.json +1 -0
  487. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
  488. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
  489. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -1,5 +1,8 @@
1
1
  #include "duckdb/execution/radix_partitioned_hashtable.hpp"
2
2
 
3
+ #include "duckdb/common/radix_partitioning.hpp"
4
+ #include "duckdb/common/types/row/tuple_data_collection.hpp"
5
+ #include "duckdb/execution/executor.hpp"
3
6
  #include "duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp"
4
7
  #include "duckdb/parallel/event.hpp"
5
8
  #include "duckdb/parallel/task_scheduler.hpp"
@@ -59,8 +62,8 @@ class RadixHTGlobalState : public GlobalSinkState {
59
62
  public:
60
63
  explicit RadixHTGlobalState(ClientContext &context)
61
64
  : is_empty(true), multi_scan(true), partitioned(false),
62
- partition_info(
63
- MinValue<idx_t>(MAX_RADIX_PARTITIONS, TaskScheduler::GetScheduler(context).NumberOfThreads())) {
65
+ partition_info(make_uniq<RadixPartitionInfo>(
66
+ MinValue<idx_t>(MAX_RADIX_PARTITIONS, TaskScheduler::GetScheduler(context).NumberOfThreads()))) {
64
67
  }
65
68
 
66
69
  vector<unique_ptr<PartitionableHashTable>> intermediate_hts;
@@ -78,8 +81,16 @@ public:
78
81
  bool is_finalized = false;
79
82
  bool is_partitioned = false;
80
83
 
81
- RadixPartitionInfo partition_info;
84
+ unique_ptr<RadixPartitionInfo> partition_info;
82
85
  AggregateHTAppendState append_state;
86
+
87
+ //! Repartitioned HT info
88
+ bool repartitioned = false;
89
+ idx_t repartition_tasks_per_partition;
90
+ vector<vector<unique_ptr<PartitionableHashTable>>> repartition_tasks;
91
+ unique_array<atomic<idx_t>> repartition_tasks_assigned;
92
+ unique_array<atomic<idx_t>> repartition_tasks_done;
93
+ unique_array<atomic<bool>> finalize_assigned;
83
94
  };
84
95
 
85
96
  class RadixHTLocalState : public LocalSinkState {
@@ -146,9 +157,9 @@ void RadixPartitionedHashTable::Sink(ExecutionContext &context, DataChunk &chunk
146
157
  gstate.is_empty = gstate.is_empty && group_chunk.size() == 0;
147
158
  if (gstate.finalized_hts.empty()) {
148
159
  // Create a finalized ht in the global state, that we can populate
149
- gstate.finalized_hts.push_back(
150
- make_shared<GroupedAggregateHashTable>(context.client, Allocator::Get(context.client), group_types,
151
- op.payload_types, op.bindings, HtEntryType::HT_WIDTH_64));
160
+ gstate.finalized_hts.push_back(make_shared<GroupedAggregateHashTable>(
161
+ context.client, BufferAllocator::Get(context.client), group_types, op.payload_types, op.bindings,
162
+ HtEntryType::HT_WIDTH_64));
152
163
  }
153
164
  D_ASSERT(gstate.finalized_hts.size() == 1);
154
165
  D_ASSERT(gstate.finalized_hts[0]);
@@ -163,12 +174,15 @@ void RadixPartitionedHashTable::Sink(ExecutionContext &context, DataChunk &chunk
163
174
 
164
175
  if (!llstate.ht) {
165
176
  llstate.ht =
166
- make_uniq<PartitionableHashTable>(context.client, Allocator::Get(context.client), gstate.partition_info,
167
- group_types, op.payload_types, op.bindings);
177
+ make_uniq<PartitionableHashTable>(context.client, BufferAllocator::Get(context.client),
178
+ *gstate.partition_info, group_types, op.payload_types, op.bindings);
179
+ if (context.client.config.force_external) {
180
+ gstate.partitioned = true;
181
+ }
168
182
  }
169
183
 
170
184
  llstate.total_groups += llstate.ht->AddChunk(group_chunk, payload_input,
171
- gstate.partitioned && gstate.partition_info.n_partitions > 1, filter);
185
+ gstate.partitioned && gstate.partition_info->n_partitions > 1, filter);
172
186
  if (llstate.total_groups >= radix_limit) {
173
187
  gstate.partitioned = true;
174
188
  }
@@ -192,8 +206,8 @@ void RadixPartitionedHashTable::Combine(ExecutionContext &context, GlobalSinkSta
192
206
  return; // no data
193
207
  }
194
208
 
195
- if (!llstate.ht->IsPartitioned() && gstate.partition_info.n_partitions > 1 && gstate.partitioned) {
196
- llstate.ht->Partition();
209
+ if (!llstate.ht->IsPartitioned() && gstate.partition_info->n_partitions > 1 && gstate.partitioned) {
210
+ llstate.ht->Partition(true);
197
211
  }
198
212
 
199
213
  // we will never add new values to these HTs so we can drop the first part of the HT
@@ -207,13 +221,23 @@ void RadixPartitionedHashTable::Combine(ExecutionContext &context, GlobalSinkSta
207
221
  gstate.intermediate_hts.push_back(std::move(llstate.ht));
208
222
  }
209
223
 
224
+ void RadixPartitionedHashTable::InitializeFinalizedHTs(ClientContext &context, GlobalSinkState &gstate_p) const {
225
+ auto &gstate = gstate_p.Cast<RadixHTGlobalState>();
226
+ auto &allocator = BufferAllocator::Get(context);
227
+ gstate.finalized_hts.resize(gstate.partition_info->n_partitions);
228
+ for (idx_t r = 0; r < gstate.partition_info->n_partitions; r++) {
229
+ gstate.finalized_hts[r] = make_shared<GroupedAggregateHashTable>(
230
+ context, allocator, group_types, op.payload_types, op.bindings, HtEntryType::HT_WIDTH_64);
231
+ }
232
+ }
233
+
210
234
  bool RadixPartitionedHashTable::Finalize(ClientContext &context, GlobalSinkState &gstate_p) const {
211
235
  auto &gstate = gstate_p.Cast<RadixHTGlobalState>();
212
236
  D_ASSERT(!gstate.is_finalized);
213
237
  gstate.is_finalized = true;
214
238
 
215
239
  // special case if we have non-combinable aggregates
216
- // we have already aggreagted into a global shared HT that does not require any additional finalization steps
240
+ // we have already aggregated into a global shared HT that does not require any additional finalization steps
217
241
  if (ForceSingleHT(gstate)) {
218
242
  D_ASSERT(gstate.finalized_hts.size() <= 1);
219
243
  D_ASSERT(gstate.finalized_hts.empty() || gstate.finalized_hts[0]);
@@ -221,31 +245,17 @@ bool RadixPartitionedHashTable::Finalize(ClientContext &context, GlobalSinkState
221
245
  }
222
246
 
223
247
  // we can have two cases now, non-partitioned for few groups and radix-partitioned for very many groups.
224
- // go through all of the child hts and see if we ever called partition() on any of them
225
- // if we did, its the latter case.
226
- bool any_partitioned = false;
227
- for (auto &pht : gstate.intermediate_hts) {
228
- if (pht->IsPartitioned()) {
229
- any_partitioned = true;
230
- break;
231
- }
232
- }
233
-
234
- auto &allocator = Allocator::Get(context);
235
- if (any_partitioned) {
248
+ auto &allocator = BufferAllocator::Get(context);
249
+ if (AnyPartitioned(gstate_p)) {
236
250
  // if one is partitioned, all have to be
237
251
  // this should mostly have already happened in Combine, but if not we do it here
238
252
  for (auto &pht : gstate.intermediate_hts) {
239
253
  if (!pht->IsPartitioned()) {
240
- pht->Partition();
254
+ pht->Partition(true);
241
255
  }
242
256
  }
243
257
  // schedule additional tasks to combine the partial HTs
244
- gstate.finalized_hts.resize(gstate.partition_info.n_partitions);
245
- for (idx_t r = 0; r < gstate.partition_info.n_partitions; r++) {
246
- gstate.finalized_hts[r] = make_shared<GroupedAggregateHashTable>(
247
- context, allocator, group_types, op.payload_types, op.bindings, HtEntryType::HT_WIDTH_64);
248
- }
258
+ InitializeFinalizedHTs(context, gstate_p);
249
259
  gstate.is_partitioned = true;
250
260
  return true;
251
261
  } else { // in the non-partitioned case we immediately combine all the unpartitioned hts created by the threads.
@@ -269,7 +279,7 @@ bool RadixPartitionedHashTable::Finalize(ClientContext &context, GlobalSinkState
269
279
  }
270
280
  }
271
281
 
272
- // this task is run in multiple threads and combines the radix-partitioned hash tables into a single onen and then
282
+ // this task is run in multiple threads and combines the radix-partitioned hash tables into a single one and then
273
283
  // folds them into the global ht finally.
274
284
  class RadixAggregateFinalizeTask : public ExecutorTask {
275
285
  public:
@@ -279,10 +289,21 @@ public:
279
289
  }
280
290
 
281
291
  static void FinalizeHT(RadixHTGlobalState &gstate, idx_t radix) {
282
- D_ASSERT(gstate.partition_info.n_partitions <= gstate.finalized_hts.size());
292
+ D_ASSERT(gstate.partition_info->n_partitions <= gstate.finalized_hts.size());
283
293
  D_ASSERT(gstate.finalized_hts[radix]);
284
- for (auto &pht : gstate.intermediate_hts) {
285
- for (auto &ht : pht->GetPartition(radix)) {
294
+
295
+ idx_t pht_idx_from = 0;
296
+ idx_t pht_idx_to = gstate.intermediate_hts.size();
297
+ if (gstate.repartitioned) {
298
+ const auto num_partitions_before = gstate.repartition_tasks.size();
299
+ const auto multiplier = gstate.partition_info->n_partitions / num_partitions_before;
300
+ const auto radix_before = radix / multiplier;
301
+ pht_idx_from = radix_before * gstate.repartition_tasks_per_partition;
302
+ pht_idx_to = pht_idx_from + gstate.repartition_tasks_per_partition;
303
+ }
304
+
305
+ for (idx_t i = pht_idx_from; i < pht_idx_to; i++) {
306
+ for (auto &ht : gstate.intermediate_hts[i]->GetPartition(radix)) {
286
307
  gstate.finalized_hts[radix]->Combine(*ht);
287
308
  ht.reset();
288
309
  }
@@ -302,22 +323,247 @@ private:
302
323
  idx_t radix;
303
324
  };
304
325
 
326
+ class RadixAggregateRepartitionTask : public ExecutorTask {
327
+ public:
328
+ RadixAggregateRepartitionTask(Executor &executor, shared_ptr<Event> event_p, RadixHTGlobalState &state_p,
329
+ idx_t num_partitions_before_p)
330
+ : ExecutorTask(executor), event(std::move(event_p)), state(state_p),
331
+ num_partitions_before(num_partitions_before_p) {
332
+ }
333
+
334
+ TaskExecutionResult ExecuteTask(TaskExecutionMode mode) override {
335
+ const auto multiplier = state.partition_info->n_partitions / num_partitions_before;
336
+
337
+ idx_t repartition_radix = 0;
338
+ idx_t finalize_radix = 0;
339
+ while (repartition_radix < num_partitions_before && finalize_radix < state.partition_info->n_partitions) {
340
+ // Loop over original partitions until we find one that we can repartition
341
+ for (; repartition_radix < num_partitions_before; repartition_radix++) {
342
+ auto task_idx = state.repartition_tasks_assigned[repartition_radix]++;
343
+ if (task_idx >= state.repartition_tasks_per_partition) {
344
+ continue;
345
+ }
346
+ auto &ht = state.repartition_tasks[repartition_radix][task_idx];
347
+ ht->Partition(true);
348
+ state.intermediate_hts[repartition_radix * state.repartition_tasks_per_partition + task_idx] =
349
+ std::move(ht);
350
+ state.repartition_tasks_done[repartition_radix]++;
351
+ break;
352
+ }
353
+
354
+ // Loop over repartitioned partitions
355
+ for (; finalize_radix < state.partition_info->n_partitions; finalize_radix++) {
356
+ const auto original_radix = finalize_radix / multiplier;
357
+ if (state.repartition_tasks_done[original_radix] != state.repartition_tasks_per_partition) {
358
+ break; // Needs more repartitioning
359
+ }
360
+
361
+ if (state.finalize_assigned[finalize_radix]) {
362
+ continue; // Already assigned
363
+ }
364
+
365
+ {
366
+ lock_guard<mutex> guard(state.lock);
367
+ if (state.finalize_assigned[finalize_radix]) {
368
+ // LCOV_EXCL_START
369
+ continue; // Check again with lock, but already assigned
370
+ // LCOV_EXCL_STOP
371
+ }
372
+ state.finalize_assigned[finalize_radix] = true;
373
+ }
374
+
375
+ // We can finalize!
376
+ RadixAggregateFinalizeTask::FinalizeHT(state, finalize_radix);
377
+ }
378
+ }
379
+ event->FinishTask();
380
+ return TaskExecutionResult::TASK_FINISHED;
381
+ }
382
+
383
+ private:
384
+ shared_ptr<Event> event;
385
+ RadixHTGlobalState &state;
386
+ const idx_t num_partitions_before;
387
+ };
388
+
305
389
  void RadixPartitionedHashTable::ScheduleTasks(Executor &executor, const shared_ptr<Event> &event,
306
390
  GlobalSinkState &state, vector<shared_ptr<Task>> &tasks) const {
307
391
  auto &gstate = state.Cast<RadixHTGlobalState>();
308
392
  if (!gstate.is_partitioned) {
309
393
  return;
310
394
  }
311
- for (idx_t r = 0; r < gstate.partition_info.n_partitions; r++) {
312
- D_ASSERT(gstate.partition_info.n_partitions <= gstate.finalized_hts.size());
313
- D_ASSERT(gstate.finalized_hts[r]);
314
- tasks.push_back(make_uniq<RadixAggregateFinalizeTask>(executor, event, gstate, r));
395
+
396
+ idx_t repartition_radix_bits;
397
+ idx_t concurrent_repartitions;
398
+ idx_t tasks_per_partition;
399
+ GetRepartitionInfo(executor.context, state, repartition_radix_bits, concurrent_repartitions, tasks_per_partition);
400
+ if (repartition_radix_bits == gstate.partition_info->radix_bits) {
401
+ // No repartitioning necessary
402
+ for (idx_t r = 0; r < gstate.partition_info->n_partitions; r++) {
403
+ D_ASSERT(gstate.partition_info->n_partitions <= gstate.finalized_hts.size());
404
+ D_ASSERT(gstate.finalized_hts[r]);
405
+ tasks.push_back(make_uniq<RadixAggregateFinalizeTask>(executor, event, gstate, r));
406
+ }
407
+ } else {
408
+ // Schedule repartition / finalize tasks
409
+ ScheduleRepartitionTasks(executor, event, state, tasks, repartition_radix_bits, concurrent_repartitions,
410
+ tasks_per_partition);
411
+ }
412
+ }
413
+
414
+ void RadixPartitionedHashTable::ScheduleRepartitionTasks(Executor &executor, const shared_ptr<Event> &event,
415
+ GlobalSinkState &state, vector<shared_ptr<Task>> &tasks,
416
+ const idx_t repartition_radix_bits,
417
+ const idx_t concurrent_repartitions,
418
+ const idx_t tasks_per_partition) const {
419
+ auto &gstate = state.Cast<RadixHTGlobalState>();
420
+ D_ASSERT(repartition_radix_bits > gstate.partition_info->radix_bits);
421
+ const auto num_partitions_before = gstate.partition_info->n_partitions;
422
+ const auto multiplier = RadixPartitioning::NumberOfPartitions(repartition_radix_bits) / num_partitions_before;
423
+
424
+ // Initialize gstate
425
+ auto new_partition_info =
426
+ make_uniq<RadixPartitionInfo>(RadixPartitioning::NumberOfPartitions(repartition_radix_bits));
427
+ gstate.repartitioned = true;
428
+ gstate.repartition_tasks_per_partition = tasks_per_partition;
429
+ gstate.repartition_tasks.resize(num_partitions_before);
430
+ gstate.repartition_tasks_assigned = make_uniq_array<atomic<idx_t>>(num_partitions_before);
431
+ gstate.repartition_tasks_done = make_uniq_array<atomic<idx_t>>(num_partitions_before);
432
+ gstate.finalize_assigned = make_uniq_array<atomic<bool>>(new_partition_info->n_partitions);
433
+ for (idx_t partition_idx = 0; partition_idx < num_partitions_before; partition_idx++) {
434
+ gstate.repartition_tasks_assigned[partition_idx] = 0;
435
+ gstate.repartition_tasks_done[partition_idx] = 0;
436
+
437
+ // Grab intermediate data from gstate
438
+ HashTableList partition_list;
439
+ for (auto &pht : gstate.intermediate_hts) {
440
+ for (auto &ht : pht->GetPartition(partition_idx)) {
441
+ partition_list.push_back(std::move(ht));
442
+ }
443
+ }
444
+
445
+ // Spread the data across the tasks
446
+ const idx_t hts_per_task = (partition_list.size() + tasks_per_partition - 1) / tasks_per_partition;
447
+ idx_t ht_idx = 0;
448
+ for (idx_t task_idx = 0; task_idx < tasks_per_partition; task_idx++) {
449
+ auto task_ht =
450
+ make_uniq<PartitionableHashTable>(executor.context, BufferAllocator::Get(executor.context),
451
+ *new_partition_info, group_types, op.payload_types, op.bindings);
452
+ auto ht_idx_to = MinValue<idx_t>(ht_idx + hts_per_task, partition_list.size());
453
+ for (; ht_idx < ht_idx_to; ht_idx++) {
454
+ auto &ht = partition_list[ht_idx];
455
+ task_ht->Append(*ht);
456
+ ht.reset();
457
+ }
458
+ gstate.repartition_tasks[partition_idx].push_back(std::move(task_ht));
459
+ }
460
+
461
+ for (idx_t i = 0; i < multiplier; i++) {
462
+ gstate.finalize_assigned[partition_idx * multiplier + i] = false;
463
+ }
464
+ }
465
+
466
+ // Schedule tasks equal to number of threads
467
+ const idx_t num_threads = TaskScheduler::GetScheduler(executor.context).NumberOfThreads();
468
+ for (idx_t i = 0; i < num_threads; i++) {
469
+ tasks.emplace_back(make_shared<RadixAggregateRepartitionTask>(executor, event, gstate, num_partitions_before));
470
+ }
471
+
472
+ gstate.intermediate_hts.clear();
473
+ gstate.intermediate_hts.resize(num_partitions_before * tasks_per_partition);
474
+
475
+ gstate.partition_info = std::move(new_partition_info);
476
+ InitializeFinalizedHTs(executor.context, state);
477
+ }
478
+
479
+ bool RadixPartitionedHashTable::ForceSingleHT(GlobalSinkState &state) {
480
+ auto &gstate = state.Cast<RadixHTGlobalState>();
481
+ return gstate.partition_info->n_partitions < 2;
482
+ }
483
+
484
+ bool RadixPartitionedHashTable::AnyPartitioned(GlobalSinkState &state) {
485
+ auto &gstate = state.Cast<RadixHTGlobalState>();
486
+ for (auto &pht : gstate.intermediate_hts) {
487
+ if (pht->IsPartitioned()) {
488
+ return true;
489
+ }
315
490
  }
491
+ return false;
316
492
  }
317
493
 
318
- bool RadixPartitionedHashTable::ForceSingleHT(GlobalSinkState &state) const {
494
+ void RadixPartitionedHashTable::GetRepartitionInfo(ClientContext &context, GlobalSinkState &state,
495
+ idx_t &repartition_radix_bits, idx_t &concurrent_repartitions,
496
+ idx_t &tasks_per_partition) {
319
497
  auto &gstate = state.Cast<RadixHTGlobalState>();
320
- return gstate.partition_info.n_partitions < 2;
498
+ const auto num_partitions = gstate.partition_info->n_partitions;
499
+ const auto radix_bits = gstate.partition_info->radix_bits;
500
+ D_ASSERT(IsPowerOfTwo(num_partitions));
501
+
502
+ vector<idx_t> partition_counts(num_partitions, 0);
503
+ vector<idx_t> partition_sizes(num_partitions, 0);
504
+ for (const auto &ht : gstate.intermediate_hts) {
505
+ for (idx_t partition_idx = 0; partition_idx < num_partitions; partition_idx++) {
506
+ partition_counts[partition_idx] += ht->GetPartitionCount(partition_idx);
507
+ partition_sizes[partition_idx] += ht->GetPartitionSize(partition_idx);
508
+ }
509
+ }
510
+
511
+ idx_t total_size = 0;
512
+ idx_t max_partition_idx = 0;
513
+ idx_t max_partition_size = 0;
514
+ for (idx_t partition_idx = 0; partition_idx < num_partitions; partition_idx++) {
515
+ const auto &partition_count = partition_counts[partition_idx];
516
+ const auto &partition_size = partition_sizes[partition_idx];
517
+ auto partition_ht_size =
518
+ partition_size + GroupedAggregateHashTable::FirstPartSize(partition_count, HtEntryType::HT_WIDTH_64);
519
+ if (partition_ht_size > max_partition_size) {
520
+ max_partition_idx = partition_idx;
521
+ max_partition_size = partition_ht_size;
522
+ }
523
+ total_size += partition_ht_size;
524
+ }
525
+
526
+ // Switch to out-of-core finalize at ~60%
527
+ const auto max_ht_size = double(0.6) * BufferManager::GetBufferManager(context).GetMaxMemory();
528
+ const idx_t n_threads = PreviousPowerOfTwo(TaskScheduler::GetScheduler(context).NumberOfThreads());
529
+ D_ASSERT(IsPowerOfTwo(n_threads));
530
+ if (!context.config.force_external && total_size < max_ht_size) {
531
+ // In-memory finalize
532
+ if (num_partitions >= n_threads) { // Can already keep all threads busy
533
+ repartition_radix_bits = radix_bits;
534
+ tasks_per_partition = 1;
535
+ } else { // Repartition to keep all threads busy
536
+ // Can't have coverage because RadixHTGlobalState::MAX_RADIX_PARTITIONS > threads on github actions
537
+ // LCOV_EXCL_START
538
+ repartition_radix_bits = RadixPartitioning::RadixBits(NextPowerOfTwo(n_threads));
539
+ tasks_per_partition = n_threads / num_partitions;
540
+ // LCOV_EXCL_STOP
541
+ }
542
+ concurrent_repartitions = num_partitions;
543
+ return;
544
+ }
545
+
546
+ // Out-of-core finalize
547
+ const auto partition_count = partition_counts[max_partition_idx];
548
+ const auto partition_size = partition_sizes[max_partition_idx];
549
+
550
+ const auto max_added_bits = RadixPartitioning::MAX_RADIX_BITS - radix_bits;
551
+ idx_t added_bits;
552
+ for (added_bits = 1; added_bits < max_added_bits; added_bits++) {
553
+ double partition_multiplier = RadixPartitioning::NumberOfPartitions(added_bits);
554
+
555
+ auto new_estimated_count = double(partition_count) / partition_multiplier;
556
+ auto new_estimated_size = double(partition_size) / partition_multiplier;
557
+ auto new_estimated_ht_size = new_estimated_size + GroupedAggregateHashTable::FirstPartSize(
558
+ new_estimated_count, HtEntryType::HT_WIDTH_64);
559
+
560
+ if (new_estimated_ht_size <= max_ht_size / n_threads) {
561
+ break; // Max HT size is safe
562
+ }
563
+ }
564
+ repartition_radix_bits = radix_bits + added_bits;
565
+ concurrent_repartitions = MinValue<idx_t>(MaxValue<idx_t>(1, max_ht_size / max_partition_size), n_threads);
566
+ tasks_per_partition = NextPowerOfTwo(n_threads / concurrent_repartitions);
321
567
  }
322
568
 
323
569
  //===--------------------------------------------------------------------===//
@@ -342,7 +588,7 @@ public:
342
588
  class RadixHTLocalSourceState : public LocalSourceState {
343
589
  public:
344
590
  explicit RadixHTLocalSourceState(ExecutionContext &context, const RadixPartitionedHashTable &ht) {
345
- auto &allocator = Allocator::Get(context.client);
591
+ auto &allocator = BufferAllocator::Get(context.client);
346
592
  auto scan_chunk_types = ht.group_types;
347
593
  for (auto &aggr_type : ht.op.aggregate_return_types) {
348
594
  scan_chunk_types.push_back(aggr_type);
@@ -361,7 +607,7 @@ public:
361
607
  };
362
608
 
363
609
  unique_ptr<GlobalSourceState> RadixPartitionedHashTable::GetGlobalSourceState(ClientContext &context) const {
364
- return make_uniq<RadixHTGlobalSourceState>(Allocator::Get(context), *this);
610
+ return make_uniq<RadixHTGlobalSourceState>(BufferAllocator::Get(context), *this);
365
611
  }
366
612
 
367
613
  unique_ptr<LocalSourceState> RadixPartitionedHashTable::GetLocalSourceState(ExecutionContext &context) const {
@@ -401,13 +647,14 @@ SourceResultType RadixPartitionedHashTable::GetData(ExecutionContext &context, D
401
647
  chunk.data[null_group].SetVectorType(VectorType::CONSTANT_VECTOR);
402
648
  ConstantVector::SetNull(chunk.data[null_group], true);
403
649
  }
650
+ ArenaAllocator allocator(BufferAllocator::Get(context.client));
404
651
  for (idx_t i = 0; i < op.aggregates.size(); i++) {
405
652
  D_ASSERT(op.aggregates[i]->GetExpressionClass() == ExpressionClass::BOUND_AGGREGATE);
406
653
  auto &aggr = op.aggregates[i]->Cast<BoundAggregateExpression>();
407
654
  auto aggr_state = make_unsafe_uniq_array<data_t>(aggr.function.state_size());
408
655
  aggr.function.initialize(aggr_state.get());
409
656
 
410
- AggregateInputData aggr_input_data(aggr.bind_info.get(), Allocator::DefaultAllocator());
657
+ AggregateInputData aggr_input_data(aggr.bind_info.get(), allocator);
411
658
  Vector state_vector(Value::POINTER(CastPointerToValue(aggr_state.get())));
412
659
  aggr.function.finalize(state_vector, aggr_input_data, chunk.data[null_groups.size() + i], 1, 0);
413
660
  if (aggr.function.destructor) {